Convert Suggest to Kotlin

No functional changes.
This should make the remaining TODOs simpler to address.
This commit is contained in:
Helium314 2024-02-06 20:55:15 +01:00
parent 26809444bd
commit 00a45a621e
5 changed files with 585 additions and 634 deletions

View file

@ -86,7 +86,7 @@ public interface DictionaryFacilitator {
boolean isActive(); boolean isActive();
Locale getMainLocale(); @NonNull Locale getMainLocale();
// useful for multilingual typing // useful for multilingual typing
Locale getCurrentLocale(); Locale getCurrentLocale();

View file

@ -266,6 +266,7 @@ public class DictionaryFacilitatorImpl implements DictionaryFacilitator {
} }
@Override @Override
@NonNull
public Locale getMainLocale() { public Locale getMainLocale() {
return mDictionaryGroups.get(0).mLocale; return mDictionaryGroups.get(0).mLocale;
} }

View file

@ -660,8 +660,8 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen
resetDictionaryFacilitatorIfNecessary(); resetDictionaryFacilitatorIfNecessary();
} }
refreshPersonalizationDictionarySession(currentSettingsValues); refreshPersonalizationDictionarySession(currentSettingsValues);
Suggest.nextWordSuggestionsCache.clear(); mInputLogic.mSuggest.clearNextWordSuggestionsCache();
mStatsUtilsManager.onLoadSettings(this /* context */, currentSettingsValues); mStatsUtilsManager.onLoadSettings(this, currentSettingsValues);
} }
private void refreshPersonalizationDictionarySession( private void refreshPersonalizationDictionarySession(

View file

@ -1,631 +0,0 @@
/*
* Copyright (C) 2008 The Android Open Source Project
* modified
* SPDX-License-Identifier: Apache-2.0 AND GPL-3.0-only
*/
package helium314.keyboard.latin;
import android.text.TextUtils;
import helium314.keyboard.latin.utils.Log;
import helium314.keyboard.keyboard.Keyboard;
import helium314.keyboard.latin.SuggestedWords.SuggestedWordInfo;
import helium314.keyboard.latin.common.ComposedData;
import helium314.keyboard.latin.common.Constants;
import helium314.keyboard.latin.common.InputPointers;
import helium314.keyboard.latin.common.StringUtils;
import helium314.keyboard.latin.define.DebugFlags;
import helium314.keyboard.latin.settings.Settings;
import helium314.keyboard.latin.settings.SettingsValuesForSuggestion;
import helium314.keyboard.latin.suggestions.SuggestionStripView;
import helium314.keyboard.latin.utils.AutoCorrectionUtils;
import com.android.inputmethod.latin.utils.BinaryDictionaryUtils;
import helium314.keyboard.latin.utils.SuggestionResults;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import static helium314.keyboard.latin.define.DecoderSpecificConstants.SHOULD_AUTO_CORRECT_USING_NON_WHITE_LISTED_SUGGESTION;
import static helium314.keyboard.latin.define.DecoderSpecificConstants.SHOULD_REMOVE_PREVIOUSLY_REJECTED_SUGGESTION;
import androidx.annotation.NonNull;
import androidx.annotation.Nullable;
import kotlin.collections.CollectionsKt;
/**
* This class loads a dictionary and provides a list of suggestions for a given sequence of
* characters. This includes corrections and completions.
*/
public final class Suggest {
// ---- constants ----
public static final String TAG = Suggest.class.getSimpleName();
// Session ids handed to the dictionary facilitator when requesting suggestions.
// Typing and gesture input share the same ID to save RAM footprint.
public static final int SESSION_ID_TYPING = 0;
public static final int SESSION_ID_GESTURE = 0;
// Batch-input suggestions scoring below this value are dropped; close to -2**31
private static final int SUPPRESS_SUGGEST_THRESHOLD = -2000000000;
private static final int MAXIMUM_AUTO_CORRECT_LENGTH_FOR_GERMAN = 12;

// ---- shared (static) state ----
// Per-language maximum length of a suggestion containing a space that may still be auto-corrected to.
private static final HashMap<String, Integer> sLanguageToMaximumAutoCorrectionWithSpaceLength;
static {
    sLanguageToMaximumAutoCorrectionWithSpaceLength = new HashMap<>();
    // TODO: should we add Finnish here?
    // TODO: This should not be hardcoded here but be written in the dictionary header
    sLanguageToMaximumAutoCorrectionWithSpaceLength.put(
            Locale.GERMAN.getLanguage(), MAXIMUM_AUTO_CORRECT_LENGTH_FOR_GERMAN);
}
// Next-word suggestions per ngram context.
// cleared whenever LatinIME.loadSettings is called, notably on changing layout and switching input fields
public static final HashMap<NgramContext, SuggestionResults> nextWordSuggestionsCache = new HashMap<>();

// ---- instance state ----
private final DictionaryFacilitator mDictionaryFacilitator;
// normalized-score threshold, see setAutoCorrectionThreshold
private float mAutoCorrectionThreshold;
private float mPlausibilityThreshold;
/**
 * Creates a new instance.
 * @param dictionaryFacilitator the facilitator this instance queries for suggestion results
 */
public Suggest(final DictionaryFacilitator dictionaryFacilitator) {
    this.mDictionaryFacilitator = dictionaryFacilitator;
}
/**
 * Stores the normalized score a suggestion has to reach before it is considered strong
 * enough to auto-correct to.
 * @param threshold the new threshold
 */
public void setAutoCorrectionThreshold(final float threshold) {
    this.mAutoCorrectionThreshold = threshold;
}
/** Receives the result of a {@code getSuggestedWords} request. */
public interface OnGetSuggestedWordsCallback {
    /** @param suggestedWords the suggestions assembled for the request */
    void onGetSuggestedWords(SuggestedWords suggestedWords);
}
/**
 * Computes suggestions for the current input and hands them to {@code callback}.
 * Batch (gesture) input and non-batch input (typing, recorrection, predictions) are
 * served by separate code paths; {@code isCorrectionEnabled} is only relevant for the
 * non-batch path.
 */
public void getSuggestedWords(final WordComposer wordComposer,
        final NgramContext ngramContext, final Keyboard keyboard,
        final SettingsValuesForSuggestion settingsValuesForSuggestion,
        final boolean isCorrectionEnabled, final int inputStyle, final int sequenceNumber,
        final OnGetSuggestedWordsCallback callback) {
    if (!wordComposer.isBatchMode()) {
        getSuggestedWordsForNonBatchInput(wordComposer, ngramContext, keyboard,
                settingsValuesForSuggestion, inputStyle, isCorrectionEnabled,
                sequenceNumber, callback);
        return;
    }
    getSuggestedWordsForBatchInput(wordComposer, ngramContext, keyboard,
            settingsValuesForSuggestion, inputStyle, sequenceNumber, callback);
}
/**
 * Copies {@code results} into a list and, where the composer state asks for it, replaces
 * every entry by a re-cased / quote-extended version of itself.
 *
 * @param wordComposer source of the capitalization state
 * @param results the raw suggestion results
 * @param trailingSingleQuotesCount number of single quotes at the end of the typed word
 * @param defaultLocale locale used for entries whose source dictionary has no locale
 * @return a new list; entries are only replaced if a transformation is required
 */
private static ArrayList<SuggestedWordInfo> getTransformedSuggestedWordInfoList(
        final WordComposer wordComposer, final SuggestionResults results,
        final int trailingSingleQuotesCount, final Locale defaultLocale) {
    final boolean allUpperCase = wordComposer.isAllUpperCase() && !wordComposer.isResumed();
    final boolean onlyFirstCharCapitalized = wordComposer.isOrWillBeOnlyFirstCharCapitalized();
    final ArrayList<SuggestedWordInfo> transformed = new ArrayList<>(results);
    final boolean needsTransformation =
            onlyFirstCharCapitalized || allUpperCase || trailingSingleQuotesCount != 0;
    if (!needsTransformation) {
        return transformed;
    }
    for (int index = 0; index < transformed.size(); index++) {
        final SuggestedWordInfo original = transformed.get(index);
        Locale wordLocale = original.mSourceDict.mLocale;
        if (wordLocale == null) {
            wordLocale = defaultLocale;
        }
        transformed.set(index, getTransformedSuggestedWordInfo(original, wordLocale,
                allUpperCase, onlyFirstCharCapitalized, trailingSingleQuotesCount));
    }
    return transformed;
}
/**
 * @param suggestions the suggestions to inspect
 * @return the first suggestion if it is of kind {@code KIND_WHITELIST}, null otherwise
 *  (including when the list is empty)
 */
private static SuggestedWordInfo getWhitelistedWordInfoOrNull(
        @NonNull final List<SuggestedWordInfo> suggestions) {
    final SuggestedWordInfo candidate = suggestions.isEmpty() ? null : suggestions.get(0);
    final boolean isWhitelisted =
            candidate != null && candidate.isKindOf(SuggestedWordInfo.KIND_WHITELIST);
    return isWhitelisted ? candidate : null;
}
/**
 * Retrieves suggestions for non-batch input (typing, recorrection, predictions...)
 * and calls the callback function with the suggestions.
 *
 * @param wordComposer the composer holding the typed word and its capitalization state
 * @param ngramContext the previously typed words
 * @param keyboard passed through to the dictionary lookups
 * @param settingsValuesForSuggestion passed through to the dictionary lookups
 * @param inputStyleIfNotPrediction input style reported unless the results are predictions
 * @param isCorrectionEnabled whether auto-correction may happen at all
 * @param sequenceNumber passed through to the resulting {@link SuggestedWords}
 * @param callback receives the resulting {@link SuggestedWords}
 */
private void getSuggestedWordsForNonBatchInput(final WordComposer wordComposer,
        final NgramContext ngramContext, final Keyboard keyboard,
        final SettingsValuesForSuggestion settingsValuesForSuggestion,
        final int inputStyleIfNotPrediction, final boolean isCorrectionEnabled,
        final int sequenceNumber, final OnGetSuggestedWordsCallback callback) {
    final String typedWordString = wordComposer.getTypedWord();
    final int trailingSingleQuotesCount = StringUtils.getTrailingSingleQuotesCount(typedWordString);
    // with an empty typed word we only need next word suggestions, which may come from the cache
    final SuggestionResults suggestionResults = typedWordString.isEmpty()
            ? getNextWordSuggestions(ngramContext, keyboard, inputStyleIfNotPrediction, settingsValuesForSuggestion)
            : mDictionaryFacilitator.getSuggestionResults(
                    wordComposer.getComposedDataSnapshot(), ngramContext, keyboard,
                    settingsValuesForSuggestion, SESSION_ID_TYPING, inputStyleIfNotPrediction);
    final Locale locale = mDictionaryFacilitator.getMainLocale();
    final ArrayList<SuggestedWordInfo> suggestionsContainer =
            getTransformedSuggestedWordInfoList(wordComposer, suggestionResults,
                    trailingSingleQuotesCount, locale);
    Dictionary sourceDictionaryOfRemovedWord = null;
    // store the original SuggestedWordInfo for typed word, as it will be removed
    // we may want to re-add it in case auto-correction happens, so that the original word can at least be selected
    SuggestedWordInfo typedWordFirstOccurrenceWordInfo = null;
    for (final SuggestedWordInfo info : suggestionsContainer) {
        // The first suggestion that equals the typed word provides the source dictionary.
        if (typedWordString.equals(info.mWord)) {
            sourceDictionaryOfRemovedWord = info.mSourceDict;
            typedWordFirstOccurrenceWordInfo = info;
            break;
        }
    }
    final int firstOccurrenceOfTypedWordInSuggestions =
            SuggestedWordInfo.removeDupsAndTypedWord(typedWordString, suggestionsContainer);
    final boolean resultsArePredictions = !wordComposer.isComposingWord();
    // SuggestedWordInfos for suggestions for empty word (based only on previously typed words)
    // done in a weird way to imitate what kotlin does with lazy: the list starts empty and is
    // only filled when shouldBeAutoCorrected runs the Runnable below
    final ArrayList<SuggestedWordInfo> firstAndTypedWordEmptyInfos = new ArrayList<>(2);
    final boolean[] thoseTwo = shouldBeAutoCorrected(
            trailingSingleQuotesCount,
            typedWordString,
            suggestionsContainer,
            sourceDictionaryOfRemovedWord,
            firstAndTypedWordEmptyInfos,
            () -> {
                final SuggestedWordInfo firstSuggestionInContainer = suggestionsContainer.isEmpty() ? null : suggestionsContainer.get(0);
                SuggestedWordInfo first = firstSuggestionInContainer != null ? firstSuggestionInContainer : suggestionResults.first();
                putEmptyWordSuggestions(firstAndTypedWordEmptyInfos, ngramContext, keyboard,
                        settingsValuesForSuggestion, inputStyleIfNotPrediction, first.getWord(), typedWordString);
            },
            isCorrectionEnabled,
            wordComposer,
            suggestionResults,
            firstOccurrenceOfTypedWordInSuggestions,
            typedWordFirstOccurrenceWordInfo
    );
    final boolean allowsToBeAutoCorrected = thoseTwo[0];
    final boolean hasAutoCorrection = thoseTwo[1];
    final SuggestedWordInfo typedWordInfo = new SuggestedWordInfo(typedWordString,
            "" /* prevWordsContext */, SuggestedWordInfo.MAX_SCORE,
            SuggestedWordInfo.KIND_TYPED,
            null == sourceDictionaryOfRemovedWord ? Dictionary.DICTIONARY_USER_TYPED
                    : sourceDictionaryOfRemovedWord,
            SuggestedWordInfo.NOT_AN_INDEX /* indexOfTouchPointOfSecondWord */,
            SuggestedWordInfo.NOT_A_CONFIDENCE /* autoCommitFirstWordConfidence */);
    // the typed word, if any, always comes first
    if (!TextUtils.isEmpty(typedWordString)) {
        suggestionsContainer.add(0, typedWordInfo);
    }
    final ArrayList<SuggestedWordInfo> suggestionsList;
    if (SuggestionStripView.DEBUG_SUGGESTIONS && !suggestionsContainer.isEmpty()) {
        suggestionsList = getSuggestionsInfoListWithDebugInfo(typedWordString, suggestionsContainer);
    } else {
        suggestionsList = suggestionsContainer;
    }
    final int inputStyle;
    if (resultsArePredictions) {
        inputStyle = suggestionResults.mIsBeginningOfSentence
                ? SuggestedWords.INPUT_STYLE_BEGINNING_OF_SENTENCE_PREDICTION
                : SuggestedWords.INPUT_STYLE_PREDICTION;
    } else {
        inputStyle = inputStyleIfNotPrediction;
    }
    final boolean isTypedWordValid = firstOccurrenceOfTypedWordInSuggestions > -1
            || (!resultsArePredictions && !allowsToBeAutoCorrected);
    if (hasAutoCorrection) {
        // make sure typed word is shown, so user is able to override incoming autocorrection
        // The word is re-added at index 2. The list can be shorter than that (e.g. when the only
        // result was the typed word itself, which got removed above), so clamp the index to
        // avoid an IndexOutOfBoundsException.
        final int typedWordIndex = Math.min(2, suggestionsList.size());
        if (typedWordFirstOccurrenceWordInfo != null) {
            if (SuggestionStripView.DEBUG_SUGGESTIONS)
                addDebugInfo(typedWordFirstOccurrenceWordInfo, typedWordString);
            suggestionsList.add(typedWordIndex, typedWordFirstOccurrenceWordInfo);
        } else {
            suggestionsList.add(typedWordIndex, new SuggestedWordInfo(typedWordString, "", 0, SuggestedWordInfo.KIND_TYPED,
                    Dictionary.DICTIONARY_USER_TYPED, SuggestedWordInfo.NOT_AN_INDEX, SuggestedWordInfo.NOT_A_CONFIDENCE));
        }
    }
    callback.onGetSuggestedWords(new SuggestedWords(suggestionsList, suggestionResults.mRawSuggestions,
            typedWordInfo, isTypedWordValid, hasAutoCorrection, false, inputStyle, sequenceNumber));
}
// annoyingly complicated thing to avoid getting emptyWordSuggestions more than once
// todo: now with the cache just remove it...
//  and best convert the class to kotlin, that should make it much more readable
/**
 * Puts word infos for suggestions with an empty word in {@code infos}, based on previously
 * typed words. Does nothing if {@code infos} is not empty, so the lookup happens at most once
 * per list.
 * <p>
 * After the call, index 0 holds the next-word suggestion equal to
 * {@code firstSuggestionInContainer} and index 1 the one equal to {@code typedWordString};
 * either entry is null if no such next-word suggestion exists.
 *
 * @return {@code infos}
 */
private ArrayList<SuggestedWordInfo> putEmptyWordSuggestions(ArrayList<SuggestedWordInfo> infos, NgramContext ngramContext,
        Keyboard keyboard, SettingsValuesForSuggestion settingsValuesForSuggestion,
        int inputStyleIfNotPrediction, String firstSuggestionInContainer, String typedWordString) {
    if (infos.size() != 0) return infos;
    final SuggestionResults emptyWordSuggestions = getNextWordSuggestions(ngramContext, keyboard, inputStyleIfNotPrediction, settingsValuesForSuggestion);
    final SuggestedWordInfo nextWordSuggestionInfoForFirstSuggestionInContainer =
            CollectionsKt.firstOrNull(emptyWordSuggestions, (word) -> word.mWord.equals(firstSuggestionInContainer));
    final SuggestedWordInfo nextWordSuggestionInfoForTypedWord =
            CollectionsKt.firstOrNull(emptyWordSuggestions, (word) -> word.mWord.equals(typedWordString));
    // The results must end up at indices 0 and 1, because that is where the consumer reads
    // them. Previously two null placeholders were added first, which pushed the real entries
    // to indices 2 and 3 and made the consumer always see null.
    infos.add(nextWordSuggestionInfoForFirstSuggestionInContainer);
    infos.add(nextWordSuggestionInfoForTypedWord);
    return infos;
}
/**
 * Decides whether the typed word may be auto-corrected and whether an auto-correction
 * actually happens.
 *
 * @param trailingSingleQuotesCount number of single quotes at the end of the typed word;
 *  they are ignored for the considered word
 * @param typedWordString the typed word
 * @param suggestionsContainer the suggestions, without the typed word
 * @param sourceDictionaryOfRemovedWord dictionary the typed word was found in, or null
 * @param firstAndTypedWordEmptyInfos list that is filled by {@code putEmptyWordSuggestions};
 *  this method reads index 0 as the next-word info for the first suggestion and index 1
 *  as the next-word info for the typed word (both may be null)
 * @param putEmptyWordSuggestions fills {@code firstAndTypedWordEmptyInfos}, only run when needed
 * @param isCorrectionEnabled whether auto-correction is enabled at all
 * @param wordComposer the composer holding the state of the typed word
 * @param suggestionResults the raw suggestion results
 * @param firstOccurrenceOfTypedWordInSuggestions index of the typed word in the suggestions
 * @param typedWordFirstOccurrenceWordInfo the suggestion equal to the typed word, or null
 * @return [allowsToBeAutoCorrected, hasAutoCorrection]
 */
boolean[] shouldBeAutoCorrected(
        final int trailingSingleQuotesCount,
        final String typedWordString,
        final List<SuggestedWordInfo> suggestionsContainer,
        final Dictionary sourceDictionaryOfRemovedWord,
        final List<SuggestedWordInfo> firstAndTypedWordEmptyInfos,
        final Runnable putEmptyWordSuggestions,
        final boolean isCorrectionEnabled,
        final WordComposer wordComposer,
        final SuggestionResults suggestionResults,
        final int firstOccurrenceOfTypedWordInSuggestions,
        final SuggestedWordInfo typedWordFirstOccurrenceWordInfo
) {
    final String consideredWord = trailingSingleQuotesCount > 0
            ? typedWordString.substring(0, typedWordString.length() - trailingSingleQuotesCount)
            : typedWordString;
    final SuggestedWordInfo whitelistedWordInfo =
            getWhitelistedWordInfoOrNull(suggestionsContainer);
    final String whitelistedWord = whitelistedWordInfo == null
            ? null : whitelistedWordInfo.mWord;
    final SuggestedWordInfo firstSuggestionInContainer = suggestionsContainer.isEmpty() ? null : suggestionsContainer.get(0);
    // We allow auto-correction if whitelisting is not required or the word is whitelisted,
    // or if the word had more than one char and was not suggested.
    final boolean allowsToBeAutoCorrected;
    final int scoreLimit = Settings.getInstance().getCurrent().mScoreLimitForAutocorrect;
    if ((SHOULD_AUTO_CORRECT_USING_NON_WHITE_LISTED_SUGGESTION || whitelistedWord != null)
            || (consideredWord.length() > 1 && (sourceDictionaryOfRemovedWord == null)) // more than 1 letter and not in dictionary
    ) {
        allowsToBeAutoCorrected = true;
    } else if (firstSuggestionInContainer != null && !typedWordString.isEmpty()) {
        // maybe allow autocorrect, depending on scores and emptyWordSuggestions
        putEmptyWordSuggestions.run();
        final SuggestedWordInfo first = firstAndTypedWordEmptyInfos.get(0);
        final SuggestedWordInfo typed = firstAndTypedWordEmptyInfos.get(1);
        if (firstSuggestionInContainer.mScore > scoreLimit) {
            allowsToBeAutoCorrected = true; // suggestion has good score, allow
        } else if (first == null) {
            allowsToBeAutoCorrected = false; // no autocorrect if first suggestion unknown in this context
        } else if (typed == null) {
            allowsToBeAutoCorrected = true; // allow autocorrect if typed word not known in this context, todo: this may be too aggressive
        } else {
            // autocorrect if suggested word has clearly higher score for empty word suggestions
            allowsToBeAutoCorrected = (first.mScore - typed.mScore) > 20;
        }
    } else {
        allowsToBeAutoCorrected = false;
    }
    final boolean hasAutoCorrection;
    // If correction is not enabled, we never auto-correct. This is for example for when
    // the setting "Auto-correction" is "off": we still suggest, but we don't auto-correct.
    if (!isCorrectionEnabled
            // todo: can some parts be moved to isCorrectionEnabled? e.g. keyboardIdMode only depends on input type
            //  i guess then not mAutoCorrectionEnabledPerUserSettings should be read, but rather some isAutocorrectEnabled()
            // If the word does not allow to be auto-corrected, then we don't auto-correct.
            || !allowsToBeAutoCorrected
            // If we are doing prediction, then we never auto-correct of course
            || !wordComposer.isComposingWord()
            // If we don't have suggestion results, we can't evaluate the first suggestion
            // for auto-correction
            || suggestionResults.isEmpty()
            // If the word has digits, we never auto-correct because it's likely the word
            // was type with a lot of care
            || wordComposer.hasDigits()
            // If the word is mostly caps, we never auto-correct because this is almost
            // certainly intentional (and careful input)
            || wordComposer.isMostlyCaps()
            // We never auto-correct when suggestions are resumed because it would be unexpected
            || wordComposer.isResumed()
            // If we don't have a main dictionary, we never want to auto-correct. The reason
            // for this is, the user may have a contact whose name happens to match a valid
            // word in their language, and it will unexpectedly auto-correct. For example, if
            // the user types in English with no dictionary and has a "Will" in their contact
            // list, "will" would always auto-correct to "Will" which is unwanted. Hence, no
            // main dict => no auto-correct. Also, it would probably get obnoxious quickly.
            // TODO: now that we have personalization, we may want to re-evaluate this decision
            || !mDictionaryFacilitator.hasAtLeastOneInitializedMainDictionary()) {
        hasAutoCorrection = false;
    } else {
        // allowsToBeAutoCorrected is known to be true in this branch, hence the literal "true"
        // as first element of the early returns below
        final SuggestedWordInfo firstSuggestion = suggestionResults.first();
        if (suggestionResults.mFirstSuggestionExceedsConfidenceThreshold
                && firstOccurrenceOfTypedWordInSuggestions != 0) {
            // mFirstSuggestionExceedsConfidenceThreshold is always set to false, so currently
            // this branch is useless
            return new boolean[]{ true, true };
        }
        if (!AutoCorrectionUtils.suggestionExceedsThreshold(
                firstSuggestion, consideredWord, mAutoCorrectionThreshold)) {
            // todo: maybe also do something here depending on ngram context?
            // Score is too low for autocorrect
            return new boolean[]{ true, false };
        }
        // We have a high score, so we need to check if this suggestion is in the correct
        // form to allow auto-correcting to it in this language. For details of how this
        // is determined, see #isAllowedByAutoCorrectionWithSpaceFilter.
        // TODO: this should not have its own logic here but be handled by the dictionary.
        final boolean allowed = isAllowedByAutoCorrectionWithSpaceFilter(firstSuggestion);
        if (allowed && typedWordFirstOccurrenceWordInfo != null && typedWordFirstOccurrenceWordInfo.mScore > scoreLimit) {
            // typed word is valid and has good score
            // do not auto-correct if typed word is better match than first suggestion
            final SuggestedWordInfo first = firstSuggestionInContainer != null ? firstSuggestionInContainer : firstSuggestion;
            final Locale dictLocale = mDictionaryFacilitator.getCurrentLocale();
            if (first.mScore < scoreLimit) {
                // don't allow if suggestion has too low score
                return new boolean[]{ true, false };
            }
            // Compare locales by value (null-safe) instead of by identity: two Locale
            // instances describing the same locale are not necessarily the same object.
            final Locale firstLocale = first.mSourceDict.mLocale;
            final Locale typedLocale = typedWordFirstOccurrenceWordInfo.mSourceDict.mLocale;
            final boolean sameDictLocale = firstLocale == null
                    ? typedLocale == null : firstLocale.equals(typedLocale);
            if (!sameDictLocale) {
                // dict locale different -> return the better match
                final boolean firstIsInCurrentLocale = dictLocale == null
                        ? firstLocale == null : dictLocale.equals(firstLocale);
                return new boolean[]{ true, firstIsInCurrentLocale };
            }
            // the score difference may need tuning, but so far it seems alright
            final int firstWordBonusScore = (first.isKindOf(SuggestedWordInfo.KIND_WHITELIST) ? 20 : 0) // large bonus because it's wanted by dictionary
                    + (StringUtils.isLowerCaseAscii(typedWordString) ? 5 : 0) // small bonus because typically only ascii is typed (applies to latin keyboards only)
                    + (first.mScore > typedWordFirstOccurrenceWordInfo.mScore ? 5 : 0); // small bonus if score is higher
            putEmptyWordSuggestions.run();
            int firstScoreForEmpty = firstAndTypedWordEmptyInfos.get(0) != null ? firstAndTypedWordEmptyInfos.get(0).mScore : 0;
            int typedScoreForEmpty = firstAndTypedWordEmptyInfos.get(1) != null ? firstAndTypedWordEmptyInfos.get(1).mScore : 0;
            if (firstScoreForEmpty + firstWordBonusScore >= typedScoreForEmpty + 20) {
                // return the better match for ngram context
                // biased towards typed word (it gets a head start of 20),
                // but with the bonus for the first suggestion computed above
                return new boolean[]{ true, true };
            }
            hasAutoCorrection = false;
        } else {
            hasAutoCorrection = allowed;
        }
    }
    return new boolean[]{ allowsToBeAutoCorrected, hasAutoCorrection };
}
/**
 * Retrieves suggestions for the batch input
 * and calls the callback function with the suggestions.
 * The resulting {@link SuggestedWords} always reports a valid typed word and never an
 * auto-correction.
 */
private void getSuggestedWordsForBatchInput(final WordComposer wordComposer,
        final NgramContext ngramContext, final Keyboard keyboard,
        final SettingsValuesForSuggestion settingsValuesForSuggestion,
        final int inputStyle, final int sequenceNumber,
        final OnGetSuggestedWordsCallback callback) {
    final SuggestionResults gestureResults = mDictionaryFacilitator.getSuggestionResults(
            wordComposer.getComposedDataSnapshot(), ngramContext, keyboard,
            settingsValuesForSuggestion, SESSION_ID_GESTURE, inputStyle);
    replaceSingleLetterFirstSuggestion(gestureResults);

    // The main locale is our best bet for transforming words that don't come from a dictionary
    final Locale fallbackLocale = mDictionaryFacilitator.getMainLocale();
    final ArrayList<SuggestedWordInfo> candidates = new ArrayList<>(gestureResults);
    final boolean capitalizeFirst = wordComposer.wasShiftedNoLock();
    final boolean upperCaseAll = wordComposer.isAllUpperCase();
    if (capitalizeFirst || upperCaseAll) {
        for (int index = 0; index < candidates.size(); index++) {
            final SuggestedWordInfo original = candidates.get(index);
            Locale wordLocale = original.mSourceDict.mLocale;
            if (wordLocale == null) {
                wordLocale = fallbackLocale;
            }
            candidates.set(index, getTransformedSuggestedWordInfo(original, wordLocale,
                    upperCaseAll, capitalizeFirst, 0 /* trailingSingleQuotesCount */));
        }
    }

    // a previously rejected top suggestion is demoted to the second position
    SuggestedWordInfo rejectedInfo = null;
    final boolean topWasRejected = SHOULD_REMOVE_PREVIOUSLY_REJECTED_SUGGESTION
            && candidates.size() > 1
            && TextUtils.equals(candidates.get(0).mWord,
                    wordComposer.getRejectedBatchModeSuggestion());
    if (topWasRejected) {
        rejectedInfo = candidates.remove(0);
        candidates.add(1, rejectedInfo);
    }
    SuggestedWordInfo.removeDupsAndTypedWord(null /* typedWord */, candidates);

    // For some reason some suggestions with MIN_VALUE are making their way here.
    // TODO: Find a more robust way to detect distracters.
    final Iterator<SuggestedWordInfo> scan = candidates.iterator();
    while (scan.hasNext()) {
        if (scan.next().mScore < SUPPRESS_SUGGEST_THRESHOLD) {
            scan.remove();
        }
    }

    // In the batch input mode, the most relevant suggested word should act as a "typed word"
    // (typedWordValid=true), not as an "auto correct word" (willAutoCorrect=false).
    // Note that because this method is never used to get predictions, there is no need to
    // modify inputType such in getSuggestedWordsForNonBatchInput.
    final SuggestedWordInfo topCandidate = candidates.isEmpty() ? null : candidates.get(0);
    final SuggestionResults nextWordResults =
            getNextWordSuggestions(ngramContext, keyboard, inputStyle, settingsValuesForSuggestion);
    final SuggestedWordInfo pseudoTypedWordInfo =
            preferNextWordSuggestion(topCandidate, candidates, nextWordResults, rejectedInfo);

    final boolean withDebugInfo = SuggestionStripView.DEBUG_SUGGESTIONS && !candidates.isEmpty();
    final ArrayList<SuggestedWordInfo> shownSuggestions = withDebugInfo
            ? getSuggestionsInfoListWithDebugInfo(gestureResults.first().mWord, candidates)
            : candidates;
    final SuggestedWords suggestedWords = new SuggestedWords(shownSuggestions,
            gestureResults.mRawSuggestions,
            pseudoTypedWordInfo,
            true /* typedWordValid */,
            false /* willAutoCorrect */,
            false /* isObsoleteSuggestions */,
            inputStyle, sequenceNumber);
    callback.onGetSuggestedWords(suggestedWords);
}
/**
 * Lowers the score of a single-letter first suggestion to 93 % if the second suggestion
 * has more than one letter and scores more than 94 % of the first one.
 * Does nothing if there are fewer than two suggestions.
 */
private void replaceSingleLetterFirstSuggestion(final SuggestionResults suggestionResults) {
    if (suggestionResults.size() < 2) return;
    if (suggestionResults.first().mWord.length() != 1) return;
    final Iterator<SuggestedWordInfo> topTwo = suggestionResults.iterator();
    final SuggestedWordInfo singleLetter = topTwo.next();
    final SuggestedWordInfo runnerUp = topTwo.next();
    // only act if the runner-up is a longer word with a close score
    if (runnerUp.mWord.length() <= 1 || runnerUp.mScore <= 0.94 * singleLetter.mScore) return;

    // remove and re-add with lower score
    final SuggestedWordInfo demoted = new SuggestedWordInfo(singleLetter.mWord,
            singleLetter.mPrevWordsContext, (int) (singleLetter.mScore * 0.93),
            singleLetter.mKindAndFlags, singleLetter.mSourceDict,
            singleLetter.mIndexOfTouchPointOfSecondWord, singleLetter.mAutoCommitFirstWordConfidence);
    suggestionResults.remove(singleLetter);
    suggestionResults.add(demoted);
    if (DebugFlags.DEBUG_ENABLED) {
        final SuggestedWordInfo newFirst = suggestionResults.first();
        final String newFirstWord = newFirst.mWord;
        Log.d(TAG, "reduced score of " + singleLetter.mWord + " from " + singleLetter.mScore
                + ", new first: " + newFirstWord + " (" + suggestionResults.first().mScore + ")");
    }
}
/**
 * Prefers a suggestion that is also a well-scoring next-word suggestion over the current
 * pseudo typed word. If such a suggestion is found, it is moved to the front of
 * {@code suggestionsContainer} and returned.
 * <p>
 * {@code nextWordSuggestions} is only read: it may be the instance stored in the next-word
 * cache, so removing entries from it would also change what later cache hits see.
 *
 * @param pseudoTypedWordInfo the current first suggestion, may be null
 * @param suggestionsContainer the suggestions; modified if a better candidate is found
 * @param nextWordSuggestions suggestions for an empty word in the current ngram context
 * @param rejected a suggestion that must not be preferred, may be null
 * @return the new pseudo typed word info, or {@code pseudoTypedWordInfo} if nothing changed
 */
@Nullable
private SuggestedWordInfo preferNextWordSuggestion(@Nullable final SuggestedWordInfo pseudoTypedWordInfo,
        @NonNull final ArrayList<SuggestedWordInfo> suggestionsContainer,
        @NonNull final SuggestionResults nextWordSuggestions, @Nullable final SuggestedWordInfo rejected) {
    if (pseudoTypedWordInfo == null
            || !Settings.getInstance().getCurrent().mUsePersonalizedDicts
            || !pseudoTypedWordInfo.mSourceDict.mDictType.equals(Dictionary.TYPE_MAIN)
            || suggestionsContainer.size() < 2
    )
        return pseudoTypedWordInfo;
    // we only want reasonably often typed words, value may require tuning
    final int minimumNextWordScore = 170;
    // for each suggestion, check whether the word was already typed in this ngram context (i.e. is nextWordSuggestion)
    for (final SuggestedWordInfo suggestion : suggestionsContainer) {
        if (suggestion.mScore < pseudoTypedWordInfo.mScore * 0.93) break; // we only want reasonably good suggestions, value may require tuning
        if (suggestion == rejected) continue; // ignore rejected suggestions
        for (final SuggestedWordInfo nextWordSuggestion : nextWordSuggestions) {
            // skip low-scoring entries instead of removing them from the (possibly cached) results
            if (nextWordSuggestion.mScore < minimumNextWordScore)
                continue;
            if (!nextWordSuggestion.mWord.equals(suggestion.mWord))
                continue;
            // if we have a high scoring suggestion in next word suggestions, take it (because it's expected that user might want to type it again)
            // modifying the list while iterating is fine here because we return right away
            suggestionsContainer.remove(suggestion);
            suggestionsContainer.add(0, suggestion);
            if (DebugFlags.DEBUG_ENABLED)
                Log.d(TAG, "replaced batch word "+pseudoTypedWordInfo+" with "+suggestion);
            return suggestion;
        }
    }
    return pseudoTypedWordInfo;
}
/**
 * Attaches a debug string to every suggestion.
 * @param typedWord the word the debug info is computed against
 * @param suggestions the suggestions to annotate (the contained infos are modified)
 * @return a new list containing the same, now annotated, infos in the same order
 */
private static ArrayList<SuggestedWordInfo> getSuggestionsInfoListWithDebugInfo(
        final String typedWord, final ArrayList<SuggestedWordInfo> suggestions) {
    final int count = suggestions.size();
    final ArrayList<SuggestedWordInfo> annotated = new ArrayList<>(count);
    for (int index = 0; index < count; index++) {
        final SuggestedWordInfo info = suggestions.get(index);
        addDebugInfo(info, typedWord);
        annotated.add(info);
    }
    return annotated;
}
/**
 * Sets the debug string of {@code wordInfo} to its score, followed by the normalized score
 * (only if that is positive) and the type and locale of its source dictionary.
 */
private static void addDebugInfo(final SuggestedWordInfo wordInfo, final String typedWord) {
    final float normalized = BinaryDictionaryUtils.calcNormalizedScore(
            typedWord, wordInfo.toString(), wordInfo.mScore);
    final String source = wordInfo.mSourceDict.mDictType + ":" + wordInfo.mSourceDict.mLocale;
    final String debugString = normalized > 0
            ? String.format(Locale.ROOT, "%d (%4.2f), %s", wordInfo.mScore, normalized, source)
            : String.format(Locale.ROOT, "%d, %s", wordInfo.mScore, source);
    wordInfo.setDebugString(debugString);
}
/**
 * Tells whether auto-correcting to this suggestion is acceptable in its language.
 *
 * The filter blocks auto-correction to suggestions that contain a space and are longer than
 * a language-dependent limit. In languages like German, where many words can be
 * concatenated, the dictionary often lacks the longer words, and we end up offering
 * unhelpful suggestions containing one or several spaces. Ideally the dictionary and some
 * dedicated logic would figure out what the user wants; until then, and since it is hard
 * to tell whether such a suggestion is useful, we at least avoid the worst damage by not
 * auto-correcting to long suggestions that contain a space.
 *
 * The result is true if the source dictionary has no locale, if the language has no limit,
 * or if the suggestion contains no space. Otherwise the length is checked against the
 * language-specific limit.
 *
 * @param info the suggestion info
 * @return whether it's fine to auto-correct to this.
 */
private static boolean isAllowedByAutoCorrectionWithSpaceFilter(final SuggestedWordInfo info) {
    final Locale dictLocale = info.mSourceDict.mLocale;
    // no locale or no entry for the language: no maximum length is enforced
    final Integer lengthLimit = dictLocale == null
            ? null
            : sLanguageToMaximumAutoCorrectionWithSpaceLength.get(dictLocale.getLanguage());
    if (lengthLimit == null) {
        return true;
    }
    final boolean containsSpace = info.mWord.indexOf(Constants.CODE_SPACE) != -1;
    return !containsSpace || info.mWord.length() <= lengthLimit;
}
/**
 * Creates a copy of {@code wordInfo} whose word is re-cased and extended by single quotes.
 * All upper case takes precedence over capitalizing only the first code point.
 *
 * @param wordInfo the info to transform
 * @param locale the locale used for changing the case
 * @param isAllUpperCase whether the whole word should be upper case
 * @param isOnlyFirstCharCapitalized whether the first code point should be capitalized
 * @param trailingSingleQuotesCount number of single quotes at the end of the typed word
 * @return the transformed info; all fields but the word are taken over unchanged
 */
/* package for test */ static SuggestedWordInfo getTransformedSuggestedWordInfo(
        final SuggestedWordInfo wordInfo, final Locale locale, final boolean isAllUpperCase,
        final boolean isOnlyFirstCharCapitalized, final int trailingSingleQuotesCount) {
    final String word = wordInfo.mWord;
    final StringBuilder builder = new StringBuilder(word.length());
    final String cased;
    if (isAllUpperCase) {
        cased = word.toUpperCase(locale);
    } else if (isOnlyFirstCharCapitalized) {
        cased = StringUtils.capitalizeFirstCodePoint(word, locale);
    } else {
        cased = word;
    }
    builder.append(cased);
    // Appending quotes is here to help people quote words. However, it's not helpful
    // when they type words with quotes toward the end like "it's" or "didn't", where
    // it's more likely the user missed the last character (or didn't type it yet).
    // So a word that already contains a single quote gets one quote less.
    final boolean wordHasQuote = word.indexOf(Constants.CODE_SINGLE_QUOTE) != -1;
    int remainingQuotes = trailingSingleQuotesCount - (wordHasQuote ? 1 : 0);
    while (remainingQuotes > 0) {
        builder.appendCodePoint(Constants.CODE_SINGLE_QUOTE);
        remainingQuotes--;
    }
    return new SuggestedWordInfo(builder.toString(), wordInfo.mPrevWordsContext,
            wordInfo.mScore, wordInfo.mKindAndFlags,
            wordInfo.mSourceDict, wordInfo.mIndexOfTouchPointOfSecondWord,
            wordInfo.mAutoCommitFirstWordConfidence);
}
/**
 * Gets suggestions based on the current ngram context, with an empty typed word (that's
 * what next word suggestions do). Results are stored in {@code nextWordSuggestionsCache}
 * and served from there on later calls with the same ngram context.
 */
// todo: integrate it into shouldBeAutoCorrected, remove putEmptySuggestions
//  and make that thing more readable
private SuggestionResults getNextWordSuggestions(final NgramContext ngramContext,
        final Keyboard keyboard, final int inputStyle, final SettingsValuesForSuggestion settingsValuesForSuggestion
) {
    SuggestionResults results = nextWordSuggestionsCache.get(ngramContext);
    if (results == null) {
        // cache miss: ask the dictionaries with an empty composed word
        final ComposedData emptyWord = new ComposedData(new InputPointers(1), false, "");
        results = mDictionaryFacilitator.getSuggestionResults(emptyWord, ngramContext, keyboard,
                settingsValuesForSuggestion, SESSION_ID_TYPING, inputStyle);
        nextWordSuggestionsCache.put(ngramContext, results);
    }
    return results;
}
}

View file

@ -0,0 +1,581 @@
/*
* Copyright (C) 2008 The Android Open Source Project
* modified
* SPDX-License-Identifier: Apache-2.0 AND GPL-3.0-only
*/
package helium314.keyboard.latin
import android.text.TextUtils
import com.android.inputmethod.latin.utils.BinaryDictionaryUtils
import helium314.keyboard.keyboard.Keyboard
import helium314.keyboard.latin.SuggestedWords.SuggestedWordInfo
import helium314.keyboard.latin.common.ComposedData
import helium314.keyboard.latin.common.Constants
import helium314.keyboard.latin.common.InputPointers
import helium314.keyboard.latin.common.StringUtils
import helium314.keyboard.latin.define.DebugFlags
import helium314.keyboard.latin.define.DecoderSpecificConstants.SHOULD_AUTO_CORRECT_USING_NON_WHITE_LISTED_SUGGESTION
import helium314.keyboard.latin.define.DecoderSpecificConstants.SHOULD_REMOVE_PREVIOUSLY_REJECTED_SUGGESTION
import helium314.keyboard.latin.settings.Settings
import helium314.keyboard.latin.settings.SettingsValuesForSuggestion
import helium314.keyboard.latin.suggestions.SuggestionStripView
import helium314.keyboard.latin.utils.AutoCorrectionUtils
import helium314.keyboard.latin.utils.Log.d
import helium314.keyboard.latin.utils.SuggestionResults
import java.util.Locale
/**
* This class loads a dictionary and provides a list of suggestions for a given sequence of
* characters. This includes corrections and completions.
*/
class Suggest(private val mDictionaryFacilitator: DictionaryFacilitator) {
// normalized-score threshold handed to AutoCorrectionUtils.suggestionExceedsThreshold
private var mAutoCorrectionThreshold: Float = 0f
// not read anywhere in this class
private val mPlausibilityThreshold: Float = 0f
// next-word suggestions (suggestions for an empty typed word), keyed by ngram context
private val nextWordSuggestionsCache: HashMap<NgramContext, SuggestionResults> = HashMap()

/**
 * Drops all cached next-word suggestions.
 * Called whenever LatinIME.loadSettings runs, notably on changing layout and switching input fields.
 */
fun clearNextWordSuggestionsCache() {
    nextWordSuggestionsCache.clear()
}
/**
 * Stores the normalized-score threshold a suggestion has to exceed to be considered strong
 * enough for auto-correction.
 * @param threshold the new threshold
 */
fun setAutoCorrectionThreshold(threshold: Float) {
    this.mAutoCorrectionThreshold = threshold
}
/**
 * Receives the result of [getSuggestedWords].
 *
 * Declared as a functional interface so Kotlin callers can pass a lambda; existing
 * implementations (Kotlin objects, Java classes or Java lambdas) are unaffected.
 */
fun interface OnGetSuggestedWordsCallback {
    /** Called once with the computed [suggestedWords]. */
    fun onGetSuggestedWords(suggestedWords: SuggestedWords?)
}
/**
 * Computes suggestions for the current state of [wordComposer] and delivers them to [callback].
 * Batch (gesture) input and non-batch input are handled by separate code paths;
 * [isCorrectionEnabled] is only relevant for non-batch input.
 */
fun getSuggestedWords(wordComposer: WordComposer, ngramContext: NgramContext, keyboard: Keyboard,
    settingsValuesForSuggestion: SettingsValuesForSuggestion, isCorrectionEnabled: Boolean,
    inputStyle: Int, sequenceNumber: Int, callback: OnGetSuggestedWordsCallback
) {
    if (!wordComposer.isBatchMode) {
        getSuggestedWordsForNonBatchInput(
            wordComposer, ngramContext, keyboard, settingsValuesForSuggestion,
            inputStyle, isCorrectionEnabled, sequenceNumber, callback
        )
        return
    }
    getSuggestedWordsForBatchInput(
        wordComposer, ngramContext, keyboard,
        settingsValuesForSuggestion, inputStyle, sequenceNumber, callback
    )
}
// Retrieves suggestions for non-batch input (typing, recorrection, predictions...)
// and calls the callback function with the suggestions.
// For an empty typed word the (cached) next-word suggestions are used.
private fun getSuggestedWordsForNonBatchInput(wordComposer: WordComposer, ngramContext: NgramContext, keyboard: Keyboard,
    settingsValuesForSuggestion: SettingsValuesForSuggestion, inputStyleIfNotPrediction: Int,
    isCorrectionEnabled: Boolean, sequenceNumber: Int, callback: OnGetSuggestedWordsCallback
) {
    val typedWordString = wordComposer.typedWord
    val trailingSingleQuotesCount = StringUtils.getTrailingSingleQuotesCount(typedWordString)
    val suggestionResults = if (typedWordString.isEmpty()) {
        getNextWordSuggestions(ngramContext, keyboard, inputStyleIfNotPrediction, settingsValuesForSuggestion)
    } else {
        mDictionaryFacilitator.getSuggestionResults(wordComposer.composedDataSnapshot, ngramContext, keyboard,
            settingsValuesForSuggestion, SESSION_ID_TYPING, inputStyleIfNotPrediction)
    }
    val locale = mDictionaryFacilitator.mainLocale
    val suggestionsContainer = getTransformedSuggestedWordInfoList(wordComposer, suggestionResults, trailingSingleQuotesCount, locale)

    // store the original SuggestedWordInfo for typed word, as it will be removed
    // we may want to re-add it in case auto-correction happens, so that the original word can at least be selected
    val typedWordFirstOccurrenceWordInfo = suggestionsContainer.firstOrNull { typedWordString == it.mWord }
    // the dictionary of the first suggestion matching the typed word, null if the typed word was not suggested
    val sourceDictionaryOfRemovedWord: Dictionary? = typedWordFirstOccurrenceWordInfo?.mSourceDict

    val firstOccurrenceOfTypedWordInSuggestions = SuggestedWordInfo.removeDupsAndTypedWord(typedWordString, suggestionsContainer)
    val resultsArePredictions = !wordComposer.isComposingWord

    // SuggestedWordInfos for suggestions for empty word (based only on previously typed words)
    // filled on demand by the lambda below, so next-word suggestions are only looked up if needed
    val firstAndTypedWordEmptyInfos = ArrayList<SuggestedWordInfo?>(2)
    val thoseTwo = shouldBeAutoCorrected( // todo: do it better...
        trailingSingleQuotesCount,
        typedWordString,
        suggestionsContainer,
        sourceDictionaryOfRemovedWord,
        firstAndTypedWordEmptyInfos,
        {
            val first = suggestionsContainer.firstOrNull() ?: suggestionResults.first()
            putEmptyWordSuggestions(
                firstAndTypedWordEmptyInfos, ngramContext, keyboard,
                settingsValuesForSuggestion, inputStyleIfNotPrediction, first.word, typedWordString
            )
        },
        isCorrectionEnabled,
        wordComposer,
        suggestionResults,
        firstOccurrenceOfTypedWordInSuggestions,
        typedWordFirstOccurrenceWordInfo
    )
    val allowsToBeAutoCorrected = thoseTwo[0]
    val hasAutoCorrection = thoseTwo[1]

    val typedWordInfo = SuggestedWordInfo(typedWordString, "", SuggestedWordInfo.MAX_SCORE,
        SuggestedWordInfo.KIND_TYPED, sourceDictionaryOfRemovedWord ?: Dictionary.DICTIONARY_USER_TYPED,
        SuggestedWordInfo.NOT_AN_INDEX, SuggestedWordInfo.NOT_A_CONFIDENCE)
    if (!TextUtils.isEmpty(typedWordString)) {
        suggestionsContainer.add(0, typedWordInfo)
    }

    val suggestionsList = if (SuggestionStripView.DEBUG_SUGGESTIONS && suggestionsContainer.isNotEmpty()) {
        getSuggestionsInfoListWithDebugInfo(typedWordString, suggestionsContainer)
    } else {
        suggestionsContainer
    }

    val inputStyle = when {
        !resultsArePredictions -> inputStyleIfNotPrediction
        suggestionResults.mIsBeginningOfSentence -> SuggestedWords.INPUT_STYLE_BEGINNING_OF_SENTENCE_PREDICTION
        else -> SuggestedWords.INPUT_STYLE_PREDICTION
    }

    val isTypedWordValid = firstOccurrenceOfTypedWordInSuggestions > -1 || (!resultsArePredictions && !allowsToBeAutoCorrected)

    if (hasAutoCorrection) {
        // make sure typed word is shown, so user is able to override incoming autocorrection
        val typedWordToShow = if (typedWordFirstOccurrenceWordInfo != null) {
            if (SuggestionStripView.DEBUG_SUGGESTIONS) addDebugInfo(typedWordFirstOccurrenceWordInfo, typedWordString)
            typedWordFirstOccurrenceWordInfo
        } else {
            SuggestedWordInfo(typedWordString, "", 0, SuggestedWordInfo.KIND_TYPED,
                Dictionary.DICTIONARY_USER_TYPED, SuggestedWordInfo.NOT_AN_INDEX, SuggestedWordInfo.NOT_A_CONFIDENCE)
        }
        // The usual position is index 2 (after typed word and auto-correction). Clamp defensively:
        // ArrayList.add(index, e) throws IndexOutOfBoundsException if the list holds fewer than
        // two entries, e.g. when the only suggestion was the typed word and got removed above.
        suggestionsList.add(minOf(2, suggestionsList.size), typedWordToShow)
    }

    callback.onGetSuggestedWords(SuggestedWords(suggestionsList, suggestionResults.mRawSuggestions,
        typedWordInfo, isTypedWordValid, hasAutoCorrection, false, inputStyle, sequenceNumber))
}
// annoyingly complicated thing to avoid getting emptyWordSuggestions more than once
// todo: now with the cache just remove it...
/**
 * Puts word infos for suggestions with an empty word in [infos], based on previously typed words.
 *
 * After the call [infos] contains exactly two entries:
 * index 0 is the next-word suggestion whose word equals [firstSuggestionInContainer],
 * index 1 is the next-word suggestion whose word equals [typedWordString].
 * Either entry is null if the word is not among the next-word suggestions for [ngramContext].
 * If [infos] is not empty it is assumed to be filled already and is returned unchanged.
 *
 * @return [infos], for convenience
 */
private fun putEmptyWordSuggestions(
    infos: ArrayList<SuggestedWordInfo?>, ngramContext: NgramContext,
    keyboard: Keyboard, settingsValuesForSuggestion: SettingsValuesForSuggestion,
    inputStyleIfNotPrediction: Int, firstSuggestionInContainer: String, typedWordString: String
): ArrayList<SuggestedWordInfo?> {
    if (infos.isNotEmpty()) return infos
    val emptyWordSuggestions = getNextWordSuggestions(ngramContext, keyboard, inputStyleIfNotPrediction, settingsValuesForSuggestion)
    // Callers read the results at index 0 and 1, so the results must be the first two entries
    // (adding null placeholders first and appending the results would hide them at index 2 and 3).
    infos.add(emptyWordSuggestions.firstOrNull { word: SuggestedWordInfo -> word.mWord == firstSuggestionInContainer })
    infos.add(emptyWordSuggestions.firstOrNull { word: SuggestedWordInfo -> word.mWord == typedWordString })
    return infos
}
/**
 * Decides whether the typed word may be auto-corrected and whether an auto-correction is
 * actually proposed.
 *
 * @param trailingSingleQuotesCount number of characters cut from the end of [typedWordString]
 *  to get the word that is compared against the first suggestion
 * @param typedWordString the typed word
 * @param suggestionsContainer the suggestions; only the first entry is inspected here
 * @param sourceDictionaryOfRemovedWord null is treated as "typed word is not in a dictionary"
 * @param firstAndTypedWordEmptyInfos read at index 0 (info for the first suggestion) and
 *  index 1 (info for the typed word), but only after [putEmptyWordSuggestions] was run
 * @param putEmptyWordSuggestions run before [firstAndTypedWordEmptyInfos] is read, so it is
 *  expected to put (at least) two entries into that list
 * @param isCorrectionEnabled if false, the second returned value is always false
 * @param wordComposer queried for isComposingWord, hasDigits, isMostlyCaps and isResumed
 * @param suggestionResults the raw results; its first entry is the auto-correction candidate
 * @param firstOccurrenceOfTypedWordInSuggestions only used in the (currently dead) confidence-threshold branch
 * @param typedWordFirstOccurrenceWordInfo info of the typed word if it was among the suggestions, else null
 * @return [allowsToBeAutoCorrected, hasAutoCorrection]
 */
// public for testing
fun shouldBeAutoCorrected(
trailingSingleQuotesCount: Int,
typedWordString: String,
suggestionsContainer: List<SuggestedWordInfo>,
sourceDictionaryOfRemovedWord: Dictionary?,
firstAndTypedWordEmptyInfos: List<SuggestedWordInfo?>,
putEmptyWordSuggestions: Runnable,
isCorrectionEnabled: Boolean,
wordComposer: WordComposer,
suggestionResults: SuggestionResults,
firstOccurrenceOfTypedWordInSuggestions: Int,
typedWordFirstOccurrenceWordInfo: SuggestedWordInfo?
): BooleanArray {
// the typed word without its trailing single quotes
val consideredWord = if (trailingSingleQuotesCount > 0) typedWordString.substring(
0,
typedWordString.length - trailingSingleQuotesCount
) else typedWordString
val whitelistedWordInfo = getWhitelistedWordInfoOrNull(suggestionsContainer)
val whitelistedWord = whitelistedWordInfo?.mWord
val firstSuggestionInContainer = if (suggestionsContainer.isEmpty()) null else suggestionsContainer[0]
// We allow auto-correction if whitelisting is not required or the word is whitelisted,
// or if the word had more than one char and was not suggested.
val allowsToBeAutoCorrected: Boolean
val scoreLimit = Settings.getInstance().current.mScoreLimitForAutocorrect
allowsToBeAutoCorrected =
if (SHOULD_AUTO_CORRECT_USING_NON_WHITE_LISTED_SUGGESTION || whitelistedWord != null || consideredWord.length > 1 && sourceDictionaryOfRemovedWord == null // more than 1 letter and not in dictionary
) {
true
} else if (firstSuggestionInContainer != null && typedWordString.isNotEmpty()) {
// maybe allow autocorrect, depending on scores and emptyWordSuggestions
putEmptyWordSuggestions.run()
// next-word (empty word) infos for the first suggestion and for the typed word
val first = firstAndTypedWordEmptyInfos[0]
val typed = firstAndTypedWordEmptyInfos[1]
if (firstSuggestionInContainer.mScore > scoreLimit) {
true // suggestion has good score, allow
} else if (first == null) {
false // no autocorrect if first suggestion unknown in this context
} else if (typed == null) {
true // allow autocorrect if typed word not known in this context, todo: this may be too aggressive
} else {
// autocorrect if suggested word has clearly higher score for empty word suggestions
first.mScore - typed.mScore > 20
}
} else {
false
}
// Note: the early returns in the else branch below all report allowsToBeAutoCorrected as true.
// That matches the variable, because the branch is only reached if allowsToBeAutoCorrected is true.
val hasAutoCorrection =
// If correction is not enabled, we never auto-correct. This is for example for when
// the setting "Auto-correction" is "off": we still suggest, but we don't auto-correct.
// todo: can some parts be moved to isCorrectionEnabled? e.g. keyboardIdMode only depends on input type
//  i guess then not mAutoCorrectionEnabledPerUserSettings should be read, but rather some isAutocorrectEnabled()
if (!isCorrectionEnabled
// If the word does not allow to be auto-corrected, then we don't auto-correct.
|| !allowsToBeAutoCorrected
// If we are doing prediction, then we never auto-correct of course
|| !wordComposer.isComposingWord
// If we don't have suggestion results, we can't evaluate the first suggestion
// for auto-correction
|| suggestionResults.isEmpty()
// If the word has digits, we never auto-correct because it's likely the word
// was typed with a lot of care
|| wordComposer.hasDigits()
// If the word is mostly caps, we never auto-correct because this is almost
// certainly intentional (and careful input)
|| wordComposer.isMostlyCaps
// We never auto-correct when suggestions are resumed because it would be unexpected
|| wordComposer.isResumed
// If we don't have a main dictionary, we never want to auto-correct. The reason
// for this is, the user may have a contact whose name happens to match a valid
// word in their language, and it will unexpectedly auto-correct. For example, if
// the user types in English with no dictionary and has a "Will" in their contact
// list, "will" would always auto-correct to "Will" which is unwanted. Hence, no
// main dict => no auto-correct. Also, it would probably get obnoxious quickly.
// TODO: now that we have personalization, we may want to re-evaluate this decision
|| !mDictionaryFacilitator.hasAtLeastOneInitializedMainDictionary()
) {
false
} else {
val firstSuggestion = suggestionResults.first()
if (suggestionResults.mFirstSuggestionExceedsConfidenceThreshold
&& firstOccurrenceOfTypedWordInSuggestions != 0
) {
// mFirstSuggestionExceedsConfidenceThreshold is always set to false, so currently
// this branch is useless
return booleanArrayOf(true, true)
}
if (!AutoCorrectionUtils.suggestionExceedsThreshold(
firstSuggestion, consideredWord, mAutoCorrectionThreshold
)
) {
// todo: maybe also do something here depending on ngram context?
// Score is too low for autocorrect
return booleanArrayOf(true, false)
}
// We have a high score, so we need to check if this suggestion is in the correct
// form to allow auto-correcting to it in this language. For details of how this
// is determined, see #isAllowedByAutoCorrectionWithSpaceFilter.
// TODO: this should not have its own logic here but be handled by the dictionary.
val allowed = isAllowedByAutoCorrectionWithSpaceFilter(firstSuggestion)
if (allowed && typedWordFirstOccurrenceWordInfo != null && typedWordFirstOccurrenceWordInfo.mScore > scoreLimit) {
// typed word is valid and has good score
// do not auto-correct if typed word is better match than first suggestion
val first = firstSuggestionInContainer ?: firstSuggestion
val dictLocale = mDictionaryFacilitator.currentLocale
if (first.mScore < scoreLimit) {
// don't allow if suggestion has too low score
return booleanArrayOf(true, false)
}
// NOTE(review): the locales are compared by identity (!== / ===), not with equals.
// Presumably all dictionaries of a language share one Locale instance - confirm.
if (first.mSourceDict.mLocale !== typedWordFirstOccurrenceWordInfo.mSourceDict.mLocale) {
// dict locale different -> return the better match
// (auto-correct only if the suggestion comes from the current locale's dictionary)
return booleanArrayOf(true, dictLocale === first.mSourceDict.mLocale)
}
// the score difference may need tuning, but so far it seems alright
val firstWordBonusScore =
((if (first.isKindOf(SuggestedWordInfo.KIND_WHITELIST)) 20 else 0) // large bonus because it's wanted by dictionary
+ (if (StringUtils.isLowerCaseAscii(typedWordString)) 5 else 0) // small bonus because typically only ascii is typed (applies to latin keyboards only)
+ if (first.mScore > typedWordFirstOccurrenceWordInfo.mScore) 5 else 0) // small bonus if score is higher
putEmptyWordSuggestions.run()
// scores in the next-word suggestions, 0 if the word is not among them
val firstScoreForEmpty = if (firstAndTypedWordEmptyInfos[0] != null) firstAndTypedWordEmptyInfos[0]!!.mScore else 0
val typedScoreForEmpty = if (firstAndTypedWordEmptyInfos[1] != null) firstAndTypedWordEmptyInfos[1]!!.mScore else 0
if (firstScoreForEmpty + firstWordBonusScore >= typedScoreForEmpty + 20) {
// return the better match for ngram context
// biased towards typed word (it gets a head start of 20),
// but with the bonus for the first suggestion computed above
return booleanArrayOf(true, true)
}
false
} else {
allowed
}
}
return booleanArrayOf(allowsToBeAutoCorrected, hasAutoCorrection)
}
// Retrieves suggestions for the batch (gesture) input
// and calls the callback function with the suggestions.
private fun getSuggestedWordsForBatchInput(
    wordComposer: WordComposer,
    ngramContext: NgramContext, keyboard: Keyboard,
    settingsValuesForSuggestion: SettingsValuesForSuggestion,
    inputStyle: Int, sequenceNumber: Int,
    callback: OnGetSuggestedWordsCallback
) {
    val suggestionResults = mDictionaryFacilitator.getSuggestionResults(
        wordComposer.composedDataSnapshot, ngramContext, keyboard,
        settingsValuesForSuggestion, SESSION_ID_GESTURE, inputStyle
    )
    replaceSingleLetterFirstSuggestion(suggestionResults)

    // For transforming words that don't come from a dictionary, because it's our best bet
    val fallbackLocale = mDictionaryFacilitator.mainLocale
    val suggestionsContainer = ArrayList(suggestionResults)
    val capitalizeFirstChar = wordComposer.wasShiftedNoLock()
    val allUpperCase = wordComposer.isAllUpperCase
    if (capitalizeFirstChar || allUpperCase) {
        // apply the capitalization of the input to every suggestion
        for (index in suggestionsContainer.indices) {
            val info = suggestionsContainer[index]
            val infoLocale = info!!.mSourceDict.mLocale
            suggestionsContainer[index] = getTransformedSuggestedWordInfo(
                info, infoLocale ?: fallbackLocale, allUpperCase, capitalizeFirstChar, 0
            )
        }
    }

    // a first suggestion that was rejected before is moved to the second position
    val firstWasRejected = (SHOULD_REMOVE_PREVIOUSLY_REJECTED_SUGGESTION
            && suggestionsContainer.size > 1
            && TextUtils.equals(suggestionsContainer[0]!!.mWord, wordComposer.rejectedBatchModeSuggestion))
    val rejected: SuggestedWordInfo? = if (firstWasRejected) {
        suggestionsContainer.removeAt(0).also { suggestionsContainer.add(1, it) }
    } else {
        null
    }
    SuggestedWordInfo.removeDupsAndTypedWord(null, suggestionsContainer)

    // For some reason some suggestions with MIN_VALUE are making their way here.
    // TODO: Find a more robust way to detect distracters.
    suggestionsContainer.removeAll { it!!.mScore < SUPPRESS_SUGGEST_THRESHOLD }

    // In the batch input mode, the most relevant suggested word should act as a "typed word"
    // (typedWordValid=true), not as an "auto correct word" (willAutoCorrect=false).
    // Note that because this method is never used to get predictions, there is no need to
    // modify inputType such in getSuggestedWordsForNonBatchInput.
    val bestSuggestion = suggestionsContainer.firstOrNull()
    val nextWordSuggestions = getNextWordSuggestions(ngramContext, keyboard, inputStyle, settingsValuesForSuggestion)
    val pseudoTypedWordInfo = preferNextWordSuggestion(bestSuggestion, suggestionsContainer, nextWordSuggestions, rejected)

    val withDebugInfo = SuggestionStripView.DEBUG_SUGGESTIONS && suggestionsContainer.isNotEmpty()
    val suggestionsList = if (withDebugInfo) {
        getSuggestionsInfoListWithDebugInfo(suggestionResults.first().mWord, suggestionsContainer)
    } else {
        suggestionsContainer
    }
    val suggestedWords = SuggestedWords(
        suggestionsList, suggestionResults.mRawSuggestions, pseudoTypedWordInfo, true,
        false, false, inputStyle, sequenceNumber
    )
    callback.onGetSuggestedWords(suggestedWords)
}
/**
 * Demotes a single-letter first suggestion: if the second suggestion has more than one letter
 * and a score above 94% of the first one, the first suggestion is re-added with its score
 * reduced to 93%.
 */
private fun replaceSingleLetterFirstSuggestion(suggestionResults: SuggestionResults) {
    if (suggestionResults.size < 2) return
    val (first, second) = suggestionResults.take(2)
    if (first.mWord.length != 1) return
    // only act if the next suggestion is close and has more than one letter
    val secondIsCloseAndLonger = second.mWord.length > 1 && second.mScore > 0.94 * first.mScore
    if (!secondIsCloseAndLonger) return

    // remove and re-add with lower score
    suggestionResults.remove(first)
    val demotedFirst = SuggestedWordInfo(
        first.mWord, first.mPrevWordsContext, (first.mScore * 0.93).toInt(),
        first.mKindAndFlags, first.mSourceDict, first.mIndexOfTouchPointOfSecondWord, first.mAutoCommitFirstWordConfidence
    )
    suggestionResults.add(demotedFirst)
    if (DebugFlags.DEBUG_ENABLED) {
        val newFirst = suggestionResults.first()
        d(TAG, "reduced score of ${first.mWord} from ${first.mScore}, new first: ${newFirst.mWord} (${newFirst.mScore})")
    }
}
/**
 * Prefers a gesture suggestion that the user typed before in this ngram context.
 *
 * Walks through [suggestionsContainer] (best first) and looks for the first reasonably good
 * suggestion whose word is also a frequently used next-word suggestion. If one is found it is
 * moved to the front of [suggestionsContainer] and returned; otherwise [pseudoTypedWordInfo]
 * is returned and [suggestionsContainer] is left unchanged.
 *
 * Nothing is changed if [pseudoTypedWordInfo] is null, personalized dictionaries are disabled,
 * [pseudoTypedWordInfo] does not come from a main dictionary, or there are fewer than two suggestions.
 *
 * [nextWordSuggestions] is only read: it may be the instance stored in the next-word suggestions
 * cache, so removing entries from it would also remove them for every later lookup of that context.
 *
 * @param rejected a previously rejected suggestion that must not be preferred (compared by identity)
 * @return the new pseudo typed word info
 */
private fun preferNextWordSuggestion(
    pseudoTypedWordInfo: SuggestedWordInfo?,
    suggestionsContainer: ArrayList<SuggestedWordInfo?>,
    nextWordSuggestions: SuggestionResults, rejected: SuggestedWordInfo?
): SuggestedWordInfo? {
    if (pseudoTypedWordInfo == null || !Settings.getInstance().current.mUsePersonalizedDicts
        || pseudoTypedWordInfo.mSourceDict.mDictType != Dictionary.TYPE_MAIN || suggestionsContainer.size < 2
    ) return pseudoTypedWordInfo
    // we only want reasonably often typed words, value may require tuning
    val frequentNextWords = nextWordSuggestions
        .filter { info: SuggestedWordInfo -> info.mScore >= 170 }
        .mapTo(HashSet<String>()) { info: SuggestedWordInfo -> info.mWord }
    if (frequentNextWords.isEmpty()) return pseudoTypedWordInfo
    // for each suggestion, check whether the word was already typed in this ngram context (i.e. is nextWordSuggestion)
    for (suggestion in suggestionsContainer) {
        if (suggestion!!.mScore < pseudoTypedWordInfo.mScore * 0.93) break // we only want reasonably good suggestions, value may require tuning
        if (suggestion === rejected) continue // ignore rejected suggestions
        if (suggestion.mWord !in frequentNextWords) continue
        // if we have a high scoring suggestion in next word suggestions, take it (because it's expected that user might want to type it again)
        // modifying the list while iterating is fine here because we return immediately
        suggestionsContainer.remove(suggestion)
        suggestionsContainer.add(0, suggestion)
        if (DebugFlags.DEBUG_ENABLED) d(
            TAG,
            "replaced batch word $pseudoTypedWordInfo with $suggestion"
        )
        return suggestion
    }
    return pseudoTypedWordInfo
}
/**
 * Returns the suggestions for an empty typed word in the given ngram context (that's what
 * next word suggestions do). Results are taken from the cache if available, otherwise they
 * are fetched from the dictionary facilitator and cached.
 */
// todo: integrate it into shouldBeAutoCorrected, remove putEmptySuggestions
//  and make that thing more readable
private fun getNextWordSuggestions(
    ngramContext: NgramContext,
    keyboard: Keyboard, inputStyle: Int, settingsValuesForSuggestion: SettingsValuesForSuggestion
): SuggestionResults = nextWordSuggestionsCache.getOrPut(ngramContext) {
    val emptyWord = ComposedData(InputPointers(1), false, "")
    mDictionaryFacilitator.getSuggestionResults(
        emptyWord, ngramContext, keyboard, settingsValuesForSuggestion, SESSION_ID_TYPING, inputStyle
    )
}
companion object {
// tag used for debug log output of this class
private val TAG: String = Suggest::class.java.simpleName
// Session ids passed to DictionaryFacilitator.getSuggestionResults:
// SESSION_ID_TYPING for typing / next-word suggestions, SESSION_ID_GESTURE for batch (gesture) input.
// We are sharing the same ID between typing and gesture to save RAM footprint.
const val SESSION_ID_TYPING = 0
const val SESSION_ID_GESTURE = 0
// Batch input suggestions with a score below this value are dropped.
// Close to -2**31
private const val SUPPRESS_SUGGEST_THRESHOLD = -2000000000
// maximum length of a German suggestion containing a space that may still be auto-corrected to
private const val MAXIMUM_AUTO_CORRECT_LENGTH_FOR_GERMAN = 12
// Maps a language code to the maximum length described above;
// languages without an entry have no such limit.
// TODO: should we add Finnish here?
// TODO: This should not be hardcoded here but be written in the dictionary header
private val sLanguageToMaximumAutoCorrectionWithSpaceLength = hashMapOf(Locale.GERMAN.language to MAXIMUM_AUTO_CORRECT_LENGTH_FOR_GERMAN)
/**
 * Copies [results] into a list and adapts every suggestion to the state of [wordComposer]:
 * capitalization (all upper case / first char only) and [trailingSingleQuotesCount].
 * [defaultLocale] is used for suggestions whose source dictionary has no locale.
 * If no adaptation is necessary the suggestions are returned as they are.
 */
private fun getTransformedSuggestedWordInfoList(
    wordComposer: WordComposer, results: SuggestionResults,
    trailingSingleQuotesCount: Int, defaultLocale: Locale
): ArrayList<SuggestedWordInfo> {
    val shouldMakeSuggestionsAllUpperCase = wordComposer.isAllUpperCase && !wordComposer.isResumed
    val isOnlyFirstCharCapitalized = wordComposer.isOrWillBeOnlyFirstCharCapitalized
    val suggestionsContainer = ArrayList(results)
    val needsTransformation = isOnlyFirstCharCapitalized
            || shouldMakeSuggestionsAllUpperCase
            || trailingSingleQuotesCount != 0
    if (!needsTransformation) return suggestionsContainer

    for (index in suggestionsContainer.indices) {
        val info = suggestionsContainer[index]
        val infoLocale = info!!.mSourceDict.mLocale
        suggestionsContainer[index] = getTransformedSuggestedWordInfo(
            info, infoLocale ?: defaultLocale,
            shouldMakeSuggestionsAllUpperCase, isOnlyFirstCharCapitalized,
            trailingSingleQuotesCount
        )
    }
    return suggestionsContainer
}
/** Returns the first suggestion if it is of kind whitelist, null otherwise (or if there are no suggestions). */
private fun getWhitelistedWordInfoOrNull(suggestions: List<SuggestedWordInfo>): SuggestedWordInfo? =
    suggestions.firstOrNull()?.takeIf { it.isKindOf(SuggestedWordInfo.KIND_WHITELIST) }
/**
 * Sets the debug string of every entry in [suggestions] (see [addDebugInfo]) and returns
 * the entries in a new list, in the same order.
 */
private fun getSuggestionsInfoListWithDebugInfo(
    typedWord: String, suggestions: ArrayList<SuggestedWordInfo>
): ArrayList<SuggestedWordInfo> =
    suggestions.mapTo(ArrayList<SuggestedWordInfo>(suggestions.size)) { info ->
        addDebugInfo(info, typedWord)
        info
    }
/**
 * Sets the debug string of [wordInfo] to its score, the normalized score relative to
 * [typedWord] (only if greater than 0) and the type and locale of its source dictionary.
 */
private fun addDebugInfo(wordInfo: SuggestedWordInfo?, typedWord: String) {
    val normalizedScore = BinaryDictionaryUtils.calcNormalizedScore(typedWord, wordInfo.toString(), wordInfo!!.mScore)
    val sourceDict = wordInfo.mSourceDict
    val dict = "${sourceDict.mDictType}:${sourceDict.mLocale}"
    wordInfo.debugString = when {
        normalizedScore > 0 -> String.format(Locale.ROOT, "%d (%4.2f), %s", wordInfo.mScore, normalizedScore, dict)
        else -> String.format(Locale.ROOT, "%d, %s", wordInfo.mScore, dict)
    }
}
/**
 * Computes whether auto-correcting to this suggestion is allowed in its language.
 *
 * This is a filter against auto-correcting to long suggestions that contain spaces. In
 * languages like German, where many words can be concatenated, the dictionary often lacks
 * the longer words, and we end up offering unhelpful suggestions containing one or several
 * spaces. Ideally we would understand what the user wants, but until that's possible we at
 * least block auto-correction when the suggestion is long and contains a space, which should
 * avoid the worst damage.
 *
 * Returns true if the source dictionary has no locale, if the language enforces no limit,
 * if the suggestion is not longer than the language-specific limit, or if it contains no space.
 *
 * @param info the suggestion info
 * @return whether it's fine to auto-correct to this.
 */
private fun isAllowedByAutoCorrectionWithSpaceFilter(info: SuggestedWordInfo): Boolean {
    val maximumLength = info.mSourceDict.mLocale
        ?.let { locale -> sLanguageToMaximumAutoCorrectionWithSpaceLength[locale.language] }
    // no locale, or this language does not enforce a maximum length to auto-correction
    if (maximumLength == null) return true
    val word = info.mWord
    return word.length <= maximumLength || !word.contains(Constants.CODE_SPACE.toChar())
}
/**
 * Creates a copy of [wordInfo] with the word adapted to the current input:
 * upper-cased if [isAllUpperCase], else with capitalized first code point if
 * [isOnlyFirstCharCapitalized], and with single quotes appended.
 * [locale] must not be null if one of the capitalization flags is set.
 */
private fun getTransformedSuggestedWordInfo(
    wordInfo: SuggestedWordInfo?, locale: Locale?, isAllUpperCase: Boolean,
    isOnlyFirstCharCapitalized: Boolean, trailingSingleQuotesCount: Int
): SuggestedWordInfo {
    val word = wordInfo!!.mWord
    val transformed = StringBuilder(word.length)
    transformed.append(
        when {
            isAllUpperCase -> word.uppercase(locale!!)
            isOnlyFirstCharCapitalized -> StringUtils.capitalizeFirstCodePoint(word, locale!!)
            else -> word
        }
    )
    // Appending quotes is here to help people quote words. However, it's not helpful
    // when they type words with quotes toward the end like "it's" or "didn't", where
    // it's more likely the user missed the last character (or didn't type it yet).
    // So one quote less is appended if the word already contains a single quote.
    val containsSingleQuote = word.indexOf(Constants.CODE_SINGLE_QUOTE.toChar()) != -1
    val quotesToAppend = trailingSingleQuotesCount - (if (containsSingleQuote) 1 else 0)
    repeat(quotesToAppend) { transformed.appendCodePoint(Constants.CODE_SINGLE_QUOTE) }
    return SuggestedWordInfo(
        transformed.toString(), wordInfo.mPrevWordsContext,
        wordInfo.mScore, wordInfo.mKindAndFlags,
        wordInfo.mSourceDict, wordInfo.mIndexOfTouchPointOfSecondWord,
        wordInfo.mAutoCommitFirstWordConfidence
    )
}
}
}