diff --git a/app/src/main/java/helium314/keyboard/latin/DictionaryFacilitator.java b/app/src/main/java/helium314/keyboard/latin/DictionaryFacilitator.java index 9c4540db6..e110fe9e1 100644 --- a/app/src/main/java/helium314/keyboard/latin/DictionaryFacilitator.java +++ b/app/src/main/java/helium314/keyboard/latin/DictionaryFacilitator.java @@ -86,7 +86,7 @@ public interface DictionaryFacilitator { boolean isActive(); - Locale getMainLocale(); + @NonNull Locale getMainLocale(); // useful for multilingual typing Locale getCurrentLocale(); diff --git a/app/src/main/java/helium314/keyboard/latin/DictionaryFacilitatorImpl.java b/app/src/main/java/helium314/keyboard/latin/DictionaryFacilitatorImpl.java index 149c95c5d..de39ec986 100644 --- a/app/src/main/java/helium314/keyboard/latin/DictionaryFacilitatorImpl.java +++ b/app/src/main/java/helium314/keyboard/latin/DictionaryFacilitatorImpl.java @@ -266,6 +266,7 @@ public class DictionaryFacilitatorImpl implements DictionaryFacilitator { } @Override + @NonNull public Locale getMainLocale() { return mDictionaryGroups.get(0).mLocale; } diff --git a/app/src/main/java/helium314/keyboard/latin/LatinIME.java b/app/src/main/java/helium314/keyboard/latin/LatinIME.java index 21f641841..620dc67b7 100644 --- a/app/src/main/java/helium314/keyboard/latin/LatinIME.java +++ b/app/src/main/java/helium314/keyboard/latin/LatinIME.java @@ -660,8 +660,8 @@ public class LatinIME extends InputMethodService implements KeyboardActionListen resetDictionaryFacilitatorIfNecessary(); } refreshPersonalizationDictionarySession(currentSettingsValues); - Suggest.nextWordSuggestionsCache.clear(); - mStatsUtilsManager.onLoadSettings(this /* context */, currentSettingsValues); + mInputLogic.mSuggest.clearNextWordSuggestionsCache(); + mStatsUtilsManager.onLoadSettings(this, currentSettingsValues); } private void refreshPersonalizationDictionarySession( diff --git a/app/src/main/java/helium314/keyboard/latin/Suggest.java 
b/app/src/main/java/helium314/keyboard/latin/Suggest.java deleted file mode 100644 index 917d03235..000000000 --- a/app/src/main/java/helium314/keyboard/latin/Suggest.java +++ /dev/null @@ -1,631 +0,0 @@ -/* - * Copyright (C) 2008 The Android Open Source Project - * modified - * SPDX-License-Identifier: Apache-2.0 AND GPL-3.0-only - */ - -package helium314.keyboard.latin; - -import android.text.TextUtils; -import helium314.keyboard.latin.utils.Log; - -import helium314.keyboard.keyboard.Keyboard; -import helium314.keyboard.latin.SuggestedWords.SuggestedWordInfo; -import helium314.keyboard.latin.common.ComposedData; -import helium314.keyboard.latin.common.Constants; -import helium314.keyboard.latin.common.InputPointers; -import helium314.keyboard.latin.common.StringUtils; -import helium314.keyboard.latin.define.DebugFlags; -import helium314.keyboard.latin.settings.Settings; -import helium314.keyboard.latin.settings.SettingsValuesForSuggestion; -import helium314.keyboard.latin.suggestions.SuggestionStripView; -import helium314.keyboard.latin.utils.AutoCorrectionUtils; -import com.android.inputmethod.latin.utils.BinaryDictionaryUtils; -import helium314.keyboard.latin.utils.SuggestionResults; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Locale; - -import static helium314.keyboard.latin.define.DecoderSpecificConstants.SHOULD_AUTO_CORRECT_USING_NON_WHITE_LISTED_SUGGESTION; -import static helium314.keyboard.latin.define.DecoderSpecificConstants.SHOULD_REMOVE_PREVIOUSLY_REJECTED_SUGGESTION; - -import androidx.annotation.NonNull; -import androidx.annotation.Nullable; - -import kotlin.collections.CollectionsKt; - -/** - * This class loads a dictionary and provides a list of suggestions for a given sequence of - * characters. This includes corrections and completions. 
- */ -public final class Suggest { - public static final String TAG = Suggest.class.getSimpleName(); - - // Session id for - // {@link #getSuggestedWords(WordComposer,String,ProximityInfo,boolean,int)}. - // We are sharing the same ID between typing and gesture to save RAM footprint. - public static final int SESSION_ID_TYPING = 0; - public static final int SESSION_ID_GESTURE = 0; - - // Close to -2**31 - private static final int SUPPRESS_SUGGEST_THRESHOLD = -2000000000; - - private final DictionaryFacilitator mDictionaryFacilitator; - - private static final int MAXIMUM_AUTO_CORRECT_LENGTH_FOR_GERMAN = 12; - private static final HashMap sLanguageToMaximumAutoCorrectionWithSpaceLength = - new HashMap<>(); - static { - // TODO: should we add Finnish here? - // TODO: This should not be hardcoded here but be written in the dictionary header - sLanguageToMaximumAutoCorrectionWithSpaceLength.put(Locale.GERMAN.getLanguage(), - MAXIMUM_AUTO_CORRECT_LENGTH_FOR_GERMAN); - } - - // cleared whenever LatinIME.loadSettings is called, notably on changing layout and switching input fields - public static final HashMap nextWordSuggestionsCache = new HashMap<>(); - - private float mAutoCorrectionThreshold; - private float mPlausibilityThreshold; - - public Suggest(final DictionaryFacilitator dictionaryFacilitator) { - mDictionaryFacilitator = dictionaryFacilitator; - } - - /** - * Set the normalized-score threshold for a suggestion to be considered strong enough that we - * will auto-correct to this. 
- * @param threshold the threshold - */ - public void setAutoCorrectionThreshold(final float threshold) { - mAutoCorrectionThreshold = threshold; - } - - public interface OnGetSuggestedWordsCallback { - void onGetSuggestedWords(final SuggestedWords suggestedWords); - } - - public void getSuggestedWords(final WordComposer wordComposer, - final NgramContext ngramContext, final Keyboard keyboard, - final SettingsValuesForSuggestion settingsValuesForSuggestion, - final boolean isCorrectionEnabled, final int inputStyle, final int sequenceNumber, - final OnGetSuggestedWordsCallback callback) { - if (wordComposer.isBatchMode()) { - getSuggestedWordsForBatchInput(wordComposer, ngramContext, keyboard, - settingsValuesForSuggestion, inputStyle, sequenceNumber, callback); - } else { - getSuggestedWordsForNonBatchInput(wordComposer, ngramContext, keyboard, - settingsValuesForSuggestion, inputStyle, isCorrectionEnabled, - sequenceNumber, callback); - } - } - - private static ArrayList getTransformedSuggestedWordInfoList( - final WordComposer wordComposer, final SuggestionResults results, - final int trailingSingleQuotesCount, final Locale defaultLocale) { - final boolean shouldMakeSuggestionsAllUpperCase = wordComposer.isAllUpperCase() - && !wordComposer.isResumed(); - final boolean isOnlyFirstCharCapitalized = - wordComposer.isOrWillBeOnlyFirstCharCapitalized(); - - final ArrayList suggestionsContainer = new ArrayList<>(results); - final int suggestionsCount = suggestionsContainer.size(); - if (isOnlyFirstCharCapitalized || shouldMakeSuggestionsAllUpperCase - || 0 != trailingSingleQuotesCount) { - for (int i = 0; i < suggestionsCount; ++i) { - final SuggestedWordInfo wordInfo = suggestionsContainer.get(i); - final Locale wordLocale = wordInfo.mSourceDict.mLocale; - final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo( - wordInfo, null == wordLocale ? 
defaultLocale : wordLocale, - shouldMakeSuggestionsAllUpperCase, isOnlyFirstCharCapitalized, - trailingSingleQuotesCount); - suggestionsContainer.set(i, transformedWordInfo); - } - } - return suggestionsContainer; - } - - private static SuggestedWordInfo getWhitelistedWordInfoOrNull( - @NonNull final List suggestions) { - if (suggestions.isEmpty()) { - return null; - } - final SuggestedWordInfo firstSuggestedWordInfo = suggestions.get(0); - if (!firstSuggestedWordInfo.isKindOf(SuggestedWordInfo.KIND_WHITELIST)) { - return null; - } - return firstSuggestedWordInfo; - } - - // Retrieves suggestions for non-batch input (typing, recorrection, predictions...) - // and calls the callback function with the suggestions. - private void getSuggestedWordsForNonBatchInput(final WordComposer wordComposer, - final NgramContext ngramContext, final Keyboard keyboard, - final SettingsValuesForSuggestion settingsValuesForSuggestion, - final int inputStyleIfNotPrediction, final boolean isCorrectionEnabled, - final int sequenceNumber, final OnGetSuggestedWordsCallback callback) { - final String typedWordString = wordComposer.getTypedWord(); - final int trailingSingleQuotesCount = StringUtils.getTrailingSingleQuotesCount(typedWordString); - - final SuggestionResults suggestionResults = typedWordString.isEmpty() - ? 
getNextWordSuggestions(ngramContext, keyboard, inputStyleIfNotPrediction, settingsValuesForSuggestion) - : mDictionaryFacilitator.getSuggestionResults( - wordComposer.getComposedDataSnapshot(), ngramContext, keyboard, - settingsValuesForSuggestion, SESSION_ID_TYPING, inputStyleIfNotPrediction); - final Locale locale = mDictionaryFacilitator.getMainLocale(); - final ArrayList suggestionsContainer = - getTransformedSuggestedWordInfoList(wordComposer, suggestionResults, - trailingSingleQuotesCount, locale); - - Dictionary sourceDictionaryOfRemovedWord = null; - // store the original SuggestedWordInfo for typed word, as it will be removed - // we may want to re-add it in case auto-correction happens, so that the original word can at least be selected - SuggestedWordInfo typedWordFirstOccurrenceWordInfo = null; - for (final SuggestedWordInfo info : suggestionsContainer) { - // Search for the best dictionary, defined as the first one with the highest match - // quality we can find. - if (typedWordString.equals(info.mWord)) { - // Use this source if the old match had lower quality than this match - sourceDictionaryOfRemovedWord = info.mSourceDict; - typedWordFirstOccurrenceWordInfo = info; - break; - } - } - - final int firstOccurrenceOfTypedWordInSuggestions = - SuggestedWordInfo.removeDupsAndTypedWord(typedWordString, suggestionsContainer); - final boolean resultsArePredictions = !wordComposer.isComposingWord(); - - // SuggestedWordInfos for suggestions for empty word (based only on previously typed words) - // done in a weird way to imitate what kotlin does with lazy - final ArrayList firstAndTypedWordEmptyInfos = new ArrayList<>(2); - - final boolean[] thoseTwo = shouldBeAutoCorrected( - trailingSingleQuotesCount, - typedWordString, - suggestionsContainer, - sourceDictionaryOfRemovedWord, - firstAndTypedWordEmptyInfos, - () -> { - final SuggestedWordInfo firstSuggestionInContainer = suggestionsContainer.isEmpty() ? 
null : suggestionsContainer.get(0); - SuggestedWordInfo first = firstSuggestionInContainer != null ? firstSuggestionInContainer : suggestionResults.first(); - putEmptyWordSuggestions(firstAndTypedWordEmptyInfos, ngramContext, keyboard, - settingsValuesForSuggestion, inputStyleIfNotPrediction, first.getWord(), typedWordString); - }, - isCorrectionEnabled, - wordComposer, - suggestionResults, - firstOccurrenceOfTypedWordInSuggestions, - typedWordFirstOccurrenceWordInfo - ); - final boolean allowsToBeAutoCorrected = thoseTwo[0]; - final boolean hasAutoCorrection = thoseTwo[1]; - - final SuggestedWordInfo typedWordInfo = new SuggestedWordInfo(typedWordString, - "" /* prevWordsContext */, SuggestedWordInfo.MAX_SCORE, - SuggestedWordInfo.KIND_TYPED, - null == sourceDictionaryOfRemovedWord ? Dictionary.DICTIONARY_USER_TYPED - : sourceDictionaryOfRemovedWord, - SuggestedWordInfo.NOT_AN_INDEX /* indexOfTouchPointOfSecondWord */, - SuggestedWordInfo.NOT_A_CONFIDENCE /* autoCommitFirstWordConfidence */); - if (!TextUtils.isEmpty(typedWordString)) { - suggestionsContainer.add(0, typedWordInfo); - } - - final ArrayList suggestionsList; - if (SuggestionStripView.DEBUG_SUGGESTIONS && !suggestionsContainer.isEmpty()) { - suggestionsList = getSuggestionsInfoListWithDebugInfo(typedWordString, suggestionsContainer); - } else { - suggestionsList = suggestionsContainer; - } - - final int inputStyle; - if (resultsArePredictions) { - inputStyle = suggestionResults.mIsBeginningOfSentence - ? 
SuggestedWords.INPUT_STYLE_BEGINNING_OF_SENTENCE_PREDICTION - : SuggestedWords.INPUT_STYLE_PREDICTION; - } else { - inputStyle = inputStyleIfNotPrediction; - } - - final boolean isTypedWordValid = firstOccurrenceOfTypedWordInSuggestions > -1 - || (!resultsArePredictions && !allowsToBeAutoCorrected); - - if (hasAutoCorrection) { - // make sure typed word is shown, so user is able to override incoming autocorrection - if (typedWordFirstOccurrenceWordInfo != null) { - if (SuggestionStripView.DEBUG_SUGGESTIONS) - addDebugInfo(typedWordFirstOccurrenceWordInfo, typedWordString); - suggestionsList.add(2, typedWordFirstOccurrenceWordInfo); - } else { - suggestionsList.add(2, new SuggestedWordInfo(typedWordString, "", 0, SuggestedWordInfo.KIND_TYPED, - Dictionary.DICTIONARY_USER_TYPED, SuggestedWordInfo.NOT_AN_INDEX, SuggestedWordInfo.NOT_A_CONFIDENCE)); - } - } - - callback.onGetSuggestedWords(new SuggestedWords(suggestionsList, suggestionResults.mRawSuggestions, - typedWordInfo, isTypedWordValid, hasAutoCorrection, false, inputStyle, sequenceNumber)); - } - - // annoyingly complicated thing to avoid getting emptyWordSuggestions more than once - // todo: now with the cache just remove it... 
- // and best convert the class to kotlin, that should make it much more readable - /** puts word infos for suggestions with an empty word in [infos], based on previously typed words */ - private ArrayList putEmptyWordSuggestions(ArrayList infos, NgramContext ngramContext, - Keyboard keyboard, SettingsValuesForSuggestion settingsValuesForSuggestion, - int inputStyleIfNotPrediction, String firstSuggestionInContainer, String typedWordString) { - if (infos.size() != 0) return infos; - infos.add(null); - infos.add(null); - final SuggestionResults emptyWordSuggestions = getNextWordSuggestions(ngramContext, keyboard, inputStyleIfNotPrediction, settingsValuesForSuggestion); - final SuggestedWordInfo nextWordSuggestionInfoForFirstSuggestionInContainer = - CollectionsKt.firstOrNull(emptyWordSuggestions, (word) -> word.mWord.equals(firstSuggestionInContainer)); - final SuggestedWordInfo nextWordSuggestionInfoForTypedWord = - CollectionsKt.firstOrNull(emptyWordSuggestions, (word) -> word.mWord.equals(typedWordString)); - infos.add(nextWordSuggestionInfoForFirstSuggestionInContainer); - infos.add(nextWordSuggestionInfoForTypedWord); - return infos; - } - - // returns [allowsToBeAutoCorrected, hasAutoCorrection] - boolean[] shouldBeAutoCorrected( - final int trailingSingleQuotesCount, - final String typedWordString, - final List suggestionsContainer, - final Dictionary sourceDictionaryOfRemovedWord, - final List firstAndTypedWordEmptyInfos, - final Runnable putEmptyWordSuggestions, - final boolean isCorrectionEnabled, - final WordComposer wordComposer, - final SuggestionResults suggestionResults, - final int firstOccurrenceOfTypedWordInSuggestions, - final SuggestedWordInfo typedWordFirstOccurrenceWordInfo - ) { - final String consideredWord = trailingSingleQuotesCount > 0 - ? 
typedWordString.substring(0, typedWordString.length() - trailingSingleQuotesCount) - : typedWordString; - - final SuggestedWordInfo whitelistedWordInfo = - getWhitelistedWordInfoOrNull(suggestionsContainer); - final String whitelistedWord = whitelistedWordInfo == null - ? null : whitelistedWordInfo.mWord; - final SuggestedWordInfo firstSuggestionInContainer = suggestionsContainer.isEmpty() ? null : suggestionsContainer.get(0); - - // We allow auto-correction if whitelisting is not required or the word is whitelisted, - // or if the word had more than one char and was not suggested. - final boolean allowsToBeAutoCorrected; - final int scoreLimit = Settings.getInstance().getCurrent().mScoreLimitForAutocorrect; - if ((SHOULD_AUTO_CORRECT_USING_NON_WHITE_LISTED_SUGGESTION || whitelistedWord != null) - || (consideredWord.length() > 1 && (sourceDictionaryOfRemovedWord == null)) // more than 1 letter and not in dictionary - ) { - allowsToBeAutoCorrected = true; - } else if (firstSuggestionInContainer != null && !typedWordString.isEmpty()) { - // maybe allow autocorrect, depending on scores and emptyWordSuggestions - putEmptyWordSuggestions.run(); - final SuggestedWordInfo first = firstAndTypedWordEmptyInfos.get(0); - final SuggestedWordInfo typed = firstAndTypedWordEmptyInfos.get(1); - if (firstSuggestionInContainer.mScore > scoreLimit) { - allowsToBeAutoCorrected = true; // suggestion has good score, allow - } else if (first == null) { - allowsToBeAutoCorrected = false; // no autocorrect if first suggestion unknown in this context - } else if (typed == null) { - allowsToBeAutoCorrected = true; // allow autocorrect if typed word not known in this context, todo: this may be too aggressive - } else { - // autocorrect if suggested word has clearly higher score for empty word suggestions - allowsToBeAutoCorrected = (first.mScore - typed.mScore) > 20; - } - } else { - allowsToBeAutoCorrected = false; - } - - final boolean hasAutoCorrection; - // If correction is not enabled, 
we never auto-correct. This is for example for when - // the setting "Auto-correction" is "off": we still suggest, but we don't auto-correct. - if (!isCorrectionEnabled - // todo: can some parts be moved to isCorrectionEnabled? e.g. keyboardIdMode only depends on input type - // i guess then not mAutoCorrectionEnabledPerUserSettings should be read, but rather some isAutocorrectEnabled() - // If the word does not allow to be auto-corrected, then we don't auto-correct. - || !allowsToBeAutoCorrected - // If we are doing prediction, then we never auto-correct of course - || !wordComposer.isComposingWord() - // If we don't have suggestion results, we can't evaluate the first suggestion - // for auto-correction - || suggestionResults.isEmpty() - // If the word has digits, we never auto-correct because it's likely the word - // was type with a lot of care - || wordComposer.hasDigits() - // If the word is mostly caps, we never auto-correct because this is almost - // certainly intentional (and careful input) - || wordComposer.isMostlyCaps() - // We never auto-correct when suggestions are resumed because it would be unexpected - || wordComposer.isResumed() - // If we don't have a main dictionary, we never want to auto-correct. The reason - // for this is, the user may have a contact whose name happens to match a valid - // word in their language, and it will unexpectedly auto-correct. For example, if - // the user types in English with no dictionary and has a "Will" in their contact - // list, "will" would always auto-correct to "Will" which is unwanted. Hence, no - // main dict => no auto-correct. Also, it would probably get obnoxious quickly. 
- // TODO: now that we have personalization, we may want to re-evaluate this decision - || !mDictionaryFacilitator.hasAtLeastOneInitializedMainDictionary()) { - hasAutoCorrection = false; - } else { - final SuggestedWordInfo firstSuggestion = suggestionResults.first(); - if (suggestionResults.mFirstSuggestionExceedsConfidenceThreshold - && firstOccurrenceOfTypedWordInSuggestions != 0) { - // mFirstSuggestionExceedsConfidenceThreshold is always set to false, so currently - // this branch is useless - return new boolean[]{ true, true }; - } - if (!AutoCorrectionUtils.suggestionExceedsThreshold( - firstSuggestion, consideredWord, mAutoCorrectionThreshold)) { - // todo: maybe also do something here depending on ngram context? - // Score is too low for autocorrect - return new boolean[]{ true, false }; - } - // We have a high score, so we need to check if this suggestion is in the correct - // form to allow auto-correcting to it in this language. For details of how this - // is determined, see #isAllowedByAutoCorrectionWithSpaceFilter. - // TODO: this should not have its own logic here but be handled by the dictionary. - final boolean allowed = isAllowedByAutoCorrectionWithSpaceFilter(firstSuggestion); - if (allowed && typedWordFirstOccurrenceWordInfo != null && typedWordFirstOccurrenceWordInfo.mScore > scoreLimit) { - // typed word is valid and has good score - // do not auto-correct if typed word is better match than first suggestion - final SuggestedWordInfo first = firstSuggestionInContainer != null ? 
firstSuggestionInContainer : firstSuggestion; - final Locale dictLocale = mDictionaryFacilitator.getCurrentLocale(); - - if (first.mScore < scoreLimit) { - // don't allow if suggestion has too low score - return new boolean[]{ true, false }; - } - if (first.mSourceDict.mLocale != typedWordFirstOccurrenceWordInfo.mSourceDict.mLocale) { - // dict locale different -> return the better match - return new boolean[]{ true, dictLocale == first.mSourceDict.mLocale }; - } - // the score difference may need tuning, but so far it seems alright - final int firstWordBonusScore = (first.isKindOf(SuggestedWordInfo.KIND_WHITELIST) ? 20 : 0) // large bonus because it's wanted by dictionary - + (StringUtils.isLowerCaseAscii(typedWordString) ? 5 : 0) // small bonus because typically only ascii is typed (applies to latin keyboards only) - + (first.mScore > typedWordFirstOccurrenceWordInfo.mScore ? 5 : 0); // small bonus if score is higher - putEmptyWordSuggestions.run(); - int firstScoreForEmpty = firstAndTypedWordEmptyInfos.get(0) != null ? firstAndTypedWordEmptyInfos.get(0).mScore : 0; - int typedScoreForEmpty = firstAndTypedWordEmptyInfos.get(1) != null ? firstAndTypedWordEmptyInfos.get(1).mScore : 0; - if (firstScoreForEmpty + firstWordBonusScore >= typedScoreForEmpty + 20) { - // return the better match for ngram context - // biased towards typed word - // but with bonus depending on - return new boolean[]{ true, true }; - } - hasAutoCorrection = false; - } else { - hasAutoCorrection = allowed; - } - } - return new boolean[]{ allowsToBeAutoCorrected, hasAutoCorrection }; - } - - // Retrieves suggestions for the batch input - // and calls the callback function with the suggestions. 
- private void getSuggestedWordsForBatchInput(final WordComposer wordComposer, - final NgramContext ngramContext, final Keyboard keyboard, - final SettingsValuesForSuggestion settingsValuesForSuggestion, - final int inputStyle, final int sequenceNumber, - final OnGetSuggestedWordsCallback callback) { - final SuggestionResults suggestionResults = mDictionaryFacilitator.getSuggestionResults( - wordComposer.getComposedDataSnapshot(), ngramContext, keyboard, - settingsValuesForSuggestion, SESSION_ID_GESTURE, inputStyle); - replaceSingleLetterFirstSuggestion(suggestionResults); - - // For transforming words that don't come from a dictionary, because it's our best bet - final Locale locale = mDictionaryFacilitator.getMainLocale(); - final ArrayList suggestionsContainer = new ArrayList<>(suggestionResults); - final int suggestionsCount = suggestionsContainer.size(); - final boolean isFirstCharCapitalized = wordComposer.wasShiftedNoLock(); - final boolean isAllUpperCase = wordComposer.isAllUpperCase(); - if (isFirstCharCapitalized || isAllUpperCase) { - for (int i = 0; i < suggestionsCount; ++i) { - final SuggestedWordInfo wordInfo = suggestionsContainer.get(i); - final Locale wordlocale = wordInfo.mSourceDict.mLocale; - final SuggestedWordInfo transformedWordInfo = getTransformedSuggestedWordInfo( - wordInfo, null == wordlocale ? 
locale : wordlocale, isAllUpperCase, - isFirstCharCapitalized, 0 /* trailingSingleQuotesCount */); - suggestionsContainer.set(i, transformedWordInfo); - } - } - - final SuggestedWordInfo rejected; - if (SHOULD_REMOVE_PREVIOUSLY_REJECTED_SUGGESTION - && suggestionsContainer.size() > 1 - && TextUtils.equals(suggestionsContainer.get(0).mWord, - wordComposer.getRejectedBatchModeSuggestion())) { - rejected = suggestionsContainer.remove(0); - suggestionsContainer.add(1, rejected); - } else { - rejected = null; - } - SuggestedWordInfo.removeDupsAndTypedWord(null /* typedWord */, suggestionsContainer); - - // For some reason some suggestions with MIN_VALUE are making their way here. - // TODO: Find a more robust way to detect distracters. - for (int i = suggestionsContainer.size() - 1; i >= 0; --i) { - if (suggestionsContainer.get(i).mScore < SUPPRESS_SUGGEST_THRESHOLD) { - suggestionsContainer.remove(i); - } - } - - // In the batch input mode, the most relevant suggested word should act as a "typed word" - // (typedWordValid=true), not as an "auto correct word" (willAutoCorrect=false). - // Note that because this method is never used to get predictions, there is no need to - // modify inputType such in getSuggestedWordsForNonBatchInput. - final SuggestedWordInfo pseudoTypedWordInfo = preferNextWordSuggestion(suggestionsContainer.isEmpty() ? 
null : suggestionsContainer.get(0), - suggestionsContainer, getNextWordSuggestions(ngramContext, keyboard, inputStyle, settingsValuesForSuggestion), rejected); - - final ArrayList suggestionsList; - if (SuggestionStripView.DEBUG_SUGGESTIONS && !suggestionsContainer.isEmpty()) { - suggestionsList = getSuggestionsInfoListWithDebugInfo(suggestionResults.first().mWord, suggestionsContainer); - } else { - suggestionsList = suggestionsContainer; - } - - callback.onGetSuggestedWords(new SuggestedWords(suggestionsList, - suggestionResults.mRawSuggestions, - pseudoTypedWordInfo, - true /* typedWordValid */, - false /* willAutoCorrect */, - false /* isObsoleteSuggestions */, - inputStyle, sequenceNumber)); - } - - /** reduces score of the first suggestion if next one is close and has more than a single letter */ - private void replaceSingleLetterFirstSuggestion(final SuggestionResults suggestionResults) { - if (suggestionResults.size() < 2 || suggestionResults.first().mWord.length() != 1) return; - // suppress single letter suggestions if next suggestion is close and has more than one letter - final Iterator iterator = suggestionResults.iterator(); - final SuggestedWordInfo first = iterator.next(); - final SuggestedWordInfo second = iterator.next(); - if (second.mWord.length() > 1 && second.mScore > 0.94 * first.mScore) { - suggestionResults.remove(first); // remove and re-add with lower score - suggestionResults.add(new SuggestedWordInfo(first.mWord, first.mPrevWordsContext, (int) (first.mScore * 0.93), - first.mKindAndFlags, first.mSourceDict, first.mIndexOfTouchPointOfSecondWord, first.mAutoCommitFirstWordConfidence)); - if (DebugFlags.DEBUG_ENABLED) - Log.d(TAG, "reduced score of "+first.mWord+" from "+first.mScore +", new first: "+suggestionResults.first().mWord+" ("+suggestionResults.first().mScore+")"); - } - } - - // returns new pseudoTypedWordInfo, puts it in suggestionsContainer, modifies nextWordSuggestions - @Nullable - private SuggestedWordInfo 
preferNextWordSuggestion(@Nullable final SuggestedWordInfo pseudoTypedWordInfo, - @NonNull final ArrayList suggestionsContainer, - @NonNull final SuggestionResults nextWordSuggestions, @Nullable final SuggestedWordInfo rejected) { - if (pseudoTypedWordInfo == null - || !Settings.getInstance().getCurrent().mUsePersonalizedDicts - || !pseudoTypedWordInfo.mSourceDict.mDictType.equals(Dictionary.TYPE_MAIN) - || suggestionsContainer.size() < 2 - ) - return pseudoTypedWordInfo; - CollectionsKt.removeAll(nextWordSuggestions, (info) -> info.mScore < 170); // we only want reasonably often typed words, value may require tuning - if (nextWordSuggestions.isEmpty()) - return pseudoTypedWordInfo; - // for each suggestion, check whether the word was already typed in this ngram context (i.e. is nextWordSuggestion) - for (final SuggestedWordInfo suggestion : suggestionsContainer) { - if (suggestion.mScore < pseudoTypedWordInfo.mScore * 0.93) break; // we only want reasonably good suggestions, value may require tuning - if (suggestion == rejected) continue; // ignore rejected suggestions - for (final SuggestedWordInfo nextWordSuggestion : nextWordSuggestions) { - if (!nextWordSuggestion.mWord.equals(suggestion.mWord)) - continue; - // if we have a high scoring suggestion in next word suggestions, take it (because it's expected that user might want to type it again) - suggestionsContainer.remove(suggestion); - suggestionsContainer.add(0, suggestion); - if (DebugFlags.DEBUG_ENABLED) - Log.d(TAG, "replaced batch word "+pseudoTypedWordInfo+" with "+suggestion); - return suggestion; - } - } - return pseudoTypedWordInfo; - } - - private static ArrayList getSuggestionsInfoListWithDebugInfo( - final String typedWord, final ArrayList suggestions) { - final int suggestionsSize = suggestions.size(); - final ArrayList suggestionsList = new ArrayList<>(suggestionsSize); - for (final SuggestedWordInfo cur : suggestions) { - addDebugInfo(cur, typedWord); - suggestionsList.add(cur); - } - return 
suggestionsList; - } - - private static void addDebugInfo(final SuggestedWordInfo wordInfo, final String typedWord) { - final float normalizedScore = BinaryDictionaryUtils.calcNormalizedScore(typedWord, wordInfo.toString(), wordInfo.mScore); - final String scoreInfoString; - String dict = wordInfo.mSourceDict.mDictType + ":" + wordInfo.mSourceDict.mLocale; - if (normalizedScore > 0) { - scoreInfoString = String.format(Locale.ROOT, "%d (%4.2f), %s", wordInfo.mScore, normalizedScore, dict); - } else { - scoreInfoString = String.format(Locale.ROOT, "%d, %s", wordInfo.mScore, dict); - } - wordInfo.setDebugString(scoreInfoString); - } - - /** - * Computes whether this suggestion should be blocked or not in this language - * - * This function implements a filter that avoids auto-correcting to suggestions that contain - * spaces that are above a certain language-dependent character limit. In languages like German - * where it's possible to concatenate many words, it often happens our dictionary does not - * have the longer words. In this case, we offer a lot of unhelpful suggestions that contain - * one or several spaces. Ideally we should understand what the user wants and display useful - * suggestions by improving the dictionary and possibly having some specific logic. Until - * that's possible we should avoid displaying unhelpful suggestions. But it's hard to tell - * whether a suggestion is useful or not. So at least for the time being we block - * auto-correction when the suggestion is long and contains a space, which should avoid the - * worst damage. - * This function is implementing that filter. If the language enforces no such limit, then it - * always returns true. If the suggestion contains no space, it also returns true. Otherwise, - * it checks the length against the language-specific limit. - * - * @param info the suggestion info - * @return whether it's fine to auto-correct to this. 
- */ - private static boolean isAllowedByAutoCorrectionWithSpaceFilter(final SuggestedWordInfo info) { - final Locale locale = info.mSourceDict.mLocale; - if (null == locale) { - return true; - } - final Integer maximumLengthForThisLanguage = - sLanguageToMaximumAutoCorrectionWithSpaceLength.get(locale.getLanguage()); - if (null == maximumLengthForThisLanguage) { - // This language does not enforce a maximum length to auto-correction - return true; - } - return info.mWord.length() <= maximumLengthForThisLanguage - || -1 == info.mWord.indexOf(Constants.CODE_SPACE); - } - - /* package for test */ static SuggestedWordInfo getTransformedSuggestedWordInfo( - final SuggestedWordInfo wordInfo, final Locale locale, final boolean isAllUpperCase, - final boolean isOnlyFirstCharCapitalized, final int trailingSingleQuotesCount) { - final StringBuilder sb = new StringBuilder(wordInfo.mWord.length()); - if (isAllUpperCase) { - sb.append(wordInfo.mWord.toUpperCase(locale)); - } else if (isOnlyFirstCharCapitalized) { - sb.append(StringUtils.capitalizeFirstCodePoint(wordInfo.mWord, locale)); - } else { - sb.append(wordInfo.mWord); - } - // Appending quotes is here to help people quote words. However, it's not helpful - // when they type words with quotes toward the end like "it's" or "didn't", where - // it's more likely the user missed the last character (or didn't type it yet). - final int quotesToAppend = trailingSingleQuotesCount - - (-1 == wordInfo.mWord.indexOf(Constants.CODE_SINGLE_QUOTE) ? 
0 : 1); - for (int i = quotesToAppend - 1; i >= 0; --i) { - sb.appendCodePoint(Constants.CODE_SINGLE_QUOTE); - } - return new SuggestedWordInfo(sb.toString(), wordInfo.mPrevWordsContext, - wordInfo.mScore, wordInfo.mKindAndFlags, - wordInfo.mSourceDict, wordInfo.mIndexOfTouchPointOfSecondWord, - wordInfo.mAutoCommitFirstWordConfidence); - } - - /** get suggestions based on the current ngram context, with an empty typed word (that's what next word suggestions do) */ - // todo: integrate it into shouldBeAutoCorrected, remove putEmptySuggestions - // and make that thing more readable - private SuggestionResults getNextWordSuggestions(final NgramContext ngramContext, - final Keyboard keyboard, final int inputStyle, final SettingsValuesForSuggestion settingsValuesForSuggestion - ) { - final SuggestionResults cachedResults = nextWordSuggestionsCache.get(ngramContext); - if (cachedResults != null) - return cachedResults; - final SuggestionResults newResults = mDictionaryFacilitator.getSuggestionResults( - new ComposedData(new InputPointers(1), false, ""), - ngramContext, - keyboard, - settingsValuesForSuggestion, - SESSION_ID_TYPING, - inputStyle - ); - nextWordSuggestionsCache.put(ngramContext, newResults); - return newResults; - } -} diff --git a/app/src/main/java/helium314/keyboard/latin/Suggest.kt b/app/src/main/java/helium314/keyboard/latin/Suggest.kt new file mode 100644 index 000000000..0bef92283 --- /dev/null +++ b/app/src/main/java/helium314/keyboard/latin/Suggest.kt @@ -0,0 +1,581 @@ +/* + * Copyright (C) 2008 The Android Open Source Project + * modified + * SPDX-License-Identifier: Apache-2.0 AND GPL-3.0-only + */ +package helium314.keyboard.latin + +import android.text.TextUtils +import com.android.inputmethod.latin.utils.BinaryDictionaryUtils +import helium314.keyboard.keyboard.Keyboard +import helium314.keyboard.latin.SuggestedWords.SuggestedWordInfo +import helium314.keyboard.latin.common.ComposedData +import helium314.keyboard.latin.common.Constants 
+import helium314.keyboard.latin.common.InputPointers +import helium314.keyboard.latin.common.StringUtils +import helium314.keyboard.latin.define.DebugFlags +import helium314.keyboard.latin.define.DecoderSpecificConstants.SHOULD_AUTO_CORRECT_USING_NON_WHITE_LISTED_SUGGESTION +import helium314.keyboard.latin.define.DecoderSpecificConstants.SHOULD_REMOVE_PREVIOUSLY_REJECTED_SUGGESTION +import helium314.keyboard.latin.settings.Settings +import helium314.keyboard.latin.settings.SettingsValuesForSuggestion +import helium314.keyboard.latin.suggestions.SuggestionStripView +import helium314.keyboard.latin.utils.AutoCorrectionUtils +import helium314.keyboard.latin.utils.Log.d +import helium314.keyboard.latin.utils.SuggestionResults +import java.util.Locale + +/** + * This class loads a dictionary and provides a list of suggestions for a given sequence of + * characters. This includes corrections and completions. + */ +class Suggest(private val mDictionaryFacilitator: DictionaryFacilitator) { + private var mAutoCorrectionThreshold = 0f + private val mPlausibilityThreshold = 0f + private val nextWordSuggestionsCache = HashMap() + + // cache cleared whenever LatinIME.loadSettings is called, notably on changing layout and switching input fields + fun clearNextWordSuggestionsCache() = nextWordSuggestionsCache.clear() + + /** + * Set the normalized-score threshold for a suggestion to be considered strong enough that we + * will auto-correct to this. + * @param threshold the threshold + */ + fun setAutoCorrectionThreshold(threshold: Float) { + mAutoCorrectionThreshold = threshold + } + + interface OnGetSuggestedWordsCallback { + fun onGetSuggestedWords(suggestedWords: SuggestedWords?) 
+ } + + fun getSuggestedWords(wordComposer: WordComposer, ngramContext: NgramContext, keyboard: Keyboard, + settingsValuesForSuggestion: SettingsValuesForSuggestion, isCorrectionEnabled: Boolean, + inputStyle: Int, sequenceNumber: Int, callback: OnGetSuggestedWordsCallback) { + if (wordComposer.isBatchMode) { + getSuggestedWordsForBatchInput(wordComposer, ngramContext, keyboard, + settingsValuesForSuggestion, inputStyle, sequenceNumber, callback) + } else { + getSuggestedWordsForNonBatchInput(wordComposer, ngramContext, keyboard, settingsValuesForSuggestion, + inputStyle, isCorrectionEnabled, sequenceNumber, callback) + } + } + + // Retrieves suggestions for non-batch input (typing, recorrection, predictions...) + // and calls the callback function with the suggestions. + private fun getSuggestedWordsForNonBatchInput(wordComposer: WordComposer, ngramContext: NgramContext, keyboard: Keyboard, + settingsValuesForSuggestion: SettingsValuesForSuggestion, inputStyleIfNotPrediction: Int, + isCorrectionEnabled: Boolean, sequenceNumber: Int, callback: OnGetSuggestedWordsCallback) { + val typedWordString = wordComposer.typedWord + val trailingSingleQuotesCount = StringUtils.getTrailingSingleQuotesCount(typedWordString) + val suggestionResults = if (typedWordString.isEmpty()) + getNextWordSuggestions(ngramContext, keyboard, inputStyleIfNotPrediction, settingsValuesForSuggestion) + else mDictionaryFacilitator.getSuggestionResults(wordComposer.composedDataSnapshot, ngramContext, keyboard, + settingsValuesForSuggestion, SESSION_ID_TYPING, inputStyleIfNotPrediction) + val locale = mDictionaryFacilitator.mainLocale + val suggestionsContainer = getTransformedSuggestedWordInfoList(wordComposer, suggestionResults, trailingSingleQuotesCount, locale) + var sourceDictionaryOfRemovedWord: Dictionary? 
= null + // store the original SuggestedWordInfo for typed word, as it will be removed + // we may want to re-add it in case auto-correction happens, so that the original word can at least be selected + var typedWordFirstOccurrenceWordInfo: SuggestedWordInfo? = null + for (info in suggestionsContainer) { + // Search for the best dictionary, defined as the first one with the highest match + // quality we can find. + if (typedWordString == info.mWord) { + // Use this source if the old match had lower quality than this match + sourceDictionaryOfRemovedWord = info.mSourceDict + typedWordFirstOccurrenceWordInfo = info + break + } + } + val firstOccurrenceOfTypedWordInSuggestions = SuggestedWordInfo.removeDupsAndTypedWord(typedWordString, suggestionsContainer) + val resultsArePredictions = !wordComposer.isComposingWord + + // SuggestedWordInfos for suggestions for empty word (based only on previously typed words) + // done in a weird way to imitate what kotlin does with lazy + val firstAndTypedWordEmptyInfos = ArrayList(2) + val thoseTwo = shouldBeAutoCorrected( // todo: do it better... 
+ trailingSingleQuotesCount, + typedWordString, + suggestionsContainer, + sourceDictionaryOfRemovedWord, + firstAndTypedWordEmptyInfos, + { + val firstSuggestionInContainer = + if (suggestionsContainer.isEmpty()) null else suggestionsContainer[0] + val first = + firstSuggestionInContainer ?: suggestionResults.first() + putEmptyWordSuggestions( + firstAndTypedWordEmptyInfos, ngramContext, keyboard, + settingsValuesForSuggestion, inputStyleIfNotPrediction, first.word, typedWordString + ) + }, + isCorrectionEnabled, + wordComposer, + suggestionResults, + firstOccurrenceOfTypedWordInSuggestions, + typedWordFirstOccurrenceWordInfo + ) + val allowsToBeAutoCorrected = thoseTwo[0] + val hasAutoCorrection = thoseTwo[1] + val typedWordInfo = SuggestedWordInfo(typedWordString, "", SuggestedWordInfo.MAX_SCORE, + SuggestedWordInfo.KIND_TYPED, sourceDictionaryOfRemovedWord ?: Dictionary.DICTIONARY_USER_TYPED, + SuggestedWordInfo.NOT_AN_INDEX , SuggestedWordInfo.NOT_A_CONFIDENCE) + if (!TextUtils.isEmpty(typedWordString)) { + suggestionsContainer.add(0, typedWordInfo) + } + val suggestionsList = if (SuggestionStripView.DEBUG_SUGGESTIONS && suggestionsContainer.isNotEmpty()) { + getSuggestionsInfoListWithDebugInfo(typedWordString, suggestionsContainer) + } else { + suggestionsContainer + } + val inputStyle = if (resultsArePredictions) { + if (suggestionResults.mIsBeginningOfSentence) SuggestedWords.INPUT_STYLE_BEGINNING_OF_SENTENCE_PREDICTION + else SuggestedWords.INPUT_STYLE_PREDICTION + } else { + inputStyleIfNotPrediction + } + val isTypedWordValid = firstOccurrenceOfTypedWordInSuggestions > -1 || !resultsArePredictions && !allowsToBeAutoCorrected + if (hasAutoCorrection) { + // make sure typed word is shown, so user is able to override incoming autocorrection + if (typedWordFirstOccurrenceWordInfo != null) { + if (SuggestionStripView.DEBUG_SUGGESTIONS) addDebugInfo(typedWordFirstOccurrenceWordInfo, typedWordString) + suggestionsList.add(2, typedWordFirstOccurrenceWordInfo) + } 
else { + suggestionsList.add(2, + SuggestedWordInfo(typedWordString, "", 0, SuggestedWordInfo.KIND_TYPED, + Dictionary.DICTIONARY_USER_TYPED, SuggestedWordInfo.NOT_AN_INDEX, SuggestedWordInfo.NOT_A_CONFIDENCE) + ) + } + } + callback.onGetSuggestedWords(SuggestedWords(suggestionsList, suggestionResults.mRawSuggestions, + typedWordInfo, isTypedWordValid, hasAutoCorrection, false, inputStyle, sequenceNumber)) + } + + // annoyingly complicated thing to avoid getting emptyWordSuggestions more than once + // todo: now with the cache just remove it... + /** + * Fills [infos] with the word infos of next-word suggestions (suggestions for an empty word, based on previously typed words). + * After the call, index 0 holds the next-word suggestion whose word equals [firstSuggestionInContainer] + * and index 1 the one whose word equals [typedWordString]; each is null if there is no such suggestion. + * Does nothing if [infos] is not empty, so the lookup is done at most once per list. + * @return the same [infos] list + */ + private fun putEmptyWordSuggestions( + infos: ArrayList, ngramContext: NgramContext, + keyboard: Keyboard, settingsValuesForSuggestion: SettingsValuesForSuggestion, + inputStyleIfNotPrediction: Int, firstSuggestionInContainer: String, typedWordString: String + ): ArrayList { + if (infos.size != 0) return infos + // reserve both slots up front, a non-empty list also marks the lookup as done + infos.add(null) + infos.add(null) + val emptyWordSuggestions = getNextWordSuggestions(ngramContext, keyboard, inputStyleIfNotPrediction, settingsValuesForSuggestion) + val nextWordSuggestionInfoForFirstSuggestionInContainer = + emptyWordSuggestions.firstOrNull { word: SuggestedWordInfo -> word.mWord == firstSuggestionInContainer } + val nextWordSuggestionInfoForTypedWord = + emptyWordSuggestions.firstOrNull { word: SuggestedWordInfo -> word.mWord == typedWordString } + // write into the reserved slots: readers use index 0 and 1, appending would leave those null + infos[0] = nextWordSuggestionInfoForFirstSuggestionInContainer + infos[1] = nextWordSuggestionInfoForTypedWord + return infos + } + + // returns [allowsToBeAutoCorrected, hasAutoCorrection] + // public for testing + fun shouldBeAutoCorrected( + trailingSingleQuotesCount: Int, + typedWordString: String, + suggestionsContainer: List, + sourceDictionaryOfRemovedWord: Dictionary?, + firstAndTypedWordEmptyInfos: List, + putEmptyWordSuggestions: Runnable, + 
isCorrectionEnabled: Boolean, + wordComposer: WordComposer, + suggestionResults: SuggestionResults, + firstOccurrenceOfTypedWordInSuggestions: Int, + typedWordFirstOccurrenceWordInfo: SuggestedWordInfo? + ): BooleanArray { + val consideredWord = if (trailingSingleQuotesCount > 0) typedWordString.substring( + 0, + typedWordString.length - trailingSingleQuotesCount + ) else typedWordString + val whitelistedWordInfo = getWhitelistedWordInfoOrNull(suggestionsContainer) + val whitelistedWord = whitelistedWordInfo?.mWord + val firstSuggestionInContainer = if (suggestionsContainer.isEmpty()) null else suggestionsContainer[0] + + // We allow auto-correction if whitelisting is not required or the word is whitelisted, + // or if the word had more than one char and was not suggested. + val allowsToBeAutoCorrected: Boolean + val scoreLimit = Settings.getInstance().current.mScoreLimitForAutocorrect + allowsToBeAutoCorrected = + if (SHOULD_AUTO_CORRECT_USING_NON_WHITE_LISTED_SUGGESTION || whitelistedWord != null || consideredWord.length > 1 && sourceDictionaryOfRemovedWord == null // more than 1 letter and not in dictionary + ) { + true + } else if (firstSuggestionInContainer != null && typedWordString.isNotEmpty()) { + // maybe allow autocorrect, depending on scores and emptyWordSuggestions + putEmptyWordSuggestions.run() + val first = firstAndTypedWordEmptyInfos[0] + val typed = firstAndTypedWordEmptyInfos[1] + if (firstSuggestionInContainer.mScore > scoreLimit) { + true // suggestion has good score, allow + } else if (first == null) { + false // no autocorrect if first suggestion unknown in this context + } else if (typed == null) { + true // allow autocorrect if typed word not known in this context, todo: this may be too aggressive + } else { + // autocorrect if suggested word has clearly higher score for empty word suggestions + first.mScore - typed.mScore > 20 + } + } else { + false + } + // If correction is not enabled, we never auto-correct. 
This is for example for when + // the setting "Auto-correction" is "off": we still suggest, but we don't auto-correct. + val hasAutoCorrection = + if (!isCorrectionEnabled // todo: can some parts be moved to isCorrectionEnabled? e.g. keyboardIdMode only depends on input type + // i guess then not mAutoCorrectionEnabledPerUserSettings should be read, but rather some isAutocorrectEnabled() + // If the word does not allow to be auto-corrected, then we don't auto-correct. + || !allowsToBeAutoCorrected // If we are doing prediction, then we never auto-correct of course + || !wordComposer.isComposingWord // If we don't have suggestion results, we can't evaluate the first suggestion + // for auto-correction + || suggestionResults.isEmpty() // If the word has digits, we never auto-correct because it's likely the word + // was type with a lot of care + || wordComposer.hasDigits() // If the word is mostly caps, we never auto-correct because this is almost + // certainly intentional (and careful input) + || wordComposer.isMostlyCaps // We never auto-correct when suggestions are resumed because it would be unexpected + || wordComposer.isResumed // If we don't have a main dictionary, we never want to auto-correct. The reason + // for this is, the user may have a contact whose name happens to match a valid + // word in their language, and it will unexpectedly auto-correct. For example, if + // the user types in English with no dictionary and has a "Will" in their contact + // list, "will" would always auto-correct to "Will" which is unwanted. Hence, no + // main dict => no auto-correct. Also, it would probably get obnoxious quickly. 
+ // TODO: now that we have personalization, we may want to re-evaluate this decision + || !mDictionaryFacilitator.hasAtLeastOneInitializedMainDictionary() + ) { + false + } else { + val firstSuggestion = suggestionResults.first() + if (suggestionResults.mFirstSuggestionExceedsConfidenceThreshold + && firstOccurrenceOfTypedWordInSuggestions != 0 + ) { + // mFirstSuggestionExceedsConfidenceThreshold is always set to false, so currently + // this branch is useless + return booleanArrayOf(true, true) + } + if (!AutoCorrectionUtils.suggestionExceedsThreshold( + firstSuggestion, consideredWord, mAutoCorrectionThreshold + ) + ) { + // todo: maybe also do something here depending on ngram context? + // Score is too low for autocorrect + return booleanArrayOf(true, false) + } + // We have a high score, so we need to check if this suggestion is in the correct + // form to allow auto-correcting to it in this language. For details of how this + // is determined, see #isAllowedByAutoCorrectionWithSpaceFilter. + // TODO: this should not have its own logic here but be handled by the dictionary. 
+ val allowed = isAllowedByAutoCorrectionWithSpaceFilter(firstSuggestion) + if (allowed && typedWordFirstOccurrenceWordInfo != null && typedWordFirstOccurrenceWordInfo.mScore > scoreLimit) { + // typed word is valid and has good score + // do not auto-correct if typed word is better match than first suggestion + val first = firstSuggestionInContainer ?: firstSuggestion + val dictLocale = mDictionaryFacilitator.currentLocale + if (first.mScore < scoreLimit) { + // don't allow if suggestion has too low score + return booleanArrayOf(true, false) + } + if (first.mSourceDict.mLocale !== typedWordFirstOccurrenceWordInfo.mSourceDict.mLocale) { + // dict locale different -> return the better match + return booleanArrayOf(true, dictLocale === first.mSourceDict.mLocale) + } + // the score difference may need tuning, but so far it seems alright + val firstWordBonusScore = + ((if (first.isKindOf(SuggestedWordInfo.KIND_WHITELIST)) 20 else 0) // large bonus because it's wanted by dictionary + + (if (StringUtils.isLowerCaseAscii(typedWordString)) 5 else 0) // small bonus because typically only ascii is typed (applies to latin keyboards only) + + if (first.mScore > typedWordFirstOccurrenceWordInfo.mScore) 5 else 0) // small bonus if score is higher + putEmptyWordSuggestions.run() + val firstScoreForEmpty = if (firstAndTypedWordEmptyInfos[0] != null) firstAndTypedWordEmptyInfos[0]!!.mScore else 0 + val typedScoreForEmpty = if (firstAndTypedWordEmptyInfos[1] != null) firstAndTypedWordEmptyInfos[1]!!.mScore else 0 + if (firstScoreForEmpty + firstWordBonusScore >= typedScoreForEmpty + 20) { + // return the better match for ngram context + // biased towards typed word + // but with bonus depending on + return booleanArrayOf(true, true) + } + false + } else { + allowed + } + } + return booleanArrayOf(allowsToBeAutoCorrected, hasAutoCorrection) + } + + // Retrieves suggestions for the batch input + // and calls the callback function with the suggestions. 
+ private fun getSuggestedWordsForBatchInput( + wordComposer: WordComposer, + ngramContext: NgramContext, keyboard: Keyboard, + settingsValuesForSuggestion: SettingsValuesForSuggestion, + inputStyle: Int, sequenceNumber: Int, + callback: OnGetSuggestedWordsCallback + ) { + val suggestionResults = mDictionaryFacilitator.getSuggestionResults( + wordComposer.composedDataSnapshot, ngramContext, keyboard, + settingsValuesForSuggestion, SESSION_ID_GESTURE, inputStyle + ) + replaceSingleLetterFirstSuggestion(suggestionResults) + + // For transforming words that don't come from a dictionary, because it's our best bet + val locale = mDictionaryFacilitator.mainLocale + val suggestionsContainer = ArrayList(suggestionResults) + val suggestionsCount = suggestionsContainer.size + val isFirstCharCapitalized = wordComposer.wasShiftedNoLock() + val isAllUpperCase = wordComposer.isAllUpperCase + if (isFirstCharCapitalized || isAllUpperCase) { + for (i in 0 until suggestionsCount) { + val wordInfo = suggestionsContainer[i] + val wordlocale = wordInfo!!.mSourceDict.mLocale + val transformedWordInfo = getTransformedSuggestedWordInfo( + wordInfo, wordlocale ?: locale, isAllUpperCase, + isFirstCharCapitalized, 0 + ) + suggestionsContainer[i] = transformedWordInfo + } + } + val rejected: SuggestedWordInfo? + if (SHOULD_REMOVE_PREVIOUSLY_REJECTED_SUGGESTION && suggestionsContainer.size > 1 && TextUtils.equals( + suggestionsContainer[0]!!.mWord, + wordComposer.rejectedBatchModeSuggestion + ) + ) { + rejected = suggestionsContainer.removeAt(0) + suggestionsContainer.add(1, rejected) + } else { + rejected = null + } + SuggestedWordInfo.removeDupsAndTypedWord(null, suggestionsContainer) + + // For some reason some suggestions with MIN_VALUE are making their way here. + // TODO: Find a more robust way to detect distracters. 
+ for (i in suggestionsContainer.indices.reversed()) { + if (suggestionsContainer[i]!!.mScore < SUPPRESS_SUGGEST_THRESHOLD) { + suggestionsContainer.removeAt(i) + } + } + + // In the batch input mode, the most relevant suggested word should act as a "typed word" + // (typedWordValid=true), not as an "auto correct word" (willAutoCorrect=false). + // Note that because this method is never used to get predictions, there is no need to + // modify inputType such in getSuggestedWordsForNonBatchInput. + val pseudoTypedWordInfo = preferNextWordSuggestion( + if (suggestionsContainer.isEmpty()) null else suggestionsContainer[0], + suggestionsContainer, getNextWordSuggestions(ngramContext, keyboard, inputStyle, settingsValuesForSuggestion), rejected + ) + val suggestionsList = if (SuggestionStripView.DEBUG_SUGGESTIONS && suggestionsContainer.isNotEmpty()) { + getSuggestionsInfoListWithDebugInfo(suggestionResults.first().mWord, suggestionsContainer) + } else { + suggestionsContainer + } + callback.onGetSuggestedWords( + SuggestedWords(suggestionsList, suggestionResults.mRawSuggestions, pseudoTypedWordInfo, true, + false, false, inputStyle, sequenceNumber) + ) + } + + /** reduces score of the first suggestion if next one is close and has more than a single letter */ + private fun replaceSingleLetterFirstSuggestion(suggestionResults: SuggestionResults) { + if (suggestionResults.size < 2 || suggestionResults.first().mWord.length != 1) return + // suppress single letter suggestions if next suggestion is close and has more than one letter + val iterator: Iterator = suggestionResults.iterator() + val first = iterator.next() + val second = iterator.next() + if (second.mWord.length > 1 && second.mScore > 0.94 * first.mScore) { + suggestionResults.remove(first) // remove and re-add with lower score + suggestionResults.add( + SuggestedWordInfo( + first.mWord, first.mPrevWordsContext, (first.mScore * 0.93).toInt(), + first.mKindAndFlags, first.mSourceDict, 
first.mIndexOfTouchPointOfSecondWord, first.mAutoCommitFirstWordConfidence + ) + ) + if (DebugFlags.DEBUG_ENABLED) d( + TAG, + "reduced score of " + first.mWord + " from " + first.mScore + ", new first: " + suggestionResults.first().mWord + " (" + suggestionResults.first().mScore + ")" + ) + } + } + + // returns new pseudoTypedWordInfo, puts it in suggestionsContainer, modifies nextWordSuggestions + private fun preferNextWordSuggestion( + pseudoTypedWordInfo: SuggestedWordInfo?, + suggestionsContainer: ArrayList, + nextWordSuggestions: SuggestionResults, rejected: SuggestedWordInfo? + ): SuggestedWordInfo? { + if (pseudoTypedWordInfo == null || !Settings.getInstance().current.mUsePersonalizedDicts + || pseudoTypedWordInfo.mSourceDict.mDictType != Dictionary.TYPE_MAIN || suggestionsContainer.size < 2 + ) return pseudoTypedWordInfo + nextWordSuggestions.removeAll { info: SuggestedWordInfo -> info.mScore < 170 } // we only want reasonably often typed words, value may require tuning + if (nextWordSuggestions.isEmpty()) return pseudoTypedWordInfo + // for each suggestion, check whether the word was already typed in this ngram context (i.e. 
is nextWordSuggestion) + for (suggestion in suggestionsContainer) { + if (suggestion!!.mScore < pseudoTypedWordInfo.mScore * 0.93) break // we only want reasonably good suggestions, value may require tuning + if (suggestion === rejected) continue // ignore rejected suggestions + for (nextWordSuggestion in nextWordSuggestions) { + if (nextWordSuggestion.mWord != suggestion.mWord) continue + // if we have a high scoring suggestion in next word suggestions, take it (because it's expected that user might want to type it again) + suggestionsContainer.remove(suggestion) + suggestionsContainer.add(0, suggestion) + if (DebugFlags.DEBUG_ENABLED) d( + TAG, + "replaced batch word $pseudoTypedWordInfo with $suggestion" + ) + return suggestion + } + } + return pseudoTypedWordInfo + } + + /** + * Returns the suggestions for an empty typed word in the given [ngramContext] (that's what next word suggestions do). + * Results are looked up in [nextWordSuggestionsCache] by [ngramContext] only; on a miss they are + * requested from [mDictionaryFacilitator] and stored in the cache before being returned. + */ + // todo: integrate it into shouldBeAutoCorrected, remove putEmptySuggestions + // and make that thing more readable + private fun getNextWordSuggestions( + ngramContext: NgramContext, + keyboard: Keyboard, inputStyle: Int, settingsValuesForSuggestion: SettingsValuesForSuggestion + ): SuggestionResults = nextWordSuggestionsCache.getOrPut(ngramContext) { + val emptyTypedWord = ComposedData(InputPointers(1), false, "") + mDictionaryFacilitator.getSuggestionResults(emptyTypedWord, ngramContext, keyboard, + settingsValuesForSuggestion, SESSION_ID_TYPING, inputStyle) + } + + companion object { + private val TAG: String = Suggest::class.java.simpleName + + // Session id for + // {@link #getSuggestedWords(WordComposer,String,ProximityInfo,boolean,int)}. + // We are sharing the same ID between typing and gesture to save RAM footprint. 
+ const val SESSION_ID_TYPING = 0 + const val SESSION_ID_GESTURE = 0 + + // Close to -2**31 + private const val SUPPRESS_SUGGEST_THRESHOLD = -2000000000 + private const val MAXIMUM_AUTO_CORRECT_LENGTH_FOR_GERMAN = 12 + // TODO: should we add Finnish here? + // TODO: This should not be hardcoded here but be written in the dictionary header + private val sLanguageToMaximumAutoCorrectionWithSpaceLength = hashMapOf(Locale.GERMAN.language to MAXIMUM_AUTO_CORRECT_LENGTH_FOR_GERMAN) + + private fun getTransformedSuggestedWordInfoList( + wordComposer: WordComposer, results: SuggestionResults, + trailingSingleQuotesCount: Int, defaultLocale: Locale + ): ArrayList { + val shouldMakeSuggestionsAllUpperCase = (wordComposer.isAllUpperCase + && !wordComposer.isResumed) + val isOnlyFirstCharCapitalized = wordComposer.isOrWillBeOnlyFirstCharCapitalized + val suggestionsContainer = ArrayList(results) + val suggestionsCount = suggestionsContainer.size + if (isOnlyFirstCharCapitalized || shouldMakeSuggestionsAllUpperCase || 0 != trailingSingleQuotesCount) { + for (i in 0 until suggestionsCount) { + val wordInfo = suggestionsContainer[i] + val wordLocale = wordInfo!!.mSourceDict.mLocale + val transformedWordInfo = getTransformedSuggestedWordInfo( + wordInfo, wordLocale ?: defaultLocale, + shouldMakeSuggestionsAllUpperCase, isOnlyFirstCharCapitalized, + trailingSingleQuotesCount + ) + suggestionsContainer[i] = transformedWordInfo + } + } + return suggestionsContainer + } + + private fun getWhitelistedWordInfoOrNull(suggestions: List): SuggestedWordInfo? 
{ + if (suggestions.isEmpty()) { + return null + } + val firstSuggestedWordInfo = suggestions[0] + return if (!firstSuggestedWordInfo.isKindOf(SuggestedWordInfo.KIND_WHITELIST)) { + null + } else firstSuggestedWordInfo + } + + private fun getSuggestionsInfoListWithDebugInfo( + typedWord: String, suggestions: ArrayList + ): ArrayList { + val suggestionsSize = suggestions.size + val suggestionsList = ArrayList(suggestionsSize) + for (cur in suggestions) { + addDebugInfo(cur, typedWord) + suggestionsList.add(cur) + } + return suggestionsList + } + + private fun addDebugInfo(wordInfo: SuggestedWordInfo?, typedWord: String) { + val normalizedScore = BinaryDictionaryUtils.calcNormalizedScore(typedWord, wordInfo.toString(), wordInfo!!.mScore) + val scoreInfoString: String + val dict = wordInfo.mSourceDict.mDictType + ":" + wordInfo.mSourceDict.mLocale + scoreInfoString = if (normalizedScore > 0) { + String.format(Locale.ROOT, "%d (%4.2f), %s", wordInfo.mScore, normalizedScore, dict) + } else { + String.format(Locale.ROOT, "%d, %s", wordInfo.mScore, dict) + } + wordInfo.debugString = scoreInfoString + } + + /** + * Computes whether this suggestion should be blocked or not in this language + * + * This function implements a filter that avoids auto-correcting to suggestions that contain + * spaces that are above a certain language-dependent character limit. In languages like German + * where it's possible to concatenate many words, it often happens our dictionary does not + * have the longer words. In this case, we offer a lot of unhelpful suggestions that contain + * one or several spaces. Ideally we should understand what the user wants and display useful + * suggestions by improving the dictionary and possibly having some specific logic. Until + * that's possible we should avoid displaying unhelpful suggestions. But it's hard to tell + * whether a suggestion is useful or not. 
So at least for the time being we block + * auto-correction when the suggestion is long and contains a space, which should avoid the + * worst damage. + * This function is implementing that filter. If the language enforces no such limit, then it + * always returns true. If the suggestion contains no space, it also returns true. Otherwise, + * it checks the length against the language-specific limit. + * + * @param info the suggestion info + * @return whether it's fine to auto-correct to this. + */ + private fun isAllowedByAutoCorrectionWithSpaceFilter(info: SuggestedWordInfo): Boolean { + val locale = info.mSourceDict.mLocale ?: return true + val maximumLengthForThisLanguage = sLanguageToMaximumAutoCorrectionWithSpaceLength[locale.language] + ?: return true // This language does not enforce a maximum length to auto-correction + return (info.mWord.length <= maximumLengthForThisLanguage + || -1 == info.mWord.indexOf(Constants.CODE_SPACE.toChar())) + } + + private fun getTransformedSuggestedWordInfo( + wordInfo: SuggestedWordInfo?, locale: Locale?, isAllUpperCase: Boolean, + isOnlyFirstCharCapitalized: Boolean, trailingSingleQuotesCount: Int + ): SuggestedWordInfo { + val sb = StringBuilder(wordInfo!!.mWord.length) + if (isAllUpperCase) { + sb.append(wordInfo.mWord.uppercase(locale!!)) + } else if (isOnlyFirstCharCapitalized) { + sb.append(StringUtils.capitalizeFirstCodePoint(wordInfo.mWord, locale!!)) + } else { + sb.append(wordInfo.mWord) + } + // Appending quotes is here to help people quote words. However, it's not helpful + // when they type words with quotes toward the end like "it's" or "didn't", where + // it's more likely the user missed the last character (or didn't type it yet). 
+ val quotesToAppend = (trailingSingleQuotesCount + - if (-1 == wordInfo.mWord.indexOf(Constants.CODE_SINGLE_QUOTE.toChar())) 0 else 1) + for (i in quotesToAppend - 1 downTo 0) { + sb.appendCodePoint(Constants.CODE_SINGLE_QUOTE) + } + return SuggestedWordInfo( + sb.toString(), wordInfo.mPrevWordsContext, + wordInfo.mScore, wordInfo.mKindAndFlags, + wordInfo.mSourceDict, wordInfo.mIndexOfTouchPointOfSecondWord, + wordInfo.mAutoCommitFirstWordConfidence + ) + } + } +}