mirror of
https://github.com/Helium314/HeliBoard.git
synced 2025-05-18 16:03:12 +00:00
move shouldBeAutoCorrected to a separate function
This commit is contained in:
parent
0687d10420
commit
f0e5a38fa2
2 changed files with 159 additions and 118 deletions
|
@ -33,6 +33,7 @@ import org.dslul.openboard.inputmethod.latin.utils.SuggestionResults;
|
|||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
|
@ -139,7 +140,7 @@ public final class Suggest {
|
|||
}
|
||||
|
||||
private static SuggestedWordInfo getWhitelistedWordInfoOrNull(
|
||||
@Nonnull final ArrayList<SuggestedWordInfo> suggestions) {
|
||||
@Nonnull final List<SuggestedWordInfo> suggestions) {
|
||||
if (suggestions.isEmpty()) {
|
||||
return null;
|
||||
}
|
||||
|
@ -160,9 +161,6 @@ public final class Suggest {
|
|||
final String typedWordString = wordComposer.getTypedWord();
|
||||
final int trailingSingleQuotesCount =
|
||||
StringUtils.getTrailingSingleQuotesCount(typedWordString);
|
||||
final String consideredWord = trailingSingleQuotesCount > 0
|
||||
? typedWordString.substring(0, typedWordString.length() - trailingSingleQuotesCount)
|
||||
: typedWordString;
|
||||
|
||||
final SuggestionResults suggestionResults = mDictionaryFacilitator.getSuggestionResults(
|
||||
wordComposer.getComposedDataSnapshot(), ngramContext, keyboard,
|
||||
|
@ -191,124 +189,35 @@ public final class Suggest {
|
|||
|
||||
final int firstOccurrenceOfTypedWordInSuggestions =
|
||||
SuggestedWordInfo.removeDupsAndTypedWord(typedWordString, suggestionsContainer);
|
||||
|
||||
final SuggestedWordInfo whitelistedWordInfo =
|
||||
getWhitelistedWordInfoOrNull(suggestionsContainer);
|
||||
final String whitelistedWord = whitelistedWordInfo == null
|
||||
? null : whitelistedWordInfo.mWord;
|
||||
final boolean resultsArePredictions = !wordComposer.isComposingWord();
|
||||
|
||||
final SuggestedWordInfo firstSuggestionInContainer = suggestionsContainer.isEmpty() ? null : suggestionsContainer.get(0);
|
||||
// SuggestedWordInfos for suggestions for empty word (based only on previously typed words)
|
||||
// done in a weird way to imitate what kotlin does with lazy
|
||||
final ArrayList<SuggestedWordInfo> firstAndTypedWordEmptyInfos = new ArrayList<>(2);
|
||||
|
||||
// We allow auto-correction if whitelisting is not required or the word is whitelisted,
|
||||
// or if the word had more than one char and was not suggested.
|
||||
final boolean allowsToBeAutoCorrected;
|
||||
if ((SHOULD_AUTO_CORRECT_USING_NON_WHITE_LISTED_SUGGESTION || whitelistedWord != null)
|
||||
|| (consideredWord.length() > 1 && (sourceDictionaryOfRemovedWord == null)) // more than 1 letter and not in dictionary
|
||||
) {
|
||||
allowsToBeAutoCorrected = true;
|
||||
} else if (firstSuggestionInContainer != null && !typedWordString.isEmpty()) {
|
||||
// maybe allow autocorrect, depending on emptyWordSuggestions
|
||||
putEmptyWordSuggestions(firstAndTypedWordEmptyInfos,
|
||||
ngramContext, keyboard, settingsValuesForSuggestion, inputStyleIfNotPrediction,
|
||||
firstSuggestionInContainer.getWord(), typedWordString);
|
||||
final SuggestedWordInfo first = firstAndTypedWordEmptyInfos.get(0);
|
||||
final SuggestedWordInfo typed = firstAndTypedWordEmptyInfos.get(1);
|
||||
if (first == null) {
|
||||
allowsToBeAutoCorrected = false; // no autocorrect if first suggestion unknown in this context
|
||||
} else if (typed == null) {
|
||||
allowsToBeAutoCorrected = true; // autocorrect if typed word not known in this context (this may be too aggressive)
|
||||
} else {
|
||||
// autocorrect only if suggested word has clearly higher score
|
||||
// todo: maybe adjust the score difference? but already 15 requires typing several times (but doesn't go back quickly...)
|
||||
// maybe this should depend on mAutoCorrectionThreshold
|
||||
// 0.185 for modest, 0.067 for aggressive, negative infinity for very aggressive
|
||||
allowsToBeAutoCorrected = (first.mScore - typed.mScore) > 20;
|
||||
}
|
||||
} else
|
||||
allowsToBeAutoCorrected = false;
|
||||
// todo: hope autocorrect doesn't trigger too often now (remove this comment if ok)
|
||||
// yes, it triggered too often / weirdly in some cases, but hopefully improved
|
||||
|
||||
final boolean hasAutoCorrection;
|
||||
// If correction is not enabled, we never auto-correct. This is for example for when
|
||||
// the setting "Auto-correction" is "off": we still suggest, but we don't auto-correct.
|
||||
if (!isCorrectionEnabled
|
||||
// If the word does not allow to be auto-corrected, then we don't auto-correct.
|
||||
|| !allowsToBeAutoCorrected
|
||||
// If we are doing prediction, then we never auto-correct of course
|
||||
|| resultsArePredictions
|
||||
// If we don't have suggestion results, we can't evaluate the first suggestion
|
||||
// for auto-correction
|
||||
|| suggestionResults.isEmpty()
|
||||
// If the word has digits, we never auto-correct because it's likely the word
|
||||
// was typed with a lot of care
|
||||
|| wordComposer.hasDigits()
|
||||
// If the word is mostly caps, we never auto-correct because this is almost
|
||||
// certainly intentional (and careful input)
|
||||
|| wordComposer.isMostlyCaps()
|
||||
// We never auto-correct when suggestions are resumed because it would be unexpected
|
||||
|| wordComposer.isResumed()
|
||||
// We don't autocorrect in URL or email input, since websites and emails can be
|
||||
// deliberate misspellings of actual words
|
||||
|| keyboard.mId.mMode == KeyboardId.MODE_URL
|
||||
|| keyboard.mId.mMode == KeyboardId.MODE_EMAIL
|
||||
// If we don't have a main dictionary, we never want to auto-correct. The reason
|
||||
// for this is, the user may have a contact whose name happens to match a valid
|
||||
// word in their language, and it will unexpectedly auto-correct. For example, if
|
||||
// the user types in English with no dictionary and has a "Will" in their contact
|
||||
// list, "will" would always auto-correct to "Will" which is unwanted. Hence, no
|
||||
// main dict => no auto-correct. Also, it would probably get obnoxious quickly.
|
||||
// TODO: now that we have personalization, we may want to re-evaluate this decision
|
||||
|| !mDictionaryFacilitator.hasAtLeastOneInitializedMainDictionary()
|
||||
// If the first suggestion is a shortcut we never auto-correct to it, regardless
|
||||
// of how strong it is (whitelist entries are not KIND_SHORTCUT but KIND_WHITELIST).
|
||||
// TODO: we may want to have shortcut-only entries auto-correct in the future.
|
||||
|| suggestionResults.first().isKindOf(SuggestedWordInfo.KIND_SHORTCUT)) {
|
||||
hasAutoCorrection = false;
|
||||
} else {
|
||||
final SuggestedWordInfo firstSuggestion = suggestionResults.first();
|
||||
if (suggestionResults.mFirstSuggestionExceedsConfidenceThreshold
|
||||
&& firstOccurrenceOfTypedWordInSuggestions != 0) {
|
||||
// todo: mFirstSuggestionExceedsConfidenceThreshold is always false, so currently
|
||||
// this branch is useless. remove the related logic, or actually use it
|
||||
hasAutoCorrection = true;
|
||||
} else if (!AutoCorrectionUtils.suggestionExceedsThreshold(
|
||||
firstSuggestion, consideredWord, mAutoCorrectionThreshold)) {
|
||||
// todo: maybe also do something here depending on ngram context?
|
||||
// Score is too low for autocorrect
|
||||
hasAutoCorrection = false;
|
||||
} else {
|
||||
// We have a high score, so we need to check if this suggestion is in the correct
|
||||
// form to allow auto-correcting to it in this language. For details of how this
|
||||
// is determined, see #isAllowedByAutoCorrectionWithSpaceFilter.
|
||||
// TODO: this should not have its own logic here but be handled by the dictionary.
|
||||
final boolean allowed = isAllowedByAutoCorrectionWithSpaceFilter(firstSuggestion);
|
||||
// todo: the threshold (currently 1000000) may need tuning
|
||||
if (allowed && typedWordFirstOccurrenceWordInfo != null && typedWordFirstOccurrenceWordInfo.mScore > 1000000) {
|
||||
// typed word is valid and has good score
|
||||
// do not auto-correct if typed word is better prediction than possible correction from ngram context alone
|
||||
final SuggestedWordInfo first = firstSuggestionInContainer != null ? firstSuggestionInContainer : firstSuggestion;
|
||||
putEmptyWordSuggestions(firstAndTypedWordEmptyInfos,
|
||||
ngramContext, keyboard, settingsValuesForSuggestion, inputStyleIfNotPrediction,
|
||||
first.getWord(), typedWordString);
|
||||
int firstScoreForEmpty = firstAndTypedWordEmptyInfos.get(0) != null ? firstAndTypedWordEmptyInfos.get(0).mScore : 0;
|
||||
int typedScoreForEmpty = firstAndTypedWordEmptyInfos.get(1) != null ? firstAndTypedWordEmptyInfos.get(1).mScore : 0;
|
||||
final Locale dictLocale = mDictionaryFacilitator.getCurrentLocale();
|
||||
// slightly prefer suggestion for the current locale, this is very useful e.g.
|
||||
// for Polish i vs English I, or French un vs un->in shortcut in English default dictionary
|
||||
if (dictLocale == first.mSourceDict.mLocale)
|
||||
firstScoreForEmpty += 1;
|
||||
if (dictLocale == typedWordFirstOccurrenceWordInfo.mSourceDict.mLocale)
|
||||
typedScoreForEmpty += 1;
|
||||
hasAutoCorrection = firstScoreForEmpty >= typedScoreForEmpty;
|
||||
} else
|
||||
hasAutoCorrection = allowed;
|
||||
}
|
||||
}
|
||||
final boolean[] thoseTwo = shouldBeAutoCorrected(
|
||||
trailingSingleQuotesCount,
|
||||
typedWordString,
|
||||
suggestionsContainer,
|
||||
sourceDictionaryOfRemovedWord,
|
||||
firstAndTypedWordEmptyInfos,
|
||||
() -> {
|
||||
final SuggestedWordInfo firstSuggestionInContainer = suggestionsContainer.isEmpty() ? null : suggestionsContainer.get(0);
|
||||
SuggestedWordInfo first = firstSuggestionInContainer != null ? firstSuggestionInContainer : suggestionResults.first();
|
||||
putEmptyWordSuggestions(firstAndTypedWordEmptyInfos, ngramContext, keyboard,
|
||||
settingsValuesForSuggestion, inputStyleIfNotPrediction, first.getWord(), typedWordString);
|
||||
},
|
||||
isCorrectionEnabled,
|
||||
keyboard.mId.mMode,
|
||||
wordComposer,
|
||||
suggestionResults,
|
||||
mDictionaryFacilitator,
|
||||
mAutoCorrectionThreshold,
|
||||
firstOccurrenceOfTypedWordInSuggestions,
|
||||
typedWordFirstOccurrenceWordInfo
|
||||
);
|
||||
final boolean allowsToBeAutoCorrected = thoseTwo[0];
|
||||
final boolean hasAutoCorrection = thoseTwo[1];
|
||||
|
||||
final SuggestedWordInfo typedWordInfo = new SuggestedWordInfo(typedWordString,
|
||||
"" /* prevWordsContext */, SuggestedWordInfo.MAX_SCORE,
|
||||
|
@ -380,6 +289,138 @@ public final class Suggest {
|
|||
return infos;
|
||||
}
|
||||
|
||||
// returns [allowsToBeAutoCorrected, hasAutoCorrection]
|
||||
static boolean[] shouldBeAutoCorrected(
|
||||
final int trailingSingleQuotesCount,
|
||||
final String typedWordString,
|
||||
final List<SuggestedWordInfo> suggestionsContainer,
|
||||
final Dictionary sourceDictionaryOfRemovedWord,
|
||||
final List<SuggestedWordInfo> firstAndTypedWordEmptyInfos,
|
||||
final Runnable putEmptyWordSuggestions,
|
||||
final boolean isCorrectionEnabled,
|
||||
final int keyboardIdMode,
|
||||
final WordComposer wordComposer,
|
||||
final SuggestionResults suggestionResults,
|
||||
final DictionaryFacilitator dictionaryFacilitator,
|
||||
final float autoCorrectionThreshold,
|
||||
final int firstOccurrenceOfTypedWordInSuggestions,
|
||||
final SuggestedWordInfo typedWordFirstOccurrenceWordInfo
|
||||
) {
|
||||
final String consideredWord = trailingSingleQuotesCount > 0
|
||||
? typedWordString.substring(0, typedWordString.length() - trailingSingleQuotesCount)
|
||||
: typedWordString;
|
||||
|
||||
final SuggestedWordInfo whitelistedWordInfo =
|
||||
getWhitelistedWordInfoOrNull(suggestionsContainer);
|
||||
final String whitelistedWord = whitelistedWordInfo == null
|
||||
? null : whitelistedWordInfo.mWord;
|
||||
final SuggestedWordInfo firstSuggestionInContainer = suggestionsContainer.isEmpty() ? null : suggestionsContainer.get(0);
|
||||
|
||||
// We allow auto-correction if whitelisting is not required or the word is whitelisted,
|
||||
// or if the word had more than one char and was not suggested.
|
||||
final boolean allowsToBeAutoCorrected;
|
||||
if ((SHOULD_AUTO_CORRECT_USING_NON_WHITE_LISTED_SUGGESTION || whitelistedWord != null)
|
||||
|| (consideredWord.length() > 1 && (sourceDictionaryOfRemovedWord == null)) // more than 1 letter and not in dictionary
|
||||
) {
|
||||
allowsToBeAutoCorrected = true;
|
||||
} else if (firstSuggestionInContainer != null && !typedWordString.isEmpty()) {
|
||||
// maybe allow autocorrect, depending on emptyWordSuggestions
|
||||
putEmptyWordSuggestions.run();
|
||||
final SuggestedWordInfo first = firstAndTypedWordEmptyInfos.get(0);
|
||||
final SuggestedWordInfo typed = firstAndTypedWordEmptyInfos.get(1);
|
||||
if (first == null) {
|
||||
allowsToBeAutoCorrected = false; // no autocorrect if first suggestion unknown in this context
|
||||
} else if (typed == null) {
|
||||
allowsToBeAutoCorrected = true; // autocorrect if typed word not known in this context (this may be too aggressive)
|
||||
} else {
|
||||
// autocorrect only if suggested word has clearly higher score
|
||||
// todo: maybe adjust the score difference? but already 15 requires typing several times (but doesn't go back quickly...)
|
||||
// maybe this should depend on mAutoCorrectionThreshold
|
||||
// 0.185 for modest, 0.067 for aggressive, negative infinity for very aggressive
|
||||
allowsToBeAutoCorrected = (first.mScore - typed.mScore) > 20;
|
||||
}
|
||||
} else
|
||||
allowsToBeAutoCorrected = false;
|
||||
// todo: hope autocorrect doesn't trigger too often now (remove this comment if ok)
|
||||
// yes, it triggered too often / weirdly in some cases, but hopefully improved
|
||||
|
||||
final boolean hasAutoCorrection;
|
||||
// If correction is not enabled, we never auto-correct. This is for example for when
|
||||
// the setting "Auto-correction" is "off": we still suggest, but we don't auto-correct.
|
||||
if (!isCorrectionEnabled
|
||||
// If the word does not allow to be auto-corrected, then we don't auto-correct.
|
||||
|| !allowsToBeAutoCorrected
|
||||
// If we are doing prediction, then we never auto-correct of course
|
||||
|| !wordComposer.isComposingWord()
|
||||
// If we don't have suggestion results, we can't evaluate the first suggestion
|
||||
// for auto-correction
|
||||
|| suggestionResults.isEmpty()
|
||||
// If the word has digits, we never auto-correct because it's likely the word
|
||||
// was type with a lot of care
|
||||
|| wordComposer.hasDigits()
|
||||
// If the word is mostly caps, we never auto-correct because this is almost
|
||||
// certainly intentional (and careful input)
|
||||
|| wordComposer.isMostlyCaps()
|
||||
// We never auto-correct when suggestions are resumed because it would be unexpected
|
||||
|| wordComposer.isResumed()
|
||||
// We don't autocorrect in URL or email input, since websites and emails can be
|
||||
// deliberate misspellings of actual words
|
||||
|| keyboardIdMode == KeyboardId.MODE_URL
|
||||
|| keyboardIdMode == KeyboardId.MODE_EMAIL
|
||||
// If we don't have a main dictionary, we never want to auto-correct. The reason
|
||||
// for this is, the user may have a contact whose name happens to match a valid
|
||||
// word in their language, and it will unexpectedly auto-correct. For example, if
|
||||
// the user types in English with no dictionary and has a "Will" in their contact
|
||||
// list, "will" would always auto-correct to "Will" which is unwanted. Hence, no
|
||||
// main dict => no auto-correct. Also, it would probably get obnoxious quickly.
|
||||
// TODO: now that we have personalization, we may want to re-evaluate this decision
|
||||
|| !dictionaryFacilitator.hasAtLeastOneInitializedMainDictionary()
|
||||
// If the first suggestion is a shortcut we never auto-correct to it, regardless
|
||||
// of how strong it is (whitelist entries are not KIND_SHORTCUT but KIND_WHITELIST).
|
||||
// TODO: we may want to have shortcut-only entries auto-correct in the future.
|
||||
|| suggestionResults.first().isKindOf(SuggestedWordInfo.KIND_SHORTCUT)) {
|
||||
hasAutoCorrection = false;
|
||||
} else {
|
||||
final SuggestedWordInfo firstSuggestion = suggestionResults.first();
|
||||
if (suggestionResults.mFirstSuggestionExceedsConfidenceThreshold
|
||||
&& firstOccurrenceOfTypedWordInSuggestions != 0) {
|
||||
// todo: mFirstSuggestionExceedsConfidenceThreshold is always false, so currently
|
||||
// this branch is useless. remove the related logic, or actually use it
|
||||
hasAutoCorrection = true;
|
||||
} else if (!AutoCorrectionUtils.suggestionExceedsThreshold(
|
||||
firstSuggestion, consideredWord, autoCorrectionThreshold)) {
|
||||
// todo: maybe also do something here depending on ngram context?
|
||||
// Score is too low for autocorrect
|
||||
hasAutoCorrection = false;
|
||||
} else {
|
||||
// We have a high score, so we need to check if this suggestion is in the correct
|
||||
// form to allow auto-correcting to it in this language. For details of how this
|
||||
// is determined, see #isAllowedByAutoCorrectionWithSpaceFilter.
|
||||
// TODO: this should not have its own logic here but be handled by the dictionary.
|
||||
final boolean allowed = isAllowedByAutoCorrectionWithSpaceFilter(firstSuggestion);
|
||||
// todo: the threshold (currently 1000000) may need tuning
|
||||
if (allowed && typedWordFirstOccurrenceWordInfo != null && typedWordFirstOccurrenceWordInfo.mScore > 1000000) {
|
||||
// typed word is valid and has good score
|
||||
// do not auto-correct if typed word is better prediction than possible correction from ngram context alone
|
||||
final SuggestedWordInfo first = firstSuggestionInContainer != null ? firstSuggestionInContainer : firstSuggestion;
|
||||
putEmptyWordSuggestions.run();
|
||||
int firstScoreForEmpty = firstAndTypedWordEmptyInfos.get(0) != null ? firstAndTypedWordEmptyInfos.get(0).mScore : 0;
|
||||
int typedScoreForEmpty = firstAndTypedWordEmptyInfos.get(1) != null ? firstAndTypedWordEmptyInfos.get(1).mScore : 0;
|
||||
final Locale dictLocale = dictionaryFacilitator.getCurrentLocale();
|
||||
// slightly prefer suggestion for the current locale, this is very useful e.g.
|
||||
// for Polish i vs English I, or French un vs un->in shortcut in English default dictionary
|
||||
if (dictLocale == first.mSourceDict.mLocale)
|
||||
firstScoreForEmpty += 1;
|
||||
if (dictLocale == typedWordFirstOccurrenceWordInfo.mSourceDict.mLocale)
|
||||
typedScoreForEmpty += 1;
|
||||
hasAutoCorrection = firstScoreForEmpty >= typedScoreForEmpty;
|
||||
} else
|
||||
hasAutoCorrection = allowed;
|
||||
}
|
||||
}
|
||||
return new boolean[]{ allowsToBeAutoCorrected, hasAutoCorrection };
|
||||
}
|
||||
|
||||
// Retrieves suggestions for the batch input
|
||||
// and calls the callback function with the suggestions.
|
||||
private void getSuggestedWordsForBatchInput(final WordComposer wordComposer,
|
||||
|
|
|
@ -383,7 +383,7 @@ public class SuggestedWords {
|
|||
|
||||
/**
|
||||
* This will always remove the higher index if a duplicate is found.
|
||||
* Will also remove al occurrences of the typed word.
|
||||
* Will also remove all occurrences of the typed word.
|
||||
*
|
||||
* @return position of typed word in the candidate list
|
||||
*/
|
||||
|
@ -394,7 +394,7 @@ public class SuggestedWords {
|
|||
return -1;
|
||||
}
|
||||
int firstOccurrenceOfWord = -1;
|
||||
if (!TextUtils.isEmpty(typedWord)) {
|
||||
if (typedWord != null && typedWord.length() > 0) {
|
||||
firstOccurrenceOfWord = removeSuggestedWordInfoFromList(
|
||||
typedWord, candidates, -1 /* startIndexExclusive */);
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue