tune autocorrections once again

This commit is contained in:
Helium314 2023-08-30 09:05:29 +02:00
parent 963ceacec9
commit b3764239b8
3 changed files with 92 additions and 21 deletions

View file

@ -336,7 +336,7 @@ public final class Suggest {
} else if (first == null) {
allowsToBeAutoCorrected = false; // no autocorrect if first suggestion unknown in this context
} else if (typed == null) {
allowsToBeAutoCorrected = true; // autocorrect if typed word not known in this context, todo: this may be too aggressive
allowsToBeAutoCorrected = true; // allow autocorrect if typed word not known in this context, todo: this may be too aggressive
} else {
// autocorrect if suggested word has clearly higher score for empty word suggestions
allowsToBeAutoCorrected = (first.mScore - typed.mScore) > 20;
@ -416,20 +416,17 @@ public final class Suggest {
// dict locale different -> return the better match
return new boolean[]{ true, dictLocale == first.mSourceDict.mLocale };
}
if (first.mScore < typedWordFirstOccurrenceWordInfo.mScore - 100000) {
// don't autocorrect if typed word is clearly the better suggestion
// todo: maybe this should be reduced more, to 50k or even 0
return new boolean[]{ true, false };
}
// todo: this may need tuning, especially the score difference thing
final int firstWordBonusScore = (first.isKindOf(SuggestedWordInfo.KIND_WHITELIST) ? 20 : 0) // large bonus because it's wanted by dictionary
+ (StringUtils.isLowerCaseAscii(typedWordString) ? 5 : 0) // small bonus because typically only ascii is typed (applies to latin keyboards only)
+ (first.mScore > typedWordFirstOccurrenceWordInfo.mScore ? 5 : 0); // small bonus if score is higher
putEmptyWordSuggestions.run();
int firstScoreForEmpty = firstAndTypedWordEmptyInfos.get(0) != null ? firstAndTypedWordEmptyInfos.get(0).mScore : 0;
int typedScoreForEmpty = firstAndTypedWordEmptyInfos.get(1) != null ? firstAndTypedWordEmptyInfos.get(1).mScore : 0;
if (firstScoreForEmpty == 0 && typedScoreForEmpty == 0) {
// both words unknown in this ngram context -> return the correction
return new boolean[]{ true, true };
}
if (firstScoreForEmpty > typedScoreForEmpty + 20) {
// return the better match for ngram context, biased towards typed word
if (firstScoreForEmpty + firstWordBonusScore >= typedScoreForEmpty + 20) {
// return the better match for ngram context
// biased towards typed word
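// but with a bonus for the first suggestion depending on whether it is whitelisted, whether the typed word is lowercase ascii, and whether its score is higher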
return new boolean[]{ true, true };
}
hasAutoCorrection = false;

View file

@ -712,9 +712,9 @@ public final class StringUtils {
return false;
}
public static boolean probablyContainsEmoji(String s) {
public static boolean probablyContainsEmoji(final String s) {
int offset = 0;
int length = s.length();
final int length = s.length();
while (offset < length) {
int c = Character.codePointAt(s, offset);
if (probablyIsEmojiCodePoint(c))
@ -725,10 +725,19 @@ public final class StringUtils {
}
// seemingly arbitrary ranges taken from "somewhere on the internet"
public static boolean probablyIsEmojiCodePoint(int c) {
/**
 * Heuristic check for whether a code point is probably part of an emoji.
 * The accepted ranges are a rough approximation, not an exact emoji definition.
 *
 * @param c the Unicode code point to test
 * @return {@code true} if {@code c} equals 0xFE0F or lies in one of the
 *         inclusive ranges 0x200D-0x3299, 0x1F004-0x1F251 or 0x1F300-0x1FFFF
 */
public static boolean probablyIsEmojiCodePoint(final int c) {
    // variation selector emoji with color
    if (c == 0xFE0F) {
        return true;
    }
    if (c >= 0x200D && c <= 0x3299) {
        return true;
    }
    if (c >= 0x1F004 && c <= 0x1F251) {
        return true;
    }
    return c >= 0x1F300 && c <= 0x1FFFF;
}
/**
 * Checks whether a string consists solely of the lowercase ASCII letters 'a' to 'z'.
 * An empty string yields {@code true} because it contains no other character.
 *
 * @param s the string to inspect; must not be null
 * @return {@code false} if any char lies outside 'a'..'z', {@code true} otherwise
 */
public static boolean isLowerCaseAscii(final String s) {
    final int end = s.length();
    int index = 0;
    while (index < end) {
        final char ch = s.charAt(index);
        final boolean lowerLetter = ch >= 'a' && ch <= 'z';
        if (!lowerLetter) {
            return false;
        }
        index++;
    }
    return true;
}
}