mirror of
https://github.com/Helium314/HeliBoard.git
synced 2025-05-18 16:03:12 +00:00
tune autocorrections once again
This commit is contained in:
parent
963ceacec9
commit
b3764239b8
3 changed files with 92 additions and 21 deletions
|
@ -336,7 +336,7 @@ public final class Suggest {
|
||||||
} else if (first == null) {
|
} else if (first == null) {
|
||||||
allowsToBeAutoCorrected = false; // no autocorrect if first suggestion unknown in this context
|
allowsToBeAutoCorrected = false; // no autocorrect if first suggestion unknown in this context
|
||||||
} else if (typed == null) {
|
} else if (typed == null) {
|
||||||
allowsToBeAutoCorrected = true; // autocorrect if typed word not known in this context, todo: this may be too aggressive
|
allowsToBeAutoCorrected = true; // allow autocorrect if typed word not known in this context, todo: this may be too aggressive
|
||||||
} else {
|
} else {
|
||||||
// autocorrect if suggested word has clearly higher score for empty word suggestions
|
// autocorrect if suggested word has clearly higher score for empty word suggestions
|
||||||
allowsToBeAutoCorrected = (first.mScore - typed.mScore) > 20;
|
allowsToBeAutoCorrected = (first.mScore - typed.mScore) > 20;
|
||||||
|
@ -416,20 +416,17 @@ public final class Suggest {
|
||||||
// dict locale different -> return the better match
|
// dict locale different -> return the better match
|
||||||
return new boolean[]{ true, dictLocale == first.mSourceDict.mLocale };
|
return new boolean[]{ true, dictLocale == first.mSourceDict.mLocale };
|
||||||
}
|
}
|
||||||
if (first.mScore < typedWordFirstOccurrenceWordInfo.mScore - 100000) {
|
// todo: this may need tuning, especially the score difference thing
|
||||||
// don't autocorrect if typed word is clearly the better suggestion
|
final int firstWordBonusScore = (first.isKindOf(SuggestedWordInfo.KIND_WHITELIST) ? 20 : 0) // large bonus because it's wanted by dictionary
|
||||||
// todo: maybe this should be reduced more, to 50k or even 0
|
+ (StringUtils.isLowerCaseAscii(typedWordString) ? 5 : 0) // small bonus because typically only ascii is typed (applies to latin keyboards only)
|
||||||
return new boolean[]{ true, false };
|
+ (first.mScore > typedWordFirstOccurrenceWordInfo.mScore ? 5 : 0); // small bonus if score is higher
|
||||||
}
|
|
||||||
putEmptyWordSuggestions.run();
|
putEmptyWordSuggestions.run();
|
||||||
int firstScoreForEmpty = firstAndTypedWordEmptyInfos.get(0) != null ? firstAndTypedWordEmptyInfos.get(0).mScore : 0;
|
int firstScoreForEmpty = firstAndTypedWordEmptyInfos.get(0) != null ? firstAndTypedWordEmptyInfos.get(0).mScore : 0;
|
||||||
int typedScoreForEmpty = firstAndTypedWordEmptyInfos.get(1) != null ? firstAndTypedWordEmptyInfos.get(1).mScore : 0;
|
int typedScoreForEmpty = firstAndTypedWordEmptyInfos.get(1) != null ? firstAndTypedWordEmptyInfos.get(1).mScore : 0;
|
||||||
if (firstScoreForEmpty == 0 && typedScoreForEmpty == 0) {
|
if (firstScoreForEmpty + firstWordBonusScore >= typedScoreForEmpty + 20) {
|
||||||
// both words unknown in this ngram context -> return the correction
|
// return the better match for ngram context
|
||||||
return new boolean[]{ true, true };
|
// biased towards typed word
|
||||||
}
|
// but with bonus depending on whitelist status, a lowercase-ASCII typed word, and a higher first-suggestion score
|
||||||
if (firstScoreForEmpty > typedScoreForEmpty + 20) {
|
|
||||||
// return the better match for ngram context, biased towards typed word
|
|
||||||
return new boolean[]{ true, true };
|
return new boolean[]{ true, true };
|
||||||
}
|
}
|
||||||
hasAutoCorrection = false;
|
hasAutoCorrection = false;
|
||||||
|
|
|
@ -712,9 +712,9 @@ public final class StringUtils {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean probablyContainsEmoji(String s) {
|
public static boolean probablyContainsEmoji(final String s) {
|
||||||
int offset = 0;
|
int offset = 0;
|
||||||
int length = s.length();
|
final int length = s.length();
|
||||||
while (offset < length) {
|
while (offset < length) {
|
||||||
int c = Character.codePointAt(s, offset);
|
int c = Character.codePointAt(s, offset);
|
||||||
if (probablyIsEmojiCodePoint(c))
|
if (probablyIsEmojiCodePoint(c))
|
||||||
|
@ -725,10 +725,19 @@ public final class StringUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
// seemingly arbitrary ranges taken from "somewhere on the internet"
|
// seemingly arbitrary ranges taken from "somewhere on the internet"
|
||||||
public static boolean probablyIsEmojiCodePoint(int c) {
|
public static boolean probablyIsEmojiCodePoint(final int c) {
|
||||||
return (0x200D <= c && c <= 0x3299) // ??
|
return (0x200D <= c && c <= 0x3299) // ??
|
||||||
|| (0x1F004 <= c && c <= 0x1F251) // ??
|
|| (0x1F004 <= c && c <= 0x1F251) // ??
|
||||||
|| (0x1F300 <= c && c <= 0x1FFFF) // ??
|
|| (0x1F300 <= c && c <= 0x1FFFF) // ??
|
||||||
|| c == 0xFE0F; // variation selector emoji with color
|
|| c == 0xFE0F; // variation selector emoji with color
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Checks whether every char of {@code s} lies in the range 97..122, i.e. the ASCII letters 'a'..'z'.
 * An empty string yields {@code true} because the loop body never runs.
 * A {@code null} argument is not handled: {@code s.length()} throws a NullPointerException.
 *
 * @param s the string to inspect
 * @return {@code true} if no char outside 'a'..'z' is found, {@code false} at the first such char
 */
public static boolean isLowerCaseAscii(final String s) {
|
||||||

final int length = s.length();
|
||||||

for (int i = 0; i < length; i++) {
|
||||||

final int c = s.charAt(i);
|
||||||

if (c < 97 || c > 122) return false; // 97 is 'a', 122 is 'z'; digits, uppercase, punctuation and non-ASCII all fail
|
||||||

}
|
||||||

return true;
|
||||||

}
|
||||||
}
|
}
|
||||||
|
|
|
@ -62,7 +62,7 @@ class SuggestTest {
|
||||||
// not corrected because first empty score not high enough
|
// not corrected because first empty score not high enough
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test fun `not "ill" to "I'll" if both were used before in this context`() {
|
@Test fun `"ill" to "I'll" if both have same ngram score`() {
|
||||||
val locale = Locale.ENGLISH
|
val locale = Locale.ENGLISH
|
||||||
val result = shouldBeAutoCorrected(
|
val result = shouldBeAutoCorrected(
|
||||||
"ill",
|
"ill",
|
||||||
|
@ -72,8 +72,20 @@ class SuggestTest {
|
||||||
locale,
|
locale,
|
||||||
thresholdModest
|
thresholdModest
|
||||||
)
|
)
|
||||||
|
assert(result.last()) // should be corrected
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test fun `no "ill" to "I'll" if "ill" has somewhat better ngram score`() {
|
||||||
|
val locale = Locale.ENGLISH
|
||||||
|
val result = shouldBeAutoCorrected(
|
||||||
|
"ill",
|
||||||
|
listOf(suggestion("I'll", Int.MAX_VALUE, locale), suggestion("ill", 1500000, locale)),
|
||||||
|
suggestion("I'll", 200, locale),
|
||||||
|
suggestion("ill", 211, locale),
|
||||||
|
locale,
|
||||||
|
thresholdModest
|
||||||
|
)
|
||||||
assert(!result.last()) // should not be corrected
|
assert(!result.last()) // should not be corrected
|
||||||
// essentially same as `not "ill" to "I'll" if only "ill" was used before in this context`
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test fun `no English "I" for Polish "i" when typing in Polish`() {
|
@Test fun `no English "I" for Polish "i" when typing in Polish`() {
|
||||||
|
@ -119,17 +131,70 @@ class SuggestTest {
|
||||||
// not corrected because of locale matching
|
// not corrected because of locale matching
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test fun `no "lé" instead of "le"`() {
|
@Test fun `no "né" instead of "ne"`() {
|
||||||
val result = shouldBeAutoCorrected(
|
val result = shouldBeAutoCorrected(
|
||||||
"le",
|
"ne",
|
||||||
listOf(suggestion("le", 1900000, Locale.FRENCH), suggestion("lé", 1500000, Locale.FRENCH)),
|
listOf(suggestion("ne", 1900000, Locale.FRENCH), suggestion("né", 1900000-1, Locale.FRENCH)),
|
||||||
null,
|
null,
|
||||||
null,
|
null,
|
||||||
Locale.FRENCH,
|
Locale.FRENCH,
|
||||||
thresholdModest
|
thresholdModest
|
||||||
)
|
)
|
||||||
assert(!result.last()) // should not be corrected
|
assert(!result.last()) // should not be corrected
|
||||||
// not corrected because of score difference
|
// not corrected because score is lower
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test fun `"né" instead of "ne" if "né" in ngram context`() {
|
||||||
|
val locale = Locale.FRENCH
|
||||||
|
val result = shouldBeAutoCorrected(
|
||||||
|
"ne",
|
||||||
|
listOf(suggestion("ne", 1900000, locale), suggestion("né", 1900000-1, locale)),
|
||||||
|
suggestion("né", 200, locale),
|
||||||
|
null,
|
||||||
|
locale,
|
||||||
|
thresholdModest
|
||||||
|
)
|
||||||
|
assert(result.last()) // should be corrected
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test fun `"né" instead of "ne" if "né" has clearly better score in ngram context`() {
|
||||||
|
val locale = Locale.FRENCH
|
||||||
|
val result = shouldBeAutoCorrected(
|
||||||
|
"ne",
|
||||||
|
listOf(suggestion("ne", 1900000, locale), suggestion("né", 1900000-1, locale)),
|
||||||
|
suggestion("né", 215, locale),
|
||||||
|
suggestion("ne", 200, locale),
|
||||||
|
locale,
|
||||||
|
thresholdModest
|
||||||
|
)
|
||||||
|
assert(result.last()) // should be corrected
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test fun `no "né" instead of "ne" if both with same score in ngram context`() {
|
||||||
|
val locale = Locale.FRENCH
|
||||||
|
val result = shouldBeAutoCorrected(
|
||||||
|
"ne",
|
||||||
|
listOf(suggestion("ne", 1900000, locale), suggestion("né", 1900000-1, locale)),
|
||||||
|
suggestion("né", 200, locale),
|
||||||
|
suggestion("ne", 200, locale),
|
||||||
|
locale,
|
||||||
|
thresholdModest
|
||||||
|
)
|
||||||
|
assert(!result.last()) // should not be corrected
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test fun `no "ne" instead of "né"`() {
|
||||||
|
val locale = Locale.FRENCH
|
||||||
|
val result = shouldBeAutoCorrected(
|
||||||
|
"né",
|
||||||
|
listOf(suggestion("ne", 600000, locale), suggestion("né", 1600000, locale)),
|
||||||
|
suggestion("né", 200, locale),
|
||||||
|
suggestion("ne", 200, locale),
|
||||||
|
locale,
|
||||||
|
thresholdModest
|
||||||
|
)
|
||||||
|
assert(!result.last()) // should not be corrected
|
||||||
|
// not even allowed to check because of low score for ne
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue