add unit tests for shouldBeAutoCorrected, small dict fix

This commit is contained in:
Helium314 2023-08-28 19:50:55 +02:00
parent e9488b9ea0
commit 78cccee9bb
7 changed files with 427 additions and 4 deletions

View file

@ -64,6 +64,9 @@ Changes to OpenBoard:
* Fix bug with space before word being deleted in some apps / input fields, https://github.com/Helium314/openboard/commit/ce0bf06545c4547d3fc5791cc769508db0a89e87
* Allow using auto theme on some devices with Android 9
* Add auto theme for the new theming system
* Add number pad
* Overhauled language settings
* Updated translations
Further plan / to do:
* ~upgrade dependencies~
@ -94,7 +97,8 @@ Further plan / to do:
* add emojis to user history, to be used for next word
-----
# readme for original version of OpenBoard below
-----
<h1 align="center"><b>OpenBoard</b></h1>
<h4 align="center">100% FOSS keyboard, based on AOSP.</h4>
<p align="center"><img src='fastlane/metadata/android/en-US/images/icon.png' height='128'></p>

View file

@ -54,4 +54,5 @@ dependencies {
implementation 'androidx.core:core-ktx:1.10.1'
implementation "org.jetbrains.kotlin:kotlin-stdlib-jdk7:$kotlin_version"
implementation 'androidx.viewpager2:viewpager2:1.0.0'
testImplementation 'junit:junit:4.13.2'
}

View file

@ -189,7 +189,8 @@ final public class BinaryDictionaryGetter {
if (null == currentBestMatch || currentBestMatch.mMatchLevel <= matchLevel) {
// todo: not nice, related to getDictionaryFiles todo
// this is so user-added main dict has priority over internal main dict
if ("main".equals(category) && (wordList.getName().endsWith(USER_DICTIONARY_SUFFIX) || currentBestMatch == null))
// actually any user-added dict has priority, but there aren't any other built-in types
if (wordList.getName().endsWith(USER_DICTIONARY_SUFFIX) || currentBestMatch == null)
cacheFiles.put(category, new FileAndMatchLevel(wordList, matchLevel));
}
}

View file

@ -478,4 +478,15 @@ public final class WordComposer {
/**
 * Overwrites the cached typed word; intended for tests only.
 *
 * @param typedWordCacheForTests the value stored into {@code mTypedWordCache}
 */
void setTypedWordCacheForTests(final String typedWordCacheForTests) {
    this.mTypedWordCache = typedWordCacheForTests;
}
/**
 * Creates a minimal composer for unit tests via the private test-only constructor.
 *
 * @param isEmpty forwarded unchanged to the constructor
 * @return the newly created composer
 */
@UsedForTesting
static WordComposer getComposerForTest(final boolean isEmpty) {
    final WordComposer composerForTest = new WordComposer(isEmpty);
    return composerForTest;
}
/**
 * Test-only constructor: sets the code point size to 0 for an empty composer and to 1
 * otherwise, and leaves the event list unset (null).
 *
 * @param isEmpty whether the composer should report a code point size of 0
 */
private WordComposer(boolean isEmpty) {
    if (isEmpty) {
        mCodePointSize = 0;
    } else {
        mCodePointSize = 1;
    }
    mEvents = null;
}
}

View file

@ -17,6 +17,7 @@
package org.dslul.openboard.inputmethod.latin.common;
import org.dslul.openboard.inputmethod.annotations.UsedForTesting;
import org.dslul.openboard.inputmethod.latin.BuildConfig;
import javax.annotation.Nonnull;
@ -177,7 +178,7 @@ public final class Constants {
// Key events coming any faster than this are long-presses.
public static final int LONG_PRESS_MILLISECONDS = 200;
// TODO: Set this value appropriately.
public static final int GET_SUGGESTED_WORDS_TIMEOUT = 200;
public static final int GET_SUGGESTED_WORDS_TIMEOUT = BuildConfig.DEBUG ? 500 : 200; // debug build is slow, and timeout is annoying for testing
// How many continuous deletes at which to start deleting at a higher speed.
public static final int DELETE_ACCELERATE_AT = 20;

View file

@ -16,11 +16,17 @@
package org.dslul.openboard.inputmethod.latin.utils;
import static android.view.KeyEvent.KEYCODE_SPACE;
import android.os.Build;
import android.util.Log;
import com.android.inputmethod.latin.utils.BinaryDictionaryUtils;
import org.dslul.openboard.inputmethod.annotations.UsedForTesting;
import org.dslul.openboard.inputmethod.latin.BuildConfig;
import org.dslul.openboard.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
import org.dslul.openboard.inputmethod.latin.common.StringUtils;
import org.dslul.openboard.inputmethod.latin.define.DebugFlags;
public final class AutoCorrectionUtils {
@ -45,7 +51,11 @@ public final class AutoCorrectionUtils {
final int autoCorrectionSuggestionScore = suggestion.mScore;
// TODO: when the normalized score of the first suggestion is nearly equals to
// the normalized score of the second suggestion, behave less aggressive.
final float normalizedScore = BinaryDictionaryUtils.calcNormalizedScore(
final float normalizedScore;
if (BuildConfig.DEBUG && Build.VERSION.SDK_INT == 0) // SDK_INT is 0 in unit tests
normalizedScore = calcNormalizedScore(StringUtils.toCodePointArray(consideredWord), StringUtils.toCodePointArray(suggestion.mWord), autoCorrectionSuggestionScore, editDistance(consideredWord, suggestion.mWord));
else
normalizedScore = BinaryDictionaryUtils.calcNormalizedScore(
consideredWord, suggestion.mWord, autoCorrectionSuggestionScore);
if (DBG) {
Log.d(TAG, "Normalized " + consideredWord + "," + suggestion + ","
@ -61,4 +71,72 @@ public final class AutoCorrectionUtils {
}
return false;
}
// below is normalized score calculation in java, to allow unit tests involving suggestionExceedsThreshold
@UsedForTesting
// Java version of the normalized score calculation, so unit tests can exercise
// suggestionExceedsThreshold without the native library.
//   before / after: code points of the typed word and of the suggestion
//   score:          raw suggestion score, scaled down by 1,000,000
//   distance:       edit distance between the two words
// Returns 0.0f for empty input, for a suggestion consisting only of spaces, for a
// non-positive score, or when distance >= after.length. Otherwise returns the scaled
// score weighted by (1 - distance / after.length).
private static float calcNormalizedScore(final int[] before,
        final int[] after, final int score, final int distance) {
    final int beforeLength = before.length;
    final int afterLength = after.length;
    if (0 == beforeLength || 0 == afterLength)
        return 0.0f;
    // "after" holds code points, so compare against the space character (0x20).
    // android.view.KeyEvent.KEYCODE_SPACE is the key code 62, which as a code point is '>'
    // and therefore must not be used here.
    int spaceCount = 0;
    for (final int codePoint : after) {
        if (codePoint == ' ')
            ++spaceCount;
    }
    if (spaceCount == afterLength)
        return 0.0f;
    if (score <= 0 || distance >= afterLength) {
        // normalizedScore must be 0.0f (the minimum value) if the score is less than or equal to 0,
        // or if the edit distance is larger than or equal to afterLength.
        return 0.0f;
    }
    // add a weight based on edit distance.
    final float weight = 1.0f - (float) distance / (float) afterLength;
    return ((float) score / 1000000.0f) * weight;
}
@UsedForTesting
// Levenshtein distance between x and y (insertion, deletion and substitution each cost 1).
// The distance is counted in Unicode code points rather than UTF-16 chars, so it uses the
// same unit as the code point arrays handed to calcNormalizedScore; a character outside
// the BMP counts as one edit instead of two.
private static int editDistance(String x, String y) {
    final int[] source = codePointsOf(x);
    final int[] target = codePointsOf(y);
    final int[][] dp = new int[source.length + 1][target.length + 1];
    // turning a prefix into the empty word (or back) costs one edit per code point
    for (int i = 0; i <= source.length; i++)
        dp[i][0] = i;
    for (int j = 0; j <= target.length; j++)
        dp[0][j] = j;
    for (int i = 1; i <= source.length; i++) {
        for (int j = 1; j <= target.length; j++) {
            final int substitution = dp[i - 1][j - 1] + (source[i - 1] == target[j - 1] ? 0 : 1);
            final int deletion = dp[i - 1][j] + 1;
            final int insertion = dp[i][j - 1] + 1;
            dp[i][j] = Math.min(substitution, Math.min(deletion, insertion));
        }
    }
    return dp[source.length][target.length];
}

// Splits text into its code points; an unpaired surrogate is kept as a single element.
private static int[] codePointsOf(final String text) {
    final int[] codePoints = new int[text.codePointCount(0, text.length())];
    int offset = 0;
    for (int i = 0; i < codePoints.length; i++) {
        codePoints[i] = text.codePointAt(offset);
        offset += Character.charCount(codePoints[i]);
    }
    return codePoints;
}
@UsedForTesting
// Smallest of the given values; Integer.MAX_VALUE when called without any argument.
private static int min(int... numbers) {
    int smallest = Integer.MAX_VALUE;
    for (int index = 0; index < numbers.length; index++) {
        smallest = Math.min(smallest, numbers[index]);
    }
    return smallest;
}
@UsedForTesting
// Substitution cost for the edit distance: 0 for identical chars, 1 otherwise (case-sensitive).
private static int costOfSubstitution(char a, char b) {
    if (a != b) {
        return 1;
    }
    return 0;
}
}

View file

@ -0,0 +1,327 @@
package org.dslul.openboard.inputmethod.latin
import android.content.Context
import android.util.LruCache
import org.dslul.openboard.inputmethod.keyboard.Keyboard
import org.dslul.openboard.inputmethod.latin.SuggestedWords.SuggestedWordInfo
import org.dslul.openboard.inputmethod.latin.SuggestedWords.SuggestedWordInfo.KIND_FLAG_APPROPRIATE_FOR_AUTO_CORRECTION
import org.dslul.openboard.inputmethod.latin.SuggestedWords.SuggestedWordInfo.KIND_WHITELIST
import org.dslul.openboard.inputmethod.latin.common.ComposedData
import org.dslul.openboard.inputmethod.latin.common.StringUtils
import org.dslul.openboard.inputmethod.latin.settings.SettingsValuesForSuggestion
import org.dslul.openboard.inputmethod.latin.utils.SuggestionResults
import org.junit.Test
import java.io.File
import java.util.*
import java.util.concurrent.TimeUnit
/**
 * Unit tests for the auto-correction decision, driven through the [shouldBeAutoCorrected]
 * helper with hand-built suggestions.
 *
 * Expectations are checked by throwing [AssertionError] explicitly instead of using Kotlin's
 * `assert`, because `assert` is only evaluated when JVM assertions are enabled (`-ea`) and
 * would otherwise let every test pass silently.
 */
class SuggestTest {
    private val thresholdModest = 0.185f
    private val thresholdAggressive = 0.067f
    private val thresholdVeryAggressive = Float.NEGATIVE_INFINITY

    // NOTE(review): despite its name, this test currently expects that no correction happens
    @Test fun `"on" to "in" if "in" was used before in this context`() {
        val locale = Locale.ENGLISH
        val result = shouldBeAutoCorrected(
            word = "on",
            suggestions = listOf(suggestion("on", 1800000, locale), suggestion("in", 600000, locale)),
            firstSuggestionForEmpty = suggestion("in", 240, locale),
            typedWordSuggestionForEmpty = null, // never typed "on" in this context
            currentTypingLocale = locale,
            autoCorrectThreshold = thresholdModest
        )
        // not corrected because first suggestion score is too low
        expectNotCorrected(result)
    }

    @Test fun `"ill" to "I'll" if "ill" not used before in this context, and I'll has shortcut`() {
        val locale = Locale.ENGLISH
        val result = shouldBeAutoCorrected(
            word = "ill",
            suggestions = listOf(suggestion("I'll", Int.MAX_VALUE, locale), suggestion("ill", 1500000, locale)),
            firstSuggestionForEmpty = null,
            typedWordSuggestionForEmpty = null,
            currentTypingLocale = locale,
            autoCorrectThreshold = thresholdModest
        )
        // correction because both empty scores are 0, which should be fine (next check is comparing empty scores)
        expectCorrected(result)
    }

    @Test fun `not "ill" to "I'll" if only "ill" was used before in this context`() {
        val locale = Locale.ENGLISH
        val result = shouldBeAutoCorrected(
            word = "ill",
            suggestions = listOf(suggestion("I'll", Int.MAX_VALUE, locale), suggestion("ill", 1500000, locale)),
            firstSuggestionForEmpty = null,
            typedWordSuggestionForEmpty = suggestion("ill", 200, locale),
            currentTypingLocale = locale,
            autoCorrectThreshold = thresholdModest
        )
        // not corrected because first empty score not high enough
        expectNotCorrected(result)
    }

    @Test fun `not "ill" to "I'll" if both were used before in this context`() {
        val locale = Locale.ENGLISH
        val result = shouldBeAutoCorrected(
            word = "ill",
            suggestions = listOf(suggestion("I'll", Int.MAX_VALUE, locale), suggestion("ill", 1500000, locale)),
            firstSuggestionForEmpty = suggestion("I'll", 200, locale),
            typedWordSuggestionForEmpty = suggestion("ill", 200, locale),
            currentTypingLocale = locale,
            autoCorrectThreshold = thresholdModest
        )
        // essentially same as `not "ill" to "I'll" if only "ill" was used before in this context`
        expectNotCorrected(result)
    }

    @Test fun `no English "I" for Polish "i" when typing in Polish`() {
        val result = shouldBeAutoCorrected(
            word = "i",
            suggestions = listOf(suggestion("I", Int.MAX_VALUE, Locale.ENGLISH), suggestion("i", 1500000, Locale("pl"))),
            firstSuggestionForEmpty = null,
            typedWordSuggestionForEmpty = null,
            currentTypingLocale = Locale("pl"),
            autoCorrectThreshold = thresholdVeryAggressive
        )
        // not even checking at modest and aggressive thresholds, this is a locale thing
        // if very aggressive, still no correction because locale matches with typed word only
        expectNotCorrected(result)
    }

    @Test fun `English "I" instead of Polish "i" when typing in English`() {
        val result = shouldBeAutoCorrected(
            word = "i",
            suggestions = listOf(suggestion("I", Int.MAX_VALUE, Locale.ENGLISH), suggestion("i", 1500000, Locale("pl"))),
            firstSuggestionForEmpty = null,
            typedWordSuggestionForEmpty = null,
            currentTypingLocale = Locale.ENGLISH,
            autoCorrectThreshold = thresholdModest
        )
        // only corrected because it's whitelisted (int max value)
        // if it wasn't whitelisted, it would never be allowed due to AutoCorrectionUtils.suggestionExceedsThreshold (unless set to very aggressive)
        // -> maybe normalizedScore needs adjustment if the only difference is upper/lowercase
        // todo: consider special score for case-only difference?
        expectCorrected(result)
    }

    @Test fun `no English "in" instead of French "un" when typing in French`() {
        val result = shouldBeAutoCorrected(
            word = "un",
            suggestions = listOf(suggestion("in", Int.MAX_VALUE, Locale.ENGLISH), suggestion("un", 1500000, Locale.FRENCH)),
            firstSuggestionForEmpty = null,
            typedWordSuggestionForEmpty = null,
            currentTypingLocale = Locale.FRENCH,
            autoCorrectThreshold = thresholdModest
        )
        // not corrected because of locale matching
        expectNotCorrected(result)
    }

    /** Fails the test unless the last flag in [result] says the typed word gets auto-corrected. */
    private fun expectCorrected(result: List<Boolean>) {
        if (!result.last()) throw AssertionError("expected auto-correction, but got $result")
    }

    /** Fails the test if the last flag in [result] says the typed word gets auto-corrected. */
    private fun expectNotCorrected(result: List<Boolean>) {
        if (result.last()) throw AssertionError("expected no auto-correction, but got $result")
    }
}
/**
 * Builds a [SuggestedWordInfo] for [word] whose source dictionary is a [TestDict] for [locale].
 *
 * Scores are typically 2B for a shortcut, 1.5M for an exact match and 600k for a close match.
 * When the previous word context is empty, scores are usually 200+ if the word is known and
 * somewhat often used, and 0 if it is unknown.
 *
 * A [score] of [Int.MAX_VALUE] yields `KIND_WHITELIST`; any other score yields
 * `KIND_FLAG_APPROPRIATE_FOR_AUTO_CORRECTION`.
 */
private fun suggestion(word: String, score: Int, locale: Locale): SuggestedWordInfo {
    val kindAndFlags = when (score) {
        Int.MAX_VALUE -> KIND_WHITELIST
        else -> KIND_FLAG_APPROPRIATE_FOR_AUTO_CORRECTION
    }
    val sourceDict = TestDict(locale)
    return SuggestedWordInfo(
        word,
        "", // prevWordsContext: irrelevant
        score,
        kindAndFlags,
        sourceDict,
        0, // indexOfTouchPointOfSecondWord: irrelevant
        0 // autoCommitFirstWordConfidence: irrelevant?
    )
}
/**
 * Prepares the inputs for [Suggest.shouldBeAutoCorrected] from hand-built suggestions and
 * returns its result as a list. The tests read the last entry as "should be auto-corrected".
 *
 * @param word the typed word
 * @param suggestions suggestions ordered by score, including the suggestion for the typed word
 *  if it is in the dictionary
 * @param firstSuggestionForEmpty first suggestion if the typed word would be empty (null if none)
 * @param typedWordSuggestionForEmpty suggestion for the actually typed word if the typed word
 *  would be empty (null if none)
 * @param currentTypingLocale used for checking whether the suggestion locale is the same,
 *  relevant e.g. for the English i -> I shortcut when Polish i is wanted
 * @param autoCorrectThreshold -inf, 0.067 or 0.185 (very aggressive, aggressive, modest)
 */
fun shouldBeAutoCorrected(
    word: String,
    suggestions: List<SuggestedWordInfo>,
    firstSuggestionForEmpty: SuggestedWordInfo?,
    typedWordSuggestionForEmpty: SuggestedWordInfo?,
    currentTypingLocale: Locale,
    autoCorrectThreshold: Float
): List<Boolean> {
    val remainingSuggestions = ArrayList(suggestions)
    val suggestionResults = SuggestionResults(suggestions.size, false, false)
    for (info in suggestions) {
        suggestionResults.add(info)
    }

    // Keep the original SuggestedWordInfo of the typed word (first match in the list) and its
    // source dictionary, because the typed word is removed from the list right below. It may
    // have to be re-added when auto-correction happens, so the original word stays selectable.
    val typedWordFirstOccurrenceWordInfo: SuggestedWordInfo? =
        remainingSuggestions.firstOrNull { it.mWord == word }
    val sourceDictionaryOfRemovedWord: Dictionary? = typedWordFirstOccurrenceWordInfo?.mSourceDict

    val firstOccurrenceOfTypedWordInSuggestions =
        SuggestedWordInfo.removeDupsAndTypedWord(word, remainingSuggestions)

    return Suggest.shouldBeAutoCorrected(
        StringUtils.getTrailingSingleQuotesCount(word),
        word,
        remainingSuggestions, // todo: get from suggestions? mostly it's just removing the typed word, right?
        sourceDictionaryOfRemovedWord,
        listOf(firstSuggestionForEmpty, typedWordSuggestionForEmpty),
        {}, // only used to fill above if needed
        true, // doesn't make sense otherwise
        0, // not really relevant here
        WordComposer.getComposerForTest(false),
        suggestionResults,
        facilitator(currentTypingLocale),
        autoCorrectThreshold,
        firstOccurrenceOfTypedWordInSuggestions,
        typedWordFirstOccurrenceWordInfo
    ).toList()
}
/**
 * Creates a stub [DictionaryFacilitator] for the tests.
 *
 * Only two members return a value: `getCurrentLocale` returns [currentTypingLocale] and
 * `hasAtLeastOneInitializedMainDictionary` returns true. Every other member throws via [TODO].
 */
private fun facilitator(currentTypingLocale: Locale): DictionaryFacilitator {
    return object : DictionaryFacilitator {
        // the only members that return a value
        override fun getCurrentLocale(): Locale = currentTypingLocale

        override fun hasAtLeastOneInitializedMainDictionary(): Boolean = true

        // everything below is an unimplemented stub
        override fun setValidSpellingWordReadCache(cache: LruCache<String, Boolean>?): Unit =
            TODO("Not yet implemented")

        override fun setValidSpellingWordWriteCache(cache: LruCache<String, Boolean>?): Unit =
            TODO("Not yet implemented")

        override fun isForLocale(locale: Locale?): Boolean = TODO("Not yet implemented")

        override fun isForAccount(account: String?): Boolean = TODO("Not yet implemented")

        override fun onStartInput(): Unit = TODO("Not yet implemented")

        override fun onFinishInput(context: Context?): Unit = TODO("Not yet implemented")

        override fun isActive(): Boolean = TODO("Not yet implemented")

        override fun getLocale(): Locale = TODO("Not yet implemented")

        override fun usesContacts(): Boolean = TODO("Not yet implemented")

        override fun getAccount(): String = TODO("Not yet implemented")

        override fun resetDictionaries(
            context: Context?,
            newLocale: Locale?,
            useContactsDict: Boolean,
            usePersonalizedDicts: Boolean,
            forceReloadMainDictionary: Boolean,
            account: String?,
            dictNamePrefix: String?,
            listener: DictionaryFacilitator.DictionaryInitializationListener?
        ): Unit = TODO("Not yet implemented")

        override fun removeWord(word: String?): Unit = TODO("Not yet implemented")

        override fun resetDictionariesForTesting(
            context: Context?,
            locale: Locale?,
            dictionaryTypes: java.util.ArrayList<String>?,
            dictionaryFiles: HashMap<String, File>?,
            additionalDictAttributes: MutableMap<String, MutableMap<String, String>>?,
            account: String?
        ): Unit = TODO("Not yet implemented")

        override fun closeDictionaries(): Unit = TODO("Not yet implemented")

        override fun getSubDictForTesting(dictName: String?): ExpandableBinaryDictionary =
            TODO("Not yet implemented")

        override fun hasAtLeastOneUninitializedMainDictionary(): Boolean = TODO("Not yet implemented")

        override fun waitForLoadingMainDictionaries(timeout: Long, unit: TimeUnit?): Unit =
            TODO("Not yet implemented")

        override fun waitForLoadingDictionariesForTesting(timeout: Long, unit: TimeUnit?): Unit =
            TODO("Not yet implemented")

        override fun addToUserHistory(
            suggestion: String?,
            wasAutoCapitalized: Boolean,
            ngramContext: NgramContext,
            timeStampInSeconds: Long,
            blockPotentiallyOffensive: Boolean
        ): Unit = TODO("Not yet implemented")

        override fun unlearnFromUserHistory(
            word: String?,
            ngramContext: NgramContext,
            timeStampInSeconds: Long,
            eventType: Int
        ): Unit = TODO("Not yet implemented")

        override fun getSuggestionResults(
            composedData: ComposedData?,
            ngramContext: NgramContext?,
            keyboard: Keyboard,
            settingsValuesForSuggestion: SettingsValuesForSuggestion?,
            sessionId: Int,
            inputStyle: Int
        ): SuggestionResults = TODO("Not yet implemented")

        override fun isValidSpellingWord(word: String?): Boolean = TODO("Not yet implemented")

        override fun isValidSuggestionWord(word: String?): Boolean = TODO("Not yet implemented")

        override fun clearUserHistoryDictionary(context: Context?): Boolean = TODO("Not yet implemented")

        override fun dump(context: Context?): String = TODO("Not yet implemented")

        override fun dumpDictionaryForDebug(dictName: String?): Unit = TODO("Not yet implemented")

        override fun getDictionaryStats(context: Context?): MutableList<DictionaryStats> =
            TODO("Not yet implemented")
    }
}
/**
 * Stub [Dictionary] named "testDict" for the given locale.
 *
 * Both overridden lookups are unimplemented and throw via [TODO].
 */
private class TestDict(locale: Locale) : Dictionary("testDict", locale) {
    override fun getSuggestions(
        composedData: ComposedData?,
        ngramContext: NgramContext?,
        proximityInfoHandle: Long,
        settingsValuesForSuggestion: SettingsValuesForSuggestion?,
        sessionId: Int,
        weightForLocale: Float,
        inOutWeightOfLangModelVsSpatialModel: FloatArray?
    ): ArrayList<SuggestedWordInfo> = TODO("Not yet implemented")

    override fun isInDictionary(word: String?): Boolean = TODO("Not yet implemented")
}