From 960f058b7e27985c6fbe9e940e7712f7172c408c Mon Sep 17 00:00:00 2001 From: Helium314 Date: Tue, 20 May 2025 20:44:57 +0200 Subject: [PATCH] move DictionaryFacilitatoryImpl to Kotlin only very minor changes to behavior using coroutines instead of ExecutorUtils some code moved out of "main" facilitator --- .../latin/DictionaryFacilitatorImpl.java | 1106 ----------------- .../latin/DictionaryFacilitatorImpl.kt | 827 ++++++++++++ 2 files changed, 827 insertions(+), 1106 deletions(-) delete mode 100644 app/src/main/java/helium314/keyboard/latin/DictionaryFacilitatorImpl.java create mode 100644 app/src/main/java/helium314/keyboard/latin/DictionaryFacilitatorImpl.kt diff --git a/app/src/main/java/helium314/keyboard/latin/DictionaryFacilitatorImpl.java b/app/src/main/java/helium314/keyboard/latin/DictionaryFacilitatorImpl.java deleted file mode 100644 index 2bd17f62e..000000000 --- a/app/src/main/java/helium314/keyboard/latin/DictionaryFacilitatorImpl.java +++ /dev/null @@ -1,1106 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * modified - * SPDX-License-Identifier: Apache-2.0 AND GPL-3.0-only - */ - -package helium314.keyboard.latin; - -import android.Manifest; -import android.content.Context; -import android.provider.UserDictionary; -import android.text.TextUtils; -import android.util.LruCache; -import android.view.inputmethod.InputMethodSubtype; - -import androidx.annotation.NonNull; -import androidx.annotation.Nullable; - -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Locale; -import java.util.Map; -import java.util.Scanner; -import java.util.Set; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.TimeUnit; - -import 
helium314.keyboard.keyboard.Keyboard; -import helium314.keyboard.keyboard.emoji.SupportedEmojis; -import helium314.keyboard.latin.NgramContext.WordInfo; -import helium314.keyboard.latin.SuggestedWords.SuggestedWordInfo; -import helium314.keyboard.latin.common.ComposedData; -import helium314.keyboard.latin.common.Constants; -import helium314.keyboard.latin.common.StringUtils; -import helium314.keyboard.latin.common.StringUtilsKt; -import helium314.keyboard.latin.permissions.PermissionsUtil; -import helium314.keyboard.latin.personalization.UserHistoryDictionary; -import helium314.keyboard.latin.settings.Settings; -import helium314.keyboard.latin.settings.SettingsValues; -import helium314.keyboard.latin.settings.SettingsValuesForSuggestion; -import helium314.keyboard.latin.utils.ExecutorUtils; -import helium314.keyboard.latin.utils.KtxKt; -import helium314.keyboard.latin.utils.Log; -import helium314.keyboard.latin.utils.SubtypeSettings; -import helium314.keyboard.latin.utils.SubtypeUtilsKt; -import helium314.keyboard.latin.utils.SuggestionResults; - -/** - * Facilitates interaction with different kinds of dictionaries. Provides APIs - * to instantiate and select the correct dictionaries (based on language or account), - * update entries and fetch suggestions. - *

- * Currently AndroidSpellCheckerService and LatinIME both use DictionaryFacilitator as - * a client for interacting with dictionaries. - */ -public class DictionaryFacilitatorImpl implements DictionaryFacilitator { - public static final String TAG = DictionaryFacilitatorImpl.class.getSimpleName(); - - // HACK: This threshold is being used when adding a capitalized entry in the User History - // dictionary. - private static final int CAPITALIZED_FORM_MAX_PROBABILITY_FOR_INSERT = 140; - - private ArrayList mDictionaryGroups = new ArrayList<>() {{ add(new DictionaryGroup()); }}; - private volatile CountDownLatch mLatchForWaitingLoadingMainDictionaries = new CountDownLatch(0); - // To synchronize assigning mDictionaryGroup to ensure closing dictionaries. - private final Object mLock = new Object(); - // library does not deal well with ngram history for auto-capitalized words, so we adjust the ngram - // context to store next word suggestions for such cases - private boolean mTryChangingWords = false; - private String mChangeFrom = ""; - private String mChangeTo = ""; - - // todo: write cache never set, and never read (only written) - // tried to use read cache for a while, but small performance improvements are not worth the work (https://github.com/Helium314/HeliBoard/issues/307) - private LruCache mValidSpellingWordReadCache; - private LruCache mValidSpellingWordWriteCache; - - @Override - public void setValidSpellingWordReadCache(final LruCache cache) { - mValidSpellingWordReadCache = cache; - } - - @Override - public void setValidSpellingWordWriteCache(final LruCache cache) { - mValidSpellingWordWriteCache = cache; - } - - // judging by usage, this should check primary locale only - @Override - public boolean isForLocale(final Locale locale) { - return locale != null && locale.equals(mDictionaryGroups.get(0).mLocale); - } - - private boolean hasLocale(final Locale locale) { - if (locale == null) return false; - for (DictionaryGroup dictionaryGroup : 
mDictionaryGroups) { - if (locale.equals(dictionaryGroup.mLocale)) return true; - } - return false; - } - - /** - * A group of dictionaries that work together for a single language. - */ - private static class DictionaryGroup { - private static final int MAX_CONFIDENCE = 2; - - /** - * The locale associated with the dictionary group. - */ - @NonNull public final Locale mLocale; - - /** - * The user account associated with the dictionary group. - */ - @Nullable public final String mAccount; - - @Nullable private Dictionary mMainDict; - // Confidence that the most probable language is actually the language the user is - // typing in. For now, this is simply the number of times a word from this language - // has been committed in a row, with an exception when typing a single word not contained - // in this language. - private int mConfidence = 1; - - // words cannot be removed from main dictionary, so we use a blacklist instead - public String blacklistFileName = null; - public Set blacklist = new HashSet<>(); - - // allow to go above max confidence, for better determination of currently preferred language - // when decreasing confidence or getting weight factor, limit to maximum - public void increaseConfidence() { - mConfidence += 1; - } - - // If confidence is above max, drop to max confidence. 
This does not change weights and - // allows conveniently typing single words from the other language without affecting suggestions - public void decreaseConfidence() { - if (mConfidence > MAX_CONFIDENCE) - mConfidence = MAX_CONFIDENCE; - else if (mConfidence > 0) { - mConfidence -= 1; - } - } - - public float getWeightForTypingInLocale(List groups) { - return getWeightForLocale(groups, 0.15f); - } - - public float getWeightForGesturingInLocale(List groups) { - return getWeightForLocale(groups, 0.05f); - } - - // might need some more tuning - private float getWeightForLocale(final List groups, final float step) { - if (groups.size() == 1) return 1f; - if (mConfidence < 2) return 1f - step * (MAX_CONFIDENCE - mConfidence); - for (DictionaryGroup group : groups) { - if (group != this && group.mConfidence >= mConfidence) return 1f - step / 2f; - } - return 1f; - } - public final ConcurrentHashMap mSubDictMap = - new ConcurrentHashMap<>(); - - public DictionaryGroup() { - this(new Locale(""), null, null, Collections.emptyMap()); - } - - public DictionaryGroup(@NonNull final Locale locale, - @Nullable final Dictionary mainDict, - @Nullable final String account, - @NonNull final Map subDicts) { - mLocale = locale; - mAccount = account; - // The main dictionary can be asynchronously loaded. - setMainDict(mainDict); - for (final Map.Entry entry : subDicts.entrySet()) { - setSubDict(entry.getKey(), entry.getValue()); - } - } - - private void setSubDict(@NonNull final String dictType, @NonNull final ExpandableBinaryDictionary dict) { - mSubDictMap.put(dictType, dict); - } - - public void setMainDict(@Nullable final Dictionary mainDict) { - // Close old dictionary if exists. Main dictionary can be assigned multiple times. 
- final Dictionary oldDict = mMainDict; - mMainDict = mainDict; - if (oldDict != null && mainDict != oldDict) { - oldDict.close(); - } - } - - public @Nullable Dictionary getDict(@NonNull final String dictType) { - if (Dictionary.TYPE_MAIN.equals(dictType)) { - return mMainDict; - } - return getSubDict(dictType); - } - - public @Nullable ExpandableBinaryDictionary getSubDict(@NonNull final String dictType) { - return mSubDictMap.get(dictType); - } - - public boolean hasDict(@NonNull final String dictType, @Nullable final String account) { - if (Dictionary.TYPE_MAIN.equals(dictType)) { - return mMainDict != null; - } - if (Dictionary.TYPE_USER_HISTORY.equals(dictType) && - !TextUtils.equals(account, mAccount)) { - // If the dictionary type is user history, & if the account doesn't match, - // return immediately. If the account matches, continue looking it up in the - // sub dictionary map. - return false; - } - return mSubDictMap.containsKey(dictType); - } - - public void closeDict(@NonNull final String dictType) { - final Dictionary dict; - if (Dictionary.TYPE_MAIN.equals(dictType)) { - dict = mMainDict; - } else { - dict = mSubDictMap.remove(dictType); - } - if (dict != null) { - dict.close(); - } - } - } - - public DictionaryFacilitatorImpl() { - } - - @Override - public void onStartInput() { - } - - @Override - public void onFinishInput(Context context) { - for (DictionaryGroup dictionaryGroup : mDictionaryGroups) { - for (final String dictType : ALL_DICTIONARY_TYPES) { - Dictionary dict = dictionaryGroup.getDict(dictType); - if (dict != null) dict.onFinishInput(); - } - } - } - - @Override - public boolean isActive() { - return !mDictionaryGroups.get(0).mLocale.getLanguage().isEmpty(); - } - - @Override - @NonNull - public Locale getMainLocale() { - return mDictionaryGroups.get(0).mLocale; - } - - @Override - public Locale getCurrentLocale() { - return getCurrentlyPreferredDictionaryGroup().mLocale; - } - - public boolean usesContacts() { - return 
mDictionaryGroups.get(0).getSubDict(Dictionary.TYPE_CONTACTS) != null; - } - - public boolean usesApps() { - return mDictionaryGroups.get(0).getSubDict(Dictionary.TYPE_APPS) != null; - } - - public boolean usesPersonalization() { - return mDictionaryGroups.get(0).getSubDict(Dictionary.TYPE_USER_HISTORY) != null; - } - - @Override - public String getAccount() { - return null; - } - - @Override - public boolean usesSameSettings(@NonNull final List locales, final boolean contacts, - final boolean apps, final boolean personalization, @Nullable final String account) { - final boolean first = usesContacts() == contacts && usesApps() == apps - && usesPersonalization() == personalization - && TextUtils.equals(mDictionaryGroups.get(0).mAccount, account) - && locales.size() == mDictionaryGroups.size(); - if (!first) return false; - for (int i = 0; i < locales.size(); i++) { - if (locales.get(i) != mDictionaryGroups.get(i).mLocale) return false; - } - return true; - } - - @Nullable - private static ExpandableBinaryDictionary getSubDict(final String dictType, - final Context context, final Locale locale, final File dictFile, - final String dictNamePrefix, @Nullable final String account) { - ExpandableBinaryDictionary dict = null; - try { - dict = switch (dictType) { - case Dictionary.TYPE_USER_HISTORY -> UserHistoryDictionary.getDictionary(context, locale, dictFile, dictNamePrefix, account); - case Dictionary.TYPE_USER -> UserBinaryDictionary.getDictionary(context, locale, dictFile, dictNamePrefix, account); - case Dictionary.TYPE_CONTACTS -> ContactsBinaryDictionary.getDictionary(context, locale, dictFile, dictNamePrefix, account); - case Dictionary.TYPE_APPS -> AppsBinaryDictionary.getDictionary(context, locale, dictFile, dictNamePrefix, account); - default -> null; - }; - } catch (final SecurityException | IllegalArgumentException e) { - Log.e(TAG, "Cannot create dictionary: " + dictType, e); - } - if (dict == null) - Log.e(TAG, "Cannot create dictionary for " + dictType); 
- return dict; - } - - @Nullable - static DictionaryGroup findDictionaryGroupWithLocale(final List dictionaryGroups, - @NonNull final Locale locale) { - if (dictionaryGroups == null) return null; - for (DictionaryGroup dictionaryGroup : dictionaryGroups) { - if (locale.equals(dictionaryGroup.mLocale)) - return dictionaryGroup; - } - return null; - } - - // original - public void resetDictionaries( - final Context context, - @NonNull final Locale newLocale, - final boolean useContactsDict, - final boolean useAppsDict, - final boolean usePersonalizedDicts, - final boolean forceReloadMainDictionary, - @Nullable final String account, - final String dictNamePrefix, - @Nullable final DictionaryInitializationListener listener) { - final HashMap> existingDictionariesToCleanup = new HashMap<>(); - final HashSet subDictTypesToUse = new HashSet<>(); - subDictTypesToUse.add(Dictionary.TYPE_USER); - Log.i(TAG, "resetDictionaries, force reloading main dictionary: " + forceReloadMainDictionary); - final List allLocales = new ArrayList<>() {{ - add(newLocale); - - // adding secondary locales is a bit tricky since they depend on the subtype - // but usually this is called with the selected subtype locale - final InputMethodSubtype selected = SubtypeSettings.INSTANCE.getSelectedSubtype(KtxKt.prefs(context)); - if (SubtypeUtilsKt.locale(selected).equals(newLocale)) { - addAll(SubtypeUtilsKt.getSecondaryLocales(selected.getExtraValue())); - } else { - // probably we're called from the spell checker when using a different app as keyboard - final List enabled = SubtypeSettings.INSTANCE.getEnabledSubtypes(false); - for (InputMethodSubtype subtype : enabled) { - if (SubtypeUtilsKt.locale(subtype).equals(newLocale)) - addAll(SubtypeUtilsKt.getSecondaryLocales(subtype.getExtraValue())); - } - } - }}; - - // Do not use contacts dictionary if we do not have permissions to read contacts. 
- if (useContactsDict - && PermissionsUtil.checkAllPermissionsGranted(context, Manifest.permission.READ_CONTACTS)) { - subDictTypesToUse.add(Dictionary.TYPE_CONTACTS); - } - if (useAppsDict) { - subDictTypesToUse.add(Dictionary.TYPE_APPS); - } - if (usePersonalizedDicts) { - subDictTypesToUse.add(Dictionary.TYPE_USER_HISTORY); - } - - // Gather all dictionaries by locale. We may remove some from the list to clean up later. - for (DictionaryGroup dictionaryGroup : mDictionaryGroups) { - final ArrayList dictTypeForLocale = new ArrayList<>(); - existingDictionariesToCleanup.put(dictionaryGroup.mLocale, dictTypeForLocale); - - for (final String dictType : DYNAMIC_DICTIONARY_TYPES) { - if (dictionaryGroup.hasDict(dictType, account)) { - dictTypeForLocale.add(dictType); - } - } - if (dictionaryGroup.hasDict(Dictionary.TYPE_MAIN, account)) { - dictTypeForLocale.add(Dictionary.TYPE_MAIN); - } - } - - // create new dictionary groups and remove dictionaries to re-use from existingDictionariesToCleanup - final ArrayList newDictionaryGroups = new ArrayList<>(allLocales.size()); - for (Locale locale : allLocales) { - // get existing dictionary group for new locale - final DictionaryGroup oldDictionaryGroupForLocale = findDictionaryGroupWithLocale(mDictionaryGroups, locale); - final ArrayList dictTypesToCleanupForLocale = existingDictionariesToCleanup.get(locale); - final boolean noExistingDictsForThisLocale = (null == oldDictionaryGroupForLocale); - - // create new or re-use already loaded main dict - final Dictionary mainDict; - if (forceReloadMainDictionary || noExistingDictsForThisLocale - || !oldDictionaryGroupForLocale.hasDict(Dictionary.TYPE_MAIN, account)) { - mainDict = null; - } else { - mainDict = oldDictionaryGroupForLocale.getDict(Dictionary.TYPE_MAIN); - dictTypesToCleanupForLocale.remove(Dictionary.TYPE_MAIN); - } - - // create new or re-use already loaded sub-dicts - final Map subDicts = new HashMap<>(); - for (final String subDictType : subDictTypesToUse) { - 
final ExpandableBinaryDictionary subDict; - if (noExistingDictsForThisLocale || forceReloadMainDictionary - || !oldDictionaryGroupForLocale.hasDict(subDictType, account)) { - // Create a new dictionary. - subDict = getSubDict(subDictType, context, locale, null, dictNamePrefix, account); - if (subDict == null) continue; // https://github.com/Helium314/HeliBoard/issues/293 - } else { - // Reuse the existing dictionary, and don't close it at the end - subDict = oldDictionaryGroupForLocale.getSubDict(subDictType); - dictTypesToCleanupForLocale.remove(subDictType); - } - subDicts.put(subDictType, subDict); - } - DictionaryGroup newDictGroup = new DictionaryGroup(locale, mainDict, account, subDicts); - newDictionaryGroups.add(newDictGroup); - - // load blacklist - if (noExistingDictsForThisLocale) { - newDictGroup.blacklistFileName = context.getFilesDir().getAbsolutePath() + File.separator + "blacklists" + File.separator + locale.toLanguageTag() + ".txt"; - if (!new File(newDictGroup.blacklistFileName).exists()) - new File(context.getFilesDir().getAbsolutePath() + File.separator + "blacklists").mkdirs(); - newDictGroup.blacklist.addAll(readBlacklistFile(newDictGroup.blacklistFileName)); - } else { - // re-use if possible - newDictGroup.blacklistFileName = oldDictionaryGroupForLocale.blacklistFileName; - newDictGroup.blacklist.addAll(oldDictionaryGroupForLocale.blacklist); - } - } - - - // Replace Dictionaries. - final List oldDictionaryGroups; - synchronized (mLock) { - oldDictionaryGroups = mDictionaryGroups; - mDictionaryGroups = newDictionaryGroups; - if (hasAtLeastOneUninitializedMainDictionary()) { - asyncReloadUninitializedMainDictionaries(context, allLocales, listener); - } - } - - if (listener != null) { - listener.onUpdateMainDictionaryAvailability(hasAtLeastOneInitializedMainDictionary()); - } - - // Clean up old dictionaries. 
- for (final Locale localeToCleanUp : existingDictionariesToCleanup.keySet()) { - final ArrayList dictTypesToCleanUp = existingDictionariesToCleanup.get(localeToCleanUp); - final DictionaryGroup dictionarySetToCleanup = findDictionaryGroupWithLocale(oldDictionaryGroups, localeToCleanUp); - for (final String dictType : dictTypesToCleanUp) { - dictionarySetToCleanup.closeDict(dictType); - } - } - - if (mValidSpellingWordWriteCache != null) { - mValidSpellingWordWriteCache.evictAll(); - } - if (mValidSpellingWordReadCache != null) { - mValidSpellingWordReadCache.evictAll(); - } - } - - private void asyncReloadUninitializedMainDictionaries(final Context context, - final List locales, final DictionaryInitializationListener listener) { - final CountDownLatch latchForWaitingLoadingMainDictionary = new CountDownLatch(1); - mLatchForWaitingLoadingMainDictionaries = latchForWaitingLoadingMainDictionary; - ExecutorUtils.getBackgroundExecutor(ExecutorUtils.KEYBOARD).execute(() -> - doReloadUninitializedMainDictionaries(context, locales, listener, latchForWaitingLoadingMainDictionary)); - } - - void doReloadUninitializedMainDictionaries(final Context context, final List locales, - final DictionaryInitializationListener listener, - final CountDownLatch latchForWaitingLoadingMainDictionary) { - final Dictionary[] mainDicts = new Dictionary[locales.size()]; - final ArrayList dictionaryGroups = new ArrayList<>(); - for (int i = 0; i < locales.size(); i++) { - Locale locale = locales.get(i); - DictionaryGroup dictionaryGroup = findDictionaryGroupWithLocale(mDictionaryGroups, locale); - if (null == dictionaryGroup) { - // This should never happen, but better safe than crashy - Log.w(TAG, "Expected a dictionary group for " + locale + " but none found"); - return; - } - dictionaryGroups.add(dictionaryGroup); - // do nothing if main dict already initialized - if (dictionaryGroup.mMainDict != null && dictionaryGroup.mMainDict.isInitialized()) { - mainDicts[i] = null; - continue; - } - 
mainDicts[i] = DictionaryFactoryKt.createMainDictionary(context, dictionaryGroup.mLocale); - } - - synchronized (mLock) { - for (int i = 0; i < locales.size(); i++) { - final Locale locale = locales.get(i); - if (mainDicts[i] == null) - continue; - if (locale.equals(dictionaryGroups.get(i).mLocale)) { - dictionaryGroups.get(i).setMainDict(mainDicts[i]); - } else { - // Dictionary facilitator has been reset for another locale. - mainDicts[i].close(); - } - } - } - if (listener != null) { - listener.onUpdateMainDictionaryAvailability(hasAtLeastOneInitializedMainDictionary()); - } - latchForWaitingLoadingMainDictionary.countDown(); - } - - public void closeDictionaries() { - final ArrayList dictionaryGroupsToClose; - synchronized (mLock) { - dictionaryGroupsToClose = new ArrayList<>(mDictionaryGroups); - mDictionaryGroups.clear(); - mDictionaryGroups.add(new DictionaryGroup()); - } - for (DictionaryGroup dictionaryGroup : dictionaryGroupsToClose) { - for (final String dictType : ALL_DICTIONARY_TYPES) { - dictionaryGroup.closeDict(dictType); - } - } - } - - // The main dictionaries are loaded asynchronously. Don't cache the return value - // of these methods. 
- public boolean hasAtLeastOneInitializedMainDictionary() { - for (DictionaryGroup dictionaryGroup : mDictionaryGroups) { - final Dictionary mainDict = dictionaryGroup.getDict(Dictionary.TYPE_MAIN); - if (mainDict != null && mainDict.isInitialized()) return true; - } - return false; - } - - public boolean hasAtLeastOneUninitializedMainDictionary() { - for (DictionaryGroup dictionaryGroup : mDictionaryGroups) { - final Dictionary mainDict = dictionaryGroup.getDict(Dictionary.TYPE_MAIN); - if (mainDict == null || !mainDict.isInitialized()) return true; - } - return false; - } - - public void waitForLoadingMainDictionaries(final long timeout, final TimeUnit unit) - throws InterruptedException { - mLatchForWaitingLoadingMainDictionaries.await(timeout, unit); - } - - public void addToUserHistory(final String suggestion, final boolean wasAutoCapitalized, - @NonNull final NgramContext ngramContext, final long timeStampInSeconds, - final boolean blockPotentiallyOffensive) { - // Update the spelling cache before learning. Words that are not yet added to user history - // and appear in no other language model are not considered valid. 
- putWordIntoValidSpellingWordCache("addToUserHistory", suggestion); - - final String[] words = suggestion.split(Constants.WORD_SEPARATOR); - - // increase / decrease confidence if we have more than one dictionary group - boolean[] validWordForDictionary; // store results to avoid unnecessary duplicate lookups - if (mDictionaryGroups.size() > 1 && words.length == 1) { // ignore if more than a single word, this only happens with (badly working) spaceAwareGesture - validWordForDictionary = adjustConfidencesInternal(suggestion, wasAutoCapitalized); - } else - validWordForDictionary = null; - - // add word to user dictionary if it is in no other dictionary except user history dictionary, - // reasoning: typing the same word again -> we probably want it in some dictionary permanently - final SettingsValues sv = Settings.getValues(); - if (sv.mAddToPersonalDictionary // require the setting - && sv.mAutoCorrectEnabled == sv.mAutoCorrectionEnabledPerUserSettings // don't add if user wants autocorrect but input field does not, see https://github.com/Helium314/HeliBoard/issues/427#issuecomment-1905438000 - && mDictionaryGroups.get(0).hasDict(Dictionary.TYPE_USER_HISTORY, mDictionaryGroups.get(0).mAccount) // require personalized suggestions - && !wasAutoCapitalized // we can't be 100% sure about what the user intended to type, so better don't add it - && words.length == 1) { // ignore if more than a single word, this only happens with (badly working) spaceAwareGesture - addToPersonalDictionaryIfInvalidButInHistory(suggestion, validWordForDictionary); - } - - NgramContext ngramContextForCurrentWord = ngramContext; - for (int i = 0; i < words.length; i++) { - final String currentWord = words[i]; - final boolean wasCurrentWordAutoCapitalized = (i == 0) && wasAutoCapitalized; - // add to history for preferred dictionary group, to avoid mixing languages in history - addWordToUserHistory(getCurrentlyPreferredDictionaryGroup(), ngramContextForCurrentWord, currentWord, - 
wasCurrentWordAutoCapitalized, (int) timeStampInSeconds, - blockPotentiallyOffensive); - ngramContextForCurrentWord = - ngramContextForCurrentWord.getNextNgramContext(new WordInfo(currentWord)); - - // remove manually entered blacklisted words from blacklist - for (DictionaryGroup dictionaryGroup : mDictionaryGroups) { - if (dictionaryGroup.blacklist.remove(currentWord)) - removeWordFromBlacklistFile(currentWord, dictionaryGroup.blacklistFileName); - } - } - } - - @Override public void adjustConfidences(final String word, final boolean wasAutoCapitalized) { - if (mDictionaryGroups.size() > 1 && !word.contains(Constants.WORD_SEPARATOR)) - adjustConfidencesInternal(word, wasAutoCapitalized); - } - - private boolean[] adjustConfidencesInternal(final String word, final boolean wasAutoCapitalized) { - final boolean[] validWordForDictionary = new boolean[mDictionaryGroups.size()]; - // if suggestion was auto-capitalized, check against both the suggestion and the de-capitalized suggestion - final String decapitalizedSuggestion; - if (wasAutoCapitalized) - decapitalizedSuggestion = StringUtilsKt.decapitalize(word, getCurrentLocale()); - else - decapitalizedSuggestion = word; - for (int i = 0; i < mDictionaryGroups.size(); i ++) { - final DictionaryGroup dictionaryGroup = mDictionaryGroups.get(i); - final boolean isValidWord = isValidWord(word, ALL_DICTIONARY_TYPES, dictionaryGroup); - if (isValidWord || (wasAutoCapitalized && isValidWord(decapitalizedSuggestion, ALL_DICTIONARY_TYPES, dictionaryGroup))) - dictionaryGroup.increaseConfidence(); - else dictionaryGroup.decreaseConfidence(); - validWordForDictionary[i] = isValidWord; - } - return validWordForDictionary; - } - - // main and secondary isValid provided to avoid duplicate lookups - private void addToPersonalDictionaryIfInvalidButInHistory(String word, boolean[] validWordForDictionary) { - final DictionaryGroup dictionaryGroup = getClearlyPreferredDictionaryGroupOrNull(); - if (dictionaryGroup == null) return; - if 
(validWordForDictionary == null - ? isValidWord(word, ALL_DICTIONARY_TYPES, dictionaryGroup) - : validWordForDictionary[mDictionaryGroups.indexOf(dictionaryGroup)] - ) - return; - - final ExpandableBinaryDictionary userDict = dictionaryGroup.getSubDict(Dictionary.TYPE_USER); - final Dictionary userHistoryDict = dictionaryGroup.getSubDict(Dictionary.TYPE_USER_HISTORY); - if (userDict == null || userHistoryDict == null) return; - - // user history always reports words as invalid, so here we need to check isInDictionary instead - // update: now getFrequency returns the correct value instead of -1, so better use that - // a little testing shows that after 2 times adding, the frequency is 111, and then rises slowly with usage - // 120 is after 3 uses of the word, so we simply require more than that. - // also maybe a problem: words added to dictionaries (user and history) are apparently found - // only after some delay. but this is not too bad, it just delays adding - if (userHistoryDict.getFrequency(word) > 120) { - if (userDict.isInDictionary(word)) // is this check necessary? - return; - ExecutorUtils.getBackgroundExecutor(ExecutorUtils.KEYBOARD).execute(() -> - UserDictionary.Words.addWord(userDict.mContext, word, - 250 /*FREQUENCY_FOR_USER_DICTIONARY_ADDS*/, null, dictionaryGroup.mLocale)); - } - } - - private void putWordIntoValidSpellingWordCache( - @NonNull final String caller, - @NonNull final String originalWord) { - if (mValidSpellingWordWriteCache == null) { - return; - } - - final String lowerCaseWord = originalWord.toLowerCase(getCurrentLocale()); - final boolean lowerCaseValid = isValidSpellingWord(lowerCaseWord); - mValidSpellingWordWriteCache.put(lowerCaseWord, lowerCaseValid); - - final String capitalWord = - StringUtils.capitalizeFirstAndDowncaseRest(originalWord, getCurrentLocale()); - final boolean capitalValid; - if (lowerCaseValid) { - // The lower case form of the word is valid, so the upper case must be valid. 
- capitalValid = true; - } else { - capitalValid = isValidSpellingWord(capitalWord); - } - mValidSpellingWordWriteCache.put(capitalWord, capitalValid); - } - - private void addWordToUserHistory(final DictionaryGroup dictionaryGroup, - final NgramContext ngramContext, final String word, final boolean wasAutoCapitalized, - final int timeStampInSeconds, final boolean blockPotentiallyOffensive) { - final ExpandableBinaryDictionary userHistoryDictionary = - dictionaryGroup.getSubDict(Dictionary.TYPE_USER_HISTORY); - if (userHistoryDictionary == null || !hasLocale(userHistoryDictionary.mLocale)) { - return; - } - final int mainFreq = dictionaryGroup.hasDict(Dictionary.TYPE_MAIN, null) - ? dictionaryGroup.getDict(Dictionary.TYPE_MAIN).getFrequency(word) - : Dictionary.NOT_A_PROBABILITY; - if (mainFreq == 0 && blockPotentiallyOffensive) { - return; - } - if (mTryChangingWords) - mTryChangingWords = ngramContext.changeWordIfAfterBeginningOfSentence(mChangeFrom, mChangeTo); - final String secondWord; - // check for isBeginningOfSentenceContext too, because not all text fields auto-capitalize in this case - // and even if the user capitalizes manually, they most likely don't want the capitalized form suggested - if (wasAutoCapitalized || ngramContext.isBeginningOfSentenceContext()) { - // used word with lower-case first letter instead of all lower-case, as auto-capitalize - // does not affect the other letters - final String decapitalizedWord = StringUtilsKt.decapitalize(word, dictionaryGroup.mLocale); - if (isValidWord(word, ALL_DICTIONARY_TYPES, dictionaryGroup) && !isValidWord(decapitalizedWord, ALL_DICTIONARY_TYPES, dictionaryGroup)) { - // If the word was auto-capitalized and exists only as a capitalized word in the - // dictionary, then we must not downcase it before registering it. 
For example, - // the name of the contacts in start-of-sentence position would come here with the - // wasAutoCapitalized flag: if we downcase it, we'd register a lower-case version - // of that contact's name which would end up popping in suggestions. - secondWord = word; - } else { - // If however the word is not in the dictionary, or exists as a de-capitalized word - // only, then we consider that was a lower-case word that had been auto-capitalized. - secondWord = decapitalizedWord; - mTryChangingWords = true; - mChangeFrom = word; - mChangeTo = secondWord; - } - } else { - // HACK: We'd like to avoid adding the capitalized form of common words to the User - // History dictionary in order to avoid suggesting them until the dictionary - // consolidation is done. - // TODO: Remove this hack when ready. - final String lowerCasedWord = word.toLowerCase(dictionaryGroup.mLocale); - final int lowerCaseFreqInMainDict = dictionaryGroup.hasDict(Dictionary.TYPE_MAIN, null) - ? dictionaryGroup.getDict(Dictionary.TYPE_MAIN).getFrequency(lowerCasedWord) - : Dictionary.NOT_A_PROBABILITY; - if (mainFreq < lowerCaseFreqInMainDict - && lowerCaseFreqInMainDict >= CAPITALIZED_FORM_MAX_PROBABILITY_FOR_INSERT) { - // Use lower cased word as the word can be a distracter of the popular word. - secondWord = lowerCasedWord; - } else { - secondWord = word; - } - } - // We demote unrecognized words (frequency < 0, below) by specifying them as "invalid". - // We don't add words with 0-frequency (assuming they would be profanity etc.). - // comment: so this means words not in main dict are always invalid... 
weird (but still works) - final boolean isValid = mainFreq > 0; - UserHistoryDictionary.addToDictionary(userHistoryDictionary, ngramContext, secondWord, - isValid, timeStampInSeconds); - } - - /** returns the dictionaryGroup with most confidence, first group when tied */ - private DictionaryGroup getCurrentlyPreferredDictionaryGroup() { - DictionaryGroup dictGroup = null; - int highestConfidence = -1; - for (DictionaryGroup dictionaryGroup : mDictionaryGroups) { - if (dictionaryGroup.mConfidence > highestConfidence) { - dictGroup = dictionaryGroup; - highestConfidence = dictGroup.mConfidence; - } - } - return dictGroup; - } - - private DictionaryGroup getClearlyPreferredDictionaryGroupOrNull() { - // we want one clearly preferred group and return null otherwise - if (mDictionaryGroups.size() == 1) - return mDictionaryGroups.get(0); - // that preferred group should have at least MAX_CONFIDENCE, and all others should have 0 (we want to be really sure!) - int preferredGroup = -1; - for (int i = 0; i < mDictionaryGroups.size(); i ++) { - final DictionaryGroup dictionaryGroup = mDictionaryGroups.get(i); - if (dictionaryGroup.mConfidence == 0) continue; - if (dictionaryGroup.mConfidence >= DictionaryGroup.MAX_CONFIDENCE && preferredGroup == -1) { - preferredGroup = i; - continue; - } - // either we have 2 groups with high confidence, or a group with low but non-0 confidence - // in either case, we're not sure enough and return null - return null; - } - if (preferredGroup == -1) return null; - return mDictionaryGroups.get(preferredGroup); - } - - private void removeWord(final String dictName, final String word) { - final ExpandableBinaryDictionary dictionary = getCurrentlyPreferredDictionaryGroup().getSubDict(dictName); - if (dictionary != null) { - dictionary.removeUnigramEntryDynamically(word); - } - } - - @Override - public void unlearnFromUserHistory(final String word, - @NonNull final NgramContext ngramContext, final long timeStampInSeconds, - final int eventType) { 
- // TODO: Decide whether or not to remove the word on EVENT_BACKSPACE. - if (eventType != Constants.EVENT_BACKSPACE) { - removeWord(Dictionary.TYPE_USER_HISTORY, word); - } - - // Update the spelling cache after unlearning. Words that are removed from user history - // and appear in no other language model are not considered valid. - putWordIntoValidSpellingWordCache("unlearnFromUserHistory", word.toLowerCase()); - } - - // TODO: Revise the way to fusion suggestion results. - @Override - @SuppressWarnings("unchecked") - @NonNull public SuggestionResults getSuggestionResults(ComposedData composedData, - NgramContext ngramContext, @NonNull final Keyboard keyboard, - SettingsValuesForSuggestion settingsValuesForSuggestion, int sessionId, - int inputStyle) { - long proximityInfoHandle = keyboard.getProximityInfo().getNativeProximityInfo(); - final SuggestionResults suggestionResults = new SuggestionResults( - SuggestedWords.MAX_SUGGESTIONS, ngramContext.isBeginningOfSentenceContext(), - false /* firstSuggestionExceedsConfidenceThreshold */); - final float[] weightOfLangModelVsSpatialModel = - new float[] { Dictionary.NOT_A_WEIGHT_OF_LANG_MODEL_VS_SPATIAL_MODEL }; - - // start getting suggestions for non-main locales first, but in background - final ArrayList[] otherDictionarySuggestions = (ArrayList[]) new ArrayList[mDictionaryGroups.size() - 1]; - final CountDownLatch waitForOtherDictionaries; - if (mDictionaryGroups.size() > 1) { - waitForOtherDictionaries = new CountDownLatch(mDictionaryGroups.size() - 1); - for (int i = 1; i < mDictionaryGroups.size(); i ++) { - final DictionaryGroup dictionaryGroup = mDictionaryGroups.get(i); - final int index = i - 1; - ExecutorUtils.getBackgroundExecutor(ExecutorUtils.KEYBOARD).execute(() -> { - otherDictionarySuggestions[index] = getSuggestions(composedData, - ngramContext, settingsValuesForSuggestion, sessionId, proximityInfoHandle, - weightOfLangModelVsSpatialModel, dictionaryGroup); - waitForOtherDictionaries.countDown(); - 
}); - } - } else - waitForOtherDictionaries = null; - - // get main locale suggestions - final ArrayList dictionarySuggestions = getSuggestions(composedData, - ngramContext, settingsValuesForSuggestion, sessionId, proximityInfoHandle, - weightOfLangModelVsSpatialModel, mDictionaryGroups.get(0)); - suggestionResults.addAll(dictionarySuggestions); - if (null != suggestionResults.mRawSuggestions) { - suggestionResults.mRawSuggestions.addAll(dictionarySuggestions); - } - - // wait for other locale suggestions - if (waitForOtherDictionaries != null) { - try { waitForOtherDictionaries.await(); } - catch (InterruptedException e) { - Log.w(TAG, "Interrupted while trying to get secondary locale suggestions", e); - } - for (int i = 1; i < mDictionaryGroups.size(); i ++) { - suggestionResults.addAll(otherDictionarySuggestions[i - 1]); - if (null != suggestionResults.mRawSuggestions) { - suggestionResults.mRawSuggestions.addAll(otherDictionarySuggestions[i - 1]); - } - } - } - - return suggestionResults; - } - - private ArrayList getSuggestions(ComposedData composedData, - NgramContext ngramContext, SettingsValuesForSuggestion settingsValuesForSuggestion, - int sessionId, long proximityInfoHandle, float[] weightOfLangModelVsSpatialModel, - DictionaryGroup dictGroup) { - final ArrayList suggestions = new ArrayList<>(); - float weightForLocale = composedData.mIsBatchMode - ? 
dictGroup.getWeightForGesturingInLocale(mDictionaryGroups) - : dictGroup.getWeightForTypingInLocale(mDictionaryGroups); - for (final String dictType : ALL_DICTIONARY_TYPES) { - final Dictionary dictionary = dictGroup.getDict(dictType); - if (null == dictionary) continue; - final ArrayList dictionarySuggestions = - dictionary.getSuggestions(composedData, ngramContext, - proximityInfoHandle, settingsValuesForSuggestion, sessionId, - weightForLocale, weightOfLangModelVsSpatialModel); - if (null == dictionarySuggestions) continue; - - // for some reason, garbage words are produced when glide typing - // for user history and main dictionary we can filter them out by checking whether the - // dictionary actually contains the word - // but personal dictionary and addon dictionaries may contain shortcuts, which do not - // pass an isInDictionary check (e.g. emojis) - // (if the main dict contains shortcuts to non-words, this will break) - final boolean checkForGarbage = composedData.mIsBatchMode && (dictType.equals(Dictionary.TYPE_USER_HISTORY) || dictType.equals(Dictionary.TYPE_MAIN)); - for (SuggestedWordInfo info : dictionarySuggestions) { - final String word = info.getWord(); - if (!isBlacklisted(word) && !SupportedEmojis.INSTANCE.isUnsupported(word)) { // don't add blacklisted words and unsupported emojis - if (checkForGarbage - // only check history and "main main dictionary" - // consider the user might use custom main dictionary containing shortcuts - // assume this is unlikely to happen, and take care about common shortcuts that are not actual words (emoji, symbols) - && word.length() > 2 // should exclude most symbol shortcuts - && info.mSourceDict.mDictType.equals(dictType) // dictType is always main, but info.mSourceDict.mDictType contains the actual dict (main dict is a dictionary group) - && !StringUtils.mightBeEmoji(word) // emojis often have more than 2 chars; simplified check for performance reasons - && !dictionary.isInDictionary(word)) - continue; - 
suggestions.add(info); - } - } - } - return suggestions; - } - - // Spell checker is using this, and has its own instance of DictionaryFacilitatorImpl, - // meaning that it always has default mConfidence. So we cannot choose to only check preferred - // locale, and instead simply return true if word is in any of the available dictionaries - public boolean isValidSpellingWord(final String word) { - if (mValidSpellingWordReadCache != null) { - final Boolean cachedValue = mValidSpellingWordReadCache.get(word); - if (cachedValue != null) { - return cachedValue; - } - } - boolean result = false; - for (DictionaryGroup dictionaryGroup : mDictionaryGroups) { - if (isValidWord(word, ALL_DICTIONARY_TYPES, dictionaryGroup)) { - result = true; - break; - } - } - if (mValidSpellingWordReadCache != null) - mValidSpellingWordReadCache.put(word, result); - return result; - } - - // this is unused, so leave it for now (redirecting to isValidWord seems to defeat the purpose...) - public boolean isValidSuggestionWord(final String word) { - return isValidWord(word, ALL_DICTIONARY_TYPES, mDictionaryGroups.get(0)); - } - - private boolean isValidWord(final String word, final String[] dictionariesToCheck, final DictionaryGroup dictionaryGroup) { - if (TextUtils.isEmpty(word)) { - return false; - } - if (isBlacklisted(word)) return false; - for (final String dictType : dictionariesToCheck) { - final Dictionary dictionary = dictionaryGroup.getDict(dictType); - // Ideally the passed map would come out of a {@link java.util.concurrent.Future} and - // would be immutable once it's finished initializing, but concretely a null test is - // probably good enough for the time being. 
- if (null == dictionary) continue; - if (dictionary.isValidWord(word)) { - return true; - } - } - return false; - } - - private boolean isBlacklisted(final String word) { - for (DictionaryGroup dictionaryGroup : mDictionaryGroups) { - if (dictionaryGroup.blacklist.contains(word)) - return true; - } - return false; - } - - @Override - public void removeWord(String word) { - for (DictionaryGroup dictionaryGroup : mDictionaryGroups) { - removeWordFromGroup(word, dictionaryGroup); - } - } - - private void removeWordFromGroup(String word, DictionaryGroup group) { - // remove from user history - final ExpandableBinaryDictionary historyDict = group.getSubDict(Dictionary.TYPE_USER_HISTORY); - if (historyDict != null) { - historyDict.removeUnigramEntryDynamically(word); - } - - // and from personal dictionary - final ExpandableBinaryDictionary userDict = group.getSubDict(Dictionary.TYPE_USER); - if (userDict != null) { - userDict.removeUnigramEntryDynamically(word); - } - - final ExpandableBinaryDictionary contactsDict = group.getSubDict(Dictionary.TYPE_CONTACTS); - if (contactsDict != null && contactsDict.isInDictionary(word)) { - contactsDict.removeUnigramEntryDynamically(word); // will be gone until next reload of dict - addToBlacklist(word, group); - return; - } - - final ExpandableBinaryDictionary appsDict = group.getSubDict(Dictionary.TYPE_APPS); - if (appsDict != null && appsDict.isInDictionary(word)) { - appsDict.removeUnigramEntryDynamically(word); // will be gone until next reload of dict - addToBlacklist(word, group); - return; - } - - if (!group.hasDict(Dictionary.TYPE_MAIN, null)) { - return; - } - - if (group.getDict(Dictionary.TYPE_MAIN).isValidWord(word)) { - addToBlacklist(word, group); - return; - } - - final String lowercase = word.toLowerCase(group.mLocale); - if (group.getDict(Dictionary.TYPE_MAIN).isValidWord(lowercase)) { - addToBlacklist(lowercase, group); - } - } - - private void addToBlacklist(final String word, final DictionaryGroup group) { - if 
(!group.blacklist.add(word)) - return; - ExecutorUtils.getBackgroundExecutor(ExecutorUtils.KEYBOARD).execute(() -> { - try { - FileOutputStream fos = new FileOutputStream(group.blacklistFileName, true); - fos.write((word + "\n").getBytes(StandardCharsets.UTF_8)); - fos.close(); - } catch (IOException e) { - Log.e(TAG, "Exception while trying to write blacklist", e); - } - }); - } - - private ArrayList readBlacklistFile(final String filename) { - final ArrayList blacklist = new ArrayList<>(); - if (filename == null) return blacklist; - File blacklistFile = new File(filename); - if (!blacklistFile.exists()) return blacklist; - try { - final Scanner scanner = new Scanner(blacklistFile, StandardCharsets.UTF_8.name()).useDelimiter("\n"); - while (scanner.hasNext()) { - blacklist.add(scanner.next()); - } - } catch (IOException e) { - Log.e(TAG, "Exception while reading blacklist", e); - } - return blacklist; - } - - private void removeWordFromBlacklistFile(String word, String filename) { - ExecutorUtils.getBackgroundExecutor(ExecutorUtils.KEYBOARD).execute(() -> { - try { - ArrayList blacklist = readBlacklistFile(filename); - blacklist.remove(word); - FileOutputStream fos = new FileOutputStream(filename); - for (String entry : blacklist) { - fos.write((entry + "\n").getBytes(StandardCharsets.UTF_8)); - } - fos.close(); - } catch (IOException e) { - Log.e(TAG, "Exception while trying to write blacklist" + filename, e); - } - }); - - } - - @Override - public boolean clearUserHistoryDictionary(final Context context) { - for (DictionaryGroup dictionaryGroup : mDictionaryGroups) { - final ExpandableBinaryDictionary dictionary = dictionaryGroup.getSubDict(Dictionary.TYPE_USER_HISTORY); - if (dictionary == null) { - return false; // should only ever happen for primary dictionary, so this is safe - } - dictionary.clear(); - } - return true; - } - - @Override - public String localesAndConfidences() { - if (mDictionaryGroups.size() < 2) return null; - final StringBuilder sb = new 
StringBuilder(); - for (final DictionaryGroup dictGroup : mDictionaryGroups) { - if (sb.length() > 0) - sb.append(", "); - sb.append(dictGroup.mLocale).append(" ").append(dictGroup.mConfidence); - } - return sb.toString(); - } - - @Override - public void dumpDictionaryForDebug(final String dictName) { - final ExpandableBinaryDictionary dictToDump = mDictionaryGroups.get(0).getSubDict(dictName); - if (dictToDump == null) { - Log.e(TAG, "Cannot dump " + dictName + ". " - + "The dictionary is not being used for suggestion or cannot be dumped."); - return; - } - dictToDump.dumpAllWordsForDebug(); - } - - @Override - // this is unused, so leave it for now - @NonNull public List getDictionaryStats(final Context context) { - final ArrayList statsOfEnabledSubDicts = new ArrayList<>(); - for (final String dictType : DYNAMIC_DICTIONARY_TYPES) { - final ExpandableBinaryDictionary dictionary = mDictionaryGroups.get(0).getSubDict(dictType); - if (dictionary == null) continue; - statsOfEnabledSubDicts.add(dictionary.getDictionaryStats()); - } - return statsOfEnabledSubDicts; - } - - @Override - public String dump(final Context context) { - return ""; - } -} diff --git a/app/src/main/java/helium314/keyboard/latin/DictionaryFacilitatorImpl.kt b/app/src/main/java/helium314/keyboard/latin/DictionaryFacilitatorImpl.kt new file mode 100644 index 000000000..c5212c0d5 --- /dev/null +++ b/app/src/main/java/helium314/keyboard/latin/DictionaryFacilitatorImpl.kt @@ -0,0 +1,827 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * modified + * SPDX-License-Identifier: Apache-2.0 AND GPL-3.0-only + */ +package helium314.keyboard.latin + +import android.Manifest +import android.content.Context +import android.provider.UserDictionary +import android.util.LruCache +import helium314.keyboard.keyboard.Keyboard +import helium314.keyboard.keyboard.emoji.SupportedEmojis +import helium314.keyboard.latin.DictionaryFacilitator.DictionaryInitializationListener +import 
helium314.keyboard.latin.NgramContext.WordInfo +import helium314.keyboard.latin.SuggestedWords.SuggestedWordInfo +import helium314.keyboard.latin.common.ComposedData +import helium314.keyboard.latin.common.Constants +import helium314.keyboard.latin.common.StringUtils +import helium314.keyboard.latin.common.decapitalize +import helium314.keyboard.latin.common.splitOnWhitespace +import helium314.keyboard.latin.permissions.PermissionsUtil +import helium314.keyboard.latin.personalization.UserHistoryDictionary +import helium314.keyboard.latin.settings.Settings +import helium314.keyboard.latin.settings.SettingsValuesForSuggestion +import helium314.keyboard.latin.utils.Log +import helium314.keyboard.latin.utils.SubtypeSettings +import helium314.keyboard.latin.utils.SuggestionResults +import helium314.keyboard.latin.utils.getSecondaryLocales +import helium314.keyboard.latin.utils.locale +import helium314.keyboard.latin.utils.prefs +import kotlinx.coroutines.CoroutineScope +import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.async +import kotlinx.coroutines.awaitAll +import kotlinx.coroutines.launch +import kotlinx.coroutines.runBlocking +import java.io.File +import java.io.IOException +import java.util.Locale +import java.util.concurrent.ConcurrentHashMap +import java.util.concurrent.CountDownLatch +import java.util.concurrent.TimeUnit +import kotlin.concurrent.Volatile + +/** + * Facilitates interaction with different kinds of dictionaries. Provides APIs + * to instantiate and select the correct dictionaries (based on language or account), + * update entries and fetch suggestions. + * + * + * Currently AndroidSpellCheckerService and LatinIME both use DictionaryFacilitator as + * a client for interacting with dictionaries. 
+ */ +class DictionaryFacilitatorImpl : DictionaryFacilitator { + private var dictionaryGroups = listOf(DictionaryGroup()) + + @Volatile + private var mLatchForWaitingLoadingMainDictionaries = CountDownLatch(0) + + // To synchronize assigning mDictionaryGroup to ensure closing dictionaries. + private val mLock = Any() + + // library does not deal well with ngram history for auto-capitalized words, so we adjust the ngram + // context to store next word suggestions for such cases + // todo: this looks awful, find a better solution / workaround + private var tryChangingWords = false + private var changeFrom = "" + private var changeTo = "" + + // todo: write cache never set, and never read (only written) + // tried to use read cache for a while, but small performance improvements are not worth the work, + // see https://github.com/Helium314/HeliBoard/issues/307 + private var mValidSpellingWordReadCache: LruCache? = null + private var mValidSpellingWordWriteCache: LruCache? = null + + private val scope = CoroutineScope(Dispatchers.Default) + + override fun setValidSpellingWordReadCache(cache: LruCache) { + mValidSpellingWordReadCache = cache + } + + override fun setValidSpellingWordWriteCache(cache: LruCache) { + mValidSpellingWordWriteCache = cache + } + + // judging by usage before adding multilingual typing, this should check primary group locale only + override fun isForLocale(locale: Locale?): Boolean { + return locale != null && locale == dictionaryGroups[0].locale + } + + override fun onStartInput() { + } + + override fun onFinishInput(context: Context) { + for (dictGroup in dictionaryGroups) { + DictionaryFacilitator.ALL_DICTIONARY_TYPES.forEach { dictGroup.getDict(it)?.onFinishInput() } + } + } + + override fun isActive(): Boolean { + return dictionaryGroups[0].locale.language.isNotEmpty() + } + + override fun getMainLocale(): Locale { + return dictionaryGroups[0].locale + } + + override fun getCurrentLocale(): Locale { + return 
currentlyPreferredDictionaryGroup.locale + } + + override fun getAccount(): String? { + return null + } + + override fun usesSameSettings( + locales: List, contacts: Boolean, apps: Boolean, personalization: Boolean, account: String? + ): Boolean { + val dictGroup = dictionaryGroups[0] // settings are the same for all groups + return contacts == dictGroup.hasDict(Dictionary.TYPE_CONTACTS, account) + && apps == dictGroup.hasDict(Dictionary.TYPE_APPS, account) + && personalization == dictGroup.hasDict(Dictionary.TYPE_USER_HISTORY, account) + && account == dictGroup.account + && locales.size == dictionaryGroups.size + && locales.none { findDictionaryGroupWithLocale(dictionaryGroups, it) == null } + } + + // -------------- managing (loading & closing) dictionaries ------------ + + override fun resetDictionaries( + context: Context, + newLocale: Locale, + useContactsDict: Boolean, + useAppsDict: Boolean, + usePersonalizedDicts: Boolean, + forceReloadMainDictionary: Boolean, + account: String?, + dictNamePrefix: String, + listener: DictionaryInitializationListener? + ) { + Log.i(TAG, "resetDictionaries, force reloading main dictionary: $forceReloadMainDictionary") + + val locales = getUsedLocales(newLocale, context) + + val subDictTypesToUse = listOfNotNull( + Dictionary.TYPE_USER, + if (useAppsDict) Dictionary.TYPE_APPS else null, + if (usePersonalizedDicts) Dictionary.TYPE_USER_HISTORY else null, + if (useContactsDict && PermissionsUtil.checkAllPermissionsGranted(context, Manifest.permission.READ_CONTACTS)) + Dictionary.TYPE_CONTACTS else null + ) + + val (newDictionaryGroups, existingDictsToCleanup) = + getNewDictGroupsAndDictsToCleanup(locales, subDictTypesToUse, forceReloadMainDictionary, dictNamePrefix, context) + + // Replace Dictionaries. 
+ val oldDictionaryGroups: List + synchronized(mLock) { + oldDictionaryGroups = dictionaryGroups + dictionaryGroups = newDictionaryGroups + if (hasAtLeastOneUninitializedMainDictionary()) { + asyncReloadUninitializedMainDictionaries(context, locales, listener) + } + } + + listener?.onUpdateMainDictionaryAvailability(hasAtLeastOneInitializedMainDictionary()) + + // Clean up old dictionaries. + existingDictsToCleanup.forEach { (locale, dictTypes) -> + val dictGroupToCleanup = findDictionaryGroupWithLocale(oldDictionaryGroups, locale) ?: return@forEach + for (dictType in dictTypes) { + dictGroupToCleanup.closeDict(dictType) + } + } + + mValidSpellingWordWriteCache?.evictAll() + mValidSpellingWordReadCache?.evictAll() + } + + /** creates dictionaryGroups for [newLocales] with given [newSubDictTypes], trying to re-use existing dictionaries. + * returns the new dictionaryGroups and unused dictionary types by locale */ + private fun getNewDictGroupsAndDictsToCleanup( + newLocales: Collection, + newSubDictTypes: Collection, + forceReload: Boolean, + dictNamePrefix: String, + context: Context + ): Pair, Map>> { + // Gather all dictionaries by locale. We may remove some from the list later. + val existingDictsToCleanup = HashMap>() + for (dictGroup in dictionaryGroups) { + existingDictsToCleanup[dictGroup.locale] = DictionaryFacilitator.ALL_DICTIONARY_TYPES + .filterTo(mutableListOf()) { dictGroup.hasDict(it, account) } + } + + // create new dictionary groups and remove dictionaries to re-use from existingDictsToCleanup + val newDictionaryGroups = mutableListOf() + for (locale in newLocales) { + // get existing dictionary group for new locale + val oldDictGroupForLocale = findDictionaryGroupWithLocale(dictionaryGroups, locale) + val dictTypesToCleanupForLocale = existingDictsToCleanup[locale] + + // create new or re-use already loaded main dict + val mainDict: Dictionary? 
+ if (forceReload || oldDictGroupForLocale == null + || !oldDictGroupForLocale.hasDict(Dictionary.TYPE_MAIN, account) + ) { + mainDict = null // null main dicts will be loaded later in asyncReloadUninitializedMainDictionaries + } else { + mainDict = oldDictGroupForLocale.getDict(Dictionary.TYPE_MAIN) + dictTypesToCleanupForLocale?.remove(Dictionary.TYPE_MAIN) + } + + // create new or re-use already loaded sub-dicts + val subDicts: MutableMap = HashMap() + for (subDictType in newSubDictTypes) { + val subDict: ExpandableBinaryDictionary + if (forceReload || oldDictGroupForLocale == null + || !oldDictGroupForLocale.hasDict(subDictType, account) + ) { + // Create a new dictionary. + subDict = getSubDict(subDictType, context, locale, null, dictNamePrefix, account) ?: continue + } else { + // Reuse the existing dictionary. + subDict = oldDictGroupForLocale.getSubDict(subDictType) ?: continue + dictTypesToCleanupForLocale?.remove(subDictType) + } + subDicts[subDictType] = subDict + } + val newDictGroup = DictionaryGroup(locale, mainDict, account, subDicts, context) + newDictionaryGroups.add(newDictGroup) + } + return newDictionaryGroups to existingDictsToCleanup + } + + private fun asyncReloadUninitializedMainDictionaries( + context: Context, locales: Collection, listener: DictionaryInitializationListener? 
+ ) { + val latchForWaitingLoadingMainDictionary = CountDownLatch(1) + mLatchForWaitingLoadingMainDictionaries = latchForWaitingLoadingMainDictionary + scope.launch { + val dictGroupsWithNewMainDict = locales.mapNotNull { + val dictionaryGroup = findDictionaryGroupWithLocale(dictionaryGroups, it) + if (dictionaryGroup == null) { + Log.w(TAG, "Expected a dictionary group for $it but none found") + return@mapNotNull null // This should never happen + } + if (dictionaryGroup.getDict(Dictionary.TYPE_MAIN)?.isInitialized == true) null + else dictionaryGroup to createMainDictionary(context, it) + } + synchronized(mLock) { + dictGroupsWithNewMainDict.forEach { (dictGroup, mainDict) -> + dictGroup.setMainDict(mainDict) + } + } + + listener?.onUpdateMainDictionaryAvailability(hasAtLeastOneInitializedMainDictionary()) + latchForWaitingLoadingMainDictionary.countDown() + } + } + + override fun closeDictionaries() { + val dictionaryGroupsToClose: List + synchronized(mLock) { + dictionaryGroupsToClose = dictionaryGroups + dictionaryGroups = listOf(DictionaryGroup()) + } + for (dictionaryGroup in dictionaryGroupsToClose) { + for (dictType in DictionaryFacilitator.ALL_DICTIONARY_TYPES) { + dictionaryGroup.closeDict(dictType) + } + } + } + + // The main dictionaries are loaded asynchronously. Don't cache the return value of these methods. 
+ override fun hasAtLeastOneInitializedMainDictionary(): Boolean = + dictionaryGroups.any { it.getDict(Dictionary.TYPE_MAIN)?.isInitialized == true } + + override fun hasAtLeastOneUninitializedMainDictionary(): Boolean = + dictionaryGroups.any { it.getDict(Dictionary.TYPE_MAIN)?.isInitialized != true } + + @Throws(InterruptedException::class) + override fun waitForLoadingMainDictionaries(timeout: Long, unit: TimeUnit) { + mLatchForWaitingLoadingMainDictionaries.await(timeout, unit) + } + + // -------------- actual dictionary stuff like getting suggestions ------------ + + override fun addToUserHistory( + suggestion: String, wasAutoCapitalized: Boolean, ngramContext: NgramContext, + timeStampInSeconds: Long, blockPotentiallyOffensive: Boolean + ) { + // Update the spelling cache before learning. Words that are not yet added to user history + // and appear in no other language model are not considered valid. + putWordIntoValidSpellingWordCache("addToUserHistory", suggestion) + + val words = suggestion.splitOnWhitespace().dropLastWhile { it.isEmpty() } + + // increase / decrease confidence + if (words.size == 1) // ignore if more than a single word, which only happens with (badly working) spaceAwareGesture + adjustConfidences(suggestion, wasAutoCapitalized) + + // Add word to user dictionary if it is in no other dictionary except user history dictionary (i.e. typed again). 
+ val sv = Settings.getValues() + if (sv.mAddToPersonalDictionary // require the opt-in + && sv.mAutoCorrectEnabled == sv.mAutoCorrectionEnabledPerUserSettings // don't add if user wants autocorrect but input field does not, see https://github.com/Helium314/HeliBoard/issues/427#issuecomment-1905438000 + && dictionaryGroups[0].hasDict(Dictionary.TYPE_USER_HISTORY, dictionaryGroups[0].account) // require personalized suggestions + && !wasAutoCapitalized // we can't be 100% sure about what the user intended to type, so better don't add it + && words.size == 1 // only single words + ) { + addToPersonalDictionaryIfInvalidButInHistory(suggestion) + } + + var ngramContextForCurrentWord = ngramContext + val preferredGroup = currentlyPreferredDictionaryGroup + for (i in words.indices) { + val currentWord = words[i] + val wasCurrentWordAutoCapitalized = (i == 0) && wasAutoCapitalized + // add to history for preferred dictionary group, to avoid mixing languages in history + addWordToUserHistory( + preferredGroup, ngramContextForCurrentWord, currentWord, + wasCurrentWordAutoCapitalized, timeStampInSeconds.toInt(), blockPotentiallyOffensive + ) + ngramContextForCurrentWord = ngramContextForCurrentWord.getNextNgramContext(WordInfo(currentWord)) + + // remove manually entered blacklisted words from blacklist for likely matching languages + dictionaryGroups.filter { it.confidence == preferredGroup.confidence }.forEach { + it.removeFromBlacklist(currentWord) + } + } + } + + private fun addWordToUserHistory( + dictionaryGroup: DictionaryGroup, ngramContext: NgramContext, word: String, wasAutoCapitalized: Boolean, + timeStampInSeconds: Int, blockPotentiallyOffensive: Boolean + ) { + val userHistoryDictionary = dictionaryGroup.getSubDict(Dictionary.TYPE_USER_HISTORY) ?: return + + val mainFreq = dictionaryGroup.getDict(Dictionary.TYPE_MAIN)?.getFrequency(word) ?: Dictionary.NOT_A_PROBABILITY + if (mainFreq == 0 && blockPotentiallyOffensive) + return + if (tryChangingWords) // todo: 
ew... + tryChangingWords = ngramContext.changeWordIfAfterBeginningOfSentence(changeFrom, changeTo) + + val wordToUse: String + // Check for isBeginningOfSentenceContext too, because not all text fields auto-capitalize in this case. + // Even if the user capitalizes manually, they most likely don't want the capitalized form suggested. + if (wasAutoCapitalized || ngramContext.isBeginningOfSentenceContext) { + val decapitalizedWord = word.decapitalize(dictionaryGroup.locale) // try undoing auto-capitalization + if (isValidWord(word, DictionaryFacilitator.ALL_DICTIONARY_TYPES, dictionaryGroup) + && !isValidWord(decapitalizedWord, DictionaryFacilitator.ALL_DICTIONARY_TYPES, dictionaryGroup) + ) { + // If the word was auto-capitalized and exists only as a capitalized word in the + // dictionary, then we must not downcase it before registering it. For example, + // the name of the contacts in start-of-sentence position would come here with the + // wasAutoCapitalized flag: if we downcase it, we'd register a lower-case version + // of that contact's name which would end up popping in suggestions. + wordToUse = word + } else { + // If however the word is not in the dictionary, or exists as a de-capitalized word + // only, then we consider that was a lower-case word that had been auto-capitalized. + wordToUse = decapitalizedWord + tryChangingWords = true + changeFrom = word + changeTo = wordToUse + } + } else { + // HACK: We'd like to avoid adding the capitalized form of common words to the User + // History dictionary in order to avoid suggesting them until the dictionary + // consolidation is done. + // TODO: Remove this hack when ready. 
+ val lowerCasedWord = word.lowercase(dictionaryGroup.locale) + val lowerCaseFreqInMainDict = dictionaryGroup.getDict(Dictionary.TYPE_MAIN)?.getFrequency(lowerCasedWord) + ?: Dictionary.NOT_A_PROBABILITY + wordToUse = if (mainFreq < lowerCaseFreqInMainDict + && lowerCaseFreqInMainDict >= CAPITALIZED_FORM_MAX_PROBABILITY_FOR_INSERT + ) { + // Use lower cased word as the word can be a distracter of the popular word. + lowerCasedWord + } else { + word + } + } + // We demote unrecognized words (frequency <= 0) by specifying them as "invalid". + // We don't add words with 0-frequency (assuming they would be profanity etc.). + val isValid = mainFreq > 0 + UserHistoryDictionary.addToDictionary(userHistoryDictionary, ngramContext, wordToUse, isValid, timeStampInSeconds) + } + + private fun addToPersonalDictionaryIfInvalidButInHistory(word: String) { + val dictionaryGroup = clearlyPreferredDictionaryGroup ?: return + val userDict = dictionaryGroup.getSubDict(Dictionary.TYPE_USER) ?: return + val userHistoryDict = dictionaryGroup.getSubDict(Dictionary.TYPE_USER_HISTORY) ?: return + if (isValidWord(word, DictionaryFacilitator.ALL_DICTIONARY_TYPES, dictionaryGroup)) + return // valid word, no reason to auto-add it to personal dict + if (userDict.isInDictionary(word)) + return // should never happen, but better be safe + + // User history always reports words as invalid, so we check the frequency instead. + // Testing shows that after 2 times adding, the frequency is 111, and then rises slowly with usage (values vary slightly). + // 120 is after 3 uses of the word, so we simply require more than that. todo: Could be made configurable. + // Words added to dictionaries (user and history) seem to be found only after some delay. 
+ // This is not too bad, but it delays adding in case a user wants to fill a dictionary using this functionality + if (userHistoryDict.getFrequency(word) > 120) { + scope.launch { + UserDictionary.Words.addWord(userDict.mContext, word, 250, null, dictionaryGroup.locale) + } + } + } + + private fun putWordIntoValidSpellingWordCache(caller: String, originalWord: String) { + if (mValidSpellingWordWriteCache == null) + return + + val lowerCaseWord = originalWord.lowercase(currentLocale) + val lowerCaseValid = isValidSpellingWord(lowerCaseWord) + mValidSpellingWordWriteCache?.put(lowerCaseWord, lowerCaseValid) + + val capitalWord = StringUtils.capitalizeFirstAndDowncaseRest(originalWord, currentLocale) + val capitalValid = if (lowerCaseValid) { + true // The lower case form of the word is valid, so the upper case must be valid. + } else { + isValidSpellingWord(capitalWord) + } + mValidSpellingWordWriteCache?.put(capitalWord, capitalValid) + } + + override fun adjustConfidences(word: String, wasAutoCapitalized: Boolean) { + if (dictionaryGroups.size == 1 || word.contains(Constants.WORD_SEPARATOR)) + return + + // if suggestion was auto-capitalized, check against both the suggestion and the de-capitalized suggestion + val decapitalizedSuggestion = if (wasAutoCapitalized) word.decapitalize(currentLocale) else word + dictionaryGroups.forEach { + if (isValidWord(word, DictionaryFacilitator.ALL_DICTIONARY_TYPES, it)) { + it.increaseConfidence() + return@forEach + } + // also increase confidence if suggestion was auto-capitalized and the lowercase variant it valid + if (wasAutoCapitalized && isValidWord(decapitalizedSuggestion, DictionaryFacilitator.ALL_DICTIONARY_TYPES, it)) + it.increaseConfidence() + else it.decreaseConfidence() + } + } + + /** the dictionaryGroup with most confidence, first group when tied */ + private val currentlyPreferredDictionaryGroup: DictionaryGroup get() = dictionaryGroups.maxBy { it.confidence } + + /** the only dictionary group, or the 
dictionaryGroup confidence >= DictionaryGroup.MAX_CONFIDENCE if all others have 0 */
    private val clearlyPreferredDictionaryGroup: DictionaryGroup? get() {
        if (dictionaryGroups.size == 1) return dictionaryGroups.first() // confidence not used if we only have a single group

        // we want to be really sure: the best group needs at least MAX_CONFIDENCE ...
        val preferred = currentlyPreferredDictionaryGroup
        if (preferred.confidence < DictionaryGroup.MAX_CONFIDENCE) return null
        // ... and no other group may have any confidence left
        if (dictionaryGroups.any { it.confidence > 0 && it !== preferred })
            return null
        return preferred
    }

    /**
     * Removes [word] from the user history dictionary of the currently preferred dictionary group
     * (unless [eventType] is a backspace event) and refreshes the spelling write cache for it.
     * [ngramContext] and [timeStampInSeconds] are not used by this implementation.
     */
    override fun unlearnFromUserHistory(word: String, ngramContext: NgramContext, timeStampInSeconds: Long, eventType: Int) {
        // TODO: Decide whether or not to remove the word on EVENT_BACKSPACE.
        if (eventType != Constants.EVENT_BACKSPACE) {
            currentlyPreferredDictionaryGroup.getSubDict(Dictionary.TYPE_USER_HISTORY)?.removeUnigramEntryDynamically(word)
        }

        // Update the spelling cache after unlearning. Words that are removed from user history
        // and appear in no other language model are not considered valid.
        // Lower-case with the locale of the preferred dictionary group, the same locale the cache
        // helper uses, instead of the device default locale (which may have different case rules).
        putWordIntoValidSpellingWordCache("unlearnFromUserHistory", word.lowercase(currentLocale))
    }

    // TODO: Revise the way to fusion suggestion results.
+ override fun getSuggestionResults( + composedData: ComposedData, ngramContext: NgramContext, keyboard: Keyboard, + settingsValuesForSuggestion: SettingsValuesForSuggestion, sessionId: Int, inputStyle: Int + ): SuggestionResults { + val proximityInfoHandle = keyboard.proximityInfo.nativeProximityInfo + val weightOfLangModelVsSpatialModel = floatArrayOf(Dictionary.NOT_A_WEIGHT_OF_LANG_MODEL_VS_SPATIAL_MODEL) + + val deferredSuggestions = dictionaryGroups.map { + scope.async { + // todo: if the order does not matter, we could add the suggestions right away without awaitAll first + getSuggestions(composedData, ngramContext, settingsValuesForSuggestion, sessionId, + proximityInfoHandle, weightOfLangModelVsSpatialModel, it) + } + } + val suggestionResults = SuggestionResults( + SuggestedWords.MAX_SUGGESTIONS, ngramContext.isBeginningOfSentenceContext, + false + ) + runBlocking { deferredSuggestions.awaitAll() }.forEach { + suggestionResults.addAll(it) + suggestionResults.mRawSuggestions?.addAll(it) + } + + return suggestionResults + } + + private fun getSuggestions( + composedData: ComposedData, ngramContext: NgramContext, + settingsValuesForSuggestion: SettingsValuesForSuggestion, sessionId: Int, + proximityInfoHandle: Long, weightOfLangModelVsSpatialModel: FloatArray, dictGroup: DictionaryGroup + ): List { + val suggestions = ArrayList() + val weightForLocale = dictGroup.getWeightForLocale(dictionaryGroups, composedData.mIsBatchMode) + for (dictType in DictionaryFacilitator.ALL_DICTIONARY_TYPES) { + val dictionary = dictGroup.getDict(dictType) ?: continue + val dictionarySuggestions = dictionary.getSuggestions(composedData, ngramContext, proximityInfoHandle, + settingsValuesForSuggestion, sessionId, weightForLocale, weightOfLangModelVsSpatialModel + ) ?: continue + + // For some reason "garbage" words are produced when glide typing. For user history + // and main dictionaries we can filter them out by checking whether the dictionary + // actually contains the word. 
But personal and addon dictionaries may contain shortcuts, + // which do not pass an isInDictionary check (e.g. emojis). + // (if the main dict contains shortcuts to non-words, this will break!) + val checkForGarbage = composedData.mIsBatchMode && (dictType == Dictionary.TYPE_USER_HISTORY || dictType == Dictionary.TYPE_MAIN) + + for (info in dictionarySuggestions) { + val word = info.word + if (isBlacklisted(word) || SupportedEmojis.isUnsupported(word)) // don't add blacklisted words and unsupported emojis + continue + if (checkForGarbage + // consider the user might use custom main dictionary containing shortcuts + // assume this is unlikely to happen, and take care about common shortcuts that are not actual words (emoji, symbols) + && word.length > 2 // should exclude most symbol shortcuts + && info.mSourceDict.mDictType == dictType // dictType is always main, but info.mSourceDict.mDictType contains the actual dict (main dict is a dictionary group) + && !StringUtils.mightBeEmoji(word) // simplified check for performance reasons + && !dictionary.isInDictionary(word) + ) + continue + suggestions.add(info) + } + } + return suggestions + } + + // Spell checker is using this, and has its own instance of DictionaryFacilitatorImpl, + // meaning that it always has default mConfidence. So we cannot choose to only check preferred + // locale, and instead simply return true if word is in any of the available dictionaries + override fun isValidSpellingWord(word: String): Boolean { + mValidSpellingWordReadCache?.get(word)?.let { return it } + val result = dictionaryGroups.any { isValidWord(word, DictionaryFacilitator.ALL_DICTIONARY_TYPES, it) } + mValidSpellingWordReadCache?.put(word, result) + return result + } + + // this is unused, so leave it for now (redirecting to isValidWord seems to defeat the purpose...) 
+ override fun isValidSuggestionWord(word: String): Boolean { + return isValidWord(word, DictionaryFacilitator.ALL_DICTIONARY_TYPES, dictionaryGroups[0]) + } + + // todo: move into dictionaryGroup? + private fun isValidWord(word: String, dictionariesToCheck: Array, dictionaryGroup: DictionaryGroup): Boolean { + if (word.isEmpty() || dictionaryGroup.isBlacklisted(word)) return false + return dictionariesToCheck.any { dictionaryGroup.getDict(it)?.isValidWord(word) == true } + } + + private fun isBlacklisted(word: String): Boolean = dictionaryGroups.any { it.isBlacklisted(word) } + + override fun removeWord(word: String) { + for (dictionaryGroup in dictionaryGroups) { + dictionaryGroup.removeWord(word) + } + } + + // todo: remove return value, not used + override fun clearUserHistoryDictionary(context: Context): Boolean { + for (dictionaryGroup in dictionaryGroups) { + val dictionary = dictionaryGroup.getSubDict(Dictionary.TYPE_USER_HISTORY) + ?: return false + dictionary.clear() + } + return true + } + + override fun localesAndConfidences(): String? { + if (dictionaryGroups.size < 2) return null + return dictionaryGroups.joinToString(", ") { "${it.locale} ${it.confidence}" } + } + + override fun dumpDictionaryForDebug(dictName: String) { + val dictToDump = dictionaryGroups[0].getSubDict(dictName) + if (dictToDump == null) { + Log.e(TAG, ("Cannot dump $dictName. The dictionary is not being used for suggestion or cannot be dumped.")) + return + } + dictToDump.dumpAllWordsForDebug() + } + + override fun getDictionaryStats(context: Context): List = + DictionaryFacilitator.DYNAMIC_DICTIONARY_TYPES.mapNotNull { + dictionaryGroups[0].getSubDict(it)?.dictionaryStats + } + + // todo: remove from interface? + override fun dump(context: Context) = "" + + companion object { + private val TAG = DictionaryFacilitatorImpl::class.java.simpleName + + // HACK: This threshold is being used when adding a capitalized entry in the User History dictionary. 
+ private const val CAPITALIZED_FORM_MAX_PROBABILITY_FOR_INSERT = 140 + + private fun getSubDict(dictType: String, context: Context, locale: Locale, dictFile: File?, + dictNamePrefix: String, account: String? + ): ExpandableBinaryDictionary? { + try { + return when (dictType) { + Dictionary.TYPE_USER_HISTORY -> UserHistoryDictionary.getDictionary(context, locale, dictFile, dictNamePrefix, account) + Dictionary.TYPE_USER -> UserBinaryDictionary.getDictionary(context, locale, dictFile, dictNamePrefix, account) + Dictionary.TYPE_CONTACTS -> ContactsBinaryDictionary.getDictionary(context, locale, dictFile, dictNamePrefix, account) + Dictionary.TYPE_APPS -> AppsBinaryDictionary.getDictionary(context, locale, dictFile, dictNamePrefix, account) + else -> throw IllegalArgumentException("unknown dictionary type $dictType") + } + } catch (e: SecurityException) { + Log.e(TAG, "Cannot create dictionary: $dictType", e) + } catch (e: IllegalArgumentException) { + Log.e(TAG, "Cannot create dictionary: $dictType", e) + } + return null + } + + private fun findDictionaryGroupWithLocale(dictGroups: List?, locale: Locale): DictionaryGroup? 
{ + return dictGroups?.firstOrNull { it.locale == locale } + } + + private fun getUsedLocales(mainLocale: Locale, context: Context): Collection { + val locales = hashSetOf(mainLocale) + // adding secondary locales is a bit tricky since they depend on the subtype + // but usually this is called with the selected subtype locale + val selectedSubtype = SubtypeSettings.getSelectedSubtype(context.prefs()) + if (selectedSubtype.locale() == mainLocale) { + locales.addAll(getSecondaryLocales(selectedSubtype.extraValue)) + } else { + // probably we're called from the spell checker when using a different app as keyboard + // so best bet is adding all secondary locales for matching main locale + SubtypeSettings.getEnabledSubtypes(false).forEach { + if (it.locale() == mainLocale) + locales.addAll(getSecondaryLocales(it.extraValue)) + } + } + return locales + } + } +} + +/** A group of dictionaries that work together for a single language. */ +private class DictionaryGroup( + val locale: Locale = Locale(""), + private var mainDict: Dictionary? = null, + val account: String? = null, // todo: not used, simply remove + subDicts: Map = emptyMap(), + context: Context? = null +) { + private val subDicts: ConcurrentHashMap = ConcurrentHashMap(subDicts) + + /** Removes a word from all dictionaries in this group. If the word is in a read-only dictionary, it is blacklisted. 
*/ + fun removeWord(word: String) { + // remove from user history + getSubDict(Dictionary.TYPE_USER_HISTORY)?.removeUnigramEntryDynamically(word) + + // and from personal dictionary + getSubDict(Dictionary.TYPE_USER)?.removeUnigramEntryDynamically(word) + + val contactsDict = getSubDict(Dictionary.TYPE_CONTACTS) + if (contactsDict != null && contactsDict.isInDictionary(word)) { + contactsDict.removeUnigramEntryDynamically(word) // will be gone until next reload of dict + addToBlacklist(word) + return + } + + val appsDict = getSubDict(Dictionary.TYPE_APPS) + if (appsDict != null && appsDict.isInDictionary(word)) { + appsDict.removeUnigramEntryDynamically(word) // will be gone until next reload of dict + addToBlacklist(word) + return + } + + val mainDict = mainDict ?: return + if (mainDict.isValidWord(word)) { + addToBlacklist(word) + return + } + + val lowercase = word.lowercase(locale) + if (getDict(Dictionary.TYPE_MAIN)!!.isValidWord(lowercase)) { + addToBlacklist(lowercase) + } + } + + // --------------- Confidence for multilingual typing ------------------- + + // Confidence that the most probable language is actually the language the user is + // typing in. For now, this is simply the number of times a word from this language + // has been committed in a row, with an exception when typing a single word not contained + // in this language. + var confidence = 1 + + // allow to go above max confidence, for better determination of currently preferred language + // when decreasing confidence or getting weight factor, limit to maximum + fun increaseConfidence() { + confidence += 1 + } + + // If confidence is above max, drop to max confidence. 
This does not change weights and + // allows conveniently typing single words from the other language without affecting suggestions + fun decreaseConfidence() { + if (confidence > MAX_CONFIDENCE) confidence = MAX_CONFIDENCE + else if (confidence > 0) { + confidence -= 1 + } + } + + fun getWeightForLocale(groups: List, isGesturing: Boolean) = + getWeightForLocale(groups, if (isGesturing) 0.05f else 0.15f) + + // might need some more tuning + fun getWeightForLocale(groups: List, step: Float): Float { + if (groups.size == 1) return 1f + if (confidence < 2) return 1f - step * (MAX_CONFIDENCE - confidence) + for (group in groups) { + if (group !== this && group.confidence >= confidence) return 1f - step / 2f + } + return 1f + } + + // --------------- Blacklist ------------------- + + private val scope = CoroutineScope(Dispatchers.IO) + + // words cannot be (permanently) removed from some dictionaries, so we use a blacklist for "removing" words + private val blacklistFile = context?.let { + File(it.filesDir.absolutePath + File.separator + "blacklists" + File.separator + locale.toLanguageTag() + ".txt") + .also { it.mkdirs() } + } + + private val blacklist = hashSetOf().apply { + if (blacklistFile?.exists() != true) return@apply + scope.launch { + synchronized(this) { + try { + addAll(blacklistFile.readLines()) + } catch (e: IOException) { + Log.e(TAG, "Exception while trying to read blacklist from ${blacklistFile.name}", e) + } + } + } + } + + fun isBlacklisted(word: String) = blacklist.contains(word) + + fun addToBlacklist(word: String) { + if (!blacklist.add(word) || blacklistFile == null) return + scope.launch { + synchronized(this) { + try { + blacklistFile.appendText("$word\n") + } catch (e: IOException) { + Log.e(TAG, "Exception while trying to add word \"$word\" to blacklist ${blacklistFile.name}", e) + } + } + } + } + + fun removeFromBlacklist(word: String) { + if (!blacklist.remove(word) || blacklistFile == null) return + scope.launch { + synchronized(this) { + 
try { + val newLines = blacklistFile.readLines().filterNot { it == word } + blacklistFile.writeText(newLines.joinToString("\n")) + } catch (e: IOException) { + Log.e(TAG, "Exception while trying to remove word \"$word\" to blacklist ${blacklistFile.name}", e) + } + } + } + } + + // --------------- Dictionary handling ------------------- + + fun setMainDict(newMainDict: Dictionary?) { + // Close old dictionary if exists. Main dictionary can be assigned multiple times. + val oldDict = mainDict + mainDict = newMainDict + if (oldDict != null && newMainDict !== oldDict) + oldDict.close() + } + + fun getDict(dictType: String): Dictionary? { + if (dictType == Dictionary.TYPE_MAIN) { + return mainDict + } + return getSubDict(dictType) + } + + fun getSubDict(dictType: String): ExpandableBinaryDictionary? { + return subDicts[dictType] + } + + fun hasDict(dictType: String, forAccount: String?): Boolean { + if (dictType == Dictionary.TYPE_MAIN) { + return mainDict != null + } + if (dictType == Dictionary.TYPE_USER_HISTORY && forAccount != account) { + // If the dictionary type is user history, & if the account doesn't match, + // return immediately. If the account matches, continue looking it up in the + // sub dictionary map. + return false + } + return subDicts.containsKey(dictType) + } + + fun closeDict(dictType: String) { + val dict = if (Dictionary.TYPE_MAIN == dictType) { + mainDict + } else { + subDicts.remove(dictType) + } + dict?.close() + } + + companion object { + private val TAG = DictionaryGroup::class.java.simpleName + const val MAX_CONFIDENCE = 2 + } +}