From 954a27b7c93342619a098a71b43e92025453bfde Mon Sep 17 00:00:00 2001 From: Helium314 Date: Wed, 21 May 2025 22:13:53 +0200 Subject: [PATCH] refactor creation of main dictionary --- .../main/java/helium314/keyboard/latin/App.kt | 3 +- .../latin/DictionaryFacilitatorImpl.kt | 2 +- .../keyboard/latin/DictionaryFactory.kt | 150 +++++++++--------- .../latin/utils/DictionaryInfoUtils.kt | 36 +++-- .../keyboard/latin/utils/DictionaryUtils.kt | 5 +- .../settings/dialogs/NewDictionaryDialog.kt | 2 +- .../settings/screens/DictionaryScreen.kt | 4 +- .../settings/screens/LanguageScreen.kt | 2 +- 8 files changed, 104 insertions(+), 100 deletions(-) diff --git a/app/src/main/java/helium314/keyboard/latin/App.kt b/app/src/main/java/helium314/keyboard/latin/App.kt index 2f0920399..631929dbf 100644 --- a/app/src/main/java/helium314/keyboard/latin/App.kt +++ b/app/src/main/java/helium314/keyboard/latin/App.kt @@ -85,8 +85,7 @@ fun checkVersionUpgrade(context: Context) { if (oldVersion == BuildConfig.VERSION_CODE) return // clear extracted dictionaries, in case updated version contains newer ones - DictionaryInfoUtils.getCachedDirectoryList(context)?.forEach { - if (!it.isDirectory) return@forEach + DictionaryInfoUtils.getCacheDirectories(context).forEach { val files = it.listFiles() ?: return@forEach for (file in files) { if (!file.name.endsWith(USER_DICTIONARY_SUFFIX)) diff --git a/app/src/main/java/helium314/keyboard/latin/DictionaryFacilitatorImpl.kt b/app/src/main/java/helium314/keyboard/latin/DictionaryFacilitatorImpl.kt index 0cdf506c8..854bcb9d6 100644 --- a/app/src/main/java/helium314/keyboard/latin/DictionaryFacilitatorImpl.kt +++ b/app/src/main/java/helium314/keyboard/latin/DictionaryFacilitatorImpl.kt @@ -239,7 +239,7 @@ class DictionaryFacilitatorImpl : DictionaryFacilitator { return@mapNotNull null // This should never happen } if (dictionaryGroup.getDict(Dictionary.TYPE_MAIN)?.isInitialized == true) null - else dictionaryGroup to createMainDictionary(context, it) + 
else dictionaryGroup to DictionaryFactory.createMainDictionaryCollection(context, it) } synchronized(this) { dictGroupsWithNewMainDict.forEach { (dictGroup, mainDict) -> diff --git a/app/src/main/java/helium314/keyboard/latin/DictionaryFactory.kt b/app/src/main/java/helium314/keyboard/latin/DictionaryFactory.kt index 4fc9c6859..17f1ca893 100644 --- a/app/src/main/java/helium314/keyboard/latin/DictionaryFactory.kt +++ b/app/src/main/java/helium314/keyboard/latin/DictionaryFactory.kt @@ -6,89 +6,87 @@ package helium314.keyboard.latin import android.content.Context -import helium314.keyboard.latin.common.FileUtils import helium314.keyboard.latin.common.LocaleUtils -import helium314.keyboard.latin.common.LocaleUtils.constructLocale import helium314.keyboard.latin.utils.DictionaryInfoUtils import helium314.keyboard.latin.utils.Log import java.io.File import java.util.LinkedList import java.util.Locale -/** - * Initializes a main dictionary collection from a dictionary pack, with explicit flags. - * - * - * This searches for a content provider providing a dictionary pack for the specified - * locale. If none is found, it falls back to the built-in dictionary - if any. 
- * @param context application context for reading resources - * @param locale the locale for which to create the dictionary - * @return an initialized instance of DictionaryCollection - */ -// todo: this needs updating, and then we can expose the weight for custom dictionaries (useful for addons like emoji dict) -fun createMainDictionary(context: Context, locale: Locale): DictionaryCollection { - val cacheDir = DictionaryInfoUtils.getAndCreateCacheDirectoryForLocale(locale, context) - val dictList = LinkedList() - // get cached dict files - val (userDicts, extractedDicts) = DictionaryInfoUtils.getCachedDictsForLocale(locale, context) - .partition { it.name.endsWith(DictionaryInfoUtils.USER_DICTIONARY_SUFFIX) } - // add user dicts to list - userDicts.forEach { checkAndAddDictionaryToListIfNotExisting(it, dictList, locale) } - // add extracted dicts to list (after userDicts, to skip extracted dicts of same type) - extractedDicts.forEach { checkAndAddDictionaryToListIfNotExisting(it, dictList, locale) } - if (dictList.any { it.mDictType == Dictionary.TYPE_MAIN }) - return DictionaryCollection(Dictionary.TYPE_MAIN, locale, dictList, FloatArray(dictList.size) { 1f }) - - // no main dict found -> check assets - val assetsDicts = DictionaryInfoUtils.getAssetsDictionaryList(context) - // file name is _.dict - val dictsByType = assetsDicts?.groupBy { it.substringBefore("_") } - // for each type find the best match - dictsByType?.forEach { (dictType, dicts) -> - val bestMatch = LocaleUtils.getBestMatch(locale, dicts) { it.substringAfter("_") - .substringBefore(".").constructLocale() } ?: return@forEach - // extract dict and add extracted file - val targetFile = File(cacheDir, "$dictType.dict") - FileUtils.copyStreamToNewFile( - context.assets.open(DictionaryInfoUtils.ASSETS_DICTIONARY_FOLDER + File.separator + bestMatch), - targetFile - ) - checkAndAddDictionaryToListIfNotExisting(targetFile, dictList, locale) - } - // If the list is empty, that means we should not use any 
/**
 * Creates the main [DictionaryCollection] for a locale from dictionary files in the
 * locale's cache folder and from dictionaries shipped in assets.
 */
object DictionaryFactory {
    /**
     * Initializes a main dictionary collection for [locale].
     * Uses all dictionaries in the cache folder for [locale], and adds built-in (assets)
     * dictionaries of matching locales if their type is not already in the cache folder.
     * Every dictionary in the resulting collection gets weight 1.
     *
     * @param context used for accessing the cache folder and assets
     * @param locale the locale for which to create the dictionary collection
     * @return an initialized instance of DictionaryCollection (possibly without any dictionaries)
     */
    // todo:
    //  expose the weight so users can adjust dictionary "importance" (useful for addons like emoji dict)
    //  allow users to block certain dictionaries (not sure how this should work exactly)
    fun createMainDictionaryCollection(context: Context, locale: Locale): DictionaryCollection {
        val dictList = LinkedList<Dictionary>()
        val (extracted, nonExtracted) = getAvailableDictsForLocale(locale, context)
        // sort to have user dicts first, so they have priority over internal dicts of the same type
        // (sortedBy is stable and false sorts before true)
        extracted.sortedBy { !it.name.endsWith(DictionaryInfoUtils.USER_DICTIONARY_SUFFIX) }.forEach {
            checkAndAddDictionaryToListNewType(it, dictList, locale)
        }
        nonExtracted.forEach { filename ->
            // assets file name is <type>_<language tag>.dict, so the type is the part before the
            // first underscore (cutting at "." would give "<type>_<language tag>", which never
            // equals a dictionary type and would make this check useless)
            val type = filename.substringBefore("_")
            if (dictList.any { it.mDictType == type }) return@forEach
            val extractedFile = DictionaryInfoUtils.extractAssetsDictionary(filename, locale, context)
            checkAndAddDictionaryToListNewType(extractedFile, dictList, locale)
        }
        return DictionaryCollection(Dictionary.TYPE_MAIN, locale, dictList, FloatArray(dictList.size) { 1f })
    }

    /**
     * Finds the dictionaries that can be used for [locale].
     *
     * @return a pair of
     *  - the files currently in the cache folder for [locale], and
     *  - the names of assets dictionary files that are not extracted yet: for each dictionary
     *    type without a `<type>.dict` file in the cache folder, the file whose locale is
     *    the best match for [locale] (types without any matching locale are left out)
     */
    fun getAvailableDictsForLocale(locale: Locale, context: Context): Pair<Array<out File>, List<String>> {
        val cachedDicts = DictionaryInfoUtils.getCachedDictsForLocale(locale, context)

        val nonExtractedDicts = mutableListOf<String>()
        DictionaryInfoUtils.getAssetsDictionaryList(context)
            // file name is <type>_<language tag>.dict
            ?.groupBy { it.substringBefore("_") }
            ?.forEach { (dictType, dicts) ->
                if (cachedDicts.any { it.name == "$dictType.dict" })
                    return@forEach // dictionary is already extracted (can't be old because of cleanup on upgrade)
                val bestMatch = LocaleUtils.getBestMatch(locale, dicts) {
                    DictionaryInfoUtils.extractLocaleFromAssetsDictionaryFile(it)
                } ?: return@forEach
                nonExtractedDicts.add(bestMatch)
            }
        return cachedDicts to nonExtractedDicts
    }

    /**
     * add dictionary created from [file] to [dicts]
     * if [file] cannot be loaded it is deleted
     * if the dictionary type already exists in [dicts], the [file] is skipped
     * the dictionary type is the part of the header id before the first ":"
     */
    private fun checkAndAddDictionaryToListNewType(file: File, dicts: MutableList<Dictionary>, locale: Locale) {
        if (!file.isFile) return
        val header = DictionaryInfoUtils.getDictionaryFileHeaderOrNull(file) ?: return killDictionary(file)
        val dictType = header.mIdString.split(":").first()
        if (dicts.any { it.mDictType == dictType }) return
        val readOnlyBinaryDictionary = ReadOnlyBinaryDictionary(
            file.absolutePath, 0, file.length(), false, locale, dictType
        )

        if (readOnlyBinaryDictionary.isValidDictionary) {
            if (locale.language == "ko") {
                // Use KoreanDictionary for Korean locale
                dicts.add(KoreanDictionary(readOnlyBinaryDictionary))
            } else {
                dicts.add(readOnlyBinaryDictionary)
            }
        } else {
            // release the dictionary before removing the file it was created from
            readOnlyBinaryDictionary.close()
            killDictionary(file)
        }
    }

    /** Logs that [file] could not be loaded as dictionary, and deletes it. */
    private fun killDictionary(file: File) {
        Log.e("DictionaryFactory", "could not load dictionary ${file.parentFile?.name}/${file.name}, deleting")
        file.delete()
    }
}
/**
 * Returns the entries found directly in the word list cache directory (intended to be
 * one directory per locale). The result is empty if the cache directory can't be listed.
 */
fun getCacheDirectories(context: Context) = File(getWordListCacheDirectory(context)).listFiles().orEmpty()

/**
 * Returns the absolute path of the cache directory for [locale], and creates the
 * directory if it does not exist yet.
 * The directory name is the language tag of [locale] after escaping it using
 * [replaceFileNameDangerousCharacters].
 * Failure to create the directory is only logged, the path is returned in any case.
 */
fun getCacheDirectoryForLocale(locale: Locale, context: Context): String {
    val localeFolderName = replaceFileNameDangerousCharacters(locale.toLanguageTag())
    val path = getWordListCacheDirectory(context) + File.separator + localeFolderName
    // mkdirs is only attempted when the directory is missing
    val available = File(path).run { exists() || mkdirs() }
    if (!available) Log.e(TAG, "Could not create the directory for locale $locale")
    return path
}
/**
 * Extracts the locale from the name of a dictionary file in assets.
 * Assumes file name `<type>_<locale>.dict`, e.g. main_[locale].dict
 *
 * @param dictionaryFileName name of the file in the assets dictionary folder
 * @return the locale constructed from the part between the first "_" and the following "."
 * @throws IllegalStateException if the file name does not contain both "_" and "."
 */
fun extractLocaleFromAssetsDictionaryFile(dictionaryFileName: String): Locale {
    // both separators are necessary for the pattern, so a name missing either of them is invalid
    check(dictionaryFileName.contains('_') && dictionaryFileName.contains('.')) {
        "invalid asset dictionary name $dictionaryFileName"
    }
    return dictionaryFileName.substringAfter("_").substringBefore(".").constructLocale()
}

/**
 * Copies the assets dictionary [dictionaryFileName] to the cache directory for [locale].
 * The extracted file is named `<type>.dict`, where type is the part of
 * [dictionaryFileName] before the first "_". This is the name DictionaryFactory looks
 * for when checking whether an assets dictionary is already extracted.
 *
 * @param dictionaryFileName name of the file in the assets dictionary folder
 * @param locale the locale whose cache directory receives the file (does not need to be
 *  the locale in [dictionaryFileName])
 * @return the extracted file
 */
fun extractAssetsDictionary(dictionaryFileName: String, locale: Locale, context: Context): File {
    val cacheDir = getCacheDirectoryForLocale(locale, context)
    val targetFile = File(cacheDir, "${dictionaryFileName.substringBefore("_")}.dict")
    // close the assets stream here, in case copyStreamToNewFile leaves it open
    // (closing an already closed stream has no effect)
    context.assets.open(ASSETS_DICTIONARY_FOLDER + File.separator + dictionaryFileName).use { assetStream ->
        FileUtils.copyStreamToNewFile(assetStream, targetFile)
    }
    return targetFile
}
= try { diff --git a/app/src/main/java/helium314/keyboard/latin/utils/DictionaryUtils.kt b/app/src/main/java/helium314/keyboard/latin/utils/DictionaryUtils.kt index 1f329bd9f..e4f119d8b 100644 --- a/app/src/main/java/helium314/keyboard/latin/utils/DictionaryUtils.kt +++ b/app/src/main/java/helium314/keyboard/latin/utils/DictionaryUtils.kt @@ -25,7 +25,7 @@ fun getDictionaryLocales(context: Context): MutableSet { val locales = HashSet() // get cached dictionaries: extracted or user-added dictionaries - DictionaryInfoUtils.getCachedDirectoryList(context).forEach { directory -> + DictionaryInfoUtils.getCacheDirectories(context).forEach { directory -> if (!directory.isDirectory) return@forEach if (!hasAnythingOtherThanExtractedMainDictionary(directory)) return@forEach val locale = DictionaryInfoUtils.getWordListIdFromFileName(directory.name).constructLocale() @@ -35,8 +35,7 @@ fun getDictionaryLocales(context: Context): MutableSet { val assetsDictionaryList = DictionaryInfoUtils.getAssetsDictionaryList(context) if (assetsDictionaryList != null) { for (dictionary in assetsDictionaryList) { - val locale = DictionaryInfoUtils.extractLocaleFromAssetsDictionaryFile(dictionary)?.constructLocale() ?: continue - locales.add(locale) + locales.add(DictionaryInfoUtils.extractLocaleFromAssetsDictionaryFile(dictionary)) } } return locales diff --git a/app/src/main/java/helium314/keyboard/settings/dialogs/NewDictionaryDialog.kt b/app/src/main/java/helium314/keyboard/settings/dialogs/NewDictionaryDialog.kt index 9ebc0ee9f..4d02c0687 100644 --- a/app/src/main/java/helium314/keyboard/settings/dialogs/NewDictionaryDialog.kt +++ b/app/src/main/java/helium314/keyboard/settings/dialogs/NewDictionaryDialog.kt @@ -53,7 +53,7 @@ fun NewDictionaryDialog( val locales = SubtypeSettings.getAvailableSubtypeLocales() .filter { it.script() == dictLocale.script() || it.script() == mainLocale?.script() } .sortedWith(comparer) - val cacheDir = 
DictionaryInfoUtils.getAndCreateCacheDirectoryForLocale(locale, ctx) + val cacheDir = DictionaryInfoUtils.getCacheDirectoryForLocale(locale, ctx) val dictFile = File(cacheDir, header.mIdString.substringBefore(":") + "_" + DictionaryInfoUtils.USER_DICTIONARY_SUFFIX) val type = header.mIdString.substringBefore(":") val info = header.info(LocalConfiguration.current.locale()) diff --git a/app/src/main/java/helium314/keyboard/settings/screens/DictionaryScreen.kt b/app/src/main/java/helium314/keyboard/settings/screens/DictionaryScreen.kt index 77d1b9b42..3ad01cb06 100644 --- a/app/src/main/java/helium314/keyboard/settings/screens/DictionaryScreen.kt +++ b/app/src/main/java/helium314/keyboard/settings/screens/DictionaryScreen.kt @@ -55,7 +55,7 @@ fun DictionaryScreen( ) { val ctx = LocalContext.current val enabledLanguages = SubtypeSettings.getEnabledSubtypes(true).map { it.locale().language } - val cachedDictFolders = DictionaryInfoUtils.getCachedDirectoryList(ctx).orEmpty().map { it.name } + val cachedDictFolders = DictionaryInfoUtils.getCacheDirectories(ctx).orEmpty().map { it.name } val comparer = compareBy({ it.language !in enabledLanguages }, { it.toLanguageTag() !in cachedDictFolders}, { it.displayName }) val dictionaryLocales = getDictionaryLocales(ctx).sortedWith(comparer).toMutableList() dictionaryLocales.add(0, Locale(SubtypeLocaleUtils.NO_LANGUAGE)) @@ -155,7 +155,7 @@ fun getUserAndInternalDictionaries(context: Context, locale: Locale): Pair { val systemLocales = SubtypeSettings.getSystemLocales() val enabledSubtypes = SubtypeSettings.getEnabledSubtypes(true) - val localesWithDictionary = DictionaryInfoUtils.getCachedDirectoryList(context)?.mapNotNull { dir -> + val localesWithDictionary = DictionaryInfoUtils.getCacheDirectories(context)?.mapNotNull { dir -> if (!dir.isDirectory) return@mapNotNull null if (dir.list()?.any { it.endsWith(DictionaryInfoUtils.USER_DICTIONARY_SUFFIX) } == true)