refactor creation of main dictionary

This commit is contained in:
Helium314 2025-05-21 22:13:53 +02:00
parent b1b357d6b8
commit 954a27b7c9
8 changed files with 104 additions and 100 deletions

View file

@ -85,8 +85,7 @@ fun checkVersionUpgrade(context: Context) {
if (oldVersion == BuildConfig.VERSION_CODE)
return
// clear extracted dictionaries, in case updated version contains newer ones
DictionaryInfoUtils.getCachedDirectoryList(context)?.forEach {
if (!it.isDirectory) return@forEach
DictionaryInfoUtils.getCacheDirectories(context).forEach {
val files = it.listFiles() ?: return@forEach
for (file in files) {
if (!file.name.endsWith(USER_DICTIONARY_SUFFIX))

View file

@ -239,7 +239,7 @@ class DictionaryFacilitatorImpl : DictionaryFacilitator {
return@mapNotNull null // This should never happen
}
if (dictionaryGroup.getDict(Dictionary.TYPE_MAIN)?.isInitialized == true) null
else dictionaryGroup to createMainDictionary(context, it)
else dictionaryGroup to DictionaryFactory.createMainDictionaryCollection(context, it)
}
synchronized(this) {
dictGroupsWithNewMainDict.forEach { (dictGroup, mainDict) ->

View file

@ -6,67 +6,64 @@
package helium314.keyboard.latin
import android.content.Context
import helium314.keyboard.latin.common.FileUtils
import helium314.keyboard.latin.common.LocaleUtils
import helium314.keyboard.latin.common.LocaleUtils.constructLocale
import helium314.keyboard.latin.utils.DictionaryInfoUtils
import helium314.keyboard.latin.utils.Log
import java.io.File
import java.util.LinkedList
import java.util.Locale
object DictionaryFactory {
/**
* Initializes a main dictionary collection from a dictionary pack, with explicit flags.
* Initializes a main dictionary collection for a locale.
* Uses all dictionaries in cache folder for locale, and adds built-in
* dictionaries of matching locales if type is not already in cache folder.
*
*
* This searches for a content provider providing a dictionary pack for the specified
* locale. If none is found, it falls back to the built-in dictionary - if any.
* @param context application context for reading resources
* @param locale the locale for which to create the dictionary
* @return an initialized instance of DictionaryCollection
*/
// todo: this needs updating, and then we can expose the weight for custom dictionaries (useful for addons like emoji dict)
fun createMainDictionary(context: Context, locale: Locale): DictionaryCollection {
val cacheDir = DictionaryInfoUtils.getAndCreateCacheDirectoryForLocale(locale, context)
// todo:
// expose the weight so users can adjust dictionary "importance" (useful for addons like emoji dict)
// allow users to block certain dictionaries (not sure how this should work exactly)
fun createMainDictionaryCollection(context: Context, locale: Locale): DictionaryCollection {
val dictList = LinkedList<Dictionary>()
// get cached dict files
val (userDicts, extractedDicts) = DictionaryInfoUtils.getCachedDictsForLocale(locale, context)
.partition { it.name.endsWith(DictionaryInfoUtils.USER_DICTIONARY_SUFFIX) }
// add user dicts to list
userDicts.forEach { checkAndAddDictionaryToListIfNotExisting(it, dictList, locale) }
// add extracted dicts to list (after userDicts, to skip extracted dicts of same type)
extractedDicts.forEach { checkAndAddDictionaryToListIfNotExisting(it, dictList, locale) }
if (dictList.any { it.mDictType == Dictionary.TYPE_MAIN })
return DictionaryCollection(Dictionary.TYPE_MAIN, locale, dictList, FloatArray(dictList.size) { 1f })
// no main dict found -> check assets
val assetsDicts = DictionaryInfoUtils.getAssetsDictionaryList(context)
// file name is <type>_<language tag>.dict
val dictsByType = assetsDicts?.groupBy { it.substringBefore("_") }
// for each type find the best match
dictsByType?.forEach { (dictType, dicts) ->
val bestMatch = LocaleUtils.getBestMatch(locale, dicts) { it.substringAfter("_")
.substringBefore(".").constructLocale() } ?: return@forEach
// extract dict and add extracted file
val targetFile = File(cacheDir, "$dictType.dict")
FileUtils.copyStreamToNewFile(
context.assets.open(DictionaryInfoUtils.ASSETS_DICTIONARY_FOLDER + File.separator + bestMatch),
targetFile
)
checkAndAddDictionaryToListIfNotExisting(targetFile, dictList, locale)
val (extracted, nonExtracted) = getAvailableDictsForLocale(locale, context)
extracted.sortedBy { !it.name.endsWith(DictionaryInfoUtils.USER_DICTIONARY_SUFFIX) }.forEach {
// we sort to have user dicts first, so they have priority over internal dicts of the same type
checkAndAddDictionaryToListNewType(it, dictList, locale)
}
nonExtracted.forEach { filename ->
val type = filename.substringBefore(".")
if (dictList.any { it.mDictType == type }) return@forEach
val extractedFile = DictionaryInfoUtils.extractAssetsDictionary(filename, locale, context)
checkAndAddDictionaryToListNewType(extractedFile, dictList, locale)
}
// If the list is empty, that means we should not use any dictionary (for example, the user
// explicitly disabled the main dictionary), so the following is okay. dictList is never
// null, but if for some reason it is, DictionaryCollection handles it gracefully.
return DictionaryCollection(Dictionary.TYPE_MAIN, locale, dictList, FloatArray(dictList.size) { 1f })
}
/**
 * Collects the dictionaries that can be used for [locale].
 *
 * @param locale the locale to find dictionaries for
 * @param context context used for accessing the cache directory and the assets
 * @return a pair of (files found in the cache directory for [locale], file names of assets
 * dictionaries whose type has no extracted `<type>.dict` file in that directory). For each
 * dictionary type in assets only the match returned by [LocaleUtils.getBestMatch] is listed.
 */
fun getAvailableDictsForLocale(locale: Locale, context: Context): Pair<Array<out File>, List<String>> {
    val cachedDicts = DictionaryInfoUtils.getCachedDictsForLocale(locale, context)
    // assets file name is <type>_<language tag>.dict, so the part before '_' is the type
    val assetsByType = DictionaryInfoUtils.getAssetsDictionaryList(context)
        ?.groupBy { it.substringBefore("_") }
        .orEmpty()
    val nonExtractedDicts = assetsByType.mapNotNull { (dictType, candidates) ->
        val extractedName = "$dictType.dict"
        if (cachedDicts.any { it.name == extractedName }) {
            // dictionary is already extracted (can't be old because of cleanup on upgrade)
            null
        } else {
            // null when no assets dictionary of this type matches the locale -> type is skipped
            LocaleUtils.getBestMatch(locale, candidates) {
                DictionaryInfoUtils.extractLocaleFromAssetsDictionaryFile(it)
            }
        }
    }
    return cachedDicts to nonExtractedDicts
}
/**
* add dictionary created from [file] to [dicts]
* if [file] cannot be loaded it is deleted
* if the dictionary type already exists in [dicts], the [file] is skipped
*/
private fun checkAndAddDictionaryToListIfNotExisting(file: File, dicts: MutableList<Dictionary>, locale: Locale) {
private fun checkAndAddDictionaryToListNewType(file: File, dicts: MutableList<Dictionary>, locale: Locale) {
if (!file.isFile) return
val header = DictionaryInfoUtils.getDictionaryFileHeaderOrNull(file) ?: return killDictionary(file)
val dictType = header.mIdString.split(":").first()
@ -92,3 +89,4 @@ private fun killDictionary(file: File) {
Log.e("DictionaryFactory", "could not load dictionary ${file.parentFile?.name}/${file.name}, deleting")
file.delete()
}
}

View file

@ -8,6 +8,8 @@ package helium314.keyboard.latin.utils
import android.content.Context
import android.text.TextUtils
import com.android.inputmethod.latin.utils.BinaryDictionaryUtils
import helium314.keyboard.latin.common.FileUtils
import helium314.keyboard.latin.common.LocaleUtils.constructLocale
import helium314.keyboard.latin.common.loopOverCodePoints
import helium314.keyboard.latin.define.DecoderSpecificConstants
import helium314.keyboard.latin.makedict.DictionaryHeader
@ -60,6 +62,8 @@ object DictionaryInfoUtils {
return sb.toString()
}
// we cache the extracted dictionaries in filesDir, because actual cache might be cleared at
// any time, and we can't permanently check whether the dictionary still exists
fun getWordListCacheDirectory(context: Context): String = context.filesDir.toString() + File.separator + "dicts"
/** Reverse escaping done by replaceFileNameDangerousCharacters. */
@ -83,11 +87,12 @@ object DictionaryInfoUtils {
}
/** Helper method to get the list of cache directories, one for each distinct locale. */
fun getCachedDirectoryList(context: Context) = File(getWordListCacheDirectory(context)).listFiles().orEmpty()
fun getCacheDirectories(context: Context) = File(getWordListCacheDirectory(context)).listFiles().orEmpty()
/** Find out the cache directory associated with a specific locale. */
fun getAndCreateCacheDirectoryForLocale(locale: Locale, context: Context): String {
val absoluteDirectoryName = getCacheDirectoryForLocale(locale, context)
fun getCacheDirectoryForLocale(locale: Locale, context: Context): String {
val relativeDirectoryName = replaceFileNameDangerousCharacters(locale.toLanguageTag())
val absoluteDirectoryName = getWordListCacheDirectory(context) + File.separator + relativeDirectoryName
val directory = File(absoluteDirectoryName)
if (!directory.exists() && !directory.mkdirs()) {
Log.e(TAG, "Could not create the directory for locale $locale")
@ -95,13 +100,8 @@ object DictionaryInfoUtils {
return absoluteDirectoryName
}
fun getCacheDirectoryForLocale(locale: Locale, context: Context): String {
val relativeDirectoryName = replaceFileNameDangerousCharacters(locale.toLanguageTag())
return getWordListCacheDirectory(context) + File.separator + relativeDirectoryName
}
fun getCachedDictsForLocale(locale: Locale, context: Context) =
File(getAndCreateCacheDirectoryForLocale(locale, context)).listFiles().orEmpty()
File(getCacheDirectoryForLocale(locale, context)).listFiles().orEmpty()
fun getDictionaryFileHeaderOrNull(file: File, offset: Long, length: Long): DictionaryHeader? {
return try {
@ -129,11 +129,19 @@ object DictionaryInfoUtils {
* Assumes file name main_[locale].dict
* Returns the locale; throws IllegalStateException if the file name contains '_' but no '.'
*/
fun extractLocaleFromAssetsDictionaryFile(dictionaryFileName: String): String? {
if (dictionaryFileName.startsWith(MAIN_DICT_PREFIX) && dictionaryFileName.endsWith(".dict")) {
return dictionaryFileName.substring(MAIN_DICT_PREFIX.length, dictionaryFileName.lastIndexOf('.'))
fun extractLocaleFromAssetsDictionaryFile(dictionaryFileName: String): Locale {
    // Only names that contain '_' but no '.' are rejected, every other name is parsed as-is.
    // NOTE(review): a name without '_' is not rejected, in that case everything before the
    // first '.' is used as language tag - confirm this is intended.
    val hasTypeSeparator = dictionaryFileName.contains('_')
    val hasExtension = dictionaryFileName.contains('.')
    check(!hasTypeSeparator || hasExtension) { "invalid asset dictionary name $dictionaryFileName" }
    // the language tag is the text after the first '_', up to the next '.'
    val languageTag = dictionaryFileName.substringAfter("_").substringBefore(".")
    return languageTag.constructLocale()
}
return null
/**
 * Copies the assets dictionary [dictionaryFileName] into the cache directory for [locale].
 *
 * Assets dictionaries are named `<type>_<language tag>.dict`. The extracted copy is stored as
 * `<type>.dict`, which is the name checked when looking for already extracted dictionaries.
 *
 * @param dictionaryFileName name of the dictionary file inside the assets dictionary folder
 * @param locale the locale whose cache directory receives the extracted file
 * @param context context used for accessing the assets and the cache directory
 * @return the extracted file
 */
fun extractAssetsDictionary(dictionaryFileName: String, locale: Locale, context: Context): File {
    // use the type only: appending ".dict" to the full assets name would result in a name
    // like "main_en.dict.dict", which is never recognized as extracted dictionary
    val dictType = dictionaryFileName.substringBefore("_")
    val targetFile = File(getCacheDirectoryForLocale(locale, context), "$dictType.dict")
    FileUtils.copyStreamToNewFile(
        context.assets.open(ASSETS_DICTIONARY_FOLDER + File.separator + dictionaryFileName),
        targetFile
    )
    return targetFile
}
fun getAssetsDictionaryList(context: Context): Array<String>? = try {

View file

@ -25,7 +25,7 @@ fun getDictionaryLocales(context: Context): MutableSet<Locale> {
val locales = HashSet<Locale>()
// get cached dictionaries: extracted or user-added dictionaries
DictionaryInfoUtils.getCachedDirectoryList(context).forEach { directory ->
DictionaryInfoUtils.getCacheDirectories(context).forEach { directory ->
if (!directory.isDirectory) return@forEach
if (!hasAnythingOtherThanExtractedMainDictionary(directory)) return@forEach
val locale = DictionaryInfoUtils.getWordListIdFromFileName(directory.name).constructLocale()
@ -35,8 +35,7 @@ fun getDictionaryLocales(context: Context): MutableSet<Locale> {
val assetsDictionaryList = DictionaryInfoUtils.getAssetsDictionaryList(context)
if (assetsDictionaryList != null) {
for (dictionary in assetsDictionaryList) {
val locale = DictionaryInfoUtils.extractLocaleFromAssetsDictionaryFile(dictionary)?.constructLocale() ?: continue
locales.add(locale)
locales.add(DictionaryInfoUtils.extractLocaleFromAssetsDictionaryFile(dictionary))
}
}
return locales

View file

@ -53,7 +53,7 @@ fun NewDictionaryDialog(
val locales = SubtypeSettings.getAvailableSubtypeLocales()
.filter { it.script() == dictLocale.script() || it.script() == mainLocale?.script() }
.sortedWith(comparer)
val cacheDir = DictionaryInfoUtils.getAndCreateCacheDirectoryForLocale(locale, ctx)
val cacheDir = DictionaryInfoUtils.getCacheDirectoryForLocale(locale, ctx)
val dictFile = File(cacheDir, header.mIdString.substringBefore(":") + "_" + DictionaryInfoUtils.USER_DICTIONARY_SUFFIX)
val type = header.mIdString.substringBefore(":")
val info = header.info(LocalConfiguration.current.locale())

View file

@ -55,7 +55,7 @@ fun DictionaryScreen(
) {
val ctx = LocalContext.current
val enabledLanguages = SubtypeSettings.getEnabledSubtypes(true).map { it.locale().language }
val cachedDictFolders = DictionaryInfoUtils.getCachedDirectoryList(ctx).orEmpty().map { it.name }
val cachedDictFolders = DictionaryInfoUtils.getCacheDirectories(ctx).orEmpty().map { it.name }
val comparer = compareBy<Locale>({ it.language !in enabledLanguages }, { it.toLanguageTag() !in cachedDictFolders}, { it.displayName })
val dictionaryLocales = getDictionaryLocales(ctx).sortedWith(comparer).toMutableList()
dictionaryLocales.add(0, Locale(SubtypeLocaleUtils.NO_LANGUAGE))
@ -155,7 +155,7 @@ fun getUserAndInternalDictionaries(context: Context, locale: Locale): Pair<List<
return userDicts to true
val internalDicts = DictionaryInfoUtils.getAssetsDictionaryList(context) ?: return userDicts to false
val best = LocaleUtils.getBestMatch(locale, internalDicts.toList()) {
DictionaryInfoUtils.extractLocaleFromAssetsDictionaryFile(it)?.constructLocale() ?: SubtypeLocaleUtils.NO_LANGUAGE.constructLocale()
DictionaryInfoUtils.extractLocaleFromAssetsDictionaryFile(it)
}
return userDicts to (best != null)
}

View file

@ -129,7 +129,7 @@ private fun dictsAvailable(locale: Locale, context: Context): Boolean {
private fun getSortedSubtypes(context: Context): List<InputMethodSubtype> {
val systemLocales = SubtypeSettings.getSystemLocales()
val enabledSubtypes = SubtypeSettings.getEnabledSubtypes(true)
val localesWithDictionary = DictionaryInfoUtils.getCachedDirectoryList(context)?.mapNotNull { dir ->
val localesWithDictionary = DictionaryInfoUtils.getCacheDirectories(context)?.mapNotNull { dir ->
if (!dir.isDirectory)
return@mapNotNull null
if (dir.list()?.any { it.endsWith(DictionaryInfoUtils.USER_DICTIONARY_SUFFIX) } == true)