diff --git a/app/src/main/java/helium314/keyboard/latin/utils/DictionaryInfoUtils.java b/app/src/main/java/helium314/keyboard/latin/utils/DictionaryInfoUtils.java deleted file mode 100644 index b27de7655..000000000 --- a/app/src/main/java/helium314/keyboard/latin/utils/DictionaryInfoUtils.java +++ /dev/null @@ -1,223 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * modified - * SPDX-License-Identifier: Apache-2.0 AND GPL-3.0-only - */ - -package helium314.keyboard.latin.utils; - -import android.content.Context; -import android.text.TextUtils; - -import androidx.annotation.NonNull; -import androidx.annotation.Nullable; - -import com.android.inputmethod.latin.utils.BinaryDictionaryUtils; - -import helium314.keyboard.latin.define.DecoderSpecificConstants; -import helium314.keyboard.latin.makedict.DictionaryHeader; -import helium314.keyboard.latin.makedict.UnsupportedFormatException; -import helium314.keyboard.latin.settings.SpacingAndPunctuations; - -import java.io.File; -import java.io.IOException; -import java.util.Locale; - -/** - * This class encapsulates the logic for the Latin-IME side of dictionary information management. - */ -public class DictionaryInfoUtils { - private static final String TAG = DictionaryInfoUtils.class.getSimpleName(); - public static final String DEFAULT_MAIN_DICT = "main"; - public static final String USER_DICTIONARY_SUFFIX = "user.dict"; - public static final String MAIN_DICT_PREFIX = DEFAULT_MAIN_DICT + "_"; - // 6 digits - unicode is limited to 21 bits - private static final int MAX_HEX_DIGITS_FOR_CODEPOINT = 6; - public static final String ASSETS_DICTIONARY_FOLDER = "dicts"; - public static final String ID_CATEGORY_SEPARATOR = ":"; - private static final String DICTIONARY_CATEGORY_SEPARATOR_EXPRESSION = "[" + ID_CATEGORY_SEPARATOR + "_]"; - - private DictionaryInfoUtils() { - // Private constructor to forbid instantation of this helper class. - } - - /** - * Returns whether we may want to use this character as part of a file name. - *

- * This basically only accepts ascii letters and numbers, and rejects everything else. - */ - private static boolean isFileNameCharacter(int codePoint) { - if (codePoint >= 0x30 && codePoint <= 0x39) return true; // Digit - if (codePoint >= 0x41 && codePoint <= 0x5A) return true; // Uppercase - if (codePoint >= 0x61 && codePoint <= 0x7A) return true; // Lowercase - return codePoint == '_' || codePoint == '-'; - } - - /** - * Escapes a string for any characters that may be suspicious for a file or directory name. - *

- * Concretely this does a sort of URL-encoding except it will encode everything that's not - * alphanumeric or underscore. (true URL-encoding leaves alone characters like '*', which - * we cannot allow here) - */ - // TODO: create a unit test for this method - public static String replaceFileNameDangerousCharacters(final String name) { - // This assumes '%' is fully available as a non-separator, normal - // character in a file name. This is probably true for all file systems. - final StringBuilder sb = new StringBuilder(); - final int nameLength = name.length(); - for (int i = 0; i < nameLength; i = name.offsetByCodePoints(i, 1)) { - final int codePoint = name.codePointAt(i); - if (DictionaryInfoUtils.isFileNameCharacter(codePoint)) { - sb.appendCodePoint(codePoint); - } else { - sb.append(String.format(Locale.US, "%%%1$0" + MAX_HEX_DIGITS_FOR_CODEPOINT + "x", codePoint)); - } - } - return sb.toString(); - } - - /** - * Helper method to get the top level cache directory. - */ - public static String getWordListCacheDirectory(final Context context) { - return context.getFilesDir() + File.separator + "dicts"; - } - - /** - * Reverse escaping done by {@link #replaceFileNameDangerousCharacters(String)}. - */ - @NonNull - public static String getWordListIdFromFileName(@NonNull final String fname) { - final StringBuilder sb = new StringBuilder(); - final int fnameLength = fname.length(); - for (int i = 0; i < fnameLength; i = fname.offsetByCodePoints(i, 1)) { - final int codePoint = fname.codePointAt(i); - if ('%' != codePoint) { - sb.appendCodePoint(codePoint); - } else { - // + 1 to pass the % sign - final int encodedCodePoint = - Integer.parseInt(fname.substring(i + 1, i + 1 + MAX_HEX_DIGITS_FOR_CODEPOINT), 16); - i += MAX_HEX_DIGITS_FOR_CODEPOINT; - sb.appendCodePoint(encodedCodePoint); - } - } - return sb.toString(); - } - - /** - * Helper method to the list of cache directories, one for each distinct locale. - */ - @Nullable public static File[] getCachedDirectoryList(final Context context) { - return new File(DictionaryInfoUtils.getWordListCacheDirectory(context)).listFiles(); - } - - /** - * Find out the cache directory associated with a specific locale. - */ - public static String getAndCreateCacheDirectoryForLocale(final Locale locale, final Context context) { - final String absoluteDirectoryName = getCacheDirectoryForLocale(locale, context); - final File directory = new File(absoluteDirectoryName); - if (!directory.exists()) { - if (!directory.mkdirs()) { - Log.e(TAG, "Could not create the directory for locale" + locale); - } - } - return absoluteDirectoryName; - } - - public static String getCacheDirectoryForLocale(final Locale locale, final Context context) { - final String relativeDirectoryName = replaceFileNameDangerousCharacters(locale.toLanguageTag()); - return getWordListCacheDirectory(context) + File.separator + relativeDirectoryName; - } - - public static File[] getCachedDictsForLocale(final Locale locale, final Context context) { - final File cachedDir = new File(getAndCreateCacheDirectoryForLocale(locale, context)); - if (!cachedDir.isDirectory()) - return new File[]{}; - return cachedDir.listFiles(); - } - - public static String getExtractedMainDictFilename() { - return DEFAULT_MAIN_DICT + ".dict"; - } - - @Nullable - public static DictionaryHeader getDictionaryFileHeaderOrNull(final File file, - final long offset, final long length) { - try { - return BinaryDictionaryUtils.getHeaderWithOffsetAndLength(file, offset, length); - } catch (UnsupportedFormatException | IOException e) { - return null; - } - } - - @Nullable - public static DictionaryHeader getDictionaryFileHeaderOrNull(final File file) { - try { - return BinaryDictionaryUtils.getHeader(file); - } catch (UnsupportedFormatException | IOException e) { - return null; - } - } - - /** - * Returns the locale for a dictionary file name stored in assets. - *

- * Assumes file name main_[locale].dict - *

- * Returns the locale, or null if file name does not match the pattern - */ - @Nullable public static String extractLocaleFromAssetsDictionaryFile(final String dictionaryFileName) { - if (dictionaryFileName.startsWith(DictionaryInfoUtils.MAIN_DICT_PREFIX) - && dictionaryFileName.endsWith(".dict")) { - return dictionaryFileName.substring( - DictionaryInfoUtils.MAIN_DICT_PREFIX.length(), - dictionaryFileName.lastIndexOf('.') - ); - } - return null; - } - - @Nullable public static String[] getAssetsDictionaryList(final Context context) { - final String[] dictionaryList; - try { - dictionaryList = context.getAssets().list(ASSETS_DICTIONARY_FOLDER); - } catch (IOException e) { - return null; - } - return dictionaryList; - } - - public static boolean looksValidForDictionaryInsertion(final CharSequence text, - final SpacingAndPunctuations spacingAndPunctuations) { - if (TextUtils.isEmpty(text)) { - return false; - } - final int length = text.length(); - if (length > DecoderSpecificConstants.DICTIONARY_MAX_WORD_LENGTH) { - return false; - } - int i = 0; - int digitCount = 0; - while (i < length) { - final int codePoint = Character.codePointAt(text, i); - final int charCount = Character.charCount(codePoint); - i += charCount; - if (Character.isDigit(codePoint)) { - // Count digits: see below - digitCount += charCount; - continue; - } - if (!spacingAndPunctuations.isWordCodePoint(codePoint)) { - return false; - } - } - // We reject strings entirely comprised of digits to avoid using PIN codes or credit - // card numbers. It would come in handy for word prediction though; a good example is - // when writing one's address where the street number is usually quite discriminative, - // as well as the postal code. - return digitCount < length; - } -} diff --git a/app/src/main/java/helium314/keyboard/latin/utils/DictionaryInfoUtils.kt b/app/src/main/java/helium314/keyboard/latin/utils/DictionaryInfoUtils.kt new file mode 100644 index 000000000..ec6322cbd --- /dev/null +++ b/app/src/main/java/helium314/keyboard/latin/utils/DictionaryInfoUtils.kt @@ -0,0 +1,171 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * modified + * SPDX-License-Identifier: Apache-2.0 AND GPL-3.0-only + */ +package helium314.keyboard.latin.utils + +import android.content.Context +import android.text.TextUtils +import com.android.inputmethod.latin.utils.BinaryDictionaryUtils +import helium314.keyboard.latin.common.loopOverCodePoints +import helium314.keyboard.latin.define.DecoderSpecificConstants +import helium314.keyboard.latin.makedict.DictionaryHeader +import helium314.keyboard.latin.makedict.UnsupportedFormatException +import helium314.keyboard.latin.settings.SpacingAndPunctuations +import java.io.File +import java.io.IOException +import java.util.Locale + +/** encapsulates the logic for the Latin-IME side of dictionary information management */ +object DictionaryInfoUtils { + private val TAG = DictionaryInfoUtils::class.java.simpleName + const val DEFAULT_MAIN_DICT = "main" + const val USER_DICTIONARY_SUFFIX = "user.dict" + const val MAIN_DICT_PREFIX = DEFAULT_MAIN_DICT + "_" + const val ASSETS_DICTIONARY_FOLDER = "dicts" + const val MAIN_DICT_FILE_NAME = DEFAULT_MAIN_DICT + ".dict" + private const val MAX_HEX_DIGITS_FOR_CODEPOINT = 6 // unicode is limited to 21 bits + + /** + * Returns whether we may want to use this character as part of a file name. + * This basically only accepts ascii letters and numbers, and rejects everything else. + */ + private fun isFileNameCharacter(codePoint: Int): Boolean { + if (codePoint in 0x30..0x39) return true // Digit + if (codePoint in 0x41..0x5A) return true // Uppercase + if (codePoint in 0x61..0x7A) return true // Lowercase + return codePoint == '_'.code || codePoint == '-'.code + } + + /** + * Escapes a string for any characters that may be suspicious for a file or directory name. + * + * Concretely this does a sort of URL-encoding except it will encode everything that's not + * alphanumeric or underscore. (true URL-encoding leaves alone characters like '*', which + * we cannot allow here) + */ + private fun replaceFileNameDangerousCharacters(name: String): String { + // This assumes '%' is fully available as a non-separator, normal + // character in a file name. This is probably true for all file systems. + val sb = StringBuilder() + loopOverCodePoints(name) { codePoint, _ -> + if (isFileNameCharacter(codePoint)) { + sb.appendCodePoint(codePoint) + } else { + sb.append(String.format(Locale.US, "%%%1$0" + MAX_HEX_DIGITS_FOR_CODEPOINT + "x", codePoint)) + } + false + } + return sb.toString() + } + + fun getWordListCacheDirectory(context: Context): String = context.filesDir.toString() + File.separator + "dicts" + + /** Reverse escaping done by replaceFileNameDangerousCharacters. */ + fun getWordListIdFromFileName(fname: String): String { + val sb = StringBuilder() + val fnameLength = fname.length + var i = 0 + while (i < fnameLength) { + val codePoint = fname.codePointAt(i) + if ('%'.code != codePoint) { + sb.appendCodePoint(codePoint) + } else { + // + 1 to pass the % sign + val encodedCodePoint = fname.substring(i + 1, i + 1 + MAX_HEX_DIGITS_FOR_CODEPOINT).toInt(16) + i += MAX_HEX_DIGITS_FOR_CODEPOINT + sb.appendCodePoint(encodedCodePoint) + } + i = fname.offsetByCodePoints(i, 1) + } + return sb.toString() + } + + /** Helper method to the list of cache directories, one for each distinct locale. */ + fun getCachedDirectoryList(context: Context) = File(getWordListCacheDirectory(context)).listFiles().orEmpty() + + /** Find out the cache directory associated with a specific locale. */ + fun getAndCreateCacheDirectoryForLocale(locale: Locale, context: Context): String { + val absoluteDirectoryName = getCacheDirectoryForLocale(locale, context) + val directory = File(absoluteDirectoryName) + if (!directory.exists() && !directory.mkdirs()) { + Log.e(TAG, "Could not create the directory for locale $locale") + } + return absoluteDirectoryName + } + + fun getCacheDirectoryForLocale(locale: Locale, context: Context): String { + val relativeDirectoryName = replaceFileNameDangerousCharacters(locale.toLanguageTag()) + return getWordListCacheDirectory(context) + File.separator + relativeDirectoryName + } + + fun getCachedDictsForLocale(locale: Locale, context: Context) = + File(getAndCreateCacheDirectoryForLocale(locale, context)).listFiles().orEmpty() + + fun getDictionaryFileHeaderOrNull(file: File, offset: Long, length: Long): DictionaryHeader? { + return try { + BinaryDictionaryUtils.getHeaderWithOffsetAndLength(file, offset, length) + } catch (e: UnsupportedFormatException) { + null + } catch (e: IOException) { + null + } + } + + fun getDictionaryFileHeaderOrNull(file: File): DictionaryHeader? { + return try { + BinaryDictionaryUtils.getHeader(file) + } catch (e: UnsupportedFormatException) { + null + } catch (e: IOException) { + null + } + } + + /** + * Returns the locale for a dictionary file name stored in assets. + * + * Assumes file name main_[locale].dict + * Returns the locale, or null if file name does not match the pattern + */ + fun extractLocaleFromAssetsDictionaryFile(dictionaryFileName: String): String? { + if (dictionaryFileName.startsWith(MAIN_DICT_PREFIX) && dictionaryFileName.endsWith(".dict")) { + return dictionaryFileName.substring(MAIN_DICT_PREFIX.length, dictionaryFileName.lastIndexOf('.')) + } + return null + } + + fun getAssetsDictionaryList(context: Context): Array? = try { + context.assets.list(ASSETS_DICTIONARY_FOLDER) + } catch (e: IOException) { + null + } + + @JvmStatic + fun looksValidForDictionaryInsertion(text: CharSequence, spacingAndPunctuations: SpacingAndPunctuations): Boolean { + if (TextUtils.isEmpty(text)) { + return false + } + if (text.length > DecoderSpecificConstants.DICTIONARY_MAX_WORD_LENGTH) { + return false + } + var digitCount = 0 + loopOverCodePoints(text) { codePoint, charCount -> + if (Character.isDigit(codePoint)) { + // Count digits: see below + digitCount += charCount + return@loopOverCodePoints false + } + if (!spacingAndPunctuations.isWordCodePoint(codePoint)) { + return false + } + false + } + // We reject strings entirely comprised of digits to avoid using PIN codes or credit + // card numbers. It would come in handy for word prediction though; a good example is + // when writing one's address where the street number is usually quite discriminative, + // as well as the postal code. + return digitCount < text.length + } +} diff --git a/app/src/main/java/helium314/keyboard/latin/utils/DictionaryUtils.kt b/app/src/main/java/helium314/keyboard/latin/utils/DictionaryUtils.kt index 3bbb310c4..1f329bd9f 100644 --- a/app/src/main/java/helium314/keyboard/latin/utils/DictionaryUtils.kt +++ b/app/src/main/java/helium314/keyboard/latin/utils/DictionaryUtils.kt @@ -25,7 +25,7 @@ fun getDictionaryLocales(context: Context): MutableSet { val locales = HashSet() // get cached dictionaries: extracted or user-added dictionaries - DictionaryInfoUtils.getCachedDirectoryList(context)?.forEach { directory -> + DictionaryInfoUtils.getCachedDirectoryList(context).forEach { directory -> if (!directory.isDirectory) return@forEach if (!hasAnythingOtherThanExtractedMainDictionary(directory)) return@forEach val locale = DictionaryInfoUtils.getWordListIdFromFileName(directory.name).constructLocale() @@ -135,4 +135,4 @@ fun cleanUnusedMainDicts(context: Context) { } private fun hasAnythingOtherThanExtractedMainDictionary(dir: File) = - dir.listFiles()?.any { it.name != DictionaryInfoUtils.getExtractedMainDictFilename() } != false + dir.listFiles()?.any { it.name != DictionaryInfoUtils.MAIN_DICT_FILE_NAME } != false diff --git a/app/src/main/java/helium314/keyboard/settings/dialogs/NewDictionaryDialog.kt b/app/src/main/java/helium314/keyboard/settings/dialogs/NewDictionaryDialog.kt index 2bf3da39c..9ebc0ee9f 100644 --- a/app/src/main/java/helium314/keyboard/settings/dialogs/NewDictionaryDialog.kt +++ b/app/src/main/java/helium314/keyboard/settings/dialogs/NewDictionaryDialog.kt @@ -32,6 +32,7 @@ import helium314.keyboard.settings.DropDownField import helium314.keyboard.settings.WithSmallTitle import java.io.File import java.util.Locale +import androidx.compose.ui.platform.LocalConfiguration @Composable fun NewDictionaryDialog( @@ -55,7 +56,7 @@ fun NewDictionaryDialog( val cacheDir = DictionaryInfoUtils.getAndCreateCacheDirectoryForLocale(locale, ctx) val dictFile = File(cacheDir, header.mIdString.substringBefore(":") + "_" + DictionaryInfoUtils.USER_DICTIONARY_SUFFIX) val type = header.mIdString.substringBefore(":") - val info = header.info(ctx.resources.configuration.locale()) + val info = header.info(LocalConfiguration.current.locale()) ThreeButtonAlertDialog( onDismissRequest = { onDismissRequest(); cachedFile.delete() }, onConfirmed = { @@ -64,7 +65,7 @@ fun NewDictionaryDialog( cachedFile.renameTo(dictFile) if (type == Dictionary.TYPE_MAIN) { // replaced main dict, remove the one created from internal data - val internalMainDictFile = File(cacheDir, DictionaryInfoUtils.getExtractedMainDictFilename()) + val internalMainDictFile = File(cacheDir, DictionaryInfoUtils.MAIN_DICT_FILE_NAME) internalMainDictFile.delete() } val newDictBroadcast = Intent(DictionaryPackConstants.NEW_DICTIONARY_INTENT_ACTION) @@ -92,7 +93,7 @@ fun NewDictionaryDialog( ) } if (dictFile.exists()) { - val oldInfo = DictionaryInfoUtils.getDictionaryFileHeaderOrNull(dictFile, 0, dictFile.length())?.info(ctx.resources.configuration.locale()) + val oldInfo = DictionaryInfoUtils.getDictionaryFileHeaderOrNull(dictFile, 0, dictFile.length())?.info(LocalConfiguration.current.locale()) HorizontalDivider() Text( stringResource(R.string.replace_dictionary_message, type, oldInfo ?: "(no info)", info),