diff --git a/app/src/main/java/helium314/keyboard/latin/utils/DictionaryInfoUtils.java b/app/src/main/java/helium314/keyboard/latin/utils/DictionaryInfoUtils.java deleted file mode 100644 index b27de7655..000000000 --- a/app/src/main/java/helium314/keyboard/latin/utils/DictionaryInfoUtils.java +++ /dev/null @@ -1,223 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * modified - * SPDX-License-Identifier: Apache-2.0 AND GPL-3.0-only - */ - -package helium314.keyboard.latin.utils; - -import android.content.Context; -import android.text.TextUtils; - -import androidx.annotation.NonNull; -import androidx.annotation.Nullable; - -import com.android.inputmethod.latin.utils.BinaryDictionaryUtils; - -import helium314.keyboard.latin.define.DecoderSpecificConstants; -import helium314.keyboard.latin.makedict.DictionaryHeader; -import helium314.keyboard.latin.makedict.UnsupportedFormatException; -import helium314.keyboard.latin.settings.SpacingAndPunctuations; - -import java.io.File; -import java.io.IOException; -import java.util.Locale; - -/** - * This class encapsulates the logic for the Latin-IME side of dictionary information management. - */ -public class DictionaryInfoUtils { - private static final String TAG = DictionaryInfoUtils.class.getSimpleName(); - public static final String DEFAULT_MAIN_DICT = "main"; - public static final String USER_DICTIONARY_SUFFIX = "user.dict"; - public static final String MAIN_DICT_PREFIX = DEFAULT_MAIN_DICT + "_"; - // 6 digits - unicode is limited to 21 bits - private static final int MAX_HEX_DIGITS_FOR_CODEPOINT = 6; - public static final String ASSETS_DICTIONARY_FOLDER = "dicts"; - public static final String ID_CATEGORY_SEPARATOR = ":"; - private static final String DICTIONARY_CATEGORY_SEPARATOR_EXPRESSION = "[" + ID_CATEGORY_SEPARATOR + "_]"; - - private DictionaryInfoUtils() { - // Private constructor to forbid instantation of this helper class. 
- } - - /** - * Returns whether we may want to use this character as part of a file name. - *
- * This basically only accepts ascii letters and numbers, and rejects everything else. - */ - private static boolean isFileNameCharacter(int codePoint) { - if (codePoint >= 0x30 && codePoint <= 0x39) return true; // Digit - if (codePoint >= 0x41 && codePoint <= 0x5A) return true; // Uppercase - if (codePoint >= 0x61 && codePoint <= 0x7A) return true; // Lowercase - return codePoint == '_' || codePoint == '-'; - } - - /** - * Escapes a string for any characters that may be suspicious for a file or directory name. - *
- * Concretely this does a sort of URL-encoding except it will encode everything that's not - * alphanumeric or underscore. (true URL-encoding leaves alone characters like '*', which - * we cannot allow here) - */ - // TODO: create a unit test for this method - public static String replaceFileNameDangerousCharacters(final String name) { - // This assumes '%' is fully available as a non-separator, normal - // character in a file name. This is probably true for all file systems. - final StringBuilder sb = new StringBuilder(); - final int nameLength = name.length(); - for (int i = 0; i < nameLength; i = name.offsetByCodePoints(i, 1)) { - final int codePoint = name.codePointAt(i); - if (DictionaryInfoUtils.isFileNameCharacter(codePoint)) { - sb.appendCodePoint(codePoint); - } else { - sb.append(String.format(Locale.US, "%%%1$0" + MAX_HEX_DIGITS_FOR_CODEPOINT + "x", codePoint)); - } - } - return sb.toString(); - } - - /** - * Helper method to get the top level cache directory. - */ - public static String getWordListCacheDirectory(final Context context) { - return context.getFilesDir() + File.separator + "dicts"; - } - - /** - * Reverse escaping done by {@link #replaceFileNameDangerousCharacters(String)}. - */ - @NonNull - public static String getWordListIdFromFileName(@NonNull final String fname) { - final StringBuilder sb = new StringBuilder(); - final int fnameLength = fname.length(); - for (int i = 0; i < fnameLength; i = fname.offsetByCodePoints(i, 1)) { - final int codePoint = fname.codePointAt(i); - if ('%' != codePoint) { - sb.appendCodePoint(codePoint); - } else { - // + 1 to pass the % sign - final int encodedCodePoint = - Integer.parseInt(fname.substring(i + 1, i + 1 + MAX_HEX_DIGITS_FOR_CODEPOINT), 16); - i += MAX_HEX_DIGITS_FOR_CODEPOINT; - sb.appendCodePoint(encodedCodePoint); - } - } - return sb.toString(); - } - - /** - * Helper method to the list of cache directories, one for each distinct locale. 
- */ - @Nullable public static File[] getCachedDirectoryList(final Context context) { - return new File(DictionaryInfoUtils.getWordListCacheDirectory(context)).listFiles(); - } - - /** - * Find out the cache directory associated with a specific locale. - */ - public static String getAndCreateCacheDirectoryForLocale(final Locale locale, final Context context) { - final String absoluteDirectoryName = getCacheDirectoryForLocale(locale, context); - final File directory = new File(absoluteDirectoryName); - if (!directory.exists()) { - if (!directory.mkdirs()) { - Log.e(TAG, "Could not create the directory for locale" + locale); - } - } - return absoluteDirectoryName; - } - - public static String getCacheDirectoryForLocale(final Locale locale, final Context context) { - final String relativeDirectoryName = replaceFileNameDangerousCharacters(locale.toLanguageTag()); - return getWordListCacheDirectory(context) + File.separator + relativeDirectoryName; - } - - public static File[] getCachedDictsForLocale(final Locale locale, final Context context) { - final File cachedDir = new File(getAndCreateCacheDirectoryForLocale(locale, context)); - if (!cachedDir.isDirectory()) - return new File[]{}; - return cachedDir.listFiles(); - } - - public static String getExtractedMainDictFilename() { - return DEFAULT_MAIN_DICT + ".dict"; - } - - @Nullable - public static DictionaryHeader getDictionaryFileHeaderOrNull(final File file, - final long offset, final long length) { - try { - return BinaryDictionaryUtils.getHeaderWithOffsetAndLength(file, offset, length); - } catch (UnsupportedFormatException | IOException e) { - return null; - } - } - - @Nullable - public static DictionaryHeader getDictionaryFileHeaderOrNull(final File file) { - try { - return BinaryDictionaryUtils.getHeader(file); - } catch (UnsupportedFormatException | IOException e) { - return null; - } - } - - /** - * Returns the locale for a dictionary file name stored in assets. - *
- * Assumes file name main_[locale].dict - *
- * Returns the locale, or null if file name does not match the pattern
- */
- @Nullable public static String extractLocaleFromAssetsDictionaryFile(final String dictionaryFileName) {
- if (dictionaryFileName.startsWith(DictionaryInfoUtils.MAIN_DICT_PREFIX)
- && dictionaryFileName.endsWith(".dict")) {
- return dictionaryFileName.substring(
- DictionaryInfoUtils.MAIN_DICT_PREFIX.length(),
- dictionaryFileName.lastIndexOf('.')
- );
- }
- return null;
- }
-
- @Nullable public static String[] getAssetsDictionaryList(final Context context) {
- final String[] dictionaryList;
- try {
- dictionaryList = context.getAssets().list(ASSETS_DICTIONARY_FOLDER);
- } catch (IOException e) {
- return null;
- }
- return dictionaryList;
- }
-
- public static boolean looksValidForDictionaryInsertion(final CharSequence text,
- final SpacingAndPunctuations spacingAndPunctuations) {
- if (TextUtils.isEmpty(text)) {
- return false;
- }
- final int length = text.length();
- if (length > DecoderSpecificConstants.DICTIONARY_MAX_WORD_LENGTH) {
- return false;
- }
- int i = 0;
- int digitCount = 0;
- while (i < length) {
- final int codePoint = Character.codePointAt(text, i);
- final int charCount = Character.charCount(codePoint);
- i += charCount;
- if (Character.isDigit(codePoint)) {
- // Count digits: see below
- digitCount += charCount;
- continue;
- }
- if (!spacingAndPunctuations.isWordCodePoint(codePoint)) {
- return false;
- }
- }
- // We reject strings entirely comprised of digits to avoid using PIN codes or credit
- // card numbers. It would come in handy for word prediction though; a good example is
- // when writing one's address where the street number is usually quite discriminative,
- // as well as the postal code.
- return digitCount < length;
- }
-}
diff --git a/app/src/main/java/helium314/keyboard/latin/utils/DictionaryInfoUtils.kt b/app/src/main/java/helium314/keyboard/latin/utils/DictionaryInfoUtils.kt
new file mode 100644
index 000000000..ec6322cbd
--- /dev/null
+++ b/app/src/main/java/helium314/keyboard/latin/utils/DictionaryInfoUtils.kt
@@ -0,0 +1,171 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ * modified
+ * SPDX-License-Identifier: Apache-2.0 AND GPL-3.0-only
+ */
+package helium314.keyboard.latin.utils
+
+import android.content.Context
+import android.text.TextUtils
+import com.android.inputmethod.latin.utils.BinaryDictionaryUtils
+import helium314.keyboard.latin.common.loopOverCodePoints
+import helium314.keyboard.latin.define.DecoderSpecificConstants
+import helium314.keyboard.latin.makedict.DictionaryHeader
+import helium314.keyboard.latin.makedict.UnsupportedFormatException
+import helium314.keyboard.latin.settings.SpacingAndPunctuations
+import java.io.File
+import java.io.IOException
+import java.util.Locale
+
+/** encapsulates the logic for the Latin-IME side of dictionary information management */
+object DictionaryInfoUtils {
+ private val TAG = DictionaryInfoUtils::class.java.simpleName // tag passed to Log calls in this object
+ const val DEFAULT_MAIN_DICT = "main" // base name reused by MAIN_DICT_PREFIX and MAIN_DICT_FILE_NAME
+ const val USER_DICTIONARY_SUFFIX = "user.dict"
+ const val MAIN_DICT_PREFIX = DEFAULT_MAIN_DICT + "_" // checked by extractLocaleFromAssetsDictionaryFile
+ const val ASSETS_DICTIONARY_FOLDER = "dicts"
+ const val MAIN_DICT_FILE_NAME = DEFAULT_MAIN_DICT + ".dict"
+ private const val MAX_HEX_DIGITS_FOR_CODEPOINT = 6 // hex digits per '%' escape; unicode is limited to 21 bits
+
+    /**
+     * Tells whether [codePoint] may be kept unescaped in a file or directory name.
+     * Only ASCII digits, ASCII letters, underscore and hyphen are accepted.
+     */
+    private fun isFileNameCharacter(codePoint: Int): Boolean = when (codePoint) {
+        in 0x30..0x39 -> true // '0'..'9'
+        in 0x41..0x5A -> true // 'A'..'Z'
+        in 0x61..0x7A -> true // 'a'..'z'
+        else -> codePoint == '_'.code || codePoint == '-'.code
+    }
+
+    /**
+     * Makes [name] safe to use as a file or directory name.
+     *
+     * Code points accepted by [isFileNameCharacter] are copied unchanged. Every other code
+     * point is written as '%' followed by its value as [MAX_HEX_DIGITS_FOR_CODEPOINT]
+     * zero-padded lowercase hex digits, similar to URL-encoding but stricter.
+     */
+    private fun replaceFileNameDangerousCharacters(name: String): String {
+        // '%' is assumed to be an ordinary, non-separator character in file names,
+        // which is probably true for all file systems.
+        val escapeFormat = "%%%1$0" + MAX_HEX_DIGITS_FOR_CODEPOINT + "x"
+        return buildString {
+            loopOverCodePoints(name) { codePoint, _ ->
+                when {
+                    isFileNameCharacter(codePoint) -> appendCodePoint(codePoint)
+                    else -> append(String.format(Locale.US, escapeFormat, codePoint))
+                }
+                false // NOTE(review): presumably tells loopOverCodePoints to keep iterating - confirm there
+            }
+        }
+    }
+
+    fun getWordListCacheDirectory(context: Context): String = context.filesDir.path + File.separator + "dicts" // parent of the per-locale cache folders
+
+    /** Reverse escaping done by replaceFileNameDangerousCharacters; a '%' that does not start a valid escape is kept as is. */
+    fun getWordListIdFromFileName(fname: String): String {
+        val sb = StringBuilder()
+        var i = 0
+        while (i < fname.length) {
+            val codePoint = fname.codePointAt(i)
+            val escapeEnd = i + 1 + MAX_HEX_DIGITS_FOR_CODEPOINT // index just past '%' and its hex digits
+            val decoded = fname.takeIf { codePoint == '%'.code && escapeEnd <= fname.length }
+                ?.substring(i + 1, escapeEnd)?.toIntOrNull(16)?.takeIf { Character.isValidCodePoint(it) }
+            if (decoded != null) {
+                sb.appendCodePoint(decoded)
+                i = escapeEnd
+            } else { // plain character, or truncated / non-hex / out-of-range escape: copy it instead of throwing
+                sb.appendCodePoint(codePoint)
+                i = fname.offsetByCodePoints(i, 1)
+            }
+        }
+        return sb.toString()
+    }
+
+    /** Lists the entries of the word list cache directory (one per locale); empty array when listFiles() yields null. */
+    fun getCachedDirectoryList(context: Context) = getWordListCacheDirectory(context).let { File(it).listFiles() }.orEmpty()
+
+    /** Returns the cache directory path for [locale], trying to create the directory when it does not exist yet. */
+    fun getAndCreateCacheDirectoryForLocale(locale: Locale, context: Context): String {
+        val dirPath = getCacheDirectoryForLocale(locale, context)
+        val dir = File(dirPath)
+        // mkdirs() only runs for a missing directory; a failure is logged and the path is still returned
+        val available = dir.exists() || dir.mkdirs()
+        if (!available) Log.e(TAG, "Could not create the directory for locale $locale")
+        return dirPath
+    }
+
+    /** Path of the cache directory for [locale]: the word list cache directory plus the escaped language tag. */
+    fun getCacheDirectoryForLocale(locale: Locale, context: Context): String =
+        replaceFileNameDangerousCharacters(locale.toLanguageTag())
+            .let { escapedTag -> getWordListCacheDirectory(context) + File.separator + escapedTag }
+
+    fun getCachedDictsForLocale(locale: Locale, context: Context) = // empty array when listFiles() yields null
+        getAndCreateCacheDirectoryForLocale(locale, context).let { dirPath -> File(dirPath).listFiles() }.orEmpty()
+
+    /** Header read from [file] at [offset] and [length], or null on UnsupportedFormatException or IOException. */
+    fun getDictionaryFileHeaderOrNull(file: File, offset: Long, length: Long): DictionaryHeader? =
+        try {
+            BinaryDictionaryUtils.getHeaderWithOffsetAndLength(file, offset, length)
+        } catch (e: IOException) {
+            null
+        } catch (e: UnsupportedFormatException) {
+            null
+        }
+
+    /** Header read from [file] by BinaryDictionaryUtils.getHeader, or null on UnsupportedFormatException or IOException. */
+    fun getDictionaryFileHeaderOrNull(file: File): DictionaryHeader? =
+        try {
+            BinaryDictionaryUtils.getHeader(file)
+        } catch (e: IOException) {
+            null
+        } catch (e: UnsupportedFormatException) {
+            null
+        }
+
+    /**
+     * Extracts the locale part from the name of a main dictionary file stored in assets.
+     *
+     * The expected pattern is `main_[locale].dict`.
+     * @return the text between the prefix and the extension, or null if the name does not match
+     */
+    fun extractLocaleFromAssetsDictionaryFile(dictionaryFileName: String): String? {
+        val matchesPattern = dictionaryFileName.startsWith(MAIN_DICT_PREFIX) && dictionaryFileName.endsWith(".dict")
+        if (!matchesPattern) return null
+        val extensionStart = dictionaryFileName.lastIndexOf('.')
+        return dictionaryFileName.substring(MAIN_DICT_PREFIX.length, extensionStart)
+    }
+
+ fun getAssetsDictionaryList(context: Context): Array