move DictionaryInfoUtils to Kotlin

This commit is contained in:
Helium314 2025-05-21 21:02:16 +02:00
parent e32a0c8e98
commit b1b357d6b8
4 changed files with 177 additions and 228 deletions

View file

@ -1,223 +0,0 @@
/*
* Copyright (C) 2013 The Android Open Source Project
* modified
* SPDX-License-Identifier: Apache-2.0 AND GPL-3.0-only
*/
package helium314.keyboard.latin.utils;
import android.content.Context;
import android.text.TextUtils;
import androidx.annotation.NonNull;
import androidx.annotation.Nullable;
import com.android.inputmethod.latin.utils.BinaryDictionaryUtils;
import helium314.keyboard.latin.define.DecoderSpecificConstants;
import helium314.keyboard.latin.makedict.DictionaryHeader;
import helium314.keyboard.latin.makedict.UnsupportedFormatException;
import helium314.keyboard.latin.settings.SpacingAndPunctuations;
import java.io.File;
import java.io.IOException;
import java.util.Locale;
/**
 * Static helpers for the Latin-IME side of dictionary information management: escaping of
 * locale tags into file-system-safe directory names (and back), locating the per-locale
 * dictionary cache directories, reading dictionary headers, and checking whether a word is
 * acceptable for insertion into a dictionary.
 */
public class DictionaryInfoUtils {
    private static final String TAG = DictionaryInfoUtils.class.getSimpleName();
    public static final String DEFAULT_MAIN_DICT = "main";
    public static final String USER_DICTIONARY_SUFFIX = "user.dict";
    public static final String MAIN_DICT_PREFIX = DEFAULT_MAIN_DICT + "_";
    // 6 digits - unicode is limited to 21 bits
    private static final int MAX_HEX_DIGITS_FOR_CODEPOINT = 6;
    public static final String ASSETS_DICTIONARY_FOLDER = "dicts";
    public static final String ID_CATEGORY_SEPARATOR = ":";
    private static final String DICTIONARY_CATEGORY_SEPARATOR_EXPRESSION = "[" + ID_CATEGORY_SEPARATOR + "_]";
    // Format producing a literal '%' followed by the code point as zero-padded lowercase hex.
    private static final String ESCAPE_FORMAT = "%%%1$0" + MAX_HEX_DIGITS_FOR_CODEPOINT + "x";

    private DictionaryInfoUtils() {
        // Private constructor to forbid instantiation of this helper class.
    }

    /**
     * Returns whether we may want to use this character as part of a file name.
     * <p>
     * Accepts only ASCII digits, ASCII letters, '_' and '-'; everything else is rejected.
     */
    private static boolean isFileNameCharacter(int codePoint) {
        if (codePoint >= 0x30 && codePoint <= 0x39) return true; // Digit
        if (codePoint >= 0x41 && codePoint <= 0x5A) return true; // Uppercase
        if (codePoint >= 0x61 && codePoint <= 0x7A) return true; // Lowercase
        return codePoint == '_' || codePoint == '-';
    }

    /**
     * Escapes a string for any characters that may be suspicious for a file or directory name.
     * <p>
     * Concretely this does a sort of URL-encoding except it will encode everything that is not
     * an ASCII letter, an ASCII digit, '_' or '-'. (true URL-encoding leaves alone characters
     * like '*', which we cannot allow here). Every escaped code point becomes '%' followed by
     * exactly {@link #MAX_HEX_DIGITS_FOR_CODEPOINT} hex digits.
     *
     * @param name the string to escape
     * @return the escaped string
     */
    // TODO: create a unit test for this method
    public static String replaceFileNameDangerousCharacters(final String name) {
        // This assumes '%' is fully available as a non-separator, normal
        // character in a file name. This is probably true for all file systems.
        final StringBuilder sb = new StringBuilder();
        final int nameLength = name.length();
        for (int i = 0; i < nameLength; i = name.offsetByCodePoints(i, 1)) {
            final int codePoint = name.codePointAt(i);
            if (isFileNameCharacter(codePoint)) {
                sb.appendCodePoint(codePoint);
            } else {
                sb.append(String.format(Locale.US, ESCAPE_FORMAT, codePoint));
            }
        }
        return sb.toString();
    }

    /**
     * Helper method to get the top level cache directory.
     *
     * @return the "dicts" folder path below the context's files directory
     */
    public static String getWordListCacheDirectory(final Context context) {
        return context.getFilesDir() + File.separator + "dicts";
    }

    /**
     * Reverse escaping done by {@link #replaceFileNameDangerousCharacters(String)}.
     * <p>
     * A '%' that is not followed by exactly {@link #MAX_HEX_DIGITS_FOR_CODEPOINT} hex digits
     * forming a valid code point is never produced by the escaping; such a '%' is copied
     * through unchanged instead of throwing, so arbitrary file names can be passed safely.
     *
     * @param fname the (escaped) file or directory name
     * @return the unescaped word list id
     */
    @NonNull
    public static String getWordListIdFromFileName(@NonNull final String fname) {
        final StringBuilder sb = new StringBuilder();
        final int fnameLength = fname.length();
        int i = 0;
        while (i < fnameLength) {
            final int codePoint = fname.codePointAt(i);
            // + 1 to pass the % sign
            final int decoded = codePoint == '%' ? decodeEscapedCodePoint(fname, i + 1) : -1;
            if (decoded >= 0) {
                sb.appendCodePoint(decoded);
                i += 1 + MAX_HEX_DIGITS_FOR_CODEPOINT;
            } else {
                sb.appendCodePoint(codePoint);
                i += Character.charCount(codePoint);
            }
        }
        return sb.toString();
    }

    /**
     * Parses the {@link #MAX_HEX_DIGITS_FOR_CODEPOINT} hex digits starting at {@code start}.
     *
     * @return the decoded code point, or -1 if the text is too short, contains a non-hex
     *         character in that range, or the value is not a valid Unicode code point
     */
    private static int decodeEscapedCodePoint(final String text, final int start) {
        final int end = start + MAX_HEX_DIGITS_FOR_CODEPOINT;
        if (end > text.length()) return -1;
        int value = 0;
        for (int index = start; index < end; index++) {
            final int digit = Character.digit(text.charAt(index), 16);
            if (digit < 0) return -1;
            value = value * 16 + digit;
        }
        return Character.isValidCodePoint(value) ? value : -1;
    }

    /**
     * Helper method to get the list of cache directories, one for each distinct locale.
     *
     * @return the entries of the word list cache directory, or null if it cannot be listed
     */
    @Nullable public static File[] getCachedDirectoryList(final Context context) {
        return new File(getWordListCacheDirectory(context)).listFiles();
    }

    /**
     * Find out the cache directory associated with a specific locale, creating it if needed.
     * A failure to create the directory is only logged; the path is returned regardless.
     */
    public static String getAndCreateCacheDirectoryForLocale(final Locale locale, final Context context) {
        final String absoluteDirectoryName = getCacheDirectoryForLocale(locale, context);
        final File directory = new File(absoluteDirectoryName);
        if (!directory.exists() && !directory.mkdirs()) {
            Log.e(TAG, "Could not create the directory for locale " + locale);
        }
        return absoluteDirectoryName;
    }

    /**
     * Returns the path of the cache directory for the locale without creating it. The
     * directory name is the escaped language tag of the locale.
     */
    public static String getCacheDirectoryForLocale(final Locale locale, final Context context) {
        final String relativeDirectoryName = replaceFileNameDangerousCharacters(locale.toLanguageTag());
        return getWordListCacheDirectory(context) + File.separator + relativeDirectoryName;
    }

    /**
     * Returns the files in the cache directory of the locale (creating the directory if
     * necessary), or an empty array if the directory is missing or cannot be listed.
     */
    public static File[] getCachedDictsForLocale(final Locale locale, final Context context) {
        final File cachedDir = new File(getAndCreateCacheDirectoryForLocale(locale, context));
        // listFiles() may return null even for a directory (e.g. on an I/O error)
        final File[] files = cachedDir.isDirectory() ? cachedDir.listFiles() : null;
        return files != null ? files : new File[0];
    }

    /** Returns the file name used for the extracted main dictionary. */
    public static String getExtractedMainDictFilename() {
        return DEFAULT_MAIN_DICT + ".dict";
    }

    /**
     * Reads the dictionary header from the given region of the file.
     *
     * @return the header, or null if the format is unsupported or reading fails
     */
    @Nullable
    public static DictionaryHeader getDictionaryFileHeaderOrNull(final File file,
            final long offset, final long length) {
        try {
            return BinaryDictionaryUtils.getHeaderWithOffsetAndLength(file, offset, length);
        } catch (UnsupportedFormatException | IOException e) {
            return null;
        }
    }

    /**
     * Reads the dictionary header of the file.
     *
     * @return the header, or null if the format is unsupported or reading fails
     */
    @Nullable
    public static DictionaryHeader getDictionaryFileHeaderOrNull(final File file) {
        try {
            return BinaryDictionaryUtils.getHeader(file);
        } catch (UnsupportedFormatException | IOException e) {
            return null;
        }
    }

    /**
     * Returns the locale for a dictionary file name stored in assets.
     * <p>
     * Assumes file name main_[locale].dict
     * <p>
     * Returns the locale, or null if file name does not match the pattern
     */
    @Nullable public static String extractLocaleFromAssetsDictionaryFile(final String dictionaryFileName) {
        if (dictionaryFileName.startsWith(MAIN_DICT_PREFIX) && dictionaryFileName.endsWith(".dict")) {
            return dictionaryFileName.substring(MAIN_DICT_PREFIX.length(), dictionaryFileName.lastIndexOf('.'));
        }
        return null;
    }

    /**
     * Lists the entries of the assets dictionary folder.
     *
     * @return the entry names, or null if listing the assets throws an IOException
     */
    @Nullable public static String[] getAssetsDictionaryList(final Context context) {
        try {
            return context.getAssets().list(ASSETS_DICTIONARY_FOLDER);
        } catch (IOException e) {
            return null;
        }
    }

    /**
     * Returns whether the text may be inserted into a dictionary: it must be non-empty, not
     * longer than the maximum dictionary word length, consist only of digits and word code
     * points, and must not consist of digits only.
     */
    public static boolean looksValidForDictionaryInsertion(final CharSequence text,
            final SpacingAndPunctuations spacingAndPunctuations) {
        if (TextUtils.isEmpty(text)) {
            return false;
        }
        final int length = text.length();
        if (length > DecoderSpecificConstants.DICTIONARY_MAX_WORD_LENGTH) {
            return false;
        }
        int i = 0;
        int digitCount = 0;
        while (i < length) {
            final int codePoint = Character.codePointAt(text, i);
            final int charCount = Character.charCount(codePoint);
            i += charCount;
            if (Character.isDigit(codePoint)) {
                // Count digits in chars (not code points) so the sum is comparable to length
                digitCount += charCount;
            } else if (!spacingAndPunctuations.isWordCodePoint(codePoint)) {
                return false;
            }
        }
        // We reject strings entirely comprised of digits to avoid using PIN codes or credit
        // card numbers. It would come in handy for word prediction though; a good example is
        // when writing one's address where the street number is usually quite discriminative,
        // as well as the postal code.
        return digitCount < length;
    }
}

View file

@ -0,0 +1,171 @@
/*
* Copyright (C) 2013 The Android Open Source Project
* modified
* SPDX-License-Identifier: Apache-2.0 AND GPL-3.0-only
*/
package helium314.keyboard.latin.utils
import android.content.Context
import android.text.TextUtils
import com.android.inputmethod.latin.utils.BinaryDictionaryUtils
import helium314.keyboard.latin.common.loopOverCodePoints
import helium314.keyboard.latin.define.DecoderSpecificConstants
import helium314.keyboard.latin.makedict.DictionaryHeader
import helium314.keyboard.latin.makedict.UnsupportedFormatException
import helium314.keyboard.latin.settings.SpacingAndPunctuations
import java.io.File
import java.io.IOException
import java.util.Locale
/**
 * Encapsulates the logic for the Latin-IME side of dictionary information management:
 * escaping of locale tags into file-system-safe directory names (and back), locating the
 * per-locale dictionary cache directories, reading dictionary headers, and checking whether
 * a word is acceptable for insertion into a dictionary.
 */
object DictionaryInfoUtils {
    private val TAG = DictionaryInfoUtils::class.java.simpleName
    const val DEFAULT_MAIN_DICT = "main"
    const val USER_DICTIONARY_SUFFIX = "user.dict"
    const val MAIN_DICT_PREFIX = DEFAULT_MAIN_DICT + "_"
    const val ASSETS_DICTIONARY_FOLDER = "dicts"
    const val MAIN_DICT_FILE_NAME = DEFAULT_MAIN_DICT + ".dict"
    private const val MAX_HEX_DIGITS_FOR_CODEPOINT = 6 // unicode is limited to 21 bits

    /**
     * Returns whether we may want to use this character as part of a file name.
     * Accepts only ASCII digits, ASCII letters, '_' and '-'; everything else is rejected.
     */
    private fun isFileNameCharacter(codePoint: Int): Boolean = when (codePoint) {
        in 0x30..0x39 -> true // Digit
        in 0x41..0x5A -> true // Uppercase
        in 0x61..0x7A -> true // Lowercase
        else -> codePoint == '_'.code || codePoint == '-'.code
    }

    /**
     * Escapes a string for any characters that may be suspicious for a file or directory name.
     *
     * Concretely this does a sort of URL-encoding except it will encode everything that is not
     * an ASCII letter, an ASCII digit, '_' or '-'. (true URL-encoding leaves alone characters
     * like '*', which we cannot allow here). Every escaped code point becomes '%' followed by
     * exactly [MAX_HEX_DIGITS_FOR_CODEPOINT] hex digits.
     */
    private fun replaceFileNameDangerousCharacters(name: String): String {
        // This assumes '%' is fully available as a non-separator, normal
        // character in a file name. This is probably true for all file systems.
        val sb = StringBuilder()
        loopOverCodePoints(name) { codePoint, _ ->
            if (isFileNameCharacter(codePoint)) {
                sb.appendCodePoint(codePoint)
            } else {
                sb.append(String.format(Locale.US, "%%%1$0" + MAX_HEX_DIGITS_FOR_CODEPOINT + "x", codePoint))
            }
            // NOTE(review): the lambda result is presumably "stop looping" - confirm against loopOverCodePoints
            false
        }
        return sb.toString()
    }

    /** Returns the path of the top level cache directory: "dicts" below the context's files directory. */
    fun getWordListCacheDirectory(context: Context): String = context.filesDir.toString() + File.separator + "dicts"

    /**
     * Reverse escaping done by [replaceFileNameDangerousCharacters].
     *
     * A '%' that is not followed by exactly [MAX_HEX_DIGITS_FOR_CODEPOINT] hex digits forming a
     * valid code point is never produced by the escaping; such a '%' is copied through unchanged
     * instead of throwing, so arbitrary directory names can be passed safely.
     */
    fun getWordListIdFromFileName(fname: String): String {
        val sb = StringBuilder()
        var i = 0
        while (i < fname.length) {
            val codePoint = fname.codePointAt(i)
            // + 1 to pass the % sign
            val decoded = if (codePoint == '%'.code) decodeEscapedCodePoint(fname, i + 1) else -1
            if (decoded >= 0) {
                sb.appendCodePoint(decoded)
                i += 1 + MAX_HEX_DIGITS_FOR_CODEPOINT
            } else {
                sb.appendCodePoint(codePoint)
                i += Character.charCount(codePoint)
            }
        }
        return sb.toString()
    }

    /**
     * Parses the [MAX_HEX_DIGITS_FOR_CODEPOINT] hex digits of [text] starting at [start].
     * Returns the decoded code point, or -1 if the text is too short, contains a non-hex
     * character in that range, or the value is not a valid Unicode code point.
     */
    private fun decodeEscapedCodePoint(text: String, start: Int): Int {
        val end = start + MAX_HEX_DIGITS_FOR_CODEPOINT
        if (end > text.length) return -1
        var value = 0
        for (index in start until end) {
            val digit = Character.digit(text[index], 16)
            if (digit < 0) return -1
            value = value * 16 + digit
        }
        return if (Character.isValidCodePoint(value)) value else -1
    }

    /**
     * Returns the entries of the word list cache directory, one directory for each distinct
     * locale, or an empty array if the directory cannot be listed.
     */
    fun getCachedDirectoryList(context: Context) = File(getWordListCacheDirectory(context)).listFiles().orEmpty()

    /**
     * Find out the cache directory associated with a specific locale, creating it if needed.
     * A failure to create the directory is only logged; the path is returned regardless.
     */
    fun getAndCreateCacheDirectoryForLocale(locale: Locale, context: Context): String {
        val absoluteDirectoryName = getCacheDirectoryForLocale(locale, context)
        val directory = File(absoluteDirectoryName)
        if (!directory.exists() && !directory.mkdirs()) {
            Log.e(TAG, "Could not create the directory for locale $locale")
        }
        return absoluteDirectoryName
    }

    /**
     * Returns the path of the cache directory for the locale without creating it. The
     * directory name is the escaped language tag of the locale.
     */
    fun getCacheDirectoryForLocale(locale: Locale, context: Context): String {
        val relativeDirectoryName = replaceFileNameDangerousCharacters(locale.toLanguageTag())
        return getWordListCacheDirectory(context) + File.separator + relativeDirectoryName
    }

    /**
     * Returns the files in the cache directory of the locale (creating the directory if
     * necessary), or an empty array if the directory cannot be listed.
     */
    fun getCachedDictsForLocale(locale: Locale, context: Context) =
        File(getAndCreateCacheDirectoryForLocale(locale, context)).listFiles().orEmpty()

    /**
     * Reads the dictionary header from the given region of the file.
     * Returns null if the format is unsupported or reading fails.
     */
    fun getDictionaryFileHeaderOrNull(file: File, offset: Long, length: Long): DictionaryHeader? =
        try {
            BinaryDictionaryUtils.getHeaderWithOffsetAndLength(file, offset, length)
        } catch (e: UnsupportedFormatException) {
            null
        } catch (e: IOException) {
            null
        }

    /**
     * Reads the dictionary header of the file.
     * Returns null if the format is unsupported or reading fails.
     */
    fun getDictionaryFileHeaderOrNull(file: File): DictionaryHeader? =
        try {
            BinaryDictionaryUtils.getHeader(file)
        } catch (e: UnsupportedFormatException) {
            null
        } catch (e: IOException) {
            null
        }

    /**
     * Returns the locale for a dictionary file name stored in assets.
     *
     * Assumes file name main_[locale].dict
     * Returns the locale, or null if file name does not match the pattern
     */
    fun extractLocaleFromAssetsDictionaryFile(dictionaryFileName: String): String? {
        val matchesPattern = dictionaryFileName.startsWith(MAIN_DICT_PREFIX) && dictionaryFileName.endsWith(".dict")
        if (!matchesPattern) return null
        return dictionaryFileName.substring(MAIN_DICT_PREFIX.length, dictionaryFileName.lastIndexOf('.'))
    }

    /** Lists the entries of the assets dictionary folder, or returns null if listing throws an IOException. */
    fun getAssetsDictionaryList(context: Context): Array<String>? = try {
        context.assets.list(ASSETS_DICTIONARY_FOLDER)
    } catch (e: IOException) {
        null
    }

    /**
     * Returns whether [text] may be inserted into a dictionary: it must be non-empty, not longer
     * than the maximum dictionary word length, consist only of digits and word code points
     * (according to [spacingAndPunctuations]), and must not consist of digits only.
     */
    @JvmStatic
    fun looksValidForDictionaryInsertion(text: CharSequence, spacingAndPunctuations: SpacingAndPunctuations): Boolean {
        if (TextUtils.isEmpty(text)) {
            return false
        }
        if (text.length > DecoderSpecificConstants.DICTIONARY_MAX_WORD_LENGTH) {
            return false
        }
        var digitCount = 0
        loopOverCodePoints(text) { codePoint, charCount ->
            if (Character.isDigit(codePoint)) {
                // Count digits in chars (not code points) so the sum is comparable to text.length
                digitCount += charCount
            } else if (!spacingAndPunctuations.isWordCodePoint(codePoint)) {
                // non-local return: leaves looksValidForDictionaryInsertion itself
                return false
            }
            false
        }
        // We reject strings entirely comprised of digits to avoid using PIN codes or credit
        // card numbers. It would come in handy for word prediction though; a good example is
        // when writing one's address where the street number is usually quite discriminative,
        // as well as the postal code.
        return digitCount < text.length
    }
}

View file

@ -25,7 +25,7 @@ fun getDictionaryLocales(context: Context): MutableSet<Locale> {
val locales = HashSet<Locale>()
// get cached dictionaries: extracted or user-added dictionaries
DictionaryInfoUtils.getCachedDirectoryList(context)?.forEach { directory ->
DictionaryInfoUtils.getCachedDirectoryList(context).forEach { directory ->
if (!directory.isDirectory) return@forEach
if (!hasAnythingOtherThanExtractedMainDictionary(directory)) return@forEach
val locale = DictionaryInfoUtils.getWordListIdFromFileName(directory.name).constructLocale()
@ -135,4 +135,4 @@ fun cleanUnusedMainDicts(context: Context) {
}
private fun hasAnythingOtherThanExtractedMainDictionary(dir: File) =
dir.listFiles()?.any { it.name != DictionaryInfoUtils.getExtractedMainDictFilename() } != false
dir.listFiles()?.any { it.name != DictionaryInfoUtils.MAIN_DICT_FILE_NAME } != false

View file

@ -32,6 +32,7 @@ import helium314.keyboard.settings.DropDownField
import helium314.keyboard.settings.WithSmallTitle
import java.io.File
import java.util.Locale
import androidx.compose.ui.platform.LocalConfiguration
@Composable
fun NewDictionaryDialog(
@ -55,7 +56,7 @@ fun NewDictionaryDialog(
val cacheDir = DictionaryInfoUtils.getAndCreateCacheDirectoryForLocale(locale, ctx)
val dictFile = File(cacheDir, header.mIdString.substringBefore(":") + "_" + DictionaryInfoUtils.USER_DICTIONARY_SUFFIX)
val type = header.mIdString.substringBefore(":")
val info = header.info(ctx.resources.configuration.locale())
val info = header.info(LocalConfiguration.current.locale())
ThreeButtonAlertDialog(
onDismissRequest = { onDismissRequest(); cachedFile.delete() },
onConfirmed = {
@ -64,7 +65,7 @@ fun NewDictionaryDialog(
cachedFile.renameTo(dictFile)
if (type == Dictionary.TYPE_MAIN) {
// replaced main dict, remove the one created from internal data
val internalMainDictFile = File(cacheDir, DictionaryInfoUtils.getExtractedMainDictFilename())
val internalMainDictFile = File(cacheDir, DictionaryInfoUtils.MAIN_DICT_FILE_NAME)
internalMainDictFile.delete()
}
val newDictBroadcast = Intent(DictionaryPackConstants.NEW_DICTIONARY_INTENT_ACTION)
@ -92,7 +93,7 @@ fun NewDictionaryDialog(
)
}
if (dictFile.exists()) {
val oldInfo = DictionaryInfoUtils.getDictionaryFileHeaderOrNull(dictFile, 0, dictFile.length())?.info(ctx.resources.configuration.locale())
val oldInfo = DictionaryInfoUtils.getDictionaryFileHeaderOrNull(dictFile, 0, dictFile.length())?.info(LocalConfiguration.current.locale())
HorizontalDivider()
Text(
stringResource(R.string.replace_dictionary_message, type, oldInfo ?: "(no info)", info),