mirror of
https://github.com/Helium314/HeliBoard.git
synced 2025-06-09 08:07:42 +00:00
move DictionaryInfoUtils to Kotlin
This commit is contained in:
parent
e32a0c8e98
commit
b1b357d6b8
4 changed files with 177 additions and 228 deletions
|
@ -1,223 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2013 The Android Open Source Project
|
||||
* modified
|
||||
* SPDX-License-Identifier: Apache-2.0 AND GPL-3.0-only
|
||||
*/
|
||||
|
||||
package helium314.keyboard.latin.utils;
|
||||
|
||||
import android.content.Context;
|
||||
import android.text.TextUtils;
|
||||
|
||||
import androidx.annotation.NonNull;
|
||||
import androidx.annotation.Nullable;
|
||||
|
||||
import com.android.inputmethod.latin.utils.BinaryDictionaryUtils;
|
||||
|
||||
import helium314.keyboard.latin.define.DecoderSpecificConstants;
|
||||
import helium314.keyboard.latin.makedict.DictionaryHeader;
|
||||
import helium314.keyboard.latin.makedict.UnsupportedFormatException;
|
||||
import helium314.keyboard.latin.settings.SpacingAndPunctuations;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.Locale;
|
||||
|
||||
/**
|
||||
* This class encapsulates the logic for the Latin-IME side of dictionary information management.
|
||||
*/
|
||||
public class DictionaryInfoUtils {
|
||||
private static final String TAG = DictionaryInfoUtils.class.getSimpleName();
|
||||
public static final String DEFAULT_MAIN_DICT = "main";
|
||||
public static final String USER_DICTIONARY_SUFFIX = "user.dict";
|
||||
public static final String MAIN_DICT_PREFIX = DEFAULT_MAIN_DICT + "_";
|
||||
// 6 digits - unicode is limited to 21 bits
|
||||
private static final int MAX_HEX_DIGITS_FOR_CODEPOINT = 6;
|
||||
public static final String ASSETS_DICTIONARY_FOLDER = "dicts";
|
||||
public static final String ID_CATEGORY_SEPARATOR = ":";
|
||||
private static final String DICTIONARY_CATEGORY_SEPARATOR_EXPRESSION = "[" + ID_CATEGORY_SEPARATOR + "_]";
|
||||
|
||||
private DictionaryInfoUtils() {
|
||||
// Private constructor to forbid instantation of this helper class.
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether we may want to use this character as part of a file name.
|
||||
* <p>
|
||||
* This basically only accepts ascii letters and numbers, and rejects everything else.
|
||||
*/
|
||||
private static boolean isFileNameCharacter(int codePoint) {
|
||||
if (codePoint >= 0x30 && codePoint <= 0x39) return true; // Digit
|
||||
if (codePoint >= 0x41 && codePoint <= 0x5A) return true; // Uppercase
|
||||
if (codePoint >= 0x61 && codePoint <= 0x7A) return true; // Lowercase
|
||||
return codePoint == '_' || codePoint == '-';
|
||||
}
|
||||
|
||||
/**
|
||||
* Escapes a string for any characters that may be suspicious for a file or directory name.
|
||||
* <p>
|
||||
* Concretely this does a sort of URL-encoding except it will encode everything that's not
|
||||
* alphanumeric or underscore. (true URL-encoding leaves alone characters like '*', which
|
||||
* we cannot allow here)
|
||||
*/
|
||||
// TODO: create a unit test for this method
|
||||
public static String replaceFileNameDangerousCharacters(final String name) {
|
||||
// This assumes '%' is fully available as a non-separator, normal
|
||||
// character in a file name. This is probably true for all file systems.
|
||||
final StringBuilder sb = new StringBuilder();
|
||||
final int nameLength = name.length();
|
||||
for (int i = 0; i < nameLength; i = name.offsetByCodePoints(i, 1)) {
|
||||
final int codePoint = name.codePointAt(i);
|
||||
if (DictionaryInfoUtils.isFileNameCharacter(codePoint)) {
|
||||
sb.appendCodePoint(codePoint);
|
||||
} else {
|
||||
sb.append(String.format(Locale.US, "%%%1$0" + MAX_HEX_DIGITS_FOR_CODEPOINT + "x", codePoint));
|
||||
}
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper method to get the top level cache directory.
|
||||
*/
|
||||
public static String getWordListCacheDirectory(final Context context) {
|
||||
return context.getFilesDir() + File.separator + "dicts";
|
||||
}
|
||||
|
||||
/**
|
||||
* Reverse escaping done by {@link #replaceFileNameDangerousCharacters(String)}.
|
||||
*/
|
||||
@NonNull
|
||||
public static String getWordListIdFromFileName(@NonNull final String fname) {
|
||||
final StringBuilder sb = new StringBuilder();
|
||||
final int fnameLength = fname.length();
|
||||
for (int i = 0; i < fnameLength; i = fname.offsetByCodePoints(i, 1)) {
|
||||
final int codePoint = fname.codePointAt(i);
|
||||
if ('%' != codePoint) {
|
||||
sb.appendCodePoint(codePoint);
|
||||
} else {
|
||||
// + 1 to pass the % sign
|
||||
final int encodedCodePoint =
|
||||
Integer.parseInt(fname.substring(i + 1, i + 1 + MAX_HEX_DIGITS_FOR_CODEPOINT), 16);
|
||||
i += MAX_HEX_DIGITS_FOR_CODEPOINT;
|
||||
sb.appendCodePoint(encodedCodePoint);
|
||||
}
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper method to the list of cache directories, one for each distinct locale.
|
||||
*/
|
||||
@Nullable public static File[] getCachedDirectoryList(final Context context) {
|
||||
return new File(DictionaryInfoUtils.getWordListCacheDirectory(context)).listFiles();
|
||||
}
|
||||
|
||||
/**
|
||||
* Find out the cache directory associated with a specific locale.
|
||||
*/
|
||||
public static String getAndCreateCacheDirectoryForLocale(final Locale locale, final Context context) {
|
||||
final String absoluteDirectoryName = getCacheDirectoryForLocale(locale, context);
|
||||
final File directory = new File(absoluteDirectoryName);
|
||||
if (!directory.exists()) {
|
||||
if (!directory.mkdirs()) {
|
||||
Log.e(TAG, "Could not create the directory for locale" + locale);
|
||||
}
|
||||
}
|
||||
return absoluteDirectoryName;
|
||||
}
|
||||
|
||||
public static String getCacheDirectoryForLocale(final Locale locale, final Context context) {
|
||||
final String relativeDirectoryName = replaceFileNameDangerousCharacters(locale.toLanguageTag());
|
||||
return getWordListCacheDirectory(context) + File.separator + relativeDirectoryName;
|
||||
}
|
||||
|
||||
public static File[] getCachedDictsForLocale(final Locale locale, final Context context) {
|
||||
final File cachedDir = new File(getAndCreateCacheDirectoryForLocale(locale, context));
|
||||
if (!cachedDir.isDirectory())
|
||||
return new File[]{};
|
||||
return cachedDir.listFiles();
|
||||
}
|
||||
|
||||
public static String getExtractedMainDictFilename() {
|
||||
return DEFAULT_MAIN_DICT + ".dict";
|
||||
}
|
||||
|
||||
@Nullable
|
||||
public static DictionaryHeader getDictionaryFileHeaderOrNull(final File file,
|
||||
final long offset, final long length) {
|
||||
try {
|
||||
return BinaryDictionaryUtils.getHeaderWithOffsetAndLength(file, offset, length);
|
||||
} catch (UnsupportedFormatException | IOException e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
@Nullable
|
||||
public static DictionaryHeader getDictionaryFileHeaderOrNull(final File file) {
|
||||
try {
|
||||
return BinaryDictionaryUtils.getHeader(file);
|
||||
} catch (UnsupportedFormatException | IOException e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the locale for a dictionary file name stored in assets.
|
||||
* <p>
|
||||
* Assumes file name main_[locale].dict
|
||||
* <p>
|
||||
* Returns the locale, or null if file name does not match the pattern
|
||||
*/
|
||||
@Nullable public static String extractLocaleFromAssetsDictionaryFile(final String dictionaryFileName) {
|
||||
if (dictionaryFileName.startsWith(DictionaryInfoUtils.MAIN_DICT_PREFIX)
|
||||
&& dictionaryFileName.endsWith(".dict")) {
|
||||
return dictionaryFileName.substring(
|
||||
DictionaryInfoUtils.MAIN_DICT_PREFIX.length(),
|
||||
dictionaryFileName.lastIndexOf('.')
|
||||
);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Nullable public static String[] getAssetsDictionaryList(final Context context) {
|
||||
final String[] dictionaryList;
|
||||
try {
|
||||
dictionaryList = context.getAssets().list(ASSETS_DICTIONARY_FOLDER);
|
||||
} catch (IOException e) {
|
||||
return null;
|
||||
}
|
||||
return dictionaryList;
|
||||
}
|
||||
|
||||
public static boolean looksValidForDictionaryInsertion(final CharSequence text,
|
||||
final SpacingAndPunctuations spacingAndPunctuations) {
|
||||
if (TextUtils.isEmpty(text)) {
|
||||
return false;
|
||||
}
|
||||
final int length = text.length();
|
||||
if (length > DecoderSpecificConstants.DICTIONARY_MAX_WORD_LENGTH) {
|
||||
return false;
|
||||
}
|
||||
int i = 0;
|
||||
int digitCount = 0;
|
||||
while (i < length) {
|
||||
final int codePoint = Character.codePointAt(text, i);
|
||||
final int charCount = Character.charCount(codePoint);
|
||||
i += charCount;
|
||||
if (Character.isDigit(codePoint)) {
|
||||
// Count digits: see below
|
||||
digitCount += charCount;
|
||||
continue;
|
||||
}
|
||||
if (!spacingAndPunctuations.isWordCodePoint(codePoint)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// We reject strings entirely comprised of digits to avoid using PIN codes or credit
|
||||
// card numbers. It would come in handy for word prediction though; a good example is
|
||||
// when writing one's address where the street number is usually quite discriminative,
|
||||
// as well as the postal code.
|
||||
return digitCount < length;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,171 @@
|
|||
/*
|
||||
* Copyright (C) 2013 The Android Open Source Project
|
||||
* modified
|
||||
* SPDX-License-Identifier: Apache-2.0 AND GPL-3.0-only
|
||||
*/
|
||||
package helium314.keyboard.latin.utils
|
||||
|
||||
import android.content.Context
|
||||
import android.text.TextUtils
|
||||
import com.android.inputmethod.latin.utils.BinaryDictionaryUtils
|
||||
import helium314.keyboard.latin.common.loopOverCodePoints
|
||||
import helium314.keyboard.latin.define.DecoderSpecificConstants
|
||||
import helium314.keyboard.latin.makedict.DictionaryHeader
|
||||
import helium314.keyboard.latin.makedict.UnsupportedFormatException
|
||||
import helium314.keyboard.latin.settings.SpacingAndPunctuations
|
||||
import java.io.File
|
||||
import java.io.IOException
|
||||
import java.util.Locale
|
||||
|
||||
/**
 * Encapsulates the logic for the Latin-IME side of dictionary information management:
 * building and decoding file-system-safe names for word list ids, locating the per-locale
 * cache directories, reading dictionary headers and validating words before insertion.
 */
object DictionaryInfoUtils {
    private val TAG = DictionaryInfoUtils::class.java.simpleName
    const val DEFAULT_MAIN_DICT = "main"
    const val USER_DICTIONARY_SUFFIX = "user.dict"
    const val MAIN_DICT_PREFIX = DEFAULT_MAIN_DICT + "_"
    const val ASSETS_DICTIONARY_FOLDER = "dicts"
    const val MAIN_DICT_FILE_NAME = DEFAULT_MAIN_DICT + ".dict"
    private const val MAX_HEX_DIGITS_FOR_CODEPOINT = 6 // unicode is limited to 21 bits

    /**
     * Returns whether we may want to use this character as part of a file name.
     * Only ASCII letters, ASCII digits, '_' and '-' are accepted; everything else is rejected.
     */
    private fun isFileNameCharacter(codePoint: Int): Boolean {
        if (codePoint in 0x30..0x39) return true // Digit
        if (codePoint in 0x41..0x5A) return true // Uppercase
        if (codePoint in 0x61..0x7A) return true // Lowercase
        return codePoint == '_'.code || codePoint == '-'.code
    }

    /**
     * Escapes a string for any characters that may be suspicious for a file or directory name.
     *
     * Concretely this does a sort of URL-encoding except it will encode everything that is not
     * an ASCII letter, an ASCII digit, '_' or '-'. (true URL-encoding leaves alone characters
     * like '*', which we cannot allow here). Every escaped code point is written as '%'
     * followed by exactly [MAX_HEX_DIGITS_FOR_CODEPOINT] lowercase hex digits.
     */
    private fun replaceFileNameDangerousCharacters(name: String): String {
        // This assumes '%' is fully available as a non-separator, normal
        // character in a file name. This is probably true for all file systems.
        val sb = StringBuilder()
        loopOverCodePoints(name) { codePoint, _ ->
            if (isFileNameCharacter(codePoint)) {
                sb.appendCodePoint(codePoint)
            } else {
                sb.append(String.format(Locale.US, "%%%1$0" + MAX_HEX_DIGITS_FOR_CODEPOINT + "x", codePoint))
            }
            false
        }
        return sb.toString()
    }

    /** Returns the path of the top level cache directory ("dicts" inside the files directory). */
    fun getWordListCacheDirectory(context: Context): String = context.filesDir.toString() + File.separator + "dicts"

    /**
     * Reverse escaping done by replaceFileNameDangerousCharacters.
     *
     * A '%' that is not followed by a well-formed escape (exactly [MAX_HEX_DIGITS_FOR_CODEPOINT]
     * hex digits encoding a valid code point) is copied to the result unchanged instead of
     * throwing, so stray file names cannot crash callers.
     */
    fun getWordListIdFromFileName(fname: String): String {
        val sb = StringBuilder()
        val fnameLength = fname.length
        var i = 0
        while (i < fnameLength) {
            val codePoint = fname.codePointAt(i)
            // + 1 to pass the % sign
            val decoded = if (codePoint == '%'.code) decodeEscapedCodePoint(fname, i + 1) else -1
            if (decoded >= 0) {
                sb.appendCodePoint(decoded)
                i += 1 + MAX_HEX_DIGITS_FOR_CODEPOINT
            } else {
                sb.appendCodePoint(codePoint)
                i += Character.charCount(codePoint)
            }
        }
        return sb.toString()
    }

    /**
     * Parses the [MAX_HEX_DIGITS_FOR_CODEPOINT] hex digits starting at [start].
     * Returns the encoded code point, or -1 if the text is too short, contains a non-hex
     * character in that range, or encodes a value that is not a valid code point.
     */
    private fun decodeEscapedCodePoint(text: String, start: Int): Int {
        val end = start + MAX_HEX_DIGITS_FOR_CODEPOINT
        if (end > text.length) return -1
        var value = 0
        for (index in start until end) {
            val digit = Character.digit(text[index], 16)
            if (digit < 0) return -1
            value = value * 16 + digit
        }
        return if (Character.isValidCodePoint(value)) value else -1
    }

    /**
     * Helper method to get the list of cache directories, one for each distinct locale.
     * Returns an empty array if the top level cache directory cannot be listed.
     */
    fun getCachedDirectoryList(context: Context) = File(getWordListCacheDirectory(context)).listFiles().orEmpty()

    /**
     * Find out the cache directory associated with a specific locale, creating it if needed.
     * A failure to create the directory is only logged; the path is returned in any case.
     */
    fun getAndCreateCacheDirectoryForLocale(locale: Locale, context: Context): String {
        val absoluteDirectoryName = getCacheDirectoryForLocale(locale, context)
        val directory = File(absoluteDirectoryName)
        // Re-check after mkdirs(): it also returns false when the directory was created
        // concurrently, which is not an error.
        if (!directory.isDirectory && !directory.mkdirs() && !directory.isDirectory) {
            Log.e(TAG, "Could not create the directory for locale $locale")
        }
        return absoluteDirectoryName
    }

    /**
     * Returns the path of the cache directory for a locale without creating it.
     * The directory name is the escaped language tag of the locale.
     */
    fun getCacheDirectoryForLocale(locale: Locale, context: Context): String {
        val relativeDirectoryName = replaceFileNameDangerousCharacters(locale.toLanguageTag())
        return getWordListCacheDirectory(context) + File.separator + relativeDirectoryName
    }

    /**
     * Lists the files in the cache directory of a locale, creating the directory if needed.
     * Returns an empty array if the directory cannot be listed.
     */
    fun getCachedDictsForLocale(locale: Locale, context: Context) =
        File(getAndCreateCacheDirectoryForLocale(locale, context)).listFiles().orEmpty()

    /**
     * Reads the dictionary header from a section of a file.
     * Returns null if the format is unsupported or the file cannot be read.
     */
    fun getDictionaryFileHeaderOrNull(file: File, offset: Long, length: Long): DictionaryHeader? {
        return try {
            BinaryDictionaryUtils.getHeaderWithOffsetAndLength(file, offset, length)
        } catch (e: UnsupportedFormatException) {
            null
        } catch (e: IOException) {
            null
        }
    }

    /**
     * Reads the dictionary header from a file.
     * Returns null if the format is unsupported or the file cannot be read.
     */
    fun getDictionaryFileHeaderOrNull(file: File): DictionaryHeader? {
        return try {
            BinaryDictionaryUtils.getHeader(file)
        } catch (e: UnsupportedFormatException) {
            null
        } catch (e: IOException) {
            null
        }
    }

    /**
     * Returns the locale for a dictionary file name stored in assets.
     *
     * Assumes file name main_[locale].dict
     * Returns the locale, or null if file name does not match the pattern
     */
    fun extractLocaleFromAssetsDictionaryFile(dictionaryFileName: String): String? {
        if (dictionaryFileName.startsWith(MAIN_DICT_PREFIX) && dictionaryFileName.endsWith(".dict")) {
            return dictionaryFileName.substring(MAIN_DICT_PREFIX.length, dictionaryFileName.lastIndexOf('.'))
        }
        return null
    }

    /** Lists the file names in the assets dictionary folder, or returns null if listing fails. */
    fun getAssetsDictionaryList(context: Context): Array<String>? = try {
        context.assets.list(ASSETS_DICTIONARY_FOLDER)
    } catch (e: IOException) {
        null
    }

    /**
     * Checks whether a text may be inserted into a dictionary as a word.
     *
     * Returns false if the text is empty, longer than
     * [DecoderSpecificConstants.DICTIONARY_MAX_WORD_LENGTH], contains a non-digit code point
     * that is not a word code point, or consists of digits only.
     */
    @JvmStatic
    fun looksValidForDictionaryInsertion(text: CharSequence, spacingAndPunctuations: SpacingAndPunctuations): Boolean {
        if (TextUtils.isEmpty(text)) {
            return false
        }
        if (text.length > DecoderSpecificConstants.DICTIONARY_MAX_WORD_LENGTH) {
            return false
        }
        var digitCount = 0
        loopOverCodePoints(text) { codePoint, charCount ->
            if (Character.isDigit(codePoint)) {
                // Count digits in chars, so the total is comparable to text.length: see below
                digitCount += charCount
                return@loopOverCodePoints false
            }
            if (!spacingAndPunctuations.isWordCodePoint(codePoint)) {
                return false
            }
            false
        }
        // We reject strings entirely comprised of digits to avoid using PIN codes or credit
        // card numbers. It would come in handy for word prediction though; a good example is
        // when writing one's address where the street number is usually quite discriminative,
        // as well as the postal code.
        return digitCount < text.length
    }
}
|
|
@ -25,7 +25,7 @@ fun getDictionaryLocales(context: Context): MutableSet<Locale> {
|
|||
val locales = HashSet<Locale>()
|
||||
|
||||
// get cached dictionaries: extracted or user-added dictionaries
|
||||
DictionaryInfoUtils.getCachedDirectoryList(context)?.forEach { directory ->
|
||||
DictionaryInfoUtils.getCachedDirectoryList(context).forEach { directory ->
|
||||
if (!directory.isDirectory) return@forEach
|
||||
if (!hasAnythingOtherThanExtractedMainDictionary(directory)) return@forEach
|
||||
val locale = DictionaryInfoUtils.getWordListIdFromFileName(directory.name).constructLocale()
|
||||
|
@ -135,4 +135,4 @@ fun cleanUnusedMainDicts(context: Context) {
|
|||
}
|
||||
|
||||
private fun hasAnythingOtherThanExtractedMainDictionary(dir: File) =
|
||||
dir.listFiles()?.any { it.name != DictionaryInfoUtils.getExtractedMainDictFilename() } != false
|
||||
dir.listFiles()?.any { it.name != DictionaryInfoUtils.MAIN_DICT_FILE_NAME } != false
|
||||
|
|
|
@ -32,6 +32,7 @@ import helium314.keyboard.settings.DropDownField
|
|||
import helium314.keyboard.settings.WithSmallTitle
|
||||
import java.io.File
|
||||
import java.util.Locale
|
||||
import androidx.compose.ui.platform.LocalConfiguration
|
||||
|
||||
@Composable
|
||||
fun NewDictionaryDialog(
|
||||
|
@ -55,7 +56,7 @@ fun NewDictionaryDialog(
|
|||
val cacheDir = DictionaryInfoUtils.getAndCreateCacheDirectoryForLocale(locale, ctx)
|
||||
val dictFile = File(cacheDir, header.mIdString.substringBefore(":") + "_" + DictionaryInfoUtils.USER_DICTIONARY_SUFFIX)
|
||||
val type = header.mIdString.substringBefore(":")
|
||||
val info = header.info(ctx.resources.configuration.locale())
|
||||
val info = header.info(LocalConfiguration.current.locale())
|
||||
ThreeButtonAlertDialog(
|
||||
onDismissRequest = { onDismissRequest(); cachedFile.delete() },
|
||||
onConfirmed = {
|
||||
|
@ -64,7 +65,7 @@ fun NewDictionaryDialog(
|
|||
cachedFile.renameTo(dictFile)
|
||||
if (type == Dictionary.TYPE_MAIN) {
|
||||
// replaced main dict, remove the one created from internal data
|
||||
val internalMainDictFile = File(cacheDir, DictionaryInfoUtils.getExtractedMainDictFilename())
|
||||
val internalMainDictFile = File(cacheDir, DictionaryInfoUtils.MAIN_DICT_FILE_NAME)
|
||||
internalMainDictFile.delete()
|
||||
}
|
||||
val newDictBroadcast = Intent(DictionaryPackConstants.NEW_DICTIONARY_INTENT_ACTION)
|
||||
|
@ -92,7 +93,7 @@ fun NewDictionaryDialog(
|
|||
)
|
||||
}
|
||||
if (dictFile.exists()) {
|
||||
val oldInfo = DictionaryInfoUtils.getDictionaryFileHeaderOrNull(dictFile, 0, dictFile.length())?.info(ctx.resources.configuration.locale())
|
||||
val oldInfo = DictionaryInfoUtils.getDictionaryFileHeaderOrNull(dictFile, 0, dictFile.length())?.info(LocalConfiguration.current.locale())
|
||||
HorizontalDivider()
|
||||
Text(
|
||||
stringResource(R.string.replace_dictionary_message, type, oldInfo ?: "(no info)", info),
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue