update tools scripts

This commit is contained in:
Helium314 2024-06-01 13:50:06 +02:00
parent f0ab9cdd21
commit c048ff6ff6
6 changed files with 149 additions and 95 deletions

1
.gitignore vendored
View file

@ -9,3 +9,4 @@ app/build
app/release
app/.cxx
fastlane/Appfile
tools/*.txt

91
tools/diacritics.py Normal file
View file

@ -0,0 +1,91 @@
#!/bin/python
import sys
import os
import re
# only files whose name ends with this suffix are treated as word lists
file_ending_filter = "-words.txt"
# directory that is searched for word lists; the language name is appended to this path
# (relative path, so it is resolved against the current working directory)
word_lists_dir = "../../wordlists/"
def find_word_lists(language: str) -> list[str]:
    """Collect the paths of all word list files available for `language`.

    The directory `word_lists_dir + language` is walked recursively and every file whose
    name ends with `file_ending_filter` is kept.

    :param language: name of the sub-directory below `word_lists_dir`
    :return: list of file paths, empty if the language directory does not exist
    """
    language_dir = word_lists_dir + language
    if not os.path.isdir(language_dir):
        return []
    return [
        folder + "/" + name
        for folder, _subdirs, names in os.walk(language_dir)
        for name in names
        if name.endswith(file_ending_filter)
    ]
def check_diacritics(language: str, diacritics: list[str], all_diacritics: set[str]):
    """Write a diacritics usage report for the word lists of one language.

    Every line of every word list found for `language` is checked:
    lines containing a "foreign" diacritic (one that is in `all_diacritics`, but not in
    `diacritics`) are collected, for all other lines the occurrences of the language's own
    diacritics are counted. The result is written to `diacritics_report_<language>.txt` in
    the current working directory. Nothing is written if no word list is found.

    :param language: language name, used to locate the word lists and to name the report
    :param diacritics: diacritics that belong to the language
    :param all_diacritics: diacritics of all languages; this set is not modified
    """
    word_lists = find_word_lists(language)
    if not word_lists:
        return
    # Set difference instead of remove(): tolerates duplicates in `diacritics` and leaves
    # the caller's set untouched.
    foreign_dia = "".join(sorted(all_diacritics.difference(diacritics)))
    # Escape the characters so "]", "^", "-" or "\" cannot break the character class.
    # An empty class "[]" is not a valid regex, so skip the foreign check in that case.
    dia_regex = re.compile(f"[{re.escape(foreign_dia)}]") if foreign_dia else None
    print("checking", language, "with", diacritics)
    foreigns = []
    dia_count = dict.fromkeys(diacritics, 0)
    for word_list in word_lists:
        # word lists contain non-ASCII characters, so do not rely on the locale encoding
        with open(word_list, encoding="utf-8") as f:
            for line in f:
                # check whether the line contains any diacritics that are not in the list
                if dia_regex is not None and dia_regex.search(line):
                    foreigns.append(line.rstrip())
                    continue
                # search for language diacritics and add a count
                found = [dia for dia in dia_count if dia in line]
                if not found:
                    continue
                try:
                    # assuming the format from https://www.wortschatz.uni-leipzig.de/en/download
                    count = int(line.split("\t")[2])
                except (IndexError, ValueError):
                    # no (numeric) third column: count the line as a single occurrence
                    count = 1
                for dia in found:
                    dia_count[dia] += count
    dia_results = (
        f"language: {language}\n"
        f"diacritics: {diacritics}\n"
        f"language diacritics counts: {dia_count}\n"
        "foreign diacritics:\n"
    ) + "\n".join(foreigns)
    with open(f"diacritics_report_{language}.txt", 'w', encoding="utf-8") as f:
        f.write(dia_results)
def make_all_diacritics(dia_lists: list[list[str]]) -> set[str]:
    """Return the union of the diacritics contained in all of the given lists."""
    return {dia for dia_list in dia_lists for dia in dia_list}
def read_diacritics() -> dict[str, list[str]]:
    """Read the language -> diacritics mapping from `diacritics.txt` in the current directory.

    The file consists of line pairs: a line with the language name, followed by a line with
    the comma-separated diacritics of that language. Blank lines in front of a language name
    are ignored. Empty entries (e.g. caused by a trailing comma) are dropped, because an
    empty string would be "found" in every word.

    :return: dict mapping each language name to its list of diacritics
    """
    d = {}
    language = ""
    # The file contains non-ASCII characters, so do not rely on the locale encoding;
    # "utf-8-sig" additionally tolerates a byte order mark at the start of the file.
    with open("diacritics.txt", encoding="utf-8-sig") as f:
        for line in f:
            if language == "":
                language = line.strip()
            else:
                d[language] = [dia for dia in map(str.strip, line.split(",")) if dia]
                language = ""
    return d
def main():
    """Create a diacritics report for every language listed in diacritics.txt."""
    diacritics = read_diacritics()
    all_diacritics = make_all_diacritics(list(diacritics.values()))
    for language, language_diacritics in diacritics.items():
        # every check gets its own copy of the full set
        check_diacritics(language, language_diacritics, all_diacritics.copy())


if __name__ == "__main__":
    main()

View file

@ -1,5 +0,0 @@
# make-dict-list
This module takes care of generating a list of dictionaries available in the [dictionaries repository](https://codeberg.org/Helium314/aosp-dictionaries) for convenient linking when adding dictionaries in HeliBoard.
To use it, simply run `./gradlew tools:make-dict-list:makeDictList`

View file

@ -1,18 +0,0 @@
// Build script of the make-dict-list tool module.
apply plugin: "java"
apply plugin: 'kotlin'
ext {
// fully qualified name of the class whose main() is run by the makeDictList task
javaMainClass = "tools.dict.MakeDictList"
}
java {
// compile for Java 17
sourceCompatibility = JavaVersion.VERSION_17
targetCompatibility = JavaVersion.VERSION_17
}
// Runs javaMainClass with a single argument: the path <app project dir>/src/main/assets
tasks.register('makeDictList', JavaExec) {
args project.rootProject.project('app').projectDir.path + File.separator + 'src' +
File.separator + 'main' + File.separator + 'assets'
// run with the compiled classes and runtime dependencies of this module
classpath = sourceSets.main.runtimeClasspath
main = javaMainClass
}

View file

@ -1,63 +0,0 @@
package tools.dict
import java.io.File
import java.net.URL
/**
 * Command line tool that downloads the README.md of the dictionaries repository and writes the
 * dictionary list extracted from it to `dictionaries_in_dict_repo.csv`.
 */
class MakeDictList {
    companion object {
        /**
         * @param args the first element is the directory the csv file is written to,
         *  the directory is created if necessary
         * @throws IllegalArgumentException if no target directory is provided
         */
        @JvmStatic fun main(args: Array<String>) {
            // validate the arguments first, so a wrong call fails before anything is downloaded
            val targetDir = requireNotNull(args.firstOrNull()) {
                "missing argument: target directory for dictionaries_in_dict_repo.csv"
            }
            val readmeUrl = "https://codeberg.org/Helium314/aosp-dictionaries/raw/branch/main/README.md"
            val fileText = doIt(URL(readmeUrl).readText())
            File(targetDir).mkdirs()
            File("$targetDir/dictionaries_in_dict_repo.csv").writeText(fileText)
        }
    }
}
/**
 * Extracts the dictionary list from the README.md of the dictionaries repository.
 *
 * The result contains one line per dictionary in the format `<type>,<localeString>,<experimental>`
 * and always ends with a line break. `<experimental>` is `exp` for dictionaries listed in the
 * "# Experimental dictionaries" section and empty for those in the "# Dictionaries" section,
 * no other check is done.
 *
 * Requires README.md to list the dictionaries as `*` bullets linking to a `<type>_<locale>.dict`
 * file in one of these two sections. Bullets without such a link are skipped.
 */
private fun doIt(readme: String): String {
    var section = ReadmeSection.NONE
    val outLines = mutableListOf<String>()
    for (line in readme.lineSequence()) {
        if (line.startsWith("#")) {
            // every heading switches the section, unknown headings end the dictionary sections
            section = when (line.trim()) {
                "# Dictionaries" -> ReadmeSection.NORMAL
                "# Experimental dictionaries" -> ReadmeSection.EXPERIMENTAL
                else -> ReadmeSection.NONE
            }
            continue
        }
        if (section == ReadmeSection.NONE || !line.startsWith("*")) continue
        // "* [text](path/to/type_locale.dict)" -> "type_locale"; empty if the bullet has no link
        val dictName = line.substringAfter("]", "").substringAfter("(", "").substringBefore(")")
            .substringAfterLast("/").substringBefore(".dict")
        // not a dictionary entry, would only produce a garbage line
        if ("_" !in dictName) continue
        val type = dictName.substringBefore("_")
        val localeParts = dictName.substringAfter("_").split("_").toMutableList()
        // only the part directly after the language is upper-cased, unless it starts with '#'
        if (localeParts.size > 1 && !localeParts[1].startsWith("#"))
            localeParts[1] = localeParts[1].uppercase()
        val locale = localeParts.joinToString("_")
        outLines.add("$type,$locale,${if (section == ReadmeSection.EXPERIMENTAL) "exp" else ""}")
    }
    return outLines.joinToString("\n") + "\n"
}

/** Section of README.md the parser is currently in. */
private enum class ReadmeSection { NONE, NORMAL, EXPERIMENTAL }

View file

@ -2,6 +2,7 @@
import os
import subprocess
import sys
import zipfile
from urllib.request import urlretrieve
@ -10,7 +11,9 @@ from urllib.request import urlretrieve
def check_git():
    """Ask for confirmation before continuing with uncommitted changes.

    Runs `git diff --name-only`. If git fails or lists any changed file, the user is asked
    whether to continue, and the script exits unless the answer is "y".
    """
    result = subprocess.run(["git", "diff", "--name-only"], capture_output=True)
    if result.returncode != 0 or len(result.stdout) != 0:
        # no unconditional raise here, it would make the prompt below unreachable
        cont = input("uncommitted changes found, continue? [y/N] ")
        if cont.strip().lower() != "y":
            sys.exit()
# download and update translations
@ -36,22 +39,65 @@ def check_default_values_diff():
raise ValueError("default strings changed after translation import, something is wrong")
# run that task
def read_dicts_readme() -> list[str]:
    """Return the lines of the README.md of the dictionaries repository.

    A local checkout at `../dictionaries/README.md` (relative to the current working
    directory) is preferred. If it does not exist, the file is downloaded from codeberg.org
    into a temporary file in the current directory, which is removed again afterwards.

    :return: the lines of the file, including their line breaks
    """
    dicts_readme_file = "../dictionaries/README.md"
    if os.path.isfile(dicts_readme_file):
        with open(dicts_readme_file, encoding="utf-8") as f:
            return f.readlines()
    readme_url = "https://codeberg.org/Helium314/aosp-dictionaries/raw/branch/main/README.md"
    tmp_readme = "dicts_readme_tmp.md"
    try:
        urlretrieve(readme_url, tmp_readme)
        with open(tmp_readme, encoding="utf-8") as f:
            return f.readlines()
    finally:
        # also clean up if the download or reading the file failed
        if os.path.exists(tmp_readme):
            os.remove(tmp_readme)
# generate a list of dictionaries available in the dictionaries repository
# (https://codeberg.org/Helium314/aosp-dictionaries) for convenient linking when adding
# dictionaries in HeliBoard.
def update_dict_list():
    """Write the dictionary list to app/src/main/assets/dictionaries_in_dict_repo.csv.

    The list is created from the README.md of the dictionaries repository. The target path
    is relative to the current working directory.
    """
    # The list is created in Python only, without calling the make-dict-list Gradle task.
    dicts = _parse_dict_list(read_dicts_readme())
    target_file = "app/src/main/assets/dictionaries_in_dict_repo.csv"
    with open(target_file, 'w', encoding="utf-8") as f:
        f.writelines(dicts)


def _parse_dict_list(lines: list[str]) -> list[str]:
    """Convert the README lines into csv lines "<type>,<locale>,<exp or empty>"."""
    # heading of a dictionary section -> content of the last csv column
    sections = {"# Dictionaries": "", "# Experimental dictionaries": "exp"}
    experimental = None  # None while outside of the dictionary sections
    dicts = []
    for line in lines:
        line = line.strip()
        if line.startswith("#"):
            experimental = sections.get(line)
            continue
        if experimental is None or not line.startswith("*"):
            continue
        try:
            # "* [text](path/to/type_locale.dict)" -> "type_locale"
            dict_name = line.split("]")[1].split("(")[1].split(")")[0].split("/")[-1].split(".dict")[0]
            (dict_type, locale) = dict_name.split("_", 1)
        except (IndexError, ValueError):
            # bullet that does not link to a dictionary file
            print(f"skipping unexpected line in dictionaries README: {line}", file=sys.stderr)
            continue
        # upper-case everything after the language, e.g. en_us -> en_US
        (language, *rest) = locale.split("_")
        locale = "_".join([language] + [part.upper() for part in rest])
        dicts.append(f"{dict_type},{locale},{experimental}\n")
    return dicts
# check whether there is a changelog file for current version and print result and version code
def check_changelog():
changelog_dir = "fastlane/metadata/android/en-US/changelogs"
assert os.path.isdir(changelog_dir)
filenames = list(os.scandir(changelog_dir))
filenames = []
for file in os.scandir(changelog_dir):
filenames.append(file.name)
filenames.sort()
changelog_version = filenames[-1].name.replace(".txt", "")
changelog_version = filenames[-1].replace(".txt", "")
version = ""
with open("app/build.gradle") as f:
for line in f:
@ -66,6 +112,8 @@ def check_changelog():
def main():
if os.getcwd().endswith("tools"):
os.chdir("../")
check_git()
update_translations()
check_default_values_diff()