Sort the all_more_keys and more_more_keys lists by frequency

This commit is contained in:
Helium314 2023-11-27 17:43:40 +01:00
parent 79b84a0b58
commit 1dfce9f89a
3 changed files with 34 additions and 29 deletions

View file

@ -1,24 +1,26 @@
[morekeys] [morekeys]
a á â ä à æ ã å ā ą ª ă ả ạ ằ ắ ẳ ẵ ặ ầ ấ ẩ ẫ ậ a á â ä à ã æ å ā ą ª ă ả ạ ằ ắ ẳ ẵ ặ ầ ấ ẩ ẫ ậ
e é è ê ë ę ė ē ə ě ẻ ẽ ẹ ề ế ể ễ ệ ĕ e é è ê ë ē ė ę ě ə ẻ ẽ ẹ ề ế ể ễ ệ ĕ
i í ì ï î į ī ij ı ĩ ỉ ị ĭ i í ì ï î ī į ı ij ĩ ỉ ị ĭ
o ó ô ö ò õ œ ø ō º ő ỏ ọ ồ ố ổ ỗ ộ ơ ờ ớ ở ỡ ợ ŏ o ó ô ö ò õ œ ø ō º ő ỏ ọ ồ ố ổ ỗ ộ ơ ờ ớ ở ỡ ợ ŏ
u ú û ü ù ū ů ũ ű ų µ ủ ụ ư ừ ứ ử ữ ự ŭ u ú ü ù ū û ů ű ų ũ µ ủ ụ ư ừ ứ ử ữ ự ŭ
n ñ ń ň ņ ʼn ŋ n ñ ń ň ņ ʼn ŋ
y ý ij ÿ y ŷ þ ỳ ỷ ỹ ỵ y ý ÿ ij ŷ y þ ỳ ỷ ỹ ỵ
s ş ß ś š ẞ ș ŝ ſ s ß š ś ş ș ŝ ſ
g ğ ġ ģ g\' ĝ g ğ ģ ġ g\' ĝ
c ç ć č ċ ĉ c ç ć č ċ ĉ
z ž ź ż z ž ź ż
l l·l ł ĺ ļ ľ ŀ l ł ĺ ļ ľ ŀ l·l
punctuation !autoColumnOrder!9 \, ? ! · # ) ( / ; ' @ : - " + \% & ¡ ¿ punctuation !autoColumnOrder!10 \, ? ! # ) ( / ; ' @ : - " + \% & · ¡ ¿
d ď ð đ d ď đ ð
r ř ŕ ŗ r ř ŕ ŗ
t ť ț ţ ŧ þ t ť ţ þ ț ŧ
'
" ” „ “ » «
k ķ ĸ k ķ ĸ
v w ŵ v w ŵ
h ĥ ħ h ĥ ħ
w w ŵ w ŵ w
q q q q
x x x x
j ĵ j ĵ

View file

@ -1,20 +1,22 @@
[morekeys] [morekeys]
a á â ä à æ ã å ā ą ª ă a á â ä à ã æ å ā ą ª ă
e é è ê ë ę ė ē ə ě e é è ê ë ē ė ę ě ə
i í ì ï î į ī ij ı ĩ i í ì ï î ī į ı ij ĩ
o ó ô ö ò õ œ ø ō º ő o ó ô ö ò õ œ ø ō º ő
u ú û ü ù ū ů ũ ű ų u ú ü ù ū û ů ű ų ũ
n ñ ń ň ņ ʼn ŋ n ñ ń ň ņ ʼn ŋ
y ý ij ÿ ŷ y ý ÿ ij ŷ
s ş ß ś š ș s ß š ś ş ș
g ğ ġ ģ g ğ ģ ġ
c ç ć č ċ c ç ć č ċ
z ž ź ż z ž ź ż
l ł ĺ ļ ľ ŀ l ł ĺ ļ ľ ŀ
punctuation !autoColumnOrder!9 \, ? ! # ) ( / ; ' @ : - " + \% & punctuation !autoColumnOrder!9 \, ? ! # ) ( / ; ' @ : - " + \% & ¡ ¿
d ď ð đ d ď đ ð
r ř ŕ ŗ r ř ŕ ŗ
t ť ț ţ ŧ þ t ť ţ þ ț ŧ
'
" ” „ “ » «
k ķ ĸ k ķ ĸ
h ĥ h ĥ
w ŵ w ŵ

View file

@ -223,7 +223,7 @@ def get_morekeys_texts(write=False):
script = "Latn" script = "Latn"
if script is None: if script is None:
raise ValueError("undefined script") raise ValueError("undefined script")
if script == "Latn": if script != "Latn":
continue # skip non-latin scripts for now continue # skip non-latin scripts for now
print(file) print(file)
keys = read_keys(f"{file}/donottranslate-more-keys.xml") keys = read_keys(f"{file}/donottranslate-more-keys.xml")
@ -249,15 +249,16 @@ def write_combined_lists(keys):
continue continue
infos[l] = infos.get(l, 0) + 1 infos[l] = infos.get(l, 0) + 1
infos_by_letters[k] = infos infos_by_letters[k] = infos
with open(out_folder + "all_more_keys.txt", 'w') as f: with open(out_folder / "all_more_keys.txt", 'w') as f:
f.write("[morekeys]\n") f.write("[morekeys]\n")
for letter, info in infos_by_letters.items(): for letter, info in infos_by_letters.items():
f.write(letter + " " + " ".join(info.keys()) + "\n") sorted_info = dict(sorted(info.items(), key=lambda item: item[1], reverse=True))
with open(out_folder + "more_more_keys.txt", 'w') as f: f.write(letter + " " + " ".join(sorted_info.keys()) + "\n")
with open(out_folder / "more_more_keys.txt", 'w') as f:
f.write("[morekeys]\n") f.write("[morekeys]\n")
for letter, info in infos_by_letters.items(): for letter, info in infos_by_letters.items():
morekeys = [] morekeys = []
for morekey, count in info.items(): for morekey, count in sorted(info.items(), key=lambda item: item[1], reverse=True):
if count > 1: if count > 1:
morekeys.append(morekey) morekeys.append(morekey)
if len(morekeys) > 0: if len(morekeys) > 0:
@ -267,8 +268,8 @@ def write_combined_lists(keys):
def main(): def main():
# k = read_keys(default_file) # k = read_keys(default_file)
# write_keys(pathlib.Path(__file__).parent / f"defaultkeys.txt", k) # write_keys(pathlib.Path(__file__).parent / f"defaultkeys.txt", k)
keys = get_morekeys_texts(True) keys = get_morekeys_texts(False)
# write_combined_lists(keys) write_combined_lists(keys)
# need to check strings: # need to check strings: