use new parser for symbols layouts

This commit is contained in:
Helium314 2023-11-27 13:50:46 +01:00
parent 4d0b7915c8
commit 57bf742da0
45 changed files with 574 additions and 136 deletions

View file

@ -29,6 +29,8 @@ def read_keys(file):
root = ET.parse(file).getroot()
morekeys = dict()
extra_keys = dict()
labels = dict()
number_row = dict()
for key in root.iter("resources"):
for string in key.iter("string"):
for tag, value in string.items():
@ -41,25 +43,53 @@ def read_keys(file):
text = resolve_text(string.text, file)
if "!text/" in text:
raise ValueError(f"can't have !text/ in {text} (from {string.text})")
if " " in text:
if " " in text and "fixedColumnOrder" not in text: # issues with arabic punctuation (more) keys
raise ValueError(f"can't have consecutive spaces in {text} (from {string.text})")
if value.startswith("keyspec_") and "_row" in value:
# put additional key labels (for nordic, spanish, swiss)
if value.startswith("keyspec_") and "_row" in value and "slavic" not in value:
# put additional key labels (for nordic, spanish, swiss, german, but not for slavic, because here the keys are not extra)
key = value.split("_row")[1]
d = extra_keys.get(key, dict())
d["label"] = text
extra_keys[key] = d
elif value.startswith("morekeys_") and "_row" in value:
# put additional key morekeys (for nordic, spanish, swiss)
elif value.startswith("morekeys_") and "_row" in value and "slavic" not in value:
# put additional key morekeys (for nordic, spanish, swiss, german, but not for slavic, because here the keys are not extra)
key = value.split("_row")[1]
d = extra_keys.get(key, dict())
d["morekeys"] = text
extra_keys[key] = d
elif value.startswith("morekeys_"):
key_label = value.split("morekeys_")[1]
if key_label == "period":
key_label = "punctuation" # used in the same place
if len(key_label) > 1 and key_label != "punctuation":
print(f"ignoring long more key: {key_label}: {text}")
continue
if key_label.startswith("cyrillic_"):
label = key_label.split("cyrillic_")[1]
if label == "u":
key_label = "у"
elif label == "ka":
key_label = "к"
elif label == "ie":
key_label = "е"
elif label == "en":
key_label = "н"
elif label == "ghe":
key_label = "г"
elif label == "o":
key_label = "о"
elif label == "soft_sign":
key_label = "ь"
elif label == "a":
key_label = "а"
elif label == "i":
key_label = "и"
else:
print(f"ignoring cyrillic long more key: {key_label}: {text}")
continue
else:
if key_label not in ["bullet", "star", "left_parenthesis", "right_parenthesis", "less_than", "greater_than", "symbols_semicolon", "symbols_percent"]:
# only print for keys that are not already handled
print(f"ignoring long more key: {key_label}: {text}")
continue
morekeys[key_label] = text
elif value == "single_quotes":
prepend_to_morekeys(morekeys, text, '\'')
@ -69,19 +99,35 @@ def read_keys(file):
prepend_to_morekeys(morekeys, text, '\"')
elif value == "double_angle_quotes":
append_to_morekeys(morekeys, text, '\"')
# todo: labels should be in [labels] and use sth like symbols: ?123
elif value == "keylabel_to_alpha":
labels["alphabet"] = text
elif value == "keylabel_to_symbol":
labels["symbol"] = text
elif value == "keyspec_comma":
labels["comma"] = text
elif value == "keyspec_period":
labels["period"] = text
elif value.startswith("keyspec_symbols_") and len(value.split("keyspec_symbols_")[1]) == 1: # checking whether it's an int would be better, but bah
number_row[value.split("keyspec_symbols_")[1]] = text
elif "values-ur" in file and value.startswith("additional_morekeys_symbols_"):
number_row[value.split("additional_morekeys_symbols_")[1]] = text
# for some reason ur has the arabic numbers in moreKeys
elif value in ["keyspec_currency", "symbols_semicolon", "symbols_percent"] or value.startswith("additional_morekeys_symbols_") or "_left_" in value or "_right_" in value or "_greater" in value or "_less_" in value:
pass # ignore keys handled somewhere else (currency key not yet fully replaced)
else:
print(f"ignored tag: {tag}={value}, {text}")
keys = dict()
keys["morekeys"] = morekeys
keys["extra_keys"] = extra_keys
keys["labels"] = labels
keys["number_row"] = number_row
return keys
def resolve_text(text, file):
if text.startswith("\"") and text.endswith("\"") and len(text) > 1:
text = text[1:-1]
sp = re.split("(?<!\\\\),", text)
sp = re.split("(?<!\\\\),", text) # split on comma, but not on escaped comma "\,"
if len(sp) > 1: # resolve each entry separately
result = []
@ -121,13 +167,16 @@ def read_locale_from_folder(folder):
def write_keys(outfile, keys, locc=""):
with open(outfile, "w") as f:
# write section [more_keys], then [extra_keys], skip if empty
has_extra_keys = len(keys["extra_keys"]) > 0
has_labels = len(keys["labels"]) > 0
has_number_row = len(keys["number_row"]) > 0
if len(keys["morekeys"]) > 0:
f.write("[morekeys]\n")
for k, v in keys["morekeys"].items():
f.write(f"{k} {v}\n")
if len(keys["extra_keys"]) > 0:
if has_labels or has_number_row or has_extra_keys:
f.write("\n")
if len(keys["extra_keys"]) > 0 and locc != "eo": # eo has the extra key moved into the layout
if has_extra_keys and locc != "eo": # eo has the extra key moved into the layout
f.write("[extra_keys]\n")
# clarify somewhere that extra keys only apply to default layout (where to get?)
for k, v in sorted(keys["extra_keys"].items()):
@ -138,6 +187,24 @@ def write_keys(outfile, keys, locc=""):
f.write(f"{row}: {label}\n")
else:
f.write(f"{row}: {label} {morekeys}\n")
if has_labels or has_number_row:
f.write("\n")
if has_labels:
f.write("[labels]\n")
for k, v in keys["labels"].items():
f.write(f"{k}: {v}\n")
if has_number_row:
f.write("\n")
if has_number_row:
if len(keys["number_row"]) != 10:
raise ValueError("number row must have 10 keys")
f.write("[number_row]\n")
zero = keys["number_row"]["0"]
for k, v in sorted(keys["number_row"].items()):
if k == "0":
continue
f.write(f"{v} ")
f.write(f"{zero}\n")
def get_morekeys_texts(write=False):
@ -156,7 +223,7 @@ def get_morekeys_texts(write=False):
script = "Latn"
if script is None:
raise ValueError("undefined script")
if script != "Latn":
if script == "Latn":
continue # skip non-latin scripts for now
print(file)
keys = read_keys(f"{file}/donottranslate-more-keys.xml")
@ -164,12 +231,14 @@ def get_morekeys_texts(write=False):
if not write:
continue
outfile_name = locc.replace("-r", "_").lower() + ".txt"
outfile = pathlib.Path(out_folder + outfile_name)
# outfile = pathlib.Path(out_folder + outfile_name)
outfile = out_folder / outfile_name
outfile.parent.mkdir(exist_ok=True, parents=True)
write_keys(outfile, keys, locc)
return val
# write lists of all moreKeys from different languages
def write_combined_lists(keys):
infos_by_letters = dict()
for key in keys:
@ -198,10 +267,8 @@ def write_combined_lists(keys):
def main():
# k = read_keys(default_file)
# write_keys(pathlib.Path(__file__).parent / f"defaultkeys.txt", k)
keys = get_morekeys_texts()
write_combined_lists(keys)
keys = get_morekeys_texts(True)
# write_combined_lists(keys)
# need to check strings: