use new parser for symbols layouts

2025-06-20 08:00:54 +00:00 · 2023-11-27 13:50:46 +01:00 · 2023-11-27 13:50:46 +01:00 · 57bf742da0
commit 57bf742da0
parent 4d0b7915c8
45 changed files with 574 additions and 136 deletions
--- a/tools/morekeys_reader.py
+++ b/tools/morekeys_reader.py
@ -29,6 +29,8 @@ def read_keys(file):
    root = ET.parse(file).getroot()
    morekeys = dict()
    extra_keys = dict()
+    labels = dict()
+    number_row = dict()
    for key in root.iter("resources"):
        for string in key.iter("string"):
            for tag, value in string.items():
@ -41,25 +43,53 @@ def read_keys(file):
                    text = resolve_text(string.text, file)
                if "!text/" in text:
                    raise ValueError(f"can't have !text/ in {text} (from {string.text})")
-                if "  " in text:
+                if "  " in text and "fixedColumnOrder" not in text:  # issues with arabic punctuation (more) keys
                    raise ValueError(f"can't have consecutive spaces in {text} (from {string.text})")
-                if value.startswith("keyspec_") and "_row" in value:
-                    # put additional key labels (for nordic, spanish, swiss)
+                if value.startswith("keyspec_") and "_row" in value and "slavic" not in value:
+                    # put additional key labels (for nordic, spanish, swiss, german, but not for slavic, because here the keys are not extra)
                    key = value.split("_row")[1]
                    d = extra_keys.get(key, dict())
                    d["label"] = text
                    extra_keys[key] = d
-                elif value.startswith("morekeys_") and "_row" in value:
-                    # put additional key morekeys (for nordic, spanish, swiss)
+                elif value.startswith("morekeys_") and "_row" in value and "slavic" not in value:
+                    # put additional key morekeys (for nordic, spanish, swiss, german, but not for slavic, because here the keys are not extra)
                    key = value.split("_row")[1]
                    d = extra_keys.get(key, dict())
                    d["morekeys"] = text
                    extra_keys[key] = d
                elif value.startswith("morekeys_"):
                    key_label = value.split("morekeys_")[1]
+                    if key_label == "period":
+                        key_label = "punctuation"  # used in the same place
                    if len(key_label) > 1 and key_label != "punctuation":
-                        print(f"ignoring long more key: {key_label}: {text}")
-                        continue
+                        if key_label.startswith("cyrillic_"):
+                            label = key_label.split("cyrillic_")[1]
+                            if label == "u":
+                                key_label = "у"
+                            elif label == "ka":
+                                key_label = "к"
+                            elif label == "ie":
+                                key_label = "е"
+                            elif label == "en":
+                                key_label = "н"
+                            elif label == "ghe":
+                                key_label = "г"
+                            elif label == "o":
+                                key_label = "о"
+                            elif label == "soft_sign":
+                                key_label = "ь"
+                            elif label == "a":
+                                key_label = "а"
+                            elif label == "i":
+                                key_label = "и"
+                            else:
+                                print(f"ignoring cyrillic long more key: {key_label}: {text}")
+                                continue
+                        else:
+                            if key_label not in ["bullet", "star", "left_parenthesis", "right_parenthesis", "less_than", "greater_than", "symbols_semicolon", "symbols_percent"]:
+                                # only print for keys that are not already handled
+                                print(f"ignoring long more key: {key_label}: {text}")
+                            continue
                    morekeys[key_label] = text
                elif value == "single_quotes":
                    prepend_to_morekeys(morekeys, text, '\'')
@ -69,19 +99,35 @@ def read_keys(file):
                    prepend_to_morekeys(morekeys, text, '\"')
                elif value == "double_angle_quotes":
                    append_to_morekeys(morekeys, text, '\"')
-                # todo: labels should be in [labels] and use sth like symbols: ?123
+                elif value == "keylabel_to_alpha":
+                    labels["alphabet"] = text
+                elif value == "keylabel_to_symbol":
+                    labels["symbol"] = text
+                elif value == "keyspec_comma":
+                    labels["comma"] = text
+                elif value == "keyspec_period":
+                    labels["period"] = text
+                elif value.startswith("keyspec_symbols_") and len(value.split("keyspec_symbols_")[1]) == 1:  # checking whether it's an int would be better, but bah
+                    number_row[value.split("keyspec_symbols_")[1]] = text
+                elif "values-ur" in file and value.startswith("additional_morekeys_symbols_"):
+                    number_row[value.split("additional_morekeys_symbols_")[1]] = text
+                    # for some reason ur has the arabic numbers in moreKeys
+                elif value in ["keyspec_currency", "symbols_semicolon", "symbols_percent"] or value.startswith("additional_morekeys_symbols_") or "_left_" in value or "_right_" in value or "_greater" in value or "_less_" in value:
+                    pass  # ignore keys handled somewhere else (currency key not yet fully replaced)
                else:
                    print(f"ignored tag: {tag}={value}, {text}")
    keys = dict()
    keys["morekeys"] = morekeys
    keys["extra_keys"] = extra_keys
+    keys["labels"] = labels
+    keys["number_row"] = number_row
    return keys


 def resolve_text(text, file):
    if text.startswith("\"") and text.endswith("\"") and len(text) > 1:
        text = text[1:-1]
-    sp = re.split("(?<!\\\\),", text)
+    sp = re.split("(?<!\\\\),", text)  # split on comma, but not on escaped comma "\,"

    if len(sp) > 1:  # resolve each entry separately
        result = []
@ -121,13 +167,16 @@ def read_locale_from_folder(folder):
 def write_keys(outfile, keys, locc=""):
    with open(outfile, "w") as f:
        # write section [more_keys], then [extra_keys], skip if empty
+        has_extra_keys = len(keys["extra_keys"]) > 0
+        has_labels = len(keys["labels"]) > 0
+        has_number_row = len(keys["number_row"]) > 0
        if len(keys["morekeys"]) > 0:
            f.write("[morekeys]\n")
            for k, v in keys["morekeys"].items():
                f.write(f"{k} {v}\n")
-            if len(keys["extra_keys"]) > 0:
+            if has_labels or has_number_row or has_extra_keys:
                f.write("\n")
-        if len(keys["extra_keys"]) > 0 and locc != "eo":  # eo has the extra key moved into the layout
+        if has_extra_keys and locc != "eo":  # eo has the extra key moved into the layout
            f.write("[extra_keys]\n")
            # clarify somewhere that extra keys only apply to default layout (where to get?)
            for k, v in sorted(keys["extra_keys"].items()):
@ -138,6 +187,24 @@ def write_keys(outfile, keys, locc=""):
                    f.write(f"{row}: {label}\n")
                else:
                    f.write(f"{row}: {label} {morekeys}\n")
+            if has_labels or has_number_row:
+                f.write("\n")
+        if has_labels:
+            f.write("[labels]\n")
+            for k, v in keys["labels"].items():
+                f.write(f"{k}: {v}\n")
+            if has_number_row:
+                f.write("\n")
+        if has_number_row:
+            if len(keys["number_row"]) != 10:
+                raise ValueError("number row must have 10 keys")
+            f.write("[number_row]\n")
+            zero = keys["number_row"]["0"]
+            for k, v in sorted(keys["number_row"].items()):
+                if k == "0":
+                    continue
+                f.write(f"{v} ")
+            f.write(f"{zero}\n")


 def get_morekeys_texts(write=False):
@ -156,7 +223,7 @@ def get_morekeys_texts(write=False):
            script = "Latn"
        if script is None:
            raise ValueError("undefined script")
-        if script != "Latn":
+        if script == "Latn":
            continue  # skip non-latin scripts for now
        print(file)
        keys = read_keys(f"{file}/donottranslate-more-keys.xml")
@ -164,12 +231,14 @@ def get_morekeys_texts(write=False):
        if not write:
            continue
        outfile_name = locc.replace("-r", "_").lower() + ".txt"
-        outfile = pathlib.Path(out_folder + outfile_name)
+#        outfile = pathlib.Path(out_folder + outfile_name)
+        outfile = out_folder / outfile_name
        outfile.parent.mkdir(exist_ok=True, parents=True)
        write_keys(outfile, keys, locc)
    return val


+# write lists of all moreKeys from different languages
 def write_combined_lists(keys):
    infos_by_letters = dict()
    for key in keys:
@ -198,10 +267,8 @@ def write_combined_lists(keys):
 def main():
 #    k = read_keys(default_file)
 #    write_keys(pathlib.Path(__file__).parent / f"defaultkeys.txt", k)
-    keys = get_morekeys_texts()
-    write_combined_lists(keys)
-
-
+    keys = get_morekeys_texts(True)
+#    write_combined_lists(keys)


 # need to check strings: