mirror of
https://github.com/LibreTranslate/LibreTranslate.git
synced 2025-04-20 05:49:16 +00:00
Merge pull request #764 from pierotofy/emoji
Detect emoji translation requests
This commit is contained in:
commit
cfb1a92170
1 changed files with 58 additions and 20 deletions
|
@ -37,6 +37,19 @@ from libretranslate.locales import (
|
||||||
from .api_keys import Database, RemoteDatabase
|
from .api_keys import Database, RemoteDatabase
|
||||||
from .suggestions import Database as SuggestionsDatabase
|
from .suggestions import Database as SuggestionsDatabase
|
||||||
|
|
||||||
|
# Rough map of emoji characters
|
||||||
|
emojis = {e: True for e in \
|
||||||
|
[ord(' ')] + # Spaces
|
||||||
|
list(range(0x1F600,0x1F64F)) + # Emoticons
|
||||||
|
list(range(0x1F300,0x1F5FF)) + # Misc Symbols and Pictographs
|
||||||
|
list(range(0x1F680,0x1F6FF)) + # Transport and Map
|
||||||
|
list(range(0x2600,0x26FF)) + # Misc symbols
|
||||||
|
list(range(0x2700,0x27BF)) + # Dingbats
|
||||||
|
list(range(0xFE00,0xFE0F)) + # Variation Selectors
|
||||||
|
list(range(0x1F900,0x1F9FF)) + # Supplemental Symbols and Pictographs
|
||||||
|
list(range(0x1F1E6,0x1F1FF)) + # Flags
|
||||||
|
list(range(0x20D0,0x20FF)) # Combining Diacritical Marks for Symbols
|
||||||
|
}
|
||||||
|
|
||||||
def get_version():
|
def get_version():
|
||||||
try:
|
try:
|
||||||
|
@ -153,6 +166,19 @@ def filter_unique(seq, extra):
|
||||||
seen_add = seen.add
|
seen_add = seen.add
|
||||||
return [x for x in seq if not (x in seen or seen_add(x))]
|
return [x for x in seq if not (x in seen or seen_add(x))]
|
||||||
|
|
||||||
|
|
||||||
|
def detect_translatable(src_texts):
|
||||||
|
if isinstance(src_texts, list):
|
||||||
|
return any(detect_translatable(t) for t in src_texts)
|
||||||
|
|
||||||
|
for ch in src_texts:
|
||||||
|
if not (ord(ch) in emojis):
|
||||||
|
return True
|
||||||
|
|
||||||
|
# All emojis
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
def create_app(args):
|
def create_app(args):
|
||||||
from libretranslate.init import boot
|
from libretranslate.init import boot
|
||||||
|
|
||||||
|
@ -648,11 +674,15 @@ def create_app(args):
|
||||||
if batch:
|
if batch:
|
||||||
request.req_cost = max(1, len(q))
|
request.req_cost = max(1, len(q))
|
||||||
|
|
||||||
|
translatable = detect_translatable(src_texts)
|
||||||
|
if translatable:
|
||||||
if source_lang == "auto":
|
if source_lang == "auto":
|
||||||
candidate_langs = detect_languages(src_texts)
|
candidate_langs = detect_languages(src_texts)
|
||||||
detected_src_lang = candidate_langs[0]
|
detected_src_lang = candidate_langs[0]
|
||||||
else:
|
else:
|
||||||
detected_src_lang = {"confidence": 100.0, "language": source_lang}
|
detected_src_lang = {"confidence": 100.0, "language": source_lang}
|
||||||
|
else:
|
||||||
|
detected_src_lang = {"confidence": 0.0, "language": "en"}
|
||||||
|
|
||||||
src_lang = next(iter([l for l in languages if l.code == detected_src_lang["language"]]), None)
|
src_lang = next(iter([l for l in languages if l.code == detected_src_lang["language"]]), None)
|
||||||
|
|
||||||
|
@ -679,6 +709,7 @@ def create_app(args):
|
||||||
if translator is None:
|
if translator is None:
|
||||||
abort(400, description=_("%(tname)s (%(tcode)s) is not available as a target language from %(sname)s (%(scode)s)", tname=_lazy(tgt_lang.name), tcode=tgt_lang.code, sname=_lazy(src_lang.name), scode=src_lang.code))
|
abort(400, description=_("%(tname)s (%(tcode)s) is not available as a target language from %(sname)s (%(scode)s)", tname=_lazy(tgt_lang.name), tcode=tgt_lang.code, sname=_lazy(src_lang.name), scode=src_lang.code))
|
||||||
|
|
||||||
|
if translatable:
|
||||||
if text_format == "html":
|
if text_format == "html":
|
||||||
translated_text = unescape(str(translate_html(translator, text)))
|
translated_text = unescape(str(translate_html(translator, text)))
|
||||||
alternatives = [] # Not supported for html yet
|
alternatives = [] # Not supported for html yet
|
||||||
|
@ -686,6 +717,9 @@ def create_app(args):
|
||||||
hypotheses = translator.hypotheses(text, num_alternatives + 1)
|
hypotheses = translator.hypotheses(text, num_alternatives + 1)
|
||||||
translated_text = unescape(improve_translation_formatting(text, hypotheses[0].value))
|
translated_text = unescape(improve_translation_formatting(text, hypotheses[0].value))
|
||||||
alternatives = filter_unique([unescape(improve_translation_formatting(text, hypotheses[i].value)) for i in range(1, len(hypotheses))], translated_text)
|
alternatives = filter_unique([unescape(improve_translation_formatting(text, hypotheses[i].value)) for i in range(1, len(hypotheses))], translated_text)
|
||||||
|
else:
|
||||||
|
translated_text = text # Cannot translate, send the original text back
|
||||||
|
alternatives = []
|
||||||
|
|
||||||
batch_results.append(translated_text)
|
batch_results.append(translated_text)
|
||||||
batch_alternatives.append(alternatives)
|
batch_alternatives.append(alternatives)
|
||||||
|
@ -703,6 +737,7 @@ def create_app(args):
|
||||||
if translator is None:
|
if translator is None:
|
||||||
abort(400, description=_("%(tname)s (%(tcode)s) is not available as a target language from %(sname)s (%(scode)s)", tname=_lazy(tgt_lang.name), tcode=tgt_lang.code, sname=_lazy(src_lang.name), scode=src_lang.code))
|
abort(400, description=_("%(tname)s (%(tcode)s) is not available as a target language from %(sname)s (%(scode)s)", tname=_lazy(tgt_lang.name), tcode=tgt_lang.code, sname=_lazy(src_lang.name), scode=src_lang.code))
|
||||||
|
|
||||||
|
if translatable:
|
||||||
if text_format == "html":
|
if text_format == "html":
|
||||||
translated_text = unescape(str(translate_html(translator, q)))
|
translated_text = unescape(str(translate_html(translator, q)))
|
||||||
alternatives = [] # Not supported for html yet
|
alternatives = [] # Not supported for html yet
|
||||||
|
@ -710,6 +745,9 @@ def create_app(args):
|
||||||
hypotheses = translator.hypotheses(q, num_alternatives + 1)
|
hypotheses = translator.hypotheses(q, num_alternatives + 1)
|
||||||
translated_text = unescape(improve_translation_formatting(q, hypotheses[0].value))
|
translated_text = unescape(improve_translation_formatting(q, hypotheses[0].value))
|
||||||
alternatives = filter_unique([unescape(improve_translation_formatting(q, hypotheses[i].value)) for i in range(1, len(hypotheses))], translated_text)
|
alternatives = filter_unique([unescape(improve_translation_formatting(q, hypotheses[i].value)) for i in range(1, len(hypotheses))], translated_text)
|
||||||
|
else:
|
||||||
|
translated_text = q # Cannot translate, send the original text back
|
||||||
|
alternatives = []
|
||||||
|
|
||||||
result = {"translatedText": translated_text}
|
result = {"translatedText": translated_text}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue