mirror of
https://github.com/LibreTranslate/LibreTranslate.git
synced 2025-06-18 23:21:00 +00:00
use polyglot for detecting the language
This commit is contained in:
parent
9301ad0bda
commit
51980f6ff5
2 changed files with 40 additions and 23 deletions
|
@ -1,3 +1,36 @@
|
|||
from argostranslate import translate
|
||||
from polyglot.detect.base import Detector
|
||||
|
||||
languages = translate.load_installed_languages()
|
||||
|
||||
languages = translate.load_installed_languages()
|
||||
|
||||
|
||||
# Codes of all installed languages; detect_languages() only reports
# languages whose code appears in this list.
__lang_codes = [installed.code for installed in languages]
|
||||
|
||||
|
||||
def detect_languages(text):
    """Detect which of the installed languages ``text`` is written in.

    Returns a list of ``{'confidence': ..., 'language': ...}`` dicts sorted
    by descending confidence. Only languages whose code is in
    ``__lang_codes`` are reported. When no usable candidate is found, a
    single ``"en"`` entry with confidence ``0.0`` is returned.
    """
    # Imported locally so this function stays self-contained and does not
    # rely on a change to the module-level import block.
    from polyglot.detect.base import UnknownLanguage

    try:
        detected = Detector(text).languages
    except UnknownLanguage:
        # polyglot raises rather than returning an empty result when the
        # text is too short or ambiguous; treat it as "nothing detected" so
        # the default below is used instead of propagating the error.
        detected = []

    # get the candidates: drop placeholder entries that matched no bytes and
    # languages that are not installed
    candidate_langs = [
        lang for lang in detected
        if lang.read_bytes != 0 and lang.code in __lang_codes
    ]

    # this happens if no language can be detected
    if not candidate_langs:
        # use language "en" by default but with zero confidence
        return [
            {
                'confidence': 0.0,
                'language': "en"
            }
        ]

    # sort the candidates descending based on the detected confidence
    candidate_langs.sort(key=lambda lang: lang.confidence, reverse=True)

    return [
        {
            'confidence': lang.confidence,
            'language': lang.code
        }
        for lang in candidate_langs
    ]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue