Merge pull request #765 from pierotofy/langexp
Some checks failed
Build and Publish Docker Image / main (push) Has been cancelled
Run tests / tests_python (3.10) (push) Has been cancelled
Run tests / tests_python (3.8) (push) Has been cancelled
Run tests / tests_python (3.9) (push) Has been cancelled
Run tests / test_docker_build (push) Has been cancelled

Catch "can't detect" errors in language detection
This commit is contained in:
Piero Toffanin 2025-03-20 11:58:34 -04:00 committed by GitHub
commit 4d433b6077
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -3,7 +3,8 @@ from langdetect import DetectorFactory
DetectorFactory.seed = 0 DetectorFactory.seed = 0
from langdetect import detect_langs from langdetect import detect_langs, LangDetectException
from langdetect.lang_detect_exception import ErrorCode
from lexilang.detector import detect as lldetect from lexilang.detector import detect as lldetect
@@ -35,11 +36,17 @@ class Detector:
if conf > 0: if conf > 0:
return [Language(code, round(conf * 100))] return [Language(code, round(conf * 100))]
top_3_choices = [lang for lang in detect_langs(text) if check_lang(self.langcodes, lang)][:3] try:
if not len(top_3_choices): top_3_choices = [lang for lang in detect_langs(text) if check_lang(self.langcodes, lang)][:3]
return [Language("en", 0)] if not len(top_3_choices):
if top_3_choices[0].prob == 0: return [Language("en", 0)]
return [Language("en", 0)] if top_3_choices[0].prob == 0:
return [Language("en", 0)]
except LangDetectException as e:
if e.code == ErrorCode.CantDetectError:
return [Language("en", 0)]
else:
raise e
return [Language(normalized_lang_code(lang), round(lang.prob * 100)) for lang in top_3_choices] return [Language(normalized_lang_code(lang), round(lang.prob * 100)) for lang in top_3_choices]