mirror of
https://github.com/searxng/searxng.git
synced 2025-06-28 11:39:52 +00:00
[fix] utils: truncated result (#4949)
Make sure to parse everything before returning. Related: \ ``` FAIL: test_html_to_text (tests.unit.test_utils.TestUtils.test_html_to_text) ---------------------------------------------------------------------- Traceback (most recent call last): File "/home/runner/work/searxng/searxng/tests/unit/test_utils.py", line 53, in test_html_to_text self.assertEqual(utils.html_to_text(r"regexp: (?<![a-zA-Z]"), "regexp: (?<![a-zA-Z]") ~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ AssertionError: 'regexp: (?' != 'regexp: (?<![a-zA-Z]' - regexp: (? + regexp: (?<![a-zA-Z] ```
This commit is contained in:
parent
a76ccba9c5
commit
49fdf4edd9
1 changed file with 2 additions and 0 deletions
|
@ -161,9 +161,11 @@ def html_to_text(html_str: str) -> str:
|
|||
s = _HTMLTextExtractor()
|
||||
try:
|
||||
s.feed(html_str)
|
||||
s.close()
|
||||
except AssertionError:
|
||||
s = _HTMLTextExtractor()
|
||||
s.feed(escape(html_str, quote=True))
|
||||
s.close()
|
||||
except _HTMLTextExtractorException:
|
||||
logger.debug("HTMLTextExtractor: invalid HTML\n%s", html_str)
|
||||
return s.get_text()
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue