From b15d9d6c848bfef272d6091ce02e5c528ec381a0 Mon Sep 17 00:00:00 2001 From: David Lord Date: Mon, 14 Mar 2022 08:49:47 -0700 Subject: [PATCH] avoid ambiguous regex in striptags --- CHANGES.rst | 8 ++++++++ src/markupsafe/__init__.py | 12 ++++++++---- tests/test_markupsafe.py | 10 +++++++++- 3 files changed, 25 insertions(+), 5 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 25f7737f..649aebe5 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,3 +1,11 @@ +Version 2.1.1 +------------- + +Unreleased + +- Avoid ambiguous regex matches in ``striptags``. :pr:`293` + + Version 2.1.0 ------------- diff --git a/src/markupsafe/__init__.py b/src/markupsafe/__init__.py index 2acb04e4..e066ebd1 100644 --- a/src/markupsafe/__init__.py +++ b/src/markupsafe/__init__.py @@ -11,9 +11,10 @@ def __html__(self) -> str: pass -__version__ = "2.1.0" +__version__ = "2.1.1.dev0" -_striptags_re = re.compile(r"(<!--.*?-->|<[^>]*>)") +_strip_comments_re = re.compile(r"<!--.*?-->") +_strip_tags_re = re.compile(r"<.*?>") def _simple_escaping_wrapper(name: str) -> t.Callable[..., "Markup"]: @@ -158,8 +159,11 @@ def striptags(self) -> str: >>> Markup("Main &raquo;\t<em>About</em>").striptags() 'Main » About' """ - stripped = " ".join(_striptags_re.sub("", self).split()) - return Markup(stripped).unescape() + # Use two regexes to avoid ambiguous matches. + value = _strip_comments_re.sub("", self) + value = _strip_tags_re.sub("", value) + value = " ".join(value.split()) + return Markup(value).unescape() @classmethod def escape(cls, s: t.Any) -> "Markup": diff --git a/tests/test_markupsafe.py b/tests/test_markupsafe.py index 2f138854..236f35e7 100644 --- a/tests/test_markupsafe.py +++ b/tests/test_markupsafe.py @@ -69,7 +69,15 @@ def test_dict_interpol(): def test_escaping(escape): assert escape("\"<>&'") == "&#34;&lt;&gt;&amp;&#39;" - assert Markup("<em>Foo &amp; Bar</em>").striptags() == "Foo & Bar" + assert ( + Markup( + "<!-- outer comment -->" + "<em>Foo &amp; Bar" + "<!-- inner comment about <em> -->" + "</em>" + ).striptags() + == "Foo & Bar" + ) def test_unescape():