From 1fe897d740ebc67bd1f6493bcc889846f16abf10 Mon Sep 17 00:00:00 2001 From: devdanzin <74280297+devdanzin@users.noreply.github.com> Date: Thu, 4 Jul 2024 17:40:49 -0300 Subject: [PATCH] feat: multiline exclusion regexes (fix #996) (#1807) * Support excluding multiline regexes, O(N) algorithm by nedbat. * Fix test_parser.py. * Actually fix tests and mypy check. * Format multiline exclusion tests. * Apply suggestions from code review by @nedbat. Co-authored-by: Ned Batchelder * Improve and add new tests to test_parser.py. * Skip test_multiline_exclusion_block2 if Python version < 3.10. * Add tests for exclusion of a whole module or from a marker until the end of the file. * tweak the regexes in the tests * test: add one more test for the specific #996 use * docs: explain multi-line exclusion regexes * build: the next version will be 7.6.0 * better: no whitespace in regexes --------- Co-authored-by: Ned Batchelder --- CHANGES.rst | 14 ++++ coverage/parser.py | 20 +++-- coverage/version.py | 2 +- doc/excluding.rst | 96 ++++++++++++++++++++- tests/test_parser.py | 195 +++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 317 insertions(+), 10 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 070098aa5..f6fb6f5b2 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -23,8 +23,22 @@ upgrading your version of coverage.py. Unreleased ---------- +- Exclusion patterns can now be multi-line, thanks to `Daniel Diniz <pull 1807_>`_. This enables many interesting exclusion use-cases, including those + requested in issues `118 <issue 118_>`_ (entire files), `996 + <issue 996_>`_ (multiple lines only when appearing together), `1741 + <issue 1741_>`_ (remainder of a function), and `1803 <issue 1803_>`_ + (arbitrary sequence of marked lines). See the :ref:`multi_line_exclude` + section of the docs for more details and examples. + - Python 3.13.0b3 is supported. +.. _issue 118: https://github.com/nedbat/coveragepy/issues/118 +.. _issue 996: https://github.com/nedbat/coveragepy/issues/996 +.. 
_issue 1741: /~https://github.com/nedbat/coveragepy/issues/1741 +.. _issue 1803: /~https://github.com/nedbat/coveragepy/issues/1803 + +.. _pull 1807: /~https://github.com/nedbat/coveragepy/pull/1807 .. scriv-start-here diff --git a/coverage/parser.py b/coverage/parser.py index 00ccbf10a..19267a718 100644 --- a/coverage/parser.py +++ b/coverage/parser.py @@ -105,14 +105,22 @@ def lines_matching(self, regex: str) -> set[TLineNo]: """Find the lines matching a regex. Returns a set of line numbers, the lines that contain a match for - `regex`. The entire line needn't match, just a part of it. + `regex`. The entire line needn't match, just a part of it. + Handles multiline regex patterns. """ - regex_c = re.compile(regex) - matches = set() - for i, ltext in enumerate(self.text.split("\n"), start=1): - if regex_c.search(ltext): - matches.add(self._multiline.get(i, i)) + regex_c = re.compile(regex, re.MULTILINE) + matches: set[TLineNo] = set() + + last_start = 0 + last_start_line = 0 + for match in regex_c.finditer(self.text): + start, end = match.span() + start_line = last_start_line + self.text.count('\n', last_start, start) + end_line = last_start_line + self.text.count('\n', last_start, end) + matches.update(self._multiline.get(i, i) for i in range(start_line + 1, end_line + 2)) + last_start = start + last_start_line = start_line return matches def _raw_parse(self) -> None: diff --git a/coverage/version.py b/coverage/version.py index 15be60ad2..fae238809 100644 --- a/coverage/version.py +++ b/coverage/version.py @@ -8,7 +8,7 @@ # version_info: same semantics as sys.version_info. # _dev: the .devN suffix if any. -version_info = (7, 5, 5, "alpha", 0) +version_info = (7, 6, 0, "alpha", 0) _dev = 1 diff --git a/doc/excluding.rst b/doc/excluding.rst index 80f0c79dd..3ade282fc 100644 --- a/doc/excluding.rst +++ b/doc/excluding.rst @@ -73,13 +73,17 @@ line, so it isn't considered a branch at all. 
Advanced exclusion ------------------ -Coverage.py identifies exclusions by matching lines against a list of regular -expressions. Using :ref:`configuration files <config>` or the coverage +Coverage.py identifies exclusions by matching source code against a list of +regular expressions. Using :ref:`configuration files <config>` or the coverage :ref:`API <api>`, you can add to that list. This is useful if you have often-used constructs to exclude that can be matched with a regex. You can exclude them all at once without littering your code with exclusion pragmas. -If the matched line introduces a block, the entire block is excluded from +Before coverage.py 7.6.0, the regexes were matched against single lines of your +source code. Now they can be multi-line regexes that find matches across +lines. See :ref:`multi_line_exclude`. + +If a matched line introduces a block, the entire block is excluded from reporting. Matching a ``def`` line or decorator line will exclude an entire function. @@ -232,6 +236,92 @@ A similar pragma, "no branch", can be used to tailor branch coverage measurement. See :ref:`branch` for details. +.. _multi_line_exclude: + +Multi-line exclusion regexes +---------------------------- + +.. versionadded:: 7.6.0 + +Exclusion regexes can match multi-line regions. All of the lines in a matched +region will be excluded. If part of the region introduces a block, the entire +block is excluded even if part of it is outside the matched region. + +When writing regexes to match multiple lines, remember that ``"."`` won't match +a newline character, but ``"\n"`` or ``"(?s:.)"`` will. Using the ``"(?s)"`` +flag in your regex will also make dot match a newline. + +Here are some examples: + +.. 
[[[cog + show_configs( + ini=r""" + [report] + exclude_also = + ; Exclude an except clause of a specific form: + except ValueError:\n\s*assume\(False\) + ; A pragma comment that excludes an entire file: + (?s)\A.*# pragma: exclude file.*\Z + """, + toml=r""" + [tool.coverage.report] + exclude_also = [ + # Exclude an except clause of a specific form: + "except ValueError:\\n\\s*assume\\(False\\)", + # A pragma comment that excludes an entire file: + "(?s)\\A.*# pragma: exclude file.*\\Z", + ] + """, + ) +.. ]]] + +.. tabs:: + + .. code-tab:: ini + :caption: .coveragerc + + [report] + exclude_also = + ; Exclude an except clause of a specific form: + except ValueError:\n\s*assume\(False\) + ; A pragma comment that excludes an entire file: + (?s)\A.*# pragma: exclude file.*\Z + + .. code-tab:: toml + :caption: pyproject.toml + + [tool.coverage.report] + exclude_also = [ + # Exclude an except clause of a specific form: + "except ValueError:\\n\\s*assume\\(False\\)", + # A pragma comment that excludes an entire file: + "(?s)\\A.*# pragma: exclude file.*\\Z", + ] + + .. code-tab:: ini + :caption: setup.cfg or tox.ini + + [coverage:report] + exclude_also = + ; Exclude an except clause of a specific form: + except ValueError:\n\s*assume\(False\) + ; A pragma comment that excludes an entire file: + (?s)\A.*# pragma: exclude file.*\Z + +.. [[[end]]] (checksum: 8892a4efef9da67fb0080d15811e1c19) + +The first regex matches a specific except line followed by a specific function +call. Both lines must be present for the exclusion to take effect. Note that +the regex uses ``"\n\s*"`` to match the newline and the indentation of the +second line. Without these, the regex won't match. + +The second regex matches the entire text of a file containing the comment ``# +pragma: exclude file``. This lets you exclude files from coverage measurement +with an internal comment instead of naming them in a settings file. 
This regex +uses the ``"(?s)"`` regex flag to let a dot match any character including a +newline. + + Excluding source files ---------------------- diff --git a/tests/test_parser.py b/tests/test_parser.py index e9ad91a05..f5891e127 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -781,6 +781,201 @@ def function() -> int: assert parser.raw_statements == {1, 3, 4, 5, 6, 8, 9} assert parser.statements == {1, 8, 9} + def test_multiline_exclusion_single_line(self) -> None: + regex = r"print\('.*'\)" + parser = self.parse_text("""\ + def foo(): + print('Hello, world!') + """, regex) + assert parser.lines_matching(regex) == {2} + assert parser.raw_statements == {1, 2} + assert parser.statements == {1} + + def test_multiline_exclusion_suite(self) -> None: + # A multi-line exclusion that matches a colon line still excludes the entire block. + regex = r"if T:\n\s+print\('Hello, world!'\)" + parser = self.parse_text("""\ + def foo(): + if T: + print('Hello, world!') + print('This is a multiline regex test.') + a = 5 + """, regex) + assert parser.lines_matching(regex) == {2, 3} + assert parser.raw_statements == {1, 2, 3, 4, 5} + assert parser.statements == {1, 5} + + def test_multiline_exclusion_no_match(self) -> None: + regex = r"nonexistent" + parser = self.parse_text("""\ + def foo(): + print('Hello, world!') + """, regex) + assert parser.lines_matching(regex) == set() + assert parser.raw_statements == {1, 2} + assert parser.statements == {1, 2} + + def test_multiline_exclusion_no_source(self) -> None: + regex = r"anything" + parser = PythonParser(text="", filename="dummy.py", exclude=regex) + assert parser.lines_matching(regex) == set() + assert parser.raw_statements == set() + assert parser.statements == set() + + def test_multiline_exclusion_all_lines_must_match(self) -> None: + # /~https://github.com/nedbat/coveragepy/issues/996 + regex = r"except ValueError:\n\s*print\('false'\)" + parser = self.parse_text("""\ + try: + a = 2 + print('false') + except 
ValueError: + print('false') + except ValueError: + print('something else') + except IndexError: + print('false') + """, regex) + assert parser.lines_matching(regex) == {4, 5} + assert parser.raw_statements == {1, 2, 3, 4, 5, 6, 7, 8, 9} + assert parser.statements == {1, 2, 3, 6, 7, 8, 9} + + def test_multiline_exclusion_multiple_matches(self) -> None: + regex = r"print\('.*'\)\n\s+. = \d" + parser = self.parse_text("""\ + def foo(): + print('Hello, world!') + a = 5 + def bar(): + print('Hello again!') + b = 6 + """, regex) + assert parser.lines_matching(regex) == {2, 3, 5, 6} + assert parser.raw_statements == {1, 2, 3, 4, 5, 6} + assert parser.statements == {1, 4} + + def test_multiline_exclusion_suite2(self) -> None: + regex = r"print\('Hello, world!'\)\n\s+if T:" + parser = self.parse_text("""\ + def foo(): + print('Hello, world!') + if T: + print('This is a test.') + """, regex) + assert parser.lines_matching(regex) == {2, 3} + assert parser.raw_statements == {1, 2, 3, 4} + assert parser.statements == {1} + + def test_multiline_exclusion_match_all(self) -> None: + regex = ( + r"def foo\(\):\n\s+print\('Hello, world!'\)\n" + + r"\s+if T:\n\s+print\('This is a test\.'\)" + ) + parser = self.parse_text("""\ + def foo(): + print('Hello, world!') + if T: + print('This is a test.') + """, regex) + assert parser.lines_matching(regex) == {1, 2, 3, 4} + assert parser.raw_statements == {1, 2, 3, 4} + assert parser.statements == set() + + def test_multiline_exclusion_block(self) -> None: + # /~https://github.com/nedbat/coveragepy/issues/1803 + regex = "# no cover: start(?s:.)*# no cover: stop" + parser = self.parse_text("""\ + a = my_function1() + if debug: + msg = "blah blah" + # no cover: start + log_message(msg, a) + b = my_function2() + # no cover: stop + """, regex) + assert parser.lines_matching(regex) == {4, 5, 6, 7} + assert parser.raw_statements == {1, 2, 3, 5, 6} + assert parser.statements == {1, 2, 3} + + @pytest.mark.skipif(not env.PYBEHAVIOR.match_case, 
reason="Match-case is new in 3.10") + def test_multiline_exclusion_block2(self) -> None: + # /~https://github.com/nedbat/coveragepy/issues/1797 + regex = r"case _:\n\s+assert_never\(" + parser = self.parse_text("""\ + match something: + case type_1(): + logic_1() + case type_2(): + logic_2() + case _: + assert_never(something) + match something: + case type_1(): + logic_1() + case type_2(): + logic_2() + case _: + print("Default case") + """, regex) + assert parser.lines_matching(regex) == {6, 7} + assert parser.raw_statements == {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14} + assert parser.statements == {1, 2, 3, 4, 5, 8, 9, 10, 11, 12, 13, 14} + + def test_multiline_exclusion_block3(self) -> None: + # /~https://github.com/nedbat/coveragepy/issues/1741 + # This will only work if there's exactly one return statement in the rest of the function + regex = r"# no cover: to return(?s:.)*return" + parser = self.parse_text("""\ + def my_function(args, j): + if args.command == Commands.CMD.value: + return cmd_handler(j, args) + # no cover: to return + print(f"Command '{args.command}' was not handled.", file=sys.stderr) + parser.print_help(file=sys.stderr) + + return os.EX_USAGE + print("not excluded") + """, regex) + assert parser.lines_matching(regex) == {4, 5, 6, 7, 8} + assert parser.raw_statements == {1, 2, 3, 5, 6, 8, 9} + assert parser.statements == {1, 2, 3, 9} + + def test_multiline_exclusion_whole_source(self) -> None: + # /~https://github.com/nedbat/coveragepy/issues/118 + regex = r"(?s)\A.*# pragma: exclude file.*\Z" + parser = self.parse_text("""\ + import coverage + # pragma: exclude file + def the_void() -> None: + if "py" not in __file__: + print("Not a Python file.") + print("Everything here is excluded.") + + return + print("Excluded too") + """, regex) + assert parser.lines_matching(regex) == {1, 2, 3, 4, 5, 6, 7, 8, 9, 10} + assert parser.raw_statements == {1, 3, 4, 5, 6, 8, 9} + assert parser.statements == set() + + def 
test_multiline_exclusion_from_marker(self) -> None: + # /~https://github.com/nedbat/coveragepy/issues/118 + regex = r"# pragma: rest of file(?s:.)*\Z" + parser = self.parse_text("""\ + import coverage + # pragma: rest of file + def the_void() -> None: + if "py" not in __file__: + print("Not a Python file.") + print("Everything here is excluded.") + + return + print("Excluded too") + """, regex) + assert parser.lines_matching(regex) == {2, 3, 4, 5, 6, 7, 8, 9, 10} + assert parser.raw_statements == {1, 3, 4, 5, 6, 8, 9} + assert parser.statements == {1} + class ParserMissingArcDescriptionTest(PythonParserTestBase): """Tests for PythonParser.missing_arc_description."""