Skip to content

Commit

Permalink
Fix pattern matching files for retention too restrictive (#229)
Browse files Browse the repository at this point in the history
  • Loading branch information
Delgan committed Apr 5, 2020
1 parent 590aa8a commit efd090a
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 38 deletions.
8 changes: 7 additions & 1 deletion CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
`Unreleased`
============

- Make the filter listing files for ``retention`` less strict, it now matches files based on the pattern ``"basename(.*).ext(.*)"`` (`#229 </~https://github.com/Delgan/loguru/issues/229>`_).


`0.4.1`_ (2020-01-19)
=====================

- Deprecate the ``ansi`` parameter of ``.opt()`` in favor of ``colors`` which is a name more appropriate.
- Prevent unrelated files and directories to be incorrectly collected thus causing errors during the `retention` process (`#195 </~https://github.com/Delgan/loguru/issues/195>`_, thanks `@gazpachoking </~https://github.com/gazpachoking>`_).
- Prevent unrelated files and directories to be incorrectly collected thus causing errors during the ``retention`` process (`#195 </~https://github.com/Delgan/loguru/issues/195>`_, thanks `@gazpachoking </~https://github.com/gazpachoking>`_).
- Strip color markups contained in ``record["message"]`` when logging with ``.opt(ansi=True)`` instead of leaving them as is (`#198 </~https://github.com/Delgan/loguru/issues/198>`_).
- Ignore color markups contained in ``*args`` and ``**kwargs`` when logging with ``.opt(ansi=True)``, leave them as is instead of trying to use them to colorize the message which could cause undesirable errors (`#197 </~https://github.com/Delgan/loguru/issues/197>`_).

Expand Down
43 changes: 15 additions & 28 deletions loguru/_file_sink.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import locale
import numbers
import os
import re
import shutil
import string
from functools import partial
Expand Down Expand Up @@ -156,7 +155,7 @@ def __init__(
self._kwargs = {**kwargs, "mode": mode, "buffering": buffering, "encoding": self.encoding}
self._path = str(path)

self._glob_pattern, self._regex_pattern = self._make_file_patterns(self._path)
self._glob_patterns = self._make_glob_patterns(self._path)
self._rotation_function = self._make_rotation_function(rotation)
self._retention_function = self._make_retention_function(retention)
self._compression_function = self._make_compression_function(compression)
Expand Down Expand Up @@ -193,30 +192,17 @@ def _initialize_file(self, *, rename_existing):
self._file = open(new_path, **self._kwargs)

@staticmethod
def _make_file_patterns(path):
def escape(string_, escape_function, rep):
tokens = string.Formatter().parse(string_)
parts = (escape_function(text) + rep * (name is not None) for text, name, *_ in tokens)
return "".join(parts)
def _make_glob_patterns(path):
formatter = string.Formatter()
tokens = formatter.parse(path)
escaped = "".join(glob.escape(text) + "*" * (name is not None) for text, name, *_ in tokens)

# Cleanup tokens to prevent false-positive in "splitext"
pattern = escape(path, lambda x: x, "{}")
root, ext = os.path.splitext(escaped)

root, ext = os.path.splitext(pattern)
basename = os.path.basename(root)
if not ext:
return [escaped, escaped + ".*"]

# Used to quickly list files matching the log filename base (ignoring renaming / extension)
glob_pattern = escape(root, glob.escape, "*") + "*"

# Used to double-check the listed files, ensuring they entirely match a log filename pattern
regex_pattern = re.compile(
escape(basename, re.escape, r"[\s\S]*")
+ r"(\.[0-9]+-[0-9]+-[0-9]+_[0-9]+-[0-9]+-[0-9]+_[0-9]+(\.[0-9]+)?)?"
+ escape(ext, re.escape, r"[\s\S]*")
+ r"(\.[\s\S]*)?$"
)

return glob_pattern, regex_pattern
return [escaped, escaped + ".*", root + ".*" + ext, root + ".*" + ext + ".*"]

@staticmethod
def _make_rotation_function(rotation):
Expand Down Expand Up @@ -357,12 +343,13 @@ def _terminate(self, *, teardown):
self._compression_function(self._file_path)

if self._retention_function is not None:
logs = [
logs = {
file
for file in glob.glob(self._glob_pattern)
if self._regex_pattern.match(os.path.basename(file)) and os.path.isfile(file)
]
self._retention_function(logs)
for pattern in self._glob_patterns
for file in glob.glob(pattern)
if os.path.isfile(file)
}
self._retention_function(list(logs))

self._file = None
self._file_path = None
Expand Down
6 changes: 3 additions & 3 deletions loguru/_logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -583,8 +583,8 @@ def add(
happen now, ``False`` otherwise.
The ``retention`` occurs at rotation or at sink stop if rotation is ``None``. Files are
selected according to their basename, if it is the same that the sink file, with possible
time field being replaced with ``.*``. This parameter accepts:
selected if they match the pattern ``"basename(.*).ext(.*)"`` (possible time fields are
beforehand replaced with ``.*``) based on the sink file. This parameter accepts:
- an |int| which indicates the number of log files to keep, while older files are removed.
- a |timedelta| which specifies the maximum age of files to keep.
Expand Down Expand Up @@ -620,7 +620,7 @@ def add(
possible misuse. If you wish to display a markup tag literally, you can escape it by
prepending a ``\`` like for example ``\<blue>``. If, for some reason, you need to escape a
string programmatically, note that the regex used internally to parse markup tags is
``r"\\?</?((?:[fb]g\s)?[^<>\s]*)>"``.'
``r"\\?</?((?:[fb]g\s)?[^<>\s]*)>"``.
Note that when logging a message with ``opt(colors=True)``, color tags present in the
formatting arguments (``args`` and ``kwargs``) are completely ignored. This is important if
Expand Down
48 changes: 42 additions & 6 deletions tests/test_filesink_retention.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,12 @@ def test_managed_files(tmpdir):
"test.2019-11-12_03-22-07_018985.log.tar.gz",
"test.2019-11-12_03-22-07_018985.2.log",
"test.2019-11-12_03-22-07_018985.2.log.tar.gz",
"test.foo.log",
"test.123.log",
"test.2019-11-12_03-22-07_018985.abc.log",
"test.2019-11-12_03-22-07_018985.123.abc.log",
"test.foo.log.bar",
"test.log.log",
]

for other in others:
Expand All @@ -85,22 +91,22 @@ def test_managed_files(tmpdir):


def test_not_managed_files(tmpdir):
others = {
others = [
"test_.log",
"_test.log",
"tes.log",
"te.st.log",
"testlog",
"test",
"test.",
"test.tar.gz",
"test.logs",
"test.foo",
"test.foo.logs",
"tests.logs.zip",
"foo.test.log",
"test.foo.log",
"test.123.log",
"test.2019-11-12_03-22-07_018985.abc.log",
"test.2019-11-12_03-22-07_018985.123.abc.log",
}
"foo.test.log.zip",
]

for other in others:
tmpdir.join(other).write(other)
Expand All @@ -111,6 +117,36 @@ def test_not_managed_files(tmpdir):
assert set(f.basename for f in tmpdir.listdir()) == others


@pytest.mark.parametrize("filename", ["test", "test.log"])
def test_no_duplicates_in_listed_files(tmpdir, filename):
matching_files = None
others = [
"test.log",
"test.log.log",
"test.log.log.log",
"test",
"test..",
"test.log..",
"test..log",
"test...log",
"test.log..",
"test.log.a.log.b",
]

def retention(files):
nonlocal matching_files
matching_files = files

for other in others:
tmpdir.join(other).write(other)

i = logger.add(str(tmpdir.join(filename)), retention=retention, catch=False)
logger.remove(i)

assert matching_files is not None
assert len(matching_files) == len(set(matching_files))


def test_directories_ignored(tmpdir):
others = ["test.log.2", "test.123.log", "test.log.tar.gz", "test.archive"]

Expand Down

0 comments on commit efd090a

Please sign in to comment.