From 35c1c95d0242213aa5d7046c6559c9d16e3cf1e0 Mon Sep 17 00:00:00 2001 From: akikuno Date: Thu, 27 Feb 2025 16:00:33 +0900 Subject: [PATCH] Refactoring: Add descriptions at docstring of `io.sanitize_name` --- src/DAJIN2/utils/io.py | 34 +++++++++++++++++++++++++++++++++- tests/src/utils/test_io.py | 2 +- 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/src/DAJIN2/utils/io.py b/src/DAJIN2/utils/io.py index f95af7e..fd47e87 100644 --- a/src/DAJIN2/utils/io.py +++ b/src/DAJIN2/utils/io.py @@ -224,11 +224,43 @@ def convert_to_posix(path: str) -> str: def sanitize_name(file_name: Path | str) -> str: """ - Sanitize the file name by replacing invalid characters on Windows OS with '-' + Sanitize the file name by replacing invalid characters on Windows OS with '-'. + + The regular expression r'[<>:"/\\|?*\x00-\x1f .]' matches the following characters: + + - **Windows prohibited characters**: + `<`, `>`, `:`, `"`, `/`, `\\`, `|`, `?`, `*` + - **Control characters (ASCII \x00 - \x1f)**: + Examples: `\t` (tab), `\n` (newline), `\r` (carriage return) + - **Period `.`**: + Matches as a literal character inside `[]`, so every dot (including the extension separator) is replaced with `-`. + - **Space `" "`**: + Spaces are explicitly included (`.` is preceded by `" "` in the regex), + so they are replaced with `-` as well. + + Example: + >>> sanitize_name("test file.txt") + 'test-file-txt' + >>> sanitize_name("invalid|name?.txt") + 'invalid-name--txt' + >>> sanitize_name(" leading space.txt ") + 'leading-space-txt' + >>> sanitize_name("file name with spaces") + 'file-name-with-spaces' + + Args: + file_name (Path | str): The original file name. + + Returns: + str: The sanitized file name with invalid characters replaced by '-'. + + Raises: + ValueError: If the provided file name is empty or contains only whitespace. 
""" file_name = str(file_name).strip() if not file_name: raise ValueError("Provided name is empty or consists only of whitespace") + forbidden_chars = r'[<>:"/\\|?*\x00-\x1F .]' return re.sub(forbidden_chars, "-", file_name) diff --git a/tests/src/utils/test_io.py b/tests/src/utils/test_io.py index ffca488..cc72f17 100644 --- a/tests/src/utils/test_io.py +++ b/tests/src/utils/test_io.py @@ -20,7 +20,7 @@ def test_sanitize_name_with_invalid_characters(): def test_sanitize_name_with_whitespace(): - assert io.sanitize_name(" leading_space") == "leading_space" + assert io.sanitize_name(" leading space") == "leading-space" assert io.sanitize_name("trailing space ") == "trailing-space"