Skip to content

Commit

Permalink
Refactoring: Add descriptions to the docstring of io.sanitize_name
Browse files Browse the repository at this point in the history
  • Loading branch information
akikuno committed Feb 27, 2025
1 parent c7ef91d commit 35c1c95
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 2 deletions.
34 changes: 33 additions & 1 deletion src/DAJIN2/utils/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,11 +224,43 @@ def convert_to_posix(path: str) -> str:

def sanitize_name(file_name: Path | str) -> str:
    r"""
    Sanitize a file name by replacing characters that are invalid on Windows with '-'.

    Leading and trailing whitespace is stripped first. Every remaining character
    matched by the regular expression r'[<>:"/\\|?*\x00-\x1F .]' is then replaced
    by one '-' (consecutive matches are NOT collapsed into a single '-').

    The character class matches:

    - **Windows prohibited characters**:
      `<`, `>`, `:`, `"`, `/`, `\`, `|`, `?`, `*`
    - **Control characters (ASCII \x00 - \x1f)**:
      Examples: `\t` (tab), `\n` (newline), `\r` (carriage return)
    - **Period `.`**:
      Matches as a literal character inside `[]`. Every dot is replaced,
      including the one in front of a file extension.
    - **Space `" "`**:
      Spaces that remain after stripping are replaced with `-` as well.

    Example:
        >>> sanitize_name("test file.txt")
        'test-file-txt'
        >>> sanitize_name("invalid|name?.txt")
        'invalid-name--txt'
        >>> sanitize_name(" leading space.txt ")
        'leading-space-txt'
        >>> sanitize_name("file name with spaces")
        'file-name-with-spaces'

    Args:
        file_name (Path | str): The original file name.

    Returns:
        str: The sanitized file name with invalid characters replaced by '-'.

    Raises:
        ValueError: If the provided file name is empty or contains only whitespace.
    """
    # Strip before validating so that whitespace-only names are rejected and
    # surrounding blanks never turn into leading/trailing '-'.
    file_name = str(file_name).strip()
    if not file_name:
        raise ValueError("Provided name is empty or consists only of whitespace")

    # Raw string: the backslashes must reach the regex engine untouched.
    forbidden_chars = r'[<>:"/\\|?*\x00-\x1F .]'

    return re.sub(forbidden_chars, "-", file_name)
Expand Down
2 changes: 1 addition & 1 deletion tests/src/utils/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def test_sanitize_name_with_invalid_characters():


def test_sanitize_name_with_whitespace():
    """Check how sanitize_name treats surrounding and interior spaces."""
    # (raw input, expected result): surrounding whitespace is stripped,
    # while spaces left inside the name are replaced with '-'.
    expectations = (
        (" leading_space", "leading_space"),
        (" leading space", "leading-space"),
        ("trailing space ", "trailing-space"),
    )
    for raw_name, sanitized in expectations:
        assert io.sanitize_name(raw_name) == sanitized


Expand Down

0 comments on commit 35c1c95

Please sign in to comment.