Skip to content

Commit

Permalink
Normalize author name unicode before matching (#2006)
Browse files Browse the repository at this point in the history
* Fix accented characters not being matched in author name

Fixes #2004

* Normalized the strings instead of modifying the pattern

* Applied isort & black
  • Loading branch information
raphaelyancey authored Feb 28, 2020
1 parent ab66bb9 commit ae6d64d
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 2 deletions.
5 changes: 3 additions & 2 deletions poetry/packages/package.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from contextlib import contextmanager
from typing import Union
from unicodedata import normalize
from warnings import warn

from poetry.semver import Version
Expand Down Expand Up @@ -160,7 +161,7 @@ def _get_author(self): # type: () -> dict
if not self._authors:
return {"name": None, "email": None}

- m = AUTHOR_REGEX.match(self._authors[0])
+ m = AUTHOR_REGEX.match(normalize("NFC", self._authors[0]))

name = m.group("name")
email = m.group("email")
Expand All @@ -171,7 +172,7 @@ def _get_maintainer(self): # type: () -> dict
if not self._maintainers:
return {"name": None, "email": None}

- m = AUTHOR_REGEX.match(self._maintainers[0])
+ m = AUTHOR_REGEX.match(normalize("NFC", self._maintainers[0]))

name = m.group("name")
email = m.group("email")
Expand Down
12 changes: 12 additions & 0 deletions tests/packages/test_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,18 @@ def test_package_authors():
assert package.author_name == "Sébastien Eustace"
assert package.author_email == "sebastien@eustace.io"

package.authors.insert(
0, "Raphaël Yancey <raphael@badfile.net>"
) # With combining diacritics (ë = e + ¨ = e\u0308)
assert package.author_name == "Raphaël Yancey" # Is normalized into \u00EB
assert package.author_email == "raphael@badfile.net"

package.authors.insert(
0, "Raphaël Yancey <raphael@badfile.net>"
) # Without (ë = \u00EB)
assert package.author_name == "Raphaël Yancey"
assert package.author_email == "raphael@badfile.net"

package.authors.insert(0, "John Doe")
assert package.author_name == "John Doe"
assert package.author_email is None
Expand Down

0 comments on commit ae6d64d

Please sign in to comment.