Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Review urlescape percent-safe set, and use + behavior for form spaces. #3373

Merged
merged 1 commit into from
Oct 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 3 additions & 25 deletions httpx/_urlparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@
]
)


# {scheme}: (optional)
# //{authority} (optional)
# {path}
Expand Down Expand Up @@ -478,7 +479,7 @@ def PERCENT(string: str) -> str:
return "".join([f"%{byte:02X}" for byte in string.encode("utf-8")])


def percent_encoded(string: str, safe: str = "/") -> str:
def percent_encoded(string: str, safe: str) -> str:
"""
Use percent-encoding to quote a string.
"""
Expand All @@ -493,7 +494,7 @@ def percent_encoded(string: str, safe: str = "/") -> str:
)


def quote(string: str, safe: str = "/") -> str:
def quote(string: str, safe: str) -> str:
"""
Use percent-encoding to quote a string, omitting existing '%xx' escape sequences.

Expand Down Expand Up @@ -524,26 +525,3 @@ def quote(string: str, safe: str = "/") -> str:
parts.append(percent_encoded(trailing_text, safe=safe))

return "".join(parts)


def urlencode(items: list[tuple[str, str]]) -> str:
    """
    Serialize a list of (key, value) string pairs as a URL query string.

    This is a deliberately reduced take on the stdlib `urlencode`: only
    `str` keys and values are supported, so none of the bytes-vs-str typing
    cases need handling.

    https://github.com/python/cpython/blob/b2f7b2ef0b5421e01efb8c7bee2ef95d3bab77eb/Lib/urllib/parse.py#L926

    Both keys and values are percent-encoded with an empty safe set, so
    spaces are emitted as '%20' and '/' as '%2F'. That is slightly different
    from `requests`, but is the behaviour that browsers use.

    See
    - https://github.com/encode/httpx/issues/2536
    - https://github.com/encode/httpx/issues/2721
    - https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlencode
    """
    pairs = []
    for key, value in items:
        quoted_key = percent_encoded(key, safe="")
        quoted_value = percent_encoded(value, safe="")
        pairs.append(f"{quoted_key}={quoted_value}")
    return "&".join(pairs)
27 changes: 17 additions & 10 deletions httpx/_urls.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,31 @@
from __future__ import annotations

import typing
from urllib.parse import parse_qs, unquote
from urllib.parse import parse_qs, unquote, urlencode

import idna

from ._types import QueryParamTypes
from ._urlparse import urlencode, urlparse
from ._urlparse import urlparse
from ._utils import primitive_value_to_str

__all__ = ["URL", "QueryParams"]


# To urlencode query parameters, we use the whatwg query percent-encode set
# and additionally escape U+0025 (%), U+0026 (&), U+002B (+) and U+003D (=).

# https://url.spec.whatwg.org/#percent-encoded-bytes

# Every printable ASCII character (U+0020 to U+007E) is treated as safe,
# except for space, '"', '#', '%', '&', '+', '<', '=' and '>'.
URLENCODE_SAFE = "".join(
    char
    for char in map(chr, range(0x20, 0x7F))
    if char not in ' "#%&+<=>'
)


class URL:
"""
url = httpx.URL("HTTPS://jo%40email.com:a%20secret@müller.de:1234/pa%20th?search=ab#anchorlink")
Expand Down Expand Up @@ -605,14 +619,7 @@ def __eq__(self, other: typing.Any) -> bool:
return sorted(self.multi_items()) == sorted(other.multi_items())

def __str__(self) -> str:
"""
Note that we use '%20' encoding for spaces, and treat '/' as a safe
character.

See https://github.com/encode/httpx/issues/2536 and
https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlencode
"""
return urlencode(self.multi_items())
return urlencode(self.multi_items(), safe=URLENCODE_SAFE)

def __repr__(self) -> str:
class_name = self.__class__.__name__
Expand Down
20 changes: 8 additions & 12 deletions tests/models/test_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,19 +141,14 @@ def test_path_query_fragment(url, raw_path, path, query, fragment):


def test_url_query_encoding():
"""
URL query parameters should use '%20' for encoding spaces,
and should treat '/' as a safe character. This behaviour differs
across clients, but we're matching browser behaviour here.

See https://github.com/encode/httpx/issues/2536
and https://github.com/encode/httpx/discussions/2460
"""
url = httpx.URL("https://www.example.com/?a=b c&d=e/f")
assert url.raw_path == b"/?a=b%20c&d=e/f"

url = httpx.URL("https://www.example.com/?a=b+c&d=e/f")
assert url.raw_path == b"/?a=b+c&d=e/f"

url = httpx.URL("https://www.example.com/", params={"a": "b c", "d": "e/f"})
assert url.raw_path == b"/?a=b%20c&d=e%2Ff"
assert url.raw_path == b"/?a=b+c&d=e/f"


def test_url_params():
Expand Down Expand Up @@ -289,9 +284,10 @@ def test_url_leading_dot_prefix_on_relative_url():


def test_param_with_space():
# Params passed as form key-value pairs should be escaped.
# Params passed as form key-value pairs should be form escaped,
# including the special case of "+" for space separators.
url = httpx.URL("http://webservice", params={"u": "with spaces"})
assert str(url) == "http://webservice?u=with%20spaces"
assert str(url) == "http://webservice?u=with+spaces"


def test_param_requires_encoding():
Expand All @@ -313,7 +309,7 @@ def test_param_with_existing_escape_requires_encoding():
# even if they include a valid escape sequence.
# We want to match browser form behaviour here.
url = httpx.URL("http://webservice", params={"u": "http://example.com?q=foo%2Fa"})
assert str(url) == "http://webservice?u=http%3A%2F%2Fexample.com%3Fq%3Dfoo%252Fa"
assert str(url) == "http://webservice?u=http://example.com?q%3Dfoo%252Fa"


# Tests for query parameter percent encoding.
Expand Down