Skip to content

Commit

Permalink
Merge pull request #694 from willkg/minor-fixes
Browse files Browse the repository at this point in the history
Convert tags, skip_tags, recognized_tags to sets; fix doctests; f-strings
  • Loading branch information
willkg authored Jan 23, 2023
2 parents b2a0d57 + aec2c0e commit b7e8da3
Show file tree
Hide file tree
Showing 8 changed files with 229 additions and 237 deletions.
242 changes: 123 additions & 119 deletions bleach/html5lib_shim.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,127 +81,129 @@

#: List of valid HTML tags, from WHATWG HTML Living Standard as of 2018-10-17
#: https://html.spec.whatwg.org/multipage/indices.html#elements-3
# Tag names are kept in alphabetical order, one row (or two) per initial letter.
# fmt: off
HTML_TAGS = [
    "a", "abbr", "address", "area", "article", "aside", "audio",
    "b", "base", "bdi", "bdo", "blockquote", "body", "br", "button",
    "canvas", "caption", "cite", "code", "col", "colgroup",
    "data", "datalist", "dd", "del", "details", "dfn", "dialog", "div", "dl", "dt",
    "em", "embed",
    "fieldset", "figcaption", "figure", "footer", "form",
    "h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hgroup", "hr", "html",
    "i", "iframe", "img", "input", "ins",
    "kbd", "keygen",
    "label", "legend", "li", "link",
    "map", "mark", "menu", "meta", "meter",
    "nav", "noscript",
    "object", "ol", "optgroup", "option", "output",
    "p", "param", "picture", "pre", "progress",
    "q",
    "rp", "rt", "ruby",
    "s", "samp", "script", "section", "select", "slot", "small",
    "source", "span", "strong", "style", "sub", "summary", "sup",
    "table", "tbody", "td", "template", "textarea", "tfoot",
    "th", "thead", "time", "title", "tr", "track",
    "u", "ul",
    "var", "video",
    "wbr",
]
# fmt: on
# Immutable set of tag names; rows are grouped by initial letter for scanning.
# fmt: off
HTML_TAGS = frozenset({
    "a", "abbr", "address", "area", "article", "aside", "audio",
    "b", "base", "bdi", "bdo", "blockquote", "body", "br", "button",
    "canvas", "caption", "cite", "code", "col", "colgroup",
    "data", "datalist", "dd", "del", "details", "dfn", "dialog", "div", "dl", "dt",
    "em", "embed",
    "fieldset", "figcaption", "figure", "footer", "form",
    "h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hgroup", "hr", "html",
    "i", "iframe", "img", "input", "ins",
    "kbd", "keygen",
    "label", "legend", "li", "link",
    "map", "mark", "menu", "meta", "meter",
    "nav", "noscript",
    "object", "ol", "optgroup", "option", "output",
    "p", "param", "picture", "pre", "progress",
    "q",
    "rp", "rt", "ruby",
    "s", "samp", "script", "section", "select", "slot", "small",
    "source", "span", "strong", "style", "sub", "summary", "sup",
    "table", "tbody", "td", "template", "textarea", "tfoot",
    "th", "thead", "time", "title", "tr", "track",
    "u", "ul",
    "var", "video",
    "wbr",
})
# fmt: on


#: List of block level HTML tags, as per https://github.com/mozilla/bleach/issues/369
#: from mozilla on 2019.07.11
#: https://developer.mozilla.org/en-US/docs/Web/HTML/Block-level_elements#Elements
HTML_TAGS_BLOCK_LEVEL = frozenset(
[
(
"address",
"article",
"aside",
Expand Down Expand Up @@ -235,7 +237,7 @@
"section",
"table",
"ul",
]
)
)


Expand Down Expand Up @@ -476,15 +478,17 @@ class BleachHTMLParser(HTMLParser):

def __init__(self, tags, strip, consume_entities, **kwargs):
"""
:arg tags: list of allowed tags--everything else is either stripped or
:arg tags: set of allowed tags--everything else is either stripped or
escaped; if None, then this doesn't look at tags at all
:arg strip: whether to strip disallowed tags (True) or escape them (False);
if tags=None, then this doesn't have any effect
:arg consume_entities: whether to consume entities (default behavior) or
leave them as is when tokenizing (BleachHTMLTokenizer-added behavior)
"""
self.tags = [tag.lower() for tag in tags] if tags is not None else None
self.tags = (
frozenset((tag.lower() for tag in tags)) if tags is not None else None
)
self.strip = strip
self.consume_entities = consume_entities
super().__init__(**kwargs)
Expand Down Expand Up @@ -694,7 +698,7 @@ def escape_base_amp(self, stoken):
# Only leave entities in that are not ambiguous. If they're
# ambiguous, then we escape the ampersand.
if entity is not None and convert_entity(entity) is not None:
yield "&" + entity + ";"
yield f"&{entity};"

# Length of the entity plus 2--one for & at the beginning
# and one for ; at the end
Expand Down
17 changes: 9 additions & 8 deletions bleach/linkifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,17 +120,18 @@ def __init__(
:arg list callbacks: list of callbacks to run when adjusting tag attributes;
defaults to ``bleach.linkifier.DEFAULT_CALLBACKS``
:arg list skip_tags: list of tags that you don't want to linkify the
contents of; for example, you could set this to ``['pre']`` to skip
linkifying contents of ``pre`` tags
:arg set skip_tags: set of tags that you don't want to linkify the
contents of; for example, you could set this to ``{'pre'}`` to skip
linkifying contents of ``pre`` tags; ``None`` means you don't
want linkify to skip any tags
:arg bool parse_email: whether or not to linkify email addresses
:arg url_re: url matching regex
:arg email_re: email matching regex
:arg list recognized_tags: the list of tags that linkify knows about;
:arg set recognized_tags: the set of tags that linkify knows about;
everything else gets escaped
:returns: linkified text as unicode
Expand All @@ -145,7 +146,7 @@ def __init__(
# Create a parser/tokenizer that allows all HTML tags and escapes
# anything not in that list.
self.parser = html5lib_shim.BleachHTMLParser(
tags=recognized_tags,
tags=frozenset(recognized_tags),
strip=False,
consume_entities=False,
namespaceHTMLElements=False,
Expand Down Expand Up @@ -221,8 +222,8 @@ def __init__(
:arg list callbacks: list of callbacks to run when adjusting tag attributes;
defaults to ``bleach.linkifier.DEFAULT_CALLBACKS``
:arg list skip_tags: list of tags that you don't want to linkify the
contents of; for example, you could set this to ``['pre']`` to skip
:arg set skip_tags: set of tags that you don't want to linkify the
contents of; for example, you could set this to ``{'pre'}`` to skip
linkifying contents of ``pre`` tags
:arg bool parse_email: whether or not to linkify email addresses
Expand All @@ -235,7 +236,7 @@ def __init__(
super().__init__(source)

self.callbacks = callbacks or []
self.skip_tags = skip_tags or []
self.skip_tags = skip_tags or {}
self.parse_email = parse_email

self.url_re = url_re
Expand Down
Loading

0 comments on commit b7e8da3

Please sign in to comment.