Skip to content

Commit

Permalink
rustdoc: check parsing diffs between pulldown-cmark 0.9.6 and 0.10
Browse files Browse the repository at this point in the history
This commit is not meant to be merged as-is. It's meant to run
in Crater, so that we can estimate the impact of bumping to the
new version of the markdown parser.
  • Loading branch information
notriddle committed Feb 27, 2024
1 parent 829308e commit 3d931ab
Show file tree
Hide file tree
Showing 7 changed files with 287 additions and 0 deletions.
1 change: 1 addition & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -4704,6 +4704,7 @@ dependencies = [
"itertools",
"minifier",
"once_cell",
"pulldown-cmark 0.10.0",
"regex",
"rustdoc-json-types",
"serde",
Expand Down
1 change: 1 addition & 0 deletions src/librustdoc/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ itertools = "0.11"
indexmap = "2"
minifier = "0.3.0"
once_cell = "1.10.0"
pulldown-cmark-new = { version = "0.10", package = "pulldown-cmark", default-features = false }
regex = "1"
rustdoc-json-types = { path = "../rustdoc-json-types" }
serde_json = "1.0"
Expand Down
9 changes: 9 additions & 0 deletions src/librustdoc/lint.rs
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,14 @@ declare_rustdoc_lint! {
"detects redundant explicit links in doc comments"
}

declare_rustdoc_lint! {
/// This compatibility lint checks for Markdown syntax that works in the old engine but not
/// the new one.
UNPORTABLE_MARKDOWN,
Deny,
"detects markdown that is interpreted differently in different parser"
}

pub(crate) static RUSTDOC_LINTS: Lazy<Vec<&'static Lint>> = Lazy::new(|| {
vec![
BROKEN_INTRA_DOC_LINKS,
Expand All @@ -209,6 +217,7 @@ pub(crate) static RUSTDOC_LINTS: Lazy<Vec<&'static Lint>> = Lazy::new(|| {
MISSING_CRATE_LEVEL_DOCS,
UNESCAPED_BACKTICKS,
REDUNDANT_EXPLICIT_LINKS,
UNPORTABLE_MARKDOWN,
]
});

Expand Down
2 changes: 2 additions & 0 deletions src/librustdoc/passes/lint.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ mod check_code_block_syntax;
mod html_tags;
mod redundant_explicit_links;
mod unescaped_backticks;
mod unportable_markdown;

use super::Pass;
use crate::clean::*;
Expand All @@ -31,6 +32,7 @@ impl<'a, 'tcx> DocVisitor for Linter<'a, 'tcx> {
html_tags::visit_item(self.cx, item);
unescaped_backticks::visit_item(self.cx, item);
redundant_explicit_links::visit_item(self.cx, item);
unportable_markdown::visit_item(self.cx, item);

self.visit_item_recur(item)
}
Expand Down
158 changes: 158 additions & 0 deletions src/librustdoc/passes/lint/unportable_markdown.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
//! Detects markdown syntax that's different between pulldown-cmark
//! 0.9 and 0.10.
use crate::clean::Item;
use crate::core::DocContext;
use crate::html::markdown::main_body_opts;
use pulldown_cmark as cmarko;
use pulldown_cmark_new as cmarkn;
use rustc_resolve::rustdoc::source_span_for_markdown_range;

pub(crate) fn visit_item(cx: &DocContext<'_>, item: &Item) {
let tcx = cx.tcx;
let Some(hir_id) = DocContext::as_local_hir_id(tcx, item.item_id) else {
// If non-local, no need to check anything.
return;
};

let dox = item.doc_value();
if dox.is_empty() {
return;
}

let link_names = item.link_names(&cx.cache);
let mut replacer_old = |broken_link: cmarko::BrokenLink<'_>| {
link_names
.iter()
.find(|link| *link.original_text == *broken_link.reference)
.map(|link| ((*link.href).into(), (*link.new_text).into()))
};
let parser_old = cmarko::Parser::new_with_broken_link_callback(
&dox,
main_body_opts(),
Some(&mut replacer_old)
).into_offset_iter()
// Not worth cleaning up minor "distinctions without difference" in the AST.
// Text events get chopped up differently between versions.
// <html> and `code` mistakes are usually covered by unescaped_backticks and html_tags lints.
.filter(|(event, _event_range)| !matches!(event, cmarko::Event::Code(_) | cmarko::Event::Text(_) | cmarko::Event::Html(_)));

pub fn main_body_opts_new() -> cmarkn::Options {
cmarkn::Options::ENABLE_TABLES
| cmarkn::Options::ENABLE_FOOTNOTES
| cmarkn::Options::ENABLE_STRIKETHROUGH
| cmarkn::Options::ENABLE_TASKLISTS
| cmarkn::Options::ENABLE_SMART_PUNCTUATION
}
let mut replacer_new = |broken_link: cmarkn::BrokenLink<'_>| {
link_names
.iter()
.find(|link| *link.original_text == *broken_link.reference)
.map(|link| ((*link.href).into(), (*link.new_text).into()))
};
let parser_new = cmarkn::Parser::new_with_broken_link_callback(
&dox,
main_body_opts_new(),
Some(&mut replacer_new)
).into_offset_iter()
.filter(|(event, _event_range)| !matches!(event, cmarkn::Event::Code(_) | cmarkn::Event::Text(_) | cmarkn::Event::Html(_) | cmarkn::Event::InlineHtml(_)));

let mut reported_an_error = false;
for ((event_old, event_range_old), (event_new, event_range_new)) in parser_old.zip(parser_new) {
match (event_old, event_new) {
| (cmarko::Event::Start(cmarko::Tag::Emphasis), cmarkn::Event::Start(cmarkn::Tag::Emphasis))
| (cmarko::Event::Start(cmarko::Tag::Strong), cmarkn::Event::Start(cmarkn::Tag::Strong))
| (cmarko::Event::Start(cmarko::Tag::Strikethrough), cmarkn::Event::Start(cmarkn::Tag::Strikethrough))
| (cmarko::Event::Start(cmarko::Tag::Link(..)), cmarkn::Event::Start(cmarkn::Tag::Link { .. }))
| (cmarko::Event::Start(cmarko::Tag::Image(..)), cmarkn::Event::Start(cmarkn::Tag::Image { .. }))
| (cmarko::Event::End(cmarko::Tag::Emphasis), cmarkn::Event::End(cmarkn::TagEnd::Emphasis))
| (cmarko::Event::End(cmarko::Tag::Strong), cmarkn::Event::End(cmarkn::TagEnd::Strong))
| (cmarko::Event::End(cmarko::Tag::Strikethrough), cmarkn::Event::End(cmarkn::TagEnd::Strikethrough))
| (cmarko::Event::End(cmarko::Tag::Link(..)), cmarkn::Event::End(cmarkn::TagEnd::Link))
| (cmarko::Event::End(cmarko::Tag::Image(..)), cmarkn::Event::End(cmarkn::TagEnd::Image))
| (cmarko::Event::FootnoteReference(..), cmarkn::Event::FootnoteReference(..))
| (cmarko::Event::SoftBreak, cmarkn::Event::SoftBreak)
| (cmarko::Event::HardBreak, cmarkn::Event::HardBreak)
if event_range_old == event_range_new => {
// Matching tags. Do nothing.
}
| (cmarko::Event::Start(cmarko::Tag::Paragraph), cmarkn::Event::Start(cmarkn::Tag::Paragraph))
| (cmarko::Event::Start(cmarko::Tag::Heading(..)), cmarkn::Event::Start(cmarkn::Tag::Heading { .. }))
| (cmarko::Event::Start(cmarko::Tag::BlockQuote), cmarkn::Event::Start(cmarkn::Tag::BlockQuote))
| (cmarko::Event::Start(cmarko::Tag::CodeBlock(..)), cmarkn::Event::Start(cmarkn::Tag::CodeBlock(..)))
| (cmarko::Event::Start(cmarko::Tag::List(..)), cmarkn::Event::Start(cmarkn::Tag::List(..)))
| (cmarko::Event::Start(cmarko::Tag::Item), cmarkn::Event::Start(cmarkn::Tag::Item))
| (cmarko::Event::Start(cmarko::Tag::FootnoteDefinition(..)), cmarkn::Event::Start(cmarkn::Tag::FootnoteDefinition(..)))
| (cmarko::Event::Start(cmarko::Tag::Table(..)), cmarkn::Event::Start(cmarkn::Tag::Table(..)))
| (cmarko::Event::Start(cmarko::Tag::TableHead), cmarkn::Event::Start(cmarkn::Tag::TableHead))
| (cmarko::Event::Start(cmarko::Tag::TableRow), cmarkn::Event::Start(cmarkn::Tag::TableRow))
| (cmarko::Event::Start(cmarko::Tag::TableCell), cmarkn::Event::Start(cmarkn::Tag::TableCell))
| (cmarko::Event::End(cmarko::Tag::Paragraph), cmarkn::Event::End(cmarkn::TagEnd::Paragraph))
| (cmarko::Event::End(cmarko::Tag::Heading(..)), cmarkn::Event::End(cmarkn::TagEnd::Heading(_)))
| (cmarko::Event::End(cmarko::Tag::BlockQuote), cmarkn::Event::End(cmarkn::TagEnd::BlockQuote))
| (cmarko::Event::End(cmarko::Tag::CodeBlock(..)), cmarkn::Event::End(cmarkn::TagEnd::CodeBlock))
| (cmarko::Event::End(cmarko::Tag::List(..)), cmarkn::Event::End(cmarkn::TagEnd::List(_)))
| (cmarko::Event::End(cmarko::Tag::Item), cmarkn::Event::End(cmarkn::TagEnd::Item))
| (cmarko::Event::End(cmarko::Tag::FootnoteDefinition(..)), cmarkn::Event::End(cmarkn::TagEnd::FootnoteDefinition))
| (cmarko::Event::End(cmarko::Tag::Table(..)), cmarkn::Event::End(cmarkn::TagEnd::Table))
| (cmarko::Event::End(cmarko::Tag::TableHead), cmarkn::Event::End(cmarkn::TagEnd::TableHead))
| (cmarko::Event::End(cmarko::Tag::TableRow), cmarkn::Event::End(cmarkn::TagEnd::TableRow))
| (cmarko::Event::End(cmarko::Tag::TableCell), cmarkn::Event::End(cmarkn::TagEnd::TableCell))
=> {
// Matching tags. Do nothing.
//
// Parsers sometimes differ in what they consider the "range of an event,"
// even though the event is really the same. Inlines are pretty consistent,
// but stuff like list items? Not really.
//
// Mismatched block elements will usually nest differently, so ignoring it
// works good enough.
}
// If we've already reported an error on the start tag, don't bother on the end tag.
(cmarko::Event::End(_), _) | (_, cmarkn::Event::End(_)) if reported_an_error => {}
// Non-matching inline.
| (cmarko::Event::Start(cmarko::Tag::Link(..)), cmarkn::Event::FootnoteReference(..))
| (cmarko::Event::Start(cmarko::Tag::Image(..)), cmarkn::Event::FootnoteReference(..))
| (cmarko::Event::FootnoteReference(..), cmarkn::Event::Start(cmarkn::Tag::Link { .. }))
| (cmarko::Event::FootnoteReference(..), cmarkn::Event::Start(cmarkn::Tag::Image { .. })) if event_range_old == event_range_new => {
reported_an_error = true;
// If we can't get a span of the backtick, because it is in a `#[doc = ""]` attribute,
// use the span of the entire attribute as a fallback.
let span = source_span_for_markdown_range(
tcx,
&dox,
&event_range_old,
&item.attrs.doc_strings,
).unwrap_or_else(|| item.attr_span(tcx));
tcx.node_span_lint(crate::lint::UNPORTABLE_MARKDOWN, hir_id, span, "unportable markdown", |lint| {
lint.help(format!("syntax ambiguous between footnote and link"));
});
}
// Non-matching results.
(event_old, event_new) => {
reported_an_error = true;
let (range, range_other, desc, desc_other, tag, tag_other) = if event_range_old.end - event_range_old.start < event_range_new.end - event_range_new.start {
(event_range_old, event_range_new, "old", "new", format!("{event_old:?}"), format!("{event_new:?}"))
} else {
(event_range_new, event_range_old, "new", "old", format!("{event_new:?}"), format!("{event_old:?}"))
};
let (range, tag_other) = if range_other.start <= range.start && range_other.end <= range.end {
(range_other.start..range.end, tag_other)
} else {
(range, format!("nothing"))
};
// If we can't get a span of the backtick, because it is in a `#[doc = ""]` attribute,
// use the span of the entire attribute as a fallback.
let span = source_span_for_markdown_range(
tcx,
&dox,
&range,
&item.attrs.doc_strings,
).unwrap_or_else(|| item.attr_span(tcx));
tcx.node_span_lint(crate::lint::UNPORTABLE_MARKDOWN, hir_id, span, "unportable markdown", |lint| {
lint.help(format!("{desc} parser sees {tag}, {desc_other} sees {tag_other}"));
});
}
}
}
}
44 changes: 44 additions & 0 deletions tests/rustdoc-ui/unportable-markdown.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// https://internals.rust-lang.org/t/proposal-migrate-the-syntax-of-rustdoc-markdown-footnotes-to-be-compatible-with-the-syntax-used-in-github/18929
//
// A series of test cases for CommonMark corner cases that pulldown-cmark 0.10 fixes.

/// </~https://github.com/pulldown-cmark/pulldown-cmark/pull/654>
///
/// Test footnote [^foot].
///
/// [^foot]: This is nested within the footnote now, but didn't used to be.
//~^ ERROR unportable markdown
///
/// This is a multi-paragraph footnote.
pub struct GfmFootnotes;

/// </~https://github.com/pulldown-cmark/pulldown-cmark/pull/750>
///
/// test [^]
//~^ ERROR unportable markdown
///
/// [^]: test2
pub struct FootnoteEmptyName;

/// </~https://github.com/pulldown-cmark/pulldown-cmark/pull/829>
///
/// - _t
/// # test
//~^ ERROR unportable markdown
/// t_
pub struct NestingCornerCase;

/// </~https://github.com/pulldown-cmark/pulldown-cmark/pull/650>
///
/// *~~__emphasis strike strong__~~* ~~*__strike emphasis strong__*~~
//~^ ERROR unportable markdown
//~| ERROR unportable markdown
pub struct Emphasis1;

/// </~https://github.com/pulldown-cmark/pulldown-cmark/pull/732>
///
/// |
//~^ ERROR unportable markdown
//~| ERROR unportable markdown
/// |
pub struct NotEnoughTable;
72 changes: 72 additions & 0 deletions tests/rustdoc-ui/unportable-markdown.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
error: unportable markdown
--> $DIR/unportable-markdown.rs:9:5
|
LL | /// [^foot]: This is nested within the footnote now, but didn't used to be.
| _____^
LL | |
LL | | ///
LL | | /// This is a multi-paragraph footnote.
| |___________________________________________^
|
= help: new parser sees Start(Paragraph), old sees End(FootnoteDefinition(Borrowed("foot")))
= note: `#[deny(rustdoc::unportable_markdown)]` on by default

error: unportable markdown
--> $DIR/unportable-markdown.rs:17:10
|
LL | /// test [^]
| ^^^
|
= help: new parser sees Start(Link { link_type: Shortcut, dest_url: Borrowed("test2"), title: Borrowed(""), id: Borrowed("^") }), old sees nothing

error: unportable markdown
--> $DIR/unportable-markdown.rs:26:7
|
LL | /// # test
| _______^
LL | |
LL | | /// t_
| |____^
|
= help: new parser sees Start(Heading { level: H1, id: None, classes: [], attrs: [] }), old sees nothing

error: unportable markdown
--> $DIR/unportable-markdown.rs:33:40
|
LL | /// *~~__emphasis strike strong__~~* ~~*__strike emphasis strong__*~~
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
= help: old parser sees Start(Emphasis), new sees nothing

error: unportable markdown
--> $DIR/unportable-markdown.rs:33:41
|
LL | /// *~~__emphasis strike strong__~~* ~~*__strike emphasis strong__*~~
| ^^^^^^^^^^^^^^^^^^^^^^^^^^
|
= help: old parser sees Start(Strong), new sees nothing

error: unportable markdown
--> $DIR/unportable-markdown.rs:40:5
|
LL | /// |
| _____^
LL | |
LL | |
| |____^
|
= help: new parser sees Start(Paragraph), old sees Start(Table([]))

error: unportable markdown
--> $DIR/unportable-markdown.rs:40:5
|
LL | /// |
| _____^
LL | |
LL | |
| |___^
|
= help: new parser sees SoftBreak, old sees Start(TableHead)

error: aborting due to 7 previous errors

0 comments on commit 3d931ab

Please sign in to comment.