-
Notifications
You must be signed in to change notification settings - Fork 13k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
rustdoc: check parsing diffs between pulldown-cmark 0.9.6 and 0.10
This commit is not meant to be merged as-is. It's meant to run in Crater, so that we can estimate the impact of bumping to the new version of the markdown parser.
- Loading branch information
Showing
7 changed files
with
287 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,158 @@ | ||
//! Detects markdown syntax that's different between pulldown-cmark | ||
//! 0.9 and 0.10. | ||
use crate::clean::Item; | ||
use crate::core::DocContext; | ||
use crate::html::markdown::main_body_opts; | ||
use pulldown_cmark as cmarko; | ||
use pulldown_cmark_new as cmarkn; | ||
use rustc_resolve::rustdoc::source_span_for_markdown_range; | ||
|
||
pub(crate) fn visit_item(cx: &DocContext<'_>, item: &Item) { | ||
let tcx = cx.tcx; | ||
let Some(hir_id) = DocContext::as_local_hir_id(tcx, item.item_id) else { | ||
// If non-local, no need to check anything. | ||
return; | ||
}; | ||
|
||
let dox = item.doc_value(); | ||
if dox.is_empty() { | ||
return; | ||
} | ||
|
||
let link_names = item.link_names(&cx.cache); | ||
let mut replacer_old = |broken_link: cmarko::BrokenLink<'_>| { | ||
link_names | ||
.iter() | ||
.find(|link| *link.original_text == *broken_link.reference) | ||
.map(|link| ((*link.href).into(), (*link.new_text).into())) | ||
}; | ||
let parser_old = cmarko::Parser::new_with_broken_link_callback( | ||
&dox, | ||
main_body_opts(), | ||
Some(&mut replacer_old) | ||
).into_offset_iter() | ||
// Not worth cleaning up minor "distinctions without difference" in the AST. | ||
// Text events get chopped up differently between versions. | ||
// <html> and `code` mistakes are usually covered by unescaped_backticks and html_tags lints. | ||
.filter(|(event, _event_range)| !matches!(event, cmarko::Event::Code(_) | cmarko::Event::Text(_) | cmarko::Event::Html(_))); | ||
|
||
pub fn main_body_opts_new() -> cmarkn::Options { | ||
cmarkn::Options::ENABLE_TABLES | ||
| cmarkn::Options::ENABLE_FOOTNOTES | ||
| cmarkn::Options::ENABLE_STRIKETHROUGH | ||
| cmarkn::Options::ENABLE_TASKLISTS | ||
| cmarkn::Options::ENABLE_SMART_PUNCTUATION | ||
} | ||
let mut replacer_new = |broken_link: cmarkn::BrokenLink<'_>| { | ||
link_names | ||
.iter() | ||
.find(|link| *link.original_text == *broken_link.reference) | ||
.map(|link| ((*link.href).into(), (*link.new_text).into())) | ||
}; | ||
let parser_new = cmarkn::Parser::new_with_broken_link_callback( | ||
&dox, | ||
main_body_opts_new(), | ||
Some(&mut replacer_new) | ||
).into_offset_iter() | ||
.filter(|(event, _event_range)| !matches!(event, cmarkn::Event::Code(_) | cmarkn::Event::Text(_) | cmarkn::Event::Html(_) | cmarkn::Event::InlineHtml(_))); | ||
|
||
let mut reported_an_error = false; | ||
for ((event_old, event_range_old), (event_new, event_range_new)) in parser_old.zip(parser_new) { | ||
match (event_old, event_new) { | ||
| (cmarko::Event::Start(cmarko::Tag::Emphasis), cmarkn::Event::Start(cmarkn::Tag::Emphasis)) | ||
| (cmarko::Event::Start(cmarko::Tag::Strong), cmarkn::Event::Start(cmarkn::Tag::Strong)) | ||
| (cmarko::Event::Start(cmarko::Tag::Strikethrough), cmarkn::Event::Start(cmarkn::Tag::Strikethrough)) | ||
| (cmarko::Event::Start(cmarko::Tag::Link(..)), cmarkn::Event::Start(cmarkn::Tag::Link { .. })) | ||
| (cmarko::Event::Start(cmarko::Tag::Image(..)), cmarkn::Event::Start(cmarkn::Tag::Image { .. })) | ||
| (cmarko::Event::End(cmarko::Tag::Emphasis), cmarkn::Event::End(cmarkn::TagEnd::Emphasis)) | ||
| (cmarko::Event::End(cmarko::Tag::Strong), cmarkn::Event::End(cmarkn::TagEnd::Strong)) | ||
| (cmarko::Event::End(cmarko::Tag::Strikethrough), cmarkn::Event::End(cmarkn::TagEnd::Strikethrough)) | ||
| (cmarko::Event::End(cmarko::Tag::Link(..)), cmarkn::Event::End(cmarkn::TagEnd::Link)) | ||
| (cmarko::Event::End(cmarko::Tag::Image(..)), cmarkn::Event::End(cmarkn::TagEnd::Image)) | ||
| (cmarko::Event::FootnoteReference(..), cmarkn::Event::FootnoteReference(..)) | ||
| (cmarko::Event::SoftBreak, cmarkn::Event::SoftBreak) | ||
| (cmarko::Event::HardBreak, cmarkn::Event::HardBreak) | ||
if event_range_old == event_range_new => { | ||
// Matching tags. Do nothing. | ||
} | ||
| (cmarko::Event::Start(cmarko::Tag::Paragraph), cmarkn::Event::Start(cmarkn::Tag::Paragraph)) | ||
| (cmarko::Event::Start(cmarko::Tag::Heading(..)), cmarkn::Event::Start(cmarkn::Tag::Heading { .. })) | ||
| (cmarko::Event::Start(cmarko::Tag::BlockQuote), cmarkn::Event::Start(cmarkn::Tag::BlockQuote)) | ||
| (cmarko::Event::Start(cmarko::Tag::CodeBlock(..)), cmarkn::Event::Start(cmarkn::Tag::CodeBlock(..))) | ||
| (cmarko::Event::Start(cmarko::Tag::List(..)), cmarkn::Event::Start(cmarkn::Tag::List(..))) | ||
| (cmarko::Event::Start(cmarko::Tag::Item), cmarkn::Event::Start(cmarkn::Tag::Item)) | ||
| (cmarko::Event::Start(cmarko::Tag::FootnoteDefinition(..)), cmarkn::Event::Start(cmarkn::Tag::FootnoteDefinition(..))) | ||
| (cmarko::Event::Start(cmarko::Tag::Table(..)), cmarkn::Event::Start(cmarkn::Tag::Table(..))) | ||
| (cmarko::Event::Start(cmarko::Tag::TableHead), cmarkn::Event::Start(cmarkn::Tag::TableHead)) | ||
| (cmarko::Event::Start(cmarko::Tag::TableRow), cmarkn::Event::Start(cmarkn::Tag::TableRow)) | ||
| (cmarko::Event::Start(cmarko::Tag::TableCell), cmarkn::Event::Start(cmarkn::Tag::TableCell)) | ||
| (cmarko::Event::End(cmarko::Tag::Paragraph), cmarkn::Event::End(cmarkn::TagEnd::Paragraph)) | ||
| (cmarko::Event::End(cmarko::Tag::Heading(..)), cmarkn::Event::End(cmarkn::TagEnd::Heading(_))) | ||
| (cmarko::Event::End(cmarko::Tag::BlockQuote), cmarkn::Event::End(cmarkn::TagEnd::BlockQuote)) | ||
| (cmarko::Event::End(cmarko::Tag::CodeBlock(..)), cmarkn::Event::End(cmarkn::TagEnd::CodeBlock)) | ||
| (cmarko::Event::End(cmarko::Tag::List(..)), cmarkn::Event::End(cmarkn::TagEnd::List(_))) | ||
| (cmarko::Event::End(cmarko::Tag::Item), cmarkn::Event::End(cmarkn::TagEnd::Item)) | ||
| (cmarko::Event::End(cmarko::Tag::FootnoteDefinition(..)), cmarkn::Event::End(cmarkn::TagEnd::FootnoteDefinition)) | ||
| (cmarko::Event::End(cmarko::Tag::Table(..)), cmarkn::Event::End(cmarkn::TagEnd::Table)) | ||
| (cmarko::Event::End(cmarko::Tag::TableHead), cmarkn::Event::End(cmarkn::TagEnd::TableHead)) | ||
| (cmarko::Event::End(cmarko::Tag::TableRow), cmarkn::Event::End(cmarkn::TagEnd::TableRow)) | ||
| (cmarko::Event::End(cmarko::Tag::TableCell), cmarkn::Event::End(cmarkn::TagEnd::TableCell)) | ||
=> { | ||
// Matching tags. Do nothing. | ||
// | ||
// Parsers sometimes differ in what they consider the "range of an event," | ||
// even though the event is really the same. Inlines are pretty consistent, | ||
// but stuff like list items? Not really. | ||
// | ||
// Mismatched block elements will usually nest differently, so ignoring it | ||
// works good enough. | ||
} | ||
// If we've already reported an error on the start tag, don't bother on the end tag. | ||
(cmarko::Event::End(_), _) | (_, cmarkn::Event::End(_)) if reported_an_error => {} | ||
// Non-matching inline. | ||
| (cmarko::Event::Start(cmarko::Tag::Link(..)), cmarkn::Event::FootnoteReference(..)) | ||
| (cmarko::Event::Start(cmarko::Tag::Image(..)), cmarkn::Event::FootnoteReference(..)) | ||
| (cmarko::Event::FootnoteReference(..), cmarkn::Event::Start(cmarkn::Tag::Link { .. })) | ||
| (cmarko::Event::FootnoteReference(..), cmarkn::Event::Start(cmarkn::Tag::Image { .. })) if event_range_old == event_range_new => { | ||
reported_an_error = true; | ||
// If we can't get a span of the backtick, because it is in a `#[doc = ""]` attribute, | ||
// use the span of the entire attribute as a fallback. | ||
let span = source_span_for_markdown_range( | ||
tcx, | ||
&dox, | ||
&event_range_old, | ||
&item.attrs.doc_strings, | ||
).unwrap_or_else(|| item.attr_span(tcx)); | ||
tcx.node_span_lint(crate::lint::UNPORTABLE_MARKDOWN, hir_id, span, "unportable markdown", |lint| { | ||
lint.help(format!("syntax ambiguous between footnote and link")); | ||
}); | ||
} | ||
// Non-matching results. | ||
(event_old, event_new) => { | ||
reported_an_error = true; | ||
let (range, range_other, desc, desc_other, tag, tag_other) = if event_range_old.end - event_range_old.start < event_range_new.end - event_range_new.start { | ||
(event_range_old, event_range_new, "old", "new", format!("{event_old:?}"), format!("{event_new:?}")) | ||
} else { | ||
(event_range_new, event_range_old, "new", "old", format!("{event_new:?}"), format!("{event_old:?}")) | ||
}; | ||
let (range, tag_other) = if range_other.start <= range.start && range_other.end <= range.end { | ||
(range_other.start..range.end, tag_other) | ||
} else { | ||
(range, format!("nothing")) | ||
}; | ||
// If we can't get a span of the backtick, because it is in a `#[doc = ""]` attribute, | ||
// use the span of the entire attribute as a fallback. | ||
let span = source_span_for_markdown_range( | ||
tcx, | ||
&dox, | ||
&range, | ||
&item.attrs.doc_strings, | ||
).unwrap_or_else(|| item.attr_span(tcx)); | ||
tcx.node_span_lint(crate::lint::UNPORTABLE_MARKDOWN, hir_id, span, "unportable markdown", |lint| { | ||
lint.help(format!("{desc} parser sees {tag}, {desc_other} sees {tag_other}")); | ||
}); | ||
} | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
// https://internals.rust-lang.org/t/proposal-migrate-the-syntax-of-rustdoc-markdown-footnotes-to-be-compatible-with-the-syntax-used-in-github/18929 | ||
// | ||
// A series of test cases for CommonMark corner cases that pulldown-cmark 0.10 fixes. | ||
|
||
/// </~https://github.com/pulldown-cmark/pulldown-cmark/pull/654> | ||
/// | ||
/// Test footnote [^foot]. | ||
/// | ||
/// [^foot]: This is nested within the footnote now, but didn't used to be. | ||
//~^ ERROR unportable markdown | ||
/// | ||
/// This is a multi-paragraph footnote. | ||
pub struct GfmFootnotes; | ||
|
||
/// </~https://github.com/pulldown-cmark/pulldown-cmark/pull/750> | ||
/// | ||
/// test [^] | ||
//~^ ERROR unportable markdown | ||
/// | ||
/// [^]: test2 | ||
pub struct FootnoteEmptyName; | ||
|
||
/// </~https://github.com/pulldown-cmark/pulldown-cmark/pull/829> | ||
/// | ||
/// - _t | ||
/// # test | ||
//~^ ERROR unportable markdown | ||
/// t_ | ||
pub struct NestingCornerCase; | ||
|
||
/// </~https://github.com/pulldown-cmark/pulldown-cmark/pull/650> | ||
/// | ||
/// *~~__emphasis strike strong__~~* ~~*__strike emphasis strong__*~~ | ||
//~^ ERROR unportable markdown | ||
//~| ERROR unportable markdown | ||
pub struct Emphasis1; | ||
|
||
/// </~https://github.com/pulldown-cmark/pulldown-cmark/pull/732> | ||
/// | ||
/// | | ||
//~^ ERROR unportable markdown | ||
//~| ERROR unportable markdown | ||
/// | | ||
pub struct NotEnoughTable; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
error: unportable markdown | ||
--> $DIR/unportable-markdown.rs:9:5 | ||
| | ||
LL | /// [^foot]: This is nested within the footnote now, but didn't used to be. | ||
| _____^ | ||
LL | | | ||
LL | | /// | ||
LL | | /// This is a multi-paragraph footnote. | ||
| |___________________________________________^ | ||
| | ||
= help: new parser sees Start(Paragraph), old sees End(FootnoteDefinition(Borrowed("foot"))) | ||
= note: `#[deny(rustdoc::unportable_markdown)]` on by default | ||
|
||
error: unportable markdown | ||
--> $DIR/unportable-markdown.rs:17:10 | ||
| | ||
LL | /// test [^] | ||
| ^^^ | ||
| | ||
= help: new parser sees Start(Link { link_type: Shortcut, dest_url: Borrowed("test2"), title: Borrowed(""), id: Borrowed("^") }), old sees nothing | ||
|
||
error: unportable markdown | ||
--> $DIR/unportable-markdown.rs:26:7 | ||
| | ||
LL | /// # test | ||
| _______^ | ||
LL | | | ||
LL | | /// t_ | ||
| |____^ | ||
| | ||
= help: new parser sees Start(Heading { level: H1, id: None, classes: [], attrs: [] }), old sees nothing | ||
|
||
error: unportable markdown | ||
--> $DIR/unportable-markdown.rs:33:40 | ||
| | ||
LL | /// *~~__emphasis strike strong__~~* ~~*__strike emphasis strong__*~~ | ||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
| | ||
= help: old parser sees Start(Emphasis), new sees nothing | ||
|
||
error: unportable markdown | ||
--> $DIR/unportable-markdown.rs:33:41 | ||
| | ||
LL | /// *~~__emphasis strike strong__~~* ~~*__strike emphasis strong__*~~ | ||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
| | ||
= help: old parser sees Start(Strong), new sees nothing | ||
|
||
error: unportable markdown | ||
--> $DIR/unportable-markdown.rs:40:5 | ||
| | ||
LL | /// | | ||
| _____^ | ||
LL | | | ||
LL | | | ||
| |____^ | ||
| | ||
= help: new parser sees Start(Paragraph), old sees Start(Table([])) | ||
|
||
error: unportable markdown | ||
--> $DIR/unportable-markdown.rs:40:5 | ||
| | ||
LL | /// | | ||
| _____^ | ||
LL | | | ||
LL | | | ||
| |___^ | ||
| | ||
= help: new parser sees SoftBreak, old sees Start(TableHead) | ||
|
||
error: aborting due to 7 previous errors | ||
|