Skip to content

Commit

Permalink
Fix set.mm check (#143)
Browse files Browse the repository at this point in the history
* Fix set.mm check

First of all, use the develop branch rather than the master branch
because the latter is rather old and we haven't been using it.

Second of all, add --verify-markup and --parse-typesetting so that we
run more of the checks (including most notably the ones which the set.mm
automated checks enable).

* `\r\n`-compatible newline detection

---------

Co-authored-by: Mario Carneiro <di.gama@gmail.com>
  • Loading branch information
jkingdon and digama0 authored Sep 30, 2023
1 parent 6589b4c commit fe5e7fc
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 10 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/verify.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,5 +48,5 @@ jobs:
run: cargo fmt -- --check
- name: Verify official set.mm
run: |
curl -s -G https://raw.githubusercontent.com/metamath/set.mm/master/set.mm > set.mm
cargo run -- --jobs 8 set.mm --verify --grammar --parse-stmt --verify-parse-stmt
curl -s -G https://raw.githubusercontent.com/metamath/set.mm/develop/set.mm > set.mm
cargo run -- --jobs 8 set.mm --verify --grammar --parse-stmt --verify-parse-stmt --verify-markup --parse-typesetting
8 changes: 4 additions & 4 deletions src/comment_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,7 @@ impl<'a> CommentParser<'a> {
while let Some(&c) = self.buf.get(self.pos) {
match c {
b'[' | b'`' if self.buf.get(self.pos + 1) == Some(&c) => self.pos += 2,
b' ' | b'\n' | b'`' => break,
b' ' | b'\r' | b'\n' | b'`' => break,
b'[' if self.parse_bib().is_some() => break,
b'<' if self.buf[self.pos..].starts_with(b"<HTML>")
|| self.buf[self.pos..].starts_with(b"</HTML>") =>
Expand Down Expand Up @@ -534,9 +534,9 @@ impl<'a> ParentheticalIter<'a> {
pub fn new(buf: &'a [u8], span: Span) -> Self {
lazy_static! {
static ref PARENTHETICALS: Regex = Regex::new(concat!(
r"\((Contributed|Revised|Proof[ \n]+shortened)",
r"[ \n]+by[ \n]+([^,)]+),[ \n]+([0-9]{1,2}-[A-Z][a-z]{2}-[0-9]{4})\.\)|",
r"\((Proof[ \n]+modification|New[ \n]+usage)[ \n]+is[ \n]+discouraged\.\)",
r"\((Contributed|Revised|Proof[ \r\n]+shortened)",
r"[ \r\n]+by[ \r\n]+([^,)]+),[ \r\n]+([0-9]{1,2}-[A-Z][a-z]{2}-[0-9]{4})\.\)|",
r"\((Proof[ \r\n]+modification|New[ \r\n]+usage)[ \r\n]+is[ \r\n]+discouraged\.\)",
))
.unwrap();
}
Expand Down
8 changes: 4 additions & 4 deletions src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1110,13 +1110,13 @@ impl HeadingComment {
pub fn parse(buf: &[u8], lvl: HeadingLevel, sp: Span) -> Option<Self> {
lazy_static::lazy_static! {
static ref MAJOR_PART: Regex =
Regex::new(r"^[ \n]+#{4,}\n *([^\n]*)\n#{4,}\n").unwrap();
Regex::new(r"^[ \r\n]+#{4,}\r?\n *([^\n]*)\r?\n#{4,}\r?\n").unwrap();
static ref SECTION: Regex =
Regex::new(r"^[ \n]+(?:#\*){2,}#?\n *([^\n]*)\n(?:#\*){2,}#?\n").unwrap();
Regex::new(r"^[ \r\n]+(?:#\*){2,}#?\r?\n *([^\n]*)\r?\n(?:#\*){2,}#?\r?\n").unwrap();
static ref SUBSECTION: Regex =
Regex::new(r"^[ \n]+(?:=-){2,}=?\n *([^\n]*)\n(?:=-){2,}=?\n").unwrap();
Regex::new(r"^[ \r\n]+(?:=-){2,}=?\r?\n *([^\n]*)\r?\n(?:=-){2,}=?\r?\n").unwrap();
static ref SUBSUBSECTION: Regex =
Regex::new(r"^[ \n]+(?:-\.){2,}-?\n *([^\n]*)\n(?:-\.){2,}-?\n").unwrap();
Regex::new(r"^[ \r\n]+(?:-\.){2,}-?\r?\n *([^\n]*)\r?\n(?:-\.){2,}-?\r?\n").unwrap();
}
let regex = match lvl {
HeadingLevel::MajorPart => &*MAJOR_PART,
Expand Down
9 changes: 9 additions & 0 deletions src/verify_markup.rs
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,15 @@ impl Database {
eol_check(&mut diags, &seg, line_start, i);
line_start = i + 1;
}
b'\r' => {
eol_check(&mut diags, &seg, line_start, i);
line_start = if seg.buffer.get(i + 1) == Some(&b'\n') {
iter.next();
i + 2
} else {
i + 1
}
}
b'\t' => {
let count = seg.buffer[i..].iter().take_while(|&&c| c == b'\t').count();
for _ in 1..count {
Expand Down

0 comments on commit fe5e7fc

Please sign in to comment.