From 9d1033cdcb6fa266b3a7b2fb0bff02ac6fb6e8be Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 25 Mar 2018 15:51:40 -0700 Subject: [PATCH] Adjust max length of decimal/numeric entities. See commonmark/CommonMark#487. --- src/scanners.c | 437 +++++++++++++++++++++--------------------------- src/scanners.re | 2 +- 2 files changed, 191 insertions(+), 248 deletions(-) diff --git a/src/scanners.c b/src/scanners.c index fd855a593..aad651d09 100644 --- a/src/scanners.c +++ b/src/scanners.c @@ -12643,7 +12643,7 @@ bufsize_t _scan_entity(const unsigned char *p) { if (yych <= '/') goto yy879; if (yych <= '9') - goto yy902; + goto yy901; if (yych <= ':') goto yy879; goto yy885; @@ -12651,12 +12651,12 @@ bufsize_t _scan_entity(const unsigned char *p) { if (yych <= 'F') { if (yych <= '@') goto yy879; - goto yy902; + goto yy901; } else { if (yych <= '`') goto yy879; if (yych <= 'f') - goto yy902; + goto yy901; goto yy879; } } @@ -12666,7 +12666,7 @@ bufsize_t _scan_entity(const unsigned char *p) { if (yych <= '/') goto yy879; if (yych <= '9') - goto yy903; + goto yy902; if (yych <= ':') goto yy879; goto yy885; @@ -12674,21 +12674,17 @@ bufsize_t _scan_entity(const unsigned char *p) { if (yych <= 'Z') { if (yych <= '@') goto yy879; - goto yy903; + goto yy902; } else { if (yych <= '`') goto yy879; if (yych <= 'z') - goto yy903; + goto yy902; goto yy879; } } yy901: yych = *++p; - if (yych <= '/') - goto yy879; - if (yych <= '9') - goto yy904; if (yych == ';') goto yy885; goto yy879; @@ -12698,21 +12694,19 @@ bufsize_t _scan_entity(const unsigned char *p) { if (yych <= '/') goto yy879; if (yych <= '9') - goto yy905; + goto yy903; if (yych <= ':') goto yy879; goto yy885; } else { - if (yych <= 'F') { + if (yych <= 'Z') { if (yych <= '@') goto yy879; - goto yy905; } else { if (yych <= '`') goto yy879; - if (yych <= 'f') - goto yy905; - goto yy879; + if (yych >= '{') + goto yy879; } } yy903: @@ -12721,7 +12715,7 @@ bufsize_t _scan_entity(const unsigned char *p) { if (yych <= '/') goto yy879; if (yych <= '9') - goto yy906; + goto yy904; if (yych <= ':') goto yy879; goto yy885; @@ -12729,41 +12723,53 @@ bufsize_t _scan_entity(const unsigned char *p) { if (yych <= 'Z') { if (yych <= '@') goto yy879; - goto yy906; } else { if (yych <= '`') goto yy879; - if (yych <= 'z') - goto yy906; - goto yy879; + if (yych >= '{') + goto yy879; } } yy904: yych = *++p; - if (yych == ';') + if (yych <= ';') { + if (yych <= '/') + goto yy879; + if (yych <= '9') + goto yy905; + if (yych <= ':') + goto yy879; goto yy885; - goto yy879; + } else { + if (yych <= 'Z') { + if (yych <= '@') + goto yy879; + } else { + if (yych <= '`') + goto yy879; + if (yych >= '{') + goto yy879; + } + } yy905: yych = *++p; if (yych <= ';') { if (yych <= '/') goto yy879; if (yych <= '9') - goto yy904; + goto yy906; if (yych <= ':') goto yy879; goto yy885; } else { - if (yych <= 'F') { + if (yych <= 'Z') { if (yych <= '@') goto yy879; - goto yy904; } else { if (yych <= '`') goto yy879; - if (yych <= 'f') - goto yy904; - goto yy879; + if (yych >= '{') + goto yy879; } } yy906: @@ -13166,75 +13172,12 @@ bufsize_t _scan_entity(const unsigned char *p) { } } yy925: - yych = *++p; - if (yych <= ';') { - if (yych <= '/') - goto yy879; - if (yych <= '9') - goto yy926; - if (yych <= ':') - goto yy879; - goto yy885; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy879; - } else { - if (yych <= '`') - goto yy879; - if (yych >= '{') - goto yy879; - } - } - yy926: - yych = *++p; - if (yych <= ';') { - if (yych <= '/') - goto yy879; - if (yych <= '9') - goto yy927; - if (yych <= ':') - goto yy879; - goto yy885; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy879; - } else { - if (yych <= '`') - goto yy879; - if (yych >= '{') - goto yy879; - } - } - yy927: - yych = *++p; - if (yych <= ';') { - if (yych <= '/') - goto yy879; - if (yych <= '9') - goto yy928; - if (yych <= ':') - goto yy879; - goto yy885; - } else { - if (yych <= 'Z') { - if (yych <= '@') - goto yy879; - } else { - if (yych <= '`') - goto yy879; - if (yych >= '{') - goto yy879; - } - } - yy928: ++p; if ((yych = *p) <= ';') { if (yych <= '/') goto yy879; if (yych <= '9') - goto yy904; + goto yy901; if (yych <= ':') goto yy879; goto yy885; @@ -13242,12 +13185,12 @@ bufsize_t _scan_entity(const unsigned char *p) { if (yych <= 'Z') { if (yych <= '@') goto yy879; - goto yy904; + goto yy901; } else { if (yych <= '`') goto yy879; if (yych <= 'z') - goto yy904; + goto yy901; goto yy879; } } @@ -13267,304 +13210,304 @@ bufsize_t _scan_dangerous_url(const unsigned char *p) { if (yych <= 'V') { if (yych <= 'F') { if (yych == 'D') - goto yy933; + goto yy930; if (yych >= 'F') - goto yy934; + goto yy931; } else { if (yych == 'J') - goto yy935; + goto yy932; if (yych >= 'V') - goto yy936; + goto yy933; } } else { if (yych <= 'f') { if (yych == 'd') - goto yy933; + goto yy930; if (yych >= 'f') - goto yy934; + goto yy931; } else { if (yych <= 'j') { if (yych >= 'j') - goto yy935; + goto yy932; } else { if (yych == 'v') - goto yy936; + goto yy933; } } } ++p; - yy932 : { return 0; } - yy933: + yy929 : { return 0; } + yy930: yyaccept = 0; yych = *(marker = ++p); if (yych == 'A') - goto yy937; + goto yy934; if (yych == 'a') - goto yy937; - goto yy932; - yy934: + goto yy934; + goto yy929; + yy931: yyaccept = 0; yych = *(marker = ++p); if (yych == 'I') - goto yy939; + goto yy936; if (yych == 'i') - goto yy939; - goto yy932; - yy935: + goto yy936; + goto yy929; + yy932: yyaccept = 0; yych = *(marker = ++p); if (yych == 'A') - goto yy940; + goto yy937; if (yych == 'a') - goto yy940; - goto yy932; - yy936: + goto yy937; + goto yy929; + yy933: yyaccept = 0; yych = *(marker = ++p); if (yych == 'B') - goto yy941; + goto yy938; if (yych == 'b') - goto yy941; - goto yy932; - yy937: + goto yy938; + goto yy929; + yy934: yych = *++p; if (yych == 'T') - goto yy942; + goto yy939; if (yych == 't') - goto yy942; - yy938: + goto yy939; + yy935: p = marker; if (yyaccept == 0) { - goto yy932; + goto yy929; } else { - goto yy950; + goto yy947; } - yy939: + yy936: yych = *++p; if (yych == 'L') - goto yy943; + goto yy940; if (yych == 'l') - goto yy943; - goto yy938; - yy940: + goto yy940; + goto yy935; + yy937: yych = *++p; if (yych == 'V') - goto yy944; + goto yy941; if (yych == 'v') - goto yy944; - goto yy938; - yy941: + goto yy941; + goto yy935; + yy938: yych = *++p; if (yych == 'S') - goto yy945; + goto yy942; if (yych == 's') - goto yy945; - goto yy938; - yy942: + goto yy942; + goto yy935; + yy939: yych = *++p; if (yych == 'A') - goto yy946; + goto yy943; if (yych == 'a') - goto yy946; - goto yy938; - yy943: + goto yy943; + goto yy935; + yy940: yych = *++p; if (yych == 'E') - goto yy947; + goto yy944; if (yych == 'e') - goto yy947; - goto yy938; - yy944: + goto yy944; + goto yy935; + yy941: yych = *++p; if (yych == 'A') - goto yy941; + goto yy938; if (yych == 'a') - goto yy941; - goto yy938; - yy945: + goto yy938; + goto yy935; + yy942: yych = *++p; if (yych == 'C') - goto yy948; + goto yy945; if (yych == 'c') - goto yy948; - goto yy938; - yy946: + goto yy945; + goto yy935; + yy943: yych = *++p; if (yych == ':') - goto yy949; - goto yy938; - yy947: + goto yy946; + goto yy935; + yy944: yych = *++p; if (yych == ':') - goto yy951; - goto yy938; - yy948: + goto yy948; + goto yy935; + yy945: yych = *++p; if (yych == 'R') - goto yy952; + goto yy949; if (yych == 'r') - goto yy952; - goto yy938; - yy949: + goto yy949; + goto yy935; + yy946: yyaccept = 1; yych = *(marker = ++p); if (yych == 'I') - goto yy953; + goto yy950; if (yych == 'i') - goto yy953; - yy950 : { return (bufsize_t)(p - start); } - yy951: + goto yy950; + yy947 : { return (bufsize_t)(p - start); } + yy948: yych = *++p; - goto yy950; - yy952: + goto yy947; + yy949: yych = *++p; if (yych == 'I') - goto yy954; + goto yy951; if (yych == 'i') - goto yy954; - goto yy938; - yy953: + goto yy951; + goto yy935; + yy950: yych = *++p; if (yych == 'M') - goto yy955; + goto yy952; if (yych == 'm') - goto yy955; - goto yy938; - yy954: + goto yy952; + goto yy935; + yy951: yych = *++p; if (yych == 'P') - goto yy956; + goto yy953; if (yych == 'p') - goto yy956; - goto yy938; - yy955: + goto yy953; + goto yy935; + yy952: yych = *++p; if (yych == 'A') - goto yy957; + goto yy954; if (yych == 'a') - goto yy957; - goto yy938; - yy956: + goto yy954; + goto yy935; + yy953: yych = *++p; if (yych == 'T') - goto yy947; + goto yy944; if (yych == 't') - goto yy947; - goto yy938; - yy957: + goto yy944; + goto yy935; + yy954: yych = *++p; if (yych == 'G') - goto yy958; + goto yy955; if (yych != 'g') - goto yy938; - yy958: + goto yy935; + yy955: yych = *++p; if (yych == 'E') - goto yy959; + goto yy956; if (yych != 'e') - goto yy938; - yy959: + goto yy935; + yy956: yych = *++p; if (yych != '/') - goto yy938; + goto yy935; yych = *++p; if (yych <= 'W') { if (yych <= 'J') { if (yych == 'G') - goto yy961; + goto yy958; if (yych <= 'I') - goto yy938; - goto yy962; + goto yy935; + goto yy959; } else { if (yych == 'P') - goto yy963; + goto yy960; if (yych <= 'V') - goto yy938; - goto yy964; + goto yy935; + goto yy961; } } else { if (yych <= 'j') { if (yych == 'g') - goto yy961; + goto yy958; if (yych <= 'i') - goto yy938; - goto yy962; + goto yy935; + goto yy959; } else { if (yych <= 'p') { if (yych <= 'o') - goto yy938; - goto yy963; + goto yy935; + goto yy960; } else { if (yych == 'w') - goto yy964; - goto yy938; + goto yy961; + goto yy935; } } } - yy961: + yy958: yych = *++p; if (yych == 'I') - goto yy965; + goto yy962; if (yych == 'i') - goto yy965; - goto yy938; - yy962: + goto yy962; + goto yy935; + yy959: yych = *++p; if (yych == 'P') - goto yy966; + goto yy963; if (yych == 'p') - goto yy966; - goto yy938; - yy963: + goto yy963; + goto yy935; + yy960: yych = *++p; if (yych == 'N') - goto yy967; + goto yy964; if (yych == 'n') - goto yy967; - goto yy938; - yy964: + goto yy964; + goto yy935; + yy961: yych = *++p; if (yych == 'E') - goto yy968; + goto yy965; if (yych == 'e') - goto yy968; - goto yy938; - yy965: + goto yy965; + goto yy935; + yy962: yych = *++p; if (yych == 'F') - goto yy969; + goto yy966; if (yych == 'f') - goto yy969; - goto yy938; - yy966: + goto yy966; + goto yy935; + yy963: yych = *++p; if (yych == 'E') - goto yy967; + goto yy964; if (yych != 'e') - goto yy938; - yy967: + goto yy935; + yy964: yych = *++p; if (yych == 'G') - goto yy969; + goto yy966; if (yych == 'g') - goto yy969; - goto yy938; - yy968: + goto yy966; + goto yy935; + yy965: yych = *++p; if (yych == 'B') - goto yy971; + goto yy968; if (yych == 'b') - goto yy971; - goto yy938; - yy969: + goto yy968; + goto yy935; + yy966: ++p; { return 0; } - yy971: + yy968: ++p; if ((yych = *p) == 'P') - goto yy969; + goto yy966; if (yych == 'p') - goto yy969; - goto yy938; + goto yy966; + goto yy935; } } diff --git a/src/scanners.re b/src/scanners.re index c3c68217c..89e300786 100644 --- a/src/scanners.re +++ b/src/scanners.re @@ -299,7 +299,7 @@ bufsize_t _scan_entity(const unsigned char *p) const unsigned char *marker = NULL; const unsigned char *start = p; /*!re2c - [&] ([#] ([Xx][A-Fa-f0-9]{1,8}|[0-9]{1,8}) |[A-Za-z][A-Za-z0-9]{1,31} ) [;] + [&] ([#] ([Xx][A-Fa-f0-9]{1,6}|[0-9]{1,7}) |[A-Za-z][A-Za-z0-9]{1,31} ) [;] { return (bufsize_t)(p - start); } * { return 0; } */