diff --git a/lib/packages/docutils/rst.nim b/lib/packages/docutils/rst.nim index 64910aa193b97..7833e6e9a0123 100644 --- a/lib/packages/docutils/rst.nim +++ b/lib/packages/docutils/rst.nim @@ -1246,15 +1246,43 @@ proc isUrl(p: RstParser, i: int): bool = p.tok[i+3].kind == tkWord and p.tok[i].symbol in ["http", "https", "ftp", "telnet", "file"] +proc checkParen(token: Token, parensStack: var seq[char]): bool {.inline.} = + ## Returns `true` iff `token` is a closing bracket matching an opening one + ## saved earlier in `parensStack`. Openers are pushed; a match pops its opener. + ## This follows the Markdown balanced parentheses rule + ## (https://spec.commonmark.org/0.29/#link-destination) + ## to allow links like + ## https://en.wikipedia.org/wiki/APL_(programming_language), + ## the same rule is applied to RST as well. + result = false + if token.kind == tkPunct: + let c = token.symbol[0] + if c in {'(', '[', '{'}: # push + parensStack.add c + elif c in {')', ']', '}'}: # try pop + # a case like `([)` inside a link is allowed: `[` is popped along with `(`: + for i in countdown(parensStack.len - 1, 0): + if (parensStack[i] == '(' and c == ')' or + parensStack[i] == '[' and c == ']' or + parensStack[i] == '{' and c == '}'): + parensStack.setLen i + result = true + break + proc parseUrl(p: var RstParser): PRstNode = ## https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#standalone-hyperlinks result = newRstNode(rnStandaloneHyperlink) var lastIdx = p.idx + var closedParenIdx = p.idx - 1 # last matched closing bracket, if any + var parensStack: seq[char] while p.tok[lastIdx].kind in {tkWord, tkPunct, tkOther}: + let isClosing = checkParen(p.tok[lastIdx], parensStack) + if isClosing: + closedParenIdx = lastIdx inc lastIdx dec lastIdx # standalone URL can not end with punctuation in RST - while lastIdx >= p.idx and p.tok[lastIdx].kind == tkPunct and + while lastIdx > closedParenIdx and p.tok[lastIdx].kind == tkPunct and p.tok[lastIdx].symbol != "/": dec lastIdx var s = "" @@ -1393,11 +1421,15 @@ proc parseMarkdownLink(p: var 
RstParser; father: PRstNode): bool = var desc, link = "" var i = p.idx + var parensStack: seq[char] template parse(endToken, dest) = + parensStack.setLen 0 inc i # skip begin token while true: if p.tok[i].kind in {tkEof, tkIndent}: return false - if p.tok[i].symbol == endToken: break + let isClosing = checkParen(p.tok[i], parensStack) + if p.tok[i].symbol == endToken and not isClosing: + break dest.add p.tok[i].symbol inc i inc i # skip end token diff --git a/tests/stdlib/trst.nim b/tests/stdlib/trst.nim index ec34edc915a26..c69450cd95560 100644 --- a/tests/stdlib/trst.nim +++ b/tests/stdlib/trst.nim @@ -580,3 +580,44 @@ suite "RST inline markup": rnLeaf ' ' rnLeaf 'end' """) + + test "URL with balanced parentheses (Markdown rule)": + # 2 balanced paren pairs; the 3rd, unbalanced `)` stays outside the URL: + check(dedent""" + https://en.wikipedia.org/wiki/APL_((programming_language)))""".toAst == + dedent""" + rnInner + rnStandaloneHyperlink + rnLeaf 'https://en.wikipedia.org/wiki/APL_((programming_language))' + rnLeaf ')' + """) + + # the same for a Markdown-style link: + check(dedent""" + [foo [bar]](https://en.wikipedia.org/wiki/APL_((programming_language))))""".toAst == + dedent""" + rnInner + rnHyperlink + rnLeaf 'foo [bar]' + rnLeaf 'https://en.wikipedia.org/wiki/APL_((programming_language))' + rnLeaf ')' + """) + + # unbalanced `(` at the end is excluded (this behavior is more RST-like): + check(dedent""" + https://en.wikipedia.org/wiki/APL_(programming_language(""".toAst == + dedent""" + rnInner + rnStandaloneHyperlink + rnLeaf 'https://en.wikipedia.org/wiki/APL_(programming_language' + rnLeaf '(' + """) + + # unbalanced `{` and `[` inside the link are still acceptable: + check(dedent""" + [my {link example](http://example.com/bracket_(symbol_[))""".toAst == + dedent""" + rnHyperlink + rnLeaf 'my {link example' + rnLeaf 'http://example.com/bracket_(symbol_[)' + """)