Skip to content

Commit

Permalink
Improve string syntax support in TOML lexer (#1419)
Browse files Browse the repository at this point in the history
The TOML lexer does not currently parse single-quoted and
multiline-quoted strings correctly. This commit adds support for TOML's
various string syntaxes.
  • Loading branch information
jneen authored Feb 1, 2020
1 parent 835f83f commit 242f373
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 0 deletions.
23 changes: 23 additions & 0 deletions lib/rouge/lexers/toml.rb
Original file line number Diff line number Diff line change
Expand Up @@ -48,16 +48,39 @@ class TOML < RegexLexer

# Rules for the :content state: recognises the opening delimiter of each of
# the four TOML string forms and hands off to the matching string state, and
# tokenizes comma / opening-bracket punctuation.
state :content do
# Include the rules of the :basic state (defined outside this excerpt).
mixin :basic
# The triple-quote openers are listed before the single-quote ones; since the
# one-character patterns would also match the first character of a
# three-character delimiter, the longer delimiter has to be tried first.
rule %r/"""/, Str, :mdq
rule %r/"/, Str, :dq
rule %r/'''/, Str, :msq
rule %r/'/, Str, :sq
# NOTE(review): escape-sequence rules are also mixed in here, outside of any
# string state -- confirm this is intentional.
mixin :esc_str
rule %r/\,/, Punctuation
# An opening bracket enters the :array state (defined outside this excerpt).
rule %r/\[/, Punctuation, :array
end

# Body of a single-line basic string ("..."), entered after the opening quote.
state :dq do
# Closing quote: leave the string state.
rule %r/"/, Str, :pop!
# A raw newline before the closing quote is flagged as Error; the state is
# still popped so that the following lines are lexed normally.
rule %r/\n/, Error, :pop!
# Escape sequences, as defined by the :esc_str state.
mixin :esc_str
# Any run of characters that is not a backslash, quote or newline.
rule %r/[^\\"\n]+/, Str
end

# Body of a multi-line basic string ("""..."""), entered after the opening
# delimiter.
state :mdq do
  # Closing delimiter. TOML allows one or two quotation marks directly in
  # front of the closing """ (e.g. `"""..."a quote."""" `), so only the LAST
  # three quotes of a run close the string: the negative lookahead refuses to
  # match while another quote still follows.
  rule %r/"""(?!")/, Str, :pop!
  # Escape sequences, as defined by the :esc_str state.
  mixin :esc_str
  # Any run of characters that is neither a backslash nor a quote; newlines
  # are ordinary content here.
  rule %r/[^\\"]+/, Str
  # A quote that is not part of the closing delimiter is string content.
  # Quotes are consumed one at a time so that the closing rule above gets a
  # chance to match at every position inside a run of quotes.
  rule %r/"/, Str
end

# Body of a single-line literal string ('...'), entered after the opening
# apostrophe. No escape rules are mixed in, so backslashes are plain content.
state :sq do
# Closing apostrophe: leave the string state.
rule %r/'/, Str, :pop!
# A raw newline before the closing apostrophe is flagged as Error; the state
# is still popped so that the following lines are lexed normally.
rule %r/\n/, Error, :pop!
# Any run of characters that is not an apostrophe or newline.
rule %r/[^'\n]+/, Str
end

# Body of a multi-line literal string ('''...'''), entered after the opening
# delimiter. No escape rules are mixed in, so backslashes are plain content.
state :msq do
  # Closing delimiter. TOML allows one or two apostrophes directly in front
  # of the closing ''' (e.g. `''''That,' she said.''''`), so only the LAST
  # three apostrophes of a run close the string: the negative lookahead
  # refuses to match while another apostrophe still follows.
  rule %r/'''(?!')/, Str, :pop!
  # Any run of characters other than an apostrophe; newlines are ordinary
  # content here.
  rule %r/[^']+/, Str
  # An apostrophe that is not part of the closing delimiter is string
  # content. Apostrophes are consumed one at a time so that the closing rule
  # above gets a chance to match at every position inside a run.
  rule %r/'/, Str
end

state :esc_str do
Expand Down
39 changes: 39 additions & 0 deletions spec/visual/samples/toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,45 @@ funky = "I'm a string. \"You can quote me\". Tab \t newline \n you get it."
right = "C:\\Users\\nodejs\\templates"
wrong = "C:\Users\nodejs\templates" # note: doesn't produce a valid path

# What you see is what you get - no escapes
winpath = 'C:\Users\nodejs\templates'
winpath2 = '\\ServerX\admin$\system32\'
quoted = 'Tom "Dubs" Preston-Werner'
regex = '<\i\c*\s*>'
wrong = 'no multiline - but rest of lex is okay

# multiline
regex2 = '''I [dw]on't need \d{2} apples'''
lines = '''
The first newline is
trimmed in raw strings.
All other whitespace
is preserved.
'''

str4 = """Here are two quotation marks: "". Simple enough."""
# str5 = """Here are three quotation marks: """.""" # INVALID
str5 = """Here are three quotation marks: ""\"."""
str6 = """Here are fifteen quotation marks: ""\"""\"""\"""\"""\"."""

# "This," she said, "is just a pointless statement."
str7 = """"This," she said, "is just a pointless statement."""
# The following strings are byte-for-byte equivalent:
str1 = "The quick brown fox jumps over the lazy dog."

str2 = """
The quick brown \


fox jumps over \
the lazy dog."""

str3 = """\
The quick brown \
fox jumps over \
the lazy dog.\
"""

# Test file for TOML
# Only this one tries to emulate a TOML file written by a user of the kind of parser writers probably hate
# This part you'll really hate
Expand Down

0 comments on commit 242f373

Please sign in to comment.