Skip to content

Commit

Permalink
Fix code blocks in Markdown lexer (#1053)
Browse files Browse the repository at this point in the history
This commit:

- updates the Markdown lexer to recognise code blocks that start at the
  very first line of a file; 
- updates the Markdown lexer to recognise code blocks if the lexer is
  passed input line by line with the "continue" flag; 
- updates the Markdown lexer to guess the language of code blocks when
  no language tag is provided.
  • Loading branch information
vidarh authored and pyrmont committed Jun 2, 2019
1 parent a050f9e commit dabd56e
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 4 deletions.
25 changes: 21 additions & 4 deletions lib/rouge/lexers/markdown.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,16 +32,33 @@ def html
rule /^#(?=[^#]).*?$/, Generic::Heading
rule /^##*.*?$/, Generic::Subheading

rule /(\n[ \t]*)(```|~~~)(.*?)(\n.*?\n)(\2)/m do |m|
sublexer = Lexer.find_fancy(m[3].strip, m[4], @options)
# Fenced code block rule. Capture groups of the opening pattern:
#   m[1]  spaces/tabs preceding the fence
#   m[2]  the fence itself (``` or ~~~)
#   m[3]  remainder of the fence line, newline included (the language tag)
#   m[4]  optional group wrapping the body and the closing fence
#   m[5]  body text (lazy; the /m flag lets `.` cross newlines)
#   m[6]  closing fence, forced to equal m[2] by the \2 backreference
# The pattern is anchored on ^ rather than on a preceding newline, so a
# fence on the very first line of the input can match. Because group 4 is
# optional, an opening fence also matches when no closing fence is present
# in the text seen so far.
rule /^([ \t]*)(```|~~~)([^\n]*\n)((.*?)(\2))?/m do |m|
name = m[3].strip
# An empty tag is replaced by the name "guess"; m[5] (the body, possibly
# nil) is handed over as the source text.
# NOTE(review): presumably "guess" makes find_fancy pick a lexer from the
# body text -- confirm against Lexer.find_fancy.
sublexer = Lexer.find_fancy(name.empty? ? "guess" : name, m[5], @options)
# Fall back to plain text emitted as Str::Backtick when no lexer was found.
sublexer ||= PlainText.new(@options.merge(:token => Str::Backtick))
sublexer.reset!

token Text, m[1]
token Punctuation, m[2]
token Name::Label, m[3]
# NOTE(review): the next two lines look like the pre-change lines of the
# diff (its header reports 4 deletions). With the new pattern, m[4] is the
# optional body-plus-closing-fence group and m[5] is the body, so they do
# not fit the new group numbering -- confirm they are not in the final file.
delegate sublexer, m[4]
token Punctuation, m[5]
# Lex the body with the sub-lexer only when a body was captured.
if m[5]
delegate sublexer, m[5]
end

if m[6]
# The closing fence was found in the same match: emit it and finish.
token Punctuation, m[6]
else
# No closing fence yet: push an anonymous state that keeps feeding lines
# to the sub-lexer until a line starting with the same fence appears.
push do
# Closing fence (optionally indented): leave the state and emit tokens.
# m[2] is interpolated, so only the fence style that opened the block
# closes it.
rule /^([ \t]*)(#{m[2]})/ do |mb|
pop!
token Text, mb[1]
token Punctuation, mb[2]
end
# Any other line belongs to the code block.
# NOTE(review): this pattern has no capture group, so mb[1] is nil;
# presumably `delegate` falls back to the whole match when given nil --
# confirm against RegexLexer#delegate.
rule /^.*\n/ do |mb|
delegate sublexer, mb[1]
end
end
end
end

rule /\n\n(( |\t).*?\n|\n)+/, Str::Backtick
Expand Down
27 changes: 27 additions & 0 deletions spec/lexers/markdown_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,31 @@
assert_guess :mimetype => 'text/x-markdown'
end
end

describe 'lexing' do
  include Support::Lexing

  # A fence preceded by a newline yields a label token for the language tag.
  it 'recognizes code blocks' do
    assert_has_token("Name.Label", "\n```ruby\nfoo\n```\n")
  end

  # Same as above, but the fence is the very first thing in the input.
  it 'recognizes code blocks starting at the first character of the input string' do
    assert_has_token("Name.Label", "```ruby\nfoo\n```\n")
  end

  # The opening fence and the body arrive in two separate lex calls.
  it 'recognizes code block when lexer is continued' do
    # Force the first pass to run so the lexer state is carried over.
    subject.lex("```ruby\n").to_a

    continued = subject.lex("@foo\n```\n", continue: true)
    actual = continued.map do |token, value|
      [token.qualname, value]
    end

    assert { ["Name.Variable.Instance", "@foo"] == actual.first }
  end

  # No language tag: the body alone must be enough to pick a sub-lexer.
  it 'guesses sub-lexer based on code-block content' do
    assert_has_token("Comment.Single", "```\n#!/usr/bin/env ruby\n```\n")
  end

  # A fence that does not start its line is inline backtick text, not a block.
  it 'recognizes backticks instead of code block if inside string' do
    inline_fence = "\nx```ruby\nfoo\n```\n"

    assert_has_token("Literal.String.Backtick", inline_fence)
    deny_has_token("Name.Label", inline_fence)
  end
end
end

0 comments on commit dabd56e

Please sign in to comment.