# frozen_string_literal: true

# ---------------------------------------------------------------------------
# spec/support/similarity.rb
# ---------------------------------------------------------------------------

require 'set'

# Helper for spotting lexers that look like copies of another lexer, judged by
# how many state names they have in common.
module Similarity
  # Finds the registered lexers whose states overlap most with +lexer_class+.
  #
  # @param lexer_class [Class] lexer class responding to +state_definitions+
  #   (i.e. a Rouge::RegexLexer subclass)
  # @return [Array(Integer, Array<Class>)] the highest number of shared state
  #   names (never lower than 1) and every candidate that reaches exactly that
  #   number, in registry order. A score of 1 is the baseline that callers
  #   treat as "no meaningful similarity".
  def self.test(lexer_class)
    # state_definitions is an InheritableHash, so we use `own_keys` to
    # exclude states inherited from superclasses
    own_states = Set.new(lexer_class.state_definitions.own_keys)

    # Only RegexLexers have state_definitions to compare against, and a lexer
    # is never compared to itself or to any of its subclasses.
    candidates = Rouge::Lexer.all.select do |other|
      other < Rouge::RegexLexer && !(other <= lexer_class)
    end

    # group_by keeps candidates in their original order within each score.
    by_score = candidates.group_by do |candidate|
      (own_states & candidate.state_definitions.keys).size
    end

    # The leading 1 keeps the baseline when no candidate scores higher.
    max_score = [1, *by_score.keys].max

    [max_score, by_score.fetch(max_score, [])]
  end
end

# ---------------------------------------------------------------------------
# tasks/similarity.rake
# ---------------------------------------------------------------------------

# Prints the similarity result for a single lexer on one line: "[none]" when
# the score is the baseline of 1, otherwise "[score] tag, tag, ...".
#
# @param lexer_class [Class] lexer class accepted by Similarity.test
def test_similarity(lexer_class)
  score, matches = Similarity.test(lexer_class)

  if score == 1
    puts "[none]"
  else
    puts "[#{score}] #{matches.map(&:tag).join(', ')}"
  end
end

# Prints one "tag: result" line for every registered RegexLexer.
#
# Lexers that are not RegexLexers are skipped before anything is printed, so
# no "tag: " prefix is left dangling without a result and a newline.
def test_all_similarities
  Rouge::Lexer.all.each do |lexer_class|
    next unless lexer_class < Rouge::RegexLexer

    print "#{lexer_class.tag}: "
    test_similarity lexer_class
  end
end

desc "tests the similarity with existing lexers"
task :similarity, [:language] do |_t, args|
  require 'rouge'
  require "#{File.dirname(__dir__)}/spec/support/similarity.rb"

  language = args.language

  if language
    lexer_class = Rouge::Lexer.find(language)

    # Fail with a readable message instead of a NoMethodError further down.
    abort "similarity: unknown lexer '#{language}'" unless lexer_class
    unless lexer_class < Rouge::RegexLexer
      abort "similarity: '#{language}' is not a RegexLexer and has no states to compare"
    end

    test_similarity lexer_class
  else
    test_all_similarities
  end
end