From 03816fc6f34f31fcdbbcb4f0ae049e41e40d5d5d Mon Sep 17 00:00:00 2001 From: "http://jneen.net/" Date: Thu, 20 Jun 2019 12:27:59 +0900 Subject: [PATCH 1/3] add a `rake similarity[lexer_name]` and Similarity tester This finds lexers that are similar (in state names only, for now) to a given lexer. This will be useful in maintenance to flag a submitted lexer as a likely copy-paste job. --- spec/support/similarity.rb | 26 ++++++++++++++++++++++++++ tasks/similarity.rake | 16 ++++++++++++++++ 2 files changed, 42 insertions(+) create mode 100644 spec/support/similarity.rb create mode 100644 tasks/similarity.rake diff --git a/spec/support/similarity.rb b/spec/support/similarity.rb new file mode 100644 index 0000000000..c03131c5c4 --- /dev/null +++ b/spec/support/similarity.rb @@ -0,0 +1,26 @@ +module Similarity + def self.test(lexer_class) + state_names = Set.new(lexer_class.state_definitions.keys) + + candidates = Rouge::Lexer.all.select do |x| + next false if x == lexer_class + next false unless x < Rouge::RegexLexer + next false if x < lexer_class || lexer_class < x + true + end + + max_score = 1 + matches = [] + candidates.each do |candidate| + score = (state_names & candidate.state_definitions.keys).size + if score > max_score + max_score = score + matches = [candidate] + elsif score == max_score + matches << candidate + end + end + + [max_score, matches] + end +end diff --git a/tasks/similarity.rake b/tasks/similarity.rake new file mode 100644 index 0000000000..305a229d5c --- /dev/null +++ b/tasks/similarity.rake @@ -0,0 +1,16 @@ +desc "tests the similarity with existing lexers" +task :similarity, [:language] do |t, args| + require 'rouge' + require "#{File.dirname(File.dirname(__FILE__))}/spec/support/similarity.rb" + + language = args.language + lexer_class = Rouge::Lexer.find(language) + + score, matches = Similarity.test(lexer_class) + + if score == 1 + puts "No similarity found" + else + puts "Similarity index #{score} with #{matches.map(&:tag).join(', ')}" + 
end +end From 88047c447d55a893b821007f785e0d85628903f3 Mon Sep 17 00:00:00 2001 From: "http://jneen.net/" Date: Thu, 20 Jun 2019 13:07:41 +0900 Subject: [PATCH 2/3] improve the logic for shared lexers, and add a test all functionality --- spec/support/similarity.rb | 11 ++++++++--- tasks/similarity.rake | 23 +++++++++++++++++------ 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/spec/support/similarity.rb b/spec/support/similarity.rb index c03131c5c4..4c37f161c0 100644 --- a/spec/support/similarity.rb +++ b/spec/support/similarity.rb @@ -1,11 +1,16 @@ module Similarity def self.test(lexer_class) - state_names = Set.new(lexer_class.state_definitions.keys) + # state_definitions is an InheritableHash, so we use `own_keys` to + # exclude states inherited from superclasses + state_names = Set.new(lexer_class.state_definitions.own_keys) candidates = Rouge::Lexer.all.select do |x| - next false if x == lexer_class + # we can only compare to RegexLexers which have state_definitions next false unless x < Rouge::RegexLexer - next false if x < lexer_class || lexer_class < x + + # don't compare a lexer to itself or any subclasses + next false if x <= lexer_class + true end diff --git a/tasks/similarity.rake b/tasks/similarity.rake index 305a229d5c..687ad9f3ee 100644 --- a/tasks/similarity.rake +++ b/tasks/similarity.rake @@ -1,16 +1,27 @@ +def test_similarity(lexer_class) + score, matches = Similarity.test(lexer_class) + + if score == 1 + puts "[none]" + else + puts "[#{score}] #{matches.map(&:tag).join(', ')}" + end +end + desc "tests the similarity with existing lexers" task :similarity, [:language] do |t, args| require 'rouge' require "#{File.dirname(File.dirname(__FILE__))}/spec/support/similarity.rb" language = args.language - lexer_class = Rouge::Lexer.find(language) - - score, matches = Similarity.test(lexer_class) - if score == 1 - puts "No similarity found" + if language + test_similarity Rouge::Lexer.find(language) else - puts "Similarity index #{score} 
with #{matches.map(&:tag).join(', ')}" + Rouge::Lexer.all.each do |lexer_class| + print "#{lexer_class.tag}: " + test_similarity lexer_class if lexer_class < Rouge::RegexLexer + end end + end From ccfcfe451d7525f1e8d5c3febbd2a36537b3c048 Mon Sep 17 00:00:00 2001 From: "http://jneen.net/" Date: Thu, 20 Jun 2019 15:33:30 +0900 Subject: [PATCH 3/3] use __dir__ --- tasks/similarity.rake | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tasks/similarity.rake b/tasks/similarity.rake index 687ad9f3ee..c772e10460 100644 --- a/tasks/similarity.rake +++ b/tasks/similarity.rake @@ -11,7 +11,7 @@ end desc "tests the similarity with existing lexers" task :similarity, [:language] do |t, args| require 'rouge' - require "#{File.dirname(File.dirname(__FILE__))}/spec/support/similarity.rb" + require "#{File.dirname(__dir__)}/spec/support/similarity.rb" language = args.language @@ -23,5 +23,4 @@ task :similarity, [:language] do |t, args| test_similarity lexer_class if lexer_class < Rouge::RegexLexer end end - end