From b4ae5079e12587242b4ccb1240542039818ba0d9 Mon Sep 17 00:00:00 2001 From: Alexander Milster Date: Fri, 9 Feb 2024 13:07:21 +0100 Subject: [PATCH 1/3] Changes to MatchMerging and its tests by nils. --- .../java/de/jplag/merging/MatchMerging.java | 71 +++++++++---------- .../java/de/jplag/merging/MergingTest.java | 67 ++++++++++++----- .../de/jplag/samples/merging/sanityA.java | 18 +++++ .../de/jplag/samples/merging/sanityB.java | 21 ++++++ 4 files changed, 123 insertions(+), 54 deletions(-) create mode 100644 core/src/test/resources/de/jplag/samples/merging/sanityA.java create mode 100644 core/src/test/resources/de/jplag/samples/merging/sanityB.java diff --git a/core/src/main/java/de/jplag/merging/MatchMerging.java b/core/src/main/java/de/jplag/merging/MatchMerging.java index cdcb426dd..e9206dd7d 100644 --- a/core/src/main/java/de/jplag/merging/MatchMerging.java +++ b/core/src/main/java/de/jplag/merging/MatchMerging.java @@ -1,11 +1,5 @@ package de.jplag.merging; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - import de.jplag.JPlagComparison; import de.jplag.JPlagResult; import de.jplag.Match; @@ -14,6 +8,10 @@ import de.jplag.Token; import de.jplag.options.JPlagOptions; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; + /** * This class implements a match merging algorithm which serves as a defense mechanism against obfuscation attacks. * Based on configurable parameters MinimumNeighborLength and MaximumGapSize, it alters prior results from pairwise @@ -24,10 +22,11 @@ * {@link JPlagOptions} as {@link MergingOptions} and default to (2,6). */ public class MatchMerging { - private JPlagOptions options; + private final JPlagOptions options; /** * Instantiates the match merging algorithm for a comparison result and a set of specific options. 
+ * * @param options encapsulates the adjustable options */ public MatchMerging(JPlagOptions options) { @@ -37,6 +36,7 @@ public MatchMerging(JPlagOptions options) { /** * Runs the internal match merging pipeline. It computes neighboring matches, merges them based on * {@link MergingOptions} and removes remaining too short matches afterwards. + * * @param result is the initially computed result object * @return JPlagResult containing the merged matches */ @@ -63,30 +63,21 @@ public JPlagResult mergeMatchesOf(JPlagResult result) { /** * Computes neighbors by sorting based on order of matches in the left and right submissions and then checking which are * next to each other in both. + * * @param globalMatches * @return neighbors containing a list of pairs of neighboring matches */ private List computeNeighbors(List globalMatches) { List neighbors = new ArrayList<>(); + List sortedByLeft = new ArrayList<>(globalMatches); + List sortedByRight = new ArrayList<>(globalMatches); - Map> matchesByLeft = new HashMap<>(); - Map> matchesByRight = new HashMap<>(); - - // Group matches by their left and right positions - for (Match match : globalMatches) { - matchesByLeft.computeIfAbsent(match.startOfFirst(), key -> new ArrayList<>()).add(match); - matchesByRight.computeIfAbsent(match.startOfSecond(), key -> new ArrayList<>()).add(match); - } + sortedByLeft.sort(Comparator.comparingInt(Match::startOfFirst)); + sortedByRight.sort(Comparator.comparingInt(Match::startOfSecond)); - // Iterate through the matches and find neighbors - for (List matches : matchesByLeft.values()) { - for (Match match : matches) { - List rightMatches = matchesByRight.getOrDefault(match.startOfSecond(), Collections.emptyList()); - for (Match rightMatch : rightMatches) { - if (rightMatch != match) { - neighbors.add(new Neighbor(match, rightMatch)); - } - } + for (int i = 0; i < sortedByLeft.size() - 1; i++) { + if (sortedByRight.indexOf(sortedByLeft.get(i)) == (sortedByRight.indexOf(sortedByLeft.get(i + 
1)) - 1)) { + neighbors.add(new Neighbor(sortedByLeft.get(i), sortedByLeft.get(i + 1))); } } @@ -97,6 +88,7 @@ private List computeNeighbors(List globalMatches) { * This function iterates through the neighboring matches and checks which fit the merging criteria. Those who do are * merged and the original matches are removed. This is done, until there are either no neighbors left, or none fit the * criteria + * * @return globalMatches containing merged matches. */ private List mergeNeighbors(List globalMatches, Submission leftSubmission, Submission rightSubmission) { @@ -130,17 +122,18 @@ private List mergeNeighbors(List globalMatches, Submission leftSub /** * This function checks if a merge would go over file boundaries. - * @param leftSubmission is the left submission - * @param rightSubmission is the right submission - * @param upperNeighbor is the upper neighboring match - * @param tokensBetweenLeft amount of token that separate the neighboring matches in the left submission and need to be - * removed + * + * @param leftSubmission is the left submission + * @param rightSubmission is the right submission + * @param upperNeighbor is the upper neighboring match + * @param tokensBetweenLeft amount of token that separate the neighboring matches in the left submission and need to be + * removed * @param tokensBetweenRight amount token that separate the neighboring matches in the send submission and need to be - * removed + * removed * @return true if the merge goes over file boundaries. 
*/ private boolean mergeOverlapsFiles(Submission leftSubmission, Submission rightSubmission, Match upperNeighbor, int tokensBetweenLeft, - int tokensBetweenRight) { + int tokensBetweenRight) { if (leftSubmission.getFiles().size() == 1 && rightSubmission.getFiles().size() == 1) { return false; } @@ -158,6 +151,7 @@ private boolean mergeOverlapsFiles(Submission leftSubmission, Submission rightSu /** * This function checks whether a list of token contains FILE_END + * * @param token is the list of token * @return true if FILE_END is in token */ @@ -168,18 +162,19 @@ private boolean containsFileEndToken(List token) { /** * This function removes token from both submissions after a merge has been performed. Additionally it moves the * starting positions from matches, that occur after the merged neighboring matches, by the amount of removed token. + * * @param globalMatches - * @param leftSubmission is the left submission - * @param rightSubmission is the right submission - * @param upperNeighbor is the upper neighboring match - * @param tokensBetweenLeft amount of token that separate the neighboring matches in the left submission and need to be - * removed + * @param leftSubmission is the left submission + * @param rightSubmission is the right submission + * @param upperNeighbor is the upper neighboring match + * @param tokensBetweenLeft amount of token that separate the neighboring matches in the left submission and need to be + * removed * @param tokensBetweenRight amount token that separate the neighboring matches in the send submission and need to be - * removed + * removed * @return shiftedMatches with the mentioned changes. 
*/ private List removeToken(List globalMatches, Submission leftSubmission, Submission rightSubmission, Match upperNeighbor, - int tokensBetweenLeft, int tokensBetweenRight) { + int tokensBetweenLeft, int tokensBetweenRight) { int startLeft = upperNeighbor.startOfFirst(); int startRight = upperNeighbor.startOfSecond(); int lengthUpper = upperNeighbor.length(); diff --git a/core/src/test/java/de/jplag/merging/MergingTest.java b/core/src/test/java/de/jplag/merging/MergingTest.java index 0d6780b9f..e28007812 100644 --- a/core/src/test/java/de/jplag/merging/MergingTest.java +++ b/core/src/test/java/de/jplag/merging/MergingTest.java @@ -5,6 +5,7 @@ import java.util.ArrayList; import java.util.Collections; +import java.util.Comparator; import java.util.List; import java.util.function.Function; @@ -33,15 +34,15 @@ * CC BY 4.0 license. */ class MergingTest extends TestBase { - private JPlagOptions options; + private final JPlagOptions options; private JPlagResult result; private List matches; private List comparisonsBefore; private List comparisonsAfter; - private ComparisonStrategy comparisonStrategy; - private SubmissionSet submissionSet; - private final int MINIMUM_NEIGHBOR_LENGTH = 1; - private final int MAXIMUM_GAP_SIZE = 10; + private final ComparisonStrategy comparisonStrategy; + private final SubmissionSet submissionSet; + private static final int MINIMUM_NEIGHBOR_LENGTH = 1; + private static final int MAXIMUM_GAP_SIZE = 10; MergingTest() throws ExitException { options = getDefaultOptions("merging").withMergingOptions(new MergingOptions(true, MINIMUM_NEIGHBOR_LENGTH, MAXIMUM_GAP_SIZE)); @@ -56,12 +57,15 @@ class MergingTest extends TestBase { @BeforeEach void prepareTestState() { result = comparisonStrategy.compareSubmissions(submissionSet); - comparisonsBefore = result.getAllComparisons(); + comparisonsBefore = new ArrayList<>(result.getAllComparisons()); if (options.mergingOptions().enabled()) { result = new MatchMerging(options).mergeMatchesOf(result); } - 
comparisonsAfter = result.getAllComparisons(); + comparisonsAfter = new ArrayList<>(result.getAllComparisons()); + + comparisonsBefore.sort(Comparator.comparing(Object::toString)); + comparisonsAfter.sort(Comparator.comparing(Object::toString)); } @Test @@ -83,10 +87,10 @@ void testGSTIgnoredMatches() { } private void checkMatchLength(Function> matchFunction, int threshold, List comparisons) { - for (int i = 0; i < comparisons.size(); i++) { - matches = matchFunction.apply(comparisons.get(i)); - for (int j = 0; j < matches.size(); j++) { - assertTrue(matches.get(j).length() >= threshold); + for (JPlagComparison comparison : comparisons) { + matches = matchFunction.apply(comparison); + for (Match match : matches) { + assertTrue(match.length() >= threshold); } } } @@ -169,11 +173,11 @@ void testCorrectMerges() { matches = comparisonsAfter.get(i).matches(); List sortedByFirst = new ArrayList<>(comparisonsBefore.get(i).matches()); sortedByFirst.addAll(comparisonsBefore.get(i).ignoredMatches()); - Collections.sort(sortedByFirst, (m1, m2) -> m1.startOfFirst() - m2.startOfFirst()); - for (int j = 0; j < matches.size(); j++) { + sortedByFirst.sort(Comparator.comparingInt(Match::startOfFirst)); + for (Match match : matches) { int begin = -1; for (int k = 0; k < sortedByFirst.size(); k++) { - if (sortedByFirst.get(k).startOfFirst() == matches.get(j).startOfFirst()) { + if (sortedByFirst.get(k).startOfFirst() == match.startOfFirst()) { begin = k; break; } @@ -182,10 +186,10 @@ void testCorrectMerges() { correctMerges = false; } else { int foundToken = 0; - while (foundToken < matches.get(j).length()) { + while (foundToken < match.length()) { foundToken += sortedByFirst.get(begin).length(); begin++; - if (foundToken > matches.get(j).length()) { + if (foundToken > match.length()) { correctMerges = false; } } @@ -194,4 +198,35 @@ void testCorrectMerges() { } assertTrue(correctMerges); } + + @Test + @DisplayName("Sanity check for match merging") + void testSanity() { + + List 
matchesBefore = new ArrayList<>(); + List matchesAfter = new ArrayList<>(); + + for(JPlagComparison comparison : comparisonsBefore) { + if(comparison.toString().equals("sanityA.java <-> sanityB.java")) { + matchesBefore=comparison.ignoredMatches(); + } + } + for(JPlagComparison comparison : comparisonsAfter) { + if(comparison.toString().equals("sanityA.java <-> sanityB.java")) { + matchesAfter=comparison.matches(); + } + } + + List expectedBefore = new ArrayList<>(); + expectedBefore.add(new Match(5,3,6)); + expectedBefore.add(new Match(11,12,6)); + expectedBefore.add(new Match(0,0,3)); + expectedBefore.add(new Match(3,18,2)); + expectedBefore.add(new Match(17,20,2)); + + List expectedAfter = new ArrayList<>(); + expectedAfter.add(new Match(5,3,12)); + + assertTrue(matchesBefore.equals(expectedBefore) && matchesAfter.equals(expectedAfter)); + } } \ No newline at end of file diff --git a/core/src/test/resources/de/jplag/samples/merging/sanityA.java b/core/src/test/resources/de/jplag/samples/merging/sanityA.java new file mode 100644 index 000000000..d4a042dc6 --- /dev/null +++ b/core/src/test/resources/de/jplag/samples/merging/sanityA.java @@ -0,0 +1,18 @@ +public class Minimal { + public static void main (String [] Argv) { + System.out.println("Test"); + System.out.println("Test"); + int a = 1; + a = 1; + int b = 1; + a = 1; + int c = 1; + a = 1; + int d = 1; + a = 1; + int e = 1; + a = 1; + int f = 1; + a = 1; + } +} \ No newline at end of file diff --git a/core/src/test/resources/de/jplag/samples/merging/sanityB.java b/core/src/test/resources/de/jplag/samples/merging/sanityB.java new file mode 100644 index 000000000..75c55752a --- /dev/null +++ b/core/src/test/resources/de/jplag/samples/merging/sanityB.java @@ -0,0 +1,21 @@ +public class Minimal { + public static void main (String [] Argv) { + int a = 1; + a = 1; + int b = 1; + a = 1; + int c = 1; + a = 1; + if(a==1){ + a = 2; + } + int d = 1; + a = 1; + int e = 1; + a = 1; + int f = 1; + a = 1; + 
System.out.println("Test"); + System.out.println("Test"); + } +} \ No newline at end of file From bd10512abeae809014b6a02220d8e388e459aa9c Mon Sep 17 00:00:00 2001 From: Alexander Milster Date: Fri, 9 Feb 2024 13:09:04 +0100 Subject: [PATCH 2/3] spotless. --- .../java/de/jplag/merging/MatchMerging.java | 43 ++++++++----------- .../java/de/jplag/merging/MergingTest.java | 35 ++++++++------- 2 files changed, 35 insertions(+), 43 deletions(-) diff --git a/core/src/main/java/de/jplag/merging/MatchMerging.java b/core/src/main/java/de/jplag/merging/MatchMerging.java index e9206dd7d..3067e32a6 100644 --- a/core/src/main/java/de/jplag/merging/MatchMerging.java +++ b/core/src/main/java/de/jplag/merging/MatchMerging.java @@ -1,5 +1,9 @@ package de.jplag.merging; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; + import de.jplag.JPlagComparison; import de.jplag.JPlagResult; import de.jplag.Match; @@ -8,10 +12,6 @@ import de.jplag.Token; import de.jplag.options.JPlagOptions; -import java.util.ArrayList; -import java.util.Comparator; -import java.util.List; - /** * This class implements a match merging algorithm which serves as a defense mechanism against obfuscation attacks. * Based on configurable parameters MinimumNeighborLength and MaximumGapSize, it alters prior results from pairwise @@ -26,7 +26,6 @@ public class MatchMerging { /** * Instantiates the match merging algorithm for a comparison result and a set of specific options. - * * @param options encapsulates the adjustable options */ public MatchMerging(JPlagOptions options) { @@ -36,7 +35,6 @@ public MatchMerging(JPlagOptions options) { /** * Runs the internal match merging pipeline. It computes neighboring matches, merges them based on * {@link MergingOptions} and removes remaining too short matches afterwards. 
- * * @param result is the initially computed result object * @return JPlagResult containing the merged matches */ @@ -63,7 +61,6 @@ public JPlagResult mergeMatchesOf(JPlagResult result) { /** * Computes neighbors by sorting based on order of matches in the left and right submissions and then checking which are * next to each other in both. - * * @param globalMatches * @return neighbors containing a list of pairs of neighboring matches */ @@ -88,7 +85,6 @@ private List computeNeighbors(List globalMatches) { * This function iterates through the neighboring matches and checks which fit the merging criteria. Those who do are * merged and the original matches are removed. This is done, until there are either no neighbors left, or none fit the * criteria - * * @return globalMatches containing merged matches. */ private List mergeNeighbors(List globalMatches, Submission leftSubmission, Submission rightSubmission) { @@ -122,18 +118,17 @@ private List mergeNeighbors(List globalMatches, Submission leftSub /** * This function checks if a merge would go over file boundaries. - * - * @param leftSubmission is the left submission - * @param rightSubmission is the right submission - * @param upperNeighbor is the upper neighboring match - * @param tokensBetweenLeft amount of token that separate the neighboring matches in the left submission and need to be - * removed + * @param leftSubmission is the left submission + * @param rightSubmission is the right submission + * @param upperNeighbor is the upper neighboring match + * @param tokensBetweenLeft amount of token that separate the neighboring matches in the left submission and need to be + * removed * @param tokensBetweenRight amount token that separate the neighboring matches in the send submission and need to be - * removed + * removed * @return true if the merge goes over file boundaries. 
*/ private boolean mergeOverlapsFiles(Submission leftSubmission, Submission rightSubmission, Match upperNeighbor, int tokensBetweenLeft, - int tokensBetweenRight) { + int tokensBetweenRight) { if (leftSubmission.getFiles().size() == 1 && rightSubmission.getFiles().size() == 1) { return false; } @@ -151,7 +146,6 @@ private boolean mergeOverlapsFiles(Submission leftSubmission, Submission rightSu /** * This function checks whether a list of token contains FILE_END - * * @param token is the list of token * @return true if FILE_END is in token */ @@ -162,19 +156,18 @@ private boolean containsFileEndToken(List token) { /** * This function removes token from both submissions after a merge has been performed. Additionally it moves the * starting positions from matches, that occur after the merged neighboring matches, by the amount of removed token. - * * @param globalMatches - * @param leftSubmission is the left submission - * @param rightSubmission is the right submission - * @param upperNeighbor is the upper neighboring match - * @param tokensBetweenLeft amount of token that separate the neighboring matches in the left submission and need to be - * removed + * @param leftSubmission is the left submission + * @param rightSubmission is the right submission + * @param upperNeighbor is the upper neighboring match + * @param tokensBetweenLeft amount of token that separate the neighboring matches in the left submission and need to be + * removed * @param tokensBetweenRight amount token that separate the neighboring matches in the send submission and need to be - * removed + * removed * @return shiftedMatches with the mentioned changes. 
*/ private List removeToken(List globalMatches, Submission leftSubmission, Submission rightSubmission, Match upperNeighbor, - int tokensBetweenLeft, int tokensBetweenRight) { + int tokensBetweenLeft, int tokensBetweenRight) { int startLeft = upperNeighbor.startOfFirst(); int startRight = upperNeighbor.startOfSecond(); int lengthUpper = upperNeighbor.length(); diff --git a/core/src/test/java/de/jplag/merging/MergingTest.java b/core/src/test/java/de/jplag/merging/MergingTest.java index e28007812..580592e70 100644 --- a/core/src/test/java/de/jplag/merging/MergingTest.java +++ b/core/src/test/java/de/jplag/merging/MergingTest.java @@ -4,7 +4,6 @@ import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.ArrayList; -import java.util.Collections; import java.util.Comparator; import java.util.List; import java.util.function.Function; @@ -63,7 +62,7 @@ void prepareTestState() { result = new MatchMerging(options).mergeMatchesOf(result); } comparisonsAfter = new ArrayList<>(result.getAllComparisons()); - + comparisonsBefore.sort(Comparator.comparing(Object::toString)); comparisonsAfter.sort(Comparator.comparing(Object::toString)); } @@ -198,35 +197,35 @@ void testCorrectMerges() { } assertTrue(correctMerges); } - + @Test @DisplayName("Sanity check for match merging") void testSanity() { - + List matchesBefore = new ArrayList<>(); List matchesAfter = new ArrayList<>(); - for(JPlagComparison comparison : comparisonsBefore) { - if(comparison.toString().equals("sanityA.java <-> sanityB.java")) { - matchesBefore=comparison.ignoredMatches(); + for (JPlagComparison comparison : comparisonsBefore) { + if (comparison.toString().equals("sanityA.java <-> sanityB.java")) { + matchesBefore = comparison.ignoredMatches(); } } - for(JPlagComparison comparison : comparisonsAfter) { - if(comparison.toString().equals("sanityA.java <-> sanityB.java")) { - matchesAfter=comparison.matches(); + for (JPlagComparison comparison : comparisonsAfter) { + if 
(comparison.toString().equals("sanityA.java <-> sanityB.java")) { + matchesAfter = comparison.matches(); } } - + List expectedBefore = new ArrayList<>(); - expectedBefore.add(new Match(5,3,6)); - expectedBefore.add(new Match(11,12,6)); - expectedBefore.add(new Match(0,0,3)); - expectedBefore.add(new Match(3,18,2)); - expectedBefore.add(new Match(17,20,2)); + expectedBefore.add(new Match(5, 3, 6)); + expectedBefore.add(new Match(11, 12, 6)); + expectedBefore.add(new Match(0, 0, 3)); + expectedBefore.add(new Match(3, 18, 2)); + expectedBefore.add(new Match(17, 20, 2)); List expectedAfter = new ArrayList<>(); - expectedAfter.add(new Match(5,3,12)); - + expectedAfter.add(new Match(5, 3, 12)); + assertTrue(matchesBefore.equals(expectedBefore) && matchesAfter.equals(expectedAfter)); } } \ No newline at end of file From 73859965ee91da409dc9f6ee7d855ffd3cc117c7 Mon Sep 17 00:00:00 2001 From: Alexander Milster Date: Tue, 13 Feb 2024 14:24:58 +0100 Subject: [PATCH 3/3] Simplified code --- core/src/test/java/de/jplag/merging/MergingTest.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core/src/test/java/de/jplag/merging/MergingTest.java b/core/src/test/java/de/jplag/merging/MergingTest.java index 580592e70..e2a606e16 100644 --- a/core/src/test/java/de/jplag/merging/MergingTest.java +++ b/core/src/test/java/de/jplag/merging/MergingTest.java @@ -34,7 +34,6 @@ */ class MergingTest extends TestBase { private final JPlagOptions options; - private JPlagResult result; private List matches; private List comparisonsBefore; private List comparisonsAfter; @@ -55,7 +54,7 @@ class MergingTest extends TestBase { @BeforeEach void prepareTestState() { - result = comparisonStrategy.compareSubmissions(submissionSet); + JPlagResult result = comparisonStrategy.compareSubmissions(submissionSet); comparisonsBefore = new ArrayList<>(result.getAllComparisons()); if (options.mergingOptions().enabled()) { @@ -226,6 +225,7 @@ void testSanity() { List expectedAfter = new 
ArrayList<>(); expectedAfter.add(new Match(5, 3, 12)); - assertTrue(matchesBefore.equals(expectedBefore) && matchesAfter.equals(expectedAfter)); + assertEquals(expectedBefore, matchesBefore); + assertEquals(expectedAfter, matchesAfter); } } \ No newline at end of file