From 4ee34e6c96ea2cf8335e3b425afa44c535229347 Mon Sep 17 00:00:00 2001 From: Yuki Takizawa Date: Thu, 11 Oct 2018 18:10:41 +0900 Subject: [PATCH] Add named-capturing group (#70) * Add named-capturing group * Update coveralls-maven-plugin version * Add usage example for Builder#capture(String) and getText(String, String) * Fix local variable name to camelCase * Use assertThat instead of assertEquals --- .travis.yml | 2 +- .../lanwen/verbalregex/VerbalExpression.java | 53 ++++- .../BasicFunctionalityUnitTest.java | 185 +++++++++++++++--- .../lanwen/verbalregex/NegativeCasesTest.java | 23 +++ .../lanwen/verbalregex/RealWorldUnitTest.java | 20 +- 5 files changed, 251 insertions(+), 32 deletions(-) diff --git a/.travis.yml b/.travis.yml index cce0519..22fa1e3 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,7 +12,7 @@ before_install: - if [ ! -z "$GPG_OWNERTRUST" ]; then echo $GPG_OWNERTRUST | base64 --decode | $GPG_EXECUTABLE --import-ownertrust; fi after_success: - - mvn clean cobertura:cobertura -Dcobertura.report.format=xml org.eluder.coveralls:coveralls-maven-plugin:3.0.1:report + - mvn clean cobertura:cobertura -Dcobertura.report.format=xml org.eluder.coveralls:coveralls-maven-plugin:4.3.0:report notifications: email: false diff --git a/src/main/java/ru/lanwen/verbalregex/VerbalExpression.java b/src/main/java/ru/lanwen/verbalregex/VerbalExpression.java index 1955936..3863055 100644 --- a/src/main/java/ru/lanwen/verbalregex/VerbalExpression.java +++ b/src/main/java/ru/lanwen/verbalregex/VerbalExpression.java @@ -578,8 +578,30 @@ public Builder oneOf(final String... pValues) { * @return this builder */ public Builder capture() { + return this.capture(null); + } + + /** + * Adds named-capture - open brace to current position and closed to suffixes + *

+         * <pre>Example:{@code
+         * String text = "test@example.com";
+         * VerbalExpression regex = regex()
+         *         .find("@")
+         *         .capture("domain").anything().build();
+         * regex.getText(text, "domain"); // => "example.com"
+         * }</pre>
+ * + * @return this builder + * @since 1.6 + */ + public Builder capture(final String name) { this.suffixes.append(")"); - return this.add("("); + + if (name == null || name.trim().isEmpty()) { + return this.add("("); + } + return this.add("(?<" + name + ">"); } /** @@ -592,6 +614,16 @@ public Builder capt() { return this.capture(); } + /** + * Shortcut for {@link #capture(String)} + * + * @return this builder + * @since 1.6 + */ + public Builder capt(final String name) { + return this.capture(name); + } + /** * Same as {@link #capture()}, but don't save result * May be used to set count of duplicated captures, without creating a new saved capture @@ -716,6 +748,25 @@ public String getText(final String toTest, final int group) { return result.toString(); } + /** + * Extract exact named-group from string + *

+ * Example is see to {@link Builder#capture(String)} + * + * @param toTest - string to extract from + * @param group - group to extract + * @return extracted group + * @since 1.6 + */ + public String getText(final String toTest, final String group) { + Matcher m = pattern.matcher(toTest); + StringBuilder result = new StringBuilder(); + while (m.find()) { + result.append(m.group(group)); + } + return result.toString(); + } + /** * Extract exact group from string and add it to list * diff --git a/src/test/java/ru/lanwen/verbalregex/BasicFunctionalityUnitTest.java b/src/test/java/ru/lanwen/verbalregex/BasicFunctionalityUnitTest.java index 076f67c..16cf01b 100644 --- a/src/test/java/ru/lanwen/verbalregex/BasicFunctionalityUnitTest.java +++ b/src/test/java/ru/lanwen/verbalregex/BasicFunctionalityUnitTest.java @@ -227,13 +227,13 @@ public void testWord() { .startOfLine() .word() .build(); - + assertThat("word", testRegex, matchesTo("abc123")); assertThat("non-word", testRegex, not(matchesTo("@#"))); } - + @Test - public void testMultipleNoRange() { + public void testMultipleNoRange() { VerbalExpression testRegexStringOnly = new VerbalExpression.Builder() .startOfLine() .multiple("abc") @@ -247,54 +247,54 @@ public void testMultipleNoRange() { .multiple("abc", 2, 4, 8) .build(); VerbalExpression[] testRegexesSameBehavior = { - testRegexStringOnly, + testRegexStringOnly, testRegexStringAndNull, testRegexMoreThan2Ints }; for (VerbalExpression testRegex : testRegexesSameBehavior) { - assertThat("abc once", testRegex, + assertThat("abc once", testRegex, matchesTo("abc")); - assertThat("abc more than once", testRegex, + assertThat("abc more than once", testRegex, matchesTo("abcabcabc")); - assertThat("no abc", testRegex, + assertThat("no abc", testRegex, not(matchesTo("xyz"))); } } - + @Test - public void testMultipleFrom() { + public void testMultipleFrom() { VerbalExpression testRegexFrom = new VerbalExpression.Builder() .startOfLine() .multiple("abc", 2) .build(); - 
assertThat("no abc", testRegexFrom, + assertThat("no abc", testRegexFrom, not(matchesTo("xyz"))); - assertThat("abc less than 2 times", testRegexFrom, + assertThat("abc less than 2 times", testRegexFrom, not(matchesTo("abc"))); - assertThat("abc exactly 2 times", testRegexFrom, + assertThat("abc exactly 2 times", testRegexFrom, matchesTo("abcabc")); - assertThat("abc more than 2 times", testRegexFrom, + assertThat("abc more than 2 times", testRegexFrom, matchesTo("abcabcabc")); } - + @Test - public void testMultipleFromTo() { + public void testMultipleFromTo() { VerbalExpression testRegexFromTo = new VerbalExpression.Builder() .startOfLine() .multiple("abc", 2, 4) .build(); assertThat("no abc", testRegexFromTo, not(matchesTo("xyz"))); - assertThat("abc less than 2 times", testRegexFromTo, + assertThat("abc less than 2 times", testRegexFromTo, not(matchesTo("abc"))); assertThat("abc exactly 2 times", testRegexFromTo, matchesTo("abcabc")); - assertThat("abc between 2 and 4 times", testRegexFromTo, + assertThat("abc between 2 and 4 times", testRegexFromTo, matchesTo("abcabcabc")); - assertThat("abc exactly 4 times", testRegexFromTo, + assertThat("abc exactly 4 times", testRegexFromTo, matchesTo("abcabcabcabc")); - assertThat("abc more than 4 times", testRegexFromTo, + assertThat("abc more than 4 times", testRegexFromTo, not(matchesExactly("abcabcabcabcabc"))); } - + @Test public void testWithAnyCase() { VerbalExpression testRegex = new VerbalExpression.Builder() @@ -384,12 +384,33 @@ public void testStartCapture() { assertThat("can't get first captured group", regex.getText(text, 1), equalTo("bcd")); } + @Test + public void testStartNamedCapture() { + String text = "test@example.com"; + String captureName = "domain"; + VerbalExpression regex = regex() + .find("@") + .capture(captureName).anything().build(); + + assertThat("can't get captured group named " + captureName, + regex.getText(text, captureName), + equalTo("example.com")); + } + @Test public void 
captIsSameAsCapture() { - assertThat("Capt produce defferent than capture regex", regex().capt().build().toString(), + assertThat("Capt produce different than capture regex", regex().capt().build().toString(), equalTo(regex().capture().build().toString())); } + @Test + public void namedCaptIsSameAsNamedCapture() { + String name = "test"; + assertThat("Named-capt produce different than named-capture regex", + regex().capt(name).build().toString(), + equalTo(regex().capture(name).build().toString())); + } + @Test public void shouldReturnEmptyStringWhenNoGroupFound() { String text = "abc"; @@ -424,6 +445,19 @@ public void testEndCapture() { assertThat("can't get first captured group", regex.getText(text, 1), equalTo("b")); } + @Test + public void testEndNamedCapture() { + String text = "aaabcd"; + String captureName = "str"; + VerbalExpression regex = regex() + .find("a") + .capture(captureName).find("b").anything().endCapture() + .then("cd").build(); + + assertThat(regex.getText(text), equalTo("abcd")); + assertThat("can't get captured group named " + captureName, + regex.getText(text, captureName), equalTo("b")); + } @Test public void testMultiplyCapture() { @@ -436,6 +470,22 @@ public void testMultiplyCapture() { assertThat("can't get second captured group", regex.getText(text, 2), equalTo("d")); } + @Test + public void testMultiplyNamedCapture() { + String text = "aaabcd"; + String captureName1 = "str1"; + String captureName2 = "str2"; + VerbalExpression regex = regex() + .find("a").count(1) + .capture(captureName1).find("b").endCapture() + .anything().capture(captureName2).find("d").build(); + + assertThat("can't get captured group named " + captureName1, + regex.getText(text, captureName1), equalTo("b")); + assertThat("can't get captured group named " + captureName2, + regex.getText(text, captureName2), equalTo("d")); + } + @Test public void testOrWithCapture() { VerbalExpression testRegex = regex() @@ -452,6 +502,26 @@ public void testOrWithCapture() { 
assertThat(testRegex.getText("xxxabcdefzzz", 1), equalTo("abcnull")); } + @Test + public void testOrWithNamedCapture() { + String captureName = "test"; + VerbalExpression testRegex = regex() + .capture(captureName) + .find("abc") + .or("def") + .build(); + assertThat("Starts with abc or def", testRegex, matchesTo("defzzz")); + assertThat("Starts with abc or def", testRegex, matchesTo("abczzz")); + assertThat("Doesn't start with abc or def", + testRegex, not(matchesExactly("xyzabcefg"))); + + assertThat(testRegex.getText("xxxabcdefzzz", captureName), + equalTo("abcnull")); + assertThat(testRegex.getText("xxxdefzzz", captureName), + equalTo("null")); + assertThat(testRegex.getText("xxxabcdefzzz", captureName), + equalTo("abcnull")); + } @Test public void testOrWithClosedCapture() { @@ -470,6 +540,28 @@ public void testOrWithClosedCapture() { assertThat(testRegex.getText("xxxabcdefzzz", 1), equalTo("abcnull")); } + @Test + public void testOrWithClosedNamedCapture() { + String captureName = "test"; + VerbalExpression testRegex = regex() + .capture(captureName) + .find("abc") + .endCapt() + .or("def") + .build(); + assertThat("Starts with abc or def", testRegex, matchesTo("defzzz")); + assertThat("Starts with abc or def", testRegex, matchesTo("abczzz")); + assertThat("Doesn't start with abc or def", + testRegex, not(matchesExactly("xyzabcefg"))); + + assertThat(testRegex.getText("xxxabcdefzzz", captureName), + equalTo("abcnull")); + assertThat(testRegex.getText("xxxdefzzz", captureName), + equalTo("null")); + assertThat(testRegex.getText("xxxabcdefzzz", captureName), + equalTo("abcnull")); + } + @Test public void addRegexBuilderWrapsItWithUnsavedGroup() throws Exception { VerbalExpression regex = regex() @@ -563,7 +655,7 @@ public void zeroOreMoreSameAsAtLeast0() throws Exception { assertThat(regexWithOneOrMore, matchesTo(empty)); assertThat(regexWithOneOrMore, matchesExactly(empty)); } - + @Test public void testOneOf() { VerbalExpression testRegex = new 
VerbalExpression.Builder() @@ -575,7 +667,7 @@ public void testOneOf() { assertThat("Starts with abc or def", testRegex, matchesTo("abczzz")); assertThat("Doesn't start with abc nor def", testRegex, not(matchesTo("xyzabc"))); } - + @Test public void testOneOfWithCapture() { VerbalExpression testRegex = regex() @@ -590,6 +682,24 @@ public void testOneOfWithCapture() { assertThat(testRegex.getText("xxxdefzzz", 1), equalTo("def")); } + @Test + public void testOneOfWithNamedCapture() { + String captureName = "test"; + VerbalExpression testRegex = regex() + .capture(captureName) + .oneOf("abc", "def") + .build(); + assertThat("Starts with abc or def", testRegex, matchesTo("defzzz")); + assertThat("Starts with abc or def", testRegex, matchesTo("abczzz")); + assertThat("Doesn't start with abc or def", + testRegex, not(matchesExactly("xyzabcefg"))); + + assertThat(testRegex.getText("xxxabcdefzzz", captureName), + equalTo("abcdef")); + assertThat(testRegex.getText("xxxdefzzz", captureName), + equalTo("def")); + } + @Test public void testOneOfWithClosedCapture() { VerbalExpression testRegex = regex() @@ -604,7 +714,26 @@ public void testOneOfWithClosedCapture() { assertThat(testRegex.getText("xxxabcdefzzz", 1), equalTo("abcdef")); assertThat(testRegex.getText("xxxdefzzz", 1), equalTo("def")); } - + + @Test + public void testOneOfWithClosedNamedCapture() { + String captureName = "test"; + VerbalExpression testRegex = regex() + .capture(captureName) + .oneOf("abc", "def") + .endCapt() + .build(); + assertThat("Starts with abc or def", testRegex, matchesTo("defzzz")); + assertThat("Starts with abc or def", testRegex, matchesTo("abczzz")); + assertThat("Doesn't start with abc or def", + testRegex, not(matchesExactly("xyzabcefg"))); + + assertThat(testRegex.getText("xxxabcdefzzz", captureName), + equalTo("abcdef")); + assertThat(testRegex.getText("xxxdefzzz", captureName), + equalTo("def")); + } + @Test public void shouldAddMaybeWithOneOfFromAnotherBuilder() { 
VerbalExpression.Builder namePrefix = regex().oneOf("Mr.", "Ms."); @@ -615,12 +744,12 @@ public void shouldAddMaybeWithOneOfFromAnotherBuilder() { .word() .oneOrMore() .build(); - + assertThat("Is a name with prefix", name, matchesTo("Mr. Bond")); assertThat("Is a name without prefix", name, matchesTo("James")); - + } - + @Test public void testListOfTextGroups() { String text = "SampleHelloWorldString"; @@ -630,7 +759,7 @@ public void testListOfTextGroups() { .endCapt() .maybe("String") .build(); - + List<String> groups0 = regex.getTextGroups(text, 0); assertThat(groups0.get(0), equalTo("Hello")); diff --git a/src/test/java/ru/lanwen/verbalregex/NegativeCasesTest.java b/src/test/java/ru/lanwen/verbalregex/NegativeCasesTest.java index bd7b9b5..f8808c7 100644 --- a/src/test/java/ru/lanwen/verbalregex/NegativeCasesTest.java +++ b/src/test/java/ru/lanwen/verbalregex/NegativeCasesTest.java @@ -30,6 +30,15 @@ public void shouldExceptionWhenTryGetMoreThanCapturedGroup() { regex.getText(text, 2); } + @Test(expected = IllegalArgumentException.class) + public void shouldExceptionWhenTryGetByNonExistentCaptureName() { + String text = "abc"; + VerbalExpression regex = regex().find("b") + .capture("test1").find("c").build(); + + regex.getText(text, "test2"); + } + @Test(expected = PatternSyntaxException.class) public void testRangeWithoutArgs() throws Exception { regex().startOfLine().range().build(); @@ -66,6 +75,20 @@ public void orAfterCaptureProduceEmptyGroup() throws Exception { assertThat("regex dont extract a by first group", regex.getText("abcd", 1), equalTo("")); } + @Test + public void orAfterNamedCaptureProduceEmptyGroup() { + String captureName = "test"; + VerbalExpression regex = regex().startOfLine().then("a") + .capture(captureName).or("b").build(); + + assertThat(regex.toString(), containsString("(?<test>)|")); + + assertThat("regex don't matches string abcd", + regex.getText("abcd", 0), equalTo("a")); + assertThat("regex don't extract a by group named " + captureName, +
regex.getText("abcd", captureName), equalTo("")); + } + @Test public void multiplyWithNullOnCountEqualToWithOneAndMore() throws Exception { VerbalExpression regex = regex().multiple("some", null).build(); diff --git a/src/test/java/ru/lanwen/verbalregex/RealWorldUnitTest.java b/src/test/java/ru/lanwen/verbalregex/RealWorldUnitTest.java index f1084fb..cfbbb49 100644 --- a/src/test/java/ru/lanwen/verbalregex/RealWorldUnitTest.java +++ b/src/test/java/ru/lanwen/verbalregex/RealWorldUnitTest.java @@ -123,7 +123,7 @@ public void oneOfShouldFindEpisodeTitleOfStarWarsMovies() { @Test public void captureAfterNewLineHasGroupNumberOne() throws Exception { - final String line_break = "\n"; + final String lineBreak = "\n"; final String some = "some"; final String text = " text"; final VerbalExpression expression = VerbalExpression.regex(). @@ -131,6 +131,22 @@ public void captureAfterNewLineHasGroupNumberOne() throws Exception { .capture().find(some).endCapture().then(text) .build(); - assertEquals(some, expression.getText(line_break + some + text, 1)); + assertThat(some, equalTo(expression.getText(lineBreak + some + text, 1))); + } + + @Test + public void captureAfterNewLineHasANamedGroup() { + + final String lineBreak = "\n"; + final String some = "some"; + final String text = " text"; + final String captureName = "name"; + final VerbalExpression expression = VerbalExpression.regex(). + lineBreak() + .capture(captureName).find(some).endCapture().then(text) + .build(); + + assertThat(some, + equalTo(expression.getText(lineBreak + some + text, captureName))); } }