From 96eb0390d69ed2e0c3e59f77fb65fbb79615a11c Mon Sep 17 00:00:00 2001 From: Naoto Sato Date: Tue, 6 Feb 2024 17:43:12 +0000 Subject: [PATCH] 8324665: Loose matching of space separators in the lenient date/time parsing mode Reviewed-by: joehw, jlu --- .../share/classes/java/text/DateFormat.java | 6 +- .../classes/java/text/SimpleDateFormat.java | 13 +- .../time/format/DateTimeFormatterBuilder.java | 23 ++- .../DateFormat/LenientSpaceParsingTest.java | 141 ++++++++++++++++++ 4 files changed, 177 insertions(+), 6 deletions(-) create mode 100644 test/jdk/java/text/Format/DateFormat/LenientSpaceParsingTest.java diff --git a/src/java.base/share/classes/java/text/DateFormat.java b/src/java.base/share/classes/java/text/DateFormat.java index 5b04239fc8f5d..1d93a40a0cb2f 100644 --- a/src/java.base/share/classes/java/text/DateFormat.java +++ b/src/java.base/share/classes/java/text/DateFormat.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 1996, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1996, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -746,6 +746,10 @@ public TimeZone getTimeZone() *

<p>This leniency value is overwritten by a call to {@link * #setCalendar(java.util.Calendar) setCalendar()}. * + * @implSpec A {@link Character#SPACE_SEPARATOR SPACE_SEPARATOR} in the input + * text will match any other {@link Character#SPACE_SEPARATOR SPACE_SEPARATOR}s + * in the pattern with lenient parsing; otherwise, it will not match. + * * @param lenient when {@code true}, parsing is lenient * @see java.util.Calendar#setLenient(boolean) */ diff --git a/src/java.base/share/classes/java/text/SimpleDateFormat.java b/src/java.base/share/classes/java/text/SimpleDateFormat.java index d2f71bd77510a..6f4aa130a4145 100644 --- a/src/java.base/share/classes/java/text/SimpleDateFormat.java +++ b/src/java.base/share/classes/java/text/SimpleDateFormat.java @@ -1487,7 +1487,8 @@ public Date parse(String text, ParsePosition pos) switch (tag) { case TAG_QUOTE_ASCII_CHAR: - if (start >= textLength || text.charAt(start) != (char)count) { + if (start >= textLength || + !charEquals(text.charAt(start), (char)count)) { pos.index = oldStart; pos.errorIndex = start; return null; @@ -1497,7 +1498,8 @@ public Date parse(String text, ParsePosition pos) case TAG_QUOTE_CHARS: while (count-- > 0) { - if (start >= textLength || text.charAt(start) != compiledPattern[i++]) { + if (start >= textLength || + !charEquals(text.charAt(start), compiledPattern[i++])) { pos.index = oldStart; pos.errorIndex = start; return null; @@ -1580,6 +1582,13 @@ public Date parse(String text, ParsePosition pos) return parsedDate; } + private boolean charEquals(char ch1, char ch2) { + return ch1 == ch2 || + isLenient() && + Character.getType(ch1) == Character.SPACE_SEPARATOR && + Character.getType(ch2) == Character.SPACE_SEPARATOR; + } + /* If the next tag/pattern is a <Numeric_Field> then the parser * should consider the count of digits while parsing the contiguous digits * for the current tag/pattern diff --git a/src/java.base/share/classes/java/time/format/DateTimeFormatterBuilder.java 
b/src/java.base/share/classes/java/time/format/DateTimeFormatterBuilder.java index edd5e16f8a555..0385c4a167403 100644 --- a/src/java.base/share/classes/java/time/format/DateTimeFormatterBuilder.java +++ b/src/java.base/share/classes/java/time/format/DateTimeFormatterBuilder.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -353,6 +353,10 @@ public DateTimeFormatterBuilder parseCaseInsensitive() { * The change will remain in force until the end of the formatter that is eventually * constructed or until {@code parseLenient} is called. * + * @implSpec A {@link Character#SPACE_SEPARATOR SPACE_SEPARATOR} in the input + * text will not match any other {@link Character#SPACE_SEPARATOR SPACE_SEPARATOR}s + * in the pattern with the strict parse style. + * * @return this, for chaining, not null */ public DateTimeFormatterBuilder parseStrict() { @@ -372,6 +376,10 @@ public DateTimeFormatterBuilder parseStrict() { * The change will remain in force until the end of the formatter that is eventually * constructed or until {@code parseStrict} is called. * + * @implSpec A {@link Character#SPACE_SEPARATOR SPACE_SEPARATOR} in the input + * text will match any other {@link Character#SPACE_SEPARATOR SPACE_SEPARATOR}s + * in the pattern with the lenient parse style. 
+ * * @return this, for chaining, not null */ public DateTimeFormatterBuilder parseLenient() { @@ -2731,9 +2739,11 @@ public int parse(DateTimeParseContext context, CharSequence text, int position) */ static final class CharLiteralPrinterParser implements DateTimePrinterParser { private final char literal; + private final boolean isSpaceSeparator; private CharLiteralPrinterParser(char literal) { this.literal = literal; + isSpaceSeparator = Character.getType(literal) == Character.SPACE_SEPARATOR; } @Override @@ -2750,9 +2760,10 @@ public int parse(DateTimeParseContext context, CharSequence text, int position) } char ch = text.charAt(position); if (ch != literal) { - if (context.isCaseSensitive() || + if ((context.isCaseSensitive() || (Character.toUpperCase(ch) != Character.toUpperCase(literal) && - Character.toLowerCase(ch) != Character.toLowerCase(literal))) { + Character.toLowerCase(ch) != Character.toLowerCase(literal))) && + !spaceEquals(context, ch)) { return ~position; } } @@ -2766,6 +2777,12 @@ public String toString() { } return "'" + literal + "'"; } + + private boolean spaceEquals(DateTimeParseContext context, char ch) { + return !context.isStrict() && isSpaceSeparator && + Character.getType(ch) == Character.SPACE_SEPARATOR; + } + } //----------------------------------------------------------------------- diff --git a/test/jdk/java/text/Format/DateFormat/LenientSpaceParsingTest.java b/test/jdk/java/text/Format/DateFormat/LenientSpaceParsingTest.java new file mode 100644 index 0000000000000..5d59e66cf0d1c --- /dev/null +++ b/test/jdk/java/text/Format/DateFormat/LenientSpaceParsingTest.java @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test + * @bug 8324665 + * @summary Checks if SPACE_SEPARATOR are correctly parsed in lenient mode + * @run junit LenientSpaceParsingTest + */ +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.time.format.DateTimeFormatterBuilder; +import java.time.format.DateTimeParseException; +import java.util.stream.Stream; + +public class LenientSpaceParsingTest { + @MethodSource + private static Stream<Arguments> strictSpaces() { + // input, pattern + return Stream.of( + Arguments.of("00\u002000", "H\u0020m"), + Arguments.of("00\u202f00", "H\u202fm"), + Arguments.of("00\u00a000", "H\u00a0m"), + Arguments.of("00\u0020\u202f\u0020\u00a000", "H\u0020\u202f\u0020\u00a0m") + ); + } + + @MethodSource + private static Stream<Arguments> lenientSpaces() { + // input, pattern + return Stream.of( + Arguments.of("00\u002000", "H\u202fm"), + Arguments.of("00\u202f00", "H\u0020m"), + Arguments.of("00\u00a000", "H\u0020m"), + Arguments.of("00\u002000", "H\u00a0m"), 
Arguments.of("00\u0020\u202f\u0020\u00a000", "H\u0020\u0020\u0020\u0020m"), + Arguments.of("00\u0020\u202f\u0020\u00a000", "H\u202f\u00a0\u202f\u00a0m") + ); + } + + @MethodSource + private static Stream<Arguments> nonSpaces() { + // input, pattern + return Stream.of( + Arguments.of("00a00", "H\u202fm"), + Arguments.of("00a00", "H\u00a0m"), + Arguments.of("00a00", "H\u0020m"), + Arguments.of("00aa00", "H\u0020\u0020m"), + Arguments.of("00aa00", "H\u00a0\u202fm") + ); + } + + @ParameterizedTest + @MethodSource({"strictSpaces", "lenientSpaces"}) + public void checkDateTimeFormatter_Lenient(String input, String pattern) { + new DateTimeFormatterBuilder().parseLenient().appendPattern(pattern).toFormatter().parse(input); + } + + @ParameterizedTest + @MethodSource("nonSpaces") + public void checkDateTimeFormatter_Lenient_Exception(String input, String pattern) { + var dtf = new DateTimeFormatterBuilder().parseLenient().appendPattern(pattern).toFormatter(); + assertThrows(DateTimeParseException.class, () -> { + dtf.parse(input); + }); + } + + @ParameterizedTest + @MethodSource("strictSpaces") + public void checkDateTimeFormatter_Strict(String input, String pattern) { + new DateTimeFormatterBuilder().parseStrict().appendPattern(pattern).toFormatter().parse(input); + } + + @ParameterizedTest + @MethodSource({"lenientSpaces", "nonSpaces"}) + public void checkDateTimeFormatter_Strict_Exception(String input, String pattern) { + var dtf = new DateTimeFormatterBuilder().parseStrict().appendPattern(pattern).toFormatter(); + assertThrows(DateTimeParseException.class, () -> { + dtf.parse(input); + }); + } + + @ParameterizedTest + @MethodSource({"strictSpaces", "lenientSpaces"}) + public void checkSimpleDateFormat_Lenient(String input, String pattern) throws ParseException { + new SimpleDateFormat(pattern).parse(input); + } + + @ParameterizedTest + @MethodSource("nonSpaces") + public void checkSimpleDateFormat_Lenient_Exception(String input, String pattern) { + var sdf = new 
SimpleDateFormat(pattern); + assertThrows(ParseException.class, () -> { + sdf.parse(input); + }); + } + + @ParameterizedTest + @MethodSource("strictSpaces") + public void checkSimpleDateFormat_Strict(String input, String pattern) throws ParseException { + var sdf = new SimpleDateFormat(pattern); + sdf.setLenient(false); + sdf.parse(input); + } + + @ParameterizedTest + @MethodSource({"lenientSpaces", "nonSpaces"}) + public void checkSimpleDateFormat_Strict_Exception(String input, String pattern) { + var sdf = new SimpleDateFormat(pattern); + sdf.setLenient(false); + assertThrows(ParseException.class, () -> { + sdf.parse(input); + }); + } +}