From 870fd9b66c51b18966fb18ec65705efab9cf0d50 Mon Sep 17 00:00:00 2001 From: Elliott Hughes Date: Tue, 25 Aug 2009 12:55:27 -0700 Subject: [PATCH] Fix "whatever".split(".") behavior. Two special cases in Pattern.split's behavior had been incorrectly combined into one. Separate the two cases, and add tests. I've run the tests against Java 1.5 and 1.6 too. Bug: 1957900 --- .../src/main/java/java/util/regex/Pattern.java | 59 ++++++++++++---------- .../regex/tests/java/util/regex/SplitTest.java | 56 ++++++++++++++++++-- 2 files changed, 86 insertions(+), 29 deletions(-) diff --git a/libcore/regex/src/main/java/java/util/regex/Pattern.java b/libcore/regex/src/main/java/java/util/regex/Pattern.java index c058db8fb..2853bbe39 100644 --- a/libcore/regex/src/main/java/java/util/regex/Pattern.java +++ b/libcore/regex/src/main/java/java/util/regex/Pattern.java @@ -356,28 +356,33 @@ public final class Pattern implements Serializable { } /** - * Splits the given input sequence around occurrences of the {@code Pattern}. - * The function first determines all occurrences of the {@code Pattern} - * inside the input sequence. It then builds an array of the - * "remaining" strings before, in-between, and after these - * occurrences. An additional parameter determines the maximal number of - * entries in the resulting array and the handling of trailing empty - * strings. + * Splits the given input sequence at occurrences of this {@code Pattern}. + * + * If this {@code Pattern} does not occur in the input, the result is an + * array containing the input (converted from a {@code CharSequence} to + * a {@code String}). + * + * Otherwise, the {@code limit} parameter controls the contents of the + * returned array as described below. * * @param inputSeq * the input sequence. * @param limit - * Determines the maximal number of entries in the resulting - * array. + * Determines the maximum number of entries in the resulting + * array, and the treatment of trailing empty strings. 
* * * @return the resulting array. @@ -385,6 +390,13 @@ public final class Pattern implements Serializable { * @since Android 1.0 */ public String[] split(CharSequence inputSeq, int limit) { + if (inputSeq.length() == 0) { + // Unlike Perl, which considers the result of splitting the empty + // string to be the empty array, Java returns an array containing + // the empty string. + return new String[] { "" }; + } + int maxLength = limit <= 0 ? Integer.MAX_VALUE : limit; String input = inputSeq.toString(); @@ -393,14 +405,10 @@ public final class Pattern implements Serializable { Matcher matcher = new Matcher(this, inputSeq); int savedPos = 0; - // Add text preceding each occurrence, if enough space. Only do this for - // non-empty input sequences, because otherwise we'd add the "trailing - // empty string" twice. - if (inputSeq.length() != 0) { - while(matcher.find() && list.size() + 1 < maxLength) { - list.add(input.substring(savedPos, matcher.start())); - savedPos = matcher.end(); - } + // Add text preceding each occurrence, if enough space. + while(matcher.find() && list.size() + 1 < maxLength) { + list.add(input.substring(savedPos, matcher.start())); + savedPos = matcher.end(); } // Add trailing text if enough space. @@ -412,11 +420,10 @@ public final class Pattern implements Serializable { } } - // Remove trailing spaces, if limit == 0 is requested. + // Remove trailing empty matches in the limit == 0 case. if (limit == 0) { int i = list.size() - 1; - // Don't remove 1st element, since array must not be empty. 
- while(i > 0 && "".equals(list.get(i))) { + while (i >= 0 && "".equals(list.get(i))) { list.remove(i); i--; } diff --git a/libcore/regex/src/test/java/org/apache/harmony/regex/tests/java/util/regex/SplitTest.java b/libcore/regex/src/test/java/org/apache/harmony/regex/tests/java/util/regex/SplitTest.java index ea615c05c..894dfff56 100644 --- a/libcore/regex/src/test/java/org/apache/harmony/regex/tests/java/util/regex/SplitTest.java +++ b/libcore/regex/src/test/java/org/apache/harmony/regex/tests/java/util/regex/SplitTest.java @@ -32,12 +32,62 @@ public class SplitTest extends TestCase { Pattern p = Pattern.compile("/"); String[] results = p.split("have/you/done/it/right"); String[] expected = new String[] { "have", "you", "done", "it", "right" }; - assertEquals(expected.length, results.length); + assertArraysEqual(expected, results); + } + + @TestTargets({ + @TestTargetNew( + level = TestLevel.PARTIAL_COMPLETE, + notes = "Verifies the basic functionality of split with empty matches.", + method = "split", + args = {java.lang.CharSequence.class} + ) + }) + public void testEmptySplits() { + // Trailing empty matches are removed. + assertArraysEqual(new String[0], "hello".split(".")); + assertArraysEqual(new String[] { "1", "2" }, "1:2:".split(":")); + // ...including when that results in an empty result. + assertArraysEqual(new String[0], ":".split(":")); + // ...but not when limit < 0. + assertArraysEqual(new String[] { "1", "2", "" }, "1:2:".split(":", -1)); + + // Leading empty matches are retained. + assertArraysEqual(new String[] { "", "", "o" }, "hello".split("..")); + + // A separator that doesn't occur in the input gets you the input. + assertArraysEqual(new String[] { "hello" }, "hello".split("not-present-in-test")); + // ...including when the input is the empty string. + // (Perl returns an empty list instead.) 
+ assertArraysEqual(new String[] { "" }, "".split("not-present-in-test")); + assertArraysEqual(new String[] { "" }, "".split("A?")); + + // The limit argument controls the size of the result. + // If l == 0, the result is as long as needed, except trailing empty matches are dropped. + // If l < 0, the result is as long as needed, and trailing empty matches are retained. + // If l > 0, the result contains at most l strings: the first l - 1 pieces, plus one string containing the remaining input. + // Examples without a trailing separator (and hence without a trailing empty match): + assertArraysEqual(new String[] { "a", "b", "c" }, "a,b,c".split(",", 0)); + assertArraysEqual(new String[] { "a,b,c" }, "a,b,c".split(",", 1)); + assertArraysEqual(new String[] { "a", "b,c" }, "a,b,c".split(",", 2)); + assertArraysEqual(new String[] { "a", "b", "c" }, "a,b,c".split(",", 3)); + assertArraysEqual(new String[] { "a", "b", "c" }, "a,b,c".split(",", Integer.MAX_VALUE)); + // Examples with a trailing separator (and hence possibly with a trailing empty match): + assertArraysEqual(new String[] { "a", "b", "c" }, "a,b,c,".split(",", 0)); + assertArraysEqual(new String[] { "a,b,c," }, "a,b,c,".split(",", 1)); + assertArraysEqual(new String[] { "a", "b,c," }, "a,b,c,".split(",", 2)); + assertArraysEqual(new String[] { "a", "b", "c," }, "a,b,c,".split(",", 3)); + assertArraysEqual(new String[] { "a", "b", "c", "" }, "a,b,c,".split(",", Integer.MAX_VALUE)); + assertArraysEqual(new String[] { "a", "b", "c", "" }, "a,b,c,".split(",", -1)); + } + + private void assertArraysEqual(String[] expected, String[] actual) { + assertEquals(expected.length, actual.length); for (int i = 0; i < expected.length; i++) { - assertEquals(results[i], expected[i]); + assertEquals(Integer.toString(i), expected[i], actual[i]); } } - + @TestTargetNew( level = TestLevel.PARTIAL_COMPLETE, notes = "Verifies the functionality of split(java.lang.CharSequence). Test uses not empty pattern.", -- 2.11.0