OSDN Git Service

Exclude unicode space characters from autoLink URL patterns
author: Siyamed Sinir <siyamed@google.com>
Tue, 5 Apr 2016 23:32:36 +0000 (16:32 -0700)
committer: Siyamed Sinir <siyamed@google.com>
Tue, 5 Apr 2016 23:33:58 +0000 (16:33 -0700)
Excludes the following space characters from autoLink URL patterns:
\u00A0: no-break space
\u2000: en quad
\u2001: em quad
\u2002: en space
\u2003: em space
\u2004: three-per-em space
\u2005: four-per-em space
\u2006: six-per-em space
\u2007: figure space
\u2008: punctuation space
\u2009: thin space
\u200A: hair space
\u2028: line separator
\u2029: paragraph separator
\u202F: narrow no-break space
\u3000: ideographic space

Bug: 28020781
Change-Id: Ie6df818dc4d33dfee6ee54432a2231cca51ec423

core/java/android/util/Patterns.java
core/tests/coretests/src/android/util/PatternsTest.java

index 9ed4850..df91525 100644 (file)
@@ -251,7 +251,7 @@ public class Patterns {
             + "|[1-9][0-9]|[0-9]))");
 
     /**
-     * Valid UCS characters defined in RFC 3987.
+     * Valid UCS characters defined in RFC 3987. Excludes space characters.
      */
     private static final String UCS_CHAR =
             "\u00A0-\uD7FF" +
@@ -270,7 +270,8 @@ public class Patterns {
             "\uDA80\uDC00-\uDABF\uDFFD" +
             "\uDAC0\uDC00-\uDAFF\uDFFD" +
             "\uDB00\uDC00-\uDB3F\uDFFD" +
-            "\uDB44\uDC00-\uDB7F\uDFFD";
+            "\uDB44\uDC00-\uDB7F\uDFFD" +
+            "&&[^\u00A0[\u2000-\u200A]\u2028\u2029\u202F\u3000]";
 
     /**
      * Valid characters for IRI label defined in RFC 3987.
index 348f8fd..edb3082 100644 (file)
@@ -419,6 +419,36 @@ public class PatternsTest extends TestCase {
                 Patterns.AUTOLINK_WEB_URL.matcher(url).matches());
     }
 
+    @SmallTest
+    public void testAutoLinkWebUrl_doesNotMatchUnicodeSpaces() throws Exception {
+        String part1 = "http://and";
+        String part2 = "roid";
+        String[] emptySpaces = new String[]{
+                "\u00A0", // no-break space
+                "\u2000", // en quad
+                "\u2001", // em quad
+                "\u2002", // en space
+                "\u2003", // em space
+                "\u2004", // three-per-em space
+                "\u2005", // four-per-em space
+                "\u2006", // six-per-em space
+                "\u2007", // figure space
+                "\u2008", // punctuation space
+                "\u2009", // thin space
+                "\u200A", // hair space
+                "\u2028", // line separator
+                "\u2029", // paragraph separator
+                "\u202F", // narrow no-break space
+                "\u3000" // ideographic space
+        };
+
+        for (String emptySpace : emptySpaces) {
+            String url = part1 + emptySpace + part2;
+            assertFalse("Should not match empty space - code:" + emptySpace.codePointAt(0),
+                    Patterns.AUTOLINK_WEB_URL.matcher(url).matches());
+        }
+    }
+
     // Tests for Patterns.IP_ADDRESS
 
     @SmallTest