OSDN Git Service

New regular expression to autolink URLs
author Siyamed Sinir <siyamed@google.com>
Wed, 18 Nov 2015 23:02:57 +0000 (15:02 -0800)
committer Siyamed Sinir <siyamed@google.com>
Tue, 5 Jan 2016 01:59:45 +0000 (17:59 -0800)
Introduced a new regular expression to autolink URLs. The previous regular
expression treated the protocol as an optional part of a single rule. The
new expression applies separate rules for strings that do not start with a
known protocol and strings that do start with a known protocol.

- In order to reuse the previous regular expression definitions for
different parts of a URL, created constants for the different parts of
the WEB_URL expression.
- Separated the test cases for WEB_URL in order to remove the @Suppress
annotation and make them run again.
- Fixed failing tests for punycode urls, which were causing existing
tests to fail.
- Introduced a new regular expression for URL matching.
- Changed the protocol rule to be case insensitive.
- Updated IANA TLDs.
- Added tests for the new regular expression.
- Added tests for Linkify.
- Bug 9622849 is partially fixed.

Bug: 25727599
Bug: 9622849
Bug: 24500693
Bug: 23189367
Bug: 24543148

Change-Id: Ifd62455d342ca7adb22996246ff652e1d5944bec

api/current.txt
api/system-current.txt
api/test-current.txt
core/java/android/text/util/Linkify.java
core/java/android/util/Patterns.java
core/tests/coretests/src/android/util/PatternsTest.java

index 30d3020..47baf4e 100644 (file)
@@ -39175,7 +39175,7 @@ package android.util {
     method public static final java.lang.String digitsAndPlusOnly(java.util.regex.Matcher);
     field public static final java.util.regex.Pattern DOMAIN_NAME;
     field public static final java.util.regex.Pattern EMAIL_ADDRESS;
-    field public static final java.lang.String GOOD_IRI_CHAR = "a-zA-Z0-9\u00a0-\ud7ff\uf900-\ufdcf\ufdf0-\uffef";
+    field public static final deprecated java.lang.String GOOD_IRI_CHAR = "a-zA-Z0-9\u00a0-\ud7ff\uf900-\ufdcf\ufdf0-\uffef";
     field public static final java.util.regex.Pattern IP_ADDRESS;
     field public static final java.util.regex.Pattern PHONE;
     field public static final deprecated java.util.regex.Pattern TOP_LEVEL_DOMAIN;
index d7393b7..450c097 100644 (file)
@@ -41528,7 +41528,7 @@ package android.util {
     method public static final java.lang.String digitsAndPlusOnly(java.util.regex.Matcher);
     field public static final java.util.regex.Pattern DOMAIN_NAME;
     field public static final java.util.regex.Pattern EMAIL_ADDRESS;
-    field public static final java.lang.String GOOD_IRI_CHAR = "a-zA-Z0-9\u00a0-\ud7ff\uf900-\ufdcf\ufdf0-\uffef";
+    field public static final deprecated java.lang.String GOOD_IRI_CHAR = "a-zA-Z0-9\u00a0-\ud7ff\uf900-\ufdcf\ufdf0-\uffef";
     field public static final java.util.regex.Pattern IP_ADDRESS;
     field public static final java.util.regex.Pattern PHONE;
     field public static final deprecated java.util.regex.Pattern TOP_LEVEL_DOMAIN;
index 9058fe7..2d7291f 100644 (file)
@@ -39178,7 +39178,7 @@ package android.util {
     method public static final java.lang.String digitsAndPlusOnly(java.util.regex.Matcher);
     field public static final java.util.regex.Pattern DOMAIN_NAME;
     field public static final java.util.regex.Pattern EMAIL_ADDRESS;
-    field public static final java.lang.String GOOD_IRI_CHAR = "a-zA-Z0-9\u00a0-\ud7ff\uf900-\ufdcf\ufdf0-\uffef";
+    field public static final deprecated java.lang.String GOOD_IRI_CHAR = "a-zA-Z0-9\u00a0-\ud7ff\uf900-\ufdcf\ufdf0-\uffef";
     field public static final java.util.regex.Pattern IP_ADDRESS;
     field public static final java.util.regex.Pattern PHONE;
     field public static final deprecated java.util.regex.Pattern TOP_LEVEL_DOMAIN;
index c119277..fbd9924 100644 (file)
@@ -218,7 +218,7 @@ public class Linkify {
         ArrayList<LinkSpec> links = new ArrayList<LinkSpec>();
 
         if ((mask & WEB_URLS) != 0) {
-            gatherLinks(links, text, Patterns.WEB_URL,
+            gatherLinks(links, text, Patterns.AUTOLINK_WEB_URL,
                 new String[] { "http://", "https://", "rtsp://" },
                 sUrlMatchFilter, null);
         }
index 2cc91b9..9f2bcfd 100644 (file)
@@ -109,11 +109,137 @@ public class Patterns {
         + "|z[amw]))";
 
     /**
-     * Good characters for Internationalized Resource Identifiers (IRI).
-     * This comprises most common used Unicode characters allowed in IRI
-     * as detailed in RFC 3987.
-     * Specifically, those two byte Unicode characters are not included.
+     *  Regular expression to match all IANA top-level domains.
+     *
+     *  List accurate as of 2015/11/24.  List taken from:
+     *  http://data.iana.org/TLD/tlds-alpha-by-domain.txt
+     *  This pattern is auto-generated by frameworks/ex/common/tools/make-iana-tld-pattern.py
+     *
+     *  @hide
      */
+    static final String IANA_TOP_LEVEL_DOMAINS =
+        "(?:"
+        + "(?:aaa|aarp|abb|abbott|abogado|academy|accenture|accountant|accountants|aco|active"
+        + "|actor|ads|adult|aeg|aero|afl|agency|aig|airforce|airtel|allfinanz|alsace|amica|amsterdam"
+        + "|android|apartments|app|apple|aquarelle|aramco|archi|army|arpa|arte|asia|associates"
+        + "|attorney|auction|audio|auto|autos|axa|azure|a[cdefgilmoqrstuwxz])"
+        + "|(?:band|bank|bar|barcelona|barclaycard|barclays|bargains|bauhaus|bayern|bbc|bbva"
+        + "|bcn|beats|beer|bentley|berlin|best|bet|bharti|bible|bid|bike|bing|bingo|bio|biz|black"
+        + "|blackfriday|bloomberg|blue|bms|bmw|bnl|bnpparibas|boats|bom|bond|boo|boots|boutique"
+        + "|bradesco|bridgestone|broadway|broker|brother|brussels|budapest|build|builders|business"
+        + "|buzz|bzh|b[abdefghijmnorstvwyz])"
+        + "|(?:cab|cafe|cal|camera|camp|cancerresearch|canon|capetown|capital|car|caravan|cards"
+        + "|care|career|careers|cars|cartier|casa|cash|casino|cat|catering|cba|cbn|ceb|center|ceo"
+        + "|cern|cfa|cfd|chanel|channel|chat|cheap|chloe|christmas|chrome|church|cipriani|cisco"
+        + "|citic|city|cityeats|claims|cleaning|click|clinic|clothing|cloud|club|clubmed|coach"
+        + "|codes|coffee|college|cologne|com|commbank|community|company|computer|comsec|condos"
+        + "|construction|consulting|contractors|cooking|cool|coop|corsica|country|coupons|courses"
+        + "|credit|creditcard|creditunion|cricket|crown|crs|cruises|csc|cuisinella|cymru|cyou|c[acdfghiklmnoruvwxyz])"
+        + "|(?:dabur|dad|dance|date|dating|datsun|day|dclk|deals|degree|delivery|dell|delta"
+        + "|democrat|dental|dentist|desi|design|dev|diamonds|diet|digital|direct|directory|discount"
+        + "|dnp|docs|dog|doha|domains|doosan|download|drive|durban|dvag|d[ejkmoz])"
+        + "|(?:earth|eat|edu|education|email|emerck|energy|engineer|engineering|enterprises"
+        + "|epson|equipment|erni|esq|estate|eurovision|eus|events|everbank|exchange|expert|exposed"
+        + "|express|e[cegrstu])"
+        + "|(?:fage|fail|fairwinds|faith|family|fan|fans|farm|fashion|feedback|ferrero|film"
+        + "|final|finance|financial|firmdale|fish|fishing|fit|fitness|flights|florist|flowers|flsmidth"
+        + "|fly|foo|football|forex|forsale|forum|foundation|frl|frogans|fund|furniture|futbol|fyi"
+        + "|f[ijkmor])"
+        + "|(?:gal|gallery|game|garden|gbiz|gdn|gea|gent|genting|ggee|gift|gifts|gives|giving"
+        + "|glass|gle|global|globo|gmail|gmo|gmx|gold|goldpoint|golf|goo|goog|google|gop|gov|grainger"
+        + "|graphics|gratis|green|gripe|group|gucci|guge|guide|guitars|guru|g[abdefghilmnpqrstuwy])"
+        + "|(?:hamburg|hangout|haus|healthcare|help|here|hermes|hiphop|hitachi|hiv|hockey|holdings"
+        + "|holiday|homedepot|homes|honda|horse|host|hosting|hoteles|hotmail|house|how|hsbc|hyundai"
+        + "|h[kmnrtu])"
+        + "|(?:ibm|icbc|ice|icu|ifm|iinet|immo|immobilien|industries|infiniti|info|ing|ink|institute"
+        + "|insure|int|international|investments|ipiranga|irish|ist|istanbul|itau|iwc|i[delmnoqrst])"
+        + "|(?:jaguar|java|jcb|jetzt|jewelry|jlc|jll|jobs|joburg|jprs|juegos|j[emop])"
+        + "|(?:kaufen|kddi|kia|kim|kinder|kitchen|kiwi|koeln|komatsu|krd|kred|kyoto|k[eghimnprwyz])"
+        + "|(?:lacaixa|lancaster|land|landrover|lasalle|lat|latrobe|law|lawyer|lds|lease|leclerc"
+        + "|legal|lexus|lgbt|liaison|lidl|life|lifestyle|lighting|limited|limo|linde|link|live"
+        + "|lixil|loan|loans|lol|london|lotte|lotto|love|ltd|ltda|lupin|luxe|luxury|l[abcikrstuvy])"
+        + "|(?:madrid|maif|maison|man|management|mango|market|marketing|markets|marriott|mba"
+        + "|media|meet|melbourne|meme|memorial|men|menu|meo|miami|microsoft|mil|mini|mma|mobi|moda"
+        + "|moe|moi|mom|monash|money|montblanc|mormon|mortgage|moscow|motorcycles|mov|movie|movistar"
+        + "|mtn|mtpc|mtr|museum|mutuelle|m[acdeghklmnopqrstuvwxyz])"
+        + "|(?:nadex|nagoya|name|navy|nec|net|netbank|network|neustar|new|news|nexus|ngo|nhk"
+        + "|nico|ninja|nissan|nokia|nra|nrw|ntt|nyc|n[acefgilopruz])"
+        + "|(?:obi|office|okinawa|omega|one|ong|onl|online|ooo|oracle|orange|org|organic|osaka"
+        + "|otsuka|ovh|om)"
+        + "|(?:page|panerai|paris|partners|parts|party|pet|pharmacy|philips|photo|photography"
+        + "|photos|physio|piaget|pics|pictet|pictures|ping|pink|pizza|place|play|playstation|plumbing"
+        + "|plus|pohl|poker|porn|post|praxi|press|pro|prod|productions|prof|properties|property"
+        + "|protection|pub|p[aefghklmnrstwy])"
+        + "|(?:qpon|quebec|qa)"
+        + "|(?:racing|realtor|realty|recipes|red|redstone|rehab|reise|reisen|reit|ren|rent|rentals"
+        + "|repair|report|republican|rest|restaurant|review|reviews|rich|ricoh|rio|rip|rocher|rocks"
+        + "|rodeo|rsvp|ruhr|run|rwe|ryukyu|r[eosuw])"
+        + "|(?:saarland|sakura|sale|samsung|sandvik|sandvikcoromant|sanofi|sap|sapo|sarl|saxo"
+        + "|sbs|sca|scb|schmidt|scholarships|school|schule|schwarz|science|scor|scot|seat|security"
+        + "|seek|sener|services|seven|sew|sex|sexy|shiksha|shoes|show|shriram|singles|site|ski"
+        + "|sky|skype|sncf|soccer|social|software|sohu|solar|solutions|sony|soy|space|spiegel|spreadbetting"
+        + "|srl|stada|starhub|statoil|stc|stcgroup|stockholm|studio|study|style|sucks|supplies"
+        + "|supply|support|surf|surgery|suzuki|swatch|swiss|sydney|systems|s[abcdeghijklmnortuvxyz])"
+        + "|(?:tab|taipei|tatamotors|tatar|tattoo|tax|taxi|team|tech|technology|tel|telefonica"
+        + "|temasek|tennis|thd|theater|theatre|tickets|tienda|tips|tires|tirol|today|tokyo|tools"
+        + "|top|toray|toshiba|tours|town|toyota|toys|trade|trading|training|travel|trust|tui|t[cdfghjklmnortvwz])"
+        + "|(?:ubs|university|uno|uol|u[agksyz])"
+        + "|(?:vacations|vana|vegas|ventures|versicherung|vet|viajes|video|villas|vin|virgin"
+        + "|vision|vista|vistaprint|viva|vlaanderen|vodka|vote|voting|voto|voyage|v[aceginu])"
+        + "|(?:wales|walter|wang|watch|webcam|website|wed|wedding|weir|whoswho|wien|wiki|williamhill"
+        + "|win|windows|wine|wme|work|works|world|wtc|wtf|w[fs])"
+        + "|(?:\u03b5\u03bb|\u0431\u0435\u043b|\u0434\u0435\u0442\u0438|\u043a\u043e\u043c|\u043c\u043a\u0434"
+        + "|\u043c\u043e\u043d|\u043c\u043e\u0441\u043a\u0432\u0430|\u043e\u043d\u043b\u0430\u0439\u043d"
+        + "|\u043e\u0440\u0433|\u0440\u0443\u0441|\u0440\u0444|\u0441\u0430\u0439\u0442|\u0441\u0440\u0431"
+        + "|\u0443\u043a\u0440|\u049b\u0430\u0437|\u0570\u0561\u0575|\u05e7\u05d5\u05dd|\u0627\u0631\u0627\u0645\u0643\u0648"
+        + "|\u0627\u0644\u0627\u0631\u062f\u0646|\u0627\u0644\u062c\u0632\u0627\u0626\u0631|\u0627\u0644\u0633\u0639\u0648\u062f\u064a\u0629"
+        + "|\u0627\u0644\u0645\u063a\u0631\u0628|\u0627\u0645\u0627\u0631\u0627\u062a|\u0627\u06cc\u0631\u0627\u0646"
+        + "|\u0628\u0627\u0632\u0627\u0631|\u0628\u06be\u0627\u0631\u062a|\u062a\u0648\u0646\u0633"
+        + "|\u0633\u0648\u062f\u0627\u0646|\u0633\u0648\u0631\u064a\u0629|\u0634\u0628\u0643\u0629"
+        + "|\u0639\u0631\u0627\u0642|\u0639\u0645\u0627\u0646|\u0641\u0644\u0633\u0637\u064a\u0646"
+        + "|\u0642\u0637\u0631|\u0643\u0648\u0645|\u0645\u0635\u0631|\u0645\u0644\u064a\u0633\u064a\u0627"
+        + "|\u0645\u0648\u0642\u0639|\u0915\u0949\u092e|\u0928\u0947\u091f|\u092d\u093e\u0930\u0924"
+        + "|\u0938\u0902\u0917\u0920\u0928|\u09ad\u09be\u09b0\u09a4|\u0a2d\u0a3e\u0a30\u0a24|\u0aad\u0abe\u0ab0\u0aa4"
+        + "|\u0b87\u0ba8\u0bcd\u0ba4\u0bbf\u0baf\u0bbe|\u0b87\u0bb2\u0b99\u0bcd\u0b95\u0bc8|\u0b9a\u0bbf\u0b99\u0bcd\u0b95\u0baa\u0bcd\u0baa\u0bc2\u0bb0\u0bcd"
+        + "|\u0c2d\u0c3e\u0c30\u0c24\u0c4d|\u0dbd\u0d82\u0d9a\u0dcf|\u0e04\u0e2d\u0e21|\u0e44\u0e17\u0e22"
+        + "|\u10d2\u10d4|\u307f\u3093\u306a|\u30b0\u30fc\u30b0\u30eb|\u30b3\u30e0|\u4e16\u754c"
+        + "|\u4e2d\u4fe1|\u4e2d\u56fd|\u4e2d\u570b|\u4e2d\u6587\u7f51|\u4f01\u4e1a|\u4f5b\u5c71"
+        + "|\u4fe1\u606f|\u5065\u5eb7|\u516b\u5366|\u516c\u53f8|\u516c\u76ca|\u53f0\u6e7e|\u53f0\u7063"
+        + "|\u5546\u57ce|\u5546\u5e97|\u5546\u6807|\u5728\u7ebf|\u5927\u62ff|\u5a31\u4e50|\u5de5\u884c"
+        + "|\u5e7f\u4e1c|\u6148\u5584|\u6211\u7231\u4f60|\u624b\u673a|\u653f\u52a1|\u653f\u5e9c"
+        + "|\u65b0\u52a0\u5761|\u65b0\u95fb|\u65f6\u5c1a|\u673a\u6784|\u6de1\u9a6c\u9521|\u6e38\u620f"
+        + "|\u70b9\u770b|\u79fb\u52a8|\u7ec4\u7ec7\u673a\u6784|\u7f51\u5740|\u7f51\u5e97|\u7f51\u7edc"
+        + "|\u8c37\u6b4c|\u96c6\u56e2|\u98de\u5229\u6d66|\u9910\u5385|\u9999\u6e2f|\ub2f7\ub137"
+        + "|\ub2f7\ucef4|\uc0bc\uc131|\ud55c\uad6d|xbox"
+        + "|xerox|xin|xn\\-\\-11b4c3d|xn\\-\\-1qqw23a|xn\\-\\-30rr7y|xn\\-\\-3bst00m|xn\\-\\-3ds443g"
+        + "|xn\\-\\-3e0b707e|xn\\-\\-3pxu8k|xn\\-\\-42c2d9a|xn\\-\\-45brj9c|xn\\-\\-45q11c|xn\\-\\-4gbrim"
+        + "|xn\\-\\-55qw42g|xn\\-\\-55qx5d|xn\\-\\-6frz82g|xn\\-\\-6qq986b3xl|xn\\-\\-80adxhks"
+        + "|xn\\-\\-80ao21a|xn\\-\\-80asehdb|xn\\-\\-80aswg|xn\\-\\-90a3ac|xn\\-\\-90ais|xn\\-\\-9dbq2a"
+        + "|xn\\-\\-9et52u|xn\\-\\-b4w605ferd|xn\\-\\-c1avg|xn\\-\\-c2br7g|xn\\-\\-cg4bki|xn\\-\\-clchc0ea0b2g2a9gcd"
+        + "|xn\\-\\-czr694b|xn\\-\\-czrs0t|xn\\-\\-czru2d|xn\\-\\-d1acj3b|xn\\-\\-d1alf|xn\\-\\-efvy88h"
+        + "|xn\\-\\-estv75g|xn\\-\\-fhbei|xn\\-\\-fiq228c5hs|xn\\-\\-fiq64b|xn\\-\\-fiqs8s|xn\\-\\-fiqz9s"
+        + "|xn\\-\\-fjq720a|xn\\-\\-flw351e|xn\\-\\-fpcrj9c3d|xn\\-\\-fzc2c9e2c|xn\\-\\-gecrj9c"
+        + "|xn\\-\\-h2brj9c|xn\\-\\-hxt814e|xn\\-\\-i1b6b1a6a2e|xn\\-\\-imr513n|xn\\-\\-io0a7i"
+        + "|xn\\-\\-j1aef|xn\\-\\-j1amh|xn\\-\\-j6w193g|xn\\-\\-kcrx77d1x4a|xn\\-\\-kprw13d|xn\\-\\-kpry57d"
+        + "|xn\\-\\-kput3i|xn\\-\\-l1acc|xn\\-\\-lgbbat1ad8j|xn\\-\\-mgb9awbf|xn\\-\\-mgba3a3ejt"
+        + "|xn\\-\\-mgba3a4f16a|xn\\-\\-mgbaam7a8h|xn\\-\\-mgbab2bd|xn\\-\\-mgbayh7gpa|xn\\-\\-mgbbh1a71e"
+        + "|xn\\-\\-mgbc0a9azcg|xn\\-\\-mgberp4a5d4ar|xn\\-\\-mgbpl2fh|xn\\-\\-mgbtx2b|xn\\-\\-mgbx4cd0ab"
+        + "|xn\\-\\-mk1bu44c|xn\\-\\-mxtq1m|xn\\-\\-ngbc5azd|xn\\-\\-node|xn\\-\\-nqv7f|xn\\-\\-nqv7fs00ema"
+        + "|xn\\-\\-nyqy26a|xn\\-\\-o3cw4h|xn\\-\\-ogbpf8fl|xn\\-\\-p1acf|xn\\-\\-p1ai|xn\\-\\-pgbs0dh"
+        + "|xn\\-\\-pssy2u|xn\\-\\-q9jyb4c|xn\\-\\-qcka1pmc|xn\\-\\-qxam|xn\\-\\-rhqv96g|xn\\-\\-s9brj9c"
+        + "|xn\\-\\-ses554g|xn\\-\\-t60b56a|xn\\-\\-tckwe|xn\\-\\-unup4y|xn\\-\\-vermgensberater\\-ctb"
+        + "|xn\\-\\-vermgensberatung\\-pwb|xn\\-\\-vhquv|xn\\-\\-vuq861b|xn\\-\\-wgbh1c|xn\\-\\-wgbl6a"
+        + "|xn\\-\\-xhq521b|xn\\-\\-xkc2al3hye2a|xn\\-\\-xkc2dl3a5ee0h|xn\\-\\-y9a3aq|xn\\-\\-yfro4i67o"
+        + "|xn\\-\\-ygbi2ammx|xn\\-\\-zfr164b|xperia|xxx|xyz)"
+        + "|(?:yachts|yamaxun|yandex|yodobashi|yoga|yokohama|youtube|y[et])"
+        + "|(?:zara|zip|zone|zuerich|z[amw]))";
+
+    /**
+     * Kept for backward compatibility reasons.
+     *
+     * @deprecated Deprecated since it does not include all IRI characters defined in RFC 3987
+     */
+    @Deprecated
     public static final String GOOD_IRI_CHAR =
         "a-zA-Z0-9\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF";
 
@@ -125,35 +251,148 @@ public class Patterns {
             + "|[1-9][0-9]|[0-9]))");
 
     /**
+     * Valid UCS characters defined in RFC 3987.
+     */
+    private static final String UCS_CHAR =
+            "\u00A0-\uD7FF" +
+            "\uF900-\uFDCF" +
+            "\uFDF0-\uFFEF" +
+            "\uD800\uDC00-\uD83F\uDFFD" +
+            "\uD840\uDC00-\uD87F\uDFFD" +
+            "\uD880\uDC00-\uD8BF\uDFFD" +
+            "\uD8C0\uDC00-\uD8FF\uDFFD" +
+            "\uD900\uDC00-\uD93F\uDFFD" +
+            "\uD940\uDC00-\uD97F\uDFFD" +
+            "\uD980\uDC00-\uD9BF\uDFFD" +
+            "\uD9C0\uDC00-\uD9FF\uDFFD" +
+            "\uDA00\uDC00-\uDA3F\uDFFD" +
+            "\uDA40\uDC00-\uDA7F\uDFFD" +
+            "\uDA80\uDC00-\uDABF\uDFFD" +
+            "\uDAC0\uDC00-\uDAFF\uDFFD" +
+            "\uDB00\uDC00-\uDB3F\uDFFD" +
+            "\uDB44\uDC00-\uDB7F\uDFFD";
+
+    /**
+     * Valid characters for IRI label defined in RFC 3987.
+     */
+    private static final String LABEL_CHAR = "a-zA-Z0-9" + UCS_CHAR;
+
+    /**
+     * Valid characters for IRI TLD defined in RFC 3987.
+     */
+    private static final String TLD_CHAR = "a-zA-Z" + UCS_CHAR;
+
+    /**
      * RFC 1035 Section 2.3.4 limits the labels to a maximum 63 octets.
      */
-    private static final String IRI
-        = "[" + GOOD_IRI_CHAR + "]([" + GOOD_IRI_CHAR + "\\-]{0,61}[" + GOOD_IRI_CHAR + "]){0,1}";
+    private static final String IRI_LABEL =
+            "[" + LABEL_CHAR + "](?:[" + LABEL_CHAR + "\\-]{0,61}[" + LABEL_CHAR + "]){0,1}";
 
-    private static final String GOOD_GTLD_CHAR =
-        "a-zA-Z\u00A0-\uD7FF\uF900-\uFDCF\uFDF0-\uFFEF";
-    private static final String GTLD = "[" + GOOD_GTLD_CHAR + "]{2,63}";
-    private static final String HOST_NAME = "(" + IRI + "\\.)+" + GTLD;
+    /**
+     * RFC 3492 references RFC 1034 and limits Punycode algorithm output to 63 characters.
+     */
+    private static final String PUNYCODE_TLD = "xn\\-\\-[\\w\\-]{0,58}\\w";
+
+    private static final String TLD = "(" + PUNYCODE_TLD + "|" + "[" + TLD_CHAR + "]{2,63}" +")";
+
+    private static final String HOST_NAME = "(" + IRI_LABEL + "\\.)+" + TLD;
 
     public static final Pattern DOMAIN_NAME
         = Pattern.compile("(" + HOST_NAME + "|" + IP_ADDRESS + ")");
 
+    private static final String PROTOCOL = "(?i:http|https|rtsp):\\/\\/";
+
+    /* Word boundary or start/end of input.  This stops foo.sure from matching as foo.su */
+    private static final String WORD_BOUNDARY = "(?:\\b|$|^)";
+
+    private static final String USER_INFO = "(?:[a-zA-Z0-9\\$\\-\\_\\.\\+\\!\\*\\'\\(\\)"
+            + "\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,64}(?:\\:(?:[a-zA-Z0-9\\$\\-\\_"
+            + "\\.\\+\\!\\*\\'\\(\\)\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,25})?\\@";
+
+    private static final String PORT_NUMBER = "\\:\\d{1,5}";
+
+    private static final String PATH_AND_QUERY = "\\/(?:(?:[" + LABEL_CHAR
+            + "\\;\\/\\?\\:\\@\\&\\=\\#\\~"  // plus optional query params
+            + "\\-\\.\\+\\!\\*\\'\\(\\)\\,\\_])|(?:\\%[a-fA-F0-9]{2}))*";
+
     /**
      *  Regular expression pattern to match most part of RFC 3987
-     *  Internationalized URLs, aka IRIs.  Commonly used Unicode characters are
-     *  added.
-     */
-    public static final Pattern WEB_URL = Pattern.compile(
-        "((?:(http|https|Http|Https|rtsp|Rtsp):\\/\\/(?:(?:[a-zA-Z0-9\\$\\-\\_\\.\\+\\!\\*\\'\\(\\)"
-        + "\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,64}(?:\\:(?:[a-zA-Z0-9\\$\\-\\_"
-        + "\\.\\+\\!\\*\\'\\(\\)\\,\\;\\?\\&\\=]|(?:\\%[a-fA-F0-9]{2})){1,25})?\\@)?)?"
-        + "(?:" + DOMAIN_NAME + ")"
-        + "(?:\\:\\d{1,5})?)" // plus option port number
-        + "(\\/(?:(?:[" + GOOD_IRI_CHAR + "\\;\\/\\?\\:\\@\\&\\=\\#\\~"  // plus option query params
-        + "\\-\\.\\+\\!\\*\\'\\(\\)\\,\\_])|(?:\\%[a-fA-F0-9]{2}))*)?"
-        + "(?:\\b|$)"); // and finally, a word boundary or end of
-                        // input.  This is to stop foo.sure from
-                        // matching as foo.su
+     *  Internationalized URLs, aka IRIs.
+     */
+    public static final Pattern WEB_URL = Pattern.compile("("
+            + "("
+            + "(?:" + PROTOCOL + "(?:" + USER_INFO + ")?" + ")?"
+            + "(?:" + DOMAIN_NAME + ")"
+            + "(?:" + PORT_NUMBER + ")?"
+            + ")"
+            + "(" + PATH_AND_QUERY + ")?"
+            + WORD_BOUNDARY
+            + ")");
+
+    /**
+     * Regular expression that matches known TLDs and punycode TLDs
+     */
+    private static final String STRICT_TLD = "(?:" +
+            IANA_TOP_LEVEL_DOMAINS + "|" + PUNYCODE_TLD + ")";
+
+    /**
+     * Regular expression that matches host names using {@link #STRICT_TLD}
+     */
+    private static final String STRICT_HOST_NAME = "(?:(?:" + IRI_LABEL + "\\.)+"
+            + STRICT_TLD + ")";
+
+    /**
+     * Regular expression that matches domain names using either {@link #STRICT_HOST_NAME} or
+     * {@link #IP_ADDRESS}
+     */
+    private static final Pattern STRICT_DOMAIN_NAME
+            = Pattern.compile("(?:" + STRICT_HOST_NAME + "|" + IP_ADDRESS + ")");
+
+    /**
+     * Regular expression that matches domain names without a TLD
+     */
+    private static final String RELAXED_DOMAIN_NAME =
+            "(?:" + "(?:" + IRI_LABEL + "(?:\\.(?=\\S))" +"?)+" + "|" + IP_ADDRESS + ")";
+
+    /**
+     * Regular expression to match strings that do not start with a supported protocol. The TLDs
+     * are expected to be one of the known TLDs.
+     */
+    private static final String WEB_URL_WITHOUT_PROTOCOL = "("
+            + WORD_BOUNDARY
+            + "(?<!:\\/\\/)"
+            + "("
+            + "(?:" + STRICT_DOMAIN_NAME + ")"
+            + "(?:" + PORT_NUMBER + ")?"
+            + ")"
+            + "(?:" + PATH_AND_QUERY + ")?"
+            + WORD_BOUNDARY
+            + ")";
+
+    /**
+     * Regular expression to match strings that start with a supported protocol. Rules for domain
+     * names and TLDs are more relaxed. TLDs are optional.
+     */
+    private static final String WEB_URL_WITH_PROTOCOL = "("
+            + WORD_BOUNDARY
+            + "(?:"
+            + "(?:" + PROTOCOL + "(?:" + USER_INFO + ")?" + ")"
+            + "(?:" + RELAXED_DOMAIN_NAME + ")?"
+            + "(?:" + PORT_NUMBER + ")?"
+            + ")"
+            + "(?:" + PATH_AND_QUERY + ")?"
+            + WORD_BOUNDARY
+            + ")";
+
+    /**
+     * Regular expression pattern to match IRIs. If a string starts with a supported protocol
+     * (http, https or rtsp), the URL structure is matched with a relaxed rule for TLDs.
+     * Otherwise the TLD is expected to be one of the known TLDs.
+     *
+     * @hide
+     */
+    public static final Pattern AUTOLINK_WEB_URL = Pattern.compile(
+            "(" + WEB_URL_WITH_PROTOCOL + "|" + WEB_URL_WITHOUT_PROTOCOL + ")");
 
     public static final Pattern EMAIL_ADDRESS
         = Pattern.compile(
index 253eb25..d383775 100644 (file)
@@ -17,14 +17,16 @@ package android.util;
 
 import android.test.suitebuilder.annotation.SmallTest;
 import android.test.suitebuilder.annotation.Suppress;
-import android.util.Patterns;
 
 import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 import junit.framework.TestCase;
 
 public class PatternsTest extends TestCase {
 
+    //Tests for Patterns.TOP_LEVEL_DOMAIN
+
     @SmallTest
     public void testTldPattern() throws Exception {
         boolean t;
@@ -40,7 +42,7 @@ public class PatternsTest extends TestCase {
         t = Patterns.TOP_LEVEL_DOMAIN.matcher("xn--0zwm56d").matches();
         assertTrue("Missed valid TLD", t);
 
-        // One of the new top level internationalized domain.
+        // One of the new top level unicode domain.
         t = Patterns.TOP_LEVEL_DOMAIN.matcher("\uD55C\uAD6D").matches();
         assertTrue("Missed valid TLD", t);
 
@@ -54,60 +56,372 @@ public class PatternsTest extends TestCase {
         assertFalse("Matched invalid TLD!", t);
     }
 
+    //Tests for Patterns.IANA_TOP_LEVEL_DOMAINS
+
     @SmallTest
-    @Suppress // Failing.
-    public void testUrlPattern() throws Exception {
-        boolean t;
+    public void testIanaTopLevelDomains_matchesValidTld() throws Exception {
+        Pattern pattern = Pattern.compile(Patterns.IANA_TOP_LEVEL_DOMAINS);
+        assertTrue("Should match 'com'", pattern.matcher("com").matches());
+    }
 
-        t = Patterns.WEB_URL.matcher("http://www.google.com").matches();
-        assertTrue("Valid URL", t);
+    @SmallTest
+    public void testIanaTopLevelDomains_matchesValidNewTld() throws Exception {
+        Pattern pattern = Pattern.compile(Patterns.IANA_TOP_LEVEL_DOMAINS);
+        assertTrue("Should match 'me'", pattern.matcher("me").matches());
+    }
 
-        // Google in one of the new top level domain.
-        t = Patterns.WEB_URL.matcher("http://www.google.me").matches();
-        assertTrue("Valid URL", t);
-        t = Patterns.WEB_URL.matcher("google.me").matches();
-        assertTrue("Valid URL", t);
+    @SmallTest
+    public void testIanaTopLevelDomains_matchesPunycodeTld() throws Exception {
+        Pattern pattern = Pattern.compile(Patterns.IANA_TOP_LEVEL_DOMAINS);
+        assertTrue("Should match Punycode TLD", pattern.matcher("xn--qxam").matches());
+    }
 
-        // Test url in Chinese: http://xn--fsqu00a.xn--0zwm56d
-        t = Patterns.WEB_URL.matcher("http://xn--fsqu00a.xn--0zwm56d").matches();
-        assertTrue("Valid URL", t);
-        t = Patterns.WEB_URL.matcher("xn--fsqu00a.xn--0zwm56d").matches();
-        assertTrue("Valid URL", t);
+    @SmallTest
+    public void testIanaTopLevelDomains_matchesIriTLD() throws Exception {
+        Pattern pattern = Pattern.compile(Patterns.IANA_TOP_LEVEL_DOMAINS);
+        assertTrue("Should match IRI TLD", pattern.matcher("\uD55C\uAD6D").matches());
+    }
 
-        // Url for testing top level Arabic country code domain in Punycode:
-        //   http://xn--4gbrim.xn----rmckbbajlc6dj7bxne2c.xn--wgbh1c/ar/default.aspx
-        t = Patterns.WEB_URL.matcher("http://xn--4gbrim.xn----rmckbbajlc6dj7bxne2c.xn--wgbh1c/ar/default.aspx").matches();
-        assertTrue("Valid URL", t);
-        t = Patterns.WEB_URL.matcher("xn--4gbrim.xn----rmckbbajlc6dj7bxne2c.xn--wgbh1c/ar/default.aspx").matches();
-        assertTrue("Valid URL", t);
+    @SmallTest
+    public void testIanaTopLevelDomains_doesNotMatchWrongTld() throws Exception {
+        Pattern pattern = Pattern.compile(Patterns.IANA_TOP_LEVEL_DOMAINS);
+        assertFalse("Should not match 'mem'", pattern.matcher("mem").matches());
+    }
 
-        // Internationalized URL.
-        t = Patterns.WEB_URL.matcher("http://\uD604\uAE08\uC601\uC218\uC99D.kr").matches();
-        assertTrue("Valid URL", t);
-        t = Patterns.WEB_URL.matcher("\uD604\uAE08\uC601\uC218\uC99D.kr").matches();
-        assertTrue("Valid URL", t);
-        // URL with international TLD.
-        t = Patterns.WEB_URL.matcher("\uB3C4\uBA54\uC778.\uD55C\uAD6D").matches();
-        assertTrue("Valid URL", t);
+    @SmallTest
+    public void testIanaTopLevelDomains_doesNotMatchWrongPunycodeTld() throws Exception {
+        Pattern pattern = Pattern.compile(Patterns.IANA_TOP_LEVEL_DOMAINS);
+        assertFalse("Should not match invalid Punycode TLD", pattern.matcher("xn").matches());
+    }
 
-        t = Patterns.WEB_URL.matcher("http://brainstormtech.blogs.fortune.cnn.com/2010/03/11/" +
-            "top-five-moments-from-eric-schmidt\u2019s-talk-in-abu-dhabi/").matches();
-        assertTrue("Valid URL", t);
+    //Tests for Patterns.WEB_URL
 
-        t = Patterns.WEB_URL.matcher("ftp://www.example.com").matches();
-        assertFalse("Matched invalid protocol", t);
+    @SmallTest
+    public void testWebUrl_matchesValidUrlWithSchemeAndHostname() throws Exception {
+        String url = "http://www.android.com";
+        assertTrue("Should match URL with scheme and hostname",
+                Patterns.WEB_URL.matcher(url).matches());
+    }
 
-        t = Patterns.WEB_URL.matcher("http://www.example.com:8080").matches();
-        assertTrue("Didn't match valid URL with port", t);
+    @SmallTest
+    public void testWebUrl_matchesValidUrlWithSchemeHostnameAndNewTld() throws Exception {
+        String url = "http://www.android.me";
+        assertTrue("Should match URL with scheme, hostname and new TLD",
+                Patterns.WEB_URL.matcher(url).matches());
+    }
 
-        t = Patterns.WEB_URL.matcher("http://www.example.com:8080/?foo=bar").matches();
-        assertTrue("Didn't match valid URL with port and query args", t);
+    @SmallTest
+    public void testWebUrl_matchesValidUrlWithHostnameAndNewTld() throws Exception {
+        String url = "android.me";
+        assertTrue("Should match URL with hostname and new TLD",
+                Patterns.WEB_URL.matcher(url).matches());
+    }
 
-        t = Patterns.WEB_URL.matcher("http://www.example.com:8080/~user/?foo=bar").matches();
-        assertTrue("Didn't match valid URL with ~", t);
+    @SmallTest
+    public void testWebUrl_matchesChinesePunycodeUrlWithProtocol() throws Exception {
+        String url = "http://xn--fsqu00a.xn--0zwm56d";
+        assertTrue("Should match Chinese Punycode URL with protocol",
+                Patterns.WEB_URL.matcher(url).matches());
+    }
+
+    @SmallTest
+    public void testWebUrl_matchesChinesePunycodeUrlWithoutProtocol() throws Exception {
+        String url = "xn--fsqu00a.xn--0zwm56d";
+        assertTrue("Should match Chinese Punycode URL without protocol",
+                Patterns.WEB_URL.matcher(url).matches());
+    }
+
+
+    @SmallTest
+    public void testWebUrl_matchesArabicPunycodeUrlWithProtocol() throws Exception {
+        String url = "http://xn--4gbrim.xn----rmckbbajlc6dj7bxne2c.xn--wgbh1c/ar/default.aspx";
+        assertTrue("Should match arabic Punycode URL with protocol",
+                Patterns.WEB_URL.matcher(url).matches());
+    }
+
+    @SmallTest
+    public void testWebUrl_matchesArabicPunycodeUrlWithoutProtocol() throws Exception {
+        String url = "xn--4gbrim.xn----rmckbbajlc6dj7bxne2c.xn--wgbh1c/ar/default.aspx";
+        assertTrue("Should match Arabic Punycode URL without protocol",
+                Patterns.WEB_URL.matcher(url).matches());
+    }
+
+    @SmallTest
+    public void testWebUrl_matchesUrlWithUnicodeDomainNameWithProtocol() throws Exception {
+        String url = "http://\uD604\uAE08\uC601\uC218\uC99D.kr";
+        assertTrue("Should match URL with Unicode domain name",
+                Patterns.WEB_URL.matcher(url).matches());
+    }
+
+    @SmallTest
+    public void testWebUrl_matchesUrlWithUnicodeDomainNameWithoutProtocol() throws Exception {
+        String url = "\uD604\uAE08\uC601\uC218\uC99D.kr";
+        assertTrue("Should match URL without protocol and with Unicode domain name",
+                Patterns.WEB_URL.matcher(url).matches());
+    }
+
+    @SmallTest
+    public void testWebUrl_matchesUrlWithUnicodeTld() throws Exception {
+        String url = "\uB3C4\uBA54\uC778.\uD55C\uAD6D";
+        assertTrue("Should match URL with Unicode TLD",
+                Patterns.WEB_URL.matcher(url).matches());
+    }
+
+    @SmallTest
+    public void testWebUrl_matchesUrlWithUnicodePath() throws Exception {
+        String url = "http://brainstormtech.blogs.fortune.cnn.com/2010/03/11/" +
+                "top-five-moments-from-eric-schmidt\u2019s-talk-in-abu-dhabi/";
+        assertTrue("Should match URL with Unicode path",
+                Patterns.WEB_URL.matcher(url).matches());
     }
 
     @SmallTest
+    public void testWebUrl_doesNotMatchValidUrlWithInvalidProtocol() throws Exception {
+        // The host is well formed; only the "ftp" protocol should make WEB_URL reject it.
+        final boolean matched = Patterns.WEB_URL.matcher("ftp://www.example.com").matches();
+        assertFalse("Should not match URL with invalid protocol", matched);
+    }
+
+    /** WEB_URL must fully match a URL that carries an explicit port number. */
+    @SmallTest
+    public void testWebUrl_matchesValidUrlWithPort() throws Exception {
+        final String input = "http://www.example.com:8080";
+        final boolean matched = Patterns.WEB_URL.matcher(input).matches();
+        assertTrue("Should match URL with port", matched);
+    }
+
+    /** WEB_URL must fully match a URL that has both a port and a query string. */
+    @SmallTest
+    public void testWebUrl_matchesUrlWithPortAndQuery() throws Exception {
+        final String input = "http://www.example.com:8080/?foo=bar";
+        final boolean matched = Patterns.WEB_URL.matcher(input).matches();
+        assertTrue("Should match URL with port and query", matched);
+    }
+
+    /** WEB_URL must fully match a URL whose path contains a tilde. */
+    @SmallTest
+    public void testWebUrl_matchesUrlWithTilde() throws Exception {
+        final String input = "http://www.example.com:8080/~user/?foo=bar";
+        final boolean matched = Patterns.WEB_URL.matcher(input).matches();
+        assertTrue("Should match URL with tilde", matched);
+    }
+
+    /** WEB_URL must accept the protocol regardless of letter case. */
+    @SmallTest
+    public void testWebUrl_matchesProtocolCaseInsensitive() throws Exception {
+        final String input = "hTtP://android.com";
+        final boolean matched = Patterns.WEB_URL.matcher(input).matches();
+        assertTrue("Protocol matching should be case insensitive", matched);
+    }
+
+    // Tests for Patterns.AUTOLINK_WEB_URL
+
+    /** AUTOLINK_WEB_URL must fully match a plain URL made of a scheme and a hostname. */
+    @SmallTest
+    public void testAutoLinkWebUrl_matchesValidUrlWithSchemeAndHostname() throws Exception {
+        final String input = "http://www.android.com";
+        final boolean matched = Patterns.AUTOLINK_WEB_URL.matcher(input).matches();
+        assertTrue("Should match URL with scheme and hostname", matched);
+    }
+
+    /** AUTOLINK_WEB_URL must fully match a URL with a scheme, a hostname and the ".me" TLD. */
+    @SmallTest
+    public void testAutoLinkWebUrl_matchesValidUrlWithSchemeHostnameAndNewTld() throws Exception {
+        final String input = "http://www.android.me";
+        final boolean matched = Patterns.AUTOLINK_WEB_URL.matcher(input).matches();
+        assertTrue("Should match URL with scheme, hostname and new TLD", matched);
+    }
+
+    /** AUTOLINK_WEB_URL must fully match protocol-less hostnames ending in ".me" and ".camera". */
+    @SmallTest
+    public void testAutoLinkWebUrl_matchesValidUrlWithHostnameAndNewTld() throws Exception {
+        // Checked in order; the first failing input aborts the test, as separate asserts would.
+        final String[] inputs = {"android.me", "android.camera"};
+        for (String input : inputs) {
+            final boolean matched = Patterns.AUTOLINK_WEB_URL.matcher(input).matches();
+            assertTrue("Should match URL with hostname and new TLD", matched);
+        }
+    }
+
+    /** AUTOLINK_WEB_URL must fully match a Chinese Punycode URL that starts with http. */
+    @SmallTest
+    public void testAutoLinkWebUrl_matchesChinesePunycodeUrlWithProtocol() throws Exception {
+        final String input = "http://xn--fsqu00a.xn--0zwm56d";
+        final boolean matched = Patterns.AUTOLINK_WEB_URL.matcher(input).matches();
+        assertTrue("Should match Chinese Punycode URL with protocol", matched);
+    }
+
+    /** AUTOLINK_WEB_URL must fully match a Chinese Punycode URL given without a protocol. */
+    @SmallTest
+    public void testAutoLinkWebUrl_matchesChinesePunycodeUrlWithoutProtocol() throws Exception {
+        final String input = "xn--fsqu00a.xn--0zwm56d";
+        final boolean matched = Patterns.AUTOLINK_WEB_URL.matcher(input).matches();
+        assertTrue("Should match Chinese Punycode URL without protocol", matched);
+    }
+
+    /** AUTOLINK_WEB_URL must fully match an Arabic Punycode URL with a protocol and a path. */
+    @SmallTest
+    public void testAutoLinkWebUrl_matchesArabicPunycodeUrlWithProtocol() throws Exception {
+        String input = "http://xn--4gbrim.xn--rmckbbajlc6dj7bxne2c.xn--wgbh1c/ar/default.aspx";
+        final boolean matched = Patterns.AUTOLINK_WEB_URL.matcher(input).matches();
+        assertTrue("Should match Arabic Punycode URL with protocol", matched);
+    }
+
+    /** AUTOLINK_WEB_URL must fully match an Arabic Punycode URL with a path but no protocol. */
+    @SmallTest
+    public void testAutoLinkWebUrl_matchesArabicPunycodeUrlWithoutProtocol() throws Exception {
+        final String input = "xn--4gbrim.xn--rmckbbajlc6dj7bxne2c.xn--wgbh1c/ar/default.aspx";
+        final boolean matched = Patterns.AUTOLINK_WEB_URL.matcher(input).matches();
+        assertTrue("Should match Arabic Punycode URL without protocol", matched);
+    }
+
+    /** AUTOLINK_WEB_URL must reject a URL whose Punycode TLD begins with a dash. */
+    @SmallTest
+    public void testAutoLinkWebUrl_doesNotMatchPunycodeTldThatStartsWithDash() throws Exception {
+        final String input = "http://xn--fsqu00a.-xn--0zwm56d";
+        final boolean matched = Patterns.AUTOLINK_WEB_URL.matcher(input).matches();
+        assertFalse("Should not match Punycode TLD that starts with dash", matched);
+    }
+
+    /** AUTOLINK_WEB_URL must reject a URL whose Punycode TLD ends with a dash. */
+    @SmallTest
+    public void testAutoLinkWebUrl_doesNotMatchPunycodeTldThatEndsWithDash() throws Exception {
+        final String input = "http://xn--fsqu00a.xn--0zwm56d-";
+        final boolean matched = Patterns.AUTOLINK_WEB_URL.matcher(input).matches();
+        assertFalse("Should not match Punycode TLD that ends with dash", matched);
+    }
+
+    /** AUTOLINK_WEB_URL must fully match a Unicode domain name with and without a protocol. */
+    @SmallTest
+    public void testAutoLinkWebUrl_matchesUrlWithUnicodeDomainName() throws Exception {
+        String url = "http://\uD604\uAE08\uC601\uC218\uC99D.kr";
+        assertTrue("Should match URL with Unicode domain name",
+                Patterns.AUTOLINK_WEB_URL.matcher(url).matches());
+
+        // Same domain again, this time without the leading "http://".
+        url = "\uD604\uAE08\uC601\uC218\uC99D.kr";
+        assertTrue("Should match URL without protocol and with Unicode domain name",
+                Patterns.AUTOLINK_WEB_URL.matcher(url).matches());
+    }
+
+    /** AUTOLINK_WEB_URL must fully match a protocol-less name whose TLD is written in Unicode. */
+    @SmallTest
+    public void testAutoLinkWebUrl_matchesUrlWithUnicodeTld() throws Exception {
+        final String input = "\uB3C4\uBA54\uC778.\uD55C\uAD6D";
+        final boolean matched = Patterns.AUTOLINK_WEB_URL.matcher(input).matches();
+        assertTrue("Should match URL with Unicode TLD", matched);
+    }
+
+    /** AUTOLINK_WEB_URL must fully match a URL whose path contains the character U+2019. */
+    @SmallTest
+    public void testAutoLinkWebUrl_matchesUrlWithUnicodePath() throws Exception {
+        final String input = "http://brainstormtech.blogs.fortune.cnn.com/2010/03/11/"
+                + "top-five-moments-from-eric-schmidt\u2019s-talk-in-abu-dhabi/";
+        final boolean matched = Patterns.AUTOLINK_WEB_URL.matcher(input).matches();
+        assertTrue("Should match URL with Unicode path", matched);
+    }
+
+    /** AUTOLINK_WEB_URL must not fully match an otherwise valid URL that uses "ftp". */
+    @SmallTest
+    public void testAutoLinkWebUrl_doesNotMatchValidUrlWithInvalidProtocol() throws Exception {
+        final String input = "ftp://www.example.com";
+        final boolean matched = Patterns.AUTOLINK_WEB_URL.matcher(input).matches();
+        assertFalse("Should not match URL with invalid protocol", matched);
+    }
+
+    /** AUTOLINK_WEB_URL must fully match a URL that carries an explicit port number. */
+    @SmallTest
+    public void testAutoLinkWebUrl_matchesValidUrlWithPort() throws Exception {
+        final String input = "http://www.example.com:8080";
+        final boolean matched = Patterns.AUTOLINK_WEB_URL.matcher(input).matches();
+        assertTrue("Should match URL with port", matched);
+    }
+
+    /** AUTOLINK_WEB_URL must fully match a URL that has both a port and a query string. */
+    @SmallTest
+    public void testAutoLinkWebUrl_matchesUrlWithPortAndQuery() throws Exception {
+        final String input = "http://www.example.com:8080/?foo=bar";
+        final boolean matched = Patterns.AUTOLINK_WEB_URL.matcher(input).matches();
+        assertTrue("Should match URL with port and query", matched);
+    }
+
+    /** AUTOLINK_WEB_URL must fully match a URL whose path contains a tilde. */
+    @SmallTest
+    public void testAutoLinkWebUrl_matchesUrlWithTilde() throws Exception {
+        final String input = "http://www.example.com:8080/~user/?foo=bar";
+        final boolean matched = Patterns.AUTOLINK_WEB_URL.matcher(input).matches();
+        assertTrue("Should match URL with tilde", matched);
+    }
+
+    /** AUTOLINK_WEB_URL must accept the protocol regardless of letter case. */
+    @SmallTest
+    public void testAutoLinkWebUrl_matchesProtocolCaseInsensitive() throws Exception {
+        final String input = "hTtP://android.com";
+        final boolean matched = Patterns.AUTOLINK_WEB_URL.matcher(input).matches();
+        assertTrue("Protocol matching should be case insensitive", matched);
+    }
+
+    /** AUTOLINK_WEB_URL must fully match an http URL whose host has no TLD at all. */
+    @SmallTest
+    public void testAutoLinkWebUrl_matchesUrlStartingWithHttpAndDoesNotHaveTld() throws Exception {
+        final String input = "http://android/#notld///a/n/d/r/o/i/d&p1=1&p2=2";
+        final boolean matched = Patterns.AUTOLINK_WEB_URL.matcher(input).matches();
+        assertTrue("Should match URL without a TLD and starting with http ", matched);
+    }
+
+    /** AUTOLINK_WEB_URL must not match "thank.you": no protocol and ".you" is not a known TLD. */
+    @SmallTest
+    public void testAutoLinkWebUrl_doesNotMatchUrlsWithoutProtocolAndWithUnknownTld()
+            throws Exception {
+        final String input = "thank.you";
+        final boolean matched = Patterns.AUTOLINK_WEB_URL.matcher(input).matches();
+        assertFalse("Should not match URL that does not start with a protocol and "
+                + "does not contain a known TLD", matched);
+    }
+
+    /** AUTOLINK_WEB_URL must not fully match a URL whose "?" directly follows the host. */
+    @SmallTest
+    public void testAutoLinkWebUrl_doesNotMatchUrlWithInvalidRequestParameter() throws Exception {
+        final String input = "http://android.com?p=value";
+        final boolean matched = Patterns.AUTOLINK_WEB_URL.matcher(input).matches();
+        assertFalse("Should not match URL with invalid request parameter", matched);
+    }
+
+    /** AUTOLINK_WEB_URL must not find any match inside a URL that uses the "ftp" protocol. */
+    @SmallTest
+    public void testAutoLinkWebUrl_doesNotPartiallyMatchUnknownProtocol() throws Exception {
+        final String input = "ftp://foo.bar/baz";
+        // find() rather than matches(): a match on any substring must fail the test.
+        final boolean foundAnywhere = Patterns.AUTOLINK_WEB_URL.matcher(input).find();
+        assertFalse("Should not partially match URL with unknown protocol", foundAnywhere);
+    }
+
+    /** AUTOLINK_WEB_URL must fully match a ".com" name whose label contains U+263A. */
+    @SmallTest
+    public void testAutoLinkWebUrl_matchesValidUrlWithEmoji() throws Exception {
+        final String input = "Thank\u263A.com";
+        final boolean matched = Patterns.AUTOLINK_WEB_URL.matcher(input).matches();
+        assertTrue("Should match URL with emoji", matched);
+    }
+
+    /** AUTOLINK_WEB_URL must not match an emoji label when the TLD (".you") is unknown. */
+    @SmallTest
+    public void testAutoLinkWebUrl_doesNotMatchUrlsWithEmojiWithoutProtocolAndWithoutKnownTld()
+            throws Exception {
+        final String input = "Thank\u263A.you";
+        final boolean matched = Patterns.AUTOLINK_WEB_URL.matcher(input).matches();
+        assertFalse("Should not match URLs containing emoji and with unknown TLD", matched);
+    }
+
+    /** AUTOLINK_WEB_URL must not treat a whole email address as a web URL. */
+    @SmallTest
+    public void testAutoLinkWebUrl_doesNotMatchEmailAddress()
+            throws Exception {
+        final String input = "android@android.com";
+        final boolean matched = Patterns.AUTOLINK_WEB_URL.matcher(input).matches();
+        assertFalse("Should not match email address", matched);
+    }
+
+    /** AUTOLINK_WEB_URL must fully match a domain label containing a surrogate pair (U+1F338). */
+    @SmallTest
+    public void testAutoLinkWebUrl_matchesDomainNameWithSurrogatePairs() throws Exception {
+        final String input = "android\uD83C\uDF38.com";
+        final boolean matched = Patterns.AUTOLINK_WEB_URL.matcher(input).matches();
+        assertTrue("Should match domain name with Unicode surrogate pairs", matched);
+    }
+
+    /** AUTOLINK_WEB_URL must fully match an http URL whose TLD starts with a surrogate pair. */
+    @SmallTest
+    public void testAutoLinkWebUrl_matchesTldWithSurrogatePairs() throws Exception {
+        final String input = "http://android.\uD83C\uDF38com";
+        final boolean matched = Patterns.AUTOLINK_WEB_URL.matcher(input).matches();
+        assertTrue("Should match TLD with Unicode surrogate pairs", matched);
+    }
+
+    /** AUTOLINK_WEB_URL must fully match a URL with surrogate pairs in its path and query. */
+    @SmallTest
+    public void testAutoLinkWebUrl_matchesPathWithSurrogatePairs() throws Exception {
+        final String input = "http://android.com/path-with-\uD83C\uDF38?v=\uD83C\uDF38";
+        final boolean matched = Patterns.AUTOLINK_WEB_URL.matcher(input).matches();
+        assertTrue("Should match path and query with Unicode surrogate pairs", matched);
+    }
+
+    /** AUTOLINK_WEB_URL must reject a host containing the surrogate pair for U+1FFFE. */
+    @SmallTest
+    public void testAutoLinkWebUrl_doesNotMatchUrlWithExcludedSurrogate() throws Exception {
+        final String input = "http://android\uD83F\uDFFE.com";
+        final boolean matched = Patterns.AUTOLINK_WEB_URL.matcher(input).matches();
+        assertFalse("Should not match URL with excluded Unicode surrogate pair", matched);
+    }
+
+    // Tests for Patterns.IP_ADDRESS
+
+    @SmallTest
     public void testIpPattern() throws Exception {
         boolean t;
 
@@ -118,34 +432,85 @@ public class PatternsTest extends TestCase {
         assertFalse("Invalid IP", t);
     }
 
+    // Tests for Patterns.DOMAIN_NAME
+
     @SmallTest
-    @Suppress // Failing.
-    public void testDomainPattern() throws Exception {
-        boolean t;
+    public void testDomain_matchesPunycodeTld() throws Exception {
+        // Both labels, including the TLD, carry the Punycode "xn--" prefix.
+        final String input = "xn--fsqu00a.xn--0zwm56d";
+        final boolean matched = Patterns.DOMAIN_NAME.matcher(input).matches();
+        assertTrue("Should match domain name in Punycode", matched);
+    }
+
+    /** DOMAIN_NAME must reject a name whose Punycode TLD begins with a dash. */
+    @SmallTest
+    public void testDomain_doesNotMatchPunycodeThatStartsWithDash() throws Exception {
+        final String input = "xn--fsqu00a.-xn--0zwm56d";
+        final boolean matched = Patterns.DOMAIN_NAME.matcher(input).matches();
+        assertFalse("Should not match Punycode TLD that starts with a dash", matched);
+    }
 
-        t = Patterns.DOMAIN_NAME.matcher("mail.example.com").matches();
-        assertTrue("Valid domain", t);
+    /** DOMAIN_NAME must reject a name whose Punycode TLD ends with a dash. */
+    @SmallTest
+    public void testDomain_doesNotMatchPunycodeThatEndsWithDash() throws Exception {
+        final String input = "xn--fsqu00a.xn--0zwm56d-";
+        final boolean matched = Patterns.DOMAIN_NAME.matcher(input).matches();
+        assertFalse("Should not match Punycode TLD that ends with a dash", matched);
+    }
 
-        t = Patterns.DOMAIN_NAME.matcher("google.me").matches();
-        assertTrue("Valid domain", t);
+    /** DOMAIN_NAME must reject a Punycode TLD longer than the 63 chars named in the message. */
+    @SmallTest
+    public void testDomain_doesNotMatchPunycodeLongerThanAllowed() throws Exception {
+        // The TLD is "xn--" followed by seven runs of ten digits: 74 characters in total.
+        final StringBuilder domain = new StringBuilder("xn--fsqu00a.").append("xn--");
+        for (int run = 0; run < 7; run++) {
+            domain.append("0123456789");
+        }
+        final boolean matched = Patterns.DOMAIN_NAME.matcher(domain.toString()).matches();
+        assertFalse("Should not match Punycode TLD that is longer than 63 chars", matched);
+    }
 
-        // Internationalized domains.
-        t = Patterns.DOMAIN_NAME.matcher("\uD604\uAE08\uC601\uC218\uC99D.kr").matches();
-        assertTrue("Valid domain", t);
+    /** DOMAIN_NAME is expected to match a name under ".yu", which this test calls obsolete. */
+    @SmallTest
+    public void testDomain_matchesObsoleteTld() throws Exception {
+        final String input = "test.yu";
+        final boolean matched = Patterns.DOMAIN_NAME.matcher(input).matches();
+        assertTrue("Should match domain names with obsolete TLD", matched);
+    }
 
-        t = Patterns.DOMAIN_NAME.matcher("__+&42.xer").matches();
-        assertFalse("Invalid domain", t);
+    /** DOMAIN_NAME must fully match a three-label name, i.e. one with a subdomain. */
+    @SmallTest
+    public void testDomain_matchesWithSubDomain() throws Exception {
+        final String input = "mail.example.com";
+        final boolean matched = Patterns.DOMAIN_NAME.matcher(input).matches();
+        assertTrue("Should match domain names with subdomains", matched);
+    }
 
-        // Obsolete domain .yu
-        t = Patterns.DOMAIN_NAME.matcher("test.yu").matches();
-        assertFalse("Obsolete country code top level domain", t);
+    /** DOMAIN_NAME must fully match a two-label name, i.e. one without a subdomain. */
+    @SmallTest
+    public void testDomain_matchesWithoutSubDomain() throws Exception {
+        final String input = "android.me";
+        final boolean matched = Patterns.DOMAIN_NAME.matcher(input).matches();
+        assertTrue("Should match domain names without subdomains", matched);
+    }
+
+    /** DOMAIN_NAME must fully match a domain name whose first label is written in Unicode. */
+    @SmallTest
+    public void testDomain_matchesUnicodeDomainNames() throws Exception {
+        String domain = "\uD604\uAE08\uC601\uC218\uC99D.kr";
+        assertTrue("Should match Unicode domain names",
+                Patterns.DOMAIN_NAME.matcher(domain).matches());
+    }
 
-        // Testing top level Arabic country code domain in Punycode:
-        t = Patterns.DOMAIN_NAME.matcher("xn--4gbrim.xn----rmckbbajlc6dj7bxne2c.xn--wgbh1c").matches();
-        assertTrue("Valid domain", t);
+    /** DOMAIN_NAME must not match the malformed name "__+&42.xer". */
+    @SmallTest
+    public void testDomain_doesNotMatchInvalidDomain() throws Exception {
+        final String input = "__+&42.xer";
+        final boolean matched = Patterns.DOMAIN_NAME.matcher(input).matches();
+        assertFalse("Should not match invalid domain name", matched);
     }
 
     @SmallTest
+    public void testDomain_matchesPunycodeArabicDomainName() throws Exception {
+        // Three Punycode labels; the middle one has extra dashes right after the "xn--" prefix.
+        final String input = "xn--4gbrim.xn----rmckbbajlc6dj7bxne2c.xn--wgbh1c";
+        final boolean matched = Patterns.DOMAIN_NAME.matcher(input).matches();
+        assertTrue("Should match Punycode Arabic domain name", matched);
+    }
+
+    // Tests for Patterns.PHONE
+
+    @SmallTest
     public void testPhonePattern() throws Exception {
         boolean t;