frameworks/base/core/java/android/webkit/URLUtil.java

   1 /*
   2  * Copyright (C) 2006 The Android Open Source Project
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *      http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 package android.webkit;
  18
  19 import java.io.UnsupportedEncodingException;
  20 import java.util.regex.Matcher;
  21 import java.util.regex.Pattern;
  22
  23 import android.net.Uri;
  24 import android.net.ParseException;
  25 import android.net.WebAddress;
  26 import android.util.Log;
  27
  28 public final class URLUtil {
  29
  30     private static final String LOGTAG = "webkit";
  31
  32     // to refer to bar.png under your package's asset/foo/ directory, use
  33     // "file:///android_asset/foo/bar.png".
  34     static final String ASSET_BASE = "file:///android_asset/";
  35     // to refer to bar.png under your package's res/drawable/ directory, use
  36     // "file:///android_res/drawable/bar.png". Use "drawable" to refer to
  37     // "drawable-hdpi" directory as well.
  38     static final String RESOURCE_BASE = "file:///android_res/";
  39     static final String FILE_BASE = "file://";
  40     static final String PROXY_BASE = "file:///cookieless_proxy/";
  41
  42     /**
  43      * Cleans up (if possible) user-entered web addresses
  44      */
  45     public static String guessUrl(String inUrl) {
  46
  47         String retVal = inUrl;
  48         WebAddress webAddress;
  49
  50         Log.v(LOGTAG, "guessURL before queueRequest: " + inUrl);
  51
  52         if (inUrl.length() == 0) return inUrl;
  53         if (inUrl.startsWith("about:")) return inUrl;
  54         // Do not try to interpret data scheme URLs
  55         if (inUrl.startsWith("data:")) return inUrl;
  56         // Do not try to interpret file scheme URLs
  57         if (inUrl.startsWith("file:")) return inUrl;
  58         // Do not try to interpret javascript scheme URLs
  59         if (inUrl.startsWith("javascript:")) return inUrl;
  60
  61         // bug 762454: strip period off end of url
  62         if (inUrl.endsWith(".") == true) {
  63             inUrl = inUrl.substring(0, inUrl.length() - 1);
  64         }
  65
  66         try {
  67             webAddress = new WebAddress(inUrl);
  68         } catch (ParseException ex) {
  69
  70             if (DebugFlags.URL_UTIL) {
  71                 Log.v(LOGTAG, "smartUrlFilter: failed to parse url = " + inUrl);
  72             }
  73             return retVal;
  74         }
  75
  76         // Check host
  77         if (webAddress.mHost.indexOf('.') == -1) {
  78             // no dot: user probably entered a bare domain.  try .com
  79             webAddress.mHost = "www." + webAddress.mHost + ".com";
  80         }
  81         return webAddress.toString();
  82     }
  83
  84     public static String composeSearchUrl(String inQuery, String template,
  85                                           String queryPlaceHolder) {
  86         int placeHolderIndex = template.indexOf(queryPlaceHolder);
  87         if (placeHolderIndex < 0) {
  88             return null;
  89         }
  90
  91         String query;
  92         StringBuilder buffer = new StringBuilder();
  93         buffer.append(template.substring(0, placeHolderIndex));
  94
  95         try {
  96             query = java.net.URLEncoder.encode(inQuery, "utf-8");
  97             buffer.append(query);
  98         } catch (UnsupportedEncodingException ex) {
  99             return null;
 100         }
 101
 102         buffer.append(template.substring(
 103                 placeHolderIndex + queryPlaceHolder.length()));
 104
 105         return buffer.toString();
 106     }
 107
 108     public static byte[] decode(byte[] url) throws IllegalArgumentException {
 109         if (url.length == 0) {
 110             return new byte[0];
 111         }
 112
 113         // Create a new byte array with the same length to ensure capacity
 114         byte[] tempData = new byte[url.length];
 115
 116         int tempCount = 0;
 117         for (int i = 0; i < url.length; i++) {
 118             byte b = url[i];
 119             if (b == '%') {
 120                 if (url.length - i > 2) {
 121                     b = (byte) (parseHex(url[i + 1]) * 16
 122                             + parseHex(url[i + 2]));
 123                     i += 2;
 124                 } else {
 125                     throw new IllegalArgumentException("Invalid format");
 126                 }
 127             }
 128             tempData[tempCount++] = b;
 129         }
 130         byte[] retData = new byte[tempCount];
 131         System.arraycopy(tempData, 0, retData, 0, tempCount);
 132         return retData;
 133     }
 134
 135     /**
 136      * @return True iff the url is correctly URL encoded
 137      */
 138     static boolean verifyURLEncoding(String url) {
 139         int count = url.length();
 140         if (count == 0) {
 141             return false;
 142         }
 143
 144         int index = url.indexOf('%');
 145         while (index >= 0 && index < count) {
 146             if (index < count - 2) {
 147                 try {
 148                     parseHex((byte) url.charAt(++index));
 149                     parseHex((byte) url.charAt(++index));
 150                 } catch (IllegalArgumentException e) {
 151                     return false;
 152                 }
 153             } else {
 154                 return false;
 155             }
 156             index = url.indexOf('%', index + 1);
 157         }
 158         return true;
 159     }
 160
 161     private static int parseHex(byte b) {
 162         if (b >= '0' && b <= '9') return (b - '0');
 163         if (b >= 'A' && b <= 'F') return (b - 'A' + 10);
 164         if (b >= 'a' && b <= 'f') return (b - 'a' + 10);
 165
 166         throw new IllegalArgumentException("Invalid hex char '" + b + "'");
 167     }
 168
 169     /**
 170      * @return True iff the url is an asset file.
 171      */
 172     public static boolean isAssetUrl(String url) {
 173         return (null != url) && url.startsWith(ASSET_BASE);
 174     }
 175
 176     /**
 177      * @return True iff the url is a resource file.
 178      * @hide
 179      */
 180     public static boolean isResourceUrl(String url) {
 181         return (null != url) && url.startsWith(RESOURCE_BASE);
 182     }
 183
 184     /**
 185      * @return True iff the url is an proxy url to allow cookieless network
 186      * requests from a file url.
 187      * @deprecated Cookieless proxy is no longer supported.
 188      */
 189     @Deprecated
 190     public static boolean isCookielessProxyUrl(String url) {
 191         return (null != url) && url.startsWith(PROXY_BASE);
 192     }
 193
 194     /**
 195      * @return True iff the url is a local file.
 196      */
 197     public static boolean isFileUrl(String url) {
 198         return (null != url) && (url.startsWith(FILE_BASE) &&
 199                                  !url.startsWith(ASSET_BASE) &&
 200                                  !url.startsWith(PROXY_BASE));
 201     }
 202
 203     /**
 204      * @return True iff the url is an about: url.
 205      */
 206     public static boolean isAboutUrl(String url) {
 207         return (null != url) && url.startsWith("about:");
 208     }
 209
 210     /**
 211      * @return True iff the url is a data: url.
 212      */
 213     public static boolean isDataUrl(String url) {
 214         return (null != url) && url.startsWith("data:");
 215     }
 216
 217     /**
 218      * @return True iff the url is a javascript: url.
 219      */
 220     public static boolean isJavaScriptUrl(String url) {
 221         return (null != url) && url.startsWith("javascript:");
 222     }
 223
 224     /**
 225      * @return True iff the url is an http: url.
 226      */
 227     public static boolean isHttpUrl(String url) {
 228         return (null != url) &&
 229                (url.length() > 6) &&
 230                url.substring(0, 7).equalsIgnoreCase("http://");
 231     }
 232
 233     /**
 234      * @return True iff the url is an https: url.
 235      */
 236     public static boolean isHttpsUrl(String url) {
 237         return (null != url) &&
 238                (url.length() > 7) &&
 239                url.substring(0, 8).equalsIgnoreCase("https://");
 240     }
 241
 242     /**
 243      * @return True iff the url is a network url.
 244      */
 245     public static boolean isNetworkUrl(String url) {
 246         if (url == null || url.length() == 0) {
 247             return false;
 248         }
 249         return isHttpUrl(url) || isHttpsUrl(url);
 250     }
 251
 252     /**
 253      * @return True iff the url is a content: url.
 254      */
 255     public static boolean isContentUrl(String url) {
 256         return (null != url) && url.startsWith("content:");
 257     }
 258
 259     /**
 260      * @return True iff the url is valid.
 261      */
 262     public static boolean isValidUrl(String url) {
 263         if (url == null || url.length() == 0) {
 264             return false;
 265         }
 266
 267         return (isAssetUrl(url) ||
 268                 isResourceUrl(url) ||
 269                 isFileUrl(url) ||
 270                 isAboutUrl(url) ||
 271                 isHttpUrl(url) ||
 272                 isHttpsUrl(url) ||
 273                 isJavaScriptUrl(url) ||
 274                 isContentUrl(url));
 275     }
 276
 277     /**
 278      * Strips the url of the anchor.
 279      */
 280     public static String stripAnchor(String url) {
 281         int anchorIndex = url.indexOf('#');
 282         if (anchorIndex != -1) {
 283             return url.substring(0, anchorIndex);
 284         }
 285         return url;
 286     }
 287
 288     /**
 289      * Guesses canonical filename that a download would have, using
 290      * the URL and contentDisposition. File extension, if not defined,
 291      * is added based on the mimetype
 292      * @param url Url to the content
 293      * @param contentDisposition Content-Disposition HTTP header or null
 294      * @param mimeType Mime-type of the content or null
 295      *
 296      * @return suggested filename
 297      */
 298     public static final String guessFileName(
 299             String url,
 300             String contentDisposition,
 301             String mimeType) {
 302         String filename = null;
 303         String extension = null;
 304
 305         // If we couldn't do anything with the hint, move toward the content disposition
 306         if (filename == null && contentDisposition != null) {
 307             filename = parseContentDisposition(contentDisposition);
 308             if (filename != null) {
 309                 int index = filename.lastIndexOf('/') + 1;
 310                 if (index > 0) {
 311                     filename = filename.substring(index);
 312                 }
 313             }
 314         }
 315
 316         // If all the other http-related approaches failed, use the plain uri
 317         if (filename == null) {
 318             String decodedUrl = Uri.decode(url);
 319             if (decodedUrl != null) {
 320                 int queryIndex = decodedUrl.indexOf('?');
 321                 // If there is a query string strip it, same as desktop browsers
 322                 if (queryIndex > 0) {
 323                     decodedUrl = decodedUrl.substring(0, queryIndex);
 324                 }
 325                 if (!decodedUrl.endsWith("/")) {
 326                     int index = decodedUrl.lastIndexOf('/') + 1;
 327                     if (index > 0) {
 328                         filename = decodedUrl.substring(index);
 329                     }
 330                 }
 331             }
 332         }
 333
 334         // Finally, if couldn't get filename from URI, get a generic filename
 335         if (filename == null) {
 336             filename = "downloadfile";
 337         }
 338
 339         // Split filename between base and extension
 340         // Add an extension if filename does not have one
 341         int dotIndex = filename.indexOf('.');
 342         if (dotIndex < 0) {
 343             if (mimeType != null) {
 344                 extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType);
 345                 if (extension != null) {
 346                     extension = "." + extension;
 347                 }
 348             }
 349             if (extension == null) {
 350                 if (mimeType != null && mimeType.toLowerCase().startsWith("text/")) {
 351                     if (mimeType.equalsIgnoreCase("text/html")) {
 352                         extension = ".html";
 353                     } else {
 354                         extension = ".txt";
 355                     }
 356                 } else {
 357                     extension = ".bin";
 358                 }
 359             }
 360         } else {
 361             if (mimeType != null) {
 362                 // Compare the last segment of the extension against the mime type.
 363                 // If there's a mismatch, discard the entire extension.
 364                 int lastDotIndex = filename.lastIndexOf('.');
 365                 String typeFromExt = MimeTypeMap.getSingleton().getMimeTypeFromExtension(
 366                         filename.substring(lastDotIndex + 1));
 367                 if (typeFromExt != null && !typeFromExt.equalsIgnoreCase(mimeType)) {
 368                     extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType);
 369                     if (extension != null) {
 370                         extension = "." + extension;
 371                     }
 372                 }
 373             }
 374             if (extension == null) {
 375                 extension = filename.substring(dotIndex);
 376             }
 377             filename = filename.substring(0, dotIndex);
 378         }
 379
 380         return filename + extension;
 381     }
 382
 383     /** Regex used to parse content-disposition headers */
 384     private static final Pattern CONTENT_DISPOSITION_PATTERN =
 385             Pattern.compile("attachment;\\s*filename\\s*=\\s*(\"?)([^\"]*)\\1\\s*$",
 386             Pattern.CASE_INSENSITIVE);
 387
 388     /*
 389      * Parse the Content-Disposition HTTP Header. The format of the header
 390      * is defined here: http://www.w3.org/Protocols/rfc2616/rfc2616-sec19.html
 391      * This header provides a filename for content that is going to be
 392      * downloaded to the file system. We only support the attachment type.
 393      * Note that RFC 2616 specifies the filename value must be double-quoted.
 394      * Unfortunately some servers do not quote the value so to maintain
 395      * consistent behaviour with other browsers, we allow unquoted values too.
 396      */
 397     static String parseContentDisposition(String contentDisposition) {
 398         try {
 399             Matcher m = CONTENT_DISPOSITION_PATTERN.matcher(contentDisposition);
 400             if (m.find()) {
 401                 return m.group(2);
 402             }
 403         } catch (IllegalStateException ex) {
 404              // This function is defined as returning null when it can't parse the header
 405         }
 406         return null;
 407     }
 408 }