2 * Copyright (C) 2006 The Android Open Source Project
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 package android.webkit;
19 import java.io.UnsupportedEncodingException;
20 import java.util.regex.Matcher;
21 import java.util.regex.Pattern;
23 import android.net.Uri;
24 import android.net.ParseException;
25 import android.net.WebAddress;
26 import android.util.Log;
28 public final class URLUtil {
30 private static final String LOGTAG = "webkit";
32 // to refer to bar.png under your package's asset/foo/ directory, use
33 // "file:///android_asset/foo/bar.png".
34 static final String ASSET_BASE = "file:///android_asset/";
35 // to refer to bar.png under your package's res/drawable/ directory, use
36 // "file:///android_res/drawable/bar.png". Use "drawable" to refer to
37 // "drawable-hdpi" directory as well.
38 static final String RESOURCE_BASE = "file:///android_res/";
39 static final String FILE_BASE = "file://";
40 static final String PROXY_BASE = "file:///cookieless_proxy/";
43 * Cleans up (if possible) user-entered web addresses
45 public static String guessUrl(String inUrl) {
47 String retVal = inUrl;
48 WebAddress webAddress;
50 Log.v(LOGTAG, "guessURL before queueRequest: " + inUrl);
52 if (inUrl.length() == 0) return inUrl;
53 if (inUrl.startsWith("about:")) return inUrl;
54 // Do not try to interpret data scheme URLs
55 if (inUrl.startsWith("data:")) return inUrl;
56 // Do not try to interpret file scheme URLs
57 if (inUrl.startsWith("file:")) return inUrl;
58 // Do not try to interpret javascript scheme URLs
59 if (inUrl.startsWith("javascript:")) return inUrl;
61 // bug 762454: strip period off end of url
62 if (inUrl.endsWith(".") == true) {
63 inUrl = inUrl.substring(0, inUrl.length() - 1);
67 webAddress = new WebAddress(inUrl);
68 } catch (ParseException ex) {
70 if (DebugFlags.URL_UTIL) {
71 Log.v(LOGTAG, "smartUrlFilter: failed to parse url = " + inUrl);
77 if (webAddress.mHost.indexOf('.') == -1) {
78 // no dot: user probably entered a bare domain. try .com
79 webAddress.mHost = "www." + webAddress.mHost + ".com";
81 return webAddress.toString();
84 public static String composeSearchUrl(String inQuery, String template,
85 String queryPlaceHolder) {
86 int placeHolderIndex = template.indexOf(queryPlaceHolder);
87 if (placeHolderIndex < 0) {
92 StringBuilder buffer = new StringBuilder();
93 buffer.append(template.substring(0, placeHolderIndex));
96 query = java.net.URLEncoder.encode(inQuery, "utf-8");
98 } catch (UnsupportedEncodingException ex) {
102 buffer.append(template.substring(
103 placeHolderIndex + queryPlaceHolder.length()));
105 return buffer.toString();
108 public static byte[] decode(byte[] url) throws IllegalArgumentException {
109 if (url.length == 0) {
113 // Create a new byte array with the same length to ensure capacity
114 byte[] tempData = new byte[url.length];
117 for (int i = 0; i < url.length; i++) {
120 if (url.length - i > 2) {
121 b = (byte) (parseHex(url[i + 1]) * 16
122 + parseHex(url[i + 2]));
125 throw new IllegalArgumentException("Invalid format");
128 tempData[tempCount++] = b;
130 byte[] retData = new byte[tempCount];
131 System.arraycopy(tempData, 0, retData, 0, tempCount);
136 * @return True iff the url is correctly URL encoded
138 static boolean verifyURLEncoding(String url) {
139 int count = url.length();
144 int index = url.indexOf('%');
145 while (index >= 0 && index < count) {
146 if (index < count - 2) {
148 parseHex((byte) url.charAt(++index));
149 parseHex((byte) url.charAt(++index));
150 } catch (IllegalArgumentException e) {
156 index = url.indexOf('%', index + 1);
161 private static int parseHex(byte b) {
162 if (b >= '0' && b <= '9') return (b - '0');
163 if (b >= 'A' && b <= 'F') return (b - 'A' + 10);
164 if (b >= 'a' && b <= 'f') return (b - 'a' + 10);
166 throw new IllegalArgumentException("Invalid hex char '" + b + "'");
170 * @return True iff the url is an asset file.
172 public static boolean isAssetUrl(String url) {
173 return (null != url) && url.startsWith(ASSET_BASE);
177 * @return True iff the url is a resource file.
180 public static boolean isResourceUrl(String url) {
181 return (null != url) && url.startsWith(RESOURCE_BASE);
185 * @return True iff the url is an proxy url to allow cookieless network
186 * requests from a file url.
187 * @deprecated Cookieless proxy is no longer supported.
190 public static boolean isCookielessProxyUrl(String url) {
191 return (null != url) && url.startsWith(PROXY_BASE);
195 * @return True iff the url is a local file.
197 public static boolean isFileUrl(String url) {
198 return (null != url) && (url.startsWith(FILE_BASE) &&
199 !url.startsWith(ASSET_BASE) &&
200 !url.startsWith(PROXY_BASE));
204 * @return True iff the url is an about: url.
206 public static boolean isAboutUrl(String url) {
207 return (null != url) && url.startsWith("about:");
211 * @return True iff the url is a data: url.
213 public static boolean isDataUrl(String url) {
214 return (null != url) && url.startsWith("data:");
218 * @return True iff the url is a javascript: url.
220 public static boolean isJavaScriptUrl(String url) {
221 return (null != url) && url.startsWith("javascript:");
225 * @return True iff the url is an http: url.
227 public static boolean isHttpUrl(String url) {
228 return (null != url) &&
229 (url.length() > 6) &&
230 url.substring(0, 7).equalsIgnoreCase("http://");
234 * @return True iff the url is an https: url.
236 public static boolean isHttpsUrl(String url) {
237 return (null != url) &&
238 (url.length() > 7) &&
239 url.substring(0, 8).equalsIgnoreCase("https://");
243 * @return True iff the url is a network url.
245 public static boolean isNetworkUrl(String url) {
246 if (url == null || url.length() == 0) {
249 return isHttpUrl(url) || isHttpsUrl(url);
253 * @return True iff the url is a content: url.
255 public static boolean isContentUrl(String url) {
256 return (null != url) && url.startsWith("content:");
260 * @return True iff the url is valid.
262 public static boolean isValidUrl(String url) {
263 if (url == null || url.length() == 0) {
267 return (isAssetUrl(url) ||
268 isResourceUrl(url) ||
273 isJavaScriptUrl(url) ||
278 * Strips the url of the anchor.
280 public static String stripAnchor(String url) {
281 int anchorIndex = url.indexOf('#');
282 if (anchorIndex != -1) {
283 return url.substring(0, anchorIndex);
289 * Guesses canonical filename that a download would have, using
290 * the URL and contentDisposition. File extension, if not defined,
291 * is added based on the mimetype
292 * @param url Url to the content
293 * @param contentDisposition Content-Disposition HTTP header or null
294 * @param mimeType Mime-type of the content or null
296 * @return suggested filename
298 public static final String guessFileName(
300 String contentDisposition,
302 String filename = null;
303 String extension = null;
305 // If we couldn't do anything with the hint, move toward the content disposition
306 if (filename == null && contentDisposition != null) {
307 filename = parseContentDisposition(contentDisposition);
308 if (filename != null) {
309 int index = filename.lastIndexOf('/') + 1;
311 filename = filename.substring(index);
316 // If all the other http-related approaches failed, use the plain uri
317 if (filename == null) {
318 String decodedUrl = Uri.decode(url);
319 if (decodedUrl != null) {
320 int queryIndex = decodedUrl.indexOf('?');
321 // If there is a query string strip it, same as desktop browsers
322 if (queryIndex > 0) {
323 decodedUrl = decodedUrl.substring(0, queryIndex);
325 if (!decodedUrl.endsWith("/")) {
326 int index = decodedUrl.lastIndexOf('/') + 1;
328 filename = decodedUrl.substring(index);
334 // Finally, if couldn't get filename from URI, get a generic filename
335 if (filename == null) {
336 filename = "downloadfile";
339 // Split filename between base and extension
340 // Add an extension if filename does not have one
341 int dotIndex = filename.indexOf('.');
343 if (mimeType != null) {
344 extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType);
345 if (extension != null) {
346 extension = "." + extension;
349 if (extension == null) {
350 if (mimeType != null && mimeType.toLowerCase().startsWith("text/")) {
351 if (mimeType.equalsIgnoreCase("text/html")) {
361 if (mimeType != null) {
362 // Compare the last segment of the extension against the mime type.
363 // If there's a mismatch, discard the entire extension.
364 int lastDotIndex = filename.lastIndexOf('.');
365 String typeFromExt = MimeTypeMap.getSingleton().getMimeTypeFromExtension(
366 filename.substring(lastDotIndex + 1));
367 if (typeFromExt != null && !typeFromExt.equalsIgnoreCase(mimeType)) {
368 extension = MimeTypeMap.getSingleton().getExtensionFromMimeType(mimeType);
369 if (extension != null) {
370 extension = "." + extension;
374 if (extension == null) {
375 extension = filename.substring(dotIndex);
377 filename = filename.substring(0, dotIndex);
380 return filename + extension;
383 /** Regex used to parse content-disposition headers */
384 private static final Pattern CONTENT_DISPOSITION_PATTERN =
385 Pattern.compile("attachment;\\s*filename\\s*=\\s*(\"?)([^\"]*)\\1\\s*$",
386 Pattern.CASE_INSENSITIVE);
389 * Parse the Content-Disposition HTTP Header. The format of the header
390 * is defined here: http://www.w3.org/Protocols/rfc2616/rfc2616-sec19.html
391 * This header provides a filename for content that is going to be
392 * downloaded to the file system. We only support the attachment type.
393 * Note that RFC 2616 specifies the filename value must be double-quoted.
394 * Unfortunately some servers do not quote the value so to maintain
395 * consistent behaviour with other browsers, we allow unquoted values too.
397 static String parseContentDisposition(String contentDisposition) {
399 Matcher m = CONTENT_DISPOSITION_PATTERN.matcher(contentDisposition);
403 } catch (IllegalStateException ex) {
404 // This function is defined as returning null when it can't parse the header