-// URI pickup
-
-/**
- * Return an array of the URIs found in $string.
- *
- * Matching is regex-based and intentionally looser than RFC3986.
- * Not available for: IDN (ignored).
- *
- * [OK] http://nasty.example.org#nasty_string
- * [OK] http://nasty.example.org:80/foo/xxx#nasty_string/bar
- * [OK] ftp://nasty.example.org:80/dfsdfs
- * [OK] ftp://cnn.example.com&story=breaking_news@10.0.0.1/top_story.htm (from RFC3986)
- *
- * @param string $string          Text to scan.
- * @param bool   $normalize       When TRUE, scheme/port/path are passed through
- *                                scheme_normalize()/port_normalize()/path_normalize(),
- *                                the host is lowercased, and a match whose normalized
- *                                scheme is '' is dropped. When FALSE, the matched text
- *                                is kept as 'uri'.
- * @param bool   $preserve_rawuri With $normalize, keep the matched text as 'rawuri'.
- * @param bool   $preserve_chunk  When FALSE, remove the per-component entries
- *                                (scheme, userinfo, host, port, path, file, query, fragment).
- * @return array One entry per URI, keyed by match index (keys are NOT renumbered
- *               after a match is dropped). Every entry carries ['area']['offset'],
- *               the byte offset of the scheme inside $string.
- */
-function uri_pickup($string = '', $normalize = TRUE,
-    $preserve_rawuri = FALSE, $preserve_chunk = TRUE)
-{
-    // Capture-group number => component name
-    static $parts = array(
-        1 => 'scheme', 2 => 'userinfo', 3 => 'host', 4 => 'port',
-        5 => 'path', 6 => 'file', 7 => 'query', 8 => 'fragment'
-    );
-
-    $array = array();
-    preg_match_all(
-        // scheme://userinfo@host:port/path/or/pathinfo/maybefile.and?query=string#fragment
-        // Refer RFC3986 (Regex below is not strict)
-        '#(\b[a-z][a-z0-9.+-]{1,8})://' . // 1: Scheme
-        '(?:' .
-            '([^\s<>"\'\[\]/\#?@]*)' . // 2: Userinfo (Username)
-        '@)?' .
-        '(' .
-            // 3: Host
-            '\[[0-9a-f:.]+\]' . '|' . // IPv6([colon-hex and dot]): RFC2732
-            // IPv4(dot-decimal): 001.22.3.44
-            // The quantifier must be {1,3}; '{1-3}' is taken as literal text by PCRE.
-            // The lookahead rejects a dotted quad that is only the prefix of a longer
-            // host (1.2.3.4.example.com), leaving that case to the hostname branch.
-            '(?:[0-9]{1,3}\.){3}[0-9]{1,3}(?![a-z0-9.-])' . '|' .
-            '[a-z0-9.-]+' . // hostname(FQDN) : foo.example.org
-        ')' .
-        '(?::([0-9]*))?' . // 4: Port
-        '((?:/+[^\s<>"\'\[\]/\#]+)*/+)?' . // 5: Directory path or path-info
-        '([^\s<>"\'\[\]\#?]+)?' . // 6: File?
-        '(?:\?([^\s<>"\'\[\]\#]+))?' . // 7: Query string
-        '(?:\#([a-z0-9._~%!$&\'()*+,;=:@-]*))?' . // 8: Fragment
-        '#i',
-        $string, $array, PREG_SET_ORDER | PREG_OFFSET_CAPTURE
-    );
-
-    // Shrink $array: each match becomes a flat, named set of strings
-    $default = array(''); // Stand-in [text] pair handed to array_rename_keys()
-    foreach (array_keys($array) as $uri) {
-        $_uri = & $array[$uri];
-        array_rename_keys($_uri, $parts, TRUE, $default);
-
-        $offset = $_uri['scheme'][1]; // Scheme's offset
-        foreach (array_keys($_uri) as $part) {
-            // Each part is a [text, offset] pair; keep the text only
-            $_uri[$part] = $_uri[$part][0];
-        }
-
-        if ($normalize) {
-            $_uri['scheme'] = scheme_normalize($_uri['scheme']);
-            if ($_uri['scheme'] === '') {
-                // Scheme normalized to nothing: discard this match
-                unset($array[$uri]);
-                continue;
-            }
-            $_uri['host'] = strtolower($_uri['host']);
-            $_uri['port'] = port_normalize($_uri['port'], $_uri['scheme'], FALSE);
-            $_uri['path'] = path_normalize($_uri['path']);
-            if ($preserve_rawuri) {
-                $_uri['rawuri'] = $_uri[0];
-            }
-        } else {
-            $_uri['uri'] = $_uri[0]; // Raw
-        }
-        unset($_uri[0]); // Matched string itself
-
-        if (! $preserve_chunk) {
-            foreach ($parts as $name) {
-                unset($_uri[$name]);
-            }
-        }
-
-        // Area offset for area_measure()
-        $_uri['area']['offset'] = $offset;
-    }
-    unset($_uri); // Break the reference to the last visited element
-
-    return $array;
-}