<?php
-// $Id: SpamPickupTest.php,v 1.2 2007/07/02 15:27:20 henoheno Exp $
+// $Id: SpamPickupTest.php,v 1.3 2007/08/20 14:37:23 henoheno Exp $
// Copyright (C) 2007 heno
//
// Design test case for spam.php (called from runner.php)
$results = uri_pickup_normalize(uri_pickup($test_string));
$this->assertEquals('backslash.org', $results[0]['host']);
+ // Divider: percent-encoded (disabled; presumably uri_pickup() itself does not decode percent-encoding -- TODO confirm)
+ //$test_string = ' http%3A%2F%5Cpercent-encoded.org%5Cfobar.html ';
+ //$results = uri_pickup_normalize(uri_pickup($test_string));
+ //$this->assertEquals('percent-encoded.org', $results[0]['host']);
+
// Host: Underscore
$test_string = ' http://under_score.org/fobar.html ';
$results = uri_pickup_normalize(uri_pickup($test_string));
$this->assertEquals('foo.html', $results[0]['file']);
}
+ function testFunc_spam_uri_pickup()
+ {
+ // Divider: percent-encoded ':/\' (%3A%2F%5C) embedded in the path of another URI
+ $test_string = ' http://victim.example.org/http%3A%2F%5Cnasty.example.org ';
+ $results = spam_uri_pickup($test_string);
+ $this->assertEquals('victim.example.org', $results[0]['host']); // outer (plain) URI
+ $this->assertEquals('nasty.example.org', $results[1]['host']); // URI hidden by percent-encoding
+ }
}
?>
\ No newline at end of file
<?php
-// $Id: spam_pickup.php,v 1.54 2007/08/19 03:12:35 henoheno Exp $
+// $Id: spam_pickup.php,v 1.55 2007/08/20 14:37:23 henoheno Exp $
// Copyright (C) 2006-2007 PukiWiki Developers Team
// License: GPL v2 or (at your option) any later version
//
preg_match_all(
// scheme://userinfo@host:port/path/or/pathinfo/maybefile.and?query=string#fragment
// Refer RFC3986 (Regex below is not strict)
- '#(\b[a-z][a-z0-9.+-]{1,8}):[/\\\]+' . // 1: Scheme
+ '#(\b[a-z][a-z0-9.+-]{1,8}):[/\\\]+' . // 1: Scheme
'(?:' .
'([^\s<>"\'\[\]/\#?@]*)' . // 2: Userinfo (Username)
'@)?' .
// 3: Host
'\[[0-9a-f:.]+\]' . '|' . // IPv6([colon-hex and dot]): RFC2732
'(?:[0-9]{1,3}\.){3}[0-9]{1,3}' . '|' . // IPv4(dot-decimal): 001.22.3.44
- '[a-z0-9_-][a-z0-9_.-]+[a-z0-9_-]' . // hostname(FQDN) : foo.example.org
+ '[a-z0-9_-][a-z0-9_.-]+[a-z0-9_-]' . // hostname(FQDN) : foo.example.org
')' .
'(?::([0-9]*))?' . // 4: Port
'((?:/+[^\s<>"\'\[\]/\#]+)*/+)?' . // 5: Directory path or path-info
return $result;
}
-// Preprocess: rawurldecode() and adding space(s) and something
+// Preprocess: partial rawurldecode() (only ':', '/' and '\'), adding space(s) and something
// to detect/count some URIs _if possible_
// NOTE: It's maybe danger to var_dump(result). [e.g. 'javascript:']
// [OK] http://victim.example.org/?site:nasty.example.org
{
if (! is_string($string)) return '';
- $string = spam_uri_removing_hocus_pocus(rawurldecode($string), $method);
+ // Partial rawurldecode(): decode only ':', '/' and '\' to catch an encoded 'http://path/to/file', without changing '%20' to ' '
+ $string = strtr(
+ $string,
+ array(
+ '%3A' => ':',
+ '%3a' => ':',
+ '%2F' => '/',
+ '%2f' => '/',
+ '%5C' => '\\',
+ '%5c' => '\\',
+ )
+ );
+
+ $string = spam_uri_removing_hocus_pocus($string, $method);
//var_dump(htmlspecialchars($string));
// Domain exposure (simple)