2 // $Id: SpamPickupTest.php,v 1.5 2008/12/27 11:50:55 henoheno Exp $
3 // Copyright (C) 2007 heno
5 // Design test case for spam.php (called from runner.php)
// Use 'spam.ini.php' as SPAM_INI_FILE unless the constant is already defined
7 if (! defined('SPAM_INI_FILE')) define('SPAM_INI_FILE', 'spam.ini.php');
// presumably provides the *_normalize() / *_pickup() functions exercised below -- TODO confirm
9 require_once('spam_pickup.php');
// presumably provides the PHPUnit_TestCase base class -- TODO confirm
10 require_once('PHPUnit/PHPUnit.php');
12 class SpamPickupTest extends PHPUnit_TestCase
// Shared fixture for the tests in this class: they iterate its result as
// $key => $value and pass $key as the assertion message.
// NOTE(review): only the '[array(foobar)]' entry is visible in this listing;
// the remaining entries must be confirmed against the full file.
14 function setup_string_null()
20 '[array(foobar)]' => array('foobar'),
// Exercises scheme_normalize(): fixture values, letter case, scheme aliases,
// abbreviated schemes, and the optional 2nd argument.
27 function testFunc_scheme_normalize()
// Every fixture value from setup_string_null() is expected to give ''
30 foreach($this->setup_string_null() as $key => $value){
31 $this->assertEquals('', scheme_normalize($value), $key);
// Upper-case input is expected back in lower case
35 $this->assertEquals('http', scheme_normalize('HTTP'));
// Alternative spellings are expected to map to a single scheme name
38 $this->assertEquals('pop3', scheme_normalize('pop'));
39 $this->assertEquals('nntp', scheme_normalize('news'));
40 $this->assertEquals('imap', scheme_normalize('imap4'));
41 $this->assertEquals('nntps', scheme_normalize('snntp'));
42 $this->assertEquals('nntps', scheme_normalize('snews'));
43 $this->assertEquals('pop3s', scheme_normalize('spop3'));
44 $this->assertEquals('pop3s', scheme_normalize('pops'));
// With the default 2nd argument, 'ttp' / 'ttps' are expected to become 'http' / 'https'
47 $this->assertEquals('http', scheme_normalize('ttp'));
48 $this->assertEquals('https', scheme_normalize('ttps'));
50 // Abbrevs considered harmless: with FALSE as 2nd argument they are expected to give ''
51 $this->assertEquals('', scheme_normalize('ttp', FALSE));
52 $this->assertEquals('', scheme_normalize('ttps', FALSE));
// Exercises host_normalize(): fixture values, letter case, and removal of a
// leading 'www.'.
55 function testFunc_host_normalize()
// Every fixture value from setup_string_null() is expected to give ''
58 foreach($this->setup_string_null() as $key => $value){
59 $this->assertEquals('', host_normalize($value), $key);
62 // Hostname is case-insensitive: the result is expected in lower case
63 $this->assertEquals('example.org', host_normalize('ExAMPle.ORG'));
65 // Cut 'www' with traditional ASCII-based FQDN (destructive)
66 $this->assertEquals('example.org', host_normalize('WWW.example.org'));
68 // Don't cut 'www' with Non-ASCII-based string such as IDN.
// The sample embeds a NUL byte: 'www.' is expected to stay, the letters are still lower-cased
69 $this->assertEquals("www.example.org\0foobar",
70 host_normalize("WWW.example.org\0foobar"));
// Exercises port_normalize($port, $scheme [, $scheme_normalize]): which port
// values are reduced to '' and which are returned as numbers.
73 function testFunc_port_normalize()
// Placeholder scheme for the cases where the expected result does not name a real scheme
75 $scheme = 'dont_care';
// 1st argument: NULL, booleans, an array and '' are all expected to give ''
78 $this->assertEquals('', port_normalize(NULL, $scheme));
79 $this->assertEquals('', port_normalize(TRUE, $scheme));
80 $this->assertEquals('', port_normalize(FALSE, $scheme));
81 $this->assertEquals('', port_normalize(array('foobar'), $scheme));
82 $this->assertEquals('', port_normalize('', $scheme));
84 // 1st argument: Known port
// A port paired with its matching scheme is expected to give ''; with the
// placeholder scheme the number is expected back unchanged (-1 gives '')
85 $this->assertEquals('', port_normalize( -1, $scheme));
86 $this->assertEquals(0, port_normalize( 0, $scheme));
87 $this->assertEquals(1, port_normalize( 1, $scheme));
88 $this->assertEquals('', port_normalize( 21, 'ftp'));
89 $this->assertEquals('', port_normalize( 22, 'ssh'));
90 $this->assertEquals('', port_normalize( 23, 'telnet'));
91 $this->assertEquals('', port_normalize( 25, 'smtp'));
92 $this->assertEquals('', port_normalize( 69, 'tftp'));
93 $this->assertEquals('', port_normalize( 70, 'gopher'));
94 $this->assertEquals('', port_normalize( 79, 'finger'));
95 $this->assertEquals('', port_normalize( 80, 'http'));
96 $this->assertEquals('', port_normalize( 110, 'pop3'));
97 $this->assertEquals('', port_normalize( 115, 'sftp'));
98 $this->assertEquals('', port_normalize( 119, 'nntp'));
99 $this->assertEquals('', port_normalize( 143, 'imap'));
100 $this->assertEquals('', port_normalize( 194, 'irc'));
101 $this->assertEquals('', port_normalize( 210, 'wais'));
102 $this->assertEquals('', port_normalize( 443, 'https'));
103 $this->assertEquals('', port_normalize( 563, 'nntps'));
104 $this->assertEquals('', port_normalize( 873, 'rsync'));
105 $this->assertEquals('', port_normalize( 990, 'ftps'));
106 $this->assertEquals('', port_normalize( 992, 'telnets'));
107 $this->assertEquals('', port_normalize( 993, 'imaps'));
108 $this->assertEquals('', port_normalize( 994, 'ircs'));
109 $this->assertEquals('', port_normalize( 995, 'pop3s'));
110 $this->assertEquals('', port_normalize( 3306, 'mysql'));
111 $this->assertEquals(8080, port_normalize( 8080, $scheme));
112 $this->assertEquals(65535, port_normalize(65535, $scheme));
113 $this->assertEquals(65536, port_normalize(65536, $scheme)); // Seems not invalid in RFC
115 // 1st argument: Invalid type (strings instead of integers)
// '001' is expected to come back as 1; the appended 'x' presumably makes the comparison a string one
116 $this->assertEquals('1x', port_normalize('001', $scheme) . 'x');
117 $this->assertEquals('', port_normalize('+0', $scheme));
118 $this->assertEquals('', port_normalize('0-1', $scheme)); // intval() says '0'
119 $this->assertEquals('', port_normalize('str', $scheme));
121 // 2nd and 3rd argument: Null
// With a null-like scheme the port is expected back unchanged even when the 3rd argument is TRUE
122 $this->assertEquals(80, port_normalize(80, NULL, TRUE));
123 $this->assertEquals(80, port_normalize(80, TRUE, TRUE));
124 $this->assertEquals(80, port_normalize(80, FALSE, TRUE));
125 $this->assertEquals(80, port_normalize(80, array('foobar'), TRUE));
126 $this->assertEquals(80, port_normalize(80, '', TRUE));
128 // 2nd and 3rd argument: Do $scheme_normalize
// TRUE: 'TTP' / 'POP' are expected to be treated like http / pop3, so 80 / 110 give ''.
// FALSE: 'HTTP' is expected to be taken as-is, so 80 stays 80.
129 $this->assertEquals('', port_normalize(80, 'TTP', TRUE));
130 $this->assertEquals('', port_normalize(110, 'POP', TRUE));
131 $this->assertEquals(80, port_normalize(80, 'HTTP', FALSE));
// Exercises path_normalize($path [, $divider]): fixture values, letter case,
// '.' / '..' / repeated-divider handling, and alternative 2nd arguments.
134 function testFunc_path_normalize()
136 // 1st argument: Null -- every fixture value is expected to give '/'
137 foreach($this->setup_string_null() as $key => $value){
138 $this->assertEquals('/', path_normalize($value), $key);
141 // 1st argument: CASE sensitive
// Letter case is expected to be kept, a leading '/' added, '.' and empty
// segments dropped, and '..' never to climb above the root
142 $this->assertEquals('/ExAMPle', path_normalize('ExAMPle'));
143 $this->assertEquals('/#hoge', path_normalize('#hoge'));
144 $this->assertEquals('/a/b/c/d', path_normalize('/a/b/./c////./d'));
145 $this->assertEquals('/b/', path_normalize('/a/../../../b/'));
// 2nd argument: a string is expected to act as the divider ('\\' or even 'str');
// TRUE or an array is expected to leave the path untouched;
// an array as 1st argument is expected to give ''
148 $this->assertEquals('\\b\\c\\d\\', path_normalize('\\a\\..\\b\\.\\c\\\\.\\d\\', '\\'));
149 $this->assertEquals('str1str3str', path_normalize('str1strstr2str..str3str', 'str'));
150 $this->assertEquals('/do/../nothing/', path_normalize('/do/../nothing/', TRUE));
151 $this->assertEquals('/do/../nothing/', path_normalize('/do/../nothing/', array('a')));
152 $this->assertEquals('', path_normalize(array('a'), array('b')));
// Exercises query_normalize(): fixture values, ordering of parameters, and
// repeated keys.
155 function testFunc_query_normalize()
157 // 1st argument: Null -- every fixture value is expected to give ''
158 foreach($this->setup_string_null() as $key => $value){
159 $this->assertEquals('', query_normalize($value), $key);
// Empty parameters are expected to disappear and the rest to come back ordered by key
162 $this->assertEquals('a=0dd&b&c&d&f=d', query_normalize('&&&&f=d&b&d&c&a=0dd'));
// For a repeated key only the last value is expected to remain; 'nothing==' is expected to be dropped
163 $this->assertEquals('eg=foobar', query_normalize('nothing==&eg=dummy&eg=padding&eg=foobar'));
// Exercises file_normalize(): fixture values and a list of directory-index
// style file names that are all expected to be reduced to ''.
166 function testFunc_file_normalize()
168 // 1st argument: Null -- every fixture value is expected to give ''
169 foreach($this->setup_string_null() as $key => $value){
170 $this->assertEquals('', file_normalize($value), $key);
173 // 1st argument: Cut DirectoryIndexes (Destructive)
193 // Apache 2.0.59 default 'index.html' variants
195 'index.html.cz.iso8859-2',
204 'index.html.he.iso8859-8',
205 'index.html.hr.iso8859-2',
207 'index.html.ja.iso2022-jp',
208 'index.html.ko.euc-kr',
209 'index.html.lb.utf8',
213 'index.html.po.iso8859-2',
216 'index.html.ru.cp866',
217 'index.html.ru.cp-1251',
218 'index.html.ru.iso-ru',
219 'index.html.ru.koi8-r',
220 'index.html.ru.utf8',
222 'index.html.var', // default
223 'index.html.zh-cn.gb2312',
224 'index.html.zh-tw.big5',
226 'index.html.po.iso8859-2',
227 'index.html.zh-tw.big5',
229 'index.ja.en.de.html',
233 'index.html.en.ja.ca.z',
// NOTE(review): presumably $arg takes each file name listed above in turn --
// the loop header is not visible in this listing; confirm against the full file
235 $this->assertEquals('', file_normalize($arg));
// Disabled expectations for non-index file names, kept as found
238 //$this->assertEquals('foo/', file_normalize('foo/index.html'));
240 //$this->assertEquals('ExAMPle', file_normalize('ExAMPle'));
241 //$this->assertEquals('exe.exe', file_normalize('exe.exe'));
242 //$this->assertEquals('sample.html', file_normalize('sample.html.en'));
243 //$this->assertEquals('sample.html', file_normalize('sample.html.pt-br'));
244 //$this->assertEquals('sample.html', file_normalize('sample.html.po.iso8859-2'));
245 //$this->assertEquals('sample.html', file_normalize('sample.html.zh-tw.big5'));
// Exercises uri_pickup() and uri_pickup_normalize(): fixture values, a
// multi-line sample with five URIs, and a series of single-URI edge cases.
248 function testFunc_uri_pickup()
250 // 1st argument: Null -- every fixture value is expected to yield zero URIs
251 foreach($this->setup_string_null() as $key => $value){
252 $this->assertEquals(0, count(uri_pickup($value)), $key);
255 // 1st argument: Some (five URIs, one per line)
256 $test_string = <<<EOF
257 TTP://wwW.Example.Org#TTP_and_www
258 https://nasty.example.org:443/foo/xxx#port443/slash
259 sftp://foobar.example.org:80/dfsdfs#ftp_bat_port80
260 ftp://cnn.example.com&story=breaking_news@10.0.0.1/top_story.htm
261 http://192.168.1.4:443#IPv4
263 $results = uri_pickup_normalize(uri_pickup($test_string));
264 $this->assertEquals(5, count($results));
// NOTE(review): five URIs are counted, but only $results[0] to $results[3]
// are inspected below; the fifth (http://192.168.1.4:443#IPv4) has no field assertions
266 // TTP://wwW.Example.Org#TTP_and_www
267 $this->assertEquals('http', $results[0]['scheme']);
268 $this->assertEquals('', $results[0]['userinfo']);
269 $this->assertEquals('example.org', $results[0]['host']);
270 $this->assertEquals('', $results[0]['port']);
271 $this->assertEquals('/', $results[0]['path']);
272 $this->assertEquals('', $results[0]['file']);
273 $this->assertEquals('', $results[0]['query']);
274 $this->assertEquals('ttp_and_www', $results[0]['fragment']);
276 // https://nasty.example.org:443/foo/xxx#port443/slash
277 $this->assertEquals('https', $results[1]['scheme']);
278 $this->assertEquals('', $results[1]['userinfo']);
279 $this->assertEquals('nasty.example.org', $results[1]['host']);
280 $this->assertEquals('', $results[1]['port']);
281 $this->assertEquals('/foo/', $results[1]['path']);
282 $this->assertEquals('xxx', $results[1]['file']);
283 $this->assertEquals('', $results[1]['query']);
284 $this->assertEquals('port443', $results[1]['fragment']);
286 // sftp://foobar.example.org:80/dfsdfs#ftp_bat_port80
287 $this->assertEquals('sftp', $results[2]['scheme']);
288 $this->assertEquals('', $results[2]['userinfo']);
289 $this->assertEquals('foobar.example.org', $results[2]['host']);
290 $this->assertEquals('80', $results[2]['port']);
291 $this->assertEquals('/', $results[2]['path']);
292 $this->assertEquals('dfsdfs', $results[2]['file']);
293 $this->assertEquals('', $results[2]['query']);
294 $this->assertEquals('ftp_bat_port80', $results[2]['fragment']);
296 // ftp://cnn.example.com&story=breaking_news@10.0.0.1/top_story.htm
297 $this->assertEquals('ftp', $results[3]['scheme']);
298 $this->assertEquals('cnn.example.com&story=breaking_news', $results[3]['userinfo']);
299 $this->assertEquals('10.0.0.1', $results[3]['host']);
300 $this->assertEquals('', $results[3]['port']);
301 $this->assertEquals('/', $results[3]['path']);
302 $this->assertEquals('top_story.htm', $results[3]['file']);
303 $this->assertEquals('', $results[3]['query']);
304 $this->assertEquals('', $results[3]['fragment']);
307 // Specific tests ---- (cases expected to be picked up)
309 // Divider: Back-slash (the literal below holds 'http:\backslash.org\fobar.html')
310 $test_string = ' http:\\backslash.org\fobar.html ';
311 $results = uri_pickup_normalize(uri_pickup($test_string));
312 $this->assertEquals('backslash.org', $results[0]['host']);
314 // Divider: percent-encoded (disabled)
315 //$test_string = ' http%3A%2F%5Cpercent-encoded.org%5Cfobar.html ';
316 //$results = uri_pickup_normalize(uri_pickup($test_string));
317 //$this->assertEquals('percent-encoded.org', $results[0]['host']);
// Host containing an underscore: the whole name is expected, not a prefix
320 $test_string = ' http://under_score.org/fobar.html ';
321 $results = uri_pickup_normalize(uri_pickup($test_string));
322 $this->assertEquals('under_score.org',$results[0]['host']); // Not 'under'
// Host given as a dotted IPv4 address
325 $test_string = ' http://192.168.0.1/fobar.html ';
326 $results = uri_pickup_normalize(uri_pickup($test_string));
327 $this->assertEquals('192.168.0.1', $results[0]['host']);
// Host starting with an underscore
330 $test_string = ' http://_sss/foo.html ';
331 $results = uri_pickup_normalize(uri_pickup($test_string));
332 $this->assertEquals('_sss', $results[0]['host']);
333 $this->assertEquals('foo.html', $results[0]['file']);
// Host ending with an underscore
336 $test_string = ' http://sss_/foo.html ';
337 $results = uri_pickup_normalize(uri_pickup($test_string));
338 $this->assertEquals('sss_', $results[0]['host']);
339 $this->assertEquals('foo.html', $results[0]['file']);
342 // Specific tests ---- Fails (inputs expected to yield no URI)
344 // Divider: Colon only (Too sensitive to capture)
345 $test_string = ' http:colon.org ';
346 $results = uri_pickup_normalize(uri_pickup($test_string));
347 $this->assertEquals(0, count($results));
// Hosts of one or two characters are expected to yield nothing
350 $test_string = ' http://s/foo.html http://ss/foo.html ';
351 $results = uri_pickup_normalize(uri_pickup($test_string));
352 $this->assertEquals(0, count($results));
// ... whereas a three-character host is expected to be picked up
354 $test_string = ' http://sss/foo.html ';
355 $results = uri_pickup_normalize(uri_pickup($test_string));
356 $this->assertEquals('sss', $results[0]['host']);
357 $this->assertEquals('foo.html', $results[0]['file']);
// Exercises spam_uri_pickup(): URIs hidden inside another URI are expected to
// be reported as results of their own.
360 function testFunc_spam_uri_pickup()
362 // Divider: percent-encoded
// The outer host is expected first, then the host of the percent-encoded URI in its path
363 $test_string = ' http://victim.example.org/http%3A%2F%5Cnasty.example.org ';
364 $results = spam_uri_pickup($test_string);
365 $this->assertEquals('victim.example.org', $results[0]['host']);
366 $this->assertEquals('nasty.example.org', $results[1]['host']);
368 // Domain exposure (site:)
// The domain named after 'site:' is expected first, the search host second.
// NOTE(review): this input uses '%20site:' exactly like the next case; if a
// bare 'site:' input was intended here, the test string needs changing -- confirm
369 $test_string = ' http://search.example.org/?q=%20site:nasty.example.org ';
370 $results = spam_uri_pickup($test_string);
371 $this->assertEquals('nasty.example.org', $results[0]['host']);
372 $this->assertEquals('search.example.org', $results[1]['host']);
374 // Domain exposure (%20site:)
375 $test_string = ' http://search2.example.org/?q=%20site:nasty2.example.org ';
376 $results = spam_uri_pickup($test_string);
377 $this->assertEquals('nasty2.example.org', $results[0]['host']);
378 $this->assertEquals('search2.example.org', $results[1]['host']);