OSDN Git Service

005ca3c1c66082b8daa690b739f59efd0d6de515
[pukiwiki/pukiwiki_sandbox.git] / spam / SpamPickupTest.php
<?php
// $Id: SpamPickupTest.php,v 1.7 2008/12/31 15:44:14 henoheno Exp $
// Copyright (C) 2007 heno
//
// Design test case for spam.php (called from runner.php)

// Fall back to the default configuration file name only when the caller
// (e.g. the test runner) has not defined SPAM_INI_FILE already.
if (! defined('SPAM_INI_FILE')) define('SPAM_INI_FILE', 'spam.ini.php');

require_once('spam_pickup.php');
require_once('PHPUnit/PHPUnit.php');

// Test case for the URI normalizing / pickup functions loaded from
// spam_pickup.php: scheme_normalize(), host_normalize(), port_normalize(),
// path_normalize(), query_normalize(), file_normalize(), uri_pickup(),
// uri_pickup_normalize(), uri_pickup_normalize_pathtofile() and
// spam_uri_pickup().
class SpamPickupTest extends PHPUnit_TestCase
{
        // Shared fixture of "not a usable string" inputs.
        // Returns: array(label => value). The label is passed as the
        // assertion message, so a failure report names the offending input.
        // Called explicitly by the tests below.
        function setup_string_null()
        {
                return array(
                        '[NULL]'        => NULL,
                        '[TRUE]'        => TRUE,
                        '[FALSE]'       => FALSE,
                        '[array(foobar)]' => array('foobar'),
                        '[]'            => '',
                        '[0]'           => 0,
                        '[1]'           => 1
                );
        }

        // scheme_normalize(): null-like input, case folding, aliases, and
        // abbreviated schemes (with and without the 2nd argument set to FALSE)
        function testFunc_scheme_normalize()
        {
                // Null
                foreach($this->setup_string_null() as $key => $value){
                        $this->assertEquals('', scheme_normalize($value), $key);
                }

                // CASE
                $this->assertEquals('http', scheme_normalize('HTTP'));

                // Aliases
                $this->assertEquals('pop3',  scheme_normalize('pop'));
                $this->assertEquals('nntp',  scheme_normalize('news'));
                $this->assertEquals('imap',  scheme_normalize('imap4'));
                $this->assertEquals('nntps', scheme_normalize('snntp'));
                $this->assertEquals('nntps', scheme_normalize('snews'));
                $this->assertEquals('pop3s', scheme_normalize('spop3'));
                $this->assertEquals('pop3s', scheme_normalize('pops'));

                // Abbrevs
                $this->assertEquals('http',  scheme_normalize('ttp'));
                $this->assertEquals('https', scheme_normalize('ttps'));

                // Abbrevs considered harmless
                $this->assertEquals('', scheme_normalize('ttp',  FALSE));
                $this->assertEquals('', scheme_normalize('ttps', FALSE));
        }

        // host_normalize(): null-like input, case folding, and 'www' cutting
        function testFunc_host_normalize()
        {
                // Invalid: Null
                foreach($this->setup_string_null() as $key => $value){
                        $this->assertEquals('', host_normalize($value), $key);
                }

                // Hostname is case-insensitive
                $this->assertEquals('example.org', host_normalize('ExAMPle.ORG'));

                // Cut 'www' with traditional ASCII-based FQDN (destructive)
                $this->assertEquals('example.org', host_normalize('WWW.example.org'));

                // Don't cut 'www' with Non-ASCII-based string such as IDN
                $this->assertEquals("www.example.org\0foobar",
                         host_normalize("WWW.example.org\0foobar"));
        }

        // port_normalize(): null-like ports, well-known ports per scheme
        // (expected to collapse to ''), non-default ports (kept), malformed
        // port strings, and the scheme / 3rd-argument combinations
        function testFunc_port_normalize()
        {
                // Placeholder scheme for cases where the scheme must not matter
                $scheme = 'dont_care';

                // 1st argument: Null
                $this->assertEquals('', port_normalize(NULL, $scheme));
                $this->assertEquals('', port_normalize(TRUE, $scheme));
                $this->assertEquals('', port_normalize(FALSE, $scheme));
                $this->assertEquals('', port_normalize(array('foobar'), $scheme));
                $this->assertEquals('', port_normalize('',   $scheme));

                // 1st argument: Known port
                $this->assertEquals('',    port_normalize(   -1, $scheme));
                $this->assertEquals(0,     port_normalize(    0, $scheme));
                $this->assertEquals(1,     port_normalize(    1, $scheme));
                $this->assertEquals('',    port_normalize(   21, 'ftp'));
                $this->assertEquals('',    port_normalize(   22, 'ssh'));
                $this->assertEquals('',    port_normalize(   23, 'telnet'));
                $this->assertEquals('',    port_normalize(   25, 'smtp'));
                $this->assertEquals('',    port_normalize(   69, 'tftp'));
                $this->assertEquals('',    port_normalize(   70, 'gopher'));
                $this->assertEquals('',    port_normalize(   79, 'finger'));
                $this->assertEquals('',    port_normalize(   80, 'http'));
                $this->assertEquals('',    port_normalize(  110, 'pop3'));
                $this->assertEquals('',    port_normalize(  115, 'sftp'));
                $this->assertEquals('',    port_normalize(  119, 'nntp'));
                $this->assertEquals('',    port_normalize(  143, 'imap'));
                $this->assertEquals('',    port_normalize(  194, 'irc'));
                $this->assertEquals('',    port_normalize(  210, 'wais'));
                $this->assertEquals('',    port_normalize(  443, 'https'));
                $this->assertEquals('',    port_normalize(  563, 'nntps'));
                $this->assertEquals('',    port_normalize(  873, 'rsync'));
                $this->assertEquals('',    port_normalize(  990, 'ftps'));
                $this->assertEquals('',    port_normalize(  992, 'telnets'));
                $this->assertEquals('',    port_normalize(  993, 'imaps'));
                $this->assertEquals('',    port_normalize(  994, 'ircs'));
                $this->assertEquals('',    port_normalize(  995, 'pop3s'));
                $this->assertEquals('',    port_normalize( 3306, 'mysql'));
                $this->assertEquals(8080,  port_normalize( 8080, $scheme));
                $this->assertEquals(65535, port_normalize(65535, $scheme));
                $this->assertEquals(65536, port_normalize(65536, $scheme)); // Seems not invalid in RFC

                // 1st argument: Invalid type
                // Appending 'x' forces a string comparison: '001' must come back as 1
                $this->assertEquals('1x',  port_normalize('001', $scheme) . 'x');
                $this->assertEquals('',    port_normalize('+0',  $scheme));
                $this->assertEquals('',    port_normalize('0-1', $scheme)); // intval() says '0'
                $this->assertEquals('',    port_normalize('str', $scheme));

                // 2nd and 3rd argument: Null
                $this->assertEquals(80,    port_normalize(80, NULL,  TRUE));
                $this->assertEquals(80,    port_normalize(80, TRUE,  TRUE));
                $this->assertEquals(80,    port_normalize(80, FALSE, TRUE));
                $this->assertEquals(80,    port_normalize(80, array('foobar'), TRUE));
                $this->assertEquals(80,    port_normalize(80, '', TRUE));

                // 2nd and 3rd argument: Do $scheme_normalize
                $this->assertEquals('',    port_normalize(80,  'TTP',  TRUE));
                $this->assertEquals('',    port_normalize(110, 'POP',  TRUE));
                $this->assertEquals(80,    port_normalize(80,  'HTTP', FALSE));
        }

        // path_normalize(): null-like input (expected '/'), case sensitivity,
        // '.' / '..' / repeated-divider folding, and custom dividers passed
        // as the 2nd argument
        function testFunc_path_normalize()
        {
                // 1st argument: Null
                foreach($this->setup_string_null() as $key => $value){
                        $this->assertEquals('/', path_normalize($value), $key);
                }

                // 1st argument: CASE sensitive
                $this->assertEquals('/ExAMPle', path_normalize('ExAMPle'));
                $this->assertEquals('/#hoge',   path_normalize('#hoge'));
                $this->assertEquals('/a/b/c/d', path_normalize('/a/b/./c////./d'));
                $this->assertEquals('/b/',      path_normalize('/a/../../../b/'));

                // 2nd argument
                $this->assertEquals('\\b\\c\\d\\', path_normalize('\\a\\..\\b\\.\\c\\\\.\\d\\', '\\'));
                $this->assertEquals('str1str3str', path_normalize('str1strstr2str..str3str', 'str'));
                // Non-string dividers: the path is expected back unchanged
                $this->assertEquals('/do/../nothing/', path_normalize('/do/../nothing/', TRUE));
                $this->assertEquals('/do/../nothing/', path_normalize('/do/../nothing/', array('a')));
                $this->assertEquals('',            path_normalize(array('a'), array('b')));
        }

        // query_normalize(): null-like input, then ordering / empty-pair /
        // repeated-key handling of a query string
        function testFunc_query_normalize()
        {
                // 1st argument: Null
                foreach($this->setup_string_null() as $key => $value){
                        $this->assertEquals('', query_normalize($value), $key);
                }

                $this->assertEquals('a=0dd&b&c&d&f=d', query_normalize('&&&&f=d&b&d&c&a=0dd'));
                $this->assertEquals('eg=foobar',       query_normalize('nothing==&eg=dummy&eg=padding&eg=foobar'));
        }

        // file_normalize(): null-like input and directory-index file names,
        // all of which are expected to normalize to ''
        function testFunc_file_normalize()
        {
                // 1st argument: Null
                foreach($this->setup_string_null() as $key => $value){
                        $this->assertEquals('', file_normalize($value), $key);
                }

                // 1st argument: Cut DirectoryIndexes (Destructive)
                foreach(array(
                        'default.htm',
                        'default.html',
                        'default.asp',
                        'default.aspx',
                        'index',
                        'index.htm',
                        'index.html',
                        'index.shtml',
                        'index.jsp',
                        'index.php',
                        'index.php3',
                        'index.php4',
                        'index.pl',
                        'index.py',
                        'index.rb',
                        'index.cgi',

                        // Apache 2.0.59 default 'index.html' variants
                        'index.html.ca',
                        'index.html.cz.iso8859-2',
                        'index.html.de',
                        'index.html.dk',
                        'index.html.ee',
                        'index.html.el',
                        'index.html.en',
                        'index.html.es',
                        'index.html.et',
                        'index.html.fr',
                        'index.html.he.iso8859-8',
                        'index.html.hr.iso8859-2',
                        'index.html.it',
                        'index.html.ja.iso2022-jp',
                        'index.html.ko.euc-kr',
                        'index.html.lb.utf8',
                        'index.html.nl',
                        'index.html.nn',
                        'index.html.no',
                        'index.html.po.iso8859-2',
                        'index.html.pt',
                        'index.html.pt-br',
                        'index.html.ru.cp866',
                        'index.html.ru.cp-1251',
                        'index.html.ru.iso-ru',
                        'index.html.ru.koi8-r',
                        'index.html.ru.utf8',
                        'index.html.sv',
                        'index.html.var',       // default
                        'index.html.zh-cn.gb2312',
                        'index.html.zh-tw.big5',

                        // Language suffixes placed before the extension
                        'index.ja.en.de.html',

                        // Compressed variants (.gz / .z)
                        'index.html.ca.gz',
                        'index.html.en.ja.ca.z',
                ) as $arg){
                        $this->assertEquals('', file_normalize($arg));
                }

                // Disabled cases, kept from the original file.
                // NOTE(review): presumably not supported by file_normalize() yet -- confirm

                //$this->assertEquals('foo/', file_normalize('foo/index.html'));

                //$this->assertEquals('ExAMPle', file_normalize('ExAMPle'));
                //$this->assertEquals('exe.exe', file_normalize('exe.exe'));
                //$this->assertEquals('sample.html', file_normalize('sample.html.en'));
                //$this->assertEquals('sample.html', file_normalize('sample.html.pt-br'));
                //$this->assertEquals('sample.html', file_normalize('sample.html.po.iso8859-2'));
                //$this->assertEquals('sample.html', file_normalize('sample.html.zh-tw.big5'));
        }

        // uri_pickup(), uri_pickup_normalize() and
        // uri_pickup_normalize_pathtofile(): null-like input, a multi-URI
        // text block checked component by component, then single-URI cases
        // for dividers, host shapes, and inputs that must not be captured
        function testFunc_uri_pickup()
        {
                // 1st argument: Null
                foreach($this->setup_string_null() as $key => $value){
                        $this->assertEquals(0, count(uri_pickup($value)), $key);
                }

                // 1st argument: Some
                $test_string = <<<EOF
                        TTP://wwW.Example.Org#TTP_and_www
                        https://nasty.example.org:443/foo/xxx#port443/slash
                        sftp://foobar.example.org:80/dfsdfs#ftp_bat_port80
                        ftp://cnn.example.com&story=breaking_news@10.0.0.1/top_story.htm
                        http://192.168.1.4:443#IPv4
EOF;
                $results = uri_pickup_normalize(uri_pickup($test_string));
                $this->assertEquals(5, count($results));

                // TTP://wwW.Example.Org#TTP_and_www
                $this->assertEquals('http',           $results[0]['scheme']);
                $this->assertEquals('',               $results[0]['userinfo']);
                $this->assertEquals('example.org',    $results[0]['host']);
                $this->assertEquals('',               $results[0]['port']);
                $this->assertEquals('/',              $results[0]['path']);
                $this->assertEquals('',               $results[0]['file']);
                $this->assertEquals('',               $results[0]['query']);
                $this->assertEquals('ttp_and_www',    $results[0]['fragment']);

                // https://nasty.example.org:443/foo/xxx#port443/slash
                $this->assertEquals('https',          $results[1]['scheme']);
                $this->assertEquals('',               $results[1]['userinfo']);
                $this->assertEquals('nasty.example.org', $results[1]['host']);
                $this->assertEquals('',               $results[1]['port']);
                $this->assertEquals('/foo/',          $results[1]['path']);
                $this->assertEquals('xxx',            $results[1]['file']);
                $this->assertEquals('',               $results[1]['query']);
                $this->assertEquals('port443',        $results[1]['fragment']);

                // sftp://foobar.example.org:80/dfsdfs#ftp_bat_port80
                $this->assertEquals('sftp',           $results[2]['scheme']);
                $this->assertEquals('',               $results[2]['userinfo']);
                $this->assertEquals('foobar.example.org', $results[2]['host']);
                $this->assertEquals('80',             $results[2]['port']);
                $this->assertEquals('/',              $results[2]['path']);
                $this->assertEquals('dfsdfs',         $results[2]['file']);
                $this->assertEquals('',               $results[2]['query']);
                $this->assertEquals('ftp_bat_port80', $results[2]['fragment']);

                // ftp://cnn.example.com&story=breaking_news@10.0.0.1/top_story.htm
                $this->assertEquals('ftp',            $results[3]['scheme']);
                $this->assertEquals('cnn.example.com&story=breaking_news', $results[3]['userinfo']);
                $this->assertEquals('10.0.0.1',       $results[3]['host']);
                $this->assertEquals('',               $results[3]['port']);
                $this->assertEquals('/',              $results[3]['path']);
                $this->assertEquals('top_story.htm',  $results[3]['file']);
                $this->assertEquals('',               $results[3]['query']);
                $this->assertEquals('',               $results[3]['fragment']);

                // http://192.168.1.4:443#IPv4
                // NOTE(review): $results[4] is covered only by the count
                // assertion above; its components are not checked here.


                // Specific tests ----

                // Divider: Back-slash
                $test_string = ' http:\\backslash.org\fobar.html ';
                $results = uri_pickup_normalize(uri_pickup($test_string));
                $this->assertEquals('backslash.org',  $results[0]['host']);

                // Divider: percent-encoded
                //$test_string = ' http%3A%2F%5Cpercent-encoded.org%5Cfobar.html ';
                //$results = uri_pickup_normalize(uri_pickup($test_string));
                //$this->assertEquals('percent-encoded.org',  $results[0]['host']);

                // Host: Without path
                $test_string = ' http://nopathstring.com ';
                $results = uri_pickup($test_string);
                $this->assertEquals('', $results[0]['path']);
                $this->assertEquals('', $results[0]['file']);
                // Writing to 'path' must leave 'file' untouched, i.e. the
                // two elements must not alias each other
                $results[0]['path'] = '/';
                $this->assertEquals('', $results[0]['file'], '[Seems referense trouble]');
                // Same input again, this time through uri_pickup_normalize()
                $results = uri_pickup($test_string);
                $results = uri_pickup_normalize($results);
                $this->assertEquals('/',$results[0]['path']);
                $this->assertEquals('', $results[0]['file']);

                // Host: Underscore
                $test_string = ' http://under_score.org/fobar.html ';
                $results = uri_pickup_normalize(uri_pickup($test_string));
                $this->assertEquals('under_score.org',$results[0]['host']);     // Not 'under'

                // Host: IPv4
                $test_string = ' http://192.168.0.1/fobar.html ';
                $results = uri_pickup_normalize(uri_pickup($test_string));
                $this->assertEquals('192.168.0.1',    $results[0]['host']);

                // Host: Starts with an underscore
                $test_string = ' http://_sss/foo.html ';
                $results = uri_pickup_normalize(uri_pickup($test_string));
                $this->assertEquals('_sss',           $results[0]['host']);
                $this->assertEquals('foo.html',       $results[0]['file']);

                // Host: Ends with an underscore
                $test_string = ' http://sss_/foo.html ';
                $results = uri_pickup_normalize(uri_pickup($test_string));
                $this->assertEquals('sss_',           $results[0]['host']);
                $this->assertEquals('foo.html',       $results[0]['file']);


                // Specific tests ---- Fails

                // Divider: Colon only (Too sensitive to capture)
                $test_string = ' http:colon.org ';
                $results = uri_pickup_normalize(uri_pickup($test_string));
                $this->assertEquals(0, count($results));

                // Host: Too short (one or two characters are not captured)
                $test_string = ' http://s/foo.html http://ss/foo.html ';
                $results = uri_pickup_normalize(uri_pickup($test_string));
                $this->assertEquals(0, count($results));

                // Host: Three characters are captured
                $test_string = ' http://sss/foo.html ';
                $results = uri_pickup_normalize(uri_pickup($test_string));
                $this->assertEquals('sss',            $results[0]['host']);
                $this->assertEquals('foo.html',       $results[0]['file']);

                // uri_pickup_normalize_pathtofile():
                // 'pathtofile' is set, and 'path' / 'file' are no longer set
                $test_string = ' http://example.com/path/to/directory-accidentally-not-ended-with-slash ';
                $results = uri_pickup_normalize_pathtofile(uri_pickup($test_string));
                $this->assertEquals('/path/to/directory-accidentally-not-ended-with-slash',
                        $results[0]['pathtofile']);
                $this->assertEquals(FALSE, isset($results[0]['path']));
                $this->assertEquals(FALSE, isset($results[0]['file']));
        }

        // spam_uri_pickup(): URIs hidden inside another URI, either
        // percent-encoded in the path or exposed through a 'site:' query
        function testFunc_spam_uri_pickup()
        {
                // Divider: percent-encoded
                $test_string = ' http://victim.example.org/http%3A%2F%5Cnasty.example.org ';
                $results = spam_uri_pickup($test_string);
                $this->assertEquals('victim.example.org', $results[0]['host']);
                $this->assertEquals('nasty.example.org',  $results[1]['host']);

                // Domain exposure (site:)
                // NOTE(review): this case and the next one both use '%20site:';
                // the heading suggests a bare 'site:' may have been intended -- confirm
                $test_string = ' http://search.example.org/?q=%20site:nasty.example.org ';
                $results = spam_uri_pickup($test_string);
                $this->assertEquals('nasty.example.org', $results[0]['host']);
                $this->assertEquals('search.example.org',  $results[1]['host']);

                // Domain exposure (%20site:)
                $test_string = ' http://search2.example.org/?q=%20site:nasty2.example.org ';
                $results = spam_uri_pickup($test_string);
                $this->assertEquals('nasty2.example.org', $results[0]['host']);
                $this->assertEquals('search2.example.org',  $results[1]['host']);
        }
}

?>