From: henoheno Date: Thu, 28 Jun 2007 14:52:55 +0000 (+0900) Subject: Change order of function, myspace, live search, etc X-Git-Url: http://git.osdn.net/view?p=pukiwiki%2Fpukiwiki_sandbox.git;a=commitdiff_plain;h=33ed0524c1b702e4ead573c97639287103560ff2 Change order of function, myspace, live search, etc --- diff --git a/spam/spam.php b/spam/spam.php index 27afbda..9298fb5 100644 --- a/spam/spam.php +++ b/spam/spam.php @@ -1,5 +1,5 @@ '(1) or ''(4) + // '[uri'(4) or ']'(1) or '[/uri]'(6) + if (isset($method['area_anchor']) || isset($method['uri_anchor']) || + isset($method['area_bbcode']) || isset($method['uri_bbcode'])) + $length = 1; // Seems not effective + } + + // Removing sequential spaces and too short lines + $binary = strings($binary, $length, TRUE, FALSE); // Multibyte NOT needed + + // Remove words (has no '<>[]:') between spaces + $binary = preg_replace('/[ \t][\w.,()\ \t]+[ \t]/', ' ', $binary); + + return $binary; +} + +// Preprocess: Domain exposure callback (See spam_uri_pickup_preprocess()) // http://victim.example.org/?foo+site:nasty.example.com+bar // => http://nasty.example.com/?refer=victim.example.org // NOTE: 'refer=' is not so good for (at this time). @@ -456,27 +477,6 @@ function _preg_replace_callback_domain_exposure($matches = array()) return $result; } -// Preprocess: Removing uninterest part for URI detection -function spam_uri_removing_hocus_pocus($binary = '', $method = array()) -{ - $length = 4 ; // 'http'(1) and '://'(2) and 'fqdn'(1) - if (is_array($method)) { - // ''(1) or ''(4) - // '[uri'(4) or ']'(1) or '[/uri]'(6) - if (isset($method['area_anchor']) || isset($method['uri_anchor']) || - isset($method['area_bbcode']) || isset($method['uri_bbcode'])) - $length = 1; // Seems not effective - } - - // Removing sequential spaces and too short lines - $binary = strings($binary, $length, TRUE, FALSE); // Multibyte NOT needed - - // Remove words (has no '<>[]:') between spaces - $binary = preg_replace('/[ \t][\w.,()\ \t]+[ \t]/', ' ', $binary); - - return $binary; -} - // Preprocess: rawurldecode() and adding space(s) and something // to detect/count some URIs _if possible_ // NOTE: It's maybe danger to var_dump(result). [e.g. 'javascript:'] @@ -527,19 +527,30 @@ function spam_uri_pickup_preprocess($string = '', $method = array()) // Domain exposure (See _preg_replace_callback_domain_exposure()) $string = preg_replace_callback( array( - '#(http)://' . + '#(http)://' . // 1:Scheme + // 2:Host '(' . // Something Google: http://www.google.com/supported_domains '(?:[a-z0-9.]+\.)?google\.[a-z]{2,3}(?:\.[a-z]{2})?' . '|' . // AltaVista + // http://es.altavista.com/web/results?q=site%3Anasty.example.org+foobar '(?:[a-z0-9.]+\.)?altavista.com' . - + '|' . + // Live Search + 'search.live.com' . + '|' . + // MySpace + // http://sads.myspace.com/Modules/Search/Pages/Search.aspx?_snip_&searchString=site:nasty.example.org + // also searchresults.myspace.com + '(?:[a-z0-9.]+\.)?myspace.com' . + '|' . + 'search.orange.co.uk' . ')' . '/' . - '([a-z0-9?=&.%_/\'\\\+-]+)' . // path/?query=foo+bar+ - '\bsite:([a-z0-9.%_-]+\.[a-z0-9.%_-]+)' . // site:nasty.example.com - //'()' . // Preserve or remove? + '([a-z0-9?=&.%_/\'\\\+-]+)' . // 3:path/?query=foo+bar+ + '\bsite:([a-z0-9.%_-]+\.[a-z0-9.%_-]+)' . // 4:site:nasty.example.com + '()' . // 5:Preserve or remove? '#i', ), '_preg_replace_callback_domain_exposure',