From c446d04947fe94830c3bf72a8866c84f9e83686b Mon Sep 17 00:00:00 2001 From: henoheno Date: Sat, 18 Aug 2007 18:10:58 +0900 Subject: [PATCH] Ignorance of 'quantity'/'non_uniqXXX'/etc checks for 'goodhost' (kindly commented by ats) * get_blocklist(): Added special key 'pre' * check_uri_spam(): Added pre-filter * spam.ini.php: 'goodhost' moved to 'pre' --- spam/spam.ini.php | 55 ++++++++++++++++++--------- spam/spam.php | 109 +++++++++++++++++++++++++++++++++++++++++------------- 2 files changed, 121 insertions(+), 43 deletions(-) diff --git a/spam/spam.ini.php b/spam/spam.ini.php index 25ef1bf..465866f 100644 --- a/spam/spam.ini.php +++ b/spam/spam.ini.php @@ -1,5 +1,5 @@ FALSE, +); + +// 'list': Normal list +$blocklist['list'] = array( 'A-1' => TRUE, //'A-2' => TRUE, 'B-1' => TRUE, @@ -49,7 +56,7 @@ $blocklist['list'] = array( ); -// ---- +// -------------------------------------------------- $blocklist['goodhost'] = array( // Sample setting of ignorance list @@ -71,6 +78,7 @@ $blocklist['goodhost'] = array( ); +// -------------------------------------------------- // A: Sample setting of // Existing URI redirection or masking services @@ -313,7 +321,6 @@ $blocklist['A-1'] = array( '*.fnbi.jp', 'forgeturl.com', '*.free.bg', - '*.freecities.com', 'Freeservers.com' => array( // United Online Web Services, Inc. '*.4mg.com', '*.4t.com', @@ -396,7 +403,6 @@ $blocklist['A-1'] = array( '*.zip.io', ), 'funkurl.com', // by Leonard Lyle (len at ballandchain.net) - '*.fw.bz', '*.fx.to', 'fyad.org', 'fype.com', @@ -416,7 +422,6 @@ $blocklist['A-1'] = array( 'goonlink.com', '.gourl.org', '.greatitem.com', - '*.greatnow.com', // by Per Olof Sandholm (peo at peakspace.com) 'gzurl.com', 'url.grillsportverein.de', 'Harudake.net' => array('*.hyu.jp'), @@ -1370,6 +1375,7 @@ $blocklist['A-1'] = array( '*.zwap.to', ); +// -------------------------------------------------- $blocklist['A-2'] = array( @@ -1439,6 +1445,7 @@ $blocklist['A-2'] = array( // http://agoga.com/aboutus.html ); +// -------------------------------------------------- // B: Sample setting of: // Jacked (taken advantage of) and cleaning-less sites @@ -1473,6 +1480,7 @@ $blocklist['B-1'] = array( // 209.63.57.10(www1.0catch.com) by domains at netgears.com, ns *.0catch.com '*.741.com', + '*.freecities.com', '*.freesite.org', '*.freewebpages.org', '*.freewebsitehosting.com', @@ -1481,7 +1489,11 @@ $blocklist['B-1'] = array( // 209.63.57.10(www1.0catch.com) by luke at dcpages.com, ns *.0catch.com '*.freespaceusa.com', '*.usafreespace.com', - + + // 209.63.57.10(www1.0catch.com) by rickybrown at usa.com, ns *.0catch.com + '*.dex1.com', + '*.questh.com', + // 209.63.57.10(www1.0catch.com), ns *.0catch.com '*.00freehost.com', // by David Mccall (superjeeves at yahoo.com) '*.012webpages.com', // by support at 0catch.com @@ -1491,12 +1503,13 @@ $blocklist['B-1'] = array( '*.9cy.com', // by paulw0t at gmail.com '*.angelcities.com', // by cliff at eccentrix.com '*.arcadepages.com', // by admin at site-see.com - '*.dex1.com', // by rickybrown at usa.com '*.e-host.ws', // by dns at jomax.net '*.envy.nu', // by Dave Ellis (dave at larryblackandassoc.com) + '*.fw.bz', // by ben at kuehl.as '*.freewebportal.com', // by mmouneeb at hotmail.com - '*.galaxy99.net', // by admin at bagchi.org '*.g0g.net', // by domains at seem.co.uk + '*.galaxy99.net', // by admin at bagchi.org + '*.greatnow.com', // by peo at peakspace.com '*.hautlynx.com', // by hlewis28 at juno.com '*.ibnsites.com', // by cmrojas at mail.com '*.just-allen.com', // by extremehype at msn.com @@ -2248,6 +2261,7 @@ $blocklist['B-1'] = array( 'xeboards.com', // by Brian Shea (bshea at xeservers.com) '*.xforum.se', 'xfreeforum.com', + '*.xoomwebs.com', '.freeblogs.xp.tl', '*.xphost.org', // by alex alex (alrusnac at hotmail.com) '*.ya.com', // 'geo.ya.com', 'blogs.ya.com', 'humano.ya.com', 'audio.ya.com'... @@ -2264,6 +2278,7 @@ $blocklist['B-1'] = array( ), ); +// -------------------------------------------------- $blocklist['B-2'] = array( @@ -2476,6 +2491,7 @@ $blocklist['B-2'] = array( 'Zope/Python Users Group of Washington, DC' => array('zpugdc.org'), ); +// -------------------------------------------------- $blocklist['C'] = array( @@ -4348,6 +4364,7 @@ $blocklist['C'] = array( '.bestcreola.com', '.crekatierra.com', '.creolafire.com', + '.eflashpoint.com', '.exoticmed.com', '.feelview.com', '.greatexotic.com', @@ -4958,7 +4975,6 @@ $blocklist['C'] = array( '.searchit.com', // 205.237.204.151(reverse.dcomm.com) by domains at inet-traffic.com, ns *.dcomm.com // http://www.trendmicro.com/vinfo/grayware/ve_GraywareDetails.asp?GNAME=ADW_SOFTOMATE.A // ... - '.gameroom.com', // 72.32.22.210 by julieisbusy at yahoo.com, listed at inet-traffic.com and freehomepages.com ), @@ -5000,8 +5016,7 @@ $blocklist['C'] = array( '.constitutionpartyofwa.org', // "UcoZ WEB-SERVICES" '.covertarena.co.uk', // by Wayne Huxtable '.d999.info', // by Peter Vayner (peter.vayner at inbox.ru) - '.dinmo.cn', // 218.30.96.149 by dinso at 163.com, seo etc. - //'.wow-gold.dinmo.cn', // 125.65.76.59, pl + '.dinmo.cn', // 218.30.96.149 by dinso at 163.com, seo etc. //'.wow-gold.dinmo.cn', // 125.65.76.59, pl '.dinmoseo.com', // 210.51.168.102(winp2-web-g02.xinnetdns.com) by jianmin911 at 126.com, NS *.xinnetdns.com, seo '.dlekei.info', // by Maxima Bucaro (webmaster at tts2f.info) '.dollar4u.info', // by Carla (Carla.J.Merritt at mytrashmail.com), / is blank @@ -5163,6 +5178,8 @@ $blocklist['C'] = array( '.alasex.info', // 'UcoZ web-services' 216.32.81.234(server.isndns.net) by yx0 at yx0.be '.golden-keys.net', // 89.149.205.146(unknown.vectoral.info) by aktitol at list.ru + '.masserch.info', // 69.46.18.2(hv113.steephost.com -> 72.232.191.50 -> 72.232.191.50.steephost.com) "Free Web Hosting" by sqr at bk.ru, spam + // C-3: Not classifiable (information wanted) // // Something incoming to pukiwiki related sites @@ -5172,6 +5189,8 @@ $blocklist['C'] = array( ), ); +// -------------------------------------------------- + $blocklist['D'] = array( // D: Sample setting of // "third party in good faith"s @@ -5182,6 +5201,7 @@ $blocklist['D'] = array( // 'RESERVED', ); +// -------------------------------------------------- $blocklist['E'] = array( // E: Sample setting of @@ -5292,7 +5312,7 @@ $blocklist['E'] = array( '.zakkuzaku.com', // 210.188.201.44(sv83.xserver.jp) ); - +// -------------------------------------------------- $blocklist['Z'] = array( // Z: Yours @@ -5301,4 +5321,5 @@ $blocklist['Z'] = array( //'', //'', ); + ?> diff --git a/spam/spam.php b/spam/spam.php index 3cc34ff..6d76ad6 100644 --- a/spam/spam.php +++ b/spam/spam.php @@ -1,5 +1,5 @@ FALSE; + // 'badhost' => TRUE; + // ); // $blocklist['badhost'] = array( // '*.blogspot.com', // Blog services's subdomains (only) // 'IANA-examples' => '#^(?:.*\.)?example\.(?:com|net|org)$#', // ); - if (isset($blocklist['list'])) { - $regexes['list'] = & $blocklist['list']; - } else { - // Default - $blocklist['list'] = array( - 'goodhost' => FALSE, - 'badhost' => TRUE, - ); - } - foreach(array_keys($blocklist['list']) as $_list) { - if (! isset($blocklist[$_list])) continue; - foreach ($blocklist[$_list] as $key => $value) { - if (is_array($value)) { - $regexes[$_list][$key] = array(); - foreach($value as $_key => $_value) { - get_blocklist_add($regexes[$_list][$key], $_key, $_value); + foreach(array('pre', 'list') as $special) { + if (! isset($blocklist[$special])) continue; + $regexes[$special] = $blocklist[$special]; + foreach(array_keys($blocklist[$special]) as $_list) { + if (! isset($blocklist[$_list])) continue; + foreach ($blocklist[$_list] as $key => $value) { + if (is_array($value)) { + $regexes[$_list][$key] = array(); + foreach($value as $_key => $_value) { + get_blocklist_add($regexes[$_list][$key], $_key, $_value); + } + } else { + get_blocklist_add($regexes[$_list], $key, $value); } - } else { - get_blocklist_add($regexes[$_list], $key, $value); } + unset($blocklist[$_list]); } - unset($blocklist[$_list]); } } } @@ -527,7 +526,9 @@ function check_uri_spam($target = '', $method = array()) ), ); + // ---------------------------------------- // Aliases + $sum = & $progress['sum']; $is_spam = & $progress['is_spam']; $progress['method'] = & $method; // Argument @@ -535,7 +536,9 @@ function check_uri_spam($target = '', $method = array()) $hosts = & $progress['hosts']; $asap = isset($method['asap']); + // ---------------------------------------- // Init + if (! is_array($method) || empty($method)) { $method = check_uri_spam_method(); } @@ -544,6 +547,9 @@ function check_uri_spam($target = '', $method = array()) } if (! isset($sum['quantity'])) $sum['quantity'] = 0; + // ---------------------------------------- + // Recurse + if (is_array($target)) { foreach($target as $str) { if (! is_string($str)) continue; @@ -583,6 +589,9 @@ function check_uri_spam($target = '', $method = array()) return $progress; } + // ---------------------------------------- + // Area measure + // Area: There's HTML anchor tag if ((! $asap || ! $is_spam) && isset($method['area_anchor'])) { $key = 'area_anchor'; @@ -612,13 +621,44 @@ function check_uri_spam($target = '', $method = array()) // Return if ... if ($asap && $is_spam) return $progress; + // ---------------------------------------- // URI: Pickup + $pickups = uri_pickup_normalize(spam_uri_pickup($target, $method)); + $hosts = array(); + foreach ($pickups as $key => $pickup) { + $hosts[$key] = & $pickup['host']; + } // Return if ... if (empty($pickups)) return $progress; + // ---------------------------------------- + // URI: Bad host (Separate good/bad hosts from $hosts) + + if ((! $asap || ! $is_spam) && isset($method['badhost'])) { + $list = get_blocklist('pre'); + $blocked = blocklist_distiller($hosts, array_keys($list), $asap); + foreach($list as $key=>$type){ + if (! $type) unset($blocked[$key]); // Ignore goodhost etc + } + unset($list); + if (! empty($blocked)) $is_spam['badhost'] = TRUE; + } + + // Return if ... + if ($asap && $is_spam) return $progress; + + // Remove blocked from $pickups + foreach(array_keys($pickups) as $key) { + if (! isset($hosts[$key])) { + unset($pickups[$key]); + } + } + + // ---------------------------------------- // URI: Check quantity + $sum['quantity'] += count($pickups); // URI quantity if ((! $asap || ! $is_spam) && isset($method['quantity']) && @@ -626,7 +666,9 @@ function check_uri_spam($target = '', $method = array()) $is_spam['quantity'] = TRUE; } + // ---------------------------------------- // URI: used inside HTML anchor tag pair + if ((! $asap || ! $is_spam) && isset($method['uri_anchor'])) { $key = 'uri_anchor'; foreach($pickups as $pickup) { @@ -642,7 +684,9 @@ function check_uri_spam($target = '', $method = array()) } } + // ---------------------------------------- // URI: used inside 'BBCode' pair + if ((! $asap || ! $is_spam) && isset($method['uri_bbcode'])) { $key = 'uri_bbcode'; foreach($pickups as $pickup) { @@ -658,7 +702,9 @@ function check_uri_spam($target = '', $method = array()) } } + // ---------------------------------------- // URI: Uniqueness (and removing non-uniques) + if ((! $asap || ! $is_spam) && isset($method['non_uniquri'])) { $uris = array(); @@ -683,9 +729,11 @@ function check_uri_spam($target = '', $method = array()) // Return if ... if ($asap && $is_spam) return $progress; + // ---------------------------------------- // Host: Uniqueness (uniq / non-uniq) - foreach ($pickups as $pickup) $hosts[] = & $pickup['host']; + $hosts = array_unique($hosts); + if (isset($sum['uniqhost'])) $sum['uniqhost'] += count($hosts); if ((! $asap || ! $is_spam) && isset($method['non_uniqhost'])) { $sum['non_uniqhost'] = $sum['quantity'] - $sum['uniqhost']; @@ -697,20 +745,29 @@ function check_uri_spam($target = '', $method = array()) // Return if ... if ($asap && $is_spam) return $progress; + // ---------------------------------------- // URI: Bad host (Separate good/bad hosts from $hosts) - if ((! $asap || ! $is_spam) && isset($method['badhost'])) { - // is_badhost() - $list = get_blocklist('list'); - $blocked = blocklist_distiller($hosts, array_keys($list), $asap); + if ((! $asap || ! $is_spam) && isset($method['badhost'])) { + $list = get_blocklist('list'); + $blocked = array_merge_leaves( + $blocked, + blocklist_distiller($hosts, array_keys($list), $asap), + FALSE + ); foreach($list as $key=>$type){ if (! $type) unset($blocked[$key]); // Ignore goodhost etc } unset($list); - if (! empty($blocked)) $is_spam['badhost'] = TRUE; } + // Return if ... + //if ($asap && $is_spam) return $progress; + + // ---------------------------------------- + // End + return $progress; } -- 2.11.0