From 80c523259748571ea5edfb7c9ac2107795785278 Mon Sep 17 00:00:00 2001 From: henoheno Date: Sun, 26 Nov 2006 11:57:26 +0900 Subject: [PATCH] Area selection (Check BBCode only, check anchor only, check ...) --- spam.php | 108 ++++++++++++++++++++++++++++++++++++++-------------------- spam/spam.php | 108 ++++++++++++++++++++++++++++++++++++++-------------------- 2 files changed, 142 insertions(+), 74 deletions(-) diff --git a/spam.php b/spam.php index 7172eec..90b9735 100644 --- a/spam.php +++ b/spam.php @@ -1,5 +1,5 @@ TRUE, + 'bbcode' => TRUE, + ); + } + $string = spam_uri_pickup_preprocess($string); $array = uri_pickup($string); // Area elevation for '(especially external)link' intension if (! empty($array)) { + + $area_shadow = array(); + foreach(array_keys($array) as $key){ + $area_shadow[$key] = & $array[$key]['area']; + $area_shadow[$key]['anchor'] = 0; + $area_shadow[$key]['bbcode'] = 0; + } + // Anchor tags by preg_match_all() // [OK] visit http://nasty.example.com/ // [OK] discount foobar // [NG] visit http://ng.example.com _not_ended_ // [NG] Good site! test - $areas = array(); - preg_match_all('#]*href[^>]*>.*?]*(>)#i', - $string, $areas, PREG_SET_ORDER | PREG_OFFSET_CAPTURE); - //var_dump(recursive_map('htmlspecialchars', $areas)); - foreach(array_keys($areas) as $area) { - $areas[$area] = array( - $areas[$area][0][1], // Area start () - $areas[$area][1][1], // Area end () - ); + if (isset($area['anchor'])) { + $areas = array(); + preg_match_all('#]*href[^>]*>.*?]*(>)#i', + $string, $areas, PREG_SET_ORDER | PREG_OFFSET_CAPTURE); + //var_dump(recursive_map('htmlspecialchars', $areas)); + foreach(array_keys($areas) as $_area) { + $areas[$_area] = array( + $areas[$_area][0][1], // Area start () + $areas[$_area][1][1], // Area end () + ); + } + area_measure($areas, $area_shadow, -1, 'anchor'); } - area_measure($areas, $array); // phpBB's "BBCode" by preg_match_all() // [url]http://nasty.example.com/[/url] @@ -186,17 +201,19 @@ function spam_uri_pickup($string = '') // [url=http://nasty.example.com]visit http://nasty.example.com/[/url] // [link http://nasty.example.com/]buy something[/link] // ?? [url=][/url] - $areas = array(); - preg_match_all('#\[(url|link)\b[^\]]*\].*?\[/\1\b[^\]]*(\])#i', - $string, $areas, PREG_SET_ORDER | PREG_OFFSET_CAPTURE); - //var_dump(recursive_map('htmlspecialchars', $areas)); - foreach(array_keys($areas) as $area) { - $areas[$area] = array( - $areas[$area][0][1], // Area start ([url]) - $areas[$area][2][1], // Area end ([/url]) - ); + if (isset($area['bbcode'])) { + $areas = array(); + preg_match_all('#\[(url|link)\b[^\]]*\].*?\[/\1\b[^\]]*(\])#i', + $string, $areas, PREG_SET_ORDER | PREG_OFFSET_CAPTURE); + //var_dump(recursive_map('htmlspecialchars', $areas)); + foreach(array_keys($areas) as $_area) { + $areas[$_area] = array( + $areas[$_area][0][1], // Area start ([url]) + $areas[$_area][2][1], // Area end ([/url]) + ); + } + area_measure($areas, $area_shadow, -1, 'bbcode'); } - area_measure($areas, $array); // Various Wiki syntax // [text_or_uri>text_or_uri] @@ -472,7 +489,7 @@ function is_badhost($hosts = '', $asap = TRUE) //'\[1\]', // Too much malicious sub-domains - '*.blogspot.com', + //'*.blogspot.com', // 2006-11 dev 'wwwtahoo.com', @@ -481,10 +498,10 @@ function is_badhost($hosts = '', $asap = TRUE) '*.infogami.com', // 2006/11/19 17:50 dev - '*.google0site.org', - '*.bigpricesearch.org', - '*.osfind.org', - '*.bablomira.biz', + //'*.google0site.org', + //'*.bigpricesearch.org', + //'*.osfind.org', + //'*.bablomira.biz', ); foreach ($blocklist as $part) { $blocklist_regex[] = '#^' . generate_glob_regex($part, '#') . '$#i'; @@ -515,8 +532,12 @@ function check_uri_spam_method() { return array( 'quantity' => 8, // Allow N URIs - 'area' => TRUE, - 'non_uniq' => 3, // Allow N times dupe + 'area' => array( + 'total' => 0, // Allow N areas + 'anchor' => 0, // HTML tag + 'bbcode' => 0, // [url] or [link] BBCode + ), + 'non_uniq' => 3, // Allow N duped (and normalized) URIs 'badhost' => TRUE, ); } @@ -528,7 +549,11 @@ function check_uri_spam($target = '', $method = array(), $asap = TRUE) $is_spam = FALSE; $progress = array( 'quantity' => 0, - 'area' => 0, + 'area' => array( + 'total' => 0, + 'anchor' => 0, + 'bbcode' => 0, + ), 'non_uniq' => 0, 'uniqhost' => 0, 'badhost' => 0, @@ -543,7 +568,9 @@ function check_uri_spam($target = '', $method = array(), $asap = TRUE) // Recurse list($is_spam, $_progress) = check_uri_spam($str, $method); $progress['quantity'] += $_progress['quantity']; - $progress['area'] += $_progress['area']; + $progress['area']['total'] += $_progress['area']['total']; + $progress['area']['anchor'] += $_progress['area']['anchor']; + $progress['area']['bbcode'] += $_progress['area']['bbcode']; $progress['non_uniq'] += $_progress['non_uniq']; $progress['uniqhost'] += $_progress['uniqhost']; $progress['badhost'] += $_progress['badhost']; @@ -565,9 +592,16 @@ function check_uri_spam($target = '', $method = array(), $asap = TRUE) // Using invalid area if ((! $is_spam || ! $asap) && isset($method['area'])) { foreach($pickups as $pickup) { - if ($pickup['area'] < 0) { - ++$progress['area']; - $is_spam = TRUE; + // Total + $total = 0; + foreach ($pickup['area'] as $key => $value) { + if ($key == 'offset') continue; + $progress['area']['total'] += $value; + $progress['area'][$key] += $value; + if ($value < 0) { + $is_spam = TRUE; + if ($is_spam && $asap) break; + } if ($asap) break; } } diff --git a/spam/spam.php b/spam/spam.php index 7172eec..90b9735 100644 --- a/spam/spam.php +++ b/spam/spam.php @@ -1,5 +1,5 @@ TRUE, + 'bbcode' => TRUE, + ); + } + $string = spam_uri_pickup_preprocess($string); $array = uri_pickup($string); // Area elevation for '(especially external)link' intension if (! empty($array)) { + + $area_shadow = array(); + foreach(array_keys($array) as $key){ + $area_shadow[$key] = & $array[$key]['area']; + $area_shadow[$key]['anchor'] = 0; + $area_shadow[$key]['bbcode'] = 0; + } + // Anchor tags by preg_match_all() // [OK] visit http://nasty.example.com/ // [OK] discount foobar // [NG] visit http://ng.example.com _not_ended_ // [NG] Good site! test - $areas = array(); - preg_match_all('#]*href[^>]*>.*?]*(>)#i', - $string, $areas, PREG_SET_ORDER | PREG_OFFSET_CAPTURE); - //var_dump(recursive_map('htmlspecialchars', $areas)); - foreach(array_keys($areas) as $area) { - $areas[$area] = array( - $areas[$area][0][1], // Area start () - $areas[$area][1][1], // Area end () - ); + if (isset($area['anchor'])) { + $areas = array(); + preg_match_all('#]*href[^>]*>.*?]*(>)#i', + $string, $areas, PREG_SET_ORDER | PREG_OFFSET_CAPTURE); + //var_dump(recursive_map('htmlspecialchars', $areas)); + foreach(array_keys($areas) as $_area) { + $areas[$_area] = array( + $areas[$_area][0][1], // Area start () + $areas[$_area][1][1], // Area end () + ); + } + area_measure($areas, $area_shadow, -1, 'anchor'); } - area_measure($areas, $array); // phpBB's "BBCode" by preg_match_all() // [url]http://nasty.example.com/[/url] @@ -186,17 +201,19 @@ function spam_uri_pickup($string = '') // [url=http://nasty.example.com]visit http://nasty.example.com/[/url] // [link http://nasty.example.com/]buy something[/link] // ?? [url=][/url] - $areas = array(); - preg_match_all('#\[(url|link)\b[^\]]*\].*?\[/\1\b[^\]]*(\])#i', - $string, $areas, PREG_SET_ORDER | PREG_OFFSET_CAPTURE); - //var_dump(recursive_map('htmlspecialchars', $areas)); - foreach(array_keys($areas) as $area) { - $areas[$area] = array( - $areas[$area][0][1], // Area start ([url]) - $areas[$area][2][1], // Area end ([/url]) - ); + if (isset($area['bbcode'])) { + $areas = array(); + preg_match_all('#\[(url|link)\b[^\]]*\].*?\[/\1\b[^\]]*(\])#i', + $string, $areas, PREG_SET_ORDER | PREG_OFFSET_CAPTURE); + //var_dump(recursive_map('htmlspecialchars', $areas)); + foreach(array_keys($areas) as $_area) { + $areas[$_area] = array( + $areas[$_area][0][1], // Area start ([url]) + $areas[$_area][2][1], // Area end ([/url]) + ); + } + area_measure($areas, $area_shadow, -1, 'bbcode'); } - area_measure($areas, $array); // Various Wiki syntax // [text_or_uri>text_or_uri] @@ -472,7 +489,7 @@ function is_badhost($hosts = '', $asap = TRUE) //'\[1\]', // Too much malicious sub-domains - '*.blogspot.com', + //'*.blogspot.com', // 2006-11 dev 'wwwtahoo.com', @@ -481,10 +498,10 @@ function is_badhost($hosts = '', $asap = TRUE) '*.infogami.com', // 2006/11/19 17:50 dev - '*.google0site.org', - '*.bigpricesearch.org', - '*.osfind.org', - '*.bablomira.biz', + //'*.google0site.org', + //'*.bigpricesearch.org', + //'*.osfind.org', + //'*.bablomira.biz', ); foreach ($blocklist as $part) { $blocklist_regex[] = '#^' . generate_glob_regex($part, '#') . '$#i'; @@ -515,8 +532,12 @@ function check_uri_spam_method() { return array( 'quantity' => 8, // Allow N URIs - 'area' => TRUE, - 'non_uniq' => 3, // Allow N times dupe + 'area' => array( + 'total' => 0, // Allow N areas + 'anchor' => 0, // HTML tag + 'bbcode' => 0, // [url] or [link] BBCode + ), + 'non_uniq' => 3, // Allow N duped (and normalized) URIs 'badhost' => TRUE, ); } @@ -528,7 +549,11 @@ function check_uri_spam($target = '', $method = array(), $asap = TRUE) $is_spam = FALSE; $progress = array( 'quantity' => 0, - 'area' => 0, + 'area' => array( + 'total' => 0, + 'anchor' => 0, + 'bbcode' => 0, + ), 'non_uniq' => 0, 'uniqhost' => 0, 'badhost' => 0, @@ -543,7 +568,9 @@ function check_uri_spam($target = '', $method = array(), $asap = TRUE) // Recurse list($is_spam, $_progress) = check_uri_spam($str, $method); $progress['quantity'] += $_progress['quantity']; - $progress['area'] += $_progress['area']; + $progress['area']['total'] += $_progress['area']['total']; + $progress['area']['anchor'] += $_progress['area']['anchor']; + $progress['area']['bbcode'] += $_progress['area']['bbcode']; $progress['non_uniq'] += $_progress['non_uniq']; $progress['uniqhost'] += $_progress['uniqhost']; $progress['badhost'] += $_progress['badhost']; @@ -565,9 +592,16 @@ function check_uri_spam($target = '', $method = array(), $asap = TRUE) // Using invalid area if ((! $is_spam || ! $asap) && isset($method['area'])) { foreach($pickups as $pickup) { - if ($pickup['area'] < 0) { - ++$progress['area']; - $is_spam = TRUE; + // Total + $total = 0; + foreach ($pickup['area'] as $key => $value) { + if ($key == 'offset') continue; + $progress['area']['total'] += $value; + $progress['area'][$key] += $value; + if ($value < 0) { + $is_spam = TRUE; + if ($is_spam && $asap) break; + } if ($asap) break; } } -- 2.11.0