<?php
-// $Id: spam.php,v 1.72 2006/12/16 02:59:26 henoheno Exp $
+// $Id: spam.php,v 1.73 2006/12/16 04:09:46 henoheno Exp $
// Copyright (C) 2006 PukiWiki Developers Team
// License: GPL v2 or (at your option) any later version
{
$area = array();
- // Anchor tag pair by preg_match_all()
+ // Anchor tag pair by preg_match and preg_match_all()
// [OK] <a href></a>
// [OK] <a href= >Good site!</a>
// [OK] <a href= "#" >test</a>
// [OK] <a href="http://nasty.example.com">visit http://nasty.example.com/</a>
// [OK] <a href=\'http://nasty.example.com/\' >discount foobar</a>
// [NG] <a href="http://ng.example.com">visit http://ng.example.com _not_ended_
+ $regex = '#<a\b[^>]*\bhref\b[^>]*>.*?</a\b[^>]*(>)#i';
+ if (isset($method['area_anchor'])) {
+ if (preg_match($regex, $string)) {
+ $area['area_anchor'] = TRUE;
+ }
+ }
if (isset($method['uri_anchor'])) {
$areas = array();
- preg_match_all('#<a\b[^>]*\bhref\b[^>]*>.*?</a\b[^>]*(>)#i',
- $string, $areas, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
+ preg_match_all($regex, $string, $areas, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
foreach(array_keys($areas) as $_area) {
$areas[$_area] = array(
$areas[$_area][0][1], // Area start (<a href>)
if (! empty($areas)) $area['uri_anchor'] = $areas;
}
- // phpBB's "BBCode" pair by preg_match_all()
+ // phpBB's "BBCode" pair by preg_match and preg_match_all()
// [OK] [url][/url]
// [OK] [url]http://nasty.example.com/[/url]
// [OK] [link]http://nasty.example.com/[/link]
// [OK] [url=http://nasty.example.com]visit http://nasty.example.com/[/url]
// [OK] [link http://nasty.example.com/]buy something[/link]
+ $regex = '#\[(url|link)\b[^\]]*\].*?\[/\1\b[^\]]*(\])#i';
+ if (isset($method['area_bbcode'])) {
+ if (preg_match($regex, $string)) {
+ $area['area_bbcode'] = TRUE;
+ }
+ }
if (isset($method['uri_bbcode'])) {
$areas = array();
- preg_match_all('#\[(url|link)\b[^\]]*\].*?\[/\1\b[^\]]*(\])#i',
- $string, $areas, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
+ preg_match_all($regex, $string, $areas, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
foreach(array_keys($areas) as $_area) {
$areas[$_area] = array(
$areas[$_area][0][1], // Area start ([url])
// Area elevation for '(especially external)link' intension
if (! empty($array)) {
- $areas = area_pickup($string, $method);
+ $_method = array();
+ if (isset($method['uri_anchor'])) $_method['uri_anchor'] = & $method['uri_anchor'];
+ if (isset($method['uri_bbcode'])) $_method['uri_bbcode'] = & $method['uri_bbcode'];
+
+ $areas = area_pickup($string, $_method, TRUE);
if (! empty($areas)) {
$area_shadow = array();
- foreach(array_keys($array) as $key){
+ foreach (array_keys($array) as $key) {
$area_shadow[$key] = & $array[$key]['area'];
- $area_shadow[$key]['uri_anchor'] = 0;
- $area_shadow[$key]['uri_bbcode'] = 0;
- }
- if (isset($areas['uri_anchor'])) {
- area_measure($areas['uri_anchor'], $area_shadow, 1, 'uri_anchor');
+ foreach (array_keys($_method) as $_key) {
+ $area_shadow[$key][$_key] = 0;
+ }
}
- if (isset($areas['uri_bbcode'])) {
- area_measure($areas['uri_bbcode'], $area_shadow, 1, 'uri_bbcode');
+ foreach (array_keys($_method) as $_key) {
+ if (isset($areas[$_key])) {
+ area_measure($areas[$_key], $area_shadow, 1, $_key);
+ }
}
}
}
'non_uniq' => 3 * $times, // Allow N duped (and normalized) URIs
// Areas
- //'area_anchor' => $t_area, // Using <a href> HTML tag
- //'area_bbcode' => $t_area, // Using [url] or [link] BBCode
- 'uri_anchor' => $t_area, // URI inside <a href> HTML tag
- 'uri_bbcode' => $t_area, // URI inside [url] or [link] BBCode
+ 'area_anchor' => $t_area, // Using <a href> HTML tag
+ 'area_bbcode' => $t_area, // Using [url] or [link] BBCode
+ //'uri_anchor' => $t_area, // URI inside <a href> HTML tag
+ //'uri_bbcode' => $t_area, // URI inside [url] or [link] BBCode
);
if ($rule) {
$bool = array(
}
return $progress;
}
+
+ // Area: There's HTML anchor tag
+ if ((! $asap || ! $is_spam) && isset($method['area_anchor'])) {
+ $key = 'area_anchor';
+ if (area_pickup($target, array($key => TRUE))) {
+ $sum[$key] += 1;
+ $is_spam[$key] = TRUE;
+ }
+ }
+
+ // Area: There's 'BBCode' linking tag
+ if ((! $asap || ! $is_spam) && isset($method['area_bbcode'])) {
+ $key = 'area_bbcode';
+ if (area_pickup($target, array($key => TRUE))) {
+ $sum[$key] += 1;
+ $is_spam[$key] = TRUE;
+ }
+ }
+
+ // URI Init
$pickups = spam_uri_pickup($target, $method);
if (empty($pickups)) {
return $progress;
}
- // Check quantity
+ // URI: Check quantity
$sum['quantity'] += count($pickups);
// URI quantity
if ((! $asap || ! $is_spam) && isset($method['quantity']) &&
$is_spam['quantity'] = TRUE;
}
- // URI: Using invalid area: anchor
+ // URI: used inside HTML anchor tag pair
if ((! $asap || ! $is_spam) && isset($method['uri_anchor'])) {
$key = 'uri_anchor';
foreach($pickups as $pickup) {
}
}
- // URI: Using invalid area: bbcode
+ // URI: used inside 'BBCode' pair
if ((! $asap || ! $is_spam) && isset($method['uri_bbcode'])) {
$key = 'uri_bbcode';
foreach($pickups as $pickup) {
}
}
- // URI uniqueness (and removing non-uniques)
+ // URI: Uniqueness (and removing non-uniques)
if ((! $asap || ! $is_spam) && isset($method['non_uniq'])) {
// Destructive normalize of URIs
unset($uris);
}
- // Unique host
+ // URI: Unique host
$hosts = array();
foreach ($pickups as $pickup) $hosts[] = & $pickup['host'];
$hosts = array_unique($hosts);
$sum['uniqhost'] += count($hosts);
- // Bad host
+ // URI: Bad host
if ((! $asap || ! $is_spam) && isset($method['badhost'])) {
$count = array_count_leaves(is_badhost($hosts, $asap));
$sum['badhost'] += $count;
<?php
-// $Id: spam.php,v 1.72 2006/12/16 02:59:26 henoheno Exp $
+// $Id: spam.php,v 1.73 2006/12/16 04:09:46 henoheno Exp $
// Copyright (C) 2006 PukiWiki Developers Team
// License: GPL v2 or (at your option) any later version
{
$area = array();
- // Anchor tag pair by preg_match_all()
+ // Anchor tag pair by preg_match and preg_match_all()
// [OK] <a href></a>
// [OK] <a href= >Good site!</a>
// [OK] <a href= "#" >test</a>
// [OK] <a href="http://nasty.example.com">visit http://nasty.example.com/</a>
// [OK] <a href=\'http://nasty.example.com/\' >discount foobar</a>
// [NG] <a href="http://ng.example.com">visit http://ng.example.com _not_ended_
+ $regex = '#<a\b[^>]*\bhref\b[^>]*>.*?</a\b[^>]*(>)#i';
+ if (isset($method['area_anchor'])) {
+ if (preg_match($regex, $string)) {
+ $area['area_anchor'] = TRUE;
+ }
+ }
if (isset($method['uri_anchor'])) {
$areas = array();
- preg_match_all('#<a\b[^>]*\bhref\b[^>]*>.*?</a\b[^>]*(>)#i',
- $string, $areas, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
+ preg_match_all($regex, $string, $areas, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
foreach(array_keys($areas) as $_area) {
$areas[$_area] = array(
$areas[$_area][0][1], // Area start (<a href>)
if (! empty($areas)) $area['uri_anchor'] = $areas;
}
- // phpBB's "BBCode" pair by preg_match_all()
+ // phpBB's "BBCode" pair by preg_match and preg_match_all()
// [OK] [url][/url]
// [OK] [url]http://nasty.example.com/[/url]
// [OK] [link]http://nasty.example.com/[/link]
// [OK] [url=http://nasty.example.com]visit http://nasty.example.com/[/url]
// [OK] [link http://nasty.example.com/]buy something[/link]
+ $regex = '#\[(url|link)\b[^\]]*\].*?\[/\1\b[^\]]*(\])#i';
+ if (isset($method['area_bbcode'])) {
+ if (preg_match($regex, $string)) {
+ $area['area_bbcode'] = TRUE;
+ }
+ }
if (isset($method['uri_bbcode'])) {
$areas = array();
- preg_match_all('#\[(url|link)\b[^\]]*\].*?\[/\1\b[^\]]*(\])#i',
- $string, $areas, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
+ preg_match_all($regex, $string, $areas, PREG_SET_ORDER | PREG_OFFSET_CAPTURE);
foreach(array_keys($areas) as $_area) {
$areas[$_area] = array(
$areas[$_area][0][1], // Area start ([url])
// Area elevation for '(especially external)link' intension
if (! empty($array)) {
- $areas = area_pickup($string, $method);
+ $_method = array();
+ if (isset($method['uri_anchor'])) $_method['uri_anchor'] = & $method['uri_anchor'];
+ if (isset($method['uri_bbcode'])) $_method['uri_bbcode'] = & $method['uri_bbcode'];
+
+ $areas = area_pickup($string, $_method, TRUE);
if (! empty($areas)) {
$area_shadow = array();
- foreach(array_keys($array) as $key){
+ foreach (array_keys($array) as $key) {
$area_shadow[$key] = & $array[$key]['area'];
- $area_shadow[$key]['uri_anchor'] = 0;
- $area_shadow[$key]['uri_bbcode'] = 0;
- }
- if (isset($areas['uri_anchor'])) {
- area_measure($areas['uri_anchor'], $area_shadow, 1, 'uri_anchor');
+ foreach (array_keys($_method) as $_key) {
+ $area_shadow[$key][$_key] = 0;
+ }
}
- if (isset($areas['uri_bbcode'])) {
- area_measure($areas['uri_bbcode'], $area_shadow, 1, 'uri_bbcode');
+ foreach (array_keys($_method) as $_key) {
+ if (isset($areas[$_key])) {
+ area_measure($areas[$_key], $area_shadow, 1, $_key);
+ }
}
}
}
'non_uniq' => 3 * $times, // Allow N duped (and normalized) URIs
// Areas
- //'area_anchor' => $t_area, // Using <a href> HTML tag
- //'area_bbcode' => $t_area, // Using [url] or [link] BBCode
- 'uri_anchor' => $t_area, // URI inside <a href> HTML tag
- 'uri_bbcode' => $t_area, // URI inside [url] or [link] BBCode
+ 'area_anchor' => $t_area, // Using <a href> HTML tag
+ 'area_bbcode' => $t_area, // Using [url] or [link] BBCode
+ //'uri_anchor' => $t_area, // URI inside <a href> HTML tag
+ //'uri_bbcode' => $t_area, // URI inside [url] or [link] BBCode
);
if ($rule) {
$bool = array(
}
return $progress;
}
+
+ // Area: There's HTML anchor tag
+ if ((! $asap || ! $is_spam) && isset($method['area_anchor'])) {
+ $key = 'area_anchor';
+ if (area_pickup($target, array($key => TRUE))) {
+ $sum[$key] += 1;
+ $is_spam[$key] = TRUE;
+ }
+ }
+
+ // Area: There's 'BBCode' linking tag
+ if ((! $asap || ! $is_spam) && isset($method['area_bbcode'])) {
+ $key = 'area_bbcode';
+ if (area_pickup($target, array($key => TRUE))) {
+ $sum[$key] += 1;
+ $is_spam[$key] = TRUE;
+ }
+ }
+
+ // URI Init
$pickups = spam_uri_pickup($target, $method);
if (empty($pickups)) {
return $progress;
}
- // Check quantity
+ // URI: Check quantity
$sum['quantity'] += count($pickups);
// URI quantity
if ((! $asap || ! $is_spam) && isset($method['quantity']) &&
$is_spam['quantity'] = TRUE;
}
- // URI: Using invalid area: anchor
+ // URI: used inside HTML anchor tag pair
if ((! $asap || ! $is_spam) && isset($method['uri_anchor'])) {
$key = 'uri_anchor';
foreach($pickups as $pickup) {
}
}
- // URI: Using invalid area: bbcode
+ // URI: used inside 'BBCode' pair
if ((! $asap || ! $is_spam) && isset($method['uri_bbcode'])) {
$key = 'uri_bbcode';
foreach($pickups as $pickup) {
}
}
- // URI uniqueness (and removing non-uniques)
+ // URI: Uniqueness (and removing non-uniques)
if ((! $asap || ! $is_spam) && isset($method['non_uniq'])) {
// Destructive normalize of URIs
unset($uris);
}
- // Unique host
+ // URI: Unique host
$hosts = array();
foreach ($pickups as $pickup) $hosts[] = & $pickup['host'];
$hosts = array_unique($hosts);
$sum['uniqhost'] += count($hosts);
- // Bad host
+ // URI: Bad host
if ((! $asap || ! $is_spam) && isset($method['badhost'])) {
$count = array_count_leaves(is_badhost($hosts, $asap));
$sum['badhost'] += $count;