<?php
-// $Id: spam.php,v 1.33 2006/11/25 03:31:19 henoheno Exp $
+// $Id: spam.php,v 1.36 2006/11/25 13:55:34 henoheno Exp $
// Copyright (C) 2006 PukiWiki Developers Team
// License: GPL v2 or (at your option) any later version
$array[$uri]['host'] = strtolower($array[$uri]['host']);
$array[$uri]['port'] = port_normalize($array[$uri]['port'], $array[$uri]['scheme'], FALSE);
$array[$uri]['path'] = path_normalize($array[$uri]['path']);
-
//$array[$uri]['uri'] = uri_array_implode($array[$uri]);
if ($preserve_rawuri) $array[$uri]['rawuri'] = & $array[$uri][0];
} else {
// TODO: Ignore list
// TODO: require_or_include_once(another file)
-function is_badhost($hosts = '')
+function is_badhost($hosts = '', $asap = TRUE)
{
static $blocklist_regex;
}
}
+ $result = 0;
if (! is_array($hosts)) $hosts = array($hosts);
foreach($hosts as $host) {
- if (is_string($host)) $host = '';
+ if (! is_string($host)) $host = '';
foreach ($blocklist_regex as $regex) {
if (preg_match($regex, $host)) {
- return TRUE;
+ ++$result;
+ if ($asap) {
+ return $result;
+ } else {
+ break; // Check next host
+ }
}
}
}
- return FALSE;
+ return $result;
}
// TODO return TRUE or FALSE!
// Simple/fast spam check
// Inspect the URIs found in $target and decide whether it looks like spam.
//
// $target : a string, or a (possibly nested) array of strings to inspect
// $method : checks to run, keyed by name. A check runs only when its key
//           is set; an empty or non-array value selects the defaults.
//             'quantity' => N  Allow up to N URIs per inspected string
//             'area'     => *  Hit when a picked-up URI has a negative 'area'
//             'non_uniq' => N  Allow up to N duplicated URIs per string
//             'badhost'  => *  Hit when is_badhost() counts any host
// $asap   : TRUE to stop at the first hit, FALSE to run every enabled
//           check on every element and keep counting
//
// Returns array($is_spam, $progress). $progress carries one counter per
// check ('quantity', 'area', 'non_uniq', 'badhost'); for an array $target
// the counters are the sums over all inspected elements.
function check_uri_spam($target = '', $method = array(), $asap = TRUE)
{
	$is_spam  = FALSE;
	$progress = array(
		'quantity' => 0,
		'area'     => 0,
		'non_uniq' => 0,
		'badhost'  => 0,
	);

	if (! is_array($method) || empty($method)) {
		// Default
		$method = array(
			'quantity' => 8,    // Allow N URIs
			'area'     => TRUE,
			'non_uniq' => 3,    // Allow N times dupe
			'badhost'  => TRUE,
		);
	}

	if (is_array($target)) {
		foreach ($target as $str) {
			// Recurse, handing the caller's $asap mode down
			list($_is_spam, $_progress) = check_uri_spam($str, $method, $asap);

			// Sum up every counter, not only some of them
			foreach (array_keys($progress) as $key) {
				$progress[$key] += $_progress[$key];
			}

			// A clean element must never clear an earlier hit
			if ($_is_spam) $is_spam = TRUE;

			// Leave early only when there IS a hit and the caller wants
			// the fastest answer. Otherwise every element gets checked.
			if ($asap && $is_spam) break;
		}
		return array($is_spam, $progress);
	}

	$pickups = spam_uri_pickup($target);
	$progress['quantity'] += count($pickups);
	if (empty($pickups)) return array($is_spam, $progress);

	// URI quantity
	if (isset($method['quantity']) &&
	    $progress['quantity'] > $method['quantity']) {
		$is_spam = TRUE;
	}

	// Using invalid area
	if ((! $is_spam || ! $asap) && isset($method['area'])) {
		foreach ($pickups as $pickup) {
			if ($pickup['area'] < 0) {
				++$progress['area'];
				$is_spam = TRUE;
				if ($asap) break;
			}
		}
	}

	// URI uniqueness (and removing non-uniques)
	if ((! $is_spam || ! $asap) && isset($method['non_uniq'])) {
		$uris = array();
		foreach ($pickups as $key => $pickup) {
			$uris[$key] = uri_array_implode($pickup);
		}
		// array_unique() keeps the key of each first occurrence, so the
		// keys missing from $uniq are exactly the duplicated entries
		$uniq = array_unique($uris);
		$progress['non_uniq'] += count($uris) - count($uniq);
		if ($progress['non_uniq'] > $method['non_uniq']) {
			$is_spam = TRUE;
		}
		if (! $asap || ! $is_spam) {
			// Later checks should see each URI only once
			foreach (array_diff(array_keys($pickups),
			    array_keys($uniq)) as $remove) {
				unset($pickups[$remove]);
			}
		}
	}

	// Bad host
	if ((! $is_spam || ! $asap) && isset($method['badhost'])) {
		$hosts = array();
		foreach ($pickups as $pickup) {
			// Plain copy: taking a reference into the loop's copy of
			// the element buys nothing
			$hosts[] = isset($pickup['host']) ? $pickup['host'] : '';
		}
		$count = is_badhost(array_unique($hosts), $asap);
		$progress['badhost'] += $count;
		if ($count !== 0) $is_spam = TRUE;
	}

	return array($is_spam, $progress);
}
// ---------------------
// ---------------------
// TODO: Separate check-part(s) and mail part
-// TODO: Multi-metrics (uri, host, user-agent, ...)
// TODO: Mail to administrator with more measurement data?
// Simple/fast spam filter ($target: 'a string' or an array())
-function pkwk_spamfilter($action, $page, $target = array('title' => ''))
+function pkwk_spamfilter($action, $page, $target = array('title' => ''), $method = array())
{
$is_spam = FALSE;
if ($is_spam) {
$action .= ' (Invalid User-Agent)';
} else {
- list($is_spam) = is_uri_spam($target);
+ list($is_spam) = check_uri_spam($target, $method);
}
if ($is_spam) {