HTML tag
//'uri_bbcode' => $t_area, // URI inside [url] or [link] BBCode
);
if ($rule) {
$bool = array(
// Rules
//'asap' => TRUE, // Quit or return As Soon As Possible
'uniqhost' => TRUE, // Show uniq host (at block notification mail)
'badhost' => TRUE, // Check badhost
);
} else {
$bool = array();
}
// Remove non-$positive values
foreach (array_keys($positive) as $key) {
if ($positive[$key] < 0) unset($positive[$key]);
}
return $positive + $bool;
}
// Simple/fast spam check
//
// $target : A string to inspect, or an array of them. For an array, every
//           string member is checked by a recursive call and the results
//           are merged; non-string members are skipped.
// $method : array('check name' => threshold or flag). When it is not an
//           array, or is empty, check_uri_spam_method() supplies it.
//           The presence of an 'asap' key makes the function stop as soon
//           as any check has flagged the target.
// Return  : The $progress array described below
function check_uri_spam($target = '', $method = array())
{
	// Return value
	$progress = array(
		'method' => array(
			// Theme to do => Dummy, optional value, or optional array()
			//'quantity'    => 8,
			//'uniqhost'    => TRUE,
			//'non_uniqhost'=> 3,
			//'non_uniquri' => 3,
			//'badhost'     => TRUE,
			//'area_anchor' => 0,
			//'area_bbcode' => 0,
			//'uri_anchor'  => 0,
			//'uri_bbcode'  => 0,
		),
		'sum' => array(
			// Theme => Volume found (int)
		),
		'is_spam' => array(
			// Flag. If something is defined here,
			// one or more spam will be included
			// in this report
		),
		'blocked' => array(
			// Hosts blocked
			//'category' => array(
			//	'host',
			//)
		),
		'hosts' => array(
			// Hosts not blocked
		),
	);

	// Aliases
	$sum     = & $progress['sum'];
	$is_spam = & $progress['is_spam'];
	$progress['method'] = & $method;	// Argument
	$blocked = & $progress['blocked'];
	$hosts   = & $progress['hosts'];
	$asap    = isset($method['asap']);

	// Init
	if (! is_array($method) || empty($method)) {
		$method = check_uri_spam_method();
	}
	foreach (array_keys($method) as $key) {
		if (! isset($sum[$key])) $sum[$key] = 0;
	}

	if (is_array($target)) {
		foreach ($target as $str) {
			if (! is_string($str)) continue;

			$_progress = check_uri_spam($str, $method);	// Recurse

			// Merge $sum
			$_sum = & $_progress['sum'];
			foreach (array_keys($_sum) as $key) {
				if (! isset($sum[$key])) {
					$sum[$key] = & $_sum[$key];
				} else {
					$sum[$key] += $_sum[$key];
				}
			}

			// Merge $is_spam
			$_is_spam = & $_progress['is_spam'];
			foreach (array_keys($_is_spam) as $key) {
				$is_spam[$key] = TRUE;
				if ($asap) break;
			}
			if ($asap && $is_spam) break;

			// Merge only
			$blocked = array_merge_recursive($blocked, $_progress['blocked']);
			$hosts   = array_merge_recursive($hosts,   $_progress['hosts']);
		}

		// Unique values
		$blocked = array_unique_recursive($blocked);
		$hosts   = array_unique_recursive($hosts);

		// Recount $sum['badhost']
		$sum['badhost'] = array_count_leaves($blocked);

		return $progress;
	}

	// Area: There's HTML anchor tag / 'BBCode' linking tag
	foreach (array('area_anchor', 'area_bbcode') as $key) {
		if (($asap && $is_spam) || ! isset($method[$key])) continue;

		$_asap  = $asap ? array('asap' => TRUE) : array();
		$result = area_pickup($target, array($key => TRUE) + $_asap);
		if ($result) {
			$sum[$key] = $result[$key];
			if ($sum[$key] > $method[$key]) {
				$is_spam[$key] = TRUE;
			}
		}
	}

	// Return if ...
	if ($asap && $is_spam) return $progress;

	// URI: Pickup
	$pickups = uri_pickup_normalize(spam_uri_pickup($target, $method));

	// Return if ...
	if (empty($pickups)) return $progress;

	// URI: Check quantity
	// 'quantity' is counted even when the caller's $method does not list
	// it, so make sure the counter exists before adding to it
	if (! isset($sum['quantity'])) $sum['quantity'] = 0;
	$sum['quantity'] += count($pickups);
	if (isset($method['quantity']) &&
	    $sum['quantity'] > $method['quantity']) {
		$is_spam['quantity'] = TRUE;
	}

	// URI: used inside HTML anchor tag pair / 'BBCode' pair
	foreach (array('uri_anchor', 'uri_bbcode') as $key) {
		if (($asap && $is_spam) || ! isset($method[$key])) continue;

		foreach ($pickups as $pickup) {
			if (! isset($pickup['area'][$key])) continue;

			$sum[$key] += $pickup['area'][$key];
			if ($sum[$key] > $method[$key]) {
				$is_spam[$key] = TRUE;
			}
			if ($asap && $is_spam) break;
		}
	}

	// URI: Uniqueness (and removing non-uniques)
	if ((! $asap || ! $is_spam) && isset($method['non_uniquri'])) {
		$uris = array();
		foreach (array_keys($pickups) as $key) {
			$uris[$key] = uri_pickup_implode($pickups[$key]);
		}
		$count = count($uris);
		$uris  = array_unique($uris);
		$sum['non_uniquri'] += $count - count($uris);
		if ($sum['non_uniquri'] > $method['non_uniquri']) {
			$is_spam['non_uniquri'] = TRUE;
		}
		if (! $asap || ! $is_spam) {
			// Drop the pickups whose URI duplicated an earlier one
			foreach (array_diff(array_keys($pickups),
			    array_keys($uris)) as $remove) {
				unset($pickups[$remove]);
			}
		}
		unset($uris);
	}

	// Return if ...
	if ($asap && $is_spam) return $progress;

	// Host: Uniqueness (uniq / non-uniq)
	foreach ($pickups as $pickup) $hosts[] = & $pickup['host'];
	$hosts = array_unique($hosts);
	// Same guard as 'quantity': 'uniqhost' may be absent from $method
	if (! isset($sum['uniqhost'])) $sum['uniqhost'] = 0;
	$sum['uniqhost'] += count($hosts);
	if (isset($method['non_uniqhost'])) {
		$sum['non_uniqhost'] = $sum['quantity'] - $sum['uniqhost'];
		if ($sum['non_uniqhost'] > $method['non_uniqhost']) {
			$is_spam['non_uniqhost'] = TRUE;
		}
	}

	// Return if ...
	if ($asap && $is_spam) return $progress;

	// URI: Bad host (Separate good/bad hosts from $hosts)
	if (isset($method['badhost'])) {
		// is_badhost()
		$list    = get_blocklist('list');
		$blocked = blocklist_distiller($hosts, array_keys($list), $asap);
		foreach ($list as $key => $type) {
			if (! $type) unset($blocked[$key]); // Ignore goodhost etc
		}
		unset($list);
		if (! empty($blocked)) $is_spam['badhost'] = TRUE;
	}

	return $progress;
}
// Count leaves
// A leaf is a value that is not an array. An empty array is counted as
// one leaf only when $count_empty is set; otherwise it contributes 0.
function array_count_leaves($array = array(), $count_empty = FALSE)
{
	// Anything that is not an array is a single leaf by itself
	if (! is_array($array)) {
		return 1;
	}

	// An empty array is a leaf only on request
	if ($count_empty && empty($array)) {
		return 1;
	}

	// Otherwise the leaves are whatever the members hold
	$total = 0;
	foreach ($array as $member) {
		$total += array_count_leaves($member, $count_empty);
	}

	return $total;
}
// An array-leaves to a flat array
//
// $array  : A (possibly nested) array. A non-array value is returned as is.
// $unique : TRUE  = duplicated leaves are removed and the result re-indexed
//           FALSE = every leaf is kept, at every nesting level
// Return  : A flat list of the non-array values, in traversal order
function array_flat_leaves($array, $unique = TRUE)
{
	if (! is_array($array)) return $array;

	$flat = array();
	foreach ($array as $value) {
		if (is_array($value)) {
			// Recurse, handing down $unique so that FALSE also keeps
			// the duplicates found inside nested arrays
			foreach (array_flat_leaves($value, $unique) as $leaf) {
				$flat[] = $leaf;
			}
		} else {
			$flat[] = $value;
		}
	}

	return $unique ? array_values(array_unique($flat)) : $flat;
}
// An array() to an array leaf
//
// $array : A list of keys. Members that are neither string nor int are
//          ignored.
// $stem  : FALSE = the last key becomes the innermost value:
//                  array('A', 'B', 'C.D') => array('A' => array('B' => 'C.D'))
//          TRUE  = every key becomes a nesting level and $edge becomes the
//                  innermost value:
//                  array('A', 'B'), TRUE, 'x' => array('A' => array('B' => 'x'))
// $edge  : The innermost value used when $stem is TRUE
// Return : The nested array, or array() when no usable key was given.
//          With a single key and $stem FALSE the key itself is returned.
function array_leaf($array = array('A', 'B', 'C.D'), $stem = FALSE, $edge = array())
{
	// Keep only the values usable as array keys, in the given order
	$keys = array();
	foreach ($array as $arg) {
		if (is_string($arg) || is_int($arg)) $keys[] = $arg;
	}

	// Nothing to build from
	if (empty($keys)) return array();

	if ($stem) {
		$leaf = $edge;
	} else {
		// Pass the last key through an array key, so that it gets the
		// same normalization (e.g. '5' => 5) as the nesting keys do
		$leaf = key(array(array_pop($keys) => TRUE));
	}

	// Wrap from the innermost level outward
	while (! empty($keys)) {
		$leaf = array(array_pop($keys) => $leaf);
	}

	return $leaf; // array('A' => array('B' => 'C.D'))
}
// ---------------------
// Reporting
// Summarize $progress
//
// $progress    : An array shaped like the return value of check_uri_spam()
// $blockedonly : TRUE  = list the names of the checks recorded in 'is_spam'
//                FALSE = list 'name(volume)' for every entry of 'sum' that
//                        is non-zero and whose name is set in 'method'
// Return       : A ', '-separated string ('' when there is nothing to say)
function summarize_spam_progress($progress = array(), $blockedonly = FALSE)
{
	if ($blockedonly) {
		// Guard like the 'sum' branch below does: $progress defaults to
		// array(), so 'is_spam' may simply be missing
		$flagged = (isset($progress['is_spam']) && is_array($progress['is_spam']))
			? array_keys($progress['is_spam'])
			: array();
		return implode(', ', $flagged);
	}

	$method  = isset($progress['method']) ? $progress['method'] : array();
	$metrics = array();
	if (isset($progress['sum']) && is_array($progress['sum'])) {
		foreach ($progress['sum'] as $key => $value) {
			if (isset($method[$key]) && $value) {
				$metrics[] = $key . '(' . $value . ')';
			}
		}
	}

	return implode(', ', $metrics);
}
// Summarize the blocked hosts held in $progress['blocked']
// Return: '' when nothing was blocked, otherwise the output of
//         var_export_shrink() for a 'list' => 'group' => 'hosts' summary
function summarize_detail_badhost($progress = array())
{
	// empty() also covers the "not set" case
	if (empty($progress['blocked'])) return '';

	// Flat per group: join the leaves of every group into one string
	$summary = array();
	foreach ($progress['blocked'] as $listname => $groups) {
		foreach ($groups as $groupname => $members) {
			$joined = implode(', ', array_flat_leaves($members));
			if ($joined == $groupname) {
				// The group name says it all; no need to repeat it as a key
				$summary[$listname][] = $joined;
			} else {
				$summary[$listname][$groupname] = $joined;
			}
		}
	}

	// Shrink per list
	// From: 'A-1' => array('ie.to')
	// To:   'A-1' => 'ie.to'
	foreach (array_keys($summary) as $listname) {
		$entries = $summary[$listname];
		if (! is_array($entries)) continue;
		if (count($entries) == 1 && is_numeric(key($entries))) {
			$summary[$listname] = current($entries);
		}
	}

	return var_export_shrink($summary, TRUE, TRUE);
}
// Summarize the hosts held in $progress['hosts']
// Return: '' when 'hosts' is missing, not an array, or empty; otherwise
//         the output of var_export_shrink() for the grouped host list
function summarize_detail_newtral($progress = array())
{
	if (! isset($progress['hosts']) ||
	    ! is_array($progress['hosts']) ||
	    empty($progress['hosts'])) return '';

	// Build one tree out of all hosts: each host is split on '.' after
	// delimiter_reverse() (plus a trailing '.', which yields a final ''
	// key), and the host itself is stored at the innermost level
	$tree = array();
	foreach ($progress['hosts'] as $host) {
		$tree = array_merge_recursive(
			$tree,
			array_leaf(explode('.', delimiter_reverse($host) . '.'), TRUE, $host)
		);
	}
	ksort($tree, SORT_STRING);

	// Fold the upper key levels into joined keys
	$tree = array_joinkey_leaf($tree, '.', TRUE, TRUE);
	$tree = array_joinkey_leaf($tree, '.', TRUE, FALSE);
	$tree = array_joinkey_leaf($tree, '.', TRUE, FALSE);

	// Whatever is still nested gets sorted and flattened to one string.
	// Work on $tree[$key] itself: sorting it while imploding a by-value
	// copy taken before the sort would throw the sort away.
	foreach (array_keys($tree) as $key) {
		if (is_array($tree[$key])) {
			ksort($tree[$key]);
			$tree[$key] = implode(', ', array_flat_leaves($tree[$key]));
		}
	}

	return var_export_shrink($tree, TRUE, TRUE);
}
// Join the keys of the first two levels of an array
//   array('A' => array('B' => 'C.D')) => array('A.B' => 'C.D')
//
// $array      : The array to work on. A non-array value is returned as is.
// $delim      : The glue between the two keys
// $reverse    : TRUE = inner key first ('B.A'), FALSE = outer key first ('A.B')
// $allowmulti : FALSE = a member array holding two or more entries is
//               kept untouched, TRUE = it is joined entry by entry
// NOTE: A member that is an empty array has no inner key to join with,
//       so it does not appear in the result
function array_joinkey_leaf($array = array('A' => array('B' => 'C.D')),
	$delim = '.', $reverse = FALSE, $allowmulti = FALSE)
{
	if (! is_array($array)) return $array;

	$joined = array();
	foreach ($array as $outer => $member) {
		$joinable = is_array($member) &&
			($allowmulti || count($member) <= 1);

		if (! $joinable) {
			// Carry over as it is
			$joined[$outer] = $member;
			continue;
		}

		foreach ($member as $inner => $leaf) {
			if ($reverse) {
				$name = $inner . $delim . $outer;
			} else {
				$name = $outer . $delim . $inner;
			}
			$joined[$name] = $leaf;
		}
	}

	return $joined; // array('A.B' => 'C.D')
}
// ---------------------
// Exit
// Common behavior for blocking
// NOTE: Call this function from the various blocking features, to disguise
//       the reason 'why blocked'
//
// $mode : ''     = print a single newline
//         'dump' = print htmlspecialchars(var_export($data, TRUE)) between
//                  the two literals below
//         Any other value prints nothing
// $data : The value to dump in 'dump' mode
// The script is always terminated; this function never returns.
function spam_exit($mode = '', $data = array())
{
	if ($mode == '') {
		echo("\n");
	} else if ($mode == 'dump') {
		// NOTE(review): the empty literal here and the bare newline literal
		// below look like the remains of stripped markup around the
		// dump -- confirm against the project's original source
		echo('' . "\n");
		echo htmlspecialchars(var_export($data, TRUE));
		echo('
' . "\n");
	}

	// Force exit
	exit;
}
// ---------------------
// Simple filtering
// TODO: Record them
// Simple/fast spam filter ($target: 'a string' or an array())
// Runs check_uri_spam() on $target. When any check flagged it, the
// administrator(s) get notified and the request is ended via spam_exit();
// otherwise the function just returns.
function pkwk_spamfilter($action, $page, $target = array('title' => ''), $method = array(), $exitmode = '')
{
	$progress = check_uri_spam($target, $method);

	// Nothing suspicious: let the caller go on
	if (empty($progress['is_spam'])) return;

	// Mail to administrator(s)
	pkwk_spamnotify($action, $page, $target, $progress, $method);

	// Exit
	spam_exit($exitmode, $progress);
}
// ---------------------
// PukiWiki original
// Mail to administrator(s)
// Builds a summary of $progress and hands it, together with a dump of
// $target, to pkwk_mail_notify(). Does nothing unless the global $notify
// is set. When $method has an 'asap' key, the METRICS and
// DETAIL_NEUTRAL_HOST entries are left out of the summary.
function pkwk_spamnotify($action, $page, $target = array('title' => ''), $progress = array(), $method = array())
{
	global $notify, $notify_subject;

	if (! $notify) return;

	$verbose = ! isset($method['asap']);

	$summary = array(
		'ACTION' => 'Blocked by: ' . summarize_spam_progress($progress, TRUE),
	);
	if ($verbose) {
		$summary['METRICS'] = summarize_spam_progress($progress);
	}

	$badhost = summarize_detail_badhost($progress);
	if ($badhost != '') {
		$summary['DETAIL_BADHOST'] = $badhost;
	}

	$neutral = summarize_detail_newtral($progress);
	if ($verbose && $neutral != '') {
		$summary['DETAIL_NEUTRAL_HOST'] = $neutral;
	}

	// None of these keys exist yet, so the union appends them in this order
	$summary += array(
		'COMMENT'     => $action,
		'PAGE'        => '[blocked] ' . (is_pagename($page) ? $page : ''),
		'URI'         => get_script_uri() . '?' . rawurlencode($page),
		'USER_AGENT'  => TRUE,
		'REMOTE_ADDR' => TRUE,
	);

	pkwk_mail_notify($notify_subject, var_export($target, TRUE), $summary, TRUE);
}
?>