HTML tag
//'uri_bbcode' => $t_area, // URI inside [url] or [link] BBCode
);
if ($rule) {
$bool = array(
// Rules
//'asap' => TRUE, // Quit or return As Soon As Possible
'uniqhost' => TRUE, // Show uniq host (at block notification mail)
'badhost' => TRUE, // Check badhost
);
} else {
$bool = array();
}
// Remove non-$positive values
foreach (array_keys($positive) as $key) {
if ($positive[$key] < 0) unset($positive[$key]);
}
return $positive + $bool;
}
// Simple/fast spam check
//
// Inspects $target for link-spam signals and reports what was found.
//
// $target : A string to inspect, or an array of strings. For an array, every
//           string element is checked by a recursive call and the partial
//           results are merged; non-string elements are skipped.
// $method : Checks to run, as 'theme' => threshold (or flag). When it is empty
//           or not an array, check_uri_spam_method() supplies the value.
//           If the key 'asap' is set, checking stops at the first finding.
//
// Returns $progress, an array with these keys:
//   'method'  => The method actually used
//   'sum'     => Theme => volume found (int)
//   'is_spam' => Theme => TRUE for each theme that tripped (empty: no spam)
//   'blocked' => Result of blocklist_distiller(), without the categories
//                whose get_blocklist('list') entry is falsy
//   'hosts'   => Unique hosts picked up from $target
//                (presumably blocklist_distiller() removes the blocked ones
//                 from this list -- confirm against its definition)
function check_uri_spam($target = '', $method = array())
{
	// Return value
	$progress = array(
		'method' => array(
			// Theme to do  => Dummy, optional value, or optional array()
			//'quantity'    => 8,
			//'uniqhost'    => TRUE,
			//'non_uniqhost'=> 3,
			//'non_uniquri' => 3,
			//'badhost'     => TRUE,
			//'area_anchor' => 0,
			//'area_bbcode' => 0,
			//'uri_anchor'  => 0,
			//'uri_bbcode'  => 0,
		),
		'sum' => array(
			// Theme => Volume found (int)
		),
		'is_spam' => array(
			// Flag. If something is defined here,
			// one or more spam will be included
			// in this report
		),
		'blocked' => array(
			// Hosts blocked
			//'category' => array(
			//	'host',
			//)
		),
		'hosts' => array(
			// Hosts not blocked
		),
	);

	// Aliases
	$sum     = & $progress['sum'];
	$is_spam = & $progress['is_spam'];
	$progress['method'] = & $method;	// Argument
	$blocked = & $progress['blocked'];
	$hosts   = & $progress['hosts'];

	// Init
	if (! is_array($method) || empty($method)) {
		$method = check_uri_spam_method();
	}
	// Read 'asap' only after the method is final, so that this flag and the
	// one handed to area_pickup() below can never disagree
	$asap = isset($method['asap']);
	foreach (array_keys($method) as $key) {
		if (! isset($sum[$key])) $sum[$key] = 0;
	}

	if (is_array($target)) {
		foreach ($target as $str) {
			if (! is_string($str)) continue;

			$_progress = check_uri_spam($str, $method);	// Recurse

			// Merge $sum (plain copies: no reference into $_progress is kept)
			foreach ($_progress['sum'] as $key => $value) {
				if (isset($sum[$key])) {
					$sum[$key] += $value;
				} else {
					$sum[$key] = $value;
				}
			}

			// Merge $is_spam
			foreach (array_keys($_progress['is_spam']) as $key) {
				$is_spam[$key] = TRUE;
				if ($asap) break;
			}
			// NOTE: With 'asap', $blocked and $hosts of this element are
			// intentionally left unmerged, as before
			if ($asap && $is_spam) break;

			// Merge $blocked
			// TODO: about numeric keys, unique the hosts
			$blocked = array_merge_recursive($blocked, $_progress['blocked']);

			// Merge $hosts
			// TODO: about numeric keys, unique the hosts
			$hosts = array_merge_recursive($hosts, $_progress['hosts']);
		}

		// Recount $sum['badhost']
		$sum['badhost'] = array_count_leaves($blocked);

		return $progress;
	}

	// Area: There's HTML anchor tag / 'BBCode' linking tag
	$_asap = $asap ? array('asap' => TRUE) : array();
	foreach (array('area_anchor', 'area_bbcode') as $key) {
		if ($asap && $is_spam) break;
		if (! isset($method[$key])) continue;

		$result = area_pickup($target, array($key => TRUE) + $_asap);
		if ($result) {
			$sum[$key] = $result[$key];
			if ($sum[$key] > $method[$key]) {
				$is_spam[$key] = TRUE;
			}
		}
	}

	// Return if ...
	if ($asap && $is_spam) return $progress;

	// URI: Pickup
	$pickups = uri_pickup_normalize(spam_uri_pickup($target, $method));

	// Return if ...
	if (empty($pickups)) return $progress;

	// URI: Check quantity
	// ('quantity' may be absent from $method, so the counter may not exist yet)
	if (! isset($sum['quantity'])) $sum['quantity'] = 0;
	$sum['quantity'] += count($pickups);
	if ((! $asap || ! $is_spam) && isset($method['quantity']) &&
		$sum['quantity'] > $method['quantity']) {
		$is_spam['quantity'] = TRUE;
	}

	// URI: used inside HTML anchor tag pair / inside 'BBCode' pair
	foreach (array('uri_anchor', 'uri_bbcode') as $key) {
		if ($asap && $is_spam) break;
		if (! isset($method[$key])) continue;

		foreach ($pickups as $pickup) {
			if (! isset($pickup['area'][$key])) continue;

			$sum[$key] += $pickup['area'][$key];
			if ($sum[$key] > $method[$key]) {
				$is_spam[$key] = TRUE;
			}
			if ($asap && $is_spam) break;
		}
	}

	// URI: Uniqueness (and removing non-uniques)
	if ((! $asap || ! $is_spam) && isset($method['non_uniquri'])) {
		$uris = array();
		foreach (array_keys($pickups) as $key) {
			$uris[$key] = uri_pickup_implode($pickups[$key]);
		}
		$count = count($uris);
		$uris  = array_unique($uris);
		$sum['non_uniquri'] += $count - count($uris);
		if ($sum['non_uniquri'] > $method['non_uniquri']) {
			$is_spam['non_uniquri'] = TRUE;
		}
		if (! $asap || ! $is_spam) {
			// Drop the pickups whose URI duplicated an earlier one
			foreach (array_diff(array_keys($pickups),
				array_keys($uris)) as $remove) {
				unset($pickups[$remove]);
			}
		}
		unset($uris);
	}

	// Return if ...
	if ($asap && $is_spam) return $progress;

	// Host: Uniqueness (uniq / non-uniq)
	// (copied by value: binding a reference to the loop variable is not needed)
	foreach ($pickups as $pickup) $hosts[] = $pickup['host'];
	$hosts = array_unique($hosts);
	// ('uniqhost' may be absent from $method, so the counter may not exist yet)
	if (! isset($sum['uniqhost'])) $sum['uniqhost'] = 0;
	$sum['uniqhost'] += count($hosts);
	if ((! $asap || ! $is_spam) && isset($method['non_uniqhost'])) {
		$sum['non_uniqhost'] = $sum['quantity'] - $sum['uniqhost'];
		if ($sum['non_uniqhost'] > $method['non_uniqhost']) {
			$is_spam['non_uniqhost'] = TRUE;
		}
	}

	// Return if ...
	if ($asap && $is_spam) return $progress;

	// URI: Bad host (Separate good/bad hosts from $hosts)
	if ((! $asap || ! $is_spam) && isset($method['badhost'])) {

		// is_badhost()
		$list    = get_blocklist('list');
		$blocked = blocklist_distiller($hosts, array_keys($list), $asap);
		foreach ($list as $key => $type) {
			if (! $type) unset($blocked[$key]); // Ignore goodhost etc
		}
		unset($list);

		if (! empty($blocked)) $is_spam['badhost'] = TRUE;
	}

	return $progress;
}
// Count leaves of a (possibly nested) array
// A leaf is any value that is not an array. An empty array is counted as
// a leaf too, but only when $count_empty is true.
// A non-array $array is itself one leaf, so the result is 1.
function array_count_leaves($array = array(), $count_empty = FALSE)
{
	$leaves  = 0;
	$pending = array($array);	// Nodes still to be visited

	while (! empty($pending)) {
		$node = array_pop($pending);

		if (! is_array($node)) {
			++$leaves;
		} else if (empty($node)) {
			if ($count_empty) ++$leaves;
		} else {
			foreach ($node as $child) {
				$pending[] = $child;
			}
		}
	}

	return $leaves;
}
// Merge two (possibly nested) arrays
// Thin wrapper: the work is delegated to array_merge_recursive(), and its
// result is returned unchanged.
function array_merge_leaves($array1 = array(), $array2 = array())
{
	$merged = array_merge_recursive($array1, $array2);

	return $merged;
}
// ---------------------
// Reporting
// TODO: Don't show unused $method!
// Summarize $progress as one line of text
//
// $progress    : An array shaped like the return value of check_uri_spam()
// $blockedonly : TRUE  => list only the themes recorded in
//                         $progress['is_spam'], e.g. 'quantity, badhost'
//                FALSE => list every theme of $progress['sum'] that is also
//                         set in $progress['method'] and has a non-zero
//                         volume, e.g. 'quantity(9), uniqhost(3)'
//
// Returns '' when there is nothing to report, including when the needed
// part of $progress is missing or is not an array.
function summarize_spam_progress($progress = array(), $blockedonly = FALSE)
{
	$tmp = array();

	if ($blockedonly) {
		// Guarded like the 'sum' branch below: an incomplete $progress
		// (such as the default array()) yields '' instead of an error
		if (isset($progress['is_spam']) && is_array($progress['is_spam'])) {
			$tmp = array_keys($progress['is_spam']);
		}
	} else if (isset($progress['sum']) && is_array($progress['sum'])) {
		$method = isset($progress['method']) ? $progress['method'] : array();
		foreach ($progress['sum'] as $key => $value) {
			if (isset($method[$key]) && $value) {
				$tmp[] = $key . '(' . $value . ')';
			}
		}
	}

	return implode(', ', $tmp);
}
// Detail text about the blocked hosts
// Returns '' unless $progress['is_spam']['badhost'] is set; otherwise
// returns $progress['blocked'] rendered by var_export().
function summarize_detail_badhost($progress = array())
{
	$tripped = isset($progress['is_spam']['badhost']);

	return $tripped ? var_export($progress['blocked'], TRUE) : '';
}
// Detail text about the hosts listed in $progress['hosts']
// Returns '' when that list is missing or empty; otherwise returns it
// rendered by var_export().
function summarize_detail_newtral($progress = array())
{
	if (! empty($progress['hosts'])) {
		return var_export($progress['hosts'], TRUE);
	}

	return '';
}
// ---------------------
// Exit
// Common behavior for blocking
// NOTE: Call this function from the various blocking features, to disguise the reason 'why blocked'
//
// Emits a short response chosen by $mode, then terminates the script.
// $mode : ''     => output a single newline
//         'dump' => output $data as HTML-escaped var_export() text
//         Any other value produces no output at all.
// $data : Value to dump; used only in 'dump' mode
// This function never returns: it always ends with exit.
function spam_exit($mode = '', $data = array())
{
// NOTE: switch compares $mode loosely (==), not strictly
switch ($mode) {
// Default mode: reveal nothing but a newline
case '': echo("\n"); break;
case 'dump':
// NOTE(review): the empty literal below, and the literal after the dump
// that holds only a line break, look as if markup was lost from them
// -- confirm against the upstream file before relying on this output
echo('' . "\n");
// Escape the dump so that $data cannot inject markup into the response
echo htmlspecialchars(var_export($data, TRUE));
echo('
' . "\n");
break;
};
// Force exit
exit;
}
// ---------------------
// Simple filtering
// TODO: Record them
// Simple/fast spam filter ($target: 'a string' or an array())
// Runs check_uri_spam() on $target with $method. When the result reports
// spam, notifies the administrator(s) via pkwk_spamnotify() and then hands
// over to spam_exit() with $exitmode. Otherwise it simply returns.
function pkwk_spamfilter($action, $page, $target = array('title' => ''), $method = array(), $exitmode = '')
{
	$progress = check_uri_spam($target, $method);

	// Nothing suspicious: let the caller carry on
	if (empty($progress['is_spam'])) return;

	// Mail to administrator(s)
	pkwk_spamnotify($action, $page, $target, $progress, $method);

	// Exit
	spam_exit($exitmode, $progress);
}
// ---------------------
// PukiWiki original
// Mail to administrator(s)
// Builds a summary of a blocked request from $progress and passes it to
// pkwk_mail_notify(), with a var_export() dump of $target as the message.
// Does nothing when the global $notify is falsy.
// When $method has 'asap' set, the METRICS and DETAIL_NEUTRAL_HOST entries
// are left out of the summary.
function pkwk_spamnotify($action, $page, $target = array('title' => ''), $progress = array(), $method = array())
{
	global $notify, $notify_subject;

	if (! $notify) return;

	$verbose = ! isset($method['asap']);

	$summary = array(
		'ACTION' => 'Blocked by: ' . summarize_spam_progress($progress, TRUE),
	);
	if ($verbose) {
		$summary['METRICS'] = summarize_spam_progress($progress);
	}

	$badhost = summarize_detail_badhost($progress);
	if ($badhost != '') {
		$summary['DETAIL_BADHOST'] = $badhost;
	}

	$neutral = summarize_detail_newtral($progress);
	if ($verbose && $neutral != '') {
		$summary['DETAIL_NEUTRAL_HOST'] = $neutral;
	}

	$pagename = is_pagename($page) ? $page : '';

	$summary['COMMENT']     = $action;
	$summary['PAGE']        = '[blocked] ' . $pagename;
	$summary['URI']         = get_script_uri() . '?' . rawurlencode($page);
	$summary['USER_AGENT']  = TRUE;
	$summary['REMOTE_ADDR'] = TRUE;

	$body = var_export($target, TRUE);
	pkwk_mail_notify($notify_subject, $body, $summary, TRUE);
}
?>