(Separate good/bad hosts from $hosts)
if ((! $asap || ! $is_spam) && isset($method['badhost'])) {
$list = get_blocklist('pre');
$blocked = blocklist_distiller($hosts, array_keys($list), $asap);
foreach($list as $key => $type){
if (! $type) unset($blocked[$key]); // Ignore goodhost etc
}
unset($list);
if (! empty($blocked)) $is_spam['badhost'] = TRUE;
}
// Return if ...
if ($asap && $is_spam) return $progress;
// Remove blocked from $pickups
foreach(array_keys($pickups) as $key) {
if (! isset($hosts[$key])) {
unset($pickups[$key]);
}
}
// ----------------------------------------
// URI: Check quantity
$sum['quantity'] += count($pickups);
// URI quantity
if ((! $asap || ! $is_spam) && isset($method['quantity']) &&
$sum['quantity'] > $method['quantity']) {
$is_spam['quantity'] = TRUE;
}
// ----------------------------------------
// URI: used inside HTML anchor tag pair
if ((! $asap || ! $is_spam) && isset($method['uri_anchor'])) {
$key = 'uri_anchor';
foreach($pickups as $pickup) {
if (isset($pickup['area'][$key])) {
$sum[$key] += $pickup['area'][$key];
if(isset($method[$key]) &&
$sum[$key] > $method[$key]) {
$is_spam[$key] = TRUE;
if ($asap && $is_spam) break;
}
if ($asap && $is_spam) break;
}
}
}
// ----------------------------------------
// URI: used inside 'BBCode' pair
if ((! $asap || ! $is_spam) && isset($method['uri_bbcode'])) {
$key = 'uri_bbcode';
foreach($pickups as $pickup) {
if (isset($pickup['area'][$key])) {
$sum[$key] += $pickup['area'][$key];
if(isset($method[$key]) &&
$sum[$key] > $method[$key]) {
$is_spam[$key] = TRUE;
if ($asap && $is_spam) break;
}
if ($asap && $is_spam) break;
}
}
}
// ----------------------------------------
// URI: Uniqueness (and removing non-uniques)
if ((! $asap || ! $is_spam) && isset($method['non_uniquri'])) {
$uris = array();
foreach (array_keys($pickups) as $key) {
$uris[$key] = uri_pickup_implode($pickups[$key]);
}
$count = count($uris);
$uris = array_unique($uris);
$sum['non_uniquri'] += $count - count($uris);
if ($sum['non_uniquri'] > $method['non_uniquri']) {
$is_spam['non_uniquri'] = TRUE;
}
if (! $asap || ! $is_spam) {
foreach (array_diff(array_keys($pickups),
array_keys($uris)) as $remove) {
unset($pickups[$remove]);
}
}
unset($uris);
}
// Return if ...
if ($asap && $is_spam) return $progress;
// ----------------------------------------
// Host: Uniqueness (uniq / non-uniq)
$hosts = array_unique($hosts);
if (isset($sum['uniqhost'])) $sum['uniqhost'] += count($hosts);
if ((! $asap || ! $is_spam) && isset($method['non_uniqhost'])) {
$sum['non_uniqhost'] = $sum['quantity'] - $sum['uniqhost'];
if ($sum['non_uniqhost'] > $method['non_uniqhost']) {
$is_spam['non_uniqhost'] = TRUE;
}
}
// Return if ...
if ($asap && $is_spam) return $progress;
// ----------------------------------------
// URI: Bad host (Separate good/bad hosts from $hosts)
if ((! $asap || ! $is_spam) && isset($method['badhost'])) {
$list = get_blocklist('list');
$blocked = array_merge_leaves(
$blocked,
blocklist_distiller($hosts, array_keys($list), $asap),
FALSE
);
foreach($list as $key=>$type){
if (! $type) unset($blocked[$key]); // Ignore goodhost etc
}
unset($list);
if (! empty($blocked)) $is_spam['badhost'] = TRUE;
}
// Return if ...
//if ($asap && $is_spam) return $progress;
// ----------------------------------------
// End
return $progress;
}
// ---------------------
// Reporting
/**
 * Summarize a spam-check $progress array as one comma-separated line.
 *
 * Keys read from $progress: 'is_spam', 'method' and 'sum'. Any of them may
 * be missing (the default $progress is an empty array); a missing or
 * non-array part simply contributes nothing to the summary.
 *
 * @param array $progress    Result array of the spam check
 * @param bool  $blockedonly TRUE:  list only the keys of $progress['is_spam']
 *                           FALSE: list every truthy 'sum' entry whose key is
 *                                  also set in 'method', formatted 'key(value)'
 * @return string ', '-joined summary, or '' when there is nothing to report
 */
function summarize_spam_progress($progress = array(), $blockedonly = FALSE)
{
	if ($blockedonly) {
		// array_keys() must not receive NULL: 'is_spam' is absent when
		// nothing was flagged or when $progress is the default array()
		$tmp = (isset($progress['is_spam']) && is_array($progress['is_spam']))
			? array_keys($progress['is_spam'])
			: array();
	} else {
		$tmp = array();
		// Plain copy instead of a reference: a reference would silently
		// create $progress['method'] when it does not exist
		$method = isset($progress['method']) ? $progress['method'] : array();
		if (isset($progress['sum']) && is_array($progress['sum'])) {
			foreach ($progress['sum'] as $key => $value) {
				// Report only the metrics that were actually requested
				// and that counted something
				if (isset($method[$key]) && $value) {
					$tmp[] = $key . '(' . $value . ')';
				}
			}
		}
	}

	return implode(', ', $tmp);
}
/**
 * Build a compact textual report of $progress['blocked'].
 *
 * $progress['blocked'] is walked as list => group => value. Each group's
 * value is flattened with array_flat_leaves() and joined with ', '.
 * The result is handed to var_export_shrink() for formatting.
 *
 * @param array $progress Result array of the spam check
 * @return string '' when $progress['blocked'] is missing or empty,
 *                otherwise whatever var_export_shrink() returns
 */
function summarize_detail_badhost($progress = array())
{
	// empty() also covers the "key not set" case
	if (empty($progress['blocked'])) return '';

	// Step 1: flatten every group into a single string
	$report = array();
	foreach ($progress['blocked'] as $listname => $groups) {
		foreach ($groups as $groupname => $leaves) {
			$joined = implode(', ', array_flat_leaves($leaves));
			if ($joined !== $groupname) {
				$report[$listname][$groupname] = $joined;
			} else {
				// The group name says it all: store without a key
				$report[$listname][] = $joined;
			}
		}
	}

	// Step 2: shrink lists holding exactly one numerically-keyed entry
	// From: 'A-1' => array('ie.to')
	// To:   'A-1' => 'ie.to'
	foreach ($report as $listname => $entries) {
		if (! is_array($entries) || count($entries) != 1) continue;
		if (is_numeric(key($entries))) {
			$report[$listname] = current($entries);
		}
	}

	return var_export_shrink($report, TRUE, TRUE);
}
/**
 * Build a var_export_shrink()-like listing of $progress['hosts'],
 * grouped by the value whois_responsibility() returns for each host.
 *
 * @param array $progress Result array of the spam check
 * @return string '' when $progress['hosts'] is missing, not an array or
 *                empty; otherwise an "array (...)" text, one line per group
 */
function summarize_detail_newtral($progress = array())
{
	$hosts = isset($progress['hosts']) ? $progress['hosts'] : NULL;
	if (! is_array($hosts) || empty($hosts)) return '';

	// Collect: responsible part => (remaining prefix => NULL)
	$trie = array();
	foreach ($hosts as $host) {
		// e.g. 'A.foo.bar.example.com' => 'example.com'
		$resp = whois_responsibility($host);
		if (! empty($resp)) {
			// Cut strlen($resp) characters off the tail, then the dot: 'A.foo.bar'
			$rest = rtrim(substr($host, 0, - strlen($resp)), '.');
		} else {
			// Nothing returned: the host stands for itself
			$resp = strval($host);
			$rest = '';
		}
		$trie = array_merge_leaves($trie, array($resp => array($rest => NULL)), FALSE);
	}

	// Render each group as one line
	$lines = array();
	ksort_by_domain($trie);
	foreach (array_keys($trie) as $domain) {
		ksort_by_domain($trie[$domain]);

		$domain_only = (count($trie[$domain]) == 1 && key($trie[$domain]) == '');
		if ($domain_only) {
			// Only the responsible part itself was seen
			$lines[] = ' \'' . $domain . '\',';
		} else {
			// One or more prefixes (possibly together with the bare group key)
			$parts = array();
			foreach (array_keys($trie[$domain]) as $sub) {
				$parts[] = ($sub == '') ? $domain : $sub . '. ';
			}
			$lines[] = ' \'' . $domain . '\' => \'' . implode(', ', $parts) . '\',';
		}

		unset($trie[$domain]);	// Done with this group
	}

	return 'array (' . "\n" . implode("\n", $lines) . "\n" . ')';
}
// ---------------------
// Exit
// Freeing memory
// Calls get_blocklist() and whois_responsibility() with NULL, in that order.
// NOTE(review): presumably a NULL argument makes each helper drop the data
// it keeps between calls -- confirm against their definitions.
function spam_dispose()
{
get_blocklist(NULL);
whois_responsibility(NULL);
}
// Common behavior for blocking
// NOTE: Call this function from the various blocking features, to disguise the reason 'why blocked'
/**
 * Print the response selected by $mode, then terminate the script.
 *
 * @param string $mode ''     : print a single newline
 *                     'dump' : print an HTML-escaped var_export() of $data
 *                     Any other value prints nothing.
 * @param mixed  $data Value shown in 'dump' mode
 * @return void Does not return: the script always ends here
 */
function spam_exit($mode = '', $data = array())
{
	// Loose comparison on purpose, the same matching a switch() performs
	if ($mode == '') {
		echo("\n");
	} elseif ($mode == 'dump') {
		// NOTE(review): the empty '' prefix and the bare line-break literal
		// below look like leftovers of wrapper markup -- confirm
		echo('' . "\n");
		echo htmlspecialchars(var_export($data, TRUE));
		echo('
' . "\n");
	}

	exit;	// Force exit
}
// ---------------------
// Simple filtering
// TODO: Record them
/**
 * Simple/fast spam filter.
 *
 * Runs check_uri_spam() on $target. When nothing is flagged, only
 * spam_dispose() is called and the function returns. Otherwise
 * pkwk_spamnotify() is called, followed by spam_exit().
 *
 * @param mixed        $action   Passed through to pkwk_spamnotify()
 * @param mixed        $page     Passed through to pkwk_spamnotify()
 * @param string|array $target   'a string' or an array() to be checked
 * @param array        $method   Passed to check_uri_spam() and pkwk_spamnotify()
 * @param string       $exitmode $mode argument for spam_exit()
 * @return void
 */
function pkwk_spamfilter($action, $page, $target = array('title' => ''), $method = array(), $exitmode = '')
{
	$progress = check_uri_spam($target, $method);

	// Nothing flagged: clean up and let the caller continue
	if (empty($progress['is_spam'])) {
		spam_dispose();
		return;
	}

	// TODO: detect encoding from $target for mbstring functions
	// $tmp = array();
	// foreach(array_keys($target) as $key) {
	// $tmp[strings($key, 0, FALSE, TRUE)] = strings($target[$key], 0, FALSE, TRUE); // Removing "\0" etc
	// }
	// $target = & $tmp;

	// Flagged: notify first, then end the request
	pkwk_spamnotify($action, $page, $target, $progress, $method);
	spam_exit($exitmode, $progress);
}
// ---------------------
// PukiWiki original
/**
 * Mail to administrator(s): report a blocked request via pkwk_mail_notify().
 *
 * Does nothing unless the global $notify is truthy. When $method['asap'] is
 * set, the METRICS and DETAIL_NEUTRAL_HOST entries are left out of the summary.
 *
 * @param mixed $action   Stored as the COMMENT entry
 * @param mixed $page     Shown in the PAGE entry (only when is_pagename() accepts it)
 *                        and, rawurlencode()d, in the URI entry
 * @param mixed $target   Checked data; its var_export() is passed as the second
 *                        argument of pkwk_mail_notify()
 * @param array $progress Result array of the spam check
 * @param array $method   Methods used for the check; only 'asap' is read here
 * @return void
 */
function pkwk_spamnotify($action, $page, $target = array('title' => ''), $progress = array(), $method = array())
{
	global $notify, $notify_subject;

	if (! $notify) return;

	$detailed = ! isset($method['asap']);

	// Entries are added in the order they are passed on
	$summary = array();
	$summary['ACTION'] = 'Blocked by: ' . summarize_spam_progress($progress, TRUE);
	if ($detailed) {
		$summary['METRICS'] = summarize_spam_progress($progress);
	}

	$badhost = summarize_detail_badhost($progress);
	if ($badhost != '') {
		$summary['DETAIL_BADHOST'] = $badhost;
	}

	$neutral = summarize_detail_newtral($progress);
	if ($detailed && $neutral != '') {
		$summary['DETAIL_NEUTRAL_HOST'] = $neutral;
	}

	// Fixed tail; none of these keys exists yet, so the union appends them
	// NOTE(review): the TRUE values of USER_AGENT / REMOTE_ADDR are presumably
	// filled in by pkwk_mail_notify() -- confirm
	$summary += array(
		'COMMENT'     => $action,
		'PAGE'        => '[blocked] ' . (is_pagename($page) ? $page : ''),
		'URI'         => get_script_uri() . '?' . rawurlencode($page),
		'USER_AGENT'  => TRUE,
		'REMOTE_ADDR' => TRUE,
	);

	pkwk_mail_notify($notify_subject, var_export($target, TRUE), $summary, TRUE);
}
?>