<?php
-// $Id: spam_pickup.php,v 1.59 2007/09/13 13:02:53 henoheno Exp $
+// $Id: spam_pickup.php,v 1.60 2007/09/15 15:55:29 henoheno Exp $
// Copyright (C) 2006-2007 PukiWiki Developers Team
// License: GPL v2 or (at your option) any later version
//
// ---------------------
// Spam-uri pickup
-// Preprocess: Removing uninterest part for URI detection
+// Preprocess: Removing/Modifying uninterest part for URI detection
// Preprocess $binary so that later URI detection sees less noise.
//   $binary : text to be cleaned up
//   $method : option array; when the 'area_bbcode' or 'uri_bbcode' key is set
//             (the visible part of the condition below), the minimum length
//             passed to strings() drops from 4 to 1
// Returns the output of preg_replace() run over the result of strings().
// NOTE(review): this listing is a diff excerpt ('+' = added, '-' = removed);
// some original lines between hunks are not visible here.
function spam_uri_removing_hocus_pocus($binary = '', $method = array())
{
+ $from = $to = array();
+
+ // Remove sequential spaces and too short lines
$length = 4 ; // 'http'(1) and '://'(2) and 'fqdn'(1)
if (is_array($method)) {
// '<a'(2) or 'href='(5) or '>'(1) or '</a>'(4)
// NOTE(review): the opening 'if (' of the condition that the next line
// closes is not visible in this excerpt -- confirm against the full file
isset($method['area_bbcode']) || isset($method['uri_bbcode']))
$length = 1; // Seems not effective
}
-
- // Removing sequential spaces and too short lines
// strings() is defined outside this excerpt; presumably it does the
// space/short-line removal described above -- TODO confirm
$binary = strings($binary, $length, TRUE, FALSE); // Multibyte NOT needed
+ // Remove/Replace quoted-spaces within tags
// Pattern: an opening tag holding a double-quoted attribute value that
// contains at least one space. $1 = tag text up to the opening quote,
// $2 = the attribute value, $3 = closing quote through '>'.
// The replacement trims $2 and turns its remaining spaces into '%20'.
// NOTE(review): the 'e' modifier evaluates the replacement string as PHP
// code; it is deprecated since PHP 5.5 and removed in PHP 7, where
// preg_replace_callback() is the documented replacement.
+ $from[] = '#(<\w+ [^<>]*?\w ?= ?")([^"<>]*? [^"<>]*)("[^<>]*?>)#ie';
+ $to[] = "'$1' . str_replace(' ' , '%20' , trim('$2')) . '$3'";
+
// Remove words (has no '<>[]:') between spaces
- $binary = preg_replace('/[ \t][\w.,()\ \t]+[ \t]/', ' ', $binary);
+ $from[] = '/[ \t][\w.,()\ \t]+[ \t]/';
+ $to[] = ' ';
- return $binary;
// With array arguments, preg_replace() applies each $from/$to pair in the
// order it was appended: quoted-space rewrite first, then word removal
+ return preg_replace($from, $to, $binary);
}
// Preprocess: Domain exposure callback (See spam_uri_pickup_preprocess())
);
$string = spam_uri_removing_hocus_pocus($string, $method);
- //var_dump(htmlspecialchars($string));
// Domain exposure (simple)
// http://victim.example.org/nasty.example.org/path#frag