4 * NP_SpamBayes(JP) ($Revision: 1.4 $)
5 * by hsur ( http://blog.cles.jp/np_cles )
6 * $Id: NP_SpamBayes.php,v 1.4 2007-06-24 05:39:01 hsur Exp $
8 * Copyright (C) 2007 cles All rights reserved.
12 * Based on NP_SpamBayes
13 * by Xiffy. http://xiffy.nl/weblog/
15 * Bayesian filter for comment and trackback spam
18 ***** BEGIN LICENSE BLOCK *****
20 The Initial Developer of the Original Code is
21 Loic d'Anterroches [loic_at_xhtml.net].
22 Portions created by the Initial Developer are Copyright (C) 2003
23 the Initial Developer. All Rights Reserved.
27 PHP Naive Bayesian Filter is free software; you can redistribute it
28 and/or modify it under the terms of the GNU General Public License as
29 published by the Free Software Foundation; either version 2 of
30 the License, or (at your option) any later version.
32 PHP Naive Bayesian Filter is distributed in the hope that it will
33 be useful, but WITHOUT ANY WARRANTY; without even the implied
34 warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
35 See the GNU General Public License for more details.
37 You should have received a copy of the GNU General Public License
38 along with PHP Naive Bayesian Filter; if not, write to the Free Software
39 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
41 Alternatively, the contents of this file may be used under the terms of
42 the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
43 in which case the provisions of the LGPL are applicable instead
46 ***** END LICENSE BLOCK *****
48 ***** Version history *****
49 Version 1.0 : 2006 09 06 Stable on development and fresh installed blog.
50 1.0.1 : 2006 09 11 NAN bug solved, some more information on the screens
51 1.0.2 : 2006 09 15 Logging filtering applied to both ham and spam as well as different logtypes. Handy when
52 a lot of plugins use SpamBayes as a spam filter.
53 1.0.3 : 2006 09 19 Logging now adheres to the plugin option setting (thanks VJ)
54 Added the feature to train all 'new' comments
55 1.0.4 : 2006 09 26 Logging now adheres to the plugin option setting also in version 4 of PHP (thanks pepiino)
56 1.0.5 : 2006 10 15 Update probabilities now made obsolete. The function is run after all training sessions.
57 1.1.0 Beta 2007 01 07 Logger functions have been enhanced dramatically.
58 Items per page is now a user setting.
59 It's possible to scan for keywords inside the content
60 Explain functionality to see how a logged event scores against SpamBayes keywords. Prints both ham and spam results.
61 1.1.0 2007 01 08 Promote to weblog. Comments only. Will teach the document as Ham and publish the logged event as a legit comment.
62 Pagecounter could be wrong..
63 ***** End version history *****
65 * based on: many sources:
66 * http://priyadi.net/archives/2005/10/07/wpbayes-naive-bayesian-comment-spam-filter-for-wordpress/
67 * http://www.xhtml.net/php/PHPNaiveBayesianFilter
68 * http://www.opensourcetutorials.com/tutorials/Server-Side-Coding/PHP/implement-bayesian-inference-using-php-1/page11.html
69 * http://weblogtoolscollection.com/archives/2005/02/19/three-strikes-spam-plugin-updated/
70 * http://www-128.ibm.com/developerworks/web/library/wa-bayes1/?ca=dgr-lnxw961Bayesian
73 class NP_SpamBayes extends NucleusPlugin {
// Constructor, written PHP 4 style as a method named after the class.
// Stores the names of the four plugin tables, loads the bundled
// spambayes/spambayes.php library and creates the NaiveBayesian helper
// that the rest of the plugin reaches through $this->spambayes.
// NOTE(review): PHP 8 no longer treats a same-named method as a constructor;
// confirm the PHP version this plugin targets.
75 function NP_SpamBayes() {
// Table names are resolved through sql_table().
77 $this->table_cat = sql_table('plug_sb_cat'); // categories
78 $this->table_wf = sql_table('plug_sb_wf'); // word frequencies
79 $this->table_ref = sql_table('plug_sb_ref'); // references
80 $this->table_log = sql_table('plug_sb_log'); // logging
// NOTE(review): $DIR_PLUGINS is not declared in the lines visible here; it
// presumably comes from a "global $DIR_PLUGINS;" on an elided line - confirm.
81 include_once($DIR_PLUGINS."spambayes/spambayes.php");
// NOTE(review): "&$this" is call-time pass-by-reference, which PHP 5.4 and
// later reject with a fatal error; passing plain $this is the modern form.
82 $this->spambayes = new NaiveBayesian(&$this);
// Returns the names of the events this plugin handles: 'QuickMenu' and
// 'SpamCheck' (see event_QuickMenu() and event_SpamCheck() in this class).
85 function getEventList() {
86 return array('QuickMenu', 'SpamCheck');
// Tells whether the plugin provides an admin area.
// NOTE(review): the body is elided from this excerpt; it presumably returns a
// truthy value, since event_QuickMenu() links to getAdminURL() - confirm.
89 function hasAdminArea() {
// Handler for the 'SpamCheck' event. $data is taken by reference: the text
// under $data['spamcheck'] is scored with the Bayesian filter and, when it is
// judged to be spam, the verdict is written back into $data['spamcheck'].
// NOTE(review): several original lines (the switch cases, closing braces and
// the remaining logevent() arguments) are elided from this excerpt.
// NOTE(review): most $data['spamcheck'] keys are read without isset(), which
// raises notices or warnings when a key is absent - confirm callers always
// supply them.
93 function event_SpamCheck (&$data) {
// An earlier check already flagged this item as spam; nothing more to do.
95 if( isset($data['spamcheck']['result']) && $data['spamcheck']['result'] == true) return;
// Dispatch on the lower-cased check type; the case labels are not visible here.
97 switch( strtolower($data['spamcheck']['type']) ){
106 // for SpamCheck API 2.0 compatibility
// No pre-assembled 'data' text was supplied: build it from the body, author,
// email and url fields, each followed by a newline.
107 if( ! $data['spamcheck']['data'] ){
108 $data['spamcheck']['data'] = $data['spamcheck']['body'] ."\n";
109 $data['spamcheck']['data'] .= $data['spamcheck']['author'] ."\n";
110 $data['spamcheck']['data'] .= $data['spamcheck']['email'] ."\n";
111 $data['spamcheck']['data'] .= $data['spamcheck']['url'] ."\n";
// The 'ham' and 'spam' entries of the categorize() result are used as scores.
114 $score = $this->spambayes->categorize($data['spamcheck']['data']);
// Spam when the spam score is strictly greater than the 'probability' plugin
// option (created in this file with the default '0.95').
116 if( (float)$score['spam'] > (float)$this->getOption('probability') ) {
// Log prefix: "<type> SpamCheck" when a type is set, otherwise
// "event SpamCheck". The > '' string comparison acts as a non-empty test.
117 $log = $data['spamcheck']['type'] > '' ? $data['spamcheck']['type'] ." SpamCheck":"event SpamCheck";
// Record the detection together with both scores and the checked text.
118 $this->spambayes->nbs->logevent(
119 $log.' SPAM detected. score: (ham '.$score['ham'].') (spam: '.$score['spam'].')',
120 $data['spamcheck']['data'],
// When the caller set 'return' to true, the verdict is presumably handed back
// through $data below. NOTE(review): the alternative path is elided - confirm.
123 if(isset($data['spamcheck']['return']) && $data['spamcheck']['return'] == true) {
// Mark the item as spam and record which plugin decided, with both scores.
125 $data['spamcheck']['result'] = true;
126 $data['spamcheck']['plugin'] = $this->getName();
127 $data['spamcheck']['message'] = 'Marked as spam by NP_SpamBayes spamScore:'.(float)$score['spam'].' hamScore:'.(float)$score['ham'];
// Not spam and the checked text is not blank: log it as HAM.
132 } elseif ( trim($data['spamcheck']['data']) != '' ) {
133 $log = $data['spamcheck']['type'] > '' ? $data['spamcheck']['type'] ." SpamCheck":"event SpamCheck";
134 $this->spambayes->nbs->logevent(
135 $log.' HAM detected. score: (ham '.$score['ham'].') (spam: '.$score['spam'].')',
136 $data['spamcheck']['data'],
// NOTE(review): the comment below says HAM is not logged, yet the branch above
// calls logevent() for HAM. The lines in between are elided, so confirm which
// branch this comment belongs to or whether it is stale.
140 // in case of SpamCheck we do NOT log HAM events ...
// Plugin metadata getters; each returns a fixed string literal.
143 /* some default functions for a plugin */
// Plugin name; event_SpamCheck() also stores it in $data['spamcheck']['plugin'].
144 function getName() { return 'SpamBayes(JP)'; }
// Author credit string.
145 function getAuthor() { return 'xiffy + hsur'; }
// URL string for the plugin.
146 function getURL() { return 'http://blog.cles.jp/np_cles/category/31/subcatid/17'; }
// Version string.
147 function getVersion() { return '1.1.0 jp1.4b'; }
// Short description of the plugin.
// NOTE(review): this text says "Spam API 1.0" while event_SpamCheck() has a
// "SpamCheck API 2.0 compatibility" path - confirm which wording is current.
148 function getDescription() { return 'SpamBayes filter for comment and trackback spam. In adherence with Spam API 1.0 for Nucleus'; }
// Feature query taking the feature name in $what.
// NOTE(review): only the 'SqlTablePrefix' case label is visible; the switch
// statement and the returned values are elided from this excerpt. The plugin
// does obtain all its table names through sql_table() in the constructor.
149 function supportsFeature($what) {
151 case 'SqlTablePrefix':
// Handler for the 'QuickMenu' event. For logged-in site or blog admins, and
// when the 'enableQuickmenu' option is 'yes', it prepares a 'SpamBayes' menu
// entry pointing at the plugin admin URL.
// NOTE(review): the statement that attaches the entry to $data is elided from
// this excerpt - presumably it appends to $data['options']; confirm.
158 function event_QuickMenu(&$data) {
159 global $member, $nucleus, $blogid;
160 // only show to admins
// Version strings ending in "MD" get the blog-admin check with -1 instead of
// the current $blogid. NOTE(review): the meaning of the "MD" suffix is not
// shown here.
161 if (preg_match("/MD$/", $nucleus['version'])) {
162 $isblogadmin = $member->isBlogAdmin(-1);
164 $isblogadmin = $member->isBlogAdmin($blogid);
// Bail out unless the member is logged in and is a site admin or blog admin.
// NOTE(review): "|" is the bitwise OR operator, so it does not short-circuit;
// logical "||" was probably intended.
166 if (!($member->isLoggedIn() && ($member->isAdmin() | $isblogadmin))) return;
167 if ($this->getOption('enableQuickmenu') == 'yes' ) {
// Fields of the menu entry: label, target URL and tooltip text.
171 'title' => 'SpamBayes',
172 'url' => $this->getAdminURL(),
173 'tooltip' => 'Manage SpamBayes filter'
// Installation steps: register the plugin options, create the four plugin
// tables if they do not exist yet, and seed the 'ham' and 'spam' categories.
// NOTE(review): the enclosing function header is elided from this excerpt;
// presumably this is the body of install() - confirm.
180 // create some options
// NOTE(review): the 'probability' label contains the typo "sould" (should).
// It is a user-facing runtime string, so it is left untouched here.
181 $this->createOption('probability','Score at which point we sould consider a text as spam?','text','0.95');
182 $this->createOption('ignorelist','Which words should not be taken into consideration?','textarea','you the for and');
183 $this->createOption('enableTrainall','Show SpamBayes train all ham in menu?','yesno','no');
184 $this->createOption('enableQuickmenu','Show SpamBayes in quickmenu?','yesno','yes');
185 $this->createOption('enableLogging','Use SpamBayes action logging? (this could slow down during a spamrun and can cost huge amounts of db space!)','yesno','no');
187 $this->createOption('appid','Yahoo!Japan AppID','text','');
// Read by unInstall() to decide whether the tables are dropped.
188 $this->createOption('DropTable','Clear the database when uninstalling','yesno','no');
190 // create some sql tables as well
// Category table: one row per category code with its probability and word count.
191 sql_query("CREATE TABLE IF NOT EXISTS ".$this->table_cat." (catcode varchar(50) NOT NULL default '', probability double NOT NULL default '0', wordcount bigint(20) NOT NULL default '0', PRIMARY KEY (catcode))");
// Word-frequency table: a count per (word, catcode) pair.
192 sql_query("CREATE TABLE IF NOT EXISTS ".$this->table_wf." (word varchar(250) NOT NULL default '', catcode varchar(50) NOT NULL default '', wordcount bigint(20) NOT NULL default '0', PRIMARY KEY (word, catcode))");
// NOTE(review): the next two statements declare "text NOT NULL default ''".
// MySQL does not accept a literal DEFAULT on TEXT columns and rejects it in
// strict SQL mode - confirm against the supported database versions.
// Reference table: content keyed by ref, indexed by category code.
193 sql_query("CREATE TABLE IF NOT EXISTS ".$this->table_ref." (ref bigint(20) NOT NULL, catcode varchar(250) NOT NULL default '', content text NOT NULL default '', PRIMARY KEY (ref), KEY(catcode))");
// Log table: auto-increment id, log line, content, category code and timestamp.
194 sql_query("CREATE TABLE IF NOT EXISTS ".$this->table_log." (id bigint(20) NOT NULL auto_increment, log varchar(250) NOT NULL default '', content text NOT NULL default '', catcode varchar(250) NOT NULL default '', logtime timestamp, PRIMARY KEY (id), KEY(catcode))");
195 // create 'ham' and 'spam' categories
// NOTE(review): these are plain INSERTs into a table whose primary key is
// catcode. If the tables survived an earlier uninstall (DropTable = 'no'),
// the rows already exist and these statements hit a duplicate-key error.
// How sql_query() reports that error is not visible here.
196 sql_query("insert into ".$this->table_cat." (catcode) values ('ham')");
197 sql_query("insert into ".$this->table_cat." (catcode) values ('spam')");
// Uninstall hook: drops the four plugin tables, but only when the 'DropTable'
// option is set to 'yes'; otherwise the tables and their data are kept.
// NOTE(review): the closing braces are outside the visible excerpt.
200 function unInstall() {
201 if ($this->getOption('DropTable') == 'yes') {
// "if exists" keeps each drop from failing when a table is already gone.
202 sql_query('drop table if exists '.$this->table_cat);
203 sql_query('drop table if exists '.$this->table_ref);
204 sql_query('drop table if exists '.$this->table_wf);
205 sql_query('drop table if exists '.$this->table_log);