OSDN Git Service

*** empty log message ***
author: hsur <hsur@1ca29b6e-896d-4ea0-84a5-967f57386b96>
Fri, 22 Jun 2007 16:40:20 +0000 (16:40 +0000)
committer: hsur <hsur@1ca29b6e-896d-4ea0-84a5-967f57386b96>
Fri, 22 Jun 2007 16:40:20 +0000 (16:40 +0000)
git-svn-id: https://svn.sourceforge.jp/svnroot/nucleus-jp/plugin@576 1ca29b6e-896d-4ea0-84a5-967f57386b96

trunk/NP_SpamBayes/NP_SpamBayes.php
trunk/NP_SpamBayes/spambayes/index.php
trunk/NP_SpamBayes/spambayes/spambayes.php

index d401056..4f41a04 100644 (file)
@@ -1,9 +1,9 @@
 <?php
 
 /**
-  * NP_SpamBayes(JP) ($Revision: 1.1 $)
+  * NP_SpamBayes(JP) ($Revision: 1.2 $)
   * by hsur ( http://blog.cles.jp/np_cles )
-  * $Id: NP_SpamBayes.php,v 1.1 2007-06-20 16:25:46 hsur Exp $
+  * $Id: NP_SpamBayes.php,v 1.2 2007-06-22 16:40:20 hsur Exp $
   *
   * Copyright (C) 2007 cles All rights reserved.
 */
@@ -143,8 +143,8 @@ class NP_SpamBayes extends NucleusPlugin {
        /* some default functions for a plugin */
        function getName()                { return 'SpamBayes(JP)'; }
        function getAuthor()      { return 'xiffy + hsur'; }
-       function getURL()                 { return 'http://blog.cles.jp/np_cles/'; }
-       function getVersion()     { return '1.1.0 jp1.0b'; }
+       function getURL()                 { return 'http://blog.cles.jp/np_cles/category/31/subcatid/17'; }
+       function getVersion()     { return '1.1.0 jp1.2b'; }
        function getDescription() { return 'SpamBayes filter for comment and trackback spam. In adherence with Spam API 1.0 for Nucleus';       }
        function supportsFeature($what) {
                switch($what) {
index 19a07e0..ed31486 100644 (file)
        
        function sb_batch() {
                global $oPluginAdmin;
-               $logids = requestIntArray(batch);
+               $logids = requestIntArray('batch');
                $action = requestVar('batchaction');
-               //debug: print_r ($logids);
+               //debug: var_dump($logids);
                if ($logids) foreach ($logids as $id) {
                        switch ($action) {
                                case 'tspam':
                }
                if ($trainall == 'yes') {
                        echo "<li><a href=\"".htmlspecialchars($manager->addTicketToUrl(serverVar('PHP_SELF')."?page=trainall"),ENT_QUOTES)."\">Train HAM (not spam) with all comments<span>Use this to train the Spam Bayesian filter with all your comments as 'ham' (not spam). This can take a while but you don't have to do anything. Just sit back and relax. Once you've run this option it's save to remove it from the menu. (See options)</span></a></li>\n";
-                       //cles::blog echo "<li><a href=\"".htmlspecialchars($manager->addTicketToUrl(serverVar('PHP_SELF')."?page=trainblocked"),ENT_QUOTES)."\">Train spam with all blocked comments</a></li>\n";
+                       //echo "<li><a href=\"".htmlspecialchars($manager->addTicketToUrl(serverVar('PHP_SELF')."?page=trainblocked"),ENT_QUOTES)."\">Train spam with all blocked comments</a></li>\n";
                        echo "<li><a href=\"".htmlspecialchars($manager->addTicketToUrl(serverVar('PHP_SELF')."?page=traintb"),ENT_QUOTES)."\">Train ham with all trackbacks.</a></li>\n";
                        echo "<li><a href=\"".htmlspecialchars($manager->addTicketToUrl(serverVar('PHP_SELF')."?page=trainspamtb"),ENT_QUOTES)."\">Train spam with all blocked trackbacks.</a></li>\n";
                        echo "<li><a href=\"".htmlspecialchars($manager->addTicketToUrl(serverVar('PHP_SELF')."?page=untrainall"),ENT_QUOTES)."\">Remove all comments from the HAM (not spam).<span>Use this to untrain the Spam Bayesian filter. This can take a while but you don't have to do anything. Just sit back and relax. Use only if you think earlier training went wrong.</span></a></li>\n";
                }
                echo "<li><a href=\"".htmlspecialchars($manager->addTicketToUrl(serverVar('PHP_SELF')."?page=trainnew"),ENT_QUOTES)."\">Train HAM (not spam) with all NEW comments<span>Use this to train the Spam Bayesian filter with all your yet untrained comments as 'ham' (not spam). This can take a while but you don't have to do anything. Just sit back and relax. You can use this option as much as you like. Only untrained comments will be added.</span></a></li>\n";
-               //cles::blog echo "<li><a href=\"".htmlspecialchars($manager->addTicketToUrl(serverVar('PHP_SELF')."?page=trainblockednew"),ENT_QUOTES)."\">Train spam with all NEW blocked comments</a></li>\n";
+               //echo "<li><a href=\"".htmlspecialchars($manager->addTicketToUrl(serverVar('PHP_SELF')."?page=trainblockednew"),ENT_QUOTES)."\">Train spam with all NEW blocked comments</a></li>\n";
                echo "<li><a href=\"".htmlspecialchars($manager->addTicketToUrl(serverVar('PHP_SELF')."?page=traintbnew"),ENT_QUOTES)."\">Train HAM (not spam) with all NEW trackbacks.</a></li>\n";
                echo "<li><a href=\"".htmlspecialchars($manager->addTicketToUrl(serverVar('PHP_SELF')."?page=trainspamtbnew"),ENT_QUOTES)."\">Train spam with all NEW blocked trackbacks.</a></li>\n";
                //echo "<li><a href=\"".htmlspecialchars($manager->addTicketToUrl(serverVar('PHP_SELF')."?page=update"),ENT_QUOTES)."\">Update probabilities<span>After some training, you must use this to finalise</span></a></li>\n";
                echo '</span></td></tr>';
                echo $pager;
                $extraaction = '&filter='.$filter.'&filtertype='.urlencode($filtertype).'&startpos='.$startpos.'&keyword='.$keyword.'&ipp='.$ipp.'&ticket='.$ticket;
-               echo '<tr><th>Date</th><th>event</th><th>content</th><th>action</th></tr><form>';
+               echo '<tr><th>Date</th><th>event</th><th>content</th><th>action</th></tr><form method="post"><input type="hidden" name="ticket" value="'.$ticket.'" />';
                $i = 0;
                while ($arr = mysql_fetch_array($res)) {
                        echo '<tr onmouseover="focusRow(this);" onmouseout="blurRow(this);"><td>'.$arr['logtime'].'<br /><b>'.$arr['catcode'].'</b></td><td>'.$arr['log'].'</td><td><input id="batch'.$i.'" name="batch['.$i.']" value="'.$arr['id'].'" type="checkbox"><label for="batch'.$i.'">'.htmlspecialchars(str_replace('^^', ' ',$arr['content']),ENT_QUOTES).'</label></td>';
                echo '<option value="tspam">Train spam</option>';
                echo '<option value="tham">Train ham</option>';
                echo '<option value="delete">Delete</option></select><input name="page" value="batch" type="hidden">';
-               echo '<input type="hidden" name="ipp" value="'.$ipp.'"/><input type="hidden" name="filter" value="'.$filter.'" /><input type="hidden" name="filtertype" value="'.$filtertype.'" /><input type="hidden" name="keyword" value="'.$keyword.'" /><input type="hidden" name="ticket" value="'.$ticket.'" />';
-               echo '<input value="Uitvoeren" type="submit">(
+               echo '<input type="hidden" name="ipp" value="'.$ipp.'"/><input type="hidden" name="filter" value="'.$filter.'" /><input type="hidden" name="filtertype" value="'.$filtertype.'" /><input type="hidden" name="keyword" value="'.$keyword.'" />';
+               echo '<input value="Execute" type="submit">(
                                 <a href="" onclick="if (event && event.preventDefault) event.preventDefault(); return batchSelectAll(1); ">select all</a> -
                                 <a href="" onclick="if (event && event.preventDefault) event.preventDefault(); return batchSelectAll(0); ">deselect all</a>
                                )
index f608643..7f31a70 100644 (file)
@@ -260,7 +260,9 @@ class NaiveBayesian {
                //$string = $this->_cleanString($string);
                if (count(0 >= $this->ignore_list))
                $this->ignore_list = $this->getIgnoreList();
-                       
+
+               $string = strip_tags($string);
+
                if( defined('NP_SPAMBAYES_TOKENIZER') && function_exists(proc_open) ) {
                        // using mecab
 
@@ -288,7 +290,9 @@ class NaiveBayesian {
                        }
                } else {
                        // using Yahoo!API
-                               
+                       if( _CHARSET != 'UTF-8' )
+                               $string = mb_convert_encoding($string, 'UTF-8', _CHARSET);
+                       
                        $postData['appid'] = $this->parent->getOption('appid');
                        $postData['results'] = 'ma';
                        $postData['filter'] = '1|2|3|4|5|7|8|9|10';
@@ -300,6 +304,12 @@ class NaiveBayesian {
                        $p = new NP_SpamBayes_XMLParser();
                        $rawtokens = $p->parse($data);
                        $p->free();
+                       
+                       if( _CHARSET != 'UTF-8' ){
+                               foreach( $rawtokens as $index => $word ){
+                                       $rawtokens[$index] = mb_convert_encoding($word, _CHARSET, 'UTF-8');
+                               }
+                       }
                }
 
                // remove some tokens
@@ -352,6 +362,12 @@ class NaiveBayesian {
                        $request .= "\r\n";
                }
 
+               /* debug
+               $test = fopen("/tmp/postdata.dat","wb");
+               fwrite($test, $request);
+               fclose($test);
+               */
+
                $fp = fsockopen($URL['host'], $URL['port'], $errno, $errstr, 20);
 
                if ($fp) {