OSDN Git Service

*** empty log message ***
[nucleus-jp/nucleus-plugins.git] / trunk / NP_SpamBayes / spambayes / spambayes.php
1 <?php
2
3 /**
4   * Modified by hsur ( http://blog.cles.jp/np_cles )
5   * $Id: spambayes.php,v 1.5 2007-06-25 11:47:30 hsur Exp $
6
7     ***** BEGIN LICENSE BLOCK *****
8         This file is part of PHP Naive Bayesian Filter.
9         The Initial Developer of the Original Code is
10         Loic d'Anterroches [loic_at_xhtml.net].
11         Portions created by the Initial Developer are Copyright (C) 2003
12         the Initial Developer. All Rights Reserved.
13
14         PHP Naive Bayesian Filter is free software; you can redistribute it
15         and/or modify it under the terms of the GNU General Public License as
16         published by the Free Software Foundation; either version 2 of
17         the License, or (at your option) any later version.
18
19         PHP Naive Bayesian Filter is distributed in the hope that it will
20         be useful, but WITHOUT ANY WARRANTY; without even the implied
21         warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
22         See the GNU General Public License for more details.
23
24         You should have received a copy of the GNU General Public License
25         along with Foobar; if not, write to the Free Software
26         Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
27
28         Alternatively, the contents of this file may be used under the terms of
29         the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
30         in which case the provisions of the LGPL are applicable instead
31         of those above.
32 ***** END LICENSE BLOCK ******/
33
// Tokenizer selection (see NaiveBayesian::_getTokens()):
//  - NP_SPAMBAYES_APIURL is used when it is defined AND the plugin option
//    'appid' is non-empty;
//  - otherwise NP_SPAMBAYES_TOKENIZER (an external command run through
//    proc_open) is used when it is defined;
//  - otherwise a plain regular-expression word split is used.
//define('NP_SPAMBAYES_TOKENIZER', '/usr/local/bin/mecab -F "%h\t%m\t%f[6]\n" -E ""');

// Guarded so that a site configuration may pre-define another endpoint and so
// that including this file twice does not raise "constant already defined".
if (!defined('NP_SPAMBAYES_APIURL')) {
        define('NP_SPAMBAYES_APIURL', 'http://api.jlp.yahoo.co.jp/MAService/V1/parse');
}
36
/**
 * Naive Bayesian text classifier.
 *
 * Splits documents into tokens, keeps per-category word counts through a
 * NaiveBayesianStorage object and scores new documents against every stored
 * category.
 */
class NaiveBayesian {
        /** min token length for it to be taken into consideration */
        var $min_token_length = 2;
        /** max token length for it to be taken into consideration */
        var $max_token_length = 40;
        /** list of token to ignore @see getIgnoreList() */
        var $ignore_list = array();

        /** storage backend (a NaiveBayesianStorage instance) */
        var $nbs = null;
        /** owning plugin object; only its getOption() method is used here */
        var $parent = null;
        /** value of the plugin option 'appid'; the web API tokenizer is used only when it is non-empty */
        var $appid = null;

        /** constructor.
                Declared before the PHP4-style constructor below so that both
                old and new PHP versions initialise the object the same way.

                @param object plugin object providing getOption()
                */
        function __construct(&$parent) {
                // The storage constructor already declares its parameter by
                // reference; call-time "&" is a fatal error since PHP 5.4.
                $this->nbs = new NaiveBayesianStorage($parent);
                $this->parent = &$parent;

                $this->appid = $this->parent->getOption('appid');
        }

        /** PHP4-style constructor, kept for backward compatibility.
                @return bool always true
                @param object plugin object providing getOption()
                */
        function NaiveBayesian(&$parent) {
                $this->__construct($parent);
                return true;
        }

        /** categorize a document.
                Get list of categories in which the document can be categorized
                with a score for each category.

                @return array keys = category ids, values = scores
                @param string document
                */
        function categorize($document) {
                $computed = $this->_computeScores($document);
                return $this->_rescale($computed['scores']);
        } // function categorize

        /** print an HTML table explaining the score of a document.
                Echoes one row per known word with its count in the 'ham' and
                'spam' categories, followed by the rescaled scores.

                @return void
                @param string content of the document
                */
        function explain($content) {
                $computed = $this->_computeScores($content);
                $scores = $this->_rescale($computed['scores']);
                $result = $computed['words'];
                if (count($result) > 0) {
                        array_multisort($result, SORT_DESC);
                }

                echo '<table>';
                echo '<tr><th>word</th><th>Ham</th><th>Spam</th></tr>';
                foreach($result as $key => $value) {
                        echo '<tr>';
                        // words originate from submitted documents: escape them for HTML
                        echo '<td>'.htmlspecialchars((string) $key, ENT_QUOTES, _CHARSET).'</td>';
                        echo '<td>'.(isset($value['ham']) ? $value['ham'] : '').'</td>';
                        echo '<td>'.(isset($value['spam']) ? $value['spam'] : '').'</td>';
                        echo '</tr>';
                }
                echo '<tr><td>Rescaled probability:</td><th>'
                        .(isset($scores['ham']) ? $scores['ham'] : '').'</th><th>'
                        .(isset($scores['spam']) ? $scores['spam'] : '').'</th></tr>';
                echo '</table>';
        }

        /** compute the raw (not yet rescaled) score of a text in every category.
                Shared by categorize() and explain().

                @return array 'scores' => (category => raw score),
                              'words'  => (word => (category => word count))
                @param string text to score
                */
        function _computeScores($text) {
                $categories = $this->nbs->getCategories();
                $tokens = $this->_getTokens($text);

                // total number of words over all categories and number of categories
                $total_words = 0;
                $ncat = 0;
                foreach ($categories as $data) {
                        // getCategories() stores a plain string when its query fails
                        if (!is_array($data)) continue;
                        $total_words += $data['wordcount'];
                        $ncat++;
                }
                $average = ($ncat > 0) ? $total_words / $ncat : 0;

                // whether a word is known does not depend on the category:
                // look it up once per token instead of once per category
                $known = array();
                foreach ($tokens as $token => $count) {
                        if ($this->nbs->wordExists($token)) {
                                $known[$token] = $count;
                        }
                }

                $scores = array();
                $words  = array();
                foreach ($categories as $category => $data) {
                        if (!is_array($data)) continue;
                        $score = $data['probability'];
                        // small probability for a word not in the category
                        // (0 for an empty category, which avoids a division by zero)
                        $small_proba = ($data['wordcount'] > 0) ? 1.0 / ($data['wordcount'] * 2) : 0.0;
                        foreach ($known as $token => $count) {
                                $word = $this->nbs->getWord($token, $category);
                                $words[$word['word']][$category] = $word['wordcount'];
                                if ($word['wordcount'] && $data['wordcount'] > 0) {
                                        $proba = $word['wordcount'] / $data['wordcount'];
                                } else {
                                        $proba = $small_proba;
                                }
                                $newval = $score * pow($proba, $count) * pow($average, $count);
                                // keep the previous value when the product overflows
                                if (is_finite($newval)) {
                                        $score = $newval;
                                }
                        }
                        $scores[$category] = $score;
                }
                return array('scores' => $scores, 'words' => $words);
        }

        /** training against a document.
                Set a document as being in a specific category. The document becomes a reference
                and is saved in the table of references. After a set of training is done
                the updateProbabilities() function must be run.

                @see updateProbabilities()
                @see untrain()
                @return bool success
                @param string document id, must be unique
                @param string category_id the category id in which the document should be
                @param string content of the document
                */
        function train($doc_id, $category_id, $content) {
                foreach ($this->_getTokens($content) as $token => $count) {
                        $this->nbs->updateWord($token, $count, $category_id);
                }
                $this->nbs->saveReference($doc_id, $category_id, $content);
                return true;
        } // function train

        /** train against a document only when no reference with this id exists yet.
                @see train()
                @param string document id
                @param string category id
                @param string content of the document
                */
        function trainnew($doc_id, $category_id, $content) {
                $reference = $this->nbs->getReference($doc_id);
                if (!$reference) {
                        $this->train($doc_id, $category_id, $content);
                }
        }

        /** untraining of a document.
                To remove just one document from the references.

                @see updateProbabilities()
                @see train()
                @return bool success
                @param string document id, must be unique
                */
        function untrain($doc_id) {
                $ref = $this->nbs->getReference($doc_id);
                if (!$ref) {
                        // unknown document: nothing to subtract, and no need to tokenize
                        return true;
                }
                foreach ($this->_getTokens($ref['content']) as $token => $count) {
                        $this->nbs->removeWord($token, $count, $ref['catcode']);
                }
                $this->nbs->removeReference($doc_id);
                return true;
        } // function untrain

        /** rescale the results.
                Subtracts the largest score (the maximum search starts at 0.0),
                exponentiates, then divides every value by the Euclidean norm
                of the exponentiated values.

        @author Ken Williams, ken@mathforum.org
        @see categorize()
        @return array normalized scores (keys => category, values => scores)
        @param array scores (keys => category, values => scores)
        */
        function _rescale($scores) {
                $max = 0.0;
                foreach ($scores as $score) {
                        if ($score >= $max) $max = $score;
                }

                $total = 0.0;
                foreach ($scores as $cat => $score) {
                        $scores[$cat] = (float) exp($score - $max);
                        $total += (float) pow($scores[$cat], 2);
                }
                $total = (float) sqrt($total);

                foreach ($scores as $cat => $score) {
                        // $scores[$cat] (not $score) holds the exponentiated value
                        $scores[$cat] = (float) $scores[$cat] / $total;
                }
                reset($scores);
                return $scores;
        } // function _rescale

        /** update the probabilities of the categories and word count.
                This function must be run after a set of training

                @see train()
                @see untrain()
                @return bool success
                */
        function updateProbabilities() {
                // this function is really only database manipulation
                // that is why all is done in the NaiveBayesianStorage
                return $this->nbs->updateProbabilities();
        } // function updateProbabilities

        /** Get the list of token to ignore.
                Reads the plugin option 'ignorelist' and splits it on commas and spaces.
        @return array ignore list
        */
        function getIgnoreList() {
                $ignore = $this->parent->getOption('ignorelist');
                return explode(' ', str_replace(',', ' ', $ignore));
        }

        /** get the tokens from a string
        @author James Seng. [http://james.seng.cc/] (based on his perl version)

        @return array tokens (keys = token, values = number of occurrences)
        @param  string the string to get the tokens from
        */
        function _getTokens($string)  {
                $tokens = array();

                // load the ignore list once; the original test had a misplaced
                // parenthesis and reloaded it on every call
                if (!is_array($this->ignore_list) || 0 >= count($this->ignore_list)) {
                        $this->ignore_list = $this->getIgnoreList();
                }

                $string = strip_tags($string);

                if( defined('NP_SPAMBAYES_APIURL') && $this->appid ){
                        // using Yahoo!API
                        $rawtokens = $this->_apiTokens($string);
                } else if( defined('NP_SPAMBAYES_TOKENIZER') && function_exists('proc_open') ) {
                        // using mecab
                        $rawtokens = $this->_mecabTokens($string);
                } else {
                        // original
                        $rawtokens = preg_split('/[\W]+/', $this->_cleanString($string));
                }

                if (!is_array($rawtokens)) {
                        return $tokens;
                }

                // remove some tokens
                $punctuation = '/['.preg_quote('"\':;/\_[](){}!#%&$=+*|~?<>,.-','/').']+/';
                foreach ($rawtokens as $token) {
                        if ('' == $token) continue;
                        $length = mb_strlen($token);
                        if ($length < $this->min_token_length || $length > $this->max_token_length) continue;
                        if (preg_match('/^[0-9]+$/', $token)) continue;
                        if (preg_match($punctuation, $token)) continue;
                        if (in_array($token, $this->ignore_list, true)) continue;

                        $tokens[$token] = isset($tokens[$token]) ? $tokens[$token] + 1 : 1;
                }
                return $tokens;
        } // function _getTokens

        /** tokenize a string through the web API configured in NP_SPAMBAYES_APIURL.
                The text is sent as UTF-8 and the returned words are converted
                back to _CHARSET. An error reported by the service is logged
                and yields an empty list.

        @return array raw tokens
        @param  string text without tags
        */
        function _apiTokens($string) {
                $rawtokens = array();

                if( _CHARSET != 'UTF-8' )
                        $string = mb_convert_encoding($string, 'UTF-8', _CHARSET);

                $postData = array(
                        'appid'    => $this->appid,
                        'results'  => 'ma',
                        'filter'   => '1|2|3|4|5|7|8|9|10',
                        'response' => 'baseform',
                        'sentence' => $string
                );

                $data = $this->_http(NP_SPAMBAYES_APIURL, 'POST', '', $postData);
                if (!$data) {
                        return $rawtokens;
                }

                $p = new NP_SpamBayes_XMLParser();
                $rawtokens = $p->parse($data);

                if( _CHARSET != 'UTF-8' && is_array($rawtokens) ){
                        foreach( $rawtokens as $index => $word ){
                                $rawtokens[$index] = mb_convert_encoding($word, _CHARSET, 'UTF-8');
                        }
                }

                // !empty() also copes with a parser that never set the flag
                if( !empty($p->isError) ){
                        ACTIONLOG :: add(WARNING, 'NP_SpamBayes: Y!API Error( '. (isset($rawtokens[0]) ? $rawtokens[0] : 'Unknown Error') . ' )');
                        $rawtokens = array();
                }

                $p->free();
                return $rawtokens;
        }

        /** tokenize a string with the external command NP_SPAMBAYES_TOKENIZER.
                Each output line is expected as "id<TAB>original<TAB>normalized";
                only ids in 10..12 and 31..67 are kept.

        @return array raw tokens
        @param  string text
        */
        function _mecabTokens($string) {
                $rawtokens = array();

                $string = preg_replace('/\r|\n/', '', $string);
                $string = strtr($string, array_flip(get_html_translation_table(HTML_SPECIALCHARS)));
                $string = strip_tags($string);
                $dspec = array(
                        0 => array("pipe", "r"),
                        1 => array("pipe", "w"),
                        2 => array("file", "/dev/null", "w")
                );
                $process = proc_open(NP_SPAMBAYES_TOKENIZER, $dspec, $pipes);
                if (!is_resource($process)) {
                        return $rawtokens;
                }

                stream_set_blocking($pipes[0], FALSE);
                stream_set_blocking($pipes[1], FALSE);
                fwrite($pipes[0], $string . "\n");
                fclose($pipes[0]);
                while(!feof($pipes[1])) {
                        $line = fgets($pipes[1], 32768);
                        // pad so that empty or short lines do not raise notices
                        list($id, $origStr, $regStr) = array_pad(explode("\t", trim((string) $line), 3), 3, '');
                        if(  ( 31 <= $id && $id <= 67 ) || ( 10 <= $id && $id <= 12 ) )
                                $rawtokens[] = trim($regStr ? $regStr : $origStr);
                }
                fclose($pipes[1]);
                proc_close($process);

                return $rawtokens;
        }

        /** send a HTTP/1.0 request and return the response body.
        @return string response body, or null when the connection failed
                or the response has no header/body separator
        @param  string url
        @param  string method ("GET" or "POST")
        @param  string extra header lines, each terminated by CRLF
        @param  array  POST fields (name => value)
        */
        function _http($url, $method = "GET", $headers = "", $post = array ("")) {
                $URL = parse_url($url);
                if (!is_array($URL) || empty($URL['host'])) {
                        ACTIONLOG :: add(WARNING, 'NP_SpamBayes: HTTP Error: invalid URL');
                        return null;
                }

                $host  = $URL['host'];
                $port  = isset($URL['port']) ? $URL['port'] : 80;
                $path  = isset($URL['path']) ? $URL['path'] : '/';
                $query = isset($URL['query']) ? "?".$URL['query'] : "";

                $request  = $method." ".$path.$query." HTTP/1.0\r\n";
                $request .= "Host: ".$host."\r\n";
                $request .= "User-Agent: NP_SpamBayes\r\n";

                if (isset ($URL['user']) && isset ($URL['pass'])) {
                        $request .= "Authorization: Basic ".base64_encode($URL['user'].":".$URL['pass'])."\r\n";
                }

                $request .= $headers;

                if (strtoupper($method) == "POST") {
                        $pairs = array();
                        foreach ($post as $name => $value) {
                                $pairs[] = $name."=".urlencode($value);
                        }
                        $postdata = implode("&", $pairs);
                        $request .= "Content-Type: application/x-www-form-urlencoded\r\n";
                        $request .= "Content-Length: ".strlen($postdata)."\r\n";
                        $request .= "\r\n";
                        $request .= $postdata;
                } else {
                        $request .= "\r\n";
                }

                $fp = fsockopen($host, $port, $errno, $errstr, 20);
                if (!$fp) {
                        ACTIONLOG :: add(WARNING, 'NP_SpamBayes: HTTP Error: '."[$errno]($host:$port) $errstr");
                        return null;
                }

                stream_set_timeout($fp, 20);
                fputs($fp, $request);
                $response = "";
                while (!feof($fp)) {
                        $chunk = fgets($fp, 4096);
                        // fgets() returns false on a read timeout; stop instead of spinning
                        if ($chunk === false) break;
                        $response .= $chunk;
                }
                fclose($fp);

                // headers and body are separated by the first empty line
                $DATA = explode("\r\n\r\n", $response, 2);
                return isset($DATA[1]) ? $DATA[1] : null;
        }

        /** clean a string from the diacritics
                Works byte by byte on single-byte Latin characters (codes 192-255)
                and lower-cases the result with strtolower().
                NOTE(review): multi-byte input such as UTF-8 is presumably not
                handled correctly by this byte-wise mapping - confirm.

        @author Antoine Bajolet [phpdig_at_toiletoine.net]
        @author SPIP [http://uzine.net/spip/]

        @return string clean string
        @param  string string with accents
        */
        function _cleanString($string)  {
                $diac =
                /* A */   chr(192).chr(193).chr(194).chr(195).chr(196).chr(197).
                /* a */   chr(224).chr(225).chr(226).chr(227).chr(228).chr(229).
                /* O */   chr(210).chr(211).chr(212).chr(213).chr(214).chr(216).
                /* o */   chr(242).chr(243).chr(244).chr(245).chr(246).chr(248).
                /* E */   chr(200).chr(201).chr(202).chr(203).
                /* e */   chr(232).chr(233).chr(234).chr(235).
                /* Cc */  chr(199).chr(231).
                /* I */   chr(204).chr(205).chr(206).chr(207).
                /* i */   chr(236).chr(237).chr(238).chr(239).
                /* U */   chr(217).chr(218).chr(219).chr(220).
                /* u */   chr(249).chr(250).chr(251).chr(252).
                /* yNn */ chr(255).chr(209).chr(241);
                $plain = 'AAAAAAaaaaaaOOOOOOooooooEEEEeeeeCcIIIIiiiiUUUUuuuuyNn';
                return strtolower(strtr($string, $diac, $plain));
        }
} // class NaiveBayesian
433
/**
 * Minimal XML reader for the tokenizer web service response.
 *
 * Collects the text content of every BASEFORM and MESSAGE element
 * (element names are compared in upper case) and raises the isError
 * flag when an ERROR element is met.
 */
class NP_SpamBayes_XMLParser {
        /** underlying parser created by xml_parser_create() */
        var $parser = null;
        /** collected, trimmed texts in document order */
        var $words = array();
        /** kept for backward compatibility; not used for matching any more */
        var $target = null;
        /** name of the element currently being collected, or false */
        var $inTarget = false;
        /** true once an ERROR element has been seen */
        var $isError = false;
        /** text gathered so far for the current element */
        var $buffer = '';

        /** constructor.
                Declared before the PHP4-style constructor below so that both
                old and new PHP versions set the parser up.
                */
        function __construct(){
                $this->parser = xml_parser_create();
                xml_set_object($this->parser, $this);
                xml_set_element_handler($this->parser, "_open", "_close");
                xml_set_character_data_handler($this->parser, "_cdata");

                $this->target = null ;
                $this->inTarget = false;
        }

        /** PHP4-style constructor, kept for backward compatibility. */
        function NP_SpamBayes_XMLParser(){
                $this->__construct();
        }

        /** parse a complete XML document.
        @return array collected texts
        @param  string XML document
        */
        function parse($data){
                $this->words = array();
                $this->isError = false;
                $this->inTarget = false;
                $this->buffer = '';
                // the whole document is passed at once, so this chunk is final
                xml_parse($this->parser, $data, true);
                return $this->words;
        }

        /** release the parser and drop the collected words. */
        function free(){
                xml_parser_free($this->parser);
                $this->words = null;
        }

        /** start-tag handler: begin collecting, or flag an error. */
        function _open($parser, $name, $attribute){
                switch( $name ){
                        case 'BASEFORM':
                        case 'MESSAGE':
                                $this->inTarget = $name;
                                $this->buffer = '';
                                break;
                        case 'ERROR':
                                $this->isError = true;
                                break;
                }
        }

        /** end-tag handler: store the buffered text of the collected element.
                The original compared against $this->target, which is always
                null, so collecting never stopped after the first match.
                */
        function _close($parser, $name){
                if( $this->inTarget && $name == $this->inTarget ){
                        $word = trim($this->buffer);
                        if( $word !== '' ) $this->words[] = $word;
                        $this->inTarget = false;
                        $this->buffer = '';
                }
        }

        /** character-data handler.
                May be called several times for one text node, hence the buffer.
                */
        function _cdata($parser, $data){
                if( $this->inTarget ){
                        $this->buffer .= $data;
                }
        }
}
480
481 /** Access to the storage of the data for the filter.
482
483 To avoid dependency with respect to any database, this class handle all the
484 access to the data storage. You can provide your own class as long as
485 all the methods are available. The current one rely on a MySQL database.
486
487 methods:
488 - array getCategories()
489 - bool  wordExists(string $word)
490 - array getWord(string $word, string $categoryid)
491
492 */
493 class NaiveBayesianStorage {
494         function NaiveBayesianStorage(&$plugin) {
495                 $this->table_cat = sql_table('plug_sb_cat'); // categories
496                 $this->table_wf  = sql_table('plug_sb_wf');  // word frequencies
497                 $this->table_ref = sql_table('plug_sb_ref'); // references
498                 $this->table_log = sql_table('plug_sb_log'); // logging
499                 $this->plugin = &$plugin;
500         }
501         /** get the list of categories with basic data.
502         @return array key = category ids, values = array(keys = 'probability', 'word_count')
503         */
504         function getCategories() {
505                 $categories = array();
506
507                 $rs = sql_query('SELECT * FROM '.$this->table_cat);
508
509                 if ($rs) {
510                         while ($row = mysql_fetch_array($rs)) {
511                                 $categories[$row['catcode']] = array('probability' => $row['probability'], 'wordcount'  => $row['wordcount'] );
512                         }
513                 } else {
514                         $categories[0] = 'No categories found';
515                 }
516                 return $categories;
517         } // getCategories
518
519         /** see if the word is an already learnt word.
520         @return bool
521         @param string word
522         */
523         function wordExists($word)  {
524                 $rs = sql_query("SELECT count(*) as amount FROM ".$this->table_wf." WHERE word='". mysql_real_escape_string($word)."'");
525                 $obj = mysql_fetch_object($rs);
526                 if ($obj->amount == 0) return false;
527                 else return true;
528         } // wordExists
529
530         /** get details of a word in a category.
531         @return array ('count' => count)
532         @param  string word
533         @param  string category id
534         */
535         function getWord($word, $catcode){
536                 $details = array();
537                 $rs = sql_query("SELECT * FROM ".$this->table_wf." WHERE word='".mysql_real_escape_string($word)."' AND catcode='".mysql_real_escape_string($catcode)."'");
538                 $obj = mysql_fetch_object($rs);
539                 if ($obj) {
540                         $details['wordcount'] = $obj->wordcount;
541                         $details['catcode']   = $obj->catcode;
542                         $details['word']      = $obj->word;
543                 } else {
544                         $details['wordcount'] = 0;
545                         $details['catcode']   = $catcode;
546                         $details['word']      = $word;
547                 }
548                 return $details;
549         } // getWord
550
551         /** update a word in a category.
552                 If the word is new in this category it is added, else only the count is updated.
553                 @return bool success
554                 @param string word
555                 @param int    count
556                 @paran string category id
557                 */
558
559         function updateWord($word, $wordcount, $catcode) {
560                 $oldword = $this->getWord($word, $catcode);
561                 if (0 == $oldword['wordcount']) {
562                         return sql_query("INSERT INTO ".$this->table_wf." (word, catcode, wordcount) VALUES ('".mysql_real_escape_string($word)."','".mysql_real_escape_string($catcode)."','".mysql_real_escape_string((int)$wordcount)."')");
563                 } else {
564                         return sql_query("UPDATE ".$this->table_wf." SET wordcount = wordcount +".(int)$wordcount." WHERE catcode = '".mysql_real_escape_string($catcode)."' AND word = '".mysql_real_escape_string($word)."'");
565                 }
566         } // function updateWord
567
568         /** remove a word from a category.
569         @return bool success
570         @param string word
571         @param int  count
572         @param string category id
573         */
574
575         function removeWord($word, $wordcount, $catcode) {
576                 $oldword = $this->getWord($word, $catcode);
577                 if (0 != $oldword['wordcount'] && 0 >= ($oldword['wordcount']-$wordcount)) {
578                         return sql_query("DELETE FROM ".$this->table_wf." WHERE word='".mysql_real_escape_string($word)."' AND catcode ='".mysql_real_escape_string($catcode)."'");
579                 } else {
580                         return sql_query("UPDATE ".$this->table_wf." SET wordcount = wordcount - ".(int)$wordcount." WHERE catcode = '".mysql_real_escape_string($catcode)."' AND word = '".mysql_real_escape_string($word)."'");
581                 }
582         } // function removeWord
583
584         /** update the probabilities of the categories and word count.
585                 This function must be run after a set of training
586                 @return bool sucess
587                 */
588         function updateProbabilities() {
589                 // first update the word count of each category
590                 $rs = sql_query("SELECT catcode, SUM(wordcount) AS total FROM ".$this->table_wf." WHERE 1 GROUP BY catcode");
591                 $total_words = 0;
592                 while ($obj = mysql_fetch_object($rs)) {
593                         $total_words += $obj->total;
594                 }
595
596                 if ($total_words == 0) {
597                         sql_query("UPDATE ".$this->table_cat." SET wordcount = 0, probability = 0 WHERE 1");
598                 } else {
599                         $rs = sql_query("SELECT catcode, SUM(wordcount) AS total FROM ".$this->table_wf." WHERE 1 GROUP BY catcode");
600                         while ($obj = mysql_fetch_object($rs)) {
601                                 $proba = $obj->total / $total_words;
602                                 sql_query("UPDATE ".$this->table_cat." SET wordcount=".(int)$obj->total.", probability=".$proba." WHERE catcode = '".$obj->catcode."'");
603                         }
604                 }
605                 return true;
606         } // updateProbabilities
607
608         /** save a reference in the database.
609         @return bool success
610         @param  string reference if, must be unique
611         @param  string category id
612         @param  string content of the reference
613         */
614         function saveReference($ref, $catcode, $content) {
615                 return sql_query("INSERT INTO ".$this->table_ref." (ref, catcode, content) VALUES (".intval($ref).", '".mysql_real_escape_string($catcode)."','".mysql_real_escape_string($content)."')");
616         } // function saveReference
617
618         /** get a reference from the database.
619         @return array  reference( catcode => ...., content => ....)
620         @param  string id
621         */
622         function getReference($ref) {
623                 $reference = array();
624                 $rs = sql_query("SELECT * FROM ".$this->table_ref." WHERE ref=".intval($ref));
625                 if ($rs) {
626                         $reference = mysql_fetch_array($rs);
627                 }
628                 return $reference;
629         }
630
631         /** remove a reference from the database
632         @return bool sucess
633         @param  string reference id
634         */
635
636         function removeReference($ref) {
637                 return sql_query("DELETE FROM ".$this->table_ref." WHERE ref=".intval($ref));
638         }
639
640         function nextdocid() {
641                 $res = sql_query ("select ref from ".$this->table_ref." where ref >= 500000000 order by ref desc limit 0,1");
642                 $obj = @ mysql_fetch_object($res);
643                 if ($obj) {
644                         return $obj->ref + 1;
645                 } else {
646                         return 500000000;
647                 }
648         }
649
/** write one entry to the log table.
Nothing is written unless the plugin option 'enableLogging' equals 'yes'
and both $log and $content are set (not null).
@param  string log     text stored in the log column
@param  string content text stored in the content column
@param  string catcode value stored in the catcode column
*/
function logevent($log,$content,$catcode) {
    // Logging switched off in the plugin options: nothing to do.
    if ($this->plugin->getOption('enableLogging') != 'yes') {
        return;
    }
    // Both the log text and the content are required for an entry.
    if (!isset($log) || !isset($content)) {
        return;
    }

    $safeLog     = mysql_real_escape_string($log);
    $safeContent = mysql_real_escape_string($content);
    $safeCatcode = mysql_real_escape_string($catcode);

    sql_query("insert into ".$this->table_log." (log,content,catcode) values ('".$safeLog."','".$safeContent."','".$safeCatcode."')");
} // logevent
657
/** delete entries from the log table, optionally restricted by filters.
@param  string filter     catcode to match exactly, or 'all' for no catcode restriction
@param  string filtertype prefix the log column must start with, or 'all'
@param  string keyword    substring the content column must contain, '' for none
@param  int    ipp        row limit applied when the request parameter
                          'amount' equals 'cp'
*/
function clearlog($filter = 'all', $filtertype = 'all', $keyword = '', $ipp = 10) {
    // Collect every active restriction, then join them with "and".
    $conditions = array();
    if ($filter != 'all') {
        $conditions[] = "catcode = '".mysql_real_escape_string($filter)."'";
    }
    if ($filtertype != 'all') {
        $conditions[] = "log like '".mysql_real_escape_string($filtertype)."%'";
    }
    if ($keyword != '') {
        $conditions[] = "content like '%".mysql_real_escape_string($keyword)."%'";
    }

    $query = 'delete from '.$this->table_log;
    if (count($conditions) > 0) {
        $query .= ' where '.implode(' and ', $conditions);
    }

    // 'cp' limits the delete to the newest $ipp matching rows.
    // isset() avoids an undefined-index notice when 'amount' is not sent;
    // the cast keeps arbitrary text out of the LIMIT clause.
    if (isset($_REQUEST['amount']) && $_REQUEST['amount'] == 'cp') { //only current page?
        $query .= ' order by logtime desc limit '.max(0, (int)$ipp);
    }
    sql_query($query);
} // function clearlog
682
/** fetch one page of log entries, newest first.
@return mixed  result of sql_query() for the select statement
@param  int    startpos   offset of the first row to return
@param  string filter     catcode to match exactly, or 'all' for no catcode restriction
@param  string filtertype prefix the log column must start with, or 'all'
@param  string keyword    substring the content column must contain, '' for none
@param  int    ipp        maximum number of rows to return
*/
function getlogtable($startpos, $filter = 'all',$filtertype = 'all', $keyword = '', $ipp = 10) {
    // $keyword now defaults to '': a required parameter after optional ones
    // is deprecated in PHP 8, and existing callers are unaffected.

    // Collect every active restriction, then join them with "and".
    $conditions = array();
    if ($filter != 'all') {
        $conditions[] = "catcode = '".mysql_real_escape_string($filter)."'";
    }
    if ($filtertype != 'all') {
        $conditions[] = "log like '".mysql_real_escape_string($filtertype)."%'";
    }
    if ($keyword != '') {
        $conditions[] = "content like '%".mysql_real_escape_string($keyword)."%'";
    }

    $query = 'select * from '.$this->table_log;
    if (count($conditions) > 0) {
        $query .= ' where '.implode(' and ', $conditions);
    }

    // Offset and page size are forced to non-negative integers so that
    // only numbers can end up in the LIMIT clause.
    $offset = max(0, (int)$startpos);
    $rows   = max(0, (int)$ipp);
    $query .= ' order by logtime desc limit '.$offset.','.$rows;

    return sql_query($query);
} // function getlogtable
705
/** count the log entries matching the given filters.
@return mixed  the 'total' column of the count(*) query
@param  string filter     catcode to match exactly, or 'all' for no catcode restriction
@param  string filtertype prefix the log column must start with, or 'all'
@param  string keyword    substring the content column must contain, '' for none
*/
function countlogtable($filter = 'all', $filtertype = 'all', $keyword = '') {
    $sql = 'select count(*) as total from '.$this->table_log;

    // catcode / log-prefix restrictions, joined by ' and ' when both apply
    $clauses = array();
    if ($filter != 'all') {
        $clauses[] = " catcode = '".mysql_real_escape_string($filter)."'";
    }
    if ($filtertype != 'all') {
        $clauses[] = " log like '".mysql_real_escape_string($filtertype)."%'";
    }

    $hasClauses = count($clauses) > 0;
    if ($hasClauses) {
        $sql .= ' where '.implode(' and ', $clauses);
    }

    // The keyword restriction either extends the existing where-clause
    // or starts one of its own.
    if ($keyword != '') {
        $lead = $hasClauses ? ' and' : ' where';
        $sql .= $lead." content like '%".mysql_real_escape_string($keyword)."%'";
    }

    $row = mysql_fetch_array(sql_query($sql));
    return $row['total'];
}
729
/** list the distinct log types present in the log table.
A log type is the part of the log column in front of its second space
(substring_index(log,' ', 2)).
@return array list of log type strings
*/
function getlogtypes() {
    $res = sql_query("select distinct(substring_index(log,' ', 2)) as logtype from ".$this->table_log);

    $types = array();
    // Pull rows until the result set is exhausted.
    for ($row = mysql_fetch_array($res); $row; $row = mysql_fetch_array($res)) {
        $types[] = $row['logtype'];
    }
    return $types;
}
739
/** fetch a page of ten references with ref >= 1000000, highest ref first.
@return mixed result of sql_query() for the select statement
@param  int   startpos offset of the first row to return
*/
function getreftable($startpos) {
    // Force the offset to a non-negative integer so that only a number
    // can end up in the LIMIT clause.
    $offset = max(0, (int)$startpos);
    $query = 'select * from '.$this->table_ref.' where ref >= 1000000 order by ref desc limit '.$offset.',10';
    return sql_query($query);
}
744
/** fetch a single log entry by its id.
@return mixed row as returned by mysql_fetch_array(), or false when the
              query fails or no row matches
@param  int   id id of the log entry
*/
function getLogevent($id) {
    // Cast the id like the reference methods do with intval($ref), so
    // arbitrary text can never become part of the statement.
    $query = 'select * from '.$this->table_log.' where id = '.intval($id);
    $res = sql_query($query);
    if (!$res) {
        return false;
    }
    return mysql_fetch_array($res);
}
750
/** delete a single log entry by its id.
@return mixed result of sql_query() for the delete statement
@param  int   id id of the log entry
*/
function removeLogevent($id) {
    // Cast the id like the reference methods do with intval($ref), so
    // arbitrary text can never become part of the statement.
    $query = ' delete from '.$this->table_log.' where id = '.intval($id);
    return sql_query($query);
}
/** count the references with ref >= 1000000.
@return mixed the 'total' column of the count(*) query
*/
function countreftable() {
    $sql = 'select count(*) as total from '.$this->table_ref.' where ref >= 1000000';
    $row = mysql_fetch_array(sql_query($sql));
    return $row['total'];
}
762
763 } // class NaiveBayesianStorage