4 * Nucleus: PHP/MySQL Weblog CMS (http://nucleuscms.org/)
5 * Copyright (C) 2003-2009 The Nucleus Group
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
11 * (see nucleus/documentation/index.html#license for more info)
14 * SEARCH(querystring) offers different functionality to create an
15 * SQL query to find certain items. (and comments)
17 * based on code by David Altherr:
18 * http://www.evolt.org/article/Boolean_Fulltext_Searching_with_PHP_and_MySQL/18/15665/
19 * http://davidaltherr.net/web/php_functions/boolean/funcs.mysql.boolean.txt
21 * @license http://nucleuscms.org/license.txt GNU General Public License
22 * @copyright Copyright (C) 2002-2009 The Nucleus Group
23 * @version $Id: SEARCH.php 1556 2011-07-11 14:18:48Z ftruscot $
/**
 * Prepares a search helper for the given raw query string.
 *
 * Strips the characters < > = ? ! # ^ ( ) [ ] : ; \ % and the comma from
 * the query, stores the cleaned text, pre-computes the marked and
 * inclusive forms of the query, and loads the ids of the searchable blogs.
 *
 * Declared as __construct so the object is still initialised on PHP
 * versions that no longer treat a method named after the class as the
 * constructor.
 *
 * @param string $text raw query string
 */
function __construct($text) {
    $text = preg_replace("/[<,>,=,?,!,#,^,(,),[,\],:,;,\\\,%]/", "", $text);

    $this->querystring = $text;
    $this->marked      = $this->boolean_mark_atoms($text);
    $this->inclusive   = $this->boolean_inclusive_atoms($text);
    $this->blogs       = array();

    // Collect the number of every blog that is flagged as searchable
    // (bincludesearch=1).
    $res = sql_query('SELECT bnumber FROM ' . sql_table('blog') . ' WHERE bincludesearch=1 ');
    while ($obj = sql_fetch_object($res)) {
        $this->blogs[] = intval($obj->bnumber);
    }
}

/**
 * PHP 4 style constructor name, kept so that code calling SEARCH()
 * explicitly (for example parent::SEARCH($text)) keeps working.
 *
 * @param string $text raw query string
 */
function SEARCH($text) {
    $this->__construct($text);
}
/**
 * Builds the SQL expression that computes a relevance score per record.
 *
 * Atoms of four or more characters are collected into a single
 * "match (...) against (...)" term; shorter atoms are scored individually
 * through boolean_sql_select_short(). All terms are joined with "+".
 *
 * @param string $match comma-separated list of columns to score against
 * @return string|null the score expression, or null when there are no
 *                     inclusive atoms
 */
function boolean_sql_select($match){
    if (i18n::strlen($this->inclusive) <= 0) {
        return null;
    }

    // Start from defined values: the accumulators used to be appended to
    // before they existed, which raises undefined-variable notices.
    $long_atoms  = '';
    $score_terms = array();

    /* build sql for determining score for each record */
    foreach (i18n::explode(" ", $this->inclusive) as $atom) {
        if (i18n::strlen($atom) >= 4) {
            $long_atoms .= " $atom ";
        } else {
            $score_terms[] = ' ' . $this->boolean_sql_select_short($atom, $match) . ' ';
        }
    }

    if (i18n::strlen($long_atoms) > 0) {
        $long_atoms    = sql_real_escape_string($long_atoms);
        $score_terms[] = " match ($match) against ('$long_atoms') ";
    }

    return implode("+", $score_terms);
}
/**
 * Reduces a query string to the space-separated atoms that take part in
 * scoring.
 *
 * @param string $string query text
 * @return string the query with word operators, "+" signs, parentheses,
 *                commas and "-(...)" groups removed or turned into spaces
 */
function boolean_inclusive_atoms($string){
    // Applied in order: runs of two or more whitespace characters become
    // one space, then the word operators are shortened (case-insensitive):
    // " not " => " -", " and " => " ", " or " => ",".
    $atoms = preg_replace(
        array("#([[:space:]]{2,})#", '# not #i', '# and #i', '# or #i'),
        array(' ', ' -', ' ', ','),
        trim($string)
    );

    /* drop unnecessary spaces around "," and after "-", remove every "+" */
    $atoms = str_replace(
        array(' ,', ', ', '- ', '+'),
        array(',', ',', '-', ''),
        $atoms
    );

    /* strip exclusive atoms written as -(...) */
    $atoms = preg_replace(
        "#\-\([A-Za-z0-9]{1,}[A-Za-z0-9\-\.\_\,]{0,}\)#",
        '',
        $atoms
    );

    // Whatever parentheses and commas are left act as plain separators.
    return str_replace(array('(', ')', ','), ' ', $atoms);
}
/**
 * Turns the marked query into an SQL WHERE fragment.
 *
 * Every foo[('...')]bar marker produced by boolean_mark_atoms() is
 * replaced: markers holding four or more characters go through
 * boolean_sql_where_cb1(), markers holding one to three characters go
 * through boolean_sql_where_cb2().
 *
 * @param string $match comma-separated list of columns to search
 * @return string the marked query with all matching markers replaced
 */
function boolean_sql_where($match){
    // callback method => pattern of the markers it handles
    $passes = array(
        'boolean_sql_where_cb1' => "/foo\[\(\'([^\)]{4,})\'\)\]bar/",
        'boolean_sql_where_cb2' => "/foo\[\(\'([^\)]{1,3})\'\)\]bar/",
    );

    $where = $this->marked;

    foreach ($passes as $callback => $pattern) {
        // A call with a non-array argument stores $match in the callback's
        // static variable so it is available during the replacement.
        $this->$callback($match);

        $where = preg_replace_callback($pattern, array($this, $callback), $where);
    }

    return $where;
}
/**
 * preg_replace_callback handler for atoms of four or more characters.
 *
 * Called with a non-array value it only remembers that value as the
 * column list for later calls and returns nothing. Called with a match
 * array it returns a "match (...) against (...) > 0" condition for the
 * captured atom.
 *
 * @param array|string $matches regex match array, or the column list to remember
 * @return string|null
 */
function boolean_sql_where_cb1($matches){
    static $match;

    if (is_array($matches)) {
        return ' match (' . $match . ') against (\'' . sql_real_escape_string($matches[1]) . '\') > 0 ';
    }

    // Priming call: keep the column list for the replacements that follow.
    $match = $matches;
}
/**
 * preg_replace_callback handler for atoms of one to three characters.
 *
 * Called with a non-array value it only remembers that value as the
 * column list for later calls and returns nothing. Called with a match
 * array it returns the parenthesised LIKE condition built by
 * boolean_sql_where_short() for the escaped atom.
 *
 * @param array|string $matches regex match array, or the column list to remember
 * @return string|null
 */
function boolean_sql_where_cb2($matches){
    static $match;

    if (is_array($matches)) {
        return ' (' . $this->boolean_sql_where_short(sql_real_escape_string($matches[1]), $match) . ') ';
    }

    // Priming call: keep the column list for the replacements that follow.
    $match = $matches;
}
/**
 * Normalises a query string and wraps each space-delimited word in a
 * foo[('...')]bar marker, joining the words with AND.
 *
 * @param string $string query text
 * @return string the marked query, later consumed by boolean_sql_where()
 */
function boolean_mark_atoms($string){
    // Applied in order: runs of two or more whitespace characters become
    // one space, then the word operators are shortened (case-insensitive):
    // " not " => " -", " and " => " ", " or " => ",".
    $marked = preg_replace(
        array("/([[:space:]]{2,})/", '# not #i', '# and #i', '# or #i'),
        array(' ', ' -', ' ', ','),
        trim($string)
    );

    /* strip excessive whitespace next to operators, remove every "+" */
    $marked = str_replace(
        array('( ', ' )', ', ', ' ,', '- ', '+'),
        array('(', ')', ',', ',', '-', ''),
        $marked
    );

    // The replacements above may have produced new double spaces.
    $marked = preg_replace("#([[:space:]]{2,})#", ' ', trim($marked));

    /* wrap every space-delimited word in a marker */
    $atoms = i18n::explode(' ', $marked);
    foreach ($atoms as $position => $atom) {
        $atoms[$position] = "foo[('" . $atom . "')]bar";
    }

    // Expand the shortened operators, in this order:
    // ' ' => ' AND ', ',' => ' OR ', ' -' => ' NOT '.
    // NOTE(review): words are wrapped before ',' and ' -' are expanded, so
    // an operator that sits inside a space-delimited word ends up inside
    // its marker - confirm that this is intended.
    return str_replace(
        array(' ', ',', ' -'),
        array(' AND ', ' OR ', ' NOT '),
        implode(' ', $atoms)
    );
}
/**
 * Builds the LIKE condition used for short atoms.
 *
 * Produces one "column LIKE '% atom %'" test per column and joins the
 * tests with OR. $string is placed into the SQL as given.
 *
 * @param string $string atom to look for
 * @param string $match  comma-separated list of columns
 * @return string the OR-joined LIKE tests
 */
function boolean_sql_where_short($string,$match){
    $conditions = array();

    foreach (i18n::explode(',', $match) as $column) {
        $conditions[] = " $column LIKE '% $string %' ";
    }

    return implode(" OR ", $conditions);
}
/**
 * Builds the SQL score expression for one short atom.
 *
 * For every column the expression counts how often the atom occurs
 * (length removed by a case-insensitive REPLACE divided by the atom's
 * length) and weighs that count with 0.2. The per-column expressions are
 * joined with " + ".
 *
 * @param string $string atom to score
 * @param string $match  comma-separated list of columns
 * @return string the score expression, or '0' for an empty atom
 */
function boolean_sql_select_short($string,$match){
    // An empty atom would end in "/LENGTH('')", a division by zero; it
    // cannot occur in any column, so it contributes nothing to the score.
    if ((string) $string === '') {
        return '0';
    }

    // Kept as a string: interpolating the float .2 depends on the numeric
    // locale before PHP 8 and could produce "0,2" inside the SQL.
    $score_unit_weight = '0.2';

    // Escape once instead of twice per column.
    $needle = sql_real_escape_string($string);

    $score_a = array();
    foreach (i18n::explode(',', $match) as $column) {
        $column = sql_real_escape_string($column);

        $score_a[] =
            " $score_unit_weight*(
            LENGTH($column) -
            LENGTH(REPLACE(LOWER($column),LOWER('$needle'),'')))
            /LENGTH('$needle') ";
    }

    return implode(" + ", $score_a);
}