2 ''' searchd: Search daemon for Newslash'''
11 from yaml import CLoader as Loader, CDumper as Dumper
13 from yaml import Loader, Dumper
15 from newslash_searchd import Router, Route
# Runs once at import time, before any handler is defined.
# NOTE(review): presumably boots the JVM behind the Lucene bindings and so has
# to precede every other lucene_wrapper call -- confirm against lucene_wrapper.
18 lucene_wrapper.init_vm()
class SearchdError(Exception):
    """Error raised for searchd-specific failures, e.g. a missing config file.

    Attributes:
        message: Human-readable description of the failure.
    """

    def __init__(self, message):
        # Initialise the base class explicitly so ``args`` and ``str(exc)``
        # carry the message however the exception was constructed
        # (positionally or as ``message=...``).
        super(SearchdError, self).__init__(message)
        self.message = message
def _load_config(pathname):
    """Load the YAML configuration file at *pathname*.

    A relative ``index_path`` in the ``Searchd`` section is resolved against
    the directory that contains the configuration file, so the result does
    not depend on the working directory the daemon was started from.

    Args:
        pathname: Path of the configuration file.

    Returns:
        The parsed configuration mapping (``{}`` for an empty file).

    Raises:
        SearchdError: If the file cannot be opened.
    """
    try:
        fh = open(pathname, 'r')
    except IOError:
        raise SearchdError("config file not found")

    # NOTE(review): Loader is the full YAML loader, which can construct
    # arbitrary objects. Acceptable for an operator-owned config file; switch
    # to a safe loader if this path can ever be influenced by untrusted input.
    with fh:
        config = load(fh, Loader=Loader) or {}

    # convert relative index_path to absolute path
    section = config.get('Searchd')
    if section:
        index_path = section.get('index_path', '')
        # An unset/empty index_path is left alone: there is nothing to
        # resolve, and indexing its first character would raise IndexError.
        if index_path and not os.path.isabs(index_path):
            cfg_dir = os.path.dirname(pathname)
            section["index_path"] = os.path.normpath(
                os.path.join(cfg_dir, index_path))

    return config
# Locate the configuration file. The path named by $SEARCHD_CONFIG (or the
# system-wide default) wins; otherwise fall back to a searchd.conf that sits
# next to this module.
config_path = os.environ.get("SEARCHD_CONFIG", "/etc/newslash/searchd.conf")
if not os.path.exists(config_path):
    base_dir = os.path.dirname(os.path.realpath(__file__))
    config_path = os.path.join(base_dir, 'searchd.conf')

# neither candidate exists: refuse to start
if not os.path.exists(config_path):
    raise SearchdError("config file not found")

# module-wide configuration shared by the handlers below
config = _load_config(config_path)
class Searchd(Router):
    """WSGI application object: a Router with the searchd handlers attached."""

    def __init__(self, environ, start_response):
        super(Searchd, self).__init__(environ, start_response)

        # Root is registered as the default route
        self.default_route(Root())

        # request paths beginning with /admin go to SearchdAdmin
        admin_pattern = re.compile(r'^/admin')
        admin_handler = SearchdAdmin()
        self.route(admin_pattern, admin_handler)
def config(self, section, key=None, default=None):
    """Look up a value in the loaded configuration (``self._config``).

    Args:
        section: Name of the top-level configuration section.
        key: Optional key inside the section. When omitted, the whole
            section is returned.
        default: Value returned when the section (or the key) is missing.

    Returns:
        The section itself when *key* is None, otherwise the value stored
        under *key*; *default* when the lookup finds nothing.
    """
    if key is None:
        return self._config.get(section, default)

    # A section that exists but is empty parses to None in YAML
    # (``Searchd:`` with nothing under it); treat it like an absent section
    # instead of failing with AttributeError on ``None.get``.
    values = self._config.get(section) or {}
    return values.get(key, default)
def get(self, req, resp):
    """Answer a GET request with HTTP 200 and the JSON body ``{"error": 0}``."""
    payload = {"error": 0}
    resp.render(200, json=payload)
def html_escape(self, text):
    """Escape the HTML metacharacters ``&``, ``<`` and ``>`` in *text*.

    Args:
        text: String that is going to be embedded in HTML element content.

    Returns:
        A copy of *text* with ``&``, ``<`` and ``>`` replaced by the
        entities ``&amp;``, ``&lt;`` and ``&gt;``. Quotes are left as they
        are, so the result is not suitable for attribute values.
    """
    # "&" has to go first; otherwise the ampersands introduced by the other
    # two entities would themselves be escaped a second time.
    text = text.replace("&", "&amp;")
    text = text.replace("<", "&lt;")
    text = text.replace(">", "&gt;")
    return text
82 def post(self, req, resp):
# Run the search described by the request body and render the result as JSON.
#
# Values read from req.body (default in parentheses): query (''), limit (10),
# target ('story'), sort_key ('create_time'), sort_reverse (1), offset (0).
# Responses rendered below: 400 when the query cannot be parsed, 500 with
# "query_error" or "search_error" when building or running the query fails,
# and 200 with resp_body otherwise.
83 # check request body is valid
89 query_text = req.body.get('query', '')
90 limit = req.body.get('limit', 10)
91 target = req.body.get('target', 'story')
92 sort_key = req.body.get('sort_key', 'create_time')
# sort_reverse ends up a bool: True only when the value compares equal to 1
# (which is also the default).
93 sort_reverse = req.body.get('sort_reverse', 1) == 1
# NOTE(review): offset is coerced with int() while limit (above) is taken as
# received -- confirm limit is validated or converted elsewhere.
96 offset = int(req.body.get('offset', 0))
101 #if req.environ['wsgi.errors']:
102 #req.environ['wsgi.errors'].write(query.encode('utf-8'))
# The index is opened from the "lucene_index" directory under the configured
# Searchd/index_path; a Searcher is constructed for each call of this method.
104 index_dir = os.path.join(self.config('Searchd', 'index_path'), 'lucene_index')
105 searcher = lucene_wrapper.Searcher(index_directory=index_dir)
# The query text is parsed against both "content_text" and "title"; the two
# parsed queries are added as "should" clauses with minimum-should-match 1.
107 content_query = lucene_wrapper.Query("content_text", query_text)
108 title_query = lucene_wrapper.Query("title", query_text)
109 query = lucene_wrapper.BooleanQuery()
# NOTE(review): "nubmber" looks like a typo, but the name belongs to
# lucene_wrapper -- it can only be corrected together with that module.
110 query.set_minimum_nubmber_should_match(1)
111 query.add_should(content_query)
112 query.add_should(title_query)
# In addition the "type" field is required ("must") to equal the target.
115 target_query = lucene_wrapper.TermQuery("type", target)
116 query.add_must(target_query)
# A query that fails to parse is reported as 400 with the parser's message.
117 except lucene_wrapper.QueryParseError as e:
118 resp.render(400, json={"error": { "message": e.message }})
# Any other failure while building the query is reported as 500 and written
# to the WSGI error stream.
121 except Exception as e:
122 resp.render(500, json={"error": { "message": "query_error" }})
# NOTE(review): unlike the "search error" message further down, this one has
# no trailing "\n", so consecutive log entries may run together.
123 req.environ['wsgi.errors'].write("query error: {} - query is {} ".format(e, query_text))
# NOTE(review): the client-supplied sort_key is always sorted as Sort.INT --
# confirm that only integer fields are accepted as sort keys.
128 sort = lucene_wrapper.Sort(sort_key, lucene_wrapper.Sort.INT, sort_reverse)
129 result = searcher.search(query, limit, offset, sort)
# A failing search is reported as 500 and written to the WSGI error stream.
130 except Exception as e:
131 resp.render(500, json={"error": { "message": "search_error" }})
132 req.environ['wsgi.errors'].write("search error: {} - query is {} \n".format(e, query_text))
137 "total_hits": result.total_hits,
# The highlighter is given <strong>/</strong> as its markers -- presumably
# wrapped around matched terms; confirm in lucene_wrapper.
143 highlighter = lucene_wrapper.Highlighter(query, "<strong>", "</strong>")
# The body is HTML-escaped before highlighting, then up to 2 fragments are
# requested and concatenated.
146 texts = highlighter.get_best_fragments("content_text", self.html_escape(item.content_text), 2)
# NOTE(review): "".join(texts) raises TypeError if texts is None, so the None
# test on the next line cannot catch that case; "".join never returns None.
147 content_text = "".join(texts)
148 if content_text is None or len(texts) == 0:
# NOTE(review): this fallback uses the raw, unescaped content_text, whereas
# the highlighted path above is escaped -- confirm the client escapes it.
149 content_text = item.content_text
# NOTE(review): the title is handed to the highlighter without html_escape,
# unlike content_text -- confirm whether that is intended.
151 title = highlighter.get_best_fragment("title", item.title)
152 if title is None or len(title) == 0:
155 resp_body["hits"].append({ "number": item.number,
159 "author": item.author,
160 "create_time": item.create_time,
161 "content_text": content_text,
166 resp.render(200, json=resp_body)
# Handler that Searchd registers for request paths matching ^/admin.
# It derives from Root and overrides the GET and POST handlers.
169 class SearchdAdmin(Root):
# GET: answer HTTP 200 with the fixed JSON body {"error": 0}.
170 def get(self, req, resp):
171 resp.render(200, json={"error": 0})
# POST: admin-specific override of Root.post.
173 def post(self, req, resp):
if __name__ == '__main__':
    # Script entry point: serve the Searchd WSGI application with the
    # standard library's reference WSGI server.
    from wsgiref.simple_server import make_server, WSGIRequestHandler

    host = config.get('host', "")
    port = config.get('port', 6000)
    # "host" may carry the port as well, written as "address:port"
    if ":" in host:
        (host, port) = host.split(":", 1)
    # make_server() passes the port on to the socket layer, which requires an
    # integer; a port split out of the host string (or quoted in the YAML
    # file) is still a str at this point.
    port = int(port)

    server = make_server(host, port, Searchd)

    print("starting server at {}:{}...".format(host, port))
    server.serve_forever()