3 from __future__ import print_function
15 from yaml import CLoader as Loader, CDumper as Dumper
17 from yaml import Loader, Dumper
20 from newslash_index import Index, DatabaseError
21 from newslash_db import NewslashDB
class SearchCLIError(Exception):
    """Error raised by the search CLI, e.g. for a missing config file or index path.

    Attributes are documented inline; the text is also passed to the base
    class so ``str(err)`` and ``err.args`` carry it.
    """

    def __init__(self, message):
        """Store *message* on the instance and hand it to ``Exception``."""
        super(SearchCLIError, self).__init__(message)
        # Human-readable description of the failure.
        self.message = message
class SearchCLI(object):
    """Command line tool for querying and maintaining the Newslash search index.

    The instance loads its configuration on construction; ``run()`` parses
    the command line and dispatches to one of the subcommand methods
    (``query``, ``index``, ``getdocument``, ``initdb``, ``analyze``,
    ``metadata``), falling back to ``show_help``.
    """

    # NOTE(review): this class was rebuilt from an extract in which
    # indentation and many short lines (``try:``, ``else:``, ``return``,
    # several ``def`` headers, a few statements) were missing.  Every spot
    # where something had to be assumed is marked NOTE(review) below;
    # confirm those against the full file before merging.

    # Targets accepted by ``index update``; each maps to ``Index.update_<target>``.
    _UPDATE_TARGETS = ('all', 'story', 'comment', 'journal', 'submission', 'poll', 'user')

    def __init__(self):
        """Set up the logger and load the configuration file.

        The path is taken from ``$SEARCHD_CONFIG`` (default
        ``/etc/newslash/searchd.conf``).  If that file does not exist,
        ``searchd.conf`` in this script's directory is used instead.

        Raises:
            SearchCLIError: no config file exists, or it lacks an index path.
        """
        self.logger = logging.getLogger()
        # Defaults so the attributes exist before ``_parse_args`` has run.
        # NOTE(review): three original lines around here were not visible.
        self.args = None
        self.sub_command = ""

        config_path = os.environ.get("SEARCHD_CONFIG", "/etc/newslash/searchd.conf")
        if not os.path.exists(config_path):
            base_dir = os.path.dirname(os.path.realpath(__file__))
            config_path = os.path.join(base_dir, 'searchd.conf')
            if not os.path.exists(config_path):
                raise SearchCLIError("config file not found")

        self._load_config(config_path)

    def _make_parser(self):
        """Build the argument parser, one sub-parser per subcommand.

        Each sub-parser records its own name in ``subcommand`` through
        ``set_defaults`` (the idiom documented by argparse).  The attribute
        is absent from the namespace when no subcommand was given.  The
        previous ``store_const`` positional added a phantom ``subcommand``
        entry to the usage text and relied on positionals that consume no
        arguments.

        Returns:
            argparse.ArgumentParser: the configured parser.
        """
        parser = argparse.ArgumentParser(description='search daemon for Newslash')
        parser.add_argument('-i', '--index-dir', help='lucene index directory')
        parser.add_argument('-v', '--verbose', action='store_true', help='show more log messages')

        subparsers = parser.add_subparsers(help="subcommands")

        # 'query' subcommand
        p_query = subparsers.add_parser("query", help="execute query")
        p_query.set_defaults(subcommand='query')
        p_query.add_argument('query_string', help='query string')

        # 'index' subcommand
        p_index = subparsers.add_parser("index", help="manipulate lucene index")
        p_index.set_defaults(subcommand='index')
        p_index.add_argument('action', help='action')
        # nargs='?' is what makes the default reachable; without it the
        # positional is mandatory and default='all' is never applied.
        p_index.add_argument('target', nargs='?', help='target', default='all')

        # 'getdocument' subcommand
        p_getdoc = subparsers.add_parser("getdocument", help="get document from lucene index")
        p_getdoc.set_defaults(subcommand='getdocument')
        p_getdoc.add_argument('target_type', help='target type')
        p_getdoc.add_argument('target_id', help='target id')

        # 'initdb' subcommand
        p_initdb = subparsers.add_parser("initdb", help="create table which stores index related information")
        p_initdb.set_defaults(subcommand='initdb')

        # 'analyze' subcommand
        p_analyze = subparsers.add_parser("analyze", help="analyze index")
        p_analyze.set_defaults(subcommand='analyze')
        p_analyze.add_argument('query_string', help='query string')
        p_analyze.add_argument('-n', '--number-of-result', default=10, type=int, help='number of result output')

        # 'metadata' subcommand
        p_metadata = subparsers.add_parser("metadata", help="manipulate metadata")
        p_metadata.set_defaults(subcommand='metadata')
        p_metadata.add_argument('action', help='action')

        return parser

    def _parse_args(self):
        """Parse ``sys.argv`` into ``self.args`` and record the subcommand.

        ``--index-dir`` overrides the index directory taken from the config
        file.  ``self.sub_command`` becomes ``""`` when no subcommand was
        selected.
        """
        self.args = self._make_parser().parse_args()

        if self.args.index_dir:
            self.index_dir = os.path.abspath(self.args.index_dir)

        # NOTE(review): the original fallback assignment in the
        # ``except AttributeError`` branch was not visible; "" is assumed.
        self.sub_command = getattr(self.args, "subcommand", None) or ""

    def _load_config(self, pathname):
        """Read the YAML config and set ``config``, ``index_dir`` and ``database``.

        Args:
            pathname: path of the configuration file.

        Raises:
            SearchCLIError: the file is missing, or ``Searchd.index_path``
                is absent (including an empty file or empty section).
        """
        if not os.path.exists(pathname):
            sys.stderr.write('config file ({}) not found...'.format(pathname))
            # NOTE(review): the statement that followed the message in the
            # original was not visible; raising keeps the instance from
            # being used half-configured.
            raise SearchCLIError("config file not found")

        with open(pathname) as fh:
            # NOTE(security): ``Loader`` is PyYAML's full loader, which can
            # construct arbitrary objects.  Only use with operator-controlled
            # config files.
            # ``or {}``: an empty YAML document loads as None.
            self.config = load(fh, Loader=Loader) or {}

        searchd = self.config.get("Searchd") or {}
        index_path = searchd.get("index_path")
        if not index_path:
            raise SearchCLIError("index_path not given")
        self.index_dir = os.path.abspath(index_path)

        self.database = self.config.get("Database", {})

    def show_help(self):
        """Print the parser's help text."""
        # NOTE(review): only the parser construction was visible; the
        # print_help() call is inferred from the method's name.
        self._make_parser().print_help()

    def metadata(self):
        """Handle the ``metadata`` subcommand; ``show`` lists all metadata rows."""
        if self.args.action == 'show':
            index = Index(database=self.database, index_path=self.index_dir)
            for data in index.metadata.get_all():
                # NOTE(review): loop body was not visible; printing assumed.
                print(data)

    def index(self):
        """Handle the ``index`` subcommand (``clear`` or ``update <target>``).

        Invalid actions or targets are reported through the logger.
        """
        action = self.args.action
        target = self.args.target

        lucene_wrapper.init_vm()
        index = Index(database=self.database, index_path=self.index_dir)

        def progress_cb(target, phase, success, errors):
            self.logger.info("{} {} index for {} items... ({} errors)".format(phase, target, success, errors))

        def error_cb(target, phase, item):
            self.logger.error("{} {} index error: id={}".format(phase, target, item["sid"]))

        if action == 'clear':
            # NOTE(review): one statement preceding this one was not
            # visible, presumably the call that clears the lucene index
            # itself.  Restore it from the full file.
            index.metadata.delete_all()
            print("clear all index and metadata done.")
        elif action == 'update':
            start_time = time.time()
            if target not in self._UPDATE_TARGETS:
                self.logger.error("invalid target - {}".format(target))
                # NOTE(review): assumed early exit so no timing is logged.
                return
            updater = getattr(index, "update_" + target)
            updater(progress_cb=progress_cb, error_cb=error_cb)
            self.logger.info("indexing done. total time: {}s".format(time.time() - start_time))
        else:
            self.logger.error("invalid action - {}".format(action))

    def query(self):
        """Handle the ``query`` subcommand: search ``content_text`` and print hits."""
        if self.index_dir is None:
            self.logger.error("index directory not given")
            return

        query_string = self.args.query_string
        lucene_wrapper.init_vm()

        searcher = lucene_wrapper.Searcher(index_directory=self.index_dir)
        try:
            query = lucene_wrapper.Query("content_text", query_string)
        except lucene_wrapper.QueryParseError:
            self.logger.error("query parse error")
            return

        result = searcher.search(query)

        print("total hits: {}".format(result.total_hits))
        # NOTE(review): loop header was not visible; iterating the result
        # object directly is assumed.
        for item in result:
            print("#{} - {}: {}".format(item.number, item.id, item.content_text.encode('utf-8')))

    def analyze(self):
        """Handle the ``analyze`` subcommand.

        Searches ``content_text`` and ``title`` (at least one must match),
        sorted on ``create_time``, and prints every field of each hit plus
        the tokens of ``content_text``.
        """
        if self.index_dir is None:
            self.logger.error("index directory not given")
            return

        query_string = self.args.query_string
        limit = self.args.number_of_result

        lucene_wrapper.init_vm()

        searcher = lucene_wrapper.Searcher(index_directory=self.index_dir)
        try:
            content_query = lucene_wrapper.Query("content_text", query_string)
            title_query = lucene_wrapper.Query("title", query_string)
            query = lucene_wrapper.BooleanQuery()
            # Spelling ("nubmber") is the wrapper's method name as called in
            # the original; kept verbatim.
            query.set_minimum_nubmber_should_match(1)
            query.add_should(content_query)
            query.add_should(title_query)
        except lucene_wrapper.QueryParseError:
            self.logger.error("query parse error")
            return

        sort = lucene_wrapper.Sort("create_time", lucene_wrapper.Sort.INT, True)
        result = searcher.search(query, limit, 0, sort)

        print("total hits: {}".format(result.total_hits))
        # NOTE(review): both loop headers below were not visible and are
        # inferred from the use of ``item`` / ``field``.
        for item in result:
            print("#{} - {}:".format(item.number, item.id))

            fields = item.get_fields()
            for field in fields:
                print(" {}: {}".format(field.name, field.value.encode("utf8")))
                if field.name == "content_text":
                    for term in field.get_tokens():
                        sys.stdout.write("{} ".format(term.encode("utf8")))
                    # NOTE(review): line terminator after the token list
                    # was not visible; a newline is assumed.
                    sys.stdout.write("\n")

    def getdocument(self):
        """Handle the ``getdocument`` subcommand: fetch one document and print it."""
        lucene_wrapper.init_vm()
        index = Index(database=self.database, index_path=self.index_dir)
        result = index.get(self.args.target_type, self.args.target_id)

        # NOTE(review): four original lines here were not visible (possibly
        # a not-found guard).  Restore them from the full file.
        if result.totalhits > 1:
            self.logger.warning("hits multiple items")

        print("#{} - {}: {}".format(result.number, result.id, result.content_text.encode('utf-8')))

    def initdb(self):
        """Handle the ``initdb`` subcommand: create the index metadata table."""
        index = Index(database=self.database, index_path=self.index_dir)
        try:
            index.metadata.create_table()
        except DatabaseError as e:
            self.logger.error('{}'.format(str(e)))

    def run(self):
        """Parse the command line, set the log level and run the subcommand.

        Returns:
            Whatever the selected subcommand method returns; ``show_help``
            is used when no known subcommand was given.
        """
        # NOTE(review): this method's header and first statement were not
        # visible; the name ``run`` and the ``_parse_args()`` call are
        # assumed.  Check against the ``__main__`` block.
        self._parse_args()

        if self.args.verbose:
            self.logger.setLevel(logging.DEBUG)
        else:
            self.logger.setLevel(logging.WARNING)

        handlers = {
            'query': self.query,
            'index': self.index,
            'getdocument': self.getdocument,
            'initdb': self.initdb,
            'analyze': self.analyze,
            'metadata': self.metadata,
        }
        return handlers.get(self.sub_command, self.show_help)()
283 if __name__ == '__main__':
284 logging.basicConfig(format='%(asctime)s[%(levelname)s] %(message)s', level=logging.DEBUG)