3 from __future__ import print_function
14 from yaml import CLoader as Loader, CDumper as Dumper
16 from yaml import Loader, Dumper
19 from newslash_index import Index, DatabaseError
20 from newslash_db import NewslashDB
class SearchCLIError(Exception):
    """Fatal error reported by the search command-line tool.

    Raised, for example, when no config file can be located or when the
    config lacks an index path.

    Args:
        message: human-readable description of the failure; also kept on
            the ``message`` attribute.
    """

    def __init__(self, message):
        # Initialise the base class so that ``str(exc)`` and ``exc.args``
        # carry the text on Python 2 as well, where ``Exception.__new__``
        # does not record the constructor arguments.
        super(SearchCLIError, self).__init__(message)
        self.message = message
# Command-line front end for the Newslash search index.
# NOTE(review): the statements below are presumably the body of the
# constructor; its `def` line is not visible in this view.
26 class SearchCLI(object):
# Config file location: $SEARCHD_CONFIG when set, otherwise the system-wide
# default path.
31 config_path = os.environ.get("SEARCHD_CONFIG", "/etc/newslash/searchd.conf")
# Fallback: a `searchd.conf` in the directory of this source file
# (realpath resolves symlinks first).
32 if not os.path.exists(config_path):
33 base_dir = os.path.dirname(os.path.realpath(__file__))
34 config_path = os.path.join(base_dir, 'searchd.conf')
# Give up with SearchCLIError when the selected path does not exist.
35 if not os.path.exists(config_path):
36 raise SearchCLIError("config file not found")
# Populate the instance's settings from the resolved config file.
38 self._load_config(config_path)
40 def _make_parser(self):
41 # parse command line option
42 parser = argparse.ArgumentParser(description='search daemon for Newslash')
43 parser.add_argument('-i', '--index-dir', help='lucene index directory')
46 subparsers = parser.add_subparsers(help="subcommands")
49 p_query = subparsers.add_parser("query", help="execute query")
50 p_query.add_argument('subcommand', action='store_const', const='query')
51 p_query.add_argument('query_string', help='query string')
54 p_index = subparsers.add_parser("index", help="manipulate lucene index")
55 p_index.add_argument('subcommand', action='store_const', const='index')
56 p_index.add_argument('action', help='action')
57 p_index.add_argument('target', help='target', default='all')
59 # 'getdocument' subcommand
60 p_getdoc = subparsers.add_parser("getdocument", help="get document from lucene index")
61 p_getdoc.add_argument('subcommand', action='store_const', const='getdocument')
62 p_getdoc.add_argument('target_type', help='target type')
63 p_getdoc.add_argument('target_id', help='target id')
66 p_initdb = subparsers.add_parser("initdb", help="create table which stores index related information")
67 p_initdb.add_argument('subcommand', action='store_const', const='initdb')
69 # 'analyze' subcommand
70 p_analyze = subparsers.add_parser("analyze", help="analyze index")
71 p_analyze.add_argument('subcommand', action='store_const', const='analyze')
72 p_analyze.add_argument('query_string', help='query string')
73 p_analyze.add_argument('-n', '--number-of-result', default=10, type=int, help='number of result output')
75 # 'metadata' subcommand
76 p_metadata = subparsers.add_parser("metadata", help="manipulate metadata")
77 p_metadata.add_argument('subcommand', action='store_const', const='metadata')
78 p_metadata.add_argument('action', help='action')
# Parse the process command line into self.args, then derive
# self.index_dir and self.sub_command from the parsed namespace.
82 def _parse_args(self):
84 parser = self._make_parser()
85 self.args = parser.parse_args()
# A non-empty -i/--index-dir overwrites self.index_dir; the value is
# stored as an absolute path.
88 if self.args.index_dir:
89 self.index_dir = os.path.abspath(self.args.index_dir)
# The `subcommand` attribute is only defined by the sub-parsers, so it can
# be missing from the namespace; a falsy value is normalised to "".
# NOTE(review): the `try:` line and the body of the handler below are not
# visible in this view.
93 self.sub_command = self.args.subcommand or ""
94 except AttributeError:
# Read the config file at `pathname` and populate self.config,
# self.index_dir and self.database from it.
# NOTE(review): several lines of this method are not visible in this view,
# including whatever binds `fh` and whatever guards the `raise` below.
98 def _load_config(self, pathname):
# Diagnostic goes to stderr.  NOTE(review): the message has no trailing
# newline, unlike the other stderr messages in this file.
102 sys.stderr.write('config file ({}) not found...'.format(pathname))
# Parse the open handle `fh` with the Loader imported from yaml at the top
# of the file.  NOTE(review): presumably `load` is yaml.load; the full
# Loader can construct arbitrary Python objects, so the config file must
# come from a trusted location.
106 self.config = load(fh, Loader=Loader)
# "Searchd" section of the config; an empty dict when the section is absent.
109 d = self.config.get("Searchd", {})
# The index location is stored as an absolute path.
111 self.index_dir = os.path.abspath(d["index_path"])
113 raise SearchCLIError("index_path not given")
# "Database" section, later passed to Index(database=...); {} when absent.
115 self.database = self.config.get("Database", {})
# NOTE(review): isolated statement whose enclosing `def` is not visible;
# presumably part of show_help(), which is invoked elsewhere in this file.
# Builds a fresh parser via _make_parser().
118 parser = self._make_parser()
# Visible part of what is presumably the `metadata` subcommand handler
# (its `def` line is not shown).
# 'show' action: open the index and iterate over every metadata record.
122 if self.args.action == 'show':
123 index = Index(database=self.database, index_path=self.index_dir)
# NOTE(review): the loop body is not visible in this view.
124 for data in index.metadata.get_all():
# Visible part of what is presumably the `index` subcommand handler (its
# `def` line is not shown): clears or updates the index depending on the
# `action` and `target` arguments.
128 action = self.args.action
129 target = self.args.target
# The VM is initialised before the Index object is created.
131 lucene_wrapper.init_vm()
132 index = Index(database=self.database, index_path=self.index_dir)
# Callback handed to the update_* calls; prints one progress line per call.
134 def progress_cb(target, phase, success, errors):
135 print("{}: {} to index {} items... ({} errors)".format(target, phase, success, errors))
# Callback handed to the update_* calls; prints the failing item's id.
# NOTE(review): reads item["sid"] for every target type; confirm that
# non-story items also carry a "sid" key.
137 def error_cb(target, phase, item):
138 print("{}: indexing {} error: id={}".format(target, phase, item["sid"]))
# 'clear' action.  NOTE(review): a line between this test and the metadata
# deletion is not visible in this view.
140 if action == 'clear':
142 index.metadata.delete_all()
143 print("clear all index and metadata done.")
# 'update' action: time the run and dispatch on `target`.
144 elif action == 'update':
145 start_time = time.time()
# NOTE(review): the condition that opens this chain is not visible;
# presumably it selects update_all for the 'all' target.
147 index.update_all(progress_cb=progress_cb, error_cb=error_cb)
148 elif target == 'story':
149 index.update_story(progress_cb=progress_cb, error_cb=error_cb)
150 elif target == 'comment':
151 index.update_comment(progress_cb=progress_cb, error_cb=error_cb)
152 elif target == 'journal':
153 index.update_journal(progress_cb=progress_cb, error_cb=error_cb)
154 elif target == 'submission':
155 index.update_submission(progress_cb=progress_cb, error_cb=error_cb)
156 elif target == 'poll':
157 index.update_poll(progress_cb=progress_cb, error_cb=error_cb)
158 elif target == 'user':
159 index.update_user(progress_cb=progress_cb, error_cb=error_cb)
# Elapsed wall-clock time in seconds since start_time.
160 print("indexing done. total time: {}s".format(time.time() - start_time))
# Visible part of what is presumably the `query` subcommand handler (its
# `def` line is not shown): searches the content_text field and prints the
# hits.
# An index directory is required.  NOTE(review): whatever follows the
# error message (return/exit) is not visible in this view.
163 if self.index_dir is None:
164 sys.stderr.write("error: index directory not given\n")
167 query_string = self.args.query_string
168 lucene_wrapper.init_vm()
170 searcher = lucene_wrapper.Searcher(index_directory=self.index_dir)
# Build a query against the "content_text" field; a QueryParseError is
# reported on stderr.  NOTE(review): the `try:` line and the rest of the
# handler are not visible.
172 query = lucene_wrapper.Query("content_text", query_string)
173 except lucene_wrapper.QueryParseError as e:
174 sys.stderr.write("query parse error\n")
177 result = searcher.search(query)
179 print("total hits: {}".format(result.total_hits))
# One line per hit.  NOTE(review): the loop header that binds `item` is
# not visible in this view.
181 print("#{} - {}: {}".format(item.number, item.id, item.content_text.encode('utf-8')))
# Visible part of what is presumably the `analyze` subcommand handler (its
# `def` line is not shown): runs the query string against both the
# content_text and title fields and dumps the fields of each hit.
# An index directory is required.  NOTE(review): whatever follows the
# error message (return/exit) is not visible in this view.
184 if self.index_dir is None:
185 sys.stderr.write("error: index directory not given\n")
# Inputs from the 'analyze' sub-parser: the query text and the value of
# -n/--number-of-result (an int, default 10).
188 query_string = self.args.query_string
189 limit = self.args.number_of_result
191 lucene_wrapper.init_vm()
193 searcher = lucene_wrapper.Searcher(index_directory=self.index_dir)
# Combine one query per field as "should" clauses of a boolean query, with
# the minimum-should-match setting at 1.
195 content_query = lucene_wrapper.Query("content_text", query_string)
196 title_query = lucene_wrapper.Query("title", query_string)
197 query = lucene_wrapper.BooleanQuery()
# NOTE(review): "nubmber" is spelled this way in the call; confirm it
# matches the method name exposed by lucene_wrapper.BooleanQuery.
198 query.set_minimum_nubmber_should_match(1)
199 query.add_should(content_query)
200 query.add_should(title_query)
# NOTE(review): the matching `try:` line and the rest of this handler are
# not visible in this view.
201 except lucene_wrapper.QueryParseError as e:
202 sys.stderr.write("query parse error\n")
# Sort on the "create_time" field as an integer.  NOTE(review): the third
# argument is presumably a reverse/descending flag; confirm against
# lucene_wrapper.Sort.
205 sort = lucene_wrapper.Sort("create_time", lucene_wrapper.Sort.INT, True)
# NOTE(review): the literal 0 is presumably a start offset; confirm
# against Searcher.search.
206 result = searcher.search(query, limit, 0, sort)
208 print("total hits: {}".format(result.total_hits))
# Per-hit output.  NOTE(review): the loop headers that bind `item` and
# `field` are not visible in this view.
210 print("#{} - {}:".format(item.number, item.id))
213 fields = item.get_fields()
# content_text is handled separately: besides its value, the field's
# tokens are written out separated by spaces.
215 if field.name == "content_text":
216 print(" {}: {}".format(field.name, field.value.encode("utf8")))
218 for term in field.get_tokens():
219 sys.stdout.write("{} ".format(term.encode("utf8")))
# NOTE(review): presumably the branch for every other field; the `else:`
# line is not visible.
222 print(" {}: {}".format(field.name, field.value.encode("utf8")))
# Look up a single document in the index by type and id and print it.
226 def getdocument(self):
227 lucene_wrapper.init_vm()
228 index = Index(database=self.database, index_path=self.index_dir)
# target_type / target_id come from the 'getdocument' sub-parser; both are
# passed through as the strings given on the command line.
229 result = index.get(self.args.target_type, self.args.target_id)
# NOTE(review): the lines between the lookup and this check are not
# visible (possibly a not-found guard).  The attribute is spelled
# `totalhits` here, whereas the search results elsewhere in this file use
# `total_hits`; confirm which name the object returned by index.get()
# exposes.
234 if result.totalhits > 1:
235 print("warning: hits multiple items")
237 print("#{} - {}: {}".format(result.number, result.id, result.content_text.encode('utf-8')))
# Visible part of what is presumably the `initdb` subcommand handler (its
# `def` line is not shown): creates the metadata table through the Index
# object.
240 index = Index(database=self.database, index_path=self.index_dir)
# A DatabaseError from table creation is reported on stdout instead of
# propagating.  NOTE(review): the `try:` line is not visible in this view.
242 index.metadata.create_table()
243 except DatabaseError as e:
244 print('error: {}'.format(str(e)))
# Visible part of what is presumably the dispatcher (its `def` line is not
# shown): compares self.sub_command against each known subcommand name and
# returns the matching handler's result.
# NOTE(review): the bodies of the 'query', 'index' and 'initdb' branches
# are not visible in this view.
247 if self.sub_command == 'query':
250 if self.sub_command == 'index':
253 if self.sub_command == 'getdocument':
254 return self.getdocument()
256 if self.sub_command == 'initdb':
259 if self.sub_command == 'analyze':
260 return self.analyze()
262 if self.sub_command == 'metadata':
263 return self.metadata()
# No known subcommand matched: show the help text.
265 return self.show_help()
268 if __name__ == '__main__':