OSDN Git Service

4ddc1b9c0f393e6dadf5b174a9bcb070847a58cd
[newslash/newslash.git] / src / ns_search / searchd_cli.py
1 #!/usr/bin/python
2 # -*- coding: utf-8
3 from __future__ import print_function
4
5 import argparse
6 import os.path
7 import os
8 import sys
9 import time
10 import inspect
11 import logging
12
13 from yaml import load
14 try:
15     from yaml import CLoader as Loader, CDumper as Dumper
16 except ImportError:
17     from yaml import Loader, Dumper
18
19 import lucene_wrapper
20 from newslash_index import Index, DatabaseError
21 from newslash_db import NewslashDB
22
class SearchCLIError(Exception):
    """Error raised by the search CLI when it cannot be set up.

    Attributes:
        message: human readable description of the problem.
    """

    def __init__(self, message):
        # Initialise the base class as well so that ``args`` and ``str(e)``
        # carry the message no matter how the exception was constructed
        # (positionally or with ``message=...``).
        super(SearchCLIError, self).__init__(message)
        self.message = message
26
class SearchCLI(object):
    """Command line front end for the Newslash search index.

    On construction the command line is parsed and the configuration file
    is loaded; ``run()`` then dispatches to the method that implements the
    selected subcommand (``query``, ``index``, ``getdocument``, ``initdb``,
    ``analyze`` or ``metadata``).
    """

    # Targets accepted by ``index update``; each maps to ``Index.update_<target>``.
    _UPDATE_TARGETS = ('all', 'story', 'comment', 'journal',
                       'submission', 'poll', 'user')

    def __init__(self):
        """Parse ``sys.argv`` and load the configuration file.

        The configuration path is taken from the ``SEARCHD_CONFIG``
        environment variable (default ``/etc/newslash/searchd.conf``); if
        that file does not exist, ``searchd.conf`` next to this script is
        tried instead.

        Raises:
            SearchCLIError: if no configuration file exists, or if no index
                directory is given by either the command line or the config.
        """
        self.index_dir = None
        self.logger = logging.getLogger()
        self._parse_args()

        config_path = os.environ.get("SEARCHD_CONFIG", "/etc/newslash/searchd.conf")
        if not os.path.exists(config_path):
            base_dir = os.path.dirname(os.path.realpath(__file__))
            config_path = os.path.join(base_dir, 'searchd.conf')
        if not os.path.exists(config_path):
            raise SearchCLIError("config file not found")

        self._load_config(config_path)

    def _make_parser(self):
        """Build and return the ``argparse`` parser for all subcommands.

        Each subparser records its own name in ``args.subcommand`` through
        ``set_defaults`` (a positional ``store_const`` argument consumes no
        command line token and is not a supported way to tag a subparser).
        """
        parser = argparse.ArgumentParser(description='search daemon for Newslash')
        parser.add_argument('-i', '--index-dir', help='lucene index directory')
        parser.add_argument('-v', '--verbose', action='store_true', help='show more log messages')

        # subcommands
        subparsers = parser.add_subparsers(help="subcommands")

        # 'query' subcommand
        p_query = subparsers.add_parser("query", help="execute query")
        p_query.set_defaults(subcommand='query')
        p_query.add_argument('query_string', help='query string')

        # 'index' subcommand
        p_index = subparsers.add_parser("index", help="manipulate lucene index")
        p_index.set_defaults(subcommand='index')
        p_index.add_argument('action', help='action')
        # nargs='?' is what makes the default effective for a positional;
        # without it the target was always required, even for 'clear'.
        p_index.add_argument('target', nargs='?', help='target', default='all')

        # 'getdocument' subcommand
        p_getdoc = subparsers.add_parser("getdocument", help="get document from lucene index")
        p_getdoc.set_defaults(subcommand='getdocument')
        p_getdoc.add_argument('target_type', help='target type')
        p_getdoc.add_argument('target_id', help='target id')

        # 'initdb' subcommand
        p_initdb = subparsers.add_parser("initdb", help="create table which stores index related information")
        p_initdb.set_defaults(subcommand='initdb')

        # 'analyze' subcommand
        p_analyze = subparsers.add_parser("analyze", help="analyze index")
        p_analyze.set_defaults(subcommand='analyze')
        p_analyze.add_argument('query_string', help='query string')
        p_analyze.add_argument('-n', '--number-of-result', default=10, type=int, help='number of result output')

        # 'metadata' subcommand
        p_metadata = subparsers.add_parser("metadata", help="manipulate metadata")
        p_metadata.set_defaults(subcommand='metadata')
        p_metadata.add_argument('action', help='action')

        return parser

    def _parse_args(self):
        """Parse ``sys.argv`` into ``self.args``.

        Sets ``self.index_dir`` (absolute path) when ``--index-dir`` is
        given, and ``self.sub_command`` to the chosen subcommand name, or
        ``""`` when none was selected.
        """
        self.conf = {}  # not read inside this class; kept for compatibility
        parser = self._make_parser()
        self.args = parser.parse_args()

        # index directory
        if self.args.index_dir:
            self.index_dir = os.path.abspath(self.args.index_dir)

        # command: the attribute is absent when no subcommand was chosen
        self.sub_command = getattr(self.args, 'subcommand', None) or ""

    def _load_config(self, pathname):
        """Load the YAML configuration file at ``pathname``.

        Populates ``self.config`` and ``self.database`` (the ``Database``
        section).  ``self.index_dir`` is taken from ``Searchd.index_path``
        unless it was already supplied with ``--index-dir``, which takes
        precedence.  If the file cannot be opened, a message is written to
        stderr and empty settings are used.

        Raises:
            SearchCLIError: if neither the command line nor the config
                provides an index directory.
        """
        try:
            fh = open(pathname)
        except IOError:
            sys.stderr.write('config file ({}) not found...'.format(pathname))
            self.config = {}
            # keep the attribute defined so later commands do not hit
            # AttributeError
            self.database = {}
            return

        # NOTE: yaml.load with the full Loader can construct arbitrary
        # objects; the config file is assumed to be trusted/admin-controlled.
        with fh:
            # an empty YAML document loads as None
            self.config = load(fh, Loader=Loader) or {}

        searchd_conf = self.config.get("Searchd") or {}
        if self.index_dir is None:
            try:
                self.index_dir = os.path.abspath(searchd_conf["index_path"])
            except KeyError:
                raise SearchCLIError("index_path not given")

        self.database = self.config.get("Database", {})

    def show_help(self):
        """Print the top-level usage message."""
        self._make_parser().print_help()

    def metadata(self):
        """Handle the ``metadata`` subcommand.

        Only the ``show`` action is implemented: it prints every entry
        returned by ``index.metadata.get_all()``.  Other actions do nothing.
        """
        if self.args.action == 'show':
            index = Index(database=self.database, index_path=self.index_dir)
            for data in index.metadata.get_all():
                print(data)

    def index(self):
        """Handle the ``index`` subcommand (``clear`` or ``update``).

        ``clear`` deletes the whole index and its metadata.  ``update``
        refreshes the index for the requested target (one of
        ``_UPDATE_TARGETS``) and logs the elapsed time.  Invalid actions or
        targets are logged as errors.
        """
        action = self.args.action
        target = self.args.target

        lucene_wrapper.init_vm()
        index = Index(database=self.database, index_path=self.index_dir)

        def progress_cb(target, phase, success, errors):
            self.logger.info("{} {} index for {} items... ({} errors)".format(phase, target, success, errors))

        def error_cb(target, phase, item):
            # NOTE(review): assumes every failing item exposes a "sid" key;
            # confirm this holds for non-story targets.
            self.logger.error("{} {} index error: id={}".format(phase, target, item["sid"]))

        if action == 'clear':
            index.delete_all()
            index.metadata.delete_all()
            print("clear all index and metadata done.")
            return

        if action != 'update':
            self.logger.error("invalid action - {}".format(action))
            return

        if target not in self._UPDATE_TARGETS:
            # bail out here so that "indexing done" is not reported for a
            # run that indexed nothing
            self.logger.error("invalid target - {}".format(target))
            return

        start_time = time.time()
        updater = getattr(index, 'update_' + target)
        updater(progress_cb=progress_cb, error_cb=error_cb)
        self.logger.info("indexing done. total time: {}s".format(time.time() - start_time))

    def query(self):
        """Handle the ``query`` subcommand.

        Searches the ``content_text`` field for the given query string and
        prints the total hit count followed by one line per result.
        """
        if self.index_dir is None:
            self.logger.error("index directory not given")
            return

        query_string = self.args.query_string
        lucene_wrapper.init_vm()

        searcher = lucene_wrapper.Searcher(index_directory=self.index_dir)
        try:
            query = lucene_wrapper.Query("content_text", query_string)
        except lucene_wrapper.QueryParseError:
            self.logger.error("query parse error")
            return

        result = searcher.search(query)

        print("total hits: {}".format(result.total_hits))
        for item in result:
            print("#{} - {}: {}".format(item.number, item.id, item.content_text.encode('utf-8')))

    def analyze(self):
        """Handle the ``analyze`` subcommand.

        Searches ``content_text`` and ``title`` (at least one must match),
        sorted on ``create_time`` and limited to ``--number-of-result``
        hits, then prints every field of each hit; for ``content_text`` the
        field's tokens are printed as well.
        """
        if self.index_dir is None:
            self.logger.error("index directory not given")
            return

        query_string = self.args.query_string
        limit = self.args.number_of_result

        lucene_wrapper.init_vm()

        searcher = lucene_wrapper.Searcher(index_directory=self.index_dir)
        try:
            content_query = lucene_wrapper.Query("content_text", query_string)
            title_query = lucene_wrapper.Query("title", query_string)
            query = lucene_wrapper.BooleanQuery()
            # NOTE(review): "nubmber" looks like a typo, but the name is
            # defined by lucene_wrapper; confirm there before renaming.
            query.set_minimum_nubmber_should_match(1)
            query.add_should(content_query)
            query.add_should(title_query)
        except lucene_wrapper.QueryParseError:
            self.logger.error("query parse error")
            return

        sort = lucene_wrapper.Sort("create_time", lucene_wrapper.Sort.INT, True)
        result = searcher.search(query, limit, 0, sort)

        print("total hits: {}".format(result.total_hits))
        for item in result:
            print("#{} - {}:".format(item.number, item.id))

            # get fields
            for field in item.get_fields():
                print("  {}: {}".format(field.name, field.value.encode("utf8")))
                if field.name == "content_text":
                    # additionally dump the tokens of the body text
                    print("----")
                    for term in field.get_tokens():
                        sys.stdout.write("{} ".format(term.encode("utf8")))
                    print("----")

            print("\n")

    def getdocument(self):
        """Handle the ``getdocument`` subcommand.

        Looks up a single document by target type and id and prints it, or
        prints ``no item`` when the lookup returns ``None``.
        """
        lucene_wrapper.init_vm()
        index = Index(database=self.database, index_path=self.index_dir)
        result = index.get(self.args.target_type, self.args.target_id)
        if result is None:
            print("no item")
            return

        # NOTE(review): search results elsewhere expose ``total_hits``;
        # confirm that the object returned by Index.get really uses
        # ``totalhits``.
        if result.totalhits > 1:
            self.logger.warning("hits multiple items")

        print("#{} - {}: {}".format(result.number, result.id, result.content_text.encode('utf-8')))

    def initdb(self):
        """Handle the ``initdb`` subcommand: create the metadata table.

        A ``DatabaseError`` is logged instead of being propagated.
        """
        index = Index(database=self.database, index_path=self.index_dir)
        try:
            index.metadata.create_table()
        except DatabaseError as e:
            self.logger.error('{}'.format(str(e)))

    def run(self):
        """Set the log level and execute the selected subcommand.

        ``--verbose`` selects DEBUG logging, otherwise WARNING.  When no
        known subcommand was selected the usage message is printed.

        Returns:
            Whatever the subcommand handler returns.
        """
        # set loglevel
        self.logger.setLevel(logging.DEBUG if self.args.verbose else logging.WARNING)

        handlers = {
            'query': self.query,
            'index': self.index,
            'getdocument': self.getdocument,
            'initdb': self.initdb,
            'analyze': self.analyze,
            'metadata': self.metadata,
        }
        handler = handlers.get(self.sub_command, self.show_help)
        return handler()
281
282
if __name__ == '__main__':
    # Script entry point: configure root logging, build the CLI and run it.
    logging.basicConfig(format='%(asctime)s[%(levelname)s] %(message)s', level=logging.DEBUG)
    try:
        cli = SearchCLI()
    except SearchCLIError as e:
        # Setup problems (e.g. missing config file) are user errors:
        # report them briefly and exit non-zero instead of dumping a traceback.
        sys.stderr.write("error: {}\n".format(e.message))
        sys.exit(1)
    cli.run()
286     cli.run()