OSDN Git Service

ns_search: searchd_cli.py now supports "analyze" command
author hylom <hylom@users.sourceforge.jp>
Wed, 23 May 2018 10:15:42 +0000 (19:15 +0900)
committer hylom <hylom@users.sourceforge.jp>
Wed, 23 May 2018 10:15:42 +0000 (19:15 +0900)
src/ns_search/searchd_cli.py

index ada5203..504a6ed 100755 (executable)
@@ -7,6 +7,7 @@ import os.path
 import os
 import sys
 import time
+import inspect
 
 from yaml import load
 try:
@@ -47,25 +48,31 @@ class SearchCLI(object):
         subparsers = parser.add_subparsers(help="subcommands")
 
         # 'query' subcommand
-        parser_query = subparsers.add_parser("query", help="execute query")
-        parser_query.add_argument('subcommand', action='store_const', const='query')
-        parser_query.add_argument('query_string', help='query string')
+        p_query = subparsers.add_parser("query", help="execute query")
+        p_query.add_argument('subcommand', action='store_const', const='query')
+        p_query.add_argument('query_string', help='query string')
 
         # 'index' subcommand
-        parser_query = subparsers.add_parser("index", help="manipulate lucene index")
-        parser_query.add_argument('subcommand', action='store_const', const='index')
-        parser_query.add_argument('action', help='action')
-        parser_query.add_argument('target', help='target', default='all')
+        p_index = subparsers.add_parser("index", help="manipulate lucene index")
+        p_index.add_argument('subcommand', action='store_const', const='index')
+        p_index.add_argument('action', help='action')
+        p_index.add_argument('target', help='target', default='all')
 
         # 'getdocument' subcommand
-        parser_getdoc = subparsers.add_parser("getdocument", help="get document from lucene index")
-        parser_getdoc.add_argument('subcommand', action='store_const', const='getdocument')
-        parser_getdoc.add_argument('target_type', help='target type')
-        parser_getdoc.add_argument('target_id', help='target id')
+        p_getdoc = subparsers.add_parser("getdocument", help="get document from lucene index")
+        p_getdoc.add_argument('subcommand', action='store_const', const='getdocument')
+        p_getdoc.add_argument('target_type', help='target type')
+        p_getdoc.add_argument('target_id', help='target id')
 
         # 'initdb' subcommand
-        parser_initdb = subparsers.add_parser("initdb", help="create table which stores index related information")
-        parser_initdb.add_argument('subcommand', action='store_const', const='initdb')
+        p_initdb = subparsers.add_parser("initdb", help="create table which stores index related information")
+        p_initdb.add_argument('subcommand', action='store_const', const='initdb')
+
+        # 'analyze' subcommand
+        p_analyze = subparsers.add_parser("analyze", help="analyze index")
+        p_analyze.add_argument('subcommand', action='store_const', const='analyze')
+        p_analyze.add_argument('query_string', help='query string')
+        p_analyze.add_argument('-n', '--number-of-result', default=10, type=int, help='number of result output')
 
         return parser
 
@@ -159,11 +166,52 @@ class SearchCLI(object):
             return
 
         result = searcher.search(query)
-        
+       
         print("total hits: {}".format(result.total_hits))
         for item in result:
             print("#{} - {}: {}".format(item.number, item.id, item.content_text.encode('utf-8')))
 
+    def analyze(self):
+        if self.index_dir is None:
+            sys.stderr.write("error: index directory not given\n")
+            return
+
+        query_string = self.args.query_string
+        num_of_output = self.args.number_of_result
+
+        lucene_wrapper.init_vm()
+        
+        searcher = lucene_wrapper.Searcher(index_directory=self.index_dir)
+        try:
+            query = lucene_wrapper.Query("content_text", query_string)
+        except lucene_wrapper.QueryParseError as e:
+            sys.stderr.write("query parse error\n")
+            return
+
+        result = searcher.search(query)
+        
+        print("total hits: {}".format(result.total_hits))
+        count = 0
+        for item in result:
+            print("#{} - {}:".format(item.number, item.id))
+
+            # get fields
+            fields = item.get_fields()
+            for field in fields:
+                if field.name == "content_text":
+                    print("  {}: {}".format(field.name, field.value.encode("utf8")))
+                    print("----")
+                    for term in field.get_tokens():
+                        sys.stdout.write("{} ".format(term.encode("utf8")))
+                    print("----")
+                else:
+                    print("  {}: {}".format(field.name, field.value.encode("utf8")))
+            
+            print("\n")
+            count = count + 1
+            if count >= num_of_output:
+                break
+
     def getdocument(self):
         lucene_wrapper.init_vm()
         index = Index(database=self.database, index_path=self.index_dir)
@@ -197,6 +245,9 @@ class SearchCLI(object):
         if self.sub_command == 'initdb':
             return self.initdb()
 
+        if self.sub_command == 'analyze':
+            return self.analyze()
+
         return self.show_help()