From f5b7e1fb9a23e294e3efcbfe998e2873da7914de Mon Sep 17 00:00:00 2001
From: hylom
Date: Fri, 27 Apr 2018 19:59:04 +0900
Subject: [PATCH] ns_search: update config file format

---
 src/ns_search/newslash_index/index.py    | 31 ++++++------------
 src/ns_search/newslash_index/metadata.py |  2 +-
 src/ns_search/searchd.conf.example       | 10 ++++--
 src/ns_search/searchd.py                 |  2 +-
 src/ns_search/searchd_cli.py             | 54 +++++++++++++++++++++++++-------
 5 files changed, 63 insertions(+), 36 deletions(-)

diff --git a/src/ns_search/newslash_index/index.py b/src/ns_search/newslash_index/index.py
index 2280ac58..38e52410 100644
--- a/src/ns_search/newslash_index/index.py
+++ b/src/ns_search/newslash_index/index.py
@@ -14,33 +14,22 @@ from metadata import Metadata
 class Index(object):
     def __init__(self, **kwargs):
         if "config" in kwargs:
-            self._config = kwargs["config"]
+            self.config = kwargs["config"]
         else:
-            self._config = {}
+            self.config = {}
 
         for k in kwargs:
             if k == "config":
                 continue
-            self._config[k] = kwargs[k]
+            self.config[k] = kwargs[k]
 
-        self.metadata = Metadata(self._config)
-
-    def config(self, section, key=None, default=None):
-        '''get config value'''
-        d = self._config.get(section)
-        if d is None:
-            return default
-
-        if key is None:
-            return d
-
-        return d.get(key, default)
+        self.metadata = Metadata(self.config)
 
     def _get_searcher(self):
-        return lucene_wrapper.Searcher(index_directory=self.config("SearchIndex", "path"))
+        return lucene_wrapper.Searcher(index_directory=self.config.get("index_path"))
 
     def _db_config(self):
-        db_cfg = self.config("Database")
+        db_cfg = self.config.get("database")
         if db_cfg is None:
             raise ConfigFileError(ConfigFileError.SECTION_NOT_FOUND, "Database")
         return db_cfg
@@ -60,12 +49,12 @@ class Index(object):
         if indexer:
             indexer.delete(q)
         else:
-            with lucene_wrapper.Indexer(index_directory=self.config("SearchIndex", "path")) as indexer:
+            with lucene_wrapper.Indexer(index_directory=self.config.get("index_path")) as indexer:
                 indexer.delete(q)
 
     def delete_all(self):
         '''delete all document from index'''
-        with lucene_wrapper.Indexer(index_directory=self.config("SearchIndex", "path")) as indexer:
+        with lucene_wrapper.Indexer(index_directory=self.config.get("index_path")) as indexer:
             indexer.delete_all()
 
     def _get_unique_id(self, target, item):
@@ -99,7 +88,7 @@ class Index(object):
         max_unique_id = latest_id
         force_exit = False
 
-        with lucene_wrapper.Indexer(index_directory=self.config("SearchIndex", "path")) as indexer:
+        with lucene_wrapper.Indexer(index_directory=self.config.get("index_path")) as indexer:
             try:
                 while not query_done:
                     items = selector(batch_size, 0, max_unique_id)
@@ -159,7 +148,7 @@ class Index(object):
         update_success = 0
         update_errors = 0
         offset = 0
-        with lucene_wrapper.Indexer(index_directory=self.config("SearchIndex", "path")) as indexer:
+        with lucene_wrapper.Indexer(index_directory=self.config.get("index_path")) as indexer:
             items = selector(batch_size, offset, latest_id, last_update)
             offset += len(items)
             if len(items) < batch_size:
diff --git a/src/ns_search/newslash_index/metadata.py b/src/ns_search/newslash_index/metadata.py
index f10410bd..47b5ccde 100644
--- a/src/ns_search/newslash_index/metadata.py
+++ b/src/ns_search/newslash_index/metadata.py
@@ -10,7 +10,7 @@ class Metadata(object):
         self._config = config
 
     def _db_config(self):
-        db_cfg = self._config.get("Database")
+        db_cfg = self._config.get("database")
         if db_cfg is None:
             raise exceptions.ConfigFileError(exceptions.ConfigFileError.SECTION_NOT_FOUND, "Database")
         return db_cfg
diff --git a/src/ns_search/searchd.conf.example b/src/ns_search/searchd.conf.example
index 17bda9fb..6314b78b 100644
--- a/src/ns_search/searchd.conf.example
+++ b/src/ns_search/searchd.conf.example
@@ -1,2 +1,8 @@
-Index:
-  path: '/var/newslash/lucene_index'
+Database:
+  host: test-db
+  name: foo
+  user: bar
+  password: hoge
+
+Searchd:
+  index_path: 'lucene_index'
diff --git a/src/ns_search/searchd.py b/src/ns_search/searchd.py
index 043ad29f..7387979d 100644
--- a/src/ns_search/searchd.py
+++ b/src/ns_search/searchd.py
@@ -85,7 +85,7 @@ class Root(Route):
         #if req.environ['wsgi.errors']:
             #req.environ['wsgi.errors'].write(query.encode('utf-8'))
 
-        searcher = lucene_wrapper.Searcher(index_directory=self.config('Index', 'path'))
+        searcher = lucene_wrapper.Searcher(index_directory=self.config('Searchd', 'index_path'))
         try:
             content_query = lucene_wrapper.Query("content_text", query_text)
             title_query = lucene_wrapper.Query("title", query_text)
diff --git a/src/ns_search/searchd_cli.py b/src/ns_search/searchd_cli.py
index 1eb972fb..85feda2e 100755
--- a/src/ns_search/searchd_cli.py
+++ b/src/ns_search/searchd_cli.py
@@ -20,11 +20,23 @@ from newslash_db import NewslashDB
 
 CONFIG_FILE="./.config.yml"
 
+class SearchCLIError(Exception):
+    def __init__(self, message):
+        self.message = message
+
 class SearchCLI(object):
     def __init__(self):
         self.index_dir = None
         self._parse_args()
-        self._load_config(CONFIG_FILE)
+
+        config_path = os.environ.get("SEARCHD_CONFIG", "/etc/newslash/searchd.conf")
+        if not os.path.exists(config_path):
+            base_dir = os.path.dirname(os.path.realpath(__file__))
+            config_path = os.path.join(base_dir, 'searchd.conf')
+            if not os.path.exists(config_path):
+                raise SearchCLIError("config file not found")
+
+        self._load_config(config_path)
 
     def _make_parser(self):
         # parse command line option
@@ -43,7 +55,7 @@ class SearchCLI(object):
         parser_query = subparsers.add_parser("index", help="manipulate lucene index")
         parser_query.add_argument('subcommand', action='store_const', const='index')
         parser_query.add_argument('action', help='action')
-
+        parser_query.add_argument('target', help='target', default='all')
 
         # 'getdocument' subcommand
         parser_getdoc = subparsers.add_parser("getdocument", help="get document from lucene index")
@@ -84,21 +96,24 @@ class SearchCLI(object):
         self.config = load(fh, Loader=Loader)
         fh.close()
 
-        d = self.config.get("SearchIndex")
-        if d is not None:
-            index_path = d.get("path")
-            if index_path is not None:
-                self.index_dir = os.path.abspath(index_path)
-
+        d = self.config.get("Searchd", {})
+        try:
+            self.index_dir = os.path.abspath(d["index_path"])
+        except KeyError:
+            raise SearchCLIError("index_path not given")
+
+        self.database = self.config.get("Database", {})
+
     def show_help(self):
         parser = self._make_parser()
         parser.print_help()
 
     def index(self):
         action = self.args.action
+        target = self.args.target
 
         lucene_wrapper.init_vm()
-        index = Index(config=self.config)
+        index = Index(database=self.database, index_path=self.index_dir)
 
         def progress_cb(target, phase, success, errors):
             print("{}: {} to index {} items... ({} errors)".format(target, phase, success, errors))
@@ -115,6 +130,23 @@ class SearchCLI(object):
             start_time = time.time()
             index.update_all(progress_cb=progress_cb, error_cb=error_cb)
             print("indexing done. total time: {}s".format(time.time() - start_time))
+        elif action == 'create':
+            start_time = time.time()
+            if target == 'all':
+                index.update_all(progress_cb=progress_cb, error_cb=error_cb)
+            elif target == 'story':
+                index.update_story(progress_cb=progress_cb, error_cb=error_cb)
+            elif target == 'comment':
+                index.update_comment(progress_cb=progress_cb, error_cb=error_cb)
+            elif target == 'journal':
+                index.update_journal(progress_cb=progress_cb, error_cb=error_cb)
+            elif target == 'submission':
+                index.update_submission(progress_cb=progress_cb, error_cb=error_cb)
+            elif target == 'poll':
+                index.update_poll(progress_cb=progress_cb, error_cb=error_cb)
+            elif target == 'user':
+                index.update_user(progress_cb=progress_cb, error_cb=error_cb)
+            print("indexing done. total time: {}s".format(time.time() - start_time))
 
     def query(self):
         if self.index_dir is None:
@@ -139,7 +171,7 @@ class SearchCLI(object):
 
     def getdocument(self):
         lucene_wrapper.init_vm()
-        index = Index(config=self.config)
+        index = Index(database=self.database, index_path=self.index_dir)
         result = index.get(self.args.target_type, self.args.target_id)
         if result is None:
             print("no item")
@@ -151,7 +183,7 @@ class SearchCLI(object):
             print("#{} - {}: {}".format(result.number, result.id, result.content_text.encode('utf-8')))
 
     def initdb(self):
-        index = Index(config=self.config)
+        index = Index(database=self.database, index_path=self.index_dir)
         try:
             index.create_metadata_table()
         except DatabaseError as e:
-- 
2.11.0