class Index(object):
def __init__(self, **kwargs):
+ '''build the index configuration from keyword arguments
+
+ A "config" keyword, when given, supplies the base mapping; every other
+ keyword is stored in that mapping under its own name. The result is kept
+ as self.config and handed to Metadata.
+ '''
if "config" in kwargs:
- self._config = kwargs["config"]
+ # shallow copy: the loop below adds keys, and those must not leak
+ # into the dict owned by the caller
+ self.config = dict(kwargs["config"])
else:
- self._config = {}
+ self.config = {}
for k in kwargs:
if k == "config":
continue
- self._config[k] = kwargs[k]
+ self.config[k] = kwargs[k]
- self.metadata = Metadata(self._config)
-
- def config(self, section, key=None, default=None):
- '''get config value'''
- d = self._config.get(section)
- if d is None:
- return default
-
- if key is None:
- return d
-
- return d.get(key, default)
+ self.metadata = Metadata(self.config)
def _get_searcher(self):
- return lucene_wrapper.Searcher(index_directory=self.config("SearchIndex", "path"))
+ '''create a lucene_wrapper.Searcher for the directory stored under "index_path"'''
+ index_path = self.config.get("index_path")
+ return lucene_wrapper.Searcher(index_directory=index_path)
def _db_config(self):
+ '''return the "database" entry of the configuration
+
+ Raises ConfigFileError (SECTION_NOT_FOUND) when the entry is missing or None.
+ '''
- db_cfg = self.config("Database")
+ db_cfg = self.config.get("database")
if db_cfg is None:
raise ConfigFileError(ConfigFileError.SECTION_NOT_FOUND, "Database")
return db_cfg
if indexer:
indexer.delete(q)
else:
- with lucene_wrapper.Indexer(index_directory=self.config("SearchIndex", "path")) as indexer:
+ with lucene_wrapper.Indexer(index_directory=self.config.get("index_path")) as indexer:
indexer.delete(q)
def delete_all(self):
'''delete all document from index'''
- with lucene_wrapper.Indexer(index_directory=self.config("SearchIndex", "path")) as indexer:
+ # open an Indexer on the configured "index_path" for the duration of the deletion
+ with lucene_wrapper.Indexer(index_directory=self.config.get("index_path")) as indexer:
indexer.delete_all()
def _get_unique_id(self, target, item):
max_unique_id = latest_id
force_exit = False
- with lucene_wrapper.Indexer(index_directory=self.config("SearchIndex", "path")) as indexer:
+ with lucene_wrapper.Indexer(index_directory=self.config.get("index_path")) as indexer:
try:
while not query_done:
items = selector(batch_size, 0, max_unique_id)
update_success = 0
update_errors = 0
offset = 0
- with lucene_wrapper.Indexer(index_directory=self.config("SearchIndex", "path")) as indexer:
+ with lucene_wrapper.Indexer(index_directory=self.config.get("index_path")) as indexer:
items = selector(batch_size, offset, latest_id, last_update)
offset += len(items)
if len(items) < batch_size:
CONFIG_FILE="./.config.yml"
+class SearchCLIError(Exception):
+ '''error raised by SearchCLI, e.g. for a missing config file or option
+
+ The text is available as the message attribute and through str().
+ '''
+ def __init__(self, message):
+ # pass the text on to Exception as well, so that str(e), e.args and
+ # tracebacks carry it instead of being empty
+ super(SearchCLIError, self).__init__(message)
+ self.message = message
+
class SearchCLI(object):
def __init__(self):
+ '''parse the command line, then locate and load the config file
+
+ Raises SearchCLIError when no config file can be found.
+ '''
self.index_dir = None
self._parse_args()
- self._load_config(CONFIG_FILE)
+
+ # lookup order: the path in $SEARCHD_CONFIG (default /etc/newslash/searchd.conf),
+ # then searchd.conf in the directory that contains this script; a path from
+ # the environment that does not exist also falls back to the second location
+ config_path = os.environ.get("SEARCHD_CONFIG", "/etc/newslash/searchd.conf")
+ if not os.path.exists(config_path):
+ base_dir = os.path.dirname(os.path.realpath(__file__))
+ config_path = os.path.join(base_dir, 'searchd.conf')
+ if not os.path.exists(config_path):
+ raise SearchCLIError("config file not found")
+
+ self._load_config(config_path)
def _make_parser(self):
# parse command line option
parser_query = subparsers.add_parser("index", help="manipulate lucene index")
parser_query.add_argument('subcommand', action='store_const', const='index')
parser_query.add_argument('action', help='action')
-
+ # nargs='?' makes this positional optional; without it argparse still requires
+ # the argument and default='all' can never take effect
+ parser_query.add_argument('target', help='target', nargs='?', default='all')
# 'getdocument' subcommand
parser_getdoc = subparsers.add_parser("getdocument", help="get document from lucene index")
self.config = load(fh, Loader=Loader)
fh.close()
- d = self.config.get("SearchIndex")
- if d is not None:
- index_path = d.get("path")
- if index_path is not None:
- self.index_dir = os.path.abspath(index_path)
-
+ d = self.config.get("Searchd", {})
+ try:
+ self.index_dir = os.path.abspath(d["index_path"])
+ except KeyError:
+ raise SearchCLIError("index_path not given")
+
+ self.database = self.config.get("Database", {})
+
def show_help(self):
+ '''print the help text of the parser built by _make_parser()'''
parser = self._make_parser()
parser.print_help()
def index(self):
action = self.args.action
+ target = self.args.target
lucene_wrapper.init_vm()
- index = Index(config=self.config)
+ index = Index(database=self.database, index_path=self.index_dir)
def progress_cb(target, phase, success, errors):
print("{}: {} to index {} items... ({} errors)".format(target, phase, success, errors))
start_time = time.time()
index.update_all(progress_cb=progress_cb, error_cb=error_cb)
print("indexing done. total time: {}s".format(time.time() - start_time))
+ elif action == 'create':
+ # rebuild the index for one target, or for everything when target is 'all'
+ start_time = time.time()
+ if target == 'all':
+ index.update_all(progress_cb=progress_cb, error_cb=error_cb)
+ elif target == 'story':
+ index.update_story(progress_cb=progress_cb, error_cb=error_cb)
+ elif target == 'comment':
+ index.update_comment(progress_cb=progress_cb, error_cb=error_cb)
+ elif target == 'journal':
+ index.update_journal(progress_cb=progress_cb, error_cb=error_cb)
+ elif target == 'submission':
+ index.update_submission(progress_cb=progress_cb, error_cb=error_cb)
+ elif target == 'poll':
+ index.update_poll(progress_cb=progress_cb, error_cb=error_cb)
+ elif target == 'user':
+ index.update_user(progress_cb=progress_cb, error_cb=error_cb)
+ else:
+ # belongs to the target chain: an unrecognised target used to fall
+ # through silently and still report "indexing done"
+ raise SearchCLIError("unknown target: {}".format(target))
+ print("indexing done. total time: {}s".format(time.time() - start_time))
def query(self):
if self.index_dir is None:
def getdocument(self):
lucene_wrapper.init_vm()
- index = Index(config=self.config)
+ index = Index(database=self.database, index_path=self.index_dir)
result = index.get(self.args.target_type, self.args.target_id)
if result is None:
print("no item")
print("#{} - {}: {}".format(result.number, result.id, result.content_text.encode('utf-8')))
def initdb(self):
- index = Index(config=self.config)
+ index = Index(database=self.database, index_path=self.index_dir)
try:
index.create_metadata_table()
except DatabaseError as e: