import calendar
from datetime import datetime
+import os.path
import exceptions
import lucene_wrapper
self.config[k] = kwargs[k]
self.metadata = Metadata(self.config)
+ p = self.config.get("index_path")
+ self._index_path = os.path.join(p, "lucene_index");
+
def _get_searcher(self):
- return lucene_wrapper.Searcher(index_directory=self.config.get("index_path"))
+ return lucene_wrapper.Searcher(index_directory=self._index_path)
def _db_config(self):
db_cfg = self.config.get("database")
if indexer:
indexer.delete(q)
else:
- with lucene_wrapper.Indexer(index_directory=self.config.get("index_path")) as indexer:
+ with lucene_wrapper.Indexer(index_directory=self._index_path) as indexer:
indexer.delete(q)
def delete_all(self):
'''delete all document from index'''
- with lucene_wrapper.Indexer(index_directory=self.config.get("index_path")) as indexer:
+ with lucene_wrapper.Indexer(index_directory=self._index_path) as indexer:
indexer.delete_all()
def _get_unique_id(self, target, item):
max_unique_id = latest_id
force_exit = False
- with lucene_wrapper.Indexer(index_directory=self.config.get("index_path")) as indexer:
+ with lucene_wrapper.Indexer(index_directory=self._index_path) as indexer:
try:
while not query_done:
items = selector(batch_size, 0, max_unique_id)
update_success = 0
update_errors = 0
offset = 0
- with lucene_wrapper.Indexer(index_directory=self.config.get("index_path")) as indexer:
+ with lucene_wrapper.Indexer(index_directory=self._index_path) as indexer:
items = selector(batch_size, offset, latest_id, last_update)
offset += len(items)
if len(items) < batch_size:
"""metadata: index metadata for newslash search system"""
import newslash_db
-from mysql.connector.errors import ProgrammingError
+import sqlite3
+import os.path
+#from mysql.connector.errors import ProgrammingError
import exceptions
+class SQLiteDB(object):
+    """Thin convenience wrapper around a single ``sqlite3`` connection.
+
+    The connection is opened lazily by ``cursor()`` / ``execute()``.
+    ``close()`` commits pending changes before closing.  ``connect()``,
+    ``commit()``, ``rollback()`` and ``close()`` return ``self`` so that
+    calls can be chained.
+    """
+
+    def __init__(self, path):
+        """Remember the database file *path*; no connection is opened yet."""
+        self._path = path
+        self._conn = None
+
+    def connect(self):
+        """Open a connection to the database file and return ``self``."""
+        self._conn = sqlite3.connect(self._path)
+        return self
+
+    def cursor(self):
+        """Return a new cursor, connecting first if not yet connected."""
+        if not self._conn:
+            self.connect()
+        return self._conn.cursor()
+
+    def execute(self, query, **kwargs):
+        """Run *query* and return the cursor it was executed on.
+
+        Keyword arguments are bound to the ``:name`` placeholders in *query*.
+        """
+        cur = self.cursor()
+        cur.execute(query, kwargs)
+        return cur
+
+    def commit(self):
+        """Commit the current transaction, if connected; return ``self``."""
+        # Before the first connect() or after close() there is nothing to
+        # commit; treat that as a no-op instead of failing on None.
+        if self._conn:
+            self._conn.commit()
+        return self
+
+    def rollback(self):
+        """Roll back the current transaction, if connected; return ``self``."""
+        if self._conn:
+            self._conn.rollback()
+        return self
+
+    def close(self):
+        """Commit pending changes, close the connection and return ``self``.
+
+        Calling ``close()`` without an open connection does nothing.
+        """
+        if self._conn:
+            self.commit()
+            self._conn.close()
+            self._conn = None
+        return self
+
+
class Metadata(object):
def __init__(self, config):
+        """Store *config* and derive the path of the metadata database.
+
+        Raises exceptions.ConfigFileError when ``index_path`` is missing
+        from, or empty in, *config*.
+        """
self._config = config
+        path = config.get("index_path")
+        if not path:
+            raise exceptions.ConfigFileError(exceptions.ConfigFileError.PARAMETER_NOT_FOUND, "index_path")
+
+        # The SQLite metadata file is kept directly under index_path.
+        self._db_path = os.path.join(path, "_ns_metadata.db")
- def _db_config(self):
- db_cfg = self._config.get("database")
- if db_cfg is None:
- raise exceptions.ConfigFileError(exceptions.ConfigFileError.SECTION_NOT_FOUND, "Database")
- return db_cfg
-
+    def _get_db(self):
+        """Return a new, not yet connected SQLiteDB for the metadata file."""
+        return SQLiteDB(self._db_path)
+
def get_current_timestamp(self):
- db = newslash_db.NewslashDB(self._db_config())
- cur = db.execute('SELECT NOW()')
- return cur.fetchone()[0]
+        """Return the current time as reported by SQLite.
+
+        The value is the text produced by ``datetime('now')``, i.e. UTC
+        formatted as ``YYYY-MM-DD HH:MM:SS``.
+        """
+        # NOTE(review): the removed MySQL query used NOW(); callers that
+        # relied on its return type or time zone should be checked.
+        db = self._get_db()
+        try:
+            cur = db.execute("SELECT datetime('now')")
+            return cur.fetchone()[0]
+        finally:
+            # Release the connection even if the query fails.
+            db.close()
def create_table(self):
sql = (
"CREATE TABLE ns_search_metadata ("
- " target_id tinyint(8) unsigned NOT NULL AUTO_INCREMENT,"
- " target_name varchar(32) NOT NULL UNIQUE,"
- " last_update timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,"
- " latest_id int(8) unsigned NOT NULL DEFAULT 0,"
- " PRIMARY KEY (target_id)"
+ " target_id INTEGER PRIMARY KEY AUTOINCREMENT,"
+ " target_name TEXT NOT NULL UNIQUE,"
+ " last_update TEXT NOT NULL DEFAULT CURRENT_TIMESTAMP,"
+ " latest_id INTEGER NOT NULL DEFAULT 0"
")"
)
- db= newslash_db.NewslashDB(self._db_config())
+ db = self._get_db()
try:
db.execute(sql)
- except ProgrammingError as e:
+ except sqlite3.OperationalError as e:
db.close()
raise exceptions.DatabaseError('table creation error: {}'.format(str(e)))
db.close()
def drop_table(self):
- db = newslash_db.NewslashDB(self._db_config())
+ db = self._get_db()
db.execute('DROP TABLE ns_search_metadata')
db.close()
def delete_all(self):
- db = newslash_db.NewslashDB(self._db_config())
+ db = self._get_db()
db.execute('DELETE FROM ns_search_metadata')
db.close()
def get(self, target):
- sql = 'SELECT * from ns_search_metadata WHERE target_name = %(target)s'
- db = newslash_db.NewslashDB(self._db_config())
+        # Look up the metadata row whose target_name equals *target*; a row
+        # that is found is converted to a dict keyed by column name.
+        sql = 'SELECT * from ns_search_metadata WHERE target_name = :target'
+        db = self._get_db()
cur = db.execute(sql, target=target)
- if cur.rowcount > 0:
- result = dict(zip(cur.column_names, cur.fetchone()))
+        # sqlite3 cursors report rowcount == -1 for SELECT statements and
+        # have no column_names attribute, so fetch the row itself and take
+        # the column names from cursor.description.
+        row = cur.fetchone()
+        if row is not None:
+            result = dict(zip([col[0] for col in cur.description], row))
return result
def update(self, target, last_update, latest_id):
+        """Insert or refresh the metadata row for *target*.
+
+        A row is inserted when *target* has none yet (INSERT OR IGNORE) and
+        the row is then updated.  ``latest_id`` is overwritten only when it
+        is greater than 0; otherwise just ``last_update`` is refreshed.
+
+        Raises exceptions.DatabaseError when SQLite reports an
+        OperationalError.
+        """
+        # SQLite 3.24 and later could do this in a single statement with
+        # "INSERT ... ON CONFLICT (target_name) DO UPDATE SET ...", but the
+        # currently packaged SQLite does not support it, so an
+        # INSERT OR IGNORE followed by an UPDATE is used instead.
if latest_id > 0:
- sql = (
- "INSERT INTO ns_search_metadata"
+            update_sql = (
+                "UPDATE OR IGNORE ns_search_metadata"
+                " SET last_update = :last_update,"
+                " latest_id = :latest_id"
+                " WHERE target_name = :target"
+            )
+            insert_sql = (
+                "INSERT OR IGNORE INTO ns_search_metadata"
" (target_name, last_update, latest_id)"
- " VALUES (%(target)s, %(last_update)s, %(latest_id)s)"
- " ON DUPLICATE KEY UPDATE"
- " last_update = %(last_update)s,"
- " latest_id = %(latest_id)s"
+                " VALUES (:target, :last_update, :latest_id)"
)
else:
- sql = (
- "INSERT INTO ns_search_metadata"
+            # Only last_update is refreshed here; there must be no comma
+            # after the single SET assignment or SQLite rejects the query.
+            update_sql = (
+                "UPDATE OR IGNORE ns_search_metadata"
+                " SET last_update = :last_update"
+                " WHERE target_name = :target"
+            )
+            insert_sql = (
+                "INSERT OR IGNORE INTO ns_search_metadata"
" (target_name, last_update, latest_id)"
- " VALUES (%(target)s, %(last_update)s, %(latest_id)s)"
- " ON DUPLICATE KEY UPDATE"
- " last_update = %(last_update)s"
+                " VALUES (:target, :last_update, :latest_id)"
)
- db = newslash_db.NewslashDB(self._db_config())
- cur = db.execute(sql, target=target, last_update=last_update, latest_id=latest_id)
+        db = self._get_db()
+        try:
+            # Insert first so that the row exists, then bring it up to date.
+            db.execute(insert_sql, target=target, last_update=last_update, latest_id=latest_id)
+            db.execute(update_sql, target=target, last_update=last_update, latest_id=latest_id)
+        except sqlite3.OperationalError as e:
+            db.close()
+            raise exceptions.DatabaseError('ns_search_metadata update error: {}'.format(str(e)))
+
db.close()