1 # -*- coding: utf-8 -*-
2 """wrapper: Wrapper interface to Full-text search system 'lucene'"""
5 from java.io import File
6 from org.apache.lucene.util import Version
7 from org.apache.lucene.index import IndexWriter, IndexWriterConfig, IndexReader, Term
8 from org.apache.lucene.search import IndexSearcher, TermQuery as _TermQuery
9 from org.apache.lucene.analysis.ja import JapaneseAnalyzer, JapaneseTokenizer
10 from org.apache.lucene.store import SimpleFSDirectory
11 from org.apache.lucene.document import Document as LuceneDocument, Field, TextField, StringField, IntField
12 #from org.apache.lucene.queryparser.classic import QueryParser
13 from org.apache.lucene.queryparser.flexible.standard import StandardQueryParser
class LuceneWrapper(object):
    """Shared base for the wrapper classes in this module.

    Keeps the keyword options given at construction time and supplies the
    Lucene version constant, a Japanese analyzer and the on-disk index
    directory that the subclasses build on.
    """

    def __init__(self, **kwargs):
        """Remember the configuration options.

        Args:
            **kwargs: Free-form options.  ``index_directory`` is the only key
                this class reads (see ``_get_index_directory``).
        """
        # _get_index_directory() reads self._config, so the attribute must
        # exist even for subclasses that call this initializer with no
        # arguments.
        # NOTE(review): this assignment was reconstructed from that usage --
        # confirm it matches the upstream file.
        self._config = kwargs
        self._version = Version.LUCENE_4_10_1

    def _get_analyzer(self):
        """Build a ``JapaneseAnalyzer`` with Lucene's default stop data.

        Returns:
            A new ``JapaneseAnalyzer`` using ``Mode.NORMAL`` tokenization and
            the analyzer's default stop-word set and stop tags.
        """
        return JapaneseAnalyzer(
            None,  # presumably the user-dictionary slot; none is supplied
            JapaneseTokenizer.Mode.NORMAL,
            JapaneseAnalyzer.getDefaultStopSet(),
            JapaneseAnalyzer.getDefaultStopTags(),
        )

    def _get_index_directory(self):
        """Open the configured index directory.

        Returns:
            A ``SimpleFSDirectory`` for the ``index_directory`` option.

        Raises:
            KeyError: If ``index_directory`` was not passed to ``__init__``.
        """
        index_path = self._config["index_directory"]
        return SimpleFSDirectory(File(index_path))
class Indexer(LuceneWrapper):
    """Write documents into the Lucene index named by ``index_directory``.

    Usable as a context manager; the writer is closed when the ``with``
    block ends::

        with Indexer(index_directory="/path/to/index") as indexer:
            indexer.add(document)

    NOTE(review): only fragments of this class were available when it was
    rewritten.  The name of the public ``add`` method and the bodies of
    ``_close_writer``, ``__enter__`` and ``__exit__`` are reconstructions --
    confirm them against the callers.
    """

    def __init__(self, **kwargs):
        """Store the options; see ``LuceneWrapper.__init__``."""
        super(Indexer, self).__init__(**kwargs)
        # Opened on demand by _get_writer() so that a second call can never
        # try to create a second IndexWriter on the same directory.
        self._writer = None

    def _get_writer(self):
        """Return the ``IndexWriter``, creating it on first use."""
        if self._writer is None:
            analyzer = self._get_analyzer()

            # create IndexWriterConfig
            lucene_conf = IndexWriterConfig(self._version, analyzer)

            index_dir = self._get_index_directory()
            self._writer = IndexWriter(index_dir, lucene_conf)
        return self._writer

    def _close_writer(self):
        """Close the writer if one is open; safe to call more than once."""
        # Clear the attribute first so a failing close() cannot leave a
        # half-closed writer behind for the next call.
        writer, self._writer = self._writer, None
        if writer is not None:
            writer.close()

    def add(self, doc):
        """Add one document to the index.

        Args:
            doc: A wrapper object whose ``doc`` attribute is the underlying
                Lucene document (the ``Document`` class of this module).
        """
        self._get_writer().addDocument(doc.doc)

    def __enter__(self):
        """Open the writer and return this indexer."""
        self._get_writer()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the writer; exceptions from the ``with`` body propagate."""
        self._close_writer()
        return False
class Document(LuceneWrapper):
    """Builder around a Lucene document.

    The underlying Lucene object is exposed as ``doc``.  Every ``add_*``
    method returns the wrapper itself so calls can be chained; callers that
    ignore the return value are unaffected.
    """

    def __init__(self):
        """Create an empty Lucene document."""
        super(Document, self).__init__()
        self.doc = LuceneDocument()

    def _store_flag(self, store):
        """Translate a truthy/falsy ``store`` into a ``Field.Store`` value."""
        # NOTE(review): the NO branch was reconstructed; without it the
        # ``store`` argument of the add_* methods would have no effect.
        return Field.Store.YES if store else Field.Store.NO

    def add_string_field(self, name, value, store=True):
        """Add a ``StringField`` called ``name`` holding ``value``.

        Args:
            name: Field name.
            value: Field value.
            store: Whether the value is stored in the index (default True).

        Returns:
            This ``Document``.
        """
        self.doc.add(StringField(name, value, self._store_flag(store)))
        return self

    def add_text_field(self, name, value, store=True):
        """Add a ``TextField`` called ``name`` holding ``value``.

        Args:
            name: Field name.
            value: Field value.
            store: Whether the value is stored in the index (default True).

        Returns:
            This ``Document``.
        """
        self.doc.add(TextField(name, value, self._store_flag(store)))
        return self

    def add_int_field(self, name, value, store=True):
        """Add an ``IntField`` called ``name`` holding ``value``.

        Args:
            name: Field name.
            value: Field value.
            store: Whether the value is stored in the index (default True).

        Returns:
            This ``Document``.
        """
        self.doc.add(IntField(name, value, self._store_flag(store)))
        return self
class Searcher(LuceneWrapper):
    """Run queries against the Lucene index named by ``index_directory``.

    The index is opened on the first search.  The reader is kept on the
    instance so that it can be released with ``close()`` or by using the
    searcher as a context manager.
    """

    def __init__(self, **kwargs):
        """Store the options; see ``LuceneWrapper.__init__``."""
        super(Searcher, self).__init__(**kwargs)
        self._reader = None
        self._searcher = None

    def _get_searcher(self):
        """Open the index and build a fresh ``IndexSearcher``.

        Returns:
            The new ``IndexSearcher`` (also stored on the instance).
        """
        index_dir = self._get_index_directory()
        # Keep a handle on the reader; as a local it could never be closed.
        self._reader = IndexReader.open(index_dir)
        self._searcher = IndexSearcher(self._reader)
        return self._searcher

    def search(self, query, max_result=1000):
        """Search with one of this module's query wrappers.

        Args:
            query: Object whose ``query`` attribute is the native Lucene
                query (``TermQuery`` or ``Query`` from this module).
            max_result: Maximum number of hits to collect.

        Returns:
            A ``SearchResults`` over the hits.
        """
        return self.raw_search(query.query, max_result)

    def raw_search(self, query, max_result=1000):
        """Search with a native Lucene query object.

        Args:
            query: Lucene query passed straight to ``IndexSearcher.search``.
            max_result: Maximum number of hits to collect.

        Returns:
            A ``SearchResults`` over the hits.
        """
        if self._searcher is None:
            self._get_searcher()
        docs = self._searcher.search(query, max_result)
        return SearchResults(docs, self._searcher)

    def close(self):
        """Release the index reader; a later search reopens the index.

        Results obtained earlier fetch their documents through the same
        reader, so consume them before closing.
        """
        reader, self._reader = self._reader, None
        self._searcher = None
        if reader is not None:
            reader.close()

    def __enter__(self):
        """Return this searcher for use in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the reader; exceptions from the ``with`` body propagate."""
        self.close()
        return False
class SearchResults(LuceneWrapper):
    """Iterable view over the hits returned by a search.

    Iterating yields one ``ScoredDocument`` per entry of ``docs.scoreDocs``.
    Both ``next()`` and ``__next__()`` are provided, so the object works as
    an iterator under either Python 2 or Python 3 naming.

    NOTE(review): the ``def`` lines of the iterator methods and the
    ``self.docs`` assignment were reconstructed from the statements that
    use them -- confirm against the upstream file.
    """

    def __init__(self, docs, searcher):
        """Wrap a search result.

        Args:
            docs: Result object exposing ``totalHits`` and ``scoreDocs``
                (presumably Lucene's ``TopDocs``).
            searcher: The searcher that produced ``docs``; used to load the
                document behind each hit.
        """
        super(SearchResults, self).__init__()
        self.docs = docs
        self.searcher = searcher
        self.total_hits = docs.totalHits
        # Prime the cursor so next() also works without a prior iter().
        self._iter = iter(docs.scoreDocs)

    def __iter__(self):
        """Restart iteration from the first hit and return ``self``."""
        self._iter = iter(self.docs.scoreDocs)
        return self

    def next(self):
        """Python 2 spelling of ``__next__``."""
        return self.__next__()

    def __next__(self):
        """Return the next hit as a ``ScoredDocument``.

        Raises:
            StopIteration: When every hit has been returned.
        """
        doc = next(self._iter)
        return ScoredDocument(doc, self.searcher)
class ScoredDocument(LuceneWrapper):
    """One search hit: the stored document plus its score.

    Attributes:
        number: The hit's ``doc`` value (its document number).
        score: The hit's ``score`` value.
        searcher: The searcher used to load the document.

    Any other attribute name is looked up as a field of the loaded
    document, e.g. ``hit.title`` is ``document.get("title")``.  Fields
    named like one of the attributes above are shadowed by them.
    """

    def __init__(self, doc, searcher):
        """Load the document behind a hit.

        Args:
            doc: Hit object exposing ``doc`` and ``score`` (an entry of
                ``scoreDocs``).
            searcher: Searcher whose ``doc()`` method loads the document.
        """
        super(ScoredDocument, self).__init__()

        self._searcher_doc = searcher.doc(doc.doc)
        self.number = doc.doc
        self.score = doc.score
        self.searcher = searcher

    def __getattr__(self, name):
        """Return the field ``name`` of the loaded document.

        Only called when normal attribute lookup fails.  The value is
        whatever the document's ``get`` returns for that field name.

        Raises:
            AttributeError: For ``_searcher_doc`` itself and for ``__x__``
                style names, which are never treated as field names.
        """
        # Looking up self._searcher_doc before __init__ has set it would
        # re-enter this method without end; dunder names are protocol probes
        # (copy, pickle, ...) rather than index fields.
        is_dunder = name.startswith("__") and name.endswith("__")
        if name == "_searcher_doc" or is_dunder:
            raise AttributeError(name)
        return self._searcher_doc.get(name)
class TermQuery(LuceneWrapper):
    """Wrapper around a Lucene term query on a single field.

    The native query object is available as ``query``.
    """

    def __init__(self, field_name, query_term):
        """Build the query.

        Args:
            field_name: Name of the field to match.
            query_term: Term text to look for in that field.
        """
        super(TermQuery, self).__init__()
        self.query = _TermQuery(Term(field_name, query_term))
class Query(LuceneWrapper):
    """Wrapper around a query parsed from query-syntax text.

    The text is parsed with ``StandardQueryParser`` using this module's
    Japanese analyzer, with leading wildcards allowed.  The native query
    object is available as ``query``.
    """

    def __init__(self, field_name, query_text):
        """Parse ``query_text`` into a Lucene query.

        Args:
            field_name: Field passed to the parser alongside the text
                (second argument of ``parse``).
            query_text: Query string to parse.

        Raises:
            QueryParseError: If parsing raises ``lucene.JavaError``; the
                error carries the Java exception's message.
        """
        super(Query, self).__init__()

        parser = StandardQueryParser()
        parser.setAllowLeadingWildcard(True)
        parser.setAnalyzer(self._get_analyzer())

        # Only the parse call is guarded, so that parser set-up problems are
        # not reported as query syntax errors.
        try:
            self.query = parser.parse(query_text, field_name)
        except lucene.JavaError as e:
            raise QueryParseError(e.getJavaException().getMessage())
class LuceneWrapperError(Exception):
    """Base class for the errors raised by this wrapper module."""


class QueryParseError(LuceneWrapperError):
    """Raised when a query string cannot be parsed.

    Attributes:
        message: Description of the parse failure.
    """

    def __init__(self, message):
        """Store ``message`` and pass it on to ``Exception``.

        Args:
            message: Description of the parse failure.
        """
        # Initialise the base class explicitly so ``args`` and ``str()`` do
        # not depend on implicit behaviour of ``BaseException.__new__``.
        super(QueryParseError, self).__init__(message)
        self.message = message