OSDN Git Service

pgindent run for 8.2.
[pg-rex/syncrep.git] / contrib / tsearch2 / ts_lexize.c
1 /*
2  * lexize stream of lexemes
3  * Teodor Sigaev <teodor@sigaev.ru>
4  */
5 #include "postgres.h"
6
7 #include <ctype.h>
8 #include <locale.h>
9
10 #include "ts_cfg.h"
11 #include "dict.h"
12
13 void
14 LexizeInit(LexizeData * ld, TSCfgInfo * cfg)
15 {
16         ld->cfg = cfg;
17         ld->curDictId = InvalidOid;
18         ld->posDict = 0;
19         ld->towork.head = ld->towork.tail = ld->curSub = NULL;
20         ld->waste.head = ld->waste.tail = NULL;
21         ld->lastRes = NULL;
22         ld->tmpRes = NULL;
23 }
24
25 static void
26 LPLAddTail(ListParsedLex * list, ParsedLex * newpl)
27 {
28         if (list->tail)
29         {
30                 list->tail->next = newpl;
31                 list->tail = newpl;
32         }
33         else
34                 list->head = list->tail = newpl;
35         newpl->next = NULL;
36 }
37
38 static ParsedLex *
39 LPLRemoveHead(ListParsedLex * list)
40 {
41         ParsedLex  *res = list->head;
42
43         if (list->head)
44                 list->head = list->head->next;
45
46         if (list->head == NULL)
47                 list->tail = NULL;
48
49         return res;
50 }
51
52
53 void
54 LexizeAddLemm(LexizeData * ld, int type, char *lemm, int lenlemm)
55 {
56         ParsedLex  *newpl = (ParsedLex *) palloc(sizeof(ParsedLex));
57
58         newpl = (ParsedLex *) palloc(sizeof(ParsedLex));
59         newpl->type = type;
60         newpl->lemm = lemm;
61         newpl->lenlemm = lenlemm;
62         LPLAddTail(&ld->towork, newpl);
63         ld->curSub = ld->towork.tail;
64 }
65
66 static void
67 RemoveHead(LexizeData * ld)
68 {
69         LPLAddTail(&ld->waste, LPLRemoveHead(&ld->towork));
70
71         ld->posDict = 0;
72 }
73
74 static void
75 setCorrLex(LexizeData * ld, ParsedLex ** correspondLexem)
76 {
77         if (correspondLexem)
78         {
79                 *correspondLexem = ld->waste.head;
80         }
81         else
82         {
83                 ParsedLex  *tmp,
84                                    *ptr = ld->waste.head;
85
86                 while (ptr)
87                 {
88                         tmp = ptr->next;
89                         pfree(ptr);
90                         ptr = tmp;
91                 }
92         }
93         ld->waste.head = ld->waste.tail = NULL;
94 }
95
96 static void
97 moveToWaste(LexizeData * ld, ParsedLex * stop)
98 {
99         bool            go = true;
100
101         while (ld->towork.head && go)
102         {
103                 if (ld->towork.head == stop)
104                 {
105                         ld->curSub = stop->next;
106                         go = false;
107                 }
108                 RemoveHead(ld);
109         }
110 }
111
112 static void
113 setNewTmpRes(LexizeData * ld, ParsedLex * lex, TSLexeme * res)
114 {
115         if (ld->tmpRes)
116         {
117                 TSLexeme   *ptr;
118
119                 for (ptr = ld->tmpRes; ptr->lexeme; ptr++)
120                         pfree(ptr->lexeme);
121                 pfree(ld->tmpRes);
122         }
123         ld->tmpRes = res;
124         ld->lastRes = lex;
125 }
126
/*
 * LexizeExec
 *
 * Run the lexemes queued in ld->towork through the dictionaries that the
 * configuration maps to each lexeme type, and return the next result.
 *
 * The function works in one of two modes, selected by ld->curDictId:
 *
 *	- InvalidOid ("usual" mode): the head of ld->towork is offered to each
 *	  dictionary for its type, starting at index ld->posDict.  The first
 *	  non-NULL answer is returned.
 *
 *	- valid Oid (multiword mode): a dictionary set dictState.getnext, so
 *	  the following lexemes (starting at ld->curSub) are fed to that same
 *	  dictionary until it stops asking for more.
 *
 * ld              - lexize state set up by LexizeInit()/LexizeAddLemm()
 * correspondLexem - if not NULL, receives the head of the list of lexemes
 *                   consumed so far (see setCorrLex()); if NULL, that list
 *                   is freed instead
 *
 * Returns the TSLexeme array produced by a dictionary, or NULL when no
 * result could be produced from the lexemes currently available.
 */
TSLexeme *
LexizeExec(LexizeData * ld, ParsedLex ** correspondLexem)
{
        int                     i;
        ListDictionary *map;
        DictInfo   *dict;
        TSLexeme   *res;

        if (ld->curDictId == InvalidOid)
        {
                /*
                 * usual mode: dictionary wants only one word, but we should keep in
                 * mind that we should go through all stack
                 */

                while (ld->towork.head)
                {
                        ParsedLex  *curVal = ld->towork.head;

                        /*
                         * map is computed before the range check below, but it is
                         * only dereferenced (map->len) after curVal->type has been
                         * compared against ld->cfg->len, thanks to short-circuit
                         * evaluation.
                         */
                        map = ld->cfg->map + curVal->type;

                        if (curVal->type == 0 || curVal->type >= ld->cfg->len || map->len == 0)
                        {
                                /* skip this type of lexeme */
                                RemoveHead(ld);
                                continue;
                        }

                        /*
                         * Start at ld->posDict rather than 0: after a failed
                         * multiword attempt it points just past the dictionary that
                         * was tried, so that dictionary is not asked again.
                         */
                        for (i = ld->posDict; i < map->len; i++)
                        {
                                dict = finddict(DatumGetObjectId(map->dict_id[i]));

                                ld->dictState.isend = ld->dictState.getnext = false;
                                ld->dictState.private = NULL;
                                res = (TSLexeme *) DatumGetPointer(FunctionCall4(
                                                                                                                &(dict->lexize_info),
                                                                                   PointerGetDatum(dict->dictionary),
                                                                                           PointerGetDatum(curVal->lemm),
                                                                                          Int32GetDatum(curVal->lenlemm),
                                                                                          PointerGetDatum(&ld->dictState)
                                                                                                                                 ));

                                if (ld->dictState.getnext)
                                {
                                        /*
                                         * dictionary wants next word, so setup and store current
                                         * position and go to multiword mode
                                         */

                                        ld->curDictId = DatumGetObjectId(map->dict_id[i]);
                                        ld->posDict = i + 1;
                                        ld->curSub = curVal->next;
                                        /* keep the partial answer in case no longer match is found */
                                        if (res)
                                                setNewTmpRes(ld, curVal, res);
                                        return LexizeExec(ld, correspondLexem);
                                }

                                if (!res)               /* dictionary doesn't know this lexeme */
                                        continue;

                                /* recognized: the lexeme is consumed and the result returned */
                                RemoveHead(ld);
                                setCorrLex(ld, correspondLexem);
                                return res;
                        }

                        /* no dictionary recognized this lexeme: drop it and go on */
                        RemoveHead(ld);
                }
        }
        else
        {                                                       /* curDictId is valid */
                dict = finddict(ld->curDictId);

                /*
                 * Dictionary ld->curDictId asks us about following words
                 */

                while (ld->curSub)
                {
                        ParsedLex  *curVal = ld->curSub;

                        map = ld->cfg->map + curVal->type;

                        /* type 0 is passed to the dictionary with isend set, see below */
                        if (curVal->type != 0)
                        {
                                bool            dictExists = false;

                                if (curVal->type >= ld->cfg->len || map->len == 0)
                                {
                                        /* skip this type of lexeme */
                                        ld->curSub = curVal->next;
                                        continue;
                                }

                                /*
                                 * We should be sure that the current type of lexeme is
                                 * recognized by our dictionary: we just check whether it
                                 * exists in the list of dictionaries for this type.
                                 */
                                for (i = 0; i < map->len && !dictExists; i++)
                                        if (ld->curDictId == DatumGetObjectId(map->dict_id[i]))
                                                dictExists = true;

                                if (!dictExists)
                                {
                                        /*
                                         * Dictionary can't work with current type of lexeme,
                                         * return to basic mode and redo all stored lexemes
                                         */
                                        ld->curDictId = InvalidOid;
                                        return LexizeExec(ld, correspondLexem);
                                }
                        }

                        ld->dictState.isend = (curVal->type == 0) ? true : false;
                        ld->dictState.getnext = false;

                        res = (TSLexeme *) DatumGetPointer(FunctionCall4(
                                                                                                                &(dict->lexize_info),
                                                                                   PointerGetDatum(dict->dictionary),
                                                                                           PointerGetDatum(curVal->lemm),
                                                                                          Int32GetDatum(curVal->lenlemm),
                                                                                          PointerGetDatum(&ld->dictState)
                                                                                                                         ));

                        if (ld->dictState.getnext)
                        {
                                /* Dictionary wants one more */
                                ld->curSub = curVal->next;
                                if (res)
                                        setNewTmpRes(ld, curVal, res);
                                continue;
                        }

                        if (res || ld->tmpRes)
                        {
                                /*
                                 * Dictionary normalizes lexemes, so we remove from stack all
                                 * used lexemes, return to basic mode and redo end of stack
                                 * (if it exists)
                                 */
                                if (res)
                                {
                                        /* fresh result covers everything up to curSub */
                                        moveToWaste(ld, ld->curSub);
                                }
                                else
                                {
                                        /* fall back to the stored result and the lexeme it ended on */
                                        res = ld->tmpRes;
                                        moveToWaste(ld, ld->lastRes);
                                }

                                /*
                                 * reset to initial state
                                 *
                                 * NOTE(review): when res came from the dictionary and
                                 * ld->tmpRes was also set, the stored tmpRes is dropped
                                 * here without being freed -- confirm this is intended.
                                 */
                                ld->curDictId = InvalidOid;
                                ld->posDict = 0;
                                ld->lastRes = NULL;
                                ld->tmpRes = NULL;
                                setCorrLex(ld, correspondLexem);
                                return res;
                        }

                        /*
                         * Dict doesn't want the next lexeme and didn't recognize
                         * anything, redo from ld->towork.head
                         */
                        ld->curDictId = InvalidOid;
                        return LexizeExec(ld, correspondLexem);
                }
        }

        /*
         * Nothing to return: either the queue is exhausted (usual mode) or
         * the dictionary is still waiting for more input (multiword mode).
         */
        setCorrLex(ld, correspondLexem);
        return NULL;
}