OSDN Git Service

eba2cd6eb58d7e56e7038ef47b6d039ffefce581
[pg-rex/syncrep.git] / contrib / tsearch2 / wparser.c
1 /*
2  * interface functions to parser
3  * Teodor Sigaev <teodor@sigaev.ru>
4  */
5 #include <errno.h>
6 #include <stdlib.h>
7 #include <string.h>
8 #include <ctype.h>
9
10 #include "postgres.h"
11 #include "fmgr.h"
12 #include "utils/array.h"
13 #include "catalog/pg_type.h"
14 #include "executor/spi.h"
15 #include "funcapi.h"
16
17 #include "wparser.h"
18 #include "ts_cfg.h"
19 #include "snmap.h"
20 #include "common.h"
21
22 /*********top interface**********/
23
/*
 * OID of the parser used by the *_current SQL functions.  It stays
 * InvalidOid until set_curprs() stores a value or one of the *_current
 * functions resolves the parser named "default".
 */
static Oid      current_parser_id = InvalidOid;
25
26 void
27 init_prs(Oid id, WParserInfo * prs)
28 {
29         Oid                     arg[1];
30         bool            isnull;
31         Datum           pars[1];
32         int                     stat;
33         void *plan;
34         char buf[1024], *nsp;
35
36         arg[0] = OIDOID;
37         pars[0] = ObjectIdGetDatum(id);
38
39         memset(prs, 0, sizeof(WParserInfo));
40         SPI_connect();
41         nsp=get_namespace(TSNSP_FunctionOid);
42         sprintf(buf, "select prs_start, prs_nexttoken, prs_end, prs_lextype, prs_headline from %s.pg_ts_parser where oid = $1", nsp);
43         pfree(nsp);
44         plan= SPI_prepare(buf, 1, arg);
45         if (!plan)
46                 ts_error(ERROR, "SPI_prepare() failed");
47
48         stat = SPI_execp(plan, pars, " ", 1);
49         if (stat < 0)
50                 ts_error(ERROR, "SPI_execp return %d", stat);
51         if (SPI_processed > 0)
52         {
53                 Oid                     oid = InvalidOid;
54
55                 oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull));
56                 fmgr_info_cxt(oid, &(prs->start_info), TopMemoryContext);
57                 oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull));
58                 fmgr_info_cxt(oid, &(prs->getlexeme_info), TopMemoryContext);
59                 oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull));
60                 fmgr_info_cxt(oid, &(prs->end_info), TopMemoryContext);
61                 prs->lextype = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 4, &isnull));
62                 oid = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 5, &isnull));
63                 fmgr_info_cxt(oid, &(prs->headline_info), TopMemoryContext);
64                 prs->prs_id = id;
65         }
66         else
67                 ts_error(ERROR, "No parser with id %d", id);
68         SPI_freeplan(plan);
69         SPI_finish();
70 }
71
/*
 * Per-backend cache of parser descriptions, plus a parser-name -> OID map.
 */
typedef struct
{
        WParserInfo *last_prs;          /* entry most recently returned by findprs() */
        int                     len;    /* number of entries in use in list */
        int                     reallen;        /* number of entries allocated for list */
        WParserInfo *list;              /* realloc()ed array, sorted with compareprs() */
        SNMap           name2id_map;    /* parser name -> OID cache */
}       PrsList;

/* The single cache instance; starts out empty. */
static PrsList PList = {NULL, 0, 0, NULL, {0, 0, NULL}};
82
83 void
84 reset_prs(void)
85 {
86         freeSNMap(&(PList.name2id_map));
87         if (PList.list)
88                 free(PList.list);
89         memset(&PList, 0, sizeof(PrsList));
90 }
91
92 static int
93 compareprs(const void *a, const void *b)
94 {
95         return ((WParserInfo *) a)->prs_id - ((WParserInfo *) b)->prs_id;
96 }
97
98 WParserInfo *
99 findprs(Oid id)
100 {
101         /* last used prs */
102         if (PList.last_prs && PList.last_prs->prs_id == id)
103                 return PList.last_prs;
104
105         /* already used prs */
106         if (PList.len != 0)
107         {
108                 WParserInfo key;
109
110                 key.prs_id = id;
111                 PList.last_prs = bsearch(&key, PList.list, PList.len, sizeof(WParserInfo), compareprs);
112                 if (PList.last_prs != NULL)
113                         return PList.last_prs;
114         }
115
116         /* last chance */
117         if (PList.len == PList.reallen)
118         {
119                 WParserInfo *tmp;
120                 int                     reallen = (PList.reallen) ? 2 * PList.reallen : 16;
121
122                 tmp = (WParserInfo *) realloc(PList.list, sizeof(WParserInfo) * reallen);
123                 if (!tmp)
124                         ts_error(ERROR, "No memory");
125                 PList.reallen = reallen;
126                 PList.list = tmp;
127         }
128         PList.last_prs = &(PList.list[PList.len]);
129         init_prs(id, PList.last_prs);
130         PList.len++;
131         qsort(PList.list, PList.len, sizeof(WParserInfo), compareprs);
132         return findprs(id); /* qsort changed order!! */ ;
133 }
134
135 Oid
136 name2id_prs(text *name)
137 {
138         Oid                     arg[1];
139         bool            isnull;
140         Datum           pars[1];
141         int                     stat;
142         Oid                     id = findSNMap_t(&(PList.name2id_map), name);
143         char buf[1024], *nsp;
144         void *plan;
145
146         arg[0] = TEXTOID;
147         pars[0] = PointerGetDatum(name);
148
149         if (id)
150                 return id;
151
152         SPI_connect();
153         nsp = get_namespace(TSNSP_FunctionOid);
154         sprintf(buf, "select oid from %s.pg_ts_parser where prs_name = $1", nsp);
155         pfree(nsp);
156         plan= SPI_prepare(buf, 1, arg);
157         if (!plan)
158                 ts_error(ERROR, "SPI_prepare() failed");
159
160         stat = SPI_execp(plan, pars, " ", 1);
161         if (stat < 0)
162                 ts_error(ERROR, "SPI_execp return %d", stat);
163         if (SPI_processed > 0)
164                 id = DatumGetObjectId(SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull));
165         else
166                 ts_error(ERROR, "No parser '%s'", text2char(name));
167         SPI_freeplan(plan);
168         SPI_finish();
169         addSNMap_t(&(PList.name2id_map), name, id);
170         return id;
171 }
172
173
174 /******sql-level interface******/
/*
 * Cross-call state of the token_type* set-returning functions.
 */
typedef struct
{
        int                     cur;    /* index of the next list entry to return */
        LexDescr   *list;               /* result of the parser's lextype function;
                                                         * an entry with lexid == 0 ends the list */
}       TypeStorage;
180
181 static void
182 setup_firstcall(FuncCallContext *funcctx, Oid prsid)
183 {
184         TupleDesc       tupdesc;
185         MemoryContext oldcontext;
186         TypeStorage *st;
187         WParserInfo *prs = findprs(prsid);
188
189         oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
190
191         st = (TypeStorage *) palloc(sizeof(TypeStorage));
192         st->cur = 0;
193         st->list = (LexDescr *) DatumGetPointer(
194                                 OidFunctionCall1(prs->lextype, PointerGetDatum(prs->prs))
195                 );
196         funcctx->user_fctx = (void *) st;
197         tupdesc = RelationNameGetTupleDesc("tokentype");
198         funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
199         MemoryContextSwitchTo(oldcontext);
200 }
201
202 static Datum
203 process_call(FuncCallContext *funcctx)
204 {
205         TypeStorage *st;
206
207         st = (TypeStorage *) funcctx->user_fctx;
208         if (st->list && st->list[st->cur].lexid)
209         {
210                 Datum           result;
211                 char       *values[3];
212                 char            txtid[16];
213                 HeapTuple       tuple;
214
215                 values[0] = txtid;
216                 sprintf(txtid, "%d", st->list[st->cur].lexid);
217                 values[1] = st->list[st->cur].alias;
218                 values[2] = st->list[st->cur].descr;
219
220                 tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
221                 result = HeapTupleGetDatum(tuple);
222
223                 pfree(values[1]);
224                 pfree(values[2]);
225                 st->cur++;
226                 return result;
227         }
228         else
229         {
230                 if (st->list)
231                         pfree(st->list);
232                 pfree(st);
233         }
234         return (Datum) 0;
235 }
236
237 PG_FUNCTION_INFO_V1(token_type);
238 Datum           token_type(PG_FUNCTION_ARGS);
239
240 Datum
241 token_type(PG_FUNCTION_ARGS)
242 {
243         FuncCallContext *funcctx;
244         Datum           result;
245         SET_FUNCOID();
246         if (SRF_IS_FIRSTCALL())
247         {
248                 funcctx = SRF_FIRSTCALL_INIT();
249                 setup_firstcall(funcctx, PG_GETARG_OID(0));
250         }
251
252         funcctx = SRF_PERCALL_SETUP();
253
254         if ((result = process_call(funcctx)) != (Datum) 0)
255                 SRF_RETURN_NEXT(funcctx, result);
256         SRF_RETURN_DONE(funcctx);
257 }
258
259 PG_FUNCTION_INFO_V1(token_type_byname);
260 Datum           token_type_byname(PG_FUNCTION_ARGS);
261 Datum
262 token_type_byname(PG_FUNCTION_ARGS)
263 {
264         FuncCallContext *funcctx;
265         Datum           result;
266         SET_FUNCOID();
267         if (SRF_IS_FIRSTCALL())
268         {
269                 text       *name = PG_GETARG_TEXT_P(0);
270
271                 funcctx = SRF_FIRSTCALL_INIT();
272                 setup_firstcall(funcctx, name2id_prs(name));
273                 PG_FREE_IF_COPY(name, 0);
274         }
275
276         funcctx = SRF_PERCALL_SETUP();
277
278         if ((result = process_call(funcctx)) != (Datum) 0)
279                 SRF_RETURN_NEXT(funcctx, result);
280         SRF_RETURN_DONE(funcctx);
281 }
282
283 PG_FUNCTION_INFO_V1(token_type_current);
284 Datum           token_type_current(PG_FUNCTION_ARGS);
285 Datum
286 token_type_current(PG_FUNCTION_ARGS)
287 {
288         FuncCallContext *funcctx;
289         Datum           result;
290         SET_FUNCOID();
291         if (SRF_IS_FIRSTCALL())
292         {
293                 funcctx = SRF_FIRSTCALL_INIT();
294                 if (current_parser_id == InvalidOid)
295                         current_parser_id = name2id_prs(char2text("default"));
296                 setup_firstcall(funcctx, current_parser_id);
297         }
298
299         funcctx = SRF_PERCALL_SETUP();
300
301         if ((result = process_call(funcctx)) != (Datum) 0)
302                 SRF_RETURN_NEXT(funcctx, result);
303         SRF_RETURN_DONE(funcctx);
304 }
305
306
307 PG_FUNCTION_INFO_V1(set_curprs);
308 Datum           set_curprs(PG_FUNCTION_ARGS);
309 Datum
310 set_curprs(PG_FUNCTION_ARGS)
311 {
312         SET_FUNCOID();
313         findprs(PG_GETARG_OID(0));
314         current_parser_id = PG_GETARG_OID(0);
315         PG_RETURN_VOID();
316 }
317
318 PG_FUNCTION_INFO_V1(set_curprs_byname);
319 Datum           set_curprs_byname(PG_FUNCTION_ARGS);
320 Datum
321 set_curprs_byname(PG_FUNCTION_ARGS)
322 {
323         text       *name = PG_GETARG_TEXT_P(0);
324         SET_FUNCOID();
325         DirectFunctionCall1(
326                                                 set_curprs,
327                                                 ObjectIdGetDatum(name2id_prs(name))
328                 );
329         PG_FREE_IF_COPY(name, 0);
330         PG_RETURN_VOID();
331 }
332
/*
 * One token collected by prs_setup_firstcall().
 */
typedef struct
{
        int                     type;   /* value returned by the parser's getlexeme function */
        char       *lexem;              /* palloc'd, NUL-terminated copy of the token text */
}       LexemEntry;
338
/*
 * Cross-call state of the parse* set-returning functions.
 */
typedef struct
{
        int                     cur;    /* while collecting: tokens stored so far;
                                                         * afterwards: index of the next token to return */
        int                     len;    /* while collecting: allocated entries in list;
                                                         * afterwards: number of collected tokens */
        LexemEntry *list;               /* the collected tokens */
}       PrsStorage;
345
346
347 static void
348 prs_setup_firstcall(FuncCallContext *funcctx, int prsid, text *txt)
349 {
350         TupleDesc       tupdesc;
351         MemoryContext oldcontext;
352         PrsStorage *st;
353         WParserInfo *prs = findprs(prsid);
354         char       *lex = NULL;
355         int                     llen = 0,
356                                 type = 0;
357
358         oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
359
360         st = (PrsStorage *) palloc(sizeof(PrsStorage));
361         st->cur = 0;
362         st->len = 16;
363         st->list = (LexemEntry *) palloc(sizeof(LexemEntry) * st->len);
364
365         prs->prs = (void *) DatumGetPointer(
366                                                                                 FunctionCall2(
367                                                                                                           &(prs->start_info),
368                                                                                    PointerGetDatum(VARDATA(txt)),
369                                                                    Int32GetDatum(VARSIZE(txt) - VARHDRSZ)
370                                                                                                           )
371                 );
372
373         while ((type = DatumGetInt32(FunctionCall3(
374                                                                                            &(prs->getlexeme_info),
375                                                                                            PointerGetDatum(prs->prs),
376                                                                                            PointerGetDatum(&lex),
377                                                                                   PointerGetDatum(&llen)))) != 0)
378         {
379
380                 if (st->cur >= st->len)
381                 {
382                         st->len = 2 * st->len;
383                         st->list = (LexemEntry *) repalloc(st->list, sizeof(LexemEntry) * st->len);
384                 }
385                 st->list[st->cur].lexem = palloc(llen + 1);
386                 memcpy(st->list[st->cur].lexem, lex, llen);
387                 st->list[st->cur].lexem[llen] = '\0';
388                 st->list[st->cur].type = type;
389                 st->cur++;
390         }
391
392         FunctionCall1(
393                                   &(prs->end_info),
394                                   PointerGetDatum(prs->prs)
395                 );
396
397         st->len = st->cur;
398         st->cur = 0;
399
400         funcctx->user_fctx = (void *) st;
401         tupdesc = RelationNameGetTupleDesc("tokenout");
402         funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);
403         MemoryContextSwitchTo(oldcontext);
404 }
405
406 static Datum
407 prs_process_call(FuncCallContext *funcctx)
408 {
409         PrsStorage *st;
410
411         st = (PrsStorage *) funcctx->user_fctx;
412         if (st->cur < st->len)
413         {
414                 Datum           result;
415                 char       *values[2];
416                 char            tid[16];
417                 HeapTuple       tuple;
418
419                 values[0] = tid;
420                 sprintf(tid, "%d", st->list[st->cur].type);
421                 values[1] = st->list[st->cur].lexem;
422                 tuple = BuildTupleFromCStrings(funcctx->attinmeta, values);
423                 result = HeapTupleGetDatum(tuple);
424
425                 pfree(values[1]);
426                 st->cur++;
427                 return result;
428         }
429         else
430         {
431                 if (st->list)
432                         pfree(st->list);
433                 pfree(st);
434         }
435         return (Datum) 0;
436 }
437
438
439
440 PG_FUNCTION_INFO_V1(parse);
441 Datum           parse(PG_FUNCTION_ARGS);
442 Datum
443 parse(PG_FUNCTION_ARGS)
444 {
445         FuncCallContext *funcctx;
446         Datum           result;
447         SET_FUNCOID();
448         if (SRF_IS_FIRSTCALL())
449         {
450                 text       *txt = PG_GETARG_TEXT_P(1);
451
452                 funcctx = SRF_FIRSTCALL_INIT();
453                 prs_setup_firstcall(funcctx, PG_GETARG_OID(0), txt);
454                 PG_FREE_IF_COPY(txt, 1);
455         }
456
457         funcctx = SRF_PERCALL_SETUP();
458
459         if ((result = prs_process_call(funcctx)) != (Datum) 0)
460                 SRF_RETURN_NEXT(funcctx, result);
461         SRF_RETURN_DONE(funcctx);
462 }
463
464 PG_FUNCTION_INFO_V1(parse_byname);
465 Datum           parse_byname(PG_FUNCTION_ARGS);
466 Datum
467 parse_byname(PG_FUNCTION_ARGS)
468 {
469         FuncCallContext *funcctx;
470         Datum           result;
471         SET_FUNCOID();
472         if (SRF_IS_FIRSTCALL())
473         {
474                 text       *name = PG_GETARG_TEXT_P(0);
475                 text       *txt = PG_GETARG_TEXT_P(1);
476
477                 funcctx = SRF_FIRSTCALL_INIT();
478                 prs_setup_firstcall(funcctx, name2id_prs(name), txt);
479                 PG_FREE_IF_COPY(name, 0);
480                 PG_FREE_IF_COPY(txt, 1);
481         }
482
483         funcctx = SRF_PERCALL_SETUP();
484
485         if ((result = prs_process_call(funcctx)) != (Datum) 0)
486                 SRF_RETURN_NEXT(funcctx, result);
487         SRF_RETURN_DONE(funcctx);
488 }
489
490
491 PG_FUNCTION_INFO_V1(parse_current);
492 Datum           parse_current(PG_FUNCTION_ARGS);
493 Datum
494 parse_current(PG_FUNCTION_ARGS)
495 {
496         FuncCallContext *funcctx;
497         Datum           result;
498         SET_FUNCOID();
499         if (SRF_IS_FIRSTCALL())
500         {
501                 text       *txt = PG_GETARG_TEXT_P(0);
502
503                 funcctx = SRF_FIRSTCALL_INIT();
504                 if (current_parser_id == InvalidOid)
505                         current_parser_id = name2id_prs(char2text("default"));
506                 prs_setup_firstcall(funcctx, current_parser_id, txt);
507                 PG_FREE_IF_COPY(txt, 0);
508         }
509
510         funcctx = SRF_PERCALL_SETUP();
511
512         if ((result = prs_process_call(funcctx)) != (Datum) 0)
513                 SRF_RETURN_NEXT(funcctx, result);
514         SRF_RETURN_DONE(funcctx);
515 }
516
517 PG_FUNCTION_INFO_V1(headline);
518 Datum           headline(PG_FUNCTION_ARGS);
519 Datum
520 headline(PG_FUNCTION_ARGS)
521 {
522         text       *in = PG_GETARG_TEXT_P(1);
523         QUERYTYPE  *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(2)));
524         text       *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
525         HLPRSTEXT       prs;
526         text       *out;
527         TSCfgInfo  *cfg;
528         WParserInfo *prsobj;
529
530         SET_FUNCOID();
531         cfg = findcfg(PG_GETARG_OID(0));
532         prsobj = findprs(cfg->prs_id);
533
534         memset(&prs, 0, sizeof(HLPRSTEXT));
535         prs.lenwords = 32;
536         prs.words = (HLWORD *) palloc(sizeof(HLWORD) * prs.lenwords);
537         hlparsetext(cfg, &prs, query, VARDATA(in), VARSIZE(in) - VARHDRSZ);
538
539
540         FunctionCall3(
541                                   &(prsobj->headline_info),
542                                   PointerGetDatum(&prs),
543                                   PointerGetDatum(opt),
544                                   PointerGetDatum(query)
545                 );
546
547         out = genhl(&prs);
548
549         PG_FREE_IF_COPY(in, 1);
550         PG_FREE_IF_COPY(query, 2);
551         if (opt)
552                 PG_FREE_IF_COPY(opt, 3);
553         pfree(prs.words);
554         pfree(prs.startsel);
555         pfree(prs.stopsel);
556
557         PG_RETURN_POINTER(out);
558 }
559
560
561 PG_FUNCTION_INFO_V1(headline_byname);
562 Datum           headline_byname(PG_FUNCTION_ARGS);
563 Datum
564 headline_byname(PG_FUNCTION_ARGS)
565 {
566         text       *cfg = PG_GETARG_TEXT_P(0);
567
568         Datum           out;
569         SET_FUNCOID();
570         out = DirectFunctionCall4(
571                                                                                   headline,
572                                                                           ObjectIdGetDatum(name2id_cfg(cfg)),
573                                                                                   PG_GETARG_DATUM(1),
574                                                                                   PG_GETARG_DATUM(2),
575                         (PG_NARGS() > 3) ? PG_GETARG_DATUM(3) : PointerGetDatum(NULL)
576         );
577
578         PG_FREE_IF_COPY(cfg, 0);
579         PG_RETURN_DATUM(out);
580 }
581
582 PG_FUNCTION_INFO_V1(headline_current);
583 Datum           headline_current(PG_FUNCTION_ARGS);
584 Datum
585 headline_current(PG_FUNCTION_ARGS)
586 {
587         SET_FUNCOID();
588         PG_RETURN_DATUM(DirectFunctionCall4(
589                                                                                 headline,
590                                                                                 ObjectIdGetDatum(get_currcfg()),
591                                                                                 PG_GETARG_DATUM(0),
592                                                                                 PG_GETARG_DATUM(1),
593                         (PG_NARGS() > 2) ? PG_GETARG_DATUM(2) : PointerGetDatum(NULL)
594                                                                                 ));
595 }