OSDN Git Service

pgindent run.
[pg-rex/syncrep.git] / contrib / tsearch / morph.c
1 /*
2  * morphology module
3  * New dictionaries are included via dict.h. For languages which
4  * use the Latin charset it may be necessary to modify the mapdict table.
5  * Teodor Sigaev <teodor@stack.net>
6  */
7 #include "postgres.h"
8
9 #include "utils/elog.h"
10 #include "utils/palloc.h"
11 #include "utils/builtins.h"
12 #include "catalog/pg_control.h"
13 #include "utils/pg_locale.h"
14
15 #include "morph.h"
16 #include "deflex.h"
17
18 /*
19  * Struct for calling dictionaries
20  * All of this methods are optional, but
21  * if all methods are NULL, then dictionary does nothing :)
22  * Return value of lemmatize must be palloced or the same.
23  * Return value of init must be malloced in other case
24  * it will be free in end of transaction!
25  */
26 typedef struct
27 {
28         char            localename[LOCALE_NAME_BUFLEN];
29         /* init dictionary */
30         void       *(*init) (void);
31         /* close dictionary */
32         void            (*close) (void *);
33         /* find in dictionary */
34         char       *(*lemmatize) (void *, char *, int *);
35         int                     (*is_stoplemm) (void *, char *, int);
36         int                     (*is_stemstoplemm) (void *, char *, int);
37 }       DICT;
38
39 /* insert all dictionaries */
40 #define DICT_BODY
41 #include "dict.h"
42 #undef  DICT_BODY
43
44 /* fill dictionary's structure */
45 #define DICT_TABLE
46 DICT            dicts[] = {
47         {
48                 "C", NULL, NULL, NULL, NULL, NULL               /* fake dictionary */
49         }
50 #include "dict.h"
51 };
52
53 #undef DICT_TABLE
54
55 /* array for storing dictinary's objects (if needed) */
56 void       *dictobjs[
57                                          lengthof(dicts)];
58
59 #define STOPLEXEM       -2
60 #define BYLOCALE        -1
61 #define NODICT          0
62 #define DEFAULTDICT 1
63
64 #define MAXNDICT        2
65 typedef int2 MAPDICT[MAXNDICT];
66
67 #define GETDICT(x,i)    *( ((int2*)(x)) + (i) )
68
69 /* map dictionaries for lexem type */
70 static MAPDICT mapdict[] = {
71         {NODICT, NODICT},                       /* not used                     */
72         {DEFAULTDICT, NODICT},          /* LATWORD              */
73         {BYLOCALE, NODICT},                     /* NONLATINWORD         */
74         {BYLOCALE, DEFAULTDICT},        /* UWORD                */
75         {NODICT, NODICT},                       /* EMAIL                */
76         {NODICT, NODICT},                       /* FURL                 */
77         {NODICT, NODICT},                       /* HOST                 */
78         {NODICT, NODICT},                       /* SCIENTIFIC           */
79         {NODICT, NODICT},                       /* VERSIONNUMBER                */
80         {BYLOCALE, DEFAULTDICT},        /* PARTHYPHENWORD               */
81         {BYLOCALE, NODICT},                     /* CYRPARTHYPHENWORD */
82         {DEFAULTDICT, NODICT},          /* LATPARTHYPHENWORD            */
83         {STOPLEXEM, NODICT},            /* SPACE                */
84         {STOPLEXEM, NODICT},            /* TAG          */
85         {STOPLEXEM, NODICT},            /* HTTP                 */
86         {BYLOCALE, DEFAULTDICT},        /* HYPHENWORD           */
87         {DEFAULTDICT, NODICT},          /* LATHYPHENWORD                */
88         {BYLOCALE, NODICT},                     /* CYRHYPHENWORD        */
89         {NODICT, NODICT},                       /* URI                  */
90         {NODICT, NODICT},                       /* FILEPATH             */
91         {NODICT, NODICT},                       /* DECIMAL              */
92         {NODICT, NODICT},                       /* SIGNEDINT            */
93         {NODICT, NODICT},                       /* UNSIGNEDINT          */
94         {STOPLEXEM, NODICT}                     /* HTMLENTITY           */
95 };
96
97 static bool inited = false;
98
99 void
100 initmorph(void)
101 {
102         int                     i,
103                                 j,
104                                 k;
105         MAPDICT    *md;
106         bool            needinit[lengthof(dicts)];
107         PG_LocaleCategories lc;
108
109         int                     bylocaledict = NODICT;
110
111         if (inited)
112                 return;
113         for (i = 1; i < lengthof(dicts); i++)
114                 needinit[i] = false;
115
116         PGLC_current(&lc);
117         if (lc.lc_ctype)
118                 for (i = 1; i < lengthof(dicts); i++)
119                         if (strcmp(dicts[i].localename, lc.lc_ctype) == 0)
120                         {
121                                 bylocaledict = i;
122                                 break;
123                         }
124         PGLC_free_categories(&lc);
125
126         for (i = 1; i < lengthof(mapdict); i++)
127         {
128                 k = 0;
129                 md = &mapdict[i];
130                 for (j = 0; j < MAXNDICT; j++)
131                 {
132                         GETDICT(md, k) = GETDICT(md, j);
133                         if (GETDICT(md, k) == NODICT)
134                                 break;
135                         else if (GETDICT(md, k) == BYLOCALE)
136                         {
137                                 if (bylocaledict == NODICT)
138                                         continue;
139                                 GETDICT(md, k) = bylocaledict;
140                         }
141                         if (GETDICT(md, k) >= (int2) lengthof(dicts))
142                                 continue;
143                         needinit[GETDICT(md, k)] = true;
144                         k++;
145                 }
146                 for (; k < MAXNDICT; k++)
147                         if (GETDICT(md, k) != STOPLEXEM)
148                                 GETDICT(md, k) = NODICT;
149         }
150
151         for (i = 1; i < lengthof(dicts); i++)
152                 if (needinit[i] && dicts[i].init)
153                         dictobjs[i] = (*(dicts[i].init)) ();
154
155         inited = true;
156         return;
157 }
158
159 char *
160 lemmatize(char *word, int *len, int type)
161 {
162         int2            nd;
163         int                     i;
164         DICT       *dict;
165
166         for (i = 0; i < MAXNDICT; i++)
167         {
168                 nd = GETDICT(&mapdict[type], i);
169                 if (nd == NODICT)
170                 {
171                         /* there is no dictionary */
172                         return word;
173                 }
174                 else if (nd == STOPLEXEM)
175                 {
176                         /* word is stopword */
177                         return NULL;
178                 }
179                 else
180                 {
181                         dict = &dicts[nd];
182                         if (dict->is_stoplemm && (*(dict->is_stoplemm)) (dictobjs[nd], word, *len))
183                                 return NULL;
184                         if (dict->lemmatize)
185                         {
186                                 int                     oldlen = *len;
187                                 char       *newword = (*(dict->lemmatize)) (dictobjs[nd], word, len);
188
189                                 /* word is recognized by distionary */
190                                 if (newword != word || *len != oldlen)
191                                 {
192                                         if (dict->is_stemstoplemm &&
193                                         (*(dict->is_stemstoplemm)) (dictobjs[nd], word, *len))
194                                         {
195                                                 if (newword != word && newword)
196                                                         pfree(newword);
197                                                 return NULL;
198                                         }
199                                         return newword;
200                                 }
201                         }
202                 }
203         }
204
205         return word;
206 }
207
208 bool
209 is_stoptype(int type)
210 {
211         return (GETDICT(&mapdict[type], 0) == STOPLEXEM) ? true : false;
212 }