From b88605337ea37ce1080a696937540cf8b48338d1 Mon Sep 17 00:00:00 2001 From: Teodor Sigaev Date: Mon, 21 Jul 2003 10:27:44 +0000 Subject: [PATCH] tsearch2 module --- contrib/tsearch2/Makefile | 44 + contrib/tsearch2/README.tsearch2 | 199 +++ contrib/tsearch2/common.c | 82 ++ contrib/tsearch2/common.h | 24 + contrib/tsearch2/crc32.c | 103 ++ contrib/tsearch2/crc32.h | 10 + contrib/tsearch2/data/test_tsearch.data | 508 +++++++ contrib/tsearch2/dict.c | 275 ++++ contrib/tsearch2/dict.h | 38 + contrib/tsearch2/dict_ex.c | 59 + contrib/tsearch2/dict_ispell.c | 141 ++ contrib/tsearch2/dict_snowball.c | 108 ++ contrib/tsearch2/dict_syn.c | 157 ++ contrib/tsearch2/docs/tsearch-V2-intro.html | 975 +++++++++++++ contrib/tsearch2/docs/tsearch2-guide.html | 1057 ++++++++++++++ contrib/tsearch2/docs/tsearch2-ref.html | 448 ++++++ contrib/tsearch2/expected/tsearch2.out | 2055 +++++++++++++++++++++++++++ contrib/tsearch2/gendict/Makefile.IN | 12 + contrib/tsearch2/gendict/README.gendict | 130 ++ contrib/tsearch2/gendict/config.sh | 183 +++ contrib/tsearch2/gendict/dict_snowball.c.IN | 52 + contrib/tsearch2/gendict/dict_tmpl.c.IN | 64 + contrib/tsearch2/gendict/sql.IN | 26 + contrib/tsearch2/gistidx.c | 686 +++++++++ contrib/tsearch2/gistidx.h | 67 + contrib/tsearch2/ispell/spell.c | 520 +++++++ contrib/tsearch2/ispell/spell.h | 51 + contrib/tsearch2/prs_dcfg.c | 119 ++ contrib/tsearch2/query.c | 862 +++++++++++ contrib/tsearch2/query.h | 55 + contrib/tsearch2/rank.c | 591 ++++++++ contrib/tsearch2/rewrite.c | 292 ++++ contrib/tsearch2/rewrite.h | 7 + contrib/tsearch2/snmap.c | 75 + contrib/tsearch2/snmap.h | 23 + contrib/tsearch2/snowball/api.c | 48 + contrib/tsearch2/snowball/api.h | 27 + contrib/tsearch2/snowball/english_stem.c | 894 ++++++++++++ contrib/tsearch2/snowball/english_stem.h | 8 + contrib/tsearch2/snowball/header.h | 57 + contrib/tsearch2/snowball/russian_stem.c | 626 ++++++++ contrib/tsearch2/snowball/russian_stem.h | 8 + contrib/tsearch2/snowball/utilities.c | 328 
+++++ contrib/tsearch2/sql/tsearch2.sql | 243 ++++ contrib/tsearch2/stopword.c | 101 ++ contrib/tsearch2/stopword/english.stop | 128 ++ contrib/tsearch2/stopword/russian.stop | 151 ++ contrib/tsearch2/ts_cfg.c | 509 +++++++ contrib/tsearch2/ts_cfg.h | 68 + contrib/tsearch2/ts_stat.c | 412 ++++++ contrib/tsearch2/ts_stat.h | 32 + contrib/tsearch2/tsearch.sql._in | 674 +++++++++ contrib/tsearch2/tsvector.c | 804 +++++++++++ contrib/tsearch2/tsvector.h | 71 + contrib/tsearch2/tsvector_op.c | 264 ++++ contrib/tsearch2/untsearch.sql.in | 62 + contrib/tsearch2/wordparser/deflex.c | 56 + contrib/tsearch2/wordparser/deflex.h | 34 + contrib/tsearch2/wordparser/parser.h | 11 + contrib/tsearch2/wordparser/parser.l | 346 +++++ contrib/tsearch2/wparser.c | 529 +++++++ contrib/tsearch2/wparser.h | 28 + contrib/tsearch2/wparser_def.c | 291 ++++ 63 files changed, 16908 insertions(+) create mode 100644 contrib/tsearch2/Makefile create mode 100644 contrib/tsearch2/README.tsearch2 create mode 100644 contrib/tsearch2/common.c create mode 100644 contrib/tsearch2/common.h create mode 100644 contrib/tsearch2/crc32.c create mode 100644 contrib/tsearch2/crc32.h create mode 100644 contrib/tsearch2/data/test_tsearch.data create mode 100644 contrib/tsearch2/dict.c create mode 100644 contrib/tsearch2/dict.h create mode 100644 contrib/tsearch2/dict_ex.c create mode 100644 contrib/tsearch2/dict_ispell.c create mode 100644 contrib/tsearch2/dict_snowball.c create mode 100644 contrib/tsearch2/dict_syn.c create mode 100644 contrib/tsearch2/docs/tsearch-V2-intro.html create mode 100644 contrib/tsearch2/docs/tsearch2-guide.html create mode 100644 contrib/tsearch2/docs/tsearch2-ref.html create mode 100644 contrib/tsearch2/expected/tsearch2.out create mode 100644 contrib/tsearch2/gendict/Makefile.IN create mode 100644 contrib/tsearch2/gendict/README.gendict create mode 100755 contrib/tsearch2/gendict/config.sh create mode 100644 contrib/tsearch2/gendict/dict_snowball.c.IN create mode 100644 
contrib/tsearch2/gendict/dict_tmpl.c.IN create mode 100644 contrib/tsearch2/gendict/sql.IN create mode 100644 contrib/tsearch2/gistidx.c create mode 100644 contrib/tsearch2/gistidx.h create mode 100644 contrib/tsearch2/ispell/spell.c create mode 100644 contrib/tsearch2/ispell/spell.h create mode 100644 contrib/tsearch2/prs_dcfg.c create mode 100644 contrib/tsearch2/query.c create mode 100644 contrib/tsearch2/query.h create mode 100644 contrib/tsearch2/rank.c create mode 100644 contrib/tsearch2/rewrite.c create mode 100644 contrib/tsearch2/rewrite.h create mode 100644 contrib/tsearch2/snmap.c create mode 100644 contrib/tsearch2/snmap.h create mode 100644 contrib/tsearch2/snowball/api.c create mode 100644 contrib/tsearch2/snowball/api.h create mode 100644 contrib/tsearch2/snowball/english_stem.c create mode 100644 contrib/tsearch2/snowball/english_stem.h create mode 100644 contrib/tsearch2/snowball/header.h create mode 100644 contrib/tsearch2/snowball/russian_stem.c create mode 100644 contrib/tsearch2/snowball/russian_stem.h create mode 100644 contrib/tsearch2/snowball/utilities.c create mode 100644 contrib/tsearch2/sql/tsearch2.sql create mode 100644 contrib/tsearch2/stopword.c create mode 100644 contrib/tsearch2/stopword/english.stop create mode 100644 contrib/tsearch2/stopword/russian.stop create mode 100644 contrib/tsearch2/ts_cfg.c create mode 100644 contrib/tsearch2/ts_cfg.h create mode 100644 contrib/tsearch2/ts_stat.c create mode 100644 contrib/tsearch2/ts_stat.h create mode 100644 contrib/tsearch2/tsearch.sql._in create mode 100644 contrib/tsearch2/tsvector.c create mode 100644 contrib/tsearch2/tsvector.h create mode 100644 contrib/tsearch2/tsvector_op.c create mode 100644 contrib/tsearch2/untsearch.sql.in create mode 100644 contrib/tsearch2/wordparser/deflex.c create mode 100644 contrib/tsearch2/wordparser/deflex.h create mode 100644 contrib/tsearch2/wordparser/parser.h create mode 100644 contrib/tsearch2/wordparser/parser.l create mode 100644 
contrib/tsearch2/wparser.c create mode 100644 contrib/tsearch2/wparser.h create mode 100644 contrib/tsearch2/wparser_def.c diff --git a/contrib/tsearch2/Makefile b/contrib/tsearch2/Makefile new file mode 100644 index 0000000000..a58370ec51 --- /dev/null +++ b/contrib/tsearch2/Makefile @@ -0,0 +1,44 @@ +subdir = contrib/tsearch2 +top_builddir = ../.. +include $(top_builddir)/src/Makefile.global + + +override CPPFLAGS := -I. -I./snowball -I./ispell -I./wordparser $(CPPFLAGS) + +MODULE_big = tsearch2 +OBJS = dict_ex.o dict.o snmap.o stopword.o common.o prs_dcfg.o \ + snowball/english_stem.o snowball/api.o snowball/russian_stem.o snowball/utilities.o \ + dict_snowball.o ispell/spell.o dict_ispell.o dict_syn.o \ + wparser.o wordparser/parser.o wordparser/deflex.o wparser_def.o \ + ts_cfg.o tsvector.o rewrite.o crc32.o query.o gistidx.o \ + tsvector_op.o rank.o ts_stat.o + +DATA_built = tsearch2.sql untsearch2.sql +DOCS = README.tsearch2 +REGRESS = tsearch2 + +wordparser/parser.c: wordparser/parser.l +ifdef FLEX + $(FLEX) $(FLEXFLAGS) -8 -Ptsearch2_yy -o'$@' $< +else + @$(missing) flex $< $@ +endif + +EXTRA_CLEAN = wordparser/parser.c tsearch2.sql.in + +SHLIB_LINK := -lm +include $(top_srcdir)/contrib/contrib-global.mk +# DO NOT DELETE + +install: installstop + +installstop: + cp stopword/*.stop $(datadir) + + +tsearch2.sql.in: tsearch.sql._in + sed 's,DATA_PATH,$(datadir),g' < $< > $@ + +untsearch2.sql: untsearch.sql.in + cp $< $@ + diff --git a/contrib/tsearch2/README.tsearch2 b/contrib/tsearch2/README.tsearch2 new file mode 100644 index 0000000000..801e3fe5f6 --- /dev/null +++ b/contrib/tsearch2/README.tsearch2 @@ -0,0 +1,199 @@ +Tsearch2 - full text search extension for PostgreSQL + + [10][Online version] of this document is available + + This module is sponsored by Delta-Soft Ltd., Moscow, Russia. + + Notice: This version is fully incompatible with old tsearch (V1), + which is considered as deprecated in upcoming 7.4 release and + obsoleted in 7.5. 
+ + The Tsearch2 contrib module contains an implementation of a new data + type tsvector - a searchable data type with indexed access. In a + nutshell, tsvector is a set of unique words along with their + positional information in the document, organized in a special + structure optimized for fast access and lookup. In addition, each word + entry, besides its position in the document, can have a weight + attribute describing the importance of this word (at a specific + position) in the document. A set of bit-signatures of a fixed length, + representing tsvectors, is stored in a search tree (developed using + PostgreSQL GiST), which provides online update of the full text index + and fast query lookup. The module provides indexed access methods, queries, + operations and supporting routines for the tsvector data type and easy + conversion of text data to tsvector. Table-driven configuration allows + creation of custom configurations optimized for specific searches using + standard SQL commands. + + Configuration allows you to: + * specify the type of lexemes to be indexed and the way they are + processed. + * specify dictionaries to be used along with stop-word recognition. + * specify the parser used to process a document. + + See [11]Documentation Roadmap for links to documentation. + +Authors + + * Oleg Bartunov, Moscow, Moscow University, Russia + * Teodor Sigaev, Moscow, Delta-Soft Ltd., Russia + +Contributors + + * Robert John Shepherd and Andrew J. 
Kopciuch submitted + "Introduction to tsearch" (Robert - tsearch v1, Andrew - tsearch + v2) + * Brandon Craig Rhodes wrote "Tsearch2 Guide" and "Tsearch2 + Reference" and proposed a new naming convention for tsearch V2 + +New features + + * Relevance ranking of search results + * Table-driven configuration + * Morphology support (ispell dictionaries, snowball stemmers) + * Headline support (text fragments with highlighted search terms) + * Ability to plug in custom dictionaries and parsers + * Synonym dictionary + * Generator of templates for dictionaries (built-in snowball stemmer + support) + * Statistics on indexed words are available + +Limitations + + * A lexeme should be no longer than 2048 bytes + * The number of lexemes is limited to 2^32. Note that the actual + capacity of a tsvector depends on whether positional information + is stored or not. + * tsvector - the size is limited to approximately 2^20 bytes. + * tsquery - the number of entries (lexemes and operations) < 32768 + * Positional information + + maximal position of a lexeme < 2^14 (16384) + + a lexeme can have at most 256 positions + +References + + * GiST development site - + [12]http://www.sai.msu.su/~megera/postgres/gist + * OpenFTS home page - [13]http://openfts.sourceforge.net/ + * Mailing list - + [14]http://sourceforge.net/mailarchive/forum.php?forum=openfts-general + + [15]Documentation Roadmap + +Documentation Roadmap + + * Several docs are available from the docs/ subdirectory + + "Tsearch V2 Introduction" by Andrew Kopciuch + + "Tsearch2 Guide" by Brandon Rhodes + + "Tsearch2 Reference" by Brandon Rhodes + * Readme.gendict in the gendict/ subdirectory + + [16][Gendict tutorial] + + An online version of the documentation is always available from the Tsearch V2 + home page - + [17]http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/ + +Support + + The authors strongly recommend using the [18][openfts-general] or + [19][pgsql-general] mailing lists for questions and discussions. 
+ +Caution + + In spite of the apparent ease of full text searching with our tsearch module + (the authors hope it is so), any serious search engine requires profound + study of various aspects, such as stop words, dictionaries and special + parsers. The tsearch module was designed to facilitate both cases. + +Development History + + Pre-tsearch era + Development of OpenFTS began in 2000 after realizing that we + needed a search engine optimized for online updates and able to + access metadata from the database. This is essential for online + news agencies, web portals, digital libraries, etc. Most search + engines available utilize an inverted index which is very fast + for searching but very slow for online updates. Incremental + update of an inverted index is a complex engineering task, + while we needed something light, free and with the ability to + access metadata from the database. The last requirement is very + important because in a real-life application a search engine + should always consult metadata (topic, permissions, date + range, version, etc.). We extensively use PostgreSQL as a + database backend and have no intention to move from it, so the + problem was to find a data structure and a fast way to access + it. PostgreSQL has a rather unique data type for storing sets + (think about words) - arrays, but lacks index access to them. A + document is parsed into lexemes, which are identified in + various ways (e.g. stemming, morphology, dictionary), and as a + result is reduced to an array of integer numbers. During our + research we found a paper by Joseph Hellerstein which + introduced an interesting data structure suitable for sets - + RD-tree (Russian Doll tree). It looked very attractive, but + implementing it in PostgreSQL seemed difficult because of our + ignorance of database internals. 
Further research led us to + the idea to use GiST for implementing RD-tree, but at that time + the GiST code had for a long while remained untouched and + contained several bugs. After work on improving GiST for + version 7.0.3 of PostgreSQL was done, we were able to implement + RD-Tree and use it for index access to arrays of integers. This + implementation was ideally suited for small arrays and + eliminated complex joins, but was practically useless for + indexing large arrays. The next improvement came from an idea + to represent a document by a single bit-signature, a so-called + superimposed signature (see "Index Structures for Databases + Containing Data Items with Set-valued Attributes", 1997, Sven + Helmer for details). We developed the contrib/intarray module + and used it for full text indexing. + + tsearch v1 + It was inconvenient to use integer id's instead of words, so we + introduced a new data type called 'txtidx' - a searchable data + type (textual) with indexed access. This was a first step of + our work on an implementation of a built-in PostgreSQL full + text search engine. Even though tsearch v1 had many features of + a search engine it lacked configuration support and relevance + ranking. People were encouraged to use OpenFTS, which provided + relevance ranking based on coordinate information and flexible + configuration. OpenFTS v.0.34 is the last version based on + tsearch v1. + + tsearch V2 + People recognized tsearch as a powerful tool for full text + searching and insisted on adding ranking support, better + configurability, etc. We already thought about moving most of + the features of OpenFTS to tsearch, and in early 2003 we + decided to work on a new version of tsearch - tsearch v2. We've + abandoned auxiliary index tables which were used by OpenFTS to + store coordinate information and modified the txtidx type to + store them internally. 
Also, we've added table-driven + configuration, support of ispell dictionaries, snowball + stemmers and the ability to specify which types of lexemes to + index. Also, it's now possible to generate headlines of + documents with highlighted search terms. These changes make + tsearch more user friendly and turn it into a really powerful + full text search engine. After announcing the alpha version, we + received a proposal from Brandon Rhodes to rename tsearch + functions to be more consistent. So, we have renamed the txtidx + type to tsvector and other things as well. + + To allow users of tsearch v1 a smooth upgrade, we named the module + tsearch2. + + A future release of OpenFTS (v.0.35) will be based on tsearch2. Brave + people can download it from OpenFTS CVS (see the link from the [20][OpenFTS + page]). + +References + + 10. http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/docs/Tsearch_V2_Readme.html + 11. http://www.sai.msu.su/~megera/oddmuse/index.cgi/Tsearch_V2_Readme#Documentation_Roadmap + 12. http://www.sai.msu.su/~megera/postgres/gist + 13. http://openfts.sourceforge.net/ + 14. http://sourceforge.net/mailarchive/forum.php?forum=openfts-general + 15. http://www.sai.msu.su/~megera/oddmuse/index.cgi?action=anchor&id=Documentation_Roadmap#Documentation_Roadmap + 16. http://www.sai.msu.su/~megera/oddmuse/index.cgi?Gendict + 17. http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/ + 18. http://sourceforge.net/mailarchive/forum.php?forum=openfts-general + 19. http://archives.postgresql.org/pgsql-general/ + 20. 
http://openfts.sourceforge.net/ diff --git a/contrib/tsearch2/common.c b/contrib/tsearch2/common.c new file mode 100644 index 0000000000..acce0f569f --- /dev/null +++ b/contrib/tsearch2/common.c @@ -0,0 +1,82 @@ +#include "postgres.h" +#include "common.h" +#include "wparser.h" +#include "ts_cfg.h" +#include "dict.h" + +text* +char2text(char* in) { + return charl2text(in, strlen(in)); +} + +text* charl2text(char* in, int len) { + text *out=(text*)palloc(len+VARHDRSZ); + memcpy(VARDATA(out), in, len); + VARATT_SIZEP(out) = len+VARHDRSZ; + return out; +} + +char +*text2char(text* in) { + char *out=palloc( VARSIZE(in) ); + memcpy(out, VARDATA(in), VARSIZE(in)-VARHDRSZ); + out[ VARSIZE(in)-VARHDRSZ ] ='\0'; + return out; +} + +char +*pnstrdup(char* in, int len) { + char *out=palloc( len+1 ); + memcpy(out, in, len); + out[len]='\0'; + return out; +} + +text +*ptextdup(text* in) { + text *out=(text*)palloc( VARSIZE(in) ); + memcpy(out,in,VARSIZE(in)); + return out; +} + +text +*mtextdup(text* in) { + text *out=(text*)malloc( VARSIZE(in) ); + if ( !out ) + ts_error(ERROR, "No memory"); + memcpy(out,in,VARSIZE(in)); + return out; +} + +void +ts_error(int state, const char *format, ...) 
{ + va_list args; + int tlen = 128, len=0; + char *buf; + + reset_cfg(); + reset_dict(); + reset_prs(); + + va_start(args, format); + buf = palloc(tlen); + len = vsnprintf(buf, tlen-1, format, args); + if ( len >= tlen ) { + tlen=len+1; + buf = repalloc( buf, tlen ); + vsnprintf(buf, tlen-1, format, args); + } + va_end(args); + + elog(state,buf); + pfree(buf); +} + +int +text_cmp(text *a, text *b) { + if ( VARSIZE(a) == VARSIZE(b) ) + return strncmp( VARDATA(a), VARDATA(b), VARSIZE(a)-VARHDRSZ ); + return (int)VARSIZE(a) - (int)VARSIZE(b); + +} + diff --git a/contrib/tsearch2/common.h b/contrib/tsearch2/common.h new file mode 100644 index 0000000000..70313fa4d2 --- /dev/null +++ b/contrib/tsearch2/common.h @@ -0,0 +1,24 @@ +#ifndef __TS_COMMON_H__ +#define __TS_COMMON_H__ +#include "postgres.h" +#include "fmgr.h" + +#ifndef PG_NARGS +#define PG_NARGS() (fcinfo->nargs) +#endif + +text* char2text(char* in); +text* charl2text(char* in, int len); +char *text2char(text* in); +char *pnstrdup(char* in, int len); +text *ptextdup(text* in); +text *mtextdup(text* in); + +int text_cmp(text *a, text *b); + +#define NEXTVAL(x) ( (text*)( (char*)(x) + INTALIGN( VARSIZE(x) ) ) ) +#define ARRNELEMS(x) ArrayGetNItems( ARR_NDIM(x), ARR_DIMS(x)) + +void ts_error(int state, const char *format, ...); + +#endif diff --git a/contrib/tsearch2/crc32.c b/contrib/tsearch2/crc32.c new file mode 100644 index 0000000000..dc93db727c --- /dev/null +++ b/contrib/tsearch2/crc32.c @@ -0,0 +1,103 @@ +/* Both POSIX and CRC32 checksums */ + +#include +#include +#include + +#include "crc32.h" + +/* + * This code implements the AUTODIN II polynomial + * The variable corresponding to the macro argument "crc" should + * be an unsigned long. 
/*
 * Original code by Spencer Garrett
 */

/*
 * Fold one input byte "ch" into the running checksum "crc" (assigns to crc).
 * Only the low 8 bits of (crc ^ ch) select the table entry, so the result
 * does not depend on whether ch is a signed or an unsigned byte.
 * NOTE(review): the file's header comment asks for an unsigned long crc,
 * but crc32_sz() below uses unsigned int.
 */
#define _CRC32_(crc, ch)	 (crc = (crc >> 8) ^ crc32tab[(crc ^ (ch)) & 0xff])

/* generated using the AUTODIN II polynomial
 *	x^32 + x^26 + x^23 + x^22 + x^16 +
 *	x^12 + x^11 + x^10 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1 + 1
 */

static const unsigned int crc32tab[256] = {
	0x00000000, 0x77073096, 0xee0e612c, 0x990951ba,
	0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3,
	0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
	0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91,
	0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de,
	0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
	0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec,
	0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5,
	0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
	0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b,
	0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940,
	0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
	0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116,
	0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f,
	0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
	0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d,
	0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a,
	0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
	0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818,
	0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01,
	0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
	0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457,
	0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c,
	0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
	0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2,
	0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb,
	0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
	0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9,
	0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086,
	0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
	0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4,
	0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad,
	0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
	0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683,
	0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8,
	0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
	0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe,
	0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7,
	0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
	0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5,
	0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252,
	0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
	0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60,
	0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79,
	0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
	0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f,
	0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04,
	0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
	0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a,
	0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713,
	0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
	0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21,
	0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e,
	0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
	0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c,
	0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45,
	0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
	0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db,
	0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0,
	0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
	0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6,
	0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf,
	0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
	0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d,
};

/*
 * crc32_sz: checksum the first "size" bytes of "buf".
 *
 * The accumulator starts as all ones, each byte is folded in with _CRC32_,
 * and the bitwise complement of the accumulator is returned.  A size of
 * zero therefore yields 0.
 *
 * Changes relative to the original:
 *  - the local "len" was computed but never read and has been removed;
 *  - a negative size used to make the "nr--" countdown run far past the
 *    buffer; it is now treated like an empty buffer;
 *  - bytes are read through an unsigned char pointer (same result, since
 *    the table index is masked to 8 bits).
 */
unsigned int
crc32_sz(char *buf, int size)
{
	const unsigned char *bytes = (const unsigned char *) buf;
	unsigned int crc = ~0U;
	int			i;

	for (i = 0; i < size; i++)
		_CRC32_(crc, bytes[i]);

	return ~crc;
}

/*
 * Patch text that followed crc32.c on the same physical line (the opening
 * of contrib/tsearch2/crc32.h: new file mode 100644,
 * index 0000000000..97254a4a90, @@ -0,0 +1,10 @@), kept for reference:
 *
 *   #ifndef _CRC32_H
 *   #define _CRC32_H
 *
 *   Returns crc32 of data block:
 *   extern unsigned      (continued by "int crc32_sz(char *buf, int size);")
 */
int crc32_sz(char *buf, int size); + +/* Returns crc32 of null-terminated string */ +#define crc32(buf) crc32_sz((buf),strlen(buf)) + +#endif diff --git a/contrib/tsearch2/data/test_tsearch.data b/contrib/tsearch2/data/test_tsearch.data new file mode 100644 index 0000000000..29a26f2428 --- /dev/null +++ b/contrib/tsearch2/data/test_tsearch.data @@ -0,0 +1,508 @@ +\n +\n +\n +\n +\n +\n +\n +\n +\n i8 hy qo xa jl wr le l5 ja jx zf ro vw wd wa cc mm wh fn yd td l8 ec rv th oc ix ir sm y4 gh pr qg ue cx ww zv c9 zv tx eo f5 gd km b9 wb rm ym yl xj u7 xz uk iq tm ux di if uc hc ge +\n gr ty ph jh po wa iw ag wq r3 yd ow rb ip et ej yl a9 dk pu y6 su ov hf xe qe sd qr zt kp ml ea tp pg dq e3 s3 hh gn hz j7 hb qs qd v0 v4 w0 nu ee wk ez un rd sz wx e7 pn yf gh uh ki kx rb qv f1 bh sr yj ry r2 +\n q1 q8 wp w9 vs ww rq de qt wo qp sa rv mc sn u8 yl +\n hv ra sa fr qs ps 4w z5 ls wt ad wy q6 zg bd vt wa e4 ft w7 ld es yg et ic pm sw ja qv ov jm ma b3 wu wi qy ug hs wh ex rt tj en ur e2 ut gv as ui dy qy du qo gv cy lx kw xm fl x2 hd ny nu hh dt wg wh rs wb wz yy yu tj ha ak rw sw io h1 ux ku v6 wc qa rv xb s8 qd f2 zo k2 ew w4 yh yu yi +\n rs tt gp qh wt q6 lg zh vr b8 uy uu lh px jm ww qe xu fp fd rs qu ki dr fn gq gw jv oq zt 2r lc ke wg l9 x3 x5 7g vs ar e7 u2 s8 t0 av dj kl nm u2 zp gf yw ee oc tw a1 +\n qs uz wr gq q9 rl e0 pe dj a9 hp qw aw er kq pp uu pl zo wp fr r6 ej pv u5 hh av lw ko qc pn qj ez n8 wn eu tq +\n po h9 rd qs hr la u0 um me wp 0p rl mv rc ab r0 fe fj fk qn jh iy cn lb bl ln b5 ll yg yh qt qp uz od dq as gn cr qa wa cu fy zy vo xk eq vg mr ns yy t7 yi op th yo ov pv em tc hg az io s5 ct os wu lq dr mp hk si gx +\n hm k5 pw a5 qh nb q3 ql wr wt z7 oz wu wh kv q8 c3 mt mg hb a3 rz pz uo y1 rb av us ek dz q0 d3 qw j2 ls wy qq jf ng eo gl ed ix em he qt du hp jc f2 m9 qp hb l4 gy zf l6 qr dn cp x1 oh qk kk s3 hy wg zs ot wj sl oz ie e9 ay it u5 ai hm gh py hz qk ki h8 ja zu qb ei vc qj hg ev h6 yh u0 tb id +\n qg d1 bt c5 r3 iv g6 d7 rc ml gk uh yn y0 zo 
uh qd wh ib uo u4 om qg ql yz +\n hb a3 q5 pl yj lo qy ki sy fo rj kk zq dl wn 7a zi wn wm yr w3 tv r1 +\n ft k6 iz qn qj q2 q3 bl zd av ro wo lk tg ea ew ed y1 ia yl ic g6 po aw sc zm qn gl wq qw zr jp wt j5 gs vt qt yc rr op yw tl ye hr i8 tb uu j0 xd lz vu nl qd fu wg pf wj bt ee wh t2 tp sz um oo tg ha u4 f5 sw pq pr ju qk mh ki zb vj ob cx df hj ef cj q6 u9 tv rv o4 sy ru fq ir +\n ps ko uk tz vv um t9 uk k2 ja o6 ob +\n qs nb gh ld q7 jc sp el w0 py qx i2 qe la rl qw tu ti dq ue iv oi wa qr ed t3 fg oa of rr fv qz xn wu wq te hx +\n yb ty pq az fi qg qn la bu ji lg wg q8 mi cv rl up lg om oq ym pv in aq gg js ha on ww qr bj vn pv he b5 mh qe cc mk qt rb eu qy rw tr qo ec op sn oh e2 ao iv e4 hy dt s6 qt p1 hb ih qs wg x1 bd l1 t1 ro r9 uv wb aw gu os t0 ah e0 s0 hj pe or qj zz ql fd ks qv bq qm bg ec ry oj u8 u0 yj ru r1 yx o7 +\n z4 wr qz cg nq ir bb gb w7 e5 zc pj e9 px uo fp ts aq db q9 iy qe zv xu a9 l1 mb qw tc qu fi hw ur de e4 hk lj wo wf fi ep rl wh vh ek vp oi sv rh ay hj px aa er tv do ir +\n tr o9 gb tt pp qa qs a5 ps rf q1 kj by ub ru ox co o8 ny wp wa ws rd kk b1 zc rl rz uo ts ig fh db qm q0 bg rr fu ld lr wb en nd cw vr hy rn qr en em au p8 so oh ut hz gq wp ow be ky wj dw t1 pl er wc ot na r9 wl ou un um wx iq sc e8 sn re rr f7 hz h4 ce wz qx wx kp px tl tx ai wq hf ec 6u rz og yt ok yy yp +\n sa pp a7 qm qh of je qj lo ph wt h0 ji cg z8 2v xs zl mo ik hm on tu d8 av ot pn iv ez ja qn pq wy 7r mq qu p1 tu p6 ti ur pj uy ui qo i9 qa nj xm s1 ya fb 7j ro wn t6 wz yu iq yi go en pb aj f5 hf ug uh hk av pr wl wz im ja v9 u2 ks it br wv wn se ia o5 ox ei r2 ig aj sp +\n sa tn z8 ew uo eh g8 zt wy 27 ff uh te en pd eh hv 2e wh ty oi sw xx 2p qs mx wb q3 rl eq aa eu +\n d4 ef ta zq j2 em c0 vv wf kj dw uk ql y9 rn +\n sq nm kl w8 ur kz c1 pc y1 g4 oi jv wr zy ew by se ec yn ti gq gt rd l5 ej yp tk da qz qx ir wm on q2 to ew +\n rd gu z2 kj qk bl 6d wy nw xq iu 8t ri uc kq nx ql oa vi kd o6 +\n ra gr he wy q0 ow ti ia pb ha qr lv ms qu pu qw qr ml qt 
ep sv i5 of fm oe nl xh x1 xz u4 ha ao fc ug pw nh n9 qv kh vx uq w1 u0 ei if +\n q1 d2 qz zd jd qb wj nt ah mj ea ed y1 et fj qe en b8 ty iv ht fv tn tm sg jb ky ai en us tl ud iu zj ql u1 ci ru iw tw +\n fr ub h9 pd ub jk vh z6 wu wh wp 5z yt w9 w0 uy om tl rc r6 ax d7 et y2 tw dz se vf ii m3 lf b4 jf vr qw qy uf es qp en tl to ye ue ph e3 uy i0 jl pz oe qo zp wp ft ka zf qd wd kr qf l9 mm wf qx ef t3 x8 ex rg ev s8 ys it da rw al hn tc f6 fv nd nc ad fj nr x0 bx yq ti rx ok tb hx o8 dp +\n o0 jq un xu q8 wo qq gg ta oj ec az dl bl wb +\n o9 ij pq gu gp nv qk gg la q4 nw bo z8 9a iw wu q8 eh wi nt jk ut ys c1 r5 up y1 yl py oy ht gd td db qn cz qw lp re c7 dh j5 ia bz dj qr qt wd wf qi rt sv ul uz tl ta yr e4 tm sg pc jv hc hv lc xg xm br vf r8 na wl ou td wc up rj s8 e8 ir ys ii qk p0 lt ho wb x8 bv lw w1 rz ew aa rv ry gx o8 +\n tt hn gn un db fu uq qf d4 q3 pp ji lf wu bx q8 hx kb ny t5 bn hb ex yf ef yj g1 g2 to yk g3 ej sk hy dv qc gj qv sy bg wr na wy bx z0 rc rm ml ug te qp i5 ue oj s4 im oq qt gx sa gt l4 sv at v3 bq mv wd x3 80 x8 aq xk rg yp en gs us dq ak tz al tx o2 dg f9 kv or h4 jy k1 jo h8 kp lt os kh as tn eu ul tm su an tw sp +\n za yi pe sh pv y4 y5 hy th jg qy qt ke ti ue qk yy ie cq wl p0 lw mf er w5 +\n k9 bt xu kc me is o5 z9 kb gv ur rc oe sk qn ve wi mm rn eu to ue uy qa xf by t1 td t7 aw up yf pr dk cg zr sc 3d at rw ec rl st zo rn do +\n o9 z5 wy vi ya ea ee fo gf va ov ww rr wr lb ro qq vr gj nw ru ym iv s4 hu tm wo wp zs br fs wg ej du y1 yt yu e7 eb em dd pq v7 cr um ae oz 0z kc tq rw zl rt wb y9 xv tm tq di eo te gc +\n tt un qs qn a7 qh je qj k0 o1 wr q6 wy ab q9 qm wr ea er eh pi hi sc hs m6 w1 bv lo zr tn yk ep op es ve xx sb ux hg sa gq qp wd n2 zh wf xf wj y3 wl e7 os u4 on ip kn ko qp s7 ly zn ba wu u4 kh f4 zo y9 q6 oh iw tq +\n qa a4 gu a7 cp z1 he ma q7 lu dp w7 ea rc ee d8 y4 tw ez im ae bv ii qe vb zt lc lv wm ro lk qr hp re tw yv es fp as zu oe qu qi bp wg cp p7 v4 ek rd wc ar rj tj e8 od e0 pm h2 h4 in qf wu wi 19 bj rl 
rc ee yj et tw ep +\n gv qd kj cd t3 c3 ih ws rg mc rx lh fd g8 gh cc vw b7 qe at j7 qo ws wg oy t6 t9 go eb e8 us u5 rq oe zj jy oz cj wb be ei pm og se w4 yu xw su yx if +\n o9 ub rd hw gs z3 ql nq ru wg jc 1t kv mr zm ah dd jk w8 ej aq ig y8 pp fj li wq jj cc qr no wy wu en bx yr qy oo es fy pd tk ix ph yr sf vx pn p2 jq fs ed oy yk os ie s9 u5 ak ud gd uf kb xc u1 xm eu xw 19 wn vh w1 to ee er aa rb rn ru an r1 ei +\n se kl 7h b6 xs ym tp an ta qb gn uo pt xi cl qp qy op vr ym ri ti tl i5 e1 e4 i9 ff i5 qp jx ht ql uo en pe ku h7 iw wn w4 ey ia si +\n ql xt wi k6 ew sf eg up eh oy sq ja g9 i3 qe cv l1 qq bv w2 la eu wg ec ef oh fs tb pc xd qs nl qu fn dy oi iu yf re fc hj hk xv zn zz w1 ew +\n po al hm qk jt cd ju nm li rs w9 ev ut ea 2f r4 d6 ey im pa nu wr m4 is bc xz w3 eu tb ha ft p4 ti to hr dy af i6 iz r4 jb x7 wj xg na rf gi at pn gd re wq qz ze bo wc vz sm zo my ye u7 oh dk w5 is yx tw fe dp +\n jl za gk cm wu vq jc zc iu mb oe fo fp ic sc 2l hy qr eb p5 pf dq pa fy lc td sz oo aw u1 rj fl tz nx aq xx oz xb 55 y0 +\n uq wr lh jv ri i7 ss qo gy bt s3 u1 dy ox hg it +\n ps hr lf jx bn qq up eh ab yl pn jg ng bz gd qr yw i9 j8 zi 3v oz at hd cx oj u9 rt uz ro ov +\n sq ga ny se cj id rg r3 pk kv ee sh ek dk sz pp q0 mn az kp ei qi ry em ph p9 gw hc m0 cp ea mn yf t1 5y wx ol e6 ec u2 e7 uh uj uk av ql lw qx zr qv mw qg cq ww wb pw tu w2 mf ut gk af yo ie ob +\n hn um a6 q7 af du r4 up tp ej sk lo le m8 rp eu ei qi ky op of tp ur oj hu tb dy qu gt tf oz wc s7 e7 ua pw ax nb wx wy fj wn 18 wv es yq ok w4 uz yx yc +\n pa qg qh q4 fv qz kx q6 cp gb c6 pr eh id in qw we bk wn qq b6 qy qu es ic s1 og gn wp op qf ic ro os yp rj fj ag oc ay da fv wl qp f1 yx n7 ea w2 ly yj iq iw rm o5 +\n o9 ps d3 lp wr qc md e5 rk w0 pm gx lf ku qt qp to tc pk fb tb qi lh nt yd vt ot ra tg gd zx wx vj rq cr hm ma jp vg u8 rt ei it +\n dx dv h9 rf qf uw a8 qh uv k3 ri is yr r3 eq uu tz yn y6 qc ps jf wq xe wx lc qr j4 ku xx nb 4z sr tr uq p6 uz of i6 s1 fs pj tc hu qu hz f1 hp lj 
s4 qx tg yp gs ob tz ds sw pm ug hm ip ql le vl wq tb xv eq w2 yg w4 st o6 +\n qd q4 pa z6 qz ia 70 r3 mb iu es r5 gh t9 cj vz qw mb ko vt qr qt gh qo ty eb kq n1 xb ef rp ek gu rg s7 rj sn ai hg o1 uj pr jt fg v0 tq tx ww bj bm ct w1 zi rn ox iw ri +\n al rd w8 vp yd yk r0 pi po se sr qa l0 qk ir e9 hm kc rz aa w6 +\n un pq qd a8 z2 qk z5 ws bi xy qx wg wp t4 mj gv qm rg c6 w7 w9 es y1 g2 ej yz gg qc qn wq qw m9 wx qe kr 27 fp fq m7 xp 3p qr rr tr ij il eh au s1 uc fx ut qu sj j8 j9 ya nr rz wg wh eg x8 sl t7 yu vf ay ds ap re dh qg qh qj hz qk zz qx k3 cy iq ox qv eu nx n6 6r lq n0 y0 uq tb sy iw fm an +\n yv dc qs gm q2 cv ok wt b2 cj wu mr zj kn e5 iu pz r8 pe fp ot tq a9 y5 sz ez cl wq qq wv a7 ln ky jd qe qr yx rm qi ea ln te y9 ev en eh iv tx e3 as tn j8 wf xh co fl nc wk xz es rx ee wh ub aq u1 ar e7 up it iu o2 wl ko jo cu pc wo al hm uq rn ul yz ro +\n pw na wu jd yf oe qr xr sk wa hw ql wg x6 s9 u7 am +\n uv tr ub k7 qg he u6 jt gs z3 by tn bi av z7 jc ck q7 2n ny cx km mk rf pj xi lh sf up yj to ia ab tq fq pm fd qc qv ps su qw fu xu cm zb bc qr qt tn ei rw gl p1 xi qo tt ed ef ri iz yw oh tc uy tv as qu l4 qr t4 wx e5 ae op oa em tz gd dq rw ug dr ux qj be ko cg nl je aj xw q1 vv ax rl w2 yt aa u0 eu ah +\n dc ph sq jt ql un q5 cg lk w9 ur uy pz uo sx qv qq cc ln fu ym ho su pn qa bq pd wj wj yk ou wl rk o2 pt uc km ja wm ry rm ob +\n gb pw qf we q3 ls q4 sy bl lg q8 t3 wl rg ed io ef if oi hp lo kw wy qw ei yz rt es p6 fp hi qo bn qw wg cy np uv yy oa uo ir of em ug x9 qh nj n8 ea u8 er w6 +\n ij dg cd lw gk wu zl dd eb eq sg ia am in wq xt nk wr xj qq p5 pd pk as sd fn lj jw fk l9 nt wl oo fj sb u4 gs fx hg o1 dr fb hj h8 xc yq ch er e2 aa af ah ob +\n a2 o0 hn pd iz hw jg q1 jl qz ip le me wi bb r3 z7 g1 eh td sw g9 qq c9 vy ud qo es ec tj uw dq ur hj dy oe zp lk l5 fl wj ys t2 ej t4 ek rs sl yu oa u3 gd pm rw h1 pr h2 py wl 2p s7 wq 6r mi 10 ox o6 +\n i3 qw ee ur cy nx r2 wj t2 ub ir aj cl qm u0 oz +\n qd qn un qz xy nq an kg hc c6 w8 93 eq ts g9 
wy mg w3 rb 3f wf rw kt op es ef at em s6 pc wg bw x1 xl wg hl yk yo eb ud hm hl py wb u4 zp bj bm se sr sy ox am +\n rc ix qs ls qy at ut pk yo ys ec hs lq xv ks +\n yb al zf ws cn ac ih th ww vb kt b3 xo qe qi te ea p8 tn qd ci ix xk pk bg rc tl f4 wb rb ru +\n iy qd a5 jq jw qh sw fv oz cj hc qq ya ee yn pr av or us iv fa qb q9 bh ns d0 qe i1 b0 fh qy qu qi ry os ul hq ri ix e1 ao p0 qt sf qi uh ll ko lx nz sg jz hq sh p8 x3 wg rd sx yo yp u3 pv rq ds tc rr wx lr xb wn ep hh bk yw q6 og yr yg si tq do if +\n hv qa qf jg he q1 kj qz bh lr kn rj th kz ef eh av pp i1 ar gl ur lr bz xp yr ze qt tn es fl hw s5 qa ed t4 wz sx rg sv e9 fz hf al h1 av bg ym ee yg +\n k8 nn jy q4 wd lf xu q9 a1 4v yd mb r6 yh pb ta g6 dn d3 pl j1 jk wc cn wy 26 rr te ti fa e4 uy fb gr hb kd lc qf p5 wh au fa iv xo hf ot eg ra wv tp ec yo ah iu pw hj ac h3 py k2 u1 wb rl rz yt er w6 ru af yo ep +\n qd uq qh qm q3 vg qc c5 rd vp ut eq on yn ii xp up r8 d0 sz qx ue pl lx qe wr qr lm nh qt ha qo ki ri e2 tx iv ao s3 ow kp xf rh ya r2 rk cw nt by wd j8 t1 hk y1 ns t6 wc ev sq rq yf ux aw ch qs u2 zn sm rt wb bk yq dh 8w w3 rc yg o3 yi ox ov ir +\n u0 q7 qb ml or nu b5 1l xb tr tp in qt hz so v6 dq o2 qh wl nb rv fw +\n ss jr zf zh xt oy hy aw y8 js ob wq ny or vy fi en tb qi j9 gt ib ot oy rd e5 y6 tg th pt gq wz rt rl ew fm ie ri ir ro ah +\n o0 qj h9 wy ee g9 gk jd fg qt 3d fu ru iz tl fd tv ad hl wp oo wf nb ez sv tl f4 dr oy rp +\n ak il k6 qh q2 vd k3 zd bo lj k7 km 5c ut rz yd up ua is r0 qn zq wq j1 qe cv pw fu md bw yw qq ra rw qu ex ik at y0 ru ti yw fz ic ao ow gm jc i7 nf p4 fj xg kr br xk bs mb pk hl wl ta ez sv e9 us om rw ap gq wl k2 qz h8 gu kf et ru tq ag uz rp +\n yb az dd fu rf hw qg we u9 o3 q5 q6 ag c2 o7 wa kh w8 vo mc yg tu ua uh ta tw ih hu fj su bg ww bh kw ry ru wy ky wu wi fw 20 b9 qo ik oa ev hw s1 e1 e3 fc uu s5 tn qy hz jc do ou jq gb kf pf xl x3 yv lz iq eb e8 os sn fx dw qg ql wc ka n8 gf ly se tv yk di si o7 r2 rp +\n il mj vi sd ia y6 wq rm p5 ux ho nr ef ej 
wq iq fn +\n ft cs uo io er ic tw ig mm c9 xk ab ze uw i5 s1 e4 pl ui f2 lj p4 sf x4 kz ej ez eb ov of rw dy av qh f0 h5 ki qx cx eb og gk oz uc +\n ul io zd kn w9 y3 wt qq wp jl i9 jk ca h5 wx wb tm do +\n iy hv cs a2 ee yz y6 gk kq em qy uq ts w0 rq rr vt pb nc q5 +\n qn q3 vt vu yk ej fp tw zm qq qy y9 hh wo wg rh ep x5 wk mr el l9 av hz w5 +\n hq qz wy cx rh ur w9 e8 r4 fq im fj gj dm qn gl jn iz l1 yh mz rw e2 qo wh nt wk zw t7 e5 iq fh eb sn ud az uv fh sv dq q1 ku zs eb ue xq rn o6 do +\n ub lo sq wr d1 mt o7 ts t5 rd xe iu yg ot gg se pp qc js lu xt j3 j4 wt pc vz 5o yr qw zw qr eu db sy eb em fo i0 ad gw m9 ig ih lc od n4 pg rx bi ni kq wl aw e7 az jo mk bo wb ei mi ep wb eq di do +\n q1 ub xt db wt ws ik pl ee or to ej ic is fr jk ls c9 qq yg qt eo rw tp p8 dy pz gm hz or xs bt x8 t4 t8 s7 oj lt wv vx u7 w4 et ox yo +\n po o9 ih dx qa rf qf pd d2 kl ad lh kb bd qm bb b1 z8 ew d6 yg d7 ym ti eh ic iv oi y6 sz dx qn ut qm gz pj zw jj 4d bk wb lm xb ke yx oo qp yb yn en fo yw fp e4 aa fd jz qu gw qa zs nl v9 wf qt qi vg ni wx hk 9f sz tg t0 ga de re io av h2 jt x0 h4 wx wc fg rb rn nc yz iy zp ds ep zw pr xv rz yh yk zp do hc ep +\n hb ty z2 qz qz zh gw mg kb ve zz ti tp py el jp tg qc ar qv gx la qr cn lr nd ng ve qt 6g ml op pd uq uw eh i8 uy dt ho j8 wp wd qe xm w0 x4 qk el e9 pb sm pn tc gt ce oj jr mi ds wb ym ew u8 +\n ij yb hn u7 cd gj co dp lp b2 r5 ed ti pn qx g0 jb jn jj we bl ri ot pi rb yc sv ty oh ph hh e4 hy sd wp ll ft l7 wh ca ys wf wb t7 sv uo sb sn ha pb sw de un qc bz wo en as tb eu af eo +\n d2 k0 wr q4 q5 c2 sj iv pm g8 m1 l1 5s ij aa lb xm vf ej ta ar th od sm cw gy bu qd q1 u8 ry rn +\n qa ux q3 mj ex yu zx rk gi rl ya is py am tw ja js db ps dn qb qn gn lc pe qq vr qr eo qi ec oa ev uz yq of in ho qo jj jk wk wd zp wf lz t8 tk ha pv fz pn ug o2 pe uk kv gq v7 oi qv wv dj tv fn fw +\n dx a3 k5 um uq jd og nn q5 qx cu wp rd ws d6 px ac oe rb up tp ej ek ih ff qc gj qm xk b4 dz jg sq jh eu yx eo re es ul yw tp i6 pj ho qi qf sn og xo yv 
pk wj wb go ar uo eb ir iy pq uh qg h6 vt wv sn n0 rx af uz hx eo +\n yv ub ty gn gu fu dm ca q2 d4 cn ad iw k6 bf zl zz 2o w7 uo ee yk ix g3 am fw oi jo se ha vs qn iy qq 24 bl j6 g4 cw jv 1l ei qy ke j4 qi ep of ao hh tb gm sh lh vc uf vu wd p6 xm qt kh rk l9 s4 wh mr t4 oi rf iq op ox u4 e9 fk u5 it re uk f0 kb nd qk ce jp lr cy js qd qb sb tq n7 n8 ed ue tn ox o6 id r2 it +\n qa pa jd qn qg jt gh q5 lg ag qv ah qn vr da rh w7 b2 rz rx d6 d7 eg eh yl a9 ek dl tw sc hp ha su gz lo qe le ns kt qy qi 1h kp mz qu es yb yn p6 eh fs ok as im dy px gq qp qs l6 iv rl zw dr 4r hi wj rp t6 go s8 e8 at e9 f3 ak dg f9 qh pt dz ww rv wb oc pv be wq cs q1 xr xx eq yr u9 sr tb yl tq if hc ig +\n a5 co dh bt lw ck lh w7 3e mp r3 rz yf yh uh eh td y8 fg pa ar va dm su q9 d5 qw re vh he jc 1g ib xz qq qw yg vt rn rb cb ry ym em i7 hr ff f2 qp rd lx wg lb kh va jv qi xd wh wc el un sz tf gu oz ae e9 e0 iu dr io dt fb dh jo um wx s5 oa kx ly rn oc zy f3 hb tt wb u9 oz hx if ig +\n ak o0 qd q7 eq g1 y2 pt dk g8 qb vs qe dh 5i pt yh qo ul tp oj sp oq di uh zg xn rx tp tf ie f6 cg rv zm xw zq 5f md sr yk ru ro +\n a2 tt ub rs ij ml ow pe el gd va ue zm sa pq lc yw qi qw lv ep qo uj ym tl ye hj s6 uf qp 82 fk y1 wl oi t8 fk pb tx o1 sk lm oo xv n2 ad fk n6 dp on q6 rv +\n qf jf kk nm oz q7 b2 xo fw kj rh ua oe yl gh vd qe gn wb pt wi z0 se gj 48 of i5 oh so hz wp ae wg nc kg xf ev pv ov au iy az f7 qb q5 eq yr tv yy ol ry o4 oc di ep +\n po o9 dc a5 jd z1 sq ws b7 ti r9 sl ez aw tg zm si ng qe ky b5 pp eb od jl ff oe ce qp gy yv qk r4 xf kw iw sn tx gg uh cq ql qa 2s mt eq rb dp +\n qs qz cd dl se q0 lv eu yi rw qo uh uj ul en tx wo qd e6 pv gg je zx kp qc q3 ye en +\n un qs qh se ws lf so eq yf ef y3 g4 zb hs q0 no qw j2 y0 uu fb di f1 kq oa ul t3 ot fh ak yf fv dt f8 jo sx wx at wn cs lq zc +\n ub qa qs ik pw uq a6 pd dm d1 qm d2 qk cv zd bi wd ne ah qb kg kh ij 1p rk w9 wt r5 d5 px uf eh yk oy pm i2 hp st qn si qm zw we ls px lr ri qr sr db hp qu xk fy os eg en uc ur i7 sa hp vn qs 
kw dn od rh xj w9 wk ph ap yh el oi oo e7 gp ay s0 f4 gf az jy qk ql qx 1k v9 qc jq zy n5 kg hd ww wv bj hj ur er rn ry eo o7 +\n df a6 dn je ql no q6 ox wo zl bn rh ya mv e0 yn pr gd pi y8 i4 c7 g1 j6 wo rv eu xg eo c0 yx ea sv os wp qw wl ou un t6 u2 os of f6 dt f0 jt wc ja ae qv rm ds pq y7 qk ck aa ux +\n db iz jk zd wy wh c3 zk 2o rj hw vp on ed ac to g2 r0 id ta th qb dm pj m8 np oe pu bb tc gh ml rq uf tu eh ye tx gv pk jv j8 lk xs kd fi mx be wd t3 mr wk wl td eb ie tz dw rw pm re fb dj h6 ql wz wx qx qv u3 vz xe ex 2w ty ew xm oz an +\n ty a6 we wi ro lj bn rh r5 g4 aw jd q0 gz xy m6 wu qq et oo ex qp tr y0 fi au e3 oj gm px lg wk tu ek tg u2 ov em dg uk nd qj cy hp wq mi bj q4 ia fm r1 ei ie ux +\n tr qg h7 qk jl kc jr am mo w8 e8 td gl kw sd jo qr vl gs qe b9 mm fh eo uh ft ik e2 i0 uu ff qu f2 jq v2 wg kg ek aq wm yi yo s8 e8 sq ab cw wt ck pb pn xl bj yq wm ew xq su r2 +\n qk wa q6 jj ws ut gd gf ly ec pj sa pd wl e5 wc da kx zk zz zb wv rm te +\n jq uz nv ql as jx z8 q7 o7 yt rl ea e0 ym y2 pr ia sz sq sr qq qr vk oe pe lr bl ll rm yx y0 eg ti e1 ue uu ui jx zd oq kd rg lv lb r1 fx ro me ts ay f6 fc io qg py qj qk qs ky qh y9 ok o4 am +\n qh dl jt wy a2 yk y2 i4 zq kq we bb dg m6 qq zw rt ta tc ff xs xd qf g0 1d yg du wz iy sw tc dd hj hk mh ov zi wn hk ee yj af +\n ra ak uw q3 cb ji wy fw gw t4 mu ts qw ww rj vo rl yd ug d9 gj i3 zw qw wy md qq bv rn qy pd tl ic p9 hr dr hh ui sf f1 i6 ws cy es ef t5 kr ek oo t7 ec e8 u4 od dq ji ch jr zm jy q1 zp yq 2e og yo tm +\n tr tt qa qd jf pg qh jr sw ao q3 qz za wt js bl vw q9 ws uu w0 ya pl yf rx ee tu r7 gg dv it lo ww up js qq qe lz eu qy rr yb ri ay ye ta tv im sh ss uk qd qf bw ro sl wl t5 e5 um th ha fx re ii fv je hk ot cq km h8 ks bk vl qn xe te tt rl u7 iq ry ag dp o8 +\n sa z1 qj q2 nn wr z5 mq xu q7 gv t6 w7 r4 c1 mb sd ed ym ot ta ht ts tw gd tf g8 se ar gh fh qv qn zm hs qw qe oq wc xj vz xl wo rn i2 sr rq at yq uw s1 tx hy fm pc wo hv gy vu wd lc ul p8 wk wk el oz oa rh gp pn gf fx fc f7 rr dy 
x9 uk f0 py wl v7 cr ch qs wv nz lv lu 5o xe ym ly er yi ia gl ox r1 dp +\n uv qd hm qf gp k9 kj we lf bs ej 2i el wl t7 rj w0 rz yf ys r8 tp py tq tw dl im qc db qq sd ry c9 oe if qw aw qu uh tt p5 p7 p8 oj zi oe qu qi lk j9 sk zs ka lc wh wk zq mq vh t2 ej r9 mr ez t6 e5 op rk ga pb dq ap f9 py qk qz wc kd pv bd sm dr u7 mf o3 yk di r2 +\n po a3 uw q5 q7 ck kb zj td zz yf jd wq xh ld qr w4 p1 ij fu tp qq sv y2 yt t6 e5 op dw iu pw jp ka qv 4u qf rm vb w6 +\n fy a6 qg cs z3 ql dc jz wy me cj o6 ba kv wp w8 ea r5 uo fw ib ig g7 gg sy bg qr cb cq ro xl xv ex tt ru pd hg im oq gq ao rl pl aq sz t7 e6 os uf ug gg pr ql qz vt mj px wb ci qf ov be bg ww mp mi rz u7 w3 ei yc +\n gh cm ca rg uy pm y7 g8 lx yc qi re uh yn uq eh tz ph wo cr sv fp kh sl oi sx ov ga iu h1 je fd rv qv wi jy yy ry o4 tq si +\n qd iz qh q3 cg lf wy xa ez eq om ug eg yj fo fp yz qx qe wb or jg xb c9 p4 tj y0 iz tc oj tb i6 p1 ka zf qe yp wj mv ra ez rd uh pt zl lm sz wc lr bq oc zq sr af gl ei ux it +\n a2 qa h9 qh q3 fn kx ve wz us sj yz fd g7 vh c9 xq xj ln tz wp wf kg kk by vf j9 5y un yi e6 rh sn tx hj kn rb +\n un qs fi qm jk js bd o8 bx vt eq ya xp yg pz ym dj fp tp ta oy dl qc cx qq m1 rt wn d0 wm yr qw aq qt tb ha p1 uk yn ef tj gv im sd hk pz jx zi wa wf ba l0 wd mq ej wv t5 ek iq pv ov f3 em ak rq hn hh f7 uk qh ot ju ng ji h7 wz cr v9 bj bk rc er ia is iw ei id ov pu hc aj +\n pw gm qd jf u6 z3 jl q4 wt bi lr id wa wz w8 ya ev ew r6 yn ee io r8 ip ej td im si j2 jo d7 m4 pv iv yg qi il ti i6 ta ib ap fb hz wd vu wf wh kt kh og kk nm rp ti ek ns t7 y5 wc ae ir pv hf ub wc ho wb wn mi rn w6 yk tm te rp +\n o9 un h0 a6 pf iq xi tg w8 z7 r5 om oq eg or y4 sr fh zv vw zq tc ws rq db rw eo ym tl fv i9 pz jx j7 hx oi qf x2 l9 x3 qj by to el sx ys yd ao az uj hl dl tj nz wn kg kh on wv w1 w5 gl ei +\n gv az ql nm rc r7 yl ja zn q9 xw no iz qt pk x4 l4 tg u3 of zk wc go qb mo eq u7 tv rm ig fe +\n o0 ik qd um qg k9 mb bu wy bx ny ws hm ea mb iu pe eg ey sh uh g2 ic iv aq td qx ja qb ha 2j lp xr 
wc vl wn wi sl tx qt rq ec vw of uw sm ic qy j7 ns hb 2q kw wf vp 1f x1 5c zs y1 rg tg oz e7 fh eb ie up e0 ap ve wq zz cr wz h8 wv go ly fk az pr rz h4 ew w2 ok w5 ia si ro am +\n dv d2 cd qc zt 25 xp wd te es sh eo wn f4 wo tv oc +\n uv qd qn hr bj b1 mw lg io sh lo qq xh m6 28 rn xx p7 im qt jn jw bm qf r1 mn ny ed em ii dd wn cz ds vc wb hh q4 yw ur rt ie o6 ux r2 +\n rs dh z1 jr cb vq r3 eq om y1 sg r9 if tw qb qn m2 vy dc b0 ik fa ib aa jz qu sg qs 1s be of w9 oz sv t9 oc pv rw dd o2 dh lq ka lu 1m qk q4 y0 ye yq w2 w6 si ob it +\n o9 db iz fi qn qj mw 3v wp li e3 km zz yy mo ya rz mv yg r7 yh pc or r9 pm a0 td ih db lt pg jf gl re ww qw m4 j5 xi wi yd nh yg qr rm et ey ug re rr y9 y0 sb tu od ay of iv oh i8 ok uu sf gq lg wa nn uj qs cu kd xb wg cs 3k yv hk pn ii hh cw km wz wx wc n3 jr wm qn bd zo mo jo wm q6 w3 rt an hx ah am uc dp +\n ps pt vq kc bs vu vo xu ee ib hv wj x3 nu ud yf qh wb lb gz +\n ra o9 qs ty rd ps db pu qj u6 k9 nb qk oj ql wt jx bo ri xo o0 mk rh bm mj ut mb rc yn et yl pm ih i1 g9 qm xq oq bj wt jp xu pt bc eo ep qi ky sv uk fy ri i5 im dy hk ui tm f2 uf ug qd kf nc s3 fs 12 t2 ro du wk ek yt ej t7 s7 oc ay s9 pv fl s0 gf tx fc ac py v5 qk ce qz cr jp ck hp u3 zm xr ii yv ea cf hj ye w3 tp do tw ux +\n o9 qa ik wt kc q8 wk sp yy w8 w0 ys ea om tu yz pn fe ae g9 ps g0 i4 qb fk qn qm ut j1 d6 4d cb vj vk xy j5 be wi ve qq gf qr j3 ug qo p4 sm s2 ut fd pl qt jz ui qy qu qa nk fj iz xh wk iv qz fb ro x6 ti sl rs hc oi wm us ai dw o1 hh ab qh qj ju ng wl zr vk tw 5a vc hk md yr u7 w2 yt tn eu ul ah +\n uv ra qs ty jh q4 o4 bc vr o9 rg jz mc r4 ui g1 ey g3 sj am sq fj qn it xt ln dl jh b9 g8 dv qt yz ea ue ss w9 wj kk bi ym tg t0 ob ys iu uj qk nf v6 nj ox qb wy mw 1n pq eq w1 rx rp ge +\n a2 pa e4 xy yd sj vs jj xj lm qy qp ri ux p8 pj tv xs wd wf oz gd sw rw uj uk qj k1 xx um eu bx my em ey +\n az h0 qz iq bl kb xp yf y8 qn rv hx oc re gt k2 bo qg cf rl +\n yv uj d2 mq hx ws w7 mv yn r8 ab an ae jn xw al up be qr zr ep re qo ec ur ap hp pn 
wp i9 rf wf vo qk t8 eb yd uk kv ww wx wt ox kh mi eq yj oz fn ie am rp +\n ik df qg jg k9 wy kc ro wi ve bb rl ew io or eh sq oi qc qe d7 m4 pu gd db oo yv yq ix eh fl pg ib hu pl cr fr xd cy ke mx yh wk ag hf hk qg sj we mz gp u4 ak ma rz rv af ox di yx ob +\n hn pa pw qd il qh q1 z3 wr t3 wo ws vu uu ld pe fo dj ot dx pp vl qq rr ls j4 fs dl ve c6 rq ln xk ec rt ty ik y0 tu yq fz s5 sd sh jn wa uj ws lx qr ca rz wd nu ek yy yi y6 uo os up f4 fz qk h8 qc wr at 18 ca ww rv sy ox o6 +\n dv wu wo uo m2 we rp b6 qe ik e1 bq w8 x5 ez fh u4 iy jy wu li f5 u8 w3 yl te +\n sa ds qd no q5 ra jd qo ru r7 uo ar ud on ak fv dg wl qx qv ye yl ep ge +\n ss rf qn bu gj rj uo yz tf m1 kw zr oo y0 pf tc dy qu v6 xh t4 oo um df je qh dz v8 ho wn wo 0w dj rv o3 du ro +\n ij uj k7 me lg ih hv ws rj pl sd uo y1 yk d0 pt y4 g4 ou tw sq td fj ha qm qq 4a kw d7 xy m5 bx c9 yx nw tr qo uj fu en p6 s1 ht tb qo zp kq x2 wk wj wk yt wz sz ae iw ay fk ao ug pq qg k1 ql xx qc cl qk 56 bn oj yt et ut uy tw ir yc +\n k5 pg cp z5 wr no zd tk ej an qx gj i3 su we up 3q yq fx ib tv qp ik wj yf u1 os rk jt qo qx n9 w1 rb +\n k9 uv gs wr 3b mh km bm we w9 es or yk r0 g5 aq gf nq qv ll m5 yd zq qt qp sv ed p5 of eh i7 pz hl sg jn wa m0 nm kf w8 wj de e7 ar iy pn ly wn fx w3 rb ey am +\n o9 d2 vg gk ex rf rc hy qm j4 ga qw rm ls yl cm en tl tp fp tb i3 qy qo j9 vn zf wf qg mb kj qi jb mq wl rj s8 lw um zt wb f4 xw f9 +\n ra go ls qx wi c6 b0 rw g1 yz fe g8 ow qq ra mz ex oa fu iz tl uc e1 p6 x1 tf rh tk fz ap hl qh k3 xb mw zm yb yw q5 aa rp +\n yu qx sc xe j2 oq gs i6 i9 l0 +\n yv ss tt gu fi qj bt ql ls io nw gk hl up zv gl ni xt wy dz qe ud nw rw qu uw e4 qy px qf zw za ty ek t7 pv dg ho wn uq rx yx ep +\n ga 1w ld wy o7 xr pk r9 g6 hu jg lx sd no xt wr zy ku l2 nw 9r rt i5 to tp tc s6 f1 ud ko xb rj qy es t0 f4 fx ii rr hm hj fb ji oi n1 vk ci 9e mt yc 2r tv gk yp ux +\n hb hn k0 wy m4 w7 rc ts y6 j3 qe ve qy rt so di qo dp lk xf mq wl em f5 pr wl wn 3k ew yt w4 ri +\n qa ss lq wr bx t3 r5 ed eg 
sx dn we 7n ra qe b9 rm wd rw eo oa ri e1 e2 ut ap hu qo ws uz ai tz nl cu wq ln wn ie aj +\n yb rs un hm dg qm qk ao mw fn kv ur uo pj e9 sf ia tp tw a0 td sx fg su xq m1 om na vk wy xk em l1 z0 nh b0 mz qy p2 ru au iv p9 pz ug lz xn wf xg fk zu wj wd u1 e9 tl ak hf sw o1 pw dt gq v5 lm h7 nn nl wq uq zt o3 ad ry id ig +\n a3 pp dv qs gn u6 jy kk io dt wt ck rg ua yd ya yh ax ac y1 pe pc pv fp fw dc qv zb dn q0 ju jj m1 ui lx qe qr t9 ja he wi vw m7 l1 jn qe wa qt xg rq qy yi qu p3 yb ed en tz so s3 tb ho fm px gw zo lx wf sm mc dq wj yg l4 uv yk tp t5 wz ol fk f4 pe kc dj wz qb zm wi br zk ww ty 6i vm eb lt eq w2 ey yp yz o6 ei +\n fu ga io nt wp jz yf rv oe eh pt dz ih sx qx g0 qm hf xe lz gc d5 bh 2z cn d9 1r sz li bv qe 6d bb er xv yx p2 ea tj p5 ay uq dq pg oh qt s6 px sh ko qa nn oa bq cs kk hh cr wg tu y4 t6 e5 oz th sn ov u5 dw qg dh uk n1 zr qv 3d n4 yx xe wv eb h4 yo ro hx o8 rp +\n ra gu hm a7 jw qm qh jr gs k0 ql xt q5 dt ru wy k5 wh fw lu kb am m6 bx vy qq ev rk 2s mc yd es io pc pv g2 ek tq tw in ih ae qc d5 ui qe wt qq m8 vr nb ee hu rw rr tt ed fi em e1 e3 hh hi sh zp qo wp l4 ws qf qf pg eg to un gu u1 t9 ox e7 u4 od ds de hh py ql h7 gy js vz gf y8 uq se do ro rp +\n qs qg gk ta bf r3 hw r7 r9 sh ua g3 sq td g7 ha lu qw xr wy wu rz ko bb i6 uy as di qi za hv jw rf 1f 2u va ap qi rc du wk yt t7 u2 ob re ax v8 cg qb wy wn kg pn yi rn ru +\n gr ra jq qf go ga jh gs q3 tm q8 k7 o8 mj ym er ip ua ej hy i1 dv qb vg cv m5 wy xk g5 wi ng w3 3w ud rw ug ep hq ta fc fd aa i5 hx qp wp v7 qs l6 l9 l0 jn ty t6 ie rj tk od ys on pq tc zl nh qc xv wc cu ks ei lm vm cj yi ad r1 si sp +\n qa hm a3 ac q9 na rj if qw rr vw tj ib su qu wo dp j0 wf pf 2y wk ym ra wb ae ga gs f8 gq im ar pb ec f9 yu rm +\n t8 ej an y4 td ez ln z9 lj qy sm uw dq us cp nu tg vn +\n qa ds k5 hw k8 k0 ql hl 1r wi fw c6 w7 mz rj xy r4 e0 ym yh eg r0 us fs ib oi qv q0 ww lp gm ln bx nc qi l1 qe wg ea qo eb tk eg to ur jc oe dp hv wa 2q nk at rg wf wg ca xk jn pk yg er ot uv wb aq ol wx e6 
ev sv uo vf eb ah ud da pe qg jr kn ju ng ae wv n3 iw ly kf cl pb wv tt vn eb vm u7 ew aa w6 rm gx r2 o8 +\n q4 q6 vk d6 eg pc pv r0 tw i3 q0 we tw sm e4 ow sn kg up hm qx zv nz wm u9 ul ri do +\n po uv dc qs qd hm qg q2 jj sw kl me q8 xa wa xf z5 yd r4 rq sf px ti ia r9 yl dj dk ek qx i2 sr qv qb lb wi nf wu qe tv fh qo rr yb fy eb ri ai ok qt pc ud qi qa ws qs lc zh nc x2 cs t2 di ke wk sz oc yp s9 ys ai ln wz cg wc wv os qb f2 ec y8 dg wm rz yr ee rn sy du su eu fm ei o6 dp hc +\n ft qj q6 wq up ut lg er uw db ll ws of og e4 1i r0 wx fh th vf re hm zi +\n a3 ty pw ph cg uo r7 oi q0 lb c0 vl xx mh hu b9 qy tt sv p5 eh to hh ow tm oe si sk oi gt kq cu vi j5 wf el tf yu u3 ya uj dy qh ql ct wc el y0 o3 o6 if ge +\n a3 dd by wt lf 2v bl 7c bn cf yo go yf ii et ey yl aq aw g8 ho i3 qb dn qn lu vf vg 2k le ml wy t0 4h xk qw b7 bb eu xr qu tt y0 os sn tl pf og tz tx pl ss us xd cu oa qd xn ke qf vp kh ny wk r8 ej rd t7 sc e6 rh ud tx al gg re hj ux qj gw xx zv xm iy ca vb yw en oh u9 aa w5 w6 ul oc an uc +\n qa un dn hq hw d1 jr jy kk kl wt kz zf z7 cm q7 me xp wp mj rh ue e7 ys rz eq ew ed xp ee yj y1 to fw aq po i2 jn li on m2 na vq wu ck er yu db yi gl ty eh uw fp tx e2 fs uu sf jx oe jb qp cy bn qd wg 3z nu j9 mr t6 gp pv ha tl ai fx uf fc kx qh gw xb zt qv qb ir cq vb y9 ct ol fn ah hx sp +\n db wt cm ch jc wi dd ys on td po y8 q0 wq kt eu tc tv or fr s3 na e7 uf gg re f5 tt aa tb ie +\n a5 jw qh q1 qj oj xy my b6 es yg yl y5 zm pv qw qt qo ea ri ao in s4 gv i0 ad lh wa qf gm rk vs oy r0 ez ab lm qs qh ry ox +\n ga ca z6 nr wo rg bm vu uu rj e0 ui io pe eh d9 ab tw fe tf fk wy ln md rk sk qq qw hy kp dc qy y0 p5 p6 p7 ic pg e4 jb ge wp qa bn xf ks zi oy e5 um wx ie yp fv je ng oj ja v9 bp qv er an pu +\n a4 jw a8 o1 q3 un gh le nq q8 ig rg rl ea io er tu fp dk hy sq ae lo qq wt wy 5i xj cw dz oo qo yn ty y9 y0 sb ef tj uw ta ur i8 tb oq af hz qi wo sk zs qs vi wf kt nb y1 oy wk r0 ol ex ec tg t9 eb ap fv qk ji cr s6 et xq bf ep mi ax rt yu iq af am yc +\n gr uv hv 
tr a3 qs lo u7 jy qz kl z6 gk gz ag kn rg k0 w7 wr rl pj ii yh up ac ot d0 g6 td y6 fr se pp dc g9 cl gx qw pl m1 ii qr vz oy nf eu eo p1 os y0 ri ix au uc ai fx p9 nm jj lz pa kw ul rg gw qh 4m eo qc l5 rp oy ej un yy yu t8 sv ud fx ac dy av gq qj ve sl bu th u3 rn nz n5 zm yz bd tx el ex n9 es rt rz rx ol sy rn yo +\n ph cb jx wu ib vb ih ty oy tl vu +\n df qd k7 z2 q2 ju jz zf cm mw yr gu rx yh ym ef pc qx jd q0 ow pw wt rj xo mf xl qq qw ud tw ku ik oa od ti hk f1 xs qd wf dm s2 ph xo ou sx ae iw t9 eb u3 rk ak hf dw ax oe zl zz wm sm el cx cw lq za tu yw rx yu rn fn yi ei +\n a6 ql wr jx z7 wu xi ym fo if a0 dv ww lx zv dk tu sn hh ff hu zo ws rf wf aa ni kq uv t7 um go e8 ob sm tz hl uc zz ol lr kc n6 bk ry if +\n qs gm tn rp iu pi qe ec to l1 wh ra wl it kx fd vx q1 ri +\n gu pw qg we d4 ws q4 cn q5 me qv zj zl ex wr xo yg r7 eg et ey us iv po aw se cx az lb nz nc qq ew rs rq yx ep tj uq eh fz hg gv jc di wp sa nc ya cs fv qz ti wn aw e7 ox u6 pn re o2 hm fv qg hl dl v9 qv tz 6y rl ye rx ur tn eo +\n gr qj z6 ld tm jw hc ed y5 se ke ht tn jb 12 yt ek ao io wv ew ey fm tw ir +\n gv fr ak o0 gb rd dv gu qf qg qh jg ux qj ph k0 oj wa jz bi ja eg c1 fe qn b5 rs rg b1 vo z7 us d5 r5 ii tu yh y1 or ek sl pm hy dc th sy ww ze vb wt m6 iv mj qe 6f qt gk tq ru yq au ap dr hh qy sf qa ik kt wd rz ej t3 ot ej ub wx oz th s7 t0 ag ga pv em fz sw o1 ip qh nd h5 et ho cu yz tq wq wn et tn yo si ov a1 +\n ij rs rd qd pd qg qh z4 ql ip nq q5 xu bz lh o7 my 3w xe ws 2p w9 rk es er d8 pu y6 qc gl bv qa rv qt rb os ru fn qy qu hx or wa qs at zg mx xo bg yh ec os eb hd rw dw ip vw ki ok qx cu wb sn wm yj o3 tm ei ah +\n tr rd pq qd um qj u7 q3 cf db k4 gl mr gw c3 bs k8 vi 4v kz cg rz et ey tp fq y5 el gd dx qx hp mn cz wq xh m4 av t0 vz m6 qw tv rq ei il sb tk eg uq tc wo qo zs rd nx 2y fo j5 l0 l1 hy vy t3 t4 yt va y5 rg e7 uo ox at ir ys hd uf fx re rr ac kc cq qk cw h6 kp xn zu bd cz ca pn pq w1 rx vc w6 yo is fw ir ov +\n ra ij a3 qs qn jf qm nv cs cv kz q5 um q7 q8 km ya 
ys rx yn d8 sh pt fe se js ue rk m7 wp et ei qy to tz s4 af 3i lc wk ej hc ex t7 oc sm s0 tl fx re fb jr jp qc kc jr cc w3 yl oc ob ep +\n sa yb qn k8 lf d1 c3 wp vr wl yd iu kb sz g7 mn jm lz sd m3 lv qq j1 ex qo ry ru em pk i3 hi rf fk nc wd vu yt td sc tg s9 tz tx dh x9 qh ku dz my yr w3 oj se ei gz tq hx ah fe +\n w9 rl rc or fq a9 pp db gj hs lc qr ec p4 ph hb x1 ez u5 qx ea 6t tn +\n a3 qa dd qf qn qm qj vj wi ag wo ig e3 wz r6 d7 ax pe rb ey r9 is ot tq oy if hy se qx ar qb vd qw qe np xy nd wi in gj qu y9 ev ti tb qt px ud wo ll cy wd hw kh fp wk wg wh ym vo ub rd t7 iq yo ox eb yp ys au u6 rq ii io pe qh nz vl be n8 wv hk og rc er yu u0 rn yl is do eo +\n dd nn oc el yu tl rc rv r7 y2 hi qc qm wu cq qw xc kp tr fu ib zi qu wp vi ci qj nu zw t1 wl fh ev os f4 f6 f7 cd zc qx zy wu bs qn u0 +\n o9 gr fu a7 qk xu q7 wp el yu fp ou y5 pm pp qm jm st op uc fx tn hl zs kp bq p7 hi ys qj ki qc qa n6 oj ey w6 yk si +\n q7 xo sr he uu sd s6 gy ws iz fk sw al v6 lq fh ie oh uz pu +\n of ch zj rk rx rc g8 i1 jk tv ul fi e1 ic sp in jl jv j7 nm rp r8 go hf wx tb oz tw it +\n se kc tj rx yh eh td pa zb qv c8 j5 ri eq b9 rm ik ev ul ti p6 en ok tn wp jm ws ke br wj rp en gd rq f6 ac ab zc rz ew tb ro +\n qd wr d6 i3 j1 ww if qt yn fd e4 qf j5 yh t8 u1 ev qc wv pw u7 oj ok yz tw o8 +\n un pa jd qh qm dz pi z3 ny gs k0 wt xy z6 cj k5 gl bz d1 fq ye yr rh t7 ot if g6 im pa ps fk zw lx kr lv na wu vw eo cm te qo tr ec ty sb y0 i5 to ye so tc i5 sg ct qs sc ws qr xj 2u n5 rl cw dw ys qj yn qc y1 sl t9 ox sv s8 ya s0 tl ys rw tx fx ds rr cq cd ql qa au qg vg rx yt iq tn yl uz ei si r2 ob +\n rs gm qk gg m3 rj eq mv yf sk gx ve eh iv i7 n3 pb uf gh uj tg ox ww bg oz su o6 +\n ih a3 uj rd qs df h0 jd d2 kj q2 ap wr ol nw bz q7 fq ir ra w0 eq ya r6 d6 eg ej pn py pp sr qb jb wq ni xe we lm be xo w2 qo mj qr hp tr qo qp ef of yw ai e2 i8 fb tm do dp i8 l4 wd p5 sn pf gr vs rx kz vh t2 wj ot ar t9 at ir dw qj nc cw fd sx qc mz lt pv br wv dr q3 yq vn ye yw dk oh rc w3 yt se ov ge 
+\n ds h0 jw he qh jr jt ql me na ah xa tf wt pj pk om 97 rc yg ym oe yj eg dl fe sz g9 lo qq qw wx rr c8 ns vq m7 xl gs vr qw qr kq qi qo eb tk ue dw e2 i8 i9 hy hh qt f1 pc vv qs bn ij i0 uj xh wk qz ns ej un oi yi rh od ha tl re tc o1 uh ac ip qj we qh lq eb w3 w4 ia oz eu ri uz ep +\n fr ij dl qk z3 qz wt z9 gq mr wo zz rd dd rz ee sw pp g0 sy vg ww iu pz uo cb t9 ld qr ei yx rw es ts zi wp wd gw wj hf r4 tt x8 wl t6 hc gp eb aj ai iu o2 nh qv ey kg dp wq f5 rt cg yw sr tb gj rb fm ro ah ig +\n ss ux q4 ji xa mj mi ld rl pj r4 rx yg ti ix a9 ig gj j1 ww ii qe j3 mz vl qq ye m8 b8 yl qp ik ki eg uq fi ok fb oq fm sf oe hp v4 nk wg mx kt vs j8 yn wc wj ot td wn iw os u4 tl u5 rq de io x9 dl ql n2 ji wb if hx +\n iy jk ql q4 wt kz fb q7 vy w8 ur ax uf ym yl py ou dl pm in sq ho j1 qr ls j6 ic sl ko xq jm qe qr qt yc es ry pf he i8 s3 pj tm oe qo lk wp j9 nb yd bu rs e5 yi ar rh ga ud al hh oe wx s6 do kv be pm w5 fn ey du do pu +\n po rd qs il rf uq of jf nf ih w8 b0 ur pl us ed tz tu ef rb sj tf ff i1 pa dv ue fk m2 qr wt j5 c0 vw xo b7 p7 qt zi wo gt qa oo qd bt nt zq x8 ou e5 u2 fj s0 yd sw re cd qx jp wc ja ga jt bh hm eq rv hx gx +\n iy ij pa gy qd qg he d2 qk d4 qz q5 nw wu am qm ft w7 rq yn ef oe pv ek fq sk y4 ts am fd qx q9 jf ju wy lm zw wp er sy qu oa ta jl ss gq fe wa p2 kq ws 2w dn xz t2 ej rp rs yy e6 iq ag e0 u5 tx dd pq fv jr qj ku oj ql wz fd s5 nj qx zt 2d qb nx pm ce f9 w3 er tw rp aj it +\n ss qa gm dm qk c1 jd k0 t8 mk rk tk om yn tz r7 px ac av ot ts if fw ez y6 se qb ha su qn cl wx we qr zt kr mj rc qo es tj ym iz tk i6 fa i7 s3 pl jx du qu j0 ws v2 wj ys yv wd s4 wf nm dt wv ub ez ta wx e0 fz al ap qg wr ar wb fj n4 cz qg wq fc mp yq ev se eu am gx dp te +\n a2 hv yb nv h7 jt lw xt lu wp yr rg 2o 93 uo pr ej ez jb lz ww az oq bk b5 wi qw rq hs te ea es ed fu ti uz fd tm jc do qi j9 zs j0 wd xv iz hr wx el ns oi t8 sc t9 sb fk hg cd rb mz wn 4i ov ln yr oz tm ro o8 +\n iy pi jt kz st tm rh ya b2 om ef eh tp el in sc qc g0 ps zq nu pq j3 oe 
a7 ja js ng tc qe pp eo em fc s3 hh i9 jl qy i8 lk wa ae 1p vh ox rk em hf dd jt rn tq is oc o6 so pi +\n qs gm qn gw qb cx w8 ur yp up uy ek ez ar sy qb hd bx rl qt yi nw tt eb fl eh pg oh ib qy qi bp jz lf eo ph wh oy y5 om az tc ab wv wb kg ww eq ok aa uy w6 ag ig pi +\n ra a5 db co qn d4 bu qz kx me nr q8 my lp t8 gu rk yn et y1 ej g7 yq d0 j6 b6 qq rn kw ei yc uq e2 s3 oj s6 jl kf rl ny wg mw t2 co el yy ez eb e0 al qg km k3 n2 zr tk qb n5 n7 et tm ul +\n po uv a2 o0 rd hq dh hw a8 d4 wi z2 vt ww kb d7 pv tq fa ta dl oi y6 im ff ae qv sy si wq pq bn b3 lm b5 wi ku qu ru ul ri tx fb ss or sk wp qd w7 kh nn es hy wh rp um sx e6 rh rk pn sq rr hm dt ip dh pt wl h8 qc vj ly bq zn gd wn q4 hj yq xb mf ok tm ge +\n ub pa fy qf dh qj q4 wt mw cm k5 gw kb el w7 w8 mx ya ii dj dk gd dc gh st qb iu jk qr bz vz ab b5 mf pu qe xd nq eo yb pd i6 ue dq e1 qo wo sp 1o n1 4v at qf fi of xj rj dq ew nm x5 wh na ub e5 um sb ob em pb re ip x9 h7 zv xm bw 1v mp zr w3 xm ee yg rv rt ia is ro ep +\n cm bp hc rx y4 sr q9 jj rt qo uk ev to ff so bg eg y4 l0 go os ay tx qh hl qc wb +\n z3 nn o9 xf fs gd g8 ns ec p0 tb wf uv iw jt wr dq bj u7 e2 +\n da td ta tw tf tt ay dq sf gi ae rl e1 gk af dp +\n un fi dx wt m5 vo ys j3 i5 ad nr wj mn tg ox bs ia +\n hv yv qa qf dg qj do ek w0 is sl ez sr i2 ww we rl vr qw y9 tu p5 uc hj i6 ud ws l5 qf xh kh lg wf wj uv tf t7 e7 dt qz ka xn cx xe fn it +\n iy yv rs qg uw oh q3 lr vq bz ab zm wa ds b1 w9 rl rz uy wy om uo ef fo py tw fe qx i2 qc qb qn ww vg ke wr j5 j6 oy qq ng sl qw mj yh xf yk xg qu te p2 ft y9 uk ym uq so fx ff fn qy du f1 na lh wo qo ge sk v1 wg mc dq wj iv 1h pk 3s ej oy ek td ex ae yi t9 go e8 rk tz ud rq ax hz dk qz kf wm yq cy w4 h6 rt ry tn r1 gz ux pi +\n d2 we aa cb o3 xi tu ti gd wq pl xg wc lm de e4 sj hc ic wc ra wc go o1 dd ip wl in wx js tv rx yi yc +\n ty um hq co ux ql q6 wi bc kn q0 r7 yz ib pm g7 po qv re we bh 8j ru xo ra eu ud qy uh ec ty ry yn hw sm e1 pg p0 dr qy oe lc x1 kt xz pl t4 el t5 ex sn us dq rq ao 
f8 pr md ql v7 v0 n7 kh vn wm u7 cz et w6 gl yo ei di tw +\n ub jg ph q1 q2 d4 q4 qz kl ld cn ji z9 ro ek gb w7 rh pk ea ax yj pv ot yl an sl y5 po im i1 zb fj qb i4 gl xq si m1 jj lb l2 ul sn ue s1 ta hg zu lh nd j9 ci qu wd bh ef ro ra aq t7 ex t8 od en fz fc df h7 qz n1 v9 zy 4a tc bb ea yw mf ia yp eo aj rp ob +\n o9 qa h9 dn vo a7 qj jt ji ne kc cj zh wo q0 w7 e5 ui vi ya wy c2 r6 ui px yh y2 to pr ab dj pn a9 pm tw sq ig hi bg ni ry lv wy ic bc li qe im dv rq xy ki i5 fa fv uu af do vv za l3 bi kd nx w8 nt lh hu ra ub un ec rj ua fk ir s0 f4 uf dw jt k2 kz ml cl km wv vv es tb yj w5 rn yo af ru ah ig +\n vg wr zh wo on ew ef ae ha id uf eg p9 ef gi al ng 16 rz o3 +\n qs jw qh cv z4 ok wt k8 kg km wa uu us sj oy iv tw jm c9 fo nd 20 qw w3 yi re qo yv rr op qp ue oh s3 uu px jc hx wf v2 br ep wg pz t7 t9 au re zj kn xc bo kg 1v hb wv tt u9 gj yu ry iw dp o8 +\n qd wi ij rh ef fe jm kw xj wh uk ef ti e2 j8 ou xo ny wh rp wj ub s7 pb nb qv ev o6 o7 yx +\n gr dc ft qs gm qd dn k6 lo k9 nb as zg bz lw ui ee g4 dl qv q9 lu jg rx w4 yj ep oo sv uq hq yw ao fc e3 ui dy du sk gc gy qs l7 kz ed ej wl un yu wm oc gq qk qc ks tk ti eq em ly vl er ry sy yo ro eo +\n lq d7 i4 7w y0 qt gw ch o6 eo +\n fr hb dc o0 yb hn gi jh sw kj we o1 vg nm q8 bz zk bf ml ev ed r8 iv ht fg th qv vz d3 ng xj 0h 42 ew vt yg qr qt ha qu hs qp ij yn eg of tl p8 fz oh iv jl ss dy zu or sk uj co kt rp wb wx fg ev t9 rj yp u5 us ys ak rw al io kc dt jr hl ln wl wz gy wy qv qb mu hd ky ku zp ww yw rl oh ee w4 yz +\n fu dg qf pg jg o1 dc by q4 st t3 lj ve jr am 2i rz ea lh pl ed pz y4 g8 i2 db g0 fj q9 qn bl en hr m8 qw rn qt yi ei yk qu xi uh fy yn ix uy gn jx f2 gr fi x2 zo pl vh ek sz u1 s7 ya em u5 da re f7 hl qh ju oz ar zb ci tk ob n7 vh og w1 ok er o5 ri ro tw rp it +\n gv ra fr ub h0 hm pf qj kk zf zh rj eq d7 oe eh ib oi gg i4 jd ph nu gc qw rr m3 vj ry is dk qi rm qy qu ep p3 ed pd ta s3 tc fd sa im ow jc oe qi j0 gt bm vm zf nj rg w7 x2 nr wf hi rp wk co t6 t7 e6 ag eb u3 e9 f4 om o2 dk 
h4 gq jo cr oz ka kx rn wn do ep wb vn ef rz ew yi r2 ro so ob +\n ft a4 qs pq iz pd u5 cs q3 qz ra rh w7 rk mv kv ee y1 to dj sj ta pn oi tf i2 th q0 vx vf ww 2l cb wt yq ku ye gs qe w4 qy qi xi tt es qp ed ef ti i7 tc pl jz ho zo qi za fy zy rk x2 r3 ht yv ex op ae iq u2 ag pb of dd h4 lq wx cy cu zy wm ry ef dj vx st ia ey te +\n rs al qd uq ga qj sw we pa bi ba e4 yy mo d6 er et ti rb py ek am ib fe y7 fh jv mn qe qr oe c0 l1 qi mh 44 xe ei ev hq ix e1 pg pj ui hp pm fr qs kd nk 1v wj fa wf yt t5 vp ex wx fh pn ug fc pq io gh dg oy nf v6 bt jo qz gu me wm n7 br tx mt q1 su eu di uz am if uc aj +\n da a6 q1 ph uv oj ji mp t5 mi rj cf jl w0 pk ew ii rv oe r9 ic id sl se su q9 vd we j3 ac d9 yw ew w3 y0 tk ao hr in e4 hu du qu jb wp cr qs v9 p5 vi xm kf s1 ea t2 wh y1 co iq yo au iy on ds fx yf qa zv qv f1 y8 wm u8 rc o3 +\n iu r5 el dz rt m9 hb lc x2 zp aw uz +\n k0 px qe qr i2 yz qo ap t1 ou n4 +\n qg q1 wr wt wu 5x ij rg lq eg ia r9 is dl aw g9 xx w2 qt au i7 us jc f2 ge qa gt l7 lb mc x3 3p tz u6 kx f8 fb ku ag hd oj o3 fn tw +\n ds rs k5 go qg ga qj gs by q3 xy q6 k5 4k o8 ws td mo w8 th ys eq pk yf r5 uo rb r9 td y8 tg ho qn gz li m0 oq kw qr g1 wy iv b7 vt qr qu ti to ta ut sa i0 pl oq sd ho qa gy qq l4 ks fu wg qg kj eh ez yu tf s7 os s9 ya em pq tc fv qg ve sx af ci ah qj bj df ry rl wm zy tv ol ey ox ri ie tq ir yc +\n ak ra yb ds gt fy qh d3 ql jk jl ni zs q5 zf lf so wo mu yt wa w8 kl ue e7 2d mb yn tu ac pv id pm sq sw jo dv jd jg qq qw qe wr j5 wu 1h b6 vr yf cx lz rn ho gh qi es ev ty p7 fx fs s5 pl sf lh sh i8 qa xs 1o kq zg qh wk fs vo wl ez iq uo tj u3 gs ii je jr hk ql xx 1j v8 nz kf vz ww yw yt w4 rb ol o4 rn ux ig sp gc +\n yv fr qa rd gm ps jd a8 qh ls vg q5 lg eh z0 vt mi vy rg lp ex ew d6 yg rv oe fs sz g6 sy ha cx qq wy j6 dk hr l1 qe gl ex ln uk sv ty at ru uc ts hi hl lg jv qi vc m0 fy xg qg eo hf mu mo kz ot np oy na el yy wz fh gp up ir e9 s9 f4 gf pw uh uj jr ab qh uc wl ce qz h8 v9 wv ie 37 eu gf yv 1m ma yw wm oh dk sr oc ei o8 +\n 
qn cd zf y4 oi dv xq q0 lc av cw ki xd lx qi gn bh em uf we ja ox iw qb wn my zs y9 ux +\n qd qf we ls lf k4 eg bc e5 rl ea r4 oq er ip g2 yl ot iv ps gx qr wy xj vz xl bx 3o qr eu qi uj p7 uc ph in pk qt i4 gq wp v6 kw kd xk zw 11 yj wj rd oz th yo eb ya tl au tx qj wl dz wz cg zv qa rb wm 7a zs vj yw ee eo +\n jd go qg d2 ji qn wa bf t8 ys eq ui d6 ed yn r7 is qb q9 lp lz qe c0 wu tx wa te qp 64 uq in qt qy wp j0 lz l5 og ca sz un ec rh pb pw h2 kv aw wy qf 16 rw ew tb aj +\n a2 gr qs fu db qn q1 uc jr qk cn q6 b2 ne lg q7 q8 wi wp b1 ec rk yj pc fo iv sk gk jb qm zw m1 wx zt xy wy em 41 ee gh xg cn yv qp sn od ao pj fs ut s5 tb ad jc j9 xa uj ws kf wg vp nv fa wk mq x6 vh wv t4 ex iq 7r y6 sv ox ev eb rj rk em aj pq gh f8 th os sb mt ak q1 xr yw ti ee tb as ox o5 yo gx uc +\n qj lp z0 aj wp vr wa bb xt w9 ya on ew ym ia ix pt tw dz jo ae cc qe lc qr cn b3 c0 ib ml qi uj qp pf p8 e1 s3 tn ui sg pn i8 hb ij qw pd ld fo ap ty ro 3b r0 sz ie gp rj e9 fk gd pw rr uj cf qz zr rq 4p kp pr vj w5 iq ey rn ie eo ir pi +\n gr rs gy pw qd ga jj z3 kj ql nn bg dm zz uy pl e0 lh ef oe am y5 fd qx hi uw i4 q9 hs jb vd cx ni qw wx zt qr d0 wi 43 w3 cc b9 qa rw oa ev ry p4 en tk ti yq pd i5 og ic ye so tc de pj ff hl oe sj qs wf v9 xn gm wg xm 1f ph dr vg wk ns t6 um oa e8 sb t0 gs sm fx o1 de h1 uk qh zj zk ng ct gp nx xe 3z wm rz yk tn ro +\n qa pt k7 og kl wy rp hx wp wa ui mx eb 95 ac eg dj yz aq in ih i2 q0 cz cb dg xi cq jc qe qt es ed sb en iz fp ta fc tv tn gw ka i0 lz sd il qf 1s iz qf nc xj xk ep r7 rp gu t7 wc t0 en tl iy iu pw kn km ql ct qp ch fl wm n6 rw eo qm vx ty ee ru ig +\n um rf qd db qf od d1 mb u0 le xu wy q6 mt bc qw cm uu us r5 uf or tq ek sx i1 it la cb ax t0 wu ab 1t qq g6 ko g7 mk qr ey ha ea qp y0 en ue tv ho i6 i8 sp xs qf v9 jl kt rk qy ot 14 na ub aq op yo en tk ob on tx f0 qk jp vi iw tj x9 zi n6 wo wb se aa ag oc gx +\n iy ub gy pt pd qf me xp w7 rj tk r4 rx ui ii r7 us pb pt g5 fw gf dm wi w1 eu re tq oo es pd ri tl og s2 fx ap ok i4 di lh f2 1i 
vm cp bh wj wx of on tx dt h2 hl qk wq qz lr tl f3 ce kp yr yg ro yx +\n k6 qf cp la wp gv es pl uo eg am tf y7 i3 hd jk we d7 rl b8 gg ug es rt p5 eg em tz ow 3y eo wg t1 lc wk ol tj en ak fc f6 df gt ol qc rn tz wv rx di ov +\n pp qd iz qm vk jg r4 pl ym y7 sc qn jf qy rq p8 yr di qu hb wd rf ks gw qg s1 x7 ec ae iw eb ai sq v8 h8 le ea vh yw yp +\n ik a7 cp sq q1 lq ql wa qz lr zh rp ra gb w9 ys ui ym px up r9 pr ek qv qb hs bg wt ku pu dc p1 qo ik uk y9 y0 en hr tx ts pk jl ce lj l5 p6 v4 wk nu vg oy aq aw rg os az uj kc py ql oj qc pc fj jr bf cx es vn q4 y0 og w2 ue u8 is ag ie yc +\n rs dd ik k5 hm dg k7 go q1 qk wt q7 wi ws t6 k0 go ii ee io ym ey sl sz sw jg si d3 qq qw nh lp cc kw xt m3 ip ln nf zm qq tc ex ry at iz p7 ux of he og dq e1 i7 pj sp s4 ok qt gt sd xf ow qr pd hd wj qh x3 yb lx wx um e6 t8 s7 uo u2 it sw pm rr qg h3 aq ze h8 ks zb kb bh ec wb vb w2 oj af +\n ak ds dh jg cp ws q5 nq wy su q7 kb o7 ys sf et r9 ta sq y6 dn sy cx na j6 jc qi qw qe qr rb tn 3g eo uh tr ft ri uw of i5 ue ta fs s3 uy as ss qu ns lj wp zf wg sm x1 ix mc va mi rx ej yy y4 t7 ex u1 y6 u3 up en au ds ap kv qh kn gw k1 zv eu lu kh tx qk dr dh wm ti h5 o4 w6 yk af fq so aj +\n uv sa hb ps q4 as wi ej qm zc yd yn fp y3 td hy ue qw qy es tu uq tx e3 jz ud sv l6 fu xh dq wk wx yi dj qz v0 qd ga mp wm yy tn fn yx +\n qh qz ar qq ma kq rx qa st ei +\n dc df il he c1 jt qn yd yn pe et pn pi d7 ke g2 j6 rl sk ng z0 m8 mh qw j1 eu qu rr es ec uk ev ul pf e4 sg jv m9 qf vd wk gu rh e9 f3 on qv vj dh aa ru ux yx o8 a1 +\n ra qs h0 qh bf q3 dv bl mr if ws df ev b2 pl om tz ax yk ta y7 aw dn zr ax qt m5 xx wp qy qi qo at ti p7 tv i0 fm qu sh so lk qp hb p5 xk ib vd hk t2 np ek yt um u1 ir sm yf ug az qj v5 wr fg zv af qv ck ay cs ww pq wn w1 yh as yk ei +\n tr yb df um qf iz k7 q3 we cb cj ne zg a2 e6 ya r3 ut on rq io ow qx ja qv cx cv bh vj qr lv pc 3a rm ep uk ed ev au p8 so fx p0 ts e4 fb hj qt dy px sf f1 zo vx qa wa sa qs vm wf xg kf fz r3 bu t1 tu ez t7 va e6 fl tz uf gg io qg 
qj h5 zz nh qz zt et ba lu tq vz xe bb md u7 oh 5k rv rt tb yu tn ah +\n gr da ty qj by we ls av kc qc wi wo xr mx cm yg oe xs pr ua pt dk oy hp qm qq zw vk xi ln he rx ko dz yt qe tv eu qi yz tt y9 ev ry ym ay uq pg oj aa s4 sg hp f1 qu wp qa bn vi os iz hw kt t2 rs wl r0 ez rf pv hs om dd f8 uj dj pt dk km k1 qz qx wc n3 nl wv qn zo vx ww dr yr oj r1 tq +\n ra jg jr ao c1 wh rj fp gz iy lo gc dh qw qr 8p eo ev fu tl i5 uy uu ui qp mb hk yt ou aq oi e9 ip dt k2 qx vb mf id +\n rd h0 qn ql la vg qz lw q8 ra sp ts pr av qc vs vg ku am z0 lo ry ev eh i8 aa pl dt du i4 zs w7 wj xl yg yh ra ex u4 pn lw gu pc on n9 n0 wm em tn +\n gb ik rd ql xi bd yr e3 qq w7 ex rz on ui yg ax fo pv ab ta jp qw xi wi qw qe fh mz eo gk qu uj ed ev en fo ux ye fv jv ws lx kr kf n5 qj ea s4 vh ez um tj ir od ga tk f5 dh uk pr pt in v9 js sv qb zn wb vl zj wm ca mu zs ef rl yw u8 er id uz ah +\n iy ij ub qs lo ql jk dv h0 wy cm q7 wu eh fq w8 hm w9 mv yd rz rx rv r9 eh pr ek dk el hi qc sy i4 qq lp jj we m2 g2 fo j5 wy m6 ve tx yg w3 rv rn rq qy hs tt y9 ry ym eh to e1 ur ff hk do wa kq jw p7 yp ky r2 wx oy uv ra yt t6 yy sz t7 wc s0 of ds om kx ng ql qz vj wt wb ly wm lw dh md ew w3 tv er as yu an gz si ro do +\n o9 k7 q2 dt 1i wa uu t8 ut mv ef uo g3 gj hp jn nt cm rj ms wi b6 im qu eo yc ex qp eg sf do sh i8 ih qa wa wf kd yo xj ql wf ek un wx t7 s8 rj f9 qh qk k1 lq h7 in nj um bu qv ov n7 bh bn 3z w1 yt et o4 gl +\n a2 gt rs ty rd rf qd qn jf qh k8 q1 qj ql d4 cg wt q5 z7 lr wf wu q7 sd yg yh g1 eg to el ih sw tf fg qx dv q0 wq qq uu px vl xi js jd ze la ud qy rr ky ft i5 em p8 p9 i8 hg im as jz tn qo ul wg 8d vs ap mq x6 no t3 ub wl tf iw rh ox ua pv ir us pb tx pw dg h1 uk ux cr sz ko wx jw vl rc tv af du ei +\n gm we cm jx lf vq vw kb wk e3 df r3 r4 ew yf ti id fe fr su xr sl jg rq rw uq tp ss qy ws od nv wg ro t7 ar th ak da yf sw io jt cq v9 kb iy u9 +\n qd ga q1 h8 xt um wt nq wy wp a2 rg w7 hm cf tj ut r3 ch oe r8 pa qb jb zw mm wq pl m2 wr wy mh hi ei qy nw uf yv s1 fx ut sa 
tb ss hl qu qi zp nf zd ar l5 5h gm vo ix xk wk wf vf el r0 sx e6 uo rj f3 em dd uh qj cf wz n9 ga tc qk mu rt ye w4 o4 ad ag +\n qa jr kz c3 c6 vp e0 ng wu ug ty uk tu to hr sp ud m0 ar pa qf wf kr fi ya kk wl xs ed mp x6 ub gu fh rj e9 ya om wl vj ha ex y0 id +\n qm q2 oh cd q7 kk ld ys yd rv yk id wt qy iz ri fi i5 ic e1 ht 5z iq ha ai sq pn al gh un kt wq mi dr ax u8 u9 gk ru ov hc ep +\n iy sa un h9 rf fi he uc u6 cd q6 wu zl zz rk lf yd rx d7 ef er rb d9 r9 im hu zv ps qb jf qm m8 qq ji g2 kt qq ew la xy qo es ft ik tl ye ur as tb m9 i8 qa ka qs bm zg ix ya kl t1 wj r9 oi um aw yo ie ys yf hg gq nh zc sb nw qf xm bc xr bj es rx w3 yj iq tm di gx o7 pi aj sp +\n il qf pd k6 h0 na is q8 4p zl jl z5 hm ec io sf dk if gd qw 1a ld lf qr yx re tq y9 pd iz yw sa wp bn jq w6 v3 x2 br ta yi ha en o1 io ip pr kp nl lt kd eu kf kn n8 zs rx ux +\n ih db gm jd wr zj xp vp qb c8 pc g7 uf uz p7 sh or xh xm wh mt no fh dh wv tk li qm vb ms if +\n he ql wi bn c1 rc ip ia av or y8 mx yr dx ex gz 1p ic wf aj kn 51 bj wn o6 +\n hb ty dv gu ps qj ls qz ch q8 zh xp bs vt rh oe ot pb y5 y6 fr ih sc q0 re zx lm id xp yy qr ry ay p6 he dq s4 ff qt sd vx jb qo qp gb ws wd sd co fp kg s1 nm rp cu 8l y2 tf ev sn au us fz hj qg wc u4 au qh wv bn eq r1 +\n uw vr eq rx et rb fa ek id qx ui kr wn uf p4 tl au hw tx im sf yd dz bo wb xw +\n uv yb ik qd gm gp k8 qk ao z6 ps mw zf jc eg a1 wa 7c zz rh yi lf pl r7 yh d8 g2 r0 tq su cz pl qe qe wr wv ku ho qt yv uj ij es ec ik yn ym uw tl sm he p8 fa ho wo gy ws zf bw nb 5q ql t1 ro rp ej xg uv el l8 rd wz rg go rh sv fh ya it pn hd ao az tc dr ac dy ot sj nd qz ok um ol sx xb wb wi n8 ji rz yr sr h6 et o3 ru rm pi +\n rs fi ag c3 lw ys ef sg qu qi uq eh e4 gy qt ya ro hx oa f5 1j qa cl wq rl yh pu +\n ub rd qd fi jl zk oq r8 y1 tp sl i2 qn sd cq 6d mj w3 p6 ta fm bo nv qi wh yj e0 ao uh kn h6 r2 +\n pa q1 fm c4 ig ex 2a yi mx ek ez dv jf qw qe 4s xt ld dh qq mg qr yc eh s4 hj yy s9 pv rr uj or qj cd wc ly x0 wv hh ye ew yh rb yk o5 tm +\n pp q3 
mw rd up td j2 lv af ih hb ee xh yy ua ug aa tb +\n sa tr ds az qd fi dn hw qg dh qh nt z3 qz ad q7 q8 tf vu ue mx vp lg tz er yj to hy fr sw th qn hf gx jj pz wt lb cm m7 wi b7 vr lo yl qi ry ef sn uq ri fx oh i3 sd i4 ho vb wa qa ik uk ar hw l8 ya cw s4 wg r7 ot wk gu u1 fh th rk en sm u5 iy iu re pr hk qg kn gq cf h8 nj ct gp wb qg hj wm cy ok er tv u0 sy fq o6 gx eo sp ob +\n yv ak ra co wy zj e7 ew tl fo ek ez im q0 jm bj lc tc rm ec ou bn sd os x2 lh wj ot oi y6 e6 yp ob sq p0 js qh el bg rr rc xw pu +\n ih um k9 q4 ls jx ej om sf uh dz oi qx cl zm qw qr zc qe i2 i6 uu qp wp ws qd sd fj mx qk yn wj ub gu ar pn rr qg ln dl al vg mf w6 +\n tr ub ds jd gp qk jk d4 kv xo ws gi yo sj sl el tw i3 ow qe zx nx b4 qq ee eu uf uh ex p2 rr ea ry ef eb y0 en ri eh e1 oh fx fv sj jn xf qd vi l7 wg x7 r9 uv ek yt ns aw sx sc vf tk ud ds o2 pr kv ab gt v6 un qz wr wv rb os ie u4 rm zm rw n8 vc za q3 zu yy o3 yi ag pu +\n ij a4 uj gg jy dt 1w rj a6 r3 ii pe r0 ej ta ts ff i2 ho ov wq kw qt ot m7 qq xv ei lv kr yx yb ri fa ur de pj hi si jq wg r4 x5 hj oy 5i u3 tx tc nd v6 oz wc qv bz qb qj zl dg ed ka vh w3 yt ey w6 +\n o9 ft az ps hq uq a8 ql we wg z8 ye wk bf rs wa c6 dd ys rl wy om pe ix y3 g4 dz gf se tg pa va jn jj al qw sf ma j5 qy wu xo dc rn se eu xb nw qu qi p4 ef ru sm eh im ad gm jv pm zd g0 wg qf ai qz ym qc t8 op iw ox ay tc av k1 ko vj qb zo wq bg q2 n0 rl yt as rn uz +\n o0 jq qf he qh 7k q7 kb wp z5 tl ew yg et or ez jp g9 jv gk lx vk vw qa qo ou qg kk xz rx ro wc oa us ip x0 ku hp jr o3 +\n pp dc gi fi qf ql by we la za un qz q4 jc zh wp kg o9 qm bm wt r6 rw eg ix yl tp am a0 aw i2 fh th xq gc eq xx yr qt xt nw qu ri uq tl ue pj p0 hk vx uf jm kq ws jl 1s p6 ca he x2 wk wd r5 wg bi hk ro wj t5 sx fh sv e9 ya aj e0 pn ao ug ac kn h4 gq nj wr cu qs kf vx xw k1 og yt u0 yk di gx dp +\n ij tt ss dv a6 uz gp qk cv lq ql un kz gj wt 4y fq lh z0 6h w8 vp r5 ee tu sg pv y4 pu a0 tg q9 gx qq qw we la wx rr ls zy qu wi xz wi xp ew qe rn ei qo uj rt fy ik tr tk sn pf 
i5 tc sp s4 in gv i0 f1 si ks nl kw bq co mu eh oi ec sc wc u3 ga fk sm om kb wx bo zj hv y0 en og q6 er tv tb rb u0 w6 tm +\n rd ik og q3 q5 cn xp c4 ig mi e0 rc ym id tq ou po gg qb sy ob hf pk xr qr up j6 ng xx b0 qt tm eo vw ux yw pg tc e4 gy p4 xv pd wg n5 r3 wk iv rl ht oy uv ub wc ar t9 ga s0 em pw x0 pt bw wm vv vm yg fn ad af do +\n hb gg kl q5 t1 mi a4 b9 r4 ee up pr g8 gl q0 cc kr c9 vq yy wa qy mz ty yn yq ai og tx tn nd ws 1d ky x3 sz td gu t8 op gs tz de av sk cy zm be wv qk og uc +\n ph qj d2 cd q3 q4 bi wp vt oq y1 ps cw kn gz ij p4 sp e4 wa sx nj v1 w7 me s7 e9 tc k1 lw zc vj wb kb tw a1 aj +\n rd a5 hq qg qh q1 la cd kl mp k8 mj vy zz yu ut uo sg yz hi gk sy q0 m1 qw b5 wi dz qw rc aw eu zr uk ti em yr lk kw nz wg fx tt wg x7 rr lm ju th tt eq oc o6 yx ro o8 a1 +\n a2 dm wy ej rh rg pe a9 oi y7 zt vk ga yf pp fh ml tj p5 sn tk im jv v6 lb pf zq ty wh t5 go sv e8 it f7 ac p9 cu bw kg qk f7 w2 ee w5 tq ep +\n vs rg rj 70 ys nq uf ex hh jn kg ep e0 +\n df q3 u9 4j bn rj hw up td i1 dc hi zb wv g3 l1 rz qt qy ty tj ef eg sm tx ap in px af mx r3 r6 t2 t4 rd uo e0 iy ii hh qg zc v8 qc ch px zy zi ye og er tm if +\n gr pq se pp qe lq n1 cy qb wb ey +\n gn q1 ji sc nh pe yh qt ss rw hf kx zm o6 gx +\n ih hq ap bl wi wa 3y er pc eh r0 yl ta ts dl gg fg i3 hp kw lc ls rl ff wg qi uh uk yq yw ok as wp gc jm qf to yy fh e9 pn qk sz nm li q1 vb yq 2e rl tv sy di sp +\n o0 yb rf k6 qm fv q5 wi rp fe dd mz rz ee oe ia yz am ig hp fj su gl li we m2 ls xj md z0 uh yx eb eh ur i8 qy oe i5 jv ce jb jm lx ci kg oy hx e5 u2 tj kv qh qo af pv tz az vg yz ri ge +\n il z2 d2 cs ba wa 1o ys uy ed er d8 sh qx sr qb jd ov xq nu lv g1 ku pu rp qq ee et rm eu xh i5 p8 tx in i3 bn v3 ap r5 oy sl oo dr dg hj lm sk ff vu cu wb kd zi wv mu w3 yt ok rv ol ey yx ah +\n hv qs qf qm ph o2 zg wa a2 rl pz ow uo y2 ey ix us d0 ek fg ww j4 wv a7 wu qq bv te tr uj tp ue ts do qo cy uk j5 ra ou aq iq ev tj u5 tx gg df dg h4 qj nc wz v7 im kv jt y8 rz w2 yy ei ge +\n gt df jg og k0 lq kz zf 
iw kh gv cn ur ea eq yj ix id ez dc qc st wq c0 lm fa lj qq mj sw qe xb rq qo yn ru at e1 p9 s2 ts i8 s5 i3 sf jc xa hb qq gy wd p5 ow qf wj 1n wd qz yt ex e5 op at sm ud tz yf tc ax f7 dg qg ve sl qv tl wn 2h ky yv f8 rt o8 +\n yv dh kj jl wr bi kc 4j nt c5 lo z3 e8 on or g3 g8 uq sr qm hd ll c0 nh ws qu ug ph tc dy oq qo nj wn t7 ox zj qj km h6 ql zz qz qc hn bk fw ob +\n un qs fu k7 co je gp o1 bg dt vu rj mc rz rx r6 g3 ek qc uw fj li qw lx ku z9 br b6 mz yc ty tl yw yr e4 i3 ud ce i9 rf bq xh ep wf ej rp vu wl u1 os ay of pm ap gg dt hk ab ql lq qs lt nl u3 wq n7 bm ef xm yh w6 ru fq tw +\n qn ql zj rg ed hs we qw re p7 p8 yr tv pc lh gx i8 wp lz cp fv lq 1b di +\n rs q0 is q0 yq rl qq vr qa v3 tu in h7 zy u9 o7 +\n ak pw af z0 wa jh tf to ey r0 ta fe tf th wt m5 xu wu xo l1 b7 bv se w4 qi es dy ge vm l8 nv kh l0 j9 bi ez iw ux f0 gt zc 6w bf cx u8 w6 yo o6 fw +\n gb py gp pg ql um lf bx 6q ra w7 vu rj ui pj tl ii y1 r8 ac eg yl sj tp y3 py im tg zv ll ip wt av fq qw dc yu ei uf qu tw y9 em tl dq fx hk oq zu jx hb i9 bq iz mv wd qi pz lx ek aw fg ag u4 ir tk of pb az f7 qk wz le qs wy wn dd bj mp yq zb cj rc tm yo +\n o0 pa rd qf dn qg nm ji q6 cm wp ec uu ax r8 us aq se lt g1 bl vz jc mj 6h ul en yw e1 ye tc i0 do ge gn v2 r1 7f ed x8 rf oo t8 s9 u5 pb fc ug ab pt uc ce qz h8 u3 ga fl bc yq iq iw id o6 r2 te +\n da qf qn nm wr jw c2 ig rj vi ys pl ed ii ax y1 ua r9 ia ab tq an ek dl sl jo g8 qv gl hs jf d5 c7 kt ix wy wi lm qq dz ko qe ff pp qt eu wd yx ec p3 rt ty ik p6 pg i6 e3 sa fd dr gb jl as hz qo j9 j0 qs qd wf p7 br wd x5 hh t2 wl el rf tg ar rj tk em ud pn rw av iq nl qv x8 ov wq wb ec vm y0 w2 ew 5k w6 fw a1 +\n a4 a5 z1 hr qk q3 mq zf qc wu q8 bd a3 xf es yn rc et d8 pr or yl fa se dv dn uy vz wu en mb qa tj uq i6 pl du jx f1 xs qs qd vm kd wh kh mv ed rp ra co tg oa u2 ie ha ir da pr cq uv oy qb qb eo ye rz rc ei o6 id if do yc +\n a3 qs w0 po gx xo nh uj e1 lz os wj uv ud tx rr gq ve ch cs xz +\n rs rd qs qd hw dh q1 ql dy mj zz ws vy pj ea mv yf 
om uf sg up ix pb ab ej tq fr tg i3 nq dn gc wx wc qr wr lv t0 cm wy wu ko qe qr ze ug oa ed ym p5 os uq i5 tp pg s1 ok tb fb vx ns p1 wa qs ka qd lx ps sf zy kj pl ro rs rd wz tf ev gp e8 u3 ir od f5 dh qj cw qk h6 ql ad qb fj tl al zs h2 rl eq rx er w5 eu yz id pu hc te +\n pa pw q0 j4 4f vq iv yu ry fa e4 kw xm wj t3 ff ye w1 oj rv ul ep +\n mw di ec wt rx ko i4 qo l3 iq iw py yl +\n dx uv yv qa a3 ij ps qh cg qc 2n mc ut eq rz ea kc pz sj dl in db fk su qm qe m2 we 1q ke bn xy wb yq qq vr qw xc gf tn re oo qo p8 iv e2 tc e3 e4 fv ff pl sd qy qi si pm ws aa zw bu hu fn yj pv hs gh o2 dy ln v5 qv zb tk bq 4p qj zz wv 8n h1 y9 wm yw og mf u7 tq ov sp +\n iy wt gx nd t5 r0 yl tg dc qc th wt ld nd zn tc ke qu qo qp tk fa hh gn qp 5d qd ar rg oz fh at ag 2a kd nx w2 w5 +\n il wp ee ym tu ef dl el qb cl qn ob qe qr m6 mf xq i2 ud fn pc gw m9 l7 fs eh gi e7 fk ys pe ok 30 mf o8 uc +\n db k5 a7 je kj q3 we wt q6 ie ck kg kn gr lf pj io us id in aq jo qx su hd qm li qq jm we lc wc ld d8 lb xj em rn j4 tq oo eg fl sm he ue so hg ok dt qt px j7 qi nz l9 lj x8 wj na wk ez ex rg e7 u5 h4 kn ad cu oa qd do q4 eq w3 yy sy su r1 ri if yc uc +\n fr ij ft db dn gp qh ph ga gg kl ws wt ab q8 lk wp mi w9 tj pk yf ew us yz id gd y7 dc qx hp qv gl jn q0 d4 qw re qe cn cq pt wu lm dk cz yy qr rq te qi rr ea ex ye dq ok qt sg qa nk wh kt bt bd lh wf yb hu mw el rd rg sb em on qg wq oj zx wv n4 bo qf bf re wb ev yq cl yu w5 ad ag id a1 gc +\n qd ga dt ej kn r5 ax sg ix av am pu g5 ez fe qm nu ii zt wr vm qt jd im rb ml yx yv ru hr gv ad dw x4 ub hx wz rd ol oo rg yf f6 ii uk dl wl yw yu ie id +\n yv ga z3 kk pp nq le lr bp qc t1 c2 hc vt lw jl w9 ur r3 ys iu es rc ud yj r9 pb ot ta sk gj jm gx xg sf lc qr ht ve sl g7 qy nw vw fo ta tb oq fn qu hc qp op nz ow wk zo zq yg wg ke yt kr yy sb tk rk tz iu o1 rr pr qh dl dz p0 n2 ci cd vn ms aa yh ry w6 af du gc +\n q1 q2 mw bz k6 xf ec r4 rx yg ed d8 pv is sj qc q9 zw vf gc cv d0 xx rn ex uj ij ts ff ge i8 zg 4m x5 vh oy yy y5 wc tg at fk ob 
fv f9 ql bo k4 zn be ww ea ry tv w4 ru ov +\n tu g2 tq od pk tm fi y1 uf ku wn ew +\n hn rd um qg qh ph aa zn bc gv w8 rj ea es ui r7 r8 ey fw gh jf qn nu cc la 3u bx ve po et ei eo ea qp ul i7 uu fb dt pz qt m0 zf l8 kl t1 ej wj oy rf th rk gs sm em ap hg o2 ub wx ka hd br q2 hg y8 2e eq tb an oc it ep +\n tt um gi qh lp kl lw q4 q6 ro b1 if y6 qc th g0 q9 qm j2 we xt xi nd is ng bc ku yw sm ye dw e2 f1 lg qo wp zs gy l4 ul lv w9 br xl ql vf as yg y3 t5 wc ec iy hf ii f6 re hk qz jp oo qx xv v0 f2 vx cd vb yq ew zu yx +\n lo dt mt z3 rg av us pa xq wq qe qt yx y9 ev ry tk hu oe oi r1 x7 wk wz td jy ww qc 15 ba hd mo 72 +\n rs wi vt rh we jl ur tz tw ht y8 fh i3 qb qm b3 qy ep op yn tu ay hw fd ug qp qd x7 rs yy td u1 t9 sm uh dz ql 4t rw kn wn rc eo +\n a4 d3 kk q5 q6 wf wg m4 2b vt w7 ur uo rw pc g2 sl if y8 fj va rr ld wn xo qw rn ml la qu ep re rr qo pd eg og e1 i8 ui qt px i4 jc gq oe jj ws ks qd ul zh sm ql tf go e8 os tk rk ay us u6 dw dh pr qh qj oy lm jo cf ff wm br k1 en og aa rb yj o4 te +\n gb dd rd qd a6 qj wt jx z7 xi q7 kv mv uy pk or yk ek el y5 ez aq dx qx th sy jn pe vl fp xz m8 ng jh kq qr la ei ft p9 oh hj tn ho ud xs wa jq vn il zg p7 fp ic qk wd bu e7 go hd sq ug hh ip sl ch qc px wb hs rq qh bk rl ef yw dj yt u9 st yi uz uc a1 +\n al tt pq um uq un z6 wa vu w8 ed sj r0 tq pu dz qx js 2j na ip vz lm qq qe yj ud ei wf qi te p1 tu il tk iv dy i0 xg sf ix gp fk ai qk lm ql ch fg qv vg yw rl yy rv rm pu uc +\n rd ik qd jd ph gs k0 q3 qz ia q6 af q7 6w o8 tg gi sj ou aq po ja qn q0 qq gc nh xt wr fs m8 qw aq wp ho qu rm uh tr qp tj ay fi ao i8 aa i9 tv gv qu wo lj vv wa jm qa qd uk nl g0 qd dn gw ic kj nn wf wg dy ej rp y3 rs yy e6 wc oz fh eb e0 of hd yf uf ab gw qc ww yc cd ji y7 n0 wn cg u0 rn yp ie a1 dp o8 +\n ih h9 qd xy yl ez g9 lt qm on vg rz dx qa kt p1 ex yx od uz e3 in sd oi qa cq dr me e8 ua ya dk wx bw 4p vx tn o7 pu +\n ij rf fu jt gs ld wu q7 wo 2u bd k8 rg ws gi oe yl if sq hi dn jn qw bb cb wt lm sq vr qe wd qi qo tk uc hp i0 pa w6 
fo v4 1m x3 t7 u5 sq ai hg ap hm ju fl tz wv w4 ry tn yl fm gl ox +\n ql db ch wu rl ih qc 25 pb qq ty yw fp ao qy sp p1 qa rf iu rw qg uk gq km dz wv at qb jo eq ur rb ad +\n ra fy gm iz qk qz lf bp kc is nr ws mb es tl d6 up pe ey sj fq ek iv pn sx ly qb jd jn q0 lp m2 xl fs xx rs b8 hu rn ey qo yn vr yq ti ai e1 so ts jz du do oi sa i0 oq ow 3r 1a wg qg r4 yv ty r8 wk wl s7 u4 of e0 gd pm yf h1 qg n1 lt sv qb eu wi mt ez w2 yk su dp +\n rd qf dz aa bo pd pm qw rn tf ah wu +\n dv jd he jt ao ql zg eq pz oe dj fw sc fj nt io fi lr xu ns mx qe qt ei rq qy yw em e1 i8 sp in jv wo ci fa x6 fn ej wv oi um yi oc au yf mt te az yv yi ad yo yz +\n uv hv ty qd nv jh qj q1 ql we fv vj q7 gw fe wl vu w8 vp 6k yd r7 yj ia ey ot g6 dx tf im hu ae qx fh g9 lt fk hs su ov lo m1 cn t9 wi ki qq qw xx aw nw es yb vw yn ry pd ix em so ut oj du f2 lj qp jm w5 xh ny wg 13 wc qc ek el sx oo th uo yo at pv hn uh hj qj zk ql lw zx qv cc wv en yr w3 yy st uy iq ox an ah +\n rs pa df il iz qm k9 bu jz bi ji du lo ts yy xf cv gu mc ii rc up d8 ey pv av to yz y5 fd qx sc qv jv qr xt bk m6 ot md qq qt rq ex p2 yn sb xc fp pj qt ce wp i8 j0 wa qd gn ps qf be qt wh ky l9 za wg rp wn fh rh em vn dh vw ng wz k2 h8 f1 nz zh yz q5 zy e1 yh ad tw ep te +\n db wr a1 pk ew uu r7 d0 pn fe g8 la qq b0 ef os oh pk fm wa wk yi ev ua sq wu n5 tw xc er +\n fr yb qa hm d2 q2 o1 oj ox dm oc km kj r5 px rb et r9 y7 vs pj q0 4s lm gs qe eu ep ti tl of jq od pf nr nb ea ej yy rh u2 iu qh dk zl qv xe 2k vc vb zb xn yg gk ox tw +\n hb dc rd fu gu zd js wu xa c5 a5 w9 vo r5 d0 g3 oy ib i3 ha jm nu rr wy m8 b9 ws qu ru eg uz hw eh hg sp sf do jb zp dp nz wj wv fg ae ah ob hd dd gh dz qz xc s5 vi je n4 re bh ma wm as st tn yl +\n wi w7 om ug qa x4 yn r7 gi re n3 n7 +\n ij pa qd qg uw qm d4 z4 q8 t3 mu yp uf ia pm ez ih qn jm nu kw qr j6 qw bb yu qs rw wf re qi ru jz tm gw lh ce xa wa xs bn nl ys t1 wk r9 gu oc u4 hd ku hp yg rv as yz ro dp +\n qa ss dc gu qk cs cv fv nm z7 lf z8 xs a3 rk r5 ys eg y1 po qv cl jf xq 
vh we wr qr b3 bv yr wf re qo sv tj ti eg dq ic fx jz du qp vb ws nl wd wj zq vg t2 zf wb tf yu ex yi yp tj en ua ud dr pe qk dl lq qz h7 ol v8 cy vi wv ck el gd q2 yw w1 ye xq w5 gk o6 ob +\n k5 hr jk ju k3 jq q6 zg wi id bb wa gb rf rq g1 et ot pn ht dc ww c7 we c8 cm xo mg w1 1l yx tr p2 oo lm ry ao e2 i9 e4 hi tn di lv cp ca 2u t2 no ub ex rk ys pw qg av py ql qo lb pn eo wb er tb yk ie id r2 tw o8 +\n ra qa qd ph jh d2 dx d4 2z jl q5 ld cm wu wi 2t wa dd lg ui to id in uq ww rr g2 wu rl qe 1l qi qo ec yn ed uw p8 ut sj ig p4 zh xm p8 vs vg x7 ot cu l6 sx gu yp t0 gs az pe nf wl qz nj lr cy wr qv tj s7 u2 ly be br ym w2 af ri it ob +\n pp uv nb bu kz wi ah z3 c6 rg la vi oe ia ot pm dv gk 2h xq kq xg bv qr b9 j2 ec od ay p8 qi wp zd ay kg ea mq 6b qc un fh tl u5 av ub ji zx k2 wc zy 1x kc ah rw vc wv yq zv e2 rp +\n a2 iy sa ft pp un qn qz ol lf mg wo fr vu ya rk w0 pj pl el dz i3 jd su ob c8 pb id b9 ep yn ru tk s3 sh sj xa l3 wa nj ke kr ic xl bd ej rg yo f3 al f5 sw re uh h2 av cq bo vk kf bd mu wq wm ew ue tv ol tb o3 ul ov +\n iy a5 gu q2 se ls dt zf o4 dm ez jj uu ik ue w0 ya ea on ui tu rv y1 et r9 tq y5 ht dc fg i2 vs q0 av iv ku in il en ri p7 uc e2 ut sp fv qt gn f2 wo qa op v7 ws l6 wh ys zq t2 wc y3 sx yi t9 t0 ys of rq ug o2 av kn h5 ju ji ko v0 nz wn kf te dw u8 yt fn r1 ie yc it +\n qj lw ji eq oe g7 jf jc yr qo v7 p7 wd ma xg wz qb u7 w3 +\n un ol eh g5 px b8 rr og gn mx yf wv sl on jo uz +\n qg qm q5 wy eg ri bm mz d5 rx pt ek fs pi td ez ho gh q0 ll pl kq wr d0 l1 qq ko er qt wf ei p2 ru uq ye tx s4 hc zd vn ps ix zo wk t4 y3 xh ez rf u3 up ys ou xx zv qa wb at rm eu qj wv za zs eq zy rv ry tn tq yc ob +\n ss qa pp rd jf a7 lp h8 um kc q7 wl rg r3 w0 wy tl a0 ih ly qm qq m1 xg qr up ja b8 yh dc lx rw ep ea ev ay ux to p7 tp tb i3 qu gq do gr za l8 rj og oy ub e5 ae tg t0 sq tx hj ad gs it vc bh yb eb w2 yg u9 w5 fn iw si di ah hc +\n ub o0 ik ps qd q1 ga lp cf kl m3 z7 q8 ue ee ud ix g5 ib gd aq fh th pa qc pg ue ur xw ww qr vj m5 jd ng in tx 
ff xc er qs eo qi p2 ky pd eg e1 yr ut ib oj tb hi hk ho gm qu qi hc ou gn wg sn wh ix wj wj wg ot ra wl un e5 rg s7 t0 oc sm tz hf fx pw x0 wz th qv 1z zt n4 qb cl wn xq xr y8 rt y9 rz tb iw vb +\n qn lq bu eg iw wi 2u 3q t6 k0 yd sd ed rb eh ek yz if pu y6 in fr qc qm ob il ma b4 en wu dk nh b8 qe bb mj ws qt ho yl ug qi ea tw uh p5 eg tl yr i9 pl lh ce i7 wp qa xs dn 7p kr p7 eo vs mb pk ni yh ef rp wj ej y2 iq y6 u5 em e0 ii md jy lw nj fh bq pc xm km q2 wv k1 rx u7 ut et gj tb iw gx fw yx +\n qa pw k6 qn qg qh as u0 dy q7 wp hv 4z 4c w0 d7 et aw wr bl mx md j6 an wi qt lc yx ec tj ri fx ht in gm ua qo qa ik ys eq n7 wh rs wz wx e7 eb ak gg ip sj py ka rl su ag +\n gb uz q2 qz wr q4 z7 ia ad je am my vi zc mx ym r7 yk ua pt g6 hy y7 sx ih qx pa hp jd sy gk nh no qq yg rc 3s qy ep p2 yb oa tr eb p5 en ic yr dw tc in hk qt zp i8 lz ks ci lg x3 wd xa x5 zf yt y5 op u1 oa iw fh oc rk ay pb pw uh qg zj qh h5 nf cd nv qx kp qs qb 6q cl kh xe u7 ew tv sr as rt o4 ey tn is +\n hv qj bo ru z9 t3 lj q9 rg vi rj sd r8 g2 sj yl aq fe po pa qv jf dm qq re we la wt wu qq vt gj ei yz rr xk uk pf so pk im zu ua sg j8 sk zd sd xz zw kl wk ol um yi rh sv u5 pb tz dl oi wz h7 s8 qf wn cx f4 mo wb ed oh ee er ry eu ei oc fw +\n hn a7 cv q6 cj q8 fs jv rl qq qi od dt l5 co qr zq ex u2 ah on pr wx kp wb yh gx +\n gv qg je zg jc q8 fr r6 yn ii g1 pe sj ta el jo sr jv ni jj zr bj ns qr qi ur hz vu wh cs ep s3 hu ez rh u2 t0 dw uj oi wx n5 18 bf wb yq oh ov +\n gb dc um jr mn we bl t6 vi pj c4 d7 rb ia yz tf qn dm ke xb ft au ix tv xd qq xg rx x6 vg r8 wz op h3 qj qx lr xv qc kc wp lq ea rn rm ri eo yx +\n ra gt qs dv ik gn co qg qm qj cb qz z6 wt ji q6 dy qc b4 ws ds vu cf on yg d8 eh py hu tg qc hp qn d3 wq c9 pv pr or qq ml eo ug tw es il os fi uw to em ic oj ho px wo qp m0 qa 1o ks 7o cp wh wk wg x5 ee yn bi ef wj ns r0 ez um u1 iw eb ir fk ov s0 fl hn h1 pr x0 ux cd wz aq jp im k4 qv bp wm n0 vm u7 w4 gj tm uz te a1 +\n gv il ps db he nb wr ql kc zh tf mp lw ab us pn a0 tg pa th 
ps hf wc 1a yw l1 fs eq wp qr rw yv tr eh so i0 qf wf l7 wg na ou ah ay f4 io ip f8 cd h7 nj rn wb qb qn wp oj w3 w6 di id pu eo +\n hm fu pd qk bi wf q6 wu b2 q7 q8 oc lj c3 o7 6s a1 jh rg rj 2s z7 ya id ez fr gh vl cl zq hd jh xh ru c0 bz wu dl qw km kp b9 rn eu yc yc p4 ru tk ux fo ue p9 iv tv s5 do l4 cu rg w6 os fi 4b uz l7 ld l8 fx jb ee wx rp ek tg e8 uf de qh hz h4 qj gq nb wx qc sv go wm zi zo tc 3k ez ec rz ye oh ck w2 sy ia gk rm ei si dp +\n gi go z5 qz wj mg kl yh g5 y6 g9 xt p6 eh ap sa qu dw j8 ql yg aw t7 ir zj v5 v7 ba tw yq cz gc +\n ps qn z3 sw gs q4 ie gx ye wz r3 us ef d9 pb y6 tg y8 qb gc ww az c8 cb lv wy a9 qq qw l2 c8 qu uf yx qo ic de ut e4 uu tb fn oe dp wa uj bq sg mx lv v3 ya xk wd by n7 ra cp gu va yo u2 sv rk ir ya hf kc kp bo qb gp qb yc ku q3 dj o3 ey ad si o7 tw ge uc +\n dx yv ij pw a8 qm ph k9 dz q1 q3 cn wo wp my el bb uo on eh id yz am fe hy sw ha m8 vg wt vl wm qq w3 ls gj yx eo ef en ta e3 i3 zu hl m0 wd co zy l9 nn ea yj e5 rg gi fg gp u4 ir tl tz pm dd kc p9 zx sx qc qv kf ln on qm lw vn vm ew yg se as is di ro gc +\n al tt gu qf qj xo q8 c4 ws e5 ur vp ea rz g3 fw sx th db kq wt sv tb ad hv 1u gt ss xk wj qj pk rp e7 ha fk f6 dr rr hk dk nf qo lr ka ie fk cz yz q3 ym ks gl +\n gb q1 qk we q3 q4 t1 ox di ny wa ws gi ea rx yg r6 io ow y1 d8 ey ab g3 is ek pu ez dx qx th i3 jv fk io xh wt oe kr nd md pb vz wi ro se b9 tr yb p4 i5 p7 ux fp p8 sp in ok hj qy hz wa qq zd qd 3t wj aa dy 5u el yi uo go t0 u5 tl dq gd rw uf gg kb ux dj qj go wb zm lu tx vc es ev ry zt w2 tp w5 tn o6 +\n ra ps hm qf qg 4q we ql q4 z6 d1 wp vt xs tg 2s e7 r3 ys oq ef c4 av dj pn aw sr th hf gx uy wr ac zv m6 wn ko c5 qt qy yl yc uh od ri uq fz dq i7 tx fc aa uu qt oe i5 ge ce wa gb vn xg wh og ya xk fs ea yf zw wh ub 8x th iw rj ah ya e9 tl yd tx yf ii fc kx hl zj or qj mh ww kf zm lb ob qn wp ww wn ym u7 rv ie pu +\n uj by wo ml dl qx m3 8i 2y r1 u4 hj h6 qa xv rn rm +\n qa gm qh ql u9 ls e3 yk fa ts wr vj ac en id ud ke ye i7 fn tn f1 ks at me l8 
kl hk lx rp ek l6 ek oi e6 wc u4 2h pn 9y zq qk ec cf yq oj vz tb o4 iw ox gx te +\n gt ub hn qd qf hq dh q1 lp qk by ql lq we wr sy wy lh z0 ge k9 w8 th vp pz yg ti fo tp r0 g4 yz ig sx im i1 jv qn q0 wq nu 5t pw wm id qe gg qt xg wh en uc tz pj e4 tv i0 ff qt gm dp jb qp cr gr qa nk ws os wf ne wd mm wf t1 vu wz t8 e7 t0 od hs dq df av km v5 kl we fg cx al ax yq y9 rx yh ul hc +\n we cf q4 cj bf ws ww yd tk ef ek y8 qv fk wt ko qe ep rt ik ut op mr j0 ej t3 s8 ir pt qk km ww cg wc gi lu n6 yr rc oc +\n ak ft gy rd hn a6 uq q2 q4 lr ia eg d1 eb on sj dj pn pp qv i4 hs gx ww xj m8 ko im rt fi tc uy tb pl qy pc uf kf kt mv l0 qj x7 oi um tf ap uk wl ql zb vj wv tk re y7 de q4 rc ad rm ul is fq yx r2 +\n dx gb he dl k9 z3 qk lf ad ch js o5 vq zl rj wr th r4 tu uo r8 fp ic g4 fs g6 im fr wq bg no wt dg ru ln rp wi yd qq xz ew i1 kq qt rq wg sb pj hk qu vx oi jm pa vi x5 wf ni ro ot oy di un y6 yo rh sb us tz ac f8 av ve h5 ji mj n2 ci rm yx ep rt 5f yq u9 rb hx aj +\n ih a3 pi mt do w7 zc nh qe wv rs xc qr ts ut pj im hp xa lv x3 ph tt sc od rr qh km oc rq xl vv jp ef st tw +\n ft ik a7 lp jz jx k4 hz wo bv q9 6t ur rl qx qv js dm fw wd re ea fd hu jc qu zo p3 lx pf wk vf fj 2o wx sb gs it ol yp di eo ro +\n tr ih yv h9 k5 qj qk nb wy gk q8 c6 mj a5 yi ur rl uy eq up yj r8 xs sh a0 ez oi y8 ly lp lx fu il ke zy cj sk xq 7u ey p2 uh yv qp rt y9 eg pf so ph tx tc uy e4 tn j0 gy ik vm ul mt nm mm x6 wj rp eh sb u4 ov of tc jt pt qj jy k1 s5 qs 7z do zi cx wq wb ma wm uw sr w5 o3 o5 su r1 yx fe +\n h9 gm cp z2 fb zd gk ve o7 mt bc wp bd rg w8 rk kx mv uu rb or yk eh r0 y5 ht pu tf se ar fj hp su m2 cb c9 c0 b3 ns qq qw rv gg ij y9 oa od of pg i7 hk do pm 2n sj wa vm zw vg yy om qx nj v9 f3 ee w5 w6 iw sp ep +\n da jq iz z1 ls z5 cg nt zk 1i gt w7 yi r3 xi yn pc fa ta ez in i2 qc uw si qw d5 kw il b4 fa ib 6c ud rq yl wg tr qp p4 ry sm ut s5 hi qi do j7 jn j0 qs iz p7 wg aq ex go ax ku nc h8 n3 v0 oc ah wm li zp rl h5 is eu o6 +\n gr sa a4 ik dm gp q1 wy m5 fw a2 t6 rj mx 
e9 et ej q0 ot wu em fa mj qe yg gh j2 te tj p5 eg tk em ao i7 di lh ce m9 wa wf ys eq l1 t3 ej el tf t9 rk u4 ay rw gg dr hj ac qg zk h6 dz ok zy kc mr fz iu hk yj oz ey ag id r2 ov +\n ds fu ps vo qf qn pf a8 ph q4 cb le cn h0 jg gy b9 rk r3 pj yd oe ht pu ig ez hu q0 qm nu ww qw ow xh b4 is a0 qq hu bb cn xh oo qo y0 fz ue e2 yr fa pj in sa hj ui sp nj zg wj ge wk xg ra ex vs oz eb pv tl iu x0 ln cq xx iq s7 wv qs zn tl wn lr yr r2 +\n az qf fi qn u5 we jq zh wh c4 sd is y5 po oq ki sz qe rm qy yv p4 ye tb ho 1u gq r3 pl td ov u4 hg ax zj wz wb vl vv se 5k eo +\n tt gi pd jk lq q6 wi gx mj ut ax av g7 qv zm lo 5y dh cw xo ve vy xg yv iv i0 qq xg jv l2 yi sc ga pv pn iu ug gy k3 cl oj tv yl di +\n qk se pd gt rz uu d6 io d7 tq gf em ym tu ib oe v3 si wa nm wf qf wg rk kz yd hl wx cy bp mx et +\n uv yb dd k9 ph q4 me o6 nr xa mu ld r5 rb g2 or fe pa fj hp db lu qn nr j2 bk kt xl rn wf qp tl uc dq i7 tx fv ar sf xm mx x1 zw yh lz mr t6 l9 s8 ah u6 x7 yw ol tq eo gc pi +\n ia ys jh wy sb i0 cp u3 ql kl kh +\n po al qg qm d2 gs q2 ap qz q4 q7 kv ah o8 rs 2o ex zx qw z5 r4 r8 y1 is ts y8 qc dm ll we wr sg lb jx wu jv qe ee qt qy es ed ym hw to tx hr s2 oh dt dy af di do qo oo qa w5 uz wh kh x1 t1 hk no r7 rp na sl ej op ev tj eb sb ya pb u5 tx ds o1 hz v6 lw jp k4 wv do wn aj bc wn yw to w4 as ey yk is ig rp o8 +\n rs ty q1 wt wy xp e3 wa yd d7 ht ts sz fk su kw xg bw cw qw oo fu od ix sd zu jn qd ci fi xh mo cp ev th ua e0 em kc lm cu u3 n8 xr yq ti yj fm yx tw +\n dx z2 ga kb yo sf sk fd gf i2 vs qw vj vw qe eu mz tu sp xs qs es t0 eb ak uh hl n1 v9 wv kd o8 rp +\n ds qn qj qz cb kz bo wi o6 z9 fq wq ml cv cb lf eq r6 r8 ic y4 am sz sx po jp g8 ze we wu en ew qw 3q lz kw tt ty ti e1 fx ut tc uu s6 ow gm sh qp vn l4 uj op xn wh qk wz rc wh uv um ar e7 uf az uh py h5 lq vt nz lu li lm dd rl er rt yu o4 eu yz if +\n iz ld me z1 y2 dj ar qb b4 l1 mz ij ry to ad xs sd wf eo hj wl ex ie u5 pr zj gt oi wc kg my ex zt ks yg eu aj +\n gr iy ft pq um qj dz wt gj cn ru kx q8 
ws ue rk eb ee fo jb jf la ji ke qq qi qw rq yb qp il eb y0 iv ff zp l3 xm fi x4 r5 xd r0 ol wc t8 ae iw ox fk of pb qj ku xc ct wc ie xn zi wm rz w2 tb u0 su pi +\n a3 ss je un zf vq zg wo mx d7 pm gd vn eu yk tq ik fu ai qt qf j0 to yy at ii qk wz lw n7 ly +\n ub dz rv qt rm wg ea pc j9 qa mr h8 +\n a4 wp td ur px qq ki yx go wc tm an +\n po gn rf a5 uw qn q1 nn we is z8 wj ca t5 ij eb tz ef pr ix g3 ek ta a0 y6 sq pa wq cx kq qw we rt c0 mz pv py wi cw mj qt qu 0e oa tj ux pf to hr ao tx yr ts fd fv s5 ui qu j7 gw ug ss cy ks qf xm fk wg vp kt mv qj mn lk vh yt ol rf th os e8 tj ua rk on pq dg kv km wq kp ad bp os bw pb qh wp zs q6 u0 gj yu o5 if a1 +\n a6 he pu vd cd q4 jz z6 qc jc bz eh wi b1 ed ym eg fo us ib td y8 gj zm pk lc qr wu mh qw sw fi ue j0 xm kz y2 ev aj df h3 qk qc tx rr rl rc ut ad so ro tw +\n a2 pa we kk eg q7 lh zj wp 4z gi yd yg rc io ix r9 jb xe rr iz jd ij tz p9 qi l4 pa g9 s4 vi tj pb hd f4 qh qk lq qs hn ro +\n gn k9 qz aw wu ki yf e2 pk v8 xk wg 5y t3 sl u4 ya gd hn ql zr oj ig +\n fr qn qh ph k0 q2 nq wi zz rh rx ee ef uo d7 ix el fh qv dm vg px wi m8 qq gh ud qy ec fu yw uw sa tb lg us sk wf 5h qf sh vp wk qk zw qz wg aq of ys ak al f5 re f8 pr ku qx wc u1 lr qs wb f4 cw k2 ka hk mf yr w3 ro ir +\n gr qh ql wh kv e4 r4 oy lu qw pb et uj tb tn xs kr dt td t8 e8 uc xn eq yi af +\n q5 dt ed y1 am qv ut gx m7 yt rr yq yr dy sh mt wd wm th bv ym +\n tr qs ca lp uv q3 wu o5 c1 rx om ee er ta ou i1 jb rt ry os ti fc ss px jn gy jz vp ea tg ay rq u6 al de qc zt wn ez rz eq aa rm ox +\n uv ds h9 fy rf jq he qh h8 d4 wr wf du ck wi km yp ut rv io g1 rb av y4 tw a0 hy sz qx gh ha q9 qw ze 1a bz bx bv qw po ee wa qi xk ri i6 ic he tm hl sj jb qp wp jk qr kf sm l8 be x3 qu ql t1 x6 yy fg rj ua ug qg kv k1 wl v7 xx bj ae wc n3 zy tl tz zk wq re n0 yw oh yr oz eu fm do ux uc +\n hb pa h0 pg q3 mw q6 mg ls lh am sc gz al j2 wt t9 lm qe j2 rm re rr ry fl yw ux i0 2e eo bt vh ra ys sm pb on tx re ff wn wv tu rt ox ul ge +\n al fw zb p6 hi qy ay ou rg sx 
ag rz uy +\n bx wi kv t5 3w e7 sh ht ff nu la xo qi s3 uy jb 1o vm vi l6 be x3 ny pk aw u1 rk fx km qc be rw yn ey eo ro +\n dn q3 jf w0 e0 lh rv zv js j2 xg ld hr qe mk s3 dr kw kc dh h2 ql cg zv n3 ym yt aa as +\n ft ty qh pi d3 qz ip wu wi q8 wj a1 mg mj ut wt r3 om ua y3 ou fd dc zw lp xe cn dh ng qe kp qt mz xy ef ay od tz p8 i0 hu hz qo kw jw qf kf 3y v3 qj w0 ib ew t4 fg oc e9 ua ov hs u6 f9 h6 vj qv li wq iu yv xv rc w6 rm r2 +\n d2 d4 av jc si rs ut 5q pa mm s4 e5 tc km +\n qa uk uw qh vd d3 q4 xo wo c3 wl wa w7 w9 mc r4 y2 fp r0 tw fr g8 ae qn lp sd bk en vr gd hu j1 xv xg rw ep wh ed fu ul eb fl fz i7 ht jx ns v3 ll zs j0 op xf qf l6 l7 sn wk zw wg ej ti wb wz t6 oz rh rj uf je av dj h6 ql wl oi im v8 zr qv wn ku wb bj ef o4 yl r1 ei so if uc +\n k7 qg q5 kx oz mu wl ws rh b2 rk yh qn qe 6o 1y mj ei pf ye e1 dw hj hx do qo gc rh v2 zw x5 t1 t3 yu th e9 em au qh f0 qk km ql kp wc 5p vx bg ea ev wn wm w2 rx o3 yk ru +\n a2 gr qa az dd gm d1 k9 hr we bz lg mg ny wp xd mp yo pj uy xs ua pt g4 tw ez jv q9 qn wr nd md nf qq ng pi bb j2 eu yl ij ty sb os eh hw i6 hg m9 nj wa qs w5 qf nz zh hw wh be zo fs rz me yk y3 ub t7 t8 u2 u3 en ha fl yd hf qh qk wc 1x ze w1 oj ee w3 ey du uz id ah pi +\n o0 hn qz q5 du 2b bn a4 ex rj y1 dj yz y5 ig tf th js qv 5e gc j3 ls d8 yw bc cz tx mb wf ij ty ai as hi lh l3 qd n1 7o wf wh qh wg ot ra l6 el e5 s8 dg vm 3f 7o wn yw gj tb et +\n fu a5 cp ch hx hc rd eh tg g4 li sw sf il eg eb zj 2o cg ew uc +\n qs uc rp ml eb yd if pa c7 oq vk wu ot yq rl uz tv gb zu vm w0 sc tf ud qk wm ko yy er tw +\n ak tt ub pa pq il jq q1 hr k0 uv ql q3 kk zs q5 z8 is lh q8 w7 qw es ii av yz dl ht td g6 vs jm zr px bj no g2 g7 la c7 xt mz yx tt ym os to s1 ur ta s3 gv pl qy pc qi sh jv j8 gr l3 bi oa wd fy v1 s1 zp hg 5e t1 rp wj ms wl t5 el wm at fl e0 sq h3 dl qz ka ox tj qb wb wq re xr rx em ee yu ri do uc a1 rp +\n uv rs un qs um il ul q2 jy kl wo a1 rj tj pk ys r5 yn uo oe y4 ou y5 ar zb g0 qn gx zt lb 2v rm qy nw yz lm eg og i7 ht ss qy qi 
ge wf bw lv lb wh cs wk hf 7g yb zw hk ns ol rh th yo f4 rq dt uj qo fd wx nk rv ka fl mu rr q3 w2 oj ee rt w6 ru ul +\n qs hq qf qn d1 k0 q2 kk o5 na si bv mx e9 tz d8 tq dl ez qv jf zw ww wu xo b8 w3 i2 te p3 ry iz s1 ut as s5 hk wo wa l6 wh bq xk wj wd ra gu yi u1 t0 ak ai tc ax ip uj nf zb wv bd wo fz qm qj pe md ew rv gj ol yk tn iq yl is si ie r2 +\n d3 ni wr ws li mj ds sh sl qx vs rp ft ik e1 sd af ho xn wg zh 6b rp eb f3 u5 uf df py k1 wz vk vx k2 dg wm er rv rb +\n gv iz qz wo o7 k0 oq ti r9 us ib ps g0 jm jb tq ue iv pj sa cr kh t1 ot wc at e9 ys o2 ab qj ww za rn yi +\n ty qn qh nt ql la q3 kl wt q5 mp mg o9 ls lh g2 id ez sw hu qb cx nu pl kw wt vz v2 1t wi mh qr eu rm qs xb ei ij uj yb sn ai iv pj oj de hy gv ka lz pa nx wg wj kj cw zp wk eq wj t5 ns rf e6 rh ev t9 eb ir e0 sm gs gd ds f5 dd de fc f8 x0 lm qz wz xc wc kx cu wv ks lv kv wn pv ei wm ju ww yv zy yt e2 rb w6 oc o6 tq ig +\n q2 o2 gj q6 zh mg li mo vo ch tl ax ip ho wt ln ro wo qr tr tt os fo e1 de hg gb sk w5 fp kg mm l6 yi u3 fl hs u6 fx re rr dl wb jr el rw pm rt to rx w4 gx +\n yv a3 qa uz k9 q2 o2 bp ny zl mj vo tk rx ui g5 pu qx nr xt ls lm rv qs yv ik fz tv wp nk gn qh qi yf ek e5 pb au tc ac kc br qz 4t qc mx ly wm kb ez w2 u8 ei o8 +\n rf vo q1 o2 wt q5 oc 5l a2 es oe sc cx wq qw ky em tx rm p2 qo ft ed uq fs i9 ok lz v3 au xj v4 wf l3 eg ej ex wl rv qv ak mi hm cj yr w4 si +\n a2 tr qa fy qd qh oh kk lq dy bz oc jf wa k0 ip pm po qx wq cx we wr bj j5 yq qe gg rq rr oo ru od ix qt af or sj qo jn gr sk wd nc xo xl wk hh yf eh ek aq ex ar en tl ys rq pm ug ql qz wb wn yw og xw an if it +\n ak kk wu ig df w0 rb y4 fr gg i2 qb qw yf j1 ij ue s2 qt ad i5 yp ai l9 wk km ql zn yb ee rb ir te +\n hb q2 ld wt q7 qm km ws w7 vi iu yf rc g1 pr ot a9 pn dk ib qx hi qc jd jg hd q0 lo jj cn wb mz ec qp uj y9 tu yq au tp hg pl jx vx j7 wd 3y ca au rq kv qg dh k2 ok cf qa wv bp dw iu de k2 rt hj wm rc er o3 ey fn si if so +\n hq q1 qj d3 ws as ld mu rj ut d8 ey ou ib ez gf y7 qx qn vz qm vd zw ww 
d8 xu v1 av b6 mg gs bb g8 rm qy yv rr ry oa tb dt jb qa j0 qs l5 nz qg wj t4 td t6 eb ua s0 pn ii ac x9 qj k4 wc v9 s7 cj zy wo 10 hn yq vz u0 fm uz ux +\n ra pp qd d2 vg wj qn rh we ht st jm bv wu wi qy y9 de gw wa sb uz r1 qu pz ot td rg go e8 sn iy zr wc s0 ww ea pw ac w1 h4 w6 rp +\n fr uv pa jt qk q4 ls wu wi mt xa vu tk ed rb pe fp am sr hp qv m1 gc en yw qq qt ud ey eo p3 tj tu en ix ux ye tp ic s3 ad hz qi uf qa kr wh vd lk yn 5t u1 t0 od rr x9 f0 zj kn nk wp zs se rv ei pi +\n yb dc qs py qk nb jj ql q5 m3 mg zz e5 i1 zr lc zx xu vq hr xp by ei yx gl qi qp ij eg ai ph ap tn dy zp qp bp kf j5 ib vd eg el yy td yo ie ag em fc kx zx h8 wv sv q1 te wv vm yq ol if a1 +\n ak rd rf qg lo nq xy z6 q6 mw c1 cu z8 q7 vw wp a2 zz w8 yo uu ee yn r8 av yz y6 pp uq dv i3 db jv q9 2l xg rp ib lj sq tx tc mj mk qr rw te p3 ik eg pj e3 i9 im tb tn fm na j7 lj wa ct rg v4 he x3 kl bi r7 wj y1 l6 wk el 9f t6 gu tj od e9 tz re uh o2 zk ki cf lw jp sc s6 qs qb yn yw ms md w3 rv as yi ox du yz ir yc dp hc +\n yv gy ik k5 db gm ux qj gc w8 ea g6 dx po jb 5t pv wi rz qp jv v1 ea en al ii dt qj py w4 if ux +\n iy pd yg qq p4 in qa y1 yy ta fb zk s6 lu +\n ql ws rv yj jk ke lm ff lb fx s4 av uv wl n1 rv dp +\n qh d3 rs ih rc aq we 7y ud t3 h2 zt cu oc +\n fr hn k6 je q3 k4 tm zh lj aj li a4 t7 w7 kx ut pl rc ih th hp wq pl ls ma oe lf wu l1 ve lp qt qy fi ti he oh hi ow tm cu p7 nr va r3 tt wk wc s8 s0 hs al o2 hk x0 qj lm v5 wl qz 7u iw os lu ah wm hk e1 o4 rm fq ro so +\n gr sa rd um pf ca ga ql qz wt ld z6 vw kv my xt cn wr eq tk rw fe qx qc qr qr rk qa qi ex p3 p5 dq ff pc sh cy oq v4 wg s7 yo ya od rq dt un bw zm da bg q5 ru ah +\n iy qd k6 dm oj qz zd vr w0 r7 d8 et y1 eg yj gz qq p3 il i8 ge wp sx s1 wj t4 lm jo qp pw xc vm sr uz ig +\n qf u8 iq rg rk g4 im ih oq fp a0 ib tc uj tn nc kz ll u1 un qv ck lv vv +\n a4 df um jg z2 lq wr tn xu id wo ez rk rz ew d5 ti ix to r0 sk fa fe fr hp jk wr v1 ms wu jm rc qt tr ex uk vr to tz ut hl sg hb qd xn rj jc 6x mo wz rp ek y4 oi oo 
ae sc e9 od pn hg wz js qv ln ju rx yg as rn gk o7 so it +\n dd qs qd py k9 lw wt db gk zf nw wh t2 nf zx w8 rj ue w0 tl ew r6 ui ef g1 or ej pn an tq ou ff qv gj jv q0 kq 2l uo oe ku wi w1 tx qe rv 1l ws eu lv p4 ru fo tz ph ib fv uu i3 qu oe pc gw wp sx zd kw w6 bo w9 cs cw my qz zf rg fh e8 ay yd fz rw fc ng qz cg wx qp oz qv ly ha cx al iu rt mp e1 ee w4 tn ul ru o6 ag aj +\n qa kv e5 bm yj j4 m5 rj qe ri ht oi qf qe t6 e5 aw t8 wc dw un yb +\n iy jg jj d4 ju ol wy bs wk wq t8 a9 y4 ta dx jp qc q9 su si ut q0 m8 qq 4a zy qq zq sw po qe qt la sn p6 ht oj hy hh qi gc bn w7 au ya kz na oa ox ov je fb or wl qx ze cg we fk tz tv cg uq w6 +\n dx sa qa qs db go lo z3 lf ox jc wj 93 tj tk yf ii ef fo ua sk g5 fs el oi fr sx fg se q9 xq qw j3 m6 4h qq l2 eu rq qu qi hs uh p6 ix fx qa i0 wf ke bq ne wh xl ms un ex sx yi ua pb s0 rq ak ao fc pr qj cw wq vy wv u2 3h wq re yw eb gl eu +\n hv gv il jd go qh d3 we q4 q5 ej lk my gv a2 ds ex e7 rk yf fq pu ff qx ho js gx j1 qe kw gm ja ns wy ln d0 cz xt te xu tt sb em ix ic p0 i8 im ui di gt ws os r1 qy pz l6 e5 e6 op sb pv sn ii rr v6 ql le zy pv mt da yq ol w6 yz ag it +\n ds gy dg jf qg uw qj uv q5 q8 q8 q0 qm e5 rk yd tz pv if qn ju cv xy ki qw mj ls yv ru of tz so yr oq qi m9 sc kw qf zh jx wh kg l0 t1 pz un fj os ha sn f3 e0 om ab cw ct nj zy wn fl ww vn fn tn ie ov +\n co qh jr jj cb bi wt q6 ra qm zx ur r5 an fw tg jm re j5 vl em sl xz qe wa 45 nq ex yb ed ef ph e4 tb s6 qy lz cu gm pd gq mc ca 85 yf wf hu sb eb fk fv dt cq lq qz ww wv xr n0 eq ok er et iw r2 o7 fe it +\n ra dx og wd wt o9 i4 it pk qo ic dt hj jl oq sf lz wd ca hi fg f5 ap x9 gq nd iy q6 ep +\n o9 gb a5 z5 q6 wu w0 tl r9 dj if ts ig it zq ll qw qy ep ed ry p5 ut hh dr i0 hl qp qa zs wd ya ot xk s7 e9 om io dl ki k2 wv q1 5g er rb rm +\n qs w0 om ed tk ta th gf ii av og 6o ee o8 +\n po ty rf qf he qc hz bv c5 mi rh ew tu ef sh ix r0 d0 pb tq fw ig g8 fh i2 uw hf qw qr sf cn wi fh qt hp nq yv fy tj od ux ut fb hu tn qt oi qs oa xm dm fa nm qx yy oo ec ev 
ox sb ya rk ys ud jy dz zv qc zb qb 17 tb lt yt u9 w4 rb st et ry yl o5 di ux +\n gb gt az uk gm qh d3 bt qz wd ld o3 bl cm q7 ck k7 we b2 yd ua ew tl rq yg us tq js fk jb gz jn jg qq in qr rq qy 3h c0 qp p3 yn ef sb ym jl sf sh hv qd p4 fj od ix kz ni wj wz tg t0 tj e0 sm om kx ku cw nb zc aw cy qv bq kg wq pn zz wv cd wn yr se o6 pu eo gc +\n gy rf qf k6 qh qj cd q3 kk cj fw b4 fr mj w7 bm wr ya z7 wt w0 r8 ip ti is pn am y5 qb hs jf qw uu wr np qt wi 1t bx qq qw aw er cv qy rw eo oa iz fp iv qi jm nk kr qf xm eo nr w0 qj bi t3 uv wk ek wn ex e5 rf rh ga it f5 pm hm f8 qj gy jp le wc qc lt s9 zu lb q1 ju w1 uw w3 oj tn tq ir r2 te +\n ak gn pw a5 qh k9 qk nb 2l qz wr kv mt gt w7 zx ii oe ug ix sz qx qc ar sr zb su vg qe np yg qt yj sv uk pd uq pf of eh jb sk gy ws ke kd be og kh x6 me wz e6 yo iw ah rw pm rr qg pt lm ql qz wz qs ly wb wn q3 ry rx gj ia o5 ge +\n gy qm d3 q3 ia c1 ta ex e5 e8 eg sy cl jf qe nj nh m9 qa w6 ek iw kv qg ab n4 w5 iq do +\n uv gv qa un az jd qm eg iw nr q8 zj ny c5 vu rl rx yn et ia ua is ot pt hu im gj qv vv xe xu 1g wo vt qt st qy rw qi wh ft es uk hw to p8 fn f1 hp qu sk p2 l4 zf qf g0 fi l7 be ky iv mn xp nn dt 6b ro kw uv ra tp e6 sv s8 sn tl fz iy qh hz ve v8 h8 th wc wy qb xm s9 hs wq zs yq tu en zt w4 dp yc +\n sa ak gi hw qm gp pp qz fm id lh 6w 9h xr w7 ui ow rb oe ia us fq g5 y5 ig oi y6 im gk ze qe gn 3o ye xz jh qe db qi p1 ep re op te y9 os pj e3 p0 zp qa ih l4 cu zg wg sn rh lf fz ic kh ni wh vh wx th ag u3 f6 uj jy k3 2a wc kv lu wo hb eq w1 rb xw yo ei o7 gx +\n hw wd r0 g4 sz b7 pi sn tc in qt zs rg eu +\n iy hv hb hq qf z3 q2 xy ia tm zf jw wq a3 rk w0 d6 eg et is id po tg gh ob jj wr np no wt ja wy xl l2 yr bt bb tc cx qr rn qt lc rw sy ex y0 ru od to tz og ye hr pj de tv e4 qt ad oo qa jq fj l7 fo wh nt pl ro wl vp yy tf va e6 fg th ar s7 os at s8 re df pe hj f0 qx qc x7 lu nc zo tx bf ww pq cg w1 rx id pu it fe +\n qf fi ld 4o ge da t5 zz mo zx vi ui w9 pj rl rz r4 uy r5 sf av ot fq tw sc zv g9 qv i4 ut iy m1 wy 
xo b0 ud cb qy rq ug wg sn fo ix uc aa i9 ss sd di sh j8 qp xa ep w0 7h wl t7 iw tj ya hs e0 fz hd u6 hh dr dh uk qj qk wx ol xb qv wb s8 15 wy zm jr nx it eo fx ww yv zy to ee yu yi iq ey iw rm uz yz ob +\n po ra ik qd qf je jr lp wr ji ne wu 2b nt wa e7 rx xo ia yl ta ig ff hp gk jf q9 vd si gc qe rk wu by yg rq sb os fu eh em ux ic ao pj hy im du qy cr 2e l5 qw v1 lf mv wk rx dy rp ra rg gi eb hs au ap bo oa sn zi f6 h1 zt ey yz do +\n lj vu y2 if qr wr ta so kg 3k ol u1 f5 +\n yb cv mq lf zz ue ui dx qe tv qu ex tz hh tb dy ds wi rb +\n hb gt qa h9 rf qf qg k9 q1 lo q3 ql z8 gl zg q8 1t wo vr rg ez ws mo w9 yo r3 yd e9 ea sf a0 im tg y8 ar qb ni wr qr wv m4 ix d9 nz yq fa if yr bt qr mz qi ea rr xx at ic pk qi za hv kq w7 co xj wd x4 zs wh y3 rf ec u2 e9 ah s0 uh io un h8 iq zb bs nr be zo fz vb wm pr yw md rc ur er ia yl ox ei ux eo tw o8 +\n qs rd rh yf px ow d8 tq ig ih cl qr yw qq in qy wc ek rh ya qg cd x9 qm lq to o3 ul a1 +\n a2 cp as gk kc 4u zj z2 t5 zz rb eh sh sx fg fh g9 hp vd gx oq cv pe wv b8 qe cc nw tr il tl e3 qu l4 mt wk wh aq td e9 gf lm qz bu jq my wp vb dg y0 ye yq w6 r1 +\n ij az qn ph qv bv bf mz iu is y8 ar fh q9 hd qq ji sf ld up qo p2 at sp in qi nk l5 e0 rw px 5o ew oc te +\n qd jr pa q5 w9 hw hu y8 dn qn zw wr ma ei xl dq i7 i9 vb sx wf wh na wl um e7 s0 h2 nh nk fj yl wn iu u7 as ad ey so uc +\n la up ic g5 ay ic x8 u2 ar eb wb yr aj +\n po da rd un qg uw lq m2 4r wg q8 z9 t3 zc d9 ae st q0 li qw wt kr qu ry en sm qf kf kh ny yt gi rh u5 em tc kv h8 qx lr jq ef +\n a2 dh qh q1 h8 qz z6 kx z8 bv 3q df pk tl d8 tq tf g8 zb qw 5p zm qe cv yb ec uz iv e2 gq wp uh kq ws lc wk x3 t8 rj fc io je dk lr lt wv wt bw be eo q4 ye yy rv ok yo yp ir ig +\n rs ij ty ps ul wr bh kb rs z4 z8 er px uo up y1 rb fo jo gg dv ph q0 jn xw ww d8 rp yd yf tx b0 op yn of jl tm px fm jc zf qd pk wh rp uv tp t9 ir yf ug qg v5 ku qz fd k4 cu mw zn iu bg lq ly rv su +\n ik dm cm or tw pu lp eh qd kk j0 em ng tw +\n ra ds qk cg q7 k6 4p t6 yu lq go yd eq r8 fw am 
dm xy cm v1 cz eo qi ij yn eg hq tc sj qa i0 oa l6 p6 wj vd wf mr yt ex e6 yo t9 ev s8 en rr bq kg hb lm re bj ms w1 et du +\n o9 gy bl wz t8 hq iu ix av y5 y8 jn j1 np xt t9 vw qq 43 xv 9w yi ft es hy op lg vs hg wd ef wx ou ox sw dr ze xr st fm ah +\n a3 wr fb jc c2 w8 rx fe q9 hd xq qq wi te y9 e1 qt qi qs nl ca bh u2 md tv hx +\n ra jh q3 aa t3 1o eq lh rv fo us pt dj pm pi qn zr bj xj cm ix a0 ra hi eu nw yc p3 ru ri ue e2 jl hi wo g9 xn qh wl wx go yp rr dj nd ch u3 fj bd jy vn w1 ia ox tm uz +\n ma y5 bl qi g7 ri fl ap 7a yo ko rp +\n um dh pg wq r5 sf ia ta an hs ne q9 wt sh rk yi ym of tb oq do hv wj ic oi sc pe sc wq wb wm tq +\n hq qm gg q4 xu k4 k7 uo wt kb et fo ey aw g0 xi am in qy eo qi eb ay ue og nt kl wx s9 df qg f9 v6 rv sv pc tl my bj wb eq h4 o3 ri +\n tt uq qh nb qz wt jx ya on om io ow ha qp e2 fd e4 hp hx p2 vm xg xn ra l8 iu yf jr qh k4 1l oa tk zp yw rz sy ul yx eo ep +\n ij yb qs fi ul qk by ql jl wr bi q5 bl tm q7 xp k7 vy gi tj rl rx yf ym tu er r8 pe ip ej y4 fd jn gx zt vj xt xh rj kt cm ri nh zq pp eu rw to pg e1 ue dw e2 so tb gm qi jb nk gy pa v1 xj fl kh 3k kl ed wx wc ek yy ez wc iq yo u4 it tz ak hn f8 h2 hl uv wz h8 gi nk ch zt wt bw kn yq e1 tv zp ag it +\n qs rf jq go qh a8 jj xt le wu qq yd d6 d0 ff qc su c7 lc wp ty y0 tu ti pf ta aa ug vb rf vi rx no un yu e5 7r up rj ag ha hs fc wz bo qv bp tv ki er +\n yb qa rd lo ok q6 o6 ba r6 ow or yl am aq gd dc ho cx c0 wu g4 ib c5 ep re qo ed yw iv ta hy jc pn xs oq xn bo zg ps kr iz yp wh wj xl xo zq me na ek wl wx wc rg uo ir tk aj da rw hm io uj fb jt qj dk nh zx jp qx wc zr zy zu nc xe ww wv vn h2 q6 en ew ad yl af eo if r2 +\n yb o0 dn z1 q1 sw qk po xt wr ls z8 ox wu jd ro q8 lh mh wa rg ea c3 pz tu g2 is a9 pn tw po qc sa jj vh ax xh kt wy jv mg nh kp b9 qi od ht e4 uu ij qf sf nc wk ap wd qz rd yi s7 tk pb it f7 o2 f9 kv qh h4 ln wz gi qd zn xm sn 39 yn rz u7 yr yj is ie ag ir tw +\n po dx uv pq jd dm d2 hr cs gs d3 q4 wr np ab di mh vt w7 w0 on tl ia ta aq dz y7 i1 qc pg 
q0 wq j2 c7 la cb il wr lv na ru 4g vz nf bc sl qq qp qt ud rq wg ex uw he tz ye p9 ui ou vu at ix w0 vf bi ed yh wh ra y6 wc u1 t8 fj ov iy tz kx h1 hj jy qj h6 ng jo wz cj ie mt rq te n9 mp ma q5 8w rb o7 sp ob +\n po ra dg ca qj q2 is kn rd ws lq tu ym yl y5 tg pp qw we cn a7 1t jd m7 wo yr sz qa hp ei qy ec p4 hw p7 to au iv ht qt qy qo l4 lz xm wd yf wg ez s7 en f3 tx yf rr f8 cw ji qv yz ry ew w2 oj w4 w5 st rn oz ri o6 dp aj +\n gm jg ju q5 np lf q7 xo 1y qn k0 jo pp qx th q9 4f or ro pu bv eu nw uq ao dy jb j9 gr rd nz wj wj r9 co ta rk od dd gg hm df pr km ng oj qc sb qd wb tz cq ex wb vb ty eq tv iw +\n fu uq co qk jl cg ld lg wo vr gc bd rj r3 yd rz iu ew d6 io to sh y7 jp db dn qn qm si xg qr ls jo lr wy rk wn m7 qu bb es op qp ru en ta e3 in hy dy hl vc gc gt jw ke 2t wh rk lj hg oy e6 yo ev em fz rw pq re dg qk ku oi qz k4 qv li rq n8 ec 4d yb wb e1 iw id o6 ir do ux pi ep +\n a2 wu jd ef dc mn 5e qp pl xd ag ay +\n yv o9 al a5 uq qg jw pi z2 jt cd q5 3m zl ez vu rg jl rz yf ix sj fp d0 tq ff ha hs zw om ni m0 xg c8 a7 ki qw cc ei xg j3 tt tu il p6 ix tp tx ib sp hg p0 fc pj su qu jv sj lk qp ws qs gm 1x mx lv wj qu l1 dt wh wv un aq fg rg e6 uo ar ie up it sw tx f6 o2 h3 qc qa ho vj u3 kd zy n6 my ww vc lr em w2 se rt o4 yc a1 te +\n qs dv pa ty iz uw qh jj z3 tn jc eg e4 qq w7 ut r3 uu kb up g1 fo iv if fd gd sc qm qe xg ia wb he ky hu tv rw qu rr es p3 ue s2 as i0 dt qt hz jm j0 gy ci fi hw nv ea kk vf rx 68 ti rp wl oi vp at om io uk pt qh qj dl cf lr cx wq ku ki w2 yh af ul sp yc it +\n ub yb dc ty gm dm go nv we ql by la o1 ju o3 jx fm aj wa rg e4 vi a6 r4 xo tz oe ip pv dk tq a0 tf fg tg i4 pz sd ry ky mg hy g7 eu qy yi rw qp eg yw hw sm uc i7 dw fx s3 sf zo m9 xs vn rf ci nz kr qt 9y pj lk ee pz ef rk e0 fx uf az fc qg jr oy lq cg qp um ad wc zn bw n6 my xr mp tu en o3 iq ir ro +\n da a3 d1 jr dz ca ql nu q3 cf o6 nr mt lk yr rs lp w7 a5 pj ys ym r8 ey to fs dz im ih sw qx qv zn gl j1 xe lc zc vw 6a mh b9 qt rm re oo qp p4 tk ix p7 og tz yr sp 
aa hk ih lx qd mx 4n kk vf el oo td ae yo fj uf pr hl qj qk wr qc qv kf yz my wq hn zs dr ee u8 rv et ru ie ag tw +\n gt ph z0 zl mu ui av zm om ui vh qr he qr es fl ws w6 nc ra rk kp ol wm yu it +\n dd df jq jd ux ql el 3r ya uu iu ee eh g3 sj us ib pp qc jv hd bh zt uo d8 b3 xu bc rq te uh ex tt eb il qu pc ge sj qp ih xf 3r gr yh qx tu wl wn sz up ay it ab jt qz v7 wn li za 6o w3 fn yk eu ie gz ro +\n yv o9 qf eg eh mh jh rh r3 rv ix y3 a0 sr qc qq qr wr qe bx ki m8 mk qi lm uk eb ai ur e2 xd nc ca eo mb ed uv rs up ya of hn lw wz qz et qh wm zr rc o3 r2 +\n ss qg qj ph qk q2 cs z4 bi qc cj q8 qn w7 rj ys ea r4 uy om rc ii fp sj ej yz el qx qv zn gk q9 hs m2 ii d7 nk c8 j4 qq dl gg pp ei qo yc od fo eh fp ta hy ok tv uu us dp qf sb zg ks sg n3 wh x2 nr cs wk kh wk wf ew 7h 7k oy t6 gi rg yp s9 ya e9 pb tc dt dh hk h2 pt qh h7 wz n1 qv kd pm cc xr kp yk tm ge +\n gr qa ft tt gn qs pw pu ca ph ls cg cn zf bz q7 z0 c3 qn gv w7 rg ut e8 ii er ip sg y3 oy ek ht gd qx g0 qv db su jn qq lz uu jo ru an wi kn sq nh qr qy yx eo qi xz y9 ru pd au p7 dq he ut ok fd jz ui hc j9 l3 hb xd jq gy kh wf xs sl rs aq ez y4 ts um yi e0 gh dk py v5 qk ql ko jq wc nk v0 wb qv br iu wm 6p sr gk yp pu +\n o0 pa pf z3 jt jy z7 cm ne w8 yz fd fg zn qq ll vg wr wb ia xx yj ty eh e1 so ts tc s4 i0 tn wo wp wa op va wk x3 vg qx rs sn au f3 tz sq hn rr o2 fv un k2 vj ey dj +\n iy ra ij ty a4 un rf qg dm jr kj uv we cv gk wy z8 oc wp mo jl mz ev ch rv tu ax y2 g3 oy y6 im uq qv hp qb hd iy lp nk w2 bb ho ep tr os en sm p8 p9 hy ss ui gm qi oo vn ae qd w6 ps dn wd wg ro mr yt ol oz rg s7 u5 tl yd rr ax f0 cq ku qx ze n4 wn kt ca jy bg yc zs yw rz w1 eu rm r1 +\n hv fu ca q8 mt la r3 pl yh to sy yh tv x4 tg yp ov wn ze sp +\n o9 qa az gm qd pw hq pd ga qj cd q3 jk pd du c2 zk xf t8 eq om es rc ua y3 pu ig qx se qv db st qn ii lx qe wt xk nx ku br qe qr qt rm eu xf xb rn qu qi ep qo rr ex xk p5 ym fi uq to ux ix ai hj gn zi oq qf kd wf xn kr w8 rl kk mq rp rf u1 s7 oa fh e7 yp e0 pr ql sx ck ag 
kg kt mp eb rl em ee w6 du rm yz if ep +\n qs pi am 6a ut r4 ii sd ua ib y6 pa kt pb wm qq dz qt qp y0 he p8 ue tb qu or qo wh 40 8j t4 sl rf iu gh hh qc iq bf rl wm mf oh ew is dp +\n da ps a7 jr z4 q3 bu xt ip jx q6 np z7 bp lg bz ye wo ig bb ww rf om uu ef r8 ey pt ta pn y7 gg th dn pg qb ri qq 42 qw zw b9 b0 xb qt qy yl wh xk ft at yw i7 sp de pz fn qy si m0 ik wf sg cq ql hk er eg lc ek na wc iw ir rk ua e0 ak iu sw ap uj av ab hl uv zc qa wc jw vj qv vk ay kg xw on rw 1b wn rl eb vm eq h4 yt oz eu uz +\n a2 qa rd cp qh ub vg ws u0 4g bp da yo ev kz eq uu ee ef yk pr sj sk fe oi lt uy j2 gn io vk ns 27 ln wu ve yr l2 qu qi ry il ul tj eg ux i5 yr tx ph oj gq or zp wa qs gy iz v0 qt wj ic ca lh yb np ej sl td l8 yi iw s8 e8 ys yd sq al dt pt tg te yb eu tq r1 ir fe +\n tr h9 go qj b1 wu q7 zh el tg mb ys ed ii er r8 xs pe r0 sw db ov m7 d3 re no nu zr pq ji wr lc or qw ee yy qr rn rm re qi te ea qo yb yn y0 uz uq iz tl yr i0 fm wp qs qd he wk kl ew as hl ez oo ox ie s0 f4 dl wq wl ww lr qc qv vk mt my kn ep em yt sy am so rp sp +\n ak ft qg bg ji q6 bk xi tf mo ur pj yk ua qv wq gc qe ke ef eb tk uq i6 oh iv gb qs rx el yo rr pe wz wx ho xq tc mo yk du r1 tq oc hc te +\n ra ub qj jh jt dx ql q6 da tf r3 ew iu sg tp yl el gc 6n tt ry pd ye ff lz kt yp fl yf dl rn +\n o9 hb h9 qd dh qg q1 qj jy se q5 wt nr qv ge c5 el 6y uo rv ax pe et r0 fe y6 dx qx ha qq lo we zy v1 wy dl vr wa qr rm qi qp yn tz pg ph de p0 do qp wp wf bw xh ky xz wh hl to ek rd sv rj rq re h1 qg qh kl f1 zm 18 ez xe vm en 5j o3 rn fw fe it +\n db c2 bb o0 w8 kl kc y4 qx zm pk cw id ve mh lp rq jb fl x1 qi wd lx f3 cy bq dd ye fn ig diff --git a/contrib/tsearch2/dict.c b/contrib/tsearch2/dict.c new file mode 100644 index 0000000000..5c148c43b5 --- /dev/null +++ b/contrib/tsearch2/dict.c @@ -0,0 +1,275 @@ +/* + * interface functions to dictionary + * Teodor Sigaev + */ +#include +#include +#include +#include + +#include "postgres.h" +#include "fmgr.h" +#include "utils/array.h" +#include 
"catalog/pg_type.h" +#include "executor/spi.h" + +#include "dict.h" +#include "common.h" +#include "snmap.h" + +/*********top interface**********/ + +static void *plan_getdict=NULL; + +void +init_dict(Oid id, DictInfo *dict) { + Oid arg[1]={ OIDOID }; + bool isnull; + Datum pars[1]={ ObjectIdGetDatum(id) }; + int stat; + + memset(dict,0,sizeof(DictInfo)); + SPI_connect(); + if ( !plan_getdict ) { + plan_getdict = SPI_saveplan( SPI_prepare( "select dict_init, dict_initoption, dict_lexize from pg_ts_dict where oid = $1" , 1, arg ) ); + if ( !plan_getdict ) + ts_error(ERROR, "SPI_prepare() failed"); + } + + stat = SPI_execp(plan_getdict, pars, " ", 1); + if ( stat < 0 ) + ts_error (ERROR, "SPI_execp return %d", stat); + if ( SPI_processed > 0 ) { + Datum opt; + Oid oid=InvalidOid; + oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) ); + if ( !(isnull || oid==InvalidOid) ) { + opt=SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull); + dict->dictionary=(void*)DatumGetPointer(OidFunctionCall1(oid, opt)); + } + oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull) ); + if ( isnull || oid==InvalidOid ) + ts_error(ERROR, "Null dict_lexize for dictonary %d", id); + fmgr_info_cxt(oid, &(dict->lexize_info), TopMemoryContext); + dict->dict_id=id; + } else + ts_error(ERROR, "No dictionary with id %d", id); + SPI_finish(); +} + +typedef struct { + DictInfo *last_dict; + int len; + int reallen; + DictInfo *list; + SNMap name2id_map; +} DictList; + +static DictList DList = {NULL,0,0,NULL,{0,0,NULL}}; + +void +reset_dict(void) { + freeSNMap( &(DList.name2id_map) ); + /* XXX need to free DList.list[*].dictionary */ + if ( DList.list ) + free(DList.list); + memset(&DList,0,sizeof(DictList)); +} + + +static int +comparedict(const void *a, const void *b) { + return ((DictInfo*)a)->dict_id - ((DictInfo*)b)->dict_id; +} + +DictInfo * +finddict(Oid id) { + /* last used dict */ + if ( 
DList.last_dict && DList.last_dict->dict_id==id ) + return DList.last_dict; + + + /* already used dict */ + if ( DList.len != 0 ) { + DictInfo key; + key.dict_id=id; + DList.last_dict = bsearch(&key, DList.list, DList.len, sizeof(DictInfo), comparedict); + if ( DList.last_dict != NULL ) + return DList.last_dict; + } + + /* last chance */ + if ( DList.len==DList.reallen ) { + DictInfo *tmp; + int reallen = ( DList.reallen ) ? 2*DList.reallen : 16; + tmp=(DictInfo*)realloc(DList.list,sizeof(DictInfo)*reallen); + if ( !tmp ) + ts_error(ERROR,"No memory"); + DList.reallen=reallen; + DList.list=tmp; + } + DList.last_dict=&(DList.list[DList.len]); + init_dict(id, DList.last_dict); + + DList.len++; + qsort(DList.list, DList.len, sizeof(DictInfo), comparedict); + return finddict(id); /* qsort changed order!! */; +} + +static void *plan_name2id=NULL; + +Oid +name2id_dict(text *name) { + Oid arg[1]={ TEXTOID }; + bool isnull; + Datum pars[1]={ PointerGetDatum(name) }; + int stat; + Oid id=findSNMap_t( &(DList.name2id_map), name ); + + if ( id ) + return id; + + SPI_connect(); + if ( !plan_name2id ) { + plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_dict where dict_name = $1" , 1, arg ) ); + if ( !plan_name2id ) + ts_error(ERROR, "SPI_prepare() failed"); + } + + stat = SPI_execp(plan_name2id, pars, " ", 1); + if ( stat < 0 ) + ts_error (ERROR, "SPI_execp return %d", stat); + if ( SPI_processed > 0 ) + id=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) ); + else + ts_error(ERROR, "No dictionary with name '%s'", text2char(name)); + SPI_finish(); + addSNMap_t( &(DList.name2id_map), name, id ); + return id; +} + + +/******sql-level interface******/ +PG_FUNCTION_INFO_V1(lexize); +Datum lexize(PG_FUNCTION_ARGS); + +Datum +lexize(PG_FUNCTION_ARGS) { + text *in=PG_GETARG_TEXT_P(1); + DictInfo *dict = finddict( PG_GETARG_OID(0) ); + char **res, **ptr; + Datum *da; + ArrayType *a; + + + ptr = res = (char**)DatumGetPointer( + 
FunctionCall3(&(dict->lexize_info), + PointerGetDatum(dict->dictionary), + PointerGetDatum(VARDATA(in)), + Int32GetDatum(VARSIZE(in)-VARHDRSZ) + ) + ); + PG_FREE_IF_COPY(in, 1); + if ( !res ) { + if (PG_NARGS() > 2) + PG_RETURN_POINTER(NULL); + else + PG_RETURN_NULL(); + } + + while(*ptr) ptr++; + da = (Datum*)palloc(sizeof(Datum)*(ptr-res+1)); + ptr=res; + while(*ptr) { + da[ ptr-res ] = PointerGetDatum( char2text(*ptr) ); + ptr++; + } + + a = construct_array( + da, + ptr-res, + TEXTOID, + -1, + false, + 'i' + ); + + ptr=res; + while(*ptr) { + pfree( DatumGetPointer(da[ ptr-res ]) ); + pfree( *ptr ); + ptr++; + } + pfree(res); + pfree(da); + + PG_RETURN_POINTER(a); +} + +PG_FUNCTION_INFO_V1(lexize_byname); +Datum lexize_byname(PG_FUNCTION_ARGS); +Datum +lexize_byname(PG_FUNCTION_ARGS) { + text *dictname=PG_GETARG_TEXT_P(0); + Datum res; + + strdup("simple"); + res=DirectFunctionCall3( + lexize, + ObjectIdGetDatum(name2id_dict(dictname)), + PG_GETARG_DATUM(1), + (Datum)0 + ); + PG_FREE_IF_COPY(dictname, 0); + if (res) + PG_RETURN_DATUM(res); + else + PG_RETURN_NULL(); +} + +static Oid currect_dictionary_id=0; + +PG_FUNCTION_INFO_V1(set_curdict); +Datum set_curdict(PG_FUNCTION_ARGS); +Datum +set_curdict(PG_FUNCTION_ARGS) { + finddict(PG_GETARG_OID(0)); + currect_dictionary_id=PG_GETARG_OID(0); + PG_RETURN_VOID(); +} + +PG_FUNCTION_INFO_V1(set_curdict_byname); +Datum set_curdict_byname(PG_FUNCTION_ARGS); +Datum +set_curdict_byname(PG_FUNCTION_ARGS) { + text *dictname=PG_GETARG_TEXT_P(0); + + DirectFunctionCall1( + set_curdict, + ObjectIdGetDatum( name2id_dict(dictname) ) + ); + PG_FREE_IF_COPY(dictname, 0); + PG_RETURN_VOID(); +} + +PG_FUNCTION_INFO_V1(lexize_bycurrent); +Datum lexize_bycurrent(PG_FUNCTION_ARGS); +Datum +lexize_bycurrent(PG_FUNCTION_ARGS) { + Datum res; + if ( currect_dictionary_id == 0 ) + elog(ERROR, "No currect dictionary. 
Execute select set_curdict()."); + + res = DirectFunctionCall3( + lexize, + ObjectIdGetDatum(currect_dictionary_id), + PG_GETARG_DATUM(0), + (Datum)0 + ); + if (res) + PG_RETURN_DATUM(res); + else + PG_RETURN_NULL(); +} + + diff --git a/contrib/tsearch2/dict.h b/contrib/tsearch2/dict.h new file mode 100644 index 0000000000..bbbbfc47a5 --- /dev/null +++ b/contrib/tsearch2/dict.h @@ -0,0 +1,38 @@ +#ifndef __DICT_H__ +#define __DICT_H__ +#include "postgres.h" +#include "fmgr.h" + +typedef struct { + int len; + char **stop; + char* (*wordop)(char*); +} StopList; + +void sortstoplist(StopList *s); +void freestoplist(StopList *s); +void readstoplist(text *in, StopList *s); +bool searchstoplist(StopList *s, char *key); +char* lowerstr(char *str); + +typedef struct { + Oid dict_id; + FmgrInfo lexize_info; + void *dictionary; +} DictInfo; + +void init_dict(Oid id, DictInfo *dict); +DictInfo* finddict(Oid id); +Oid name2id_dict(text *name); +void reset_dict(void); + + +/* simple parser of cfg string */ +typedef struct { + char *key; + char *value; +} Map; + +void parse_cfgdict(text *in, Map **m); + +#endif diff --git a/contrib/tsearch2/dict_ex.c b/contrib/tsearch2/dict_ex.c new file mode 100644 index 0000000000..b8c4f59c90 --- /dev/null +++ b/contrib/tsearch2/dict_ex.c @@ -0,0 +1,59 @@ +/* + * example of dictionary + * Teodor Sigaev + */ +#include +#include +#include + +#include "postgres.h" + +#include "dict.h" +#include "common.h" + +typedef struct { + StopList stoplist; +} DictExample; + + +PG_FUNCTION_INFO_V1(dex_init); +Datum dex_init(PG_FUNCTION_ARGS); +PG_FUNCTION_INFO_V1(dex_lexize); +Datum dex_lexize(PG_FUNCTION_ARGS); + +Datum +dex_init(PG_FUNCTION_ARGS) { + DictExample *d = (DictExample*)malloc( sizeof(DictExample) ); + + if ( !d ) + elog(ERROR, "No memory"); + memset(d,0,sizeof(DictExample)); + + d->stoplist.wordop=lowerstr; + + if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) { + text *in = PG_GETARG_TEXT_P(0); + readstoplist(in, &(d->stoplist)); + 
sortstoplist(&(d->stoplist)); + PG_FREE_IF_COPY(in, 0); + } + + PG_RETURN_POINTER(d); +} + +Datum +dex_lexize(PG_FUNCTION_ARGS) { + DictExample *d = (DictExample*)PG_GETARG_POINTER(0); + char *in = (char*)PG_GETARG_POINTER(1); + char *txt = pnstrdup(in, PG_GETARG_INT32(2)); + char **res=palloc(sizeof(char*)*2); + + if ( *txt=='\0' || searchstoplist(&(d->stoplist),txt) ) { + pfree(txt); + res[0]=NULL; + } else + res[0]=txt; + res[1]=NULL; + + PG_RETURN_POINTER(res); +} diff --git a/contrib/tsearch2/dict_ispell.c b/contrib/tsearch2/dict_ispell.c new file mode 100644 index 0000000000..c5b33a4c04 --- /dev/null +++ b/contrib/tsearch2/dict_ispell.c @@ -0,0 +1,141 @@ +/* + * ISpell interface + * Teodor Sigaev + */ +#include +#include +#include + +#include "postgres.h" + +#include "dict.h" +#include "common.h" +#include "ispell/spell.h" + +typedef struct { + StopList stoplist; + IspellDict obj; +} DictISpell; + +PG_FUNCTION_INFO_V1(spell_init); +Datum spell_init(PG_FUNCTION_ARGS); +PG_FUNCTION_INFO_V1(spell_lexize); +Datum spell_lexize(PG_FUNCTION_ARGS); + +static void +freeDictISpell(DictISpell *d) { + FreeIspell(&(d->obj)); + freestoplist(&(d->stoplist)); + free(d); +} + +Datum +spell_init(PG_FUNCTION_ARGS) { + DictISpell *d; + Map *cfg, *pcfg; + text *in; + bool affloaded=false, dictloaded=false, stoploaded=false; + + if ( PG_ARGISNULL(0) || PG_GETARG_POINTER(0)==NULL ) + elog(ERROR,"ISpell confguration error"); + + d = (DictISpell*)malloc( sizeof(DictISpell) ); + if ( !d ) + elog(ERROR, "No memory"); + memset(d,0,sizeof(DictISpell)); + d->stoplist.wordop=lowerstr; + + in = PG_GETARG_TEXT_P(0); + parse_cfgdict(in,&cfg); + PG_FREE_IF_COPY(in, 0); + pcfg=cfg; + while(pcfg->key) { + if ( strcasecmp("DictFile", pcfg->key) == 0 ) { + if ( dictloaded ) { + freeDictISpell(d); + elog(ERROR,"Dictionary already loaded"); + } + if ( ImportDictionary(&(d->obj), pcfg->value) ) { + freeDictISpell(d); + elog(ERROR,"Can't load dictionary file (%s)", pcfg->value); + } + dictloaded=true; 
+ } else if ( strcasecmp("AffFile", pcfg->key) == 0 ) { + if ( affloaded ) { + freeDictISpell(d); + elog(ERROR,"Affixes already loaded"); + } + if ( ImportAffixes(&(d->obj), pcfg->value) ) { + freeDictISpell(d); + elog(ERROR,"Can't load affix file (%s)", pcfg->value); + } + affloaded=true; + } else if ( strcasecmp("StopFile", pcfg->key) == 0 ) { + text *tmp=char2text(pcfg->value); + if ( stoploaded ) { + freeDictISpell(d); + elog(ERROR,"Stop words already loaded"); + } + readstoplist(tmp, &(d->stoplist)); + sortstoplist(&(d->stoplist)); + pfree(tmp); + stoploaded=true; + } else { + freeDictISpell(d); + elog(ERROR,"Unknown option: %s => %s", pcfg->key, pcfg->value); + } + pfree(pcfg->key); + pfree(pcfg->value); + pcfg++; + } + pfree(cfg); + /* both the dictionary and the affix file are mandatory; the stop list is optional */ + if ( affloaded && dictloaded ) { + SortDictionary(&(d->obj)); + SortAffixes(&(d->obj)); + } else if ( !affloaded ) { + freeDictISpell(d); + elog(ERROR,"No affixes"); + } else { + freeDictISpell(d); + elog(ERROR,"No dictionary"); + } + + PG_RETURN_POINTER(d); +} + /* spell_lexize: normalize the input word (argument 1 is the pointer, argument 2 the length) with NormalizeWord and remove stop words from the NULL-terminated array it returns, compacting the array in place. Returns NULL for a zero-length word or when NormalizeWord returns NULL. */ +Datum +spell_lexize(PG_FUNCTION_ARGS) { + DictISpell *d = (DictISpell*)PG_GETARG_POINTER(0); + char *in = (char*)PG_GETARG_POINTER(1); + char *txt; + char **res; + char **ptr, **cptr; + + if ( !PG_GETARG_INT32(2) ) + PG_RETURN_POINTER(NULL); + + res=palloc(sizeof(char*)*2); /* NOTE(review): this allocation is never used; res is overwritten by the NormalizeWord result two statements below */ + txt = pnstrdup(in, PG_GETARG_INT32(2)); + res=NormalizeWord(&(d->obj), txt); + pfree(txt); + + if ( res==NULL ) + PG_RETURN_POINTER(NULL); + + ptr=cptr=res; /* ptr scans every entry, cptr is the next slot to keep */ + while(*ptr) { + if ( searchstoplist(&(d->stoplist),*ptr) ) { + pfree(*ptr); + *ptr=NULL; + ptr++; + } else { + *cptr=*ptr; + cptr++; ptr++; + } + } + *cptr=NULL; + + PG_RETURN_POINTER(res); +} + diff --git a/contrib/tsearch2/dict_snowball.c b/contrib/tsearch2/dict_snowball.c new file mode 100644 index 0000000000..0fbcc521bd --- /dev/null +++ b/contrib/tsearch2/dict_snowball.c @@ -0,0 +1,108 @@ +/* + * example of Snowball dictionary + * http://snowball.tartarus.org/ + * Teodor Sigaev + */ +#include +#include + +#include "postgres.h" +
+#include "dict.h" +#include "common.h" +#include "snowball/header.h" +#include "snowball/english_stem.h" +#include "snowball/russian_stem.h" + /* State of one Snowball dictionary: the stemmer environment, the stop list, and the stemming function to run on that environment. */ +typedef struct { + struct SN_env *z; + StopList stoplist; + int (*stem)(struct SN_env * z); +} DictSnowball; + + +PG_FUNCTION_INFO_V1(snb_en_init); +Datum snb_en_init(PG_FUNCTION_ARGS); +PG_FUNCTION_INFO_V1(snb_ru_init); +Datum snb_ru_init(PG_FUNCTION_ARGS); +PG_FUNCTION_INFO_V1(snb_lexize); +Datum snb_lexize(PG_FUNCTION_ARGS); + /* snb_en_init: allocate a DictSnowball wired to english_create_env and english_stem. When argument 0 is non-NULL it is read as the stop list (readstoplist, then sortstoplist). Returns the malloc-ed dictionary. */ +Datum +snb_en_init(PG_FUNCTION_ARGS) { + DictSnowball *d = (DictSnowball*)malloc( sizeof(DictSnowball) ); + + if ( !d ) + elog(ERROR, "No memory"); + memset(d,0,sizeof(DictSnowball)); + d->stoplist.wordop=lowerstr; + + if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) { + text *in = PG_GETARG_TEXT_P(0); + readstoplist(in, &(d->stoplist)); + sortstoplist(&(d->stoplist)); + PG_FREE_IF_COPY(in, 0); + } + + d->z = english_create_env(); + if (!d->z) { + freestoplist(&(d->stoplist)); /* NOTE(review): the malloc-ed struct d itself is not freed on this path */ + elog(ERROR,"No memory"); + } + d->stem=english_stem; + + PG_RETURN_POINTER(d); +} + /* snb_ru_init: same steps as snb_en_init, but wired to russian_create_env and russian_stem. */ +Datum +snb_ru_init(PG_FUNCTION_ARGS) { + DictSnowball *d = (DictSnowball*)malloc( sizeof(DictSnowball) ); + + if ( !d ) + elog(ERROR, "No memory"); + memset(d,0,sizeof(DictSnowball)); + d->stoplist.wordop=lowerstr; + + if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) { + text *in = PG_GETARG_TEXT_P(0); + readstoplist(in, &(d->stoplist)); + sortstoplist(&(d->stoplist)); + PG_FREE_IF_COPY(in, 0); + } + + d->z = russian_create_env(); + if (!d->z) { + freestoplist(&(d->stoplist)); /* NOTE(review): the malloc-ed struct d itself is not freed on this path */ + elog(ERROR,"No memory"); + } + d->stem=russian_stem; + + PG_RETURN_POINTER(d); +} + /* snb_lexize: copy the input word (argument 1 is the pointer, argument 2 the length). Slot 0 of the returned two-slot array is NULL when the copy is empty or a stop word; otherwise the copy is handed to the stemmer stored in the dictionary and slot 0 holds the stemmed text. */ +Datum +snb_lexize(PG_FUNCTION_ARGS) { + DictSnowball *d = (DictSnowball*)PG_GETARG_POINTER(0); + char *in = (char*)PG_GETARG_POINTER(1); + char *txt = pnstrdup(in, PG_GETARG_INT32(2)); + char **res=palloc(sizeof(char*)*2); + + if ( *txt=='\0' || searchstoplist(&(d->stoplist),txt) ) { + pfree(txt); + res[0]=NULL; + } else { + SN_set_current(d->z, strlen(txt), txt); +
(d->stem)(d->z); + if ( d->z->p && d->z->l ) { /* replace the copy with the l bytes at z->p, adding the terminator */ + txt=repalloc(txt, d->z->l+1); + memcpy( txt, d->z->p, d->z->l); + txt[d->z->l]='\0'; + } + res[0]=txt; + } + res[1]=NULL; + + + PG_RETURN_POINTER(res); +} + diff --git a/contrib/tsearch2/dict_syn.c b/contrib/tsearch2/dict_syn.c new file mode 100644 index 0000000000..7f5b5e0ea8 --- /dev/null +++ b/contrib/tsearch2/dict_syn.c @@ -0,0 +1,157 @@ +/* + * Synonym dictionary interface + * Teodor Sigaev + */ +#include +#include +#include +#include + +#include "postgres.h" + +#include "dict.h" +#include "common.h" + +#define SYNBUFLEN 4096 +typedef struct { + char *in; + char *out; +} Syn; + +typedef struct { + int len; + Syn *syn; +} DictSyn; + +PG_FUNCTION_INFO_V1(syn_init); +Datum syn_init(PG_FUNCTION_ARGS); +PG_FUNCTION_INFO_V1(syn_lexize); +Datum syn_lexize(PG_FUNCTION_ARGS); + /* findwrd: skip leading whitespace in the NUL-terminated string in, return a pointer to the first non-space character and store in *end the position just past the following run of non-space characters. The word is not terminated here; the caller writes the terminator at *end. */ +static char * +findwrd(char *in, char **end) { + char *start; + + *end=NULL; + while(*in && isspace(*in)) + in++; + + if ( !in ) /* NOTE(review): this tests the pointer, which the loop above cannot make NULL, so the branch never fires; a test of *in looks intended - confirm */ + return NULL; + start=in; + + while(*in && !isspace(*in)) + in++; + + *end=in; + return start; +} + /* compareSyn: order two Syn entries by strcmp on their in fields; passed to qsort in syn_init and to bsearch in syn_lexize. */ +static int +compareSyn(const void *a, const void *b) { + return strcmp( ((Syn*)a)->in, ((Syn*)b)->in ); +} + /* syn_init: load a synonym table from the file named by the text in argument 0. Each line is split with findwrd into two whitespace-separated words; both go through lowerstr and strdup and are stored as one in/out pair. The table grows by doubling, starting at 16 entries, and is sorted by in once the file has been read. Returns the malloc-ed dictionary. */ + +Datum +syn_init(PG_FUNCTION_ARGS) { + text *in; + DictSyn *d; + int cur=0; + FILE *fin; + char *filename; + char buf[SYNBUFLEN]; + char *starti,*starto,*end=NULL; + int slen; + + if ( PG_ARGISNULL(0) || PG_GETARG_POINTER(0)==NULL ) + elog(ERROR,"NULL config"); + + in = PG_GETARG_TEXT_P(0); + if ( VARSIZE(in) - VARHDRSZ == 0 ) + elog(ERROR,"VOID config"); + + filename=text2char(in); + PG_FREE_IF_COPY(in, 0); + if ( (fin=fopen(filename,"r")) == NULL ) + elog(ERROR,"Can't open file '%s': %s", filename, strerror(errno)); + + d = (DictSyn*)malloc( sizeof(DictSyn) ); + if ( !d ) { + fclose(fin); + elog(ERROR, "No memory"); + } + memset(d,0,sizeof(DictSyn)); + + while( fgets(buf,SYNBUFLEN,fin) ) { + slen = strlen(buf)-1; /* NOTE(review): the last character is dropped unconditionally; a final line with no trailing newline, or a line longer than the buffer, loses a real character */ + buf[slen] = '\0'; + if ( *buf=='\0' ) continue; + if (cur==d->len) { + d->len = (d->len) ?
2*d->len : 16; + d->syn=(Syn*)realloc( d->syn, sizeof(Syn)*d->len ); /* NOTE(review): assigning the realloc result directly loses the old pointer when realloc fails */ + if ( !d->syn ) { + fclose(fin); + elog(ERROR, "No memory"); + } + } + + starti=findwrd(buf,&end); + if ( !starti ) + continue; + *end='\0'; + if ( end >= buf+slen ) /* the first word ran to the end of the line, so there is no second word */ + continue; + + starto= findwrd(end+1, &end); + if ( !starto ) + continue; + *end='\0'; + + d->syn[cur].in=strdup(lowerstr(starti)); + d->syn[cur].out=strdup(lowerstr(starto)); + if ( !(d->syn[cur].in && d->syn[cur].out) ) { + fclose(fin); + elog(ERROR, "No memory"); + } + + cur++; + } + + fclose(fin); + + d->len=cur; /* from here on len is the number of entries actually stored */ + if ( cur>1 ) + qsort(d->syn, d->len, sizeof(Syn), compareSyn); + + pfree(filename); + PG_RETURN_POINTER(d); +} + /* syn_lexize: look up the input word (argument 1 is the pointer, argument 2 the length), passed through lowerstr, in the sorted table with bsearch. Returns NULL for a zero-length word or when there is no match; otherwise a palloc-ed, NULL-terminated array whose only entry is a pstrdup copy of the matching out string. */ +Datum +syn_lexize(PG_FUNCTION_ARGS) { + DictSyn *d = (DictSyn*)PG_GETARG_POINTER(0); + char *in = (char*)PG_GETARG_POINTER(1); + Syn key,*found; + char **res=NULL; + + if ( !PG_GETARG_INT32(2) ) + PG_RETURN_POINTER(NULL); + + key.out=NULL; + key.in=lowerstr(pnstrdup(in, PG_GETARG_INT32(2))); + + found=(Syn*)bsearch(&key, d->syn, d->len, sizeof(Syn), compareSyn); + pfree(key.in); + + if ( !found ) + PG_RETURN_POINTER(NULL); + + res=palloc(sizeof(char*)*2); + + res[0]=pstrdup(found->out); + res[1]=NULL; + + PG_RETURN_POINTER(res); +} + diff --git a/contrib/tsearch2/docs/tsearch-V2-intro.html b/contrib/tsearch2/docs/tsearch-V2-intro.html new file mode 100644 index 0000000000..8375d4ca77 --- /dev/null +++ b/contrib/tsearch2/docs/tsearch-V2-intro.html @@ -0,0 +1,975 @@ + + + + + tsearch-v2-intro + + + + +
+

Tsearch2 - Introduction

+ +

+ [Online version] of this document is available.

+ +

The tsearch2 module is available to add as an extension to + the PostgreSQL database to allow for Full Text Indexing. This + document is an introduction to installing, configuring, using + and maintaining the database with the tsearch2 module + activated.

+ +

Please note that the tsearch2 module is fully incompatible with the old + tsearch module, which is deprecated in 7.4 and will be removed in + 7.5.

+ +

USING TSEARCH2 AND POSTGRESQL FOR A WEB BASED SEARCH + ENGINE

+ +

This documentation is provided as a short guide on how to + quickly get up and running with tsearch2 and PostgreSQL, for + those who want to implement a full text indexed based search + engine. It is not meant to be a complete in-depth guide into + the full ins and outs of the contrib/tsearch2 module, and is + primarily aimed at beginners who want to speed up searching of + large text fields, or those migrating from other database + systems such as MS-SQL.

+ +

The README.tsearch2 file included in the contrib/tsearch2 + directory contains a brief overview and history behind tsearch. + This can also be found online [right + here].

+ +

Further in depth documentation such as a full function + reference, and user guide can be found online at the [tsearch + documentation home].

+ +

ACKNOWLEDGEMENTS

+ +

Robert John Shepherd originally wrote this documentation for + the previous version of tsearch module (v1) included with the + postgres release. I took his documentation and updated it to + comply with the tsearch2 modifications.

+ +

Robert's original acknowledgements:

+ +

"Thanks to Oleg Bartunov for taking the time to answer many + of my questions regarding this module, and also to Teodor + Sigaev for clearing up the process of making your own + dictionaries. Plus of course a big thanks to the pair of them + for writing this module in the first place!"

+ +

I would also like to extend my thanks to the developers, and + Oleg Bartunov for all of his direction and help with the new + features of tsearch2.

+ +

OVERVIEW

+ +

MS-SQL provides a full text indexing (FTI) system which + enables the fast searching of text based fields, very useful + for websites (and other applications) that require a results + set based on key words. PostgreSQL ships with a contributed + module called tsearch2, which implements a special type of + index that can also be used for full text indexing. + Furthermore, unlike the MS-SQL offering, which requires regular incremental + rebuilds of the text indexes themselves, tsearch2 indexes are + always up-to-date, and keeping them so incurs very little + overhead.

+ +

Before we get into the details, it is recommended that you + have installed and tested PostgreSQL, are reasonably familiar + with databases, the SQL query language and also understand the + basics of connecting to PostgreSQL from the local shell. This + document isn't intended for the complete PostgreSQL newbie, but + anyone with a reasonable grasp of the basics should be able to + follow it.

+ +

INSTALLATION

+ +

Starting with PostgreSQL version 7.4 tsearch2 is now + included in the contrib directory with the PostgreSQL sources. + contrib/tsearch2 is where you will find everything needed to + install and use tsearch2. Please note that tsearch2 will also + work with PostgreSQL version 7.3.x, but it is not the module + included with the source distribution. You will have to + download the module separately and install it in the same + fashion.

+ +

I installed the tsearch2 module to a PostgreSQL 7.3 database + from the contrib directory without squashing the original (old) + tsearch module. What I did was move the module's tsearch source + directory into the contrib tree under the name tsearchV2.

+ +

Step one is to download the tsearch V2 module :

+ +

[http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/] + (check Development History for latest stable version !)

+
+        tar -zxvf tsearch-v2.tar.gz
+        mv tsearch2 PGSQL_SRC/contrib/
+        cd PGSQL_SRC/contrib/tsearch2
+
+ +

If you are installing from PostgreSQL version 7.4 or higher, + you can skip those steps and just change to the + contrib/tsearch2 directory in the source tree and continue from + there.

+ +

Then continue with the regular building and installation + process

+
+        gmake
+        gmake install
+        gmake installcheck
+
+ +

That is pretty much all you have to do, unless of course you + get errors. However, if you get those, you had better check with + the mailing lists over at http://www.postgresql.org or + http://openfts.sourceforge.net/ + since it has never failed for me.

+ +

Both the directory in contrib/ and the directory from the + archive are called tsearch2. Tsearch2 is completely incompatible + with the previous version of tsearch. This means that both + versions can be installed into a single database, and migration + to the new version may be much easier.

+ +

NOTE: the previous version of tsearch found in the + contrib/tsearch directory is deprecated. Although it is still + available and included within PostgreSQL version 7.4, it will + be removed in version 7.5.

+ +

ADDING TSEARCH2 FUNCTIONALITY TO A DATABASE

+ +

We should create a database to use as an example for the + remainder of this file. We can call the database "ftstest". You + can create it from the command line like this:

+
+        #createdb ftstest
+
+ +

If you thought installation was easy, this next bit is even + easier. Change to the PGSQL_SRC/contrib/tsearch2 directory and + type:

+
+        psql ftstest < tsearch2.sql
+
+ +

The file "tsearch2.sql" holds all the wonderful little + goodies you need to do full text indexing. It defines numerous + functions and operators, and creates the needed tables in the + database. Four new tables are created by running the + tsearch2.sql file: pg_ts_dict, pg_ts_parser, pg_ts_cfg and + pg_ts_cfgmap.

+ +

You can check out the tables if you like:

+
+        #psql ftstest
+        ftstest=# \d
+                    List of relations
+         Schema |     Name     | Type  |  Owner
+        --------+--------------+-------+----------
+         public | pg_ts_cfg    | table | kopciuch
+         public | pg_ts_cfgmap | table | kopciuch
+         public | pg_ts_dict   | table | kopciuch
+         public | pg_ts_parser | table | kopciuch
+        (4 rows)
+
+ +

TYPES AND FUNCTIONS PROVIDED BY TSEARCH2

+ +

The first thing we can do is try out some of the types that + are provided for us. Lets look at the tsvector type provided + for us:

+
+        SELECT 'Our first string used today'::tsvector;
+                        tsvector
+        ---------------------------------------
+         'Our' 'used' 'first' 'today' 'string'
+        (1 row)
+
+ +

The results are the words used within our string. Notice + they are not in any particular order. The tsvector type returns + a string of space separated words.

+
+        SELECT 'Our first string used today again first string'::tsvector;
+                            tsvector
+        -----------------------------------------------
+         'Our' 'used' 'again' 'first' 'today' 'string'
+        (1 row)
+
+ +

Notice the results string has each unique word ('first' and + 'string' only appear once in the tsvector value). Which of + course makes sense if you are searching the full text ... you + only need to know each unique word in the text.

+ +

Those examples were just casting a text field to that of + type tsvector. Lets check out one of the new functions created + by the tsearch2 module.

+ +

The function to_tsvector has 3 possible signatures:

+
+        to_tsvector(oid, text);
+        to_tsvector(text, text);
+        to_tsvector(text);
+
+ +

We will use the second method using two text fields. The + overloaded methods provide us with a way to specify the way + the searchable text is broken up into words (Stemming process). + Right now we will specify the 'default' configuration. See the + section on TSEARCH2 CONFIGURATION to learn more about this.

+
+        SELECT to_tsvector('default',
+                           'Our first string used today first string');
+                        to_tsvector
+        --------------------------------------------
+         'use':4 'first':2,6 'today':5 'string':3,7
+        (1 row)
+
+ +

The result returned from this function is of type tsvector. + The results came about by this reasoning: All of the words in + the text passed in are stemmed, or not used because they are + stop words defined in our configuration. Each lower case + morphed word is returned with all of its positions in the + text.

+ +

In this case the word "Our" is a stop word in the default + configuration. That means it will not be included in the + result. The word "first" is found at positions 2 and 6 + (although "Our" is a stop word, its position is maintained). + The positioning of the words is maintained exactly as in the + original string. The word "used" is morphed to the word "use" + based on the default configuration for word stemming, and is + found at position 4. The rest of the results follow the same + logic. Just a reminder again ... the 'word':position + entries in the output are not in any particular order (i.e. 'use':4 + appears first).

+ +

If you want to view the output of the tsvector fields + without their positions, you can do so with the function + "strip(tsvector)".

+
+        SELECT strip(to_tsvector('default',
+                     'Our first string used today first string'));
+                    strip
+        --------------------------------
+         'use' 'first' 'today' 'string'
+
+ +

If you wish to know the number of unique words returned in + the tsvector you can do so by using the function + "length(tsvector)"

+
+        SELECT length(to_tsvector('default',
+                      'Our first string used today first string'));
+         length
+        --------
+              4
+        (1 row)
+
+ +

Let's take a look at the function to_tsquery. It also has 3 + signatures which follow the same rationale as the to_tsvector + function:

+
+        to_tsquery(oid, text);
+        to_tsquery(text, text);
+        to_tsquery(text);
+
+ +

Lets try using the function with a single word :

+
+        SELECT to_tsquery('default', 'word');
+         to_tsquery
+        -----------
+         'word'
+         (1 row)
+
+ +

I call the function the same way I would a to_tsvector + function, specifying the 'default' configuration for morphing, + and the result is the stemmed output 'word'.

+ +

Lets attempt to use the function with a string of multiple + words:

+
+        SELECT to_tsquery('default', 'this is many words');
+        ERROR:  Syntax error
+
+ +

The function can not accept a space separated string. The + intention of the to_tsquery function is to return a type of + "tsquery" used for searching a tsvector field. What we need to + do is search for one to many words with some kind of logic (for + now simple boolean).

+
+        SELECT to_tsquery('default', 'searching|sentence');
+              to_tsquery
+        ----------------------
+         'search' | 'sentenc'
+        (1 row)
+
+ +

Notice that the words are separated by the boolean + operator "OR". The text can contain the boolean operators &,|,!,() + with their usual meaning.

+ +

You can not use words defined as being a stop word in your + configuration. The function will not fail ... you will just get + no result, and a NOTICE like this:

+
+        SELECT to_tsquery('default', 'a|is&not|!the');
+        NOTICE:  Query contains only stopword(s)
+                 or doesn't contain lexem(s), ignored
+         to_tsquery
+        -----------
+        (1 row)
+
+ +

That is a beginning to using the types, and functions + defined in the tsearch2 module. There are numerous more + functions that I have not touched on. You can read through the + tsearch2.sql file built when compiling to get more familiar + with what is included.

+ +

INDEXING FIELDS IN A TABLE

+ +

The next stage is to add a full text index to an existing + table. In this example we already have a table defined as + follows:

+
+        CREATE TABLE tblMessages
+        (
+                intIndex        int4,
+                strTopic        varchar(100),
+                strMessage      text
+        );
+
+ +

We are assuming there are several rows with some kind of + data in them. Any data will do; just do several inserts with + test strings for a topic and a message. Here is some test data + I inserted (yes, I know it's completely useless stuff ;-) but + it will serve our purpose right now).

+
+        INSERT INTO tblMessages
+               VALUES ('1', 'Testing Topic', 'Testing message data input');
+        INSERT INTO tblMessages
+               VALUES ('2', 'Movie', 'Breakfast at Tiffany\'s');
+        INSERT INTO tblMessages
+               VALUES ('3', 'Famous Author', 'Stephen King');
+        INSERT INTO tblMessages
+               VALUES ('4', 'Political Topic',
+                            'Nelson Mandella is released from prison');
+        INSERT INTO tblMessages
+               VALUES ('5', 'Nursery rhyme phrase',
+                            'Little jack horner sat in a corner');
+        INSERT INTO tblMessages
+               VALUES ('6', 'Gettysburg address quotation',
+                            'Four score and seven years ago'
+                            ' our fathers brought forth on this'
+                            ' continent a new nation, conceived in'
+                            ' liberty and dedicated to the proposition'
+                            ' that all men are created equal');
+        INSERT INTO tblMessages
+               VALUES ('7', 'Classic Rock Bands',
+                            'Led Zeppelin Grateful Dead and The Sex Pistols');
+        INSERT INTO tblMessages
+               VALUES ('8', 'My birth address',
+                            '18 Sommervile road, Regina, Saskatchewan');
+        INSERT INTO tblMessages
+               VALUES ('9', 'Joke', 'knock knock : who\'s there?'
+                                    ' I will not finish this joke');
+        INSERT INTO tblMessages
+               VALUES ('10', 'Computer information',
+                             'My computer is a pentium III 400 mHz'
+                             ' with 192 megabytes of RAM');
+
+ +

The next stage is to create a special text index which we + will use for FTI, so we can search our table of messages for + words or a phrase. We do this using the SQL command:

+
+        ALTER TABLE tblMessages ADD idxFTI tsvector;
+
+ +

Note that unlike traditional indexes, this is actually a new + field in the same table, which is then used (through the magic + of the tsearch2 operators and functions) by a special index we + will create in a moment.

+ +

The general rule for the initial insertion of data will + follow four steps:

+
+    1. update table
+    2. vacuum full analyze
+    3. create index
+    4. vacuum full analyze
+
+ +

The data can be updated into the table, the vacuum full + analyze will reclaim unused space. The index can be created on + the table after the data has been inserted. Having the index + created prior to the update will slow down the process. It can + be done in that manner, this way is just more efficient. After + the index has been created on the table, vacuum full analyze is + run again to update postgres's statistics (ie having the index + take effect).

+
+        UPDATE tblMessages SET idxFTI=to_tsvector('default', strMessage);
+        VACUUM FULL ANALYZE;
+
+ +

Note that this only inserts the field strMessage as a + tsvector, so if you want to also add strTopic to the + information stored, you should instead do the following, which + effectively concatenates the two fields into one before being + inserted into the table:

+
+        UPDATE tblMessages
+            SET idxFTI=to_tsvector('default',coalesce(strTopic,'') ||' '|| coalesce(strMessage,''));
+        VACUUM FULL ANALYZE;
+
+ +

Using the coalesce function makes sure this + concatenation also works with NULL fields.

+ +

We need to create the index on the column idxFTI. Keep in + mind that the database will update the index when some action + is taken. In this case we _need_ the index (the whole point of + Full Text INDEXING ;-)), so don't worry about any indexing + overhead. We will create an index using the gist access method. + GiST stands for Generalized Search Tree, an index structure.

+
+        CREATE INDEX idxFTI_idx ON tblMessages USING gist(idxFTI);
+        VACUUM FULL ANALYZE;
+
+ +

After you have converted all of your data and indexed the + column, you can select some rows to see what actually happened. + I will not display output here but you can play around + yourselves and see what happened.

+ +

The last thing to do is set up a trigger so every time a row + in this table is changed, the text index is automatically + updated. This is easily done using:

+
+        CREATE TRIGGER tsvectorupdate BEFORE UPDATE OR INSERT ON tblMessages
+            FOR EACH ROW EXECUTE PROCEDURE tsearch2(idxFTI, strMessage);
+
+ +

Or if you are indexing both strMessage and strTopic you + should instead do:

+
+        CREATE TRIGGER tsvectorupdate BEFORE UPDATE OR INSERT ON tblMessages
+            FOR EACH ROW EXECUTE PROCEDURE
+                tsearch2(idxFTI, strTopic, strMessage);
+
+ +

Before you ask, the tsearch2 function accepts multiple + fields as arguments so there is no need to concatenate the two + into one like we did before.

+ +

If you want to do something specific with columns, you may + write your very own trigger function using plpgsql or other + procedural languages (but not SQL, unfortunately) and use it + instead of tsearch2 trigger.

+ +

You could, however, call other stored procedures from within + the tsearch2 function. Let's say we want to create a function to + remove certain characters (like the @ symbol) from all + text.

+
+       CREATE FUNCTION dropatsymbol(text) 
+                     RETURNS text AS 'select replace($1, \'@\', \' \');' LANGUAGE SQL;
+
+ +

Now we can use this function within the tsearch2 function on + the trigger.

+
+      DROP TRIGGER tsvectorupdate ON tblmessages;
+        CREATE TRIGGER tsvectorupdate BEFORE UPDATE OR INSERT ON tblMessages
+            FOR EACH ROW EXECUTE PROCEDURE tsearch2(idxFTI, dropatsymbol, strMessage);
+        INSERT INTO tblmessages VALUES (69, 'Attempt for dropatsymbol', 'Test@test.com');
+
+ +

If at this point you receive an error stating: ERROR: Can't + find tsearch config by locale

+ +

Do not worry. You have done nothing wrong. And tsearch2 is + not broken. All that has happened here is that the + configuration is setup to use a configuration based on the + locale of the server. All you have to do is change your default + configuration, or add a new one for your specific locale. See + the section on TSEARCH2 CONFIGURATION.

+
+   SELECT * FROM tblmessages WHERE intindex = 69;
+
+         intindex |         strtopic         |  strmessage   |        idxfti
+        ----------+--------------------------+---------------+-----------------------   
+                69 | Attempt for dropatsymbol | Test@test.com | 'test':1 'test.com':2
+        (1 row)
+
Notice that the string content was passed through the stored +procedure dropatsymbol. The '@' character was replaced with a +single space ... and the output from the procedure was then stored +in the tsvector column. + +

This could be useful for removing other characters from + indexed text, or any kind of preprocessing needed to be done on + the text prior to insertion into the index.

+ +

QUERYING A TABLE

+ +

There are some examples in the README.tsearch2 file for + querying a table. One major difference between tsearch and + tsearch2 is the operator ## is no longer available. Only the + operator @@ is defined, using the types tsvector on one side + and tsquery on the other side.

+ +

Let's search the indexed data for the word "Test". I indexed + based on the concatenation of strTopic and + strMessage:

+
+        SELECT intindex, strtopic FROM tblmessages
+                                  WHERE idxfti @@ 'test'::tsquery;
+         intindex |   strtopic
+        ----------+---------------
+                1 | Testing Topic
+        (1 row)
+
+ +

The only result that matched was the row with a topic + "Testing Topic". Notice that the word I search for was all + lowercase. Let's see what happens when I query for uppercase + "Test".

+
+        SELECT intindex, strtopic FROM tblmessages
+                                  WHERE idxfti @@ 'Test'::tsquery;
+         intindex | strtopic
+        ----------+----------
+        (0 rows)
+
+ +

We get zero rows returned. The reason is that when the + text was inserted, it was morphed to my default configuration + (because of the call to to_tsvector in the UPDATE statement). + If there had been no morphing done, and the tsvector field(s) + contained the word 'Test', a match would have been found.

+ +

Most likely the best way to query the field is to use the + to_tsquery function on the right hand side of the @@ operator + like this:

+
+        SELECT intindex, strtopic FROM tblmessages
+               WHERE idxfti @@ to_tsquery('default', 'Test | Zeppelin');
+         intindex |      strtopic
+        ----------+--------------------
+                1 | Testing Topic
+                7 | Classic Rock Bands
+        (2 rows)
+
+ +

That query searched for all instances of "Test" OR + "Zeppelin". It returned two rows: the "Testing Topic" row, and + the "Classic Rock Bands" row. The to_tsquery function performed + the correct morphology upon the parameters, and searched the + tsvector field appropriately.

+ +

The last example here relates to searching for a phrase, for + example "minority report". This poses a problem with regard to + tsearch2, as it doesn't index phrases, only words. But there is + a way around which doesn't appear to have a significant impact + on query time, and that is to use a query such as the + following:

+
+        SELECT intindex, strTopic FROM tblmessages
+                WHERE idxfti @@ to_tsquery('default', 'gettysburg & address')
+                AND strMessage ~* '.*men are created equal.*';
+         intindex |           strtopic
+        ----------+------------------------------
+                6 | Gettysburg address quotation
+        (1 row)
+        SELECT intindex, strTopic FROM tblmessages
+                WHERE idxfti @@ to_tsquery('default', 'gettysburg & address')
+                AND strMessage ~* '.*something that does not exist.*';
+         intindex | strtopic
+        ----------+----------
+        (0 rows)
+
+ +

Of course, if you're indexing both strTopic and strMessage, and + want to search for this phrase on both, then you will have to + get out the brackets and extend this query a little more.

+ +

TSEARCH2 CONFIGURATION

+ +

Some words such as "and", "the", and "who" are automatically + not indexed, since they belong to a pre-existing dictionary of + "Stop Words" which tsearch2 does not perform indexing on. If + someone needs to search for "The Who" in your database, they + are going to have a tough time coming up with any results, + since both are ignored in the indexes. But there is a + solution.

+ +

Lets say we want to add a word into the stop word list for + english stemming. We could edit the file + :'/usr/local/pgsql/share/english.stop' and add a word to the + list. I edited mine to exclude my name from indexing:

+
+    - Edit /usr/local/pgsql/share/english.stop
+    - Add 'andy' to the list
+    - Save the file.
+
+ +

When you connect to the database, the dict_init procedure is + run during initialization. And in my configuration it will read + the stop words from the file I just edited. If you were + connected to the DB while editing the stop words, you will need + to end the current session and re-connect. When you re-connect + to the database, 'andy' is no longer indexed:

+
+        SELECT to_tsvector('default', 'Andy');
+         to_tsvector
+        ------------
+        (1 row)
+
+ +

Originally I would get the result :

+
+        SELECT to_tsvector('default', 'Andy');
+         to_tsvector
+        ------------
+         'andi':1
+        (1 row)
+
+ +

But since I added it as a stop word, it is now ignored during + indexing. The stop word was added to the list used by the dictionary + "en_stem". If I were to use a different configuration such as + 'simple', the results would be different. There are no stop + words for the simple dictionary. It will just convert to lower + case, and index every unique word.

+
+        SELECT to_tsvector('simple', 'Andy andy The the in out');
+                     to_tsvector
+        -------------------------------------
+         'in':5 'out':6 'the':3,4 'andy':1,2
+        (1 row)
+
+ +

All this talk about which configuration to use is leading us + into the actual configuration of tsearch2. In the examples in + this document the configuration has always been specified when + using the tsearch2 functions:

+
+        SELECT to_tsvector('default', 'Testing the default config');
+        SELECT to_tsvector('simple', 'Example of simple Config');
+
+ +

The pg_ts_cfg table holds each configuration you can use + with the tsearch2 functions. As you can see, the ts_name column + contains both the 'default' configuration, which is based on the 'C' + locale, and the 'simple' configuration, which is not based on + any locale.

+
+        SELECT * from pg_ts_cfg;
+             ts_name     | prs_name |    locale
+        -----------------+----------+--------------
+         default         | default  | C
+         default_russian | default  | ru_RU.KOI8-R
+         simple          | default  |
+        (3 rows)
+
+ +

Each row in the pg_ts_cfg table contains the name of the + tsearch2 configuration, the name of the parser to use, and the + locale mapped to the configuration. There is only one parser to + choose from the table pg_ts_parser called 'default'. More + parsers could be written, but for our needs we will use the + default.

+ +

There are 3 configurations installed by tsearch2 initially. + If your locale is set to 'en_US' for example (like my laptop), + then as you can see there is currently no dictionary configured + to use with that locale. You can either set up a new + configuration or just use one that already exists. If I do not + specify which configuration to use in the to_tsvector function, + I receive the following error.

+
+        SELECT to_tsvector('learning tsearch is like going to school');
+        ERROR:  Can't find tsearch config by locale
+
+ +

We will create a new configuration for use with the server + locale 'en_US'. The first step is to add a new configuration + into the pg_ts_cfg table. We will call the configuration + 'default_english', with the default parser and use the locale + 'en_US'.

+
+        INSERT INTO pg_ts_cfg (ts_name, prs_name, locale)
+               VALUES ('default_english', 'default', 'en_US');
+
+ +

We have only declared that there is a configuration called + 'default_english'. We need to set the configuration of how + 'default_english' will work. The next step is creating a new + dictionary to use. The configuration of the dictionary is + completely different in tsearch2. In the prior versions to + make changes, you would have to re-compile your changes into + the tsearch.so. All of the configuration has now been moved + into the system tables created by executing the SQL code from + tsearch2.sql.

+ +

Let's take a first look at the pg_ts_dict table:

+
+        ftstest=# \d pg_ts_dict
+                Table "public.pg_ts_dict"
+         Column      |  Type   | Modifiers
+        -----------------+---------+-----------
+         dict_name       | text    | not null
+         dict_init       | oid     |
+         dict_initoption | text    |
+         dict_lemmatize  | oid     | not null
+         dict_comment    | text    |
+        Indexes: pg_ts_dict_idx unique btree (dict_name)
+
+ +

The dict_name column is the name of the dictionary, for + example 'simple', 'en_stem' or 'ru_stem'. The dict_init column + is an OID of a stored procedure to run for initialization of + that dictionary, for example 'snb_en_init' or 'snb_ru_init'. + The dict_initoption column is used for options passed to the init + function for the stored procedure. In the cases of 'en_stem' or + 'ru_stem' it is a path to a stopword file for that dictionary, + for example '/usr/local/pgsql/share/english.stop'. This is + however dictated by the dictionary. ISpell dictionaries may + require different options. The dict_lemmatize column is the + OID of the stored procedure used to lemmatize, + for example 'snb_lemmatize'. The dict_comment column is just a + comment.

+ +

Next we will configure the use of a new dictionary based on + ISpell. We will assume you have ISpell installed on your + machine. (in /usr/local/lib)

+ +

First let's register the dictionary(ies) to use from ISpell. + We will use the English dictionary from ISpell. We insert the + paths to the relevant ISpell dictionary (*.hash) and affixes + (*.aff) files. There seems to be some question as to which + ISpell files are to be used. I installed ISpell from the latest + sources on my computer. The installation installed the + dictionary files with an extension of *.hash. Some + installations install with an extension of *.dict. As far as I + know the two extensions are equivalent. So *.hash == + *.dict.

+ +

We will also continue to use the english word stop file that + was installed for the en_stem dictionary. You could use a + different one if you like. The ISpell configuration is based on + the "ispell_template" dictionary installed by default with + tsearch2. We will use the OIDs to the stored procedures from + the row where the dict_name = 'ispell_template'.

+
+        INSERT INTO pg_ts_dict
+               (SELECT 'en_ispell',
+                       dict_init,
+                       'DictFile="/usr/local/lib/english.hash",'
+                       'AffFile="/usr/local/lib/english.aff",'
+                       'StopFile="/usr/local/pgsql/share/english.stop"',
+                       dict_lexize
+                FROM pg_ts_dict
+                WHERE dict_name = 'ispell_template');
+
+ +

Next we need to set up the configuration for mapping the + dictionary use to the lexeme parsings. This will be done by + altering the pg_ts_cfgmap table. We will insert several rows, + specifying the use of the new dictionary we installed and + configured for use within tsearch2. There are several types of + lexemes for which we want to force the use of the ISpell + dictionary.

+
+        INSERT INTO pg_ts_cfgmap (ts_name, tok_alias, dict_name)
+               VALUES ('default_english', 'lhword', '{en_ispell,en_stem}');
+        INSERT INTO pg_ts_cfgmap (ts_name, tok_alias, dict_name)
+               VALUES ('default_english', 'lpart_hword', '{en_ispell,en_stem}');
+        INSERT INTO pg_ts_cfgmap (ts_name, tok_alias, dict_name)
+               VALUES ('default_english', 'lword', '{en_ispell,en_stem}');
+
+ +

We have just inserted 3 records to the configuration + mapping, specifying that the lexeme types for "lhword, + lpart_hword and lword" are to be stemmed using the 'en_ispell' + dictionary we added into pg_ts_dict, when using the + configuration 'default_english' which we added to + pg_ts_cfg.

+ +

There are several other lexeme types used that we do not need + to specify as using the ISpell dictionary. We can simply insert + values using the 'simple' stemming process dictionary.

+
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'url', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'host', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'sfloat', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'uri', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'int', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'float', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'email', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'word', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'hword', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'nlword', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'nlpart_hword', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'part_hword', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'nlhword', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'file', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'uint', '{simple}');
+        INSERT INTO pg_ts_cfgmap
+               VALUES ('default_english', 'version', '{simple}');
+
+ +

Our addition of a configuration for 'default_english' is now + complete. We have successfully created a new tsearch2 + configuration. At the same time we have also set the new + configuration to be our default for en_US locale.

+
+        SELECT to_tsvector('default_english',
+                           'learning tsearch is like going to school');
+                           to_tsvector
+        --------------------------------------------------
+         'go':5 'like':4 'learn':1 'school':7 'tsearch':2
+        SELECT to_tsvector('learning tsearch is like going to school');
+                            to_tsvector
+        --------------------------------------------------
+         'go':5 'like':4 'learn':1 'school':7 'tsearch':2
+        (1 row)
+
+ +

If you already have a configuration set for + the locale and you are changing it to your new dictionary + configuration, you will have to set the old locale to NULL. If + we are using the 'C' locale then we would do this:

+
+        UPDATE pg_ts_cfg SET locale=NULL WHERE locale = 'C';
+
+ +

That about wraps up the configuration of tsearch2. There is + much more you can do with the tables provided. This was just an + introduction to get things working rather quickly.

+ +

ADDING NEW DICTIONARIES TO TSEARCH2

+ +

To aid in the addition of new dictionaries to the tsearch2 + module you can use another additional module in combination + with tsearch2. The gendict module is included into tsearch2 + distribution and is available from gendict/ subdirectory.

+ +

I will not go into detail about installation and + instructions on how to use gendict to its fullest extent right + now. You can read the README.gendict ... it has all of the + instructions and information you will need.

+ +

BACKING UP AND RESTORING DATABASES THAT FEATURE + TSEARCH2

+ +

Believe it or not, this isn't as straightforward as it + should be, and you will have problems trying to back up and + restore any database which uses tsearch2 unless you take the + steps shown below. And before you ask, using pg_dumpall will + result in failure every time. These took a lot of trial and + error to get working, but the process as laid down below has + been used a dozen times now in live production environments so + it should work fine.

+ +

HOWEVER never rely on anyone else's instructions to back up + and restore a database system; always develop and understand + your own methodology, and test it numerous times before you + need to do it for real.

+ +

To Backup a PostgreSQL database that uses the tsearch2 + module:

+ +

1) Backup any global database objects such as users and + groups (this step is usually only necessary when you will be + restoring to a virgin system)

+
+        pg_dumpall -g > GLOBALobjects.sql
+
+ +

2) Backup the full database schema using pg_dump

+
+        pg_dump -s DATABASE > DATABASEschema.sql
+
+ +

3) Backup the full database using pg_dump

+
+        pg_dump -Fc DATABASE > DATABASEdata.tar
+
+ +

To Restore a PostgreSQL database that uses the tsearch2 + module:

+ +

1) Create the blank database

+
+        createdb DATABASE
+
+ +

2) Restore any global database objects such as users and + groups (this step is usually only necessary when you will be + restoring to a virgin system)

+
+        psql DATABASE < GLOBALobjects.sql
+
+ +

3) Create the tsearch2 objects, functions and operators

+
+        psql DATABASE < tsearch2.sql
+
+ +

4) Edit the backed up database schema and delete all SQL + commands which create tsearch2 related functions, operators and + data types, BUT NOT fields in table definitions that specify + tsvector types. If you're not sure what these are, they are the + ones listed in tsearch2.sql. Then restore the edited schema to + the database.

+
+        psql DATABASE < DATABASEschema.sql
+
+ +

5) Restore the data for the database

+
+        pg_restore -N -a -d DATABASE DATABASEdata.tar
+
+ +

If you get any errors in step 4, it will most likely be + because you forgot to remove an object that was created in + tsearch2.sql. Any errors in step 5 will mean the database + schema was probably restored wrongly.

+
+ + diff --git a/contrib/tsearch2/docs/tsearch2-guide.html b/contrib/tsearch2/docs/tsearch2-guide.html new file mode 100644 index 0000000000..2529480a53 --- /dev/null +++ b/contrib/tsearch2/docs/tsearch2-guide.html @@ -0,0 +1,1057 @@ + + + + +tsearch2 guide + + +

The tsearch2 Guide

+ +

+Brandon Craig Rhodes
30 June 2003 +

+This Guide introduces the reader to the PostgreSQL tsearch2 module, +version 2. +More formal descriptions of the module's types and functions +are provided in the tsearch2 Reference, +which is a companion to this document. +You can retrieve a beta copy of the tsearch2 module from the +GiST for PostgreSQL +page — look under the section entitled Development History +for the current version. +

+First we will examine the tsvector and tsquery types +and how they are used to search documents; +next, we will use them to build a simple search engine in SQL; +and finally, we will study the internals of document conversion +and how you might tune the internals to accommodate various searching needs. +

+Once you have tsearch2 working with PostgreSQL, +you should be able to run the examples here exactly as they are typed. +

+


+

Table of Contents

+
+Vectors and Queries
+A Simple Search Engine
+Ranking and Position Weights
+Casting Vectors and Queries
+Parsing and Lexing
+
+ +
+ +

Vectors and Queries

+ +
+This section introduces +the two data types upon which tsearch2 search engines are based, +and illustrates their interaction using the simplest possible case. +The complex examples we present later on +are merely variations and elaborations of this basic mechanism. +
+

+The tsearch2 module allows you to index documents by the words they contain, +and then perform very efficient searches +for documents that contain a given combination of words. +Preparing your document index involves two steps: +

    +
  • Making a list of the words each document contains. + You must reduce each document to a tsvector + which lists each word that appears in the document. + This process offers many options, + because there is no requirement + that you must copy words into the vector + exactly as they appear in the document. + For example, + many developers omit frequent and content-free stop words + like the to reduce the size of their index; + others reduce different forms of the same word + (forked, forking, forks) + to a common form (fork) + to make search results independent of tense and case. + Because words are very often stored in a modified form, + we use the special term lexemes + for the word forms we actually store in the vector. +
  • Creating an index of the documents by lexeme. + This is managed automatically by tsearch2 + when you create a gist() index + on the tsvector column of a table, + which implements a form of the Berkeley + Generalized Search Tree. +
+Once your documents are indexed, +performing a search involves: +
    +
  • Reducing the search terms to lexemes. + You must express each search you want to perform + as a tsquery specifying a boolean combination of lexemes. + Note that tsearch2 only finds exact matches + between the lexemes in your query and the ones in each vector — + even capitalization counts as a difference + (which is why all lexemes are usually kept lowercase). + So you must process search words the same way you processed document words; + if forking became fork in the document's tsvector, + then the search term forking must also become fork + or the search will not find the document. +
  • Retrieving the documents that match the query. + Running a SELECT ... WHERE + query @@ vector + on the table with the vector column + will return the documents that match your query. +
  • Presenting your results. + This final stage offers as many options + as turning documents into vectors. + You can order documents by how well they matched the search terms; + create a headline for each document + showing some of the phrases in which it uses the search terms; + and restrict the number of results retrieved. + You will of course want some way to identify each document, + so the user can ask for the full text of the ones he wants to read. +
+And beyond deciding upon rules for turning documents into vectors +and for presenting search results to users, +you have to decide where to perform these operations — +whether one database server +will parse documents, perform searches, and prepare search results, +or whether to spread the load of these operations across several machines. +These are complicated design issues +which we will explore later; +in this section and the next, +we will illustrate what can be accomplished +using a single database server. +

+The default tsearch2 configuration, +which we will learn more about later, +provides a good example of a process for reducing documents to vectors: + +

+=# SELECT set_curcfg('default')
+=# SELECT to_tsvector('The air smells of sea water.')
+             to_tsvector             
+-------------------------------------
+ 'air':2 'sea':5 'smell':3 'water':6
+(1 row)
+
+ +Note the complex relationship between this document and its vector. +The vector lists only words from the document — +spaces and punctuation have disappeared. +Common words like the and of have been eliminated. +The -s that makes smells a plural has been removed, +leaving a lexeme that represents the word in its simplest form. +And finally, +though the vector remembers the positions in which each word appeared, +it does not store the lexemes in that order. +

+Keeping word positions in your vectors is optional, by the way. +The positions are necessary for the tsearch2 ranking functions, +which you can use to prioritize documents +based on how often each document uses the search terms +and whether they appear in close proximity. +But if you do not perform ranking, +or use your own process that ignores the word positions stored in the vector, +then you can save space by stripping them from your vectors: + +

+=# SELECT strip(to_tsvector('The air smells of sea water.'))
+            strip            
+-----------------------------
+ 'air' 'sea' 'smell' 'water'
+(1 row)
+
+ +Now that we have a procedure for creating vectors, +we can build an indexed table of vectors very simply: + +
+=# CREATE TABLE vectors ( vector tsvector )
+=# CREATE INDEX vector_index ON vectors USING gist(vector)
+=# INSERT INTO vectors VALUES (to_tsvector('The path forks here'))
+=# INSERT INTO vectors VALUES (to_tsvector('A crawl leads west'))
+=# INSERT INTO vectors VALUES (to_tsvector('The left fork leads northeast'))
+=# SELECT * FROM vectors
+                  vector                  
+------------------------------------------
+ 'fork':3 'path':2
+ 'lead':3 'west':4 'crawl':2
+ 'fork':3 'lead':4 'left':2 'northeast':5
+(3 rows)
+
+ +Now we can search this collection of document vectors +using the @@ operator and a tsquery +that specifies the combination of lexemes we are looking for. +Note that while vectors simply list lexemes, +queries always combine them with the operators +‘&’ and, +‘|’ or, +and  ‘!’ not, +plus parentheses for grouping. +Some examples of the query syntax: + + + + + +
‘find documents with the word forks in them’
+
'forks' +
‘... with both forks and leads
+
'forks & leads' +
‘... with either forks or leads
+
'forks | leads' +
‘... with either forks or leads, + but without crawl
+
'(forks|leads) & !crawl' +
+The tsearch2 module +provides a to_tsquery() function for creating queries +that uses the same process as to_tsvector() uses +to reduce words to lexemes. +For instance, +it will remove the -s from the plurals in the last example above: + +
+=# SELECT to_tsquery('(leads|forks) & !crawl')
+           to_tsquery           
+--------------------------------
+ ( 'lead' | 'fork' ) & !'crawl'
+(1 row)
+
+ +Again, +this is critically important because the search operator @@ +only finds exact matches +between the words in a query and the words in a vector; +if the document vector lists the lexeme fork +but the query looks for the plural form forks, +the query would not match that document. +Thanks to the symmetry between our process +for producing vectors and queries, however, +the above searches return correct results: + +
+=# SELECT * FROM vectors WHERE vector @@ to_tsquery('(leads|forks) & !crawl')
+                  vector                  
+------------------------------------------
+ 'fork':3 'path':2
+ 'fork':3 'lead':4 'left':2 'northeast':5
+(2 rows)
+
+ +You may want to try the other queries shown above, +and perhaps invent some of your own. +

+You should not include stop words in a query, +since you cannot search for words you have discarded. +If you throw out the word the when building vectors, for example, +your index will obviously not know which documents included it. +The to_tsquery() function will automatically detect this +and issue a notice to warn you of this mistake: + +

+=# SELECT to_tsquery('the')
+NOTICE:  Query contains only stopword(s) or doesn't contain lexem(s), ignored
+ to_tsquery 
+------------
+ 
+(1 row)
+
+ +But if you ever build vectors and queries using your own routines, +a possibility we will discuss later, +then you will need to enforce this rule yourself. + +
+Now that you understand how vectors and queries work together, +you are prepared to tackle many additional topics: +how to distribute searching across many servers; +how to customize the process +by which tsearch2 turns documents and queries into lexemes, +or use a process of your own; +and how to sort and display search results to your users. +But before discussing these detailed questions, +we will build a simple search engine +to see how easily its basic features work together. +
+ +

A Simple Search Engine

+ +
+In this section we build a simple search engine out of SQL functions +that use the vector and query types described in the previous section. +While this example is simpler +than a search engine that has to interface with the outside world, +it will illustrate the basic principles of building a search engine, +and better prepare you for developing your own. +
+Building a search engine involves only a few improvements +upon the rudimentary vector searches described in the last section. +
    +
  • Because the user wants to read documents, not vectors, + you must provide some way + for the full text of each document to be accessed — + either by storing the entire text of each document in the database, + or storing an identifier + like a URL, file name, or document routing number + that lets you fetch the document from other storage. +
  • You can make it easier for user interface code to refer to each document + by providing a unique identifier for each document, + perhaps with a SERIAL column. +
  • Search results should be ordered by relevance. + If you leave word positions in your vectors, + you can either have PostgreSQL ORDER your results + BY a ranking function, + or you can fetch the vectors yourself and perform your own sort. + If you choose to ignore word positions or strip them from your vectors, + you will have to determine relevance yourself, + using either the full text of the document + or other information about each document you may possess. +
  • For each document returned by a search, + you will usually want to display a summary called a headline + that shows short excerpts + illustrating how the document uses the query words. + Headlines are usually generated from the full text of the document, + not from position information in the tsvector, + since excerpts lacking stop words, punctuation, and suffixes + would not be comprehensible. + If you store the full text of each document in the database, + headlines can be generated very simply by a tsearch2 function. + If you store your documents elsewhere, + then you will either have to transmit each document to the database + every time you want to run the headline function on it, + or use your own headline code outside of the database. +
+

+We can easily construct a simple search engine +that accomplishes these goals. +First we build a table that, for each document, +stores a unique identifier, the full text of the document, +and its tsvector: + +

+=# CREATE TABLE docs ( id SERIAL, doc TEXT, vector tsvector )
+=# CREATE INDEX docs_index ON docs USING gist(vector);
+
+ +Note that although searches will still work +on tables where you have neglected +to create a gist() index over your vectors, +they will run much more slowly +since they will have to compare the query +against every document vector in the table. +

+Because the table we have created +stores each document in two different ways — +both as text and as a vector — +our INSERT statements must provide the document in both forms. +While more advanced PostgreSQL programmers +might accomplish this with a database trigger or rule, +for this simple example we will use a small SQL function: + +

+=# CREATE FUNCTION insdoc(text) RETURNS void LANGUAGE sql AS
+  'INSERT INTO docs (doc, vector) VALUES ($1, to_tsvector($1));'
+
+ +Now, by calling insdoc() several times, +we can populate our table with documents: + +
+=# SELECT insdoc('A low crawl over cobbles leads inward to the west.')
+=# SELECT insdoc('The canyon runs into a mass of boulders -- dead end.')
+=# SELECT insdoc('You are crawling over cobbles in a low passage.')
+=# SELECT insdoc('Cavernous passages lead east, north, and south.')
+=# SELECT insdoc('To the east a low wide crawl slants up.')
+=# SELECT insdoc('You are in the south side chamber.')
+=# SELECT insdoc('The passage here is blocked by a recent cave-in.')
+=# SELECT insdoc('You are in a splendid chamber thirty feet high.')
+
+ +Now we can build a search function. +Its SELECT statement is based upon +the same @@ operation illustrated in the previous section. +But instead of returning matching vectors, +we return for each document +its SERIAL identifier, so the user can retrieve it later; +a headline that illustrates its use of the search terms; +and a ranking with which we also order the results. +Our search operation can be coded as a single SELECT statement +returning its own kind of table row, +which we call a finddoc_t: + +
+=# CREATE TYPE finddoc_t AS (id INTEGER, headline TEXT, rank REAL)
+=# CREATE FUNCTION finddoc(text) RETURNS SETOF finddoc_t LANGUAGE sql AS '
+   SELECT id, headline(doc, q), rank(vector, q)
+     FROM docs, to_tsquery($1) AS q
+     WHERE vector @@ q ORDER BY rank(vector, q) DESC'
+
+ +This function is a rather satisfactory search engine. +Here is one example search, +after which the user fetches the top-ranking document itself; +with similar commands you can try queries of your own: + +
+=# SELECT * FROM finddoc('passage|crawl')
+ id |                       headline                        | rank 
+----+-------------------------------------------------------+------
+  3 | <b>crawling</b> over cobbles in a low <b>passage</b>. | 0.19
+  1 | <b>crawl</b> over cobbles leads inward to the west.   |  0.1
+  4 | <b>passages</b> lead east, north, and south.          |  0.1
+  5 | <b>crawl</b> slants up.                               |  0.1
+  7 | <b>passage</b> here is blocked by a recent  cave-in.  |  0.1
+(5 rows)
+=# SELECT doc FROM docs WHERE id = 3
+                       doc                       
+-------------------------------------------------
+ You are crawling over cobbles in a low passage.
+(1 row)
+
+ +While by default the headline() function +surrounds matching words with <b> and </b> +in order to distinguish them from the surrounding text, +you can provide options that change its behavior; +consult the tsearch2 Reference for more details about +Headline Functions. +

+Though a search may match hundreds or thousands of documents, +you will usually present only ten or twenty results to the user at a time. +This can be most easily accomplished +by limiting your query with a LIMIT +and an OFFSET clause — +to display results ten at a time, for example, +you would generate your first page of results +with LIMIT 10 OFFSET 0, +your second page +with LIMIT 10 OFFSET 10, +your third page +with LIMIT 10 OFFSET 20, +and so forth. +There are two problems with this approach, however. +

+The first problem is the strain of running the query over again +for every page of results the user views. +For small document collections or lightly loaded servers, +this may not be a problem; +but the impact can be high +when a search must repeatedly rank and sort +the same ten thousand results +on an already busy server. +So instead of selecting only one page of results, +you will probably use LIMIT and OFFSET +to return a few dozen or few hundred results, +which you can cache and display to the user one page at a time. +Whether a result cache rewards your effort +will depend principally on the behavior of your users — +how often they even view the second page of results, for instance. +

+The second issue solved by caching involves consistency. +If the database is changing while the user browses their results, +then documents might appear and disappear as they page through them. +In some cases the user might even miss a particular result — +perhaps the one they were looking for — +if, say, its rank improves from 31st to 30th +after they load results 21–30 but before they view results 31–40. +While many databases are static or infrequently updated, +and will not present this problem, +users searching very dynamic document collections +might benefit from the stable results that caches yield. + +

+Having seen the features of a search engine +implemented entirely within the database, +we will learn about some specific tsearch2 features. +First we will look in more detail at document ranking. +
+ +

Ranking and Position Weights

+ +
+When we built our simple search engine, +we used the rank() function to order our results. +Here we describe tsearch2 ranking in more detail. +
+ +There are two functions with which tsearch2 can rank search results. +They both use the lexeme positions listed in the tsvector, +so you cannot rank vectors +from which these have been removed with strip(). +The rank() function existed in older versions of OpenFTS, +and has the feature that you can assign different weights +to words from different sections of your document. +The rank_cd() function uses a recent technique for weighting results +but does not allow different weights to be given +to different sections of your document. +

+Both ranking functions allow you to specify, +as an optional last argument, +whether you want their results normalized — +whether the rank returned should be adjusted for document length. +Specifying a last argument of 0 (zero) makes no adjustment; +1 (one) divides the document rank +by the logarithm of the document length; +and 2 divides it by the plain length. +In all of these examples we omit this optional argument, +which is the same as specifying zero — +we are making no adjustment for document length. +

+The rank_cd() function uses an experimental measurement +called cover density ranking that rewards documents +when they make frequent use of the search terms +that are close together in the document. +You can read about the algorithm in more detail +in Clarke et al., + “Relevance Ranking for One to Three Term Queries.” +An optional first argument allows you to tune their formula; +for details +see the section on ranking +in the Reference. +

+The rank() function offers more flexibility +because it pays attention to the weights +with which you have labelled lexeme positions. +Currently tsearch2 supports four different weight labels: +'D', the default weight; +and 'A', 'B', and 'C'. +All vectors created with to_tsvector() +assign the weight 'D' to each position, +which as the default is not displayed when you print a vector out. +

+If you want positions with weights other than 'D', +you have two options: +either you can author a vector directly through the ::tsvector +casting operation, +as described in the following section, +which lets you give each position whichever weight you want; +or you can pass a vector through the setweight() function +which sets all of its position weights to a single value. +An example of the latter: + + +

+=# SELECT vector FROM docs WHERE id = 3
+                 vector                 
+----------------------------------------
+ 'low':8 'cobbl':5 'crawl':3 'passag':9
+(1 row)
+=# SELECT setweight(vector, 'A') FROM docs WHERE id = 3
+                 setweight                  
+--------------------------------------------
+ 'low':8A 'cobbl':5A 'crawl':3A 'passag':9A
+(1 row)
+
+ + +Merely changing all of the weights in a vector is not very useful, +of course, +since this still results in all words having the same weight. +But if we parse different parts of a document separately, +giving each section its own weight, +and then concatenate the vectors of each part into a single vector, +the result can be very useful. +We can construct a simple example +in which document titles are given greater weight +than text in the body of the document: + + +
+=# CREATE TABLE tdocs ( id SERIAL, title TEXT, doc TEXT, vector tsvector )
+=# CREATE INDEX tdocs_index ON tdocs USING gist(vector);
+=# CREATE FUNCTION instdoc(text, text) RETURNS void LANGUAGE sql AS
+  'INSERT INTO tdocs (title, doc, vector)
+   VALUES ($1, $2, setweight(to_tsvector($1), ''A'') || to_tsvector($2));'
+
+ + +Now words from a document title will be weighted differently +than those in the main text +if we provide the title and body as separate arguments: + + +
+=# SELECT instdoc('Spendid Chamber',
+ 'The walls are frozen rivers of orange stone.')
+ instdoc 
+---------
+ 
+(1 row)
+=# SELECT vector FROM tdocs
+                                    vector                                    
+------------------------------------------------------------------------------
+ 'wall':4 'orang':9 'river':7 'stone':10 'frozen':6 'chamber':2A 'spendid':1A
+(1 row)
+
+ + +Note that although the necessity is unusual, +you can constrain search terms +to only match words from certain sections +by following them with a colon +and a list of the sections in which the word can occur; +by default this list is 'ABCD' +so that search terms match words from all sections. +For example, +here we search for a word both generally, +and then looking only for specific weights: + + +
+=# SELECT title, doc FROM tdocs WHERE vector @@ to_tsquery('spendid')
+      title      |                     doc                      
+-----------------+----------------------------------------------
+ Spendid Chamber | The walls are frozen rivers of orange stone.
+(1 row)
+=# SELECT title, doc FROM tdocs WHERE vector @@ to_tsquery('spendid:A')
+      title      |                     doc                      
+-----------------+----------------------------------------------
+ Spendid Chamber | The walls are frozen rivers of orange stone.
+(1 row)
+=# SELECT title, doc FROM tdocs WHERE vector @@ to_tsquery('spendid:D')
+ title | doc 
+-------+-----
+(0 rows)
+
+ + + + +
+Our examples so far use tsearch2 to parse our documents into vectors. +When your application needs absolute control over vector content, +you will want to use direct type casting, +which is described in the next section. +
+ +

Casting Vectors and Queries

+ +
+While tsearch2 has powerful and flexible ways +to process documents and turn them into document vectors, +you will sometimes want to parse documents on your own +and place the results directly in vectors. +Here we show you how. +
+ +In the preceding examples, +we used the to_tsvector() function +when we needed a document's text reduced to a document vector. +We saw that the function stripped whitespace and punctuation, +eliminated common words, +and altered suffixes to reduce words to a common form. +While these operations are often desirable, +and while in the sections below +we will gain precise control over this process, +there are occasions on which +you want to avoid the changes that to_tsvector() makes to text +and specify explicitly the words that you want in your vectors. +Or you may want to create queries directly +rather than through to_tsquery(). +

+For example, +you may have already developed your own routine +for reducing your documents to searchable lexemes, +and do not want your carefully generated terms altered +by passing them through to_tsvector(). +Or you might be developing and debugging parsing routines of your own +that you are not ready to load into the database. +In either case, +you will find that direct insertion is easily accomplished +if you simply follow some simple rules. +

+Vectors are created directly +when you cast a string of whitespace separated lexemes +to the tsvector type: + + +

+=# select 'the only exit is the way you came in'::tsvector
+                     tsvector                     
+--------------------------------------------------
+ 'in' 'is' 'the' 'way' 'you' 'came' 'exit' 'only'
+(1 row)
+
+ + +Notice that the conversion interpreted the string +simply as a list of lexemes to be included in the vector. +Their order was lost, +as was the number of times each lexeme appeared. +You must keep in mind that directly creating vectors with casting +is not an alternate means of parsing; +it is a way of directly entering lexemes into a vector without parsing. +

+Queries can also be created through casting, +if you separate lexemes with boolean operators +rather than with whitespace. +When creating your own vectors and queries, +remember that the search operator @@ +finds only exact matches between query lexemes and vector lexemes +— +if they are not exactly the same string, +they will not be considered a match. +

+To include lexeme positions in your vector, +write the positions exactly the way tsearch2 displays them +when it prints vectors: +by following each lexeme with a colon +and a comma-separated list of integer positions. +If you list a lexeme more than once, +then all the positions listed for it are combined into a single list. +For example, +here are two ways of writing the same vector, +depending on whether you mention ‘the’ twice +or combine its positions into a list yourself: + + +

+=# select 'the:1 only:2 exit:3 is:4 the:5 way:6 you:7 came:8 in:9'::tsvector
+                              tsvector                              
+--------------------------------------------------------------------
+ 'in':9 'is':4 'the':1,5 'way':6 'you':7 'came':8 'exit':3 'only':2
+(1 row)
+=# select 'the:1,5 only:2 exit:3 is:4 way:6 you:7 came:8 in:9'::tsvector
+                              tsvector                              
+--------------------------------------------------------------------
+ 'in':9 'is':4 'the':1,5 'way':6 'you':7 'came':8 'exit':3 'only':2
+(1 row)
+
+ + +Things can get slightly tricky +if you want to include apostrophes, backslashes, or spaces +inside your lexemes +(wanting to include either of the latter would be unusual, +but they can be included if you follow the rules). +The main problem is that the apostrophe and backslash +are important both to PostgreSQL when it is interpreting a string, +and to the tsvector conversion function. +You may want to review section +1.1.2.1, +“String Constants” +in the PostgreSQL documentation before proceeding. +

+When you cast strings directly into vectors: +

    +
  • The string is interpreted as a whitespace-separated list of lexemes, + any of which can be suffixed with a colon and a list of positions. +
  • A lexeme can be quoted by preceding it with an apostrophe, + in which case it runs until the next apostrophe; + otherwise a lexeme ends with the first whitespace or colon encountered. +
  • Any character preceded by a backslash, + including whitespace, the apostrophe, the colon, and the backslash itself, + loses its normal meaning and is treated as a letter. + Backslashes are effective + both inside and outside of apostrophe-quoted lexemes. +
  • A lexeme can be suffixed with a list of positions + by appending a colon and a comma-separated list of integers, + each of which can itself be followed by a letter + to designate a position weight + (position weights are described above, + in the section on ranking and position weights). +
+ +Here are some example strings, +showing the lexeme you want to insert +together with the string that the ::tsvector operator +needs to see, +and how you would type that string at the PostgreSQL prompt: + + + + + + + + + + + + +
For the lexeme... +you need the string... +which you can type as: +
nugget +nugget +'nugget' +
won't +won't +'won''t' +
pinin' +pinin' +'pinin''' +
'bout +\'bout +'\\''bout' +
white mist +white\ mist +'white\\ mist' +
or: +'white mist' +'''white mist''' +
won't budge +won\'t\ budge +'won\\''t\\ budge' +
or: +'won\'t budge' +'''won\\''t budge''' +
back\slashed +back\\slashed +'back\\\\slashed' +
+ +Remember to use the quoted quoting shown at the right +only when typing in strings as part of a PostgreSQL query. +If you are providing strings through a library +that automatically quotes them +or provides them in binary form to PostgreSQL, +then you can use the strings in the middle instead — +suitably quoted in the language you are using, of course. +

+Position weights are described above, +in the section on ranking and position weights, +and can be written exactly as they will be displayed +when you select a weighted vector: + +

+=# select 'weighty:1,3A trivial:2B,4'::tsvector
+           tsvector            
+-------------------------------
+ 'trivial':2B,4 'weighty':1,3A
+(1 row)
+
+ +

+Note that if you are composing SQL queries +in a scripting language like Perl or Python, +that itself considers quotes and backslashes special, +then you may have another quoting layer to deal with +on top of the two layers already shown above. +In such cases you may want to write a function +that performs the necessary quoting for you. + +

+Having seen how to create vectors of your own, +it is time to learn how the native tsearch2 parser +reduces documents to vectors. +
+ +

Parsing and Lexing

+ +
+The previous section +described how you can bypass the parser provided by tsearch2 +and populate your table of documents +with vectors of your own devising. +But for those interested in the native tsearch2 facilities, +we present here an overview of how it goes about +reducing documents to vectors. +
+ +The to_tsvector() function reduces documents to vectors +in two stages. +First, a parser breaks the input document +into short sequences of text called tokens. +Each token is usually a word, space, or piece of punctuation, +though some parsers return larger and more exotic items +like HTML tags as single tokens. +Each token returned by the parser +is either discarded +or passed to a dictionary that converts it into a lexeme. +The resulting lexemes are collected into a vector and returned. +

+The choice of which parser and dictionaries to_tsvector() should use +is controlled by your choice of configuration. +The tsearch2 module comes with several configurations, +and you can define more of your own; +in fact the creation of a new configuration is illustrated below, +in the section on position weights. +

+To learn about parsing in more detail, +we will study this example: + +

+=# select to_tsvector('default',
+     'The walls extend upward for well over 100 feet.')
+                       to_tsvector                        
+----------------------------------------------------------
+ '100':8 'feet':9 'wall':2 'well':6 'extend':3 'upward':4
+(1 row)
+
+ +Unlike the to_tsvector() calls used in the above examples, +this one specifies the 'default' configuration explicitly. +When we called to_tsvector() in earlier examples +with only one argument, +it used the current configuration, +which is chosen automatically based on your LOCALE +if that locale is mentioned in the pg_ts_cfg table +(which is shown under the first bullet in the description below). +If your locale is not listed in the table, +your attempts to use the current configuration will return: + +
+ERROR:  Can't find tsearch2 config by locale
+
+ +You can always change the current configuration manually +by calling the set_curcfg() function +described in the section on +Configurations +in the Reference. +

+Each configuration serves as an index into two different tables: +in pg_ts_cfg it determines +which parser will break our text into tokens, +and in pg_ts_cfgmap +it directs each token to a dictionary for processing. +The steps in detail are: + +

    +
  • +

    First, our text is parsed, +using the parser listed for our configuration in the pg_ts_cfg table. +We are using the 'default' configuration, +so the table tells us to use the 'default' parser: + +

    +=# SELECT * FROM pg_ts_cfg WHERE ts_name = 'default'
    + ts_name | prs_name | locale 
    +---------+----------+--------
    + default | default  | C
    +(1 row)
    +
    + +So our text will be parsed as though we had called: + +
    +=# select * from parse('default',
    +     'The walls extend upward for well over 100 feet.')
    +
    + +This breaks the text into a list of tokens +which are each labelled with an integer type: +

    +The₁◆₁₂walls₁◆₁₂extend₁◆₁₂upward₁◆₁₂for₁◆₁₂well₁◆₁₂over₁◆₁₂100₂₂◆₁₂feet₁.₁₂ +

    +Each word has been assigned type 1; +each space (represented here by a diamond) and the period, type 12; +and the number one hundred, type 22. +We can retrieve the alias for each type +through the token_type function: + +

    +=# select * from token_type('default')
    +     where tokid = 1 or tokid = 12 or tokid = 22
    + tokid | alias |      descr       
    +-------+-------+------------------
    +     1 | lword | Latin word
    +    12 | blank | Space symbols
    +    22 | uint  | Unsigned integer
    +(3 rows)
    +
    + + +
  • +Next, the tokens are assigned to dictionaries +by looking up their type aliases in pg_ts_cfgmap +to determine which dictionary should process each token. +Since we are using the 'default' configuration: + +
    +=# select * from pg_ts_cfgmap where ts_name = 'default' and
    +      (tok_alias = 'lword' or tok_alias = 'blank' or tok_alias = 'uint')
    + ts_name | tok_alias | dict_name 
    +---------+-----------+-----------
    + default | lword     | {en_stem}
    + default | uint      | {simple}
    +(2 rows)
    +
    + +Since this map provides no dictionary for blank tokens, +the spaces and period are simply discarded, +leaving nine tokens, +which are then numbered by their position: +

    +The₁ +walls₂ +extend₃ +upward₄ +for₅ +well₆ +over₇ +100₈ +feet₉ + +

  • +Finally, the words are reduced to lexemes by their respective dictionaries. +The 100 is submitted to the simple dictionary, +which returns tokens unaltered except for making them lowercase: + +
    +=# select lexize('simple', '100')
    + lexize 
    +--------
    + {100}
    +(1 row)
    +
    + +The other words are submitted to en_stem +which reduces each English word to a linguistic stem, +and then discards stems which belong to its list of stop words; +you can see the list of stop words +in the file whose path is in the dict_initoption field +of the pg_ts_dict table entry for en_stem. +The first three words of our text illustrate respectively +an en_stem stop word, +a word which en_stem alters by stemming, +and a word which en_stem leaves alone: + +
    +=# select lexize('en_stem', 'The')
    + lexize 
    +--------
    + {}
    +(1 row)
    +=# select lexize('en_stem', 'walls')
    + lexize 
    +--------
    + {wall}
    +(1 row)
    +=# select lexize('en_stem', 'extend')
    +  lexize  
    +----------
    + {extend}
    +(1 row)
    +
    + +Once en_stem is done discarding stop words and stemming the rest, +we are left with: +

    +wall₂ +extend₃ +upward₄ +well₆ +100₈ +feet₉ +

    +Which is precisely the result of the example that began this section. +

+Query words are stemmed by the to_tsquery() function +using the same scheme to determine the dictionary for each token, +with the difference that the query parser recognizes as special +the boolean operators that separate query words. + + + + + diff --git a/contrib/tsearch2/docs/tsearch2-ref.html b/contrib/tsearch2/docs/tsearch2-ref.html new file mode 100644 index 0000000000..df0faa47d9 --- /dev/null +++ b/contrib/tsearch2/docs/tsearch2-ref.html @@ -0,0 +1,448 @@ + + + + +tsearch2 reference + + +

The tsearch2 Reference

+ +

+Brandon Craig Rhodes
30 June 2003 +

+This Reference documents the user types and functions +of the tsearch2 module for PostgreSQL. +An introduction to the module is provided +by the tsearch2 Guide, +a companion document to this one. +You can retrieve a beta copy of the tsearch2 module from the +GiST for PostgreSQL +page — look under the section entitled Development History +for the current version. + +

Vectors and Queries

+ +Vectors and queries both store lexemes, +but for different purposes. +A tsvector stores the lexemes +of the words that are parsed out of a document, +and can also remember the position of each word. +A tsquery specifies a boolean condition among lexemes. +

+Any of the following functions with a configuration argument +can use either an integer id or textual ts_name +to select a configuration; +if the option is omitted, then the current configuration is used. +For more information on the current configuration, +read the next section on Configurations. + +

Vector Operations

+ +
+
+ to_tsvector( [configuration,] + document TEXT) RETURNS tsvector +
+ Parses a document into tokens, + reduces the tokens to lexemes, + and returns a tsvector which lists the lexemes + together with their positions in the document. + For the best description of this process, + see the section on Parsing and Lexing + in the accompanying tsearch2 Guide. +
+ strip(vector tsvector) RETURNS tsvector +
+ Return a vector which lists the same lexemes + as the given vector, + but which lacks any information + about where in the document each lexeme appeared. + While the returned vector is thus useless for relevance ranking, + it will usually be much smaller. +
+ setweight(vector tsvector, letter) RETURNS tsvector +
+ This function returns a copy of the input vector + in which every location has been labelled + with either the letter + 'A', 'B', or 'C', + or the default label 'D' + (which is the default with which new vectors are created, + and as such is usually not displayed). + These labels are retained when vectors are concatenated, + allowing words from different parts of a document + to be weighted differently by ranking functions. +
+ vector1 || vector2 +
+ concat(vector1 tsvector, vector2 tsvector) + RETURNS tsvector +
+ Returns a vector which combines the lexemes and position information + in the two vectors given as arguments. + Position weight labels (described in the previous paragraph) + are retained intact during the concatenation. + This has at least two uses. + First, + if some sections of your document + need to be parsed with different configurations than others, + you can parse them separately + and concatenate the resulting vectors into one. + Second, + you can weight words from some sections of your document + more heavily than those from others by: + parsing the sections into separate vectors; + assigning the vectors different position labels + with the setweight() function; + concatenating them into a single vector; + and then providing a weights argument + to the rank() function + that assigns different weights to positions with different labels. +
+ tsvector_size(vector tsvector) RETURNS INT4 +
+ Returns the number of lexemes stored in the vector. +
+ text::tsvector RETURNS tsvector +
+ Directly casting text to a tsvector + allows you to directly inject lexemes into a vector, + with whatever positions and position weights you choose to specify. + The text should be formatted + like the vector would be printed by the output of a SELECT. + See the Casting + section in the Guide for details. +
+ +

Query Operations

+ +
+
+ to_tsquery( [configuration,] + querytext text) RETURNS tsquery +
+ Parses a query, + which should be single words separated by the boolean operators + “&” and, + “|” or, + and “!” not, + which can be grouped using parentheses. + Each word is reduced to a lexeme using the current + or specified configuration. + +
+ querytree(query tsquery) RETURNS text +
+ This might return a textual representation of the given query. +
+ text::tsquery RETURNS tsquery +
+ Directly casting text to a tsquery + allows you to directly inject lexemes into a query, + with whatever positions and position weight flags you choose to specify. + The text should be formatted + like the query would be printed by the output of a SELECT. + See the Casting + section in the Guide for details. +
+ +

Configurations

+ +A configuration specifies all of the equipment necessary +to transform a document into a tsvector: +the parser that breaks its text into tokens, +and the dictionaries which then transform each token into a lexeme. +Every call to to_tsvector() (described above) +uses a configuration to perform its processing. +Three configurations come with tsearch2: + +
    +
  • default — Indexes words and numbers, + using the en_stem English Snowball stemmer for Latin-alphabet words + and the simple dictionary for all others. +
  • default_russian — Indexes words and numbers, + using the en_stem English Snowball stemmer for Latin-alphabet words + and the ru_stem Russian Snowball dictionary for all others. +
  • simple — Processes both words and numbers + with the simple dictionary, + which neither discards any stop words nor alters them. +
+ +The tsearch2 module initially chooses your current configuration +by looking for your current locale in the locale field +of the pg_ts_cfg table described below. +You can manipulate the current configuration yourself with these functions: + +
+
+ set_curcfg( id INT | ts_name TEXT + ) RETURNS VOID +
+ Set the current configuration used by to_tsvector + and to_tsquery. +
+ show_curcfg() RETURNS INT4 +
+ Returns the integer id of the current configuration. +
+ +

+Each configuration is defined by a record in the pg_ts_cfg table: + +

create table pg_ts_cfg (
+	id		int not  null primary key,
+	ts_name		text not null,
+	prs_name	text not null,
+	locale		text
+);
+ +The id and ts_name are unique values +which identify the configuration; +the prs_name specifies which parser the configuration uses. +Once this parser has split document text into tokens, +the type of each resulting token — +or, more specifically, the type's lex_alias +as specified in the parser's lexem_type() table — +is searched for together with the configuration's ts_name +in the pg_ts_cfgmap table: + +
create table pg_ts_cfgmap (
+	ts_name		text not null,
+	lex_alias	text not null,
+	dict_name	text[],
+	primary key (ts_name,lex_alias)
+);
+ +Those tokens whose types are not listed are discarded. +The remaining tokens are assigned integer positions, +starting with 1 for the first token in the document, +and turned into lexemes with the help of the dictionaries +whose names are given in the dict_name array for their type. +These dictionaries are tried in order, +stopping either with the first one to return a lexeme for the token, +or discarding the token if no dictionary returns a lexeme for it. + +

Parsers

+ +Each parser is defined by a record in the pg_ts_parser table: + +
create table pg_ts_parser (
+	prs_id		int not null primary key,
+	prs_name	text not null,
+	prs_start	oid not null,
+	prs_getlexem	oid not null,
+	prs_end		oid not null,
+	prs_headline	oid not null,
+	prs_lextype	oid not null,
+	prs_comment	text
+);
+ +The prs_id and prs_name uniquely identify the parser, +while prs_comment usually describes its name and version +for the reference of users. +The other items identify the low-level functions +which make the parser operate, +and are only of interest to someone writing a parser of their own. +

+The tsearch2 module comes with one parser named default +which is suitable for parsing most plain text and HTML documents. +

+Each parser argument below +must designate a parser with either an integer prs_id +or a textual prs_name; +the current parser is used when this argument is omitted. + +

+
+ CREATE FUNCTION set_curprs(parser) RETURNS VOID +
+ Selects a current parser + which will be used when any of the following functions + are called without a parser as an argument. +
+ CREATE FUNCTION lexem_type( + [ parser ] + ) RETURNS SETOF lexemtype +
+ Returns a table which defines and describes + each kind of token the parser may produce as output. + For each token type the table gives the lexid + with which the parser will label each token of that type, + the alias which names the token type, + and a short description descr for the user to read. +
+ CREATE FUNCTION parse( + [ parser, ] document TEXT + ) RETURNS SETOF lexemtype +
+ Parses the given document and returns a series of records, + one for each token produced by parsing. + Each token includes a lexid giving its type + and a lexem which gives its content. +
+ +

Dictionaries

+ +Dictionaries take textual tokens as input, +usually those produced by a parser, +and return lexemes which are usually some reduced form of the token. +Among the dictionaries which come installed with tsearch2 are: + +
    +
  • simple simply folds uppercase letters to lowercase + before returning the word. +
  • en_stem runs an English Snowball stemmer on each word + that attempts to reduce the various forms of a verb or noun + to a single recognizable form. +
  • ru_stem runs a Russian Snowball stemmer on each word. +
+ +Each dictionary is defined by an entry in the pg_ts_dict table: + +
CREATE TABLE pg_ts_dict (
+	dict_id		int not null primary key,
+	dict_name	text not null,
+	dict_init	oid,
+	dict_initoption	text,
+	dict_lemmatize	oid not null,
+	dict_comment	text
+);
+ +The dict_id and dict_name +serve as unique identifiers for the dictionary. +The meaning of the dict_initoption varies among dictionaries, +but for the built-in Snowball dictionaries +it specifies a file from which stop words should be read. +The dict_comment is a human-readable description of the dictionary. +The other fields are internal function identifiers +useful only to developers trying to implement their own dictionaries. +

+The argument named dictionary +in each of the following functions +should be either an integer dict_id or a textual dict_name +identifying which dictionary should be used for the operation; +if omitted then the current dictionary is used. + +

+
+ CREATE FUNCTION set_curdict(dictionary) RETURNS VOID +
+ Selects a current dictionary for use by functions + that do not select a dictionary explicitly. +
+ CREATE FUNCTION lexize( + [ dictionary, ] word text) + RETURNS TEXT[] +
+ Reduces a single word to a lexeme. + Note that lexemes are arrays of zero or more strings, + since in some languages there might be several base words + from which an inflected form could arise. +
+ +

Ranking

+ +Ranking attempts to measure how relevant documents are to particular queries +by inspecting the number of times each search word appears in the document, +and whether different search terms occur near each other. +Note that this information is only available in unstripped vectors — +ranking functions will only return a useful result +for a tsvector which still has position information! +

+Both of these ranking functions +take an integer normalization option +that specifies whether a document's length should impact its rank. +This is often desirable, +since a hundred-word document with five instances of a search word +is probably more relevant than a thousand-word document with five instances. +The option can have the values: + +

    +
  • 0 (the default) ignores document length. +
  • 1 divides the rank by the logarithm of the length. +
  • 2 divides the rank by the length itself. +
+ +The two ranking functions currently available are: + +
+
+ CREATE FUNCTION rank(
+ [ weights float4[], ] + vector tsvector, query tsquery, + [ normalization int4 ]
+ ) RETURNS float4
+
+ This is the ranking function from the old version of OpenFTS, + and offers the ability to weight word instances more heavily + depending on how you have classified them. + The weights specify how heavily to weight each category of word: +
{D-weight, C-weight, B-weight, A-weight}
+ If no weights are provided, then these defaults are used: +
{0.1, 0.2, 0.4, 1.0}
+ Often weights are used to mark words from special areas of the document, + like the title or an initial abstract, + and make them more or less important than words in the document body. +
+ CREATE FUNCTION rank_cd(
+ [ K int4, ] + vector tsvector, query tsquery, + [ normalization int4 ]
+ ) RETURNS float4
+
+ This function computes the cover density ranking + for the given document vector and query, + as described in Clarke, Cormack, and Tudhope's + “Relevance Ranking for One to Three Term Queries” + in the 1999 Information Processing and Management. + The value K is one of the values from their formula, + and defaults to K=4. + The examples in their paper use K=16; + we can roughly describe the term + as stating how far apart two search terms can fall + before the formula begins penalizing them for lack of proximity. +
+ +

Headlines

+ +
+
+ CREATE FUNCTION headline(
+ [ id int4, | ts_name text, ] + document text, query tsquery, + [ options text ]
+ ) RETURNS text
+
+ Every form of the headline() function + accepts a document along with a query, + and returns one or more ellipsis-separated excerpts from the document + in which terms from the query are highlighted. + The configuration with which to parse the document + can be specified by either its id or ts_name; + if none is specified then the current configuration is used instead. +

+ An options string if provided should be a comma-separated list + of one or more ‘option=value’ pairs. + The available options are: +

    +
  • StartSel, StopSel — + the strings with which query words appearing in the document + should be delimited to distinguish them from other excerpted words. +
  • MaxWords, MinWords — + limits on the longest and shortest headlines you will accept. +
  • ShortWord — + this prevents your headline from beginning or ending + with a word which has this many characters or less. + The default value of 3 should eliminate most English + conjunctions and articles. +
+ Any unspecified options receive these defaults: +
+StartSel=<b>, StopSel=</b>, MaxWords=35, MinWords=15, ShortWord=3
+ 
+
+ + + diff --git a/contrib/tsearch2/expected/tsearch2.out b/contrib/tsearch2/expected/tsearch2.out new file mode 100644 index 0000000000..a842c5b66b --- /dev/null +++ b/contrib/tsearch2/expected/tsearch2.out @@ -0,0 +1,2055 @@ +-- +-- first, define the datatype. Turn off echoing so that expected file +-- does not depend on contents of seg.sql. +-- +\set ECHO none +psql:tsearch2.sql:13: NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_dict_pkey' for table 'pg_ts_dict' +psql:tsearch2.sql:145: NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_parser_pkey' for table 'pg_ts_parser' +psql:tsearch2.sql:244: NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_cfg_pkey' for table 'pg_ts_cfg' +psql:tsearch2.sql:251: NOTICE: CREATE TABLE / PRIMARY KEY will create implicit index 'pg_ts_cfgmap_pkey' for table 'pg_ts_cfgmap' +psql:tsearch2.sql:339: NOTICE: ProcedureCreate: type tsvector is not yet defined +psql:tsearch2.sql:344: NOTICE: Argument type "tsvector" is only a shell +psql:tsearch2.sql:398: NOTICE: ProcedureCreate: type tsquery is not yet defined +psql:tsearch2.sql:403: NOTICE: Argument type "tsquery" is only a shell +psql:tsearch2.sql:545: NOTICE: ProcedureCreate: type gtsvector is not yet defined +psql:tsearch2.sql:550: NOTICE: Argument type "gtsvector" is only a shell +--tsvector +SELECT '1'::tsvector; + tsvector +---------- + '1' +(1 row) + +SELECT '1 '::tsvector; + tsvector +---------- + '1' +(1 row) + +SELECT ' 1'::tsvector; + tsvector +---------- + '1' +(1 row) + +SELECT ' 1 '::tsvector; + tsvector +---------- + '1' +(1 row) + +SELECT '1 2'::tsvector; + tsvector +---------- + '1' '2' +(1 row) + +SELECT '\'1 2\''::tsvector; + tsvector +---------- + '1 2' +(1 row) + +SELECT '\'1 \\\'2\''::tsvector; + tsvector +---------- + '1 \'2' +(1 row) + +SELECT '\'1 \\\'2\'3'::tsvector; + tsvector +------------- + '3' '1 \'2' +(1 row) + +SELECT '\'1 \\\'2\' 3'::tsvector; + tsvector +------------- + '3' '1 \'2' +(1 row) + 
+SELECT '\'1 \\\'2\' \' 3\' 4 '::tsvector; + tsvector +------------------ + '4' ' 3' '1 \'2' +(1 row) + +select '\'w\':4A,3B,2C,1D,5 a:8'; + ?column? +----------------------- + 'w':4A,3B,2C,1D,5 a:8 +(1 row) + +select 'a:3A b:2a'::tsvector || 'ba:1234 a:1B'; + ?column? +---------------------------- + 'a':3A,4B 'b':2A 'ba':1237 +(1 row) + +select setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c'); + setweight +---------------------------------------------------------- + 'a':1C,3C 'w':5C,6C,12C,13C 'asd':1C 'zxc':81C,222C,567C +(1 row) + +select strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector); + strip +--------------- + 'a' 'w' 'asd' +(1 row) + +--tsquery +SELECT '1'::tsquery; + tsquery +--------- + '1' +(1 row) + +SELECT '1 '::tsquery; + tsquery +--------- + '1' +(1 row) + +SELECT ' 1'::tsquery; + tsquery +--------- + '1' +(1 row) + +SELECT ' 1 '::tsquery; + tsquery +--------- + '1' +(1 row) + +SELECT '\'1 2\''::tsquery; + tsquery +--------- + '1 2' +(1 row) + +SELECT '\'1 \\\'2\''::tsquery; + tsquery +--------- + '1 \'2' +(1 row) + +SELECT '!1'::tsquery; + tsquery +--------- + !'1' +(1 row) + +SELECT '1|2'::tsquery; + tsquery +----------- + '1' | '2' +(1 row) + +SELECT '1|!2'::tsquery; + tsquery +------------ + '1' | !'2' +(1 row) + +SELECT '!1|2'::tsquery; + tsquery +------------ + !'1' | '2' +(1 row) + +SELECT '!1|!2'::tsquery; + tsquery +------------- + !'1' | !'2' +(1 row) + +SELECT '!(!1|!2)'::tsquery; + tsquery +------------------ + !( !'1' | !'2' ) +(1 row) + +SELECT '!(!1|2)'::tsquery; + tsquery +----------------- + !( !'1' | '2' ) +(1 row) + +SELECT '!(1|!2)'::tsquery; + tsquery +----------------- + !( '1' | !'2' ) +(1 row) + +SELECT '!(1|2)'::tsquery; + tsquery +---------------- + !( '1' | '2' ) +(1 row) + +SELECT '1&2'::tsquery; + tsquery +----------- + '1' & '2' +(1 row) + +SELECT '!1&2'::tsquery; + tsquery +------------ + !'1' & '2' +(1 row) + +SELECT '1&!2'::tsquery; + tsquery +------------ + '1' 
& !'2' +(1 row) + +SELECT '!1&!2'::tsquery; + tsquery +------------- + !'1' & !'2' +(1 row) + +SELECT '(1&2)'::tsquery; + tsquery +----------- + '1' & '2' +(1 row) + +SELECT '1&(2)'::tsquery; + tsquery +----------- + '1' & '2' +(1 row) + +SELECT '!(1)&2'::tsquery; + tsquery +------------ + !'1' & '2' +(1 row) + +SELECT '!(1&2)'::tsquery; + tsquery +---------------- + !( '1' & '2' ) +(1 row) + +SELECT '1|2&3'::tsquery; + tsquery +----------------- + '1' | '2' & '3' +(1 row) + +SELECT '1|(2&3)'::tsquery; + tsquery +----------------- + '1' | '2' & '3' +(1 row) + +SELECT '(1|2)&3'::tsquery; + tsquery +--------------------- + ( '1' | '2' ) & '3' +(1 row) + +SELECT '1|2&!3'::tsquery; + tsquery +------------------ + '1' | '2' & !'3' +(1 row) + +SELECT '1|!2&3'::tsquery; + tsquery +------------------ + '1' | !'2' & '3' +(1 row) + +SELECT '!1|2&3'::tsquery; + tsquery +------------------ + !'1' | '2' & '3' +(1 row) + +SELECT '!1|(2&3)'::tsquery; + tsquery +------------------ + !'1' | '2' & '3' +(1 row) + +SELECT '!(1|2)&3'::tsquery; + tsquery +---------------------- + !( '1' | '2' ) & '3' +(1 row) + +SELECT '(!1|2)&3'::tsquery; + tsquery +---------------------- + ( !'1' | '2' ) & '3' +(1 row) + +SELECT '1|(2|(4|(5|6)))'::tsquery; + tsquery +----------------------------------------- + '1' | ( '2' | ( '4' | ( '5' | '6' ) ) ) +(1 row) + +SELECT '1|2|4|5|6'::tsquery; + tsquery +----------------------------------------- + ( ( ( '1' | '2' ) | '4' ) | '5' ) | '6' +(1 row) + +SELECT '1&(2&(4&(5&6)))'::tsquery; + tsquery +----------------------------- + '1' & '2' & '4' & '5' & '6' +(1 row) + +SELECT '1&2&4&5&6'::tsquery; + tsquery +----------------------------- + '1' & '2' & '4' & '5' & '6' +(1 row) + +SELECT '1&(2&(4&(5|6)))'::tsquery; + tsquery +--------------------------------- + '1' & '2' & '4' & ( '5' | '6' ) +(1 row) + +SELECT '1&(2&(4&(5|!6)))'::tsquery; + tsquery +---------------------------------- + '1' & '2' & '4' & ( '5' | !'6' ) +(1 row) + +SELECT '1&(\'2\'&(\' 4\'&(\\|5 
| \'6 \\\' !|&\')))'::tsquery; + tsquery +------------------------------------------ + '1' & '2' & ' 4' & ( '|5' | '6 \' !|&' ) +(1 row) + +SELECT '\'the wether\':dc & \' sKies \':BC & a:d b:a'; + ?column? +------------------------------------------ + 'the wether':dc & ' sKies ':BC & a:d b:a +(1 row) + +select lexize('simple', 'ASD56 hsdkf'); + lexize +----------------- + {"asd56 hsdkf"} +(1 row) + +select lexize('en_stem', 'SKIES Problems identity'); + lexize +-------------------------- + {"skies problems ident"} +(1 row) + +select * from token_type('default'); + tokid | alias | descr +-------+--------------+----------------------------------- + 1 | lword | Latin word + 2 | nlword | Non-latin word + 3 | word | Word + 4 | email | Email + 5 | url | URL + 6 | host | Host + 7 | sfloat | Scientific notation + 8 | version | VERSION + 9 | part_hword | Part of hyphenated word + 10 | nlpart_hword | Non-latin part of hyphenated word + 11 | lpart_hword | Latin part of hyphenated word + 12 | blank | Space symbols + 13 | tag | HTML Tag + 14 | http | HTTP head + 15 | hword | Hyphenated word + 16 | lhword | Latin hyphenated word + 17 | nlhword | Non-latin hyphenated word + 18 | uri | URI + 19 | file | File or path name + 20 | float | Decimal notation + 21 | int | Signed integer + 22 | uint | Unsigned integer + 23 | entity | HTML Entity +(23 rows) + +select * from parse('default', '345 qwe@efd.r \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf qwer jf sdjk ewr1> ewri2 +/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 
234 + wow < jqw <> qwerty'); + tokid | token +-------+-------------------------------------- + 22 | 345 + 12 | + 4 | qwe@efd.r + 12 | + 12 | ' + 12 | + 14 | http:// + 6 | www.com + 12 | / + 12 | + 14 | http:// + 5 | aew.werc.ewr/?ad=qwe&dw + 6 | aew.werc.ewr + 18 | /?ad=qwe&dw + 12 | + 5 | 1aew.werc.ewr/?ad=qwe&dw + 6 | 1aew.werc.ewr + 18 | /?ad=qwe&dw + 12 | + 6 | 2aew.werc.ewr + 12 | + 14 | http:// + 5 | 3aew.werc.ewr/?ad=qwe&dw + 6 | 3aew.werc.ewr + 18 | /?ad=qwe&dw + 12 | + 14 | http:// + 6 | 4aew.werc.ewr + 12 | + 14 | http:// + 5 | 5aew.werc.ewr:8100/? + 6 | 5aew.werc.ewr + 18 | :8100/? + 12 | + 1 | ad + 12 | = + 1 | qwe + 12 | & + 1 | dw + 12 | + 5 | 6aew.werc.ewr:8100/?ad=qwe&dw + 6 | 6aew.werc.ewr + 18 | :8100/?ad=qwe&dw + 12 | + 5 | 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 + 6 | 7aew.werc.ewr + 18 | :8100/?ad=qwe&dw=%20%32 + 12 | + 7 | +4.0e-10 + 12 | + 1 | qwe + 12 | + 1 | qwe + 12 | + 1 | qwqwe + 12 | + 20 | 234.435 + 12 | + 22 | 455 + 12 | + 20 | 5.005 + 12 | + 4 | teodor@stack.net + 12 | + 16 | qwe-wer + 11 | qwe + 12 | - + 11 | wer + 12 | + 1 | asdf + 12 | + 13 | + 1 | qwer + 12 | + 1 | jf + 12 | + 1 | sdjk + 13 | + 12 | + 3 | ewr1 + 12 | > + 12 | + 3 | ewri2 + 12 | + 13 | + 12 | + + 19 | /usr/local/fff + 12 | + 19 | /awdf/dwqe/4325 + 12 | + 19 | rewt/ewr + 12 | + 1 | wefjn + 12 | + 19 | /wqe-324/ewr + 12 | + 6 | gist.h + 12 | + 6 | gist.h.c + 12 | + 6 | gist.c + 12 | . + 12 | + 1 | readline + 12 | + 20 | 4.2 + 12 | + 20 | 4.2 + 12 | . + 12 | + 20 | 4.2 + 12 | , + 12 | + 15 | readline-4 + 11 | readline + 12 | - + 20 | 4.2 + 12 | + 15 | readline-4 + 11 | readline + 12 | - + 20 | 4.2 + 12 | . + 12 | + 22 | 234 + 12 | + + 13 | + 12 | + 1 | wow + 12 | + 12 | < + 12 | + 1 | jqw + 12 | + 12 | < + 12 | > + 12 | + 1 | qwerty +(138 rows) + +SELECT to_tsvector('default', '345 qwe@efd.r \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? 
ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf qwer jf sdjk ewr1> ewri2 +/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 + wow < jqw <> qwerty'); + to_tsvector +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + 'ad':18 'dw':20 'jf':40 '234':62 '345':1 '4.2':53,54,55,58,61 '455':32 'jqw':64 'qwe':19,28,29,36 'wer':37 'wow':63 'asdf':38 'ewr1':42 'qwer':39 'sdjk':41 '5.005':33 'ewri2':43 'qwqwe':30 'wefjn':47 'gist.c':51 'gist.h':49 'qwerti':65 '234.435':31 ':8100/?':17 'qwe-wer':35 'readlin':52,57,60 'www.com':3 '+4.0e-10':27 'gist.h.c':50 'rewt/ewr':46 'qwe@efd.r':2 'readline-4':56,59 '/?ad=qwe&dw':6,9,13 '/wqe-324/ewr':48 'aew.werc.ewr':5 '1aew.werc.ewr':8 '2aew.werc.ewr':10 '3aew.werc.ewr':12 '4aew.werc.ewr':14 '5aew.werc.ewr':16 '6aew.werc.ewr':22 '7aew.werc.ewr':25 '/usr/local/fff':44 '/awdf/dwqe/4325':45 ':8100/?ad=qwe&dw':23 'teodor@stack.net':34 '5aew.werc.ewr:8100/?':15 ':8100/?ad=qwe&dw=%20%32':26 'aew.werc.ewr/?ad=qwe&dw':4 '1aew.werc.ewr/?ad=qwe&dw':7 
'3aew.werc.ewr/?ad=qwe&dw':11 '6aew.werc.ewr:8100/?ad=qwe&dw':21 '7aew.werc.ewr:8100/?ad=qwe&dw=%20%32':24 +(1 row) + +SELECT length(to_tsvector('default', '345 qw')); + length +-------- + 2 +(1 row) + +SELECT length(to_tsvector('default', '345 qwe@efd.r \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf qwer jf sdjk ewr1> ewri2 +/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 + wow < jqw <> qwerty')); + length +-------- + 53 +(1 row) + +select to_tsquery('default', 'qwe & sKies '); + to_tsquery +--------------- + 'qwe' & 'sky' +(1 row) + +select to_tsquery('simple', 'qwe & sKies '); + to_tsquery +----------------- + 'qwe' & 'skies' +(1 row) + +select to_tsquery('default', '\'the wether\':dc & \' sKies \':BC '); + to_tsquery +------------------------ + 'wether':CD & 'sky':BC +(1 row) + +select 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca'; + ?column? +---------- + t +(1 row) + +select 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B'; + ?column? +---------- + t +(1 row) + +select 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A'; + ?column? +---------- + t +(1 row) + +select 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C'; + ?column? +---------- + f +(1 row) + +select 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB'; + ?column? 
+---------- + t +(1 row) + +CREATE TABLE test_tsvector( t text, a tsvector ); +\copy test_tsvector from 'data/test_tsearch.data' +SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh'; + count +------- + 158 +(1 row) + +SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh'; + count +------- + 17 +(1 row) + +SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt'; + count +------- + 6 +(1 row) + +SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt'; + count +------- + 98 +(1 row) + +SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)'; + count +------- + 23 +(1 row) + +SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)'; + count +------- + 39 +(1 row) + +create index wowidx on test_tsvector using gist (a); +set enable_seqscan=off; +SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh'; + count +------- + 158 +(1 row) + +SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh'; + count +------- + 17 +(1 row) + +SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt'; + count +------- + 6 +(1 row) + +SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt'; + count +------- + 98 +(1 row) + +SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)'; + count +------- + 23 +(1 row) + +SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)'; + count +------- + 39 +(1 row) + +select set_curcfg('default'); + set_curcfg +------------ + +(1 row) + +CREATE TRIGGER tsvectorupdate +BEFORE UPDATE OR INSERT ON test_tsvector +FOR EACH ROW EXECUTE PROCEDURE tsearch2(a, t); +SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty'); + count +------- + 0 +(1 row) + +INSERT INTO test_tsvector (t) VALUES ('345 qwerty'); +SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty'); + count +------- + 1 +(1 row) + +UPDATE test_tsvector SET t = null WHERE t = '345 qwerty'; +SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty'); + count +------- + 0 +(1 row) + +drop trigger tsvectorupdate on test_tsvector; +create function 
wow(text) returns text as 'select $1 || \' copyright\'; ' language sql; +create trigger tsvectorupdate before update or insert on test_tsvector +for each row execute procedure tsearch2(a, wow, t); +insert into test_tsvector (t) values ('345 qwerty'); +select count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty'); + count +------- + 1 +(1 row) + +select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright'); + count +------- + 1 +(1 row) + +select rank(' a:1 s:2C d g'::tsvector, 'a | s'); + rank +------ + 0.28 +(1 row) + +select rank(' a:1 s:2B d g'::tsvector, 'a | s'); + rank +------ + 0.46 +(1 row) + +select rank(' a:1 s:2 d g'::tsvector, 'a | s'); + rank +------ + 0.19 +(1 row) + +select rank(' a:1 s:2C d g'::tsvector, 'a & s'); + rank +---------- + 0.140153 +(1 row) + +select rank(' a:1 s:2B d g'::tsvector, 'a & s'); + rank +---------- + 0.198206 +(1 row) + +select rank(' a:1 s:2 d g'::tsvector, 'a & s'); + rank +----------- + 0.0991032 +(1 row) + +insert into test_tsvector (t) values ('foo bar foo the over foo qq bar'); +select * from stat('select a from test_tsvector') order by ndoc desc, nentry desc, word; + word | ndoc | nentry +-----------+------+-------- + qq | 109 | 109 + qt | 102 | 102 + qe | 100 | 100 + qh | 98 | 98 + qw | 98 | 98 + qa | 97 | 97 + ql | 94 | 94 + qs | 94 | 94 + qi | 92 | 92 + qr | 92 | 92 + qj | 91 | 91 + qd | 87 | 87 + qz | 87 | 87 + qc | 86 | 86 + qn | 86 | 86 + qv | 85 | 85 + qo | 84 | 84 + qy | 84 | 84 + wp | 84 | 84 + qf | 81 | 81 + qk | 80 | 80 + wt | 80 | 80 + qu | 79 | 79 + qg | 78 | 78 + wb | 78 | 78 + qx | 77 | 77 + wr | 77 | 77 + ws | 73 | 73 + wy | 73 | 73 + wa | 72 | 72 + wf | 70 | 70 + wg | 70 | 70 + wi | 70 | 70 + wu | 70 | 70 + wc | 69 | 69 + wj | 69 | 69 + qp | 68 | 68 + wh | 68 | 68 + wv | 68 | 68 + qb | 66 | 66 + eu | 65 | 65 + we | 65 | 65 + wl | 65 | 65 + wq | 65 | 65 + wk | 64 | 64 + ee | 63 | 63 + eo | 63 | 63 + qm | 63 | 63 + wn | 63 | 63 + ef | 62 | 62 + eh | 62 | 62 + ex | 62 | 62 + re | 62 | 62 
+ rl | 62 | 62 + rr | 62 | 62 + eb | 61 | 61 + ek | 61 | 61 + ww | 61 | 61 + ea | 60 | 60 + ei | 60 | 60 + em | 60 | 60 + eq | 60 | 60 + ew | 60 | 60 + ro | 60 | 60 + rw | 60 | 60 + tl | 60 | 60 + eg | 59 | 59 + en | 59 | 59 + ez | 59 | 59 + rj | 59 | 59 + ry | 59 | 59 + tw | 59 | 59 + tx | 59 | 59 + ej | 58 | 58 + es | 58 | 58 + ra | 58 | 58 + rd | 58 | 58 + rg | 58 | 58 + rx | 58 | 58 + tb | 58 | 58 + wd | 58 | 58 + ed | 57 | 57 + tc | 57 | 57 + wx | 57 | 57 + er | 56 | 56 + wm | 56 | 56 + wo | 56 | 56 + yw | 56 | 56 + ep | 55 | 55 + rk | 55 | 55 + rp | 55 | 55 + rz | 55 | 55 + ta | 55 | 55 + rq | 54 | 54 + yn | 54 | 54 + ec | 53 | 53 + el | 53 | 53 + ru | 53 | 53 + rv | 53 | 53 + tz | 53 | 53 + un | 53 | 53 + wz | 53 | 53 + ys | 53 | 53 + oe | 52 | 52 + tn | 52 | 52 + tq | 52 | 52 + ty | 52 | 52 + uq | 52 | 52 + yg | 52 | 52 + ym | 52 | 52 + oi | 51 | 51 + to | 51 | 51 + yi | 51 | 51 + pn | 50 | 50 + rb | 50 | 50 + ri | 50 | 50 + rn | 50 | 50 + ti | 50 | 50 + tv | 50 | 50 + um | 50 | 50 + ut | 50 | 50 + ya | 50 | 50 + et | 49 | 49 + ix | 49 | 49 + ox | 49 | 49 + q3 | 49 | 49 + yf | 49 | 49 + yl | 49 | 49 + yo | 49 | 49 + yr | 49 | 49 + ev | 48 | 48 + ey | 48 | 48 + ot | 48 | 48 + rc | 48 | 48 + rm | 48 | 48 + th | 48 | 48 + uo | 48 | 48 + ia | 47 | 47 + q1 | 47 | 47 + rh | 47 | 47 + yq | 47 | 47 + yz | 47 | 47 + av | 46 | 46 + im | 46 | 46 + os | 46 | 46 + tk | 46 | 46 + yy | 46 | 46 + ir | 45 | 45 + iv | 45 | 45 + iw | 45 | 45 + oj | 45 | 45 + pl | 45 | 45 + pv | 45 | 45 + te | 45 | 45 + tu | 45 | 45 + uv | 45 | 45 + ux | 45 | 45 + yd | 45 | 45 + yx | 45 | 45 + ij | 44 | 44 + pa | 44 | 44 + se | 44 | 44 + tg | 44 | 44 + ue | 44 | 44 + yb | 44 | 44 + yt | 44 | 44 + if | 43 | 43 + ik | 43 | 43 + in | 43 | 43 + ph | 43 | 43 + pj | 43 | 43 + q5 | 43 | 43 + rt | 43 | 43 + ub | 43 | 43 + ud | 43 | 43 + uh | 43 | 43 + uj | 43 | 43 + w7 | 43 | 43 + ye | 43 | 43 + yv | 43 | 43 + db | 42 | 42 + do | 42 | 42 + id | 42 | 42 + ie | 42 | 42 + ii | 42 | 42 + of | 42 | 42 + pr 
| 42 | 42 + q4 | 42 | 42 + rf | 42 | 42 + td | 42 | 42 + uk | 42 | 42 + up | 42 | 42 + yh | 42 | 42 + yk | 42 | 42 + io | 41 | 41 + it | 41 | 41 + pb | 41 | 41 + q0 | 41 | 41 + q7 | 41 | 41 + rs | 41 | 41 + tj | 41 | 41 + ur | 41 | 41 + ig | 40 | 40 + iu | 40 | 40 + iy | 40 | 40 + od | 40 | 40 + q6 | 40 | 40 + tt | 40 | 40 + ug | 40 | 40 + ul | 40 | 40 + us | 40 | 40 + uu | 40 | 40 + uz | 40 | 40 + ah | 39 | 39 + ar | 39 | 39 + as | 39 | 39 + dl | 39 | 39 + dt | 39 | 39 + hk | 39 | 39 + iq | 39 | 39 + is | 39 | 39 + oc | 39 | 39 + ov | 39 | 39 + oy | 39 | 39 + uf | 39 | 39 + ui | 39 | 39 + aa | 38 | 38 + ad | 38 | 38 + fh | 38 | 38 + gm | 38 | 38 + ic | 38 | 38 + jd | 38 | 38 + om | 38 | 38 + or | 38 | 38 + oz | 38 | 38 + pm | 38 | 38 + q8 | 38 | 38 + sf | 38 | 38 + sm | 38 | 38 + sv | 38 | 38 + uc | 38 | 38 + ak | 37 | 37 + aq | 37 | 37 + di | 37 | 37 + e4 | 37 | 37 + fi | 37 | 37 + fx | 37 | 37 + ha | 37 | 37 + hp | 37 | 37 + ih | 37 | 37 + og | 37 | 37 + po | 37 | 37 + pw | 37 | 37 + sn | 37 | 37 + su | 37 | 37 + sw | 37 | 37 + w6 | 37 | 37 + yj | 37 | 37 + yu | 37 | 37 + ag | 36 | 36 + am | 36 | 36 + at | 36 | 36 + e1 | 36 | 36 + ff | 36 | 36 + gx | 36 | 36 + he | 36 | 36 + hj | 36 | 36 + ib | 36 | 36 + iz | 36 | 36 + lm | 36 | 36 + ok | 36 | 36 + pk | 36 | 36 + pp | 36 | 36 + pu | 36 | 36 + sp | 36 | 36 + tf | 36 | 36 + tm | 36 | 36 + ay | 35 | 35 + dy | 35 | 35 + fu | 35 | 35 + ku | 35 | 35 + lh | 35 | 35 + lq | 35 | 35 + o6 | 35 | 35 + ob | 35 | 35 + on | 35 | 35 + op | 35 | 35 + pd | 35 | 35 + ps | 35 | 35 + si | 35 | 35 + sl | 35 | 35 + sx | 35 | 35 + tp | 35 | 35 + tr | 35 | 35 + w3 | 35 | 35 + y1 | 35 | 35 + al | 34 | 34 + ap | 34 | 34 + az | 34 | 34 + dc | 34 | 34 + dd | 34 | 34 + dz | 34 | 34 + e0 | 34 | 34 + fj | 34 | 34 + fp | 34 | 34 + gd | 34 | 34 + gg | 34 | 34 + gk | 34 | 34 + go | 34 | 34 + ho | 34 | 34 + jc | 34 | 34 + oa | 34 | 34 + oh | 34 | 34 + oo | 34 | 34 + pe | 34 | 34 + px | 34 | 34 + sd | 34 | 34 + sq | 34 | 34 + sy | 34 | 34 + ab | 33 
| 33 + ae | 33 | 33 + af | 33 | 33 + aw | 33 | 33 + e5 | 33 | 33 + fk | 33 | 33 + gu | 33 | 33 + gy | 33 | 33 + hb | 33 | 33 + hm | 33 | 33 + hy | 33 | 33 + jl | 33 | 33 + jr | 33 | 33 + ls | 33 | 33 + oq | 33 | 33 + pt | 33 | 33 + sa | 33 | 33 + sh | 33 | 33 + sj | 33 | 33 + so | 33 | 33 + sz | 33 | 33 + t7 | 33 | 33 + uw | 33 | 33 + w8 | 33 | 33 + y0 | 33 | 33 + yp | 33 | 33 + dh | 32 | 32 + dp | 32 | 32 + dq | 32 | 32 + e7 | 32 | 32 + fn | 32 | 32 + fo | 32 | 32 + fr | 32 | 32 + ga | 32 | 32 + gq | 32 | 32 + hh | 32 | 32 + il | 32 | 32 + ip | 32 | 32 + jv | 32 | 32 + lc | 32 | 32 + ol | 32 | 32 + pc | 32 | 32 + q9 | 32 | 32 + ds | 31 | 31 + e9 | 31 | 31 + fd | 31 | 31 + fe | 31 | 31 + ft | 31 | 31 + gs | 31 | 31 + hl | 31 | 31 + hs | 31 | 31 + jb | 31 | 31 + kc | 31 | 31 + kw | 31 | 31 + mj | 31 | 31 + q2 | 31 | 31 + r3 | 31 | 31 + sb | 31 | 31 + sk | 31 | 31 + ts | 31 | 31 + ua | 31 | 31 + yc | 31 | 31 + zw | 31 | 31 + ao | 30 | 30 + du | 30 | 30 + fw | 30 | 30 + gj | 30 | 30 + hu | 30 | 30 + kh | 30 | 30 + kl | 30 | 30 + kv | 30 | 30 + ld | 30 | 30 + lf | 30 | 30 + pq | 30 | 30 + py | 30 | 30 + sc | 30 | 30 + sr | 30 | 30 + uy | 30 | 30 + vg | 30 | 30 + w2 | 30 | 30 + xg | 30 | 30 + xo | 30 | 30 + au | 29 | 29 + cx | 29 | 29 + fv | 29 | 29 + gh | 29 | 29 + gl | 29 | 29 + gt | 29 | 29 + hw | 29 | 29 + ji | 29 | 29 + km | 29 | 29 + la | 29 | 29 + ou | 29 | 29 + r0 | 29 | 29 + w0 | 29 | 29 + y9 | 29 | 29 + zm | 29 | 29 + zs | 29 | 29 + zy | 29 | 29 + ax | 28 | 28 + cd | 28 | 28 + dj | 28 | 28 + dn | 28 | 28 + dr | 28 | 28 + ht | 28 | 28 + jf | 28 | 28 + lo | 28 | 28 + lr | 28 | 28 + na | 28 | 28 + ng | 28 | 28 + r8 | 28 | 28 + ss | 28 | 28 + xt | 28 | 28 + y6 | 28 | 28 + aj | 27 | 27 + ca | 27 | 27 + cg | 27 | 27 + df | 27 | 27 + dg | 27 | 27 + dv | 27 | 27 + gc | 27 | 27 + gn | 27 | 27 + gr | 27 | 27 + hd | 27 | 27 + i8 | 27 | 27 + jn | 27 | 27 + jt | 27 | 27 + lp | 27 | 27 + o9 | 27 | 27 + ow | 27 | 27 + r9 | 27 | 27 + t8 | 27 | 27 + u5 | 27 | 27 + w4 | 27 | 27 
+ xm | 27 | 27 + zz | 27 | 27 + a2 | 26 | 26 + ac | 26 | 26 + ai | 26 | 26 + cm | 26 | 26 + cu | 26 | 26 + cw | 26 | 26 + dk | 26 | 26 + e2 | 26 | 26 + fc | 26 | 26 + fg | 26 | 26 + fl | 26 | 26 + fs | 26 | 26 + ge | 26 | 26 + gv | 26 | 26 + hc | 26 | 26 + hi | 26 | 26 + hx | 26 | 26 + jj | 26 | 26 + jm | 26 | 26 + kg | 26 | 26 + kk | 26 | 26 + kn | 26 | 26 + ko | 26 | 26 + kt | 26 | 26 + ln | 26 | 26 + mx | 26 | 26 + pg | 26 | 26 + r4 | 26 | 26 + t6 | 26 | 26 + u1 | 26 | 26 + u4 | 26 | 26 + vi | 26 | 26 + vr | 26 | 26 + w1 | 26 | 26 + w9 | 26 | 26 + xk | 26 | 26 + xs | 26 | 26 + zf | 26 | 26 + bb | 25 | 25 + dm | 25 | 25 + dw | 25 | 25 + e8 | 25 | 25 + fb | 25 | 25 + gw | 25 | 25 + h8 | 25 | 25 + hf | 25 | 25 + hg | 25 | 25 + hn | 25 | 25 + hv | 25 | 25 + i0 | 25 | 25 + i3 | 25 | 25 + jg | 25 | 25 + jo | 25 | 25 + jx | 25 | 25 + kq | 25 | 25 + lw | 25 | 25 + lx | 25 | 25 + o3 | 25 | 25 + p7 | 25 | 25 + pf | 25 | 25 + pi | 25 | 25 + pz | 25 | 25 + r2 | 25 | 25 + r5 | 25 | 25 + t9 | 25 | 25 + u7 | 25 | 25 + ve | 25 | 25 + vu | 25 | 25 + y5 | 25 | 25 + y8 | 25 | 25 + zt | 25 | 25 + an | 24 | 24 + bj | 24 | 24 + dx | 24 | 24 + fm | 24 | 24 + fz | 24 | 24 + gb | 24 | 24 + gi | 24 | 24 + gp | 24 | 24 + hr | 24 | 24 + hz | 24 | 24 + i5 | 24 | 24 + jq | 24 | 24 + kb | 24 | 24 + ke | 24 | 24 + kf | 24 | 24 + kp | 24 | 24 + lv | 24 | 24 + lz | 24 | 24 + o8 | 24 | 24 + r1 | 24 | 24 + s7 | 24 | 24 + sg | 24 | 24 + u3 | 24 | 24 + vj | 24 | 24 + vt | 24 | 24 + w5 | 24 | 24 + zj | 24 | 24 + be | 23 | 23 + bi | 23 | 23 + bn | 23 | 23 + cn | 23 | 23 + cy | 23 | 23 + da | 23 | 23 + e6 | 23 | 23 + fa | 23 | 23 + js | 23 | 23 + ki | 23 | 23 + kz | 23 | 23 + li | 23 | 23 + mt | 23 | 23 + mz | 23 | 23 + nu | 23 | 23 + o2 | 23 | 23 + p5 | 23 | 23 + p8 | 23 | 23 + r7 | 23 | 23 + t0 | 23 | 23 + t1 | 23 | 23 + t3 | 23 | 23 + vm | 23 | 23 + xh | 23 | 23 + xx | 23 | 23 + zp | 23 | 23 + zr | 23 | 23 + a3 | 22 | 22 + bg | 22 | 22 + de | 22 | 22 + e3 | 22 | 22 + fq | 22 | 22 + i2 | 22 | 22 + i7 
| 22 | 22 + ja | 22 | 22 + jk | 22 | 22 + jy | 22 | 22 + kr | 22 | 22 + kx | 22 | 22 + ly | 22 | 22 + nb | 22 | 22 + nh | 22 | 22 + ns | 22 | 22 + s3 | 22 | 22 + u2 | 22 | 22 + vn | 22 | 22 + xe | 22 | 22 + y4 | 22 | 22 + zh | 22 | 22 + zo | 22 | 22 + zq | 22 | 22 + a1 | 21 | 21 + bl | 21 | 21 + bo | 21 | 21 + cb | 21 | 21 + ch | 21 | 21 + co | 21 | 21 + cq | 21 | 21 + cv | 21 | 21 + d7 | 21 | 21 + g8 | 21 | 21 + je | 21 | 21 + jp | 21 | 21 + jz | 21 | 21 + lg | 21 | 21 + me | 21 | 21 + nc | 21 | 21 + p4 | 21 | 21 + st | 21 | 21 + vb | 21 | 21 + vw | 21 | 21 + vz | 21 | 21 + xj | 21 | 21 + xq | 21 | 21 + xu | 21 | 21 + xy | 21 | 21 + zb | 21 | 21 + bv | 20 | 20 + bz | 20 | 20 + cj | 20 | 20 + cp | 20 | 20 + cs | 20 | 20 + d8 | 20 | 20 + ju | 20 | 20 + k0 | 20 | 20 + ks | 20 | 20 + ky | 20 | 20 + l1 | 20 | 20 + lb | 20 | 20 + lj | 20 | 20 + lu | 20 | 20 + nm | 20 | 20 + nw | 20 | 20 + nz | 20 | 20 + o7 | 20 | 20 + p6 | 20 | 20 + vh | 20 | 20 + vp | 20 | 20 + vs | 20 | 20 + xb | 20 | 20 + xr | 20 | 20 + z3 | 20 | 20 + zv | 20 | 20 + bq | 19 | 19 + br | 19 | 19 + by | 19 | 19 + cl | 19 | 19 + d2 | 19 | 19 + f1 | 19 | 19 + f4 | 19 | 19 + gf | 19 | 19 + hq | 19 | 19 + k9 | 19 | 19 + ka | 19 | 19 + kd | 19 | 19 + kj | 19 | 19 + md | 19 | 19 + mi | 19 | 19 + ml | 19 | 19 + my | 19 | 19 + nj | 19 | 19 + ny | 19 | 19 + o1 | 19 | 19 + s4 | 19 | 19 + s8 | 19 | 19 + t5 | 19 | 19 + u0 | 19 | 19 + xl | 19 | 19 + zg | 19 | 19 + zi | 19 | 19 + a5 | 18 | 18 + b9 | 18 | 18 + bh | 18 | 18 + bx | 18 | 18 + d3 | 18 | 18 + fy | 18 | 18 + g2 | 18 | 18 + i4 | 18 | 18 + i6 | 18 | 18 + i9 | 18 | 18 + jw | 18 | 18 + lk | 18 | 18 + mb | 18 | 18 + mv | 18 | 18 + nd | 18 | 18 + nr | 18 | 18 + nt | 18 | 18 + t2 | 18 | 18 + xf | 18 | 18 + xv | 18 | 18 + zc | 18 | 18 + zd | 18 | 18 + a7 | 17 | 17 + bc | 17 | 17 + bd | 17 | 17 + ce | 17 | 17 + cf | 17 | 17 + cr | 17 | 17 + g9 | 17 | 17 + j0 | 17 | 17 + j5 | 17 | 17 + mp | 17 | 17 + mr | 17 | 17 + mw | 17 | 17 + nk | 17 | 17 + no | 17 | 17 + o0 | 17 
| 17 + o4 | 17 | 17 + s0 | 17 | 17 + s1 | 17 | 17 + t4 | 17 | 17 + u9 | 17 | 17 + vf | 17 | 17 + vx | 17 | 17 + x3 | 17 | 17 + xi | 17 | 17 + xn | 17 | 17 + xz | 17 | 17 + zl | 17 | 17 + zn | 17 | 17 + a0 | 16 | 16 + bu | 16 | 16 + bw | 16 | 16 + ci | 16 | 16 + ck | 16 | 16 + d0 | 16 | 16 + d4 | 16 | 16 + d6 | 16 | 16 + f5 | 16 | 16 + g1 | 16 | 16 + gz | 16 | 16 + h4 | 16 | 16 + jh | 16 | 16 + l4 | 16 | 16 + lt | 16 | 16 + mg | 16 | 16 + mh | 16 | 16 + mo | 16 | 16 + ni | 16 | 16 + nl | 16 | 16 + nq | 16 | 16 + p2 | 16 | 16 + u8 | 16 | 16 + v9 | 16 | 16 + vl | 16 | 16 + vo | 16 | 16 + xp | 16 | 16 + y3 | 16 | 16 + y7 | 16 | 16 + z7 | 16 | 16 + za | 16 | 16 + zx | 16 | 16 + bf | 15 | 15 + bp | 15 | 15 + cc | 15 | 15 + g0 | 15 | 15 + j2 | 15 | 15 + j9 | 15 | 15 + l6 | 15 | 15 + le | 15 | 15 + ll | 15 | 15 + m8 | 15 | 15 + ma | 15 | 15 + mu | 15 | 15 + nf | 15 | 15 + r6 | 15 | 15 + s5 | 15 | 15 + vd | 15 | 15 + vk | 15 | 15 + xa | 15 | 15 + xw | 15 | 15 + y2 | 15 | 15 + z8 | 15 | 15 + ze | 15 | 15 + zu | 15 | 15 + a6 | 14 | 14 + bk | 14 | 14 + bt | 14 | 14 + c0 | 14 | 14 + f8 | 14 | 14 + g3 | 14 | 14 + g4 | 14 | 14 + g7 | 14 | 14 + h6 | 14 | 14 + h7 | 14 | 14 + h9 | 14 | 14 + i1 | 14 | 14 + k1 | 14 | 14 + k2 | 14 | 14 + k6 | 14 | 14 + k7 | 14 | 14 + mc | 14 | 14 + nn | 14 | 14 + p9 | 14 | 14 + u6 | 14 | 14 + xd | 14 | 14 + z6 | 14 | 14 + zk | 14 | 14 + a4 | 13 | 13 + a9 | 13 | 13 + bm | 13 | 13 + cz | 13 | 13 + f2 | 13 | 13 + f3 | 13 | 13 + f6 | 13 | 13 + g6 | 13 | 13 + h2 | 13 | 13 + j1 | 13 | 13 + k5 | 13 | 13 + m1 | 13 | 13 + mf | 13 | 13 + mq | 13 | 13 + np | 13 | 13 + nx | 13 | 13 + o5 | 13 | 13 + p0 | 13 | 13 + p1 | 13 | 13 + s6 | 13 | 13 + s9 | 13 | 13 + v6 | 13 | 13 + va | 13 | 13 + vc | 13 | 13 + xc | 13 | 13 + z0 | 13 | 13 + c9 | 12 | 12 + d1 | 12 | 12 + h0 | 12 | 12 + h1 | 12 | 12 + j8 | 12 | 12 + k4 | 12 | 12 + l5 | 12 | 12 + l9 | 12 | 12 + m2 | 12 | 12 + m6 | 12 | 12 + m9 | 12 | 12 + n7 | 12 | 12 + nv | 12 | 12 + p3 | 12 | 12 + vq | 12 | 12 + vy | 12 | 12 
+ x1 | 12 | 12 + x2 | 12 | 12 + z5 | 12 | 12 + c1 | 11 | 11 + c3 | 11 | 11 + ct | 11 | 11 + f9 | 11 | 11 + g5 | 11 | 11 + j6 | 11 | 11 + l8 | 11 | 11 + n1 | 11 | 11 + v7 | 11 | 11 + vv | 11 | 11 + x5 | 11 | 11 + x8 | 11 | 11 + z2 | 11 | 11 + b0 | 10 | 10 + b2 | 10 | 10 + b8 | 10 | 10 + c6 | 10 | 10 + f0 | 10 | 10 + f7 | 10 | 10 + h5 | 10 | 10 + j3 | 10 | 10 + j4 | 10 | 10 + j7 | 10 | 10 + l7 | 10 | 10 + m0 | 10 | 10 + m7 | 10 | 10 + mm | 10 | 10 + mn | 10 | 10 + n8 | 10 | 10 + v1 | 10 | 10 + x0 | 10 | 10 + x6 | 10 | 10 + x7 | 10 | 10 + x9 | 10 | 10 + a8 | 9 | 9 + b1 | 9 | 9 + b4 | 9 | 9 + b5 | 9 | 9 + b6 | 9 | 9 + ba | 9 | 9 + bs | 9 | 9 + c5 | 9 | 9 + d5 | 9 | 9 + k8 | 9 | 9 + l0 | 9 | 9 + m5 | 9 | 9 + mk | 9 | 9 + ms | 9 | 9 + n3 | 9 | 9 + n4 | 9 | 9 + n6 | 9 | 9 + ne | 9 | 9 + v0 | 9 | 9 + v3 | 9 | 9 + v5 | 9 | 9 + v8 | 9 | 9 + b3 | 8 | 8 + b7 | 8 | 8 + c2 | 8 | 8 + c7 | 8 | 8 + c8 | 8 | 8 + d9 | 8 | 8 + k3 | 8 | 8 + l3 | 8 | 8 + m3 | 8 | 8 + m4 | 8 | 8 + n0 | 8 | 8 + n5 | 8 | 8 + v4 | 8 | 8 + x4 | 8 | 8 + z1 | 8 | 8 + z9 | 8 | 8 + l2 | 7 | 7 + s2 | 7 | 7 + z4 | 7 | 7 + 1l | 6 | 6 + 1o | 6 | 6 + 1t | 6 | 6 + 2e | 6 | 6 + 2o | 6 | 6 + c4 | 6 | 6 + h3 | 6 | 6 + n2 | 6 | 6 + n9 | 6 | 6 + v2 | 6 | 6 + 2l | 5 | 5 + 2u | 5 | 5 + 3k | 5 | 5 + 4p | 5 | 5 + 18 | 4 | 4 + 1a | 4 | 4 + 1i | 4 | 4 + 2s | 4 | 4 + 3q | 4 | 4 + 3y | 4 | 4 + 5y | 4 | 4 + 1f | 3 | 3 + 1h | 3 | 3 + 1m | 3 | 3 + 1p | 3 | 3 + 1s | 3 | 3 + 1v | 3 | 3 + 1x | 3 | 3 + 27 | 3 | 3 + 2a | 3 | 3 + 2b | 3 | 3 + 2h | 3 | 3 + 2n | 3 | 3 + 2p | 3 | 3 + 2v | 3 | 3 + 2y | 3 | 3 + 3d | 3 | 3 + 3w | 3 | 3 + 3z | 3 | 3 + 4a | 3 | 3 + 4d | 3 | 3 + 4v | 3 | 3 + 4z | 3 | 3 + 5e | 3 | 3 + 5i | 3 | 3 + 5k | 3 | 3 + 5o | 3 | 3 + 5t | 3 | 3 + 6b | 3 | 3 + 6d | 3 | 3 + 6o | 3 | 3 + 6w | 3 | 3 + 7a | 3 | 3 + 7h | 3 | 3 + 7r | 3 | 3 + 93 | 3 | 3 + 10 | 2 | 2 + 12 | 2 | 2 + 15 | 2 | 2 + 16 | 2 | 2 + 19 | 2 | 2 + 1b | 2 | 2 + 1d | 2 | 2 + 1g | 2 | 2 + 1j | 2 | 2 + 1n | 2 | 2 + 1r | 2 | 2 + 1u | 2 | 2 + 1w | 2 | 2 + 1y | 2 | 2 + 
20 | 2 | 2 + 25 | 2 | 2 + 2d | 2 | 2 + 2i | 2 | 2 + 2j | 2 | 2 + 2k | 2 | 2 + 2q | 2 | 2 + 2r | 2 | 2 + 2t | 2 | 2 + 2w | 2 | 2 + 2z | 2 | 2 + 3b | 2 | 2 + 3f | 2 | 2 + 3h | 2 | 2 + 3o | 2 | 2 + 3p | 2 | 2 + 3r | 2 | 2 + 3s | 2 | 2 + 3v | 2 | 2 + 42 | 2 | 2 + 43 | 2 | 2 + 4f | 2 | 2 + 4g | 2 | 2 + 4h | 2 | 2 + 4j | 2 | 2 + 4m | 2 | 2 + 4r | 2 | 2 + 4s | 2 | 2 + 4t | 2 | 2 + 4u | 2 | 2 + 5c | 2 | 2 + 5f | 2 | 2 + 5h | 2 | 2 + 5p | 2 | 2 + 5q | 2 | 2 + 5z | 2 | 2 + 6a | 2 | 2 + 6h | 2 | 2 + 6q | 2 | 2 + 6r | 2 | 2 + 6t | 2 | 2 + 6y | 2 | 2 + 70 | 2 | 2 + 7c | 2 | 2 + 7g | 2 | 2 + 7k | 2 | 2 + 7o | 2 | 2 + 7u | 2 | 2 + 8j | 2 | 2 + 8w | 2 | 2 + 9f | 2 | 2 + 9y | 2 | 2 + copyright | 2 | 2 + foo | 1 | 3 + bar | 1 | 2 + 0e | 1 | 1 + 0h | 1 | 1 + 0p | 1 | 1 + 0w | 1 | 1 + 0z | 1 | 1 + 11 | 1 | 1 + 13 | 1 | 1 + 14 | 1 | 1 + 17 | 1 | 1 + 1k | 1 | 1 + 1q | 1 | 1 + 1z | 1 | 1 + 24 | 1 | 1 + 26 | 1 | 1 + 28 | 1 | 1 + 2f | 1 | 1 + 30 | 1 | 1 + 345 | 1 | 1 + 37 | 1 | 1 + 39 | 1 | 1 + 3a | 1 | 1 + 3e | 1 | 1 + 3g | 1 | 1 + 3i | 1 | 1 + 3m | 1 | 1 + 3t | 1 | 1 + 3u | 1 | 1 + 40 | 1 | 1 + 41 | 1 | 1 + 44 | 1 | 1 + 45 | 1 | 1 + 48 | 1 | 1 + 4b | 1 | 1 + 4c | 1 | 1 + 4i | 1 | 1 + 4k | 1 | 1 + 4n | 1 | 1 + 4o | 1 | 1 + 4q | 1 | 1 + 4w | 1 | 1 + 4y | 1 | 1 + 51 | 1 | 1 + 55 | 1 | 1 + 56 | 1 | 1 + 5a | 1 | 1 + 5d | 1 | 1 + 5g | 1 | 1 + 5j | 1 | 1 + 5l | 1 | 1 + 5s | 1 | 1 + 5u | 1 | 1 + 5x | 1 | 1 + 64 | 1 | 1 + 68 | 1 | 1 + 6c | 1 | 1 + 6f | 1 | 1 + 6g | 1 | 1 + 6i | 1 | 1 + 6k | 1 | 1 + 6n | 1 | 1 + 6p | 1 | 1 + 6s | 1 | 1 + 6u | 1 | 1 + 6x | 1 | 1 + 72 | 1 | 1 + 7f | 1 | 1 + 7j | 1 | 1 + 7n | 1 | 1 + 7p | 1 | 1 + 7w | 1 | 1 + 7y | 1 | 1 + 7z | 1 | 1 + 80 | 1 | 1 + 82 | 1 | 1 + 85 | 1 | 1 + 8d | 1 | 1 + 8i | 1 | 1 + 8l | 1 | 1 + 8n | 1 | 1 + 8p | 1 | 1 + 8t | 1 | 1 + 8x | 1 | 1 + 95 | 1 | 1 + 97 | 1 | 1 + 9a | 1 | 1 + 9e | 1 | 1 + 9h | 1 | 1 + 9r | 1 | 1 + 9w | 1 | 1 + qwerti | 1 | 1 +(1146 rows) + +select reset_tsearch(); +NOTICE: TSearch cache cleaned + reset_tsearch 
+--------------- + +(1 row) + +select to_tsquery('default', 'skies & books'); + to_tsquery +---------------- + 'sky' & 'book' +(1 row) + +select rank_cd(to_tsvector('Erosion It took the sea a thousand years, +A thousand years to trace +The granite features of this cliff +In crag and scarp and base. +It took the sea an hour one night +An hour of storm to place +The sculpture of these granite seams, +Upon a woman s face. E. J. Pratt (1882 1964) +'), to_tsquery('sea&thousand&years')); + rank_cd +--------- + 1.2 +(1 row) + +select rank_cd(to_tsvector('Erosion It took the sea a thousand years, +A thousand years to trace +The granite features of this cliff +In crag and scarp and base. +It took the sea an hour one night +An hour of storm to place +The sculpture of these granite seams, +Upon a woman s face. E. J. Pratt (1882 1964) +'), to_tsquery('granite&sea')); + rank_cd +---------- + 0.880303 +(1 row) + +select rank_cd(to_tsvector('Erosion It took the sea a thousand years, +A thousand years to trace +The granite features of this cliff +In crag and scarp and base. +It took the sea an hour one night +An hour of storm to place +The sculpture of these granite seams, +Upon a woman s face. E. J. Pratt (1882 1964) +'), to_tsquery('sea')); + rank_cd +--------- + 2 +(1 row) + +select get_covers(to_tsvector('Erosion It took the sea a thousand years, +A thousand years to trace +The granite features of this cliff +In crag and scarp and base. +It took the sea an hour one night +An hour of storm to place +The sculpture of these granite seams, +Upon a woman s face. E. J. 
Pratt (1882 1964) +'), to_tsquery('sea&thousand&years')); + get_covers +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + eros took {1 sea thousand year }1 {2 thousand year trace granit featur cliff crag scarp base took sea }2 hour one night hour storm place sculptur granit seam upon woman face e j pratt 1882 1964 +(1 row) + +select get_covers(to_tsvector('Erosion It took the sea a thousand years, +A thousand years to trace +The granite features of this cliff +In crag and scarp and base. +It took the sea an hour one night +An hour of storm to place +The sculpture of these granite seams, +Upon a woman s face. E. J. Pratt (1882 1964) +'), to_tsquery('granite&sea')); + get_covers +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + eros took {1 sea thousand year thousand year trace {2 granit }1 featur cliff crag scarp base took {3 sea }2 hour one night hour storm place sculptur granit }3 seam upon woman face e j pratt 1882 1964 +(1 row) + +select get_covers(to_tsvector('Erosion It took the sea a thousand years, +A thousand years to trace +The granite features of this cliff +In crag and scarp and base. +It took the sea an hour one night +An hour of storm to place +The sculpture of these granite seams, +Upon a woman s face. E. J. 
Pratt (1882 1964) +'), to_tsquery('sea')); + get_covers +---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + eros took {1 sea }1 thousand year thousand year trace granit featur cliff crag scarp base took {2 sea }2 hour one night hour storm place sculptur granit seam upon woman face e j pratt 1882 1964 +(1 row) + +select headline('Erosion It took the sea a thousand years, +A thousand years to trace +The granite features of this cliff +In crag and scarp and base. +It took the sea an hour one night +An hour of storm to place +The sculpture of these granite seams, +Upon a woman s face. E. J. Pratt (1882 1964) +', to_tsquery('sea&thousand&years')); + headline +----------------------------------------------------------------------------------------------------------------------- + sea a thousand years, +A thousand years to trace +The granite features of this cliff +(1 row) + + +select headline('Erosion It took the sea a thousand years, +A thousand years to trace +The granite features of this cliff +In crag and scarp and base. +It took the sea an hour one night +An hour of storm to place +The sculpture of these granite seams, +Upon a woman s face. E. J. Pratt (1882 1964) +', to_tsquery('granite&sea')); + headline +---------------------------------------------------------------------------------------------- + sea an hour one night +An hour of storm to place +The sculpture of these granite +(1 row) + + +select headline('Erosion It took the sea a thousand years, +A thousand years to trace +The granite features of this cliff +In crag and scarp and base. +It took the sea an hour one night +An hour of storm to place +The sculpture of these granite seams, +Upon a woman s face. E. J. 
Pratt (1882 1964) +', to_tsquery('sea')); + headline +------------------------------------------------------------------------------------------- + sea a thousand years, +A thousand years to trace +The granite features of this cliff +(1 row) + diff --git a/contrib/tsearch2/gendict/Makefile.IN b/contrib/tsearch2/gendict/Makefile.IN new file mode 100644 index 0000000000..c13e496d06 --- /dev/null +++ b/contrib/tsearch2/gendict/Makefile.IN @@ -0,0 +1,12 @@ +subdir = contrib/CFG_DIR +top_builddir = ../.. +include $(top_builddir)/src/Makefile.global + +MODULE_big = dict_CFG_MODNAME +OBJS = CFG_OFILE +DATA_built = dict_CFG_MODNAME.sql +DOCS = README.CFG_MODNAME +PG_CPPFLAGS = +SHLIB_LINK = ../tsearch2/libtsearch2.a + +include $(top_srcdir)/contrib/contrib-global.mk diff --git a/contrib/tsearch2/gendict/README.gendict b/contrib/tsearch2/gendict/README.gendict new file mode 100644 index 0000000000..e91f1b75a3 --- /dev/null +++ b/contrib/tsearch2/gendict/README.gendict @@ -0,0 +1,130 @@ +Gendict - generate dictionary templates for contrib/tsearch2 module. + +This utility aims to help people creating dictionary for contrib/tsearch v2 +module. Particularly, it has built-in support for snowball stemmers. + +Programming API to tsearch2 dictionaries is described in tsearch v2 +documentation. + + +Prerequisities: + +* PostgreSQL 7.3 and above. 
+ +* You need tsearch2 module sources already compiled + +* Rights to install contrib modules + +Usage: + + run config.sh without parameters to see options and arguments + +Usage: +./config.sh -n DICTNAME ( [ -s [ -p PREFIX ] ] | [ -c CFILES ] [ -h HFILES ] [ -i ] ) [ -v ] [ -d DIR ] [ -C COMMENT ] + -v - be verbose + -d DIR - name of directory in PGSQL_SRC/contrib (default dict_DICTNAME) + -C COMMENT - dictionary comment +Generate Snowball stemmer: +./config.sh -n DICTNAME -s [ -p PREFIX ] [ -v ] [ -d DIR ] [ -C COMMENT ] + -s - generate Snowball wrapper + -p - prefix of Snowball's function, (default DICTNAME) +Generate template dictionary: +./config.sh -n DICTNAME [ -c CFILES ] [ -h HFILES ] [ -i ] [ -v ] [ -d DIR ] [ -C COMMENT ] + -c CFILES - source files, must be placed in contrib/tsearch2/gendict directory. + These files will be used in Makefile. + -h HFILES - header files, must be placed in contrib/tsearch2/gendict directory. + These files will be used in Makefile and subinclude.h + -i - dictionary has init method + + +Example 1: + + Create Portuguese stemmer + + 0. cd PGSQL_SRC/contrib/tsearch2/gendict + + 1. Obtain stem.{c,h} files for Portuguese + + wget http://snowball.tartarus.org/portuguese/stem.c + wget http://snowball.tartarus.org/portuguese/stem.h + + 2. Create template files for Portuguese + + ./config.sh -n pt -s -p portuguese -v -C'Snowball stemmer for Portuguese' + + Note, that argument for -p option should be *the same* as name of stemming + function in stem.c (without _stem) + + A bunch of files will be generated and placed in PGSQL_SRC/contrib/dict_pt + directory. + + 3. Compile and install dictionary + + cd PGSQL_SRC/contrib/dict_pt + make + make install + + 4. 
Test it + + Sample Portuguese words with their stemmed forms are available + from http://snowball.tartarus.org/portuguese/stemmer.html + + createdb testdict + psql testdict < /usr/local/pgsql/share/contrib/tsearch2.sql + psql testdict < /usr/local/pgsql/share/contrib/dict_pt.sql + psql -d testdict -c "select lexize('pt','bobagem');" + lexize + --------- + {bobag} + (1 row) + + Here is the resulting entry in the pg_ts_dict table + + psql -d testdict -c "select * from pg_ts_dict where dict_name='pt';" + dict_name | dict_init | dict_initoption | dict_lexize | dict_comment + -----------+-----------+-----------------+-------------+--------------------------------- + pt | 7177806 | | 7159330 | Snowball stemmer for Portuguese + (1 row) + + + Note that the dictionary and its corresponding entry in the + tsearch configuration are now installed, and you may modify them using + plain SQL commands, for example, to specify stop words. + +Example 2: + + a) Simple template dictionary with init method + + ./config.sh -n wow -v -i -C WOW + + b) Create simple template dict (without init method): + ./config.sh -n wow -v -C WOW + + The same as above, but the dictionary will not have an init method + + Dictionaries obtained in a) and b) are fully working and ready + for use: + a) lowercases the input word and removes it if it is a stop word + b) recognizes any word + + c) Simple template dictionary with source files (with init method): + + ./config.sh -n wow -v -i -c a.c -h a.h -C WOW + + Source files ( a.c ) must be placed in contrib/tsearch2/gendict directory. + These files will be used in Makefile. + + Header files ( a.h ) must be placed in contrib/tsearch2/gendict directory.
+ These files will be used in Makefile and subinclude.h + + d) Simple template dictionary with source files (without init method): + + ./config.sh -n wow -v -c a.c -h a.h -C WOW + + The same as above, but the dictionary will not have an init method + + After that you have sources in PGSQL_SRC/contrib/dict_wow and + you may edit them to create an actual dictionary. + + Please, check Tsearch2 home page (http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/) + for additional information about "Gendict tutorial" and dictionaries. \ No newline at end of file diff --git a/contrib/tsearch2/gendict/config.sh b/contrib/tsearch2/gendict/config.sh new file mode 100755 index 0000000000..26bb5421f7 --- /dev/null +++ b/contrib/tsearch2/gendict/config.sh @@ -0,0 +1,183 @@ +#!/bin/sh +# usage: print the command-line synopsis for all three modes, then exit with status 1 +usage () { + echo Usage: + echo $0 -n DICTNAME \( [ -s [ -p PREFIX ] ] \| [ -c CFILES ] [ -h HFILES ] [ -i ] \) [ -v ] [ -d DIR ] [ -C COMMENT ] + echo ' -v - be verbose' + echo ' -d DIR - name of directory in PGSQL_SRC/contrib (default dict_DICTNAME)' + echo ' -C COMMENT - dictionary comment' + echo Generate Snowball stemmer: + echo $0 -n DICTNAME -s [ -p PREFIX ] [ -v ] [ -d DIR ] [ -C COMMENT ] + echo ' -s - generate Snowball wrapper' + echo " -p - prefix of Snowball's function, (default DICTNAME)" + echo Generate template dictionary: + echo $0 -n DICTNAME [ -c CFILES ] [ -h HFILES ] [ -i ] [ -v ] [ -d DIR ] [ -C COMMENT ] + echo ' -c CFILES - source files, must be placed in contrib/tsearch2/gendict directory.' + echo ' These files will be used in Makefile.' + echo ' -h HFILES - header files, must be placed in contrib/tsearch2/gendict directory.'
+ echo ' These files will be used in Makefile and subinclude.h' + echo ' -i - dictionary has init method' + exit 1; +} + +dictname= +stemmode=no +verbose=no +cfile= +hfile= +dir= +hasinit=no +comment= +prefix= + +while getopts n:c:C:h:d:p:vis opt +do + case "$opt" in + v) verbose=yes;; + s) stemmode=yes;; + i) hasinit=yes;; + n) dictname="$OPTARG";; + c) cfile="$OPTARG";; + h) hfile="$OPTARG";; + d) dir="$OPTARG";; + C) comment="$OPTARG";; + p) prefix="$OPTARG";; + \?) usage;; + esac +done + +[ ${#dictname} -eq 0 ] && usage + +dictname=`echo $dictname | tr '[:upper:]' '[:lower:]'` + +if [ $stemmode = "yes" ] ; then + [ ${#prefix} -eq 0 ] && prefix=$dictname + hasinit=yes + cfile="stem.c" + hfile="stem.h" +fi + +[ ${#dir} -eq 0 ] && dir="dict_$dictname" + +if [ ${#comment} -eq 0 ]; then + comment=null +else + comment="'$comment'" +fi + +ofile= +for f in $cfile +do + f=` echo $f | sed 's#c$#o#'` + ofile="$ofile $f" +done + +if [ $stemmode = "yes" ] ; then + ofile="$ofile dict_snowball.o" +else + ofile="$ofile dict_tmpl.o" +fi + +if [ $verbose = "yes" ]; then + echo Dictname: "'"$dictname"'" + echo Snowball stemmer: $stemmode + echo Has init method: $hasinit + [ $stemmode = "yes" ] && echo Function prefix: $prefix + echo Source files: $cfile + echo Header files: $hfile + echo Object files: $ofile + echo Comment: $comment + echo Directory: ../../$dir +fi + + +[ $verbose = "yes" ] && echo -n 'Build directory... ' +if [ ! -d ../../$dir ]; then + if ! mkdir ../../$dir ; then + echo "Can't create directory ../../$dir" + exit 1 + fi +fi +[ $verbose = "yes" ] && echo ok + + +[ $verbose = "yes" ] && echo -n 'Build Makefile... 
' +sed s#CFG_DIR#$dir# < Makefile.IN | sed s#CFG_MODNAME#$dictname# | sed "s#CFG_OFILE#$ofile#" > ../../$dir/Makefile.tmp +if [ $stemmode = "yes" ] ; then + sed "s#^PG_CPPFLAGS.*\$#PG_CPPFLAGS = -I../tsearch2/snowball -I../tsearch2#" < ../../$dir/Makefile.tmp > ../../$dir/Makefile +else + sed "s#^PG_CPPFLAGS.*\$#PG_CPPFLAGS = -I../tsearch2#" < ../../$dir/Makefile.tmp > ../../$dir/Makefile +fi +rm ../../$dir/Makefile.tmp +[ $verbose = "yes" ] && echo ok + + +[ $verbose = "yes" ] && echo -n Build dict_$dictname'.sql.in... ' +if [ $hasinit = "yes" ]; then + sed s#CFG_MODNAME#$dictname# < sql.IN | sed "s#CFG_COMMENT#$comment#" | sed s#^HASINIT## | sed 's#^NOINIT.*$##' > ../../$dir/dict_$dictname.sql.in.tmp + if [ $stemmode = "yes" ] ; then + sed s#^ISSNOWBALL## < ../../$dir/dict_$dictname.sql.in.tmp | sed s#^NOSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in + else + sed s#^NOSNOWBALL## < ../../$dir/dict_$dictname.sql.in.tmp | sed s#^ISSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in + fi + rm ../../$dir/dict_$dictname.sql.in.tmp +else + sed s#CFG_MODNAME#$dictname# < sql.IN | sed "s#CFG_COMMENT#$comment#" | sed s#^NOINIT## | sed 's#^HASINIT.*$##' | sed s#^NOSNOWBALL## | sed s#^ISSNOWBALL.*\$## > ../../$dir/dict_$dictname.sql.in +fi +[ $verbose = "yes" ] && echo ok + + + +if [ ${#cfile} -ne 0 ] || [ ${#hfile} -ne 0 ] ; then + [ $verbose = "yes" ] && echo -n 'Copy source and header files... ' + if [ ${#cfile} -ne 0 ] ; then + if ! cp $cfile ../../$dir ; then + echo "Cant cp all or one of files: $cfile" + exit 1 + fi + fi + if [ ${#hfile} -ne 0 ] ; then + if ! cp $hfile ../../$dir ; then + echo "Cant cp all or one of files: $hfile" + exit 1 + fi + fi + [ $verbose = "yes" ] && echo ok +fi + + +[ $verbose = "yes" ] && echo -n 'Build sub-include header... 
' +echo -n > ../../$dir/subinclude.h +for i in $hfile +do + echo "#include \"$i\"" >> ../../$dir/subinclude.h +done +[ $verbose = "yes" ] && echo ok + + +if [ $stemmode = "yes" ] ; then + [ $verbose = "yes" ] && echo -n 'Build Snowball stemmer... ' + sed s#CFG_MODNAME#$dictname#g < dict_snowball.c.IN | sed s#CFG_PREFIX#$prefix#g > ../../$dir/dict_snowball.c +else + [ $verbose = "yes" ] && echo -n 'Build dictinonary... ' + sed s#CFG_MODNAME#$dictname#g < dict_tmpl.c.IN > ../../$dir/dict_tmpl.c.tmp + if [ $hasinit = "yes" ]; then + sed s#^HASINIT## < ../../$dir/dict_tmpl.c.tmp | sed 's#^NOINIT.*$##' > ../../$dir/dict_tmpl.c + else + sed s#^HASINIT.*\$## < ../../$dir/dict_tmpl.c.tmp | sed 's#^NOINIT##' > ../../$dir/dict_tmpl.c + fi + rm ../../$dir/dict_tmpl.c.tmp +fi +[ $verbose = "yes" ] && echo ok + + +[ $verbose = "yes" ] && echo -n "Build README.$dictname... " +if [ $stemmode = "yes" ] ; then + echo "Autogenerated Snowball's wrapper for $prefix" > ../../$dir/README.$dictname +else + echo "Autogenerated template for $dictname" > ../../$dir/README.$dictname +fi +[ $verbose = "yes" ] && echo ok + +echo All is done + diff --git a/contrib/tsearch2/gendict/dict_snowball.c.IN b/contrib/tsearch2/gendict/dict_snowball.c.IN new file mode 100644 index 0000000000..10ef6f1c5e --- /dev/null +++ b/contrib/tsearch2/gendict/dict_snowball.c.IN @@ -0,0 +1,52 @@ +/* + * example of Snowball dictionary + * http://snowball.tartarus.org/ + * Teodor Sigaev + */ +#include +#include + +#include "postgres.h" + +#include "dict.h" +#include "common.h" +#include "snowball/header.h" +#include "subinclude.h" + +typedef struct { + struct SN_env *z; + StopList stoplist; + int (*stem)(struct SN_env * z); +} DictSnowball; + + +PG_FUNCTION_INFO_V1(dinit_CFG_MODNAME); +Datum dinit_CFG_MODNAME(PG_FUNCTION_ARGS); + +Datum +dinit_CFG_MODNAME(PG_FUNCTION_ARGS) { + DictSnowball *d = (DictSnowball*)malloc( sizeof(DictSnowball) ); + + if ( !d ) + elog(ERROR, "No memory"); + memset(d,0,sizeof(DictSnowball)); + 
d->stoplist.wordop=lowerstr; + + if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) { + text *in = PG_GETARG_TEXT_P(0); + readstoplist(in, &(d->stoplist)); + sortstoplist(&(d->stoplist)); + PG_FREE_IF_COPY(in, 0); + } + + d->z = CFG_PREFIX_create_env(); + if (!d->z) { + freestoplist(&(d->stoplist)); + elog(ERROR,"No memory"); + } + d->stem=CFG_PREFIX_stem; + + PG_RETURN_POINTER(d); +} + + diff --git a/contrib/tsearch2/gendict/dict_tmpl.c.IN b/contrib/tsearch2/gendict/dict_tmpl.c.IN new file mode 100644 index 0000000000..10c0381aee --- /dev/null +++ b/contrib/tsearch2/gendict/dict_tmpl.c.IN @@ -0,0 +1,64 @@ +/* + * example of dictionary + * Teodor Sigaev + */ +#include +#include +#include + +#include "postgres.h" + +#include "dict.h" +#include "common.h" + +#include "subinclude.h" + +HASINIT typedef struct { +HASINIT StopList stoplist; +HASINIT } DictExample; + + +HASINIT PG_FUNCTION_INFO_V1(dinit_CFG_MODNAME); +HASINIT Datum dinit_CFG_MODNAME(PG_FUNCTION_ARGS); + +HASINIT Datum +HASINIT dinit_CFG_MODNAME(PG_FUNCTION_ARGS) { +HASINIT DictExample *d = (DictExample*)malloc( sizeof(DictExample) ); +HASINIT +HASINIT if ( !d ) +HASINIT elog(ERROR, "No memory"); +HASINIT memset(d,0,sizeof(DictExample)); +HASINIT +HASINIT d->stoplist.wordop=lowerstr; +HASINIT +HASINIT /* Your INIT code */ +HASINIT +HASINIT if ( !PG_ARGISNULL(0) && PG_GETARG_POINTER(0)!=NULL ) { +HASINIT text *in = PG_GETARG_TEXT_P(0); +HASINIT readstoplist(in, &(d->stoplist)); +HASINIT sortstoplist(&(d->stoplist)); +HASINIT PG_FREE_IF_COPY(in, 0); +HASINIT } +HASINIT +HASINIT PG_RETURN_POINTER(d); +HASINIT } + +PG_FUNCTION_INFO_V1(dlexize_CFG_MODNAME); +Datum dlexize_CFG_MODNAME(PG_FUNCTION_ARGS); +Datum +dlexize_CFG_MODNAME(PG_FUNCTION_ARGS) { +HASINIT DictExample *d = (DictExample*)PG_GETARG_POINTER(0); + char *in = (char*)PG_GETARG_POINTER(1); + char *txt = pnstrdup(in, PG_GETARG_INT32(2)); + char **res=palloc(sizeof(char*)*2); + + /* Your INIT dictionary code */ +HASINIT if ( *txt=='\0' || 
searchstoplist(&(d->stoplist),txt) ) { +HASINIT pfree(txt); +HASINIT res[0]=NULL; +HASINIT } else + res[0]=txt; + res[1]=NULL; + + PG_RETURN_POINTER(res); +} diff --git a/contrib/tsearch2/gendict/sql.IN b/contrib/tsearch2/gendict/sql.IN new file mode 100644 index 0000000000..ff0d842399 --- /dev/null +++ b/contrib/tsearch2/gendict/sql.IN @@ -0,0 +1,26 @@ +SET search_path = public; +BEGIN; + +HASINIT create function dinit_CFG_MODNAME(text) +HASINIT returns internal +HASINIT as 'MODULE_PATHNAME' +HASINIT language 'C'; + +NOSNOWBALL create function dlexize_CFG_MODNAME(internal,internal,int4) +NOSNOWBALL returns internal +NOSNOWBALL as 'MODULE_PATHNAME' +NOSNOWBALL language 'C' +NOSNOWBALL with (isstrict); + +insert into pg_ts_dict select + 'CFG_MODNAME', +HASINIT (select oid from pg_proc where proname='dinit_CFG_MODNAME'), +NOINIT null, + null, +ISSNOWBALL (select oid from pg_proc where proname='snb_lexize'), +NOSNOWBALL (select oid from pg_proc where proname='dlexize_CFG_MODNAME'), + CFG_COMMENT +; + + +END; diff --git a/contrib/tsearch2/gistidx.c b/contrib/tsearch2/gistidx.c new file mode 100644 index 0000000000..5a34f74789 --- /dev/null +++ b/contrib/tsearch2/gistidx.c @@ -0,0 +1,686 @@ +#include "postgres.h" + +#include + +#include "access/gist.h" +#include "access/itup.h" +#include "access/rtree.h" +#include "utils/elog.h" +#include "utils/palloc.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "storage/bufpage.h" +#include "access/tuptoaster.h" + +#include "tsvector.h" +#include "query.h" +#include "gistidx.h" +#include "crc32.h" + +PG_FUNCTION_INFO_V1(gtsvector_in); +Datum gtsvector_in(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(gtsvector_out); +Datum gtsvector_out(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(gtsvector_compress); +Datum gtsvector_compress(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(gtsvector_decompress); +Datum gtsvector_decompress(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(gtsvector_consistent); +Datum 
gtsvector_consistent(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(gtsvector_union); +Datum gtsvector_union(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(gtsvector_same); +Datum gtsvector_same(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(gtsvector_penalty); +Datum gtsvector_penalty(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(gtsvector_picksplit); +Datum gtsvector_picksplit(PG_FUNCTION_ARGS); + +#define GETENTRY(vec,pos) ((GISTTYPE *) DatumGetPointer(((GISTENTRY *) VARDATA(vec))[(pos)].key)) +#define SUMBIT(val) ( \ + GETBITBYTE(val,0) + \ + GETBITBYTE(val,1) + \ + GETBITBYTE(val,2) + \ + GETBITBYTE(val,3) + \ + GETBITBYTE(val,4) + \ + GETBITBYTE(val,5) + \ + GETBITBYTE(val,6) + \ + GETBITBYTE(val,7) \ +) + + +Datum +gtsvector_in(PG_FUNCTION_ARGS) +{ + elog(ERROR, "Not implemented"); + PG_RETURN_DATUM(0); +} + +Datum +gtsvector_out(PG_FUNCTION_ARGS) +{ + elog(ERROR, "Not implemented"); + PG_RETURN_DATUM(0); +} + +static int +compareint(const void *a, const void *b) +{ + if (*((int4 *) a) == *((int4 *) b)) + return 0; + return (*((int4 *) a) > *((int4 *) b)) ? 
1 : -1; +} +/* uniqueint: sort a[0..l) in place with compareint and squeeze out duplicates; returns the number of distinct values now at the front of a. Arrays of 0 or 1 elements are returned untouched. */ +static int +uniqueint(int4 *a, int4 l) +{ + int4 *ptr, + *res; + + if (l <= 1) /* l == 0 used to fall through and report one element */ + return l; + + ptr = res = a; + + qsort((void *) a, l, sizeof(int4), compareint); + + while (ptr - a < l) + if (*ptr != *res) + *(++res) = *ptr++; + else + ptr++; + return res + 1 - a; +} +/* makesign: build the signature of an ARRKEY key - clear sign, then set the bit that each array value hashes to */ +static void +makesign(BITVECP sign, GISTTYPE * a) +{ + int4 k, + len = ARRNELEM(a); + int4 *ptr = GETARR(a); + + MemSet((void *) sign, 0, sizeof(BITVEC)); + for (k = 0; k < len; k++) + HASH(sign, ptr[k]); +} +/* GiST compress: a leaf tsvector becomes a sorted, de-duplicated array of lexeme CRC32 values, or a signature when that array is larger than TOAST_INDEX_TARGET; a signature key with every byte 0xff becomes a SIGNKEY | ALLISTRUE key; other keys are returned as is */ +Datum +gtsvector_compress(PG_FUNCTION_ARGS) +{ + GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); + GISTENTRY *retval = entry; + + if (entry->leafkey) + { /* tsvector */ + GISTTYPE *res; + tsvector *toastedval = (tsvector *) DatumGetPointer(entry->key); + tsvector *val = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(entry->key)); + int4 len; + int4 *arr; + WordEntry *ptr = ARRPTR(val); + char *words = STRPTR(val); + + len = CALCGTSIZE(ARRKEY, val->size); + res = (GISTTYPE *) palloc(len); + res->len = len; + res->flag = ARRKEY; + arr = GETARR(res); + len = val->size; + while (len--) + { + *arr = crc32_sz((uint8 *) &words[ptr->pos], ptr->len); + arr++; + ptr++; + } + + len = uniqueint(GETARR(res), val->size); + if (len != val->size) + { + /* + * there is a collision of hash-function; len is always less + * than val->size + */ + len = CALCGTSIZE(ARRKEY, len); + res = (GISTTYPE *) repalloc((void *) res, len); + res->len = len; + } + if (val != toastedval) + pfree(val); + + /* make signature, if array is too long */ + if (res->len > TOAST_INDEX_TARGET) + { + GISTTYPE *ressign; + + len = CALCGTSIZE(SIGNKEY, 0); + ressign = (GISTTYPE *) palloc(len); + ressign->len = len; + ressign->flag = SIGNKEY; + makesign(GETSIGN(ressign), res); + pfree(res); + res = ressign; + } + + retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); + gistentryinit(*retval, PointerGetDatum(res), + entry->rel, entry->page, + entry->offset, res->len, FALSE); + } + else if (ISSIGNKEY(DatumGetPointer(entry->key))
&& + !ISALLTRUE(DatumGetPointer(entry->key))) + { + int4 i, + len; + GISTTYPE *res; + BITVECP sign = GETSIGN(DatumGetPointer(entry->key)); + /* keep the key unchanged unless every signature byte is 0xff */ + LOOPBYTE( + if ((sign[i] & 0xff) != 0xff) + PG_RETURN_POINTER(retval); + ); + /* saturated signature: replace it with a SIGNKEY | ALLISTRUE key */ + len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0); + res = (GISTTYPE *) palloc(len); + res->len = len; + res->flag = SIGNKEY | ALLISTRUE; + + retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); + gistentryinit(*retval, PointerGetDatum(res), + entry->rel, entry->page, + entry->offset, res->len, FALSE); + } + PG_RETURN_POINTER(retval); +} +/* GiST decompress: detoast the key; a new GISTENTRY is built only when detoasting produced a different pointer, otherwise the entry is returned as is */ +Datum +gtsvector_decompress(PG_FUNCTION_ARGS) +{ + GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); + GISTTYPE *key = (GISTTYPE *) DatumGetPointer(PG_DETOAST_DATUM(entry->key)); + + if (key != (GISTTYPE *) DatumGetPointer(entry->key)) + { + GISTENTRY *retval = (GISTENTRY *) palloc(sizeof(GISTENTRY)); + + gistentryinit(*retval, PointerGetDatum(key), + entry->rel, entry->page, + entry->offset, key->len, FALSE); + + PG_RETURN_POINTER(retval); + } + + PG_RETURN_POINTER(entry); +} +/* bounds [arrb, arre) of the int4 array that checkcondition_arr binary-searches */ +typedef struct +{ + int4 *arrb; + int4 *arre; +} CHKVAL; + +/* + * is there value 'val' in array or not ?
+ */ +static bool +checkcondition_arr(void *checkval, ITEM * val) +{ + int4 *StopLow = ((CHKVAL *) checkval)->arrb; + int4 *StopHigh = ((CHKVAL *) checkval)->arre; + int4 *StopMiddle; + + /* Loop invariant: StopLow <= val < StopHigh */ + + while (StopLow < StopHigh) + { + StopMiddle = StopLow + (StopHigh - StopLow) / 2; + if (*StopMiddle == val->val) + return (true); + else if (*StopMiddle < val->val) + StopLow = StopMiddle + 1; + else + StopHigh = StopMiddle; + } + + return (false); +} +/* TS_execute callback for signature keys: test the signature bit that val->val hashes to */ +static bool +checkcondition_bit(void *checkval, ITEM * val) +{ + return GETBIT(checkval, HASHVAL(val->val)); +} +/* GiST consistent: run the query (arg 1) against the index key (arg 0) through TS_execute; an empty query never matches and an ALLISTRUE key always does */ +Datum +gtsvector_consistent(PG_FUNCTION_ARGS) +{ + QUERYTYPE *query = (QUERYTYPE *) PG_GETARG_POINTER(1); + GISTTYPE *key = (GISTTYPE *) DatumGetPointer( + ((GISTENTRY *) PG_GETARG_POINTER(0))->key + ); + + if (!query->size) + PG_RETURN_BOOL(false); + + if (ISSIGNKEY(key)) + { + if (ISALLTRUE(key)) + PG_RETURN_BOOL(true); + + PG_RETURN_BOOL(TS_execute( + GETQUERY(query), + (void *) GETSIGN(key), false, + checkcondition_bit + )); + } + else + { /* only leaf pages */ + CHKVAL chkval; + + chkval.arrb = GETARR(key); + chkval.arre = chkval.arrb + ARRNELEM(key); + PG_RETURN_BOOL(TS_execute( + GETQUERY(query), + (void *) &chkval, true, + checkcondition_arr + )); + } +} +/* unionkey: OR the key 'add' into the signature sbase; returns 1 without touching sbase when add is ALLISTRUE, otherwise 0 */ +static int4 +unionkey(BITVECP sbase, GISTTYPE * add) +{ + int4 i; + + if (ISSIGNKEY(add)) + { + BITVECP sadd = GETSIGN(add); + + if (ISALLTRUE(add)) + return 1; + + LOOPBYTE( + sbase[i] |= sadd[i]; + ); + } + else + { + int4 *ptr = GETARR(add); + + for (i = 0; i < ARRNELEM(add); i++) + HASH(sbase, ptr[i]); + } + return 0; +} + +/* GiST union: build one SIGNKEY covering every entry of the vector; the result is ALLISTRUE as soon as one entry is ALLISTRUE; *size (arg 1) receives the key length */ +Datum +gtsvector_union(PG_FUNCTION_ARGS) +{ + bytea *entryvec = (bytea *) PG_GETARG_POINTER(0); + int *size = (int *) PG_GETARG_POINTER(1); + BITVEC base; + int4 len = (VARSIZE(entryvec) - VARHDRSZ) / sizeof(GISTENTRY); + int4 i; + int4 flag = 0; + GISTTYPE *result; + + MemSet((void *) base, 0, sizeof(BITVEC)); + for (i = 0; i < len; i++) + { + if (unionkey(base,
GETENTRY(entryvec, i))) + { + flag = ALLISTRUE; + break; + } + } + + flag |= SIGNKEY; + len = CALCGTSIZE(flag, 0); + result = (GISTTYPE *) palloc(len); + *size = result->len = len; + result->flag = flag; + if (!ISALLTRUE(result)) + memcpy((void *) GETSIGN(result), (void *) base, sizeof(BITVEC)); + + PG_RETURN_POINTER(result); +} +/* GiST same: store in *result (arg 2) whether keys a and b are identical; only a's kind is tested, b is taken to be of the same kind */ +Datum +gtsvector_same(PG_FUNCTION_ARGS) +{ + GISTTYPE *a = (GISTTYPE *) PG_GETARG_POINTER(0); + GISTTYPE *b = (GISTTYPE *) PG_GETARG_POINTER(1); + bool *result = (bool *) PG_GETARG_POINTER(2); + + if (ISSIGNKEY(a)) + { /* then b also ISSIGNKEY */ + if (ISALLTRUE(a) && ISALLTRUE(b)) + *result = true; + else if (ISALLTRUE(a)) + *result = false; + else if (ISALLTRUE(b)) + *result = false; + else + { + int4 i; + BITVECP sa = GETSIGN(a), + sb = GETSIGN(b); + + *result = true; + LOOPBYTE( + if (sa[i] != sb[i]) + { + *result = false; + break; + } + ); + } + } + else + { /* a and b ISARRKEY */ + int4 lena = ARRNELEM(a), + lenb = ARRNELEM(b); + + if (lena != lenb) + *result = false; + else + { + int4 *ptra = GETARR(a), + *ptrb = GETARR(b); + int4 i; + + *result = true; + for (i = 0; i < lena; i++) + if (ptra[i] != ptrb[i]) + { + *result = false; + break; + } + } + } + + PG_RETURN_POINTER(result); +} +/* sizebitvec: sum SUMBIT over every byte of the signature (presumably the number of set bits - GETBITBYTE is defined outside this file) */ +static int4 +sizebitvec(BITVECP sign) +{ + int4 size = 0, + i; + + LOOPBYTE( + size += SUMBIT(*(char *) sign); + sign = (BITVECP) (((char *) sign) + 1); + ); + return size; +} +/* hemdistsign: number of bit positions in which the two signatures differ */ +static int +hemdistsign(BITVECP a, BITVECP b) { + int i,dist=0; + + LOOPBIT( + if ( GETBIT(a,i) != GETBIT(b,i) ) + dist++; + ); + return dist; +} +/* hemdist: signature distance between two SIGNKEY keys; against an ALLISTRUE key the distance is SIGLENBIT minus sizebitvec of the other key */ +static int +hemdist(GISTTYPE *a, GISTTYPE *b) { + if ( ISALLTRUE(a) ) { + if (ISALLTRUE(b)) + return 0; + else + return SIGLENBIT-sizebitvec(GETSIGN(b)); + } else if (ISALLTRUE(b)) + return SIGLENBIT-sizebitvec(GETSIGN(a)); + + return hemdistsign( GETSIGN(a), GETSIGN(b) ); +} +/* GiST penalty: store in *penalty (arg 2) the signature distance between the existing key (arg 0) and the new key (arg 1); an ARRKEY new key is first hashed into a temporary signature */ +Datum +gtsvector_penalty(PG_FUNCTION_ARGS) +{ + GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */ + GISTENTRY
*newentry = (GISTENTRY *) PG_GETARG_POINTER(1); + float *penalty = (float *) PG_GETARG_POINTER(2); + GISTTYPE *origval = (GISTTYPE *) DatumGetPointer(origentry->key); + GISTTYPE *newval = (GISTTYPE *) DatumGetPointer(newentry->key); + BITVECP orig = GETSIGN(origval); + + *penalty = 0.0; + + if (ISARRKEY(newval)) { + BITVEC sign; + makesign(sign, newval); + + if ( ISALLTRUE(origval) ) + *penalty=((float)(SIGLENBIT-sizebitvec(sign)))/(float)(SIGLENBIT+1); + else + *penalty=hemdistsign(sign,orig); + } else { + *penalty=hemdist(origval,newval); + } + PG_RETURN_POINTER(penalty); +} +/* per-entry cache used by picksplit: either the allistrue flag or a full signature */ +typedef struct +{ + bool allistrue; + BITVEC sign; +} CACHESIGN; +/* fillcache: fill a cache item from a key - hash ARRKEY values into a signature, flag an ALLISTRUE key, or copy the stored signature */ +static void +fillcache(CACHESIGN * item, GISTTYPE * key) +{ + item->allistrue = false; + if (ISARRKEY(key)) + makesign(item->sign, key); + else if (ISALLTRUE(key)) + item->allistrue = true; + else + memcpy((void *) item->sign, (void *) GETSIGN(key), sizeof(BITVEC)); +} +/* WISH_F(a,b,c) = -(a-b)^3 * c; picksplit adds it to size_beta with a = spl_nleft, b = spl_nright */ +#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) ) +typedef struct +{ + OffsetNumber pos; + int4 cost; +} SPLITCOST; +/* qsort comparator: orders SPLITCOST items by ascending cost */ +static int +comparecost(const void *a, const void *b) +{ + if (((SPLITCOST *) a)->cost == ((SPLITCOST *) b)->cost) + return 0; + else + return (((SPLITCOST *) a)->cost > ((SPLITCOST *) b)->cost) ?
1 : -1; +} + +/* hemdistcache: same distance as hemdist(), computed on two cached signatures */ +static int +hemdistcache(CACHESIGN *a, CACHESIGN *b) { + if ( a->allistrue ) { + if (b->allistrue) + return 0; + else + return SIGLENBIT-sizebitvec(b->sign); + } else if (b->allistrue) + return SIGLENBIT-sizebitvec(a->sign); + + return hemdistsign( a->sign, b->sign ); +} +/* GiST picksplit: pick the two entries with the largest mutual distance as seeds, then visit all entries in order of increasing |dist_left - dist_right| and put each on the side whose current union is nearer (biased by WISH_F on the side sizes), OR-ing its signature into that side's union; fills v (arg 1) and returns it */ +Datum +gtsvector_picksplit(PG_FUNCTION_ARGS) +{ + bytea *entryvec = (bytea *) PG_GETARG_POINTER(0); + GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1); + OffsetNumber k, + j; + GISTTYPE *datum_l, + *datum_r; + BITVECP union_l, + union_r; + int4 size_alpha, + size_beta; + int4 size_waste, + waste = -1; + int4 nbytes; + OffsetNumber seed_1 = 0, + seed_2 = 0; + OffsetNumber *left, + *right; + OffsetNumber maxoff; + BITVECP ptr; + int i; + CACHESIGN *cache; + SPLITCOST *costvector; + + maxoff = ((VARSIZE(entryvec) - VARHDRSZ) / sizeof(GISTENTRY)) - 2; + nbytes = (maxoff + 2) * sizeof(OffsetNumber); + v->spl_left = (OffsetNumber *) palloc(nbytes); + v->spl_right = (OffsetNumber *) palloc(nbytes); + + cache = (CACHESIGN *) palloc(sizeof(CACHESIGN) * (maxoff + 2)); + fillcache(&cache[FirstOffsetNumber], GETENTRY(entryvec, FirstOffsetNumber)); + /* seed search; the cache is filled during the first pass of the outer loop */ + for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k)) { + for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j)) { + if (k == FirstOffsetNumber) + fillcache(&cache[j], GETENTRY(entryvec, j)); + + size_waste=hemdistcache(&(cache[j]),&(cache[k])); + if (size_waste > waste) { + waste = size_waste; + seed_1 = k; + seed_2 = j; + } + } + } + + left = v->spl_left; + v->spl_nleft = 0; + right = v->spl_right; + v->spl_nright = 0; + + if (seed_1 == 0 || seed_2 == 0) { + seed_1 = 1; + seed_2 = 2; + } + + /* form initial ..
*/ + if (cache[seed_1].allistrue) { + datum_l = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0)); + datum_l->len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0); + datum_l->flag = SIGNKEY | ALLISTRUE; + } else { + datum_l = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY, 0)); + datum_l->len = CALCGTSIZE(SIGNKEY, 0); + datum_l->flag = SIGNKEY; + memcpy((void *) GETSIGN(datum_l), (void *) cache[seed_1].sign, sizeof(BITVEC)); + } + if (cache[seed_2].allistrue) { + datum_r = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0)); + datum_r->len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0); + datum_r->flag = SIGNKEY | ALLISTRUE; + } else { + datum_r = (GISTTYPE *) palloc(CALCGTSIZE(SIGNKEY, 0)); + datum_r->len = CALCGTSIZE(SIGNKEY, 0); + datum_r->flag = SIGNKEY; + memcpy((void *) GETSIGN(datum_r), (void *) cache[seed_2].sign, sizeof(BITVEC)); + } + + union_l=GETSIGN(datum_l); + union_r=GETSIGN(datum_r); + maxoff = OffsetNumberNext(maxoff); + fillcache(&cache[maxoff], GETENTRY(entryvec, maxoff)); + /* sort before ... */ + costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff); + for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j)) { + costvector[j - 1].pos = j; + size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j])); + size_beta = hemdistcache(&(cache[seed_2]), &(cache[j])); + costvector[j - 1].cost = abs(size_alpha - size_beta); + } + qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost); + /* distribute the entries; each seed goes straight to its own side */ + for (k = 0; k < maxoff; k++) { + j = costvector[k].pos; + if (j == seed_1) { + *left++ = j; + v->spl_nleft++; + continue; + } else if (j == seed_2) { + *right++ = j; + v->spl_nright++; + continue; + } + /* distance from entry j to the current union of each side */ + if (ISALLTRUE(datum_l) || cache[j].allistrue) { + if ( ISALLTRUE(datum_l) && cache[j].allistrue ) + size_alpha=0; + else + size_alpha = SIGLENBIT-sizebitvec( + ( cache[j].allistrue ) ?
GETSIGN(datum_l) : cache[j].sign /* cached sign is a bare BITVEC: no GISTTYPE header to skip */ + ); + } else { + size_alpha=hemdistsign(cache[j].sign,GETSIGN(datum_l)); + } + + if (ISALLTRUE(datum_r) || cache[j].allistrue) { + if ( ISALLTRUE(datum_r) && cache[j].allistrue ) + size_beta=0; + else + size_beta = SIGLENBIT-sizebitvec( + ( cache[j].allistrue ) ? GETSIGN(datum_r) : cache[j].sign /* bare BITVEC, as above */ + ); + } else { + size_beta=hemdistsign(cache[j].sign,GETSIGN(datum_r)); + } + + if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1)) { + if (ISALLTRUE(datum_l) || cache[j].allistrue) { + if (! ISALLTRUE(datum_l) ) + MemSet((void *) GETSIGN(datum_l), 0xff, sizeof(BITVEC)); + } else { + ptr=cache[j].sign; + LOOPBYTE( + union_l[i] |= ptr[i]; + ); + } + *left++ = j; + v->spl_nleft++; + } else { + if (ISALLTRUE(datum_r) || cache[j].allistrue) { + if (! ISALLTRUE(datum_r) ) + MemSet((void *) GETSIGN(datum_r), 0xff, sizeof(BITVEC)); + } else { + ptr=cache[j].sign; + LOOPBYTE( + union_r[i] |= ptr[i]; + ); + } + *right++ = j; + v->spl_nright++; + } + } + + *right = *left = FirstOffsetNumber; + pfree(costvector); + pfree(cache); + v->spl_ldatum = PointerGetDatum(datum_l); + v->spl_rdatum = PointerGetDatum(datum_r); + + PG_RETURN_POINTER(v); +} diff --git a/contrib/tsearch2/gistidx.h b/contrib/tsearch2/gistidx.h new file mode 100644 index 0000000000..d081c74682 --- /dev/null +++ b/contrib/tsearch2/gistidx.h @@ -0,0 +1,67 @@ +#ifndef __GISTIDX_H__ +#define __GISTIDX_H__ + +/* +#define GISTIDX_DEBUG +*/ + +/* + * signature defines + */ + +#define BITBYTE 8 +#define SIGLENINT 63 /* >121 => key will toast, so it will not + * work !!!
*/ +#define SIGLEN ( sizeof(int4)*SIGLENINT ) +#define SIGLENBIT (SIGLEN*BITBYTE) + +typedef char BITVEC[SIGLEN]; +typedef char *BITVECP; + +#define LOOPBYTE(a) \ + for(i=0;i> i & 0x01 ) +#define CLRBIT(x,i) GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITBYTE ) ) +#define SETBIT(x,i) GETBYTE(x,i) |= ( 0x01 << ( (i) % BITBYTE ) ) +#define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITBYTE )) & 0x01 ) + +#define abs(a) ((a) < (0) ? -(a) : (a)) +#define min(a,b) ((a) < (b) ? (a) : (b)) +#define HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT) +#define HASH(sign, val) SETBIT((sign), HASHVAL(val)) + + +/* + * type of index key + */ +typedef struct +{ + int4 len; + int4 flag; + char data[1]; +} GISTTYPE; + +#define ARRKEY 0x01 +#define SIGNKEY 0x02 +#define ALLISTRUE 0x04 + +#define ISARRKEY(x) ( ((GISTTYPE*)x)->flag & ARRKEY ) +#define ISSIGNKEY(x) ( ((GISTTYPE*)x)->flag & SIGNKEY ) +#define ISALLTRUE(x) ( ((GISTTYPE*)x)->flag & ALLISTRUE ) + +#define GTHDRSIZE ( sizeof(int4)*2 ) +#define CALCGTSIZE(flag, len) ( GTHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(int4)) : (((flag) & ALLISTRUE) ? 
0 : SIGLEN) ) ) + +#define GETSIGN(x) ( (BITVECP)( (char*)x+GTHDRSIZE ) ) +#define GETARR(x) ( (int4*)( (char*)x+GTHDRSIZE ) ) +#define ARRNELEM(x) ( ( ((GISTTYPE*)x)->len - GTHDRSIZE )/sizeof(int4) ) + +#endif diff --git a/contrib/tsearch2/ispell/spell.c b/contrib/tsearch2/ispell/spell.c new file mode 100644 index 0000000000..3cf2cc8aae --- /dev/null +++ b/contrib/tsearch2/ispell/spell.c @@ -0,0 +1,520 @@ +#include +#include +#include +#include + +#include "postgres.h" + +#include "spell.h" + +#define MAXNORMLEN 56 + +#define STRNCASECMP(x,y) (strncasecmp(x,y,strlen(y))) + +static int cmpspell(const void *s1,const void *s2){ + return(strcmp(((const SPELL*)s1)->word,((const SPELL*)s2)->word)); +} + +static void +strlower( char * str ) { + unsigned char *ptr = (unsigned char *)str; + while ( *ptr ) { + *ptr = tolower( *ptr ); + ptr++; + } +} + +/* backward string compaire for suffix tree operations */ +static int +strbcmp(const char *s1, const char *s2) { + int l1 = strlen(s1)-1, l2 = strlen(s2)-1; + while (l1 >= 0 && l2 >= 0) { + if (s1[l1] < s2[l2]) return -1; + if (s1[l1] > s2[l2]) return 1; + l1--; l2--; + } + if (l1 < l2) return -1; + if (l1 > l2) return 1; + + return 0; +} +static int +strbncmp(const char *s1, const char *s2, size_t count) { + int l1 = strlen(s1) - 1, l2 = strlen(s2) - 1, l = count; + while (l1 >= 0 && l2 >= 0 && l > 0) { + if (s1[l1] < s2[l2]) return -1; + if (s1[l1] > s2[l2]) return 1; + l1--; + l2--; + l--; + } + if (l == 0) return 0; + if (l1 < l2) return -1; + if (l1 > l2) return 1; + return 0; +} + +static int +cmpaffix(const void *s1,const void *s2){ + if (((const AFFIX*)s1)->type < ((const AFFIX*)s2)->type) return -1; + if (((const AFFIX*)s1)->type > ((const AFFIX*)s2)->type) return 1; + if (((const AFFIX*)s1)->type == 'p') + return(strcmp(((const AFFIX*)s1)->repl,((const AFFIX*)s2)->repl)); + else + return(strbcmp(((const AFFIX*)s1)->repl,((const AFFIX*)s2)->repl)); +} + +int +AddSpell(IspellDict * Conf,const char * word,const char 
*flag){
+    /*
+     * Append one dictionary entry (a private copy of word plus its affix
+     * flags) to Conf->Spell, growing the array in steps of 1024*20 entries.
+     * Always returns 0; allocation failure is reported through elog(ERROR).
+     */
+    if(Conf->nspell>=Conf->mspell){
+        if(Conf->mspell){
+            Conf->mspell+=1024*20;
+            Conf->Spell=(SPELL *)realloc(Conf->Spell,Conf->mspell*sizeof(SPELL));
+        }else{
+            Conf->mspell=1024*20;
+            Conf->Spell=(SPELL *)malloc(Conf->mspell*sizeof(SPELL));
+        }
+        if ( Conf->Spell == NULL )
+            elog(ERROR,"No memory for AddSpell");
+    }
+    Conf->Spell[Conf->nspell].word=strdup(word);
+    if ( !Conf->Spell[Conf->nspell].word )
+        elog(ERROR,"No memory for AddSpell");
+    /*
+     * strncpy() does not terminate the destination when the source fills it
+     * completely, and FindWord() runs strchr() over this field.  Copy one
+     * byte less than the field holds and terminate explicitly.
+     */
+    strncpy(Conf->Spell[Conf->nspell].flag,flag,sizeof(Conf->Spell[Conf->nspell].flag)-1);
+    Conf->Spell[Conf->nspell].flag[sizeof(Conf->Spell[Conf->nspell].flag)-1]='\0';
+    Conf->nspell++;
+    return(0);
+}
+
+
+/*
+ * Read a dictionary file, one "word" or "word/FLAGS" entry per line, and add
+ * every line to Conf with AddSpell().  The flag part is cut at the first
+ * character that is not an ASCII letter; the word is lower-cased and cut at
+ * the first CR or LF.  Returns 1 if the file cannot be opened, 0 otherwise.
+ */
+int
+ImportDictionary(IspellDict * Conf,const char *filename){
+    unsigned char str[BUFSIZ];
+    FILE *dict;
+
+    if(!(dict=fopen(filename,"r")))return(1);
+    while(fgets(str,sizeof(str),dict)){
+        unsigned char *s;
+        const unsigned char *flag;
+
+        flag = NULL;
+        if((s=strchr(str,'/'))){
+            *s=0;
+            s++;flag=s;
+            while(*s){
+                if (((*s>='A')&&(*s<='Z'))||((*s>='a')&&(*s<='z')))
+                    s++;
+                else {
+                    *s=0;
+                    break;
+                }
+            }
+        }else{
+            flag="";
+        }
+        strlower(str);
+        /* Dont load words if first letter is not required */
+        /* It allows to optimize loading at search time */
+        s=str;
+        while(*s){
+            if(*s=='\r')*s=0;
+            if(*s=='\n')*s=0;
+            s++;
+        }
+        AddSpell(Conf,str,flag);
+    }
+    fclose(dict);
+    return(0);
+}
+
+
+/*
+ * Look word up among the entries that share its first byte; the index range
+ * comes from Conf->SpellTree.  Every iteration probes the middle and both
+ * ends of the current range.  An entry matches when its word equals word
+ * and, if affixflag is non-zero, its flag string contains affixflag.
+ * Returns the matching entry or NULL.
+ */
+static SPELL *
+FindWord(IspellDict * Conf, const char *word, int affixflag) {
+    int l,c,r,resc,resl,resr, i;
+
+    i = (int)(*word) & 255;
+    l = Conf->SpellTree.Left[i];
+    r = Conf->SpellTree.Right[i];
+    if (l == -1) return (NULL);
+    while(l<=r){
+        c = (l + r) >> 1;
+        resc = strcmp(Conf->Spell[c].word, word);
+        if( (resc == 0) &&
+            ((affixflag == 0) || (strchr(Conf->Spell[c].flag, affixflag) != NULL)) ) {
+            return(&Conf->Spell[c]);
+        }
+        resl = strcmp(Conf->Spell[l].word, word);
+        if( (resl == 0) &&
+            ((affixflag == 0) || (strchr(Conf->Spell[l].flag, affixflag) != NULL)) ) {
+            return(&Conf->Spell[l]);
+        }
+        resr = strcmp(Conf->Spell[r].word, word);
+        if( (resr == 0) &&
+            ((affixflag == 0) || (strchr(Conf->Spell[r].flag, affixflag) != NULL))
) {
+            return(&Conf->Spell[r]);
+        }
+        if(resc < 0){
+            l = c + 1;
+            r--;
+        } else if(resc > 0){
+            r = c - 1;
+            l++;
+        } else {
+            l++;
+            r--;
+        }
+    }
+    return(NULL);
+}
+
+/*
+ * Append one affix rule to Conf->Affix, growing the array 16 entries at a
+ * time.  type 's' stores the mask anchored at the end ("mask$"), any other
+ * type anchors it at the start ("^mask").  The regular expression itself is
+ * compiled later; compile = 1 marks it as still pending.
+ *
+ * mask, find and repl are copied into fixed-size fields of AFFIX, so rules
+ * that do not fit are rejected with elog(ERROR) instead of overrunning them.
+ * Always returns 0.
+ */
+int
+AddAffix(IspellDict * Conf,int flag,const char *mask,const char *find,const char *repl,int type) {
+    AFFIX *affix;
+
+    if(Conf->naffixes>=Conf->maffixes){
+        if(Conf->maffixes){
+            Conf->maffixes+=16;
+            Conf->Affix = (AFFIX*)realloc((void*)Conf->Affix,Conf->maffixes*sizeof(AFFIX));
+        }else{
+            Conf->maffixes=16;
+            Conf->Affix = (AFFIX*)malloc(Conf->maffixes * sizeof(AFFIX));
+        }
+        if ( Conf->Affix == NULL )
+            elog(ERROR,"No memory for AddAffix");
+    }
+    affix = &Conf->Affix[Conf->naffixes];
+
+    /* the mask gets one anchor character in addition to its terminator */
+    if ( strlen(mask) + 2 > sizeof(affix->mask) ||
+         strlen(find) >= sizeof(affix->find) ||
+         strlen(repl) >= sizeof(affix->repl) )
+        elog(ERROR,"Affix parameters are too long");
+
+    if (type=='s') {
+        sprintf(affix->mask,"%s$",mask);
+    } else {
+        sprintf(affix->mask,"^%s",mask);
+    }
+    affix->compile = 1;
+    affix->flag=flag;
+    affix->type=type;
+
+    strcpy(affix->find,find);
+    strcpy(affix->repl,repl);
+    affix->replen=strlen(repl);
+    Conf->naffixes++;
+    return(0);
+}
+
+/*
+ * Copy src to dist leaving out every ' ', '-' and tab character.
+ * dist must be at least as large as src.  Returns dist.
+ */
+static char *
+remove_spaces(char *dist,char *src){
+char *d,*s;
+    d=dist;
+    s=src;
+    while(*s){
+        if(*s!=' '&&*s!='-'&&*s!='\t'){
+            *d=*s;
+            d++;
+        }
+        s++;
+    }
+    *d=0;
+    return(dist);
+}
+
+
+/*
+ * Read an affix file and register its rules with AddAffix().
+ * Returns 1 if the file cannot be opened, 0 otherwise.
+ */
+int
+ImportAffixes(IspellDict * Conf,const char *filename){
+    unsigned char str[BUFSIZ];
+    unsigned char flag=0;
+    unsigned char mask[BUFSIZ]="";
+    unsigned char find[BUFSIZ]="";
+    unsigned char repl[BUFSIZ]="";
+    unsigned char *s;
+    int i;
+    int suffixes=0;
+    int prefixes=0;
+    FILE *affix;
+
+    if(!(affix=fopen(filename,"r")))
+        return(1);
+
+    while(fgets(str,sizeof(str),affix)){
+        if(!STRNCASECMP(str,"suffixes")){
+            suffixes=1;
+            prefixes=0;
+            continue;
+        }
+        if(!STRNCASECMP(str,"prefixes")){
+            suffixes=0;
+            prefixes=1;
+            continue;
+        }
+        if(!STRNCASECMP(str,"flag ")){
+            s=str+5;
+            /* strchr() also matches the terminator, so test for it first */
+            while(*s && strchr("* ",*s))
+                s++;
+            flag=*s;
+            continue;
+        }
+        if((!suffixes)&&(!prefixes))continue;
+        if((s=strchr(str,'#')))*s=0;
+        
if(!*str)continue; + strlower(str); + strcpy(mask,""); + strcpy(find,""); + strcpy(repl,""); + i=sscanf(str,"%[^>\n]>%[^,\n],%[^\n]",mask,find,repl); + remove_spaces(str,repl);strcpy(repl,str); + remove_spaces(str,find);strcpy(find,str); + remove_spaces(str,mask);strcpy(mask,str); + switch(i){ + case 3: + break; + case 2: + if(*find != '\0'){ + strcpy(repl,find); + strcpy(find,""); + } + break; + default: + continue; + } + + AddAffix(Conf,(int)flag,mask,find,repl,suffixes?'s':'p'); + + } + fclose(affix); + + return(0); +} + +void +SortDictionary(IspellDict * Conf){ + int CurLet = -1, Let;size_t i; + + qsort((void*)Conf->Spell,Conf->nspell,sizeof(SPELL),cmpspell); + + for(i = 0; i < 256 ; i++ ) + Conf->SpellTree.Left[i] = -1; + + for(i = 0; i < Conf->nspell; i++) { + Let = (int)(*(Conf->Spell[i].word)) & 255; + if (CurLet != Let) { + Conf->SpellTree.Left[Let] = i; + CurLet = Let; + } + Conf->SpellTree.Right[Let] = i; + } +} + +void +SortAffixes(IspellDict * Conf) { + int CurLetP = -1, CurLetS = -1, Let; + AFFIX *Affix; size_t i; + + if (Conf->naffixes > 1) + qsort((void*)Conf->Affix,Conf->naffixes,sizeof(AFFIX),cmpaffix); + for(i = 0; i < 256; i++) { + Conf->PrefixTree.Left[i] = Conf->PrefixTree.Right[i] = -1; + Conf->SuffixTree.Left[i] = Conf->SuffixTree.Right[i] = -1; + } + + for(i = 0; i < Conf->naffixes; i++) { + Affix = &(((AFFIX*)Conf->Affix)[i]); + if(Affix->type == 'p') { + Let = (int)(*(Affix->repl)) & 255; + if (CurLetP != Let) { + Conf->PrefixTree.Left[Let] = i; + CurLetP = Let; + } + Conf->PrefixTree.Right[Let] = i; + } else { + Let = (Affix->replen) ? 
(int)(Affix->repl[Affix->replen-1]) & 255 : 0; + if (CurLetS != Let) { + Conf->SuffixTree.Left[Let] = i; + CurLetS = Let; + } + Conf->SuffixTree.Right[Let] = i; + } + } +} + +static char * +CheckSuffix(const char *word, size_t len, AFFIX *Affix, int *res, IspellDict *Conf) { + regmatch_t subs[2]; /* workaround for apache&linux */ + char newword[2*MAXNORMLEN] = ""; + int err; + + *res = strbncmp(word, Affix->repl, Affix->replen); + if (*res < 0) { + return NULL; + } + if (*res > 0) { + return NULL; + } + strcpy(newword, word); + strcpy(newword+len-Affix->replen, Affix->find); + + if (Affix->compile) { + err = regcomp(&(Affix->reg),Affix->mask,REG_EXTENDED|REG_ICASE|REG_NOSUB); + if(err){ + /*regerror(err, &(Affix->reg), regerrstr, ERRSTRSIZE);*/ + regfree(&(Affix->reg)); + return(NULL); + } + Affix->compile = 0; + } + if(!(err=regexec(&(Affix->reg),newword,1,subs,0))){ + if(FindWord(Conf, newword, Affix->flag)) + return pstrdup(newword); + } + return NULL; +} + +#define NS 1 +#define MAX_NORM 512 +static int +CheckPrefix(const char *word, size_t len, AFFIX *Affix, IspellDict *Conf, int pi, + char **forms, char ***cur ) { + regmatch_t subs[NS*2]; + char newword[2*MAXNORMLEN] = ""; + int err, ls, res, lres; + size_t newlen; + AFFIX *CAffix = Conf->Affix; + + res = strncmp(word, Affix->repl, Affix->replen); + if (res != 0) { + return res; + } + strcpy(newword, Affix->find); + strcat(newword, word+Affix->replen); + + if (Affix->compile) { + err = regcomp(&(Affix->reg),Affix->mask,REG_EXTENDED|REG_ICASE|REG_NOSUB); + if(err){ + /*regerror(err, &(Affix->reg), regerrstr, ERRSTRSIZE);*/ + regfree(&(Affix->reg)); + return (0); + } + Affix->compile = 0; + } + if(!(err=regexec(&(Affix->reg),newword,1,subs,0))){ + SPELL * curspell; + + if((curspell=FindWord(Conf, newword, Affix->flag))){ + if ((*cur - forms) < (MAX_NORM-1)) { + **cur = pstrdup(newword); + (*cur)++; **cur = NULL; + } + } + newlen = strlen(newword); + ls = Conf->SuffixTree.Left[pi]; + if ( ls>=0 && ((*cur - 
forms) < (MAX_NORM-1)) ) { + **cur = CheckSuffix(newword, newlen, &CAffix[ls], &lres, Conf); + if (**cur) { + (*cur)++; **cur = NULL; + } + } + } + return 0; +} + + +char ** +NormalizeWord(IspellDict * Conf,char *word){ +/*regmatch_t subs[NS];*/ +size_t len; +char ** forms; +char **cur; +AFFIX * Affix; +int ri, pi, ipi, lp, rp, cp, ls, rs; +int lres, rres, cres = 0; + SPELL *spell; + + len=strlen(word); + if (len > MAXNORMLEN) + return(NULL); + + strlower(word); + + forms=(char **) palloc(MAX_NORM*sizeof(char **)); + cur=forms;*cur=NULL; + + ri = (int)(*word) & 255; + pi = (int)(word[strlen(word)-1]) & 255; + Affix=(AFFIX*)Conf->Affix; + + /* Check that the word itself is normal form */ + if((spell = FindWord(Conf, word, 0))){ + *cur=pstrdup(word); + cur++;*cur=NULL; + } + + /* Find all other NORMAL forms of the 'word' */ + + for (ipi = 0; ipi <= pi; ipi += pi) { + + /* check prefix */ + lp = Conf->PrefixTree.Left[ri]; + rp = Conf->PrefixTree.Right[ri]; + while (lp >= 0 && lp <= rp) { + cp = (lp + rp) >> 1; + cres = 0; + if ((cur - forms) < (MAX_NORM-1)) { + cres = CheckPrefix(word, len, &Affix[cp], Conf, ipi, forms, &cur); + } + if ((lp < cp) && ((cur - forms) < (MAX_NORM-1)) ) { + lres = CheckPrefix(word, len, &Affix[lp], Conf, ipi, forms, &cur); + } + if ( (rp > cp) && ((cur - forms) < (MAX_NORM-1)) ) { + rres = CheckPrefix(word, len, &Affix[rp], Conf, ipi, forms, &cur); + } + if (cres < 0) { + rp = cp - 1; + lp++; + } else if (cres > 0) { + lp = cp + 1; + rp--; + } else { + lp++; + rp--; + } + } + + /* check suffix */ + ls = Conf->SuffixTree.Left[ipi]; + rs = Conf->SuffixTree.Right[ipi]; + while (ls >= 0 && ls <= rs) { + if ( ((cur - forms) < (MAX_NORM-1)) ) { + *cur = CheckSuffix(word, len, &Affix[ls], &lres, Conf); + if (*cur) { + cur++; *cur = NULL; + } + } + if ( (rs > ls) && ((cur - forms) < (MAX_NORM-1)) ) { + *cur = CheckSuffix(word, len, &Affix[rs], &rres, Conf); + if (*cur) { + cur++; *cur = NULL; + } + } + ls++; + rs--; + } /* end while */ + + } /* 
for ipi */ + + if(cur==forms){ + pfree(forms); + return(NULL); + } + return(forms); +} + +void +FreeIspell (IspellDict *Conf) { + int i; + AFFIX *Affix = (AFFIX *)Conf->Affix; + + for (i = 0; i < Conf->naffixes; i++) { + if (Affix[i].compile == 0) { + regfree(&(Affix[i].reg)); + } + } + for (i = 0; i < Conf->naffixes; i++) { + free( Conf->Spell[i].word ); + } + free(Conf->Affix); + free(Conf->Spell); + memset( (void*)Conf, 0, sizeof(IspellDict) ); + return; +} diff --git a/contrib/tsearch2/ispell/spell.h b/contrib/tsearch2/ispell/spell.h new file mode 100644 index 0000000000..3034ca6709 --- /dev/null +++ b/contrib/tsearch2/ispell/spell.h @@ -0,0 +1,51 @@ +#ifndef __SPELL_H__ +#define __SPELL_H__ + +#include +#include + +typedef struct spell_struct { + char * word; + char flag[10]; +} SPELL; + +typedef struct aff_struct { + char flag; + char type; + char mask[33]; + char find[16]; + char repl[16]; + regex_t reg; + size_t replen; + char compile; +} AFFIX; + +typedef struct Tree_struct { + int Left[256], Right[256]; +} Tree_struct; + +typedef struct { + int maffixes; + int naffixes; + AFFIX * Affix; + + int nspell; + int mspell; + SPELL *Spell; + Tree_struct SpellTree; + Tree_struct PrefixTree; + Tree_struct SuffixTree; + +} IspellDict; + +char ** NormalizeWord(IspellDict * Conf,char *word); +int ImportAffixes(IspellDict * Conf, const char *filename); +int ImportDictionary(IspellDict * Conf,const char *filename); + +int AddSpell(IspellDict * Conf,const char * word,const char *flag); +int AddAffix(IspellDict * Conf,int flag,const char *mask,const char *find,const char *repl,int type); +void SortDictionary(IspellDict * Conf); +void SortAffixes(IspellDict * Conf); +void FreeIspell (IspellDict *Conf); + +#endif diff --git a/contrib/tsearch2/prs_dcfg.c b/contrib/tsearch2/prs_dcfg.c new file mode 100644 index 0000000000..e4b0e8b644 --- /dev/null +++ b/contrib/tsearch2/prs_dcfg.c @@ -0,0 +1,119 @@ +/* + * Simple config parser + * Teodor Sigaev + */ +#include +#include 
+#include + +#include "postgres.h" + +#include "dict.h" +#include "common.h" + +#define CS_WAITKEY 0 +#define CS_INKEY 1 +#define CS_WAITEQ 2 +#define CS_WAITVALUE 3 +#define CS_INVALUE 4 +#define CS_IN2VALUE 5 +#define CS_WAITDELIM 6 +#define CS_INESC 7 +#define CS_IN2ESC 8 + +static char * +nstrdup(char *ptr, int len) { + char *res=palloc(len+1), *cptr; + memcpy(res,ptr,len); + res[len]='\0'; + cptr = ptr = res; + while(*ptr) { + if ( *ptr == '\\' ) + ptr++; + *cptr=*ptr; ptr++; cptr++; + } + *cptr='\0'; + + return res; +} + +void +parse_cfgdict(text *in, Map **m) { + Map *mptr; + char *ptr=VARDATA(in), *begin=NULL; + char num=0; + int state=CS_WAITKEY; + + while( ptr-VARDATA(in) < VARSIZE(in) - VARHDRSZ ) { + if ( *ptr==',' ) num++; + ptr++; + } + + *m=mptr=(Map*)palloc( sizeof(Map)*(num+2) ); + memset(mptr, 0, sizeof(Map)*(num+2) ); + ptr=VARDATA(in); + while( ptr-VARDATA(in) < VARSIZE(in) - VARHDRSZ ) { + if (state==CS_WAITKEY) { + if (isalpha(*ptr)) { + begin=ptr; + state=CS_INKEY; + } else if ( !isspace(*ptr) ) + elog(ERROR,"Syntax error in position %d near '%c'", ptr-VARDATA(in), *ptr); + } else if (state==CS_INKEY) { + if ( isspace(*ptr) ) { + mptr->key=nstrdup(begin, ptr-begin); + state=CS_WAITEQ; + } else if ( *ptr=='=' ) { + mptr->key=nstrdup(begin, ptr-begin); + state=CS_WAITVALUE; + } else if ( !isalpha(*ptr) ) + elog(ERROR,"Syntax error in position %d near '%c'", ptr-VARDATA(in), *ptr); + } else if ( state==CS_WAITEQ ) { + if ( *ptr=='=' ) + state=CS_WAITVALUE; + else if ( !isspace(*ptr) ) + elog(ERROR,"Syntax error in position %d near '%c'", ptr-VARDATA(in), *ptr); + } else if ( state==CS_WAITVALUE ) { + if ( *ptr=='"' ) { + begin=ptr+1; + state=CS_INVALUE; + } else if ( !isspace(*ptr) ) { + begin=ptr; + state=CS_IN2VALUE; + } + } else if ( state==CS_INVALUE ) { + if ( *ptr=='"' ) { + mptr->value = nstrdup(begin, ptr-begin); + mptr++; + state=CS_WAITDELIM; + } else if ( *ptr=='\\' ) + state=CS_INESC; + } else if ( state==CS_IN2VALUE ) { + if ( 
isspace(*ptr) || *ptr==',' ) { + mptr->value = nstrdup(begin, ptr-begin); + mptr++; + state=( *ptr==',' ) ? CS_WAITKEY : CS_WAITDELIM; + } else if ( *ptr=='\\' ) + state=CS_INESC; + } else if ( state==CS_WAITDELIM ) { + if ( *ptr==',' ) + state=CS_WAITKEY; + else if ( !isspace(*ptr) ) + elog(ERROR,"Syntax error in position %d near '%c'", ptr-VARDATA(in), *ptr); + } else if ( state == CS_INESC ) { + state=CS_INVALUE; + } else if ( state == CS_IN2ESC ) { + state=CS_IN2VALUE; + } else + elog(ERROR,"Bad parser state: %d at position %d near '%c'", state, ptr-VARDATA(in), *ptr); + ptr++; + } + + if (state==CS_IN2VALUE) { + mptr->value = nstrdup(begin, ptr-begin); + mptr++; + } else if ( !(state==CS_WAITDELIM || state==CS_WAITKEY) ) + elog(ERROR,"Unexpected end of line"); +} + + diff --git a/contrib/tsearch2/query.c b/contrib/tsearch2/query.c new file mode 100644 index 0000000000..8e714f26d5 --- /dev/null +++ b/contrib/tsearch2/query.c @@ -0,0 +1,862 @@ +/* + * IO definitions for tsquery and mtsquery. This type + * are identical, but for parsing mtsquery used parser for text + * and also morphology is used. + * Internal structure: + * query tree, then string with original value. + * Query tree with plain view. 
It's means that in array of nodes + * right child is always next and left position = item+item->left + * Teodor Sigaev + */ +#include "postgres.h" + +#include +#include + +#include "access/gist.h" +#include "access/itup.h" +#include "access/rtree.h" +#include "utils/elog.h" +#include "utils/palloc.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "storage/bufpage.h" + +#include "ts_cfg.h" +#include "tsvector.h" +#include "crc32.h" +#include "query.h" +#include "rewrite.h" +#include "common.h" + + +PG_FUNCTION_INFO_V1(tsquery_in); +Datum tsquery_in(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(tsquery_out); +Datum tsquery_out(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(exectsq); +Datum exectsq(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(rexectsq); +Datum rexectsq(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(tsquerytree); +Datum tsquerytree(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(to_tsquery); +Datum to_tsquery(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(to_tsquery_name); +Datum to_tsquery_name(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(to_tsquery_current); +Datum to_tsquery_current(PG_FUNCTION_ARGS); + +#define END 0 +#define ERR 1 +#define VAL 2 +#define OPR 3 +#define OPEN 4 +#define CLOSE 5 +#define VALTRUE 6 /* for stop words */ +#define VALFALSE 7 + +/* parser's states */ +#define WAITOPERAND 1 +#define WAITOPERATOR 2 + +/* + * node of query tree, also used + * for storing polish notation in parser + */ +typedef struct NODE +{ + int2 weight; + int2 type; + int4 val; + int2 distance; + int2 length; + struct NODE *next; +} NODE; + +typedef struct +{ + char *buf; + int4 state; + int4 count; + /* reverse polish notation in list (for temprorary usage) */ + NODE *str; + /* number in str */ + int4 num; + + /* user-friendly operand */ + int4 lenop; + int4 sumlen; + char *op; + char *curop; + + /* state for value's parser */ + TI_IN_STATE valstate; + + /* tscfg */ + int cfg_id; +} QPRS_STATE; + +static char* +get_weight(char *buf, int2 *weight) { + *weight = 0; + + 
if ( *buf != ':' ) + return buf; + + buf++; + while( *buf ) { + switch(tolower(*buf)) { + case 'a': *weight |= 1<<3; break; + case 'b': *weight |= 1<<2; break; + case 'c': *weight |= 1<<1; break; + case 'd': *weight |= 1; break; + default: return buf; + } + buf++; + } + + return buf; +} + +/* + * get token from query string + */ +static int4 +gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, int2 *weight) +{ + while (1) + { + switch (state->state) + { + case WAITOPERAND: + if (*(state->buf) == '!') + { + (state->buf)++; + *val = (int4) '!'; + return OPR; + } + else if (*(state->buf) == '(') + { + state->count++; + (state->buf)++; + return OPEN; + } else if ( *(state->buf) == ':' ) { + elog(ERROR,"Error at start of operand"); + } else if (*(state->buf) != ' ') { + state->valstate.prsbuf = state->buf; + state->state = WAITOPERATOR; + if (gettoken_tsvector(&(state->valstate))) + { + *strval = state->valstate.word; + *lenval = state->valstate.curpos - state->valstate.word; + state->buf = get_weight(state->valstate.prsbuf, weight); + return VAL; + } + else + elog(ERROR, "No operand"); + } + break; + case WAITOPERATOR: + if (*(state->buf) == '&' || *(state->buf) == '|') + { + state->state = WAITOPERAND; + *val = (int4) *(state->buf); + (state->buf)++; + return OPR; + } + else if (*(state->buf) == ')') + { + (state->buf)++; + state->count--; + return (state->count < 0) ? ERR : CLOSE; + } + else if (*(state->buf) == '\0') + return (state->count) ? 
ERR : END; + else if (*(state->buf) != ' ') + return ERR; + break; + default: + return ERR; + break; + } + (state->buf)++; + } + return END; +} + +/* + * push new one in polish notation reverse view + */ +static void +pushquery(QPRS_STATE * state, int4 type, int4 val, int4 distance, int4 lenval, int2 weight) +{ + NODE *tmp = (NODE *) palloc(sizeof(NODE)); + + tmp->weight = weight; + tmp->type = type; + tmp->val = val; + if (distance >= MAXSTRPOS) + elog(ERROR, "Value is too big"); + if (lenval >= MAXSTRLEN) + elog(ERROR, "Operand is too long"); + tmp->distance = distance; + tmp->length = lenval; + tmp->next = state->str; + state->str = tmp; + state->num++; +} + +/* + * This function is used for tsquery parsing + */ +static void +pushval_asis(QPRS_STATE * state, int type, char *strval, int lenval, int2 weight) +{ + if (lenval >= MAXSTRLEN) + elog(ERROR, "Word is too long"); + + pushquery(state, type, crc32_sz((uint8 *) strval, lenval), + state->curop - state->op, lenval, weight); + + while (state->curop - state->op + lenval + 1 >= state->lenop) + { + int4 tmp = state->curop - state->op; + + state->lenop *= 2; + state->op = (char *) repalloc((void *) state->op, state->lenop); + state->curop = state->op + tmp; + } + memcpy((void *) state->curop, (void *) strval, lenval); + state->curop += lenval; + *(state->curop) = '\0'; + state->curop++; + state->sumlen += lenval + 1; + return; +} + +/* + * This function is used for morph parsing + */ +static void +pushval_morph(QPRS_STATE * state, int typeval, char *strval, int lenval, int2 weight) +{ + int4 count = 0; + PRSTEXT prs; + + prs.lenwords = 32; + prs.curwords = 0; + prs.pos = 0; + prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords); + + parsetext_v2(findcfg(state->cfg_id), &prs, strval, lenval); + + for(count=0;countlen == item->length) + return strncmp( + &(chkval->values[ptr->pos]), + &(chkval->operand[item->distance]), + item->length); + + return (ptr->len > item->length) ? 
1 : -1; +} + +/* + * check weight info + */ +static bool +checkclass_str(CHKVAL * chkval, WordEntry * val, ITEM * item) { + WordEntryPos *ptr = (WordEntryPos*) (chkval->values+val->pos+SHORTALIGN(val->len)+sizeof(uint16)); + uint16 len = *( (uint16*) (chkval->values+val->pos+SHORTALIGN(val->len)) ); + while (len--) { + if ( item->weight & ( 1<weight ) ) + return true; + ptr++; + } + return false; +} + +/* + * is there value 'val' in array or not ? + */ +static bool +checkcondition_str(void *checkval, ITEM * val) +{ + WordEntry *StopLow = ((CHKVAL *) checkval)->arrb; + WordEntry *StopHigh = ((CHKVAL *) checkval)->arre; + WordEntry *StopMiddle; + int difference; + + /* Loop invariant: StopLow <= val < StopHigh */ + + while (StopLow < StopHigh) + { + StopMiddle = StopLow + (StopHigh - StopLow) / 2; + difference = ValCompare((CHKVAL *) checkval, StopMiddle, val); + if (difference == 0) + return ( val->weight && StopMiddle->haspos ) ? + checkclass_str((CHKVAL *) checkval,StopMiddle, val) : true; + else if (difference < 0) + StopLow = StopMiddle + 1; + else + StopHigh = StopMiddle; + } + + return (false); +} + +/* + * check for boolean condition + */ +bool +TS_execute(ITEM * curitem, void *checkval, bool calcnot, bool (*chkcond) (void *checkval, ITEM * val)) +{ + if (curitem->type == VAL) + return (*chkcond) (checkval, curitem); + else if (curitem->val == (int4) '!') + { + return (calcnot) ? + ((TS_execute(curitem + 1, checkval, calcnot, chkcond)) ? 
false : true) + : true; + } + else if (curitem->val == (int4) '&') + { + if (TS_execute(curitem + curitem->left, checkval, calcnot, chkcond)) + return TS_execute(curitem + 1, checkval, calcnot, chkcond); + else + return false; + } + else + { /* |-operator */ + if (TS_execute(curitem + curitem->left, checkval, calcnot, chkcond)) + return true; + else + return TS_execute(curitem + 1, checkval, calcnot, chkcond); + } + return false; +} + +/* + * boolean operations + */ +Datum +rexectsq(PG_FUNCTION_ARGS) +{ + return DirectFunctionCall2( + exectsq, + PG_GETARG_DATUM(1), + PG_GETARG_DATUM(0) + ); +} + +Datum +exectsq(PG_FUNCTION_ARGS) +{ + tsvector *val = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0))); + QUERYTYPE *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1))); + CHKVAL chkval; + bool result; + + if (!val->size || !query->size) + { + PG_FREE_IF_COPY(val, 0); + PG_FREE_IF_COPY(query, 1); + PG_RETURN_BOOL(false); + } + + chkval.arrb = ARRPTR(val); + chkval.arre = chkval.arrb + val->size; + chkval.values = STRPTR(val); + chkval.operand = GETOPERAND(query); + result = TS_execute( + GETQUERY(query), + &chkval, + true, + checkcondition_str + ); + + PG_FREE_IF_COPY(val, 0); + PG_FREE_IF_COPY(query, 1); + PG_RETURN_BOOL(result); +} + +/* + * find left operand in polish notation view + */ +static void +findoprnd(ITEM * ptr, int4 *pos) +{ +#ifdef BS_DEBUG + elog(DEBUG3, (ptr[*pos].type == OPR) ? 
+ "%d %c" : "%d %d ", *pos, ptr[*pos].val); +#endif + if (ptr[*pos].type == VAL || ptr[*pos].type == VALTRUE) + { + ptr[*pos].left = 0; + (*pos)++; + } + else if (ptr[*pos].val == (int4) '!') + { + ptr[*pos].left = 1; + (*pos)++; + findoprnd(ptr, pos); + } + else + { + ITEM *curitem = &ptr[*pos]; + int4 tmp = *pos; + + (*pos)++; + findoprnd(ptr, pos); + curitem->left = *pos - tmp; + findoprnd(ptr, pos); + } +} + + +/* + * input + */ +static QUERYTYPE * +queryin(char *buf, void (*pushval) (QPRS_STATE *, int, char *, int, int2), int cfg_id) +{ + QPRS_STATE state; + int4 i; + QUERYTYPE *query; + int4 commonlen; + ITEM *ptr; + NODE *tmp; + int4 pos = 0; + +#ifdef BS_DEBUG + char pbuf[16384], + *cur; +#endif + + /* init state */ + state.buf = buf; + state.state = WAITOPERAND; + state.count = 0; + state.num = 0; + state.str = NULL; + state.cfg_id=cfg_id; + + /* init value parser's state */ + state.valstate.oprisdelim = true; + state.valstate.len = 32; + state.valstate.word = (char *) palloc(state.valstate.len); + + /* init list of operand */ + state.sumlen = 0; + state.lenop = 64; + state.curop = state.op = (char *) palloc(state.lenop); + *(state.curop) = '\0'; + + /* parse query & make polish notation (postfix, but in reverse order) */ + makepol(&state, pushval); + pfree(state.valstate.word); + if (!state.num) + elog(ERROR, "Empty query"); + + /* make finish struct */ + commonlen = COMPUTESIZE(state.num, state.sumlen); + query = (QUERYTYPE *) palloc(commonlen); + query->len = commonlen; + query->size = state.num; + ptr = GETQUERY(query); + + /* set item in polish notation */ + for (i = 0; i < state.num; i++) + { + ptr[i].weight = state.str->weight; + ptr[i].type = state.str->type; + ptr[i].val = state.str->val; + ptr[i].distance = state.str->distance; + ptr[i].length = state.str->length; + tmp = state.str->next; + pfree(state.str); + state.str = tmp; + } + + /* set user friendly-operand view */ + memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen); + 
pfree(state.op); + + /* set left operand's position for every operator */ + pos = 0; + findoprnd(ptr, &pos); + +#ifdef BS_DEBUG + cur = pbuf; + *cur = '\0'; + for (i = 0; i < query->size; i++) + { + if (ptr[i].type == OPR) + sprintf(cur, "%c(%d) ", ptr[i].val, ptr[i].left); + else + sprintf(cur, "%d(%s) ", ptr[i].val, GETOPERAND(query) + ptr[i].distance); + cur = strchr(cur, '\0'); + } + elog(DEBUG3, "POR: %s", pbuf); +#endif + + return query; +} + +/* + * in without morphology + */ +Datum +tsquery_in(PG_FUNCTION_ARGS) +{ + PG_RETURN_POINTER(queryin((char *) PG_GETARG_POINTER(0), pushval_asis, 0)); +} + +/* + * out function + */ +typedef struct +{ + ITEM *curpol; + char *buf; + char *cur; + char *op; + int4 buflen; +} INFIX; + +#define RESIZEBUF(inf,addsize) \ +while( ( inf->cur - inf->buf ) + addsize + 1 >= inf->buflen ) \ +{ \ + int4 len = inf->cur - inf->buf; \ + inf->buflen *= 2; \ + inf->buf = (char*) repalloc( (void*)inf->buf, inf->buflen ); \ + inf->cur = inf->buf + len; \ +} + +/* + * recursive walk on tree and print it in + * infix (human-readable) view + */ +static void +infix(INFIX * in, bool first) +{ + if (in->curpol->type == VAL) + { + char *op = in->op + in->curpol->distance; + + RESIZEBUF(in, in->curpol->length * 2 + 2 + 5); + *(in->cur) = '\''; + in->cur++; + while (*op) + { + if (*op == '\'') + { + *(in->cur) = '\\'; + in->cur++; + } + *(in->cur) = *op; + op++; + in->cur++; + } + *(in->cur) = '\''; + in->cur++; + if ( in->curpol->weight ) { + *(in->cur) = ':'; in->cur++; + if ( in->curpol->weight & (1<<3) ) { *(in->cur) = 'A'; in->cur++; } + if ( in->curpol->weight & (1<<2) ) { *(in->cur) = 'B'; in->cur++; } + if ( in->curpol->weight & (1<<1) ) { *(in->cur) = 'C'; in->cur++; } + if ( in->curpol->weight & 1 ) { *(in->cur) = 'D'; in->cur++; } + } + *(in->cur) = '\0'; + in->curpol++; + } + else if (in->curpol->val == (int4) '!') + { + bool isopr = false; + + RESIZEBUF(in, 1); + *(in->cur) = '!'; + in->cur++; + *(in->cur) = '\0'; + in->curpol++; + if 
(in->curpol->type == OPR) + { + isopr = true; + RESIZEBUF(in, 2); + sprintf(in->cur, "( "); + in->cur = strchr(in->cur, '\0'); + } + infix(in, isopr); + if (isopr) + { + RESIZEBUF(in, 2); + sprintf(in->cur, " )"); + in->cur = strchr(in->cur, '\0'); + } + } + else + { + int4 op = in->curpol->val; + INFIX nrm; + + in->curpol++; + if (op == (int4) '|' && !first) + { + RESIZEBUF(in, 2); + sprintf(in->cur, "( "); + in->cur = strchr(in->cur, '\0'); + } + + nrm.curpol = in->curpol; + nrm.op = in->op; + nrm.buflen = 16; + nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen); + + /* get right operand */ + infix(&nrm, false); + + /* get & print left operand */ + in->curpol = nrm.curpol; + infix(in, false); + + /* print operator & right operand */ + RESIZEBUF(in, 3 + (nrm.cur - nrm.buf)); + sprintf(in->cur, " %c %s", op, nrm.buf); + in->cur = strchr(in->cur, '\0'); + pfree(nrm.buf); + + if (op == (int4) '|' && !first) + { + RESIZEBUF(in, 2); + sprintf(in->cur, " )"); + in->cur = strchr(in->cur, '\0'); + } + } +} + + +Datum +tsquery_out(PG_FUNCTION_ARGS) +{ + QUERYTYPE *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0))); + INFIX nrm; + + if (query->size == 0) + { + char *b = palloc(1); + + *b = '\0'; + PG_RETURN_POINTER(b); + } + nrm.curpol = GETQUERY(query); + nrm.buflen = 32; + nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen); + *(nrm.cur) = '\0'; + nrm.op = GETOPERAND(query); + infix(&nrm, true); + + PG_FREE_IF_COPY(query, 0); + PG_RETURN_POINTER(nrm.buf); +} + +/* + * debug function, used only for view query + * which will be executed in non-leaf pages in index + */ +Datum +tsquerytree(PG_FUNCTION_ARGS) +{ + QUERYTYPE *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0))); + INFIX nrm; + text *res; + ITEM *q; + int4 len; + + + if (query->size == 0) + { + res = (text *) palloc(VARHDRSZ); + VARATT_SIZEP(res) = VARHDRSZ; + PG_RETURN_POINTER(res); + } + + q = clean_NOT_v2(GETQUERY(query), &len); + + if (!q) 
+ { + res = (text *) palloc(1 + VARHDRSZ); + VARATT_SIZEP(res) = 1 + VARHDRSZ; + *((char *) VARDATA(res)) = 'T'; + } + else + { + nrm.curpol = q; + nrm.buflen = 32; + nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen); + *(nrm.cur) = '\0'; + nrm.op = GETOPERAND(query); + infix(&nrm, true); + + res = (text *) palloc(nrm.cur - nrm.buf + VARHDRSZ); + VARATT_SIZEP(res) = nrm.cur - nrm.buf + VARHDRSZ; + strncpy(VARDATA(res), nrm.buf, nrm.cur - nrm.buf); + pfree(q); + } + + PG_FREE_IF_COPY(query, 0); + + PG_RETURN_POINTER(res); +} + +Datum +to_tsquery(PG_FUNCTION_ARGS) { + text *in = PG_GETARG_TEXT_P(1); + char *str; + QUERYTYPE *query; + ITEM *res; + int4 len; + + str=text2char(in); + PG_FREE_IF_COPY(in,1); + + query = queryin(str, pushval_morph, PG_GETARG_INT32(0)); + res = clean_fakeval_v2(GETQUERY(query), &len); + if (!res) + { + query->len = HDRSIZEQT; + query->size = 0; + PG_RETURN_POINTER(query); + } + memcpy((void *) GETQUERY(query), (void *) res, len * sizeof(ITEM)); + pfree(res); + PG_RETURN_POINTER(query); +} +/* to_tsquery_name: argument 0 is a configuration name, argument 1 is the query text. The name is resolved with name2id_cfg() and the work is delegated to to_tsquery(), whose datum is returned unchanged. */ +Datum +to_tsquery_name(PG_FUNCTION_ARGS) { + text *name=PG_GETARG_TEXT_P(0); + Datum res= DirectFunctionCall2( + to_tsquery, + Int32GetDatum( name2id_cfg(name) ), + PG_GETARG_DATUM(1) + ); + + PG_FREE_IF_COPY(name,0); /* name was fetched from argument 0, so it must be compared against slot 0; slot 1 never matches and would free a non-copied datum */ + PG_RETURN_DATUM(res); +} + +Datum +to_tsquery_current(PG_FUNCTION_ARGS) { + PG_RETURN_DATUM( DirectFunctionCall2( + to_tsquery, + Int32GetDatum( get_currcfg() ), + PG_GETARG_DATUM(0) + )); +} + + diff --git a/contrib/tsearch2/query.h b/contrib/tsearch2/query.h new file mode 100644 index 0000000000..c0715a2a03 --- /dev/null +++ b/contrib/tsearch2/query.h @@ -0,0 +1,55 @@ +#ifndef __QUERY_H__ +#define __QUERY_H__ +/* +#define BS_DEBUG +*/ + + +/* + * item in polish notation with back link + * to left operand + */ +typedef struct ITEM +{ + int8 type; + int8 weight; + int2 left; + int4 val; + /* user-friendly value, must correlate with WordEntry */ + uint32 + unused:1, + length:11, + distance:20; +} ITEM; + +/* + *Storage: + *
(len)(size)(array of ITEM)(array of operand in user-friendly form) + */ +typedef struct +{ + int4 len; + int4 size; + char data[1]; +} QUERYTYPE; + +#define HDRSIZEQT ( 2*sizeof(int4) ) +#define COMPUTESIZE(size,lenofoperand) ( HDRSIZEQT + size * sizeof(ITEM) + lenofoperand ) +#define GETQUERY(x) (ITEM*)( (char*)(x)+HDRSIZEQT ) +#define GETOPERAND(x) ( (char*)GETQUERY(x) + ((QUERYTYPE*)x)->size * sizeof(ITEM) ) + +#define ISOPERATOR(x) ( (x)=='!' || (x)=='&' || (x)=='|' || (x)=='(' || (x)==')' ) + +#define END 0 +#define ERR 1 +#define VAL 2 +#define OPR 3 +#define OPEN 4 +#define CLOSE 5 +#define VALTRUE 6 /* for stop words */ +#define VALFALSE 7 + +bool TS_execute(ITEM * curitem, void *checkval, + bool calcnot, bool (*chkcond) (void *checkval, ITEM * val)); + +#endif diff --git a/contrib/tsearch2/rank.c b/contrib/tsearch2/rank.c new file mode 100644 index 0000000000..b73f400b88 --- /dev/null +++ b/contrib/tsearch2/rank.c @@ -0,0 +1,591 @@ +/* + * Relevation + * Teodor Sigaev + */ +#include "postgres.h" +#include + +#include "access/gist.h" +#include "access/itup.h" +#include "utils/elog.h" +#include "utils/palloc.h" +#include "utils/builtins.h" +#include "fmgr.h" +#include "funcapi.h" +#include "storage/bufpage.h" +#include "executor/spi.h" +#include "commands/trigger.h" +#include "nodes/pg_list.h" +#include "catalog/namespace.h" + +#include "utils/array.h" + +#include "tsvector.h" +#include "query.h" +#include "common.h" + +PG_FUNCTION_INFO_V1(rank); +Datum rank(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(rank_def); +Datum rank_def(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(rank_cd); +Datum rank_cd(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(rank_cd_def); +Datum rank_cd_def(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(get_covers); +Datum get_covers(PG_FUNCTION_ARGS); + +static float weights[]={0.1, 0.2, 0.4, 1.0}; + +#define wpos(wep) ( w[ ((WordEntryPos*)(wep))->weight ] ) + +#define DEF_NORM_METHOD 0 + +/* + * Returns a weight of a word collocation + */ +static float4 
word_distance ( int4 w ) { + if ( w>100 ) + return 1e-30; + + return 1.0/(1.005+0.05*exp( ((float4)w)/1.5-2) ); +} + +static int +cnt_length( tsvector *t ) { + WordEntry *ptr=ARRPTR(t), *end=(WordEntry*)STRPTR(t); + int len = 0, clen; + + while(ptr < end) { + if ( (clen=POSDATALEN(t, ptr)) == 0 ) + len += 1; + else + len += clen; + ptr++; + } + + return len; +} + +static int4 +WordECompareITEM(char *eval, char *qval, WordEntry * ptr, ITEM * item) { + if (ptr->len == item->length) + return strncmp( + eval + ptr->pos, + qval + item->distance, + item->length); + + return (ptr->len > item->length) ? 1 : -1; +} + +static WordEntry* +find_wordentry(tsvector *t, QUERYTYPE *q, ITEM *item) { + WordEntry *StopLow = ARRPTR(t); + WordEntry *StopHigh = (WordEntry*)STRPTR(t); + WordEntry *StopMiddle; + int difference; + + /* Loop invariant: StopLow <= item < StopHigh */ + + while (StopLow < StopHigh) + { + StopMiddle = StopLow + (StopHigh - StopLow) / 2; + difference = WordECompareITEM(STRPTR(t), GETOPERAND(q), StopMiddle, item); + if (difference == 0) + return StopMiddle; + else if (difference < 0) + StopLow = StopMiddle + 1; + else + StopHigh = StopMiddle; + } + + return NULL; +} + +static WordEntryPos POSNULL[]={ + {0,0}, + {0,MAXENTRYPOS-1} +}; + +static float +calc_rank_and(float *w, tsvector *t, QUERYTYPE *q) { + uint16 **pos=(uint16**)palloc(sizeof(uint16*) * q->size); + int i,k,l,p; + WordEntry *entry; + WordEntryPos *post,*ct; + int4 dimt,lenct,dist; + float res=-1.0; + ITEM *item=GETQUERY(q); + + memset(pos,0,sizeof(uint16**) * q->size); + *(uint16*)POSNULL = lengthof(POSNULL)-1; + + for(i=0; isize; i++) { + + if ( item[i].type != VAL ) + continue; + + entry=find_wordentry(t,q,&(item[i])); + if ( !entry ) + continue; + + if ( entry->haspos ) + pos[i] = (uint16*)_POSDATAPTR(t,entry); + else + pos[i] = (uint16*)POSNULL; + + + dimt = *(uint16*)(pos[i]); + post = (WordEntryPos*)(pos[i]+1); + for( k=0; ksize; i++) { + if ( item[i].type != VAL ) + continue; + + 
entry=find_wordentry(t,q,&(item[i])); + if ( !entry ) + continue; + + if ( entry->haspos ) { + dimt = POSDATALEN(t,entry); + post = POSDATAPTR(t,entry); + } else { + dimt = *(uint16*)POSNULL; + post = POSNULL+1; + } + + for(j=0;jsize || !q->size) + return 0.0; + + res = ( item->type != VAL && item->val == (int4) '&' ) ? + calc_rank_and(w,t,q) : calc_rank_or(w,t,q); + + if ( res < 0 ) + res = 1e-20; + + switch(method) { + case 0: break; + case 1: res /= log((float)cnt_length(t)); break; + case 2: res /= (float)cnt_length(t); break; + default: + elog(ERROR,"Unknown normalization method: %d",method); + } + + return res; +} + +Datum +rank(PG_FUNCTION_ARGS) { + ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); + tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1)); + QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2)); + int method=DEF_NORM_METHOD; + float res=0.0; + float ws[ lengthof(weights) ]; + int i; + + if ( ARR_NDIM(win) != 1 ) + elog(ERROR,"Array of weight is not one dimentional"); + if ( ARRNELEMS(win) < lengthof(weights) ) + elog(ERROR,"Array of weight is too short"); + + for(i=0;i= 0 ) ? 
((float4*)ARR_DATA_PTR(win))[i] : weights[i]; + if ( ws[ i ] > 1.0 ) + elog(ERROR,"Weight out of range"); + } + + if ( PG_NARGS() == 4 ) + method=PG_GETARG_INT32(3); + + res=calc_rank(ws, txt, query, method); + + PG_FREE_IF_COPY(win, 0); + PG_FREE_IF_COPY(txt, 1); + PG_FREE_IF_COPY(query, 2); + PG_RETURN_FLOAT4(res); +} + +Datum +rank_def(PG_FUNCTION_ARGS) { + tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); + QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1)); + float res=0.0; + int method=DEF_NORM_METHOD; + + if ( PG_NARGS() == 3 ) + method=PG_GETARG_INT32(2); + + res=calc_rank(weights, txt, query, method); + + PG_FREE_IF_COPY(txt, 0); + PG_FREE_IF_COPY(query, 1); + PG_RETURN_FLOAT4(res); +} + + +typedef struct { + ITEM *item; + int32 pos; +} DocRepresentation; + +static int +compareDocR(const void *a, const void *b) { + if ( ((DocRepresentation *) a)->pos == ((DocRepresentation *) b)->pos ) + return 1; + return ( ((DocRepresentation *) a)->pos > ((DocRepresentation *) b)->pos ) ? 
1 : -1; +} + + +typedef struct { + DocRepresentation *doc; + int len; +} ChkDocR; + +static bool +checkcondition_DR(void *checkval, ITEM *val) { + DocRepresentation *ptr = ((ChkDocR*)checkval)->doc; + + while( ptr - ((ChkDocR*)checkval)->doc < ((ChkDocR*)checkval)->len ) { + if ( val == ptr->item ) + return true; + ptr++; + } + + return false; +} + + +static bool +Cover(DocRepresentation *doc, int len, QUERYTYPE *query, int *pos, int *p, int *q) { + int i; + DocRepresentation *ptr,*f=(DocRepresentation*)0xffffffff; + ITEM *item=GETQUERY(query); + int lastpos=*pos; + int oldq=*q; + + *p=0x7fffffff; + *q=0; + + for(i=0; isize; i++) { + if ( item->type != VAL ) { + item++; + continue; + } + ptr = doc + *pos; + + while(ptr-docitem == item ) { + if ( ptr->pos > *q ) { + *q = ptr->pos; + lastpos= ptr - doc; + } + break; + } + ptr++; + } + + item++; + } + + if (*q==0 ) + return false; + + if (*q==oldq) { /* already check this pos */ + (*pos)++; + return Cover(doc, len, query, pos,p,q); + } + + item=GETQUERY(query); + for(i=0; isize; i++) { + if ( item->type != VAL ) { + item++; + continue; + } + ptr = doc + lastpos; + + while(ptr>=doc+*pos) { + if ( ptr->item == item ) { + if ( ptr->pos < *p ) { + *p = ptr->pos; + f=ptr; + } + break; + } + ptr--; + } + item++; + } + + if ( *p<=*q ) { + ChkDocR ch = { f, (doc + lastpos)-f+1 }; + *pos = f-doc+1; + if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_DR) ) { + /*elog(NOTICE,"OP:%d NP:%d P:%d Q:%d", *pos, lastpos, *p, *q);*/ + return true; + } else + return Cover(doc, len, query, pos,p,q); + } + + return false; +} + +static DocRepresentation* +get_docrep(tsvector *txt, QUERYTYPE *query, int *doclen) { + ITEM *item=GETQUERY(query); + WordEntry *entry; + WordEntryPos *post; + int4 dimt,j,i; + int len=query->size*4,cur=0; + DocRepresentation *doc; + + *(uint16*)POSNULL = lengthof(POSNULL)-1; + doc = (DocRepresentation*)palloc(sizeof(DocRepresentation)*len); + for(i=0; isize; i++) { + if ( item[i].type != VAL ) + continue; 
+ + entry=find_wordentry(txt,query,&(item[i])); + if ( !entry ) + continue; + + if ( entry->haspos ) { + dimt = POSDATALEN(txt,entry); + post = POSDATAPTR(txt,entry); + } else { + dimt = *(uint16*)POSNULL; + post = POSNULL+1; + } + + while( cur+dimt >= len ) { + len*=2; + doc = (DocRepresentation*)repalloc(doc,sizeof(DocRepresentation)*len); + } + + for(j=0;j0 ) { + if ( cur>1 ) + qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR); + return doc; + } + + pfree(doc); + return NULL; +} + + +Datum +rank_cd(PG_FUNCTION_ARGS) { + int K = PG_GETARG_INT32(0); + tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1)); + QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2)); + int method=DEF_NORM_METHOD; + DocRepresentation *doc; + float res=0.0; + int p=0,q=0,len,cur; + + doc = get_docrep(txt, query, &len); + if ( !doc ) { + PG_FREE_IF_COPY(txt, 1); + PG_FREE_IF_COPY(query, 2); + PG_RETURN_FLOAT4(0.0); + } + + cur=0; + if (K<=0) + K=4; + while( Cover(doc, len, query, &cur, &p, &q) ) + res += ( q-p+1 > K ) ? ((float)K)/((float)(q-p+1)) : 1.0; + + if ( PG_NARGS() == 4 ) + method=PG_GETARG_INT32(3); + + switch(method) { + case 0: break; + case 1: res /= log((float)cnt_length(txt)); break; + case 2: res /= (float)cnt_length(txt); break; + default: + elog(ERROR,"Unknown normalization method: %d",method); + } + + pfree(doc); + PG_FREE_IF_COPY(txt, 1); + PG_FREE_IF_COPY(query, 2); + + PG_RETURN_FLOAT4(res); +} + + +Datum +rank_cd_def(PG_FUNCTION_ARGS) { + PG_RETURN_DATUM( DirectFunctionCall4( + rank_cd, + Int32GetDatum(-1), + PG_GETARG_DATUM(0), + PG_GETARG_DATUM(1), + ( PG_NARGS() == 3 ) ? 
PG_GETARG_DATUM(2) : Int32GetDatum(DEF_NORM_METHOD) + )); +} + +/**************debug*************/ + +typedef struct { + char *w; + int2 len; + int2 pos; + int2 start; + int2 finish; +} DocWord; + +static int +compareDocWord(const void *a, const void *b) { + if ( ((DocWord *) a)->pos == ((DocWord *) b)->pos ) + return 1; + return ( ((DocWord *) a)->pos > ((DocWord *) b)->pos ) ? 1 : -1; +} + + +Datum +get_covers(PG_FUNCTION_ARGS) { + tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); + QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1)); + WordEntry *pptr=ARRPTR(txt); + int i,dlen=0,j,cur=0,len=0,rlen; + DocWord *dw,*dwptr; + text *out; + char *cptr; + DocRepresentation *doc; + int pos=0,p,q,olddwpos=0; + int ncover=1; + + doc = get_docrep(txt, query, &rlen); + + if ( !doc ) { + out=palloc(VARHDRSZ); + VARATT_SIZEP(out) = VARHDRSZ; + PG_FREE_IF_COPY(txt,0); + PG_FREE_IF_COPY(query,1); + PG_RETURN_POINTER(out); + } + + for(i=0;isize;i++) { + if (!pptr[i].haspos) + elog(ERROR,"No pos info"); + dlen += POSDATALEN(txt,&(pptr[i])); + } + + dwptr=dw=palloc(sizeof(DocWord)*dlen); + memset(dw,0,sizeof(DocWord)*dlen); + + for(i=0;isize;i++) { + WordEntryPos *posdata = POSDATAPTR(txt,&(pptr[i])); + for(j=0;jpos < p && dwptr-dwstart=ncover; + while(dwptr->pos < q+1 && dwptr-dwfinish=ncover; + len+= 4 /* {}+two spaces */ + 2*16 /*numbers*/; + ncover++; + } + + out=palloc(VARHDRSZ+len); + cptr=((char*)out)+VARHDRSZ; + dwptr=dw; + + while( dwptr-dw < dlen) { + if ( dwptr->start ) { + sprintf(cptr,"{%d ",dwptr->start); + cptr=strchr(cptr,'\0'); + } + memcpy(cptr,dwptr->w,dwptr->len); + cptr+=dwptr->len; + *cptr=' '; + cptr++; + if ( dwptr->finish ) { + sprintf(cptr,"}%d ",dwptr->finish); + cptr=strchr(cptr,'\0'); + } + dwptr++; + } + + VARATT_SIZEP(out) = cptr - ((char*)out); + + pfree(dw); + pfree(doc); + + PG_FREE_IF_COPY(txt,0); + PG_FREE_IF_COPY(query,1); + PG_RETURN_POINTER(out); +} + diff --git a/contrib/tsearch2/rewrite.c 
b/contrib/tsearch2/rewrite.c new file mode 100644 index 0000000000..d5bc0f6e6e --- /dev/null +++ b/contrib/tsearch2/rewrite.c @@ -0,0 +1,292 @@ +/* + * Rewrite routines of query tree + * Teodor Sigaev + */ + +#include "postgres.h" + +#include + +#include "access/gist.h" +#include "access/itup.h" +#include "access/rtree.h" +#include "utils/elog.h" +#include "utils/palloc.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "storage/bufpage.h" + +#include "query.h" +#include "rewrite.h" + +typedef struct NODE +{ + struct NODE *left; /* left operand; maketree leaves it NULL unless the item is an OPR other than '!' */ + struct NODE *right; /* right operand; maketree leaves it NULL unless the item is an OPR */ + ITEM *valnode; /* item of the plain query this node stands for */ +} NODE; + +/* + * Make a query tree from the plain view of a query: for an OPR item the right operand is the item that follows it and, unless the operator is '!', the left operand is the item in->left entries after it + */ +static NODE * +maketree(ITEM * in) +{ + NODE *node = (NODE *) palloc(sizeof(NODE)); + + node->valnode = in; + node->right = node->left = NULL; + if (in->type == OPR) + { + node->right = maketree(in + 1); + if (in->val != (int4) '!') + node->left = maketree(in + in->left); + } + return node; +} + +typedef struct +{ + ITEM *ptr; /* output array of items */ + int4 len; /* allocated length of ptr, counted in items */ + int4 cur; /* number of items written so far */ +} PLAINTREE; +/* Append the subtree under node to state: the item of the node itself first, then its right subtree, then its left subtree. The output array is doubled when it is full, and each NODE is freed once it has been copied. */ +static void +plainnode(PLAINTREE * state, NODE * node) +{ + if (state->cur == state->len) + { + state->len *= 2; + state->ptr = (ITEM *) repalloc((void *) state->ptr, state->len * sizeof(ITEM)); + } + memcpy((void *) &(state->ptr[state->cur]), (void *) node->valnode, sizeof(ITEM)); + if (node->valnode->type == VAL) + state->cur++; + else if (node->valnode->val == (int4) '!') + { + state->ptr[state->cur].left = 1; + state->cur++; + plainnode(state, node->right); + } + else + { + int4 cur = state->cur; + + state->cur++; + plainnode(state, node->right); + state->ptr[cur].left = state->cur - cur; /* distance from the operator to the start of its left operand */ + plainnode(state, node->left); + } + pfree(node); +} + +/* + * Make the plain view of a tree from the 'normal' view of the tree; the item count is stored in *len, and NULL is returned when root is NULL or its item is neither VAL nor OPR + */ +static ITEM * +plaintree(NODE * root, int4 *len) +{ + PLAINTREE pl; + + pl.cur = 0; + pl.len = 16; + if (root && (root->valnode->type == VAL || root->valnode->type == OPR)) + { + pl.ptr = (ITEM *) palloc(pl.len * sizeof(ITEM)); + plainnode(&pl, root);
+ } + else + pl.ptr = NULL; + *len = pl.cur; + return pl.ptr; +} +/* Free node and every node below it; a NULL node is accepted. */ +static void +freetree(NODE * node) +{ + if (!node) + return; + if (node->left) + freetree(node->left); + if (node->right) + freetree(node->right); + pfree(node); +} + +/* + * Clean the tree of the ! operator. + * It's useful for debugging, but in + * the other case, such a view is used for searching in an index. + * Operator ! always returns TRUE, so a ! subtree is freed and NULL is returned in its place + */ +static NODE * +clean_NOT_intree(NODE * node) +{ + if (node->valnode->type == VAL) + return node; + + if (node->valnode->val == (int4) '!') + { + freetree(node); + return NULL; + } + + /* operator & or | */ + if (node->valnode->val == (int4) '|') /* | becomes NULL as soon as either operand was dropped */ + { + if ((node->left = clean_NOT_intree(node->left)) == NULL || + (node->right = clean_NOT_intree(node->right)) == NULL) + { + freetree(node); + return NULL; + } + } + else /* &: keep whichever operands survive, NULL when none does */ + { + NODE *res = node; + + node->left = clean_NOT_intree(node->left); + node->right = clean_NOT_intree(node->right); + if (node->left == NULL && node->right == NULL) + { + pfree(node); + res = NULL; + } + else if (node->left == NULL) + { + res = node->right; + pfree(node); + } + else if (node->right == NULL) + { + res = node->left; + pfree(node); + } + return res; + } + return node; +} +/* Return a palloc'd plain view of the query at ptr with the ! operators cleaned out; the item count is stored in *len. NULL is returned when nothing is left. */ +ITEM * +clean_NOT_v2(ITEM * ptr, int4 *len) +{ + NODE *root = maketree(ptr); + + return plaintree(clean_NOT_intree(root), len); +} + +#define V_UNKNOWN 0 /* subtree did not collapse to a constant */ +#define V_TRUE 1 /* subtree is always true */ +#define V_FALSE 2 /* subtree is always false */ + +/* + * Clean the query tree of values which are always in the + * text (stopwords, item type VALTRUE). When a subtree collapses to a constant, NULL is returned and *result is set to V_TRUE or V_FALSE; otherwise *result is not written + */ +static NODE * +clean_fakeval_intree(NODE * node, char *result) +{ + char lresult = V_UNKNOWN, + rresult = V_UNKNOWN; + + if (node->valnode->type == VAL) + return node; + else if (node->valnode->type == VALTRUE) + { + pfree(node); + *result = V_TRUE; + return NULL; + } + + + if (node->valnode->val == (int4) '!') + { + node->right = clean_fakeval_intree(node->right, &rresult); + if (!node->right) /* operand collapsed to a constant: report its negation */ + { + *result = (rresult == V_TRUE) ?
V_FALSE : V_TRUE; + freetree(node); + return NULL; + } + } + else if (node->valnode->val == (int4) '|') + { + NODE *res = node; + + node->left = clean_fakeval_intree(node->left, &lresult); + node->right = clean_fakeval_intree(node->right, &rresult); + if (lresult == V_TRUE || rresult == V_TRUE) + { + freetree(node); + *result = V_TRUE; + return NULL; + } + else if (lresult == V_FALSE && rresult == V_FALSE) + { + freetree(node); + *result = V_FALSE; + return NULL; + } + else if (lresult == V_FALSE) /* only the right operand is left */ + { + res = node->right; + pfree(node); + } + else if (rresult == V_FALSE) /* only the left operand is left */ + { + res = node->left; + pfree(node); + } + return res; + } + else /* operator & */ + { + NODE *res = node; + + node->left = clean_fakeval_intree(node->left, &lresult); + node->right = clean_fakeval_intree(node->right, &rresult); + if (lresult == V_FALSE || rresult == V_FALSE) + { + freetree(node); + *result = V_FALSE; + return NULL; + } + else if (lresult == V_TRUE && rresult == V_TRUE) + { + freetree(node); + *result = V_TRUE; + return NULL; + } + else if (lresult == V_TRUE) /* only the right operand is left */ + { + res = node->right; + pfree(node); + } + else if (rresult == V_TRUE) /* only the left operand is left */ + { + res = node->left; + pfree(node); + } + return res; + } + return node; +} +/* Return a plain view of the query at ptr with the always-true values cleaned out, storing its item count in *len. When the whole query collapses to a constant, a NOTICE is raised, *len is set to 0 and NULL is returned. */ +ITEM * +clean_fakeval_v2(ITEM * ptr, int4 *len) +{ + NODE *root = maketree(ptr); + char result = V_UNKNOWN; + NODE *resroot; + + resroot = clean_fakeval_intree(root, &result); + if (result != V_UNKNOWN) + { + elog(NOTICE, "Query contains only stopword(s) or doesn't contain lexem(s), ignored"); + *len = 0; + return NULL; + } + + return plaintree(resroot, len); +} diff --git a/contrib/tsearch2/rewrite.h b/contrib/tsearch2/rewrite.h new file mode 100644 index 0000000000..d47788a332 --- /dev/null +++ b/contrib/tsearch2/rewrite.h @@ -0,0 +1,7 @@ +#ifndef __REWRITE_H__ +#define __REWRITE_H__ + +ITEM *clean_NOT_v2(ITEM * ptr, int4 *len); +ITEM *clean_fakeval_v2(ITEM * ptr, int4 *len); + +#endif diff --git a/contrib/tsearch2/snmap.c b/contrib/tsearch2/snmap.c new file mode 100644 index
0000000000..fe138ad95d --- /dev/null +++ b/contrib/tsearch2/snmap.c @@ -0,0 +1,75 @@ +/* + * simple but fast map from str to Oid + * Teodor Sigaev + */ +#include +#include +#include + +#include "postgres.h" +#include "snmap.h" +#include "common.h" + +static int +compareSNMapEntry(const void *a, const void *b) { + return strcmp( ((SNMapEntry*)a)->key, ((SNMapEntry*)b)->key ); +} + +void +addSNMap( SNMap *map, char *key, Oid value ) { + if (map->len>=map->reallen) { + SNMapEntry *tmp; + int len = (map->reallen) ? 2*map->reallen : 16; + tmp=(SNMapEntry*)realloc(map->list, sizeof(SNMapEntry) * len); + if ( !tmp ) + elog(ERROR, "No memory"); + map->reallen=len; + map->list=tmp; + } + map->list[ map->len ].key = strdup(key); + if ( ! map->list[ map->len ].key ) + elog(ERROR, "No memory"); + map->list[ map->len ].value=value; + map->len++; + if ( map->len>1 ) qsort(map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry); +} + +void +addSNMap_t( SNMap *map, text *key, Oid value ) { + char *k=text2char( key ); + addSNMap(map, k, value); + pfree(k); +} + +Oid +findSNMap( SNMap *map, char *key ) { + SNMapEntry *ptr; + SNMapEntry ks = {key, 0}; + if ( map->len==0 || !map->list ) + return 0; + ptr = (SNMapEntry*) bsearch(&ks, map->list, map->len, sizeof(SNMapEntry), compareSNMapEntry); + return (ptr) ? 
ptr->value : 0; +} + +Oid +findSNMap_t( SNMap *map, text *key ) { + char *k=text2char(key); + int res; + res= findSNMap(map, k); + pfree(k); + return res; +} + +void freeSNMap( SNMap *map ) { + SNMapEntry *entry=map->list; + if ( map->list ) { + while( map->len ) { + if ( entry->key ) free(entry->key); + entry++; map->len--; + } + free( map->list ); + } + memset(map,0,sizeof(SNMap)); +} + + diff --git a/contrib/tsearch2/snmap.h b/contrib/tsearch2/snmap.h new file mode 100644 index 0000000000..b4856019c9 --- /dev/null +++ b/contrib/tsearch2/snmap.h @@ -0,0 +1,23 @@ +#ifndef __SNMAP_H__ +#define __SNMAP_H__ + +#include "postgres.h" + +typedef struct { + char *key; + Oid value; +} SNMapEntry; + +typedef struct { + int len; + int reallen; + SNMapEntry *list; +} SNMap; + +void addSNMap( SNMap *map, char *key, Oid value ); +void addSNMap_t( SNMap *map, text *key, Oid value ); +Oid findSNMap( SNMap *map, char *key ); +Oid findSNMap_t( SNMap *map, text *key ); +void freeSNMap( SNMap *map ); + +#endif diff --git a/contrib/tsearch2/snowball/api.c b/contrib/tsearch2/snowball/api.c new file mode 100644 index 0000000000..c9019ce973 --- /dev/null +++ b/contrib/tsearch2/snowball/api.c @@ -0,0 +1,48 @@ + +#include "header.h" + +extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size) +{ struct SN_env * z = (struct SN_env *) calloc(1, sizeof(struct SN_env)); + z->p = create_s(); + if (S_size) + { z->S = (symbol * *) calloc(S_size, sizeof(symbol *)); + { int i; + for (i = 0; i < S_size; i++) z->S[i] = create_s(); + } + z->S_size = S_size; + } + + if (I_size) + { z->I = (int *) calloc(I_size, sizeof(int)); + z->I_size = I_size; + } + + if (B_size) + { z->B = (symbol *) calloc(B_size, sizeof(symbol)); + z->B_size = B_size; + } + + return z; +} + +extern void SN_close_env(struct SN_env * z) +{ + if (z->S_size) + { + { int i; + for (i = 0; i < z->S_size; i++) lose_s(z->S[i]); + } + free(z->S); + } + if (z->I_size) free(z->I); + if (z->B_size) free(z->B); + if (z->p) 
lose_s(z->p); + free(z); +} + +extern void SN_set_current(struct SN_env * z, int size, const symbol * s) +{ + replace_s(z, 0, z->l, size, s); + z->c = 0; +} + diff --git a/contrib/tsearch2/snowball/api.h b/contrib/tsearch2/snowball/api.h new file mode 100644 index 0000000000..3e8b6e1851 --- /dev/null +++ b/contrib/tsearch2/snowball/api.h @@ -0,0 +1,27 @@ + +typedef unsigned char symbol; + +/* Or replace 'char' above with 'short' for 16 bit characters. + + More precisely, replace 'char' with whatever type guarantees the + character width you need. Note however that sizeof(symbol) should divide + HEAD, defined in header.h as 2*sizeof(int), without remainder, otherwise + there is an alignment problem. In the unlikely event of a problem here, + consult Martin Porter. + +*/ + +struct SN_env { + symbol * p; + int c; int a; int l; int lb; int bra; int ket; + int S_size; int I_size; int B_size; + symbol * * S; + int * I; + symbol * B; +}; + +extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size); +extern void SN_close_env(struct SN_env * z); + +extern void SN_set_current(struct SN_env * z, int size, const symbol * s); + diff --git a/contrib/tsearch2/snowball/english_stem.c b/contrib/tsearch2/snowball/english_stem.c new file mode 100644 index 0000000000..6715c7cea4 --- /dev/null +++ b/contrib/tsearch2/snowball/english_stem.c @@ -0,0 +1,894 @@ + +/* This file was generated automatically by the Snowball to ANSI C compiler */ + +#include "header.h" + +extern int english_stem(struct SN_env * z); +static int r_exception2(struct SN_env * z); +static int r_exception1(struct SN_env * z); +static int r_Step_5(struct SN_env * z); +static int r_Step_4(struct SN_env * z); +static int r_Step_3(struct SN_env * z); +static int r_Step_2(struct SN_env * z); +static int r_Step_1c(struct SN_env * z); +static int r_Step_1b(struct SN_env * z); +static int r_Step_1a(struct SN_env * z); +static int r_R2(struct SN_env * z); +static int r_R1(struct SN_env * z); +static int 
r_shortv(struct SN_env * z); +static int r_mark_regions(struct SN_env * z); +static int r_postlude(struct SN_env * z); +static int r_prelude(struct SN_env * z); + +extern struct SN_env * english_create_env(void); +extern void english_close_env(struct SN_env * z); + +static symbol s_0_0[5] = { 'g', 'e', 'n', 'e', 'r' }; + +static struct among a_0[1] = +{ +/* 0 */ { 5, s_0_0, -1, -1, 0} +}; + +static symbol s_1_0[3] = { 'i', 'e', 'd' }; +static symbol s_1_1[1] = { 's' }; +static symbol s_1_2[3] = { 'i', 'e', 's' }; +static symbol s_1_3[4] = { 's', 's', 'e', 's' }; +static symbol s_1_4[2] = { 's', 's' }; +static symbol s_1_5[2] = { 'u', 's' }; + +static struct among a_1[6] = +{ +/* 0 */ { 3, s_1_0, -1, 2, 0}, +/* 1 */ { 1, s_1_1, -1, 3, 0}, +/* 2 */ { 3, s_1_2, 1, 2, 0}, +/* 3 */ { 4, s_1_3, 1, 1, 0}, +/* 4 */ { 2, s_1_4, 1, -1, 0}, +/* 5 */ { 2, s_1_5, 1, -1, 0} +}; + +static symbol s_2_1[2] = { 'b', 'b' }; +static symbol s_2_2[2] = { 'd', 'd' }; +static symbol s_2_3[2] = { 'f', 'f' }; +static symbol s_2_4[2] = { 'g', 'g' }; +static symbol s_2_5[2] = { 'b', 'l' }; +static symbol s_2_6[2] = { 'm', 'm' }; +static symbol s_2_7[2] = { 'n', 'n' }; +static symbol s_2_8[2] = { 'p', 'p' }; +static symbol s_2_9[2] = { 'r', 'r' }; +static symbol s_2_10[2] = { 'a', 't' }; +static symbol s_2_11[2] = { 't', 't' }; +static symbol s_2_12[2] = { 'i', 'z' }; + +static struct among a_2[13] = +{ +/* 0 */ { 0, 0, -1, 3, 0}, +/* 1 */ { 2, s_2_1, 0, 2, 0}, +/* 2 */ { 2, s_2_2, 0, 2, 0}, +/* 3 */ { 2, s_2_3, 0, 2, 0}, +/* 4 */ { 2, s_2_4, 0, 2, 0}, +/* 5 */ { 2, s_2_5, 0, 1, 0}, +/* 6 */ { 2, s_2_6, 0, 2, 0}, +/* 7 */ { 2, s_2_7, 0, 2, 0}, +/* 8 */ { 2, s_2_8, 0, 2, 0}, +/* 9 */ { 2, s_2_9, 0, 2, 0}, +/* 10 */ { 2, s_2_10, 0, 1, 0}, +/* 11 */ { 2, s_2_11, 0, 2, 0}, +/* 12 */ { 2, s_2_12, 0, 1, 0} +}; + +static symbol s_3_0[2] = { 'e', 'd' }; +static symbol s_3_1[3] = { 'e', 'e', 'd' }; +static symbol s_3_2[3] = { 'i', 'n', 'g' }; +static symbol s_3_3[4] = { 'e', 'd', 'l', 'y' }; +static 
symbol s_3_4[5] = { 'e', 'e', 'd', 'l', 'y' }; +static symbol s_3_5[5] = { 'i', 'n', 'g', 'l', 'y' }; + +static struct among a_3[6] = +{ +/* 0 */ { 2, s_3_0, -1, 2, 0}, +/* 1 */ { 3, s_3_1, 0, 1, 0}, +/* 2 */ { 3, s_3_2, -1, 2, 0}, +/* 3 */ { 4, s_3_3, -1, 2, 0}, +/* 4 */ { 5, s_3_4, 3, 1, 0}, +/* 5 */ { 5, s_3_5, -1, 2, 0} +}; + +static symbol s_4_0[4] = { 'a', 'n', 'c', 'i' }; +static symbol s_4_1[4] = { 'e', 'n', 'c', 'i' }; +static symbol s_4_2[3] = { 'o', 'g', 'i' }; +static symbol s_4_3[2] = { 'l', 'i' }; +static symbol s_4_4[3] = { 'b', 'l', 'i' }; +static symbol s_4_5[4] = { 'a', 'b', 'l', 'i' }; +static symbol s_4_6[4] = { 'a', 'l', 'l', 'i' }; +static symbol s_4_7[5] = { 'f', 'u', 'l', 'l', 'i' }; +static symbol s_4_8[6] = { 'l', 'e', 's', 's', 'l', 'i' }; +static symbol s_4_9[5] = { 'o', 'u', 's', 'l', 'i' }; +static symbol s_4_10[5] = { 'e', 'n', 't', 'l', 'i' }; +static symbol s_4_11[5] = { 'a', 'l', 'i', 't', 'i' }; +static symbol s_4_12[6] = { 'b', 'i', 'l', 'i', 't', 'i' }; +static symbol s_4_13[5] = { 'i', 'v', 'i', 't', 'i' }; +static symbol s_4_14[6] = { 't', 'i', 'o', 'n', 'a', 'l' }; +static symbol s_4_15[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' }; +static symbol s_4_16[5] = { 'a', 'l', 'i', 's', 'm' }; +static symbol s_4_17[5] = { 'a', 't', 'i', 'o', 'n' }; +static symbol s_4_18[7] = { 'i', 'z', 'a', 't', 'i', 'o', 'n' }; +static symbol s_4_19[4] = { 'i', 'z', 'e', 'r' }; +static symbol s_4_20[4] = { 'a', 't', 'o', 'r' }; +static symbol s_4_21[7] = { 'i', 'v', 'e', 'n', 'e', 's', 's' }; +static symbol s_4_22[7] = { 'f', 'u', 'l', 'n', 'e', 's', 's' }; +static symbol s_4_23[7] = { 'o', 'u', 's', 'n', 'e', 's', 's' }; + +static struct among a_4[24] = +{ +/* 0 */ { 4, s_4_0, -1, 3, 0}, +/* 1 */ { 4, s_4_1, -1, 2, 0}, +/* 2 */ { 3, s_4_2, -1, 13, 0}, +/* 3 */ { 2, s_4_3, -1, 16, 0}, +/* 4 */ { 3, s_4_4, 3, 12, 0}, +/* 5 */ { 4, s_4_5, 4, 4, 0}, +/* 6 */ { 4, s_4_6, 3, 8, 0}, +/* 7 */ { 5, s_4_7, 3, 14, 0}, +/* 8 */ { 6, s_4_8, 3, 15, 0}, +/* 9 */ 
{ 5, s_4_9, 3, 10, 0}, +/* 10 */ { 5, s_4_10, 3, 5, 0}, +/* 11 */ { 5, s_4_11, -1, 8, 0}, +/* 12 */ { 6, s_4_12, -1, 12, 0}, +/* 13 */ { 5, s_4_13, -1, 11, 0}, +/* 14 */ { 6, s_4_14, -1, 1, 0}, +/* 15 */ { 7, s_4_15, 14, 7, 0}, +/* 16 */ { 5, s_4_16, -1, 8, 0}, +/* 17 */ { 5, s_4_17, -1, 7, 0}, +/* 18 */ { 7, s_4_18, 17, 6, 0}, +/* 19 */ { 4, s_4_19, -1, 6, 0}, +/* 20 */ { 4, s_4_20, -1, 7, 0}, +/* 21 */ { 7, s_4_21, -1, 11, 0}, +/* 22 */ { 7, s_4_22, -1, 9, 0}, +/* 23 */ { 7, s_4_23, -1, 10, 0} +}; + +static symbol s_5_0[5] = { 'i', 'c', 'a', 't', 'e' }; +static symbol s_5_1[5] = { 'a', 't', 'i', 'v', 'e' }; +static symbol s_5_2[5] = { 'a', 'l', 'i', 'z', 'e' }; +static symbol s_5_3[5] = { 'i', 'c', 'i', 't', 'i' }; +static symbol s_5_4[4] = { 'i', 'c', 'a', 'l' }; +static symbol s_5_5[6] = { 't', 'i', 'o', 'n', 'a', 'l' }; +static symbol s_5_6[7] = { 'a', 't', 'i', 'o', 'n', 'a', 'l' }; +static symbol s_5_7[3] = { 'f', 'u', 'l' }; +static symbol s_5_8[4] = { 'n', 'e', 's', 's' }; + +static struct among a_5[9] = /* suffix table searched backwards by r_Step_3; each entry is { s_size, s, substring_i, result, function } */ +{ +/* 0 */ { 5, s_5_0, -1, 4, 0}, +/* 1 */ { 5, s_5_1, -1, 6, 0}, +/* 2 */ { 5, s_5_2, -1, 3, 0}, +/* 3 */ { 5, s_5_3, -1, 4, 0}, +/* 4 */ { 4, s_5_4, -1, 4, 0}, +/* 5 */ { 6, s_5_5, -1, 1, 0}, +/* 6 */ { 7, s_5_6, 5, 2, 0}, +/* 7 */ { 3, s_5_7, -1, 5, 0}, +/* 8 */ { 4, s_5_8, -1, 5, 0} +}; + +static symbol s_6_0[2] = { 'i', 'c' }; +static symbol s_6_1[4] = { 'a', 'n', 'c', 'e' }; +static symbol s_6_2[4] = { 'e', 'n', 'c', 'e' }; +static symbol s_6_3[4] = { 'a', 'b', 'l', 'e' }; +static symbol s_6_4[4] = { 'i', 'b', 'l', 'e' }; +static symbol s_6_5[3] = { 'a', 't', 'e' }; +static symbol s_6_6[3] = { 'i', 'v', 'e' }; +static symbol s_6_7[3] = { 'i', 'z', 'e' }; +static symbol s_6_8[3] = { 'i', 't', 'i' }; +static symbol s_6_9[2] = { 'a', 'l' }; +static symbol s_6_10[3] = { 'i', 's', 'm' }; +static symbol s_6_11[3] = { 'i', 'o', 'n' }; +static symbol s_6_12[2] = { 'e', 'r' }; +static symbol s_6_13[3] = { 'o', 'u', 's' }; +static symbol s_6_14[3] = { 'a',
'n', 't' }; +static symbol s_6_15[3] = { 'e', 'n', 't' }; +static symbol s_6_16[4] = { 'm', 'e', 'n', 't' }; +static symbol s_6_17[5] = { 'e', 'm', 'e', 'n', 't' }; + +static struct among a_6[18] = /* suffix table searched backwards by r_Step_4 */ +{ +/* 0 */ { 2, s_6_0, -1, 1, 0}, +/* 1 */ { 4, s_6_1, -1, 1, 0}, +/* 2 */ { 4, s_6_2, -1, 1, 0}, +/* 3 */ { 4, s_6_3, -1, 1, 0}, +/* 4 */ { 4, s_6_4, -1, 1, 0}, +/* 5 */ { 3, s_6_5, -1, 1, 0}, +/* 6 */ { 3, s_6_6, -1, 1, 0}, +/* 7 */ { 3, s_6_7, -1, 1, 0}, +/* 8 */ { 3, s_6_8, -1, 1, 0}, +/* 9 */ { 2, s_6_9, -1, 1, 0}, +/* 10 */ { 3, s_6_10, -1, 1, 0}, +/* 11 */ { 3, s_6_11, -1, 2, 0}, +/* 12 */ { 2, s_6_12, -1, 1, 0}, +/* 13 */ { 3, s_6_13, -1, 1, 0}, +/* 14 */ { 3, s_6_14, -1, 1, 0}, +/* 15 */ { 3, s_6_15, -1, 1, 0}, +/* 16 */ { 4, s_6_16, 15, 1, 0}, +/* 17 */ { 5, s_6_17, 16, 1, 0} +}; + +static symbol s_7_0[1] = { 'e' }; +static symbol s_7_1[1] = { 'l' }; + +static struct among a_7[2] = /* suffix table searched backwards by r_Step_5 */ +{ +/* 0 */ { 1, s_7_0, -1, 1, 0}, +/* 1 */ { 1, s_7_1, -1, 2, 0} +}; + +static symbol s_8_0[7] = { 's', 'u', 'c', 'c', 'e', 'e', 'd' }; +static symbol s_8_1[7] = { 'p', 'r', 'o', 'c', 'e', 'e', 'd' }; +static symbol s_8_2[6] = { 'e', 'x', 'c', 'e', 'e', 'd' }; +static symbol s_8_3[7] = { 'c', 'a', 'n', 'n', 'i', 'n', 'g' }; +static symbol s_8_4[6] = { 'i', 'n', 'n', 'i', 'n', 'g' }; +static symbol s_8_5[7] = { 'e', 'a', 'r', 'r', 'i', 'n', 'g' }; +static symbol s_8_6[7] = { 'h', 'e', 'r', 'r', 'i', 'n', 'g' }; +static symbol s_8_7[6] = { 'o', 'u', 't', 'i', 'n', 'g' }; + +static struct among a_8[8] = /* word table searched backwards by r_exception2 */ +{ +/* 0 */ { 7, s_8_0, -1, -1, 0}, +/* 1 */ { 7, s_8_1, -1, -1, 0}, +/* 2 */ { 6, s_8_2, -1, -1, 0}, +/* 3 */ { 7, s_8_3, -1, -1, 0}, +/* 4 */ { 6, s_8_4, -1, -1, 0}, +/* 5 */ { 7, s_8_5, -1, -1, 0}, +/* 6 */ { 7, s_8_6, -1, -1, 0}, +/* 7 */ { 6, s_8_7, -1, -1, 0} +}; + +static symbol s_9_0[5] = { 'a', 'n', 'd', 'e', 's' }; +static symbol s_9_1[5] = { 'a', 't', 'l', 'a', 's' }; +static symbol s_9_2[4] = { 'b', 'i', 'a', 's' }; +static symbol s_9_3[6] = { 'c', 'o', 's', 'm', 'o', 's'
}; +static symbol s_9_4[5] = { 'd', 'y', 'i', 'n', 'g' }; +static symbol s_9_5[5] = { 'e', 'a', 'r', 'l', 'y' }; +static symbol s_9_6[6] = { 'g', 'e', 'n', 't', 'l', 'y' }; +static symbol s_9_7[4] = { 'h', 'o', 'w', 'e' }; +static symbol s_9_8[4] = { 'i', 'd', 'l', 'y' }; +static symbol s_9_9[5] = { 'l', 'y', 'i', 'n', 'g' }; +static symbol s_9_10[4] = { 'n', 'e', 'w', 's' }; +static symbol s_9_11[4] = { 'o', 'n', 'l', 'y' }; +static symbol s_9_12[6] = { 's', 'i', 'n', 'g', 'l', 'y' }; +static symbol s_9_13[5] = { 's', 'k', 'i', 'e', 's' }; +static symbol s_9_14[4] = { 's', 'k', 'i', 's' }; +static symbol s_9_15[3] = { 's', 'k', 'y' }; +static symbol s_9_16[5] = { 't', 'y', 'i', 'n', 'g' }; +static symbol s_9_17[4] = { 'u', 'g', 'l', 'y' }; + +static struct among a_9[18] = /* word table searched forwards by r_exception1; results 1..11 select a replacement there */ +{ +/* 0 */ { 5, s_9_0, -1, -1, 0}, +/* 1 */ { 5, s_9_1, -1, -1, 0}, +/* 2 */ { 4, s_9_2, -1, -1, 0}, +/* 3 */ { 6, s_9_3, -1, -1, 0}, +/* 4 */ { 5, s_9_4, -1, 3, 0}, +/* 5 */ { 5, s_9_5, -1, 9, 0}, +/* 6 */ { 6, s_9_6, -1, 7, 0}, +/* 7 */ { 4, s_9_7, -1, -1, 0}, +/* 8 */ { 4, s_9_8, -1, 6, 0}, +/* 9 */ { 5, s_9_9, -1, 4, 0}, +/* 10 */ { 4, s_9_10, -1, -1, 0}, +/* 11 */ { 4, s_9_11, -1, 10, 0}, +/* 12 */ { 6, s_9_12, -1, 11, 0}, +/* 13 */ { 5, s_9_13, -1, 2, 0}, +/* 14 */ { 4, s_9_14, -1, 1, 0}, +/* 15 */ { 3, s_9_15, -1, -1, 0}, +/* 16 */ { 5, s_9_16, -1, 5, 0}, +/* 17 */ { 4, s_9_17, -1, 8, 0} +}; + +static unsigned char g_v[] = { 17, 65, 16, 1 }; /* grouping tables handed to in_grouping()/out_grouping(); presumably bitmaps over the min..max range given at each call -- confirm in utilities.c */ + +static unsigned char g_v_WXY[] = { 1, 17, 65, 208, 1 }; + +static unsigned char g_valid_LI[] = { 55, 141, 2 }; + +static symbol s_0[] = { 'y' }; +static symbol s_1[] = { 'Y' }; +static symbol s_2[] = { 'y' }; +static symbol s_3[] = { 'Y' }; +static symbol s_4[] = { 's', 's' }; +static symbol s_5[] = { 'i', 'e' }; +static symbol s_6[] = { 'i' }; +static symbol s_7[] = { 'e', 'e' }; +static symbol s_8[] = { 'e' }; +static symbol s_9[] = { 'e' }; +static symbol s_10[] = { 'y' }; +static symbol s_11[] = { 'Y' }; +static symbol s_12[] = { 'i' }; +static
symbol s_13[] = { 't', 'i', 'o', 'n' }; +static symbol s_14[] = { 'e', 'n', 'c', 'e' }; +static symbol s_15[] = { 'a', 'n', 'c', 'e' }; +static symbol s_16[] = { 'a', 'b', 'l', 'e' }; +static symbol s_17[] = { 'e', 'n', 't' }; +static symbol s_18[] = { 'i', 'z', 'e' }; +static symbol s_19[] = { 'a', 't', 'e' }; +static symbol s_20[] = { 'a', 'l' }; +static symbol s_21[] = { 'f', 'u', 'l' }; +static symbol s_22[] = { 'o', 'u', 's' }; +static symbol s_23[] = { 'i', 'v', 'e' }; +static symbol s_24[] = { 'b', 'l', 'e' }; +static symbol s_25[] = { 'l' }; +static symbol s_26[] = { 'o', 'g' }; +static symbol s_27[] = { 'f', 'u', 'l' }; +static symbol s_28[] = { 'l', 'e', 's', 's' }; +static symbol s_29[] = { 't', 'i', 'o', 'n' }; +static symbol s_30[] = { 'a', 't', 'e' }; +static symbol s_31[] = { 'a', 'l' }; +static symbol s_32[] = { 'i', 'c' }; +static symbol s_33[] = { 's' }; +static symbol s_34[] = { 't' }; +static symbol s_35[] = { 'l' }; +static symbol s_36[] = { 's', 'k', 'i' }; +static symbol s_37[] = { 's', 'k', 'y' }; +static symbol s_38[] = { 'd', 'i', 'e' }; +static symbol s_39[] = { 'l', 'i', 'e' }; +static symbol s_40[] = { 't', 'i', 'e' }; +static symbol s_41[] = { 'i', 'd', 'l' }; +static symbol s_42[] = { 'g', 'e', 'n', 't', 'l' }; +static symbol s_43[] = { 'u', 'g', 'l', 'i' }; +static symbol s_44[] = { 'e', 'a', 'r', 'l', 'i' }; +static symbol s_45[] = { 'o', 'n', 'l', 'i' }; +static symbol s_46[] = { 's', 'i', 'n', 'g', 'l' }; +static symbol s_47[] = { 'Y' }; +static symbol s_48[] = { 'y' }; + +static int r_prelude(struct SN_env * z) { /* clears B[0] (Y_found), then rewrites 'y' as 'Y' and sets B[0] wherever the eq_s and in_grouping(g_v) tests below succeed; restores the cursor and always returns 1 */ + z->B[0] = 0; /* unset Y_found, line 24 */ + { int c = z->c; /* do, line 25 */ + z->bra = z->c; /* [, line 25 */ + if (!(eq_s(z, 1, s_0))) goto lab0; + z->ket = z->c; /* ], line 25 */ + if (!(in_grouping(z, g_v, 97, 121))) goto lab0; + slice_from_s(z, 1, s_1); /* <-, line 25 */ + z->B[0] = 1; /* set Y_found, line 25 */ + lab0: + z->c = c; + } + { int c = z->c; /* do, line 26 */ + while(1) { /* repeat, line 26 */ + int c
= z->c; + while(1) { /* goto, line 26 */ + int c = z->c; + if (!(in_grouping(z, g_v, 97, 121))) goto lab3; + z->bra = z->c; /* [, line 26 */ + if (!(eq_s(z, 1, s_2))) goto lab3; + z->ket = z->c; /* ], line 26 */ + z->c = c; + break; + lab3: + z->c = c; + if (z->c >= z->l) goto lab2; + z->c++; + } + slice_from_s(z, 1, s_3); /* <-, line 26 */ + z->B[0] = 1; /* set Y_found, line 26 */ + continue; + lab2: + z->c = c; + break; + } + lab1: + z->c = c; + } + return 1; +} + +static int r_mark_regions(struct SN_env * z) { /* computes I[0] (p1) and I[1] (p2), both defaulting to z->l; restores the cursor and always returns 1 */ + z->I[0] = z->l; + z->I[1] = z->l; + { int c = z->c; /* do, line 32 */ + { int c = z->c; /* or, line 36 */ + if (!(find_among(z, a_0, 1))) goto lab2; /* among, line 33 */ + goto lab1; + lab2: + z->c = c; + while(1) { /* gopast, line 36 */ + if (!(in_grouping(z, g_v, 97, 121))) goto lab3; + break; + lab3: + if (z->c >= z->l) goto lab0; + z->c++; + } + while(1) { /* gopast, line 36 */ + if (!(out_grouping(z, g_v, 97, 121))) goto lab4; + break; + lab4: + if (z->c >= z->l) goto lab0; + z->c++; + } + } + lab1: + z->I[0] = z->c; /* setmark p1, line 37 */ + while(1) { /* gopast, line 38 */ + if (!(in_grouping(z, g_v, 97, 121))) goto lab5; + break; + lab5: + if (z->c >= z->l) goto lab0; + z->c++; + } + while(1) { /* gopast, line 38 */ + if (!(out_grouping(z, g_v, 97, 121))) goto lab6; + break; + lab6: + if (z->c >= z->l) goto lab0; + z->c++; + } + z->I[1] = z->c; /* setmark p2, line 38 */ + lab0: + z->c = c; + } + return 1; +} + +static int r_shortv(struct SN_env * z) { /* backward test: returns 1 when either of the two grouping patterns below matches, otherwise 0 */ + { int m = z->l - z->c; /* or, line 46 */ + if (!(out_grouping_b(z, g_v_WXY, 89, 121))) goto lab1; + if (!(in_grouping_b(z, g_v, 97, 121))) goto lab1; + if (!(out_grouping_b(z, g_v, 97, 121))) goto lab1; + goto lab0; + lab1: + z->c = z->l - m; + if (!(out_grouping_b(z, g_v, 97, 121))) return 0; + if (!(in_grouping_b(z, g_v, 97, 121))) return 0; + if (z->c > z->lb) return 0; /* atlimit, line 47 */ + } +lab0: + return 1; +} + +static int r_R1(struct SN_env * z) { /* returns 1 when the cursor is at or after I[0] (p1) */ + if (!(z->I[0] <= z->c)) return 0;
+ return 1; +} + +static int r_R2(struct SN_env * z) { /* returns 1 when the cursor is at or after I[1] (p2) */ + if (!(z->I[1] <= z->c)) return 0; + return 1; +} + +static int r_Step_1a(struct SN_env * z) { /* matches a suffix from a_1 ending at the cursor and rewrites or deletes it according to among_var; returns 0 when nothing applies */ + int among_var; + z->ket = z->c; /* [, line 54 */ + among_var = find_among_b(z, a_1, 6); /* substring, line 54 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 54 */ + switch(among_var) { + case 0: return 0; + case 1: + slice_from_s(z, 2, s_4); /* <-, line 55 */ + break; + case 2: + { int m = z->l - z->c; /* or, line 57 */ + if (z->c <= z->lb) goto lab1; + z->c--; /* next, line 57 */ + if (z->c > z->lb) goto lab1; /* atlimit, line 57 */ + slice_from_s(z, 2, s_5); /* <-, line 57 */ + goto lab0; + lab1: + z->c = z->l - m; + slice_from_s(z, 1, s_6); /* <-, line 57 */ + } + lab0: + break; + case 3: + if (z->c <= z->lb) return 0; + z->c--; /* next, line 58 */ + while(1) { /* gopast, line 58 */ + if (!(in_grouping_b(z, g_v, 97, 121))) goto lab2; + break; + lab2: + if (z->c <= z->lb) return 0; + z->c--; + } + slice_del(z); /* delete, line 58 */ + break; + } + return 1; +} + +static int r_Step_1b(struct SN_env * z) { /* handles suffixes from a_3: case 1 requires R1 and writes "ee"; case 2 deletes the suffix and then adjusts the remaining ending using a_2 */ + int among_var; + z->ket = z->c; /* [, line 64 */ + among_var = find_among_b(z, a_3, 6); /* substring, line 64 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 64 */ + switch(among_var) { + case 0: return 0; + case 1: + if (!r_R1(z)) return 0; /* call R1, line 66 */ + slice_from_s(z, 2, s_7); /* <-, line 66 */ + break; + case 2: + { int m_test = z->l - z->c; /* test, line 69 */ + while(1) { /* gopast, line 69 */ + if (!(in_grouping_b(z, g_v, 97, 121))) goto lab0; + break; + lab0: + if (z->c <= z->lb) return 0; + z->c--; + } + z->c = z->l - m_test; + } + slice_del(z); /* delete, line 69 */ + { int m_test = z->l - z->c; /* test, line 70 */ + among_var = find_among_b(z, a_2, 13); /* substring, line 70 */ + if (!(among_var)) return 0; + z->c = z->l - m_test; + } + switch(among_var) { + case 0: return 0; + case 1: + { int c = z->c; + insert_s(z, z->c, z->c, 1, s_8); /* <+, line 72 */ + z->c
= c; + } + break; + case 2: + z->ket = z->c; /* [, line 75 */ + if (z->c <= z->lb) return 0; + z->c--; /* next, line 75 */ + z->bra = z->c; /* ], line 75 */ + slice_del(z); /* delete, line 75 */ + break; + case 3: + if (z->c != z->I[0]) return 0; /* atmark, line 76 */ + { int m_test = z->l - z->c; /* test, line 76 */ + if (!r_shortv(z)) return 0; /* call shortv, line 76 */ + z->c = z->l - m_test; + } + { int c = z->c; + insert_s(z, z->c, z->c, 1, s_9); /* <+, line 76 */ + z->c = c; + } + break; + } + break; + } + return 1; +} + +static int r_Step_1c(struct SN_env * z) { /* replaces a final 'y' or 'Y' by 'i' when the out_grouping_b(g_v) test passes and the cursor is not at the backward limit */ + z->ket = z->c; /* [, line 83 */ + { int m = z->l - z->c; /* or, line 83 */ + if (!(eq_s_b(z, 1, s_10))) goto lab1; + goto lab0; + lab1: + z->c = z->l - m; + if (!(eq_s_b(z, 1, s_11))) return 0; + } +lab0: + z->bra = z->c; /* ], line 83 */ + if (!(out_grouping_b(z, g_v, 97, 121))) return 0; + { int m = z->l - z->c; /* not, line 84 */ + if (z->c > z->lb) goto lab2; /* atlimit, line 84 */ + return 0; + lab2: + z->c = z->l - m; + } + slice_from_s(z, 1, s_12); /* <-, line 85 */ + return 1; +} + +static int r_Step_2(struct SN_env * z) { /* matches a suffix from a_4, requires R1, then rewrites the slice according to among_var */ + int among_var; + z->ket = z->c; /* [, line 89 */ + among_var = find_among_b(z, a_4, 24); /* substring, line 89 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 89 */ + if (!r_R1(z)) return 0; /* call R1, line 89 */ + switch(among_var) { + case 0: return 0; + case 1: + slice_from_s(z, 4, s_13); /* <-, line 90 */ + break; + case 2: + slice_from_s(z, 4, s_14); /* <-, line 91 */ + break; + case 3: + slice_from_s(z, 4, s_15); /* <-, line 92 */ + break; + case 4: + slice_from_s(z, 4, s_16); /* <-, line 93 */ + break; + case 5: + slice_from_s(z, 3, s_17); /* <-, line 94 */ + break; + case 6: + slice_from_s(z, 3, s_18); /* <-, line 96 */ + break; + case 7: + slice_from_s(z, 3, s_19); /* <-, line 98 */ + break; + case 8: + slice_from_s(z, 2, s_20); /* <-, line 100 */ + break; + case 9: + slice_from_s(z, 3, s_21); /* <-, line 101 */ + break; + case 10: +
slice_from_s(z, 3, s_22); /* <-, line 103 */ + break; + case 11: + slice_from_s(z, 3, s_23); /* <-, line 105 */ + break; + case 12: + slice_from_s(z, 3, s_24); /* <-, line 107 */ + break; + case 13: + if (!(eq_s_b(z, 1, s_25))) return 0; + slice_from_s(z, 2, s_26); /* <-, line 108 */ + break; + case 14: + slice_from_s(z, 3, s_27); /* <-, line 109 */ + break; + case 15: + slice_from_s(z, 4, s_28); /* <-, line 110 */ + break; + case 16: + if (!(in_grouping_b(z, g_valid_LI, 99, 116))) return 0; + slice_del(z); /* delete, line 111 */ + break; + } + return 1; +} + +static int r_Step_3(struct SN_env * z) { /* matches a suffix from a_5, requires R1, then rewrites or deletes it; case 6 additionally requires R2 */ + int among_var; + z->ket = z->c; /* [, line 116 */ + among_var = find_among_b(z, a_5, 9); /* substring, line 116 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 116 */ + if (!r_R1(z)) return 0; /* call R1, line 116 */ + switch(among_var) { + case 0: return 0; + case 1: + slice_from_s(z, 4, s_29); /* <-, line 117 */ + break; + case 2: + slice_from_s(z, 3, s_30); /* <-, line 118 */ + break; + case 3: + slice_from_s(z, 2, s_31); /* <-, line 119 */ + break; + case 4: + slice_from_s(z, 2, s_32); /* <-, line 121 */ + break; + case 5: + slice_del(z); /* delete, line 123 */ + break; + case 6: + if (!r_R2(z)) return 0; /* call R2, line 125 */ + slice_del(z); /* delete, line 125 */ + break; + } + return 1; +} + +static int r_Step_4(struct SN_env * z) { /* matches a suffix from a_6, requires R2, then deletes it; case 2 also needs a preceding 's' or 't' */ + int among_var; + z->ket = z->c; /* [, line 130 */ + among_var = find_among_b(z, a_6, 18); /* substring, line 130 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 130 */ + if (!r_R2(z)) return 0; /* call R2, line 130 */ + switch(among_var) { + case 0: return 0; + case 1: + slice_del(z); /* delete, line 133 */ + break; + case 2: + { int m = z->l - z->c; /* or, line 134 */ + if (!(eq_s_b(z, 1, s_33))) goto lab1; + goto lab0; + lab1: + z->c = z->l - m; + if (!(eq_s_b(z, 1, s_34))) return 0; + } + lab0: + slice_del(z); /* delete, line 134 */ + break; + } + return 1; +} + +static int r_Step_5(struct SN_env *
z) { /* matches a suffix from a_7: case 1 deletes it when R2 holds, or when R1 holds and r_shortv fails; case 2 deletes it when R2 holds and an 'l' precedes it */ + int among_var; + z->ket = z->c; /* [, line 139 */ + among_var = find_among_b(z, a_7, 2); /* substring, line 139 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 139 */ + switch(among_var) { + case 0: return 0; + case 1: + { int m = z->l - z->c; /* or, line 140 */ + if (!r_R2(z)) goto lab1; /* call R2, line 140 */ + goto lab0; + lab1: + z->c = z->l - m; + if (!r_R1(z)) return 0; /* call R1, line 140 */ + { int m = z->l - z->c; /* not, line 140 */ + if (!r_shortv(z)) goto lab2; /* call shortv, line 140 */ + return 0; + lab2: + z->c = z->l - m; + } + } + lab0: + slice_del(z); /* delete, line 140 */ + break; + case 2: + if (!r_R2(z)) return 0; /* call R2, line 141 */ + if (!(eq_s_b(z, 1, s_35))) return 0; + slice_del(z); /* delete, line 141 */ + break; + } + return 1; +} + +static int r_exception2(struct SN_env * z) { /* succeeds only when an a_8 entry matches backwards and the match reaches the backward limit lb */ + z->ket = z->c; /* [, line 147 */ + if (!(find_among_b(z, a_8, 8))) return 0; /* substring, line 147 */ + z->bra = z->c; /* ], line 147 */ + if (z->c > z->lb) return 0; /* atlimit, line 147 */ + return 1; +} + +static int r_exception1(struct SN_env * z) { /* forward match against a_9 that must end at z->l; cases 1..11 replace the slice, any other non-zero result returns 1 with the text unchanged */ + int among_var; + z->bra = z->c; /* [, line 159 */ + among_var = find_among(z, a_9, 18); /* substring, line 159 */ + if (!(among_var)) return 0; + z->ket = z->c; /* ], line 159 */ + if (z->c < z->l) return 0; /* atlimit, line 159 */ + switch(among_var) { + case 0: return 0; + case 1: + slice_from_s(z, 3, s_36); /* <-, line 163 */ + break; + case 2: + slice_from_s(z, 3, s_37); /* <-, line 164 */ + break; + case 3: + slice_from_s(z, 3, s_38); /* <-, line 165 */ + break; + case 4: + slice_from_s(z, 3, s_39); /* <-, line 166 */ + break; + case 5: + slice_from_s(z, 3, s_40); /* <-, line 167 */ + break; + case 6: + slice_from_s(z, 3, s_41); /* <-, line 171 */ + break; + case 7: + slice_from_s(z, 5, s_42); /* <-, line 172 */ + break; + case 8: + slice_from_s(z, 4, s_43); /* <-, line 173 */ + break; + case 9: + slice_from_s(z, 5, s_44); /* <-, line 174 */ + break; + case 10: +
slice_from_s(z, 4, s_45); /* <-, line 175 */ + break; + case 11: + slice_from_s(z, 5, s_46); /* <-, line 176 */ + break; + } + return 1; +} + +static int r_postlude(struct SN_env * z) { /* returns 0 when B[0] (Y_found) is clear; otherwise rewrites every 'Y' as 'y' and returns 1 */ + if (!(z->B[0])) return 0; /* Boolean test Y_found, line 192 */ + while(1) { /* repeat, line 192 */ + int c = z->c; + while(1) { /* goto, line 192 */ + int c = z->c; + z->bra = z->c; /* [, line 192 */ + if (!(eq_s(z, 1, s_47))) goto lab1; + z->ket = z->c; /* ], line 192 */ + z->c = c; + break; + lab1: + z->c = c; + if (z->c >= z->l) goto lab0; + z->c++; + } + slice_from_s(z, 1, s_48); /* <-, line 192 */ + continue; + lab0: + z->c = c; + break; + } + return 1; +} + +extern int english_stem(struct SN_env * z) { /* entry point: tries r_exception1 first; otherwise returns 0 when fewer than 3 symbols lie between the cursor and z->l, else runs prelude, mark_regions, the backward steps and postlude and returns 1 */ + { int c = z->c; /* or, line 196 */ + if (!r_exception1(z)) goto lab1; /* call exception1, line 196 */ + goto lab0; + lab1: + z->c = c; + { int c_test = z->c; /* test, line 198 */ + { int c = z->c + 3; + if (0 > c || c > z->l) return 0; + z->c = c; /* hop, line 198 */ + } + z->c = c_test; + } + { int c = z->c; /* do, line 199 */ + if (!r_prelude(z)) goto lab2; /* call prelude, line 199 */ + lab2: + z->c = c; + } + { int c = z->c; /* do, line 200 */ + if (!r_mark_regions(z)) goto lab3; /* call mark_regions, line 200 */ + lab3: + z->c = c; + } + z->lb = z->c; z->c = z->l; /* backwards, line 201 */ + + { int m = z->l - z->c; /* do, line 203 */ + if (!r_Step_1a(z)) goto lab4; /* call Step_1a, line 203 */ + lab4: + z->c = z->l - m; + } + { int m = z->l - z->c; /* or, line 205 */ + if (!r_exception2(z)) goto lab6; /* call exception2, line 205 */ + goto lab5; + lab6: + z->c = z->l - m; + { int m = z->l - z->c; /* do, line 207 */ + if (!r_Step_1b(z)) goto lab7; /* call Step_1b, line 207 */ + lab7: + z->c = z->l - m; + } + { int m = z->l - z->c; /* do, line 208 */ + if (!r_Step_1c(z)) goto lab8; /* call Step_1c, line 208 */ + lab8: + z->c = z->l - m; + } + { int m = z->l - z->c; /* do, line 210 */ + if (!r_Step_2(z)) goto lab9; /* call Step_2, line 210 */ + lab9: + z->c = z->l - m; +
} + { int m = z->l - z->c; /* do, line 211 */ + if (!r_Step_3(z)) goto lab10; /* call Step_3, line 211 */ + lab10: + z->c = z->l - m; + } + { int m = z->l - z->c; /* do, line 212 */ + if (!r_Step_4(z)) goto lab11; /* call Step_4, line 212 */ + lab11: + z->c = z->l - m; + } + { int m = z->l - z->c; /* do, line 214 */ + if (!r_Step_5(z)) goto lab12; /* call Step_5, line 214 */ + lab12: + z->c = z->l - m; + } + } + lab5: + z->c = z->lb; + { int c = z->c; /* do, line 217 */ + if (!r_postlude(z)) goto lab13; /* call postlude, line 217 */ + lab13: + z->c = c; + } + } +lab0: + return 1; +} + +extern struct SN_env * english_create_env(void) { /* this stemmer uses I[0], I[1] and B[0]; presumably the arguments are the string, integer and boolean slot counts -- TODO confirm in api.c */ return SN_create_env(0, 2, 1); } + +extern void english_close_env(struct SN_env * z) { SN_close_env(z); } + diff --git a/contrib/tsearch2/snowball/english_stem.h b/contrib/tsearch2/snowball/english_stem.h new file mode 100644 index 0000000000..bfefcd5656 --- /dev/null +++ b/contrib/tsearch2/snowball/english_stem.h @@ -0,0 +1,8 @@ + +/* This file was generated automatically by the Snowball to ANSI C compiler */ + +extern struct SN_env * english_create_env(void); +extern void english_close_env(struct SN_env * z); + +extern int english_stem(struct SN_env * z); + diff --git a/contrib/tsearch2/snowball/header.h b/contrib/tsearch2/snowball/header.h new file mode 100644 index 0000000000..aaec3ae5d1 --- /dev/null +++ b/contrib/tsearch2/snowball/header.h @@ -0,0 +1,57 @@ + +#include <limits.h> /* INT_MAX and INT_MIN for MAXINT / MININT below */ + +#include "api.h" + +#define MAXINT INT_MAX +#define MININT INT_MIN + +#define HEAD 2*sizeof(int) + +#define SIZE(p) ((int *)(p))[-1] +#define SET_SIZE(p, n) ((int *)(p))[-1] = n +#define CAPACITY(p) ((int *)(p))[-2] + +struct among +{ int s_size; /* number of chars in string */ + symbol * s; /* search string */ + int substring_i;/* index to longest matching substring */ + int result; /* result of the lookup */ + int (* function)(struct SN_env *); +}; + +extern symbol * create_s(void); +extern void lose_s(symbol * p); + +extern int in_grouping(struct SN_env * z,
unsigned char * s, int min, int max); +extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max); +extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max); +extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max); + +extern int in_range(struct SN_env * z, int min, int max); +extern int in_range_b(struct SN_env * z, int min, int max); +extern int out_range(struct SN_env * z, int min, int max); +extern int out_range_b(struct SN_env * z, int min, int max); + +extern int eq_s(struct SN_env * z, int s_size, symbol * s); +extern int eq_s_b(struct SN_env * z, int s_size, symbol * s); +extern int eq_v(struct SN_env * z, symbol * p); +extern int eq_v_b(struct SN_env * z, symbol * p); + +extern int find_among(struct SN_env * z, struct among * v, int v_size); +extern int find_among_b(struct SN_env * z, struct among * v, int v_size); + +extern symbol * increase_size(symbol * p, int n); +extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s); +extern void slice_from_s(struct SN_env * z, int s_size, symbol * s); +extern void slice_from_v(struct SN_env * z, symbol * p); +extern void slice_del(struct SN_env * z); + +extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s); +extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p); + +extern symbol * slice_to(struct SN_env * z, symbol * p); +extern symbol * assign_to(struct SN_env * z, symbol * p); + +extern void debug(struct SN_env * z, int number, int line_count); + diff --git a/contrib/tsearch2/snowball/russian_stem.c b/contrib/tsearch2/snowball/russian_stem.c new file mode 100644 index 0000000000..14fd49156b --- /dev/null +++ b/contrib/tsearch2/snowball/russian_stem.c @@ -0,0 +1,626 @@ + +/* This file was generated automatically by the Snowball to ANSI C compiler */ + +#include "header.h" + +extern int russian_stem(struct SN_env * z); +static int r_tidy_up(struct SN_env * z);
+static int r_derivational(struct SN_env * z); +static int r_noun(struct SN_env * z); +static int r_verb(struct SN_env * z); +static int r_reflexive(struct SN_env * z); +static int r_adjectival(struct SN_env * z); +static int r_adjective(struct SN_env * z); +static int r_perfective_gerund(struct SN_env * z); +static int r_R2(struct SN_env * z); +static int r_mark_regions(struct SN_env * z); + +extern struct SN_env * russian_create_env(void); +extern void russian_close_env(struct SN_env * z); +/* s_0_x / a_0: endings searched backwards by r_perfective_gerund. The byte values are presumably KOI8-R Cyrillic -- TODO confirm the encoding. */ +static symbol s_0_0[3] = { 215, 219, 201 }; +static symbol s_0_1[4] = { 201, 215, 219, 201 }; +static symbol s_0_2[4] = { 217, 215, 219, 201 }; +static symbol s_0_3[1] = { 215 }; +static symbol s_0_4[2] = { 201, 215 }; +static symbol s_0_5[2] = { 217, 215 }; +static symbol s_0_6[5] = { 215, 219, 201, 211, 216 }; +static symbol s_0_7[6] = { 201, 215, 219, 201, 211, 216 }; +static symbol s_0_8[6] = { 217, 215, 219, 201, 211, 216 }; +/* Each initializer below lists s_size, s, substring_i, result and function; result selects the switch case in the calling routine. */ +static struct among a_0[9] = +{ +/* 0 */ { 3, s_0_0, -1, 1, 0}, +/* 1 */ { 4, s_0_1, 0, 2, 0}, +/* 2 */ { 4, s_0_2, 0, 2, 0}, +/* 3 */ { 1, s_0_3, -1, 1, 0}, +/* 4 */ { 2, s_0_4, 3, 2, 0}, +/* 5 */ { 2, s_0_5, 3, 2, 0}, +/* 6 */ { 5, s_0_6, -1, 1, 0}, +/* 7 */ { 6, s_0_7, 6, 2, 0}, +/* 8 */ { 6, s_0_8, 6, 2, 0} +}; +/* s_1_x / a_1: endings searched backwards by r_adjective. */ +static symbol s_1_0[2] = { 192, 192 }; +static symbol s_1_1[2] = { 197, 192 }; +static symbol s_1_2[2] = { 207, 192 }; +static symbol s_1_3[2] = { 213, 192 }; +static symbol s_1_4[2] = { 197, 197 }; +static symbol s_1_5[2] = { 201, 197 }; +static symbol s_1_6[2] = { 207, 197 }; +static symbol s_1_7[2] = { 217, 197 }; +static symbol s_1_8[2] = { 201, 200 }; +static symbol s_1_9[2] = { 217, 200 }; +static symbol s_1_10[3] = { 201, 205, 201 }; +static symbol s_1_11[3] = { 217, 205, 201 }; +static symbol s_1_12[2] = { 197, 202 }; +static symbol s_1_13[2] = { 201, 202 }; +static symbol s_1_14[2] = { 207, 202 }; +static symbol s_1_15[2] = { 217, 202 }; +static symbol s_1_16[2] = { 197, 205 }; +static symbol s_1_17[2] = { 201, 205 }; 
+static symbol s_1_18[2] = { 207, 205 }; +static symbol s_1_19[2] = { 217, 205 }; +static symbol s_1_20[3] = { 197, 199, 207 }; +static symbol s_1_21[3] = { 207, 199, 207 }; +static symbol s_1_22[2] = { 193, 209 }; +static symbol s_1_23[2] = { 209, 209 }; +static symbol s_1_24[3] = { 197, 205, 213 }; +static symbol s_1_25[3] = { 207, 205, 213 }; +/* a_1: every entry has result 1 and no substring link. */ +static struct among a_1[26] = +{ +/* 0 */ { 2, s_1_0, -1, 1, 0}, +/* 1 */ { 2, s_1_1, -1, 1, 0}, +/* 2 */ { 2, s_1_2, -1, 1, 0}, +/* 3 */ { 2, s_1_3, -1, 1, 0}, +/* 4 */ { 2, s_1_4, -1, 1, 0}, +/* 5 */ { 2, s_1_5, -1, 1, 0}, +/* 6 */ { 2, s_1_6, -1, 1, 0}, +/* 7 */ { 2, s_1_7, -1, 1, 0}, +/* 8 */ { 2, s_1_8, -1, 1, 0}, +/* 9 */ { 2, s_1_9, -1, 1, 0}, +/* 10 */ { 3, s_1_10, -1, 1, 0}, +/* 11 */ { 3, s_1_11, -1, 1, 0}, +/* 12 */ { 2, s_1_12, -1, 1, 0}, +/* 13 */ { 2, s_1_13, -1, 1, 0}, +/* 14 */ { 2, s_1_14, -1, 1, 0}, +/* 15 */ { 2, s_1_15, -1, 1, 0}, +/* 16 */ { 2, s_1_16, -1, 1, 0}, +/* 17 */ { 2, s_1_17, -1, 1, 0}, +/* 18 */ { 2, s_1_18, -1, 1, 0}, +/* 19 */ { 2, s_1_19, -1, 1, 0}, +/* 20 */ { 3, s_1_20, -1, 1, 0}, +/* 21 */ { 3, s_1_21, -1, 1, 0}, +/* 22 */ { 2, s_1_22, -1, 1, 0}, +/* 23 */ { 2, s_1_23, -1, 1, 0}, +/* 24 */ { 3, s_1_24, -1, 1, 0}, +/* 25 */ { 3, s_1_25, -1, 1, 0} +}; +/* s_2_x / a_2: endings searched by r_adjectival once r_adjective has succeeded. */ +static symbol s_2_0[2] = { 197, 205 }; +static symbol s_2_1[2] = { 206, 206 }; +static symbol s_2_2[2] = { 215, 219 }; +static symbol s_2_3[3] = { 201, 215, 219 }; +static symbol s_2_4[3] = { 217, 215, 219 }; +static symbol s_2_5[1] = { 221 }; +static symbol s_2_6[2] = { 192, 221 }; +static symbol s_2_7[3] = { 213, 192, 221 }; + +static struct among a_2[8] = +{ +/* 0 */ { 2, s_2_0, -1, 1, 0}, +/* 1 */ { 2, s_2_1, -1, 1, 0}, +/* 2 */ { 2, s_2_2, -1, 1, 0}, +/* 3 */ { 3, s_2_3, 2, 2, 0}, +/* 4 */ { 3, s_2_4, 2, 2, 0}, +/* 5 */ { 1, s_2_5, -1, 1, 0}, +/* 6 */ { 2, s_2_6, 5, 1, 0}, +/* 7 */ { 3, s_2_7, 6, 2, 0} +}; +/* s_3_x / a_3: endings searched backwards by r_reflexive. */ +static symbol s_3_0[2] = { 211, 209 }; +static symbol s_3_1[2] = { 211, 216 }; + +static struct among a_3[2] = +{ +/* 0 */ 
{ 2, s_3_0, -1, 1, 0}, +/* 1 */ { 2, s_3_1, -1, 1, 0} +}; +/* s_4_x / a_4: endings searched backwards by r_verb. */ +static symbol s_4_0[1] = { 192 }; +static symbol s_4_1[2] = { 213, 192 }; +static symbol s_4_2[2] = { 204, 193 }; +static symbol s_4_3[3] = { 201, 204, 193 }; +static symbol s_4_4[3] = { 217, 204, 193 }; +static symbol s_4_5[2] = { 206, 193 }; +static symbol s_4_6[3] = { 197, 206, 193 }; +static symbol s_4_7[3] = { 197, 212, 197 }; +static symbol s_4_8[3] = { 201, 212, 197 }; +static symbol s_4_9[3] = { 202, 212, 197 }; +static symbol s_4_10[4] = { 197, 202, 212, 197 }; +static symbol s_4_11[4] = { 213, 202, 212, 197 }; +static symbol s_4_12[2] = { 204, 201 }; +static symbol s_4_13[3] = { 201, 204, 201 }; +static symbol s_4_14[3] = { 217, 204, 201 }; +static symbol s_4_15[1] = { 202 }; +static symbol s_4_16[2] = { 197, 202 }; +static symbol s_4_17[2] = { 213, 202 }; +static symbol s_4_18[1] = { 204 }; +static symbol s_4_19[2] = { 201, 204 }; +static symbol s_4_20[2] = { 217, 204 }; +static symbol s_4_21[2] = { 197, 205 }; +static symbol s_4_22[2] = { 201, 205 }; +static symbol s_4_23[2] = { 217, 205 }; +static symbol s_4_24[1] = { 206 }; +static symbol s_4_25[2] = { 197, 206 }; +static symbol s_4_26[2] = { 204, 207 }; +static symbol s_4_27[3] = { 201, 204, 207 }; +static symbol s_4_28[3] = { 217, 204, 207 }; +static symbol s_4_29[2] = { 206, 207 }; +static symbol s_4_30[3] = { 197, 206, 207 }; +static symbol s_4_31[3] = { 206, 206, 207 }; +static symbol s_4_32[2] = { 192, 212 }; +static symbol s_4_33[3] = { 213, 192, 212 }; +static symbol s_4_34[2] = { 197, 212 }; +static symbol s_4_35[3] = { 213, 197, 212 }; +static symbol s_4_36[2] = { 201, 212 }; +static symbol s_4_37[2] = { 209, 212 }; +static symbol s_4_38[2] = { 217, 212 }; +static symbol s_4_39[2] = { 212, 216 }; +static symbol s_4_40[3] = { 201, 212, 216 }; +static symbol s_4_41[3] = { 217, 212, 216 }; +static symbol s_4_42[3] = { 197, 219, 216 }; +static symbol s_4_43[3] = { 201, 219, 216 }; +static symbol s_4_44[2] = { 206, 217 }; +static symbol 
s_4_45[3] = { 197, 206, 217 }; +/* a_4: result 1 entries need a preceding s_4 or s_5 symbol in r_verb; result 2 entries are deleted unconditionally. */ +static struct among a_4[46] = +{ +/* 0 */ { 1, s_4_0, -1, 2, 0}, +/* 1 */ { 2, s_4_1, 0, 2, 0}, +/* 2 */ { 2, s_4_2, -1, 1, 0}, +/* 3 */ { 3, s_4_3, 2, 2, 0}, +/* 4 */ { 3, s_4_4, 2, 2, 0}, +/* 5 */ { 2, s_4_5, -1, 1, 0}, +/* 6 */ { 3, s_4_6, 5, 2, 0}, +/* 7 */ { 3, s_4_7, -1, 1, 0}, +/* 8 */ { 3, s_4_8, -1, 2, 0}, +/* 9 */ { 3, s_4_9, -1, 1, 0}, +/* 10 */ { 4, s_4_10, 9, 2, 0}, +/* 11 */ { 4, s_4_11, 9, 2, 0}, +/* 12 */ { 2, s_4_12, -1, 1, 0}, +/* 13 */ { 3, s_4_13, 12, 2, 0}, +/* 14 */ { 3, s_4_14, 12, 2, 0}, +/* 15 */ { 1, s_4_15, -1, 1, 0}, +/* 16 */ { 2, s_4_16, 15, 2, 0}, +/* 17 */ { 2, s_4_17, 15, 2, 0}, +/* 18 */ { 1, s_4_18, -1, 1, 0}, +/* 19 */ { 2, s_4_19, 18, 2, 0}, +/* 20 */ { 2, s_4_20, 18, 2, 0}, +/* 21 */ { 2, s_4_21, -1, 1, 0}, +/* 22 */ { 2, s_4_22, -1, 2, 0}, +/* 23 */ { 2, s_4_23, -1, 2, 0}, +/* 24 */ { 1, s_4_24, -1, 1, 0}, +/* 25 */ { 2, s_4_25, 24, 2, 0}, +/* 26 */ { 2, s_4_26, -1, 1, 0}, +/* 27 */ { 3, s_4_27, 26, 2, 0}, +/* 28 */ { 3, s_4_28, 26, 2, 0}, +/* 29 */ { 2, s_4_29, -1, 1, 0}, +/* 30 */ { 3, s_4_30, 29, 2, 0}, +/* 31 */ { 3, s_4_31, 29, 1, 0}, +/* 32 */ { 2, s_4_32, -1, 1, 0}, +/* 33 */ { 3, s_4_33, 32, 2, 0}, +/* 34 */ { 2, s_4_34, -1, 1, 0}, +/* 35 */ { 3, s_4_35, 34, 2, 0}, +/* 36 */ { 2, s_4_36, -1, 2, 0}, +/* 37 */ { 2, s_4_37, -1, 2, 0}, +/* 38 */ { 2, s_4_38, -1, 2, 0}, +/* 39 */ { 2, s_4_39, -1, 1, 0}, +/* 40 */ { 3, s_4_40, 39, 2, 0}, +/* 41 */ { 3, s_4_41, 39, 2, 0}, +/* 42 */ { 3, s_4_42, -1, 1, 0}, +/* 43 */ { 3, s_4_43, -1, 2, 0}, +/* 44 */ { 2, s_4_44, -1, 1, 0}, +/* 45 */ { 3, s_4_45, 44, 2, 0} +}; +/* s_5_x / a_5: endings searched backwards by r_noun. */ +static symbol s_5_0[1] = { 192 }; +static symbol s_5_1[2] = { 201, 192 }; +static symbol s_5_2[2] = { 216, 192 }; +static symbol s_5_3[1] = { 193 }; +static symbol s_5_4[1] = { 197 }; +static symbol s_5_5[2] = { 201, 197 }; +static symbol s_5_6[2] = { 216, 197 }; +static symbol s_5_7[2] = { 193, 200 }; +static symbol s_5_8[2] = { 209, 200 }; +static symbol s_5_9[3] = { 201, 209, 200 }; 
+static symbol s_5_10[1] = { 201 }; +static symbol s_5_11[2] = { 197, 201 }; +static symbol s_5_12[2] = { 201, 201 }; +static symbol s_5_13[3] = { 193, 205, 201 }; +static symbol s_5_14[3] = { 209, 205, 201 }; +static symbol s_5_15[4] = { 201, 209, 205, 201 }; +static symbol s_5_16[1] = { 202 }; +static symbol s_5_17[2] = { 197, 202 }; +static symbol s_5_18[3] = { 201, 197, 202 }; +static symbol s_5_19[2] = { 201, 202 }; +static symbol s_5_20[2] = { 207, 202 }; +static symbol s_5_21[2] = { 193, 205 }; +static symbol s_5_22[2] = { 197, 205 }; +static symbol s_5_23[3] = { 201, 197, 205 }; +static symbol s_5_24[2] = { 207, 205 }; +static symbol s_5_25[2] = { 209, 205 }; +static symbol s_5_26[3] = { 201, 209, 205 }; +static symbol s_5_27[1] = { 207 }; +static symbol s_5_28[1] = { 209 }; +static symbol s_5_29[2] = { 201, 209 }; +static symbol s_5_30[2] = { 216, 209 }; +static symbol s_5_31[1] = { 213 }; +static symbol s_5_32[2] = { 197, 215 }; +static symbol s_5_33[2] = { 207, 215 }; +static symbol s_5_34[1] = { 216 }; +static symbol s_5_35[1] = { 217 }; +/* a_5: every entry has result 1, so r_noun deletes whichever ending matched. */ +static struct among a_5[36] = +{ +/* 0 */ { 1, s_5_0, -1, 1, 0}, +/* 1 */ { 2, s_5_1, 0, 1, 0}, +/* 2 */ { 2, s_5_2, 0, 1, 0}, +/* 3 */ { 1, s_5_3, -1, 1, 0}, +/* 4 */ { 1, s_5_4, -1, 1, 0}, +/* 5 */ { 2, s_5_5, 4, 1, 0}, +/* 6 */ { 2, s_5_6, 4, 1, 0}, +/* 7 */ { 2, s_5_7, -1, 1, 0}, +/* 8 */ { 2, s_5_8, -1, 1, 0}, +/* 9 */ { 3, s_5_9, 8, 1, 0}, +/* 10 */ { 1, s_5_10, -1, 1, 0}, +/* 11 */ { 2, s_5_11, 10, 1, 0}, +/* 12 */ { 2, s_5_12, 10, 1, 0}, +/* 13 */ { 3, s_5_13, 10, 1, 0}, +/* 14 */ { 3, s_5_14, 10, 1, 0}, +/* 15 */ { 4, s_5_15, 14, 1, 0}, +/* 16 */ { 1, s_5_16, -1, 1, 0}, +/* 17 */ { 2, s_5_17, 16, 1, 0}, +/* 18 */ { 3, s_5_18, 17, 1, 0}, +/* 19 */ { 2, s_5_19, 16, 1, 0}, +/* 20 */ { 2, s_5_20, 16, 1, 0}, +/* 21 */ { 2, s_5_21, -1, 1, 0}, +/* 22 */ { 2, s_5_22, -1, 1, 0}, +/* 23 */ { 3, s_5_23, 22, 1, 0}, +/* 24 */ { 2, s_5_24, -1, 1, 0}, +/* 25 */ { 2, s_5_25, -1, 1, 0}, +/* 26 */ { 3, s_5_26, 25, 1, 0}, +/* 
27 */ { 1, s_5_27, -1, 1, 0}, +/* 28 */ { 1, s_5_28, -1, 1, 0}, +/* 29 */ { 2, s_5_29, 28, 1, 0}, +/* 30 */ { 2, s_5_30, 28, 1, 0}, +/* 31 */ { 1, s_5_31, -1, 1, 0}, +/* 32 */ { 2, s_5_32, -1, 1, 0}, +/* 33 */ { 2, s_5_33, -1, 1, 0}, +/* 34 */ { 1, s_5_34, -1, 1, 0}, +/* 35 */ { 1, s_5_35, -1, 1, 0} +}; +/* s_6_x / a_6: endings searched backwards by r_derivational. */ +static symbol s_6_0[3] = { 207, 211, 212 }; +static symbol s_6_1[4] = { 207, 211, 212, 216 }; + +static struct among a_6[2] = +{ +/* 0 */ { 3, s_6_0, -1, 1, 0}, +/* 1 */ { 4, s_6_1, -1, 1, 0} +}; +/* s_7_x / a_7: endings searched backwards by r_tidy_up; results 1 to 3 select its switch cases. */ +static symbol s_7_0[4] = { 197, 202, 219, 197 }; +static symbol s_7_1[1] = { 206 }; +static symbol s_7_2[1] = { 216 }; +static symbol s_7_3[3] = { 197, 202, 219 }; + +static struct among a_7[4] = +{ +/* 0 */ { 4, s_7_0, -1, 1, 0}, +/* 1 */ { 1, s_7_1, -1, 2, 0}, +/* 2 */ { 1, s_7_2, -1, 3, 0}, +/* 3 */ { 3, s_7_3, -1, 1, 0} +}; +/* g_v: bitmap handed to in_grouping/out_grouping with the range 192..220; bit (ch - 192) set means ch is in the group. */ +static unsigned char g_v[] = { 35, 130, 34, 18 }; +/* One-symbol literals compared with eq_s_b by the routines below. */ +static symbol s_0[] = { 193 }; +static symbol s_1[] = { 209 }; +static symbol s_2[] = { 193 }; +static symbol s_3[] = { 209 }; +static symbol s_4[] = { 193 }; +static symbol s_5[] = { 209 }; +static symbol s_6[] = { 206 }; +static symbol s_7[] = { 206 }; +static symbol s_8[] = { 206 }; +static symbol s_9[] = { 201 }; +/* Sets the marks I[0] and I[1] (labelled pV and p2 by the generator) by scanning forward with g_v. Both default to z->l when the scan runs off the end. The cursor is restored before returning; always returns 1. */ +static int r_mark_regions(struct SN_env * z) { + z->I[0] = z->l; + z->I[1] = z->l; + { int c = z->c; /* do, line 100 */ + while(1) { /* gopast, line 101 */ + if (!(in_grouping(z, g_v, 192, 220))) goto lab1; + break; + lab1: + if (z->c >= z->l) goto lab0; + z->c++; + } + z->I[0] = z->c; /* setmark pV, line 101 */ + while(1) { /* gopast, line 101 */ + if (!(out_grouping(z, g_v, 192, 220))) goto lab2; + break; + lab2: + if (z->c >= z->l) goto lab0; + z->c++; + } + while(1) { /* gopast, line 102 */ + if (!(in_grouping(z, g_v, 192, 220))) goto lab3; + break; + lab3: + if (z->c >= z->l) goto lab0; + z->c++; + } + while(1) { /* gopast, line 102 */ + if (!(out_grouping(z, g_v, 192, 220))) goto lab4; + break; + lab4: + if (z->c >= z->l) goto lab0; + z->c++; + } + z->I[1] = z->c; /* 
setmark p2, line 102 */ + lab0: + z->c = c; + } + return 1; +} +/* Returns 1 when the cursor is at or beyond the mark I[1], otherwise 0. */ +static int r_R2(struct SN_env * z) { + if (!(z->I[1] <= z->c)) return 0; + return 1; +} +/* Looks up a_0 backwards. A result 1 ending is deleted only when s_0 or s_1 precedes it; a result 2 ending is deleted unconditionally. Returns 0 when nothing was removed. */ +static int r_perfective_gerund(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 111 */ + among_var = find_among_b(z, a_0, 9); /* substring, line 111 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 111 */ + switch(among_var) { + case 0: return 0; + case 1: + { int m = z->l - z->c; /* or, line 115 */ + if (!(eq_s_b(z, 1, s_0))) goto lab1; + goto lab0; + lab1: + z->c = z->l - m; + if (!(eq_s_b(z, 1, s_1))) return 0; + } + lab0: + slice_del(z); /* delete, line 115 */ + break; + case 2: + slice_del(z); /* delete, line 122 */ + break; + } + return 1; +} +/* Looks up a_1 backwards and deletes the matched ending; returns 0 when no entry matches. */ +static int r_adjective(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 127 */ + among_var = find_among_b(z, a_1, 26); /* substring, line 127 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 127 */ + switch(among_var) { + case 0: return 0; + case 1: + slice_del(z); /* delete, line 136 */ + break; + } + return 1; +} +/* Requires r_adjective to succeed, then optionally removes an a_2 ending in front of it. A failed optional step restores the cursor and still returns 1. */ +static int r_adjectival(struct SN_env * z) { + int among_var; + if (!r_adjective(z)) return 0; /* call adjective, line 141 */ + { int m = z->l - z->c; /* try, line 148 */ + z->ket = z->c; /* [, line 149 */ + among_var = find_among_b(z, a_2, 8); /* substring, line 149 */ + if (!(among_var)) { z->c = z->l - m; goto lab0; } + z->bra = z->c; /* ], line 149 */ + switch(among_var) { + case 0: { z->c = z->l - m; goto lab0; } + case 1: + { int m = z->l - z->c; /* or, line 154 */ + if (!(eq_s_b(z, 1, s_2))) goto lab2; + goto lab1; + lab2: + z->c = z->l - m; + if (!(eq_s_b(z, 1, s_3))) { z->c = z->l - m; goto lab0; } + } + lab1: + slice_del(z); /* delete, line 154 */ + break; + case 2: + slice_del(z); /* delete, line 161 */ + break; + } + lab0: + ; + } + return 1; +} +/* Looks up a_3 backwards and deletes the matched ending; returns 0 when no entry matches. */ +static int r_reflexive(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 168 */ + among_var = find_among_b(z, a_3, 2); 
/* substring, line 168 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 168 */ + switch(among_var) { + case 0: return 0; + case 1: + slice_del(z); /* delete, line 171 */ + break; + } + return 1; +} +/* Looks up a_4 backwards. A result 1 ending is deleted only when s_4 or s_5 precedes it; a result 2 ending is deleted unconditionally. */ +static int r_verb(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 176 */ + among_var = find_among_b(z, a_4, 46); /* substring, line 176 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 176 */ + switch(among_var) { + case 0: return 0; + case 1: + { int m = z->l - z->c; /* or, line 182 */ + if (!(eq_s_b(z, 1, s_4))) goto lab1; + goto lab0; + lab1: + z->c = z->l - m; + if (!(eq_s_b(z, 1, s_5))) return 0; + } + lab0: + slice_del(z); /* delete, line 182 */ + break; + case 2: + slice_del(z); /* delete, line 190 */ + break; + } + return 1; +} +/* Looks up a_5 backwards and deletes the matched ending; returns 0 when no entry matches. */ +static int r_noun(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 199 */ + among_var = find_among_b(z, a_5, 36); /* substring, line 199 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 199 */ + switch(among_var) { + case 0: return 0; + case 1: + slice_del(z); /* delete, line 206 */ + break; + } + return 1; +} +/* Looks up a_6 backwards and deletes the matched ending, but only when r_R2 accepts the position where the ending starts. */ +static int r_derivational(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 215 */ + among_var = find_among_b(z, a_6, 2); /* substring, line 215 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 215 */ + if (!r_R2(z)) return 0; /* call R2, line 215 */ + switch(among_var) { + case 0: return 0; + case 1: + slice_del(z); /* delete, line 218 */ + break; + } + return 1; +} +/* Looks up a_7 backwards. Case 1 deletes the ending and then one of two consecutive s_6/s_7 symbols before it; case 2 deletes the matched symbol only when s_8 precedes it; case 3 deletes unconditionally. */ +static int r_tidy_up(struct SN_env * z) { + int among_var; + z->ket = z->c; /* [, line 223 */ + among_var = find_among_b(z, a_7, 4); /* substring, line 223 */ + if (!(among_var)) return 0; + z->bra = z->c; /* ], line 223 */ + switch(among_var) { + case 0: return 0; + case 1: + slice_del(z); /* delete, line 227 */ + z->ket = z->c; /* [, line 228 */ + if (!(eq_s_b(z, 1, s_6))) return 0; + z->bra = z->c; /* ], line 228 */ + if (!(eq_s_b(z, 1, s_7))) return 0; + 
slice_del(z); /* delete, line 228 */ + break; + case 2: + if (!(eq_s_b(z, 1, s_8))) return 0; + slice_del(z); /* delete, line 231 */ + break; + case 3: + slice_del(z); /* delete, line 233 */ + break; + } + return 1; +} +/* Entry point. Marks the regions, then works backwards from z->l with the backward limit raised to I[0]: tries the perfective gerund, otherwise an optional reflexive ending followed by adjectival, verb or noun; then an optional trailing s_9, the derivational ending and tidy_up. Returns 0 only when the cursor lies before I[0], otherwise 1. */ +extern int russian_stem(struct SN_env * z) { + { int c = z->c; /* do, line 240 */ + if (!r_mark_regions(z)) goto lab0; /* call mark_regions, line 240 */ + lab0: + z->c = c; + } + z->lb = z->c; z->c = z->l; /* backwards, line 241 */ + + { int m = z->l - z->c; /* setlimit, line 241 */ + int m3; + if (z->c < z->I[0]) return 0; + z->c = z->I[0]; /* tomark, line 241 */ + m3 = z->lb; z->lb = z->c; + z->c = z->l - m; + { int m = z->l - z->c; /* do, line 242 */ + { int m = z->l - z->c; /* or, line 243 */ + if (!r_perfective_gerund(z)) goto lab3; /* call perfective_gerund, line 243 */ + goto lab2; + lab3: + z->c = z->l - m; + { int m = z->l - z->c; /* try, line 244 */ + if (!r_reflexive(z)) { z->c = z->l - m; goto lab4; } /* call reflexive, line 244 */ + lab4: + ; + } + { int m = z->l - z->c; /* or, line 245 */ + if (!r_adjectival(z)) goto lab6; /* call adjectival, line 245 */ + goto lab5; + lab6: + z->c = z->l - m; + if (!r_verb(z)) goto lab7; /* call verb, line 245 */ + goto lab5; + lab7: + z->c = z->l - m; + if (!r_noun(z)) goto lab1; /* call noun, line 245 */ + } + lab5: + ; + } + lab2: + lab1: + z->c = z->l - m; + } + { int m = z->l - z->c; /* try, line 248 */ + z->ket = z->c; /* [, line 248 */ + if (!(eq_s_b(z, 1, s_9))) { z->c = z->l - m; goto lab8; } + z->bra = z->c; /* ], line 248 */ + slice_del(z); /* delete, line 248 */ + lab8: + ; + } + { int m = z->l - z->c; /* do, line 251 */ + if (!r_derivational(z)) goto lab9; /* call derivational, line 251 */ + lab9: + z->c = z->l - m; + } + { int m = z->l - z->c; /* do, line 252 */ + if (!r_tidy_up(z)) goto lab10; /* call tidy_up, line 252 */ + lab10: + z->c = z->l - m; + } + z->lb = m3; + } + z->c = z->lb; + return 1; +} +/* NOTE(review): the middle argument 2 presumably sizes the integer array used as I[0] and I[1] above -- confirm against SN_create_env in api.c. */ +extern struct SN_env * russian_create_env(void) { return SN_create_env(0, 2, 0); 
} + +extern void russian_close_env(struct SN_env * z) { SN_close_env(z); } + diff --git a/contrib/tsearch2/snowball/russian_stem.h b/contrib/tsearch2/snowball/russian_stem.h new file mode 100644 index 0000000000..7dc26d45c8 --- /dev/null +++ b/contrib/tsearch2/snowball/russian_stem.h @@ -0,0 +1,8 @@ + +/* This file was generated automatically by the Snowball to ANSI C compiler */ + +extern struct SN_env * russian_create_env(void); +extern void russian_close_env(struct SN_env * z); + +extern int russian_stem(struct SN_env * z); + diff --git a/contrib/tsearch2/snowball/utilities.c b/contrib/tsearch2/snowball/utilities.c new file mode 100644 index 0000000000..5dc752445b --- /dev/null +++ b/contrib/tsearch2/snowball/utilities.c @@ -0,0 +1,328 @@ +/* Runtime helpers called by the generated stemmers: stdio for printf/fprintf, stdlib for malloc/free/exit, string for memcmp/memmove. */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "header.h" + +#define unless(C) if(!(C)) + +#define CREATE_SIZE 1 +/* Allocates a symbol buffer with HEAD bytes of bookkeeping in front of it and records CREATE_SIZE as both capacity and size. NOTE(review): the malloc result is used unchecked. */ +extern symbol * create_s(void) +{ symbol * p = (symbol *) (HEAD + (char *) malloc(HEAD + (CREATE_SIZE + 1) * sizeof(symbol))); + CAPACITY(p) = CREATE_SIZE; + SET_SIZE(p, CREATE_SIZE); + return p; +} +/* Releases a buffer by freeing the address HEAD bytes before p. */ +extern void lose_s(symbol * p) { free((char *) p - HEAD); } +/* Grouping tests. s is a bitmap indexed by ch - min. The in_ forms step over a symbol that belongs to the group and the out_ forms over one that does not; the _b forms inspect the symbol before the cursor and step backwards. Each returns 1 on success and 0 with the cursor unchanged otherwise. */ +extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max) +{ if (z->c >= z->l) return 0; + { int ch = z->p[z->c]; + if + (ch > max || (ch -= min) < 0 || + (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0; + } + z->c++; return 1; +} + +extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max) +{ if (z->c <= z->lb) return 0; + { int ch = z->p[z->c - 1]; + if + (ch > max || (ch -= min) < 0 || + (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0; + } + z->c--; return 1; +} + +extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max) +{ if (z->c >= z->l) return 0; + { int ch = z->p[z->c]; + unless + (ch > max || (ch -= min) < 0 || + (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0; + } + z->c++; return 1; +} + +extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max) +{ 
if (z->c <= z->lb) return 0; + { int ch = z->p[z->c - 1]; + unless + (ch > max || (ch -= min) < 0 || + (s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return 0; + } + z->c--; return 1; +} + +/* Range tests: the same stepping rules as the grouping tests, but membership means min <= ch <= max. */ +extern int in_range(struct SN_env * z, int min, int max) +{ if (z->c >= z->l) return 0; + { int ch = z->p[z->c]; + if + (ch > max || ch < min) return 0; + } + z->c++; return 1; +} + +extern int in_range_b(struct SN_env * z, int min, int max) +{ if (z->c <= z->lb) return 0; + { int ch = z->p[z->c - 1]; + if + (ch > max || ch < min) return 0; + } + z->c--; return 1; +} + +extern int out_range(struct SN_env * z, int min, int max) +{ if (z->c >= z->l) return 0; + { int ch = z->p[z->c]; + unless + (ch > max || ch < min) return 0; + } + z->c++; return 1; +} + +extern int out_range_b(struct SN_env * z, int min, int max) +{ if (z->c <= z->lb) return 0; + { int ch = z->p[z->c - 1]; + unless + (ch > max || ch < min) return 0; + } + z->c--; return 1; +} +/* Literal comparison: eq_s matches s_size symbols starting at the cursor and advances past them; eq_s_b matches the s_size symbols ending at the cursor and retreats. Both return 0 and leave the cursor alone on a mismatch. */ +extern int eq_s(struct SN_env * z, int s_size, symbol * s) +{ if (z->l - z->c < s_size || + memcmp(z->p + z->c, s, s_size * sizeof(symbol)) != 0) return 0; + z->c += s_size; return 1; +} + +extern int eq_s_b(struct SN_env * z, int s_size, symbol * s) +{ if (z->c - z->lb < s_size || + memcmp(z->p + z->c - s_size, s, s_size * sizeof(symbol)) != 0) return 0; + z->c -= s_size; return 1; +} +/* The _v forms compare against a symbol buffer, taking the length from SIZE(p). */ +extern int eq_v(struct SN_env * z, symbol * p) +{ return eq_s(z, SIZE(p), p); +} + +extern int eq_v_b(struct SN_env * z, symbol * p) +{ return eq_s_b(z, SIZE(p), p); +} +/* Binary search over the v_size entries of v against the text at the cursor, followed by a walk along substring_i links until an entry matches completely. On a hit the cursor is moved past the match and the entry's result is returned; 0 means no entry matched. */ +extern int find_among(struct SN_env * z, struct among * v, int v_size) +{ + int i = 0; + int j = v_size; + + int c = z->c; int l = z->l; + symbol * q = z->p + c; + + struct among * w; + + int common_i = 0; + int common_j = 0; + + int first_key_inspected = 0; + + while(1) + { int k = i + ((j - i) >> 1); + int diff = 0; + int common = common_i < common_j ? 
common_i : common_j; /* smaller */ + w = v + k; + { int i; for (i = common; i < w->s_size; i++) + { if (c + common == l) { diff = -1; break; } + diff = q[common] - w->s[i]; + if (diff != 0) break; + common++; + } + } + if (diff < 0) { j = k; common_j = common; } + else { i = k; common_i = common; } + if (j - i <= 1) + { if (i > 0) break; /* v->s has been inspected */ + if (j == i) break; /* only one item in v */ + + /* - but now we need to go round once more to get + v->s inspected. This looks messy, but is actually + the optimal approach. */ + + if (first_key_inspected) break; + first_key_inspected = 1; + } + } + while(1) + { w = v + i; + if (common_i >= w->s_size) + { z->c = c + w->s_size; + if (w->function == 0) return w->result; + { int res = w->function(z); + z->c = c + w->s_size; + if (res) return w->result; + } + } + i = w->substring_i; + if (i < 0) return 0; + } +} + +/* find_among_b is for backwards processing. Same comments apply */ + +extern int find_among_b(struct SN_env * z, struct among * v, int v_size) +{ + int i = 0; + int j = v_size; + + int c = z->c; int lb = z->lb; + symbol * q = z->p + c - 1; + + struct among * w; + + int common_i = 0; + int common_j = 0; + + int first_key_inspected = 0; + + while(1) + { int k = i + ((j - i) >> 1); + int diff = 0; + int common = common_i < common_j ? 
common_i : common_j; + w = v + k; + { int i; for (i = w->s_size - 1 - common; i >= 0; i--) + { if (c - common == lb) { diff = -1; break; } + diff = q[- common] - w->s[i]; + if (diff != 0) break; + common++; + } + } + if (diff < 0) { j = k; common_j = common; } + else { i = k; common_i = common; } + if (j - i <= 1) + { if (i > 0) break; + if (j == i) break; + if (first_key_inspected) break; + first_key_inspected = 1; + } + } + while(1) + { w = v + i; + if (common_i >= w->s_size) + { z->c = c - w->s_size; + if (w->function == 0) return w->result; + { int res = w->function(z); + z->c = c - w->s_size; + if (res) return w->result; + } + } + i = w->substring_i; + if (i < 0) return 0; + } +} + +/* Returns a fresh buffer of capacity n + 20 holding the old contents and frees p. The size field of the new buffer is left for the caller to set. NOTE(review): the malloc result is used unchecked. */ +extern symbol * increase_size(symbol * p, int n) +{ int new_size = n + 20; + symbol * q = (symbol *) (HEAD + (char *) malloc(HEAD + (new_size + 1) * sizeof(symbol))); + CAPACITY(q) = new_size; + memmove(q, p, CAPACITY(p) * sizeof(symbol)); lose_s(p); return q; +} + +/* to replace symbols between c_bra and c_ket in z->p by the + s_size symbols at s +*/ + +extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s) +{ int adjustment = s_size - (c_ket - c_bra); + int len = SIZE(z->p); + if (adjustment != 0) + { if (adjustment + len > CAPACITY(z->p)) z->p = increase_size(z->p, adjustment + len); + memmove(z->p + c_ket + adjustment, z->p + c_ket, (len - c_ket) * sizeof(symbol)); + SET_SIZE(z->p, adjustment + len); + z->l += adjustment; + if (z->c >= c_ket) z->c += adjustment; else + if (z->c > c_bra) z->c = c_bra; + } + unless (s_size == 0) memmove(z->p + c_bra, s, s_size * sizeof(symbol)); + return adjustment; +} +/* Terminates the process with a diagnostic unless 0 <= bra <= ket <= l <= SIZE(z->p). */ +static void slice_check(struct SN_env * z) +{ + if (!(0 <= z->bra && + z->bra <= z->ket && + z->ket <= z->l && + z->l <= SIZE(z->p))) /* this line could be removed */ + { + fprintf(stderr, "faulty slice operation:\n"); + debug(z, -1, 0); + exit(1); + } +} +/* Replaces the slice z->bra .. z->ket by the s_size symbols at s after validating the slice. */ +extern void slice_from_s(struct SN_env * z, int s_size, symbol * s) +{ 
slice_check(z); + replace_s(z, z->bra, z->ket, s_size, s); +} + +extern void slice_from_v(struct SN_env * z, symbol * p) +{ slice_from_s(z, SIZE(p), p); +} +/* Deletes the slice by replacing it with zero symbols. */ +extern void slice_del(struct SN_env * z) +{ slice_from_s(z, 0, 0); +} +/* Insert forms: replace bra .. ket and shift z->bra and z->ket by the size change when they lie at or after bra. */ +extern void insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s) +{ int adjustment = replace_s(z, bra, ket, s_size, s); + if (bra <= z->bra) z->bra += adjustment; + if (bra <= z->ket) z->ket += adjustment; +} + +extern void insert_v(struct SN_env * z, int bra, int ket, symbol * p) +{ int adjustment = replace_s(z, bra, ket, SIZE(p), p); + if (bra <= z->bra) z->bra += adjustment; + if (bra <= z->ket) z->ket += adjustment; +} +/* Copies the current slice into p, growing p when needed, and returns the possibly reallocated buffer. */ +extern symbol * slice_to(struct SN_env * z, symbol * p) +{ slice_check(z); + { int len = z->ket - z->bra; + if (CAPACITY(p) < len) p = increase_size(p, len); + memmove(p, z->p + z->bra, len * sizeof(symbol)); + SET_SIZE(p, len); + } + return p; +} +/* Copies the first z->l symbols of z->p into p, growing p when needed, and returns the possibly reallocated buffer. */ +extern symbol * assign_to(struct SN_env * z, symbol * p) +{ int len = z->l; + if (CAPACITY(p) < len) p = increase_size(p, len); + memmove(p, z->p, len * sizeof(symbol)); + SET_SIZE(p, len); + return p; +} +/* Prints the buffer to stdout with markers for lb, bra, c, ket and l; zero symbols are shown as '#'. */ +extern void debug(struct SN_env * z, int number, int line_count) +{ int i; + int limit = SIZE(z->p); + /*if (number >= 0) printf("%3d (line %4d): '", number, line_count);*/ + if (number >= 0) printf("%3d (line %4d): [%d]'", number, line_count,limit); + for (i = 0; i <= limit; i++) + { if (z->lb == i) printf("{"); + if (z->bra == i) printf("["); + if (z->c == i) printf("|"); + if (z->ket == i) printf("]"); + if (z->l == i) printf("}"); + if (i < limit) + { int ch = z->p[i]; + if (ch == 0) ch = '#'; + printf("%c", ch); + } + } + printf("'\n"); +} diff --git a/contrib/tsearch2/sql/tsearch2.sql b/contrib/tsearch2/sql/tsearch2.sql new file mode 100644 index 0000000000..6ca6480cc4 --- /dev/null +++ b/contrib/tsearch2/sql/tsearch2.sql @@ -0,0 +1,243 @@ +-- +-- first, define the datatype. 
Turn off echoing so that expected file +-- does not depend on contents of seg.sql. +-- +\set ECHO none +\i tsearch2.sql +\set ECHO all + +--tsvector +SELECT '1'::tsvector; +SELECT '1 '::tsvector; +SELECT ' 1'::tsvector; +SELECT ' 1 '::tsvector; +SELECT '1 2'::tsvector; +SELECT '\'1 2\''::tsvector; +SELECT '\'1 \\\'2\''::tsvector; +SELECT '\'1 \\\'2\'3'::tsvector; +SELECT '\'1 \\\'2\' 3'::tsvector; +SELECT '\'1 \\\'2\' \' 3\' 4 '::tsvector; +select '\'w\':4A,3B,2C,1D,5 a:8'; +select 'a:3A b:2a'::tsvector || 'ba:1234 a:1B'; +select setweight('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd zxc:81,567,222A'::tsvector, 'c'); +select strip('w:12B w:13* w:12,5,6 a:1,3* a:3 w asd:1dc asd'::tsvector); + + +--tsquery +SELECT '1'::tsquery; +SELECT '1 '::tsquery; +SELECT ' 1'::tsquery; +SELECT ' 1 '::tsquery; +SELECT '\'1 2\''::tsquery; +SELECT '\'1 \\\'2\''::tsquery; +SELECT '!1'::tsquery; +SELECT '1|2'::tsquery; +SELECT '1|!2'::tsquery; +SELECT '!1|2'::tsquery; +SELECT '!1|!2'::tsquery; +SELECT '!(!1|!2)'::tsquery; +SELECT '!(!1|2)'::tsquery; +SELECT '!(1|!2)'::tsquery; +SELECT '!(1|2)'::tsquery; +SELECT '1&2'::tsquery; +SELECT '!1&2'::tsquery; +SELECT '1&!2'::tsquery; +SELECT '!1&!2'::tsquery; +SELECT '(1&2)'::tsquery; +SELECT '1&(2)'::tsquery; +SELECT '!(1)&2'::tsquery; +SELECT '!(1&2)'::tsquery; +SELECT '1|2&3'::tsquery; +SELECT '1|(2&3)'::tsquery; +SELECT '(1|2)&3'::tsquery; +SELECT '1|2&!3'::tsquery; +SELECT '1|!2&3'::tsquery; +SELECT '!1|2&3'::tsquery; +SELECT '!1|(2&3)'::tsquery; +SELECT '!(1|2)&3'::tsquery; +SELECT '(!1|2)&3'::tsquery; +SELECT '1|(2|(4|(5|6)))'::tsquery; +SELECT '1|2|4|5|6'::tsquery; +SELECT '1&(2&(4&(5&6)))'::tsquery; +SELECT '1&2&4&5&6'::tsquery; +SELECT '1&(2&(4&(5|6)))'::tsquery; +SELECT '1&(2&(4&(5|!6)))'::tsquery; +SELECT '1&(\'2\'&(\' 4\'&(\\|5 | \'6 \\\' !|&\')))'::tsquery; +SELECT '\'the wether\':dc & \' sKies \':BC & a:d b:a'; + +select lexize('simple', 'ASD56 hsdkf'); +select lexize('en_stem', 'SKIES Problems identity'); + +select * from 
token_type('default'); +select * from parse('default', '345 qwe@efd.r \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf qwer jf sdjk ewr1> ewri2 +/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 + wow < jqw <> qwerty'); + +SELECT to_tsvector('default', '345 qwe@efd.r \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf qwer jf sdjk ewr1> ewri2 +/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 234 + wow < jqw <> qwerty'); + +SELECT length(to_tsvector('default', '345 qw')); + +SELECT length(to_tsvector('default', '345 qwe@efd.r \' http://www.com/ http://aew.werc.ewr/?ad=qwe&dw 1aew.werc.ewr/?ad=qwe&dw 2aew.werc.ewr http://3aew.werc.ewr/?ad=qwe&dw http://4aew.werc.ewr http://5aew.werc.ewr:8100/? ad=qwe&dw 6aew.werc.ewr:8100/?ad=qwe&dw 7aew.werc.ewr:8100/?ad=qwe&dw=%20%32 +4.0e-10 qwe qwe qwqwe 234.435 455 5.005 teodor@stack.net qwe-wer asdf qwer jf sdjk ewr1> ewri2 +/usr/local/fff /awdf/dwqe/4325 rewt/ewr wefjn /wqe-324/ewr gist.h gist.h.c gist.c. readline 4.2 4.2. 4.2, readline-4.2 readline-4.2. 
234 + wow < jqw <> qwerty')); + + +select to_tsquery('default', 'qwe & sKies '); +select to_tsquery('simple', 'qwe & sKies '); +select to_tsquery('default', '\'the wether\':dc & \' sKies \':BC '); +select 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca'; +select 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:B'; +select 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:A'; +select 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:C'; +select 'a b:89 ca:23A,64b d:34c'::tsvector @@ 'd:AC & ca:CB'; + +CREATE TABLE test_tsvector( t text, a tsvector ); + +\copy test_tsvector from 'data/test_tsearch.data' + +SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh'; +SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh'; +SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt'; +SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt'; +SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)'; +SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)'; + +create index wowidx on test_tsvector using gist (a); +set enable_seqscan=off; + +SELECT count(*) FROM test_tsvector WHERE a @@ 'wr|qh'; +SELECT count(*) FROM test_tsvector WHERE a @@ 'wr&qh'; +SELECT count(*) FROM test_tsvector WHERE a @@ 'eq&yt'; +SELECT count(*) FROM test_tsvector WHERE a @@ 'eq|yt'; +SELECT count(*) FROM test_tsvector WHERE a @@ '(eq&yt)|(wr&qh)'; +SELECT count(*) FROM test_tsvector WHERE a @@ '(eq|yt)&(wr|qh)'; + +select set_curcfg('default'); + +CREATE TRIGGER tsvectorupdate +BEFORE UPDATE OR INSERT ON test_tsvector +FOR EACH ROW EXECUTE PROCEDURE tsearch2(a, t); + +SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty'); + +INSERT INTO test_tsvector (t) VALUES ('345 qwerty'); + +SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty'); + +UPDATE test_tsvector SET t = null WHERE t = '345 qwerty'; + +SELECT count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty'); + +drop trigger tsvectorupdate on test_tsvector; +create function wow(text) returns text as 
'select $1 || \' copyright\'; ' language sql; +create trigger tsvectorupdate before update or insert on test_tsvector +for each row execute procedure tsearch2(a, wow, t); +insert into test_tsvector (t) values ('345 qwerty'); +select count(*) FROM test_tsvector WHERE a @@ to_tsquery('345&qwerty'); +select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright'); + +select rank(' a:1 s:2C d g'::tsvector, 'a | s'); +select rank(' a:1 s:2B d g'::tsvector, 'a | s'); +select rank(' a:1 s:2 d g'::tsvector, 'a | s'); +select rank(' a:1 s:2C d g'::tsvector, 'a & s'); +select rank(' a:1 s:2B d g'::tsvector, 'a & s'); +select rank(' a:1 s:2 d g'::tsvector, 'a & s'); + +insert into test_tsvector (t) values ('foo bar foo the over foo qq bar'); +select * from stat('select a from test_tsvector') order by ndoc desc, nentry desc, word; + +select reset_tsearch(); +select to_tsquery('default', 'skies & books'); + +select rank_cd(to_tsvector('Erosion It took the sea a thousand years, +A thousand years to trace +The granite features of this cliff +In crag and scarp and base. +It took the sea an hour one night +An hour of storm to place +The sculpture of these granite seams, +Upon a woman s face. E. J. Pratt (1882 1964) +'), to_tsquery('sea&thousand&years')); + +select rank_cd(to_tsvector('Erosion It took the sea a thousand years, +A thousand years to trace +The granite features of this cliff +In crag and scarp and base. +It took the sea an hour one night +An hour of storm to place +The sculpture of these granite seams, +Upon a woman s face. E. J. Pratt (1882 1964) +'), to_tsquery('granite&sea')); + +select rank_cd(to_tsvector('Erosion It took the sea a thousand years, +A thousand years to trace +The granite features of this cliff +In crag and scarp and base. +It took the sea an hour one night +An hour of storm to place +The sculpture of these granite seams, +Upon a woman s face. E. J. 
Pratt (1882 1964) +'), to_tsquery('sea')); + +select get_covers(to_tsvector('Erosion It took the sea a thousand years, +A thousand years to trace +The granite features of this cliff +In crag and scarp and base. +It took the sea an hour one night +An hour of storm to place +The sculpture of these granite seams, +Upon a woman s face. E. J. Pratt (1882 1964) +'), to_tsquery('sea&thousand&years')); + +select get_covers(to_tsvector('Erosion It took the sea a thousand years, +A thousand years to trace +The granite features of this cliff +In crag and scarp and base. +It took the sea an hour one night +An hour of storm to place +The sculpture of these granite seams, +Upon a woman s face. E. J. Pratt (1882 1964) +'), to_tsquery('granite&sea')); + +select get_covers(to_tsvector('Erosion It took the sea a thousand years, +A thousand years to trace +The granite features of this cliff +In crag and scarp and base. +It took the sea an hour one night +An hour of storm to place +The sculpture of these granite seams, +Upon a woman s face. E. J. Pratt (1882 1964) +'), to_tsquery('sea')); + +select headline('Erosion It took the sea a thousand years, +A thousand years to trace +The granite features of this cliff +In crag and scarp and base. +It took the sea an hour one night +An hour of storm to place +The sculpture of these granite seams, +Upon a woman s face. E. J. Pratt (1882 1964) +', to_tsquery('sea&thousand&years')); + +select headline('Erosion It took the sea a thousand years, +A thousand years to trace +The granite features of this cliff +In crag and scarp and base. +It took the sea an hour one night +An hour of storm to place +The sculpture of these granite seams, +Upon a woman s face. E. J. Pratt (1882 1964) +', to_tsquery('granite&sea')); + +select headline('Erosion It took the sea a thousand years, +A thousand years to trace +The granite features of this cliff +In crag and scarp and base. 
+It took the sea an hour one night +An hour of storm to place +The sculpture of these granite seams, +Upon a woman s face. E. J. Pratt (1882 1964) +', to_tsquery('sea')); + diff --git a/contrib/tsearch2/stopword.c b/contrib/tsearch2/stopword.c new file mode 100644 index 0000000000..7f7806fa2e --- /dev/null +++ b/contrib/tsearch2/stopword.c @@ -0,0 +1,101 @@ +/* + * stopword library + * Teodor Sigaev + */ +#include +#include +#include +#include + +#include "postgres.h" +#include "common.h" +#include "dict.h" + +#define STOPBUFLEN 4096 + +char* +lowerstr(char *str) { + char *ptr=str; + while(*ptr) { + *ptr = tolower(*(unsigned char*)ptr); + ptr++; + } + return str; +} + +void +freestoplist(StopList *s) { + char **ptr=s->stop; + if ( ptr ) + while( *ptr && s->len >0 ) { + free(*ptr); + ptr++; s->len--; + free(s->stop); + } + memset(s,0,sizeof(StopList)); +} + +void +readstoplist(text *in, StopList *s) { + char **stop=NULL; + s->len=0; + if ( in && VARSIZE(in) - VARHDRSZ > 0 ) { + char *filename=text2char(in); + FILE *hin=NULL; + char buf[STOPBUFLEN]; + int reallen=0; + + if ( (hin=fopen(filename,"r")) == NULL ) + elog(ERROR,"Can't open file '%s': %s", filename, strerror(errno)); + while( fgets(buf,STOPBUFLEN,hin) ) { + buf[strlen(buf)-1] = '\0'; + if ( *buf=='\0' ) continue; + + if ( s->len>= reallen ) { + char **tmp; + reallen=(reallen) ? 
reallen*2 : 16; + tmp=(char**)realloc((void*)stop, sizeof(char*)*reallen); + if (!tmp) { + freestoplist(s); + fclose(hin); + elog(ERROR,"Not enough memory"); + } + stop=tmp; + } + + stop[s->len]=strdup(buf); + if ( !stop[s->len] ) { + freestoplist(s); + fclose(hin); + elog(ERROR,"Not enough memory"); + } + if ( s->wordop ) + stop[s->len]=(s->wordop)(stop[s->len]); + + (s->len)++; + } + fclose(hin); + pfree(filename); + } + s->stop=stop; +} + +static int +comparestr(const void *a, const void *b) { + return strcmp( *(char**)a, *(char**)b ); +} + +void +sortstoplist(StopList *s) { + if (s->stop && s->len>0) + qsort(s->stop, s->len, sizeof(char*), comparestr); +} + +bool +searchstoplist(StopList *s, char *key) { + if ( s->wordop ) + key=(*(s->wordop))(key); + return ( s->stop && s->len>0 && bsearch(&key, s->stop, s->len, sizeof(char*), comparestr) ) ? true : false; +} + + diff --git a/contrib/tsearch2/stopword/english.stop b/contrib/tsearch2/stopword/english.stop new file mode 100644 index 0000000000..a9130116d3 --- /dev/null +++ b/contrib/tsearch2/stopword/english.stop @@ -0,0 +1,128 @@ +i +me +my +myself +we +our +ours +ourselves +you +your +yours +yourself +yourselves +he +him +his +himself +she +her +hers +herself +it +its +itself +they +them +their +theirs +themselves +what +which +who +whom +this +that +these +those +am +is +are +was +were +be +been +being +have +has +had +having +do +does +did +doing +a +an +the +and +but +if +or +because +as +until +while +of +at +by +for +with +about +against +between +into +through +during +before +after +above +below +to +from +up +down +in +out +on +off +over +under +again +further +then +once +here +there +when +where +why +how +all +any +both +each +few +more +most +other +some +such +no +nor +not +only +own +same +so +than +too +very +s +t +can +will +just +don +should +now + diff --git a/contrib/tsearch2/stopword/russian.stop b/contrib/tsearch2/stopword/russian.stop new file mode 100644 index 0000000000..1877e3ab5b --- 
/dev/null +++ b/contrib/tsearch2/stopword/russian.stop @@ -0,0 +1,151 @@ +É +× +×Ï +ÎÅ +ÞÔÏ +ÏÎ +ÎÁ +Ñ +Ó +ÓÏ +ËÁË +Á +ÔÏ +×ÓÅ +ÏÎÁ +ÔÁË +ÅÇÏ +ÎÏ +ÄÁ +ÔÙ +Ë +Õ +ÖÅ +×Ù +ÚÁ +ÂÙ +ÐÏ +ÔÏÌØËÏ +ÅÅ +ÍÎÅ +ÂÙÌÏ +×ÏÔ +ÏÔ +ÍÅÎÑ +ÅÝÅ +ÎÅÔ +Ï +ÉÚ +ÅÍÕ +ÔÅÐÅÒØ +ËÏÇÄÁ +ÄÁÖÅ +ÎÕ +×ÄÒÕÇ +ÌÉ +ÅÓÌÉ +ÕÖÅ +ÉÌÉ +ÎÉ +ÂÙÔØ +ÂÙÌ +ÎÅÇÏ +ÄÏ +×ÁÓ +ÎÉÂÕÄØ +ÏÐÑÔØ +ÕÖ +×ÁÍ +×ÅÄØ +ÔÁÍ +ÐÏÔÏÍ +ÓÅÂÑ +ÎÉÞÅÇÏ +ÅÊ +ÍÏÖÅÔ +ÏÎÉ +ÔÕÔ +ÇÄÅ +ÅÓÔØ +ÎÁÄÏ +ÎÅÊ +ÄÌÑ +ÍÙ +ÔÅÂÑ +ÉÈ +ÞÅÍ +ÂÙÌÁ +ÓÁÍ +ÞÔÏ +ÂÅÚ +ÂÕÄÔÏ +ÞÅÇÏ +ÒÁÚ +ÔÏÖÅ +ÓÅÂÅ +ÐÏÄ +ÂÕÄÅÔ +Ö +ÔÏÇÄÁ +ËÔÏ +ÜÔÏÔ +ÔÏÇÏ +ÐÏÔÏÍÕ +ÜÔÏÇÏ +ËÁËÏÊ +ÓÏ×ÓÅÍ +ÎÉÍ +ÚÄÅÓØ +ÜÔÏÍ +ÏÄÉÎ +ÐÏÞÔÉ +ÍÏÊ +ÔÅÍ +ÞÔÏÂÙ +ÎÅÅ +ÓÅÊÞÁÓ +ÂÙÌÉ +ËÕÄÁ +ÚÁÞÅÍ +×ÓÅÈ +ÎÉËÏÇÄÁ +ÍÏÖÎÏ +ÐÒÉ +ÎÁËÏÎÅà +Ä×Á +Ï +ÄÒÕÇÏÊ +ÈÏÔØ +ÐÏÓÌÅ +ÎÁÄ +ÂÏÌØÛÅ +ÔÏÔ +ÞÅÒÅÚ +ÜÔÉ +ÎÁÓ +ÐÒÏ +×ÓÅÇÏ +ÎÉÈ +ËÁËÁÑ +ÍÎÏÇÏ +ÒÁÚ×Å +ÔÒÉ +ÜÔÕ +ÍÏÑ +×ÐÒÏÞÅÍ +ÈÏÒÏÛÏ +Ó×ÏÀ +ÜÔÏÊ +ÐÅÒÅÄ +ÉÎÏÇÄÁ +ÌÕÞÛÅ +ÞÕÔØ +ÔÏÍ +ÎÅÌØÚÑ +ÔÁËÏÊ +ÉÍ +ÂÏÌÅÅ +×ÓÅÇÄÁ +ËÏÎÅÞÎÏ +×ÓÀ +ÍÅÖÄÕ diff --git a/contrib/tsearch2/ts_cfg.c b/contrib/tsearch2/ts_cfg.c new file mode 100644 index 0000000000..7c9f20c8de --- /dev/null +++ b/contrib/tsearch2/ts_cfg.c @@ -0,0 +1,509 @@ +/* + * interface functions to tscfg + * Teodor Sigaev + */ +#include +#include +#include +#include +#include + +#include "postgres.h" +#include "fmgr.h" +#include "utils/array.h" +#include "catalog/pg_type.h" +#include "executor/spi.h" + +#include "ts_cfg.h" +#include "dict.h" +#include "wparser.h" +#include "snmap.h" +#include "common.h" +#include "tsvector.h" + +/*********top interface**********/ + +static void *plan_getcfg_bylocale=NULL; +static void *plan_getcfg=NULL; +static void *plan_getmap=NULL; +static void *plan_name2id=NULL; +static Oid current_cfg_id=0; + +void +init_cfg(Oid id, TSCfgInfo *cfg) { + Oid arg[2]={ OIDOID, OIDOID }; + bool isnull; + Datum pars[2]={ ObjectIdGetDatum(id), ObjectIdGetDatum(id) } ; + int stat,i,j; + text *ptr; + text *prsname=NULL; + MemoryContext oldcontext; + + memset(cfg,0,sizeof(TSCfgInfo)); + SPI_connect(); + if ( !plan_getcfg ) { + plan_getcfg = 
SPI_saveplan( SPI_prepare( "select prs_name from pg_ts_cfg where oid = $1" , 1, arg ) ); + if ( !plan_getcfg ) + ts_error(ERROR, "SPI_prepare() failed"); + } + + stat = SPI_execp(plan_getcfg, pars, " ", 1); + if ( stat < 0 ) + ts_error (ERROR, "SPI_execp return %d", stat); + if ( SPI_processed > 0 ) { + prsname = (text*) DatumGetPointer( + SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) + ); + oldcontext = MemoryContextSwitchTo(TopMemoryContext); + prsname = ptextdup( prsname ); + MemoryContextSwitchTo(oldcontext); + + cfg->id=id; + } else + ts_error(ERROR, "No tsearch cfg with id %d", id); + + arg[0]=TEXTOID; + if ( !plan_getmap ) { + plan_getmap = SPI_saveplan( SPI_prepare( "select lt.tokid, pg_ts_cfgmap.dict_name from pg_ts_cfgmap, pg_ts_cfg, token_type( $1 ) as lt where lt.alias = pg_ts_cfgmap.tok_alias and pg_ts_cfgmap.ts_name = pg_ts_cfg.ts_name and pg_ts_cfg.oid= $2 order by lt.tokid desc;" , 2, arg ) ); + if ( !plan_getmap ) + ts_error(ERROR, "SPI_prepare() failed"); + } + + pars[0]=PointerGetDatum( prsname ); + stat = SPI_execp(plan_getmap, pars, " ", 0); + if ( stat < 0 ) + ts_error (ERROR, "SPI_execp return %d", stat); + if ( SPI_processed <= 0 ) + ts_error(ERROR, "No parser with id %d", id); + + for(i=0;ivals[i], SPI_tuptable->tupdesc, 1, &isnull)); + ArrayType *toasted_a = (ArrayType*)PointerGetDatum(SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull)); + ArrayType *a; + + if ( !cfg->map ) { + cfg->len=lexid+1; + cfg->map = (ListDictionary*)malloc( sizeof(ListDictionary)*cfg->len ); + if ( !cfg->map ) + ts_error(ERROR,"No memory"); + memset( cfg->map, 0, sizeof(ListDictionary)*cfg->len ); + } + + if (isnull) + continue; + + a=(ArrayType*)PointerGetDatum( PG_DETOAST_DATUM( DatumGetPointer(toasted_a) ) ); + + if ( ARR_NDIM(a) != 1 ) + ts_error(ERROR,"Wrong dimension"); + if ( ARRNELEMS(a) < 1 ) + continue; + + cfg->map[lexid].len=ARRNELEMS(a); + cfg->map[lexid].dict_id=(Datum*)malloc( 
sizeof(Datum)*cfg->map[lexid].len ); + memset(cfg->map[lexid].dict_id,0,sizeof(Datum)*cfg->map[lexid].len ); + ptr=(text*)ARR_DATA_PTR(a); + oldcontext = MemoryContextSwitchTo(TopMemoryContext); + for(j=0;jmap[lexid].len;j++) { + cfg->map[lexid].dict_id[j] = PointerGetDatum(ptextdup(ptr)); + ptr=NEXTVAL(ptr); + } + MemoryContextSwitchTo(oldcontext); + + if ( a != toasted_a ) + pfree(a); + } + + SPI_finish(); + cfg->prs_id = name2id_prs( prsname ); + pfree(prsname); + for(i=0;ilen;i++) { + for(j=0;jmap[i].len;j++) { + ptr = (text*)DatumGetPointer( cfg->map[i].dict_id[j] ); + cfg->map[i].dict_id[j] = ObjectIdGetDatum( name2id_dict(ptr) ); + pfree(ptr); + } + } +} + +typedef struct { + TSCfgInfo *last_cfg; + int len; + int reallen; + TSCfgInfo *list; + SNMap name2id_map; +} CFGList; + +static CFGList CList = {NULL,0,0,NULL,{0,0,NULL}}; + +void +reset_cfg(void) { + freeSNMap( &(CList.name2id_map) ); + if ( CList.list ) { + int i,j; + for(i=0;iid - ((TSCfgInfo*)b)->id; +} + +TSCfgInfo * +findcfg(Oid id) { + /* last used cfg */ + if ( CList.last_cfg && CList.last_cfg->id==id ) + return CList.last_cfg; + + /* already used cfg */ + if ( CList.len != 0 ) { + TSCfgInfo key; + key.id=id; + CList.last_cfg = bsearch(&key, CList.list, CList.len, sizeof(TSCfgInfo), comparecfg); + if ( CList.last_cfg != NULL ) + return CList.last_cfg; + } + + /* last chance */ + if ( CList.len==CList.reallen ) { + TSCfgInfo *tmp; + int reallen = ( CList.reallen ) ? 2*CList.reallen : 16; + tmp=(TSCfgInfo*)realloc(CList.list,sizeof(TSCfgInfo)*reallen); + if ( !tmp ) + ts_error(ERROR,"No memory"); + CList.reallen=reallen; + CList.list=tmp; + } + CList.last_cfg=&(CList.list[CList.len]); + init_cfg(id, CList.last_cfg); + CList.len++; + qsort(CList.list, CList.len, sizeof(TSCfgInfo), comparecfg); + return findcfg(id); /* qsort changed order!! 
*/; +} + + +Oid +name2id_cfg(text *name) { + Oid arg[1]={ TEXTOID }; + bool isnull; + Datum pars[1]={ PointerGetDatum(name) }; + int stat; + Oid id=findSNMap_t( &(CList.name2id_map), name ); + + if ( id ) + return id; + + SPI_connect(); + if ( !plan_name2id ) { + plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where ts_name = $1" , 1, arg ) ); + if ( !plan_name2id ) + elog(ERROR, "SPI_prepare() failed"); + } + + stat = SPI_execp(plan_name2id, pars, " ", 1); + if ( stat < 0 ) + elog (ERROR, "SPI_execp return %d", stat); + if ( SPI_processed > 0 ) { + id=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) ); + if ( isnull ) + elog(ERROR, "Null id for tsearch config"); + } else + elog(ERROR, "No tsearch config"); + SPI_finish(); + addSNMap_t( &(CList.name2id_map), name, id ); + return id; +} + + +void +parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen) { + int type, lenlemm, i; + char *lemm=NULL; + WParserInfo *prsobj = findprs(cfg->prs_id); + + prsobj->prs=(void*)DatumGetPointer( + FunctionCall2( + &(prsobj->start_info), + PointerGetDatum(buf), + Int32GetDatum(buflen) + ) + ); + + while( ( type=DatumGetInt32(FunctionCall3( + &(prsobj->getlexeme_info), + PointerGetDatum(prsobj->prs), + PointerGetDatum(&lemm), + PointerGetDatum(&lenlemm))) ) != 0 ) { + + if ( lenlemm >= MAXSTRLEN ) + elog(ERROR, "Word is too long"); + + + if ( type >= cfg->len ) /* skip this type of lexem */ + continue; + + for(i=0;imap[type].len;i++) { + DictInfo *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) ); + char **norms, **ptr; + + norms = ptr = (char**)DatumGetPointer( + FunctionCall3( + &(dict->lexize_info), + PointerGetDatum(dict->dictionary), + PointerGetDatum(lemm), + PointerGetDatum(lenlemm) + ) + ); + if ( !norms ) /* dictionary doesn't know this lexem */ + continue; + + prs->pos++; /*set pos*/ + + while( *ptr ) { + if (prs->curwords == prs->lenwords) { + prs->lenwords *= 2; + prs->words = (WORD *) 
repalloc((void *) prs->words, prs->lenwords * sizeof(WORD)); + } + + prs->words[prs->curwords].len = strlen(*ptr); + prs->words[prs->curwords].word = *ptr; + prs->words[prs->curwords].alen = 0; + prs->words[prs->curwords].pos.pos = LIMITPOS(prs->pos); + ptr++; + prs->curwords++; + } + pfree(norms); + break; /* lexem already normalized or is stop word*/ + } + } + + FunctionCall1( + &(prsobj->end_info), + PointerGetDatum(prsobj->prs) + ); +} + +static void +hladdword(HLPRSTEXT * prs, char *buf, int4 buflen, int type) { + while (prs->curwords >= prs->lenwords) { + prs->lenwords *= 2; + prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD)); + } + memset( &(prs->words[prs->curwords]), 0, sizeof(HLWORD) ); + prs->words[prs->curwords].type = (uint8)type; + prs->words[prs->curwords].len = buflen; + prs->words[prs->curwords].word = palloc(buflen); + memcpy(prs->words[prs->curwords].word, buf, buflen); + prs->curwords++; +} + +static void +hlfinditem(HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int buflen ) { + int i; + ITEM *item=GETQUERY(query); + HLWORD *word=&( prs->words[prs->curwords-1] ); + + while (prs->curwords + query->size >= prs->lenwords) { + prs->lenwords *= 2; + prs->words = (HLWORD *) repalloc((void *) prs->words, prs->lenwords * sizeof(HLWORD)); + } + + for(i=0; isize; i++) { + if ( item->type == VAL && item->length == buflen && strncmp( GETOPERAND(query) + item->distance, buf, buflen )==0 ) { + if ( word->item ) { + memcpy( &(prs->words[prs->curwords]), word, sizeof(HLWORD) ); + prs->words[prs->curwords].item=item; + prs->words[prs->curwords].repeated=1; + prs->curwords++; + } else + word->item=item; + } + item++; + } +} + +void +hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen) { + int type, lenlemm, i; + char *lemm=NULL; + WParserInfo *prsobj = findprs(cfg->prs_id); + + prsobj->prs=(void*)DatumGetPointer( + FunctionCall2( + &(prsobj->start_info), + PointerGetDatum(buf), + 
Int32GetDatum(buflen) + ) + ); + + while( ( type=DatumGetInt32(FunctionCall3( + &(prsobj->getlexeme_info), + PointerGetDatum(prsobj->prs), + PointerGetDatum(&lemm), + PointerGetDatum(&lenlemm))) ) != 0 ) { + + if ( lenlemm >= MAXSTRLEN ) + elog(ERROR, "Word is too long"); + + hladdword(prs,lemm,lenlemm,type); + + if ( type >= cfg->len ) + continue; + + for(i=0;imap[type].len;i++) { + DictInfo *dict=finddict( DatumGetObjectId(cfg->map[type].dict_id[i]) ); + char **norms, **ptr; + + norms = ptr = (char**)DatumGetPointer( + FunctionCall3( + &(dict->lexize_info), + PointerGetDatum(dict->dictionary), + PointerGetDatum(lemm), + PointerGetDatum(lenlemm) + ) + ); + if ( !norms ) /* dictionary doesn't know this lexem */ + continue; + + while( *ptr ) { + hlfinditem(prs,query,*ptr,strlen(*ptr)); + pfree(*ptr); + ptr++; + } + pfree(norms); + break; /* lexem already normalized or is stop word*/ + } + } + + FunctionCall1( + &(prsobj->end_info), + PointerGetDatum(prsobj->prs) + ); +} + +text* +genhl(HLPRSTEXT * prs) { + text *out; + int len=128; + char *ptr; + HLWORD *wrd=prs->words; + + out = (text*)palloc( len ); + ptr=((char*)out) + VARHDRSZ; + + while( wrd - prs->words < prs->curwords ) { + while ( wrd->len + prs->stopsellen + prs->startsellen + (ptr - ((char*)out)) >= len ) { + int dist = ptr - ((char*)out); + len*= 2; + out = (text *) repalloc(out, len); + ptr=((char*)out) + dist; + } + + if ( wrd->in && !wrd->skip && !wrd->repeated ) { + if ( wrd->replace ) { + *ptr=' '; + ptr++; + } else { + if (wrd->selected) { + memcpy(ptr,prs->startsel,prs->startsellen); + ptr+=prs->startsellen; + } + memcpy(ptr,wrd->word,wrd->len); + ptr+=wrd->len; + if (wrd->selected) { + memcpy(ptr,prs->stopsel,prs->stopsellen); + ptr+=prs->stopsellen; + } + } + } + + if ( !wrd->repeated ) + pfree(wrd->word); + + wrd++; + } + + VARATT_SIZEP(out)=ptr - ((char*)out); + return out; +} + +int +get_currcfg(void) { + Oid arg[1]={ TEXTOID }; + const char *curlocale; + Datum pars[1]; + bool isnull; + int 
stat; + + if ( current_cfg_id > 0 ) + return current_cfg_id; + + SPI_connect(); + if ( !plan_getcfg_bylocale ) { + plan_getcfg_bylocale=SPI_saveplan( SPI_prepare( "select oid from pg_ts_cfg where locale = $1 ", 1, arg ) ); + if ( !plan_getcfg_bylocale ) + elog(ERROR, "SPI_prepare() failed"); + } + + curlocale = setlocale(LC_CTYPE, NULL); + pars[0] = PointerGetDatum( char2text((char*)curlocale) ); + stat = SPI_execp(plan_getcfg_bylocale, pars, " ", 1); + + if ( stat < 0 ) + elog (ERROR, "SPI_execp return %d", stat); + if ( SPI_processed > 0 ) + current_cfg_id = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) ); + else + elog(ERROR,"Can't find tsearch config by locale"); + + pfree(DatumGetPointer(pars[0])); + SPI_finish(); + return current_cfg_id; +} + +PG_FUNCTION_INFO_V1(set_curcfg); +Datum set_curcfg(PG_FUNCTION_ARGS); +Datum +set_curcfg(PG_FUNCTION_ARGS) { + findcfg(PG_GETARG_OID(0)); + current_cfg_id=PG_GETARG_OID(0); + PG_RETURN_VOID(); +} + +PG_FUNCTION_INFO_V1(set_curcfg_byname); +Datum set_curcfg_byname(PG_FUNCTION_ARGS); +Datum +set_curcfg_byname(PG_FUNCTION_ARGS) { + text *name=PG_GETARG_TEXT_P(0); + + DirectFunctionCall1( + set_curcfg, + ObjectIdGetDatum( name2id_cfg(name) ) + ); + PG_FREE_IF_COPY(name, 0); + PG_RETURN_VOID(); +} + +PG_FUNCTION_INFO_V1(show_curcfg); +Datum show_curcfg(PG_FUNCTION_ARGS); +Datum +show_curcfg(PG_FUNCTION_ARGS) { + PG_RETURN_OID( get_currcfg() ); +} + +PG_FUNCTION_INFO_V1(reset_tsearch); +Datum reset_tsearch(PG_FUNCTION_ARGS); +Datum +reset_tsearch(PG_FUNCTION_ARGS) { + ts_error(NOTICE,"TSearch cache cleaned"); + PG_RETURN_VOID(); +} diff --git a/contrib/tsearch2/ts_cfg.h b/contrib/tsearch2/ts_cfg.h new file mode 100644 index 0000000000..01006c1f93 --- /dev/null +++ b/contrib/tsearch2/ts_cfg.h @@ -0,0 +1,68 @@ +#ifndef __TS_CFG_H__ +#define __TS_CFG_H__ +#include "postgres.h" +#include "query.h" + +typedef struct { + int len; + Datum *dict_id; +} ListDictionary; + +typedef struct { + 
Oid id; + Oid prs_id; + int len; + ListDictionary *map; +} TSCfgInfo; + +Oid name2id_cfg(text *name); +TSCfgInfo * findcfg(Oid id); +void init_cfg(Oid id, TSCfgInfo *cfg); +void reset_cfg(void); + +typedef struct { + uint16 len; + union { + uint16 pos; + uint16 *apos; + } pos; + char *word; + uint32 alen; +} WORD; + +typedef struct { + WORD *words; + int4 lenwords; + int4 curwords; + int4 pos; +} PRSTEXT; + +typedef struct { + uint16 len; + uint8 selected:1, + in:1, + skip:1, + replace:1, + repeated:1; + uint8 type; + char *word; + ITEM *item; +} HLWORD; + +typedef struct { + HLWORD *words; + int4 lenwords; + int4 curwords; + char *startsel; + char *stopsel; + int2 startsellen; + int2 stopsellen; +} HLPRSTEXT; + +void hlparsetext(TSCfgInfo *cfg, HLPRSTEXT * prs, QUERYTYPE *query, char *buf, int4 buflen); +text* genhl(HLPRSTEXT * prs); + +void parsetext_v2(TSCfgInfo *cfg, PRSTEXT * prs, char *buf, int4 buflen); +int get_currcfg(void); + +#endif diff --git a/contrib/tsearch2/ts_stat.c b/contrib/tsearch2/ts_stat.c new file mode 100644 index 0000000000..9099981e5c --- /dev/null +++ b/contrib/tsearch2/ts_stat.c @@ -0,0 +1,412 @@ +/* + * stat functions + */ + +#include "tsvector.h" +#include "ts_stat.h" +#include "funcapi.h" +#include "catalog/pg_type.h" +#include "executor/spi.h" +#include "common.h" + +PG_FUNCTION_INFO_V1(tsstat_in); +Datum tsstat_in(PG_FUNCTION_ARGS); +Datum +tsstat_in(PG_FUNCTION_ARGS) { + tsstat *stat=palloc(STATHDRSIZE); + stat->len=STATHDRSIZE; + stat->size=0; + PG_RETURN_POINTER(stat); +} + +PG_FUNCTION_INFO_V1(tsstat_out); +Datum tsstat_out(PG_FUNCTION_ARGS); +Datum +tsstat_out(PG_FUNCTION_ARGS) { + elog(ERROR,"Unimplemented"); + PG_RETURN_NULL(); +} + +static WordEntry** +SEI_realloc( WordEntry** in, uint32 *len ) { + if ( *len==0 || in==NULL ) { + *len=8; + in=palloc( sizeof(WordEntry*)* (*len) ); + } else { + *len *= 2; + in=repalloc( in, sizeof(WordEntry*)* (*len) ); + } + return in; +} + +static int +compareStatWord(StatEntry *a, WordEntry 
*b, tsstat *stat, tsvector *txt) { + if ( a->len == b->len ) + return strncmp( + STATSTRPTR(stat) + a->pos, + STRPTR(txt) + b->pos, + a->len + ); + return ( a->len > b->len ) ? 1 : -1; +} + +static tsstat* +formstat(tsstat *stat, tsvector *txt, WordEntry** entry, uint32 len) { + tsstat *newstat; + uint32 totallen, nentry; + uint32 slen=0; + WordEntry **ptr=entry; + char *curptr; + StatEntry *sptr,*nptr; + + while(ptr-entrylen; + ptr++; + } + + nentry=stat->size + len; + slen+=STATSTRSIZE(stat); + totallen=CALCSTATSIZE(nentry,slen); + newstat=palloc(totallen); + newstat->len=totallen; + newstat->size=nentry; + + memcpy(STATSTRPTR(newstat), STATSTRPTR(stat), STATSTRSIZE(stat)); + curptr=STATSTRPTR(newstat) + STATSTRSIZE(stat); + + ptr=entry; + sptr=STATPTR(stat); + nptr=STATPTR(newstat); + + if ( len == 1 ) { + StatEntry *StopLow = STATPTR(stat); + StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat); + + while (StopLow < StopHigh) { + sptr=StopLow + (StopHigh - StopLow) / 2; + if ( compareStatWord(sptr,*ptr,stat,txt) < 0 ) + StopLow = sptr + 1; + else + StopHigh = sptr; + } + nptr =STATPTR(newstat) + (StopLow-STATPTR(stat)); + memcpy( STATPTR(newstat), STATPTR(stat), sizeof(StatEntry) * (StopLow-STATPTR(stat)) ); + nptr->nentry=POSDATALEN(txt,*ptr); + if ( nptr->nentry==0 ) + nptr->nentry=1; + nptr->ndoc=1; + nptr->len=(*ptr)->len; + memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len); + nptr->pos = curptr - STATSTRPTR(newstat); + memcpy( nptr+1, StopLow, sizeof(StatEntry) * ( ((StatEntry*)STATSTRPTR(stat))-StopLow ) ); + } else { + while( sptr-STATPTR(stat) < stat->size && ptr-entrynentry=POSDATALEN(txt,*ptr); + if ( nptr->nentry==0 ) + nptr->nentry=1; + nptr->ndoc=1; + nptr->len=(*ptr)->len; + memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len); + nptr->pos = curptr - STATSTRPTR(newstat); + curptr += nptr->len; + ptr++; + } + nptr++; + } + + memcpy( nptr, sptr, sizeof(StatEntry)*( stat->size - (sptr-STATPTR(stat)) ) ); + + 
while(ptr-entrynentry=POSDATALEN(txt,*ptr); + if ( nptr->nentry==0 ) + nptr->nentry=1; + nptr->ndoc=1; + nptr->len=(*ptr)->len; + memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len); + nptr->pos = curptr - STATSTRPTR(newstat); + curptr += nptr->len; + ptr++; nptr++; + } + } + + return newstat; +} + +PG_FUNCTION_INFO_V1(ts_accum); +Datum ts_accum(PG_FUNCTION_ARGS); +Datum +ts_accum(PG_FUNCTION_ARGS) { + tsstat *newstat,*stat= (tsstat*)PG_GETARG_POINTER(0); + tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1)); + WordEntry **newentry=NULL; + uint32 len=0, cur=0; + StatEntry *sptr; + WordEntry *wptr; + + if ( stat==NULL || PG_ARGISNULL(0) ) { /* Init in first */ + stat=palloc(STATHDRSIZE); + stat->len=STATHDRSIZE; + stat->size=0; + } + + /* simple check of correctness */ + if ( txt==NULL || PG_ARGISNULL(1) || txt->size==0 ) { + PG_FREE_IF_COPY(txt,1); + PG_RETURN_POINTER(stat); + } + + sptr=STATPTR(stat); + wptr=ARRPTR(txt); + + if ( stat->size < 100*txt->size ) { /* merge */ + while( sptr-STATPTR(stat) < stat->size && wptr-ARRPTR(txt) < txt->size ) { + int cmp = compareStatWord(sptr,wptr,stat,txt); + if ( cmp<0 ) { + sptr++; + } else if ( cmp==0 ) { + int n=POSDATALEN(txt,wptr); + + if (n==0) n=1; + sptr->ndoc++; + sptr->nentry +=n ; + sptr++; wptr++; + } else { + if ( cur==len ) + newentry=SEI_realloc(newentry, &len); + newentry[cur]=wptr; + wptr++; cur++; + } + } + + while( wptr-ARRPTR(txt) < txt->size ) { + if ( cur==len ) + newentry=SEI_realloc(newentry, &len); + newentry[cur]=wptr; + wptr++; cur++; + } + } else { /* search */ + while( wptr-ARRPTR(txt) < txt->size ) { + StatEntry *StopLow = STATPTR(stat); + StatEntry *StopHigh = (StatEntry*)STATSTRPTR(stat); + int cmp; + + while (StopLow < StopHigh) { + sptr=StopLow + (StopHigh - StopLow) / 2; + cmp = compareStatWord(sptr,wptr,stat,txt); + if (cmp==0) { + int n=POSDATALEN(txt,wptr); + if (n==0) n=1; + sptr->ndoc++; + sptr->nentry +=n ; + break; + } else if ( cmp < 0 ) + StopLow = sptr + 1; + else + 
StopHigh = sptr; + } + + if ( StopLow >= StopHigh ) { /* not found */ + if ( cur==len ) + newentry=SEI_realloc(newentry, &len); + newentry[cur]=wptr; + cur++; + } + wptr++; + } + } + + + if ( cur==0 ) { /* no new words */ + PG_FREE_IF_COPY(txt,1); + PG_RETURN_POINTER(stat); + } + + newstat = formstat(stat, txt, newentry, cur); + pfree(newentry); + PG_FREE_IF_COPY(txt,1); + /* pfree(stat); */ + + PG_RETURN_POINTER(newstat); +} + +typedef struct { + uint32 cur; + tsvector *stat; +} StatStorage; + +static void +ts_setup_firstcall(FuncCallContext *funcctx, tsstat *stat) { + TupleDesc tupdesc; + MemoryContext oldcontext; + StatStorage *st; + + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + st=palloc( sizeof(StatStorage) ); + st->cur=0; + st->stat=palloc( stat->len ); + memcpy(st->stat, stat, stat->len); + funcctx->user_fctx = (void*)st; + tupdesc = RelationNameGetTupleDesc("statinfo"); + funcctx->slot = TupleDescGetSlot(tupdesc); + funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc); + MemoryContextSwitchTo(oldcontext); +} + + +static Datum +ts_process_call(FuncCallContext *funcctx) { + StatStorage *st; + st=(StatStorage*)funcctx->user_fctx; + + if ( st->cur < st->stat->size ) { + Datum result; + char* values[3]; + char ndoc[16]; + char nentry[16]; + StatEntry *entry=STATPTR(st->stat) + st->cur; + HeapTuple tuple; + + values[1]=ndoc; + sprintf(ndoc,"%d",entry->ndoc); + values[2]=nentry; + sprintf(nentry,"%d",entry->nentry); + values[0]=palloc( entry->len+1 ); + memcpy( values[0], STATSTRPTR(st->stat)+entry->pos, entry->len); + (values[0])[entry->len]='\0'; + + tuple = BuildTupleFromCStrings(funcctx->attinmeta, values); + result = TupleGetDatum(funcctx->slot, tuple); + + pfree(values[0]); + st->cur++; + return result; + } else { + pfree(st->stat); + pfree(st); + } + + return (Datum)0; +} + +PG_FUNCTION_INFO_V1(ts_accum_finish); +Datum ts_accum_finish(PG_FUNCTION_ARGS); +Datum +ts_accum_finish(PG_FUNCTION_ARGS) { + FuncCallContext *funcctx; + 
Datum result; + + if (SRF_IS_FIRSTCALL()) { + funcctx = SRF_FIRSTCALL_INIT(); + ts_setup_firstcall(funcctx, (tsstat*)PG_GETARG_POINTER(0) ); + } + + funcctx = SRF_PERCALL_SETUP(); + if ( (result=ts_process_call(funcctx)) != (Datum)0 ) + SRF_RETURN_NEXT(funcctx, result); + SRF_RETURN_DONE(funcctx); +} + +static Oid tiOid=InvalidOid; +static void +get_ti_Oid(void) { + int ret; + bool isnull; + + if ( (ret = SPI_exec("select oid from pg_type where typname='tsvector'",1)) < 0 ) + elog(ERROR, "SPI_exec to get tsvector oid returns %d", ret); + + if ( SPI_processed<0 ) + elog(ERROR, "There is no tsvector type"); + tiOid = DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) ); + if ( tiOid==InvalidOid ) + elog(ERROR, "tsvector type has InvalidOid"); +} + +static tsstat* +ts_stat_sql(text *txt) { + char *query=text2char(txt); + int i; + tsstat *newstat,*stat; + bool isnull; + Portal portal; + void *plan; + + if ( tiOid==InvalidOid ) + get_ti_Oid(); + + if ( (plan = SPI_prepare(query,0,NULL))==NULL ) + elog(ERROR, "SPI_prepare('%s') returns NULL",query); + + if ( (portal = SPI_cursor_open(NULL, plan, NULL, NULL)) == NULL ) + elog(ERROR, "SPI_cursor_open('%s') returns NULL",query); + + SPI_cursor_fetch(portal, true, 100); + + if ( SPI_tuptable->tupdesc->natts != 1 ) + elog(ERROR, "Number of fields doesn't equal to 1"); + + if ( SPI_gettypeid(SPI_tuptable->tupdesc, 1) != tiOid ) + elog(ERROR, "Column isn't of tsvector type"); + + stat=palloc(STATHDRSIZE); + stat->len=STATHDRSIZE; + stat->size=0; + + while(SPI_processed>0) { + for(i=0;ivals[i], SPI_tuptable->tupdesc, 1, &isnull); + + if ( !isnull ) { + newstat = (tsstat*)DatumGetPointer(DirectFunctionCall2( + ts_accum, + PointerGetDatum(stat), + data + )); + if ( stat!=newstat && stat ) + pfree(stat); + stat=newstat; + } + } + + SPI_freetuptable(SPI_tuptable); + SPI_cursor_fetch(portal, true, 100); + } + + SPI_freetuptable(SPI_tuptable); + SPI_cursor_close(portal); + SPI_freeplan(plan); + 
pfree(query); + + return stat; +} + +PG_FUNCTION_INFO_V1(ts_stat); +Datum ts_stat(PG_FUNCTION_ARGS); +Datum +ts_stat(PG_FUNCTION_ARGS) { + FuncCallContext *funcctx; + Datum result; + + if (SRF_IS_FIRSTCALL()) { + tsstat *stat; + text *txt=PG_GETARG_TEXT_P(0); + + funcctx = SRF_FIRSTCALL_INIT(); + SPI_connect(); + stat = ts_stat_sql(txt); + PG_FREE_IF_COPY(txt,0); + ts_setup_firstcall(funcctx, stat ); + SPI_finish(); + } + + funcctx = SRF_PERCALL_SETUP(); + if ( (result=ts_process_call(funcctx)) != (Datum)0 ) + SRF_RETURN_NEXT(funcctx, result); + SRF_RETURN_DONE(funcctx); +} + + diff --git a/contrib/tsearch2/ts_stat.h b/contrib/tsearch2/ts_stat.h new file mode 100644 index 0000000000..c32b17a3f5 --- /dev/null +++ b/contrib/tsearch2/ts_stat.h @@ -0,0 +1,32 @@ +#ifndef __TXTIDX_STAT_H__ +#define __TXTIDX_STAT_H__ + +#include "postgres.h" + +#include "access/gist.h" +#include "access/itup.h" +#include "utils/elog.h" +#include "utils/palloc.h" +#include "utils/builtins.h" +#include "storage/bufpage.h" + +typedef struct { + uint32 len; + uint32 pos; + uint32 ndoc; + uint32 nentry; +} StatEntry; + +typedef struct { + int4 len; + int4 size; + char data[1]; +} tsstat; + +#define STATHDRSIZE (sizeof(int4)*2) +#define CALCSTATSIZE(x, lenstr) ( x * sizeof(StatEntry) + STATHDRSIZE + lenstr ) +#define STATPTR(x) ( (StatEntry*) ( (char*)x + STATHDRSIZE ) ) +#define STATSTRPTR(x) ( (char*)x + STATHDRSIZE + ( sizeof(StatEntry) * ((tsvector*)x)->size ) ) +#define STATSTRSIZE(x) ( ((tsvector*)x)->len - STATHDRSIZE - ( sizeof(StatEntry) * ((tsvector*)x)->size ) ) + +#endif diff --git a/contrib/tsearch2/tsearch.sql._in b/contrib/tsearch2/tsearch.sql._in new file mode 100644 index 0000000000..91ffbc862d --- /dev/null +++ b/contrib/tsearch2/tsearch.sql._in @@ -0,0 +1,674 @@ +-- Adjust this setting to control where the objects get CREATEd. 
+SET search_path = public; + +BEGIN; + +--dict conf +CREATE TABLE pg_ts_dict ( + dict_name text not null primary key, + dict_init oid, + dict_initoption text, + dict_lexize oid not null, + dict_comment text +) with oids; + +--dict interface +CREATE FUNCTION lexize(oid, text) + returns _text + as 'MODULE_PATHNAME' + language 'C' + with (isstrict); + +CREATE FUNCTION lexize(text, text) + returns _text + as 'MODULE_PATHNAME', 'lexize_byname' + language 'C' + with (isstrict); + +CREATE FUNCTION lexize(text) + returns _text + as 'MODULE_PATHNAME', 'lexize_bycurrent' + language 'C' + with (isstrict); + +CREATE FUNCTION set_curdict(int) + returns void + as 'MODULE_PATHNAME' + language 'C' + with (isstrict); + +CREATE FUNCTION set_curdict(text) + returns void + as 'MODULE_PATHNAME', 'set_curdict_byname' + language 'C' + with (isstrict); + +--built-in dictionaries +CREATE FUNCTION dex_init(text) + returns internal + as 'MODULE_PATHNAME' + language 'C'; + +CREATE FUNCTION dex_lexize(internal,internal,int4) + returns internal + as 'MODULE_PATHNAME' + language 'C' + with (isstrict); + +insert into pg_ts_dict select + 'simple', + (select oid from pg_proc where proname='dex_init'), + null, + (select oid from pg_proc where proname='dex_lexize'), + 'Simple example of dictionary.' +; + +CREATE FUNCTION snb_en_init(text) + returns internal + as 'MODULE_PATHNAME' + language 'C'; + +CREATE FUNCTION snb_lexize(internal,internal,int4) + returns internal + as 'MODULE_PATHNAME' + language 'C' + with (isstrict); + +insert into pg_ts_dict select + 'en_stem', + (select oid from pg_proc where proname='snb_en_init'), + 'DATA_PATH/english.stop', + (select oid from pg_proc where proname='snb_lexize'), + 'English Stemmer. Snowball.' 
+; + +CREATE FUNCTION snb_ru_init(text) + returns internal + as 'MODULE_PATHNAME' + language 'C'; + +insert into pg_ts_dict select + 'ru_stem', + (select oid from pg_proc where proname='snb_ru_init'), + 'DATA_PATH/russian.stop', + (select oid from pg_proc where proname='snb_lexize'), + 'Russian Stemmer. Snowball.' +; + +CREATE FUNCTION spell_init(text) + returns internal + as 'MODULE_PATHNAME' + language 'C'; + +CREATE FUNCTION spell_lexize(internal,internal,int4) + returns internal + as 'MODULE_PATHNAME' + language 'C' + with (isstrict); + +insert into pg_ts_dict select + 'ispell_template', + (select oid from pg_proc where proname='spell_init'), + null, + (select oid from pg_proc where proname='spell_lexize'), + 'ISpell interface. Must have .dict and .aff files' +; + +CREATE FUNCTION syn_init(text) + returns internal + as 'MODULE_PATHNAME' + language 'C'; + +CREATE FUNCTION syn_lexize(internal,internal,int4) + returns internal + as 'MODULE_PATHNAME' + language 'C' + with (isstrict); + +insert into pg_ts_dict select + 'synonym', + (select oid from pg_proc where proname='syn_init'), + null, + (select oid from pg_proc where proname='syn_lexize'), + 'Example of synonym dictionary' +; + +--dict conf +CREATE TABLE pg_ts_parser ( + prs_name text not null primary key, + prs_start oid not null, + prs_nexttoken oid not null, + prs_end oid not null, + prs_headline oid not null, + prs_lextype oid not null, + prs_comment text +) with oids; + +--sql-level interface +CREATE TYPE tokentype + as (tokid int4, alias text, descr text); + +CREATE FUNCTION token_type(int4) + returns setof tokentype + as 'MODULE_PATHNAME' + language 'C' + with (isstrict); + +CREATE FUNCTION token_type(text) + returns setof tokentype + as 'MODULE_PATHNAME', 'token_type_byname' + language 'C' + with (isstrict); + +CREATE FUNCTION token_type() + returns setof tokentype + as 'MODULE_PATHNAME', 'token_type_current' + language 'C' + with (isstrict); + +CREATE FUNCTION set_curprs(int) + returns void + as 
'MODULE_PATHNAME' + language 'C' + with (isstrict); + +CREATE FUNCTION set_curprs(text) + returns void + as 'MODULE_PATHNAME', 'set_curprs_byname' + language 'C' + with (isstrict); + +CREATE TYPE tokenout + as (tokid int4, token text); + +CREATE FUNCTION parse(oid,text) + returns setof tokenout + as 'MODULE_PATHNAME' + language 'C' + with (isstrict); + +CREATE FUNCTION parse(text,text) + returns setof tokenout + as 'MODULE_PATHNAME', 'parse_byname' + language 'C' + with (isstrict); + +CREATE FUNCTION parse(text) + returns setof tokenout + as 'MODULE_PATHNAME', 'parse_current' + language 'C' + with (isstrict); + +--default parser +CREATE FUNCTION prsd_start(internal,int4) + returns internal + as 'MODULE_PATHNAME' + language 'C'; + +CREATE FUNCTION prsd_getlexeme(internal,internal,internal) + returns int4 + as 'MODULE_PATHNAME' + language 'C'; + +CREATE FUNCTION prsd_end(internal) + returns void + as 'MODULE_PATHNAME' + language 'C'; + +CREATE FUNCTION prsd_lextype(internal) + returns internal + as 'MODULE_PATHNAME' + language 'C'; + +CREATE FUNCTION prsd_headline(internal,internal,internal) + returns internal + as 'MODULE_PATHNAME' + language 'C'; + +insert into pg_ts_parser select + 'default', + (select oid from pg_proc where proname='prsd_start'), + (select oid from pg_proc where proname='prsd_getlexeme'), + (select oid from pg_proc where proname='prsd_end'), + (select oid from pg_proc where proname='prsd_headline'), + (select oid from pg_proc where proname='prsd_lextype'), + 'Parser from OpenFTS v0.34' +; + +--tsearch config + +CREATE TABLE pg_ts_cfg ( + ts_name text not null primary key, + prs_name text not null, + locale text +) with oids; + +CREATE TABLE pg_ts_cfgmap ( + ts_name text not null, + tok_alias text not null, + dict_name text[], + primary key (ts_name,tok_alias) +) with oids; + +CREATE FUNCTION set_curcfg(int) + returns void + as 'MODULE_PATHNAME' + language 'C' + with (isstrict); + +CREATE FUNCTION set_curcfg(text) + returns void + as 
'MODULE_PATHNAME', 'set_curcfg_byname' + language 'C' + with (isstrict); + +CREATE FUNCTION show_curcfg() + returns oid + as 'MODULE_PATHNAME' + language 'C' + with (isstrict); + +insert into pg_ts_cfg values ('default', 'default','C'); +insert into pg_ts_cfg values ('default_russian', 'default','ru_RU.KOI8-R'); +insert into pg_ts_cfg values ('simple', 'default'); + +copy pg_ts_cfgmap from stdin; +default lword {en_stem} +default nlword {simple} +default word {simple} +default email {simple} +default url {simple} +default host {simple} +default sfloat {simple} +default version {simple} +default part_hword {simple} +default nlpart_hword {simple} +default lpart_hword {en_stem} +default hword {simple} +default lhword {en_stem} +default nlhword {simple} +default uri {simple} +default file {simple} +default float {simple} +default int {simple} +default uint {simple} +default_russian lword {en_stem} +default_russian nlword {ru_stem} +default_russian word {ru_stem} +default_russian email {simple} +default_russian url {simple} +default_russian host {simple} +default_russian sfloat {simple} +default_russian version {simple} +default_russian part_hword {simple} +default_russian nlpart_hword {ru_stem} +default_russian lpart_hword {en_stem} +default_russian hword {ru_stem} +default_russian lhword {en_stem} +default_russian nlhword {ru_stem} +default_russian uri {simple} +default_russian file {simple} +default_russian float {simple} +default_russian int {simple} +default_russian uint {simple} +simple lword {simple} +simple nlword {simple} +simple word {simple} +simple email {simple} +simple url {simple} +simple host {simple} +simple sfloat {simple} +simple version {simple} +simple part_hword {simple} +simple nlpart_hword {simple} +simple lpart_hword {simple} +simple hword {simple} +simple lhword {simple} +simple nlhword {simple} +simple uri {simple} +simple file {simple} +simple float {simple} +simple int {simple} +simple uint {simple} +\. 
+ +--tsvector type +CREATE FUNCTION tsvector_in(cstring) +RETURNS tsvector +AS 'MODULE_PATHNAME' +LANGUAGE 'C' with (isstrict); + +CREATE FUNCTION tsvector_out(tsvector) +RETURNS cstring +AS 'MODULE_PATHNAME' +LANGUAGE 'C' with (isstrict); + +CREATE TYPE tsvector ( + INTERNALLENGTH = -1, + INPUT = tsvector_in, + OUTPUT = tsvector_out, + STORAGE = extended +); + +CREATE FUNCTION length(tsvector) +RETURNS int4 +AS 'MODULE_PATHNAME', 'tsvector_length' +LANGUAGE 'C' with (isstrict,iscachable); + +CREATE FUNCTION to_tsvector(oid, text) +RETURNS tsvector +AS 'MODULE_PATHNAME' +LANGUAGE 'C' with (isstrict,iscachable); + +CREATE FUNCTION to_tsvector(text, text) +RETURNS tsvector +AS 'MODULE_PATHNAME', 'to_tsvector_name' +LANGUAGE 'C' with (isstrict,iscachable); + +CREATE FUNCTION to_tsvector(text) +RETURNS tsvector +AS 'MODULE_PATHNAME', 'to_tsvector_current' +LANGUAGE 'C' with (isstrict,iscachable); + +CREATE FUNCTION strip(tsvector) +RETURNS tsvector +AS 'MODULE_PATHNAME' +LANGUAGE 'C' with (isstrict,iscachable); + +CREATE FUNCTION setweight(tsvector,"char") +RETURNS tsvector +AS 'MODULE_PATHNAME' +LANGUAGE 'C' with (isstrict,iscachable); + +CREATE FUNCTION concat(tsvector,tsvector) +RETURNS tsvector +AS 'MODULE_PATHNAME' +LANGUAGE 'C' with (isstrict,iscachable); + +CREATE OPERATOR || ( + LEFTARG = tsvector, + RIGHTARG = tsvector, + PROCEDURE = concat +); + +--query type +CREATE FUNCTION tsquery_in(cstring) +RETURNS tsquery +AS 'MODULE_PATHNAME' +LANGUAGE 'C' with (isstrict); + +CREATE FUNCTION tsquery_out(tsquery) +RETURNS cstring +AS 'MODULE_PATHNAME' +LANGUAGE 'C' with (isstrict); + +CREATE TYPE tsquery ( + INTERNALLENGTH = -1, + INPUT = tsquery_in, + OUTPUT = tsquery_out +); + +CREATE FUNCTION querytree(tsquery) +RETURNS text +AS 'MODULE_PATHNAME', 'tsquerytree' +LANGUAGE 'C' with (isstrict); + +CREATE FUNCTION to_tsquery(oid, text) +RETURNS tsquery +AS 'MODULE_PATHNAME' +LANGUAGE 'c' with (isstrict,iscachable); + +CREATE FUNCTION to_tsquery(text, text) +RETURNS 
tsquery +AS 'MODULE_PATHNAME','to_tsquery_name' +LANGUAGE 'c' with (isstrict,iscachable); + +CREATE FUNCTION to_tsquery(text) +RETURNS tsquery +AS 'MODULE_PATHNAME','to_tsquery_current' +LANGUAGE 'c' with (isstrict,iscachable); + +--operations +CREATE FUNCTION exectsq(tsvector, tsquery) +RETURNS bool +AS 'MODULE_PATHNAME' +LANGUAGE 'C' with (isstrict, iscachable); + +COMMENT ON FUNCTION exectsq(tsvector, tsquery) IS 'boolean operation with text index'; + +CREATE FUNCTION rexectsq(tsquery, tsvector) +RETURNS bool +AS 'MODULE_PATHNAME' +LANGUAGE 'C' with (isstrict, iscachable); + +COMMENT ON FUNCTION rexectsq(tsquery, tsvector) IS 'boolean operation with text index'; + +CREATE OPERATOR @@ ( + LEFTARG = tsvector, + RIGHTARG = tsquery, + PROCEDURE = exectsq, + COMMUTATOR = '@@', + RESTRICT = contsel, + JOIN = contjoinsel +); +CREATE OPERATOR @@ ( + LEFTARG = tsquery, + RIGHTARG = tsvector, + PROCEDURE = rexectsq, + COMMUTATOR = '@@', + RESTRICT = contsel, + JOIN = contjoinsel +); + +--Trigger +CREATE FUNCTION tsearch2() +RETURNS trigger +AS 'MODULE_PATHNAME' +LANGUAGE 'C'; + +--Relevation +CREATE FUNCTION rank(float4[], tsvector, tsquery) +RETURNS float4 +AS 'MODULE_PATHNAME' +LANGUAGE 'C' WITH (isstrict, iscachable); + +CREATE FUNCTION rank(float4[], tsvector, tsquery, int4) +RETURNS float4 +AS 'MODULE_PATHNAME' +LANGUAGE 'C' WITH (isstrict, iscachable); + +CREATE FUNCTION rank(tsvector, tsquery) +RETURNS float4 +AS 'MODULE_PATHNAME', 'rank_def' +LANGUAGE 'C' WITH (isstrict, iscachable); + +CREATE FUNCTION rank(tsvector, tsquery, int4) +RETURNS float4 +AS 'MODULE_PATHNAME', 'rank_def' +LANGUAGE 'C' WITH (isstrict, iscachable); + +CREATE FUNCTION rank_cd(int4, tsvector, tsquery) +RETURNS float4 +AS 'MODULE_PATHNAME' +LANGUAGE 'C' WITH (isstrict, iscachable); + +CREATE FUNCTION rank_cd(int4, tsvector, tsquery, int4) +RETURNS float4 +AS 'MODULE_PATHNAME' +LANGUAGE 'C' WITH (isstrict, iscachable); + +CREATE FUNCTION rank_cd(tsvector, tsquery) +RETURNS float4 +AS 
'MODULE_PATHNAME', 'rank_cd_def' +LANGUAGE 'C' WITH (isstrict, iscachable); + +CREATE FUNCTION rank_cd(tsvector, tsquery, int4) +RETURNS float4 +AS 'MODULE_PATHNAME', 'rank_cd_def' +LANGUAGE 'C' WITH (isstrict, iscachable); + +CREATE FUNCTION headline(oid, text, tsquery, text) +RETURNS text +AS 'MODULE_PATHNAME', 'headline' +LANGUAGE 'C' WITH (isstrict, iscachable); + +CREATE FUNCTION headline(oid, text, tsquery) +RETURNS text +AS 'MODULE_PATHNAME', 'headline' +LANGUAGE 'C' WITH (isstrict, iscachable); + +CREATE FUNCTION headline(text, text, tsquery, text) +RETURNS text +AS 'MODULE_PATHNAME', 'headline_byname' +LANGUAGE 'C' WITH (isstrict, iscachable); + +CREATE FUNCTION headline(text, text, tsquery) +RETURNS text +AS 'MODULE_PATHNAME', 'headline_byname' +LANGUAGE 'C' WITH (isstrict, iscachable); + +CREATE FUNCTION headline(text, tsquery, text) +RETURNS text +AS 'MODULE_PATHNAME', 'headline_current' +LANGUAGE 'C' WITH (isstrict, iscachable); + +CREATE FUNCTION headline(text, tsquery) +RETURNS text +AS 'MODULE_PATHNAME', 'headline_current' +LANGUAGE 'C' WITH (isstrict, iscachable); + +--GiST +--GiST key type +CREATE FUNCTION gtsvector_in(cstring) +RETURNS gtsvector +AS 'MODULE_PATHNAME' +LANGUAGE 'C' with (isstrict); + +CREATE FUNCTION gtsvector_out(gtsvector) +RETURNS cstring +AS 'MODULE_PATHNAME' +LANGUAGE 'C' with (isstrict); + +CREATE TYPE gtsvector ( + INTERNALLENGTH = -1, + INPUT = gtsvector_in, + OUTPUT = gtsvector_out +); + +-- support FUNCTIONs +CREATE FUNCTION gtsvector_consistent(gtsvector,internal,int4) +RETURNS bool +AS 'MODULE_PATHNAME' +LANGUAGE 'C'; + +CREATE FUNCTION gtsvector_compress(internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE 'C'; + +CREATE FUNCTION gtsvector_decompress(internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE 'C'; + +CREATE FUNCTION gtsvector_penalty(internal,internal,internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE 'C' with (isstrict); + +CREATE FUNCTION gtsvector_picksplit(internal, internal) 
+RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE 'C'; + +CREATE FUNCTION gtsvector_union(bytea, internal) +RETURNS _int4 +AS 'MODULE_PATHNAME' +LANGUAGE 'C'; + +CREATE FUNCTION gtsvector_same(gtsvector, gtsvector, internal) +RETURNS internal +AS 'MODULE_PATHNAME' +LANGUAGE 'C'; + +-- CREATE the OPERATOR class +CREATE OPERATOR CLASS gist_tsvector_ops +DEFAULT FOR TYPE tsvector USING gist +AS + OPERATOR 1 @@ (tsvector, tsquery) RECHECK , + FUNCTION 1 gtsvector_consistent (gtsvector, internal, int4), + FUNCTION 2 gtsvector_union (bytea, internal), + FUNCTION 3 gtsvector_compress (internal), + FUNCTION 4 gtsvector_decompress (internal), + FUNCTION 5 gtsvector_penalty (internal, internal, internal), + FUNCTION 6 gtsvector_picksplit (internal, internal), + FUNCTION 7 gtsvector_same (gtsvector, gtsvector, internal), + STORAGE gtsvector; + + +--stat info +CREATE TYPE statinfo + as (word text, ndoc int4, nentry int4); + +--REATE FUNCTION tsstat_in(cstring) +--RETURNS tsstat +--AS 'MODULE_PATHNAME' +--LANGUAGE 'C' with (isstrict); +-- +--CREATE FUNCTION tsstat_out(tsstat) +--RETURNS cstring +--AS 'MODULE_PATHNAME' +--LANGUAGE 'C' with (isstrict); +-- +--CREATE TYPE tsstat ( +-- INTERNALLENGTH = -1, +-- INPUT = tsstat_in, +-- OUTPUT = tsstat_out, +-- STORAGE = plain +--); +-- +--CREATE FUNCTION ts_accum(tsstat,tsvector) +--RETURNS tsstat +--AS 'MODULE_PATHNAME' +--LANGUAGE 'C' with (isstrict); +-- +--CREATE FUNCTION ts_accum_finish(tsstat) +-- returns setof statinfo +-- as 'MODULE_PATHNAME' +-- language 'C' +-- with (isstrict); +-- +--CREATE AGGREGATE stat ( +-- BASETYPE=tsvector, +-- SFUNC=ts_accum, +-- STYPE=tsstat, +-- FINALFUNC = ts_accum_finish, +-- initcond = '' +--); + +CREATE FUNCTION stat(text) + returns setof statinfo + as 'MODULE_PATHNAME', 'ts_stat' + language 'C' + with (isstrict); + +--reset - just for debuging +CREATE FUNCTION reset_tsearch() + returns void + as 'MODULE_PATHNAME' + language 'C' + with (isstrict); + +--get cover (debug for rank_cd) +CREATE 
FUNCTION get_covers(tsvector,tsquery) + returns text + as 'MODULE_PATHNAME' + language 'C' + with (isstrict); + + +--example of ISpell dictionary +--update pg_ts_dict set dict_initoption='DictFile="/usr/local/share/ispell/russian.dict" ,AffFile ="/usr/local/share/ispell/russian.aff", StopFile="/usr/local/share/ispell/russian.stop"' where dict_id=4; +--example of synonym dict +--update pg_ts_dict set dict_initoption='/usr/local/share/ispell/english.syn' where dict_id=5; +END; diff --git a/contrib/tsearch2/tsvector.c b/contrib/tsearch2/tsvector.c new file mode 100644 index 0000000000..ff0794da00 --- /dev/null +++ b/contrib/tsearch2/tsvector.c @@ -0,0 +1,804 @@ +/* + * In/Out definitions for tsvector type + * Internal structure: + * string of values, array of position lexem in string and it's length + * Teodor Sigaev + */ +#include "postgres.h" + +#include "access/gist.h" +#include "access/itup.h" +#include "utils/elog.h" +#include "utils/palloc.h" +#include "utils/builtins.h" +#include "storage/bufpage.h" +#include "executor/spi.h" +#include "commands/trigger.h" +#include "nodes/pg_list.h" +#include "catalog/namespace.h" + +#include "utils/pg_locale.h" + +#include /* tolower */ +#include "tsvector.h" +#include "query.h" +#include "ts_cfg.h" +#include "common.h" + +PG_FUNCTION_INFO_V1(tsvector_in); +Datum tsvector_in(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(tsvector_out); +Datum tsvector_out(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(to_tsvector); +Datum to_tsvector(PG_FUNCTION_ARGS); +PG_FUNCTION_INFO_V1(to_tsvector_current); +Datum to_tsvector_current(PG_FUNCTION_ARGS); +PG_FUNCTION_INFO_V1(to_tsvector_name); +Datum to_tsvector_name(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(tsearch2); +Datum tsearch2(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(tsvector_length); +Datum tsvector_length(PG_FUNCTION_ARGS); + +/* + * in/out text index type + */ +static int +comparePos(const void *a, const void *b) { + if ( ((WordEntryPos *) a)->pos == ((WordEntryPos *) b)->pos ) + return 
1; + return ( ((WordEntryPos *) a)->pos > ((WordEntryPos *) b)->pos ) ? 1 : -1; +} + +static int +uniquePos(WordEntryPos *a, int4 l) { + WordEntryPos *ptr, *res; + + res=a; + if (l==1) + return l; + + qsort((void *) a, l, sizeof(WordEntryPos), comparePos); + + ptr = a + 1; + while (ptr - a < l) { + if ( ptr->pos != res->pos ) { + res++; + res->pos = ptr->pos; + res->weight = ptr->weight; + if ( res-a >= MAXNUMPOS-1 || res->pos == MAXENTRYPOS-1 ) + break; + } else if ( ptr->weight > res->weight ) + res->weight = ptr->weight; + ptr++; + } + return res + 1 - a; +} + +static char *BufferStr; +static int +compareentry(const void *a, const void *b) +{ + if ( ((WordEntryIN *) a)->entry.len == ((WordEntryIN *) b)->entry.len) + { + return strncmp( + &BufferStr[((WordEntryIN *) a)->entry.pos], + &BufferStr[((WordEntryIN *) b)->entry.pos], + ((WordEntryIN *) a)->entry.len); + } + return ( ((WordEntryIN *) a)->entry.len > ((WordEntryIN *) b)->entry.len ) ? 1 : -1; +} + +static int +uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen) +{ + WordEntryIN *ptr, + *res; + + res = a; + if (l == 1) { + if ( a->entry.haspos ) { + *(uint16*)(a->pos) = uniquePos( &(a->pos[1]), *(uint16*)(a->pos)); + *outbuflen = SHORTALIGN(res->entry.len) + (*(uint16*)(a->pos) +1 )*sizeof(WordEntryPos); + } + return l; + } + + ptr = a + 1; + BufferStr = buf; + qsort((void *) a, l, sizeof(WordEntryIN), compareentry); + + while (ptr - a < l) + { + if (!(ptr->entry.len == res->entry.len && + strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], res->entry.len) == 0)) + { + if ( res->entry.haspos ) { + *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos)); + *outbuflen += *(uint16*)(res->pos) * sizeof(WordEntryPos); + } + *outbuflen += SHORTALIGN(res->entry.len); + res++; + memcpy(res,ptr,sizeof(WordEntryIN)); + } else if ( ptr->entry.haspos ){ + if ( res->entry.haspos ) { + int4 len=*(uint16*)(ptr->pos) + 1 + *(uint16*)(res->pos); + res->pos=(WordEntryPos*)repalloc( res->pos, 
len*sizeof(WordEntryPos)); + memcpy( &(res->pos[ *(uint16*)(res->pos) + 1 ]), + &(ptr->pos[1]), *(uint16*)(ptr->pos) * sizeof(WordEntryPos)); + *(uint16*)(res->pos) += *(uint16*)(ptr->pos); + pfree( ptr->pos ); + } else { + res->entry.haspos=1; + res->pos = ptr->pos; + } + } + ptr++; + } + if ( res->entry.haspos ) { + *(uint16*)(res->pos) = uniquePos( &(res->pos[1]), *(uint16*)(res->pos)); + *outbuflen += *(uint16*)(res->pos) * sizeof(WordEntryPos); + } + *outbuflen += SHORTALIGN(res->entry.len); + + return res + 1 - a; +} + +#define WAITWORD 1 +#define WAITENDWORD 2 +#define WAITNEXTCHAR 3 +#define WAITENDCMPLX 4 +#define WAITPOSINFO 5 +#define INPOSINFO 6 +#define WAITPOSDELIM 7 + +#define RESIZEPRSBUF \ +do { \ + if ( state->curpos - state->word + 1 >= state->len ) \ + { \ + int4 clen = state->curpos - state->word; \ + state->len *= 2; \ + state->word = (char*)repalloc( (void*)state->word, state->len ); \ + state->curpos = state->word + clen; \ + } \ +} while (0) + +int4 +gettoken_tsvector(TI_IN_STATE * state) +{ + int4 oldstate = 0; + + state->curpos = state->word; + state->state = WAITWORD; + state->alen=0; + + while (1) + { + if (state->state == WAITWORD) + { + if (*(state->prsbuf) == '\0') + return 0; + else if (*(state->prsbuf) == '\'') + state->state = WAITENDCMPLX; + else if (*(state->prsbuf) == '\\') + { + state->state = WAITNEXTCHAR; + oldstate = WAITENDWORD; + } + else if (state->oprisdelim && ISOPERATOR(*(state->prsbuf))) + elog(ERROR, "Syntax error"); + else if (*(state->prsbuf) != ' ') + { + *(state->curpos) = *(state->prsbuf); + state->curpos++; + state->state = WAITENDWORD; + } + } + else if (state->state == WAITNEXTCHAR) + { + if (*(state->prsbuf) == '\0') + elog(ERROR, "There is no escaped character"); + else + { + RESIZEPRSBUF; + *(state->curpos) = *(state->prsbuf); + state->curpos++; + state->state = oldstate; + } + } + else if (state->state == WAITENDWORD) + { + if (*(state->prsbuf) == '\\') + { + state->state = WAITNEXTCHAR; + oldstate = 
WAITENDWORD; + } + else if (*(state->prsbuf) == ' ' || *(state->prsbuf) == '\0' || + (state->oprisdelim && ISOPERATOR(*(state->prsbuf)))) + { + RESIZEPRSBUF; + if (state->curpos == state->word) + elog(ERROR, "Syntax error"); + *(state->curpos) = '\0'; + return 1; + } else if ( *(state->prsbuf) == ':' ) { + if (state->curpos == state->word) + elog(ERROR, "Syntax error"); + *(state->curpos) = '\0'; + if ( state->oprisdelim ) + return 1; + else + state->state = INPOSINFO; + } + else + { + RESIZEPRSBUF; + *(state->curpos) = *(state->prsbuf); + state->curpos++; + } + } + else if (state->state == WAITENDCMPLX) + { + if (*(state->prsbuf) == '\'') + { + RESIZEPRSBUF; + *(state->curpos) = '\0'; + if (state->curpos == state->word) + elog(ERROR, "Syntax error"); + if ( state->oprisdelim ) { + state->prsbuf++; + return 1; + } else + state->state = WAITPOSINFO; + } + else if (*(state->prsbuf) == '\\') + { + state->state = WAITNEXTCHAR; + oldstate = WAITENDCMPLX; + } + else if (*(state->prsbuf) == '\0') + elog(ERROR, "Syntax error"); + else + { + RESIZEPRSBUF; + *(state->curpos) = *(state->prsbuf); + state->curpos++; + } + } else if (state->state == WAITPOSINFO) { + if ( *(state->prsbuf) == ':' ) + state->state=INPOSINFO; + else + return 1; + } else if (state->state == INPOSINFO) { + if ( isdigit(*(state->prsbuf)) ) { + if ( state->alen==0 ) { + state->alen=4; + state->pos = (WordEntryPos*)palloc( sizeof(WordEntryPos)*state->alen ); + *(uint16*)(state->pos)=0; + } else if ( *(uint16*)(state->pos) +1 >= state->alen ) { + state->alen *= 2; + state->pos = (WordEntryPos*)repalloc( state->pos, sizeof(WordEntryPos)*state->alen ); + } + ( *(uint16*)(state->pos) )++; + state->pos[ *(uint16*)(state->pos) ].pos = LIMITPOS(atoi(state->prsbuf)); + if ( state->pos[ *(uint16*)(state->pos) ].pos == 0 ) + elog(ERROR,"Wrong position info"); + state->pos[ *(uint16*)(state->pos) ].weight = 0; + state->state = WAITPOSDELIM; + } else + elog(ERROR,"Syntax error"); + } else if (state->state == 
WAITPOSDELIM) { + if ( *(state->prsbuf) == ',' ) { + state->state = INPOSINFO; + } else if ( tolower(*(state->prsbuf)) == 'a' || *(state->prsbuf)=='*' ) { + if ( state->pos[ *(uint16*)(state->pos) ].weight ) + elog(ERROR,"Syntax error"); + state->pos[ *(uint16*)(state->pos) ].weight = 3; + } else if ( tolower(*(state->prsbuf)) == 'b' ) { + if ( state->pos[ *(uint16*)(state->pos) ].weight ) + elog(ERROR,"Syntax error"); + state->pos[ *(uint16*)(state->pos) ].weight = 2; + } else if ( tolower(*(state->prsbuf)) == 'c' ) { + if ( state->pos[ *(uint16*)(state->pos) ].weight ) + elog(ERROR,"Syntax error"); + state->pos[ *(uint16*)(state->pos) ].weight = 1; + } else if ( tolower(*(state->prsbuf)) == 'd' ) { + if ( state->pos[ *(uint16*)(state->pos) ].weight ) + elog(ERROR,"Syntax error"); + state->pos[ *(uint16*)(state->pos) ].weight = 0; + } else if ( isspace(*(state->prsbuf)) || *(state->prsbuf) == '\0' ) { + return 1; + } else if ( !isdigit(*(state->prsbuf)) ) + elog(ERROR,"Syntax error"); + } else + elog(ERROR, "Inner bug :("); + state->prsbuf++; + } + + return 0; +} + +Datum +tsvector_in(PG_FUNCTION_ARGS) +{ + char *buf = PG_GETARG_CSTRING(0); + TI_IN_STATE state; + WordEntryIN *arr; + WordEntry *inarr; + int4 len = 0, + totallen = 64; + tsvector *in; + char *tmpbuf, + *cur; + int4 i, + buflen = 256; + + state.prsbuf = buf; + state.len = 32; + state.word = (char *) palloc(state.len); + state.oprisdelim = false; + + arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen); + cur = tmpbuf = (char *) palloc(buflen); + while (gettoken_tsvector(&state)) + { + if (len >= totallen) + { + totallen *= 2; + arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen); + } + while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen) + { + int4 dist = cur - tmpbuf; + + buflen *= 2; + tmpbuf = (char *) repalloc((void *) tmpbuf, buflen); + cur = tmpbuf + dist; + } + if (state.curpos - state.word >= MAXSTRLEN) + elog(ERROR, "Word is too long"); + 
arr[len].entry.len= state.curpos - state.word; + if (cur - tmpbuf > MAXSTRPOS) + elog(ERROR, "Too long value"); + arr[len].entry.pos=cur - tmpbuf; + memcpy((void *) cur, (void *) state.word, arr[len].entry.len); + cur += arr[len].entry.len; + if ( state.alen ) { + arr[len].entry.haspos=1; + arr[len].pos = state.pos; + } else + arr[len].entry.haspos=0; + len++; + } + pfree(state.word); + + if ( len > 0 ) + len = uniqueentry(arr, len, tmpbuf, &buflen); + totallen = CALCDATASIZE(len, buflen); + in = (tsvector *) palloc(totallen); + memset(in,0,totallen); + in->len = totallen; + in->size = len; + cur = STRPTR(in); + inarr = ARRPTR(in); + for (i = 0; i < len; i++) + { + memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len); + arr[i].entry.pos=cur - STRPTR(in); + cur += SHORTALIGN(arr[i].entry.len); + if ( arr[i].entry.haspos ) { + memcpy( cur, arr[i].pos, (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos)); + cur += (*(uint16*)arr[i].pos + 1) * sizeof(WordEntryPos); + pfree( arr[i].pos ); + } + memcpy( &(inarr[i]), &(arr[i].entry), sizeof(WordEntry) ); + } + pfree(tmpbuf); + pfree(arr); + PG_RETURN_POINTER(in); +} + +Datum +tsvector_length(PG_FUNCTION_ARGS) +{ + tsvector *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); + int4 ret = in->size; + + PG_FREE_IF_COPY(in, 0); + PG_RETURN_INT32(ret); +} + +Datum +tsvector_out(PG_FUNCTION_ARGS) +{ + tsvector *out = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); + char *outbuf; + int4 i, + j, + lenbuf = 0, pp; + WordEntry *ptr = ARRPTR(out); + char *curin, + *curout; + + lenbuf=out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /*\0*/; + for (i = 0; i < out->size; i++) { + lenbuf += ptr[i].len*2 /*for escape */; + if ( ptr[i].haspos ) + lenbuf += 7*POSDATALEN(out, &(ptr[i])); + } + + curout = outbuf = (char *) palloc(lenbuf); + for (i = 0; i < out->size; i++) + { + curin = STRPTR(out)+ptr->pos; + if (i != 0) + *curout++ = ' '; + *curout++ = '\''; + j = ptr->len; + while (j--) + { + if (*curin 
== '\'') + { + int4 pos = curout - outbuf; + + outbuf = (char *) repalloc((void *) outbuf, ++lenbuf); + curout = outbuf + pos; + *curout++ = '\\'; + } + *curout++ = *curin++; + } + *curout++ = '\''; + if ( (pp=POSDATALEN(out,ptr)) != 0 ) { + WordEntryPos *wptr; + *curout++ = ':'; + wptr=POSDATAPTR(out,ptr); + while(pp) { + sprintf(curout,"%d",wptr->pos); + curout=strchr(curout,'\0'); + switch( wptr->weight ) { + case 3: *curout++ = 'A'; break; + case 2: *curout++ = 'B'; break; + case 1: *curout++ = 'C'; break; + case 0: + default: break; + } + if ( pp>1 ) *curout++ = ','; + pp--; wptr++; + } + } + ptr++; + } + *curout='\0'; + outbuf[lenbuf - 1] = '\0'; + PG_FREE_IF_COPY(out, 0); + PG_RETURN_POINTER(outbuf); +} + +static int +compareWORD(const void *a, const void *b) +{ + if (((WORD *) a)->len == ((WORD *) b)->len) { + int res = strncmp( + ((WORD *) a)->word, + ((WORD *) b)->word, + ((WORD *) b)->len); + if ( res==0 ) + return ( ((WORD *) a)->pos.pos > ((WORD *) b)->pos.pos ) ? 1 : -1; + return res; + } + return (((WORD *) a)->len > ((WORD *) b)->len) ? 
1 : -1; +} + +static int +uniqueWORD(WORD * a, int4 l) +{ + WORD *ptr, + *res; + int tmppos; + + if (l == 1) { + tmppos=LIMITPOS(a->pos.pos); + a->alen=2; + a->pos.apos=(uint16*)palloc( sizeof(uint16)*a->alen ); + a->pos.apos[0]=1; + a->pos.apos[1]=tmppos; + return l; + } + + res = a; + ptr = a + 1; + + qsort((void *) a, l, sizeof(WORD), compareWORD); + tmppos=LIMITPOS(a->pos.pos); + a->alen=2; + a->pos.apos=(uint16*)palloc( sizeof(uint16)*a->alen ); + a->pos.apos[0]=1; + a->pos.apos[1]=tmppos; + + while (ptr - a < l) + { + if (!(ptr->len == res->len && + strncmp(ptr->word, res->word, res->len) == 0)) + { + res++; + res->len = ptr->len; + res->word = ptr->word; + tmppos=LIMITPOS(ptr->pos.pos); + res->alen=2; + res->pos.apos=(uint16*)palloc( sizeof(uint16)*res->alen ); + res->pos.apos[0]=1; + res->pos.apos[1]=tmppos; + } else { + pfree(ptr->word); + if ( res->pos.apos[0] < MAXNUMPOS-1 && res->pos.apos[ res->pos.apos[0] ] != MAXENTRYPOS-1 ) { + if ( res->pos.apos[0]+1 >= res->alen ) { + res->alen*=2; + res->pos.apos=(uint16*)repalloc( res->pos.apos, sizeof(uint16)*res->alen ); + } + res->pos.apos[ res->pos.apos[0]+1 ] = LIMITPOS(ptr->pos.pos); + res->pos.apos[0]++; + } + } + ptr++; + } + + return res + 1 - a; +} + +/* + * make value of tsvector + */ +static tsvector * +makevalue(PRSTEXT * prs) +{ + int4 i,j, + lenstr = 0, + totallen; + tsvector *in; + WordEntry *ptr; + char *str, + *cur; + + prs->curwords = uniqueWORD(prs->words, prs->curwords); + for (i = 0; i < prs->curwords; i++) { + lenstr += SHORTALIGN(prs->words[i].len); + + if ( prs->words[i].alen ) + lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos); + } + + totallen = CALCDATASIZE(prs->curwords, lenstr); + in = (tsvector *) palloc(totallen); + memset(in,0,totallen); + in->len = totallen; + in->size = prs->curwords; + + ptr = ARRPTR(in); + cur = str = STRPTR(in); + for (i = 0; i < prs->curwords; i++) + { + ptr->len = prs->words[i].len; + if (cur - str > MAXSTRPOS) + elog(ERROR, 
"Value is too big"); + ptr->pos= cur - str; + memcpy((void *) cur, (void *) prs->words[i].word, prs->words[i].len); + pfree(prs->words[i].word); + cur += SHORTALIGN(prs->words[i].len); + if ( prs->words[i].alen ) { + WordEntryPos *wptr; + + ptr->haspos=1; + *(uint16*)cur = prs->words[i].pos.apos[0]; + wptr=POSDATAPTR(in,ptr); + for(j=0;j<*(uint16*)cur;j++) { + wptr[j].weight=0; + wptr[j].pos=prs->words[i].pos.apos[j+1]; + } + cur += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos); + pfree(prs->words[i].pos.apos); + } else + ptr->haspos=0; + ptr++; + } + pfree(prs->words); + return in; +} + + +Datum +to_tsvector(PG_FUNCTION_ARGS) +{ + text *in = PG_GETARG_TEXT_P(1); + PRSTEXT prs; + tsvector *out = NULL; + TSCfgInfo *cfg=findcfg(PG_GETARG_INT32(0)); + + prs.lenwords = 32; + prs.curwords = 0; + prs.pos = 0; + prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords); + + parsetext_v2(cfg, &prs, VARDATA(in), VARSIZE(in) - VARHDRSZ); + PG_FREE_IF_COPY(in, 1); + + if (prs.curwords) + out = makevalue(&prs); + else { + pfree(prs.words); + out = palloc(CALCDATASIZE(0,0)); + out->len = CALCDATASIZE(0,0); + out->size = 0; + } + PG_RETURN_POINTER(out); +} + +Datum +to_tsvector_name(PG_FUNCTION_ARGS) { + text *cfg=PG_GETARG_TEXT_P(0); + Datum res = DirectFunctionCall3( + to_tsvector, + Int32GetDatum( name2id_cfg( cfg ) ), + PG_GETARG_DATUM(1), + (Datum)0 + ); + PG_FREE_IF_COPY(cfg,0); + PG_RETURN_DATUM(res); +} + +Datum +to_tsvector_current(PG_FUNCTION_ARGS) { + Datum res = DirectFunctionCall3( + to_tsvector, + Int32GetDatum( get_currcfg() ), + PG_GETARG_DATUM(0), + (Datum)0 + ); + PG_RETURN_DATUM(res); +} + +static Oid +findFunc(char *fname) { + FuncCandidateList clist,ptr; + Oid funcid = InvalidOid; + List *names=makeList1(makeString(fname)); + + ptr = clist = FuncnameGetCandidates(names, 1); + freeList(names); + + if ( !ptr ) + return funcid; + + while(ptr) { + if ( ptr->args[0] == TEXTOID && funcid == InvalidOid ) + funcid=ptr->oid; + clist=ptr->next; + 
pfree(ptr); + ptr=clist; + } + + return funcid; +} + +/* + * Trigger + */ +Datum +tsearch2(PG_FUNCTION_ARGS) +{ + TriggerData *trigdata; + Trigger *trigger; + Relation rel; + HeapTuple rettuple = NULL; + TSCfgInfo *cfg=findcfg(get_currcfg()); + int numidxattr, + i; + PRSTEXT prs; + Datum datum = (Datum) 0; + Oid funcoid = InvalidOid; + + if (!CALLED_AS_TRIGGER(fcinfo)) + elog(ERROR, "TSearch: Not fired by trigger manager"); + + trigdata = (TriggerData *) fcinfo->context; + if (TRIGGER_FIRED_FOR_STATEMENT(trigdata->tg_event)) + elog(ERROR, "TSearch: Can't process STATEMENT events"); + if (TRIGGER_FIRED_AFTER(trigdata->tg_event)) + elog(ERROR, "TSearch: Must be fired BEFORE event"); + + if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event)) + rettuple = trigdata->tg_trigtuple; + else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event)) + rettuple = trigdata->tg_newtuple; + else + elog(ERROR, "TSearch: Unknown event"); + + trigger = trigdata->tg_trigger; + rel = trigdata->tg_relation; + + if (trigger->tgnargs < 2) + elog(ERROR, "TSearch: format tsearch2(tsvector_field, text_field1,...)"); + + numidxattr = SPI_fnumber(rel->rd_att, trigger->tgargs[0]); + if (numidxattr == SPI_ERROR_NOATTRIBUTE) + elog(ERROR, "TSearch: Can not find tsvector_field"); + + prs.lenwords = 32; + prs.curwords = 0; + prs.pos = 0; + prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords); + + /* find all words in indexable column */ + for (i = 1; i < trigger->tgnargs; i++) + { + int numattr; + Oid oidtype; + Datum txt_toasted; + bool isnull; + text *txt; + + numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]); + if (numattr == SPI_ERROR_NOATTRIBUTE) + { + funcoid=findFunc(trigger->tgargs[i]); + if ( funcoid==InvalidOid ) + elog(ERROR,"TSearch: can't find function or field '%s'",trigger->tgargs[i]); + continue; + } + oidtype = SPI_gettypeid(rel->rd_att, numattr); + /* We assume char() and varchar() are binary-equivalent to text */ + if (!(oidtype == TEXTOID || + oidtype == VARCHAROID || + oidtype == 
BPCHAROID)) + { + elog(WARNING, "TSearch: '%s' is not of character type", + trigger->tgargs[i]); + continue; + } + txt_toasted = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull); + if (isnull) + continue; + + if ( funcoid!=InvalidOid ) { + text *txttmp = (text *) DatumGetPointer( OidFunctionCall1( + funcoid, + PointerGetDatum(txt_toasted) + )); + txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txttmp))); + if ( txt == txttmp ) + txt_toasted = PointerGetDatum(txt); + } else + txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txt_toasted))); + + parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ); + if (txt != (text*)DatumGetPointer(txt_toasted) ) + pfree(txt); + } + + /* make tsvector value */ + if (prs.curwords) + { + datum = PointerGetDatum(makevalue(&prs)); + rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr, + &datum, NULL); + pfree(DatumGetPointer(datum)); + } + else + { + tsvector *out = palloc(CALCDATASIZE(0,0)); + out->len = CALCDATASIZE(0,0); + out->size = 0; + datum = PointerGetDatum(out); + pfree(prs.words); + rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr, + &datum, NULL); + } + + if (rettuple == NULL) + elog(ERROR, "TSearch: %d returned by SPI_modifytuple", SPI_result); + + return PointerGetDatum(rettuple); +} diff --git a/contrib/tsearch2/tsvector.h b/contrib/tsearch2/tsvector.h new file mode 100644 index 0000000000..31e6a4b02c --- /dev/null +++ b/contrib/tsearch2/tsvector.h @@ -0,0 +1,71 @@ +#ifndef __TXTIDX_H__ +#define __TXTIDX_H__ + +/* +#define TXTIDX_DEBUG +*/ + +#include "postgres.h" + +#include "access/gist.h" +#include "access/itup.h" +#include "utils/elog.h" +#include "utils/palloc.h" +#include "utils/builtins.h" +#include "storage/bufpage.h" + +typedef struct { + uint32 + haspos:1, + len:11, /* MAX 2Kb */ + pos:20; /* MAX 1Mb */ +} WordEntry; +#define MAXSTRLEN ( 1<<11 ) +#define MAXSTRPOS ( 1<<20 ) + +typedef struct { + uint16 + weight:2, + pos:14; +} WordEntryPos; +#define 
MAXENTRYPOS (1<<14) +#define MAXNUMPOS 256 +#define LIMITPOS(x) ( ( (x) >= MAXENTRYPOS ) ? (MAXENTRYPOS-1) : (x) ) + +typedef struct +{ + int4 len; + int4 size; + char data[1]; +} tsvector; + +#define DATAHDRSIZE (sizeof(int4)*2) +#define CALCDATASIZE(x, lenstr) ( x * sizeof(WordEntry) + DATAHDRSIZE + lenstr ) +#define ARRPTR(x) ( (WordEntry*) ( (char*)x + DATAHDRSIZE ) ) +#define STRPTR(x) ( (char*)x + DATAHDRSIZE + ( sizeof(WordEntry) * ((tsvector*)x)->size ) ) +#define STRSIZE(x) ( ((tsvector*)x)->len - DATAHDRSIZE - ( sizeof(WordEntry) * ((tsvector*)x)->size ) ) +#define _POSDATAPTR(x,e) (STRPTR(x)+((WordEntry*)(e))->pos+SHORTALIGN(((WordEntry*)(e))->len)) +#define POSDATALEN(x,e) ( ( ((WordEntry*)(e))->haspos ) ? (*(uint16*)_POSDATAPTR(x,e)) : 0 ) +#define POSDATAPTR(x,e) ( (WordEntryPos*)( _POSDATAPTR(x,e)+sizeof(uint16) ) ) + + +typedef struct { + WordEntry entry; + WordEntryPos *pos; +} WordEntryIN; + +typedef struct +{ + char *prsbuf; + char *word; + char *curpos; + int4 len; + int4 state; + int4 alen; + WordEntryPos *pos; + bool oprisdelim; +} TI_IN_STATE; + +int4 gettoken_tsvector(TI_IN_STATE * state); + +#endif diff --git a/contrib/tsearch2/tsvector_op.c b/contrib/tsearch2/tsvector_op.c new file mode 100644 index 0000000000..3f38014813 --- /dev/null +++ b/contrib/tsearch2/tsvector_op.c @@ -0,0 +1,264 @@ +/* + * Operations for tsvector type + * Teodor Sigaev + */ +#include "postgres.h" + +#include "access/gist.h" +#include "access/itup.h" +#include "utils/elog.h" +#include "utils/palloc.h" +#include "utils/builtins.h" +#include "storage/bufpage.h" +#include "executor/spi.h" +#include "commands/trigger.h" +#include "nodes/pg_list.h" +#include "catalog/namespace.h" + +#include "utils/pg_locale.h" + +#include /* tolower */ +#include "tsvector.h" +#include "query.h" +#include "ts_cfg.h" +#include "common.h" + +PG_FUNCTION_INFO_V1(strip); +Datum strip(PG_FUNCTION_ARGS); + +PG_FUNCTION_INFO_V1(setweight); +Datum setweight(PG_FUNCTION_ARGS); + 
+PG_FUNCTION_INFO_V1(concat); +Datum concat(PG_FUNCTION_ARGS); + +Datum +strip(PG_FUNCTION_ARGS) +{ + tsvector *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); + tsvector *out; + int i,len=0; + WordEntry *arrin=ARRPTR(in), *arrout; + char *cur; + + for(i=0;isize;i++) + len += SHORTALIGN( arrin[i].len ); + + len = CALCDATASIZE(in->size, len); + out=(tsvector*)palloc(len); + memset(out,0,len); + out->len=len; + out->size=in->size; + arrout=ARRPTR(out); + cur=STRPTR(out); + for(i=0;isize;i++) { + memcpy(cur, STRPTR(in)+arrin[i].pos, arrin[i].len); + arrout[i].haspos = 0; + arrout[i].len = arrin[i].len; + arrout[i].pos = cur - STRPTR(out); + cur += SHORTALIGN( arrout[i].len ); + } + + PG_FREE_IF_COPY(in, 0); + PG_RETURN_POINTER(out); +} + +Datum +setweight(PG_FUNCTION_ARGS) +{ + tsvector *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); + char cw = PG_GETARG_CHAR(1); + tsvector *out; + int i,j; + WordEntry *entry; + WordEntryPos *p; + int w=0; + + switch(tolower(cw)) { + case 'a': w=3; break; + case 'b': w=2; break; + case 'c': w=1; break; + case 'd': w=0; break; + default: elog(ERROR,"Unknown weight"); + } + + out=(tsvector*)palloc(in->len); + memcpy(out,in,in->len); + entry=ARRPTR(out); + i=out->size; + while(i--) { + if ( (j=POSDATALEN(out,entry)) != 0 ) { + p=POSDATAPTR(out,entry); + while(j--) { + p->weight=w; + p++; + } + } + entry++; + } + + PG_FREE_IF_COPY(in, 0); + PG_RETURN_POINTER(out); +} + +static int +compareEntry(char *ptra, WordEntry* a, char *ptrb, WordEntry* b) +{ + if ( a->len == b->len) + { + return strncmp( + ptra + a->pos, + ptrb + b->pos, + a->len); + } + return ( a->len > b->len ) ? 1 : -1; +} + +static int4 +add_pos(tsvector *src, WordEntry *srcptr, tsvector *dest, WordEntry *destptr, int4 maxpos ) { + uint16 *clen = (uint16*)_POSDATAPTR(dest,destptr); + int i; + uint16 slen = POSDATALEN(src, srcptr), startlen; + WordEntryPos *spos=POSDATAPTR(src, srcptr), *dpos=POSDATAPTR(dest,destptr); + + if ( ! 
destptr->haspos ) + *clen=0; + + startlen = *clen; + for(i=0; ihaspos=1; + return *clen - startlen; +} + + +Datum +concat(PG_FUNCTION_ARGS) { + tsvector *in1 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); + tsvector *in2 = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1)); + tsvector *out; + WordEntry *ptr; + WordEntry *ptr1,*ptr2; + WordEntryPos *p; + int maxpos=0,i,j,i1,i2; + char *cur; + char *data,*data1,*data2; + + ptr=ARRPTR(in1); + i=in1->size; + while(i--) { + if ( (j=POSDATALEN(in1,ptr)) != 0 ) { + p=POSDATAPTR(in1,ptr); + while(j--) { + if ( p->pos > maxpos ) + maxpos = p->pos; + p++; + } + } + ptr++; + } + + ptr1=ARRPTR(in1); ptr2=ARRPTR(in2); + data1=STRPTR(in1); data2=STRPTR(in2); + i1=in1->size; i2=in2->size; + out=(tsvector*)palloc( in1->len + in2->len ); + memset(out,0,in1->len + in2->len); + out->len = in1->len + in2->len; + out->size = in1->size + in2->size; + data=cur=STRPTR(out); + ptr=ARRPTR(out); + while( i1 && i2 ) { + int cmp=compareEntry(data1,ptr1,data2,ptr2); + if ( cmp < 0 ) { /* in1 first */ + ptr->haspos = ptr1->haspos; + ptr->len = ptr1->len; + memcpy( cur, data1 + ptr1->pos, ptr1->len ); + ptr->pos = cur - data; + cur+=SHORTALIGN(ptr1->len); + if ( ptr->haspos ) { + memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16)); + cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16); + } + ptr++; ptr1++; i1--; + } else if ( cmp>0 ) { /* in2 first */ + ptr->haspos = ptr2->haspos; + ptr->len = ptr2->len; + memcpy( cur, data2 + ptr2->pos, ptr2->len ); + ptr->pos = cur - data; + cur+=SHORTALIGN(ptr2->len); + if ( ptr->haspos ) { + int addlen = add_pos(in2, ptr2, out, ptr, maxpos ); + if ( addlen == 0 ) + ptr->haspos=0; + else + cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); + } + ptr++; ptr2++; i2--; + } else { + ptr->haspos = ptr1->haspos | ptr2->haspos; + ptr->len = ptr1->len; + memcpy( cur, data1 + ptr1->pos, ptr1->len ); + ptr->pos = cur - data; + cur+=SHORTALIGN(ptr1->len); + 
if ( ptr->haspos ) { + if ( ptr1->haspos ) { + memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16)); + cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16); + if ( ptr2->haspos ) + cur += add_pos(in2, ptr2, out, ptr, maxpos )*sizeof(WordEntryPos); + } else if ( ptr2->haspos ) { + int addlen = add_pos(in2, ptr2, out, ptr, maxpos ); + if ( addlen == 0 ) + ptr->haspos=0; + else + cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); + } + } + ptr++; ptr1++; ptr2++; i1--; i2--; + } + } + + while(i1) { + ptr->haspos = ptr1->haspos; + ptr->len = ptr1->len; + memcpy( cur, data1 + ptr1->pos, ptr1->len ); + ptr->pos = cur - data; + cur+=SHORTALIGN(ptr1->len); + if ( ptr->haspos ) { + memcpy(cur, _POSDATAPTR(in1, ptr1), POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16)); + cur+=POSDATALEN(in1, ptr1)*sizeof(WordEntryPos) + sizeof(uint16); + } + ptr++; ptr1++; i1--; + } + + while(i2) { + ptr->haspos = ptr2->haspos; + ptr->len = ptr2->len; + memcpy( cur, data2 + ptr2->pos, ptr2->len ); + ptr->pos = cur - data; + cur+=SHORTALIGN(ptr2->len); + if ( ptr->haspos ) { + int addlen = add_pos(in2, ptr2, out, ptr, maxpos ); + if ( addlen == 0 ) + ptr->haspos=0; + else + cur += addlen*sizeof(WordEntryPos) + sizeof(uint16); + } + ptr++; ptr2++; i2--; + } + + out->size=ptr-ARRPTR(out); + out->len = CALCDATASIZE( out->size, cur-data ); + if ( data != STRPTR(out) ) + memmove( STRPTR(out), data, cur-data ); + + PG_FREE_IF_COPY(in1, 0); + PG_FREE_IF_COPY(in2, 1); + PG_RETURN_POINTER(out); +} + diff --git a/contrib/tsearch2/untsearch.sql.in b/contrib/tsearch2/untsearch.sql.in new file mode 100644 index 0000000000..a4fe145c6f --- /dev/null +++ b/contrib/tsearch2/untsearch.sql.in @@ -0,0 +1,62 @@ +BEGIN; + +--Be careful !!! 
+--script drops all indices, triggers and columns with types defined +--in tsearch2.sql + + +DROP OPERATOR CLASS gist_tsvector_ops USING gist CASCADE; + + +DROP OPERATOR || (tsvector, tsvector); +DROP OPERATOR @@ (tsvector, tsquery); +DROP OPERATOR @@ (tsquery, tsvector); + +DROP AGGREGATE stat(tsvector); + +DROP TABLE pg_ts_dict; +DROP TABLE pg_ts_parser; +DROP TABLE pg_ts_cfg; +DROP TABLE pg_ts_cfgmap; + +DROP TYPE tokentype CASCADE; +DROP TYPE tokenout CASCADE; +DROP TYPE tsvector CASCADE; +DROP TYPE tsquery CASCADE; +DROP TYPE gtsvector CASCADE; +DROP TYPE tsstat CASCADE; +DROP TYPE statinfo CASCADE; + +DROP FUNCTION lexize(oid, text) ; +DROP FUNCTION lexize(text, text); +DROP FUNCTION lexize(text); +DROP FUNCTION set_curdict(int); +DROP FUNCTION set_curdict(text); +DROP FUNCTION dex_init(text); +DROP FUNCTION dex_lexize(internal,internal,int4); +DROP FUNCTION snb_en_init(text); +DROP FUNCTION snb_lexize(internal,internal,int4); +DROP FUNCTION snb_ru_init(text); +DROP FUNCTION spell_init(text); +DROP FUNCTION spell_lexize(internal,internal,int4); +DROP FUNCTION syn_init(text); +DROP FUNCTION syn_lexize(internal,internal,int4); +DROP FUNCTION set_curprs(int); +DROP FUNCTION set_curprs(text); +DROP FUNCTION prsd_start(internal,int4); +DROP FUNCTION prsd_getlexeme(internal,internal,internal); +DROP FUNCTION prsd_end(internal); +DROP FUNCTION prsd_lextype(internal); +DROP FUNCTION prsd_headline(internal,internal,internal); +DROP FUNCTION set_curcfg(int); +DROP FUNCTION set_curcfg(text); +DROP FUNCTION show_curcfg(); +DROP FUNCTION gtsvector_compress(internal); +DROP FUNCTION gtsvector_decompress(internal); +DROP FUNCTION gtsvector_penalty(internal,internal,internal); +DROP FUNCTION gtsvector_picksplit(internal, internal); +DROP FUNCTION gtsvector_union(bytea, internal); +DROP FUNCTION reset_tsearch(); +DROP FUNCTION tsearch2() CASCADE; + +END; diff --git a/contrib/tsearch2/wordparser/deflex.c b/contrib/tsearch2/wordparser/deflex.c new file mode 100644 index 
0000000000..ea596c507c --- /dev/null +++ b/contrib/tsearch2/wordparser/deflex.c @@ -0,0 +1,56 @@ +#include "deflex.h" + +const char *lex_descr[]={ + "", + "Latin word", + "Non-latin word", + "Word", + "Email", + "URL", + "Host", + "Scientific notation", + "VERSION", + "Part of hyphenated word", + "Non-latin part of hyphenated word", + "Latin part of hyphenated word", + "Space symbols", + "HTML Tag", + "HTTP head", + "Hyphenated word", + "Latin hyphenated word", + "Non-latin hyphenated word", + "URI", + "File or path name", + "Decimal notation", + "Signed integer", + "Unsigned integer", + "HTML Entity" +}; + +const char *tok_alias[]={ + "", + "lword", + "nlword", + "word", + "email", + "url", + "host", + "sfloat", + "version", + "part_hword", + "nlpart_hword", + "lpart_hword", + "blank", + "tag", + "http", + "hword", + "lhword", + "nlhword", + "uri", + "file", + "float", + "int", + "uint", + "entity" +}; + diff --git a/contrib/tsearch2/wordparser/deflex.h b/contrib/tsearch2/wordparser/deflex.h new file mode 100644 index 0000000000..651d1f9e77 --- /dev/null +++ b/contrib/tsearch2/wordparser/deflex.h @@ -0,0 +1,34 @@ +#ifndef __DEFLEX_H__ +#define __DEFLEX_H__ + +/* rememder !!!! 
*/ +#define LASTNUM 23 + +#define LATWORD 1 +#define CYRWORD 2 +#define UWORD 3 +#define EMAIL 4 +#define FURL 5 +#define HOST 6 +#define SCIENTIFIC 7 +#define VERSIONNUMBER 8 +#define PARTHYPHENWORD 9 +#define CYRPARTHYPHENWORD 10 +#define LATPARTHYPHENWORD 11 +#define SPACE 12 +#define TAG 13 +#define HTTP 14 +#define HYPHENWORD 15 +#define LATHYPHENWORD 16 +#define CYRHYPHENWORD 17 +#define URI 18 +#define FILEPATH 19 +#define DECIMAL 20 +#define SIGNEDINT 21 +#define UNSIGNEDINT 22 +#define HTMLENTITY 23 + +extern const char *lex_descr[]; +extern const char *tok_alias[]; + +#endif diff --git a/contrib/tsearch2/wordparser/parser.h b/contrib/tsearch2/wordparser/parser.h new file mode 100644 index 0000000000..55cf0051ed --- /dev/null +++ b/contrib/tsearch2/wordparser/parser.h @@ -0,0 +1,11 @@ +#ifndef __PARSER_H__ +#define __PARSER_H__ + +char *token; +int tokenlen; +int tsearch2_yylex(void); +void start_parse_str(char *, int); +void start_parse_fh(FILE *, int); +void end_parse(void); + +#endif diff --git a/contrib/tsearch2/wordparser/parser.l b/contrib/tsearch2/wordparser/parser.l new file mode 100644 index 0000000000..49824f5525 --- /dev/null +++ b/contrib/tsearch2/wordparser/parser.l @@ -0,0 +1,346 @@ +%{ +#include "postgres.h" + +#include "deflex.h" +#include "parser.h" +#include "common.h" + +/* Avoid exit() on fatal scanner errors */ +#define fprintf(file, fmt, msg) ts_error(ERROR, fmt, msg) + +/* postgres allocation function */ +#define free pfree +#define malloc palloc +#define realloc repalloc + +#ifdef strdup +#undef strdup +#endif +#define strdup pstrdup + +char *token = NULL; /* pointer to token */ +char *s = NULL; /* to return WHOLE hyphenated-word */ + +YY_BUFFER_STATE buf = NULL; /* buffer to parse; it need for parse from string */ + +int lrlimit = -1; /* for limiting read from filehandle ( -1 - unlimited read ) */ +int bytestoread = 0; /* for limiting read from filehandle */ + +/* redefine macro for read limited length */ +#define 
YY_INPUT(buf,result,max_size) \ + if ( yy_current_buffer->yy_is_interactive ) { \ + int c = '*', n; \ + for ( n = 0; n < max_size && \ + (c = getc( tsearch2_yyin )) != EOF && c != '\n'; ++n ) \ + buf[n] = (char) c; \ + if ( c == '\n' ) \ + buf[n++] = (char) c; \ + if ( c == EOF && ferror( tsearch2_yyin ) ) \ + YY_FATAL_ERROR( "input in flex scanner failed" ); \ + result = n; \ + } else { \ + if ( lrlimit == 0 ) \ + result=YY_NULL; \ + else { \ + if ( lrlimit>0 ) { \ + bytestoread = ( lrlimit > max_size ) ? max_size : lrlimit; \ + lrlimit -= bytestoread; \ + } else \ + bytestoread = max_size; \ + if ( ((result = fread( buf, 1, bytestoread, tsearch2_yyin )) == 0) \ + && ferror( tsearch2_yyin ) ) \ + YY_FATAL_ERROR( "input in flex scanner failed" ); \ + } \ + } + +%} + +%option 8bit +%option never-interactive +%option nounput +%option noyywrap + +/* parser's state for parsing hyphenated-word */ +%x DELIM +/* parser's state for parsing URL*/ +%x URL +%x SERVER + +/* parser's state for parsing TAGS */ +%x INTAG +%x QINTAG +%x INCOMMENT +%x INSCRIPT + +/* cyrillic koi8 char */ +CYRALNUM [0-9\200-\377] +CYRALPHA [\200-\377] +ALPHA [a-zA-Z\200-\377] +ALNUM [0-9a-zA-Z\200-\377] + + +HOSTNAME ([-_[:alnum:]]+\.)+[[:alpha:]]+ +URI [-_[:alnum:]/%,\.;=&?#]+ + +%% + +"<"[Ss][Cc][Rr][Ii][Pp][Tt] { BEGIN INSCRIPT; } + +"" { + BEGIN INITIAL; + *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; + token = tsearch2_yytext; + tokenlen = tsearch2_yyleng; + return SPACE; +} + +"" { + BEGIN INITIAL; + *tsearch2_yytext=' '; *(tsearch2_yytext+1) = '\0'; + token = tsearch2_yytext; + tokenlen = tsearch2_yyleng; + return SPACE; +} + + +"<"[\![:alpha:]] { BEGIN INTAG; } + +""\"" { BEGIN QINTAG; } + +"\\\"" ; + +"\"" { BEGIN INTAG; } + +">" { + BEGIN INITIAL; + token = tsearch2_yytext; + *tsearch2_yytext=' '; + token = tsearch2_yytext; + tokenlen = 1; + return TAG; +} + +.|\n ; + +\&(quot|amp|nbsp|lt|gt)\; { + token = tsearch2_yytext; + tokenlen = tsearch2_yyleng; + return HTMLENTITY; +} + 
+\&\#[0-9][0-9]?[0-9]?\; { + token = tsearch2_yytext; + tokenlen = tsearch2_yyleng; + return HTMLENTITY; +} + +[-_\.[:alnum:]]+@{HOSTNAME} /* Emails */ { + token = tsearch2_yytext; + tokenlen = tsearch2_yyleng; + return EMAIL; +} + +[+-]?[0-9]+(\.[0-9]+)?[eEdD][+-]?[0-9]+ /* float */ { + token = tsearch2_yytext; + tokenlen = tsearch2_yyleng; + return SCIENTIFIC; +} + +[0-9]+\.[0-9]+\.[0-9\.]*[0-9] { + token = tsearch2_yytext; + tokenlen = tsearch2_yyleng; + return VERSIONNUMBER; +} + +[+-]?[0-9]+\.[0-9]+ { + token = tsearch2_yytext; + tokenlen = tsearch2_yyleng; + return DECIMAL; +} + +[+-][0-9]+ { + token = tsearch2_yytext; + tokenlen = tsearch2_yyleng; + return SIGNEDINT; +} + +[0-9]+ { + token = tsearch2_yytext; + tokenlen = tsearch2_yyleng; + return UNSIGNEDINT; +} + +http"://" { + BEGIN URL; + token = tsearch2_yytext; + tokenlen = tsearch2_yyleng; + return HTTP; +} + +ftp"://" { + BEGIN URL; + token = tsearch2_yytext; + tokenlen = tsearch2_yyleng; + return HTTP; +} + +{HOSTNAME}[/:]{URI} { + BEGIN SERVER; + if (s) { free(s); s=NULL; } + s = strdup( tsearch2_yytext ); + tokenlen = tsearch2_yyleng; + yyless( 0 ); + token = s; + return FURL; +} + +{HOSTNAME} { + token = tsearch2_yytext; + tokenlen = tsearch2_yyleng; + return HOST; +} + +[/:]{URI} { + token = tsearch2_yytext; + tokenlen = tsearch2_yyleng; + return URI; +} + +[[:alnum:]\./_-]+"/"[[:alnum:]\./_-]+ { + token = tsearch2_yytext; + tokenlen = tsearch2_yyleng; + return FILEPATH; +} + +({CYRALPHA}+-)+{CYRALPHA}+ /* composite-word */ { + BEGIN DELIM; + if (s) { free(s); s=NULL; } + s = strdup( tsearch2_yytext ); + tokenlen = tsearch2_yyleng; + yyless( 0 ); + token = s; + return CYRHYPHENWORD; +} + +([[:alpha:]]+-)+[[:alpha:]]+ /* composite-word */ { + BEGIN DELIM; + if (s) { free(s); s=NULL; } + s = strdup( tsearch2_yytext ); + tokenlen = tsearch2_yyleng; + yyless( 0 ); + token = s; + return LATHYPHENWORD; +} + +({ALNUM}+-)+{ALNUM}+ /* composite-word */ { + BEGIN DELIM; + if (s) { free(s); s=NULL; } + s = 
strdup( tsearch2_yytext ); + tokenlen = tsearch2_yyleng; + yyless( 0 ); + token = s; + return HYPHENWORD; +} + +[0-9]+\.[0-9]+\.[0-9\.]*[0-9] { + token = tsearch2_yytext; + tokenlen = tsearch2_yyleng; + return VERSIONNUMBER; +} + +\+?[0-9]+\.[0-9]+ { + token = tsearch2_yytext; + tokenlen = tsearch2_yyleng; + return DECIMAL; +} + +{CYRALPHA}+ /* one word in composite-word */ { + token = tsearch2_yytext; + tokenlen = tsearch2_yyleng; + return CYRPARTHYPHENWORD; +} + +[[:alpha:]]+ /* one word in composite-word */ { + token = tsearch2_yytext; + tokenlen = tsearch2_yyleng; + return LATPARTHYPHENWORD; +} + +{ALNUM}+ /* one word in composite-word */ { + token = tsearch2_yytext; + tokenlen = tsearch2_yyleng; + return PARTHYPHENWORD; +} + +- { + token = tsearch2_yytext; + tokenlen = tsearch2_yyleng; + return SPACE; +} + +.|\n /* return in basic state */ { + BEGIN INITIAL; + yyless( 0 ); +} + +{CYRALPHA}+ /* normal word */ { + token = tsearch2_yytext; + tokenlen = tsearch2_yyleng; + return CYRWORD; +} + +[[:alpha:]]+ /* normal word */ { + token = tsearch2_yytext; + tokenlen = tsearch2_yyleng; + return LATWORD; +} + +{ALNUM}+ /* normal word */ { + token = tsearch2_yytext; + tokenlen = tsearch2_yyleng; + return UWORD; +} + +[ \r\n\t]+ { + token = tsearch2_yytext; + tokenlen = tsearch2_yyleng; + return SPACE; +} + +. { + token = tsearch2_yytext; + tokenlen = tsearch2_yyleng; + return SPACE; +} + +%% + +/* clearing after parsing from string */ +void end_parse() { + if (s) { free(s); s=NULL; } + tsearch2_yy_delete_buffer( buf ); + buf = NULL; +} + +/* start parse from string */ +void start_parse_str(char* str, int limit) { + if (buf) end_parse(); + buf = tsearch2_yy_scan_bytes( str, limit ); + tsearch2_yy_switch_to_buffer( buf ); + BEGIN INITIAL; +} + +/* start parse from filehandle */ +void start_parse_fh( FILE* fh, int limit ) { + if (buf) end_parse(); + lrlimit = ( limit ) ? 
limit : -1; + buf = tsearch2_yy_create_buffer( fh, YY_BUF_SIZE ); + tsearch2_yy_switch_to_buffer( buf ); + BEGIN INITIAL; +} + + diff --git a/contrib/tsearch2/wparser.c b/contrib/tsearch2/wparser.c new file mode 100644 index 0000000000..deff94ce90 --- /dev/null +++ b/contrib/tsearch2/wparser.c @@ -0,0 +1,529 @@ +/* + * interface functions to parser + * Teodor Sigaev + */ +#include +#include +#include +#include + +#include "postgres.h" +#include "fmgr.h" +#include "utils/array.h" +#include "catalog/pg_type.h" +#include "executor/spi.h" +#include "funcapi.h" + +#include "wparser.h" +#include "ts_cfg.h" +#include "snmap.h" +#include "common.h" + +/*********top interface**********/ + +static void *plan_getparser=NULL; +static Oid current_parser_id=InvalidOid; + +void +init_prs(Oid id, WParserInfo *prs) { + Oid arg[1]={ OIDOID }; + bool isnull; + Datum pars[1]={ ObjectIdGetDatum(id) }; + int stat; + + memset(prs,0,sizeof(WParserInfo)); + SPI_connect(); + if ( !plan_getparser ) { + plan_getparser = SPI_saveplan( SPI_prepare( "select prs_start, prs_nexttoken, prs_end, prs_lextype, prs_headline from pg_ts_parser where oid = $1" , 1, arg ) ); + if ( !plan_getparser ) + ts_error(ERROR, "SPI_prepare() failed"); + } + + stat = SPI_execp(plan_getparser, pars, " ", 1); + if ( stat < 0 ) + ts_error (ERROR, "SPI_execp return %d", stat); + if ( SPI_processed > 0 ) { + Oid oid=InvalidOid; + oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) ); + fmgr_info_cxt(oid, &(prs->start_info), TopMemoryContext); + oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 2, &isnull) ); + fmgr_info_cxt(oid, &(prs->getlexeme_info), TopMemoryContext); + oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 3, &isnull) ); + fmgr_info_cxt(oid, &(prs->end_info), TopMemoryContext); + prs->lextype=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 4, &isnull) ); + 
oid=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 5, &isnull) ); + fmgr_info_cxt(oid, &(prs->headline_info), TopMemoryContext); + prs->prs_id=id; + } else + ts_error(ERROR, "No parser with id %d", id); + SPI_finish(); +} + +typedef struct { + WParserInfo *last_prs; + int len; + int reallen; + WParserInfo *list; + SNMap name2id_map; +} PrsList; + +static PrsList PList = {NULL,0,0,NULL,{0,0,NULL}}; + +void +reset_prs(void) { + freeSNMap( &(PList.name2id_map) ); + if ( PList.list ) + free(PList.list); + memset(&PList,0,sizeof(PrsList)); +} + +static int +compareprs(const void *a, const void *b) { + return ((WParserInfo*)a)->prs_id - ((WParserInfo*)b)->prs_id; +} + +WParserInfo * +findprs(Oid id) { + /* last used prs */ + if ( PList.last_prs && PList.last_prs->prs_id==id ) + return PList.last_prs; + + /* already used prs */ + if ( PList.len != 0 ) { + WParserInfo key; + key.prs_id=id; + PList.last_prs = bsearch(&key, PList.list, PList.len, sizeof(WParserInfo), compareprs); + if ( PList.last_prs != NULL ) + return PList.last_prs; + } + + /* last chance */ + if ( PList.len==PList.reallen ) { + WParserInfo *tmp; + int reallen = ( PList.reallen ) ? 2*PList.reallen : 16; + tmp=(WParserInfo*)realloc(PList.list,sizeof(WParserInfo)*reallen); + if ( !tmp ) + ts_error(ERROR,"No memory"); + PList.reallen=reallen; + PList.list=tmp; + } + PList.last_prs=&(PList.list[PList.len]); + init_prs(id, PList.last_prs); + PList.len++; + qsort(PList.list, PList.len, sizeof(WParserInfo), compareprs); + return findprs(id); /* qsort changed order!! 
*/; +} + +static void *plan_name2id=NULL; + +Oid +name2id_prs(text *name) { + Oid arg[1]={ TEXTOID }; + bool isnull; + Datum pars[1]={ PointerGetDatum(name) }; + int stat; + Oid id=findSNMap_t( &(PList.name2id_map), name ); + + if ( id ) + return id; + + + SPI_connect(); + if ( !plan_name2id ) { + plan_name2id = SPI_saveplan( SPI_prepare( "select oid from pg_ts_parser where prs_name = $1" , 1, arg ) ); + if ( !plan_name2id ) + ts_error(ERROR, "SPI_prepare() failed"); + } + + stat = SPI_execp(plan_name2id, pars, " ", 1); + if ( stat < 0 ) + ts_error (ERROR, "SPI_execp return %d", stat); + if ( SPI_processed > 0 ) + id=DatumGetObjectId( SPI_getbinval(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1, &isnull) ); + else + ts_error(ERROR, "No parser '%s'", text2char(name)); + SPI_finish(); + addSNMap_t( &(PList.name2id_map), name, id ); + return id; +} + + +/******sql-level interface******/ +typedef struct { + int cur; + LexDescr *list; +} TypeStorage; + +static void +setup_firstcall(FuncCallContext *funcctx, Oid prsid) { + TupleDesc tupdesc; + MemoryContext oldcontext; + TypeStorage *st; + WParserInfo *prs = findprs(prsid); + + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + st=(TypeStorage*)palloc( sizeof(TypeStorage) ); + st->cur=0; + st->list = (LexDescr*)DatumGetPointer( + OidFunctionCall1( prs->lextype, PointerGetDatum(prs->prs) ) + ); + funcctx->user_fctx = (void*)st; + tupdesc = RelationNameGetTupleDesc("tokentype"); + funcctx->slot = TupleDescGetSlot(tupdesc); + funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc); + MemoryContextSwitchTo(oldcontext); +} + +static Datum +process_call(FuncCallContext *funcctx) { + TypeStorage *st; + + st=(TypeStorage*)funcctx->user_fctx; + if ( st->list && st->list[st->cur].lexid ) { + Datum result; + char* values[3]; + char txtid[16]; + HeapTuple tuple; + + values[0]=txtid; + sprintf(txtid,"%d",st->list[st->cur].lexid); + values[1]=st->list[st->cur].alias; + values[2]=st->list[st->cur].descr; + + tuple 
= BuildTupleFromCStrings(funcctx->attinmeta, values); + result = TupleGetDatum(funcctx->slot, tuple); + + pfree(values[1]); + pfree(values[2]); + st->cur++; + return result; + } else { + if ( st->list ) pfree(st->list); + pfree(st); + } + return (Datum)0; +} + +PG_FUNCTION_INFO_V1(token_type); +Datum token_type(PG_FUNCTION_ARGS); + +Datum +token_type(PG_FUNCTION_ARGS) { + FuncCallContext *funcctx; + Datum result; + + if (SRF_IS_FIRSTCALL()) { + funcctx = SRF_FIRSTCALL_INIT(); + setup_firstcall(funcctx, PG_GETARG_OID(0) ); + } + + funcctx = SRF_PERCALL_SETUP(); + + if ( (result=process_call(funcctx)) != (Datum)0 ) + SRF_RETURN_NEXT(funcctx, result); + SRF_RETURN_DONE(funcctx); +} + +PG_FUNCTION_INFO_V1(token_type_byname); +Datum token_type_byname(PG_FUNCTION_ARGS); +Datum +token_type_byname(PG_FUNCTION_ARGS) { + FuncCallContext *funcctx; + Datum result; + + if (SRF_IS_FIRSTCALL()) { + text *name = PG_GETARG_TEXT_P(0); + funcctx = SRF_FIRSTCALL_INIT(); + setup_firstcall(funcctx, name2id_prs( name ) ); + PG_FREE_IF_COPY(name,0); + } + + funcctx = SRF_PERCALL_SETUP(); + + if ( (result=process_call(funcctx)) != (Datum)0 ) + SRF_RETURN_NEXT(funcctx, result); + SRF_RETURN_DONE(funcctx); +} + +PG_FUNCTION_INFO_V1(token_type_current); +Datum token_type_current(PG_FUNCTION_ARGS); +Datum +token_type_current(PG_FUNCTION_ARGS) { + FuncCallContext *funcctx; + Datum result; + + if (SRF_IS_FIRSTCALL()) { + funcctx = SRF_FIRSTCALL_INIT(); + if ( current_parser_id==InvalidOid ) + current_parser_id = name2id_prs( char2text("default") ); + setup_firstcall(funcctx, current_parser_id ); + } + + funcctx = SRF_PERCALL_SETUP(); + + if ( (result=process_call(funcctx)) != (Datum)0 ) + SRF_RETURN_NEXT(funcctx, result); + SRF_RETURN_DONE(funcctx); +} + + +PG_FUNCTION_INFO_V1(set_curprs); +Datum set_curprs(PG_FUNCTION_ARGS); +Datum +set_curprs(PG_FUNCTION_ARGS) { + findprs(PG_GETARG_OID(0)); + current_parser_id=PG_GETARG_OID(0); + PG_RETURN_VOID(); +} + +PG_FUNCTION_INFO_V1(set_curprs_byname); 
+Datum set_curprs_byname(PG_FUNCTION_ARGS); /* set_curprs_byname(text): resolves argument 0 with name2id_prs() and forwards the resulting id to set_curprs through DirectFunctionCall1. */ +Datum +set_curprs_byname(PG_FUNCTION_ARGS) { + text *name=PG_GETARG_TEXT_P(0); + + DirectFunctionCall1( + set_curprs, + ObjectIdGetDatum( name2id_prs(name) ) + ); + PG_FREE_IF_COPY(name, 0); + PG_RETURN_VOID(); +} + /* One parsed token: the type code returned by the parser and a NUL-terminated copy of the token text. */ +typedef struct { + int type; + char *lexem; +} LexemEntry; + /* Iteration state kept in funcctx->user_fctx. While the list is being built, cur is the fill position and len the allocated capacity; once setup finishes, len is the number of stored entries and cur the next entry to emit. */ +typedef struct { + int cur; + int len; + LexemEntry *list; +} PrsStorage; + + /* First-call setup shared by the parse functions: with multi_call_memory_ctx as the current memory context, runs the start/getlexeme/end callbacks of the parser returned by findprs(prsid) over the bytes of txt, copies every (type, lexeme) pair until getlexeme reports type 0, and stores the list together with the tuple descriptor of relation "tokenout" in funcctx. */ +static void +prs_setup_firstcall(FuncCallContext *funcctx, int prsid, text *txt) { + TupleDesc tupdesc; + MemoryContext oldcontext; + PrsStorage *st; + WParserInfo *prs = findprs(prsid); + char *lex=NULL; + int llen=0, type=0; + + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + st=(PrsStorage*)palloc( sizeof(PrsStorage) ); + st->cur=0; + st->len=16; + st->list=(LexemEntry*)palloc( sizeof(LexemEntry)*st->len ); + + prs->prs = (void*)DatumGetPointer( + FunctionCall2( + &(prs->start_info), + PointerGetDatum(VARDATA(txt)), + Int32GetDatum(VARSIZE(txt)-VARHDRSZ) + ) + ); + + while( ( type=DatumGetInt32(FunctionCall3( + &(prs->getlexeme_info), + PointerGetDatum(prs->prs), + PointerGetDatum(&lex), + PointerGetDatum(&llen))) ) != 0 ) { + + if ( st->cur>=st->len ) { /* array is full: double its capacity */ + st->len=2*st->len; + st->list=(LexemEntry*)repalloc(st->list, sizeof(LexemEntry)*st->len); + } + st->list[st->cur].lexem = palloc(llen+1); + memcpy( st->list[st->cur].lexem, lex, llen); + st->list[st->cur].lexem[llen]='\0'; + st->list[st->cur].type=type; + st->cur++; + } + + FunctionCall1( + &(prs->end_info), + PointerGetDatum(prs->prs) + ); + /* from here on len is the number of stored entries and cur restarts at 0 for output */ + st->len=st->cur; + st->cur=0; + + funcctx->user_fctx = (void*)st; + tupdesc = RelationNameGetTupleDesc("tokenout"); + funcctx->slot = TupleDescGetSlot(tupdesc); + funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc); + MemoryContextSwitchTo(oldcontext); +} + /* Per-call worker for the parse functions: while entries remain it builds a two-column tuple (type code printed as decimal text, lexeme), frees that lexeme and advances; once the list is exhausted it frees the storage and returns (Datum)0. */ +static Datum +prs_process_call(FuncCallContext *funcctx) { + PrsStorage *st; + + st=(PrsStorage*)funcctx->user_fctx; + if ( st->cur < st->len ) { + Datum result; + char* values[2]; + char 
tid[16]; + HeapTuple tuple; + + values[0]=tid; + sprintf(tid,"%d",st->list[st->cur].type); + values[1]=st->list[st->cur].lexem; + tuple = BuildTupleFromCStrings(funcctx->attinmeta, values); + result = TupleGetDatum(funcctx->slot, tuple); + /* the lexeme buffer of this entry is released as soon as its tuple has been built */ + pfree(values[1]); + st->cur++; + return result; + } else { + if ( st->list ) pfree(st->list); + pfree(st); + } + return (Datum)0; +} + + + /* parse(oid, text): set-returning function. The first call tokenizes argument 1 with the parser whose id is argument 0 via prs_setup_firstcall(); each call returns the next tuple from prs_process_call() until that yields (Datum)0. */ +PG_FUNCTION_INFO_V1(parse); +Datum parse(PG_FUNCTION_ARGS); +Datum +parse(PG_FUNCTION_ARGS) { + FuncCallContext *funcctx; + Datum result; + + if (SRF_IS_FIRSTCALL()) { + text *txt = PG_GETARG_TEXT_P(1); + funcctx = SRF_FIRSTCALL_INIT(); + prs_setup_firstcall(funcctx, PG_GETARG_OID(0),txt ); + PG_FREE_IF_COPY(txt,1); + } + + funcctx = SRF_PERCALL_SETUP(); + + if ( (result=prs_process_call(funcctx)) != (Datum)0 ) + SRF_RETURN_NEXT(funcctx, result); + SRF_RETURN_DONE(funcctx); +} + /* parse_byname(text, text): same as parse, except that argument 0 is a parser name resolved with name2id_prs(). */ +PG_FUNCTION_INFO_V1(parse_byname); +Datum parse_byname(PG_FUNCTION_ARGS); +Datum +parse_byname(PG_FUNCTION_ARGS) { + FuncCallContext *funcctx; + Datum result; + + if (SRF_IS_FIRSTCALL()) { + text *name = PG_GETARG_TEXT_P(0); + text *txt = PG_GETARG_TEXT_P(1); + funcctx = SRF_FIRSTCALL_INIT(); + prs_setup_firstcall(funcctx, name2id_prs( name ),txt ); + PG_FREE_IF_COPY(name,0); + PG_FREE_IF_COPY(txt,1); + } + + funcctx = SRF_PERCALL_SETUP(); + + if ( (result=prs_process_call(funcctx)) != (Datum)0 ) + SRF_RETURN_NEXT(funcctx, result); + SRF_RETURN_DONE(funcctx); +} + + /* parse_current(text): same as parse, using current_parser_id; while that is still InvalidOid it is first set from name2id_prs() applied to the name "default". The text to tokenize is argument 0. */ +PG_FUNCTION_INFO_V1(parse_current); +Datum parse_current(PG_FUNCTION_ARGS); +Datum +parse_current(PG_FUNCTION_ARGS) { + FuncCallContext *funcctx; + Datum result; + + if (SRF_IS_FIRSTCALL()) { + text *txt = PG_GETARG_TEXT_P(0); + funcctx = SRF_FIRSTCALL_INIT(); + if ( current_parser_id==InvalidOid ) + current_parser_id = name2id_prs( char2text("default") ); + prs_setup_firstcall(funcctx, current_parser_id,txt ); + PG_FREE_IF_COPY(txt,0); + } + + funcctx = SRF_PERCALL_SETUP(); + + if ( (result=prs_process_call(funcctx)) != (Datum)0 ) + 
SRF_RETURN_NEXT(funcctx, result); + SRF_RETURN_DONE(funcctx); +} + /* headline(cfg oid, text, query [, options text]): tokenizes the text with hlparsetext(), lets the headline callback of the parser selected by cfg->prs_id mark the words, and returns the text produced by genhl(). The options argument is optional; when it is absent or a NULL pointer, NULL is passed to the callback. */ +PG_FUNCTION_INFO_V1(headline); +Datum headline(PG_FUNCTION_ARGS); +Datum +headline(PG_FUNCTION_ARGS) { + TSCfgInfo *cfg=findcfg(PG_GETARG_OID(0)); + text *in = PG_GETARG_TEXT_P(1); + QUERYTYPE *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(2))); + text *opt=( PG_NARGS()>3 && PG_GETARG_POINTER(3) ) ? PG_GETARG_TEXT_P(3) : NULL; + HLPRSTEXT prs; + text *out; + WParserInfo *prsobj = findprs(cfg->prs_id); + + memset(&prs,0,sizeof(HLPRSTEXT)); + prs.lenwords = 32; + prs.words = (HLWORD *) palloc(sizeof(HLWORD) * prs.lenwords); + hlparsetext(cfg, &prs, query, VARDATA(in), VARSIZE(in) - VARHDRSZ); + + + FunctionCall3( + &(prsobj->headline_info), + PointerGetDatum(&prs), + PointerGetDatum(opt), + PointerGetDatum(query) + ); + + out = genhl(&prs); + /* startsel and stopsel are freed unconditionally below, so the headline callback is expected to have filled both in */ + PG_FREE_IF_COPY(in,1); + PG_FREE_IF_COPY(query,2); + if ( opt ) PG_FREE_IF_COPY(opt,3); + pfree(prs.words); + pfree(prs.startsel); + pfree(prs.stopsel); + + PG_RETURN_POINTER(out); +} + + /* headline_byname(cfg name, text, query [, options]): resolves the configuration name with name2id_cfg() and delegates to headline through DirectFunctionCall4, passing a NULL pointer when no options argument was supplied. */ +PG_FUNCTION_INFO_V1(headline_byname); +Datum headline_byname(PG_FUNCTION_ARGS); +Datum +headline_byname(PG_FUNCTION_ARGS) { + text *cfg=PG_GETARG_TEXT_P(0); + + Datum out=DirectFunctionCall4( + headline, + ObjectIdGetDatum(name2id_cfg( cfg ) ), + PG_GETARG_DATUM(1), + PG_GETARG_DATUM(2), + ( PG_NARGS()>3 ) ? PG_GETARG_DATUM(3) : PointerGetDatum(NULL) + ); + + PG_FREE_IF_COPY(cfg,0); + PG_RETURN_DATUM(out); +} + /* headline_current(text, query [, options]): delegates to headline with the configuration id returned by get_currcfg(). */ +PG_FUNCTION_INFO_V1(headline_current); +Datum headline_current(PG_FUNCTION_ARGS); +Datum +headline_current(PG_FUNCTION_ARGS) { + PG_RETURN_DATUM(DirectFunctionCall4( + headline, + ObjectIdGetDatum(get_currcfg()), + PG_GETARG_DATUM(0), + PG_GETARG_DATUM(1), + ( PG_NARGS()>2 ) ? 
PG_GETARG_DATUM(2) : PointerGetDatum(NULL) + )); +} + + + diff --git a/contrib/tsearch2/wparser.h b/contrib/tsearch2/wparser.h new file mode 100644 index 0000000000..a8afc564a1 --- /dev/null +++ b/contrib/tsearch2/wparser.h @@ -0,0 +1,28 @@ +#ifndef __WPARSER_H__ +#define __WPARSER_H__ +#include "postgres.h" +#include "fmgr.h" + /* Description of one word parser: its id, the fmgr lookup data for the start, getlexeme, end and headline callbacks, the lextype Oid, and prs, which callers fill with the pointer returned by the start callback and hand back to getlexeme and end. */ +typedef struct { + Oid prs_id; + FmgrInfo start_info; + FmgrInfo getlexeme_info; + FmgrInfo end_info; + FmgrInfo headline_info; + Oid lextype; + void *prs; +} WParserInfo; + +void init_prs(Oid id, WParserInfo *prs); +WParserInfo* findprs(Oid id); +Oid name2id_prs(text *name); +void reset_prs(void); + + /* Description of one token type: numeric id plus alias and description strings. prsd_lextype() ends its array with an entry whose lexid is 0. */ +typedef struct { + int lexid; + char *alias; + char *descr; +} LexDescr; + +#endif diff --git a/contrib/tsearch2/wparser_def.c b/contrib/tsearch2/wparser_def.c new file mode 100644 index 0000000000..eec8b03ab8 --- /dev/null +++ b/contrib/tsearch2/wparser_def.c @@ -0,0 +1,291 @@ +/* + * default word parser + * Teodor Sigaev + */ +#include +#include +#include + /* NOTE(review): the header names of the three #include lines above are missing from this copy of the patch; restore them from the original commit before building. */ +#include "postgres.h" +#include "utils/builtins.h" + +#include "dict.h" +#include "wparser.h" +#include "common.h" +#include "ts_cfg.h" +#include "wordparser/parser.h" +#include "wordparser/deflex.h" + /* prsd_lextype(): returns a palloc-allocated array holding one LexDescr per token type 1..LASTNUM, with alias and description copied from tok_alias[] and lex_descr[], followed by a terminating entry whose lexid is 0. */ +PG_FUNCTION_INFO_V1(prsd_lextype); +Datum prsd_lextype(PG_FUNCTION_ARGS); + +Datum +prsd_lextype(PG_FUNCTION_ARGS) { + LexDescr *descr=(LexDescr*)palloc(sizeof(LexDescr)*(LASTNUM+1)); + int i; + + for(i=1;i<=LASTNUM;i++) { + descr[i-1].lexid = i; + descr[i-1].alias = pstrdup(tok_alias[i]); + descr[i-1].descr = pstrdup(lex_descr[i]); + } + + descr[LASTNUM].lexid=0; + + PG_RETURN_POINTER(descr); +} + /* prsd_start(char *, int32): passes the buffer pointer and its length to start_parse_str() and returns a NULL pointer. */ +PG_FUNCTION_INFO_V1(prsd_start); +Datum prsd_start(PG_FUNCTION_ARGS); +Datum +prsd_start(PG_FUNCTION_ARGS) { + start_parse_str( (char*)PG_GETARG_POINTER(0), PG_GETARG_INT32(1) ); + PG_RETURN_POINTER(NULL); +} + /* prsd_getlexeme(state, char **, int *): takes the next token type from tsearch2_yylex() and reports the token text and length through arguments 1 and 2; argument 0 is not used. */ +PG_FUNCTION_INFO_V1(prsd_getlexeme); +Datum prsd_getlexeme(PG_FUNCTION_ARGS); +Datum +prsd_getlexeme(PG_FUNCTION_ARGS) { + /* ParserState *p=(ParserState*)PG_GETARG_POINTER(0); */ + 
char **t=(char**)PG_GETARG_POINTER(1); + int *tlen=(int*)PG_GETARG_POINTER(2); + int type=tsearch2_yylex(); + + *t = token; + *tlen = tokenlen; + PG_RETURN_INT32(type); +} + /* prsd_end(state): calls end_parse(); the state argument is not used. */ +PG_FUNCTION_INFO_V1(prsd_end); +Datum prsd_end(PG_FUNCTION_ARGS); +Datum +prsd_end(PG_FUNCTION_ARGS) { + /* ParserState *p=(ParserState*)PG_GETARG_POINTER(0); */ + end_parse(); + PG_RETURN_VOID(); +} + /* Token-type predicates used by the headline code. NOTE(review): the numeric ids are presumably the token types declared in wordparser/deflex.h; confirm before changing any of them. */ +#define LEAVETOKEN(x) ( (x)==12 ) +#define COMPLEXTOKEN(x) ( (x)==5 || (x)==15 || (x)==16 || (x)==17 ) +#define ENDPUNCTOKEN(x) ( (x)==12 ) + + +#define IDIGNORE(x) ( (x)==13 || (x)==14 || (x)==12 || (x)==23 ) +#define HLIDIGNORE(x) ( (x)==5 || (x)==13 || (x)==15 || (x)==16 || (x)==17 ) +#define NONWORDTOKEN(x) ( (x)==12 || HLIDIGNORE(x) ) +#define NOENDTOKEN(x) ( NONWORDTOKEN(x) || (x)==7 || (x)==8 || (x)==20 || (x)==21 || (x)==22 || IDIGNORE(x) ) + /* Window of len headline words starting at words, handed to TS_execute() as the check value. */ +typedef struct { + HLWORD *words; + int len; +} hlCheck; + /* TS_execute callback: true when query item val is attached to one of the words inside the hlCheck window. */ +static bool +checkcondition_HL(void *checkval, ITEM *val) { + int i; + for(i=0;i<((hlCheck*)checkval)->len;i++) { + if ( ((hlCheck*)checkval)->words[i].item==val ) + return true; + } + return false; +} + + /* hlCover: looks in prs->words, from index *p onwards, for a span [*p,*q] on which the query evaluates to true. *q becomes the largest of the first-match positions of the VAL items at or after the start; *p becomes the smallest of their last-match positions between the start and *q. When TS_execute() rejects the span, the search restarts one word after *p. Returns true with *p and *q set on success, false when no span is left; a resulting *q of 0 is also treated as no match. The loop bounds j<query->size and i<prs->curwords were restored here after being lost from this copy of the patch. */ +static bool +hlCover(HLPRSTEXT *prs, QUERYTYPE *query, int *p, int *q) { + int i,j; + ITEM *item=GETQUERY(query); + int pos=*p; + *q=0; + *p=0x7fffffff; + + for(j=0;j<query->size;j++) { + if ( item->type != VAL ) { + item++; + continue; + } + for(i=pos;i<prs->curwords;i++) { + if ( prs->words[i].item == item ) { + if ( i>*q) + *q = i; + break; + } + } + item++; + } + + if ( *q==0 ) + return false; + + item=GETQUERY(query); + for(j=0;j<query->size;j++) { + if ( item->type != VAL ) { + item++; + continue; + } + for(i=*q;i>=pos;i--) { + if ( prs->words[i].item == item ) { + if ( i<*p ) + *p=i; + break; + } + } + item++; + } + + if ( *p<=*q ) { + hlCheck ch={ &(prs->words[*p]), *q-*p+1 }; + if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_HL) ) { + return true; + } else { + (*p)++; + return hlCover(prs,query,p,q); + } + } + + return false; +} + +PG_FUNCTION_INFO_V1(prsd_headline); +Datum 
prsd_headline(PG_FUNCTION_ARGS); +Datum +prsd_headline(PG_FUNCTION_ARGS) { + HLPRSTEXT *prs=(HLPRSTEXT*)PG_GETARG_POINTER(0); + text *opt=(text*)PG_GETARG_POINTER(1); /* can't be toasted */ + QUERYTYPE *query=(QUERYTYPE*)PG_GETARG_POINTER(2); /* can't be toasted */ + /* from opt + start and and tag */ + int min_words=15; + int max_words=35; + int shortword=3; + + int p=0,q=0; + int bestb=-1,beste=-1; + int bestlen=-1; + int pose=0, poslen, curlen; + + int i; + + /*config*/ + prs->startsel=NULL; + prs->stopsel=NULL; + if ( opt ) { + Map *map,*mptr; + + parse_cfgdict(opt,&map); + mptr=map; + + while(mptr && mptr->key) { + if ( strcasecmp(mptr->key,"MaxWords")==0 ) + max_words=pg_atoi(mptr->value,4,1); + else if ( strcasecmp(mptr->key,"MinWords")==0 ) + min_words=pg_atoi(mptr->value,4,1); + else if ( strcasecmp(mptr->key,"ShortWord")==0 ) + shortword=pg_atoi(mptr->value,4,1); + else if ( strcasecmp(mptr->key,"StartSel")==0 ) + prs->startsel=pstrdup(mptr->value); + else if ( strcasecmp(mptr->key,"StopSel")==0 ) + prs->stopsel=pstrdup(mptr->value); + + pfree(mptr->key); + pfree(mptr->value); + + mptr++; + } + pfree(map); + + if ( min_words >= max_words ) + elog(ERROR,"Must be MinWords < MaxWords"); + if ( min_words<=0 ) + elog(ERROR,"Must be MinWords > 0"); + if ( shortword<0 ) + elog(ERROR,"Must be ShortWord >= 0"); + } + + while( hlCover(prs,query,&p,&q) ) { + /* find cover len in words */ + curlen=0; + poslen=0; + for(i=p;i<=q && curlen < max_words ; i++) { + if ( !NONWORDTOKEN(prs->words[i].type) ) + curlen++; + if ( prs->words[i].item && !prs->words[i].repeated ) + poslen++; + pose=i; + } + + if ( poslenwords[beste].type) || prs->words[beste].len <= shortword) ) { + /* best already finded, so try one more cover */ + p++; + continue; + } + + if ( curlen < max_words ) { /* find good end */ + for(i=i-1 ;icurwords && curlenwords[i].type) ) + curlen++; + if ( prs->words[i].item && !prs->words[i].repeated ) + poslen++; + } + pose=i; + if ( NOENDTOKEN(prs->words[i].type) 
|| prs->words[i].len <= shortword ) + continue; + if ( curlen>=min_words ) + break; + } + } else { /* shorter cover :((( */ + for(;curlen>min_words;i--) { + if ( !NONWORDTOKEN(prs->words[i].type) ) + curlen--; + if ( prs->words[i].item && !prs->words[i].repeated ) + poslen--; + pose=i; + if ( NOENDTOKEN(prs->words[i].type) || prs->words[i].len <= shortword ) + continue; + break; + } + } + + if ( bestlen <0 || (poslen>bestlen && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword)) || + ( bestlen>=0 && !(NOENDTOKEN(prs->words[pose].type) || prs->words[pose].len <= shortword) && + (NOENDTOKEN(prs->words[beste].type) || prs->words[beste].len <= shortword) ) ) { + bestb=p; beste=pose; + bestlen=poslen; + } + + p++; + } + + if ( bestlen<0 ) { + curlen=0; + poslen=0; + for(i=0;icurwords && curlenwords[i].type) ) + curlen++; + pose=i; + } + bestb=0; beste=pose; + } + + for(i=bestb;i<=beste;i++) { + if ( prs->words[i].item ) + prs->words[i].selected=1; + if ( prs->words[i].repeated ) + prs->words[i].skip=1; + if ( HLIDIGNORE(prs->words[i].type) ) + prs->words[i].replace=1; + + prs->words[i].in=1; + } + + if (!prs->startsel) + prs->startsel=pstrdup(""); + if (!prs->stopsel) + prs->stopsel=pstrdup(""); + prs->startsellen=strlen(prs->startsel); + prs->stopsellen=strlen(prs->stopsel); + + PG_RETURN_POINTER(prs); +} + -- 2.11.0