From 685eed2d625353293c7875a5e94575fb5284c85f Mon Sep 17 00:00:00 2001 From: MasaoFujii Date: Thu, 22 Aug 2013 17:51:52 +0900 Subject: [PATCH] Get rid of USE_WIDE_UPPER_LOWER dependency in bigram construction. This follows pg_trgm commit 7844608e54a3a2e3dee461b00fd6ef028a845d7c, made by Tom Lane on Apr 7, 2013. Beena Emerson --- bigm_op.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/bigm_op.c b/bigm_op.c index aeedc3d..4b4207a 100644 --- a/bigm_op.c +++ b/bigm_op.c @@ -154,13 +154,16 @@ find_word(char *str, int lenstr, char **endword, int *charlen) return beginword; } -#ifdef USE_WIDE_UPPER_LOWER +/* + * The function is named compact_bigram to maintain consistency with pg_trgm, + * though it does not reduce multibyte characters to hash values like in + * compact_trigram. + */ static void -cnt_bigram(bigm *bptr, char *str, int bytelen) +compact_bigram(bigm *bptr, char *str, int bytelen) { CPBIGM(bptr, str, bytelen); } -#endif /* * Adds bigrams from words (already padded). 
@@ -172,25 +175,21 @@ make_bigrams(bigm *bptr, char *str, int bytelen, int charlen) if (charlen < 2) { -#ifdef USE_WIDE_UPPER_LOWER - cnt_bigram(bptr, ptr, pg_mblen(str)); -#else - CPBIGM(bptr, ptr, 1); -#endif + compact_bigram(bptr, ptr, pg_mblen(str)); bptr->pmatch = true; bptr++; return bptr; } -#ifdef USE_WIDE_UPPER_LOWER - if (pg_database_encoding_max_length() > 1) + if (bytelen > charlen) { + /* Find multibyte character boundaries and call compact_bigram */ int lenfirst = pg_mblen(str), lenlast = pg_mblen(str + lenfirst); while ((ptr - str) + lenfirst + lenlast <= bytelen) { - cnt_bigram(bptr, ptr, lenfirst + lenlast); + compact_bigram(bptr, ptr, lenfirst + lenlast); ptr += lenfirst; bptr++; @@ -200,8 +199,8 @@ make_bigrams(bigm *bptr, char *str, int bytelen, int charlen) } } else -#endif { + /* Fast path when there are no multibyte characters */ Assert(bytelen == charlen); while (ptr - str < bytelen - 1 /* number of bigrams = strlen - 1 */ ) -- 2.11.0