OSDN Git Service

Fix URL generation in headline. Only tag lexeme will be replaced by space.
author: Teodor Sigaev <teodor@sigaev.ru>
Thu, 15 Jan 2009 16:33:59 +0000 (16:33 +0000)
committer: Teodor Sigaev <teodor@sigaev.ru>
Thu, 15 Jan 2009 16:33:59 +0000 (16:33 +0000)
Per http://archives.postgresql.org/pgsql-bugs/2008-12/msg00013.php

src/backend/tsearch/ts_parse.c
src/backend/tsearch/wparser_def.c
src/include/tsearch/ts_public.h

index f5239e8..21b7233 100644 (file)
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/tsearch/ts_parse.c,v 1.10 2009/01/01 17:23:48 momjian Exp $
+ *       $PostgreSQL: pgsql/src/backend/tsearch/ts_parse.c,v 1.11 2009/01/15 16:33:59 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -625,7 +625,7 @@ generateHeadline(HeadlineParsedText *prs)
                                *ptr = ' ';
                                ptr++;
                        }
-                       else
+                       else if (!wrd->skip)
                        {
                                if (wrd->selected)
                                {
index 1943c11..a414354 100644 (file)
@@ -7,7 +7,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.19 2009/01/15 16:33:28 teodor Exp $
+ *       $PostgreSQL: pgsql/src/backend/tsearch/wparser_def.c,v 1.20 2009/01/15 16:33:59 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1587,10 +1587,11 @@ prsd_end(PG_FUNCTION_ARGS)
 #define COMPLEXTOKEN(x) ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
 #define ENDPUNCTOKEN(x) ( (x)==SPACE )
 
-#define TS_IDIGNORE(x) ( (x)==TAG_T || (x)==PROTOCOL || (x)==SPACE || (x)==XMLENTITY )
-#define HLIDIGNORE(x) ( (x)==URL_T || (x)==TAG_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
-#define XMLHLIDIGNORE(x) ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
-#define NONWORDTOKEN(x) ( (x)==SPACE || HLIDIGNORE(x) )
+#define TS_IDIGNORE(x)  ( (x)==TAG_T || (x)==PROTOCOL || (x)==SPACE || (x)==XMLENTITY )
+#define HLIDREPLACE(x)  ( (x)==TAG_T )
+#define HLIDSKIP(x)     ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
+#define XMLHLIDSKIP(x)  ( (x)==URL_T || (x)==NUMHWORD || (x)==ASCIIHWORD || (x)==HWORD )
+#define NONWORDTOKEN(x) ( (x)==SPACE || HLIDREPLACE(x) || HLIDSKIP(x) )
 #define NOENDTOKEN(x)  ( NONWORDTOKEN(x) || (x)==SCIENTIFIC || (x)==VERSIONNUMBER || (x)==DECIMAL || (x)==SIGNEDINT || (x)==UNSIGNEDINT || TS_IDIGNORE(x) )
 
 typedef struct
@@ -1695,13 +1696,15 @@ mark_fragment(HeadlineParsedText *prs, int highlight, int startpos, int endpos)
                        prs->words[i].selected = 1;
                if (highlight == 0)
                {
-                       if (HLIDIGNORE(prs->words[i].type))
+                       if (HLIDREPLACE(prs->words[i].type))
                                prs->words[i].replace = 1;
+                       else if ( HLIDSKIP(prs->words[i].type) )
+                               prs->words[i].skip = 1;
                }
                else
                {
-                       if (XMLHLIDIGNORE(prs->words[i].type))
-                               prs->words[i].replace = 1;
+                       if (XMLHLIDSKIP(prs->words[i].type))
+                               prs->words[i].skip = 1;
                }
 
                prs->words[i].in = (prs->words[i].repeated) ? 0 : 1;
@@ -2050,13 +2053,15 @@ mark_hl_words(HeadlineParsedText *prs, TSQuery query, int highlight,
                        prs->words[i].selected = 1;
                if (highlight == 0)
                {
-                       if (HLIDIGNORE(prs->words[i].type))
+                       if (HLIDREPLACE(prs->words[i].type))
                                prs->words[i].replace = 1;
+                       else if ( HLIDSKIP(prs->words[i].type) )
+                               prs->words[i].skip = 1;
                }
                else
                {
-                       if (XMLHLIDIGNORE(prs->words[i].type))
-                               prs->words[i].replace = 1;
+                       if (XMLHLIDSKIP(prs->words[i].type))
+                               prs->words[i].skip = 1;
                }
 
                prs->words[i].in = (prs->words[i].repeated) ? 0 : 1;
index 1ae9a67..0bcc2be 100644 (file)
@@ -6,7 +6,7 @@
  *
  * Copyright (c) 1998-2009, PostgreSQL Global Development Group
  *
- * $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.12 2009/01/01 17:24:01 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.13 2009/01/15 16:33:59 teodor Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -38,7 +38,8 @@ typedef struct
                                in:1,
                                replace:1,
                                repeated:1,
-                               unused:4,
+                               skip:1,
+                               unused:3,
                                type:8,
                                len:16;
        char       *word;