Properly indent SGML in textsearch.sgml.

author Bruce Momjian <bruce@momjian.us>

Wed, 29 Aug 2007 02:37:04 +0000 (02:37 +0000)

committer Bruce Momjian <bruce@momjian.us>

Wed, 29 Aug 2007 02:37:04 +0000 (02:37 +0000)
author Bruce Momjian <bruce@momjian.us>
Wed, 29 Aug 2007 02:37:04 +0000 (02:37 +0000)
committer Bruce Momjian <bruce@momjian.us>
Wed, 29 Aug 2007 02:37:04 +0000 (02:37 +0000)
diff --git a/doc/src/sgml/textsearch.sgml b/doc/src/sgml/textsearch.sgml

index ee2812a..afa4415 100644 (file)
--- a/doc/src/sgml/textsearch.sgml
+++ b/doc/src/sgml/textsearch.sgml
@@ -1,148 +1,173 @@
  <chapter id="textsearch">
-<title>Full Text Search</title>
-
-
-<sect1 id="textsearch-intro">
-<title>Introduction</title>
-
-<para>
-Full Text Searching (or just <firstterm>text search</firstterm>) allows
-identifying documents that satisfy a <firstterm>query</firstterm>, and
-optionally sorting them by relevance to the query. The most common search
-is to find all documents containing given <firstterm>query terms</firstterm>
-and return them in order of their <firstterm>similarity</firstterm> to the
-<varname>query</varname>.  Notions of <varname>query</varname> and
-<varname>similarity</varname> are very flexible and depend on the specific
-application. The simplest search considers <varname>query</varname> as a
-set of words and <varname>similarity</varname> as the frequency of query
-words in the document.  Full text indexing can be done inside the
-database or outside.  Doing indexing inside the database allows easy access
-to document metadata to assist in indexing and display.
-</para>
-
-<para>
-Textual search operators have existed in databases for years.
-<productname>PostgreSQL</productname> has
-<literal>~</literal>,<literal>~*</literal>, <literal>LIKE</literal>,
-<literal>ILIKE</literal> operators for textual datatypes, but they lack
-many essential properties required by modern information systems:
-
-<itemizedlist  spacing="compact" mark="bullet">
-<listitem>
-<para>
-There is no linguistic support, even for English.  Regular expressions are
-not sufficient because they cannot easily handle derived words,
-e.g., <literal>satisfies</literal> and <literal>satisfy</literal>. You might
-miss documents which contain <literal>satisfies</literal>, although you
-probably would like to find them when searching for
-<literal>satisfy</literal>. It is possible to use <literal>OR</literal>
-to search <emphasis>any</emphasis> of them, but it is tedious and error-prone
-(some words can have several thousand derivatives).
-</para>
-</listitem>
-<listitem><para>
-They provide no ordering (ranking) of search results, which makes them
-ineffective when thousands of matching documents are found.
-</para></listitem>
-<listitem>
-<para>
-They tend to be slow because they process all documents for every search and
-there is no index support.
-</para></listitem>
-</itemizedlist>
-
-</para>
-
-<para>
-Full text indexing allows documents to be <emphasis>preprocessed</emphasis>
-and an index saved for later rapid searching. Preprocessing includes:
-
-<itemizedlist  mark="none">
-<listitem><para>
-<emphasis>Parsing documents into <firstterm>lexemes</></emphasis>. It is
-useful to identify various lexemes, e.g. digits, words, complex words,
-email addresses, so they can be processed differently.  In principle
-lexemes depend on the specific application but for an ordinary search it
-is useful to have a predefined list of lexemes.  <!-- add list of lexemes.
--->
-</para></listitem>
-
-<listitem><para>
-<emphasis>Dictionaries</emphasis> allow the conversion of lexemes into
-a <emphasis>normalized form</emphasis> so it is not necessary to enter
-search words in a specific form.
-</para></listitem>
-
-<listitem><para>
-<emphasis>Store</emphasis> preprocessed documents
-optimized for searching.  For example, represent each document as a sorted array
-of lexemes. Along with lexemes it is desirable to store positional
-information to use for <varname>proximity ranking</varname>, so that a
-document which contains a more "dense" region of query words is assigned
-a higher rank than one with scattered query words.
-</para></listitem>
-</itemizedlist>
-</para>
-
-<para>
-Dictionaries allow fine-grained control over how lexemes are created.  With
-dictionaries you can:
-<itemizedlist  spacing="compact" mark="bullet">
-<listitem><para>
-Define "stop words" that should not be indexed.
-</para>
-</listitem>
-<listitem><para>
-Map synonyms to a single word using <application>ispell</>.
-</para></listitem>
-<listitem><para>
-Map phrases to a single word using a thesaurus.
-</para></listitem>
-<listitem><para>
-Map different variations of a word to a canonical form using
-an <application>ispell</> dictionary.
-</para></listitem>
-<listitem><para>
-Map different variations of a word to a canonical form using
-<application>snowball</> stemmer rules.
-</para></listitem>
-</itemizedlist>
-
-</para>
-
-<para>
-A data type (<xref linkend="textsearch-datatypes">), <type>tsvector</type>
-is provided, for storing preprocessed documents,
-along with a type <type>tsquery</type> for representing textual
-queries.  Also, a full text search operator <literal>@@</literal> is defined
-for these data types (<xref linkend="textsearch-searches">).  Full text
-searches can be accelerated using indexes (<xref
-linkend="textsearch-indexes">).
-</para>
-
-
-<sect2 id="textsearch-document">
-<title>What Is a <firstterm>Document</firstterm>?</title>
-
-<indexterm zone="textsearch-document">
-<primary>document</primary>
-</indexterm>
-
-<para>
-A document can be a simple text file stored in the file system.  The full
-text indexing engine can parse text files and store associations of lexemes
-(words) with their parent document. Later, these associations are used to
-search for documents which contain query words.  In this case, the database
-can be used to store the full text index and for executing searches, and
-some unique identifier can be used to retrieve the document from the file
-system.
-</para>
-
-<para>
-A document can also be any textual database attribute or a combination
-(concatenation), which in turn can be stored in various tables or obtained
-dynamically. In other words, a document can be constructed from different
-parts for indexing and it might not exist as a whole. For example:
+
+ <title>Full Text Search</title>
+
+
+ <sect1 id="textsearch-intro">
+ <title>Introduction</title>
+
+  <para>
+   Full Text Searching (or just <firstterm>text search</firstterm>) allows
+   identifying documents that satisfy a <firstterm>query</firstterm>, and
+   optionally sorting them by relevance to the query. The most common search
+   is to find all documents containing given <firstterm>query terms</firstterm>
+   and return them in order of their <firstterm>similarity</firstterm> to the
+   <varname>query</varname>.  Notions of <varname>query</varname> and
+   <varname>similarity</varname> are very flexible and depend on the specific
+   application. The simplest search considers <varname>query</varname> as a
+   set of words and <varname>similarity</varname> as the frequency of query
+   words in the document.  Full text indexing can be done inside the
+   database or outside.  Doing indexing inside the database allows easy access
+   to document metadata to assist in indexing and display.
+  </para>
+
+  <para>
+   Textual search operators have existed in databases for years.
+   <productname>PostgreSQL</productname> has
+   <literal>~</literal>, <literal>~*</literal>, <literal>LIKE</literal>, and
+   <literal>ILIKE</literal> operators for textual data types, but they lack
+   many essential properties required by modern information systems:
+  </para>
+
+  <itemizedlist  spacing="compact" mark="bullet">
+   <listitem>
+    <para>
+     There is no linguistic support, even for English.  Regular expressions are
+     not sufficient because they cannot easily handle derived words,
+     e.g., <literal>satisfies</literal> and <literal>satisfy</literal>. You might
+     miss documents which contain <literal>satisfies</literal>, although you
+     probably would like to find them when searching for
+     <literal>satisfy</literal>. It is possible to use <literal>OR</literal>
+     to search for <emphasis>any</emphasis> of them, but it is tedious and error-prone
+     (some words can have several thousand derivatives).
+    </para>
+   </listitem>
+
+   <listitem>
+    <para>
+     They provide no ordering (ranking) of search results, which makes them
+     ineffective when thousands of matching documents are found.
+    </para>
+   </listitem>
+
+   <listitem>
+    <para>
+     They tend to be slow because they process all documents for every search and
+     there is no index support.
+    </para>
+   </listitem>
+  </itemizedlist>
+
+  <para>
+   Full text indexing allows documents to be <emphasis>preprocessed</emphasis>
+   and an index saved for later rapid searching. Preprocessing includes:
+  </para>
+
+  <itemizedlist  mark="none">
+   <listitem>
+    <para>
+     <emphasis>Parsing documents into <firstterm>lexemes</></emphasis>. It is
+     useful to identify various lexemes, e.g. digits, words, complex words,
+     email addresses, so they can be processed differently.  In principle
+     lexemes depend on the specific application but for an ordinary search it
+     is useful to have a predefined list of lexemes.  <!-- add list of lexemes.
+     -->
+    </para>
+   </listitem>
+
+   <listitem>
+    <para>
+     <emphasis>Dictionaries</emphasis> allow the conversion of lexemes into
+     a <emphasis>normalized form</emphasis> so it is not necessary to enter
+     search words in a specific form.
+    </para>
+   </listitem>
+
+   <listitem>
+    <para>
+     <emphasis>Store</emphasis> preprocessed documents
+     optimized for searching.  For example, represent each document as a sorted array
+     of lexemes. Along with lexemes it is desirable to store positional
+     information to use for <varname>proximity ranking</varname>, so that a
+     document which contains a more "dense" region of query words is assigned
+     a higher rank than one with scattered query words.
+    </para>
+   </listitem>
+  </itemizedlist>
+
+  <para>
+    Dictionaries allow fine-grained control over how lexemes are created.  With
+    dictionaries you can:
+  </para>
+
+  <itemizedlist  spacing="compact" mark="bullet">
+   <listitem>
+    <para>
+     Define "stop words" that should not be indexed.
+    </para>
+   </listitem>
+
+   <listitem>
+    <para>
+     Map synonyms to a single word using <application>ispell</>.
+    </para>
+   </listitem>
+
+   <listitem>
+    <para>
+     Map phrases to a single word using a thesaurus.
+    </para>
+   </listitem>
+
+   <listitem>
+    <para>
+     Map different variations of a word to a canonical form using
+     an <application>ispell</> dictionary.
+    </para>
+   </listitem>
+
+   <listitem>
+    <para>
+     Map different variations of a word to a canonical form using
+     <application>snowball</> stemmer rules.
+    </para>
+   </listitem>
+  </itemizedlist>
+
+  <para>
+   A data type, <type>tsvector</type> (<xref linkend="textsearch-datatypes">),
+   is provided for storing preprocessed documents,
+   along with a type <type>tsquery</type> for representing textual
+   queries.  Also, a full text search operator <literal>@@</literal> is defined
+   for these data types (<xref linkend="textsearch-searches">).  Full text
+   searches can be accelerated using indexes (<xref
+   linkend="textsearch-indexes">).
+  </para>
+
+
+  <sect2 id="textsearch-document">
+  <title>What Is a <firstterm>Document</firstterm>?</title>
+
+  <indexterm zone="textsearch-document">
+  <primary>document</primary>
+  </indexterm>
+
+   <para>
+    A document can be a simple text file stored in the file system.  The full
+    text indexing engine can parse text files and store associations of lexemes
+    (words) with their parent document. Later, these associations are used to
+    search for documents which contain query words.  In this case, the database
+    can be used to store the full text index and for executing searches, and
+    some unique identifier can be used to retrieve the document from the file
+    system.
+   </para>
+
+   <para>
+    A document can also be any textual database attribute or a combination
+    (concatenation), which in turn can be stored in various tables or obtained
+    dynamically. In other words, a document can be constructed from different
+    parts for indexing and it might not exist as a whole. For example:
+
  <programlisting>
  SELECT title || ' ' ||  author || ' ' ||  abstract || ' ' || body AS document
  FROM messages
@@ -152,39 +177,38 @@ SELECT m.title || ' ' || m.author || ' ' || m.abstract || ' ' || d.body AS docum
  FROM messages m, docs d
  WHERE mid = did AND mid = 12;
  </programlisting>
-</para>
-
-<note>
-<para>
-Actually, in the previous example queries, <literal>COALESCE</literal>
-<!-- TODO make this a link? -->
-should be used to prevent a <literal>NULL</literal> attribute from causing
-a <literal>NULL</literal> result.
-</para>
-</note>
-</sect2>
+   </para>
  
-<sect2 id="textsearch-datatypes">
-<title>Data Types</title>
+   <note>
+    <para>
+     Actually, in the previous example queries, <literal>COALESCE</literal>
+     <!-- TODO make this a link? -->
+     should be used to prevent a <literal>NULL</literal> attribute from causing
+     a <literal>NULL</literal> result.
+    </para>
+   </note>
+  </sect2>
  
-<variablelist>
+  <sect2 id="textsearch-datatypes">
+  <title>Data Types</title>
  
+   <variablelist>
  
-<indexterm zone="textsearch-datatypes">
-<primary>tsvector</primary>
-</indexterm>
+   <indexterm zone="textsearch-datatypes">
+   <primary>tsvector</primary>
+   </indexterm>
  
+    <varlistentry>
+    <term><firstterm>tsvector</firstterm></term>
+     <listitem>
  
-<varlistentry>
-<term><firstterm>tsvector</firstterm></term>
-<listitem>
+      <para>
+       <type>tsvector</type> is a data type that represents a document and is
+       optimized for full text searching. In the simplest case,
+       <type>tsvector</type> is a sorted list of lexemes, so even without indexes
+       full text searches perform better than standard <literal>~</literal> and
+       <literal>LIKE</literal> operations:
  
-<para>
-<type>tsvector</type> is a data type that represents a document and is
-optimized for full text searching. In the simplest case,
-<type>tsvector</type> is a sorted list of lexemes, so even without indexes
-full text searches perform better than standard <literal>~</literal> and
-<literal>LIKE</literal> operations:
  <programlisting>
  SELECT 'a fat cat sat on a mat and ate a fat rat'::tsvector;
                        tsvector
@@ -192,7 +216,7 @@ SELECT 'a fat cat sat on a mat and ate a fat rat'::tsvector;
   'a' 'on' 'and' 'ate' 'cat' 'fat' 'mat' 'rat' 'sat'
  </programlisting>
  
-Notice, that <literal>space</literal> is also a lexeme:
+       Notice that <literal>space</literal> is also a lexeme:
  
  <programlisting>
  SELECT 'space ''    '' is a lexeme'::tsvector;
@@ -201,59 +225,62 @@ SELECT 'space ''    '' is a lexeme'::tsvector;
   'a' 'is' '    ' 'space' 'lexeme'
  </programlisting>
  
-Each lexeme, optionally, can have positional information which is used for
-<varname>proximity ranking</varname>:
+       Each lexeme, optionally, can have positional information which is used for
+       <varname>proximity ranking</varname>:
+
  <programlisting>
  SELECT 'a:1 fat:2 cat:3 sat:4 on:5 a:6 mat:7 and:8 ate:9 a:10 fat:11 rat:12'::tsvector;
-                                   tsvector
+                                  tsvector
  -------------------------------------------------------------------------------
   'a':1,6,10 'on':5 'and':8 'ate':9 'cat':3 'fat':2,11 'mat':7 'rat':12 'sat':4
  </programlisting>
  
-Each lexeme position also can be labeled as <literal>A</literal>,
-<literal>B</literal>, <literal>C</literal>, <literal>D</literal>,
-where <literal>D</literal> is the default. These labels can be used to group
-lexemes into different <emphasis>importance</emphasis> or
-<emphasis>rankings</emphasis>, for example to reflect document structure.
-Actual values can be assigned at search time and used during the calculation
-of the document rank.  This is very useful for controlling search results.
-</para>
-<para>
-The concatenation operator, e.g. <literal>tsvector || tsvector</literal>,
-can "construct" a document from several parts. The order is important if
-<type>tsvector</type> contains positional information. Of course,
-it is also possible to build a document using different tables:
+       Each lexeme position also can be labeled as <literal>A</literal>,
+       <literal>B</literal>, <literal>C</literal>, <literal>D</literal>,
+       where <literal>D</literal> is the default. These labels can be used to group
+       lexemes into different <emphasis>importance</emphasis> or
+       <emphasis>rankings</emphasis>, for example to reflect document structure.
+       Actual values can be assigned at search time and used during the calculation
+       of the document rank.  This is very useful for controlling search results.
+      </para>
+
+      <para>
+       The concatenation operator, e.g. <literal>tsvector || tsvector</literal>,
+       can "construct" a document from several parts. The order is important if
+       <type>tsvector</type> contains positional information. Of course,
+       it is also possible to build a document using different tables:
  
  <programlisting>
  SELECT 'fat:1 cat:2'::tsvector || 'fat:1 rat:2'::tsvector;
           ?column?
  ---------------------------
   'cat':2 'fat':1,3 'rat':4
+
  SELECT 'fat:1 rat:2'::tsvector || 'fat:1 cat:2'::tsvector;
           ?column?
  ---------------------------
   'cat':4 'fat':1,3 'rat':2
  </programlisting>
  
-</para>
+      </para>
  
-</listitem>
+     </listitem>
  
-</varlistentry>
+    </varlistentry>
  
-<indexterm zone="textsearch-datatypes">
-<primary>tsquery</primary>
-</indexterm>
+    <indexterm zone="textsearch-datatypes">
+    <primary>tsquery</primary>
+    </indexterm>
  
-<varlistentry>
-<term><firstterm>tsquery</firstterm></term>
-<listitem>
+    <varlistentry>
+    <term><firstterm>tsquery</firstterm></term>
+     <listitem>
  
-<para>
-<type>tsquery</type> is a data type for textual queries which supports
-the boolean operators <literal>&amp;</literal> (AND), <literal>|</literal> (OR),
-and parentheses.  A <type>tsquery</type> consists of lexemes
-(optionally labeled by letters) with boolean operators in between:
+      <para>
+       <type>tsquery</type> is a data type for textual queries which supports
+       the boolean operators <literal>&amp;</literal> (AND), <literal>|</literal> (OR),
+       and parentheses.  A <type>tsquery</type> consists of lexemes
+       (optionally labeled by letters) with boolean operators in between:
  
  <programlisting>
  SELECT 'fat &amp; cat'::tsquery;
@@ -261,17 +288,19 @@ SELECT 'fat &amp; cat'::tsquery;
  ---------------
   'fat' &amp; 'cat'
  SELECT 'fat:ab &amp; cat'::tsquery;
- tsquery
+    tsquery
  ------------------
   'fat':AB &amp; 'cat'
  </programlisting>
-Labels can be used to restrict the search region, which allows the
-development of different search engines using the same full text index.
-</para>
  
-<para>
-<type>tsqueries</type> can be concatenated using <literal>&amp;&amp;</literal> (AND)
-and <literal>||</literal> (OR) operators:
+       Labels can be used to restrict the search region, which allows the
+       development of different search engines using the same full text index.
+      </para>
+
+      <para>
+       <type>tsqueries</type> can be concatenated using <literal>&amp;&amp;</literal> (AND)
+       and <literal>||</literal> (OR) operators:
+
  <programlisting>
  SELECT 'a &amp; b'::tsquery &amp;&amp; 'c | d'::tsquery;
           ?column?
@@ -283,515 +312,537 @@ SELECT 'a &amp; b'::tsquery || 'c|d'::tsquery;
  ---------------------------
   'a' &amp; 'b' | ( 'c' | 'd' )
  </programlisting>
-</para>
-</listitem>
-</varlistentry>
-</variablelist>
  
-</sect2>
+      </para>
+     </listitem>
+    </varlistentry>
+   </variablelist>
  
-<sect2 id="textsearch-searches">
-<title>Performing Searches</title>
+  </sect2>
+
+  <sect2 id="textsearch-searches">
+  <title>Performing Searches</title>
+
+   <para>
+    Full text searching in <productname>PostgreSQL</productname> is based on
+    the operator <literal>@@</literal>, which tests whether a <type>tsvector</type>
+    (document) matches a <type>tsquery</type> (query).  Also, this operator
+    supports <type>text</type> input, allowing explicit conversion of a text
+    string to <type>tsvector</type> to be skipped.  The variants available
+    are:
  
-<para>
-Full text searching in <productname>PostgreSQL</productname> is based on
-the operator <literal>@@</literal>, which tests whether a <type>tsvector</type>
-(document) matches a <type>tsquery</type> (query).  Also, this operator
-supports <type>text</type> input, allowing explicit conversion of a text
-string to <type>tsvector</type> to be skipped.  The variants available
-are:
  <programlisting>
  tsvector @@ tsquery
  tsquery  @@ tsvector
  text @@ tsquery
  text @@ text
  </programlisting>
-</para>
+   </para>
+
+   <para>
+    The match operator <literal>@@</literal> returns <literal>true</literal> if
+    the <type>tsvector</type> matches the <type>tsquery</type>.  It doesn't
+    matter which data type is written first:
  
-<para>
-The match operator <literal>@@</literal> returns <literal>true</literal> if
-the <type>tsvector</type> matches the <type>tsquery</type>.  It doesn't
-matter which data type is written first:
  <programlisting>
  SELECT 'cat &amp; rat'::tsquery @@ 'a fat cat sat on a mat and ate a fat rat'::tsvector;
   ?column?
  ----------
   t
+
  SELECT 'fat &amp; cow'::tsquery @@ 'a fat cat sat on a mat and ate a fat rat'::tsvector;
   ?column?
  ----------
   f
  </programlisting>
-</para>
-
-<para>
-The form <type>text</type> <literal>@@</literal> <type>tsquery</type>
-is equivalent to <literal>to_tsvector(x) @@ y</literal>.
-The form <type>text</type> <literal>@@</literal> <type>text</type>
-is equivalent to <literal>to_tsvector(x) @@ plainto_tsquery(y)</literal>.
-</para>
-
-<sect2 id="textsearch-configurations">
-<title>Configurations</title>
-
-<indexterm zone="textsearch-configurations">
-<primary>configurations</primary>
-</indexterm>
-
-<para>
-The above are all simple text search examples.  As mentioned before, full
-text search functionality includes the ability to do many more things:
-skip indexing certain words (stop words), process synonyms, and use
-sophisticated parsing, e.g. parse based on more than just white space.
-This functionality is controlled by <emphasis>configurations</>.
-Fortunately, <productname>PostgreSQL</> comes with predefined
-configurations for many languages.  (<application>psql</>'s <command>\dF</>
-shows all predefined configurations.)  During installation an appropriate
-configuration was selected and <xref
-linkend="guc-default-text-search-config"> was set accordingly.  If you
-need to change it, see <xref linkend="textsearch-tables-multiconfig">.
-</para>
-
-</sect2>
-</sect1>
-
-<sect1 id="textsearch-tables">
-<title>Tables and Indexes</title>
-
-<para>
-The previous section described how to perform full text searches using
-constant strings.  This section shows how to search table data, optionally
-using indexes.
-</para>
-
-<sect2 id="textsearch-tables-search">
-<title>Searching a Table</title>
-
-<para>
-It is possible to do full text table search with no index.  A simple query
-to find all <literal>title</> entries that contain the word
-<literal>friend</> is:
+   </para>
+
+   <para>
+    The form <type>text</type> <literal>@@</literal> <type>tsquery</type>
+    is equivalent to <literal>to_tsvector(x) @@ y</literal>.
+    The form <type>text</type> <literal>@@</literal> <type>text</type>
+    is equivalent to <literal>to_tsvector(x) @@ plainto_tsquery(y)</literal>.
+   </para>
+
+  </sect2>
+
+  <sect2 id="textsearch-configurations">
+  <title>Configurations</title>
+
+   <indexterm zone="textsearch-configurations">
+   <primary>configurations</primary>
+   </indexterm>
+
+   <para>
+    The above are all simple text search examples.  As mentioned before, full
+    text search functionality includes the ability to do many more things:
+    skip indexing certain words (stop words), process synonyms, and use
+    sophisticated parsing, e.g. parse based on more than just white space.
+    This functionality is controlled by <emphasis>configurations</>.
+    Fortunately, <productname>PostgreSQL</> comes with predefined
+    configurations for many languages.  (<application>psql</>'s <command>\dF</>
+    shows all predefined configurations.)  During installation an appropriate
+    configuration was selected and <xref
+    linkend="guc-default-text-search-config"> was set accordingly.  If you
+    need to change it, see <xref linkend="textsearch-tables-multiconfig">.
+   </para>
+
+  </sect2>
+ </sect1>
+
+ <sect1 id="textsearch-tables">
+ <title>Tables and Indexes</title>
+
+  <para>
+   The previous section described how to perform full text searches using
+   constant strings.  This section shows how to search table data, optionally
+   using indexes.
+  </para>
+
+  <sect2 id="textsearch-tables-search">
+  <title>Searching a Table</title>
+
+   <para>
+    It is possible to do full text table search with no index.  A simple query
+    to find all <literal>title</> entries that contain the word
+    <literal>friend</> is:
+
  <programlisting>
  SELECT title
  FROM pgweb
  WHERE to_tsvector('english', body) @@ to_tsquery('friend')
  </programlisting>
-</para>
+   </para>
+
+   <para>
+    The query above uses the <literal>english</> configuration for
+    <function>to_tsvector</>, while <function>to_tsquery</> uses the
+    configuration set by <xref
+    linkend="guc-default-text-search-config">.  A more complex query is to
+    select the ten most recent documents which contain <literal>create</> and
+    <literal>table</> in the <literal>title</> or <literal>body</>:
  
-<para>
-A more complex query is to select the ten most recent documents which
-contain <literal>create</> and <literal>table</> in the <literal>title</>
-or <literal>body</>:
  <programlisting>
  SELECT title
  FROM pgweb
  WHERE to_tsvector('english', title || body) @@ to_tsquery('create &amp; table')
  ORDER BY dlm DESC LIMIT 10;
  </programlisting>
-<literal>dlm</> is the last-modified date so we
-used <command>ORDER BY dlm LIMIT 10</> to get the ten most recent
-matches.  For clarity we omitted the <function>coalesce</function> function
-which prevents the unwanted effect of <literal>NULL</literal>
-concatenation.
-</para>
  
-</sect2>
+    <literal>dlm</> is the last-modified date so we
+    used <command>ORDER BY dlm DESC LIMIT 10</> to get the ten most recent
+    matches.  For clarity we omitted the <function>coalesce</function> function
+    which prevents the unwanted effect of <literal>NULL</literal>
+    concatenation.
+   </para>
  
-<sect2 id="textsearch-tables-index">
-<title>Creating Indexes</title>
+  </sect2>
+
+  <sect2 id="textsearch-tables-index">
+  <title>Creating Indexes</title>
+
+  <para>
+   We can create a <acronym>GIN</acronym> (<xref
+   linkend="textsearch-indexes">) index to speed up the search:
  
-<para>
-We can create a <acronym>GIN</acronym> (<xref
-linkend="textsearch-indexes">) index to speed up the search:
  <programlisting>
  CREATE INDEX pgweb_idx ON pgweb USING gin(to_tsvector('english', body));
  </programlisting>
-Notice that the 2-argument version of <function>to_tsvector</function> is
-used.  Only text search functions which specify a configuration name can
-be used in expression indexes (<xref linkend="indexes-expressional">).
-This is because the index contents must be unaffected by
-<xref linkend="guc-default-text-search-config">.
-If they were affected, the index
-contents might be inconsistent because different entries could contain
-<type>tsvector</>s that were created with different text search
-configurations, and there would be no way to guess which was which.
-It would be impossible to dump and restore such an index correctly.
-</para>
-
-<para>
-Because the two-argument version of <function>to_tsvector</function> was
-used in the index above, only a query reference that uses the 2-argument
-version of <function>to_tsvector</function> with the same configuration
-name will use that index, i.e. <literal>WHERE 'a &amp; b' @@
-to_svector('english', body)</> will use the index, but <literal>WHERE
-'a &amp; b' @@ to_svector(body))</> and <literal>WHERE 'a &amp; b' @@
-body::tsvector</> will not.  This guarantees that an index will be used
-only with the same configuration used to create the index rows.
-</para>
-
-<para>
-It is possible to setup more complex expression indexes where the
-configuration name is specified by another column, e.g.:
+ 
+   Notice that the 2-argument version of <function>to_tsvector</function> is
+   used.  Only text search functions which specify a configuration name can
+   be used in expression indexes (<xref linkend="indexes-expressional">).
+   This is because the index contents must be unaffected by <xref
+   linkend="guc-default-text-search-config">.  If they were affected, the
+   index contents might be inconsistent because different entries could
+   contain <type>tsvector</>s that were created with different text search
+   configurations, and there would be no way to guess which was which.  It
+   would be impossible to dump and restore such an index correctly.
+  </para>
+
+  <para>
+   Because the two-argument version of <function>to_tsvector</function> was
+   used in the index above, only a query reference that uses the 2-argument
+   version of <function>to_tsvector</function> with the same configuration
+   name will use that index, i.e. <literal>WHERE 'a &amp; b' @@
+   to_tsvector('english', body)</> will use the index, but <literal>WHERE
+   'a &amp; b' @@ to_tsvector(body)</> and <literal>WHERE 'a &amp; b' @@
+   body::tsvector</> will not.  This guarantees that an index will be used
+   only with the same configuration used to create the index rows.
+  </para>
+
+  <para>
+   It is possible to set up more complex expression indexes where the
+   configuration name is specified by another column, e.g.:
+
  <programlisting>
  CREATE INDEX pgweb_idx ON pgweb USING gin(to_tsvector(config_name, body));
  </programlisting>
-where <literal>config_name</> is a column in the <literal>pgweb</>
-table.  This allows mixed configurations in the same index while
-recording which configuration was used for each index row.
-</para>
  
-<para>
-Indexes can even concatenate columns:
+   where <literal>config_name</> is a column in the <literal>pgweb</>
+   table.  This allows mixed configurations in the same index while
+   recording which configuration was used for each index row.
+  </para>
+
+  <para>
+   Indexes can even concatenate columns:
+
  <programlisting>
  CREATE INDEX pgweb_idx ON pgweb USING gin(to_tsvector('english', title || body));
  </programlisting>
-</para>
+  </para>
+
+  <para>
+   A more complex case is to create a separate <type>tsvector</> column
+   to hold the output of <function>to_tsvector()</>.  This example is a
+   concatenation of <literal>title</literal> and <literal>body</literal>,
+   with ranking information.  We assign different labels to them to encode
+   information about the origin of each word:
  
-<para>
-A more complex case is to create a separate <type>tsvector</> column
-to hold the output of <function>to_tsvector()</>.  This example is a
-concatenation of <literal>title</literal> and <literal>body</literal>,
-with ranking information.  We assign different labels to them to encode
-information about the origin of each word:
  <programlisting>
  ALTER TABLE pgweb ADD COLUMN textsearch_index tsvector;
  UPDATE pgweb SET textsearch_index =
      setweight(to_tsvector('english', coalesce(title,'')), 'A') || ' ' ||
      setweight(to_tsvector('english', coalesce(body,'')),'D');
  </programlisting>
-Then we create a <acronym>GIN</acronym> index to speed up the search:
+
+   Then we create a <acronym>GIN</acronym> index to speed up the search:
+
  <programlisting>
  CREATE INDEX textsearch_idx ON pgweb USING gin(textsearch_index);
  </programlisting>
-After vacuuming, we are ready to perform a fast full text search:
+
+   After vacuuming, we are ready to perform a fast full text search:
+
  <programlisting>
  SELECT ts_rank_cd(textsearch_index, q) AS rank, title
  FROM pgweb, to_tsquery('create &amp; table') q
  WHERE q @@ textsearch_index
  ORDER BY rank DESC LIMIT 10;
  </programlisting>
-It is necessary to create a trigger to keep the new <type>tsvector</>
-column current anytime <literal>title</> or <literal>body</> changes.
-Keep in mind that, just like with expression indexes, it is important to
-specify the configuration name when creating text search data types
-inside triggers so the column's contents are not affected by changes to 
-<varname>default_text_search_config</>.
-</para>
  
-</sect2>
+   It is necessary to create a trigger to keep the new <type>tsvector</>
+   column current anytime <literal>title</> or <literal>body</> changes.
+   Keep in mind that, just like with expression indexes, it is important to
+   specify the configuration name when creating text search data types
+   inside triggers so the column's contents are not affected by changes to 
+   <varname>default_text_search_config</>.
+  </para>
+
+  </sect2>
  
-</sect1>
+ </sect1>
  
-<sect1 id="textsearch-opfunc">
-<title>Operators and Functions</title>
+ <sect1 id="textsearch-opfunc">
+ <title>Operators and Functions</title>
  
-<para>
-This section outlines all the functions and operators that are available
-for full text searching.
-</para>
+  <para>
+   This section outlines all the functions and operators that are available
+   for full text searching.
+  </para>
  
-<para>
-Full text search vectors and queries both use lexemes, but for different
-purposes.  A <type>tsvector</type> represents the lexemes (tokens) parsed
-out of a document, with an optional position. A <type>tsquery</type>
-specifies a boolean condition using lexemes.
-</para>
+  <para>
+   Full text search vectors and queries both use lexemes, but for different
+   purposes.  A <type>tsvector</type> represents the lexemes (tokens) parsed
+   out of a document, with optional positions. A <type>tsquery</type>
+   specifies a boolean condition using lexemes.
+  </para>
  
-<para>
-All of the following functions that accept a configuration argument can
-use a textual configuration name to select a configuration.  If the option
-is omitted the configuration specified by
-<varname>default_text_search_config</> is used.  For more information on
-configuration, see <xref linkend="textsearch-tables-configuration">.
-</para>
+  <para>
+   All of the following functions that accept a configuration argument can
+   use a textual configuration name to select a configuration.  If the argument
+   is omitted, the configuration specified by
+   <varname>default_text_search_config</> is used.  For more information on
+   configuration, see <xref linkend="textsearch-tables-configuration">.
+  </para>
  
-<sect2 id="textsearch-search-operator">
-<title>Search</title>
+  <sect2 id="textsearch-search-operator">
+  <title>Search</title>
  
-<para>The operator <literal>@@</> is used to perform full text
-searches:</para>
+   <para>The operator <literal>@@</> is used to perform full text
+    searches:
+   </para>
  
-<variablelist>
+   <variablelist>
  
-<varlistentry>
+    <varlistentry>
  
-<indexterm zone="textsearch-search-operator">
-<primary>TSVECTOR @@ TSQUERY</primary>
-</indexterm>
+     <indexterm zone="textsearch-search-operator">
+     <primary>TSVECTOR @@ TSQUERY</primary>
+     </indexterm>
  
-<term>
-<synopsis>
-<!-- why allow such combinations? -->
-TSVECTOR @@ TSQUERY
-TSQUERY @@ TSVECTOR
-</synopsis>
-</term>
+     <term>
+      <synopsis>
+      <!-- why allow such combinations? -->
+      TSVECTOR @@ TSQUERY
+      TSQUERY @@ TSVECTOR
+      </synopsis>
+     </term>
+
+     <listitem>
+      <para>
+       Returns <literal>true</literal> if <literal>TSQUERY</literal> is contained 
+       in <literal>TSVECTOR</literal>, and <literal>false</literal> if not:
  
-<listitem>
-<para>
-Returns <literal>true</literal> if <literal>TSQUERY</literal> is contained 
-in <literal>TSVECTOR</literal>, and <literal>false</literal> if not:
  <programlisting>
  SELECT 'a fat cat sat on a mat and ate a fat rat'::tsvector @@ 'cat &amp; rat'::tsquery;
   ?column?
- ----------
-  t
+----------
+ t
+
  SELECT 'a fat cat sat on a mat and ate a fat rat'::tsvector @@ 'fat &amp; cow'::tsquery;
   ?column?
- ----------
-  f
+----------
+ f
  </programlisting>
-</para>
+      </para>
  
-</listitem>
-</varlistentry>
+     </listitem>
+    </varlistentry>
  
-<varlistentry>
+    <varlistentry>
  
-<indexterm zone="textsearch-search-operator">
-<primary>TEXT @@ TSQUERY</primary>
-</indexterm>
+     <indexterm zone="textsearch-search-operator">
+     <primary>TEXT @@ TSQUERY</primary>
+     </indexterm>
  
-<term>
-<synopsis>
-text @@ tsquery
-</synopsis>
-</term>
+     <term>
+      <synopsis>
+       text @@ tsquery
+      </synopsis>
+     </term>
+
+     <listitem>
+      <para>
+       Returns <literal>true</literal> if <literal>TSQUERY</literal> is contained
+       in <literal>TEXT</literal>, and <literal>false</literal> if not:
  
-<listitem>
-<para>
-Returns <literal>true</literal> if <literal>TSQUERY</literal> is contained
-in <literal>TEXT</literal>, and <literal>false</literal> if not:
  <programlisting>
  SELECT 'a fat cat sat on a mat and ate a fat rat'::text @@ 'cat &amp; rat'::tsquery;
   ?column?
  ----------
   t
+
  SELECT 'a fat cat sat on a mat and ate a fat rat'::text @@ 'cat &amp; cow'::tsquery;
   ?column?
  ----------
   f
  </programlisting>
-</para>
+      </para>
+     </listitem>
+    </varlistentry>
  
-</listitem>
-</varlistentry>
+    <varlistentry>
  
-<varlistentry>
+     <indexterm zone="textsearch-search-operator">
+     <primary>TEXT @@ TEXT</primary>
+     </indexterm>
  
-<indexterm zone="textsearch-search-operator">
-<primary>TEXT @@ TEXT</primary>
-</indexterm>
+     <term>
+      <synopsis>
+       <!-- this is very confusing because there is no rule suggesting which is
+        first. -->
+       text @@ text
+      </synopsis>
+     </term>
  
-<term>
-<synopsis>
-<!-- this is very confusing because there is no rule suggesting which is
-first. -->
-text @@ text
-</synopsis>
-</term>
+     <listitem>
+      <para>
+       Returns <literal>true</literal> if the right
+       argument (the query) is contained in the left argument, and
+       <literal>false</literal> otherwise:
  
-<listitem>
-<para>
-Returns <literal>true</literal> if the right
-argument (the query) is contained in the left argument, and
-<literal>false</literal> otherwise:
  <programlisting>
  SELECT 'a fat cat sat on a mat and ate a fat rat' @@ 'cat rat';
   ?column?
  ----------
   t
+
  SELECT 'a fat cat sat on a mat and ate a fat rat' @@ 'cat cow';
   ?column?
  ----------
   f
  </programlisting>
-</para>
-
-</listitem>
-</varlistentry>
-
-
-</variablelist>
-
-<para>
-For index support of full text operators consult <xref linkend="textsearch-indexes">.
-</para>
-
-</sect2>
+       </para>
  
+     </listitem>
+    </varlistentry>
  
+   </variablelist>
  
-<sect2 id="textsearch-tsvector">
-<title>tsvector</title>
-
-<variablelist>
-
-<varlistentry>
-
-<indexterm zone="textsearch-tsvector">
-<primary>to_tsvector</primary>
-</indexterm>
-
-<term>
-<synopsis>
-to_tsvector(<optional><replaceable class="PARAMETER">config_name</replaceable></optional>,  <replaceable class="PARAMETER">document</replaceable> TEXT) returns TSVECTOR
-</synopsis>
-</term>
-
-<listitem>
-<para>
-Parses a document into tokens, reduces the tokens to lexemes, and returns a
-<type>tsvector</type> which lists the lexemes together with their positions in the document
-in lexicographic order.
-</para>
-
-</listitem>
-</varlistentry>
-
-<varlistentry>
-
-<indexterm zone="textsearch-tsvector">
-<primary>strip</primary>
-</indexterm>
-
-<term>
-<synopsis>
-strip(<replaceable class="PARAMETER">vector</replaceable> TSVECTOR) returns TSVECTOR
-</synopsis>
-</term>
-
-<listitem>
-<para>
-Returns a vector which lists the same lexemes as the given vector, but
-which lacks any information about where in the document each lexeme
-appeared. While the returned vector is useless for relevance ranking it
-will usually be much smaller.
-</para>
-</listitem>
-
-</varlistentry>
-
-<varlistentry>
-
-<indexterm zone="textsearch-tsvector">
-<primary>setweight</primary>
-</indexterm>
-
-<term>
-<synopsis>
-setweight(<replaceable class="PARAMETER">vector</replaceable> TSVECTOR, <replaceable class="PARAMETER">letter</replaceable>) returns TSVECTOR
-</synopsis>
-</term>
-
-<listitem>
-<para>
-This function returns a copy of the input vector in which every location
-has been labeled with either the letter <literal>A</literal>,
-<literal>B</literal>, or <literal>C</literal>, or the default label
-<literal>D</literal> (which is the default for new vectors
-and as such is usually not displayed). These labels are retained
-when vectors are concatenated, allowing words from different parts of a
-document to be weighted differently by ranking functions.
-</para>
-</listitem>
-</varlistentry>
-
-
-
-<varlistentry>
-
-<indexterm zone="textsearch-tsvector">
-<primary>tsvector concatenation</primary>
-</indexterm>
-
-<term>
-<synopsis>
-<replaceable class="PARAMETER">vector1</replaceable> || <replaceable class="PARAMETER">vector2</replaceable>
-tsvector_concat(<replaceable class="PARAMETER">vector1</replaceable> TSVECTOR, <replaceable class="PARAMETER">vector2</replaceable> TSVECTOR) returns TSVECTOR
-</synopsis>
-</term>
-
-<listitem>
-<para>
-Returns a vector which combines the lexemes and positional information of
-the two vectors given as arguments. Positional weight labels (described
-in the previous paragraph) are retained during the concatenation.  This
-has at least two uses. First, if some sections of your document need to be
-parsed with different configurations than others, you can parse them
-separately and then concatenate the resulting vectors.  Second, you can
-weigh words from one section of your document differently than the others
-by parsing the sections into separate vectors and assigning each vector
-a different position label with the <function>setweight()</function>
-function.  You can then concatenate them into a single vector and provide
-a weights argument to the <function>ts_rank()</function> function that assigns
-different weights to positions with different labels.
-</para>
-</listitem>
-</varlistentry>
-
-
-<varlistentry>
-<indexterm zone="textsearch-tsvector">
-<primary>length(tsvector)</primary>
-</indexterm>
-
-<term>
-<synopsis>
-length(<replaceable class="PARAMETER">vector</replaceable> TSVECTOR) returns INT4
-</synopsis>
-</term>
-
-<listitem>
-<para>
-Returns the number of lexemes stored in the vector.
-</para>
-</listitem>
-</varlistentry>
-
-
-<varlistentry>
-<indexterm zone="textsearch-tsvector">
-<primary>text::tsvector</primary>
-</indexterm>
-
-<term>
-<synopsis>
-<replaceable>text</replaceable>::TSVECTOR returns TSVECTOR
-</synopsis>
-</term>
-
-<listitem>
-<para>
-Directly casting <type>text</type> to a <type>tsvector</type> allows you
-to directly inject lexemes into a vector with whatever positions and
-positional weights you choose to specify. The text should be formatted to
-match the way a vector is displayed by <literal>SELECT</literal>.
-<!-- TODO what a strange definition, I think something like
-"input format" or so should be used (and defined somewhere, didn't see
-it yet) -->
-</para>
-</listitem>
-</varlistentry>
-
-<varlistentry>
-<indexterm zone="textsearch-tsvector">
-<primary>trigger</primary>
-<secondary>for updating a derived tsvector column</secondary>
-</indexterm>
-
-<term>
-<synopsis>
-tsvector_update_trigger(<replaceable class="PARAMETER">tsvector_column_name</replaceable>, <replaceable class="PARAMETER">config_name</replaceable>, <replaceable class="PARAMETER">text_column_name</replaceable> <optional>, ... </optional>)
-tsvector_update_trigger_column(<replaceable class="PARAMETER">tsvector_column_name</replaceable>, <replaceable class="PARAMETER">config_column_name</replaceable>, <replaceable class="PARAMETER">text_column_name</replaceable> <optional>, ... </optional>)
-</synopsis>
-</term>
-
-<listitem>
-<para>
-Two built-in trigger functions are available to automatically update a
-<type>tsvector</> column from one or more textual columns.  An example
-of their use is:
+   <para>
+    For index support of full text operators consult <xref linkend="textsearch-indexes">.
+   </para>
+
+  </sect2>
+
+
+
+  <sect2 id="textsearch-tsvector">
+  <title>tsvector</title>
+
+   <variablelist>
+
+    <varlistentry>
+
+     <indexterm zone="textsearch-tsvector">
+     <primary>to_tsvector</primary>
+     </indexterm>
+
+     <term>
+      <synopsis>
+       to_tsvector(<optional><replaceable class="PARAMETER">config_name</replaceable></optional>,  <replaceable class="PARAMETER">document</replaceable> TEXT) returns TSVECTOR
+      </synopsis>
+     </term>
+
+     <listitem>
+      <para>
+       Parses a document into tokens, reduces the tokens to lexemes, and returns a
+       <type>tsvector</type> which lists the lexemes, sorted in lexicographic order,
+       together with their positions in the document.
+      </para>
+
+     </listitem>
+    </varlistentry>
+
+    <varlistentry>
+
+     <indexterm zone="textsearch-tsvector">
+     <primary>strip</primary>
+     </indexterm>
+
+     <term>
+      <synopsis>
+       strip(<replaceable class="PARAMETER">vector</replaceable> TSVECTOR) returns TSVECTOR
+      </synopsis>
+     </term>
+
+     <listitem>
+      <para>
+       Returns a vector which lists the same lexemes as the given vector, but
+       which lacks any information about where in the document each lexeme
+       appeared. While the returned vector is useless for relevance ranking it
+       will usually be much smaller.
+      </para>
+     </listitem>
+
+    </varlistentry>
+
+    <varlistentry>
+
+     <indexterm zone="textsearch-tsvector">
+     <primary>setweight</primary>
+     </indexterm>
+
+     <term>
+      <synopsis>
+       setweight(<replaceable class="PARAMETER">vector</replaceable> TSVECTOR, <replaceable class="PARAMETER">letter</replaceable>) returns TSVECTOR
+      </synopsis>
+     </term>
+
+     <listitem>
+      <para>
+       This function returns a copy of the input vector in which every location
+       has been labeled with one of the letters <literal>A</literal>,
+       <literal>B</literal>, <literal>C</literal>, or
+       <literal>D</literal> (<literal>D</literal> is the default for new vectors
+       and as such is usually not displayed). These labels are retained
+       when vectors are concatenated, allowing words from different parts of a
+       document to be weighted differently by ranking functions.
+      </para>
+     </listitem>
+    </varlistentry>
+
+    <varlistentry>
+
+     <indexterm zone="textsearch-tsvector">
+     <primary>tsvector concatenation</primary>
+     </indexterm>
+
+     <term>
+      <synopsis>
+       <replaceable class="PARAMETER">vector1</replaceable> || <replaceable class="PARAMETER">vector2</replaceable>
+       tsvector_concat(<replaceable class="PARAMETER">vector1</replaceable> TSVECTOR, <replaceable class="PARAMETER">vector2</replaceable> TSVECTOR) returns TSVECTOR
+      </synopsis>
+     </term>
+
+     <listitem>
+      <para>
+       Returns a vector which combines the lexemes and positional information of
+       the two vectors given as arguments. Positional weight labels (described
+       in the previous paragraph) are retained during the concatenation.  This
+       has at least two uses. First, if some sections of your document need to be
+       parsed with different configurations than others, you can parse them
+       separately and then concatenate the resulting vectors.  Second, you can
+       weigh words from one section of your document differently than the others
+       by parsing the sections into separate vectors and assigning each vector
+       a different position label with the <function>setweight()</function>
+       function.  You can then concatenate them into a single vector and provide
+       a weights argument to the <function>ts_rank()</function> function that assigns
+       different weights to positions with different labels.
+      </para>
+     </listitem>
+    </varlistentry>
+
+
+    <varlistentry>
+     <indexterm zone="textsearch-tsvector">
+     <primary>length(tsvector)</primary>
+     </indexterm>
+
+     <term>
+      <synopsis>
+       length(<replaceable class="PARAMETER">vector</replaceable> TSVECTOR) returns INT4
+      </synopsis>
+     </term>
+
+     <listitem>
+      <para>
+       Returns the number of lexemes stored in the vector.
+      </para>
+     </listitem>
+    </varlistentry>
+
+    <varlistentry>
+
+     <indexterm zone="textsearch-tsvector">
+     <primary>text::tsvector</primary>
+     </indexterm>
+
+     <term>
+      <synopsis>
+       <replaceable>text</replaceable>::TSVECTOR returns TSVECTOR
+      </synopsis>
+     </term>
+
+     <listitem>
+      <para>
+       Directly casting <type>text</type> to a <type>tsvector</type> allows you
+       to directly inject lexemes into a vector with whatever positions and
+       positional weights you choose to specify. The text should be formatted to
+       match the way a vector is displayed by <literal>SELECT</literal>.
+       <!-- TODO what a strange definition, I think something like
+       "input format" or so should be used (and defined somewhere, didn't see
+       it yet) -->
+      </para>
+     </listitem>
+    </varlistentry>
+
+    <varlistentry>
+
+     <indexterm zone="textsearch-tsvector">
+     <primary>trigger</primary>
+     <secondary>for updating a derived tsvector column</secondary>
+     </indexterm>
+
+     <term>
+      <synopsis>
+       tsvector_update_trigger(<replaceable class="PARAMETER">tsvector_column_name</replaceable>, <replaceable class="PARAMETER">config_name</replaceable>, <replaceable class="PARAMETER">text_column_name</replaceable> <optional>, ... </optional>)
+       tsvector_update_trigger_column(<replaceable class="PARAMETER">tsvector_column_name</replaceable>, <replaceable class="PARAMETER">config_column_name</replaceable>, <replaceable class="PARAMETER">text_column_name</replaceable> <optional>, ... </optional>)
+      </synopsis>
+     </term>
+
+     <listitem>
+      <para>
+       Two built-in trigger functions are available to automatically update a
+       <type>tsvector</> column from one or more textual columns.  An example
+       of their use is:
  
  <programlisting>
  CREATE TABLE tblMessages (
@@ -804,50 +855,52 @@ ON tblMessages FOR EACH ROW EXECUTE PROCEDURE
  tsvector_update_trigger(tsv, 'pg_catalog.english', strMessage);
  </programlisting>
  
-Having created this trigger, any change in <structfield>strMessage</>
-will be automatically reflected into <structfield>tsv</>.
-</para>
-
-<para>
-Both triggers require you to specify the text search configuration to
-be used to perform the conversion.  For
-<function>tsvector_update_trigger</>, the configuration name is simply
-given as the second trigger argument.  It must be schema-qualified as
-shown above, so that the trigger behavior will not change with changes
-in <varname>search_path</>.  For
-<function>tsvector_update_trigger_column</>, the second trigger argument
-is the name of another table column, which must be of type
-<type>regconfig</>.  This allows a per-row selection of configuration
-to be made.
-</para>
-</listitem>
-</varlistentry>
-
-
-<varlistentry>
-<indexterm zone="textsearch-tsvector">
-<primary>ts_stat</primary>
-</indexterm>
-
-<term>
-<synopsis>
-ts_stat(<replaceable class="PARAMETER">sqlquery</replaceable> text <optional>, <replaceable class="PARAMETER">weights</replaceable> text </optional>) returns SETOF statinfo
-</synopsis>
-</term>
-
-<listitem>
-<para>
-Here <type>statinfo</type> is a type, defined as:
+       Having created this trigger, any change in <structfield>strMessage</>
+       will be automatically reflected into <structfield>tsv</>.
+      </para>
+
+      <para>
+       Both triggers require you to specify the text search configuration to
+       be used to perform the conversion.  For
+       <function>tsvector_update_trigger</>, the configuration name is simply
+       given as the second trigger argument.  It must be schema-qualified as
+       shown above, so that the trigger behavior will not change with changes
+       in <varname>search_path</>.  For
+       <function>tsvector_update_trigger_column</>, the second trigger argument
+       is the name of another table column, which must be of type
+       <type>regconfig</>.  This allows a per-row selection of configuration
+       to be made.
+      </para>
+     </listitem>
+    </varlistentry>
+
+    <varlistentry>
+
+     <indexterm zone="textsearch-tsvector">
+     <primary>ts_stat</primary>
+     </indexterm>
+
+     <term>
+      <synopsis>
+       ts_stat(<replaceable class="PARAMETER">sqlquery</replaceable> text <optional>, <replaceable class="PARAMETER">weights</replaceable> text </optional>) returns SETOF statinfo
+      </synopsis>
+     </term>
+
+     <listitem>
+      <para>
+       Here <type>statinfo</type> is a type, defined as:
+
  <programlisting>
  CREATE TYPE statinfo AS (word text, ndoc integer, nentry integer);
  </programlisting>
-and <replaceable>sqlquery</replaceable> is a text value containing a SQL query
-which returns a single <type>tsvector</type> column.  <function>ts_stat</>
-executes the query and returns statistics about the resulting
-<type>tsvector</type> data, i.e., the number of documents, <literal>ndoc</>,
-and the total number of words in the collection, <literal>nentry</>.  It is
-useful for checking your configuration and to find stop word candidates.  For
-example, to find the ten most frequent words:
+
+       and <replaceable>sqlquery</replaceable> is a text value containing a SQL query
+       which returns a single <type>tsvector</type> column.  <function>ts_stat</>
+       executes the query and returns statistics about each word in the resulting
+       <type>tsvector</type> data, i.e., the number of documents the word occurs in,
+       <literal>ndoc</>, and the total number of occurrences of the word in the
+       collection, <literal>nentry</>.  It is
+       useful for checking your configuration and to find stop word candidates.  For
+       example, to find the ten most frequent words:
  
  <programlisting>
  SELECT * FROM ts_stat('SELECT vector from apod')
@@ -855,8 +908,8 @@ ORDER BY ndoc DESC, nentry DESC, word
  LIMIT 10;
  </programlisting>
  
-Optionally, one can specify <replaceable>weights</replaceable> to obtain
-statistics about words with a specific <replaceable>weight</replaceable>:
+       Optionally, one can specify <replaceable>weights</replaceable> to obtain
+       statistics about words with a specific <replaceable>weight</replaceable>:
  
  <programlisting>
  SELECT * FROM ts_stat('SELECT vector FROM apod','a')
@@ -864,387 +917,394 @@ ORDER BY ndoc DESC, nentry DESC, word
  LIMIT 10;
  </programlisting>
  
-</para>
-</listitem>
-</varlistentry>
+      </para>
+     </listitem>
+    </varlistentry>
  
+    <varlistentry>
  
-<varlistentry>
-<indexterm zone="textsearch-tsvector">
-<primary>Btree operations for tsvector</primary>
-</indexterm>
+     <indexterm zone="textsearch-tsvector">
+     <primary>Btree operations for tsvector</primary>
+     </indexterm>
  
-<term>
-<synopsis>
-TSVECTOR &lt; TSVECTOR
-TSVECTOR &lt;= TSVECTOR
-TSVECTOR = TSVECTOR
-TSVECTOR &gt;= TSVECTOR
-TSVECTOR &gt; TSVECTOR
-</synopsis>
-</term>
+     <term>
+      <synopsis>
+       TSVECTOR &lt; TSVECTOR
+       TSVECTOR &lt;= TSVECTOR
+       TSVECTOR = TSVECTOR
+       TSVECTOR &gt;= TSVECTOR
+       TSVECTOR &gt; TSVECTOR
+      </synopsis>
+     </term>
  
-<listitem>
-<para>
-All btree operations are defined for the <type>tsvector</type> type.
-<type>tsvector</>s are compared with each other using
-<emphasis>lexicographical</emphasis> ordering.
-<!-- TODO of the output representation or something else? -->
-</para>
-</listitem>
-</varlistentry>
+     <listitem>
+      <para>
+       All btree operations are defined for the <type>tsvector</type> type.
+       <type>tsvector</>s are compared with each other using
+       <emphasis>lexicographical</emphasis> ordering.
+       <!-- TODO of the output representation or something else? -->
+      </para>
+     </listitem>
+    </varlistentry>
  
-</variablelist>
+   </variablelist>
  
+  </sect2>
  
-</sect2>
+  <sect2 id="textsearch-tsquery">
+  <title>tsquery</title>
  
-<sect2 id="textsearch-tsquery">
-<title>tsquery</title>
  
+   <variablelist>
  
-<variablelist>
+    <varlistentry>
  
-<varlistentry>
+     <indexterm zone="textsearch-tsquery">
+     <primary>to_tsquery</primary>
+     </indexterm>
  
-<indexterm zone="textsearch-tsquery">
-<primary>to_tsquery</primary>
-</indexterm>
+     <term>
+      <synopsis>
+       to_tsquery(<optional><replaceable class="PARAMETER">config_name</replaceable></optional>, <replaceable class="PARAMETER">querytext</replaceable> text) returns TSQUERY
+      </synopsis>
+     </term>
  
-<term>
-<synopsis>
-to_tsquery(<optional><replaceable class="PARAMETER">config_name</replaceable></optional>, <replaceable class="PARAMETER">querytext</replaceable> text) returns TSQUERY
-</synopsis>
-</term>
+     <listitem>
+      <para>
+       Accepts <replaceable>querytext</replaceable>, which should consist of single tokens
+       separated by the boolean operators <literal>&amp;</literal> (and), <literal>|</literal>
+       (or) and <literal>!</literal> (not), which can be grouped using parentheses.
+       In other words, <function>to_tsquery</function> expects already parsed text.
+       Each token is reduced to a lexeme using the specified or current configuration.
+       A weight class can be assigned to each lexeme entry to restrict the search region
+       (see <function>setweight</function> for an explanation). For example:
  
-<listitem>
-<para>
-Accepts <replaceable>querytext</replaceable>, which should consist of single tokens
-separated by the boolean operators <literal>&amp;</literal> (and), <literal>|</literal>
-(or) and <literal>!</literal> (not), which can be grouped using parentheses.
-In other words, <function>to_tsquery</function> expects already parsed text.
-Each token is reduced to a lexeme using the specified or current configuration.
-A weight class can be assigned to each lexeme entry to restrict the search region
-(see <function>setweight</function> for an explanation). For example:
  <programlisting>
  'fat:a &amp; rats'
  </programlisting>
-The <function>to_tsquery</function> function can also accept a <literal>text
-string</literal>. In this case <replaceable>querytext</replaceable> should
-be quoted. This may be useful, for example, to use with a thesaurus
-dictionary. In the example below, a thesaurus contains rule <literal>supernovae
-stars : sn</literal>:
+
+       The <function>to_tsquery</function> function can also accept a <literal>text
+       string</literal>. In this case <replaceable>querytext</replaceable> should
+       be quoted. This may be useful, for example, to use with a thesaurus
+       dictionary. In the example below, a thesaurus contains the rule <literal>supernovae
+       stars : sn</literal>:
+
  <programlisting>
  SELECT to_tsquery('''supernovae stars'' &amp; !crab');
-   to_tsquery
-----------------
+  to_tsquery
+---------------
   'sn' &amp; !'crab'
  </programlisting>
-Without quotes <function>to_tsquery</function> will generate a syntax error.
-</para>
  
-</listitem>
-</varlistentry>
+       Without quotes <function>to_tsquery</function> will generate a syntax error.
+      </para>
+
+     </listitem>
+    </varlistentry>
  
  
  
-<varlistentry>
+    <varlistentry>
  
-<indexterm zone="textsearch-tsquery">
-<primary>plainto_tsquery</primary>
-</indexterm>
+     <indexterm zone="textsearch-tsquery">
+     <primary>plainto_tsquery</primary>
+     </indexterm>
  
-<term>
-<synopsis>
-plainto_tsquery(<optional><replaceable class="PARAMETER">config_name</replaceable></optional>,  <replaceable class="PARAMETER">querytext</replaceable> text) returns TSQUERY
-</synopsis>
-</term>
+     <term>
+      <synopsis>
+       plainto_tsquery(<optional><replaceable class="PARAMETER">config_name</replaceable></optional>,  <replaceable class="PARAMETER">querytext</replaceable> text) returns TSQUERY
+      </synopsis>
+     </term>
  
-<listitem>
-<para>
-Transforms unformatted text <replaceable>querytext</replaceable> to <type>tsquery</type>.
-It is the same as <function>to_tsquery</function> but accepts <literal>text</literal>
-without quotes and will call the parser to break it into tokens.
-<function>plainto_tsquery</function> assumes the <literal>&amp;</literal> boolean
-operator between words and does not recognize weight classes.
-</para>
-</listitem>
-</varlistentry>
+     <listitem>
+      <para>
+       Transforms unformatted text <replaceable>querytext</replaceable> to <type>tsquery</type>.
+       It is the same as <function>to_tsquery</function> but accepts <literal>text</literal>
+       without quotes and will call the parser to break it into tokens.
+       <function>plainto_tsquery</function> assumes the <literal>&amp;</literal> boolean
+       operator between words and does not recognize weight classes.
+      </para>
+     </listitem>
+    </varlistentry>
  
  
  
-<varlistentry>
+    <varlistentry>
  
-<indexterm zone="textsearch-tsquery">
-<primary>querytree</primary>
-</indexterm>
+     <indexterm zone="textsearch-tsquery">
+     <primary>querytree</primary>
+     </indexterm>
  
-<term>
-<synopsis>
-querytree(<replaceable class="PARAMETER">query</replaceable> TSQUERY) returns TEXT
-</synopsis>
-</term>
+     <term>
+      <synopsis>
+       querytree(<replaceable class="PARAMETER">query</replaceable> TSQUERY) returns TEXT
+      </synopsis>
+     </term>
+
+     <listitem>
+      <para>
+       This returns the query used for searching an index. It can be used to test
+       for an empty query. The <command>SELECT</> below returns <literal>NULL</>,
+       which corresponds to an empty query since GIN indexes do not support queries with negation
+       <!-- TODO or "negated queries" (depending on what the correct rule is) -->
+       (a full index scan is inefficient):
  
-<listitem>
-<para>
-This returns the query used for searching an index. It can be used to test
-for an empty query. The <command>SELECT</> below returns <literal>NULL</>,
-which corresponds to an empty query since GIN indexes do not support queries with negation
-<!-- TODO or "negated queries" (depending on what the correct rule is) -->
-(a full index scan is inefficient):
  <programlisting>
  SELECT querytree(to_tsquery('!defined'));
   querytree
  -----------
  
  </programlisting>
-</para>
-</listitem>
-</varlistentry>
+      </para>
+     </listitem>
+    </varlistentry>
  
+    <varlistentry>
  
-<varlistentry>
+     <indexterm zone="textsearch-tsquery">
+     <primary>text::tsquery casting</primary>
+     </indexterm>
  
-<indexterm zone="textsearch-tsquery">
-<primary>text::tsquery casting</primary>
-</indexterm>
+     <term>
+      <synopsis>
+       <replaceable class="PARAMETER">text</replaceable>::TSQUERY returns TSQUERY
+      </synopsis>
+     </term>
  
-<term>
-<synopsis>
-<replaceable class="PARAMETER">text</replaceable>::TSQUERY returns TSQUERY
-</synopsis>
-</term>
+     <listitem>
+      <para>
+       Directly casting <replaceable>text</replaceable> to a <type>tsquery</type>
+       allows you to directly inject lexemes into a query using whatever positions
+       and positional weight flags you choose to specify. The text should be
+       formatted to match the way a vector is displayed by
+       <literal>SELECT</literal>.
+       <!-- TODO what a strange definition, I think something like
+       "input format" or so should be used (and defined somewhere, didn't see
+       it yet) -->
+      </para>
+     </listitem>
+    </varlistentry>
  
-<listitem>
-<para>
-Directly casting <replaceable>text</replaceable> to a <type>tsquery</type>
-allows you to directly inject lexemes into a query using whatever positions
-and positional weight flags you choose to specify. The text should be
-formatted  to match the way a vector is displayed by
-<literal>SELECT</literal>.
-<!-- TODO what a strange definition, I think something like
-"input format" or so should be used (and defined somewhere, didn't see
-it yet) -->
-</para>
-</listitem>
-</varlistentry>
+    <varlistentry>
  
-<varlistentry>
+     <indexterm zone="textsearch-tsquery">
+     <primary>numnode</primary>
+     </indexterm>
  
-<indexterm zone="textsearch-tsquery">
-<primary>numnode</primary>
-</indexterm>
+     <term>
+      <synopsis>
+       numnode(<replaceable class="PARAMETER">query</replaceable> TSQUERY) returns INTEGER
+      </synopsis>
+     </term>
  
-<term>
-<synopsis>
-numnode(<replaceable class="PARAMETER">query</replaceable> TSQUERY) returns INTEGER
-</synopsis>
-</term>
+     <listitem>
+      <para>
+       This returns the number of nodes in a query tree. This function can be
+       used to determine if <replaceable>query</replaceable> is meaningful
+       (returns &gt; 0), or contains only stop words (returns 0):
  
-<listitem>
-<para>
-This returns the number of nodes in a query tree. This function can be
-used to determine if <replaceable>query</replaceable> is meaningful
-(returns &gt; 0), or contains only stop words (returns 0):
  <programlisting>
  SELECT numnode(plainto_tsquery('the any'));
-NOTICE:  query contains only stopword(s) or does not contain lexeme(s),
-ignored
+NOTICE:  query contains only stopword(s) or does not contain lexeme(s), ignored
   numnode
  ---------
         0
+
  SELECT numnode(plainto_tsquery('the table'));
   numnode
  ---------
         1
+
  SELECT numnode(plainto_tsquery('long table'));
   numnode
  ---------
         3
  </programlisting>
-</para>
-</listitem>
-</varlistentry>
-
-<varlistentry>
-
-<indexterm zone="textsearch-tsquery">
-<primary>TSQUERY &amp;&amp; TSQUERY</primary>
-</indexterm>
-
-<term>
-<synopsis>
-TSQUERY &amp;&amp; TSQUERY returns TSQUERY
-</synopsis>
-</term>
-
-<listitem>
-<para>
-Returns <literal>AND</literal>-ed TSQUERY
-</para>
-</listitem>
-</varlistentry>
-
-<varlistentry>
-
-<indexterm zone="textsearch-tsquery">
-<primary>TSQUERY || TSQUERY</primary>
-</indexterm>
-
-<term>
-<synopsis>
-TSQUERY || TSQUERY returns TSQUERY
-</synopsis>
-</term>
-
-<listitem>
-<para>
-Returns <literal>OR</literal>-ed TSQUERY
-</para>
-</listitem>
-</varlistentry>
-
-<varlistentry>
-
-<indexterm zone="textsearch-tsquery">
-<primary>!! TSQUERY</primary>
-</indexterm>
-
-<term>
-<synopsis>
-!! TSQUERY returns TSQUERY
-</synopsis>
-</term>
-
-<listitem>
-<para>
-negation of TSQUERY
-</para>
-</listitem>
-</varlistentry>
-
-<varlistentry>
-
-<indexterm zone="textsearch-tsquery">
-<primary>Btree operations for tsquery</primary>
-</indexterm>
-
-<term>
-<synopsis>
-TSQUERY &lt; TSQUERY
-TSQUERY &lt;= TSQUERY
-TSQUERY = TSQUERY
-TSQUERY &gt;= TSQUERY
-TSQUERY &gt; TSQUERY
-</synopsis>
-</term>
-
-<listitem>
-<para>
-All btree operations are defined for the <type>tsquery</type> type.
-tsqueries are compared to each other using <emphasis>lexicographical</emphasis>
-ordering.
-</para>
-</listitem>
-</varlistentry>
-
-</variablelist>
-
-<sect3 id="textsearch-queryrewriting">
-<title>Query Rewriting</title>
-
-<para>
-Query rewriting is a set of functions and operators for the
-<type>tsquery</type> data type.  It allows control at search
-<emphasis>query time</emphasis> without reindexing (the opposite of the
-thesaurus).  For example, you can expand the search using synonyms
-(<literal>new york</>, <literal>big apple</>, <literal>nyc</>,
-<literal>gotham</>) or narrow the search to direct the user to some hot
-topic.
-</para>
-
-<para>
-The <function>ts_rewrite()</function> function changes the original query by
-replacing part of the query with some other string of type <type>tsquery</type>,
-as defined by the rewrite rule. Arguments to <function>ts_rewrite()</function>
-can be names of columns of type <type>tsquery</type>.
-</para>
+      </para>
+     </listitem>
+    </varlistentry>
+
+    <varlistentry>
+
+     <indexterm zone="textsearch-tsquery">
+     <primary>TSQUERY &amp;&amp; TSQUERY</primary>
+     </indexterm>
+
+     <term>
+      <synopsis>
+       TSQUERY &amp;&amp; TSQUERY returns TSQUERY
+      </synopsis>
+     </term>
+
+     <listitem>
+      <para>
+       Returns <literal>AND</literal>-ed TSQUERY
+      </para>
+     </listitem>
+    </varlistentry>
+
+    <varlistentry>
+
+     <indexterm zone="textsearch-tsquery">
+     <primary>TSQUERY || TSQUERY</primary>
+     </indexterm>
+
+     <term>
+      <synopsis>
+       TSQUERY || TSQUERY returns TSQUERY
+      </synopsis>
+     </term>
+
+     <listitem>
+      <para>
+       Returns <literal>OR</literal>-ed TSQUERY
+      </para>
+     </listitem>
+    </varlistentry>
+
+    <varlistentry>
+
+     <indexterm zone="textsearch-tsquery">
+     <primary>!! TSQUERY</primary>
+     </indexterm>
+
+     <term>
+      <synopsis>
+       !! TSQUERY returns TSQUERY
+      </synopsis>
+     </term>
+
+     <listitem>
+      <para>
+       Returns the negation of TSQUERY
+      </para>
+     </listitem>
+    </varlistentry>
+
+    <varlistentry>
+
+     <indexterm zone="textsearch-tsquery">
+     <primary>Btree operations for tsquery</primary>
+     </indexterm>
+
+     <term>
+      <synopsis>
+       TSQUERY &lt; TSQUERY
+       TSQUERY &lt;= TSQUERY
+       TSQUERY = TSQUERY
+       TSQUERY &gt;= TSQUERY
+       TSQUERY &gt; TSQUERY
+      </synopsis>
+     </term>
+
+     <listitem>
+      <para>
+       All btree operations are defined for the <type>tsquery</type> type.
+       tsqueries are compared to each other using <emphasis>lexicographical</emphasis>
+       ordering.
+      </para>
+     </listitem>
+    </varlistentry>
+
+   </variablelist>
+
+   <sect3 id="textsearch-queryrewriting">
+   <title>Query Rewriting</title>
+
+    <para>
+     Query rewriting is a set of functions and operators for the
+     <type>tsquery</type> data type.  It allows control at search
+     <emphasis>query time</emphasis> without reindexing (the opposite of the
+     thesaurus).  For example, you can expand the search using synonyms
+     (<literal>new york</>, <literal>big apple</>, <literal>nyc</>,
+     <literal>gotham</>) or narrow the search to direct the user to some hot
+     topic.
+    </para>
+
+    <para>
+     The <function>ts_rewrite()</function> function changes the original query by
+     replacing part of the query with some other string of type <type>tsquery</type>,
+     as defined by the rewrite rule. Arguments to <function>ts_rewrite()</function>
+     can be names of columns of type <type>tsquery</type>.
+    </para>
  
  <programlisting>
  CREATE TABLE aliases (t TSQUERY PRIMARY KEY, s TSQUERY);
  INSERT INTO aliases VALUES('a', 'c');
  </programlisting>
  
-<variablelist>
-<varlistentry>
+    <variablelist>
  
-<indexterm zone="textsearch-tsquery">
-<primary>ts_rewrite</primary>
-</indexterm>
+     <varlistentry>
  
-<term>
-<synopsis>
-ts_rewrite (<replaceable class="PARAMETER">query</replaceable> TSQUERY, <replaceable class="PARAMETER">target</replaceable> TSQUERY, <replaceable class="PARAMETER">sample</replaceable> TSQUERY) returns TSQUERY
-</synopsis>
-</term>
+      <indexterm zone="textsearch-tsquery">
+      <primary>ts_rewrite</primary>
+      </indexterm>
  
-<listitem>
-<para>
+      <term>
+       <synopsis>
+        ts_rewrite (<replaceable class="PARAMETER">query</replaceable> TSQUERY, <replaceable class="PARAMETER">target</replaceable> TSQUERY, <replaceable class="PARAMETER">sample</replaceable> TSQUERY) returns TSQUERY
+       </synopsis>
+      </term>
+
+      <listitem>
+       <para>
  <programlisting>
  SELECT ts_rewrite('a &amp; b'::tsquery, 'a'::tsquery, 'c'::tsquery);
-  ts_rewrite
-  -----------
-   'b' &amp; 'c'
+ ts_rewrite
+------------
+ 'b' &amp; 'c'
  </programlisting>
-</para>
-</listitem>
-</varlistentry>
+       </para>
+      </listitem>
+     </varlistentry>
  
-<varlistentry>
+     <varlistentry>
  
-<term>
-<synopsis>
-ts_rewrite(ARRAY[<replaceable class="PARAMETER">query</replaceable> TSQUERY, <replaceable class="PARAMETER">target</replaceable> TSQUERY, <replaceable class="PARAMETER">sample</replaceable> TSQUERY]) returns TSQUERY
-</synopsis>
-</term>
+      <term>
+       <synopsis>
+        ts_rewrite(ARRAY[<replaceable class="PARAMETER">query</replaceable> TSQUERY, <replaceable class="PARAMETER">target</replaceable> TSQUERY, <replaceable class="PARAMETER">sample</replaceable> TSQUERY]) returns TSQUERY
+       </synopsis>
+      </term>
  
-<listitem>
-<para>
+      <listitem>
+       <para>
  <programlisting>
  SELECT ts_rewrite(ARRAY['a &amp; b'::tsquery, t,s]) FROM aliases;
-  ts_rewrite
-  -----------
-   'b' &amp; 'c'
+ ts_rewrite
+------------
+ 'b' &amp; 'c'
  </programlisting>
-</para>
-</listitem>
-</varlistentry>
+       </para>
+      </listitem>
+     </varlistentry>
  
-<varlistentry>
+     <varlistentry>
  
-<term>
-<synopsis>
-ts_rewrite (<replaceable class="PARAMETER">query</> TSQUERY,<literal>'SELECT target ,sample FROM test'</literal>::text) returns TSQUERY
-</synopsis>
-</term>
+      <term>
+       <synopsis>
+        ts_rewrite (<replaceable class="PARAMETER">query</> TSQUERY,<literal>'SELECT target ,sample FROM test'</literal>::text) returns TSQUERY
+       </synopsis>
+      </term>
  
-<listitem>
-<para>
+      <listitem>
+       <para>
  <programlisting>
  SELECT ts_rewrite('a &amp; b'::tsquery, 'SELECT t,s FROM aliases');
-  ts_rewrite
-  -----------
-   'b' &amp; 'c'
+ ts_rewrite
+------------
+ 'b' &amp; 'c'
  </programlisting>
-</para>
-</listitem>
-</varlistentry>
-</variablelist>
+       </para>
+      </listitem>
+     </varlistentry>
  
-<para>
-What if there are several instances of rewriting? For example, query
-<literal>'a &amp; b'</literal> can be rewritten as
-<literal>'b &amp; c'</literal> and <literal>'cc'</literal>.
+    </variablelist>
+
+    <para>
+     What if there are several instances of rewriting? For example, query
+     <literal>'a &amp; b'</literal> can be rewritten as
+     <literal>'b &amp; c'</literal> and <literal>'cc'</literal>.
  
  <programlisting>
  SELECT * FROM aliases;
@@ -1254,191 +1314,203 @@ SELECT * FROM aliases;
   'x'       | 'z'
   'a' &amp; 'b' | 'cc'
  </programlisting>
-This ambiguity can be resolved by specifying a sort order:
+
+     This ambiguity can be resolved by specifying a sort order:
+
  <programlisting>
  SELECT ts_rewrite('a &amp; b', 'SELECT t, s FROM aliases ORDER BY t DESC');
   ts_rewrite
----------
+------------
   'cc'
+
  SELECT ts_rewrite('a &amp; b', 'SELECT t, s FROM aliases ORDER BY t ASC');
    ts_rewrite
------------
+------------
   'b' &amp; 'c'
  </programlisting>
-</para>
+    </para>
+
+    <para>
+     Let's consider a real-life astronomical example. We'll expand query
+     <literal>supernovae</literal> using table-driven rewriting rules:
  
-<para>
-Let's consider a real-life astronomical example. We'll expand query
-<literal>supernovae</literal> using table-driven rewriting rules:
  <programlisting>
  CREATE TABLE aliases (t tsquery primary key, s tsquery);
  INSERT INTO aliases VALUES(to_tsquery('supernovae'), to_tsquery('supernovae|sn'));
+
  SELECT ts_rewrite(to_tsquery('supernovae'),  'SELECT * FROM aliases') &amp;&amp; to_tsquery('crab');
-            ?column?
----------------------------------
- ( 'supernova' | 'sn' ) &amp; 'crab'
+            ?column?
+---------------------------------
+ ( 'supernova' | 'sn' ) &amp; 'crab'
  </programlisting>
-Notice, that we can change the rewriting rule online<!-- TODO maybe use another word for "online"? -->:
+
+     Notice that we can change the rewriting rule online<!-- TODO maybe use another word for "online"? -->:
+
  <programlisting>
  UPDATE aliases SET s=to_tsquery('supernovae|sn &amp; !nebulae') WHERE t=to_tsquery('supernovae');
  SELECT ts_rewrite(to_tsquery('supernovae'),  'SELECT * FROM aliases') &amp;&amp; to_tsquery('crab');
-                  ?column?
----------------------------------------------
- ( 'supernova' | 'sn' &amp; !'nebula' ) &amp; 'crab'
+                   ?column?
+-----------------------------------------------
+ ( 'supernova' | 'sn' &amp; !'nebula' ) &amp; 'crab'
  </programlisting>
-</para>
-</sect3>
+    </para>
+   </sect3>
  
-<sect3 id="textsearch-tsquery-ops">
-<title>Operators For tsquery</title>
+   <sect3 id="textsearch-tsquery-ops">
+   <title>Operators For tsquery</title>
+
+    <para>
+     Rewriting can be slow when there are many rewriting rules, since it checks
+     every rule for a possible hit. To filter out obvious non-candidate rules there are containment
+     operators for the <type>tsquery</type> type. In the example below, we select only those
+     rules which might contain the original query:
  
-<para>
-Rewriting can be slow for many rewriting rules since it checks every rule
-for a possible hit. To filter out obvious non-candidate rules there are containment
-operators for the <type>tsquery</type> type. In the example below, we select only those
-rules which might contain the original query:
  <programlisting>
  SELECT ts_rewrite(ARRAY['a &amp; b'::tsquery, t,s])
  FROM aliases
  WHERE 'a &amp; b' @> t;
-  ts_rewrite
------------
+ ts_rewrite
+------------
   'b' &amp; 'c'
  </programlisting>
  
-</para>
+    </para>
+
+    <para>
+     Two operators are defined for <type>tsquery</type>:
+    </para>
  
-<para>
-Two operators are defined for <type>tsquery</type>:
-</para>
+    <variablelist>
+ 
+     <varlistentry>
  
-<variablelist>
-<varlistentry>
+      <indexterm zone="textsearch-tsquery">
+      <primary>TSQUERY @&gt; TSQUERY</primary>
+      </indexterm>
  
-<indexterm zone="textsearch-tsquery">
-<primary>TSQUERY @&gt; TSQUERY</primary>
-</indexterm>
+      <term>
+       <synopsis>
+        TSQUERY @&gt; TSQUERY
+       </synopsis>
+      </term>
  
-<term>
-<synopsis>
-TSQUERY @&gt; TSQUERY
-</synopsis>
-</term>
+      <listitem>
+       <para>
+        Returns <literal>true</literal> if the right argument might be contained in the left argument.
+       </para>
+      </listitem>
+     </varlistentry>
  
-<listitem>
-<para>
-Returns <literal>true</literal> if the right argument might be contained in left argument.
-</para>
-</listitem>
-</varlistentry>
+     <varlistentry>
  
-<varlistentry>
+      <indexterm zone="textsearch-tsquery">
+      <primary>TSQUERY &lt;@ TSQUERY</primary>
+      </indexterm>
  
-<indexterm zone="textsearch-tsquery">
-<primary>tsquery &lt;@ tsquery</primary>
-</indexterm>
+      <term>
+       <synopsis>
+        TSQUERY &lt;@ TSQUERY
+       </synopsis>
+      </term>
  
-<term>
-<synopsis>
-TSQUERY &lt;@ TSQUERY
-</synopsis>
-</term>
+      <listitem>
+       <para>
+        Returns <literal>true</literal> if the left argument might be contained in the right argument.
+       </para>
+      </listitem>
+     </varlistentry>
  
-<listitem>
-<para>
-Returns <literal>true</literal> if the left argument might be contained in right argument.
-</para>
-</listitem>
-</varlistentry>
-</variablelist>
+    </variablelist>
  
  
-</sect3>
+   </sect3>
  
-<sect3 id="textsearch-tsqueryindex">
-<title>Index For tsquery</title>
+   <sect3 id="textsearch-tsqueryindex">
+   <title>Index For tsquery</title>
  
-<para>
-To speed up operators <literal>&lt;@</> and <literal>@&gt;</literal> for
-<type>tsquery</type> one can use a <acronym>GiST</acronym> index with
-a <literal>tsquery_ops</literal> opclass:
+    <para>
+     To speed up operators <literal>&lt;@</> and <literal>@&gt;</literal> for
+     <type>tsquery</type> one can use a <acronym>GiST</acronym> index with
+     a <literal>tsquery_ops</literal> opclass:
  
  <programlisting>
  CREATE INDEX t_idx ON aliases USING gist (t tsquery_ops);
  </programlisting>
-</para>
+    </para>
  
-</sect3>
+   </sect3>
  
-</sect2>
+  </sect2>
  
-</sect1>
+ </sect1>
  
-<sect1 id="textsearch-controls">
-<title>Additional Controls</title>
+ <sect1 id="textsearch-controls">
+ <title>Additional Controls</title>
  
-<para>
-To implement full text searching there must be a function to create a
-<type>tsvector</type> from a document and a <type>tsquery</type> from a
-user query. Also, we need to return results in some order, i.e., we need
-a function which compares documents with respect to their relevance to
-the <type>tsquery</type>.  Full text searching in
-<productname>PostgreSQL</productname> provides support for all of these
-functions.
-</para>
+  <para>
+   To implement full text searching there must be a function to create a
+   <type>tsvector</type> from a document and a <type>tsquery</type> from a
+   user query. Also, we need to return results in some order, i.e., we need
+   a function which compares documents with respect to their relevance to
+   the <type>tsquery</type>.  Full text searching in
+   <productname>PostgreSQL</productname> provides support for all of these
+   functions.
+  </para>
  
-<sect2 id="textsearch-parser">
-<title>Parsing</title>
+  <sect2 id="textsearch-parser">
+  <title>Parsing</title>
+
+   <para>
+    Full text searching in <productname>PostgreSQL</productname> provides
+    function <function>to_tsvector</function>, which converts a document to
+    the <type>tsvector</type> data type. More details are available in <xref
+    linkend="textsearch-tsvector">, but for now consider a simple example:
  
-<para>
-Full text searching in <productname>PostgreSQL</productname> provides
-function <function>to_tsvector</function>, which converts a document to
-the <type>tsvector</type> data type. More details are available in <xref
-linkend="textsearch-tsvector">, but for now consider a simple example:
  <programlisting>
  SELECT to_tsvector('english', 'a fat  cat sat on a mat - it ate a fat rats');
-                     to_tsvector
+                     to_tsvector
  -----------------------------------------------------
   'ate':9 'cat':3 'fat':2,11 'mat':7 'rat':12 'sat':4
  </programlisting>
-</para>
-
-<para> 
-In the example above we see that the resulting <type>tsvector</type> does not
-contain the words <literal>a</literal>, <literal>on</literal>, or
-<literal>it</literal>, the word <literal>rats</literal> became
-<literal>rat</literal>, and the punctuation sign <literal>-</literal> was
-ignored. 
-</para> 
-
-<para>
-The <function>to_tsvector</function> function internally calls a parser
-which breaks the document (<literal>a fat  cat sat on a mat - it ate a
-fat rats</literal>) into words and corresponding types. The default parser
-recognizes 23 types.  Each word, depending on its type, passes through a
-group of dictionaries (<xref linkend="textsearch-dictionaries">).  At the
-end of this step we obtain <emphasis>lexemes</emphasis>.  For example,
-<literal>rats</literal> became <literal>rat</literal> because one of the
-dictionaries recognized that the word <literal>rats</literal> is a plural
-form of <literal>rat</literal>.  Some words are treated as "stop words"
-(<xref linkend="textsearch-stopwords">) and ignored since they occur too
-frequently and have little informational value.  In our example these are
-<literal>a</literal>, <literal>on</literal>, and <literal>it</literal>.
-The punctuation sign <literal>-</literal> was also ignored because its
-type (<literal>Space symbols</literal>) is not indexed. All information
-about the parser, dictionaries and what types of lexemes to index is
-documented in the full text configuration section (<xref
-linkend="textsearch-tables-configuration">).  It is possible to have
-several different configurations in the same database, and many predefined
-system configurations are available for different languages. In our example
-we used the default configuration <literal>english</literal> for the
-English language.
-</para>
-
-<para>
-As another example, below is the output from the <function>ts_debug</function>
-function ( <xref linkend="textsearch-debugging"> ), which shows all details
-of the full text machinery:
+   </para>
+
+   <para> 
+    In the example above we see that the resulting <type>tsvector</type> does not
+    contain the words <literal>a</literal>, <literal>on</literal>, or
+    <literal>it</literal>, the word <literal>rats</literal> became
+    <literal>rat</literal>, and the punctuation sign <literal>-</literal> was
+    ignored. 
+   </para> 
+
+   <para>
+    The <function>to_tsvector</function> function internally calls a parser
+    which breaks the document (<literal>a fat  cat sat on a mat - it ate a
+    fat rats</literal>) into words and corresponding types. The default parser
+    recognizes 23 types.  Each word, depending on its type, passes through a
+    group of dictionaries (<xref linkend="textsearch-dictionaries">).  At the
+    end of this step we obtain <emphasis>lexemes</emphasis>.  For example,
+    <literal>rats</literal> became <literal>rat</literal> because one of the
+    dictionaries recognized that the word <literal>rats</literal> is a plural
+    form of <literal>rat</literal>.  Some words are treated as "stop words"
+    (<xref linkend="textsearch-stopwords">) and ignored since they occur too
+    frequently and have little informational value.  In our example these are
+    <literal>a</literal>, <literal>on</literal>, and <literal>it</literal>.
+    The punctuation sign <literal>-</literal> was also ignored because its
+    type (<literal>Space symbols</literal>) is not indexed. All information
+    about the parser, dictionaries and what types of lexemes to index is
+    documented in the full text configuration section (<xref
+    linkend="textsearch-tables-configuration">).  It is possible to have
+    several different configurations in the same database, and many predefined
+    system configurations are available for different languages. In our example
+    we used the default configuration <literal>english</literal> for the
+    English language.
+   </para>
+
+   <para>
+    As another example, below is the output from the <function>ts_debug</function>
+    function (<xref linkend="textsearch-debugging">), which shows all details
+    of the full text machinery:
+
  <programlisting>
  SELECT * FROM ts_debug('english','a fat  cat sat on a mat - it ate a fat rats');
   Alias |  Description  | Token | Dictionaries | Lexized token  
@@ -1467,25 +1539,26 @@ SELECT * FROM ts_debug('english','a fat  cat sat on a mat - it ate a fat rats');
   lword | Latin word    | fat   | {english}    | english: {fat}
   blank | Space symbols |       |              | 
   lword | Latin word    | rats  | {english}    | english: {rat}
-(24 rows)
-</programlisting>
-</para>
-
-<para>
-Function <function>setweight()</function> is used to label
-<type>tsvector</type>. The typical usage of this is to mark out the
-different parts of a document, perhaps by importance.  Later, this can be
-used for ranking of search results in addition to positional information
-(distance between query terms).  If no ranking is required, positional
-information can be removed from <type>tsvector</type> using the
-<function>strip()</function> function to save space.
-</para>
-
-<para>
-Because <function>to_tsvector</function>(<LITERAL>NULL</LITERAL>) can
-return <LITERAL>NULL</LITERAL>, it is recommended to use
-<function>coalesce</function>. Here is the safe method for creating a
-<type>tsvector</type> from a structured document:
+(24 rows)
+</programlisting>
+   </para>
+
+   <para>
+    Function <function>setweight()</function> is used to label
+    <type>tsvector</type>. The typical usage of this is to mark out the
+    different parts of a document, perhaps by importance.  Later, this can be
+    used for ranking of search results in addition to positional information
+    (distance between query terms).  If no ranking is required, positional
+    information can be removed from <type>tsvector</type> using the
+    <function>strip()</function> function to save space.
+   </para>
+
+   <para>
+    Because <function>to_tsvector</function>(<LITERAL>NULL</LITERAL>) can
+    return <LITERAL>NULL</LITERAL>, it is recommended to use
+    <function>coalesce</function>. Here is the safe method for creating a
+    <type>tsvector</type> from a structured document:
+
  <programlisting>
  UPDATE tt SET ti=
      setweight(to_tsvector(coalesce(title,'')), 'A')    || ' ' ||
@@ -1493,31 +1566,32 @@ UPDATE tt SET ti=
      setweight(to_tsvector(coalesce(abstract,'')), 'C') || ' ' ||
      setweight(to_tsvector(coalesce(body,'')), 'D');
  </programlisting>
-</para>
+   </para>
+
+   <para>
+    The following functions allow manual parsing control:
  
-<para>
-The following functions allow manual parsing control:
+    <variablelist>
  
-<variablelist>
+     <varlistentry>
  
-<varlistentry>
+      <indexterm zone="textsearch-parser">
+      <primary>ts_parse</primary>
+      </indexterm>
  
-<indexterm zone="textsearch-parser">
-<primary>parse</primary>
-</indexterm>
+      <term>
+       <synopsis>
+        ts_parse(<replaceable class="PARAMETER">parser</replaceable>,  <replaceable class="PARAMETER">document</replaceable> TEXT) returns SETOF <type>tokenout</type>
+       </synopsis>
+      </term>
  
-<term>
-<synopsis>
-ts_parse(<replaceable class="PARAMETER">parser</replaceable>,  <replaceable class="PARAMETER">document</replaceable> TEXT) returns SETOF <type>tokenout</type>
-</synopsis>
-</term>
+      <listitem>
+       <para>
+        Parses the given <replaceable>document</replaceable> and returns a series
+        of records, one for each token produced by parsing. Each record includes
+        a <varname>tokid</varname> giving its type and a <varname>token</varname>
+        which gives its content:
  
-<listitem>
-<para>
-Parses the given <replaceable>document</replaceable> and returns a series
-of records, one for each token produced by parsing. Each record includes
-a <varname>tokid</varname> giving its type and a <varname>token</varname>
-which gives its content:
  <programlisting>
  SELECT * FROM ts_parse('default','123 - a number');
   tokid | token
@@ -1529,29 +1603,30 @@ SELECT * FROM ts_parse('default','123 - a number');
      12 |
       1 | number
  </programlisting>
-</para>
-</listitem>
-</varlistentry>
-
-<varlistentry>
-<indexterm zone="textsearch-parser">
-<primary>ts_token_type</primary>
-</indexterm>
-
-<term>
-<synopsis>
-ts_token_type(<replaceable class="PARAMETER">parser</replaceable> ) returns SETOF <type>tokentype</type>
-</synopsis>
-</term>
-
-<listitem>
-<para>
-Returns a table which describes each kind of token the
-<replaceable>parser</replaceable> might produce as output.  For each token
-type the table gives the <varname>tokid</varname> which the
-<replaceable>parser</replaceable> uses to label each
-<varname>token</varname> of that type, the <varname>alias</varname> which
-names the token type, and a short <varname>description</varname>:
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <indexterm zone="textsearch-parser">
+      <primary>ts_token_type</primary>
+      </indexterm>
+
+      <term>
+       <synopsis>
+        ts_token_type(<replaceable class="PARAMETER">parser</replaceable> ) returns SETOF <type>tokentype</type>
+       </synopsis>
+      </term>
+
+      <listitem>
+       <para>
+        Returns a table which describes each kind of token the
+        <replaceable>parser</replaceable> might produce as output.  For each token
+        type the table gives the <varname>tokid</varname> which the
+        <replaceable>parser</replaceable> uses to label each
+        <varname>token</varname> of that type, the <varname>alias</varname> which
+        names the token type, and a short <varname>description</varname>:
+
  <programlisting>
  SELECT * FROM ts_token_type('default');
   tokid |    alias     |            description
@@ -1581,146 +1656,163 @@ SELECT * FROM ts_token_type('default');
      23 | entity       | HTML Entity
  </programlisting>
  
-</para>
-</listitem>
-</varlistentry>
+       </para>
+      </listitem>
+     </varlistentry>
  
-</variablelist>
-</para>
+    </variablelist>
+   </para>
  
-</sect2>
+  </sect2>
  
-<sect2 id="textsearch-ranking">
-<title>Ranking Search Results</title>
+  <sect2 id="textsearch-ranking">
+  <title>Ranking Search Results</title>
  
-<para>
-Ranking attempts to measure how relevant documents are to a particular
-query by inspecting the number of times each search word appears in the
-document, and whether different search terms occur near each other.  Full
-text searching provides two predefined ranking functions which attempt to
-produce a measure of how a document is relevant to the query.  In spite
-of that, the concept of relevancy is vague and very application-specific.
-These functions try to take into account lexical, proximity, and structural
-information.  Different applications might require additional information
-for ranking, e.g. document modification time.
-</para>
+   <para>
+    Ranking attempts to measure how relevant documents are to a particular
+    query by inspecting the number of times each search word appears in the
+    document, and whether different search terms occur near each other.  Full
+    text searching provides two predefined ranking functions which attempt to
+    produce a measure of how a document is relevant to the query.  In spite
+    of that, the concept of relevancy is vague and very application-specific.
+    These functions try to take into account lexical, proximity, and structural
+    information.  Different applications might require additional information
+    for ranking, e.g. document modification time.
+   </para>
  
-<para>
-The lexical part of ranking reflects how often the query terms appear in
-the document, how close the document query terms are, and in what part of
-the document they occur.  Note that ranking functions that use positional
-information will only work on unstripped tsvectors because stripped
-tsvectors lack positional information.
-</para>
+   <para>
+    The lexical part of ranking reflects how often the query terms appear in
+    the document, how close the document query terms are, and in what part of
+    the document they occur.  Note that ranking functions that use positional
+    information will only work on unstripped tsvectors because stripped
+    tsvectors lack positional information.
+   </para>
  
-<para>
-The two ranking functions currently available are:
+   <para>
+    The two ranking functions currently available are:
  
-<variablelist>
+    <variablelist>
  
-<varlistentry>
+     <varlistentry>
  
-<indexterm zone="textsearch-ranking">
-<primary>ts_rank</primary>
-</indexterm>
+      <indexterm zone="textsearch-ranking">
+      <primary>ts_rank</primary>
+      </indexterm>
  
-<term>
-<synopsis>
-ts_rank(<optional> <replaceable class="PARAMETER">weights</replaceable> float4[]</optional>, <replaceable class="PARAMETER">vector</replaceable> TSVECTOR, <replaceable class="PARAMETER">query</replaceable> TSQUERY, <optional> <replaceable class="PARAMETER">normalization</replaceable> int4 </optional>) returns float4
-</synopsis>
-</term>
+      <term>
+       <synopsis>
+        ts_rank(<optional> <replaceable class="PARAMETER">weights</replaceable> float4[], </optional> <replaceable class="PARAMETER">vector</replaceable> TSVECTOR, <replaceable class="PARAMETER">query</replaceable> TSQUERY, <optional> <replaceable class="PARAMETER">normalization</replaceable> int4 </optional>) returns float4
+       </synopsis>
+      </term>
+
+      <listitem>
+       <para>
+        This ranking function offers the ability to weigh word instances more
+        heavily depending on how you have classified them.  The weights specify
+        how heavily to weigh each category of word:
  
-<listitem>
-<para>
-This ranking function offers the ability to weigh word instances more
-heavily depending on how you have classified them.  The weights specify
-how heavily to weigh each category of word:
  <programlisting>
  {D-weight, C-weight, B-weight, A-weight}
-</programlisting> 
-If no weights are provided,
-then these defaults are used:
+</programlisting>
+ 
+        If no weights are provided,
+        then these defaults are used:
+
  <programlisting>
  {0.1, 0.2, 0.4, 1.0}
  </programlisting>
-Often weights are used to mark words from special areas of the document,
-like the title or an initial abstract, and make them more or less important
-than words in the document body.
-</para>
-</listitem>
-</varlistentry>
-
-<varlistentry>
-
-<indexterm zone="textsearch-ranking">
-<primary>ts_rank_cd</primary>
-</indexterm>
-
-<term>
-<synopsis>
-ts_rank_cd(<optional> <replaceable class="PARAMETER">weights</replaceable> float4[], </optional> <replaceable class="PARAMETER">vector</replaceable> TSVECTOR, <replaceable class="PARAMETER">query</replaceable> TSQUERY, <optional> <replaceable class="PARAMETER">normalization</replaceable> int4 </optional>) returns float4
-</synopsis>
-</term>
-
-<listitem>
-<para>
-This function computes the <emphasis>cover density</emphasis> ranking for
-the given document vector and query, as described in Clarke, Cormack, and
-Tudhope's "Relevance Ranking for One to Three Term Queries" in the
-"Information Processing and Management", 1999.
-</para>
-</listitem>
-</varlistentry>
-
-</variablelist>
-
-</para>
-
-<para>
-Since a longer document has a greater chance of containing a query term
-it is reasonable to take into account document size, i.e. a hundred-word
-document with five instances of a search word is probably more relevant
-than a thousand-word document with five instances.  Both ranking functions
-take an integer <replaceable>normalization</replaceable> option that
-specifies whether a document's length should impact its rank.  The integer
-option controls several behaviors which is done using bit-wise fields and
-<literal>|</literal> (for example, <literal>2|4</literal>):
-
-<itemizedlist  spacing="compact" mark="bullet">
-<listitem><para>
-0 (the default) ignores the document length
-</para></listitem>
-<listitem><para>
-1 divides the rank by 1 + the logarithm of the document length
-</para></listitem>
-<listitem><para>
-2 divides the rank by the length itself
-</para></listitem>
-<listitem><para>
-<!-- what is mean harmonic distance -->
-4 divides the rank by the mean harmonic distance between extents
-</para></listitem>
-<listitem><para>
-8 divides the rank by the number of unique words in document
-</para></listitem>
-<listitem><para>
-16 divides the rank by 1 + logarithm of the number of unique words in document
-</para></listitem>
-</itemizedlist>
-
-</para>
-
-<para>
-It is important to note that ranking functions do not use any global
-information so it is impossible to produce a fair normalization to 1% or
-100%, as sometimes required. However, a simple technique like
-<literal>rank/(rank+1)</literal> can be applied.  Of course, this is just
-a cosmetic change, i.e., the ordering of the search results will not change.
-</para>
-
-<para>
-Several examples are shown below; note that the second example uses
-normalized ranking:
+
+        Often weights are used to mark words from special areas of the document,
+        like the title or an initial abstract, and make them more or less important
+        than words in the document body.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+
+      <indexterm zone="textsearch-ranking">
+      <primary>ts_rank_cd</primary>
+      </indexterm>
+
+      <term>
+       <synopsis>
+        ts_rank_cd(<optional> <replaceable class="PARAMETER">weights</replaceable> float4[], </optional> <replaceable class="PARAMETER">vector</replaceable> TSVECTOR, <replaceable class="PARAMETER">query</replaceable> TSQUERY, <optional> <replaceable class="PARAMETER">normalization</replaceable> int4 </optional>) returns float4
+       </synopsis>
+      </term>
+
+      <listitem>
+       <para>
+        This function computes the <emphasis>cover density</emphasis> ranking for
+        the given document vector and query, as described in Clarke, Cormack, and
+        Tudhope's "Relevance Ranking for One to Three Term Queries" in the
+        "Information Processing and Management", 1999.
+       </para>
+      </listitem>
+     </varlistentry>
+
+    </variablelist>
+
+   </para>
+
+   <para>
+    Since a longer document has a greater chance of containing a query term,
+    it is reasonable to take into account document size, e.g. a hundred-word
+    document with five instances of a search word is probably more relevant
+    than a thousand-word document with five instances.  Both ranking functions
+    take an integer <replaceable>normalization</replaceable> option that
+    specifies whether a document's length should impact its rank.  The integer
+    option controls several behaviors, so it is a bit mask: you can combine
+    one or more behaviors using <literal>|</literal> (for example, <literal>2|4</literal>):
+
+    <itemizedlist  spacing="compact" mark="bullet">
+     <listitem>
+      <para>
+       0 (the default) ignores the document length
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+       1 divides the rank by 1 + the logarithm of the document length
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+       2 divides the rank by the length itself
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+       <!-- what is mean harmonic distance -->
+       4 divides the rank by the mean harmonic distance between extents
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+       8 divides the rank by the number of unique words in document
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+       16 divides the rank by 1 + the logarithm of the number of unique words in document
+      </para>
+     </listitem>
+    </itemizedlist>
+
+   </para>
+
+   <para>
+    It is important to note that ranking functions do not use any global
+    information so it is impossible to produce a fair normalization to 1% or
+    100%, as sometimes required. However, a simple technique like
+    <literal>rank/(rank+1)</literal> can be applied.  Of course, this is just
+    a cosmetic change, i.e., the ordering of the search results will not change.
+   </para>
+
+   <para>
+    Several examples are shown below; note that the second example uses
+    normalized ranking:
+
  <programlisting>
  SELECT title, ts_rank_cd('{0.1, 0.2, 0.4, 1.0}',textsearch, query) AS rnk
  FROM apod, to_tsquery('neutrino|(dark &amp; matter)') query
@@ -1757,252 +1849,283 @@ ORDER BY rnk DESC LIMIT 10;
   Ice Fishing for Cosmic Neutrinos              | 0.615384618911517
   Weak Lensing Distorts the Universe            | 0.450010798361481
  </programlisting>
-</para>
-
-<para>
-The first argument in <function>ts_rank_cd</function> (<literal>'{0.1, 0.2,
-0.4, 1.0}'</literal>) is an optional parameter which specifies the
-weights for labels <literal>D</literal>, <literal>C</literal>,
-<literal>B</literal>, and <literal>A</literal> used in function
-<function>setweight</function>. These default values show that lexemes
-labeled as <literal>A</literal> are ten times more important than ones
-that are labeled with <literal>D</literal>.
-</para>
-
-<para>
-Ranking can be expensive since it requires consulting the
-<type>tsvector</type> of all documents, which can be I/O bound and
-therefore slow. Unfortunately, it is almost impossible to avoid since full
-text searching in a database should work without indexes <!-- TODO I don't
-get this -->.  Moreover an index can be lossy (a <acronym>GiST</acronym>
-index, for example) so it must check documents to avoid false hits.
-</para>
-
-<para>
-Note that the ranking functions above are only examples.  You can write
-your own ranking functions and/or combine additional factors to fit your
-specific needs.
-</para>
-
-</sect2>
-
-
-<sect2 id="textsearch-headline">
-<title>Highlighting Results</title>
-
-<indexterm zone="textsearch-headline">
-<primary>headline</primary>
-</indexterm>
-
-<para>
-To present search results it is ideal to show a part of each document and
-how it is related to the query. Usually, search engines show fragments of
-the document with marked search terms.  <productname>PostgreSQL</> full
-text searching provides the function <function>headline</function> that
-implements such functionality.
-</para>
-
-<variablelist>
-
-<varlistentry>
-
-<term>
-<synopsis>
-ts_headline(<optional> <replaceable class="PARAMETER">config_name</replaceable> text</optional>, <replaceable class="PARAMETER">document</replaceable> text, <replaceable class="PARAMETER">query</replaceable> TSQUERY, <optional> <replaceable class="PARAMETER">options</replaceable> text </optional>) returns text
-</synopsis>
-</term>
-
-<listitem>
-<para>
-The <function>ts_headline</function> function accepts a document along with
-a query, and returns one or more ellipsis-separated excerpts from the
-document in which terms from the query are highlighted.  The configuration
-used to parse the document can be specified by its
-<replaceable>config_name</replaceable>; if none is specified, the current
-configuration is used.
-</para>
-
-
-</listitem>
-</varlistentry>
-</variablelist>
-
-<para>
-If an <replaceable>options</replaceable> string is specified it should
-consist of a comma-separated list of one or more 'option=value' pairs.
-The available options are:
-
-<itemizedlist  spacing="compact" mark="bullet">
-<listitem><para>
-<literal>StartSel</>, <literal>StopSel</literal>: the strings with which
-query words appearing in the document should be delimited to distinguish
-them from other excerpted words.
-</para></listitem>
-<listitem><para>
-<literal>MaxWords</>, <literal>MinWords</literal>: limit the shortest and
-longest headlines to output
-</para></listitem>
-<listitem><para>
-<literal>ShortWord</literal>: this prevents your headline from beginning
-or ending with a word which has this many characters or less. The default
-value of three eliminates the English articles.
-</para></listitem>
-<listitem><para>
-<literal>HighlightAll</literal>: boolean flag;  if
-<literal>true</literal> the whole document will be highlighted
-</para></listitem>
-</itemizedlist>
-
-Any unspecified options receive these defaults:
+   </para>
+
+   <para>
+    The first argument in <function>ts_rank_cd</function> (<literal>'{0.1, 0.2,
+    0.4, 1.0}'</literal>) is an optional parameter which specifies the
+    weights for labels <literal>D</literal>, <literal>C</literal>,
+    <literal>B</literal>, and <literal>A</literal> used in function
+    <function>setweight</function>. These default values show that lexemes
+    labeled as <literal>A</literal> are ten times more important than ones
+    that are labeled with <literal>D</literal>.
+   </para>
+
+   <para>
+    Ranking can be expensive since it requires consulting the
+    <type>tsvector</type> of all documents, which can be I/O bound and
+    therefore slow. Unfortunately, it is almost impossible to avoid since full
+    text searching in a database should work without indexes <!-- TODO I don't
+    get this -->.  Moreover an index can be lossy (a <acronym>GiST</acronym>
+    index, for example) so it must check documents to avoid false hits.
+   </para>
+
+   <para>
+    Note that the ranking functions above are only examples.  You can write
+    your own ranking functions and/or combine additional factors to fit your
+    specific needs.
+   </para>
+
+  </sect2>
+
+
+  <sect2 id="textsearch-headline">
+  <title>Highlighting Results</title>
+
+   <indexterm zone="textsearch-headline">
+   <primary>headline</primary>
+   </indexterm>
+
+   <para>
+    To present search results it is ideal to show a part of each document and
+    how it is related to the query. Usually, search engines show fragments of
+    the document with marked search terms.  <productname>PostgreSQL</> full
+    text searching provides the function <function>ts_headline</function> that
+    implements such functionality.
+   </para>
+
+   <variablelist>
+
+    <varlistentry>
+
+     <term>
+      <synopsis>
+       ts_headline(<optional> <replaceable class="PARAMETER">config_name</replaceable> text</optional>, <replaceable class="PARAMETER">document</replaceable> text, <replaceable class="PARAMETER">query</replaceable> TSQUERY, <optional> <replaceable class="PARAMETER">options</replaceable> text </optional>) returns text
+      </synopsis>
+     </term>
+
+     <listitem>
+      <para>
+       The <function>ts_headline</function> function accepts a document along with
+       a query, and returns one or more ellipsis-separated excerpts from the
+       document in which terms from the query are highlighted.  The configuration
+       used to parse the document can be specified by its
+       <replaceable>config_name</replaceable>; if none is specified, the current
+       configuration is used.
+      </para>
+
+
+     </listitem>
+    </varlistentry>
+   </variablelist>
+
+   <para>
+    If an <replaceable>options</replaceable> string is specified it should
+    consist of a comma-separated list of one or more 'option=value' pairs.
+    The available options are:
+
+    <itemizedlist  spacing="compact" mark="bullet">
+     <listitem>
+      <para>
+       <literal>StartSel</>, <literal>StopSel</literal>: the strings with which
+       query words appearing in the document should be delimited to distinguish
+       them from other excerpted words.
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+       <literal>MaxWords</>, <literal>MinWords</literal>: limit the longest and
+       shortest headlines to output
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+       <literal>ShortWord</literal>: this prevents your headline from beginning
+       or ending with a word which has this many characters or less. The default
+       value of three eliminates the English articles.
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+       <literal>HighlightAll</literal>: boolean flag;  if
+       <literal>true</literal> the whole document will be highlighted
+      </para>
+     </listitem>
+    </itemizedlist>
+
+    Any unspecified options receive these defaults:
+
  <programlisting>
  StartSel=&lt;b&gt;, StopSel=&lt;/b&gt;, MaxWords=35, MinWords=15, ShortWord=3, HighlightAll=FALSE
  </programlisting>
-</para>
+   </para>
  
-<para>
-For example:
+   <para>
+    For example:
  
  <programlisting>
  SELECT ts_headline('a b c', 'c'::tsquery);
     headline
  --------------
   a b &lt;b&gt;c&lt;/b&gt;
+
  SELECT ts_headline('a b c', 'c'::tsquery, 'StartSel=&lt;,StopSel=&gt;');
   ts_headline 
  -------------
   a b  &lt;c&gt;
  </programlisting>
-</para>
+   </para>
  
-<para>
-<function>headline</> uses the original document, not
-<type>tsvector</type>, so it can be slow and should be used with care.
-A typical mistake is to call <function>headline()</function> for
-<emphasis>every</emphasis> matching document when only ten documents are
-shown. <acronym>SQL</acronym> subselects can help here;  below is an
-example:
+   <para>
+    <function>ts_headline</> uses the original document, not
+    <type>tsvector</type>, so it can be slow and should be used with care.
+    A typical mistake is to call <function>ts_headline()</function> for
+    <emphasis>every</emphasis> matching document when only ten documents are
+    shown. <acronym>SQL</acronym> subselects can help here;  below is an
+    example:
  
  <programlisting>
  SELECT id,ts_headline(body,q), rank
  FROM (SELECT id,body,q, ts_rank_cd (ti,q) AS rank FROM apod, to_tsquery('stars') q
-      WHERE ti @@ q
-      ORDER BY rank DESC LIMIT 10) AS foo;
-</programlisting>
-</para>
-
-<para>
-Note that the cascade dropping of the <function>parser</function> function
-causes dropping of the <literal>ts_headline</literal> used in the full text search
-configuration <replaceable>config_name</replaceable><!-- TODO I don't get this -->.
-</para>
-
-</sect2>
-
-</sect1>
-
-<sect1 id="textsearch-dictionaries">
-<title>Dictionaries</title>
-
-<para>
-Dictionaries are used to eliminate words that should not be considered in a
-search (<firstterm>stop words</>), and to <firstterm>normalize</> words so
-that different derived forms of the same word will match.  Aside from
-improving search quality, normalization and removal of stop words reduce the
-size of the <type>tsvector</type> representation of a document, thereby
-improving performance.  Normalization does not always have linguistic meaning
-and usually depends on application semantics.
-</para>
-
-<para>
-Some examples of normalization:  
-
-<itemizedlist  spacing="compact" mark="bullet">
-
-<listitem>
-<para> Linguistic - ispell dictionaries try to reduce input words to a
-normalized form; stemmer dictionaries remove word endings
-</para></listitem> 
-<listitem>
-<para> Identical <acronym>URL</acronym> locations are identified and canonicalized:
-
-<itemizedlist  spacing="compact" mark="bullet">
-<listitem><para>
-http://www.pgsql.ru/db/mw/index.html
-</para></listitem>
-<listitem><para>
-http://www.pgsql.ru/db/mw/
-</para></listitem>
-<listitem><para>
-http://www.pgsql.ru/db/../db/mw/index.html
-</para></listitem>
-</itemizedlist>
-
-</para></listitem>
-<listitem><para>
-Colour names are substituted by their hexadecimal values, e.g.,
-<literal>red, green, blue, magenta -> FF0000, 00FF00, 0000FF, FF00FF</literal>
-</para></listitem>
-<listitem><para>
-Remove some numeric fractional digits to reduce the range of possible
-numbers, so <emphasis>3.14</emphasis>159265359,
-<emphasis>3.14</emphasis>15926, <emphasis>3.14</emphasis> will be the same
-after normalization if only two digits are kept after the decimal point.
-</para></listitem>
-</itemizedlist>
-
-</para>
-
-<para>
-A dictionary is a <emphasis>program</emphasis> which accepts lexemes as
-input and returns:
-<itemizedlist  spacing="compact" mark="bullet">
-<listitem><para>
-an array of lexemes if the input lexeme is known to the dictionary
-</para></listitem>
-<listitem><para>
-a void array if the dictionary knows the lexeme, but it is a stop word
-</para></listitem>
-<listitem><para>
-<literal>NULL</literal> if the dictionary does not recognize the input lexeme
-</para></listitem>
-</itemizedlist>
-</para>
-
-<para>
-Full text searching provides predefined dictionaries for many languages,
-and <acronym>SQL</acronym> commands to manipulate them.  There are also
-several predefined template dictionaries that can be used to create new
-dictionaries by overriding their default parameters.  Besides this, it is
-possible to develop custom dictionaries using an <acronym>API</acronym>;
-see the dictionary for integers (<xref
-linkend="textsearch-rule-dictionary-example">) as an example.
-</para>
-
-<para>
-The <literal>ALTER TEXT SEARCH CONFIGURATION ADD
-MAPPING</literal> command binds specific types of lexemes and a set of
-dictionaries to process them. (Mappings can also be specified as part of
-configuration creation.) Lexemes are processed by a stack of dictionaries
-until some dictionary identifies it as a known word or it turns out to be
-a stop word.  If no dictionary recognizes a lexeme, it will be discarded
-and not indexed. A general rule for configuring a stack of dictionaries
-is to place first the most narrow, most specific dictionary, then the more
-general dictionaries and finish it with a very general dictionary, like
-the <application>snowball</> stemmer or <literal>simple</>, which
-recognizes everything.  For example, for an astronomy-specific search
-(<literal>astro_en</literal> configuration) one could bind
-<type>lword</type> (latin word) with a synonym dictionary of astronomical
-terms, a general English dictionary and a <application>snowball</> English
-stemmer:
+WHERE ti @@ q
+ORDER BY rank DESC LIMIT 10) AS foo;
+</programlisting>
+   </para>
+
+   <para>
+    Note that the cascade dropping of the <function>parser</function> function
+    causes dropping of the <literal>ts_headline</literal> used in the full text search
+    configuration <replaceable>config_name</replaceable><!-- TODO I don't get this -->.
+   </para>
+
+  </sect2>
+
+ </sect1>
+
+ <sect1 id="textsearch-dictionaries">
+ <title>Dictionaries</title>
+
+  <para>
+   Dictionaries are used to eliminate words that should not be considered in a
+   search (<firstterm>stop words</>), and to <firstterm>normalize</> words so
+   that different derived forms of the same word will match.  Aside from
+   improving search quality, normalization and removal of stop words reduce the
+   size of the <type>tsvector</type> representation of a document, thereby
+   improving performance.  Normalization does not always have linguistic meaning
+   and usually depends on application semantics.
+  </para>
+
+  <para>
+   Some examples of normalization:  
+
+   <itemizedlist  spacing="compact" mark="bullet">
+
+    <listitem>
+     <para>
+      Linguistic - ispell dictionaries try to reduce input words to a
+      normalized form; stemmer dictionaries remove word endings
+     </para>
+    </listitem> 
+    <listitem>
+     <para>
+      Identical <acronym>URL</acronym> locations are identified and canonicalized:
+
+      <itemizedlist  spacing="compact" mark="bullet">
+       <listitem>
+        <para>
+         http://www.pgsql.ru/db/mw/index.html
+        </para>
+       </listitem>
+       <listitem>
+        <para>
+         http://www.pgsql.ru/db/mw/
+        </para>
+       </listitem>
+       <listitem>
+        <para>
+         http://www.pgsql.ru/db/../db/mw/index.html
+        </para>
+       </listitem>
+      </itemizedlist>
+     </para>
+    </listitem>
+    <listitem>
+     <para>
+      Colour names are substituted by their hexadecimal values, e.g.,
+      <literal>red, green, blue, magenta -> FF0000, 00FF00, 0000FF, FF00FF</literal>
+     </para>
+    </listitem>
+    <listitem>
+     <para>
+      Remove some numeric fractional digits to reduce the range of possible
+      numbers, so <emphasis>3.14</emphasis>159265359,
+      <emphasis>3.14</emphasis>15926, <emphasis>3.14</emphasis> will be the same
+      after normalization if only two digits are kept after the decimal point.
+     </para>
+    </listitem>
+   </itemizedlist>
+
+  </para>
+
+  <para>
+   A dictionary is a <emphasis>program</emphasis> which accepts lexemes as
+   input and returns:
+   <itemizedlist  spacing="compact" mark="bullet">
+    <listitem>
+     <para>
+      an array of lexemes if the input lexeme is known to the dictionary
+     </para>
+    </listitem>
+    <listitem>
+     <para>
+      a void array if the dictionary knows the lexeme, but it is a stop word
+     </para>
+    </listitem>
+    <listitem>
+     <para>
+      <literal>NULL</literal> if the dictionary does not recognize the input lexeme
+     </para>
+    </listitem>
+   </itemizedlist>
+  </para>
+
+  <para>
+   Full text searching provides predefined dictionaries for many languages,
+   and <acronym>SQL</acronym> commands to manipulate them.  There are also
+   several predefined template dictionaries that can be used to create new
+   dictionaries by overriding their default parameters.  Besides this, it is
+   possible to develop custom dictionaries using an <acronym>API</acronym>;
+   see the dictionary for integers (<xref
+   linkend="textsearch-rule-dictionary-example">) as an example.
+  </para>
+
+  <para>
+   The <literal>ALTER TEXT SEARCH CONFIGURATION ADD
+   MAPPING</literal> command binds specific types of lexemes and a set of
+   dictionaries to process them. (Mappings can also be specified as part of
+   configuration creation.) Each lexeme is processed by a stack of dictionaries
+   until some dictionary identifies it as a known word or it turns out to be
+   a stop word.  If no dictionary recognizes a lexeme, it will be discarded
+   and not indexed. A general rule for configuring a stack of dictionaries
+   is to place first the most narrow, most specific dictionary, then the more
+   general dictionaries and finish it with a very general dictionary, like
+   the <application>snowball</> stemmer or <literal>simple</>, which
+   recognizes everything.  For example, for an astronomy-specific search
+   (<literal>astro_en</literal> configuration) one could bind
+   <type>lword</type> (latin word) with a synonym dictionary of astronomical
+   terms, a general English dictionary and a <application>snowball</> English
+   stemmer:
+
  <programlisting>
  ALTER TEXT SEARCH CONFIGURATION astro_en
      ADD MAPPING FOR lword WITH astrosyn, english_ispell, english_stem;
  </programlisting>
-</para>
+  </para>
+
+  <para>
+   Function <function>ts_lexize</function> can be used to test dictionaries,
+   for example:
  
-<para>
-Function <function>ts_lexize</function> can be used to test dictionaries,
-for example:
  <programlisting>
  SELECT ts_lexize('english_stem', 'stars');
   ts_lexize
@@ -2010,27 +2133,32 @@ SELECT ts_lexize('english_stem', 'stars');
   {star}
  (1 row)
  </programlisting>
-Also, the <function>ts_debug</function> function (<xref linkend="textsearch-debugging">)
-can be used for this.
-</para>
  
-<sect2 id="textsearch-stopwords">
-<title>Stop Words</title>
-<para>
-Stop words are words which are very common, appear in almost
-every document, and have no discrimination value. Therefore, they can be ignored
-in the context of full text searching. For example, every English text contains
-words like <literal>a</literal> although it is useless to store them in an index.
-However, stop words do affect the positions in <type>tsvector</type>,
-which in turn, do affect ranking:
+   Also, the <function>ts_debug</function> function (<xref linkend="textsearch-debugging">)
+   can be used for this.
+  </para>
+
+  <sect2 id="textsearch-stopwords">
+  <title>Stop Words</title>
+ 
+   <para>
+    Stop words are words which are very common, appear in almost
+    every document, and have no discrimination value. Therefore, they can be ignored
+    in the context of full text searching. For example, every English text contains
+    words like <literal>a</literal>, so it is useless to store them in an index.
+    However, stop words do affect the positions in <type>tsvector</type>,
+    which in turn affect ranking:
+
  <programlisting>
  SELECT to_tsvector('english','in the list of stop words');
          to_tsvector
  ----------------------------
   'list':3 'stop':5 'word':6
  </programlisting>
-The gaps between positions 1-3 and 3-5 are because of stop words, so ranks
-calculated for documents with and without stop words are quite different:
+
+    The gaps between positions 1-3 and 3-5 are because of stop words, so ranks
+    calculated for documents with and without stop words are quite different:
+
  <programlisting>
  SELECT ts_rank_cd ('{1,1,1,1}', to_tsvector('english','in the list of stop words'), to_tsquery('list &amp; stop'));
   ts_rank_cd
@@ -2043,63 +2171,68 @@ SELECT ts_rank_cd ('{1,1,1,1}', to_tsvector('english','list stop words'), to_tsq
            1
  </programlisting>
  
-</para>
+   </para>
+
+   <para>
+    It is up to the specific dictionary how it treats stop words. For example,
+    <literal>ispell</literal> dictionaries first normalize words and then
+    look at the list of stop words, while <literal>stemmers</literal>
+    first check the list of stop words. The reason for the different
+    behaviour is an attempt to decrease possible noise.
+   </para>
  
-<para>
-It is up to the specific dictionary how it treats stop words. For example,
-<literal>ispell</literal> dictionaries first normalize words and then
-look at the list of stop words, while <literal>stemmers</literal>
-first check the list of stop words. The reason for the different
-behaviour is an attempt to decrease possible noise.
-</para>
+   <para>
+    Here is an example of a dictionary that returns the input word as lowercase
+    or <literal>NULL</literal> if it is a stop word; it also specifies the name
+    of a file of stop words.  It uses the <literal>simple</> dictionary as
+    a template:
  
-<para>
-Here is an example of a dictionary that returns the input word as lowercase
-or <literal>NULL</literal> if it is a stop word; it also specifies the name
-of a file of stop words.  It uses the <literal>simple</> dictionary as
-a template:
  <programlisting>
  CREATE TEXT SEARCH DICTIONARY public.simple_dict (
      TEMPLATE = pg_catalog.simple,
      STOPWORDS = english
  );
  </programlisting>
-Now we can test our dictionary:
+
+    Now we can test our dictionary:
+
  <programlisting>
  SELECT ts_lexize('public.simple_dict','YeS');
   ts_lexize
  -----------
   {yes}
+
  SELECT ts_lexize('public.simple_dict','The');
   ts_lexize
  -----------
   {}
  </programlisting>
-</para>
+   </para>
  
-<caution>
-<para>
-Most types of dictionaries rely on configuration files, such as files of stop
-words.  These files <emphasis>must</> be stored in UTF-8 encoding.  They will
-be translated to the actual database encoding, if that is different, when they
-are read into the server.
-</para>
-</caution>
+   <caution>
+    <para>
+     Most types of dictionaries rely on configuration files, such as files of stop
+     words.  These files <emphasis>must</> be stored in UTF-8 encoding.  They will
+     be translated to the actual database encoding, if that is different, when they
+     are read into the server.
+    </para>
+   </caution>
  
-</sect2>
+  </sect2>
  
  
-<sect2 id="textsearch-synonym-dictionary">
-<title>Synonym Dictionary</title>
+  <sect2 id="textsearch-synonym-dictionary">
+  <title>Synonym Dictionary</title>
+
+   <para>
+    This dictionary template is used to create dictionaries which replace a
+    word with a synonym. Phrases are not supported (use the thesaurus
+    dictionary (<xref linkend="textsearch-thesaurus">) for that).  A synonym
+    dictionary can be used to overcome linguistic problems, for example, to
+    prevent an English stemmer dictionary from reducing the word 'Paris' to
+    'pari'.  It is enough to have a <literal>Paris paris</literal> line in the
+    synonym dictionary and put it before the <literal>english_stem</> dictionary:
  
-<para>
-This dictionary template is used to create dictionaries which replace a
-word with a synonym. Phrases are not supported (use the thesaurus
-dictionary (<xref linkend="textsearch-thesaurus">) for that).  A synonym
-dictionary can be used to overcome linguistic problems, for example, to
-prevent an English stemmer dictionary from reducing the word 'Paris' to
-'pari'.  It is enough to have a <literal>Paris paris</literal> line in the
-synonym dictionary and put it before the <literal>english_stem</> dictionary:
  <programlisting>
  SELECT * FROM ts_debug('english','Paris');
   Alias | Description | Token |  Dictionaries  |    Lexized token     
@@ -2119,90 +2252,95 @@ SELECT * FROM ts_debug('english','Paris');
   lword | Latin word  | Paris | {synonym,english_stem} | synonym: {paris}
  (1 row)
  </programlisting>
-</para>
-
-</sect2>
-
-<sect2 id="textsearch-thesaurus">
-<title>Thesaurus Dictionary</title>
-
-<para>
-A thesaurus dictionary (sometimes abbreviated as <acronym>TZ</acronym>) is
-a collection of words which includes information about the relationships
-of words and phrases, i.e., broader terms (<acronym>BT</acronym>), narrower
-terms (<acronym>NT</acronym>), preferred terms, non-preferred terms, related
-terms, etc.
-</para>
-<para>
-Basically a thesaurus dictionary replaces all non-preferred terms by one
-preferred term and, optionally, preserves them for indexing.  Thesauruses
-are used during indexing so any change in the thesaurus <emphasis>requires</emphasis>
-reindexing.  The current implementation of the thesaurus
-dictionary is an extension of the synonym dictionary with added
-<emphasis>phrase</emphasis> support.  A thesaurus dictionary requires
-a configuration file of the following format:
+   </para>
+
+  </sect2>
+
+  <sect2 id="textsearch-thesaurus">
+  <title>Thesaurus Dictionary</title>
+
+   <para>
+    A thesaurus dictionary (sometimes abbreviated as <acronym>TZ</acronym>) is
+    a collection of words which includes information about the relationships
+    of words and phrases, i.e., broader terms (<acronym>BT</acronym>), narrower
+    terms (<acronym>NT</acronym>), preferred terms, non-preferred terms, related
+    terms, etc.
+   </para>
+
+   <para>
+    Basically a thesaurus dictionary replaces all non-preferred terms by one
+    preferred term and, optionally, preserves them for indexing.  Thesauruses
+    are used during indexing so any change in the thesaurus <emphasis>requires</emphasis>
+    reindexing.  The current implementation of the thesaurus
+    dictionary is an extension of the synonym dictionary with added
+    <emphasis>phrase</emphasis> support.  A thesaurus dictionary requires
+    a configuration file of the following format:
+
  <programlisting>
  # this is a comment
  sample word(s) : indexed word(s)
  more sample word(s) : more indexed word(s)
  ...
  </programlisting>
-where  the colon (<symbol>:</symbol>) symbol acts as a delimiter between a
-a phrase and its replacement.
-</para>
-
-<para>
-A thesaurus dictionary uses a <emphasis>subdictionary</emphasis> (which
-is defined in the dictionary's configuration) to normalize the input text
-before checking for phrase matches. It is only possible to select one
-subdictionary.  An error is reported if the subdictionary fails to
-recognize a word. In that case, you should remove the use of the word or teach
-the subdictionary about it.  Use an asterisk (<symbol>*</symbol>) at the
-beginning of an indexed word to skip the subdictionary. It is still required
-that sample words are known.
-</para>
-
-<para>
-The thesaurus dictionary looks for the longest match.
-</para>
-
-<para>
-Stop words recognized by the subdictionary are replaced by a 'stop word
-placeholder' to record their position. To break possible ties the thesaurus
-uses the last definition. To illustrate this, consider a thesaurus (with
-a <parameter>simple</parameter> subdictionary) with pattern
-<replaceable>swsw</>, where <replaceable>s</> designates any stop word and
-<replaceable>w</>, any known word:
+
+    where the colon (<symbol>:</symbol>) symbol acts as a delimiter between
+    a phrase and its replacement.
+   </para>
+
+   <para>
+    A thesaurus dictionary uses a <emphasis>subdictionary</emphasis> (which
+    is defined in the dictionary's configuration) to normalize the input text
+    before checking for phrase matches. It is only possible to select one
+    subdictionary.  An error is reported if the subdictionary fails to
+    recognize a word. In that case, you should remove the use of the word or teach
+    the subdictionary about it.  Use an asterisk (<symbol>*</symbol>) at the
+    beginning of an indexed word to skip the subdictionary. It is still required
+    that sample words are known.
+   </para>
+
+   <para>
+    The thesaurus dictionary looks for the longest match.
+   </para>
+
+   <para>
+    Stop words recognized by the subdictionary are replaced by a 'stop word
+    placeholder' to record their position. To break possible ties the thesaurus
+    uses the last definition. To illustrate this, consider a thesaurus (with
+    a <parameter>simple</parameter> subdictionary) with pattern
+    <replaceable>swsw</>, where <replaceable>s</> designates any stop word and
+    <replaceable>w</>, any known word:
+
  <programlisting>
  a one the two : swsw
  the one a two : swsw2
  </programlisting>
-Words <literal>a</> and <literal>the</> are stop words defined in the
-configuration of a subdictionary. The thesaurus considers <literal>the
-one the two</literal> and <literal>that one then two</literal> as equal
-and will use definition <replaceable>swsw2</>.
-</para>
  
-<para>
-As any normal dictionary, it can be assigned to the specific lexeme types.
-Since a thesaurus dictionary has the capability to recognize phrases it
-must remember its state and interact with the parser. A thesaurus dictionary
-uses these assignments to check if it should handle the next word or stop
-accumulation.  The thesaurus dictionary compiler must be configured
-carefully. For example, if the thesaurus dictionary is assigned to handle
-only the <token>lword</token> lexeme, then a thesaurus dictionary
-definition like ' one 7' will not work since lexeme type
-<token>digit</token> is not assigned to the thesaurus dictionary.
-</para>
+    Words <literal>a</> and <literal>the</> are stop words defined in the
+    configuration of a subdictionary. The thesaurus considers <literal>the
+    one the two</literal> and <literal>that one then two</literal> as equal
+    and will use definition <replaceable>swsw2</>.
+   </para>
  
-</sect2>
+   <para>
+    Like any normal dictionary, it can be assigned to specific lexeme types.
+    Since a thesaurus dictionary has the capability to recognize phrases it
+    must remember its state and interact with the parser. A thesaurus dictionary
+    uses these assignments to check if it should handle the next word or stop
+    accumulation.  The thesaurus dictionary compiler must be configured
+    carefully. For example, if the thesaurus dictionary is assigned to handle
+    only the <token>lword</token> lexeme, then a thesaurus dictionary
+    definition like ' one 7' will not work since lexeme type
+    <token>digit</token> is not assigned to the thesaurus dictionary.
+   </para>
  
-<sect2 id="textsearch-thesaurus-config">
-<title>Thesaurus Configuration</title>
+  </sect2>
  
-<para>
-To define a new thesaurus dictionary one can use the thesaurus template.
-For example:
+  <sect2 id="textsearch-thesaurus-config">
+  <title>Thesaurus Configuration</title>
+
+   <para>
+    To define a new thesaurus dictionary one can use the thesaurus template.
+    For example:
  
  <programlisting>
  CREATE TEXT SEARCH DICTIONARY thesaurus_simple (
@@ -2211,88 +2349,106 @@ CREATE TEXT SEARCH DICTIONARY thesaurus_simple (
      Dictionary = pg_catalog.english_stem
  );
  </programlisting>
-Here:
-<itemizedlist  spacing="compact" mark="bullet">
-<listitem><para>
-<literal>thesaurus_simple</literal> is the thesaurus dictionary name
-</para></listitem>
-<listitem><para>
-<literal>mythesaurus</literal> is the base name of the thesaurus file
-(its full name will be <filename>$SHAREDIR/tsearch_data/mythesaurus.ths</>,
-where <literal>$SHAREDIR</> means the installation shared-data directory,
-often <filename>/usr/local/share</>).
-</para></listitem>
-<listitem><para>
-<literal>pg_catalog.english_stem</literal> is the dictionary (Snowball
-English stemmer) to use for thesaurus normalization.  Notice that the
-<literal>english_stem</> dictionary has its own configuration (for example,
-stop words), which is not shown here.
-</para></listitem>
-</itemizedlist>
-
-Now it is possible to bind the thesaurus dictionary <literal>thesaurus_simple</literal>
-and selected <literal>tokens</literal>, for example:
+
+    Here:
+    <itemizedlist  spacing="compact" mark="bullet">
+     <listitem>
+      <para>
+       <literal>thesaurus_simple</literal> is the thesaurus dictionary name
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+       <literal>mythesaurus</literal> is the base name of the thesaurus file
+       (its full name will be <filename>$SHAREDIR/tsearch_data/mythesaurus.ths</>,
+       where <literal>$SHAREDIR</> means the installation shared-data directory,
+       often <filename>/usr/local/share</>).
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+       <literal>pg_catalog.english_stem</literal> is the dictionary (Snowball
+       English stemmer) to use for thesaurus normalization.  Notice that the
+       <literal>english_stem</> dictionary has its own configuration (for example,
+       stop words), which is not shown here.
+      </para>
+     </listitem>
+    </itemizedlist>
+
+    Now it is possible to bind the thesaurus dictionary <literal>thesaurus_simple</literal>
+    and selected <literal>tokens</literal>, for example:
  
  <programlisting>
  ALTER TEXT SEARCH CONFIGURATION russian
      ADD MAPPING FOR lword, lhword, lpart_hword WITH thesaurus_simple;
  </programlisting>
-</para>
+   </para>
+
+  </sect2>
  
-</sect2>
+  <sect2 id="textsearch-thesaurus-examples">
+  <title>Thesaurus Example</title>
  
-<sect2 id="textsearch-thesaurus-examples">
-<title>Thesaurus Example</title>
+   <para>
+    Consider a simple astronomical thesaurus <literal>thesaurus_astro</literal>,
+    which contains some astronomical word combinations:
  
-<para>
-Consider a simple astronomical thesaurus <literal>thesaurus_astro</literal>,
-which contains some astronomical word combinations:
  <programlisting>
  supernovae stars : sn
  crab nebulae : crab
  </programlisting>
-Below we create a dictionary and bind some token types with
-an astronomical thesaurus and english stemmer:
+
+    Below we create a dictionary and bind some token types with
+    an astronomical thesaurus and english stemmer:
+
  <programlisting>
  CREATE TEXT SEARCH DICTIONARY thesaurus_astro (
      TEMPLATE = thesaurus,
      DictFile = thesaurus_astro,
      Dictionary = english_stem
  );
+
  ALTER TEXT SEARCH CONFIGURATION russian
      ADD MAPPING FOR lword, lhword, lpart_hword WITH thesaurus_astro, english_stem;
  </programlisting>
-Now we can see how it works. Note that <function>ts_lexize</function> cannot
-be used for testing the thesaurus (see description of
-<function>ts_lexize</function>), but we can use
-<function>plainto_tsquery</function> and <function>to_tsvector</function>
-which accept <literal>text</literal> arguments, not lexemes:
+
+    Now we can see how it works. Note that <function>ts_lexize</function> cannot
+    be used for testing the thesaurus (see description of
+    <function>ts_lexize</function>), but we can use
+    <function>plainto_tsquery</function> and <function>to_tsvector</function>
+    which accept <literal>text</literal> arguments, not lexemes:
  
  <programlisting>
  SELECT plainto_tsquery('supernova star');
   plainto_tsquery
  -----------------
   'sn'
+
  SELECT to_tsvector('supernova star');
   to_tsvector
  -------------
   'sn':1
  </programlisting>
-In principle, one can use <function>to_tsquery</function> if you quote
-the argument:
+
+    In principle, one can use <function>to_tsquery</function> if you quote
+    the argument:
+
  <programlisting>
  SELECT to_tsquery('''supernova star''');
   to_tsquery
  ------------
   'sn'
  </programlisting>
-Notice that <literal>supernova star</literal> matches <literal>supernovae
-stars</literal> in <literal>thesaurus_astro</literal> because we specified the
-<literal>english_stem</literal> stemmer in the thesaurus definition.
-</para>
-<para>
-To keep an original phrase in full text indexing just add it to the right part
-of the definition:
+
+    Notice that <literal>supernova star</literal> matches <literal>supernovae
+    stars</literal> in <literal>thesaurus_astro</literal> because we specified the
+    <literal>english_stem</literal> stemmer in the thesaurus definition.
+   </para>
+
+   <para>
+    To keep an original phrase in full text indexing just add it to the right part
+    of the definition:
+
  <programlisting>
  supernovae stars : sn supernovae stars
  
@@ -2301,56 +2457,60 @@ SELECT plainto_tsquery('supernova star');
  -----------------------------
   'sn' &amp; 'supernova' &amp; 'star'
  </programlisting>
-</para>
-
-</sect2>
-
-<sect2 id="textsearch-ispell-dictionary">
-<title>Ispell Dictionary</title>
-
-<para>
-The <application>Ispell</> template dictionary for full text allows the
-creation of morphological dictionaries based on <ulink
-url="http://ficus-www.cs.ucla.edu/geoff/ispell.html">Ispell</ulink>, which
-supports a large number of languages. This dictionary tries to change an
-input word to its normalized form. Also, more modern spelling dictionaries
-are supported - <ulink
-url="http://en.wikipedia.org/wiki/MySpell">MySpell</ulink> (OO &lt; 2.0.1)
-and <ulink url="http://sourceforge.net/projects/hunspell">Hunspell</ulink>
-(OO &gt;= 2.0.2).  A large list of dictionaries is available on the <ulink
-url="http://wiki.services.openoffice.org/wiki/Dictionaries">OpenOffice
-Wiki</ulink>.
-</para>
-
-<para>
-The <application>Ispell</> dictionary allows searches without bothering
-about different linguistic forms of a word. For example, a search on
-<literal>bank</literal> would return hits of all declensions and
-conjugations of the search term <literal>bank</literal>, e.g.
-<literal>banking</>, <literal>banked</>, <literal>banks</>,
-<literal>banks'</>, and <literal>bank's</>.
+   </para>
+
+  </sect2>
+
+  <sect2 id="textsearch-ispell-dictionary">
+  <title>Ispell Dictionary</title>
+
+   <para>
+    The <application>Ispell</> template dictionary for full text allows the
+    creation of morphological dictionaries based on <ulink
+    url="http://ficus-www.cs.ucla.edu/geoff/ispell.html">Ispell</ulink>, which
+    supports a large number of languages. This dictionary tries to change an
+    input word to its normalized form. Also, more modern spelling dictionaries
+    are supported - <ulink
+    url="http://en.wikipedia.org/wiki/MySpell">MySpell</ulink> (OO &lt; 2.0.1)
+    and <ulink url="http://sourceforge.net/projects/hunspell">Hunspell</ulink>
+    (OO &gt;= 2.0.2).  A large list of dictionaries is available on the <ulink
+    url="http://wiki.services.openoffice.org/wiki/Dictionaries">OpenOffice
+    Wiki</ulink>.
+   </para>
+
+   <para>
+    The <application>Ispell</> dictionary allows searches without bothering
+    about different linguistic forms of a word. For example, a search on
+    <literal>bank</literal> would return hits of all declensions and
+    conjugations of the search term <literal>bank</literal>, e.g.
+    <literal>banking</>, <literal>banked</>, <literal>banks</>,
+    <literal>banks'</>, and <literal>bank's</>.
+
  <programlisting>
  SELECT ts_lexize('english_ispell','banking');
   ts_lexize
  -----------
   {bank}
+
  SELECT ts_lexize('english_ispell','bank''s');
   ts_lexize
  -----------
   {bank}
+
  SELECT ts_lexize('english_ispell','banked');
   ts_lexize
  -----------
   {bank}
  </programlisting>
  
-</para>
+   </para>
+
+   <para>
+    To create an ispell dictionary one should use the built-in
+    <literal>ispell</literal> template and specify several
+    parameters.
+   </para>
  
-<para>
-To create an ispell dictionary one should use the built-in
-<literal>ispell</literal> dictionary and specify several
-parameters.
-</para>
  <programlisting>
  CREATE TEXT SEARCH DICTIONARY english_ispell (
      TEMPLATE = ispell,
@@ -2359,200 +2519,216 @@ CREATE TEXT SEARCH DICTIONARY english_ispell (
      StopWords = english
  );
  </programlisting>
-<para>
-Here, <literal>DictFile</>, <literal>AffFile</>, and <literal>StopWords</>
-specify the names of the dictionary, affixes, and stop-words files.
-</para>
-
-<para>
-Ispell dictionaries usually recognize a restricted set of words so they
-should be used in conjunction with another broader dictionary; for
-example, a stemming dictionary, which recognizes everything.
-</para>
-
-<para>
-Ispell dictionaries support splitting compound words based on an
-ispell dictionary. This is a nice feature and full text searching
-in <productname>PostgreSQL</productname> supports it.
-Notice that the affix file should specify a special flag using the
-<literal>compoundwords controlled</literal> statement that marks dictionary
-words that can participate in compound formation:
+
+   <para>
+    Here, <literal>DictFile</>, <literal>AffFile</>, and <literal>StopWords</>
+    specify the names of the dictionary, affixes, and stop-words files.
+   </para>
+
+   <para>
+    Ispell dictionaries usually recognize a restricted set of words so they
+    should be used in conjunction with another broader dictionary; for
+    example, a stemming dictionary, which recognizes everything.
+   </para>
+
+   <para>
+    Ispell dictionaries support splitting compound words based on an
+    ispell dictionary. This is a nice feature and full text searching
+    in <productname>PostgreSQL</productname> supports it.
+    Notice that the affix file should specify a special flag using the
+    <literal>compoundwords controlled</literal> statement that marks dictionary
+    words that can participate in compound formation:
+
  <programlisting>
  compoundwords  controlled z
  </programlisting>
-Several examples for the Norwegian language:
+
+    Several examples for the Norwegian language:
+
  <programlisting>
  SELECT ts_lexize('norwegian_ispell','overbuljongterningpakkmesterassistent');
- {over,buljong,terning,pakk,mester,assistent}
+   {over,buljong,terning,pakk,mester,assistent}
  SELECT ts_lexize('norwegian_ispell','sjokoladefabrikk');
- {sjokoladefabrikk,sjokolade,fabrikk}
-</programlisting>
-</para>
-
-<note>
-<para>
-<application>MySpell</> does not support compound words.
-<application>Hunspell</> has sophisticated support for compound words. At
-present,  full text searching implements only the basic compound word
-operations of Hunspell.
-</para>
-</note>
-
-</sect2>
-
-<sect2 id="textsearch-stemming-dictionary">
-<title><application>Snowball</> Stemming Dictionary</title>
-
-<para>
-The <application>Snowball</> dictionary template is based on the project
-of Martin Porter, inventor of the popular Porter's stemming algorithm
-for the English language and now supported in many languages (see the <ulink
-url="http://snowball.tartarus.org">Snowball site</ulink> for more
-information).  The Snowball project supplies a large number of stemmers for
-many languages. A Snowball dictionary requires a language parameter to
-identify which stemmer to use, and optionally can specify a stopword file name.
-For example, there is a built-in definition equivalent to
+   {sjokoladefabrikk,sjokolade,fabrikk}
+</programlisting>
+   </para>
+
+   <note>
+    <para>
+     <application>MySpell</> does not support compound words.
+     <application>Hunspell</> has sophisticated support for compound words. At
+     present,  full text searching implements only the basic compound word
+     operations of Hunspell.
+    </para>
+   </note>
+
+  </sect2>
+
+  <sect2 id="textsearch-stemming-dictionary">
+  <title><application>Snowball</> Stemming Dictionary</title>
+
+   <para>
+    The <application>Snowball</> dictionary template is based on the project
+    of Martin Porter, inventor of the popular Porter's stemming algorithm
+    for the English language and now supported in many languages (see the <ulink
+    url="http://snowball.tartarus.org">Snowball site</ulink> for more
+    information).  The Snowball project supplies a large number of stemmers for
+    many languages. A Snowball dictionary requires a language parameter to
+    identify which stemmer to use, and optionally can specify a stopword file name.
+    For example, there is a built-in definition equivalent to
+
  <programlisting>
  CREATE TEXT SEARCH DICTIONARY english_stem (
      TEMPLATE = snowball, Language = english, StopWords = english
  );
  </programlisting>
-</para>
+   </para>
+
+   <para>
+    The <application>Snowball</> dictionary recognizes everything, so it is best
+    to place it at the end of the dictionary stack. It is useless to have it
+    before any other dictionary because a lexeme will never pass through it to
+    the next dictionary.
+   </para>
  
-<para>
-The <application>Snowball</> dictionary recognizes everything, so it is best
-to place it at the end of the dictionary stack. It it useless to have it
-before any other dictionary because a lexeme will never pass through it to
-the next dictionary.
-</para>
+  </sect2>
  
-</sect2>
+  <sect2 id="textsearch-dictionary-testing">
+  <title>Dictionary Testing</title>
  
-<sect2 id="textsearch-dictionary-testing">
-<title>Dictionary Testing</title>
+   <para>
+    The <function>ts_lexize</> function facilitates dictionary testing:
  
-<para>
-The <function>ts_lexize</> function facilitates dictionary testing:
+    <variablelist>
  
-<variablelist>
-<varlistentry>
+     <varlistentry>
  
-<indexterm zone="textsearch-dictionaries">
-<primary>ts_lexize</primary>
-</indexterm>
+      <indexterm zone="textsearch-dictionaries">
+      <primary>ts_lexize</primary>
+      </indexterm>
  
-<term>
-<synopsis>
-ts_lexize(<replaceable class="PARAMETER">dict_name</replaceable> text, <replaceable class="PARAMETER">lexeme</replaceable> text) returns text[]
-</synopsis>
-</term>
+      <term>
+       <synopsis>
+        ts_lexize(<replaceable class="PARAMETER">dict_name</replaceable> text, <replaceable class="PARAMETER">lexeme</replaceable> text) returns text[]
+       </synopsis>
+      </term>
+
+      <listitem>
+       <para>
+        Returns an array of lexemes if the input <replaceable>lexeme</replaceable>
+        is known to the dictionary <replaceable>dict_name</replaceable>, or an empty
+        array if the lexeme is known to the dictionary but it is a stop word, or
+        <literal>NULL</literal> if it is an unknown word.
+       </para>
  
-<listitem>
-<para>
-Returns an array of lexemes if the input <replaceable>lexeme</replaceable>
-is known to the dictionary <replaceable>dictname</replaceable>, or a void
-array if the lexeme is known to the dictionary but it is a stop word, or
-<literal>NULL</literal> if it is an unknown word.
-</para>
  <programlisting>
  SELECT ts_lexize('english_stem', 'stars');
   ts_lexize
  -----------
   {star}
+
  SELECT ts_lexize('english_stem', 'a');
   ts_lexize
  -----------
   {}
  </programlisting>
-</listitem>
-</varlistentry>
+      </listitem>
+     </varlistentry>
+
+    </variablelist>
+   </para>
  
-</variablelist>
-</para>
+   <note>
+    <para>
+     The <function>ts_lexize</function> function expects a
+     <replaceable>lexeme</replaceable>, not text. Below is an example:
  
-<note>
-<para>
-The <function>ts_lexize</function> function expects a
-<replaceable>lexeme</replaceable>, not text. Below is an example:
  <programlisting>
  SELECT ts_lexize('thesaurus_astro','supernovae stars') is null;
   ?column?
  ----------
   t
  </programlisting>
-The thesaurus dictionary <literal>thesaurus_astro</literal> does know
-<literal>supernovae stars</literal>, but <function>ts_lexize</> fails since it
-does not parse the input text and considers it as a single lexeme. Use
-<function>plainto_tsquery</> and <function>to_tsvector</> to test thesaurus
-dictionaries:
+
+     The thesaurus dictionary <literal>thesaurus_astro</literal> does know
+     <literal>supernovae stars</literal>, but <function>ts_lexize</> fails since it
+     does not parse the input text and considers it as a single lexeme. Use
+     <function>plainto_tsquery</> and <function>to_tsvector</> to test thesaurus
+     dictionaries:
+
  <programlisting>
  SELECT plainto_tsquery('supernovae stars');
   plainto_tsquery
  -----------------
   'sn'
  </programlisting>
-</para>
-</note>
-
-</sect2>
-
-<sect2 id="textsearch-tables-configuration">
-<title>Configuration Example</title>
-
-<para>
-A full text configuration specifies all options necessary to transform a
-document into a <type>tsvector</type>: the parser breaks text into tokens,
-and the dictionaries transform each token into a lexeme.  Every call to
-<function>to_tsvector()</function> and <function>to_tsquery()</function>
-needs a configuration to perform its processing.  To facilitate management
-of full text searching objects, a set of <acronym>SQL</acronym> commands
-is available, and there are several psql commands which display information
-about full text searching objects (<xref linkend="textsearch-psql">).
-</para>
-
-<para>
-The configuration parameter
-<xref linkend="guc-default-text-search-config">
-specifies the name of the current default configuration, which is the
-one used by text search functions when an explicit configuration
-parameter is omitted.
-It can be set in <filename>postgresql.conf</filename>, or set for an
-individual session using the <command>SET</> command.
-</para>
-
-<para>
-Several predefined text searching configurations are available in the
-<literal>pg_catalog</literal> schema. If you need a custom configuration
-you can create a new text searching configuration and modify it using SQL
-commands.
-
-New text searching objects are created in the current schema by default
-(usually the <literal>public</literal> schema), but a schema-qualified
-name can be used to create objects in the specified schema.
-</para>
-
-<para>
-As an example, we will create a configuration
-<literal>pg</literal> which starts as a duplicate of the
-<literal>english</> configuration. To be safe, we do this in a transaction:
+    </para>
+   </note>
+
+  </sect2>
+
+  <sect2 id="textsearch-tables-configuration">
+  <title>Configuration Example</title>
+
+   <para>
+    A full text configuration specifies all options necessary to transform a
+    document into a <type>tsvector</type>: the parser breaks text into tokens,
+    and the dictionaries transform each token into a lexeme.  Every call to
+    <function>to_tsvector()</function> and <function>to_tsquery()</function>
+    needs a configuration to perform its processing.  To facilitate management
+    of full text searching objects, a set of <acronym>SQL</acronym> commands
+    is available, and there are several psql commands which display information
+    about full text searching objects (<xref linkend="textsearch-psql">).
+   </para>
+
+   <para>
+    The configuration parameter
+    <xref linkend="guc-default-text-search-config">
+    specifies the name of the current default configuration, which is the
+    one used by text search functions when an explicit configuration
+    parameter is omitted.
+    It can be set in <filename>postgresql.conf</filename>, or set for an
+    individual session using the <command>SET</> command.
+   </para>
+
+   <para>
+    Several predefined text searching configurations are available in the
+    <literal>pg_catalog</literal> schema. If you need a custom configuration
+    you can create a new text searching configuration and modify it using SQL
+    commands.
+   </para>
+
+   <para>
+    New text searching objects are created in the current schema by default
+    (usually the <literal>public</literal> schema), but a schema-qualified
+    name can be used to create objects in the specified schema.
+   </para>
+
+   <para>
+    As an example, we will create a configuration
+    <literal>pg</literal> which starts as a duplicate of the
+    <literal>english</> configuration. To be safe, we do this in a transaction:
+
  <programlisting>
  BEGIN;
  
  CREATE TEXT SEARCH CONFIGURATION public.pg ( COPY = english );
  </programlisting>
-</para>
+   </para>
+
+   <para>
+    We will use a PostgreSQL-specific synonym list
+    and store it in <filename>share/tsearch_data/pg_dict.syn</filename>.
+    The file contents look like:
  
-<para>
-We will use a PostgreSQL-specific synonym list
-and store it in <filename>share/tsearch_data/pg_dict.syn</filename>.
-The file contents look like:
-<Programlisting>
+<programlisting>
  postgres    pg
  pgsql       pg
  postgresql  pg
  </programlisting>
  
-We define the dictionary like this:
+    We define the dictionary like this:
+
  <programlisting>
  CREATE TEXT SEARCH DICTIONARY pg_dict (
      TEMPLATE = synonym
@@ -2560,11 +2736,11 @@ CREATE TEXT SEARCH DICTIONARY pg_dict (
  );
  </programlisting>
  
-</para>
+   </para>
  
-<para>
-Then register the <productname>ispell</> dictionary
-<literal>english_ispell</literal> using the <literal>ispell</literal> template:
+   <para>
+    Then register the <productname>ispell</> dictionary
+    <literal>english_ispell</literal> using the <literal>ispell</literal> template:
  
  <programlisting>
  CREATE TEXT SEARCH DICTIONARY english_ispell (
@@ -2574,29 +2750,30 @@ CREATE TEXT SEARCH DICTIONARY english_ispell (
      StopWords = english
  );
  </programlisting>
-</para>
+   </para>
  
-<para>
-Now modify mappings for Latin words for configuration <literal>pg</>:
+   <para>
+    Now modify mappings for Latin words for configuration <literal>pg</>:
  
  <programlisting>
  ALTER TEXT SEARCH CONFIGURATION pg
      ALTER MAPPING FOR lword, lhword, lpart_hword
      WITH pg_dict, english_ispell, english_stem;
  </programlisting>
-</para>
+   </para>
  
-<para>
-We do not index or search some tokens:
+   <para>
+    We do not index or search some tokens:
  
  <programlisting>
  ALTER TEXT SEARCH CONFIGURATION pg
      DROP MAPPING FOR email, url, sfloat, uri, float;
  </programlisting>
-</para>
+   </para>
+
+   <para>
+    Now, we can test our configuration:
  
-<para>
-Now, we can test our configuration:
  <programlisting>
  SELECT * FROM ts_debug('public.pg', '
  PostgreSQL, the highly scalable, SQL compliant, open source object-relational
@@ -2604,15 +2781,16 @@ database management system, is now undergoing beta testing of the next
  version of our software: PostgreSQL 8.3.
  ');
  
-COMMIT;
+COMMIT;
  </programlisting>
-</para>
+   </para>
+
+   <para>
+    With the dictionaries and mappings set up, suppose we have a table
+    <literal>pgweb</literal> which contains 11239 documents from the
+    <productname>PostgreSQL</productname> web site.  Only relevant columns
+    are shown:
  
-<para>
-With the dictionaries and mappings set up, suppose we have a table
-<literal>pgweb</literal> which contains 11239 documents from the
-<productname>PostgreSQL</productname> web site.  Only relevant columns
-are shown:
  <programlisting>
  =&gt; \d pgweb
             Table "public.pgweb"
@@ -2624,18 +2802,18 @@ are shown:
   title     | character varying |
   dlm       | date              |
  </programlisting>
-</para>
+   </para>
+
+   <para>
+    The next step is to set the session to use the new configuration, which was
+    created in the <literal>public</> schema:
  
-<para>
-The next step is to set the session to use the new configuration, which was
-created in the <literal>public</> schema:
  <programlisting>
  =&gt; \dF
-postgres=# \dF public.*
-List of fulltext configurations
-  Schema | Name | Description
---------+------+-------------
-  public | pg   |
+   List of fulltext configurations
+ Schema  | Name | Description
+---------+------+-------------
+ public  | pg   |
  
  SET default_text_search_config = 'public.pg';
  SET
@@ -2645,41 +2823,43 @@ SHOW default_text_search_config;
  ----------------------------
   public.pg
  </programlisting>
-</para>
+   </para>
+
+  </sect2>
  
-</sect2>
+  <sect2 id="textsearch-tables-multiconfig">
+  <title>Managing Multiple Configurations</title>
  
-<sect2 id="textsearch-tables-multiconfig">
-<title>Managing Multiple Configurations</title>
+   <para>
+    If you are using the same text search configuration for the entire cluster,
+    just set the value in <filename>postgresql.conf</>.  If using a single
+    text search configuration for an entire database, use <command>ALTER
+    DATABASE ... SET</>.
+   </para>
  
-<para>
-If you are using the same text search configuration for the entire cluster
-just set the value in <filename>postgresql.conf</>.  If using a single
-text search configuration for an entire database, use <command>ALTER
-DATABASE ... SET</>.
-</para>
+   <para>
+    However, if you need to use several text search configurations in the same
+    database you must be careful to reference the proper text search
+    configuration.  This can be done by either setting
+    <varname>default_text_search_config</> in each session or supplying the
+    configuration name in every function call, e.g., <literal>to_tsquery('french', 'friend')</literal>
+    or <literal>to_tsvector('english', col)</literal>.  If you are using an expression
+    index you must embed the configuration name into the expression index, e.g.:
  
-<para>
-However, if you need to use several text search configurations in the same
-database you must be careful to reference the proper text search
-configuration.  This can be done by either setting
-<varname>default_text_search_config</> in each session or supplying the
-configuration name in every function call, e.g.  to_tsquery('french',
-'friend'), to_tsvector('english', col).  If you are using an expression
-index you must embed the configuration name into the expression index, e.g.:
  <programlisting>
  CREATE INDEX pgweb_idx ON pgweb USING gin(to_tsvector('french', title || body));
  </programlisting>
-And for an expression index, specify the configuration name in the
-<literal>WHERE</> clause as well so the expression index will be used.
-</para>
  
-</sect2>
+    And for an expression index, specify the configuration name in the
+    <literal>WHERE</> clause as well so the expression index will be used.
+   </para>
+
+  </sect2>
  
-</sect1>
+ </sect1>
  
-<sect1 id="textsearch-indexes">
-<title>GiST and GIN Index Types</title>
+ <sect1 id="textsearch-indexes">
+ <title>GiST and GIN Index Types</title>
  
    <indexterm zone="textsearch-indexes">
     <primary>index</primary>
@@ -2687,67 +2867,66 @@ And for an expression index, specify the configuration name in the
    </indexterm>
  
  
-<para>
-There are two kinds of indexes which can be used to speed up full text
-operators (<xref linkend="textsearch-searches">).
-Note that indexes are not mandatory for full text searching.
+  <para>
+   There are two kinds of indexes which can be used to speed up full text
+   operators (<xref linkend="textsearch-searches">).
+   Note that indexes are not mandatory for full text searching.
  
-<variablelist>
+   <variablelist>
  
-<varlistentry>
+    <varlistentry>
  
-<indexterm zone="textsearch-indexes">
-<primary>index</primary>
-<secondary>GIST, for text searching</secondary>
-</indexterm>
+     <indexterm zone="textsearch-indexes">
+     <primary>index</primary>
+     <secondary>GiST, for text searching</secondary>
+     </indexterm>
  
-<term>
-<synopsis>
-CREATE INDEX <replaceable>name</replaceable> ON <replaceable>table</replaceable> USING gist(<replaceable>column</replaceable>);
-</synopsis>
-</term>
+     <term>
+      <synopsis>
+       CREATE INDEX <replaceable>name</replaceable> ON <replaceable>table</replaceable> USING gist(<replaceable>column</replaceable>);
+      </synopsis>
+     </term>
  
-<listitem>
-<para>
-Creates a GiST (Generalized Search Tree)-based index.
-The <replaceable>column</replaceable> can be of <type>tsvector</> or
-<type>tsquery</> type.
-</para>
+     <listitem>
+      <para>
+       Creates a GiST (Generalized Search Tree)-based index.
+       The <replaceable>column</replaceable> can be of <type>tsvector</> or
+       <type>tsquery</> type.
+      </para>
+     </listitem>
+    </varlistentry>
  
-</listitem>
-</varlistentry>
+    <varlistentry>
  
-<varlistentry>
+     <indexterm zone="textsearch-indexes">
+     <primary>index</primary>
+     <secondary>GIN</secondary>
+     </indexterm>
  
-<indexterm zone="textsearch-indexes">
-<primary>index</primary>
-<secondary>GIN</secondary>
-</indexterm>
+     <term>
+      <synopsis>
+       CREATE INDEX <replaceable>name</replaceable> ON <replaceable>table</replaceable> USING gin(<replaceable>column</replaceable>);
+      </synopsis>
+     </term>
  
-<term>
-<synopsis>
-CREATE INDEX <replaceable>name</replaceable> ON <replaceable>table</replaceable> USING gin(<replaceable>column</replaceable>);
-</synopsis>
-</term>
+     <listitem>
+      <para>
+       Creates a GIN (Generalized Inverted Index)-based index.
+       The <replaceable>column</replaceable> must be of <type>tsvector</> type.
+      </para>
+     </listitem>
+    </varlistentry>
  
-<listitem>
-<para>
-Creates a GIN (Generalized Inverted Index)-based index.
-The <replaceable>column</replaceable> must be of <type>tsvector</> type.
-</para>
+   </variablelist>
+  </para>
  
-</listitem>
-</varlistentry>
+  <para>
+   A GiST index is <firstterm>lossy</firstterm>, meaning it is necessary
+   to check the actual table row to eliminate false matches.
+   <productname>PostgreSQL</productname> does this automatically; for
+   example, in the query plan below, the <literal>Filter:</literal>
+   line indicates the index output will be rechecked:
  
-</variablelist>
-</para>
-
-<para>
-A GiST index is <firstterm>lossy</firstterm>, meaning it is necessary
-to check the actual table row to eliminate false matches.
-<productname>PostgreSQL</productname> does this automatically; for
-example, in the query plan below, the <literal>Filter:</literal>
-line indicates the index output will be rechecked:
  <programlisting>
  EXPLAIN SELECT * FROM apod WHERE textsearch @@ to_tsquery('supernovae');
                                 QUERY PLAN
@@ -2756,62 +2935,64 @@ EXPLAIN SELECT * FROM apod WHERE textsearch @@ to_tsquery('supernovae');
     Index Cond: (textsearch @@ '''supernova'''::tsquery)
     Filter: (textsearch @@ '''supernova'''::tsquery)
  </programlisting>
-GiST index lossiness happens because each document is represented by a
-fixed-length signature. The signature is generated by hashing (crc32) each
-word into a random bit in an n-bit string and all words combine to produce
-an n-bit document signature. Because of hashing there is a chance that
-some words hash to the same position and could result in a false hit.
-Signatures calculated for each document in a collection are stored in an
-<literal>RD-tree</literal> (Russian Doll tree), invented by Hellerstein,
-which is an adaptation of <literal>R-tree</literal> for sets.  In our case
-the transitive containment relation <!-- huh --> is realized by
-superimposed coding (Knuth, 1973) of signatures, i.e., a parent is the
-result of 'OR'-ing the bit-strings of all children.  This is a second
-factor of lossiness.  It is clear that parents tend to be full of
-<literal>1</>s (degenerates) and become quite useless because of the
-limited selectivity.  Searching is performed as a bit comparison of a
-signature representing the query and an <literal>RD-tree</literal> entry.
-If all <literal>1</>s of both signatures are in the same position we
-say that this branch probably matches the query, but if there is even one
-discrepancy we can definitely reject this branch.
-</para>
-
-<para>
-Lossiness causes serious performance degradation since random access of
-<literal>heap</literal> records is slow and limits the usefulness of GiST
-indexes.  The likelihood of false hits depends on several factors, like
-the number of unique words, so using dictionaries to reduce this number
-is recommended.
-</para>
-
-<para>
-Actually, this  is not the whole story. GiST indexes have an optimization
-for storing small tsvectors (&lt; <literal>TOAST_INDEX_TARGET</literal>
-bytes, 512 bytes).  On leaf pages small tsvectors are stored unchanged,
-while longer ones are represented by their signatures, which introduces
-some lossiness.  Unfortunately, the existing index API does not allow for
-a return value to say whether it found an exact value (tsvector) or whether
-the result needs to be checked.  This is why the GiST index is
-currently marked as lossy.  We hope to improve this in the future.
-</para>
-
-<para>
-GIN indexes are not lossy but their performance depends logarithmically on
-the number of unique words.
-</para>
-
-<para>
-There is one side-effect of the non-lossiness of a GIN index when using
-query labels/weights, like <literal>'supernovae:a'</literal>.  A GIN index
-has all the information necessary to determine a match, so the heap is
-not accessed.  However, label information is not stored in the index,
-so if the query involves label weights it must access
-the heap. Therefore, a special full text search operator <literal>@@@</literal>
-was created which forces the use of the heap to get information about
-labels.  GiST indexes are lossy so it always reads the  heap and there is
-no need for a special operator. In the example below,
-<literal>fulltext_idx</literal> is a GIN index:<!-- why isn't this
-automatic -->
+
+   GiST index lossiness happens because each document is represented by a
+   fixed-length signature. The signature is generated by hashing (crc32) each
+   word into a random bit in an n-bit string and all words combine to produce
+   an n-bit document signature. Because of hashing there is a chance that
+   some words hash to the same position and could result in a false hit.
+   Signatures calculated for each document in a collection are stored in an
+   <literal>RD-tree</literal> (Russian Doll tree), invented by Hellerstein,
+   which is an adaptation of <literal>R-tree</literal> for sets.  In our case
+   the transitive containment relation <!-- huh --> is realized by
+   superimposed coding (Knuth, 1973) of signatures, i.e., a parent is the
+   result of 'OR'-ing the bit-strings of all children.  This is a second
+   factor of lossiness.  It is clear that parents tend to be full of
+   <literal>1</>s (degenerates) and become quite useless because of the
+   limited selectivity.  Searching is performed as a bit comparison of a
+   signature representing the query and an <literal>RD-tree</literal> entry.
+   If all <literal>1</>s of both signatures are in the same position we
+   say that this branch probably matches the query, but if there is even one
+   discrepancy we can definitely reject this branch.
+  </para>
+
+  <para>
+   Lossiness causes serious performance degradation since random access of
+   <literal>heap</literal> records is slow and limits the usefulness of GiST
+   indexes.  The likelihood of false hits depends on several factors, like
+   the number of unique words, so using dictionaries to reduce this number
+   is recommended.
+  </para>
+
+  <para>
+   Actually, this is not the whole story. GiST indexes have an optimization
+   for storing small tsvectors (&lt; <literal>TOAST_INDEX_TARGET</literal>
+   bytes, 512 bytes).  On leaf pages small tsvectors are stored unchanged,
+   while longer ones are represented by their signatures, which introduces
+   some lossiness.  Unfortunately, the existing index API does not allow for
+   a return value to say whether it found an exact value (tsvector) or whether
+   the result needs to be checked.  This is why the GiST index is
+   currently marked as lossy.  We hope to improve this in the future.
+  </para>
+
+  <para>
+   GIN indexes are not lossy but their performance depends logarithmically on
+   the number of unique words.
+  </para>
+
+  <para>
+   There is one side-effect of the non-lossiness of a GIN index when using
+   query labels/weights, like <literal>'supernovae:a'</literal>.  A GIN index
+   has all the information necessary to determine a match, so the heap is
+   not accessed.  However, label information is not stored in the index,
+   so if the query involves label weights it must access
+   the heap. Therefore, a special full text search operator <literal>@@@</literal>
+   was created which forces the use of the heap to get information about
+   labels.  GiST indexes are lossy so they always read the heap and there is
+   no need for a special operator. In the example below,
+   <literal>fulltext_idx</literal> is a GIN index:<!-- why isn't this
+   automatic -->
+
  <programlisting>
  EXPLAIN SELECT * FROM apod WHERE textsearch @@@ to_tsquery('supernovae:a');
                                 QUERY PLAN
@@ -2821,95 +3002,117 @@ EXPLAIN SELECT * FROM apod WHERE textsearch @@@ to_tsquery('supernovae:a');
     Filter: (textsearch @@@ '''supernova'':A'::tsquery)
  </programlisting>
  
-</para>
-
-<para>
-In choosing which index type to use, GiST or GIN, consider these differences:
-<itemizedlist  spacing="compact" mark="bullet">
-<listitem><para>
-GiN index lookups are three times faster than GiST
-</para></listitem>
-<listitem><para>
-GiN indexes take three times longer to build than GiST
-</para></listitem>
-<listitem><para>
-GiN is about ten times slower to update than GiST
-</para></listitem>
-<listitem><para>
-GiN indexes are two-to-three times larger than GiST
-</para></listitem>
-</itemizedlist>
-</para>
-
-<para>
-In summary, <acronym>GIN</acronym> indexes are best for static data because
-the indexes are faster for lookups.  For dynamic data, GiST indexes are
-faster to update.  Specifically, <acronym>GiST</acronym> indexes are very
-good for dynamic data and fast if the number of unique words (lexemes) is
-under 100,000, while <acronym>GIN</acronym> handles +100,000 lexemes better
-but is slower to update.
-</para>
-
-<para>
-Partitioning of big collections and the proper use of GiST and GIN indexes
-allows the implementation of very fast searches with online update.
-Partitioning can be done at the database level using table inheritance
-and <varname>constraint_exclusion</>, or distributing documents over
-servers and collecting search results using the <filename>contrib/dblink</>
-extension module. The latter is possible because ranking functions use
-only local information.
-</para>
-
-</sect1>
-
-<sect1 id="textsearch-limitations">
-<title>Limitations</title>
-
-<para>
-The current limitations of Full Text Searching are:
-<itemizedlist  spacing="compact" mark="bullet">
-<listitem><para>The length of each lexeme must be less than 2K bytes</para></listitem>
-<listitem><para>The length of a <type>tsvector</type> (lexemes + positions) must be less than 1 megabyte</para></listitem>
-<listitem><para>The number of lexemes must be less than 2<superscript>64</superscript></para></listitem>
-<listitem><para>Positional information must be non-negative and less than 16,383</para></listitem>
-<listitem><para>No more than 256 positions per lexeme</para></listitem>
-<listitem><para>The number of nodes (lexemes + operations) in tsquery must be less than 32,768</para></listitem>
-</itemizedlist>
-</para>
-
-<para>
-For comparison, the <productname>PostgreSQL</productname> 8.1 documentation
-contained 10,441 unique words, a total of 335,420 words, and the most frequent
-word <quote>postgresql</> was mentioned 6,127 times in 655 documents.
-</para>
-
-<!-- TODO we need to put a date on these numbers? -->
-<para>
-Another example &mdash; the <productname>PostgreSQL</productname> mailing list
-archives contained 910,989 unique words with 57,491,343 lexemes in 461,020
-messages.
-</para>
-
-</sect1>
-
-<sect1 id="textsearch-psql">
-<title><application>psql</> Support</title>
-
-<para>
-Information about full text searching objects can be obtained
-in <literal>psql</literal> using a set of commands:
-<synopsis>
-\dF{,d,p}<optional>+</optional> <optional>PATTERN</optional>
-</synopsis>
-An optional <literal>+</literal> produces more details.
-</para>
-<para>
-The optional parameter <literal>PATTERN</literal> should be the name of
-a full text searching object, optionally schema-qualified.  If
-<literal>PATTERN</literal> is not specified then information about all
-visible objects  will be displayed.  <literal>PATTERN</literal> can be a
-regular expression and can apply <emphasis>separately</emphasis> to schema
-names and object names.  The following examples illustrate this:
+  </para>
+
+  <para>
+   In choosing which index type to use, GiST or GIN, consider these differences:
+   <itemizedlist  spacing="compact" mark="bullet">
+    <listitem>
+     <para>
+      GIN index lookups are three times faster than GiST
+     </para>
+    </listitem>
+    <listitem>
+     <para>
+      GIN indexes take three times longer to build than GiST
+     </para>
+    </listitem>
+    <listitem>
+     <para>
+      GIN is about ten times slower to update than GiST
+     </para>
+    </listitem>
+    <listitem>
+     <para>
+      GIN indexes are two-to-three times larger than GiST
+     </para>
+    </listitem>
+   </itemizedlist>
+  </para>
+
+  <para>
+   In summary, <acronym>GIN</acronym> indexes are best for static data because
+   the indexes are faster for lookups.  For dynamic data, GiST indexes are
+   faster to update.  Specifically, <acronym>GiST</acronym> indexes are very
+   good for dynamic data and fast if the number of unique words (lexemes) is
+   under 100,000, while <acronym>GIN</acronym> handles 100,000+ lexemes better
+   but is slower to update.
+  </para>
+
+  <para>
+   Partitioning of big collections and the proper use of GiST and GIN indexes
+   allows the implementation of very fast searches with online update.
+   Partitioning can be done at the database level using table inheritance
+   and <varname>constraint_exclusion</>, or distributing documents over
+   servers and collecting search results using the <filename>contrib/dblink</>
+   extension module. The latter is possible because ranking functions use
+   only local information.
+  </para>
+
+ </sect1>
+
+ <sect1 id="textsearch-limitations">
+ <title>Limitations</title>
+
+  <para>
+   The current limitations of Full Text Searching are:
+   <itemizedlist  spacing="compact" mark="bullet">
+    <listitem>
+     <para>The length of each lexeme must be less than 2K bytes</para>
+    </listitem>
+    <listitem>
+     <para>The length of a <type>tsvector</type> (lexemes + positions) must be less than 1 megabyte</para>
+    </listitem>
+    <listitem>
+     <para>The number of lexemes must be less than 2<superscript>64</superscript></para>
+    </listitem>
+    <listitem>
+     <para>Positional information must be non-negative and less than 16,383</para>
+    </listitem>
+    <listitem>
+     <para>No more than 256 positions per lexeme</para>
+    </listitem>
+    <listitem>
+     <para>The number of nodes (lexemes + operations) in tsquery must be less than 32,768</para>
+    </listitem>
+   </itemizedlist>
+  </para>
+
+  <para>
+   For comparison, the <productname>PostgreSQL</productname> 8.1 documentation
+   contained 10,441 unique words, a total of 335,420 words, and the most frequent
+   word <quote>postgresql</> was mentioned 6,127 times in 655 documents.
+  </para>
+
+   <!-- TODO we need to put a date on these numbers? -->
+  <para>
+   Another example &mdash; the <productname>PostgreSQL</productname> mailing list
+   archives contained 910,989 unique words with 57,491,343 lexemes in 461,020
+   messages.
+  </para>
+
+ </sect1>
+
+ <sect1 id="textsearch-psql">
+ <title><application>psql</> Support</title>
+
+  <para>
+   Information about full text searching objects can be obtained
+   in <literal>psql</literal> using a set of commands:
+   <synopsis>
+   \dF{,d,p}<optional>+</optional> <optional>PATTERN</optional>
+   </synopsis>
+   An optional <literal>+</literal> produces more details.
+  </para>
+
+  <para>
+   The optional parameter <literal>PATTERN</literal> should be the name of
+   a full text searching object, optionally schema-qualified.  If
+   <literal>PATTERN</literal> is not specified then information about all
+   visible objects will be displayed.  <literal>PATTERN</literal> can be a
+   regular expression and can apply <emphasis>separately</emphasis> to schema
+   names and object names.  The following examples illustrate this:
+
  <programlisting>
  =&gt; \dF *fulltext*
         List of fulltext configurations
@@ -2926,23 +3129,24 @@ names and object names.  The following examples illustrate this:
   fulltext | fulltext_cfg | 
   public   | fulltext_cfg |
  </programlisting>
-</para>
+  </para>
  
-<variablelist>
+  <variablelist>
  
     <varlistentry>
-<term>\dF[+] [PATTERN]</term>
+    <term>\dF[+] [PATTERN]</term>
  
      <listitem>
       <para>
-     List full text searching configurations (add "+" for more detail)
+      List full text searching configurations (add "+" for more detail)
       </para>
       <para>
        By default (without <literal>PATTERN</literal>), information about
        all <emphasis>visible</emphasis> full text configurations will be
        displayed.
       </para>
-<para>
+     <para>
+
  <programlisting>
  =&gt; \dF russian
                                 List of fulltext configurations
@@ -2951,8 +3155,8 @@ names and object names.  The following examples illustrate this:
   pg_catalog | russian | default configuration for Russian
  
  =&gt; \dF+ russian
-Configuration "pg_catalog.russian"
-Parser name: "pg_catalog.default"
+   Configuration "pg_catalog.russian"
+   Parser name: "pg_catalog.default"
      Token     |      Dictionaries
  --------------+-------------------------
   email        | pg_catalog.simple
@@ -2975,21 +3179,22 @@ Parser name: "pg_catalog.default"
   version      | pg_catalog.simple
   word         | pg_catalog.russian_stem
  </programlisting>
-</para>
+     </para>
      </listitem>
     </varlistentry>
  
     <varlistentry>
-<term>\dFd[+] [PATTERN]</term>
+    <term>\dFd[+] [PATTERN]</term>
      <listitem>
       <para>
-     List full text dictionaries (add "+" for more detail).
+      List full text dictionaries (add "+" for more detail).
       </para>
       <para>
        By default (without <literal>PATTERN</literal>), information about
        all <emphasis>visible</emphasis> dictionaries will be displayed.
       </para>
-<para>
+
+     <para>
  <programlisting>
  =&gt; \dFd
                             List of fulltext dictionaries
@@ -3012,29 +3217,29 @@ Parser name: "pg_catalog.default"
   pg_catalog | swedish    | Snowball stemmer for swedish language
   pg_catalog | turkish    | Snowball stemmer for turkish language
  </programlisting>
-</para>
+     </para>
      </listitem>
     </varlistentry>
  
     <varlistentry>
  
-<term>\dFp[+] [PATTERN]</term>
+   <term>\dFp[+] [PATTERN]</term>
      <listitem>
       <para>
-    List full text parsers (add "+" for more detail)
+      List full text parsers (add "+" for more detail)
       </para>
       <para>
        By default (without <literal>PATTERN</literal>), information about
        all <emphasis>visible</emphasis> full text parsers will be displayed.
       </para>
-<para>
+     <para>
  <programlisting>
-=&gt; \dFp
+=&gt; \dFp
            List of fulltext parsers
     Schema   |  Name   |     Description
  ------------+---------+---------------------
   pg_catalog | default | default word parser
-(1 row)
+(1 row)
  =&gt; \dFp+
              Fulltext parser "pg_catalog.default"
        Method       |         Function          | Description
@@ -3073,35 +3278,36 @@ Parser name: "pg_catalog.default"
   word         | Word
  (23 rows)
  </programlisting>
-</para>
+     </para>
      </listitem>
     </varlistentry>
  
-
    </variablelist>
  
-</sect1>
+ </sect1>
  
-<sect1 id="textsearch-debugging">
-<title>Debugging</title>
+ <sect1 id="textsearch-debugging">
+ <title>Debugging</title>
  
-<para>
-Function <function>ts_debug</function> allows easy testing of your full text searching
-configuration.
-</para>
+  <para>
+   Function <function>ts_debug</function> allows easy testing of your full text searching
+   configuration.
+  </para>
  
-<synopsis>
-ts_debug(<optional><replaceable class="PARAMETER">config_name</replaceable></optional>, <replaceable class="PARAMETER">document</replaceable> TEXT) returns SETOF ts_debug
-</synopsis>
+  <synopsis>
+   ts_debug(<optional><replaceable class="PARAMETER">config_name</replaceable></optional>, <replaceable class="PARAMETER">document</replaceable> TEXT) returns SETOF ts_debug
+  </synopsis>
+
+  <para>
+   <function>ts_debug</> displays information about every token of
+   <replaceable class="PARAMETER">document</replaceable> as produced by the
+   parser and processed by the configured dictionaries using the configuration
+   specified by <replaceable class="PARAMETER">config_name</replaceable>.
+  </para>
+
+  <para>
+   The <replaceable class="PARAMETER">ts_debug</replaceable> type is defined as:
  
-<para>
-<function>ts_debug</> displays information about every token of
-<replaceable class="PARAMETER">document</replaceable> as produced by the
-parser and processed by the configured dictionaries using the configuration
-specified by <replaceable class="PARAMETER">config_name</replaceable>.
-</para>
-<para>
-<replaceable class="PARAMETER">ts_debug</replaceable> type defined as:
  <programlisting>
  CREATE TYPE ts_debug AS (
      "Alias" text,
@@ -3111,14 +3317,15 @@ CREATE TYPE ts_debug AS (
      "Lexized token" text
  );
  </programlisting>
-</para>
+  </para>
+
+  <para>
+   For a demonstration of how function <function>ts_debug</function> works we
+   first create a <literal>public.english</literal> configuration and
+   ispell dictionary for the English language. You can skip the test step and
+   play with the standard <literal>english</literal> configuration.
+  </para>
  
-<para>
-For a demonstration of how function <function>ts_debug</function> works we
-first create a <literal>public.english</literal> configuration and
-ispell dictionary for the English language. You can skip the test step and
-play with the standard <literal>english</literal> configuration.
-</para>
  <programlisting>
  CREATE TEXT SEARCH CONFIGURATION public.english ( COPY = pg_catalog.english );
  
@@ -3130,7 +3337,7 @@ CREATE TEXT SEARCH DICTIONARY english_ispell (
  );
  
  ALTER TEXT SEARCH CONFIGURATION public.english
-    ALTER MAPPING FOR lword WITH english_ispell, english_stem;
+    ALTER MAPPING FOR lword WITH english_ispell, english_stem;
  </programlisting>
  
  <programlisting>
@@ -3144,26 +3351,28 @@ SELECT * FROM ts_debug('public.english','The Brightest supernovaes');
   lword | Latin word    | supernovaes | {public.english_ispell,pg_catalog.english_stem} | pg_catalog.english_stem: {supernova}
  (5 rows)
  </programlisting>
-<para>
-In this example, the word <literal>Brightest</> was recognized by a
-parser as a <literal>Latin word</literal> (alias <literal>lword</literal>)
-and came through the dictionaries <literal>public.english_ispell</> and
-<literal>pg_catalog.english_stem</literal>. It was recognized by
-<literal>public.english_ispell</literal>, which reduced it to the noun
-<literal>bright</literal>. The word <literal>supernovaes</literal> is unknown
-by the <literal>public.english_ispell</literal> dictionary so it was passed to
-the next dictionary, and, fortunately, was recognized (in fact,
-<literal>public.english_stem</literal> is a stemming dictionary and recognizes
-everything; that is why it was placed at the end of the dictionary stack).
-</para>
-
-<para>
-The word <literal>The</literal> was recognized by <literal>public.english_ispell</literal>
-dictionary as a stop word (<xref linkend="textsearch-stopwords">) and will not be indexed.
-</para>
-
-<para>
-You can always explicitly specify which columns you want to see:
+
+  <para>
+   In this example, the word <literal>Brightest</> was recognized by a
+   parser as a <literal>Latin word</literal> (alias <literal>lword</literal>)
+   and came through the dictionaries <literal>public.english_ispell</> and
+   <literal>pg_catalog.english_stem</literal>. It was recognized by
+   <literal>public.english_ispell</literal>, which reduced it to the noun
+   <literal>bright</literal>. The word <literal>supernovaes</literal> is unknown
+   by the <literal>public.english_ispell</literal> dictionary so it was passed to
+   the next dictionary, and, fortunately, was recognized (in fact,
+   <literal>pg_catalog.english_stem</literal> is a stemming dictionary and recognizes
+   everything; that is why it was placed at the end of the dictionary stack).
+  </para>
+
+  <para>
+   The word <literal>The</literal> was recognized by the <literal>public.english_ispell</literal>
+   dictionary as a stop word (<xref linkend="textsearch-stopwords">) and will not be indexed.
+  </para>
+
+  <para>
+   You can always explicitly specify which columns you want to see:
+
  <programlisting>
  SELECT "Alias", "Token", "Lexized token"
  FROM ts_debug('public.english','The Brightest supernovaes');
@@ -3176,96 +3385,104 @@ FROM ts_debug('public.english','The Brightest supernovaes');
   lword | supernovaes | pg_catalog.english_stem: {supernova}
  (5 rows)
  </programlisting>
-</para>
-
-</sect1>
-
-<sect1 id="textsearch-rule-dictionary-example">
-<title>Example of Creating a Rule-Based Dictionary</title>
-
-<para>
-The motivation for this example dictionary is to control the indexing of
-integers (signed and unsigned), and, consequently, to minimize the number
-of unique words which greatly affects to performance of searching.
-</para>
-
-<para>
-The dictionary accepts two options:
-<itemizedlist spacing="compact" mark="bullet">
-
-<listitem><para>
-The <LITERAL>MAXLEN</literal> parameter specifies the maximum length of the
-number considered as a 'good' integer. The default value is 6.
-</para></listitem>
-
-<listitem><para>
-The <LITERAL>REJECTLONG</LITERAL> parameter specifies if a 'long' integer
-should be indexed or treated as a stop word.  If
-<literal>REJECTLONG</literal>=<LITERAL>FALSE</LITERAL> (default),
-the dictionary returns the prefixed part of the integer with length
-<LITERAL>MAXLEN</literal>.  If
-<LITERAL>REJECTLONG</LITERAL>=<LITERAL>TRUE</LITERAL>, the dictionary
-considers a long integer as a stop word.
-</para></listitem>
-
-</itemizedlist>
-
-</para>
-
-<para>
-A similar idea can be applied to the indexing of decimal numbers, for
-example, in the <literal>DecDict</literal> dictionary. The dictionary
-accepts two options: the <literal>MAXLENFRAC</literal> parameter specifies
-the maximum length of the fractional part considered as a 'good' decimal.
-The default value is 3. The <literal>REJECTLONG</literal> parameter
-controls whether a decimal number with a 'long' fractional part should be indexed
-or treated as a stop word. If
-<literal>REJECTLONG</literal>=<literal>FALSE</literal> (default),
-the dictionary returns the decimal number with the length of its fraction part
-truncated to <literal>MAXLEN</literal>. If
-<literal>REJECTLONG</literal>=<literal>TRUE</literal>, the dictionary
-considers the number as a stop word. Notice that
-<literal>REJECTLONG</literal>=<literal>FALSE</literal> allows the indexing
-of 'shortened' numbers and search results will contain documents with
-shortened numbers.
-</para>
-
-
-<para>
-Examples:
+  </para>
+
+ </sect1>
+
+ <sect1 id="textsearch-rule-dictionary-example">
+ <title>Example of Creating a Rule-Based Dictionary</title>
+
+  <para>
+   The motivation for this example dictionary is to control the indexing of
+   integers (signed and unsigned), and, consequently, to minimize the number
+   of unique words, which greatly affects the performance of searching.
+  </para>
+
+  <para>
+   The dictionary accepts two options:
+   <itemizedlist spacing="compact" mark="bullet">
+
+    <listitem>
+     <para>
+      The <LITERAL>MAXLEN</literal> parameter specifies the maximum length of the
+      number considered as a 'good' integer. The default value is 6.
+     </para>
+    </listitem>
+
+    <listitem>
+     <para>
+      The <LITERAL>REJECTLONG</LITERAL> parameter specifies if a 'long' integer
+      should be indexed or treated as a stop word.  If
+      <literal>REJECTLONG</literal>=<LITERAL>FALSE</LITERAL> (default),
+      the dictionary returns the first
+      <literal>MAXLEN</literal> characters of the integer.  If
+      <LITERAL>REJECTLONG</LITERAL>=<LITERAL>TRUE</LITERAL>, the dictionary
+      considers a long integer as a stop word.
+     </para>
+    </listitem>
+
+   </itemizedlist>
+
+  </para>
+
+  <para>
+   A similar idea can be applied to the indexing of decimal numbers, for
+   example, in the <literal>DecDict</literal> dictionary. The dictionary
+   accepts two options: the <literal>MAXLENFRAC</literal> parameter specifies
+   the maximum length of the fractional part considered as a 'good' decimal.
+   The default value is 3. The <literal>REJECTLONG</literal> parameter
+   controls whether a decimal number with a 'long' fractional part should be indexed
+   or treated as a stop word. If
+   <literal>REJECTLONG</literal>=<literal>FALSE</literal> (default),
+   the dictionary returns the decimal number with the length of its fraction part
+   truncated to <literal>MAXLENFRAC</literal>. If
+   <literal>REJECTLONG</literal>=<literal>TRUE</literal>, the dictionary
+   considers the number as a stop word. Notice that
+   <literal>REJECTLONG</literal>=<literal>FALSE</literal> allows the indexing
+   of 'shortened' numbers and search results will contain documents with
+   shortened numbers.
+  </para>
+
+  <para>
+   Examples:
+
  <programlisting>
  SELECT ts_lexize('intdict', 11234567890);
   ts_lexize
  -----------
   {112345}
  </programlisting>
-</para>
-<para>
-Now, we want to ignore long integers:
+  </para>
+
+  <para>
+   Now, we want to ignore long integers:
+
  <programlisting>
  
  ALTER TEXT SEARCH DICTIONARY intdict (
      MAXLEN = 6, REJECTLONG = TRUE
  );
+
  SELECT ts_lexize('intdict', 11234567890);
   ts_lexize
  -----------
   {}
  </programlisting>
-</para>
+  </para>
+
+  <para>
+   Create a <filename>contrib/dict_intdict</> directory containing the files
+   <filename>dict_tmpl.c</>, <filename>Makefile</>, and <filename>dict_intdict.sql.in</>:
  
-<para>
-Create <filename>contrib/dict_intdict</> directory with files
-<filename>dict_tmpl.c</>, <filename>Makefile</>, <filename>dict_intdict.sql.in</>:
  <programlisting>
-make &amp;&amp; make install
-psql DBNAME < dict_intdict.sql
+$ make &amp;&amp; make install
+$ psql DBNAME &lt; dict_intdict.sql
  </programlisting>
-</para>
+  </para>
  
-<para>
-This is a <filename>dict_tmpl.c</> file:
-</para>
+  <para>
+   This is a <filename>dict_tmpl.c</> file:
+  </para>
  
  <programlisting>
  #include "postgres.h"
@@ -3280,61 +3497,61 @@ PG_MODULE_MAGIC;
  #include "utils/ts_public.h"
  #include "utils/ts_utils.h"
  
- typedef struct {
-        int     maxlen;
-        bool    rejectlong;
- } DictInt;
+typedef struct {
+  int     maxlen;
+  bool    rejectlong;
+} DictInt;
  
  
- PG_FUNCTION_INFO_V1(dinit_intdict);
- Datum dinit_intdict(PG_FUNCTION_ARGS);
+PG_FUNCTION_INFO_V1(dinit_intdict);
+Datum dinit_intdict(PG_FUNCTION_ARGS);
  
- Datum
- dinit_intdict(PG_FUNCTION_ARGS) {
-        DictInt *d = (DictInt*)malloc( sizeof(DictInt) );
-        Map *cfg, *pcfg;
-        text *in;
+Datum
+dinit_intdict(PG_FUNCTION_ARGS) {
+    DictInt *d = (DictInt*)malloc( sizeof(DictInt) );
+    Map *cfg, *pcfg;
+    text *in;
  
-        if (!d)
-                elog(ERROR, "No memory");
-        memset(d, 0, sizeof(DictInt));
+    if (!d)
+        elog(ERROR, "No memory");
+    memset(d, 0, sizeof(DictInt));
  
-        /* Your INIT code */
-/* defaults */
-        d-&gt;maxlen = 6;
-        d-&gt;rejectlong = false;
+    /* Your INIT code */
+    /* defaults */
+    d-&gt;maxlen = 6;
+    d-&gt;rejectlong = false;
  
-        if ( PG_ARGISNULL(0) || PG_GETARG_POINTER(0) == NULL ) /* no options */
-             PG_RETURN_POINTER(d);
+    if (PG_ARGISNULL(0) || PG_GETARG_POINTER(0) == NULL) /* no options */
+        PG_RETURN_POINTER(d);
  
-        in = PG_GETARG_TEXT_P(0);
-        parse_keyvalpairs(in, &amp;cfg);
-        PG_FREE_IF_COPY(in, 0);
-        pcfg=cfg;
+    in = PG_GETARG_TEXT_P(0);
+    parse_keyvalpairs(in, &amp;cfg);
+    PG_FREE_IF_COPY(in, 0);
+    pcfg=cfg;
  
-        while (pcfg-&gt;key) 
+    while (pcfg-&gt;key) 
+    {
+        if (strcasecmp("MAXLEN", pcfg-&gt;key) == 0)
+                d-&gt;maxlen=atoi(pcfg-&gt;value);
+        else if ( strcasecmp("REJECTLONG", pcfg-&gt;key) == 0) 
          {
-            if (strcasecmp("MAXLEN", pcfg-&gt;key) == 0)
-                    d-&gt;maxlen=atoi(pcfg-&gt;value);
-            else if ( strcasecmp("REJECTLONG", pcfg-&gt;key) == 0) 
-            {
-               if ( strcasecmp("true", pcfg-&gt;value) == 0 )
-                   d-&gt;rejectlong=true;
-               else if ( strcasecmp("false", pcfg-&gt;value) == 0)
-                   d-&gt;rejectlong=false;
-               else
-                   elog(ERROR,"Unknown value: %s =&gt; %s", pcfg-&gt;key, pcfg-&gt;value);
-            }
-            else
-                elog(ERROR,"Unknown option: %s =&gt; %s", pcfg-&gt;key, pcfg-&gt;value);
-
-            pfree(pcfg-&gt;key);
-            pfree(pcfg-&gt;value);
-            pcfg++;
+           if ( strcasecmp("true", pcfg-&gt;value) == 0 )
+               d-&gt;rejectlong=true;
+           else if ( strcasecmp("false", pcfg-&gt;value) == 0)
+               d-&gt;rejectlong=false;
+           else
+               elog(ERROR,"Unknown value: %s =&gt; %s", pcfg-&gt;key, pcfg-&gt;value);
          }
-        pfree(cfg);
+        else
+            elog(ERROR,"Unknown option: %s =&gt; %s", pcfg-&gt;key, pcfg-&gt;value);
  
-        PG_RETURN_POINTER(d);
+        pfree(pcfg-&gt;key);
+        pfree(pcfg-&gt;value);
+        pcfg++;
+    }
+    pfree(cfg);
+
+    PG_RETURN_POINTER(d);
   }
  
  PG_FUNCTION_INFO_V1(dlexize_intdict);
@@ -3342,36 +3559,37 @@ Datum dlexize_intdict(PG_FUNCTION_ARGS);
  Datum
  dlexize_intdict(PG_FUNCTION_ARGS)
  {
-        DictInt *d = (DictInt*)PG_GETARG_POINTER(0);
-        char       *in = (char*)PG_GETARG_POINTER(1);
-        char *txt = pnstrdup(in, PG_GETARG_INT32(2));
-        TSLexeme *res = palloc(sizeof(TSLexeme) * 2);
+    DictInt *d = (DictInt*)PG_GETARG_POINTER(0);
+    char       *in = (char*)PG_GETARG_POINTER(1);
+    char *txt = pnstrdup(in, PG_GETARG_INT32(2));
+    TSLexeme *res = palloc(sizeof(TSLexeme) * 2);
  
-        /* Your INIT dictionary code */
-        res[1].lexeme = NULL;
+    /* Your INIT dictionary code */
+    res[1].lexeme = NULL;
  
-        if  (PG_GETARG_INT32(2) &gt; d-&gt;maxlen)
-        {
-           if (d-&gt;rejectlong) 
-           { /* stop, return void array */
-               pfree(txt);
-               res[0].lexeme = NULL;
-            }
-            else
-            { /* cut integer */
-               txt[d-&gt;maxlen] = '\0';
-               res[0].lexeme = txt;
-            }
+    if  (PG_GETARG_INT32(2) &gt; d-&gt;maxlen)
+    {
+       if (d-&gt;rejectlong) 
+       { /* stop, return void array */
+           pfree(txt);
+           res[0].lexeme = NULL;
          }
          else
-            res[0].lexeme = txt;
+        { /* cut integer */
+           txt[d-&gt;maxlen] = '\0';
+           res[0].lexeme = txt;
+        }
+    }
+    else
+        res[0].lexeme = txt;
  
-        PG_RETURN_POINTER(res);
+    PG_RETURN_POINTER(res);
  }
  </programlisting>
  
-<para>
-This is the <literal>Makefile</literal>:
+  <para>
+   This is the <literal>Makefile</literal>:
+
  <programlisting>
  subdir = contrib/dict_intdict
  top_builddir = ../..
@@ -3384,134 +3602,136 @@ DOCS =
  
  include $(top_srcdir)/contrib/contrib-global.mk
  </programlisting>
-</para>
+  </para>
+
+  <para>
+   This is a <literal>dict_intdict.sql.in</literal>:
  
-<para>
-This is a <literal>dict_intdict.sql.in</literal>:
  <programlisting>
  SET default_text_search_config = 'english';
  
  BEGIN;
  
  CREATE OR REPLACE FUNCTION dinit_intdict(internal)
-RETURNS internal
-AS 'MODULE_PATHNAME'
-LANGUAGE 'C';
+    RETURNS internal
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C';
  
  CREATE OR REPLACE FUNCTION dlexize_intdict(internal,internal,internal,internal)
-RETURNS internal
-AS 'MODULE_PATHNAME'
-LANGUAGE 'C'
-WITH (isstrict);
+    RETURNS internal
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C'
+    WITH (isstrict);
  
  CREATE TEXT SEARCH TEMPLATE intdict_template (
      LEXIZE = dlexize_intdict, INIT = dinit_intdict
  );
  
  CREATE TEXT SEARCH DICTIONARY intdict (
-    TEMPLATE = intdict_template,
-    MAXLEN = 6, REJECTLONG = false
+  TEMPLATE = intdict_template,
+  MAXLEN = 6, REJECTLONG = false
  );
  
  COMMENT ON TEXT SEARCH DICTIONARY intdict IS 'Dictionary for Integers';
  
  END;
  </programlisting>
-</para>
-
-</sect1>
-
-<sect1 id="textsearch-parser-example">
-<title>Example of Creating a Parser</title>
-
-<para>
-<acronym>SQL</acronym> command <literal>CREATE TEXT SEARCH PARSER</literal> creates
-a parser for full text searching. In our example we will implement
-a simple parser which recognizes space-delimited words and
-has only two types (3, word, Word; 12, blank, Space symbols). Identifiers
-were chosen to keep compatibility with the default <function>headline()</function> function
-since we do not implement our own version.
-</para>
-
-<para>
-To implement a parser one needs to create a minimum of four functions.
-</para>
-
-<variablelist>
-
-<varlistentry>
-<term>
-<synopsis>
-START = <replaceable class="PARAMETER">start_function</replaceable>
-</synopsis>
-</term>
-<listitem>
-<para>
-Initialize the parser. Arguments are a pointer to the parsed text and its
-length.
-</para>
-<para>
-Returns a pointer to the internal structure of a parser. Note that it should
-be <function>malloc</>ed or <function>palloc</>ed in the
-<literal>TopMemoryContext</>.  We name it <literal>ParserState</>.
-</para>
-</listitem>
-</varlistentry>
-
-<varlistentry>
-<term>
-<synopsis>
-GETTOKEN = <replaceable class="PARAMETER">gettoken_function</replaceable>
-</synopsis>
-</term>
-<listitem>
-<para>
-Returns the next token.
-Arguments are <literal>ParserState *, char **, int *</literal>.
-</para>
-<para>
-This procedure will be called as long as the procedure returns token type zero.
-</para>
-</listitem>
-</varlistentry>
-
-<varlistentry>
-<term>
-<synopsis>
-END = <replaceable class="PARAMETER">end_function</replaceable>,
-</synopsis>
-</term>
-<listitem>
-<para>
-This void function will be called after parsing is finished to free
-allocated resources in this procedure (<literal>ParserState</>).  The argument
-is <literal>ParserState *</literal>.
-</para>
-</listitem>
-</varlistentry>
-
-<varlistentry>
-<term>
-<synopsis>
-LEXTYPES = <replaceable class="PARAMETER">lextypes_function</replaceable>
-</synopsis>
-</term>
-<listitem>
-<para>
-Returns an array containing the id, alias, and the description of the tokens
-in the parser. See <structname>LexDescr</structname> in <filename>src/include/utils/ts_public.h</>.
-</para>
-</listitem>
-</varlistentry>
-
-</variablelist>
-
-<para>
-Below is the source code of our test parser, organized as a <filename>contrib</> module.
-</para>
-
-<para>
-Testing:
+  </para>
+
+ </sect1>
+
+ <sect1 id="textsearch-parser-example">
+ <title>Example of Creating a Parser</title>
+
+  <para>
+   The <acronym>SQL</acronym> command <literal>CREATE TEXT SEARCH PARSER</literal> creates
+   a parser for full text searching. In our example we will implement
+   a simple parser which recognizes space-delimited words and
+   has only two types (3, word, Word; 12, blank, Space symbols). Identifiers
+   were chosen to keep compatibility with the default <function>headline()</function> function
+   since we do not implement our own version.
+  </para>
+
+  <para>
+   To implement a parser one needs to create a minimum of four functions.
+  </para>
+
+  <variablelist>
+
+   <varlistentry>
+    <term>
+     <synopsis>
+      START = <replaceable class="PARAMETER">start_function</replaceable>
+     </synopsis>
+    </term>
+    <listitem>
+     <para>
+      Initialize the parser. Arguments are a pointer to the parsed text and its
+      length.
+     </para>
+     <para>
+      Returns a pointer to the internal structure of a parser. Note that it should
+      be <function>malloc</>ed or <function>palloc</>ed in the
+      <literal>TopMemoryContext</>.  We name it <literal>ParserState</>.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term>
+     <synopsis>
+      GETTOKEN = <replaceable class="PARAMETER">gettoken_function</replaceable>
+     </synopsis>
+    </term>
+    <listitem>
+     <para>
+      Returns the next token.
+      Arguments are <literal>ParserState *, char **, int *</literal>.
+     </para>
+     <para>
+      This procedure will be called repeatedly until it returns token type zero.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term>
+     <synopsis>
+      END = <replaceable class="PARAMETER">end_function</replaceable>
+     </synopsis>
+    </term>
+    <listitem>
+     <para>
+      This void function will be called after parsing is finished to free
+      the resources allocated by the parser (<literal>ParserState</>).  The argument
+      is <literal>ParserState *</literal>.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term>
+     <synopsis>
+      LEXTYPES = <replaceable class="PARAMETER">lextypes_function</replaceable>
+     </synopsis>
+    </term>
+    <listitem>
+     <para>
+      Returns an array containing the id, alias, and the description of the tokens
+      in the parser. See <structname>LexDescr</structname> in <filename>src/include/utils/ts_public.h</>.
+     </para>
+    </listitem>
+   </varlistentry>
+
+  </variablelist>
+
+  <para>
+   Below is the source code of our test parser, organized as a <filename>contrib</> module.
+  </para>
+
+  <para>
+   Testing:
+
  <programlisting>
  SELECT * FROM ts_parse('testparser','That''s my first own parser');
   tokid | token
@@ -3525,35 +3745,39 @@ SELECT * FROM ts_parse('testparser','That''s my first own parser');
       3 | own
      12 |
       3 | parser
+
  SELECT to_tsvector('testcfg','That''s my first own parser');
                     to_tsvector
  -------------------------------------------------
   'my':2 'own':4 'first':3 'parser':5 'that''s':1
+
  SELECT ts_headline('testcfg','Supernovae stars are the brightest phenomena in galaxies', to_tsquery('testcfg', 'star'));
                              headline
  -----------------------------------------------------------------
   Supernovae &lt;b&gt;stars&lt;/b&gt; are the brightest phenomena in galaxies
  </programlisting>
  
-</para>
+  </para>
  
-<para>
-This test parser is an example adopted from a tutorial by Valli, <ulink
-url="http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/docs/HOWTO-parser-tsearch2.html">parser
-HOWTO</ulink>.
-</para>
+  <para>
+   This test parser is an example adapted from a tutorial by Valli, <ulink
+   url="http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/docs/HOWTO-parser-tsearch2.html">parser
+   HOWTO</ulink>.
+  </para>
+
+  <para>
+   To compile the example just do:
  
-<para>
-To compile the example just do:
  <programlisting>
-make
-make install
-psql regression < test_parser.sql
+$ make
+$ make install
+$ psql regression &lt; test_parser.sql
  </programlisting>
-</para>
+  </para>
+
+  <para>
+   This is a <filename>test_parser.c</>:
  
-<para>
-This is a <filename>test_parser.c</>:
  <programlisting>
  
  #ifdef PG_MODULE_MAGIC
@@ -3630,7 +3854,7 @@ Datum testprs_getlexeme(PG_FUNCTION_ARGS)
          /* go to the next white-space character */
          while ((pst-&gt;buffer)[pst-&gt;pos] != ' ' &amp;&amp; 
                 pst-&gt;pos &lt; pst-&gt;len)
-          (pst-&gt;pos)++;
+            (pst-&gt;pos)++;
      }
  
      *tlen = pst-&gt;pos - *tlen;
@@ -3641,6 +3865,7 @@ Datum testprs_getlexeme(PG_FUNCTION_ARGS)
  
      PG_RETURN_INT32(type);
  }
+
  Datum testprs_end(PG_FUNCTION_ARGS)
  {
      ParserState *pst = (ParserState *) PG_GETARG_POINTER(0);
@@ -3673,7 +3898,7 @@ Datum testprs_lextype(PG_FUNCTION_ARGS)
  
  </programlisting>
  
-This is a <literal>Makefile</literal>
+   This is a <literal>Makefile</literal>:
  
  <programlisting>
  override CPPFLAGS := -I. $(CPPFLAGS)
@@ -3698,7 +3923,7 @@ include $(top_srcdir)/contrib/contrib-global.mk
  endif
  </programlisting>
  
-This is a <literal>test_parser.sql.in</literal>:
+   This is a <literal>test_parser.sql.in</literal>:
  
  <programlisting>
  SET default_text_search_config = 'english';
@@ -3706,41 +3931,41 @@ SET default_text_search_config = 'english';
  BEGIN;
  
  CREATE FUNCTION testprs_start(internal,int4)
-RETURNS internal
-AS 'MODULE_PATHNAME'
-LANGUAGE 'C' with (isstrict);
+    RETURNS internal
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C' with (isstrict);
  
  CREATE FUNCTION testprs_getlexeme(internal,internal,internal)
-RETURNS internal
-AS 'MODULE_PATHNAME'
-LANGUAGE 'C' with (isstrict);
+    RETURNS internal
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C' with (isstrict);
  
  CREATE FUNCTION testprs_end(internal)
-RETURNS void
-AS 'MODULE_PATHNAME'
-LANGUAGE 'C' with (isstrict);
+    RETURNS void
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C' with (isstrict);
  
  CREATE FUNCTION testprs_lextype(internal)
-RETURNS internal
-AS 'MODULE_PATHNAME'
-LANGUAGE 'C' with (isstrict);
+    RETURNS internal
+    AS 'MODULE_PATHNAME'
+    LANGUAGE 'C' with (isstrict);
  
  
  CREATE TEXT SEARCH PARSER testparser (
-        START =    testprs_start,
-        GETTOKEN = testprs_getlexeme,
-        END =      testprs_end,
-        LEXTYPES = testprs_lextype
-;
+    START =    testprs_start,
+    GETTOKEN = testprs_getlexeme,
+    END =      testprs_end,
+    LEXTYPES = testprs_lextype
+);
  
-CREATE TEXT SEARCH CONFIGURATION testcfg ( PARSER = testparser );
+CREATE TEXT SEARCH CONFIGURATION testcfg (PARSER = testparser);
  ALTER TEXT SEARCH CONFIGURATION testcfg ADD MAPPING FOR word WITH simple;
  
  END;
  </programlisting>
  
-</para>
+  </para>
  
-</sect1>
+ </sect1>
  
  </chapter>
author	Bruce Momjian <bruce@momjian.us>
	Wed, 29 Aug 2007 02:37:04 +0000 (02:37 +0000)
committer	Bruce Momjian <bruce@momjian.us>
	Wed, 29 Aug 2007 02:37:04 +0000 (02:37 +0000)