
Rewrite of planner statistics-gathering code.

author     Tom Lane <tgl@sss.pgh.pa.us>   Mon, 7 May 2001 00:43:27 +0000 (00:43 +0000)
committer  Tom Lane <tgl@sss.pgh.pa.us>   Mon, 7 May 2001 00:43:27 +0000 (00:43 +0000)

ANALYZE is now available as a separate statement (though it can still be
invoked as part of VACUUM, too).
pg_statistic redesigned to be more flexible about what statistics are
stored.  ANALYZE now collects a list of several of the most common values,
not just one, plus a histogram (not just the min and max values).  Random
sampling is used to make the process reasonably fast even on very large
tables.  The number of values and histogram bins collected is now
user-settable via an ALTER TABLE command.

There is more still to do; the new stats are not being used everywhere
they could be in the planner.  But the remaining changes for this project
should be localized, and the behavior is already better than before.

A not-very-related change is that sorting now makes use of a btree comparison
routine if it can find one, rather than invoking '<' twice.
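
As a rough sketch of the resulting workflow (the table and column names here
are hypothetical, and the target value of 25 is arbitrary):

    -- Raise the per-column statistics target; it bounds both the number of
    -- most-common values kept and the number of histogram bins (default 10).
    ALTER TABLE orders ALTER COLUMN customer_id SET STATISTICS 25;

    -- Gather statistics for just that column, or omit the column list
    -- to analyze the whole table.
    ANALYZE orders (customer_id);

    -- ANALYZE can still run as part of VACUUM:
    VACUUM ANALYZE orders;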

66 files changed:
doc/src/sgml/catalogs.sgml
doc/src/sgml/indices.sgml
doc/src/sgml/ref/allfiles.sgml
doc/src/sgml/ref/alter_table.sgml
doc/src/sgml/ref/analyze.sgml [new file with mode: 0644]
doc/src/sgml/ref/vacuum.sgml
doc/src/sgml/reference.sgml
doc/src/sgml/xoper.sgml
src/backend/access/common/tupdesc.c
src/backend/access/gist/gist.c
src/backend/access/hash/hash.c
src/backend/access/heap/tuptoaster.c
src/backend/access/nbtree/nbtree.c
src/backend/access/rtree/rtree.c
src/backend/catalog/genbki.sh
src/backend/catalog/heap.c
src/backend/catalog/index.c
src/backend/commands/analyze.c
src/backend/commands/command.c
src/backend/commands/vacuum.c
src/backend/executor/nodeSort.c
src/backend/nodes/copyfuncs.c
src/backend/nodes/equalfuncs.c
src/backend/nodes/readfuncs.c
src/backend/optimizer/path/costsize.c
src/backend/optimizer/path/joinpath.c
src/backend/optimizer/plan/createplan.c
src/backend/optimizer/plan/initsplan.c
src/backend/optimizer/plan/planner.c
src/backend/optimizer/prep/prepunion.c
src/backend/optimizer/util/pathnode.c
src/backend/optimizer/util/plancat.c
src/backend/parser/analyze.c
src/backend/parser/gram.y
src/backend/parser/keywords.c
src/backend/parser/parse_relation.c
src/backend/tcop/utility.c
src/backend/utils/adt/selfuncs.c
src/backend/utils/cache/lsyscache.c
src/backend/utils/cache/syscache.c
src/backend/utils/sort/tuplesort.c
src/include/access/tuptoaster.h
src/include/catalog/catversion.h
src/include/catalog/heap.h
src/include/catalog/index.h
src/include/catalog/indexing.h
src/include/catalog/pg_attribute.h
src/include/catalog/pg_class.h
src/include/catalog/pg_statistic.h
src/include/commands/command.h
src/include/commands/vacuum.h
src/include/config.h.in
src/include/nodes/execnodes.h
src/include/nodes/parsenodes.h
src/include/nodes/primnodes.h
src/include/nodes/relation.h
src/include/optimizer/cost.h
src/include/optimizer/pathnode.h
src/include/utils/lsyscache.h
src/include/utils/syscache.h
src/include/utils/tuplesort.h
src/interfaces/ecpg/preproc/keywords.c
src/interfaces/ecpg/preproc/preproc.y
src/test/regress/expected/oidjoins.out
src/test/regress/expected/opr_sanity.out
src/test/regress/sql/oidjoins.sql

index 1738a5b..01885a5 100644 (file)
@@ -1,6 +1,6 @@
 <!--
  Documentation of the system catalogs, directed toward PostgreSQL developers
- $Header: /cvsroot/pgsql/doc/src/sgml/catalogs.sgml,v 2.15 2001/04/20 15:52:33 thomas Exp $
+ $Header: /cvsroot/pgsql/doc/src/sgml/catalogs.sgml,v 2.16 2001/05/07 00:43:14 tgl Exp $
  -->
 
 <chapter id="catalogs">
@@ -16,7 +16,7 @@
    <productname>PostgreSQL</productname>'s system catalogs are regular
    tables.  You can drop and recreate the tables, add columns, insert
    and update values, and severely mess up your system that way.
-   Normally one never has to change the system catalogs by hand, there
+   Normally one should not change the system catalogs by hand, there
    are always SQL commands to do that.  (For example, <command>CREATE
    DATABASE</command> inserts a row into the
    <structname>pg_database</structname> catalog -- and actually
   <para>
    <structname>pg_aggregate</structname> stores information about
    aggregate functions.  An aggregate function is a function that
-   operates on a set of values (typically one column from each the row
+   operates on a set of values (typically one column from each row
    that matches a query condition) and returns a single value computed
    from all these values.  Typical aggregate functions are
    <function>sum</function>, <function>count</function>, and
       <entry>aggbasetype</entry>
       <entry><type>oid</type></entry>
       <entry>pg_type.oid</entry>
-      <entry>The type on which this function operates when invoked from SQL</entry>
+      <entry>The input datatype for this aggregate function</entry>
      </row>
      <row>
       <entry>aggtranstype</entry>
 
   <para>
    An aggregate function is identified through name
-   <emphasis>and</emphasis> argument type.  Hence aggname and aggname
+   <emphasis>and</emphasis> argument type.  Hence aggname and aggbasetype
    are the composite primary key.
   </para>
 
      <row>
       <entry>adnum</entry>
       <entry><type>int2</type></entry>
-      <entry></entry>
-      <entry>
-       The number of the column; see
-       <structname>pg_attribute</structname>.<structfield>pg_attnum</structfield>
-      </entry>
+      <entry>pg_attribute.attnum</entry>
+      <entry>The number of the column</entry>
      </row>
 
      <row>
      </row>
 
      <row>
-      <entry>attdispersion</entry>
-      <entry><type>float4</type></entry>
+      <entry>attstattarget</entry>
+      <entry><type>int4</type></entry>
       <entry></entry>
       <entry>
-       <structfield>attdispersion</structfield> is the dispersion
-       statistic of the column (0.0 to 1.0), or zero if the statistic
-       has not been calculated, or -1.0 if <command>VACUUM</command>
-       found that the column contains no duplicate entries (in which
-       case the dispersion should be taken as
-       1.0/<symbol>numberOfRows</symbol> for the current table size).
-       The -1.0 hack is useful because the number of rows may be
-       updated more often than
-       <structfield>attdispersion</structfield> is. We assume that the
-       column will retain its no-duplicate-entry property.
+       <structfield>attstattarget</structfield> controls the level of detail
+       of statistics accumulated for this column by
+       <command>ANALYZE</command>.
+       A zero value indicates that no statistics should be collected.
+       The exact meaning of positive values is datatype-dependent.
+       For scalar datatypes, <structfield>attstattarget</structfield>
+       is both the target number of <quote>most common values</quote>
+       to collect, and the target number of histogram bins to create.
       </entry>
      </row>
 
      </row>
 
      <row>
-      <entry>attnelems</entry>
+      <entry>attndims</entry>
       <entry><type>int4</type></entry>
       <entry></entry>
-      <entry>Number of dimensions, if the column is an array</entry>
+      <entry>
+       Number of dimensions, if the column is an array; otherwise 0.
+      </entry>
      </row>
 
      <row>
       <entry></entry>
       <entry>
        Size of the on-disk representation of this table in pages (size
-       <symbol>BLCKSZ</symbol>).  This is only an approximate value
-       which is calculated during vacuum.
+       <symbol>BLCKSZ</symbol>).
+       This is only an estimate used by the planner.
+       It is updated by <command>VACUUM</command>,
+       <command>ANALYZE</command>, and <command>CREATE INDEX</command>.
       </entry>
      </row>
 
      <row>
       <entry>reltuples</entry>
-      <entry><type>int4</type></entry>
+      <entry><type>float4</type></entry>
       <entry></entry>
       <entry>
-       Number of tuples in the table.  This is only an estimate used
-       by the planner, updated by <command>VACUUM</command>.
+       Number of tuples in the table.
+       This is only an estimate used by the planner.
+       It is updated by <command>VACUUM</command>,
+       <command>ANALYZE</command>, and <command>CREATE INDEX</command>.
       </entry>
      </row>
 
  </section>
 
 
+ <section id="catalog-pg-statistic">
+  <title>pg_statistic</title>
+
+  <para>
+   <structname>pg_statistic</structname> stores statistical data about
+   the contents of the database.  Entries are created by
+   <command>ANALYZE</command> and subsequently used by the query planner.
+   There is one entry for each table column that has been analyzed.
+   Note that all the statistical data is inherently approximate,
+   even assuming that it is up-to-date.
+  </para>
+
+  <para>
+   Since different kinds of statistics may be appropriate for different
+   kinds of data, <structname>pg_statistic</structname> is designed not
+   to assume very much about what sort of statistics it stores.  Only
+   extremely general statistics (such as NULL-ness) are given dedicated
+   columns in <structname>pg_statistic</structname>.  Everything else
+   is stored in "slots", which are groups of associated columns whose
+   content is identified by a code number in one of the slot's columns.
+   For more information see
+   <filename>src/include/catalog/pg_statistic.h</filename>.
+  </para>
+
+  <table>
+   <title>pg_statistic Columns</title>
+
+   <tgroup cols=4>
+    <thead>
+     <row>
+      <entry>Name</entry>
+      <entry>Type</entry>
+      <entry>References</entry>
+      <entry>Description</entry>
+     </row>
+    </thead>
+
+    <tbody>
+     <row>
+      <entry>starelid</entry>
+      <entry><type>oid</type></entry>
+      <entry>pg_class.oid</entry>
+      <entry>The table that the described column belongs to</entry>
+     </row>
+
+     <row>
+      <entry>staattnum</entry>
+      <entry><type>int2</type></entry>
+      <entry>pg_attribute.attnum</entry>
+      <entry>The number of the described column</entry>
+     </row>
+
+     <row>
+      <entry>stanullfrac</entry>
+      <entry><type>float4</type></entry>
+      <entry></entry>
+      <entry>The fraction of the column's entries that are NULL</entry>
+     </row>
+
+     <row>
+      <entry>stawidth</entry>
+      <entry><type>int4</type></entry>
+      <entry></entry>
+      <entry>The average stored width, in bytes, of non-NULL entries</entry>
+     </row>
+
+     <row>
+      <entry>stadistinct</entry>
+      <entry><type>float4</type></entry>
+      <entry></entry>
+      <entry>The number of distinct non-NULL data values in the column.
+      A value greater than zero is the actual number of distinct values.
+      A value less than zero is the negative of a fraction of the number
+      of rows in the table (for example, a column in which values appear about
+      twice on the average could be represented by stadistinct = -0.5).
+      A zero value means the number of distinct values is unknown.
+      </entry>
+     </row>
+
+     <row>
+      <entry>stakindN</entry>
+      <entry><type>int2</type></entry>
+      <entry></entry>
+      <entry>A code number indicating the kind of statistics stored in the Nth
+      "slot" of the <structname>pg_statistic</structname> row.
+      </entry>
+     </row>
+
+     <row>
+      <entry>staopN</entry>
+      <entry><type>oid</type></entry>
+      <entry>pg_operator.oid</entry>
+      <entry>An operator used to derive the statistics stored in the
+      Nth "slot".  For example, a histogram slot would show the "&lt;"
+      operator that defines the sort order of the data.
+      </entry>
+     </row>
+
+     <row>
+      <entry>stanumbersN</entry>
+      <entry><type>float4[]</type></entry>
+      <entry></entry>
+      <entry>Numerical statistics of the appropriate kind for the Nth
+      "slot", or NULL if the slot kind does not involve numerical values.
+      </entry>
+     </row>
+
+     <row>
+      <entry>stavaluesN</entry>
+      <entry><type>text[]</type></entry>
+      <entry></entry>
+      <entry>Column data values of the appropriate kind for the Nth
+      "slot", or NULL if the slot kind does not store any data values.
+      For datatype independence, all column data values are converted
+      to external textual form and stored as TEXT datums.
+      </entry>
+     </row>
+    </tbody>
+   </tgroup>
+  </table>
+
+ </section>
+
+
  <section id="catalog-pg-type">
   <title>pg_type</title>
 
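A minimal sketch of how the slot layout described above can be inspected
(assuming a hypothetical analyzed table named t; the numbered slot columns
are assumed to appear as stakind1, stanumbers1, stavalues1, and so on):

    -- One pg_statistic row per analyzed column; the kind of statistics held
    -- in each slot is identified by its stakindN code.
    SELECT s.staattnum, s.stanullfrac, s.stawidth, s.stadistinct,
           s.stakind1, s.stanumbers1, s.stavalues1
      FROM pg_statistic s, pg_class rel
     WHERE s.starelid = rel.oid
       AND rel.relname = 't';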
index 32ecd9e..42cab24 100644 (file)
@@ -1,4 +1,4 @@
-<!-- $Header: /cvsroot/pgsql/doc/src/sgml/indices.sgml,v 1.14 2001/02/20 22:27:56 petere Exp $ -->
+<!-- $Header: /cvsroot/pgsql/doc/src/sgml/indices.sgml,v 1.15 2001/05/07 00:43:14 tgl Exp $ -->
 
 <chapter id="indices">
  <title id="indices-title">Indices</title>
@@ -71,7 +71,7 @@ CREATE INDEX test1_id_index ON test1 (id);
    Once the index is created, no further intervention is required: the
    system will use the index when it thinks it would be more efficient
    than a sequential table scan.  But you may have to run the
-   <command>VACUUM ANALYZE</command> command regularly to update
+   <command>ANALYZE</command> command regularly to update
    statistics to allow the query planner to make educated decisions.
    Also read <xref linkend="performance-tips"> for information about
    how to find out whether an index is used and when and why the
index 0088896..dea65e9 100644 (file)
@@ -1,5 +1,5 @@
 <!--
-$Header: /cvsroot/pgsql/doc/src/sgml/ref/allfiles.sgml,v 1.27 2001/01/13 03:11:12 petere Exp $
+$Header: /cvsroot/pgsql/doc/src/sgml/ref/allfiles.sgml,v 1.28 2001/05/07 00:43:14 tgl Exp $
 Postgres documentation
 Complete list of usable sgml source files in this directory.
 -->
@@ -40,6 +40,7 @@ Complete list of usable sgml source files in this directory.
 <!entity alterGroup         system "alter_group.sgml">
 <!entity alterTable         system "alter_table.sgml">
 <!entity alterUser          system "alter_user.sgml">
+<!entity analyze            system "analyze.sgml">
 <!entity begin              system "begin.sgml">
 <!entity checkpoint         system "checkpoint.sgml">
 <!entity close              system "close.sgml">
index 4c258c8..21fc8c2 100644 (file)
@@ -1,5 +1,5 @@
 <!--
-$Header: /cvsroot/pgsql/doc/src/sgml/ref/alter_table.sgml,v 1.22 2001/03/05 18:42:55 momjian Exp $
+$Header: /cvsroot/pgsql/doc/src/sgml/ref/alter_table.sgml,v 1.23 2001/05/07 00:43:15 tgl Exp $
 Postgres documentation
 -->
 
@@ -29,7 +29,9 @@ ALTER TABLE [ ONLY ] <replaceable class="PARAMETER">table</replaceable> [ * ]
 ALTER TABLE [ ONLY ] <replaceable class="PARAMETER">table</replaceable> [ * ]
     ALTER [ COLUMN ] <replaceable class="PARAMETER">column</replaceable> { SET DEFAULT <replaceable
     class="PARAMETER">value</replaceable> | DROP DEFAULT }
-ALTER TABLE <replaceable class="PARAMETER">table</replaceable> [ * ]
+ALTER TABLE [ ONLY ] <replaceable class="PARAMETER">table</replaceable> [ * ]
+    ALTER [ COLUMN ] <replaceable class="PARAMETER">column</replaceable> SET STATISTICS <replaceable class="PARAMETER">integer</replaceable>
+ALTER TABLE [ ONLY ] <replaceable class="PARAMETER">table</replaceable> [ * ]
     RENAME [ COLUMN ] <replaceable class="PARAMETER">column</replaceable> TO <replaceable
     class="PARAMETER">newcolumn</replaceable>
 ALTER TABLE <replaceable class="PARAMETER">table</replaceable>
@@ -159,9 +161,14 @@ ALTER TABLE <replaceable class="PARAMETER">table</replaceable>
    <command>ALTER TABLE</command> changes the definition of an existing table.
    The <literal>ADD COLUMN</literal> form adds a new column to the table
    using the same syntax as <xref linkend="SQL-CREATETABLE"
-   endterm="SQL-CREATETABLE-title">. The <literal>ALTER COLUMN</literal> form
-   allows you to set or remove the default for the column. Note that defaults
-   only apply to newly inserted rows.
+   endterm="SQL-CREATETABLE-title">.
+   The <literal>ALTER COLUMN SET/DROP DEFAULT</literal> forms
+   allow you to set or remove the default for the column. Note that defaults
+   only apply to subsequent <command>INSERT</command> commands; they do not
+   cause rows already in the table to change.
+   The <literal>ALTER COLUMN SET STATISTICS</literal> form allows you to
+   set the statistics-gathering target for subsequent
+   <xref linkend="sql-analyze" endterm="sql-analyze-title"> operations.
    The <literal>RENAME</literal> clause causes the name of a table or column
    to change without changing any of the data contained in
    the affected table. Thus, the table or column will
@@ -170,7 +177,7 @@ ALTER TABLE <replaceable class="PARAMETER">table</replaceable>
    The ADD <replaceable class="PARAMETER">table constraint definition</replaceable> clause 
    adds a new constraint to the table using the same syntax as <xref
    linkend="SQL-CREATETABLE" endterm="SQL-CREATETABLE-title">. 
-   The OWNER clause chnages the owner of the table to the user <replaceable class="PARAMETER">
+   The OWNER clause changes the owner of the table to the user <replaceable class="PARAMETER">
    new user</replaceable>.
   </para>
 
@@ -190,10 +197,11 @@ ALTER TABLE <replaceable class="PARAMETER">table</replaceable>
    </para>
 
    <para>
-    In the current implementation, default and constraint clauses for the
+    In the current implementation of <literal>ADD COLUMN</literal>,
+    default and constraint clauses for the
     new column will be ignored. You can use the <literal>SET DEFAULT</literal>
     form of <command>ALTER TABLE</command> to set the default later.
-    (You will also have to update the already existing rows to the
+    (You may also want to update the already existing rows to the
     new default value, using <xref linkend="sql-update"
     endterm="sql-update-title">.)
    </para>
@@ -210,7 +218,7 @@ ALTER TABLE <replaceable class="PARAMETER">table</replaceable>
 
    <para>
     You must own the table in order to change it.
-    Renaming any  part  of  the schema of a system
+    Changing any  part  of  the schema of a system
     catalog is not permitted.
     The <citetitle>PostgreSQL User's Guide</citetitle> has further
     information on inheritance.
diff --git a/doc/src/sgml/ref/analyze.sgml b/doc/src/sgml/ref/analyze.sgml
new file mode 100644 (file)
index 0000000..57d3213
--- /dev/null
@@ -0,0 +1,219 @@
+<!--
+$Header: /cvsroot/pgsql/doc/src/sgml/ref/analyze.sgml,v 1.1 2001/05/07 00:43:15 tgl Exp $
+Postgres documentation
+-->
+
+<refentry id="SQL-ANALYZE">
+ <refmeta>
+  <refentrytitle id="sql-analyze-title">
+   ANALYZE
+  </refentrytitle>
+  <refmiscinfo>SQL - Language Statements</refmiscinfo>
+ </refmeta>
+ <refnamediv>
+  <refname>
+   ANALYZE
+  </refname>
+  <refpurpose>
+   Collect statistics about a <productname>Postgres</productname> database
+  </refpurpose>
+ </refnamediv>
+ <refsynopsisdiv>
+  <refsynopsisdivinfo>
+   <date>2001-05-04</date>
+  </refsynopsisdivinfo>
+  <synopsis>
+ANALYZE [ VERBOSE ] [ <replaceable class="PARAMETER">table</replaceable> [ (<replaceable class="PARAMETER">column</replaceable> [, ...] ) ] ]
+  </synopsis>
+
+  <refsect2 id="R2-SQL-ANALYZE-1">
+   <refsect2info>
+    <date>2001-05-04</date>
+   </refsect2info>
+   <title>
+    Inputs
+   </title>
+
+   <para>
+    <variablelist>
+     <varlistentry>
+      <term>VERBOSE</term>
+      <listitem>
+       <para>
+       Enables display of progress messages.
+       </para>
+      </listitem>
+     </varlistentry>
+     <varlistentry>
+      <term><replaceable class="PARAMETER">table</replaceable></term>
+      <listitem>
+       <para>
+       The name of a specific table to analyze. Defaults to all tables.
+       </para>
+      </listitem>
+     </varlistentry>
+     <varlistentry>
+      <term><replaceable class="PARAMETER">column</replaceable></term>
+      <listitem>
+       <para>
+       The name of a specific column to analyze. Defaults to all columns.
+       </para>
+      </listitem>
+     </varlistentry>
+    </variablelist>
+   </para>
+  </refsect2>
+
+  <refsect2 id="R2-SQL-ANALYZE-2">
+   <refsect2info>
+    <date>2001-05-04</date>
+   </refsect2info>
+   <title>
+    Outputs
+   </title>
+   <para>
+
+    <variablelist>
+     <varlistentry>
+      <term><computeroutput>
+<returnvalue>ANALYZE</returnvalue>
+       </computeroutput></term>
+      <listitem>
+       <para>
+       The command is complete.
+       </para>
+      </listitem>
+     </varlistentry>
+
+    </variablelist>
+   </para>
+  </refsect2>
+ </refsynopsisdiv>
+
+ <refsect1 id="R1-SQL-ANALYZE-1">
+  <refsect1info>
+   <date>2001-05-04</date>
+  </refsect1info>
+  <title>
+   Description
+  </title>
+  <para>
+   <command>ANALYZE</command> collects statistics about the contents of
+   <productname>Postgres</productname> tables, and stores the results in
+   the system table <literal>pg_statistic</literal>.  Subsequently,
+   the query planner uses the statistics to help determine the most efficient
+   execution plans for queries.
+  </para>
+
+  <para>
+   With no parameter, <command>ANALYZE</command> examines every table in the
+   current database.  With a parameter, <command>ANALYZE</command> examines
+   only that table.  It is further possible to give a list of column names,
+   in which case only the statistics for those columns are updated.
+  </para>
+
+  <refsect2 id="R2-SQL-ANALYZE-3">
+   <refsect2info>
+    <date>2001-05-04</date>
+   </refsect2info>
+   <title>
+    Notes
+   </title>
+
+  <para>
+   It is a good idea to run <command>ANALYZE</command> periodically, or
+   just after making major changes in the contents of a table.  Accurate
+   statistics will help the planner to choose the most appropriate query
+   plan, and thereby improve the speed of query processing.  A common
+   strategy is to run <command>VACUUM</command> and <command>ANALYZE</command>
+   once a day during a low-usage time of day.
+  </para>
+
+  <para>
+   Unlike <xref linkend="sql-vacuum" endterm="sql-vacuum-title">,
+   <command>ANALYZE</command> requires
+   only a read lock on the target table, so it can run in parallel with
+   other activity on the table.
+  </para>
+
+  <para>
+   For large tables, <command>ANALYZE</command> takes a random sample of the
+   table contents, rather than examining every row.  This allows even very
+   large tables to be analyzed in a small amount of time.  Note however
+   that the statistics are only approximate, and will change slightly each
+   time <command>ANALYZE</command> is run, even if the actual table contents
+   did not change.  This may result in small changes in the planner's
+   estimated costs shown by <command>EXPLAIN</command>.
+  </para>
+
+  <para>
+   The collected statistics usually include a list of some of the most common
+   values in each column and a histogram showing the approximate data
+   distribution in each column.  One or both of these may be omitted if
+   <command>ANALYZE</command> deems them uninteresting (for example, in
+   a unique-key column, there are no common values) or if the column
+   datatype does not support the appropriate operators.
+  </para>
+
+  <para>
+   The extent of analysis can be controlled by adjusting the per-column
+   statistics target with <command>ALTER TABLE ALTER COLUMN SET
+   STATISTICS</command> (see
+   <xref linkend="sql-altertable" endterm="sql-altertable-title">).  The
+   target value sets the maximum number of entries in the most-common-value
+   list and the maximum number of bins in the histogram.  The default
+   target value is 10, but this can be adjusted up or down to trade off
+   accuracy of planner estimates against the time taken for
+   <command>ANALYZE</command> and the
+   amount of space occupied in <literal>pg_statistic</literal>.
+   In particular, setting the statistics target to zero disables collection of
+   statistics for that column.  It may be useful to do that for columns that
+   are never used as part of the WHERE, GROUP BY, or ORDER BY clauses of
+   queries, since the planner will have no use for statistics on such columns.
+  </para>
+
+  <para>
+   The largest statistics target among the columns being analyzed determines
+   the number of table rows sampled to prepare the statistics.  Increasing
+   the target causes a proportional increase in the time and space needed
+   to do <command>ANALYZE</command>.
+  </para>
+
+  </refsect2>
+ </refsect1>
+
+ <refsect1 id="R1-SQL-ANALYZE-3">
+  <title>
+   Compatibility
+  </title>
+
+  <refsect2 id="R2-SQL-ANALYZE-4">
+   <refsect2info>
+    <date>2001-05-04</date>
+   </refsect2info>
+   <title>
+    SQL92
+   </title>
+   <para>
+    There is no <command>ANALYZE</command> statement in <acronym>SQL92</acronym>.
+   </para>
+  </refsect2>
+ </refsect1>
+</refentry>
+
+<!-- Keep this comment at the end of the file
+Local variables:
+mode: sgml
+sgml-omittag:nil
+sgml-shorttag:t
+sgml-minimize-attributes:nil
+sgml-always-quote-attributes:t
+sgml-indent-step:1
+sgml-indent-data:t
+sgml-parent-document:nil
+sgml-default-dtd-file:"../reference.ced"
+sgml-exposed-tags:nil
+sgml-local-catalogs:"/usr/lib/sgml/catalog"
+sgml-local-ecat-files:nil
+End:
+-->
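A short, hedged illustration of the tuning notes in this new page (table and
column names are hypothetical):

    -- Disable statistics collection for a column that never appears in
    -- WHERE, GROUP BY, or ORDER BY clauses:
    ALTER TABLE log ALTER COLUMN payload SET STATISTICS 0;

    -- Re-analyze the table, with progress messages:
    ANALYZE VERBOSE log;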
index 51cb8a9..cbb1824 100644 (file)
@@ -1,5 +1,5 @@
 <!--
-$Header: /cvsroot/pgsql/doc/src/sgml/ref/vacuum.sgml,v 1.13 2001/01/13 23:58:55 petere Exp $
+$Header: /cvsroot/pgsql/doc/src/sgml/ref/vacuum.sgml,v 1.14 2001/05/07 00:43:15 tgl Exp $
 Postgres documentation
 -->
 
@@ -15,15 +15,15 @@ Postgres documentation
    VACUUM
   </refname>
   <refpurpose>
-   Clean and analyze a <productname>Postgres</productname> database
+   Clean and optionally analyze a <productname>Postgres</productname> database
   </refpurpose>
  </refnamediv>
  <refsynopsisdiv>
   <refsynopsisdivinfo>
-   <date>1999-07-20</date>
+   <date>2001-05-04</date>
   </refsynopsisdivinfo>
   <synopsis>
-VACUUM [ VERBOSE ] [ ANALYZE ] [ <replaceable class="PARAMETER">table</replaceable> ]
+VACUUM [ VERBOSE ] [ <replaceable class="PARAMETER">table</replaceable> ]
 VACUUM [ VERBOSE ] ANALYZE [ <replaceable class="PARAMETER">table</replaceable> [ (<replaceable class="PARAMETER">column</replaceable> [, ...] ) ] ]
   </synopsis>
 
@@ -49,7 +49,7 @@ VACUUM [ VERBOSE ] ANALYZE [ <replaceable class="PARAMETER">table</replaceable>
       <term>ANALYZE</term>
       <listitem>
        <para>
-       Updates column statistics used by the optimizer to
+       Updates statistics used by the optimizer to
        determine the most efficient way to execute a query.
        </para>
       </listitem>
@@ -90,7 +90,7 @@ VACUUM [ VERBOSE ] ANALYZE [ <replaceable class="PARAMETER">table</replaceable>
        </computeroutput></term>
       <listitem>
        <para>
-       The command has been accepted and the database is being cleaned.
+       The command is complete.
        </para>
       </listitem>
      </varlistentry>
@@ -144,28 +144,26 @@ NOTICE:  Index <replaceable class="PARAMETER">index</replaceable>: Pages 28;
    Description
   </title>
   <para>
-   <command>VACUUM</command> serves two purposes in 
-   <productname>Postgres</productname> as both a means to reclaim storage and
-   also a means to collect information for the optimizer.
+   <command>VACUUM</command> reclaims storage occupied by deleted tuples.
+   In normal <productname>Postgres</productname> operation, tuples that
+   are DELETEd or obsoleted by UPDATE are not physically removed from
+   their table; they remain present until a <command>VACUUM</command> is
+   done.  Therefore it's necessary to do <command>VACUUM</command>
+   periodically, especially on frequently-updated tables.
   </para>
 
   <para>
-   <command>VACUUM</command> opens every table in the database,
-   cleans out records from rolled back transactions, and updates statistics in the
-   system catalogs.  The statistics maintained include the number of
-   tuples and number of pages stored in all tables.
-  </para>
-
-
-  <para>
-   <command>VACUUM ANALYZE</command> collects statistics representing the
-   dispersion of the data in each column.
-   This information is valuable when several query execution paths are possible.
+   With no parameter, <command>VACUUM</command> processes every table in the
+   current database.  With a parameter, <command>VACUUM</command> processes
+   only that table.
   </para>
 
   <para>
-   Running <command>VACUUM</command>
-   periodically will increase the speed of the database in processing user queries.
+   <command>VACUUM ANALYZE</command> performs a <command>VACUUM</command>
+   and then an <command>ANALYZE</command> for each selected table.  This
+   is a handy combination form for routine maintenance scripts.  See
+   <xref linkend="sql-analyze" endterm="sql-analyze-title">
+   for more details about its processing.
   </para>
 
   <refsect2 id="R2-SQL-VACUUM-3">
@@ -175,16 +173,15 @@ NOTICE:  Index <replaceable class="PARAMETER">index</replaceable>: Pages 28;
    <title>
     Notes
    </title>
-   <para>
-    The open database is the target for <command>VACUUM</command>.
-   </para>
+
    <para>
     We recommend that active production databases be
     <command>VACUUM</command>-ed nightly, in order to remove
     expired rows. After copying a large table into
     <productname>Postgres</productname> or after deleting a large number
     of records, it may be a good idea to issue a <command>VACUUM
-    ANALYZE</command> query. This will update the system catalogs with
+    ANALYZE</command> command for the affected table. This will update the
+    system catalogs with
     the results of all recent changes, and allow the
     <productname>Postgres</productname> query optimizer to make better
     choices in planning user queries.
index b92ee08..9a977a6 100644 (file)
@@ -1,5 +1,5 @@
 <!-- reference.sgml
-$Header: /cvsroot/pgsql/doc/src/sgml/reference.sgml,v 1.15 2001/03/24 13:21:14 petere Exp $
+$Header: /cvsroot/pgsql/doc/src/sgml/reference.sgml,v 1.16 2001/05/07 00:43:14 tgl Exp $
 
 PostgreSQL Reference Manual
 -->
@@ -26,6 +26,7 @@ PostgreSQL Reference Manual
    &alterGroup;
    &alterTable;
    &alterUser;
+   &analyze;
    &begin;
    &checkpoint;
    &close;
index d38e78a..57d8bb7 100644 (file)
@@ -1,5 +1,5 @@
 <!--
-$Header: /cvsroot/pgsql/doc/src/sgml/xoper.sgml,v 1.11 2000/09/29 20:21:34 petere Exp $
+$Header: /cvsroot/pgsql/doc/src/sgml/xoper.sgml,v 1.12 2001/05/07 00:43:14 tgl Exp $
 -->
 
  <Chapter Id="xoper">
@@ -244,7 +244,7 @@ SELECT (a + b) AS c FROM test_complex;
     only a small fraction.  '&lt;' will accept a fraction that depends on
     where the given constant falls in the range of values for that table
     column (which, it just so happens, is information collected by
-    VACUUM ANALYZE and made available to the selectivity estimator).
+    <command>ANALYZE</command> and made available to the selectivity estimator).
     '&lt;=' will accept a slightly larger fraction than '&lt;' for the same
     comparison constant, but they're close enough to not be worth
     distinguishing, especially since we're not likely to do better than a
index 769f754..86d704e 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/access/common/tupdesc.c,v 1.73 2001/03/22 06:16:06 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/access/common/tupdesc.c,v 1.74 2001/05/07 00:43:15 tgl Exp $
  *
  * NOTES
  *       some of the executor utility code such as "ExecTypeFromTL" should be
@@ -237,16 +237,16 @@ equalTupleDescs(TupleDesc tupdesc1, TupleDesc tupdesc2)
                Form_pg_attribute attr2 = tupdesc2->attrs[i];
 
                /*
-                * We do not need to check every single field here, and in fact
-                * some fields such as attdispersion probably shouldn't be
-                * compared.  We can also disregard attnum (it was used to place
-                * the row in the attrs array) and everything derived from the
-                * column datatype.
+                * We do not need to check every single field here: we can disregard
+                * attrelid, attnum (it was used to place the row in the attrs array)
+                * and everything derived from the column datatype.
                 */
                if (strcmp(NameStr(attr1->attname), NameStr(attr2->attname)) != 0)
                        return false;
                if (attr1->atttypid != attr2->atttypid)
                        return false;
+               if (attr1->attstattarget != attr2->attstattarget)
+                       return false;
                if (attr1->atttypmod != attr2->atttypmod)
                        return false;
                if (attr1->attstorage != attr2->attstorage)
@@ -365,12 +365,12 @@ TupleDescInitEntry(TupleDesc desc,
        else
                MemSet(NameStr(att->attname), 0, NAMEDATALEN);
 
-       att->attdispersion = 0;         /* dummy value */
+       att->attstattarget = 0;
        att->attcacheoff = -1;
        att->atttypmod = typmod;
 
        att->attnum = attributeNumber;
-       att->attnelems = attdim;
+       att->attndims = attdim;
        att->attisset = attisset;
 
        att->attnotnull = false;
@@ -506,7 +506,7 @@ TupleDescMakeSelfReference(TupleDesc desc,
        att->attbyval = true;
        att->attalign = 'i';
        att->attstorage = 'p';
-       att->attnelems = 0;
+       att->attndims = 0;
 }
 
 /* ----------------------------------------------------------------
index 1c5577b..0601089 100644 (file)
@@ -6,7 +6,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/access/gist/gist.c,v 1.72 2001/03/22 03:59:12 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/access/gist/gist.c,v 1.73 2001/05/07 00:43:15 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -84,8 +84,8 @@ static void gist_dumptree(Relation r, int level, BlockNumber blk, OffsetNumber c
 #endif
 
 /*
-** routine to build an index.  Basically calls insert over and over
-*/
+ * routine to build an index.  Basically calls insert over and over
+ */
 Datum
 gistbuild(PG_FUNCTION_ARGS)
 {
@@ -105,7 +105,7 @@ gistbuild(PG_FUNCTION_ARGS)
                                itupdesc;
        Datum           attdata[INDEX_MAX_KEYS];
        char            nulls[INDEX_MAX_KEYS];
-       int                     nhtups,
+       double          nhtups,
                                nitups;
        Node       *pred = indexInfo->ii_Predicate;
 
@@ -172,7 +172,7 @@ gistbuild(PG_FUNCTION_ARGS)
 #endif  /* OMIT_PARTIAL_INDEX */
 
        /* build the index */
-       nhtups = nitups = 0;
+       nhtups = nitups = 0.0;
 
        compvec = (bool *) palloc(sizeof(bool) * indexInfo->ii_NumIndexAttrs);
 
@@ -183,7 +183,7 @@ gistbuild(PG_FUNCTION_ARGS)
        {
                MemoryContextReset(econtext->ecxt_per_tuple_memory);
 
-               nhtups++;
+               nhtups += 1.0;
 
 #ifndef OMIT_PARTIAL_INDEX
 
@@ -196,7 +196,7 @@ gistbuild(PG_FUNCTION_ARGS)
                        slot->val = htup;
                        if (ExecQual((List *) oldPred, econtext, false))
                        {
-                               nitups++;
+                               nitups += 1.0;
                                continue;
                        }
                }
@@ -213,7 +213,7 @@ gistbuild(PG_FUNCTION_ARGS)
                }
 #endif  /* OMIT_PARTIAL_INDEX */
 
-               nitups++;
+               nitups += 1.0;
 
                /*
                 * For the current heap tuple, extract all the attributes we use
index aa76ba2..9617fcc 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.50 2001/03/22 03:59:12 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/access/hash/hash.c,v 1.51 2001/05/07 00:43:15 tgl Exp $
  *
  * NOTES
  *       This file contains only the public interface routines.
@@ -57,7 +57,7 @@ hashbuild(PG_FUNCTION_ARGS)
                                itupdesc;
        Datum           attdata[INDEX_MAX_KEYS];
        char            nulls[INDEX_MAX_KEYS];
-       int                     nhtups,
+       double          nhtups,
                                nitups;
        HashItem        hitem;
        Node       *pred = indexInfo->ii_Predicate;
@@ -109,7 +109,7 @@ hashbuild(PG_FUNCTION_ARGS)
 #endif  /* OMIT_PARTIAL_INDEX */
 
        /* build the index */
-       nhtups = nitups = 0;
+       nhtups = nitups = 0.0;
 
        /* start a heap scan */
        hscan = heap_beginscan(heap, 0, SnapshotNow, 0, (ScanKey) NULL);
@@ -118,7 +118,7 @@ hashbuild(PG_FUNCTION_ARGS)
        {
                MemoryContextReset(econtext->ecxt_per_tuple_memory);
 
-               nhtups++;
+               nhtups += 1.0;
 
 #ifndef OMIT_PARTIAL_INDEX
 
@@ -131,7 +131,7 @@ hashbuild(PG_FUNCTION_ARGS)
                        slot->val = htup;
                        if (ExecQual((List *) oldPred, econtext, false))
                        {
-                               nitups++;
+                               nitups += 1.0;
                                continue;
                        }
                }
@@ -148,7 +148,7 @@ hashbuild(PG_FUNCTION_ARGS)
                }
 #endif  /* OMIT_PARTIAL_INDEX */
 
-               nitups++;
+               nitups += 1.0;
 
                /*
                 * For the current heap tuple, extract all the attributes we use
index fb509ab..2a9df57 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/access/heap/tuptoaster.c,v 1.21 2001/03/25 00:45:20 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/access/heap/tuptoaster.c,v 1.22 2001/05/07 00:43:15 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -167,6 +167,43 @@ heap_tuple_untoast_attr(varattrib *attr)
 
 
 /* ----------
+ * toast_raw_datum_size -
+ *
+ *     Return the raw (detoasted) size of a varlena datum
+ * ----------
+ */
+Size
+toast_raw_datum_size(Datum value)
+{
+       varattrib  *attr = (varattrib *) DatumGetPointer(value);
+       Size            result;
+
+       if (VARATT_IS_COMPRESSED(attr))
+       {
+               /*
+                * va_rawsize shows the original data size, whether the datum
+                * is external or not.
+                */
+               result = attr->va_content.va_compressed.va_rawsize + VARHDRSZ;
+       }
+       else if (VARATT_IS_EXTERNAL(attr))
+       {
+               /*
+                * an uncompressed external attribute has rawsize including the
+                * header (not too consistent!)
+                */
+               result = attr->va_content.va_external.va_rawsize;
+       }
+       else
+       {
+               /* plain untoasted datum */
+               result = VARSIZE(attr);
+       }
+       return result;
+}
+
+
+/* ----------
  * toast_delete -
  *
  *     Cascaded delete toast-entries on DELETE
index 97d99da..f456e0c 100644 (file)
@@ -12,7 +12,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.79 2001/03/22 03:59:15 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtree.c,v 1.80 2001/05/07 00:43:16 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -69,7 +69,7 @@ btbuild(PG_FUNCTION_ARGS)
                                itupdesc;
        Datum           attdata[INDEX_MAX_KEYS];
        char            nulls[INDEX_MAX_KEYS];
-       int                     nhtups,
+       double          nhtups,
                                nitups;
        Node       *pred = indexInfo->ii_Predicate;
 
@@ -156,7 +156,7 @@ btbuild(PG_FUNCTION_ARGS)
 #endif  /* OMIT_PARTIAL_INDEX */
 
        /* build the index */
-       nhtups = nitups = 0;
+       nhtups = nitups = 0.0;
 
        if (usefast)
        {
@@ -196,7 +196,7 @@ btbuild(PG_FUNCTION_ARGS)
 
                MemoryContextReset(econtext->ecxt_per_tuple_memory);
 
-               nhtups++;
+               nhtups += 1.0;
 
 #ifndef OMIT_PARTIAL_INDEX
 
@@ -209,7 +209,7 @@ btbuild(PG_FUNCTION_ARGS)
                        slot->val = htup;
                        if (ExecQual((List *) oldPred, econtext, false))
                        {
-                               nitups++;
+                               nitups += 1.0;
                                continue;
                        }
                }
@@ -226,7 +226,7 @@ btbuild(PG_FUNCTION_ARGS)
                }
 #endif  /* OMIT_PARTIAL_INDEX */
 
-               nitups++;
+               nitups += 1.0;
 
                /*
                 * For the current heap tuple, extract all the attributes we use
index 3752a59..a8c6a13 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtree.c,v 1.61 2001/03/22 03:59:16 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtree.c,v 1.62 2001/05/07 00:43:16 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -100,7 +100,7 @@ rtbuild(PG_FUNCTION_ARGS)
                                itupdesc;
        Datum           attdata[INDEX_MAX_KEYS];
        char            nulls[INDEX_MAX_KEYS];
-       int                     nhtups,
+       double          nhtups,
                                nitups;
        Node       *pred = indexInfo->ii_Predicate;
 
@@ -163,7 +163,7 @@ rtbuild(PG_FUNCTION_ARGS)
 #endif  /* OMIT_PARTIAL_INDEX */
 
        /* count the tuples as we insert them */
-       nhtups = nitups = 0;
+       nhtups = nitups = 0.0;
 
        /* start a heap scan */
        hscan = heap_beginscan(heap, 0, SnapshotNow, 0, (ScanKey) NULL);
@@ -172,7 +172,7 @@ rtbuild(PG_FUNCTION_ARGS)
        {
                MemoryContextReset(econtext->ecxt_per_tuple_memory);
 
-               nhtups++;
+               nhtups += 1.0;
 
 #ifndef OMIT_PARTIAL_INDEX
 
@@ -185,7 +185,7 @@ rtbuild(PG_FUNCTION_ARGS)
                        slot->val = htup;
                        if (ExecQual((List *) oldPred, econtext, false))
                        {
-                               nitups++;
+                               nitups += 1.0;
                                continue;
                        }
                }
@@ -202,7 +202,7 @@ rtbuild(PG_FUNCTION_ARGS)
                }
 #endif  /* OMIT_PARTIAL_INDEX */
 
-               nitups++;
+               nitups += 1.0;
 
                /*
                 * For the current heap tuple, extract all the attributes we use
index c2993fa..cac53f3 100644 (file)
@@ -10,7 +10,7 @@
 #
 #
 # IDENTIFICATION
-#    $Header: /cvsroot/pgsql/src/backend/catalog/Attic/genbki.sh,v 1.19 2001/01/16 22:48:34 tgl Exp $
+#    $Header: /cvsroot/pgsql/src/backend/catalog/Attic/genbki.sh,v 1.20 2001/05/07 00:43:16 tgl Exp $
 #
 # NOTES
 #    non-essential whitespace is removed from the generated file.
@@ -126,10 +126,12 @@ for dir in $INCLUDE_DIRS; do
     fi
 done
 
-# Get INDEX_MAX_KEYS from config.h (who needs consistency?)
+# Get INDEX_MAX_KEYS and DEFAULT_ATTSTATTARGET from config.h
+# (who needs consistency?)
 for dir in $INCLUDE_DIRS; do
     if [ -f "$dir/config.h" ]; then
         INDEXMAXKEYS=`grep '#define[   ]*INDEX_MAX_KEYS' $dir/config.h | $AWK '{ print $3 }'`
+        DEFAULTATTSTATTARGET=`grep '#define[   ]*DEFAULT_ATTSTATTARGET' $dir/config.h | $AWK '{ print $3 }'`
         break
     fi
 done
@@ -168,6 +170,7 @@ sed -e "s/;[        ]*$//g" \
     -e "s/(NameData/(name/g" \
     -e "s/(Oid/(oid/g" \
     -e "s/NAMEDATALEN/$NAMEDATALEN/g" \
+    -e "s/DEFAULT_ATTSTATTARGET/$DEFAULTATTSTATTARGET/g" \
     -e "s/INDEX_MAX_KEYS\*2/$INDEXMAXKEYS2/g" \
     -e "s/INDEX_MAX_KEYS\*4/$INDEXMAXKEYS4/g" \
     -e "s/INDEX_MAX_KEYS/$INDEXMAXKEYS/g" \
index 54867d5..03f16e1 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/catalog/heap.c,v 1.162 2001/03/22 06:16:10 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/catalog/heap.c,v 1.163 2001/05/07 00:43:17 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -96,54 +96,72 @@ static void RemoveStatistics(Relation rel);
 
 /*
  * Note:
- *             Should the executor special case these attributes in the future?
- *             Advantage:      consume 1/2 the space in the ATTRIBUTE relation.
- *             Disadvantage:  having rules to compute values in these tuples may
- *                             be more difficult if not impossible.
+ *             Should the system special case these attributes in the future?
+ *             Advantage:      consume much less space in the ATTRIBUTE relation.
+ *             Disadvantage:  special cases will be all over the place.
  */
 
 static FormData_pg_attribute a1 = {
-       0xffffffff, {"ctid"}, TIDOID, 0, sizeof(ItemPointerData),
-       SelfItemPointerAttributeNumber, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0'
+       0, {"ctid"}, TIDOID, 0, sizeof(ItemPointerData),
+       SelfItemPointerAttributeNumber, 0, -1, -1,
+       false, 'p', false, 'i', false, false
 };
 
 static FormData_pg_attribute a2 = {
-       0xffffffff, {"oid"}, OIDOID, 0, sizeof(Oid),
-       ObjectIdAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'
+       0, {"oid"}, OIDOID, 0, sizeof(Oid),
+       ObjectIdAttributeNumber, 0, -1, -1,
+       true, 'p', false, 'i', false, false
 };
 
 static FormData_pg_attribute a3 = {
-       0xffffffff, {"xmin"}, XIDOID, 0, sizeof(TransactionId),
-       MinTransactionIdAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'
+       0, {"xmin"}, XIDOID, 0, sizeof(TransactionId),
+       MinTransactionIdAttributeNumber, 0, -1, -1,
+       true, 'p', false, 'i', false, false
 };
 
 static FormData_pg_attribute a4 = {
-       0xffffffff, {"cmin"}, CIDOID, 0, sizeof(CommandId),
-       MinCommandIdAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'
+       0, {"cmin"}, CIDOID, 0, sizeof(CommandId),
+       MinCommandIdAttributeNumber, 0, -1, -1,
+       true, 'p', false, 'i', false, false
 };
 
 static FormData_pg_attribute a5 = {
-       0xffffffff, {"xmax"}, XIDOID, 0, sizeof(TransactionId),
-       MaxTransactionIdAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'
+       0, {"xmax"}, XIDOID, 0, sizeof(TransactionId),
+       MaxTransactionIdAttributeNumber, 0, -1, -1,
+       true, 'p', false, 'i', false, false
 };
 
 static FormData_pg_attribute a6 = {
-       0xffffffff, {"cmax"}, CIDOID, 0, sizeof(CommandId),
-       MaxCommandIdAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'
+       0, {"cmax"}, CIDOID, 0, sizeof(CommandId),
+       MaxCommandIdAttributeNumber, 0, -1, -1,
+       true, 'p', false, 'i', false, false
 };
 
 /*
- We decide to call this attribute "tableoid" rather than say
-"classoid" on the basis that in the future there may be more than one
-table of a particular class/type. In any case table is still the word
-used in SQL.
-*/
+ * We decided to call this attribute "tableoid" rather than say
+ * "classoid" on the basis that in the future there may be more than one
+ * table of a particular class/type. In any case table is still the word
+ * used in SQL.
+ */
 static FormData_pg_attribute a7 = {
-       0xffffffff, {"tableoid"}, OIDOID, 0, sizeof(Oid),
-       TableOidAttributeNumber, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'
+       0, {"tableoid"}, OIDOID, 0, sizeof(Oid),
+       TableOidAttributeNumber, 0, -1, -1,
+       true, 'p', false, 'i', false, false
 };
 
-static Form_pg_attribute HeapAtt[] = {&a1, &a2, &a3, &a4, &a5, &a6, &a7};
+static Form_pg_attribute SysAtt[] = {&a1, &a2, &a3, &a4, &a5, &a6, &a7};
+
+/*
+ * This function returns a Form_pg_attribute pointer for a system attribute.
+ */
+Form_pg_attribute
+SystemAttributeDefinition(AttrNumber attno)
+{
+       if (attno >= 0 || attno < - (int) lengthof(SysAtt))
+               elog(ERROR, "SystemAttributeDefinition: invalid attribute number %d",
+                        attno);
+       return SysAtt[-attno - 1];
+}
 
 /* ----------------------------------------------------------------
  *                             XXX END OF UGLY HARD CODED BADNESS XXX
@@ -380,32 +398,6 @@ heap_storage_create(Relation rel)
  *             8) the relations are closed and the new relation's oid
  *                is returned.
  *
- * old comments:
- *             A new relation is inserted into the RELATION relation
- *             with the specified attribute(s) (newly inserted into
- *             the ATTRIBUTE relation).  How does concurrency control
- *             work?  Is it automatic now?  Expects the caller to have
- *             attname, atttypid, atttyparg, attproc, and attlen domains filled.
- *             Create fills the attnum domains sequentually from zero,
- *             fills the attdispersion domains with zeros, and fills the
- *             attrelid fields with the relid.
- *
- *             scan relation catalog for name conflict
- *             scan type catalog for typids (if not arg)
- *             create and insert attribute(s) into attribute catalog
- *             create new relation
- *             insert new relation into attribute catalog
- *
- *             Should coordinate with heap_create_with_catalog(). Either
- *             it should not be called or there should be a way to prevent
- *             the relation from being removed at the end of the
- *             transaction if it is successful ('u'/'r' may be enough).
- *             Also, if the transaction does not commit, then the
- *             relation should be removed.
- *
- *             XXX amcreate ignores "off" when inserting (for now).
- *             XXX amcreate (like the other utilities) needs to understand indexes.
- *
  * ----------------------------------------------------------------
  */
 
@@ -432,14 +424,14 @@ CheckAttributeNames(TupleDesc tupdesc)
         */
        for (i = 0; i < natts; i++)
        {
-               for (j = 0; j < (int) (sizeof(HeapAtt) / sizeof(HeapAtt[0])); j++)
+               for (j = 0; j < (int) lengthof(SysAtt); j++)
                {
-                       if (strcmp(NameStr(HeapAtt[j]->attname),
+                       if (strcmp(NameStr(SysAtt[j]->attname),
                                           NameStr(tupdesc->attrs[i]->attname)) == 0)
                        {
                                elog(ERROR, "Attribute '%s' has a name conflict"
                                         "\n\tName matches an existing system attribute",
-                                        NameStr(HeapAtt[j]->attname));
+                                        NameStr(SysAtt[j]->attname));
                        }
                }
                if (tupdesc->attrs[i]->atttypid == UNKNOWNOID)
@@ -574,7 +566,7 @@ AddNewAttributeTuples(Oid new_rel_oid,
                /* Fill in the correct relation OID */
                (*dpp)->attrelid = new_rel_oid;
                /* Make sure these are OK, too */
-               (*dpp)->attdispersion = 0;
+               (*dpp)->attstattarget = DEFAULT_ATTSTATTARGET;
                (*dpp)->attcacheoff = -1;
 
                tup = heap_addheader(Natts_pg_attribute,
@@ -593,14 +585,14 @@ AddNewAttributeTuples(Oid new_rel_oid,
        /*
         * next we add the system attributes..
         */
-       dpp = HeapAtt;
+       dpp = SysAtt;
        for (i = 0; i < -1 - FirstLowInvalidHeapAttributeNumber; i++)
        {
                /* Fill in the correct relation OID */
                /* HACK: we are writing on static data here */
                (*dpp)->attrelid = new_rel_oid;
                /* Unneeded since they should be OK in the constant data anyway */
-               /* (*dpp)->attdispersion = 0; */
+               /* (*dpp)->attstattarget = 0; */
                /* (*dpp)->attcacheoff = -1; */
 
                tup = heap_addheader(Natts_pg_attribute,
@@ -669,8 +661,23 @@ AddNewRelationTuple(Relation pg_class_desc,
         * save. (NOTE: CREATE INDEX inserts the same bogus estimates if it
         * finds the relation has 0 rows and pages. See index.c.)
         */
-       new_rel_reltup->relpages = 10;          /* bogus estimates */
-       new_rel_reltup->reltuples = 1000;
+       switch (relkind)
+       {
+               case RELKIND_RELATION:
+               case RELKIND_INDEX:
+               case RELKIND_TOASTVALUE:
+                       new_rel_reltup->relpages = 10;  /* bogus estimates */
+                       new_rel_reltup->reltuples = 1000;
+                       break;
+               case RELKIND_SEQUENCE:
+                       new_rel_reltup->relpages = 1;
+                       new_rel_reltup->reltuples = 1;
+                       break;
+               default:                                /* views, etc */
+                       new_rel_reltup->relpages = 0;
+                       new_rel_reltup->reltuples = 0;
+                       break;
+       }
 
        new_rel_reltup->relowner = GetUserId();
        new_rel_reltup->reltype = new_type_oid;
index 2adb30e..5eefab1 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/catalog/index.c,v 1.145 2001/04/02 14:34:25 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/catalog/index.c,v 1.146 2001/05/07 00:43:17 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -55,7 +55,7 @@
  */
 #define AVG_ATTR_SIZE 8
 #define NTUPLES_PER_PAGE(natts) \
-       ((BLCKSZ - MAXALIGN(sizeof (PageHeaderData))) / \
+       ((BLCKSZ - MAXALIGN(sizeof(PageHeaderData))) / \
        ((natts) * AVG_ATTR_SIZE + MAXALIGN(sizeof(HeapTupleHeaderData))))
 
 /* non-export function prototypes */
@@ -99,39 +99,6 @@ IsReindexProcessing(void)
 }
 
 /* ----------------------------------------------------------------
- *       sysatts is a structure containing attribute tuple forms
- *       for system attributes (numbered -1, -2, ...).  This really
- *       should be generated or eliminated or moved elsewhere. -cim 1/19/91
- *
- * typedef struct FormData_pg_attribute {
- *             Oid                             attrelid;
- *             NameData                attname;
- *             Oid                             atttypid;
- *             uint32                  attnvals;
- *             int16                   attlen;
- *             AttrNumber              attnum;
- *             uint32                  attnelems;
- *             int32                   attcacheoff;
- *             int32                   atttypmod;
- *             bool                    attbyval;
- *             bool                    attisset;
- *             char                    attalign;
- *             bool                    attnotnull;
- *             bool                    atthasdef;
- * } FormData_pg_attribute;
- *
- * ----------------------------------------------------------------
- */
-static FormData_pg_attribute sysatts[] = {
-       {0, {"ctid"}, TIDOID, 0, 6, -1, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0'},
-       {0, {"oid"}, OIDOID, 0, 4, -2, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'},
-       {0, {"xmin"}, XIDOID, 0, 4, -3, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'},
-       {0, {"cmin"}, CIDOID, 0, 4, -4, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'},
-       {0, {"xmax"}, XIDOID, 0, 4, -5, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'},
-       {0, {"cmax"}, CIDOID, 0, 4, -6, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0'},
-};
-
-/* ----------------------------------------------------------------
  *             GetHeapRelationOid
  * ----------------------------------------------------------------
  */
@@ -250,7 +217,6 @@ ConstructTupleDescriptor(Relation heapRelation,
        for (i = 0; i < numatts; i++)
        {
                AttrNumber      atnum;          /* attributeNumber[attributeOffset] */
-               AttrNumber      atind;
                Form_pg_attribute from;
                Form_pg_attribute to;
 
@@ -264,16 +230,9 @@ ConstructTupleDescriptor(Relation heapRelation,
                {
 
                        /*
-                        * here we are indexing on a system attribute (-1...-n) so we
-                        * convert atnum into a usable index 0...n-1 so we can use it
-                        * to dereference the array sysatts[] which stores tuple
-                        * descriptor information for system attributes.
+                        * here we are indexing on a system attribute (-1...-n)
                         */
-                       if (atnum <= FirstLowInvalidHeapAttributeNumber || atnum >= 0)
-                               elog(ERROR, "Cannot create index on system attribute: attribute number out of range (%d)", atnum);
-                       atind = (-atnum) - 1;
-
-                       from = &sysatts[atind];
+                       from = SystemAttributeDefinition(atnum);
                }
                else
                {
@@ -284,9 +243,8 @@ ConstructTupleDescriptor(Relation heapRelation,
                        if (atnum > natts)
                                elog(ERROR, "Cannot create index: attribute %d does not exist",
                                         atnum);
-                       atind = AttrNumberGetAttrOffset(atnum);
 
-                       from = heapTupDesc->attrs[atind];
+                       from = heapTupDesc->attrs[AttrNumberGetAttrOffset(atnum)];
                }
 
                /*
@@ -303,10 +261,10 @@ ConstructTupleDescriptor(Relation heapRelation,
                 */
                to->attnum = i + 1;
 
-               to->attdispersion = 0.0;
+               to->attstattarget = 0;
+               to->attcacheoff = -1;
                to->attnotnull = false;
                to->atthasdef = false;
-               to->attcacheoff = -1;
 
                /*
                 * We do not yet have the correct relation OID for the index, so
@@ -1542,10 +1500,14 @@ setNewRelfilenode(Relation relation)
 
 /* ----------------
  *             UpdateStats
+ *
+ * Update pg_class' relpages and reltuples statistics for the given relation
+ * (which can be either a table or an index).  Note that this is not used
+ * in the context of VACUUM.
  * ----------------
  */
 void
-UpdateStats(Oid relid, long reltuples)
+UpdateStats(Oid relid, double reltuples)
 {
        Relation        whichRel;
        Relation        pg_class;
@@ -1636,6 +1598,10 @@ UpdateStats(Oid relid, long reltuples)
         * with zero size statistics until a VACUUM is done.  The optimizer
         * will generate very bad plans if the stats claim the table is empty
         * when it is actually sizable.  See also CREATE TABLE in heap.c.
+        *
+        * Note: this path is also taken during bootstrap, because bootstrap.c
+        * passes reltuples = 0 after loading a table.  We have to estimate some
+        * number for reltuples based on the actual number of pages.
         */
        relpages = RelationGetNumberOfBlocks(whichRel);
 
@@ -1689,15 +1655,15 @@ UpdateStats(Oid relid, long reltuples)
 
                for (i = 0; i < Natts_pg_class; i++)
                {
-                       nulls[i] = heap_attisnull(tuple, i + 1) ? 'n' : ' ';
+                       nulls[i] = ' ';
                        replace[i] = ' ';
                        values[i] = (Datum) NULL;
                }
 
                replace[Anum_pg_class_relpages - 1] = 'r';
-               values[Anum_pg_class_relpages - 1] = (Datum) relpages;
+               values[Anum_pg_class_relpages - 1] = Int32GetDatum(relpages);
                replace[Anum_pg_class_reltuples - 1] = 'r';
-               values[Anum_pg_class_reltuples - 1] = (Datum) reltuples;
+               values[Anum_pg_class_reltuples - 1] = Float4GetDatum((float4) reltuples);
                newtup = heap_modifytuple(tuple, pg_class, values, nulls, replace);
                simple_heap_update(pg_class, &tuple->t_self, newtup);
                if (!IsIgnoringSystemIndexes())
@@ -1741,7 +1707,7 @@ DefaultBuild(Relation heapRelation,
        TupleDesc       heapDescriptor;
        Datum           datum[INDEX_MAX_KEYS];
        char            nullv[INDEX_MAX_KEYS];
-       long            reltuples,
+       double          reltuples,
                                indtuples;
        Node       *predicate = indexInfo->ii_Predicate;
 
@@ -1796,7 +1762,7 @@ DefaultBuild(Relation heapRelation,
                                                  0,    /* number of keys */
                                                  (ScanKey) NULL);              /* scan key */
 
-       reltuples = indtuples = 0;
+       reltuples = indtuples = 0.0;
 
        /*
         * for each tuple in the base relation, we create an index tuple and
@@ -1808,7 +1774,7 @@ DefaultBuild(Relation heapRelation,
        {
                MemoryContextReset(econtext->ecxt_per_tuple_memory);
 
-               reltuples++;
+               reltuples += 1.0;
 
 #ifndef OMIT_PARTIAL_INDEX
 
@@ -1821,7 +1787,7 @@ DefaultBuild(Relation heapRelation,
                        slot->val = heapTuple;
                        if (ExecQual((List *) oldPred, econtext, false))
                        {
-                               indtuples++;
+                               indtuples += 1.0;
                                continue;
                        }
                }
@@ -1838,7 +1804,7 @@ DefaultBuild(Relation heapRelation,
                }
 #endif  /* OMIT_PARTIAL_INDEX */
 
-               indtuples++;
+               indtuples += 1.0;
 
                /*
                 * FormIndexDatum fills in its datum and null parameters with
index 88e5686..24cc7a8 100644 (file)
@@ -8,19 +8,16 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/commands/analyze.c,v 1.16 2001/03/22 06:16:11 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/commands/analyze.c,v 1.17 2001/05/07 00:43:17 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"
 
-#include <sys/types.h>
-#include <sys/file.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <unistd.h>
+#include <math.h>
 
 #include "access/heapam.h"
+#include "access/tuptoaster.h"
 #include "catalog/catname.h"
 #include "catalog/indexing.h"
 #include "catalog/pg_operator.h"
 #include "commands/vacuum.h"
 #include "miscadmin.h"
 #include "parser/parse_oper.h"
-#include "tcop/tcopprot.h"
 #include "utils/acl.h"
 #include "utils/builtins.h"
+#include "utils/datum.h"
 #include "utils/fmgroids.h"
-#include "utils/inval.h"
 #include "utils/syscache.h"
+#include "utils/tuplesort.h"
 
-#define swapLong(a,b)  {long tmp; tmp=a; a=b; b=tmp;}
-#define swapInt(a,b)   {int tmp; tmp=a; a=b; b=tmp;}
-#define swapDatum(a,b) {Datum tmp; tmp=a; a=b; b=tmp;}
-#define VacAttrStatsEqValid(stats) ( stats->f_cmpeq.fn_addr != NULL )
-#define VacAttrStatsLtGtValid(stats) ( stats->f_cmplt.fn_addr != NULL && \
-                                                                  stats->f_cmpgt.fn_addr != NULL && \
-                                                                  RegProcedureIsValid(stats->outfunc) )
 
+/*
+ * Analysis algorithms supported
+ */
+typedef enum {
+       ALG_MINIMAL = 1,                        /* Compute only most-common-values */
+       ALG_SCALAR                                      /* Compute MCV, histogram, sort correlation */
+} AlgCode;
+
+/*
+ * To avoid consuming too much memory during analysis and/or too much space
+ * in the resulting pg_statistic rows, we ignore varlena datums that are wider
+ * than WIDTH_THRESHOLD (after detoasting!).  This is legitimate for MCV
+ * and distinct-value calculations since a wide value is unlikely to be
+ * duplicated at all, much less be a most-common value.  For the same reason,
+ * ignoring wide values will not affect our estimates of histogram bin
+ * boundaries very much.
+ */
+#define WIDTH_THRESHOLD  256
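
For context, a minimal sketch (not part of the patch) of how this threshold is meant to be applied: check the detoasted size with toast_raw_datum_size() before doing any detoasting, and skip the value entirely if it is too wide. The helper name value_is_analyzable is hypothetical; the real check appears inline in compute_minimal_stats() further down.

/* Illustrative sketch only, not part of this patch: the helper name is
 * hypothetical, but the check mirrors the one used in compute_minimal_stats. */
static bool
value_is_analyzable(Form_pg_attribute attr, Datum value)
{
	/* Only varlena types can be toasted or arbitrarily wide */
	if (!attr->attbyval && attr->attlen == -1)
	{
		/* toast_raw_datum_size reports the detoasted size without detoasting */
		if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
			return false;		/* too wide: ignore for MCV/histogram purposes */
	}
	return true;
}
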
+
+/*
+ * We build one of these structs for each attribute (column) that is to be
+ * analyzed.  The struct and subsidiary data are in TransactionCommandContext,
+ * so they live until the end of the ANALYZE operation.
+ */
+typedef struct
+{
+       /* These fields are set up by examine_attribute */
+       int                     attnum;                 /* attribute number */
+       AlgCode         algcode;                /* Which algorithm to use for this column */
+       int                     minrows;                /* Minimum # of rows needed for stats */
+       Form_pg_attribute attr;         /* copy of pg_attribute row for column */
+       Form_pg_type attrtype;          /* copy of pg_type row for column */
+       Oid                     eqopr;                  /* '=' operator for datatype, if any */
+       Oid                     eqfunc;                 /* and associated function */
+       Oid                     ltopr;                  /* '<' operator for datatype, if any */
+
+       /* These fields are filled in by the actual statistics-gathering routine */
+       bool            stats_valid;
+       float4          stanullfrac;    /* fraction of entries that are NULL */
+       int4            stawidth;               /* average width */
+       float4          stadistinct;    /* # distinct values */
+       int2            stakind[STATISTIC_NUM_SLOTS];
+       Oid                     staop[STATISTIC_NUM_SLOTS];
+       int                     numnumbers[STATISTIC_NUM_SLOTS];
+       float4     *stanumbers[STATISTIC_NUM_SLOTS];
+       int                     numvalues[STATISTIC_NUM_SLOTS];
+       Datum      *stavalues[STATISTIC_NUM_SLOTS];
+} VacAttrStats;
+
+
+typedef struct
+{
+       Datum           value;                  /* a data value */
+       int                     tupno;                  /* position index for tuple it came from */
+} ScalarItem;
+
+typedef struct
+{
+       int                     count;                  /* # of duplicates */
+       int                     first;                  /* values[] index of first occurrence */
+} ScalarMCVItem;
+
+
+#define swapInt(a,b)   {int _tmp; _tmp=a; a=b; b=_tmp;}
+#define swapDatum(a,b) {Datum _tmp; _tmp=a; a=b; b=_tmp;}
 
-static void attr_stats(Relation onerel, int attr_cnt, VacAttrStats *vacattrstats, HeapTuple tuple);
-static void bucketcpy(Form_pg_attribute attr, Datum value, Datum *bucket, int *bucket_len);
-static void update_attstats(Oid relid, int natts, VacAttrStats *vacattrstats);
-static void del_stats(Oid relid, int attcnt, int *attnums);
+
+static int MESSAGE_LEVEL;
+
+/* context information for compare_scalars() */
+static FmgrInfo *datumCmpFn;
+static SortFunctionKind datumCmpFnKind;
+static int *datumCmpTupnoLink;
+
+
+static VacAttrStats *examine_attribute(Relation onerel, int attnum);
+static int acquire_sample_rows(Relation onerel, HeapTuple *rows,
+                                                          int targrows, long *totalrows);
+static double random_fract(void);
+static double init_selection_state(int n);
+static long select_next_random_record(long t, int n, double *stateptr);
+static int compare_rows(const void *a, const void *b);
+static int compare_scalars(const void *a, const void *b);
+static int compare_mcvs(const void *a, const void *b);
+static OffsetNumber get_page_max_offset(Relation relation,
+                                                                               BlockNumber blocknumber);
+static void compute_minimal_stats(VacAttrStats *stats,
+                                                                 TupleDesc tupDesc, long totalrows,
+                                                                 HeapTuple *rows, int numrows);
+static void compute_scalar_stats(VacAttrStats *stats,
+                                                                TupleDesc tupDesc, long totalrows,
+                                                                HeapTuple *rows, int numrows);
+static void update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats);
 
 
 /*
- *     analyze_rel() -- analyze relation
+ *     analyze_rel() -- analyze one relation
  */
 void
-analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL)
+analyze_rel(Oid relid, VacuumStmt *vacstmt)
 {
-       HeapTuple       tuple;
        Relation        onerel;
-       int32           i;
-       int                     attr_cnt,
-                          *attnums = NULL;
        Form_pg_attribute *attr;
-       VacAttrStats *vacattrstats;
-       HeapScanDesc scan;
+       int                     attr_cnt,
+                               tcnt,
+                               i;
+       VacAttrStats **vacattrstats;
+       int                     targrows,
+                               numrows;
+       long            totalrows;
+       HeapTuple  *rows;
+       HeapTuple       tuple;
+
+       if (vacstmt->verbose)
+               MESSAGE_LEVEL = NOTICE;
+       else
+               MESSAGE_LEVEL = DEBUG;
 
+       /*
+        * Begin a transaction for analyzing this relation.
+        *
+        * Note: All memory allocated during ANALYZE will live in
+        * TransactionCommandContext or a subcontext thereof, so it will
+        * all be released by transaction commit at the end of this routine.
+        */
        StartTransactionCommand();
 
        /*
@@ -76,7 +169,7 @@ analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL)
 
        /*
         * Race condition -- if the pg_class tuple has gone away since the
-        * last time we saw it, we don't need to vacuum it.
+        * last time we saw it, we don't need to process it.
         */
        tuple = SearchSysCache(RELOID,
                                                   ObjectIdGetDatum(relid),
@@ -88,8 +181,7 @@ analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL)
        }
 
        /*
-        * We can VACUUM ANALYZE any table except pg_statistic. see
-        * update_relstats
+        * We can ANALYZE any table except pg_statistic. See update_attstats
         */
        if (strcmp(NameStr(((Form_pg_class) GETSTRUCT(tuple))->relname),
                           StatisticRelationName) == 0)
@@ -100,586 +192,1466 @@ analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL)
        }
        ReleaseSysCache(tuple);
 
+       /*
+        * Open the class, getting only a read lock on it, and check permissions
+        */
        onerel = heap_open(relid, AccessShareLock);
 
        if (!pg_ownercheck(GetUserId(), RelationGetRelationName(onerel),
                                           RELNAME))
        {
-
-               /*
-                * we already did an elog during vacuum elog(NOTICE, "Skipping
-                * \"%s\" --- only table owner can VACUUM it",
-                * RelationGetRelationName(onerel));
-                */
+               /* No need for a notice if we already complained during VACUUM */
+               if (!vacstmt->vacuum)
+                       elog(NOTICE, "Skipping \"%s\" --- only table owner can ANALYZE it",
+                                RelationGetRelationName(onerel));
                heap_close(onerel, NoLock);
                CommitTransactionCommand();
                return;
        }
 
-       elog(MESSAGE_LEVEL, "Analyzing...");
+       elog(MESSAGE_LEVEL, "Analyzing %s", RelationGetRelationName(onerel));
 
-       attr_cnt = onerel->rd_att->natts;
+       /*
+        * Determine which columns to analyze
+        *
+        * Note that system attributes are never analyzed.
+        */
        attr = onerel->rd_att->attrs;
+       attr_cnt = onerel->rd_att->natts;
 
-       if (anal_cols2 != NIL)
+       if (vacstmt->va_cols != NIL)
        {
-               int                     tcnt = 0;
                List       *le;
 
-               if (length(anal_cols2) > attr_cnt)
-                       elog(ERROR, "vacuum: too many attributes specified for relation %s",
-                                RelationGetRelationName(onerel));
-               attnums = (int *) palloc(attr_cnt * sizeof(int));
-               foreach(le, anal_cols2)
+               vacattrstats = (VacAttrStats **) palloc(length(vacstmt->va_cols) *
+                                                                                               sizeof(VacAttrStats *));
+               tcnt = 0;
+               foreach(le, vacstmt->va_cols)
                {
-                       char       *col = (char *) lfirst(le);
+                       char       *col = strVal(lfirst(le));
 
                        for (i = 0; i < attr_cnt; i++)
                        {
                                if (namestrcmp(&(attr[i]->attname), col) == 0)
                                        break;
                        }
-                       if (i < attr_cnt)       /* found */
-                               attnums[tcnt++] = i;
-                       else
-                       {
-                               elog(ERROR, "vacuum: there is no attribute %s in %s",
+                       if (i >= attr_cnt)
+                               elog(ERROR, "ANALYZE: there is no attribute %s in %s",
                                         col, RelationGetRelationName(onerel));
-                       }
+                       vacattrstats[tcnt] = examine_attribute(onerel, i+1);
+                       if (vacattrstats[tcnt] != NULL)
+                               tcnt++;
+               }
+               attr_cnt = tcnt;
+       }
+       else
+       {
+               vacattrstats = (VacAttrStats **) palloc(attr_cnt *
+                                                                                               sizeof(VacAttrStats *));
+               tcnt = 0;
+               for (i = 0; i < attr_cnt; i++)
+               {
+                       vacattrstats[tcnt] = examine_attribute(onerel, i+1);
+                       if (vacattrstats[tcnt] != NULL)
+                               tcnt++;
                }
                attr_cnt = tcnt;
        }
 
-       vacattrstats = (VacAttrStats *) palloc(attr_cnt * sizeof(VacAttrStats));
+       /*
+        * Quit if no analyzable columns
+        */
+       if (attr_cnt <= 0)
+       {
+               heap_close(onerel, NoLock);
+               CommitTransactionCommand();
+               return;
+       }
 
+       /*
+        * Determine how many rows we need to sample, using the worst case
+        * from all analyzable columns.  We use a lower bound of 100 rows
+        * to avoid possible overflow in Vitter's algorithm.
+        */
+       targrows = 100;
        for (i = 0; i < attr_cnt; i++)
        {
-               Operator        func_operator;
-               VacAttrStats *stats;
-
-               stats = &vacattrstats[i];
-               stats->attr = palloc(ATTRIBUTE_TUPLE_SIZE);
-               memcpy(stats->attr, attr[((attnums) ? attnums[i] : i)],
-                          ATTRIBUTE_TUPLE_SIZE);
-               stats->best = stats->guess1 = stats->guess2 = 0;
-               stats->max = stats->min = 0;
-               stats->best_len = stats->guess1_len = stats->guess2_len = 0;
-               stats->max_len = stats->min_len = 0;
-               stats->initialized = false;
-               stats->best_cnt = stats->guess1_cnt = stats->guess1_hits = stats->guess2_hits = 0;
-               stats->max_cnt = stats->min_cnt = stats->null_cnt = stats->nonnull_cnt = 0;
-
-               func_operator = compatible_oper("=",
-                                                                               stats->attr->atttypid,
-                                                                               stats->attr->atttypid,
-                                                                               true);
-               if (func_operator != NULL)
-               {
-                       fmgr_info(oprfuncid(func_operator), &(stats->f_cmpeq));
-                       ReleaseSysCache(func_operator);
-               }
-               else
-                       stats->f_cmpeq.fn_addr = NULL;
+               if (targrows < vacattrstats[i]->minrows)
+                       targrows = vacattrstats[i]->minrows;
+       }
+
+       /*
+        * Acquire the sample rows
+        */
+       rows = (HeapTuple *) palloc(targrows * sizeof(HeapTuple));
+       numrows = acquire_sample_rows(onerel, rows, targrows, &totalrows);
 
-               func_operator = compatible_oper("<",
-                                                                               stats->attr->atttypid,
-                                                                               stats->attr->atttypid,
-                                                                               true);
-               if (func_operator != NULL)
+       /*
+        * If we are running a standalone ANALYZE, update pages/tuples stats
+        * in pg_class.  We have the accurate page count from heap_beginscan,
+        * but only an approximate number of tuples; therefore, if we are
+        * part of VACUUM ANALYZE do *not* overwrite the accurate count already
+        * inserted by VACUUM.
+        */
+       if (!vacstmt->vacuum)
+               vac_update_relstats(RelationGetRelid(onerel),
+                                                       onerel->rd_nblocks,
+                                                       (double) totalrows,
+                                                       RelationGetForm(onerel)->relhasindex);
+
+       /*
+        * Compute the statistics.  Temporary results during the calculations
+        * for each column are stored in a child context.  The calc routines
+        * are responsible for making sure that whatever they store into the
+        * VacAttrStats structure is allocated in TransactionCommandContext.
+        */
+       if (numrows > 0)
+       {
+               MemoryContext col_context,
+                                       old_context;
+
+               col_context = AllocSetContextCreate(CurrentMemoryContext,
+                                                                                       "Analyze Column",
+                                                                                       ALLOCSET_DEFAULT_MINSIZE,
+                                                                                       ALLOCSET_DEFAULT_INITSIZE,
+                                                                                       ALLOCSET_DEFAULT_MAXSIZE);
+               old_context = MemoryContextSwitchTo(col_context);
+               for (i = 0; i < attr_cnt; i++)
                {
-                       fmgr_info(oprfuncid(func_operator), &(stats->f_cmplt));
-                       stats->op_cmplt = oprid(func_operator);
-                       ReleaseSysCache(func_operator);
+                       switch (vacattrstats[i]->algcode)
+                       {
+                               case ALG_MINIMAL:
+                                       compute_minimal_stats(vacattrstats[i],
+                                                                                 onerel->rd_att, totalrows,
+                                                                                 rows, numrows);
+                                       break;
+                               case ALG_SCALAR:
+                                       compute_scalar_stats(vacattrstats[i],
+                                                                                onerel->rd_att, totalrows,
+                                                                                rows, numrows);
+                                       break;
+                       }
+                       MemoryContextResetAndDeleteChildren(col_context);
                }
-               else
+               MemoryContextSwitchTo(old_context);
+               MemoryContextDelete(col_context);
+
+               /*
+                * Emit the completed stats rows into pg_statistic, replacing any
+                * previous statistics for the target columns.  (If there are stats
+                * in pg_statistic for columns we didn't process, we leave them alone.)
+                */
+               update_attstats(relid, attr_cnt, vacattrstats);
+       }
+
+       /*
+        * Close source relation now, but keep lock so that no one deletes it
+        * before we commit.  (If someone did, they'd fail to clean up the
+        * entries we made in pg_statistic.)
+        */
+       heap_close(onerel, NoLock);
+
+       /* Commit and release working memory */
+       CommitTransactionCommand();
+}
+
+/*
+ * examine_attribute -- pre-analysis of a single column
+ *
+ * Determine whether the column is analyzable; if so, create and initialize
+ * a VacAttrStats struct for it.  If not, return NULL.
+ */
+static VacAttrStats *
+examine_attribute(Relation onerel, int attnum)
+{
+       Form_pg_attribute attr = onerel->rd_att->attrs[attnum-1];
+       Operator        func_operator;
+       Oid                     oprrest;
+       HeapTuple       typtuple;
+       Oid                     eqopr = InvalidOid;
+       Oid                     eqfunc = InvalidOid;
+       Oid                     ltopr = InvalidOid;
+       VacAttrStats *stats;
+
+       /* Don't analyze column if user has specified not to */
+       if (attr->attstattarget <= 0)
+               return NULL;
+
+       /* If column has no "=" operator, we can't do much of anything */
+       func_operator = compatible_oper("=",
+                                                                       attr->atttypid,
+                                                                       attr->atttypid,
+                                                                       true);
+       if (func_operator != NULL)
+       {
+               oprrest = ((Form_pg_operator) GETSTRUCT(func_operator))->oprrest;
+               if (oprrest == F_EQSEL)
                {
-                       stats->f_cmplt.fn_addr = NULL;
-                       stats->op_cmplt = InvalidOid;
+                       eqopr = oprid(func_operator);
+                       eqfunc = oprfuncid(func_operator);
                }
+               ReleaseSysCache(func_operator);
+       }
+       if (!OidIsValid(eqfunc))
+               return NULL;
 
-               func_operator = compatible_oper(">",
-                                                                               stats->attr->atttypid,
-                                                                               stats->attr->atttypid,
-                                                                               true);
-               if (func_operator != NULL)
+       /*
+        * If we have "=" then we're at least able to do the minimal algorithm,
+        * so start filling in a VacAttrStats struct.
+        */
+       stats = (VacAttrStats *) palloc(sizeof(VacAttrStats));
+       MemSet(stats, 0, sizeof(VacAttrStats));
+       stats->attnum = attnum;
+       stats->attr = (Form_pg_attribute) palloc(ATTRIBUTE_TUPLE_SIZE);
+       memcpy(stats->attr, attr, ATTRIBUTE_TUPLE_SIZE);
+       typtuple = SearchSysCache(TYPEOID,
+                                                         ObjectIdGetDatum(attr->atttypid),
+                                                         0, 0, 0);
+       if (!HeapTupleIsValid(typtuple))
+               elog(ERROR, "cache lookup of type %u failed", attr->atttypid);
+       stats->attrtype = (Form_pg_type) palloc(sizeof(FormData_pg_type));
+       memcpy(stats->attrtype, GETSTRUCT(typtuple), sizeof(FormData_pg_type));
+       ReleaseSysCache(typtuple);
+       stats->eqopr = eqopr;
+       stats->eqfunc = eqfunc;
+
+       /* Is there a "<" operator with suitable semantics? */
+       func_operator = compatible_oper("<",
+                                                                       attr->atttypid,
+                                                                       attr->atttypid,
+                                                                       true);
+       if (func_operator != NULL)
+       {
+               oprrest = ((Form_pg_operator) GETSTRUCT(func_operator))->oprrest;
+               if (oprrest == F_SCALARLTSEL)
                {
-                       fmgr_info(oprfuncid(func_operator), &(stats->f_cmpgt));
-                       ReleaseSysCache(func_operator);
+                       ltopr = oprid(func_operator);
                }
-               else
-                       stats->f_cmpgt.fn_addr = NULL;
+               ReleaseSysCache(func_operator);
+       }
+       stats->ltopr = ltopr;
+
+       /*
+        * Determine the algorithm to use (this will get more complicated later)
+        */
+       if (OidIsValid(ltopr))
+       {
+               /* Seems to be a scalar datatype */
+               stats->algcode = ALG_SCALAR;
+               /*--------------------
+                * The following choice of minrows is based on the paper
+                * "Random sampling for histogram construction: how much is enough?"
+                * by Surajit Chaudhuri, Rajeev Motwani and Vivek Narasayya, in
+                * Proceedings of ACM SIGMOD International Conference on Management
+                * of Data, 1998, Pages 436-447.  Their Corollary 1 to Theorem 5
+                * says that for table size n, histogram size k, maximum relative
+                * error in bin size f, and error probability gamma, the minimum
+                * random sample size is
+                *              r = 4 * k * ln(2*n/gamma) / f^2
+                * Taking f = 0.5, gamma = 0.01, n = 1 million rows, we obtain
+                *              r = 305.82 * k
+                * Note that because of the log function, the dependence on n is
+                * quite weak; even at n = 1 billion, a 300*k sample gives <= 0.59
+                * bin size error with probability 0.99.  So there's no real need to
+                * scale for n, which is a good thing because we don't necessarily
+                * know it at this point.
+                *--------------------
+                */
+               stats->minrows = 300 * attr->attstattarget;
+       }
+       else
+       {
+               /* Can't do much but the minimal stuff */
+               stats->algcode = ALG_MINIMAL;
+               /* Might as well use the same minrows as above */
+               stats->minrows = 300 * attr->attstattarget;
+       }
+
+       return stats;
+}
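
As a quick check of the figures quoted in the minrows comment above, a small self-contained program (illustrative only, not part of the patch) that evaluates the Corollary 1 bound r = 4*k*ln(2*n/gamma)/f^2:

#include <math.h>
#include <stdio.h>

/* Illustrative only: evaluate the Chaudhuri/Motwani/Narasayya sample-size
 * bound for histogram size k, table size n, bin-size error f, and error
 * probability gamma_err. */
static double
min_sample_rows(double k, double n, double f, double gamma_err)
{
	return 4.0 * k * log(2.0 * n / gamma_err) / (f * f);
}

int
main(void)
{
	/* prints about 305.82 (per k) for n = 1e6, matching the figure in the
	 * comment, and about 416 for n = 1e9, showing how weakly the bound
	 * depends on n and why a fixed 300*k sample is considered close enough */
	printf("%.2f\n", min_sample_rows(1.0, 1e6, 0.5, 0.01));
	printf("%.2f\n", min_sample_rows(1.0, 1e9, 0.5, 0.01));
	return 0;
}
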
 
-               tuple = SearchSysCache(TYPEOID,
-                                                          ObjectIdGetDatum(stats->attr->atttypid),
-                                                          0, 0, 0);
-               if (HeapTupleIsValid(tuple))
+/*
+ * acquire_sample_rows -- acquire a random sample of rows from the table
+ *
+ * Up to targrows rows are collected (if there are fewer than that many
+ * rows in the table, all rows are collected).  When the table is larger
+ * than targrows, a truly random sample is collected: every row has an
+ * equal chance of ending up in the final sample.
+ *
+ * We also estimate the total number of rows in the table, and return that
+ * into *totalrows.
+ *
+ * The returned list of tuples is in order by physical position in the table.
+ * (We will rely on this later to derive correlation estimates.)
+ */
+static int
+acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
+                                       long *totalrows)
+{
+       int                     numrows = 0;
+       HeapScanDesc scan;
+       HeapTuple       tuple;
+       ItemPointer     lasttuple;
+       BlockNumber     lastblock,
+                               estblock;
+       OffsetNumber lastoffset;
+       int                     numest;
+       double          tuplesperpage;
+       long            t;
+       double          rstate;
+
+       Assert(targrows > 1);
+       /*
+        * Do a simple linear scan until we reach the target number of rows.
+        */
+       scan = heap_beginscan(onerel, false, SnapshotNow, 0, NULL);
+       while (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
+       {
+               rows[numrows++] = heap_copytuple(tuple);
+               if (numrows >= targrows)
+                       break;
+       }
+       heap_endscan(scan);
+       /*
+        * If we ran out of tuples then we're done, no matter how few we 
+        * collected.  No sort is needed, since they're already in order.
+        */
+       if (!HeapTupleIsValid(tuple))
+       {
+               *totalrows = numrows;
+               return numrows;
+       }
+       /*
+        * Otherwise, start replacing tuples in the sample until we reach the
+        * end of the relation.  This algorithm is from Jeff Vitter's paper
+        * (see full citation below).  It works by repeatedly computing the number
+        * of the next tuple we want to fetch, which will replace a randomly
+        * chosen element of the reservoir (current set of tuples).  At all times
+        * the reservoir is a true random sample of the tuples we've passed over
+        * so far, so when we fall off the end of the relation we're done.
+        *
+        * A slight difficulty is that since we don't want to fetch tuples or even
+        * pages that we skip over, it's not possible to fetch *exactly* the N'th
+        * tuple at each step --- we don't know how many valid tuples are on
+        * the skipped pages.  We handle this by assuming that the average number
+        * of valid tuples/page on the pages already scanned over holds good for
+        * the rest of the relation as well; this lets us estimate which page
+        * the next tuple should be on and its position in the page.  Then we
+        * fetch the first valid tuple at or after that position, being careful
+        * not to use the same tuple twice.  This approach should still give a
+        * good random sample, although it's not perfect.
+        */
+       lasttuple = &(rows[numrows-1]->t_self);
+       lastblock = ItemPointerGetBlockNumber(lasttuple);
+       lastoffset = ItemPointerGetOffsetNumber(lasttuple);
+       /*
+        * If possible, estimate tuples/page using only completely-scanned pages.
+        */
+       for (numest = numrows; numest > 0; numest--)
+       {
+               if (ItemPointerGetBlockNumber(&(rows[numest-1]->t_self)) != lastblock)
+                       break;
+       }
+       if (numest == 0)
+       {
+               numest = numrows;               /* don't have a full page? */
+               estblock = lastblock + 1;
+       }
+       else
+       {
+               estblock = lastblock;
+       }
+       tuplesperpage = (double) numest / (double) estblock;
+
+       t = numrows;                            /* t is the # of records processed so far */
+       rstate = init_selection_state(targrows);
+       for (;;)
+       {
+               double                  targpos;
+               BlockNumber             targblock;
+               OffsetNumber    targoffset,
+                                               maxoffset;
+
+               t = select_next_random_record(t, targrows, &rstate);
+               /* Try to read the t'th record in the table */
+               targpos = (double) t / tuplesperpage;
+               targblock = (BlockNumber) targpos;
+               targoffset = ((int) ((targpos - targblock) * tuplesperpage)) +
+                       FirstOffsetNumber;
+               /* Make sure we are past the last selected record */
+               if (targblock <= lastblock)
                {
-                       stats->outfunc = ((Form_pg_type) GETSTRUCT(tuple))->typoutput;
-                       stats->typelem = ((Form_pg_type) GETSTRUCT(tuple))->typelem;
-                       ReleaseSysCache(tuple);
+                       targblock = lastblock;
+                       if (targoffset <= lastoffset)
+                               targoffset = lastoffset + 1;
                }
-               else
+               /* Loop to find first valid record at or after given position */
+       pageloop:;
+               /*
+                * Have we fallen off the end of the relation?  (We rely on
+                * heap_beginscan to have updated rd_nblocks.)
+                */
+               if (targblock >= onerel->rd_nblocks)
+                       break;
+               maxoffset = get_page_max_offset(onerel, targblock);
+               for (;;)
                {
-                       stats->outfunc = InvalidOid;
-                       stats->typelem = InvalidOid;
+                       HeapTupleData targtuple;
+                       Buffer          targbuffer;
+
+                       if (targoffset > maxoffset)
+                       {
+                               /* Fell off end of this page, try next */
+                               targblock++;
+                               targoffset = FirstOffsetNumber;
+                               goto pageloop;
+                       }
+                       ItemPointerSet(&targtuple.t_self, targblock, targoffset);
+                       heap_fetch(onerel, SnapshotNow, &targtuple, &targbuffer);
+                       if (targtuple.t_data != NULL)
+                       {
+                               /*
+                                * Found a suitable tuple, so save it, replacing one old
+                                * tuple at random
+                                */
+                               int             k = (int) (targrows * random_fract());
+
+                               Assert(k >= 0 && k < targrows);
+                               heap_freetuple(rows[k]);
+                               rows[k] = heap_copytuple(&targtuple);
+                               ReleaseBuffer(targbuffer);
+                               lastblock = targblock;
+                               lastoffset = targoffset;
+                               break;
+                       }
+                       /* this tuple is dead, so advance to next one on same page */
+                       targoffset++;
                }
        }
-       /* delete existing pg_statistic rows for relation */
-       del_stats(relid, ((attnums) ? attr_cnt : 0), attnums);
-
-       /* scan relation to gather statistics */
-       scan = heap_beginscan(onerel, false, SnapshotNow, 0, NULL);
 
-       while (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
-               attr_stats(onerel, attr_cnt, vacattrstats, tuple);
+       /*
+        * Now we need to sort the collected tuples by position (itempointer).
+        */
+       qsort((void *) rows, numrows, sizeof(HeapTuple), compare_rows);
 
-       heap_endscan(scan);
+       /*
+        * Estimate total number of valid rows in relation.
+        */
+       *totalrows = (long) (onerel->rd_nblocks * tuplesperpage + 0.5);
 
-       /* close rel, but keep lock so it doesn't go away before commit */
-       heap_close(onerel, NoLock);
+       return numrows;
+}
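
To make the block/offset estimation described above concrete, a small standalone program (illustrative only; the typedefs are simplified stand-ins for the real ones) that performs the same t-to-position mapping as the loop body in acquire_sample_rows:

#include <stdio.h>

typedef unsigned int BlockNumber;		/* simplified stand-ins */
typedef unsigned short OffsetNumber;
#define FirstOffsetNumber 1

int
main(void)
{
	double			tuplesperpage = 64.0;	/* assumed average from the scanned prefix */
	long			t = 1000;				/* record number chosen by Vitter's algorithm */
	double			targpos = (double) t / tuplesperpage;
	BlockNumber		targblock = (BlockNumber) targpos;
	OffsetNumber	targoffset = ((int) ((targpos - targblock) * tuplesperpage)) +
								 FirstOffsetNumber;

	/* prints "block 15, offset 41" for these inputs */
	printf("block %u, offset %u\n", targblock, (unsigned) targoffset);
	return 0;
}
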
 
-       /* update statistics in pg_class */
-       update_attstats(relid, attr_cnt, vacattrstats);
+/* Select a random value R uniformly distributed in 0 < R < 1 */
+static double
+random_fract(void)
+{
+       long    z;
 
-       CommitTransactionCommand();
+       /* random() can produce endpoint values, try again if so */
+       do
+       {
+               z = random();
+       } while (! (z > 0 && z < MAX_RANDOM_VALUE));
+       return (double) z / (double) MAX_RANDOM_VALUE;
 }
 
 /*
- *     attr_stats() -- compute column statistics used by the planner
+ * These two routines embody Algorithm Z from "Random sampling with a
+ * reservoir" by Jeffrey S. Vitter, in ACM Trans. Math. Softw. 11, 1
+ * (Mar. 1985), Pages 37-57.  While Vitter describes his algorithm in terms
+ * of the count S of records to skip before processing another record,
+ * it is convenient to work primarily with t, the index (counting from 1)
+ * of the last record processed and next record to process.  The only extra
+ * state needed between calls is W, a random state variable.
  *
- *     We compute the column min, max, null and non-null counts.
- *     Plus we attempt to find the count of the value that occurs most
- *     frequently in each column.      These figures are used to compute
- *     the selectivity of the column.
+ * init_selection_state computes the initial W value.
  *
- *     We use a three-bucket cache to get the most frequent item.
- *     The 'guess' buckets count hits.  A cache miss causes guess1
- *     to get the most hit 'guess' item in the most recent cycle, and
- *     the new item goes into guess2.  Whenever the total count of hits
- *     of a 'guess' entry is larger than 'best', 'guess' becomes 'best'.
+ * Given that we've already processed t records (t >= n),
+ * select_next_random_record determines the number of the next record to
+ * process.
+ */
+static double
+init_selection_state(int n)
+{
+       /* Initial value of W (for use when Algorithm Z is first applied) */
+       return exp(- log(random_fract())/n);
+}
+
+static long
+select_next_random_record(long t, int n, double *stateptr)
+{
+       /* The magic constant here is T from Vitter's paper */
+       if (t <= (22 * n))
+       {
+               /* Process records using Algorithm X until t is large enough */
+               double  V,
+                               quot;
+
+               V = random_fract();             /* Generate V */
+               t++;
+               quot = (double) (t - n) / (double) t;
+               /* Find min S satisfying (4.1) */
+               while (quot > V)
+               {
+                       t++;
+                       quot *= (double) (t - n) / (double) t;
+               }
+       }
+       else
+       {
+               /* Now apply Algorithm Z */
+               double  W = *stateptr;
+               long    term = t - n + 1;
+               int             S;
+
+               for (;;)
+               {
+                       long    numer,
+                                       numer_lim,
+                                       denom;
+                       double  U,
+                                       X,
+                                       lhs,
+                                       rhs,
+                                       y,
+                                       tmp;
+
+                       /* Generate U and X */
+                       U = random_fract();
+                       X = t * (W - 1.0);
+                       S = X;                          /* S is tentatively set to floor(X) */
+                       /* Test if U <= h(S)/cg(X) in the manner of (6.3) */
+                       tmp = (double) (t + 1) / (double) term;
+                       lhs = exp(log(((U * tmp * tmp) * (term + S))/(t + X))/n);
+                       rhs = (((t + X)/(term + S)) * term)/t;
+                       if (lhs <= rhs)
+                       {
+                               W = rhs/lhs;
+                               break;
+                       }
+                       /* Test if U <= f(S)/cg(X) */
+                       y = (((U * (t + 1))/term) * (t + S + 1))/(t + X);
+                       if (n < S)
+                       {
+                               denom = t;
+                               numer_lim = term + S;
+                       }
+                       else
+                       {
+                               denom = t - n + S;
+                               numer_lim = t + 1;
+                       }
+                       for (numer = t + S; numer >= numer_lim; numer--)
+                       {
+                               y *= (double) numer / (double) denom;
+                               denom--;
+                       }
+                       W = exp(- log(random_fract())/n); /* Generate W in advance */
+                       if (exp(log(y)/n) <= (t + X)/t)
+                               break;
+               }
+               t += S + 1;
+               *stateptr = W;
+       }
+       return t;
+}
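
For orientation, an illustrative driver (not part of the patch) showing the intended calling pattern of the two routines above: seed the reservoir with the first n records, then let select_next_random_record pick which later record replaces a uniformly chosen slot. It assumes it is compiled in this file so the static helpers and random_fract are visible; STREAM_LEN, SAMPLE_SIZE and reservoir_demo are hypothetical names.

/* Illustrative only: reservoir-sample SAMPLE_SIZE record numbers out of a
 * stream of STREAM_LEN records, using the routines defined above. */
#define STREAM_LEN	100000L
#define SAMPLE_SIZE	100

static void
reservoir_demo(long *sample)
{
	long	t;
	double	rstate;

	/* the first SAMPLE_SIZE records always enter the reservoir */
	for (t = 0; t < SAMPLE_SIZE; t++)
		sample[t] = t + 1;

	t = SAMPLE_SIZE;			/* records processed so far */
	rstate = init_selection_state(SAMPLE_SIZE);
	for (;;)
	{
		t = select_next_random_record(t, SAMPLE_SIZE, &rstate);
		if (t > STREAM_LEN)
			break;				/* ran off the end of the stream */
		/* record t replaces a reservoir slot chosen uniformly at random */
		sample[(int) (SAMPLE_SIZE * random_fract())] = t;
	}
}
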
+
+/*
+ * qsort comparator for sorting rows[] array
+ */
+static int
+compare_rows(const void *a, const void *b)
+{
+       HeapTuple       ha = * (HeapTuple *) a;
+       HeapTuple       hb = * (HeapTuple *) b;
+       BlockNumber     ba = ItemPointerGetBlockNumber(&ha->t_self);
+       OffsetNumber oa = ItemPointerGetOffsetNumber(&ha->t_self);
+       BlockNumber     bb = ItemPointerGetBlockNumber(&hb->t_self);
+       OffsetNumber ob = ItemPointerGetOffsetNumber(&hb->t_self);
+
+       if (ba < bb)
+               return -1;
+       if (ba > bb)
+               return 1;
+       if (oa < ob)
+               return -1;
+       if (oa > ob)
+               return 1;
+       return 0;
+}
+
+/*
+ * Discover the largest valid tuple offset number on the given page
+ *
+ * This code probably ought to live in some other module.
+ */
+static OffsetNumber
+get_page_max_offset(Relation relation, BlockNumber blocknumber)
+{
+       Buffer          buffer;
+       Page            p;
+       OffsetNumber offnum;
+
+       buffer = ReadBuffer(relation, blocknumber);
+       if (!BufferIsValid(buffer))
+               elog(ERROR, "get_page_max_offset: %s relation: ReadBuffer(%ld) failed",
+                        RelationGetRelationName(relation), (long) blocknumber);
+       LockBuffer(buffer, BUFFER_LOCK_SHARE);
+       p = BufferGetPage(buffer);
+       offnum = PageGetMaxOffsetNumber(p);
+       LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
+       ReleaseBuffer(buffer);
+       return offnum;
+}
+
+
+/*
+ *     compute_minimal_stats() -- compute minimal column statistics
  *
- *     This method works perfectly for columns with unique values, and columns
- *     with only two unique values, plus nulls.
+ *     We use this when we can find only an "=" operator for the datatype.
  *
- *     It becomes less perfect as the number of unique values increases and
- *     their distribution in the table becomes more random.
+ *     We determine the fraction of non-null rows, the average width, the
+ *     most common values, and the (estimated) number of distinct values.
  *
+ *     The most common values are determined by brute force: we keep a list
+ *     of previously seen values, ordered by number of times seen, as we scan
+ *     the samples.  A newly seen value is inserted just after the last
+ *     multiply-seen value, causing the bottommost (oldest) singly-seen value
+ *     to drop off the list.  The accuracy of this method, and also its cost,
+ *     depend mainly on the length of the list we are willing to keep.
  */
 static void
-attr_stats(Relation onerel, int attr_cnt, VacAttrStats *vacattrstats, HeapTuple tuple)
+compute_minimal_stats(VacAttrStats *stats,
+                                         TupleDesc tupDesc, long totalrows,
+                                         HeapTuple *rows, int numrows)
 {
        int                     i;
-       TupleDesc       tupDesc = onerel->rd_att;
-
-       for (i = 0; i < attr_cnt; i++)
+       int                     null_cnt = 0;
+       int                     nonnull_cnt = 0;
+       int                     toowide_cnt = 0;
+       double          total_width = 0;
+       bool            is_varlena = (!stats->attr->attbyval &&
+                                                         stats->attr->attlen == -1);
+       FmgrInfo        f_cmpeq;
+       typedef struct
+       {
+               Datum   value;
+               int             count;
+       } TrackItem;
+       TrackItem  *track;
+       int                     track_cnt,
+                               track_max;
+       int                     num_mcv = stats->attr->attstattarget;
+
+       /* We track up to 2*n values for an n-element MCV list, but at least 10 */
+       track_max = 2 * num_mcv;
+       if (track_max < 10)
+               track_max = 10;
+       track = (TrackItem *) palloc(track_max * sizeof(TrackItem));
+       track_cnt = 0;
+
+       fmgr_info(stats->eqfunc, &f_cmpeq);
+
+       for (i = 0; i < numrows; i++)
        {
-               VacAttrStats *stats = &vacattrstats[i];
-               Datum           origvalue;
+               HeapTuple       tuple = rows[i];
                Datum           value;
                bool            isnull;
-               bool            value_hit;
-
-               if (!VacAttrStatsEqValid(stats))
-                       continue;
-
-#ifdef _DROP_COLUMN_HACK__
-               if (COLUMN_IS_DROPPED(stats->attr))
-                       continue;
-#endif  /* _DROP_COLUMN_HACK__ */
+               bool            match;
+               int                     firstcount1,
+                                       j;
 
-               origvalue = heap_getattr(tuple, stats->attr->attnum,
-                                                                tupDesc, &isnull);
+               value = heap_getattr(tuple, stats->attnum, tupDesc, &isnull);
 
+               /* Check for null/nonnull */
                if (isnull)
                {
-                       stats->null_cnt++;
+                       null_cnt++;
                        continue;
                }
-               stats->nonnull_cnt++;
+               nonnull_cnt++;
 
                /*
-                * If the value is toasted, detoast it to avoid repeated
-                * detoastings and resultant memory leakage inside the comparison
-                * routines.
+                * If it's a varlena field, add up widths for average width
+                * calculation.  Note that if the value is toasted, we
+                * use the toasted width.  We don't bother with this calculation
+                * if it's a fixed-width type.
                 */
-               if (!stats->attr->attbyval && stats->attr->attlen == -1)
-                       value = PointerGetDatum(PG_DETOAST_DATUM(origvalue));
-               else
-                       value = origvalue;
-
-               if (!stats->initialized)
+               if (is_varlena)
                {
-                       bucketcpy(stats->attr, value, &stats->best, &stats->best_len);
-                       /* best_cnt gets incremented below */
-                       bucketcpy(stats->attr, value, &stats->guess1, &stats->guess1_len);
-                       stats->guess1_cnt = stats->guess1_hits = 1;
-                       bucketcpy(stats->attr, value, &stats->guess2, &stats->guess2_len);
-                       stats->guess2_hits = 1;
-                       if (VacAttrStatsLtGtValid(stats))
+                       total_width += VARSIZE(DatumGetPointer(value));
+                       /*
+                        * If the value is toasted, we want to detoast it just once to
+                        * avoid repeated detoastings and resultant excess memory usage
+                        * during the comparisons.  Also, check to see if the value is
+                        * excessively wide, and if so don't detoast at all --- just
+                        * ignore the value.
+                        */
+                       if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
                        {
-                               bucketcpy(stats->attr, value, &stats->max, &stats->max_len);
-                               bucketcpy(stats->attr, value, &stats->min, &stats->min_len);
-                               /* min_cnt, max_cnt get incremented below */
+                               toowide_cnt++;
+                               continue;
                        }
-                       stats->initialized = true;
+                       value = PointerGetDatum(PG_DETOAST_DATUM(value));
                }
 
-               if (VacAttrStatsLtGtValid(stats))
+               /*
+                * See if the value matches anything we're already tracking.
+                */
+               match = false;
+               firstcount1 = track_cnt;
+               for (j = 0; j < track_cnt; j++)
                {
-                       if (DatumGetBool(FunctionCall2(&stats->f_cmplt,
-                                                                                  value, stats->min)))
+                       if (DatumGetBool(FunctionCall2(&f_cmpeq, value, track[j].value)))
                        {
-                               bucketcpy(stats->attr, value, &stats->min, &stats->min_len);
-                               stats->min_cnt = 1;
+                               match = true;
+                               break;
                        }
-                       else if (DatumGetBool(FunctionCall2(&stats->f_cmpeq,
-                                                                                               value, stats->min)))
-                               stats->min_cnt++;
+                       if (j < firstcount1 && track[j].count == 1)
+                               firstcount1 = j;
+               }
 
-                       if (DatumGetBool(FunctionCall2(&stats->f_cmpgt,
-                                                                                  value, stats->max)))
+               if (match)
+               {
+                       /* Found a match */
+                       track[j].count++;
+                       /* This value may now need to "bubble up" in the track list */
+                       while (j > 0 && track[j].count > track[j-1].count)
                        {
-                               bucketcpy(stats->attr, value, &stats->max, &stats->max_len);
-                               stats->max_cnt = 1;
+                               swapDatum(track[j].value, track[j-1].value);
+                               swapInt(track[j].count, track[j-1].count);
+                               j--;
                        }
-                       else if (DatumGetBool(FunctionCall2(&stats->f_cmpeq,
-                                                                                               value, stats->max)))
-                               stats->max_cnt++;
                }
-
-               value_hit = true;
-               if (DatumGetBool(FunctionCall2(&stats->f_cmpeq,
-                                                                          value, stats->best)))
-                       stats->best_cnt++;
-               else if (DatumGetBool(FunctionCall2(&stats->f_cmpeq,
-                                                                                       value, stats->guess1)))
+               else
                {
-                       stats->guess1_cnt++;
-                       stats->guess1_hits++;
+                       /* No match.  Insert at head of count-1 list */
+                       if (track_cnt < track_max)
+                               track_cnt++;
+                       for (j = track_cnt-1; j > firstcount1; j--)
+                       {
+                               track[j].value = track[j-1].value;
+                               track[j].count = track[j-1].count;
+                       }
+                       if (firstcount1 < track_cnt)
+                       {
+                               track[firstcount1].value = value;
+                               track[firstcount1].count = 1;
+                       }
                }
-               else if (DatumGetBool(FunctionCall2(&stats->f_cmpeq,
-                                                                                       value, stats->guess2)))
-                       stats->guess2_hits++;
+       }
+
+       /* We can only compute valid stats if we found some non-null values. */
+       if (nonnull_cnt > 0)
+       {
+               int             nmultiple,
+                               summultiple;
+
+               stats->stats_valid = true;
+               /* Do the simple null-frac and width stats */
+               stats->stanullfrac = (double) null_cnt / (double) numrows;
+               if (is_varlena)
+                       stats->stawidth = total_width / (double) nonnull_cnt;
                else
-                       value_hit = false;
+                       stats->stawidth = stats->attrtype->typlen;
 
-               if (stats->guess2_hits > stats->guess1_hits)
+               /* Count the number of values we found multiple times */
+               summultiple = 0;
+               for (nmultiple = 0; nmultiple < track_cnt; nmultiple++)
                {
-                       swapDatum(stats->guess1, stats->guess2);
-                       swapInt(stats->guess1_len, stats->guess2_len);
-                       swapLong(stats->guess1_hits, stats->guess2_hits);
-                       stats->guess1_cnt = stats->guess1_hits;
+                       if (track[nmultiple].count == 1)
+                               break;
+                       summultiple += track[nmultiple].count;
                }
-               if (stats->guess1_cnt > stats->best_cnt)
+
+               if (nmultiple == 0)
                {
-                       swapDatum(stats->best, stats->guess1);
-                       swapInt(stats->best_len, stats->guess1_len);
-                       swapLong(stats->best_cnt, stats->guess1_cnt);
-                       stats->guess1_hits = 1;
-                       stats->guess2_hits = 1;
+                       /* If we found no repeated values, assume it's a unique column */
+                       stats->stadistinct = -1.0;
                }
-               if (!value_hit)
+               else if (track_cnt < track_max && toowide_cnt == 0 &&
+                                nmultiple == track_cnt)
                {
-                       bucketcpy(stats->attr, value, &stats->guess2, &stats->guess2_len);
-                       stats->guess1_hits = 1;
-                       stats->guess2_hits = 1;
+                       /*
+                        * Our track list includes every value in the sample, and every
+                        * value appeared more than once.  Assume the column has just
+                        * these values.
+                        */
+                       stats->stadistinct = track_cnt;
                }
+               else
+               {
+                       /*----------
+                        * Estimate the number of distinct values using the estimator
+                        * proposed by Chaudhuri et al (see citation above).  This is
+                        *              sqrt(n/r) * max(f1,1) + f2 + f3 + ...
+                        * where fk is the number of distinct values that occurred
+                        * exactly k times in our sample of r rows (from a total of n).
+                        * We assume (not very reliably!) that all the multiply-occurring
+                        * values are reflected in the final track[] list, and the other
+                        * nonnull values all appeared but once.
+                        *----------
+                        */
+                       int             f1 = nonnull_cnt - summultiple;
+                       double  term1;
 
-               /* Clean up detoasted copy, if any */
-               if (value != origvalue)
-                       pfree(DatumGetPointer(value));
-       }
-}
+                       if (f1 < 1)
+                               f1 = 1;
+                       term1 = sqrt((double) totalrows / (double) numrows) * f1;
+                       stats->stadistinct = floor(term1 + nmultiple + 0.5);
+               }
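For concreteness, here is a minimal standalone sketch of the same estimator; the helper name estimate_ndistinct and the frequency array f[] are invented for illustration (f[k] = number of distinct sample values seen exactly k times), so this is not the committed code:

#include <math.h>

/*
 * Chaudhuri et al estimate: sqrt(n/r) * max(f1,1) + f2 + f3 + ...
 * for a sample of samplerows rows drawn from a table of totalrows rows.
 */
static double
estimate_ndistinct(const int *f, int maxcount, int samplerows, long totalrows)
{
    double  est;
    int     f1 = (f[1] > 0) ? f[1] : 1;     /* max(f1, 1) */
    int     k;

    est = sqrt((double) totalrows / (double) samplerows) * (double) f1;
    for (k = 2; k <= maxcount; k++)
        est += f[k];                        /* + f2 + f3 + ... */
    return floor(est + 0.5);
}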
 
-/*
- *     bucketcpy() -- copy a new value into one of the statistics buckets
- */
-static void
-bucketcpy(Form_pg_attribute attr, Datum value, Datum *bucket, int *bucket_len)
-{
-       if (attr->attbyval)
-               *bucket = value;
-       else
-       {
-               int                     len = (attr->attlen != -1 ? attr->attlen : VARSIZE(value));
+               /*
+                * If we estimated the number of distinct values at more than 10%
+                * of the total row count (a very arbitrary limit), then assume
+                * that stadistinct should scale with the row count rather than be
+                * a fixed value.
+                */
+               if (stats->stadistinct > 0.1 * totalrows)
+                       stats->stadistinct = - (stats->stadistinct / totalrows);
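Since anything that later reads pg_statistic has to undo this encoding, a minimal illustrative sketch of the intended interpretation follows; the helper name and the reltuples parameter (meant to be the table's current row count) are assumptions for illustration, not part of this patch:

/*
 * stadistinct > 0: an absolute number of distinct values.
 * stadistinct < 0: minus the fraction of rows that are distinct,
 *                  so the estimate scales with the current row count.
 */
static double
ndistinct_from_stadistinct(double stadistinct, double reltuples)
{
    if (stadistinct >= 0.0)
        return stadistinct;
    return -stadistinct * reltuples;
}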
 
-               /* Avoid unnecessary palloc() traffic... */
-               if (len > *bucket_len)
+               /* Generate an MCV slot entry, but only if we found multiples */

+               if (nmultiple < num_mcv)
+                       num_mcv = nmultiple;
+               if (num_mcv > 0)
                {
-                       if (*bucket_len != 0)
-                               pfree(DatumGetPointer(*bucket));
-                       *bucket = PointerGetDatum(palloc(len));
-                       *bucket_len = len;
+                       MemoryContext old_context;
+                       Datum  *mcv_values;
+                       float4 *mcv_freqs;
+
+                       /* Must copy the target values into TransactionCommandContext */
+                       old_context = MemoryContextSwitchTo(TransactionCommandContext);
+                       mcv_values = (Datum *) palloc(num_mcv * sizeof(Datum));
+                       mcv_freqs = (float4 *) palloc(num_mcv * sizeof(float4));
+                       for (i = 0; i < num_mcv; i++)
+                       {
+                               mcv_values[i] = datumCopy(track[i].value,
+                                                                                 stats->attr->attbyval,
+                                                                                 stats->attr->attlen);
+                               mcv_freqs[i] = (double) track[i].count / (double) numrows;
+                       }
+                       MemoryContextSwitchTo(old_context);
+
+                       stats->stakind[0] = STATISTIC_KIND_MCV;
+                       stats->staop[0] = stats->eqopr;
+                       stats->stanumbers[0] = mcv_freqs;
+                       stats->numnumbers[0] = num_mcv;
+                       stats->stavalues[0] = mcv_values;
+                       stats->numvalues[0] = num_mcv;
                }
-               memcpy(DatumGetPointer(*bucket), DatumGetPointer(value), len);
        }
+
+       /* We don't need to bother cleaning up any of our temporary palloc's */
 }
 
 
 /*
- *     update_attstats() -- update attribute statistics for one relation
+ *     compute_scalar_stats() -- compute column statistics
  *
- *             Statistics are stored in several places: the pg_class row for the
- *             relation has stats about the whole relation, the pg_attribute rows
- *             for each attribute store "dispersion", and there is a pg_statistic
- *             row for each (non-system) attribute.  (Dispersion probably ought to
- *             be moved to pg_statistic, but it's not worth doing unless there's
- *             another reason to have to change pg_attribute.)  The pg_class values
- *             are updated by VACUUM, not here.
- *
- *             We violate no-overwrite semantics here by storing new values for
- *             the dispersion column directly into the pg_attribute tuple that's
- *             already on the page.  The reason for this is that if we updated
- *             these tuples in the usual way, vacuuming pg_attribute itself
- *             wouldn't work very well --- by the time we got done with a vacuum
- *             cycle, most of the tuples in pg_attribute would've been obsoleted.
- *             Updating pg_attribute's own statistics would be especially tricky.
- *             Of course, this only works for fixed-size never-null columns, but
- *             dispersion is.
+ *     We use this when we can find "=" and "<" operators for the datatype.
  *
- *             pg_statistic rows are just added normally.      This means that
- *             pg_statistic will probably contain some deleted rows at the
- *             completion of a vacuum cycle, unless it happens to get vacuumed last.
+ *     We determine the fraction of non-null rows, the average width, the
+ *     most common values, the (estimated) number of distinct values, the
+ *     distribution histogram, and the correlation of physical to logical order.
  *
- *             To keep things simple, we punt for pg_statistic, and don't try
- *             to compute or store rows for pg_statistic itself in pg_statistic.
- *             This could possibly be made to work, but it's not worth the trouble.
+ *     The desired stats can be determined fairly easily after sorting the
+ *     data values into order.
  */
 static void
-update_attstats(Oid relid, int natts, VacAttrStats *vacattrstats)
+compute_scalar_stats(VacAttrStats *stats,
+                                        TupleDesc tupDesc, long totalrows,
+                                        HeapTuple *rows, int numrows)
 {
-       Relation        ad,
-                               sd;
-       HeapScanDesc scan;
-       HeapTuple       atup,
-                               stup;
-       ScanKeyData askey;
-       Form_pg_attribute attp;
-
-       ad = heap_openr(AttributeRelationName, RowExclusiveLock);
-       sd = heap_openr(StatisticRelationName, RowExclusiveLock);
-
-       /* Find pg_attribute rows for this relation */
-       ScanKeyEntryInitialize(&askey, 0, Anum_pg_attribute_attrelid,
-                                                  F_INT4EQ, relid);
-
-       scan = heap_beginscan(ad, false, SnapshotNow, 1, &askey);
-
-       while (HeapTupleIsValid(atup = heap_getnext(scan, 0)))
+       int                     i;
+       int                     null_cnt = 0;
+       int                     nonnull_cnt = 0;
+       int                     toowide_cnt = 0;
+       double          total_width = 0;
+       bool            is_varlena = (!stats->attr->attbyval &&
+                                                         stats->attr->attlen == -1);
+       double          corr_xysum;
+       RegProcedure cmpFn;
+       SortFunctionKind cmpFnKind;
+       FmgrInfo        f_cmpfn;
+       ScalarItem *values;
+       int                     values_cnt = 0;
+       int                *tupnoLink;
+       ScalarMCVItem *track;
+       int                     track_cnt = 0;
+       int                     num_mcv = stats->attr->attstattarget;
+
+       values = (ScalarItem *) palloc(numrows * sizeof(ScalarItem));
+       tupnoLink = (int *) palloc(numrows * sizeof(int));
+       track = (ScalarMCVItem *) palloc(num_mcv * sizeof(ScalarMCVItem));
+
+       SelectSortFunction(stats->ltopr, &cmpFn, &cmpFnKind);
+       fmgr_info(cmpFn, &f_cmpfn);
+
+       /* Initial scan to find sortable values */
+       for (i = 0; i < numrows; i++)
        {
-               int                     i;
-               VacAttrStats *stats;
+               HeapTuple       tuple = rows[i];
+               Datum           value;
+               bool            isnull;
 
-               attp = (Form_pg_attribute) GETSTRUCT(atup);
-               if (attp->attnum <= 0)  /* skip system attributes for now */
-                       continue;
+               value = heap_getattr(tuple, stats->attnum, tupDesc, &isnull);
 
-               for (i = 0; i < natts; i++)
+               /* Check for null/nonnull */
+               if (isnull)
                {
-                       if (attp->attnum == vacattrstats[i].attr->attnum)
-                               break;
+                       null_cnt++;
+                       continue;
                }
-               if (i >= natts)
-                       continue;                       /* skip attr if no stats collected */
-               stats = &(vacattrstats[i]);
+               nonnull_cnt++;
 
-               if (VacAttrStatsEqValid(stats))
+               /*
+                * If it's a varlena field, add up widths for average width
+                * calculation.  Note that if the value is toasted, we
+                * use the toasted width.  We don't bother with this calculation
+                * if it's a fixed-width type.
+                */
+               if (is_varlena)
                {
-                       float4          selratio;               /* average ratio of rows selected
-                                                                                * for a random constant */
-
-                       /* Compute dispersion */
-                       if (stats->nonnull_cnt == 0 && stats->null_cnt == 0)
+                       total_width += VARSIZE(DatumGetPointer(value));
+                       /*
+                        * If the value is toasted, we want to detoast it just once to
+                        * avoid repeated detoastings and resultant excess memory usage
+                        * during the comparisons.  Also, check to see if the value is
+                        * excessively wide, and if so don't detoast at all --- just
+                        * ignore the value.
+                        */
+                       if (toast_raw_datum_size(value) > WIDTH_THRESHOLD)
                        {
-
-                               /*
-                                * empty relation, so put a dummy value in attdispersion
-                                */
-                               selratio = 0;
+                               toowide_cnt++;
+                               continue;
                        }
-                       else if (stats->null_cnt <= 1 && stats->best_cnt == 1)
-                       {
+                       value = PointerGetDatum(PG_DETOAST_DATUM(value));
+               }
 
-                               /*
-                                * looks like we have a unique-key attribute --- flag this
-                                * with special -1.0 flag value.
-                                *
-                                * The correct dispersion is 1.0/numberOfRows, but since the
-                                * relation row count can get updated without recomputing
-                                * dispersion, we want to store a "symbolic" value and
-                                * figure 1.0/numberOfRows on the fly.
-                                */
-                               selratio = -1;
-                       }
-                       else
+               /* Add it to the list to be sorted */
+               values[values_cnt].value = value;
+               values[values_cnt].tupno = values_cnt;
+               tupnoLink[values_cnt] = values_cnt;
+               values_cnt++;
+       }
+
+       /* We can only compute valid stats if we found some sortable values. */
+       if (values_cnt > 0)
+       {
+               int             ndistinct,              /* # distinct values in sample */
+                               nmultiple,              /* # that appear multiple times */
+                               num_hist,
+                               dups_cnt;
+               int             slot_idx = 0;
+
+               /* Sort the collected values */
+               datumCmpFn = &f_cmpfn;
+               datumCmpFnKind = cmpFnKind;
+               datumCmpTupnoLink = tupnoLink;
+               qsort((void *) values, values_cnt,
+                         sizeof(ScalarItem), compare_scalars);
+
+               /*
+                * Now scan the values in order, find the most common ones,
+                * and also accumulate ordering-correlation statistics.
+                *
+                * To determine which are most common, we first have to count the
+                * number of duplicates of each value.  The duplicates are adjacent
+                * in the sorted list, so a brute-force approach is to compare
+                * successive datum values until we find two that are not equal.
+                * However, that requires N-1 invocations of the datum comparison
+                * routine, which are completely redundant with work that was done
+                * during the sort.  (The sort algorithm must at some point have
+                * compared each pair of items that are adjacent in the sorted order;
+                * otherwise it could not know that it's ordered the pair correctly.)
+                * We exploit this by having compare_scalars remember the highest
+                * tupno index that each ScalarItem has been found equal to.  At the
+                * end of the sort, a ScalarItem's tupnoLink will still point to
+                * itself if and only if it is the last item of its group of
+                * duplicates (since the group will be ordered by tupno).
+                */
+               corr_xysum = 0;
+               ndistinct = 0;
+               nmultiple = 0;
+               dups_cnt = 0;
+               for (i = 0; i < values_cnt; i++)
+               {
+                       int                     tupno = values[i].tupno;
+
+                       corr_xysum += (double) i * (double) tupno;
+                       dups_cnt++;
+                       if (tupnoLink[tupno] == tupno)
                        {
-                               if (VacAttrStatsLtGtValid(stats) &&
-                                       stats->min_cnt + stats->max_cnt == stats->nonnull_cnt)
+                               /* Reached end of duplicates of this value */
+                               ndistinct++;
+                               if (dups_cnt > 1)
                                {
+                                       nmultiple++;
+                                       if (track_cnt < num_mcv ||
+                                               dups_cnt > track[track_cnt-1].count)
+                                       {
+                                               /*
+                                                * Found a new item for the mcv list; find its
+                                                * position, bubbling down old items if needed.
+                                                * Loop invariant is that j points at an empty/
+                                                * replaceable slot.
+                                                */
+                                               int             j;
+
+                                               if (track_cnt < num_mcv)
+                                                       track_cnt++;
+                                               for (j = track_cnt-1; j > 0; j--)
+                                               {
+                                                       if (dups_cnt <= track[j-1].count)
+                                                               break;
+                                                       track[j].count = track[j-1].count;
+                                                       track[j].first = track[j-1].first;
+                                               }
+                                               track[j].count = dups_cnt;
+                                               track[j].first = i + 1 - dups_cnt;
+                                       }
+                               }
+                               dups_cnt = 0;
+                       }
+               }
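For instance, if five sample values are A, B, A, B, A for tupnos 0 through 4, the sorted order is A(0) A(2) A(4) B(1) B(3).  The equal-datum comparisons made during the sort leave tupnoLink[0] pointing at 2 or 4, tupnoLink[2] at 4, and tupnoLink[1] at 3, while tupnoLink[4] and tupnoLink[3] still point to themselves.  The scan above therefore closes the A group at the third sorted entry (dups_cnt = 3) and the B group at the last one (dups_cnt = 2), giving ndistinct = 2 and nmultiple = 2 with no further datum comparisons.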
 
-                                       /*
-                                        * exact result when there are just 1 or 2 values...
-                                        */
-                                       double          min_cnt_d = stats->min_cnt,
-                                                               max_cnt_d = stats->max_cnt,
-                                                               null_cnt_d = stats->null_cnt;
-                                       double          total = ((double) stats->nonnull_cnt) + null_cnt_d;
+               stats->stats_valid = true;
+               /* Do the simple null-frac and width stats */
+               stats->stanullfrac = (double) null_cnt / (double) numrows;
+               if (is_varlena)
+                       stats->stawidth = total_width / (double) nonnull_cnt;
+               else
+                       stats->stawidth = stats->attrtype->typlen;
 
-                                       selratio = (min_cnt_d * min_cnt_d + max_cnt_d * max_cnt_d + null_cnt_d * null_cnt_d) / (total * total);
-                               }
-                               else
-                               {
-                                       double          most = (double) (stats->best_cnt > stats->null_cnt ? stats->best_cnt : stats->null_cnt);
-                                       double          total = ((double) stats->nonnull_cnt) + ((double) stats->null_cnt);
+               if (nmultiple == 0)
+               {
+                       /* If we found no repeated values, assume it's a unique column */
+                       stats->stadistinct = -1.0;
+               }
+               else if (toowide_cnt == 0 && nmultiple == ndistinct)
+               {
+                       /*
+                        * Every value in the sample appeared more than once.  Assume the
+                        * column has just these values.
+                        */
+                       stats->stadistinct = ndistinct;
+               }
+               else
+               {
+                       /*----------
+                        * Estimate the number of distinct values using the estimator
+                        * proposed by Chaudhuri et al (see citation above).  This is
+                        *              sqrt(n/r) * max(f1,1) + f2 + f3 + ...
+                        * where fk is the number of distinct values that occurred
+                        * exactly k times in our sample of r rows (from a total of n).
+                        * Overwidth values are assumed to have been distinct.
+                        *----------
+                        */
+                       int             f1 = ndistinct - nmultiple + toowide_cnt;
+                       double  term1;
 
-                                       /*
-                                        * we assume count of other values are 20% of best
-                                        * count in table
-                                        */
-                                       selratio = (most * most + 0.20 * most * (total - most)) / (total * total);
-                               }
-                               /* Make sure calculated values are in-range */
-                               if (selratio < 0.0)
-                                       selratio = 0.0;
-                               else if (selratio > 1.0)
-                                       selratio = 1.0;
+                       if (f1 < 1)
+                               f1 = 1;
+                       term1 = sqrt((double) totalrows / (double) numrows) * f1;
+                       stats->stadistinct = floor(term1 + nmultiple + 0.5);
+               }
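As a worked example: with a sample of 3000 rows drawn from a table of 1,000,000 rows, ndistinct = 2000, nmultiple = 500 and toowide_cnt = 10 give f1 = 2000 - 500 + 10 = 1510, term1 = sqrt(1000000/3000) * 1510, which is about 27569, and stadistinct = floor(27568.7 + 500 + 0.5) = 28069.  Since that is well under 10% of the 1,000,000 total rows, the next step keeps it as an absolute count.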
+
+               /*
+                * If we estimated the number of distinct values at more than 10%
+                * of the total row count (a very arbitrary limit), then assume
+                * that stadistinct should scale with the row count rather than be
+                * a fixed value.
+                */
+               if (stats->stadistinct > 0.1 * totalrows)
+                       stats->stadistinct = - (stats->stadistinct / totalrows);
+
+               /* Generate an MCV slot entry, but only if we found multiples */
+               if (nmultiple < num_mcv)
+                       num_mcv = nmultiple;
+               Assert(track_cnt >= num_mcv);
+               if (num_mcv > 0)
+               {
+                       MemoryContext old_context;
+                       Datum  *mcv_values;
+                       float4 *mcv_freqs;
+
+                       /* Must copy the target values into TransactionCommandContext */
+                       old_context = MemoryContextSwitchTo(TransactionCommandContext);
+                       mcv_values = (Datum *) palloc(num_mcv * sizeof(Datum));
+                       mcv_freqs = (float4 *) palloc(num_mcv * sizeof(float4));
+                       for (i = 0; i < num_mcv; i++)
+                       {
+                               mcv_values[i] = datumCopy(values[track[i].first].value,
+                                                                                 stats->attr->attbyval,
+                                                                                 stats->attr->attlen);
+                               mcv_freqs[i] = (double) track[i].count / (double) numrows;
                        }
+                       MemoryContextSwitchTo(old_context);
+
+                       stats->stakind[slot_idx] = STATISTIC_KIND_MCV;
+                       stats->staop[slot_idx] = stats->eqopr;
+                       stats->stanumbers[slot_idx] = mcv_freqs;
+                       stats->numnumbers[slot_idx] = num_mcv;
+                       stats->stavalues[slot_idx] = mcv_values;
+                       stats->numvalues[slot_idx] = num_mcv;
+                       slot_idx++;
+               }
 
-                       /* overwrite the existing statistics in the tuple */
-                       attp->attdispersion = selratio;
+               /*
+                * Generate a histogram slot entry if there are at least two
+                * distinct values not accounted for in the MCV list.  (This
+                * ensures the histogram won't collapse to empty or a singleton.)
+                */
+               num_hist = ndistinct - num_mcv;
+               if (num_hist > stats->attr->attstattarget)
+                       num_hist = stats->attr->attstattarget + 1;
+               if (num_hist >= 2)
+               {
+                       MemoryContext old_context;
+                       Datum  *hist_values;
+                       int             nvals;
 
-                       /* invalidate the tuple in the cache and write the buffer */
-                       RelationInvalidateHeapTuple(ad, atup);
-                       WriteNoReleaseBuffer(scan->rs_cbuf);
+                       /* Sort the MCV items into position order to speed next loop */
+                       qsort((void *) track, num_mcv,
+                                 sizeof(ScalarMCVItem), compare_mcvs);
 
                        /*
-                        * Create pg_statistic tuples for the relation, if we have
-                        * gathered the right data.  del_stats() previously deleted
-                        * all the pg_statistic tuples for the rel, so we just have to
-                        * insert new ones here.
+                        * Collapse out the MCV items from the values[] array.
                         *
-                        * Note analyze_rel() has seen to it that we won't come here when
-                        * vacuuming pg_statistic itself.
+                        * Note we destroy the values[] array here... but we don't need
+                        * it for anything more.  We do, however, still need values_cnt.
                         */
-                       if (VacAttrStatsLtGtValid(stats) && stats->initialized)
+                       if (num_mcv > 0)
                        {
-                               float4          nullratio;
-                               float4          bestratio;
-                               FmgrInfo        out_function;
-                               char       *out_string;
-                               double          best_cnt_d = stats->best_cnt,
-                                                       null_cnt_d = stats->null_cnt,
-                                                       nonnull_cnt_d = stats->nonnull_cnt; /* prevent overflow */
-                               Datum           values[Natts_pg_statistic];
-                               char            nulls[Natts_pg_statistic];
-                               Relation        irelations[Num_pg_statistic_indices];
+                               int             src,
+                                               dest;
+                               int             j;
 
-                               nullratio = null_cnt_d / (nonnull_cnt_d + null_cnt_d);
-                               bestratio = best_cnt_d / (nonnull_cnt_d + null_cnt_d);
-
-                               fmgr_info(stats->outfunc, &out_function);
+                               src = dest = 0;
+                               j = 0;                  /* index of next interesting MCV item */
+                               while (src < values_cnt)
+                               {
+                                       int             ncopy;
+
+                                       if (j < num_mcv)
+                                       {
+                                               int             first = track[j].first;
+
+                                               if (src >= first)
+                                               {
+                                                       /* advance past this MCV item */
+                                                       src = first + track[j].count;
+                                                       j++;
+                                                       continue;
+                                               }
+                                               ncopy = first - src;
+                                       }
+                                       else
+                                       {
+                                               ncopy = values_cnt - src;
+                                       }
+                                       memmove(&values[dest], &values[src],
+                                                       ncopy * sizeof(ScalarItem));
+                                       src += ncopy;
+                                       dest += ncopy;
+                               }
+                               nvals = dest;
+                       }
+                       else
+                               nvals = values_cnt;
+                       Assert(nvals >= num_hist);
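A small worked example of the collapse: with values_cnt = 10 and two MCV items covering sorted positions 2-4 (count 3) and 7-8 (count 2), the loop copies positions 0-1, skips 2-4, copies 5-6, skips 7-8, and finally copies 9, leaving the five non-MCV entries packed into values[0..4] and nvals = 5.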
 
-                               for (i = 0; i < Natts_pg_statistic; ++i)
-                                       nulls[i] = ' ';
+                       /* Must copy the target values into TransactionCommandContext */
+                       old_context = MemoryContextSwitchTo(TransactionCommandContext);
+                       hist_values = (Datum *) palloc(num_hist * sizeof(Datum));
+                       for (i = 0; i < num_hist; i++)
+                       {
+                               int             pos;
 
-                               /*
-                                * initialize values[]
-                                */
-                               i = 0;
-                               values[i++] = ObjectIdGetDatum(relid);  /* starelid */
-                               values[i++] = Int16GetDatum(attp->attnum);              /* staattnum */
-                               values[i++] = ObjectIdGetDatum(stats->op_cmplt);                /* staop */
-                               values[i++] = Float4GetDatum(nullratio);                /* stanullfrac */
-                               values[i++] = Float4GetDatum(bestratio);                /* stacommonfrac */
-                               out_string = DatumGetCString(FunctionCall3(&out_function,
-                                                                                                                  stats->best,
-                                                                               ObjectIdGetDatum(stats->typelem),
-                                                                Int32GetDatum(stats->attr->atttypmod)));
-                               values[i++] = DirectFunctionCall1(textin,               /* stacommonval */
-                                                                                       CStringGetDatum(out_string));
-                               pfree(out_string);
-                               out_string = DatumGetCString(FunctionCall3(&out_function,
-                                                                                                                  stats->min,
-                                                                               ObjectIdGetDatum(stats->typelem),
-                                                                Int32GetDatum(stats->attr->atttypmod)));
-                               values[i++] = DirectFunctionCall1(textin,               /* staloval */
-                                                                                       CStringGetDatum(out_string));
-                               pfree(out_string);
-                               out_string = DatumGetCString(FunctionCall3(&out_function,
-                                                                                                                  stats->max,
-                                                                               ObjectIdGetDatum(stats->typelem),
-                                                                Int32GetDatum(stats->attr->atttypmod)));
-                               values[i++] = DirectFunctionCall1(textin,               /* stahival */
-                                                                                       CStringGetDatum(out_string));
-                               pfree(out_string);
-
-                               stup = heap_formtuple(sd->rd_att, values, nulls);
-
-                               /* store tuple and update indexes too */
-                               heap_insert(sd, stup);
-
-                               CatalogOpenIndices(Num_pg_statistic_indices, Name_pg_statistic_indices, irelations);
-                               CatalogIndexInsert(irelations, Num_pg_statistic_indices, sd, stup);
-                               CatalogCloseIndices(Num_pg_statistic_indices, irelations);
-
-                               /* release allocated space */
-                               pfree(DatumGetPointer(values[Anum_pg_statistic_stacommonval - 1]));
-                               pfree(DatumGetPointer(values[Anum_pg_statistic_staloval - 1]));
-                               pfree(DatumGetPointer(values[Anum_pg_statistic_stahival - 1]));
-                               heap_freetuple(stup);
+                               pos = (i * (nvals - 1)) / (num_hist - 1);
+                               hist_values[i] = datumCopy(values[pos].value,
+                                                                                  stats->attr->attbyval,
+                                                                                  stats->attr->attlen);
                        }
+                       MemoryContextSwitchTo(old_context);
+
+                       stats->stakind[slot_idx] = STATISTIC_KIND_HISTOGRAM;
+                       stats->staop[slot_idx] = stats->ltopr;
+                       stats->stavalues[slot_idx] = hist_values;
+                       stats->numvalues[slot_idx] = num_hist;
+                       slot_idx++;
+               }
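To make the bound selection above concrete: with nvals = 1000 remaining values and num_hist = 11 (attstattarget = 10), pos takes the values (i * 999) / 10 for i = 0..10, i.e. 0, 99, 199, ..., 899, 999, so the stored bounds cut the sorted non-MCV values into ten bins of roughly equal population.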
+
+               /* Generate a correlation entry if there are multiple values */
+               if (values_cnt > 1)
+               {
+                       MemoryContext old_context;
+                       float4 *corrs;
+                       double  corr_xsum,
+                                       corr_x2sum;
+
+                       /* Must copy the target values into TransactionCommandContext */
+                       old_context = MemoryContextSwitchTo(TransactionCommandContext);
+                       corrs = (float4 *) palloc(sizeof(float4));
+                       MemoryContextSwitchTo(old_context);
+
+                       /*----------
+                        * Since we know the x and y value sets are both
+                        *              0, 1, ..., values_cnt-1
+                        * we have sum(x) = sum(y) =
+                        *              (values_cnt-1)*values_cnt / 2
+                        * and sum(x^2) = sum(y^2) =
+                        *              (values_cnt-1)*values_cnt*(2*values_cnt-1) / 6.
+                        *----------
+                        */
+                       corr_xsum = (double) (values_cnt-1) * (double) values_cnt / 2.0;
+                       corr_x2sum = (double) (values_cnt-1) * (double) values_cnt *
+                               (double) (2*values_cnt-1) / 6.0;
+                       /* And the correlation coefficient reduces to */
+                       corrs[0] = (values_cnt * corr_xysum - corr_xsum * corr_xsum) /
+                               (values_cnt * corr_x2sum - corr_xsum * corr_xsum);
+
+                       stats->stakind[slot_idx] = STATISTIC_KIND_CORRELATION;
+                       stats->staop[slot_idx] = stats->ltopr;
+                       stats->stanumbers[slot_idx] = corrs;
+                       stats->numnumbers[slot_idx] = 1;
+                       slot_idx++;
                }
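For reference, the reduction used above follows directly from the usual Pearson correlation

    r = (n*sum(xy) - sum(x)*sum(y))
        / sqrt((n*sum(x^2) - sum(x)^2) * (n*sum(y^2) - sum(y)^2))

Because both coordinate sets here are 0 .. values_cnt-1, sum(x) = sum(y) = corr_xsum and sum(x^2) = sum(y^2) = corr_x2sum, so the two factors under the square root are identical and the expression collapses to

    (values_cnt*corr_xysum - corr_xsum*corr_xsum)
        / (values_cnt*corr_x2sum - corr_xsum*corr_xsum)

which is exactly what is stored in corrs[0].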
        }
-       heap_endscan(scan);
-       /* close rels, but hold locks till upcoming commit */
-       heap_close(ad, NoLock);
-       heap_close(sd, NoLock);
+
+       /* We don't need to bother cleaning up any of our temporary palloc's */
 }
 
 /*
- *     del_stats() -- delete pg_statistic rows for a relation
+ * qsort comparator for sorting ScalarItems
  *
- *     If a list of attribute numbers is given, only zap stats for those attrs.
+ * Aside from sorting the items, we update the datumCmpTupnoLink[] array
+ * whenever two ScalarItems are found to contain equal datums.  The array
+ * is indexed by tupno; for each ScalarItem, it contains the highest
+ * tupno that that item's datum has been found to be equal to.  This allows
+ * us to avoid additional comparisons in compute_scalar_stats().
  */
-static void
-del_stats(Oid relid, int attcnt, int *attnums)
+static int
+compare_scalars(const void *a, const void *b)
 {
-       Relation        pgstatistic;
-       HeapScanDesc scan;
-       HeapTuple       tuple;
-       ScanKeyData key;
+       Datum           da = ((ScalarItem *) a)->value;
+       int                     ta = ((ScalarItem *) a)->tupno;
+       Datum           db = ((ScalarItem *) b)->value;
+       int                     tb = ((ScalarItem *) b)->tupno;
 
-       pgstatistic = heap_openr(StatisticRelationName, RowExclusiveLock);
+       if (datumCmpFnKind == SORTFUNC_LT)
+       {
+               if (DatumGetBool(FunctionCall2(datumCmpFn, da, db)))
+                       return -1;                      /* a < b */
+               if (DatumGetBool(FunctionCall2(datumCmpFn, db, da)))
+                       return 1;                       /* a > b */
+       }
+       else
+       {
+               /* sort function is CMP or REVCMP */
+               int32   compare;
 
-       ScanKeyEntryInitialize(&key, 0x0, Anum_pg_statistic_starelid,
-                                                  F_OIDEQ, ObjectIdGetDatum(relid));
-       scan = heap_beginscan(pgstatistic, false, SnapshotNow, 1, &key);
+               compare = DatumGetInt32(FunctionCall2(datumCmpFn, da, db));
+               if (compare != 0)
+               {
+                       if (datumCmpFnKind == SORTFUNC_REVCMP)
+                               compare = -compare;
+                       return compare;
+               }
+       }
 
-       while (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
+       /*
+        * The two datums are equal, so update datumCmpTupnoLink[].
+        */
+       if (datumCmpTupnoLink[ta] < tb)
+               datumCmpTupnoLink[ta] = tb;
+       if (datumCmpTupnoLink[tb] < ta)
+               datumCmpTupnoLink[tb] = ta;
+
+       /*
+        * For equal datums, sort by tupno
+        */
+       return ta - tb;
+}
+
+/*
+ * qsort comparator for sorting ScalarMCVItems by position
+ */
+static int
+compare_mcvs(const void *a, const void *b)
+{
+       int                     da = ((ScalarMCVItem *) a)->first;
+       int                     db = ((ScalarMCVItem *) b)->first;
+
+       return da - db;
+}
+
+
+/*
+ *     update_attstats() -- update attribute statistics for one relation
+ *
+ *             Statistics are stored in several places: the pg_class row for the
+ *             relation has stats about the whole relation, and there is a
+ *             pg_statistic row for each (non-system) attribute that has ever
+ *             been analyzed.  The pg_class values are updated by VACUUM, not here.
+ *
+ *             pg_statistic rows are just added or updated normally.  This means
+ *             that pg_statistic will probably contain some deleted rows at the
+ *             completion of a vacuum cycle, unless it happens to get vacuumed last.
+ *
+ *             To keep things simple, we punt for pg_statistic, and don't try
+ *             to compute or store rows for pg_statistic itself in pg_statistic.
+ *             This could possibly be made to work, but it's not worth the trouble.
+ *             Note analyze_rel() has seen to it that we won't come here when
+ *             vacuuming pg_statistic itself.
+ */
+static void
+update_attstats(Oid relid, int natts, VacAttrStats **vacattrstats)
+{
+       Relation        sd;
+       int                     attno;
+
+       /*
+        * We use an ExclusiveLock on pg_statistic to ensure that only one
+        * backend is writing it at a time --- without that, we might have to
+        * deal with concurrent updates here, and it's not worth the trouble.
+        */
+       sd = heap_openr(StatisticRelationName, ExclusiveLock);
+
+       for (attno = 0; attno < natts; attno++)
        {
-               if (attcnt > 0)
+               VacAttrStats *stats = vacattrstats[attno];
+               FmgrInfo        out_function;
+               HeapTuple       stup,
+                                       oldtup;
+               int                     i, k, n;
+               Datum           values[Natts_pg_statistic];
+               char            nulls[Natts_pg_statistic];
+               char            replaces[Natts_pg_statistic];
+               Relation        irelations[Num_pg_statistic_indices];
+
+               /* Ignore attr if we weren't able to collect stats */
+               if (!stats->stats_valid)
+                       continue;
+
+               fmgr_info(stats->attrtype->typoutput, &out_function);
+
+               /*
+                * Construct a new pg_statistic tuple
+                */
+               for (i = 0; i < Natts_pg_statistic; ++i)
                {
-                       Form_pg_statistic pgs = (Form_pg_statistic) GETSTRUCT(tuple);
-                       int                     i;
+                       nulls[i] = ' ';
+                       replaces[i] = 'r';
+               }
 
-                       for (i = 0; i < attcnt; i++)
+               i = 0;
+               values[i++] = ObjectIdGetDatum(relid); /* starelid */
+               values[i++] = Int16GetDatum(stats->attnum); /* staattnum */
+               values[i++] = Float4GetDatum(stats->stanullfrac); /* stanullfrac */
+               values[i++] = Int32GetDatum(stats->stawidth); /* stawidth */
+               values[i++] = Float4GetDatum(stats->stadistinct); /* stadistinct */
+               for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+               {
+                       values[i++] = Int16GetDatum(stats->stakind[k]); /* stakindN */
+               }
+               for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+               {
+                       values[i++] = ObjectIdGetDatum(stats->staop[k]); /* staopN */
+               }
+               for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+               {
+                       int             nnum = stats->numnumbers[k];
+
+                       if (nnum > 0)
                        {
-                               if (pgs->staattnum == attnums[i] + 1)
-                                       break;
+                               Datum      *numdatums = (Datum *) palloc(nnum * sizeof(Datum));
+                               ArrayType  *arry;
+
+                               for (n = 0; n < nnum; n++)
+                                       numdatums[n] = Float4GetDatum(stats->stanumbers[k][n]);
+                               /* XXX knows more than it should about type float4: */
+                               arry = construct_array(numdatums, nnum,
+                                                                          false, sizeof(float4), 'i');
+                               values[i++] = PointerGetDatum(arry); /* stanumbersN */
+                       }
+                       else
+                       {
+                               nulls[i] = 'n';
+                               values[i++] = (Datum) 0;
                        }
-                       if (i >= attcnt)
-                               continue;               /* don't delete it */
                }
-               simple_heap_delete(pgstatistic, &tuple->t_self);
-       }
+               for (k = 0; k < STATISTIC_NUM_SLOTS; k++)
+               {
+                       int             ntxt = stats->numvalues[k];
 
-       heap_endscan(scan);
+                       if (ntxt > 0)
+                       {
+                               Datum      *txtdatums = (Datum *) palloc(ntxt * sizeof(Datum));
+                               ArrayType  *arry;
 
-       /*
-        * Close rel, but *keep* lock; we will need to reacquire it later, so
-        * there's a possibility of deadlock against another VACUUM process if
-        * we let go now.  Keeping the lock shouldn't delay any common
-        * operation other than an attempted VACUUM of pg_statistic itself.
-        */
-       heap_close(pgstatistic, NoLock);
+                               for (n = 0; n < ntxt; n++)
+                               {
+                                       /*
+                                        * Convert data values to a text string to be inserted
+                                        * into the text array.
+                                        */
+                                       Datum   stringdatum;
+
+                                       stringdatum =
+                                               FunctionCall3(&out_function,
+                                                                         stats->stavalues[k][n],
+                                                                         ObjectIdGetDatum(stats->attrtype->typelem),
+                                                                         Int32GetDatum(stats->attr->atttypmod));
+                                       txtdatums[n] = DirectFunctionCall1(textin, stringdatum);
+                                       pfree(DatumGetPointer(stringdatum));
+                               }
+                               /* XXX knows more than it should about type text: */
+                               arry = construct_array(txtdatums, ntxt,
+                                                                          false, -1, 'i');
+                               values[i++] = PointerGetDatum(arry); /* stavaluesN */
+                       }
+                       else
+                       {
+                               nulls[i] = 'n';
+                               values[i++] = (Datum) 0;
+                       }
+               }
+
+               /* Is there already a pg_statistic tuple for this attribute? */
+               oldtup = SearchSysCache(STATRELATT,
+                                                               ObjectIdGetDatum(relid),
+                                                               Int16GetDatum(stats->attnum),
+                                                               0, 0);
+
+               if (HeapTupleIsValid(oldtup))
+               {
+                       /* Yes, replace it */
+                       stup = heap_modifytuple(oldtup,
+                                                                       sd,
+                                                                       values,
+                                                                       nulls,
+                                                                       replaces);
+                       ReleaseSysCache(oldtup);
+                       simple_heap_update(sd, &stup->t_self, stup);
+               }
+               else
+               {
+                       /* No, insert new tuple */
+                       stup = heap_formtuple(sd->rd_att, values, nulls);
+                       heap_insert(sd, stup);
+               }
+
+               /* update indices too */
+               CatalogOpenIndices(Num_pg_statistic_indices, Name_pg_statistic_indices,
+                                                  irelations);
+               CatalogIndexInsert(irelations, Num_pg_statistic_indices, sd, stup);
+               CatalogCloseIndices(Num_pg_statistic_indices, irelations);
+
+               heap_freetuple(stup);
+       }
+
+       /* close rel, but hold lock till upcoming commit */
+       heap_close(sd, NoLock);
 }
index 96d4936..13a78f1 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/commands/Attic/command.c,v 1.125 2001/03/23 04:49:52 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/commands/Attic/command.c,v 1.126 2001/05/07 00:43:17 tgl Exp $
  *
  * NOTES
  *       The PerformAddAttribute() code, like most of the relation
@@ -56,6 +56,7 @@
 #include "access/genam.h"
 
 
+static void drop_default(Oid relid, int16 attnum);
 static bool needs_toast_table(Relation rel);
 static bool is_relation(char *name);
 
@@ -408,7 +409,7 @@ AlterTableAddColumn(const char *relationName,
                HeapTuple       typeTuple;
                Form_pg_type tform;
                char       *typename;
-               int                     attnelems;
+               int                     attndims;
 
                if (SearchSysCacheExists(ATTNAME,
                                                                 ObjectIdGetDatum(reltup->t_data->t_oid),
@@ -425,11 +426,11 @@ AlterTableAddColumn(const char *relationName,
 
                if (colDef->typename->arrayBounds)
                {
-                       attnelems = length(colDef->typename->arrayBounds);
+                       attndims = length(colDef->typename->arrayBounds);
                        typename = makeArrayTypeName(colDef->typename->name);
                }
                else
-                       attnelems = 0;
+                       attndims = 0;
 
                typeTuple = SearchSysCache(TYPENAME,
                                                                   PointerGetDatum(typename),
@@ -441,12 +442,12 @@ AlterTableAddColumn(const char *relationName,
                namestrcpy(&(attribute->attname), colDef->colname);
                attribute->atttypid = typeTuple->t_data->t_oid;
                attribute->attlen = tform->typlen;
-               attribute->attdispersion = 0;
+               attribute->attstattarget = DEFAULT_ATTSTATTARGET;
                attribute->attcacheoff = -1;
                attribute->atttypmod = colDef->typename->typmod;
                attribute->attnum = i;
                attribute->attbyval = tform->typbyval;
-               attribute->attnelems = attnelems;
+               attribute->attndims = attndims;
                attribute->attisset = (bool) (tform->typtype == 'c');
                attribute->attstorage = tform->typstorage;
                attribute->attalign = tform->typalign;
@@ -496,17 +497,13 @@ AlterTableAddColumn(const char *relationName,
 }
 
 
-
-static void drop_default(Oid relid, int16 attnum);
-
-
 /*
  * ALTER TABLE ALTER COLUMN SET/DROP DEFAULT
  */
 void
-AlterTableAlterColumn(const char *relationName,
-                                         bool inh, const char *colName,
-                                         Node *newDefault)
+AlterTableAlterColumnDefault(const char *relationName,
+                                                        bool inh, const char *colName,
+                                                        Node *newDefault)
 {
        Relation        rel;
        HeapTuple       tuple;
@@ -551,8 +548,8 @@ AlterTableAlterColumn(const char *relationName,
                        if (childrelid == myrelid)
                                continue;
                        rel = heap_open(childrelid, AccessExclusiveLock);
-                       AlterTableAlterColumn(RelationGetRelationName(rel),
-                                                                 false, colName, newDefault);
+                       AlterTableAlterColumnDefault(RelationGetRelationName(rel),
+                                                                                false, colName, newDefault);
                        heap_close(rel, AccessExclusiveLock);
                }
        }
@@ -560,7 +557,7 @@ AlterTableAlterColumn(const char *relationName,
        /* -= now do the thing on this relation =- */
 
        /* reopen the business */
-       rel = heap_openr((char *) relationName, AccessExclusiveLock);
+       rel = heap_openr(relationName, AccessExclusiveLock);
 
        /*
         * get the number of the attribute
@@ -647,7 +644,6 @@ AlterTableAlterColumn(const char *relationName,
 }
 
 
-
 static void
 drop_default(Oid relid, int16 attnum)
 {
@@ -675,6 +671,104 @@ drop_default(Oid relid, int16 attnum)
 }
 
 
+/*
+ * ALTER TABLE ALTER COLUMN SET STATISTICS
+ */
+void
+AlterTableAlterColumnStatistics(const char *relationName,
+                                                               bool inh, const char *colName,
+                                                               Node *statsTarget)
+{
+       Relation        rel;
+       Oid                     myrelid;
+       int                     newtarget;
+       Relation        attrelation;
+       HeapTuple       tuple;
+
+#ifndef NO_SECURITY
+       if (!pg_ownercheck(GetUserId(), relationName, RELNAME))
+               elog(ERROR, "ALTER TABLE: permission denied");
+#endif
+
+       rel = heap_openr(relationName, AccessExclusiveLock);
+       if (rel->rd_rel->relkind != RELKIND_RELATION)
+               elog(ERROR, "ALTER TABLE: relation \"%s\" is not a table",
+                        relationName);
+       myrelid = RelationGetRelid(rel);
+       heap_close(rel, NoLock);        /* close rel, but keep lock! */
+
+       /*
+        * Propagate to children if desired
+        */
+       if (inh)
+       {
+               List       *child,
+                                  *children;
+
+               /* this routine is actually in the planner */
+               children = find_all_inheritors(myrelid);
+
+               /*
+                * find_all_inheritors does the recursive search of the
+                * inheritance hierarchy, so all we have to do is process all of
+                * the relids in the list that it returns.
+                */
+               foreach(child, children)
+               {
+                       Oid                     childrelid = lfirsti(child);
+
+                       if (childrelid == myrelid)
+                               continue;
+                       rel = heap_open(childrelid, AccessExclusiveLock);
+                       AlterTableAlterColumnStatistics(RelationGetRelationName(rel),
+                                                                                       false, colName, statsTarget);
+                       heap_close(rel, AccessExclusiveLock);
+               }
+       }
+
+       /* -= now do the thing on this relation =- */
+
+       Assert(IsA(statsTarget, Integer));
+       newtarget = intVal(statsTarget);
+
+       /* Limit target to sane range (should we raise an error instead?) */
+       if (newtarget < 0)
+               newtarget = 0;
+       else if (newtarget > 1000)
+               newtarget = 1000;
+
+       attrelation = heap_openr(AttributeRelationName, RowExclusiveLock);
+
+       tuple = SearchSysCacheCopy(ATTNAME,
+                                                          ObjectIdGetDatum(myrelid),
+                                                          PointerGetDatum(colName),
+                                                          0, 0);
+       if (!HeapTupleIsValid(tuple))
+               elog(ERROR, "ALTER TABLE: relation \"%s\" has no column \"%s\"",
+                        relationName, colName);
+
+       if (((Form_pg_attribute) GETSTRUCT(tuple))->attnum < 0)
+               elog(ERROR, "ALTER TABLE: cannot change system attribute \"%s\"",
+                        colName);
+
+       ((Form_pg_attribute) GETSTRUCT(tuple))->attstattarget = newtarget;
+
+       simple_heap_update(attrelation, &tuple->t_self, tuple);
+
+       /* keep system catalog indices current */
+       {
+               Relation        irelations[Num_pg_attr_indices];
+
+               CatalogOpenIndices(Num_pg_attr_indices, Name_pg_attr_indices, irelations);
+               CatalogIndexInsert(irelations, Num_pg_attr_indices, attrelation, tuple);
+               CatalogCloseIndices(Num_pg_attr_indices, irelations);
+       }
+
+       heap_freetuple(tuple);
+       heap_close(attrelation, RowExclusiveLock);
+}
+
+
 #ifdef _DROP_COLUMN_HACK__
 /*
  *     ALTER TABLE DROP COLUMN trial implementation
index 694d0e8..9a0dbdc 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.189 2001/03/25 23:23:58 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/commands/vacuum.c,v 1.190 2001/05/07 00:43:18 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -53,25 +53,90 @@ extern XLogRecPtr log_heap_move(Relation reln,
                          Buffer oldbuf, ItemPointerData from,
                          Buffer newbuf, HeapTuple newtup);
 
+
+typedef struct VRelListData
+{
+       Oid                     vrl_relid;
+       struct VRelListData *vrl_next;
+} VRelListData;
+
+typedef VRelListData *VRelList;
+
+typedef struct VacPageData
+{
+       BlockNumber blkno;                      /* BlockNumber of this Page */
+       Size            free;                   /* FreeSpace on this Page */
+       uint16          offsets_used;   /* Number of OffNums used by vacuum */
+       uint16          offsets_free;   /* Number of OffNums free or to be free */
+       OffsetNumber offsets[1];        /* Array of its OffNums */
+} VacPageData;
+
+typedef VacPageData *VacPage;
+
+typedef struct VacPageListData
+{
+       int                     empty_end_pages;/* Number of "empty" end-pages */
+       int                     num_pages;              /* Number of pages in pagedesc */
+       int                     num_allocated_pages;    /* Number of allocated pages in
+                                                                                * pagedesc */
+       VacPage    *pagedesc;           /* Descriptions of pages */
+} VacPageListData;
+
+typedef VacPageListData *VacPageList;
+
+typedef struct VTupleLinkData
+{
+       ItemPointerData new_tid;
+       ItemPointerData this_tid;
+} VTupleLinkData;
+
+typedef VTupleLinkData *VTupleLink;
+
+typedef struct VTupleMoveData
+{
+       ItemPointerData tid;            /* tuple ID */
+       VacPage         vacpage;                /* where to move */
+       bool            cleanVpd;               /* clean vacpage before using */
+} VTupleMoveData;
+
+typedef VTupleMoveData *VTupleMove;
+
+typedef struct VRelStats
+{
+       Oid                     relid;
+       long            num_pages;
+       long            num_tuples;
+       Size            min_tlen;
+       Size            max_tlen;
+       bool            hasindex;
+       int                     num_vtlinks;
+       VTupleLink      vtlinks;
+} VRelStats;
+
+
 static MemoryContext vac_context = NULL;
 
 static int     MESSAGE_LEVEL;          /* message level */
 
 static TransactionId XmaxRecent;
 
+
 /* non-export function prototypes */
 static void vacuum_init(void);
 static void vacuum_shutdown(void);
-static void vac_vacuum(NameData *VacRelP, bool analyze, List *anal_cols2);
-static VRelList getrels(NameData *VacRelP);
+static VRelList getrels(Name VacRelP, const char *stmttype);
 static void vacuum_rel(Oid relid);
-static void scan_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages, VacPageList fraged_pages);
-static void repair_frag(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages, VacPageList fraged_pages, int nindices, Relation *Irel);
-static void vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacpagelist);
+static void scan_heap(VRelStats *vacrelstats, Relation onerel,
+                                         VacPageList vacuum_pages, VacPageList fraged_pages);
+static void repair_frag(VRelStats *vacrelstats, Relation onerel,
+                                               VacPageList vacuum_pages, VacPageList fraged_pages,
+                                               int nindices, Relation *Irel);
+static void vacuum_heap(VRelStats *vacrelstats, Relation onerel,
+                                               VacPageList vacpagelist);
 static void vacuum_page(Relation onerel, Buffer buffer, VacPage vacpage);
-static void vacuum_index(VacPageList vacpagelist, Relation indrel, int num_tuples, int keep_tuples);
-static void scan_index(Relation indrel, int num_tuples);
-static void update_relstats(Oid relid, int num_pages, int num_tuples, bool hasindex, VRelStats *vacrelstats);
+static void vacuum_index(VacPageList vacpagelist, Relation indrel,
+                                                long num_tuples, int keep_tuples);
+static void scan_index(Relation indrel, long num_tuples);
 static VacPage tid_reaped(ItemPointer itemptr, VacPageList vacpagelist);
 static void reap_page(VacPageList vacpagelist, VacPage vacpage);
 static void vpage_insert(VacPageList vacpagelist, VacPage vpnew);
@@ -88,17 +153,17 @@ static bool enough_space(VacPage vacpage, Size len);
 static char *show_rusage(struct rusage * ru0);
 
 
+/*
+ * Primary entry point for VACUUM and ANALYZE commands.
+ */
 void
-vacuum(char *vacrel, bool verbose, bool analyze, List *anal_cols)
+vacuum(VacuumStmt *vacstmt)
 {
+       const char *stmttype = vacstmt->vacuum ? "VACUUM" : "ANALYZE";
        NameData        VacRel;
        Name            VacRelName;
-       MemoryContext old;
-       List       *le;
-       List       *anal_cols2 = NIL;
-
-       if (anal_cols != NIL && !analyze)
-               elog(ERROR, "Can't vacuum columns, only tables.  You can 'vacuum analyze' columns.");
+       VRelList        vrl,
+                               cur;
 
        /*
         * We cannot run VACUUM inside a user transaction block; if we were
@@ -110,9 +175,9 @@ vacuum(char *vacrel, bool verbose, bool analyze, List *anal_cols)
         * behavior.
         */
        if (IsTransactionBlock())
-               elog(ERROR, "VACUUM cannot run inside a BEGIN/END block");
+               elog(ERROR, "%s cannot run inside a BEGIN/END block", stmttype);
 
-       if (verbose)
+       if (vacstmt->verbose)
                MESSAGE_LEVEL = NOTICE;
        else
                MESSAGE_LEVEL = DEBUG;
@@ -130,37 +195,36 @@ vacuum(char *vacrel, bool verbose, bool analyze, List *anal_cols)
                                                                                ALLOCSET_DEFAULT_INITSIZE,
                                                                                ALLOCSET_DEFAULT_MAXSIZE);
 
-       /* vacrel gets de-allocated on xact commit, so copy it to safe storage */
-       if (vacrel)
+       /* Convert vacrel, which is just a string, to a Name */
+       if (vacstmt->vacrel)
        {
-               namestrcpy(&VacRel, vacrel);
+               namestrcpy(&VacRel, vacstmt->vacrel);
                VacRelName = &VacRel;
        }
        else
                VacRelName = NULL;
 
-       /* must also copy the column list, if any, to safe storage */
-       old = MemoryContextSwitchTo(vac_context);
-       foreach(le, anal_cols)
-       {
-               char       *col = (char *) lfirst(le);
-
-               anal_cols2 = lappend(anal_cols2, pstrdup(col));
-       }
-       MemoryContextSwitchTo(old);
+       /* Build list of relations to process (note this lives in vac_context) */
+       vrl = getrels(VacRelName, stmttype);
 
        /*
         * Start up the vacuum cleaner.
-        *
-        * NOTE: since this commits the current transaction, the memory holding
-        * any passed-in parameters gets freed here.  We must have already
-        * copied pass-by-reference parameters to safe storage.  Don't make me
-        * fix this again!
         */
        vacuum_init();
 
-       /* vacuum the database */
-       vac_vacuum(VacRelName, analyze, anal_cols2);
+       /*
+        * Process each selected relation.  We are careful to process
+        * each relation in a separate transaction in order to avoid holding
+        * too many locks at one time.
+        */
+       for (cur = vrl; cur != (VRelList) NULL; cur = cur->vrl_next)
+       {
+               if (vacstmt->vacuum)
+                       vacuum_rel(cur->vrl_relid);
+               /* analyze separately so locking is minimized */
+               if (vacstmt->analyze)
+                       analyze_rel(cur->vrl_relid, vacstmt);
+       }
 
        /* clean up */
        vacuum_shutdown();
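
To make the control flow of the loop above concrete, here is a tiny standalone C sketch of processing each relation inside its own transaction so that locks are released between relations. The begin_xact/commit_xact/vacuum_one/analyze_one stubs and the OIDs are invented for the sketch; they are not the real backend entry points.

	#include <stdio.h>
	#include <stdbool.h>

	/* Illustrative stand-ins for starting and ending a transaction. */
	static void begin_xact(void)   { printf("  BEGIN\n"); }
	static void commit_xact(void)  { printf("  COMMIT\n"); }

	static void vacuum_one(unsigned int relid)  { printf("  vacuum rel %u\n", relid); }
	static void analyze_one(unsigned int relid) { printf("  analyze rel %u\n", relid); }

	int
	main(void)
	{
		unsigned int rels[] = {16384, 16390, 16402};	/* made-up OIDs */
		int			nrels = (int) (sizeof(rels) / sizeof(rels[0]));
		bool		do_vacuum = true,
					do_analyze = true;
		int			i;

		/* One transaction per relation, so locks are not held across relations. */
		for (i = 0; i < nrels; i++)
		{
			printf("relation %u:\n", rels[i]);
			begin_xact();
			if (do_vacuum)
				vacuum_one(rels[i]);
			if (do_analyze)
				analyze_one(rels[i]);
			commit_xact();
		}
		return 0;
	}
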
@@ -187,14 +251,14 @@ vacuum(char *vacrel, bool verbose, bool analyze, List *anal_cols)
  *             PostgresMain().
  */
 static void
-vacuum_init()
+vacuum_init(void)
 {
        /* matches the StartTransaction in PostgresMain() */
        CommitTransactionCommand();
 }
 
 static void
-vacuum_shutdown()
+vacuum_shutdown(void)
 {
        /* on entry, we are not in a transaction */
 
@@ -223,34 +287,10 @@ vacuum_shutdown()
 }
 
 /*
- *     vac_vacuum() -- vacuum the database.
- *
- *             This routine builds a list of relations to vacuum, and then calls
- *             code that vacuums them one at a time.  We are careful to vacuum each
- *             relation in a separate transaction in order to avoid holding too many
- *             locks at one time.
+ * Build a list of VRelListData nodes for each relation to be processed
  */
-static void
-vac_vacuum(NameData *VacRelP, bool analyze, List *anal_cols2)
-{
-       VRelList        vrl,
-                               cur;
-
-       /* get list of relations */
-       vrl = getrels(VacRelP);
-
-       /* vacuum each heap relation */
-       for (cur = vrl; cur != (VRelList) NULL; cur = cur->vrl_next)
-       {
-               vacuum_rel(cur->vrl_relid);
-               /* analyze separately so locking is minimized */
-               if (analyze)
-                       analyze_rel(cur->vrl_relid, anal_cols2, MESSAGE_LEVEL);
-       }
-}
-
 static VRelList
-getrels(NameData *VacRelP)
+getrels(Name VacRelP, const char *stmttype)
 {
        Relation        rel;
        TupleDesc       tupdesc;
@@ -262,12 +302,9 @@ getrels(NameData *VacRelP)
        char       *rname;
        char            rkind;
        bool            n;
-       bool            found = false;
        ScanKeyData key;
 
-       StartTransactionCommand();
-
-       if (NameStr(*VacRelP))
+       if (VacRelP)
        {
 
                /*
@@ -287,6 +324,7 @@ getrels(NameData *VacRelP)
        }
        else
        {
+               /* find all relations listed in pg_class */
                ScanKeyEntryInitialize(&key, 0x0, Anum_pg_class_relkind,
                                                           F_CHAREQ, CharGetDatum('r'));
        }
@@ -300,21 +338,20 @@ getrels(NameData *VacRelP)
 
        while (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
        {
-               found = true;
-
                d = heap_getattr(tuple, Anum_pg_class_relname, tupdesc, &n);
-               rname = (char *) DatumGetPointer(d);
+               rname = (char *) DatumGetName(d);
 
                d = heap_getattr(tuple, Anum_pg_class_relkind, tupdesc, &n);
                rkind = DatumGetChar(d);
 
                if (rkind != RELKIND_RELATION)
                {
-                       elog(NOTICE, "Vacuum: can not process indices, views and certain system tables");
+                       elog(NOTICE, "%s: can not process indexes, views or special system tables",
+                                stmttype);
                        continue;
                }
 
-               /* get a relation list entry for this guy */
+               /* Make a relation list entry for this guy */
                if (vrl == (VRelList) NULL)
                        vrl = cur = (VRelList)
                                MemoryContextAlloc(vac_context, sizeof(VRelListData));
@@ -332,10 +369,8 @@ getrels(NameData *VacRelP)
        heap_endscan(scan);
        heap_close(rel, AccessShareLock);
 
-       if (!found)
-               elog(NOTICE, "Vacuum: table not found");
-
-       CommitTransactionCommand();
+       if (vrl == NULL)
+               elog(NOTICE, "%s: table not found", stmttype);
 
        return vrl;
 }
@@ -432,7 +467,8 @@ vacuum_rel(Oid relid)
         */
        vacrelstats = (VRelStats *) palloc(sizeof(VRelStats));
        vacrelstats->relid = relid;
-       vacrelstats->num_pages = vacrelstats->num_tuples = 0;
+       vacrelstats->num_pages = 0;
+       vacrelstats->num_tuples = 0;
        vacrelstats->hasindex = false;
 
        GetXmaxRecent(&XmaxRecent);
@@ -457,8 +493,8 @@ vacuum_rel(Oid relid)
                vacrelstats->hasindex = true;
        else
                vacrelstats->hasindex = false;
-#ifdef NOT_USED
 
+#ifdef NOT_USED
        /*
         * reindex in VACUUM is dangerous under WAL. ifdef out until it
         * becomes safe.
@@ -528,9 +564,8 @@ vacuum_rel(Oid relid)
        heap_close(onerel, NoLock);
 
        /* update statistics in pg_class */
-       update_relstats(vacrelstats->relid, vacrelstats->num_pages,
-                                       vacrelstats->num_tuples, vacrelstats->hasindex,
-                                       vacrelstats);
+       vac_update_relstats(vacrelstats->relid, vacrelstats->num_pages,
+                                               vacrelstats->num_tuples, vacrelstats->hasindex);
 
        /*
         * Complete the transaction and free all temporary memory used.
@@ -582,8 +617,8 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
        char       *relname;
        VacPage         vacpage,
                                vp;
+       long            num_tuples;
        uint32          tups_vacuumed,
-                               num_tuples,
                                nkeep,
                                nunused,
                                ncrash,
@@ -913,7 +948,6 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
        /* save stats in the rel list for use later */
        vacrelstats->num_tuples = num_tuples;
        vacrelstats->num_pages = nblocks;
-/*       vacrelstats->natts = attr_cnt;*/
        if (num_tuples == 0)
                min_tlen = max_tlen = 0;
        vacrelstats->min_tlen = min_tlen;
@@ -960,7 +994,7 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
        }
 
        elog(MESSAGE_LEVEL, "Pages %u: Changed %u, reaped %u, Empty %u, New %u; \
-Tup %u: Vac %u, Keep/VTL %u/%u, Crash %u, UnUsed %u, MinLen %lu, MaxLen %lu; \
+Tup %lu: Vac %u, Keep/VTL %u/%u, Crash %u, UnUsed %u, MinLen %lu, MaxLen %lu; \
 Re-using: Free/Avail. Space %lu/%lu; EndEmpty/Avail. Pages %u/%u. %s",
                 nblocks, changed_pages, vacuum_pages->num_pages, empty_pages,
                 new_pages, num_tuples, tups_vacuumed,
@@ -2009,7 +2043,7 @@ vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages)
 {
        Buffer          buf;
        VacPage    *vacpage;
-       int                     nblocks;
+       long            nblocks;
        int                     i;
 
        nblocks = vacuum_pages->num_pages;
@@ -2044,7 +2078,7 @@ vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages)
        /* truncate relation if there are some empty end-pages */
        if (vacuum_pages->empty_end_pages > 0)
        {
-               elog(MESSAGE_LEVEL, "Rel %s: Pages: %u --> %u.",
+               elog(MESSAGE_LEVEL, "Rel %s: Pages: %lu --> %lu.",
                         RelationGetRelationName(onerel),
                         vacrelstats->num_pages, nblocks);
                nblocks = smgrtruncate(DEFAULT_SMGR, onerel, nblocks);
@@ -2094,11 +2128,11 @@ vacuum_page(Relation onerel, Buffer buffer, VacPage vacpage)
  *
  */
 static void
-scan_index(Relation indrel, int num_tuples)
+scan_index(Relation indrel, long num_tuples)
 {
        RetrieveIndexResult res;
        IndexScanDesc iscan;
-       int                     nitups;
+       long            nitups;
        int                     nipages;
        struct rusage ru0;
 
@@ -2119,14 +2153,14 @@ scan_index(Relation indrel, int num_tuples)
 
        /* now update statistics in pg_class */
        nipages = RelationGetNumberOfBlocks(indrel);
-       update_relstats(RelationGetRelid(indrel), nipages, nitups, false, NULL);
+       vac_update_relstats(RelationGetRelid(indrel), nipages, nitups, false);
 
-       elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %u. %s",
+       elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %lu. %s",
                 RelationGetRelationName(indrel), nipages, nitups,
                 show_rusage(&ru0));
 
        if (nitups != num_tuples)
-               elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%u) IS NOT THE SAME AS HEAP' (%u).\
+               elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%lu) IS NOT THE SAME AS HEAP' (%lu).\
 \n\tRecreate the index.",
                         RelationGetRelationName(indrel), nitups, num_tuples);
 
@@ -2145,13 +2179,14 @@ scan_index(Relation indrel, int num_tuples)
  *             pg_class.
  */
 static void
-vacuum_index(VacPageList vacpagelist, Relation indrel, int num_tuples, int keep_tuples)
+vacuum_index(VacPageList vacpagelist, Relation indrel,
+                        long num_tuples, int keep_tuples)
 {
        RetrieveIndexResult res;
        IndexScanDesc iscan;
        ItemPointer heapptr;
        int                     tups_vacuumed;
-       int                     num_index_tuples;
+       long            num_index_tuples;
        int                     num_pages;
        VacPage         vp;
        struct rusage ru0;
@@ -2196,15 +2231,16 @@ vacuum_index(VacPageList vacpagelist, Relation indrel, int num_tuples, int keep_
 
        /* now update statistics in pg_class */
        num_pages = RelationGetNumberOfBlocks(indrel);
-       update_relstats(RelationGetRelid(indrel), num_pages, num_index_tuples, false, NULL);
+       vac_update_relstats(RelationGetRelid(indrel),
+                                               num_pages, num_index_tuples, false);
 
-       elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %u: Deleted %u. %s",
+       elog(MESSAGE_LEVEL, "Index %s: Pages %u; Tuples %lu: Deleted %u. %s",
                 RelationGetRelationName(indrel), num_pages,
                 num_index_tuples - keep_tuples, tups_vacuumed,
                 show_rusage(&ru0));
 
        if (num_index_tuples != num_tuples + keep_tuples)
-               elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%u) IS NOT THE SAME AS HEAP' (%u).\
+               elog(NOTICE, "Index %s: NUMBER OF INDEX' TUPLES (%lu) IS NOT THE SAME AS HEAP' (%lu).\
 \n\tRecreate the index.",
                  RelationGetRelationName(indrel), num_index_tuples, num_tuples);
 
@@ -2255,7 +2291,7 @@ tid_reaped(ItemPointer itemptr, VacPageList vacpagelist)
 }
 
 /*
- *     update_relstats() -- update statistics for one relation
+ *     vac_update_relstats() -- update statistics for one relation
  *
  *             Update the whole-relation statistics that are kept in its pg_class
  *             row.  There are additional stats that will be updated if we are
@@ -2268,13 +2304,12 @@ tid_reaped(ItemPointer itemptr, VacPageList vacpagelist)
  *             we updated these tuples in the usual way, vacuuming pg_class itself
  *             wouldn't work very well --- by the time we got done with a vacuum
  *             cycle, most of the tuples in pg_class would've been obsoleted.
- *             Updating pg_class's own statistics would be especially tricky.
  *             Of course, this only works for fixed-size never-null columns, but
  *             these are.
  */
-static void
-update_relstats(Oid relid, int num_pages, int num_tuples, bool hasindex,
-                               VRelStats *vacrelstats)
+void
+vac_update_relstats(Oid relid, long num_pages, double num_tuples,
+                                       bool hasindex)
 {
        Relation        rd;
        HeapTupleData rtup;
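
The "overwrite in place" idea from the comment above can be shown with a trivial standalone C sketch: the fixed-size, never-null statistics fields of an existing row are simply assigned over, rather than writing a new row version. DemoClassRow and demo_update_relstats are invented names, and the field set is only a rough stand-in for the pg_class columns involved.

	#include <stdio.h>
	#include <stdbool.h>

	/* Illustrative fixed-size "pg_class" row: only the stats fields we care about. */
	typedef struct
	{
		unsigned int relid;
		long		relpages;
		double		reltuples;
		bool		relhasindex;
	} DemoClassRow;

	/* Overwrite the stats fields of an existing row in place (no new row version). */
	static void
	demo_update_relstats(DemoClassRow *row,
						 long num_pages, double num_tuples, bool hasindex)
	{
		row->relpages = num_pages;
		row->reltuples = num_tuples;
		row->relhasindex = hasindex;
	}

	int
	main(void)
	{
		DemoClassRow row = {16384, 0, 0.0, false};

		demo_update_relstats(&row, 128, 10000.0, true);
		printf("rel %u: pages=%ld tuples=%.0f hasindex=%d\n",
			   row.relid, row.relpages, row.reltuples, (int) row.relhasindex);
		return 0;
	}
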
index 12c6f82..e0543a2 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/executor/nodeSort.c,v 1.32 2001/03/22 06:16:13 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/executor/nodeSort.c,v 1.33 2001/05/07 00:43:18 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "utils/tuplesort.h"
 
 /* ----------------------------------------------------------------
- *             FormSortKeys(node)
+ *             ExtractSortKeys
  *
- *             Forms the structure containing information used to sort the relation.
+ *             Extract the sorting key information from the plan node.
  *
- *             Returns an array of ScanKeyData.
+ *             Returns two palloc'd arrays, one of sort operator OIDs and
+ *             one of attribute numbers.
  * ----------------------------------------------------------------
  */
-static ScanKey
-FormSortKeys(Sort *sortnode)
+static void
+ExtractSortKeys(Sort *sortnode,
+                               Oid **sortOperators,
+                               AttrNumber **attNums)
 {
-       ScanKey         sortkeys;
        List       *targetList;
-       List       *tl;
        int                     keycount;
-       Resdom     *resdom;
-       AttrNumber      resno;
-       Index           reskey;
-       Oid                     reskeyop;
+       Oid                *sortOps;
+       AttrNumber *attNos;
+       List       *tl;
 
        /*
         * get information from the node
@@ -46,36 +46,33 @@ FormSortKeys(Sort *sortnode)
        keycount = sortnode->keycount;
 
        /*
-        * first allocate space for scan keys
+        * first allocate space for results
         */
        if (keycount <= 0)
-               elog(ERROR, "FormSortKeys: keycount <= 0");
-       sortkeys = (ScanKey) palloc(keycount * sizeof(ScanKeyData));
-       MemSet((char *) sortkeys, 0, keycount * sizeof(ScanKeyData));
+               elog(ERROR, "ExtractSortKeys: keycount <= 0");
+       sortOps = (Oid *) palloc(keycount * sizeof(Oid));
+       MemSet(sortOps, 0, keycount * sizeof(Oid));
+       *sortOperators = sortOps;
+       attNos = (AttrNumber *) palloc(keycount * sizeof(AttrNumber));
+       MemSet(attNos, 0, keycount * sizeof(AttrNumber));
+       *attNums = attNos;
 
        /*
-        * form each scan key from the resdom info in the target list
+        * extract info from the resdom nodes in the target list
         */
        foreach(tl, targetList)
        {
                TargetEntry *target = (TargetEntry *) lfirst(tl);
-
-               resdom = target->resdom;
-               resno = resdom->resno;
-               reskey = resdom->reskey;
-               reskeyop = resdom->reskeyop;
+               Resdom     *resdom = target->resdom;
+               Index           reskey = resdom->reskey;
 
                if (reskey > 0)                 /* ignore TLEs that are not sort keys */
                {
-                       ScanKeyEntryInitialize(&sortkeys[reskey - 1],
-                                                                  0x0,
-                                                                  resno,
-                                                                  (RegProcedure) reskeyop,
-                                                                  (Datum) 0);
+                       Assert(reskey <= keycount);
+                       sortOps[reskey - 1] = resdom->reskeyop;
+                       attNos[reskey - 1] = resdom->resno;
                }
        }
-
-       return sortkeys;
 }
 
 /* ----------------------------------------------------------------
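
The extraction above can be mimicked in a standalone C sketch: walk a target list, and for every entry whose reskey is nonzero, drop its sort operator and attribute number into parallel arrays indexed by reskey - 1. DemoResdom and the operator OIDs below are made up for illustration.

	#include <stdio.h>
	#include <stdlib.h>

	/* Minimal stand-in for the Resdom fields used above. */
	typedef struct
	{
		int			resno;		/* attribute number in the tuple */
		int			reskey;		/* 1-based sort-key position, 0 if not a key */
		unsigned int reskeyop;	/* sort operator OID (illustrative values) */
	} DemoResdom;

	int
	main(void)
	{
		DemoResdom	tlist[] = {
			{1, 0, 0},			/* not a sort key */
			{2, 1, 1111},		/* first sort key; operator OID is made up */
			{3, 2, 2222},		/* second sort key; operator OID is made up */
		};
		int			ntlist = (int) (sizeof(tlist) / sizeof(tlist[0]));
		int			keycount = 2;
		unsigned int *sortOps = calloc(keycount, sizeof(unsigned int));
		int		   *attNos = calloc(keycount, sizeof(int));
		int			i;

		/* Fill the parallel arrays, indexed by reskey - 1, just as above. */
		for (i = 0; i < ntlist; i++)
		{
			if (tlist[i].reskey > 0)
			{
				sortOps[tlist[i].reskey - 1] = tlist[i].reskeyop;
				attNos[tlist[i].reskey - 1] = tlist[i].resno;
			}
		}

		for (i = 0; i < keycount; i++)
			printf("key %d: attno=%d operator=%u\n", i + 1, attNos[i], sortOps[i]);

		free(sortOps);
		free(attNos);
		return 0;
	}
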
@@ -124,8 +121,8 @@ ExecSort(Sort *node)
        {
                Plan       *outerNode;
                TupleDesc       tupDesc;
-               int                     keycount;
-               ScanKey         sortkeys;
+               Oid                *sortOperators;
+               AttrNumber *attNums;
 
                SO1_printf("ExecSort: %s\n",
                                   "sorting subplan");
@@ -145,14 +142,17 @@ ExecSort(Sort *node)
 
                outerNode = outerPlan((Plan *) node);
                tupDesc = ExecGetTupType(outerNode);
-               keycount = node->keycount;
-               sortkeys = (ScanKey) sortstate->sort_Keys;
 
-               tuplesortstate = tuplesort_begin_heap(tupDesc, keycount, sortkeys,
-                                                                                         true /* randomAccess */ );
+               ExtractSortKeys(node, &sortOperators, &attNums);
 
+               tuplesortstate = tuplesort_begin_heap(tupDesc, node->keycount,
+                                                                                         sortOperators, attNums,
+                                                                                         true /* randomAccess */ );
                sortstate->tuplesortstate = (void *) tuplesortstate;
 
+               pfree(sortOperators);
+               pfree(attNums);
+
                /*
                 * Scan the subplan and feed all the tuples to tuplesort.
                 */
@@ -230,7 +230,6 @@ ExecInitSort(Sort *node, EState *estate, Plan *parent)
         */
        sortstate = makeNode(SortState);
        sortstate->sort_Done = false;
-       sortstate->sort_Keys = NULL;
        sortstate->tuplesortstate = NULL;
 
        node->sortstate = sortstate;
@@ -259,11 +258,6 @@ ExecInitSort(Sort *node, EState *estate, Plan *parent)
        ExecInitNode(outerPlan, estate, (Plan *) node);
 
        /*
-        * initialize sortstate information
-        */
-       sortstate->sort_Keys = FormSortKeys(node);
-
-       /*
         * initialize tuple type.  no need to initialize projection info
         * because this node doesn't do projections.
         */
@@ -321,9 +315,6 @@ ExecEndSort(Sort *node)
                tuplesort_end((Tuplesortstate *) sortstate->tuplesortstate);
        sortstate->tuplesortstate = NULL;
 
-       if (sortstate->sort_Keys != NULL)
-               pfree(sortstate->sort_Keys);
-
        pfree(sortstate);
        node->sortstate = NULL;
 
index ad50630..ee5a803 100644 (file)
@@ -15,7 +15,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.140 2001/03/22 06:16:14 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.141 2001/05/07 00:43:18 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1378,8 +1378,8 @@ _copyRestrictInfo(RestrictInfo *from)
        newnode->left_pathkey = NIL;
        newnode->right_pathkey = NIL;
        newnode->hashjoinoperator = from->hashjoinoperator;
-       newnode->left_dispersion = from->left_dispersion;
-       newnode->right_dispersion = from->right_dispersion;
+       newnode->left_bucketsize = from->left_bucketsize;
+       newnode->right_bucketsize = from->right_bucketsize;
 
        return newnode;
 }
@@ -2209,11 +2209,12 @@ _copyVacuumStmt(VacuumStmt *from)
 {
        VacuumStmt *newnode = makeNode(VacuumStmt);
 
-       newnode->verbose = from->verbose;
+       newnode->vacuum = from->vacuum;
        newnode->analyze = from->analyze;
+       newnode->verbose = from->verbose;
        if (from->vacrel)
                newnode->vacrel = pstrdup(from->vacrel);
-       Node_Copy(from, newnode, va_spec);
+       Node_Copy(from, newnode, va_cols);
 
        return newnode;
 }
index 06ee63b..284a534 100644 (file)
@@ -20,7 +20,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.88 2001/03/22 03:59:31 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.89 2001/05/07 00:43:19 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -516,7 +516,7 @@ _equalRestrictInfo(RestrictInfo *a, RestrictInfo *b)
                return false;
 
        /*
-        * ignore eval_cost, left/right_pathkey, and left/right_dispersion,
+        * ignore eval_cost, left/right_pathkey, and left/right_bucketsize,
         * since they may not be set yet, and should be derivable from the
         * clause anyway
         */
@@ -1113,13 +1113,15 @@ _equalDropdbStmt(DropdbStmt *a, DropdbStmt *b)
 static bool
 _equalVacuumStmt(VacuumStmt *a, VacuumStmt *b)
 {
-       if (a->verbose != b->verbose)
+       if (a->vacuum != b->vacuum)
                return false;
        if (a->analyze != b->analyze)
                return false;
+       if (a->verbose != b->verbose)
+               return false;
        if (!equalstr(a->vacrel, b->vacrel))
                return false;
-       if (!equal(a->va_spec, b->va_spec))
+       if (!equal(a->va_cols, b->va_cols))
                return false;
 
        return true;
index 9a071e7..4c0c1b0 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.107 2001/03/22 03:59:32 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.108 2001/05/07 00:43:19 tgl Exp $
  *
  * NOTES
  *       Most of the read functions for plan nodes are tested. (In fact, they
@@ -1874,11 +1874,11 @@ _readRestrictInfo(void)
 
        /* eval_cost is not part of saved representation; compute on first use */
        local_node->eval_cost = -1;
-       /* ditto for cached pathkeys and dispersion */
+       /* ditto for cached pathkeys and bucketsize */
        local_node->left_pathkey = NIL;
        local_node->right_pathkey = NIL;
-       local_node->left_dispersion = -1;
-       local_node->right_dispersion = -1;
+       local_node->left_bucketsize = -1;
+       local_node->right_bucketsize = -1;
 
        return local_node;
 }
index c52af72..bdfbbb1 100644 (file)
@@ -41,7 +41,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.70 2001/04/25 22:04:37 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.71 2001/05/07 00:43:20 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 
 #include <math.h>
 
+#include "catalog/pg_statistic.h"
 #include "executor/nodeHash.h"
 #include "miscadmin.h"
 #include "optimizer/clauses.h"
 #include "optimizer/cost.h"
+#include "optimizer/pathnode.h"
+#include "parser/parsetree.h"
 #include "utils/lsyscache.h"
+#include "utils/syscache.h"
 
 
 /*
@@ -573,7 +577,7 @@ cost_mergejoin(Path *path,
  * 'outer_path' is the path for the outer relation
  * 'inner_path' is the path for the inner relation
  * 'restrictlist' are the RestrictInfo nodes to be applied at the join
- * 'innerdispersion' is an estimate of the dispersion statistic
+ * 'innerbucketsize' is an estimate of the bucketsize statistic
  *                             for the inner hash key.
  */
 void
@@ -581,7 +585,7 @@ cost_hashjoin(Path *path,
                          Path *outer_path,
                          Path *inner_path,
                          List *restrictlist,
-                         Selectivity innerdispersion)
+                         Selectivity innerbucketsize)
 {
        Cost            startup_cost = 0;
        Cost            run_cost = 0;
@@ -607,22 +611,20 @@ cost_hashjoin(Path *path,
 
        /*
         * The number of tuple comparisons needed is the number of outer
-        * tuples times the typical hash bucket size.  nodeHash.c tries for
-        * average bucket loading of NTUP_PER_BUCKET, but that goal will be
-        * reached only if data values are uniformly distributed among the
-        * buckets.  To be conservative, we scale up the target bucket size by
-        * the number of inner rows times inner dispersion, giving an estimate
-        * of the typical number of duplicates of each value. We then charge
-        * one cpu_operator_cost per tuple comparison.
+        * tuples times the typical number of tuples in a hash bucket,
+        * which is the inner relation size times its bucketsize fraction.
+        * We charge one cpu_operator_cost per tuple comparison.
         */
        run_cost += cpu_operator_cost * outer_path->parent->rows *
-               NTUP_PER_BUCKET * ceil(inner_path->parent->rows * innerdispersion);
+               ceil(inner_path->parent->rows * innerbucketsize);
 
        /*
         * Estimate the number of tuples that get through the hashing filter
         * as one per tuple in the two source relations.  This could be a
         * drastic underestimate if there are many equal-keyed tuples in
-        * either relation, but we have no good way of estimating that...
+        * either relation, but we have no simple way of estimating that;
+        * and since this is only a second-order parameter, it's probably
+        * not worth expending a lot of effort on the estimate.
         */
        ntuples = outer_path->parent->rows + inner_path->parent->rows;
 
@@ -651,7 +653,7 @@ cost_hashjoin(Path *path,
        /*
         * Bias against putting larger relation on inside.      We don't want an
         * absolute prohibition, though, since larger relation might have
-        * better dispersion --- and we can't trust the size estimates
+        * better bucketsize --- and we can't trust the size estimates
         * unreservedly, anyway.  Instead, inflate the startup cost by the
         * square root of the size ratio.  (Why square root?  No real good
         * reason, but it seems reasonable...)
@@ -663,6 +665,171 @@ cost_hashjoin(Path *path,
        path->total_cost = startup_cost + run_cost;
 }
 
+/*
+ * Estimate hash bucketsize fraction (ie, number of entries in a bucket
+ * divided by total tuples in relation) if the specified Var is used
+ * as a hash key.
+ *
+ * This statistic is used by cost_hashjoin.  We split out the calculation
+ * because it's useful to cache the result for re-use across multiple path
+ * cost calculations.
+ *
+ * XXX This is really pretty bogus since we're effectively assuming that the
+ * distribution of hash keys will be the same after applying restriction
+ * clauses as it was in the underlying relation.  However, we are not nearly
+ * smart enough to figure out how the restrict clauses might change the
+ * distribution, so this will have to do for now.
+ *
+ * The executor tries for average bucket loading of NTUP_PER_BUCKET by setting
+ * number of buckets equal to ntuples / NTUP_PER_BUCKET, which would yield
+ * a bucketsize fraction of NTUP_PER_BUCKET / ntuples.  But that goal will
+ * be reached only if the data values are uniformly distributed among the
+ * buckets, which requires (a) at least ntuples / NTUP_PER_BUCKET distinct
+ * data values, and (b) a not-too-skewed data distribution.  Otherwise the
+ * buckets will be nonuniformly occupied.  If the other relation in the join
+ * has a similar distribution, the most-loaded buckets are exactly those
+ * that will be probed most often.  Therefore, the "average" bucket size for
+ * costing purposes should really be taken as something close to the "worst
+ * case" bucket size.  We try to estimate this by first scaling up if there
+ * are too few distinct data values, and then scaling up again by the
+ * ratio of the most common value's frequency to the average frequency.
+ *
+ * If no statistics are available, use a default estimate of 0.1.  This will
+ * discourage use of a hash rather strongly if the inner relation is large,
+ * which is what we want.  We do not want to hash unless we know that the
+ * inner rel is well-dispersed (or the alternatives seem much worse).
+ */
+Selectivity
+estimate_hash_bucketsize(Query *root, Var *var)
+{
+       Oid                     relid;
+       RelOptInfo *rel;
+       HeapTuple       tuple;
+       Form_pg_statistic stats;
+       double          estfract,
+                               ndistinct,
+                               needdistinct,
+                               mcvfreq,
+                               avgfreq;
+       float4     *numbers;
+       int                     nnumbers;
+
+       /*
+        * Lookup info about var's relation and attribute;
+        * if none available, return default estimate.
+        */
+       if (!IsA(var, Var))
+               return 0.1;
+
+       relid = getrelid(var->varno, root->rtable);
+       if (relid == InvalidOid)
+               return 0.1;
+
+       rel = get_base_rel(root, var->varno);
+
+       if (rel->tuples <= 0.0 || rel->rows <= 0.0)
+               return 0.1;                             /* ensure we can divide below */
+
+       tuple = SearchSysCache(STATRELATT,
+                                                  ObjectIdGetDatum(relid),
+                                                  Int16GetDatum(var->varattno),
+                                                  0, 0);
+       if (!HeapTupleIsValid(tuple))
+       {
+               /*
+                * Perhaps the Var is a system attribute; if so, it will have no
+                * entry in pg_statistic, but we may be able to guess something
+                * about its distribution anyway.
+                */
+               switch (var->varattno)
+               {
+                       case ObjectIdAttributeNumber:
+                       case SelfItemPointerAttributeNumber:
+                               /* these are unique, so buckets should be well-distributed */
+                               return (double) NTUP_PER_BUCKET / rel->rows;
+                       case TableOidAttributeNumber:
+                               /* hashing this is a terrible idea... */
+                               return 1.0;
+               }
+               return 0.1;
+       }
+       stats = (Form_pg_statistic) GETSTRUCT(tuple);
+
+       /*
+        * Obtain number of distinct data values in raw relation.
+        */
+       ndistinct = stats->stadistinct;
+       if (ndistinct < 0.0)
+               ndistinct = -ndistinct * rel->tuples;
+
+       /*
+        * Adjust ndistinct to account for restriction clauses.  Observe we are
+        * assuming that the data distribution is affected uniformly by the
+        * restriction clauses!
+        *
+        * XXX Possibly better way, but much more expensive: multiply by
+        * selectivity of rel's restriction clauses that mention the target Var.
+        */
+       ndistinct *= rel->rows / rel->tuples;
+
+       /*
+        * Discourage use of hash join if there seem not to be very many distinct
+        * data values.  The threshold here is somewhat arbitrary, as is the
+        * fraction used to "discourage" the choice.
+        */
+       if (ndistinct < 50.0)
+       {
+               ReleaseSysCache(tuple);
+               return 0.5;
+       }
+
+       /*
+        * Form initial estimate of bucketsize fraction.  Here we use rel->rows,
+        * ie the number of rows after applying restriction clauses, because
+        * that's what the fraction will eventually be multiplied by in
+        * cost_hashjoin.
+        */
+       estfract = (double) NTUP_PER_BUCKET / rel->rows;
+
+       /*
+        * Adjust estimated bucketsize if too few distinct values to fill
+        * all the buckets.
+        */
+       needdistinct = rel->rows / (double) NTUP_PER_BUCKET;
+       if (ndistinct < needdistinct)
+               estfract *= needdistinct / ndistinct;
+
+       /*
+        * Look up the frequency of the most common value, if available.
+        */
+       mcvfreq = 0.0;
+
+       if (get_attstatsslot(tuple, var->vartype, var->vartypmod,
+                                                STATISTIC_KIND_MCV, InvalidOid,
+                                                NULL, NULL, &numbers, &nnumbers))
+       {
+               /*
+                * The first MCV stat is for the most common value.
+                */
+               if (nnumbers > 0)
+                       mcvfreq = numbers[0];
+               free_attstatsslot(var->vartype, NULL, 0,
+                                                 numbers, nnumbers);
+       }
+
+       /*
+        * Adjust estimated bucketsize upward to account for skewed distribution.
+        */
+       avgfreq = (1.0 - stats->stanullfrac) / ndistinct;
+
+       if (avgfreq > 0.0 && mcvfreq > avgfreq)
+               estfract *= mcvfreq / avgfreq;
+
+       ReleaseSysCache(tuple);
+
+       return (Selectivity) estfract;
+}
+
 
 /*
  * cost_qual_eval
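
To see the arithmetic of estimate_hash_bucketsize in isolation, here is a standalone C sketch that repeats the same steps on plain doubles: convert a negative stadistinct to a distinct-value count, scale it by the restriction selectivity, start from NTUP_PER_BUCKET / rows, widen the buckets if there are too few distinct values, and widen again by the most-common-value skew. NTUP_PER_BUCKET is assumed to be 10 here, and all input statistics are invented; this is a sketch of the calculation, not backend code.

	#include <stdio.h>

	#define DEMO_NTUP_PER_BUCKET 10		/* assumed executor target */

	/*
	 * Rough re-statement of the bucketsize estimate: tuples/rows are the raw
	 * and post-restriction row counts, stadistinct follows the pg_statistic
	 * convention (negative means a fraction of rows), mcvfreq is the frequency
	 * of the most common value (0 if unknown), nullfrac the fraction of NULLs.
	 */
	static double
	demo_bucketsize(double tuples, double rows, double stadistinct,
					double mcvfreq, double nullfrac)
	{
		double		ndistinct,
					needdistinct,
					estfract,
					avgfreq;

		if (tuples <= 0.0 || rows <= 0.0)
			return 0.1;				/* default when we cannot divide */

		ndistinct = stadistinct;
		if (ndistinct < 0.0)
			ndistinct = -ndistinct * tuples;	/* fraction -> count */

		/* assume restriction clauses thin out distinct values proportionally */
		ndistinct *= rows / tuples;

		if (ndistinct < 50.0)
			return 0.5;				/* discourage hashing few distinct values */

		/* ideal case: each bucket holds NTUP_PER_BUCKET rows */
		estfract = (double) DEMO_NTUP_PER_BUCKET / rows;

		/* too few distinct values to fill all buckets -> buckets get fatter */
		needdistinct = rows / (double) DEMO_NTUP_PER_BUCKET;
		if (ndistinct < needdistinct)
			estfract *= needdistinct / ndistinct;

		/* skew: scale by most-common-value frequency over average frequency */
		avgfreq = (1.0 - nullfrac) / ndistinct;
		if (avgfreq > 0.0 && mcvfreq > avgfreq)
			estfract *= mcvfreq / avgfreq;

		return estfract;
	}

	int
	main(void)
	{
		/*
		 * 1M-row relation, 100k rows after restrictions, 2% of rows distinct,
		 * most common value covering 5% of the rows, no NULLs -- all made up.
		 */
		double		f = demo_bucketsize(1000000.0, 100000.0, -0.02, 0.05, 0.0);

		printf("estimated bucketsize fraction: %g\n", f);
		return 0;
	}
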
index d41336d..cd7cabd 100644 (file)
@@ -8,15 +8,15 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.63 2001/04/15 00:48:17 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.64 2001/05/07 00:43:20 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
+#include "postgres.h"
+
 #include <sys/types.h>
 #include <math.h>
 
-#include "postgres.h"
-
 #include "optimizer/clauses.h"
 #include "optimizer/cost.h"
 #include "optimizer/pathnode.h"
@@ -45,7 +45,6 @@ static void hash_inner_and_outer(Query *root, RelOptInfo *joinrel,
                                         List *restrictlist, JoinType jointype);
 static Path *best_innerjoin(List *join_paths, List *outer_relid,
                           JoinType jointype);
-static Selectivity estimate_dispersion(Query *root, Var *var);
 static List *select_mergejoin_clauses(RelOptInfo *joinrel,
                                                 RelOptInfo *outerrel,
                                                 RelOptInfo *innerrel,
@@ -722,7 +721,7 @@ hash_inner_and_outer(Query *root,
                Expr       *clause;
                Var                *left,
                                   *right;
-               Selectivity innerdispersion;
+               Selectivity innerbucketsize;
                List       *hashclauses;
 
                if (restrictinfo->hashjoinoperator == InvalidOid)
@@ -742,34 +741,34 @@ hash_inner_and_outer(Query *root,
 
                /*
                 * Check if clause is usable with these sub-rels, find inner side,
-                * estimate dispersion of inner var for costing purposes.
+                * estimate bucketsize of inner var for costing purposes.
                 *
                 * Since we tend to visit the same clauses over and over when
-                * planning a large query, we cache the dispersion estimates in
+                * planning a large query, we cache the bucketsize estimates in
                 * the RestrictInfo node to avoid repeated lookups of statistics.
                 */
                if (intMember(left->varno, outerrelids) &&
                        intMember(right->varno, innerrelids))
                {
                        /* righthand side is inner */
-                       innerdispersion = restrictinfo->right_dispersion;
-                       if (innerdispersion < 0)
+                       innerbucketsize = restrictinfo->right_bucketsize;
+                       if (innerbucketsize < 0)
                        {
                                /* not cached yet */
-                               innerdispersion = estimate_dispersion(root, right);
-                               restrictinfo->right_dispersion = innerdispersion;
+                               innerbucketsize = estimate_hash_bucketsize(root, right);
+                               restrictinfo->right_bucketsize = innerbucketsize;
                        }
                }
                else if (intMember(left->varno, innerrelids) &&
                                 intMember(right->varno, outerrelids))
                {
                        /* lefthand side is inner */
-                       innerdispersion = restrictinfo->left_dispersion;
-                       if (innerdispersion < 0)
+                       innerbucketsize = restrictinfo->left_bucketsize;
+                       if (innerbucketsize < 0)
                        {
                                /* not cached yet */
-                               innerdispersion = estimate_dispersion(root, left);
-                               restrictinfo->left_dispersion = innerdispersion;
+                               innerbucketsize = estimate_hash_bucketsize(root, left);
+                               restrictinfo->left_bucketsize = innerbucketsize;
                        }
                }
                else
@@ -790,7 +789,7 @@ hash_inner_and_outer(Query *root,
                                                                          innerrel->cheapest_total_path,
                                                                          restrictlist,
                                                                          hashclauses,
-                                                                         innerdispersion));
+                                                                         innerbucketsize));
                if (outerrel->cheapest_startup_path != outerrel->cheapest_total_path)
                        add_path(joinrel, (Path *)
                                         create_hashjoin_path(joinrel,
@@ -799,7 +798,7 @@ hash_inner_and_outer(Query *root,
                                                                                  innerrel->cheapest_total_path,
                                                                                  restrictlist,
                                                                                  hashclauses,
-                                                                                 innerdispersion));
+                                                                                 innerbucketsize));
        }
 }
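
The left_bucketsize/right_bucketsize fields use -1 as a "not computed yet" sentinel, i.e. a compute-on-first-use cache. A minimal standalone C sketch of that pattern follows; DemoClause and the fake expensive_estimate are invented for the sketch.

	#include <stdio.h>

	/* Illustrative clause node carrying a cached estimate; -1 means "not yet". */
	typedef struct
	{
		int			id;
		double		cached_bucketsize;
	} DemoClause;

	static int	lookups = 0;

	/* Pretend this is an expensive statistics lookup. */
	static double
	expensive_estimate(const DemoClause *c)
	{
		lookups++;
		return 0.001 * (c->id + 1);
	}

	/* Return the cached value, computing it only on the first request. */
	static double
	get_bucketsize(DemoClause *c)
	{
		if (c->cached_bucketsize < 0)
			c->cached_bucketsize = expensive_estimate(c);
		return c->cached_bucketsize;
	}

	int
	main(void)
	{
		DemoClause	clause = {7, -1};
		int			i;

		/* The same clause is visited repeatedly while planning... */
		for (i = 0; i < 5; i++)
			printf("bucketsize = %g\n", get_bucketsize(&clause));

		/* ...but the expensive lookup ran only once. */
		printf("expensive lookups performed: %d\n", lookups);
		return 0;
	}
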
 
@@ -867,31 +866,6 @@ best_innerjoin(List *join_paths, Relids outer_relids, JoinType jointype)
 }
 
 /*
- * Estimate dispersion of the specified Var
- *
- * We use a default of 0.1 if we can't figure out anything better.
- * This will typically discourage use of a hash rather strongly,
- * if the inner relation is large.     We do not want to hash unless
- * we know that the inner rel is well-dispersed (or the alternatives
- * seem much worse).
- */
-static Selectivity
-estimate_dispersion(Query *root, Var *var)
-{
-       Oid                     relid;
-
-       if (!IsA(var, Var))
-               return 0.1;
-
-       relid = getrelid(var->varno, root->rtable);
-
-       if (relid == InvalidOid)
-               return 0.1;
-
-       return (Selectivity) get_attdispersion(relid, var->varattno, 0.1);
-}
-
-/*
  * select_mergejoin_clauses
  *       Select mergejoin clauses that are usable for a particular join.
  *       Returns a list of RestrictInfo nodes for those clauses.
index 8c3b002..2d264c4 100644 (file)
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.104 2001/03/22 03:59:36 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.105 2001/05/07 00:43:20 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
-#include <sys/types.h>
-
 #include "postgres.h"
 
+#include <sys/types.h>
+
 #include "catalog/pg_index.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
@@ -1484,9 +1484,9 @@ make_sort_from_pathkeys(List *tlist, Plan *lefttree, List *pathkeys)
                 */
                if (resdom->reskey == 0)
                {
-                       /* OK, mark it as a sort key and set the sort operator regproc */
+                       /* OK, mark it as a sort key and set the sort operator */
                        resdom->reskey = ++numsortkeys;
-                       resdom->reskeyop = get_opcode(pathkey->sortop);
+                       resdom->reskeyop = pathkey->sortop;
                }
        }
 
index 7c3e15a..5d67e02 100644 (file)
@@ -8,13 +8,14 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.59 2001/04/16 19:44:10 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.60 2001/05/07 00:43:21 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
+#include "postgres.h"
+
 #include <sys/types.h>
 
-#include "postgres.h"
 #include "catalog/pg_operator.h"
 #include "catalog/pg_type.h"
 #include "nodes/makefuncs.h"
@@ -348,8 +349,8 @@ distribute_qual_to_rels(Query *root, Node *clause,
        restrictinfo->left_pathkey = NIL;       /* not computable yet */
        restrictinfo->right_pathkey = NIL;
        restrictinfo->hashjoinoperator = InvalidOid;
-       restrictinfo->left_dispersion = -1; /* not computed until needed */
-       restrictinfo->right_dispersion = -1;
+       restrictinfo->left_bucketsize = -1; /* not computed until needed */
+       restrictinfo->right_bucketsize = -1;
 
        /*
         * Retrieve all relids and vars contained within the clause.
index b2ab460..0aba480 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.105 2001/04/30 19:24:47 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.106 2001/05/07 00:43:21 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1367,7 +1367,7 @@ make_groupplan(List *group_tlist,
                        {
                                /* OK, insert the ordering info needed by the executor. */
                                resdom->reskey = ++keyno;
-                               resdom->reskeyop = get_opcode(grpcl->sortop);
+                               resdom->reskeyop = grpcl->sortop;
                        }
                }
 
@@ -1412,7 +1412,7 @@ make_sortplan(List *tlist, Plan *plannode, List *sortcls)
                {
                        /* OK, insert the ordering info needed by the executor. */
                        resdom->reskey = ++keyno;
-                       resdom->reskeyop = get_opcode(sortcl->sortop);
+                       resdom->reskeyop = sortcl->sortop;
                }
        }
 
index 0b17346..ede4159 100644 (file)
@@ -14,7 +14,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepunion.c,v 1.62 2001/03/27 18:02:19 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepunion.c,v 1.63 2001/05/07 00:43:22 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -682,8 +682,8 @@ adjust_inherited_attrs_mutator(Node *node,
                newinfo->eval_cost = -1;                /* reset this too */
                newinfo->left_pathkey = NIL;    /* and these */
                newinfo->right_pathkey = NIL;
-               newinfo->left_dispersion = -1;
-               newinfo->right_dispersion = -1;
+               newinfo->left_bucketsize = -1;
+               newinfo->right_bucketsize = -1;
 
                return (Node *) newinfo;
        }
index cfba3ee..407c132 100644 (file)
@@ -8,14 +8,14 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.71 2001/03/22 03:59:39 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.72 2001/05/07 00:43:22 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
-#include <math.h>
-
 #include "postgres.h"
 
+#include <math.h>
+
 #include "nodes/plannodes.h"
 #include "optimizer/cost.h"
 #include "optimizer/pathnode.h"
@@ -559,7 +559,7 @@ create_mergejoin_path(RelOptInfo *joinrel,
  * 'restrict_clauses' are the RestrictInfo nodes to apply at the join
  * 'hashclauses' is a list of the hash join clause (always a 1-element list)
  *             (this should be a subset of the restrict_clauses list)
- * 'innerdispersion' is an estimate of the dispersion of the inner hash key
+ * 'innerbucketsize' is an estimate of the bucketsize of the inner hash key
  *
  */
 HashPath   *
@@ -569,7 +569,7 @@ create_hashjoin_path(RelOptInfo *joinrel,
                                         Path *inner_path,
                                         List *restrict_clauses,
                                         List *hashclauses,
-                                        Selectivity innerdispersion)
+                                        Selectivity innerbucketsize)
 {
        HashPath   *pathnode = makeNode(HashPath);
 
@@ -587,7 +587,7 @@ create_hashjoin_path(RelOptInfo *joinrel,
                                  outer_path,
                                  inner_path,
                                  restrict_clauses,
-                                 innerdispersion);
+                                 innerbucketsize);
 
        return pathnode;
 }
index 4f711df..ee35235 100644 (file)
@@ -9,11 +9,10 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/optimizer/util/plancat.c,v 1.64 2001/03/22 03:59:40 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/optimizer/util/plancat.c,v 1.65 2001/05/07 00:43:22 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
-
 #include "postgres.h"
 
 #include <math.h>
index 4687a55..76cc095 100644 (file)
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- *     $Header: /cvsroot/pgsql/src/backend/parser/analyze.c,v 1.183 2001/03/22 06:16:15 momjian Exp $
+ *     $Header: /cvsroot/pgsql/src/backend/parser/analyze.c,v 1.184 2001/05/07 00:43:22 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -2660,7 +2660,7 @@ transformForUpdate(Query *qry, List *forUpdate)
                /* just the named tables */
                foreach(l, forUpdate)
                {
-                       char       *relname = lfirst(l);
+                       char       *relname = strVal(lfirst(l));
 
                        i = 0;
                        foreach(rt, qry->rtable)
index bed0ce2..40c379a 100644 (file)
@@ -11,7 +11,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/parser/gram.y,v 2.222 2001/05/01 01:36:10 thomas Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/parser/gram.y,v 2.223 2001/05/07 00:43:23 tgl Exp $
  *
  * HISTORY
  *       AUTHOR                        DATE                    MAJOR EVENT
@@ -104,7 +104,6 @@ static void doNegateFloat(Value *v);
        char                            *str;
        bool                            boolean;
        JoinType                        jtype;
-       InhOption                       inhOpt;
        List                            *list;
        Node                            *node;
        Value                           *value;
@@ -130,6 +129,7 @@ static void doNegateFloat(Value *v);
 
 %type <node>   stmt,
                AlterGroupStmt, AlterSchemaStmt, AlterTableStmt, AlterUserStmt,
+               AnalyzeStmt,
                ClosePortalStmt, ClusterStmt, CommentStmt, ConstraintsSetStmt,
                CopyStmt, CreateAsStmt, CreateGroupStmt, CreatePLangStmt,
                CreateSchemaStmt, CreateSeqStmt, CreateStmt, CreateTrigStmt,
@@ -147,7 +147,7 @@ static void doNegateFloat(Value *v);
 %type <node>   select_no_parens, select_with_parens, select_clause,
                                simple_select
 
-%type <node>    alter_column_action
+%type <node>    alter_column_default
 %type <ival>    drop_behavior
 
 %type <list>   createdb_opt_list, createdb_opt_item
@@ -185,7 +185,7 @@ static void doNegateFloat(Value *v);
                OptTableElementList, OptInherit, definition, opt_distinct,
                opt_with, func_args, func_args_list, func_as,
                oper_argtypes, RuleActionList, RuleActionMulti,
-               opt_column_list, columnList, opt_va_list, va_list,
+               opt_column_list, columnList, opt_name_list,
                sort_clause, sortby_list, index_params, index_list, name_list,
                from_clause, from_list, opt_array_bounds,
                expr_list, attrs, target_list, update_target_list,
@@ -210,9 +210,7 @@ static void doNegateFloat(Value *v);
 %type <node>   substr_from, substr_for
 
 %type <boolean>        opt_binary, opt_using, opt_instead, opt_cursor
-%type <boolean>        opt_with_copy, index_opt_unique, opt_verbose, opt_analyze
-
-%type <inhOpt> opt_inh_star, opt_only
+%type <boolean>        opt_with_copy, index_opt_unique, opt_verbose, analyze_keyword
 
 %type <ival>   copy_dirn, direction, reindex_type, drop_type,
                opt_column, event, comment_type, comment_cl,
@@ -350,7 +348,8 @@ static void doNegateFloat(Value *v);
                NEW, NOCREATEDB, NOCREATEUSER, NONE, NOTHING, NOTIFY, NOTNULL,
                OFFSET, OIDS, OPERATOR, OWNER, PASSWORD, PROCEDURAL,
                REINDEX, RENAME, RESET, RETURNS, ROW, RULE,
-               SEQUENCE, SERIAL, SETOF, SHARE, SHOW, START, STATEMENT, STDIN, STDOUT, SYSID,
+               SEQUENCE, SERIAL, SETOF, SHARE, SHOW, START, STATEMENT,
+               STATISTICS, STDIN, STDOUT, SYSID,
                TEMP, TEMPLATE, TOAST, TRUNCATE, TRUSTED, 
                UNLISTEN, UNTIL, VACUUM, VALID, VERBOSE, VERSION
 
@@ -470,6 +469,7 @@ stmt :      AlterSchemaStmt
                | CreatedbStmt
                | DropdbStmt
                | VacuumStmt
+               | AnalyzeStmt
                | VariableSetStmt
                | VariableShowStmt
                | VariableResetStmt
@@ -938,57 +938,68 @@ CheckPointStmt: CHECKPOINT
  *****************************************************************************/
 
 AlterTableStmt:
-/* ALTER TABLE <name> ADD [COLUMN] <coldef> */
-               ALTER TABLE relation_name opt_inh_star ADD opt_column columnDef
+/* ALTER TABLE <relation> ADD [COLUMN] <coldef> */
+               ALTER TABLE relation_expr ADD opt_column columnDef
                                {
                                        AlterTableStmt *n = makeNode(AlterTableStmt);
                                        n->subtype = 'A';
-                                       n->relname = $3;
-                                       n->inhOpt = $4;
-                                       n->def = $7;
+                                       n->relname = $3->relname;
+                                       n->inhOpt = $3->inhOpt;
+                                       n->def = $6;
                                        $$ = (Node *)n;
                                }
-/* ALTER TABLE <name> ALTER [COLUMN] <colname> {SET DEFAULT <expr>|DROP DEFAULT} */
-               | ALTER TABLE relation_name opt_inh_star ALTER opt_column ColId alter_column_action
+/* ALTER TABLE <relation> ALTER [COLUMN] <colname> {SET DEFAULT <expr>|DROP DEFAULT} */
+               | ALTER TABLE relation_expr ALTER opt_column ColId alter_column_default
                                {
                                        AlterTableStmt *n = makeNode(AlterTableStmt);
                                        n->subtype = 'T';
-                                       n->relname = $3;
-                                       n->inhOpt = $4;
-                                       n->name = $7;
-                                       n->def = $8;
+                                       n->relname = $3->relname;
+                                       n->inhOpt = $3->inhOpt;
+                                       n->name = $6;
+                                       n->def = $7;
                                        $$ = (Node *)n;
                                }
-/* ALTER TABLE <name> DROP [COLUMN] <name> {RESTRICT|CASCADE} */
-               | ALTER TABLE relation_name opt_inh_star DROP opt_column ColId drop_behavior
+/* ALTER TABLE <relation> ALTER [COLUMN] <colname> SET STATISTICS <Iconst> */
+               | ALTER TABLE relation_expr ALTER opt_column ColId SET STATISTICS Iconst
+                               {
+                                       AlterTableStmt *n = makeNode(AlterTableStmt);
+                                       n->subtype = 'S';
+                                       n->relname = $3->relname;
+                                       n->inhOpt = $3->inhOpt;
+                                       n->name = $6;
+                                       n->def = (Node *) makeInteger($9);
+                                       $$ = (Node *)n;
+                               }
+/* ALTER TABLE <relation> DROP [COLUMN] <colname> {RESTRICT|CASCADE} */
+               | ALTER TABLE relation_expr DROP opt_column ColId drop_behavior
                                {
                                        AlterTableStmt *n = makeNode(AlterTableStmt);
                                        n->subtype = 'D';
-                                       n->relname = $3;
-                                       n->inhOpt = $4;
-                                       n->name = $7;
-                                       n->behavior = $8;
+                                       n->relname = $3->relname;
+                                       n->inhOpt = $3->inhOpt;
+                                       n->name = $6;
+                                       n->behavior = $7;
                                        $$ = (Node *)n;
                                }
-/* ALTER TABLE <name> ADD CONSTRAINT ... */
-               | ALTER TABLE relation_name opt_inh_star ADD TableConstraint
+/* ALTER TABLE <relation> ADD CONSTRAINT ... */
+               | ALTER TABLE relation_expr ADD TableConstraint
                                {
                                        AlterTableStmt *n = makeNode(AlterTableStmt);
                                        n->subtype = 'C';
-                                       n->relname = $3;
-                                       n->inhOpt = $4;
-                                       n->def = $6;
+                                       n->relname = $3->relname;
+                                       n->inhOpt = $3->inhOpt;
+                                       n->def = $5;
                                        $$ = (Node *)n;
                                }
-/* ALTER TABLE <name> DROP CONSTRAINT <name> {RESTRICT|CASCADE} */
-               | ALTER TABLE relation_name opt_inh_star DROP CONSTRAINT name drop_behavior
+/* ALTER TABLE <relation> DROP CONSTRAINT <name> {RESTRICT|CASCADE} */
+               | ALTER TABLE relation_expr DROP CONSTRAINT name drop_behavior
                                {
                                        AlterTableStmt *n = makeNode(AlterTableStmt);
                                        n->subtype = 'X';
-                                       n->relname = $3;
-                                       n->inhOpt = $4;
-                                       n->name = $7;
-                                       n->behavior = $8;
+                                       n->relname = $3->relname;
+                                       n->inhOpt = $3->inhOpt;
+                                       n->name = $6;
+                                       n->behavior = $7;
                                        $$ = (Node *)n;
                                }
 /* ALTER TABLE <name> CREATE TOAST TABLE */
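(The new 'S' subtype above is what wires ALTER TABLE ... SET STATISTICS into the grammar; per the commit message it controls how many most-common values and histogram bins ANALYZE collects for a column. A minimal sketch of the syntax this production accepts — the table and column names are invented for illustration, not taken from the patch:

    ALTER TABLE mytable ALTER COLUMN somecol SET STATISTICS 100;
)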
@@ -997,6 +1008,7 @@ AlterTableStmt:
                                        AlterTableStmt *n = makeNode(AlterTableStmt);
                                        n->subtype = 'E';
                                        n->relname = $3;
+                                       n->inhOpt = INH_NO;
                                        $$ = (Node *)n;
                                }
 /* ALTER TABLE <name> OWNER TO UserId */
@@ -1005,12 +1017,13 @@ AlterTableStmt:
                                        AlterTableStmt *n = makeNode(AlterTableStmt);
                                        n->subtype = 'U';
                                        n->relname = $3;
+                                       n->inhOpt = INH_NO;
                                        n->name = $6;
                                        $$ = (Node *)n;
                                }
                ;
 
-alter_column_action:
+alter_column_default:
                SET DEFAULT a_expr
                        {
                                /* Treat SET DEFAULT NULL the same as DROP DEFAULT */
@@ -1478,10 +1491,6 @@ key_reference:  NO ACTION                                { $$ = FKCONSTR_ON_KEY_NOACTION; }
                | SET DEFAULT                                   { $$ = FKCONSTR_ON_KEY_SETDEFAULT; }
                ;
 
-opt_only: ONLY                                 { $$ = INH_NO; }
-        | /*EMPTY*/                                                            { $$ = INH_DEFAULT; } 
-               ;
-
 OptInherit:  INHERITS '(' relation_name_list ')'       { $$ = $3; }
                | /*EMPTY*/                                                                     { $$ = NIL; }
                ;
@@ -2598,14 +2607,13 @@ opt_force:      FORCE                                                                   {  $$ = TRUE; }
  *
  *****************************************************************************/
 
-RenameStmt:  ALTER TABLE relation_name opt_inh_star
-                                 RENAME opt_column opt_name TO name
+RenameStmt:  ALTER TABLE relation_expr RENAME opt_column opt_name TO name
                                {
                                        RenameStmt *n = makeNode(RenameStmt);
-                                       n->relname = $3;
-                                       n->inhOpt = $4;
-                                       n->column = $7;
-                                       n->newname = $9;
+                                       n->relname = $3->relname;
+                                       n->inhOpt = $3->inhOpt;
+                                       n->column = $6;
+                                       n->newname = $8;
                                        $$ = (Node *)n;
                                }
                ;
@@ -2994,49 +3002,71 @@ ClusterStmt:  CLUSTER index_name ON relation_name
  *
  *             QUERY:
  *                             vacuum
+ *                             analyze
  *
  *****************************************************************************/
 
-VacuumStmt:  VACUUM opt_verbose opt_analyze
+VacuumStmt:  VACUUM opt_verbose
                                {
                                        VacuumStmt *n = makeNode(VacuumStmt);
+                                       n->vacuum = true;
+                                       n->analyze = false;
                                        n->verbose = $2;
-                                       n->analyze = $3;
                                        n->vacrel = NULL;
-                                       n->va_spec = NIL;
+                                       n->va_cols = NIL;
                                        $$ = (Node *)n;
                                }
-               | VACUUM opt_verbose opt_analyze relation_name opt_va_list
+               | VACUUM opt_verbose relation_name
                                {
                                        VacuumStmt *n = makeNode(VacuumStmt);
+                                       n->vacuum = true;
+                                       n->analyze = false;
                                        n->verbose = $2;
-                                       n->analyze = $3;
-                                       n->vacrel = $4;
-                                       n->va_spec = $5;
-                                       if ( $5 != NIL && !$4 )
-                                               elog(ERROR,"VACUUM syntax error at or near \"(\""
-                                                       "\n\tRelation name must be specified");
+                                       n->vacrel = $3;
+                                       n->va_cols = NIL;
+                                       $$ = (Node *)n;
+                               }
+               | VACUUM opt_verbose AnalyzeStmt
+                               {
+                                       VacuumStmt *n = (VacuumStmt *) $3;
+                                       n->vacuum = true;
+                                       n->verbose |= $2;
                                        $$ = (Node *)n;
                                }
                ;
 
-opt_verbose:  VERBOSE                                                  { $$ = TRUE; }
-               | /*EMPTY*/                                                             { $$ = FALSE; }
+AnalyzeStmt:  analyze_keyword opt_verbose
+                               {
+                                       VacuumStmt *n = makeNode(VacuumStmt);
+                                       n->vacuum = false;
+                                       n->analyze = true;
+                                       n->verbose = $2;
+                                       n->vacrel = NULL;
+                                       n->va_cols = NIL;
+                                       $$ = (Node *)n;
+                               }
+               | analyze_keyword opt_verbose relation_name opt_name_list
+                               {
+                                       VacuumStmt *n = makeNode(VacuumStmt);
+                                       n->vacuum = false;
+                                       n->analyze = true;
+                                       n->verbose = $2;
+                                       n->vacrel = $3;
+                                       n->va_cols = $4;
+                                       $$ = (Node *)n;
+                               }
                ;
 
-opt_analyze:  ANALYZE                                                  { $$ = TRUE; }
+analyze_keyword:  ANALYZE                                              { $$ = TRUE; }
                |         ANALYSE /* British */                         { $$ = TRUE; }
-               | /*EMPTY*/                                                             { $$ = FALSE; }
                ;
 
-opt_va_list:  '(' va_list ')'                                  { $$ = $2; }
-               | /*EMPTY*/                                                             { $$ = NIL; }
+opt_verbose:  VERBOSE                                                  { $$ = TRUE; }
+               | /*EMPTY*/                                                             { $$ = FALSE; }
                ;
 
-va_list:  name
-                               { $$ = makeList1($1); }
-               | va_list ',' name
-                               { $$ = lappend($1, $3); }
+opt_name_list:  '(' name_list ')'                              { $$ = $2; }
+               | /*EMPTY*/                                                             { $$ = NIL; }
                ;
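(With VacuumStmt and AnalyzeStmt split as above, ANALYZE is now a statement in its own right while the combined VACUUM ... ANALYZE form is still accepted. A sketch of the statement shapes these productions parse, with relation and column names invented for illustration:

    ANALYZE;
    ANALYZE VERBOSE mytable;
    ANALYZE mytable (col1, col2);
    VACUUM VERBOSE ANALYZE mytable;

Note that the per-column list now hangs off the ANALYZE side, via opt_name_list, rather than the old opt_va_list of VACUUM.)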
 
 
@@ -3160,12 +3190,12 @@ columnElem:  ColId opt_indirection
  *
  *****************************************************************************/
 
-DeleteStmt:  DELETE FROM opt_only relation_name where_clause
+DeleteStmt:  DELETE FROM relation_expr where_clause
                                {
                                        DeleteStmt *n = makeNode(DeleteStmt);
-                                       n->inhOpt = $3;
-                                       n->relname = $4;
-                                       n->whereClause = $5;
+                                       n->relname = $3->relname;
+                                       n->inhOpt = $3->inhOpt;
+                                       n->whereClause = $4;
                                        $$ = (Node *)n;
                                }
                ;
@@ -3202,17 +3232,17 @@ opt_lmode:      SHARE                           { $$ = TRUE; }
  *
  *****************************************************************************/
 
-UpdateStmt:  UPDATE opt_only relation_name
+UpdateStmt:  UPDATE relation_expr
                          SET update_target_list
                          from_clause
                          where_clause
                                {
                                        UpdateStmt *n = makeNode(UpdateStmt);
-                                       n->inhOpt = $2;
-                                       n->relname = $3;
-                                       n->targetList = $5;
-                                       n->fromClause = $6;
-                                       n->whereClause = $7;
+                                       n->relname = $2->relname;
+                                       n->inhOpt = $2->inhOpt;
+                                       n->targetList = $4;
+                                       n->fromClause = $5;
+                                       n->whereClause = $6;
                                        $$ = (Node *)n;
                                }
                ;
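(DELETE and UPDATE now take a relation_expr instead of the removed opt_only/opt_inh_star pair, so the inheritance choice travels with the relation reference itself. Assuming relation_expr — defined elsewhere in the grammar, not shown in this hunk — accepts the same ONLY prefix and '*' suffix that the dropped rules handled, the accepted forms would look like this, with names invented for illustration:

    DELETE FROM ONLY parent_table WHERE id = 1;
    UPDATE parent_table* SET flag = TRUE WHERE id = 2;
)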
@@ -3545,10 +3575,6 @@ select_offset_value:     Iconst
  *     ...however, recursive addattr and rename supported.  make special
  *     cases for these.
  */
-opt_inh_star:  '*'                                                             { $$ = INH_YES; }
-               | /*EMPTY*/                                                             { $$ = INH_DEFAULT; }
-               ;
-
 relation_name_list:  name_list;
 
 name_list:  name
@@ -3576,7 +3602,7 @@ opt_for_update_clause:    for_update_clause               { $$ = $1; }
                | /* EMPTY */                                                   { $$ = NULL; }
                ;
 
-update_list:  OF va_list                                               { $$ = $2; }
+update_list:  OF name_list                                             { $$ = $2; }
                | /* EMPTY */                                                   { $$ = makeList1(NULL); }
                ;
 
@@ -5525,6 +5551,7 @@ TokenId:  ABSOLUTE                                                { $$ = "absolute"; }
                | SHARE                                                 { $$ = "share"; }
                | START                                                 { $$ = "start"; }
                | STATEMENT                                             { $$ = "statement"; }
+               | STATISTICS                                    { $$ = "statistics"; }
                | STDIN                                                 { $$ = "stdin"; }
                | STDOUT                                                { $$ = "stdout"; }
                | SYSID                                                 { $$ = "sysid"; }
index 402dbfd..8ab19f8 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/parser/keywords.c,v 1.90 2001/03/22 03:59:40 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/parser/keywords.c,v 1.91 2001/05/07 00:43:23 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -238,6 +238,7 @@ static ScanKeyword ScanKeywords[] = {
        {"some", SOME},
        {"start", START},
        {"statement", STATEMENT},
+       {"statistics", STATISTICS},
        {"stdin", STDIN},
        {"stdout", STDOUT},
        {"substring", SUBSTRING},
index f5324cb..e1d4984 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/parser/parse_relation.c,v 1.54 2001/04/18 17:04:24 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/parser/parse_relation.c,v 1.55 2001/05/07 00:43:23 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -75,7 +75,7 @@ static struct
        }
 };
 
-#define SPECIALS ((int) (sizeof(special_attr)/sizeof(special_attr[0])))
+#define SPECIALS ((int) lengthof(special_attr))
 
 
 /*
@@ -670,7 +670,7 @@ isForUpdate(ParseState *pstate, char *relname)
 
                                foreach(l, pstate->p_forUpdate)
                                {
-                                       char       *rname = lfirst(l);
+                                       char       *rname = strVal(lfirst(l));
 
                                        if (strcmp(relname, rname) == 0)
                                                return true;
@@ -1020,20 +1020,6 @@ attnameIsSet(Relation rd, char *name)
 
 #endif
 
-#ifdef NOT_USED
-/*
- *     This should only be used if the relation is already
- *     heap_open()'ed.  Use the cache version
- *     for access to non-opened relations.
- */
-int
-attnumAttNelems(Relation rd, int attid)
-{
-       return rd->rd_att->attrs[attid - 1]->attnelems;
-}
-
-#endif
-
 /* given attribute id, return type of that attribute */
 /*
  *     This should only be used if the relation is already
index ae6cd20..b616f7e 100644 (file)
@@ -10,7 +10,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/tcop/utility.c,v 1.109 2001/03/22 06:16:17 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/tcop/utility.c,v 1.110 2001/05/07 00:43:23 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -427,13 +427,19 @@ ProcessUtility(Node *parsetree,
                                                                                interpretInhOption(stmt->inhOpt),
                                                                                        (ColumnDef *) stmt->def);
                                                break;
-                                       case 'T':       /* ALTER COLUMN */
-                                               AlterTableAlterColumn(stmt->relname,
+                                       case 'T':       /* ALTER COLUMN DEFAULT */
+                                               AlterTableAlterColumnDefault(stmt->relname,
                                                                                interpretInhOption(stmt->inhOpt),
-                                                                                         stmt->name,
-                                                                                         stmt->def);
+                                                                                                        stmt->name,
+                                                                                                        stmt->def);
                                                break;
-                                       case 'D':       /* ALTER DROP */
+                                       case 'S':       /* ALTER COLUMN STATISTICS */
+                                               AlterTableAlterColumnStatistics(stmt->relname,
+                                                                               interpretInhOption(stmt->inhOpt),
+                                                                                                               stmt->name,
+                                                                                                               stmt->def);
+                                               break;
+                                       case 'D':       /* DROP COLUMN */
                                                AlterTableDropColumn(stmt->relname,
                                                                                interpretInhOption(stmt->inhOpt),
                                                                                         stmt->name,
@@ -703,12 +709,13 @@ ProcessUtility(Node *parsetree,
                        break;
 
                case T_VacuumStmt:
-                       set_ps_display(commandTag = "VACUUM");
+                       if (((VacuumStmt *) parsetree)->vacuum)
+                               commandTag = "VACUUM";
+                       else
+                               commandTag = "ANALYZE";
+                       set_ps_display(commandTag);
 
-                       vacuum(((VacuumStmt *) parsetree)->vacrel,
-                                  ((VacuumStmt *) parsetree)->verbose,
-                                  ((VacuumStmt *) parsetree)->analyze,
-                                  ((VacuumStmt *) parsetree)->va_spec);
+                       vacuum((VacuumStmt *) parsetree);
                        break;
 
                case T_ExplainStmt:
index 1fe0afb..41ba82d 100644 (file)
@@ -15,7 +15,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.87 2001/03/23 04:49:54 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.88 2001/05/07 00:43:23 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -57,9 +57,6 @@
 /* default selectivity estimate for pattern-match operators such as LIKE */
 #define DEFAULT_MATCH_SEL      0.01
 
-/* "fudge factor" for estimating frequency of not-most-common values */
-#define NOT_MOST_COMMON_RATIO  0.1
-
 static bool convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
                                  Datum lobound, Datum hibound, Oid boundstypid,
                                  double *scaledlobound, double *scaledhibound);
@@ -75,17 +72,9 @@ static double convert_one_string_to_scalar(unsigned char *value,
 static unsigned char *convert_string_datum(Datum value, Oid typid);
 static double convert_timevalue_to_scalar(Datum value, Oid typid);
 static void getattproperties(Oid relid, AttrNumber attnum,
-                                Oid *typid,
-                                int *typlen,
-                                bool *typbyval,
-                                int32 *typmod);
-static bool getattstatistics(Oid relid, AttrNumber attnum,
-                                Oid typid, int32 typmod,
-                                double *nullfrac,
-                                double *commonfrac,
-                                Datum *commonval,
-                                Datum *loval,
-                                Datum *hival);
+                                                        Oid *typid, int32 *typmod);
+static double get_att_numdistinct(Oid relid, AttrNumber attnum, Oid typid,
+                                                                 Form_pg_statistic stats);
 static Selectivity prefix_selectivity(char *prefix,
                                   Oid relid,
                                   AttrNumber attno,
@@ -115,134 +104,173 @@ eqsel(PG_FUNCTION_ARGS)
        AttrNumber      attno = PG_GETARG_INT16(2);
        Datum           value = PG_GETARG_DATUM(3);
        int32           flag = PG_GETARG_INT32(4);
-       float8          result;
-
-       if (NONVALUE(attno) || NONVALUE(relid))
-               result = DEFAULT_EQ_SEL;
-       else
+       Oid                     typid;
+       int32           typmod;
+       HeapTuple       statsTuple;
+       Datum      *values;
+       int                     nvalues;
+       float4     *numbers;
+       int                     nnumbers;
+       double          selec;
+
+       if (NONVALUE(relid) || NONVALUE(attno))
+               PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
+
+       /* get info about the attribute */
+       getattproperties(relid, attno, &typid, &typmod);
+
+       /* get stats for the attribute, if available */
+       statsTuple = SearchSysCache(STATRELATT,
+                                                               ObjectIdGetDatum(relid),
+                                                               Int16GetDatum(attno),
+                                                               0, 0);
+       if (HeapTupleIsValid(statsTuple))
        {
-               Oid                     typid;
-               int                     typlen;
-               bool            typbyval;
-               int32           typmod;
-               double          nullfrac;
-               double          commonfrac;
-               Datum           commonval;
-               double          selec;
-
-               /* get info about the attribute */
-               getattproperties(relid, attno,
-                                                &typid, &typlen, &typbyval, &typmod);
-
-               /* get stats for the attribute, if available */
-               if (getattstatistics(relid, attno, typid, typmod,
-                                                        &nullfrac, &commonfrac, &commonval,
-                                                        NULL, NULL))
-               {
-                       if (flag & SEL_CONSTANT)
-                       {
+               Form_pg_statistic stats;
 
-                               /*
-                                * Is the constant "=" to the column's most common value?
-                                * (Although the operator may not really be "=", we will
-                                * assume that seeing whether it returns TRUE for the most
-                                * common value is useful information. If you don't like
-                                * it, maybe you shouldn't be using eqsel for your
-                                * operator...)
-                                */
-                               RegProcedure eqproc = get_opcode(opid);
-                               bool            mostcommon;
+               stats = (Form_pg_statistic) GETSTRUCT(statsTuple);
 
-                               if (eqproc == (RegProcedure) NULL)
-                                       elog(ERROR, "eqsel: no procedure for operator %u",
-                                                opid);
+               if (flag & SEL_CONSTANT)
+               {
+                       bool    match = false;
+                       int             i;
 
-                               /* be careful to apply operator right way 'round */
-                               if (flag & SEL_RIGHT)
-                                       mostcommon = DatumGetBool(OidFunctionCall2(eqproc,
-                                                                                                                          commonval,
-                                                                                                                          value));
-                               else
-                                       mostcommon = DatumGetBool(OidFunctionCall2(eqproc,
-                                                                                                                          value,
-                                                                                                                        commonval));
+                       /*
+                        * Is the constant "=" to any of the column's most common
+                        * values?  (Although the given operator may not really be
+                        * "=", we will assume that seeing whether it returns TRUE
+                        * is an appropriate test.  If you don't like this, maybe you
+                        * shouldn't be using eqsel for your operator...)
+                        */
+                       if (get_attstatsslot(statsTuple, typid, typmod,
+                                                                STATISTIC_KIND_MCV, InvalidOid,
+                                                                &values, &nvalues,
+                                                                &numbers, &nnumbers))
+                       {
+                               FmgrInfo        eqproc;
 
-                               if (mostcommon)
-                               {
+                               fmgr_info(get_opcode(opid), &eqproc);
 
-                                       /*
-                                        * Constant is "=" to the most common value.  We know
-                                        * selectivity exactly (or as exactly as VACUUM could
-                                        * calculate it, anyway).
-                                        */
-                                       selec = commonfrac;
-                               }
-                               else
+                               for (i = 0; i < nvalues; i++)
                                {
-
-                                       /*
-                                        * Comparison is against a constant that is neither
-                                        * the most common value nor null.      Its selectivity
-                                        * cannot be more than this:
-                                        */
-                                       selec = 1.0 - commonfrac - nullfrac;
-                                       if (selec > commonfrac)
-                                               selec = commonfrac;
-
-                                       /*
-                                        * and in fact it's probably less, so we should apply
-                                        * a fudge factor.      The only case where we don't is
-                                        * for a boolean column, where indeed we have
-                                        * estimated the less-common value's frequency
-                                        * exactly!
-                                        */
-                                       if (typid != BOOLOID)
-                                               selec *= NOT_MOST_COMMON_RATIO;
+                                       /* be careful to apply operator right way 'round */
+                                       if (flag & SEL_RIGHT)
+                                               match = DatumGetBool(FunctionCall2(&eqproc,
+                                                                                                                  values[i],
+                                                                                                                  value));
+                                       else
+                                               match = DatumGetBool(FunctionCall2(&eqproc,
+                                                                                                                  value,
+                                                                                                                  values[i]));
+                                       if (match)
+                                               break;
                                }
                        }
                        else
                        {
+                               /* no most-common-value info available */
+                               values = NULL;
+                               numbers = NULL;
+                               i = nvalues = nnumbers = 0;
+                       }
 
+                       if (match)
+                       {
+                               /*
+                                * Constant is "=" to this common value.  We know
+                                * selectivity exactly (or as exactly as VACUUM
+                                * could calculate it, anyway).
+                                */
+                               selec = numbers[i];
+                       }
+                       else
+                       {
                                /*
-                                * Search is for a value that we do not know a priori, but
-                                * we will assume it is not NULL.  Selectivity cannot be
-                                * more than this:
+                                * Comparison is against a constant that is neither
+                                * NULL nor any of the common values.  Its selectivity
+                                * cannot be more than this:
                                 */
-                               selec = 1.0 - nullfrac;
-                               if (selec > commonfrac)
-                                       selec = commonfrac;
+                               double  sumcommon = 0.0;
+                               double  otherdistinct;
 
+                               for (i = 0; i < nnumbers; i++)
+                                       sumcommon += numbers[i];
+                               selec = 1.0 - sumcommon - stats->stanullfrac;
+                               /*
+                                * and in fact it's probably a good deal less.
+                                * We approximate that all the not-common values
+                                * share this remaining fraction equally, so we
+                                * divide by the number of other distinct values.
+                                */
+                               otherdistinct = get_att_numdistinct(relid, attno,
+                                                                                                       typid, stats)
+                                       - nnumbers;
+                               if (otherdistinct > 1)
+                                       selec /= otherdistinct;
                                /*
-                                * and in fact it's probably less, so apply a fudge
-                                * factor.
+                                * Another cross-check: selectivity shouldn't be
+                                * estimated as more than the least common
+                                * "most common value".
                                 */
-                               selec *= NOT_MOST_COMMON_RATIO;
+                               if (nnumbers > 0 && selec > numbers[nnumbers-1])
+                                       selec = numbers[nnumbers-1];
                        }
 
-                       /* result should be in range, but make sure... */
-                       if (selec < 0.0)
-                               selec = 0.0;
-                       else if (selec > 1.0)
-                               selec = 1.0;
-
-                       if (!typbyval)
-                               pfree(DatumGetPointer(commonval));
+                       free_attstatsslot(typid, values, nvalues, numbers, nnumbers);
                }
                else
                {
+                       double          ndistinct;
 
                        /*
-                        * No VACUUM ANALYZE stats available, so make a guess using
-                        * the dispersion stat (if we have that, which is unlikely for
-                        * a normal attribute; but for a system attribute we may be
-                        * able to estimate it).
+                        * Search is for a value that we do not know a priori, but
+                        * we will assume it is not NULL.  Estimate the selectivity
+                        * as non-null fraction divided by number of distinct values,
+                        * so that we get a result averaged over all possible values
+                        * whether common or uncommon.  (Essentially, we are assuming
+                        * that the not-yet-known comparison value is equally likely
+                        * to be any of the possible values, regardless of their
+                        * frequency in the table.  Is that a good idea?)
+                        */
+                       selec = 1.0 - stats->stanullfrac;
+                       ndistinct = get_att_numdistinct(relid, attno, typid, stats);
+                       if (ndistinct > 1)
+                               selec /= ndistinct;
+                       /*
+                        * Cross-check: selectivity should never be
+                        * estimated as more than the most common value's.
                         */
-                       selec = get_attdispersion(relid, attno, 0.01);
+                       if (get_attstatsslot(statsTuple, typid, typmod,
+                                                                STATISTIC_KIND_MCV, InvalidOid,
+                                                                NULL, NULL,
+                                                                &numbers, &nnumbers))
+                       {
+                               if (nnumbers > 0 && selec > numbers[0])
+                                       selec = numbers[0];
+                               free_attstatsslot(typid, NULL, 0, numbers, nnumbers);
+                       }
                }
 
-               result = (float8) selec;
+               ReleaseSysCache(statsTuple);
        }
-       PG_RETURN_FLOAT8(result);
+       else
+       {
+               /*
+                * No VACUUM ANALYZE stats available, so make a guess using
+                * estimated number of distinct values and assuming they are
+                * equally common.  (The guess is unlikely to be very good,
+                * but we do know a few special cases.)
+                */
+               selec = 1.0 / get_att_numdistinct(relid, attno, typid, NULL);
+       }
+
+       /* result should be in range, but make sure... */
+       if (selec < 0.0)
+               selec = 0.0;
+       else if (selec > 1.0)
+               selec = 1.0;
+
+       PG_RETURN_FLOAT8((float8) selec);
 }
 
 /*
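(In the rewritten eqsel above, an equality comparison against a constant first consults the ANALYZE-collected most-common-values list: a constant matching an MCV gets that entry's stored frequency directly, while a non-matching constant gets the leftover fraction — 1 minus the null fraction minus the summed MCV fractions — spread evenly over the remaining distinct values, clamped to the least common MCV's frequency. As a worked sketch with invented numbers: a column that is 1% NULL, whose three MCVs cover 40% of the rows, and which has about 53 other distinct values gives a non-MCV constant an estimate of roughly (1 - 0.01 - 0.40) / 53 ≈ 0.011. The kind of predicate this path estimates, names invented:

    EXPLAIN SELECT * FROM mytable WHERE somecol = 42;
)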
@@ -301,117 +329,263 @@ scalarltsel(PG_FUNCTION_ARGS)
        AttrNumber      attno = PG_GETARG_INT16(2);
        Datum           value = PG_GETARG_DATUM(3);
        int32           flag = PG_GETARG_INT32(4);
-       float8          result;
+       bool            isgt;
+       HeapTuple       oprTuple;
+       HeapTuple       statsTuple;
+       Form_pg_statistic stats;
+       Oid                     contype;
+       FmgrInfo        opproc;
+       Oid                     typid;
+       int32           typmod;
+       Datum      *values;
+       int                     nvalues;
+       float4     *numbers;
+       int                     nnumbers;
+       double          mcv_selec,
+                               hist_selec,
+                               sumcommon;
+       double          selec;
+       int                     i;
+
+       if (NONVALUE(relid) || NONVALUE(attno))
+               PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
+
+       /* Can't do anything useful if no constant to compare against, either */
+       if (!(flag & SEL_CONSTANT))
+               PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
 
-       if (!(flag & SEL_CONSTANT) || NONVALUE(attno) || NONVALUE(relid))
-               result = DEFAULT_INEQ_SEL;
+       /*
+        * Force the constant to be on the right to simplify later logic.
+        * This means that we may be dealing with either "<" or ">" cases.
+        */
+       if (flag & SEL_RIGHT)
+       {
+               /* we have x < const */
+               isgt = false;
+       }
        else
        {
-               HeapTuple       oprtuple;
-               Oid                     ltype,
-                                       rtype,
-                                       contype;
-               Oid                     typid;
-               int                     typlen;
-               bool            typbyval;
-               int32           typmod;
-               Datum           hival,
-                                       loval;
-               double          val,
-                                       high,
-                                       low,
-                                       numerator,
-                                       denominator;
-
-               /*
-                * Get left and right datatypes of the operator so we know what
-                * type the constant is.
-                */
-               oprtuple = SearchSysCache(OPEROID,
-                                                                 ObjectIdGetDatum(opid),
-                                                                 0, 0, 0);
-               if (!HeapTupleIsValid(oprtuple))
-                       elog(ERROR, "scalarltsel: no tuple for operator %u", opid);
-               ltype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprleft;
-               rtype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprright;
-               contype = (flag & SEL_RIGHT) ? rtype : ltype;
-               ReleaseSysCache(oprtuple);
-
-               /* Now get info and stats about the attribute */
-               getattproperties(relid, attno,
-                                                &typid, &typlen, &typbyval, &typmod);
-
-               if (!getattstatistics(relid, attno, typid, typmod,
-                                                         NULL, NULL, NULL,
-                                                         &loval, &hival))
+               /* we have const < x, commute to make x > const */
+               opid = get_commutator(opid);
+               if (!opid)
                {
-                       /* no stats available, so default result */
+                       /* Use default selectivity (should we raise an error instead?) */
                        PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
                }
+               isgt = true;
+       }
 
-               /* Convert the values to a uniform comparison scale. */
-               if (!convert_to_scalar(value, contype, &val,
-                                                          loval, hival, typid,
-                                                          &low, &high))
-               {
+       /*
+        * The constant might not be the same datatype as the column;
+        * look at the operator's input types to find out what it is.
+        * Also set up to be able to call the operator's execution proc.
+        */
+       oprTuple = SearchSysCache(OPEROID,
+                                                         ObjectIdGetDatum(opid),
+                                                         0, 0, 0);
+       if (!HeapTupleIsValid(oprTuple))
+               elog(ERROR, "scalarltsel: no tuple for operator %u", opid);
+       contype = ((Form_pg_operator) GETSTRUCT(oprTuple))->oprright;
+       fmgr_info(((Form_pg_operator) GETSTRUCT(oprTuple))->oprcode, &opproc);
+       ReleaseSysCache(oprTuple);
+
+       /* Now get info and stats about the attribute */
+       getattproperties(relid, attno, &typid, &typmod);
+
+       statsTuple = SearchSysCache(STATRELATT,
+                                                               ObjectIdGetDatum(relid),
+                                                               Int16GetDatum(attno),
+                                                               0, 0);
+       if (!HeapTupleIsValid(statsTuple))
+       {
+               /* no stats available, so default result */
+               PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
+       }
+       stats = (Form_pg_statistic) GETSTRUCT(statsTuple);
 
-                       /*
-                        * Ideally we'd produce an error here, on the grounds that the
-                        * given operator shouldn't have scalarltsel registered as its
-                        * selectivity func unless we can deal with its operand types.
-                        * But currently, all manner of stuff is invoking scalarltsel,
-                        * so give a default estimate until that can be fixed.
-                        */
-                       if (!typbyval)
-                       {
-                               pfree(DatumGetPointer(hival));
-                               pfree(DatumGetPointer(loval));
-                       }
-                       PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
-               }
+       /*
+        * If we have most-common-values info, add up the fractions of the
+        * MCV entries that satisfy MCV OP CONST.  These fractions contribute
+        * directly to the result selectivity.  Also add up the total fraction
+        * represented by MCV entries.
+        */
+       mcv_selec = 0.0;
+       sumcommon = 0.0;
 
-               /* release temp storage if needed */
-               if (!typbyval)
+       if (get_attstatsslot(statsTuple, typid, typmod,
+                                                STATISTIC_KIND_MCV, InvalidOid,
+                                                &values, &nvalues,
+                                                &numbers, &nnumbers))
+       {
+               for (i = 0; i < nvalues; i++)
                {
-                       pfree(DatumGetPointer(hival));
-                       pfree(DatumGetPointer(loval));
+                       if (DatumGetBool(FunctionCall2(&opproc,
+                                                                                  values[i],
+                                                                                  value)))
+                               mcv_selec += numbers[i];
+                       sumcommon += numbers[i];
                }
+               free_attstatsslot(typid, values, nvalues, numbers, nnumbers);
+       }
+
+       /*
+        * If there is a histogram, determine which bin the constant falls in,
+        * and compute the resulting contribution to selectivity.
+        *
+        * Someday, VACUUM might store more than one histogram per rel/att,
+        * corresponding to more than one possible sort ordering defined for
+        * the column type.  However, to make that work we will need to figure
+        * out which staop to search for --- it's not necessarily the one we
+        * have at hand!  (For example, we might have a '<=' operator rather
+        * than the '<' operator that will appear in staop.)  For now, assume
+        * that whatever appears in pg_statistic is sorted the same way our
+        * operator sorts.
+        */
+       hist_selec = 0.0;
 
-               if (high <= low)
+       if (get_attstatsslot(statsTuple, typid, typmod,
+                                                STATISTIC_KIND_HISTOGRAM, InvalidOid,
+                                                &values, &nvalues,
+                                                NULL, NULL))
+       {
+               if (nvalues > 1)
                {
+                       double  histfrac;
+                       bool    ltcmp;
+
+                       ltcmp = DatumGetBool(FunctionCall2(&opproc,
+                                                                                          values[0],
+                                                                                          value));
+                       if (isgt)
+                               ltcmp = !ltcmp;
+                       if (!ltcmp)
+                       {
+                               /* Constant is below lower histogram boundary. */
+                               histfrac = 0.0;
+                       }
+                       else
+                       {
+                               /*
+                                * Scan to find proper location.  This could be made faster
+                                * by using a binary-search method, but it's probably not
+                                * worth the trouble for typical histogram sizes.
+                                */
+                               for (i = 1; i < nvalues; i++)
+                               {
+                                       ltcmp = DatumGetBool(FunctionCall2(&opproc,
+                                                                                                          values[i],
+                                                                                                          value));
+                                       if (isgt)
+                                               ltcmp = !ltcmp;
+                                       if (!ltcmp)
+                                               break;
+                               }
+                               if (i >= nvalues)
+                               {
+                                       /* Constant is above upper histogram boundary. */
+                                       histfrac = 1.0;
+                               }
+                               else
+                               {
+                                       double          val,
+                                                               high,
+                                                               low;
+                                       double          binfrac;
 
+                                       /*
+                                        * We have values[i-1] < constant < values[i].
+                                        *
+                                        * Convert the constant and the two nearest bin boundary
+                                        * values to a uniform comparison scale, and do a linear
+                                        * interpolation within this bin.
+                                        */
+                                       if (convert_to_scalar(value, contype, &val,
+                                                                                 values[i-1], values[i], typid,
+                                                                                 &low, &high))
+                                       {
+                                               if (high <= low)
+                                               {
+                                                       /* cope if bin boundaries appear identical */
+                                                       binfrac = 0.5;
+                                               }
+                                               else if (val <= low)
+                                                       binfrac = 0.0;
+                                               else if (val >= high)
+                                                       binfrac = 1.0;
+                                               else
+                                                       binfrac = (val - low) / (high - low);
+                                       }
+                                       else
+                                       {
+                                               /*
+                                                * Ideally we'd produce an error here, on the grounds
+                                                * that the given operator shouldn't have scalarltsel
+                                                * registered as its selectivity func unless we can
+                                                * deal with its operand types.  But currently, all
+                                                * manner of stuff is invoking scalarltsel, so give a
+                                                * default estimate until that can be fixed.
+                                                */
+                                               binfrac = 0.5;
+                                       }
+                                       /*
+                                        * Now, compute the overall selectivity across the values
+                                        * represented by the histogram.  We have i-1 full bins
+                                        * and binfrac partial bin below the constant.
+                                        */
+                                       histfrac = (double) (i-1) + binfrac;
+                                       histfrac /= (double) (nvalues - 1);
+                               }
+                       }
                        /*
-                        * If we trusted the stats fully, we could return a small or
-                        * large selec depending on which side of the single data
-                        * point the constant is on.  But it seems better to assume
-                        * that the stats are wrong and return a default...
+                        * Now histfrac = fraction of histogram entries below the constant.
+                        *
+                        * Account for "<" vs ">"
                         */
-                       result = DEFAULT_INEQ_SEL;
-               }
-               else if (val < low || val > high)
-               {
-
+                       hist_selec = isgt ? (1.0 - histfrac) : histfrac;
                        /*
-                        * If given value is outside the statistical range, return a
-                        * small or large value; but not 0.0/1.0 since there is a
-                        * chance the stats are out of date.
+                        * The histogram boundaries are only approximate to begin
+                        * with, and may well be out of date anyway.  Therefore,
+                        * don't believe extremely small or large selectivity
+                        * estimates.
                         */
-                       if (flag & SEL_RIGHT)
-                               result = (val < low) ? 0.001 : 0.999;
-                       else
-                               result = (val < low) ? 0.999 : 0.001;
-               }
-               else
-               {
-                       denominator = high - low;
-                       if (flag & SEL_RIGHT)
-                               numerator = val - low;
-                       else
-                               numerator = high - val;
-                       result = numerator / denominator;
+                       if (hist_selec < 0.001)
+                               hist_selec = 0.001;
+                       else if (hist_selec > 0.999)
+                               hist_selec = 0.999;
                }
+
+               free_attstatsslot(typid, values, nvalues, NULL, 0);
        }
-       PG_RETURN_FLOAT8(result);
+
+       /*
+        * Now merge the results from the MCV and histogram calculations,
+        * realizing that the histogram covers only the non-null values that
+        * are not listed in MCV.
+        */
+       selec = 1.0 - stats->stanullfrac - sumcommon;
+
+       if (hist_selec > 0.0)
+               selec *= hist_selec;
+       else
+       {
+               /*
+                * If no histogram but there are values not accounted for by MCV,
+                * arbitrarily assume half of them will match.
+                */
+               selec *= 0.5;
+       }
+
+       selec += mcv_selec;
+
+       ReleaseSysCache(statsTuple);
+
+       /* result should be in range, but make sure... */
+       if (selec < 0.0)
+               selec = 0.0;
+       else if (selec > 1.0)
+               selec = 1.0;
+
+       PG_RETURN_FLOAT8((float8) selec);
 }
 
 /*
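(The histogram logic above amounts to locating the constant's bin and interpolating linearly within it. A worked sketch with invented numbers: if the stored histogram boundaries are {0, 10, 20, 30, 40} — five values, hence four equal-population bins — and the predicate is column < 25, the scan stops at i = 3 because values[3] = 30 is the first boundary not below 25; binfrac = (25 - 20) / (30 - 20) = 0.5, so histfrac = (3 - 1 + 0.5) / 4 = 0.625. That fraction is then scaled by the non-null, non-MCV portion of the table, and the MCV contributions are added back in. The kind of predicate this path estimates, names invented:

    EXPLAIN SELECT * FROM mytable WHERE somecol < 25;
)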
@@ -428,34 +602,25 @@ scalargtsel(PG_FUNCTION_ARGS)
        Datum           value = PG_GETARG_DATUM(3);
        int32           flag = PG_GETARG_INT32(4);
        Oid                     ltopid;
-       float8          result;
 
        /*
-        * Compute selectivity of "<", then invert --- but only if we were
-        * able to produce a non-default estimate.      Note that we get the
-        * negator which strictly speaking means we are looking at "<=" for
-        * ">" or "<" for ">=".  We assume this won't matter.
+        * Commute so that we have a "<" or "<=" operator, then apply
+        * scalarltsel.
         */
-       ltopid = get_negator(opid);
-       if (ltopid)
-       {
-               result = DatumGetFloat8(DirectFunctionCall5(scalarltsel,
-                                                                                               ObjectIdGetDatum(ltopid),
-                                                                                                ObjectIdGetDatum(relid),
-                                                                                                       Int16GetDatum(attno),
-                                                                                                       value,
-                                                                                                       Int32GetDatum(flag)));
-       }
-       else
+       ltopid = get_commutator(opid);
+       if (!ltopid)
        {
                /* Use default selectivity (should we raise an error instead?) */
-               result = DEFAULT_INEQ_SEL;
+               PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
        }
 
-       if (result != DEFAULT_INEQ_SEL)
-               result = 1.0 - result;
-
-       PG_RETURN_FLOAT8(result);
+       flag ^= SEL_RIGHT;
+       return DirectFunctionCall5(scalarltsel,
+                                                          ObjectIdGetDatum(ltopid),
+                                                          ObjectIdGetDatum(relid),
+                                                          Int16GetDatum(attno),
+                                                          value,
+                                                          Int32GetDatum(flag));
 }
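
A toy sketch of the flag handling above: for a clause like "tab.x > 42" the
constant starts on the right; commuting ">" to "<" rewrites the clause as
"42 < tab.x", so the side bit must be toggled before scalarltsel is invoked.
The macro values below are assumptions for illustration only.

#include <stdio.h>

/* Stand-ins for the planner's selectivity flag bits (values assumed) */
#define SEL_CONSTANT    1       /* one operand is a constant */
#define SEL_RIGHT       2       /* the constant is the right-hand operand */

int
main(void)
{
    int     flag = SEL_CONSTANT | SEL_RIGHT;    /* "tab.x > 42" */

    /* commuting ">" to "<" gives "42 < tab.x": constant moves to the left */
    flag ^= SEL_RIGHT;

    printf("constant is now on the %s\n",
           (flag & SEL_RIGHT) ? "right" : "left");
    return 0;
}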
 
 /*
@@ -476,7 +641,7 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype)
                result = DEFAULT_MATCH_SEL;
        else
        {
-               HeapTuple       oprtuple;
+               HeapTuple       oprTuple;
                Oid                     ltype,
                                        rtype;
                char       *patt;
@@ -488,14 +653,14 @@ patternsel(PG_FUNCTION_ARGS, Pattern_Type ptype)
                 * Get left and right datatypes of the operator so we know what
                 * type the attribute is.
                 */
-               oprtuple = SearchSysCache(OPEROID,
+               oprTuple = SearchSysCache(OPEROID,
                                                                  ObjectIdGetDatum(opid),
                                                                  0, 0, 0);
-               if (!HeapTupleIsValid(oprtuple))
+               if (!HeapTupleIsValid(oprTuple))
                        elog(ERROR, "patternsel: no tuple for operator %u", opid);
-               ltype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprleft;
-               rtype = ((Form_pg_operator) GETSTRUCT(oprtuple))->oprright;
-               ReleaseSysCache(oprtuple);
+               ltype = ((Form_pg_operator) GETSTRUCT(oprTuple))->oprleft;
+               rtype = ((Form_pg_operator) GETSTRUCT(oprTuple))->oprright;
+               ReleaseSysCache(oprTuple);
 
                /* the right-hand const is type text for all supported operators */
                Assert(rtype == TEXTOID);
@@ -659,42 +824,88 @@ eqjoinsel(PG_FUNCTION_ARGS)
        AttrNumber      attno1 = PG_GETARG_INT16(2);
        Oid                     relid2 = PG_GETARG_OID(3);
        AttrNumber      attno2 = PG_GETARG_INT16(4);
-       float8          result;
-       float8          num1,
-                               num2,
-                               min;
        bool            unknown1 = NONVALUE(relid1) || NONVALUE(attno1);
        bool            unknown2 = NONVALUE(relid2) || NONVALUE(attno2);
+       double          selec;
 
        if (unknown1 && unknown2)
-               result = DEFAULT_EQ_SEL;
+               selec = DEFAULT_EQ_SEL;
        else
        {
-               num1 = unknown1 ? 1.0 : get_attdispersion(relid1, attno1, 0.01);
-               num2 = unknown2 ? 1.0 : get_attdispersion(relid2, attno2, 0.01);
+               Oid                     typid1;
+               Oid                     typid2;
+               int32           typmod1;
+               int32           typmod2;
+               HeapTuple       statsTuple1 = NULL;
+               HeapTuple       statsTuple2 = NULL;
+               Form_pg_statistic stats1 = NULL;
+               Form_pg_statistic stats2 = NULL;
+               double          nd1,
+                                       nd2;
+
+               if (unknown1)
+               {
+                       nd1 = 100.0;
+               }
+               else
+               {
+                       /* get info about the attribute */
+                       getattproperties(relid1, attno1, &typid1, &typmod1);
+
+                       /* get stats for the attribute, if available */
+                       statsTuple1 = SearchSysCache(STATRELATT,
+                                                                                ObjectIdGetDatum(relid1),
+                                                                                Int16GetDatum(attno1),
+                                                                                0, 0);
+                       if (HeapTupleIsValid(statsTuple1))
+                               stats1 = (Form_pg_statistic) GETSTRUCT(statsTuple1);
+
+                       nd1 = get_att_numdistinct(relid1, attno1, typid1, stats1);
+               }
+
+               if (unknown2)
+               {
+                       nd2 = 100.0;
+               }
+               else
+               {
+                       /* get info about the attribute */
+                       getattproperties(relid2, attno2, &typid2, &typmod2);
+
+                       /* get stats for the attribute, if available */
+                       statsTuple2 = SearchSysCache(STATRELATT,
+                                                                                ObjectIdGetDatum(relid2),
+                                                                                Int16GetDatum(attno2),
+                                                                                0, 0);
+                       if (HeapTupleIsValid(statsTuple2))
+                               stats2 = (Form_pg_statistic) GETSTRUCT(statsTuple2);
+
+                       nd2 = get_att_numdistinct(relid2, attno2, typid2, stats2);
+               }
 
                /*
-                * The join selectivity cannot be more than num2, since each tuple
-                * in table 1 could match no more than num2 fraction of tuples in
-                * table 2 (and that's only if the table-1 tuple matches the most
-                * common value in table 2, so probably it's less).  By the same
-                * reasoning it is not more than num1. The min is therefore an
-                * upper bound.
+                * Estimate the join selectivity as 1 / sqrt(nd1*nd2)
+                * (can we produce any theory for this?)
                 *
-                * If we know the dispersion of only one side, use it; the reasoning
-                * above still works.
+                * XXX possibility to do better: if both attributes have histograms
+                * then we could determine the exact join selectivity between the
+                * MCV sets, and only have to assume the join behavior of the non-MCV
+                * values.  This could be a big win when the MCVs cover a large part
+                * of the population.
                 *
-                * XXX can we make a better estimate here?      Using the nullfrac
-                * statistic might be helpful, for example.  Assuming the operator
-                * is strict (does not succeed for null inputs) then the
-                * selectivity couldn't be more than (1-nullfrac1)*(1-nullfrac2),
-                * which might be usefully small if there are many nulls.  How
-                * about applying the operator to the most common values?
+                * XXX what about nulls?
                 */
-               min = (num1 < num2) ? num1 : num2;
-               result = min;
+               selec = 1.0 / sqrt(nd1 * nd2);
+               if (selec > 1.0)
+                       selec = 1.0;
+
+               if (HeapTupleIsValid(statsTuple1))
+                       ReleaseSysCache(statsTuple1);
+               if (HeapTupleIsValid(statsTuple2))
+                       ReleaseSysCache(statsTuple2);
+
        }
-       PG_RETURN_FLOAT8(result);
+       PG_RETURN_FLOAT8((float8) selec);
 }
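
With made-up distinct-value counts, the new estimate works out as in this
standalone sketch (numbers are illustrative only):

#include <math.h>
#include <stdio.h>

int
main(void)
{
    double  nd1 = 1000.0;       /* distinct values on one join side */
    double  nd2 = 10.0;         /* distinct values on the other side */
    double  selec = 1.0 / sqrt(nd1 * nd2);

    if (selec > 1.0)
        selec = 1.0;

    /* prints 0.01; the removed dispersion-based code used min(num1, num2) */
    printf("estimated join selectivity = %g\n", selec);
    return 0;
}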
 
 /*
@@ -829,7 +1040,8 @@ icnlikejoinsel(PG_FUNCTION_ARGS)
  *       Returns "true" if successful.
  *
  * All numeric datatypes are simply converted to their equivalent
- * "double" values.
+ * "double" values.  XXX what about NUMERIC values that are outside
+ * the range of "double"?
  *
  * String datatypes are converted by convert_string_to_scalar(),
  * which is explained below.  The reason why this routine deals with
@@ -917,7 +1129,7 @@ convert_numeric_to_scalar(Datum value, Oid typid)
 {
        switch (typid)
        {
-                       case BOOLOID:
+               case BOOLOID:
                        return (double) DatumGetBool(value);
                case INT2OID:
                        return (double) DatumGetInt16(value);
@@ -963,6 +1175,8 @@ convert_numeric_to_scalar(Datum value, Oid typid)
  * three strings before computing the scaled values.  This allows us to
  * "zoom in" when we encounter a narrow data range.  An example is a phone
  * number database where all the values begin with the same area code.
+ * (Actually, the bounds will be adjacent histogram-bin-boundary values,
+ * so this is more likely to happen than you might think.)
  */
 static void
 convert_string_to_scalar(unsigned char *value,
@@ -1208,11 +1422,11 @@ convert_timevalue_to_scalar(Datum value, Oid typid)
 /*
  * getattproperties
  *       Retrieve pg_attribute properties for an attribute,
- *       including type OID, type len, type byval flag, typmod.
+ *       including type OID and typmod.
  */
 static void
 getattproperties(Oid relid, AttrNumber attnum,
-                                Oid *typid, int *typlen, bool *typbyval, int32 *typmod)
+                                Oid *typid, int32 *typmod)
 {
        HeapTuple       atp;
        Form_pg_attribute att_tup;
@@ -1227,164 +1441,87 @@ getattproperties(Oid relid, AttrNumber attnum,
        att_tup = (Form_pg_attribute) GETSTRUCT(atp);
 
        *typid = att_tup->atttypid;
-       *typlen = att_tup->attlen;
-       *typbyval = att_tup->attbyval;
        *typmod = att_tup->atttypmod;
 
        ReleaseSysCache(atp);
 }
 
 /*
- * getattstatistics
- *       Retrieve the pg_statistic data for an attribute.
- *       Returns 'false' if no stats are available.
+ * get_att_numdistinct
  *
- * Inputs:
- * 'relid' and 'attnum' are the relation and attribute number.
- * 'typid' and 'typmod' are the type and typmod of the column,
- * which the caller must already have looked up.
+ *       Estimate the number of distinct values of an attribute.
  *
- * Outputs:
- * The available stats are nullfrac, commonfrac, commonval, loval, hival.
- * The caller need not retrieve all five --- pass NULL pointers for the
- * unwanted values.
+ * relid, attnum: identify the attribute to examine.
+ * typid: type of attribute.
+ * stats: pg_statistic tuple for attribute, or NULL if not available.
  *
- * commonval, loval, hival are returned as Datums holding the internal
- * representation of the values.  (Note that these should be pfree'd
- * after use if the data type is not by-value.)
+ * XXX possible future improvement: look to see if there is a unique
+ * index on the attribute.  If so, we can estimate ndistinct = ntuples.
+ * This should probably override any info from pg_statistic.
  */
-static bool
-getattstatistics(Oid relid,
-                                AttrNumber attnum,
-                                Oid typid,
-                                int32 typmod,
-                                double *nullfrac,
-                                double *commonfrac,
-                                Datum *commonval,
-                                Datum *loval,
-                                Datum *hival)
+static double
+get_att_numdistinct(Oid relid, AttrNumber attnum, Oid typid,
+                                       Form_pg_statistic stats)
 {
-       HeapTuple       tuple;
-       HeapTuple       typeTuple;
-       FmgrInfo        inputproc;
-       Oid                     typelem;
-       bool            isnull;
+       HeapTuple       reltup;
+       double          ntuples;
 
        /*
-        * We assume that there will only be one entry in pg_statistic for the
-        * given rel/att, so we search WITHOUT considering the staop column.
-        * Someday, VACUUM might store more than one entry per rel/att,
-        * corresponding to more than one possible sort ordering defined for
-        * the column type.  However, to make that work we will need to figure
-        * out which staop to search for --- it's not necessarily the one we
-        * have at hand!  (For example, we might have a '>' operator rather
-        * than the '<' operator that will appear in staop.)
+        * Special-case boolean columns: presumably, two distinct values.
+        *
+        * Are there any other cases we should wire in special estimates for?
         */
-       tuple = SearchSysCache(STATRELID,
-                                                  ObjectIdGetDatum(relid),
-                                                  Int16GetDatum((int16) attnum),
-                                                  0, 0);
-       if (!HeapTupleIsValid(tuple))
-       {
-               /* no such stats entry */
-               return false;
-       }
+       if (typid == BOOLOID)
+               return 2.0;
 
-       if (nullfrac)
-               *nullfrac = ((Form_pg_statistic) GETSTRUCT(tuple))->stanullfrac;
-       if (commonfrac)
-               *commonfrac = ((Form_pg_statistic) GETSTRUCT(tuple))->stacommonfrac;
-
-       /* Get the type input proc for the column datatype */
-       typeTuple = SearchSysCache(TYPEOID,
-                                                          ObjectIdGetDatum(typid),
-                                                          0, 0, 0);
-       if (!HeapTupleIsValid(typeTuple))
-               elog(ERROR, "getattstatistics: Cache lookup failed for type %u",
-                        typid);
-       fmgr_info(((Form_pg_type) GETSTRUCT(typeTuple))->typinput, &inputproc);
-       typelem = ((Form_pg_type) GETSTRUCT(typeTuple))->typelem;
-       ReleaseSysCache(typeTuple);
+       /*
+        * If VACUUM ANALYZE determined a fixed estimate, use it.
+        */
+       if (stats && stats->stadistinct > 0.0)
+               return stats->stadistinct;
 
        /*
-        * Values are variable-length fields, so cannot access as struct
-        * fields. Must do it the hard way with SysCacheGetAttr.
+        * Otherwise we need to get the relation size.
         */
-       if (commonval)
-       {
-               Datum           val = SysCacheGetAttr(STATRELID, tuple,
-                                                                                 Anum_pg_statistic_stacommonval,
-                                                                                 &isnull);
+       reltup = SearchSysCache(RELOID,
+                                                       ObjectIdGetDatum(relid),
+                                                       0, 0, 0);
+       if (!HeapTupleIsValid(reltup))
+               elog(ERROR, "get_att_numdistinct: no relation tuple %u", relid);
 
-               if (isnull)
-               {
-                       elog(DEBUG, "getattstatistics: stacommonval is null");
-                       *commonval = PointerGetDatum(NULL);
-               }
-               else
-               {
-                       char       *strval = DatumGetCString(DirectFunctionCall1(textout,
-                                                                                                                                  val));
-
-                       *commonval = FunctionCall3(&inputproc,
-                                                                          CStringGetDatum(strval),
-                                                                          ObjectIdGetDatum(typelem),
-                                                                          Int32GetDatum(typmod));
-                       pfree(strval);
-               }
-       }
+       ntuples = ((Form_pg_class) GETSTRUCT(reltup))->reltuples;
 
-       if (loval)
-       {
-               Datum           val = SysCacheGetAttr(STATRELID, tuple,
-                                                                                 Anum_pg_statistic_staloval,
-                                                                                 &isnull);
+       ReleaseSysCache(reltup);
 
-               if (isnull)
-               {
-                       elog(DEBUG, "getattstatistics: staloval is null");
-                       *loval = PointerGetDatum(NULL);
-               }
-               else
-               {
-                       char       *strval = DatumGetCString(DirectFunctionCall1(textout,
-                                                                                                                                  val));
-
-                       *loval = FunctionCall3(&inputproc,
-                                                                  CStringGetDatum(strval),
-                                                                  ObjectIdGetDatum(typelem),
-                                                                  Int32GetDatum(typmod));
-                       pfree(strval);
-               }
-       }
+       if (ntuples <= 0.0)
+               return 100.0;                   /* no data available; return a default */
 
-       if (hival)
-       {
-               Datum           val = SysCacheGetAttr(STATRELID, tuple,
-                                                                                 Anum_pg_statistic_stahival,
-                                                                                 &isnull);
+       /*
+        * If VACUUM ANALYZE determined a scaled estimate, use it.
+        */
+       if (stats && stats->stadistinct < 0.0)
+               return - stats->stadistinct * ntuples;
 
-               if (isnull)
-               {
-                       elog(DEBUG, "getattstatistics: stahival is null");
-                       *hival = PointerGetDatum(NULL);
-               }
-               else
-               {
-                       char       *strval = DatumGetCString(DirectFunctionCall1(textout,
-                                                                                                                                  val));
-
-                       *hival = FunctionCall3(&inputproc,
-                                                                  CStringGetDatum(strval),
-                                                                  ObjectIdGetDatum(typelem),
-                                                                  Int32GetDatum(typmod));
-                       pfree(strval);
-               }
+       /*
+        * VACUUM ANALYZE does not compute stats for system attributes,
+        * but some of them can reasonably be assumed unique anyway.
+        */
+       switch (attnum)
+       {
+               case ObjectIdAttributeNumber:
+               case SelfItemPointerAttributeNumber:
+                       return ntuples;
+               case TableOidAttributeNumber:
+                       return 1.0;
        }
 
-       ReleaseSysCache(tuple);
+       /*
+        * Estimate ndistinct = ntuples if the table is small, else 100.
+        */
+       if (ntuples < 100.0)
+               return ntuples;
 
-       return true;
+       return 100.0;
 }
 
 /*-------------------------------------------------------------------------
index 82d5586..3995de5 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/utils/cache/lsyscache.c,v 1.52 2001/03/23 04:49:55 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/utils/cache/lsyscache.c,v 1.53 2001/05/07 00:43:24 tgl Exp $
  *
  * NOTES
  *       Eventually, the index information should go through here, too.
 #include "access/tupmacs.h"
 #include "catalog/pg_operator.h"
 #include "catalog/pg_proc.h"
+#include "catalog/pg_statistic.h"
 #include "catalog/pg_type.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
 #include "utils/lsyscache.h"
 #include "utils/syscache.h"
 
@@ -182,106 +185,6 @@ get_atttypmod(Oid relid, AttrNumber attnum)
                return -1;
 }
 
-/*
- * get_attdispersion
- *
- *       Retrieve the dispersion statistic for an attribute,
- *       or produce an estimate if no info is available.
- *
- * min_estimate is the minimum estimate to return if insufficient data
- * is available to produce a reliable value.  This value may vary
- * depending on context.  (For example, when deciding whether it is
- * safe to use a hashjoin, we want to be more conservative than when
- * estimating the number of tuples produced by an equijoin.)
- */
-double
-get_attdispersion(Oid relid, AttrNumber attnum, double min_estimate)
-{
-       HeapTuple       atp;
-       Form_pg_attribute att_tup;
-       double          dispersion;
-       Oid                     atttypid;
-       int32           ntuples;
-
-       atp = SearchSysCache(ATTNUM,
-                                                ObjectIdGetDatum(relid),
-                                                Int16GetDatum(attnum),
-                                                0, 0);
-       if (!HeapTupleIsValid(atp))
-       {
-               /* this should not happen */
-               elog(ERROR, "get_attdispersion: no attribute tuple %u %d",
-                        relid, attnum);
-               return min_estimate;
-       }
-
-       att_tup = (Form_pg_attribute) GETSTRUCT(atp);
-
-       dispersion = att_tup->attdispersion;
-       atttypid = att_tup->atttypid;
-
-       ReleaseSysCache(atp);
-
-       if (dispersion > 0.0)
-               return dispersion;              /* we have a specific estimate from VACUUM */
-
-       /*
-        * Special-case boolean columns: the dispersion of a boolean is highly
-        * unlikely to be anywhere near 1/numtuples, instead it's probably
-        * more like 0.5.
-        *
-        * Are there any other cases we should wire in special estimates for?
-        */
-       if (atttypid == BOOLOID)
-               return 0.5;
-
-       /*
-        * Dispersion is either 0 (no data available) or -1 (dispersion is
-        * 1/numtuples).  Either way, we need the relation size.
-        */
-
-       atp = SearchSysCache(RELOID,
-                                                ObjectIdGetDatum(relid),
-                                                0, 0, 0);
-       if (!HeapTupleIsValid(atp))
-       {
-               /* this should not happen */
-               elog(ERROR, "get_attdispersion: no relation tuple %u", relid);
-               return min_estimate;
-       }
-
-       ntuples = ((Form_pg_class) GETSTRUCT(atp))->reltuples;
-
-       ReleaseSysCache(atp);
-
-       if (ntuples == 0)
-               return min_estimate;    /* no data available */
-
-       if (dispersion < 0.0)           /* VACUUM thinks there are no duplicates */
-               return 1.0 / (double) ntuples;
-
-       /*
-        * VACUUM ANALYZE does not compute dispersion for system attributes,
-        * but some of them can reasonably be assumed unique anyway.
-        */
-       if (attnum == ObjectIdAttributeNumber ||
-               attnum == SelfItemPointerAttributeNumber)
-               return 1.0 / (double) ntuples;
-       if (attnum == TableOidAttributeNumber)
-               return 1.0;
-
-       /*
-        * VACUUM ANALYZE has not been run for this table. Produce an estimate
-        * of 1/numtuples.  This may produce unreasonably small estimates for
-        * large tables, so limit the estimate to no less than min_estimate.
-        */
-       dispersion = 1.0 / (double) ntuples;
-       if (dispersion < min_estimate)
-               dispersion = min_estimate;
-
-       return dispersion;
-}
-
 /*                             ---------- INDEX CACHE ----------                                                */
 
 /*             watch this space...
@@ -876,3 +779,157 @@ get_typtype(Oid typid)
 }
 
 #endif
+
+/*                             ---------- STATISTICS CACHE ----------                                   */
+
+/*
+ * get_attstatsslot
+ *
+ *             Extract the contents of a "slot" of a pg_statistic tuple.
+ *             Returns TRUE if requested slot type was found, else FALSE.
+ *
+ * Unlike other routines in this file, this takes a pointer to an
+ * already-looked-up tuple in the pg_statistic cache.  We do this since
+ * most callers will want to extract more than one value from the cache
+ * entry, and we don't want to repeat the cache lookup unnecessarily.
+ *
+ * statstuple: pg_statistic tuple to be examined.
+ * atttype: type OID of attribute.
+ * atttypmod: typmod of attribute.
+ * reqkind: STAKIND code for desired statistics slot kind.
+ * reqop: STAOP value wanted, or InvalidOid if don't care.
+ * values, nvalues: if not NULL, the slot's stavalues are extracted.
+ * numbers, nnumbers: if not NULL, the slot's stanumbers are extracted.
+ *
+ * If assigned, values and numbers are set to point to palloc'd arrays.
+ * If the attribute type is pass-by-reference, the values referenced by
+ * the values array are themselves palloc'd.  The palloc'd stuff can be
+ * freed by calling free_attstatsslot.
+ */
+bool
+get_attstatsslot(HeapTuple statstuple,
+                                Oid atttype, int32 atttypmod,
+                                int reqkind, Oid reqop,
+                                Datum **values, int *nvalues,
+                                float4 **numbers, int *nnumbers)
+{
+       Form_pg_statistic stats = (Form_pg_statistic) GETSTRUCT(statstuple);
+       int                     i,
+                               j;
+       Datum           val;
+       bool            isnull;
+       ArrayType  *statarray;
+       int                     narrayelem;
+       HeapTuple       typeTuple;
+       FmgrInfo        inputproc;
+       Oid                     typelem;
+
+       for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
+       {
+               if ((&stats->stakind1)[i] == reqkind &&
+                       (reqop == InvalidOid || (&stats->staop1)[i] == reqop))
+                       break;
+       }
+       if (i >= STATISTIC_NUM_SLOTS)
+               return false;                   /* not there */
+
+       if (values)
+       {
+               val = SysCacheGetAttr(STATRELATT, statstuple,
+                                                         Anum_pg_statistic_stavalues1 + i,
+                                                         &isnull);
+               if (isnull)
+                       elog(ERROR, "get_attstatsslot: stavalues is null");
+               statarray = DatumGetArrayTypeP(val);
+               /*
+                * Do initial examination of the array.  This produces a list
+                * of text Datums --- ie, pointers into the text array value.
+                */
+               deconstruct_array(statarray, false, -1, 'i', values, nvalues);
+               narrayelem = *nvalues;
+               /*
+                * We now need to replace each text Datum by its internal equivalent.
+                *
+                * Get the type input proc and typelem for the column datatype.
+                */
+               typeTuple = SearchSysCache(TYPEOID,
+                                                                  ObjectIdGetDatum(atttype),
+                                                                  0, 0, 0);
+               if (!HeapTupleIsValid(typeTuple))
+                       elog(ERROR, "get_attstatsslot: Cache lookup failed for type %u",
+                                atttype);
+               fmgr_info(((Form_pg_type) GETSTRUCT(typeTuple))->typinput, &inputproc);
+               typelem = ((Form_pg_type) GETSTRUCT(typeTuple))->typelem;
+               ReleaseSysCache(typeTuple);
+               /*
+                * Do the conversions.  The palloc'd array of Datums is reused
+                * in place.
+                */
+               for (j = 0; j < narrayelem; j++)
+               {
+                       char       *strval;
+
+                       strval = DatumGetCString(DirectFunctionCall1(textout,
+                                                                                                                (*values)[j]));
+                       (*values)[j] = FunctionCall3(&inputproc,
+                                                                                CStringGetDatum(strval),
+                                                                                ObjectIdGetDatum(typelem),
+                                                                                Int32GetDatum(atttypmod));
+                       pfree(strval);
+               }
+               /*
+                * Free statarray if it's a detoasted copy.
+                */
+               if ((Pointer) statarray != DatumGetPointer(val))
+                       pfree(statarray);
+       }
+
+       if (numbers)
+       {
+               val = SysCacheGetAttr(STATRELATT, statstuple,
+                                                         Anum_pg_statistic_stanumbers1 + i,
+                                                         &isnull);
+               if (isnull)
+                       elog(ERROR, "get_attstatsslot: stanumbers is null");
+               statarray = DatumGetArrayTypeP(val);
+               /*
+                * We expect the array to be a 1-D float4 array; verify that.
+                * We don't need to use deconstruct_array() since the array
+                * data is just going to look like a C array of float4 values.
+                */
+               narrayelem = ARR_DIMS(statarray)[0];
+               if (ARR_NDIM(statarray) != 1 || narrayelem <= 0 ||
+                       ARR_SIZE(statarray) != (ARR_OVERHEAD(1) + narrayelem * sizeof(float4)))
+                       elog(ERROR, "get_attstatsslot: stanumbers is bogus");
+               *numbers = (float4 *) palloc(narrayelem * sizeof(float4));
+               memcpy(*numbers, ARR_DATA_PTR(statarray), narrayelem * sizeof(float4));
+               *nnumbers = narrayelem;
+               /*
+                * Free statarray if it's a detoasted copy.
+                */
+               if ((Pointer) statarray != DatumGetPointer(val))
+                       pfree(statarray);
+       }
+
+       return true;
+}
+
+void
+free_attstatsslot(Oid atttype,
+                                 Datum *values, int nvalues,
+                                 float4 *numbers, int nnumbers)
+{
+       if (values)
+       {
+               if (! get_typbyval(atttype))
+               {
+                       int             i;
+
+                       for (i = 0; i < nvalues; i++)
+                               pfree(DatumGetPointer(values[i]));
+               }
+               pfree(values);
+       }
+       if (numbers)
+               pfree(numbers);
+}
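
A typical caller of the new slot API follows the pattern already visible in
the selfuncs.c changes above; this fragment assumes a backend context and a
statsTuple already fetched from the STATRELATT cache (variable names are
illustrative only):

    Datum      *values;
    int         nvalues;

    if (get_attstatsslot(statsTuple, typid, typmod,
                         STATISTIC_KIND_HISTOGRAM, InvalidOid,
                         &values, &nvalues,
                         NULL, NULL))
    {
        /* ... values[0..nvalues-1] are the histogram bin boundaries ... */
        free_attstatsslot(typid, values, nvalues, NULL, 0);
    }
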
index 75ef317..4e35b3f 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/utils/cache/syscache.c,v 1.60 2001/03/22 03:59:57 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/utils/cache/syscache.c,v 1.61 2001/05/07 00:43:24 tgl Exp $
  *
  * NOTES
  *       These routines allow the parser/planner/executor to perform
@@ -313,7 +313,7 @@ static struct cachedesc cacheinfo[] = {
                        0,
                        0
        }},
-       {StatisticRelationName,         /* STATRELID */
+       {StatisticRelationName,         /* STATRELATT */
                StatisticRelidAttnumIndex,
                2,
                {
index d27bfb2..5a77c47 100644 (file)
@@ -78,7 +78,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/utils/sort/tuplesort.c,v 1.15 2001/03/23 04:49:55 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/utils/sort/tuplesort.c,v 1.16 2001/05/07 00:43:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 
 #include "access/heapam.h"
 #include "access/nbtree.h"
+#include "catalog/catname.h"
+#include "catalog/pg_amop.h"
+#include "catalog/pg_amproc.h"
 #include "miscadmin.h"
+#include "utils/fmgroids.h"
 #include "utils/logtape.h"
 #include "utils/lsyscache.h"
 #include "utils/tuplesort.h"
@@ -263,6 +267,7 @@ struct Tuplesortstate
        TupleDesc       tupDesc;
        int                     nKeys;
        ScanKey         scanKeys;
+       SortFunctionKind *sortFnKinds;
 
        /*
         * These variables are specific to the IndexTuple case; they are set
@@ -279,6 +284,7 @@ struct Tuplesortstate
        Oid                     datumType;
        Oid                     sortOperator;
        FmgrInfo        sortOpFn;               /* cached lookup data for sortOperator */
+       SortFunctionKind sortFnKind;
        /* we need typelen and byval in order to know how to copy the Datums. */
        int                     datumTypeLen;
        bool            datumTypeByVal;
@@ -458,14 +464,14 @@ tuplesort_begin_common(bool randomAccess)
 
 Tuplesortstate *
 tuplesort_begin_heap(TupleDesc tupDesc,
-                                        int nkeys, ScanKey keys,
+                                        int nkeys,
+                                        Oid *sortOperators, AttrNumber *attNums,
                                         bool randomAccess)
 {
        Tuplesortstate *state = tuplesort_begin_common(randomAccess);
+       int                     i;
 
-       AssertArg(nkeys >= 1);
-       AssertArg(keys[0].sk_attno != 0);
-       AssertArg(keys[0].sk_procedure != 0);
+       AssertArg(nkeys > 0);
 
        state->comparetup = comparetup_heap;
        state->copytup = copytup_heap;
@@ -475,7 +481,29 @@ tuplesort_begin_heap(TupleDesc tupDesc,
 
        state->tupDesc = tupDesc;
        state->nKeys = nkeys;
-       state->scanKeys = keys;
+       state->scanKeys = (ScanKey) palloc(nkeys * sizeof(ScanKeyData));
+       MemSet(state->scanKeys, 0, nkeys * sizeof(ScanKeyData));
+       state->sortFnKinds = (SortFunctionKind *)
+               palloc(nkeys * sizeof(SortFunctionKind));
+       MemSet(state->sortFnKinds, 0, nkeys * sizeof(SortFunctionKind));
+
+       for (i = 0; i < nkeys; i++)
+       {
+               RegProcedure sortFunction;
+
+               AssertArg(sortOperators[i] != 0);
+               AssertArg(attNums[i] != 0);
+
+               /* select a function that implements the sort operator */
+               SelectSortFunction(sortOperators[i], &sortFunction,
+                                                  &state->sortFnKinds[i]);
+
+               ScanKeyEntryInitialize(&state->scanKeys[i],
+                                                          0x0,
+                                                          attNums[i],
+                                                          sortFunction,
+                                                          (Datum) 0);
+       }
 
        return state;
 }
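
Callers of the revised interface now pass the sort operators and column
numbers directly rather than pre-built ScanKeys; a hypothetical caller
fragment (backend context assumed, variable names illustrative):

    Oid         sortOperators[2];   /* e.g. the "<" operators of the two keys */
    AttrNumber  attNums[2];         /* the key columns' attribute numbers */
    Tuplesortstate *sortstate;

    /* ... fill sortOperators[] and attNums[] from the Sort plan node ... */

    sortstate = tuplesort_begin_heap(tupDesc, 2,
                                     sortOperators, attNums,
                                     false);    /* no random access needed */
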
@@ -507,6 +535,7 @@ tuplesort_begin_datum(Oid datumType,
                                          bool randomAccess)
 {
        Tuplesortstate *state = tuplesort_begin_common(randomAccess);
+       RegProcedure sortFunction;
        int16           typlen;
        bool            typbyval;
 
@@ -518,8 +547,12 @@ tuplesort_begin_datum(Oid datumType,
 
        state->datumType = datumType;
        state->sortOperator = sortOperator;
-       /* lookup the function that implements the sort operator */
-       fmgr_info(get_opcode(sortOperator), &state->sortOpFn);
+
+       /* select a function that implements the sort operator */
+       SelectSortFunction(sortOperator, &sortFunction, &state->sortFnKind);
+       /* and look up the function */
+       fmgr_info(sortFunction, &state->sortOpFn);
+
        /* lookup necessary attributes of the datum type */
        get_typlenbyval(datumType, &typlen, &typbyval);
        state->datumTypeLen = typlen;
@@ -548,6 +581,13 @@ tuplesort_end(Tuplesortstate *state)
        }
        if (state->memtupindex)
                pfree(state->memtupindex);
+
+       /* this stuff might better belong in a variant-specific shutdown routine */
+       if (state->scanKeys)
+               pfree(state->scanKeys);
+       if (state->sortFnKinds)
+               pfree(state->sortFnKinds);
+
        pfree(state);
 }
 
@@ -1692,6 +1732,7 @@ comparetup_heap(Tuplesortstate *state, const void *a, const void *b)
        for (nkey = 0; nkey < state->nKeys; nkey++)
        {
                ScanKey         scanKey = state->scanKeys + nkey;
+               SortFunctionKind fnKind = state->sortFnKinds[nkey];
                AttrNumber      attno = scanKey->sk_attno;
                Datum           lattr,
                                        rattr;
@@ -1708,23 +1749,36 @@ comparetup_heap(Tuplesortstate *state, const void *a, const void *b)
                }
                else if (isnull2)
                        return -1;
-               else if (scanKey->sk_flags & SK_COMMUTE)
-               {
-                       if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
-                                                                                  rattr, lattr)))
-                               return -1;              /* a < b after commute */
-                       if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
-                                                                                  lattr, rattr)))
-                               return 1;               /* a > b after commute */
-               }
                else
                {
-                       if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
-                                                                                  lattr, rattr)))
-                               return -1;              /* a < b */
-                       if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
-                                                                                  rattr, lattr)))
-                               return 1;               /* a > b */
+                       int32           compare;
+
+                       if (fnKind == SORTFUNC_LT)
+                       {
+                               if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
+                                                                                          lattr, rattr)))
+                                       compare = -1;   /* a < b */
+                               else if (DatumGetBool(FunctionCall2(&scanKey->sk_func,
+                                                                                                       rattr, lattr)))
+                                       compare = 1;    /* a > b */
+                               else
+                                       compare = 0;
+                       }
+                       else
+                       {
+                               /* sort function is CMP or REVCMP */
+                               compare = DatumGetInt32(FunctionCall2(&scanKey->sk_func,
+                                                                                                         lattr, rattr));
+                               if (fnKind == SORTFUNC_REVCMP)
+                                       compare = -compare;
+                       }
+
+                       if (compare != 0)
+                       {
+                               if (scanKey->sk_flags & SK_COMMUTE)
+                                       compare = -compare;
+                               return compare;
+                       }
                }
        }
 
@@ -1852,8 +1906,10 @@ comparetup_index(Tuplesortstate *state, const void *a, const void *b)
                }
                else
                {
+                       /* the comparison function is always of CMP type */
                        compare = DatumGetInt32(FunctionCall2(&entry->sk_func,
-                                                                                               attrDatum1, attrDatum2));
+                                                                                                 attrDatum1,
+                                                                                                 attrDatum2));
                }
 
                if (compare != 0)
@@ -1954,7 +2010,7 @@ comparetup_datum(Tuplesortstate *state, const void *a, const void *b)
        }
        else if (rtup->isNull)
                return -1;
-       else
+       else if (state->sortFnKind == SORTFUNC_LT)
        {
                if (DatumGetBool(FunctionCall2(&state->sortOpFn,
                                                                           ltup->val, rtup->val)))
@@ -1964,6 +2020,17 @@ comparetup_datum(Tuplesortstate *state, const void *a, const void *b)
                        return 1;                       /* a > b */
                return 0;
        }
+       else
+       {
+               /* sort function is CMP or REVCMP */
+               int32   compare;
+
+               compare = DatumGetInt32(FunctionCall2(&state->sortOpFn,
+                                                                                         ltup->val, rtup->val));
+               if (state->sortFnKind == SORTFUNC_REVCMP)
+                       compare = -compare;
+               return compare;
+       }
 }
 
 static void *
@@ -2032,3 +2099,119 @@ tuplesize_datum(Tuplesortstate *state, void *tup)
                return (unsigned int) tuplelen;
        }
 }
+
+
+/*
+ * This routine selects an appropriate sorting function to implement
+ * a sort operator as efficiently as possible.  The straightforward
+ * method is to use the operator's implementation proc --- ie, "<"
+ * comparison.  However, that way often requires two calls of the function
+ * per comparison.  If we can find a btree three-way comparator function
+ * associated with the operator, we can use it to do the comparisons
+ * more efficiently.  We also support the possibility that the operator
+ * is ">" (descending sort), in which case we have to reverse the output
+ * of the btree comparator.
+ *
+ * Possibly this should live somewhere else (backend/catalog/, maybe?).
+ */
+void
+SelectSortFunction(Oid sortOperator,
+                                  RegProcedure *sortFunction,
+                                  SortFunctionKind *kind)
+{
+       Relation        relation;
+       HeapScanDesc scan;
+       ScanKeyData skey[3];
+       HeapTuple       tuple;
+       Oid                     opclass = InvalidOid;
+
+       /*
+        * Scan pg_amop to see if the target operator is registered as the
+        * "<" or ">" operator of any btree opclass.  It's possible that it
+        * might be registered both ways (eg, if someone were to build a
+        * "reverse sort" opclass for some reason); prefer the "<" case if so.
+        * If the operator is registered the same way in multiple opclasses,
+        * assume we can use the associated comparator function from any one.
+        */
+       relation = heap_openr(AccessMethodOperatorRelationName,
+                                                 AccessShareLock);
+
+       ScanKeyEntryInitialize(&skey[0], 0,
+                                                  Anum_pg_amop_amopid,
+                                                  F_OIDEQ,
+                                                  ObjectIdGetDatum(BTREE_AM_OID));
+
+       ScanKeyEntryInitialize(&skey[1], 0,
+                                                  Anum_pg_amop_amopopr,
+                                                  F_OIDEQ,
+                                                  ObjectIdGetDatum(sortOperator));
+
+       scan = heap_beginscan(relation, false, SnapshotNow, 2, skey);
+
+       while (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
+       {
+               Form_pg_amop aform = (Form_pg_amop) GETSTRUCT(tuple);
+
+               if (aform->amopstrategy == BTLessStrategyNumber)
+               {
+                       opclass = aform->amopclaid;
+                       *kind = SORTFUNC_CMP;
+                       break;                          /* done looking */
+               }
+               else if (aform->amopstrategy == BTGreaterStrategyNumber)
+               {
+                       opclass = aform->amopclaid;
+                       *kind = SORTFUNC_REVCMP;
+                       /* keep scanning in hopes of finding a BTLess entry */
+               }
+       }
+
+       heap_endscan(scan);
+       heap_close(relation, AccessShareLock);
+
+       if (OidIsValid(opclass))
+       {
+               /* Found a suitable opclass, get its comparator support function */
+               relation = heap_openr(AccessMethodProcedureRelationName,
+                                                         AccessShareLock);
+
+               ScanKeyEntryInitialize(&skey[0], 0,
+                                                          Anum_pg_amproc_amid,
+                                                          F_OIDEQ,
+                                                          ObjectIdGetDatum(BTREE_AM_OID));
+
+               ScanKeyEntryInitialize(&skey[1], 0,
+                                                          Anum_pg_amproc_amopclaid,
+                                                          F_OIDEQ,
+                                                          ObjectIdGetDatum(opclass));
+
+               ScanKeyEntryInitialize(&skey[2], 0,
+                                                          Anum_pg_amproc_amprocnum,
+                                                          F_INT2EQ,
+                                                          Int16GetDatum(BTORDER_PROC));
+
+               scan = heap_beginscan(relation, false, SnapshotNow, 3, skey);
+
+               *sortFunction = InvalidOid;
+
+               if (HeapTupleIsValid(tuple = heap_getnext(scan, 0)))
+               {
+                       Form_pg_amproc aform = (Form_pg_amproc) GETSTRUCT(tuple);
+                       *sortFunction = aform->amproc;
+               }
+
+               heap_endscan(scan);
+               heap_close(relation, AccessShareLock);
+
+               if (RegProcedureIsValid(*sortFunction))
+                       return;
+       }
+
+       /* Can't find a comparator, so use the operator as-is */
+
+       *kind = SORTFUNC_LT;
+       *sortFunction = get_opcode(sortOperator);
+       if (!RegProcedureIsValid(*sortFunction))
+               elog(ERROR, "SelectSortFunction: operator %u has no implementation",
+                        sortOperator);
+}
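
The payoff of the three sort-function kinds can be seen in a standalone
sketch that compares plain ints in place of Datums: the LT path may call the
operator twice per comparison, while the CMP/REVCMP paths make a single call
to a btree-style comparator (all names below are local to the sketch):

#include <stdio.h>

typedef enum { SORTFUNC_LT, SORTFUNC_CMP, SORTFUNC_REVCMP } SortFunctionKind;

static int int_lt(int a, int b)  { return a < b; }
static int int_cmp(int a, int b) { return (a < b) ? -1 : (a > b) ? 1 : 0; }

static int
compare_keys(int a, int b, SortFunctionKind kind)
{
    if (kind == SORTFUNC_LT)
    {
        /* old style: up to two calls of the "<" procedure per comparison */
        if (int_lt(a, b))
            return -1;
        if (int_lt(b, a))
            return 1;
        return 0;
    }
    else
    {
        /* new style: one call of the comparator, negated for ">" sorts */
        int     compare = int_cmp(a, b);

        return (kind == SORTFUNC_REVCMP) ? -compare : compare;
    }
}

int
main(void)
{
    printf("%d %d %d\n",
           compare_keys(1, 2, SORTFUNC_LT),
           compare_keys(1, 2, SORTFUNC_CMP),
           compare_keys(1, 2, SORTFUNC_REVCMP));    /* -1 -1 1 */
    return 0;
}
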
index 759ab3d..6e38529 100644 (file)
@@ -6,15 +6,13 @@
  *
  * Copyright (c) 2000, PostgreSQL Development Team
  *
- * $Id: tuptoaster.h,v 1.10 2001/03/22 04:00:32 momjian Exp $
+ * $Id: tuptoaster.h,v 1.11 2001/05/07 00:43:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #ifndef TUPTOASTER_H
 #define TUPTOASTER_H
 
-#ifdef TUPLE_TOASTER_ACTIVE
-
 #include "access/heapam.h"
 #include "access/htup.h"
 #include "access/tupmacs.h"
@@ -109,7 +107,13 @@ extern varattrib *heap_tuple_untoast_attr(varattrib *attr);
  */
 extern Datum toast_compress_datum(Datum value);
 
-#endif  /* TUPLE_TOASTER_ACTIVE */
+/* ----------
+ * toast_raw_datum_size -
+ *
+ *     Return the raw (detoasted) size of a varlena datum
+ * ----------
+ */
+extern Size toast_raw_datum_size(Datum value);
 
 
 #endif  /* TUPTOASTER_H */
index 963b11c..832f91f 100644 (file)
@@ -37,7 +37,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: catversion.h,v 1.70 2001/03/22 04:00:35 momjian Exp $
+ * $Id: catversion.h,v 1.71 2001/05/07 00:43:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -53,6 +53,6 @@
  */
 
 /*                                                     yyyymmddN */
-#define CATALOG_VERSION_NO     200101061
+#define CATALOG_VERSION_NO     200105051
 
 #endif
index a7248f6..7ab04b0 100644 (file)
@@ -7,13 +7,14 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: heap.h,v 1.34 2001/03/22 04:00:35 momjian Exp $
+ * $Id: heap.h,v 1.35 2001/05/07 00:43:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #ifndef HEAP_H
 #define HEAP_H
 
+#include "catalog/pg_attribute.h"
 #include "utils/rel.h"
 
 typedef struct RawColumnDefault
@@ -44,4 +45,6 @@ extern void AddRelationRawConstraints(Relation rel,
                                                  List *rawColDefaults,
                                                  List *rawConstraints);
 
+extern Form_pg_attribute SystemAttributeDefinition(AttrNumber attno);
+
 #endif  /* HEAP_H */
index 1dac0bb..07aaad6 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: index.h,v 1.33 2001/03/22 04:00:35 momjian Exp $
+ * $Id: index.h,v 1.34 2001/05/07 00:43:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -46,7 +46,7 @@ extern void FormIndexDatum(IndexInfo *indexInfo,
                           Datum *datum,
                           char *nullv);
 
-extern void UpdateStats(Oid relid, long reltuples);
+extern void UpdateStats(Oid relid, double reltuples);
 extern bool IndexesAreActive(Oid relid, bool comfirmCommitted);
 extern void setRelhasindex(Oid relid, bool hasindex);
 
index 41a580a..cc155cf 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: indexing.h,v 1.48 2001/03/22 04:00:36 momjian Exp $
+ * $Id: indexing.h,v 1.49 2001/05/07 00:43:24 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -171,7 +171,7 @@ DECLARE_UNIQUE_INDEX(pg_rewrite_rulename_index on pg_rewrite using btree(rulenam
 xDECLARE_UNIQUE_INDEX(pg_shadow_name_index on pg_shadow using btree(usename name_ops));
 xDECLARE_UNIQUE_INDEX(pg_shadow_sysid_index on pg_shadow using btree(usesysid int4_ops));
 */
-DECLARE_INDEX(pg_statistic_relid_att_index on pg_statistic using btree(starelid oid_ops, staattnum int2_ops));
+DECLARE_UNIQUE_INDEX(pg_statistic_relid_att_index on pg_statistic using btree(starelid oid_ops, staattnum int2_ops));
 DECLARE_INDEX(pg_trigger_tgconstrname_index on pg_trigger using btree(tgconstrname name_ops));
 DECLARE_INDEX(pg_trigger_tgconstrrelid_index on pg_trigger using btree(tgconstrrelid oid_ops));
 DECLARE_INDEX(pg_trigger_tgrelid_index on pg_trigger using btree(tgrelid oid_ops));
index 58724e9..6e11aa6 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pg_attribute.h,v 1.70 2001/03/22 04:00:37 momjian Exp $
+ * $Id: pg_attribute.h,v 1.71 2001/05/07 00:43:24 tgl Exp $
  *
  * NOTES
  *       the genbki.sh script reads this file and generates .bki
  *             typedef struct FormData_pg_attribute
  *
  *             If you change the following, make sure you change the structs for
- *             system attributes in heap.c and index.c also.
+ *             system attributes in catalog/heap.c also.
  * ----------------
  */
 CATALOG(pg_attribute) BOOTSTRAP
 {
        Oid                     attrelid;               /* OID of relation containing this
                                                                 * attribute */
-       NameData        attname;
-       Oid                     atttypid;
+       NameData        attname;                /* name of attribute */
 
        /*
         * atttypid is the OID of the instance in Catalog Class pg_type that
@@ -53,30 +52,20 @@ CATALOG(pg_attribute) BOOTSTRAP
         * attalign attributes of this instance, so they had better match or
         * Postgres will fail.
         */
-
-       float4          attdispersion;
+       Oid                     atttypid;
 
        /*
-        * attdispersion is the dispersion statistic of the column (0.0 to
-        * 1.0), or zero if the statistic has not been calculated, or -1.0 if
-        * VACUUM found that the column contains no duplicate entries (in
-        * which case the dispersion should be taken as 1.0/numberOfRows for
-        * the current table size).  The -1.0 hack is useful because the
-        * number of rows may be updated more often than attdispersion is. We
-        * assume that the column will retain its no-duplicate-entry property.
-        * (Perhaps this should be driven off the existence of a UNIQUE index
-        * for the column, instead of being a statistical guess?)
+        * attstattarget is the target number of statistics datapoints to collect
+        * during VACUUM ANALYZE of this column.  A zero here indicates that we
+        * do not wish to collect any stats about this column.
         */
-
-       int2            attlen;
+       int4            attstattarget;
 
        /*
         * attlen is a copy of the typlen field from pg_type for this
-        * attribute.  See atttypid above.      See struct Form_pg_type for
-        * definition.
+        * attribute.  See atttypid comments above.
         */
-
-       int2            attnum;
+       int2            attlen;
 
        /*
         * attnum is the "attribute number" for the attribute:  A value that
@@ -91,10 +80,13 @@ CATALOG(pg_attribute) BOOTSTRAP
         *
         * Note that (attnum - 1) is often used as the index to an array.
         */
+       int2            attnum;
 
-       int4            attnelems;              /* number of dimensions, if an array type */
-
-       int4            attcacheoff;
+       /*
+        * attndims is the declared number of dimensions, if an array type,
+        * otherwise zero.
+        */
+       int4            attndims;
 
        /*
         * fastgetattr() uses attcacheoff to cache byte offsets of attributes
@@ -103,8 +95,7 @@ CATALOG(pg_attribute) BOOTSTRAP
         * tuple descriptor, we may then update attcacheoff in the copies.
         * This speeds up the attribute walking process.
         */
-
-       int4            atttypmod;
+       int4            attcacheoff;
 
        /*
         * atttypmod records type-specific data supplied at table creation
@@ -113,16 +104,13 @@ CATALOG(pg_attribute) BOOTSTRAP
         * argument. The value will generally be -1 for types that do not need
         * typmod.
         */
-
-       bool            attbyval;
+       int4            atttypmod;
 
        /*
         * attbyval is a copy of the typbyval field from pg_type for this
-        * attribute.  See atttypid above.      See struct Form_pg_type for
-        * definition.
+        * attribute.  See atttypid comments above.
         */
-
-       char            attstorage;
+       bool            attbyval;
 
        /*----------
         * attstorage tells for VARLENA attributes, what the heap access
@@ -137,30 +125,31 @@ CATALOG(pg_attribute) BOOTSTRAP
         * but only as a last resort ('e' and 'x' fields are moved first).
         *----------
         */
+       char            attstorage;
 
+       /* This flag indicates that the attribute is really a set */
        bool            attisset;
-       char            attalign;
 
        /*
         * attalign is a copy of the typalign field from pg_type for this
-        * attribute.  See atttypid above.      See struct Form_pg_type for
-        * definition.
+        * attribute.  See atttypid comments above.
         */
-
-       bool            attnotnull;
+       char            attalign;
 
        /* This flag represents the "NOT NULL" constraint */
-       bool            atthasdef;
+       bool            attnotnull;
 
        /* Has DEFAULT value or not */
+       bool            atthasdef;
 } FormData_pg_attribute;
 
 /*
  * someone should figure out how to do this properly. (The problem is
- * the size of the C struct is not the same as the size of the tuple.)
+ * the size of the C struct is not the same as the size of the tuple
+ * because of alignment padding at the end of the struct.)
  */
 #define ATTRIBUTE_TUPLE_SIZE \
-       (offsetof(FormData_pg_attribute,atthasdef) + sizeof(char))
+       (offsetof(FormData_pg_attribute,atthasdef) + sizeof(bool))
 
 /* ----------------
  *             Form_pg_attribute corresponds to a pointer to a tuple with
@@ -178,10 +167,10 @@ typedef FormData_pg_attribute *Form_pg_attribute;
 #define Anum_pg_attribute_attrelid             1
 #define Anum_pg_attribute_attname              2
 #define Anum_pg_attribute_atttypid             3
-#define Anum_pg_attribute_attdispersion 4
+#define Anum_pg_attribute_attstattarget 4
 #define Anum_pg_attribute_attlen               5
 #define Anum_pg_attribute_attnum               6
-#define Anum_pg_attribute_attnelems            7
+#define Anum_pg_attribute_attndims             7
 #define Anum_pg_attribute_attcacheoff  8
 #define Anum_pg_attribute_atttypmod            9
 #define Anum_pg_attribute_attbyval             10
@@ -206,6 +195,7 @@ typedef FormData_pg_attribute *Form_pg_attribute;
        (attribute)->attnotnull = false; \
        (attribute)->atthasdef = false;
 #endif  /* _DROP_COLUMN_HACK__ */
+
 /* ----------------
  *             SCHEMA_ macros for declaring hardcoded tuple descriptors.
  *             these are used in utils/cache/relcache.c
@@ -231,25 +221,25 @@ typedef FormData_pg_attribute *Form_pg_attribute;
  * ----------------
  */
 #define Schema_pg_type \
-{ 1247, {"typname"},      19, 0, NAMEDATALEN,  1, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typowner"},     23, 0,       4,      2, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typlen"},               21, 0,       2,      3, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1247, {"typprtlen"},    21, 0,       2,      4, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1247, {"typbyval"},     16, 0,       1,      5, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1247, {"typtype"},      18, 0,       1,      6, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1247, {"typisdefined"},  16, 0,      1,      7, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1247, {"typdelim"},     18, 0,       1,      8, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1247, {"typrelid"},     26, 0,       4,      9, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typelem"},      26, 0,       4, 10, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typinput"},     24, 0,       4, 11, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typoutput"},    24, 0,       4, 12, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typreceive"},    24, 0,      4, 13, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typsend"},      24, 0,       4, 14, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1247, {"typalign"},     18, 0,       1, 15, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1247, {"typstorage"},    18, 0,      1, 16, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1247, {"typdefault"},    25, 0,  -1, 17, 0, -1, -1, '\0'     , 'x', '\0', 'i', '\0', '\0' }
-
-DATA(insert OID = 0 ( 1247 typname                     19 0 NAMEDATALEN   1 0 -1 -1 f p f i f f));
+{ 1247, {"typname"},      19, DEFAULT_ATTSTATTARGET, NAMEDATALEN,      1, 0, -1, -1, false, 'p', false, 'i', false, false }, \
+{ 1247, {"typowner"},     23, 0,       4,      2, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typlen"},               21, 0,       2,      3, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1247, {"typprtlen"},    21, 0,       2,      4, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1247, {"typbyval"},     16, 0,       1,      5, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1247, {"typtype"},      18, 0,       1,      6, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1247, {"typisdefined"},  16, 0,      1,      7, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1247, {"typdelim"},     18, 0,       1,      8, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1247, {"typrelid"},     26, 0,       4,      9, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typelem"},      26, 0,       4, 10, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typinput"},     24, 0,       4, 11, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typoutput"},    24, 0,       4, 12, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typreceive"},    24, 0,      4, 13, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typsend"},      24, 0,       4, 14, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1247, {"typalign"},     18, 0,       1, 15, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1247, {"typstorage"},    18, 0,      1, 16, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1247, {"typdefault"},    25, 0,  -1, 17, 0, -1, -1, false    , 'x', false, 'i', false, false }
+
+DATA(insert OID = 0 ( 1247 typname                     19 DEFAULT_ATTSTATTARGET NAMEDATALEN   1 0 -1 -1 f p f i f f));
 DATA(insert OID = 0 ( 1247 typowner                    23 0  4   2 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1247 typlen                      21 0  2   3 0 -1 -1 t p f s f f));
 DATA(insert OID = 0 ( 1247 typprtlen           21 0  2   4 0 -1 -1 t p f s f f));
@@ -299,25 +289,25 @@ DATA(insert OID = 0 ( 1262 tableoid                       26 0  4  -7 0 -1 -1 t p f i f f));
  * ----------------
  */
 #define Schema_pg_proc \
-{ 1255, {"proname"},                   19, 0, NAMEDATALEN,  1, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"proowner"},                  23, 0,  4,      2, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"prolang"},                   26, 0,  4,      3, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"proisinh"},                  16, 0,  1,      4, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1255, {"proistrusted"},              16, 0,  1,      5, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1255, {"proiscachable"},             16, 0,  1,      6, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1255, {"proisstrict"},               16, 0,  1,      7, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1255, {"pronargs"},                  21, 0,  2,      8, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1255, {"proretset"},                 16, 0,  1,      9, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1255, {"prorettype"},                        26, 0,  4, 10, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"proargtypes"},               30, 0, INDEX_MAX_KEYS*4, 11, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"probyte_pct"},               23, 0,  4, 12, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"properbyte_cpu"},            23, 0,  4, 13, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"propercall_cpu"},            23, 0,  4, 14, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"prooutin_ratio"},            23, 0,  4, 15, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"prosrc"},                            25, 0, -1, 16, 0, -1, -1, '\0', 'x', '\0', 'i', '\0', '\0' }, \
-{ 1255, {"probin"},                            17, 0, -1, 17, 0, -1, -1, '\0', 'x', '\0', 'i', '\0', '\0' }
-
-DATA(insert OID = 0 ( 1255 proname                     19 0 NAMEDATALEN   1 0 -1 -1 f p f i f f));
+{ 1255, {"proname"},                   19, DEFAULT_ATTSTATTARGET, NAMEDATALEN,  1, 0, -1, -1, false, 'p', false, 'i', false, false }, \
+{ 1255, {"proowner"},                  23, 0,  4,      2, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"prolang"},                   26, 0,  4,      3, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"proisinh"},                  16, 0,  1,      4, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1255, {"proistrusted"},              16, 0,  1,      5, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1255, {"proiscachable"},             16, 0,  1,      6, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1255, {"proisstrict"},               16, 0,  1,      7, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1255, {"pronargs"},                  21, 0,  2,      8, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1255, {"proretset"},                 16, 0,  1,      9, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1255, {"prorettype"},                        26, 0,  4, 10, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"proargtypes"},               30, 0, INDEX_MAX_KEYS*4, 11, 0, -1, -1, false, 'p', false, 'i', false, false }, \
+{ 1255, {"probyte_pct"},               23, 0,  4, 12, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"properbyte_cpu"},            23, 0,  4, 13, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"propercall_cpu"},            23, 0,  4, 14, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"prooutin_ratio"},            23, 0,  4, 15, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1255, {"prosrc"},                            25, 0, -1, 16, 0, -1, -1, false, 'x', false, 'i', false, false }, \
+{ 1255, {"probin"},                            17, 0, -1, 17, 0, -1, -1, false, 'x', false, 'i', false, false }
+
+DATA(insert OID = 0 ( 1255 proname                     19 DEFAULT_ATTSTATTARGET NAMEDATALEN   1 0 -1 -1 f p f i f f));
 DATA(insert OID = 0 ( 1255 proowner                    23 0  4   2 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1255 prolang                     26 0  4   3 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1255 proisinh                    16 0  1   4 0 -1 -1 t p f c f f));
@@ -346,8 +336,8 @@ DATA(insert OID = 0 ( 1255 tableoid                 26 0  4  -7 0 -1 -1 t p f i f f));
  *             pg_shadow
  * ----------------
  */
-DATA(insert OID = 0 ( 1260 usename                     19      0 NAMEDATALEN   1 0 -1 -1 f p f i f f));
-DATA(insert OID = 0 ( 1260 usesysid                    23      0       4       2 0 -1 -1 t p f i f f));
+DATA(insert OID = 0 ( 1260 usename                     19      DEFAULT_ATTSTATTARGET NAMEDATALEN       1 0 -1 -1 f p f i f f));
+DATA(insert OID = 0 ( 1260 usesysid                    23      DEFAULT_ATTSTATTARGET   4       2 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1260 usecreatedb         16      0       1       3 0 -1 -1 t p f c f f));
 DATA(insert OID = 0 ( 1260 usetrace                    16      0       1       4 0 -1 -1 t p f c f f));
 DATA(insert OID = 0 ( 1260 usesuper                    16      0       1       5 0 -1 -1 t p f c f f));
@@ -366,8 +356,8 @@ DATA(insert OID = 0 ( 1260 tableoid                 26 0  4  -7 0 -1 -1 t p f i f f));
  *             pg_group
  * ----------------
  */
-DATA(insert OID = 0 ( 1261 groname                     19 0 NAMEDATALEN  1 0 -1 -1 f p f i f f));
-DATA(insert OID = 0 ( 1261 grosysid                    23 0  4   2 0 -1 -1 t p f i f f));
+DATA(insert OID = 0 ( 1261 groname                     19 DEFAULT_ATTSTATTARGET NAMEDATALEN  1 0 -1 -1 f p f i f f));
+DATA(insert OID = 0 ( 1261 grosysid                    23 DEFAULT_ATTSTATTARGET  4   2 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1261 grolist               1007 0 -1   3 0 -1 -1 f x f i f f));
 DATA(insert OID = 0 ( 1261 ctid                                27 0  6  -1 0 -1 -1 f p f i f f));
 DATA(insert OID = 0 ( 1261 oid                         26 0  4  -2 0 -1 -1 t p f i f f));
@@ -382,29 +372,29 @@ DATA(insert OID = 0 ( 1261 tableoid                       26 0  4  -7 0 -1 -1 t p f i f f));
  * ----------------
  */
 #define Schema_pg_attribute \
-{ 1249, {"attrelid"},    26, 0,        4,      1, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"attname"},     19, 0, NAMEDATALEN,   2, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"atttypid"},    26, 0,        4,      3, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"attdispersion"}, 700, 0,     4,      4, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"attlen"},              21, 0,        2,      5, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1249, {"attnum"},              21, 0,        2,      6, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1249, {"attnelems"},   23, 0,        4,      7, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"attcacheoff"},  23, 0,       4,      8, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"atttypmod"},   23, 0,        4,      9, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1249, {"attbyval"},    16, 0,        1, 10, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1249, {"attstorage"},   18, 0,       1, 11, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1249, {"attisset"},    16, 0,        1, 12, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1249, {"attalign"},    18, 0,        1, 13, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1249, {"attnotnull"},  16, 0, 1, 14, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1249, {"atthasdef"},  16, 0, 1, 15, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }
-
-DATA(insert OID = 0 ( 1249 attrelid                    26 0  4   1 0 -1 -1 t p f i f f));
-DATA(insert OID = 0 ( 1249 attname                     19 0 NAMEDATALEN  2 0 -1 -1 f p f i f f));
+{ 1249, {"attrelid"},    26, DEFAULT_ATTSTATTARGET,    4,      1, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1249, {"attname"},     19, DEFAULT_ATTSTATTARGET, NAMEDATALEN,       2, 0, -1, -1, false, 'p', false, 'i', false, false }, \
+{ 1249, {"atttypid"},    26, 0,        4,      3, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1249, {"attstattarget"}, 23, 0,      4,      4, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1249, {"attlen"},              21, 0,        2,      5, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1249, {"attnum"},              21, 0,        2,      6, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1249, {"attndims"},    23, 0,        4,      7, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1249, {"attcacheoff"},  23, 0,       4,      8, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1249, {"atttypmod"},   23, 0,        4,      9, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1249, {"attbyval"},    16, 0,        1, 10, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1249, {"attstorage"},   18, 0,       1, 11, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1249, {"attisset"},    16, 0,        1, 12, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1249, {"attalign"},    18, 0,        1, 13, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1249, {"attnotnull"},  16, 0, 1, 14, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1249, {"atthasdef"},  16, 0, 1, 15, 0, -1, -1, true, 'p', false, 'c', false, false }
+
+DATA(insert OID = 0 ( 1249 attrelid                    26 DEFAULT_ATTSTATTARGET  4   1 0 -1 -1 t p f i f f));
+DATA(insert OID = 0 ( 1249 attname                     19 DEFAULT_ATTSTATTARGET NAMEDATALEN  2 0 -1 -1 f p f i f f));
 DATA(insert OID = 0 ( 1249 atttypid                    26 0  4   3 0 -1 -1 t p f i f f));
-DATA(insert OID = 0 ( 1249 attdispersion   700 0  4   4 0 -1 -1 f p f i f f));
+DATA(insert OID = 0 ( 1249 attstattarget       23 0  4   4 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1249 attlen                      21 0  2   5 0 -1 -1 t p f s f f));
 DATA(insert OID = 0 ( 1249 attnum                      21 0  2   6 0 -1 -1 t p f s f f));
-DATA(insert OID = 0 ( 1249 attnelems           23 0  4   7 0 -1 -1 t p f i f f));
+DATA(insert OID = 0 ( 1249 attndims                    23 0  4   7 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1249 attcacheoff         23 0  4   8 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1249 atttypmod           23 0  4   9 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1249 attbyval                    16 0  1  10 0 -1 -1 t p f c f f));
@@ -426,36 +416,36 @@ DATA(insert OID = 0 ( 1249 tableoid                       26 0  4  -7 0 -1 -1 t p f i f f));
  * ----------------
  */
 #define Schema_pg_class \
-{ 1259, {"relname"},      19, 0, NAMEDATALEN,  1, 0, -1, -1, '\0', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"reltype"},      26, 0,       4,      2, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"relowner"},     23, 0,       4,      3, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"relam"},                26, 0,       4,      4, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"relfilenode"},   26, 0,      4,      5, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"relpages"},     23, 0,       4,      6, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"reltuples"},    23, 0,       4,      7, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"reltoastrelid"}, 26, 0,      4,      8, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"reltoastidxid"}, 26, 0,      4,      9, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }, \
-{ 1259, {"relhasindex"},   16, 0,      1, 10, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1259, {"relisshared"},   16, 0,      1, 11, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1259, {"relkind"},      18, 0,       1, 12, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1259, {"relnatts"},     21, 0,       2, 13, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1259, {"relchecks"},    21, 0,       2, 14, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1259, {"reltriggers"},   21, 0,      2, 15, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1259, {"relukeys"},     21, 0,       2, 16, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1259, {"relfkeys"},     21, 0,       2, 17, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1259, {"relrefs"},      21, 0,       2, 18, 0, -1, -1, '\001', 'p', '\0', 's', '\0', '\0' }, \
-{ 1259, {"relhaspkey"},    16, 0,      1, 19, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1259, {"relhasrules"},   16, 0,      1, 20, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1259, {"relhassubclass"},16, 0,      1, 21, 0, -1, -1, '\001', 'p', '\0', 'c', '\0', '\0' }, \
-{ 1259, {"relacl"},             1034, 0,  -1, 22, 0, -1, -1,   '\0', 'x', '\0', 'i', '\0', '\0' }
-
-DATA(insert OID = 0 ( 1259 relname                     19 0 NAMEDATALEN   1 0 -1 -1 f p f i f f));
+{ 1259, {"relname"},      19, DEFAULT_ATTSTATTARGET, NAMEDATALEN,      1, 0, -1, -1, false, 'p', false, 'i', false, false }, \
+{ 1259, {"reltype"},      26, 0,       4,      2, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"relowner"},     23, 0,       4,      3, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"relam"},                26, 0,       4,      4, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"relfilenode"},   26, 0,      4,      5, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"relpages"},     23, 0,       4,      6, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"reltuples"},    700, 0,      4,      7, 0, -1, -1, false, 'p', false, 'i', false, false }, \
+{ 1259, {"reltoastrelid"}, 26, 0,      4,      8, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"reltoastidxid"}, 26, 0,      4,      9, 0, -1, -1, true, 'p', false, 'i', false, false }, \
+{ 1259, {"relhasindex"},   16, 0,      1, 10, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1259, {"relisshared"},   16, 0,      1, 11, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1259, {"relkind"},      18, 0,       1, 12, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1259, {"relnatts"},     21, 0,       2, 13, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1259, {"relchecks"},    21, 0,       2, 14, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1259, {"reltriggers"},   21, 0,      2, 15, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1259, {"relukeys"},     21, 0,       2, 16, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1259, {"relfkeys"},     21, 0,       2, 17, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1259, {"relrefs"},      21, 0,       2, 18, 0, -1, -1, true, 'p', false, 's', false, false }, \
+{ 1259, {"relhaspkey"},    16, 0,      1, 19, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1259, {"relhasrules"},   16, 0,      1, 20, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1259, {"relhassubclass"},16, 0,      1, 21, 0, -1, -1, true, 'p', false, 'c', false, false }, \
+{ 1259, {"relacl"},             1034, 0,  -1, 22, 0, -1, -1,   false, 'x', false, 'i', false, false }
+
+DATA(insert OID = 0 ( 1259 relname                     19 DEFAULT_ATTSTATTARGET NAMEDATALEN   1 0 -1 -1 f p f i f f));
 DATA(insert OID = 0 ( 1259 reltype                     26 0  4   2 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1259 relowner                    23 0  4   3 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1259 relam                       26 0  4   4 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1259 relfilenode         26 0  4   5 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1259 relpages                    23 0  4   6 0 -1 -1 t p f i f f));
-DATA(insert OID = 0 ( 1259 reltuples           23 0  4   7 0 -1 -1 t p f i f f));
+DATA(insert OID = 0 ( 1259 reltuples      700 0  4   7 0 -1 -1 f p f i f f));
 DATA(insert OID = 0 ( 1259 reltoastrelid       26 0  4   8 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1259 reltoastidxid       26 0  4   9 0 -1 -1 t p f i f f));
 DATA(insert OID = 0 ( 1259 relhasindex         16 0  1  10 0 -1 -1 t p f c f f));
@@ -544,7 +534,7 @@ DATA(insert OID = 0 ( 1219 tableoid                 26 0  4  -7 0 -1 -1 t p f i f f));
  * ----------------
  */
 #define Schema_pg_variable \
-{ 1264, {"varfoo"},  26, 0, 4, 1, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }
+{ 1264, {"varfoo"},  26, 0, 4, 1, 0, -1, -1, true, 'p', false, 'i', false, false }
 
 DATA(insert OID = 0 ( 1264 varfoo                      26 0  4   1 0 -1 -1 t p f i f f));
 
@@ -555,7 +545,7 @@ DATA(insert OID = 0 ( 1264 varfoo                   26 0  4   1 0 -1 -1 t p f i f f));
  * ----------------
  */
 #define Schema_pg_log \
-{ 1269, {"logfoo"},  26, 0, 4, 1, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }
+{ 1269, {"logfoo"},  26, 0, 4, 1, 0, -1, -1, true, 'p', false, 'i', false, false }
 
 DATA(insert OID = 0 ( 1269 logfoo                      26 0  4   1 0 -1 -1 t p f i f f));
 
@@ -566,7 +556,7 @@ DATA(insert OID = 0 ( 1269 logfoo                   26 0  4   1 0 -1 -1 t p f i f f));
  * ----------------
  */
 #define Schema_pg_xactlock \
-{ 376, {"xactlockfoo"},  26, 0, 4, 1, 0, -1, -1, '\001', 'p', '\0', 'i', '\0', '\0' }
+{ 376, {"xactlockfoo"},  26, 0, 4, 1, 0, -1, -1, true, 'p', false, 'i', false, false }
 
 DATA(insert OID = 0 ( 376 xactlockfoo          26 0  4   1 0 -1 -1 t p f i f f));
 
index 81e75e1..86de88c 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pg_class.h,v 1.47 2001/03/22 04:00:38 momjian Exp $
+ * $Id: pg_class.h,v 1.48 2001/05/07 00:43:25 tgl Exp $
  *
  * NOTES
  *       the genbki.sh script reads this file and generates .bki
@@ -52,7 +52,7 @@ CATALOG(pg_class) BOOTSTRAP
        Oid                     relam;
        Oid                     relfilenode;
        int4            relpages;
-       int4            reltuples;
+       float4          reltuples;
        Oid                     reltoastrelid;
        Oid                     reltoastidxid;
        bool            relhasindex;
index 2f39bea..8d6a6b3 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pg_statistic.h,v 1.10 2001/01/24 19:43:22 momjian Exp $
+ * $Id: pg_statistic.h,v 1.11 2001/05/07 00:43:25 tgl Exp $
  *
  * NOTES
  *       the genbki.sh script reads this file and generates .bki
@@ -36,40 +36,91 @@ CATALOG(pg_statistic)
        /* These fields form the unique key for the entry: */
        Oid                     starelid;               /* relation containing attribute */
        int2            staattnum;              /* attribute (column) stats are for */
-       Oid                     staop;                  /* '<' comparison op used for lo/hi vals */
+
+       /* the fraction of the column's entries that are NULL: */
+       float4          stanullfrac;
 
        /*
-        * Note: the current VACUUM code will never produce more than one
-        * entry per column, but in theory there could be multiple entries if
-        * a datatype has more than one useful ordering operator.  Also, the
-        * current code will not write an entry unless it found at least one
-        * non-NULL value in the column; so the remaining fields will never be
-        * NULL.
+        * stawidth is the average width in bytes of non-null entries.  For
+        * fixed-width datatypes this is of course the same as the typlen, but
+        * for varlena types it is more useful.  Note that this is the average
+        * width of the data as actually stored, post-TOASTing (eg, for a
+        * moved-out-of-line value, only the size of the pointer object is
+        * counted).  This is the appropriate definition for the primary use of
+        * the statistic, which is to estimate sizes of in-memory hash tables of
+        * tuples.
+        */
+       int4            stawidth;
+
+       /* ----------------
+        * stadistinct indicates the (approximate) number of distinct non-null
+        * data values in the column.  The interpretation is:
+        *              0               unknown or not computed
+        *              > 0             actual number of distinct values
+        *              < 0             negative of multiplier for number of rows
+        * The special negative case allows us to cope with columns that are
+        * unique (stadistinct = -1) or nearly so (for example, a column in
+        * which values appear about twice on the average could be represented
+        * by stadistinct = -0.5).  Because the number-of-rows statistic in
+        * pg_class may be updated more frequently than pg_statistic is, it's
+        * important to be able to describe such situations as a multiple of
+        * the number of rows, rather than a fixed number of distinct values.
+        * But in other cases a fixed number is correct (eg, a boolean column).
+        * ----------------
+        */
+       float4          stadistinct;
+
+       /* ----------------
+        * To allow keeping statistics on different kinds of datatypes,
+        * we do not hard-wire any particular meaning for the remaining
+        * statistical fields.  Instead, we provide several "slots" in which
+        * statistical data can be placed.  Each slot includes:
+        *              kind                    integer code identifying kind of data
+        *              op                              OID of associated operator, if needed
+        *              numbers                 float4 array (for statistical values)
+        *              values                  text array (for representations of data values)
+        * The ID and operator fields are never NULL; they are zeroes in an
+        * unused slot.  The numbers and values fields are NULL in an unused
+        * slot, and might also be NULL in a used slot if the slot kind has
+        * no need for one or the other.
+        * ----------------
         */
 
+       int2            stakind1;
+       int2            stakind2;
+       int2            stakind3;
+       int2            stakind4;
+
+       Oid                     staop1;
+       Oid                     staop2;
+       Oid                     staop3;
+       Oid                     staop4;
+
        /*
-        * These fields contain the stats about the column indicated by the
-        * key
+        * THE REST OF THESE ARE VARIABLE LENGTH FIELDS, and may even be absent
+        * (NULL). They cannot be accessed as C struct entries; you have to use
+        * the full field access machinery (heap_getattr) for them.  We declare
+        * them here for the catalog machinery.
         */
-       float4          stanullfrac;    /* the fraction of the entries that are
-                                                                * NULL */
-       float4          stacommonfrac;  /* the fraction that are the most common
-                                                                * val */
+
+       float4          stanumbers1[1];
+       float4          stanumbers2[1];
+       float4          stanumbers3[1];
+       float4          stanumbers4[1];
 
        /*
-        * THE REST OF THESE ARE VARIABLE LENGTH FIELDS. They cannot be
-        * accessed as C struct entries; you have to use the full field access
-        * machinery (heap_getattr) for them.
-        *
-        * All three of these are text representations of data values of the
-        * column's data type.  To re-create the actual Datum, do
-        * datatypein(textout(givenvalue)).
+        * Values in these text arrays are external representations of values
+        * of the column's data type.  To re-create the actual Datum, do
+        * datatypein(textout(arrayelement)).
         */
-       text            stacommonval;   /* most common non-null value in column */
-       text            staloval;               /* smallest non-null value in column */
-       text            stahival;               /* largest non-null value in column */
+       text            stavalues1[1];
+       text            stavalues2[1];
+       text            stavalues3[1];
+       text            stavalues4[1];
 } FormData_pg_statistic;
 
+#define STATISTIC_NUM_SLOTS  4
+
 /* ----------------
  *             Form_pg_statistic corresponds to a pointer to a tuple with
  *             the format of pg_statistic relation.
@@ -81,14 +132,78 @@ typedef FormData_pg_statistic *Form_pg_statistic;
  *             compiler constants for pg_statistic
  * ----------------
  */
-#define Natts_pg_statistic                             8
+#define Natts_pg_statistic                             21
 #define Anum_pg_statistic_starelid             1
 #define Anum_pg_statistic_staattnum            2
-#define Anum_pg_statistic_staop                        3
-#define Anum_pg_statistic_stanullfrac  4
-#define Anum_pg_statistic_stacommonfrac 5
-#define Anum_pg_statistic_stacommonval 6
-#define Anum_pg_statistic_staloval             7
-#define Anum_pg_statistic_stahival             8
+#define Anum_pg_statistic_stanullfrac  3
+#define Anum_pg_statistic_stawidth             4
+#define Anum_pg_statistic_stadistinct  5
+#define Anum_pg_statistic_stakind1             6
+#define Anum_pg_statistic_stakind2             7
+#define Anum_pg_statistic_stakind3             8
+#define Anum_pg_statistic_stakind4             9
+#define Anum_pg_statistic_staop1               10
+#define Anum_pg_statistic_staop2               11
+#define Anum_pg_statistic_staop3               12
+#define Anum_pg_statistic_staop4               13
+#define Anum_pg_statistic_stanumbers1  14
+#define Anum_pg_statistic_stanumbers2  15
+#define Anum_pg_statistic_stanumbers3  16
+#define Anum_pg_statistic_stanumbers4  17
+#define Anum_pg_statistic_stavalues1   18
+#define Anum_pg_statistic_stavalues2   19
+#define Anum_pg_statistic_stavalues3   20
+#define Anum_pg_statistic_stavalues4   21
+
+/*
+ * Currently, three statistical slot "kinds" are defined: most common values,
+ * histogram, and correlation.  Additional "kinds" will probably appear in
+ * future to help cope with non-scalar datatypes.
+ *
+ * Code reading the pg_statistic relation should not assume that a particular
+ * data "kind" will appear in any particular slot.  Instead, search the
+ * stakind fields to see if the desired data is available.
+ */
+
+/*
+ * In a "most common values" slot, staop is the OID of the "=" operator
+ * used to decide whether values are the same or not.  stavalues contains
+ * the K most common non-null values appearing in the column, and stanumbers
+ * contains their frequencies (fractions of total row count).  The values
+ * shall be ordered in decreasing frequency.  Note that since the arrays are
+ * variable-size, K may be chosen by the statistics collector.  Values should
+ * not appear in MCV unless they have been observed to occur more than once;
+ * a unique column will have no MCV slot.
+ */
+#define STATISTIC_KIND_MCV  1
+
+/*
+ * A "histogram" slot describes the distribution of scalar data.  staop is
+ * the OID of the "<" operator that describes the sort ordering.  (In theory,
+ * more than one histogram could appear, if a datatype has more than one
+ * useful sort operator.)  stavalues contains M (>=2) non-null values that
+ * divide the non-null column data values into M-1 bins of approximately equal
+ * population.  The first stavalues item is the MIN and the last is the MAX.
+ * stanumbers is not used and should be NULL.  IMPORTANT POINT: if an MCV
+ * slot is also provided, then the histogram describes the data distribution
+ * *after removing the values listed in MCV* (thus, it's a "compressed
+ * histogram" in the technical parlance).  This allows a more accurate
+ * representation of the distribution of a column with some very-common
+ * values.  In a column with only a few distinct values, it's possible that
+ * the MCV list describes the entire data population; in this case the
+ * histogram reduces to empty and should be omitted.
+ */
+#define STATISTIC_KIND_HISTOGRAM  2
+
+/*
+ * A "correlation" slot describes the correlation between the physical order
+ * of table tuples and the ordering of data values of this column, as seen
+ * by the "<" operator identified by staop.  (As with the histogram, more
+ * than one entry could theoretically appear.)  stavalues is not used and
+ * should be NULL.  stanumbers contains a single entry, the correlation
+ * coefficient between the sequence of data values and the sequence of
+ * their actual tuple positions.  The coefficient ranges from +1 to -1.
+ */
+#define STATISTIC_KIND_CORRELATION  3
 
 #endif  /* PG_STATISTIC_H */
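
The stadistinct sign convention above is easiest to see with a worked example. A minimal sketch, not code from this commit (the helper name is hypothetical), of converting a stored stadistinct into an absolute count given pg_class.reltuples:

static double
distinct_estimate(float4 stadistinct, double reltuples)
{
    if (stadistinct > 0.0)
        return (double) stadistinct;        /* stored as an actual count */
    if (stadistinct < 0.0)
        return -stadistinct * reltuples;    /* stored as a multiplier on rows */
    return 0.0;                             /* zero means unknown / not computed */
}

So a unique column (stadistinct = -1) in a table whose reltuples is 10000 yields 10000 distinct values, while a column whose values each appear about twice (stadistinct = -0.5) yields 5000, and both estimates track reltuples as it is updated.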
index 8b10845..7eb1a4f 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: command.h,v 1.26 2001/03/22 04:00:41 momjian Exp $
+ * $Id: command.h,v 1.27 2001/05/07 00:43:25 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -43,9 +43,13 @@ extern void PortalCleanup(Portal portal);
 extern void AlterTableAddColumn(const char *relationName,
                                        bool inh, ColumnDef *colDef);
 
-extern void AlterTableAlterColumn(const char *relationName,
-                                         bool inh, const char *colName,
-                                         Node *newDefault);
+extern void AlterTableAlterColumnDefault(const char *relationName,
+                                                                                bool inh, const char *colName,
+                                                                                Node *newDefault);
+
+extern void AlterTableAlterColumnStatistics(const char *relationName,
+                                                                                       bool inh, const char *colName,
+                                                                                       Node *statsTarget);
 
 extern void AlterTableDropColumn(const char *relationName,
                                         bool inh, const char *colName,
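
The split of the old AlterTableAlterColumn entry point into the two functions above matches the two per-column actions: AlterTableAlterColumnDefault keeps the existing SET DEFAULT behaviour, while AlterTableAlterColumnStatistics is the hook for setting the new pg_attribute.attstattarget shown earlier (presumably surfaced as ALTER TABLE ... ALTER COLUMN ... SET STATISTICS <integer>; the STATISTICS keyword is added to the keyword lists later in this diff, but the exact grammar productions are not shown in this section).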
index d82d22f..87bb000 100644 (file)
 /*-------------------------------------------------------------------------
  *
  * vacuum.h
- *       header file for postgres vacuum cleaner
+ *       header file for postgres vacuum cleaner and statistics analyzer
  *
  *
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: vacuum.h,v 1.34 2001/03/22 04:00:43 momjian Exp $
+ * $Id: vacuum.h,v 1.35 2001/05/07 00:43:25 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #ifndef VACUUM_H
 #define VACUUM_H
 
-#include "catalog/pg_attribute.h"
-#include "catalog/pg_index.h"
-#include "fmgr.h"
-#include "nodes/pg_list.h"
-#include "storage/itemptr.h"
+#include "nodes/parsenodes.h"
 
 
-typedef struct VAttListData
-{
-       int                     val_dummy;
-       struct VAttListData *val_next;
-} VAttListData;
-
-typedef VAttListData *VAttList;
-
-typedef struct VacPageData
-{
-       BlockNumber blkno;                      /* BlockNumber of this Page */
-       Size            free;                   /* FreeSpace on this Page */
-       uint16          offsets_used;   /* Number of OffNums used by vacuum */
-       uint16          offsets_free;   /* Number of OffNums free or to be free */
-       OffsetNumber offsets[1];        /* Array of its OffNums */
-} VacPageData;
-
-typedef VacPageData *VacPage;
-
-typedef struct VacPageListData
-{
-       int                     empty_end_pages;/* Number of "empty" end-pages */
-       int                     num_pages;              /* Number of pages in pagedesc */
-       int                     num_allocated_pages;    /* Number of allocated pages in
-                                                                                * pagedesc */
-       VacPage    *pagedesc;           /* Descriptions of pages */
-} VacPageListData;
-
-typedef VacPageListData *VacPageList;
-
-typedef struct
-{
-       Form_pg_attribute attr;
-       Datum           best,
-                               guess1,
-                               guess2,
-                               max,
-                               min;
-       int                     best_len,
-                               guess1_len,
-                               guess2_len,
-                               max_len,
-                               min_len;
-       long            best_cnt,
-                               guess1_cnt,
-                               guess1_hits,
-                               guess2_hits,
-                               null_cnt,
-                               nonnull_cnt,
-                               max_cnt,
-                               min_cnt;
-       FmgrInfo        f_cmpeq,
-                               f_cmplt,
-                               f_cmpgt;
-       Oid                     op_cmplt;
-       regproc         outfunc;
-       Oid                     typelem;
-       bool            initialized;
-} VacAttrStats;
-
-typedef struct VRelListData
-{
-       Oid                     vrl_relid;
-       struct VRelListData *vrl_next;
-} VRelListData;
-
-typedef VRelListData *VRelList;
-
-typedef struct VTupleLinkData
-{
-       ItemPointerData new_tid;
-       ItemPointerData this_tid;
-} VTupleLinkData;
-
-typedef VTupleLinkData *VTupleLink;
-
-typedef struct VTupleMoveData
-{
-       ItemPointerData tid;            /* tuple ID */
-       VacPage         vacpage;                /* where to move */
-       bool            cleanVpd;               /* clean vacpage before using */
-} VTupleMoveData;
-
-typedef VTupleMoveData *VTupleMove;
-
-typedef struct VRelStats
-{
-       Oid                     relid;
-       int                     num_tuples;
-       int                     num_pages;
-       Size            min_tlen;
-       Size            max_tlen;
-       bool            hasindex;
-       int                     num_vtlinks;
-       VTupleLink      vtlinks;
-} VRelStats;
-
-extern bool VacuumRunning;
-
-extern void vc_abort(void);
-extern void vacuum(char *vacrel, bool verbose, bool analyze, List *anal_cols);
-extern void analyze_rel(Oid relid, List *anal_cols2, int MESSAGE_LEVEL);
-
-#define ATTNVALS_SCALE 1000000000              /* XXX so it can act as a float4 */
+/* in commands/vacuum.c */
+extern void vacuum(VacuumStmt *vacstmt);
+extern void vac_update_relstats(Oid relid, long num_pages, double num_tuples,
+                                                               bool hasindex);
+/* in commands/analyze.c */
+extern void analyze_rel(Oid relid, VacuumStmt *vacstmt);
 
 #endif  /* VACUUM_H */
index 0d989db..01593a4 100644 (file)
@@ -8,7 +8,7 @@
  * or in config.h afterwards.  Of course, if you edit config.h, then your
  * changes will be overwritten the next time you run configure.
  *
- * $Id: config.h.in,v 1.162 2001/04/14 22:55:02 petere Exp $
+ * $Id: config.h.in,v 1.163 2001/05/07 00:43:25 tgl Exp $
  */
 
 #ifndef CONFIG_H
 #define FUNC_MAX_ARGS          INDEX_MAX_KEYS
 
 /*
+ * System default value for pg_attribute.attstattarget
+ */
+#define DEFAULT_ATTSTATTARGET  10
+
+/*
  * Define this to make libpgtcl's "pg_result -assign" command process C-style
  * backslash sequences in returned tuple data and convert Postgres array
  * attributes into Tcl lists.  CAUTION: this conversion is *wrong* unless
index 2cf9378..0967bef 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: execnodes.h,v 1.57 2001/03/22 04:00:50 momjian Exp $
+ * $Id: execnodes.h,v 1.58 2001/05/07 00:43:25 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -628,7 +628,6 @@ typedef struct GroupState
  *      SortState information
  *
  *             sort_Done               indicates whether sort has been performed yet
- *             sort_Keys               scan key structures describing the sort keys
  *             tuplesortstate  private state of tuplesort.c
  * ----------------
  */
@@ -636,7 +635,6 @@ typedef struct SortState
 {
        CommonScanState csstate;        /* its first field is NodeTag */
        bool            sort_Done;
-       ScanKey         sort_Keys;
        void       *tuplesortstate;
 } SortState;
 
index 1614d78..63b1b10 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: parsenodes.h,v 1.126 2001/03/23 04:49:56 momjian Exp $
+ * $Id: parsenodes.h,v 1.127 2001/05/07 00:43:25 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -118,11 +118,12 @@ typedef struct AlterTableStmt
        NodeTag         type;
        char            subtype;                /*------------
                                                                 *      A = add column
-                                                                *      T = alter column
+                                                                *      T = alter column default
+                                                                *      S = alter column statistics
                                                                 *      D = drop column
                                                                 *      C = add constraint
                                                                 *      X = drop constraint
-                                                                *      E = add toast table,
+                                                                *      E = create toast table
                                                                 *      U = change owner
                                                                 *------------
                                                                 */
@@ -690,16 +691,20 @@ typedef struct ClusterStmt
 } ClusterStmt;
 
 /* ----------------------
- *             Vacuum Statement
+ *             Vacuum and Analyze Statements
+ *
+ * Even though these are nominally two statements, it's convenient to use
+ * just one node type for both.
  * ----------------------
  */
 typedef struct VacuumStmt
 {
        NodeTag         type;
-       bool            verbose;                /* print status info */
-       bool            analyze;                /* analyze data */
-       char       *vacrel;                     /* table to vacuum */
-       List       *va_spec;            /* columns to analyse */
+       bool            vacuum;                 /* do VACUUM step */
+       bool            analyze;                /* do ANALYZE step */
+       bool            verbose;                /* print progress info */
+       char       *vacrel;                     /* name of single table to process, or NULL */
+       List       *va_cols;            /* list of column names, or NIL for all */
 } VacuumStmt;
 
 /* ----------------------
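
Given the field comments above, the flag combinations for the three SQL forms would presumably be (a sketch inferred from those comments, not quoted from the grammar changes):

    VACUUM foo;            vacuum = true,  analyze = false
    ANALYZE foo;           vacuum = false, analyze = true
    VACUUM ANALYZE foo;    vacuum = true,  analyze = true

with vacrel left NULL when no table is named and va_cols left NIL unless an explicit column list was given to ANALYZE.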
index 3ae8e09..9e69ed6 100644 (file)
@@ -10,7 +10,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: primnodes.h,v 1.53 2001/03/22 04:00:52 momjian Exp $
+ * $Id: primnodes.h,v 1.54 2001/05/07 00:43:25 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -45,8 +45,8 @@ typedef struct FunctionCache *FunctionCachePtr;
  * reskey and reskeyop are the execution-time representation of sorting.
  * reskey must be zero in any non-sort-key item.  The reskey of sort key
  * targetlist items for a sort plan node is 1,2,...,n for the n sort keys.
- * The reskeyop of each such targetlist item is the sort operator's
- * regproc OID.  reskeyop will be zero in non-sort-key items.
+ * The reskeyop of each such targetlist item is the sort operator's OID.
+ * reskeyop will be zero in non-sort-key items.
  *
  * Both reskey and reskeyop are typically zero during parse/plan stages.
  * The executor does not pay any attention to ressortgroupref.
@@ -62,7 +62,7 @@ typedef struct Resdom
        Index           ressortgroupref;
        /* nonzero if referenced by a sort/group clause */
        Index           reskey;                 /* order of key in a sort (for those > 0) */
-       Oid                     reskeyop;               /* sort operator's regproc Oid */
+       Oid                     reskeyop;               /* sort operator's Oid */
        bool            resjunk;                /* set to true to eliminate the attribute
                                                                 * from final target list */
 } Resdom;
index f643ef8..c76d9b4 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: relation.h,v 1.54 2001/03/22 04:00:53 momjian Exp $
+ * $Id: relation.h,v 1.55 2001/05/07 00:43:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -512,8 +512,8 @@ typedef struct RestrictInfo
        Oid                     hashjoinoperator;               /* copy of clause operator */
 
        /* cache space for hashclause processing; -1 if not yet set */
-       Selectivity left_dispersion;/* dispersion of left side */
-       Selectivity right_dispersion;           /* dispersion of right side */
+       Selectivity left_bucketsize;            /* avg bucketsize of left side */
+       Selectivity right_bucketsize;           /* avg bucketsize of right side */
 } RestrictInfo;
 
 /*
index 5caa576..cbf6df0 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: cost.h,v 1.38 2001/02/16 00:03:05 tgl Exp $
+ * $Id: cost.h,v 1.39 2001/05/07 00:43:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -64,7 +64,8 @@ extern void cost_mergejoin(Path *path, Path *outer_path, Path *inner_path,
                           List *restrictlist,
                           List *outersortkeys, List *innersortkeys);
 extern void cost_hashjoin(Path *path, Path *outer_path, Path *inner_path,
-                         List *restrictlist, Selectivity innerdispersion);
+                         List *restrictlist, Selectivity innerbucketsize);
+extern Selectivity estimate_hash_bucketsize(Query *root, Var *var);
 extern Cost cost_qual_eval(List *quals);
 extern void set_baserel_size_estimates(Query *root, RelOptInfo *rel);
 extern void set_joinrel_size_estimates(Query *root, RelOptInfo *rel,
index 5b71ede..0839feb 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pathnode.h,v 1.35 2001/03/22 04:00:54 momjian Exp $
+ * $Id: pathnode.h,v 1.36 2001/05/07 00:43:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -59,7 +59,7 @@ extern HashPath *create_hashjoin_path(RelOptInfo *joinrel,
                                         Path *inner_path,
                                         List *restrict_clauses,
                                         List *hashclauses,
-                                        Selectivity innerdispersion);
+                                        Selectivity innerbucketsize);
 
 /*
  * prototypes for relnode.c
index f1c4aff..6b35dee 100644 (file)
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: lsyscache.h,v 1.30 2001/03/22 04:01:13 momjian Exp $
+ * $Id: lsyscache.h,v 1.31 2001/05/07 00:43:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -21,8 +21,6 @@ extern AttrNumber get_attnum(Oid relid, char *attname);
 extern Oid     get_atttype(Oid relid, AttrNumber attnum);
 extern bool get_attisset(Oid relid, char *attname);
 extern int32 get_atttypmod(Oid relid, AttrNumber attnum);
-extern double get_attdispersion(Oid relid, AttrNumber attnum,
-                                 double min_estimate);
 extern RegProcedure get_opcode(Oid opno);
 extern char *get_opname(Oid opno);
 extern bool op_mergejoinable(Oid opno, Oid ltype, Oid rtype,
@@ -41,6 +39,14 @@ extern bool get_typbyval(Oid typid);
 extern void get_typlenbyval(Oid typid, int16 *typlen, bool *typbyval);
 extern char get_typstorage(Oid typid);
 extern Datum get_typdefault(Oid typid);
+extern bool get_attstatsslot(HeapTuple statstuple,
+                                                        Oid atttype, int32 atttypmod,
+                                                        int reqkind, Oid reqop,
+                                                        Datum **values, int *nvalues,
+                                                        float4 **numbers, int *nnumbers);
+extern void free_attstatsslot(Oid atttype,
+                                                         Datum *values, int nvalues,
+                                                         float4 *numbers, int nnumbers);
 
 #define TypeIsToastable(typid) (get_typstorage(typid) != 'p')
 
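
The get_attstatsslot/free_attstatsslot pair added above is the intended way to consume the slot-based pg_statistic layout shown earlier. A minimal sketch of a caller, assuming the pg_statistic tuple has already been fetched (the helper name is hypothetical; this is illustrative, not code from this commit):

static double
mcv_total_fraction(HeapTuple statstuple, Oid atttype, int32 atttypmod, Oid eqop)
{
    Datum      *values;
    int         nvalues;
    float4     *numbers;
    int         nnumbers;
    double      total = 0.0;
    int         i;

    /* look for a most-common-values slot gathered under the given "=" operator */
    if (get_attstatsslot(statstuple, atttype, atttypmod,
                         STATISTIC_KIND_MCV, eqop,
                         &values, &nvalues,
                         &numbers, &nnumbers))
    {
        /* stanumbers holds each MCV's frequency as a fraction of all rows */
        for (i = 0; i < nnumbers; i++)
            total += numbers[i];
        free_attstatsslot(atttype, values, nvalues, numbers, nnumbers);
    }
    return total;               /* fraction of the table covered by the MCV list */
}

The statstuple here would be the column's pg_statistic row, for example looked up through the STATRELATT syscache entry renamed below.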
index 8d4e2ae..342f7bf 100644 (file)
@@ -9,7 +9,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: syscache.h,v 1.29 2001/03/22 04:01:14 momjian Exp $
+ * $Id: syscache.h,v 1.30 2001/05/07 00:43:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -53,7 +53,7 @@
 #define RULEOID                        22
 #define SHADOWNAME             23
 #define SHADOWSYSID            24
-#define STATRELID              25
+#define STATRELATT             25
 #define TYPENAME               26
 #define TYPEOID                        27
 
index 7f27377..0017617 100644 (file)
@@ -13,7 +13,7 @@
  * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: tuplesort.h,v 1.6 2001/01/24 19:43:29 momjian Exp $
+ * $Id: tuplesort.h,v 1.7 2001/05/07 00:43:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -36,8 +36,9 @@ typedef struct Tuplesortstate Tuplesortstate;
  */
 
 extern Tuplesortstate *tuplesort_begin_heap(TupleDesc tupDesc,
-                                        int nkeys, ScanKey keys,
-                                        bool randomAccess);
+                                         int nkeys,
+                                         Oid *sortOperators, AttrNumber *attNums,
+                                         bool randomAccess);
 extern Tuplesortstate *tuplesort_begin_index(Relation indexRel,
                                          bool enforceUnique,
                                          bool randomAccess);
@@ -75,4 +76,19 @@ extern void tuplesort_rescan(Tuplesortstate *state);
 extern void tuplesort_markpos(Tuplesortstate *state);
 extern void tuplesort_restorepos(Tuplesortstate *state);
 
+/*
+ * This routine selects an appropriate sorting function to implement
+ * a sort operator as efficiently as possible.
+ */
+typedef enum
+{
+       SORTFUNC_LT,                            /* raw "<" operator */
+       SORTFUNC_CMP,                           /* -1 / 0 / 1 three-way comparator */
+       SORTFUNC_REVCMP                         /* 1 / 0 / -1 (reversed) 3-way comparator */
+} SortFunctionKind;
+
+extern void SelectSortFunction(Oid sortOperator,
+                                                          RegProcedure *sortFunction,
+                                                          SortFunctionKind *kind);
+
 #endif  /* TUPLESORT_H */
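
With the revised tuplesort_begin_heap signature above, a caller passes sort operator OIDs and attribute numbers directly instead of prebuilt ScanKeys (note the matching removal of sort_Keys from SortState in execnodes.h earlier); tuplesort.c can then use SelectSortFunction to choose between a raw "<" operator and a btree-style three-way comparator, per the SortFunctionKind values declared above. A minimal sketch of a caller (the helper name is hypothetical):

static Tuplesortstate *
begin_single_key_sort(TupleDesc tupDesc, AttrNumber attno, Oid sortOp)
{
    Oid         sortOperators[1];
    AttrNumber  attNums[1];

    sortOperators[0] = sortOp;  /* "<" operator OID for the column's datatype */
    attNums[0] = attno;         /* which attribute of the tuples to sort on */

    return tuplesort_begin_heap(tupDesc, 1, sortOperators, attNums,
                                false /* no random access to results needed */ );
}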
index 5614a34..c03880f 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/interfaces/ecpg/preproc/keywords.c,v 1.39 2001/03/22 04:01:21 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/interfaces/ecpg/preproc/keywords.c,v 1.40 2001/05/07 00:43:26 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -238,6 +238,7 @@ static ScanKeyword ScanKeywords[] = {
        {"some", SOME},
        {"start", START},
        {"statement", STATEMENT},
+       {"statistics", STATISTICS},
        {"stdin", STDIN},
        {"stdout", STDOUT},
        {"substring", SUBSTRING},
index 345efb6..91708bd 100644 (file)
@@ -134,7 +134,7 @@ make_name(void)
 
 %union {
        double                  dval;
-        int                     ival;
+       int                     ival;
        char *                  str;
        struct when             action;
        struct index            index;
@@ -224,7 +224,7 @@ make_name(void)
                NONE, NOTHING, NOTIFY, NOTNULL, OFFSET, OIDS,
                OPERATOR, OWNER, PASSWORD, PROCEDURAL, REINDEX, RENAME, RESET,
                RETURNS, ROW, RULE, SEQUENCE, SERIAL, SETOF, SHARE,
-               SHOW, START, STATEMENT, STDIN, STDOUT, SYSID TEMP,
+               SHOW, START, STATEMENT, STATISTICS, STDIN, STDOUT, SYSID TEMP,
                TEMPLATE, TOAST, TRUNCATE, TRUSTED, UNLISTEN, UNTIL, VACUUM,
                VALID, VERBOSE, VERSION
 
@@ -285,7 +285,7 @@ make_name(void)
 %type  <str>    file_name AexprConst ParamNo c_expr ConstTypename
 %type  <str>   in_expr_nodes a_expr b_expr TruncateStmt CommentStmt
 %type  <str>   opt_indirection expr_list extract_list extract_arg
-%type  <str>   position_list substr_list substr_from alter_column_action
+%type  <str>   position_list substr_list substr_from alter_column_default
 %type  <str>   trim_list in_expr substr_for attr attrs drop_behavior
 %type  <str>   Typename SimpleTypename Generic Numeric generic opt_float opt_numeric
 %type  <str>   opt_decimal Character character opt_varying opt_charset
@@ -293,7 +293,7 @@ make_name(void)
 %type  <str>   row_expr row_descriptor row_list ConstDatetime opt_chain
 %type  <str>   SelectStmt into_clause OptTemp ConstraintAttributeSpec
 %type  <str>   opt_table opt_all sort_clause sortby_list ConstraintAttr 
-%type  <str>   sortby OptUseOp opt_inh_star relation_name_list name_list
+%type  <str>   sortby OptUseOp relation_name_list name_list
 %type  <str>   group_clause having_clause from_clause opt_distinct
 %type  <str>   join_outer where_clause relation_expr sub_type opt_arg
 %type  <str>   opt_column_list insert_rest InsertStmt OptimizableStmt
@@ -301,8 +301,8 @@ make_name(void)
 %type  <str>    NotifyStmt columnElem copy_dirn UnlistenStmt copy_null
 %type  <str>    copy_delimiter ListenStmt CopyStmt copy_file_name opt_binary
 %type  <str>    opt_with_copy FetchStmt direction fetch_how_many from_in
-%type  <str>    ClosePortalStmt DropStmt VacuumStmt opt_verbose func_arg
-%type  <str>    opt_analyze opt_va_list va_list ExplainStmt index_params
+%type  <str>    ClosePortalStmt DropStmt VacuumStmt AnalyzeStmt opt_verbose func_arg
+%type  <str>    analyze_keyword opt_name_list ExplainStmt index_params
 %type  <str>    index_list func_index index_elem opt_class access_method_clause
 %type  <str>    index_opt_unique IndexStmt func_return ConstInterval
 %type  <str>    func_args_list func_args opt_with ProcedureStmt def_arg
@@ -329,7 +329,7 @@ make_name(void)
 %type  <str>   opt_cursor opt_lmode ConstraintsSetStmt comment_tg AllConst
 %type  <str>   case_expr when_clause_list case_default case_arg when_clause
 %type  <str>    select_clause opt_select_limit select_limit_value ConstraintTimeSpec
-%type  <str>    select_offset_value ReindexStmt join_type opt_only opt_boolean
+%type  <str>    select_offset_value ReindexStmt join_type opt_boolean
 %type  <str>   join_qual update_list AlterSchemaStmt joined_table
 %type  <str>   opt_level opt_lock lock_type users_in_new_group_clause
 %type  <str>    OptConstrFromTable comment_op OptTempTableName StringConst
@@ -447,6 +447,7 @@ stmt:  AlterSchemaStmt                      { output_statement($1, 0, NULL, connection); }
                | CreatedbStmt          { output_statement($1, 0, NULL, connection); }
                | DropdbStmt            { output_statement($1, 0, NULL, connection); }
                | VacuumStmt            { output_statement($1, 0, NULL, connection); }
+               | AnalyzeStmt           { output_statement($1, 0, NULL, connection); }
                | VariableSetStmt       { output_statement($1, 0, NULL, connection); }
                | VariableShowStmt      { output_statement($1, 0, NULL, connection); }
                | VariableResetStmt     { output_statement($1, 0, NULL, connection); }
@@ -909,39 +910,40 @@ CheckPointStmt: CHECKPOINT     { $$= make_str("checkpoint"); }
 
 /*****************************************************************************
  *
- *             QUERY :
- *
  *     ALTER TABLE variations
  *
  *****************************************************************************/
 
 AlterTableStmt:
-/* ALTER TABLE <name> ADD [COLUMN] <coldef> */
-        ALTER TABLE relation_name opt_inh_star ADD opt_column columnDef
+/* ALTER TABLE <relation> ADD [COLUMN] <coldef> */
+        ALTER TABLE relation_expr ADD opt_column columnDef
+               {
+                       $$ = cat_str(5, make_str("alter table"), $3, make_str("add"), $5, $6);
+               }
+/* ALTER TABLE <relation> ALTER [COLUMN] <colname> {SET DEFAULT <expr>|DROP DEFAULT} */
+       | ALTER TABLE relation_expr ALTER opt_column ColId alter_column_default
                {
-                       $$ = cat_str(6, make_str("alter table"), $3, $4, make_str("add"), $6, $7);
+                       $$ = cat_str(6, make_str("alter table"), $3, make_str("alter"), $5, $6, $7);
                }
-/* ALTER TABLE <name> ALTER [COLUMN] <colname> {SET DEFAULT <expr>|DROP
-DEFAULT} */
-       | ALTER TABLE relation_name opt_inh_star ALTER opt_column ColId
-               alter_column_action
+/* ALTER TABLE <relation> ALTER [COLUMN] <colname> SET STATISTICS <Iconst> */
+       | ALTER TABLE relation_expr ALTER opt_column ColId SET STATISTICS Iconst
                {
-                       $$ = cat_str(7, make_str("alter table"), $3, $4, make_str("alter"), $6, $7, $8);
+                       $$ = cat_str(7, make_str("alter table"), $3, make_str("alter"), $5, $6, make_str("set statistics"), $9);
                }
-/* ALTER TABLE <name> DROP [COLUMN] <name> {RESTRICT|CASCADE} */
-       | ALTER TABLE relation_name opt_inh_star DROP opt_column ColId drop_behavior
+/* ALTER TABLE <relation> DROP [COLUMN] <colname> {RESTRICT|CASCADE} */
+       | ALTER TABLE relation_expr DROP opt_column ColId drop_behavior
                {
-                       $$ = cat_str(7, make_str("alter table"), $3, $4, make_str("drop"), $6, $7, $8);
+                       $$ = cat_str(6, make_str("alter table"), $3, make_str("drop"), $5, $6, $7);
                }
-/* ALTER TABLE <name> ADD CONSTRAINT ... */
-       | ALTER TABLE relation_name opt_inh_star ADD TableConstraint
+/* ALTER TABLE <relation> ADD CONSTRAINT ... */
+       | ALTER TABLE relation_expr ADD TableConstraint
                {
-                       $$ = cat_str(5, make_str("alter table"), $3, $4, make_str("add"), $6);
+                       $$ = cat_str(4, make_str("alter table"), $3, make_str("add"), $5);
                }
-/* ALTER TABLE <name> DROP CONSTRAINT ... */
-       | ALTER TABLE relation_name opt_inh_star DROP CONSTRAINT name drop_behavior
+/* ALTER TABLE <relation> DROP CONSTRAINT ... */
+       | ALTER TABLE relation_expr DROP CONSTRAINT name drop_behavior
                {
-                       $$ = cat_str(6, make_str("alter table"), $3, $4, make_str("drop constraint"), $7, $8);
+                       $$ = cat_str(5, make_str("alter table"), $3, make_str("drop constraint"), $6, $7);
                }
 /* ALTER TABLE <name> OWNER TO UserId */     
        | ALTER TABLE relation_name OWNER TO UserId   
@@ -950,7 +952,7 @@ DEFAULT} */
                }
                ;
 
-alter_column_action:
+alter_column_default:
         SET DEFAULT a_expr     { $$ = cat2_str(make_str("set default"), $3); }
         | DROP DEFAULT          { $$ = make_str("drop default"); }
         ;
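
The rewritten AlterTableStmt rules track the backend grammar: the opt_inh_star
suffix is gone in favor of relation_expr, alter_column_action is renamed to
alter_column_default, and a new ALTER COLUMN ... SET STATISTICS <integer> form
sets a column's statistics target for ANALYZE.  A short ecpg sketch of
statements the new rules accept; the table and column names are hypothetical:

    /* Sketch: ALTER TABLE forms accepted by the rewritten rules. */
    EXEC SQL INCLUDE sqlca;

    int
    main(void)
    {
        EXEC SQL CONNECT TO testdb;

        /* alter_column_default: SET DEFAULT <expr> or DROP DEFAULT */
        EXEC SQL ALTER TABLE orders ALTER COLUMN note SET DEFAULT 'none';
        EXEC SQL ALTER TABLE orders ALTER COLUMN note DROP DEFAULT;

        /* new form: per-column statistics target consulted by ANALYZE */
        EXEC SQL ALTER TABLE orders ALTER COLUMN qty SET STATISTICS 50;

        EXEC SQL COMMIT;
        EXEC SQL DISCONNECT;
        return 0;
    }
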
@@ -1234,10 +1236,6 @@ key_reference:  NO ACTION        { $$ = make_str("no action"); }
                | SET NULL_P    { $$ = make_str("set null"); }
                ;
 
-opt_only: ONLY         { $$ = make_str("only"); }
-       | /*EMPTY*/     { $$ = EMPTY; }
-       ;
-
 OptInherit:  INHERITS '(' relation_name_list ')'                { $$ = cat_str(3, make_str("inherits ("), $3, make_str(")")); }
                 | /*EMPTY*/                                    { $$ = EMPTY; }
                 ;      
@@ -2013,10 +2011,9 @@ opt_force:      FORCE            { $$ = make_str("force"); }
  *
  *****************************************************************************/
 
-RenameStmt:  ALTER TABLE relation_name opt_inh_star
-                                 RENAME opt_column opt_name TO name
+RenameStmt:  ALTER TABLE relation_expr RENAME opt_column opt_name TO name
                                {
-                                       $$ = cat_str(8, make_str("alter table"), $3, $4, make_str("rename"), $6, $7, make_str("to"), $9);
+                                       $$ = cat_str(7, make_str("alter table"), $3, make_str("rename"), $5, $6, make_str("to"), $8);
                                }
                ;
 
@@ -2250,38 +2247,44 @@ ClusterStmt:  CLUSTER index_name ON relation_name
  *
  *             QUERY:
  *                             vacuum
+ *                             analyze
  *
  *****************************************************************************/
 
-VacuumStmt:  VACUUM opt_verbose opt_analyze
+VacuumStmt:  VACUUM opt_verbose
+                               {
+                                       $$ = cat_str(2, make_str("vacuum"), $2);
+                               }
+               | VACUUM opt_verbose relation_name
                                {
                                        $$ = cat_str(3, make_str("vacuum"), $2, $3);
                                }
-               | VACUUM opt_verbose opt_analyze relation_name opt_va_list
+               | VACUUM opt_verbose AnalyzeStmt
                                {
-                                       if ( strlen($5) > 0 && strlen($4) == 0 )
-                                               mmerror(ET_ERROR, "VACUUM syntax error at or near \"(\"\n\tRelations name must be specified");
-                                       $$ = cat_str(5, make_str("vacuum"), $2, $3, $4, $5);
+                                       $$ = cat_str(3, make_str("vacuum"), $2, $3);
                                }
                ;
 
-opt_verbose:  VERBOSE                                  { $$ = make_str("verbose"); }
-               | /*EMPTY*/                             { $$ = EMPTY; }
+AnalyzeStmt:  analyze_keyword opt_verbose
+                               {
+                                       $$ = cat_str(2, $1, $2);
+                               }
+               | analyze_keyword opt_verbose relation_name opt_name_list
+                               {
+                                       $$ = cat_str(4, $1, $2, $3, $4);
+                               }
                ;
 
-opt_analyze:  ANALYZE                                  { $$ = make_str("analyze"); }
-               | ANALYSE                               { $$ = make_str("analyse"); }
-               | /*EMPTY*/                             { $$ = EMPTY; }
+analyze_keyword:  ANALYZE                                      { $$ = make_str("analyze"); }
+               | ANALYSE                                                       { $$ = make_str("analyse"); }
                ;
 
-opt_va_list:  '(' va_list ')'                          { $$ = cat_str(3, make_str("("), $2, make_str(")")); }
+opt_verbose:  VERBOSE                                  { $$ = make_str("verbose"); }
                | /*EMPTY*/                             { $$ = EMPTY; }
                ;
 
-va_list:  name
-                               { $$=$1; }
-               | va_list ',' name
-                               { $$=cat_str(3, $1, make_str(","), $3); }
+opt_name_list:  '(' name_list ')'              { $$ = cat_str(3, make_str("("), $2, make_str(")")); }
+               | /*EMPTY*/                             { $$ = EMPTY; }
                ;
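
The VacuumStmt rule loses its old opt_analyze/opt_va_list pieces; a separate
AnalyzeStmt rule handles ANALYZE [VERBOSE] [table [(columns)]], and VACUUM can
take an AnalyzeStmt as its tail.  A short ecpg sketch of the accepted forms;
the names are hypothetical, and ecpg's SET AUTOCOMMIT is used here on the
assumption that these utility commands should not run inside the implicitly
opened transaction block:

    /* Sketch: VACUUM / ANALYZE forms accepted by the new rules. */
    EXEC SQL INCLUDE sqlca;

    int
    main(void)
    {
        EXEC SQL WHENEVER SQLERROR sqlprint;

        EXEC SQL CONNECT TO testdb;
        EXEC SQL SET AUTOCOMMIT TO ON;

        /* AnalyzeStmt: whole database, one table, or selected columns only */
        EXEC SQL ANALYZE;
        EXEC SQL ANALYZE VERBOSE orders (qty, price);

        /* VacuumStmt: plain vacuum, or vacuum plus analyze in one command */
        EXEC SQL VACUUM orders;
        EXEC SQL VACUUM VERBOSE ANALYZE orders;

        EXEC SQL DISCONNECT;
        return 0;
    }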
 
 
@@ -2383,9 +2386,9 @@ columnElem:  ColId opt_indirection
  *
  *****************************************************************************/
 
-DeleteStmt:  DELETE FROM opt_only name where_clause
+DeleteStmt:  DELETE FROM relation_expr where_clause
                                {
-                                       $$ = cat_str(4, make_str("delete from"), $3, $4, $5);
+                                       $$ = cat_str(3, make_str("delete from"), $3, $4);
                                }
                ;
 
@@ -2416,12 +2419,12 @@ opt_lmode:      SHARE                           { $$ = make_str("share"); }
  *
  *****************************************************************************/
 
-UpdateStmt:  UPDATE opt_only relation_name
+UpdateStmt:  UPDATE relation_expr
                          SET update_target_list
                          from_clause
                          where_clause
                                {
-                                       $$ = cat_str(7, make_str("update"), $2, $3, make_str("set"), $5, $6, $7);
+                                       $$ = cat_str(6, make_str("update"), $2, make_str("set"), $4, $5, $6);
                                }
                ;
 
@@ -2667,10 +2670,6 @@ select_offset_value:     PosIntConst     {
  *     ...however, recursive addattr and rename supported.  make special
  *     cases for these.
  */
-opt_inh_star:  '*'                                     { $$ = make_str("*"); }
-               | /*EMPTY*/                             { $$ = EMPTY; }
-               ;
-
 relation_name_list:  name_list { $$ = $1; };
 
 name_list:  name
@@ -2704,7 +2703,7 @@ opt_for_update_clause: for_update_clause                { $$ = $1; }
                | /* EMPTY */                           { $$ = EMPTY; }
                 ;
 
-update_list:  OF va_list
+update_list:  OF name_list
               {
                        $$ = cat2_str(make_str("of"), $2);
              }
@@ -5028,6 +5027,7 @@ TokenId:  ABSOLUTE                        { $$ = make_str("absolute"); }
        | SHARE                         { $$ = make_str("share"); }
        | START                         { $$ = make_str("start"); }
        | STATEMENT                     { $$ = make_str("statement"); }
+       | STATISTICS            { $$ = make_str("statistics"); }
        | STDIN                         { $$ = make_str("stdin"); }
        | STDOUT                        { $$ = make_str("stdout"); }
        | SYSID                         { $$ = make_str("sysid"); }
index a2b0ad9..46bc60f 100644 (file)
@@ -353,12 +353,28 @@ WHERE     pg_statistic.starelid != 0 AND
 -----+----------
 (0 rows)
 
-SELECT oid, pg_statistic.staop 
+SELECT oid, pg_statistic.staop1 
 FROM   pg_statistic 
-WHERE  pg_statistic.staop != 0 AND 
-       NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop);
- oid | staop 
------+-------
+WHERE  pg_statistic.staop1 != 0 AND 
+       NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop1);
+ oid | staop1 
+-----+--------
+(0 rows)
+
+SELECT oid, pg_statistic.staop2 
+FROM   pg_statistic 
+WHERE  pg_statistic.staop2 != 0 AND 
+       NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop2);
+ oid | staop2 
+-----+--------
+(0 rows)
+
+SELECT oid, pg_statistic.staop3 
+FROM   pg_statistic 
+WHERE  pg_statistic.staop3 != 0 AND 
+       NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop3);
+ oid | staop3 
+-----+--------
 (0 rows)
 
 SELECT oid, pg_trigger.tgrelid 
index 9d4ff1b..1b094a6 100644 (file)
@@ -482,8 +482,8 @@ WHERE p1.aggtransfn = p2.oid AND
           (p2.pronargs = 1 AND p1.aggbasetype = 0)));
   oid  | aggname | oid |   proname   
 -------+---------+-----+-------------
- 16997 | max     | 768 | int4larger
- 17011 | min     | 769 | int4smaller
+ 17010 | max     | 768 | int4larger
+ 17024 | min     | 769 | int4smaller
 (2 rows)
 
 -- Cross-check finalfn (if present) against its entry in pg_proc.
index b7ea1f6..88727a6 100644 (file)
@@ -177,10 +177,18 @@ SELECT    oid, pg_statistic.starelid
 FROM   pg_statistic 
 WHERE  pg_statistic.starelid != 0 AND 
        NOT EXISTS(SELECT * FROM pg_class AS t1 WHERE t1.oid = pg_statistic.starelid);
-SELECT oid, pg_statistic.staop 
+SELECT oid, pg_statistic.staop1 
 FROM   pg_statistic 
-WHERE  pg_statistic.staop != 0 AND 
-       NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop);
+WHERE  pg_statistic.staop1 != 0 AND 
+       NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop1);
+SELECT oid, pg_statistic.staop2 
+FROM   pg_statistic 
+WHERE  pg_statistic.staop2 != 0 AND 
+       NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop2);
+SELECT oid, pg_statistic.staop3 
+FROM   pg_statistic 
+WHERE  pg_statistic.staop3 != 0 AND 
+       NOT EXISTS(SELECT * FROM pg_operator AS t1 WHERE t1.oid = pg_statistic.staop3);
 SELECT oid, pg_trigger.tgrelid 
 FROM   pg_trigger 
 WHERE  pg_trigger.tgrelid != 0 AND