OSDN Git Service

Support PostgreSQL 9.5.0. REL95_1_1_3
author Kyotaro Horiguchi <horiguchi.kyotaro@lab.ntt.co.jp>
Fri, 15 Jan 2016 05:12:14 +0000 (14:12 +0900)
committer Kyotaro Horiguchi <horiguchi.kyotaro@lab.ntt.co.jp>
Fri, 15 Jan 2016 05:12:14 +0000 (14:12 +0900)
The PG95 branch was created a bit too early, so it needs an additional
merge from the master branch to complete support for PostgreSQL 9.5.

COPYRIGHT
COPYRIGHT.postgresql
Makefile
SPECS/pg_hint_plan94.spec [deleted file]
core.c
expected/pg_hint_plan.out
pg_hint_plan--1.1.3.sql [moved from pg_hint_plan--1.1.2.sql with 92% similarity]
pg_hint_plan.c
pg_hint_plan.control
pg_stat_statements.c
sql/pg_hint_plan.sql

index bf54977..f7344e7 100644 (file)
--- a/COPYRIGHT
+++ b/COPYRIGHT
@@ -1,4 +1,4 @@
-Copyright (c) 2012-2014, NIPPON TELEGRAPH AND TELEPHONE CORPORATION
+Copyright (c) 2012-2016, NIPPON TELEGRAPH AND TELEPHONE CORPORATION
 All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
index e233796..d8cde9a 100644 (file)
@@ -2,7 +2,7 @@ core.c and make_join_rel.c are parts of PostgreSQL Database Management System.
 (formerly known as Postgres, then as Postgres95)
 Copyright holders of those files are following organizations:
 
-Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
 
 Portions Copyright (c) 1994, The Regents of the University of California
 
index 693dc7f..53937eb 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,11 +1,11 @@
 #
 # pg_hint_plan: Makefile
 #
-# Copyright (c) 2012-2014, NIPPON TELEGRAPH AND TELEPHONE CORPORATION
+# Copyright (c) 2012-2015, NIPPON TELEGRAPH AND TELEPHONE CORPORATION
 #
 
 MODULES = pg_hint_plan
-HINTPLANVER = 1.1.2
+HINTPLANVER = 1.1.3
 
 REGRESS = init base_plan pg_hint_plan ut-init ut-A ut-S ut-J ut-L ut-G ut-R ut-fdw ut-fini
 
@@ -14,7 +14,7 @@ REGRESSION_EXPECTED = expected/init.out expected/base_plan.out expected/pg_hint_
 REGRESS_OPTS = --encoding=UTF8
 
 EXTENSION = pg_hint_plan
-DATA = pg_hint_plan--1.1.2.sql
+DATA = pg_hint_plan--1.1.3.sql
 
 EXTRA_CLEAN = sql/ut-fdw.sql expected/ut-fdw.out
 
@@ -23,8 +23,8 @@ PGXS := $(shell $(PG_CONFIG) --pgxs)
 include $(PGXS)
 
 STARBALL = pg_dbms_stats-$(DBMSSTATSVER).tar.gz
-STARBALL94 = pg_hint_plan94-$(HINTPLANVER).tar.gz
-STARBALLS = $(STARBALL) $(STARBALL94)
+STARBALL95 = pg_hint_plan95-$(HINTPLANVER).tar.gz
+STARBALLS = $(STARBALL) $(STARBALL95)
 
 TARSOURCES = Makefile *.c  *.h \
        pg_hint_plan--*.sql \
@@ -34,7 +34,7 @@ TARSOURCES = Makefile *.c  *.h \
 
 installcheck: $(REGRESSION_EXPECTED)
 
-rpms: rpm94
+rpms: rpm95
 
 # pg_hint_plan.c includes core.c and make_join_rel.c
 pg_hint_plan.o: core.c make_join_rel.c # pg_stat_statements.c
@@ -49,7 +49,7 @@ $(STARBALLS): $(TARSOURCES)
        tar -chzf $@ $(addprefix $(subst .tar.gz,,$@)/, $^)
        rm $(subst .tar.gz,,$@)
 
-rpm94: $(STARBALL94)
-       MAKE_ROOT=`pwd` rpmbuild -bb SPECS/pg_hint_plan94.spec
+rpm95: $(STARBALL95)
+       MAKE_ROOT=`pwd` rpmbuild -bb SPECS/pg_hint_plan95.spec
 
 
diff --git a/SPECS/pg_hint_plan94.spec b/SPECS/pg_hint_plan94.spec
deleted file mode 100644 (file)
index 6aae24a..0000000
+++ /dev/null
@@ -1,84 +0,0 @@
-# SPEC file for pg_hint_plan
-# Copyright(C) 2012-2014 NIPPON TELEGRAPH AND TELEPHONE CORPORATION
-
-%define _pgdir   /usr/pgsql-9.4
-%define _bindir  %{_pgdir}/bin
-%define _libdir  %{_pgdir}/lib
-%define _datadir %{_pgdir}/share
-%if "%(echo ${MAKE_ROOT})" != ""
-  %define _rpmdir %(echo ${MAKE_ROOT})/RPMS
-  %define _sourcedir %(echo ${MAKE_ROOT})
-%endif
-
-## Set general information for pg_hint_plan.
-Summary:    Optimizer hint for PostgreSQL 9.4
-Name:       pg_hint_plan94
-Version:    1.1.2
-Release:    1%{?dist}
-License:    BSD
-Group:      Applications/Databases
-Source0:    %{name}-%{version}.tar.gz
-#URL:        http://example.com/pg_hint_plan/
-BuildRoot:  %{_tmppath}/%{name}-%{version}-%{release}-%(%{__id_u} -n)
-Vendor:     NIPPON TELEGRAPH AND TELEPHONE CORPORATION
-
-## We use postgresql-devel package
-BuildRequires:  postgresql94-devel
-Requires:  postgresql94-libs
-
-## Description for "pg_hint_plan"
-%description
-pg_hint_plan provides capability to force arbitrary plan to PostgreSQL' planner
-to optimize queries by hand directly.
-
-If you have query plan better than which PostgreSQL chooses, you can force your
-plan by adding special comment block with optimizer hint before the query you
-want to optimize.  You can control scan method, join method, join order, and
-planner-related GUC parameters during planning.
-
-Note that this package is available for only PostgreSQL 9.4.
-
-## pre work for build pg_hint_plan
-%prep
-PATH=/usr/pgsql-9.4/bin:$PATH
-if [ "${MAKE_ROOT}" != "" ]; then
-  pushd ${MAKE_ROOT}
-  make clean %{name}-%{version}.tar.gz
-  popd
-fi
-if [ ! -d %{_rpmdir} ]; then mkdir -p %{_rpmdir}; fi
-%setup -q
-
-## Set variables for build environment
-%build
-PATH=/usr/pgsql-9.4/bin:$PATH
-make USE_PGXS=1 %{?_smp_mflags}
-
-## Set variables for install
-%install
-rm -rf %{buildroot}
-install -d %{buildroot}%{_libdir}
-install pg_hint_plan.so %{buildroot}%{_libdir}/pg_hint_plan.so
-install -d %{buildroot}%{_datadir}/extension
-install -m 644 pg_hint_plan--1.1.2.sql %{buildroot}%{_datadir}/extension/pg_hint_plan--1.1.2.sql
-install -m 644 pg_hint_plan.control %{buildroot}%{_datadir}/extension/pg_hint_plan.control
-
-%clean
-rm -rf %{buildroot}
-
-%files
-%defattr(0755,root,root)
-%{_libdir}/pg_hint_plan.so
-%defattr(0644,root,root)
-%{_datadir}/extension/pg_hint_plan--1.1.2.sql
-%{_datadir}/extension/pg_hint_plan.control
-
-# History of pg_hint_plan.
-%changelog
-* Thu Dec 17 2014 Kyotaro Horiguchi
-- Support 9.4. New rev 1.1.2.
-* Mon Sep 02 2013 Takashi Suzuki
-- Initial cut for 1.1.0
-* Mon Sep 24 2012 Shigeru Hanada <shigeru.hanada@gmail.com>
-- Initial cut for 1.0.0
-
diff --git a/core.c b/core.c
index 4d0a9a1..94d8031 100644 (file)
--- a/core.c
+++ b/core.c
@@ -20,7 +20,7 @@
  *     mark_dummy_rel()
  *     restriction_is_constant_false()
  *
- * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  *-------------------------------------------------------------------------
@@ -215,9 +215,6 @@ set_append_rel_pathlist(PlannerInfo *root, RelOptInfo *rel,
                        add_path(rel, (Path *)
                                         create_append_path(rel, subpaths, required_outer));
        }
-
-       /* Select cheapest paths */
-       set_cheapest(rel);
 }
 
 /*
@@ -720,7 +717,7 @@ join_search_one_level(PlannerInfo *root, int level)
                 */
                if (joinrels[level] == NIL &&
                        root->join_info_list == NIL &&
-                       root->lateral_info_list == NIL)
+                       !root->hasLateralRTEs)
                        elog(ERROR, "failed to build any %d-way joins", level);
        }
 }
@@ -819,9 +816,7 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
        SpecialJoinInfo *match_sjinfo;
        bool            reversed;
        bool            unique_ified;
-       bool            is_valid_inner;
-       bool            lateral_fwd;
-       bool            lateral_rev;
+       bool            must_be_leftjoin;
        ListCell   *l;
 
        /*
@@ -834,12 +829,12 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
        /*
         * If we have any special joins, the proposed join might be illegal; and
         * in any case we have to determine its join type.  Scan the join info
-        * list for conflicts.
+        * list for matches and conflicts.
         */
        match_sjinfo = NULL;
        reversed = false;
        unique_ified = false;
-       is_valid_inner = true;
+       must_be_leftjoin = false;
 
        foreach(l, root->join_info_list)
        {
@@ -890,7 +885,8 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
                 * If one input contains min_lefthand and the other contains
                 * min_righthand, then we can perform the SJ at this join.
                 *
-                * Barf if we get matches to more than one SJ (is that possible?)
+                * Reject if we get matches to more than one SJ; that implies we're
+                * considering something that's not really valid.
                 */
                if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
                        bms_is_subset(sjinfo->min_righthand, rel2->relids))
@@ -955,90 +951,168 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
                }
                else
                {
-                       /*----------
-                        * Otherwise, the proposed join overlaps the RHS but isn't
-                        * a valid implementation of this SJ.  It might still be
-                        * a legal join, however.  If both inputs overlap the RHS,
-                        * assume that it's OK.  Since the inputs presumably got past
-                        * this function's checks previously, they can't overlap the
-                        * LHS and their violations of the RHS boundary must represent
-                        * SJs that have been determined to commute with this one.
-                        * We have to allow this to work correctly in cases like
-                        *              (a LEFT JOIN (b JOIN (c LEFT JOIN d)))
-                        * when the c/d join has been determined to commute with the join
-                        * to a, and hence d is not part of min_righthand for the upper
-                        * join.  It should be legal to join b to c/d but this will appear
-                        * as a violation of the upper join's RHS.
-                        * Furthermore, if one input overlaps the RHS and the other does
-                        * not, we should still allow the join if it is a valid
-                        * implementation of some other SJ.  We have to allow this to
-                        * support the associative identity
-                        *              (a LJ b on Pab) LJ c ON Pbc = a LJ (b LJ c ON Pbc) on Pab
-                        * since joining B directly to C violates the lower SJ's RHS.
-                        * We assume that make_outerjoininfo() set things up correctly
-                        * so that we'll only match to some SJ if the join is valid.
-                        * Set flag here to check at bottom of loop.
-                        *----------
+                       /*
+                        * Otherwise, the proposed join overlaps the RHS but isn't a valid
+                        * implementation of this SJ.  But don't panic quite yet: the RHS
+                        * violation might have occurred previously, in one or both input
+                        * relations, in which case we must have previously decided that
+                        * it was OK to commute some other SJ with this one.  If we need
+                        * to perform this join to finish building up the RHS, rejecting
+                        * it could lead to not finding any plan at all.  (This can occur
+                        * because of the heuristics elsewhere in this file that postpone
+                        * clauseless joins: we might not consider doing a clauseless join
+                        * within the RHS until after we've performed other, validly
+                        * commutable SJs with one or both sides of the clauseless join.)
+                        * This consideration boils down to the rule that if both inputs
+                        * overlap the RHS, we can allow the join --- they are either
+                        * fully within the RHS, or represent previously-allowed joins to
+                        * rels outside it.
                         */
-                       if (sjinfo->jointype != JOIN_SEMI &&
-                               bms_overlap(rel1->relids, sjinfo->min_righthand) &&
+                       if (bms_overlap(rel1->relids, sjinfo->min_righthand) &&
                                bms_overlap(rel2->relids, sjinfo->min_righthand))
-                       {
-                               /* seems OK */
-                               Assert(!bms_overlap(joinrelids, sjinfo->min_lefthand));
-                       }
-                       else
-                               is_valid_inner = false;
+                               continue;               /* assume valid previous violation of RHS */
+
+                       /*
+                        * The proposed join could still be legal, but only if we're
+                        * allowed to associate it into the RHS of this SJ.  That means
+                        * this SJ must be a LEFT join (not SEMI or ANTI, and certainly
+                        * not FULL) and the proposed join must not overlap the LHS.
+                        */
+                       if (sjinfo->jointype != JOIN_LEFT ||
+                               bms_overlap(joinrelids, sjinfo->min_lefthand))
+                               return false;   /* invalid join path */
+
+                       /*
+                        * To be valid, the proposed join must be a LEFT join; otherwise
+                        * it can't associate into this SJ's RHS.  But we may not yet have
+                        * found the SpecialJoinInfo matching the proposed join, so we
+                        * can't test that yet.  Remember the requirement for later.
+                        */
+                       must_be_leftjoin = true;
                }
        }
 
        /*
-        * Fail if violated some SJ's RHS and didn't match to another SJ. However,
-        * "matching" to a semijoin we are implementing by unique-ification
-        * doesn't count (think: it's really an inner join).
+        * Fail if violated any SJ's RHS and didn't match to a LEFT SJ: the
+        * proposed join can't associate into an SJ's RHS.
+        *
+        * Also, fail if the proposed join's predicate isn't strict; we're
+        * essentially checking to see if we can apply outer-join identity 3, and
+        * that's a requirement.  (This check may be redundant with checks in
+        * make_outerjoininfo, but I'm not quite sure, and it's cheap to test.)
         */
-       if (!is_valid_inner &&
-               (match_sjinfo == NULL || unique_ified))
+       if (must_be_leftjoin &&
+               (match_sjinfo == NULL ||
+                match_sjinfo->jointype != JOIN_LEFT ||
+                !match_sjinfo->lhs_strict))
                return false;                   /* invalid join path */
 
        /*
         * We also have to check for constraints imposed by LATERAL references.
-        * The proposed rels could each contain lateral references to the other,
-        * in which case the join is impossible.  If there are lateral references
-        * in just one direction, then the join has to be done with a nestloop
-        * with the lateral referencer on the inside.  If the join matches an SJ
-        * that cannot be implemented by such a nestloop, the join is impossible.
         */
-       lateral_fwd = lateral_rev = false;
-       foreach(l, root->lateral_info_list)
+       if (root->hasLateralRTEs)
        {
-               LateralJoinInfo *ljinfo = (LateralJoinInfo *) lfirst(l);
+               bool            lateral_fwd;
+               bool            lateral_rev;
+               Relids          join_lateral_rels;
 
-               if (bms_is_subset(ljinfo->lateral_rhs, rel2->relids) &&
-                       bms_overlap(ljinfo->lateral_lhs, rel1->relids))
+               /*
+                * The proposed rels could each contain lateral references to the
+                * other, in which case the join is impossible.  If there are lateral
+                * references in just one direction, then the join has to be done with
+                * a nestloop with the lateral referencer on the inside.  If the join
+                * matches an SJ that cannot be implemented by such a nestloop, the
+                * join is impossible.
+                *
+                * Also, if the lateral reference is only indirect, we should reject
+                * the join; whatever rel(s) the reference chain goes through must be
+                * joined to first.
+                *
+                * Another case that might keep us from building a valid plan is the
+                * implementation restriction described by have_dangerous_phv().
+                */
+               lateral_fwd = bms_overlap(rel1->relids, rel2->lateral_relids);
+               lateral_rev = bms_overlap(rel2->relids, rel1->lateral_relids);
+               if (lateral_fwd && lateral_rev)
+                       return false;           /* have lateral refs in both directions */
+               if (lateral_fwd)
                {
                        /* has to be implemented as nestloop with rel1 on left */
-                       if (lateral_rev)
-                               return false;   /* have lateral refs in both directions */
-                       lateral_fwd = true;
-                       if (!bms_is_subset(ljinfo->lateral_lhs, rel1->relids))
-                               return false;   /* rel1 can't compute the required parameter */
                        if (match_sjinfo &&
-                               (reversed || match_sjinfo->jointype == JOIN_FULL))
+                               (reversed ||
+                                unique_ified ||
+                                match_sjinfo->jointype == JOIN_FULL))
                                return false;   /* not implementable as nestloop */
+                       /* check there is a direct reference from rel2 to rel1 */
+                       if (!bms_overlap(rel1->relids, rel2->direct_lateral_relids))
+                               return false;   /* only indirect refs, so reject */
+                       /* check we won't have a dangerous PHV */
+                       if (have_dangerous_phv(root, rel1->relids, rel2->lateral_relids))
+                               return false;   /* might be unable to handle required PHV */
                }
-               if (bms_is_subset(ljinfo->lateral_rhs, rel1->relids) &&
-                       bms_overlap(ljinfo->lateral_lhs, rel2->relids))
+               else if (lateral_rev)
                {
                        /* has to be implemented as nestloop with rel2 on left */
-                       if (lateral_fwd)
-                               return false;   /* have lateral refs in both directions */
-                       lateral_rev = true;
-                       if (!bms_is_subset(ljinfo->lateral_lhs, rel2->relids))
-                               return false;   /* rel2 can't compute the required parameter */
                        if (match_sjinfo &&
-                               (!reversed || match_sjinfo->jointype == JOIN_FULL))
+                               (!reversed ||
+                                unique_ified ||
+                                match_sjinfo->jointype == JOIN_FULL))
                                return false;   /* not implementable as nestloop */
+                       /* check there is a direct reference from rel1 to rel2 */
+                       if (!bms_overlap(rel2->relids, rel1->direct_lateral_relids))
+                               return false;   /* only indirect refs, so reject */
+                       /* check we won't have a dangerous PHV */
+                       if (have_dangerous_phv(root, rel2->relids, rel1->lateral_relids))
+                               return false;   /* might be unable to handle required PHV */
+               }
+
+               /*
+                * LATERAL references could also cause problems later on if we accept
+                * this join: if the join's minimum parameterization includes any rels
+                * that would have to be on the inside of an outer join with this join
+                * rel, then it's never going to be possible to build the complete
+                * query using this join.  We should reject this join not only because
+                * it'll save work, but because if we don't, the clauseless-join
+                * heuristics might think that legality of this join means that some
+                * other join rel need not be formed, and that could lead to failure
+                * to find any plan at all.  We have to consider not only rels that
+                * are directly on the inner side of an OJ with the joinrel, but also
+                * ones that are indirectly so, so search to find all such rels.
+                */
+               join_lateral_rels = min_join_parameterization(root, joinrelids,
+                                                                                                         rel1, rel2);
+               if (join_lateral_rels)
+               {
+                       Relids          join_plus_rhs = bms_copy(joinrelids);
+                       bool            more;
+
+                       do
+                       {
+                               more = false;
+                               foreach(l, root->join_info_list)
+                               {
+                                       SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
+
+                                       if (bms_overlap(sjinfo->min_lefthand, join_plus_rhs) &&
+                                               !bms_is_subset(sjinfo->min_righthand, join_plus_rhs))
+                                       {
+                                               join_plus_rhs = bms_add_members(join_plus_rhs,
+                                                                                                         sjinfo->min_righthand);
+                                               more = true;
+                                       }
+                                       /* full joins constrain both sides symmetrically */
+                                       if (sjinfo->jointype == JOIN_FULL &&
+                                               bms_overlap(sjinfo->min_righthand, join_plus_rhs) &&
+                                               !bms_is_subset(sjinfo->min_lefthand, join_plus_rhs))
+                                       {
+                                               join_plus_rhs = bms_add_members(join_plus_rhs,
+                                                                                                               sjinfo->min_lefthand);
+                                               more = true;
+                                       }
+                               }
+                       } while (more);
+                       if (bms_overlap(join_plus_rhs, join_lateral_rels))
+                               return false;   /* will not be able to join to some RHS rel */
                }
        }
 
@@ -1052,7 +1126,7 @@ join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
  * has_join_restriction
  *             Detect whether the specified relation has join-order restrictions,
  *             due to being inside an outer join or an IN (sub-SELECT),
- *             or participating in any LATERAL references.
+ *             or participating in any LATERAL references or multi-rel PHVs.
  *
  * Essentially, this tests whether have_join_order_restriction() could
  * succeed with this rel and some other one.  It's OK if we sometimes
@@ -1064,12 +1138,15 @@ has_join_restriction(PlannerInfo *root, RelOptInfo *rel)
 {
        ListCell   *l;
 
-       foreach(l, root->lateral_info_list)
+       if (rel->lateral_relids != NULL || rel->lateral_referencers != NULL)
+               return true;
+
+       foreach(l, root->placeholder_list)
        {
-               LateralJoinInfo *ljinfo = (LateralJoinInfo *) lfirst(l);
+               PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
 
-               if (bms_is_subset(ljinfo->lateral_rhs, rel->relids) ||
-                       bms_overlap(ljinfo->lateral_lhs, rel->relids))
+               if (bms_is_subset(rel->relids, phinfo->ph_eval_at) &&
+                       !bms_equal(rel->relids, phinfo->ph_eval_at))
                        return true;
        }
 
index c5790c8..bb67f7e 100644 (file)
@@ -3016,7 +3016,7 @@ error hint:
          ->  Seq Scan on t2
 (5 rows)
 
--- inherite table test
+-- inheritance tables test
 SET constraint_exclusion TO off;
 EXPLAIN (COSTS false) SELECT * FROM p1 WHERE id >= 50 AND id <= 51 AND p1.ctid = '(1,1)';
                               QUERY PLAN                               
@@ -7983,7 +7983,7 @@ duplication hint:
 error hint:
 
 CONTEXT:  SQL statement "/*+ SeqScan(t1) */ SELECT * FROM t1"
-PL/pgSQL function testfunc() line 3 at EXECUTE statement
+PL/pgSQL function testfunc() line 3 at EXECUTE
  testfunc 
 ----------
  
similarity index 92%
rename from pg_hint_plan--1.1.2.sql
rename to pg_hint_plan--1.1.3.sql
index 4a465f7..880838e 100644 (file)
@@ -1,4 +1,4 @@
-/* pg_hint_plan/pg_hint_plan--1.1.2.sql */
+/* pg_hint_plan/pg_hint_plan--1.1.3.sql */
 
 -- complain if script is sourced in psql, rather than via CREATE EXTENSION
 \echo Use "CREATE EXTENSION pg_hint_plan" to load this file. \quit
index 50494e7..db6254c 100644 (file)
@@ -3785,6 +3785,8 @@ rebuild_scan_path(HintState *hstate, PlannerInfo *root, int level,
                {
                        set_plain_rel_pathlist(root, rel, rte);
                }
+
+               set_cheapest(rel);
        }
 
        /*
index 20657d1..bd2d451 100644 (file)
@@ -1,6 +1,6 @@
 # pg_hint_plan extension
 
 comment = ''
-default_version = '1.1.2'
+default_version = '1.1.3'
 relocatable = false
 schema = hint_plan
index 7304565..1a92cdc 100644 (file)
 /*-------------------------------------------------------------------------
  *
  * pg_stat_statements.c
- *             Track statement execution times across a whole database cluster.
+ * 
+ * Part of pg_stat_statements.c in PostgreSQL 9.5.
  *
- * Execution costs are totalled for each distinct source query, and kept in
- * a shared hashtable.  (We track only as many distinct queries as will fit
- * in the designated amount of shared memory.)
+ * Copyright (c) 2008-2015, PostgreSQL Global Development Group
  *
- * As of Postgres 9.2, this module normalizes query entries.  Normalization
- * is a process whereby similar queries, typically differing only in their
- * constants (though the exact rules are somewhat more subtle than that) are
- * recognized as equivalent, and are tracked as a single entry.  This is
- * particularly useful for non-prepared queries.
- *
- * Normalization is implemented by fingerprinting queries, selectively
- * serializing those fields of each query tree's nodes that are judged to be
- * essential to the query.  This is referred to as a query jumble.  This is
- * distinct from a regular serialization in that various extraneous
- * information is ignored as irrelevant or not essential to the query, such
- * as the collations of Vars and, most notably, the values of constants.
- *
- * This jumble is acquired at the end of parse analysis of each query, and
- * a 32-bit hash of it is stored into the query's Query.queryId field.
- * The server then copies this value around, making it available in plan
- * tree(s) generated from the query.  The executor can then use this value
- * to blame query costs on the proper queryId.
- *
- * To facilitate presenting entries to users, we create "representative" query
- * strings in which constants are replaced with '?' characters, to make it
- * clearer what a normalized entry can represent.  To save on shared memory,
- * and to avoid having to truncate oversized query strings, we store these
- * strings in a temporary external query-texts file.  Offsets into this
- * file are kept in shared memory.
- *
- * Note about locking issues: to create or delete an entry in the shared
- * hashtable, one must hold pgss->lock exclusively.  Modifying any field
- * in an entry except the counters requires the same.  To look up an entry,
- * one must hold the lock shared.  To read or update the counters within
- * an entry, one must hold the lock shared or exclusive (so the entry doesn't
- * disappear!) and also take the entry's mutex spinlock.
- * The shared state variable pgss->extent (the next free spot in the external
- * query-text file) should be accessed only while holding either the
- * pgss->mutex spinlock, or exclusive lock on pgss->lock.  We use the mutex to
- * allow reserving file space while holding only shared lock on pgss->lock.
- * Rewriting the entire external query-text file, eg for garbage collection,
- * requires holding pgss->lock exclusively; this allows individual entries
- * in the file to be read or written while holding only shared lock.
- *
- *
- * Copyright (c) 2008-2014, PostgreSQL Global Development Group
- *
- * IDENTIFICATION
- *       contrib/pg_stat_statements/pg_stat_statements.c
- *
- *-------------------------------------------------------------------------
- */
-#include "postgres.h"
-
-#include <sys/stat.h>
-
-#ifdef NOT_USED
-#include <unistd.h>
-#endif
-
-#include "access/hash.h"
-#ifdef NOT_USED
-#include "executor/instrument.h"
-#include "funcapi.h"
-#include "mb/pg_wchar.h"
-#include "miscadmin.h"
-#include "parser/analyze.h"
-#include "parser/parsetree.h"
-#endif
-#include "parser/scanner.h"
-#ifdef NOT_USED
-#include "pgstat.h"
-#include "storage/fd.h"
-#include "storage/ipc.h"
-#include "storage/spin.h"
-#include "tcop/utility.h"
-#include "utils/builtins.h"
-#include "utils/memutils.h"
-
-PG_MODULE_MAGIC;
-
-/* Location of permanent stats file (valid when database is shut down) */
-#define PGSS_DUMP_FILE PGSTAT_STAT_PERMANENT_DIRECTORY "/pg_stat_statements.stat"
-
-/*
- * Location of external query text file.  We don't keep it in the core
- * system's stats_temp_directory.  The core system can safely use that GUC
- * setting, because the statistics collector temp file paths are set only once
- * as part of changing the GUC, but pg_stat_statements has no way of avoiding
- * race conditions.  Besides, we only expect modest, infrequent I/O for query
- * strings, so placing the file on a faster filesystem is not compelling.
- */
-#define PGSS_TEXT_FILE PG_STAT_TMP_DIR "/pgss_query_texts.stat"
-
-/* Magic number identifying the stats file format */
-static const uint32 PGSS_FILE_HEADER = 0x20140125;
-
-/* PostgreSQL major version number, changes in which invalidate all entries */
-static const uint32 PGSS_PG_MAJOR_VERSION = PG_VERSION_NUM / 100;
-
-/* XXX: Should USAGE_EXEC reflect execution time and/or buffer usage? */
-#define USAGE_EXEC(duration)   (1.0)
-#define USAGE_INIT                             (1.0)   /* including initial planning */
-#define ASSUMED_MEDIAN_INIT            (10.0)  /* initial assumed median usage */
-#define ASSUMED_LENGTH_INIT            1024    /* initial assumed mean query length */
-#define USAGE_DECREASE_FACTOR  (0.99)  /* decreased every entry_dealloc */
-#define STICKY_DECREASE_FACTOR (0.50)  /* factor for sticky entries */
-#define USAGE_DEALLOC_PERCENT  5               /* free this % of entries at once */
-
-#define JUMBLE_SIZE                            1024    /* query serialization buffer size */
-
-/*
- * Extension version number, for supporting older extension versions' objects
- */
-typedef enum pgssVersion
-{
-       PGSS_V1_0 = 0,
-       PGSS_V1_1,
-       PGSS_V1_2
-} pgssVersion;
-
-/*
- * Hashtable key that defines the identity of a hashtable entry.  We separate
- * queries by user and by database even if they are otherwise identical.
- */
-typedef struct pgssHashKey
-{
-       Oid                     userid;                 /* user OID */
-       Oid                     dbid;                   /* database OID */
-       uint32          queryid;                /* query identifier */
-} pgssHashKey;
-
-/*
- * The actual stats counters kept within pgssEntry.
- */
-typedef struct Counters
-{
-       int64           calls;                  /* # of times executed */
-       double          total_time;             /* total execution time, in msec */
-       int64           rows;                   /* total # of retrieved or affected rows */
-       int64           shared_blks_hit;        /* # of shared buffer hits */
-       int64           shared_blks_read;               /* # of shared disk blocks read */
-       int64           shared_blks_dirtied;    /* # of shared disk blocks dirtied */
-       int64           shared_blks_written;    /* # of shared disk blocks written */
-       int64           local_blks_hit; /* # of local buffer hits */
-       int64           local_blks_read;        /* # of local disk blocks read */
-       int64           local_blks_dirtied;             /* # of local disk blocks dirtied */
-       int64           local_blks_written;             /* # of local disk blocks written */
-       int64           temp_blks_read; /* # of temp blocks read */
-       int64           temp_blks_written;              /* # of temp blocks written */
-       double          blk_read_time;  /* time spent reading, in msec */
-       double          blk_write_time; /* time spent writing, in msec */
-       double          usage;                  /* usage factor */
-} Counters;
-
-/*
- * Statistics per statement
- *
- * Note: in event of a failure in garbage collection of the query text file,
- * we reset query_offset to zero and query_len to -1.  This will be seen as
- * an invalid state by qtext_fetch().
- */
-typedef struct pgssEntry
-{
-       pgssHashKey key;                        /* hash key of entry - MUST BE FIRST */
-       Counters        counters;               /* the statistics for this query */
-       Size            query_offset;   /* query text offset in external file */
-       int                     query_len;              /* # of valid bytes in query string */
-       int                     encoding;               /* query text encoding */
-       slock_t         mutex;                  /* protects the counters only */
-} pgssEntry;
-
-/*
- * Global shared state
- */
-typedef struct pgssSharedState
-{
-       LWLock     *lock;                       /* protects hashtable search/modification */
-       double          cur_median_usage;               /* current median usage in hashtable */
-       Size            mean_query_len; /* current mean entry text length */
-       slock_t         mutex;                  /* protects following fields only: */
-       Size            extent;                 /* current extent of query file */
-       int                     n_writers;              /* number of active writers to query file */
-       int                     gc_count;               /* query file garbage collection cycle count */
-} pgssSharedState;
-
-/*
- * Struct for tracking locations/lengths of constants during normalization
- */
-typedef struct pgssLocationLen
-{
-       int                     location;               /* start offset in query text */
-       int                     length;                 /* length in bytes, or -1 to ignore */
-} pgssLocationLen;
-
-/*
- * Working state for computing a query jumble and producing a normalized
- * query string
- */
-typedef struct pgssJumbleState
-{
-       /* Jumble of current query tree */
-       unsigned char *jumble;
-
-       /* Number of bytes used in jumble[] */
-       Size            jumble_len;
-
-       /* Array of locations of constants that should be removed */
-       pgssLocationLen *clocations;
-
-       /* Allocated length of clocations array */
-       int                     clocations_buf_size;
-
-       /* Current number of valid entries in clocations array */
-       int                     clocations_count;
-} pgssJumbleState;
-
-/*---- Local variables ----*/
-
-/* Current nesting depth of ExecutorRun+ProcessUtility calls */
-static int     nested_level = 0;
-
-/* Saved hook values in case of unload */
-static shmem_startup_hook_type prev_shmem_startup_hook = NULL;
-static post_parse_analyze_hook_type prev_post_parse_analyze_hook = NULL;
-static ExecutorStart_hook_type prev_ExecutorStart = NULL;
-static ExecutorRun_hook_type prev_ExecutorRun = NULL;
-static ExecutorFinish_hook_type prev_ExecutorFinish = NULL;
-static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
-static ProcessUtility_hook_type prev_ProcessUtility = NULL;
-
-/* Links to shared memory state */
-static pgssSharedState *pgss = NULL;
-static HTAB *pgss_hash = NULL;
-
-/*---- GUC variables ----*/
-
-typedef enum
-{
-       PGSS_TRACK_NONE,                        /* track no statements */
-       PGSS_TRACK_TOP,                         /* only top level statements */
-       PGSS_TRACK_ALL                          /* all statements, including nested ones */
-}      PGSSTrackLevel;
-
-static const struct config_enum_entry track_options[] =
-{
-       {"none", PGSS_TRACK_NONE, false},
-       {"top", PGSS_TRACK_TOP, false},
-       {"all", PGSS_TRACK_ALL, false},
-       {NULL, 0, false}
-};
-
-static int     pgss_max;                       /* max # statements to track */
-static int     pgss_track;                     /* tracking level */
-static bool pgss_track_utility; /* whether to track utility commands */
-static bool pgss_save;                 /* whether to save stats across shutdown */
-
-
-#define pgss_enabled() \
-       (pgss_track == PGSS_TRACK_ALL || \
-       (pgss_track == PGSS_TRACK_TOP && nested_level == 0))
-
-#define record_gc_qtexts() \
-       do { \
-               volatile pgssSharedState *s = (volatile pgssSharedState *) pgss; \
-               SpinLockAcquire(&s->mutex); \
-               s->gc_count++; \
-               SpinLockRelease(&s->mutex); \
-       } while(0)
-
-/*---- Function declarations ----*/
-
-void           _PG_init(void);
-void           _PG_fini(void);
-
-PG_FUNCTION_INFO_V1(pg_stat_statements_reset);
-PG_FUNCTION_INFO_V1(pg_stat_statements_1_2);
-PG_FUNCTION_INFO_V1(pg_stat_statements);
-
-static void pgss_shmem_startup(void);
-static void pgss_shmem_shutdown(int code, Datum arg);
-static void pgss_post_parse_analyze(ParseState *pstate, Query *query);
-static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags);
-static void pgss_ExecutorRun(QueryDesc *queryDesc,
-                                ScanDirection direction,
-                                long count);
-static void pgss_ExecutorFinish(QueryDesc *queryDesc);
-static void pgss_ExecutorEnd(QueryDesc *queryDesc);
-static void pgss_ProcessUtility(Node *parsetree, const char *queryString,
-                                       ProcessUtilityContext context, ParamListInfo params,
-                                       DestReceiver *dest, char *completionTag);
-static uint32 pgss_hash_fn(const void *key, Size keysize);
-static int     pgss_match_fn(const void *key1, const void *key2, Size keysize);
-static uint32 pgss_hash_string(const char *str);
-static void pgss_store(const char *query, uint32 queryId,
-                  double total_time, uint64 rows,
-                  const BufferUsage *bufusage,
-                  pgssJumbleState *jstate);
-static void pg_stat_statements_internal(FunctionCallInfo fcinfo,
-                                                       pgssVersion api_version,
-                                                       bool showtext);
-static Size pgss_memsize(void);
-static pgssEntry *entry_alloc(pgssHashKey *key, Size query_offset, int query_len,
-                       int encoding, bool sticky);
-static void entry_dealloc(void);
-static bool qtext_store(const char *query, int query_len,
-                       Size *query_offset, int *gc_count);
-static char *qtext_load_file(Size *buffer_size);
-static char *qtext_fetch(Size query_offset, int query_len,
-                       char *buffer, Size buffer_size);
-static bool need_gc_qtexts(void);
-static void gc_qtexts(void);
-static void entry_reset(void);
-#endif
-static void AppendJumble(pgssJumbleState *jstate,
-                        const unsigned char *item, Size size);
-static void JumbleQuery(pgssJumbleState *jstate, Query *query);
-static void JumbleRangeTable(pgssJumbleState *jstate, List *rtable);
-static void JumbleExpr(pgssJumbleState *jstate, Node *node);
-static void RecordConstLocation(pgssJumbleState *jstate, int location);
-#ifdef NOT_USED
-static char *generate_normalized_query(pgssJumbleState *jstate, const char *query,
-                                                 int *query_len_p, int encoding);
-#endif
-static void fill_in_constant_lengths(pgssJumbleState *jstate, const char *query);
-static int     comp_location(const void *a, const void *b);
-
-
-#ifdef NOT_USED
-/*
- * Module load callback
- */
-void
-_PG_init(void)
-{
-       /*
-        * In order to create our shared memory area, we have to be loaded via
-        * shared_preload_libraries.  If not, fall out without hooking into any of
-        * the main system.  (We don't throw error here because it seems useful to
-        * allow the pg_stat_statements functions to be created even when the
-        * module isn't active.  The functions must protect themselves against
-        * being called then, however.)
-        */
-       if (!process_shared_preload_libraries_in_progress)
-               return;
-
-       /*
-        * Define (or redefine) custom GUC variables.
-        */
-       DefineCustomIntVariable("pg_stat_statements.max",
-         "Sets the maximum number of statements tracked by pg_stat_statements.",
-                                                       NULL,
-                                                       &pgss_max,
-                                                       5000,
-                                                       100,
-                                                       INT_MAX,
-                                                       PGC_POSTMASTER,
-                                                       0,
-                                                       NULL,
-                                                       NULL,
-                                                       NULL);
-
-       DefineCustomEnumVariable("pg_stat_statements.track",
-                          "Selects which statements are tracked by pg_stat_statements.",
-                                                        NULL,
-                                                        &pgss_track,
-                                                        PGSS_TRACK_TOP,
-                                                        track_options,
-                                                        PGC_SUSET,
-                                                        0,
-                                                        NULL,
-                                                        NULL,
-                                                        NULL);
-
-       DefineCustomBoolVariable("pg_stat_statements.track_utility",
-          "Selects whether utility commands are tracked by pg_stat_statements.",
-                                                        NULL,
-                                                        &pgss_track_utility,
-                                                        true,
-                                                        PGC_SUSET,
-                                                        0,
-                                                        NULL,
-                                                        NULL,
-                                                        NULL);
-
-       DefineCustomBoolVariable("pg_stat_statements.save",
-                          "Save pg_stat_statements statistics across server shutdowns.",
-                                                        NULL,
-                                                        &pgss_save,
-                                                        true,
-                                                        PGC_SIGHUP,
-                                                        0,
-                                                        NULL,
-                                                        NULL,
-                                                        NULL);
-
-       EmitWarningsOnPlaceholders("pg_stat_statements");
-
-       /*
-        * Request additional shared resources.  (These are no-ops if we're not in
-        * the postmaster process.)  We'll allocate or attach to the shared
-        * resources in pgss_shmem_startup().
-        */
-       RequestAddinShmemSpace(pgss_memsize());
-       RequestAddinLWLocks(1);
-
-       /*
-        * Install hooks.
-        */
-       prev_shmem_startup_hook = shmem_startup_hook;
-       shmem_startup_hook = pgss_shmem_startup;
-       prev_post_parse_analyze_hook = post_parse_analyze_hook;
-       post_parse_analyze_hook = pgss_post_parse_analyze;
-       prev_ExecutorStart = ExecutorStart_hook;
-       ExecutorStart_hook = pgss_ExecutorStart;
-       prev_ExecutorRun = ExecutorRun_hook;
-       ExecutorRun_hook = pgss_ExecutorRun;
-       prev_ExecutorFinish = ExecutorFinish_hook;
-       ExecutorFinish_hook = pgss_ExecutorFinish;
-       prev_ExecutorEnd = ExecutorEnd_hook;
-       ExecutorEnd_hook = pgss_ExecutorEnd;
-       prev_ProcessUtility = ProcessUtility_hook;
-       ProcessUtility_hook = pgss_ProcessUtility;
-}
-
-/*
- * Module unload callback
- */
-void
-_PG_fini(void)
-{
-       /* Uninstall hooks. */
-       shmem_startup_hook = prev_shmem_startup_hook;
-       post_parse_analyze_hook = prev_post_parse_analyze_hook;
-       ExecutorStart_hook = prev_ExecutorStart;
-       ExecutorRun_hook = prev_ExecutorRun;
-       ExecutorFinish_hook = prev_ExecutorFinish;
-       ExecutorEnd_hook = prev_ExecutorEnd;
-       ProcessUtility_hook = prev_ProcessUtility;
-}
-
-/*
- * shmem_startup hook: allocate or attach to shared memory,
- * then load any pre-existing statistics from file.
- * Also create and load the query-texts file, which is expected to exist
- * (even if empty) while the module is enabled.
- */
-static void
-pgss_shmem_startup(void)
-{
-       bool            found;
-       HASHCTL         info;
-       FILE       *file = NULL;
-       FILE       *qfile = NULL;
-       uint32          header;
-       int32           num;
-       int32           pgver;
-       int32           i;
-       int                     buffer_size;
-       char       *buffer = NULL;
-
-       if (prev_shmem_startup_hook)
-               prev_shmem_startup_hook();
-
-       /* reset in case this is a restart within the postmaster */
-       pgss = NULL;
-       pgss_hash = NULL;
-
-       /*
-        * Create or attach to the shared memory state, including hash table
-        */
-       LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
-
-       pgss = ShmemInitStruct("pg_stat_statements",
-                                                  sizeof(pgssSharedState),
-                                                  &found);
-
-       if (!found)
-       {
-               /* First time through ... */
-               pgss->lock = LWLockAssign();
-               pgss->cur_median_usage = ASSUMED_MEDIAN_INIT;
-               pgss->mean_query_len = ASSUMED_LENGTH_INIT;
-               SpinLockInit(&pgss->mutex);
-               pgss->extent = 0;
-               pgss->n_writers = 0;
-               pgss->gc_count = 0;
-       }
-
-       memset(&info, 0, sizeof(info));
-       info.keysize = sizeof(pgssHashKey);
-       info.entrysize = sizeof(pgssEntry);
-       info.hash = pgss_hash_fn;
-       info.match = pgss_match_fn;
-       pgss_hash = ShmemInitHash("pg_stat_statements hash",
-                                                         pgss_max, pgss_max,
-                                                         &info,
-                                                         HASH_ELEM | HASH_FUNCTION | HASH_COMPARE);
-
-       LWLockRelease(AddinShmemInitLock);
-
-       /*
-        * If we're in the postmaster (or a standalone backend...), set up a shmem
-        * exit hook to dump the statistics to disk.
-        */
-       if (!IsUnderPostmaster)
-               on_shmem_exit(pgss_shmem_shutdown, (Datum) 0);
-
-       /*
-        * Done if some other process already completed our initialization.
-        */
-       if (found)
-               return;
-
-       /*
-        * Note: we don't bother with locks here, because there should be no other
-        * processes running when this code is reached.
-        */
-
-       /* Unlink query text file possibly left over from crash */
-       unlink(PGSS_TEXT_FILE);
-
-       /* Allocate new query text temp file */
-       qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
-       if (qfile == NULL)
-               goto write_error;
-
-       /*
-        * If we were told not to load old statistics, we're done.  (Note we do
-        * not try to unlink any old dump file in this case.  This seems a bit
-        * questionable but it's the historical behavior.)
-        */
-       if (!pgss_save)
-       {
-               FreeFile(qfile);
-               return;
-       }
-
-       /*
-        * Attempt to load old statistics from the dump file.
-        */
-       file = AllocateFile(PGSS_DUMP_FILE, PG_BINARY_R);
-       if (file == NULL)
-       {
-               if (errno != ENOENT)
-                       goto read_error;
-               /* No existing persisted stats file, so we're done */
-               FreeFile(qfile);
-               return;
-       }
-
-       buffer_size = 2048;
-       buffer = (char *) palloc(buffer_size);
-
-       if (fread(&header, sizeof(uint32), 1, file) != 1 ||
-               fread(&pgver, sizeof(uint32), 1, file) != 1 ||
-               fread(&num, sizeof(int32), 1, file) != 1)
-               goto read_error;
-
-       if (header != PGSS_FILE_HEADER ||
-               pgver != PGSS_PG_MAJOR_VERSION)
-               goto data_error;
-
-       for (i = 0; i < num; i++)
-       {
-               pgssEntry       temp;
-               pgssEntry  *entry;
-               Size            query_offset;
-
-               if (fread(&temp, sizeof(pgssEntry), 1, file) != 1)
-                       goto read_error;
-
-               /* Encoding is the only field we can easily sanity-check */
-               if (!PG_VALID_BE_ENCODING(temp.encoding))
-                       goto data_error;
-
-               /* Resize buffer as needed */
-               if (temp.query_len >= buffer_size)
-               {
-                       buffer_size = Max(buffer_size * 2, temp.query_len + 1);
-                       buffer = repalloc(buffer, buffer_size);
-               }
-
-               if (fread(buffer, 1, temp.query_len + 1, file) != temp.query_len + 1)
-                       goto read_error;
-
-               /* Should have a trailing null, but let's make sure */
-               buffer[temp.query_len] = '\0';
-
-               /* Skip loading "sticky" entries */
-               if (temp.counters.calls == 0)
-                       continue;
-
-               /* Store the query text */
-               query_offset = pgss->extent;
-               if (fwrite(buffer, 1, temp.query_len + 1, qfile) != temp.query_len + 1)
-                       goto write_error;
-               pgss->extent += temp.query_len + 1;
-
-               /* make the hashtable entry (discards old entries if too many) */
-               entry = entry_alloc(&temp.key, query_offset, temp.query_len,
-                                                       temp.encoding,
-                                                       false);
-
-               /* copy in the actual stats */
-               entry->counters = temp.counters;
-       }
-
-       pfree(buffer);
-       FreeFile(file);
-       FreeFile(qfile);
-
-       /*
-        * Remove the persisted stats file so it's not included in
-        * backups/replication slaves, etc.  A new file will be written on next
-        * shutdown.
-        *
-        * Note: it's okay if the PGSS_TEXT_FILE is included in a basebackup,
-        * because we remove that file on startup; it acts inversely to
-        * PGSS_DUMP_FILE, in that it is only supposed to be around when the
-        * server is running, whereas PGSS_DUMP_FILE is only supposed to be around
-        * when the server is not running.  Leaving the file creates no danger of
-        * a newly restored database having a spurious record of execution costs,
-        * which is what we're really concerned about here.
-        */
-       unlink(PGSS_DUMP_FILE);
-
-       return;
-
-read_error:
-       ereport(LOG,
-                       (errcode_for_file_access(),
-                        errmsg("could not read pg_stat_statement file \"%s\": %m",
-                                       PGSS_DUMP_FILE)));
-       goto fail;
-data_error:
-       ereport(LOG,
-                       (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                        errmsg("ignoring invalid data in pg_stat_statement file \"%s\"",
-                                       PGSS_DUMP_FILE)));
-       goto fail;
-write_error:
-       ereport(LOG,
-                       (errcode_for_file_access(),
-                        errmsg("could not write pg_stat_statement file \"%s\": %m",
-                                       PGSS_TEXT_FILE)));
-fail:
-       if (buffer)
-               pfree(buffer);
-       if (file)
-               FreeFile(file);
-       if (qfile)
-               FreeFile(qfile);
-       /* If possible, throw away the bogus file; ignore any error */
-       unlink(PGSS_DUMP_FILE);
-
-       /*
-        * Don't unlink PGSS_TEXT_FILE here; it should always be around while the
-        * server is running with pg_stat_statements enabled
-        */
-}
-
-/*
- * shmem_shutdown hook: Dump statistics into file.
- *
- * Note: we don't bother with acquiring lock, because there should be no
- * other processes running when this is called.
- */
-static void
-pgss_shmem_shutdown(int code, Datum arg)
-{
-       FILE       *file;
-       char       *qbuffer = NULL;
-       Size            qbuffer_size = 0;
-       HASH_SEQ_STATUS hash_seq;
-       int32           num_entries;
-       pgssEntry  *entry;
-
-       /* Don't try to dump during a crash. */
-       if (code)
-               return;
-
-       /* Safety check ... shouldn't get here unless shmem is set up. */
-       if (!pgss || !pgss_hash)
-               return;
-
-       /* Don't dump if told not to. */
-       if (!pgss_save)
-               return;
-
-       file = AllocateFile(PGSS_DUMP_FILE ".tmp", PG_BINARY_W);
-       if (file == NULL)
-               goto error;
-
-       if (fwrite(&PGSS_FILE_HEADER, sizeof(uint32), 1, file) != 1)
-               goto error;
-       if (fwrite(&PGSS_PG_MAJOR_VERSION, sizeof(uint32), 1, file) != 1)
-               goto error;
-       num_entries = hash_get_num_entries(pgss_hash);
-       if (fwrite(&num_entries, sizeof(int32), 1, file) != 1)
-               goto error;
-
-       qbuffer = qtext_load_file(&qbuffer_size);
-       if (qbuffer == NULL)
-               goto error;
-
-       /*
-        * When serializing to disk, we store query texts immediately after their
-        * entry data.  Any orphaned query texts are thereby excluded.
-        */
-       hash_seq_init(&hash_seq, pgss_hash);
-       while ((entry = hash_seq_search(&hash_seq)) != NULL)
-       {
-               int                     len = entry->query_len;
-               char       *qstr = qtext_fetch(entry->query_offset, len,
-                                                                          qbuffer, qbuffer_size);
-
-               if (qstr == NULL)
-                       continue;                       /* Ignore any entries with bogus texts */
-
-               if (fwrite(entry, sizeof(pgssEntry), 1, file) != 1 ||
-                       fwrite(qstr, 1, len + 1, file) != len + 1)
-               {
-                       /* note: we assume hash_seq_term won't change errno */
-                       hash_seq_term(&hash_seq);
-                       goto error;
-               }
-       }
-
-       free(qbuffer);
-       qbuffer = NULL;
-
-       if (FreeFile(file))
-       {
-               file = NULL;
-               goto error;
-       }
-
-       /*
-        * Rename file into place, so we atomically replace any old one.
-        */
-       if (rename(PGSS_DUMP_FILE ".tmp", PGSS_DUMP_FILE) != 0)
-               ereport(LOG,
-                               (errcode_for_file_access(),
-                                errmsg("could not rename pg_stat_statement file \"%s\": %m",
-                                               PGSS_DUMP_FILE ".tmp")));
-
-       /* Unlink query-texts file; it's not needed while shutdown */
-       unlink(PGSS_TEXT_FILE);
-
-       return;
-
-error:
-       ereport(LOG,
-                       (errcode_for_file_access(),
-                        errmsg("could not write pg_stat_statement file \"%s\": %m",
-                                       PGSS_DUMP_FILE ".tmp")));
-       if (qbuffer)
-               free(qbuffer);
-       if (file)
-               FreeFile(file);
-       unlink(PGSS_DUMP_FILE ".tmp");
-       unlink(PGSS_TEXT_FILE);
-}
-
-/*
- * Post-parse-analysis hook: mark query with a queryId
- */
-static void
-pgss_post_parse_analyze(ParseState *pstate, Query *query)
-{
-       pgssJumbleState jstate;
-
-       if (prev_post_parse_analyze_hook)
-               prev_post_parse_analyze_hook(pstate, query);
-
-       /* Assert we didn't do this already */
-       Assert(query->queryId == 0);
-
-       /* Safety check... */
-       if (!pgss || !pgss_hash)
-               return;
-
-       /*
-        * Utility statements get queryId zero.  We do this even in cases where
-        * the statement contains an optimizable statement for which a queryId
-        * could be derived (such as EXPLAIN or DECLARE CURSOR).  For such cases,
-        * runtime control will first go through ProcessUtility and then the
-        * executor, and we don't want the executor hooks to do anything, since we
-        * are already measuring the statement's costs at the utility level.
-        */
-       if (query->utilityStmt)
-       {
-               query->queryId = 0;
-               return;
-       }
-
-       /* Set up workspace for query jumbling */
-       jstate.jumble = (unsigned char *) palloc(JUMBLE_SIZE);
-       jstate.jumble_len = 0;
-       jstate.clocations_buf_size = 32;
-       jstate.clocations = (pgssLocationLen *)
-               palloc(jstate.clocations_buf_size * sizeof(pgssLocationLen));
-       jstate.clocations_count = 0;
-
-       /* Compute query ID and mark the Query node with it */
-       JumbleQuery(&jstate, query);
-       query->queryId = hash_any(jstate.jumble, jstate.jumble_len);
-
-       /*
-        * If we are unlucky enough to get a hash of zero, use 1 instead, to
-        * prevent confusion with the utility-statement case.
-        */
-       if (query->queryId == 0)
-               query->queryId = 1;
-
-       /*
-        * If we were able to identify any ignorable constants, we immediately
-        * create a hash table entry for the query, so that we can record the
-        * normalized form of the query string.  If there were no such constants,
-        * the normalized string would be the same as the query text anyway, so
-        * there's no need for an early entry.
-        */
-       if (jstate.clocations_count > 0)
-               pgss_store(pstate->p_sourcetext,
-                                  query->queryId,
-                                  0,
-                                  0,
-                                  NULL,
-                                  &jstate);
-}
-
-/*
- * ExecutorStart hook: start up tracking if needed
- */
-static void
-pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
-{
-       if (prev_ExecutorStart)
-               prev_ExecutorStart(queryDesc, eflags);
-       else
-               standard_ExecutorStart(queryDesc, eflags);
-
-       /*
-        * If query has queryId zero, don't track it.  This prevents double
-        * counting of optimizable statements that are directly contained in
-        * utility statements.
-        */
-       if (pgss_enabled() && queryDesc->plannedstmt->queryId != 0)
-       {
-               /*
-                * Set up to track total elapsed time in ExecutorRun.  Make sure the
-                * space is allocated in the per-query context so it will go away at
-                * ExecutorEnd.
-                */
-               if (queryDesc->totaltime == NULL)
-               {
-                       MemoryContext oldcxt;
-
-                       oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
-                       queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_ALL);
-                       MemoryContextSwitchTo(oldcxt);
-               }
-       }
-}
-
-/*
- * ExecutorRun hook: all we need do is track nesting depth
- */
-static void
-pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, long count)
-{
-       nested_level++;
-       PG_TRY();
-       {
-               if (prev_ExecutorRun)
-                       prev_ExecutorRun(queryDesc, direction, count);
-               else
-                       standard_ExecutorRun(queryDesc, direction, count);
-               nested_level--;
-       }
-       PG_CATCH();
-       {
-               nested_level--;
-               PG_RE_THROW();
-       }
-       PG_END_TRY();
-}
-
-/*
- * ExecutorFinish hook: all we need do is track nesting depth
- */
-static void
-pgss_ExecutorFinish(QueryDesc *queryDesc)
-{
-       nested_level++;
-       PG_TRY();
-       {
-               if (prev_ExecutorFinish)
-                       prev_ExecutorFinish(queryDesc);
-               else
-                       standard_ExecutorFinish(queryDesc);
-               nested_level--;
-       }
-       PG_CATCH();
-       {
-               nested_level--;
-               PG_RE_THROW();
-       }
-       PG_END_TRY();
-}
-
-/*
- * ExecutorEnd hook: store results if needed
- */
-static void
-pgss_ExecutorEnd(QueryDesc *queryDesc)
-{
-       uint32          queryId = queryDesc->plannedstmt->queryId;
-
-       if (queryId != 0 && queryDesc->totaltime && pgss_enabled())
-       {
-               /*
-                * Make sure stats accumulation is done.  (Note: it's okay if several
-                * levels of hook all do this.)
-                */
-               InstrEndLoop(queryDesc->totaltime);
-
-               pgss_store(queryDesc->sourceText,
-                                  queryId,
-                                  queryDesc->totaltime->total * 1000.0,                /* convert to msec */
-                                  queryDesc->estate->es_processed,
-                                  &queryDesc->totaltime->bufusage,
-                                  NULL);
-       }
-
-       if (prev_ExecutorEnd)
-               prev_ExecutorEnd(queryDesc);
-       else
-               standard_ExecutorEnd(queryDesc);
-}
-
-/*
- * ProcessUtility hook
- */
-static void
-pgss_ProcessUtility(Node *parsetree, const char *queryString,
-                                       ProcessUtilityContext context, ParamListInfo params,
-                                       DestReceiver *dest, char *completionTag)
-{
-       /*
-        * If it's an EXECUTE statement, we don't track it and don't increment the
-        * nesting level.  This allows the cycles to be charged to the underlying
-        * PREPARE instead (by the Executor hooks), which is much more useful.
-        *
-        * We also don't track execution of PREPARE.  If we did, we would get one
-        * hash table entry for the PREPARE (with hash calculated from the query
-        * string), and then a different one with the same query string (but hash
-        * calculated from the query tree) would be used to accumulate costs of
-        * ensuing EXECUTEs.  This would be confusing, and inconsistent with other
-        * cases where planning time is not included at all.
-        *
-        * Likewise, we don't track execution of DEALLOCATE.
-        */
-       if (pgss_track_utility && pgss_enabled() &&
-               !IsA(parsetree, ExecuteStmt) &&
-               !IsA(parsetree, PrepareStmt) &&
-               !IsA(parsetree, DeallocateStmt))
-       {
-               instr_time      start;
-               instr_time      duration;
-               uint64          rows;
-               BufferUsage bufusage_start,
-                                       bufusage;
-               uint32          queryId;
-
-               bufusage_start = pgBufferUsage;
-               INSTR_TIME_SET_CURRENT(start);
-
-               nested_level++;
-               PG_TRY();
-               {
-                       if (prev_ProcessUtility)
-                               prev_ProcessUtility(parsetree, queryString,
-                                                                       context, params,
-                                                                       dest, completionTag);
-                       else
-                               standard_ProcessUtility(parsetree, queryString,
-                                                                               context, params,
-                                                                               dest, completionTag);
-                       nested_level--;
-               }
-               PG_CATCH();
-               {
-                       nested_level--;
-                       PG_RE_THROW();
-               }
-               PG_END_TRY();
-
-               INSTR_TIME_SET_CURRENT(duration);
-               INSTR_TIME_SUBTRACT(duration, start);
-
-               /* parse command tag to retrieve the number of affected rows. */
-               if (completionTag &&
-                       strncmp(completionTag, "COPY ", 5) == 0)
-               {
-#ifdef HAVE_STRTOULL
-                       rows = strtoull(completionTag + 5, NULL, 10);
-#else
-                       rows = strtoul(completionTag + 5, NULL, 10);
-#endif
-               }
-               else
-                       rows = 0;
-
-               /* calc differences of buffer counters. */
-               bufusage.shared_blks_hit =
-                       pgBufferUsage.shared_blks_hit - bufusage_start.shared_blks_hit;
-               bufusage.shared_blks_read =
-                       pgBufferUsage.shared_blks_read - bufusage_start.shared_blks_read;
-               bufusage.shared_blks_dirtied =
-                       pgBufferUsage.shared_blks_dirtied - bufusage_start.shared_blks_dirtied;
-               bufusage.shared_blks_written =
-                       pgBufferUsage.shared_blks_written - bufusage_start.shared_blks_written;
-               bufusage.local_blks_hit =
-                       pgBufferUsage.local_blks_hit - bufusage_start.local_blks_hit;
-               bufusage.local_blks_read =
-                       pgBufferUsage.local_blks_read - bufusage_start.local_blks_read;
-               bufusage.local_blks_dirtied =
-                       pgBufferUsage.local_blks_dirtied - bufusage_start.local_blks_dirtied;
-               bufusage.local_blks_written =
-                       pgBufferUsage.local_blks_written - bufusage_start.local_blks_written;
-               bufusage.temp_blks_read =
-                       pgBufferUsage.temp_blks_read - bufusage_start.temp_blks_read;
-               bufusage.temp_blks_written =
-                       pgBufferUsage.temp_blks_written - bufusage_start.temp_blks_written;
-               bufusage.blk_read_time = pgBufferUsage.blk_read_time;
-               INSTR_TIME_SUBTRACT(bufusage.blk_read_time, bufusage_start.blk_read_time);
-               bufusage.blk_write_time = pgBufferUsage.blk_write_time;
-               INSTR_TIME_SUBTRACT(bufusage.blk_write_time, bufusage_start.blk_write_time);
-
-               /* For utility statements, we just hash the query string directly */
-               queryId = pgss_hash_string(queryString);
-
-               pgss_store(queryString,
-                                  queryId,
-                                  INSTR_TIME_GET_MILLISEC(duration),
-                                  rows,
-                                  &bufusage,
-                                  NULL);
-       }
-       else
-       {
-               if (prev_ProcessUtility)
-                       prev_ProcessUtility(parsetree, queryString,
-                                                               context, params,
-                                                               dest, completionTag);
-               else
-                       standard_ProcessUtility(parsetree, queryString,
-                                                                       context, params,
-                                                                       dest, completionTag);
-       }
-}
-
-/*
- * Calculate hash value for a key
- */
-static uint32
-pgss_hash_fn(const void *key, Size keysize)
-{
-       const pgssHashKey *k = (const pgssHashKey *) key;
-
-       return hash_uint32((uint32) k->userid) ^
-               hash_uint32((uint32) k->dbid) ^
-               hash_uint32((uint32) k->queryid);
-}
-
-/*
- * Compare two keys - zero means match
- */
-static int
-pgss_match_fn(const void *key1, const void *key2, Size keysize)
-{
-       const pgssHashKey *k1 = (const pgssHashKey *) key1;
-       const pgssHashKey *k2 = (const pgssHashKey *) key2;
-
-       if (k1->userid == k2->userid &&
-               k1->dbid == k2->dbid &&
-               k1->queryid == k2->queryid)
-               return 0;
-       else
-               return 1;
-}
-
-/*
- * Given an arbitrarily long query string, produce a hash for the purposes of
- * identifying the query, without normalizing constants.  Used when hashing
- * utility statements.
- */
-static uint32
-pgss_hash_string(const char *str)
-{
-       return hash_any((const unsigned char *) str, strlen(str));
-}
-
-/*
- * Store some statistics for a statement.
- *
- * If jstate is not NULL then we're trying to create an entry for which
- * we have no statistics as yet; we just want to record the normalized
- * query string.  total_time, rows, bufusage are ignored in this case.
- */
-static void
-pgss_store(const char *query, uint32 queryId,
-                  double total_time, uint64 rows,
-                  const BufferUsage *bufusage,
-                  pgssJumbleState *jstate)
-{
-       pgssHashKey key;
-       pgssEntry  *entry;
-       char       *norm_query = NULL;
-       int                     encoding = GetDatabaseEncoding();
-       int                     query_len;
-
-       Assert(query != NULL);
-
-       /* Safety check... */
-       if (!pgss || !pgss_hash)
-               return;
-
-       query_len = strlen(query);
-
-       /* Set up key for hashtable search */
-       key.userid = GetUserId();
-       key.dbid = MyDatabaseId;
-       key.queryid = queryId;
-
-       /* Lookup the hash table entry with shared lock. */
-       LWLockAcquire(pgss->lock, LW_SHARED);
-
-       entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
-
-       /* Create new entry, if not present */
-       if (!entry)
-       {
-               Size            query_offset;
-               int                     gc_count;
-               bool            stored;
-               bool            do_gc;
-
-               /*
-                * Create a new, normalized query string if caller asked.  We don't
-                * need to hold the lock while doing this work.  (Note: in any case,
-                * it's possible that someone else creates a duplicate hashtable entry
-                * in the interval where we don't hold the lock below.  That case is
-                * handled by entry_alloc.)
-                */
-               if (jstate)
-               {
-                       LWLockRelease(pgss->lock);
-                       norm_query = generate_normalized_query(jstate, query,
-                                                                                                  &query_len,
-                                                                                                  encoding);
-                       LWLockAcquire(pgss->lock, LW_SHARED);
-               }
-
-               /* Append new query text to file with only shared lock held */
-               stored = qtext_store(norm_query ? norm_query : query, query_len,
-                                                        &query_offset, &gc_count);
-
-               /*
-                * Determine whether we need to garbage collect external query texts
-                * while the shared lock is still held.  This micro-optimization
-                * avoids taking the time to decide this while holding exclusive lock.
-                */
-               do_gc = need_gc_qtexts();
-
-               /* Need exclusive lock to make a new hashtable entry - promote */
-               LWLockRelease(pgss->lock);
-               LWLockAcquire(pgss->lock, LW_EXCLUSIVE);
-
-               /*
-                * A garbage collection may have occurred while we weren't holding the
-                * lock.  In the unlikely event that this happens, the query text we
-                * stored above will have been garbage collected, so write it again.
-                * This should be infrequent enough that doing it while holding
-                * exclusive lock isn't a performance problem.
-                */
-               if (!stored || pgss->gc_count != gc_count)
-                       stored = qtext_store(norm_query ? norm_query : query, query_len,
-                                                                &query_offset, NULL);
-
-               /* If we failed to write to the text file, give up */
-               if (!stored)
-                       goto done;
-
-               /* OK to create a new hashtable entry */
-               entry = entry_alloc(&key, query_offset, query_len, encoding,
-                                                       jstate != NULL);
-
-               /* If needed, perform garbage collection while exclusive lock held */
-               if (do_gc)
-                       gc_qtexts();
-       }
-
-       /* Increment the counts, except when jstate is not NULL */
-       if (!jstate)
-       {
-               /*
-                * Grab the spinlock while updating the counters (see comment about
-                * locking rules at the head of the file)
-                */
-               volatile pgssEntry *e = (volatile pgssEntry *) entry;
-
-               SpinLockAcquire(&e->mutex);
-
-               /* "Unstick" entry if it was previously sticky */
-               if (e->counters.calls == 0)
-                       e->counters.usage = USAGE_INIT;
-
-               e->counters.calls += 1;
-               e->counters.total_time += total_time;
-               e->counters.rows += rows;
-               e->counters.shared_blks_hit += bufusage->shared_blks_hit;
-               e->counters.shared_blks_read += bufusage->shared_blks_read;
-               e->counters.shared_blks_dirtied += bufusage->shared_blks_dirtied;
-               e->counters.shared_blks_written += bufusage->shared_blks_written;
-               e->counters.local_blks_hit += bufusage->local_blks_hit;
-               e->counters.local_blks_read += bufusage->local_blks_read;
-               e->counters.local_blks_dirtied += bufusage->local_blks_dirtied;
-               e->counters.local_blks_written += bufusage->local_blks_written;
-               e->counters.temp_blks_read += bufusage->temp_blks_read;
-               e->counters.temp_blks_written += bufusage->temp_blks_written;
-               e->counters.blk_read_time += INSTR_TIME_GET_MILLISEC(bufusage->blk_read_time);
-               e->counters.blk_write_time += INSTR_TIME_GET_MILLISEC(bufusage->blk_write_time);
-               e->counters.usage += USAGE_EXEC(total_time);
-
-               SpinLockRelease(&e->mutex);
-       }
-
-done:
-       LWLockRelease(pgss->lock);
-
-       /* We postpone this clean-up until we're out of the lock */
-       if (norm_query)
-               pfree(norm_query);
-}
-
-/*
- * Reset all statement statistics.
- */
-Datum
-pg_stat_statements_reset(PG_FUNCTION_ARGS)
-{
-       if (!pgss || !pgss_hash)
-               ereport(ERROR,
-                               (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
-                                errmsg("pg_stat_statements must be loaded via shared_preload_libraries")));
-       entry_reset();
-       PG_RETURN_VOID();
-}
-
-/* Number of output arguments (columns) for various API versions */
-#define PG_STAT_STATEMENTS_COLS_V1_0   14
-#define PG_STAT_STATEMENTS_COLS_V1_1   18
-#define PG_STAT_STATEMENTS_COLS_V1_2   19
-#define PG_STAT_STATEMENTS_COLS                        19              /* maximum of above */
-
-/*
- * Retrieve statement statistics.
- *
- * The SQL API of this function has changed multiple times, and will likely
- * do so again in future.  To support the case where a newer version of this
- * loadable module is being used with an old SQL declaration of the function,
- * we continue to support the older API versions.  For 1.2 and later, the
- * expected API version is identified by embedding it in the C name of the
- * function.  Unfortunately we weren't bright enough to do that for 1.1.
- */
-Datum
-pg_stat_statements_1_2(PG_FUNCTION_ARGS)
-{
-       bool            showtext = PG_GETARG_BOOL(0);
-
-       pg_stat_statements_internal(fcinfo, PGSS_V1_2, showtext);
-
-       return (Datum) 0;
-}
-
-/*
- * Legacy entry point for pg_stat_statements() API versions 1.0 and 1.1.
- * This can be removed someday, perhaps.
- */
-Datum
-pg_stat_statements(PG_FUNCTION_ARGS)
-{
-       /* If it's really API 1.1, we'll figure that out below */
-       pg_stat_statements_internal(fcinfo, PGSS_V1_0, true);
-
-       return (Datum) 0;
-}
-
-/* Common code for all versions of pg_stat_statements() */
-static void
-pg_stat_statements_internal(FunctionCallInfo fcinfo,
-                                                       pgssVersion api_version,
-                                                       bool showtext)
-{
-       ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
-       TupleDesc       tupdesc;
-       Tuplestorestate *tupstore;
-       MemoryContext per_query_ctx;
-       MemoryContext oldcontext;
-       Oid                     userid = GetUserId();
-       bool            is_superuser = superuser();
-       char       *qbuffer = NULL;
-       Size            qbuffer_size = 0;
-       Size            extent = 0;
-       int                     gc_count = 0;
-       HASH_SEQ_STATUS hash_seq;
-       pgssEntry  *entry;
-
-       /* hash table must exist already */
-       if (!pgss || !pgss_hash)
-               ereport(ERROR,
-                               (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
-                                errmsg("pg_stat_statements must be loaded via shared_preload_libraries")));
-
-       /* check to see if caller supports us returning a tuplestore */
-       if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
-               ereport(ERROR,
-                               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-                                errmsg("set-valued function called in context that cannot accept a set")));
-       if (!(rsinfo->allowedModes & SFRM_Materialize))
-               ereport(ERROR,
-                               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-                                errmsg("materialize mode required, but it is not " \
-                                               "allowed in this context")));
-
-       /* Switch into long-lived context to construct returned data structures */
-       per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
-       oldcontext = MemoryContextSwitchTo(per_query_ctx);
-
-       /* Build a tuple descriptor for our result type */
-       if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
-               elog(ERROR, "return type must be a row type");
-
-       /*
-        * Check we have the expected number of output arguments.  Aside from
-        * being a good safety check, we need a kluge here to detect API version
-        * 1.1, which was wedged into the code in an ill-considered way.
-        */
-       switch (tupdesc->natts)
-       {
-               case PG_STAT_STATEMENTS_COLS_V1_0:
-                       if (api_version != PGSS_V1_0)
-                               elog(ERROR, "incorrect number of output arguments");
-                       break;
-               case PG_STAT_STATEMENTS_COLS_V1_1:
-                       /* pg_stat_statements() should have told us 1.0 */
-                       if (api_version != PGSS_V1_0)
-                               elog(ERROR, "incorrect number of output arguments");
-                       api_version = PGSS_V1_1;
-                       break;
-               case PG_STAT_STATEMENTS_COLS_V1_2:
-                       if (api_version != PGSS_V1_2)
-                               elog(ERROR, "incorrect number of output arguments");
-                       break;
-               default:
-                       elog(ERROR, "incorrect number of output arguments");
-       }
-
-       tupstore = tuplestore_begin_heap(true, false, work_mem);
-       rsinfo->returnMode = SFRM_Materialize;
-       rsinfo->setResult = tupstore;
-       rsinfo->setDesc = tupdesc;
-
-       MemoryContextSwitchTo(oldcontext);
-
-       /*
-        * We'd like to load the query text file (if needed) while not holding any
-        * lock on pgss->lock.  In the worst case we'll have to do this again
-        * after we have the lock, but it's unlikely enough to make this a win
-        * despite occasional duplicated work.  We need to reload if anybody
-        * writes to the file (either a retail qtext_store(), or a garbage
-        * collection) between this point and where we've gotten shared lock.  If
-        * a qtext_store is actually in progress when we look, we might as well
-        * skip the speculative load entirely.
-        */
-       if (showtext)
-       {
-               int                     n_writers;
-
-               /* Take the mutex so we can examine variables */
-               {
-                       volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
-
-                       SpinLockAcquire(&s->mutex);
-                       extent = s->extent;
-                       n_writers = s->n_writers;
-                       gc_count = s->gc_count;
-                       SpinLockRelease(&s->mutex);
-               }
-
-               /* No point in loading file now if there are active writers */
-               if (n_writers == 0)
-                       qbuffer = qtext_load_file(&qbuffer_size);
-       }
-
-       /*
-        * Get shared lock, load or reload the query text file if we must, and
-        * iterate over the hashtable entries.
-        *
-        * With a large hash table, we might be holding the lock rather longer
-        * than one could wish.  However, this only blocks creation of new hash
-        * table entries, and the larger the hash table the less likely that is to
-        * be needed.  So we can hope this is okay.  Perhaps someday we'll decide
-        * we need to partition the hash table to limit the time spent holding any
-        * one lock.
-        */
-       LWLockAcquire(pgss->lock, LW_SHARED);
-
-       if (showtext)
-       {
-               /*
-                * Here it is safe to examine extent and gc_count without taking the
-                * mutex.  Note that although other processes might change
-                * pgss->extent just after we look at it, the strings they then write
-                * into the file cannot yet be referenced in the hashtable, so we
-                * don't care whether we see them or not.
-                *
-                * If qtext_load_file fails, we just press on; we'll return NULL for
-                * every query text.
-                */
-               if (qbuffer == NULL ||
-                       pgss->extent != extent ||
-                       pgss->gc_count != gc_count)
-               {
-                       if (qbuffer)
-                               free(qbuffer);
-                       qbuffer = qtext_load_file(&qbuffer_size);
-               }
-       }
-
-       hash_seq_init(&hash_seq, pgss_hash);
-       while ((entry = hash_seq_search(&hash_seq)) != NULL)
-       {
-               Datum           values[PG_STAT_STATEMENTS_COLS];
-               bool            nulls[PG_STAT_STATEMENTS_COLS];
-               int                     i = 0;
-               Counters        tmp;
-               int64           queryid = entry->key.queryid;
-
-               memset(values, 0, sizeof(values));
-               memset(nulls, 0, sizeof(nulls));
-
-               values[i++] = ObjectIdGetDatum(entry->key.userid);
-               values[i++] = ObjectIdGetDatum(entry->key.dbid);
-
-               if (is_superuser || entry->key.userid == userid)
-               {
-                       if (api_version >= PGSS_V1_2)
-                               values[i++] = Int64GetDatumFast(queryid);
-
-                       if (showtext)
-                       {
-                               char       *qstr = qtext_fetch(entry->query_offset,
-                                                                                          entry->query_len,
-                                                                                          qbuffer,
-                                                                                          qbuffer_size);
-
-                               if (qstr)
-                               {
-                                       char       *enc;
-
-                                       enc = pg_any_to_server(qstr,
-                                                                                  entry->query_len,
-                                                                                  entry->encoding);
-
-                                       values[i++] = CStringGetTextDatum(enc);
-
-                                       if (enc != qstr)
-                                               pfree(enc);
-                               }
-                               else
-                               {
-                                       /* Just return a null if we fail to find the text */
-                                       nulls[i++] = true;
-                               }
-                       }
-                       else
-                       {
-                               /* Query text not requested */
-                               nulls[i++] = true;
-                       }
-               }
-               else
-               {
-                       /* Don't show queryid */
-                       if (api_version >= PGSS_V1_2)
-                               nulls[i++] = true;
-
-                       /*
-                        * Don't show query text, but hint as to the reason for not doing
-                        * so if it was requested
-                        */
-                       if (showtext)
-                               values[i++] = CStringGetTextDatum("<insufficient privilege>");
-                       else
-                               nulls[i++] = true;
-               }
-
-               /* copy counters to a local variable to keep locking time short */
-               {
-                       volatile pgssEntry *e = (volatile pgssEntry *) entry;
-
-                       SpinLockAcquire(&e->mutex);
-                       tmp = e->counters;
-                       SpinLockRelease(&e->mutex);
-               }
-
-               /* Skip entry if unexecuted (ie, it's a pending "sticky" entry) */
-               if (tmp.calls == 0)
-                       continue;
-
-               values[i++] = Int64GetDatumFast(tmp.calls);
-               values[i++] = Float8GetDatumFast(tmp.total_time);
-               values[i++] = Int64GetDatumFast(tmp.rows);
-               values[i++] = Int64GetDatumFast(tmp.shared_blks_hit);
-               values[i++] = Int64GetDatumFast(tmp.shared_blks_read);
-               if (api_version >= PGSS_V1_1)
-                       values[i++] = Int64GetDatumFast(tmp.shared_blks_dirtied);
-               values[i++] = Int64GetDatumFast(tmp.shared_blks_written);
-               values[i++] = Int64GetDatumFast(tmp.local_blks_hit);
-               values[i++] = Int64GetDatumFast(tmp.local_blks_read);
-               if (api_version >= PGSS_V1_1)
-                       values[i++] = Int64GetDatumFast(tmp.local_blks_dirtied);
-               values[i++] = Int64GetDatumFast(tmp.local_blks_written);
-               values[i++] = Int64GetDatumFast(tmp.temp_blks_read);
-               values[i++] = Int64GetDatumFast(tmp.temp_blks_written);
-               if (api_version >= PGSS_V1_1)
-               {
-                       values[i++] = Float8GetDatumFast(tmp.blk_read_time);
-                       values[i++] = Float8GetDatumFast(tmp.blk_write_time);
-               }
-
-               Assert(i == (api_version == PGSS_V1_0 ? PG_STAT_STATEMENTS_COLS_V1_0 :
-                                        api_version == PGSS_V1_1 ? PG_STAT_STATEMENTS_COLS_V1_1 :
-                                        api_version == PGSS_V1_2 ? PG_STAT_STATEMENTS_COLS_V1_2 :
-                                        -1 /* fail if you forget to update this assert */ ));
-
-               tuplestore_putvalues(tupstore, tupdesc, values, nulls);
-       }
-
-       /* clean up and return the tuplestore */
-       LWLockRelease(pgss->lock);
-
-       if (qbuffer)
-               free(qbuffer);
-
-       tuplestore_donestoring(tupstore);
-}
-
-/*
- * Estimate shared memory space needed.
- */
-static Size
-pgss_memsize(void)
-{
-       Size            size;
-
-       size = MAXALIGN(sizeof(pgssSharedState));
-       size = add_size(size, hash_estimate_size(pgss_max, sizeof(pgssEntry)));
-
-       return size;
-}
-
-/*
- * Allocate a new hashtable entry.
- * caller must hold an exclusive lock on pgss->lock
- *
- * "query" need not be null-terminated; we rely on query_len instead
- *
- * If "sticky" is true, make the new entry artificially sticky so that it will
- * probably still be there when the query finishes execution.  We do this by
- * giving it a median usage value rather than the normal value.  (Strictly
- * speaking, query strings are normalized on a best effort basis, though it
- * would be difficult to demonstrate this even under artificial conditions.)
- *
- * Note: despite needing exclusive lock, it's not an error for the target
- * entry to already exist.  This is because pgss_store releases and
- * reacquires lock after failing to find a match; so someone else could
- * have made the entry while we waited to get exclusive lock.
- */
-static pgssEntry *
-entry_alloc(pgssHashKey *key, Size query_offset, int query_len, int encoding,
-                       bool sticky)
-{
-       pgssEntry  *entry;
-       bool            found;
-
-       /* Make space if needed */
-       while (hash_get_num_entries(pgss_hash) >= pgss_max)
-               entry_dealloc();
-
-       /* Find or create an entry with desired hash code */
-       entry = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER, &found);
-
-       if (!found)
-       {
-               /* New entry, initialize it */
-
-               /* reset the statistics */
-               memset(&entry->counters, 0, sizeof(Counters));
-               /* set the appropriate initial usage count */
-               entry->counters.usage = sticky ? pgss->cur_median_usage : USAGE_INIT;
-               /* re-initialize the mutex each time ... we assume no one using it */
-               SpinLockInit(&entry->mutex);
-               /* ... and don't forget the query text metadata */
-               Assert(query_len >= 0);
-               entry->query_offset = query_offset;
-               entry->query_len = query_len;
-               entry->encoding = encoding;
-       }
-
-       return entry;
-}
-
-/*
- * qsort comparator for sorting into increasing usage order
- */
-static int
-entry_cmp(const void *lhs, const void *rhs)
-{
-       double          l_usage = (*(pgssEntry *const *) lhs)->counters.usage;
-       double          r_usage = (*(pgssEntry *const *) rhs)->counters.usage;
-
-       if (l_usage < r_usage)
-               return -1;
-       else if (l_usage > r_usage)
-               return +1;
-       else
-               return 0;
-}
-
-/*
- * Deallocate least used entries.
- * Caller must hold an exclusive lock on pgss->lock.
- */
-static void
-entry_dealloc(void)
-{
-       HASH_SEQ_STATUS hash_seq;
-       pgssEntry **entries;
-       pgssEntry  *entry;
-       int                     nvictims;
-       int                     i;
-       Size            totlen = 0;
-
-       /*
-        * Sort entries by usage and deallocate USAGE_DEALLOC_PERCENT of them.
-        * While we're scanning the table, apply the decay factor to the usage
-        * values.
-        */
-
-       entries = palloc(hash_get_num_entries(pgss_hash) * sizeof(pgssEntry *));
-
-       i = 0;
-       hash_seq_init(&hash_seq, pgss_hash);
-       while ((entry = hash_seq_search(&hash_seq)) != NULL)
-       {
-               entries[i++] = entry;
-               /* "Sticky" entries get a different usage decay rate. */
-               if (entry->counters.calls == 0)
-                       entry->counters.usage *= STICKY_DECREASE_FACTOR;
-               else
-                       entry->counters.usage *= USAGE_DECREASE_FACTOR;
-               /* Accumulate total size, too. */
-               totlen += entry->query_len + 1;
-       }
-
-       qsort(entries, i, sizeof(pgssEntry *), entry_cmp);
-
-       if (i > 0)
-       {
-               /* Record the (approximate) median usage */
-               pgss->cur_median_usage = entries[i / 2]->counters.usage;
-               /* Record the mean query length */
-               pgss->mean_query_len = totlen / i;
-       }
-
-       nvictims = Max(10, i * USAGE_DEALLOC_PERCENT / 100);
-       nvictims = Min(nvictims, i);
-
-       for (i = 0; i < nvictims; i++)
-       {
-               hash_search(pgss_hash, &entries[i]->key, HASH_REMOVE, NULL);
-       }
-
-       pfree(entries);
-}
-
-/*
- * Given a null-terminated string, allocate a new entry in the external query
- * text file and store the string there.
- *
- * Although we could compute the string length via strlen(), callers already
- * have it handy, so we require them to pass it too.
- *
- * If successful, returns true, and stores the new entry's offset in the file
- * into *query_offset.  Also, if gc_count isn't NULL, *gc_count is set to the
- * number of garbage collections that have occurred so far.
- *
- * On failure, returns false.
- *
- * At least a shared lock on pgss->lock must be held by the caller, so as
- * to prevent a concurrent garbage collection.  Share-lock-holding callers
- * should pass a gc_count pointer to obtain the number of garbage collections,
- * so that they can recheck the count after obtaining exclusive lock to
- * detect whether a garbage collection occurred (and removed this entry).
- */
-static bool
-qtext_store(const char *query, int query_len,
-                       Size *query_offset, int *gc_count)
-{
-       Size            off;
-       int                     fd;
-
-       /*
-        * We use a spinlock to protect extent/n_writers/gc_count, so that
-        * multiple processes may execute this function concurrently.
-        */
-       {
-               volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
-
-               SpinLockAcquire(&s->mutex);
-               off = s->extent;
-               s->extent += query_len + 1;
-               s->n_writers++;
-               if (gc_count)
-                       *gc_count = s->gc_count;
-               SpinLockRelease(&s->mutex);
-       }
-
-       *query_offset = off;
-
-       /* Now write the data into the successfully-reserved part of the file */
-       fd = OpenTransientFile(PGSS_TEXT_FILE, O_RDWR | O_CREAT | PG_BINARY,
-                                                  S_IRUSR | S_IWUSR);
-       if (fd < 0)
-               goto error;
-
-       if (lseek(fd, off, SEEK_SET) != off)
-               goto error;
-
-       if (write(fd, query, query_len + 1) != query_len + 1)
-               goto error;
-
-       CloseTransientFile(fd);
-
-       /* Mark our write complete */
-       {
-               volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
-
-               SpinLockAcquire(&s->mutex);
-               s->n_writers--;
-               SpinLockRelease(&s->mutex);
-       }
-
-       return true;
-
-error:
-       ereport(LOG,
-                       (errcode_for_file_access(),
-                        errmsg("could not write pg_stat_statement file \"%s\": %m",
-                                       PGSS_TEXT_FILE)));
-
-       if (fd >= 0)
-               CloseTransientFile(fd);
-
-       /* Mark our write complete */
-       {
-               volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
-
-               SpinLockAcquire(&s->mutex);
-               s->n_writers--;
-               SpinLockRelease(&s->mutex);
-       }
-
-       return false;
-}
-
-/*
- * Read the external query text file into a malloc'd buffer.
- *
- * Returns NULL (without throwing an error) if unable to read, eg
- * file not there or insufficient memory.
- *
- * On success, the buffer size is also returned into *buffer_size.
- *
- * This can be called without any lock on pgss->lock, but in that case
- * the caller is responsible for verifying that the result is sane.
- */
-static char *
-qtext_load_file(Size *buffer_size)
-{
-       char       *buf;
-       int                     fd;
-       struct stat stat;
-
-       fd = OpenTransientFile(PGSS_TEXT_FILE, O_RDONLY | PG_BINARY, 0);
-       if (fd < 0)
-       {
-               if (errno != ENOENT)
-                       ereport(LOG,
-                                       (errcode_for_file_access(),
-                                  errmsg("could not read pg_stat_statement file \"%s\": %m",
-                                                 PGSS_TEXT_FILE)));
-               return NULL;
-       }
-
-       /* Get file length */
-       if (fstat(fd, &stat))
-       {
-               ereport(LOG,
-                               (errcode_for_file_access(),
-                                errmsg("could not stat pg_stat_statement file \"%s\": %m",
-                                               PGSS_TEXT_FILE)));
-               CloseTransientFile(fd);
-               return NULL;
-       }
-
-       /* Allocate buffer; beware that off_t might be wider than size_t */
-       if (stat.st_size <= MaxAllocSize)
-               buf = (char *) malloc(stat.st_size);
-       else
-               buf = NULL;
-       if (buf == NULL)
-       {
-               ereport(LOG,
-                               (errcode(ERRCODE_OUT_OF_MEMORY),
-                                errmsg("out of memory")));
-               CloseTransientFile(fd);
-               return NULL;
-       }
-
-       /*
-        * OK, slurp in the file.  If we get a short read and errno doesn't get
-        * set, the reason is probably that garbage collection truncated the file
-        * since we did the fstat(), so we don't log a complaint --- but we don't
-        * return the data, either, since it's most likely corrupt due to
-        * concurrent writes from garbage collection.
-        */
-       errno = 0;
-       if (read(fd, buf, stat.st_size) != stat.st_size)
-       {
-               if (errno)
-                       ereport(LOG,
-                                       (errcode_for_file_access(),
-                                  errmsg("could not read pg_stat_statement file \"%s\": %m",
-                                                 PGSS_TEXT_FILE)));
-               free(buf);
-               CloseTransientFile(fd);
-               return NULL;
-       }
-
-       CloseTransientFile(fd);
-
-       *buffer_size = stat.st_size;
-       return buf;
-}
-
-/*
- * Locate a query text in the file image previously read by qtext_load_file().
- *
- * We validate the given offset/length, and return NULL if bogus.  Otherwise,
- * the result points to a null-terminated string within the buffer.
- */
-static char *
-qtext_fetch(Size query_offset, int query_len,
-                       char *buffer, Size buffer_size)
-{
-       /* File read failed? */
-       if (buffer == NULL)
-               return NULL;
-       /* Bogus offset/length? */
-       if (query_len < 0 ||
-               query_offset + query_len >= buffer_size)
-               return NULL;
-       /* As a further sanity check, make sure there's a trailing null */
-       if (buffer[query_offset + query_len] != '\0')
-               return NULL;
-       /* Looks OK */
-       return buffer + query_offset;
-}
-
-/*
- * Do we need to garbage-collect the external query text file?
- *
- * Caller should hold at least a shared lock on pgss->lock.
- */
-static bool
-need_gc_qtexts(void)
-{
-       Size            extent;
-
-       /* Read shared extent pointer */
-       {
-               volatile pgssSharedState *s = (volatile pgssSharedState *) pgss;
-
-               SpinLockAcquire(&s->mutex);
-               extent = s->extent;
-               SpinLockRelease(&s->mutex);
-       }
-
-       /* Don't proceed if file does not exceed 512 bytes per possible entry */
-       if (extent < 512 * pgss_max)
-               return false;
-
-       /*
-        * Don't proceed if file is less than about 50% bloat.  Nothing can or
-        * should be done in the event of unusually large query texts accounting
-        * for file's large size.  We go to the trouble of maintaining the mean
-        * query length in order to prevent garbage collection from thrashing
-        * uselessly.
-        */
-       if (extent < pgss->mean_query_len * pgss_max * 2)
-               return false;
-
-       return true;
-}
-
-/*
- * Garbage-collect orphaned query texts in external file.
- *
- * This won't be called often in the typical case, since it's likely that
- * there won't be too much churn, and besides, a similar compaction process
- * occurs when serializing to disk at shutdown or as part of resetting.
- * Despite this, it seems prudent to plan for the edge case where the file
- * becomes unreasonably large, with no other method of compaction likely to
- * occur in the foreseeable future.
- *
- * The caller must hold an exclusive lock on pgss->lock.
- */
-static void
-gc_qtexts(void)
-{
-       char       *qbuffer;
-       Size            qbuffer_size;
-       FILE       *qfile;
-       HASH_SEQ_STATUS hash_seq;
-       pgssEntry  *entry;
-       Size            extent;
-       int                     nentries;
-
-       /*
-        * When called from pgss_store, some other session might have proceeded
-        * with garbage collection in the no-lock-held interim of lock strength
-        * escalation.  Check once more that this is actually necessary.
-        */
-       if (!need_gc_qtexts())
-               return;
-
-       /*
-        * Load the old texts file.  If we fail (out of memory, for instance) just
-        * skip the garbage collection.
-        */
-       qbuffer = qtext_load_file(&qbuffer_size);
-       if (qbuffer == NULL)
-               return;
-
-       /*
-        * We overwrite the query texts file in place, so as to reduce the risk of
-        * an out-of-disk-space failure.  Since the file is guaranteed not to get
-        * larger, this should always work on traditional filesystems; though we
-        * could still lose on copy-on-write filesystems.
-        */
-       qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
-       if (qfile == NULL)
-       {
-               ereport(LOG,
-                               (errcode_for_file_access(),
-                                errmsg("could not write pg_stat_statement file \"%s\": %m",
-                                               PGSS_TEXT_FILE)));
-               goto gc_fail;
-       }
-
-       extent = 0;
-       nentries = 0;
-
-       hash_seq_init(&hash_seq, pgss_hash);
-       while ((entry = hash_seq_search(&hash_seq)) != NULL)
-       {
-               int                     query_len = entry->query_len;
-               char       *qry = qtext_fetch(entry->query_offset,
-                                                                         query_len,
-                                                                         qbuffer,
-                                                                         qbuffer_size);
-
-               if (qry == NULL)
-               {
-                       /* Trouble ... drop the text */
-                       entry->query_offset = 0;
-                       entry->query_len = -1;
-                       continue;
-               }
-
-               if (fwrite(qry, 1, query_len + 1, qfile) != query_len + 1)
-               {
-                       ereport(LOG,
-                                       (errcode_for_file_access(),
-                                 errmsg("could not write pg_stat_statement file \"%s\": %m",
-                                                PGSS_TEXT_FILE)));
-                       hash_seq_term(&hash_seq);
-                       goto gc_fail;
-               }
-
-               entry->query_offset = extent;
-               extent += query_len + 1;
-               nentries++;
-       }
-
-       /*
-        * Truncate away any now-unused space.  If this fails for some odd reason,
-        * we log it, but there's no need to fail.
-        */
-       if (ftruncate(fileno(qfile), extent) != 0)
-               ereport(LOG,
-                               (errcode_for_file_access(),
-                          errmsg("could not truncate pg_stat_statement file \"%s\": %m",
-                                         PGSS_TEXT_FILE)));
-
-       if (FreeFile(qfile))
-       {
-               ereport(LOG,
-                               (errcode_for_file_access(),
-                                errmsg("could not write pg_stat_statement file \"%s\": %m",
-                                               PGSS_TEXT_FILE)));
-               qfile = NULL;
-               goto gc_fail;
-       }
-
-       elog(DEBUG1, "pgss gc of queries file shrunk size from %zu to %zu",
-                pgss->extent, extent);
-
-       /* Reset the shared extent pointer */
-       pgss->extent = extent;
-
-       /*
-        * Also update the mean query length, to be sure that need_gc_qtexts()
-        * won't still think we have a problem.
-        */
-       if (nentries > 0)
-               pgss->mean_query_len = extent / nentries;
-       else
-               pgss->mean_query_len = ASSUMED_LENGTH_INIT;
-
-       free(qbuffer);
-
-       /*
-        * OK, count a garbage collection cycle.  (Note: even though we have
-        * exclusive lock on pgss->lock, we must take pgss->mutex for this, since
-        * other processes may examine gc_count while holding only the mutex.
-        * Also, we have to advance the count *after* we've rewritten the file,
-        * else other processes might not realize they read a stale file.)
-        */
-       record_gc_qtexts();
-
-       return;
-
-gc_fail:
-       /* clean up resources */
-       if (qfile)
-               FreeFile(qfile);
-       if (qbuffer)
-               free(qbuffer);
-
-       /*
-        * Since the contents of the external file are now uncertain, mark all
-        * hashtable entries as having invalid texts.
-        */
-       hash_seq_init(&hash_seq, pgss_hash);
-       while ((entry = hash_seq_search(&hash_seq)) != NULL)
-       {
-               entry->query_offset = 0;
-               entry->query_len = -1;
-       }
-
-       /* Seems like a good idea to bump the GC count even though we failed */
-       record_gc_qtexts();
-}
-
-/*
- * Release all entries.
+ *-------------------------------------------------------------------------
  */
-static void
-entry_reset(void)
-{
-       HASH_SEQ_STATUS hash_seq;
-       pgssEntry  *entry;
-       FILE       *qfile;
-
-       LWLockAcquire(pgss->lock, LW_EXCLUSIVE);
-
-       hash_seq_init(&hash_seq, pgss_hash);
-       while ((entry = hash_seq_search(&hash_seq)) != NULL)
-       {
-               hash_search(pgss_hash, &entry->key, HASH_REMOVE, NULL);
-       }
-
-       /*
-        * Write new empty query file, perhaps even creating a new one to recover
-        * if the file was missing.
-        */
-       qfile = AllocateFile(PGSS_TEXT_FILE, PG_BINARY_W);
-       if (qfile == NULL)
-       {
-               ereport(LOG,
-                               (errcode_for_file_access(),
-                                errmsg("could not create pg_stat_statement file \"%s\": %m",
-                                               PGSS_TEXT_FILE)));
-               goto done;
-       }
-
-       /* If ftruncate fails, log it, but it's not a fatal problem */
-       if (ftruncate(fileno(qfile), 0) != 0)
-               ereport(LOG,
-                               (errcode_for_file_access(),
-                          errmsg("could not truncate pg_stat_statement file \"%s\": %m",
-                                         PGSS_TEXT_FILE)));
+#include "postgres.h"
 
-       FreeFile(qfile);
+#include <sys/stat.h>
 
-done:
-       pgss->extent = 0;
-       /* This counts as a query text garbage collection for our purposes */
-       record_gc_qtexts();
+#include "access/hash.h"
+#include "parser/scanner.h"
 
-       LWLockRelease(pgss->lock);
-}
-#endif
+static void AppendJumble(pgssJumbleState *jstate,
+                        const unsigned char *item, Size size);
+static void JumbleQuery(pgssJumbleState *jstate, Query *query);
+static void JumbleRangeTable(pgssJumbleState *jstate, List *rtable);
+static void JumbleExpr(pgssJumbleState *jstate, Node *node);
+static void RecordConstLocation(pgssJumbleState *jstate, int location);
+static void fill_in_constant_lengths(pgssJumbleState *jstate, const char *query);
+static int     comp_location(const void *a, const void *b);
 
 /*
  * AppendJumble: Append a value that is substantive in a given query to
@@ -2209,8 +89,10 @@ JumbleQuery(pgssJumbleState *jstate, Query *query)
        JumbleRangeTable(jstate, query->rtable);
        JumbleExpr(jstate, (Node *) query->jointree);
        JumbleExpr(jstate, (Node *) query->targetList);
+       JumbleExpr(jstate, (Node *) query->onConflict);
        JumbleExpr(jstate, (Node *) query->returningList);
        JumbleExpr(jstate, (Node *) query->groupClause);
+       JumbleExpr(jstate, (Node *) query->groupingSets);
        JumbleExpr(jstate, query->havingQual);
        JumbleExpr(jstate, (Node *) query->windowClause);
        JumbleExpr(jstate, (Node *) query->distinctClause);
@@ -2239,6 +121,7 @@ JumbleRangeTable(pgssJumbleState *jstate, List *rtable)
                {
                        case RTE_RELATION:
                                APP_JUMB(rte->relid);
+                               JumbleExpr(jstate, (Node *) rte->tablesample);
                                break;
                        case RTE_SUBQUERY:
                                JumbleQuery(jstate, rte->subquery);
@@ -2341,6 +224,13 @@ JumbleExpr(pgssJumbleState *jstate, Node *node)
                                JumbleExpr(jstate, (Node *) expr->aggfilter);
                        }
                        break;
+               case T_GroupingFunc:
+                       {
+                               GroupingFunc *grpnode = (GroupingFunc *) node;
+
+                               JumbleExpr(jstate, (Node *) grpnode->refs);
+                       }
+                       break;
                case T_WindowFunc:
                        {
                                WindowFunc *expr = (WindowFunc *) node;
@@ -2576,6 +466,15 @@ JumbleExpr(pgssJumbleState *jstate, Node *node)
                                APP_JUMB(ce->cursor_param);
                        }
                        break;
+               case T_InferenceElem:
+                       {
+                               InferenceElem *ie = (InferenceElem *) node;
+
+                               APP_JUMB(ie->infercollid);
+                               APP_JUMB(ie->inferopclass);
+                               JumbleExpr(jstate, ie->expr);
+                       }
+                       break;
                case T_TargetEntry:
                        {
                                TargetEntry *tle = (TargetEntry *) node;
@@ -2612,12 +511,32 @@ JumbleExpr(pgssJumbleState *jstate, Node *node)
                                JumbleExpr(jstate, from->quals);
                        }
                        break;
+               case T_OnConflictExpr:
+                       {
+                               OnConflictExpr *conf = (OnConflictExpr *) node;
+
+                               APP_JUMB(conf->action);
+                               JumbleExpr(jstate, (Node *) conf->arbiterElems);
+                               JumbleExpr(jstate, conf->arbiterWhere);
+                               JumbleExpr(jstate, (Node *) conf->onConflictSet);
+                               JumbleExpr(jstate, conf->onConflictWhere);
+                               APP_JUMB(conf->constraint);
+                               APP_JUMB(conf->exclRelIndex);
+                               JumbleExpr(jstate, (Node *) conf->exclRelTlist);
+                       }
+                       break;
                case T_List:
                        foreach(temp, (List *) node)
                        {
                                JumbleExpr(jstate, (Node *) lfirst(temp));
                        }
                        break;
+               case T_IntList:
+                       foreach(temp, (List *) node)
+                       {
+                               APP_JUMB(lfirst_int(temp));
+                       }
+                       break;
                case T_SortGroupClause:
                        {
                                SortGroupClause *sgc = (SortGroupClause *) node;
@@ -2628,6 +547,13 @@ JumbleExpr(pgssJumbleState *jstate, Node *node)
                                APP_JUMB(sgc->nulls_first);
                        }
                        break;
+               case T_GroupingSet:
+                       {
+                               GroupingSet *gsnode = (GroupingSet *) node;
+
+                               JumbleExpr(jstate, (Node *) gsnode->content);
+                       }
+                       break;
                case T_WindowClause:
                        {
                                WindowClause *wc = (WindowClause *) node;
@@ -2666,6 +592,15 @@ JumbleExpr(pgssJumbleState *jstate, Node *node)
                                JumbleExpr(jstate, rtfunc->funcexpr);
                        }
                        break;
+               case T_TableSampleClause:
+                       {
+                               TableSampleClause *tsc = (TableSampleClause *) node;
+
+                               APP_JUMB(tsc->tsmhandler);
+                               JumbleExpr(jstate, (Node *) tsc->args);
+                               JumbleExpr(jstate, (Node *) tsc->repeatable);
+                       }
+                       break;
                default:
                        /* Only a warning, since we can stumble along anyway */
                        elog(WARNING, "unrecognized node type: %d",
@@ -2827,6 +762,9 @@ fill_in_constant_lengths(pgssJumbleState *jstate, const char *query)
                                                         ScanKeywords,
                                                         NumScanKeywords);
 
+       /* we don't want to re-emit any escape string warnings */
+       yyextra.escape_string_warning = false;
+
        /* Search for each constant, in sequence */
        for (i = 0; i < jstate->clocations_count; i++)
        {
index ae8aac3..6ca894f 100644 (file)
@@ -338,7 +338,7 @@ EXPLAIN (COSTS false) SELECT * FROM t1 FULL OUTER JOIN  t2 ON (t1.id = t2.id);
 /*+NestLoop(t1 t2)*/
 EXPLAIN (COSTS false) SELECT * FROM t1 FULL OUTER JOIN  t2 ON (t1.id = t2.id);
 
--- inherite table test
+-- inheritance tables test
 SET constraint_exclusion TO off;
 EXPLAIN (COSTS false) SELECT * FROM p1 WHERE id >= 50 AND id <= 51 AND p1.ctid = '(1,1)';
 SET constraint_exclusion TO on;