1 /*-------------------------------------------------------------------------
4 * Track statement execution times across a whole database cluster.
6 * Execution costs are totalled for each distinct source query, and kept in
7 * a shared hashtable. (We track only as many distinct queries as will fit
8 * in the designated amount of shared memory.)
10 * As of Postgres 9.2, this module normalizes query entries. Normalization
11 * is a process whereby similar queries, typically differing only in their
12 * constants (though the exact rules are somewhat more subtle than that) are
13 * recognized as equivalent, and are tracked as a single entry. This is
14 * particularly useful for non-prepared queries.
16 * Normalization is implemented by fingerprinting queries, selectively
17 * serializing those fields of each query tree's nodes that are judged to be
18 * essential to the query. This is referred to as a query jumble. This is
19 * distinct from a regular serialization in that various extraneous
20 * information is ignored as irrelevant or not essential to the query, such
21 * as the collations of Vars and, most notably, the values of constants.
23 * This jumble is acquired at the end of parse analysis of each query, and
24 * a 32-bit hash of it is stored into the query's Query.queryId field.
25 * The server then copies this value around, making it available in plan
26 * tree(s) generated from the query. The executor can then use this value
27 * to blame query costs on the proper queryId.
29 * Note about locking issues: to create or delete an entry in the shared
30 * hashtable, one must hold pgss->lock exclusively. Modifying any field
31 * in an entry except the counters requires the same. To look up an entry,
32 * one must hold the lock shared. To read or update the counters within
33 * an entry, one must hold the lock shared or exclusive (so the entry doesn't
34 * disappear!) and also take the entry's mutex spinlock.
37 * Copyright (c) 2008-2013, PostgreSQL Global Development Group
40 * contrib/pg_stat_statements/pg_stat_statements.c
42 *-------------------------------------------------------------------------
51 #include "access/hash.h"
53 #include "executor/instrument.h"
55 #include "mb/pg_wchar.h"
56 #include "miscadmin.h"
57 #include "parser/analyze.h"
58 #include "parser/parsetree.h"
60 #include "parser/scanner.h"
63 #include "storage/fd.h"
64 #include "storage/ipc.h"
65 #include "storage/spin.h"
66 #include "tcop/utility.h"
67 #include "utils/builtins.h"
72 /* Location of stats file */
73 #define PGSS_DUMP_FILE "global/pg_stat_statements.stat"
75 /* This constant defines the magic number in the stats file header */
76 static const uint32 PGSS_FILE_HEADER = 0x20120328;
78 /* XXX: Should USAGE_EXEC reflect execution time and/or buffer usage? */
79 #define USAGE_EXEC(duration) (1.0)
80 #define USAGE_INIT (1.0) /* including initial planning */
81 #define ASSUMED_MEDIAN_INIT (10.0) /* initial assumed median usage */
82 #define USAGE_DECREASE_FACTOR (0.99) /* decreased every entry_dealloc */
83 #define STICKY_DECREASE_FACTOR (0.50) /* factor for sticky entries */
84 #define USAGE_DEALLOC_PERCENT 5 /* free this % of entries at once */
86 #define JUMBLE_SIZE 1024 /* query serialization buffer size */
89 * Hashtable key that defines the identity of a hashtable entry. We separate
90 * queries by user and by database even if they are otherwise identical.
92 * Presently, the query encoding is fully determined by the source database
93 * and so we don't really need it to be in the key. But that might not always
94 * be true. Anyway it's notationally convenient to pass it as part of the key.
96 typedef struct pgssHashKey
98 Oid userid; /* user OID */
99 Oid dbid; /* database OID */
100 int encoding; /* query encoding */
101 uint32 queryid; /* query identifier */
105 * The actual stats counters kept within pgssEntry.
107 typedef struct Counters
109 int64 calls; /* # of times executed */
110 double total_time; /* total execution time, in msec */
111 int64 rows; /* total # of retrieved or affected rows */
112 int64 shared_blks_hit; /* # of shared buffer hits */
113 int64 shared_blks_read; /* # of shared disk blocks read */
114 int64 shared_blks_dirtied; /* # of shared disk blocks dirtied */
115 int64 shared_blks_written; /* # of shared disk blocks written */
116 int64 local_blks_hit; /* # of local buffer hits */
117 int64 local_blks_read; /* # of local disk blocks read */
118 int64 local_blks_dirtied; /* # of local disk blocks dirtied */
119 int64 local_blks_written; /* # of local disk blocks written */
120 int64 temp_blks_read; /* # of temp blocks read */
121 int64 temp_blks_written; /* # of temp blocks written */
122 double blk_read_time; /* time spent reading, in msec */
123 double blk_write_time; /* time spent writing, in msec */
124 double usage; /* usage factor */
128 * Statistics per statement
130 * NB: see the file read/write code before changing field order here.
132 typedef struct pgssEntry
134 pgssHashKey key; /* hash key of entry - MUST BE FIRST */
135 Counters counters; /* the statistics for this query */
136 int query_len; /* # of valid bytes in query string */
137 slock_t mutex; /* protects the counters only */
138 char query[1]; /* VARIABLE LENGTH ARRAY - MUST BE LAST */
139 /* Note: the allocated length of query[] is actually pgss->query_size */
143 * Global shared state
145 typedef struct pgssSharedState
147 LWLockId lock; /* protects hashtable search/modification */
148 int query_size; /* max query length in bytes */
149 double cur_median_usage; /* current median usage in hashtable */
153 * Struct for tracking locations/lengths of constants during normalization
155 typedef struct pgssLocationLen
157 int location; /* start offset in query text */
158 int length; /* length in bytes, or -1 to ignore */
162 * Working state for computing a query jumble and producing a normalized
163 * query string.
165 typedef struct pgssJumbleState
167 /* Jumble of current query tree */
168 unsigned char *jumble;
170 /* Number of bytes used in jumble[] */
173 /* Array of locations of constants that should be removed */
174 pgssLocationLen *clocations;
176 /* Allocated length of clocations array */
177 int clocations_buf_size;
179 /* Current number of valid entries in clocations array */
180 int clocations_count;
183 /*---- Local variables ----*/
185 /* Current nesting depth of ExecutorRun+ProcessUtility calls */
186 static int nested_level = 0;
188 /* Saved hook values in case of unload */
189 static shmem_startup_hook_type prev_shmem_startup_hook = NULL;
190 static post_parse_analyze_hook_type prev_post_parse_analyze_hook = NULL;
191 static ExecutorStart_hook_type prev_ExecutorStart = NULL;
192 static ExecutorRun_hook_type prev_ExecutorRun = NULL;
193 static ExecutorFinish_hook_type prev_ExecutorFinish = NULL;
194 static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
195 static ProcessUtility_hook_type prev_ProcessUtility = NULL;
197 /* Links to shared memory state */
198 static pgssSharedState *pgss = NULL;
199 static HTAB *pgss_hash = NULL;
201 /*---- GUC variables ----*/
205 PGSS_TRACK_NONE, /* track no statements */
206 PGSS_TRACK_TOP, /* only top level statements */
207 PGSS_TRACK_ALL /* all statements, including nested ones */
210 static const struct config_enum_entry track_options[] =
212 {"none", PGSS_TRACK_NONE, false},
213 {"top", PGSS_TRACK_TOP, false},
214 {"all", PGSS_TRACK_ALL, false},
218 static int pgss_max; /* max # statements to track */
219 static int pgss_track; /* tracking level */
220 static bool pgss_track_utility; /* whether to track utility commands */
221 static bool pgss_save; /* whether to save stats across shutdown */
224 #define pgss_enabled() \
225 (pgss_track == PGSS_TRACK_ALL || \
226 (pgss_track == PGSS_TRACK_TOP && nested_level == 0))
228 /*---- Function declarations ----*/
233 Datum pg_stat_statements_reset(PG_FUNCTION_ARGS);
234 Datum pg_stat_statements(PG_FUNCTION_ARGS);
236 PG_FUNCTION_INFO_V1(pg_stat_statements_reset);
237 PG_FUNCTION_INFO_V1(pg_stat_statements);
239 static void pgss_shmem_startup(void);
240 static void pgss_shmem_shutdown(int code, Datum arg);
241 static void pgss_post_parse_analyze(ParseState *pstate, Query *query);
242 static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags);
243 static void pgss_ExecutorRun(QueryDesc *queryDesc,
244 ScanDirection direction,
246 static void pgss_ExecutorFinish(QueryDesc *queryDesc);
247 static void pgss_ExecutorEnd(QueryDesc *queryDesc);
248 static void pgss_ProcessUtility(Node *parsetree, const char *queryString,
249 ProcessUtilityContext context, ParamListInfo params,
250 DestReceiver *dest, char *completionTag);
251 static uint32 pgss_hash_fn(const void *key, Size keysize);
252 static int pgss_match_fn(const void *key1, const void *key2, Size keysize);
253 static uint32 pgss_hash_string(const char *str);
254 static void pgss_store(const char *query, uint32 queryId,
255 double total_time, uint64 rows,
256 const BufferUsage *bufusage,
257 pgssJumbleState *jstate);
258 static Size pgss_memsize(void);
259 static pgssEntry *entry_alloc(pgssHashKey *key, const char *query,
260 int query_len, bool sticky);
261 static void entry_dealloc(void);
262 static void entry_reset(void);
264 static void AppendJumble(pgssJumbleState *jstate,
265 const unsigned char *item, Size size);
267 static void JumbleQuery(pgssJumbleState *jstate, Query *query);
269 static void JumbleRangeTable(pgssJumbleState *jstate, List *rtable);
270 static void JumbleExpr(pgssJumbleState *jstate, Node *node);
271 static void RecordConstLocation(pgssJumbleState *jstate, int location);
273 static char *generate_normalized_query(pgssJumbleState *jstate, const char *query,
274 int *query_len_p, int encoding);
276 static void fill_in_constant_lengths(pgssJumbleState *jstate, const char *query);
277 static int comp_location(const void *a, const void *b);
282 * Module load callback
288 * In order to create our shared memory area, we have to be loaded via
289 * shared_preload_libraries. If not, fall out without hooking into any of
290 * the main system. (We don't throw error here because it seems useful to
291 * allow the pg_stat_statements functions to be created even when the
292 * module isn't active. The functions must protect themselves against
293 * being called then, however.)
295 if (!process_shared_preload_libraries_in_progress)
299 * Define (or redefine) custom GUC variables.
301 DefineCustomIntVariable("pg_stat_statements.max",
302 "Sets the maximum number of statements tracked by pg_stat_statements.",
314 DefineCustomEnumVariable("pg_stat_statements.track",
315 "Selects which statements are tracked by pg_stat_statements.",
326 DefineCustomBoolVariable("pg_stat_statements.track_utility",
327 "Selects whether utility commands are tracked by pg_stat_statements.",
337 DefineCustomBoolVariable("pg_stat_statements.save",
338 "Save pg_stat_statements statistics across server shutdowns.",
348 EmitWarningsOnPlaceholders("pg_stat_statements");
351 * Request additional shared resources. (These are no-ops if we're not in
352 * the postmaster process.) We'll allocate or attach to the shared
353 * resources in pgss_shmem_startup().
355 RequestAddinShmemSpace(pgss_memsize());
356 RequestAddinLWLocks(1);
361 prev_shmem_startup_hook = shmem_startup_hook;
362 shmem_startup_hook = pgss_shmem_startup;
363 prev_post_parse_analyze_hook = post_parse_analyze_hook;
364 post_parse_analyze_hook = pgss_post_parse_analyze;
365 prev_ExecutorStart = ExecutorStart_hook;
366 ExecutorStart_hook = pgss_ExecutorStart;
367 prev_ExecutorRun = ExecutorRun_hook;
368 ExecutorRun_hook = pgss_ExecutorRun;
369 prev_ExecutorFinish = ExecutorFinish_hook;
370 ExecutorFinish_hook = pgss_ExecutorFinish;
371 prev_ExecutorEnd = ExecutorEnd_hook;
372 ExecutorEnd_hook = pgss_ExecutorEnd;
373 prev_ProcessUtility = ProcessUtility_hook;
374 ProcessUtility_hook = pgss_ProcessUtility;
378 * Module unload callback
383 /* Uninstall hooks. */
384 shmem_startup_hook = prev_shmem_startup_hook;
385 post_parse_analyze_hook = prev_post_parse_analyze_hook;
386 ExecutorStart_hook = prev_ExecutorStart;
387 ExecutorRun_hook = prev_ExecutorRun;
388 ExecutorFinish_hook = prev_ExecutorFinish;
389 ExecutorEnd_hook = prev_ExecutorEnd;
390 ProcessUtility_hook = prev_ProcessUtility;
394 * shmem_startup hook: allocate or attach to shared memory,
395 * then load any pre-existing statistics from file.
398 pgss_shmem_startup(void)
410 if (prev_shmem_startup_hook)
411 prev_shmem_startup_hook();
413 /* reset in case this is a restart within the postmaster */
418 * Create or attach to the shared memory state, including hash table
420 LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
422 pgss = ShmemInitStruct("pg_stat_statements",
423 sizeof(pgssSharedState),
428 /* First time through ... */
429 pgss->lock = LWLockAssign();
430 pgss->query_size = pgstat_track_activity_query_size;
431 pgss->cur_median_usage = ASSUMED_MEDIAN_INIT;
434 /* Be sure everyone agrees on the hash table entry size */
435 query_size = pgss->query_size;
437 memset(&info, 0, sizeof(info));
438 info.keysize = sizeof(pgssHashKey);
439 info.entrysize = offsetof(pgssEntry, query) +query_size;
440 info.hash = pgss_hash_fn;
441 info.match = pgss_match_fn;
442 pgss_hash = ShmemInitHash("pg_stat_statements hash",
445 HASH_ELEM | HASH_FUNCTION | HASH_COMPARE);
447 LWLockRelease(AddinShmemInitLock);
450 * If we're in the postmaster (or a standalone backend...), set up a shmem
451 * exit hook to dump the statistics to disk.
453 if (!IsUnderPostmaster)
454 on_shmem_exit(pgss_shmem_shutdown, (Datum) 0);
457 * Attempt to load old statistics from the dump file, if this is the first
458 * time through and we weren't told not to.
460 if (found || !pgss_save)
464 * Note: we don't bother with locks here, because there should be no other
465 * processes running when this code is reached.
467 file = AllocateFile(PGSS_DUMP_FILE, PG_BINARY_R);
471 return; /* ignore not-found error */
475 buffer_size = query_size;
476 buffer = (char *) palloc(buffer_size);
478 if (fread(&header, sizeof(uint32), 1, file) != 1 ||
479 header != PGSS_FILE_HEADER ||
480 fread(&num, sizeof(int32), 1, file) != 1)
483 for (i = 0; i < num; i++)
488 if (fread(&temp, offsetof(pgssEntry, mutex), 1, file) != 1)
491 /* Encoding is the only field we can easily sanity-check */
492 if (!PG_VALID_BE_ENCODING(temp.key.encoding))
495 /* Previous incarnation might have had a larger query_size */
496 if (temp.query_len >= buffer_size)
498 buffer = (char *) repalloc(buffer, temp.query_len + 1);
499 buffer_size = temp.query_len + 1;
502 if (fread(buffer, 1, temp.query_len, file) != temp.query_len)
504 buffer[temp.query_len] = '\0';
506 /* Skip loading "sticky" entries */
507 if (temp.counters.calls == 0)
510 /* Clip to available length if needed */
511 if (temp.query_len >= query_size)
512 temp.query_len = pg_encoding_mbcliplen(temp.key.encoding,
517 /* make the hashtable entry (discards old entries if too many) */
518 entry = entry_alloc(&temp.key, buffer, temp.query_len, false);
520 /* copy in the actual stats */
521 entry->counters = temp.counters;
528 * Remove the file so it's not included in backups/replication slaves,
529 * etc. A new file will be written on next shutdown.
531 unlink(PGSS_DUMP_FILE);
537 (errcode_for_file_access(),
538 errmsg("could not read pg_stat_statement file \"%s\": %m",
544 /* If possible, throw away the bogus file; ignore any error */
545 unlink(PGSS_DUMP_FILE);
549 * shmem_shutdown hook: Dump statistics into file.
551 * Note: we don't bother with acquiring lock, because there should be no
552 * other processes running when this is called.
555 pgss_shmem_shutdown(int code, Datum arg)
558 HASH_SEQ_STATUS hash_seq;
562 /* Don't try to dump during a crash. */
566 /* Safety check ... shouldn't get here unless shmem is set up. */
567 if (!pgss || !pgss_hash)
570 /* Don't dump if told not to. */
574 file = AllocateFile(PGSS_DUMP_FILE ".tmp", PG_BINARY_W);
578 if (fwrite(&PGSS_FILE_HEADER, sizeof(uint32), 1, file) != 1)
580 num_entries = hash_get_num_entries(pgss_hash);
581 if (fwrite(&num_entries, sizeof(int32), 1, file) != 1)
584 hash_seq_init(&hash_seq, pgss_hash);
585 while ((entry = hash_seq_search(&hash_seq)) != NULL)
587 int len = entry->query_len;
589 if (fwrite(entry, offsetof(pgssEntry, mutex), 1, file) != 1 ||
590 fwrite(entry->query, 1, len, file) != len)
601 * Rename file into place, so we atomically replace the old one.
603 if (rename(PGSS_DUMP_FILE ".tmp", PGSS_DUMP_FILE) != 0)
605 (errcode_for_file_access(),
606 errmsg("could not rename pg_stat_statement file \"%s\": %m",
607 PGSS_DUMP_FILE ".tmp")));
613 (errcode_for_file_access(),
614 errmsg("could not write pg_stat_statement file \"%s\": %m",
615 PGSS_DUMP_FILE ".tmp")));
618 unlink(PGSS_DUMP_FILE ".tmp");
622 * Post-parse-analysis hook: mark query with a queryId
625 pgss_post_parse_analyze(ParseState *pstate, Query *query)
627 pgssJumbleState jstate;
629 /* Assert we didn't do this already */
630 Assert(query->queryId == 0);
632 /* Safety check... */
633 if (!pgss || !pgss_hash)
637 * Utility statements get queryId zero. We do this even in cases where
638 * the statement contains an optimizable statement for which a queryId
639 * could be derived (such as EXPLAIN or DECLARE CURSOR). For such cases,
640 * runtime control will first go through ProcessUtility and then the
641 * executor, and we don't want the executor hooks to do anything, since we
642 * are already measuring the statement's costs at the utility level.
644 if (query->utilityStmt)
650 /* Set up workspace for query jumbling */
651 jstate.jumble = (unsigned char *) palloc(JUMBLE_SIZE);
652 jstate.jumble_len = 0;
653 jstate.clocations_buf_size = 32;
654 jstate.clocations = (pgssLocationLen *)
655 palloc(jstate.clocations_buf_size * sizeof(pgssLocationLen));
656 jstate.clocations_count = 0;
658 /* Compute query ID and mark the Query node with it */
659 JumbleQuery(&jstate, query);
660 query->queryId = hash_any(jstate.jumble, jstate.jumble_len);
663 * If we are unlucky enough to get a hash of zero, use 1 instead, to
664 * prevent confusion with the utility-statement case.
666 if (query->queryId == 0)
670 * If we were able to identify any ignorable constants, we immediately
671 * create a hash table entry for the query, so that we can record the
672 * normalized form of the query string. If there were no such constants,
673 * the normalized string would be the same as the query text anyway, so
674 * there's no need for an early entry.
676 if (jstate.clocations_count > 0)
677 pgss_store(pstate->p_sourcetext,
686 * ExecutorStart hook: start up tracking if needed
689 pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
691 if (prev_ExecutorStart)
692 prev_ExecutorStart(queryDesc, eflags);
694 standard_ExecutorStart(queryDesc, eflags);
697 * If query has queryId zero, don't track it. This prevents double
698 * counting of optimizable statements that are directly contained in
699 * utility statements.
701 if (pgss_enabled() && queryDesc->plannedstmt->queryId != 0)
704 * Set up to track total elapsed time in ExecutorRun. Make sure the
705 * space is allocated in the per-query context so it will go away at
706 * ExecutorEnd.
708 if (queryDesc->totaltime == NULL)
710 MemoryContext oldcxt;
712 oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
713 queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_ALL);
714 MemoryContextSwitchTo(oldcxt);
720 * ExecutorRun hook: all we need do is track nesting depth
723 pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, long count)
728 if (prev_ExecutorRun)
729 prev_ExecutorRun(queryDesc, direction, count);
731 standard_ExecutorRun(queryDesc, direction, count);
743 * ExecutorFinish hook: all we need do is track nesting depth
746 pgss_ExecutorFinish(QueryDesc *queryDesc)
751 if (prev_ExecutorFinish)
752 prev_ExecutorFinish(queryDesc);
754 standard_ExecutorFinish(queryDesc);
766 * ExecutorEnd hook: store results if needed
769 pgss_ExecutorEnd(QueryDesc *queryDesc)
771 uint32 queryId = queryDesc->plannedstmt->queryId;
773 if (queryId != 0 && queryDesc->totaltime && pgss_enabled())
776 * Make sure stats accumulation is done. (Note: it's okay if several
777 * levels of hook all do this.)
779 InstrEndLoop(queryDesc->totaltime);
781 pgss_store(queryDesc->sourceText,
783 queryDesc->totaltime->total * 1000.0, /* convert to msec */
784 queryDesc->estate->es_processed,
785 &queryDesc->totaltime->bufusage,
789 if (prev_ExecutorEnd)
790 prev_ExecutorEnd(queryDesc);
792 standard_ExecutorEnd(queryDesc);
796 * ProcessUtility hook
799 pgss_ProcessUtility(Node *parsetree, const char *queryString,
800 ProcessUtilityContext context, ParamListInfo params,
801 DestReceiver *dest, char *completionTag)
804 * If it's an EXECUTE statement, we don't track it and don't increment the
805 * nesting level. This allows the cycles to be charged to the underlying
806 * PREPARE instead (by the Executor hooks), which is much more useful.
808 * We also don't track execution of PREPARE. If we did, we would get one
809 * hash table entry for the PREPARE (with hash calculated from the query
810 * string), and then a different one with the same query string (but hash
811 * calculated from the query tree) would be used to accumulate costs of
812 * ensuing EXECUTEs. This would be confusing, and inconsistent with other
813 * cases where planning time is not included at all.
815 if (pgss_track_utility && pgss_enabled() &&
816 !IsA(parsetree, ExecuteStmt) &&
817 !IsA(parsetree, PrepareStmt))
822 BufferUsage bufusage_start,
826 bufusage_start = pgBufferUsage;
827 INSTR_TIME_SET_CURRENT(start);
832 if (prev_ProcessUtility)
833 prev_ProcessUtility(parsetree, queryString,
835 dest, completionTag);
837 standard_ProcessUtility(parsetree, queryString,
839 dest, completionTag);
849 INSTR_TIME_SET_CURRENT(duration);
850 INSTR_TIME_SUBTRACT(duration, start);
852 /* parse command tag to retrieve the number of affected rows. */
854 sscanf(completionTag, "COPY " UINT64_FORMAT, &rows) != 1)
857 /* calc differences of buffer counters. */
858 bufusage.shared_blks_hit =
859 pgBufferUsage.shared_blks_hit - bufusage_start.shared_blks_hit;
860 bufusage.shared_blks_read =
861 pgBufferUsage.shared_blks_read - bufusage_start.shared_blks_read;
862 bufusage.shared_blks_dirtied =
863 pgBufferUsage.shared_blks_dirtied - bufusage_start.shared_blks_dirtied;
864 bufusage.shared_blks_written =
865 pgBufferUsage.shared_blks_written - bufusage_start.shared_blks_written;
866 bufusage.local_blks_hit =
867 pgBufferUsage.local_blks_hit - bufusage_start.local_blks_hit;
868 bufusage.local_blks_read =
869 pgBufferUsage.local_blks_read - bufusage_start.local_blks_read;
870 bufusage.local_blks_dirtied =
871 pgBufferUsage.local_blks_dirtied - bufusage_start.local_blks_dirtied;
872 bufusage.local_blks_written =
873 pgBufferUsage.local_blks_written - bufusage_start.local_blks_written;
874 bufusage.temp_blks_read =
875 pgBufferUsage.temp_blks_read - bufusage_start.temp_blks_read;
876 bufusage.temp_blks_written =
877 pgBufferUsage.temp_blks_written - bufusage_start.temp_blks_written;
878 bufusage.blk_read_time = pgBufferUsage.blk_read_time;
879 INSTR_TIME_SUBTRACT(bufusage.blk_read_time, bufusage_start.blk_read_time);
880 bufusage.blk_write_time = pgBufferUsage.blk_write_time;
881 INSTR_TIME_SUBTRACT(bufusage.blk_write_time, bufusage_start.blk_write_time);
883 /* For utility statements, we just hash the query string directly */
884 queryId = pgss_hash_string(queryString);
886 pgss_store(queryString,
888 INSTR_TIME_GET_MILLISEC(duration),
895 if (prev_ProcessUtility)
896 prev_ProcessUtility(parsetree, queryString,
898 dest, completionTag);
900 standard_ProcessUtility(parsetree, queryString,
902 dest, completionTag);
907 * Calculate hash value for a key
910 pgss_hash_fn(const void *key, Size keysize)
912 const pgssHashKey *k = (const pgssHashKey *) key;
914 /* we don't bother to include encoding in the hash */
915 return hash_uint32((uint32) k->userid) ^
916 hash_uint32((uint32) k->dbid) ^
917 hash_uint32((uint32) k->queryid);
921 * Compare two keys - zero means match
924 pgss_match_fn(const void *key1, const void *key2, Size keysize)
926 const pgssHashKey *k1 = (const pgssHashKey *) key1;
927 const pgssHashKey *k2 = (const pgssHashKey *) key2;
929 if (k1->userid == k2->userid &&
930 k1->dbid == k2->dbid &&
931 k1->encoding == k2->encoding &&
932 k1->queryid == k2->queryid)
939 * Given an arbitrarily long query string, produce a hash for the purposes of
940 * identifying the query, without normalizing constants. Used when hashing
941 * utility statements.
944 pgss_hash_string(const char *str)
946 return hash_any((const unsigned char *) str, strlen(str));
950 * Store some statistics for a statement.
952 * If jstate is not NULL then we're trying to create an entry for which
953 * we have no statistics as yet; we just want to record the normalized
954 * query string. total_time, rows, bufusage are ignored in this case.
957 pgss_store(const char *query, uint32 queryId,
958 double total_time, uint64 rows,
959 const BufferUsage *bufusage,
960 pgssJumbleState *jstate)
964 char *norm_query = NULL;
966 Assert(query != NULL);
968 /* Safety check... */
969 if (!pgss || !pgss_hash)
972 /* Set up key for hashtable search */
973 key.userid = GetUserId();
974 key.dbid = MyDatabaseId;
975 key.encoding = GetDatabaseEncoding();
976 key.queryid = queryId;
978 /* Lookup the hash table entry with shared lock. */
979 LWLockAcquire(pgss->lock, LW_SHARED);
981 entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
983 /* Create new entry, if not present */
989 * We'll need exclusive lock to make a new entry. There is no point
990 * in holding shared lock while we normalize the string, though.
992 LWLockRelease(pgss->lock);
994 query_len = strlen(query);
998 /* Normalize the string if enabled */
999 norm_query = generate_normalized_query(jstate, query,
1003 /* Acquire exclusive lock as required by entry_alloc() */
1004 LWLockAcquire(pgss->lock, LW_EXCLUSIVE);
1006 entry = entry_alloc(&key, norm_query, query_len, true);
1011 * We're just going to store the query string as-is; but we have
1012 * to truncate it if over-length.
1014 if (query_len >= pgss->query_size)
1015 query_len = pg_encoding_mbcliplen(key.encoding,
1018 pgss->query_size - 1);
1020 /* Acquire exclusive lock as required by entry_alloc() */
1021 LWLockAcquire(pgss->lock, LW_EXCLUSIVE);
1023 entry = entry_alloc(&key, query, query_len, false);
1027 /* Increment the counts, except when jstate is not NULL */
1031 * Grab the spinlock while updating the counters (see comment about
1032 * locking rules at the head of the file)
1034 volatile pgssEntry *e = (volatile pgssEntry *) entry;
1036 SpinLockAcquire(&e->mutex);
1038 /* "Unstick" entry if it was previously sticky */
1039 if (e->counters.calls == 0)
1040 e->counters.usage = USAGE_INIT;
1042 e->counters.calls += 1;
1043 e->counters.total_time += total_time;
1044 e->counters.rows += rows;
1045 e->counters.shared_blks_hit += bufusage->shared_blks_hit;
1046 e->counters.shared_blks_read += bufusage->shared_blks_read;
1047 e->counters.shared_blks_dirtied += bufusage->shared_blks_dirtied;
1048 e->counters.shared_blks_written += bufusage->shared_blks_written;
1049 e->counters.local_blks_hit += bufusage->local_blks_hit;
1050 e->counters.local_blks_read += bufusage->local_blks_read;
1051 e->counters.local_blks_dirtied += bufusage->local_blks_dirtied;
1052 e->counters.local_blks_written += bufusage->local_blks_written;
1053 e->counters.temp_blks_read += bufusage->temp_blks_read;
1054 e->counters.temp_blks_written += bufusage->temp_blks_written;
1055 e->counters.blk_read_time += INSTR_TIME_GET_MILLISEC(bufusage->blk_read_time);
1056 e->counters.blk_write_time += INSTR_TIME_GET_MILLISEC(bufusage->blk_write_time);
1057 e->counters.usage += USAGE_EXEC(total_time);
1059 SpinLockRelease(&e->mutex);
1062 LWLockRelease(pgss->lock);
1064 /* We postpone this pfree until we're out of the lock */
1070 * Reset all statement statistics.
1073 pg_stat_statements_reset(PG_FUNCTION_ARGS)
1075 if (!pgss || !pgss_hash)
1077 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1078 errmsg("pg_stat_statements must be loaded via shared_preload_libraries")));
1083 #define PG_STAT_STATEMENTS_COLS_V1_0 14
1084 #define PG_STAT_STATEMENTS_COLS 18
1087 * Retrieve statement statistics.
1090 pg_stat_statements(PG_FUNCTION_ARGS)
1092 ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
1094 Tuplestorestate *tupstore;
1095 MemoryContext per_query_ctx;
1096 MemoryContext oldcontext;
1097 Oid userid = GetUserId();
1098 bool is_superuser = superuser();
1099 HASH_SEQ_STATUS hash_seq;
1101 bool sql_supports_v1_1_counters = true;
1103 if (!pgss || !pgss_hash)
1105 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1106 errmsg("pg_stat_statements must be loaded via shared_preload_libraries")));
1108 /* check to see if caller supports us returning a tuplestore */
1109 if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
1111 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1112 errmsg("set-valued function called in context that cannot accept a set")));
1113 if (!(rsinfo->allowedModes & SFRM_Materialize))
1115 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1116 errmsg("materialize mode required, but it is not " \
1117 "allowed in this context")));
1119 /* Build a tuple descriptor for our result type */
1120 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
1121 elog(ERROR, "return type must be a row type");
1122 if (tupdesc->natts == PG_STAT_STATEMENTS_COLS_V1_0)
1123 sql_supports_v1_1_counters = false;
1125 per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
1126 oldcontext = MemoryContextSwitchTo(per_query_ctx);
1128 tupstore = tuplestore_begin_heap(true, false, work_mem);
1129 rsinfo->returnMode = SFRM_Materialize;
1130 rsinfo->setResult = tupstore;
1131 rsinfo->setDesc = tupdesc;
1133 MemoryContextSwitchTo(oldcontext);
1135 LWLockAcquire(pgss->lock, LW_SHARED);
1137 hash_seq_init(&hash_seq, pgss_hash);
1138 while ((entry = hash_seq_search(&hash_seq)) != NULL)
1140 Datum values[PG_STAT_STATEMENTS_COLS];
1141 bool nulls[PG_STAT_STATEMENTS_COLS];
1145 memset(values, 0, sizeof(values));
1146 memset(nulls, 0, sizeof(nulls));
1148 values[i++] = ObjectIdGetDatum(entry->key.userid);
1149 values[i++] = ObjectIdGetDatum(entry->key.dbid);
1151 if (is_superuser || entry->key.userid == userid)
1156 pg_do_encoding_conversion((unsigned char *) entry->query,
1158 entry->key.encoding,
1159 GetDatabaseEncoding());
1160 values[i++] = CStringGetTextDatum(qstr);
1161 if (qstr != entry->query)
1165 values[i++] = CStringGetTextDatum("<insufficient privilege>");
1167 /* copy counters to a local variable to keep locking time short */
1169 volatile pgssEntry *e = (volatile pgssEntry *) entry;
1171 SpinLockAcquire(&e->mutex);
1173 SpinLockRelease(&e->mutex);
1176 /* Skip entry if unexecuted (ie, it's a pending "sticky" entry) */
1180 values[i++] = Int64GetDatumFast(tmp.calls);
1181 values[i++] = Float8GetDatumFast(tmp.total_time);
1182 values[i++] = Int64GetDatumFast(tmp.rows);
1183 values[i++] = Int64GetDatumFast(tmp.shared_blks_hit);
1184 values[i++] = Int64GetDatumFast(tmp.shared_blks_read);
1185 if (sql_supports_v1_1_counters)
1186 values[i++] = Int64GetDatumFast(tmp.shared_blks_dirtied);
1187 values[i++] = Int64GetDatumFast(tmp.shared_blks_written);
1188 values[i++] = Int64GetDatumFast(tmp.local_blks_hit);
1189 values[i++] = Int64GetDatumFast(tmp.local_blks_read);
1190 if (sql_supports_v1_1_counters)
1191 values[i++] = Int64GetDatumFast(tmp.local_blks_dirtied);
1192 values[i++] = Int64GetDatumFast(tmp.local_blks_written);
1193 values[i++] = Int64GetDatumFast(tmp.temp_blks_read);
1194 values[i++] = Int64GetDatumFast(tmp.temp_blks_written);
1195 if (sql_supports_v1_1_counters)
1197 values[i++] = Float8GetDatumFast(tmp.blk_read_time);
1198 values[i++] = Float8GetDatumFast(tmp.blk_write_time);
1201 Assert(i == (sql_supports_v1_1_counters ?
1202 PG_STAT_STATEMENTS_COLS : PG_STAT_STATEMENTS_COLS_V1_0));
1204 tuplestore_putvalues(tupstore, tupdesc, values, nulls);
1207 LWLockRelease(pgss->lock);
1209 /* clean up and return the tuplestore */
1210 tuplestore_donestoring(tupstore);
1216 * Estimate shared memory space needed.
/*
 * Shared-memory size estimate.
 *
 * The total starts at the MAXALIGN-rounded size of pgssSharedState and is
 * then increased by hash_estimate_size() for pgss_max hash entries.  The
 * sum goes through add_size() rather than a plain "+".
 *
 * NOTE(review): the function signature, local declarations and the return
 * statement are not visible in this listing - confirm against the full file.
 */
1224 size = MAXALIGN(sizeof(pgssSharedState));
/* one entry = fixed part of pgssEntry up to its query field + room for the query text */
1225 entrysize = offsetof(pgssEntry, query) +pgstat_track_activity_query_size;
1226 size = add_size(size, hash_estimate_size(pgss_max, entrysize));
1232 * Allocate a new hashtable entry.
1233 * caller must hold an exclusive lock on pgss->lock
1235 * "query" need not be null-terminated; we rely on query_len instead
1237 * If "sticky" is true, make the new entry artificially sticky so that it will
1238 * probably still be there when the query finishes execution. We do this by
1239 * giving it a median usage value rather than the normal value. (Strictly
1240 * speaking, query strings are normalized on a best effort basis, though it
1241 * would be difficult to demonstrate this even under artificial conditions.)
1243 * Note: despite needing exclusive lock, it's not an error for the target
1244 * entry to already exist. This is because pgss_store releases and
1245 * reacquires lock after failing to find a match; so someone else could
1246 * have made the entry while we waited to get exclusive lock.
/*
 * entry_alloc
 *
 * Parameters:
 *   key       - hash key handed to hash_search() with HASH_ENTER
 *   query     - query text; exactly query_len bytes are copied from it
 *   query_len - byte length of query; asserted to be >= 0 and smaller than
 *               pgss->query_size
 *   sticky    - when true the initial usage is pgss->cur_median_usage,
 *               otherwise USAGE_INIT
 *
 * NOTE(review): the body of the "make space" loop, the test on "found" and
 * the return statement are not visible in this listing - confirm against
 * the full file.
 */
1249 entry_alloc(pgssHashKey *key, const char *query, int query_len, bool sticky)
1254 /* Make space if needed */
1255 while (hash_get_num_entries(pgss_hash) >= pgss_max)
1258 /* Find or create an entry with desired hash code */
1259 entry = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER, &found);
1263 /* New entry, initialize it */
1265 /* reset the statistics */
1266 memset(&entry->counters, 0, sizeof(Counters));
1267 /* set the appropriate initial usage count */
1268 entry->counters.usage = sticky ? pgss->cur_median_usage : USAGE_INIT;
1269 /* re-initialize the mutex each time ... we assume no one using it */
1270 SpinLockInit(&entry->mutex);
1271 /* ... and don't forget the query text */
1272 Assert(query_len >= 0 && query_len < pgss->query_size);
1273 entry->query_len = query_len;
1274 memcpy(entry->query, query, query_len);
/* the stored copy is always NUL-terminated, even though the input need not be */
1275 entry->query[query_len] = '\0';
1282 * qsort comparator for sorting into increasing usage order
/*
 * entry_cmp
 *
 * Comparator with the qsort() callback signature.  lhs and rhs each point
 * at an array element that is itself a pointer to a pgssEntry, hence the
 * cast to (pgssEntry *const *) followed by a dereference.  The comparison
 * key is counters.usage, read as a double.
 *
 * NOTE(review): the return statements of the three branches are not
 * visible in this listing - confirm the returned values against the full
 * file.
 */
1285 entry_cmp(const void *lhs, const void *rhs)
1287 double l_usage = (*(pgssEntry *const *) lhs)->counters.usage;
1288 double r_usage = (*(pgssEntry *const *) rhs)->counters.usage;
1290 if (l_usage < r_usage)
1292 else if (l_usage > r_usage)
1299 * Deallocate least used entries.
1300 * Caller must hold an exclusive lock on pgss->lock.
/*
 * Eviction pass over pgss_hash.  Steps visible in this listing:
 *
 *   1. Allocate an array of entry pointers sized by the current number of
 *      hash entries, and fill it during a sequential scan of the table.
 *      During the same scan each entry usage is multiplied by a decay
 *      factor: STICKY_DECREASE_FACTOR when counters.calls == 0,
 *      USAGE_DECREASE_FACTOR on the other path.
 *   2. Sort the array with entry_cmp.
 *   3. Store the usage of the element at index i / 2 in
 *      pgss->cur_median_usage.
 *   4. Remove the first nvictims entries from the hash table.
 *
 * NOTE(review): short lines are missing from this listing (the signature,
 * declarations of entry, i and nvictims, the else keyword between the two
 * decay statements, any guard in front of the median assignment, and the
 * release of the entries array) - confirm against the full file.
 */
1305 HASH_SEQ_STATUS hash_seq;
1306 pgssEntry **entries;
1312 * Sort entries by usage and deallocate USAGE_DEALLOC_PERCENT of them.
1313 * While we're scanning the table, apply the decay factor to the usage
1317 entries = palloc(hash_get_num_entries(pgss_hash) * sizeof(pgssEntry *));
1320 hash_seq_init(&hash_seq, pgss_hash);
1321 while ((entry = hash_seq_search(&hash_seq)) != NULL)
1323 entries[i++] = entry;
1324 /* "Sticky" entries get a different usage decay rate. */
1325 if (entry->counters.calls == 0)
1326 entry->counters.usage *= STICKY_DECREASE_FACTOR;
1328 entry->counters.usage *= USAGE_DECREASE_FACTOR;
1331 qsort(entries, i, sizeof(pgssEntry *), entry_cmp);
1333 /* Also, record the (approximate) median usage */
1335 pgss->cur_median_usage = entries[i / 2]->counters.usage;
/* victim count: USAGE_DEALLOC_PERCENT percent of the entries, at least 10, never more than i */
1337 nvictims = Max(10, i * USAGE_DEALLOC_PERCENT / 100);
1338 nvictims = Min(nvictims, i);
/* the array is sorted, so the victims are the entries at its front */
1340 for (i = 0; i < nvictims; i++)
1342 hash_search(pgss_hash, &entries[i]->key, HASH_REMOVE, NULL);
1349 * Release all entries.
/*
 * Empties pgss_hash.
 *
 * Takes pgss->lock in LW_EXCLUSIVE mode, walks the table with a sequential
 * scan, removes every entry by its key, and releases the lock again.  The
 * lock is acquired and released inside this block, so the caller is not
 * expected to hold it (NOTE(review): confirm against callers).
 *
 * NOTE(review): the signature and the declaration of entry are not visible
 * in this listing.
 */
1354 HASH_SEQ_STATUS hash_seq;
1357 LWLockAcquire(pgss->lock, LW_EXCLUSIVE);
1359 hash_seq_init(&hash_seq, pgss_hash);
1360 while ((entry = hash_seq_search(&hash_seq)) != NULL)
/* remove the entry the scan currently points at */
1362 hash_search(pgss_hash, &entry->key, HASH_REMOVE, NULL);
1365 LWLockRelease(pgss->lock);
1370 * AppendJumble: Append a value that is substantive in a given query to
1371 * the current jumble.
/*
 * AppendJumble
 *
 * Parameters:
 *   jstate - jumble state; its jumble buffer is written and its jumble_len
 *            is read at entry and stored back at exit
 *   item   - start of the bytes to append
 *   size   - number of bytes available at item
 *
 * When the buffer already holds JUMBLE_SIZE bytes, its contents are
 * replaced by their uint32 hash_any() value before more data is copied in.
 *
 * NOTE(review): the loop header that feeds item in chunks (and the lines
 * advancing item / reducing size, plus the declaration of part_size) are
 * not visible in this listing - confirm against the full file.
 */
1374 AppendJumble(pgssJumbleState *jstate, const unsigned char *item, Size size)
1376 unsigned char *jumble = jstate->jumble;
1377 Size jumble_len = jstate->jumble_len;
1380 * Whenever the jumble buffer is full, we hash the current contents and
1381 * reset the buffer to contain just that hash value, thus relying on the
1382 * hash to summarize everything so far.
1388 if (jumble_len >= JUMBLE_SIZE)
1390 uint32 start_hash = hash_any(jumble, JUMBLE_SIZE);
1392 memcpy(jumble, &start_hash, sizeof(start_hash));
1393 jumble_len = sizeof(start_hash);
/* copy no more than what still fits into the buffer */
1395 part_size = Min(size, JUMBLE_SIZE - jumble_len);
1396 memcpy(jumble + jumble_len, item, part_size);
1397 jumble_len += part_size;
/* write the local fill level back into the shared state */
1401 jstate->jumble_len = jumble_len;
1405 * Wrappers around AppendJumble to encapsulate details of serialization
1406 * of individual local variable elements.
/*
 * APP_JUMB(item): append the raw bytes of item, sizeof(item) bytes starting
 * at its address.  item must be an lvalue because its address is taken, and
 * a variable named jstate must be in scope where the macro is expanded.
 */
1408 #define APP_JUMB(item) \
1409 AppendJumble(jstate, (const unsigned char *) &(item), sizeof(item))
/*
 * APP_JUMB_STRING(str): append the C string str including its terminating
 * NUL byte (strlen(str) + 1 bytes).  str appears twice in the expansion, so
 * it should be free of side effects; jstate must be in scope as above.
 */
1410 #define APP_JUMB_STRING(str) \
1411 AppendJumble(jstate, (const unsigned char *) (str), strlen(str) + 1)
1414 * JumbleQuery: Selectively serialize the query tree, appending significant
1415 * data to the "query jumble" while ignoring nonsignificant data.
1417 * Rule of thumb for what to include is that we should ignore anything not
1418 * semantically significant (such as alias names) as well as anything that can
1419 * be deduced from child nodes (else we'd just be double-hashing that piece of information).
/*
 * JumbleQuery
 *
 * Parameters:
 *   jstate - jumble state that receives the appended data
 *   query  - query tree to serialize; asserted to be a Query node whose
 *            utilityStmt is NULL
 *
 * Appends commandType first and then hands each of the sub-structures
 * below to JumbleExpr() or JumbleRangeTable(), in exactly the order
 * written here.  Fields that are deliberately left out (resultRelation,
 * rowMarks) are called out by the existing comments.
 */
1423 JumbleQuery(pgssJumbleState *jstate, Query *query)
1425 Assert(IsA(query, Query));
1426 Assert(query->utilityStmt == NULL);
1428 APP_JUMB(query->commandType);
1429 /* resultRelation is usually predictable from commandType */
1430 JumbleExpr(jstate, (Node *) query->cteList);
/* the range table has its own walker because its members get kind-specific treatment */
1431 JumbleRangeTable(jstate, query->rtable);
1432 JumbleExpr(jstate, (Node *) query->jointree);
1433 JumbleExpr(jstate, (Node *) query->targetList);
1434 JumbleExpr(jstate, (Node *) query->returningList);
1435 JumbleExpr(jstate, (Node *) query->groupClause);
1436 JumbleExpr(jstate, query->havingQual);
1437 JumbleExpr(jstate, (Node *) query->windowClause);
1438 JumbleExpr(jstate, (Node *) query->distinctClause);
1439 JumbleExpr(jstate, (Node *) query->sortClause);
1440 JumbleExpr(jstate, query->limitOffset);
1441 JumbleExpr(jstate, query->limitCount);
1442 /* we ignore rowMarks */
1443 JumbleExpr(jstate, query->setOperations);
1447 * Jumble a range table
/*
 * JumbleRangeTable
 *
 * Parameters:
 *   jstate - jumble state that receives the appended data
 *   rtable - list whose members are asserted to be RangeTblEntry nodes
 *
 * For every member the rtekind is appended, followed by data selected by a
 * switch on rtekind: relid, the subquery (via JumbleQuery), jointype,
 * funcexpr, values_lists, or ctename together with ctelevelsup.  A kind
 * that matches none of the cases is reported with elog(ERROR).
 *
 * NOTE(review): the loop header and the case labels are not visible in
 * this listing, so which rtekind selects which field must be confirmed
 * against the full file.
 */
1450 JumbleRangeTable(pgssJumbleState *jstate, List *rtable)
1456 RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
1458 Assert(IsA(rte, RangeTblEntry));
1459 APP_JUMB(rte->rtekind);
1460 switch (rte->rtekind)
1463 APP_JUMB(rte->relid);
1466 JumbleQuery(jstate, rte->subquery);
1469 APP_JUMB(rte->jointype);
1472 JumbleExpr(jstate, rte->funcexpr);
1475 JumbleExpr(jstate, (Node *) rte->values_lists);
1480 * Depending on the CTE name here isn't ideal, but it's the
1481 * only info we have to identify the referenced WITH item.
1483 APP_JUMB_STRING(rte->ctename);
1484 APP_JUMB(rte->ctelevelsup);
1487 elog(ERROR, "unrecognized RTE kind: %d", (int) rte->rtekind);
1494 * Jumble an expression tree
1496 * In general this function should handle all the same node types that
1497 * expression_tree_walker() does, and therefore it's coded to be as parallel
1498 * to that function as possible. However, since we are only invoked on
1499 * queries immediately post-parse-analysis, we need not handle node types
1500 * that only appear in planning.
1502 * Note: the reason we don't simply use expression_tree_walker() is that the
1503 * point of that function is to support tree walkers that don't care about
1504 * most tree node types, but here we care about all types. We should complain
1505 * about any unrecognized node type.
/*
 * JumbleExpr
 *
 * Parameters:
 *   jstate - jumble state that receives the appended data
 *   node   - root of the expression (sub)tree to serialize
 *
 * Behaviour visible in this listing:
 *   - check_stack_depth() is called on every invocation because the
 *     function recurses into child nodes.
 *   - node->type is always appended before the switch on nodeTag(node).
 *   - Each case appends the scalar fields listed in it and recurses into
 *     its child expressions; Const nodes additionally have their location
 *     recorded through RecordConstLocation().
 *   - A node type without a matching case only produces an elog(WARNING);
 *     it does not raise an error.
 *
 * NOTE(review): short lines are missing from this listing (the handling of
 * a NULL node, the declaration of temp, several case labels such as those
 * preceding the Var, Const and Param sections, and the break statements) -
 * confirm against the full file before relying on case boundaries.
 */
1508 JumbleExpr(pgssJumbleState *jstate, Node *node)
1515 /* Guard against stack overflow due to overly complex expressions */
1516 check_stack_depth();
1519 * We always emit the node's NodeTag, then any additional fields that are
1520 * considered significant, and then we recurse to any child nodes.
1522 APP_JUMB(node->type);
1524 switch (nodeTag(node))
1528 Var *var = (Var *) node;
1530 APP_JUMB(var->varno);
1531 APP_JUMB(var->varattno);
1532 APP_JUMB(var->varlevelsup);
1537 Const *c = (Const *) node;
1539 /* We jumble only the constant's type, not its value */
1540 APP_JUMB(c->consttype);
1541 /* Also, record its parse location for query normalization */
1542 RecordConstLocation(jstate, c->location);
1547 Param *p = (Param *) node;
1549 APP_JUMB(p->paramkind);
1550 APP_JUMB(p->paramid);
1551 APP_JUMB(p->paramtype);
1556 Aggref *expr = (Aggref *) node;
1558 APP_JUMB(expr->aggfnoid);
1559 JumbleExpr(jstate, (Node *) expr->args);
1560 JumbleExpr(jstate, (Node *) expr->aggorder);
1561 JumbleExpr(jstate, (Node *) expr->aggdistinct);
1566 WindowFunc *expr = (WindowFunc *) node;
1568 APP_JUMB(expr->winfnoid);
1569 APP_JUMB(expr->winref);
1570 JumbleExpr(jstate, (Node *) expr->args);
1575 ArrayRef *aref = (ArrayRef *) node;
1577 JumbleExpr(jstate, (Node *) aref->refupperindexpr);
1578 JumbleExpr(jstate, (Node *) aref->reflowerindexpr);
1579 JumbleExpr(jstate, (Node *) aref->refexpr);
1580 JumbleExpr(jstate, (Node *) aref->refassgnexpr);
1585 FuncExpr *expr = (FuncExpr *) node;
1587 APP_JUMB(expr->funcid);
1588 JumbleExpr(jstate, (Node *) expr->args);
1591 case T_NamedArgExpr:
1593 NamedArgExpr *nae = (NamedArgExpr *) node;
1595 APP_JUMB(nae->argnumber);
1596 JumbleExpr(jstate, (Node *) nae->arg);
1600 case T_DistinctExpr: /* struct-equivalent to OpExpr */
1601 case T_NullIfExpr: /* struct-equivalent to OpExpr */
1603 OpExpr *expr = (OpExpr *) node;
1605 APP_JUMB(expr->opno);
1606 JumbleExpr(jstate, (Node *) expr->args);
1609 case T_ScalarArrayOpExpr:
1611 ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) node;
1613 APP_JUMB(expr->opno);
1614 APP_JUMB(expr->useOr);
1615 JumbleExpr(jstate, (Node *) expr->args);
1620 BoolExpr *expr = (BoolExpr *) node;
1622 APP_JUMB(expr->boolop);
1623 JumbleExpr(jstate, (Node *) expr->args);
1628 SubLink *sublink = (SubLink *) node;
1630 APP_JUMB(sublink->subLinkType);
1631 JumbleExpr(jstate, (Node *) sublink->testexpr);
/* the subselect is a whole query tree, so it goes through JumbleQuery */
1632 JumbleQuery(jstate, (Query *) sublink->subselect);
1637 FieldSelect *fs = (FieldSelect *) node;
1639 APP_JUMB(fs->fieldnum);
1640 JumbleExpr(jstate, (Node *) fs->arg);
1645 FieldStore *fstore = (FieldStore *) node;
1647 JumbleExpr(jstate, (Node *) fstore->arg);
1648 JumbleExpr(jstate, (Node *) fstore->newvals);
1653 RelabelType *rt = (RelabelType *) node;
1655 APP_JUMB(rt->resulttype);
1656 JumbleExpr(jstate, (Node *) rt->arg);
1661 CoerceViaIO *cio = (CoerceViaIO *) node;
1663 APP_JUMB(cio->resulttype);
1664 JumbleExpr(jstate, (Node *) cio->arg);
1667 case T_ArrayCoerceExpr:
1669 ArrayCoerceExpr *acexpr = (ArrayCoerceExpr *) node;
1671 APP_JUMB(acexpr->resulttype);
1672 JumbleExpr(jstate, (Node *) acexpr->arg);
1675 case T_ConvertRowtypeExpr:
1677 ConvertRowtypeExpr *crexpr = (ConvertRowtypeExpr *) node;
1679 APP_JUMB(crexpr->resulttype);
1680 JumbleExpr(jstate, (Node *) crexpr->arg);
1685 CollateExpr *ce = (CollateExpr *) node;
1687 APP_JUMB(ce->collOid);
1688 JumbleExpr(jstate, (Node *) ce->arg);
1693 CaseExpr *caseexpr = (CaseExpr *) node;
1695 JumbleExpr(jstate, (Node *) caseexpr->arg);
/* every WHEN arm contributes its condition and then its result */
1696 foreach(temp, caseexpr->args)
1698 CaseWhen *when = (CaseWhen *) lfirst(temp);
1700 Assert(IsA(when, CaseWhen));
1701 JumbleExpr(jstate, (Node *) when->expr);
1702 JumbleExpr(jstate, (Node *) when->result);
1704 JumbleExpr(jstate, (Node *) caseexpr->defresult);
1707 case T_CaseTestExpr:
1709 CaseTestExpr *ct = (CaseTestExpr *) node;
1711 APP_JUMB(ct->typeId);
1715 JumbleExpr(jstate, (Node *) ((ArrayExpr *) node)->elements);
1718 JumbleExpr(jstate, (Node *) ((RowExpr *) node)->args);
1720 case T_RowCompareExpr:
1722 RowCompareExpr *rcexpr = (RowCompareExpr *) node;
1724 APP_JUMB(rcexpr->rctype);
1725 JumbleExpr(jstate, (Node *) rcexpr->largs);
1726 JumbleExpr(jstate, (Node *) rcexpr->rargs);
1729 case T_CoalesceExpr:
1730 JumbleExpr(jstate, (Node *) ((CoalesceExpr *) node)->args);
1734 MinMaxExpr *mmexpr = (MinMaxExpr *) node;
1736 APP_JUMB(mmexpr->op);
1737 JumbleExpr(jstate, (Node *) mmexpr->args);
1742 XmlExpr *xexpr = (XmlExpr *) node;
1744 APP_JUMB(xexpr->op);
1745 JumbleExpr(jstate, (Node *) xexpr->named_args);
1746 JumbleExpr(jstate, (Node *) xexpr->args);
1751 NullTest *nt = (NullTest *) node;
1753 APP_JUMB(nt->nulltesttype);
1754 JumbleExpr(jstate, (Node *) nt->arg);
1759 BooleanTest *bt = (BooleanTest *) node;
1761 APP_JUMB(bt->booltesttype);
1762 JumbleExpr(jstate, (Node *) bt->arg);
1765 case T_CoerceToDomain:
1767 CoerceToDomain *cd = (CoerceToDomain *) node;
1769 APP_JUMB(cd->resulttype);
1770 JumbleExpr(jstate, (Node *) cd->arg);
1773 case T_CoerceToDomainValue:
1775 CoerceToDomainValue *cdv = (CoerceToDomainValue *) node;
1777 APP_JUMB(cdv->typeId);
1780 case T_SetToDefault:
1782 SetToDefault *sd = (SetToDefault *) node;
1784 APP_JUMB(sd->typeId);
1787 case T_CurrentOfExpr:
1789 CurrentOfExpr *ce = (CurrentOfExpr *) node;
1791 APP_JUMB(ce->cvarno);
/* cursor_name may be NULL, so it is appended only when present */
1792 if (ce->cursor_name)
1793 APP_JUMB_STRING(ce->cursor_name);
1794 APP_JUMB(ce->cursor_param);
1799 TargetEntry *tle = (TargetEntry *) node;
1801 APP_JUMB(tle->resno);
1802 APP_JUMB(tle->ressortgroupref);
1803 JumbleExpr(jstate, (Node *) tle->expr);
1808 RangeTblRef *rtr = (RangeTblRef *) node;
1810 APP_JUMB(rtr->rtindex);
1815 JoinExpr *join = (JoinExpr *) node;
1817 APP_JUMB(join->jointype);
1818 APP_JUMB(join->isNatural);
1819 APP_JUMB(join->rtindex);
1820 JumbleExpr(jstate, join->larg);
1821 JumbleExpr(jstate, join->rarg);
1822 JumbleExpr(jstate, join->quals);
1827 FromExpr *from = (FromExpr *) node;
1829 JumbleExpr(jstate, (Node *) from->fromlist);
1830 JumbleExpr(jstate, from->quals);
/* node is treated as a List here: each member is jumbled in list order */
1834 foreach(temp, (List *) node)
1836 JumbleExpr(jstate, (Node *) lfirst(temp));
1839 case T_SortGroupClause:
1841 SortGroupClause *sgc = (SortGroupClause *) node;
1843 APP_JUMB(sgc->tleSortGroupRef);
1844 APP_JUMB(sgc->eqop);
1845 APP_JUMB(sgc->sortop);
1846 APP_JUMB(sgc->nulls_first);
1849 case T_WindowClause:
1851 WindowClause *wc = (WindowClause *) node;
1853 APP_JUMB(wc->winref);
1854 APP_JUMB(wc->frameOptions);
1855 JumbleExpr(jstate, (Node *) wc->partitionClause);
1856 JumbleExpr(jstate, (Node *) wc->orderClause);
1857 JumbleExpr(jstate, wc->startOffset);
1858 JumbleExpr(jstate, wc->endOffset);
1861 case T_CommonTableExpr:
1863 CommonTableExpr *cte = (CommonTableExpr *) node;
1865 /* we store the string name because RTE_CTE RTEs need it */
1866 APP_JUMB_STRING(cte->ctename);
1867 JumbleQuery(jstate, (Query *) cte->ctequery);
1870 case T_SetOperationStmt:
1872 SetOperationStmt *setop = (SetOperationStmt *) node;
1874 APP_JUMB(setop->op);
1875 APP_JUMB(setop->all);
1876 JumbleExpr(jstate, setop->larg);
1877 JumbleExpr(jstate, setop->rarg);
1881 /* Only a warning, since we can stumble along anyway */
1882 elog(WARNING, "unrecognized node type: %d",
1883 (int) nodeTag(node));
1889 * Record location of constant within query string of query tree
1890 * that is currently being walked.
/*
 * RecordConstLocation
 *
 * Parameters:
 *   jstate   - jumble state owning the clocations array, its element count
 *              (clocations_count) and its capacity (clocations_buf_size)
 *   location - parse location of a constant; the existing comment says -1
 *              means unknown or undefined
 *
 * Appends one record with the given location and a length of -1.  When the
 * count has reached the capacity, the capacity is doubled and the array is
 * grown with repalloc() first.
 *
 * NOTE(review): the test that filters out unknown locations is not visible
 * in this listing - confirm against the full file.
 */
1893 RecordConstLocation(pgssJumbleState *jstate, int location)
1895 /* -1 indicates unknown or undefined location */
1898 /* enlarge array if needed */
1899 if (jstate->clocations_count >= jstate->clocations_buf_size)
1901 jstate->clocations_buf_size *= 2;
1902 jstate->clocations = (pgssLocationLen *)
1903 repalloc(jstate->clocations,
1904 jstate->clocations_buf_size *
1905 sizeof(pgssLocationLen));
1907 jstate->clocations[jstate->clocations_count].location = location;
1908 /* initialize lengths to -1 to simplify fill_in_constant_lengths */
1909 jstate->clocations[jstate->clocations_count].length = -1;
1910 jstate->clocations_count++;
1915 * Generate a normalized version of the query string that will be used to
1916 * represent all similar queries.
1918 * Note that the normalized representation may well vary depending on
1919 * just which "equivalent" query is used to create the hashtable entry.
1920 * We assume this is OK.
1922 * *query_len_p contains the input string length, and is updated with
1923 * the result string length (which cannot be longer) on exit.
1925 * Returns a palloc'd string, which is not necessarily null-terminated.
/*
 * generate_normalized_query
 *
 * Parameters:
 *   jstate      - jumble state holding the recorded constant locations
 *   query       - source query text
 *   query_len_p - in: byte length of query; out: byte length of the result
 *   encoding    - encoding id handed to pg_encoding_mbcliplen()
 *
 * The result buffer is obtained from palloc(max_output_len).  The text
 * between constants is copied verbatim and every constant with a known
 * length is replaced by a single question mark.
 *
 * NOTE(review): in this listing max_output_len is assigned twice in a row
 * (first limited by pgss->query_size - 1, then set to query_len under the
 * XXX remark), and the final truncation branch also sits under an XXX
 * remark.  Preprocessor lines that may disable one of each pair are not
 * visible here, and neither are the test on tok_len, the update of
 * last_off, nor the return statement - confirm against the full file.
 */
1928 generate_normalized_query(pgssJumbleState *jstate, const char *query,
1929 int *query_len_p, int encoding)
1932 int query_len = *query_len_p;
1935 len_to_wrt, /* Length (in bytes) to write */
1936 quer_loc = 0, /* Source query byte location */
1937 n_quer_loc = 0, /* Normalized query byte location */
1938 last_off = 0, /* Offset from start for previous tok */
1939 last_tok_len = 0; /* Length (in bytes) of that tok */
1942 * Get constants' lengths (core system only gives us locations). Note
1943 * this also ensures the items are sorted by location.
1945 fill_in_constant_lengths(jstate, query);
1947 /* Allocate result buffer, ensuring we limit result to allowed size */
1949 max_output_len = Min(query_len, pgss->query_size - 1);
1951 /* XXX: pg_hint_plan doesn't truncate query string. */
1952 max_output_len = query_len;
1953 norm_query = palloc(max_output_len);
1955 for (i = 0; i < jstate->clocations_count; i++)
1957 int off, /* Offset from start for cur tok */
1958 tok_len; /* Length (in bytes) of that tok */
1960 off = jstate->clocations[i].location;
1961 tok_len = jstate->clocations[i].length;
1964 continue; /* ignore any duplicates */
1966 /* Copy next chunk, or as much as will fit */
1967 len_to_wrt = off - last_off;
1968 len_to_wrt -= last_tok_len;
1969 len_to_wrt = Min(len_to_wrt, max_output_len - n_quer_loc);
1971 Assert(len_to_wrt >= 0);
1972 memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
1973 n_quer_loc += len_to_wrt;
/* the constant itself is replaced by a one-byte placeholder, if room is left */
1975 if (n_quer_loc < max_output_len)
1976 norm_query[n_quer_loc++] = '?';
/* continue reading the source right after the constant just skipped */
1978 quer_loc = off + tok_len;
1980 last_tok_len = tok_len;
1982 /* If we run out of space, might as well stop iterating */
1983 if (n_quer_loc >= max_output_len)
1988 * We've copied up until the last ignorable constant. Copy over the
1989 * remaining bytes of the original query string, or at least as much as
1992 len_to_wrt = query_len - quer_loc;
1993 len_to_wrt = Min(len_to_wrt, max_output_len - n_quer_loc);
1995 Assert(len_to_wrt >= 0);
1996 memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
1997 n_quer_loc += len_to_wrt;
1999 /* XXX: pg_hint_plan doesn't truncate query string. */
2002 * If we ran out of space, we need to do an encoding-aware truncation,
2003 * just to make sure we don't have an incomplete character at the end.
2005 if (n_quer_loc >= max_output_len)
2006 query_len = pg_encoding_mbcliplen(encoding,
2009 pgss->query_size - 1);
2012 query_len = n_quer_loc;
/* report the length of the normalized text back to the caller */
2014 *query_len_p = query_len;
2019 * Given a valid SQL string and an array of constant-location records,
2020 * fill in the textual lengths of those constants.
2022 * The constants may use any allowed constant syntax, such as float literals,
2023 * bit-strings, single-quoted strings and dollar-quoted strings. This is
2024 * accomplished by using the public API for the core scanner.
2026 * It is the caller's job to ensure that the string is a valid SQL statement
2027 * with constants at the indicated locations. Since in practice the string
2028 * has already been parsed, and the locations that the caller provides will
2029 * have originated from within the authoritative parser, this should not be a problem.
2032 * Duplicate constant pointers are possible, and will have their lengths
2033 * marked as '-1', so that they are later ignored. (Actually, we assume the
2034 * lengths were initialized as -1 to start with, and don't change them here.)
2036 * N.B. There is an assumption that a '-' character at a Const location begins
2037 * a negative numeric constant. This precludes there ever being another
2038 * reason for a constant to start with a '-'.
/*
 * fill_in_constant_lengths
 *
 * Parameters:
 *   jstate - jumble state whose clocations array is sorted in place (by
 *            comp_location) and whose length fields are filled in
 *   query  - query text that is re-scanned with the core scanner
 *
 * For each recorded location the scanner is advanced token by token; once
 * the wanted constant is reached its length is taken with strlen() on
 * yyextra.scanbuf at that location.  A location that is not greater than
 * last_loc is skipped as a duplicate and keeps its initial length.  The
 * scanner is shut down with scanner_finish() at the end.
 *
 * NOTE(review): short lines are missing from this listing (declarations of
 * i, last_loc, tok and yylloc, the remaining scanner_init() arguments, the
 * inner loop header, the end-of-input and position tests, and the update
 * of last_loc) - confirm against the full file.
 */
2041 fill_in_constant_lengths(pgssJumbleState *jstate, const char *query)
2043 pgssLocationLen *locs;
2044 core_yyscan_t yyscanner;
2045 core_yy_extra_type yyextra;
2046 core_YYSTYPE yylval;
2052 * Sort the records by location so that we can process them in order while
2053 * scanning the query text.
2055 if (jstate->clocations_count > 1)
2056 qsort(jstate->clocations, jstate->clocations_count,
2057 sizeof(pgssLocationLen), comp_location);
2058 locs = jstate->clocations;
2060 /* initialize the flex scanner --- should match raw_parser() */
2061 yyscanner = scanner_init(query,
2066 /* Search for each constant, in sequence */
2067 for (i = 0; i < jstate->clocations_count; i++)
2069 int loc = locs[i].location;
2074 if (loc <= last_loc)
2075 continue; /* Duplicate constant, ignore */
2077 /* Lex tokens until we find the desired constant */
2080 tok = core_yylex(&yylval, &yylloc, yyscanner);
2082 /* We should not hit end-of-string, but if we do, behave sanely */
2084 break; /* out of inner for-loop */
2087 * We should find the token position exactly, but if we somehow
2088 * run past it, work with that.
2092 if (query[loc] == '-')
2095 * It's a negative value - this is the one and only case
2096 * where we replace more than a single token.
2098 * Do not compensate for the core system's special-case
2099 * adjustment of location to that of the leading '-'
2100 * operator in the event of a negative constant. It is
2101 * also useful for our purposes to start from the minus
2102 * symbol. In this way, queries like "select * from foo
2103 * where bar = 1" and "select * from foo where bar = -2"
2104 * will have identical normalized query strings.
2106 tok = core_yylex(&yylval, &yylloc, yyscanner);
2108 break; /* out of inner for-loop */
2112 * We now rely on the assumption that flex has placed a zero
2113 * byte after the text of the current token in scanbuf.
/* the length is measured in yyextra.scanbuf starting at loc, not in query itself */
2115 locs[i].length = strlen(yyextra.scanbuf + loc);
2116 break; /* out of inner for-loop */
2120 /* If we hit end-of-string, give up, leaving remaining lengths -1 */
2127 scanner_finish(yyscanner);
2131 * comp_location: comparator for qsorting pgssLocationLen structs by location
2134 comp_location(const void *a, const void *b)
2136 int l = ((const pgssLocationLen *) a)->location;
2137 int r = ((const pgssLocationLen *) b)->location;