1 /*-------------------------------------------------------------------------
4 * Track statement execution times across a whole database cluster.
6 * Execution costs are totalled for each distinct source query, and kept in
7 * a shared hashtable. (We track only as many distinct queries as will fit
8 * in the designated amount of shared memory.)
10 * As of Postgres 9.2, this module normalizes query entries. Normalization
11 * is a process whereby similar queries, typically differing only in their
12 * constants (though the exact rules are somewhat more subtle than that) are
13 * recognized as equivalent, and are tracked as a single entry. This is
14 * particularly useful for non-prepared queries.
16 * Normalization is implemented by fingerprinting queries, selectively
17 * serializing those fields of each query tree's nodes that are judged to be
18 * essential to the query. This is referred to as a query jumble. This is
19 * distinct from a regular serialization in that various extraneous
20 * information is ignored as irrelevant or not essential to the query, such
21 * as the collations of Vars and, most notably, the values of constants.
23 * This jumble is acquired at the end of parse analysis of each query, and
24 * a 32-bit hash of it is stored into the query's Query.queryId field.
25 * The server then copies this value around, making it available in plan
26 * tree(s) generated from the query. The executor can then use this value
27 * to blame query costs on the proper queryId.
29 * Note about locking issues: to create or delete an entry in the shared
30 * hashtable, one must hold pgss->lock exclusively. Modifying any field
31 * in an entry except the counters requires the same. To look up an entry,
32 * one must hold the lock shared. To read or update the counters within
33 * an entry, one must hold the lock shared or exclusive (so the entry doesn't
34 * disappear!) and also take the entry's mutex spinlock.
37 * Copyright (c) 2008-2014, PostgreSQL Global Development Group
40 * contrib/pg_stat_statements/pg_stat_statements.c
42 *-------------------------------------------------------------------------
51 #include "access/hash.h"
53 #include "executor/instrument.h"
55 #include "mb/pg_wchar.h"
56 #include "miscadmin.h"
57 #include "parser/analyze.h"
58 #include "parser/parsetree.h"
60 #include "parser/scanner.h"
63 #include "storage/fd.h"
64 #include "storage/ipc.h"
65 #include "storage/spin.h"
66 #include "tcop/utility.h"
67 #include "utils/builtins.h"
/*
 * File-level tunables.  PGSS_DUMP_FILE and PGSS_FILE_HEADER are used by the
 * dump-file code in pgss_shmem_startup and pgss_shmem_shutdown.  USAGE_INIT,
 * USAGE_EXEC and ASSUMED_MEDIAN_INIT seed and bump the per-entry usage
 * counter (see pgss_store, entry_alloc and pgss_shmem_startup).  JUMBLE_SIZE
 * is the size of the jumble buffer allocated in pgss_post_parse_analyze.
 * NOTE(review): the users of the DECREASE/DEALLOC constants are not visible
 * in this listing.
 */
72 /* Location of stats file */
73 #define PGSS_DUMP_FILE "global/pg_stat_statements.stat"
75 /* This constant defines the magic number in the stats file header */
76 static const uint32 PGSS_FILE_HEADER = 0x20120328;
78 /* XXX: Should USAGE_EXEC reflect execution time and/or buffer usage? */
79 #define USAGE_EXEC(duration) (1.0)
80 #define USAGE_INIT (1.0) /* including initial planning */
81 #define ASSUMED_MEDIAN_INIT (10.0) /* initial assumed median usage */
82 #define USAGE_DECREASE_FACTOR (0.99) /* decreased every entry_dealloc */
83 #define STICKY_DECREASE_FACTOR (0.50) /* factor for sticky entries */
84 #define USAGE_DEALLOC_PERCENT 5 /* free this % of entries at once */
86 #define JUMBLE_SIZE 1024 /* query serialization buffer size */
89 * Hashtable key that defines the identity of a hashtable entry. We separate
90 * queries by user and by database even if they are otherwise identical.
92 * Presently, the query encoding is fully determined by the source database
93 * and so we don't really need it to be in the key. But that might not always
94 * be true. Anyway it's notationally convenient to pass it as part of the key.
/*
 * Identity of one tracked statement.  pgss_match_fn compares all four
 * members; pgss_hash_fn mixes userid, dbid and queryid and leaves encoding
 * out of the hash.  pgss_store fills the key from GetUserId(), MyDatabaseId,
 * GetDatabaseEncoding() and the queryId it was handed.
 * NOTE(review): the braces and closing typedef line of this struct are not
 * visible in this listing.
 */
96 typedef struct pgssHashKey
98 Oid userid; /* user OID */
99 Oid dbid; /* database OID */
100 int encoding; /* query encoding */
101 uint32 queryid; /* query identifier */
105 * The actual stats counters kept within pgssEntry.
/*
 * Per-statement accumulators.  pgss_store adds the figures of one execution
 * into these members while holding the spinlock of the entry.  The usage
 * member is not among the columns emitted by pg_stat_statements in the
 * visible code; entry_alloc seeds it and pgss_store bumps it by USAGE_EXEC
 * on every call.  Times are kept in milliseconds, as the member comments say.
 */
107 typedef struct Counters
109 int64 calls; /* # of times executed */
110 double total_time; /* total execution time, in msec */
111 int64 rows; /* total # of retrieved or affected rows */
112 int64 shared_blks_hit; /* # of shared buffer hits */
113 int64 shared_blks_read; /* # of shared disk blocks read */
114 int64 shared_blks_dirtied; /* # of shared disk blocks dirtied */
115 int64 shared_blks_written; /* # of shared disk blocks written */
116 int64 local_blks_hit; /* # of local buffer hits */
117 int64 local_blks_read; /* # of local disk blocks read */
118 int64 local_blks_dirtied; /* # of local disk blocks dirtied */
119 int64 local_blks_written; /* # of local disk blocks written */
120 int64 temp_blks_read; /* # of temp blocks read */
121 int64 temp_blks_written; /* # of temp blocks written */
122 double blk_read_time; /* time spent reading, in msec */
123 double blk_write_time; /* time spent writing, in msec */
124 double usage; /* usage factor */
128 * Statistics per statement
130 * NB: see the file read/write code before changing field order here.
/*
 * One hashtable entry: key, counters and the query text stored inline.
 * The dump-file code reads and writes the first offsetof(pgssEntry, mutex)
 * bytes of this struct verbatim, followed by query_len bytes of text, so
 * key, counters and query_len must stay ahead of mutex and keep their order.
 * The hashtable entry size is computed as offsetof(pgssEntry, query) plus
 * the configured query size, which is why query[] is declared with a
 * placeholder length of one.
 */
132 typedef struct pgssEntry
134 pgssHashKey key; /* hash key of entry - MUST BE FIRST */
135 Counters counters; /* the statistics for this query */
136 int query_len; /* # of valid bytes in query string */
137 slock_t mutex; /* protects the counters only */
138 char query[1]; /* VARIABLE LENGTH ARRAY - MUST BE LAST */
139 /* Note: the allocated length of query[] is actually pgss->query_size */
143 * Global shared state
/*
 * Control data kept in shared memory.  pgss_shmem_startup obtains it with
 * ShmemInitStruct and, the first time through, assigns the lock, copies
 * pgstat_track_activity_query_size into query_size and sets cur_median_usage
 * to ASSUMED_MEDIAN_INIT.  entry_alloc reads cur_median_usage when creating
 * a sticky entry.
 */
145 typedef struct pgssSharedState
147 LWLockId lock; /* protects hashtable search/modification */
148 int query_size; /* max query length in bytes */
149 double cur_median_usage; /* current median usage in hashtable */
153 * Struct for tracking locations/lengths of constants during normalization
/*
 * Position of one constant inside the query text.  An array of these hangs
 * off pgssJumbleState.clocations.
 * NOTE(review): presumably filled in by RecordConstLocation and
 * fill_in_constant_lengths, which are only declared in the visible part of
 * the file -- confirm against their definitions.
 */
155 typedef struct pgssLocationLen
157 int location; /* start offset in query text */
158 int length; /* length in bytes, or -1 to ignore */
162 * Working state for computing a query jumble and producing a normalized query string
/*
 * Scratch state for one jumbling pass.  pgss_post_parse_analyze allocates
 * jumble with JUMBLE_SIZE bytes, allocates clocations with room for 32
 * entries, and zeroes the counts before calling JumbleQuery.
 * NOTE(review): the member that belongs to the "Number of bytes used"
 * comment is not visible in this listing; pgss_post_parse_analyze assigns
 * and reads a member named jumble_len, so it presumably sits there.
 */
165 typedef struct pgssJumbleState
167 /* Jumble of current query tree */
168 unsigned char *jumble;
170 /* Number of bytes used in jumble[] */
173 /* Array of locations of constants that should be removed */
174 pgssLocationLen *clocations;
176 /* Allocated length of clocations array */
177 int clocations_buf_size;
179 /* Current number of valid entries in clocations array */
180 int clocations_count;
183 /*---- Local variables ----*/
/*
 * Backend-local state.  nested_level is read by pgss_enabled(); the code
 * that adjusts it is not visible in this listing (NOTE(review)).  The prev_*
 * pointers hold whatever hook was installed before this module loaded: the
 * load callback saves them, each hook chains to them, and the unload callback
 * puts them back.  pgss and pgss_hash are set by pgss_shmem_startup and are
 * NULL-checked by the entry points before use.
 */
185 /* Current nesting depth of ExecutorRun+ProcessUtility calls */
186 static int nested_level = 0;
188 /* Saved hook values in case of unload */
189 static shmem_startup_hook_type prev_shmem_startup_hook = NULL;
190 static post_parse_analyze_hook_type prev_post_parse_analyze_hook = NULL;
191 static ExecutorStart_hook_type prev_ExecutorStart = NULL;
192 static ExecutorRun_hook_type prev_ExecutorRun = NULL;
193 static ExecutorFinish_hook_type prev_ExecutorFinish = NULL;
194 static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
195 static ProcessUtility_hook_type prev_ProcessUtility = NULL;
197 /* Links to shared memory state */
198 static pgssSharedState *pgss = NULL;
199 static HTAB *pgss_hash = NULL;
201 /*---- GUC variables ----*/
/*
 * Tracking levels.  These are the values stored in pgss_track, mapped from
 * their textual names by track_options and tested by pgss_enabled().
 * NOTE(review): the enum header and closing typedef line are not visible in
 * this listing.
 */
205 PGSS_TRACK_NONE, /* track no statements */
206 PGSS_TRACK_TOP, /* only top level statements */
207 PGSS_TRACK_ALL /* all statements, including nested ones */
/*
 * Name-to-value table for the pg_stat_statements.track enum setting; each
 * row is name, value, hidden flag (all rows are non-hidden here).
 * NOTE(review): the terminating row and closing brace of the array are not
 * visible in this listing.
 */
210 static const struct config_enum_entry track_options[] =
212 {"none", PGSS_TRACK_NONE, false},
213 {"top", PGSS_TRACK_TOP, false},
214 {"all", PGSS_TRACK_ALL, false},
/*
 * Backing variables for the custom settings registered in the module load
 * callback (pg_stat_statements.max, .track, .track_utility and .save).
 * pgss_max also sizes the shared hashtable and bounds the number of entries
 * in entry_alloc.
 */
218 static int pgss_max; /* max # statements to track */
219 static int pgss_track; /* tracking level */
220 static bool pgss_track_utility; /* whether to track utility commands */
221 static bool pgss_save; /* whether to save stats across shutdown */
/*
 * True when the current statement should be tracked: always under
 * PGSS_TRACK_ALL, and under PGSS_TRACK_TOP only while nested_level is zero.
 * PGSS_TRACK_NONE therefore never enables tracking.
 */
224 #define pgss_enabled() \
225 (pgss_track == PGSS_TRACK_ALL || \
226 (pgss_track == PGSS_TRACK_TOP && nested_level == 0))
228 /*---- Function declarations ----*/
/*
 * Forward declarations.  The two Datum functions are the SQL-callable entry
 * points (registered with PG_FUNCTION_INFO_V1); the pgss_* hook functions are
 * installed by the module load callback; the remaining statics are helpers
 * for the hashtable, the stats store and query jumbling.
 * NOTE(review): some continuation lines of these prototypes are not visible
 * in this listing (for example the last parameter of pgss_ExecutorRun).
 */
233 Datum pg_stat_statements_reset(PG_FUNCTION_ARGS);
234 Datum pg_stat_statements(PG_FUNCTION_ARGS);
236 PG_FUNCTION_INFO_V1(pg_stat_statements_reset);
237 PG_FUNCTION_INFO_V1(pg_stat_statements);
239 static void pgss_shmem_startup(void);
240 static void pgss_shmem_shutdown(int code, Datum arg);
241 static void pgss_post_parse_analyze(ParseState *pstate, Query *query);
242 static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags);
243 static void pgss_ExecutorRun(QueryDesc *queryDesc,
244 ScanDirection direction,
246 static void pgss_ExecutorFinish(QueryDesc *queryDesc);
247 static void pgss_ExecutorEnd(QueryDesc *queryDesc);
248 static void pgss_ProcessUtility(Node *parsetree,
249 const char *queryString, ParamListInfo params, bool isTopLevel,
250 DestReceiver *dest, char *completionTag);
251 static uint32 pgss_hash_fn(const void *key, Size keysize);
252 static int pgss_match_fn(const void *key1, const void *key2, Size keysize);
253 static uint32 pgss_hash_string(const char *str);
254 static void pgss_store(const char *query, uint32 queryId,
255 double total_time, uint64 rows,
256 const BufferUsage *bufusage,
257 pgssJumbleState *jstate);
258 static Size pgss_memsize(void);
259 static pgssEntry *entry_alloc(pgssHashKey *key, const char *query,
260 int query_len, bool sticky);
261 static void entry_dealloc(void);
262 static void entry_reset(void);
264 static void AppendJumble(pgssJumbleState *jstate,
265 const unsigned char *item, Size size);
267 static void JumbleQuery(pgssJumbleState *jstate, Query *query);
269 static void JumbleRangeTable(pgssJumbleState *jstate, List *rtable);
270 static void JumbleExpr(pgssJumbleState *jstate, Node *node);
271 static void RecordConstLocation(pgssJumbleState *jstate, int location);
273 static char *generate_normalized_query(pgssJumbleState *jstate, const char *query,
274 int *query_len_p, int encoding);
276 static void fill_in_constant_lengths(pgssJumbleState *jstate, const char *query);
277 static int comp_location(const void *a, const void *b);
/*
 * Module load callback.  Visible steps, in order: bail out unless loading
 * happens during shared_preload_libraries processing; register the four
 * custom settings; request shared memory sized by pgss_memsize() and one
 * LWLock; then save each current hook pointer and install the pgss_*
 * replacement (shmem startup, post-parse-analyze, the four executor hooks
 * and ProcessUtility).
 * NOTE(review): the function header, braces, the statement guarded by the
 * preload check and most arguments of the DefineCustom*Variable calls are
 * not visible in this listing.
 */
282 * Module load callback
288 * In order to create our shared memory area, we have to be loaded via
289 * shared_preload_libraries. If not, fall out without hooking into any of
290 * the main system. (We don't throw error here because it seems useful to
291 * allow the pg_stat_statements functions to be created even when the
292 * module isn't active. The functions must protect themselves against
293 * being called then, however.)
295 if (!process_shared_preload_libraries_in_progress)
299 * Define (or redefine) custom GUC variables.
301 DefineCustomIntVariable("pg_stat_statements.max",
302 "Sets the maximum number of statements tracked by pg_stat_statements.",
314 DefineCustomEnumVariable("pg_stat_statements.track",
315 "Selects which statements are tracked by pg_stat_statements.",
326 DefineCustomBoolVariable("pg_stat_statements.track_utility",
327 "Selects whether utility commands are tracked by pg_stat_statements.",
337 DefineCustomBoolVariable("pg_stat_statements.save",
338 "Save pg_stat_statements statistics across server shutdowns.",
348 EmitWarningsOnPlaceholders("pg_stat_statements");
351 * Request additional shared resources. (These are no-ops if we're not in
352 * the postmaster process.) We'll allocate or attach to the shared
353 * resources in pgss_shmem_startup().
355 RequestAddinShmemSpace(pgss_memsize());
356 RequestAddinLWLocks(1);
361 prev_shmem_startup_hook = shmem_startup_hook;
362 shmem_startup_hook = pgss_shmem_startup;
363 prev_post_parse_analyze_hook = post_parse_analyze_hook;
364 post_parse_analyze_hook = pgss_post_parse_analyze;
365 prev_ExecutorStart = ExecutorStart_hook;
366 ExecutorStart_hook = pgss_ExecutorStart;
367 prev_ExecutorRun = ExecutorRun_hook;
368 ExecutorRun_hook = pgss_ExecutorRun;
369 prev_ExecutorFinish = ExecutorFinish_hook;
370 ExecutorFinish_hook = pgss_ExecutorFinish;
371 prev_ExecutorEnd = ExecutorEnd_hook;
372 ExecutorEnd_hook = pgss_ExecutorEnd;
373 prev_ProcessUtility = ProcessUtility_hook;
374 ProcessUtility_hook = pgss_ProcessUtility;
/*
 * Module unload callback: puts back every hook pointer that the load
 * callback saved, in the same order they were installed.
 * NOTE(review): the function header and braces are not visible in this
 * listing.
 */
378 * Module unload callback
383 /* Uninstall hooks. */
384 shmem_startup_hook = prev_shmem_startup_hook;
385 post_parse_analyze_hook = prev_post_parse_analyze_hook;
386 ExecutorStart_hook = prev_ExecutorStart;
387 ExecutorRun_hook = prev_ExecutorRun;
388 ExecutorFinish_hook = prev_ExecutorFinish;
389 ExecutorEnd_hook = prev_ExecutorEnd;
390 ProcessUtility_hook = prev_ProcessUtility;
394 * shmem_startup hook: allocate or attach to shared memory,
395 * then load any pre-existing statistics from file.
/*
 * pgss_shmem_startup: chains to any previous shmem_startup hook, creates or
 * attaches to the shared state and hash table while holding
 * AddinShmemInitLock, registers pgss_shmem_shutdown as a shmem-exit callback
 * when not running under the postmaster, and then tries to reload entries
 * from PGSS_DUMP_FILE.
 *
 * File layout consumed here: a uint32 header that must equal
 * PGSS_FILE_HEADER, an int32 entry count, then for each entry the first
 * offsetof(pgssEntry, mutex) bytes of a pgssEntry followed by query_len
 * bytes of query text.  Entries whose calls counter is zero are treated as
 * sticky and are not loaded.  The dump file is unlinked after a successful
 * load and also on the error path.
 *
 * NOTE(review): this listing omits several lines of the function (return
 * type, braces, local declarations and the statements guarded by some of
 * the if conditions), so the targets of those conditions cannot be confirmed
 * from here.
 * NOTE(review): temp.query_len comes straight from the file; the only
 * visible checks compare it against buffer_size and query_size from above.
 * No lower-bound check is visible before it is passed to fread -- confirm
 * against the full source.
 */
398 pgss_shmem_startup(void)
410 if (prev_shmem_startup_hook)
411 prev_shmem_startup_hook();
413 /* reset in case this is a restart within the postmaster */
418 * Create or attach to the shared memory state, including hash table
420 LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
422 pgss = ShmemInitStruct("pg_stat_statements",
423 sizeof(pgssSharedState),
428 /* First time through ... */
429 pgss->lock = LWLockAssign();
430 pgss->query_size = pgstat_track_activity_query_size;
431 pgss->cur_median_usage = ASSUMED_MEDIAN_INIT;
434 /* Be sure everyone agrees on the hash table entry size */
435 query_size = pgss->query_size;
437 memset(&info, 0, sizeof(info));
438 info.keysize = sizeof(pgssHashKey);
439 info.entrysize = offsetof(pgssEntry, query) +query_size;
440 info.hash = pgss_hash_fn;
441 info.match = pgss_match_fn;
442 pgss_hash = ShmemInitHash("pg_stat_statements hash",
445 HASH_ELEM | HASH_FUNCTION | HASH_COMPARE);
447 LWLockRelease(AddinShmemInitLock);
450 * If we're in the postmaster (or a standalone backend...), set up a shmem
451 * exit hook to dump the statistics to disk.
453 if (!IsUnderPostmaster)
454 on_shmem_exit(pgss_shmem_shutdown, (Datum) 0);
457 * Attempt to load old statistics from the dump file, if this is the first
458 * time through and we weren't told not to.
460 if (found || !pgss_save)
464 * Note: we don't bother with locks here, because there should be no other
465 * processes running when this code is reached.
467 file = AllocateFile(PGSS_DUMP_FILE, PG_BINARY_R);
471 return; /* ignore not-found error */
475 buffer_size = query_size;
476 buffer = (char *) palloc(buffer_size);
478 if (fread(&header, sizeof(uint32), 1, file) != 1 ||
479 header != PGSS_FILE_HEADER ||
480 fread(&num, sizeof(int32), 1, file) != 1)
483 for (i = 0; i < num; i++)
488 if (fread(&temp, offsetof(pgssEntry, mutex), 1, file) != 1)
491 /* Encoding is the only field we can easily sanity-check */
492 if (!PG_VALID_BE_ENCODING(temp.key.encoding))
495 /* Previous incarnation might have had a larger query_size */
496 if (temp.query_len >= buffer_size)
498 buffer = (char *) repalloc(buffer, temp.query_len + 1);
499 buffer_size = temp.query_len + 1;
502 if (fread(buffer, 1, temp.query_len, file) != temp.query_len)
504 buffer[temp.query_len] = '\0';
506 /* Skip loading "sticky" entries */
507 if (temp.counters.calls == 0)
510 /* Clip to available length if needed */
511 if (temp.query_len >= query_size)
512 temp.query_len = pg_encoding_mbcliplen(temp.key.encoding,
517 /* make the hashtable entry (discards old entries if too many) */
518 entry = entry_alloc(&temp.key, buffer, temp.query_len, false);
520 /* copy in the actual stats */
521 entry->counters = temp.counters;
528 * Remove the file so it's not included in backups/replication slaves,
529 * etc. A new file will be written on next shutdown.
531 unlink(PGSS_DUMP_FILE);
537 (errcode_for_file_access(),
538 errmsg("could not read pg_stat_statement file \"%s\": %m",
544 /* If possible, throw away the bogus file; ignore any error */
545 unlink(PGSS_DUMP_FILE);
549 * shmem_shutdown hook: Dump statistics into file.
551 * Note: we don't bother with acquiring lock, because there should be no
552 * other processes running when this is called.
/*
 * pgss_shmem_shutdown: shmem-exit callback that dumps the hashtable.
 * It writes PGSS_FILE_HEADER, the entry count from hash_get_num_entries,
 * and then for every entry the first offsetof(pgssEntry, mutex) bytes of the
 * struct followed by query_len bytes of query text.  Output goes to a file
 * named after PGSS_DUMP_FILE with a .tmp suffix, which is then renamed over
 * PGSS_DUMP_FILE; the temporary file is unlinked on the failure path.
 * This is the writer for the format read back in pgss_shmem_startup.
 *
 * NOTE(review): the conditions behind the crash and pgss_save early exits,
 * the error branches of the fwrite checks and the file close are not visible
 * in this listing; how the code and arg parameters are used cannot be
 * confirmed from here.
 */
555 pgss_shmem_shutdown(int code, Datum arg)
558 HASH_SEQ_STATUS hash_seq;
562 /* Don't try to dump during a crash. */
566 /* Safety check ... shouldn't get here unless shmem is set up. */
567 if (!pgss || !pgss_hash)
570 /* Don't dump if told not to. */
574 file = AllocateFile(PGSS_DUMP_FILE ".tmp", PG_BINARY_W);
578 if (fwrite(&PGSS_FILE_HEADER, sizeof(uint32), 1, file) != 1)
580 num_entries = hash_get_num_entries(pgss_hash);
581 if (fwrite(&num_entries, sizeof(int32), 1, file) != 1)
584 hash_seq_init(&hash_seq, pgss_hash);
585 while ((entry = hash_seq_search(&hash_seq)) != NULL)
587 int len = entry->query_len;
589 if (fwrite(entry, offsetof(pgssEntry, mutex), 1, file) != 1 ||
590 fwrite(entry->query, 1, len, file) != len)
601 * Rename file into place, so we atomically replace the old one.
603 if (rename(PGSS_DUMP_FILE ".tmp", PGSS_DUMP_FILE) != 0)
605 (errcode_for_file_access(),
606 errmsg("could not rename pg_stat_statement file \"%s\": %m",
607 PGSS_DUMP_FILE ".tmp")));
613 (errcode_for_file_access(),
614 errmsg("could not write pg_stat_statement file \"%s\": %m",
615 PGSS_DUMP_FILE ".tmp")));
618 unlink(PGSS_DUMP_FILE ".tmp");
622 * Post-parse-analysis hook: mark query with a queryId
/*
 * pgss_post_parse_analyze: computes the queryId for a freshly analyzed
 * Query.  It allocates a JUMBLE_SIZE byte jumble buffer and a 32-slot
 * constant-location array, runs JumbleQuery, and stores hash_any of the
 * jumble bytes in query->queryId.  Utility statements are left with queryId
 * zero, and a computed hash of zero is remapped so that zero stays reserved
 * for them.  When at least one constant location was recorded, pgss_store is
 * called with the source text of the parse state so the normalized text can
 * be captured early.
 *
 * NOTE(review): the statements guarded by the safety check, the utilityStmt
 * test and the zero-hash test, plus the trailing arguments of the pgss_store
 * call, are not visible in this listing.
 */
625 pgss_post_parse_analyze(ParseState *pstate, Query *query)
627 pgssJumbleState jstate;
629 /* Assert we didn't do this already */
630 Assert(query->queryId == 0);
632 /* Safety check... */
633 if (!pgss || !pgss_hash)
637 * Utility statements get queryId zero. We do this even in cases where
638 * the statement contains an optimizable statement for which a queryId
639 * could be derived (such as EXPLAIN or DECLARE CURSOR). For such cases,
640 * runtime control will first go through ProcessUtility and then the
641 * executor, and we don't want the executor hooks to do anything, since we
642 * are already measuring the statement's costs at the utility level.
644 if (query->utilityStmt)
650 /* Set up workspace for query jumbling */
651 jstate.jumble = (unsigned char *) palloc(JUMBLE_SIZE);
652 jstate.jumble_len = 0;
653 jstate.clocations_buf_size = 32;
654 jstate.clocations = (pgssLocationLen *)
655 palloc(jstate.clocations_buf_size * sizeof(pgssLocationLen));
656 jstate.clocations_count = 0;
658 /* Compute query ID and mark the Query node with it */
659 JumbleQuery(&jstate, query);
660 query->queryId = hash_any(jstate.jumble, jstate.jumble_len);
663 * If we are unlucky enough to get a hash of zero, use 1 instead, to
664 * prevent confusion with the utility-statement case.
666 if (query->queryId == 0)
670 * If we were able to identify any ignorable constants, we immediately
671 * create a hash table entry for the query, so that we can record the
672 * normalized form of the query string. If there were no such constants,
673 * the normalized string would be the same as the query text anyway, so
674 * there's no need for an early entry.
676 if (jstate.clocations_count > 0)
677 pgss_store(pstate->p_sourcetext,
686 * ExecutorStart hook: start up tracking if needed
/*
 * pgss_ExecutorStart: runs the previous ExecutorStart hook if one was
 * saved, otherwise standard_ExecutorStart, and then arms timing for tracked
 * statements.  When tracking is enabled and the planned statement has a
 * nonzero queryId, and no totaltime instrumentation exists yet, one
 * Instrumentation struct with INSTRUMENT_ALL is allocated in the per-query
 * memory context of the executor state.  pgss_ExecutorEnd later reads that
 * struct.
 * NOTE(review): the else keyword between the two start calls and the braces
 * are not visible in this listing.
 */
689 pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
691 if (prev_ExecutorStart)
692 prev_ExecutorStart(queryDesc, eflags);
694 standard_ExecutorStart(queryDesc, eflags);
697 * If query has queryId zero, don't track it. This prevents double
698 * counting of optimizable statements that are directly contained in
699 * utility statements.
701 if (pgss_enabled() && queryDesc->plannedstmt->queryId != 0)
704 * Set up to track total elapsed time in ExecutorRun. Make sure the
705 * space is allocated in the per-query context so it will go away at
708 if (queryDesc->totaltime == NULL)
710 MemoryContext oldcxt;
712 oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
713 queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_ALL);
714 MemoryContextSwitchTo(oldcxt);
720 * ExecutorRun hook: all we need do is track nesting depth
/*
 * pgss_ExecutorRun: forwards to the previous ExecutorRun hook if one was
 * saved, otherwise to standard_ExecutorRun, passing its arguments through
 * unchanged.
 * NOTE(review): the nesting-depth bookkeeping that the header comment
 * mentions is not visible in this listing; presumably it adjusts
 * nested_level around the call -- confirm against the full source.
 */
723 pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, long count)
728 if (prev_ExecutorRun)
729 prev_ExecutorRun(queryDesc, direction, count);
731 standard_ExecutorRun(queryDesc, direction, count);
743 * ExecutorFinish hook: all we need do is track nesting depth
/*
 * pgss_ExecutorFinish: forwards to the previous ExecutorFinish hook if one
 * was saved, otherwise to standard_ExecutorFinish.
 * NOTE(review): the nesting-depth bookkeeping that the header comment
 * mentions is not visible in this listing; presumably it adjusts
 * nested_level around the call -- confirm against the full source.
 */
746 pgss_ExecutorFinish(QueryDesc *queryDesc)
751 if (prev_ExecutorFinish)
752 prev_ExecutorFinish(queryDesc);
754 standard_ExecutorFinish(queryDesc);
766 * ExecutorEnd hook: store results if needed
/*
 * pgss_ExecutorEnd: records the finished statement and then runs the
 * previous ExecutorEnd hook or standard_ExecutorEnd.  Recording happens only
 * when the queryId is nonzero, totaltime instrumentation was set up and
 * tracking is enabled.  InstrEndLoop finalizes the instrumentation first;
 * total elapsed time is converted from seconds to milliseconds before being
 * handed to pgss_store together with the processed row count and the buffer
 * usage of the instrumentation.  The store call is made before the executor
 * end call, which is consistent with the instrumentation having been
 * allocated in the per-query context by pgss_ExecutorStart.
 * NOTE(review): the queryId and trailing arguments of the pgss_store call
 * are not visible in this listing.
 */
769 pgss_ExecutorEnd(QueryDesc *queryDesc)
771 uint32 queryId = queryDesc->plannedstmt->queryId;
773 if (queryId != 0 && queryDesc->totaltime && pgss_enabled())
776 * Make sure stats accumulation is done. (Note: it's okay if several
777 * levels of hook all do this.)
779 InstrEndLoop(queryDesc->totaltime);
781 pgss_store(queryDesc->sourceText,
783 queryDesc->totaltime->total * 1000.0, /* convert to msec */
784 queryDesc->estate->es_processed,
785 &queryDesc->totaltime->bufusage,
789 if (prev_ExecutorEnd)
790 prev_ExecutorEnd(queryDesc);
792 standard_ExecutorEnd(queryDesc);
796 * ProcessUtility hook
/*
 * pgss_ProcessUtility: wraps utility command execution.
 *
 * Tracked path (track_utility on, tracking enabled, and the parse tree is
 * neither an ExecuteStmt nor a PrepareStmt): snapshot pgBufferUsage and the
 * current time, run the previous hook or standard_ProcessUtility, then
 * compute elapsed time and the per-counter buffer usage deltas against the
 * snapshot.  The row count is parsed from a completion tag of the form COPY
 * followed by a number.  The queryId is a hash of the raw query string, and
 * the result is passed to pgss_store with the elapsed time in milliseconds.
 *
 * Untracked path: just run the previous hook or standard_ProcessUtility.
 *
 * NOTE(review): local declarations, the error-safe wrapper around the
 * tracked call (if any), the fallback when the sscanf does not match, and
 * several pgss_store arguments are not visible in this listing.
 */
799 pgss_ProcessUtility(Node *parsetree, const char *queryString,
800 ParamListInfo params, bool isTopLevel,
801 DestReceiver *dest, char *completionTag)
804 * If it's an EXECUTE statement, we don't track it and don't increment the
805 * nesting level. This allows the cycles to be charged to the underlying
806 * PREPARE instead (by the Executor hooks), which is much more useful.
808 * We also don't track execution of PREPARE. If we did, we would get one
809 * hash table entry for the PREPARE (with hash calculated from the query
810 * string), and then a different one with the same query string (but hash
811 * calculated from the query tree) would be used to accumulate costs of
812 * ensuing EXECUTEs. This would be confusing, and inconsistent with other
813 * cases where planning time is not included at all.
815 if (pgss_track_utility && pgss_enabled() &&
816 !IsA(parsetree, ExecuteStmt) &&
817 !IsA(parsetree, PrepareStmt))
822 BufferUsage bufusage_start,
826 bufusage_start = pgBufferUsage;
827 INSTR_TIME_SET_CURRENT(start);
832 if (prev_ProcessUtility)
833 prev_ProcessUtility(parsetree, queryString, params,
834 isTopLevel, dest, completionTag);
836 standard_ProcessUtility(parsetree, queryString, params,
837 isTopLevel, dest, completionTag);
847 INSTR_TIME_SET_CURRENT(duration);
848 INSTR_TIME_SUBTRACT(duration, start);
850 /* parse command tag to retrieve the number of affected rows. */
852 sscanf(completionTag, "COPY " UINT64_FORMAT, &rows) != 1)
855 /* calc differences of buffer counters. */
856 bufusage.shared_blks_hit =
857 pgBufferUsage.shared_blks_hit - bufusage_start.shared_blks_hit;
858 bufusage.shared_blks_read =
859 pgBufferUsage.shared_blks_read - bufusage_start.shared_blks_read;
860 bufusage.shared_blks_dirtied =
861 pgBufferUsage.shared_blks_dirtied - bufusage_start.shared_blks_dirtied;
862 bufusage.shared_blks_written =
863 pgBufferUsage.shared_blks_written - bufusage_start.shared_blks_written;
864 bufusage.local_blks_hit =
865 pgBufferUsage.local_blks_hit - bufusage_start.local_blks_hit;
866 bufusage.local_blks_read =
867 pgBufferUsage.local_blks_read - bufusage_start.local_blks_read;
868 bufusage.local_blks_dirtied =
869 pgBufferUsage.local_blks_dirtied - bufusage_start.local_blks_dirtied;
870 bufusage.local_blks_written =
871 pgBufferUsage.local_blks_written - bufusage_start.local_blks_written;
872 bufusage.temp_blks_read =
873 pgBufferUsage.temp_blks_read - bufusage_start.temp_blks_read;
874 bufusage.temp_blks_written =
875 pgBufferUsage.temp_blks_written - bufusage_start.temp_blks_written;
876 bufusage.blk_read_time = pgBufferUsage.blk_read_time;
877 INSTR_TIME_SUBTRACT(bufusage.blk_read_time, bufusage_start.blk_read_time);
878 bufusage.blk_write_time = pgBufferUsage.blk_write_time;
879 INSTR_TIME_SUBTRACT(bufusage.blk_write_time, bufusage_start.blk_write_time);
881 /* For utility statements, we just hash the query string directly */
882 queryId = pgss_hash_string(queryString);
884 pgss_store(queryString,
886 INSTR_TIME_GET_MILLISEC(duration),
893 if (prev_ProcessUtility)
894 prev_ProcessUtility(parsetree, queryString, params,
895 isTopLevel, dest, completionTag);
897 standard_ProcessUtility(parsetree, queryString, params,
898 isTopLevel, dest, completionTag);
903 * Calculate hash value for a key
/*
 * pgss_hash_fn: hash callback installed on the shared hashtable.
 * key points at a pgssHashKey.  The result is the XOR of hash_uint32 applied
 * to userid, dbid and queryid.  encoding is deliberately left out, which is
 * safe because pgss_match_fn still compares it.  keysize is not used in the
 * visible lines.
 */
906 pgss_hash_fn(const void *key, Size keysize)
908 const pgssHashKey *k = (const pgssHashKey *) key;
910 /* we don't bother to include encoding in the hash */
911 return hash_uint32((uint32) k->userid) ^
912 hash_uint32((uint32) k->dbid) ^
913 hash_uint32((uint32) k->queryid);
917 * Compare two keys - zero means match
/*
 * pgss_match_fn: comparison callback installed on the shared hashtable.
 * Both arguments point at pgssHashKey structs; they match only when userid,
 * dbid, encoding and queryid are all equal.  Per the header comment a zero
 * result means match.  keysize is not used in the visible lines.
 * NOTE(review): the return statements are not visible in this listing.
 */
920 pgss_match_fn(const void *key1, const void *key2, Size keysize)
922 const pgssHashKey *k1 = (const pgssHashKey *) key1;
923 const pgssHashKey *k2 = (const pgssHashKey *) key2;
925 if (k1->userid == k2->userid &&
926 k1->dbid == k2->dbid &&
927 k1->encoding == k2->encoding &&
928 k1->queryid == k2->queryid)
935 * Given an arbitrarily long query string, produce a hash for the purposes of
936 * identifying the query, without normalizing constants. Used when hashing
937 * utility statements.
/*
 * pgss_hash_string: returns hash_any over the bytes of str up to, but not
 * including, its terminating NUL.  str must be a non-NULL NUL-terminated
 * string because strlen is applied to it.  pgss_ProcessUtility uses this to
 * derive the queryId of a utility statement from its raw text.
 */
940 pgss_hash_string(const char *str)
942 return hash_any((const unsigned char *) str, strlen(str));
946 * Store some statistics for a statement.
948 * If jstate is not NULL then we're trying to create an entry for which
949 * we have no statistics as yet; we just want to record the normalized
950 * query string. total_time, rows, bufusage are ignored in this case.
/*
 * pgss_store: find or create the hashtable entry for a statement and, when
 * called with execution figures, fold them into the counters of the entry.
 *
 * query       statement text; must not be NULL (asserted).
 * queryId     identifier that becomes key.queryid.
 * total_time  elapsed time added to counters.total_time (callers pass msec).
 * rows        row count added to counters.rows.
 * bufusage    buffer usage figures added member by member; the two block
 *             timing members are converted with INSTR_TIME_GET_MILLISEC.
 * jstate      per the header comment, non-NULL means only the normalized
 *             text is being recorded and the counters are left alone.
 *
 * Locking as visible here: the lookup runs under the shared lock.  On a
 * miss the lock is released, the text is normalized or clipped to
 * pgss->query_size - 1 bytes, and the exclusive lock is taken for
 * entry_alloc.  Counter updates happen under the spinlock of the entry.
 * The lightweight lock is released at the end on every visible path.
 *
 * NOTE(review): local declarations, the conditions selecting the miss path,
 * the normalized versus as-is branch and the counter-update branch, and the
 * final pfree of norm_query are not visible in this listing.
 */
953 pgss_store(const char *query, uint32 queryId,
954 double total_time, uint64 rows,
955 const BufferUsage *bufusage,
956 pgssJumbleState *jstate)
960 char *norm_query = NULL;
962 Assert(query != NULL);
964 /* Safety check... */
965 if (!pgss || !pgss_hash)
968 /* Set up key for hashtable search */
969 key.userid = GetUserId();
970 key.dbid = MyDatabaseId;
971 key.encoding = GetDatabaseEncoding();
972 key.queryid = queryId;
974 /* Lookup the hash table entry with shared lock. */
975 LWLockAcquire(pgss->lock, LW_SHARED);
977 entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
979 /* Create new entry, if not present */
985 * We'll need exclusive lock to make a new entry. There is no point
986 * in holding shared lock while we normalize the string, though.
988 LWLockRelease(pgss->lock);
990 query_len = strlen(query);
994 /* Normalize the string if enabled */
995 norm_query = generate_normalized_query(jstate, query,
999 /* Acquire exclusive lock as required by entry_alloc() */
1000 LWLockAcquire(pgss->lock, LW_EXCLUSIVE);
1002 entry = entry_alloc(&key, norm_query, query_len, true);
1007 * We're just going to store the query string as-is; but we have
1008 * to truncate it if over-length.
1010 if (query_len >= pgss->query_size)
1011 query_len = pg_encoding_mbcliplen(key.encoding,
1014 pgss->query_size - 1);
1016 /* Acquire exclusive lock as required by entry_alloc() */
1017 LWLockAcquire(pgss->lock, LW_EXCLUSIVE);
1019 entry = entry_alloc(&key, query, query_len, false);
1023 /* Increment the counts, except when jstate is not NULL */
1027 * Grab the spinlock while updating the counters (see comment about
1028 * locking rules at the head of the file)
1030 volatile pgssEntry *e = (volatile pgssEntry *) entry;
1032 SpinLockAcquire(&e->mutex);
1034 /* "Unstick" entry if it was previously sticky */
1035 if (e->counters.calls == 0)
1036 e->counters.usage = USAGE_INIT;
1038 e->counters.calls += 1;
1039 e->counters.total_time += total_time;
1040 e->counters.rows += rows;
1041 e->counters.shared_blks_hit += bufusage->shared_blks_hit;
1042 e->counters.shared_blks_read += bufusage->shared_blks_read;
1043 e->counters.shared_blks_dirtied += bufusage->shared_blks_dirtied;
1044 e->counters.shared_blks_written += bufusage->shared_blks_written;
1045 e->counters.local_blks_hit += bufusage->local_blks_hit;
1046 e->counters.local_blks_read += bufusage->local_blks_read;
1047 e->counters.local_blks_dirtied += bufusage->local_blks_dirtied;
1048 e->counters.local_blks_written += bufusage->local_blks_written;
1049 e->counters.temp_blks_read += bufusage->temp_blks_read;
1050 e->counters.temp_blks_written += bufusage->temp_blks_written;
1051 e->counters.blk_read_time += INSTR_TIME_GET_MILLISEC(bufusage->blk_read_time);
1052 e->counters.blk_write_time += INSTR_TIME_GET_MILLISEC(bufusage->blk_write_time);
1053 e->counters.usage += USAGE_EXEC(total_time);
1055 SpinLockRelease(&e->mutex);
1058 LWLockRelease(pgss->lock);
1060 /* We postpone this pfree until we're out of the lock */
1066 * Reset all statement statistics.
/*
 * pg_stat_statements_reset: SQL-callable function.  Raises
 * ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE when the shared state was never
 * set up, that is when the module was not loaded through
 * shared_preload_libraries.
 * NOTE(review): the ereport head, the actual reset call (presumably
 * entry_reset) and the return statement are not visible in this listing.
 */
1069 pg_stat_statements_reset(PG_FUNCTION_ARGS)
1071 if (!pgss || !pgss_hash)
1073 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1074 errmsg("pg_stat_statements must be loaded via shared_preload_libraries")));
/*
 * Result column counts for pg_stat_statements().  A caller whose declared
 * row type has PG_STAT_STATEMENTS_COLS_V1_0 attributes gets the older layout
 * without the four counters that are emitted only when
 * sql_supports_v1_1_counters is true; PG_STAT_STATEMENTS_COLS is the full
 * layout and also sizes the values and nulls arrays.
 */
1079 #define PG_STAT_STATEMENTS_COLS_V1_0 14
1080 #define PG_STAT_STATEMENTS_COLS 18
1083 * Retrieve statement statistics.
/*
 * pg_stat_statements: SQL-callable set-returning function that materializes
 * one row per hashtable entry into a tuplestore.
 *
 * Preconditions checked: the shared state exists, the caller passed a
 * ReturnSetInfo, and materialize mode is allowed.  The tuplestore is created
 * in the per-query memory context of the caller.
 *
 * The hashtable is scanned under the shared lock.  Each row starts with
 * userid and dbid.  The query text is shown only to superusers or to the
 * user that owns the entry, after conversion from the entry encoding to the
 * database encoding; everyone else sees a fixed placeholder string.  The
 * counters follow, in struct order except that usage is not emitted.
 * shared_blks_dirtied, local_blks_dirtied and the two block timing columns
 * are emitted only when the row type of the caller is not the 14-column
 * V1_0 layout.
 *
 * NOTE(review): the statement that copies the counters while the spinlock
 * is held, the sticky-entry skip, several local declarations and the else
 * branches are not visible in this listing.
 */
1086 pg_stat_statements(PG_FUNCTION_ARGS)
1088 ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
1090 Tuplestorestate *tupstore;
1091 MemoryContext per_query_ctx;
1092 MemoryContext oldcontext;
1093 Oid userid = GetUserId();
1094 bool is_superuser = superuser();
1095 HASH_SEQ_STATUS hash_seq;
1097 bool sql_supports_v1_1_counters = true;
1099 if (!pgss || !pgss_hash)
1101 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1102 errmsg("pg_stat_statements must be loaded via shared_preload_libraries")));
1104 /* check to see if caller supports us returning a tuplestore */
1105 if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
1107 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1108 errmsg("set-valued function called in context that cannot accept a set")));
1109 if (!(rsinfo->allowedModes & SFRM_Materialize))
1111 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1112 errmsg("materialize mode required, but it is not " \
1113 "allowed in this context")));
1115 /* Build a tuple descriptor for our result type */
1116 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
1117 elog(ERROR, "return type must be a row type");
1118 if (tupdesc->natts == PG_STAT_STATEMENTS_COLS_V1_0)
1119 sql_supports_v1_1_counters = false;
1121 per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
1122 oldcontext = MemoryContextSwitchTo(per_query_ctx);
1124 tupstore = tuplestore_begin_heap(true, false, work_mem);
1125 rsinfo->returnMode = SFRM_Materialize;
1126 rsinfo->setResult = tupstore;
1127 rsinfo->setDesc = tupdesc;
1129 MemoryContextSwitchTo(oldcontext);
1131 LWLockAcquire(pgss->lock, LW_SHARED);
1133 hash_seq_init(&hash_seq, pgss_hash);
1134 while ((entry = hash_seq_search(&hash_seq)) != NULL)
1136 Datum values[PG_STAT_STATEMENTS_COLS];
1137 bool nulls[PG_STAT_STATEMENTS_COLS];
1141 memset(values, 0, sizeof(values));
1142 memset(nulls, 0, sizeof(nulls));
1144 values[i++] = ObjectIdGetDatum(entry->key.userid);
1145 values[i++] = ObjectIdGetDatum(entry->key.dbid);
1147 if (is_superuser || entry->key.userid == userid)
1152 pg_do_encoding_conversion((unsigned char *) entry->query,
1154 entry->key.encoding,
1155 GetDatabaseEncoding());
1156 values[i++] = CStringGetTextDatum(qstr);
1157 if (qstr != entry->query)
1161 values[i++] = CStringGetTextDatum("<insufficient privilege>");
1163 /* copy counters to a local variable to keep locking time short */
1165 volatile pgssEntry *e = (volatile pgssEntry *) entry;
1167 SpinLockAcquire(&e->mutex);
1169 SpinLockRelease(&e->mutex);
1172 /* Skip entry if unexecuted (ie, it's a pending "sticky" entry) */
1176 values[i++] = Int64GetDatumFast(tmp.calls);
1177 values[i++] = Float8GetDatumFast(tmp.total_time);
1178 values[i++] = Int64GetDatumFast(tmp.rows);
1179 values[i++] = Int64GetDatumFast(tmp.shared_blks_hit);
1180 values[i++] = Int64GetDatumFast(tmp.shared_blks_read);
1181 if (sql_supports_v1_1_counters)
1182 values[i++] = Int64GetDatumFast(tmp.shared_blks_dirtied);
1183 values[i++] = Int64GetDatumFast(tmp.shared_blks_written);
1184 values[i++] = Int64GetDatumFast(tmp.local_blks_hit);
1185 values[i++] = Int64GetDatumFast(tmp.local_blks_read);
1186 if (sql_supports_v1_1_counters)
1187 values[i++] = Int64GetDatumFast(tmp.local_blks_dirtied);
1188 values[i++] = Int64GetDatumFast(tmp.local_blks_written);
1189 values[i++] = Int64GetDatumFast(tmp.temp_blks_read);
1190 values[i++] = Int64GetDatumFast(tmp.temp_blks_written);
1191 if (sql_supports_v1_1_counters)
1193 values[i++] = Float8GetDatumFast(tmp.blk_read_time);
1194 values[i++] = Float8GetDatumFast(tmp.blk_write_time);
1197 Assert(i == (sql_supports_v1_1_counters ?
1198 PG_STAT_STATEMENTS_COLS : PG_STAT_STATEMENTS_COLS_V1_0));
1200 tuplestore_putvalues(tupstore, tupdesc, values, nulls);
1203 LWLockRelease(pgss->lock);
1205 /* clean up and return the tuplestore */
1206 tuplestore_donestoring(tupstore);
/*
 * Shared-memory size estimate, built from two parts: the MAXALIGN-rounded
 * size of pgssSharedState, plus hash_estimate_size() for pgss_max entries.
 * Each entry is sized as the fixed part of pgssEntry up to its query field
 * plus pgstat_track_activity_query_size bytes for the query text.
 *
 * NOTE(review): the signature, the local declarations and the return
 * statement are not visible in this listing; presumably the final size is
 * handed back to the caller -- confirm against the complete file.
 */
1212 * Estimate shared memory space needed.
1220 size = MAXALIGN(sizeof(pgssSharedState));
/* per-entry footprint: pgssEntry fields before query, plus room for the text */
1221 entrysize = offsetof(pgssEntry, query) +pgstat_track_activity_query_size;
/* the sum goes through add_size(); presumably that guards against overflow -- TODO confirm */
1222 size = add_size(size, hash_estimate_size(pgss_max, entrysize));
/*
 * entry_alloc: look up the hashtable entry for key with HASH_ENTER, which
 * creates it when absent, and initialize a newly created entry.
 *
 * key       - hash key identifying the entry (passed to hash_search)
 * query     - query text to store in a new entry
 * query_len - number of bytes of query to copy; asserted to be >= 0 and
 *             smaller than pgss->query_size, presumably so that the zero
 *             byte written after the text still fits -- TODO confirm
 * sticky    - selects the initial usage value: pgss->cur_median_usage when
 *             true, USAGE_INIT otherwise
 *
 * Initialization of a new entry zeroes the counters, sets the usage value,
 * initializes the mutex and stores the query text with a zero terminator.
 * Before the lookup there is a loop that runs while the table holds
 * pgss_max or more entries.
 *
 * NOTE(review): the body of that loop, the test on found and the return
 * statement are not visible in this listing; presumably the loop calls the
 * deallocation routine and the entry pointer is returned -- confirm against
 * the complete file.
 */
1228 * Allocate a new hashtable entry.
1229 * caller must hold an exclusive lock on pgss->lock
1231 * "query" need not be null-terminated; we rely on query_len instead
1233 * If "sticky" is true, make the new entry artificially sticky so that it will
1234 * probably still be there when the query finishes execution. We do this by
1235 * giving it a median usage value rather than the normal value. (Strictly
1236 * speaking, query strings are normalized on a best effort basis, though it
1237 * would be difficult to demonstrate this even under artificial conditions.)
1239 * Note: despite needing exclusive lock, it's not an error for the target
1240 * entry to already exist. This is because pgss_store releases and
1241 * reacquires lock after failing to find a match; so someone else could
1242 * have made the entry while we waited to get exclusive lock.
1245 entry_alloc(pgssHashKey *key, const char *query, int query_len, bool sticky)
1250 /* Make space if needed */
1251 while (hash_get_num_entries(pgss_hash) >= pgss_max)
1254 /* Find or create an entry with desired hash code */
1255 entry = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER, &found);
1259 /* New entry, initialize it */
1261 /* reset the statistics */
1262 memset(&entry->counters, 0, sizeof(Counters));
1263 /* set the appropriate initial usage count */
1264 entry->counters.usage = sticky ? pgss->cur_median_usage : USAGE_INIT;
1265 /* re-initialize the mutex each time ... we assume no one using it */
1266 SpinLockInit(&entry->mutex);
1267 /* ... and don't forget the query text */
1268 Assert(query_len >= 0 && query_len < pgss->query_size);
1269 entry->query_len = query_len;
/* copy exactly query_len bytes; the terminator is written separately below */
1270 memcpy(entry->query, query, query_len);
1271 entry->query[query_len] = '\0';
/*
 * entry_cmp: comparison callback over two array slots, each holding a
 * pointer to a pgssEntry; the entries are compared by counters.usage.
 *
 * lhs, rhs - addresses of the two array elements being compared
 *
 * NOTE(review): the return statements of the branches are not visible in
 * this listing; per the description below they should produce increasing
 * usage order -- confirm against the complete file.
 */
1278 * qsort comparator for sorting into increasing usage order
1281 entry_cmp(const void *lhs, const void *rhs)
/* each argument points at an array slot, so dereference once to reach the entry */
1283 double l_usage = (*(pgssEntry *const *) lhs)->counters.usage;
1284 double r_usage = (*(pgssEntry *const *) rhs)->counters.usage;
1286 if (l_usage < r_usage)
1288 else if (l_usage > r_usage)
/*
 * Eviction pass over the shared hashtable:
 *   1. collect every entry into a palloc-ed pointer array, multiplying each
 *      usage counter by a decay factor on the way (STICKY_DECREASE_FACTOR
 *      when calls is zero; USAGE_DECREASE_FACTOR presumably for all other
 *      entries -- the else keyword is not visible in this listing);
 *   2. sort the array with entry_cmp;
 *   3. store the usage of the middle element as pgss->cur_median_usage;
 *   4. remove the first nvictims entries by key, where nvictims is
 *      USAGE_DEALLOC_PERCENT percent of the entry count, raised to at
 *      least 10 and then capped at the entry count.
 *
 * NOTE(review): the function name line, some declarations, the reset of i
 * before the scan, any guard around the median lookup for an empty table
 * and any release of the entries array are not visible here -- confirm
 * against the complete file.
 */
1295 * Deallocate least used entries.
1296 * Caller must hold an exclusive lock on pgss->lock.
1301 HASH_SEQ_STATUS hash_seq;
1302 pgssEntry **entries;
1308 * Sort entries by usage and deallocate USAGE_DEALLOC_PERCENT of them.
1309 * While we're scanning the table, apply the decay factor to the usage
/* one pointer slot per entry currently in the table */
1313 entries = palloc(hash_get_num_entries(pgss_hash) * sizeof(pgssEntry *));
1316 hash_seq_init(&hash_seq, pgss_hash);
1317 while ((entry = hash_seq_search(&hash_seq)) != NULL)
1319 entries[i++] = entry;
1320 /* "Sticky" entries get a different usage decay rate. */
1321 if (entry->counters.calls == 0)
1322 entry->counters.usage *= STICKY_DECREASE_FACTOR;
1324 entry->counters.usage *= USAGE_DECREASE_FACTOR;
/* after the scan, i is the number of collected entries */
1327 qsort(entries, i, sizeof(pgssEntry *), entry_cmp);
1329 /* Also, record the (approximate) median usage */
1331 pgss->cur_median_usage = entries[i / 2]->counters.usage;
/* at least 10 victims, but never more than the number of entries */
1333 nvictims = Max(10, i * USAGE_DEALLOC_PERCENT / 100);
1334 nvictims = Min(nvictims, i);
/* the array is sorted by increasing usage, so the victims are its first slots */
1336 for (i = 0; i < nvictims; i++)
1338 hash_search(pgss_hash, &entries[i]->key, HASH_REMOVE, NULL);
/*
 * Remove every entry from the shared hashtable.  The whole scan runs while
 * pgss->lock is held in exclusive mode; each entry returned by the
 * sequential scan is deleted by key with HASH_REMOVE.
 *
 * NOTE(review): the function name line and the declaration of entry are not
 * visible in this listing.
 */
1345 * Release all entries.
1350 HASH_SEQ_STATUS hash_seq;
/* exclusive mode is taken because entries are about to be deleted */
1353 LWLockAcquire(pgss->lock, LW_EXCLUSIVE);
1355 hash_seq_init(&hash_seq, pgss_hash);
1356 while ((entry = hash_seq_search(&hash_seq)) != NULL)
/* delete the entry the scan just returned */
1358 hash_search(pgss_hash, &entry->key, HASH_REMOVE, NULL);
1361 LWLockRelease(pgss->lock);
/*
 * AppendJumble: append bytes starting at item to the jumble buffer held in
 * jstate.
 *
 * jstate - jumble state; its jumble buffer and jumble_len are read on
 *          entry, and jumble_len is written back on exit
 * item   - bytes to append
 * size   - number of bytes available at item
 *
 * When the buffer already holds JUMBLE_SIZE bytes, the whole buffer is
 * hashed with hash_any() and the 32-bit result becomes the new buffer
 * contents, so earlier input survives only through that hash.  Each copy is
 * limited to the space left in the buffer.
 *
 * NOTE(review): the loop header, the declaration of part_size and the
 * statements that advance item and reduce size are not visible in this
 * listing; presumably the visible body repeats until all size bytes are
 * consumed -- confirm against the complete file.
 */
1366 * AppendJumble: Append a value that is substantive in a given query to
1367 * the current jumble.
1370 AppendJumble(pgssJumbleState *jstate, const unsigned char *item, Size size)
/* work on local copies; only jumble_len is stored back at the end */
1372 unsigned char *jumble = jstate->jumble;
1373 Size jumble_len = jstate->jumble_len;
1376 * Whenever the jumble buffer is full, we hash the current contents and
1377 * reset the buffer to contain just that hash value, thus relying on the
1378 * hash to summarize everything so far.
1384 if (jumble_len >= JUMBLE_SIZE)
1386 uint32 start_hash = hash_any(jumble, JUMBLE_SIZE);
/* restart the buffer with just the hash of everything seen so far */
1388 memcpy(jumble, &start_hash, sizeof(start_hash));
1389 jumble_len = sizeof(start_hash);
/* copy no more than the space remaining in the buffer */
1391 part_size = Min(size, JUMBLE_SIZE - jumble_len);
1392 memcpy(jumble + jumble_len, item, part_size);
1393 jumble_len += part_size;
1397 jstate->jumble_len = jumble_len;
/*
 * Both macros expand to an AppendJumble() call and expect a variable named
 * jstate to be in scope at the point of use.
 *
 * APP_JUMB(item)       - appends the raw bytes of item, sizeof(item) of
 *                        them; item must be an lvalue because its address
 *                        is taken
 * APP_JUMB_STRING(str) - appends the string together with its terminating
 *                        zero byte (strlen + 1 bytes); str is evaluated
 *                        twice, so pass an expression without side effects
 */
1401 * Wrappers around AppendJumble to encapsulate details of serialization
1402 * of individual local variable elements.
1404 #define APP_JUMB(item) \
1405 AppendJumble(jstate, (const unsigned char *) &(item), sizeof(item))
1406 #define APP_JUMB_STRING(str) \
1407 AppendJumble(jstate, (const unsigned char *) (str), strlen(str) + 1)
/*
 * JumbleQuery: feed one Query into the jumble.
 *
 * jstate - jumble state being accumulated
 * query  - query tree to process; asserted to be a Query node whose
 *          utilityStmt is NULL
 *
 * commandType is appended first, followed in this order by cteList, rtable
 * (through JumbleRangeTable), jointree, targetList, returningList,
 * groupClause, havingQual, windowClause, distinctClause, sortClause,
 * limitOffset, limitCount and setOperations.  All pieces go into the same
 * running jumble, so reordering the calls below would change the jumble
 * contents for every query.
 */
1410 * JumbleQuery: Selectively serialize the query tree, appending significant
1411 * data to the "query jumble" while ignoring nonsignificant data.
1413 * Rule of thumb for what to include is that we should ignore anything not
1414 * semantically significant (such as alias names) as well as anything that can
1415 * be deduced from child nodes (else we'd just be double-hashing that piece
1419 JumbleQuery(pgssJumbleState *jstate, Query *query)
1421 Assert(IsA(query, Query));
1422 Assert(query->utilityStmt == NULL);
1424 APP_JUMB(query->commandType);
1425 /* resultRelation is usually predictable from commandType */
1426 JumbleExpr(jstate, (Node *) query->cteList);
/* the range table is not passed to JumbleExpr; it has a dedicated walker */
1427 JumbleRangeTable(jstate, query->rtable);
1428 JumbleExpr(jstate, (Node *) query->jointree);
1429 JumbleExpr(jstate, (Node *) query->targetList);
1430 JumbleExpr(jstate, (Node *) query->returningList);
1431 JumbleExpr(jstate, (Node *) query->groupClause);
1432 JumbleExpr(jstate, query->havingQual);
1433 JumbleExpr(jstate, (Node *) query->windowClause);
1434 JumbleExpr(jstate, (Node *) query->distinctClause);
1435 JumbleExpr(jstate, (Node *) query->sortClause);
1436 JumbleExpr(jstate, query->limitOffset);
1437 JumbleExpr(jstate, query->limitCount);
1438 /* we ignore rowMarks */
1439 JumbleExpr(jstate, query->setOperations);
/*
 * JumbleRangeTable: append the identifying fields of the range table
 * entries in rtable to the jumble.
 *
 * jstate - jumble state being accumulated
 * rtable - list whose elements are asserted to be RangeTblEntry nodes
 *
 * For an entry, rtekind is appended first and then one kind-specific item:
 * relid, the subquery (recursing into JumbleQuery), jointype, funcexpr,
 * values_lists, or ctename followed by ctelevelsup.  An rtekind without a
 * matching case raises an ERROR.
 *
 * NOTE(review): the list iteration header and the case labels of the
 * switch are not visible in this listing, so the mapping from rtekind value
 * to appended field should be confirmed against the complete file.
 */
1443 * Jumble a range table
1446 JumbleRangeTable(pgssJumbleState *jstate, List *rtable)
1452 RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
1454 Assert(IsA(rte, RangeTblEntry));
/* the kind itself is part of the jumble, ahead of any kind-specific field */
1455 APP_JUMB(rte->rtekind);
1456 switch (rte->rtekind)
1459 APP_JUMB(rte->relid);
1462 JumbleQuery(jstate, rte->subquery);
1465 APP_JUMB(rte->jointype);
1468 JumbleExpr(jstate, rte->funcexpr);
1471 JumbleExpr(jstate, (Node *) rte->values_lists);
1476 * Depending on the CTE name here isn't ideal, but it's the
1477 * only info we have to identify the referenced WITH item.
1479 APP_JUMB_STRING(rte->ctename);
1480 APP_JUMB(rte->ctelevelsup);
1483 elog(ERROR, "unrecognized RTE kind: %d", (int) rte->rtekind);
/*
 * JumbleExpr: recursively append an expression (sub)tree to the jumble.
 *
 * jstate - jumble state being accumulated
 * node   - root of the tree to process
 *
 * After a stack depth check, the node type tag is appended, and then the
 * switch on nodeTag() appends the fields selected for that node type and
 * recurses into child expressions.  A List is handled by walking its
 * elements; SubLink and CommonTableExpr recurse through JumbleQuery.  For a
 * Const only consttype is appended, while its parse location is handed to
 * RecordConstLocation.  A node type without a matching case produces a
 * WARNING rather than an error.
 *
 * NOTE(review): many short lines, including a number of case labels, the
 * break statements and any early exit for a NULL node, are not visible in
 * this listing.  Where a label is missing, the node type can only be
 * inferred from the cast on the first line of the case body.
 */
1490 * Jumble an expression tree
1492 * In general this function should handle all the same node types that
1493 * expression_tree_walker() does, and therefore it's coded to be as parallel
1494 * to that function as possible. However, since we are only invoked on
1495 * queries immediately post-parse-analysis, we need not handle node types
1496 * that only appear in planning.
1498 * Note: the reason we don't simply use expression_tree_walker() is that the
1499 * point of that function is to support tree walkers that don't care about
1500 * most tree node types, but here we care about all types. We should complain
1501 * about any unrecognized node type.
1504 JumbleExpr(pgssJumbleState *jstate, Node *node)
1511 /* Guard against stack overflow due to overly complex expressions */
1512 check_stack_depth();
1515 * We always emit the node's NodeTag, then any additional fields that are
1516 * considered significant, and then we recurse to any child nodes.
1518 APP_JUMB(node->type);
1520 switch (nodeTag(node))
1524 Var *var = (Var *) node;
1526 APP_JUMB(var->varno);
1527 APP_JUMB(var->varattno);
1528 APP_JUMB(var->varlevelsup);
1533 Const *c = (Const *) node;
1535 /* We jumble only the constant's type, not its value */
1536 APP_JUMB(c->consttype);
1537 /* Also, record its parse location for query normalization */
1538 RecordConstLocation(jstate, c->location);
1543 Param *p = (Param *) node;
1545 APP_JUMB(p->paramkind);
1546 APP_JUMB(p->paramid);
1547 APP_JUMB(p->paramtype);
1552 Aggref *expr = (Aggref *) node;
1554 APP_JUMB(expr->aggfnoid);
1555 JumbleExpr(jstate, (Node *) expr->args);
1556 JumbleExpr(jstate, (Node *) expr->aggorder);
1557 JumbleExpr(jstate, (Node *) expr->aggdistinct);
1562 WindowFunc *expr = (WindowFunc *) node;
1564 APP_JUMB(expr->winfnoid);
1565 APP_JUMB(expr->winref);
1566 JumbleExpr(jstate, (Node *) expr->args);
1571 ArrayRef *aref = (ArrayRef *) node;
1573 JumbleExpr(jstate, (Node *) aref->refupperindexpr);
1574 JumbleExpr(jstate, (Node *) aref->reflowerindexpr);
1575 JumbleExpr(jstate, (Node *) aref->refexpr);
1576 JumbleExpr(jstate, (Node *) aref->refassgnexpr);
1581 FuncExpr *expr = (FuncExpr *) node;
1583 APP_JUMB(expr->funcid);
1584 JumbleExpr(jstate, (Node *) expr->args);
1587 case T_NamedArgExpr:
1589 NamedArgExpr *nae = (NamedArgExpr *) node;
1591 APP_JUMB(nae->argnumber);
1592 JumbleExpr(jstate, (Node *) nae->arg);
/* these node types share the OpExpr layout, so one case body serves them all */
1596 case T_DistinctExpr: /* struct-equivalent to OpExpr */
1597 case T_NullIfExpr: /* struct-equivalent to OpExpr */
1599 OpExpr *expr = (OpExpr *) node;
1601 APP_JUMB(expr->opno);
1602 JumbleExpr(jstate, (Node *) expr->args);
1605 case T_ScalarArrayOpExpr:
1607 ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) node;
1609 APP_JUMB(expr->opno);
1610 APP_JUMB(expr->useOr);
1611 JumbleExpr(jstate, (Node *) expr->args);
1616 BoolExpr *expr = (BoolExpr *) node;
1618 APP_JUMB(expr->boolop);
1619 JumbleExpr(jstate, (Node *) expr->args);
1624 SubLink *sublink = (SubLink *) node;
1626 APP_JUMB(sublink->subLinkType);
1627 JumbleExpr(jstate, (Node *) sublink->testexpr);
/* the subselect is a whole query, so it goes through JumbleQuery */
1628 JumbleQuery(jstate, (Query *) sublink->subselect);
1633 FieldSelect *fs = (FieldSelect *) node;
1635 APP_JUMB(fs->fieldnum);
1636 JumbleExpr(jstate, (Node *) fs->arg);
1641 FieldStore *fstore = (FieldStore *) node;
1643 JumbleExpr(jstate, (Node *) fstore->arg);
1644 JumbleExpr(jstate, (Node *) fstore->newvals);
1649 RelabelType *rt = (RelabelType *) node;
1651 APP_JUMB(rt->resulttype);
1652 JumbleExpr(jstate, (Node *) rt->arg);
1657 CoerceViaIO *cio = (CoerceViaIO *) node;
1659 APP_JUMB(cio->resulttype);
1660 JumbleExpr(jstate, (Node *) cio->arg);
1663 case T_ArrayCoerceExpr:
1665 ArrayCoerceExpr *acexpr = (ArrayCoerceExpr *) node;
1667 APP_JUMB(acexpr->resulttype);
1668 JumbleExpr(jstate, (Node *) acexpr->arg);
1671 case T_ConvertRowtypeExpr:
1673 ConvertRowtypeExpr *crexpr = (ConvertRowtypeExpr *) node;
1675 APP_JUMB(crexpr->resulttype);
1676 JumbleExpr(jstate, (Node *) crexpr->arg);
1681 CollateExpr *ce = (CollateExpr *) node;
1683 APP_JUMB(ce->collOid);
1684 JumbleExpr(jstate, (Node *) ce->arg);
1689 CaseExpr *caseexpr = (CaseExpr *) node;
1691 JumbleExpr(jstate, (Node *) caseexpr->arg);
/* walk the CaseWhen items in list order, taking expr and result of each */
1692 foreach(temp, caseexpr->args)
1694 CaseWhen *when = (CaseWhen *) lfirst(temp);
1696 Assert(IsA(when, CaseWhen));
1697 JumbleExpr(jstate, (Node *) when->expr);
1698 JumbleExpr(jstate, (Node *) when->result);
1700 JumbleExpr(jstate, (Node *) caseexpr->defresult);
1703 case T_CaseTestExpr:
1705 CaseTestExpr *ct = (CaseTestExpr *) node;
1707 APP_JUMB(ct->typeId);
1711 JumbleExpr(jstate, (Node *) ((ArrayExpr *) node)->elements);
1714 JumbleExpr(jstate, (Node *) ((RowExpr *) node)->args);
1716 case T_RowCompareExpr:
1718 RowCompareExpr *rcexpr = (RowCompareExpr *) node;
1720 APP_JUMB(rcexpr->rctype);
1721 JumbleExpr(jstate, (Node *) rcexpr->largs);
1722 JumbleExpr(jstate, (Node *) rcexpr->rargs);
1725 case T_CoalesceExpr:
1726 JumbleExpr(jstate, (Node *) ((CoalesceExpr *) node)->args);
1730 MinMaxExpr *mmexpr = (MinMaxExpr *) node;
1732 APP_JUMB(mmexpr->op);
1733 JumbleExpr(jstate, (Node *) mmexpr->args);
1738 XmlExpr *xexpr = (XmlExpr *) node;
1740 APP_JUMB(xexpr->op);
1741 JumbleExpr(jstate, (Node *) xexpr->named_args);
1742 JumbleExpr(jstate, (Node *) xexpr->args);
1747 NullTest *nt = (NullTest *) node;
1749 APP_JUMB(nt->nulltesttype);
1750 JumbleExpr(jstate, (Node *) nt->arg);
1755 BooleanTest *bt = (BooleanTest *) node;
1757 APP_JUMB(bt->booltesttype);
1758 JumbleExpr(jstate, (Node *) bt->arg);
1761 case T_CoerceToDomain:
1763 CoerceToDomain *cd = (CoerceToDomain *) node;
1765 APP_JUMB(cd->resulttype);
1766 JumbleExpr(jstate, (Node *) cd->arg);
1769 case T_CoerceToDomainValue:
1771 CoerceToDomainValue *cdv = (CoerceToDomainValue *) node;
1773 APP_JUMB(cdv->typeId);
1776 case T_SetToDefault:
1778 SetToDefault *sd = (SetToDefault *) node;
1780 APP_JUMB(sd->typeId);
1783 case T_CurrentOfExpr:
1785 CurrentOfExpr *ce = (CurrentOfExpr *) node;
1787 APP_JUMB(ce->cvarno);
/* the cursor name is appended only when it is set */
1788 if (ce->cursor_name)
1789 APP_JUMB_STRING(ce->cursor_name);
1790 APP_JUMB(ce->cursor_param);
1795 TargetEntry *tle = (TargetEntry *) node;
1797 APP_JUMB(tle->resno);
1798 APP_JUMB(tle->ressortgroupref);
1799 JumbleExpr(jstate, (Node *) tle->expr);
1804 RangeTblRef *rtr = (RangeTblRef *) node;
1806 APP_JUMB(rtr->rtindex);
1811 JoinExpr *join = (JoinExpr *) node;
1813 APP_JUMB(join->jointype);
1814 APP_JUMB(join->isNatural);
1815 APP_JUMB(join->rtindex);
1816 JumbleExpr(jstate, join->larg);
1817 JumbleExpr(jstate, join->rarg);
1818 JumbleExpr(jstate, join->quals);
1823 FromExpr *from = (FromExpr *) node;
1825 JumbleExpr(jstate, (Node *) from->fromlist);
1826 JumbleExpr(jstate, from->quals);
/* list node: jumble every element in order */
1830 foreach(temp, (List *) node)
1832 JumbleExpr(jstate, (Node *) lfirst(temp));
1835 case T_SortGroupClause:
1837 SortGroupClause *sgc = (SortGroupClause *) node;
1839 APP_JUMB(sgc->tleSortGroupRef);
1840 APP_JUMB(sgc->eqop);
1841 APP_JUMB(sgc->sortop);
1842 APP_JUMB(sgc->nulls_first);
1845 case T_WindowClause:
1847 WindowClause *wc = (WindowClause *) node;
1849 APP_JUMB(wc->winref);
1850 APP_JUMB(wc->frameOptions);
1851 JumbleExpr(jstate, (Node *) wc->partitionClause);
1852 JumbleExpr(jstate, (Node *) wc->orderClause);
1853 JumbleExpr(jstate, wc->startOffset);
1854 JumbleExpr(jstate, wc->endOffset);
1857 case T_CommonTableExpr:
1859 CommonTableExpr *cte = (CommonTableExpr *) node;
1861 /* we store the string name because RTE_CTE RTEs need it */
1862 APP_JUMB_STRING(cte->ctename);
1863 JumbleQuery(jstate, (Query *) cte->ctequery);
1866 case T_SetOperationStmt:
1868 SetOperationStmt *setop = (SetOperationStmt *) node;
1870 APP_JUMB(setop->op);
1871 APP_JUMB(setop->all);
1872 JumbleExpr(jstate, setop->larg);
1873 JumbleExpr(jstate, setop->rarg);
1877 /* Only a warning, since we can stumble along anyway */
1878 elog(WARNING, "unrecognized node type: %d",
1879 (int) nodeTag(node));
/*
 * RecordConstLocation: append one constant location to the clocations array
 * kept in jstate.
 *
 * jstate   - jumble state owning clocations, clocations_count and
 *            clocations_buf_size
 * location - location of the constant within the query string
 *
 * When the array is full, its capacity is doubled with repalloc().  The new
 * slot stores the location together with a length of -1, and the count is
 * then incremented.
 *
 * NOTE(review): the condition that belongs to the -1 remark below is not
 * visible in this listing; presumably negative locations are not recorded
 * -- confirm against the complete file.
 */
1885 * Record location of constant within query string of query tree
1886 * that is currently being walked.
1889 RecordConstLocation(pgssJumbleState *jstate, int location)
1891 /* -1 indicates unknown or undefined location */
1894 /* enlarge array if needed */
1895 if (jstate->clocations_count >= jstate->clocations_buf_size)
/* double the capacity, then resize the array to match */
1897 jstate->clocations_buf_size *= 2;
1898 jstate->clocations = (pgssLocationLen *)
1899 repalloc(jstate->clocations,
1900 jstate->clocations_buf_size *
1901 sizeof(pgssLocationLen));
1903 jstate->clocations[jstate->clocations_count].location = location;
1904 /* initialize lengths to -1 to simplify fill_in_constant_lengths */
1905 jstate->clocations[jstate->clocations_count].length = -1;
1906 jstate->clocations_count++;
/*
 * generate_normalized_query: build a copy of query in which each recorded
 * constant is replaced by a single ? character.
 *
 * jstate      - jumble state holding the constant locations
 * query       - original query text
 * query_len_p - in: length of query in bytes; out: length of the result
 * encoding    - encoding handed to pg_encoding_mbcliplen() by the clipping
 *               code near the end
 *
 * fill_in_constant_lengths() is called first to obtain the token lengths.
 * The loop then alternates between copying the text that lies between two
 * constants and emitting one ? for the constant itself; after the loop the
 * tail of the query is copied.  Every copy is bounded by max_output_len.
 *
 * NOTE(review): max_output_len is assigned twice in the visible text, and
 * the second assignment (the full query_len) is the one in effect when the
 * buffer is allocated.  Likewise both a clipped and an unclipped assignment
 * to query_len appear near the end.  Short lines are missing from this
 * listing, so it cannot be told from here whether the first variant of each
 * pair is compiled out -- confirm against the complete file.  The tests on
 * tok_len, the update of last_off, the loop exit and the return statement
 * are also not visible.
 */
1911 * Generate a normalized version of the query string that will be used to
1912 * represent all similar queries.
1914 * Note that the normalized representation may well vary depending on
1915 * just which "equivalent" query is used to create the hashtable entry.
1916 * We assume this is OK.
1918 * *query_len_p contains the input string length, and is updated with
1919 * the result string length (which cannot be longer) on exit.
1921 * Returns a palloc'd string, which is not necessarily null-terminated.
1924 generate_normalized_query(pgssJumbleState *jstate, const char *query,
1925 int *query_len_p, int encoding)
1928 int query_len = *query_len_p;
1931 len_to_wrt, /* Length (in bytes) to write */
1932 quer_loc = 0, /* Source query byte location */
1933 n_quer_loc = 0, /* Normalized query byte location */
1934 last_off = 0, /* Offset from start for previous tok */
1935 last_tok_len = 0; /* Length (in bytes) of that tok */
1938 * Get constants' lengths (core system only gives us locations). Note
1939 * this also ensures the items are sorted by location.
1941 fill_in_constant_lengths(jstate, query);
1943 /* Allocate result buffer, ensuring we limit result to allowed size */
1945 max_output_len = Min(query_len, pgss->query_size - 1);
1947 /* XXX: pg_hint_plan doesn't truncate query string. */
1948 max_output_len = query_len;
/* exactly max_output_len bytes are allocated; no extra byte for a terminator */
1949 norm_query = palloc(max_output_len);
1951 for (i = 0; i < jstate->clocations_count; i++)
1953 int off, /* Offset from start for cur tok */
1954 tok_len; /* Length (in bytes) of that tok */
1956 off = jstate->clocations[i].location;
1957 tok_len = jstate->clocations[i].length;
1960 continue; /* ignore any duplicates */
1962 /* Copy next chunk, or as much as will fit */
/* chunk length = gap between the end of the previous token and this one */
1963 len_to_wrt = off - last_off;
1964 len_to_wrt -= last_tok_len;
/* never write past the end of the output buffer */
1965 len_to_wrt = Min(len_to_wrt, max_output_len - n_quer_loc);
1967 Assert(len_to_wrt >= 0);
1968 memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
1969 n_quer_loc += len_to_wrt;
/* emit the placeholder only if there is still room */
1971 if (n_quer_loc < max_output_len)
1972 norm_query[n_quer_loc++] = '?';
/* resume reading the source just past the replaced token */
1974 quer_loc = off + tok_len;
1976 last_tok_len = tok_len;
1978 /* If we run out of space, might as well stop iterating */
1979 if (n_quer_loc >= max_output_len)
1984 * We've copied up until the last ignorable constant. Copy over the
1985 * remaining bytes of the original query string, or at least as much as
1988 len_to_wrt = query_len - quer_loc;
1989 len_to_wrt = Min(len_to_wrt, max_output_len - n_quer_loc);
1991 Assert(len_to_wrt >= 0);
1992 memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
1993 n_quer_loc += len_to_wrt;
1995 /* XXX: pg_hint_plan doesn't truncate query string. */
1998 * If we ran out of space, we need to do an encoding-aware truncation,
1999 * just to make sure we don't have an incomplete character at the end.
2001 if (n_quer_loc >= max_output_len)
2002 query_len = pg_encoding_mbcliplen(encoding,
2005 pgss->query_size - 1);
2008 query_len = n_quer_loc;
/* report the length of the result back through the in/out parameter */
2010 *query_len_p = query_len;
/*
 * fill_in_constant_lengths: determine the length in bytes of each constant
 * recorded in jstate->clocations by scanning the query text again.
 *
 * jstate - jumble state whose clocations array is sorted and updated
 * query  - query text the locations refer to
 *
 * When there is more than one record, the records are first sorted with
 * comp_location.  A scanner created by scanner_init() is then advanced with
 * core_yylex() until the wanted location is reached, and the length is
 * taken with strlen() on the scan buffer at that offset.  A location that
 * does not lie beyond the previous one is skipped as a duplicate.  When the
 * text at the location starts with a minus sign, one more token is read
 * first, presumably so that the measured length spans both the sign and the
 * number -- confirm against the complete file.
 *
 * NOTE(review): several short lines (declarations, the inner loop header,
 * the end-of-input tests, the update of last_loc and the remaining
 * scanner_init arguments) are not visible in this listing.
 */
2015 * Given a valid SQL string and an array of constant-location records,
2016 * fill in the textual lengths of those constants.
2018 * The constants may use any allowed constant syntax, such as float literals,
2019 * bit-strings, single-quoted strings and dollar-quoted strings. This is
2020 * accomplished by using the public API for the core scanner.
2022 * It is the caller's job to ensure that the string is a valid SQL statement
2023 * with constants at the indicated locations. Since in practice the string
2024 * has already been parsed, and the locations that the caller provides will
2025 * have originated from within the authoritative parser, this should not be
2028 * Duplicate constant pointers are possible, and will have their lengths
2029 * marked as '-1', so that they are later ignored. (Actually, we assume the
2030 * lengths were initialized as -1 to start with, and don't change them here.)
2032 * N.B. There is an assumption that a '-' character at a Const location begins
2033 * a negative numeric constant. This precludes there ever being another
2034 * reason for a constant to start with a '-'.
2037 fill_in_constant_lengths(pgssJumbleState *jstate, const char *query)
2039 pgssLocationLen *locs;
2040 core_yyscan_t yyscanner;
2041 core_yy_extra_type yyextra;
2042 core_YYSTYPE yylval;
2048 * Sort the records by location so that we can process them in order while
2049 * scanning the query text.
2051 if (jstate->clocations_count > 1)
2052 qsort(jstate->clocations, jstate->clocations_count,
2053 sizeof(pgssLocationLen), comp_location);
2054 locs = jstate->clocations;
2056 /* initialize the flex scanner --- should match raw_parser() */
2057 yyscanner = scanner_init(query,
2062 /* Search for each constant, in sequence */
2063 for (i = 0; i < jstate->clocations_count; i++)
2065 int loc = locs[i].location;
2070 if (loc <= last_loc)
2071 continue; /* Duplicate constant, ignore */
2073 /* Lex tokens until we find the desired constant */
2076 tok = core_yylex(&yylval, &yylloc, yyscanner);
2078 /* We should not hit end-of-string, but if we do, behave sanely */
2080 break; /* out of inner for-loop */
2083 * We should find the token position exactly, but if we somehow
2084 * run past it, work with that.
2088 if (query[loc] == '-')
2091 * It's a negative value - this is the one and only case
2092 * where we replace more than a single token.
2094 * Do not compensate for the core system's special-case
2095 * adjustment of location to that of the leading '-'
2096 * operator in the event of a negative constant. It is
2097 * also useful for our purposes to start from the minus
2098 * symbol. In this way, queries like "select * from foo
2099 * where bar = 1" and "select * from foo where bar = -2"
2100 * will have identical normalized query strings.
2102 tok = core_yylex(&yylval, &yylloc, yyscanner);
2104 break; /* out of inner for-loop */
2108 * We now rely on the assumption that flex has placed a zero
2109 * byte after the text of the current token in scanbuf.
/* length = distance from loc to the zero byte that ends the current token */
2111 locs[i].length = strlen(yyextra.scanbuf + loc);
2112 break; /* out of inner for-loop */
2116 /* If we hit end-of-string, give up, leaving remaining lengths -1 */
/* tear down the scanner set up by scanner_init() */
2123 scanner_finish(yyscanner);
2127 * comp_location: comparator for qsorting pgssLocationLen structs by location
2130 comp_location(const void *a, const void *b)
2132 int l = ((const pgssLocationLen *) a)->location;
2133 int r = ((const pgssLocationLen *) b)->location;