1 /*-------------------------------------------------------------------------
4 * Take statistics of plan selection across a whole database cluster.
6 * Execution costs are totaled for each distinct plan for each query,
7 * and plan and queryid are kept in a shared hashtable, each record in
8 * which is associated with a record in pg_stat_statements, if any, by
11 * For Postgres 9.3 or earlier does not expose query id so
12 * pg_store_plans needs to calculate it based on the given query
13 * string using different algorithm from pg_stat_statements, and later
14 * the id will be matched against the one made from query string
15 * stored in pg_stat_statements. For the reason, queryid matching in
16 * this way will fail if the query string kept in pg_stat_statements
17 * is truncated in the middle.
19 * Plans are identified by fingerprinting plan representations in
20 * "shortened" JSON format with constants and unstable values such as
21 * rows, width, loops ignored. Nevertheless, stored plan entries hold
22 * them of the latest execution. Entry eviction is done in the same
23 * way to pg_stat_statements.
25 * Copyright (c) 2008-2020, PostgreSQL Global Development Group
26 * Copyright (c) 2012-2020, NIPPON TELEGRAPH AND TELEPHONE CORPORATION
29 * pg_store_plans/pg_store_plans.c
31 *-------------------------------------------------------------------------
38 #include "commands/explain.h"
39 #include "access/hash.h"
40 #include "executor/instrument.h"
42 #include "mb/pg_wchar.h"
43 #include "miscadmin.h"
44 #include "storage/fd.h"
45 #include "storage/ipc.h"
46 #include "storage/lwlock.h"
47 #include "storage/spin.h"
48 #include "storage/shmem.h"
49 #include "tcop/utility.h"
50 #include "utils/builtins.h"
51 #include "utils/timestamp.h"
53 #include "pgsp_json.h"
54 #include "pgsp_explain.h"
58 /* Location of stats file */
59 #define PGSP_DUMP_FILE "global/pg_store_plans.stat"
61 /* This constant defines the magic number in the stats file header */
62 static const uint32 PGSP_FILE_HEADER = 0x20180613;
63 static const uint32 store_plan_size = 5000;
65 /* XXX: Should USAGE_EXEC reflect execution time and/or buffer usage? */
66 #define USAGE_EXEC(duration) (1.0)
67 #define USAGE_INIT (1.0) /* including initial planning */
68 #define ASSUMED_MEDIAN_INIT (10.0) /* initial assumed median usage */
69 #define USAGE_DECREASE_FACTOR (0.99) /* decreased every entry_dealloc */
70 #define STICKY_DECREASE_FACTOR (0.50) /* factor for sticky entries */
71 #define USAGE_DEALLOC_PERCENT 5 /* free this % of entries at once */
74 * Hashtable key that defines the identity of a hashtable entry. We separate
75 * queries by user and by database even if they are otherwise identical.
77 * Presently, the query encoding is fully determined by the source database
78 * and so we don't really need it to be in the key. But that might not always
79 * be true. Anyway it's notationally convenient to pass it as part of the key.
81 typedef struct EntryKey
83 Oid userid; /* user OID */
84 Oid dbid; /* database OID */
85 int encoding; /* query encoding */
86 uint32 queryid; /* internal query identifier */
87 uint32 planid; /* plan identifier */
91 * The actual stats counters kept within StatEntry.
93 typedef struct Counters
95 int64 calls; /* # of times executed */
96 double total_time; /* total execution time, in msec */
97 int64 rows; /* total # of retrieved or affected rows */
98 int64 shared_blks_hit; /* # of shared buffer hits */
99 int64 shared_blks_read; /* # of shared disk blocks read */
100 int64 shared_blks_dirtied;/* # of shared disk blocks dirtied */
101 int64 shared_blks_written;/* # of shared disk blocks written */
102 int64 local_blks_hit; /* # of local buffer hits */
103 int64 local_blks_read; /* # of local disk blocks read */
104 int64 local_blks_dirtied; /* # of local disk blocks dirtied */
105 int64 local_blks_written; /* # of local disk blocks written */
106 int64 temp_blks_read; /* # of temp blocks read */
107 int64 temp_blks_written; /* # of temp blocks written */
108 double blk_read_time; /* time spent reading, in msec */
109 double blk_write_time; /* time spent writing, in msec */
110 TimestampTz first_call; /* timestamp of first call */
111 TimestampTz last_call; /* timestamp of last call */
112 double usage; /* usage factor */
116 * The type of queryId has been widen as of PG11. Define substitute type rather
117 * than put #if here and there.
119 #if PG_VERSION_NUM >= 110000
120 typedef uint64 queryid_t;
122 typedef uint32 queryid_t;
126 * Statistics per plan
128 * NB: see the file read/write code before changing field order here.
130 typedef struct StatEntry
132 EntryKey key; /* hash key of entry - MUST BE FIRST */
133 queryid_t queryid; /* query identifier from stat_statements*/
134 Counters counters; /* the statistics for this query */
135 int plan_len; /* # of valid bytes in query string */
136 slock_t mutex; /* protects the counters only */
137 char plan[1]; /* VARIABLE LENGTH ARRAY - MUST BE LAST */
139 * Note: the allocated length of query[] is actually
140 * shared_state->query_size
145 * Global shared state
147 typedef struct SharedState
149 LWLockId lock; /* protects hashtable search/modification */
150 int plan_size; /* max query length in bytes */
151 double cur_median_usage; /* current median usage in hashtable */
154 /*---- Local variables ----*/
156 /* Current nesting depth of ExecutorRun+ProcessUtility calls */
157 static int nested_level = 0;
159 /* Saved hook values in case of unload */
160 static shmem_startup_hook_type prev_shmem_startup_hook = NULL;
161 static ExecutorStart_hook_type prev_ExecutorStart = NULL;
162 static ExecutorRun_hook_type prev_ExecutorRun = NULL;
163 static ExecutorFinish_hook_type prev_ExecutorFinish = NULL;
164 static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
165 static ProcessUtility_hook_type prev_ProcessUtility = NULL;
167 /* Links to shared memory state */
168 static SharedState *shared_state = NULL;
169 static HTAB *hash_table = NULL;
171 /*---- GUC variables ----*/
175 TRACK_LEVEL_NONE, /* track no statements */
176 TRACK_LEVEL_TOP, /* only top level statements */
177 TRACK_LEVEL_ALL, /* all statements, including nested ones */
178 TRACK_LEVEL_FORCE /* all statements, including nested ones */
181 static const struct config_enum_entry track_options[] =
183 {"none", TRACK_LEVEL_NONE, false},
184 {"top", TRACK_LEVEL_TOP, false},
185 {"all", TRACK_LEVEL_ALL, false},
191 PLAN_FORMAT_RAW, /* No conversion. Shorten JSON */
192 PLAN_FORMAT_TEXT, /* Traditional text representation */
193 PLAN_FORMAT_JSON, /* JSON representation */
194 PLAN_FORMAT_YAML, /* YAML */
195 PLAN_FORMAT_XML, /* XML */
198 static const struct config_enum_entry plan_formats[] =
200 {"raw" , PLAN_FORMAT_RAW , false},
201 {"text", PLAN_FORMAT_TEXT, false},
202 {"json", PLAN_FORMAT_JSON, false},
203 {"yaml", PLAN_FORMAT_YAML, false},
204 {"xml" , PLAN_FORMAT_XML , false},
208 static int store_size; /* max # statements to track */
209 static int track_level; /* tracking level */
210 static int min_duration; /* min duration to record */
211 static bool dump_on_shutdown; /* whether to save stats across shutdown */
212 static bool log_analyze; /* Similar to EXPLAIN (ANALYZE *) */
213 static bool log_verbose; /* Similar to EXPLAIN (VERBOSE *) */
214 static bool log_buffers; /* Similar to EXPLAIN (BUFFERS *) */
215 static bool log_timing; /* Similar to EXPLAIN (TIMING *) */
216 static bool log_triggers; /* whether to log trigger statistics */
217 static int plan_format; /* Plan representation style in
218 * pg_store_plans.plan */
220 #define pgsp_enabled() \
221 (track_level == TRACK_LEVEL_ALL || \
222 (track_level == TRACK_LEVEL_TOP && nested_level == 0))
224 /*---- Function declarations ----*/
229 Datum pg_store_plans_reset(PG_FUNCTION_ARGS);
230 Datum pg_store_plans_hash_query(PG_FUNCTION_ARGS);
231 Datum pg_store_plans(PG_FUNCTION_ARGS);
232 Datum pg_store_plans_shorten(PG_FUNCTION_ARGS);
233 Datum pg_store_plans_normalize(PG_FUNCTION_ARGS);
234 Datum pg_store_plans_jsonplan(PG_FUNCTION_ARGS);
235 Datum pg_store_plans_yamlplan(PG_FUNCTION_ARGS);
236 Datum pg_store_plans_xmlplan(PG_FUNCTION_ARGS);
237 Datum pg_store_plans_textplan(PG_FUNCTION_ARGS);
239 PG_FUNCTION_INFO_V1(pg_store_plans_reset);
240 PG_FUNCTION_INFO_V1(pg_store_plans_hash_query);
241 PG_FUNCTION_INFO_V1(pg_store_plans);
242 PG_FUNCTION_INFO_V1(pg_store_plans_shorten);
243 PG_FUNCTION_INFO_V1(pg_store_plans_normalize);
244 PG_FUNCTION_INFO_V1(pg_store_plans_jsonplan);
245 PG_FUNCTION_INFO_V1(pg_store_plans_textplan);
246 PG_FUNCTION_INFO_V1(pg_store_plans_yamlplan);
247 PG_FUNCTION_INFO_V1(pg_store_plans_xmlplan);
249 static void pgsp_shmem_startup(void);
250 static void pgsp_shmem_shutdown(int code, Datum arg);
251 static void pgsp_ExecutorStart(QueryDesc *queryDesc, int eflags);
252 #if PG_VERSION_NUM >= 100000
253 static void pgsp_ExecutorRun(QueryDesc *queryDesc,
254 ScanDirection direction,
255 uint64 count, bool execute_once);
256 #elif PG_VERSION_NUM >= 90600
257 static void pgsp_ExecutorRun(QueryDesc *queryDesc,
258 ScanDirection direction,
261 static void pgsp_ExecutorRun(QueryDesc *queryDesc,
262 ScanDirection direction,
265 static void pgsp_ExecutorFinish(QueryDesc *queryDesc);
266 static void pgsp_ExecutorEnd(QueryDesc *queryDesc);
267 #if PG_VERSION_NUM >= 100000
268 static void pgsp_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
269 ProcessUtilityContext context, ParamListInfo params,
270 QueryEnvironment *queryEnv,
271 DestReceiver *dest, char *completionTag);
273 static void pgsp_ProcessUtility(Node *parsetree, const char *queryString,
274 ProcessUtilityContext context, ParamListInfo params,
275 DestReceiver *dest, char *completionTag);
277 static uint32 hash_table_fn(const void *key, Size keysize);
278 static int match_fn(const void *key1, const void *key2, Size keysize);
279 static uint32 hash_query(const char* query);
280 static void store_entry(char *plan, uint32 queryId, queryid_t queryId_pgss,
281 double total_time, uint64 rows,
282 const BufferUsage *bufusage);
283 static Size shared_mem_size(void);
284 static StatEntry *entry_alloc(EntryKey *key, const char *query,
285 int plan_len, bool sticky);
286 static void entry_dealloc(void);
287 static void entry_reset(void);
290 * Module load callback
296 * In order to create our shared memory area, we have to be loaded via
297 * shared_preload_libraries. If not, fall out without hooking into any of
298 * the main system. (We don't throw error here because it seems useful to
299 * allow the pg_stat_statements functions to be created even when the
300 * module isn't active. The functions must protect themselves against
301 * being called then, however.)
303 if (!process_shared_preload_libraries_in_progress)
307 * Define (or redefine) custom GUC variables.
309 DefineCustomIntVariable("pg_store_plans.max",
310 "Sets the maximum number of plans tracked by pg_store_plans.",
322 DefineCustomEnumVariable("pg_store_plans.track",
323 "Selects which plans are tracked by pg_store_plans.",
334 DefineCustomEnumVariable("pg_store_plans.plan_format",
335 "Selects which format to be appied for plan representation in pg_store_plans.",
346 DefineCustomIntVariable("pg_store_plans.min_duration",
347 "Minimum duration to record plan in milliseconds.",
359 DefineCustomBoolVariable("pg_store_plans.save",
360 "Save pg_store_plans statistics across server shutdowns.",
370 DefineCustomBoolVariable("pg_store_plans.log_analyze",
371 "Use EXPLAIN ANALYZE for plan logging.",
381 DefineCustomBoolVariable("pg_store_plans.log_buffers",
392 DefineCustomBoolVariable("pg_store_plans.log_timing",
403 DefineCustomBoolVariable("pg_store_plans.log_triggers",
404 "Log trigger trace.",
414 DefineCustomBoolVariable("pg_store_plans.log_verbose",
415 "Set VERBOSE for EXPLAIN on logging.",
425 EmitWarningsOnPlaceholders("pg_store_plans");
428 * Request additional shared resources. (These are no-ops if we're not in
429 * the postmaster process.) We'll allocate or attach to the shared
430 * resources in pgsp_shmem_startup().
432 RequestAddinShmemSpace(shared_mem_size());
433 #if PG_VERSION_NUM >= 90600
434 RequestNamedLWLockTranche("pg_store_plans", 1);
436 RequestAddinLWLocks(1);
442 prev_shmem_startup_hook = shmem_startup_hook;
443 shmem_startup_hook = pgsp_shmem_startup;
444 prev_ExecutorStart = ExecutorStart_hook;
445 ExecutorStart_hook = pgsp_ExecutorStart;
446 prev_ExecutorRun = ExecutorRun_hook;
447 ExecutorRun_hook = pgsp_ExecutorRun;
448 prev_ExecutorFinish = ExecutorFinish_hook;
449 ExecutorFinish_hook = pgsp_ExecutorFinish;
450 prev_ExecutorEnd = ExecutorEnd_hook;
451 ExecutorEnd_hook = pgsp_ExecutorEnd;
452 prev_ProcessUtility = ProcessUtility_hook;
453 ProcessUtility_hook = pgsp_ProcessUtility;
457 * Module unload callback
462 /* Uninstall hooks. */
463 shmem_startup_hook = prev_shmem_startup_hook;
464 ExecutorStart_hook = prev_ExecutorStart;
465 ExecutorRun_hook = prev_ExecutorRun;
466 ExecutorFinish_hook = prev_ExecutorFinish;
467 ExecutorEnd_hook = prev_ExecutorEnd;
468 ProcessUtility_hook = prev_ProcessUtility;
472 * shmem_startup hook: allocate or attach to shared memory,
473 * then load any pre-existing statistics from file.
476 pgsp_shmem_startup(void)
488 if (prev_shmem_startup_hook)
489 prev_shmem_startup_hook();
491 /* reset in case this is a restart within the postmaster */
496 * Create or attach to the shared memory state, including hash table
498 LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
500 shared_state = ShmemInitStruct("pg_store_plans",
506 /* First time through ... */
507 #if PG_VERSION_NUM >= 90600
508 shared_state->lock = &(GetNamedLWLockTranche("pg_store_plans"))->lock;
510 shared_state->lock = LWLockAssign();
512 shared_state->plan_size = store_plan_size;
513 shared_state->cur_median_usage = ASSUMED_MEDIAN_INIT;
516 /* Be sure everyone agrees on the hash table entry size */
517 plan_size = shared_state->plan_size;
519 memset(&info, 0, sizeof(info));
520 info.keysize = sizeof(EntryKey);
521 info.entrysize = offsetof(StatEntry, plan) + plan_size;
522 info.hash = hash_table_fn;
523 info.match = match_fn;
524 hash_table = ShmemInitHash("pg_store_plans hash",
525 store_size, store_size,
527 HASH_ELEM | HASH_FUNCTION | HASH_COMPARE);
529 LWLockRelease(AddinShmemInitLock);
532 * If we're in the postmaster (or a standalone backend...), set up a shmem
533 * exit hook to dump the statistics to disk.
535 if (!IsUnderPostmaster)
536 on_shmem_exit(pgsp_shmem_shutdown, (Datum) 0);
539 * Attempt to load old statistics from the dump file, if this is the first
540 * time through and we weren't told not to.
542 if (found || !dump_on_shutdown)
546 * Note: we don't bother with locks here, because there should be no other
547 * processes running when this code is reached.
549 file = AllocateFile(PGSP_DUMP_FILE, PG_BINARY_R);
553 return; /* ignore not-found error */
557 buffer_size = plan_size;
558 buffer = (char *) palloc(buffer_size);
560 if (fread(&header, sizeof(uint32), 1, file) != 1 ||
561 header != PGSP_FILE_HEADER ||
562 fread(&num, sizeof(int32), 1, file) != 1)
565 for (i = 0; i < num; i++)
570 if (fread(&temp, offsetof(StatEntry, mutex), 1, file) != 1)
573 /* Encoding is the only field we can easily sanity-check */
574 if (!PG_VALID_BE_ENCODING(temp.key.encoding))
577 /* Previous incarnation might have had a larger plan_size */
578 if (temp.plan_len >= buffer_size)
580 buffer = (char *) repalloc(buffer, temp.plan_len + 1);
581 buffer_size = temp.plan_len + 1;
584 if (fread(buffer, 1, temp.plan_len, file) != temp.plan_len)
586 buffer[temp.plan_len] = '\0';
588 /* Skip loading "sticky" entries */
589 if (temp.counters.calls == 0)
592 /* Clip to available length if needed */
593 if (temp.plan_len >= plan_size)
594 temp.plan_len = pg_encoding_mbcliplen(temp.key.encoding,
599 /* make the hashtable entry (discards old entries if too many) */
600 entry = entry_alloc(&temp.key, buffer, temp.plan_len, false);
602 /* copy in the actual stats */
603 entry->counters = temp.counters;
610 * Remove the file so it's not included in backups/replication slaves,
611 * etc. A new file will be written on next shutdown.
613 unlink(PGSP_DUMP_FILE);
619 (errcode_for_file_access(),
620 errmsg("could not read pg_stat_statement file \"%s\": %m",
626 /* If possible, throw away the bogus file; ignore any error */
627 unlink(PGSP_DUMP_FILE);
631 * shmem_shutdown hook: Dump statistics into file.
633 * Note: we don't bother with acquiring lock, because there should be no
634 * other processes running when this is called.
637 pgsp_shmem_shutdown(int code, Datum arg)
640 HASH_SEQ_STATUS hash_seq;
644 /* Don't try to dump during a crash. */
648 /* Safety check ... shouldn't get here unless shmem is set up. */
649 if (!shared_state || !hash_table)
652 /* Don't dump if told not to. */
653 if (!dump_on_shutdown)
656 file = AllocateFile(PGSP_DUMP_FILE ".tmp", PG_BINARY_W);
660 if (fwrite(&PGSP_FILE_HEADER, sizeof(uint32), 1, file) != 1)
662 num_entries = hash_get_num_entries(hash_table);
663 if (fwrite(&num_entries, sizeof(int32), 1, file) != 1)
666 hash_seq_init(&hash_seq, hash_table);
667 while ((entry = hash_seq_search(&hash_seq)) != NULL)
669 int len = entry->plan_len;
671 if (fwrite(entry, offsetof(StatEntry, mutex), 1, file) != 1 ||
672 fwrite(entry->plan, 1, len, file) != len)
683 * Rename file into place, so we atomically replace the old one.
685 if (rename(PGSP_DUMP_FILE ".tmp", PGSP_DUMP_FILE) != 0)
687 (errcode_for_file_access(),
688 errmsg("could not rename pg_store_plans file \"%s\": %m",
689 PGSP_DUMP_FILE ".tmp")));
695 (errcode_for_file_access(),
696 errmsg("could not write pg_store_plans file \"%s\": %m",
697 PGSP_DUMP_FILE ".tmp")));
700 unlink(PGSP_DUMP_FILE ".tmp");
705 * ExecutorStart hook: start up tracking if needed
708 pgsp_ExecutorStart(QueryDesc *queryDesc, int eflags)
711 (eflags & EXEC_FLAG_EXPLAIN_ONLY) == 0)
713 queryDesc->instrument_options |=
714 (log_timing ? INSTRUMENT_TIMER : 0)|
715 (log_timing ? 0: INSTRUMENT_ROWS)|
716 (log_buffers ? INSTRUMENT_BUFFERS : 0);
718 if (prev_ExecutorStart)
719 prev_ExecutorStart(queryDesc, eflags);
721 standard_ExecutorStart(queryDesc, eflags);
724 * Set up to track total elapsed time in ExecutorRun. Allocate in per-query
725 * context so as to be free at ExecutorEnd.
727 if (queryDesc->totaltime == NULL && pgsp_enabled())
729 MemoryContext oldcxt;
731 oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
732 queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_ALL);
733 MemoryContextSwitchTo(oldcxt);
739 * ExecutorRun hook: all we need do is track nesting depth
742 #if PG_VERSION_NUM >= 100000
743 pgsp_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count,
745 #elif PG_VERSION_NUM >= 90600
746 pgsp_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count)
748 pgsp_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, long count)
754 #if PG_VERSION_NUM >= 100000
755 if (prev_ExecutorRun)
756 prev_ExecutorRun(queryDesc, direction, count, execute_once);
758 standard_ExecutorRun(queryDesc, direction, count, execute_once);
760 if (prev_ExecutorRun)
761 prev_ExecutorRun(queryDesc, direction, count);
763 standard_ExecutorRun(queryDesc, direction, count);
776 * ExecutorFinish hook: all we need do is track nesting depth
779 pgsp_ExecutorFinish(QueryDesc *queryDesc)
784 if (prev_ExecutorFinish)
785 prev_ExecutorFinish(queryDesc);
787 standard_ExecutorFinish(queryDesc);
799 * ExecutorEnd hook: store results if needed
802 pgsp_ExecutorEnd(QueryDesc *queryDesc)
804 if (queryDesc->totaltime)
806 InstrEndLoop(queryDesc->totaltime);
808 if (pgsp_enabled() &&
809 queryDesc->totaltime->total >=
810 (double)min_duration / 1000.0)
812 ExplainState *es = NewExplainState();
813 StringInfo es_str = es->str;
815 es->analyze = queryDesc->instrument_options;
816 es->verbose = log_verbose;
817 es->buffers = (es->analyze && log_buffers);
818 es->timing = (es->analyze && log_timing);
819 es->format = EXPLAIN_FORMAT_JSON;
821 ExplainBeginOutput(es);
822 ExplainPrintPlan(es, queryDesc);
824 pgspExplainTriggers(es, queryDesc);
825 ExplainEndOutput(es);
827 /* Remove last line break */
828 if (es_str->len > 0 && es_str->data[es_str->len - 1] == '\n')
829 es_str->data[--es_str->len] = '\0';
831 /* JSON outmost braces. */
832 es_str->data[0] = '{';
833 es_str->data[es_str->len - 1] = '}';
836 * Make sure stats accumulation is done. (Note: it's okay if several
837 * levels of hook all do this.)
840 store_entry(es_str->data,
841 hash_query(queryDesc->sourceText),
842 queryDesc->plannedstmt->queryId,
843 queryDesc->totaltime->total * 1000.0, /* convert to msec */
844 queryDesc->estate->es_processed,
845 &queryDesc->totaltime->bufusage);
850 if (prev_ExecutorEnd)
851 prev_ExecutorEnd(queryDesc);
853 standard_ExecutorEnd(queryDesc);
857 * ProcessUtility hook
860 #if PG_VERSION_NUM >= 100000
861 pgsp_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
862 ProcessUtilityContext context, ParamListInfo params,
863 QueryEnvironment *queryEnv,
864 DestReceiver *dest, char *completionTag)
866 pgsp_ProcessUtility(Node *parsetree, const char *queryString,
867 ProcessUtilityContext context, ParamListInfo params,
868 DestReceiver *dest, char *completionTag)
871 #if PG_VERSION_NUM >= 100000
872 if (prev_ProcessUtility)
873 prev_ProcessUtility(pstmt, queryString,
874 context, params, queryEnv,
875 dest, completionTag);
877 standard_ProcessUtility(pstmt, queryString,
878 context, params, queryEnv,
879 dest, completionTag);
881 if (prev_ProcessUtility)
882 prev_ProcessUtility(parsetree, queryString,
884 dest, completionTag);
886 standard_ProcessUtility(parsetree, queryString,
888 dest, completionTag);
893 * Calculate hash value for a key
896 hash_table_fn(const void *key, Size keysize)
898 const EntryKey *k = (const EntryKey *) key;
900 /* we don't bother to include encoding in the hash */
901 return hash_uint32((uint32) k->userid) ^
902 hash_uint32((uint32) k->dbid) ^
903 hash_uint32((uint32) k->queryid) ^
904 hash_uint32((uint32) k->planid);
908 * Compare two keys - zero means match
911 match_fn(const void *key1, const void *key2, Size keysize)
913 const EntryKey *k1 = (const EntryKey *) key1;
914 const EntryKey *k2 = (const EntryKey *) key2;
916 if (k1->userid == k2->userid &&
917 k1->dbid == k2->dbid &&
918 k1->encoding == k2->encoding &&
919 k1->queryid == k2->queryid &&
920 k1->planid == k2->planid)
927 * hash_query: calculate internal query ID for a query
929 * As of PG11, Query.queryId has been widen to 64 bit to reduce collision of
930 * queries to practical level. On the other hand pg_store_plans uses the
931 * combination of query hash and plan hash values as the hash table key and
932 * the resolution of the hash value effectively has the same degree so we
933 * continue to use uint32 as internal queryid.
935 * This may merge plans from different queries into single internal query id
936 * but it is not a problem when pg_stat_statements is used together since the
937 * extension gives enough resolution on queries.
940 hash_query(const char* query)
944 char *normquery = pstrdup(query);
945 normalize_expr(normquery, false);
946 queryid = hash_any((const unsigned char*)normquery, strlen(normquery));
954 * Store some statistics for a plan.
956 * Table entry is keyed with userid.queryId.planId. queryId_pgss just stores
957 * queryId used to join with pg_stat_statements.
960 store_entry(char *plan, uint32 queryId, queryid_t queryId_pgss,
961 double total_time, uint64 rows,
962 const BufferUsage *bufusage)
966 char *norm_query = NULL;
968 char *normalized_plan = NULL;
969 char *shorten_plan = NULL;
970 volatile StatEntry *e;
972 Assert(plan != NULL);
974 /* Safety check... */
975 if (!shared_state || !hash_table)
978 /* Set up key for hashtable search */
979 key.userid = GetUserId();
980 key.dbid = MyDatabaseId;
981 key.encoding = GetDatabaseEncoding();
982 key.queryid = queryId;
984 normalized_plan = pgsp_json_normalize(plan);
985 shorten_plan = pgsp_json_shorten(plan);
986 elog(DEBUG3, "pg_store_plans: Normalized plan: %s", normalized_plan);
987 elog(DEBUG3, "pg_store_plans: Shorten plan: %s", shorten_plan);
988 elog(DEBUG3, "pg_store_plans: Original plan: %s", plan);
989 plan_len = strlen(shorten_plan);
991 key.planid = hash_any((const unsigned char *)normalized_plan,
992 strlen(normalized_plan));
993 pfree(normalized_plan);
995 if (plan_len >= shared_state->plan_size)
996 plan_len = pg_encoding_mbcliplen(GetDatabaseEncoding(),
999 shared_state->plan_size - 1);
1002 /* Look up the hash table entry with shared lock. */
1003 LWLockAcquire(shared_state->lock, LW_SHARED);
1005 entry = (StatEntry *) hash_search(hash_table, &key, HASH_FIND, NULL);
1007 /* Create new entry, if not present */
1011 * We'll need exclusive lock to make a new entry. There is no point
1012 * in holding shared lock while we normalize the string, though.
1014 LWLockRelease(shared_state->lock);
1016 /* Acquire exclusive lock as required by entry_alloc() */
1017 LWLockAcquire(shared_state->lock, LW_EXCLUSIVE);
1019 entry = entry_alloc(&key, "", 0, false);
1022 /* Increment the counts, except when jstate is not NULL */
1025 * Grab the spinlock while updating the counters (see comment about
1026 * locking rules at the head of the file)
1029 e = (volatile StatEntry *) entry;
1030 SpinLockAcquire(&e->mutex);
1032 e->queryid = queryId_pgss;
1034 /* "Unstick" entry if it was previously sticky */
1035 if (e->counters.calls == 0)
1037 e->counters.usage = USAGE_INIT;
1038 e->counters.first_call = GetCurrentTimestamp();
1041 e->counters.calls += 1;
1042 e->counters.total_time += total_time;
1043 e->counters.rows += rows;
1044 e->counters.shared_blks_hit += bufusage->shared_blks_hit;
1045 e->counters.shared_blks_read += bufusage->shared_blks_read;
1046 e->counters.shared_blks_dirtied += bufusage->shared_blks_dirtied;
1047 e->counters.shared_blks_written += bufusage->shared_blks_written;
1048 e->counters.local_blks_hit += bufusage->local_blks_hit;
1049 e->counters.local_blks_read += bufusage->local_blks_read;
1050 e->counters.local_blks_dirtied += bufusage->local_blks_dirtied;
1051 e->counters.local_blks_written += bufusage->local_blks_written;
1052 e->counters.temp_blks_read += bufusage->temp_blks_read;
1053 e->counters.temp_blks_written += bufusage->temp_blks_written;
1054 e->counters.blk_read_time += INSTR_TIME_GET_MILLISEC(bufusage->blk_read_time);
1055 e->counters.blk_write_time += INSTR_TIME_GET_MILLISEC(bufusage->blk_write_time);
1056 e->counters.last_call = GetCurrentTimestamp();
1057 e->counters.usage += USAGE_EXEC(total_time);
1059 Assert(plan_len >= 0 && plan_len < shared_state->plan_size);
1060 memcpy(entry->plan, shorten_plan, plan_len);
1061 entry->plan_len = plan_len;
1062 entry->plan[plan_len] = '\0';
1064 SpinLockRelease(&e->mutex);
1066 LWLockRelease(shared_state->lock);
1068 /* We postpone this pfree until we're out of the lock */
1074 * Reset all statement statistics.
1077 pg_store_plans_reset(PG_FUNCTION_ARGS)
1079 if (!shared_state || !hash_table)
1081 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1082 errmsg("pg_store_plans must be loaded via shared_preload_libraries")));
1087 #define PG_STORE_PLANS_COLS 23
1090 * Retrieve statement statistics.
1093 pg_store_plans(PG_FUNCTION_ARGS)
1095 ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
1097 Tuplestorestate *tupstore;
1098 MemoryContext per_query_ctx;
1099 MemoryContext oldcontext;
1100 Oid userid = GetUserId();
1101 bool is_superuser = superuser();
1102 HASH_SEQ_STATUS hash_seq;
1105 if (!shared_state || !hash_table)
1107 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1108 errmsg("pg_store_plans must be loaded via shared_preload_libraries")));
1110 /* check to see if caller supports us returning a tuplestore */
1111 if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
1113 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1114 errmsg("set-valued function called in context that cannot accept a set")));
1115 if (!(rsinfo->allowedModes & SFRM_Materialize))
1117 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1118 errmsg("materialize mode required, but it is not " \
1119 "allowed in this context")));
1121 /* Build a tuple descriptor for our result type */
1122 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
1123 elog(ERROR, "return type must be a row type");
1125 per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
1126 oldcontext = MemoryContextSwitchTo(per_query_ctx);
1128 tupstore = tuplestore_begin_heap(true, false, work_mem);
1129 rsinfo->returnMode = SFRM_Materialize;
1130 rsinfo->setResult = tupstore;
1131 rsinfo->setDesc = tupdesc;
1133 MemoryContextSwitchTo(oldcontext);
1135 LWLockAcquire(shared_state->lock, LW_SHARED);
1137 hash_seq_init(&hash_seq, hash_table);
1138 while ((entry = hash_seq_search(&hash_seq)) != NULL)
1140 Datum values[PG_STORE_PLANS_COLS];
1141 bool nulls[PG_STORE_PLANS_COLS];
1143 int64 queryid = entry->key.queryid;
1144 int64 queryid_stmt = entry->queryid;
1145 int64 planid = entry->key.planid;
1148 memset(values, 0, sizeof(values));
1149 memset(nulls, 0, sizeof(nulls));
1151 values[i++] = ObjectIdGetDatum(entry->key.userid);
1152 values[i++] = ObjectIdGetDatum(entry->key.dbid);
1153 if (is_superuser || entry->key.userid == userid)
1155 values[i++] = Int64GetDatumFast(queryid);
1156 values[i++] = Int64GetDatumFast(planid);
1157 values[i++] = Int64GetDatumFast(queryid_stmt);
1161 values[i++] = Int64GetDatumFast(0);
1162 values[i++] = Int64GetDatumFast(0);
1163 values[i++] = Int64GetDatumFast(0);
1167 if (is_superuser || entry->key.userid == userid)
1169 char *pstr = entry->plan;
1172 switch (plan_format)
1174 case PLAN_FORMAT_TEXT:
1175 pstr = pgsp_json_textize(entry->plan);
1177 case PLAN_FORMAT_JSON:
1178 pstr = pgsp_json_inflate(entry->plan);
1180 case PLAN_FORMAT_YAML:
1181 pstr = pgsp_json_yamlize(entry->plan);
1183 case PLAN_FORMAT_XML:
1184 pstr = pgsp_json_xmlize(entry->plan);
1191 pg_do_encoding_conversion((unsigned char *) pstr,
1193 entry->key.encoding,
1194 GetDatabaseEncoding());
1195 values[i++] = CStringGetTextDatum(estr);
1199 if (pstr != entry->plan)
1204 values[i++] = CStringGetTextDatum("<insufficient privilege>");
1206 /* copy counters to a local variable to keep locking time short */
1208 volatile StatEntry *e = (volatile StatEntry *) entry;
1210 SpinLockAcquire(&e->mutex);
1212 SpinLockRelease(&e->mutex);
1215 /* Skip entry if unexecuted (ie, it's a pending "sticky" entry) */
1219 values[i++] = Int64GetDatumFast(tmp.calls);
1220 values[i++] = Float8GetDatumFast(tmp.total_time);
1221 values[i++] = Int64GetDatumFast(tmp.rows);
1222 values[i++] = Int64GetDatumFast(tmp.shared_blks_hit);
1223 values[i++] = Int64GetDatumFast(tmp.shared_blks_read);
1224 values[i++] = Int64GetDatumFast(tmp.shared_blks_dirtied);
1225 values[i++] = Int64GetDatumFast(tmp.shared_blks_written);
1226 values[i++] = Int64GetDatumFast(tmp.local_blks_hit);
1227 values[i++] = Int64GetDatumFast(tmp.local_blks_read);
1228 values[i++] = Int64GetDatumFast(tmp.local_blks_dirtied);
1229 values[i++] = Int64GetDatumFast(tmp.local_blks_written);
1230 values[i++] = Int64GetDatumFast(tmp.temp_blks_read);
1231 values[i++] = Int64GetDatumFast(tmp.temp_blks_written);
1232 values[i++] = Float8GetDatumFast(tmp.blk_read_time);
1233 values[i++] = Float8GetDatumFast(tmp.blk_write_time);
1234 values[i++] = TimestampTzGetDatum(tmp.first_call);
1235 values[i++] = TimestampTzGetDatum(tmp.last_call);
1236 Assert(i == PG_STORE_PLANS_COLS);
1238 tuplestore_putvalues(tupstore, tupdesc, values, nulls);
1241 LWLockRelease(shared_state->lock);
1243 /* clean up and return the tuplestore */
1244 tuplestore_donestoring(tupstore);
1250 * Estimate shared memory space needed.
1253 shared_mem_size(void)
1258 size = MAXALIGN(sizeof(SharedState));
1259 entrysize = offsetof(StatEntry, plan) + store_plan_size;
1260 size = add_size(size, hash_estimate_size(store_size, entrysize));
1266 * Allocate a new hashtable entry.
1267 * caller must hold an exclusive lock on shared_state->lock
1269 * "plan" need not be null-terminated; we rely on plan_len instead
1271 * If "sticky" is true, make the new entry artificially sticky so that it will
1272 * probably still be there when the query finishes execution. We do this by
1273 * giving it a median usage value rather than the normal value. (Strictly
1274 * speaking, query strings are normalized on a best effort basis, though it
1275 * would be difficult to demonstrate this even under artificial conditions.)
1277 * Note: despite needing exclusive lock, it's not an error for the target
1278 * entry to already exist. This is because store_entry releases and
1279 * reacquires lock after failing to find a match; so someone else could
1280 * have made the entry while we waited to get exclusive lock.
1283 entry_alloc(EntryKey *key, const char *plan, int plan_len, bool sticky)
1288 /* Make space if needed */
1289 while (hash_get_num_entries(hash_table) >= store_size)
1292 /* Find or create an entry with desired hash code */
1293 entry = (StatEntry *) hash_search(hash_table, key, HASH_ENTER, &found);
1297 /* New entry, initialize it */
1299 /* reset the statistics */
1300 memset(&entry->counters, 0, sizeof(Counters));
1301 /* set the appropriate initial usage count */
1302 entry->counters.usage = sticky ? shared_state->cur_median_usage : USAGE_INIT;
1303 /* re-initialize the mutex each time ... we assume no one using it */
1304 SpinLockInit(&entry->mutex);
1305 /* ... and don't forget the query text */
1306 Assert(plan_len >= 0 && plan_len < shared_state->plan_size);
1307 entry->plan_len = plan_len;
1308 memcpy(entry->plan, plan, plan_len);
1309 entry->plan[plan_len] = '\0';
1316 * qsort comparator for sorting into increasing usage order
1319 entry_cmp(const void *lhs, const void *rhs)
1321 double l_usage = (*(StatEntry *const *) lhs)->counters.usage;
1322 double r_usage = (*(StatEntry *const *) rhs)->counters.usage;
1324 if (l_usage < r_usage)
1326 else if (l_usage > r_usage)
1333 * Deallocate least used entries.
1334 * Caller must hold an exclusive lock on shared_state->lock.
1339 HASH_SEQ_STATUS hash_seq;
1340 StatEntry **entries;
1346 * Sort entries by usage and deallocate USAGE_DEALLOC_PERCENT of them.
1347 * While we're scanning the table, apply the decay factor to the usage
1351 entries = palloc(hash_get_num_entries(hash_table) * sizeof(StatEntry *));
1354 hash_seq_init(&hash_seq, hash_table);
1355 while ((entry = hash_seq_search(&hash_seq)) != NULL)
1357 entries[i++] = entry;
1358 /* "Sticky" entries get a different usage decay rate. */
1359 if (entry->counters.calls == 0)
1360 entry->counters.usage *= STICKY_DECREASE_FACTOR;
1362 entry->counters.usage *= USAGE_DECREASE_FACTOR;
1365 qsort(entries, i, sizeof(StatEntry *), entry_cmp);
1367 /* Also, record the (approximate) median usage */
1369 shared_state->cur_median_usage = entries[i / 2]->counters.usage;
1371 nvictims = Max(10, i * USAGE_DEALLOC_PERCENT / 100);
1372 nvictims = Min(nvictims, i);
1374 for (i = 0; i < nvictims; i++)
1376 hash_search(hash_table, &entries[i]->key, HASH_REMOVE, NULL);
1383 * Release all entries.
1388 HASH_SEQ_STATUS hash_seq;
1391 LWLockAcquire(shared_state->lock, LW_EXCLUSIVE);
1393 hash_seq_init(&hash_seq, hash_table);
1394 while ((entry = hash_seq_search(&hash_seq)) != NULL)
1396 hash_search(hash_table, &entry->key, HASH_REMOVE, NULL);
1399 LWLockRelease(shared_state->lock);
1403 pg_store_plans_hash_query(PG_FUNCTION_ARGS)
1405 PG_RETURN_OID(hash_query(text_to_cstring(PG_GETARG_TEXT_P(0))));
1409 pg_store_plans_shorten(PG_FUNCTION_ARGS)
1411 text *short_plan = PG_GETARG_TEXT_P(0);
1412 char *cjson = text_to_cstring(short_plan);
1413 char *cshorten = pgsp_json_shorten(cjson);
1414 PG_RETURN_TEXT_P(cstring_to_text(cshorten));
1418 pg_store_plans_normalize(PG_FUNCTION_ARGS)
1420 text *short_plan = PG_GETARG_TEXT_P(0);
1421 char *cjson = text_to_cstring(short_plan);
1422 char *cnormalized = pgsp_json_normalize(cjson);
1423 PG_RETURN_TEXT_P(cstring_to_text(cnormalized));
1427 pg_store_plans_jsonplan(PG_FUNCTION_ARGS)
1429 text *short_plan = PG_GETARG_TEXT_P(0);
1430 char *cshort = text_to_cstring(short_plan);
1431 char *cinflated = pgsp_json_inflate(cshort);
1432 PG_RETURN_TEXT_P(cstring_to_text(cinflated));
1436 pg_store_plans_textplan(PG_FUNCTION_ARGS)
1438 text *short_plan = PG_GETARG_TEXT_P(0);
1439 char *cshort = text_to_cstring(short_plan);
1440 char *ctextized = pgsp_json_textize(cshort);
1442 PG_RETURN_TEXT_P(cstring_to_text(ctextized));
1446 pg_store_plans_yamlplan(PG_FUNCTION_ARGS)
1448 text *short_plan = PG_GETARG_TEXT_P(0);
1449 char *cshort = text_to_cstring(short_plan);
1450 char *cyamlized = pgsp_json_yamlize(cshort);
1452 PG_RETURN_TEXT_P(cstring_to_text(cyamlized));
1456 pg_store_plans_xmlplan(PG_FUNCTION_ARGS)
1458 text *short_plan = PG_GETARG_TEXT_P(0);
1459 char *cshort = text_to_cstring(short_plan);
1460 char *cxmlized = pgsp_json_xmlize(cshort);
1462 PG_RETURN_TEXT_P(cstring_to_text(cxmlized));