1 /*-------------------------------------------------------------------------
4 * Take statistics of plan selection across a whole database cluster.
6 * Execution costs are totaled for each distinct plan for each query,
7 * and plan and queryid are kept in a shared hashtable, each record in
8 * which is associated with a record in pg_stat_statements, if any, by
11 * For Postgres 9.3 or earlier does not expose query id so
12 * pg_store_plans needs to calculate it based on the given query
13 * string using different algorithm from pg_stat_statements, and later
14 * the id will be matched against the one made from query string
15 * stored in pg_stat_statements. For the reason, queryid matching in
16 * this way will fail if the query string kept in pg_stat_statements
17 * is truncated in the middle.
19 * Plans are identified by fingerprinting plan representations in
20 * "shortened" JSON format with constants and unstable values such as
21 * rows, width, loops ignored. Nevertheless, stored plan entries hold
22 * them of the latest execution. Entry eviction is done in the same
23 * way to pg_stat_statements.
25 * Copyright (c) 2008-2016, PostgreSQL Global Development Group
26 * Copyright (c) 2012-2016, NIPPON TELEGRAPH AND TELEPHONE CORPORATION
29 * pg_store_plan/pg_store_plan.c
31 *-------------------------------------------------------------------------
38 #include "commands/explain.h"
39 #include "access/hash.h"
40 #include "executor/instrument.h"
42 #include "mb/pg_wchar.h"
43 #include "miscadmin.h"
44 #include "storage/fd.h"
45 #include "storage/ipc.h"
46 #include "storage/lwlock.h"
47 #include "storage/spin.h"
48 #include "storage/shmem.h"
49 #include "tcop/utility.h"
50 #include "utils/builtins.h"
51 #include "utils/timestamp.h"
53 #include "pgsp_json.h"
54 #include "pgsp_explain.h"
58 /* Location of stats file */
59 #define PGSP_DUMP_FILE "global/pg_store_plans.stat"
61 /* This constant defines the magic number in the stats file header */
62 static const uint32 PGSP_FILE_HEADER = 0x20130828;
63 static const uint32 pg_store_plan_size = 5000;
65 /* XXX: Should USAGE_EXEC reflect execution time and/or buffer usage? */
66 #define USAGE_EXEC(duration) (1.0)
67 #define USAGE_INIT (1.0) /* including initial planning */
68 #define ASSUMED_MEDIAN_INIT (10.0) /* initial assumed median usage */
69 #define USAGE_DECREASE_FACTOR (0.99) /* decreased every entry_dealloc */
70 #define STICKY_DECREASE_FACTOR (0.50) /* factor for sticky entries */
71 #define USAGE_DEALLOC_PERCENT 5 /* free this % of entries at once */
74 * Hashtable key that defines the identity of a hashtable entry. We separate
75 * queries by user and by database even if they are otherwise identical.
77 * Presently, the query encoding is fully determined by the source database
78 * and so we don't really need it to be in the key. But that might not always
79 * be true. Anyway it's notationally convenient to pass it as part of the key.
81 typedef struct EntryKey
83 Oid userid; /* user OID */
84 Oid dbid; /* database OID */
85 int encoding; /* query encoding */
86 uint32 queryid; /* query identifier */
87 uint32 planid; /* plan identifier */
91 * The actual stats counters kept within StatEntry.
93 typedef struct Counters
95 int64 calls; /* # of times executed */
96 double total_time; /* total execution time, in msec */
97 int64 rows; /* total # of retrieved or affected rows */
98 int64 shared_blks_hit; /* # of shared buffer hits */
99 int64 shared_blks_read; /* # of shared disk blocks read */
100 int64 shared_blks_dirtied;/* # of shared disk blocks dirtied */
101 int64 shared_blks_written;/* # of shared disk blocks written */
102 int64 local_blks_hit; /* # of local buffer hits */
103 int64 local_blks_read; /* # of local disk blocks read */
104 int64 local_blks_dirtied; /* # of local disk blocks dirtied */
105 int64 local_blks_written; /* # of local disk blocks written */
106 int64 temp_blks_read; /* # of temp blocks read */
107 int64 temp_blks_written; /* # of temp blocks written */
108 double blk_read_time; /* time spent reading, in msec */
109 double blk_write_time; /* time spent writing, in msec */
110 TimestampTz first_call; /* timestamp of first call */
111 TimestampTz last_call; /* timestamp of last call */
112 double usage; /* usage factor */
116 * Statistics per plan
118 * NB: see the file read/write code before changing field order here.
120 typedef struct StatEntry
122 EntryKey key; /* hash key of entry - MUST BE FIRST */
123 uint32 queryid; /* query identifier from stat_statements*/
124 Counters counters; /* the statistics for this query */
125 int plan_len; /* # of valid bytes in query string */
126 slock_t mutex; /* protects the counters only */
127 char plan[1]; /* VARIABLE LENGTH ARRAY - MUST BE LAST */
129 * Note: the allocated length of query[] is actually
130 * shared_state->query_size
135 * Global shared state
137 typedef struct SharedState
139 LWLockId lock; /* protects hashtable search/modification */
140 int plan_size; /* max query length in bytes */
141 double cur_median_usage; /* current median usage in hashtable */
144 /*---- Local variables ----*/
146 /* Current nesting depth of ExecutorRun+ProcessUtility calls */
147 static int nested_level = 0;
149 /* Saved hook values in case of unload */
150 static shmem_startup_hook_type prev_shmem_startup_hook = NULL;
151 static ExecutorStart_hook_type prev_ExecutorStart = NULL;
152 static ExecutorRun_hook_type prev_ExecutorRun = NULL;
153 static ExecutorFinish_hook_type prev_ExecutorFinish = NULL;
154 static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
155 static ProcessUtility_hook_type prev_ProcessUtility = NULL;
157 /* Links to shared memory state */
158 static SharedState *shared_state = NULL;
159 static HTAB *hash_table = NULL;
161 /*---- GUC variables ----*/
165 TRACK_LEVEL_NONE, /* track no statements */
166 TRACK_LEVEL_TOP, /* only top level statements */
167 TRACK_LEVEL_ALL, /* all statements, including nested ones */
168 TRACK_LEVEL_FORCE /* all statements, including nested ones */
171 static const struct config_enum_entry track_options[] =
173 {"none", TRACK_LEVEL_NONE, false},
174 {"top", TRACK_LEVEL_TOP, false},
175 {"all", TRACK_LEVEL_ALL, false},
181 PLAN_FORMAT_RAW, /* No conversion. Shorten JSON */
182 PLAN_FORMAT_TEXT, /* Traditional text representation */
183 PLAN_FORMAT_JSON, /* JSON representation */
184 PLAN_FORMAT_YAML, /* YAML */
185 PLAN_FORMAT_XML, /* XML */
188 static const struct config_enum_entry plan_formats[] =
190 {"raw" , PLAN_FORMAT_RAW , false},
191 {"text", PLAN_FORMAT_TEXT, false},
192 {"json", PLAN_FORMAT_JSON, false},
193 {"yaml", PLAN_FORMAT_YAML, false},
194 {"xml" , PLAN_FORMAT_XML , false},
198 static int store_size; /* max # statements to track */
199 static int track_level; /* tracking level */
200 static int min_duration; /* min duration to record */
201 static bool dump_on_shutdown; /* whether to save stats across shutdown */
202 static bool log_analyze; /* Similar to EXPLAIN (ANALYZE *) */
203 static bool log_verbose; /* Similar to EXPLAIN (VERBOSE *) */
204 static bool log_buffers; /* Similar to EXPLAIN (BUFFERS *) */
205 static bool log_timing; /* Similar to EXPLAIN (TIMING *) */
206 static bool log_triggers; /* whether to log trigger statistics */
207 static int plan_format; /* Plan representation style in
208 * pg_store_plans.plan */
210 #define pgsp_enabled() \
211 (track_level == TRACK_LEVEL_ALL || \
212 (track_level == TRACK_LEVEL_TOP && nested_level == 0))
214 /*---- Function declarations ----*/
219 Datum pg_store_plans_reset(PG_FUNCTION_ARGS);
220 Datum pg_store_plans_hash_query(PG_FUNCTION_ARGS);
221 Datum pg_store_plans(PG_FUNCTION_ARGS);
222 Datum pg_store_plans_shorten(PG_FUNCTION_ARGS);
223 Datum pg_store_plans_normalize(PG_FUNCTION_ARGS);
224 Datum pg_store_plans_jsonplan(PG_FUNCTION_ARGS);
225 Datum pg_store_plans_yamlplan(PG_FUNCTION_ARGS);
226 Datum pg_store_plans_xmlplan(PG_FUNCTION_ARGS);
227 Datum pg_store_plans_textplan(PG_FUNCTION_ARGS);
229 PG_FUNCTION_INFO_V1(pg_store_plans_reset);
230 PG_FUNCTION_INFO_V1(pg_store_plans_hash_query);
231 PG_FUNCTION_INFO_V1(pg_store_plans);
232 PG_FUNCTION_INFO_V1(pg_store_plans_shorten);
233 PG_FUNCTION_INFO_V1(pg_store_plans_normalize);
234 PG_FUNCTION_INFO_V1(pg_store_plans_jsonplan);
235 PG_FUNCTION_INFO_V1(pg_store_plans_textplan);
236 PG_FUNCTION_INFO_V1(pg_store_plans_yamlplan);
237 PG_FUNCTION_INFO_V1(pg_store_plans_xmlplan);
239 static void pgsp_shmem_startup(void);
240 static void pgsp_shmem_shutdown(int code, Datum arg);
241 static void pgsp_ExecutorStart(QueryDesc *queryDesc, int eflags);
242 #if PG_VERSION_NUM >= 100000
243 static void pgsp_ExecutorRun(QueryDesc *queryDesc,
244 ScanDirection direction,
245 uint64 count, bool execute_once);
246 #elif PG_VERSION_NUM >= 90600
247 static void pgsp_ExecutorRun(QueryDesc *queryDesc,
248 ScanDirection direction,
251 static void pgsp_ExecutorRun(QueryDesc *queryDesc,
252 ScanDirection direction,
255 static void pgsp_ExecutorFinish(QueryDesc *queryDesc);
256 static void pgsp_ExecutorEnd(QueryDesc *queryDesc);
257 #if PG_VERSION_NUM >= 100000
258 static void pgsp_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
259 ProcessUtilityContext context, ParamListInfo params,
260 QueryEnvironment *queryEnv,
261 DestReceiver *dest, char *completionTag);
263 static void pgsp_ProcessUtility(Node *parsetree, const char *queryString,
264 ProcessUtilityContext context, ParamListInfo params,
265 DestReceiver *dest, char *completionTag);
267 static uint32 hash_table_fn(const void *key, Size keysize);
268 static int match_fn(const void *key1, const void *key2, Size keysize);
269 static uint32 hash_query(const char* query);
270 static void store_entry(char *plan, uint32 queryId, uint32 queryId2,
271 double total_time, uint64 rows,
272 const BufferUsage *bufusage);
273 static Size shared_mem_size(void);
274 static StatEntry *entry_alloc(EntryKey *key, const char *query,
275 int plan_len, bool sticky);
276 static void entry_dealloc(void);
277 static void entry_reset(void);
280 * Module load callback
286 * In order to create our shared memory area, we have to be loaded via
287 * shared_preload_libraries. If not, fall out without hooking into any of
288 * the main system. (We don't throw error here because it seems useful to
289 * allow the pg_stat_statements functions to be created even when the
290 * module isn't active. The functions must protect themselves against
291 * being called then, however.)
293 if (!process_shared_preload_libraries_in_progress)
297 * Define (or redefine) custom GUC variables.
299 DefineCustomIntVariable("pg_store_plans.max",
300 "Sets the maximum number of plans tracked by pg_store_plans.",
312 DefineCustomEnumVariable("pg_store_plans.track",
313 "Selects which plans are tracked by pg_store_plans.",
324 DefineCustomEnumVariable("pg_store_plans.plan_format",
325 "Selects which format to be appied for plan representation in pg_store_plans.",
336 DefineCustomIntVariable("pg_store_plans.min_duration",
337 "Minimum duration to record plan in milliseconds.",
349 DefineCustomBoolVariable("pg_store_plans.save",
350 "Save pg_store_plans statistics across server shutdowns.",
360 DefineCustomBoolVariable("pg_store_plans.log_analyze",
361 "Use EXPLAIN ANALYZE for plan logging.",
371 DefineCustomBoolVariable("pg_store_plans.log_buffers",
382 DefineCustomBoolVariable("pg_store_plans.log_timing",
393 DefineCustomBoolVariable("pg_store_plans.log_triggers",
394 "Log trigger trace.",
404 DefineCustomBoolVariable("pg_store_plans.log_verbose",
405 "Set VERBOSE for EXPLAIN on logging.",
415 EmitWarningsOnPlaceholders("pg_store_plans");
418 * Request additional shared resources. (These are no-ops if we're not in
419 * the postmaster process.) We'll allocate or attach to the shared
420 * resources in pgsp_shmem_startup().
422 RequestAddinShmemSpace(shared_mem_size());
423 #if PG_VERSION_NUM >= 90600
424 RequestNamedLWLockTranche("pg_store_plans", 1);
426 RequestAddinLWLocks(1);
432 prev_shmem_startup_hook = shmem_startup_hook;
433 shmem_startup_hook = pgsp_shmem_startup;
434 prev_ExecutorStart = ExecutorStart_hook;
435 ExecutorStart_hook = pgsp_ExecutorStart;
436 prev_ExecutorRun = ExecutorRun_hook;
437 ExecutorRun_hook = pgsp_ExecutorRun;
438 prev_ExecutorFinish = ExecutorFinish_hook;
439 ExecutorFinish_hook = pgsp_ExecutorFinish;
440 prev_ExecutorEnd = ExecutorEnd_hook;
441 ExecutorEnd_hook = pgsp_ExecutorEnd;
442 prev_ProcessUtility = ProcessUtility_hook;
443 ProcessUtility_hook = pgsp_ProcessUtility;
447 * Module unload callback
452 /* Uninstall hooks. */
453 shmem_startup_hook = prev_shmem_startup_hook;
454 ExecutorStart_hook = prev_ExecutorStart;
455 ExecutorRun_hook = prev_ExecutorRun;
456 ExecutorFinish_hook = prev_ExecutorFinish;
457 ExecutorEnd_hook = prev_ExecutorEnd;
458 ProcessUtility_hook = prev_ProcessUtility;
462 * shmem_startup hook: allocate or attach to shared memory,
463 * then load any pre-existing statistics from file.
466 pgsp_shmem_startup(void)
478 if (prev_shmem_startup_hook)
479 prev_shmem_startup_hook();
481 /* reset in case this is a restart within the postmaster */
486 * Create or attach to the shared memory state, including hash table
488 LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
490 shared_state = ShmemInitStruct("pg_store_plans",
496 /* First time through ... */
497 #if PG_VERSION_NUM >= 90600
498 shared_state->lock = &(GetNamedLWLockTranche("pg_store_plans"))->lock;
500 shared_state->lock = LWLockAssign();
502 shared_state->plan_size = pg_store_plan_size;
503 shared_state->cur_median_usage = ASSUMED_MEDIAN_INIT;
506 /* Be sure everyone agrees on the hash table entry size */
507 plan_size = shared_state->plan_size;
509 memset(&info, 0, sizeof(info));
510 info.keysize = sizeof(EntryKey);
511 info.entrysize = offsetof(StatEntry, plan) + plan_size;
512 info.hash = hash_table_fn;
513 info.match = match_fn;
514 hash_table = ShmemInitHash("pg_store_plans hash",
515 store_size, store_size,
517 HASH_ELEM | HASH_FUNCTION | HASH_COMPARE);
519 LWLockRelease(AddinShmemInitLock);
522 * If we're in the postmaster (or a standalone backend...), set up a shmem
523 * exit hook to dump the statistics to disk.
525 if (!IsUnderPostmaster)
526 on_shmem_exit(pgsp_shmem_shutdown, (Datum) 0);
529 * Attempt to load old statistics from the dump file, if this is the first
530 * time through and we weren't told not to.
532 if (found || !dump_on_shutdown)
536 * Note: we don't bother with locks here, because there should be no other
537 * processes running when this code is reached.
539 file = AllocateFile(PGSP_DUMP_FILE, PG_BINARY_R);
543 return; /* ignore not-found error */
547 buffer_size = plan_size;
548 buffer = (char *) palloc(buffer_size);
550 if (fread(&header, sizeof(uint32), 1, file) != 1 ||
551 header != PGSP_FILE_HEADER ||
552 fread(&num, sizeof(int32), 1, file) != 1)
555 for (i = 0; i < num; i++)
560 if (fread(&temp, offsetof(StatEntry, mutex), 1, file) != 1)
563 /* Encoding is the only field we can easily sanity-check */
564 if (!PG_VALID_BE_ENCODING(temp.key.encoding))
567 /* Previous incarnation might have had a larger plan_size */
568 if (temp.plan_len >= buffer_size)
570 buffer = (char *) repalloc(buffer, temp.plan_len + 1);
571 buffer_size = temp.plan_len + 1;
574 if (fread(buffer, 1, temp.plan_len, file) != temp.plan_len)
576 buffer[temp.plan_len] = '\0';
578 /* Skip loading "sticky" entries */
579 if (temp.counters.calls == 0)
582 /* Clip to available length if needed */
583 if (temp.plan_len >= plan_size)
584 temp.plan_len = pg_encoding_mbcliplen(temp.key.encoding,
589 /* make the hashtable entry (discards old entries if too many) */
590 entry = entry_alloc(&temp.key, buffer, temp.plan_len, false);
592 /* copy in the actual stats */
593 entry->counters = temp.counters;
600 * Remove the file so it's not included in backups/replication slaves,
601 * etc. A new file will be written on next shutdown.
603 unlink(PGSP_DUMP_FILE);
609 (errcode_for_file_access(),
610 errmsg("could not read pg_stat_statement file \"%s\": %m",
616 /* If possible, throw away the bogus file; ignore any error */
617 unlink(PGSP_DUMP_FILE);
621 * shmem_shutdown hook: Dump statistics into file.
623 * Note: we don't bother with acquiring lock, because there should be no
624 * other processes running when this is called.
627 pgsp_shmem_shutdown(int code, Datum arg)
630 HASH_SEQ_STATUS hash_seq;
634 /* Don't try to dump during a crash. */
638 /* Safety check ... shouldn't get here unless shmem is set up. */
639 if (!shared_state || !hash_table)
642 /* Don't dump if told not to. */
643 if (!dump_on_shutdown)
646 file = AllocateFile(PGSP_DUMP_FILE ".tmp", PG_BINARY_W);
650 if (fwrite(&PGSP_FILE_HEADER, sizeof(uint32), 1, file) != 1)
652 num_entries = hash_get_num_entries(hash_table);
653 if (fwrite(&num_entries, sizeof(int32), 1, file) != 1)
656 hash_seq_init(&hash_seq, hash_table);
657 while ((entry = hash_seq_search(&hash_seq)) != NULL)
659 int len = entry->plan_len;
661 if (fwrite(entry, offsetof(StatEntry, mutex), 1, file) != 1 ||
662 fwrite(entry->plan, 1, len, file) != len)
673 * Rename file into place, so we atomically replace the old one.
675 if (rename(PGSP_DUMP_FILE ".tmp", PGSP_DUMP_FILE) != 0)
677 (errcode_for_file_access(),
678 errmsg("could not rename pg_store_plans file \"%s\": %m",
679 PGSP_DUMP_FILE ".tmp")));
685 (errcode_for_file_access(),
686 errmsg("could not write pg_store_plans file \"%s\": %m",
687 PGSP_DUMP_FILE ".tmp")));
690 unlink(PGSP_DUMP_FILE ".tmp");
695 * ExecutorStart hook: start up tracking if needed
698 pgsp_ExecutorStart(QueryDesc *queryDesc, int eflags)
701 (eflags & EXEC_FLAG_EXPLAIN_ONLY) == 0)
703 queryDesc->instrument_options |=
704 (log_timing ? INSTRUMENT_TIMER : 0)|
705 (log_timing ? 0: INSTRUMENT_ROWS)|
706 (log_buffers ? INSTRUMENT_BUFFERS : 0);
708 if (prev_ExecutorStart)
709 prev_ExecutorStart(queryDesc, eflags);
711 standard_ExecutorStart(queryDesc, eflags);
714 * Set up to track total elapsed time in ExecutorRun. Allocate in per-query
715 * context so as to be free at ExecutorEnd.
717 if (queryDesc->totaltime == NULL && pgsp_enabled())
719 MemoryContext oldcxt;
721 oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
722 queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_ALL);
723 MemoryContextSwitchTo(oldcxt);
729 * ExecutorRun hook: all we need do is track nesting depth
732 #if PG_VERSION_NUM >= 100000
733 pgsp_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count,
735 #elif PG_VERSION_NUM >= 90600
736 pgsp_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count)
738 pgsp_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, long count)
744 #if PG_VERSION_NUM >= 100000
745 if (prev_ExecutorRun)
746 prev_ExecutorRun(queryDesc, direction, count, execute_once);
748 standard_ExecutorRun(queryDesc, direction, count, execute_once);
750 if (prev_ExecutorRun)
751 prev_ExecutorRun(queryDesc, direction, count);
753 standard_ExecutorRun(queryDesc, direction, count);
766 * ExecutorFinish hook: all we need do is track nesting depth
769 pgsp_ExecutorFinish(QueryDesc *queryDesc)
774 if (prev_ExecutorFinish)
775 prev_ExecutorFinish(queryDesc);
777 standard_ExecutorFinish(queryDesc);
789 * ExecutorEnd hook: store results if needed
792 pgsp_ExecutorEnd(QueryDesc *queryDesc)
794 if (queryDesc->totaltime)
796 InstrEndLoop(queryDesc->totaltime);
798 if (pgsp_enabled() &&
799 queryDesc->totaltime->total >=
800 (double)min_duration / 1000.0)
802 ExplainState *es = NewExplainState();
803 StringInfo es_str = es->str;
805 es->analyze = queryDesc->instrument_options;
806 es->verbose = log_verbose;
807 es->buffers = (es->analyze && log_buffers);
808 es->timing = (es->analyze && log_timing);
809 es->format = EXPLAIN_FORMAT_JSON;
811 ExplainBeginOutput(es);
812 ExplainPrintPlan(es, queryDesc);
814 pgspExplainTriggers(es, queryDesc);
815 ExplainEndOutput(es);
817 /* Remove last line break */
818 if (es_str->len > 0 && es_str->data[es_str->len - 1] == '\n')
819 es_str->data[--es_str->len] = '\0';
821 /* JSON outmost braces. */
822 es_str->data[0] = '{';
823 es_str->data[es_str->len - 1] = '}';
826 * Make sure stats accumulation is done. (Note: it's okay if several
827 * levels of hook all do this.)
830 store_entry(es_str->data,
831 hash_query(queryDesc->sourceText),
832 queryDesc->plannedstmt->queryId,
833 queryDesc->totaltime->total * 1000.0, /* convert to msec */
834 queryDesc->estate->es_processed,
835 &queryDesc->totaltime->bufusage);
840 if (prev_ExecutorEnd)
841 prev_ExecutorEnd(queryDesc);
843 standard_ExecutorEnd(queryDesc);
847 * ProcessUtility hook
850 #if PG_VERSION_NUM >= 100000
851 pgsp_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
852 ProcessUtilityContext context, ParamListInfo params,
853 QueryEnvironment *queryEnv,
854 DestReceiver *dest, char *completionTag)
856 pgsp_ProcessUtility(Node *parsetree, const char *queryString,
857 ProcessUtilityContext context, ParamListInfo params,
858 DestReceiver *dest, char *completionTag)
861 #if PG_VERSION_NUM >= 100000
862 if (prev_ProcessUtility)
863 prev_ProcessUtility(pstmt, queryString,
864 context, params, queryEnv,
865 dest, completionTag);
867 standard_ProcessUtility(pstmt, queryString,
868 context, params, queryEnv,
869 dest, completionTag);
871 if (prev_ProcessUtility)
872 prev_ProcessUtility(parsetree, queryString,
874 dest, completionTag);
876 standard_ProcessUtility(parsetree, queryString,
878 dest, completionTag);
883 * Calculate hash value for a key
886 hash_table_fn(const void *key, Size keysize)
888 const EntryKey *k = (const EntryKey *) key;
890 /* we don't bother to include encoding in the hash */
891 return hash_uint32((uint32) k->userid) ^
892 hash_uint32((uint32) k->dbid) ^
893 hash_uint32((uint32) k->queryid) ^
894 hash_uint32((uint32) k->planid);
898 * Compare two keys - zero means match
901 match_fn(const void *key1, const void *key2, Size keysize)
903 const EntryKey *k1 = (const EntryKey *) key1;
904 const EntryKey *k2 = (const EntryKey *) key2;
906 if (k1->userid == k2->userid &&
907 k1->dbid == k2->dbid &&
908 k1->encoding == k2->encoding &&
909 k1->queryid == k2->queryid &&
910 k1->planid == k2->planid)
917 hash_query(const char* query)
921 char *normquery = pstrdup(query);
922 normalize_expr(normquery, false);
923 queryid = hash_any((const unsigned char*)normquery, strlen(normquery));
931 * Store some statistics for a plan.
934 store_entry(char *plan, uint32 queryId, uint32 queryId2,
935 double total_time, uint64 rows,
936 const BufferUsage *bufusage)
940 char *norm_query = NULL;
942 char *normalized_plan = NULL;
943 char *shorten_plan = NULL;
944 volatile StatEntry *e;
946 Assert(plan != NULL);
948 /* Safety check... */
949 if (!shared_state || !hash_table)
952 /* Set up key for hashtable search */
953 key.userid = GetUserId();
954 key.dbid = MyDatabaseId;
955 key.encoding = GetDatabaseEncoding();
956 key.queryid = queryId;
958 normalized_plan = pgsp_json_normalize(plan);
959 shorten_plan = pgsp_json_shorten(plan);
960 elog(DEBUG3, "pg_store_plans: Normalized plan: %s", normalized_plan);
961 elog(DEBUG3, "pg_store_plans: Shorten plan: %s", shorten_plan);
962 elog(DEBUG3, "pg_store_plans: Original plan: %s", plan);
963 plan_len = strlen(shorten_plan);
965 key.planid = hash_any((const unsigned char *)normalized_plan,
966 strlen(normalized_plan));
967 pfree(normalized_plan);
969 if (plan_len >= shared_state->plan_size)
970 plan_len = pg_encoding_mbcliplen(GetDatabaseEncoding(),
973 shared_state->plan_size - 1);
976 /* Look up the hash table entry with shared lock. */
977 LWLockAcquire(shared_state->lock, LW_SHARED);
979 entry = (StatEntry *) hash_search(hash_table, &key, HASH_FIND, NULL);
981 /* Create new entry, if not present */
985 * We'll need exclusive lock to make a new entry. There is no point
986 * in holding shared lock while we normalize the string, though.
988 LWLockRelease(shared_state->lock);
990 /* Acquire exclusive lock as required by entry_alloc() */
991 LWLockAcquire(shared_state->lock, LW_EXCLUSIVE);
993 entry = entry_alloc(&key, "", 0, false);
996 /* Increment the counts, except when jstate is not NULL */
999 * Grab the spinlock while updating the counters (see comment about
1000 * locking rules at the head of the file)
1003 e = (volatile StatEntry *) entry;
1004 SpinLockAcquire(&e->mutex);
1006 e->queryid = queryId2;
1008 /* "Unstick" entry if it was previously sticky */
1009 if (e->counters.calls == 0)
1011 e->counters.usage = USAGE_INIT;
1012 e->counters.first_call = GetCurrentTimestamp();
1015 e->counters.calls += 1;
1016 e->counters.total_time += total_time;
1017 e->counters.rows += rows;
1018 e->counters.shared_blks_hit += bufusage->shared_blks_hit;
1019 e->counters.shared_blks_read += bufusage->shared_blks_read;
1020 e->counters.shared_blks_dirtied += bufusage->shared_blks_dirtied;
1021 e->counters.shared_blks_written += bufusage->shared_blks_written;
1022 e->counters.local_blks_hit += bufusage->local_blks_hit;
1023 e->counters.local_blks_read += bufusage->local_blks_read;
1024 e->counters.local_blks_dirtied += bufusage->local_blks_dirtied;
1025 e->counters.local_blks_written += bufusage->local_blks_written;
1026 e->counters.temp_blks_read += bufusage->temp_blks_read;
1027 e->counters.temp_blks_written += bufusage->temp_blks_written;
1028 e->counters.blk_read_time += INSTR_TIME_GET_MILLISEC(bufusage->blk_read_time);
1029 e->counters.blk_write_time += INSTR_TIME_GET_MILLISEC(bufusage->blk_write_time);
1030 e->counters.last_call = GetCurrentTimestamp();
1031 e->counters.usage += USAGE_EXEC(total_time);
1033 Assert(plan_len >= 0 && plan_len < shared_state->plan_size);
1034 memcpy(entry->plan, shorten_plan, plan_len);
1035 entry->plan_len = plan_len;
1036 entry->plan[plan_len] = '\0';
1038 SpinLockRelease(&e->mutex);
1040 LWLockRelease(shared_state->lock);
1042 /* We postpone this pfree until we're out of the lock */
1048 * Reset all statement statistics.
1051 pg_store_plans_reset(PG_FUNCTION_ARGS)
1053 if (!shared_state || !hash_table)
1055 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1056 errmsg("pg_stat_plan must be loaded via shared_preload_libraries")));
1061 #define PG_STORE_PLANS_COLS 23
1064 * Retrieve statement statistics.
1067 pg_store_plans(PG_FUNCTION_ARGS)
1069 ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
1071 Tuplestorestate *tupstore;
1072 MemoryContext per_query_ctx;
1073 MemoryContext oldcontext;
1074 Oid userid = GetUserId();
1075 bool is_superuser = superuser();
1076 HASH_SEQ_STATUS hash_seq;
1079 if (!shared_state || !hash_table)
1081 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1082 errmsg("pg_store_plans must be loaded via shared_preload_libraries")));
1084 /* check to see if caller supports us returning a tuplestore */
1085 if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
1087 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1088 errmsg("set-valued function called in context that cannot accept a set")));
1089 if (!(rsinfo->allowedModes & SFRM_Materialize))
1091 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1092 errmsg("materialize mode required, but it is not " \
1093 "allowed in this context")));
1095 /* Build a tuple descriptor for our result type */
1096 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
1097 elog(ERROR, "return type must be a row type");
1099 per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
1100 oldcontext = MemoryContextSwitchTo(per_query_ctx);
1102 tupstore = tuplestore_begin_heap(true, false, work_mem);
1103 rsinfo->returnMode = SFRM_Materialize;
1104 rsinfo->setResult = tupstore;
1105 rsinfo->setDesc = tupdesc;
1107 MemoryContextSwitchTo(oldcontext);
1109 LWLockAcquire(shared_state->lock, LW_SHARED);
1111 hash_seq_init(&hash_seq, hash_table);
1112 while ((entry = hash_seq_search(&hash_seq)) != NULL)
1114 Datum values[PG_STORE_PLANS_COLS];
1115 bool nulls[PG_STORE_PLANS_COLS];
1117 int64 queryid = entry->key.queryid;
1118 int64 queryid_stmt = entry->queryid;
1119 int64 planid = entry->key.planid;
1122 memset(values, 0, sizeof(values));
1123 memset(nulls, 0, sizeof(nulls));
1125 values[i++] = ObjectIdGetDatum(entry->key.userid);
1126 values[i++] = ObjectIdGetDatum(entry->key.dbid);
1127 if (is_superuser || entry->key.userid == userid)
1129 values[i++] = Int64GetDatumFast(queryid);
1130 values[i++] = Int64GetDatumFast(planid);
1131 values[i++] = Int64GetDatumFast(queryid_stmt);
1135 values[i++] = Int64GetDatumFast(0);
1136 values[i++] = Int64GetDatumFast(0);
1137 values[i++] = Int64GetDatumFast(0);
1141 if (is_superuser || entry->key.userid == userid)
1143 char *pstr = entry->plan;
1146 switch (plan_format)
1148 case PLAN_FORMAT_TEXT:
1149 pstr = pgsp_json_textize(entry->plan);
1151 case PLAN_FORMAT_JSON:
1152 pstr = pgsp_json_inflate(entry->plan);
1154 case PLAN_FORMAT_YAML:
1155 pstr = pgsp_json_yamlize(entry->plan);
1157 case PLAN_FORMAT_XML:
1158 pstr = pgsp_json_xmlize(entry->plan);
1165 pg_do_encoding_conversion((unsigned char *) pstr,
1167 entry->key.encoding,
1168 GetDatabaseEncoding());
1169 values[i++] = CStringGetTextDatum(estr);
1173 if (pstr != entry->plan)
1178 values[i++] = CStringGetTextDatum("<insufficient privilege>");
1180 /* copy counters to a local variable to keep locking time short */
1182 volatile StatEntry *e = (volatile StatEntry *) entry;
1184 SpinLockAcquire(&e->mutex);
1186 SpinLockRelease(&e->mutex);
1189 /* Skip entry if unexecuted (ie, it's a pending "sticky" entry) */
1193 values[i++] = Int64GetDatumFast(tmp.calls);
1194 values[i++] = Float8GetDatumFast(tmp.total_time);
1195 values[i++] = Int64GetDatumFast(tmp.rows);
1196 values[i++] = Int64GetDatumFast(tmp.shared_blks_hit);
1197 values[i++] = Int64GetDatumFast(tmp.shared_blks_read);
1198 values[i++] = Int64GetDatumFast(tmp.shared_blks_dirtied);
1199 values[i++] = Int64GetDatumFast(tmp.shared_blks_written);
1200 values[i++] = Int64GetDatumFast(tmp.local_blks_hit);
1201 values[i++] = Int64GetDatumFast(tmp.local_blks_read);
1202 values[i++] = Int64GetDatumFast(tmp.local_blks_dirtied);
1203 values[i++] = Int64GetDatumFast(tmp.local_blks_written);
1204 values[i++] = Int64GetDatumFast(tmp.temp_blks_read);
1205 values[i++] = Int64GetDatumFast(tmp.temp_blks_written);
1206 values[i++] = Float8GetDatumFast(tmp.blk_read_time);
1207 values[i++] = Float8GetDatumFast(tmp.blk_write_time);
1208 values[i++] = TimestampTzGetDatum(tmp.first_call);
1209 values[i++] = TimestampTzGetDatum(tmp.last_call);
1210 Assert(i == PG_STORE_PLANS_COLS);
1212 tuplestore_putvalues(tupstore, tupdesc, values, nulls);
1215 LWLockRelease(shared_state->lock);
1217 /* clean up and return the tuplestore */
1218 tuplestore_donestoring(tupstore);
1224 * Estimate shared memory space needed.
1227 shared_mem_size(void)
1232 size = MAXALIGN(sizeof(SharedState));
1233 entrysize = offsetof(StatEntry, plan) + pg_store_plan_size;
1234 size = add_size(size, hash_estimate_size(store_size, entrysize));
1240 * Allocate a new hashtable entry.
1241 * caller must hold an exclusive lock on shared_state->lock
1243 * "plan" need not be null-terminated; we rely on plan_len instead
1245 * If "sticky" is true, make the new entry artificially sticky so that it will
1246 * probably still be there when the query finishes execution. We do this by
1247 * giving it a median usage value rather than the normal value. (Strictly
1248 * speaking, query strings are normalized on a best effort basis, though it
1249 * would be difficult to demonstrate this even under artificial conditions.)
1251 * Note: despite needing exclusive lock, it's not an error for the target
1252 * entry to already exist. This is because store_entry releases and
1253 * reacquires lock after failing to find a match; so someone else could
1254 * have made the entry while we waited to get exclusive lock.
1257 entry_alloc(EntryKey *key, const char *plan, int plan_len, bool sticky)
1262 /* Make space if needed */
1263 while (hash_get_num_entries(hash_table) >= store_size)
1266 /* Find or create an entry with desired hash code */
1267 entry = (StatEntry *) hash_search(hash_table, key, HASH_ENTER, &found);
1271 /* New entry, initialize it */
1273 /* reset the statistics */
1274 memset(&entry->counters, 0, sizeof(Counters));
1275 /* set the appropriate initial usage count */
1276 entry->counters.usage = sticky ? shared_state->cur_median_usage : USAGE_INIT;
1277 /* re-initialize the mutex each time ... we assume no one using it */
1278 SpinLockInit(&entry->mutex);
1279 /* ... and don't forget the query text */
1280 Assert(plan_len >= 0 && plan_len < shared_state->plan_size);
1281 entry->plan_len = plan_len;
1282 memcpy(entry->plan, plan, plan_len);
1283 entry->plan[plan_len] = '\0';
1290 * qsort comparator for sorting into increasing usage order
1293 entry_cmp(const void *lhs, const void *rhs)
1295 double l_usage = (*(StatEntry *const *) lhs)->counters.usage;
1296 double r_usage = (*(StatEntry *const *) rhs)->counters.usage;
1298 if (l_usage < r_usage)
1300 else if (l_usage > r_usage)
1307 * Deallocate least used entries.
1308 * Caller must hold an exclusive lock on shared_state->lock.
1313 HASH_SEQ_STATUS hash_seq;
1314 StatEntry **entries;
1320 * Sort entries by usage and deallocate USAGE_DEALLOC_PERCENT of them.
1321 * While we're scanning the table, apply the decay factor to the usage
1325 entries = palloc(hash_get_num_entries(hash_table) * sizeof(StatEntry *));
1328 hash_seq_init(&hash_seq, hash_table);
1329 while ((entry = hash_seq_search(&hash_seq)) != NULL)
1331 entries[i++] = entry;
1332 /* "Sticky" entries get a different usage decay rate. */
1333 if (entry->counters.calls == 0)
1334 entry->counters.usage *= STICKY_DECREASE_FACTOR;
1336 entry->counters.usage *= USAGE_DECREASE_FACTOR;
1339 qsort(entries, i, sizeof(StatEntry *), entry_cmp);
1341 /* Also, record the (approximate) median usage */
1343 shared_state->cur_median_usage = entries[i / 2]->counters.usage;
1345 nvictims = Max(10, i * USAGE_DEALLOC_PERCENT / 100);
1346 nvictims = Min(nvictims, i);
1348 for (i = 0; i < nvictims; i++)
1350 hash_search(hash_table, &entries[i]->key, HASH_REMOVE, NULL);
1357 * Release all entries.
1362 HASH_SEQ_STATUS hash_seq;
1365 LWLockAcquire(shared_state->lock, LW_EXCLUSIVE);
1367 hash_seq_init(&hash_seq, hash_table);
1368 while ((entry = hash_seq_search(&hash_seq)) != NULL)
1370 hash_search(hash_table, &entry->key, HASH_REMOVE, NULL);
1373 LWLockRelease(shared_state->lock);
1377 pg_store_plans_hash_query(PG_FUNCTION_ARGS)
1379 PG_RETURN_OID(hash_query(text_to_cstring(PG_GETARG_TEXT_P(0))));
1383 pg_store_plans_shorten(PG_FUNCTION_ARGS)
1385 text *short_plan = PG_GETARG_TEXT_P(0);
1386 char *cjson = text_to_cstring(short_plan);
1387 char *cshorten = pgsp_json_shorten(cjson);
1388 PG_RETURN_TEXT_P(cstring_to_text(cshorten));
1392 pg_store_plans_normalize(PG_FUNCTION_ARGS)
1394 text *short_plan = PG_GETARG_TEXT_P(0);
1395 char *cjson = text_to_cstring(short_plan);
1396 char *cnormalized = pgsp_json_normalize(cjson);
1397 PG_RETURN_TEXT_P(cstring_to_text(cnormalized));
1401 pg_store_plans_jsonplan(PG_FUNCTION_ARGS)
1403 text *short_plan = PG_GETARG_TEXT_P(0);
1404 char *cshort = text_to_cstring(short_plan);
1405 char *cinflated = pgsp_json_inflate(cshort);
1406 PG_RETURN_TEXT_P(cstring_to_text(cinflated));
1410 pg_store_plans_textplan(PG_FUNCTION_ARGS)
1412 text *short_plan = PG_GETARG_TEXT_P(0);
1413 char *cshort = text_to_cstring(short_plan);
1414 char *ctextized = pgsp_json_textize(cshort);
1416 PG_RETURN_TEXT_P(cstring_to_text(ctextized));
1420 pg_store_plans_yamlplan(PG_FUNCTION_ARGS)
1422 text *short_plan = PG_GETARG_TEXT_P(0);
1423 char *cshort = text_to_cstring(short_plan);
1424 char *cyamlized = pgsp_json_yamlize(cshort);
1426 PG_RETURN_TEXT_P(cstring_to_text(cyamlized));
1430 pg_store_plans_xmlplan(PG_FUNCTION_ARGS)
1432 text *short_plan = PG_GETARG_TEXT_P(0);
1433 char *cshort = text_to_cstring(short_plan);
1434 char *cxmlized = pgsp_json_xmlize(cshort);
1436 PG_RETURN_TEXT_P(cstring_to_text(cxmlized));