1 /*-------------------------------------------------------------------------
5 * Copyright (c) 2008-2013, PostgreSQL Global Development Group
8 * contrib/pg_store_plan/pg_store_plan.c
10 *-------------------------------------------------------------------------
17 #include "commands/explain.h"
18 #include "access/hash.h"
19 #include "executor/instrument.h"
21 #include "mb/pg_wchar.h"
22 #include "miscadmin.h"
23 #include "storage/fd.h"
24 #include "storage/ipc.h"
25 #include "storage/spin.h"
26 #include "tcop/utility.h"
27 #include "utils/builtins.h"
28 #include "utils/timestamp.h"
30 #include "pgsp_json.h"
31 #include "pgsp_explain.h"
35 /* Location of stats file */
36 #define PGSP_DUMP_FILE "global/pg_store_plans.stat"
38 /* This constant defines the magic number in the stats file header */
39 static const uint32 PGSP_FILE_HEADER = 0x20130828;
40 static const uint32 pg_store_plan_size = 5000;
42 /* XXX: Should USAGE_EXEC reflect execution time and/or buffer usage? */
43 #define USAGE_EXEC(duration) (1.0)
44 #define USAGE_INIT (1.0) /* including initial planning */
45 #define ASSUMED_MEDIAN_INIT (10.0) /* initial assumed median usage */
46 #define USAGE_DECREASE_FACTOR (0.99) /* decreased every entry_dealloc */
47 #define STICKY_DECREASE_FACTOR (0.50) /* factor for sticky entries */
48 #define USAGE_DEALLOC_PERCENT 5 /* free this % of entries at once */
51 * Hashtable key that defines the identity of a hashtable entry. We separate
52 * queries by user and by database even if they are otherwise identical.
54 * Presently, the query encoding is fully determined by the source database
55 * and so we don't really need it to be in the key. But that might not always
56 * be true. Anyway it's notationally convenient to pass it as part of the key.
58 typedef struct EntryKey
60 Oid userid; /* user OID */
61 Oid dbid; /* database OID */
62 int encoding; /* query encoding */
63 uint32 queryid; /* query identifier */
64 uint32 planid; /* plan identifier */
68 * The actual stats counters kept within StatEntry.
70 typedef struct Counters
72 int64 calls; /* # of times executed */
73 double total_time; /* total execution time, in msec */
74 int64 rows; /* total # of retrieved or affected rows */
75 int64 shared_blks_hit; /* # of shared buffer hits */
76 int64 shared_blks_read; /* # of shared disk blocks read */
77 int64 shared_blks_dirtied;/* # of shared disk blocks dirtied */
78 int64 shared_blks_written;/* # of shared disk blocks written */
79 int64 local_blks_hit; /* # of local buffer hits */
80 int64 local_blks_read; /* # of local disk blocks read */
81 int64 local_blks_dirtied; /* # of local disk blocks dirtied */
82 int64 local_blks_written; /* # of local disk blocks written */
83 int64 temp_blks_read; /* # of temp blocks read */
84 int64 temp_blks_written; /* # of temp blocks written */
85 double blk_read_time; /* time spent reading, in msec */
86 double blk_write_time; /* time spent writing, in msec */
87 TimestampTz first_call; /* timestamp of first call */
88 TimestampTz last_call; /* timestamp of last call */
89 double usage; /* usage factor */
95 * NB: see the file read/write code before changing field order here.
97 typedef struct StatEntry
99 EntryKey key; /* hash key of entry - MUST BE FIRST */
100 uint32 queryid; /* query identifier from stat_statements*/
101 Counters counters; /* the statistics for this query */
102 int plan_len; /* # of valid bytes in query string */
103 slock_t mutex; /* protects the counters only */
104 char plan[1]; /* VARIABLE LENGTH ARRAY - MUST BE LAST */
106 * Note: the allocated length of query[] is actually
107 * shared_state->query_size
112 * Global shared state
114 typedef struct SharedState
116 LWLockId lock; /* protects hashtable search/modification */
117 int plan_size; /* max query length in bytes */
118 double cur_median_usage; /* current median usage in hashtable */
121 /*---- Local variables ----*/
123 /* Current nesting depth of ExecutorRun+ProcessUtility calls */
124 static int nested_level = 0;
126 /* Saved hook values in case of unload */
127 static shmem_startup_hook_type prev_shmem_startup_hook = NULL;
128 static ExecutorStart_hook_type prev_ExecutorStart = NULL;
129 static ExecutorRun_hook_type prev_ExecutorRun = NULL;
130 static ExecutorFinish_hook_type prev_ExecutorFinish = NULL;
131 static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
132 static ProcessUtility_hook_type prev_ProcessUtility = NULL;
134 /* Links to shared memory state */
135 static SharedState *shared_state = NULL;
136 static HTAB *hash_table = NULL;
138 /*---- GUC variables ----*/
142 TRACK_LEVEL_NONE, /* track no statements */
143 TRACK_LEVEL_TOP, /* only top level statements */
144 TRACK_LEVEL_ALL, /* all statements, including nested ones */
145 TRACK_LEVEL_FORCE /* all statements, including nested ones */
148 static const struct config_enum_entry track_options[] =
150 {"none", TRACK_LEVEL_NONE, false},
151 {"top", TRACK_LEVEL_TOP, false},
152 {"all", TRACK_LEVEL_ALL, false},
158 PLAN_FORMAT_RAW, /* No conversion. Shorten JSON */
159 PLAN_FORMAT_TEXT, /* Traditional text representation */
160 PLAN_FORMAT_JSON, /* JSON representation */
161 PLAN_FORMAT_YAML, /* YAML */
162 PLAN_FORMAT_XML, /* XML */
165 static const struct config_enum_entry plan_formats[] =
167 {"raw" , PLAN_FORMAT_RAW , false},
168 {"text", PLAN_FORMAT_TEXT, false},
169 {"json", PLAN_FORMAT_JSON, false},
170 {"yaml", PLAN_FORMAT_YAML, false},
171 {"xml" , PLAN_FORMAT_XML , false},
175 static int store_size; /* max # statements to track */
176 static int track_level; /* tracking level */
177 static int min_duration; /* min duration to record */
178 static bool dump_on_shutdown; /* whether to save stats across shutdown */
179 static bool log_analyze; /* Similar to EXPLAIN (ANALYZE *) */
180 static bool log_verbose; /* Similar to EXPLAIN (VERBOSE *) */
181 static bool log_buffers; /* Similar to EXPLAIN (BUFFERS *) */
182 static bool log_timing; /* Similar to EXPLAIN (TIMING *) */
183 static bool log_triggers; /* whether to log trigger statistics */
184 static int plan_format; /* Plan representation style in
185 * pg_store_plans.plan */
/*
 * True when the statement at the current nesting depth should be tracked:
 * only at depth zero for TRACK_LEVEL_TOP, at any depth for TRACK_LEVEL_ALL.
 */
#define pgsp_enabled() \
	((track_level == TRACK_LEVEL_TOP && nested_level == 0) || \
	 track_level == TRACK_LEVEL_ALL)
191 /*---- Function declarations ----*/
196 Datum pg_store_plans_reset(PG_FUNCTION_ARGS);
197 Datum pg_store_plans_hash_query(PG_FUNCTION_ARGS);
198 Datum pg_store_plans(PG_FUNCTION_ARGS);
199 Datum pg_store_plans_shorten(PG_FUNCTION_ARGS);
200 Datum pg_store_plans_normalize(PG_FUNCTION_ARGS);
201 Datum pg_store_plans_jsonplan(PG_FUNCTION_ARGS);
202 Datum pg_store_plans_yamlplan(PG_FUNCTION_ARGS);
203 Datum pg_store_plans_xmlplan(PG_FUNCTION_ARGS);
204 Datum pg_store_plans_textplan(PG_FUNCTION_ARGS);
206 PG_FUNCTION_INFO_V1(pg_store_plans_reset);
207 PG_FUNCTION_INFO_V1(pg_store_plans_hash_query);
208 PG_FUNCTION_INFO_V1(pg_store_plans);
209 PG_FUNCTION_INFO_V1(pg_store_plans_shorten);
210 PG_FUNCTION_INFO_V1(pg_store_plans_normalize);
211 PG_FUNCTION_INFO_V1(pg_store_plans_jsonplan);
212 PG_FUNCTION_INFO_V1(pg_store_plans_textplan);
213 PG_FUNCTION_INFO_V1(pg_store_plans_yamlplan);
214 PG_FUNCTION_INFO_V1(pg_store_plans_xmlplan);
216 static void pgsp_shmem_startup(void);
217 static void pgsp_shmem_shutdown(int code, Datum arg);
218 static void pgsp_ExecutorStart(QueryDesc *queryDesc, int eflags);
219 static void pgsp_ExecutorRun(QueryDesc *queryDesc,
220 ScanDirection direction,
222 static void pgsp_ExecutorFinish(QueryDesc *queryDesc);
223 static void pgsp_ExecutorEnd(QueryDesc *queryDesc);
224 static void pgsp_ProcessUtility(Node *parsetree, const char *queryString,
225 ProcessUtilityContext context, ParamListInfo params,
226 DestReceiver *dest, char *completionTag);
227 static uint32 hash_table_fn(const void *key, Size keysize);
228 static int match_fn(const void *key1, const void *key2, Size keysize);
229 static uint32 hash_query(const char* query);
230 static void store_entry(char *plan, uint32 queryId, uint32 queryId2,
231 double total_time, uint64 rows,
232 const BufferUsage *bufusage);
233 static Size shared_mem_size(void);
234 static StatEntry *entry_alloc(EntryKey *key, const char *query,
235 int plan_len, bool sticky);
236 static void entry_dealloc(void);
237 static void entry_reset(void);
240 * Module load callback
246 * In order to create our shared memory area, we have to be loaded via
247 * shared_preload_libraries. If not, fall out without hooking into any of
248 * the main system. (We don't throw error here because it seems useful to
249 * allow the pg_store_plans functions to be created even when the
250 * module isn't active. The functions must protect themselves against
251 * being called then, however.)
253 if (!process_shared_preload_libraries_in_progress)
257 * Define (or redefine) custom GUC variables.
259 DefineCustomIntVariable("pg_store_plans.max",
260 "Sets the maximum number of plans tracked by pg_store_plans.",
272 DefineCustomEnumVariable("pg_store_plans.track",
273 "Selects which plans are tracked by pg_store_plans.",
284 DefineCustomEnumVariable("pg_store_plans.plan_format",
285 "Selects which format to be appied for plan representation in pg_store_plans.",
296 DefineCustomIntVariable("pg_store_plans.min_duration",
297 "Minimum duration to record plan in milliseconds.",
309 DefineCustomBoolVariable("pg_store_plans.save",
310 "Save pg_store_plans statistics across server shutdowns.",
320 DefineCustomBoolVariable("pg_store_plans.log_analyze",
321 "Use EXPLAIN ANALYZE for plan logging.",
331 DefineCustomBoolVariable("pg_store_plans.log_buffers",
342 DefineCustomBoolVariable("pg_store_plans.log_timing",
353 DefineCustomBoolVariable("pg_store_plans.log_triggers",
354 "Log trigger trace.",
364 DefineCustomBoolVariable("pg_store_plans.log_verbose",
365 "Set VERBOSE for EXPLAIN on logging.",
375 EmitWarningsOnPlaceholders("pg_store_plans");
378 * Request additional shared resources. (These are no-ops if we're not in
379 * the postmaster process.) We'll allocate or attach to the shared
380 * resources in pgsp_shmem_startup().
382 RequestAddinShmemSpace(shared_mem_size());
383 RequestAddinLWLocks(1);
388 prev_shmem_startup_hook = shmem_startup_hook;
389 shmem_startup_hook = pgsp_shmem_startup;
390 prev_ExecutorStart = ExecutorStart_hook;
391 ExecutorStart_hook = pgsp_ExecutorStart;
392 prev_ExecutorRun = ExecutorRun_hook;
393 ExecutorRun_hook = pgsp_ExecutorRun;
394 prev_ExecutorFinish = ExecutorFinish_hook;
395 ExecutorFinish_hook = pgsp_ExecutorFinish;
396 prev_ExecutorEnd = ExecutorEnd_hook;
397 ExecutorEnd_hook = pgsp_ExecutorEnd;
398 prev_ProcessUtility = ProcessUtility_hook;
399 ProcessUtility_hook = pgsp_ProcessUtility;
403 * Module unload callback
408 /* Uninstall hooks. */
409 shmem_startup_hook = prev_shmem_startup_hook;
410 ExecutorStart_hook = prev_ExecutorStart;
411 ExecutorRun_hook = prev_ExecutorRun;
412 ExecutorFinish_hook = prev_ExecutorFinish;
413 ExecutorEnd_hook = prev_ExecutorEnd;
414 ProcessUtility_hook = prev_ProcessUtility;
418 * shmem_startup hook: allocate or attach to shared memory,
419 * then load any pre-existing statistics from file.
422 pgsp_shmem_startup(void)
434 if (prev_shmem_startup_hook)
435 prev_shmem_startup_hook();
437 /* reset in case this is a restart within the postmaster */
442 * Create or attach to the shared memory state, including hash table
444 LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
446 shared_state = ShmemInitStruct("pg_store_plans",
452 /* First time through ... */
453 shared_state->lock = LWLockAssign();
454 shared_state->plan_size = pg_store_plan_size;
455 shared_state->cur_median_usage = ASSUMED_MEDIAN_INIT;
458 /* Be sure everyone agrees on the hash table entry size */
459 plan_size = shared_state->plan_size;
461 memset(&info, 0, sizeof(info));
462 info.keysize = sizeof(EntryKey);
463 info.entrysize = offsetof(StatEntry, plan) + plan_size;
464 info.hash = hash_table_fn;
465 info.match = match_fn;
466 hash_table = ShmemInitHash("pg_store_plans hash",
467 store_size, store_size,
469 HASH_ELEM | HASH_FUNCTION | HASH_COMPARE);
471 LWLockRelease(AddinShmemInitLock);
474 * If we're in the postmaster (or a standalone backend...), set up a shmem
475 * exit hook to dump the statistics to disk.
477 if (!IsUnderPostmaster)
478 on_shmem_exit(pgsp_shmem_shutdown, (Datum) 0);
481 * Attempt to load old statistics from the dump file, if this is the first
482 * time through and we weren't told not to.
484 if (found || !dump_on_shutdown)
488 * Note: we don't bother with locks here, because there should be no other
489 * processes running when this code is reached.
491 file = AllocateFile(PGSP_DUMP_FILE, PG_BINARY_R);
495 return; /* ignore not-found error */
499 buffer_size = plan_size;
500 buffer = (char *) palloc(buffer_size);
502 if (fread(&header, sizeof(uint32), 1, file) != 1 ||
503 header != PGSP_FILE_HEADER ||
504 fread(&num, sizeof(int32), 1, file) != 1)
507 for (i = 0; i < num; i++)
512 if (fread(&temp, offsetof(StatEntry, mutex), 1, file) != 1)
515 /* Encoding is the only field we can easily sanity-check */
516 if (!PG_VALID_BE_ENCODING(temp.key.encoding))
519 /* Previous incarnation might have had a larger plan_size */
520 if (temp.plan_len >= buffer_size)
522 buffer = (char *) repalloc(buffer, temp.plan_len + 1);
523 buffer_size = temp.plan_len + 1;
526 if (fread(buffer, 1, temp.plan_len, file) != temp.plan_len)
528 buffer[temp.plan_len] = '\0';
530 /* Skip loading "sticky" entries */
531 if (temp.counters.calls == 0)
534 /* Clip to available length if needed */
535 if (temp.plan_len >= plan_size)
536 temp.plan_len = pg_encoding_mbcliplen(temp.key.encoding,
541 /* make the hashtable entry (discards old entries if too many) */
542 entry = entry_alloc(&temp.key, buffer, temp.plan_len, false);
544 /* copy in the actual stats */
545 entry->counters = temp.counters;
552 * Remove the file so it's not included in backups/replication slaves,
553 * etc. A new file will be written on next shutdown.
555 unlink(PGSP_DUMP_FILE);
561 (errcode_for_file_access(),
562 errmsg("could not read pg_stat_statement file \"%s\": %m",
568 /* If possible, throw away the bogus file; ignore any error */
569 unlink(PGSP_DUMP_FILE);
573 * shmem_shutdown hook: Dump statistics into file.
575 * Note: we don't bother with acquiring lock, because there should be no
576 * other processes running when this is called.
579 pgsp_shmem_shutdown(int code, Datum arg)
582 HASH_SEQ_STATUS hash_seq;
586 /* Don't try to dump during a crash. */
590 /* Safety check ... shouldn't get here unless shmem is set up. */
591 if (!shared_state || !hash_table)
594 /* Don't dump if told not to. */
595 if (!dump_on_shutdown)
598 file = AllocateFile(PGSP_DUMP_FILE ".tmp", PG_BINARY_W);
602 if (fwrite(&PGSP_FILE_HEADER, sizeof(uint32), 1, file) != 1)
604 num_entries = hash_get_num_entries(hash_table);
605 if (fwrite(&num_entries, sizeof(int32), 1, file) != 1)
608 hash_seq_init(&hash_seq, hash_table);
609 while ((entry = hash_seq_search(&hash_seq)) != NULL)
611 int len = entry->plan_len;
613 if (fwrite(entry, offsetof(StatEntry, mutex), 1, file) != 1 ||
614 fwrite(entry->plan, 1, len, file) != len)
625 * Rename file into place, so we atomically replace the old one.
627 if (rename(PGSP_DUMP_FILE ".tmp", PGSP_DUMP_FILE) != 0)
629 (errcode_for_file_access(),
630 errmsg("could not rename pg_store_plans file \"%s\": %m",
631 PGSP_DUMP_FILE ".tmp")));
637 (errcode_for_file_access(),
638 errmsg("could not write pg_store_plans file \"%s\": %m",
639 PGSP_DUMP_FILE ".tmp")));
642 unlink(PGSP_DUMP_FILE ".tmp");
647 * ExecutorStart hook: start up tracking if needed
650 pgsp_ExecutorStart(QueryDesc *queryDesc, int eflags)
653 (eflags & EXEC_FLAG_EXPLAIN_ONLY) == 0)
655 queryDesc->instrument_options |=
656 (log_timing ? INSTRUMENT_TIMER : 0)|
657 (log_timing ? 0: INSTRUMENT_ROWS)|
658 (log_buffers ? INSTRUMENT_BUFFERS : 0);
660 if (prev_ExecutorStart)
661 prev_ExecutorStart(queryDesc, eflags);
663 standard_ExecutorStart(queryDesc, eflags);
666 * Set up to track total elapsed time in ExecutorRun. Allocate in per-query
667 * context so as to be free at ExecutorEnd.
669 if (queryDesc->totaltime == NULL && pgsp_enabled())
671 MemoryContext oldcxt;
673 oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
674 queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_ALL);
675 MemoryContextSwitchTo(oldcxt);
681 * ExecutorRun hook: all we need do is track nesting depth
684 pgsp_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, long count)
689 if (prev_ExecutorRun)
690 prev_ExecutorRun(queryDesc, direction, count);
692 standard_ExecutorRun(queryDesc, direction, count);
704 * ExecutorFinish hook: all we need do is track nesting depth
707 pgsp_ExecutorFinish(QueryDesc *queryDesc)
712 if (prev_ExecutorFinish)
713 prev_ExecutorFinish(queryDesc);
715 standard_ExecutorFinish(queryDesc);
727 * ExecutorEnd hook: store results if needed
730 pgsp_ExecutorEnd(QueryDesc *queryDesc)
732 if (queryDesc->totaltime)
734 InstrEndLoop(queryDesc->totaltime);
736 if (pgsp_enabled() &&
737 queryDesc->totaltime->total >=
738 (double)min_duration / 1000.0)
742 ExplainInitState(&es);
743 es.analyze = queryDesc->instrument_options;
745 es.verbose = log_verbose;
746 es.buffers = (es.analyze && log_buffers);
747 es.timing = (es.analyze && log_timing);
748 es.format = EXPLAIN_FORMAT_JSON;
750 ExplainBeginOutput(&es);
751 ExplainPrintPlan(&es, queryDesc);
753 pgspExplainTriggers(&es, queryDesc);
754 ExplainEndOutput(&es);
756 /* Remove last line break */
757 if (es.str->len > 0 && es.str->data[es.str->len - 1] == '\n')
758 es.str->data[--es.str->len] = '\0';
760 /* JSON outmost braces. */
761 es.str->data[0] = '{';
762 es.str->data[es.str->len - 1] = '}';
765 * Make sure stats accumulation is done. (Note: it's okay if several
766 * levels of hook all do this.)
769 store_entry(es.str->data,
770 hash_query(queryDesc->sourceText),
771 queryDesc->plannedstmt->queryId,
772 queryDesc->totaltime->total * 1000.0, /* convert to msec */
773 queryDesc->estate->es_processed,
774 &queryDesc->totaltime->bufusage);
779 if (prev_ExecutorEnd)
780 prev_ExecutorEnd(queryDesc);
782 standard_ExecutorEnd(queryDesc);
786 * ProcessUtility hook
789 pgsp_ProcessUtility(Node *parsetree, const char *queryString,
790 ProcessUtilityContext context, ParamListInfo params,
791 DestReceiver *dest, char *completionTag)
793 if (prev_ProcessUtility)
794 prev_ProcessUtility(parsetree, queryString,
796 dest, completionTag);
798 standard_ProcessUtility(parsetree, queryString,
800 dest, completionTag);
804 * Calculate hash value for a key
807 hash_table_fn(const void *key, Size keysize)
809 const EntryKey *k = (const EntryKey *) key;
811 /* we don't bother to include encoding in the hash */
812 return hash_uint32((uint32) k->userid) ^
813 hash_uint32((uint32) k->dbid) ^
814 hash_uint32((uint32) k->queryid) ^
815 hash_uint32((uint32) k->planid);
819 * Compare two keys - zero means match
822 match_fn(const void *key1, const void *key2, Size keysize)
824 const EntryKey *k1 = (const EntryKey *) key1;
825 const EntryKey *k2 = (const EntryKey *) key2;
827 if (k1->userid == k2->userid &&
828 k1->dbid == k2->dbid &&
829 k1->encoding == k2->encoding &&
830 k1->queryid == k2->queryid &&
831 k1->planid == k2->planid)
838 hash_query(const char* query)
842 char *normquery = pstrdup(query);
843 normalize_expr(normquery, false);
844 queryid = hash_any((const unsigned char*)normquery, strlen(normquery));
852 * Store some statistics for a statement.
854 * If jstate is not NULL then we're trying to create an entry for which
855 * we have no statistics as yet; we just want to record the normalized
856 * query string. total_time, rows, bufusage are ignored in this case.
859 store_entry(char *plan, uint32 queryId, uint32 queryId2,
860 double total_time, uint64 rows,
861 const BufferUsage *bufusage)
865 char *norm_query = NULL;
867 char *normalized_plan = NULL;
868 char *shorten_plan = NULL;
869 volatile StatEntry *e;
871 Assert(plan != NULL);
873 /* Safety check... */
874 if (!shared_state || !hash_table)
877 /* Set up key for hashtable search */
878 key.userid = GetUserId();
879 key.dbid = MyDatabaseId;
880 key.encoding = GetDatabaseEncoding();
881 key.queryid = queryId;
883 normalized_plan = pgsp_json_normalize(plan);
884 shorten_plan = pgsp_json_shorten(plan);
885 //elog(LOG, "Normalized: %s", normalized_plan);
886 //elog(LOG, "Shorten: %s", shorten_plan);
887 //elog(LOG, "Original: %s", plan);
888 plan_len = strlen(shorten_plan);
890 key.planid = hash_any((const unsigned char *)normalized_plan,
891 strlen(normalized_plan));
892 pfree(normalized_plan);
894 if (plan_len >= shared_state->plan_size)
895 plan_len = pg_encoding_mbcliplen(GetDatabaseEncoding(),
898 shared_state->plan_size - 1);
901 /* Lookup the hash table entry with shared lock. */
902 LWLockAcquire(shared_state->lock, LW_SHARED);
904 entry = (StatEntry *) hash_search(hash_table, &key, HASH_FIND, NULL);
906 /* Create new entry, if not present */
910 * We'll need exclusive lock to make a new entry. There is no point
911 * in holding shared lock while we normalize the string, though.
913 LWLockRelease(shared_state->lock);
915 /* Acquire exclusive lock as required by entry_alloc() */
916 LWLockAcquire(shared_state->lock, LW_EXCLUSIVE);
918 entry = entry_alloc(&key, "", 0, false);
921 /* Increment the counts, except when jstate is not NULL */
923 * Grab the spinlock while updating the counters (see comment about
924 * locking rules at the head of the file)
927 e = (volatile StatEntry *) entry;
928 SpinLockAcquire(&e->mutex);
930 e->queryid = queryId2;
932 /* "Unstick" entry if it was previously sticky */
933 if (e->counters.calls == 0)
935 e->counters.usage = USAGE_INIT;
936 e->counters.first_call = GetCurrentTimestamp();
939 e->counters.calls += 1;
940 e->counters.total_time += total_time;
941 e->counters.rows += rows;
942 e->counters.shared_blks_hit += bufusage->shared_blks_hit;
943 e->counters.shared_blks_read += bufusage->shared_blks_read;
944 e->counters.shared_blks_dirtied += bufusage->shared_blks_dirtied;
945 e->counters.shared_blks_written += bufusage->shared_blks_written;
946 e->counters.local_blks_hit += bufusage->local_blks_hit;
947 e->counters.local_blks_read += bufusage->local_blks_read;
948 e->counters.local_blks_dirtied += bufusage->local_blks_dirtied;
949 e->counters.local_blks_written += bufusage->local_blks_written;
950 e->counters.temp_blks_read += bufusage->temp_blks_read;
951 e->counters.temp_blks_written += bufusage->temp_blks_written;
952 e->counters.blk_read_time += INSTR_TIME_GET_MILLISEC(bufusage->blk_read_time);
953 e->counters.blk_write_time += INSTR_TIME_GET_MILLISEC(bufusage->blk_write_time);
954 e->counters.last_call = GetCurrentTimestamp();
955 e->counters.usage += USAGE_EXEC(total_time);
957 Assert(plan_len >= 0 && plan_len < shared_state->plan_size);
958 memcpy(entry->plan, shorten_plan, plan_len);
959 entry->plan_len = plan_len;
960 entry->plan[plan_len] = '\0';
962 SpinLockRelease(&e->mutex);
964 LWLockRelease(shared_state->lock);
966 /* We postpone this pfree until we're out of the lock */
972 * Reset all statement statistics.
975 pg_store_plans_reset(PG_FUNCTION_ARGS)
977 if (!shared_state || !hash_table)
979 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
980 errmsg("pg_stat_plan must be loaded via shared_preload_libraries")));
985 #define PG_STORE_PLANS_COLS 23
988 * Retrieve statement statistics.
991 pg_store_plans(PG_FUNCTION_ARGS)
993 ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
995 Tuplestorestate *tupstore;
996 MemoryContext per_query_ctx;
997 MemoryContext oldcontext;
998 Oid userid = GetUserId();
999 bool is_superuser = superuser();
1000 HASH_SEQ_STATUS hash_seq;
1003 if (!shared_state || !hash_table)
1005 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1006 errmsg("pg_store_plans must be loaded via shared_preload_libraries")));
1008 /* check to see if caller supports us returning a tuplestore */
1009 if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
1011 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1012 errmsg("set-valued function called in context that cannot accept a set")));
1013 if (!(rsinfo->allowedModes & SFRM_Materialize))
1015 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1016 errmsg("materialize mode required, but it is not " \
1017 "allowed in this context")));
1019 /* Build a tuple descriptor for our result type */
1020 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
1021 elog(ERROR, "return type must be a row type");
1023 per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
1024 oldcontext = MemoryContextSwitchTo(per_query_ctx);
1026 tupstore = tuplestore_begin_heap(true, false, work_mem);
1027 rsinfo->returnMode = SFRM_Materialize;
1028 rsinfo->setResult = tupstore;
1029 rsinfo->setDesc = tupdesc;
1031 MemoryContextSwitchTo(oldcontext);
1033 LWLockAcquire(shared_state->lock, LW_SHARED);
1035 hash_seq_init(&hash_seq, hash_table);
1036 while ((entry = hash_seq_search(&hash_seq)) != NULL)
1038 Datum values[PG_STORE_PLANS_COLS];
1039 bool nulls[PG_STORE_PLANS_COLS];
1041 int64 queryid = entry->key.queryid;
1042 int64 queryid_stmt = entry->queryid;
1043 int64 planid = entry->key.planid;
1046 memset(values, 0, sizeof(values));
1047 memset(nulls, 0, sizeof(nulls));
1049 values[i++] = ObjectIdGetDatum(entry->key.userid);
1050 values[i++] = ObjectIdGetDatum(entry->key.dbid);
1051 if (is_superuser || entry->key.userid == userid)
1053 values[i++] = Int64GetDatumFast(queryid);
1054 values[i++] = Int64GetDatumFast(planid);
1055 values[i++] = Int64GetDatumFast(queryid_stmt);
1059 values[i++] = Int64GetDatumFast(0);
1060 values[i++] = Int64GetDatumFast(0);
1061 values[i++] = Int64GetDatumFast(0);
1065 if (is_superuser || entry->key.userid == userid)
1067 char *pstr = entry->plan;
1070 switch (plan_format)
1072 case PLAN_FORMAT_TEXT:
1073 pstr = pgsp_json_textize(entry->plan);
1075 case PLAN_FORMAT_JSON:
1076 pstr = pgsp_json_inflate(entry->plan);
1078 case PLAN_FORMAT_YAML:
1079 pstr = pgsp_json_yamlize(entry->plan);
1081 case PLAN_FORMAT_XML:
1082 pstr = pgsp_json_xmlize(entry->plan);
1089 pg_do_encoding_conversion((unsigned char *) pstr,
1091 entry->key.encoding,
1092 GetDatabaseEncoding());
1093 values[i++] = CStringGetTextDatum(estr);
1097 if (pstr != entry->plan)
1102 values[i++] = CStringGetTextDatum("<insufficient privilege>");
1104 /* copy counters to a local variable to keep locking time short */
1106 volatile StatEntry *e = (volatile StatEntry *) entry;
1108 SpinLockAcquire(&e->mutex);
1110 SpinLockRelease(&e->mutex);
1113 /* Skip entry if unexecuted (ie, it's a pending "sticky" entry) */
1117 values[i++] = Int64GetDatumFast(tmp.calls);
1118 values[i++] = Float8GetDatumFast(tmp.total_time);
1119 values[i++] = Int64GetDatumFast(tmp.rows);
1120 values[i++] = Int64GetDatumFast(tmp.shared_blks_hit);
1121 values[i++] = Int64GetDatumFast(tmp.shared_blks_read);
1122 values[i++] = Int64GetDatumFast(tmp.shared_blks_dirtied);
1123 values[i++] = Int64GetDatumFast(tmp.shared_blks_written);
1124 values[i++] = Int64GetDatumFast(tmp.local_blks_hit);
1125 values[i++] = Int64GetDatumFast(tmp.local_blks_read);
1126 values[i++] = Int64GetDatumFast(tmp.local_blks_dirtied);
1127 values[i++] = Int64GetDatumFast(tmp.local_blks_written);
1128 values[i++] = Int64GetDatumFast(tmp.temp_blks_read);
1129 values[i++] = Int64GetDatumFast(tmp.temp_blks_written);
1130 values[i++] = Float8GetDatumFast(tmp.blk_read_time);
1131 values[i++] = Float8GetDatumFast(tmp.blk_write_time);
1132 values[i++] = TimestampTzGetDatum(tmp.first_call);
1133 values[i++] = TimestampTzGetDatum(tmp.last_call);
1134 Assert(i == PG_STORE_PLANS_COLS);
1136 tuplestore_putvalues(tupstore, tupdesc, values, nulls);
1139 LWLockRelease(shared_state->lock);
1141 /* clean up and return the tuplestore */
1142 tuplestore_donestoring(tupstore);
1148 * Estimate shared memory space needed.
1151 shared_mem_size(void)
1156 size = MAXALIGN(sizeof(SharedState));
1157 entrysize = offsetof(StatEntry, plan) + pg_store_plan_size;
1158 size = add_size(size, hash_estimate_size(store_size, entrysize));
1164 * Allocate a new hashtable entry.
1165 * caller must hold an exclusive lock on shared_state->lock
1167 * "query" need not be null-terminated; we rely on plan_len instead
1169 * If "sticky" is true, make the new entry artificially sticky so that it will
1170 * probably still be there when the query finishes execution. We do this by
1171 * giving it a median usage value rather than the normal value. (Strictly
1172 * speaking, query strings are normalized on a best effort basis, though it
1173 * would be difficult to demonstrate this even under artificial conditions.)
1175 * Note: despite needing exclusive lock, it's not an error for the target
1176 * entry to already exist. This is because store_entry releases and
1177 * reacquires lock after failing to find a match; so someone else could
1178 * have made the entry while we waited to get exclusive lock.
1181 entry_alloc(EntryKey *key, const char *plan, int plan_len, bool sticky)
1186 /* Make space if needed */
1187 while (hash_get_num_entries(hash_table) >= store_size)
1190 /* Find or create an entry with desired hash code */
1191 entry = (StatEntry *) hash_search(hash_table, key, HASH_ENTER, &found);
1195 /* New entry, initialize it */
1197 /* reset the statistics */
1198 memset(&entry->counters, 0, sizeof(Counters));
1199 /* set the appropriate initial usage count */
1200 entry->counters.usage = sticky ? shared_state->cur_median_usage : USAGE_INIT;
1201 /* re-initialize the mutex each time ... we assume no one using it */
1202 SpinLockInit(&entry->mutex);
1203 /* ... and don't forget the query text */
1204 Assert(plan_len >= 0 && plan_len < shared_state->plan_size);
1205 entry->plan_len = plan_len;
1206 memcpy(entry->plan, plan, plan_len);
1207 entry->plan[plan_len] = '\0';
1214 * qsort comparator for sorting into increasing usage order
1217 entry_cmp(const void *lhs, const void *rhs)
1219 double l_usage = (*(StatEntry *const *) lhs)->counters.usage;
1220 double r_usage = (*(StatEntry *const *) rhs)->counters.usage;
1222 if (l_usage < r_usage)
1224 else if (l_usage > r_usage)
1231 * Deallocate least used entries.
1232 * Caller must hold an exclusive lock on shared_state->lock.
1237 HASH_SEQ_STATUS hash_seq;
1238 StatEntry **entries;
1244 * Sort entries by usage and deallocate USAGE_DEALLOC_PERCENT of them.
1245 * While we're scanning the table, apply the decay factor to the usage
1249 entries = palloc(hash_get_num_entries(hash_table) * sizeof(StatEntry *));
1252 hash_seq_init(&hash_seq, hash_table);
1253 while ((entry = hash_seq_search(&hash_seq)) != NULL)
1255 entries[i++] = entry;
1256 /* "Sticky" entries get a different usage decay rate. */
1257 if (entry->counters.calls == 0)
1258 entry->counters.usage *= STICKY_DECREASE_FACTOR;
1260 entry->counters.usage *= USAGE_DECREASE_FACTOR;
1263 qsort(entries, i, sizeof(StatEntry *), entry_cmp);
1265 /* Also, record the (approximate) median usage */
1267 shared_state->cur_median_usage = entries[i / 2]->counters.usage;
1269 nvictims = Max(10, i * USAGE_DEALLOC_PERCENT / 100);
1270 nvictims = Min(nvictims, i);
1272 for (i = 0; i < nvictims; i++)
1274 hash_search(hash_table, &entries[i]->key, HASH_REMOVE, NULL);
1281 * Release all entries.
1286 HASH_SEQ_STATUS hash_seq;
1289 LWLockAcquire(shared_state->lock, LW_EXCLUSIVE);
1291 hash_seq_init(&hash_seq, hash_table);
1292 while ((entry = hash_seq_search(&hash_seq)) != NULL)
1294 hash_search(hash_table, &entry->key, HASH_REMOVE, NULL);
1297 LWLockRelease(shared_state->lock);
1301 pg_store_plans_hash_query(PG_FUNCTION_ARGS)
1303 PG_RETURN_OID(hash_query(text_to_cstring(PG_GETARG_TEXT_P(0))));
1307 pg_store_plans_shorten(PG_FUNCTION_ARGS)
1309 text *short_plan = PG_GETARG_TEXT_P(0);
1310 char *cjson = text_to_cstring(short_plan);
1311 char *cshorten = pgsp_json_shorten(cjson);
1312 PG_RETURN_TEXT_P(cstring_to_text(cshorten));
1316 pg_store_plans_normalize(PG_FUNCTION_ARGS)
1318 text *short_plan = PG_GETARG_TEXT_P(0);
1319 char *cjson = text_to_cstring(short_plan);
1320 char *cnormalized = pgsp_json_normalize(cjson);
1321 PG_RETURN_TEXT_P(cstring_to_text(cnormalized));
1325 pg_store_plans_jsonplan(PG_FUNCTION_ARGS)
1327 text *short_plan = PG_GETARG_TEXT_P(0);
1328 char *cshort = text_to_cstring(short_plan);
1329 char *cinflated = pgsp_json_inflate(cshort);
1330 PG_RETURN_TEXT_P(cstring_to_text(cinflated));
1334 pg_store_plans_textplan(PG_FUNCTION_ARGS)
1336 text *short_plan = PG_GETARG_TEXT_P(0);
1337 char *cshort = text_to_cstring(short_plan);
1338 char *ctextized = pgsp_json_textize(cshort);
1340 PG_RETURN_TEXT_P(cstring_to_text(ctextized));
1344 pg_store_plans_yamlplan(PG_FUNCTION_ARGS)
1346 text *short_plan = PG_GETARG_TEXT_P(0);
1347 char *cshort = text_to_cstring(short_plan);
1348 char *cyamlized = pgsp_json_yamlize(cshort);
1350 PG_RETURN_TEXT_P(cstring_to_text(cyamlized));
1354 pg_store_plans_xmlplan(PG_FUNCTION_ARGS)
1356 text *short_plan = PG_GETARG_TEXT_P(0);
1357 char *cshort = text_to_cstring(short_plan);
1358 char *cxmlized = pgsp_json_xmlize(cshort);
1360 PG_RETURN_TEXT_P(cstring_to_text(cxmlized));