OSDN Git Service

Fix a bug of plan string inflation
[pgstoreplans/pg_store_plans.git] / pg_store_plans.c
1 /*-------------------------------------------------------------------------
2  *
3  * pg_store_plans.c
4  *              Take statistics of plan selection across a whole database cluster.
5  *
6  * Execution costs are totaled for each distinct plan for each query,
7  * and plan and queryid are kept in a shared hashtable, each record in
8  * which is associated with a record in pg_stat_statements, if any, by
9  * the queryid.
10  *
11  * Postgres 9.3 and earlier do not expose the query id, so
12  * pg_store_plans needs to calculate it from the given query
13  * string using a different algorithm from pg_stat_statements; later
14  * the id is matched against the one computed from the query string
15  * stored in pg_stat_statements. For that reason, queryid matching done
16  * this way will fail if the query string kept in pg_stat_statements
17  * is truncated in the middle.
18  *
19  * Plans are identified by fingerprinting plan representations in
20  * "shortened" JSON format with constants and unstable values such as
21  * rows, width, loops ignored. Nevertheless, stored plan entries hold
22  * the values from the latest execution. Entry eviction is done in the
23  * same way as in pg_stat_statements.
24  *
25  * Copyright (c) 2008-2020, PostgreSQL Global Development Group
26  * Copyright (c) 2012-2020, NIPPON TELEGRAPH AND TELEPHONE CORPORATION
27  *
28  * IDENTIFICATION
29  *        pg_store_plans/pg_store_plans.c
30  *
31  *-------------------------------------------------------------------------
32  */
33 #include "postgres.h"
34
35 #include <unistd.h>
36 #include <dlfcn.h>
37
38 #include "commands/explain.h"
39 #include "access/hash.h"
40 #include "executor/instrument.h"
41 #include "funcapi.h"
42 #include "mb/pg_wchar.h"
43 #include "miscadmin.h"
44 #include "storage/fd.h"
45 #include "storage/ipc.h"
46 #include "storage/lwlock.h"
47 #include "storage/spin.h"
48 #include "storage/shmem.h"
49 #include "tcop/utility.h"
50 #include "utils/builtins.h"
51 #include "utils/timestamp.h"
52
53 #include "pgsp_json.h"
54 #include "pgsp_explain.h"
55
56 PG_MODULE_MAGIC;
57
58 /* Path of the stats dump file read at shmem startup and written at shutdown */
59 #define PGSP_DUMP_FILE  "global/pg_store_plans.stat"
60
61 /* This constant defines the magic number checked in the stats file header */
62 static const uint32 PGSP_FILE_HEADER = 0x20180613;
63 static const uint32 store_plan_size = 5000; /* copied into shared_state->plan_size on first shmem init */
64
65 /* XXX: Should USAGE_EXEC reflect execution time and/or buffer usage? */
66 #define USAGE_EXEC(duration)    (1.0)
67 #define USAGE_INIT                              (1.0)   /* including initial planning */
68 #define ASSUMED_MEDIAN_INIT             (10.0)  /* initial assumed median usage */
69 #define USAGE_DECREASE_FACTOR   (0.99)  /* decreased every entry_dealloc */
70 #define STICKY_DECREASE_FACTOR  (0.50)  /* factor for sticky entries */
71 #define USAGE_DEALLOC_PERCENT   5               /* free this % of entries at once */
72
73 /*
74  * Hashtable key that defines the identity of a hashtable entry.  We separate
75  * entries by user and by database even if query and plan are otherwise identical.
76  *
77  * Presently, the query encoding is fully determined by the source database
78  * and so we don't really need it to be in the key.  But that might not always
79  * be true. Anyway it's notationally convenient to pass it as part of the key.
80  */
81 typedef struct EntryKey
82 {
83         Oid                     userid;                 /* user OID */
84         Oid                     dbid;                   /* database OID */
85         int                     encoding;               /* query encoding */
86         uint32          queryid;                /* internal query identifier */
87         uint32          planid;                 /* plan identifier */
88 } EntryKey;
89
90 /*
91  * The actual stats counters kept within StatEntry (its "counters" member).
92  */
93 typedef struct Counters
94 {
95         int64           calls;                          /* # of times executed */
96         double          total_time;                     /* total execution time, in msec */
97         int64           rows;                           /* total # of retrieved or affected rows */
98         int64           shared_blks_hit;        /* # of shared buffer hits */
99         int64           shared_blks_read;       /* # of shared disk blocks read */
100         int64           shared_blks_dirtied;/* # of shared disk blocks dirtied */
101         int64           shared_blks_written;/* # of shared disk blocks written */
102         int64           local_blks_hit;         /* # of local buffer hits */
103         int64           local_blks_read;        /* # of local disk blocks read */
104         int64           local_blks_dirtied;     /* # of local disk blocks dirtied */
105         int64           local_blks_written;     /* # of local disk blocks written */
106         int64           temp_blks_read;         /* # of temp blocks read */
107         int64           temp_blks_written;      /* # of temp blocks written */
108         double          blk_read_time;          /* time spent reading, in msec */
109         double          blk_write_time;         /* time spent writing, in msec */
110         TimestampTz     first_call;                     /* timestamp of first call */
111         TimestampTz     last_call;                      /* timestamp of last call */
112         double          usage;                          /* usage factor */
113 } Counters;
114
115 /*
116  * The type of queryId has been widened as of PG11. Define a substitute type
117  * rather than putting #if here and there.
118  */
119 #if PG_VERSION_NUM >= 110000
120 typedef uint64 queryid_t;
121 #else
122 typedef uint32 queryid_t;
123 #endif
124
125 /*
126  * Statistics per plan
127  *
128  * NB: the dump file code reads and writes all fields before "mutex" as raw bytes; see it before changing field order here.
129  */
130 typedef struct StatEntry
131 {
132         EntryKey        key;                    /* hash key of entry - MUST BE FIRST */
133         queryid_t       queryid;                /* query identifier from stat_statements */
134         Counters        counters;               /* the statistics for this plan */
135         int                     plan_len;               /* # of valid bytes in plan string */
136         slock_t         mutex;                  /* protects the counters only */
137         char            plan[1];                /* VARIABLE LENGTH ARRAY - MUST BE LAST */
138         /*
139          * Note: the allocated length of plan[] is actually
140          * shared_state->plan_size
141          */
142 } StatEntry;
143
144 /*
145  * Global shared state
146  */
147 typedef struct SharedState
148 {
149         LWLockId        lock;                   /* protects hashtable search/modification */
150         int                     plan_size;              /* max plan length in bytes */
151         double          cur_median_usage;       /* current median usage in hashtable */
152 } SharedState;
153
154 /*---- Local variables ----*/
155
156 /* Current nesting depth of ExecutorRun+ProcessUtility calls */
157 static int      nested_level = 0;
158
159 /* Hook values found at load time; called for chaining and restored on unload */
160 static shmem_startup_hook_type prev_shmem_startup_hook = NULL;
161 static ExecutorStart_hook_type prev_ExecutorStart = NULL;
162 static ExecutorRun_hook_type prev_ExecutorRun = NULL;
163 static ExecutorFinish_hook_type prev_ExecutorFinish = NULL;
164 static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
165 static ProcessUtility_hook_type prev_ProcessUtility = NULL;
166
167 /* Links to shared memory state; both are set by pgsp_shmem_startup() */
168 static SharedState *shared_state = NULL;
169 static HTAB *hash_table = NULL;
170
171 /*---- GUC variables ----*/
172
173 typedef enum
174 {
175         TRACK_LEVEL_NONE,                       /* track no statements */
176         TRACK_LEVEL_TOP,                                /* only top level statements */
177         TRACK_LEVEL_ALL,                                /* all statements, including nested ones */
178         TRACK_LEVEL_FORCE                       /* described like ALL, but absent from track_options; NOTE(review): confirm intended meaning */
179 }       PGSPTrackLevel;
180
181 static const struct config_enum_entry track_options[] =
182 {
183         {"none", TRACK_LEVEL_NONE, false},
184         {"top", TRACK_LEVEL_TOP, false},
185         {"all", TRACK_LEVEL_ALL, false},
186         {NULL, 0, false}
187 };
188
189 typedef enum
190 {
191         PLAN_FORMAT_RAW,                /* No conversion: shortened JSON */
192         PLAN_FORMAT_TEXT,               /* Traditional text representation */
193         PLAN_FORMAT_JSON,               /* JSON representation */
194         PLAN_FORMAT_YAML,               /* YAML */
195         PLAN_FORMAT_XML,                /* XML */
196 }       PGSPPlanFormats;
197
198 static const struct config_enum_entry plan_formats[] =
199 {
200         {"raw" , PLAN_FORMAT_RAW , false},
201         {"text", PLAN_FORMAT_TEXT, false},
202         {"json", PLAN_FORMAT_JSON, false},
203         {"yaml", PLAN_FORMAT_YAML, false},
204         {"xml" , PLAN_FORMAT_XML , false},
205         {NULL, 0, false}
206 };
207
208 static int      store_size;                     /* max # plans to track */
209 static int      track_level;            /* tracking level */
210 static int      min_duration;           /* min duration to record, in milliseconds */
211 static bool dump_on_shutdown;   /* whether to save stats across shutdown */
212 static bool log_analyze;                /* Similar to EXPLAIN (ANALYZE *) */
213 static bool log_verbose;                /* Similar to EXPLAIN (VERBOSE *) */
214 static bool log_buffers;                /* Similar to EXPLAIN (BUFFERS *) */
215 static bool log_timing;                 /* Similar to EXPLAIN (TIMING *) */
216 static bool log_triggers;               /* whether to log trigger statistics */
217 static int  plan_format;        /* Plan representation style in
218                                                                  * pg_store_plans.plan */
219
220 #define pgsp_enabled() \
221         (track_level == TRACK_LEVEL_ALL || \
222         (track_level == TRACK_LEVEL_TOP && nested_level == 0))
223
224 /*---- Function declarations ----*/
225
226 void            _PG_init(void);
227 void            _PG_fini(void);
228
229 Datum           pg_store_plans_reset(PG_FUNCTION_ARGS);
230 Datum           pg_store_plans_hash_query(PG_FUNCTION_ARGS);
231 Datum           pg_store_plans(PG_FUNCTION_ARGS);
232 Datum           pg_store_plans_shorten(PG_FUNCTION_ARGS);
233 Datum           pg_store_plans_normalize(PG_FUNCTION_ARGS);
234 Datum           pg_store_plans_jsonplan(PG_FUNCTION_ARGS);
235 Datum           pg_store_plans_yamlplan(PG_FUNCTION_ARGS);
236 Datum           pg_store_plans_xmlplan(PG_FUNCTION_ARGS);
237 Datum           pg_store_plans_textplan(PG_FUNCTION_ARGS);
238
239 PG_FUNCTION_INFO_V1(pg_store_plans_reset); /* one PG_FUNCTION_INFO_V1 per Datum function declared above */
240 PG_FUNCTION_INFO_V1(pg_store_plans_hash_query);
241 PG_FUNCTION_INFO_V1(pg_store_plans);
242 PG_FUNCTION_INFO_V1(pg_store_plans_shorten);
243 PG_FUNCTION_INFO_V1(pg_store_plans_normalize);
244 PG_FUNCTION_INFO_V1(pg_store_plans_jsonplan);
245 PG_FUNCTION_INFO_V1(pg_store_plans_textplan);
246 PG_FUNCTION_INFO_V1(pg_store_plans_yamlplan);
247 PG_FUNCTION_INFO_V1(pg_store_plans_xmlplan);
248
249 static void pgsp_shmem_startup(void);
250 static void pgsp_shmem_shutdown(int code, Datum arg);
251 static void pgsp_ExecutorStart(QueryDesc *queryDesc, int eflags);
252 #if PG_VERSION_NUM >= 100000 /* this branch: extra execute_once argument */
253 static void pgsp_ExecutorRun(QueryDesc *queryDesc,
254                                  ScanDirection direction,
255                                                          uint64 count, bool execute_once);
256 #elif PG_VERSION_NUM >= 90600 /* this branch: count is uint64 */
257 static void pgsp_ExecutorRun(QueryDesc *queryDesc,
258                                  ScanDirection direction,
259                                  uint64 count);
260 #else /* this branch: count is long */
261 static void pgsp_ExecutorRun(QueryDesc *queryDesc,
262                                  ScanDirection direction,
263                                  long count);
264 #endif
265 static void pgsp_ExecutorFinish(QueryDesc *queryDesc);
266 static void pgsp_ExecutorEnd(QueryDesc *queryDesc);
267 #if PG_VERSION_NUM >= 100000 /* this branch: takes PlannedStmt and QueryEnvironment */
268 static void pgsp_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
269                                         ProcessUtilityContext context, ParamListInfo params,
270                                         QueryEnvironment *queryEnv,
271                                         DestReceiver *dest, char *completionTag);
272 #else /* this branch: takes the raw parse tree Node */
273 static void pgsp_ProcessUtility(Node *parsetree, const char *queryString,
274                                         ProcessUtilityContext context, ParamListInfo params,
275                                         DestReceiver *dest, char *completionTag);
276 #endif
277 static uint32 hash_table_fn(const void *key, Size keysize);
278 static int      match_fn(const void *key1, const void *key2, Size keysize);
279 static uint32 hash_query(const char* query);
280 static void store_entry(char *plan, uint32 queryId, queryid_t queryId_pgss,
281                    double total_time, uint64 rows,
282                    const BufferUsage *bufusage);
283 static Size shared_mem_size(void);
284 static StatEntry *entry_alloc(EntryKey *key, const char *query,
285                         int plan_len, bool sticky);
286 static void entry_dealloc(void);
287 static void entry_reset(void);
288
289 /*
290  * Module load callback
291  */
292 void
293 _PG_init(void)
294 {
295         /*
296          * In order to create our shared memory area, we have to be loaded via
297          * shared_preload_libraries.  If not, fall out without hooking into any of
298          * the main system.  (We don't throw error here because it seems useful to
299          * allow the pg_stat_statements functions to be created even when the
300          * module isn't active.  The functions must protect themselves against
301          * being called then, however.)
302          */
303         if (!process_shared_preload_libraries_in_progress)
304                 return;
305
306         /*
307          * Define (or redefine) custom GUC variables.
308          */
309         DefineCustomIntVariable("pg_store_plans.max",
310           "Sets the maximum number of plans tracked by pg_store_plans.",
311                                                         NULL,
312                                                         &store_size,
313                                                         1000,
314                                                         100,
315                                                         INT_MAX,
316                                                         PGC_POSTMASTER,
317                                                         0,
318                                                         NULL,
319                                                         NULL,
320                                                         NULL);
321
322         DefineCustomEnumVariable("pg_store_plans.track",
323                            "Selects which plans are tracked by pg_store_plans.",
324                                                          NULL,
325                                                          &track_level,
326                                                          TRACK_LEVEL_TOP,
327                                                          track_options,
328                                                          PGC_SUSET,
329                                                          0,
330                                                          NULL,
331                                                          NULL,
332                                                          NULL);
333
334         DefineCustomEnumVariable("pg_store_plans.plan_format",
335                            "Selects which format to be appied for plan representation in pg_store_plans.",
336                                                          NULL,
337                                                          &plan_format,
338                                                          PLAN_FORMAT_TEXT,
339                                                          plan_formats,
340                                                          PGC_USERSET,
341                                                          0,
342                                                          NULL,
343                                                          NULL,
344                                                          NULL);
345
346         DefineCustomIntVariable("pg_store_plans.min_duration",
347                                         "Minimum duration to record plan in milliseconds.",
348                                                         NULL,
349                                                         &min_duration,
350                                                         0,
351                                                         0,
352                                                         INT_MAX,
353                                                         PGC_SUSET,
354                                                         0,
355                                                         NULL,
356                                                         NULL,
357                                                         NULL);
358
359         DefineCustomBoolVariable("pg_store_plans.save",
360                            "Save pg_store_plans statistics across server shutdowns.",
361                                                          NULL,
362                                                          &dump_on_shutdown,
363                                                          true,
364                                                          PGC_SIGHUP,
365                                                          0,
366                                                          NULL,
367                                                          NULL,
368                                                          NULL);
369
370         DefineCustomBoolVariable("pg_store_plans.log_analyze",
371                                                          "Use EXPLAIN ANALYZE for plan logging.",
372                                                          NULL,
373                                                          &log_analyze,
374                                                          false,
375                                                          PGC_SUSET,
376                                                          0,
377                                                          NULL,
378                                                          NULL,
379                                                          NULL);
380
381         DefineCustomBoolVariable("pg_store_plans.log_buffers",
382                                                          "Log buffer usage.",
383                                                          NULL,
384                                                          &log_buffers,
385                                                          false,
386                                                          PGC_SUSET,
387                                                          0,
388                                                          NULL,
389                                                          NULL,
390                                                          NULL);
391
392         DefineCustomBoolVariable("pg_store_plans.log_timing",
393                                                          "Log timings.",
394                                                          NULL,
395                                                          &log_timing,
396                                                          true,
397                                                          PGC_SUSET,
398                                                          0,
399                                                          NULL,
400                                                          NULL,
401                                                          NULL);
402
403         DefineCustomBoolVariable("pg_store_plans.log_triggers",
404                                                          "Log trigger trace.",
405                                                          NULL,
406                                                          &log_triggers,
407                                                          false,
408                                                          PGC_SUSET,
409                                                          0,
410                                                          NULL,
411                                                          NULL,
412                                                          NULL);
413
414         DefineCustomBoolVariable("pg_store_plans.log_verbose",
415                            "Set VERBOSE for EXPLAIN on logging.",
416                                                          NULL,
417                                                          &log_verbose,
418                                                          false,
419                                                          PGC_SUSET,
420                                                          0,
421                                                          NULL,
422                                                          NULL,
423                                                          NULL);
424
425         EmitWarningsOnPlaceholders("pg_store_plans");
426
427         /*
428          * Request additional shared resources.  (These are no-ops if we're not in
429          * the postmaster process.)  We'll allocate or attach to the shared
430          * resources in pgsp_shmem_startup().
431          */
432         RequestAddinShmemSpace(shared_mem_size());
433 #if PG_VERSION_NUM >= 90600
434         RequestNamedLWLockTranche("pg_store_plans", 1);
435 #else
436         RequestAddinLWLocks(1);
437 #endif
438
439         /*
440          * Install hooks.
441          */
442         prev_shmem_startup_hook = shmem_startup_hook;
443         shmem_startup_hook = pgsp_shmem_startup;
444         prev_ExecutorStart = ExecutorStart_hook;
445         ExecutorStart_hook = pgsp_ExecutorStart;
446         prev_ExecutorRun = ExecutorRun_hook;
447         ExecutorRun_hook = pgsp_ExecutorRun;
448         prev_ExecutorFinish = ExecutorFinish_hook;
449         ExecutorFinish_hook = pgsp_ExecutorFinish;
450         prev_ExecutorEnd = ExecutorEnd_hook;
451         ExecutorEnd_hook = pgsp_ExecutorEnd;
452         prev_ProcessUtility = ProcessUtility_hook;
453         ProcessUtility_hook = pgsp_ProcessUtility;
454 }
455
456 /*
457  * Module unload callback
458  */
459 void
460 _PG_fini(void)
461 {
462         /* Uninstall hooks. */
463         shmem_startup_hook = prev_shmem_startup_hook;
464         ExecutorStart_hook = prev_ExecutorStart;
465         ExecutorRun_hook = prev_ExecutorRun;
466         ExecutorFinish_hook = prev_ExecutorFinish;
467         ExecutorEnd_hook = prev_ExecutorEnd;
468         ProcessUtility_hook = prev_ProcessUtility;
469 }
470
471 /*
472  * shmem_startup hook: allocate or attach to shared memory,
473  * then load any pre-existing statistics from file.
474  */
475 static void
476 pgsp_shmem_startup(void)
477 {
478         bool            found;
479         HASHCTL         info;
480         FILE       *file;
481         uint32          header;
482         int32           num;
483         int32           i;
484         int                     plan_size;
485         int                     buffer_size;
486         char       *buffer = NULL;
487
488         if (prev_shmem_startup_hook)
489                 prev_shmem_startup_hook();
490
491         /* reset in case this is a restart within the postmaster */
492         shared_state = NULL;
493         hash_table = NULL;
494
495         /*
496          * Create or attach to the shared memory state, including hash table
497          */
498         LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
499
500         shared_state = ShmemInitStruct("pg_store_plans",
501                                                    sizeof(SharedState),
502                                                    &found);
503
504         if (!found)
505         {
506                 /* First time through ... */
507 #if PG_VERSION_NUM >= 90600
508                 shared_state->lock = &(GetNamedLWLockTranche("pg_store_plans"))->lock;
509 #else
510                 shared_state->lock = LWLockAssign();
511 #endif
512                 shared_state->plan_size = store_plan_size;
513                 shared_state->cur_median_usage = ASSUMED_MEDIAN_INIT;
514         }
515
516         /* Be sure everyone agrees on the hash table entry size */
517         plan_size = shared_state->plan_size;
518
519         memset(&info, 0, sizeof(info));
520         info.keysize = sizeof(EntryKey);
521         info.entrysize = offsetof(StatEntry, plan) + plan_size;
522         info.hash = hash_table_fn;
523         info.match = match_fn;
524         hash_table = ShmemInitHash("pg_store_plans hash",
525                                                           store_size, store_size,
526                                                           &info,
527                                                           HASH_ELEM | HASH_FUNCTION | HASH_COMPARE);
528
529         LWLockRelease(AddinShmemInitLock);
530
531         /*
532          * If we're in the postmaster (or a standalone backend...), set up a shmem
533          * exit hook to dump the statistics to disk.
534          */
535         if (!IsUnderPostmaster)
536                 on_shmem_exit(pgsp_shmem_shutdown, (Datum) 0);
537
538         /*
539          * Attempt to load old statistics from the dump file, if this is the first
540          * time through and we weren't told not to.
541          */
542         if (found || !dump_on_shutdown)
543                 return;
544
545         /*
546          * Note: we don't bother with locks here, because there should be no other
547          * processes running when this code is reached.
548          */
549         file = AllocateFile(PGSP_DUMP_FILE, PG_BINARY_R);
550         if (file == NULL)
551         {
552                 if (errno == ENOENT)
553                         return;                         /* ignore not-found error */
554                 goto error;
555         }
556
557         buffer_size = plan_size;
558         buffer = (char *) palloc(buffer_size);
559
560         if (fread(&header, sizeof(uint32), 1, file) != 1 ||
561                 header != PGSP_FILE_HEADER ||
562                 fread(&num, sizeof(int32), 1, file) != 1)
563                 goto error;
564
565         for (i = 0; i < num; i++)
566         {
567                 StatEntry       temp;
568                 StatEntry  *entry;
569
570                 if (fread(&temp, offsetof(StatEntry, mutex), 1, file) != 1)
571                         goto error;
572
573                 /* Encoding is the only field we can easily sanity-check */
574                 if (!PG_VALID_BE_ENCODING(temp.key.encoding))
575                         goto error;
576
577                 /* Previous incarnation might have had a larger plan_size */
578                 if (temp.plan_len >= buffer_size)
579                 {
580                         buffer = (char *) repalloc(buffer, temp.plan_len + 1);
581                         buffer_size = temp.plan_len + 1;
582                 }
583
584                 if (fread(buffer, 1, temp.plan_len, file) != temp.plan_len)
585                         goto error;
586                 buffer[temp.plan_len] = '\0';
587
588                 /* Skip loading "sticky" entries */
589                 if (temp.counters.calls == 0)
590                         continue;
591
592                 /* Clip to available length if needed */
593                 if (temp.plan_len >= plan_size)
594                         temp.plan_len = pg_encoding_mbcliplen(temp.key.encoding,
595                                                                                                    buffer,
596                                                                                                    temp.plan_len,
597                                                                                                    plan_size - 1);
598
599                 /* make the hashtable entry (discards old entries if too many) */
600                 entry = entry_alloc(&temp.key, buffer, temp.plan_len, false);
601
602                 /* copy in the actual stats */
603                 entry->counters = temp.counters;
604         }
605
606         pfree(buffer);
607         FreeFile(file);
608
609         /*
610          * Remove the file so it's not included in backups/replication slaves,
611          * etc. A new file will be written on next shutdown.
612          */
613         unlink(PGSP_DUMP_FILE);
614
615         return;
616
617 error:
618         ereport(LOG,
619                         (errcode_for_file_access(),
620                          errmsg("could not read pg_stat_statement file \"%s\": %m",
621                                         PGSP_DUMP_FILE)));
622         if (buffer)
623                 pfree(buffer);
624         if (file)
625                 FreeFile(file);
626         /* If possible, throw away the bogus file; ignore any error */
627         unlink(PGSP_DUMP_FILE);
628 }
629
630 /*
631  * shmem_shutdown hook: Dump statistics into file.
632  *
633  * Note: we don't bother with acquiring lock, because there should be no
634  * other processes running when this is called.
635  */
636 static void
637 pgsp_shmem_shutdown(int code, Datum arg)
638 {
639         FILE       *file;
640         HASH_SEQ_STATUS hash_seq;
641         int32           num_entries;
642         StatEntry  *entry;
643
644         /* Don't try to dump during a crash. */
645         if (code)
646                 return;
647
648         /* Safety check ... shouldn't get here unless shmem is set up. */
649         if (!shared_state || !hash_table)
650                 return;
651
652         /* Don't dump if told not to. */
653         if (!dump_on_shutdown)
654                 return;
655
656         file = AllocateFile(PGSP_DUMP_FILE ".tmp", PG_BINARY_W);
657         if (file == NULL)
658                 goto error;
659
660         if (fwrite(&PGSP_FILE_HEADER, sizeof(uint32), 1, file) != 1)
661                 goto error;
662         num_entries = hash_get_num_entries(hash_table);
663         if (fwrite(&num_entries, sizeof(int32), 1, file) != 1)
664                 goto error;
665
666         hash_seq_init(&hash_seq, hash_table);
667         while ((entry = hash_seq_search(&hash_seq)) != NULL)
668         {
669                 int                     len = entry->plan_len;
670
671                 if (fwrite(entry, offsetof(StatEntry, mutex), 1, file) != 1 ||
672                         fwrite(entry->plan, 1, len, file) != len)
673                         goto error;
674         }
675
676         if (FreeFile(file))
677         {
678                 file = NULL;
679                 goto error;
680         }
681
682         /*
683          * Rename file into place, so we atomically replace the old one.
684          */
685         if (rename(PGSP_DUMP_FILE ".tmp", PGSP_DUMP_FILE) != 0)
686                 ereport(LOG,
687                                 (errcode_for_file_access(),
688                                  errmsg("could not rename pg_store_plans file \"%s\": %m",
689                                                 PGSP_DUMP_FILE ".tmp")));
690
691         return;
692
693 error:
694         ereport(LOG,
695                         (errcode_for_file_access(),
696                          errmsg("could not write pg_store_plans file \"%s\": %m",
697                                         PGSP_DUMP_FILE ".tmp")));
698         if (file)
699                 FreeFile(file);
700         unlink(PGSP_DUMP_FILE ".tmp");
701 }
702
703
704 /*
705  * ExecutorStart hook: start up tracking if needed
706  */
707 static void
708 pgsp_ExecutorStart(QueryDesc *queryDesc, int eflags)
709 {
710         if (log_analyze &&
711                 (eflags & EXEC_FLAG_EXPLAIN_ONLY) == 0)
712         {
713                 queryDesc->instrument_options |=
714                         (log_timing ? INSTRUMENT_TIMER : 0)|
715                         (log_timing ? 0: INSTRUMENT_ROWS)|
716                         (log_buffers ? INSTRUMENT_BUFFERS : 0);
717         }
718         if (prev_ExecutorStart)
719                 prev_ExecutorStart(queryDesc, eflags);
720         else
721                 standard_ExecutorStart(queryDesc, eflags);
722
723         /*
724          * Set up to track total elapsed time in ExecutorRun. Allocate in per-query
725          * context so as to be free at ExecutorEnd.
726          */
727         if (queryDesc->totaltime == NULL && pgsp_enabled())
728         {
729                 MemoryContext oldcxt;
730
731                 oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
732                 queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_ALL);
733                 MemoryContextSwitchTo(oldcxt);
734         }
735         
736 }
737
738 /*
739  * ExecutorRun hook: all we need do is track nesting depth
740  */
741 static void
742 #if PG_VERSION_NUM >= 100000
743 pgsp_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count,
744                                  bool execute_once)
745 #elif PG_VERSION_NUM >= 90600
746 pgsp_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, uint64 count)
747 #else
748 pgsp_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, long count)
749 #endif
750 {
751         nested_level++;
752         PG_TRY();
753         {
754 #if PG_VERSION_NUM >= 100000
755                 if (prev_ExecutorRun)
756                         prev_ExecutorRun(queryDesc, direction, count, execute_once);
757                 else
758                         standard_ExecutorRun(queryDesc, direction, count, execute_once);
759 #else
760                 if (prev_ExecutorRun)
761                         prev_ExecutorRun(queryDesc, direction, count);
762                 else
763                         standard_ExecutorRun(queryDesc, direction, count);
764 #endif
765                 nested_level--;
766         }
767         PG_CATCH();
768         {
769                 nested_level--;
770                 PG_RE_THROW();
771         }
772         PG_END_TRY();
773 }
774
775 /*
776  * ExecutorFinish hook: all we need do is track nesting depth
777  */
778 static void
779 pgsp_ExecutorFinish(QueryDesc *queryDesc)
780 {
781         nested_level++;
782         PG_TRY();
783         {
784                 if (prev_ExecutorFinish)
785                         prev_ExecutorFinish(queryDesc);
786                 else
787                         standard_ExecutorFinish(queryDesc);
788                 nested_level--;
789         }
790         PG_CATCH();
791         {
792                 nested_level--;
793                 PG_RE_THROW();
794         }
795         PG_END_TRY();
796 }
797
798 /*
799  * ExecutorEnd hook: store results if needed
800  */
801 static void
802 pgsp_ExecutorEnd(QueryDesc *queryDesc)
803 {
804         if (queryDesc->totaltime)
805         {
806                 InstrEndLoop(queryDesc->totaltime);
807
808                 if (pgsp_enabled() &&
809                         queryDesc->totaltime->total >= 
810                         (double)min_duration / 1000.0)
811                 {
812                         ExplainState *es     = NewExplainState();
813                         StringInfo        es_str = es->str;
814
815                         es->analyze = queryDesc->instrument_options;
816                         es->verbose = log_verbose;
817                         es->buffers = (es->analyze && log_buffers);
818                         es->timing = (es->analyze && log_timing);
819                         es->format = EXPLAIN_FORMAT_JSON;
820         
821                         ExplainBeginOutput(es);
822                         ExplainPrintPlan(es, queryDesc);
823                         if (log_triggers)
824                                 pgspExplainTriggers(es, queryDesc);
825                         ExplainEndOutput(es);
826
827                         /* Remove last line break */
828                         if (es_str->len > 0 && es_str->data[es_str->len - 1] == '\n')
829                                 es_str->data[--es_str->len] = '\0';
830
831                         /* JSON outmost braces. */
832                         es_str->data[0] = '{';
833                         es_str->data[es_str->len - 1] = '}';
834
835                         /*
836                          * Make sure stats accumulation is done.  (Note: it's okay if several
837                          * levels of hook all do this.)
838                          */
839
840                         store_entry(es_str->data,
841                                                 hash_query(queryDesc->sourceText),
842                                                 queryDesc->plannedstmt->queryId,
843                                                 queryDesc->totaltime->total * 1000.0,   /* convert to msec */
844                                                 queryDesc->estate->es_processed,
845                                                 &queryDesc->totaltime->bufusage);
846                         pfree(es_str->data);
847                 }
848         }
849
850         if (prev_ExecutorEnd)
851                 prev_ExecutorEnd(queryDesc);
852         else
853                 standard_ExecutorEnd(queryDesc);
854 }
855
856 /*
857  * ProcessUtility hook
858  */
859 static void
860 #if PG_VERSION_NUM >= 100000
861 pgsp_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
862                                         ProcessUtilityContext context, ParamListInfo params,
863                                         QueryEnvironment *queryEnv,
864                                         DestReceiver *dest, char *completionTag)
865 #else
866 pgsp_ProcessUtility(Node *parsetree, const char *queryString,
867                                         ProcessUtilityContext context, ParamListInfo params,
868                                         DestReceiver *dest, char *completionTag)
869 #endif
870 {
871 #if PG_VERSION_NUM >= 100000
872         if (prev_ProcessUtility)
873                 prev_ProcessUtility(pstmt, queryString,
874                                                         context, params, queryEnv,
875                                                         dest, completionTag);
876         else
877                 standard_ProcessUtility(pstmt, queryString,
878                                                                 context, params, queryEnv,
879                                                                 dest, completionTag);
880 #else
881         if (prev_ProcessUtility)
882                 prev_ProcessUtility(parsetree, queryString,
883                                                         context, params,
884                                                         dest, completionTag);
885         else
886                 standard_ProcessUtility(parsetree, queryString,
887                                                                 context, params,
888                                                                 dest, completionTag);
889 #endif
890 }
891
892 /*
893  * Calculate hash value for a key
894  */
895 static uint32
896 hash_table_fn(const void *key, Size keysize)
897 {
898         const EntryKey *k = (const EntryKey *) key;
899
900         /* we don't bother to include encoding in the hash */
901         return hash_uint32((uint32) k->userid) ^
902                 hash_uint32((uint32) k->dbid) ^
903                 hash_uint32((uint32) k->queryid) ^
904                 hash_uint32((uint32) k->planid);
905 }
906
907 /*
908  * Compare two keys - zero means match
909  */
910 static int
911 match_fn(const void *key1, const void *key2, Size keysize)
912 {
913         const EntryKey *k1 = (const EntryKey *) key1;
914         const EntryKey *k2 = (const EntryKey *) key2;
915
916         if (k1->userid == k2->userid &&
917                 k1->dbid == k2->dbid &&
918                 k1->encoding == k2->encoding &&
919                 k1->queryid == k2->queryid &&
920                 k1->planid == k2->planid)
921                 return 0;
922         else
923                 return 1;
924 }
925
926 /*
927  * hash_query: calculate internal query ID for a query
928  *
929  *  As of PG11, Query.queryId has been widen to 64 bit to reduce collision of
930  *  queries to practical level. On the other hand pg_store_plans uses the
931  *  combination of query hash and plan hash values as the hash table key and
932  *  the resolution of the hash value effectively has the same degree so we
933  *  continue to use uint32 as internal queryid.
934  *
935  *  This may merge plans from different queries into single internal query id
936  *  but it is not a problem when pg_stat_statements is used together since the
937  *  extension gives enough resolution on queries.
938  */
939 static uint32
940 hash_query(const char* query)
941 {
942         uint32 queryid;
943
944         char *normquery = pstrdup(query);
945         normalize_expr(normquery, false);
946         queryid = hash_any((const unsigned char*)normquery, strlen(normquery));
947         pfree(normquery);
948
949         return queryid;
950 }
951
952
953 /*
954  * Store some statistics for a plan.
955  *
956  * Table entry is keyed with userid.queryId.planId. queryId_pgss just stores
957  * queryId used to join with pg_stat_statements.
958  */
959 static void
960 store_entry(char *plan, uint32 queryId, queryid_t queryId_pgss,
961                    double total_time, uint64 rows,
962                    const BufferUsage *bufusage)
963 {
964         EntryKey key;
965         StatEntry  *entry;
966         char       *norm_query = NULL;
967         int             plan_len;
968         char       *normalized_plan = NULL;
969         char       *shorten_plan = NULL;
970         volatile StatEntry *e;
971
972         Assert(plan != NULL);
973
974         /* Safety check... */
975         if (!shared_state || !hash_table)
976                 return;
977
978         /* Set up key for hashtable search */
979         key.userid = GetUserId();
980         key.dbid = MyDatabaseId;
981         key.encoding = GetDatabaseEncoding();
982         key.queryid = queryId;
983
984         normalized_plan = pgsp_json_normalize(plan);
985         shorten_plan = pgsp_json_shorten(plan);
986         elog(DEBUG3, "pg_store_plans: Normalized plan: %s", normalized_plan);
987         elog(DEBUG3, "pg_store_plans: Shorten plan: %s", shorten_plan);
988         elog(DEBUG3, "pg_store_plans: Original plan: %s", plan);
989         plan_len = strlen(shorten_plan);
990
991         key.planid = hash_any((const unsigned char *)normalized_plan,
992                                                   strlen(normalized_plan));
993         pfree(normalized_plan);
994
995         if (plan_len >= shared_state->plan_size)
996                 plan_len = pg_encoding_mbcliplen(GetDatabaseEncoding(),
997                                                                                  shorten_plan,
998                                                                                  plan_len,
999                                                                                  shared_state->plan_size - 1);
1000
1001         
1002         /* Look up the hash table entry with shared lock. */
1003         LWLockAcquire(shared_state->lock, LW_SHARED);
1004
1005         entry = (StatEntry *) hash_search(hash_table, &key, HASH_FIND, NULL);
1006
1007         /* Create new entry, if not present */
1008         if (!entry)
1009         {
1010                 /*
1011                  * We'll need exclusive lock to make a new entry.  There is no point
1012                  * in holding shared lock while we normalize the string, though.
1013                  */
1014                 LWLockRelease(shared_state->lock);
1015
1016                 /* Acquire exclusive lock as required by entry_alloc() */
1017                 LWLockAcquire(shared_state->lock, LW_EXCLUSIVE);
1018
1019                 entry = entry_alloc(&key, "", 0, false);
1020         }
1021
1022         /* Increment the counts, except when jstate is not NULL */
1023
1024         /*
1025          * Grab the spinlock while updating the counters (see comment about
1026          * locking rules at the head of the file)
1027          */
1028         
1029         e = (volatile StatEntry *) entry;
1030         SpinLockAcquire(&e->mutex);
1031         
1032         e->queryid = queryId_pgss;
1033
1034         /* "Unstick" entry if it was previously sticky */
1035         if (e->counters.calls == 0)
1036         {
1037                 e->counters.usage = USAGE_INIT;
1038                 e->counters.first_call = GetCurrentTimestamp();
1039         }
1040         
1041         e->counters.calls += 1;
1042         e->counters.total_time += total_time;
1043         e->counters.rows += rows;
1044         e->counters.shared_blks_hit += bufusage->shared_blks_hit;
1045         e->counters.shared_blks_read += bufusage->shared_blks_read;
1046         e->counters.shared_blks_dirtied += bufusage->shared_blks_dirtied;
1047         e->counters.shared_blks_written += bufusage->shared_blks_written;
1048         e->counters.local_blks_hit += bufusage->local_blks_hit;
1049         e->counters.local_blks_read += bufusage->local_blks_read;
1050         e->counters.local_blks_dirtied += bufusage->local_blks_dirtied;
1051         e->counters.local_blks_written += bufusage->local_blks_written;
1052         e->counters.temp_blks_read += bufusage->temp_blks_read;
1053         e->counters.temp_blks_written += bufusage->temp_blks_written;
1054         e->counters.blk_read_time += INSTR_TIME_GET_MILLISEC(bufusage->blk_read_time);
1055         e->counters.blk_write_time += INSTR_TIME_GET_MILLISEC(bufusage->blk_write_time);
1056         e->counters.last_call = GetCurrentTimestamp();
1057         e->counters.usage += USAGE_EXEC(total_time);
1058
1059         Assert(plan_len >= 0 && plan_len < shared_state->plan_size);
1060         memcpy(entry->plan, shorten_plan, plan_len);
1061         entry->plan_len = plan_len;
1062         entry->plan[plan_len] = '\0';
1063         
1064         SpinLockRelease(&e->mutex);
1065
1066         LWLockRelease(shared_state->lock);
1067
1068         /* We postpone this pfree until we're out of the lock */
1069         if (norm_query)
1070                 pfree(norm_query);
1071 }
1072
1073 /*
1074  * Reset all statement statistics.
1075  */
1076 Datum
1077 pg_store_plans_reset(PG_FUNCTION_ARGS)
1078 {
1079         if (!shared_state || !hash_table)
1080                 ereport(ERROR,
1081                                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1082                                  errmsg("pg_store_plans must be loaded via shared_preload_libraries")));
1083         entry_reset();
1084         PG_RETURN_VOID();
1085 }
1086
1087 #define PG_STORE_PLANS_COLS                     23
1088
1089 /*
1090  * Retrieve statement statistics.
1091  */
1092 Datum
1093 pg_store_plans(PG_FUNCTION_ARGS)
1094 {
1095         ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
1096         TupleDesc       tupdesc;
1097         Tuplestorestate *tupstore;
1098         MemoryContext per_query_ctx;
1099         MemoryContext oldcontext;
1100         Oid                     userid = GetUserId();
1101         bool            is_superuser = superuser();
1102         HASH_SEQ_STATUS hash_seq;
1103         StatEntry  *entry;
1104
1105         if (!shared_state || !hash_table)
1106                 ereport(ERROR,
1107                                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1108                                  errmsg("pg_store_plans must be loaded via shared_preload_libraries")));
1109
1110         /* check to see if caller supports us returning a tuplestore */
1111         if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
1112                 ereport(ERROR,
1113                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1114                                  errmsg("set-valued function called in context that cannot accept a set")));
1115         if (!(rsinfo->allowedModes & SFRM_Materialize))
1116                 ereport(ERROR,
1117                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1118                                  errmsg("materialize mode required, but it is not " \
1119                                                 "allowed in this context")));
1120
1121         /* Build a tuple descriptor for our result type */
1122         if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
1123                 elog(ERROR, "return type must be a row type");
1124
1125         per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
1126         oldcontext = MemoryContextSwitchTo(per_query_ctx);
1127
1128         tupstore = tuplestore_begin_heap(true, false, work_mem);
1129         rsinfo->returnMode = SFRM_Materialize;
1130         rsinfo->setResult = tupstore;
1131         rsinfo->setDesc = tupdesc;
1132
1133         MemoryContextSwitchTo(oldcontext);
1134
1135         LWLockAcquire(shared_state->lock, LW_SHARED);
1136
1137         hash_seq_init(&hash_seq, hash_table);
1138         while ((entry = hash_seq_search(&hash_seq)) != NULL)
1139         {
1140                 Datum           values[PG_STORE_PLANS_COLS];
1141                 bool            nulls[PG_STORE_PLANS_COLS];
1142                 int                     i = 0;
1143                 int64           queryid      = entry->key.queryid;
1144                 int64           queryid_stmt = entry->queryid;
1145                 int64           planid       = entry->key.planid;
1146                 Counters        tmp;
1147
1148                 memset(values, 0, sizeof(values));
1149                 memset(nulls, 0, sizeof(nulls));
1150
1151                 values[i++] = ObjectIdGetDatum(entry->key.userid);
1152                 values[i++] = ObjectIdGetDatum(entry->key.dbid);
1153                 if (is_superuser || entry->key.userid == userid)
1154                 {
1155                         values[i++] = Int64GetDatumFast(queryid);
1156                         values[i++] = Int64GetDatumFast(planid);
1157                         values[i++] = Int64GetDatumFast(queryid_stmt);
1158                 }
1159                 else
1160                 {
1161                         values[i++] = Int64GetDatumFast(0);
1162                         values[i++] = Int64GetDatumFast(0);
1163                         values[i++] = Int64GetDatumFast(0);
1164                 }
1165
1166
1167                 if (is_superuser || entry->key.userid == userid)
1168                 {
1169                         char       *pstr = entry->plan;
1170                         char       *estr;
1171
1172                         switch (plan_format)
1173                         {
1174                                 case PLAN_FORMAT_TEXT:
1175                                         pstr = pgsp_json_textize(entry->plan);
1176                                         break;
1177                                 case PLAN_FORMAT_JSON:
1178                                         pstr = pgsp_json_inflate(entry->plan);
1179                                         break;
1180                                 case PLAN_FORMAT_YAML:
1181                                         pstr = pgsp_json_yamlize(entry->plan);
1182                                         break;
1183                                 case PLAN_FORMAT_XML:
1184                                         pstr = pgsp_json_xmlize(entry->plan);
1185                                         break;
1186                                 default:
1187                                         break;
1188                         }
1189                         
1190                         estr = (char *)
1191                                 pg_do_encoding_conversion((unsigned char *) pstr,
1192                                                                                   strlen(pstr),
1193                                                                                   entry->key.encoding,
1194                                                                                   GetDatabaseEncoding());
1195                         values[i++] = CStringGetTextDatum(estr);
1196
1197                         if (estr != pstr)
1198                                 pfree(estr);
1199                         if (pstr != entry->plan)
1200                                 pfree(pstr);
1201                         
1202                 }
1203                 else
1204                         values[i++] = CStringGetTextDatum("<insufficient privilege>");
1205
1206                 /* copy counters to a local variable to keep locking time short */
1207                 {
1208                         volatile StatEntry *e = (volatile StatEntry *) entry;
1209
1210                         SpinLockAcquire(&e->mutex);
1211                         tmp = e->counters;
1212                         SpinLockRelease(&e->mutex);
1213                 }
1214
1215                 /* Skip entry if unexecuted (ie, it's a pending "sticky" entry) */
1216                 if (tmp.calls == 0)
1217                         continue;
1218
1219                 values[i++] = Int64GetDatumFast(tmp.calls);
1220                 values[i++] = Float8GetDatumFast(tmp.total_time);
1221                 values[i++] = Int64GetDatumFast(tmp.rows);
1222                 values[i++] = Int64GetDatumFast(tmp.shared_blks_hit);
1223                 values[i++] = Int64GetDatumFast(tmp.shared_blks_read);
1224                 values[i++] = Int64GetDatumFast(tmp.shared_blks_dirtied);
1225                 values[i++] = Int64GetDatumFast(tmp.shared_blks_written);
1226                 values[i++] = Int64GetDatumFast(tmp.local_blks_hit);
1227                 values[i++] = Int64GetDatumFast(tmp.local_blks_read);
1228                 values[i++] = Int64GetDatumFast(tmp.local_blks_dirtied);
1229                 values[i++] = Int64GetDatumFast(tmp.local_blks_written);
1230                 values[i++] = Int64GetDatumFast(tmp.temp_blks_read);
1231                 values[i++] = Int64GetDatumFast(tmp.temp_blks_written);
1232                 values[i++] = Float8GetDatumFast(tmp.blk_read_time);
1233                 values[i++] = Float8GetDatumFast(tmp.blk_write_time);
1234                 values[i++] = TimestampTzGetDatum(tmp.first_call);
1235                 values[i++] = TimestampTzGetDatum(tmp.last_call);
1236                 Assert(i == PG_STORE_PLANS_COLS);
1237
1238                 tuplestore_putvalues(tupstore, tupdesc, values, nulls);
1239         }
1240
1241         LWLockRelease(shared_state->lock);
1242
1243         /* clean up and return the tuplestore */
1244         tuplestore_donestoring(tupstore);
1245
1246         return (Datum) 0;
1247 }
1248
1249 /*
1250  * Estimate shared memory space needed.
1251  */
1252 static Size
1253 shared_mem_size(void)
1254 {
1255         Size            size;
1256         Size            entrysize;
1257
1258         size = MAXALIGN(sizeof(SharedState));
1259         entrysize = offsetof(StatEntry, plan) +  store_plan_size;
1260         size = add_size(size, hash_estimate_size(store_size, entrysize));
1261
1262         return size;
1263 }
1264
1265 /*
1266  * Allocate a new hashtable entry.
1267  * caller must hold an exclusive lock on shared_state->lock
1268  *
1269  * "plan" need not be null-terminated; we rely on plan_len instead
1270  *
1271  * If "sticky" is true, make the new entry artificially sticky so that it will
1272  * probably still be there when the query finishes execution.  We do this by
1273  * giving it a median usage value rather than the normal value.  (Strictly
1274  * speaking, query strings are normalized on a best effort basis, though it
1275  * would be difficult to demonstrate this even under artificial conditions.)
1276  *
1277  * Note: despite needing exclusive lock, it's not an error for the target
1278  * entry to already exist.      This is because store_entry releases and
1279  * reacquires lock after failing to find a match; so someone else could
1280  * have made the entry while we waited to get exclusive lock.
1281  */
1282 static StatEntry *
1283 entry_alloc(EntryKey *key, const char *plan, int plan_len, bool sticky)
1284 {
1285         StatEntry  *entry;
1286         bool            found;
1287
1288         /* Make space if needed */
1289         while (hash_get_num_entries(hash_table) >= store_size)
1290                 entry_dealloc();
1291
1292         /* Find or create an entry with desired hash code */
1293         entry = (StatEntry *) hash_search(hash_table, key, HASH_ENTER, &found);
1294
1295         if (!found)
1296         {
1297                 /* New entry, initialize it */
1298
1299                 /* reset the statistics */
1300                 memset(&entry->counters, 0, sizeof(Counters));
1301                 /* set the appropriate initial usage count */
1302                 entry->counters.usage = sticky ? shared_state->cur_median_usage : USAGE_INIT;
1303                 /* re-initialize the mutex each time ... we assume no one using it */
1304                 SpinLockInit(&entry->mutex);
1305                 /* ... and don't forget the query text */
1306                 Assert(plan_len >= 0 && plan_len < shared_state->plan_size);
1307                 entry->plan_len = plan_len;
1308                 memcpy(entry->plan, plan, plan_len);
1309                 entry->plan[plan_len] = '\0';
1310         }
1311
1312         return entry;
1313 }
1314
1315 /*
1316  * qsort comparator for sorting into increasing usage order
1317  */
1318 static int
1319 entry_cmp(const void *lhs, const void *rhs)
1320 {
1321         double          l_usage = (*(StatEntry *const *) lhs)->counters.usage;
1322         double          r_usage = (*(StatEntry *const *) rhs)->counters.usage;
1323
1324         if (l_usage < r_usage)
1325                 return -1;
1326         else if (l_usage > r_usage)
1327                 return +1;
1328         else
1329                 return 0;
1330 }
1331
1332 /*
1333  * Deallocate least used entries.
1334  * Caller must hold an exclusive lock on shared_state->lock.
1335  */
1336 static void
1337 entry_dealloc(void)
1338 {
1339         HASH_SEQ_STATUS hash_seq;
1340         StatEntry **entries;
1341         StatEntry  *entry;
1342         int                     nvictims;
1343         int                     i;
1344
1345         /*
1346          * Sort entries by usage and deallocate USAGE_DEALLOC_PERCENT of them.
1347          * While we're scanning the table, apply the decay factor to the usage
1348          * values.
1349          */
1350
1351         entries = palloc(hash_get_num_entries(hash_table) * sizeof(StatEntry *));
1352
1353         i = 0;
1354         hash_seq_init(&hash_seq, hash_table);
1355         while ((entry = hash_seq_search(&hash_seq)) != NULL)
1356         {
1357                 entries[i++] = entry;
1358                 /* "Sticky" entries get a different usage decay rate. */
1359                 if (entry->counters.calls == 0)
1360                         entry->counters.usage *= STICKY_DECREASE_FACTOR;
1361                 else
1362                         entry->counters.usage *= USAGE_DECREASE_FACTOR;
1363         }
1364
1365         qsort(entries, i, sizeof(StatEntry *), entry_cmp);
1366
1367         /* Also, record the (approximate) median usage */
1368         if (i > 0)
1369                 shared_state->cur_median_usage = entries[i / 2]->counters.usage;
1370
1371         nvictims = Max(10, i * USAGE_DEALLOC_PERCENT / 100);
1372         nvictims = Min(nvictims, i);
1373
1374         for (i = 0; i < nvictims; i++)
1375         {
1376                 hash_search(hash_table, &entries[i]->key, HASH_REMOVE, NULL);
1377         }
1378
1379         pfree(entries);
1380 }
1381
1382 /*
1383  * Release all entries.
1384  */
1385 static void
1386 entry_reset(void)
1387 {
1388         HASH_SEQ_STATUS hash_seq;
1389         StatEntry  *entry;
1390
1391         LWLockAcquire(shared_state->lock, LW_EXCLUSIVE);
1392
1393         hash_seq_init(&hash_seq, hash_table);
1394         while ((entry = hash_seq_search(&hash_seq)) != NULL)
1395         {
1396                 hash_search(hash_table, &entry->key, HASH_REMOVE, NULL);
1397         }
1398
1399         LWLockRelease(shared_state->lock);
1400 }
1401
1402 Datum
1403 pg_store_plans_hash_query(PG_FUNCTION_ARGS)
1404 {
1405         PG_RETURN_OID(hash_query(text_to_cstring(PG_GETARG_TEXT_P(0))));
1406 }
1407                 
1408 Datum
1409 pg_store_plans_shorten(PG_FUNCTION_ARGS)
1410 {
1411         text *short_plan = PG_GETARG_TEXT_P(0);
1412         char *cjson = text_to_cstring(short_plan);
1413         char *cshorten = pgsp_json_shorten(cjson);
1414         PG_RETURN_TEXT_P(cstring_to_text(cshorten));
1415 }
1416
1417 Datum
1418 pg_store_plans_normalize(PG_FUNCTION_ARGS)
1419 {
1420         text *short_plan = PG_GETARG_TEXT_P(0);
1421         char *cjson = text_to_cstring(short_plan);
1422         char *cnormalized = pgsp_json_normalize(cjson);
1423         PG_RETURN_TEXT_P(cstring_to_text(cnormalized));
1424 }
1425
1426 Datum
1427 pg_store_plans_jsonplan(PG_FUNCTION_ARGS)
1428 {
1429         text *short_plan = PG_GETARG_TEXT_P(0);
1430         char *cshort = text_to_cstring(short_plan);
1431         char *cinflated = pgsp_json_inflate(cshort);
1432         PG_RETURN_TEXT_P(cstring_to_text(cinflated));
1433 }
1434
1435 Datum
1436 pg_store_plans_textplan(PG_FUNCTION_ARGS)
1437 {
1438         text *short_plan = PG_GETARG_TEXT_P(0);
1439         char *cshort = text_to_cstring(short_plan);
1440         char *ctextized = pgsp_json_textize(cshort);
1441
1442         PG_RETURN_TEXT_P(cstring_to_text(ctextized));
1443 }
1444
1445 Datum
1446 pg_store_plans_yamlplan(PG_FUNCTION_ARGS)
1447 {
1448         text *short_plan = PG_GETARG_TEXT_P(0);
1449         char *cshort = text_to_cstring(short_plan);
1450         char *cyamlized = pgsp_json_yamlize(cshort);
1451
1452         PG_RETURN_TEXT_P(cstring_to_text(cyamlized));
1453 }
1454
1455 Datum
1456 pg_store_plans_xmlplan(PG_FUNCTION_ARGS)
1457 {
1458         text *short_plan = PG_GETARG_TEXT_P(0);
1459         char *cshort = text_to_cstring(short_plan);
1460         char *cxmlized = pgsp_json_xmlize(cshort);
1461
1462         PG_RETURN_TEXT_P(cstring_to_text(cxmlized));
1463 }