OSDN Git Service

A-13-4の試験の内容を項目表にあうように変更した。
[pghintplan/pg_hint_plan.git] / pg_stat_statements.c
1 /*-------------------------------------------------------------------------
2  *
3  * pg_stat_statements.c
4  *              Track statement execution times across a whole database cluster.
5  *
6  * Execution costs are totalled for each distinct source query, and kept in
7  * a shared hashtable.  (We track only as many distinct queries as will fit
8  * in the designated amount of shared memory.)
9  *
10  * As of Postgres 9.2, this module normalizes query entries.  Normalization
11  * is a process whereby similar queries, typically differing only in their
12  * constants (though the exact rules are somewhat more subtle than that) are
13  * recognized as equivalent, and are tracked as a single entry.  This is
14  * particularly useful for non-prepared queries.
15  *
16  * Normalization is implemented by fingerprinting queries, selectively
17  * serializing those fields of each query tree's nodes that are judged to be
18  * essential to the query.  This is referred to as a query jumble.  This is
19  * distinct from a regular serialization in that various extraneous
20  * information is ignored as irrelevant or not essential to the query, such
21  * as the collations of Vars and, most notably, the values of constants.
22  *
23  * This jumble is acquired at the end of parse analysis of each query, and
24  * a 32-bit hash of it is stored into the query's Query.queryId field.
25  * The server then copies this value around, making it available in plan
26  * tree(s) generated from the query.  The executor can then use this value
27  * to blame query costs on the proper queryId.
28  *
29  * Note about locking issues: to create or delete an entry in the shared
30  * hashtable, one must hold pgss->lock exclusively.  Modifying any field
31  * in an entry except the counters requires the same.  To look up an entry,
32  * one must hold the lock shared.  To read or update the counters within
33  * an entry, one must hold the lock shared or exclusive (so the entry doesn't
34  * disappear!) and also take the entry's mutex spinlock.
35  *
36  *
37  * Copyright (c) 2008-2013, PostgreSQL Global Development Group
38  *
39  * IDENTIFICATION
40  *        contrib/pg_stat_statements/pg_stat_statements.c
41  *
42  *-------------------------------------------------------------------------
43  */
44 #include "postgres.h"
45
46 #ifdef NOT_USED
47
48 #include <unistd.h>
49
50 #endif
51 #include "access/hash.h"
52 #ifdef NOT_USED
53 #include "executor/instrument.h"
54 #include "funcapi.h"
55 #include "mb/pg_wchar.h"
56 #include "miscadmin.h"
57 #include "parser/analyze.h"
58 #include "parser/parsetree.h"
59 #endif
60 #include "parser/scanner.h"
61 #ifdef NOT_USED
62 #include "pgstat.h"
63 #include "storage/fd.h"
64 #include "storage/ipc.h"
65 #include "storage/spin.h"
66 #include "tcop/utility.h"
67 #include "utils/builtins.h"
68
69
70 PG_MODULE_MAGIC;
71
72 /* Location of stats file */
73 #define PGSS_DUMP_FILE  "global/pg_stat_statements.stat"
74
75 /* This constant defines the magic number in the stats file header */
76 static const uint32 PGSS_FILE_HEADER = 0x20120328;
77
78 /* XXX: Should USAGE_EXEC reflect execution time and/or buffer usage? */
79 #define USAGE_EXEC(duration)    (1.0)
80 #define USAGE_INIT                              (1.0)   /* including initial planning */
81 #define ASSUMED_MEDIAN_INIT             (10.0)  /* initial assumed median usage */
82 #define USAGE_DECREASE_FACTOR   (0.99)  /* decreased every entry_dealloc */
83 #define STICKY_DECREASE_FACTOR  (0.50)  /* factor for sticky entries */
84 #define USAGE_DEALLOC_PERCENT   5               /* free this % of entries at once */
85
86 #define JUMBLE_SIZE                             1024    /* query serialization buffer size */
87
88 /*
89  * Hashtable key that defines the identity of a hashtable entry.  We separate
90  * queries by user and by database even if they are otherwise identical.
91  *
92  * Presently, the query encoding is fully determined by the source database
93  * and so we don't really need it to be in the key.  But that might not always
94  * be true. Anyway it's notationally convenient to pass it as part of the key.
95  */
96 typedef struct pgssHashKey
97 {
98         Oid                     userid;                 /* user OID */
99         Oid                     dbid;                   /* database OID */
100         int                     encoding;               /* query encoding */
101         uint32          queryid;                /* query identifier */
102 } pgssHashKey;
103
104 /*
105  * The actual stats counters kept within pgssEntry.
106  */
107 typedef struct Counters
108 {
109         int64           calls;                  /* # of times executed */
110         double          total_time;             /* total execution time, in msec */
111         int64           rows;                   /* total # of retrieved or affected rows */
112         int64           shared_blks_hit;        /* # of shared buffer hits */
113         int64           shared_blks_read;               /* # of shared disk blocks read */
114         int64           shared_blks_dirtied;    /* # of shared disk blocks dirtied */
115         int64           shared_blks_written;    /* # of shared disk blocks written */
116         int64           local_blks_hit; /* # of local buffer hits */
117         int64           local_blks_read;        /* # of local disk blocks read */
118         int64           local_blks_dirtied;             /* # of local disk blocks dirtied */
119         int64           local_blks_written;             /* # of local disk blocks written */
120         int64           temp_blks_read; /* # of temp blocks read */
121         int64           temp_blks_written;              /* # of temp blocks written */
122         double          blk_read_time;  /* time spent reading, in msec */
123         double          blk_write_time; /* time spent writing, in msec */
124         double          usage;                  /* usage factor */
125 } Counters;
126
127 /*
128  * Statistics per statement
129  *
130  * NB: see the file read/write code before changing field order here.
131  */
132 typedef struct pgssEntry
133 {
134         pgssHashKey key;                        /* hash key of entry - MUST BE FIRST */
135         Counters        counters;               /* the statistics for this query */
136         int                     query_len;              /* # of valid bytes in query string */
137         slock_t         mutex;                  /* protects the counters only */
138         char            query[1];               /* VARIABLE LENGTH ARRAY - MUST BE LAST */
139         /* Note: the allocated length of query[] is actually pgss->query_size */
140 } pgssEntry;
141
142 /*
143  * Global shared state
144  */
145 typedef struct pgssSharedState
146 {
147         LWLockId        lock;                   /* protects hashtable search/modification */
148         int                     query_size;             /* max query length in bytes */
149         double          cur_median_usage;               /* current median usage in hashtable */
150 } pgssSharedState;
151
152 /*
153  * Struct for tracking locations/lengths of constants during normalization
154  */
155 typedef struct pgssLocationLen
156 {
157         int                     location;               /* start offset in query text */
158         int                     length;                 /* length in bytes, or -1 to ignore */
159 } pgssLocationLen;
160
161 /*
162  * Working state for computing a query jumble and producing a normalized
163  * query string
164  */
165 typedef struct pgssJumbleState
166 {
167         /* Jumble of current query tree */
168         unsigned char *jumble;
169
170         /* Number of bytes used in jumble[] */
171         Size            jumble_len;
172
173         /* Array of locations of constants that should be removed */
174         pgssLocationLen *clocations;
175
176         /* Allocated length of clocations array */
177         int                     clocations_buf_size;
178
179         /* Current number of valid entries in clocations array */
180         int                     clocations_count;
181 } pgssJumbleState;
182
183 /*---- Local variables ----*/
184
185 /* Current nesting depth of ExecutorRun+ProcessUtility calls */
186 static int      nested_level = 0;
187
188 /* Saved hook values in case of unload */
189 static shmem_startup_hook_type prev_shmem_startup_hook = NULL;
190 static post_parse_analyze_hook_type prev_post_parse_analyze_hook = NULL;
191 static ExecutorStart_hook_type prev_ExecutorStart = NULL;
192 static ExecutorRun_hook_type prev_ExecutorRun = NULL;
193 static ExecutorFinish_hook_type prev_ExecutorFinish = NULL;
194 static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
195 static ProcessUtility_hook_type prev_ProcessUtility = NULL;
196
197 /* Links to shared memory state */
198 static pgssSharedState *pgss = NULL;
199 static HTAB *pgss_hash = NULL;
200
201 /*---- GUC variables ----*/
202
203 typedef enum
204 {
205         PGSS_TRACK_NONE,                        /* track no statements */
206         PGSS_TRACK_TOP,                         /* only top level statements */
207         PGSS_TRACK_ALL                          /* all statements, including nested ones */
208 }       PGSSTrackLevel;
209
210 static const struct config_enum_entry track_options[] =
211 {
212         {"none", PGSS_TRACK_NONE, false},
213         {"top", PGSS_TRACK_TOP, false},
214         {"all", PGSS_TRACK_ALL, false},
215         {NULL, 0, false}
216 };
217
218 static int      pgss_max;                       /* max # statements to track */
219 static int      pgss_track;                     /* tracking level */
220 static bool pgss_track_utility; /* whether to track utility commands */
221 static bool pgss_save;                  /* whether to save stats across shutdown */
222
223
224 #define pgss_enabled() \
225         (pgss_track == PGSS_TRACK_ALL || \
226         (pgss_track == PGSS_TRACK_TOP && nested_level == 0))
227
228 /*---- Function declarations ----*/
229
230 void            _PG_init(void);
231 void            _PG_fini(void);
232
233 Datum           pg_stat_statements_reset(PG_FUNCTION_ARGS);
234 Datum           pg_stat_statements(PG_FUNCTION_ARGS);
235
236 PG_FUNCTION_INFO_V1(pg_stat_statements_reset);
237 PG_FUNCTION_INFO_V1(pg_stat_statements);
238
239 static void pgss_shmem_startup(void);
240 static void pgss_shmem_shutdown(int code, Datum arg);
241 static void pgss_post_parse_analyze(ParseState *pstate, Query *query);
242 static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags);
243 static void pgss_ExecutorRun(QueryDesc *queryDesc,
244                                  ScanDirection direction,
245                                  long count);
246 static void pgss_ExecutorFinish(QueryDesc *queryDesc);
247 static void pgss_ExecutorEnd(QueryDesc *queryDesc);
248 static void pgss_ProcessUtility(Node *parsetree, const char *queryString,
249                                         ProcessUtilityContext context, ParamListInfo params,
250                                         DestReceiver *dest, char *completionTag);
251 static uint32 pgss_hash_fn(const void *key, Size keysize);
252 static int      pgss_match_fn(const void *key1, const void *key2, Size keysize);
253 static uint32 pgss_hash_string(const char *str);
254 static void pgss_store(const char *query, uint32 queryId,
255                    double total_time, uint64 rows,
256                    const BufferUsage *bufusage,
257                    pgssJumbleState *jstate);
258 static Size pgss_memsize(void);
259 static pgssEntry *entry_alloc(pgssHashKey *key, const char *query,
260                         int query_len, bool sticky);
261 static void entry_dealloc(void);
262 static void entry_reset(void);
263 #endif
264 static void AppendJumble(pgssJumbleState *jstate,
265                          const unsigned char *item, Size size);
266 #ifdef NOT_USED
267 static void JumbleQuery(pgssJumbleState *jstate, Query *query);
268 #endif
269 static void JumbleRangeTable(pgssJumbleState *jstate, List *rtable);
270 static void JumbleExpr(pgssJumbleState *jstate, Node *node);
271 static void RecordConstLocation(pgssJumbleState *jstate, int location);
272 #ifdef NOT_USED
273 static char *generate_normalized_query(pgssJumbleState *jstate, const char *query,
274                                                   int *query_len_p, int encoding);
275 #endif
276 static void fill_in_constant_lengths(pgssJumbleState *jstate, const char *query);
277 static int      comp_location(const void *a, const void *b);
278 #ifdef NOT_USED
279
280
281 /*
282  * Module load callback
283  */
284 void
285 _PG_init(void)
286 {
287         /*
288          * In order to create our shared memory area, we have to be loaded via
289          * shared_preload_libraries.  If not, fall out without hooking into any of
290          * the main system.  (We don't throw error here because it seems useful to
291          * allow the pg_stat_statements functions to be created even when the
292          * module isn't active.  The functions must protect themselves against
293          * being called then, however.)
294          */
295         if (!process_shared_preload_libraries_in_progress)
296                 return;
297
298         /*
299          * Define (or redefine) custom GUC variables.
300          */
301         DefineCustomIntVariable("pg_stat_statements.max",
302           "Sets the maximum number of statements tracked by pg_stat_statements.",
303                                                         NULL,
304                                                         &pgss_max,
305                                                         1000,
306                                                         100,
307                                                         INT_MAX,
308                                                         PGC_POSTMASTER,
309                                                         0,
310                                                         NULL,
311                                                         NULL,
312                                                         NULL);
313
314         DefineCustomEnumVariable("pg_stat_statements.track",
315                            "Selects which statements are tracked by pg_stat_statements.",
316                                                          NULL,
317                                                          &pgss_track,
318                                                          PGSS_TRACK_TOP,
319                                                          track_options,
320                                                          PGC_SUSET,
321                                                          0,
322                                                          NULL,
323                                                          NULL,
324                                                          NULL);
325
326         DefineCustomBoolVariable("pg_stat_statements.track_utility",
327            "Selects whether utility commands are tracked by pg_stat_statements.",
328                                                          NULL,
329                                                          &pgss_track_utility,
330                                                          true,
331                                                          PGC_SUSET,
332                                                          0,
333                                                          NULL,
334                                                          NULL,
335                                                          NULL);
336
337         DefineCustomBoolVariable("pg_stat_statements.save",
338                            "Save pg_stat_statements statistics across server shutdowns.",
339                                                          NULL,
340                                                          &pgss_save,
341                                                          true,
342                                                          PGC_SIGHUP,
343                                                          0,
344                                                          NULL,
345                                                          NULL,
346                                                          NULL);
347
348         EmitWarningsOnPlaceholders("pg_stat_statements");
349
350         /*
351          * Request additional shared resources.  (These are no-ops if we're not in
352          * the postmaster process.)  We'll allocate or attach to the shared
353          * resources in pgss_shmem_startup().
354          */
355         RequestAddinShmemSpace(pgss_memsize());
356         RequestAddinLWLocks(1);
357
358         /*
359          * Install hooks.
360          */
361         prev_shmem_startup_hook = shmem_startup_hook;
362         shmem_startup_hook = pgss_shmem_startup;
363         prev_post_parse_analyze_hook = post_parse_analyze_hook;
364         post_parse_analyze_hook = pgss_post_parse_analyze;
365         prev_ExecutorStart = ExecutorStart_hook;
366         ExecutorStart_hook = pgss_ExecutorStart;
367         prev_ExecutorRun = ExecutorRun_hook;
368         ExecutorRun_hook = pgss_ExecutorRun;
369         prev_ExecutorFinish = ExecutorFinish_hook;
370         ExecutorFinish_hook = pgss_ExecutorFinish;
371         prev_ExecutorEnd = ExecutorEnd_hook;
372         ExecutorEnd_hook = pgss_ExecutorEnd;
373         prev_ProcessUtility = ProcessUtility_hook;
374         ProcessUtility_hook = pgss_ProcessUtility;
375 }
376
377 /*
378  * Module unload callback
379  */
380 void
381 _PG_fini(void)
382 {
383         /* Uninstall hooks. */
384         shmem_startup_hook = prev_shmem_startup_hook;
385         post_parse_analyze_hook = prev_post_parse_analyze_hook;
386         ExecutorStart_hook = prev_ExecutorStart;
387         ExecutorRun_hook = prev_ExecutorRun;
388         ExecutorFinish_hook = prev_ExecutorFinish;
389         ExecutorEnd_hook = prev_ExecutorEnd;
390         ProcessUtility_hook = prev_ProcessUtility;
391 }
392
393 /*
394  * shmem_startup hook: allocate or attach to shared memory,
395  * then load any pre-existing statistics from file.
396  */
397 static void
398 pgss_shmem_startup(void)
399 {
400         bool            found;
401         HASHCTL         info;
402         FILE       *file;
403         uint32          header;
404         int32           num;
405         int32           i;
406         int                     query_size;
407         int                     buffer_size;
408         char       *buffer = NULL;
409
410         if (prev_shmem_startup_hook)
411                 prev_shmem_startup_hook();
412
413         /* reset in case this is a restart within the postmaster */
414         pgss = NULL;
415         pgss_hash = NULL;
416
417         /*
418          * Create or attach to the shared memory state, including hash table
419          */
420         LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
421
422         pgss = ShmemInitStruct("pg_stat_statements",
423                                                    sizeof(pgssSharedState),
424                                                    &found);
425
426         if (!found)
427         {
428                 /* First time through ... */
429                 pgss->lock = LWLockAssign();
430                 pgss->query_size = pgstat_track_activity_query_size;
431                 pgss->cur_median_usage = ASSUMED_MEDIAN_INIT;
432         }
433
434         /* Be sure everyone agrees on the hash table entry size */
435         query_size = pgss->query_size;
436
437         memset(&info, 0, sizeof(info));
438         info.keysize = sizeof(pgssHashKey);
439         info.entrysize = offsetof(pgssEntry, query) +query_size;
440         info.hash = pgss_hash_fn;
441         info.match = pgss_match_fn;
442         pgss_hash = ShmemInitHash("pg_stat_statements hash",
443                                                           pgss_max, pgss_max,
444                                                           &info,
445                                                           HASH_ELEM | HASH_FUNCTION | HASH_COMPARE);
446
447         LWLockRelease(AddinShmemInitLock);
448
449         /*
450          * If we're in the postmaster (or a standalone backend...), set up a shmem
451          * exit hook to dump the statistics to disk.
452          */
453         if (!IsUnderPostmaster)
454                 on_shmem_exit(pgss_shmem_shutdown, (Datum) 0);
455
456         /*
457          * Attempt to load old statistics from the dump file, if this is the first
458          * time through and we weren't told not to.
459          */
460         if (found || !pgss_save)
461                 return;
462
463         /*
464          * Note: we don't bother with locks here, because there should be no other
465          * processes running when this code is reached.
466          */
467         file = AllocateFile(PGSS_DUMP_FILE, PG_BINARY_R);
468         if (file == NULL)
469         {
470                 if (errno == ENOENT)
471                         return;                         /* ignore not-found error */
472                 goto error;
473         }
474
475         buffer_size = query_size;
476         buffer = (char *) palloc(buffer_size);
477
478         if (fread(&header, sizeof(uint32), 1, file) != 1 ||
479                 header != PGSS_FILE_HEADER ||
480                 fread(&num, sizeof(int32), 1, file) != 1)
481                 goto error;
482
483         for (i = 0; i < num; i++)
484         {
485                 pgssEntry       temp;
486                 pgssEntry  *entry;
487
488                 if (fread(&temp, offsetof(pgssEntry, mutex), 1, file) != 1)
489                         goto error;
490
491                 /* Encoding is the only field we can easily sanity-check */
492                 if (!PG_VALID_BE_ENCODING(temp.key.encoding))
493                         goto error;
494
495                 /* Previous incarnation might have had a larger query_size */
496                 if (temp.query_len >= buffer_size)
497                 {
498                         buffer = (char *) repalloc(buffer, temp.query_len + 1);
499                         buffer_size = temp.query_len + 1;
500                 }
501
502                 if (fread(buffer, 1, temp.query_len, file) != temp.query_len)
503                         goto error;
504                 buffer[temp.query_len] = '\0';
505
506                 /* Skip loading "sticky" entries */
507                 if (temp.counters.calls == 0)
508                         continue;
509
510                 /* Clip to available length if needed */
511                 if (temp.query_len >= query_size)
512                         temp.query_len = pg_encoding_mbcliplen(temp.key.encoding,
513                                                                                                    buffer,
514                                                                                                    temp.query_len,
515                                                                                                    query_size - 1);
516
517                 /* make the hashtable entry (discards old entries if too many) */
518                 entry = entry_alloc(&temp.key, buffer, temp.query_len, false);
519
520                 /* copy in the actual stats */
521                 entry->counters = temp.counters;
522         }
523
524         pfree(buffer);
525         FreeFile(file);
526
527         /*
528          * Remove the file so it's not included in backups/replication slaves,
529          * etc. A new file will be written on next shutdown.
530          */
531         unlink(PGSS_DUMP_FILE);
532
533         return;
534
535 error:
536         ereport(LOG,
537                         (errcode_for_file_access(),
538                          errmsg("could not read pg_stat_statement file \"%s\": %m",
539                                         PGSS_DUMP_FILE)));
540         if (buffer)
541                 pfree(buffer);
542         if (file)
543                 FreeFile(file);
544         /* If possible, throw away the bogus file; ignore any error */
545         unlink(PGSS_DUMP_FILE);
546 }
547
548 /*
549  * shmem_shutdown hook: Dump statistics into file.
550  *
551  * Note: we don't bother with acquiring lock, because there should be no
552  * other processes running when this is called.
553  */
554 static void
555 pgss_shmem_shutdown(int code, Datum arg)
556 {
557         FILE       *file;
558         HASH_SEQ_STATUS hash_seq;
559         int32           num_entries;
560         pgssEntry  *entry;
561
562         /* Don't try to dump during a crash. */
563         if (code)
564                 return;
565
566         /* Safety check ... shouldn't get here unless shmem is set up. */
567         if (!pgss || !pgss_hash)
568                 return;
569
570         /* Don't dump if told not to. */
571         if (!pgss_save)
572                 return;
573
574         file = AllocateFile(PGSS_DUMP_FILE ".tmp", PG_BINARY_W);
575         if (file == NULL)
576                 goto error;
577
578         if (fwrite(&PGSS_FILE_HEADER, sizeof(uint32), 1, file) != 1)
579                 goto error;
580         num_entries = hash_get_num_entries(pgss_hash);
581         if (fwrite(&num_entries, sizeof(int32), 1, file) != 1)
582                 goto error;
583
584         hash_seq_init(&hash_seq, pgss_hash);
585         while ((entry = hash_seq_search(&hash_seq)) != NULL)
586         {
587                 int                     len = entry->query_len;
588
589                 if (fwrite(entry, offsetof(pgssEntry, mutex), 1, file) != 1 ||
590                         fwrite(entry->query, 1, len, file) != len)
591                         goto error;
592         }
593
594         if (FreeFile(file))
595         {
596                 file = NULL;
597                 goto error;
598         }
599
600         /*
601          * Rename file into place, so we atomically replace the old one.
602          */
603         if (rename(PGSS_DUMP_FILE ".tmp", PGSS_DUMP_FILE) != 0)
604                 ereport(LOG,
605                                 (errcode_for_file_access(),
606                                  errmsg("could not rename pg_stat_statement file \"%s\": %m",
607                                                 PGSS_DUMP_FILE ".tmp")));
608
609         return;
610
611 error:
612         ereport(LOG,
613                         (errcode_for_file_access(),
614                          errmsg("could not write pg_stat_statement file \"%s\": %m",
615                                         PGSS_DUMP_FILE ".tmp")));
616         if (file)
617                 FreeFile(file);
618         unlink(PGSS_DUMP_FILE ".tmp");
619 }
620
621 /*
622  * Post-parse-analysis hook: mark query with a queryId
623  */
624 static void
625 pgss_post_parse_analyze(ParseState *pstate, Query *query)
626 {
627         pgssJumbleState jstate;
628
629         /* Assert we didn't do this already */
630         Assert(query->queryId == 0);
631
632         /* Safety check... */
633         if (!pgss || !pgss_hash)
634                 return;
635
636         /*
637          * Utility statements get queryId zero.  We do this even in cases where
638          * the statement contains an optimizable statement for which a queryId
639          * could be derived (such as EXPLAIN or DECLARE CURSOR).  For such cases,
640          * runtime control will first go through ProcessUtility and then the
641          * executor, and we don't want the executor hooks to do anything, since we
642          * are already measuring the statement's costs at the utility level.
643          */
644         if (query->utilityStmt)
645         {
646                 query->queryId = 0;
647                 return;
648         }
649
650         /* Set up workspace for query jumbling */
651         jstate.jumble = (unsigned char *) palloc(JUMBLE_SIZE);
652         jstate.jumble_len = 0;
653         jstate.clocations_buf_size = 32;
654         jstate.clocations = (pgssLocationLen *)
655                 palloc(jstate.clocations_buf_size * sizeof(pgssLocationLen));
656         jstate.clocations_count = 0;
657
658         /* Compute query ID and mark the Query node with it */
659         JumbleQuery(&jstate, query);
660         query->queryId = hash_any(jstate.jumble, jstate.jumble_len);
661
662         /*
663          * If we are unlucky enough to get a hash of zero, use 1 instead, to
664          * prevent confusion with the utility-statement case.
665          */
666         if (query->queryId == 0)
667                 query->queryId = 1;
668
669         /*
670          * If we were able to identify any ignorable constants, we immediately
671          * create a hash table entry for the query, so that we can record the
672          * normalized form of the query string.  If there were no such constants,
673          * the normalized string would be the same as the query text anyway, so
674          * there's no need for an early entry.
675          */
676         if (jstate.clocations_count > 0)
677                 pgss_store(pstate->p_sourcetext,
678                                    query->queryId,
679                                    0,
680                                    0,
681                                    NULL,
682                                    &jstate);
683 }
684
685 /*
686  * ExecutorStart hook: start up tracking if needed
687  */
688 static void
689 pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
690 {
691         if (prev_ExecutorStart)
692                 prev_ExecutorStart(queryDesc, eflags);
693         else
694                 standard_ExecutorStart(queryDesc, eflags);
695
696         /*
697          * If query has queryId zero, don't track it.  This prevents double
698          * counting of optimizable statements that are directly contained in
699          * utility statements.
700          */
701         if (pgss_enabled() && queryDesc->plannedstmt->queryId != 0)
702         {
703                 /*
704                  * Set up to track total elapsed time in ExecutorRun.  Make sure the
705                  * space is allocated in the per-query context so it will go away at
706                  * ExecutorEnd.
707                  */
708                 if (queryDesc->totaltime == NULL)
709                 {
710                         MemoryContext oldcxt;
711
712                         oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
713                         queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_ALL);
714                         MemoryContextSwitchTo(oldcxt);
715                 }
716         }
717 }
718
719 /*
720  * ExecutorRun hook: all we need do is track nesting depth
721  */
722 static void
723 pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, long count)
724 {
725         nested_level++;
726         PG_TRY();
727         {
728                 if (prev_ExecutorRun)
729                         prev_ExecutorRun(queryDesc, direction, count);
730                 else
731                         standard_ExecutorRun(queryDesc, direction, count);
732                 nested_level--;
733         }
734         PG_CATCH();
735         {
736                 nested_level--;
737                 PG_RE_THROW();
738         }
739         PG_END_TRY();
740 }
741
742 /*
743  * ExecutorFinish hook: all we need do is track nesting depth
744  */
745 static void
746 pgss_ExecutorFinish(QueryDesc *queryDesc)
747 {
748         nested_level++;
749         PG_TRY();
750         {
751                 if (prev_ExecutorFinish)
752                         prev_ExecutorFinish(queryDesc);
753                 else
754                         standard_ExecutorFinish(queryDesc);
755                 nested_level--;
756         }
757         PG_CATCH();
758         {
759                 nested_level--;
760                 PG_RE_THROW();
761         }
762         PG_END_TRY();
763 }
764
765 /*
766  * ExecutorEnd hook: store results if needed
767  */
768 static void
769 pgss_ExecutorEnd(QueryDesc *queryDesc)
770 {
771         uint32          queryId = queryDesc->plannedstmt->queryId;
772
773         if (queryId != 0 && queryDesc->totaltime && pgss_enabled())
774         {
775                 /*
776                  * Make sure stats accumulation is done.  (Note: it's okay if several
777                  * levels of hook all do this.)
778                  */
779                 InstrEndLoop(queryDesc->totaltime);
780
781                 pgss_store(queryDesc->sourceText,
782                                    queryId,
783                                    queryDesc->totaltime->total * 1000.0,                /* convert to msec */
784                                    queryDesc->estate->es_processed,
785                                    &queryDesc->totaltime->bufusage,
786                                    NULL);
787         }
788
789         if (prev_ExecutorEnd)
790                 prev_ExecutorEnd(queryDesc);
791         else
792                 standard_ExecutorEnd(queryDesc);
793 }
794
795 /*
796  * ProcessUtility hook
797  */
798 static void
799 pgss_ProcessUtility(Node *parsetree, const char *queryString,
800                                         ProcessUtilityContext context, ParamListInfo params,
801                                         DestReceiver *dest, char *completionTag)
802 {
803         /*
804          * If it's an EXECUTE statement, we don't track it and don't increment the
805          * nesting level.  This allows the cycles to be charged to the underlying
806          * PREPARE instead (by the Executor hooks), which is much more useful.
807          *
808          * We also don't track execution of PREPARE.  If we did, we would get one
809          * hash table entry for the PREPARE (with hash calculated from the query
810          * string), and then a different one with the same query string (but hash
811          * calculated from the query tree) would be used to accumulate costs of
812          * ensuing EXECUTEs.  This would be confusing, and inconsistent with other
813          * cases where planning time is not included at all.
814          */
815         if (pgss_track_utility && pgss_enabled() &&
816                 !IsA(parsetree, ExecuteStmt) &&
817                 !IsA(parsetree, PrepareStmt))
818         {
819                 instr_time      start;
820                 instr_time      duration;
821                 uint64          rows = 0;
822                 BufferUsage bufusage_start,
823                                         bufusage;
824                 uint32          queryId;
825
826                 bufusage_start = pgBufferUsage;
827                 INSTR_TIME_SET_CURRENT(start);
828
829                 nested_level++;
830                 PG_TRY();
831                 {
832                         if (prev_ProcessUtility)
833                                 prev_ProcessUtility(parsetree, queryString,
834                                                                         context, params,
835                                                                         dest, completionTag);
836                         else
837                                 standard_ProcessUtility(parsetree, queryString,
838                                                                                 context, params,
839                                                                                 dest, completionTag);
840                         nested_level--;
841                 }
842                 PG_CATCH();
843                 {
844                         nested_level--;
845                         PG_RE_THROW();
846                 }
847                 PG_END_TRY();
848
849                 INSTR_TIME_SET_CURRENT(duration);
850                 INSTR_TIME_SUBTRACT(duration, start);
851
852                 /* parse command tag to retrieve the number of affected rows. */
853                 if (completionTag &&
854                         sscanf(completionTag, "COPY " UINT64_FORMAT, &rows) != 1)
855                         rows = 0;
856
857                 /* calc differences of buffer counters. */
858                 bufusage.shared_blks_hit =
859                         pgBufferUsage.shared_blks_hit - bufusage_start.shared_blks_hit;
860                 bufusage.shared_blks_read =
861                         pgBufferUsage.shared_blks_read - bufusage_start.shared_blks_read;
862                 bufusage.shared_blks_dirtied =
863                         pgBufferUsage.shared_blks_dirtied - bufusage_start.shared_blks_dirtied;
864                 bufusage.shared_blks_written =
865                         pgBufferUsage.shared_blks_written - bufusage_start.shared_blks_written;
866                 bufusage.local_blks_hit =
867                         pgBufferUsage.local_blks_hit - bufusage_start.local_blks_hit;
868                 bufusage.local_blks_read =
869                         pgBufferUsage.local_blks_read - bufusage_start.local_blks_read;
870                 bufusage.local_blks_dirtied =
871                         pgBufferUsage.local_blks_dirtied - bufusage_start.local_blks_dirtied;
872                 bufusage.local_blks_written =
873                         pgBufferUsage.local_blks_written - bufusage_start.local_blks_written;
874                 bufusage.temp_blks_read =
875                         pgBufferUsage.temp_blks_read - bufusage_start.temp_blks_read;
876                 bufusage.temp_blks_written =
877                         pgBufferUsage.temp_blks_written - bufusage_start.temp_blks_written;
878                 bufusage.blk_read_time = pgBufferUsage.blk_read_time;
879                 INSTR_TIME_SUBTRACT(bufusage.blk_read_time, bufusage_start.blk_read_time);
880                 bufusage.blk_write_time = pgBufferUsage.blk_write_time;
881                 INSTR_TIME_SUBTRACT(bufusage.blk_write_time, bufusage_start.blk_write_time);
882
883                 /* For utility statements, we just hash the query string directly */
884                 queryId = pgss_hash_string(queryString);
885
886                 pgss_store(queryString,
887                                    queryId,
888                                    INSTR_TIME_GET_MILLISEC(duration),
889                                    rows,
890                                    &bufusage,
891                                    NULL);
892         }
893         else
894         {
895                 if (prev_ProcessUtility)
896                         prev_ProcessUtility(parsetree, queryString,
897                                                                 context, params,
898                                                                 dest, completionTag);
899                 else
900                         standard_ProcessUtility(parsetree, queryString,
901                                                                         context, params,
902                                                                         dest, completionTag);
903         }
904 }
905
906 /*
907  * Calculate hash value for a key
908  */
909 static uint32
910 pgss_hash_fn(const void *key, Size keysize)
911 {
912         const pgssHashKey *k = (const pgssHashKey *) key;
913
914         /* we don't bother to include encoding in the hash */
915         return hash_uint32((uint32) k->userid) ^
916                 hash_uint32((uint32) k->dbid) ^
917                 hash_uint32((uint32) k->queryid);
918 }
919
920 /*
921  * Compare two keys - zero means match
922  */
923 static int
924 pgss_match_fn(const void *key1, const void *key2, Size keysize)
925 {
926         const pgssHashKey *k1 = (const pgssHashKey *) key1;
927         const pgssHashKey *k2 = (const pgssHashKey *) key2;
928
929         if (k1->userid == k2->userid &&
930                 k1->dbid == k2->dbid &&
931                 k1->encoding == k2->encoding &&
932                 k1->queryid == k2->queryid)
933                 return 0;
934         else
935                 return 1;
936 }
937
938 /*
939  * Given an arbitrarily long query string, produce a hash for the purposes of
940  * identifying the query, without normalizing constants.  Used when hashing
941  * utility statements.
942  */
943 static uint32
944 pgss_hash_string(const char *str)
945 {
946         return hash_any((const unsigned char *) str, strlen(str));
947 }
948
949 /*
950  * Store some statistics for a statement.
951  *
952  * If jstate is not NULL then we're trying to create an entry for which
953  * we have no statistics as yet; we just want to record the normalized
954  * query string.  total_time, rows, bufusage are ignored in this case.
955  */
956 static void
957 pgss_store(const char *query, uint32 queryId,
958                    double total_time, uint64 rows,
959                    const BufferUsage *bufusage,
960                    pgssJumbleState *jstate)
961 {
962         pgssHashKey key;
963         pgssEntry  *entry;
964         char       *norm_query = NULL;
965
966         Assert(query != NULL);
967
968         /* Safety check... */
969         if (!pgss || !pgss_hash)
970                 return;
971
972         /* Set up key for hashtable search */
973         key.userid = GetUserId();
974         key.dbid = MyDatabaseId;
975         key.encoding = GetDatabaseEncoding();
976         key.queryid = queryId;
977
978         /* Lookup the hash table entry with shared lock. */
979         LWLockAcquire(pgss->lock, LW_SHARED);
980
981         entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
982
983         /* Create new entry, if not present */
984         if (!entry)
985         {
986                 int                     query_len;
987
988                 /*
989                  * We'll need exclusive lock to make a new entry.  There is no point
990                  * in holding shared lock while we normalize the string, though.
991                  */
992                 LWLockRelease(pgss->lock);
993
994                 query_len = strlen(query);
995
996                 if (jstate)
997                 {
998                         /* Normalize the string if enabled */
999                         norm_query = generate_normalized_query(jstate, query,
1000                                                                                                    &query_len,
1001                                                                                                    key.encoding);
1002
1003                         /* Acquire exclusive lock as required by entry_alloc() */
1004                         LWLockAcquire(pgss->lock, LW_EXCLUSIVE);
1005
1006                         entry = entry_alloc(&key, norm_query, query_len, true);
1007                 }
1008                 else
1009                 {
1010                         /*
1011                          * We're just going to store the query string as-is; but we have
1012                          * to truncate it if over-length.
1013                          */
1014                         if (query_len >= pgss->query_size)
1015                                 query_len = pg_encoding_mbcliplen(key.encoding,
1016                                                                                                   query,
1017                                                                                                   query_len,
1018                                                                                                   pgss->query_size - 1);
1019
1020                         /* Acquire exclusive lock as required by entry_alloc() */
1021                         LWLockAcquire(pgss->lock, LW_EXCLUSIVE);
1022
1023                         entry = entry_alloc(&key, query, query_len, false);
1024                 }
1025         }
1026
1027         /* Increment the counts, except when jstate is not NULL */
1028         if (!jstate)
1029         {
1030                 /*
1031                  * Grab the spinlock while updating the counters (see comment about
1032                  * locking rules at the head of the file)
1033                  */
1034                 volatile pgssEntry *e = (volatile pgssEntry *) entry;
1035
1036                 SpinLockAcquire(&e->mutex);
1037
1038                 /* "Unstick" entry if it was previously sticky */
1039                 if (e->counters.calls == 0)
1040                         e->counters.usage = USAGE_INIT;
1041
1042                 e->counters.calls += 1;
1043                 e->counters.total_time += total_time;
1044                 e->counters.rows += rows;
1045                 e->counters.shared_blks_hit += bufusage->shared_blks_hit;
1046                 e->counters.shared_blks_read += bufusage->shared_blks_read;
1047                 e->counters.shared_blks_dirtied += bufusage->shared_blks_dirtied;
1048                 e->counters.shared_blks_written += bufusage->shared_blks_written;
1049                 e->counters.local_blks_hit += bufusage->local_blks_hit;
1050                 e->counters.local_blks_read += bufusage->local_blks_read;
1051                 e->counters.local_blks_dirtied += bufusage->local_blks_dirtied;
1052                 e->counters.local_blks_written += bufusage->local_blks_written;
1053                 e->counters.temp_blks_read += bufusage->temp_blks_read;
1054                 e->counters.temp_blks_written += bufusage->temp_blks_written;
1055                 e->counters.blk_read_time += INSTR_TIME_GET_MILLISEC(bufusage->blk_read_time);
1056                 e->counters.blk_write_time += INSTR_TIME_GET_MILLISEC(bufusage->blk_write_time);
1057                 e->counters.usage += USAGE_EXEC(total_time);
1058
1059                 SpinLockRelease(&e->mutex);
1060         }
1061
1062         LWLockRelease(pgss->lock);
1063
1064         /* We postpone this pfree until we're out of the lock */
1065         if (norm_query)
1066                 pfree(norm_query);
1067 }
1068
1069 /*
1070  * Reset all statement statistics.
1071  */
1072 Datum
1073 pg_stat_statements_reset(PG_FUNCTION_ARGS)
1074 {
1075         if (!pgss || !pgss_hash)
1076                 ereport(ERROR,
1077                                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1078                                  errmsg("pg_stat_statements must be loaded via shared_preload_libraries")));
1079         entry_reset();
1080         PG_RETURN_VOID();
1081 }
1082
1083 #define PG_STAT_STATEMENTS_COLS_V1_0    14
1084 #define PG_STAT_STATEMENTS_COLS                 18
1085
1086 /*
1087  * Retrieve statement statistics.
1088  */
1089 Datum
1090 pg_stat_statements(PG_FUNCTION_ARGS)
1091 {
1092         ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
1093         TupleDesc       tupdesc;
1094         Tuplestorestate *tupstore;
1095         MemoryContext per_query_ctx;
1096         MemoryContext oldcontext;
1097         Oid                     userid = GetUserId();
1098         bool            is_superuser = superuser();
1099         HASH_SEQ_STATUS hash_seq;
1100         pgssEntry  *entry;
1101         bool            sql_supports_v1_1_counters = true;
1102
1103         if (!pgss || !pgss_hash)
1104                 ereport(ERROR,
1105                                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1106                                  errmsg("pg_stat_statements must be loaded via shared_preload_libraries")));
1107
1108         /* check to see if caller supports us returning a tuplestore */
1109         if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
1110                 ereport(ERROR,
1111                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1112                                  errmsg("set-valued function called in context that cannot accept a set")));
1113         if (!(rsinfo->allowedModes & SFRM_Materialize))
1114                 ereport(ERROR,
1115                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1116                                  errmsg("materialize mode required, but it is not " \
1117                                                 "allowed in this context")));
1118
1119         /* Build a tuple descriptor for our result type */
1120         if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
1121                 elog(ERROR, "return type must be a row type");
1122         if (tupdesc->natts == PG_STAT_STATEMENTS_COLS_V1_0)
1123                 sql_supports_v1_1_counters = false;
1124
1125         per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
1126         oldcontext = MemoryContextSwitchTo(per_query_ctx);
1127
1128         tupstore = tuplestore_begin_heap(true, false, work_mem);
1129         rsinfo->returnMode = SFRM_Materialize;
1130         rsinfo->setResult = tupstore;
1131         rsinfo->setDesc = tupdesc;
1132
1133         MemoryContextSwitchTo(oldcontext);
1134
1135         LWLockAcquire(pgss->lock, LW_SHARED);
1136
1137         hash_seq_init(&hash_seq, pgss_hash);
1138         while ((entry = hash_seq_search(&hash_seq)) != NULL)
1139         {
1140                 Datum           values[PG_STAT_STATEMENTS_COLS];
1141                 bool            nulls[PG_STAT_STATEMENTS_COLS];
1142                 int                     i = 0;
1143                 Counters        tmp;
1144
1145                 memset(values, 0, sizeof(values));
1146                 memset(nulls, 0, sizeof(nulls));
1147
1148                 values[i++] = ObjectIdGetDatum(entry->key.userid);
1149                 values[i++] = ObjectIdGetDatum(entry->key.dbid);
1150
1151                 if (is_superuser || entry->key.userid == userid)
1152                 {
1153                         char       *qstr;
1154
1155                         qstr = (char *)
1156                                 pg_do_encoding_conversion((unsigned char *) entry->query,
1157                                                                                   entry->query_len,
1158                                                                                   entry->key.encoding,
1159                                                                                   GetDatabaseEncoding());
1160                         values[i++] = CStringGetTextDatum(qstr);
1161                         if (qstr != entry->query)
1162                                 pfree(qstr);
1163                 }
1164                 else
1165                         values[i++] = CStringGetTextDatum("<insufficient privilege>");
1166
1167                 /* copy counters to a local variable to keep locking time short */
1168                 {
1169                         volatile pgssEntry *e = (volatile pgssEntry *) entry;
1170
1171                         SpinLockAcquire(&e->mutex);
1172                         tmp = e->counters;
1173                         SpinLockRelease(&e->mutex);
1174                 }
1175
1176                 /* Skip entry if unexecuted (ie, it's a pending "sticky" entry) */
1177                 if (tmp.calls == 0)
1178                         continue;
1179
1180                 values[i++] = Int64GetDatumFast(tmp.calls);
1181                 values[i++] = Float8GetDatumFast(tmp.total_time);
1182                 values[i++] = Int64GetDatumFast(tmp.rows);
1183                 values[i++] = Int64GetDatumFast(tmp.shared_blks_hit);
1184                 values[i++] = Int64GetDatumFast(tmp.shared_blks_read);
1185                 if (sql_supports_v1_1_counters)
1186                         values[i++] = Int64GetDatumFast(tmp.shared_blks_dirtied);
1187                 values[i++] = Int64GetDatumFast(tmp.shared_blks_written);
1188                 values[i++] = Int64GetDatumFast(tmp.local_blks_hit);
1189                 values[i++] = Int64GetDatumFast(tmp.local_blks_read);
1190                 if (sql_supports_v1_1_counters)
1191                         values[i++] = Int64GetDatumFast(tmp.local_blks_dirtied);
1192                 values[i++] = Int64GetDatumFast(tmp.local_blks_written);
1193                 values[i++] = Int64GetDatumFast(tmp.temp_blks_read);
1194                 values[i++] = Int64GetDatumFast(tmp.temp_blks_written);
1195                 if (sql_supports_v1_1_counters)
1196                 {
1197                         values[i++] = Float8GetDatumFast(tmp.blk_read_time);
1198                         values[i++] = Float8GetDatumFast(tmp.blk_write_time);
1199                 }
1200
1201                 Assert(i == (sql_supports_v1_1_counters ?
1202                                          PG_STAT_STATEMENTS_COLS : PG_STAT_STATEMENTS_COLS_V1_0));
1203
1204                 tuplestore_putvalues(tupstore, tupdesc, values, nulls);
1205         }
1206
1207         LWLockRelease(pgss->lock);
1208
1209         /* clean up and return the tuplestore */
1210         tuplestore_donestoring(tupstore);
1211
1212         return (Datum) 0;
1213 }
1214
1215 /*
1216  * Estimate shared memory space needed.
1217  */
1218 static Size
1219 pgss_memsize(void)
1220 {
1221         Size            size;
1222         Size            entrysize;
1223
1224         size = MAXALIGN(sizeof(pgssSharedState));
1225         entrysize = offsetof(pgssEntry, query) +pgstat_track_activity_query_size;
1226         size = add_size(size, hash_estimate_size(pgss_max, entrysize));
1227
1228         return size;
1229 }
1230
1231 /*
1232  * Allocate a new hashtable entry.
1233  * caller must hold an exclusive lock on pgss->lock
1234  *
1235  * "query" need not be null-terminated; we rely on query_len instead
1236  *
1237  * If "sticky" is true, make the new entry artificially sticky so that it will
1238  * probably still be there when the query finishes execution.  We do this by
1239  * giving it a median usage value rather than the normal value.  (Strictly
1240  * speaking, query strings are normalized on a best effort basis, though it
1241  * would be difficult to demonstrate this even under artificial conditions.)
1242  *
1243  * Note: despite needing exclusive lock, it's not an error for the target
1244  * entry to already exist.      This is because pgss_store releases and
1245  * reacquires lock after failing to find a match; so someone else could
1246  * have made the entry while we waited to get exclusive lock.
1247  */
1248 static pgssEntry *
1249 entry_alloc(pgssHashKey *key, const char *query, int query_len, bool sticky)
1250 {
1251         pgssEntry  *entry;
1252         bool            found;
1253
1254         /* Make space if needed */
1255         while (hash_get_num_entries(pgss_hash) >= pgss_max)
1256                 entry_dealloc();
1257
1258         /* Find or create an entry with desired hash code */
1259         entry = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER, &found);
1260
1261         if (!found)
1262         {
1263                 /* New entry, initialize it */
1264
1265                 /* reset the statistics */
1266                 memset(&entry->counters, 0, sizeof(Counters));
1267                 /* set the appropriate initial usage count */
1268                 entry->counters.usage = sticky ? pgss->cur_median_usage : USAGE_INIT;
1269                 /* re-initialize the mutex each time ... we assume no one using it */
1270                 SpinLockInit(&entry->mutex);
1271                 /* ... and don't forget the query text */
1272                 Assert(query_len >= 0 && query_len < pgss->query_size);
1273                 entry->query_len = query_len;
1274                 memcpy(entry->query, query, query_len);
1275                 entry->query[query_len] = '\0';
1276         }
1277
1278         return entry;
1279 }
1280
1281 /*
1282  * qsort comparator for sorting into increasing usage order
1283  */
1284 static int
1285 entry_cmp(const void *lhs, const void *rhs)
1286 {
1287         double          l_usage = (*(pgssEntry *const *) lhs)->counters.usage;
1288         double          r_usage = (*(pgssEntry *const *) rhs)->counters.usage;
1289
1290         if (l_usage < r_usage)
1291                 return -1;
1292         else if (l_usage > r_usage)
1293                 return +1;
1294         else
1295                 return 0;
1296 }
1297
1298 /*
1299  * Deallocate least used entries.
1300  * Caller must hold an exclusive lock on pgss->lock.
1301  */
1302 static void
1303 entry_dealloc(void)
1304 {
1305         HASH_SEQ_STATUS hash_seq;
1306         pgssEntry **entries;
1307         pgssEntry  *entry;
1308         int                     nvictims;
1309         int                     i;
1310
1311         /*
1312          * Sort entries by usage and deallocate USAGE_DEALLOC_PERCENT of them.
1313          * While we're scanning the table, apply the decay factor to the usage
1314          * values.
1315          */
1316
1317         entries = palloc(hash_get_num_entries(pgss_hash) * sizeof(pgssEntry *));
1318
1319         i = 0;
1320         hash_seq_init(&hash_seq, pgss_hash);
1321         while ((entry = hash_seq_search(&hash_seq)) != NULL)
1322         {
1323                 entries[i++] = entry;
1324                 /* "Sticky" entries get a different usage decay rate. */
1325                 if (entry->counters.calls == 0)
1326                         entry->counters.usage *= STICKY_DECREASE_FACTOR;
1327                 else
1328                         entry->counters.usage *= USAGE_DECREASE_FACTOR;
1329         }
1330
1331         qsort(entries, i, sizeof(pgssEntry *), entry_cmp);
1332
1333         /* Also, record the (approximate) median usage */
1334         if (i > 0)
1335                 pgss->cur_median_usage = entries[i / 2]->counters.usage;
1336
1337         nvictims = Max(10, i * USAGE_DEALLOC_PERCENT / 100);
1338         nvictims = Min(nvictims, i);
1339
1340         for (i = 0; i < nvictims; i++)
1341         {
1342                 hash_search(pgss_hash, &entries[i]->key, HASH_REMOVE, NULL);
1343         }
1344
1345         pfree(entries);
1346 }
1347
1348 /*
1349  * Release all entries.
1350  */
1351 static void
1352 entry_reset(void)
1353 {
1354         HASH_SEQ_STATUS hash_seq;
1355         pgssEntry  *entry;
1356
1357         LWLockAcquire(pgss->lock, LW_EXCLUSIVE);
1358
1359         hash_seq_init(&hash_seq, pgss_hash);
1360         while ((entry = hash_seq_search(&hash_seq)) != NULL)
1361         {
1362                 hash_search(pgss_hash, &entry->key, HASH_REMOVE, NULL);
1363         }
1364
1365         LWLockRelease(pgss->lock);
1366 }
1367 #endif
1368
1369 /*
1370  * AppendJumble: Append a value that is substantive in a given query to
1371  * the current jumble.
1372  */
1373 static void
1374 AppendJumble(pgssJumbleState *jstate, const unsigned char *item, Size size)
1375 {
1376         unsigned char *jumble = jstate->jumble;
1377         Size            jumble_len = jstate->jumble_len;
1378
1379         /*
1380          * Whenever the jumble buffer is full, we hash the current contents and
1381          * reset the buffer to contain just that hash value, thus relying on the
1382          * hash to summarize everything so far.
1383          */
1384         while (size > 0)
1385         {
1386                 Size            part_size;
1387
1388                 if (jumble_len >= JUMBLE_SIZE)
1389                 {
1390                         uint32          start_hash = hash_any(jumble, JUMBLE_SIZE);
1391
1392                         memcpy(jumble, &start_hash, sizeof(start_hash));
1393                         jumble_len = sizeof(start_hash);
1394                 }
1395                 part_size = Min(size, JUMBLE_SIZE - jumble_len);
1396                 memcpy(jumble + jumble_len, item, part_size);
1397                 jumble_len += part_size;
1398                 item += part_size;
1399                 size -= part_size;
1400         }
1401         jstate->jumble_len = jumble_len;
1402 }
1403
/*
 * Convenience wrappers around AppendJumble().  Both expect a variable named
 * "jstate" to be in scope at the point of use.
 *
 * APP_JUMB(item) appends the raw bytes of "item", which must be an lvalue
 * since its address is taken.  APP_JUMB_STRING(str) appends a C string,
 * including its terminating NUL byte.
 */
#define APP_JUMB(item) \
	AppendJumble(jstate, (const unsigned char *) &(item), sizeof(item))

#define APP_JUMB_STRING(str) \
	AppendJumble(jstate, (const unsigned char *) (str), strlen(str) + 1)
1412
1413 /*
1414  * JumbleQuery: Selectively serialize the query tree, appending significant
1415  * data to the "query jumble" while ignoring nonsignificant data.
1416  *
1417  * Rule of thumb for what to include is that we should ignore anything not
1418  * semantically significant (such as alias names) as well as anything that can
1419  * be deduced from child nodes (else we'd just be double-hashing that piece
1420  * of information).
1421  */
1422 static void
1423 JumbleQuery(pgssJumbleState *jstate, Query *query)
1424 {
1425         Assert(IsA(query, Query));
1426         Assert(query->utilityStmt == NULL);
1427
1428         APP_JUMB(query->commandType);
1429         /* resultRelation is usually predictable from commandType */
1430         JumbleExpr(jstate, (Node *) query->cteList);
1431         JumbleRangeTable(jstate, query->rtable);
1432         JumbleExpr(jstate, (Node *) query->jointree);
1433         JumbleExpr(jstate, (Node *) query->targetList);
1434         JumbleExpr(jstate, (Node *) query->returningList);
1435         JumbleExpr(jstate, (Node *) query->groupClause);
1436         JumbleExpr(jstate, query->havingQual);
1437         JumbleExpr(jstate, (Node *) query->windowClause);
1438         JumbleExpr(jstate, (Node *) query->distinctClause);
1439         JumbleExpr(jstate, (Node *) query->sortClause);
1440         JumbleExpr(jstate, query->limitOffset);
1441         JumbleExpr(jstate, query->limitCount);
1442         /* we ignore rowMarks */
1443         JumbleExpr(jstate, query->setOperations);
1444 }
1445
1446 /*
1447  * Jumble a range table
1448  */
1449 static void
1450 JumbleRangeTable(pgssJumbleState *jstate, List *rtable)
1451 {
1452         ListCell   *lc;
1453
1454         foreach(lc, rtable)
1455         {
1456                 RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
1457
1458                 Assert(IsA(rte, RangeTblEntry));
1459                 APP_JUMB(rte->rtekind);
1460                 switch (rte->rtekind)
1461                 {
1462                         case RTE_RELATION:
1463                                 APP_JUMB(rte->relid);
1464                                 break;
1465                         case RTE_SUBQUERY:
1466                                 JumbleQuery(jstate, rte->subquery);
1467                                 break;
1468                         case RTE_JOIN:
1469                                 APP_JUMB(rte->jointype);
1470                                 break;
1471                         case RTE_FUNCTION:
1472                                 JumbleExpr(jstate, rte->funcexpr);
1473                                 break;
1474                         case RTE_VALUES:
1475                                 JumbleExpr(jstate, (Node *) rte->values_lists);
1476                                 break;
1477                         case RTE_CTE:
1478
1479                                 /*
1480                                  * Depending on the CTE name here isn't ideal, but it's the
1481                                  * only info we have to identify the referenced WITH item.
1482                                  */
1483                                 APP_JUMB_STRING(rte->ctename);
1484                                 APP_JUMB(rte->ctelevelsup);
1485                                 break;
1486                         default:
1487                                 elog(ERROR, "unrecognized RTE kind: %d", (int) rte->rtekind);
1488                                 break;
1489                 }
1490         }
1491 }
1492
1493 /*
1494  * Jumble an expression tree
1495  *
1496  * In general this function should handle all the same node types that
1497  * expression_tree_walker() does, and therefore it's coded to be as parallel
1498  * to that function as possible.  However, since we are only invoked on
1499  * queries immediately post-parse-analysis, we need not handle node types
1500  * that only appear in planning.
1501  *
1502  * Note: the reason we don't simply use expression_tree_walker() is that the
1503  * point of that function is to support tree walkers that don't care about
1504  * most tree node types, but here we care about all types.      We should complain
1505  * about any unrecognized node type.
1506  */
1507 static void
1508 JumbleExpr(pgssJumbleState *jstate, Node *node)
1509 {
1510         ListCell   *temp;
1511
1512         if (node == NULL)
1513                 return;
1514
1515         /* Guard against stack overflow due to overly complex expressions */
1516         check_stack_depth();
1517
1518         /*
1519          * We always emit the node's NodeTag, then any additional fields that are
1520          * considered significant, and then we recurse to any child nodes.
1521          */
1522         APP_JUMB(node->type);
1523
1524         switch (nodeTag(node))
1525         {
1526                 case T_Var:
1527                         {
1528                                 Var                *var = (Var *) node;
1529
1530                                 APP_JUMB(var->varno);
1531                                 APP_JUMB(var->varattno);
1532                                 APP_JUMB(var->varlevelsup);
1533                         }
1534                         break;
1535                 case T_Const:
1536                         {
1537                                 Const      *c = (Const *) node;
1538
1539                                 /* We jumble only the constant's type, not its value */
1540                                 APP_JUMB(c->consttype);
1541                                 /* Also, record its parse location for query normalization */
1542                                 RecordConstLocation(jstate, c->location);
1543                         }
1544                         break;
1545                 case T_Param:
1546                         {
1547                                 Param      *p = (Param *) node;
1548
1549                                 APP_JUMB(p->paramkind);
1550                                 APP_JUMB(p->paramid);
1551                                 APP_JUMB(p->paramtype);
1552                         }
1553                         break;
1554                 case T_Aggref:
1555                         {
1556                                 Aggref     *expr = (Aggref *) node;
1557
1558                                 APP_JUMB(expr->aggfnoid);
1559                                 JumbleExpr(jstate, (Node *) expr->args);
1560                                 JumbleExpr(jstate, (Node *) expr->aggorder);
1561                                 JumbleExpr(jstate, (Node *) expr->aggdistinct);
1562                         }
1563                         break;
1564                 case T_WindowFunc:
1565                         {
1566                                 WindowFunc *expr = (WindowFunc *) node;
1567
1568                                 APP_JUMB(expr->winfnoid);
1569                                 APP_JUMB(expr->winref);
1570                                 JumbleExpr(jstate, (Node *) expr->args);
1571                         }
1572                         break;
1573                 case T_ArrayRef:
1574                         {
1575                                 ArrayRef   *aref = (ArrayRef *) node;
1576
1577                                 JumbleExpr(jstate, (Node *) aref->refupperindexpr);
1578                                 JumbleExpr(jstate, (Node *) aref->reflowerindexpr);
1579                                 JumbleExpr(jstate, (Node *) aref->refexpr);
1580                                 JumbleExpr(jstate, (Node *) aref->refassgnexpr);
1581                         }
1582                         break;
1583                 case T_FuncExpr:
1584                         {
1585                                 FuncExpr   *expr = (FuncExpr *) node;
1586
1587                                 APP_JUMB(expr->funcid);
1588                                 JumbleExpr(jstate, (Node *) expr->args);
1589                         }
1590                         break;
1591                 case T_NamedArgExpr:
1592                         {
1593                                 NamedArgExpr *nae = (NamedArgExpr *) node;
1594
1595                                 APP_JUMB(nae->argnumber);
1596                                 JumbleExpr(jstate, (Node *) nae->arg);
1597                         }
1598                         break;
1599                 case T_OpExpr:
1600                 case T_DistinctExpr:    /* struct-equivalent to OpExpr */
1601                 case T_NullIfExpr:              /* struct-equivalent to OpExpr */
1602                         {
1603                                 OpExpr     *expr = (OpExpr *) node;
1604
1605                                 APP_JUMB(expr->opno);
1606                                 JumbleExpr(jstate, (Node *) expr->args);
1607                         }
1608                         break;
1609                 case T_ScalarArrayOpExpr:
1610                         {
1611                                 ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) node;
1612
1613                                 APP_JUMB(expr->opno);
1614                                 APP_JUMB(expr->useOr);
1615                                 JumbleExpr(jstate, (Node *) expr->args);
1616                         }
1617                         break;
1618                 case T_BoolExpr:
1619                         {
1620                                 BoolExpr   *expr = (BoolExpr *) node;
1621
1622                                 APP_JUMB(expr->boolop);
1623                                 JumbleExpr(jstate, (Node *) expr->args);
1624                         }
1625                         break;
1626                 case T_SubLink:
1627                         {
1628                                 SubLink    *sublink = (SubLink *) node;
1629
1630                                 APP_JUMB(sublink->subLinkType);
1631                                 JumbleExpr(jstate, (Node *) sublink->testexpr);
1632                                 JumbleQuery(jstate, (Query *) sublink->subselect);
1633                         }
1634                         break;
1635                 case T_FieldSelect:
1636                         {
1637                                 FieldSelect *fs = (FieldSelect *) node;
1638
1639                                 APP_JUMB(fs->fieldnum);
1640                                 JumbleExpr(jstate, (Node *) fs->arg);
1641                         }
1642                         break;
1643                 case T_FieldStore:
1644                         {
1645                                 FieldStore *fstore = (FieldStore *) node;
1646
1647                                 JumbleExpr(jstate, (Node *) fstore->arg);
1648                                 JumbleExpr(jstate, (Node *) fstore->newvals);
1649                         }
1650                         break;
1651                 case T_RelabelType:
1652                         {
1653                                 RelabelType *rt = (RelabelType *) node;
1654
1655                                 APP_JUMB(rt->resulttype);
1656                                 JumbleExpr(jstate, (Node *) rt->arg);
1657                         }
1658                         break;
1659                 case T_CoerceViaIO:
1660                         {
1661                                 CoerceViaIO *cio = (CoerceViaIO *) node;
1662
1663                                 APP_JUMB(cio->resulttype);
1664                                 JumbleExpr(jstate, (Node *) cio->arg);
1665                         }
1666                         break;
1667                 case T_ArrayCoerceExpr:
1668                         {
1669                                 ArrayCoerceExpr *acexpr = (ArrayCoerceExpr *) node;
1670
1671                                 APP_JUMB(acexpr->resulttype);
1672                                 JumbleExpr(jstate, (Node *) acexpr->arg);
1673                         }
1674                         break;
1675                 case T_ConvertRowtypeExpr:
1676                         {
1677                                 ConvertRowtypeExpr *crexpr = (ConvertRowtypeExpr *) node;
1678
1679                                 APP_JUMB(crexpr->resulttype);
1680                                 JumbleExpr(jstate, (Node *) crexpr->arg);
1681                         }
1682                         break;
1683                 case T_CollateExpr:
1684                         {
1685                                 CollateExpr *ce = (CollateExpr *) node;
1686
1687                                 APP_JUMB(ce->collOid);
1688                                 JumbleExpr(jstate, (Node *) ce->arg);
1689                         }
1690                         break;
1691                 case T_CaseExpr:
1692                         {
1693                                 CaseExpr   *caseexpr = (CaseExpr *) node;
1694
1695                                 JumbleExpr(jstate, (Node *) caseexpr->arg);
1696                                 foreach(temp, caseexpr->args)
1697                                 {
1698                                         CaseWhen   *when = (CaseWhen *) lfirst(temp);
1699
1700                                         Assert(IsA(when, CaseWhen));
1701                                         JumbleExpr(jstate, (Node *) when->expr);
1702                                         JumbleExpr(jstate, (Node *) when->result);
1703                                 }
1704                                 JumbleExpr(jstate, (Node *) caseexpr->defresult);
1705                         }
1706                         break;
1707                 case T_CaseTestExpr:
1708                         {
1709                                 CaseTestExpr *ct = (CaseTestExpr *) node;
1710
1711                                 APP_JUMB(ct->typeId);
1712                         }
1713                         break;
1714                 case T_ArrayExpr:
1715                         JumbleExpr(jstate, (Node *) ((ArrayExpr *) node)->elements);
1716                         break;
1717                 case T_RowExpr:
1718                         JumbleExpr(jstate, (Node *) ((RowExpr *) node)->args);
1719                         break;
1720                 case T_RowCompareExpr:
1721                         {
1722                                 RowCompareExpr *rcexpr = (RowCompareExpr *) node;
1723
1724                                 APP_JUMB(rcexpr->rctype);
1725                                 JumbleExpr(jstate, (Node *) rcexpr->largs);
1726                                 JumbleExpr(jstate, (Node *) rcexpr->rargs);
1727                         }
1728                         break;
1729                 case T_CoalesceExpr:
1730                         JumbleExpr(jstate, (Node *) ((CoalesceExpr *) node)->args);
1731                         break;
1732                 case T_MinMaxExpr:
1733                         {
1734                                 MinMaxExpr *mmexpr = (MinMaxExpr *) node;
1735
1736                                 APP_JUMB(mmexpr->op);
1737                                 JumbleExpr(jstate, (Node *) mmexpr->args);
1738                         }
1739                         break;
1740                 case T_XmlExpr:
1741                         {
1742                                 XmlExpr    *xexpr = (XmlExpr *) node;
1743
1744                                 APP_JUMB(xexpr->op);
1745                                 JumbleExpr(jstate, (Node *) xexpr->named_args);
1746                                 JumbleExpr(jstate, (Node *) xexpr->args);
1747                         }
1748                         break;
1749                 case T_NullTest:
1750                         {
1751                                 NullTest   *nt = (NullTest *) node;
1752
1753                                 APP_JUMB(nt->nulltesttype);
1754                                 JumbleExpr(jstate, (Node *) nt->arg);
1755                         }
1756                         break;
1757                 case T_BooleanTest:
1758                         {
1759                                 BooleanTest *bt = (BooleanTest *) node;
1760
1761                                 APP_JUMB(bt->booltesttype);
1762                                 JumbleExpr(jstate, (Node *) bt->arg);
1763                         }
1764                         break;
1765                 case T_CoerceToDomain:
1766                         {
1767                                 CoerceToDomain *cd = (CoerceToDomain *) node;
1768
1769                                 APP_JUMB(cd->resulttype);
1770                                 JumbleExpr(jstate, (Node *) cd->arg);
1771                         }
1772                         break;
1773                 case T_CoerceToDomainValue:
1774                         {
1775                                 CoerceToDomainValue *cdv = (CoerceToDomainValue *) node;
1776
1777                                 APP_JUMB(cdv->typeId);
1778                         }
1779                         break;
1780                 case T_SetToDefault:
1781                         {
1782                                 SetToDefault *sd = (SetToDefault *) node;
1783
1784                                 APP_JUMB(sd->typeId);
1785                         }
1786                         break;
1787                 case T_CurrentOfExpr:
1788                         {
1789                                 CurrentOfExpr *ce = (CurrentOfExpr *) node;
1790
1791                                 APP_JUMB(ce->cvarno);
1792                                 if (ce->cursor_name)
1793                                         APP_JUMB_STRING(ce->cursor_name);
1794                                 APP_JUMB(ce->cursor_param);
1795                         }
1796                         break;
1797                 case T_TargetEntry:
1798                         {
1799                                 TargetEntry *tle = (TargetEntry *) node;
1800
1801                                 APP_JUMB(tle->resno);
1802                                 APP_JUMB(tle->ressortgroupref);
1803                                 JumbleExpr(jstate, (Node *) tle->expr);
1804                         }
1805                         break;
1806                 case T_RangeTblRef:
1807                         {
1808                                 RangeTblRef *rtr = (RangeTblRef *) node;
1809
1810                                 APP_JUMB(rtr->rtindex);
1811                         }
1812                         break;
1813                 case T_JoinExpr:
1814                         {
1815                                 JoinExpr   *join = (JoinExpr *) node;
1816
1817                                 APP_JUMB(join->jointype);
1818                                 APP_JUMB(join->isNatural);
1819                                 APP_JUMB(join->rtindex);
1820                                 JumbleExpr(jstate, join->larg);
1821                                 JumbleExpr(jstate, join->rarg);
1822                                 JumbleExpr(jstate, join->quals);
1823                         }
1824                         break;
1825                 case T_FromExpr:
1826                         {
1827                                 FromExpr   *from = (FromExpr *) node;
1828
1829                                 JumbleExpr(jstate, (Node *) from->fromlist);
1830                                 JumbleExpr(jstate, from->quals);
1831                         }
1832                         break;
1833                 case T_List:
1834                         foreach(temp, (List *) node)
1835                         {
1836                                 JumbleExpr(jstate, (Node *) lfirst(temp));
1837                         }
1838                         break;
1839                 case T_SortGroupClause:
1840                         {
1841                                 SortGroupClause *sgc = (SortGroupClause *) node;
1842
1843                                 APP_JUMB(sgc->tleSortGroupRef);
1844                                 APP_JUMB(sgc->eqop);
1845                                 APP_JUMB(sgc->sortop);
1846                                 APP_JUMB(sgc->nulls_first);
1847                         }
1848                         break;
1849                 case T_WindowClause:
1850                         {
1851                                 WindowClause *wc = (WindowClause *) node;
1852
1853                                 APP_JUMB(wc->winref);
1854                                 APP_JUMB(wc->frameOptions);
1855                                 JumbleExpr(jstate, (Node *) wc->partitionClause);
1856                                 JumbleExpr(jstate, (Node *) wc->orderClause);
1857                                 JumbleExpr(jstate, wc->startOffset);
1858                                 JumbleExpr(jstate, wc->endOffset);
1859                         }
1860                         break;
1861                 case T_CommonTableExpr:
1862                         {
1863                                 CommonTableExpr *cte = (CommonTableExpr *) node;
1864
1865                                 /* we store the string name because RTE_CTE RTEs need it */
1866                                 APP_JUMB_STRING(cte->ctename);
1867                                 JumbleQuery(jstate, (Query *) cte->ctequery);
1868                         }
1869                         break;
1870                 case T_SetOperationStmt:
1871                         {
1872                                 SetOperationStmt *setop = (SetOperationStmt *) node;
1873
1874                                 APP_JUMB(setop->op);
1875                                 APP_JUMB(setop->all);
1876                                 JumbleExpr(jstate, setop->larg);
1877                                 JumbleExpr(jstate, setop->rarg);
1878                         }
1879                         break;
1880                 default:
1881                         /* Only a warning, since we can stumble along anyway */
1882                         elog(WARNING, "unrecognized node type: %d",
1883                                  (int) nodeTag(node));
1884                         break;
1885         }
1886 }
1887
1888 /*
1889  * Record location of constant within query string of query tree
1890  * that is currently being walked.
1891  */
1892 static void
1893 RecordConstLocation(pgssJumbleState *jstate, int location)
1894 {
1895         /* -1 indicates unknown or undefined location */
1896         if (location >= 0)
1897         {
1898                 /* enlarge array if needed */
1899                 if (jstate->clocations_count >= jstate->clocations_buf_size)
1900                 {
1901                         jstate->clocations_buf_size *= 2;
1902                         jstate->clocations = (pgssLocationLen *)
1903                                 repalloc(jstate->clocations,
1904                                                  jstate->clocations_buf_size *
1905                                                  sizeof(pgssLocationLen));
1906                 }
1907                 jstate->clocations[jstate->clocations_count].location = location;
1908                 /* initialize lengths to -1 to simplify fill_in_constant_lengths */
1909                 jstate->clocations[jstate->clocations_count].length = -1;
1910                 jstate->clocations_count++;
1911         }
1912 }
1913
1914 /*
1915  * Generate a normalized version of the query string that will be used to
1916  * represent all similar queries.
1917  *
1918  * Note that the normalized representation may well vary depending on
1919  * just which "equivalent" query is used to create the hashtable entry.
1920  * We assume this is OK.
1921  *
1922  * *query_len_p contains the input string length, and is updated with
1923  * the result string length (which cannot be longer) on exit.
1924  *
1925  * Returns a palloc'd string, which is not necessarily null-terminated.
1926  */
1927 static char *
1928 generate_normalized_query(pgssJumbleState *jstate, const char *query,
1929                                                   int *query_len_p, int encoding)
1930 {
1931         char       *norm_query;
1932         int                     query_len = *query_len_p;
1933         int                     max_output_len;
1934         int                     i,
1935                                 len_to_wrt,             /* Length (in bytes) to write */
1936                                 quer_loc = 0,   /* Source query byte location */
1937                                 n_quer_loc = 0, /* Normalized query byte location */
1938                                 last_off = 0,   /* Offset from start for previous tok */
1939                                 last_tok_len = 0;               /* Length (in bytes) of that tok */
1940
1941         /*
1942          * Get constants' lengths (core system only gives us locations).  Note
1943          * this also ensures the items are sorted by location.
1944          */
1945         fill_in_constant_lengths(jstate, query);
1946
1947         /* Allocate result buffer, ensuring we limit result to allowed size */
1948 #ifdef NOT_USED
1949         max_output_len = Min(query_len, pgss->query_size - 1);
1950 #endif
1951         /* XXX: pg_hint_plan doesn't truncate query string. */
1952         max_output_len = query_len;
1953         norm_query = palloc(max_output_len);
1954
1955         for (i = 0; i < jstate->clocations_count; i++)
1956         {
1957                 int                     off,            /* Offset from start for cur tok */
1958                                         tok_len;        /* Length (in bytes) of that tok */
1959
1960                 off = jstate->clocations[i].location;
1961                 tok_len = jstate->clocations[i].length;
1962
1963                 if (tok_len < 0)
1964                         continue;                       /* ignore any duplicates */
1965
1966                 /* Copy next chunk, or as much as will fit */
1967                 len_to_wrt = off - last_off;
1968                 len_to_wrt -= last_tok_len;
1969                 len_to_wrt = Min(len_to_wrt, max_output_len - n_quer_loc);
1970
1971                 Assert(len_to_wrt >= 0);
1972                 memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
1973                 n_quer_loc += len_to_wrt;
1974
1975                 if (n_quer_loc < max_output_len)
1976                         norm_query[n_quer_loc++] = '?';
1977
1978                 quer_loc = off + tok_len;
1979                 last_off = off;
1980                 last_tok_len = tok_len;
1981
1982                 /* If we run out of space, might as well stop iterating */
1983                 if (n_quer_loc >= max_output_len)
1984                         break;
1985         }
1986
1987         /*
1988          * We've copied up until the last ignorable constant.  Copy over the
1989          * remaining bytes of the original query string, or at least as much as
1990          * will fit.
1991          */
1992         len_to_wrt = query_len - quer_loc;
1993         len_to_wrt = Min(len_to_wrt, max_output_len - n_quer_loc);
1994
1995         Assert(len_to_wrt >= 0);
1996         memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
1997         n_quer_loc += len_to_wrt;
1998
1999         /* XXX: pg_hint_plan doesn't truncate query string. */
2000 #ifdef NOT_USED
2001         /*
2002          * If we ran out of space, we need to do an encoding-aware truncation,
2003          * just to make sure we don't have an incomplete character at the end.
2004          */
2005         if (n_quer_loc >= max_output_len)
2006                 query_len = pg_encoding_mbcliplen(encoding,
2007                                                                                   norm_query,
2008                                                                                   n_quer_loc,
2009                                                                                   pgss->query_size - 1);
2010         else
2011 #endif
2012                 query_len = n_quer_loc;
2013
2014         *query_len_p = query_len;
2015         return norm_query;
2016 }
2017
2018 /*
2019  * Given a valid SQL string and an array of constant-location records,
2020  * fill in the textual lengths of those constants.
2021  *
2022  * The constants may use any allowed constant syntax, such as float literals,
2023  * bit-strings, single-quoted strings and dollar-quoted strings.  This is
2024  * accomplished by using the public API for the core scanner.
2025  *
2026  * It is the caller's job to ensure that the string is a valid SQL statement
2027  * with constants at the indicated locations.  Since in practice the string
2028  * has already been parsed, and the locations that the caller provides will
2029  * have originated from within the authoritative parser, this should not be
2030  * a problem.
2031  *
2032  * Duplicate constant pointers are possible, and will have their lengths
2033  * marked as '-1', so that they are later ignored.      (Actually, we assume the
2034  * lengths were initialized as -1 to start with, and don't change them here.)
2035  *
2036  * N.B. There is an assumption that a '-' character at a Const location begins
2037  * a negative numeric constant.  This precludes there ever being another
2038  * reason for a constant to start with a '-'.
2039  */
2040 static void
2041 fill_in_constant_lengths(pgssJumbleState *jstate, const char *query)
2042 {
2043         pgssLocationLen *locs;
2044         core_yyscan_t yyscanner;
2045         core_yy_extra_type yyextra;
2046         core_YYSTYPE yylval;
2047         YYLTYPE         yylloc;
2048         int                     last_loc = -1;
2049         int                     i;
2050
2051         /*
2052          * Sort the records by location so that we can process them in order while
2053          * scanning the query text.
2054          */
2055         if (jstate->clocations_count > 1)
2056                 qsort(jstate->clocations, jstate->clocations_count,
2057                           sizeof(pgssLocationLen), comp_location);
2058         locs = jstate->clocations;
2059
2060         /* initialize the flex scanner --- should match raw_parser() */
2061         yyscanner = scanner_init(query,
2062                                                          &yyextra,
2063                                                          ScanKeywords,
2064                                                          NumScanKeywords);
2065
2066         /* Search for each constant, in sequence */
2067         for (i = 0; i < jstate->clocations_count; i++)
2068         {
2069                 int                     loc = locs[i].location;
2070                 int                     tok;
2071
2072                 Assert(loc >= 0);
2073
2074                 if (loc <= last_loc)
2075                         continue;                       /* Duplicate constant, ignore */
2076
2077                 /* Lex tokens until we find the desired constant */
2078                 for (;;)
2079                 {
2080                         tok = core_yylex(&yylval, &yylloc, yyscanner);
2081
2082                         /* We should not hit end-of-string, but if we do, behave sanely */
2083                         if (tok == 0)
2084                                 break;                  /* out of inner for-loop */
2085
2086                         /*
2087                          * We should find the token position exactly, but if we somehow
2088                          * run past it, work with that.
2089                          */
2090                         if (yylloc >= loc)
2091                         {
2092                                 if (query[loc] == '-')
2093                                 {
2094                                         /*
2095                                          * It's a negative value - this is the one and only case
2096                                          * where we replace more than a single token.
2097                                          *
2098                                          * Do not compensate for the core system's special-case
2099                                          * adjustment of location to that of the leading '-'
2100                                          * operator in the event of a negative constant.  It is
2101                                          * also useful for our purposes to start from the minus
2102                                          * symbol.      In this way, queries like "select * from foo
2103                                          * where bar = 1" and "select * from foo where bar = -2"
2104                                          * will have identical normalized query strings.
2105                                          */
2106                                         tok = core_yylex(&yylval, &yylloc, yyscanner);
2107                                         if (tok == 0)
2108                                                 break;  /* out of inner for-loop */
2109                                 }
2110
2111                                 /*
2112                                  * We now rely on the assumption that flex has placed a zero
2113                                  * byte after the text of the current token in scanbuf.
2114                                  */
2115                                 locs[i].length = strlen(yyextra.scanbuf + loc);
2116                                 break;                  /* out of inner for-loop */
2117                         }
2118                 }
2119
2120                 /* If we hit end-of-string, give up, leaving remaining lengths -1 */
2121                 if (tok == 0)
2122                         break;
2123
2124                 last_loc = loc;
2125         }
2126
2127         scanner_finish(yyscanner);
2128 }
2129
2130 /*
2131  * comp_location: comparator for qsorting pgssLocationLen structs by location
2132  */
2133 static int
2134 comp_location(const void *a, const void *b)
2135 {
2136         int                     l = ((const pgssLocationLen *) a)->location;
2137         int                     r = ((const pgssLocationLen *) b)->location;
2138
2139         if (l < r)
2140                 return -1;
2141         else if (l > r)
2142                 return +1;
2143         else
2144                 return 0;
2145 }
2146