OSDN Git Service

Remove unused struct member
[pghintplan/pg_hint_plan.git] / pg_stat_statements.c
1 /*-------------------------------------------------------------------------
2  *
3  * pg_stat_statements.c
4  *              Track statement execution times across a whole database cluster.
5  *
6  * Execution costs are totalled for each distinct source query, and kept in
7  * a shared hashtable.  (We track only as many distinct queries as will fit
8  * in the designated amount of shared memory.)
9  *
10  * As of Postgres 9.2, this module normalizes query entries.  Normalization
11  * is a process whereby similar queries, typically differing only in their
12  * constants (though the exact rules are somewhat more subtle than that) are
13  * recognized as equivalent, and are tracked as a single entry.  This is
14  * particularly useful for non-prepared queries.
15  *
16  * Normalization is implemented by fingerprinting queries, selectively
17  * serializing those fields of each query tree's nodes that are judged to be
18  * essential to the query.      This is referred to as a query jumble.  This is
19  * distinct from a regular serialization in that various extraneous
20  * information is ignored as irrelevant or not essential to the query, such
21  * as the collations of Vars and, most notably, the values of constants.
22  *
23  * This jumble is acquired at the end of parse analysis of each query, and
24  * a 32-bit hash of it is stored into the query's Query.queryId field.
25  * The server then copies this value around, making it available in plan
26  * tree(s) generated from the query.  The executor can then use this value
27  * to blame query costs on the proper queryId.
28  *
29  * Note about locking issues: to create or delete an entry in the shared
30  * hashtable, one must hold pgss->lock exclusively.  Modifying any field
31  * in an entry except the counters requires the same.  To look up an entry,
32  * one must hold the lock shared.  To read or update the counters within
33  * an entry, one must hold the lock shared or exclusive (so the entry doesn't
34  * disappear!) and also take the entry's mutex spinlock.
35  *
36  *
37  * Copyright (c) 2008-2014, PostgreSQL Global Development Group
38  *
39  * IDENTIFICATION
40  *        contrib/pg_stat_statements/pg_stat_statements.c
41  *
42  *-------------------------------------------------------------------------
43  */
44 #include "postgres.h"
45
46 #ifdef NOT_USED
47
48 #include <unistd.h>
49
50 #endif
51 #include "access/hash.h"
52 #ifdef NOT_USED
53 #include "executor/instrument.h"
54 #include "funcapi.h"
55 #include "mb/pg_wchar.h"
56 #include "miscadmin.h"
57 #include "parser/analyze.h"
58 #include "parser/parsetree.h"
59 #endif
60 #include "parser/scanner.h"
61 #ifdef NOT_USED
62 #include "pgstat.h"
63 #include "storage/fd.h"
64 #include "storage/ipc.h"
65 #include "storage/spin.h"
66 #include "tcop/utility.h"
67 #include "utils/builtins.h"
68
69
70 PG_MODULE_MAGIC;
71
72 /* Location of stats file */
73 #define PGSS_DUMP_FILE  "global/pg_stat_statements.stat"
74
75 /* This constant defines the magic number in the stats file header */
76 static const uint32 PGSS_FILE_HEADER = 0x20120328;
77
78 /* XXX: Should USAGE_EXEC reflect execution time and/or buffer usage? */
79 #define USAGE_EXEC(duration)    (1.0)
80 #define USAGE_INIT                              (1.0)   /* including initial planning */
81 #define ASSUMED_MEDIAN_INIT             (10.0)  /* initial assumed median usage */
82 #define USAGE_DECREASE_FACTOR   (0.99)  /* decreased every entry_dealloc */
83 #define STICKY_DECREASE_FACTOR  (0.50)  /* factor for sticky entries */
84 #define USAGE_DEALLOC_PERCENT   5               /* free this % of entries at once */
85
86 #define JUMBLE_SIZE                             1024    /* query serialization buffer size */
87
88 /*
89  * Hashtable key that defines the identity of a hashtable entry.  We separate
90  * queries by user and by database even if they are otherwise identical.
91  *
92  * Presently, the query encoding is fully determined by the source database
93  * and so we don't really need it to be in the key.  But that might not always
94  * be true. Anyway it's notationally convenient to pass it as part of the key.
95  */
96 typedef struct pgssHashKey
97 {
98         Oid                     userid;                 /* user OID */
99         Oid                     dbid;                   /* database OID */
100         int                     encoding;               /* query encoding */
101         uint32          queryid;                /* query identifier */
102 } pgssHashKey;
103
104 /*
105  * The actual stats counters kept within pgssEntry.
106  */
107 typedef struct Counters
108 {
109         int64           calls;                  /* # of times executed */
110         double          total_time;             /* total execution time, in msec */
111         int64           rows;                   /* total # of retrieved or affected rows */
112         int64           shared_blks_hit;        /* # of shared buffer hits */
113         int64           shared_blks_read;               /* # of shared disk blocks read */
114         int64           shared_blks_dirtied;    /* # of shared disk blocks dirtied */
115         int64           shared_blks_written;    /* # of shared disk blocks written */
116         int64           local_blks_hit; /* # of local buffer hits */
117         int64           local_blks_read;        /* # of local disk blocks read */
118         int64           local_blks_dirtied;             /* # of local disk blocks dirtied */
119         int64           local_blks_written;             /* # of local disk blocks written */
120         int64           temp_blks_read; /* # of temp blocks read */
121         int64           temp_blks_written;              /* # of temp blocks written */
122         double          blk_read_time;  /* time spent reading, in msec */
123         double          blk_write_time; /* time spent writing, in msec */
124         double          usage;                  /* usage factor */
125 } Counters;
126
127 /*
128  * Statistics per statement
129  *
130  * NB: see the file read/write code before changing field order here.
131  */
132 typedef struct pgssEntry
133 {
134         pgssHashKey key;                        /* hash key of entry - MUST BE FIRST */
135         Counters        counters;               /* the statistics for this query */
136         int                     query_len;              /* # of valid bytes in query string */
137         slock_t         mutex;                  /* protects the counters only */
138         char            query[1];               /* VARIABLE LENGTH ARRAY - MUST BE LAST */
139         /* Note: the allocated length of query[] is actually pgss->query_size */
140 } pgssEntry;
141
142 /*
143  * Global shared state
144  */
145 typedef struct pgssSharedState
146 {
147         LWLockId        lock;                   /* protects hashtable search/modification */
148         int                     query_size;             /* max query length in bytes */
149         double          cur_median_usage;               /* current median usage in hashtable */
150 } pgssSharedState;
151
/*
 * Position and extent of one constant in the query text, recorded while
 * normalizing a query
 */
typedef struct pgssLocationLen
{
	/* start offset in query text */
	int			location;

	/* length in bytes, or -1 to ignore */
	int			length;
} pgssLocationLen;
160
161 /*
162  * Working state for computing a query jumble and producing a normalized
163  * query string
164  */
165 typedef struct pgssJumbleState
166 {
167         /* Jumble of current query tree */
168         unsigned char *jumble;
169
170         /* Number of bytes used in jumble[] */
171         Size            jumble_len;
172
173         /* Array of locations of constants that should be removed */
174         pgssLocationLen *clocations;
175
176         /* Allocated length of clocations array */
177         int                     clocations_buf_size;
178
179         /* Current number of valid entries in clocations array */
180         int                     clocations_count;
181 } pgssJumbleState;
182
183 /*---- Local variables ----*/
184
185 /* Current nesting depth of ExecutorRun+ProcessUtility calls */
186 static int      nested_level = 0;
187
188 /* Saved hook values in case of unload */
189 static shmem_startup_hook_type prev_shmem_startup_hook = NULL;
190 static post_parse_analyze_hook_type prev_post_parse_analyze_hook = NULL;
191 static ExecutorStart_hook_type prev_ExecutorStart = NULL;
192 static ExecutorRun_hook_type prev_ExecutorRun = NULL;
193 static ExecutorFinish_hook_type prev_ExecutorFinish = NULL;
194 static ExecutorEnd_hook_type prev_ExecutorEnd = NULL;
195 static ProcessUtility_hook_type prev_ProcessUtility = NULL;
196
197 /* Links to shared memory state */
198 static pgssSharedState *pgss = NULL;
199 static HTAB *pgss_hash = NULL;
200
201 /*---- GUC variables ----*/
202
/* Possible settings of the statement tracking level */
typedef enum
{
	/* track no statements */
	PGSS_TRACK_NONE = 0,

	/* only top level statements */
	PGSS_TRACK_TOP = 1,

	/* all statements, including nested ones */
	PGSS_TRACK_ALL = 2
} PGSSTrackLevel;
209
210 static const struct config_enum_entry track_options[] =
211 {
212         {"none", PGSS_TRACK_NONE, false},
213         {"top", PGSS_TRACK_TOP, false},
214         {"all", PGSS_TRACK_ALL, false},
215         {NULL, 0, false}
216 };
217
218 static int      pgss_max;                       /* max # statements to track */
219 static int      pgss_track;                     /* tracking level */
220 static bool pgss_track_utility; /* whether to track utility commands */
221 static bool pgss_save;                  /* whether to save stats across shutdown */
222
223
/*
 * True when the current statement should be tracked: always under
 * PGSS_TRACK_ALL, and under PGSS_TRACK_TOP only when not nested.
 */
#define pgss_enabled() \
	((pgss_track == PGSS_TRACK_TOP && nested_level == 0) || \
	 pgss_track == PGSS_TRACK_ALL)
227
228 /*---- Function declarations ----*/
229
230 void            _PG_init(void);
231 void            _PG_fini(void);
232
233 Datum           pg_stat_statements_reset(PG_FUNCTION_ARGS);
234 Datum           pg_stat_statements(PG_FUNCTION_ARGS);
235
236 PG_FUNCTION_INFO_V1(pg_stat_statements_reset);
237 PG_FUNCTION_INFO_V1(pg_stat_statements);
238
239 static void pgss_shmem_startup(void);
240 static void pgss_shmem_shutdown(int code, Datum arg);
241 static void pgss_post_parse_analyze(ParseState *pstate, Query *query);
242 static void pgss_ExecutorStart(QueryDesc *queryDesc, int eflags);
243 static void pgss_ExecutorRun(QueryDesc *queryDesc,
244                                  ScanDirection direction,
245                                  long count);
246 static void pgss_ExecutorFinish(QueryDesc *queryDesc);
247 static void pgss_ExecutorEnd(QueryDesc *queryDesc);
248 static void pgss_ProcessUtility(Node *parsetree,
249                           const char *queryString, ParamListInfo params, bool isTopLevel,
250                                         DestReceiver *dest, char *completionTag);
251 static uint32 pgss_hash_fn(const void *key, Size keysize);
252 static int      pgss_match_fn(const void *key1, const void *key2, Size keysize);
253 static uint32 pgss_hash_string(const char *str);
254 static void pgss_store(const char *query, uint32 queryId,
255                    double total_time, uint64 rows,
256                    const BufferUsage *bufusage,
257                    pgssJumbleState *jstate);
258 static Size pgss_memsize(void);
259 static pgssEntry *entry_alloc(pgssHashKey *key, const char *query,
260                         int query_len, bool sticky);
261 static void entry_dealloc(void);
262 static void entry_reset(void);
263 #endif
264 static void AppendJumble(pgssJumbleState *jstate,
265                          const unsigned char *item, Size size);
266 #ifdef NOT_USED
267 static void JumbleQuery(pgssJumbleState *jstate, Query *query);
268 #endif
269 static void JumbleRangeTable(pgssJumbleState *jstate, List *rtable);
270 static void JumbleExpr(pgssJumbleState *jstate, Node *node);
271 static void RecordConstLocation(pgssJumbleState *jstate, int location);
272 #ifdef NOT_USED
273 static char *generate_normalized_query(pgssJumbleState *jstate, const char *query,
274                                                   int *query_len_p, int encoding);
275 #endif
276 static void fill_in_constant_lengths(pgssJumbleState *jstate, const char *query);
277 static int      comp_location(const void *a, const void *b);
278 #ifdef NOT_USED
279
280
281 /*
282  * Module load callback
283  */
284 void
285 _PG_init(void)
286 {
287         /*
288          * In order to create our shared memory area, we have to be loaded via
289          * shared_preload_libraries.  If not, fall out without hooking into any of
290          * the main system.  (We don't throw error here because it seems useful to
291          * allow the pg_stat_statements functions to be created even when the
292          * module isn't active.  The functions must protect themselves against
293          * being called then, however.)
294          */
295         if (!process_shared_preload_libraries_in_progress)
296                 return;
297
298         /*
299          * Define (or redefine) custom GUC variables.
300          */
301         DefineCustomIntVariable("pg_stat_statements.max",
302           "Sets the maximum number of statements tracked by pg_stat_statements.",
303                                                         NULL,
304                                                         &pgss_max,
305                                                         1000,
306                                                         100,
307                                                         INT_MAX,
308                                                         PGC_POSTMASTER,
309                                                         0,
310                                                         NULL,
311                                                         NULL,
312                                                         NULL);
313
314         DefineCustomEnumVariable("pg_stat_statements.track",
315                            "Selects which statements are tracked by pg_stat_statements.",
316                                                          NULL,
317                                                          &pgss_track,
318                                                          PGSS_TRACK_TOP,
319                                                          track_options,
320                                                          PGC_SUSET,
321                                                          0,
322                                                          NULL,
323                                                          NULL,
324                                                          NULL);
325
326         DefineCustomBoolVariable("pg_stat_statements.track_utility",
327            "Selects whether utility commands are tracked by pg_stat_statements.",
328                                                          NULL,
329                                                          &pgss_track_utility,
330                                                          true,
331                                                          PGC_SUSET,
332                                                          0,
333                                                          NULL,
334                                                          NULL,
335                                                          NULL);
336
337         DefineCustomBoolVariable("pg_stat_statements.save",
338                            "Save pg_stat_statements statistics across server shutdowns.",
339                                                          NULL,
340                                                          &pgss_save,
341                                                          true,
342                                                          PGC_SIGHUP,
343                                                          0,
344                                                          NULL,
345                                                          NULL,
346                                                          NULL);
347
348         EmitWarningsOnPlaceholders("pg_stat_statements");
349
350         /*
351          * Request additional shared resources.  (These are no-ops if we're not in
352          * the postmaster process.)  We'll allocate or attach to the shared
353          * resources in pgss_shmem_startup().
354          */
355         RequestAddinShmemSpace(pgss_memsize());
356         RequestAddinLWLocks(1);
357
358         /*
359          * Install hooks.
360          */
361         prev_shmem_startup_hook = shmem_startup_hook;
362         shmem_startup_hook = pgss_shmem_startup;
363         prev_post_parse_analyze_hook = post_parse_analyze_hook;
364         post_parse_analyze_hook = pgss_post_parse_analyze;
365         prev_ExecutorStart = ExecutorStart_hook;
366         ExecutorStart_hook = pgss_ExecutorStart;
367         prev_ExecutorRun = ExecutorRun_hook;
368         ExecutorRun_hook = pgss_ExecutorRun;
369         prev_ExecutorFinish = ExecutorFinish_hook;
370         ExecutorFinish_hook = pgss_ExecutorFinish;
371         prev_ExecutorEnd = ExecutorEnd_hook;
372         ExecutorEnd_hook = pgss_ExecutorEnd;
373         prev_ProcessUtility = ProcessUtility_hook;
374         ProcessUtility_hook = pgss_ProcessUtility;
375 }
376
377 /*
378  * Module unload callback
379  */
380 void
381 _PG_fini(void)
382 {
383         /* Uninstall hooks. */
384         shmem_startup_hook = prev_shmem_startup_hook;
385         post_parse_analyze_hook = prev_post_parse_analyze_hook;
386         ExecutorStart_hook = prev_ExecutorStart;
387         ExecutorRun_hook = prev_ExecutorRun;
388         ExecutorFinish_hook = prev_ExecutorFinish;
389         ExecutorEnd_hook = prev_ExecutorEnd;
390         ProcessUtility_hook = prev_ProcessUtility;
391 }
392
393 /*
394  * shmem_startup hook: allocate or attach to shared memory,
395  * then load any pre-existing statistics from file.
396  */
397 static void
398 pgss_shmem_startup(void)
399 {
400         bool            found;
401         HASHCTL         info;
402         FILE       *file;
403         uint32          header;
404         int32           num;
405         int32           i;
406         int                     query_size;
407         int                     buffer_size;
408         char       *buffer = NULL;
409
410         if (prev_shmem_startup_hook)
411                 prev_shmem_startup_hook();
412
413         /* reset in case this is a restart within the postmaster */
414         pgss = NULL;
415         pgss_hash = NULL;
416
417         /*
418          * Create or attach to the shared memory state, including hash table
419          */
420         LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
421
422         pgss = ShmemInitStruct("pg_stat_statements",
423                                                    sizeof(pgssSharedState),
424                                                    &found);
425
426         if (!found)
427         {
428                 /* First time through ... */
429                 pgss->lock = LWLockAssign();
430                 pgss->query_size = pgstat_track_activity_query_size;
431                 pgss->cur_median_usage = ASSUMED_MEDIAN_INIT;
432         }
433
434         /* Be sure everyone agrees on the hash table entry size */
435         query_size = pgss->query_size;
436
437         memset(&info, 0, sizeof(info));
438         info.keysize = sizeof(pgssHashKey);
439         info.entrysize = offsetof(pgssEntry, query) +query_size;
440         info.hash = pgss_hash_fn;
441         info.match = pgss_match_fn;
442         pgss_hash = ShmemInitHash("pg_stat_statements hash",
443                                                           pgss_max, pgss_max,
444                                                           &info,
445                                                           HASH_ELEM | HASH_FUNCTION | HASH_COMPARE);
446
447         LWLockRelease(AddinShmemInitLock);
448
449         /*
450          * If we're in the postmaster (or a standalone backend...), set up a shmem
451          * exit hook to dump the statistics to disk.
452          */
453         if (!IsUnderPostmaster)
454                 on_shmem_exit(pgss_shmem_shutdown, (Datum) 0);
455
456         /*
457          * Attempt to load old statistics from the dump file, if this is the first
458          * time through and we weren't told not to.
459          */
460         if (found || !pgss_save)
461                 return;
462
463         /*
464          * Note: we don't bother with locks here, because there should be no other
465          * processes running when this code is reached.
466          */
467         file = AllocateFile(PGSS_DUMP_FILE, PG_BINARY_R);
468         if (file == NULL)
469         {
470                 if (errno == ENOENT)
471                         return;                         /* ignore not-found error */
472                 goto error;
473         }
474
475         buffer_size = query_size;
476         buffer = (char *) palloc(buffer_size);
477
478         if (fread(&header, sizeof(uint32), 1, file) != 1 ||
479                 header != PGSS_FILE_HEADER ||
480                 fread(&num, sizeof(int32), 1, file) != 1)
481                 goto error;
482
483         for (i = 0; i < num; i++)
484         {
485                 pgssEntry       temp;
486                 pgssEntry  *entry;
487
488                 if (fread(&temp, offsetof(pgssEntry, mutex), 1, file) != 1)
489                         goto error;
490
491                 /* Encoding is the only field we can easily sanity-check */
492                 if (!PG_VALID_BE_ENCODING(temp.key.encoding))
493                         goto error;
494
495                 /* Previous incarnation might have had a larger query_size */
496                 if (temp.query_len >= buffer_size)
497                 {
498                         buffer = (char *) repalloc(buffer, temp.query_len + 1);
499                         buffer_size = temp.query_len + 1;
500                 }
501
502                 if (fread(buffer, 1, temp.query_len, file) != temp.query_len)
503                         goto error;
504                 buffer[temp.query_len] = '\0';
505
506                 /* Skip loading "sticky" entries */
507                 if (temp.counters.calls == 0)
508                         continue;
509
510                 /* Clip to available length if needed */
511                 if (temp.query_len >= query_size)
512                         temp.query_len = pg_encoding_mbcliplen(temp.key.encoding,
513                                                                                                    buffer,
514                                                                                                    temp.query_len,
515                                                                                                    query_size - 1);
516
517                 /* make the hashtable entry (discards old entries if too many) */
518                 entry = entry_alloc(&temp.key, buffer, temp.query_len, false);
519
520                 /* copy in the actual stats */
521                 entry->counters = temp.counters;
522         }
523
524         pfree(buffer);
525         FreeFile(file);
526
527         /*
528          * Remove the file so it's not included in backups/replication slaves,
529          * etc. A new file will be written on next shutdown.
530          */
531         unlink(PGSS_DUMP_FILE);
532
533         return;
534
535 error:
536         ereport(LOG,
537                         (errcode_for_file_access(),
538                          errmsg("could not read pg_stat_statement file \"%s\": %m",
539                                         PGSS_DUMP_FILE)));
540         if (buffer)
541                 pfree(buffer);
542         if (file)
543                 FreeFile(file);
544         /* If possible, throw away the bogus file; ignore any error */
545         unlink(PGSS_DUMP_FILE);
546 }
547
548 /*
549  * shmem_shutdown hook: Dump statistics into file.
550  *
551  * Note: we don't bother with acquiring lock, because there should be no
552  * other processes running when this is called.
553  */
554 static void
555 pgss_shmem_shutdown(int code, Datum arg)
556 {
557         FILE       *file;
558         HASH_SEQ_STATUS hash_seq;
559         int32           num_entries;
560         pgssEntry  *entry;
561
562         /* Don't try to dump during a crash. */
563         if (code)
564                 return;
565
566         /* Safety check ... shouldn't get here unless shmem is set up. */
567         if (!pgss || !pgss_hash)
568                 return;
569
570         /* Don't dump if told not to. */
571         if (!pgss_save)
572                 return;
573
574         file = AllocateFile(PGSS_DUMP_FILE ".tmp", PG_BINARY_W);
575         if (file == NULL)
576                 goto error;
577
578         if (fwrite(&PGSS_FILE_HEADER, sizeof(uint32), 1, file) != 1)
579                 goto error;
580         num_entries = hash_get_num_entries(pgss_hash);
581         if (fwrite(&num_entries, sizeof(int32), 1, file) != 1)
582                 goto error;
583
584         hash_seq_init(&hash_seq, pgss_hash);
585         while ((entry = hash_seq_search(&hash_seq)) != NULL)
586         {
587                 int                     len = entry->query_len;
588
589                 if (fwrite(entry, offsetof(pgssEntry, mutex), 1, file) != 1 ||
590                         fwrite(entry->query, 1, len, file) != len)
591                         goto error;
592         }
593
594         if (FreeFile(file))
595         {
596                 file = NULL;
597                 goto error;
598         }
599
600         /*
601          * Rename file into place, so we atomically replace the old one.
602          */
603         if (rename(PGSS_DUMP_FILE ".tmp", PGSS_DUMP_FILE) != 0)
604                 ereport(LOG,
605                                 (errcode_for_file_access(),
606                                  errmsg("could not rename pg_stat_statement file \"%s\": %m",
607                                                 PGSS_DUMP_FILE ".tmp")));
608
609         return;
610
611 error:
612         ereport(LOG,
613                         (errcode_for_file_access(),
614                          errmsg("could not write pg_stat_statement file \"%s\": %m",
615                                         PGSS_DUMP_FILE ".tmp")));
616         if (file)
617                 FreeFile(file);
618         unlink(PGSS_DUMP_FILE ".tmp");
619 }
620
621 /*
622  * Post-parse-analysis hook: mark query with a queryId
623  */
624 static void
625 pgss_post_parse_analyze(ParseState *pstate, Query *query)
626 {
627         pgssJumbleState jstate;
628
629         /* Assert we didn't do this already */
630         Assert(query->queryId == 0);
631
632         /* Safety check... */
633         if (!pgss || !pgss_hash)
634                 return;
635
636         /*
637          * Utility statements get queryId zero.  We do this even in cases where
638          * the statement contains an optimizable statement for which a queryId
639          * could be derived (such as EXPLAIN or DECLARE CURSOR).  For such cases,
640          * runtime control will first go through ProcessUtility and then the
641          * executor, and we don't want the executor hooks to do anything, since we
642          * are already measuring the statement's costs at the utility level.
643          */
644         if (query->utilityStmt)
645         {
646                 query->queryId = 0;
647                 return;
648         }
649
650         /* Set up workspace for query jumbling */
651         jstate.jumble = (unsigned char *) palloc(JUMBLE_SIZE);
652         jstate.jumble_len = 0;
653         jstate.clocations_buf_size = 32;
654         jstate.clocations = (pgssLocationLen *)
655                 palloc(jstate.clocations_buf_size * sizeof(pgssLocationLen));
656         jstate.clocations_count = 0;
657
658         /* Compute query ID and mark the Query node with it */
659         JumbleQuery(&jstate, query);
660         query->queryId = hash_any(jstate.jumble, jstate.jumble_len);
661
662         /*
663          * If we are unlucky enough to get a hash of zero, use 1 instead, to
664          * prevent confusion with the utility-statement case.
665          */
666         if (query->queryId == 0)
667                 query->queryId = 1;
668
669         /*
670          * If we were able to identify any ignorable constants, we immediately
671          * create a hash table entry for the query, so that we can record the
672          * normalized form of the query string.  If there were no such constants,
673          * the normalized string would be the same as the query text anyway, so
674          * there's no need for an early entry.
675          */
676         if (jstate.clocations_count > 0)
677                 pgss_store(pstate->p_sourcetext,
678                                    query->queryId,
679                                    0,
680                                    0,
681                                    NULL,
682                                    &jstate);
683 }
684
685 /*
686  * ExecutorStart hook: start up tracking if needed
687  */
688 static void
689 pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
690 {
691         if (prev_ExecutorStart)
692                 prev_ExecutorStart(queryDesc, eflags);
693         else
694                 standard_ExecutorStart(queryDesc, eflags);
695
696         /*
697          * If query has queryId zero, don't track it.  This prevents double
698          * counting of optimizable statements that are directly contained in
699          * utility statements.
700          */
701         if (pgss_enabled() && queryDesc->plannedstmt->queryId != 0)
702         {
703                 /*
704                  * Set up to track total elapsed time in ExecutorRun.  Make sure the
705                  * space is allocated in the per-query context so it will go away at
706                  * ExecutorEnd.
707                  */
708                 if (queryDesc->totaltime == NULL)
709                 {
710                         MemoryContext oldcxt;
711
712                         oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
713                         queryDesc->totaltime = InstrAlloc(1, INSTRUMENT_ALL);
714                         MemoryContextSwitchTo(oldcxt);
715                 }
716         }
717 }
718
719 /*
720  * ExecutorRun hook: all we need do is track nesting depth
721  */
722 static void
723 pgss_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, long count)
724 {
725         nested_level++;
726         PG_TRY();
727         {
728                 if (prev_ExecutorRun)
729                         prev_ExecutorRun(queryDesc, direction, count);
730                 else
731                         standard_ExecutorRun(queryDesc, direction, count);
732                 nested_level--;
733         }
734         PG_CATCH();
735         {
736                 nested_level--;
737                 PG_RE_THROW();
738         }
739         PG_END_TRY();
740 }
741
742 /*
743  * ExecutorFinish hook: all we need do is track nesting depth
744  */
745 static void
746 pgss_ExecutorFinish(QueryDesc *queryDesc)
747 {
748         nested_level++;
749         PG_TRY();
750         {
751                 if (prev_ExecutorFinish)
752                         prev_ExecutorFinish(queryDesc);
753                 else
754                         standard_ExecutorFinish(queryDesc);
755                 nested_level--;
756         }
757         PG_CATCH();
758         {
759                 nested_level--;
760                 PG_RE_THROW();
761         }
762         PG_END_TRY();
763 }
764
765 /*
766  * ExecutorEnd hook: store results if needed
767  */
768 static void
769 pgss_ExecutorEnd(QueryDesc *queryDesc)
770 {
771         uint32          queryId = queryDesc->plannedstmt->queryId;
772
773         if (queryId != 0 && queryDesc->totaltime && pgss_enabled())
774         {
775                 /*
776                  * Make sure stats accumulation is done.  (Note: it's okay if several
777                  * levels of hook all do this.)
778                  */
779                 InstrEndLoop(queryDesc->totaltime);
780
781                 pgss_store(queryDesc->sourceText,
782                                    queryId,
783                                    queryDesc->totaltime->total * 1000.0,                /* convert to msec */
784                                    queryDesc->estate->es_processed,
785                                    &queryDesc->totaltime->bufusage,
786                                    NULL);
787         }
788
789         if (prev_ExecutorEnd)
790                 prev_ExecutorEnd(queryDesc);
791         else
792                 standard_ExecutorEnd(queryDesc);
793 }
794
795 /*
796  * ProcessUtility hook
797  */
798 static void
799 pgss_ProcessUtility(Node *parsetree, const char *queryString,
800                                         ParamListInfo params, bool isTopLevel,
801                                         DestReceiver *dest, char *completionTag)
802 {
803         /*
804          * If it's an EXECUTE statement, we don't track it and don't increment the
805          * nesting level.  This allows the cycles to be charged to the underlying
806          * PREPARE instead (by the Executor hooks), which is much more useful.
807          *
808          * We also don't track execution of PREPARE.  If we did, we would get one
809          * hash table entry for the PREPARE (with hash calculated from the query
810          * string), and then a different one with the same query string (but hash
811          * calculated from the query tree) would be used to accumulate costs of
812          * ensuing EXECUTEs.  This would be confusing, and inconsistent with other
813          * cases where planning time is not included at all.
814          */
815         if (pgss_track_utility && pgss_enabled() &&
816                 !IsA(parsetree, ExecuteStmt) &&
817                 !IsA(parsetree, PrepareStmt))
818         {
819                 instr_time      start;
820                 instr_time      duration;
821                 uint64          rows = 0;
822                 BufferUsage bufusage_start,
823                                         bufusage;
824                 uint32          queryId;
825
826                 bufusage_start = pgBufferUsage;
827                 INSTR_TIME_SET_CURRENT(start);
828
829                 nested_level++;
830                 PG_TRY();
831                 {
832                         if (prev_ProcessUtility)
833                                 prev_ProcessUtility(parsetree, queryString, params,
834                                                                         isTopLevel, dest, completionTag);
835                         else
836                                 standard_ProcessUtility(parsetree, queryString, params,
837                                                                                 isTopLevel, dest, completionTag);
838                         nested_level--;
839                 }
840                 PG_CATCH();
841                 {
842                         nested_level--;
843                         PG_RE_THROW();
844                 }
845                 PG_END_TRY();
846
847                 INSTR_TIME_SET_CURRENT(duration);
848                 INSTR_TIME_SUBTRACT(duration, start);
849
850                 /* parse command tag to retrieve the number of affected rows. */
851                 if (completionTag &&
852                         sscanf(completionTag, "COPY " UINT64_FORMAT, &rows) != 1)
853                         rows = 0;
854
855                 /* calc differences of buffer counters. */
856                 bufusage.shared_blks_hit =
857                         pgBufferUsage.shared_blks_hit - bufusage_start.shared_blks_hit;
858                 bufusage.shared_blks_read =
859                         pgBufferUsage.shared_blks_read - bufusage_start.shared_blks_read;
860                 bufusage.shared_blks_dirtied =
861                         pgBufferUsage.shared_blks_dirtied - bufusage_start.shared_blks_dirtied;
862                 bufusage.shared_blks_written =
863                         pgBufferUsage.shared_blks_written - bufusage_start.shared_blks_written;
864                 bufusage.local_blks_hit =
865                         pgBufferUsage.local_blks_hit - bufusage_start.local_blks_hit;
866                 bufusage.local_blks_read =
867                         pgBufferUsage.local_blks_read - bufusage_start.local_blks_read;
868                 bufusage.local_blks_dirtied =
869                         pgBufferUsage.local_blks_dirtied - bufusage_start.local_blks_dirtied;
870                 bufusage.local_blks_written =
871                         pgBufferUsage.local_blks_written - bufusage_start.local_blks_written;
872                 bufusage.temp_blks_read =
873                         pgBufferUsage.temp_blks_read - bufusage_start.temp_blks_read;
874                 bufusage.temp_blks_written =
875                         pgBufferUsage.temp_blks_written - bufusage_start.temp_blks_written;
876                 bufusage.blk_read_time = pgBufferUsage.blk_read_time;
877                 INSTR_TIME_SUBTRACT(bufusage.blk_read_time, bufusage_start.blk_read_time);
878                 bufusage.blk_write_time = pgBufferUsage.blk_write_time;
879                 INSTR_TIME_SUBTRACT(bufusage.blk_write_time, bufusage_start.blk_write_time);
880
881                 /* For utility statements, we just hash the query string directly */
882                 queryId = pgss_hash_string(queryString);
883
884                 pgss_store(queryString,
885                                    queryId,
886                                    INSTR_TIME_GET_MILLISEC(duration),
887                                    rows,
888                                    &bufusage,
889                                    NULL);
890         }
891         else
892         {
893                 if (prev_ProcessUtility)
894                         prev_ProcessUtility(parsetree, queryString, params,
895                                                                 isTopLevel, dest, completionTag);
896                 else
897                         standard_ProcessUtility(parsetree, queryString, params,
898                                                                         isTopLevel, dest, completionTag);
899         }
900 }
901
902 /*
903  * Calculate hash value for a key
904  */
905 static uint32
906 pgss_hash_fn(const void *key, Size keysize)
907 {
908         const pgssHashKey *k = (const pgssHashKey *) key;
909
910         /* we don't bother to include encoding in the hash */
911         return hash_uint32((uint32) k->userid) ^
912                 hash_uint32((uint32) k->dbid) ^
913                 hash_uint32((uint32) k->queryid);
914 }
915
916 /*
917  * Compare two keys - zero means match
918  */
919 static int
920 pgss_match_fn(const void *key1, const void *key2, Size keysize)
921 {
922         const pgssHashKey *k1 = (const pgssHashKey *) key1;
923         const pgssHashKey *k2 = (const pgssHashKey *) key2;
924
925         if (k1->userid == k2->userid &&
926                 k1->dbid == k2->dbid &&
927                 k1->encoding == k2->encoding &&
928                 k1->queryid == k2->queryid)
929                 return 0;
930         else
931                 return 1;
932 }
933
934 /*
935  * Given an arbitrarily long query string, produce a hash for the purposes of
936  * identifying the query, without normalizing constants.  Used when hashing
937  * utility statements.
938  */
939 static uint32
940 pgss_hash_string(const char *str)
941 {
942         return hash_any((const unsigned char *) str, strlen(str));
943 }
944
945 /*
946  * Store some statistics for a statement.
947  *
948  * If jstate is not NULL then we're trying to create an entry for which
949  * we have no statistics as yet; we just want to record the normalized
950  * query string.  total_time, rows, bufusage are ignored in this case.
951  */
952 static void
953 pgss_store(const char *query, uint32 queryId,
954                    double total_time, uint64 rows,
955                    const BufferUsage *bufusage,
956                    pgssJumbleState *jstate)
957 {
958         pgssHashKey key;
959         pgssEntry  *entry;
960         char       *norm_query = NULL;
961
962         Assert(query != NULL);
963
964         /* Safety check... */
965         if (!pgss || !pgss_hash)
966                 return;
967
968         /* Set up key for hashtable search */
969         key.userid = GetUserId();
970         key.dbid = MyDatabaseId;
971         key.encoding = GetDatabaseEncoding();
972         key.queryid = queryId;
973
974         /* Lookup the hash table entry with shared lock. */
975         LWLockAcquire(pgss->lock, LW_SHARED);
976
977         entry = (pgssEntry *) hash_search(pgss_hash, &key, HASH_FIND, NULL);
978
979         /* Create new entry, if not present */
980         if (!entry)
981         {
982                 int                     query_len;
983
984                 /*
985                  * We'll need exclusive lock to make a new entry.  There is no point
986                  * in holding shared lock while we normalize the string, though.
987                  */
988                 LWLockRelease(pgss->lock);
989
990                 query_len = strlen(query);
991
992                 if (jstate)
993                 {
994                         /* Normalize the string if enabled */
995                         norm_query = generate_normalized_query(jstate, query,
996                                                                                                    &query_len,
997                                                                                                    key.encoding);
998
999                         /* Acquire exclusive lock as required by entry_alloc() */
1000                         LWLockAcquire(pgss->lock, LW_EXCLUSIVE);
1001
1002                         entry = entry_alloc(&key, norm_query, query_len, true);
1003                 }
1004                 else
1005                 {
1006                         /*
1007                          * We're just going to store the query string as-is; but we have
1008                          * to truncate it if over-length.
1009                          */
1010                         if (query_len >= pgss->query_size)
1011                                 query_len = pg_encoding_mbcliplen(key.encoding,
1012                                                                                                   query,
1013                                                                                                   query_len,
1014                                                                                                   pgss->query_size - 1);
1015
1016                         /* Acquire exclusive lock as required by entry_alloc() */
1017                         LWLockAcquire(pgss->lock, LW_EXCLUSIVE);
1018
1019                         entry = entry_alloc(&key, query, query_len, false);
1020                 }
1021         }
1022
1023         /* Increment the counts, except when jstate is not NULL */
1024         if (!jstate)
1025         {
1026                 /*
1027                  * Grab the spinlock while updating the counters (see comment about
1028                  * locking rules at the head of the file)
1029                  */
1030                 volatile pgssEntry *e = (volatile pgssEntry *) entry;
1031
1032                 SpinLockAcquire(&e->mutex);
1033
1034                 /* "Unstick" entry if it was previously sticky */
1035                 if (e->counters.calls == 0)
1036                         e->counters.usage = USAGE_INIT;
1037
1038                 e->counters.calls += 1;
1039                 e->counters.total_time += total_time;
1040                 e->counters.rows += rows;
1041                 e->counters.shared_blks_hit += bufusage->shared_blks_hit;
1042                 e->counters.shared_blks_read += bufusage->shared_blks_read;
1043                 e->counters.shared_blks_dirtied += bufusage->shared_blks_dirtied;
1044                 e->counters.shared_blks_written += bufusage->shared_blks_written;
1045                 e->counters.local_blks_hit += bufusage->local_blks_hit;
1046                 e->counters.local_blks_read += bufusage->local_blks_read;
1047                 e->counters.local_blks_dirtied += bufusage->local_blks_dirtied;
1048                 e->counters.local_blks_written += bufusage->local_blks_written;
1049                 e->counters.temp_blks_read += bufusage->temp_blks_read;
1050                 e->counters.temp_blks_written += bufusage->temp_blks_written;
1051                 e->counters.blk_read_time += INSTR_TIME_GET_MILLISEC(bufusage->blk_read_time);
1052                 e->counters.blk_write_time += INSTR_TIME_GET_MILLISEC(bufusage->blk_write_time);
1053                 e->counters.usage += USAGE_EXEC(total_time);
1054
1055                 SpinLockRelease(&e->mutex);
1056         }
1057
1058         LWLockRelease(pgss->lock);
1059
1060         /* We postpone this pfree until we're out of the lock */
1061         if (norm_query)
1062                 pfree(norm_query);
1063 }
1064
1065 /*
1066  * Reset all statement statistics.
1067  */
1068 Datum
1069 pg_stat_statements_reset(PG_FUNCTION_ARGS)
1070 {
1071         if (!pgss || !pgss_hash)
1072                 ereport(ERROR,
1073                                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1074                                  errmsg("pg_stat_statements must be loaded via shared_preload_libraries")));
1075         entry_reset();
1076         PG_RETURN_VOID();
1077 }
1078
1079 #define PG_STAT_STATEMENTS_COLS_V1_0    14
1080 #define PG_STAT_STATEMENTS_COLS                 18
1081
1082 /*
1083  * Retrieve statement statistics.
1084  */
1085 Datum
1086 pg_stat_statements(PG_FUNCTION_ARGS)
1087 {
1088         ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
1089         TupleDesc       tupdesc;
1090         Tuplestorestate *tupstore;
1091         MemoryContext per_query_ctx;
1092         MemoryContext oldcontext;
1093         Oid                     userid = GetUserId();
1094         bool            is_superuser = superuser();
1095         HASH_SEQ_STATUS hash_seq;
1096         pgssEntry  *entry;
1097         bool            sql_supports_v1_1_counters = true;
1098
1099         if (!pgss || !pgss_hash)
1100                 ereport(ERROR,
1101                                 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1102                                  errmsg("pg_stat_statements must be loaded via shared_preload_libraries")));
1103
1104         /* check to see if caller supports us returning a tuplestore */
1105         if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
1106                 ereport(ERROR,
1107                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1108                                  errmsg("set-valued function called in context that cannot accept a set")));
1109         if (!(rsinfo->allowedModes & SFRM_Materialize))
1110                 ereport(ERROR,
1111                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1112                                  errmsg("materialize mode required, but it is not " \
1113                                                 "allowed in this context")));
1114
1115         /* Build a tuple descriptor for our result type */
1116         if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
1117                 elog(ERROR, "return type must be a row type");
1118         if (tupdesc->natts == PG_STAT_STATEMENTS_COLS_V1_0)
1119                 sql_supports_v1_1_counters = false;
1120
1121         per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
1122         oldcontext = MemoryContextSwitchTo(per_query_ctx);
1123
1124         tupstore = tuplestore_begin_heap(true, false, work_mem);
1125         rsinfo->returnMode = SFRM_Materialize;
1126         rsinfo->setResult = tupstore;
1127         rsinfo->setDesc = tupdesc;
1128
1129         MemoryContextSwitchTo(oldcontext);
1130
1131         LWLockAcquire(pgss->lock, LW_SHARED);
1132
1133         hash_seq_init(&hash_seq, pgss_hash);
1134         while ((entry = hash_seq_search(&hash_seq)) != NULL)
1135         {
1136                 Datum           values[PG_STAT_STATEMENTS_COLS];
1137                 bool            nulls[PG_STAT_STATEMENTS_COLS];
1138                 int                     i = 0;
1139                 Counters        tmp;
1140
1141                 memset(values, 0, sizeof(values));
1142                 memset(nulls, 0, sizeof(nulls));
1143
1144                 values[i++] = ObjectIdGetDatum(entry->key.userid);
1145                 values[i++] = ObjectIdGetDatum(entry->key.dbid);
1146
1147                 if (is_superuser || entry->key.userid == userid)
1148                 {
1149                         char       *qstr;
1150
1151                         qstr = (char *)
1152                                 pg_do_encoding_conversion((unsigned char *) entry->query,
1153                                                                                   entry->query_len,
1154                                                                                   entry->key.encoding,
1155                                                                                   GetDatabaseEncoding());
1156                         values[i++] = CStringGetTextDatum(qstr);
1157                         if (qstr != entry->query)
1158                                 pfree(qstr);
1159                 }
1160                 else
1161                         values[i++] = CStringGetTextDatum("<insufficient privilege>");
1162
1163                 /* copy counters to a local variable to keep locking time short */
1164                 {
1165                         volatile pgssEntry *e = (volatile pgssEntry *) entry;
1166
1167                         SpinLockAcquire(&e->mutex);
1168                         tmp = e->counters;
1169                         SpinLockRelease(&e->mutex);
1170                 }
1171
1172                 /* Skip entry if unexecuted (ie, it's a pending "sticky" entry) */
1173                 if (tmp.calls == 0)
1174                         continue;
1175
1176                 values[i++] = Int64GetDatumFast(tmp.calls);
1177                 values[i++] = Float8GetDatumFast(tmp.total_time);
1178                 values[i++] = Int64GetDatumFast(tmp.rows);
1179                 values[i++] = Int64GetDatumFast(tmp.shared_blks_hit);
1180                 values[i++] = Int64GetDatumFast(tmp.shared_blks_read);
1181                 if (sql_supports_v1_1_counters)
1182                         values[i++] = Int64GetDatumFast(tmp.shared_blks_dirtied);
1183                 values[i++] = Int64GetDatumFast(tmp.shared_blks_written);
1184                 values[i++] = Int64GetDatumFast(tmp.local_blks_hit);
1185                 values[i++] = Int64GetDatumFast(tmp.local_blks_read);
1186                 if (sql_supports_v1_1_counters)
1187                         values[i++] = Int64GetDatumFast(tmp.local_blks_dirtied);
1188                 values[i++] = Int64GetDatumFast(tmp.local_blks_written);
1189                 values[i++] = Int64GetDatumFast(tmp.temp_blks_read);
1190                 values[i++] = Int64GetDatumFast(tmp.temp_blks_written);
1191                 if (sql_supports_v1_1_counters)
1192                 {
1193                         values[i++] = Float8GetDatumFast(tmp.blk_read_time);
1194                         values[i++] = Float8GetDatumFast(tmp.blk_write_time);
1195                 }
1196
1197                 Assert(i == (sql_supports_v1_1_counters ?
1198                                          PG_STAT_STATEMENTS_COLS : PG_STAT_STATEMENTS_COLS_V1_0));
1199
1200                 tuplestore_putvalues(tupstore, tupdesc, values, nulls);
1201         }
1202
1203         LWLockRelease(pgss->lock);
1204
1205         /* clean up and return the tuplestore */
1206         tuplestore_donestoring(tupstore);
1207
1208         return (Datum) 0;
1209 }
1210
1211 /*
1212  * Estimate shared memory space needed.
1213  */
1214 static Size
1215 pgss_memsize(void)
1216 {
1217         Size            size;
1218         Size            entrysize;
1219
1220         size = MAXALIGN(sizeof(pgssSharedState));
1221         entrysize = offsetof(pgssEntry, query) +pgstat_track_activity_query_size;
1222         size = add_size(size, hash_estimate_size(pgss_max, entrysize));
1223
1224         return size;
1225 }
1226
1227 /*
1228  * Allocate a new hashtable entry.
1229  * caller must hold an exclusive lock on pgss->lock
1230  *
1231  * "query" need not be null-terminated; we rely on query_len instead
1232  *
1233  * If "sticky" is true, make the new entry artificially sticky so that it will
1234  * probably still be there when the query finishes execution.  We do this by
1235  * giving it a median usage value rather than the normal value.  (Strictly
1236  * speaking, query strings are normalized on a best effort basis, though it
1237  * would be difficult to demonstrate this even under artificial conditions.)
1238  *
1239  * Note: despite needing exclusive lock, it's not an error for the target
1240  * entry to already exist.      This is because pgss_store releases and
1241  * reacquires lock after failing to find a match; so someone else could
1242  * have made the entry while we waited to get exclusive lock.
1243  */
1244 static pgssEntry *
1245 entry_alloc(pgssHashKey *key, const char *query, int query_len, bool sticky)
1246 {
1247         pgssEntry  *entry;
1248         bool            found;
1249
1250         /* Make space if needed */
1251         while (hash_get_num_entries(pgss_hash) >= pgss_max)
1252                 entry_dealloc();
1253
1254         /* Find or create an entry with desired hash code */
1255         entry = (pgssEntry *) hash_search(pgss_hash, key, HASH_ENTER, &found);
1256
1257         if (!found)
1258         {
1259                 /* New entry, initialize it */
1260
1261                 /* reset the statistics */
1262                 memset(&entry->counters, 0, sizeof(Counters));
1263                 /* set the appropriate initial usage count */
1264                 entry->counters.usage = sticky ? pgss->cur_median_usage : USAGE_INIT;
1265                 /* re-initialize the mutex each time ... we assume no one using it */
1266                 SpinLockInit(&entry->mutex);
1267                 /* ... and don't forget the query text */
1268                 Assert(query_len >= 0 && query_len < pgss->query_size);
1269                 entry->query_len = query_len;
1270                 memcpy(entry->query, query, query_len);
1271                 entry->query[query_len] = '\0';
1272         }
1273
1274         return entry;
1275 }
1276
1277 /*
1278  * qsort comparator for sorting into increasing usage order
1279  */
1280 static int
1281 entry_cmp(const void *lhs, const void *rhs)
1282 {
1283         double          l_usage = (*(pgssEntry *const *) lhs)->counters.usage;
1284         double          r_usage = (*(pgssEntry *const *) rhs)->counters.usage;
1285
1286         if (l_usage < r_usage)
1287                 return -1;
1288         else if (l_usage > r_usage)
1289                 return +1;
1290         else
1291                 return 0;
1292 }
1293
1294 /*
1295  * Deallocate least used entries.
1296  * Caller must hold an exclusive lock on pgss->lock.
1297  */
1298 static void
1299 entry_dealloc(void)
1300 {
1301         HASH_SEQ_STATUS hash_seq;
1302         pgssEntry **entries;
1303         pgssEntry  *entry;
1304         int                     nvictims;
1305         int                     i;
1306
1307         /*
1308          * Sort entries by usage and deallocate USAGE_DEALLOC_PERCENT of them.
1309          * While we're scanning the table, apply the decay factor to the usage
1310          * values.
1311          */
1312
1313         entries = palloc(hash_get_num_entries(pgss_hash) * sizeof(pgssEntry *));
1314
1315         i = 0;
1316         hash_seq_init(&hash_seq, pgss_hash);
1317         while ((entry = hash_seq_search(&hash_seq)) != NULL)
1318         {
1319                 entries[i++] = entry;
1320                 /* "Sticky" entries get a different usage decay rate. */
1321                 if (entry->counters.calls == 0)
1322                         entry->counters.usage *= STICKY_DECREASE_FACTOR;
1323                 else
1324                         entry->counters.usage *= USAGE_DECREASE_FACTOR;
1325         }
1326
1327         qsort(entries, i, sizeof(pgssEntry *), entry_cmp);
1328
1329         /* Also, record the (approximate) median usage */
1330         if (i > 0)
1331                 pgss->cur_median_usage = entries[i / 2]->counters.usage;
1332
1333         nvictims = Max(10, i * USAGE_DEALLOC_PERCENT / 100);
1334         nvictims = Min(nvictims, i);
1335
1336         for (i = 0; i < nvictims; i++)
1337         {
1338                 hash_search(pgss_hash, &entries[i]->key, HASH_REMOVE, NULL);
1339         }
1340
1341         pfree(entries);
1342 }
1343
1344 /*
1345  * Release all entries.
1346  */
1347 static void
1348 entry_reset(void)
1349 {
1350         HASH_SEQ_STATUS hash_seq;
1351         pgssEntry  *entry;
1352
1353         LWLockAcquire(pgss->lock, LW_EXCLUSIVE);
1354
1355         hash_seq_init(&hash_seq, pgss_hash);
1356         while ((entry = hash_seq_search(&hash_seq)) != NULL)
1357         {
1358                 hash_search(pgss_hash, &entry->key, HASH_REMOVE, NULL);
1359         }
1360
1361         LWLockRelease(pgss->lock);
1362 }
1363 #endif
1364
1365 /*
1366  * AppendJumble: Append a value that is substantive in a given query to
1367  * the current jumble.
1368  */
1369 static void
1370 AppendJumble(pgssJumbleState *jstate, const unsigned char *item, Size size)
1371 {
1372         unsigned char *jumble = jstate->jumble;
1373         Size            jumble_len = jstate->jumble_len;
1374
1375         /*
1376          * Whenever the jumble buffer is full, we hash the current contents and
1377          * reset the buffer to contain just that hash value, thus relying on the
1378          * hash to summarize everything so far.
1379          */
1380         while (size > 0)
1381         {
1382                 Size            part_size;
1383
1384                 if (jumble_len >= JUMBLE_SIZE)
1385                 {
1386                         uint32          start_hash = hash_any(jumble, JUMBLE_SIZE);
1387
1388                         memcpy(jumble, &start_hash, sizeof(start_hash));
1389                         jumble_len = sizeof(start_hash);
1390                 }
1391                 part_size = Min(size, JUMBLE_SIZE - jumble_len);
1392                 memcpy(jumble + jumble_len, item, part_size);
1393                 jumble_len += part_size;
1394                 item += part_size;
1395                 size -= part_size;
1396         }
1397         jstate->jumble_len = jumble_len;
1398 }
1399
/*
 * Convenience wrappers around AppendJumble.  Both expect a variable named
 * "jstate" to be in scope at the point of use.
 *
 * APP_JUMB(item)       appends the raw bytes of the lvalue "item".
 * APP_JUMB_STRING(str) appends the string "str" including its
 *                      terminating NUL.
 */
#define APP_JUMB(item) AppendJumble(jstate, (const unsigned char *) &(item), sizeof(item))

#define APP_JUMB_STRING(str) AppendJumble(jstate, (const unsigned char *) (str), strlen(str) + 1)
1408
1409 /*
1410  * JumbleQuery: Selectively serialize the query tree, appending significant
1411  * data to the "query jumble" while ignoring nonsignificant data.
1412  *
1413  * Rule of thumb for what to include is that we should ignore anything not
1414  * semantically significant (such as alias names) as well as anything that can
1415  * be deduced from child nodes (else we'd just be double-hashing that piece
1416  * of information).
1417  */
1418 static void
1419 JumbleQuery(pgssJumbleState *jstate, Query *query)
1420 {
1421         Assert(IsA(query, Query));
1422         Assert(query->utilityStmt == NULL);
1423
1424         APP_JUMB(query->commandType);
1425         /* resultRelation is usually predictable from commandType */
1426         JumbleExpr(jstate, (Node *) query->cteList);
1427         JumbleRangeTable(jstate, query->rtable);
1428         JumbleExpr(jstate, (Node *) query->jointree);
1429         JumbleExpr(jstate, (Node *) query->targetList);
1430         JumbleExpr(jstate, (Node *) query->returningList);
1431         JumbleExpr(jstate, (Node *) query->groupClause);
1432         JumbleExpr(jstate, query->havingQual);
1433         JumbleExpr(jstate, (Node *) query->windowClause);
1434         JumbleExpr(jstate, (Node *) query->distinctClause);
1435         JumbleExpr(jstate, (Node *) query->sortClause);
1436         JumbleExpr(jstate, query->limitOffset);
1437         JumbleExpr(jstate, query->limitCount);
1438         /* we ignore rowMarks */
1439         JumbleExpr(jstate, query->setOperations);
1440 }
1441
1442 /*
1443  * Jumble a range table
1444  */
1445 static void
1446 JumbleRangeTable(pgssJumbleState *jstate, List *rtable)
1447 {
1448         ListCell   *lc;
1449
1450         foreach(lc, rtable)
1451         {
1452                 RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
1453
1454                 Assert(IsA(rte, RangeTblEntry));
1455                 APP_JUMB(rte->rtekind);
1456                 switch (rte->rtekind)
1457                 {
1458                         case RTE_RELATION:
1459                                 APP_JUMB(rte->relid);
1460                                 break;
1461                         case RTE_SUBQUERY:
1462                                 JumbleQuery(jstate, rte->subquery);
1463                                 break;
1464                         case RTE_JOIN:
1465                                 APP_JUMB(rte->jointype);
1466                                 break;
1467                         case RTE_FUNCTION:
1468                                 JumbleExpr(jstate, rte->funcexpr);
1469                                 break;
1470                         case RTE_VALUES:
1471                                 JumbleExpr(jstate, (Node *) rte->values_lists);
1472                                 break;
1473                         case RTE_CTE:
1474
1475                                 /*
1476                                  * Depending on the CTE name here isn't ideal, but it's the
1477                                  * only info we have to identify the referenced WITH item.
1478                                  */
1479                                 APP_JUMB_STRING(rte->ctename);
1480                                 APP_JUMB(rte->ctelevelsup);
1481                                 break;
1482                         default:
1483                                 elog(ERROR, "unrecognized RTE kind: %d", (int) rte->rtekind);
1484                                 break;
1485                 }
1486         }
1487 }
1488
1489 /*
1490  * Jumble an expression tree
1491  *
1492  * In general this function should handle all the same node types that
1493  * expression_tree_walker() does, and therefore it's coded to be as parallel
1494  * to that function as possible.  However, since we are only invoked on
1495  * queries immediately post-parse-analysis, we need not handle node types
1496  * that only appear in planning.
1497  *
1498  * Note: the reason we don't simply use expression_tree_walker() is that the
1499  * point of that function is to support tree walkers that don't care about
1500  * most tree node types, but here we care about all types.      We should complain
1501  * about any unrecognized node type.
1502  */
1503 static void
1504 JumbleExpr(pgssJumbleState *jstate, Node *node)
1505 {
1506         ListCell   *temp;
1507
1508         if (node == NULL)
1509                 return;
1510
1511         /* Guard against stack overflow due to overly complex expressions */
1512         check_stack_depth();
1513
1514         /*
1515          * We always emit the node's NodeTag, then any additional fields that are
1516          * considered significant, and then we recurse to any child nodes.
1517          */
1518         APP_JUMB(node->type);
1519
1520         switch (nodeTag(node))
1521         {
1522                 case T_Var:
1523                         {
1524                                 Var                *var = (Var *) node;
1525
1526                                 APP_JUMB(var->varno);
1527                                 APP_JUMB(var->varattno);
1528                                 APP_JUMB(var->varlevelsup);
1529                         }
1530                         break;
1531                 case T_Const:
1532                         {
1533                                 Const      *c = (Const *) node;
1534
1535                                 /* We jumble only the constant's type, not its value */
1536                                 APP_JUMB(c->consttype);
1537                                 /* Also, record its parse location for query normalization */
1538                                 RecordConstLocation(jstate, c->location);
1539                         }
1540                         break;
1541                 case T_Param:
1542                         {
1543                                 Param      *p = (Param *) node;
1544
1545                                 APP_JUMB(p->paramkind);
1546                                 APP_JUMB(p->paramid);
1547                                 APP_JUMB(p->paramtype);
1548                         }
1549                         break;
1550                 case T_Aggref:
1551                         {
1552                                 Aggref     *expr = (Aggref *) node;
1553
1554                                 APP_JUMB(expr->aggfnoid);
1555                                 JumbleExpr(jstate, (Node *) expr->args);
1556                                 JumbleExpr(jstate, (Node *) expr->aggorder);
1557                                 JumbleExpr(jstate, (Node *) expr->aggdistinct);
1558                         }
1559                         break;
1560                 case T_WindowFunc:
1561                         {
1562                                 WindowFunc *expr = (WindowFunc *) node;
1563
1564                                 APP_JUMB(expr->winfnoid);
1565                                 APP_JUMB(expr->winref);
1566                                 JumbleExpr(jstate, (Node *) expr->args);
1567                         }
1568                         break;
1569                 case T_ArrayRef:
1570                         {
1571                                 ArrayRef   *aref = (ArrayRef *) node;
1572
1573                                 JumbleExpr(jstate, (Node *) aref->refupperindexpr);
1574                                 JumbleExpr(jstate, (Node *) aref->reflowerindexpr);
1575                                 JumbleExpr(jstate, (Node *) aref->refexpr);
1576                                 JumbleExpr(jstate, (Node *) aref->refassgnexpr);
1577                         }
1578                         break;
1579                 case T_FuncExpr:
1580                         {
1581                                 FuncExpr   *expr = (FuncExpr *) node;
1582
1583                                 APP_JUMB(expr->funcid);
1584                                 JumbleExpr(jstate, (Node *) expr->args);
1585                         }
1586                         break;
1587                 case T_NamedArgExpr:
1588                         {
1589                                 NamedArgExpr *nae = (NamedArgExpr *) node;
1590
1591                                 APP_JUMB(nae->argnumber);
1592                                 JumbleExpr(jstate, (Node *) nae->arg);
1593                         }
1594                         break;
1595                 case T_OpExpr:
1596                 case T_DistinctExpr:    /* struct-equivalent to OpExpr */
1597                 case T_NullIfExpr:              /* struct-equivalent to OpExpr */
1598                         {
1599                                 OpExpr     *expr = (OpExpr *) node;
1600
1601                                 APP_JUMB(expr->opno);
1602                                 JumbleExpr(jstate, (Node *) expr->args);
1603                         }
1604                         break;
1605                 case T_ScalarArrayOpExpr:
1606                         {
1607                                 ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) node;
1608
1609                                 APP_JUMB(expr->opno);
1610                                 APP_JUMB(expr->useOr);
1611                                 JumbleExpr(jstate, (Node *) expr->args);
1612                         }
1613                         break;
1614                 case T_BoolExpr:
1615                         {
1616                                 BoolExpr   *expr = (BoolExpr *) node;
1617
1618                                 APP_JUMB(expr->boolop);
1619                                 JumbleExpr(jstate, (Node *) expr->args);
1620                         }
1621                         break;
1622                 case T_SubLink:
1623                         {
1624                                 SubLink    *sublink = (SubLink *) node;
1625
1626                                 APP_JUMB(sublink->subLinkType);
1627                                 JumbleExpr(jstate, (Node *) sublink->testexpr);
1628                                 JumbleQuery(jstate, (Query *) sublink->subselect);
1629                         }
1630                         break;
1631                 case T_FieldSelect:
1632                         {
1633                                 FieldSelect *fs = (FieldSelect *) node;
1634
1635                                 APP_JUMB(fs->fieldnum);
1636                                 JumbleExpr(jstate, (Node *) fs->arg);
1637                         }
1638                         break;
1639                 case T_FieldStore:
1640                         {
1641                                 FieldStore *fstore = (FieldStore *) node;
1642
1643                                 JumbleExpr(jstate, (Node *) fstore->arg);
1644                                 JumbleExpr(jstate, (Node *) fstore->newvals);
1645                         }
1646                         break;
1647                 case T_RelabelType:
1648                         {
1649                                 RelabelType *rt = (RelabelType *) node;
1650
1651                                 APP_JUMB(rt->resulttype);
1652                                 JumbleExpr(jstate, (Node *) rt->arg);
1653                         }
1654                         break;
1655                 case T_CoerceViaIO:
1656                         {
1657                                 CoerceViaIO *cio = (CoerceViaIO *) node;
1658
1659                                 APP_JUMB(cio->resulttype);
1660                                 JumbleExpr(jstate, (Node *) cio->arg);
1661                         }
1662                         break;
1663                 case T_ArrayCoerceExpr:
1664                         {
1665                                 ArrayCoerceExpr *acexpr = (ArrayCoerceExpr *) node;
1666
1667                                 APP_JUMB(acexpr->resulttype);
1668                                 JumbleExpr(jstate, (Node *) acexpr->arg);
1669                         }
1670                         break;
1671                 case T_ConvertRowtypeExpr:
1672                         {
1673                                 ConvertRowtypeExpr *crexpr = (ConvertRowtypeExpr *) node;
1674
1675                                 APP_JUMB(crexpr->resulttype);
1676                                 JumbleExpr(jstate, (Node *) crexpr->arg);
1677                         }
1678                         break;
1679                 case T_CollateExpr:
1680                         {
1681                                 CollateExpr *ce = (CollateExpr *) node;
1682
1683                                 APP_JUMB(ce->collOid);
1684                                 JumbleExpr(jstate, (Node *) ce->arg);
1685                         }
1686                         break;
1687                 case T_CaseExpr:
1688                         {
1689                                 CaseExpr   *caseexpr = (CaseExpr *) node;
1690
1691                                 JumbleExpr(jstate, (Node *) caseexpr->arg);
1692                                 foreach(temp, caseexpr->args)
1693                                 {
1694                                         CaseWhen   *when = (CaseWhen *) lfirst(temp);
1695
1696                                         Assert(IsA(when, CaseWhen));
1697                                         JumbleExpr(jstate, (Node *) when->expr);
1698                                         JumbleExpr(jstate, (Node *) when->result);
1699                                 }
1700                                 JumbleExpr(jstate, (Node *) caseexpr->defresult);
1701                         }
1702                         break;
1703                 case T_CaseTestExpr:
1704                         {
1705                                 CaseTestExpr *ct = (CaseTestExpr *) node;
1706
1707                                 APP_JUMB(ct->typeId);
1708                         }
1709                         break;
1710                 case T_ArrayExpr:
1711                         JumbleExpr(jstate, (Node *) ((ArrayExpr *) node)->elements);
1712                         break;
1713                 case T_RowExpr:
1714                         JumbleExpr(jstate, (Node *) ((RowExpr *) node)->args);
1715                         break;
1716                 case T_RowCompareExpr:
1717                         {
1718                                 RowCompareExpr *rcexpr = (RowCompareExpr *) node;
1719
1720                                 APP_JUMB(rcexpr->rctype);
1721                                 JumbleExpr(jstate, (Node *) rcexpr->largs);
1722                                 JumbleExpr(jstate, (Node *) rcexpr->rargs);
1723                         }
1724                         break;
1725                 case T_CoalesceExpr:
1726                         JumbleExpr(jstate, (Node *) ((CoalesceExpr *) node)->args);
1727                         break;
1728                 case T_MinMaxExpr:
1729                         {
1730                                 MinMaxExpr *mmexpr = (MinMaxExpr *) node;
1731
1732                                 APP_JUMB(mmexpr->op);
1733                                 JumbleExpr(jstate, (Node *) mmexpr->args);
1734                         }
1735                         break;
1736                 case T_XmlExpr:
1737                         {
1738                                 XmlExpr    *xexpr = (XmlExpr *) node;
1739
1740                                 APP_JUMB(xexpr->op);
1741                                 JumbleExpr(jstate, (Node *) xexpr->named_args);
1742                                 JumbleExpr(jstate, (Node *) xexpr->args);
1743                         }
1744                         break;
1745                 case T_NullTest:
1746                         {
1747                                 NullTest   *nt = (NullTest *) node;
1748
1749                                 APP_JUMB(nt->nulltesttype);
1750                                 JumbleExpr(jstate, (Node *) nt->arg);
1751                         }
1752                         break;
1753                 case T_BooleanTest:
1754                         {
1755                                 BooleanTest *bt = (BooleanTest *) node;
1756
1757                                 APP_JUMB(bt->booltesttype);
1758                                 JumbleExpr(jstate, (Node *) bt->arg);
1759                         }
1760                         break;
1761                 case T_CoerceToDomain:
1762                         {
1763                                 CoerceToDomain *cd = (CoerceToDomain *) node;
1764
1765                                 APP_JUMB(cd->resulttype);
1766                                 JumbleExpr(jstate, (Node *) cd->arg);
1767                         }
1768                         break;
1769                 case T_CoerceToDomainValue:
1770                         {
1771                                 CoerceToDomainValue *cdv = (CoerceToDomainValue *) node;
1772
1773                                 APP_JUMB(cdv->typeId);
1774                         }
1775                         break;
1776                 case T_SetToDefault:
1777                         {
1778                                 SetToDefault *sd = (SetToDefault *) node;
1779
1780                                 APP_JUMB(sd->typeId);
1781                         }
1782                         break;
1783                 case T_CurrentOfExpr:
1784                         {
1785                                 CurrentOfExpr *ce = (CurrentOfExpr *) node;
1786
1787                                 APP_JUMB(ce->cvarno);
1788                                 if (ce->cursor_name)
1789                                         APP_JUMB_STRING(ce->cursor_name);
1790                                 APP_JUMB(ce->cursor_param);
1791                         }
1792                         break;
1793                 case T_TargetEntry:
1794                         {
1795                                 TargetEntry *tle = (TargetEntry *) node;
1796
1797                                 APP_JUMB(tle->resno);
1798                                 APP_JUMB(tle->ressortgroupref);
1799                                 JumbleExpr(jstate, (Node *) tle->expr);
1800                         }
1801                         break;
1802                 case T_RangeTblRef:
1803                         {
1804                                 RangeTblRef *rtr = (RangeTblRef *) node;
1805
1806                                 APP_JUMB(rtr->rtindex);
1807                         }
1808                         break;
1809                 case T_JoinExpr:
1810                         {
1811                                 JoinExpr   *join = (JoinExpr *) node;
1812
1813                                 APP_JUMB(join->jointype);
1814                                 APP_JUMB(join->isNatural);
1815                                 APP_JUMB(join->rtindex);
1816                                 JumbleExpr(jstate, join->larg);
1817                                 JumbleExpr(jstate, join->rarg);
1818                                 JumbleExpr(jstate, join->quals);
1819                         }
1820                         break;
1821                 case T_FromExpr:
1822                         {
1823                                 FromExpr   *from = (FromExpr *) node;
1824
1825                                 JumbleExpr(jstate, (Node *) from->fromlist);
1826                                 JumbleExpr(jstate, from->quals);
1827                         }
1828                         break;
1829                 case T_List:
1830                         foreach(temp, (List *) node)
1831                         {
1832                                 JumbleExpr(jstate, (Node *) lfirst(temp));
1833                         }
1834                         break;
1835                 case T_SortGroupClause:
1836                         {
1837                                 SortGroupClause *sgc = (SortGroupClause *) node;
1838
1839                                 APP_JUMB(sgc->tleSortGroupRef);
1840                                 APP_JUMB(sgc->eqop);
1841                                 APP_JUMB(sgc->sortop);
1842                                 APP_JUMB(sgc->nulls_first);
1843                         }
1844                         break;
1845                 case T_WindowClause:
1846                         {
1847                                 WindowClause *wc = (WindowClause *) node;
1848
1849                                 APP_JUMB(wc->winref);
1850                                 APP_JUMB(wc->frameOptions);
1851                                 JumbleExpr(jstate, (Node *) wc->partitionClause);
1852                                 JumbleExpr(jstate, (Node *) wc->orderClause);
1853                                 JumbleExpr(jstate, wc->startOffset);
1854                                 JumbleExpr(jstate, wc->endOffset);
1855                         }
1856                         break;
1857                 case T_CommonTableExpr:
1858                         {
1859                                 CommonTableExpr *cte = (CommonTableExpr *) node;
1860
1861                                 /* we store the string name because RTE_CTE RTEs need it */
1862                                 APP_JUMB_STRING(cte->ctename);
1863                                 JumbleQuery(jstate, (Query *) cte->ctequery);
1864                         }
1865                         break;
1866                 case T_SetOperationStmt:
1867                         {
1868                                 SetOperationStmt *setop = (SetOperationStmt *) node;
1869
1870                                 APP_JUMB(setop->op);
1871                                 APP_JUMB(setop->all);
1872                                 JumbleExpr(jstate, setop->larg);
1873                                 JumbleExpr(jstate, setop->rarg);
1874                         }
1875                         break;
1876                 default:
1877                         /* Only a warning, since we can stumble along anyway */
1878                         elog(WARNING, "unrecognized node type: %d",
1879                                  (int) nodeTag(node));
1880                         break;
1881         }
1882 }
1883
1884 /*
1885  * Record location of constant within query string of query tree
1886  * that is currently being walked.
1887  */
1888 static void
1889 RecordConstLocation(pgssJumbleState *jstate, int location)
1890 {
1891         /* -1 indicates unknown or undefined location */
1892         if (location >= 0)
1893         {
1894                 /* enlarge array if needed */
1895                 if (jstate->clocations_count >= jstate->clocations_buf_size)
1896                 {
1897                         jstate->clocations_buf_size *= 2;
1898                         jstate->clocations = (pgssLocationLen *)
1899                                 repalloc(jstate->clocations,
1900                                                  jstate->clocations_buf_size *
1901                                                  sizeof(pgssLocationLen));
1902                 }
1903                 jstate->clocations[jstate->clocations_count].location = location;
1904                 /* initialize lengths to -1 to simplify fill_in_constant_lengths */
1905                 jstate->clocations[jstate->clocations_count].length = -1;
1906                 jstate->clocations_count++;
1907         }
1908 }
1909
1910 /*
1911  * Generate a normalized version of the query string that will be used to
1912  * represent all similar queries.
1913  *
1914  * Note that the normalized representation may well vary depending on
1915  * just which "equivalent" query is used to create the hashtable entry.
1916  * We assume this is OK.
1917  *
1918  * *query_len_p contains the input string length, and is updated with
1919  * the result string length (which cannot be longer) on exit.
1920  *
1921  * Returns a palloc'd string, which is not necessarily null-terminated.
1922  */
1923 static char *
1924 generate_normalized_query(pgssJumbleState *jstate, const char *query,
1925                                                   int *query_len_p, int encoding)
1926 {
1927         char       *norm_query;
1928         int                     query_len = *query_len_p;
1929         int                     max_output_len;
1930         int                     i,
1931                                 len_to_wrt,             /* Length (in bytes) to write */
1932                                 quer_loc = 0,   /* Source query byte location */
1933                                 n_quer_loc = 0, /* Normalized query byte location */
1934                                 last_off = 0,   /* Offset from start for previous tok */
1935                                 last_tok_len = 0;               /* Length (in bytes) of that tok */
1936
1937         /*
1938          * Get constants' lengths (core system only gives us locations).  Note
1939          * this also ensures the items are sorted by location.
1940          */
1941         fill_in_constant_lengths(jstate, query);
1942
1943         /* Allocate result buffer, ensuring we limit result to allowed size */
1944 #ifdef NOT_USED
1945         max_output_len = Min(query_len, pgss->query_size - 1);
1946 #endif
1947         /* XXX: pg_hint_plan doesn't truncate query string. */
1948         max_output_len = query_len;
1949         norm_query = palloc(max_output_len);
1950
1951         for (i = 0; i < jstate->clocations_count; i++)
1952         {
1953                 int                     off,            /* Offset from start for cur tok */
1954                                         tok_len;        /* Length (in bytes) of that tok */
1955
1956                 off = jstate->clocations[i].location;
1957                 tok_len = jstate->clocations[i].length;
1958
1959                 if (tok_len < 0)
1960                         continue;                       /* ignore any duplicates */
1961
1962                 /* Copy next chunk, or as much as will fit */
1963                 len_to_wrt = off - last_off;
1964                 len_to_wrt -= last_tok_len;
1965                 len_to_wrt = Min(len_to_wrt, max_output_len - n_quer_loc);
1966
1967                 Assert(len_to_wrt >= 0);
1968                 memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
1969                 n_quer_loc += len_to_wrt;
1970
1971                 if (n_quer_loc < max_output_len)
1972                         norm_query[n_quer_loc++] = '?';
1973
1974                 quer_loc = off + tok_len;
1975                 last_off = off;
1976                 last_tok_len = tok_len;
1977
1978                 /* If we run out of space, might as well stop iterating */
1979                 if (n_quer_loc >= max_output_len)
1980                         break;
1981         }
1982
1983         /*
1984          * We've copied up until the last ignorable constant.  Copy over the
1985          * remaining bytes of the original query string, or at least as much as
1986          * will fit.
1987          */
1988         len_to_wrt = query_len - quer_loc;
1989         len_to_wrt = Min(len_to_wrt, max_output_len - n_quer_loc);
1990
1991         Assert(len_to_wrt >= 0);
1992         memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
1993         n_quer_loc += len_to_wrt;
1994
1995         /* XXX: pg_hint_plan doesn't truncate query string. */
1996 #ifdef NOT_USED
1997         /*
1998          * If we ran out of space, we need to do an encoding-aware truncation,
1999          * just to make sure we don't have an incomplete character at the end.
2000          */
2001         if (n_quer_loc >= max_output_len)
2002                 query_len = pg_encoding_mbcliplen(encoding,
2003                                                                                   norm_query,
2004                                                                                   n_quer_loc,
2005                                                                                   pgss->query_size - 1);
2006         else
2007 #endif
2008                 query_len = n_quer_loc;
2009
2010         *query_len_p = query_len;
2011         return norm_query;
2012 }
2013
2014 /*
2015  * Given a valid SQL string and an array of constant-location records,
2016  * fill in the textual lengths of those constants.
2017  *
2018  * The constants may use any allowed constant syntax, such as float literals,
2019  * bit-strings, single-quoted strings and dollar-quoted strings.  This is
2020  * accomplished by using the public API for the core scanner.
2021  *
2022  * It is the caller's job to ensure that the string is a valid SQL statement
2023  * with constants at the indicated locations.  Since in practice the string
2024  * has already been parsed, and the locations that the caller provides will
2025  * have originated from within the authoritative parser, this should not be
2026  * a problem.
2027  *
2028  * Duplicate constant pointers are possible, and will have their lengths
2029  * marked as '-1', so that they are later ignored.      (Actually, we assume the
2030  * lengths were initialized as -1 to start with, and don't change them here.)
2031  *
2032  * N.B. There is an assumption that a '-' character at a Const location begins
2033  * a negative numeric constant.  This precludes there ever being another
2034  * reason for a constant to start with a '-'.
2035  */
2036 static void
2037 fill_in_constant_lengths(pgssJumbleState *jstate, const char *query)
2038 {
2039         pgssLocationLen *locs;
2040         core_yyscan_t yyscanner;
2041         core_yy_extra_type yyextra;
2042         core_YYSTYPE yylval;
2043         YYLTYPE         yylloc;
2044         int                     last_loc = -1;
2045         int                     i;
2046
2047         /*
2048          * Sort the records by location so that we can process them in order while
2049          * scanning the query text.
2050          */
2051         if (jstate->clocations_count > 1)
2052                 qsort(jstate->clocations, jstate->clocations_count,
2053                           sizeof(pgssLocationLen), comp_location);
2054         locs = jstate->clocations;
2055
2056         /* initialize the flex scanner --- should match raw_parser() */
2057         yyscanner = scanner_init(query,
2058                                                          &yyextra,
2059                                                          ScanKeywords,
2060                                                          NumScanKeywords);
2061
2062         /* Search for each constant, in sequence */
2063         for (i = 0; i < jstate->clocations_count; i++)
2064         {
2065                 int                     loc = locs[i].location;
2066                 int                     tok;
2067
2068                 Assert(loc >= 0);
2069
2070                 if (loc <= last_loc)
2071                         continue;                       /* Duplicate constant, ignore */
2072
2073                 /* Lex tokens until we find the desired constant */
2074                 for (;;)
2075                 {
2076                         tok = core_yylex(&yylval, &yylloc, yyscanner);
2077
2078                         /* We should not hit end-of-string, but if we do, behave sanely */
2079                         if (tok == 0)
2080                                 break;                  /* out of inner for-loop */
2081
2082                         /*
2083                          * We should find the token position exactly, but if we somehow
2084                          * run past it, work with that.
2085                          */
2086                         if (yylloc >= loc)
2087                         {
2088                                 if (query[loc] == '-')
2089                                 {
2090                                         /*
2091                                          * It's a negative value - this is the one and only case
2092                                          * where we replace more than a single token.
2093                                          *
2094                                          * Do not compensate for the core system's special-case
2095                                          * adjustment of location to that of the leading '-'
2096                                          * operator in the event of a negative constant.  It is
2097                                          * also useful for our purposes to start from the minus
2098                                          * symbol.      In this way, queries like "select * from foo
2099                                          * where bar = 1" and "select * from foo where bar = -2"
2100                                          * will have identical normalized query strings.
2101                                          */
2102                                         tok = core_yylex(&yylval, &yylloc, yyscanner);
2103                                         if (tok == 0)
2104                                                 break;  /* out of inner for-loop */
2105                                 }
2106
2107                                 /*
2108                                  * We now rely on the assumption that flex has placed a zero
2109                                  * byte after the text of the current token in scanbuf.
2110                                  */
2111                                 locs[i].length = strlen(yyextra.scanbuf + loc);
2112                                 break;                  /* out of inner for-loop */
2113                         }
2114                 }
2115
2116                 /* If we hit end-of-string, give up, leaving remaining lengths -1 */
2117                 if (tok == 0)
2118                         break;
2119
2120                 last_loc = loc;
2121         }
2122
2123         scanner_finish(yyscanner);
2124 }
2125
2126 /*
2127  * comp_location: comparator for qsorting pgssLocationLen structs by location
2128  */
2129 static int
2130 comp_location(const void *a, const void *b)
2131 {
2132         int                     l = ((const pgssLocationLen *) a)->location;
2133         int                     r = ((const pgssLocationLen *) b)->location;
2134
2135         if (l < r)
2136                 return -1;
2137         else if (l > r)
2138                 return +1;
2139         else
2140                 return 0;
2141 }