OSDN Git Service

7fc34a4129c9c1a6eb938c8293b3ad8326b7b534
[pghintplan/pg_hint_plan.git] / pg_stat_statements.c
1 /*-------------------------------------------------------------------------
2  *
3  * pg_stat_statements.c
4  * 
5  * Part of pg_stat_statements.c in PostgreSQL 9.5.
6  *
7  * Copyright (c) 2008-2016, PostgreSQL Global Development Group
8  *
9  *-------------------------------------------------------------------------
10  */
11 #include "postgres.h"
12
13 #include <sys/stat.h>
14
15 #include "access/hash.h"
16 #include "parser/scanner.h"
17
18 static void AppendJumble(pgssJumbleState *jstate,
19                          const unsigned char *item, Size size);
20 static void JumbleQuery(pgssJumbleState *jstate, Query *query);
21 static void JumbleRangeTable(pgssJumbleState *jstate, List *rtable);
22 static void JumbleExpr(pgssJumbleState *jstate, Node *node);
23 static void RecordConstLocation(pgssJumbleState *jstate, int location);
24 static void fill_in_constant_lengths(pgssJumbleState *jstate, const char *query);
25 static int      comp_location(const void *a, const void *b);
26
27 /*
28  * AppendJumble: Append a value that is substantive in a given query to
29  * the current jumble.
30  */
31 static void
32 AppendJumble(pgssJumbleState *jstate, const unsigned char *item, Size size)
33 {
34         unsigned char *jumble = jstate->jumble;
35         Size            jumble_len = jstate->jumble_len;
36
37         /*
38          * Whenever the jumble buffer is full, we hash the current contents and
39          * reset the buffer to contain just that hash value, thus relying on the
40          * hash to summarize everything so far.
41          */
42         while (size > 0)
43         {
44                 Size            part_size;
45
46                 if (jumble_len >= JUMBLE_SIZE)
47                 {
48                         uint32          start_hash = hash_any(jumble, JUMBLE_SIZE);
49
50                         memcpy(jumble, &start_hash, sizeof(start_hash));
51                         jumble_len = sizeof(start_hash);
52                 }
53                 part_size = Min(size, JUMBLE_SIZE - jumble_len);
54                 memcpy(jumble + jumble_len, item, part_size);
55                 jumble_len += part_size;
56                 item += part_size;
57                 size -= part_size;
58         }
59         jstate->jumble_len = jumble_len;
60 }
61
/*
 * Convenience macros layered over AppendJumble.  Both expand to a call that
 * uses a variable named "jstate", which must be in scope at the point of use.
 *
 * APP_JUMB(field)       appends the raw bytes of "field"; its address is
 *                       taken, so it must be an lvalue.
 * APP_JUMB_STRING(cstr) appends a C string including its terminating NUL.
 */
#define APP_JUMB(field) \
    AppendJumble(jstate, (const unsigned char *) &(field), sizeof(field))
#define APP_JUMB_STRING(cstr) \
    AppendJumble(jstate, (const unsigned char *) (cstr), strlen(cstr) + 1)
70
71 /*
72  * JumbleQuery: Selectively serialize the query tree, appending significant
73  * data to the "query jumble" while ignoring nonsignificant data.
74  *
75  * Rule of thumb for what to include is that we should ignore anything not
76  * semantically significant (such as alias names) as well as anything that can
77  * be deduced from child nodes (else we'd just be double-hashing that piece
78  * of information).
79  */
80 static void
81 JumbleQuery(pgssJumbleState *jstate, Query *query)
82 {
83         Assert(IsA(query, Query));
84         Assert(query->utilityStmt == NULL);
85
86         APP_JUMB(query->commandType);
87         /* resultRelation is usually predictable from commandType */
88         JumbleExpr(jstate, (Node *) query->cteList);
89         JumbleRangeTable(jstate, query->rtable);
90         JumbleExpr(jstate, (Node *) query->jointree);
91         JumbleExpr(jstate, (Node *) query->targetList);
92         JumbleExpr(jstate, (Node *) query->onConflict);
93         JumbleExpr(jstate, (Node *) query->returningList);
94         JumbleExpr(jstate, (Node *) query->groupClause);
95         JumbleExpr(jstate, (Node *) query->groupingSets);
96         JumbleExpr(jstate, query->havingQual);
97         JumbleExpr(jstate, (Node *) query->windowClause);
98         JumbleExpr(jstate, (Node *) query->distinctClause);
99         JumbleExpr(jstate, (Node *) query->sortClause);
100         JumbleExpr(jstate, query->limitOffset);
101         JumbleExpr(jstate, query->limitCount);
102         /* we ignore rowMarks */
103         JumbleExpr(jstate, query->setOperations);
104 }
105
106 /*
107  * Jumble a range table
108  */
109 static void
110 JumbleRangeTable(pgssJumbleState *jstate, List *rtable)
111 {
112         ListCell   *lc;
113
114         foreach(lc, rtable)
115         {
116                 RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
117
118                 Assert(IsA(rte, RangeTblEntry));
119                 APP_JUMB(rte->rtekind);
120                 switch (rte->rtekind)
121                 {
122                         case RTE_RELATION:
123                                 APP_JUMB(rte->relid);
124                                 JumbleExpr(jstate, (Node *) rte->tablesample);
125                                 break;
126                         case RTE_SUBQUERY:
127                                 JumbleQuery(jstate, rte->subquery);
128                                 break;
129                         case RTE_JOIN:
130                                 APP_JUMB(rte->jointype);
131                                 break;
132                         case RTE_FUNCTION:
133                                 JumbleExpr(jstate, (Node *) rte->functions);
134                                 break;
135                         case RTE_VALUES:
136                                 JumbleExpr(jstate, (Node *) rte->values_lists);
137                                 break;
138                         case RTE_CTE:
139
140                                 /*
141                                  * Depending on the CTE name here isn't ideal, but it's the
142                                  * only info we have to identify the referenced WITH item.
143                                  */
144                                 APP_JUMB_STRING(rte->ctename);
145                                 APP_JUMB(rte->ctelevelsup);
146                                 break;
147                         default:
148                                 elog(ERROR, "unrecognized RTE kind: %d", (int) rte->rtekind);
149                                 break;
150                 }
151         }
152 }
153
154 /*
155  * Jumble an expression tree
156  *
157  * In general this function should handle all the same node types that
158  * expression_tree_walker() does, and therefore it's coded to be as parallel
159  * to that function as possible.  However, since we are only invoked on
160  * queries immediately post-parse-analysis, we need not handle node types
161  * that only appear in planning.
162  *
163  * Note: the reason we don't simply use expression_tree_walker() is that the
164  * point of that function is to support tree walkers that don't care about
165  * most tree node types, but here we care about all types.  We should complain
166  * about any unrecognized node type.
167  */
168 static void
169 JumbleExpr(pgssJumbleState *jstate, Node *node)
170 {
171         ListCell   *temp;
172
173         if (node == NULL)
174                 return;
175
176         /* Guard against stack overflow due to overly complex expressions */
177         check_stack_depth();
178
179         /*
180          * We always emit the node's NodeTag, then any additional fields that are
181          * considered significant, and then we recurse to any child nodes.
182          */
183         APP_JUMB(node->type);
184
185         switch (nodeTag(node))
186         {
187                 case T_Var:
188                         {
189                                 Var                *var = (Var *) node;
190
191                                 APP_JUMB(var->varno);
192                                 APP_JUMB(var->varattno);
193                                 APP_JUMB(var->varlevelsup);
194                         }
195                         break;
196                 case T_Const:
197                         {
198                                 Const      *c = (Const *) node;
199
200                                 /* We jumble only the constant's type, not its value */
201                                 APP_JUMB(c->consttype);
202                                 /* Also, record its parse location for query normalization */
203                                 RecordConstLocation(jstate, c->location);
204                         }
205                         break;
206                 case T_Param:
207                         {
208                                 Param      *p = (Param *) node;
209
210                                 APP_JUMB(p->paramkind);
211                                 APP_JUMB(p->paramid);
212                                 APP_JUMB(p->paramtype);
213                         }
214                         break;
215                 case T_Aggref:
216                         {
217                                 Aggref     *expr = (Aggref *) node;
218
219                                 APP_JUMB(expr->aggfnoid);
220                                 JumbleExpr(jstate, (Node *) expr->aggdirectargs);
221                                 JumbleExpr(jstate, (Node *) expr->args);
222                                 JumbleExpr(jstate, (Node *) expr->aggorder);
223                                 JumbleExpr(jstate, (Node *) expr->aggdistinct);
224                                 JumbleExpr(jstate, (Node *) expr->aggfilter);
225                         }
226                         break;
227                 case T_GroupingFunc:
228                         {
229                                 GroupingFunc *grpnode = (GroupingFunc *) node;
230
231                                 JumbleExpr(jstate, (Node *) grpnode->refs);
232                         }
233                         break;
234                 case T_WindowFunc:
235                         {
236                                 WindowFunc *expr = (WindowFunc *) node;
237
238                                 APP_JUMB(expr->winfnoid);
239                                 APP_JUMB(expr->winref);
240                                 JumbleExpr(jstate, (Node *) expr->args);
241                                 JumbleExpr(jstate, (Node *) expr->aggfilter);
242                         }
243                         break;
244                 case T_ArrayRef:
245                         {
246                                 ArrayRef   *aref = (ArrayRef *) node;
247
248                                 JumbleExpr(jstate, (Node *) aref->refupperindexpr);
249                                 JumbleExpr(jstate, (Node *) aref->reflowerindexpr);
250                                 JumbleExpr(jstate, (Node *) aref->refexpr);
251                                 JumbleExpr(jstate, (Node *) aref->refassgnexpr);
252                         }
253                         break;
254                 case T_FuncExpr:
255                         {
256                                 FuncExpr   *expr = (FuncExpr *) node;
257
258                                 APP_JUMB(expr->funcid);
259                                 JumbleExpr(jstate, (Node *) expr->args);
260                         }
261                         break;
262                 case T_NamedArgExpr:
263                         {
264                                 NamedArgExpr *nae = (NamedArgExpr *) node;
265
266                                 APP_JUMB(nae->argnumber);
267                                 JumbleExpr(jstate, (Node *) nae->arg);
268                         }
269                         break;
270                 case T_OpExpr:
271                 case T_DistinctExpr:    /* struct-equivalent to OpExpr */
272                 case T_NullIfExpr:              /* struct-equivalent to OpExpr */
273                         {
274                                 OpExpr     *expr = (OpExpr *) node;
275
276                                 APP_JUMB(expr->opno);
277                                 JumbleExpr(jstate, (Node *) expr->args);
278                         }
279                         break;
280                 case T_ScalarArrayOpExpr:
281                         {
282                                 ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) node;
283
284                                 APP_JUMB(expr->opno);
285                                 APP_JUMB(expr->useOr);
286                                 JumbleExpr(jstate, (Node *) expr->args);
287                         }
288                         break;
289                 case T_BoolExpr:
290                         {
291                                 BoolExpr   *expr = (BoolExpr *) node;
292
293                                 APP_JUMB(expr->boolop);
294                                 JumbleExpr(jstate, (Node *) expr->args);
295                         }
296                         break;
297                 case T_SubLink:
298                         {
299                                 SubLink    *sublink = (SubLink *) node;
300
301                                 APP_JUMB(sublink->subLinkType);
302                                 APP_JUMB(sublink->subLinkId);
303                                 JumbleExpr(jstate, (Node *) sublink->testexpr);
304                                 JumbleQuery(jstate, (Query *) sublink->subselect);
305                         }
306                         break;
307                 case T_FieldSelect:
308                         {
309                                 FieldSelect *fs = (FieldSelect *) node;
310
311                                 APP_JUMB(fs->fieldnum);
312                                 JumbleExpr(jstate, (Node *) fs->arg);
313                         }
314                         break;
315                 case T_FieldStore:
316                         {
317                                 FieldStore *fstore = (FieldStore *) node;
318
319                                 JumbleExpr(jstate, (Node *) fstore->arg);
320                                 JumbleExpr(jstate, (Node *) fstore->newvals);
321                         }
322                         break;
323                 case T_RelabelType:
324                         {
325                                 RelabelType *rt = (RelabelType *) node;
326
327                                 APP_JUMB(rt->resulttype);
328                                 JumbleExpr(jstate, (Node *) rt->arg);
329                         }
330                         break;
331                 case T_CoerceViaIO:
332                         {
333                                 CoerceViaIO *cio = (CoerceViaIO *) node;
334
335                                 APP_JUMB(cio->resulttype);
336                                 JumbleExpr(jstate, (Node *) cio->arg);
337                         }
338                         break;
339                 case T_ArrayCoerceExpr:
340                         {
341                                 ArrayCoerceExpr *acexpr = (ArrayCoerceExpr *) node;
342
343                                 APP_JUMB(acexpr->resulttype);
344                                 JumbleExpr(jstate, (Node *) acexpr->arg);
345                         }
346                         break;
347                 case T_ConvertRowtypeExpr:
348                         {
349                                 ConvertRowtypeExpr *crexpr = (ConvertRowtypeExpr *) node;
350
351                                 APP_JUMB(crexpr->resulttype);
352                                 JumbleExpr(jstate, (Node *) crexpr->arg);
353                         }
354                         break;
355                 case T_CollateExpr:
356                         {
357                                 CollateExpr *ce = (CollateExpr *) node;
358
359                                 APP_JUMB(ce->collOid);
360                                 JumbleExpr(jstate, (Node *) ce->arg);
361                         }
362                         break;
363                 case T_CaseExpr:
364                         {
365                                 CaseExpr   *caseexpr = (CaseExpr *) node;
366
367                                 JumbleExpr(jstate, (Node *) caseexpr->arg);
368                                 foreach(temp, caseexpr->args)
369                                 {
370                                         CaseWhen   *when = (CaseWhen *) lfirst(temp);
371
372                                         Assert(IsA(when, CaseWhen));
373                                         JumbleExpr(jstate, (Node *) when->expr);
374                                         JumbleExpr(jstate, (Node *) when->result);
375                                 }
376                                 JumbleExpr(jstate, (Node *) caseexpr->defresult);
377                         }
378                         break;
379                 case T_CaseTestExpr:
380                         {
381                                 CaseTestExpr *ct = (CaseTestExpr *) node;
382
383                                 APP_JUMB(ct->typeId);
384                         }
385                         break;
386                 case T_ArrayExpr:
387                         JumbleExpr(jstate, (Node *) ((ArrayExpr *) node)->elements);
388                         break;
389                 case T_RowExpr:
390                         JumbleExpr(jstate, (Node *) ((RowExpr *) node)->args);
391                         break;
392                 case T_RowCompareExpr:
393                         {
394                                 RowCompareExpr *rcexpr = (RowCompareExpr *) node;
395
396                                 APP_JUMB(rcexpr->rctype);
397                                 JumbleExpr(jstate, (Node *) rcexpr->largs);
398                                 JumbleExpr(jstate, (Node *) rcexpr->rargs);
399                         }
400                         break;
401                 case T_CoalesceExpr:
402                         JumbleExpr(jstate, (Node *) ((CoalesceExpr *) node)->args);
403                         break;
404                 case T_MinMaxExpr:
405                         {
406                                 MinMaxExpr *mmexpr = (MinMaxExpr *) node;
407
408                                 APP_JUMB(mmexpr->op);
409                                 JumbleExpr(jstate, (Node *) mmexpr->args);
410                         }
411                         break;
412                 case T_XmlExpr:
413                         {
414                                 XmlExpr    *xexpr = (XmlExpr *) node;
415
416                                 APP_JUMB(xexpr->op);
417                                 JumbleExpr(jstate, (Node *) xexpr->named_args);
418                                 JumbleExpr(jstate, (Node *) xexpr->args);
419                         }
420                         break;
421                 case T_NullTest:
422                         {
423                                 NullTest   *nt = (NullTest *) node;
424
425                                 APP_JUMB(nt->nulltesttype);
426                                 JumbleExpr(jstate, (Node *) nt->arg);
427                         }
428                         break;
429                 case T_BooleanTest:
430                         {
431                                 BooleanTest *bt = (BooleanTest *) node;
432
433                                 APP_JUMB(bt->booltesttype);
434                                 JumbleExpr(jstate, (Node *) bt->arg);
435                         }
436                         break;
437                 case T_CoerceToDomain:
438                         {
439                                 CoerceToDomain *cd = (CoerceToDomain *) node;
440
441                                 APP_JUMB(cd->resulttype);
442                                 JumbleExpr(jstate, (Node *) cd->arg);
443                         }
444                         break;
445                 case T_CoerceToDomainValue:
446                         {
447                                 CoerceToDomainValue *cdv = (CoerceToDomainValue *) node;
448
449                                 APP_JUMB(cdv->typeId);
450                         }
451                         break;
452                 case T_SetToDefault:
453                         {
454                                 SetToDefault *sd = (SetToDefault *) node;
455
456                                 APP_JUMB(sd->typeId);
457                         }
458                         break;
459                 case T_CurrentOfExpr:
460                         {
461                                 CurrentOfExpr *ce = (CurrentOfExpr *) node;
462
463                                 APP_JUMB(ce->cvarno);
464                                 if (ce->cursor_name)
465                                         APP_JUMB_STRING(ce->cursor_name);
466                                 APP_JUMB(ce->cursor_param);
467                         }
468                         break;
469                 case T_InferenceElem:
470                         {
471                                 InferenceElem *ie = (InferenceElem *) node;
472
473                                 APP_JUMB(ie->infercollid);
474                                 APP_JUMB(ie->inferopclass);
475                                 JumbleExpr(jstate, ie->expr);
476                         }
477                         break;
478                 case T_TargetEntry:
479                         {
480                                 TargetEntry *tle = (TargetEntry *) node;
481
482                                 APP_JUMB(tle->resno);
483                                 APP_JUMB(tle->ressortgroupref);
484                                 JumbleExpr(jstate, (Node *) tle->expr);
485                         }
486                         break;
487                 case T_RangeTblRef:
488                         {
489                                 RangeTblRef *rtr = (RangeTblRef *) node;
490
491                                 APP_JUMB(rtr->rtindex);
492                         }
493                         break;
494                 case T_JoinExpr:
495                         {
496                                 JoinExpr   *join = (JoinExpr *) node;
497
498                                 APP_JUMB(join->jointype);
499                                 APP_JUMB(join->isNatural);
500                                 APP_JUMB(join->rtindex);
501                                 JumbleExpr(jstate, join->larg);
502                                 JumbleExpr(jstate, join->rarg);
503                                 JumbleExpr(jstate, join->quals);
504                         }
505                         break;
506                 case T_FromExpr:
507                         {
508                                 FromExpr   *from = (FromExpr *) node;
509
510                                 JumbleExpr(jstate, (Node *) from->fromlist);
511                                 JumbleExpr(jstate, from->quals);
512                         }
513                         break;
514                 case T_OnConflictExpr:
515                         {
516                                 OnConflictExpr *conf = (OnConflictExpr *) node;
517
518                                 APP_JUMB(conf->action);
519                                 JumbleExpr(jstate, (Node *) conf->arbiterElems);
520                                 JumbleExpr(jstate, conf->arbiterWhere);
521                                 JumbleExpr(jstate, (Node *) conf->onConflictSet);
522                                 JumbleExpr(jstate, conf->onConflictWhere);
523                                 APP_JUMB(conf->constraint);
524                                 APP_JUMB(conf->exclRelIndex);
525                                 JumbleExpr(jstate, (Node *) conf->exclRelTlist);
526                         }
527                         break;
528                 case T_List:
529                         foreach(temp, (List *) node)
530                         {
531                                 JumbleExpr(jstate, (Node *) lfirst(temp));
532                         }
533                         break;
534                 case T_IntList:
535                         foreach(temp, (List *) node)
536                         {
537                                 APP_JUMB(lfirst_int(temp));
538                         }
539                         break;
540                 case T_SortGroupClause:
541                         {
542                                 SortGroupClause *sgc = (SortGroupClause *) node;
543
544                                 APP_JUMB(sgc->tleSortGroupRef);
545                                 APP_JUMB(sgc->eqop);
546                                 APP_JUMB(sgc->sortop);
547                                 APP_JUMB(sgc->nulls_first);
548                         }
549                         break;
550                 case T_GroupingSet:
551                         {
552                                 GroupingSet *gsnode = (GroupingSet *) node;
553
554                                 JumbleExpr(jstate, (Node *) gsnode->content);
555                         }
556                         break;
557                 case T_WindowClause:
558                         {
559                                 WindowClause *wc = (WindowClause *) node;
560
561                                 APP_JUMB(wc->winref);
562                                 APP_JUMB(wc->frameOptions);
563                                 JumbleExpr(jstate, (Node *) wc->partitionClause);
564                                 JumbleExpr(jstate, (Node *) wc->orderClause);
565                                 JumbleExpr(jstate, wc->startOffset);
566                                 JumbleExpr(jstate, wc->endOffset);
567                         }
568                         break;
569                 case T_CommonTableExpr:
570                         {
571                                 CommonTableExpr *cte = (CommonTableExpr *) node;
572
573                                 /* we store the string name because RTE_CTE RTEs need it */
574                                 APP_JUMB_STRING(cte->ctename);
575                                 JumbleQuery(jstate, (Query *) cte->ctequery);
576                         }
577                         break;
578                 case T_SetOperationStmt:
579                         {
580                                 SetOperationStmt *setop = (SetOperationStmt *) node;
581
582                                 APP_JUMB(setop->op);
583                                 APP_JUMB(setop->all);
584                                 JumbleExpr(jstate, setop->larg);
585                                 JumbleExpr(jstate, setop->rarg);
586                         }
587                         break;
588                 case T_RangeTblFunction:
589                         {
590                                 RangeTblFunction *rtfunc = (RangeTblFunction *) node;
591
592                                 JumbleExpr(jstate, rtfunc->funcexpr);
593                         }
594                         break;
595                 case T_TableSampleClause:
596                         {
597                                 TableSampleClause *tsc = (TableSampleClause *) node;
598
599                                 APP_JUMB(tsc->tsmhandler);
600                                 JumbleExpr(jstate, (Node *) tsc->args);
601                                 JumbleExpr(jstate, (Node *) tsc->repeatable);
602                         }
603                         break;
604                 default:
605                         /* Only a warning, since we can stumble along anyway */
606                         elog(WARNING, "unrecognized node type: %d",
607                                  (int) nodeTag(node));
608                         break;
609         }
610 }
611
612 /*
613  * Record location of constant within query string of query tree
614  * that is currently being walked.
615  */
616 static void
617 RecordConstLocation(pgssJumbleState *jstate, int location)
618 {
619         /* -1 indicates unknown or undefined location */
620         if (location >= 0)
621         {
622                 /* enlarge array if needed */
623                 if (jstate->clocations_count >= jstate->clocations_buf_size)
624                 {
625                         jstate->clocations_buf_size *= 2;
626                         jstate->clocations = (pgssLocationLen *)
627                                 repalloc(jstate->clocations,
628                                                  jstate->clocations_buf_size *
629                                                  sizeof(pgssLocationLen));
630                 }
631                 jstate->clocations[jstate->clocations_count].location = location;
632                 /* initialize lengths to -1 to simplify fill_in_constant_lengths */
633                 jstate->clocations[jstate->clocations_count].length = -1;
634                 jstate->clocations_count++;
635         }
636 }
637
638 /*
639  * Generate a normalized version of the query string that will be used to
640  * represent all similar queries.
641  *
642  * Note that the normalized representation may well vary depending on
643  * just which "equivalent" query is used to create the hashtable entry.
644  * We assume this is OK.
645  *
646  * *query_len_p contains the input string length, and is updated with
647  * the result string length (which cannot be longer) on exit.
648  *
649  * Returns a palloc'd string.
650  */
651 static char *
652 generate_normalized_query(pgssJumbleState *jstate, const char *query,
653                                                   int *query_len_p, int encoding)
654 {
655         char       *norm_query;
656         int                     query_len = *query_len_p;
657         int                     i,
658                                 len_to_wrt,             /* Length (in bytes) to write */
659                                 quer_loc = 0,   /* Source query byte location */
660                                 n_quer_loc = 0, /* Normalized query byte location */
661                                 last_off = 0,   /* Offset from start for previous tok */
662                                 last_tok_len = 0;               /* Length (in bytes) of that tok */
663
664         /*
665          * Get constants' lengths (core system only gives us locations).  Note
666          * this also ensures the items are sorted by location.
667          */
668         fill_in_constant_lengths(jstate, query);
669
670         /* Allocate result buffer */
671         norm_query = palloc(query_len + 1);
672
673         for (i = 0; i < jstate->clocations_count; i++)
674         {
675                 int                     off,            /* Offset from start for cur tok */
676                                         tok_len;        /* Length (in bytes) of that tok */
677
678                 off = jstate->clocations[i].location;
679                 tok_len = jstate->clocations[i].length;
680
681                 if (tok_len < 0)
682                         continue;                       /* ignore any duplicates */
683
684                 /* Copy next chunk (what precedes the next constant) */
685                 len_to_wrt = off - last_off;
686                 len_to_wrt -= last_tok_len;
687
688                 Assert(len_to_wrt >= 0);
689                 memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
690                 n_quer_loc += len_to_wrt;
691
692                 /* And insert a '?' in place of the constant token */
693                 norm_query[n_quer_loc++] = '?';
694
695                 quer_loc = off + tok_len;
696                 last_off = off;
697                 last_tok_len = tok_len;
698         }
699
700         /*
701          * We've copied up until the last ignorable constant.  Copy over the
702          * remaining bytes of the original query string.
703          */
704         len_to_wrt = query_len - quer_loc;
705
706         Assert(len_to_wrt >= 0);
707         memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
708         n_quer_loc += len_to_wrt;
709
710         Assert(n_quer_loc <= query_len);
711         norm_query[n_quer_loc] = '\0';
712
713         *query_len_p = n_quer_loc;
714         return norm_query;
715 }
716
717 /*
718  * Given a valid SQL string and an array of constant-location records,
719  * fill in the textual lengths of those constants.
720  *
721  * The constants may use any allowed constant syntax, such as float literals,
722  * bit-strings, single-quoted strings and dollar-quoted strings.  This is
723  * accomplished by using the public API for the core scanner.
724  *
725  * It is the caller's job to ensure that the string is a valid SQL statement
726  * with constants at the indicated locations.  Since in practice the string
727  * has already been parsed, and the locations that the caller provides will
728  * have originated from within the authoritative parser, this should not be
729  * a problem.
730  *
731  * Duplicate constant pointers are possible, and will have their lengths
732  * marked as '-1', so that they are later ignored.  (Actually, we assume the
733  * lengths were initialized as -1 to start with, and don't change them here.)
734  *
735  * N.B. There is an assumption that a '-' character at a Const location begins
736  * a negative numeric constant.  This precludes there ever being another
737  * reason for a constant to start with a '-'.
738  */
739 static void
740 fill_in_constant_lengths(pgssJumbleState *jstate, const char *query)
741 {
742         pgssLocationLen *locs;
743         core_yyscan_t yyscanner;
744         core_yy_extra_type yyextra;
745         core_YYSTYPE yylval;
746         YYLTYPE         yylloc;
747         int                     last_loc = -1;
748         int                     i;
749
750         /*
751          * Sort the records by location so that we can process them in order while
752          * scanning the query text.
753          */
754         if (jstate->clocations_count > 1)
755                 qsort(jstate->clocations, jstate->clocations_count,
756                           sizeof(pgssLocationLen), comp_location);
757         locs = jstate->clocations;
758
759         /* initialize the flex scanner --- should match raw_parser() */
760         yyscanner = scanner_init(query,
761                                                          &yyextra,
762                                                          ScanKeywords,
763                                                          NumScanKeywords);
764
765         /* we don't want to re-emit any escape string warnings */
766         yyextra.escape_string_warning = false;
767
768         /* Search for each constant, in sequence */
769         for (i = 0; i < jstate->clocations_count; i++)
770         {
771                 int                     loc = locs[i].location;
772                 int                     tok;
773
774                 Assert(loc >= 0);
775
776                 if (loc <= last_loc)
777                         continue;                       /* Duplicate constant, ignore */
778
779                 /* Lex tokens until we find the desired constant */
780                 for (;;)
781                 {
782                         tok = core_yylex(&yylval, &yylloc, yyscanner);
783
784                         /* We should not hit end-of-string, but if we do, behave sanely */
785                         if (tok == 0)
786                                 break;                  /* out of inner for-loop */
787
788                         /*
789                          * We should find the token position exactly, but if we somehow
790                          * run past it, work with that.
791                          */
792                         if (yylloc >= loc)
793                         {
794                                 if (query[loc] == '-')
795                                 {
796                                         /*
797                                          * It's a negative value - this is the one and only case
798                                          * where we replace more than a single token.
799                                          *
800                                          * Do not compensate for the core system's special-case
801                                          * adjustment of location to that of the leading '-'
802                                          * operator in the event of a negative constant.  It is
803                                          * also useful for our purposes to start from the minus
804                                          * symbol.  In this way, queries like "select * from foo
805                                          * where bar = 1" and "select * from foo where bar = -2"
806                                          * will have identical normalized query strings.
807                                          */
808                                         tok = core_yylex(&yylval, &yylloc, yyscanner);
809                                         if (tok == 0)
810                                                 break;  /* out of inner for-loop */
811                                 }
812
813                                 /*
814                                  * We now rely on the assumption that flex has placed a zero
815                                  * byte after the text of the current token in scanbuf.
816                                  */
817                                 locs[i].length = strlen(yyextra.scanbuf + loc);
818                                 break;                  /* out of inner for-loop */
819                         }
820                 }
821
822                 /* If we hit end-of-string, give up, leaving remaining lengths -1 */
823                 if (tok == 0)
824                         break;
825
826                 last_loc = loc;
827         }
828
829         scanner_finish(yyscanner);
830 }
831
832 /*
833  * comp_location: comparator for qsorting pgssLocationLen structs by location
834  */
835 static int
836 comp_location(const void *a, const void *b)
837 {
838         int                     l = ((const pgssLocationLen *) a)->location;
839         int                     r = ((const pgssLocationLen *) b)->location;
840
841         if (l < r)
842                 return -1;
843         else if (l > r)
844                 return +1;
845         else
846                 return 0;
847 }