OSDN Git Service

25bf905aa57a322021ea77bc091ea5f97261b894
[pghintplan/pg_hint_plan.git] / pg_stat_statements.c
1 /*-------------------------------------------------------------------------
2  *
3  * pg_stat_statements.c
4  * 
5  * Part of pg_stat_statements.c in PostgreSQL 10.
6  *
7  * Copyright (c) 2008-2019, PostgreSQL Global Development Group
8  *
9  *-------------------------------------------------------------------------
10  */
11 #include "postgres.h"
12
13 #include <sys/stat.h>
14
15 #include "access/hash.h"
16 #include "parser/scanner.h"
17
18 static void AppendJumble(pgssJumbleState *jstate,
19                          const unsigned char *item, Size size);
20 static void JumbleQuery(pgssJumbleState *jstate, Query *query);
21 static void JumbleRangeTable(pgssJumbleState *jstate, List *rtable);
22 static void JumbleExpr(pgssJumbleState *jstate, Node *node);
23 static void RecordConstLocation(pgssJumbleState *jstate, int location);
24 static char *generate_normalized_query(pgssJumbleState *jstate, const char *query,
25                                                   int query_loc, int *query_len_p, int encoding);
26 static void fill_in_constant_lengths(pgssJumbleState *jstate, const char *query,
27                                                  int query_loc);
28 static int      comp_location(const void *a, const void *b);
29
30 /*
31  * AppendJumble: Append a value that is substantive in a given query to
32  * the current jumble.
33  */
34 static void
35 AppendJumble(pgssJumbleState *jstate, const unsigned char *item, Size size)
36 {
37         unsigned char *jumble = jstate->jumble;
38         Size            jumble_len = jstate->jumble_len;
39
40         /*
41          * Whenever the jumble buffer is full, we hash the current contents and
42          * reset the buffer to contain just that hash value, thus relying on the
43          * hash to summarize everything so far.
44          */
45         while (size > 0)
46         {
47                 Size            part_size;
48
49                 if (jumble_len >= JUMBLE_SIZE)
50                 {
51                         uint32          start_hash = hash_any(jumble, JUMBLE_SIZE);
52
53                         memcpy(jumble, &start_hash, sizeof(start_hash));
54                         jumble_len = sizeof(start_hash);
55                 }
56                 part_size = Min(size, JUMBLE_SIZE - jumble_len);
57                 memcpy(jumble + jumble_len, item, part_size);
58                 jumble_len += part_size;
59                 item += part_size;
60                 size -= part_size;
61         }
62         jstate->jumble_len = jumble_len;
63 }
64
65 /*
66  * Wrappers around AppendJumble to encapsulate details of serialization
67  * of individual local variable elements.
68  */
69 #define APP_JUMB(item) \
70         AppendJumble(jstate, (const unsigned char *) &(item), sizeof(item))
71 #define APP_JUMB_STRING(str) \
72         AppendJumble(jstate, (const unsigned char *) (str), strlen(str) + 1)
73
74 /*
75  * JumbleQuery: Selectively serialize the query tree, appending significant
76  * data to the "query jumble" while ignoring nonsignificant data.
77  *
78  * Rule of thumb for what to include is that we should ignore anything not
79  * semantically significant (such as alias names) as well as anything that can
80  * be deduced from child nodes (else we'd just be double-hashing that piece
81  * of information).
82  */
83 static void
84 JumbleQuery(pgssJumbleState *jstate, Query *query)
85 {
86         Assert(IsA(query, Query));
87         Assert(query->utilityStmt == NULL);
88
89         APP_JUMB(query->commandType);
90         /* resultRelation is usually predictable from commandType */
91         JumbleExpr(jstate, (Node *) query->cteList);
92         JumbleRangeTable(jstate, query->rtable);
93         JumbleExpr(jstate, (Node *) query->jointree);
94         JumbleExpr(jstate, (Node *) query->targetList);
95         JumbleExpr(jstate, (Node *) query->onConflict);
96         JumbleExpr(jstate, (Node *) query->returningList);
97         JumbleExpr(jstate, (Node *) query->groupClause);
98         JumbleExpr(jstate, (Node *) query->groupingSets);
99         JumbleExpr(jstate, query->havingQual);
100         JumbleExpr(jstate, (Node *) query->windowClause);
101         JumbleExpr(jstate, (Node *) query->distinctClause);
102         JumbleExpr(jstate, (Node *) query->sortClause);
103         JumbleExpr(jstate, query->limitOffset);
104         JumbleExpr(jstate, query->limitCount);
105         /* we ignore rowMarks */
106         JumbleExpr(jstate, query->setOperations);
107 }
108
109 /*
110  * Jumble a range table
111  */
112 static void
113 JumbleRangeTable(pgssJumbleState *jstate, List *rtable)
114 {
115         ListCell   *lc;
116
117         foreach(lc, rtable)
118         {
119                 RangeTblEntry *rte = lfirst_node(RangeTblEntry, lc);
120
121                 APP_JUMB(rte->rtekind);
122                 switch (rte->rtekind)
123                 {
124                         case RTE_RELATION:
125                                 APP_JUMB(rte->relid);
126                                 JumbleExpr(jstate, (Node *) rte->tablesample);
127                                 break;
128                         case RTE_SUBQUERY:
129                                 JumbleQuery(jstate, rte->subquery);
130                                 break;
131                         case RTE_JOIN:
132                                 APP_JUMB(rte->jointype);
133                                 break;
134                         case RTE_FUNCTION:
135                                 JumbleExpr(jstate, (Node *) rte->functions);
136                                 break;
137                         case RTE_TABLEFUNC:
138                                 JumbleExpr(jstate, (Node *) rte->tablefunc);
139                                 break;
140                         case RTE_VALUES:
141                                 JumbleExpr(jstate, (Node *) rte->values_lists);
142                                 break;
143                         case RTE_CTE:
144
145                                 /*
146                                  * Depending on the CTE name here isn't ideal, but it's the
147                                  * only info we have to identify the referenced WITH item.
148                                  */
149                                 APP_JUMB_STRING(rte->ctename);
150                                 APP_JUMB(rte->ctelevelsup);
151                                 break;
152                         case RTE_NAMEDTUPLESTORE:
153                                 APP_JUMB_STRING(rte->enrname);
154                                 break;
155                         default:
156                                 elog(ERROR, "unrecognized RTE kind: %d", (int) rte->rtekind);
157                                 break;
158                 }
159         }
160 }
161
162 /*
163  * Jumble an expression tree
164  *
165  * In general this function should handle all the same node types that
166  * expression_tree_walker() does, and therefore it's coded to be as parallel
167  * to that function as possible.  However, since we are only invoked on
168  * queries immediately post-parse-analysis, we need not handle node types
169  * that only appear in planning.
170  *
171  * Note: the reason we don't simply use expression_tree_walker() is that the
172  * point of that function is to support tree walkers that don't care about
173  * most tree node types, but here we care about all types.  We should complain
174  * about any unrecognized node type.
175  */
176 static void
177 JumbleExpr(pgssJumbleState *jstate, Node *node)
178 {
179         ListCell   *temp;
180
181         if (node == NULL)
182                 return;
183
184         /* Guard against stack overflow due to overly complex expressions */
185         check_stack_depth();
186
187         /*
188          * We always emit the node's NodeTag, then any additional fields that are
189          * considered significant, and then we recurse to any child nodes.
190          */
191         APP_JUMB(node->type);
192
193         switch (nodeTag(node))
194         {
195                 case T_Var:
196                         {
197                                 Var                *var = (Var *) node;
198
199                                 APP_JUMB(var->varno);
200                                 APP_JUMB(var->varattno);
201                                 APP_JUMB(var->varlevelsup);
202                         }
203                         break;
204                 case T_Const:
205                         {
206                                 Const      *c = (Const *) node;
207
208                                 /* We jumble only the constant's type, not its value */
209                                 APP_JUMB(c->consttype);
210                                 /* Also, record its parse location for query normalization */
211                                 RecordConstLocation(jstate, c->location);
212                         }
213                         break;
214                 case T_Param:
215                         {
216                                 Param      *p = (Param *) node;
217
218                                 APP_JUMB(p->paramkind);
219                                 APP_JUMB(p->paramid);
220                                 APP_JUMB(p->paramtype);
221                                 /* Also, track the highest external Param id */
222                                 if (p->paramkind == PARAM_EXTERN &&
223                                         p->paramid > jstate->highest_extern_param_id)
224                                         jstate->highest_extern_param_id = p->paramid;
225                         }
226                         break;
227                 case T_Aggref:
228                         {
229                                 Aggref     *expr = (Aggref *) node;
230
231                                 APP_JUMB(expr->aggfnoid);
232                                 JumbleExpr(jstate, (Node *) expr->aggdirectargs);
233                                 JumbleExpr(jstate, (Node *) expr->args);
234                                 JumbleExpr(jstate, (Node *) expr->aggorder);
235                                 JumbleExpr(jstate, (Node *) expr->aggdistinct);
236                                 JumbleExpr(jstate, (Node *) expr->aggfilter);
237                         }
238                         break;
239                 case T_GroupingFunc:
240                         {
241                                 GroupingFunc *grpnode = (GroupingFunc *) node;
242
243                                 JumbleExpr(jstate, (Node *) grpnode->refs);
244                         }
245                         break;
246                 case T_WindowFunc:
247                         {
248                                 WindowFunc *expr = (WindowFunc *) node;
249
250                                 APP_JUMB(expr->winfnoid);
251                                 APP_JUMB(expr->winref);
252                                 JumbleExpr(jstate, (Node *) expr->args);
253                                 JumbleExpr(jstate, (Node *) expr->aggfilter);
254                         }
255                         break;
256                 case T_ArrayRef:
257                         {
258                                 ArrayRef   *aref = (ArrayRef *) node;
259
260                                 JumbleExpr(jstate, (Node *) aref->refupperindexpr);
261                                 JumbleExpr(jstate, (Node *) aref->reflowerindexpr);
262                                 JumbleExpr(jstate, (Node *) aref->refexpr);
263                                 JumbleExpr(jstate, (Node *) aref->refassgnexpr);
264                         }
265                         break;
266                 case T_FuncExpr:
267                         {
268                                 FuncExpr   *expr = (FuncExpr *) node;
269
270                                 APP_JUMB(expr->funcid);
271                                 JumbleExpr(jstate, (Node *) expr->args);
272                         }
273                         break;
274                 case T_NamedArgExpr:
275                         {
276                                 NamedArgExpr *nae = (NamedArgExpr *) node;
277
278                                 APP_JUMB(nae->argnumber);
279                                 JumbleExpr(jstate, (Node *) nae->arg);
280                         }
281                         break;
282                 case T_OpExpr:
283                 case T_DistinctExpr:    /* struct-equivalent to OpExpr */
284                 case T_NullIfExpr:              /* struct-equivalent to OpExpr */
285                         {
286                                 OpExpr     *expr = (OpExpr *) node;
287
288                                 APP_JUMB(expr->opno);
289                                 JumbleExpr(jstate, (Node *) expr->args);
290                         }
291                         break;
292                 case T_ScalarArrayOpExpr:
293                         {
294                                 ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) node;
295
296                                 APP_JUMB(expr->opno);
297                                 APP_JUMB(expr->useOr);
298                                 JumbleExpr(jstate, (Node *) expr->args);
299                         }
300                         break;
301                 case T_BoolExpr:
302                         {
303                                 BoolExpr   *expr = (BoolExpr *) node;
304
305                                 APP_JUMB(expr->boolop);
306                                 JumbleExpr(jstate, (Node *) expr->args);
307                         }
308                         break;
309                 case T_SubLink:
310                         {
311                                 SubLink    *sublink = (SubLink *) node;
312
313                                 APP_JUMB(sublink->subLinkType);
314                                 APP_JUMB(sublink->subLinkId);
315                                 JumbleExpr(jstate, (Node *) sublink->testexpr);
316                                 JumbleQuery(jstate, castNode(Query, sublink->subselect));
317                         }
318                         break;
319                 case T_FieldSelect:
320                         {
321                                 FieldSelect *fs = (FieldSelect *) node;
322
323                                 APP_JUMB(fs->fieldnum);
324                                 JumbleExpr(jstate, (Node *) fs->arg);
325                         }
326                         break;
327                 case T_FieldStore:
328                         {
329                                 FieldStore *fstore = (FieldStore *) node;
330
331                                 JumbleExpr(jstate, (Node *) fstore->arg);
332                                 JumbleExpr(jstate, (Node *) fstore->newvals);
333                         }
334                         break;
335                 case T_RelabelType:
336                         {
337                                 RelabelType *rt = (RelabelType *) node;
338
339                                 APP_JUMB(rt->resulttype);
340                                 JumbleExpr(jstate, (Node *) rt->arg);
341                         }
342                         break;
343                 case T_CoerceViaIO:
344                         {
345                                 CoerceViaIO *cio = (CoerceViaIO *) node;
346
347                                 APP_JUMB(cio->resulttype);
348                                 JumbleExpr(jstate, (Node *) cio->arg);
349                         }
350                         break;
351                 case T_ArrayCoerceExpr:
352                         {
353                                 ArrayCoerceExpr *acexpr = (ArrayCoerceExpr *) node;
354
355                                 APP_JUMB(acexpr->resulttype);
356                                 JumbleExpr(jstate, (Node *) acexpr->arg);
357                         }
358                         break;
359                 case T_ConvertRowtypeExpr:
360                         {
361                                 ConvertRowtypeExpr *crexpr = (ConvertRowtypeExpr *) node;
362
363                                 APP_JUMB(crexpr->resulttype);
364                                 JumbleExpr(jstate, (Node *) crexpr->arg);
365                         }
366                         break;
367                 case T_CollateExpr:
368                         {
369                                 CollateExpr *ce = (CollateExpr *) node;
370
371                                 APP_JUMB(ce->collOid);
372                                 JumbleExpr(jstate, (Node *) ce->arg);
373                         }
374                         break;
375                 case T_CaseExpr:
376                         {
377                                 CaseExpr   *caseexpr = (CaseExpr *) node;
378
379                                 JumbleExpr(jstate, (Node *) caseexpr->arg);
380                                 foreach(temp, caseexpr->args)
381                                 {
382                                         CaseWhen   *when = lfirst_node(CaseWhen, temp);
383
384                                         JumbleExpr(jstate, (Node *) when->expr);
385                                         JumbleExpr(jstate, (Node *) when->result);
386                                 }
387                                 JumbleExpr(jstate, (Node *) caseexpr->defresult);
388                         }
389                         break;
390                 case T_CaseTestExpr:
391                         {
392                                 CaseTestExpr *ct = (CaseTestExpr *) node;
393
394                                 APP_JUMB(ct->typeId);
395                         }
396                         break;
397                 case T_ArrayExpr:
398                         JumbleExpr(jstate, (Node *) ((ArrayExpr *) node)->elements);
399                         break;
400                 case T_RowExpr:
401                         JumbleExpr(jstate, (Node *) ((RowExpr *) node)->args);
402                         break;
403                 case T_RowCompareExpr:
404                         {
405                                 RowCompareExpr *rcexpr = (RowCompareExpr *) node;
406
407                                 APP_JUMB(rcexpr->rctype);
408                                 JumbleExpr(jstate, (Node *) rcexpr->largs);
409                                 JumbleExpr(jstate, (Node *) rcexpr->rargs);
410                         }
411                         break;
412                 case T_CoalesceExpr:
413                         JumbleExpr(jstate, (Node *) ((CoalesceExpr *) node)->args);
414                         break;
415                 case T_MinMaxExpr:
416                         {
417                                 MinMaxExpr *mmexpr = (MinMaxExpr *) node;
418
419                                 APP_JUMB(mmexpr->op);
420                                 JumbleExpr(jstate, (Node *) mmexpr->args);
421                         }
422                         break;
423                 case T_SQLValueFunction:
424                         {
425                                 SQLValueFunction *svf = (SQLValueFunction *) node;
426
427                                 APP_JUMB(svf->op);
428                                 /* type is fully determined by op */
429                                 APP_JUMB(svf->typmod);
430                         }
431                         break;
432                 case T_XmlExpr:
433                         {
434                                 XmlExpr    *xexpr = (XmlExpr *) node;
435
436                                 APP_JUMB(xexpr->op);
437                                 JumbleExpr(jstate, (Node *) xexpr->named_args);
438                                 JumbleExpr(jstate, (Node *) xexpr->args);
439                         }
440                         break;
441                 case T_NullTest:
442                         {
443                                 NullTest   *nt = (NullTest *) node;
444
445                                 APP_JUMB(nt->nulltesttype);
446                                 JumbleExpr(jstate, (Node *) nt->arg);
447                         }
448                         break;
449                 case T_BooleanTest:
450                         {
451                                 BooleanTest *bt = (BooleanTest *) node;
452
453                                 APP_JUMB(bt->booltesttype);
454                                 JumbleExpr(jstate, (Node *) bt->arg);
455                         }
456                         break;
457                 case T_CoerceToDomain:
458                         {
459                                 CoerceToDomain *cd = (CoerceToDomain *) node;
460
461                                 APP_JUMB(cd->resulttype);
462                                 JumbleExpr(jstate, (Node *) cd->arg);
463                         }
464                         break;
465                 case T_CoerceToDomainValue:
466                         {
467                                 CoerceToDomainValue *cdv = (CoerceToDomainValue *) node;
468
469                                 APP_JUMB(cdv->typeId);
470                         }
471                         break;
472                 case T_SetToDefault:
473                         {
474                                 SetToDefault *sd = (SetToDefault *) node;
475
476                                 APP_JUMB(sd->typeId);
477                         }
478                         break;
479                 case T_CurrentOfExpr:
480                         {
481                                 CurrentOfExpr *ce = (CurrentOfExpr *) node;
482
483                                 APP_JUMB(ce->cvarno);
484                                 if (ce->cursor_name)
485                                         APP_JUMB_STRING(ce->cursor_name);
486                                 APP_JUMB(ce->cursor_param);
487                         }
488                         break;
489                 case T_NextValueExpr:
490                         {
491                                 NextValueExpr *nve = (NextValueExpr *) node;
492
493                                 APP_JUMB(nve->seqid);
494                                 APP_JUMB(nve->typeId);
495                         }
496                         break;
497                 case T_InferenceElem:
498                         {
499                                 InferenceElem *ie = (InferenceElem *) node;
500
501                                 APP_JUMB(ie->infercollid);
502                                 APP_JUMB(ie->inferopclass);
503                                 JumbleExpr(jstate, ie->expr);
504                         }
505                         break;
506                 case T_TargetEntry:
507                         {
508                                 TargetEntry *tle = (TargetEntry *) node;
509
510                                 APP_JUMB(tle->resno);
511                                 APP_JUMB(tle->ressortgroupref);
512                                 JumbleExpr(jstate, (Node *) tle->expr);
513                         }
514                         break;
515                 case T_RangeTblRef:
516                         {
517                                 RangeTblRef *rtr = (RangeTblRef *) node;
518
519                                 APP_JUMB(rtr->rtindex);
520                         }
521                         break;
522                 case T_JoinExpr:
523                         {
524                                 JoinExpr   *join = (JoinExpr *) node;
525
526                                 APP_JUMB(join->jointype);
527                                 APP_JUMB(join->isNatural);
528                                 APP_JUMB(join->rtindex);
529                                 JumbleExpr(jstate, join->larg);
530                                 JumbleExpr(jstate, join->rarg);
531                                 JumbleExpr(jstate, join->quals);
532                         }
533                         break;
534                 case T_FromExpr:
535                         {
536                                 FromExpr   *from = (FromExpr *) node;
537
538                                 JumbleExpr(jstate, (Node *) from->fromlist);
539                                 JumbleExpr(jstate, from->quals);
540                         }
541                         break;
542                 case T_OnConflictExpr:
543                         {
544                                 OnConflictExpr *conf = (OnConflictExpr *) node;
545
546                                 APP_JUMB(conf->action);
547                                 JumbleExpr(jstate, (Node *) conf->arbiterElems);
548                                 JumbleExpr(jstate, conf->arbiterWhere);
549                                 JumbleExpr(jstate, (Node *) conf->onConflictSet);
550                                 JumbleExpr(jstate, conf->onConflictWhere);
551                                 APP_JUMB(conf->constraint);
552                                 APP_JUMB(conf->exclRelIndex);
553                                 JumbleExpr(jstate, (Node *) conf->exclRelTlist);
554                         }
555                         break;
556                 case T_List:
557                         foreach(temp, (List *) node)
558                         {
559                                 JumbleExpr(jstate, (Node *) lfirst(temp));
560                         }
561                         break;
562                 case T_IntList:
563                         foreach(temp, (List *) node)
564                         {
565                                 APP_JUMB(lfirst_int(temp));
566                         }
567                         break;
568                 case T_SortGroupClause:
569                         {
570                                 SortGroupClause *sgc = (SortGroupClause *) node;
571
572                                 APP_JUMB(sgc->tleSortGroupRef);
573                                 APP_JUMB(sgc->eqop);
574                                 APP_JUMB(sgc->sortop);
575                                 APP_JUMB(sgc->nulls_first);
576                         }
577                         break;
578                 case T_GroupingSet:
579                         {
580                                 GroupingSet *gsnode = (GroupingSet *) node;
581
582                                 JumbleExpr(jstate, (Node *) gsnode->content);
583                         }
584                         break;
585                 case T_WindowClause:
586                         {
587                                 WindowClause *wc = (WindowClause *) node;
588
589                                 APP_JUMB(wc->winref);
590                                 APP_JUMB(wc->frameOptions);
591                                 JumbleExpr(jstate, (Node *) wc->partitionClause);
592                                 JumbleExpr(jstate, (Node *) wc->orderClause);
593                                 JumbleExpr(jstate, wc->startOffset);
594                                 JumbleExpr(jstate, wc->endOffset);
595                         }
596                         break;
597                 case T_CommonTableExpr:
598                         {
599                                 CommonTableExpr *cte = (CommonTableExpr *) node;
600
601                                 /* we store the string name because RTE_CTE RTEs need it */
602                                 APP_JUMB_STRING(cte->ctename);
603                                 JumbleQuery(jstate, castNode(Query, cte->ctequery));
604                         }
605                         break;
606                 case T_SetOperationStmt:
607                         {
608                                 SetOperationStmt *setop = (SetOperationStmt *) node;
609
610                                 APP_JUMB(setop->op);
611                                 APP_JUMB(setop->all);
612                                 JumbleExpr(jstate, setop->larg);
613                                 JumbleExpr(jstate, setop->rarg);
614                         }
615                         break;
616                 case T_RangeTblFunction:
617                         {
618                                 RangeTblFunction *rtfunc = (RangeTblFunction *) node;
619
620                                 JumbleExpr(jstate, rtfunc->funcexpr);
621                         }
622                         break;
623                 case T_TableFunc:
624                         {
625                                 TableFunc  *tablefunc = (TableFunc *) node;
626
627                                 JumbleExpr(jstate, tablefunc->docexpr);
628                                 JumbleExpr(jstate, tablefunc->rowexpr);
629                                 JumbleExpr(jstate, (Node *) tablefunc->colexprs);
630                         }
631                         break;
632                 case T_TableSampleClause:
633                         {
634                                 TableSampleClause *tsc = (TableSampleClause *) node;
635
636                                 APP_JUMB(tsc->tsmhandler);
637                                 JumbleExpr(jstate, (Node *) tsc->args);
638                                 JumbleExpr(jstate, (Node *) tsc->repeatable);
639                         }
640                         break;
641                 default:
642                         /* Only a warning, since we can stumble along anyway */
643                         elog(WARNING, "unrecognized node type: %d",
644                                  (int) nodeTag(node));
645                         break;
646         }
647 }
648
649 /*
650  * Record location of constant within query string of query tree
651  * that is currently being walked.
652  */
653 static void
654 RecordConstLocation(pgssJumbleState *jstate, int location)
655 {
656         /* -1 indicates unknown or undefined location */
657         if (location >= 0)
658         {
659                 /* enlarge array if needed */
660                 if (jstate->clocations_count >= jstate->clocations_buf_size)
661                 {
662                         jstate->clocations_buf_size *= 2;
663                         jstate->clocations = (pgssLocationLen *)
664                                 repalloc(jstate->clocations,
665                                                  jstate->clocations_buf_size *
666                                                  sizeof(pgssLocationLen));
667                 }
668                 jstate->clocations[jstate->clocations_count].location = location;
669                 /* initialize lengths to -1 to simplify fill_in_constant_lengths */
670                 jstate->clocations[jstate->clocations_count].length = -1;
671                 jstate->clocations_count++;
672         }
673 }
674
675 /*
676  * Generate a normalized version of the query string that will be used to
677  * represent all similar queries.
678  *
679  * Note that the normalized representation may well vary depending on
680  * just which "equivalent" query is used to create the hashtable entry.
681  * We assume this is OK.
682  *
683  * If query_loc > 0, then "query" has been advanced by that much compared to
684  * the original string start, so we need to translate the provided locations
685  * to compensate.  (This lets us avoid re-scanning statements before the one
686  * of interest, so it's worth doing.)
687  *
688  * *query_len_p contains the input string length, and is updated with
689  * the result string length on exit.  The resulting string might be longer
690  * or shorter depending on what happens with replacement of constants.
691  *
692  * Returns a palloc'd string.
693  */
694 static char *
695 generate_normalized_query(pgssJumbleState *jstate, const char *query,
696                                                   int query_loc, int *query_len_p, int encoding)
697 {
698         char       *norm_query;
699         int                     query_len = *query_len_p;
700         int                     i,
701                                 norm_query_buflen,      /* Space allowed for norm_query */
702                                 len_to_wrt,             /* Length (in bytes) to write */
703                                 quer_loc = 0,   /* Source query byte location */
704                                 n_quer_loc = 0, /* Normalized query byte location */
705                                 last_off = 0,   /* Offset from start for previous tok */
706                                 last_tok_len = 0;       /* Length (in bytes) of that tok */
707
708         /*
709          * Get constants' lengths (core system only gives us locations).  Note
710          * this also ensures the items are sorted by location.
711          */
712         fill_in_constant_lengths(jstate, query, query_loc);
713
714         /*
715          * Allow for $n symbols to be longer than the constants they replace.
716          * Constants must take at least one byte in text form, while a $n symbol
717          * certainly isn't more than 11 bytes, even if n reaches INT_MAX.  We
718          * could refine that limit based on the max value of n for the current
719          * query, but it hardly seems worth any extra effort to do so.
720          */
721         norm_query_buflen = query_len + jstate->clocations_count * 10;
722
723         /* Allocate result buffer */
724         norm_query = palloc(norm_query_buflen + 1);
725
726         for (i = 0; i < jstate->clocations_count; i++)
727         {
728                 int                     off,            /* Offset from start for cur tok */
729                                         tok_len;        /* Length (in bytes) of that tok */
730
731                 off = jstate->clocations[i].location;
732                 /* Adjust recorded location if we're dealing with partial string */
733                 off -= query_loc;
734
735                 tok_len = jstate->clocations[i].length;
736
737                 if (tok_len < 0)
738                         continue;                       /* ignore any duplicates */
739
740                 /* Copy next chunk (what precedes the next constant) */
741                 len_to_wrt = off - last_off;
742                 len_to_wrt -= last_tok_len;
743
744                 Assert(len_to_wrt >= 0);
745                 memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
746                 n_quer_loc += len_to_wrt;
747
748                 /*
749                  * PG_HINT_PLAN: DON'T TAKE IN a6f22e8356 so that the designed behavior
750                  * is kept stable.
751                  */
752                 /* And insert a '?' in place of the constant token */
753                 norm_query[n_quer_loc++] = '?';
754
755                 quer_loc = off + tok_len;
756                 last_off = off;
757                 last_tok_len = tok_len;
758         }
759
760         /*
761          * We've copied up until the last ignorable constant.  Copy over the
762          * remaining bytes of the original query string.
763          */
764         len_to_wrt = query_len - quer_loc;
765
766         Assert(len_to_wrt >= 0);
767         memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
768         n_quer_loc += len_to_wrt;
769
770         Assert(n_quer_loc <= norm_query_buflen);
771         norm_query[n_quer_loc] = '\0';
772
773         *query_len_p = n_quer_loc;
774         return norm_query;
775 }
776
777 /*
778  * Given a valid SQL string and an array of constant-location records,
779  * fill in the textual lengths of those constants.
780  *
781  * The constants may use any allowed constant syntax, such as float literals,
782  * bit-strings, single-quoted strings and dollar-quoted strings.  This is
783  * accomplished by using the public API for the core scanner.
784  *
785  * It is the caller's job to ensure that the string is a valid SQL statement
786  * with constants at the indicated locations.  Since in practice the string
787  * has already been parsed, and the locations that the caller provides will
788  * have originated from within the authoritative parser, this should not be
789  * a problem.
790  *
791  * Duplicate constant pointers are possible, and will have their lengths
792  * marked as '-1', so that they are later ignored.  (Actually, we assume the
793  * lengths were initialized as -1 to start with, and don't change them here.)
794  *
795  * If query_loc > 0, then "query" has been advanced by that much compared to
796  * the original string start, so we need to translate the provided locations
797  * to compensate.  (This lets us avoid re-scanning statements before the one
798  * of interest, so it's worth doing.)
799  *
800  * N.B. There is an assumption that a '-' character at a Const location begins
801  * a negative numeric constant.  This precludes there ever being another
802  * reason for a constant to start with a '-'.
803  */
804 static void
805 fill_in_constant_lengths(pgssJumbleState *jstate, const char *query,
806                                                  int query_loc)
807 {
808         pgssLocationLen *locs;
809         core_yyscan_t yyscanner;
810         core_yy_extra_type yyextra;
811         core_YYSTYPE yylval;
812         YYLTYPE         yylloc;
813         int                     last_loc = -1;
814         int                     i;
815
816         /*
817          * Sort the records by location so that we can process them in order while
818          * scanning the query text.
819          */
820         if (jstate->clocations_count > 1)
821                 qsort(jstate->clocations, jstate->clocations_count,
822                           sizeof(pgssLocationLen), comp_location);
823         locs = jstate->clocations;
824
825         /* initialize the flex scanner --- should match raw_parser() */
826         yyscanner = scanner_init(query,
827                                                          &yyextra,
828                                                          &ScanKeywords,
829                                                          ScanKeywordTokens);
830
831         /* we don't want to re-emit any escape string warnings */
832         yyextra.escape_string_warning = false;
833
834         /* Search for each constant, in sequence */
835         for (i = 0; i < jstate->clocations_count; i++)
836         {
837                 int                     loc = locs[i].location;
838                 int                     tok;
839
840                 /* Adjust recorded location if we're dealing with partial string */
841                 loc -= query_loc;
842
843                 Assert(loc >= 0);
844
845                 if (loc <= last_loc)
846                         continue;                       /* Duplicate constant, ignore */
847
848                 /* Lex tokens until we find the desired constant */
849                 for (;;)
850                 {
851                         tok = core_yylex(&yylval, &yylloc, yyscanner);
852
853                         /* We should not hit end-of-string, but if we do, behave sanely */
854                         if (tok == 0)
855                                 break;                  /* out of inner for-loop */
856
857                         /*
858                          * We should find the token position exactly, but if we somehow
859                          * run past it, work with that.
860                          */
861                         if (yylloc >= loc)
862                         {
863                                 if (query[loc] == '-')
864                                 {
865                                         /*
866                                          * It's a negative value - this is the one and only case
867                                          * where we replace more than a single token.
868                                          *
869                                          * Do not compensate for the core system's special-case
870                                          * adjustment of location to that of the leading '-'
871                                          * operator in the event of a negative constant.  It is
872                                          * also useful for our purposes to start from the minus
873                                          * symbol.  In this way, queries like "select * from foo
874                                          * where bar = 1" and "select * from foo where bar = -2"
875                                          * will have identical normalized query strings.
876                                          */
877                                         tok = core_yylex(&yylval, &yylloc, yyscanner);
878                                         if (tok == 0)
879                                                 break;  /* out of inner for-loop */
880                                 }
881
882                                 /*
883                                  * We now rely on the assumption that flex has placed a zero
884                                  * byte after the text of the current token in scanbuf.
885                                  */
886                                 locs[i].length = strlen(yyextra.scanbuf + loc);
887                                 break;                  /* out of inner for-loop */
888                         }
889                 }
890
891                 /* If we hit end-of-string, give up, leaving remaining lengths -1 */
892                 if (tok == 0)
893                         break;
894
895                 last_loc = loc;
896         }
897
898         scanner_finish(yyscanner);
899 }
900
901 /*
902  * comp_location: comparator for qsorting pgssLocationLen structs by location
903  */
904 static int
905 comp_location(const void *a, const void *b)
906 {
907         int                     l = ((const pgssLocationLen *) a)->location;
908         int                     r = ((const pgssLocationLen *) b)->location;
909
910         if (l < r)
911                 return -1;
912         else if (l > r)
913                 return +1;
914         else
915                 return 0;
916 }