OSDN Git Service

Ignore non-existent prepared statement in get_query_string.
[pghintplan/pg_hint_plan.git] / pg_stat_statements.c
1 /*-------------------------------------------------------------------------
2  *
3  * pg_stat_statements.c
4  * 
5  * Part of pg_stat_statements.c in PostgreSQL 10.
6  *
7  * Copyright (c) 2008-2020, PostgreSQL Global Development Group
8  *
9  *-------------------------------------------------------------------------
10  */
11 #include "postgres.h"
12
13 #include <sys/stat.h>
14
15 #include "access/hash.h"
16 #include "parser/scanner.h"
17
18 static void AppendJumble(pgssJumbleState *jstate,
19                          const unsigned char *item, Size size);
20 static void JumbleQuery(pgssJumbleState *jstate, Query *query);
21 static void JumbleRangeTable(pgssJumbleState *jstate, List *rtable);
22 static void JumbleExpr(pgssJumbleState *jstate, Node *node);
23 static void RecordConstLocation(pgssJumbleState *jstate, int location);
24 static char *generate_normalized_query(pgssJumbleState *jstate, const char *query,
25                                                   int query_loc, int *query_len_p, int encoding);
26 static void fill_in_constant_lengths(pgssJumbleState *jstate, const char *query,
27                                                  int query_loc);
28 static int      comp_location(const void *a, const void *b);
29
30 /*
31  * AppendJumble: Append a value that is substantive in a given query to
32  * the current jumble.
33  */
34 static void
35 AppendJumble(pgssJumbleState *jstate, const unsigned char *item, Size size)
36 {
37         unsigned char *jumble = jstate->jumble;
38         Size            jumble_len = jstate->jumble_len;
39
40         /*
41          * Whenever the jumble buffer is full, we hash the current contents and
42          * reset the buffer to contain just that hash value, thus relying on the
43          * hash to summarize everything so far.
44          */
45         while (size > 0)
46         {
47                 Size            part_size;
48
49                 if (jumble_len >= JUMBLE_SIZE)
50                 {
51                         uint64          start_hash;
52
53                         start_hash = DatumGetUInt64(hash_any_extended(jumble,
54                                                                                                                   JUMBLE_SIZE, 0));
55                         memcpy(jumble, &start_hash, sizeof(start_hash));
56                         jumble_len = sizeof(start_hash);
57                 }
58                 part_size = Min(size, JUMBLE_SIZE - jumble_len);
59                 memcpy(jumble + jumble_len, item, part_size);
60                 jumble_len += part_size;
61                 item += part_size;
62                 size -= part_size;
63         }
64         jstate->jumble_len = jumble_len;
65 }
66
/*
 * Convenience wrappers around AppendJumble.  Both expand to a call that
 * passes a variable named "jstate", so one must be in scope wherever they
 * are used.
 *
 * APP_JUMB appends the raw bytes of "item"; because it takes the argument's
 * address, "item" has to be an lvalue.  APP_JUMB_STRING appends a C string
 * together with its terminating NUL byte.
 */
#define APP_JUMB(item) AppendJumble(jstate, \
									(const unsigned char *) &(item), \
									sizeof(item))
#define APP_JUMB_STRING(str) AppendJumble(jstate, \
										  (const unsigned char *) (str), \
										  strlen(str) + 1)
75
76 /*
77  * JumbleQuery: Selectively serialize the query tree, appending significant
78  * data to the "query jumble" while ignoring nonsignificant data.
79  *
80  * Rule of thumb for what to include is that we should ignore anything not
81  * semantically significant (such as alias names) as well as anything that can
82  * be deduced from child nodes (else we'd just be double-hashing that piece
83  * of information).
84  */
85 static void
86 JumbleQuery(pgssJumbleState *jstate, Query *query)
87 {
88         Assert(IsA(query, Query));
89         Assert(query->utilityStmt == NULL);
90
91         APP_JUMB(query->commandType);
92         /* resultRelation is usually predictable from commandType */
93         JumbleExpr(jstate, (Node *) query->cteList);
94         JumbleRangeTable(jstate, query->rtable);
95         JumbleExpr(jstate, (Node *) query->jointree);
96         JumbleExpr(jstate, (Node *) query->targetList);
97         JumbleExpr(jstate, (Node *) query->onConflict);
98         JumbleExpr(jstate, (Node *) query->returningList);
99         JumbleExpr(jstate, (Node *) query->groupClause);
100         JumbleExpr(jstate, (Node *) query->groupingSets);
101         JumbleExpr(jstate, query->havingQual);
102         JumbleExpr(jstate, (Node *) query->windowClause);
103         JumbleExpr(jstate, (Node *) query->distinctClause);
104         JumbleExpr(jstate, (Node *) query->sortClause);
105         JumbleExpr(jstate, query->limitOffset);
106         JumbleExpr(jstate, query->limitCount);
107         /* we ignore rowMarks */
108         JumbleExpr(jstate, query->setOperations);
109 }
110
111 /*
112  * Jumble a range table
113  */
114 static void
115 JumbleRangeTable(pgssJumbleState *jstate, List *rtable)
116 {
117         ListCell   *lc;
118
119         foreach(lc, rtable)
120         {
121                 RangeTblEntry *rte = lfirst_node(RangeTblEntry, lc);
122
123                 APP_JUMB(rte->rtekind);
124                 switch (rte->rtekind)
125                 {
126                         case RTE_RELATION:
127                                 APP_JUMB(rte->relid);
128                                 JumbleExpr(jstate, (Node *) rte->tablesample);
129                                 break;
130                         case RTE_SUBQUERY:
131                                 JumbleQuery(jstate, rte->subquery);
132                                 break;
133                         case RTE_JOIN:
134                                 APP_JUMB(rte->jointype);
135                                 break;
136                         case RTE_FUNCTION:
137                                 JumbleExpr(jstate, (Node *) rte->functions);
138                                 break;
139                         case RTE_TABLEFUNC:
140                                 JumbleExpr(jstate, (Node *) rte->tablefunc);
141                                 break;
142                         case RTE_VALUES:
143                                 JumbleExpr(jstate, (Node *) rte->values_lists);
144                                 break;
145                         case RTE_CTE:
146
147                                 /*
148                                  * Depending on the CTE name here isn't ideal, but it's the
149                                  * only info we have to identify the referenced WITH item.
150                                  */
151                                 APP_JUMB_STRING(rte->ctename);
152                                 APP_JUMB(rte->ctelevelsup);
153                                 break;
154                         case RTE_NAMEDTUPLESTORE:
155                                 APP_JUMB_STRING(rte->enrname);
156                                 break;
157                         case RTE_RESULT:
158                                 break;
159                         default:
160                                 elog(ERROR, "unrecognized RTE kind: %d", (int) rte->rtekind);
161                                 break;
162                 }
163         }
164 }
165
166 /*
167  * Jumble an expression tree
168  *
169  * In general this function should handle all the same node types that
170  * expression_tree_walker() does, and therefore it's coded to be as parallel
171  * to that function as possible.  However, since we are only invoked on
172  * queries immediately post-parse-analysis, we need not handle node types
173  * that only appear in planning.
174  *
175  * Note: the reason we don't simply use expression_tree_walker() is that the
176  * point of that function is to support tree walkers that don't care about
177  * most tree node types, but here we care about all types.  We should complain
178  * about any unrecognized node type.
179  */
180 static void
181 JumbleExpr(pgssJumbleState *jstate, Node *node)
182 {
183         ListCell   *temp;
184
185         if (node == NULL)
186                 return;
187
188         /* Guard against stack overflow due to overly complex expressions */
189         check_stack_depth();
190
191         /*
192          * We always emit the node's NodeTag, then any additional fields that are
193          * considered significant, and then we recurse to any child nodes.
194          */
195         APP_JUMB(node->type);
196
197         switch (nodeTag(node))
198         {
199                 case T_Var:
200                         {
201                                 Var                *var = (Var *) node;
202
203                                 APP_JUMB(var->varno);
204                                 APP_JUMB(var->varattno);
205                                 APP_JUMB(var->varlevelsup);
206                         }
207                         break;
208                 case T_Const:
209                         {
210                                 Const      *c = (Const *) node;
211
212                                 /* We jumble only the constant's type, not its value */
213                                 APP_JUMB(c->consttype);
214                                 /* Also, record its parse location for query normalization */
215                                 RecordConstLocation(jstate, c->location);
216                         }
217                         break;
218                 case T_Param:
219                         {
220                                 Param      *p = (Param *) node;
221
222                                 APP_JUMB(p->paramkind);
223                                 APP_JUMB(p->paramid);
224                                 APP_JUMB(p->paramtype);
225                                 /* Also, track the highest external Param id */
226                                 if (p->paramkind == PARAM_EXTERN &&
227                                         p->paramid > jstate->highest_extern_param_id)
228                                         jstate->highest_extern_param_id = p->paramid;
229                         }
230                         break;
231                 case T_Aggref:
232                         {
233                                 Aggref     *expr = (Aggref *) node;
234
235                                 APP_JUMB(expr->aggfnoid);
236                                 JumbleExpr(jstate, (Node *) expr->aggdirectargs);
237                                 JumbleExpr(jstate, (Node *) expr->args);
238                                 JumbleExpr(jstate, (Node *) expr->aggorder);
239                                 JumbleExpr(jstate, (Node *) expr->aggdistinct);
240                                 JumbleExpr(jstate, (Node *) expr->aggfilter);
241                         }
242                         break;
243                 case T_GroupingFunc:
244                         {
245                                 GroupingFunc *grpnode = (GroupingFunc *) node;
246
247                                 JumbleExpr(jstate, (Node *) grpnode->refs);
248                         }
249                         break;
250                 case T_WindowFunc:
251                         {
252                                 WindowFunc *expr = (WindowFunc *) node;
253
254                                 APP_JUMB(expr->winfnoid);
255                                 APP_JUMB(expr->winref);
256                                 JumbleExpr(jstate, (Node *) expr->args);
257                                 JumbleExpr(jstate, (Node *) expr->aggfilter);
258                         }
259                         break;
260                 case T_SubscriptingRef:
261                         {
262                                 SubscriptingRef *sbsref = (SubscriptingRef *) node;
263
264                                 JumbleExpr(jstate, (Node *) sbsref->refupperindexpr);
265                                 JumbleExpr(jstate, (Node *) sbsref->reflowerindexpr);
266                                 JumbleExpr(jstate, (Node *) sbsref->refexpr);
267                                 JumbleExpr(jstate, (Node *) sbsref->refassgnexpr);
268                         }
269                         break;
270                 case T_FuncExpr:
271                         {
272                                 FuncExpr   *expr = (FuncExpr *) node;
273
274                                 APP_JUMB(expr->funcid);
275                                 JumbleExpr(jstate, (Node *) expr->args);
276                         }
277                         break;
278                 case T_NamedArgExpr:
279                         {
280                                 NamedArgExpr *nae = (NamedArgExpr *) node;
281
282                                 APP_JUMB(nae->argnumber);
283                                 JumbleExpr(jstate, (Node *) nae->arg);
284                         }
285                         break;
286                 case T_OpExpr:
287                 case T_DistinctExpr:    /* struct-equivalent to OpExpr */
288                 case T_NullIfExpr:              /* struct-equivalent to OpExpr */
289                         {
290                                 OpExpr     *expr = (OpExpr *) node;
291
292                                 APP_JUMB(expr->opno);
293                                 JumbleExpr(jstate, (Node *) expr->args);
294                         }
295                         break;
296                 case T_ScalarArrayOpExpr:
297                         {
298                                 ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) node;
299
300                                 APP_JUMB(expr->opno);
301                                 APP_JUMB(expr->useOr);
302                                 JumbleExpr(jstate, (Node *) expr->args);
303                         }
304                         break;
305                 case T_BoolExpr:
306                         {
307                                 BoolExpr   *expr = (BoolExpr *) node;
308
309                                 APP_JUMB(expr->boolop);
310                                 JumbleExpr(jstate, (Node *) expr->args);
311                         }
312                         break;
313                 case T_SubLink:
314                         {
315                                 SubLink    *sublink = (SubLink *) node;
316
317                                 APP_JUMB(sublink->subLinkType);
318                                 APP_JUMB(sublink->subLinkId);
319                                 JumbleExpr(jstate, (Node *) sublink->testexpr);
320                                 JumbleQuery(jstate, castNode(Query, sublink->subselect));
321                         }
322                         break;
323                 case T_FieldSelect:
324                         {
325                                 FieldSelect *fs = (FieldSelect *) node;
326
327                                 APP_JUMB(fs->fieldnum);
328                                 JumbleExpr(jstate, (Node *) fs->arg);
329                         }
330                         break;
331                 case T_FieldStore:
332                         {
333                                 FieldStore *fstore = (FieldStore *) node;
334
335                                 JumbleExpr(jstate, (Node *) fstore->arg);
336                                 JumbleExpr(jstate, (Node *) fstore->newvals);
337                         }
338                         break;
339                 case T_RelabelType:
340                         {
341                                 RelabelType *rt = (RelabelType *) node;
342
343                                 APP_JUMB(rt->resulttype);
344                                 JumbleExpr(jstate, (Node *) rt->arg);
345                         }
346                         break;
347                 case T_CoerceViaIO:
348                         {
349                                 CoerceViaIO *cio = (CoerceViaIO *) node;
350
351                                 APP_JUMB(cio->resulttype);
352                                 JumbleExpr(jstate, (Node *) cio->arg);
353                         }
354                         break;
355                 case T_ArrayCoerceExpr:
356                         {
357                                 ArrayCoerceExpr *acexpr = (ArrayCoerceExpr *) node;
358
359                                 APP_JUMB(acexpr->resulttype);
360                                 JumbleExpr(jstate, (Node *) acexpr->arg);
361                                 JumbleExpr(jstate, (Node *) acexpr->elemexpr);
362                         }
363                         break;
364                 case T_ConvertRowtypeExpr:
365                         {
366                                 ConvertRowtypeExpr *crexpr = (ConvertRowtypeExpr *) node;
367
368                                 APP_JUMB(crexpr->resulttype);
369                                 JumbleExpr(jstate, (Node *) crexpr->arg);
370                         }
371                         break;
372                 case T_CollateExpr:
373                         {
374                                 CollateExpr *ce = (CollateExpr *) node;
375
376                                 APP_JUMB(ce->collOid);
377                                 JumbleExpr(jstate, (Node *) ce->arg);
378                         }
379                         break;
380                 case T_CaseExpr:
381                         {
382                                 CaseExpr   *caseexpr = (CaseExpr *) node;
383
384                                 JumbleExpr(jstate, (Node *) caseexpr->arg);
385                                 foreach(temp, caseexpr->args)
386                                 {
387                                         CaseWhen   *when = lfirst_node(CaseWhen, temp);
388
389                                         JumbleExpr(jstate, (Node *) when->expr);
390                                         JumbleExpr(jstate, (Node *) when->result);
391                                 }
392                                 JumbleExpr(jstate, (Node *) caseexpr->defresult);
393                         }
394                         break;
395                 case T_CaseTestExpr:
396                         {
397                                 CaseTestExpr *ct = (CaseTestExpr *) node;
398
399                                 APP_JUMB(ct->typeId);
400                         }
401                         break;
402                 case T_ArrayExpr:
403                         JumbleExpr(jstate, (Node *) ((ArrayExpr *) node)->elements);
404                         break;
405                 case T_RowExpr:
406                         JumbleExpr(jstate, (Node *) ((RowExpr *) node)->args);
407                         break;
408                 case T_RowCompareExpr:
409                         {
410                                 RowCompareExpr *rcexpr = (RowCompareExpr *) node;
411
412                                 APP_JUMB(rcexpr->rctype);
413                                 JumbleExpr(jstate, (Node *) rcexpr->largs);
414                                 JumbleExpr(jstate, (Node *) rcexpr->rargs);
415                         }
416                         break;
417                 case T_CoalesceExpr:
418                         JumbleExpr(jstate, (Node *) ((CoalesceExpr *) node)->args);
419                         break;
420                 case T_MinMaxExpr:
421                         {
422                                 MinMaxExpr *mmexpr = (MinMaxExpr *) node;
423
424                                 APP_JUMB(mmexpr->op);
425                                 JumbleExpr(jstate, (Node *) mmexpr->args);
426                         }
427                         break;
428                 case T_SQLValueFunction:
429                         {
430                                 SQLValueFunction *svf = (SQLValueFunction *) node;
431
432                                 APP_JUMB(svf->op);
433                                 /* type is fully determined by op */
434                                 APP_JUMB(svf->typmod);
435                         }
436                         break;
437                 case T_XmlExpr:
438                         {
439                                 XmlExpr    *xexpr = (XmlExpr *) node;
440
441                                 APP_JUMB(xexpr->op);
442                                 JumbleExpr(jstate, (Node *) xexpr->named_args);
443                                 JumbleExpr(jstate, (Node *) xexpr->args);
444                         }
445                         break;
446                 case T_NullTest:
447                         {
448                                 NullTest   *nt = (NullTest *) node;
449
450                                 APP_JUMB(nt->nulltesttype);
451                                 JumbleExpr(jstate, (Node *) nt->arg);
452                         }
453                         break;
454                 case T_BooleanTest:
455                         {
456                                 BooleanTest *bt = (BooleanTest *) node;
457
458                                 APP_JUMB(bt->booltesttype);
459                                 JumbleExpr(jstate, (Node *) bt->arg);
460                         }
461                         break;
462                 case T_CoerceToDomain:
463                         {
464                                 CoerceToDomain *cd = (CoerceToDomain *) node;
465
466                                 APP_JUMB(cd->resulttype);
467                                 JumbleExpr(jstate, (Node *) cd->arg);
468                         }
469                         break;
470                 case T_CoerceToDomainValue:
471                         {
472                                 CoerceToDomainValue *cdv = (CoerceToDomainValue *) node;
473
474                                 APP_JUMB(cdv->typeId);
475                         }
476                         break;
477                 case T_SetToDefault:
478                         {
479                                 SetToDefault *sd = (SetToDefault *) node;
480
481                                 APP_JUMB(sd->typeId);
482                         }
483                         break;
484                 case T_CurrentOfExpr:
485                         {
486                                 CurrentOfExpr *ce = (CurrentOfExpr *) node;
487
488                                 APP_JUMB(ce->cvarno);
489                                 if (ce->cursor_name)
490                                         APP_JUMB_STRING(ce->cursor_name);
491                                 APP_JUMB(ce->cursor_param);
492                         }
493                         break;
494                 case T_NextValueExpr:
495                         {
496                                 NextValueExpr *nve = (NextValueExpr *) node;
497
498                                 APP_JUMB(nve->seqid);
499                                 APP_JUMB(nve->typeId);
500                         }
501                         break;
502                 case T_InferenceElem:
503                         {
504                                 InferenceElem *ie = (InferenceElem *) node;
505
506                                 APP_JUMB(ie->infercollid);
507                                 APP_JUMB(ie->inferopclass);
508                                 JumbleExpr(jstate, ie->expr);
509                         }
510                         break;
511                 case T_TargetEntry:
512                         {
513                                 TargetEntry *tle = (TargetEntry *) node;
514
515                                 APP_JUMB(tle->resno);
516                                 APP_JUMB(tle->ressortgroupref);
517                                 JumbleExpr(jstate, (Node *) tle->expr);
518                         }
519                         break;
520                 case T_RangeTblRef:
521                         {
522                                 RangeTblRef *rtr = (RangeTblRef *) node;
523
524                                 APP_JUMB(rtr->rtindex);
525                         }
526                         break;
527                 case T_JoinExpr:
528                         {
529                                 JoinExpr   *join = (JoinExpr *) node;
530
531                                 APP_JUMB(join->jointype);
532                                 APP_JUMB(join->isNatural);
533                                 APP_JUMB(join->rtindex);
534                                 JumbleExpr(jstate, join->larg);
535                                 JumbleExpr(jstate, join->rarg);
536                                 JumbleExpr(jstate, join->quals);
537                         }
538                         break;
539                 case T_FromExpr:
540                         {
541                                 FromExpr   *from = (FromExpr *) node;
542
543                                 JumbleExpr(jstate, (Node *) from->fromlist);
544                                 JumbleExpr(jstate, from->quals);
545                         }
546                         break;
547                 case T_OnConflictExpr:
548                         {
549                                 OnConflictExpr *conf = (OnConflictExpr *) node;
550
551                                 APP_JUMB(conf->action);
552                                 JumbleExpr(jstate, (Node *) conf->arbiterElems);
553                                 JumbleExpr(jstate, conf->arbiterWhere);
554                                 JumbleExpr(jstate, (Node *) conf->onConflictSet);
555                                 JumbleExpr(jstate, conf->onConflictWhere);
556                                 APP_JUMB(conf->constraint);
557                                 APP_JUMB(conf->exclRelIndex);
558                                 JumbleExpr(jstate, (Node *) conf->exclRelTlist);
559                         }
560                         break;
561                 case T_List:
562                         foreach(temp, (List *) node)
563                         {
564                                 JumbleExpr(jstate, (Node *) lfirst(temp));
565                         }
566                         break;
567                 case T_IntList:
568                         foreach(temp, (List *) node)
569                         {
570                                 APP_JUMB(lfirst_int(temp));
571                         }
572                         break;
573                 case T_SortGroupClause:
574                         {
575                                 SortGroupClause *sgc = (SortGroupClause *) node;
576
577                                 APP_JUMB(sgc->tleSortGroupRef);
578                                 APP_JUMB(sgc->eqop);
579                                 APP_JUMB(sgc->sortop);
580                                 APP_JUMB(sgc->nulls_first);
581                         }
582                         break;
583                 case T_GroupingSet:
584                         {
585                                 GroupingSet *gsnode = (GroupingSet *) node;
586
587                                 JumbleExpr(jstate, (Node *) gsnode->content);
588                         }
589                         break;
590                 case T_WindowClause:
591                         {
592                                 WindowClause *wc = (WindowClause *) node;
593
594                                 APP_JUMB(wc->winref);
595                                 APP_JUMB(wc->frameOptions);
596                                 JumbleExpr(jstate, (Node *) wc->partitionClause);
597                                 JumbleExpr(jstate, (Node *) wc->orderClause);
598                                 JumbleExpr(jstate, wc->startOffset);
599                                 JumbleExpr(jstate, wc->endOffset);
600                         }
601                         break;
602                 case T_CommonTableExpr:
603                         {
604                                 CommonTableExpr *cte = (CommonTableExpr *) node;
605
606                                 /* we store the string name because RTE_CTE RTEs need it */
607                                 APP_JUMB_STRING(cte->ctename);
608                                 APP_JUMB(cte->ctematerialized);
609                                 JumbleQuery(jstate, castNode(Query, cte->ctequery));
610                         }
611                         break;
612                 case T_SetOperationStmt:
613                         {
614                                 SetOperationStmt *setop = (SetOperationStmt *) node;
615
616                                 APP_JUMB(setop->op);
617                                 APP_JUMB(setop->all);
618                                 JumbleExpr(jstate, setop->larg);
619                                 JumbleExpr(jstate, setop->rarg);
620                         }
621                         break;
622                 case T_RangeTblFunction:
623                         {
624                                 RangeTblFunction *rtfunc = (RangeTblFunction *) node;
625
626                                 JumbleExpr(jstate, rtfunc->funcexpr);
627                         }
628                         break;
629                 case T_TableFunc:
630                         {
631                                 TableFunc  *tablefunc = (TableFunc *) node;
632
633                                 JumbleExpr(jstate, tablefunc->docexpr);
634                                 JumbleExpr(jstate, tablefunc->rowexpr);
635                                 JumbleExpr(jstate, (Node *) tablefunc->colexprs);
636                         }
637                         break;
638                 case T_TableSampleClause:
639                         {
640                                 TableSampleClause *tsc = (TableSampleClause *) node;
641
642                                 APP_JUMB(tsc->tsmhandler);
643                                 JumbleExpr(jstate, (Node *) tsc->args);
644                                 JumbleExpr(jstate, (Node *) tsc->repeatable);
645                         }
646                         break;
647                 default:
648                         /* Only a warning, since we can stumble along anyway */
649                         elog(WARNING, "unrecognized node type: %d",
650                                  (int) nodeTag(node));
651                         break;
652         }
653 }
654
655 /*
656  * Record location of constant within query string of query tree
657  * that is currently being walked.
658  */
659 static void
660 RecordConstLocation(pgssJumbleState *jstate, int location)
661 {
662         /* -1 indicates unknown or undefined location */
663         if (location >= 0)
664         {
665                 /* enlarge array if needed */
666                 if (jstate->clocations_count >= jstate->clocations_buf_size)
667                 {
668                         jstate->clocations_buf_size *= 2;
669                         jstate->clocations = (pgssLocationLen *)
670                                 repalloc(jstate->clocations,
671                                                  jstate->clocations_buf_size *
672                                                  sizeof(pgssLocationLen));
673                 }
674                 jstate->clocations[jstate->clocations_count].location = location;
675                 /* initialize lengths to -1 to simplify fill_in_constant_lengths */
676                 jstate->clocations[jstate->clocations_count].length = -1;
677                 jstate->clocations_count++;
678         }
679 }
680
681 /*
682  * Generate a normalized version of the query string that will be used to
683  * represent all similar queries.
684  *
685  * Note that the normalized representation may well vary depending on
686  * just which "equivalent" query is used to create the hashtable entry.
687  * We assume this is OK.
688  *
689  * If query_loc > 0, then "query" has been advanced by that much compared to
690  * the original string start, so we need to translate the provided locations
691  * to compensate.  (This lets us avoid re-scanning statements before the one
692  * of interest, so it's worth doing.)
693  *
694  * *query_len_p contains the input string length, and is updated with
695  * the result string length on exit.  The resulting string might be longer
696  * or shorter depending on what happens with replacement of constants.
697  *
698  * Returns a palloc'd string.
699  */
700 static char *
701 generate_normalized_query(pgssJumbleState *jstate, const char *query,
702                                                   int query_loc, int *query_len_p, int encoding)
703 {
704         char       *norm_query;
705         int                     query_len = *query_len_p;
706         int                     i,
707                                 norm_query_buflen,      /* Space allowed for norm_query */
708                                 len_to_wrt,             /* Length (in bytes) to write */
709                                 quer_loc = 0,   /* Source query byte location */
710                                 n_quer_loc = 0, /* Normalized query byte location */
711                                 last_off = 0,   /* Offset from start for previous tok */
712                                 last_tok_len = 0;       /* Length (in bytes) of that tok */
713
714         /*
715          * Get constants' lengths (core system only gives us locations).  Note
716          * this also ensures the items are sorted by location.
717          */
718         fill_in_constant_lengths(jstate, query, query_loc);
719
720         /*
721          * Allow for $n symbols to be longer than the constants they replace.
722          * Constants must take at least one byte in text form, while a $n symbol
723          * certainly isn't more than 11 bytes, even if n reaches INT_MAX.  We
724          * could refine that limit based on the max value of n for the current
725          * query, but it hardly seems worth any extra effort to do so.
726          */
727         norm_query_buflen = query_len + jstate->clocations_count * 10;
728
729         /* Allocate result buffer */
730         norm_query = palloc(norm_query_buflen + 1);
731
732         for (i = 0; i < jstate->clocations_count; i++)
733         {
734                 int                     off,            /* Offset from start for cur tok */
735                                         tok_len;        /* Length (in bytes) of that tok */
736
737                 off = jstate->clocations[i].location;
738                 /* Adjust recorded location if we're dealing with partial string */
739                 off -= query_loc;
740
741                 tok_len = jstate->clocations[i].length;
742
743                 if (tok_len < 0)
744                         continue;                       /* ignore any duplicates */
745
746                 /* Copy next chunk (what precedes the next constant) */
747                 len_to_wrt = off - last_off;
748                 len_to_wrt -= last_tok_len;
749
750                 Assert(len_to_wrt >= 0);
751                 memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
752                 n_quer_loc += len_to_wrt;
753
754                 /*
755                  * PG_HINT_PLAN: DON'T TAKE IN a6f22e8356 so that the designed behavior
756                  * is kept stable.
757                  */
758                 /* And insert a '?' in place of the constant token */
759                 norm_query[n_quer_loc++] = '?';
760
761                 quer_loc = off + tok_len;
762                 last_off = off;
763                 last_tok_len = tok_len;
764         }
765
766         /*
767          * We've copied up until the last ignorable constant.  Copy over the
768          * remaining bytes of the original query string.
769          */
770         len_to_wrt = query_len - quer_loc;
771
772         Assert(len_to_wrt >= 0);
773         memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
774         n_quer_loc += len_to_wrt;
775
776         Assert(n_quer_loc <= norm_query_buflen);
777         norm_query[n_quer_loc] = '\0';
778
779         *query_len_p = n_quer_loc;
780         return norm_query;
781 }
782
783 /*
784  * Given a valid SQL string and an array of constant-location records,
785  * fill in the textual lengths of those constants.
786  *
787  * The constants may use any allowed constant syntax, such as float literals,
788  * bit-strings, single-quoted strings and dollar-quoted strings.  This is
789  * accomplished by using the public API for the core scanner.
790  *
791  * It is the caller's job to ensure that the string is a valid SQL statement
792  * with constants at the indicated locations.  Since in practice the string
793  * has already been parsed, and the locations that the caller provides will
794  * have originated from within the authoritative parser, this should not be
795  * a problem.
796  *
797  * Duplicate constant pointers are possible, and will have their lengths
798  * marked as '-1', so that they are later ignored.  (Actually, we assume the
799  * lengths were initialized as -1 to start with, and don't change them here.)
800  *
801  * If query_loc > 0, then "query" has been advanced by that much compared to
802  * the original string start, so we need to translate the provided locations
803  * to compensate.  (This lets us avoid re-scanning statements before the one
804  * of interest, so it's worth doing.)
805  *
806  * N.B. There is an assumption that a '-' character at a Const location begins
807  * a negative numeric constant.  This precludes there ever being another
808  * reason for a constant to start with a '-'.
809  */
810 static void
811 fill_in_constant_lengths(pgssJumbleState *jstate, const char *query,
812                                                  int query_loc)
813 {
814         pgssLocationLen *locs;
815         core_yyscan_t yyscanner;
816         core_yy_extra_type yyextra;
817         core_YYSTYPE yylval;
818         YYLTYPE         yylloc;
819         int                     last_loc = -1;
820         int                     i;
821
822         /*
823          * Sort the records by location so that we can process them in order while
824          * scanning the query text.
825          */
826         if (jstate->clocations_count > 1)
827                 qsort(jstate->clocations, jstate->clocations_count,
828                           sizeof(pgssLocationLen), comp_location);
829         locs = jstate->clocations;
830
831         /* initialize the flex scanner --- should match raw_parser() */
832         yyscanner = scanner_init(query,
833                                                          &yyextra,
834                                                          &ScanKeywords,
835                                                          ScanKeywordTokens);
836
837         /* we don't want to re-emit any escape string warnings */
838         yyextra.escape_string_warning = false;
839
840         /* Search for each constant, in sequence */
841         for (i = 0; i < jstate->clocations_count; i++)
842         {
843                 int                     loc = locs[i].location;
844                 int                     tok;
845
846                 /* Adjust recorded location if we're dealing with partial string */
847                 loc -= query_loc;
848
849                 Assert(loc >= 0);
850
851                 if (loc <= last_loc)
852                         continue;                       /* Duplicate constant, ignore */
853
854                 /* Lex tokens until we find the desired constant */
855                 for (;;)
856                 {
857                         tok = core_yylex(&yylval, &yylloc, yyscanner);
858
859                         /* We should not hit end-of-string, but if we do, behave sanely */
860                         if (tok == 0)
861                                 break;                  /* out of inner for-loop */
862
863                         /*
864                          * We should find the token position exactly, but if we somehow
865                          * run past it, work with that.
866                          */
867                         if (yylloc >= loc)
868                         {
869                                 if (query[loc] == '-')
870                                 {
871                                         /*
872                                          * It's a negative value - this is the one and only case
873                                          * where we replace more than a single token.
874                                          *
875                                          * Do not compensate for the core system's special-case
876                                          * adjustment of location to that of the leading '-'
877                                          * operator in the event of a negative constant.  It is
878                                          * also useful for our purposes to start from the minus
879                                          * symbol.  In this way, queries like "select * from foo
880                                          * where bar = 1" and "select * from foo where bar = -2"
881                                          * will have identical normalized query strings.
882                                          */
883                                         tok = core_yylex(&yylval, &yylloc, yyscanner);
884                                         if (tok == 0)
885                                                 break;  /* out of inner for-loop */
886                                 }
887
888                                 /*
889                                  * We now rely on the assumption that flex has placed a zero
890                                  * byte after the text of the current token in scanbuf.
891                                  */
892                                 locs[i].length = strlen(yyextra.scanbuf + loc);
893                                 break;                  /* out of inner for-loop */
894                         }
895                 }
896
897                 /* If we hit end-of-string, give up, leaving remaining lengths -1 */
898                 if (tok == 0)
899                         break;
900
901                 last_loc = loc;
902         }
903
904         scanner_finish(yyscanner);
905 }
906
907 /*
908  * comp_location: comparator for qsorting pgssLocationLen structs by location
909  */
910 static int
911 comp_location(const void *a, const void *b)
912 {
913         int                     l = ((const pgssLocationLen *) a)->location;
914         int                     r = ((const pgssLocationLen *) b)->location;
915
916         if (l < r)
917                 return -1;
918         else if (l > r)
919                 return +1;
920         else
921                 return 0;
922 }