1 /*-------------------------------------------------------------------------
5 * Part of pg_stat_statements.c in PostgreSQL 9.6.
7 * Copyright (c) 2008-2016, PostgreSQL Global Development Group
9 *-------------------------------------------------------------------------
13 #include "access/hash.h"
14 #include "parser/scanner.h"
16 static void AppendJumble(pgssJumbleState *jstate,
17 const unsigned char *item, Size size);
18 static void JumbleQuery(pgssJumbleState *jstate, Query *query);
19 static void JumbleRangeTable(pgssJumbleState *jstate, List *rtable);
20 static void JumbleExpr(pgssJumbleState *jstate, Node *node);
21 static void RecordConstLocation(pgssJumbleState *jstate, int location);
22 static void fill_in_constant_lengths(pgssJumbleState *jstate, const char *query);
23 static int comp_location(const void *a, const void *b);
26 * AppendJumble: Append a value that is substantive in a given query to
30 AppendJumble(pgssJumbleState *jstate, const unsigned char *item, Size size)
32 unsigned char *jumble = jstate->jumble;
33 Size jumble_len = jstate->jumble_len;
36 * Whenever the jumble buffer is full, we hash the current contents and
37 * reset the buffer to contain just that hash value, thus relying on the
38 * hash to summarize everything so far.
44 if (jumble_len >= JUMBLE_SIZE)
46 uint32 start_hash = hash_any(jumble, JUMBLE_SIZE);
48 memcpy(jumble, &start_hash, sizeof(start_hash));
49 jumble_len = sizeof(start_hash);
51 part_size = Min(size, JUMBLE_SIZE - jumble_len);
52 memcpy(jumble + jumble_len, item, part_size);
53 jumble_len += part_size;
57 jstate->jumble_len = jumble_len;
61 * Wrappers around AppendJumble to encapsulate details of serialization
62 * of individual local variable elements.
64 #define APP_JUMB(item) \
65 AppendJumble(jstate, (const unsigned char *) &(item), sizeof(item))
66 #define APP_JUMB_STRING(str) \
67 AppendJumble(jstate, (const unsigned char *) (str), strlen(str) + 1)
70 * JumbleQuery: Selectively serialize the query tree, appending significant
71 * data to the "query jumble" while ignoring nonsignificant data.
73 * Rule of thumb for what to include is that we should ignore anything not
74 * semantically significant (such as alias names) as well as anything that can
75 * be deduced from child nodes (else we'd just be double-hashing that piece
79 JumbleQuery(pgssJumbleState *jstate, Query *query)
81 Assert(IsA(query, Query));
82 Assert(query->utilityStmt == NULL);
84 APP_JUMB(query->commandType);
85 /* resultRelation is usually predictable from commandType */
86 JumbleExpr(jstate, (Node *) query->cteList);
87 JumbleRangeTable(jstate, query->rtable);
88 JumbleExpr(jstate, (Node *) query->jointree);
89 JumbleExpr(jstate, (Node *) query->targetList);
90 JumbleExpr(jstate, (Node *) query->onConflict);
91 JumbleExpr(jstate, (Node *) query->returningList);
92 JumbleExpr(jstate, (Node *) query->groupClause);
93 JumbleExpr(jstate, (Node *) query->groupingSets);
94 JumbleExpr(jstate, query->havingQual);
95 JumbleExpr(jstate, (Node *) query->windowClause);
96 JumbleExpr(jstate, (Node *) query->distinctClause);
97 JumbleExpr(jstate, (Node *) query->sortClause);
98 JumbleExpr(jstate, query->limitOffset);
99 JumbleExpr(jstate, query->limitCount);
100 /* we ignore rowMarks */
101 JumbleExpr(jstate, query->setOperations);
105 * Jumble a range table
108 JumbleRangeTable(pgssJumbleState *jstate, List *rtable)
114 RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
116 Assert(IsA(rte, RangeTblEntry));
117 APP_JUMB(rte->rtekind);
118 switch (rte->rtekind)
121 APP_JUMB(rte->relid);
122 JumbleExpr(jstate, (Node *) rte->tablesample);
125 JumbleQuery(jstate, rte->subquery);
128 APP_JUMB(rte->jointype);
131 JumbleExpr(jstate, (Node *) rte->functions);
134 JumbleExpr(jstate, (Node *) rte->values_lists);
139 * Depending on the CTE name here isn't ideal, but it's the
140 * only info we have to identify the referenced WITH item.
142 APP_JUMB_STRING(rte->ctename);
143 APP_JUMB(rte->ctelevelsup);
146 elog(ERROR, "unrecognized RTE kind: %d", (int) rte->rtekind);
153 * Jumble an expression tree
155 * In general this function should handle all the same node types that
156 * expression_tree_walker() does, and therefore it's coded to be as parallel
157 * to that function as possible. However, since we are only invoked on
158 * queries immediately post-parse-analysis, we need not handle node types
159 * that only appear in planning.
161 * Note: the reason we don't simply use expression_tree_walker() is that the
162 * point of that function is to support tree walkers that don't care about
163 * most tree node types, but here we care about all types. We should complain
164 * about any unrecognized node type.
167 JumbleExpr(pgssJumbleState *jstate, Node *node)
174 /* Guard against stack overflow due to overly complex expressions */
178 * We always emit the node's NodeTag, then any additional fields that are
179 * considered significant, and then we recurse to any child nodes.
181 APP_JUMB(node->type);
183 switch (nodeTag(node))
187 Var *var = (Var *) node;
189 APP_JUMB(var->varno);
190 APP_JUMB(var->varattno);
191 APP_JUMB(var->varlevelsup);
196 Const *c = (Const *) node;
198 /* We jumble only the constant's type, not its value */
199 APP_JUMB(c->consttype);
200 /* Also, record its parse location for query normalization */
201 RecordConstLocation(jstate, c->location);
206 Param *p = (Param *) node;
208 APP_JUMB(p->paramkind);
209 APP_JUMB(p->paramid);
210 APP_JUMB(p->paramtype);
215 Aggref *expr = (Aggref *) node;
217 APP_JUMB(expr->aggfnoid);
218 JumbleExpr(jstate, (Node *) expr->aggdirectargs);
219 JumbleExpr(jstate, (Node *) expr->args);
220 JumbleExpr(jstate, (Node *) expr->aggorder);
221 JumbleExpr(jstate, (Node *) expr->aggdistinct);
222 JumbleExpr(jstate, (Node *) expr->aggfilter);
227 GroupingFunc *grpnode = (GroupingFunc *) node;
229 JumbleExpr(jstate, (Node *) grpnode->refs);
234 WindowFunc *expr = (WindowFunc *) node;
236 APP_JUMB(expr->winfnoid);
237 APP_JUMB(expr->winref);
238 JumbleExpr(jstate, (Node *) expr->args);
239 JumbleExpr(jstate, (Node *) expr->aggfilter);
244 ArrayRef *aref = (ArrayRef *) node;
246 JumbleExpr(jstate, (Node *) aref->refupperindexpr);
247 JumbleExpr(jstate, (Node *) aref->reflowerindexpr);
248 JumbleExpr(jstate, (Node *) aref->refexpr);
249 JumbleExpr(jstate, (Node *) aref->refassgnexpr);
254 FuncExpr *expr = (FuncExpr *) node;
256 APP_JUMB(expr->funcid);
257 JumbleExpr(jstate, (Node *) expr->args);
262 NamedArgExpr *nae = (NamedArgExpr *) node;
264 APP_JUMB(nae->argnumber);
265 JumbleExpr(jstate, (Node *) nae->arg);
269 case T_DistinctExpr: /* struct-equivalent to OpExpr */
270 case T_NullIfExpr: /* struct-equivalent to OpExpr */
272 OpExpr *expr = (OpExpr *) node;
274 APP_JUMB(expr->opno);
275 JumbleExpr(jstate, (Node *) expr->args);
278 case T_ScalarArrayOpExpr:
280 ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) node;
282 APP_JUMB(expr->opno);
283 APP_JUMB(expr->useOr);
284 JumbleExpr(jstate, (Node *) expr->args);
289 BoolExpr *expr = (BoolExpr *) node;
291 APP_JUMB(expr->boolop);
292 JumbleExpr(jstate, (Node *) expr->args);
297 SubLink *sublink = (SubLink *) node;
299 APP_JUMB(sublink->subLinkType);
300 APP_JUMB(sublink->subLinkId);
301 JumbleExpr(jstate, (Node *) sublink->testexpr);
302 JumbleQuery(jstate, (Query *) sublink->subselect);
307 FieldSelect *fs = (FieldSelect *) node;
309 APP_JUMB(fs->fieldnum);
310 JumbleExpr(jstate, (Node *) fs->arg);
315 FieldStore *fstore = (FieldStore *) node;
317 JumbleExpr(jstate, (Node *) fstore->arg);
318 JumbleExpr(jstate, (Node *) fstore->newvals);
323 RelabelType *rt = (RelabelType *) node;
325 APP_JUMB(rt->resulttype);
326 JumbleExpr(jstate, (Node *) rt->arg);
331 CoerceViaIO *cio = (CoerceViaIO *) node;
333 APP_JUMB(cio->resulttype);
334 JumbleExpr(jstate, (Node *) cio->arg);
337 case T_ArrayCoerceExpr:
339 ArrayCoerceExpr *acexpr = (ArrayCoerceExpr *) node;
341 APP_JUMB(acexpr->resulttype);
342 JumbleExpr(jstate, (Node *) acexpr->arg);
345 case T_ConvertRowtypeExpr:
347 ConvertRowtypeExpr *crexpr = (ConvertRowtypeExpr *) node;
349 APP_JUMB(crexpr->resulttype);
350 JumbleExpr(jstate, (Node *) crexpr->arg);
355 CollateExpr *ce = (CollateExpr *) node;
357 APP_JUMB(ce->collOid);
358 JumbleExpr(jstate, (Node *) ce->arg);
363 CaseExpr *caseexpr = (CaseExpr *) node;
365 JumbleExpr(jstate, (Node *) caseexpr->arg);
366 foreach(temp, caseexpr->args)
368 CaseWhen *when = (CaseWhen *) lfirst(temp);
370 Assert(IsA(when, CaseWhen));
371 JumbleExpr(jstate, (Node *) when->expr);
372 JumbleExpr(jstate, (Node *) when->result);
374 JumbleExpr(jstate, (Node *) caseexpr->defresult);
379 CaseTestExpr *ct = (CaseTestExpr *) node;
381 APP_JUMB(ct->typeId);
385 JumbleExpr(jstate, (Node *) ((ArrayExpr *) node)->elements);
388 JumbleExpr(jstate, (Node *) ((RowExpr *) node)->args);
390 case T_RowCompareExpr:
392 RowCompareExpr *rcexpr = (RowCompareExpr *) node;
394 APP_JUMB(rcexpr->rctype);
395 JumbleExpr(jstate, (Node *) rcexpr->largs);
396 JumbleExpr(jstate, (Node *) rcexpr->rargs);
400 JumbleExpr(jstate, (Node *) ((CoalesceExpr *) node)->args);
404 MinMaxExpr *mmexpr = (MinMaxExpr *) node;
406 APP_JUMB(mmexpr->op);
407 JumbleExpr(jstate, (Node *) mmexpr->args);
412 XmlExpr *xexpr = (XmlExpr *) node;
415 JumbleExpr(jstate, (Node *) xexpr->named_args);
416 JumbleExpr(jstate, (Node *) xexpr->args);
421 NullTest *nt = (NullTest *) node;
423 APP_JUMB(nt->nulltesttype);
424 JumbleExpr(jstate, (Node *) nt->arg);
429 BooleanTest *bt = (BooleanTest *) node;
431 APP_JUMB(bt->booltesttype);
432 JumbleExpr(jstate, (Node *) bt->arg);
435 case T_CoerceToDomain:
437 CoerceToDomain *cd = (CoerceToDomain *) node;
439 APP_JUMB(cd->resulttype);
440 JumbleExpr(jstate, (Node *) cd->arg);
443 case T_CoerceToDomainValue:
445 CoerceToDomainValue *cdv = (CoerceToDomainValue *) node;
447 APP_JUMB(cdv->typeId);
452 SetToDefault *sd = (SetToDefault *) node;
454 APP_JUMB(sd->typeId);
457 case T_CurrentOfExpr:
459 CurrentOfExpr *ce = (CurrentOfExpr *) node;
461 APP_JUMB(ce->cvarno);
463 APP_JUMB_STRING(ce->cursor_name);
464 APP_JUMB(ce->cursor_param);
467 case T_InferenceElem:
469 InferenceElem *ie = (InferenceElem *) node;
471 APP_JUMB(ie->infercollid);
472 APP_JUMB(ie->inferopclass);
473 JumbleExpr(jstate, ie->expr);
478 TargetEntry *tle = (TargetEntry *) node;
480 APP_JUMB(tle->resno);
481 APP_JUMB(tle->ressortgroupref);
482 JumbleExpr(jstate, (Node *) tle->expr);
487 RangeTblRef *rtr = (RangeTblRef *) node;
489 APP_JUMB(rtr->rtindex);
494 JoinExpr *join = (JoinExpr *) node;
496 APP_JUMB(join->jointype);
497 APP_JUMB(join->isNatural);
498 APP_JUMB(join->rtindex);
499 JumbleExpr(jstate, join->larg);
500 JumbleExpr(jstate, join->rarg);
501 JumbleExpr(jstate, join->quals);
506 FromExpr *from = (FromExpr *) node;
508 JumbleExpr(jstate, (Node *) from->fromlist);
509 JumbleExpr(jstate, from->quals);
512 case T_OnConflictExpr:
514 OnConflictExpr *conf = (OnConflictExpr *) node;
516 APP_JUMB(conf->action);
517 JumbleExpr(jstate, (Node *) conf->arbiterElems);
518 JumbleExpr(jstate, conf->arbiterWhere);
519 JumbleExpr(jstate, (Node *) conf->onConflictSet);
520 JumbleExpr(jstate, conf->onConflictWhere);
521 APP_JUMB(conf->constraint);
522 APP_JUMB(conf->exclRelIndex);
523 JumbleExpr(jstate, (Node *) conf->exclRelTlist);
527 foreach(temp, (List *) node)
529 JumbleExpr(jstate, (Node *) lfirst(temp));
533 foreach(temp, (List *) node)
535 APP_JUMB(lfirst_int(temp));
538 case T_SortGroupClause:
540 SortGroupClause *sgc = (SortGroupClause *) node;
542 APP_JUMB(sgc->tleSortGroupRef);
544 APP_JUMB(sgc->sortop);
545 APP_JUMB(sgc->nulls_first);
550 GroupingSet *gsnode = (GroupingSet *) node;
552 JumbleExpr(jstate, (Node *) gsnode->content);
557 WindowClause *wc = (WindowClause *) node;
559 APP_JUMB(wc->winref);
560 APP_JUMB(wc->frameOptions);
561 JumbleExpr(jstate, (Node *) wc->partitionClause);
562 JumbleExpr(jstate, (Node *) wc->orderClause);
563 JumbleExpr(jstate, wc->startOffset);
564 JumbleExpr(jstate, wc->endOffset);
567 case T_CommonTableExpr:
569 CommonTableExpr *cte = (CommonTableExpr *) node;
571 /* we store the string name because RTE_CTE RTEs need it */
572 APP_JUMB_STRING(cte->ctename);
573 JumbleQuery(jstate, (Query *) cte->ctequery);
576 case T_SetOperationStmt:
578 SetOperationStmt *setop = (SetOperationStmt *) node;
581 APP_JUMB(setop->all);
582 JumbleExpr(jstate, setop->larg);
583 JumbleExpr(jstate, setop->rarg);
586 case T_RangeTblFunction:
588 RangeTblFunction *rtfunc = (RangeTblFunction *) node;
590 JumbleExpr(jstate, rtfunc->funcexpr);
593 case T_TableSampleClause:
595 TableSampleClause *tsc = (TableSampleClause *) node;
597 APP_JUMB(tsc->tsmhandler);
598 JumbleExpr(jstate, (Node *) tsc->args);
599 JumbleExpr(jstate, (Node *) tsc->repeatable);
603 /* Only a warning, since we can stumble along anyway */
604 elog(WARNING, "unrecognized node type: %d",
605 (int) nodeTag(node));
611 * Record location of constant within query string of query tree
612 * that is currently being walked.
615 RecordConstLocation(pgssJumbleState *jstate, int location)
617 /* -1 indicates unknown or undefined location */
620 /* enlarge array if needed */
621 if (jstate->clocations_count >= jstate->clocations_buf_size)
623 jstate->clocations_buf_size *= 2;
624 jstate->clocations = (pgssLocationLen *)
625 repalloc(jstate->clocations,
626 jstate->clocations_buf_size *
627 sizeof(pgssLocationLen));
629 jstate->clocations[jstate->clocations_count].location = location;
630 /* initialize lengths to -1 to simplify fill_in_constant_lengths */
631 jstate->clocations[jstate->clocations_count].length = -1;
632 jstate->clocations_count++;
637 * Generate a normalized version of the query string that will be used to
638 * represent all similar queries.
640 * Note that the normalized representation may well vary depending on
641 * just which "equivalent" query is used to create the hashtable entry.
642 * We assume this is OK.
644 * *query_len_p contains the input string length, and is updated with
645 * the result string length (which cannot be longer) on exit.
647 * Returns a palloc'd string.
650 generate_normalized_query(pgssJumbleState *jstate, const char *query,
651 int *query_len_p, int encoding)
654 int query_len = *query_len_p;
656 len_to_wrt, /* Length (in bytes) to write */
657 quer_loc = 0, /* Source query byte location */
658 n_quer_loc = 0, /* Normalized query byte location */
659 last_off = 0, /* Offset from start for previous tok */
660 last_tok_len = 0; /* Length (in bytes) of that tok */
663 * Get constants' lengths (core system only gives us locations). Note
664 * this also ensures the items are sorted by location.
666 fill_in_constant_lengths(jstate, query);
668 /* Allocate result buffer */
669 norm_query = palloc(query_len + 1);
671 for (i = 0; i < jstate->clocations_count; i++)
673 int off, /* Offset from start for cur tok */
674 tok_len; /* Length (in bytes) of that tok */
676 off = jstate->clocations[i].location;
677 tok_len = jstate->clocations[i].length;
680 continue; /* ignore any duplicates */
682 /* Copy next chunk (what precedes the next constant) */
683 len_to_wrt = off - last_off;
684 len_to_wrt -= last_tok_len;
686 Assert(len_to_wrt >= 0);
687 memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
688 n_quer_loc += len_to_wrt;
690 /* And insert a '?' in place of the constant token */
691 norm_query[n_quer_loc++] = '?';
693 quer_loc = off + tok_len;
695 last_tok_len = tok_len;
699 * We've copied up until the last ignorable constant. Copy over the
700 * remaining bytes of the original query string.
702 len_to_wrt = query_len - quer_loc;
704 Assert(len_to_wrt >= 0);
705 memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
706 n_quer_loc += len_to_wrt;
708 Assert(n_quer_loc <= query_len);
709 norm_query[n_quer_loc] = '\0';
711 *query_len_p = n_quer_loc;
716 * Given a valid SQL string and an array of constant-location records,
717 * fill in the textual lengths of those constants.
719 * The constants may use any allowed constant syntax, such as float literals,
720 * bit-strings, single-quoted strings and dollar-quoted strings. This is
721 * accomplished by using the public API for the core scanner.
723 * It is the caller's job to ensure that the string is a valid SQL statement
724 * with constants at the indicated locations. Since in practice the string
725 * has already been parsed, and the locations that the caller provides will
726 * have originated from within the authoritative parser, this should not be
729 * Duplicate constant pointers are possible, and will have their lengths
730 * marked as '-1', so that they are later ignored. (Actually, we assume the
731 * lengths were initialized as -1 to start with, and don't change them here.)
733 * N.B. There is an assumption that a '-' character at a Const location begins
734 * a negative numeric constant. This precludes there ever being another
735 * reason for a constant to start with a '-'.
738 fill_in_constant_lengths(pgssJumbleState *jstate, const char *query)
740 pgssLocationLen *locs;
741 core_yyscan_t yyscanner;
742 core_yy_extra_type yyextra;
749 * Sort the records by location so that we can process them in order while
750 * scanning the query text.
752 if (jstate->clocations_count > 1)
753 qsort(jstate->clocations, jstate->clocations_count,
754 sizeof(pgssLocationLen), comp_location);
755 locs = jstate->clocations;
757 /* initialize the flex scanner --- should match raw_parser() */
758 yyscanner = scanner_init(query,
763 /* we don't want to re-emit any escape string warnings */
764 yyextra.escape_string_warning = false;
766 /* Search for each constant, in sequence */
767 for (i = 0; i < jstate->clocations_count; i++)
769 int loc = locs[i].location;
775 continue; /* Duplicate constant, ignore */
777 /* Lex tokens until we find the desired constant */
780 tok = core_yylex(&yylval, &yylloc, yyscanner);
782 /* We should not hit end-of-string, but if we do, behave sanely */
784 break; /* out of inner for-loop */
787 * We should find the token position exactly, but if we somehow
788 * run past it, work with that.
792 if (query[loc] == '-')
795 * It's a negative value - this is the one and only case
796 * where we replace more than a single token.
798 * Do not compensate for the core system's special-case
799 * adjustment of location to that of the leading '-'
800 * operator in the event of a negative constant. It is
801 * also useful for our purposes to start from the minus
802 * symbol. In this way, queries like "select * from foo
803 * where bar = 1" and "select * from foo where bar = -2"
804 * will have identical normalized query strings.
806 tok = core_yylex(&yylval, &yylloc, yyscanner);
808 break; /* out of inner for-loop */
812 * We now rely on the assumption that flex has placed a zero
813 * byte after the text of the current token in scanbuf.
815 locs[i].length = strlen(yyextra.scanbuf + loc);
816 break; /* out of inner for-loop */
820 /* If we hit end-of-string, give up, leaving remaining lengths -1 */
827 scanner_finish(yyscanner);
831 * comp_location: comparator for qsorting pgssLocationLen structs by location
834 comp_location(const void *a, const void *b)
836 int l = ((const pgssLocationLen *) a)->location;
837 int r = ((const pgssLocationLen *) b)->location;