1 /*-------------------------------------------------------------------------
5 * Part of pg_stat_statements.c in PostgreSQL 9.5.
7 * Copyright (c) 2008-2015, PostgreSQL Global Development Group
9 *-------------------------------------------------------------------------
15 #include "access/hash.h"
16 #include "parser/scanner.h"
18 static void AppendJumble(pgssJumbleState *jstate,
19 const unsigned char *item, Size size);
20 static void JumbleQuery(pgssJumbleState *jstate, Query *query);
21 static void JumbleRangeTable(pgssJumbleState *jstate, List *rtable);
22 static void JumbleExpr(pgssJumbleState *jstate, Node *node);
23 static void RecordConstLocation(pgssJumbleState *jstate, int location);
24 static void fill_in_constant_lengths(pgssJumbleState *jstate, const char *query);
25 static int comp_location(const void *a, const void *b);
28 * AppendJumble: Append a value that is substantive in a given query to
32 AppendJumble(pgssJumbleState *jstate, const unsigned char *item, Size size)
34 unsigned char *jumble = jstate->jumble;
35 Size jumble_len = jstate->jumble_len;
38 * Whenever the jumble buffer is full, we hash the current contents and
39 * reset the buffer to contain just that hash value, thus relying on the
40 * hash to summarize everything so far.
46 if (jumble_len >= JUMBLE_SIZE)
48 uint32 start_hash = hash_any(jumble, JUMBLE_SIZE);
50 memcpy(jumble, &start_hash, sizeof(start_hash));
51 jumble_len = sizeof(start_hash);
53 part_size = Min(size, JUMBLE_SIZE - jumble_len);
54 memcpy(jumble + jumble_len, item, part_size);
55 jumble_len += part_size;
59 jstate->jumble_len = jumble_len;
63 * Wrappers around AppendJumble to encapsulate details of serialization
64 * of individual local variable elements.
66 #define APP_JUMB(item) \
67 AppendJumble(jstate, (const unsigned char *) &(item), sizeof(item))
68 #define APP_JUMB_STRING(str) \
69 AppendJumble(jstate, (const unsigned char *) (str), strlen(str) + 1)
72 * JumbleQuery: Selectively serialize the query tree, appending significant
73 * data to the "query jumble" while ignoring nonsignificant data.
75 * Rule of thumb for what to include is that we should ignore anything not
76 * semantically significant (such as alias names) as well as anything that can
77 * be deduced from child nodes (else we'd just be double-hashing that piece
81 JumbleQuery(pgssJumbleState *jstate, Query *query)
83 Assert(IsA(query, Query));
84 Assert(query->utilityStmt == NULL);
86 APP_JUMB(query->commandType);
87 /* resultRelation is usually predictable from commandType */
88 JumbleExpr(jstate, (Node *) query->cteList);
89 JumbleRangeTable(jstate, query->rtable);
90 JumbleExpr(jstate, (Node *) query->jointree);
91 JumbleExpr(jstate, (Node *) query->targetList);
92 JumbleExpr(jstate, (Node *) query->onConflict);
93 JumbleExpr(jstate, (Node *) query->returningList);
94 JumbleExpr(jstate, (Node *) query->groupClause);
95 JumbleExpr(jstate, (Node *) query->groupingSets);
96 JumbleExpr(jstate, query->havingQual);
97 JumbleExpr(jstate, (Node *) query->windowClause);
98 JumbleExpr(jstate, (Node *) query->distinctClause);
99 JumbleExpr(jstate, (Node *) query->sortClause);
100 JumbleExpr(jstate, query->limitOffset);
101 JumbleExpr(jstate, query->limitCount);
102 /* we ignore rowMarks */
103 JumbleExpr(jstate, query->setOperations);
107 * Jumble a range table
110 JumbleRangeTable(pgssJumbleState *jstate, List *rtable)
116 RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
118 Assert(IsA(rte, RangeTblEntry));
119 APP_JUMB(rte->rtekind);
120 switch (rte->rtekind)
123 APP_JUMB(rte->relid);
124 JumbleExpr(jstate, (Node *) rte->tablesample);
127 JumbleQuery(jstate, rte->subquery);
130 APP_JUMB(rte->jointype);
133 JumbleExpr(jstate, (Node *) rte->functions);
136 JumbleExpr(jstate, (Node *) rte->values_lists);
141 * Depending on the CTE name here isn't ideal, but it's the
142 * only info we have to identify the referenced WITH item.
144 APP_JUMB_STRING(rte->ctename);
145 APP_JUMB(rte->ctelevelsup);
148 elog(ERROR, "unrecognized RTE kind: %d", (int) rte->rtekind);
155 * Jumble an expression tree
157 * In general this function should handle all the same node types that
158 * expression_tree_walker() does, and therefore it's coded to be as parallel
159 * to that function as possible. However, since we are only invoked on
160 * queries immediately post-parse-analysis, we need not handle node types
161 * that only appear in planning.
163 * Note: the reason we don't simply use expression_tree_walker() is that the
164 * point of that function is to support tree walkers that don't care about
165 * most tree node types, but here we care about all types. We should complain
166 * about any unrecognized node type.
169 JumbleExpr(pgssJumbleState *jstate, Node *node)
176 /* Guard against stack overflow due to overly complex expressions */
180 * We always emit the node's NodeTag, then any additional fields that are
181 * considered significant, and then we recurse to any child nodes.
183 APP_JUMB(node->type);
185 switch (nodeTag(node))
189 Var *var = (Var *) node;
191 APP_JUMB(var->varno);
192 APP_JUMB(var->varattno);
193 APP_JUMB(var->varlevelsup);
198 Const *c = (Const *) node;
200 /* We jumble only the constant's type, not its value */
201 APP_JUMB(c->consttype);
202 /* Also, record its parse location for query normalization */
203 RecordConstLocation(jstate, c->location);
208 Param *p = (Param *) node;
210 APP_JUMB(p->paramkind);
211 APP_JUMB(p->paramid);
212 APP_JUMB(p->paramtype);
217 Aggref *expr = (Aggref *) node;
219 APP_JUMB(expr->aggfnoid);
220 JumbleExpr(jstate, (Node *) expr->aggdirectargs);
221 JumbleExpr(jstate, (Node *) expr->args);
222 JumbleExpr(jstate, (Node *) expr->aggorder);
223 JumbleExpr(jstate, (Node *) expr->aggdistinct);
224 JumbleExpr(jstate, (Node *) expr->aggfilter);
229 GroupingFunc *grpnode = (GroupingFunc *) node;
231 JumbleExpr(jstate, (Node *) grpnode->refs);
236 WindowFunc *expr = (WindowFunc *) node;
238 APP_JUMB(expr->winfnoid);
239 APP_JUMB(expr->winref);
240 JumbleExpr(jstate, (Node *) expr->args);
241 JumbleExpr(jstate, (Node *) expr->aggfilter);
246 ArrayRef *aref = (ArrayRef *) node;
248 JumbleExpr(jstate, (Node *) aref->refupperindexpr);
249 JumbleExpr(jstate, (Node *) aref->reflowerindexpr);
250 JumbleExpr(jstate, (Node *) aref->refexpr);
251 JumbleExpr(jstate, (Node *) aref->refassgnexpr);
256 FuncExpr *expr = (FuncExpr *) node;
258 APP_JUMB(expr->funcid);
259 JumbleExpr(jstate, (Node *) expr->args);
264 NamedArgExpr *nae = (NamedArgExpr *) node;
266 APP_JUMB(nae->argnumber);
267 JumbleExpr(jstate, (Node *) nae->arg);
271 case T_DistinctExpr: /* struct-equivalent to OpExpr */
272 case T_NullIfExpr: /* struct-equivalent to OpExpr */
274 OpExpr *expr = (OpExpr *) node;
276 APP_JUMB(expr->opno);
277 JumbleExpr(jstate, (Node *) expr->args);
280 case T_ScalarArrayOpExpr:
282 ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) node;
284 APP_JUMB(expr->opno);
285 APP_JUMB(expr->useOr);
286 JumbleExpr(jstate, (Node *) expr->args);
291 BoolExpr *expr = (BoolExpr *) node;
293 APP_JUMB(expr->boolop);
294 JumbleExpr(jstate, (Node *) expr->args);
299 SubLink *sublink = (SubLink *) node;
301 APP_JUMB(sublink->subLinkType);
302 APP_JUMB(sublink->subLinkId);
303 JumbleExpr(jstate, (Node *) sublink->testexpr);
304 JumbleQuery(jstate, (Query *) sublink->subselect);
309 FieldSelect *fs = (FieldSelect *) node;
311 APP_JUMB(fs->fieldnum);
312 JumbleExpr(jstate, (Node *) fs->arg);
317 FieldStore *fstore = (FieldStore *) node;
319 JumbleExpr(jstate, (Node *) fstore->arg);
320 JumbleExpr(jstate, (Node *) fstore->newvals);
325 RelabelType *rt = (RelabelType *) node;
327 APP_JUMB(rt->resulttype);
328 JumbleExpr(jstate, (Node *) rt->arg);
333 CoerceViaIO *cio = (CoerceViaIO *) node;
335 APP_JUMB(cio->resulttype);
336 JumbleExpr(jstate, (Node *) cio->arg);
339 case T_ArrayCoerceExpr:
341 ArrayCoerceExpr *acexpr = (ArrayCoerceExpr *) node;
343 APP_JUMB(acexpr->resulttype);
344 JumbleExpr(jstate, (Node *) acexpr->arg);
347 case T_ConvertRowtypeExpr:
349 ConvertRowtypeExpr *crexpr = (ConvertRowtypeExpr *) node;
351 APP_JUMB(crexpr->resulttype);
352 JumbleExpr(jstate, (Node *) crexpr->arg);
357 CollateExpr *ce = (CollateExpr *) node;
359 APP_JUMB(ce->collOid);
360 JumbleExpr(jstate, (Node *) ce->arg);
365 CaseExpr *caseexpr = (CaseExpr *) node;
367 JumbleExpr(jstate, (Node *) caseexpr->arg);
368 foreach(temp, caseexpr->args)
370 CaseWhen *when = (CaseWhen *) lfirst(temp);
372 Assert(IsA(when, CaseWhen));
373 JumbleExpr(jstate, (Node *) when->expr);
374 JumbleExpr(jstate, (Node *) when->result);
376 JumbleExpr(jstate, (Node *) caseexpr->defresult);
381 CaseTestExpr *ct = (CaseTestExpr *) node;
383 APP_JUMB(ct->typeId);
387 JumbleExpr(jstate, (Node *) ((ArrayExpr *) node)->elements);
390 JumbleExpr(jstate, (Node *) ((RowExpr *) node)->args);
392 case T_RowCompareExpr:
394 RowCompareExpr *rcexpr = (RowCompareExpr *) node;
396 APP_JUMB(rcexpr->rctype);
397 JumbleExpr(jstate, (Node *) rcexpr->largs);
398 JumbleExpr(jstate, (Node *) rcexpr->rargs);
402 JumbleExpr(jstate, (Node *) ((CoalesceExpr *) node)->args);
406 MinMaxExpr *mmexpr = (MinMaxExpr *) node;
408 APP_JUMB(mmexpr->op);
409 JumbleExpr(jstate, (Node *) mmexpr->args);
414 XmlExpr *xexpr = (XmlExpr *) node;
417 JumbleExpr(jstate, (Node *) xexpr->named_args);
418 JumbleExpr(jstate, (Node *) xexpr->args);
423 NullTest *nt = (NullTest *) node;
425 APP_JUMB(nt->nulltesttype);
426 JumbleExpr(jstate, (Node *) nt->arg);
431 BooleanTest *bt = (BooleanTest *) node;
433 APP_JUMB(bt->booltesttype);
434 JumbleExpr(jstate, (Node *) bt->arg);
437 case T_CoerceToDomain:
439 CoerceToDomain *cd = (CoerceToDomain *) node;
441 APP_JUMB(cd->resulttype);
442 JumbleExpr(jstate, (Node *) cd->arg);
445 case T_CoerceToDomainValue:
447 CoerceToDomainValue *cdv = (CoerceToDomainValue *) node;
449 APP_JUMB(cdv->typeId);
454 SetToDefault *sd = (SetToDefault *) node;
456 APP_JUMB(sd->typeId);
459 case T_CurrentOfExpr:
461 CurrentOfExpr *ce = (CurrentOfExpr *) node;
463 APP_JUMB(ce->cvarno);
465 APP_JUMB_STRING(ce->cursor_name);
466 APP_JUMB(ce->cursor_param);
469 case T_InferenceElem:
471 InferenceElem *ie = (InferenceElem *) node;
473 APP_JUMB(ie->infercollid);
474 APP_JUMB(ie->inferopclass);
475 JumbleExpr(jstate, ie->expr);
480 TargetEntry *tle = (TargetEntry *) node;
482 APP_JUMB(tle->resno);
483 APP_JUMB(tle->ressortgroupref);
484 JumbleExpr(jstate, (Node *) tle->expr);
489 RangeTblRef *rtr = (RangeTblRef *) node;
491 APP_JUMB(rtr->rtindex);
496 JoinExpr *join = (JoinExpr *) node;
498 APP_JUMB(join->jointype);
499 APP_JUMB(join->isNatural);
500 APP_JUMB(join->rtindex);
501 JumbleExpr(jstate, join->larg);
502 JumbleExpr(jstate, join->rarg);
503 JumbleExpr(jstate, join->quals);
508 FromExpr *from = (FromExpr *) node;
510 JumbleExpr(jstate, (Node *) from->fromlist);
511 JumbleExpr(jstate, from->quals);
514 case T_OnConflictExpr:
516 OnConflictExpr *conf = (OnConflictExpr *) node;
518 APP_JUMB(conf->action);
519 JumbleExpr(jstate, (Node *) conf->arbiterElems);
520 JumbleExpr(jstate, conf->arbiterWhere);
521 JumbleExpr(jstate, (Node *) conf->onConflictSet);
522 JumbleExpr(jstate, conf->onConflictWhere);
523 APP_JUMB(conf->constraint);
524 APP_JUMB(conf->exclRelIndex);
525 JumbleExpr(jstate, (Node *) conf->exclRelTlist);
529 foreach(temp, (List *) node)
531 JumbleExpr(jstate, (Node *) lfirst(temp));
535 foreach(temp, (List *) node)
537 APP_JUMB(lfirst_int(temp));
540 case T_SortGroupClause:
542 SortGroupClause *sgc = (SortGroupClause *) node;
544 APP_JUMB(sgc->tleSortGroupRef);
546 APP_JUMB(sgc->sortop);
547 APP_JUMB(sgc->nulls_first);
552 GroupingSet *gsnode = (GroupingSet *) node;
554 JumbleExpr(jstate, (Node *) gsnode->content);
559 WindowClause *wc = (WindowClause *) node;
561 APP_JUMB(wc->winref);
562 APP_JUMB(wc->frameOptions);
563 JumbleExpr(jstate, (Node *) wc->partitionClause);
564 JumbleExpr(jstate, (Node *) wc->orderClause);
565 JumbleExpr(jstate, wc->startOffset);
566 JumbleExpr(jstate, wc->endOffset);
569 case T_CommonTableExpr:
571 CommonTableExpr *cte = (CommonTableExpr *) node;
573 /* we store the string name because RTE_CTE RTEs need it */
574 APP_JUMB_STRING(cte->ctename);
575 JumbleQuery(jstate, (Query *) cte->ctequery);
578 case T_SetOperationStmt:
580 SetOperationStmt *setop = (SetOperationStmt *) node;
583 APP_JUMB(setop->all);
584 JumbleExpr(jstate, setop->larg);
585 JumbleExpr(jstate, setop->rarg);
588 case T_RangeTblFunction:
590 RangeTblFunction *rtfunc = (RangeTblFunction *) node;
592 JumbleExpr(jstate, rtfunc->funcexpr);
595 case T_TableSampleClause:
597 TableSampleClause *tsc = (TableSampleClause *) node;
599 APP_JUMB(tsc->tsmhandler);
600 JumbleExpr(jstate, (Node *) tsc->args);
601 JumbleExpr(jstate, (Node *) tsc->repeatable);
605 /* Only a warning, since we can stumble along anyway */
606 elog(WARNING, "unrecognized node type: %d",
607 (int) nodeTag(node));
613 * Record location of constant within query string of query tree
614 * that is currently being walked.
617 RecordConstLocation(pgssJumbleState *jstate, int location)
619 /* -1 indicates unknown or undefined location */
622 /* enlarge array if needed */
623 if (jstate->clocations_count >= jstate->clocations_buf_size)
625 jstate->clocations_buf_size *= 2;
626 jstate->clocations = (pgssLocationLen *)
627 repalloc(jstate->clocations,
628 jstate->clocations_buf_size *
629 sizeof(pgssLocationLen));
631 jstate->clocations[jstate->clocations_count].location = location;
632 /* initialize lengths to -1 to simplify fill_in_constant_lengths */
633 jstate->clocations[jstate->clocations_count].length = -1;
634 jstate->clocations_count++;
639 * Generate a normalized version of the query string that will be used to
640 * represent all similar queries.
642 * Note that the normalized representation may well vary depending on
643 * just which "equivalent" query is used to create the hashtable entry.
644 * We assume this is OK.
646 * *query_len_p contains the input string length, and is updated with
647 * the result string length (which cannot be longer) on exit.
649 * Returns a palloc'd string.
652 generate_normalized_query(pgssJumbleState *jstate, const char *query,
653 int *query_len_p, int encoding)
656 int query_len = *query_len_p;
658 len_to_wrt, /* Length (in bytes) to write */
659 quer_loc = 0, /* Source query byte location */
660 n_quer_loc = 0, /* Normalized query byte location */
661 last_off = 0, /* Offset from start for previous tok */
662 last_tok_len = 0; /* Length (in bytes) of that tok */
665 * Get constants' lengths (core system only gives us locations). Note
666 * this also ensures the items are sorted by location.
668 fill_in_constant_lengths(jstate, query);
670 /* Allocate result buffer */
671 norm_query = palloc(query_len + 1);
673 for (i = 0; i < jstate->clocations_count; i++)
675 int off, /* Offset from start for cur tok */
676 tok_len; /* Length (in bytes) of that tok */
678 off = jstate->clocations[i].location;
679 tok_len = jstate->clocations[i].length;
682 continue; /* ignore any duplicates */
684 /* Copy next chunk (what precedes the next constant) */
685 len_to_wrt = off - last_off;
686 len_to_wrt -= last_tok_len;
688 Assert(len_to_wrt >= 0);
689 memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
690 n_quer_loc += len_to_wrt;
692 /* And insert a '?' in place of the constant token */
693 norm_query[n_quer_loc++] = '?';
695 quer_loc = off + tok_len;
697 last_tok_len = tok_len;
701 * We've copied up until the last ignorable constant. Copy over the
702 * remaining bytes of the original query string.
704 len_to_wrt = query_len - quer_loc;
706 Assert(len_to_wrt >= 0);
707 memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
708 n_quer_loc += len_to_wrt;
710 Assert(n_quer_loc <= query_len);
711 norm_query[n_quer_loc] = '\0';
713 *query_len_p = n_quer_loc;
718 * Given a valid SQL string and an array of constant-location records,
719 * fill in the textual lengths of those constants.
721 * The constants may use any allowed constant syntax, such as float literals,
722 * bit-strings, single-quoted strings and dollar-quoted strings. This is
723 * accomplished by using the public API for the core scanner.
725 * It is the caller's job to ensure that the string is a valid SQL statement
726 * with constants at the indicated locations. Since in practice the string
727 * has already been parsed, and the locations that the caller provides will
728 * have originated from within the authoritative parser, this should not be
731 * Duplicate constant pointers are possible, and will have their lengths
732 * marked as '-1', so that they are later ignored. (Actually, we assume the
733 * lengths were initialized as -1 to start with, and don't change them here.)
735 * N.B. There is an assumption that a '-' character at a Const location begins
736 * a negative numeric constant. This precludes there ever being another
737 * reason for a constant to start with a '-'.
740 fill_in_constant_lengths(pgssJumbleState *jstate, const char *query)
742 pgssLocationLen *locs;
743 core_yyscan_t yyscanner;
744 core_yy_extra_type yyextra;
751 * Sort the records by location so that we can process them in order while
752 * scanning the query text.
754 if (jstate->clocations_count > 1)
755 qsort(jstate->clocations, jstate->clocations_count,
756 sizeof(pgssLocationLen), comp_location);
757 locs = jstate->clocations;
759 /* initialize the flex scanner --- should match raw_parser() */
760 yyscanner = scanner_init(query,
765 /* we don't want to re-emit any escape string warnings */
766 yyextra.escape_string_warning = false;
768 /* Search for each constant, in sequence */
769 for (i = 0; i < jstate->clocations_count; i++)
771 int loc = locs[i].location;
777 continue; /* Duplicate constant, ignore */
779 /* Lex tokens until we find the desired constant */
782 tok = core_yylex(&yylval, &yylloc, yyscanner);
784 /* We should not hit end-of-string, but if we do, behave sanely */
786 break; /* out of inner for-loop */
789 * We should find the token position exactly, but if we somehow
790 * run past it, work with that.
794 if (query[loc] == '-')
797 * It's a negative value - this is the one and only case
798 * where we replace more than a single token.
800 * Do not compensate for the core system's special-case
801 * adjustment of location to that of the leading '-'
802 * operator in the event of a negative constant. It is
803 * also useful for our purposes to start from the minus
804 * symbol. In this way, queries like "select * from foo
805 * where bar = 1" and "select * from foo where bar = -2"
806 * will have identical normalized query strings.
808 tok = core_yylex(&yylval, &yylloc, yyscanner);
810 break; /* out of inner for-loop */
814 * We now rely on the assumption that flex has placed a zero
815 * byte after the text of the current token in scanbuf.
817 locs[i].length = strlen(yyextra.scanbuf + loc);
818 break; /* out of inner for-loop */
822 /* If we hit end-of-string, give up, leaving remaining lengths -1 */
829 scanner_finish(yyscanner);
833 * comp_location: comparator for qsorting pgssLocationLen structs by location
836 comp_location(const void *a, const void *b)
838 int l = ((const pgssLocationLen *) a)->location;
839 int r = ((const pgssLocationLen *) b)->location;