From 67becf8d41a082eaaf6db6e0860d49409b79e32b Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sun, 1 Aug 2010 22:38:11 +0000 Subject: [PATCH] Fix ANALYZE's ancient deficiency of not trying to collect stats for expression indexes when the index column type (the opclass opckeytype) is different from the expression's datatype. When coded, this limitation wasn't worth worrying about because we had no intelligence to speak of in stats collection for the datatypes used by such opclasses. However, now that there's non-toy estimation capability for tsvector queries, it amounts to a bug that ANALYZE fails to do this. The fix changes struct VacAttrStats, and therefore constitutes an API break for custom typanalyze functions. Therefore we can't back-patch it into released branches, but it was agreed that 9.0 isn't yet frozen hard enough to make such a change unacceptable. Ergo, back-patch to 9.0 but no further. The API break had better be mentioned in 9.0 release notes. --- src/backend/commands/analyze.c | 88 ++++++++++++++++++++++++------------------ src/include/commands/vacuum.h | 19 ++++++--- 2 files changed, 63 insertions(+), 44 deletions(-) diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 22734b7619..9d36427ad9 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.152 2010/02/26 02:00:37 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.153 2010/08/01 22:38:11 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -92,7 +92,8 @@ static void compute_index_stats(Relation onerel, double totalrows, AnlIndexData *indexdata, int nindexes, HeapTuple *rows, int numrows, MemoryContext col_context); -static VacAttrStats *examine_attribute(Relation onerel, int attnum); +static VacAttrStats *examine_attribute(Relation onerel, int attnum, + Node *index_expr); static int acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows, double *totalrows, double *totaldeadrows); static double random_fract(void); @@ -339,7 +340,7 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, (errcode(ERRCODE_UNDEFINED_COLUMN), errmsg("column \"%s\" of relation \"%s\" does not exist", col, RelationGetRelationName(onerel)))); - vacattrstats[tcnt] = examine_attribute(onerel, i); + vacattrstats[tcnt] = examine_attribute(onerel, i, NULL); if (vacattrstats[tcnt] != NULL) tcnt++; } @@ -353,7 +354,7 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, tcnt = 0; for (i = 1; i <= attr_cnt; i++) { - vacattrstats[tcnt] = examine_attribute(onerel, i); + vacattrstats[tcnt] = examine_attribute(onerel, i, NULL); if (vacattrstats[tcnt] != NULL) tcnt++; } @@ -407,21 +408,8 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, elog(ERROR, "too few entries in indexprs list"); indexkey = (Node *) lfirst(indexpr_item); indexpr_item = lnext(indexpr_item); - - /* - * Can't analyze if the opclass uses a storage type - * different from the expression result type. We'd get - * confused because the type shown in pg_attribute for - * the index column doesn't match what we are getting - * from the expression. Perhaps this can be fixed - * someday, but for now, punt. - */ - if (exprType(indexkey) != - Irel[ind]->rd_att->attrs[i]->atttypid) - continue; - thisdata->vacattrstats[tcnt] = - examine_attribute(Irel[ind], i + 1); + examine_attribute(Irel[ind], i + 1, indexkey); if (thisdata->vacattrstats[tcnt] != NULL) { tcnt++; @@ -802,9 +790,12 @@ compute_index_stats(Relation onerel, double totalrows, * * Determine whether the column is analyzable; if so, create and initialize * a VacAttrStats struct for it. If not, return NULL. + * + * If index_expr isn't NULL, then we're trying to analyze an expression index, + * and index_expr is the expression tree representing the column's data. */ static VacAttrStats * -examine_attribute(Relation onerel, int attnum) +examine_attribute(Relation onerel, int attnum, Node *index_expr) { Form_pg_attribute attr = onerel->rd_att->attrs[attnum - 1]; HeapTuple typtuple; @@ -827,9 +818,30 @@ examine_attribute(Relation onerel, int attnum) stats = (VacAttrStats *) palloc0(sizeof(VacAttrStats)); stats->attr = (Form_pg_attribute) palloc(ATTRIBUTE_FIXED_PART_SIZE); memcpy(stats->attr, attr, ATTRIBUTE_FIXED_PART_SIZE); - typtuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(attr->atttypid)); + + /* + * When analyzing an expression index, believe the expression tree's type + * not the column datatype --- the latter might be the opckeytype storage + * type of the opclass, which is not interesting for our purposes. (Note: + * if we did anything with non-expression index columns, we'd need to + * figure out where to get the correct type info from, but for now that's + * not a problem.) It's not clear whether anyone will care about the + * typmod, but we store that too just in case. + */ + if (index_expr) + { + stats->attrtypid = exprType(index_expr); + stats->attrtypmod = exprTypmod(index_expr); + } + else + { + stats->attrtypid = attr->atttypid; + stats->attrtypmod = attr->atttypmod; + } + + typtuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(stats->attrtypid)); if (!HeapTupleIsValid(typtuple)) - elog(ERROR, "cache lookup failed for type %u", attr->atttypid); + elog(ERROR, "cache lookup failed for type %u", stats->attrtypid); stats->attrtype = (Form_pg_type) palloc(sizeof(FormData_pg_type)); memcpy(stats->attrtype, GETSTRUCT(typtuple), sizeof(FormData_pg_type)); ReleaseSysCache(typtuple); @@ -838,12 +850,12 @@ examine_attribute(Relation onerel, int attnum) /* * The fields describing the stats->stavalues[n] element types default to - * the type of the field being analyzed, but the type-specific typanalyze + * the type of the data being analyzed, but the type-specific typanalyze * function can change them if it wants to store something else. */ for (i = 0; i < STATISTIC_NUM_SLOTS; i++) { - stats->statypid[i] = stats->attr->atttypid; + stats->statypid[i] = stats->attrtypid; stats->statyplen[i] = stats->attrtype->typlen; stats->statypbyval[i] = stats->attrtype->typbyval; stats->statypalign[i] = stats->attrtype->typalign; @@ -1780,7 +1792,7 @@ std_typanalyze(VacAttrStats *stats) attr->attstattarget = default_statistics_target; /* Look for default "<" and "=" operators for column's type */ - get_sort_group_operators(attr->atttypid, + get_sort_group_operators(stats->attrtypid, false, false, false, <opr, &eqopr, NULL); @@ -1860,10 +1872,10 @@ compute_minimal_stats(VacAttrStatsP stats, int nonnull_cnt = 0; int toowide_cnt = 0; double total_width = 0; - bool is_varlena = (!stats->attr->attbyval && - stats->attr->attlen == -1); - bool is_varwidth = (!stats->attr->attbyval && - stats->attr->attlen < 0); + bool is_varlena = (!stats->attrtype->typbyval && + stats->attrtype->typlen == -1); + bool is_varwidth = (!stats->attrtype->typbyval && + stats->attrtype->typlen < 0); FmgrInfo f_cmpeq; typedef struct { @@ -2126,8 +2138,8 @@ compute_minimal_stats(VacAttrStatsP stats, for (i = 0; i < num_mcv; i++) { mcv_values[i] = datumCopy(track[i].value, - stats->attr->attbyval, - stats->attr->attlen); + stats->attrtype->typbyval, + stats->attrtype->typlen); mcv_freqs[i] = (double) track[i].count / (double) samplerows; } MemoryContextSwitchTo(old_context); @@ -2184,10 +2196,10 @@ compute_scalar_stats(VacAttrStatsP stats, int nonnull_cnt = 0; int toowide_cnt = 0; double total_width = 0; - bool is_varlena = (!stats->attr->attbyval && - stats->attr->attlen == -1); - bool is_varwidth = (!stats->attr->attbyval && - stats->attr->attlen < 0); + bool is_varlena = (!stats->attrtype->typbyval && + stats->attrtype->typlen == -1); + bool is_varwidth = (!stats->attrtype->typbyval && + stats->attrtype->typlen < 0); double corr_xysum; Oid cmpFn; int cmpFlags; @@ -2476,8 +2488,8 @@ compute_scalar_stats(VacAttrStatsP stats, for (i = 0; i < num_mcv; i++) { mcv_values[i] = datumCopy(values[track[i].first].value, - stats->attr->attbyval, - stats->attr->attlen); + stats->attrtype->typbyval, + stats->attrtype->typlen); mcv_freqs[i] = (double) track[i].count / (double) samplerows; } MemoryContextSwitchTo(old_context); @@ -2583,8 +2595,8 @@ compute_scalar_stats(VacAttrStatsP stats, for (i = 0; i < num_hist; i++) { hist_values[i] = datumCopy(values[pos].value, - stats->attr->attbyval, - stats->attr->attlen); + stats->attrtype->typbyval, + stats->attrtype->typlen); pos += delta; posfrac += deltafrac; if (posfrac >= (num_hist - 1)) diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index ef568e9dcd..7fcb402813 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/commands/vacuum.h,v 1.89 2010/02/09 21:43:30 tgl Exp $ + * $PostgreSQL: pgsql/src/include/commands/vacuum.h,v 1.90 2010/08/01 22:38:11 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -62,9 +62,17 @@ typedef struct VacAttrStats /* * These fields are set up by the main ANALYZE code before invoking the * type-specific typanalyze function. + * + * Note: do not assume that the data being analyzed has the same datatype + * shown in attr, ie do not trust attr->atttypid, attlen, etc. This is + * because some index opclasses store a different type than the underlying + * column/expression. Instead use attrtypid, attrtypmod, and attrtype for + * information about the datatype being fed to the typanalyze function. */ Form_pg_attribute attr; /* copy of pg_attribute row for column */ - Form_pg_type attrtype; /* copy of pg_type row for column */ + Oid attrtypid; /* type of data being analyzed */ + int32 attrtypmod; /* typmod of data being analyzed */ + Form_pg_type attrtype; /* copy of pg_type row for attrtypid */ MemoryContext anl_context; /* where to save long-lived data */ /* @@ -95,10 +103,9 @@ typedef struct VacAttrStats /* * These fields describe the stavalues[n] element types. They will be - * initialized to be the same as the column's that's underlying the slot, - * but a custom typanalyze function might want to store an array of - * something other than the analyzed column's elements. It should then - * overwrite these fields. + * initialized to match attrtypid, but a custom typanalyze function might + * want to store an array of something other than the analyzed column's + * elements. It should then overwrite these fields. */ Oid statypid[STATISTIC_NUM_SLOTS]; int2 statyplen[STATISTIC_NUM_SLOTS]; -- 2.11.0