4 * Copyright (c) 2009-2014, NIPPON TELEGRAPH AND TELEPHONE CORPORATION
5 * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
6 * Portions Copyright (c) 1994, Regents of the University of California
10 #include "access/transam.h"
11 #include "catalog/pg_statistic.h"
12 #include "catalog/pg_type.h"
13 #include "commands/trigger.h"
14 #include "executor/spi.h"
16 #include "optimizer/plancat.h"
17 #include "storage/bufmgr.h"
18 #include "utils/builtins.h"
19 #include "utils/guc.h"
20 #include "utils/inval.h"
21 #include "utils/lsyscache.h"
22 #include "utils/selfuncs.h"
23 #include "utils/syscache.h"
24 #if PG_VERSION_NUM >= 90200
25 #include "utils/rel.h"
27 #if PG_VERSION_NUM >= 90300
28 #include "access/htup_details.h"
29 #include "utils/catcache.h"
32 #include "pg_dbms_stats.h"
36 /* Error levels used by pg_dbms_stats */
37 #define ELEVEL_DEBUG DEBUG3 /* log level for debug information */
38 #define ELEVEL_BADSTATS LOG /* log level for invalid statistics */
40 #define MAX_REL_CACHE 50 /* expected max # of rel stats entries */
42 /* represents an element of relation statistics which is cached in hash table */
43 typedef struct StatsRelationEntry
45 Oid relid; /* hash key must be at the head */
47 bool valid; /* T if the entry has valid stats */
49 BlockNumber relpages; /* # of pages as of last ANALYZE */
50 double reltuples; /* # of tuples as of last ANALYZE */
51 BlockNumber relallvisible; /* # of all-visible pages as of last
53 BlockNumber curpages; /* # of pages as of lock/restore */
55 List *col_stats; /* list of HeapTuple, each element is
56 pg_statistic record of this relation. */
59 /* Saved hook functions */
60 get_relation_info_hook_type prev_get_relation_info = NULL;
61 get_attavgwidth_hook_type prev_get_attavgwidth = NULL;
62 get_relation_stats_hook_type prev_get_relation_stats = NULL;
63 get_index_stats_hook_type prev_get_index_stats = NULL;
65 /* rows_query(oid) RETURNS int4, float4, int4 */
66 static const char *rows_query =
67 "SELECT relpages, reltuples, curpages"
68 #if PG_VERSION_NUM >= 90200
71 " FROM dbms_stats.relation_stats_locked"
73 static SPIPlanPtr rows_plan = NULL;
75 /* tuple_query(oid, int2, bool) RETURNS pg_statistic */
76 static const char *tuple_query =
78 " FROM dbms_stats.column_stats_locked "
79 " WHERE starelid = $1 "
80 " AND staattnum = $2 "
81 " AND stainherit = $3";
82 static SPIPlanPtr tuple_plan = NULL;
85 static bool pg_dbms_stats_use_locked_stats = true;
87 /* Current nesting depth of SPI calls, used to prevent recursive calls */
88 static int nested_level = 0;
91 * The relation_stats_effective statistic cache is stored in hash table.
93 static HTAB *rel_stats;
95 #define get_pg_statistic(tuple) ((Form_pg_statistic) GETSTRUCT(tuple))
97 PG_FUNCTION_INFO_V1(dbms_stats_merge);
98 PG_FUNCTION_INFO_V1(dbms_stats_invalidate_relation_cache);
99 PG_FUNCTION_INFO_V1(dbms_stats_invalidate_column_cache);
100 PG_FUNCTION_INFO_V1(dbms_stats_is_system_schema);
101 PG_FUNCTION_INFO_V1(dbms_stats_is_system_catalog);
103 extern Datum dbms_stats_merge(PG_FUNCTION_ARGS);
104 extern Datum dbms_stats_invalidate_relation_cache(PG_FUNCTION_ARGS);
105 extern Datum dbms_stats_invalidate_column_cache(PG_FUNCTION_ARGS);
106 extern Datum dbms_stats_is_system_schema(PG_FUNCTION_ARGS);
107 extern Datum dbms_stats_is_system_catalog(PG_FUNCTION_ARGS);
109 static HeapTuple dbms_stats_merge_internal(HeapTuple lhs, HeapTuple rhs,
110 TupleDesc tupledesc);
111 static void dbms_stats_check_tg_event(FunctionCallInfo fcinfo,
112 TriggerData *trigdata, HeapTuple *invtup, HeapTuple *rettup);
113 static void dbms_stats_invalidate_cache_internal(Oid relid, bool sta_col);
115 /* Module callbacks */
119 static void dbms_stats_get_relation_info(PlannerInfo *root, Oid relid,
120 bool inhparent, RelOptInfo *rel);
121 static int32 dbms_stats_get_attavgwidth(Oid relid, AttrNumber attnum);
122 static bool dbms_stats_get_relation_stats(PlannerInfo *root, RangeTblEntry *rte,
123 AttrNumber attnum, VariableStatData *vardata);
124 static bool dbms_stats_get_index_stats(PlannerInfo *root, Oid indexOid,
125 AttrNumber indexattnum, VariableStatData *vardata);
127 static void get_merged_relation_stats(Oid relid, BlockNumber *pages,
128 double *tuples, double *allvisfrac, bool estimate);
129 static int32 get_merged_avgwidth(Oid relid, AttrNumber attnum);
130 static HeapTuple get_merged_column_stats(Oid relid, AttrNumber attnum,
132 static HeapTuple column_cache_search(Oid relid, AttrNumber attnum, bool inh);
133 static HeapTuple column_cache_enter(HeapTuple tuple);
134 static bool execute_plan(SPIPlanPtr *plan, const char *query, Oid relid,
135 const AttrNumber *attnum, bool inh);
136 static void StatsCacheRelCallback(Datum arg, Oid relid);
137 static void init_rel_stats(void);
138 static void init_rel_stats_entry(StatsRelationEntry *entry, Oid relid);
139 /* copied from PG core source tree */
140 static void dbms_stats_estimate_rel_size(Relation rel, int32 *attr_widths,
141 BlockNumber *pages, double *tuples, double *allvisfrac,
142 BlockNumber curpages);
143 static int32 dbms_stats_get_rel_data_width(Relation rel, int32 *attr_widths);
145 /* Unit test suit functions */
147 extern void test_import(int *passed, int *total);
148 extern void test_dump(int *passed, int *total);
149 extern void test_pg_dbms_stats(int *passed, int *total);
153 * Module load callback
158 /* Execute unit test cases */
164 test_import(&passed, &total);
165 test_dump(&passed, &total);
166 test_pg_dbms_stats(&passed, &total);
168 elog(WARNING, "TOTAL %d/%d passed", passed, total);
172 /* Define custom GUC variables. */
173 DefineCustomBoolVariable("pg_dbms_stats.use_locked_stats",
174 "Enable user defined statistics.",
176 &pg_dbms_stats_use_locked_stats,
184 EmitWarningsOnPlaceholders("pg_dbms_stats");
186 /* Back up old hooks, and install ours. */
187 prev_get_relation_info = get_relation_info_hook;
188 get_relation_info_hook = dbms_stats_get_relation_info;
189 prev_get_attavgwidth = get_attavgwidth_hook;
190 get_attavgwidth_hook = dbms_stats_get_attavgwidth;
191 prev_get_relation_stats = get_relation_stats_hook;
192 get_relation_stats_hook = dbms_stats_get_relation_stats;
193 prev_get_index_stats = get_index_stats_hook;
194 get_index_stats_hook = dbms_stats_get_index_stats;
196 /* Initialize hash table for statistics caching. */
199 /* Also set up a callback for relcache SI invalidations */
200 CacheRegisterRelcacheCallback(StatsCacheRelCallback, (Datum) 0);
204 * Module unload callback
209 /* Restore old hooks. */
210 get_relation_info_hook = prev_get_relation_info;
211 get_attavgwidth_hook = prev_get_attavgwidth;
212 get_relation_stats_hook = prev_get_relation_stats;
213 get_index_stats_hook = prev_get_index_stats;
215 /* A function to unregister callback for relcache is NOT provided. */
219 * Store heap tuple header into given heap tuple.
222 AssignHeapTuple(HeapTuple htup, HeapTupleHeader header)
224 htup->t_len = HeapTupleHeaderGetDatumLength(header);
225 ItemPointerSetInvalid(&htup->t_self);
226 htup->t_tableOid = InvalidOid;
227 htup->t_data = header;
232 * called by sql function 'dbms_stats.merge', and return the execution result
233 * of the function 'dbms_stats_merge_internal'.
236 dbms_stats_merge(PG_FUNCTION_ARGS)
241 HeapTuple ret = NULL;
243 /* assign HeapTuple of the left statistics data unless null. */
247 AssignHeapTuple(&lhs, PG_GETARG_HEAPTUPLEHEADER(0));
249 /* assign HeapTuple of the right statistics data unless null. */
253 AssignHeapTuple(&rhs, PG_GETARG_HEAPTUPLEHEADER(1));
255 /* fast path for one-side is null */
256 if (lhs.t_data == NULL && rhs.t_data == NULL)
259 /* build a tuple descriptor for our result type */
260 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
261 elog(ERROR, "return type must be a row type");
263 /* merge two statistics tuples into one, and return it */
264 ret = dbms_stats_merge_internal(&lhs, &rhs, tupdesc);
267 PG_RETURN_DATUM(HeapTupleGetDatum(ret));
273 * dbms_stats_merge_internal
274 * merge the dummy statistic (lhs) and the true statistic (rhs), on the basis
275 * of given TupleDesc.
277 * this function doesn't become an error level of ERROR to meet that the
278 * result of the SQL is not affected by the query plan.
281 dbms_stats_merge_internal(HeapTuple lhs, HeapTuple rhs, TupleDesc tupdesc)
283 Datum values[Natts_pg_statistic];
284 bool nulls[Natts_pg_statistic];
286 Oid atttype = InvalidOid;
290 /* fast path for both-sides are null */
291 if ((lhs == NULL || lhs->t_data == NULL) &&
292 (rhs == NULL || rhs->t_data == NULL))
295 /* fast path for one-side is null */
296 if (lhs == NULL || lhs->t_data == NULL)
298 /* use right tuple */
299 heap_deform_tuple(rhs, tupdesc, values, nulls);
300 for (i = 0; i < Anum_pg_statistic_staop1 + STATISTIC_NUM_SLOTS - 1; i++)
302 return NULL; /* check null constraints */
304 else if (rhs == NULL || rhs->t_data == NULL)
307 heap_deform_tuple(lhs, tupdesc, values, nulls);
308 for (i = 0; i < Anum_pg_statistic_staop1 + STATISTIC_NUM_SLOTS - 1; i++)
310 return NULL; /* check null constraints */
315 * If the column value of the dummy statistic is not NULL, in the
316 * statistics except the slot, use it. Otherwise we use the column
317 * value of the true statistic.
319 heap_deform_tuple(lhs, tupdesc, values, nulls);
320 for (i = 0; i < Anum_pg_statistic_stakind1 - 1; i++)
324 values[i] = fastgetattr(rhs, i + 1, tupdesc, &nulls[i]);
327 ereport(ELEVEL_BADSTATS,
328 (errmsg("pg_dbms_stats: bad statistics"),
329 errdetail("column \"%s\" should not be null",
330 get_attname(StatisticRelationId,
331 tupdesc->attrs[i]->attnum))));
332 return NULL; /* should not be null */
338 * If the column value of the dummy statistic is not all NULL, in the
339 * statistics the slot, use it. Otherwise we use the column
340 * value of the true statistic.
342 for (; i < Anum_pg_statistic_staop1 + STATISTIC_NUM_SLOTS - 1; i++)
346 for (i = Anum_pg_statistic_stakind1 - 1;
347 i < Anum_pg_statistic_stavalues1 + STATISTIC_NUM_SLOTS - 1;
350 values[i] = fastgetattr(rhs, i + 1, tupdesc, &nulls[i]);
351 if (i < Anum_pg_statistic_staop1 + STATISTIC_NUM_SLOTS - 1 &&
354 ereport(ELEVEL_BADSTATS,
355 (errmsg("pg_dbms_stats: bad statistics"),
356 errdetail("column \"%s\" should not be null",
357 get_attname(StatisticRelationId,
358 tupdesc->attrs[i]->attnum))));
359 return NULL; /* should not be null */
369 * Verify types to work around for ALTER COLUMN TYPE.
371 * Note: We don't need to retrieve atttype when the attribute doesn't have
372 * neither Most-Common-Value nor Histogram, but we retrieve it always
373 * because it's not usual.
375 relid = DatumGetObjectId(values[0]);
376 attnum = DatumGetInt16(values[1]);
377 atttype = get_atttype(relid, attnum);
378 if (atttype == InvalidOid)
381 (errmsg("pg_dbms_stats: not exist column"),
382 errdetail("relid \"%d\" or it's column number \"%d\" does not exist",
384 errhint("need to execute clean_up_stats()")));
387 for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
389 if ((i + 1 == STATISTIC_KIND_MCV ||
390 i + 1 == STATISTIC_KIND_HISTOGRAM) &&
391 !nulls[Anum_pg_statistic_stavalues1 + i - 1])
395 arr = DatumGetArrayTypeP(
396 values[Anum_pg_statistic_stavalues1 + i - 1]);
397 if (arr == NULL || arr->elemtype != atttype)
399 const char *attname = get_attname(relid, attnum);
402 * relid and attnum must be valid here because valid atttype
403 * has been gotten already.
406 ereport(ELEVEL_BADSTATS,
407 (errmsg("pg_dbms_stats: bad column type"),
408 errdetail("type of column \"%s\" has been changed",
410 errhint("need to execute dbms_stats.unlock('%s', '%s')",
411 get_rel_name(relid), attname)));
417 return heap_form_tuple(tupdesc, values, nulls);
421 * dbms_stats_invalidate_relation_cache
422 * Register invalidation of the specified relation's relcache.
424 * CREATE TRIGGER dbms_stats.relation_stats_locked FOR INSERT, UPDATE, DELETE FOR EACH
428 dbms_stats_invalidate_relation_cache(PG_FUNCTION_ARGS)
430 TriggerData *trigdata = (TriggerData *) fcinfo->context;
431 HeapTuple invtup; /* tuple to be invalidated */
432 HeapTuple rettup; /* tuple to be returned */
436 /* make sure it's called as a before/after trigger */
437 dbms_stats_check_tg_event(fcinfo, trigdata, &invtup, &rettup);
440 * assume that position of dbms_stats.relation_stats_locked.relid is head value of
443 value = fastgetattr(invtup, 1, trigdata->tg_relation->rd_att, &isnull);
446 * invalidate prepared statements and force re-planning with pg_dbms_stats.
448 dbms_stats_invalidate_cache_internal((Oid)value, false);
450 PG_RETURN_POINTER(rettup);
454 * dbms_stats_invalidate_column_cache
455 * Register invalidation of the specified relation's relcache.
457 * CREATE TRIGGER dbms_stats.column_stats_locked FOR INSERT, UPDATE, DELETE FOR EACH
461 dbms_stats_invalidate_column_cache(PG_FUNCTION_ARGS)
463 TriggerData *trigdata = (TriggerData *) fcinfo->context;
464 Form_pg_statistic form;
465 HeapTuple invtup; /* tuple to be invalidated */
466 HeapTuple rettup; /* tuple to be returned */
468 /* make sure it's called as a before/after trigger */
469 dbms_stats_check_tg_event(fcinfo, trigdata, &invtup, &rettup);
472 * assume that both pg_statistic and dbms_stats.column_stats_locked have the same
475 form = get_pg_statistic(invtup);
478 * invalidate prepared statements and force re-planning with pg_dbms_stats.
480 dbms_stats_invalidate_cache_internal(form->starelid, true);
482 PG_RETURN_POINTER(rettup);
486 dbms_stats_check_tg_event(FunctionCallInfo fcinfo,
487 TriggerData *trigdata,
491 /* make sure it's called as a before/after trigger */
492 if (!CALLED_AS_TRIGGER(fcinfo) ||
493 !TRIGGER_FIRED_BEFORE(trigdata->tg_event) ||
494 !TRIGGER_FIRED_FOR_ROW(trigdata->tg_event))
495 elog(ERROR, "pg_dbms_stats: invalid trigger call");
497 if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
500 *rettup = *invtup = trigdata->tg_trigtuple;
502 else if (TRIGGER_FIRED_BY_DELETE(trigdata->tg_event))
505 *rettup = *invtup = trigdata->tg_trigtuple;
510 *invtup = trigdata->tg_trigtuple;
511 *rettup = trigdata->tg_newtuple;
516 dbms_stats_invalidate_cache_internal(Oid relid, bool sta_col)
521 * invalidate prepared statements and force re-planning with pg_dbms_stats.
523 rel = try_relation_open(relid, NoLock);
527 rel->rd_rel->relkind == RELKIND_INDEX &&
528 (rel->rd_indextuple == NULL ||
529 heap_attisnull(rel->rd_indextuple, Anum_pg_index_indexprs)))
531 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
532 errmsg("\"%s\" is an index except an index expression",
533 RelationGetRelationName(rel))));
534 if (rel->rd_rel->relkind == RELKIND_COMPOSITE_TYPE)
536 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
537 errmsg("\"%s\" is a composite type",
538 RelationGetRelationName(rel))));
541 * We need to invalidate relcache of underlying table too, because
542 * CachedPlan mechanism decides to do re-planning when any relcache of
543 * used tables was invalid at EXECUTE.
545 if (rel->rd_rel->relkind == RELKIND_INDEX &&
546 rel->rd_index && OidIsValid(rel->rd_index->indrelid))
547 CacheInvalidateRelcacheByRelid(rel->rd_index->indrelid);
549 CacheInvalidateRelcache(rel);
550 relation_close(rel, NoLock);
555 * dbms_stats_is_system_schema
556 * called by sql function 'dbms_stats.is_system_schema', and return the
557 * result of the function 'dbms_stats_is_system_internal'.
560 dbms_stats_is_system_schema(PG_FUNCTION_ARGS)
566 arg0 = PG_GETARG_TEXT_PP(0);
567 schema_name = text_to_cstring(arg0);
568 result = dbms_stats_is_system_schema_internal(schema_name);
570 PG_FREE_IF_COPY(arg0, 0);
572 PG_RETURN_BOOL(result);
576 * dbms_stats_is_system_schema_internal
577 * return whether the given schema contains any system catalog. Here we
578 * treat dbms_stats objects as system catalogs to avoid infinite loop.
581 dbms_stats_is_system_schema_internal(char *schema_name)
583 Assert(schema_name != NULL);
585 /* if the schema is system_schema, return true */
586 if (strcmp(schema_name, "pg_catalog") == 0 ||
587 strcmp(schema_name, "pg_toast") == 0 ||
588 strcmp(schema_name, "information_schema") == 0 ||
589 strcmp(schema_name, "dbms_stats") == 0)
596 * dbms_stats_is_system_catalog
597 * called by sql function 'dbms_stats.is_system_catalog', and return the
598 * result of the function 'dbms_stats_is_system_catalog_internal'.
601 dbms_stats_is_system_catalog(PG_FUNCTION_ARGS)
607 PG_RETURN_BOOL(true);
609 relid = PG_GETARG_OID(0);
610 result = dbms_stats_is_system_catalog_internal(relid);
612 PG_RETURN_BOOL(result);
616 * dbms_stats_is_system_catalog_internal
617 * Check whether the given relation is one of system catalogs.
620 dbms_stats_is_system_catalog_internal(Oid relid)
626 /* relid is InvalidOid */
627 if (!OidIsValid(relid))
630 /* no such relation */
631 rel = try_relation_open(relid, NoLock);
635 /* check by namespace name. */
636 schema_name = get_namespace_name(rel->rd_rel->relnamespace);
637 result = dbms_stats_is_system_schema_internal(schema_name);
638 relation_close(rel, NoLock);
644 * dbms_stats_get_relation_info
645 * Hook function for get_relation_info_hook, which implements post-process of
646 * get_relation_info().
648 * This function is designed on the basis of the fact that only expression
649 * indexes have statistics.
652 dbms_stats_get_relation_info(PlannerInfo *root,
658 double allvisfrac; /* dummy */
661 * Call previously installed hook function regardless to whether
662 * pg_dbms_stats is enabled or not.
664 if (prev_get_relation_info)
665 prev_get_relation_info(root, relid, inhparent, rel);
667 /* If pg_dbms_stats is disabled, there is no more thing to do. */
668 if (!pg_dbms_stats_use_locked_stats)
672 * Adjust stats of table itself, and stats of index
673 * relation_stats_effective as well
677 * Estimate relation size --- unless it's an inheritance parent, in which
678 * case the size will be computed later in set_append_rel_pathlist, and we
679 * must leave it zero for now to avoid bollixing the total_table_pages
684 #if PG_VERSION_NUM >= 90200
685 get_merged_relation_stats(relid, &rel->pages, &rel->tuples,
686 &rel->allvisfrac, true);
688 get_merged_relation_stats(relid, &rel->pages, &rel->tuples,
695 foreach(lc, rel->indexlist)
698 * Estimate the index size. If it's not a partial index, we lock
699 * the number-of-tuples estimate to equal the parent table; if it
700 * is partial then we have to use the same methods as we would for
701 * a table, except we can be sure that the index is not larger
704 IndexOptInfo *info = (IndexOptInfo *) lfirst(lc);
705 bool estimate = info->indpred != NIL;
707 get_merged_relation_stats(info->indexoid, &info->pages, &info->tuples,
708 &allvisfrac, estimate);
710 if (!estimate || (estimate && info->tuples > rel->tuples))
711 info->tuples = rel->tuples;
716 * dbms_stats_get_attavgwidth
717 * Hook function for get_attavgwidth_hook which replaces get_attavgwidth().
718 * Returning 0 tells caller to use standard routine.
721 dbms_stats_get_attavgwidth(Oid relid, AttrNumber attnum)
723 if (pg_dbms_stats_use_locked_stats)
725 int32 width = get_merged_avgwidth(relid, attnum);
730 if (prev_get_attavgwidth)
731 return prev_get_attavgwidth(relid, attnum);
737 * We do nothing here, to keep the tuple valid even after examination.
740 FreeHeapTuple(HeapTuple tuple)
746 * dbms_stats_get_relation_stats
747 * Hook function for get_relation_stats_hook which provides custom
748 * per-relation statistics.
749 * Returning false tells caller to use standard (true) statistics.
752 dbms_stats_get_relation_stats(PlannerInfo *root,
755 VariableStatData *vardata)
757 if (pg_dbms_stats_use_locked_stats)
761 tuple = get_merged_column_stats(rte->relid, attnum, rte->inh);
764 vardata->statsTuple = tuple;
765 vardata->freefunc = FreeHeapTuple;
770 if (prev_get_relation_stats)
771 return prev_get_relation_stats(root, rte, attnum, vardata);
777 * dbms_stats_get_index_stats
778 * Hook function for get_index_stats_hook which provides custom per-relation
780 * Returning false tells caller to use standard (true) statistics.
783 dbms_stats_get_index_stats(PlannerInfo *root,
785 AttrNumber indexattnum,
786 VariableStatData *vardata)
788 if (pg_dbms_stats_use_locked_stats)
792 tuple = get_merged_column_stats(indexOid, indexattnum, false);
795 vardata->statsTuple = tuple;
796 vardata->freefunc = FreeHeapTuple;
801 if (prev_get_index_stats)
802 return prev_get_index_stats(root, indexOid, indexattnum, vardata);
808 * Extract binary value from given column.
811 get_binary_datum(int column, bool *isnull)
813 return SPI_getbinval(SPI_tuptable->vals[0],
814 SPI_tuptable->tupdesc, column, isnull);
818 * get_merged_relation_stats
819 * get the statistics of the table, # of pages and # of rows, by executing
820 * SELECT against dbms_stats.relation_stats_locked view.
823 get_merged_relation_stats(Oid relid, BlockNumber *pages, double *tuples,
824 double *allvisfrac, bool estimate)
826 StatsRelationEntry *entry;
830 /* avoid recursive call and system objects */
831 if (nested_level > 0 || relid < FirstNormalObjectId)
835 * pg_dbms_stats doesn't handle system catalogs and its internal relation_stats_effective
837 if (dbms_stats_is_system_catalog_internal(relid))
841 * First, search from cache. If we have not cached stats for given relid
842 * yet, initialize newly created entry.
844 entry = hash_search(rel_stats, &relid, HASH_ENTER, &found);
846 init_rel_stats_entry(entry, relid);
849 * If we don't have valid cache entry, retrieve stats from catalogs,
850 * pg_class and dbms_stats.relation_stats_locked, and merge them for planner use.
851 * We also cache curpages value to make plans stable.
864 * Retrieve per-tuple dummy statistics from relation_stats_locked table
865 * via SPI. If we don't have dummy statistics, we return false to
866 * tell caller thet we gave up to provide alternative statistics.
868 has_dummy = execute_plan(&rows_plan, rows_query, relid, NULL, true);
877 * Retrieve per-relation stats from pg_class system catalog.
878 * We use syscache to support indexes
880 tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
881 if (!HeapTupleIsValid(tuple))
882 elog(ERROR, "cache lookup failed for relation %u", relid);
883 form = (Form_pg_class) GETSTRUCT(tuple);
885 /* Choose dummy or authentic */
886 val = get_binary_datum(1, &isnull);
887 entry->relpages = isnull ? form->relpages :
888 (BlockNumber) DatumGetInt32(val);
889 val = get_binary_datum(2, &isnull);
890 entry->reltuples = isnull ? form->reltuples :
891 (double) DatumGetFloat4(val);
892 val = get_binary_datum(3, &isnull);
893 entry->curpages = isnull ? InvalidBlockNumber :
894 (BlockNumber) DatumGetInt32(val);
895 #if PG_VERSION_NUM >= 90200
896 val = get_binary_datum(4, &isnull);
897 entry->relallvisible = isnull ? form->relallvisible :
898 (BlockNumber) DatumGetInt32(val);
901 ReleaseSysCache(tuple);
903 /* Mark this entry as valid. */
919 * If we don't dummy statistics, tell caller that we didn't change
920 * stats in Relation, and it's ok to use authentic statistics as-is.
928 *pages = entry->relpages;
929 *tuples = entry->reltuples;
934 * Get current # of pages to estimate current # of tuples, based on
935 * tuple density at the last ANALYZE and current # of pages.
937 rel = relation_open(relid, NoLock);
938 rel->rd_rel->relpages = entry->relpages;
939 rel->rd_rel->reltuples = entry->reltuples;
940 #if PG_VERSION_NUM >= 90200
941 rel->rd_rel->relallvisible = entry->relallvisible;
943 dbms_stats_estimate_rel_size(rel, NULL, pages, tuples, allvisfrac,
945 relation_close(rel, NoLock);
949 * get_merged_avgwidth
950 * get average width of the given column by merging dummy and authentic
954 get_merged_avgwidth(Oid relid, AttrNumber attnum)
958 if (nested_level > 0 || relid < FirstNormalObjectId)
959 return 0; /* avoid recursive call and system objects */
961 if ((tuple = get_merged_column_stats(relid, attnum, false)) == NULL)
964 return get_pg_statistic(tuple)->stawidth;
968 * get_merged_column_stats
969 * get per-column statistic of given column
971 * We cache the result because this function is called for every used column.
974 get_merged_column_stats(Oid relid, AttrNumber attnum, bool inh)
977 HeapTuple statsTuple;
979 if (nested_level > 0 || relid < FirstNormalObjectId)
980 return NULL; /* avoid recursive call and system objects */
982 /* If the relation is system catalog, return NULL to use true statistics. */
983 if (dbms_stats_is_system_catalog_internal(relid))
986 /* Return cached statistics, if any. */
987 if ((tuple = column_cache_search(relid, attnum, inh)) != NULL)
990 /* Obtain statistics from syscache. This must be released afterward. */
991 statsTuple = SearchSysCache3(STATRELATTINH,
992 ObjectIdGetDatum(relid),
993 Int16GetDatum(attnum),
1001 /* Obtain dummy statistics. */
1002 if (execute_plan(&tuple_plan, tuple_query, relid, &attnum, inh))
1004 /* merge the dummy statistics and the true statistics */
1005 tuple = dbms_stats_merge_internal(SPI_tuptable->vals[0], statsTuple,
1006 SPI_tuptable->tupdesc);
1010 /* if dummy statistics does not exist, use the true statistics */
1014 /* Cache merged result for subsequent calls. */
1016 tuple = column_cache_enter(tuple);
1029 if (HeapTupleIsValid(statsTuple))
1030 ReleaseSysCache(statsTuple);
1036 * column_cache_search
1037 * Search statistic of the given column from the cache.
1040 column_cache_search(Oid relid, AttrNumber attnum, bool inh)
1042 StatsRelationEntry *entry;
1047 * First, get cached relation stats. If we have not cached relation stats,
1048 * we don't have column stats too.
1050 entry = hash_search(rel_stats, &relid, HASH_FIND, &found);
1055 * It is assumed that not so many column_stats_effective are defined on a relation, so we
1056 * use simple linear-search here. Using hash table for each relation would
1057 * be another solution, but it seems overkill so far.
1059 foreach(lc, entry->col_stats)
1061 HeapTuple tuple = (HeapTuple) lfirst(lc);
1062 Form_pg_statistic form = get_pg_statistic(tuple);
1064 /* Find statistic of the given column from the cache. */
1065 if (form->staattnum == attnum && form->stainherit == inh)
1069 return NULL; /* not found */
1073 * Cache a per-column statistics. Cached statistics are valid through the
1074 * current session, unless dummy statistics or table definition have been
1077 * We store statistics tuple in longer-lifetime context, CacheMemoryContext, to
1078 * keep them alive during this session.
1081 column_cache_enter(HeapTuple tuple)
1083 MemoryContext oldcontext;
1085 Form_pg_statistic form;
1086 StatsRelationEntry *entry;
1089 Assert(tuple != NULL);
1090 Assert(!heap_attisnull(tuple, 1));
1092 form = get_pg_statistic(tuple);
1094 entry = hash_search(rel_stats, &form->starelid, HASH_ENTER, &found);
1096 init_rel_stats_entry(entry, form->starelid);
1099 * Link new column statistics entry into the list in relation statsstics
1102 oldcontext = MemoryContextSwitchTo(CacheMemoryContext);
1103 newtuple = heap_copytuple(tuple);
1104 entry->col_stats = lappend(entry->col_stats, newtuple);
1105 MemoryContextSwitchTo(oldcontext);
1111 * Execute given plan. When given plan is NULL, create new plan from given
1112 * query string, and execute it. This function can be used only for retrieving
1113 * statistics of column_stats_effective and relation_stats_effective, because we assume #, types, and order
1114 * of parameters here.
1117 execute_plan(SPIPlanPtr *plan,
1120 const AttrNumber *attnum,
1124 Oid argtypes[3] = { OIDOID, INT2OID, BOOLOID };
1127 bool nulls[3] = { false, false, false };
1129 nargs = (attnum ? 3 : 1);
1131 /* When plan is not given, create plan from query string at first. */
1135 p = SPI_prepare(query, nargs, argtypes);
1137 elog(ERROR, "pg_dbms_stats: SPI_prepare => %d", SPI_result);
1138 *plan = SPI_saveplan(p);
1142 values[0] = ObjectIdGetDatum(relid);
1143 values[1] = Int16GetDatum(attnum ? *attnum : 0);
1144 values[2] = BoolGetDatum(inh);
1146 ret = SPI_execute_plan(*plan, values, nulls, true, 1);
1147 if (ret != SPI_OK_SELECT)
1148 elog(ERROR, "pg_dbms_stats: SPI_execute_plan => %d", ret);
1150 return SPI_processed > 0;
1154 * StatsCacheRelCallback
1155 * Relcache inval callback function
1157 * Invalidate cached statistic info of the given relid, or all cached statistic
1158 * info if relid == InvalidOid. We don't complain even when we don't have such
1161 * Note: arg is not used.
1164 StatsCacheRelCallback(Datum arg, Oid relid)
1166 HASH_SEQ_STATUS status;
1167 StatsRelationEntry *entry;
1169 hash_seq_init(&status, rel_stats);
1170 while ((entry = hash_seq_search(&status)) != NULL)
1172 if (relid == InvalidOid || relid == entry->relid)
1174 /* Mark the relation entry as INVALID */
1175 entry->valid = false;
1177 /* Discard every column statistics */
1178 list_free_deep(entry->col_stats);
1179 entry->col_stats = NIL;
1183 /* We always check throughout the list, so hash_seq_term is not necessary */
1187 * Initialize hash table for per-relation statistics.
1190 init_rel_stats(void)
1195 /* Prevent double initialization. */
1196 if (rel_stats != NULL)
1199 MemSet(&ctl, 0, sizeof(ctl));
1200 ctl.keysize = sizeof(Oid);
1201 ctl.entrysize = sizeof(StatsRelationEntry);
1202 ctl.hash = oid_hash;
1203 ctl.hcxt = CacheMemoryContext;
1204 hash = hash_create("dbms_stats relation statistics cache",
1206 &ctl, HASH_ELEM | HASH_CONTEXT);
1212 * Initialize newly added cache entry so that it represents an invalid cache
1213 * entry for given relid.
1216 init_rel_stats_entry(StatsRelationEntry *entry, Oid relid)
1218 entry->relid = relid;
1219 entry->valid = false;
1220 entry->relpages = InvalidBlockNumber;
1221 entry->reltuples = 0.0;
1222 entry->relallvisible = InvalidBlockNumber;
1223 entry->curpages = InvalidBlockNumber;
1224 entry->col_stats = NIL;
1228 * dbms_stats_estimate_rel_size - estimate # pages and # tuples in a table or
1231 * We also estimate the fraction of the pages that are marked all-visible in
1232 * the visibility map, for use in estimation of index-only scans.
1234 * If attr_widths isn't NULL, it points to the zero-index entry of the
1235 * relation's attr_widths[] cache; we fill this in if we have need to compute
1236 * the attribute widths for estimation purposes.
1238 * Note: This function is copied from plancat.c in core source tree of version
1239 * 9.2, and customized for pg_dbms_stats. Changes from orignal one are:
1240 * - rename by prefixing dbms_stats_
1241 * - add 3 parameters (relpages, reltuples, curpage) to pass dummy curpage
1243 * - Get current # of pages only when supplied curpages is InvalidBlockNumber
1244 * - get franction of all-visible-pages
1247 dbms_stats_estimate_rel_size(Relation rel, int32 *attr_widths,
1248 BlockNumber *pages, double *tuples,
1249 double *allvisfrac, BlockNumber curpages)
1251 BlockNumber relpages;
1253 BlockNumber relallvisible;
1256 switch (rel->rd_rel->relkind)
1258 case RELKIND_RELATION:
1260 #if PG_VERSION_NUM >= 90300
1261 case RELKIND_MATVIEW:
1263 case RELKIND_TOASTVALUE:
1264 /* it has storage, ok to call the smgr */
1265 if (curpages == InvalidBlockNumber)
1266 curpages = RelationGetNumberOfBlocks(rel);
1269 * HACK: if the relation has never yet been vacuumed, use a
1270 * minimum size estimate of 10 pages. The idea here is to avoid
1271 * assuming a newly-created table is really small, even if it
1272 * currently is, because that may not be true once some data gets
1273 * loaded into it. Once a vacuum or analyze cycle has been done
1274 * on it, it's more reasonable to believe the size is somewhat
1277 * (Note that this is only an issue if the plan gets cached and
1278 * used again after the table has been filled. What we're trying
1279 * to avoid is using a nestloop-type plan on a table that has
1280 * grown substantially since the plan was made. Normally,
1281 * autovacuum/autoanalyze will occur once enough inserts have
1282 * happened and cause cached-plan invalidation; but that doesn't
1283 * happen instantaneously, and it won't happen at all for cases
1284 * such as temporary tables.)
1286 * We approximate "never vacuumed" by "has relpages = 0", which
1287 * means this will also fire on genuinely empty relation_stats_effective. Not
1288 * great, but fortunately that's a seldom-seen case in the real
1289 * world, and it shouldn't degrade the quality of the plan too
1290 * much anyway to err in this direction.
1292 * There are two exceptions wherein we don't apply this heuristic.
1293 * One is if the table has inheritance children. Totally empty
1294 * parent tables are quite common, so we should be willing to
1295 * believe that they are empty. Also, we don't apply the 10-page
1296 * minimum to indexes.
1298 if (curpages < 10 &&
1299 rel->rd_rel->relpages == 0 &&
1300 !rel->rd_rel->relhassubclass &&
1301 rel->rd_rel->relkind != RELKIND_INDEX)
1304 /* report estimated # pages */
1306 /* quick exit if rel is clearly empty */
1313 /* coerce values in pg_class to more desirable types */
1314 relpages = (BlockNumber) rel->rd_rel->relpages;
1315 reltuples = (double) rel->rd_rel->reltuples;
1316 #if PG_VERSION_NUM >= 90200
1317 relallvisible = (BlockNumber) rel->rd_rel->relallvisible;
1322 * If it's an index, discount the metapage while estimating the
1323 * number of tuples. This is a kluge because it assumes more than
1324 * it ought to about index structure. Currently it's OK for
1325 * btree, hash, and GIN indexes but suspect for GiST indexes.
1327 if (rel->rd_rel->relkind == RELKIND_INDEX &&
1334 /* estimate number of tuples from previous tuple density */
1336 density = reltuples / (double) relpages;
1340 * When we have no data because the relation was truncated,
1341 * estimate tuple width from attribute datatypes. We assume
1342 * here that the pages are completely full, which is OK for
1343 * tables (since they've presumably not been VACUUMed yet) but
1344 * is probably an overestimate for indexes. Fortunately
1345 * get_relation_info() can clamp the overestimate to the
1346 * parent table's size.
1348 * Note: this code intentionally disregards alignment
1349 * considerations, because (a) that would be gilding the lily
1350 * considering how crude the estimate is, and (b) it creates
1351 * platform dependencies in the default plans which are kind
1352 * of a headache for regression testing.
1356 tuple_width = dbms_stats_get_rel_data_width(rel, attr_widths);
1357 tuple_width += sizeof(HeapTupleHeaderData);
1358 tuple_width += sizeof(ItemPointerData);
1359 /* note: integer division is intentional here */
1360 density = (BLCKSZ - SizeOfPageHeaderData) / tuple_width;
1362 *tuples = rint(density * (double) curpages);
1365 * We use relallvisible as-is, rather than scaling it up like we
1366 * do for the pages and tuples counts, on the theory that any
1367 * pages added since the last VACUUM are most likely not marked
1368 * all-visible. But costsize.c wants it converted to a fraction.
1370 if (relallvisible == 0 || curpages <= 0)
1372 else if ((double) relallvisible >= curpages)
1375 *allvisfrac = (double) relallvisible / curpages;
1377 case RELKIND_SEQUENCE:
1378 /* Sequences always have a known size */
1383 case RELKIND_FOREIGN_TABLE:
1384 /* Just use whatever's in pg_class */
1385 *pages = rel->rd_rel->relpages;
1386 *tuples = rel->rd_rel->reltuples;
1390 /* else it has no disk storage; probably shouldn't get here? */
1399 * dbms_stats_get_rel_data_width
1401 * Estimate the average width of (the data part of) the relation's tuples.
1403 * If attr_widths isn't NULL, it points to the zero-index entry of the
1404 * relation's attr_widths[] cache; use and update that cache as appropriate.
1406 * Currently we ignore dropped column_stats_effective. Ideally those should be included
1407 * in the result, but we haven't got any way to get info about them; and
1408 * since they might be mostly NULLs, treating them as zero-width is not
1409 * necessarily the wrong thing anyway.
1411 * Note: This function is copied from plancat.c in core source tree of version
1412 * 9.2, and just renamed.
1415 dbms_stats_get_rel_data_width(Relation rel, int32 *attr_widths)
1417 int32 tuple_width = 0;
1420 for (i = 1; i <= RelationGetNumberOfAttributes(rel); i++)
1422 Form_pg_attribute att = rel->rd_att->attrs[i - 1];
1425 if (att->attisdropped)
1428 /* use previously cached data, if any */
1429 if (attr_widths != NULL && attr_widths[i] > 0)
1431 tuple_width += attr_widths[i];
1435 /* This should match set_rel_width() in costsize.c */
1436 item_width = get_attavgwidth(RelationGetRelid(rel), i);
1437 if (item_width <= 0)
1439 item_width = get_typavgwidth(att->atttypid, att->atttypmod);
1440 Assert(item_width > 0);
1442 if (attr_widths != NULL)
1443 attr_widths[i] = item_width;
1444 tuple_width += item_width;
1451 void test_pg_dbms_stats(int *passed, int *total);
1452 static void test_init_rel_stats(int *passed, int *total);
1453 static void test_init_rel_stats_entry(int *passed, int *total);
1456 test_pg_dbms_stats(int *passed, int *total)
1458 int local_passed = 0;
1459 int local_total = 0;
1461 elog(WARNING, "==========");
1464 test_init_rel_stats(&local_passed, &local_total);
1465 test_init_rel_stats_entry(&local_passed, &local_total);
1467 elog(WARNING, "%s %d/%d passed", __FUNCTION__, local_passed, local_total);
1468 *passed += local_passed;
1469 *total += local_total;
1473 test_init_rel_stats_entry(int *passed, int *total)
1476 StatsRelationEntry entry;
1482 init_rel_stats_entry(&entry, 1234);
1483 if (entry.relid == 1234 &&
1484 entry.valid == false &&
1485 entry.relpages == InvalidBlockNumber &&
1486 entry.reltuples == 0 &&
1487 entry.relallvisible == InvalidBlockNumber &&
1488 entry.curpages == InvalidBlockNumber &&
1489 entry.col_stats == NIL)
1491 elog(WARNING, "%s-%d ok", __FUNCTION__, caseno);
1495 elog(WARNING, "%s-%d failed: initialized", __FUNCTION__, caseno);
1501 test_init_rel_stats(int *passed, int *total)
1504 static HTAB *org_rel_stats;
1512 if (rel_stats != NULL)
1514 elog(WARNING, "%s-%d ok", __FUNCTION__, caseno);
1518 elog(WARNING, "%s-%d failed: rel_stats is NULL", __FUNCTION__, caseno);
1525 org_rel_stats = rel_stats;
1527 if (org_rel_stats == rel_stats)
1529 elog(WARNING, "%s-%d ok", __FUNCTION__, caseno);
1533 elog(WARNING, "%s-%d failed: rel_stats changed from %p to %p",
1534 __FUNCTION__, caseno, org_rel_stats, rel_stats);