OSDN Git Service

1.3.1
[pgdbmsstats/pg_dbms_stats.git] / pg_dbms_stats.c
1 /*
2  * pg_dbms_stats.c
3  *
4  * Copyright (c) 2009-2014, NIPPON TELEGRAPH AND TELEPHONE CORPORATION
5  * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
6  * Portions Copyright (c) 1994, Regents of the University of California
7  */
8 #include "postgres.h"
9
10 #include "access/transam.h"
11 #include "catalog/pg_statistic.h"
12 #include "catalog/pg_type.h"
13 #include "commands/trigger.h"
14 #include "executor/spi.h"
15 #include "funcapi.h"
16 #include "optimizer/plancat.h"
17 #include "storage/bufmgr.h"
18 #include "utils/builtins.h"
19 #include "utils/guc.h"
20 #include "utils/inval.h"
21 #include "utils/lsyscache.h"
22 #include "utils/selfuncs.h"
23 #include "utils/syscache.h"
24 #if PG_VERSION_NUM >= 90200
25 #include "utils/rel.h"
26 #endif
27 #if PG_VERSION_NUM >= 90300
28 #include "access/htup_details.h"
29 #include "utils/catcache.h"
30 #endif
31
32 #include "pg_dbms_stats.h"
33
34 PG_MODULE_MAGIC;
35
36 /* Error levels used by pg_dbms_stats */
37 #define ELEVEL_DEBUG            DEBUG3  /* log level for debug information */
38 #define ELEVEL_BADSTATS         LOG             /* log level for invalid statistics */
39
40 #define MAX_REL_CACHE           50              /* expected max # of rel stats entries */
41
42 /* represents an element of relation statistics which is cached in hash table */
43 typedef struct StatsRelationEntry
44 {
45         Oid                                     relid;          /* hash key must be at the head */
46
47         bool                            valid;          /* T if the entry has valid stats */
48
49         BlockNumber                     relpages;       /* # of pages as of last ANALYZE */
50         double                          reltuples;      /* # of tuples as of last ANALYZE */
51         BlockNumber                     relallvisible;  /* # of all-visible pages as of last
52                                                                                  * ANALYZE */
53         BlockNumber                     curpages;       /* # of pages as of lock/restore */
54
55         List                       *col_stats;  /* list of HeapTuple, each element is
56                                                                            pg_statistic record of this relation. */
57 } StatsRelationEntry;
58
59 /* Saved hook functions */
60 get_relation_info_hook_type             prev_get_relation_info = NULL;
61 get_attavgwidth_hook_type               prev_get_attavgwidth = NULL;
62 get_relation_stats_hook_type    prev_get_relation_stats = NULL;
63 get_index_stats_hook_type               prev_get_index_stats = NULL;
64
65 /* rows_query(oid) RETURNS int4, float4, int4 */
66 static const char  *rows_query =
67         "SELECT relpages, reltuples, curpages"
68 #if PG_VERSION_NUM >= 90200
69         ", relallvisible"
70 #endif
71         "  FROM dbms_stats.relation_stats_locked"
72         " WHERE relid = $1";
73 static SPIPlanPtr       rows_plan = NULL;
74
75 /* tuple_query(oid, int2, bool) RETURNS pg_statistic */
76 static const char  *tuple_query =
77         "SELECT * "
78         "  FROM dbms_stats.column_stats_locked "
79         " WHERE starelid = $1 "
80         "   AND staattnum = $2 "
81         "   AND stainherit = $3";
82 static SPIPlanPtr       tuple_plan = NULL;
83
84 /* GUC variables */
85 static bool                     pg_dbms_stats_use_locked_stats = true;
86
87 /* Current nesting depth of SPI calls, used to prevent recursive calls */
88 static int                      nested_level = 0;
89
90 /*
91  * The relation_stats_effective statistic cache is stored in hash table.
92  */
93 static HTAB        *rel_stats;
94
95 #define get_pg_statistic(tuple) ((Form_pg_statistic) GETSTRUCT(tuple))
96
97 PG_FUNCTION_INFO_V1(dbms_stats_merge);
98 PG_FUNCTION_INFO_V1(dbms_stats_invalidate_relation_cache);
99 PG_FUNCTION_INFO_V1(dbms_stats_invalidate_column_cache);
100 PG_FUNCTION_INFO_V1(dbms_stats_is_system_schema);
101 PG_FUNCTION_INFO_V1(dbms_stats_is_system_catalog);
102
103 extern Datum dbms_stats_merge(PG_FUNCTION_ARGS);
104 extern Datum dbms_stats_invalidate_relation_cache(PG_FUNCTION_ARGS);
105 extern Datum dbms_stats_invalidate_column_cache(PG_FUNCTION_ARGS);
106 extern Datum dbms_stats_is_system_schema(PG_FUNCTION_ARGS);
107 extern Datum dbms_stats_is_system_catalog(PG_FUNCTION_ARGS);
108
109 static HeapTuple dbms_stats_merge_internal(HeapTuple lhs, HeapTuple rhs,
110         TupleDesc tupledesc);
111 static void dbms_stats_check_tg_event(FunctionCallInfo fcinfo,
112         TriggerData *trigdata, HeapTuple *invtup, HeapTuple *rettup);
113 static void dbms_stats_invalidate_cache_internal(Oid relid, bool sta_col);
114
115 /* Module callbacks */
116 void    _PG_init(void);
117 void    _PG_fini(void);
118
119 static void dbms_stats_get_relation_info(PlannerInfo *root, Oid relid,
120         bool inhparent, RelOptInfo *rel);
121 static int32 dbms_stats_get_attavgwidth(Oid relid, AttrNumber attnum);
122 static bool dbms_stats_get_relation_stats(PlannerInfo *root, RangeTblEntry *rte,
123         AttrNumber attnum, VariableStatData *vardata);
124 static bool dbms_stats_get_index_stats(PlannerInfo *root, Oid indexOid,
125         AttrNumber indexattnum, VariableStatData *vardata);
126
127 static void get_merged_relation_stats(Oid relid, BlockNumber *pages,
128         double *tuples, double *allvisfrac, bool estimate);
129 static int32 get_merged_avgwidth(Oid relid, AttrNumber attnum);
130 static HeapTuple get_merged_column_stats(Oid relid, AttrNumber attnum,
131         bool inh);
132 static HeapTuple column_cache_search(Oid relid, AttrNumber attnum, bool inh);
133 static HeapTuple column_cache_enter(HeapTuple tuple);
134 static bool execute_plan(SPIPlanPtr *plan, const char *query, Oid relid,
135         const AttrNumber *attnum, bool inh);
136 static void StatsCacheRelCallback(Datum arg, Oid relid);
137 static void init_rel_stats(void);
138 static void init_rel_stats_entry(StatsRelationEntry *entry, Oid relid);
139 /* copied from PG core source tree */
140 static void dbms_stats_estimate_rel_size(Relation rel, int32 *attr_widths,
141                                   BlockNumber *pages, double *tuples, double *allvisfrac,
142                                   BlockNumber curpages);
143 static int32 dbms_stats_get_rel_data_width(Relation rel, int32 *attr_widths);
144
145 /* Unit test suit functions */
146 #ifdef UNIT_TEST
147 extern void test_import(int *passed, int *total);
148 extern void test_dump(int *passed, int *total);
149 extern void test_pg_dbms_stats(int *passed, int *total);
150 #endif
151
152 /*
153  * Module load callback
154  */
155 void
156 _PG_init(void)
157 {
158         /* Execute unit test cases */
159 #ifdef UNIT_TEST
160         {
161                 int passed = 0;
162                 int total = 0;
163
164                 test_import(&passed, &total);
165                 test_dump(&passed, &total);
166                 test_pg_dbms_stats(&passed, &total);
167
168                 elog(WARNING, "TOTAL %d/%d passed", passed, total);
169         }
170 #endif
171
172         /* Define custom GUC variables. */
173         DefineCustomBoolVariable("pg_dbms_stats.use_locked_stats",
174                                                          "Enable user defined statistics.",
175                                                          NULL,
176                                                          &pg_dbms_stats_use_locked_stats,
177                                                          true,
178                                                          PGC_USERSET,
179                                                          0,
180                                                          NULL,
181                                                          NULL,
182                                                          NULL);
183
184         EmitWarningsOnPlaceholders("pg_dbms_stats");
185
186         /* Back up old hooks, and install ours. */
187         prev_get_relation_info = get_relation_info_hook;
188         get_relation_info_hook = dbms_stats_get_relation_info;
189         prev_get_attavgwidth = get_attavgwidth_hook;
190         get_attavgwidth_hook = dbms_stats_get_attavgwidth;
191         prev_get_relation_stats = get_relation_stats_hook;
192         get_relation_stats_hook = dbms_stats_get_relation_stats;
193         prev_get_index_stats = get_index_stats_hook;
194         get_index_stats_hook = dbms_stats_get_index_stats;
195
196         /* Initialize hash table for statistics caching. */
197         init_rel_stats();
198
199         /* Also set up a callback for relcache SI invalidations */
200         CacheRegisterRelcacheCallback(StatsCacheRelCallback, (Datum) 0);
201 }
202
203 /*
204  * Module unload callback
205  */
206 void
207 _PG_fini(void)
208 {
209         /* Restore old hooks. */
210         get_relation_info_hook = prev_get_relation_info;
211         get_attavgwidth_hook = prev_get_attavgwidth;
212         get_relation_stats_hook = prev_get_relation_stats;
213         get_index_stats_hook = prev_get_index_stats;
214
215         /* A function to unregister callback for relcache is NOT provided. */
216 }
217
218 /*
219  * Store heap tuple header into given heap tuple.
220  */
221 static void
222 AssignHeapTuple(HeapTuple htup, HeapTupleHeader header)
223 {
224         htup->t_len = HeapTupleHeaderGetDatumLength(header);
225         ItemPointerSetInvalid(&htup->t_self);
226         htup->t_tableOid = InvalidOid;
227         htup->t_data = header;
228 }
229
230 /*
231  * dbms_stats_merge
232  *   called by sql function 'dbms_stats.merge', and return the execution result
233  *   of the function 'dbms_stats_merge_internal'.
234  */
235 Datum
236 dbms_stats_merge(PG_FUNCTION_ARGS)
237 {
238         HeapTupleData   lhs;
239         HeapTupleData   rhs;
240         TupleDesc               tupdesc;
241         HeapTuple               ret = NULL;
242
243         /* assign HeapTuple of the left statistics data unless null. */
244         if (PG_ARGISNULL(0))
245                 lhs.t_data = NULL;
246         else
247                 AssignHeapTuple(&lhs, PG_GETARG_HEAPTUPLEHEADER(0));
248
249         /* assign HeapTuple of the right statistics data unless null. */
250         if (PG_ARGISNULL(1))
251                 rhs.t_data = NULL;
252         else
253                 AssignHeapTuple(&rhs, PG_GETARG_HEAPTUPLEHEADER(1));
254
255         /* fast path for one-side is null */
256         if (lhs.t_data == NULL && rhs.t_data == NULL)
257                 PG_RETURN_NULL();
258
259         /* build a tuple descriptor for our result type */
260         if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
261                 elog(ERROR, "return type must be a row type");
262
263         /* merge two statistics tuples into one, and return it */
264         ret = dbms_stats_merge_internal(&lhs, &rhs, tupdesc);
265
266         if (ret)
267                 PG_RETURN_DATUM(HeapTupleGetDatum(ret));
268         else
269                 PG_RETURN_NULL();
270 }
271
272 /*
273  * dbms_stats_merge_internal
274  *   merge the dummy statistic (lhs) and the true statistic (rhs), on the basis
275  *   of given TupleDesc.
276  *
277  *   this function doesn't become an error level of ERROR to meet that the 
278  *   result of the SQL is not affected by the query plan.
279  */
280 static HeapTuple
281 dbms_stats_merge_internal(HeapTuple lhs, HeapTuple rhs, TupleDesc tupdesc)
282 {
283         Datum                   values[Natts_pg_statistic];
284         bool                    nulls[Natts_pg_statistic];
285         int                             i;
286         Oid                             atttype = InvalidOid;
287         Oid                             relid;
288         AttrNumber              attnum;
289
290         /* fast path for both-sides are null */
291         if ((lhs == NULL || lhs->t_data == NULL) &&
292                 (rhs == NULL || rhs->t_data == NULL))
293                 return NULL;
294
295         /* fast path for one-side is null */
296         if (lhs == NULL || lhs->t_data == NULL)
297         {
298                 /* use right tuple */
299                 heap_deform_tuple(rhs, tupdesc, values, nulls);
300                 for (i = 0; i < Anum_pg_statistic_staop1 + STATISTIC_NUM_SLOTS - 1; i++)
301                         if (nulls[i])
302                                 return NULL;    /* check null constraints */
303         }
304         else if (rhs == NULL || rhs->t_data == NULL)
305         {
306                 /* use left tuple */
307                 heap_deform_tuple(lhs, tupdesc, values, nulls);
308                 for (i = 0; i < Anum_pg_statistic_staop1 + STATISTIC_NUM_SLOTS - 1; i++)
309                         if (nulls[i])
310                                 return NULL;    /* check null constraints */
311         }
312         else
313         {
314                 /*
315                  * If the column value of the dummy statistic is not NULL, in the
316                  * statistics except the slot, use it.  Otherwise we use the column
317                  * value of the true statistic.
318                  */
319                 heap_deform_tuple(lhs, tupdesc, values, nulls);
320                 for (i = 0; i < Anum_pg_statistic_stakind1 - 1; i++)
321                 {
322                         if (nulls[i])
323                         {
324                                 values[i] = fastgetattr(rhs, i + 1, tupdesc, &nulls[i]);
325                                 if (nulls[i])
326                                 {
327                                         ereport(ELEVEL_BADSTATS,
328                                                 (errmsg("pg_dbms_stats: bad statistics"),
329                                                  errdetail("column \"%s\" should not be null",
330                                                         get_attname(StatisticRelationId,
331                                                                                 tupdesc->attrs[i]->attnum))));
332                                         return NULL;    /* should not be null */
333                                 }
334                         }
335                 }
336
337                 /*
338                  * If the column value of the dummy statistic is not all NULL, in the
339                  * statistics the slot, use it.  Otherwise we use the column
340                  * value of the true statistic.
341                  */
342                 for (; i < Anum_pg_statistic_staop1 + STATISTIC_NUM_SLOTS - 1; i++)
343                 {
344                         if (nulls[i])
345                         {
346                                 for (i = Anum_pg_statistic_stakind1 - 1;
347                                          i < Anum_pg_statistic_stavalues1 + STATISTIC_NUM_SLOTS - 1;
348                                          i++)
349                                 {
350                                         values[i] = fastgetattr(rhs, i + 1, tupdesc, &nulls[i]);
351                                         if (i < Anum_pg_statistic_staop1 + STATISTIC_NUM_SLOTS - 1 &&
352                                                 nulls[i])
353                                         {
354                                                 ereport(ELEVEL_BADSTATS,
355                                                         (errmsg("pg_dbms_stats: bad statistics"),
356                                                          errdetail("column \"%s\" should not be null",
357                                                                 get_attname(StatisticRelationId,
358                                                                                         tupdesc->attrs[i]->attnum))));
359                                                 return NULL;    /* should not be null */
360                                         }
361                                 }
362
363                                 break;
364                         }
365                 }
366         }
367
368         /*
369          * Verify types to work around for ALTER COLUMN TYPE.
370          *
371          * Note: We don't need to retrieve atttype when the attribute doesn't have
372          * neither Most-Common-Value nor Histogram, but we retrieve it always
373          * because it's not usual.
374          */
375         relid = DatumGetObjectId(values[0]);
376         attnum = DatumGetInt16(values[1]);
377         atttype = get_atttype(relid, attnum);
378         if (atttype == InvalidOid)
379         {
380                 ereport(WARNING,
381                         (errmsg("pg_dbms_stats: not exist column"),
382                          errdetail("relid \"%d\" or it's column number \"%d\" does not exist",
383                                 relid, attnum),
384                          errhint("need to execute clean_up_stats()")));
385                 return NULL;
386         }
387         for (i = 0; i < STATISTIC_NUM_SLOTS; i++)
388         {
389                 if ((i + 1 == STATISTIC_KIND_MCV ||
390                          i + 1 == STATISTIC_KIND_HISTOGRAM) &&
391                         !nulls[Anum_pg_statistic_stavalues1 + i - 1])
392                 {
393                         ArrayType  *arr;
394
395                         arr = DatumGetArrayTypeP(
396                                         values[Anum_pg_statistic_stavalues1 + i - 1]);
397                         if (arr == NULL || arr->elemtype != atttype)
398                         {
399                                 const char         *attname = get_attname(relid, attnum);
400
401                                 /*
402                                  * relid and attnum must be valid here because valid atttype
403                                  * has been gotten already.
404                                  */
405                                 Assert(attname);
406                                 ereport(ELEVEL_BADSTATS,
407                                         (errmsg("pg_dbms_stats: bad column type"),
408                                          errdetail("type of column \"%s\" has been changed",
409                                                 attname),
410                                          errhint("need to execute dbms_stats.unlock('%s', '%s')",
411                                                 get_rel_name(relid), attname)));
412                                 return NULL;
413                         }
414                 }
415         }
416
417         return heap_form_tuple(tupdesc, values, nulls);
418 }
419
420 /*
421  * dbms_stats_invalidate_relation_cache
422  *   Register invalidation of the specified relation's relcache.
423  *
424  * CREATE TRIGGER dbms_stats.relation_stats_locked FOR INSERT, UPDATE, DELETE FOR EACH
425  * ROWS EXECUTE ...
426  */
427 Datum
428 dbms_stats_invalidate_relation_cache(PG_FUNCTION_ARGS)
429 {
430         TriggerData                *trigdata = (TriggerData *) fcinfo->context;
431         HeapTuple                       invtup; /* tuple to be invalidated */
432         HeapTuple                       rettup; /* tuple to be returned */
433         Datum                           value;
434         bool                            isnull;
435
436         /* make sure it's called as a before/after trigger */
437         dbms_stats_check_tg_event(fcinfo, trigdata, &invtup, &rettup);
438
439         /*
440          * assume that position of dbms_stats.relation_stats_locked.relid is head value of
441          * tuple.
442          */
443         value = fastgetattr(invtup, 1, trigdata->tg_relation->rd_att, &isnull);
444
445         /*
446          * invalidate prepared statements and force re-planning with pg_dbms_stats.
447          */
448         dbms_stats_invalidate_cache_internal((Oid)value, false);
449
450         PG_RETURN_POINTER(rettup);
451 }
452
453 /*
454  * dbms_stats_invalidate_column_cache
455  *   Register invalidation of the specified relation's relcache.
456  *
457  * CREATE TRIGGER dbms_stats.column_stats_locked FOR INSERT, UPDATE, DELETE FOR EACH
458  * ROWS EXECUTE ...
459  */
460 Datum
461 dbms_stats_invalidate_column_cache(PG_FUNCTION_ARGS)
462 {
463         TriggerData                *trigdata = (TriggerData *) fcinfo->context;
464         Form_pg_statistic       form;
465         HeapTuple                       invtup; /* tuple to be invalidated */
466         HeapTuple                       rettup; /* tuple to be returned */
467
468         /* make sure it's called as a before/after trigger */
469         dbms_stats_check_tg_event(fcinfo, trigdata, &invtup, &rettup);
470
471         /*
472          * assume that both pg_statistic and dbms_stats.column_stats_locked have the same
473          * definition.
474          */
475         form = get_pg_statistic(invtup);
476
477         /*
478          * invalidate prepared statements and force re-planning with pg_dbms_stats.
479          */
480         dbms_stats_invalidate_cache_internal(form->starelid, true);
481
482         PG_RETURN_POINTER(rettup);
483 }
484
485 static void
486 dbms_stats_check_tg_event(FunctionCallInfo fcinfo,
487                                                   TriggerData *trigdata,
488                                                   HeapTuple *invtup,
489                                                   HeapTuple *rettup)
490 {
491         /* make sure it's called as a before/after trigger */
492         if (!CALLED_AS_TRIGGER(fcinfo) ||
493                 !TRIGGER_FIRED_BEFORE(trigdata->tg_event) ||
494                 !TRIGGER_FIRED_FOR_ROW(trigdata->tg_event))
495                 elog(ERROR, "pg_dbms_stats: invalid trigger call");
496
497         if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
498         {
499                 /* INSERT */
500                 *rettup = *invtup = trigdata->tg_trigtuple;
501         }
502         else if (TRIGGER_FIRED_BY_DELETE(trigdata->tg_event))
503         {
504                 /* DELETE */
505                 *rettup = *invtup = trigdata->tg_trigtuple;
506         }
507         else
508         {
509                 /* UPDATE */
510                 *invtup = trigdata->tg_trigtuple;
511                 *rettup = trigdata->tg_newtuple;
512         }
513 }
514
515 static void
516 dbms_stats_invalidate_cache_internal(Oid relid, bool sta_col)
517 {
518         Relation        rel;
519
520         /*
521          * invalidate prepared statements and force re-planning with pg_dbms_stats.
522          */
523         rel = try_relation_open(relid, NoLock);
524         if (rel != NULL)
525         {
526                 if (sta_col &&
527                         rel->rd_rel->relkind == RELKIND_INDEX &&
528                         (rel->rd_indextuple == NULL ||
529                          heap_attisnull(rel->rd_indextuple, Anum_pg_index_indexprs)))
530                         ereport(ERROR,
531                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
532                                          errmsg("\"%s\" is an index except an index expression",
533                                                         RelationGetRelationName(rel))));
534                 if (rel->rd_rel->relkind == RELKIND_COMPOSITE_TYPE)
535                         ereport(ERROR,
536                                         (errcode(ERRCODE_WRONG_OBJECT_TYPE),
537                                          errmsg("\"%s\" is a composite type",
538                                                         RelationGetRelationName(rel))));
539
540                 /*
541                  * We need to invalidate relcache of underlying table too, because
542                  * CachedPlan mechanism decides to do re-planning when any relcache of
543                  * used tables was invalid at EXECUTE.
544                  */
545                 if (rel->rd_rel->relkind == RELKIND_INDEX &&
546                         rel->rd_index && OidIsValid(rel->rd_index->indrelid))
547                         CacheInvalidateRelcacheByRelid(rel->rd_index->indrelid);
548
549                 CacheInvalidateRelcache(rel);
550                 relation_close(rel, NoLock);
551         }
552 }
553
554 /*
555  * dbms_stats_is_system_schema
556  *   called by sql function 'dbms_stats.is_system_schema', and return the
557  *   result of the function 'dbms_stats_is_system_internal'.
558  */
559 Datum
560 dbms_stats_is_system_schema(PG_FUNCTION_ARGS)
561 {
562         text   *arg0;
563         char   *schema_name;
564         bool    result;
565
566         arg0 = PG_GETARG_TEXT_PP(0);
567         schema_name = text_to_cstring(arg0);
568         result = dbms_stats_is_system_schema_internal(schema_name);
569
570         PG_FREE_IF_COPY(arg0, 0);
571
572         PG_RETURN_BOOL(result);
573 }
574
575 /*
576  * dbms_stats_is_system_schema_internal
577  *   return whether the given schema contains any system catalog.  Here we
578  *   treat dbms_stats objects as system catalogs to avoid infinite loop.
579  */
580 bool
581 dbms_stats_is_system_schema_internal(char *schema_name)
582 {
583         Assert(schema_name != NULL);
584
585         /* if the schema is system_schema, return true */
586         if (strcmp(schema_name, "pg_catalog") == 0 ||
587                 strcmp(schema_name, "pg_toast") == 0 ||
588                 strcmp(schema_name, "information_schema") == 0 ||
589                 strcmp(schema_name, "dbms_stats") == 0)
590                 return true;
591
592         return false;
593 }
594
595 /*
596  * dbms_stats_is_system_catalog
597  *   called by sql function 'dbms_stats.is_system_catalog', and return the
598  *   result of the function 'dbms_stats_is_system_catalog_internal'.
599  */
600 Datum
601 dbms_stats_is_system_catalog(PG_FUNCTION_ARGS)
602 {
603         Oid             relid;
604         bool    result;
605
606         if (PG_ARGISNULL(0))
607                 PG_RETURN_BOOL(true);
608
609         relid = PG_GETARG_OID(0);
610         result = dbms_stats_is_system_catalog_internal(relid);
611
612         PG_RETURN_BOOL(result);
613 }
614
615 /*
616  * dbms_stats_is_system_catalog_internal
617  *   Check whether the given relation is one of system catalogs.
618  */
619 bool
620 dbms_stats_is_system_catalog_internal(Oid relid)
621 {
622         Relation        rel;
623         char       *schema_name;
624         bool            result;
625
626         /* relid is InvalidOid */
627         if (!OidIsValid(relid))
628                 return false;
629
630         /* no such relation */
631         rel = try_relation_open(relid, NoLock);
632         if (rel == NULL)
633                 return false;
634
635         /* check by namespace name. */
636         schema_name = get_namespace_name(rel->rd_rel->relnamespace);
637         result = dbms_stats_is_system_schema_internal(schema_name);
638         relation_close(rel, NoLock);
639
640         return result;
641 }
642
643 /*
644  * dbms_stats_get_relation_info
645  *   Hook function for get_relation_info_hook, which implements post-process of
646  *   get_relation_info().
647  *
648  *   This function is designed on the basis of the fact that only expression
649  *   indexes have statistics.
650  */
651 static void
652 dbms_stats_get_relation_info(PlannerInfo *root,
653                                                          Oid relid,
654                                                          bool inhparent,
655                                                          RelOptInfo *rel)
656 {
657         ListCell   *lc;
658         double          allvisfrac; /* dummy */
659
660         /*
661          * Call previously installed hook function regardless to whether
662          * pg_dbms_stats is enabled or not.
663          */
664         if (prev_get_relation_info)
665                 prev_get_relation_info(root, relid, inhparent, rel);
666
667         /* If pg_dbms_stats is disabled, there is no more thing to do. */
668         if (!pg_dbms_stats_use_locked_stats)
669                 return;
670
671         /*
672          * Adjust stats of table itself, and stats of index
673          * relation_stats_effective as well
674          */
675
676         /*
677          * Estimate relation size --- unless it's an inheritance parent, in which
678          * case the size will be computed later in set_append_rel_pathlist, and we
679          * must leave it zero for now to avoid bollixing the total_table_pages
680          * calculation.
681          */
682         if (!inhparent)
683         {
684 #if PG_VERSION_NUM >= 90200
685                 get_merged_relation_stats(relid, &rel->pages, &rel->tuples,
686                                                                   &rel->allvisfrac, true);
687 #else
688                 get_merged_relation_stats(relid, &rel->pages, &rel->tuples,
689                                                                   &allvisfrac, true);
690 #endif
691         }
692         else
693                 return;
694
695         foreach(lc, rel->indexlist)
696         {
697                 /*
698                  * Estimate the index size.  If it's not a partial index, we lock
699                  * the number-of-tuples estimate to equal the parent table; if it
700                  * is partial then we have to use the same methods as we would for
701                  * a table, except we can be sure that the index is not larger
702                  * than the table.
703                  */
704                 IndexOptInfo   *info = (IndexOptInfo *) lfirst(lc);
705                 bool                    estimate = info->indpred != NIL;
706
707                 get_merged_relation_stats(info->indexoid, &info->pages, &info->tuples,
708                                                                   &allvisfrac, estimate);
709
710                 if (!estimate || (estimate && info->tuples > rel->tuples))
711                         info->tuples = rel->tuples;
712         }
713 }
714
715 /*
716  * dbms_stats_get_attavgwidth
717  *   Hook function for get_attavgwidth_hook which replaces get_attavgwidth().
718  *   Returning 0 tells caller to use standard routine.
719  */
720 static int32
721 dbms_stats_get_attavgwidth(Oid relid, AttrNumber attnum)
722 {
723         if (pg_dbms_stats_use_locked_stats)
724         {
725                 int32   width = get_merged_avgwidth(relid, attnum);
726                 if (width > 0)
727                         return width;
728         }
729
730         if (prev_get_attavgwidth)
731                 return prev_get_attavgwidth(relid, attnum);
732         else
733                 return 0;
734 }
735
736 /*
737  * We do nothing here, to keep the tuple valid even after examination.
738  */
739 static void
740 FreeHeapTuple(HeapTuple tuple)
741 {
742         /* noop */
743 }
744
745 /*
746  * dbms_stats_get_relation_stats
747  *   Hook function for get_relation_stats_hook which provides custom
748  *   per-relation statistics.
749  *   Returning false tells caller to use standard (true) statistics.
750  */
751 static bool
752 dbms_stats_get_relation_stats(PlannerInfo *root,
753                                                           RangeTblEntry *rte,
754                                                           AttrNumber attnum,
755                                                           VariableStatData *vardata)
756 {
757         if (pg_dbms_stats_use_locked_stats)
758         {
759                 HeapTuple       tuple;
760
761                 tuple = get_merged_column_stats(rte->relid, attnum, rte->inh);
762                 if (tuple != NULL)
763                 {
764                         vardata->statsTuple = tuple;
765                         vardata->freefunc = FreeHeapTuple;
766                         return true;
767                 }
768         }
769
770         if (prev_get_relation_stats)
771                 return prev_get_relation_stats(root, rte, attnum, vardata);
772         else
773                 return false;
774 }
775
776 /*
777  * dbms_stats_get_index_stats
778  *   Hook function for get_index_stats_hook which provides custom per-relation
779  *   statistics.
780  *   Returning false tells caller to use standard (true) statistics.
781  */
782 static bool
783 dbms_stats_get_index_stats(PlannerInfo *root,
784                                                    Oid indexOid,
785                                                    AttrNumber indexattnum,
786                                                    VariableStatData *vardata)
787 {
788         if (pg_dbms_stats_use_locked_stats)
789         {
790                 HeapTuple       tuple;
791
792                 tuple = get_merged_column_stats(indexOid, indexattnum, false);
793                 if (tuple != NULL)
794                 {
795                         vardata->statsTuple = tuple;
796                         vardata->freefunc = FreeHeapTuple;
797                         return true;
798                 }
799         }
800
801         if (prev_get_index_stats)
802                 return prev_get_index_stats(root, indexOid, indexattnum, vardata);
803         else
804                 return false;
805 }
806
807 /*
808  * Extract binary value from given column.
809  */
810 static Datum
811 get_binary_datum(int column, bool *isnull)
812 {
813         return SPI_getbinval(SPI_tuptable->vals[0],
814                                                  SPI_tuptable->tupdesc, column, isnull);
815 }
816
817 /*
818  * get_merged_relation_stats
819  *   get the statistics of the table, # of pages and # of rows, by executing
820  *   SELECT against dbms_stats.relation_stats_locked view.
821  */
822 static void
823 get_merged_relation_stats(Oid relid, BlockNumber *pages, double *tuples,
824                                                   double *allvisfrac, bool estimate)
825 {
826         StatsRelationEntry *entry;
827         bool            found;
828         Relation        rel;
829
830         /* avoid recursive call and system objects */
831         if (nested_level > 0 || relid < FirstNormalObjectId)
832                 return;
833
834         /*
835          * pg_dbms_stats doesn't handle system catalogs and its internal relation_stats_effective
836          */
837         if (dbms_stats_is_system_catalog_internal(relid))
838                 return;
839
840         /*
841          * First, search from cache.  If we have not cached stats for given relid
842          * yet, initialize newly created entry.
843          */
844         entry = hash_search(rel_stats, &relid, HASH_ENTER, &found);
845         if (!found)
846                 init_rel_stats_entry(entry, relid);
847
848         /*
849          * If we don't have valid cache entry, retrieve stats from catalogs,
850          * pg_class and dbms_stats.relation_stats_locked, and merge them for planner use.
851          * We also cache curpages value to make plans stable.
852          */
853         if (!entry->valid)
854         {
855                 bool            has_dummy;
856
857                 PG_TRY();
858                 {
859                         ++nested_level;
860
861                         SPI_connect();
862
863                         /*
864                          * Retrieve per-tuple dummy statistics from relation_stats_locked table
865                          * via SPI.  If we don't have dummy statistics, we return false to
866                          * tell caller thet we gave up to provide alternative statistics.
867                          */
868                         has_dummy = execute_plan(&rows_plan, rows_query, relid, NULL, true);
869                         if (has_dummy)
870                         {
871                                 HeapTuple       tuple;
872                                 Form_pg_class form;
873                                 bool            isnull;
874                                 Datum           val;
875
876                                 /*
877                                  * Retrieve per-relation stats from pg_class system catalog.
878                                  * We use syscache to support indexes
879                                  */
880                                 tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
881                                 if (!HeapTupleIsValid(tuple))
882                                         elog(ERROR, "cache lookup failed for relation %u", relid);
883                                 form = (Form_pg_class) GETSTRUCT(tuple);
884
885                                 /* Choose dummy or authentic */
886                                 val = get_binary_datum(1, &isnull);
887                                 entry->relpages = isnull ? form->relpages :
888                                                 (BlockNumber) DatumGetInt32(val);
889                                 val = get_binary_datum(2, &isnull);
890                                 entry->reltuples = isnull ? form->reltuples :
891                                                 (double) DatumGetFloat4(val);
892                                 val = get_binary_datum(3, &isnull);
893                                 entry->curpages = isnull ? InvalidBlockNumber :
894                                                 (BlockNumber) DatumGetInt32(val);
895 #if PG_VERSION_NUM >= 90200
896                                 val = get_binary_datum(4, &isnull);
897                                 entry->relallvisible = isnull ? form->relallvisible :
898                                                 (BlockNumber) DatumGetInt32(val);
899 #endif
900
901                                 ReleaseSysCache(tuple);
902
903                                 /* Mark this entry as valid. */
904                                 entry->valid = true;
905                         }
906
907                         SPI_finish();
908
909                         --nested_level;
910                 }
911                 PG_CATCH();
912                 {
913                         --nested_level;
914                         PG_RE_THROW();
915                 }
916                 PG_END_TRY();
917
918                 /*
919                  * If we don't dummy statistics, tell caller that we didn't change
920                  * stats in Relation, and it's ok to use authentic statistics as-is.
921                  */
922                 if (!has_dummy)
923                         return;
924         }
925
926         if (!estimate)
927         {
928                 *pages = entry->relpages;
929                 *tuples = entry->reltuples;
930                 return;
931         }
932
933         /*
934          * Get current # of pages to estimate current # of tuples, based on
935          * tuple density at the last ANALYZE and current # of pages.
936          */
937         rel = relation_open(relid, NoLock);
938         rel->rd_rel->relpages = entry->relpages;
939         rel->rd_rel->reltuples = entry->reltuples;
940 #if PG_VERSION_NUM >= 90200
941         rel->rd_rel->relallvisible = entry->relallvisible;
942 #endif
943         dbms_stats_estimate_rel_size(rel, NULL, pages, tuples, allvisfrac,
944                                                                  entry->curpages);
945         relation_close(rel, NoLock);
946 }
947
948 /*
949  * get_merged_avgwidth
950  *   get average width of the given column by merging dummy and authentic
951  *   statistics
952  */
953 static int32
954 get_merged_avgwidth(Oid relid, AttrNumber attnum)
955 {
956         HeapTuple               tuple;
957
958         if (nested_level > 0 || relid < FirstNormalObjectId)
959                 return 0;       /* avoid recursive call and system objects */
960
961         if ((tuple = get_merged_column_stats(relid, attnum, false)) == NULL)
962                 return 0;
963
964         return get_pg_statistic(tuple)->stawidth;
965 }
966
967 /*
968  * get_merged_column_stats
969  *   get per-column statistic of given column
970  *
971  * We cache the result because this function is called for every used column.
972  */
973 static HeapTuple
974 get_merged_column_stats(Oid relid, AttrNumber attnum, bool inh)
975 {
976         HeapTuple               tuple;
977         HeapTuple               statsTuple;
978
979         if (nested_level > 0 || relid < FirstNormalObjectId)
980                 return NULL;    /* avoid recursive call and system objects */
981
982         /* If the relation is system catalog, return NULL to use true statistics. */
983         if (dbms_stats_is_system_catalog_internal(relid))
984                 return NULL;
985
986         /* Return cached statistics, if any. */
987         if ((tuple = column_cache_search(relid, attnum, inh)) != NULL)
988                 return tuple;
989
990         /* Obtain statistics from syscache.  This must be released afterward. */
991         statsTuple = SearchSysCache3(STATRELATTINH,
992                                                                  ObjectIdGetDatum(relid),
993                                                                  Int16GetDatum(attnum),
994                                                                  BoolGetDatum(inh));
995         PG_TRY();
996         {
997                 ++nested_level;
998
999                 SPI_connect();
1000
1001                 /* Obtain dummy statistics. */
1002                 if (execute_plan(&tuple_plan, tuple_query, relid, &attnum, inh))
1003                 {
1004                         /* merge the dummy statistics and the true statistics */
1005                         tuple = dbms_stats_merge_internal(SPI_tuptable->vals[0], statsTuple,
1006                                                                                           SPI_tuptable->tupdesc);
1007                 }
1008                 else
1009                 {
1010                         /* if dummy statistics does not exist, use the true statistics */
1011                         tuple = statsTuple;
1012                 }
1013
1014                 /* Cache merged result for subsequent calls. */
1015                 if (tuple != NULL)
1016                         tuple = column_cache_enter(tuple);
1017
1018                 SPI_finish();
1019
1020                 --nested_level;
1021         }
1022         PG_CATCH();
1023         {
1024                 --nested_level;
1025                 PG_RE_THROW();
1026         }
1027         PG_END_TRY();
1028
1029         if (HeapTupleIsValid(statsTuple))
1030                 ReleaseSysCache(statsTuple);
1031
1032         return tuple;
1033 }
1034
1035 /*
1036  * column_cache_search
1037  *   Search statistic of the given column from the cache.
1038  */
1039 static HeapTuple
1040 column_cache_search(Oid relid, AttrNumber attnum, bool inh)
1041 {
1042         StatsRelationEntry *entry;
1043         bool                    found;
1044         ListCell           *lc;
1045
1046         /*
1047          * First, get cached relation stats.  If we have not cached relation stats,
1048          * we don't have column stats too.
1049          */
1050         entry = hash_search(rel_stats, &relid, HASH_FIND, &found);
1051         if (!found )
1052                 return NULL;
1053
1054         /*
1055          * It is assumed that not so many column_stats_effective are defined on a relation, so we
1056          * use simple linear-search here.  Using hash table for each relation would
1057          * be another solution, but it seems overkill so far.
1058          */
1059         foreach(lc, entry->col_stats)
1060         {
1061                 HeapTuple       tuple = (HeapTuple) lfirst(lc);
1062                 Form_pg_statistic form = get_pg_statistic(tuple);
1063
1064                 /* Find statistic of the given column from the cache. */
1065                 if (form->staattnum == attnum && form->stainherit == inh)
1066                         return tuple;
1067         }
1068
1069         return NULL;    /* not found */
1070 }
1071
1072 /*
1073  * Cache a per-column statistics. Cached statistics are valid through the
1074  * current session, unless dummy statistics or table definition have been
1075  * changed.
1076  *
1077  * We store statistics tuple in longer-lifetime context, CacheMemoryContext, to
1078  * keep them alive during this session.
1079  */
1080 static HeapTuple
1081 column_cache_enter(HeapTuple tuple)
1082 {
1083         MemoryContext   oldcontext;
1084         HeapTuple               newtuple;
1085         Form_pg_statistic form;
1086         StatsRelationEntry *entry;
1087         bool                    found;
1088
1089         Assert(tuple != NULL);
1090         Assert(!heap_attisnull(tuple, 1));
1091
1092         form = get_pg_statistic(tuple);
1093
1094         entry = hash_search(rel_stats, &form->starelid, HASH_ENTER, &found);
1095         if (!found)
1096                 init_rel_stats_entry(entry, form->starelid);
1097
1098         /*
1099          * Link new column statistics entry into the list in relation statsstics
1100          * entry.
1101          */
1102         oldcontext = MemoryContextSwitchTo(CacheMemoryContext);
1103         newtuple = heap_copytuple(tuple);
1104         entry->col_stats = lappend(entry->col_stats, newtuple);
1105         MemoryContextSwitchTo(oldcontext);
1106
1107         return newtuple;
1108 }
1109
1110 /*
1111  * Execute given plan.  When given plan is NULL, create new plan from given
1112  * query string, and execute it.  This function can be used only for retrieving
1113  * statistics of column_stats_effective and relation_stats_effective, because we assume #, types, and order
1114  * of parameters here.
1115  */
1116 static bool
1117 execute_plan(SPIPlanPtr *plan,
1118                          const char *query,
1119                          Oid relid,
1120                          const AttrNumber *attnum,
1121                          bool inh)
1122 {
1123         int             ret;
1124         Oid             argtypes[3] = { OIDOID, INT2OID, BOOLOID };
1125         int             nargs;
1126         Datum   values[3];
1127         bool    nulls[3] = { false, false, false };
1128
1129         nargs = (attnum ? 3 : 1);
1130
1131         /* When plan is not given, create plan from query string at first. */
1132         if (*plan == NULL)
1133         {
1134                 SPIPlanPtr p;
1135                 p = SPI_prepare(query, nargs, argtypes);
1136                 if (p == NULL)
1137                         elog(ERROR, "pg_dbms_stats: SPI_prepare => %d", SPI_result);
1138                 *plan = SPI_saveplan(p);
1139                 SPI_freeplan(p);
1140         }
1141
1142         values[0] = ObjectIdGetDatum(relid);
1143         values[1] = Int16GetDatum(attnum ? *attnum : 0);
1144         values[2] = BoolGetDatum(inh);
1145
1146         ret = SPI_execute_plan(*plan, values, nulls, true, 1);
1147         if (ret != SPI_OK_SELECT)
1148                 elog(ERROR, "pg_dbms_stats: SPI_execute_plan => %d", ret);
1149
1150         return SPI_processed > 0;
1151 }
1152
1153 /*
1154  * StatsCacheRelCallback
1155  *              Relcache inval callback function
1156  *
1157  * Invalidate cached statistic info of the given relid, or all cached statistic
1158  * info if relid == InvalidOid.  We don't complain even when we don't have such
1159  * statistics.
1160  *
1161  * Note: arg is not used.
1162  */
1163 static void
1164 StatsCacheRelCallback(Datum arg, Oid relid)
1165 {
1166         HASH_SEQ_STATUS         status;
1167         StatsRelationEntry *entry;
1168
1169         hash_seq_init(&status, rel_stats);
1170         while ((entry = hash_seq_search(&status)) != NULL)
1171         {
1172                 if (relid == InvalidOid || relid == entry->relid)
1173                 {
1174                         /* Mark the relation entry as INVALID */
1175                         entry->valid = false;
1176
1177                         /* Discard every column statistics */
1178                         list_free_deep(entry->col_stats);
1179                         entry->col_stats = NIL;
1180                 }
1181         }
1182
1183         /* We always check throughout the list, so hash_seq_term is not necessary */
1184 }
1185
1186 /*
1187  * Initialize hash table for per-relation statistics.
1188  */
1189 static void
1190 init_rel_stats(void)
1191 {
1192         HTAB       *hash;
1193         HASHCTL         ctl;
1194
1195         /* Prevent double initialization. */
1196         if (rel_stats != NULL)
1197                 return;
1198
1199         MemSet(&ctl, 0, sizeof(ctl));
1200         ctl.keysize = sizeof(Oid);
1201         ctl.entrysize = sizeof(StatsRelationEntry);
1202         ctl.hash = oid_hash;
1203         ctl.hcxt = CacheMemoryContext;
1204         hash = hash_create("dbms_stats relation statistics cache",
1205                                            MAX_REL_CACHE,
1206                                            &ctl, HASH_ELEM | HASH_CONTEXT);
1207
1208         rel_stats = hash;
1209 }
1210
1211 /*
1212  * Initialize newly added cache entry so that it represents an invalid cache
1213  * entry for given relid.
1214  */
1215 static void
1216 init_rel_stats_entry(StatsRelationEntry *entry, Oid relid)
1217 {
1218         entry->relid = relid;
1219         entry->valid = false;
1220         entry->relpages = InvalidBlockNumber;
1221         entry->reltuples = 0.0;
1222         entry->relallvisible = InvalidBlockNumber;
1223         entry->curpages = InvalidBlockNumber;
1224         entry->col_stats = NIL;
1225 }
1226
1227 /*
1228  * dbms_stats_estimate_rel_size - estimate # pages and # tuples in a table or
1229  * index
1230  *
1231  * We also estimate the fraction of the pages that are marked all-visible in
1232  * the visibility map, for use in estimation of index-only scans.
1233  *
1234  * If attr_widths isn't NULL, it points to the zero-index entry of the
1235  * relation's attr_widths[] cache; we fill this in if we have need to compute
1236  * the attribute widths for estimation purposes.
1237  *
1238  * Note: This function is copied from plancat.c in core source tree of version
1239  * 9.2, and customized for pg_dbms_stats.  Changes from orignal one are:
1240  *   - rename by prefixing dbms_stats_
1241  *   - add 3 parameters (relpages, reltuples, curpage) to pass dummy curpage
1242  *     values.
1243  *   - Get current # of pages only when supplied curpages is InvalidBlockNumber
1244  *   - get franction of all-visible-pages
1245  */
1246 static void
1247 dbms_stats_estimate_rel_size(Relation rel, int32 *attr_widths,
1248                                                          BlockNumber *pages, double *tuples,
1249                                                          double *allvisfrac, BlockNumber curpages)
1250 {
1251         BlockNumber relpages;
1252         double          reltuples;
1253         BlockNumber relallvisible;
1254         double          density;
1255
1256         switch (rel->rd_rel->relkind)
1257         {
1258                 case RELKIND_RELATION:
1259                 case RELKIND_INDEX:
1260 #if PG_VERSION_NUM >= 90300
1261                 case RELKIND_MATVIEW:
1262 #endif
1263                 case RELKIND_TOASTVALUE:
1264                         /* it has storage, ok to call the smgr */
1265                         if (curpages == InvalidBlockNumber)
1266                                 curpages = RelationGetNumberOfBlocks(rel);
1267
1268                         /*
1269                          * HACK: if the relation has never yet been vacuumed, use a
1270                          * minimum size estimate of 10 pages.  The idea here is to avoid
1271                          * assuming a newly-created table is really small, even if it
1272                          * currently is, because that may not be true once some data gets
1273                          * loaded into it.  Once a vacuum or analyze cycle has been done
1274                          * on it, it's more reasonable to believe the size is somewhat
1275                          * stable.
1276                          *
1277                          * (Note that this is only an issue if the plan gets cached and
1278                          * used again after the table has been filled.  What we're trying
1279                          * to avoid is using a nestloop-type plan on a table that has
1280                          * grown substantially since the plan was made.  Normally,
1281                          * autovacuum/autoanalyze will occur once enough inserts have
1282                          * happened and cause cached-plan invalidation; but that doesn't
1283                          * happen instantaneously, and it won't happen at all for cases
1284                          * such as temporary tables.)
1285                          *
1286                          * We approximate "never vacuumed" by "has relpages = 0", which
1287                          * means this will also fire on genuinely empty relation_stats_effective.       Not
1288                          * great, but fortunately that's a seldom-seen case in the real
1289                          * world, and it shouldn't degrade the quality of the plan too
1290                          * much anyway to err in this direction.
1291                          *
1292                          * There are two exceptions wherein we don't apply this heuristic.
1293                          * One is if the table has inheritance children.  Totally empty
1294                          * parent tables are quite common, so we should be willing to
1295                          * believe that they are empty.  Also, we don't apply the 10-page
1296                          * minimum to indexes.
1297                          */
1298                         if (curpages < 10 &&
1299                                 rel->rd_rel->relpages == 0 &&
1300                                 !rel->rd_rel->relhassubclass &&
1301                                 rel->rd_rel->relkind != RELKIND_INDEX)
1302                                 curpages = 10;
1303
1304                         /* report estimated # pages */
1305                         *pages = curpages;
1306                         /* quick exit if rel is clearly empty */
1307                         if (curpages == 0)
1308                         {
1309                                 *tuples = 0;
1310                                 *allvisfrac = 0;
1311                                 break;
1312                         }
1313                         /* coerce values in pg_class to more desirable types */
1314                         relpages = (BlockNumber) rel->rd_rel->relpages;
1315                         reltuples = (double) rel->rd_rel->reltuples;
1316 #if PG_VERSION_NUM >= 90200
1317                         relallvisible = (BlockNumber) rel->rd_rel->relallvisible;
1318 #else
1319                         relallvisible = 0;
1320 #endif
1321                         /*
1322                          * If it's an index, discount the metapage while estimating the
1323                          * number of tuples.  This is a kluge because it assumes more than
1324                          * it ought to about index structure.  Currently it's OK for
1325                          * btree, hash, and GIN indexes but suspect for GiST indexes.
1326                          */
1327                         if (rel->rd_rel->relkind == RELKIND_INDEX &&
1328                                 relpages > 0)
1329                         {
1330                                 curpages--;
1331                                 relpages--;
1332                         }
1333
1334                         /* estimate number of tuples from previous tuple density */
1335                         if (relpages > 0)
1336                                 density = reltuples / (double) relpages;
1337                         else
1338                         {
1339                                 /*
1340                                  * When we have no data because the relation was truncated,
1341                                  * estimate tuple width from attribute datatypes.  We assume
1342                                  * here that the pages are completely full, which is OK for
1343                                  * tables (since they've presumably not been VACUUMed yet) but
1344                                  * is probably an overestimate for indexes.  Fortunately
1345                                  * get_relation_info() can clamp the overestimate to the
1346                                  * parent table's size.
1347                                  *
1348                                  * Note: this code intentionally disregards alignment
1349                                  * considerations, because (a) that would be gilding the lily
1350                                  * considering how crude the estimate is, and (b) it creates
1351                                  * platform dependencies in the default plans which are kind
1352                                  * of a headache for regression testing.
1353                                  */
1354                                 int32           tuple_width;
1355
1356                                 tuple_width = dbms_stats_get_rel_data_width(rel, attr_widths);
1357                                 tuple_width += sizeof(HeapTupleHeaderData);
1358                                 tuple_width += sizeof(ItemPointerData);
1359                                 /* note: integer division is intentional here */
1360                                 density = (BLCKSZ - SizeOfPageHeaderData) / tuple_width;
1361                         }
1362                         *tuples = rint(density * (double) curpages);
1363
1364                         /*
1365                          * We use relallvisible as-is, rather than scaling it up like we
1366                          * do for the pages and tuples counts, on the theory that any
1367                          * pages added since the last VACUUM are most likely not marked
1368                          * all-visible.  But costsize.c wants it converted to a fraction.
1369                          */
1370                         if (relallvisible == 0 || curpages <= 0)
1371                                 *allvisfrac = 0;
1372                         else if ((double) relallvisible >= curpages)
1373                                 *allvisfrac = 1;
1374                         else
1375                                 *allvisfrac = (double) relallvisible / curpages;
1376                         break;
1377                 case RELKIND_SEQUENCE:
1378                         /* Sequences always have a known size */
1379                         *pages = 1;
1380                         *tuples = 1;
1381                         *allvisfrac = 0;
1382                         break;
1383                 case RELKIND_FOREIGN_TABLE:
1384                         /* Just use whatever's in pg_class */
1385                         *pages = rel->rd_rel->relpages;
1386                         *tuples = rel->rd_rel->reltuples;
1387                         *allvisfrac = 0;
1388                         break;
1389                 default:
1390                         /* else it has no disk storage; probably shouldn't get here? */
1391                         *pages = 0;
1392                         *tuples = 0;
1393                         *allvisfrac = 0;
1394                         break;
1395         }
1396 }
1397
1398 /*
1399  * dbms_stats_get_rel_data_width
1400  *
1401  * Estimate the average width of (the data part of) the relation's tuples.
1402  *
1403  * If attr_widths isn't NULL, it points to the zero-index entry of the
1404  * relation's attr_widths[] cache; use and update that cache as appropriate.
1405  *
1406  * Currently we ignore dropped column_stats_effective.  Ideally those should be included
1407  * in the result, but we haven't got any way to get info about them; and
1408  * since they might be mostly NULLs, treating them as zero-width is not
1409  * necessarily the wrong thing anyway.
1410  *
1411  * Note: This function is copied from plancat.c in core source tree of version
1412  * 9.2, and just renamed.
1413  */
1414 static int32
1415 dbms_stats_get_rel_data_width(Relation rel, int32 *attr_widths)
1416 {
1417         int32           tuple_width = 0;
1418         int                     i;
1419
1420         for (i = 1; i <= RelationGetNumberOfAttributes(rel); i++)
1421         {
1422                 Form_pg_attribute att = rel->rd_att->attrs[i - 1];
1423                 int32           item_width;
1424
1425                 if (att->attisdropped)
1426                         continue;
1427
1428                 /* use previously cached data, if any */
1429                 if (attr_widths != NULL && attr_widths[i] > 0)
1430                 {
1431                         tuple_width += attr_widths[i];
1432                         continue;
1433                 }
1434
1435                 /* This should match set_rel_width() in costsize.c */
1436                 item_width = get_attavgwidth(RelationGetRelid(rel), i);
1437                 if (item_width <= 0)
1438                 {
1439                         item_width = get_typavgwidth(att->atttypid, att->atttypmod);
1440                         Assert(item_width > 0);
1441                 }
1442                 if (attr_widths != NULL)
1443                         attr_widths[i] = item_width;
1444                 tuple_width += item_width;
1445         }
1446
1447         return tuple_width;
1448 }
1449
1450 #ifdef UNIT_TEST
1451 void test_pg_dbms_stats(int *passed, int *total);
1452 static void test_init_rel_stats(int *passed, int *total);
1453 static void test_init_rel_stats_entry(int *passed, int *total);
1454
1455 void
1456 test_pg_dbms_stats(int *passed, int *total)
1457 {
1458         int local_passed = 0;
1459         int local_total = 0;
1460
1461         elog(WARNING, "==========");
1462
1463         /* Do tests here */
1464         test_init_rel_stats(&local_passed, &local_total);
1465         test_init_rel_stats_entry(&local_passed, &local_total);
1466
1467         elog(WARNING, "%s %d/%d passed", __FUNCTION__, local_passed, local_total);
1468         *passed += local_passed;
1469         *total += local_total;
1470 }
1471
1472 static void
1473 test_init_rel_stats_entry(int *passed, int *total)
1474 {
1475         int             caseno = 0;
1476         StatsRelationEntry entry;
1477         
1478         /*
1479          * *-*-1
1480          */
1481         caseno++;
1482         init_rel_stats_entry(&entry, 1234);
1483         if (entry.relid == 1234 &&
1484                 entry.valid == false &&
1485                 entry.relpages == InvalidBlockNumber &&
1486                 entry.reltuples == 0 &&
1487                 entry.relallvisible == InvalidBlockNumber &&
1488                 entry.curpages == InvalidBlockNumber &&
1489                 entry.col_stats == NIL)
1490         {
1491                 elog(WARNING, "%s-%d ok", __FUNCTION__, caseno);
1492                 (*passed)++;
1493         }
1494         else
1495                 elog(WARNING, "%s-%d failed: initialized", __FUNCTION__, caseno);
1496
1497         (*total) += caseno;
1498 }
1499
1500 static void
1501 test_init_rel_stats(int *passed, int *total)
1502 {
1503         int                     caseno = 0;
1504         static HTAB        *org_rel_stats;
1505
1506         /*
1507          * *-*-1
1508          *   - first call
1509          */
1510         caseno++;
1511         init_rel_stats();
1512         if (rel_stats != NULL)
1513         {
1514                 elog(WARNING, "%s-%d ok", __FUNCTION__, caseno);
1515                 (*passed)++;
1516         }
1517         else
1518                 elog(WARNING, "%s-%d failed: rel_stats is NULL", __FUNCTION__, caseno);
1519
1520         /*
1521          * *-*-2
1522          *   - second call
1523          */
1524         caseno++;
1525         org_rel_stats = rel_stats;
1526         init_rel_stats();
1527         if (org_rel_stats == rel_stats)
1528         {
1529                 elog(WARNING, "%s-%d ok", __FUNCTION__, caseno);
1530                 (*passed)++;
1531         }
1532         else
1533                 elog(WARNING, "%s-%d failed: rel_stats changed from %p to %p",
1534                          __FUNCTION__, caseno, org_rel_stats, rel_stats);
1535
1536         *total += caseno;
1537 }
1538 #endif