OSDN Git Service

Add database page inspection /contrib module.
authorBruce Momjian <bruce@momjian.us>
Thu, 17 May 2007 19:11:25 +0000 (19:11 +0000)
committerBruce Momjian <bruce@momjian.us>
Thu, 17 May 2007 19:11:25 +0000 (19:11 +0000)
Simon and Heikki

13 files changed:
contrib/Makefile
contrib/README
contrib/pageinspect/Makefile [new file with mode: 0644]
contrib/pageinspect/README.pageinspect [new file with mode: 0644]
contrib/pageinspect/btreefuncs.c [new file with mode: 0644]
contrib/pageinspect/heapfuncs.c [new file with mode: 0644]
contrib/pageinspect/pageinspect.sql.in [new file with mode: 0644]
contrib/pageinspect/rawpage.c [new file with mode: 0644]
contrib/pageinspect/uninstall_pageinspect.sql [new file with mode: 0644]
contrib/pgstattuple/README.pgstattuple
contrib/pgstattuple/pgstatindex.c
contrib/pgstattuple/pgstattuple.sql.in
contrib/pgstattuple/uninstall_pgstattuple.sql

index 9b2bdc5..dfb65d6 100644 (file)
@@ -1,4 +1,4 @@
-# $PostgreSQL: pgsql/contrib/Makefile,v 1.75 2007/04/21 17:26:17 petere Exp $
+# $PostgreSQL: pgsql/contrib/Makefile,v 1.76 2007/05/17 19:11:24 momjian Exp $
 
 subdir = contrib
 top_builddir = ..
@@ -19,6 +19,7 @@ WANTED_DIRS = \
                lo              \
                ltree           \
                oid2name        \
+               pageinspect     \
                pg_buffercache  \
                pg_freespacemap \
                pg_standby      \
index 7c5034d..5b2167e 100644 (file)
@@ -80,6 +80,10 @@ oid2name -
        Maps numeric files to table names
        by B Palmer <bpalmer@crimelabs.net>
 
+pageinspect -
+       Allows inspection of database pages
+       Heikki Linnakangas <heikki@enterprisedb.com>
+
 pg_buffercache -
        Real time queries on the shared buffer cache
        by Mark Kirkwood <markir@paradise.net.nz>
diff --git a/contrib/pageinspect/Makefile b/contrib/pageinspect/Makefile
new file mode 100644 (file)
index 0000000..5222e89
--- /dev/null
@@ -0,0 +1,24 @@
+#-------------------------------------------------------------------------
+#
+# pageinspect Makefile
+#
+# $PostgreSQL: pgsql/contrib/pageinspect/Makefile,v 1.1 2007/05/17 19:11:24 momjian Exp $
+#
+#-------------------------------------------------------------------------
+
+MODULE_big     = pageinspect
+OBJS           = rawpage.o heapfuncs.o btreefuncs.o
+DOCS           = README.pageinspect
+DATA_built     = pageinspect.sql
+DATA           = uninstall_pageinspect.sql
+
+ifdef USE_PGXS
+PGXS := $(shell pg_config --pgxs)
+include $(PGXS)
+else
+subdir = contrib/pageinspect
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
+
diff --git a/contrib/pageinspect/README.pageinspect b/contrib/pageinspect/README.pageinspect
new file mode 100644 (file)
index 0000000..c9af5cd
--- /dev/null
@@ -0,0 +1,94 @@
+The functions in this module allow you to inspect the contents of data pages
+at a low level, for debugging purposes.
+
+1. Installation
+
+    $ make
+    $ make install
+    $ psql -e -f /usr/local/pgsql/share/contrib/pageinspect.sql test
+
+2. Functions included:
+
+    get_raw_page
+    ------------
+    get_raw_page reads one block of the named table and returns a copy as a
+    bytea field. This allows a single time-consistent copy of the block to be
+    made. Use of this function is restricted to superusers.
+
+    page_header
+    -----------
+    page_header shows fields which are common to all PostgreSQL heap and index
+    pages. Use of this function is restricted to superusers.
+
+    A page image obtained with get_raw_page should be passed as argument:
+
+        test=# SELECT * FROM page_header(get_raw_page('pg_class',0));
+           lsn    | tli | flags | lower | upper | special | pagesize | version
+        ----------+-----+-------+-------+-------+---------+----------+---------
+         0/3C5614 |   1 |     1 |   216 |   256 |    8192 |     8192 |       4
+        (1 row)
+
+    The returned columns correspond to the fields in the PageHeaderData-struct,
+    see src/include/storage/bufpage.h for more details.
+
+    heap_page_items
+    ---------------
+    heap_page_items shows all line pointers on a heap page.  For those line
+    pointers that are in use, tuple headers are also shown. All tuples are
+    shown, whether or not the tuples were visible to an MVCC snapshot at the
+    time the raw page was copied. Use of this function is restricted to
+    superusers.
+
+    A heap page image obtained with get_raw_page should be passed as argument:
+
+        test=# SELECT * FROM heap_page_items(get_raw_page('pg_class',0));
+
+    See src/include/storage/itemid.h and src/include/access/htup.h for
+    explanations of the fields returned.
+
+    bt_metap
+    --------
+    bt_metap() returns information about the btree index metapage:
+
+        test=> SELECT * FROM bt_metap('pg_cast_oid_index');
+        -[ RECORD 1 ]-----
+        magic     | 340322
+        version   | 2
+        root      | 1
+        level     | 0
+        fastroot  | 1
+        fastlevel | 0
+
+    bt_page_stats
+    -------------
+    bt_page_stats() shows information about single btree pages:
+
+        test=> SELECT * FROM bt_page_stats('pg_cast_oid_index', 1);
+        -[ RECORD 1 ]-+-----
+        blkno         | 1
+        type          | l
+        live_items    | 256
+        dead_items    | 0
+        avg_item_size | 12
+        page_size     | 8192
+        free_size     | 4056
+        btpo_prev     | 0
+        btpo_next     | 0
+        btpo          | 0
+        btpo_flags    | 3
+
+    bt_page_items
+    -------------
+    bt_page_items() returns information about specific items on btree pages:
+
+        test=> SELECT * FROM bt_page_items('pg_cast_oid_index', 1);
+         itemoffset |  ctid   | itemlen | nulls | vars |    data
+        ------------+---------+---------+-------+------+-------------
+                  1 | (0,1)   |      12 | f     | f    | 23 27 00 00
+                  2 | (0,2)   |      12 | f     | f    | 24 27 00 00
+                  3 | (0,3)   |      12 | f     | f    | 25 27 00 00
+                  4 | (0,4)   |      12 | f     | f    | 26 27 00 00
+                  5 | (0,5)   |      12 | f     | f    | 27 27 00 00
+                  6 | (0,6)   |      12 | f     | f    | 28 27 00 00
+                  7 | (0,7)   |      12 | f     | f    | 29 27 00 00
+                  8 | (0,8)   |      12 | f     | f    | 2a 27 00 00
diff --git a/contrib/pageinspect/btreefuncs.c b/contrib/pageinspect/btreefuncs.c
new file mode 100644 (file)
index 0000000..9b392d0
--- /dev/null
@@ -0,0 +1,499 @@
+/*
+ * btreefuncs.c
+ *
+ * Copyright (c) 2006 Satoshi Nagayasu <nagayasus@nttdata.co.jp>
+ *
+ * Permission to use, copy, modify, and distribute this software and
+ * its documentation for any purpose, without fee, and without a
+ * written agreement is hereby granted, provided that the above
+ * copyright notice and this paragraph and the following two
+ * paragraphs appear in all copies.
+ *
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT,
+ * INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
+ * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS
+ * DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS
+ * IS" BASIS, AND THE AUTHOR HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE,
+ * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ */
+
+#include "postgres.h"
+
+#include "fmgr.h"
+#include "funcapi.h"
+#include "access/heapam.h"
+#include "access/itup.h"
+#include "access/nbtree.h"
+#include "access/transam.h"
+#include "catalog/namespace.h"
+#include "catalog/pg_type.h"
+#include "utils/builtins.h"
+#include "utils/inval.h"
+
+PG_FUNCTION_INFO_V1(bt_metap);
+PG_FUNCTION_INFO_V1(bt_page_items);
+PG_FUNCTION_INFO_V1(bt_page_stats);
+
+extern Datum bt_metap(PG_FUNCTION_ARGS);
+extern Datum bt_page_items(PG_FUNCTION_ARGS);
+extern Datum bt_page_stats(PG_FUNCTION_ARGS);
+
+#define BTMETAP_TYPE "public.bt_metap_type"
+#define BTMETAP_NCOLUMNS 6
+
+#define BTPAGEITEMS_TYPE "public.bt_page_items_type"
+#define BTPAGEITEMS_NCOLUMNS 6
+
+#define BTPAGESTATS_TYPE "public.bt_page_stats_type"
+#define BTPAGESTATS_NCOLUMNS 11
+
+
+#define IS_INDEX(r) ((r)->rd_rel->relkind == 'i')
+#define IS_BTREE(r) ((r)->rd_rel->relam == BTREE_AM_OID)
+
+#define CHECK_PAGE_OFFSET_RANGE(page, offset) { \
+               if ( !(FirstOffsetNumber<=(offset) && \
+                                               (offset)<=PageGetMaxOffsetNumber(page)) ) \
+                        elog(ERROR, "Page offset number out of range."); }
+
+/*
+ * Raise an error unless blkno is an existing block of rel.  Callers pass an
+ * unsigned block number, so only the upper bound needs to be tested.
+ */
+#define CHECK_RELATION_BLOCK_RANGE(rel, blkno) { \
+               if ( RelationGetNumberOfBlocks((rel))<=(BlockNumber)(blkno) ) \
+                        elog(ERROR, "Block number out of range."); }
+
+/* ------------------------------------------------
+ * structure for single btree page statistics
+ * ------------------------------------------------
+ */
+typedef struct BTPageStat
+{
+       uint32          blkno;                  /* block number supplied by the caller */
+       uint32          live_items;             /* line pointers not marked deleted */
+       uint32          dead_items;             /* line pointers marked deleted */
+       uint32          page_size;              /* PageGetPageSize() of the page */
+       uint32          max_avail;              /* pd_special minus the page header size */
+       uint32          free_size;              /* PageGetFreeSpace() of the page */
+       uint32          avg_item_size;  /* summed IndexTupleSize / item count, or 0 */
+       char            type;                   /* 'd', 'e', 'l', 'r' or 'i'; see
+                                                                * GetBTPageStatistics() */
+
+       /* opaque data */
+       BlockNumber btpo_prev;
+       BlockNumber btpo_next;
+       union
+       {
+               uint32          level;          /* copied for pages that are not deleted */
+               TransactionId xact;             /* copied for deleted pages (type 'd') */
+       }                       btpo;
+       uint16          btpo_flags;
+       BTCycleId       btpo_cycleid;   /* collected, but not part of the
+                                                                * bt_page_stats() result row */
+}      BTPageStat;
+
+/* ------------------------------------------------
+ * A structure for a whole btree index statistics
+ * used by pgstatindex().
+ * ------------------------------------------------
+ */
+typedef struct BTIndexStat
+{
+       uint32          magic;
+       uint32          version;
+       BlockNumber root_blkno;
+       uint32          level;
+
+       BlockNumber fastroot;
+       uint32          fastlevel;
+
+       uint32          live_items;
+       uint32          dead_items;
+
+       uint32          root_pages;
+       uint32          internal_pages;
+       uint32          leaf_pages;
+       uint32          empty_pages;
+       uint32          deleted_pages;
+
+       uint32          page_size;
+       uint32          avg_item_size;
+
+       uint32          max_avail;
+       uint32          free_space;
+}      BTIndexStat;
+
+
+/* -------------------------------------------------
+ * GetBTPageStatistics()
+ *
+ * Collect statistics of a single b-tree page
+ * -------------------------------------------------
+ */
+static void
+GetBTPageStatistics(BlockNumber blkno, Buffer buffer, BTPageStat * stat)
+{
+       Page            pg = BufferGetPage(buffer);
+       PageHeader      hdr = (PageHeader) pg;
+       BTPageOpaque opq = (BTPageOpaque) PageGetSpecialPointer(pg);
+       OffsetNumber last = PageGetMaxOffsetNumber(pg);
+       int                     total_bytes = 0;
+       int                     cur;
+
+       /* Fields that are filled in for every page, deleted or not. */
+       stat->blkno = blkno;
+       stat->max_avail = BLCKSZ - (BLCKSZ - hdr->pd_special + SizeOfPageHeaderData);
+       stat->live_items = 0;
+       stat->dead_items = 0;
+       stat->page_size = PageGetPageSize(pg);
+
+       /*
+        * For a deleted page only the type and the xact member of the btpo
+        * union are reported; all remaining fields keep whatever values the
+        * caller put there.
+        */
+       if (P_ISDELETED(opq))
+       {
+               stat->type = 'd';
+               stat->btpo.xact = opq->btpo.xact;
+               return;
+       }
+
+       /* Classify the page; the first matching test wins. */
+       if (P_IGNORE(opq))
+               stat->type = 'e';
+       else if (P_ISLEAF(opq))
+               stat->type = 'l';
+       else if (P_ISROOT(opq))
+               stat->type = 'r';
+       else
+               stat->type = 'i';
+
+       /* Copy the btree opaque area verbatim. */
+       stat->btpo_prev = opq->btpo_prev;
+       stat->btpo_next = opq->btpo_next;
+       stat->btpo.level = opq->btpo.level;
+       stat->btpo_flags = opq->btpo_flags;
+       stat->btpo_cycleid = opq->btpo_cycleid;
+
+       /* Walk every line pointer, summing tuple sizes and counting items. */
+       cur = FirstOffsetNumber;
+       while (cur <= last)
+       {
+               ItemId          lp = PageGetItemId(pg, cur);
+               IndexTuple      tup = (IndexTuple) PageGetItem(pg, lp);
+
+               total_bytes += IndexTupleSize(tup);
+
+               if (ItemIdDeleted(lp))
+                       stat->dead_items++;
+               else
+                       stat->live_items++;
+
+               cur++;
+       }
+
+       stat->free_size = PageGetFreeSpace(pg);
+
+       /* Average over live and dead items together; 0 for an empty page. */
+       stat->avg_item_size = ((stat->live_items + stat->dead_items) > 0)
+               ? total_bytes / (stat->live_items + stat->dead_items)
+               : 0;
+}
+
+/* -----------------------------------------------
+ * bt_page_stats()
+ *
+ * Usage: SELECT * FROM bt_page_stats('t1_pkey', 1);
+ * -----------------------------------------------
+ */
+/*
+ * Return one row of statistics for block 'blkno' (argument 1) of the btree
+ * index named by argument 0.
+ *
+ * Errors out if the relation is not a btree index, if blkno is 0 (the
+ * metapage) or if the block-range check fails.
+ */
+Datum
+bt_page_stats(PG_FUNCTION_ARGS)
+{
+       text       *relname = PG_GETARG_TEXT_P(0);
+       uint32          blkno = PG_GETARG_UINT32(1);
+       Buffer          buffer;
+
+       Relation        rel;
+       RangeVar   *relrv;
+       Datum           result;
+
+       relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
+       rel = relation_openrv(relrv, AccessShareLock);
+
+       /*
+        * Validate the request before touching storage, so that a wrong
+        * relation kind or the metapage is rejected without reading a buffer.
+        */
+       if (!IS_INDEX(rel) || !IS_BTREE(rel))
+               elog(ERROR, "bt_page_stats() can be used only on b-tree index.");
+
+       if (blkno == 0)
+               elog(ERROR, "Block 0 is a meta page.");
+
+       CHECK_RELATION_BLOCK_RANGE(rel, blkno);
+
+       /*
+        * NOTE(review): the page is examined with only a pin held, no buffer
+        * content lock -- confirm whether a share lock should be taken here.
+        */
+       buffer = ReadBuffer(rel, blkno);
+
+       {
+               HeapTuple       tuple;
+               TupleDesc       tupleDesc;
+               int                     j;
+               char       *values[BTPAGESTATS_NCOLUMNS];
+
+               BTPageStat      stat;
+
+               /*
+                * GetBTPageStatistics() returns early for deleted pages, so give
+                * the fields it would skip a defined value first.
+                */
+               stat.btpo_prev = stat.btpo_next = InvalidBlockNumber;
+               stat.btpo_flags = stat.free_size = stat.avg_item_size = 0;
+
+               GetBTPageStatistics(blkno, buffer, &stat);
+
+               tupleDesc = RelationNameGetTupleDesc(BTPAGESTATS_TYPE);
+
+               /*
+                * Every column is passed as a C string.  The unsigned fields are
+                * deliberately printed with %d: the result columns are int4, and
+                * e.g. InvalidBlockNumber must come out as -1 to be accepted.
+                */
+               j = 0;
+               values[j] = palloc(32);
+               snprintf(values[j++], 32, "%d", stat.blkno);
+
+               values[j] = palloc(32);
+               snprintf(values[j++], 32, "%c", stat.type);
+               values[j] = palloc(32);
+               snprintf(values[j++], 32, "%d", stat.live_items);
+               values[j] = palloc(32);
+               snprintf(values[j++], 32, "%d", stat.dead_items);
+               values[j] = palloc(32);
+               snprintf(values[j++], 32, "%d", stat.avg_item_size);
+               values[j] = palloc(32);
+               snprintf(values[j++], 32, "%d", stat.page_size);
+               values[j] = palloc(32);
+               snprintf(values[j++], 32, "%d", stat.free_size);
+               values[j] = palloc(32);
+               snprintf(values[j++], 32, "%d", stat.btpo_prev);
+               values[j] = palloc(32);
+               snprintf(values[j++], 32, "%d", stat.btpo_next);
+
+               /* btpo is a union: xact for deleted pages, level otherwise */
+               values[j] = palloc(32);
+               if (stat.type == 'd')
+                       snprintf(values[j++], 32, "%d", stat.btpo.xact);
+               else
+                       snprintf(values[j++], 32, "%d", stat.btpo.level);
+
+               values[j] = palloc(32);
+               snprintf(values[j++], 32, "%d", stat.btpo_flags);
+
+               tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc),
+                                                                          values);
+
+               result = TupleGetDatum(TupleDescGetSlot(tupleDesc), tuple);
+       }
+
+       ReleaseBuffer(buffer);
+
+       relation_close(rel, AccessShareLock);
+
+       PG_RETURN_DATUM(result);
+}
+
+/*-------------------------------------------------------
+ * bt_page_items()
+ *
+ * Get IndexTupleData set in a leaf page
+ *
+ * Usage: SELECT * FROM bt_page_items('t1_pkey', 1);
+ *-------------------------------------------------------
+ */
+/* ---------------------------------------------------
+ * data structure for SRF to hold a scan information
+ * ---------------------------------------------------
+ */
+struct user_args
+{
+       TupleDesc       tupd;                   /* descriptor of the result row type */
+       Page            page;                   /* private copy of the inspected page */
+       uint16          offset;                 /* next line pointer to report */
+};
+
+/*
+ * Set-returning function: one row per line pointer of block 'blkno'
+ * (argument 1) of the btree index named by argument 0.
+ *
+ * The first call validates the arguments, copies the page into memory that
+ * lives for the whole scan and immediately drops the buffer pin and the
+ * relation reference.  Nothing is therefore left to leak when the calling
+ * query stops fetching rows before the set is exhausted.
+ */
+Datum
+bt_page_items(PG_FUNCTION_ARGS)
+{
+       text       *relname = PG_GETARG_TEXT_P(0);
+       uint32          blkno = PG_GETARG_UINT32(1);
+
+       Datum           result;
+       char       *values[BTPAGEITEMS_NCOLUMNS];
+       HeapTuple       tuple;
+       ItemId          id;
+
+       FuncCallContext *fctx;
+       MemoryContext mctx;
+       struct user_args *uargs = NULL;
+
+       if (blkno == 0)
+               elog(ERROR, "Block 0 is a meta page.");
+
+       if (SRF_IS_FIRSTCALL())
+       {
+               RangeVar   *relrv;
+               Relation        rel;
+               Buffer          buffer;
+               BTPageOpaque opaque;
+
+               fctx = SRF_FIRSTCALL_INIT();
+
+               relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
+               rel = relation_openrv(relrv, AccessShareLock);
+
+               /* reject anything that is not a btree index before reading it */
+               if (!IS_INDEX(rel) || !IS_BTREE(rel))
+                       elog(ERROR, "bt_page_items() can be used only on b-tree index.");
+
+               CHECK_RELATION_BLOCK_RANGE(rel, blkno);
+
+               buffer = ReadBuffer(rel, blkno);
+
+               /* everything kept in uargs must survive until the last call */
+               mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
+
+               uargs = palloc(sizeof(struct user_args));
+
+               uargs->tupd = RelationNameGetTupleDesc(BTPAGEITEMS_TYPE);
+               uargs->offset = FirstOffsetNumber;
+
+               /*
+                * Work on a copy of the page so that the pin need not be held
+                * between calls.
+                */
+               uargs->page = palloc(BLCKSZ);
+               memcpy(uargs->page, BufferGetPage(buffer), BLCKSZ);
+
+               MemoryContextSwitchTo(mctx);
+
+               ReleaseBuffer(buffer);
+               relation_close(rel, AccessShareLock);
+
+               opaque = (BTPageOpaque) PageGetSpecialPointer(uargs->page);
+
+               if (P_ISDELETED(opaque))
+                       elog(NOTICE, "bt_page_items(): this page is deleted.");
+
+               fctx->max_calls = PageGetMaxOffsetNumber(uargs->page);
+               fctx->user_fctx = uargs;
+       }
+
+       fctx = SRF_PERCALL_SETUP();
+       uargs = fctx->user_fctx;
+
+       if (fctx->call_cntr < fctx->max_calls)
+       {
+               IndexTuple      itup;
+               BlockNumber tid_blkno;
+               char       *data;
+               char       *dump;
+               char       *out;
+               int                     datalen;
+               int                     off;
+               int                     j = 0;
+
+               id = PageGetItemId(uargs->page, uargs->offset);
+
+               if (!ItemIdIsValid(id))
+                       elog(ERROR, "Invalid ItemId.");
+
+               itup = (IndexTuple) PageGetItem(uargs->page, id);
+
+               /* block number part of the TID stored in the index tuple */
+               tid_blkno = BlockIdGetBlockNumber(&(itup->t_tid.ip_blkid));
+
+               values[j] = palloc(32);
+               snprintf(values[j++], 32, "%d", uargs->offset);
+               values[j] = palloc(32);
+               snprintf(values[j++], 32, "(%u,%u)", tid_blkno, itup->t_tid.ip_posid);
+               values[j] = palloc(32);
+               snprintf(values[j++], 32, "%d", (int) IndexTupleSize(itup));
+               values[j] = palloc(32);
+               snprintf(values[j++], 32, "%c", IndexTupleHasNulls(itup) ? 't' : 'f');
+               values[j] = palloc(32);
+               snprintf(values[j++], 32, "%c", IndexTupleHasVarwidths(itup) ? 't' : 'f');
+
+               /*
+                * Hex dump of the tuple's data area: two digits per byte,
+                * separated by single spaces.  A tuple whose size is smaller
+                * than its data offset is dumped as an empty string.
+                */
+               data = (char *) itup + IndexInfoFindDataOffset(itup->t_info);
+               datalen = (int) IndexTupleSize(itup) -
+                       (int) IndexInfoFindDataOffset(itup->t_info);
+               if (datalen < 0)
+                       datalen = 0;
+
+               dump = palloc(datalen * 3 + 1);
+               out = dump;
+
+               for (off = 0; off < datalen; off++)
+               {
+                       if (off > 0)
+                               *out++ = ' ';
+                       sprintf(out, "%02x", *(data + off) & 0xff);
+                       out += 2;
+               }
+               *out = '\0';
+
+               values[j] = dump;
+
+               tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(uargs->tupd), values);
+               result = TupleGetDatum(TupleDescGetSlot(uargs->tupd), tuple);
+
+               uargs->offset = uargs->offset + 1;
+
+               SRF_RETURN_NEXT(fctx, result);
+       }
+       else
+       {
+               /* buffer and relation were already released by the first call */
+               SRF_RETURN_DONE(fctx);
+       }
+}
+
+
+/* ------------------------------------------------
+ * bt_metap()
+ *
+ * Get a btree meta-page information
+ *
+ * Usage: SELECT * FROM bt_metap('t1_pkey')
+ * ------------------------------------------------
+ */
+Datum
+bt_metap(PG_FUNCTION_ARGS)
+{
+       text       *index_name = PG_GETARG_TEXT_P(0);
+       RangeVar   *rv;
+       Relation        idxrel;
+       Buffer          metabuf;
+       BTMetaPageData *meta;
+       TupleDesc       desc;
+       HeapTuple       row;
+       Datum           result;
+       uint32          fields[BTMETAP_NCOLUMNS];
+       char       *cstrings[BTMETAP_NCOLUMNS];
+       int                     i;
+
+       /* Resolve and open the named relation; it must be a btree index. */
+       rv = makeRangeVarFromNameList(textToQualifiedNameList(index_name));
+       idxrel = relation_openrv(rv, AccessShareLock);
+
+       if (!IS_INDEX(idxrel) || !IS_BTREE(idxrel))
+               elog(ERROR, "bt_metap() can be used only on b-tree index.");
+
+       /* The metapage is always block 0. */
+       metabuf = ReadBuffer(idxrel, 0);
+       meta = BTPageGetMeta(BufferGetPage(metabuf));
+
+       desc = RelationNameGetTupleDesc(BTMETAP_TYPE);
+
+       /* Gather the metapage fields in result-column order ... */
+       fields[0] = meta->btm_magic;
+       fields[1] = meta->btm_version;
+       fields[2] = meta->btm_root;
+       fields[3] = meta->btm_level;
+       fields[4] = meta->btm_fastroot;
+       fields[5] = meta->btm_fastlevel;
+
+       /* ... and render each one as a C string for the input functions. */
+       for (i = 0; i < BTMETAP_NCOLUMNS; i++)
+       {
+               cstrings[i] = palloc(32);
+               snprintf(cstrings[i], 32, "%d", fields[i]);
+       }
+
+       row = BuildTupleFromCStrings(TupleDescGetAttInMetadata(desc), cstrings);
+       result = TupleGetDatum(TupleDescGetSlot(desc), row);
+
+       ReleaseBuffer(metabuf);
+       relation_close(idxrel, AccessShareLock);
+
+       PG_RETURN_DATUM(result);
+}
diff --git a/contrib/pageinspect/heapfuncs.c b/contrib/pageinspect/heapfuncs.c
new file mode 100644 (file)
index 0000000..abf50de
--- /dev/null
@@ -0,0 +1,229 @@
+/*-------------------------------------------------------------------------
+ *
+ * heapfuncs.c
+ *       Functions to investigate heap pages
+ *
+ * We check the input to these functions for corrupt pointers etc. that
+ * might cause crashes, but at the same time we try to print out as much
+ * information as possible, even if it's nonsense. That's because if a
+ * page is corrupt, we don't know why and how exactly it is corrupt, so we
+ * let the user judge it.
+ * 
+ * These functions are restricted to superusers for the fear of introducing
+ * security holes if the input checking isn't as water-tight as it should. 
+ * You'd need to be superuser to obtain a raw page image anyway, so 
+ * there's hardly any use case for using these without superuser-rights
+ * anyway.
+ *
+ * Copyright (c) 2007, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *       $PostgreSQL: pgsql/contrib/pageinspect/heapfuncs.c,v 1.1 2007/05/17 19:11:24 momjian Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "fmgr.h"
+#include "funcapi.h"
+#include "access/heapam.h"
+#include "access/transam.h"
+#include "catalog/namespace.h"
+#include "catalog/pg_type.h"
+#include "utils/builtins.h"
+#include "miscadmin.h"
+
+Datum heap_page_items(PG_FUNCTION_ARGS);
+
+#define GET_TEXT(str_) \
+        DirectFunctionCall1(textin, CStringGetDatum(str_))
+
+/*
+ * bits_to_text
+ *
+ * Converts a bits8-array of 'len' bits to a human-readable
+ * c-string representation.
+ */
+static char *
+bits_to_text(bits8 *bits, int len)
+{
+       char       *out = palloc(len + 1);
+       int                     pos = 0;
+
+       /*
+        * Emit one character per bit.  Bit 'pos' is bit (pos % 8) of byte
+        * (pos / 8), counting from the least significant bit of each byte.
+        */
+       while (pos < len)
+       {
+               if ((bits[pos >> 3] >> (pos & 7)) & 1)
+                       out[pos] = '1';
+               else
+                       out[pos] = '0';
+               pos++;
+       }
+
+       /* terminate the palloc'd string */
+       out[pos] = '\0';
+
+       return out;
+}
+
+
+/*
+ * heap_page_items
+ *
+ * Allows inspection of line pointers and tuple headers of a heap page.
+ */
+PG_FUNCTION_INFO_V1(heap_page_items);
+
+/* State kept between calls of the heap_page_items() set-returning function. */
+typedef struct heap_page_items_state
+{
+       TupleDesc       tupd;           /* descriptor of the function's result row type */
+       Page            page;           /* page image being decoded */
+       uint16          offset;         /* line pointer to report on the next call */
+} heap_page_items_state;
+
+/*
+ * Set-returning function: one row per line pointer of the heap page image
+ * passed as a bytea argument.  Tuple header columns are NULL for line
+ * pointers that are unused or fail the sanity checks.
+ *
+ * Only superusers may call it; an image shorter than a page header is
+ * rejected.  If the image is shorter than its pd_lower claims, only the
+ * line pointers that actually lie inside the image are reported.
+ */
+Datum
+heap_page_items(PG_FUNCTION_ARGS)
+{
+       bytea  *raw_page = PG_GETARG_BYTEA_P(0);
+       heap_page_items_state *inter_call_data = NULL;
+       FuncCallContext *fctx;
+       int             raw_page_size;
+
+       if (!superuser())
+               ereport(ERROR,
+                               (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+                                (errmsg("must be superuser to use raw page functions"))));
+
+       raw_page_size = VARSIZE(raw_page) - VARHDRSZ;
+
+       if (SRF_IS_FIRSTCALL())
+       {
+               TupleDesc       tupdesc;
+               MemoryContext mctx;
+               uint32          max_lp;
+
+               if(raw_page_size < SizeOfPageHeaderData)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                        errmsg("input page too small (%d bytes)", raw_page_size)));
+
+               fctx = SRF_FIRSTCALL_INIT();
+               mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
+
+               inter_call_data = palloc(sizeof(heap_page_items_state));
+
+               /* Build a tuple descriptor for our result type */
+               if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+                       elog(ERROR, "return type must be a row type");
+
+               inter_call_data->tupd = tupdesc;
+
+               inter_call_data->offset = FirstOffsetNumber;
+
+               /*
+                * Keep a private copy of the page in the multi-call context.  The
+                * argument may have been detoasted into memory that does not
+                * outlive the current call.
+                */
+               inter_call_data->page = palloc(raw_page_size);
+               memcpy(inter_call_data->page, VARDATA(raw_page), raw_page_size);
+
+               /*
+                * Never walk more line pointers than fit into the bytes we were
+                * given, whatever pd_lower claims.
+                */
+               max_lp = (raw_page_size - SizeOfPageHeaderData) / sizeof(ItemIdData);
+               fctx->max_calls = Min(PageGetMaxOffsetNumber(inter_call_data->page),
+                                                         max_lp);
+               fctx->user_fctx = inter_call_data;
+
+               MemoryContextSwitchTo(mctx);
+       }
+
+       fctx = SRF_PERCALL_SETUP();
+       inter_call_data = fctx->user_fctx;
+
+       if (fctx->call_cntr < fctx->max_calls)
+       {
+               Page            page = inter_call_data->page;
+               HeapTuple       resultTuple;
+               Datum           result;
+               ItemId          id;
+               Datum           values[13];
+               bool            nulls[13];
+               uint16          lp_offset;
+               uint16          lp_flags;
+               uint16          lp_len;
+
+               memset(nulls, 0, sizeof(nulls));
+
+               /* Extract information from the line pointer */
+
+               id = PageGetItemId(page, inter_call_data->offset);
+
+               lp_offset       = ItemIdGetOffset(id);
+               lp_flags        = ItemIdGetFlags(id);
+               lp_len          = ItemIdGetLength(id);
+
+               values[0] = UInt16GetDatum(inter_call_data->offset);
+               values[1] = UInt16GetDatum(lp_offset);
+               values[2] = UInt16GetDatum(lp_flags);
+               values[3] = UInt16GetDatum(lp_len);
+
+               /*
+                * We do just enough validity checking to make sure we don't
+                * reference data outside the page passed to us. The page
+                * could be corrupt in many other ways, but at least we won't
+                * crash.
+                *
+                * The item must be large enough to hold the fixed part of a tuple
+                * header, i.e. everything before t_bits.  Note that
+                * sizeof(HeapTupleHeader) would only be the size of a pointer.
+                */
+               if ((lp_len >= offsetof(HeapTupleHeaderData, t_bits)) &&
+                       (lp_offset == MAXALIGN(lp_offset)) &&
+                       (lp_offset + lp_len <= raw_page_size) &&
+                       ItemIdIsUsed(id))
+               {
+                       HeapTupleHeader tuphdr;
+                       int                             bits_len;
+
+                       /* Extract information from the tuple header */
+
+                       tuphdr = (HeapTupleHeader) PageGetItem(page, id);
+
+                       values[4] = UInt32GetDatum(HeapTupleHeaderGetXmin(tuphdr));
+                       values[5] = UInt32GetDatum(HeapTupleHeaderGetXmax(tuphdr));
+                       values[6] = UInt32GetDatum(HeapTupleHeaderGetRawCommandId(tuphdr)); /* shared with xvac */
+                       values[7] = PointerGetDatum(&tuphdr->t_ctid);
+                       values[8] = UInt16GetDatum(tuphdr->t_infomask2);
+                       values[9] = UInt16GetDatum(tuphdr->t_infomask);
+                       values[10] = UInt8GetDatum(tuphdr->t_hoff);
+
+                       /*
+                        * We already checked that the item is completely within
+                        * the raw page passed to us, with the length given in the line
+                        * pointer. Let's check that t_hoff doesn't point over lp_len,
+                        * before using it to access t_bits and oid.
+                        */
+                       if (tuphdr->t_hoff >= offsetof(HeapTupleHeaderData, t_bits) &&
+                               tuphdr->t_hoff <= lp_len)
+                       {
+                               if (tuphdr->t_infomask & HEAP_HASNULL)
+                               {
+                                       /* bytes between the start of t_bits and t_hoff */
+                                       bits_len = tuphdr->t_hoff -
+                                               (((char *)tuphdr->t_bits) - ((char *)tuphdr));
+
+                                       values[11] = GET_TEXT(
+                                               bits_to_text(tuphdr->t_bits, bits_len * 8));
+                               }
+                               else
+                                       nulls[11] = true;
+
+                               if (tuphdr->t_infomask & HEAP_HASOID)
+                                       values[12] = ObjectIdGetDatum(HeapTupleHeaderGetOid(tuphdr));
+                               else
+                                       nulls[12] = true;
+                       }
+                       else
+                       {
+                               nulls[11] = true;
+                               nulls[12] = true;
+                       }
+               }
+               else
+               {
+                       /* The line pointer is not used, or it's invalid. Set the rest of
+                        * the fields to NULL */
+                       int i;
+
+                       for(i = 4; i <= 12; i++)
+                               nulls[i] = true;
+               }
+
+               /* Build and return the result tuple. */
+               resultTuple = heap_form_tuple(inter_call_data->tupd, values, nulls);
+               result = HeapTupleGetDatum(resultTuple);
+
+               inter_call_data->offset++;
+
+               SRF_RETURN_NEXT(fctx, result);
+       }
+       else
+               SRF_RETURN_DONE(fctx);
+}
diff --git a/contrib/pageinspect/pageinspect.sql.in b/contrib/pageinspect/pageinspect.sql.in
new file mode 100644 (file)
index 0000000..3bd2825
--- /dev/null
@@ -0,0 +1,109 @@
+-- Adjust this setting to control where the objects get created.
+SET search_path = public;
+
+--
+-- get_raw_page()
+--
+-- Takes a relation name and a block number and returns that block's
+-- contents as a bytea.  The C implementation (rawpage.c) rejects callers
+-- that are not superusers.
+--
+CREATE OR REPLACE FUNCTION get_raw_page(text, int4)
+RETURNS bytea
+AS 'MODULE_PATHNAME', 'get_raw_page'
+LANGUAGE C STRICT;
+
+--
+-- page_header()
+--
+-- Decodes the page header of a raw page image (a bytea such as the one
+-- returned by get_raw_page()) into a single row.
+--
+-- lower, upper, special and pagesize are declared int4 rather than
+-- smallint: the C function supplies them as unsigned 16-bit values, and
+-- with a 32 kB block size they can reach 32768, which a smallint column
+-- would display as a negative number.
+--
+CREATE TYPE page_header_type AS (
+       lsn text,
+       tli smallint,
+       flags smallint,
+       lower int4,
+       upper int4,
+       special int4,
+       pagesize int4,
+       version smallint
+);
+
+CREATE OR REPLACE FUNCTION page_header(bytea)
+RETURNS page_header_type
+AS 'MODULE_PATHNAME', 'page_header'
+LANGUAGE C STRICT;
+
+--
+-- heap_page_items()
+--
+-- Returns one row per line pointer of a raw heap page image.
+--
+-- The lp* columns describe the line pointer itself; the t_* columns come
+-- from the tuple header it points to.  t_xmin through t_oid are NULL when
+-- the line pointer is unused or invalid.  t_bits is NULL unless the tuple
+-- has a null bitmap, and t_oid is NULL unless the tuple carries an OID
+-- (see heap_page_items() in heapfuncs.c).
+--
+CREATE TYPE heap_page_items_type AS (
+       lp smallint,
+       lp_off smallint,
+       lp_flags smallint,
+       lp_len smallint,
+       t_xmin xid,
+       t_xmax xid,
+       t_field3 int4,
+       t_ctid tid,
+       t_infomask2 smallint,
+       t_infomask smallint,
+       t_hoff smallint,
+       t_bits text,    -- null bitmap rendered as text
+       t_oid oid
+);
+
+CREATE OR REPLACE FUNCTION heap_page_items(bytea)
+RETURNS SETOF heap_page_items_type
+AS 'MODULE_PATHNAME', 'heap_page_items'
+LANGUAGE C STRICT;
+
+--
+-- bt_metap()
+--
+-- Takes the name of a btree index and returns information about its
+-- metapage as a single row.
+--
+CREATE TYPE bt_metap_type AS (
+  magic int4,
+  version int4,
+  root int4,
+  level int4,
+  fastroot int4,
+  fastlevel int4
+);
+
+-- The language name is written unquoted, matching the other functions in
+-- this file; the quoted form is accepted only for backward compatibility.
+CREATE OR REPLACE FUNCTION bt_metap(text)
+RETURNS bt_metap_type
+AS 'MODULE_PATHNAME', 'bt_metap'
+LANGUAGE C STRICT;
+
+--
+-- bt_page_stats()
+--
+-- Takes the name of a btree index and a block number and returns summary
+-- information about that single page as one row.
+--
+CREATE TYPE bt_page_stats_type AS (
+  blkno int4,
+  type char,
+  live_items int4,
+  dead_items int4,
+  avg_item_size float,
+  page_size int4,
+  free_size int4,
+  btpo_prev int4,
+  btpo_next int4,
+  btpo int4,
+  btpo_flags int4
+);
+
+-- The language name is written unquoted, matching the other functions in
+-- this file; the quoted form is accepted only for backward compatibility.
+CREATE OR REPLACE FUNCTION bt_page_stats(text, int4)
+RETURNS bt_page_stats_type
+AS 'MODULE_PATHNAME', 'bt_page_stats'
+LANGUAGE C STRICT;
+
+--
+-- bt_page_items()
+--
+-- Takes the name of a btree index and a block number and returns one row
+-- per item stored on that page.
+--
+CREATE TYPE bt_page_items_type AS (
+  itemoffset smallint,
+  ctid tid,
+  itemlen smallint,
+  nulls bool,
+  vars bool,
+  data text
+);
+
+-- The language name is written unquoted, matching the other functions in
+-- this file; the quoted form is accepted only for backward compatibility.
+CREATE OR REPLACE FUNCTION bt_page_items(text, int4)
+RETURNS SETOF bt_page_items_type
+AS 'MODULE_PATHNAME', 'bt_page_items'
+LANGUAGE C STRICT;
diff --git a/contrib/pageinspect/rawpage.c b/contrib/pageinspect/rawpage.c
new file mode 100644 (file)
index 0000000..4aba08e
--- /dev/null
@@ -0,0 +1,164 @@
+/*-------------------------------------------------------------------------
+ *
+ * rawpage.c
+ *       Functions to extract a raw page as bytea and inspect it
+ *
+ * Access-method specific inspection functions are in separate files.
+ *
+ * Copyright (c) 2007, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *       $PostgreSQL: pgsql/contrib/pageinspect/rawpage.c,v 1.1 2007/05/17 19:11:24 momjian Exp $
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "fmgr.h"
+#include "funcapi.h"
+#include "access/heapam.h"
+#include "access/transam.h"
+#include "catalog/namespace.h"
+#include "catalog/pg_type.h"
+#include "utils/builtins.h"
+#include "miscadmin.h"
+
+PG_MODULE_MAGIC;
+
+Datum get_raw_page(PG_FUNCTION_ARGS);
+Datum page_header(PG_FUNCTION_ARGS);
+
+/*
+ * get_raw_page
+ *
+ * Returns a copy of a page from shared buffers as a bytea
+ *
+ * Arguments: relation name (text, optionally schema-qualified) and block
+ * number (int4).  The result is always BLCKSZ bytes of page data.
+ *
+ * Raises an error if the caller is not a superuser, if the relation is a
+ * view or composite type (no storage), if it is a temporary table of
+ * another session, or if the block number is past the end of the relation.
+ */
+PG_FUNCTION_INFO_V1(get_raw_page);
+
+Datum
+get_raw_page(PG_FUNCTION_ARGS)
+{
+       text       *relname = PG_GETARG_TEXT_P(0);
+       /*
+        * The SQL argument is int4; reading it as unsigned makes a negative
+        * value appear as a very large block number, which the range check
+        * below rejects.
+        */
+       uint32          blkno = PG_GETARG_UINT32(1);
+
+       Relation        rel;
+       RangeVar        *relrv;
+       bytea           *raw_page;
+       char            *raw_page_data;
+       Buffer          buf;
+
+       if (!superuser())
+               ereport(ERROR,
+                               (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+                                errmsg("must be superuser to use raw functions")));
+
+       relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
+       rel = relation_openrv(relrv, AccessShareLock);
+
+       /* Check that this relation has storage */
+       if (rel->rd_rel->relkind == RELKIND_VIEW)
+               ereport(ERROR,
+                               (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                                errmsg("cannot get raw page from view \"%s\"",
+                                                       RelationGetRelationName(rel))));
+       if (rel->rd_rel->relkind == RELKIND_COMPOSITE_TYPE)
+               ereport(ERROR,
+                               (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                                errmsg("cannot get raw page from composite type \"%s\"",
+                                                       RelationGetRelationName(rel))));
+
+       /*
+        * Reject temporary tables of other sessions: their pages are held in
+        * the owning backend's local buffers, so we cannot read them reliably
+        * from here.
+        */
+       if (isOtherTempNamespace(RelationGetNamespace(rel)))
+               ereport(ERROR,
+                               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                errmsg("cannot access temporary tables of other sessions")));
+
+       /* A bad block number is a user error, so report it with an errcode */
+       if (blkno >= RelationGetNumberOfBlocks(rel))
+               ereport(ERROR,
+                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                errmsg("block number %u is out of range for relation \"%s\"",
+                                               blkno, RelationGetRelationName(rel))));
+
+       /* Initialize buffer to copy to */
+       raw_page = (bytea *) palloc(BLCKSZ + VARHDRSZ);
+       SET_VARSIZE(raw_page, BLCKSZ + VARHDRSZ);
+       raw_page_data = VARDATA(raw_page);
+
+       /*
+        * Take a verbatim copy of the page.  The share lock is held only for
+        * the duration of the memcpy.
+        */
+       buf = ReadBuffer(rel, blkno);
+       LockBuffer(buf, BUFFER_LOCK_SHARE);
+
+       memcpy(raw_page_data, BufferGetPage(buf), BLCKSZ);
+
+       LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+       ReleaseBuffer(buf);
+
+       relation_close(rel, AccessShareLock);
+
+       PG_RETURN_BYTEA_P(raw_page);
+}
+
+/*
+ * page_header
+ *
+ * Decodes the header of a raw page image (bytea) and returns its fields
+ * as one composite row: lsn, tli, flags, lower, upper, special, pagesize
+ * and version.
+ *
+ * Raises an error if the caller is not a superuser or if the input is
+ * shorter than a page header.
+ */
+
+PG_FUNCTION_INFO_V1(page_header);
+
+Datum
+page_header(PG_FUNCTION_ARGS)
+{
+       bytea      *raw_page = PG_GETARG_BYTEA_P(0);
+       int                     payload_len;
+       PageHeader      hdr;
+       XLogRecPtr      page_lsn;
+       char            lsn_text[64];
+       TupleDesc       rowdesc;
+       Datum           fields[8];
+       bool            isnull[8];
+       int                     col = 0;
+       HeapTuple       row;
+
+       if (!superuser())
+               ereport(ERROR,
+                               (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+                                (errmsg("must be superuser to use raw page functions"))));
+
+       /* Number of data bytes the caller handed us, excluding the varlena header */
+       payload_len = VARSIZE(raw_page) - VARHDRSZ;
+
+       /*
+        * Refuse inputs too short to hold a page header, so that none of the
+        * field accesses below can run past the end of the supplied buffer.
+        */
+       if (payload_len < sizeof(PageHeaderData))
+               ereport(ERROR,
+                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                errmsg("input page too small (%d bytes)", payload_len)));
+
+       hdr = (PageHeader) VARDATA(raw_page);
+
+       /* Look up the descriptor of the composite type we were declared to return */
+       if (get_call_result_type(fcinfo, NULL, &rowdesc) != TYPEFUNC_COMPOSITE)
+               elog(ERROR, "return type must be a row type");
+
+       /* Every output column is always populated */
+       memset(isnull, 0, sizeof(isnull));
+
+       /* The LSN is rendered as text in the "xlogid/xrecoff" hex form */
+       page_lsn = PageGetLSN(hdr);
+       snprintf(lsn_text, sizeof(lsn_text), "%X/%X",
+                        page_lsn.xlogid, page_lsn.xrecoff);
+
+       /* Fill the columns in the order the result type declares them */
+       fields[col++] = DirectFunctionCall1(textin, CStringGetDatum(lsn_text));
+       fields[col++] = UInt16GetDatum(PageGetTLI(hdr));
+       fields[col++] = UInt16GetDatum(hdr->pd_flags);
+       fields[col++] = UInt16GetDatum(hdr->pd_lower);
+       fields[col++] = UInt16GetDatum(hdr->pd_upper);
+       fields[col++] = UInt16GetDatum(hdr->pd_special);
+       fields[col++] = UInt16GetDatum(PageGetPageSize(hdr));
+       fields[col++] = UInt16GetDatum(PageGetPageLayoutVersion(hdr));
+
+       /* Build and return the tuple. */
+       row = heap_form_tuple(rowdesc, fields, isnull);
+
+       PG_RETURN_DATUM(HeapTupleGetDatum(row));
+}
diff --git a/contrib/pageinspect/uninstall_pageinspect.sql b/contrib/pageinspect/uninstall_pageinspect.sql
new file mode 100644 (file)
index 0000000..ecf9095
--- /dev/null
@@ -0,0 +1,20 @@
+-- Adjust this setting to match the schema the objects were created in.
+SET search_path = public;
+
+-- get_raw_page() returns plain bytea, so it has no companion type to drop.
+DROP FUNCTION get_raw_page(text, int4);
+
+-- For the rest, each function is dropped before the composite type it
+-- returns.
+DROP FUNCTION page_header(bytea);
+DROP TYPE page_header_type;
+
+DROP FUNCTION heap_page_items(bytea);
+DROP TYPE heap_page_items_type;
+
+DROP FUNCTION bt_metap(text);
+DROP TYPE bt_metap_type;
+
+DROP FUNCTION bt_page_stats(text, int4);
+DROP TYPE bt_page_stats_type;
+
+DROP FUNCTION bt_page_items(text, int4);
+DROP TYPE bt_page_items_type;
+
index 235de72..c47f6ad 100644 (file)
@@ -56,53 +56,6 @@ pgstattuple README                   2002/08/29 Tatsuo Ishii
         avg_leaf_density   | 50.27
         leaf_fragmentation | 0
 
-    bt_metap
-    --------
-    bt_metap() returns information about the btree index metapage:
-
-        test=> SELECT * FROM bt_metap('pg_cast_oid_index');
-        -[ RECORD 1 ]-----
-        magic     | 340322
-        version   | 2
-        root      | 1
-        level     | 0
-        fastroot  | 1
-        fastlevel | 0
-
-    bt_page_stats
-    -------------
-    bt_page_stats() shows information about single btree pages:
-
-        test=> SELECT * FROM bt_page_stats('pg_cast_oid_index', 1);
-        -[ RECORD 1 ]-+-----
-        blkno         | 1
-        type          | l
-        live_items    | 256
-        dead_items    | 0
-        avg_item_size | 12
-        page_size     | 8192
-        free_size     | 4056
-        btpo_prev     | 0
-        btpo_next     | 0
-        btpo          | 0
-        btpo_flags    | 3
-
-    bt_page_items
-    -------------
-    bt_page_items() returns information about specific items on btree pages:
-
-        test=> SELECT * FROM bt_page_items('pg_cast_oid_index', 1);
-         itemoffset |  ctid   | itemlen | nulls | vars |    data
-        ------------+---------+---------+-------+------+-------------
-                  1 | (0,1)   |      12 | f     | f    | 23 27 00 00
-                  2 | (0,2)   |      12 | f     | f    | 24 27 00 00
-                  3 | (0,3)   |      12 | f     | f    | 25 27 00 00
-                  4 | (0,4)   |      12 | f     | f    | 26 27 00 00
-                  5 | (0,5)   |      12 | f     | f    | 27 27 00 00
-                  6 | (0,6)   |      12 | f     | f    | 28 27 00 00
-                  7 | (0,7)   |      12 | f     | f    | 29 27 00 00
-                  8 | (0,8)   |      12 | f     | f    | 2a 27 00 00
-
 
 2. Installing pgstattuple
 
@@ -140,6 +93,10 @@ pgstattuple README                  2002/08/29 Tatsuo Ishii
 
 5. History
 
+    2007/05/17
+
+       Moved page-level functions to contrib/pageinspect
+
     2006/06/28
 
        Extended to work against indexes.
index 2982cea..838fd9e 100644 (file)
 #include "utils/inval.h"
 
 PG_FUNCTION_INFO_V1(pgstatindex);
-PG_FUNCTION_INFO_V1(bt_metap);
-PG_FUNCTION_INFO_V1(bt_page_items);
-PG_FUNCTION_INFO_V1(bt_page_stats);
 PG_FUNCTION_INFO_V1(pg_relpages);
 
 extern Datum pgstatindex(PG_FUNCTION_ARGS);
-extern Datum bt_metap(PG_FUNCTION_ARGS);
-extern Datum bt_page_items(PG_FUNCTION_ARGS);
-extern Datum bt_page_stats(PG_FUNCTION_ARGS);
 extern Datum pg_relpages(PG_FUNCTION_ARGS);
 
 #define PGSTATINDEX_TYPE "public.pgstatindex_type"
 #define PGSTATINDEX_NCOLUMNS 10
 
-#define BTMETAP_TYPE "public.bt_metap_type"
-#define BTMETAP_NCOLUMNS 6
-
-#define BTPAGEITEMS_TYPE "public.bt_page_items_type"
-#define BTPAGEITEMS_NCOLUMNS 6
-
-#define BTPAGESTATS_TYPE "public.bt_page_stats_type"
-#define BTPAGESTATS_NCOLUMNS 11
-
-
 #define IS_INDEX(r) ((r)->rd_rel->relkind == 'i')
 #define IS_BTREE(r) ((r)->rd_rel->relam == BTREE_AM_OID)
 
@@ -73,150 +57,28 @@ extern Datum pg_relpages(PG_FUNCTION_ARGS);
                         elog(ERROR, "Block number out of range."); }
 
 /* ------------------------------------------------
- * structure for single btree page statistics
- * ------------------------------------------------
- */
-typedef struct BTPageStat
-{
-       uint32          blkno;
-       uint32          live_items;
-       uint32          dead_items;
-       uint32          page_size;
-       uint32          max_avail;
-       uint32          free_size;
-       uint32          avg_item_size;
-       uint32          fragments;
-       char            type;
-
-       /* opaque data */
-       BlockNumber btpo_prev;
-       BlockNumber btpo_next;
-       union
-       {
-               uint32          level;
-               TransactionId xact;
-       }                       btpo;
-       uint16          btpo_flags;
-       BTCycleId       btpo_cycleid;
-}      BTPageStat;
-
-/* ------------------------------------------------
  * A structure for a whole btree index statistics
  * used by pgstatindex().
  * ------------------------------------------------
  */
 typedef struct BTIndexStat
 {
-       uint32          magic;
        uint32          version;
        BlockNumber root_blkno;
        uint32          level;
 
-       BlockNumber fastroot;
-       uint32          fastlevel;
-
-       uint32          live_items;
-       uint32          dead_items;
-
        uint32          root_pages;
        uint32          internal_pages;
        uint32          leaf_pages;
        uint32          empty_pages;
        uint32          deleted_pages;
 
-       uint32          page_size;
-       uint32          avg_item_size;
-
        uint32          max_avail;
        uint32          free_space;
 
        uint32          fragments;
 }      BTIndexStat;
 
-/* -------------------------------------------------
- * GetBTPageStatistics()
- *
- * Collect statistics of single b-tree leaf page
- * -------------------------------------------------
- */
-static void
-GetBTPageStatistics(BlockNumber blkno, Buffer buffer, BTPageStat * stat)
-{
-       Page            page = BufferGetPage(buffer);
-       PageHeader      phdr = (PageHeader) page;
-       OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
-       BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
-       int                     item_size = 0;
-       int                     off;
-
-       stat->blkno = blkno;
-
-       stat->max_avail = BLCKSZ - (BLCKSZ - phdr->pd_special + SizeOfPageHeaderData);
-
-       stat->dead_items = stat->live_items = 0;
-       stat->fragments = 0;
-
-       stat->page_size = PageGetPageSize(page);
-
-       /* page type (flags) */
-       if (P_ISDELETED(opaque))
-       {
-               stat->type = 'd';
-               stat->btpo.xact = opaque->btpo.xact;
-               return;
-       }
-       else if (P_IGNORE(opaque))
-               stat->type = 'e';
-       else if (P_ISLEAF(opaque))
-               stat->type = 'l';
-       else if (P_ISROOT(opaque))
-               stat->type = 'r';
-       else
-               stat->type = 'i';
-
-       /* btpage opaque data */
-       stat->btpo_prev = opaque->btpo_prev;
-       stat->btpo_next = opaque->btpo_next;
-       stat->btpo.level = opaque->btpo.level;
-       stat->btpo_flags = opaque->btpo_flags;
-       stat->btpo_cycleid = opaque->btpo_cycleid;
-
-       /*----------------------------------------------
-        * If a next leaf is on the previous block,
-        * it means a fragmentation.
-        *----------------------------------------------
-        */
-       if (stat->type == 'l')
-       {
-               if (opaque->btpo_next != P_NONE && opaque->btpo_next < blkno)
-                       stat->fragments++;
-       }
-
-       /* count live and dead tuples, and free space */
-       for (off = FirstOffsetNumber; off <= maxoff; off++)
-       {
-               IndexTuple      itup;
-
-               ItemId          id = PageGetItemId(page, off);
-
-               itup = (IndexTuple) PageGetItem(page, id);
-
-               item_size += IndexTupleSize(itup);
-
-               if (!ItemIdDeleted(id))
-                       stat->live_items++;
-               else
-                       stat->dead_items++;
-       }
-       stat->free_size = PageGetFreeSpace(page);
-
-       if ((stat->live_items + stat->dead_items) > 0)
-               stat->avg_item_size = item_size / (stat->live_items + stat->dead_items);
-       else
-               stat->avg_item_size = 0;
-}
-
-
 /* ------------------------------------------------------
  * pgstatindex()
  *
@@ -249,12 +111,9 @@ pgstatindex(PG_FUNCTION_ARGS)
                Page            page = BufferGetPage(buffer);
                BTMetaPageData *metad = BTPageGetMeta(page);
 
-               indexStat.magic = metad->btm_magic;
                indexStat.version = metad->btm_version;
                indexStat.root_blkno = metad->btm_root;
                indexStat.level = metad->btm_level;
-               indexStat.fastroot = metad->btm_fastroot;
-               indexStat.fastlevel = metad->btm_fastlevel;
 
                ReleaseBuffer(buffer);
        }
@@ -279,47 +138,49 @@ pgstatindex(PG_FUNCTION_ARGS)
         */
        for (blkno = 1; blkno < nblocks; blkno++)
        {
-               Buffer          buffer = ReadBuffer(rel, blkno);
-               BTPageStat      stat;
-
-               /* scan one page */
-               stat.blkno = blkno;
-               GetBTPageStatistics(blkno, buffer, &stat);
-
-               /*---------------------
-                * page status (type)
-                *---------------------
-                */
-               switch (stat.type)
-               {
-                       case 'd':
-                               indexStat.deleted_pages++;
-                               break;
-                       case 'l':
-                               indexStat.leaf_pages++;
-                               break;
-                       case 'i':
-                               indexStat.internal_pages++;
-                               break;
-                       case 'e':
-                               indexStat.empty_pages++;
-                               break;
-                       case 'r':
-                               indexStat.root_pages++;
-                               break;
-                       default:
-                               elog(ERROR, "unknown page status.");
-               }
+               Buffer          buffer;
+               Page            page;
+               BTPageOpaque opaque;
+
+               /* Read and lock buffer */
+               buffer = ReadBuffer(rel, blkno);
+               LockBuffer(buffer, BUFFER_LOCK_SHARE);
 
-               /* -- leaf fragmentation -- */
-               indexStat.fragments += stat.fragments;
+               page = BufferGetPage(buffer);
+               opaque = (BTPageOpaque) PageGetSpecialPointer(page);
 
-               if (stat.type == 'l')
+               /* Determine page type, and update totals */
+
+               if (P_ISDELETED(opaque))
+                       indexStat.deleted_pages++;
+
+               else if (P_IGNORE(opaque))
+                       indexStat.empty_pages++;
+
+               else if (P_ISLEAF(opaque))
                {
-                       indexStat.max_avail += stat.max_avail;
-                       indexStat.free_space += stat.free_size;
+                       int max_avail;
+                       max_avail = BLCKSZ - (BLCKSZ - ((PageHeader)page)->pd_special + SizeOfPageHeaderData);
+                       indexStat.max_avail += max_avail;
+                       indexStat.free_space += PageGetFreeSpace(page);
+
+                       indexStat.leaf_pages++;
+
+                       /*
+                        * If the next leaf is on an earlier block, it
+                        * means a fragmentation.
+                        */
+                       if (opaque->btpo_next != P_NONE && opaque->btpo_next < blkno)
+                               indexStat.fragments++;
                }
+               else if (P_ISROOT(opaque))
+                       indexStat.root_pages++;
+
+               else
+                       indexStat.internal_pages++;
 
+               /* Unlock and release buffer */
+               LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
                ReleaseBuffer(buffer);
        }
 
@@ -373,305 +234,6 @@ pgstatindex(PG_FUNCTION_ARGS)
        PG_RETURN_DATUM(result);
 }
 
-/* -----------------------------------------------
- * bt_page()
- *
- * Usage: SELECT * FROM bt_page('t1_pkey', 0);
- * -----------------------------------------------
- */
-Datum
-bt_page_stats(PG_FUNCTION_ARGS)
-{
-       text       *relname = PG_GETARG_TEXT_P(0);
-       uint32          blkno = PG_GETARG_UINT32(1);
-       Buffer          buffer;
-
-       Relation        rel;
-       RangeVar   *relrv;
-       Datum           result;
-
-       relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
-       rel = relation_openrv(relrv, AccessShareLock);
-
-       CHECK_RELATION_BLOCK_RANGE(rel, blkno);
-
-       buffer = ReadBuffer(rel, blkno);
-
-       if (!IS_INDEX(rel) || !IS_BTREE(rel))
-               elog(ERROR, "bt_page_stats() can be used only on b-tree index.");
-
-       if (blkno == 0)
-               elog(ERROR, "Block 0 is a meta page.");
-
-       {
-               HeapTuple       tuple;
-               TupleDesc       tupleDesc;
-               int                     j;
-               char       *values[BTPAGESTATS_NCOLUMNS];
-
-               BTPageStat      stat;
-
-               GetBTPageStatistics(blkno, buffer, &stat);
-
-               tupleDesc = RelationNameGetTupleDesc(BTPAGESTATS_TYPE);
-
-               j = 0;
-               values[j] = palloc(32);
-               snprintf(values[j++], 32, "%d", stat.blkno);
-
-               values[j] = palloc(32);
-               snprintf(values[j++], 32, "%c", stat.type);
-               values[j] = palloc(32);
-               snprintf(values[j++], 32, "%d", stat.live_items);
-               values[j] = palloc(32);
-               snprintf(values[j++], 32, "%d", stat.dead_items);
-               values[j] = palloc(32);
-               snprintf(values[j++], 32, "%d", stat.avg_item_size);
-               values[j] = palloc(32);
-               snprintf(values[j++], 32, "%d", stat.page_size);
-               values[j] = palloc(32);
-               snprintf(values[j++], 32, "%d", stat.free_size);
-               values[j] = palloc(32);
-               snprintf(values[j++], 32, "%d", stat.btpo_prev);
-               values[j] = palloc(32);
-               snprintf(values[j++], 32, "%d", stat.btpo_next);
-
-               values[j] = palloc(32);
-               if (stat.type == 'd')
-                       snprintf(values[j++], 32, "%d", stat.btpo.xact);
-               else
-                       snprintf(values[j++], 32, "%d", stat.btpo.level);
-
-               values[j] = palloc(32);
-               snprintf(values[j++], 32, "%d", stat.btpo_flags);
-
-               tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc),
-                                                                          values);
-
-               result = TupleGetDatum(TupleDescGetSlot(tupleDesc), tuple);
-       }
-
-       ReleaseBuffer(buffer);
-
-       relation_close(rel, AccessShareLock);
-
-       PG_RETURN_DATUM(result);
-}
-
-/*-------------------------------------------------------
- * bt_page_items()
- *
- * Get IndexTupleData set in a leaf page
- *
- * Usage: SELECT * FROM bt_page_items('t1_pkey', 0);
- *-------------------------------------------------------
- */
-/* ---------------------------------------------------
- * data structure for SRF to hold a scan information
- * ---------------------------------------------------
- */
-struct user_args
-{
-       TupleDesc       tupd;
-       Relation        rel;
-       Buffer          buffer;
-       Page            page;
-       uint16          offset;
-};
-
-Datum
-bt_page_items(PG_FUNCTION_ARGS)
-{
-       text       *relname = PG_GETARG_TEXT_P(0);
-       uint32          blkno = PG_GETARG_UINT32(1);
-
-       RangeVar   *relrv;
-       Datum           result;
-       char       *values[BTPAGEITEMS_NCOLUMNS];
-       BTPageOpaque opaque;
-       HeapTuple       tuple;
-       ItemId          id;
-
-       FuncCallContext *fctx;
-       MemoryContext mctx;
-       struct user_args *uargs = NULL;
-
-       if (blkno == 0)
-               elog(ERROR, "Block 0 is a meta page.");
-
-       if (SRF_IS_FIRSTCALL())
-       {
-               fctx = SRF_FIRSTCALL_INIT();
-               mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
-
-               uargs = palloc(sizeof(struct user_args));
-
-               uargs->tupd = RelationNameGetTupleDesc(BTPAGEITEMS_TYPE);
-               uargs->offset = FirstOffsetNumber;
-
-               relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
-               uargs->rel = relation_openrv(relrv, AccessShareLock);
-
-               CHECK_RELATION_BLOCK_RANGE(uargs->rel, blkno);
-
-               uargs->buffer = ReadBuffer(uargs->rel, blkno);
-
-               if (!IS_INDEX(uargs->rel) || !IS_BTREE(uargs->rel))
-                       elog(ERROR, "bt_page_items() can be used only on b-tree index.");
-
-               uargs->page = BufferGetPage(uargs->buffer);
-
-               opaque = (BTPageOpaque) PageGetSpecialPointer(uargs->page);
-
-               if (P_ISDELETED(opaque))
-                       elog(NOTICE, "bt_page_items(): this page is deleted.");
-
-               fctx->max_calls = PageGetMaxOffsetNumber(uargs->page);
-               fctx->user_fctx = uargs;
-
-               MemoryContextSwitchTo(mctx);
-       }
-
-       fctx = SRF_PERCALL_SETUP();
-       uargs = fctx->user_fctx;
-
-       if (fctx->call_cntr < fctx->max_calls)
-       {
-               IndexTuple      itup;
-
-               id = PageGetItemId(uargs->page, uargs->offset);
-
-               if (!ItemIdIsValid(id))
-                       elog(ERROR, "Invalid ItemId.");
-
-               itup = (IndexTuple) PageGetItem(uargs->page, id);
-
-               {
-                       int                     j = 0;
-
-                       BlockNumber blkno = BlockIdGetBlockNumber(&(itup->t_tid.ip_blkid));
-
-                       values[j] = palloc(32);
-                       snprintf(values[j++], 32, "%d", uargs->offset);
-                       values[j] = palloc(32);
-                       snprintf(values[j++], 32, "(%u,%u)", blkno, itup->t_tid.ip_posid);
-                       values[j] = palloc(32);
-                       snprintf(values[j++], 32, "%d", (int) IndexTupleSize(itup));
-                       values[j] = palloc(32);
-                       snprintf(values[j++], 32, "%c", IndexTupleHasNulls(itup) ? 't' : 'f');
-                       values[j] = palloc(32);
-                       snprintf(values[j++], 32, "%c", IndexTupleHasVarwidths(itup) ? 't' : 'f');
-
-                       {
-                               int                     off;
-                               char       *dump;
-                               char       *ptr = (char *) itup + IndexInfoFindDataOffset(itup->t_info);
-
-                               dump = palloc(IndexTupleSize(itup) * 3);
-                               memset(dump, 0, IndexTupleSize(itup) * 3);
-
-                               for (off = 0;
-                                        off < IndexTupleSize(itup) - IndexInfoFindDataOffset(itup->t_info);
-                                        off++)
-                               {
-                                       if (dump[0] == '\0')
-                                               sprintf(dump, "%02x", *(ptr + off) & 0xff);
-                                       else
-                                       {
-                                               char            buf[4];
-
-                                               sprintf(buf, " %02x", *(ptr + off) & 0xff);
-                                               strcat(dump, buf);
-                                       }
-                               }
-                               values[j] = dump;
-                       }
-
-                       tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(uargs->tupd), values);
-                       result = TupleGetDatum(TupleDescGetSlot(uargs->tupd), tuple);
-               }
-
-               uargs->offset = uargs->offset + 1;
-
-               SRF_RETURN_NEXT(fctx, result);
-       }
-       else
-       {
-               ReleaseBuffer(uargs->buffer);
-               relation_close(uargs->rel, AccessShareLock);
-
-               SRF_RETURN_DONE(fctx);
-       }
-}
-
-
-/* ------------------------------------------------
- * bt_metap()
- *
- * Get a btree meta-page information
- *
- * Usage: SELECT * FROM bt_metap('t1_pkey')
- * ------------------------------------------------
- */
-Datum
-bt_metap(PG_FUNCTION_ARGS)
-{
-       text       *relname = PG_GETARG_TEXT_P(0);
-       Buffer          buffer;
-
-       Relation        rel;
-       RangeVar   *relrv;
-       Datum           result;
-
-       relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
-       rel = relation_openrv(relrv, AccessShareLock);
-
-       if (!IS_INDEX(rel) || !IS_BTREE(rel))
-               elog(ERROR, "bt_metap() can be used only on b-tree index.");
-
-       buffer = ReadBuffer(rel, 0);
-
-       {
-               BTMetaPageData *metad;
-
-               TupleDesc       tupleDesc;
-               int                     j;
-               char       *values[BTMETAP_NCOLUMNS];
-               HeapTuple       tuple;
-
-               Page            page = BufferGetPage(buffer);
-
-               metad = BTPageGetMeta(page);
-
-               tupleDesc = RelationNameGetTupleDesc(BTMETAP_TYPE);
-
-               j = 0;
-               values[j] = palloc(32);
-               snprintf(values[j++], 32, "%d", metad->btm_magic);
-               values[j] = palloc(32);
-               snprintf(values[j++], 32, "%d", metad->btm_version);
-               values[j] = palloc(32);
-               snprintf(values[j++], 32, "%d", metad->btm_root);
-               values[j] = palloc(32);
-               snprintf(values[j++], 32, "%d", metad->btm_level);
-               values[j] = palloc(32);
-               snprintf(values[j++], 32, "%d", metad->btm_fastroot);
-               values[j] = palloc(32);
-               snprintf(values[j++], 32, "%d", metad->btm_fastlevel);
-
-               tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc),
-                                                                          values);
-
-               result = TupleGetDatum(TupleDescGetSlot(tupleDesc), tuple);
-       }
-
-       ReleaseBuffer(buffer);
-
-       relation_close(rel, AccessShareLock);
-
-       PG_RETURN_DATUM(result);
-}
-
 /* --------------------------------------------------------
  * pg_relpages()
  *
index 39220f3..77a5e2d 100644 (file)
@@ -45,62 +45,6 @@ AS 'MODULE_PATHNAME', 'pgstatindex'
 LANGUAGE 'C' STRICT;
 
 --
--- bt_metap()
---
-CREATE TYPE bt_metap_type AS (
-  magic int4,
-  version int4,
-  root int4,
-  level int4,
-  fastroot int4,
-  fastlevel int4
-);
-
-CREATE OR REPLACE FUNCTION bt_metap(text)
-RETURNS bt_metap_type
-AS 'MODULE_PATHNAME', 'bt_metap'
-LANGUAGE 'C' STRICT;
-
---
--- bt_page_stats()
---
-CREATE TYPE bt_page_stats_type AS (
-  blkno int4,
-  type char,
-  live_items int4,
-  dead_items int4,
-  avg_item_size float,
-  page_size int4,
-  free_size int4,
-  btpo_prev int4,
-  btpo_next int4,
-  btpo int4,
-  btpo_flags int4
-);
-
-CREATE OR REPLACE FUNCTION bt_page_stats(text, int4)
-RETURNS bt_page_stats_type
-AS 'MODULE_PATHNAME', 'bt_page_stats'
-LANGUAGE 'C' STRICT;
-
---
--- bt_page_items()
---
-CREATE TYPE bt_page_items_type AS (
-  itemoffset int4,
-  ctid tid,
-  itemlen int4,
-  nulls bool,
-  vars bool,
-  data text
-);
-
-CREATE OR REPLACE FUNCTION bt_page_items(text, int4)
-RETURNS SETOF bt_page_items_type
-AS 'MODULE_PATHNAME', 'bt_page_items'
-LANGUAGE 'C' STRICT;
-
---
 -- pg_relpages()
 --
 CREATE OR REPLACE FUNCTION pg_relpages(text)
index 5b857bb..16f3d9a 100644 (file)
@@ -8,13 +8,4 @@ DROP TYPE pgstattuple_type;
 DROP FUNCTION pgstatindex(text);
 DROP TYPE pgstatindex_type;
 
-DROP FUNCTION bt_metap(text);
-DROP TYPE bt_metap_type;
-
-DROP FUNCTION bt_page_stats(text, int4);
-DROP TYPE bt_page_stats_type;
-
-DROP FUNCTION bt_page_items(text, int4);
-DROP TYPE bt_page_items_type;
-
 DROP FUNCTION pg_relpages(text);