src/include/access/nbtree.h

   1 /*-------------------------------------------------------------------------
   2  *
   3  * nbtree.h
   4  *        header file for postgres btree access method implementation.
   5  *
   6  *
   7  * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
   8  * Portions Copyright (c) 1994, Regents of the University of California
   9  *
  10  * $PostgreSQL: pgsql/src/include/access/nbtree.h,v 1.105 2006/10/04 00:30:07 momjian Exp $
  11  *
  12  *-------------------------------------------------------------------------
  13  */
  14 #ifndef NBTREE_H
  15 #define NBTREE_H
  16
  17 #include "access/itup.h"
  18 #include "access/relscan.h"
  19 #include "access/sdir.h"
  20 #include "access/xlogutils.h"
  21
  22
  23 /* There's room for a 16-bit vacuum cycle ID in BTPageOpaqueData */
  24 typedef uint16 BTCycleId;
  25
  26 /*
  27  *      BTPageOpaqueData -- At the end of every page, we store a pointer
  28  *      to both siblings in the tree.  This is used to do forward/backward
  29  *      index scans.  The next-page link is also critical for recovery when
  30  *      a search has navigated to the wrong page due to concurrent page splits
  31  *      or deletions; see src/backend/access/nbtree/README for more info.
  32  *
  33  *      In addition, we store the page's btree level (counting upwards from
  34  *      zero at a leaf page) as well as some flag bits indicating the page type
  35  *      and status.  If the page is deleted, we replace the level with the
  36  *      next-transaction-ID value indicating when it is safe to reclaim the page.
  37  *
  38  *      We also store a "vacuum cycle ID".      When a page is split while VACUUM is
  39  *      processing the index, a nonzero value associated with the VACUUM run is
  40  *      stored into both halves of the split page.      (If VACUUM is not running,
  41  *      both pages receive zero cycleids.)      This allows VACUUM to detect whether
  42  *      a page was split since it started, with a small probability of false match
  43  *      if the page was last split some exact multiple of 65536 VACUUMs ago.
  44  *      Also, during a split, the BTP_SPLIT_END flag is cleared in the left
  45  *      (original) page, and set in the right page, but only if the next page
  46  *      to its right has a different cycleid.
  47  *
  48  *      NOTE: the BTP_LEAF flag bit is redundant since level==0 could be tested
  49  *      instead.
  50  */
  51
  52 typedef struct BTPageOpaqueData
  53 {
  54         BlockNumber btpo_prev;          /* left sibling, or P_NONE if leftmost */
  55         BlockNumber btpo_next;          /* right sibling, or P_NONE if rightmost */
  56         union
  57         {
  58                 uint32          level;          /* tree level --- zero for leaf pages */
  59                 TransactionId xact;             /* next transaction ID, if deleted */
  60         }                       btpo;
  61         uint16          btpo_flags;             /* flag bits, see below */
  62         BTCycleId       btpo_cycleid;   /* vacuum cycle ID of latest split */
  63 } BTPageOpaqueData;
  64
  65 typedef BTPageOpaqueData *BTPageOpaque;
  66
  67 /* Bits defined in btpo_flags */
  68 #define BTP_LEAF                (1 << 0)        /* leaf page, i.e. not internal page */
  69 #define BTP_ROOT                (1 << 1)        /* root page (has no parent) */
  70 #define BTP_DELETED             (1 << 2)        /* page has been deleted from tree */
  71 #define BTP_META                (1 << 3)        /* meta-page */
  72 #define BTP_HALF_DEAD   (1 << 4)        /* empty, but still in tree */
  73 #define BTP_SPLIT_END   (1 << 5)        /* rightmost page of split group */
  74 #define BTP_HAS_GARBAGE (1 << 6)        /* page has LP_DELETEd tuples */
  75
  76
  77 /*
  78  * The Meta page is always the first page in the btree index.
  79  * Its primary purpose is to point to the location of the btree root page.
  80  * We also point to the "fast" root, which is the current effective root;
  81  * see README for discussion.
  82  */
  83
  84 typedef struct BTMetaPageData
  85 {
  86         uint32          btm_magic;              /* should contain BTREE_MAGIC */
  87         uint32          btm_version;    /* should contain BTREE_VERSION */
  88         BlockNumber btm_root;           /* current root location */
  89         uint32          btm_level;              /* tree level of the root page */
  90         BlockNumber btm_fastroot;       /* current "fast" root location */
  91         uint32          btm_fastlevel;  /* tree level of the "fast" root page */
  92 } BTMetaPageData;
  93
  94 #define BTPageGetMeta(p) \
  95         ((BTMetaPageData *) PageGetContents(p))
  96
  97 #define BTREE_METAPAGE  0               /* first page is meta */
  98 #define BTREE_MAGIC             0x053162        /* magic number of btree pages */
  99 #define BTREE_VERSION   2               /* current version number */
 100
 101 /*
 102  * We actually need to be able to fit three items on every page,
 103  * so restrict any one item to 1/3 the per-page available space.
 104  */
 105 #define BTMaxItemSize(page) \
 106         ((PageGetPageSize(page) - \
 107           sizeof(PageHeaderData) - \
 108           MAXALIGN(sizeof(BTPageOpaqueData))) / 3 - sizeof(ItemIdData))
 109
 110 /*
 111  * The leaf-page fillfactor defaults to 90% but is user-adjustable.
 112  * For pages above the leaf level, we use a fixed 70% fillfactor.
 113  * The fillfactor is applied during index build and when splitting
 114  * a rightmost page; when splitting non-rightmost pages we try to
 115  * divide the data equally.
 116  */
 117 #define BTREE_MIN_FILLFACTOR            10
 118 #define BTREE_DEFAULT_FILLFACTOR        90
 119 #define BTREE_NONLEAF_FILLFACTOR        70
 120
 121 /*
 122  *      Test whether two btree entries are "the same".
 123  *
 124  *      Old comments:
 125  *      In addition, we must guarantee that all tuples in the index are unique,
 126  *      in order to satisfy some assumptions in Lehman and Yao.  The way that we
 127  *      do this is by generating a new OID for every insertion that we do in the
 128  *      tree.  This adds eight bytes to the size of btree index tuples.  Note
 129  *      that we do not use the OID as part of a composite key; the OID only
 130  *      serves as a unique identifier for a given index tuple (logical position
 131  *      within a page).
 132  *
 133  *      New comments:
 134  *      actually, we must guarantee that all tuples in A LEVEL
 135  *      are unique, not in ALL INDEX. So, we can use the t_tid
 136  *      as unique identifier for a given index tuple (logical position
 137  *      within a level). - vadim 04/09/97
 138  */
 139 #define BTTidSame(i1, i2)       \
 140         ( (i1).ip_blkid.bi_hi == (i2).ip_blkid.bi_hi && \
 141           (i1).ip_blkid.bi_lo == (i2).ip_blkid.bi_lo && \
 142           (i1).ip_posid == (i2).ip_posid )
 143 #define BTEntrySame(i1, i2) \
 144         BTTidSame((i1)->t_tid, (i2)->t_tid)
 145
 146
 147 /*
 148  *      In general, the btree code tries to localize its knowledge about
 149  *      page layout to a couple of routines.  However, we need a special
 150  *      value to indicate "no page number" in those places where we expect
 151  *      page numbers.  We can use zero for this because we never need to
 152  *      make a pointer to the metadata page.
 153  */
 154
 155 #define P_NONE                  0
 156
 157 /*
 158  * Macros to test whether a page is leftmost or rightmost on its tree level,
 159  * as well as other state info kept in the opaque data.
 160  */
 161 #define P_LEFTMOST(opaque)              ((opaque)->btpo_prev == P_NONE)
 162 #define P_RIGHTMOST(opaque)             ((opaque)->btpo_next == P_NONE)
 163 #define P_ISLEAF(opaque)                ((opaque)->btpo_flags & BTP_LEAF)
 164 #define P_ISROOT(opaque)                ((opaque)->btpo_flags & BTP_ROOT)
 165 #define P_ISDELETED(opaque)             ((opaque)->btpo_flags & BTP_DELETED)
 166 #define P_IGNORE(opaque)                ((opaque)->btpo_flags & (BTP_DELETED|BTP_HALF_DEAD))
 167 #define P_HAS_GARBAGE(opaque)   ((opaque)->btpo_flags & BTP_HAS_GARBAGE)
 168
 169 /*
 170  *      Lehman and Yao's algorithm requires a ``high key'' on every non-rightmost
 171  *      page.  The high key is not a data key, but gives info about what range of
 172  *      keys is supposed to be on this page.  The high key on a page is required
 173  *      to be greater than or equal to any data key that appears on the page.
 174  *      If we find ourselves trying to insert a key > high key, we know we need
 175  *      to move right (this should only happen if the page was split since we
 176  *      examined the parent page).
 177  *
 178  *      Our insertion algorithm guarantees that we can use the initial least key
 179  *      on our right sibling as the high key.  Once a page is created, its high
 180  *      key changes only if the page is split.
 181  *
 182  *      On a non-rightmost page, the high key lives in item 1 and data items
 183  *      start in item 2.  Rightmost pages have no high key, so we store data
 184  *      items beginning in item 1.
 185  */
 186
 187 #define P_HIKEY                         ((OffsetNumber) 1)
 188 #define P_FIRSTKEY                      ((OffsetNumber) 2)
 189 #define P_FIRSTDATAKEY(opaque)  (P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY)
 190
 191 /*
 192  * XLOG records for btree operations
 193  *
 194  * XLOG allows to store some information in high 4 bits of log
 195  * record xl_info field
 196  */
 197 #define XLOG_BTREE_INSERT_LEAF  0x00    /* add index tuple without split */
 198 #define XLOG_BTREE_INSERT_UPPER 0x10    /* same, on a non-leaf page */
 199 #define XLOG_BTREE_INSERT_META  0x20    /* same, plus update metapage */
 200 #define XLOG_BTREE_SPLIT_L              0x30    /* add index tuple with split */
 201 #define XLOG_BTREE_SPLIT_R              0x40    /* as above, new item on right */
 202 #define XLOG_BTREE_SPLIT_L_ROOT 0x50    /* add tuple with split of root */
 203 #define XLOG_BTREE_SPLIT_R_ROOT 0x60    /* as above, new item on right */
 204 #define XLOG_BTREE_DELETE               0x70    /* delete leaf index tuple */
 205 #define XLOG_BTREE_DELETE_PAGE  0x80    /* delete an entire page */
 206 #define XLOG_BTREE_DELETE_PAGE_META 0x90                /* same, plus update metapage */
 207 #define XLOG_BTREE_NEWROOT              0xA0    /* new root page */
 208
 209 /*
 210  * All that we need to find changed index tuple
 211  */
 212 typedef struct xl_btreetid
 213 {
 214         RelFileNode node;
 215         ItemPointerData tid;            /* changed tuple id */
 216 } xl_btreetid;
 217
 218 /*
 219  * All that we need to regenerate the meta-data page
 220  */
 221 typedef struct xl_btree_metadata
 222 {
 223         BlockNumber root;
 224         uint32          level;
 225         BlockNumber fastroot;
 226         uint32          fastlevel;
 227 } xl_btree_metadata;
 228
 229 /*
 230  * This is what we need to know about simple (without split) insert.
 231  *
 232  * This data record is used for INSERT_LEAF, INSERT_UPPER, INSERT_META.
 233  * Note that INSERT_META implies it's not a leaf page.
 234  */
 235 typedef struct xl_btree_insert
 236 {
 237         xl_btreetid target;                     /* inserted tuple id */
 238         /* BlockNumber downlink field FOLLOWS IF NOT XLOG_BTREE_INSERT_LEAF */
 239         /* xl_btree_metadata FOLLOWS IF XLOG_BTREE_INSERT_META */
 240         /* INDEX TUPLE FOLLOWS AT END OF STRUCT */
 241 } xl_btree_insert;
 242
 243 #define SizeOfBtreeInsert       (offsetof(xl_btreetid, tid) + SizeOfIptrData)
 244
 245 /*
 246  * On insert with split we save items of both left and right siblings
 247  * and restore content of both pages from log record.  This way takes less
 248  * xlog space than the normal approach, because if we did it standardly,
 249  * XLogInsert would almost always think the right page is new and store its
 250  * whole page image.
 251  *
 252  * Note: the four XLOG_BTREE_SPLIT xl_info codes all use this data record.
 253  * The _L and _R variants indicate whether the inserted tuple went into the
 254  * left or right split page (and thus, whether otherblk is the right or left
 255  * page of the split pair).  The _ROOT variants indicate that we are splitting
 256  * the root page, and thus that a newroot record rather than an insert or
 257  * split record should follow.  Note that a split record never carries a
 258  * metapage update --- we'll do that in the parent-level update.
 259  */
 260 typedef struct xl_btree_split
 261 {
 262         xl_btreetid target;                     /* inserted tuple id */
 263         BlockNumber otherblk;           /* second block participated in split: */
 264         /* first one is stored in target' tid */
 265         BlockNumber leftblk;            /* prev/left block */
 266         BlockNumber rightblk;           /* next/right block */
 267         uint32          level;                  /* tree level of page being split */
 268         uint16          leftlen;                /* len of left page items below */
 269         /* LEFT AND RIGHT PAGES TUPLES FOLLOW AT THE END */
 270 } xl_btree_split;
 271
 272 #define SizeOfBtreeSplit        (offsetof(xl_btree_split, leftlen) + sizeof(uint16))
 273
 274 /*
 275  * This is what we need to know about delete of individual leaf index tuples.
 276  * The WAL record can represent deletion of any number of index tuples on a
 277  * single index page.
 278  */
 279 typedef struct xl_btree_delete
 280 {
 281         RelFileNode node;
 282         BlockNumber block;
 283         /* TARGET OFFSET NUMBERS FOLLOW AT THE END */
 284 } xl_btree_delete;
 285
 286 #define SizeOfBtreeDelete       (offsetof(xl_btree_delete, block) + sizeof(BlockNumber))
 287
 288 /*
 289  * This is what we need to know about deletion of a btree page.  The target
 290  * identifies the tuple removed from the parent page (note that we remove
 291  * this tuple's downlink and the *following* tuple's key).      Note we do not
 292  * store any content for the deleted page --- it is just rewritten as empty
 293  * during recovery.
 294  */
 295 typedef struct xl_btree_delete_page
 296 {
 297         xl_btreetid target;                     /* deleted tuple id in parent page */
 298         BlockNumber deadblk;            /* child block being deleted */
 299         BlockNumber leftblk;            /* child block's left sibling, if any */
 300         BlockNumber rightblk;           /* child block's right sibling */
 301         /* xl_btree_metadata FOLLOWS IF XLOG_BTREE_DELETE_PAGE_META */
 302 } xl_btree_delete_page;
 303
 304 #define SizeOfBtreeDeletePage   (offsetof(xl_btree_delete_page, rightblk) + sizeof(BlockNumber))
 305
 306 /*
 307  * New root log record.  There are zero tuples if this is to establish an
 308  * empty root, or two if it is the result of splitting an old root.
 309  *
 310  * Note that although this implies rewriting the metadata page, we don't need
 311  * an xl_btree_metadata record --- the rootblk and level are sufficient.
 312  */
 313 typedef struct xl_btree_newroot
 314 {
 315         RelFileNode node;
 316         BlockNumber rootblk;            /* location of new root */
 317         uint32          level;                  /* its tree level */
 318         /* 0 or 2 INDEX TUPLES FOLLOW AT END OF STRUCT */
 319 } xl_btree_newroot;
 320
 321 #define SizeOfBtreeNewroot      (offsetof(xl_btree_newroot, level) + sizeof(uint32))
 322
 323
 324 /*
 325  *      Operator strategy numbers for B-tree have been moved to access/skey.h,
 326  *      because many places need to use them in ScanKeyInit() calls.
 327  */
 328
 329 /*
 330  *      When a new operator class is declared, we require that the user
 331  *      supply us with an amproc procedure for determining whether, for
 332  *      two keys a and b, a < b, a = b, or a > b.  This routine must
 333  *      return < 0, 0, > 0, respectively, in these three cases.  Since we
 334  *      only have one such proc in amproc, it's number 1.
 335  */
 336
 337 #define BTORDER_PROC    1
 338
 339 /*
 340  *      We need to be able to tell the difference between read and write
 341  *      requests for pages, in order to do locking correctly.
 342  */
 343
 344 #define BT_READ                 BUFFER_LOCK_SHARE
 345 #define BT_WRITE                BUFFER_LOCK_EXCLUSIVE
 346
 347 /*
 348  *      BTStackData -- As we descend a tree, we push the (location, downlink)
 349  *      pairs from internal pages onto a private stack.  If we split a
 350  *      leaf, we use this stack to walk back up the tree and insert data
 351  *      into parent pages (and possibly to split them, too).  Lehman and
 352  *      Yao's update algorithm guarantees that under no circumstances can
 353  *      our private stack give us an irredeemably bad picture up the tree.
 354  *      Again, see the paper for details.
 355  */
 356
 357 typedef struct BTStackData
 358 {
 359         BlockNumber bts_blkno;
 360         OffsetNumber bts_offset;
 361         IndexTupleData bts_btentry;
 362         struct BTStackData *bts_parent;
 363 } BTStackData;
 364
 365 typedef BTStackData *BTStack;
 366
 367 /*
 368  * BTScanOpaqueData is the btree-private state needed for an indexscan.
 369  * This consists of preprocessed scan keys (see _bt_preprocess_keys() for
 370  * details of the preprocessing), information about the current location
 371  * of the scan, and information about the marked location, if any.      (We use
 372  * BTScanPosData to represent the data needed for each of current and marked
 373  * locations.)  In addition we can remember some known-killed index entries
 374  * that must be marked before we can move off the current page.
 375  *
 376  * Index scans work a page at a time: we pin and read-lock the page, identify
 377  * all the matching items on the page and save them in BTScanPosData, then
 378  * release the read-lock while returning the items to the caller for
 379  * processing.  This approach minimizes lock/unlock traffic.  Note that we
 380  * keep the pin on the index page until the caller is done with all the items
 381  * (this is needed for VACUUM synchronization, see nbtree/README).      When we
 382  * are ready to step to the next page, if the caller has told us any of the
 383  * items were killed, we re-lock the page to mark them killed, then unlock.
 384  * Finally we drop the pin and step to the next page in the appropriate
 385  * direction.
 386  *
 387  * NOTE: in this implementation, btree does not use or set the
 388  * currentItemData and currentMarkData fields of IndexScanDesc at all.
 389  */
 390
 391 typedef struct BTScanPosItem    /* what we remember about each match */
 392 {
 393         ItemPointerData heapTid;        /* TID of referenced heap item */
 394         OffsetNumber indexOffset;       /* index item's location within page */
 395 } BTScanPosItem;
 396
 397 typedef struct BTScanPosData
 398 {
 399         Buffer          buf;                    /* if valid, the buffer is pinned */
 400
 401         BlockNumber nextPage;           /* page's right link when we scanned it */
 402
 403         /*
 404          * moreLeft and moreRight track whether we think there may be matching
 405          * index entries to the left and right of the current page, respectively.
 406          * We can clear the appropriate one of these flags when _bt_checkkeys()
 407          * returns continuescan = false.
 408          */
 409         bool            moreLeft;
 410         bool            moreRight;
 411
 412         /*
 413          * The items array is always ordered in index order (ie, increasing
 414          * indexoffset).  When scanning backwards it is convenient to fill the
 415          * array back-to-front, so we start at the last slot and fill downwards.
 416          * Hence we need both a first-valid-entry and a last-valid-entry counter.
 417          * itemIndex is a cursor showing which entry was last returned to caller.
 418          */
 419         int                     firstItem;              /* first valid index in items[] */
 420         int                     lastItem;               /* last valid index in items[] */
 421         int                     itemIndex;              /* current index in items[] */
 422
 423         BTScanPosItem items[MaxIndexTuplesPerPage]; /* MUST BE LAST */
 424 } BTScanPosData;
 425
 426 typedef BTScanPosData *BTScanPos;
 427
 428 #define BTScanPosIsValid(scanpos) BufferIsValid((scanpos).buf)
 429
 430 typedef struct BTScanOpaqueData
 431 {
 432         /* these fields are set by _bt_preprocess_keys(): */
 433         bool            qual_ok;                /* false if qual can never be satisfied */
 434         int                     numberOfKeys;   /* number of preprocessed scan keys */
 435         ScanKey         keyData;                /* array of preprocessed scan keys */
 436
 437         /* info about killed items if any (killedItems is NULL if never used) */
 438         int                *killedItems;        /* currPos.items indexes of killed items */
 439         int                     numKilled;              /* number of currently stored items */
 440
 441         /*
 442          * If the marked position is on the same page as current position, we
 443          * don't use markPos, but just keep the marked itemIndex in markItemIndex
 444          * (all the rest of currPos is valid for the mark position). Hence, to
 445          * determine if there is a mark, first look at markItemIndex, then at
 446          * markPos.
 447          */
 448         int                     markItemIndex;  /* itemIndex, or -1 if not valid */
 449
 450         /* keep these last in struct for efficiency */
 451         BTScanPosData currPos;          /* current position data */
 452         BTScanPosData markPos;          /* marked position, if any */
 453 } BTScanOpaqueData;
 454
 455 typedef BTScanOpaqueData *BTScanOpaque;
 456
 457 /*
 458  * We use these private sk_flags bits in preprocessed scan keys
 459  */
 460 #define SK_BT_REQFWD    0x00010000              /* required to continue forward scan */
 461 #define SK_BT_REQBKWD   0x00020000              /* required to continue backward scan */
 462
 463
 464 /*
 465  * prototypes for functions in nbtree.c (external entry points for btree)
 466  */
 467 extern Datum btbuild(PG_FUNCTION_ARGS);
 468 extern Datum btinsert(PG_FUNCTION_ARGS);
 469 extern Datum btbeginscan(PG_FUNCTION_ARGS);
 470 extern Datum btgettuple(PG_FUNCTION_ARGS);
 471 extern Datum btgetmulti(PG_FUNCTION_ARGS);
 472 extern Datum btrescan(PG_FUNCTION_ARGS);
 473 extern Datum btendscan(PG_FUNCTION_ARGS);
 474 extern Datum btmarkpos(PG_FUNCTION_ARGS);
 475 extern Datum btrestrpos(PG_FUNCTION_ARGS);
 476 extern Datum btbulkdelete(PG_FUNCTION_ARGS);
 477 extern Datum btvacuumcleanup(PG_FUNCTION_ARGS);
 478 extern Datum btoptions(PG_FUNCTION_ARGS);
 479
 480 /*
 481  * prototypes for functions in nbtinsert.c
 482  */
 483 extern void _bt_doinsert(Relation rel, IndexTuple itup,
 484                          bool index_is_unique, Relation heapRel);
 485 extern Buffer _bt_getstackbuf(Relation rel, BTStack stack, int access);
 486 extern void _bt_insert_parent(Relation rel, Buffer buf, Buffer rbuf,
 487                                   BTStack stack, bool is_root, bool is_only);
 488
 489 /*
 490  * prototypes for functions in nbtpage.c
 491  */
 492 extern void _bt_initmetapage(Page page, BlockNumber rootbknum, uint32 level);
 493 extern Buffer _bt_getroot(Relation rel, int access);
 494 extern Buffer _bt_gettrueroot(Relation rel);
 495 extern void _bt_checkpage(Relation rel, Buffer buf);
 496 extern Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access);
 497 extern Buffer _bt_relandgetbuf(Relation rel, Buffer obuf,
 498                                  BlockNumber blkno, int access);
 499 extern void _bt_relbuf(Relation rel, Buffer buf);
 500 extern void _bt_pageinit(Page page, Size size);
 501 extern bool _bt_page_recyclable(Page page);
 502 extern void _bt_delitems(Relation rel, Buffer buf,
 503                          OffsetNumber *itemnos, int nitems);
 504 extern int      _bt_pagedel(Relation rel, Buffer buf, bool vacuum_full);
 505
 506 /*
 507  * prototypes for functions in nbtsearch.c
 508  */
 509 extern BTStack _bt_search(Relation rel,
 510                    int keysz, ScanKey scankey, bool nextkey,
 511                    Buffer *bufP, int access);
 512 extern Buffer _bt_moveright(Relation rel, Buffer buf, int keysz,
 513                           ScanKey scankey, bool nextkey, int access);
 514 extern OffsetNumber _bt_binsrch(Relation rel, Buffer buf, int keysz,
 515                         ScanKey scankey, bool nextkey);
 516 extern int32 _bt_compare(Relation rel, int keysz, ScanKey scankey,
 517                         Page page, OffsetNumber offnum);
 518 extern bool _bt_first(IndexScanDesc scan, ScanDirection dir);
 519 extern bool _bt_next(IndexScanDesc scan, ScanDirection dir);
 520 extern Buffer _bt_get_endpoint(Relation rel, uint32 level, bool rightmost);
 521
 522 /*
 523  * prototypes for functions in nbtutils.c
 524  */
 525 extern ScanKey _bt_mkscankey(Relation rel, IndexTuple itup);
 526 extern ScanKey _bt_mkscankey_nodata(Relation rel);
 527 extern void _bt_freeskey(ScanKey skey);
 528 extern void _bt_freestack(BTStack stack);
 529 extern void _bt_preprocess_keys(IndexScanDesc scan);
 530 extern bool _bt_checkkeys(IndexScanDesc scan,
 531                           Page page, OffsetNumber offnum,
 532                           ScanDirection dir, bool *continuescan);
 533 extern void _bt_killitems(IndexScanDesc scan, bool haveLock);
 534 extern BTCycleId _bt_vacuum_cycleid(Relation rel);
 535 extern BTCycleId _bt_start_vacuum(Relation rel);
 536 extern void _bt_end_vacuum(Relation rel);
 537 extern Size BTreeShmemSize(void);
 538 extern void BTreeShmemInit(void);
 539
 540 /*
 541  * prototypes for functions in nbtsort.c
 542  */
 543 typedef struct BTSpool BTSpool; /* opaque type known only within nbtsort.c */
 544
 545 extern BTSpool *_bt_spoolinit(Relation index, bool isunique, bool isdead);
 546 extern void _bt_spooldestroy(BTSpool *btspool);
 547 extern void _bt_spool(IndexTuple itup, BTSpool *btspool);
 548 extern void _bt_leafbuild(BTSpool *btspool, BTSpool *spool2);
 549
 550 /*
 551  * prototypes for functions in nbtxlog.c
 552  */
 553 extern void btree_redo(XLogRecPtr lsn, XLogRecord *record);
 554 extern void btree_desc(StringInfo buf, uint8 xl_info, char *rec);
 555 extern void btree_xlog_startup(void);
 556 extern void btree_xlog_cleanup(void);
 557 extern bool btree_safe_restartpoint(void);
 558
 559 #endif   /* NBTREE_H */