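Clean up smgr.c/md.c APIs as per discussion a couple months ago. Instead of having the md.c routines return a success/failure boolean to their caller, they now return void and report failures themselves via ereport.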

author Tom Lane <tgl@sss.pgh.pa.us>

Wed, 3 Jan 2007 18:11:01 +0000 (18:11 +0000)

committer Tom Lane <tgl@sss.pgh.pa.us>

Wed, 3 Jan 2007 18:11:01 +0000 (18:11 +0000)
author Tom Lane <tgl@sss.pgh.pa.us>
Wed, 3 Jan 2007 18:11:01 +0000 (18:11 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Wed, 3 Jan 2007 18:11:01 +0000 (18:11 +0000)
diff --git a/src/backend/access/hash/hashpage.c b/src/backend/access/hash/hashpage.c

index 0f64383..b9569e5 100644 (file)
--- a/src/backend/access/hash/hashpage.c
+++ b/src/backend/access/hash/hashpage.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.61 2006/11/19 21:33:23 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.62 2007/01/03 18:11:01 tgl Exp $
   *
   * NOTES
   *       Postgres hash pages look like ordinary relation pages.  The opaque
@@ -533,10 +533,8 @@ fail:
   *
   * This does not need to initialize the new bucket pages; we'll do that as
   * each one is used by _hash_expandtable().  But we have to extend the logical
- * EOF to the end of the splitpoint; otherwise the first overflow page
- * allocated beyond the splitpoint will represent a noncontiguous access,
- * which can confuse md.c (and will probably be forbidden by future changes
- * to md.c).
+ * EOF to the end of the splitpoint; this keeps smgr's idea of the EOF in
+ * sync with ours, so that overflow-page allocation works correctly.
   *
   * We do this by writing a page of zeroes at the end of the splitpoint range.
   * We expect that the filesystem will ensure that the intervening pages read
@@ -559,7 +557,6 @@ _hash_alloc_buckets(Relation rel, uint32 nblocks)
  {
         BlockNumber     firstblock;
         BlockNumber     lastblock;
-       BlockNumber     endblock;
         char            zerobuf[BLCKSZ];
  
         /*
@@ -577,24 +574,9 @@ _hash_alloc_buckets(Relation rel, uint32 nblocks)
         if (lastblock < firstblock || lastblock == InvalidBlockNumber)
                 return InvalidBlockNumber;
  
-       /* Note: we assume RelationGetNumberOfBlocks did RelationOpenSmgr for us */
-
         MemSet(zerobuf, 0, sizeof(zerobuf));
  
-       /*
-        * XXX If the extension results in creation of new segment files,
-        * we have to make sure that each non-last file is correctly filled out to
-        * RELSEG_SIZE blocks.  This ought to be done inside mdextend, but
-        * changing the smgr API seems best left for development cycle not late
-        * beta.  Temporary fix for bug #2737.
-        */
-#ifndef LET_OS_MANAGE_FILESIZE
-       for (endblock = firstblock | (RELSEG_SIZE - 1);
-                endblock < lastblock;
-                endblock += RELSEG_SIZE)
-               smgrextend(rel->rd_smgr, endblock, zerobuf, rel->rd_istemp);
-#endif
-
+       /* Note: we assume RelationGetNumberOfBlocks did RelationOpenSmgr for us */
         smgrextend(rel->rd_smgr, lastblock, zerobuf, rel->rd_istemp);
  
         return firstblock;
diff --git a/src/backend/access/nbtree/nbtsort.c b/src/backend/access/nbtree/nbtsort.c

index 4951dca..4f886e8 100644 (file)
--- a/src/backend/access/nbtree/nbtsort.c
+++ b/src/backend/access/nbtree/nbtsort.c
@@ -36,9 +36,9 @@
   * that is of no value (since other backends have no interest in them yet)
   * and it created locking problems for CHECKPOINT, because the upper-level
   * pages were held exclusive-locked for long periods.  Now we just build
- * the pages in local memory and smgrwrite() them as we finish them.  They
- * will need to be re-read into shared buffers on first use after the build
- * finishes.
+ * the pages in local memory and smgrwrite or smgrextend them as we finish
+ * them.  They will need to be re-read into shared buffers on first use after
+ * the build finishes.
   *
   * Since the index will never be used unless it is completely built,
   * from a crash-recovery point of view there is no need to WAL-log the
@@ -57,7 +57,7 @@
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.107 2006/10/04 00:29:49 momjian Exp $
+ *       $PostgreSQL: pgsql/src/backend/access/nbtree/nbtsort.c,v 1.108 2007/01/03 18:11:01 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -309,9 +309,9 @@ _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno)
         {
                 if (!wstate->btws_zeropage)
                         wstate->btws_zeropage = (Page) palloc0(BLCKSZ);
-               smgrwrite(wstate->index->rd_smgr, wstate->btws_pages_written++,
-                                 (char *) wstate->btws_zeropage,
-                                 true);
+               smgrextend(wstate->index->rd_smgr, wstate->btws_pages_written++,
+                                  (char *) wstate->btws_zeropage,
+                                  true);
         }
  
         /*
@@ -319,10 +319,17 @@ _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno)
          * index, because there's no need for smgr to schedule an fsync for this
          * write; we'll do it ourselves before ending the build.
          */
-       smgrwrite(wstate->index->rd_smgr, blkno, (char *) page, true);
-
         if (blkno == wstate->btws_pages_written)
+       {
+               /* extending the file... */
+               smgrextend(wstate->index->rd_smgr, blkno, (char *) page, true);
                 wstate->btws_pages_written++;
+       }
+       else
+       {
+               /* overwriting a block we zero-filled before */
+               smgrwrite(wstate->index->rd_smgr, blkno, (char *) page, true);
+       }
  
         pfree(page);
  }
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c

index c30aa69..5de8e96 100644 (file)
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.208 2006/12/30 21:21:53 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/commands/tablecmds.c,v 1.209 2007/01/03 18:11:01 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -6083,7 +6083,7 @@ copy_relation_data(Relation rel, SMgrRelation dst)
                  * rel, because there's no need for smgr to schedule an fsync for this
                  * write; we'll do it ourselves below.
                  */
-               smgrwrite(dst, blkno, buf, true);
+               smgrextend(dst, blkno, buf, true);
         }
  
         /*
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c

index f58ab03..e0899a5 100644 (file)
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.123 2006/11/20 01:07:56 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.124 2007/01/03 18:11:01 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -22,6 +22,7 @@
  #include "miscadmin.h"
  #include "postmaster/bgwriter.h"
  #include "storage/fd.h"
+#include "storage/bufmgr.h"
  #include "storage/smgr.h"
  #include "utils/hsearch.h"
  #include "utils/memutils.h"
@@ -108,9 +109,16 @@ typedef struct
  static HTAB *pendingOpsTable = NULL;
  
  
+typedef enum                                   /* behavior for mdopen & _mdfd_getseg */
+{
+       EXTENSION_FAIL,                         /* ereport if segment not present */
+       EXTENSION_RETURN_NULL,          /* return NULL if not present */
+       EXTENSION_CREATE                        /* create new segments as needed */
+} ExtensionBehavior;
+
  /* local routines */
-static MdfdVec *mdopen(SMgrRelation reln, bool allowNotFound);
-static bool register_dirty_segment(SMgrRelation reln, MdfdVec *seg);
+static MdfdVec *mdopen(SMgrRelation reln, ExtensionBehavior behavior);
+static void register_dirty_segment(SMgrRelation reln, MdfdVec *seg);
  static MdfdVec *_fdvec_alloc(void);
  
  #ifndef LET_OS_MANAGE_FILESIZE
@@ -118,14 +126,14 @@ static MdfdVec *_mdfd_openseg(SMgrRelation reln, BlockNumber segno,
                           int oflags);
  #endif
  static MdfdVec *_mdfd_getseg(SMgrRelation reln, BlockNumber blkno,
-                        bool allowNotFound);
-static BlockNumber _mdnblocks(File file, Size blcksz);
+                                                        bool isTemp, ExtensionBehavior behavior);
+static BlockNumber _mdnblocks(SMgrRelation reln, MdfdVec *seg);
  
  
  /*
   *     mdinit() -- Initialize private state for magnetic disk storage manager.
   */
-bool
+void
  mdinit(void)
  {
         MdCxt = AllocSetContextCreate(TopMemoryContext,
@@ -154,8 +162,6 @@ mdinit(void)
                                                                           &hash_ctl,
                                                                    HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);
         }
-
-       return true;
  }
  
  /*
@@ -163,14 +169,14 @@ mdinit(void)
   *
   * If isRedo is true, it's okay for the relation to exist already.
   */
-bool
+void
  mdcreate(SMgrRelation reln, bool isRedo)
  {
         char       *path;
         File            fd;
  
         if (isRedo && reln->md_fd != NULL)
-               return true;                    /* created and opened already... */
+               return;                                 /* created and opened already... */
  
         Assert(reln->md_fd == NULL);
  
@@ -193,11 +199,15 @@ mdcreate(SMgrRelation reln, bool isRedo)
                 if (fd < 0)
                 {
                         pfree(path);
-                       /* be sure to return the error reported by create, not open */
+                       /* be sure to report the error reported by create, not open */
                         errno = save_errno;
-                       return false;
+                       ereport(ERROR,
+                                       (errcode_for_file_access(),
+                                        errmsg("could not create relation %u/%u/%u: %m",
+                                                       reln->smgr_rnode.spcNode,
+                                                       reln->smgr_rnode.dbNode,
+                                                       reln->smgr_rnode.relNode)));
                 }
-               errno = 0;
         }
  
         pfree(path);
@@ -209,8 +219,6 @@ mdcreate(SMgrRelation reln, bool isRedo)
  #ifndef LET_OS_MANAGE_FILESIZE
         reln->md_fd->mdfd_chain = NULL;
  #endif
-
-       return true;
  }
  
  /*
@@ -220,12 +228,12 @@ mdcreate(SMgrRelation reln, bool isRedo)
   * there won't be an SMgrRelation hashtable entry anymore.
   *
   * If isRedo is true, it's okay for the relation to be already gone.
+ * Also, any failure should be reported as WARNING not ERROR, because
+ * we are usually not in a transaction anymore when this is called.
   */
-bool
+void
  mdunlink(RelFileNode rnode, bool isRedo)
  {
-       bool            status = true;
-       int                     save_errno = 0;
         char       *path;
  
         path = relpath(rnode);
@@ -234,15 +242,17 @@ mdunlink(RelFileNode rnode, bool isRedo)
         if (unlink(path) < 0)
         {
                 if (!isRedo || errno != ENOENT)
-               {
-                       status = false;
-                       save_errno = errno;
-               }
+                       ereport(WARNING,
+                                       (errcode_for_file_access(),
+                                        errmsg("could not remove relation %u/%u/%u: %m",
+                                                       rnode.spcNode,
+                                                       rnode.dbNode,
+                                                       rnode.relNode)));
         }
  
  #ifndef LET_OS_MANAGE_FILESIZE
         /* Delete the additional segments, if any */
-       if (status)
+       else
         {
                 char       *segpath = (char *) palloc(strlen(path) + 12);
                 BlockNumber segno;
@@ -258,10 +268,13 @@ mdunlink(RelFileNode rnode, bool isRedo)
                         {
                                 /* ENOENT is expected after the last segment... */
                                 if (errno != ENOENT)
-                               {
-                                       status = false;
-                                       save_errno = errno;
-                               }
+                                       ereport(WARNING,
+                                                       (errcode_for_file_access(),
+                                                        errmsg("could not remove segment %u of relation %u/%u/%u: %m",
+                                                                       segno,
+                                                                       rnode.spcNode,
+                                                                       rnode.dbNode,
+                                                                       rnode.relNode)));
                                 break;
                         }
                 }
@@ -270,29 +283,44 @@ mdunlink(RelFileNode rnode, bool isRedo)
  #endif
  
         pfree(path);
-
-       errno = save_errno;
-       return status;
  }
  
  /*
   *     mdextend() -- Add a block to the specified relation.
   *
- *             The semantics are basically the same as mdwrite(): write at the
- *             specified position.  However, we are expecting to extend the
- *             relation (ie, blocknum is >= the current EOF), and so in case of
- *             failure we clean up by truncating.
- *
- *             This routine returns true or false, with errno set as appropriate.
+ *             The semantics are nearly the same as mdwrite(): write at the
+ *             specified position.  However, this is to be used for the case of
+ *             extending a relation (i.e., blocknum is at or beyond the current
+ *             EOF).  Note that we assume writing a block beyond current EOF
+ *             causes intervening file space to become filled with zeroes.
   */
-bool
+void
  mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
  {
         long            seekpos;
         int                     nbytes;
         MdfdVec    *v;
  
-       v = _mdfd_getseg(reln, blocknum, false);
+       /* This assert is too expensive to have on normally ... */
+#ifdef CHECK_WRITE_VS_EXTEND
+       Assert(blocknum >= mdnblocks(reln));
+#endif
+
+       /*
+        * If a relation manages to grow to 2^32-1 blocks, refuse to extend it
+        * any more --- we mustn't create a block whose number
+        * actually is InvalidBlockNumber.
+        */
+       if (blocknum == InvalidBlockNumber)
+               ereport(ERROR,
+                               (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+                                errmsg("cannot extend relation %u/%u/%u beyond %u blocks",
+                                               reln->smgr_rnode.spcNode,
+                                               reln->smgr_rnode.dbNode,
+                                               reln->smgr_rnode.relNode,
+                                               InvalidBlockNumber)));
+
+       v = _mdfd_getseg(reln, blocknum, isTemp, EXTENSION_CREATE);
  
  #ifndef LET_OS_MANAGE_FILESIZE
         seekpos = (long) (BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)));
@@ -302,52 +330,64 @@ mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
  #endif
  
         /*
-        * Note: because caller obtained blocknum by calling _mdnblocks, which did
-        * a seek(SEEK_END), this seek is often redundant and will be optimized
-        * away by fd.c.  It's not redundant, however, if there is a partial page
-        * at the end of the file.      In that case we want to try to overwrite the
-        * partial page with a full page.  It's also not redundant if bufmgr.c had
-        * to dump another buffer of the same file to make room for the new page's
-        * buffer.
+        * Note: because caller usually obtained blocknum by calling mdnblocks,
+        * which did a seek(SEEK_END), this seek is often redundant and will be
+        * optimized away by fd.c.  It's not redundant, however, if there is a
+        * partial page at the end of the file. In that case we want to try to
+        * overwrite the partial page with a full page.  It's also not redundant
+        * if bufmgr.c had to dump another buffer of the same file to make room
+        * for the new page's buffer.
          */
         if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
-               return false;
+               ereport(ERROR,
+                               (errcode_for_file_access(),
+                                errmsg("could not seek to block %u of relation %u/%u/%u: %m",
+                                               blocknum,
+                                               reln->smgr_rnode.spcNode,
+                                               reln->smgr_rnode.dbNode,
+                                               reln->smgr_rnode.relNode)));
  
         if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ)
         {
-               if (nbytes > 0)
-               {
-                       int                     save_errno = errno;
-
-                       /* Remove the partially-written page */
-                       FileTruncate(v->mdfd_vfd, seekpos);
-                       FileSeek(v->mdfd_vfd, seekpos, SEEK_SET);
-                       errno = save_errno;
-               }
-               return false;
+               if (nbytes < 0)
+                       ereport(ERROR,
+                                       (errcode_for_file_access(),
+                                        errmsg("could not extend relation %u/%u/%u: %m",
+                                                       reln->smgr_rnode.spcNode,
+                                                       reln->smgr_rnode.dbNode,
+                                                       reln->smgr_rnode.relNode),
+                                        errhint("Check free disk space.")));
+               /* short write: complain appropriately */
+               ereport(ERROR,
+                               (errcode(ERRCODE_DISK_FULL),
+                                errmsg("could not extend relation %u/%u/%u: wrote only %d of %d bytes at block %u",
+                                               reln->smgr_rnode.spcNode,
+                                               reln->smgr_rnode.dbNode,
+                                               reln->smgr_rnode.relNode,
+                                               nbytes, BLCKSZ, blocknum),
+                                errhint("Check free disk space.")));
         }
  
         if (!isTemp)
-       {
-               if (!register_dirty_segment(reln, v))
-                       return false;
-       }
+               register_dirty_segment(reln, v);
  
  #ifndef LET_OS_MANAGE_FILESIZE
-       Assert(_mdnblocks(v->mdfd_vfd, BLCKSZ) <= ((BlockNumber) RELSEG_SIZE));
+       Assert(_mdnblocks(reln, v) <= ((BlockNumber) RELSEG_SIZE));
  #endif
-
-       return true;
  }
  
  /*
- *     mdopen() -- Open the specified relation.  ereport's on failure.
- *             (Optionally, can return NULL instead of ereport for ENOENT.)
+ *     mdopen() -- Open the specified relation.
   *
   * Note we only open the first segment, when there are multiple segments.
+ *
+ * If first segment is not present, either ereport or return NULL according
+ * to "behavior".  We treat EXTENSION_CREATE the same as EXTENSION_FAIL;
+ * EXTENSION_CREATE means it's OK to extend an existing relation, not to
+ * invent one out of whole cloth.
   */
  static MdfdVec *
-mdopen(SMgrRelation reln, bool allowNotFound)
+mdopen(SMgrRelation reln, ExtensionBehavior behavior)
  {
         MdfdVec    *mdfd;
         char       *path;
@@ -374,7 +414,7 @@ mdopen(SMgrRelation reln, bool allowNotFound)
                 if (fd < 0)
                 {
                         pfree(path);
-                       if (allowNotFound && errno == ENOENT)
+                       if (behavior == EXTENSION_RETURN_NULL && errno == ENOENT)
                                 return NULL;
                         ereport(ERROR,
                                         (errcode_for_file_access(),
@@ -393,7 +433,7 @@ mdopen(SMgrRelation reln, bool allowNotFound)
         mdfd->mdfd_segno = 0;
  #ifndef LET_OS_MANAGE_FILESIZE
         mdfd->mdfd_chain = NULL;
-       Assert(_mdnblocks(fd, BLCKSZ) <= ((BlockNumber) RELSEG_SIZE));
+       Assert(_mdnblocks(reln, mdfd) <= ((BlockNumber) RELSEG_SIZE));
  #endif
  
         return mdfd;
@@ -401,17 +441,15 @@ mdopen(SMgrRelation reln, bool allowNotFound)
  
  /*
   *     mdclose() -- Close the specified relation, if it isn't closed already.
- *
- *             Returns true or false with errno set as appropriate.
   */
-bool
+void
  mdclose(SMgrRelation reln)
  {
         MdfdVec    *v = reln->md_fd;
  
         /* No work if already closed */
         if (v == NULL)
-               return true;
+               return;
  
         reln->md_fd = NULL;                     /* prevent dangling pointer after error */
  
@@ -432,22 +470,19 @@ mdclose(SMgrRelation reln)
                 FileClose(v->mdfd_vfd);
         pfree(v);
  #endif
-
-       return true;
  }
  
  /*
   *     mdread() -- Read the specified block from a relation.
   */
-bool
+void
  mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer)
  {
-       bool            status;
         long            seekpos;
         int                     nbytes;
         MdfdVec    *v;
  
-       v = _mdfd_getseg(reln, blocknum, false);
+       v = _mdfd_getseg(reln, blocknum, false, EXTENSION_FAIL);
  
  #ifndef LET_OS_MANAGE_FILESIZE
         seekpos = (long) (BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)));
@@ -457,39 +492,66 @@ mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer)
  #endif
  
         if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
-               return false;
+               ereport(ERROR,
+                               (errcode_for_file_access(),
+                                errmsg("could not seek to block %u of relation %u/%u/%u: %m",
+                                               blocknum,
+                                               reln->smgr_rnode.spcNode,
+                                               reln->smgr_rnode.dbNode,
+                                               reln->smgr_rnode.relNode)));
  
-       status = true;
         if ((nbytes = FileRead(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ)
         {
+               if (nbytes < 0)
+                       ereport(ERROR,
+                                       (errcode_for_file_access(),
+                                        errmsg("could not read block %u of relation %u/%u/%u: %m",
+                                                       blocknum,
+                                                       reln->smgr_rnode.spcNode,
+                                                       reln->smgr_rnode.dbNode,
+                                                       reln->smgr_rnode.relNode)));
                 /*
-                * If we are at or past EOF, return zeroes without complaining. Also
-                * substitute zeroes if we found a partial block at EOF.
-                *
-                * XXX this is really ugly, bad design.  However the current
-                * implementation of hash indexes requires it, because hash index
-                * pages are initialized out-of-order.
+                * Short read: we are at or past EOF, or we read a partial block at
+                * EOF.  Normally this is an error; upper levels should never try to
+                * read a nonexistent block.  However, if zero_damaged_pages is ON
+                * or we are InRecovery, we should instead return zeroes without
+                * complaining.  This allows, for example, the case of trying to
+                * update a block that was later truncated away.
                  */
-               if (nbytes == 0 ||
-                       (nbytes > 0 && mdnblocks(reln) == blocknum))
+               if (zero_damaged_pages || InRecovery)
                         MemSet(buffer, 0, BLCKSZ);
                 else
-                       status = false;
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_DATA_CORRUPTED),
+                                        errmsg("could not read block %u of relation %u/%u/%u: read only %d of %d bytes",
+                                                       blocknum,
+                                                       reln->smgr_rnode.spcNode,
+                                                       reln->smgr_rnode.dbNode,
+                                                       reln->smgr_rnode.relNode,
+                                                       nbytes, BLCKSZ)));
         }
-
-       return status;
  }
  
  /*
   *     mdwrite() -- Write the supplied block at the appropriate location.
+ *
+ *             This is to be used only for updating already-existing blocks of a
+ *             relation (ie, those before the current EOF).  To extend a relation,
+ *             use mdextend().
   */
-bool
+void
  mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
  {
         long            seekpos;
+       int                     nbytes;
         MdfdVec    *v;
  
-       v = _mdfd_getseg(reln, blocknum, false);
+       /* This assert is too expensive to have on normally ... */
+#ifdef CHECK_WRITE_VS_EXTEND
+       Assert(blocknum < mdnblocks(reln));
+#endif
+
+       v = _mdfd_getseg(reln, blocknum, isTemp, EXTENSION_FAIL);
  
  #ifndef LET_OS_MANAGE_FILESIZE
         seekpos = (long) (BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE)));
@@ -499,18 +561,38 @@ mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
  #endif
  
         if (FileSeek(v->mdfd_vfd, seekpos, SEEK_SET) != seekpos)
-               return false;
-
-       if (FileWrite(v->mdfd_vfd, buffer, BLCKSZ) != BLCKSZ)
-               return false;
+               ereport(ERROR,
+                               (errcode_for_file_access(),
+                                errmsg("could not seek to block %u of relation %u/%u/%u: %m",
+                                               blocknum,
+                                               reln->smgr_rnode.spcNode,
+                                               reln->smgr_rnode.dbNode,
+                                               reln->smgr_rnode.relNode)));
  
-       if (!isTemp)
+       if ((nbytes = FileWrite(v->mdfd_vfd, buffer, BLCKSZ)) != BLCKSZ)
         {
-               if (!register_dirty_segment(reln, v))
-                       return false;
+               if (nbytes < 0)
+                       ereport(ERROR,
+                                       (errcode_for_file_access(),
+                                        errmsg("could not write block %u of relation %u/%u/%u: %m",
+                                                       blocknum,
+                                                       reln->smgr_rnode.spcNode,
+                                                       reln->smgr_rnode.dbNode,
+                                                       reln->smgr_rnode.relNode)));
+               /* short write: complain appropriately */
+               ereport(ERROR,
+                               (errcode(ERRCODE_DISK_FULL),
+                                errmsg("could not write block %u of relation %u/%u/%u: wrote only %d of %d bytes",
+                                               blocknum,
+                                               reln->smgr_rnode.spcNode,
+                                               reln->smgr_rnode.dbNode,
+                                               reln->smgr_rnode.relNode,
+                                               nbytes, BLCKSZ),
+                                errhint("Check free disk space.")));
         }
  
-       return true;
+       if (!isTemp)
+               register_dirty_segment(reln, v);
  }
  
  /*
@@ -520,13 +602,11 @@ mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
   *             and added to the mdfd_chain list.  If this routine has not been
   *             called, then only segments up to the last one actually touched
   *             are present in the chain.
- *
- *             Returns # of blocks, or InvalidBlockNumber on error.
   */
  BlockNumber
  mdnblocks(SMgrRelation reln)
  {
-       MdfdVec    *v = mdopen(reln, false);
+       MdfdVec    *v = mdopen(reln, EXTENSION_FAIL);
  
  #ifndef LET_OS_MANAGE_FILESIZE
         BlockNumber nblocks;
@@ -552,7 +632,7 @@ mdnblocks(SMgrRelation reln)
  
         for (;;)
         {
-               nblocks = _mdnblocks(v->mdfd_vfd, BLCKSZ);
+               nblocks = _mdnblocks(reln, v);
                 if (nblocks > ((BlockNumber) RELSEG_SIZE))
                         elog(FATAL, "segment too big");
                 if (nblocks < ((BlockNumber) RELSEG_SIZE))
@@ -573,22 +653,26 @@ mdnblocks(SMgrRelation reln)
                          */
                         v->mdfd_chain = _mdfd_openseg(reln, segno, O_CREAT);
                         if (v->mdfd_chain == NULL)
-                               return InvalidBlockNumber;              /* failed? */
+                               ereport(ERROR,
+                                               (errcode_for_file_access(),
+                                                errmsg("could not open segment %u of relation %u/%u/%u: %m",
+                                                               segno,
+                                                               reln->smgr_rnode.spcNode,
+                                                               reln->smgr_rnode.dbNode,
+                                                               reln->smgr_rnode.relNode)));
                 }
  
                 v = v->mdfd_chain;
         }
  #else
-       return _mdnblocks(v->mdfd_vfd, BLCKSZ);
+       return _mdnblocks(reln, v);
  #endif
  }
  
  /*
   *     mdtruncate() -- Truncate relation to specified number of blocks.
- *
- *             Returns # of blocks or InvalidBlockNumber on error.
   */
-BlockNumber
+void
  mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
  {
         MdfdVec    *v;
@@ -603,14 +687,22 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
          * that truncation loop will get them all!
          */
         curnblk = mdnblocks(reln);
-       if (curnblk == InvalidBlockNumber)
-               return InvalidBlockNumber;              /* mdnblocks failed */
         if (nblocks > curnblk)
-               return InvalidBlockNumber;              /* bogus request */
+       {
+               /* Bogus request ... but no complaint if InRecovery */
+               if (InRecovery)
+                       return;
+               ereport(ERROR,
+                               (errmsg("could not truncate relation %u/%u/%u to %u blocks: it's only %u blocks now",
+                                               reln->smgr_rnode.spcNode,
+                                               reln->smgr_rnode.dbNode,
+                                               reln->smgr_rnode.relNode,
+                                               nblocks, curnblk)));
+       }
         if (nblocks == curnblk)
-               return nblocks;                 /* no work */
+               return;                                 /* no work */
  
-       v = mdopen(reln, false);
+       v = mdopen(reln, EXTENSION_FAIL);
  
  #ifndef LET_OS_MANAGE_FILESIZE
         priorblocks = 0;
@@ -626,12 +718,15 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
                          * not delete it, for reasons explained in the header comments.
                          */
                         if (FileTruncate(v->mdfd_vfd, 0) < 0)
-                               return InvalidBlockNumber;
+                               ereport(ERROR,
+                                               (errcode_for_file_access(),
+                                                errmsg("could not truncate relation %u/%u/%u to %u blocks: %m",
+                                                               reln->smgr_rnode.spcNode,
+                                                               reln->smgr_rnode.dbNode,
+                                                               reln->smgr_rnode.relNode,
+                                                               nblocks)));
                         if (!isTemp)
-                       {
-                               if (!register_dirty_segment(reln, v))
-                                       return InvalidBlockNumber;
-                       }
+                               register_dirty_segment(reln, v);
                         v = v->mdfd_chain;
                         Assert(ov != reln->md_fd);      /* we never drop the 1st segment */
                         pfree(ov);
@@ -649,12 +744,15 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
                         BlockNumber lastsegblocks = nblocks - priorblocks;
  
                         if (FileTruncate(v->mdfd_vfd, lastsegblocks * BLCKSZ) < 0)
-                               return InvalidBlockNumber;
+                               ereport(ERROR,
+                                               (errcode_for_file_access(),
+                                                errmsg("could not truncate relation %u/%u/%u to %u blocks: %m",
+                                                               reln->smgr_rnode.spcNode,
+                                                               reln->smgr_rnode.dbNode,
+                                                               reln->smgr_rnode.relNode,
+                                                               nblocks)));
                         if (!isTemp)
-                       {
-                               if (!register_dirty_segment(reln, v))
-                                       return InvalidBlockNumber;
-                       }
+                               register_dirty_segment(reln, v);
                         v = v->mdfd_chain;
                         ov->mdfd_chain = NULL;
                 }
@@ -670,15 +768,16 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
         }
  #else
         if (FileTruncate(v->mdfd_vfd, nblocks * BLCKSZ) < 0)
-               return InvalidBlockNumber;
+               ereport(ERROR,
+                               (errcode_for_file_access(),
+                         errmsg("could not truncate relation %u/%u/%u to %u blocks: %m",
+                                        reln->smgr_rnode.spcNode,
+                                        reln->smgr_rnode.dbNode,
+                                        reln->smgr_rnode.relNode,
+                                        nblocks)));
         if (!isTemp)
-       {
-               if (!register_dirty_segment(reln, v))
-                       return InvalidBlockNumber;
-       }
+               register_dirty_segment(reln, v);
  #endif
-
-       return nblocks;
  }
  
  /*
@@ -687,7 +786,7 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
   * Note that only writes already issued are synced; this routine knows
   * nothing of dirty buffers that may exist inside the buffer manager.
   */
-bool
+void
  mdimmedsync(SMgrRelation reln)
  {
         MdfdVec    *v;
@@ -698,24 +797,32 @@ mdimmedsync(SMgrRelation reln)
          * that fsync loop will get them all!
          */
         curnblk = mdnblocks(reln);
-       if (curnblk == InvalidBlockNumber)
-               return false;                   /* mdnblocks failed */
  
-       v = mdopen(reln, false);
+       v = mdopen(reln, EXTENSION_FAIL);
  
  #ifndef LET_OS_MANAGE_FILESIZE
         while (v != NULL)
         {
                 if (FileSync(v->mdfd_vfd) < 0)
-                       return false;
+                       ereport(ERROR,
+                                       (errcode_for_file_access(),
+                                        errmsg("could not fsync segment %u of relation %u/%u/%u: %m",
+                                                       v->mdfd_segno,
+                                                       reln->smgr_rnode.spcNode,
+                                                       reln->smgr_rnode.dbNode,
+                                                       reln->smgr_rnode.relNode)));
                 v = v->mdfd_chain;
         }
  #else
         if (FileSync(v->mdfd_vfd) < 0)
-               return false;
+               ereport(ERROR,
+                               (errcode_for_file_access(),
+                                errmsg("could not fsync segment %u of relation %u/%u/%u: %m",
+                                               v->mdfd_segno,
+                                               reln->smgr_rnode.spcNode,
+                                               reln->smgr_rnode.dbNode,
+                                               reln->smgr_rnode.relNode)));
  #endif
-
-       return true;
  }
  
  /*
@@ -724,7 +831,7 @@ mdimmedsync(SMgrRelation reln)
   * This is only called during checkpoints, and checkpoints should only
   * occur in processes that have created a pendingOpsTable.
   */
-bool
+void
  mdsync(void)
  {
         HASH_SEQ_STATUS hstat;
@@ -732,7 +839,7 @@ mdsync(void)
         int                     absorb_counter;
  
         if (!pendingOpsTable)
-               return false;
+               elog(ERROR, "cannot sync without a pendingOpsTable");
  
         /*
          * If we are in the bgwriter, the sync had better include all fsync
@@ -795,21 +902,18 @@ mdsync(void)
                          */
                         seg = _mdfd_getseg(reln,
                                                            entry->segno * ((BlockNumber) RELSEG_SIZE),
-                                                          true);
+                                                          false, EXTENSION_RETURN_NULL);
                         if (seg)
                         {
                                 if (FileSync(seg->mdfd_vfd) < 0 &&
                                         errno != ENOENT)
-                               {
-                                       ereport(LOG,
+                                       ereport(ERROR,
                                                         (errcode_for_file_access(),
                                                          errmsg("could not fsync segment %u of relation %u/%u/%u: %m",
                                                                         entry->segno,
                                                                         entry->rnode.spcNode,
                                                                         entry->rnode.dbNode,
                                                                         entry->rnode.relNode)));
-                                       return false;
-                               }
                         }
                 }
  
@@ -818,8 +922,6 @@ mdsync(void)
                                                 HASH_REMOVE, NULL) == NULL)
                         elog(ERROR, "pendingOpsTable corrupted");
         }
-
-       return true;
  }
  
  /*
@@ -830,11 +932,8 @@ mdsync(void)
   * to the background writer process.  If that fails, just do the fsync
   * locally before returning (we expect this will not happen often enough
   * to be a performance problem).
- *
- * A false result implies I/O failure during local fsync.  errno will be
- * valid for error reporting.
   */
-static bool
+static void
  register_dirty_segment(SMgrRelation reln, MdfdVec *seg)
  {
         if (pendingOpsTable)
@@ -847,17 +946,21 @@ register_dirty_segment(SMgrRelation reln, MdfdVec *seg)
                 entry.segno = seg->mdfd_segno;
  
                 (void) hash_search(pendingOpsTable, &entry, HASH_ENTER, NULL);
-               return true;
         }
         else
         {
                 if (ForwardFsyncRequest(reln->smgr_rnode, seg->mdfd_segno))
-                       return true;
-       }
+                       return;                         /* passed it off successfully */
  
-       if (FileSync(seg->mdfd_vfd) < 0)
-               return false;
-       return true;
+               if (FileSync(seg->mdfd_vfd) < 0)
+                       ereport(ERROR,
+                                       (errcode_for_file_access(),
+                                        errmsg("could not fsync segment %u of relation %u/%u/%u: %m",
+                                                       seg->mdfd_segno,
+                                                       reln->smgr_rnode.spcNode,
+                                                       reln->smgr_rnode.dbNode,
+                                                       reln->smgr_rnode.relNode)));
+       }
  }
  
  /*
@@ -931,7 +1034,7 @@ _mdfd_openseg(SMgrRelation reln, BlockNumber segno, int oflags)
         v->mdfd_vfd = fd;
         v->mdfd_segno = segno;
         v->mdfd_chain = NULL;
-       Assert(_mdnblocks(fd, BLCKSZ) <= ((BlockNumber) RELSEG_SIZE));
+       Assert(_mdnblocks(reln, v) <= ((BlockNumber) RELSEG_SIZE));
  
         /* all done */
         return v;
@@ -940,51 +1043,66 @@ _mdfd_openseg(SMgrRelation reln, BlockNumber segno, int oflags)
  
  /*
   *     _mdfd_getseg() -- Find the segment of the relation holding the
- *             specified block.  ereport's on failure.
- *             (Optionally, can return NULL instead of ereport for ENOENT.)
+ *             specified block.
+ *
+ * If the segment doesn't exist, we ereport, return NULL, or create the
+ * segment, according to "behavior".  Note: isTemp need only be correct
+ * in the EXTENSION_CREATE case.
   */
  static MdfdVec *
-_mdfd_getseg(SMgrRelation reln, BlockNumber blkno, bool allowNotFound)
+_mdfd_getseg(SMgrRelation reln, BlockNumber blkno, bool isTemp,
+                        ExtensionBehavior behavior)
  {
-       MdfdVec    *v = mdopen(reln, allowNotFound);
+       MdfdVec    *v = mdopen(reln, behavior);
  
  #ifndef LET_OS_MANAGE_FILESIZE
-       BlockNumber segstogo;
+       BlockNumber targetseg;
         BlockNumber nextsegno;
  
         if (!v)
-               return NULL;                    /* only possible if allowNotFound */
+               return NULL;                    /* only possible if EXTENSION_RETURN_NULL */
  
-       for (segstogo = blkno / ((BlockNumber) RELSEG_SIZE), nextsegno = 1;
-                segstogo > 0;
-                nextsegno++, segstogo--)
+       targetseg = blkno / ((BlockNumber) RELSEG_SIZE);
+       for (nextsegno = 1; nextsegno <= targetseg; nextsegno++)
         {
+               Assert(nextsegno == v->mdfd_segno + 1);
+
                 if (v->mdfd_chain == NULL)
                 {
                         /*
-                        * We will create the next segment only if the target block is
-                        * within it.  This prevents Sorcerer's Apprentice syndrome if a
-                        * bug at higher levels causes us to be handed a ridiculously
-                        * large blkno --- otherwise we could create many thousands of
-                        * empty segment files before reaching the "target" block.      We
-                        * should never need to create more than one new segment per call,
-                        * so this restriction seems reasonable.
+                        * Normally we will create new segments only if authorized by
+                        * the caller (i.e., we are doing mdextend()).  But when doing
+                        * WAL recovery, create segments anyway; this allows cases such as
+                        * replaying WAL data that has a write into a high-numbered
+                        * segment of a relation that was later deleted.  We want to go
+                        * ahead and create the segments so we can finish out the replay.
                          *
-                        * BUT: when doing WAL recovery, disable this logic and create
-                        * segments unconditionally.  In this case it seems better to
-                        * assume the given blkno is good (it presumably came from a
-                        * CRC-checked WAL record); furthermore this lets us cope in the
-                        * case where we are replaying WAL data that has a write into a
-                        * high-numbered segment of a relation that was later deleted.  We
-                        * want to go ahead and create the segments so we can finish out
-                        * the replay.
+                        * We have to maintain the invariant that segments before the
+                        * last active segment are of size RELSEG_SIZE; therefore, pad
+                        * them out with zeroes if needed.  (This only matters if caller
+                        * is extending the relation discontiguously, but that can happen
+                        * in hash indexes.)
                          */
-                       v->mdfd_chain = _mdfd_openseg(reln,
-                                                                                 nextsegno,
-                                                               (segstogo == 1 || InRecovery) ? O_CREAT : 0);
+                       if (behavior == EXTENSION_CREATE || InRecovery)
+                       {
+                               if (_mdnblocks(reln, v) < RELSEG_SIZE)
+                               {
+                                       char   *zerobuf = palloc0(BLCKSZ);
+
+                                       mdextend(reln, nextsegno * ((BlockNumber) RELSEG_SIZE) - 1,
+                                                        zerobuf, isTemp);
+                                       pfree(zerobuf);
+                               }
+                               v->mdfd_chain = _mdfd_openseg(reln, nextsegno, O_CREAT);
+                       }
+                       else
+                       {
+                               /* We won't create segment if not existent */
+                               v->mdfd_chain = _mdfd_openseg(reln, nextsegno, 0);
+                       }
                         if (v->mdfd_chain == NULL)
                         {
-                               if (allowNotFound && errno == ENOENT)
+                               if (behavior == EXTENSION_RETURN_NULL && errno == ENOENT)
                                         return NULL;
                                 ereport(ERROR,
                                                 (errcode_for_file_access(),
@@ -1007,12 +1125,19 @@ _mdfd_getseg(SMgrRelation reln, BlockNumber blkno, bool allowNotFound)
   * Get number of blocks present in a single disk file
   */
  static BlockNumber
-_mdnblocks(File file, Size blcksz)
+_mdnblocks(SMgrRelation reln, MdfdVec *seg)
  {
         long            len;
  
-       len = FileSeek(file, 0L, SEEK_END);
+       len = FileSeek(seg->mdfd_vfd, 0L, SEEK_END);
         if (len < 0)
-               return 0;                               /* on failure, assume file is empty */
-       return (BlockNumber) (len / blcksz);
+               ereport(ERROR,
+                               (errcode_for_file_access(),
+                                errmsg("could not seek to end of segment %u of relation %u/%u/%u: %m",
+                                               seg->mdfd_segno,
+                                               reln->smgr_rnode.spcNode,
+                                               reln->smgr_rnode.dbNode,
+                                               reln->smgr_rnode.relNode)));
+       /* note that this calculation will ignore any partial block at EOF */
+       return (BlockNumber) (len / BLCKSZ);
  }
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index 0ceb800..1a3a00f 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -11,7 +11,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.101 2006/10/04 00:29:58 momjian Exp $
+ *       $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.102 2007/01/03 18:11:01 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -31,30 +31,33 @@
  /*
   * This struct of function pointers defines the API between smgr.c and
   * any individual storage manager module.  Note that smgr subfunctions are
- * generally expected to return TRUE on success, FALSE on error.  (For
- * nblocks and truncate we instead say that returning InvalidBlockNumber
- * indicates an error.)
+ * generally expected to report problems via elog(ERROR).  An exception is
+ * that smgr_unlink should use elog(WARNING), rather than erroring out,
+ * because we normally unlink relations during post-commit/abort cleanup,
+ * and so it's too late to raise an error.  Also, various conditions that
+ * would normally be errors should be allowed during bootstrap and/or WAL
+ * recovery --- see comments in md.c for details.
   */
  typedef struct f_smgr
  {
-       bool            (*smgr_init) (void);    /* may be NULL */
-       bool            (*smgr_shutdown) (void);                /* may be NULL */
-       bool            (*smgr_close) (SMgrRelation reln);
-       bool            (*smgr_create) (SMgrRelation reln, bool isRedo);
-       bool            (*smgr_unlink) (RelFileNode rnode, bool isRedo);
-       bool            (*smgr_extend) (SMgrRelation reln, BlockNumber blocknum,
+       void            (*smgr_init) (void);    /* may be NULL */
+       void            (*smgr_shutdown) (void);                /* may be NULL */
+       void            (*smgr_close) (SMgrRelation reln);
+       void            (*smgr_create) (SMgrRelation reln, bool isRedo);
+       void            (*smgr_unlink) (RelFileNode rnode, bool isRedo);
+       void            (*smgr_extend) (SMgrRelation reln, BlockNumber blocknum,
                                                                                         char *buffer, bool isTemp);
-       bool            (*smgr_read) (SMgrRelation reln, BlockNumber blocknum,
+       void            (*smgr_read) (SMgrRelation reln, BlockNumber blocknum,
                                                                                   char *buffer);
-       bool            (*smgr_write) (SMgrRelation reln, BlockNumber blocknum,
+       void            (*smgr_write) (SMgrRelation reln, BlockNumber blocknum,
                                                                                    char *buffer, bool isTemp);
         BlockNumber (*smgr_nblocks) (SMgrRelation reln);
-       BlockNumber (*smgr_truncate) (SMgrRelation reln, BlockNumber nblocks,
-                                                                                         bool isTemp);
-       bool            (*smgr_immedsync) (SMgrRelation reln);
-       bool            (*smgr_commit) (void);  /* may be NULL */
-       bool            (*smgr_abort) (void);   /* may be NULL */
-       bool            (*smgr_sync) (void);    /* may be NULL */
+       void            (*smgr_truncate) (SMgrRelation reln, BlockNumber nblocks,
+                                                                 bool isTemp);
+       void            (*smgr_immedsync) (SMgrRelation reln);
+       void            (*smgr_commit) (void);  /* may be NULL */
+       void            (*smgr_abort) (void);   /* may be NULL */
+       void            (*smgr_sync) (void);    /* may be NULL */
  } f_smgr;
  
  
@@ -152,12 +155,7 @@ smgrinit(void)
         for (i = 0; i < NSmgr; i++)
         {
                 if (smgrsw[i].smgr_init)
-               {
-                       if (!(*(smgrsw[i].smgr_init)) ())
-                               elog(FATAL, "smgr initialization failed on %s: %m",
-                                        DatumGetCString(DirectFunctionCall1(smgrout,
-                                                                                                                Int16GetDatum(i))));
-               }
+                       (*(smgrsw[i].smgr_init)) ();
         }
  
         /* register the shutdown proc */
@@ -175,12 +173,7 @@ smgrshutdown(int code, Datum arg)
         for (i = 0; i < NSmgr; i++)
         {
                 if (smgrsw[i].smgr_shutdown)
-               {
-                       if (!(*(smgrsw[i].smgr_shutdown)) ())
-                               elog(FATAL, "smgr shutdown failed on %s: %m",
-                                        DatumGetCString(DirectFunctionCall1(smgrout,
-                                                                                                                Int16GetDatum(i))));
-               }
+                       (*(smgrsw[i].smgr_shutdown)) ();
         }
  }
  
@@ -256,13 +249,7 @@ smgrclose(SMgrRelation reln)
  {
         SMgrRelation *owner;
  
-       if (!(*(smgrsw[reln->smgr_which].smgr_close)) (reln))
-               ereport(ERROR,
-                               (errcode_for_file_access(),
-                                errmsg("could not close relation %u/%u/%u: %m",
-                                               reln->smgr_rnode.spcNode,
-                                               reln->smgr_rnode.dbNode,
-                                               reln->smgr_rnode.relNode)));
+       (*(smgrsw[reln->smgr_which].smgr_close)) (reln);
  
         owner = reln->smgr_owner;
  
@@ -354,13 +341,7 @@ smgrcreate(SMgrRelation reln, bool isTemp, bool isRedo)
                                                         reln->smgr_rnode.dbNode,
                                                         isRedo);
  
-       if (!(*(smgrsw[reln->smgr_which].smgr_create)) (reln, isRedo))
-               ereport(ERROR,
-                               (errcode_for_file_access(),
-                                errmsg("could not create relation %u/%u/%u: %m",
-                                               reln->smgr_rnode.spcNode,
-                                               reln->smgr_rnode.dbNode,
-                                               reln->smgr_rnode.relNode)));
+       (*(smgrsw[reln->smgr_which].smgr_create)) (reln, isRedo);
  
         if (isRedo)
                 return;
@@ -482,38 +463,26 @@ smgr_internal_unlink(RelFileNode rnode, int which, bool isTemp, bool isRedo)
         /*
          * And delete the physical files.
          *
-        * Note: we treat deletion failure as a WARNING, not an error, because
-        * we've already decided to commit or abort the current xact.
+        * Note: smgr_unlink must treat deletion failure as a WARNING, not an
+        * ERROR, because we've already decided to commit or abort the current
+        * xact.
          */
-       if (!(*(smgrsw[which].smgr_unlink)) (rnode, isRedo))
-               ereport(WARNING,
-                               (errcode_for_file_access(),
-                                errmsg("could not remove relation %u/%u/%u: %m",
-                                               rnode.spcNode,
-                                               rnode.dbNode,
-                                               rnode.relNode)));
+       (*(smgrsw[which].smgr_unlink)) (rnode, isRedo);
  }
  
  /*
   *     smgrextend() -- Add a new block to a file.
   *
- *             The semantics are basically the same as smgrwrite(): write at the
- *             specified position.  However, we are expecting to extend the
- *             relation (ie, blocknum is the current EOF), and so in case of
- *             failure we clean up by truncating.
+ *             The semantics are nearly the same as smgrwrite(): write at the
+ *             specified position.  However, this is to be used for the case of
+ *             extending a relation (i.e., blocknum is at or beyond the current
+ *             EOF).  Note that we assume writing a block beyond current EOF
+ *             causes intervening file space to become filled with zeroes.
   */
  void
  smgrextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
  {
-       if (!(*(smgrsw[reln->smgr_which].smgr_extend)) (reln, blocknum, buffer,
-                                                                                                       isTemp))
-               ereport(ERROR,
-                               (errcode_for_file_access(),
-                                errmsg("could not extend relation %u/%u/%u: %m",
-                                               reln->smgr_rnode.spcNode,
-                                               reln->smgr_rnode.dbNode,
-                                               reln->smgr_rnode.relNode),
-                                errhint("Check free disk space.")));
+       (*(smgrsw[reln->smgr_which].smgr_extend)) (reln, blocknum, buffer, isTemp);
  }
  
  /*
@@ -527,19 +496,16 @@ smgrextend(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
  void
  smgrread(SMgrRelation reln, BlockNumber blocknum, char *buffer)
  {
-       if (!(*(smgrsw[reln->smgr_which].smgr_read)) (reln, blocknum, buffer))
-               ereport(ERROR,
-                               (errcode_for_file_access(),
-                                errmsg("could not read block %u of relation %u/%u/%u: %m",
-                                               blocknum,
-                                               reln->smgr_rnode.spcNode,
-                                               reln->smgr_rnode.dbNode,
-                                               reln->smgr_rnode.relNode)));
+       (*(smgrsw[reln->smgr_which].smgr_read)) (reln, blocknum, buffer);
  }
  
  /*
   *     smgrwrite() -- Write the supplied buffer out.
   *
+ *             This is to be used only for updating already-existing blocks of a
+ *             relation (ie, those before the current EOF).  To extend a relation,
+ *             use smgrextend().
+ *
   *             This is not a synchronous write -- the block is not necessarily
   *             on disk at return, only dumped out to the kernel.  However,
   *             provisions will be made to fsync the write before the next checkpoint.
@@ -551,60 +517,26 @@ smgrread(SMgrRelation reln, BlockNumber blocknum, char *buffer)
  void
  smgrwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer, bool isTemp)
  {
-       if (!(*(smgrsw[reln->smgr_which].smgr_write)) (reln, blocknum, buffer,
-                                                                                                  isTemp))
-               ereport(ERROR,
-                               (errcode_for_file_access(),
-                                errmsg("could not write block %u of relation %u/%u/%u: %m",
-                                               blocknum,
-                                               reln->smgr_rnode.spcNode,
-                                               reln->smgr_rnode.dbNode,
-                                               reln->smgr_rnode.relNode)));
+       (*(smgrsw[reln->smgr_which].smgr_write)) (reln, blocknum, buffer, isTemp);
  }
  
  /*
   *     smgrnblocks() -- Calculate the number of blocks in the
   *                                      supplied relation.
- *
- *             Returns the number of blocks on success, aborts the current
- *             transaction on failure.
   */
  BlockNumber
  smgrnblocks(SMgrRelation reln)
  {
-       BlockNumber nblocks;
-
-       nblocks = (*(smgrsw[reln->smgr_which].smgr_nblocks)) (reln);
-
-       /*
-        * NOTE: if a relation ever did grow to 2^32-1 blocks, this code would
-        * fail --- but that's a good thing, because it would stop us from
-        * extending the rel another block and having a block whose number
-        * actually is InvalidBlockNumber.
-        */
-       if (nblocks == InvalidBlockNumber)
-               ereport(ERROR,
-                               (errcode_for_file_access(),
-                                errmsg("could not count blocks of relation %u/%u/%u: %m",
-                                               reln->smgr_rnode.spcNode,
-                                               reln->smgr_rnode.dbNode,
-                                               reln->smgr_rnode.relNode)));
-
-       return nblocks;
+       return (*(smgrsw[reln->smgr_which].smgr_nblocks)) (reln);
  }
  
  /*
   *     smgrtruncate() -- Truncate supplied relation to the specified number
   *                                       of blocks
- *
- *             Returns the number of blocks on success, aborts the current
- *             transaction on failure.
   */
-BlockNumber
+void
  smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
  {
-       BlockNumber newblks;
-
         /*
          * Get rid of any buffers for the about-to-be-deleted blocks. bufmgr will
          * just drop them without bothering to write the contents.
@@ -619,16 +551,7 @@ smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
         FreeSpaceMapTruncateRel(&reln->smgr_rnode, nblocks);
  
         /* Do the truncation */
-       newblks = (*(smgrsw[reln->smgr_which].smgr_truncate)) (reln, nblocks,
-                                                                                                                  isTemp);
-       if (newblks == InvalidBlockNumber)
-               ereport(ERROR,
-                               (errcode_for_file_access(),
-                         errmsg("could not truncate relation %u/%u/%u to %u blocks: %m",
-                                        reln->smgr_rnode.spcNode,
-                                        reln->smgr_rnode.dbNode,
-                                        reln->smgr_rnode.relNode,
-                                        nblocks)));
+       (*(smgrsw[reln->smgr_which].smgr_truncate)) (reln, nblocks, isTemp);
  
         if (!isTemp)
         {
@@ -642,7 +565,7 @@ smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
                 XLogRecData rdata;
                 xl_smgr_truncate xlrec;
  
-               xlrec.blkno = newblks;
+               xlrec.blkno = nblocks;
                 xlrec.rnode = reln->smgr_rnode;
  
                 rdata.data = (char *) &xlrec;
@@ -653,8 +576,6 @@ smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
                 lsn = XLogInsert(RM_SMGR_ID, XLOG_SMGR_TRUNCATE | XLOG_NO_TRAN,
                                                  &rdata);
         }
-
-       return newblks;
  }
  
  /*
@@ -683,13 +604,7 @@ smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp)
  void
  smgrimmedsync(SMgrRelation reln)
  {
-       if (!(*(smgrsw[reln->smgr_which].smgr_immedsync)) (reln))
-               ereport(ERROR,
-                               (errcode_for_file_access(),
-                                errmsg("could not sync relation %u/%u/%u: %m",
-                                               reln->smgr_rnode.spcNode,
-                                               reln->smgr_rnode.dbNode,
-                                               reln->smgr_rnode.relNode)));
+       (*(smgrsw[reln->smgr_which].smgr_immedsync)) (reln);
  }
  
  
@@ -843,12 +758,7 @@ smgrcommit(void)
         for (i = 0; i < NSmgr; i++)
         {
                 if (smgrsw[i].smgr_commit)
-               {
-                       if (!(*(smgrsw[i].smgr_commit)) ())
-                               elog(ERROR, "transaction commit failed on %s: %m",
-                                        DatumGetCString(DirectFunctionCall1(smgrout,
-                                                                                                                Int16GetDatum(i))));
-               }
+                       (*(smgrsw[i].smgr_commit)) ();
         }
  }
  
@@ -863,12 +773,7 @@ smgrabort(void)
         for (i = 0; i < NSmgr; i++)
         {
                 if (smgrsw[i].smgr_abort)
-               {
-                       if (!(*(smgrsw[i].smgr_abort)) ())
-                               elog(ERROR, "transaction abort failed on %s: %m",
-                                        DatumGetCString(DirectFunctionCall1(smgrout,
-                                                                                                                Int16GetDatum(i))));
-               }
+                       (*(smgrsw[i].smgr_abort)) ();
         }
  }
  
@@ -883,12 +788,7 @@ smgrsync(void)
         for (i = 0; i < NSmgr; i++)
         {
                 if (smgrsw[i].smgr_sync)
-               {
-                       if (!(*(smgrsw[i].smgr_sync)) ())
-                               elog(ERROR, "storage sync failed on %s: %m",
-                                        DatumGetCString(DirectFunctionCall1(smgrout,
-                                                                                                                Int16GetDatum(i))));
-               }
+                       (*(smgrsw[i].smgr_sync)) ();
         }
  }
  
@@ -910,7 +810,6 @@ smgr_redo(XLogRecPtr lsn, XLogRecord *record)
         {
                 xl_smgr_truncate *xlrec = (xl_smgr_truncate *) XLogRecGetData(record);
                 SMgrRelation reln;
-               BlockNumber newblks;
  
                 reln = smgropen(xlrec->rnode);
  
@@ -931,17 +830,9 @@ smgr_redo(XLogRecPtr lsn, XLogRecord *record)
                 FreeSpaceMapTruncateRel(&reln->smgr_rnode, xlrec->blkno);
  
                 /* Do the truncation */
-               newblks = (*(smgrsw[reln->smgr_which].smgr_truncate)) (reln,
-                                                                                                                          xlrec->blkno,
-                                                                                                                          false);
-               if (newblks == InvalidBlockNumber)
-                       ereport(WARNING,
-                                       (errcode_for_file_access(),
-                         errmsg("could not truncate relation %u/%u/%u to %u blocks: %m",
-                                        reln->smgr_rnode.spcNode,
-                                        reln->smgr_rnode.dbNode,
-                                        reln->smgr_rnode.relNode,
-                                        xlrec->blkno)));
+               (*(smgrsw[reln->smgr_which].smgr_truncate)) (reln,
+                                                                                                        xlrec->blkno,
+                                                                                                        false);
  
                 /* Also tell xlogutils.c about it */
                 XLogTruncateRelation(xlrec->rnode, xlrec->blkno);
diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h

index b768a5b..1c8963e 100644 (file)
--- a/src/include/storage/smgr.h
+++ b/src/include/storage/smgr.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.55 2006/03/24 04:32:13 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/storage/smgr.h,v 1.56 2007/01/03 18:11:01 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -72,7 +72,7 @@ extern void smgrread(SMgrRelation reln, BlockNumber blocknum, char *buffer);
  extern void smgrwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer,
                   bool isTemp);
  extern BlockNumber smgrnblocks(SMgrRelation reln);
-extern BlockNumber smgrtruncate(SMgrRelation reln, BlockNumber nblocks,
+extern void smgrtruncate(SMgrRelation reln, BlockNumber nblocks,
                          bool isTemp);
  extern void smgrimmedsync(SMgrRelation reln);
  extern void smgrDoPendingDeletes(bool isCommit);
@@ -91,20 +91,19 @@ extern void smgr_desc(StringInfo buf, uint8 xl_info, char *rec);
  /* internals: move me elsewhere -- ay 7/94 */
  
  /* in md.c */
-extern bool mdinit(void);
-extern bool mdclose(SMgrRelation reln);
-extern bool mdcreate(SMgrRelation reln, bool isRedo);
-extern bool mdunlink(RelFileNode rnode, bool isRedo);
-extern bool mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer,
+extern void mdinit(void);
+extern void mdclose(SMgrRelation reln);
+extern void mdcreate(SMgrRelation reln, bool isRedo);
+extern void mdunlink(RelFileNode rnode, bool isRedo);
+extern void mdextend(SMgrRelation reln, BlockNumber blocknum, char *buffer,
                  bool isTemp);
-extern bool mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer);
-extern bool mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer,
+extern void mdread(SMgrRelation reln, BlockNumber blocknum, char *buffer);
+extern void mdwrite(SMgrRelation reln, BlockNumber blocknum, char *buffer,
                 bool isTemp);
  extern BlockNumber mdnblocks(SMgrRelation reln);
-extern BlockNumber mdtruncate(SMgrRelation reln, BlockNumber nblocks,
-                  bool isTemp);
-extern bool mdimmedsync(SMgrRelation reln);
-extern bool mdsync(void);
+extern void mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp);
+extern void mdimmedsync(SMgrRelation reln);
+extern void mdsync(void);
  
  extern void RememberFsyncRequest(RelFileNode rnode, BlockNumber segno);
author	Tom Lane <tgl@sss.pgh.pa.us>
	Wed, 3 Jan 2007 18:11:01 +0000 (18:11 +0000)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Wed, 3 Jan 2007 18:11:01 +0000 (18:11 +0000)
src/backend/access/hash/hashpage.c		patch \| blob \| history
src/backend/access/nbtree/nbtsort.c		patch \| blob \| history
src/backend/commands/tablecmds.c		patch \| blob \| history
src/backend/storage/smgr/md.c		patch \| blob \| history
src/backend/storage/smgr/smgr.c		patch \| blob \| history
src/include/storage/smgr.h		patch \| blob \| history