OSDN Git Service

Fix subtransaction behavior for large objects, temp namespace, files,
author: Tom Lane <tgl@sss.pgh.pa.us>
Wed, 28 Jul 2004 14:23:31 +0000 (14:23 +0000)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Wed, 28 Jul 2004 14:23:31 +0000 (14:23 +0000)
password/group files.  Also allow read-only subtransactions of a read-write
parent, but not vice versa.  These are the reasonably noncontroversial
parts of Alvaro's recent mop-up patch, plus further work on large objects
to minimize use of the TopTransactionResourceOwner.

17 files changed:
src/backend/access/transam/xact.c
src/backend/catalog/namespace.c
src/backend/commands/user.c
src/backend/libpq/be-fsstubs.c
src/backend/storage/file/fd.c
src/backend/storage/large_object/inv_api.c
src/backend/storage/lmgr/lmgr.c
src/backend/utils/cache/inval.c
src/backend/utils/misc/guc.c
src/backend/utils/time/tqual.c
src/bin/psql/tab-complete.c
src/include/catalog/namespace.h
src/include/commands/user.h
src/include/libpq/be-fsstubs.h
src/include/storage/fd.h
src/include/storage/large_object.h
src/include/utils/inval.h

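diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index 55d5ef9..f938cdc 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c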
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.172 2004/07/27 05:10:49 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.173 2004/07/28 14:23:27 tgl Exp $
  *
  * NOTES
  *             Transaction aborts can now occur two ways:
@@ -224,6 +224,7 @@ typedef struct TransactionStateData
        ResourceOwner   curTransactionOwner;    /* my query resources */
        List               *childXids;                          /* subcommitted child XIDs */
        AclId                   currentUser;                    /* subxact start current_user */
+       bool                    prevXactReadOnly;               /* entry-time xact r/o state */
        struct TransactionStateData *parent;    /* back link to parent */
 } TransactionStateData;
 
@@ -284,6 +285,7 @@ static TransactionStateData TopTransactionStateData = {
        NULL,                                           /* cur transaction resource owner */
        NIL,                                            /* subcommitted child Xids */
        0,                                                      /* entry-time current userid */
+       false,                                          /* entry-time xact r/o state */
        NULL                                            /* link to parent state block */
 };
 
@@ -1242,7 +1244,8 @@ StartTransaction(void)
         * check the current transaction state
         */
        if (s->state != TRANS_DEFAULT)
-               elog(WARNING, "StartTransaction and not in default state");
+               elog(WARNING, "StartTransaction while in %s state",
+                        TransStateAsString(s->state));
 
        /*
         * set the current transaction state information appropriately during
@@ -1287,6 +1290,8 @@ StartTransaction(void)
         * you won't because it doesn't work during startup; the userid isn't
         * set yet during a backend's first transaction start.  We only use
         * the currentUser field in sub-transaction state structs.
+        *
+        * prevXactReadOnly is also valid only in sub-transactions.
         */
 
        /*
@@ -1319,7 +1324,8 @@ CommitTransaction(void)
         * check the current transaction state
         */
        if (s->state != TRANS_INPROGRESS)
-               elog(WARNING, "CommitTransaction and not in in-progress state");
+               elog(WARNING, "CommitTransaction while in %s state",
+                        TransStateAsString(s->state));
        Assert(s->parent == NULL);
 
        /*
@@ -1351,14 +1357,14 @@ CommitTransaction(void)
 
        AtCommit_Portals();
 
-       /* handle commit for large objects [ PA, 7/17/98 ] */
-       /* XXX probably this does not belong here */
-       lo_commit(true);
+       /* close large objects before lower-level cleanup */
+       AtEOXact_LargeObject(true);
 
        /* NOTIFY commit must come before lower-level cleanup */
        AtCommit_Notify();
 
        /* Update the flat password file if we changed pg_shadow or pg_group */
+       /* This should be the last step before commit */
        AtEOXact_UpdatePasswordFile(true);
 
        /*
@@ -1486,7 +1492,8 @@ AbortTransaction(void)
         * check the current transaction state
         */
        if (s->state != TRANS_INPROGRESS)
-               elog(WARNING, "AbortTransaction and not in in-progress state");
+               elog(WARNING, "AbortTransaction while in %s state",
+                        TransStateAsString(s->state));
        Assert(s->parent == NULL);
 
        /*
@@ -1515,7 +1522,7 @@ AbortTransaction(void)
         */
        DeferredTriggerAbortXact();
        AtAbort_Portals();
-       lo_commit(false);                       /* 'false' means it's abort */
+       AtEOXact_LargeObject(false);                    /* 'false' means it's abort */
        AtAbort_Notify();
        AtEOXact_UpdatePasswordFile(false);
 
@@ -1870,6 +1877,9 @@ CleanupAbortedSubTransactions(bool returnName)
                s = CurrentTransactionState;
        }
 
+       AssertState(s->blockState == TBLOCK_SUBINPROGRESS ||
+                               s->blockState == TBLOCK_INPROGRESS);
+
        return name;
 }
 
@@ -2866,7 +2876,8 @@ StartSubTransaction(void)
        TransactionState s = CurrentTransactionState;
 
        if (s->state != TRANS_DEFAULT)
-               elog(WARNING, "StartSubTransaction and not in default state");
+               elog(WARNING, "StartSubTransaction while in %s state",
+                        TransStateAsString(s->state));
 
        s->state = TRANS_START;
 
@@ -2889,6 +2900,7 @@ StartSubTransaction(void)
         * Finish setup of other transaction state fields.
         */
        s->currentUser = GetUserId();
+       s->prevXactReadOnly = XactReadOnly;
        
        /*
         * Initialize other subsystems for new subtransaction
@@ -2913,7 +2925,8 @@ CommitSubTransaction(void)
        ShowTransactionState("CommitSubTransaction");
 
        if (s->state != TRANS_INPROGRESS)
-               elog(WARNING, "CommitSubTransaction and not in in-progress state");
+               elog(WARNING, "CommitSubTransaction while in %s state",
+                        TransStateAsString(s->state));
 
        /* Pre-commit processing */
        AtSubCommit_Portals(s->parent->transactionIdData,
@@ -2930,9 +2943,18 @@ CommitSubTransaction(void)
        /* Post-commit cleanup */
        AtSubCommit_smgr();
 
-       AtSubEOXact_Inval(true);
+       AtEOSubXact_Inval(true);
        AtEOSubXact_SPI(true, s->transactionIdData);
 
+       AtEOSubXact_LargeObject(true, s->transactionIdData,
+                                                       s->parent->transactionIdData);
+       AtEOSubXact_UpdatePasswordFile(true, s->transactionIdData,
+                                                                  s->parent->transactionIdData);
+       AtEOSubXact_Files(true, s->transactionIdData,
+                                         s->parent->transactionIdData);
+       AtEOSubXact_Namespace(true, s->transactionIdData,
+                                                 s->parent->transactionIdData);
+
        /*
         * Note that we just release the resource owner's resources and don't
         * delete it.  This is because locks are not actually released here.
@@ -2953,6 +2975,13 @@ CommitSubTransaction(void)
        AtEOSubXact_on_commit_actions(true, s->transactionIdData,
                                                                  s->parent->transactionIdData);
 
+       /*
+        * We need to restore the upper transaction's read-only state,
+        * in case the upper is read-write while the child is read-only;
+        * GUC will incorrectly think it should leave the child state in place.
+        */
+       XactReadOnly = s->prevXactReadOnly;
+
        CurrentResourceOwner = s->parent->curTransactionOwner;
        CurTransactionResourceOwner = s->parent->curTransactionOwner;
        s->curTransactionOwner = NULL;
@@ -2973,7 +3002,8 @@ AbortSubTransaction(void)
        ShowTransactionState("AbortSubTransaction");
 
        if (s->state != TRANS_INPROGRESS)
-               elog(WARNING, "AbortSubTransaction and not in in-progress state");
+               elog(WARNING, "AbortSubTransaction while in %s state",
+                        TransStateAsString(s->state));
 
        HOLD_INTERRUPTS();
 
@@ -3010,7 +3040,16 @@ AbortSubTransaction(void)
        AtEOSubXact_SPI(false, s->transactionIdData);
        AtSubAbort_Portals(s->parent->transactionIdData,
                                           s->parent->curTransactionOwner);
-       AtSubEOXact_Inval(false);
+       AtEOSubXact_Inval(false);
+
+       AtEOSubXact_LargeObject(false, s->transactionIdData,
+                                                       s->parent->transactionIdData);
+       AtEOSubXact_UpdatePasswordFile(false, s->transactionIdData,
+                                                                  s->parent->transactionIdData);
+       AtEOSubXact_Files(false, s->transactionIdData,
+                                         s->parent->transactionIdData);
+       AtEOSubXact_Namespace(false, s->transactionIdData,
+                                                 s->parent->transactionIdData);
 
        ResourceOwnerRelease(s->curTransactionOwner,
                                                 RESOURCE_RELEASE_BEFORE_LOCKS,
@@ -3041,6 +3080,13 @@ AbortSubTransaction(void)
         */
        SetUserId(s->currentUser);
 
+       /*
+        * Restore the upper transaction's read-only state, too.  This should
+        * be redundant with GUC's cleanup but we may as well do it for
+        * consistency with the commit case.
+        */
+       XactReadOnly = s->prevXactReadOnly;
+
        CommandCounterIncrement();
 
        RESUME_INTERRUPTS();
@@ -3057,7 +3103,8 @@ CleanupSubTransaction(void)
        ShowTransactionState("CleanupSubTransaction");
 
        if (s->state != TRANS_ABORT)
-               elog(WARNING, "CleanupSubTransaction and not in aborted state");
+               elog(WARNING, "CleanupSubTransaction while in %s state",
+                        TransStateAsString(s->state));
 
        AtSubCleanup_Portals();
 
@@ -3088,7 +3135,8 @@ StartAbortedSubTransaction(void)
        TransactionState s = CurrentTransactionState;
 
        if (s->state != TRANS_DEFAULT)
-               elog(WARNING, "StartAbortedSubTransaction and not in default state");
+               elog(WARNING, "StartAbortedSubTransaction while in %s state",
+                        TransStateAsString(s->state));
 
        s->state = TRANS_START;
 
@@ -3168,7 +3216,8 @@ PopTransaction(void)
        TransactionState s = CurrentTransactionState;
 
        if (s->state != TRANS_DEFAULT)
-               elog(WARNING, "PopTransaction and not in default state");
+               elog(WARNING, "PopTransaction while in %s state",
+                        TransStateAsString(s->state));
 
        if (s->parent == NULL)
                elog(FATAL, "PopTransaction with no parent");
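diff --git a/src/backend/catalog/namespace.c b/src/backend/catalog/namespace.c
index b412023..7bc388c 100644
--- a/src/backend/catalog/namespace.c
+++ b/src/backend/catalog/namespace.c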
@@ -13,7 +13,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/catalog/namespace.c,v 1.67 2004/06/18 06:13:19 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/catalog/namespace.c,v 1.68 2004/07/28 14:23:27 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -107,12 +107,17 @@ static bool namespaceSearchPathValid = true;
  * myTempNamespace is InvalidOid until and unless a TEMP namespace is set up
  * in a particular backend session (this happens when a CREATE TEMP TABLE
  * command is first executed). Thereafter it's the OID of the temp namespace.
- * firstTempTransaction flags whether we've committed creation of the TEMP
- * namespace or not.
+ *
+ * myTempNamespaceXID shows whether we've created the TEMP namespace in the
+ * current transaction.  The TransactionId propagates up the transaction tree,
+ * so the main transaction will correctly recognize the flag if all
+ * intermediate subtransactions commit.  When it is InvalidTransactionId,
+ * we either haven't made the TEMP namespace yet, or have successfully
+ * committed its creation, depending on whether myTempNamespace is valid.
  */
 static Oid     myTempNamespace = InvalidOid;
 
-static bool firstTempTransaction = false;
+static TransactionId myTempNamespaceXID = InvalidTransactionId;
 
 /*
  * "Special" namespace for CREATE SCHEMA.  If set, it's the first search
@@ -1688,7 +1693,9 @@ InitTempTableNamespace(void)
         */
        myTempNamespace = namespaceId;
 
-       firstTempTransaction = true;
+       /* It should not be done already. */
+       AssertState(myTempNamespaceXID == InvalidTransactionId);
+       myTempNamespaceXID = GetCurrentTransactionId();
 
        namespaceSearchPathValid = false;       /* need to rebuild list */
 }
@@ -1707,7 +1714,7 @@ AtEOXact_Namespace(bool isCommit)
         * temp tables at backend shutdown.  (We only want to register the
         * callback once per session, so this is a good place to do it.)
         */
-       if (firstTempTransaction)
+       if (myTempNamespaceXID == GetCurrentTransactionId())
        {
                if (isCommit)
                        on_shmem_exit(RemoveTempRelationsCallback, 0);
@@ -1716,7 +1723,7 @@ AtEOXact_Namespace(bool isCommit)
                        myTempNamespace = InvalidOid;
                        namespaceSearchPathValid = false;       /* need to rebuild list */
                }
-               firstTempTransaction = false;
+               myTempNamespaceXID = InvalidTransactionId;
        }
 
        /*
@@ -1730,6 +1737,32 @@ AtEOXact_Namespace(bool isCommit)
 }
 
 /*
+ * AtEOSubXact_Namespace
+ *
+ * At subtransaction commit, propagate the temp-namespace-creation
+ * flag to the parent transaction.
+ *
+ * At subtransaction abort, forget the flag if we set it up.
+ */
+void
+AtEOSubXact_Namespace(bool isCommit, TransactionId myXid,
+                                         TransactionId parentXid)
+{
+       if (myTempNamespaceXID == myXid)
+       {
+               if (isCommit)
+                       myTempNamespaceXID = parentXid;
+               else
+               {
+                       myTempNamespaceXID = InvalidTransactionId;
+                       /* TEMP namespace creation failed, so reset state */
+                       myTempNamespace = InvalidOid;
+                       namespaceSearchPathValid = false;       /* need to rebuild list */
+               }
+       }
+}
+
+/*
  * Remove all relations in the specified temp namespace.
  *
  * This is called at backend shutdown (if we made any temp relations).
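diff --git a/src/backend/commands/user.c b/src/backend/commands/user.c
index 255428f..da8f92a 100644
--- a/src/backend/commands/user.c
+++ b/src/backend/commands/user.c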
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/backend/commands/user.c,v 1.141 2004/05/26 04:41:12 neilc Exp $
+ * $PostgreSQL: pgsql/src/backend/commands/user.c,v 1.142 2004/07/28 14:23:28 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 
 extern bool Password_encryption;
 
-static bool user_file_update_needed = false;
-static bool group_file_update_needed = false;
+/*
+ * The need-to-update-files flags are a pair of TransactionIds that show what
+ * level of the transaction tree requested the update.  To register an update,
+ * the transaction saves its own TransactionId in the flag, unless the value
+ * was already set to a valid TransactionId.  If it aborts and the value is its
+ * TransactionId, it resets the value to InvalidTransactionId.  If it commits,
+ * it changes the value to its parent's TransactionId.  This way the value is
+ * propagated up to the topmost transaction, which will update the files if a
+ * valid TransactionId is detected.
+ */
+static TransactionId user_file_update_xid = InvalidTransactionId;
+static TransactionId group_file_update_xid = InvalidTransactionId;
+
+#define user_file_update_needed() \
+       do { \
+               if (user_file_update_xid == InvalidTransactionId) \
+                       user_file_update_xid = GetCurrentTransactionId(); \
+       } while (0)
+
+#define group_file_update_needed() \
+       do { \
+               if (group_file_update_xid == InvalidTransactionId) \
+                       group_file_update_xid = GetCurrentTransactionId(); \
+       } while (0)
 
 
 static void CheckPgUserAclNotNull(void);
@@ -402,8 +424,8 @@ write_user_file(Relation urel)
 Datum
 update_pg_pwd_and_pg_group(PG_FUNCTION_ARGS)
 {
-       user_file_update_needed = true;
-       group_file_update_needed = true;
+       user_file_update_needed();
+       group_file_update_needed();
 
        return PointerGetDatum(NULL);
 }
@@ -429,13 +451,14 @@ AtEOXact_UpdatePasswordFile(bool isCommit)
        Relation        urel = NULL;
        Relation        grel = NULL;
 
-       if (!(user_file_update_needed || group_file_update_needed))
+       if (user_file_update_xid == InvalidTransactionId &&
+               group_file_update_xid == InvalidTransactionId)
                return;
 
        if (!isCommit)
        {
-               user_file_update_needed = false;
-               group_file_update_needed = false;
+               user_file_update_xid = InvalidTransactionId;
+               group_file_update_xid = InvalidTransactionId;
                return;
        }
 
@@ -447,22 +470,22 @@ AtEOXact_UpdatePasswordFile(bool isCommit)
         * pg_shadow or pg_group, which likely won't have gotten a strong
         * enough lock), so get the locks we need before writing anything.
         */
-       if (user_file_update_needed)
+       if (user_file_update_xid != InvalidTransactionId)
                urel = heap_openr(ShadowRelationName, ExclusiveLock);
-       if (group_file_update_needed)
+       if (group_file_update_xid != InvalidTransactionId)
                grel = heap_openr(GroupRelationName, ExclusiveLock);
 
        /* Okay to write the files */
-       if (user_file_update_needed)
+       if (user_file_update_xid != InvalidTransactionId)
        {
-               user_file_update_needed = false;
+               user_file_update_xid = InvalidTransactionId;
                write_user_file(urel);
                heap_close(urel, NoLock);
        }
 
-       if (group_file_update_needed)
+       if (group_file_update_xid != InvalidTransactionId)
        {
-               group_file_update_needed = false;
+               group_file_update_xid = InvalidTransactionId;
                write_group_file(grel);
                heap_close(grel, NoLock);
        }
@@ -473,7 +496,33 @@ AtEOXact_UpdatePasswordFile(bool isCommit)
        SendPostmasterSignal(PMSIGNAL_PASSWORD_CHANGE);
 }
 
+/*
+ * AtEOSubXact_UpdatePasswordFile
+ *
+ * Called at subtransaction end, this routine resets or updates the
+ * need-to-update-files flags.
+ */
+void
+AtEOSubXact_UpdatePasswordFile(bool isCommit, TransactionId myXid,
+                                                          TransactionId parentXid)
+{
+       if (isCommit)
+       {
+               if (user_file_update_xid == myXid)
+                       user_file_update_xid = parentXid;
+
+               if (group_file_update_xid == myXid)
+                       group_file_update_xid = parentXid;
+       }
+       else
+       {
+               if (user_file_update_xid == myXid)
+                       user_file_update_xid = InvalidTransactionId;
 
+               if (group_file_update_xid == myXid)
+                       group_file_update_xid = InvalidTransactionId;
+       }
+}
 
 /*
  * CREATE USER
@@ -728,7 +777,7 @@ CreateUser(CreateUserStmt *stmt)
        /*
         * Set flag to update flat password file at commit.
         */
-       user_file_update_needed = true;
+       user_file_update_needed();
 }
 
 
@@ -925,7 +974,7 @@ AlterUser(AlterUserStmt *stmt)
        /*
         * Set flag to update flat password file at commit.
         */
-       user_file_update_needed = true;
+       user_file_update_needed();
 }
 
 
@@ -1147,7 +1196,7 @@ DropUser(DropUserStmt *stmt)
        /*
         * Set flag to update flat password file at commit.
         */
-       user_file_update_needed = true;
+       user_file_update_needed();
 }
 
 
@@ -1233,7 +1282,7 @@ RenameUser(const char *oldname, const char *newname)
        ReleaseSysCache(oldtuple);
        heap_close(rel, NoLock);
 
-       user_file_update_needed = true;
+       user_file_update_needed();
 }
 
 
@@ -1438,7 +1487,7 @@ CreateGroup(CreateGroupStmt *stmt)
        /*
         * Set flag to update flat group file at commit.
         */
-       group_file_update_needed = true;
+       group_file_update_needed();
 }
 
 
@@ -1590,7 +1639,7 @@ AlterGroup(AlterGroupStmt *stmt, const char *tag)
        /*
         * Set flag to update flat group file at commit.
         */
-       group_file_update_needed = true;
+       group_file_update_needed();
 }
 
 /*
@@ -1730,7 +1779,7 @@ DropGroup(DropGroupStmt *stmt)
        /*
         * Set flag to update flat group file at commit.
         */
-       group_file_update_needed = true;
+       group_file_update_needed();
 }
 
 
@@ -1776,5 +1825,5 @@ RenameGroup(const char *oldname, const char *newname)
        heap_close(rel, NoLock);
        heap_freetuple(tup);
 
-       group_file_update_needed = true;
+       group_file_update_needed();
 }
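diff --git a/src/backend/libpq/be-fsstubs.c b/src/backend/libpq/be-fsstubs.c
index ed19e76..21d1f3d 100644
--- a/src/backend/libpq/be-fsstubs.c
+++ b/src/backend/libpq/be-fsstubs.c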
@@ -1,24 +1,22 @@
 /*-------------------------------------------------------------------------
  *
  * be-fsstubs.c
- *       support for filesystem operations on large objects
+ *       Builtin functions for open/close/read/write operations on large objects
  *
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/libpq/be-fsstubs.c,v 1.70 2004/02/10 01:55:25 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/libpq/be-fsstubs.c,v 1.71 2004/07/28 14:23:28 tgl Exp $
  *
  * NOTES
  *       This should be moved to a more appropriate place.  It is here
  *       for lack of a better place.
  *
- *       Builtin functions for open/close/read/write operations on large objects.
- *
  *       These functions operate in a private MemoryContext, which means
- *       that large object descriptors hang around until we destroy the context.
- *       That happens in lo_commit().  It'd be possible to prolong the lifetime
+ *       that large object descriptors hang around until we destroy the context
+ *       at transaction end.  It'd be possible to prolong the lifetime
  *       of the context so that LO FDs are good across transactions (for example,
  *       we could release the context only if we see that no FDs remain open).
  *       But we'd need additional state in order to do the right thing at the
  *       existing documented semantics of LO FDs: they're only good within a
  *       transaction.
  *
+ *       As of PostgreSQL 7.5, much of the angst expressed above is no longer
+ *       relevant, and in fact it'd be pretty easy to allow LO FDs to stay
+ *       open across transactions.  However backwards compatibility suggests
+ *       that we should stick to the status quo.
+ *
  *-------------------------------------------------------------------------
  */
 
@@ -46,8 +49,6 @@
 #include "utils/memutils.h"
 
 
-/* [PA] is Pascal André <andre@via.ecp.fr> */
-
 /*#define FSDB 1*/
 #define BUFSIZE                        8192
 
@@ -68,6 +69,7 @@ static MemoryContext fscxt = NULL;
 static int     newLOfd(LargeObjectDesc *lobjCookie);
 static void deleteLOfd(int fd);
 
+
 /*****************************************************************************
  *     File Interfaces for Large Objects
  *****************************************************************************/
@@ -399,7 +401,7 @@ lo_import(PG_FUNCTION_ARGS)
        lobjOid = lobj->id;
 
        /*
-        * read in from the Unix file and write to the inversion file
+        * read in from the filesystem and write to the inversion file
         */
        while ((nbytes = FileRead(fd, buf, BUFSIZE)) > 0)
        {
@@ -471,7 +473,7 @@ lo_export(PG_FUNCTION_ARGS)
                                                fnamebuf)));
 
        /*
-        * read in from the inversion file and write to the Unix file
+        * read in from the inversion file and write to the filesystem
         */
        while ((nbytes = inv_read(lobj, buf, BUFSIZE)) > 0)
        {
@@ -490,11 +492,11 @@ lo_export(PG_FUNCTION_ARGS)
 }
 
 /*
- * lo_commit -
- *              prepares large objects for transaction commit [PA, 7/17/98]
+ * AtEOXact_LargeObject -
+ *              prepares large objects for transaction commit
  */
 void
-lo_commit(bool isCommit)
+AtEOXact_LargeObject(bool isCommit)
 {
        int                     i;
        MemoryContext currentContext;
@@ -505,8 +507,8 @@ lo_commit(bool isCommit)
        currentContext = MemoryContextSwitchTo(fscxt);
 
        /*
-        * Clean out still-open index scans (not necessary if aborting) and
-        * clear cookies array so that LO fds are no longer good.
+        * Close LO fds and clear cookies array so that LO fds are no longer good.
+        * On abort we skip the close step.
         */
        for (i = 0; i < cookies_size; i++)
        {
@@ -514,7 +516,7 @@ lo_commit(bool isCommit)
                {
                        if (isCommit)
                                inv_close(cookies[i]);
-                       cookies[i] = NULL;
+                       deleteLOfd(i);
                }
        }
 
@@ -527,8 +529,47 @@ lo_commit(bool isCommit)
        /* Release the LO memory context to prevent permanent memory leaks. */
        MemoryContextDelete(fscxt);
        fscxt = NULL;
+
+       /* Give inv_api.c a chance to clean up, too */
+       close_lo_relation(isCommit);
 }
 
+/*
+ * AtEOSubXact_LargeObject
+ *             Take care of large objects at subtransaction commit/abort
+ *
+ * Reassign LOs created/opened during a committing subtransaction
+ * to the parent transaction.  On abort, just close them.
+ */
+void
+AtEOSubXact_LargeObject(bool isCommit, TransactionId myXid,
+                                               TransactionId parentXid)
+{
+       int                             i;
+
+       if (fscxt == NULL)                      /* no LO operations in this xact */
+               return;
+
+       for (i = 0; i < cookies_size; i++)
+       {
+               LargeObjectDesc *lo = cookies[i];
+
+               if (lo != NULL && lo->xid == myXid)
+               {
+                       if (isCommit)
+                               lo->xid = parentXid;
+                       else
+                       {
+                               /*
+                                * Make sure we do not call inv_close twice if it errors out
+                                * for some reason.  Better a leak than a crash.
+                                */
+                               deleteLOfd(i);
+                               inv_close(lo);
+                       }
+               }
+       }
+}
 
 /*****************************************************************************
  *     Support routines for this file
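diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index 96de541..918d541 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c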
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/storage/file/fd.c,v 1.109 2004/05/31 03:48:04 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/storage/file/fd.c,v 1.110 2004/07/28 14:23:28 tgl Exp $
  *
  * NOTES:
  *
@@ -47,6 +47,7 @@
 #include <fcntl.h>
 
 #include "miscadmin.h"
+#include "access/xact.h"
 #include "storage/fd.h"
 #include "storage/ipc.h"
 
@@ -122,6 +123,7 @@ typedef struct vfd
 {
        signed short fd;                        /* current FD, or VFD_CLOSED if none */
        unsigned short fdstate;         /* bitflags for VFD's state */
+       TransactionId create_xid;       /* for XACT_TEMPORARY fds, creating Xid */
        File            nextFree;               /* link to next free VFD, if in freelist */
        File            lruMoreRecently;        /* doubly linked recency-of-use list */
        File            lruLessRecently;
@@ -146,27 +148,31 @@ static Size SizeVfdCache = 0;
 static int     nfile = 0;
 
 /*
- * List of stdio FILEs opened with AllocateFile.
+ * List of stdio FILEs and <dirent.h> DIRs opened with AllocateFile
+ * and AllocateDir.
  *
- * Since we don't want to encourage heavy use of AllocateFile, it seems
- * OK to put a pretty small maximum limit on the number of simultaneously
- * allocated files.
+ * Since we don't want to encourage heavy use of AllocateFile or AllocateDir,
+ * it seems OK to put a pretty small maximum limit on the number of
+ * simultaneously allocated descs.
  */
-#define MAX_ALLOCATED_FILES  32
+#define MAX_ALLOCATED_DESCS  32
 
-static int     numAllocatedFiles = 0;
-static FILE *allocatedFiles[MAX_ALLOCATED_FILES];
+typedef enum {
+       AllocateDescFile,
+       AllocateDescDir
+} AllocateDescKind;
 
-/*
- * List of <dirent.h> DIRs opened with AllocateDir.
- *
- * Since we don't have heavy use of AllocateDir, it seems OK to put a pretty
- * small maximum limit on the number of simultaneously allocated dirs.
- */
-#define MAX_ALLOCATED_DIRS  10
+typedef struct {
+       AllocateDescKind        kind;
+       union   {
+               FILE    *file;
+               DIR             *dir;
+       } desc;
+       TransactionId create_xid;
+} AllocateDesc;
 
-static int     numAllocatedDirs = 0;
-static DIR *allocatedDirs[MAX_ALLOCATED_DIRS];
+static int numAllocatedDescs = 0;
+static AllocateDesc allocatedDescs[MAX_ALLOCATED_DESCS];
 
 /*
  * Number of temporary files opened during the current session;
@@ -499,7 +505,7 @@ LruInsert(File file)
 
        if (FileIsNotOpen(file))
        {
-               while (nfile + numAllocatedFiles + numAllocatedDirs >= max_safe_fds)
+               while (nfile + numAllocatedDescs >= max_safe_fds)
                {
                        if (!ReleaseLruFile())
                                break;
@@ -759,7 +765,7 @@ fileNameOpenFile(FileName fileName,
        file = AllocateVfd();
        vfdP = &VfdCache[file];
 
-       while (nfile + numAllocatedFiles + numAllocatedDirs >= max_safe_fds)
+       while (nfile + numAllocatedDescs >= max_safe_fds)
        {
                if (!ReleaseLruFile())
                        break;
@@ -876,7 +882,10 @@ OpenTemporaryFile(bool interXact)
 
        /* Mark it for deletion at EOXact */
        if (!interXact)
+       {
                VfdCache[file].fdstate |= FD_XACT_TEMPORARY;
+               VfdCache[file].create_xid = GetCurrentTransactionId();
+       }
 
        return file;
 }
@@ -1134,24 +1143,29 @@ AllocateFile(char *name, char *mode)
 {
        FILE       *file;
 
-       DO_DB(elog(LOG, "AllocateFile: Allocated %d", numAllocatedFiles));
+       DO_DB(elog(LOG, "AllocateFile: Allocated %d (%s)",
+                          numAllocatedDescs, name));
 
        /*
-        * The test against MAX_ALLOCATED_FILES prevents us from overflowing
+        * The test against MAX_ALLOCATED_DESCS prevents us from overflowing
         * allocatedFiles[]; the test against max_safe_fds prevents AllocateFile
         * from hogging every one of the available FDs, which'd lead to infinite
         * looping.
         */
-       if (numAllocatedFiles >= MAX_ALLOCATED_FILES ||
-               numAllocatedFiles + numAllocatedDirs >= max_safe_fds - 1)
+       if (numAllocatedDescs >= MAX_ALLOCATED_DESCS ||
+               numAllocatedDescs >= max_safe_fds - 1)
                elog(ERROR, "too many private files demanded");
 
 TryAgain:
        if ((file = fopen(name, mode)) != NULL)
        {
-               allocatedFiles[numAllocatedFiles] = file;
-               numAllocatedFiles++;
-               return file;
+               AllocateDesc *desc = &allocatedDescs[numAllocatedDescs];
+
+               desc->kind = AllocateDescFile;
+               desc->desc.file = file;
+               desc->create_xid = GetCurrentTransactionId();
+               numAllocatedDescs++;
+               return desc->desc.file;
        }
 
        if (errno == EMFILE || errno == ENFILE)
@@ -1171,6 +1185,38 @@ TryAgain:
 }
 
 /*
+ * Close and release an AllocateDesc of either kind (FILE or DIR), returning
+ * the result of the underlying fclose() or closedir() call.
+ * The argument *must* point into the allocatedDescs[] array.
+ */
+static int
+FreeDesc(AllocateDesc *desc)
+{
+       int             result;
+
+       /* Close the underlying object */
+       switch (desc->kind)
+       {
+               case AllocateDescFile:
+                       result = fclose(desc->desc.file);
+                       break;
+               case AllocateDescDir:
+                       result = closedir(desc->desc.dir);
+                       break;
+               default:
+                       elog(ERROR, "AllocateDesc kind not recognized");
+                       result = 0;                     /* keep compiler quiet */
+                       break;
+       }
+
+       /* Compact the array by moving its last entry into the freed slot */
+       numAllocatedDescs--;
+       *desc = allocatedDescs[numAllocatedDescs];
+
+       return result;
+}
+
+/*
  * Close a file returned by AllocateFile.
  *
  * Note we do not check fclose's return value --- it is up to the caller
@@ -1181,20 +1227,19 @@ FreeFile(FILE *file)
 {
        int                     i;
 
-       DO_DB(elog(LOG, "FreeFile: Allocated %d", numAllocatedFiles));
+       DO_DB(elog(LOG, "FreeFile: Allocated %d", numAllocatedDescs));
 
        /* Remove file from list of allocated files, if it's present */
-       for (i = numAllocatedFiles; --i >= 0;)
+       for (i = numAllocatedDescs; --i >= 0;)
        {
-               if (allocatedFiles[i] == file)
-               {
-                       numAllocatedFiles--;
-                       allocatedFiles[i] = allocatedFiles[numAllocatedFiles];
-                       break;
-               }
+               AllocateDesc *desc = &allocatedDescs[i];
+
+               if (desc->kind == AllocateDescFile && desc->desc.file == file)
+                       return FreeDesc(desc);
        }
-       if (i < 0)
-               elog(WARNING, "file passed to FreeFile was not obtained from AllocateFile");
+
+       /* Only get here if someone passes us a file not in allocatedDescs */
+       elog(WARNING, "file passed to FreeFile was not obtained from AllocateFile");
 
        return fclose(file);
 }
@@ -1213,24 +1258,29 @@ AllocateDir(const char *dirname)
 {
        DIR        *dir;
 
-       DO_DB(elog(LOG, "AllocateDir: Allocated %d", numAllocatedDirs));
+       DO_DB(elog(LOG, "AllocateDir: Allocated %d (%s)",
+                          numAllocatedDescs, dirname));
 
        /*
-        * The test against MAX_ALLOCATED_DIRS prevents us from overflowing
-        * allocatedDirs[]; the test against max_safe_fds prevents AllocateDir
+        * The test against MAX_ALLOCATED_DESCS prevents us from overflowing
+        * allocatedDescs[]; the test against max_safe_fds prevents AllocateDir
         * from hogging every one of the available FDs, which'd lead to infinite
         * looping.
         */
-       if (numAllocatedDirs >= MAX_ALLOCATED_DIRS ||
-               numAllocatedDirs + numAllocatedFiles >= max_safe_fds - 1)
+       if (numAllocatedDescs >= MAX_ALLOCATED_DESCS ||
+               numAllocatedDescs >= max_safe_fds - 1)
                elog(ERROR, "too many private dirs demanded");
 
 TryAgain:
        if ((dir = opendir(dirname)) != NULL)
        {
-               allocatedDirs[numAllocatedDirs] = dir;
-               numAllocatedDirs++;
-               return dir;
+               AllocateDesc *desc = &allocatedDescs[numAllocatedDescs];
+
+               desc->kind = AllocateDescDir;
+               desc->desc.dir = dir;
+               desc->create_xid = GetCurrentTransactionId();
+               numAllocatedDescs++;
+               return desc->desc.dir;
        }
 
        if (errno == EMFILE || errno == ENFILE)
@@ -1260,20 +1310,19 @@ FreeDir(DIR *dir)
 {
        int                     i;
 
-       DO_DB(elog(LOG, "FreeDir: Allocated %d", numAllocatedDirs));
+       DO_DB(elog(LOG, "FreeDir: Allocated %d", numAllocatedDescs));
 
        /* Remove dir from list of allocated dirs, if it's present */
-       for (i = numAllocatedDirs; --i >= 0;)
+       for (i = numAllocatedDescs; --i >= 0;)
        {
-               if (allocatedDirs[i] == dir)
-               {
-                       numAllocatedDirs--;
-                       allocatedDirs[i] = allocatedDirs[numAllocatedDirs];
-                       break;
-               }
+               AllocateDesc *desc = &allocatedDescs[i];
+
+               if (desc->kind == AllocateDescDir && desc->desc.dir == dir)
+                       return FreeDesc(desc);
        }
-       if (i < 0)
-               elog(WARNING, "dir passed to FreeDir was not obtained from AllocateDir");
+
+       /* Only get here if someone passes us a dir not in allocatedDescs */
+       elog(WARNING, "dir passed to FreeDir was not obtained from AllocateDir");
 
        return closedir(dir);
 }
@@ -1303,6 +1352,51 @@ closeAllVfds(void)
 }
 
 /*
+ * AtEOSubXact_Files
+ *
+ * Take care of subtransaction commit/abort.  At abort, we close the temp
+ * files and allocatedDescs[] entries whose create_xid matches myXid.  At
+ * commit, we relabel those same items with parentXid instead.
+ */
+void
+AtEOSubXact_Files(bool isCommit, TransactionId myXid, TransactionId parentXid)
+{
+       Index i;
+
+       if (SizeVfdCache > 0)
+       {
+               Assert(FileIsNotOpen(0));               /* Make sure ring not corrupted */
+               for (i = 1; i < SizeVfdCache; i++)
+               {
+                       unsigned short fdstate = VfdCache[i].fdstate;
+
+                       if ((fdstate & FD_XACT_TEMPORARY) &&
+                               VfdCache[i].create_xid == myXid)
+                       {
+                               if (isCommit)
+                                       VfdCache[i].create_xid = parentXid;
+                               else if (VfdCache[i].fileName != NULL)
+                                       FileClose(i);
+                       }
+               }
+       }
+
+       for (i = 0; i < numAllocatedDescs; i++)
+       {
+               if (allocatedDescs[i].create_xid == myXid)
+               {
+                       if (isCommit)
+                               allocatedDescs[i].create_xid = parentXid;
+                       else
+                       {
+                               /* FreeDesc refills slot i from the array's end, so recheck it */
+                               FreeDesc(&allocatedDescs[i--]);
+                       }
+               }
+       }
+}
+
+/*
  * AtEOXact_Files
  *
  * This routine is called during transaction commit or abort (it doesn't
@@ -1362,11 +1456,8 @@ CleanupTempFiles(bool isProcExit)
                }
        }
 
-       while (numAllocatedFiles > 0)
-               FreeFile(allocatedFiles[0]);
-
-       while (numAllocatedDirs > 0)
-               FreeDir(allocatedDirs[0]);
+       while (numAllocatedDescs > 0)
+               FreeDesc(&allocatedDescs[0]);
 }
 
 
index 5f75e06..470dcf1 100644 (file)
@@ -9,36 +9,92 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/storage/large_object/inv_api.c,v 1.102 2003/11/29 19:51:56 pgsql Exp $
+ *       $PostgreSQL: pgsql/src/backend/storage/large_object/inv_api.c,v 1.103 2004/07/28 14:23:29 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"
 
-#include <errno.h>
-#include <sys/file.h>
-#include <sys/stat.h>
-
 #include "access/genam.h"
 #include "access/heapam.h"
-#include "access/htup.h"
 #include "access/tuptoaster.h"
 #include "catalog/catalog.h"
 #include "catalog/catname.h"
-#include "catalog/heap.h"
-#include "catalog/index.h"
 #include "catalog/indexing.h"
-#include "catalog/pg_opclass.h"
 #include "catalog/pg_largeobject.h"
-#include "catalog/pg_type.h"
 #include "commands/comment.h"
 #include "libpq/libpq-fs.h"
-#include "miscadmin.h"
 #include "storage/large_object.h"
-#include "storage/smgr.h"
-#include "utils/builtins.h"
 #include "utils/fmgroids.h"
 #include "utils/lsyscache.h"
+#include "utils/resowner.h"
+
+
+/*
+ * All accesses to pg_largeobject and its index make use of a single Relation
+ * reference, so that we only need to open pg_largeobject once per transaction.
+ * To avoid problems when the first such reference occurs inside a
+ * subtransaction, we execute a slightly klugy maneuver to assign ownership of
+ * the Relation reference to TopTransactionResourceOwner.
+ */
+static Relation lo_heap_r = NULL;
+static Relation lo_index_r = NULL;
+
+
+/*
+ * Open pg_largeobject and its index, if lo_heap_r/lo_index_r aren't already set
+ */
+static void
+open_lo_relation(void)
+{
+       ResourceOwner currentOwner;
+
+       if (lo_heap_r && lo_index_r)
+               return;                                 /* already open in current xact */
+
+       /* Temporarily switch owners so the top xact owns these references */
+       currentOwner = CurrentResourceOwner;
+       CurrentResourceOwner = TopTransactionResourceOwner;
+
+       /* Use RowExclusiveLock since we might either read or write */
+       if (lo_heap_r == NULL)
+               lo_heap_r = heap_openr(LargeObjectRelationName, RowExclusiveLock);
+       if (lo_index_r == NULL)
+               lo_index_r = index_openr(LargeObjectLOidPNIndex);
+
+       CurrentResourceOwner = currentOwner;
+}
+
+/*
+ * Clean up at main transaction end: forget the cached lo_heap_r/lo_index_r
+ */
+void
+close_lo_relation(bool isCommit)
+{
+       if (lo_heap_r || lo_index_r)
+       {
+               /*
+                * Close explicitly (as the top xact's owner) only if committing;
+                * else abort cleanup will handle it
+                */
+               if (isCommit)
+               {
+                       ResourceOwner currentOwner;
+
+                       currentOwner = CurrentResourceOwner;
+                       CurrentResourceOwner = TopTransactionResourceOwner;
+
+                       if (lo_index_r)
+                               index_close(lo_index_r);
+                       if (lo_heap_r)
+                               heap_close(lo_heap_r, NoLock);
+
+                       CurrentResourceOwner = currentOwner;
+               }
+               lo_heap_r = NULL;
+               lo_index_r = NULL;
+       }
+}
 
 
 static int32
@@ -50,6 +106,7 @@ getbytealen(bytea *data)
        return (VARSIZE(data) - VARHDRSZ);
 }
 
+
 /*
  *     inv_create -- create a new large object.
  *
@@ -92,23 +149,20 @@ inv_create(int flags)
        retval = (LargeObjectDesc *) palloc(sizeof(LargeObjectDesc));
 
        retval->id = file_oid;
+       retval->xid = GetCurrentTransactionId();
        retval->offset = 0;
 
        if (flags & INV_WRITE)
        {
                retval->flags = IFS_WRLOCK | IFS_RDLOCK;
-               retval->heap_r = heap_openr(LargeObjectRelationName, RowExclusiveLock);
        }
        else if (flags & INV_READ)
        {
                retval->flags = IFS_RDLOCK;
-               retval->heap_r = heap_openr(LargeObjectRelationName, AccessShareLock);
        }
        else
                elog(ERROR, "invalid flags: %d", flags);
 
-       retval->index_r = index_openr(LargeObjectLOidPNIndex);
-
        return retval;
 }
 
@@ -131,23 +185,20 @@ inv_open(Oid lobjId, int flags)
        retval = (LargeObjectDesc *) palloc(sizeof(LargeObjectDesc));
 
        retval->id = lobjId;
+       retval->xid = GetCurrentTransactionId();
        retval->offset = 0;
 
        if (flags & INV_WRITE)
        {
                retval->flags = IFS_WRLOCK | IFS_RDLOCK;
-               retval->heap_r = heap_openr(LargeObjectRelationName, RowExclusiveLock);
        }
        else if (flags & INV_READ)
        {
                retval->flags = IFS_RDLOCK;
-               retval->heap_r = heap_openr(LargeObjectRelationName, AccessShareLock);
        }
        else
                elog(ERROR, "invalid flags: %d", flags);
 
-       retval->index_r = index_openr(LargeObjectLOidPNIndex);
-
        return retval;
 }
 
@@ -158,13 +209,6 @@ void
 inv_close(LargeObjectDesc *obj_desc)
 {
        Assert(PointerIsValid(obj_desc));
-
-       if (obj_desc->flags & IFS_WRLOCK)
-               heap_close(obj_desc->heap_r, RowExclusiveLock);
-       else if (obj_desc->flags & IFS_RDLOCK)
-               heap_close(obj_desc->heap_r, AccessShareLock);
-       index_close(obj_desc->index_r);
-
        pfree(obj_desc);
 }
 
@@ -212,12 +256,14 @@ inv_getsize(LargeObjectDesc *obj_desc)
 
        Assert(PointerIsValid(obj_desc));
 
+       open_lo_relation();
+
        ScanKeyInit(&skey[0],
                                Anum_pg_largeobject_loid,
                                BTEqualStrategyNumber, F_OIDEQ,
                                ObjectIdGetDatum(obj_desc->id));
 
-       sd = index_beginscan(obj_desc->heap_r, obj_desc->index_r,
+       sd = index_beginscan(lo_heap_r, lo_index_r,
                                                 SnapshotNow, 1, skey);
 
        /*
@@ -316,6 +362,8 @@ inv_read(LargeObjectDesc *obj_desc, char *buf, int nbytes)
        if (nbytes <= 0)
                return 0;
 
+       open_lo_relation();
+
        ScanKeyInit(&skey[0],
                                Anum_pg_largeobject_loid,
                                BTEqualStrategyNumber, F_OIDEQ,
@@ -326,7 +374,7 @@ inv_read(LargeObjectDesc *obj_desc, char *buf, int nbytes)
                                BTGreaterEqualStrategyNumber, F_INT4GE,
                                Int32GetDatum(pageno));
 
-       sd = index_beginscan(obj_desc->heap_r, obj_desc->index_r,
+       sd = index_beginscan(lo_heap_r, lo_index_r,
                                                 SnapshotNow, 2, skey);
 
        while ((tuple = index_getnext(sd, ForwardScanDirection)) != NULL)
@@ -421,7 +469,9 @@ inv_write(LargeObjectDesc *obj_desc, char *buf, int nbytes)
        if (nbytes <= 0)
                return 0;
 
-       indstate = CatalogOpenIndexes(obj_desc->heap_r);
+       open_lo_relation();
+
+       indstate = CatalogOpenIndexes(lo_heap_r);
 
        ScanKeyInit(&skey[0],
                                Anum_pg_largeobject_loid,
@@ -433,7 +483,7 @@ inv_write(LargeObjectDesc *obj_desc, char *buf, int nbytes)
                                BTGreaterEqualStrategyNumber, F_INT4GE,
                                Int32GetDatum(pageno));
 
-       sd = index_beginscan(obj_desc->heap_r, obj_desc->index_r,
+       sd = index_beginscan(lo_heap_r, lo_index_r,
                                                 SnapshotNow, 2, skey);
 
        oldtuple = NULL;
@@ -510,9 +560,9 @@ inv_write(LargeObjectDesc *obj_desc, char *buf, int nbytes)
                        memset(replace, ' ', sizeof(replace));
                        values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf);
                        replace[Anum_pg_largeobject_data - 1] = 'r';
-                       newtup = heap_modifytuple(oldtuple, obj_desc->heap_r,
+                       newtup = heap_modifytuple(oldtuple, lo_heap_r,
                                                                          values, nulls, replace);
-                       simple_heap_update(obj_desc->heap_r, &newtup->t_self, newtup);
+                       simple_heap_update(lo_heap_r, &newtup->t_self, newtup);
                        CatalogIndexInsert(indstate, newtup);
                        heap_freetuple(newtup);
 
@@ -554,8 +604,8 @@ inv_write(LargeObjectDesc *obj_desc, char *buf, int nbytes)
                        values[Anum_pg_largeobject_loid - 1] = ObjectIdGetDatum(obj_desc->id);
                        values[Anum_pg_largeobject_pageno - 1] = Int32GetDatum(pageno);
                        values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf);
-                       newtup = heap_formtuple(obj_desc->heap_r->rd_att, values, nulls);
-                       simple_heap_insert(obj_desc->heap_r, newtup);
+                       newtup = heap_formtuple(lo_heap_r->rd_att, values, nulls);
+                       simple_heap_insert(lo_heap_r, newtup);
                        CatalogIndexInsert(indstate, newtup);
                        heap_freetuple(newtup);
                }
index 1767675..11d73c5 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/storage/lmgr/lmgr.c,v 1.65 2004/07/27 05:10:58 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/storage/lmgr/lmgr.c,v 1.66 2004/07/28 14:23:29 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -137,7 +137,7 @@ LockRelation(Relation relation, LOCKMODE lockmode)
        tag.dbId = relation->rd_lockInfo.lockRelId.dbId;
        tag.objId.blkno = InvalidBlockNumber;
 
-       if (!LockAcquire(LockTableId, &tag, GetCurrentTransactionId(),
+       if (!LockAcquire(LockTableId, &tag, GetTopTransactionId(),
                                         lockmode, false))
                elog(ERROR, "LockAcquire failed");
 
@@ -171,7 +171,7 @@ ConditionalLockRelation(Relation relation, LOCKMODE lockmode)
        tag.dbId = relation->rd_lockInfo.lockRelId.dbId;
        tag.objId.blkno = InvalidBlockNumber;
 
-       if (!LockAcquire(LockTableId, &tag, GetCurrentTransactionId(),
+       if (!LockAcquire(LockTableId, &tag, GetTopTransactionId(),
                                         lockmode, true))
                return false;
 
@@ -201,7 +201,7 @@ UnlockRelation(Relation relation, LOCKMODE lockmode)
        tag.dbId = relation->rd_lockInfo.lockRelId.dbId;
        tag.objId.blkno = InvalidBlockNumber;
 
-       LockRelease(LockTableId, &tag, GetCurrentTransactionId(), lockmode);
+       LockRelease(LockTableId, &tag, GetTopTransactionId(), lockmode);
 }
 
 /*
@@ -264,7 +264,7 @@ LockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode)
        tag.dbId = relation->rd_lockInfo.lockRelId.dbId;
        tag.objId.blkno = blkno;
 
-       if (!LockAcquire(LockTableId, &tag, GetCurrentTransactionId(),
+       if (!LockAcquire(LockTableId, &tag, GetTopTransactionId(),
                                         lockmode, false))
                elog(ERROR, "LockAcquire failed");
 }
@@ -285,7 +285,7 @@ ConditionalLockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode)
        tag.dbId = relation->rd_lockInfo.lockRelId.dbId;
        tag.objId.blkno = blkno;
 
-       return LockAcquire(LockTableId, &tag, GetCurrentTransactionId(),
+       return LockAcquire(LockTableId, &tag, GetTopTransactionId(),
                                           lockmode, true);
 }
 
@@ -302,7 +302,7 @@ UnlockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode)
        tag.dbId = relation->rd_lockInfo.lockRelId.dbId;
        tag.objId.blkno = blkno;
 
-       LockRelease(LockTableId, &tag, GetCurrentTransactionId(), lockmode);
+       LockRelease(LockTableId, &tag, GetTopTransactionId(), lockmode);
 }
 
 /*
@@ -343,7 +343,7 @@ void
 XactLockTableWait(TransactionId xid)
 {
        LOCKTAG         tag;
-       TransactionId myxid = GetCurrentTransactionId();
+       TransactionId myxid = GetTopTransactionId();
 
        Assert(!SubTransXidsHaveCommonAncestor(xid, myxid));
 
index e54a74f..946bd0c 100644 (file)
@@ -80,7 +80,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/cache/inval.c,v 1.63 2004/07/01 00:51:17 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/cache/inval.c,v 1.64 2004/07/28 14:23:29 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -678,7 +678,7 @@ AtEOXact_Inval(bool isCommit)
 }
 
 /*
- * AtSubEOXact_Inval
+ * AtEOSubXact_Inval
  *             Process queued-up invalidation messages at end of subtransaction.
  *
  * If isCommit, process CurrentCmdInvalidMsgs if any (there probably aren't),
@@ -695,7 +695,7 @@ AtEOXact_Inval(bool isCommit)
  * (if aborting).
  */
 void
-AtSubEOXact_Inval(bool isCommit)
+AtEOSubXact_Inval(bool isCommit)
 {
        TransInvalidationInfo *myInfo = transInvalInfo;
 
index b986546..22df3ef 100644 (file)
@@ -10,7 +10,7 @@
  * Written by Peter Eisentraut <peter_e@gmx.net>.
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.224 2004/07/24 19:51:23 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.225 2004/07/28 14:23:29 tgl Exp $
  *
  *--------------------------------------------------------------------
  */
@@ -5436,10 +5436,15 @@ assign_log_stats(bool newval, bool doit, GucSource source)
 static bool
 assign_transaction_read_only(bool newval, bool doit, GucSource source)
 {
-       if (doit && source >= PGC_S_INTERACTIVE && IsSubTransaction())
-               ereport(ERROR,
-                               (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
-                                errmsg("cannot set transaction read only mode inside a subtransaction")));
+       /* A subtransaction can't switch to r/w while XactReadOnly is set */
+       if (newval == false && XactReadOnly && IsSubTransaction())
+       {
+               if (source >= PGC_S_INTERACTIVE)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                        errmsg("cannot set transaction read-write mode inside a read-only transaction")));
+               return false;
+       }
        return true;
 }
 
index 446ee4b..d1a7179 100644 (file)
@@ -16,7 +16,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/time/tqual.c,v 1.73 2004/07/01 00:51:33 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/time/tqual.c,v 1.74 2004/07/28 14:23:30 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -118,7 +118,10 @@ HeapTupleSatisfiesItself(HeapTupleHeader tuple)
 
                        /* deleting subtransaction aborted */
                        if (TransactionIdDidAbort(HeapTupleHeaderGetXmax(tuple)))
+                       {
+                               tuple->t_infomask |= HEAP_XMAX_INVALID;
                                return true;
+                       }
 
                        Assert(TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tuple)));
 
@@ -268,7 +271,10 @@ HeapTupleSatisfiesNow(HeapTupleHeader tuple)
 
                        /* deleting subtransaction aborted */
                        if (TransactionIdDidAbort(HeapTupleHeaderGetXmax(tuple)))
+                       {
+                               tuple->t_infomask |= HEAP_XMAX_INVALID;
                                return true;
+                       }
 
                        Assert(TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tuple)));
 
@@ -452,7 +458,10 @@ HeapTupleSatisfiesUpdate(HeapTupleHeader tuple, CommandId curcid)
 
                        /* deleting subtransaction aborted */
                        if (TransactionIdDidAbort(HeapTupleHeaderGetXmax(tuple)))
+                       {
+                               tuple->t_infomask |= HEAP_XMAX_INVALID;
                                return HeapTupleMayBeUpdated;
+                       }
 
                        Assert(TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tuple)));
 
@@ -590,7 +599,10 @@ HeapTupleSatisfiesDirty(HeapTupleHeader tuple)
 
                        /* deleting subtransaction aborted */
                        if (TransactionIdDidAbort(HeapTupleHeaderGetXmax(tuple)))
+                       {
+                               tuple->t_infomask |= HEAP_XMAX_INVALID;
                                return true;
+                       }
 
                        Assert(TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tuple)));
 
@@ -732,7 +744,10 @@ HeapTupleSatisfiesSnapshot(HeapTupleHeader tuple, Snapshot snapshot)
                        /* deleting subtransaction aborted */
                        /* FIXME -- is this correct w.r.t. the cmax of the tuple? */
                        if (TransactionIdDidAbort(HeapTupleHeaderGetXmax(tuple)))
+                       {
+                               tuple->t_infomask |= HEAP_XMAX_INVALID;
                                return true;
+                       }
 
                        Assert(TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetXmax(tuple)));
 
@@ -757,21 +772,36 @@ HeapTupleSatisfiesSnapshot(HeapTupleHeader tuple, Snapshot snapshot)
        /*
         * By here, the inserting transaction has committed - have to check
         * when...
+        *
+        * Note that the provided snapshot contains only top-level XIDs, so
+        * we have to convert a subxact XID to its parent for comparison.
+        * However, we can make first-pass range checks with the given XID,
+        * because a subxact with XID < xmin has surely also got a parent with
+        * XID < xmin, while one with XID >= xmax must belong to a parent that
+        * was not yet committed at the time of this snapshot.
         */
        if (TransactionIdFollowsOrEquals(HeapTupleHeaderGetXmin(tuple),
                                                                         snapshot->xmin))
        {
-               uint32          i;
+               TransactionId parentXid;
 
                if (TransactionIdFollowsOrEquals(HeapTupleHeaderGetXmin(tuple),
                                                                                 snapshot->xmax))
                        return false;
 
-               for (i = 0; i < snapshot->xcnt; i++)
+               parentXid = SubTransGetTopmostTransaction(HeapTupleHeaderGetXmin(tuple));
+
+               if (TransactionIdFollowsOrEquals(parentXid, snapshot->xmin))
                {
-                       if (SubTransXidsHaveCommonAncestor(HeapTupleHeaderGetXmin(tuple),
-                                                                       snapshot->xip[i]))
-                               return false;
+                       uint32          i;
+
+                       /* no point in checking parentXid against xmax here */
+
+                       for (i = 0; i < snapshot->xcnt; i++)
+                       {
+                               if (TransactionIdEquals(parentXid, snapshot->xip[i]))
+                                       return false;
+                       }
                }
        }
 
@@ -804,18 +834,31 @@ HeapTupleSatisfiesSnapshot(HeapTupleHeader tuple, Snapshot snapshot)
 
        /*
         * OK, the deleting transaction committed too ... but when?
+        *
+        * See notes for the similar tests on tuple xmin, above.
         */
-       if (TransactionIdFollowsOrEquals(HeapTupleHeaderGetXmax(tuple), snapshot->xmin))
+       if (TransactionIdFollowsOrEquals(HeapTupleHeaderGetXmax(tuple),
+                                                                        snapshot->xmin))
        {
-               uint32          i;
+               TransactionId parentXid;
 
                if (TransactionIdFollowsOrEquals(HeapTupleHeaderGetXmax(tuple),
                                                                                 snapshot->xmax))
                        return true;
-               for (i = 0; i < snapshot->xcnt; i++)
+
+               parentXid = SubTransGetTopmostTransaction(HeapTupleHeaderGetXmax(tuple));
+
+               if (TransactionIdFollowsOrEquals(parentXid, snapshot->xmin))
                {
-                       if (SubTransXidsHaveCommonAncestor(HeapTupleHeaderGetXmax(tuple), snapshot->xip[i]))
-                               return true;
+                       uint32          i;
+
+                       /* no point in checking parentXid against xmax here */
+
+                       for (i = 0; i < snapshot->xcnt; i++)
+                       {
+                               if (TransactionIdEquals(parentXid, snapshot->xip[i]))
+                                       return true;
+                       }
                }
        }
 
index 0dfaebe..130fcd3 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 2000-2003, PostgreSQL Global Development Group
  *
- * $PostgreSQL: pgsql/src/bin/psql/tab-complete.c,v 1.108 2004/07/27 05:11:11 tgl Exp $
+ * $PostgreSQL: pgsql/src/bin/psql/tab-complete.c,v 1.109 2004/07/28 14:23:30 tgl Exp $
  */
 
 /*----------------------------------------------------------------------
@@ -722,7 +722,7 @@ psql_completion(char *text, int start, int end)
        else if (pg_strcasecmp(prev2_wd, "ANALYZE") == 0)
                COMPLETE_WITH_CONST(";");
 
-/* BEGIN, COMMIT, ABORT */
+/* BEGIN, END, COMMIT, ABORT */
        else if (pg_strcasecmp(prev_wd, "BEGIN") == 0 ||
                 pg_strcasecmp(prev_wd, "END") == 0 ||
                 pg_strcasecmp(prev_wd, "COMMIT") == 0 ||
index 7a0cfd7..986a26b 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/catalog/namespace.h,v 1.30 2004/01/19 19:04:40 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/namespace.h,v 1.31 2004/07/28 14:23:30 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -91,6 +91,8 @@ extern Oid    FindDefaultConversionProc(int4 for_encoding, int4 to_encoding);
 /* initialization & transaction cleanup code */
 extern void InitializeSearchPath(void);
 extern void AtEOXact_Namespace(bool isCommit);
+extern void AtEOSubXact_Namespace(bool isCommit, TransactionId myXid,
+                                                                 TransactionId parentXid);
 
 /* stuff for search_path GUC variable */
 extern char *namespace_search_path;
index 8dba146..7a72319 100644 (file)
@@ -4,7 +4,7 @@
  *       Commands for manipulating users and groups.
  *
  *
- * $PostgreSQL: pgsql/src/include/commands/user.h,v 1.22 2003/11/29 22:40:59 pgsql Exp $
+ * $PostgreSQL: pgsql/src/include/commands/user.h,v 1.23 2004/07/28 14:23:31 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -32,5 +32,7 @@ extern void RenameGroup(const char *oldname, const char *newname);
 extern Datum update_pg_pwd_and_pg_group(PG_FUNCTION_ARGS);
 
 extern void AtEOXact_UpdatePasswordFile(bool isCommit);
+extern void AtEOSubXact_UpdatePasswordFile(bool isCommit, TransactionId myXid,
+                                                                                  TransactionId parentXid);
 
 #endif   /* USER_H */
index 9c45876..b2d8b3d 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/libpq/be-fsstubs.h,v 1.18 2003/11/29 22:41:03 pgsql Exp $
+ * $PostgreSQL: pgsql/src/include/libpq/be-fsstubs.h,v 1.19 2004/07/28 14:23:31 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -43,8 +43,10 @@ extern int   lo_read(int fd, char *buf, int len);
 extern int     lo_write(int fd, char *buf, int len);
 
 /*
- * Cleanup LOs at xact commit/abort [ Pascal André <andre@via.ecp.fr> ]
+ * Cleanup LOs at xact commit/abort
  */
-extern void lo_commit(bool isCommit);
+extern void AtEOXact_LargeObject(bool isCommit);
+extern void AtEOSubXact_LargeObject(bool isCommit, TransactionId myXid,
+                                                                       TransactionId parentXid);
 
 #endif   /* BE_FSSTUBS_H */
index 430ed5d..c62f6ff 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/fd.h,v 1.45 2004/05/31 03:48:10 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/storage/fd.h,v 1.46 2004/07/28 14:23:31 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -85,6 +85,8 @@ extern int    BasicOpenFile(FileName fileName, int fileFlags, int fileMode);
 extern void set_max_safe_fds(void);
 extern void closeAllVfds(void);
 extern void AtEOXact_Files(void);
+extern void AtEOSubXact_Files(bool isCommit, TransactionId myXid,
+                                                         TransactionId parentXid);
 extern void RemovePgTempFiles(void);
 extern int     pg_fsync(int fd);
 extern int     pg_fdatasync(int fd);
index 23118aa..164d3ab 100644 (file)
@@ -1,47 +1,44 @@
 /*-------------------------------------------------------------------------
  *
  * large_object.h
- *       file of info for Postgres large objects. POSTGRES 4.2 supports
+ *       Declarations for PostgreSQL large objects.  POSTGRES 4.2 supported
  *       zillions of large objects (internal, external, jaquith, inversion).
  *       Now we only support inversion.
  *
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/large_object.h,v 1.27 2003/11/29 22:41:13 pgsql Exp $
+ * $PostgreSQL: pgsql/src/include/storage/large_object.h,v 1.28 2004/07/28 14:23:31 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #ifndef LARGE_OBJECT_H
 #define LARGE_OBJECT_H
 
-#include "utils/rel.h"
-
 
 /*----------
  * Data about a currently-open large object.
  *
  * id is the logical OID of the large object
+ * xid is the transaction ID that opened the LO (or currently owns it)
  * offset is the current seek offset within the LO
- * heap_r holds an open-relation reference to pg_largeobject
- * index_r holds an open-relation reference to pg_largeobject_loid_pn_index
+ * flags holds the flag bits defined below (IFS_RDLOCK, IFS_WRLOCK)
  *
- * NOTE: before 7.1, heap_r and index_r held references to the separate
- * table and index of a specific large object. Now they all live in one rel.
+ * NOTE: before 7.1, we also had to store references to the separate table
+ * and index of a specific large object.  Now they all live in pg_largeobject
+ * and are accessed via a common relation descriptor.
  *----------
  */
 typedef struct LargeObjectDesc
 {
-       Oid                     id;
+       Oid                     id;                             /* LO's identifier */
+       TransactionId xid;                      /* owning XID */
        uint32          offset;                 /* current seek pointer */
        int                     flags;                  /* locking info, etc */
 
 /* flag bits: */
 #define IFS_RDLOCK             (1 << 0)
 #define IFS_WRLOCK             (1 << 1)
-
-       Relation        heap_r;
-       Relation        index_r;
 } LargeObjectDesc;
 
 
@@ -67,6 +64,7 @@ typedef struct LargeObjectDesc
  */
 
 /* inversion stuff in inv_api.c */
+extern void close_lo_relation(bool isCommit);
 extern LargeObjectDesc *inv_create(int flags);
 extern LargeObjectDesc *inv_open(Oid lobjId, int flags);
 extern void inv_close(LargeObjectDesc *obj_desc);
index add5ca8..cf12122 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/utils/inval.h,v 1.32 2004/07/01 00:51:44 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/utils/inval.h,v 1.33 2004/07/28 14:23:31 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -28,7 +28,7 @@ extern void AtSubStart_Inval(void);
 
 extern void AtEOXact_Inval(bool isCommit);
 
-extern void AtSubEOXact_Inval(bool isCommit);
+extern void AtEOSubXact_Inval(bool isCommit);
 
 extern void CommandEndInvalidationMessages(void);