From b95ae32b4178959e8880bd716fb33ec163f61713 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Mon, 20 Jun 2005 18:37:02 +0000 Subject: [PATCH] Avoid WAL-logging individual tuple insertions during CREATE TABLE AS (a/k/a SELECT INTO). Instead, flush and fsync the whole relation before committing. We do still need the WAL log when PITR is active, however. Simon Riggs and Tom Lane. --- src/backend/access/heap/heapam.c | 36 +++++++++++++++++++++++------------ src/backend/access/heap/hio.c | 27 +++++++++++++++++++++++--- src/backend/executor/execMain.c | 41 +++++++++++++++++++++++++++++++++++++--- src/backend/executor/execUtils.c | 4 +++- src/backend/storage/smgr/md.c | 5 ++++- src/backend/storage/smgr/smgr.c | 9 +++++++-- src/include/access/heapam.h | 5 +++-- src/include/access/hio.h | 4 ++-- src/include/nodes/execnodes.h | 4 +++- 9 files changed, 108 insertions(+), 27 deletions(-) diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 74f76c1d16..843b2909ef 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.194 2005/06/08 15:50:21 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.195 2005/06/20 18:37:01 tgl Exp $ * * * INTERFACE ROUTINES @@ -1034,9 +1034,20 @@ heap_get_latest_tid(Relation relation, * * The new tuple is stamped with current transaction ID and the specified * command ID. + * + * If use_wal is false, the new tuple is not logged in WAL, even for a + * non-temp relation. Safe usage of this behavior requires that we arrange + * that all new tuples go into new pages not containing any tuples from other + * transactions, that the relation gets fsync'd before commit, and that the + * transaction emits at least one WAL record to ensure RecordTransactionCommit + * will decide to WAL-log the commit. + * + * use_fsm is passed directly to RelationGetBufferForTuple, which see for + * more info. */ Oid -heap_insert(Relation relation, HeapTuple tup, CommandId cid) +heap_insert(Relation relation, HeapTuple tup, CommandId cid, + bool use_wal, bool use_fsm) { TransactionId xid = GetCurrentTransactionId(); Buffer buffer; @@ -1086,7 +1097,8 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid) heap_tuple_toast_attrs(relation, tup, NULL); /* Find buffer to insert this tuple into */ - buffer = RelationGetBufferForTuple(relation, tup->t_len, InvalidBuffer); + buffer = RelationGetBufferForTuple(relation, tup->t_len, + InvalidBuffer, use_fsm); /* NO EREPORT(ERROR) from here till changes are logged */ START_CRIT_SECTION(); @@ -1096,7 +1108,12 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid) pgstat_count_heap_insert(&relation->pgstat_info); /* XLOG stuff */ - if (!relation->rd_istemp) + if (relation->rd_istemp) + { + /* No XLOG record, but still need to flag that XID exists on disk */ + MyXactMadeTempRelUpdate = true; + } + else if (use_wal) { xl_heap_insert xlrec; xl_heap_header xlhdr; @@ -1151,11 +1168,6 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid) PageSetLSN(page, recptr); PageSetTLI(page, ThisTimeLineID); } - else - { - /* No XLOG record, but still need to flag that XID exists on disk */ - MyXactMadeTempRelUpdate = true; - } END_CRIT_SECTION(); @@ -1183,7 +1195,7 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid) Oid simple_heap_insert(Relation relation, HeapTuple tup) { - return heap_insert(relation, tup, GetCurrentCommandId()); + return heap_insert(relation, tup, GetCurrentCommandId(), true, true); } /* @@ -1743,7 +1755,7 @@ l2: { /* Assume there's no chance to put newtup on same page. */ newbuf = RelationGetBufferForTuple(relation, newtup->t_len, - buffer); + buffer, true); } else { @@ -1760,7 +1772,7 @@ l2: */ LockBuffer(buffer, BUFFER_LOCK_UNLOCK); newbuf = RelationGetBufferForTuple(relation, newtup->t_len, - buffer); + buffer, true); } else { diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c index 583bb20933..fc1b0afd21 100644 --- a/src/backend/access/heap/hio.c +++ b/src/backend/access/heap/hio.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/heap/hio.c,v 1.56 2005/05/07 21:32:23 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/access/heap/hio.c,v 1.57 2005/06/20 18:37:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -79,12 +79,26 @@ RelationPutHeapTuple(Relation relation, * happen if space is freed in that page after heap_update finds there's not * enough there). In that case, the page will be pinned and locked only once. * + * If use_fsm is true (the normal case), we use FSM to help us find free + * space. If use_fsm is false, we always append a new empty page to the + * end of the relation if the tuple won't fit on the current target page. + * This can save some cycles when we know the relation is new and doesn't + * contain useful amounts of free space. + * + * The use_fsm = false case is also useful for non-WAL-logged additions to a + * relation, if the caller holds exclusive lock and is careful to invalidate + * relation->rd_targblock before the first insertion --- that ensures that + * all insertions will occur into newly added pages and not be intermixed + * with tuples from other transactions. That way, a crash can't risk losing + * any committed data of other transactions. (See heap_insert's comments + * for additional constraints needed for safe usage of this behavior.) + * * ereport(ERROR) is allowed here, so this routine *must* be called * before any (unlogged) changes are made in buffer pool. */ Buffer RelationGetBufferForTuple(Relation relation, Size len, - Buffer otherBuffer) + Buffer otherBuffer, bool use_fsm) { Buffer buffer = InvalidBuffer; Page pageHeader; @@ -121,11 +135,14 @@ RelationGetBufferForTuple(Relation relation, Size len, * on each page that proves not to be suitable.) If the FSM has no * record of a page with enough free space, we give up and extend the * relation. + * + * When use_fsm is false, we either put the tuple onto the existing + * target page or extend the relation. */ targetBlock = relation->rd_targblock; - if (targetBlock == InvalidBlockNumber) + if (targetBlock == InvalidBlockNumber && use_fsm) { /* * We have no cached target page, so ask the FSM for an initial @@ -209,6 +226,10 @@ RelationGetBufferForTuple(Relation relation, Size len, ReleaseBuffer(buffer); } + /* Without FSM, always fall out of the loop and extend */ + if (!use_fsm) + break; + /* * Update FSM as to condition of this page, and ask for another * page to try. diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c index a390829bb8..938474610a 100644 --- a/src/backend/executor/execMain.c +++ b/src/backend/executor/execMain.c @@ -26,13 +26,14 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/execMain.c,v 1.249 2005/05/22 22:30:19 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/executor/execMain.c,v 1.250 2005/06/20 18:37:01 tgl Exp $ * *------------------------------------------------------------------------- */ #include "postgres.h" #include "access/heapam.h" +#include "access/xlog.h" #include "catalog/heap.h" #include "catalog/namespace.h" #include "commands/tablecmds.h" @@ -44,6 +45,7 @@ #include "optimizer/clauses.h" #include "optimizer/var.h" #include "parser/parsetree.h" +#include "storage/smgr.h" #include "utils/acl.h" #include "utils/guc.h" #include "utils/lsyscache.h" @@ -784,6 +786,20 @@ InitPlan(QueryDesc *queryDesc, bool explainOnly) * And open the constructed table for writing. */ intoRelationDesc = heap_open(intoRelationId, AccessExclusiveLock); + + /* use_wal off requires rd_targblock be initially invalid */ + Assert(intoRelationDesc->rd_targblock == InvalidBlockNumber); + + /* + * We can skip WAL-logging the insertions, unless PITR is in use. + * + * Note that for a non-temp INTO table, this is safe only because + * we know that the catalog changes above will have been WAL-logged, + * and so RecordTransactionCommit will think it needs to WAL-log the + * eventual transaction commit. Else the commit might be lost, even + * though all the data is safely fsync'd ... + */ + estate->es_into_relation_use_wal = XLogArchivingActive(); } estate->es_into_relation_descriptor = intoRelationDesc; @@ -979,7 +995,22 @@ ExecEndPlan(PlanState *planstate, EState *estate) * close the "into" relation if necessary, again keeping lock */ if (estate->es_into_relation_descriptor != NULL) + { + /* + * If we skipped using WAL, and it's not a temp relation, + * we must force the relation down to disk before it's + * safe to commit the transaction. This requires forcing + * out any dirty buffers and then doing a forced fsync. + */ + if (!estate->es_into_relation_use_wal && + !estate->es_into_relation_descriptor->rd_istemp) + { + FlushRelationBuffers(estate->es_into_relation_descriptor); + smgrimmedsync(estate->es_into_relation_descriptor->rd_smgr); + } + heap_close(estate->es_into_relation_descriptor, NoLock); + } /* * close any relations selected FOR UPDATE/FOR SHARE, again keeping locks @@ -1307,7 +1338,9 @@ ExecSelect(TupleTableSlot *slot, tuple = ExecCopySlotTuple(slot); heap_insert(estate->es_into_relation_descriptor, tuple, - estate->es_snapshot->curcid); + estate->es_snapshot->curcid, + estate->es_into_relation_use_wal, + false); /* never any point in using FSM */ /* we know there are no indexes to update */ heap_freetuple(tuple); IncrAppended(); @@ -1386,7 +1419,8 @@ ExecInsert(TupleTableSlot *slot, * insert the tuple */ newId = heap_insert(resultRelationDesc, tuple, - estate->es_snapshot->curcid); + estate->es_snapshot->curcid, + true, true); IncrAppended(); (estate->es_processed)++; @@ -2089,6 +2123,7 @@ EvalPlanQualStart(evalPlanQual *epq, EState *estate, evalPlanQual *priorepq) epqstate->es_result_relation_info = estate->es_result_relation_info; epqstate->es_junkFilter = estate->es_junkFilter; epqstate->es_into_relation_descriptor = estate->es_into_relation_descriptor; + epqstate->es_into_relation_use_wal = estate->es_into_relation_use_wal; epqstate->es_param_list_info = estate->es_param_list_info; if (estate->es_topPlan->nParamExec > 0) epqstate->es_param_exec_vals = (ParamExecData *) diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c index 133bf57bca..8eaff494e3 100644 --- a/src/backend/executor/execUtils.c +++ b/src/backend/executor/execUtils.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/execUtils.c,v 1.123 2005/04/28 21:47:12 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/executor/execUtils.c,v 1.124 2005/06/20 18:37:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -186,7 +186,9 @@ CreateExecutorState(void) estate->es_result_relation_info = NULL; estate->es_junkFilter = NULL; + estate->es_into_relation_descriptor = NULL; + estate->es_into_relation_use_wal = false; estate->es_param_list_info = NULL; estate->es_param_exec_vals = NULL; diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c index 1c0cb7e240..fa7913aff7 100644 --- a/src/backend/storage/smgr/md.c +++ b/src/backend/storage/smgr/md.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.115 2005/05/29 04:23:05 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/storage/smgr/md.c,v 1.116 2005/06/20 18:37:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -660,6 +660,9 @@ mdtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp) /* * mdimmedsync() -- Immediately sync a relation to stable storage. + * + * Note that only writes already issued are synced; this routine knows + * nothing of dirty buffers that may exist inside the buffer manager. */ bool mdimmedsync(SMgrRelation reln) diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c index 2c8cf07eec..f286b20ee2 100644 --- a/src/backend/storage/smgr/smgr.c +++ b/src/backend/storage/smgr/smgr.c @@ -11,7 +11,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.90 2005/06/17 22:32:46 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/storage/smgr/smgr.c,v 1.91 2005/06/20 18:37:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -650,7 +650,8 @@ smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp) /* * smgrimmedsync() -- Force the specified relation to stable storage. * - * Synchronously force all of the specified relation down to disk. + * Synchronously force all previous writes to the specified relation + * down to disk. * * This is useful for building completely new relations (eg, new * indexes). Instead of incrementally WAL-logging the index build @@ -664,6 +665,10 @@ smgrtruncate(SMgrRelation reln, BlockNumber nblocks, bool isTemp) * * The preceding writes should specify isTemp = true to avoid * duplicative fsyncs. + * + * Note that you need to do FlushRelationBuffers() first if there is + * any possibility that there are dirty buffers for the relation; + * otherwise the sync is not very meaningful. */ void smgrimmedsync(SMgrRelation reln) diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index 151a62f9b6..dde6fe8ecd 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/heapam.h,v 1.101 2005/06/06 17:01:24 tgl Exp $ + * $PostgreSQL: pgsql/src/include/access/heapam.h,v 1.102 2005/06/20 18:37:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -156,7 +156,8 @@ extern ItemPointer heap_get_latest_tid(Relation relation, Snapshot snapshot, ItemPointer tid); extern void setLastTid(const ItemPointer tid); -extern Oid heap_insert(Relation relation, HeapTuple tup, CommandId cid); +extern Oid heap_insert(Relation relation, HeapTuple tup, CommandId cid, + bool use_wal, bool use_fsm); extern HTSU_Result heap_delete(Relation relation, ItemPointer tid, ItemPointer ctid, CommandId cid, Snapshot crosscheck, bool wait); extern HTSU_Result heap_update(Relation relation, ItemPointer otid, HeapTuple tup, diff --git a/src/include/access/hio.h b/src/include/access/hio.h index 49091eb202..e706fea4ca 100644 --- a/src/include/access/hio.h +++ b/src/include/access/hio.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/access/hio.h,v 1.27 2004/12/31 22:03:21 pgsql Exp $ + * $PostgreSQL: pgsql/src/include/access/hio.h,v 1.28 2005/06/20 18:37:01 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -19,6 +19,6 @@ extern void RelationPutHeapTuple(Relation relation, Buffer buffer, HeapTuple tuple); extern Buffer RelationGetBufferForTuple(Relation relation, Size len, - Buffer otherBuffer); + Buffer otherBuffer, bool use_fsm); #endif /* HIO_H */ diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 19f264119c..df41c85610 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.134 2005/06/15 07:27:44 neilc Exp $ + * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.135 2005/06/20 18:37:02 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -304,7 +304,9 @@ typedef struct EState ResultRelInfo *es_result_relation_info; /* currently active array * elt */ JunkFilter *es_junkFilter; /* currently active junk filter */ + Relation es_into_relation_descriptor; /* for SELECT INTO */ + bool es_into_relation_use_wal; /* Parameter info: */ ParamListInfo es_param_list_info; /* values of external params */ -- 2.11.0