From 8db05ba411ecd3ce2cb0cb7c78fec87658e1a4ab Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Tue, 7 Mar 2006 19:06:50 +0000 Subject: [PATCH] Repair old performance bug in tuplesort.c/logtape.c. In the case where we are doing the final merge pass on-the-fly, and not writing the data back onto a 'tape', the number of free blocks in the tape set will become large, leading to a lot of time wasted in ltsReleaseBlock(). There is really no need to track the free blocks anymore in this state, so add a simple shutoff switch. Per report from Stefan Kaltenbrunner. --- src/backend/utils/sort/logtape.c | 29 +++++++++++++++++++++- src/backend/utils/sort/tuplesort.c | 50 ++++++++++++++++++++++++++------------ src/include/utils/logtape.h | 3 ++- 3 files changed, 65 insertions(+), 17 deletions(-) diff --git a/src/backend/utils/sort/logtape.c b/src/backend/utils/sort/logtape.c index 4f280509ca..6dc203063c 100644 --- a/src/backend/utils/sort/logtape.c +++ b/src/backend/utils/sort/logtape.c @@ -64,7 +64,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/sort/logtape.c,v 1.19 2006/03/05 15:58:49 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/sort/logtape.c,v 1.20 2006/03/07 19:06:49 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -146,7 +146,12 @@ struct LogicalTapeSet * When there are no such blocks, we extend the underlying file. Note * that the block numbers in freeBlocks are always in *decreasing* order, * so that removing the last entry gives us the lowest free block. + * + * If forgetFreeSpace is true then any freed blocks are simply forgotten + * rather than being remembered in freeBlocks[]. See notes for + * LogicalTapeSetForgetFreeSpace(). */ + bool forgetFreeSpace; /* are we remembering free blocks? */ long *freeBlocks; /* resizable array */ int nFreeBlocks; /* # of currently free blocks */ int freeBlocksLen; /* current allocated length of freeBlocks[] */ @@ -248,6 +253,12 @@ ltsReleaseBlock(LogicalTapeSet *lts, long blocknum) long *ptr; /* + * Do nothing if we're no longer interested in remembering free space. + */ + if (lts->forgetFreeSpace) + return; + + /* * Enlarge freeBlocks array if full. */ if (lts->nFreeBlocks >= lts->freeBlocksLen) @@ -491,6 +502,7 @@ LogicalTapeSetCreate(int ntapes) (ntapes - 1) * sizeof(LogicalTape)); lts->pfile = BufFileCreateTemp(false); lts->nFileBlocks = 0L; + lts->forgetFreeSpace = false; lts->freeBlocksLen = 32; /* reasonable initial guess */ lts->freeBlocks = (long *) palloc(lts->freeBlocksLen * sizeof(long)); lts->nFreeBlocks = 0; @@ -547,6 +559,21 @@ LogicalTapeSetClose(LogicalTapeSet *lts) } /* + * Mark a logical tape set as not needing management of free space anymore. + * + * This should be called if the caller does not intend to write any more data + * into the tape set, but is reading from un-frozen tapes. Since no more + * writes are planned, remembering free blocks is no longer useful. Setting + * this flag lets us avoid wasting time and space in ltsReleaseBlock(), which + * is not designed to handle large numbers of free blocks. + */ +void +LogicalTapeSetForgetFreeSpace(LogicalTapeSet *lts) +{ + lts->forgetFreeSpace = true; +} + +/* * Dump the dirty buffer of a logical tape. */ static void diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c index a9d5d79e03..af0109fda6 100644 --- a/src/backend/utils/sort/tuplesort.c +++ b/src/backend/utils/sort/tuplesort.c @@ -91,7 +91,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/sort/tuplesort.c,v 1.62 2006/03/05 15:58:49 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/sort/tuplesort.c,v 1.63 2006/03/07 19:06:50 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -1384,7 +1384,8 @@ mergeruns(Tuplesortstate *state) /* * If we produced only one initial run (quite likely if the total data * volume is between 1X and 2X workMem), we can just use that tape as the - * finished output, rather than doing a useless merge. + * finished output, rather than doing a useless merge. (This obvious + * optimization is not in Knuth's algorithm.) */ if (state->currentRun == 1) { @@ -1401,33 +1402,51 @@ mergeruns(Tuplesortstate *state) for (;;) { - /* Step D5: merge runs onto tape[T] until tape[P] is empty */ - while (state->tp_runs[state->tapeRange - 1] || - state->tp_dummy[state->tapeRange - 1]) + /* + * At this point we know that tape[T] is empty. If there's just one + * (real or dummy) run left on each input tape, then only one merge + * pass remains. If we don't have to produce a materialized sorted + * tape, we can stop at this point and do the final merge on-the-fly. + */ + if (!state->randomAccess) { - bool allDummy = true; bool allOneRun = true; + Assert(state->tp_runs[state->tapeRange] == 0); for (tapenum = 0; tapenum < state->tapeRange; tapenum++) { - if (state->tp_dummy[tapenum] == 0) - allDummy = false; if (state->tp_runs[tapenum] + state->tp_dummy[tapenum] != 1) + { allOneRun = false; + break; + } } - - /* - * If we don't have to produce a materialized sorted tape, quit as - * soon as we're down to one real/dummy run per tape. - */ - if (!state->randomAccess && allOneRun) + if (allOneRun) { - Assert(!allDummy); + /* Tell logtape.c we won't be writing anymore */ + LogicalTapeSetForgetFreeSpace(state->tapeset); /* Initialize for the final merge pass */ beginmerge(state); state->status = TSS_FINALMERGE; return; } + } + + /* Step D5: merge runs onto tape[T] until tape[P] is empty */ + while (state->tp_runs[state->tapeRange - 1] || + state->tp_dummy[state->tapeRange - 1]) + { + bool allDummy = true; + + for (tapenum = 0; tapenum < state->tapeRange; tapenum++) + { + if (state->tp_dummy[tapenum] == 0) + { + allDummy = false; + break; + } + } + if (allDummy) { state->tp_dummy[state->tapeRange]++; @@ -1437,6 +1456,7 @@ mergeruns(Tuplesortstate *state) else mergeonerun(state); } + /* Step D6: decrease level */ if (--state->Level == 0) break; diff --git a/src/include/utils/logtape.h b/src/include/utils/logtape.h index 3791f03e9e..7cd3db3777 100644 --- a/src/include/utils/logtape.h +++ b/src/include/utils/logtape.h @@ -8,7 +8,7 @@ * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/utils/logtape.h,v 1.14 2006/03/05 15:59:07 momjian Exp $ + * $PostgreSQL: pgsql/src/include/utils/logtape.h,v 1.15 2006/03/07 19:06:50 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -26,6 +26,7 @@ typedef struct LogicalTapeSet LogicalTapeSet; extern LogicalTapeSet *LogicalTapeSetCreate(int ntapes); extern void LogicalTapeSetClose(LogicalTapeSet *lts); +extern void LogicalTapeSetForgetFreeSpace(LogicalTapeSet *lts); extern size_t LogicalTapeRead(LogicalTapeSet *lts, int tapenum, void *ptr, size_t size); extern void LogicalTapeWrite(LogicalTapeSet *lts, int tapenum, -- 2.11.0