From f5f366e18829c982273540d88a1ac81c9c85d401 Mon Sep 17 00:00:00 2001 From: Bruce Momjian Date: Wed, 6 Aug 1997 03:42:21 +0000 Subject: [PATCH] Allow internal sorts to be stored in memory rather than in files. --- src/backend/executor/nodeMergejoin.c | 16 +- src/backend/executor/nodeSort.c | 261 +++++----------- src/backend/tcop/postgres.c | 18 +- src/backend/utils/sort/lselect.c | 135 +++----- src/backend/utils/sort/psort.c | 581 ++++++++++++++++++++++++----------- src/include/nodes/execnodes.h | 4 +- src/include/nodes/plannodes.h | 4 +- src/include/utils/lselect.h | 31 +- src/include/utils/psort.h | 65 +++- src/man/postgres.1 | 24 +- src/man/postmaster.1 | 4 +- 11 files changed, 630 insertions(+), 513 deletions(-) diff --git a/src/backend/executor/nodeMergejoin.c b/src/backend/executor/nodeMergejoin.c index 29e349bda9..65d8482cad 100644 --- a/src/backend/executor/nodeMergejoin.c +++ b/src/backend/executor/nodeMergejoin.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/executor/nodeMergejoin.c,v 1.5 1996/11/10 02:59:54 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/executor/nodeMergejoin.c,v 1.6 1997/08/06 03:41:29 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -374,6 +374,18 @@ ExecMergeTupleDump(ExprContext *econtext, MergeJoinState *mergestate) printf("******** \n"); } +static void +CleanUpSort(Plan *plan) { + + if (plan == NULL) + return; + + if (plan->type == T_Sort) { + Sort *sort = (Sort *)plan; + psort_end(sort); + } +} + /* ---------------------------------------------------------------- * ExecMergeJoin * @@ -676,6 +688,8 @@ ExecMergeJoin(MergeJoin *node) */ if (TupIsNull(outerTupleSlot)) { MJ_printf("ExecMergeJoin: **** outer tuple is nil ****\n"); + CleanUpSort(node->join.lefttree->lefttree); + CleanUpSort(node->join.righttree->lefttree); return NULL; } diff --git a/src/backend/executor/nodeSort.c b/src/backend/executor/nodeSort.c index 83d78073bb..84c6db71f7 100644 --- a/src/backend/executor/nodeSort.c +++ b/src/backend/executor/nodeSort.c @@ -1,13 +1,13 @@ /*------------------------------------------------------------------------- * * nodeSort.c-- - * Routines to handle sorting of relations into temporaries. + * Routines to handle sorting of relations. * * Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/executor/nodeSort.c,v 1.4 1996/11/08 05:56:17 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/executor/nodeSort.c,v 1.5 1997/08/06 03:41:31 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -86,10 +86,9 @@ FormSortKeys(Sort *sortnode) * ExecSort * * old comments - * Retrieves tuples fron the outer subtree and insert them into a - * temporary relation. The temporary relation is then sorted and - * the sorted relation is stored in the relation whose ID is indicated - * in the 'tempid' field of this node. + * Sorts tuples from the outer subtree of the node in psort, + * which saves the results in a temporary file or memory. After the + * initial call, returns a tuple from the file with each call. * Assumes that heap access method is used. * * Conditions: @@ -108,13 +107,8 @@ ExecSort(Sort *node) ScanDirection dir; int keycount; ScanKey sortkeys; - Relation tempRelation; - Relation currentRelation; - HeapScanDesc currentScanDesc; HeapTuple heapTuple; TupleTableSlot *slot; - Buffer buffer; - int tupCount = 0; /* ---------------- * get state info from node @@ -128,10 +122,8 @@ ExecSort(Sort *node) dir = estate->es_direction; /* ---------------- - * the first time we call this, we retrieve all tuples - * from the subplan into a temporary relation and then - * we sort the relation. Subsequent calls return tuples - * from the temporary relation. + * the first time we call this, psort sorts this into a file. + * Subsequent calls return tuples from psort. * ---------------- */ @@ -144,78 +136,23 @@ ExecSort(Sort *node) * ---------------- */ estate->es_direction = ForwardScanDirection; - - /* ---------------- - * if we couldn't create the temp or current relations then - * we print a warning and return NULL. - * ---------------- - */ - tempRelation = sortstate->sort_TempRelation; - if (tempRelation == NULL) { - elog(DEBUG, "ExecSort: temp relation is NULL! aborting..."); - return NULL; - } - - currentRelation = sortstate->csstate.css_currentRelation; - if (currentRelation == NULL) { - elog(DEBUG, "ExecSort: current relation is NULL! aborting..."); - return NULL; - } - + /* ---------------- - * retrieve tuples from the subplan and - * insert them in the temporary relation + * prepare information for psort_begin() * ---------------- */ outerNode = outerPlan((Plan *) node); - SO1_printf("ExecSort: %s\n", - "inserting tuples into tempRelation"); - - for (;;) { - slot = ExecProcNode(outerNode, (Plan*)node); - - if (TupIsNull(slot)) - break; - - tupCount++; - - heapTuple = slot->val; - - heap_insert(tempRelation, /* relation desc */ - heapTuple); /* heap tuple to insert */ - - ExecClearTuple(slot); - } - - /* ---------------- - * now sort the tuples in our temporary relation - * into a new sorted relation using psort() - * - * psort() seems to require that the relations - * are created and opened in advance. - * -cim 1/25/90 - * ---------------- - */ + keycount = node->keycount; sortkeys = (ScanKey)sortstate->sort_Keys; SO1_printf("ExecSort: %s\n", - "calling psort"); - - /* - * If no tuples were fetched from the proc node return NULL now - * psort dumps it if 0 tuples are in the relation and I don't want - * to try to debug *that* routine!! - */ - if (tupCount == 0) - return NULL; - - psort(tempRelation, /* old relation */ - currentRelation, /* new relation */ - keycount, /* number keys */ - sortkeys); /* keys */ - - if (currentRelation == NULL) { - elog(DEBUG, "ExecSort: sorted relation is NULL! aborting..."); + "calling psort_begin"); + + if (!psort_begin(node, /* this node */ + keycount, /* number keys */ + sortkeys)) /* keys */ + { + /* Psort says, there are no tuples to be sorted */ return NULL; } @@ -226,66 +163,55 @@ ExecSort(Sort *node) estate->es_direction = dir; /* ---------------- - * now initialize the scan descriptor to scan the - * sorted relation and update the sortstate information - * ---------------- - */ - currentScanDesc = heap_beginscan(currentRelation, /* relation */ - ScanDirectionIsBackward(dir), - /* bkwd flag */ - NowTimeQual, /* time qual */ - 0, /* num scan keys */ - NULL); /* scan keys */ - - sortstate->csstate.css_currentRelation = currentRelation; - sortstate->csstate.css_currentScanDesc = currentScanDesc; - - /* ---------------- * make sure the tuple descriptor is up to date * ---------------- */ - slot = sortstate->csstate.css_ScanTupleSlot; - - slot->ttc_tupleDescriptor = - RelationGetTupleDescriptor(currentRelation); - + slot = (TupleTableSlot*)sortstate->csstate.cstate.cs_ResultTupleSlot; + /* *** get_cs_ResultTupleSlot((CommonState) sortstate); */ + + slot->ttc_tupleDescriptor = ExecGetTupType(outerNode); +#ifdef 0 + slot->ttc_execTupDescriptor = ExecGetExecTupDesc(outerNode); +#endif /* ---------------- * finally set the sorted flag to true * ---------------- */ sortstate->sort_Flag = true; + SO1_printf(stderr, "ExecSort: sorting done.\n"); } else { - slot = sortstate->csstate.css_ScanTupleSlot; + slot = (TupleTableSlot*)sortstate->csstate.cstate.cs_ResultTupleSlot; + /* *** get_cs_ResultTupleSlot((CommonState) sortstate); */ +/* slot = sortstate->csstate.css_ScanTupleSlot; orig */ } SO1_printf("ExecSort: %s\n", - "retrieveing tuple from sorted relation"); + "retrieving tuple from sorted relation"); /* ---------------- - * at this point we know we have a sorted relation so - * we preform a simple scan on it with amgetnext().. + * at this point we grab a tuple from psort * ---------------- */ - currentScanDesc = sortstate->csstate.css_currentScanDesc; - - heapTuple = heap_getnext(currentScanDesc, /* scan desc */ - ScanDirectionIsBackward(dir), - /* bkwd flag */ - &buffer); /* return: buffer */ - - /* Increase the pin count on the buffer page, because the tuple stored in - the slot also points to it (as well as the scan descriptor). If we - don't, ExecStoreTuple will decrease the pin count on the next iteration. - - 01/09/93 */ - - if (buffer != InvalidBuffer) - IncrBufferRefCount(buffer); - - return ExecStoreTuple(heapTuple, /* tuple to store */ - slot, /* slot to store in */ - buffer, /* this tuple's buffer */ - false); /* don't free stuff from amgetnext */ + heapTuple = psort_grabtuple(node); + + if (heapTuple == NULL) { +/* psort_end(node); */ + return (ExecClearTuple(slot)); + } + + ExecStoreTuple(heapTuple, /* tuple to store */ + slot, /* slot to store in */ + InvalidBuffer, /* no buffer */ + true); /* free the palloc'd tuple */ +/* printf("ExecSort: (%x)",node);print_slot(slot);printf("\n");*/ + return slot; +#if 0 + return ExecStoreTuple(heapTuple, /* tuple to store */ + slot, /* slot to store in */ + InvalidBuffer, /* no buffer */ + true); /* free the palloc'd tuple */ +#endif } /* ---------------------------------------------------------------- @@ -302,11 +228,6 @@ ExecInitSort(Sort *node, EState *estate, Plan *parent) SortState *sortstate; Plan *outerPlan; ScanKey sortkeys; - TupleDesc tupType; - Oid tempOid; - Oid sortOid; - Relation tempDesc; - Relation sortedDesc; SO1_printf("ExecInitSort: %s\n", "initializing sort node"); @@ -324,7 +245,7 @@ ExecInitSort(Sort *node, EState *estate, Plan *parent) sortstate = makeNode(SortState); sortstate->sort_Flag = 0; sortstate->sort_Keys = NULL; - sortstate->sort_TempRelation = NULL; + node->cleaned = FALSE; node->sortstate = sortstate; @@ -348,8 +269,8 @@ ExecInitSort(Sort *node, EState *estate, Plan *parent) * relation. * ---------------- */ - ExecInitScanTupleSlot(estate, &sortstate->csstate); - ExecInitResultTupleSlot(estate, &sortstate->csstate.cstate); + ExecInitResultTupleSlot(estate, &sortstate->csstate.cstate); + ExecInitScanTupleSlot(estate, &sortstate->csstate); /* ---------------- * initializes child nodes @@ -371,41 +292,10 @@ ExecInitSort(Sort *node, EState *estate, Plan *parent) * info because this node doesn't do projections. * ---------------- */ - ExecAssignScanTypeFromOuterPlan((Plan *) node, &sortstate->csstate); + ExecAssignResultTypeFromOuterPlan((Plan *)node, &sortstate->csstate.cstate); + ExecAssignScanTypeFromOuterPlan((Plan *) node, &sortstate->csstate); sortstate->csstate.cstate.cs_ProjInfo = NULL; - /* ---------------- - * get type information needed for ExecCreatR - * ---------------- - */ - tupType = ExecGetScanType(&sortstate->csstate); - - /* ---------------- - * ExecCreatR wants its second argument to be an object id of - * a relation in the range table or _TEMP_RELATION_ID_ - * indicating that the relation is not in the range table. - * - * In the second case ExecCreatR creates a temp relation. - * (currently this is the only case we support -cim 10/16/89) - * ---------------- - */ - tempOid = node->tempid; - sortOid = _TEMP_RELATION_ID_; - - /* ---------------- - * create the temporary relations - * ---------------- - */ -/* len = ExecTargetListLength(node->plan.targetlist); */ - tempDesc = ExecCreatR(tupType, tempOid); - sortedDesc = ExecCreatR(tupType, sortOid); - - /* ---------------- - * save the relation descriptor in the sortstate - * ---------------- - */ - sortstate->sort_TempRelation = tempDesc; - sortstate->csstate.css_currentRelation = sortedDesc; SO1_printf("ExecInitSort: %s\n", "sort node initialized"); @@ -429,15 +319,12 @@ ExecCountSlotsSort(Sort *node) * ExecEndSort(node) * * old comments - * destroys the temporary relation. * ---------------------------------------------------------------- */ void ExecEndSort(Sort *node) { SortState *sortstate; - Relation tempRelation; - Relation sortedRelation; Plan *outerPlan; /* ---------------- @@ -448,18 +335,6 @@ ExecEndSort(Sort *node) "shutting down sort node"); sortstate = node->sortstate; - tempRelation = sortstate->sort_TempRelation; - sortedRelation = sortstate->csstate.css_currentRelation; - - heap_destroyr(tempRelation); - heap_destroyr(sortedRelation); - - - /* ---------------- - * close the sorted relation and shut down the scan. - * ---------------- - */ - ExecCloseR((Plan *) node); /* ---------------- * shut down the subplan @@ -473,6 +348,9 @@ ExecEndSort(Sort *node) * ---------------- */ ExecClearTuple(sortstate->csstate.css_ScanTupleSlot); + + /* Clean up after psort */ + psort_end(node); SO1_printf("ExecEndSort: %s\n", "sort node shutdown"); @@ -480,37 +358,39 @@ ExecEndSort(Sort *node) /* ---------------------------------------------------------------- * ExecSortMarkPos + * + * Calls psort to save the current position in the sorted file. * ---------------------------------------------------------------- */ void ExecSortMarkPos(Sort *node) { - SortState *sortstate; - HeapScanDesc sdesc; - + SortState *sortstate; + /* ---------------- - * if we haven't sorted yet, just return + * if we haven't sorted yet, just return * ---------------- */ sortstate = node->sortstate; if (sortstate->sort_Flag == false) return; - sdesc = sortstate->csstate.css_currentScanDesc; - heap_markpos(sdesc); + psort_markpos(node); + return; } /* ---------------------------------------------------------------- * ExecSortRestrPos + * + * Calls psort to restore the last saved sort file position. * ---------------------------------------------------------------- */ void ExecSortRestrPos(Sort *node) { - SortState *sortstate; - HeapScanDesc sdesc; - + SortState *sortstate; + /* ---------------- * if we haven't sorted yet, just return. * ---------------- @@ -520,9 +400,8 @@ ExecSortRestrPos(Sort *node) return; /* ---------------- - * restore the scan to the previously marked position + * restore the scan to the previously marked position * ---------------- */ - sdesc = sortstate->csstate.css_currentScanDesc; - heap_restrpos(sdesc); + psort_restorepos(node); } diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index a9269da46c..a14386745f 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -7,7 +7,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/tcop/postgres.c,v 1.36 1997/07/29 16:14:40 thomas Exp $ + * $Header: /cvsroot/pgsql/src/backend/tcop/postgres.c,v 1.37 1997/08/06 03:41:41 momjian Exp $ * * NOTES * this is the "main" module of the postgres backend and @@ -108,6 +108,7 @@ extern int lockingOff; extern int NBuffers; int fsyncOff = 0; +int SortMem = 512; int dontExecute = 0; static int ShowStats; @@ -1038,7 +1039,16 @@ PostgresMain(int argc, char *argv[]) */ flagQ = 1; break; - + + case 'S': + /* ---------------- + * S - amount of sort memory to use in 1k bytes + * ---------------- + */ + SortMem = atoi(optarg); + break; + +#ifdef NOT_USED case 'S': /* ---------------- * S - assume stable main memory @@ -1048,6 +1058,7 @@ PostgresMain(int argc, char *argv[]) flagS = 1; SetTransactionFlushEnabled(false); break; +#endif case 's': /* ---------------- @@ -1173,6 +1184,7 @@ PostgresMain(int argc, char *argv[]) printf("\ttimings = %c\n", ShowStats ? 't' : 'f'); printf("\tdates = %s\n", EuroDates ? "European" : "Normal"); printf("\tbufsize = %d\n", NBuffers); + printf("\tsortmem = %d\n", SortMem); printf("\tquery echo = %c\n", EchoQuery ? 't' : 'f'); printf("\tmultiplexed backend? = %c\n", multiplexedBackend ? 't' : 'f'); @@ -1280,7 +1292,7 @@ PostgresMain(int argc, char *argv[]) */ if (IsUnderPostmaster == false) { puts("\nPOSTGRES backend interactive interface"); - puts("$Revision: 1.36 $ $Date: 1997/07/29 16:14:40 $"); + puts("$Revision: 1.37 $ $Date: 1997/08/06 03:41:41 $"); } /* ---------------- diff --git a/src/backend/utils/sort/lselect.c b/src/backend/utils/sort/lselect.c index 8c6c27ef46..26a3ca3857 100644 --- a/src/backend/utils/sort/lselect.c +++ b/src/backend/utils/sort/lselect.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/sort/Attic/lselect.c,v 1.3 1997/05/20 11:35:48 vadim Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/sort/Attic/lselect.c,v 1.4 1997/08/06 03:41:47 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -26,37 +26,14 @@ #include "utils/psort.h" #include "utils/lselect.h" -extern Relation SortRdesc; /* later static */ - -/* - * PUTTUP - writes the next tuple - * ENDRUN - mark end of run - * GETLEN - reads the length of the next tuple - * ALLOCTUP - returns space for the new tuple - * SETTUPLEN - stores the length into the tuple - * GETTUP - reads the tuple - * - * Note: - * LEN field must be a short; FP is a stream - */ - #define PUTTUP(TUP, FP) fwrite((char *)TUP, (TUP)->t_len, 1, FP) -#define ENDRUN(FP) fwrite((char *)&shortzero, sizeof (shortzero), 1, FP) -#define GETLEN(LEN, FP) fread(&(LEN), sizeof (shortzero), 1, FP) -#define ALLOCTUP(LEN) ((HeapTuple)palloc((unsigned)LEN)) -#define GETTUP(TUP, LEN, FP)\ - fread((char *)(TUP) + sizeof (shortzero), 1, (LEN) - sizeof (shortzero), FP) -#define SETTUPLEN(TUP, LEN) (TUP)->t_len = LEN /* * USEMEM - record use of memory * FREEMEM - record freeing of memory - * FULLMEM - 1 iff a tuple will fit */ - -#define USEMEM(AMT) SortMemory -= (AMT) -#define FREEMEM(AMT) SortMemory += (AMT) -#define LACKMEM() (SortMemory <= BLCKSZ) /* not accurate */ +#define USEMEM(context,AMT) context->sortMem -= (AMT) +#define FREEMEM(context,AMT) context->sortMem += (AMT) /* * lmerge - merges two leftist trees into one @@ -67,12 +44,12 @@ extern Relation SortRdesc; /* later static */ * speed up code significantly. */ struct leftist * -lmerge(struct leftist *pt, struct leftist *qt) +lmerge(struct leftist *pt, struct leftist *qt, LeftistContext context) { register struct leftist *root, *majorLeftist, *minorLeftist; int dist; - if (tuplecmp(pt->lt_tuple, qt->lt_tuple)) { + if (tuplecmp(pt->lt_tuple, qt->lt_tuple, context)) { root = pt; majorLeftist = qt; } else { @@ -83,7 +60,7 @@ lmerge(struct leftist *pt, struct leftist *qt) root->lt_left = majorLeftist; else { if ((minorLeftist = root->lt_right) != NULL) - majorLeftist = lmerge(majorLeftist, minorLeftist); + majorLeftist = lmerge(majorLeftist, minorLeftist, context); if ((dist = root->lt_left->lt_dist) < majorLeftist->lt_dist) { root->lt_dist = 1 + dist; root->lt_right = root->lt_left; @@ -97,11 +74,11 @@ lmerge(struct leftist *pt, struct leftist *qt) } static struct leftist * -linsert(struct leftist *root, struct leftist *new1) +linsert(struct leftist *root, struct leftist *new1, LeftistContext context) { register struct leftist *left, *right; - if (! tuplecmp(root->lt_tuple, new1->lt_tuple)) { + if (! tuplecmp(root->lt_tuple, new1->lt_tuple, context)) { new1->lt_left = root; return(new1); } @@ -116,7 +93,7 @@ linsert(struct leftist *root, struct leftist *new1) } return(root); } - right = linsert(right, new1); + right = linsert(right, new1, context); if (right->lt_dist < left->lt_dist) { root->lt_dist = 1 + left->lt_dist; root->lt_left = right; @@ -142,7 +119,8 @@ linsert(struct leftist *root, struct leftist *new1) */ HeapTuple gettuple(struct leftist **treep, - short *devnum) /* device from which tuple came */ + short *devnum, /* device from which tuple came */ + LeftistContext context) { register struct leftist *tp; HeapTuple tup; @@ -153,9 +131,9 @@ gettuple(struct leftist **treep, if (tp->lt_dist == 1) /* lt_left == NULL */ *treep = tp->lt_left; else - *treep = lmerge(tp->lt_left, tp->lt_right); + *treep = lmerge(tp->lt_left, tp->lt_right, context); - FREEMEM(sizeof (struct leftist)); + FREEMEM(context,sizeof (struct leftist)); FREE(tp); return(tup); } @@ -169,14 +147,17 @@ gettuple(struct leftist **treep, * Note: * Currently never returns NULL BUG */ -int -puttuple(struct leftist **treep, HeapTuple newtuple, int devnum) +void +puttuple(struct leftist **treep, + HeapTuple newtuple, + short devnum, + LeftistContext context) { register struct leftist *new1; register struct leftist *tp; new1 = (struct leftist *) palloc((unsigned) sizeof (struct leftist)); - USEMEM(sizeof (struct leftist)); + USEMEM(context,sizeof (struct leftist)); new1->lt_dist = 1; new1->lt_devnum = devnum; new1->lt_tuple = newtuple; @@ -185,39 +166,12 @@ puttuple(struct leftist **treep, HeapTuple newtuple, int devnum) if ((tp = *treep) == NULL) *treep = new1; else - *treep = linsert(tp, new1); - return(1); + *treep = linsert(tp, new1, context); + return; } /* - * dumptuples - stores all the tuples in tree into file - */ -void -dumptuples(FILE *file) -{ - register struct leftist *tp; - register struct leftist *newp; - HeapTuple tup; - - tp = Tuples; - while (tp != NULL) { - tup = tp->lt_tuple; - if (tp->lt_dist == 1) /* lt_right == NULL */ - newp = tp->lt_left; - else - newp = lmerge(tp->lt_left, tp->lt_right); - FREEMEM(sizeof (struct leftist)); - FREE(tp); - PUTTUP(tup, file); - FREEMEM(tup->t_len); - FREE(tup); - tp = newp; - } - Tuples = NULL; -} - -/* * tuplecmp - Compares two tuples with respect CmpList * * Returns: @@ -225,7 +179,7 @@ dumptuples(FILE *file) * Assumtions: */ int -tuplecmp(HeapTuple ltup, HeapTuple rtup) +tuplecmp(HeapTuple ltup, HeapTuple rtup, LeftistContext context) { register char *lattr, *rattr; int nkey = 0; @@ -238,24 +192,27 @@ tuplecmp(HeapTuple ltup, HeapTuple rtup) return(0); if (rtup == (HeapTuple)NULL) return(1); - while (nkey < Nkeys && !result) { + while (nkey < context->nKeys && !result) { lattr = heap_getattr(ltup, InvalidBuffer, - Key[nkey].sk_attno, - RelationGetTupleDescriptor(SortRdesc), - &isnull); + context->scanKeys[nkey].sk_attno, + context->tupDesc, &isnull); if (isnull) return(0); rattr = heap_getattr(rtup, InvalidBuffer, - Key[nkey].sk_attno, - RelationGetTupleDescriptor(SortRdesc), + context->scanKeys[nkey].sk_attno, + context->tupDesc, &isnull); if (isnull) return(1); - if (Key[nkey].sk_flags & SK_COMMUTE) { - if (!(result = (long) (*Key[nkey].sk_func) (rattr, lattr))) - result = -(long) (*Key[nkey].sk_func) (lattr, rattr); - } else if (!(result = (long) (*Key[nkey].sk_func) (lattr, rattr))) - result = -(long) (*Key[nkey].sk_func) (rattr, lattr); + if (context->scanKeys[nkey].sk_flags & SK_COMMUTE) { + if (!(result = + (long) (*context->scanKeys[nkey].sk_func) (rattr, lattr))) + result = + -(long) (*context->scanKeys[nkey].sk_func) (lattr, rattr); + } else if (!(result = + (long) (*context->scanKeys[nkey].sk_func) (lattr, rattr))) + result = + -(long) (*context->scanKeys[nkey].sk_func) (rattr, lattr); nkey++; } return (result == 1); @@ -263,7 +220,7 @@ tuplecmp(HeapTuple ltup, HeapTuple rtup) #ifdef EBUG void -checktree(struct leftist *tree) +checktree(struct leftist *tree, LeftistContext context) { int lnodes; int rnodes; @@ -272,8 +229,8 @@ checktree(struct leftist *tree) puts("Null tree."); return; } - lnodes = checktreer(tree->lt_left, 1); - rnodes = checktreer(tree->lt_right, 1); + lnodes = checktreer(tree->lt_left, 1, context); + rnodes = checktreer(tree->lt_right, 1, context); if (lnodes < 0) { lnodes = -lnodes; puts("0:\tBad left side."); @@ -297,24 +254,24 @@ checktree(struct leftist *tree) } else if (tree->lt_dist != 1+ tree->lt_right->lt_dist) puts("0:\tDistance incorrect."); if (lnodes > 0) - if (tuplecmp(tree->lt_left->lt_tuple, tree->lt_tuple)) + if (tuplecmp(tree->lt_left->lt_tuple, tree->lt_tuple, context)) printf("%d:\tLeft child < parent.\n"); if (rnodes > 0) - if (tuplecmp(tree->lt_right->lt_tuple, tree->lt_tuple)) + if (tuplecmp(tree->lt_right->lt_tuple, tree->lt_tuple, context)) printf("%d:\tRight child < parent.\n"); printf("Tree has %d nodes\n", 1 + lnodes + rnodes); } int -checktreer(struct leftist *tree, int level) +checktreer(struct leftist *tree, int level, LeftistContext context) { int lnodes, rnodes; int error = 0; if (tree == NULL) return(0); - lnodes = checktreer(tree->lt_left, level + 1); - rnodes = checktreer(tree->lt_right, level + 1); + lnodes = checktreer(tree->lt_left, level + 1, context); + rnodes = checktreer(tree->lt_right, level + 1, context); if (lnodes < 0) { error = 1; lnodes = -lnodes; @@ -349,12 +306,12 @@ checktreer(struct leftist *tree, int level) printf("%d:\tDistance incorrect.\n", level); } if (lnodes > 0) - if (tuplecmp(tree->lt_left->lt_tuple, tree->lt_tuple)) { + if (tuplecmp(tree->lt_left->lt_tuple, tree->lt_tuple, context)) { error = 1; printf("%d:\tLeft child < parent.\n"); } if (rnodes > 0) - if (tuplecmp(tree->lt_right->lt_tuple, tree->lt_tuple)) { + if (tuplecmp(tree->lt_right->lt_tuple, tree->lt_tuple, context)) { error = 1; printf("%d:\tRight child < parent.\n"); } diff --git a/src/backend/utils/sort/psort.c b/src/backend/utils/sort/psort.c index 7005666ce3..5821905669 100644 --- a/src/backend/utils/sort/psort.c +++ b/src/backend/utils/sort/psort.c @@ -7,11 +7,25 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/utils/sort/Attic/psort.c,v 1.5 1997/07/24 20:18:07 momjian Exp $ + * $Header: /cvsroot/pgsql/src/backend/utils/sort/Attic/psort.c,v 1.6 1997/08/06 03:41:55 momjian Exp $ * * NOTES - * Sorts the first relation into the second relation. The sort may - * not be called twice simultaneously. + * Sorts the first relation into the second relation. + * + * The old psort.c's routines formed a temporary relation from the merged + * sort files. This version keeps the files around instead of generating the + * relation from them, and provides interface functions to the file so that + * you can grab tuples, mark a position in the file, restore a position in the + * file. You must now explicitly call an interface function to end the sort, + * psort_end, when you are done. + * Now most of the global variables are stuck in the Sort nodes, and + * accessed from there (they are passed to all the psort routines) so that + * each sort running has its own separate state. This is facilitated by having + * the Sort nodes passed in to all the interface functions. + * The one global variable that all the sorts still share is SortMemory. + * You should now be allowed to run two or more psorts concurrently, + * so long as the memory they eat up is not greater than SORTMEM, the initial + * value of SortMemory. -Rex 2.15.1995 * * Use the tape-splitting method (Knuth, Vol. III, pp281-86) in the future. * @@ -21,7 +35,6 @@ */ #include #include -#include #include "postgres.h" @@ -35,120 +48,150 @@ #include "storage/buf.h" #include "storage/bufmgr.h" /* for BLCKSZ */ #include "utils/portal.h" /* for {Start,End}PortalAllocMode */ +#include "utils/elog.h" #include "utils/rel.h" -#include "utils/psort.h" +#include "nodes/execnodes.h" +#include "nodes/plannodes.h" +#include "executor/executor.h" + #include "utils/lselect.h" +#include "utils/psort.h" #include "storage/fd.h" -#ifndef HAVE_MEMMOVE -# include -#else -# include -#endif - #define TEMPDIR "./" -int Nkeys; -ScanKey Key; -int SortMemory; - -static int TapeRange; /* number of tapes - 1 (T) */ -static int Level; /* (l) */ -static int TotalDummy; /* summation of tp_dummy */ -static struct tape Tape[MAXTAPES]; -static long shortzero = 0; /* used to delimit runs */ -static struct tuple *LastTuple = NULL; /* last output */ +extern int SortMem; /* defined as postgres option */ +static long shortzero = 0; /* used to delimit runs */ -static int BytesRead; /* to keep track of # of IO */ -static int BytesWritten; +/* + * old psort global variables + * + * (These are the global variables from the old psort. They are still used, + * but are now accessed from Sort nodes using the PS macro. Note that while + * these variables will be accessed by PS(node)->whatever, they will still + * be called by their original names within the comments! -Rex 2.10.1995) + * + * LeftistContextData treeContext; + * + * static int TapeRange; // number of tapes - 1 (T) // + * static int Level; // (l) // + * static int TotalDummy; // summation of tp_dummy // + * static struct tape *Tape; + * + * static int BytesRead; // to keep track of # of IO // + * static int BytesWritten; + * + * struct leftist *Tuples; // current tuples in memory // + * + * FILE *psort_grab_file; // this holds tuples grabbed + * from merged sort runs // + * long psort_current; // current file position // + * long psort_saved; // file position saved for + * mark and restore // + */ -Relation SortRdesc; /* current tuples in memory */ -struct leftist *Tuples; /* current tuples in memory */ +/* + * PS - Macro to access and cast psortstate from a Sort node + */ +#define PS(N) ((Psortstate *)N->psortstate) /* - * psort - polyphase merge sort entry point + * psort_begin - polyphase merge sort entry point. Sorts the subplan + * into a temporary file psort_grab_file. After + * this is called, calling the interface function + * psort_grabtuple iteratively will get you the sorted + * tuples. psort_end then finishes the sort off, after + * all the tuples have been grabbed. + * + * Allocates and initializes sort node's psort state. */ -void -psort(Relation oldrel, Relation newrel, int nkeys, ScanKey key) +bool +psort_begin(Sort *node, int nkeys, ScanKey key) { + bool empty; /* to answer: is child node empty? */ + + node->psortstate = (struct Psortstate *)palloc(sizeof(struct Psortstate)); + if (node->psortstate == NULL) + return false; + AssertArg(nkeys >= 1); AssertArg(key[0].sk_attno != 0); AssertArg(key[0].sk_procedure != 0); - Nkeys = nkeys; - Key = key; - SortMemory = 0; - SortRdesc = oldrel; - BytesRead = 0; - BytesWritten = 0; - /* - * may not be the best place. - * - * Pass 0 for the "limit" as the argument is currently ignored. - * Previously, only one arg was passed. -mer 12 Nov. 1991 - */ - StartPortalAllocMode(StaticAllocMode, (Size)0); - initpsort(); - initialrun(oldrel); - /* call finalrun(newrel, mergerun()) instead */ - endpsort(newrel, mergeruns()); - EndPortalAllocMode(); - NDirectFileRead += (int)ceil((double)BytesRead / BLCKSZ); - NDirectFileWrite += (int)ceil((double)BytesWritten / BLCKSZ); -} + PS(node)->BytesRead = 0; + PS(node)->BytesWritten = 0; + PS(node)->treeContext.tupDesc = + ExecGetTupType(outerPlan((Plan *)node)); + PS(node)->treeContext.nKeys = nkeys; + PS(node)->treeContext.scanKeys = key; + PS(node)->treeContext.sortMem = SortMem; -/* - * TAPENO - number of tape in Tape - */ + PS(node)->Tuples = NULL; + PS(node)->tupcount = 0; + + PS(node)->using_tape_files = false; + PS(node)->memtuples = NULL; + + initialrun(node, &empty); + + if (empty) + return false; + + if (PS(node)->using_tape_files) + PS(node)->psort_grab_file = mergeruns(node); -#define TAPENO(NODE) (NODE - Tape) -#define TUPLENO(TUP) ((TUP == NULL) ? -1 : (int) TUP->t_iid) + PS(node)->psort_current = 0; + PS(node)->psort_saved = 0; + + return true; +} /* - * initpsort - initializes the tapes + * inittapes - initializes the tapes * - (polyphase merge Alg.D(D1)--Knuth, Vol.3, p.270) * Returns: * number of allocated tapes */ void -initpsort() +inittapes(Sort *node) { register int i; register struct tape *tp; + Assert(node != (Sort *) NULL); + Assert(PS(node) != (Psortstate *) NULL); + /* ASSERT(ntapes >= 3 && ntapes <= MAXTAPES, - "initpsort: Invalid number of tapes to initialize.\n"); + "inittapes: Invalid number of tapes to initialize.\n"); */ - tp = Tape; + tp = PS(node)->Tape; for (i = 0; i < MAXTAPES && (tp->tp_file = gettape()) != NULL; i++) { tp->tp_dummy = 1; tp->tp_fib = 1; tp->tp_prev = tp - 1; tp++; } - TapeRange = --tp - Tape; + PS(node)->TapeRange = --tp - PS(node)->Tape; tp->tp_dummy = 0; tp->tp_fib = 0; - Tape[0].tp_prev = tp; - - if (TapeRange <= 1) - elog(WARN, "initpsort: Could only allocate %d < 3 tapes\n", - TapeRange + 1); + PS(node)->Tape[0].tp_prev = tp; - Level = 1; - TotalDummy = TapeRange; + if (PS(node)->TapeRange <= 1) + elog(WARN, "inittapes: Could only allocate %d < 3 tapes\n", + PS(node)->TapeRange + 1); - SortMemory = SORTMEM; - LastTuple = NULL; - Tuples = NULL; + PS(node)->Level = 1; + PS(node)->TotalDummy = PS(node)->TapeRange; + + PS(node)->using_tape_files = true; } /* - * resetpsort - resets (frees) malloc'd memory for an aborted Xaction + * resetpsort - resets (pfrees) palloc'd memory for an aborted Xaction * * Not implemented yet. */ @@ -170,16 +213,18 @@ resetpsort() * LEN field must be a short; FP is a stream */ -#define PUTTUP(TUP, FP)\ - BytesWritten += (TUP)->t_len; \ - fwrite((char *)TUP, (TUP)->t_len, 1, FP) + +#define PUTTUP(NODE, TUP, FP) if (1) {\ + ((Psortstate *)NODE->psortstate)->BytesWritten += (TUP)->t_len; \ + fwrite((char *)TUP, (TUP)->t_len, 1, FP);} else #define ENDRUN(FP) fwrite((char *)&shortzero, sizeof (shortzero), 1, FP) #define GETLEN(LEN, FP) fread((char *)&(LEN), sizeof (shortzero), 1, FP) #define ALLOCTUP(LEN) ((HeapTuple)palloc((unsigned)LEN)) -#define GETTUP(TUP, LEN, FP)\ +#define GETTUP(NODE, TUP, LEN, FP) if (1) {\ IncrProcessed(); \ - BytesRead += (LEN) - sizeof (shortzero); \ - fread((char *)(TUP) + sizeof (shortzero), (LEN) - sizeof (shortzero), 1, FP) + ((Psortstate *)NODE->psortstate)->BytesRead += (LEN) - sizeof (shortzero); \ + fread((char *)(TUP) + sizeof (shortzero), (LEN) - sizeof (shortzero), 1, FP);} \ + else #define SETTUPLEN(TUP, LEN) (TUP)->t_len = LEN /* @@ -188,9 +233,9 @@ resetpsort() * FULLMEM - 1 iff a tuple will fit */ -#define USEMEM(AMT) SortMemory -= (AMT) -#define FREEMEM(AMT) SortMemory += (AMT) -#define LACKMEM() (SortMemory <= BLCKSZ) /* not accurate */ +#define USEMEM(NODE,AMT) PS(node)->treeContext.sortMem -= (AMT) +#define FREEMEM(NODE,AMT) PS(node)->treeContext.sortMem += (AMT) +#define LACKMEM(NODE) (PS(node)->treeContext.sortMem <= MAXBLCKSZ) /* not accurate */ #define TRACEMEM(FUNC) #define TRACEOUT(FUNC, TUP) @@ -219,61 +264,66 @@ resetpsort() * Also, perhaps allocate tapes when needed. Split into 2 funcs. */ void -initialrun(Relation rdesc) +initialrun(Sort *node, bool *empty) { /* register struct tuple *tup; */ register struct tape *tp; - HeapScanDesc sdesc; int baseruns; /* D:(a) */ - int morepasses; /* EOF */ - - sdesc = heap_beginscan(rdesc, 0, NowTimeQual, 0, - (ScanKey)NULL); - tp = Tape; + int extrapasses; /* EOF */ + + Assert(node != (Sort *) NULL); + Assert(PS(node) != (Psortstate *) NULL); + + tp = PS(node)->Tape; - if ((bool)createrun(sdesc, tp->tp_file) != false) - morepasses = 0; + if ((bool)createrun(node, tp->tp_file, empty) != false) { + if (! PS(node)->using_tape_files) + inittapes(node); + extrapasses = 0; + } else - morepasses = 1 + (Tuples != NULL); /* (T != N) ? 2 : 1 */ + return; /* if rows fit in memory, we never access tape stuff */ for ( ; ; ) { tp->tp_dummy--; - TotalDummy--; + PS(node)->TotalDummy--; if (tp->tp_dummy < (tp + 1)->tp_dummy) tp++; else if (tp->tp_dummy != 0) - tp = Tape; + tp = PS(node)->Tape; else { - Level++; - baseruns = Tape[0].tp_fib; - for (tp = Tape; tp - Tape < TapeRange; tp++) { - TotalDummy += + PS(node)->Level++; + baseruns = PS(node)->Tape[0].tp_fib; + for (tp = PS(node)->Tape; + tp - PS(node)->Tape < PS(node)->TapeRange; tp++) { + PS(node)->TotalDummy += (tp->tp_dummy = baseruns + (tp + 1)->tp_fib - tp->tp_fib); tp->tp_fib = baseruns + (tp + 1)->tp_fib; } - tp = Tape; /* D4 */ + tp = PS(node)->Tape; /* D4 */ } /* D3 */ - if (morepasses) - if (--morepasses) { - dumptuples(tp->tp_file); + if (extrapasses) + if (--extrapasses) { + dumptuples(node); ENDRUN(tp->tp_file); continue; } else break; - if ((bool)createrun(sdesc, tp->tp_file) == false) - morepasses = 1 + (Tuples != NULL); + + if ((bool)createrun(node, tp->tp_file, empty) == false) + extrapasses = 1 + (PS(node)->Tuples != NULL); /* D2 */ } - for (tp = Tape + TapeRange; tp >= Tape; tp--) - rewind(tp->tp_file); /* D. */ - heap_endscan(sdesc); + for (tp = PS(node)->Tape + PS(node)->TapeRange; tp >= PS(node)->Tape; tp--) + rewind(tp->tp_file); /* D. */ } /* - * createrun - places the next run on file + * createrun - places the next run on file, grabbing the tuples by + * executing the subplan passed in * * Uses: * Tuples, which should contain any tuples for this run @@ -283,7 +333,7 @@ initialrun(Relation rdesc) * Tuples contains the tuples for the following run upon exit */ bool -createrun(HeapScanDesc sdesc, FILE *file) +createrun(Sort *node, FILE *file, bool *empty) { register HeapTuple lasttuple; register HeapTuple btup, tup; @@ -291,47 +341,74 @@ createrun(HeapScanDesc sdesc, FILE *file) Buffer b; bool foundeor; short junk; - + + int cr_tuples = 0; /* Count tuples grabbed from plannode */ + TupleTableSlot *cr_slot; + + Assert(node != (Sort *) NULL); + Assert(PS(node) != (Psortstate *) NULL); + lasttuple = NULL; nextrun = NULL; foundeor = false; for ( ; ; ) { - while (LACKMEM() && Tuples != NULL) { + while (LACKMEM(node) && PS(node)->Tuples != NULL) { if (lasttuple != NULL) { - FREEMEM(lasttuple->t_len); + FREEMEM(node,lasttuple->t_len); FREE(lasttuple); TRACEMEM(createrun); } - lasttuple = tup = gettuple(&Tuples, &junk); - PUTTUP(tup, file); + lasttuple = tup = gettuple(&PS(node)->Tuples, &junk, + &PS(node)->treeContext); + if (! PS(node)->using_tape_files) + inittapes(node); + PUTTUP(node, tup, PS(node)->Tape->tp_file); TRACEOUT(createrun, tup); } - if (LACKMEM()) + if (LACKMEM(node)) break; - btup = heap_getnext(sdesc, 0, &b); - if (!HeapTupleIsValid(btup)) { + + /* About to call ExecProcNode, it can mess up the state if it + * eventually calls another Sort node. So must stow it away here for + * the meantime. -Rex 2.2.1995 + */ + + cr_slot = ExecProcNode(outerPlan((Plan *)node), (Plan *)node); + + if (TupIsNull(cr_slot)) { foundeor = true; break; } + else { + tup = tuplecopy(cr_slot->val); + ExecClearTuple(cr_slot); + PS(node)->tupcount++; + cr_tuples++; + } + IncrProcessed(); - tup = tuplecopy(btup, sdesc->rs_rd, b); - USEMEM(tup->t_len); + USEMEM(node,tup->t_len); TRACEMEM(createrun); - if (lasttuple != NULL && tuplecmp(tup, lasttuple)) - puttuple(&nextrun, tup, 0); + if (lasttuple != NULL && tuplecmp(tup, lasttuple, + &PS(node)->treeContext)) + puttuple(&nextrun, tup, 0, &PS(node)->treeContext); else - puttuple(&Tuples, tup, 0); - ReleaseBuffer(b); + puttuple(&PS(node)->Tuples, tup, 0, &PS(node)->treeContext); } if (lasttuple != NULL) { - FREEMEM(lasttuple->t_len); + FREEMEM(node,lasttuple->t_len); FREE(lasttuple); TRACEMEM(createrun); } - dumptuples(file); - ENDRUN(file); + dumptuples(node); + if (PS(node)->using_tape_files) + ENDRUN(file); /* delimit the end of the run */ - Tuples = nextrun; + PS(node)->Tuples = nextrun; + + /* if we did not see any tuples, mark empty */ + *empty = (cr_tuples > 0) ? false : true; + return((bool)! foundeor); /* XXX - works iff bool is {0,1} */ } @@ -339,10 +416,10 @@ createrun(HeapScanDesc sdesc, FILE *file) * tuplecopy - see also tuple.c:palloctup() * * This should eventually go there under that name? And this will - * then use malloc directly (see version -r1.2). + * then use palloc directly (see version -r1.2). */ HeapTuple -tuplecopy(HeapTuple tup, Relation rdesc, Buffer b) +tuplecopy(HeapTuple tup) { HeapTuple rettup; @@ -362,18 +439,22 @@ tuplecopy(HeapTuple tup, Relation rdesc, Buffer b) * file of tuples in order */ FILE * -mergeruns() +mergeruns(Sort *node) { - register struct tape *tp; - - tp = Tape + TapeRange; - merge(tp); + register struct tape *tp; + + Assert(node != (Sort *) NULL); + Assert(PS(node) != (Psortstate *) NULL); + Assert(PS(node)->using_tape_files == true); + + tp = PS(node)->Tape + PS(node)->TapeRange; + merge(node, tp); rewind(tp->tp_file); - while (--Level != 0) { + while (--PS(node)->Level != 0) { tp = tp->tp_prev; rewind(tp->tp_file); - /* resettape(tp->tp_file); -not sufficient */ - merge(tp); + /* resettape(tp->tp_file); -not sufficient */ + merge(node, tp); rewind(tp->tp_file); } return(tp->tp_file); @@ -384,7 +465,7 @@ mergeruns() * (polyphase merge Alg.D(D5)--Knuth, Vol.3, p271) */ void -merge(struct tape *dest) +merge(Sort *node, struct tape *dest) { register HeapTuple tup; register struct tape *lasttp; /* (TAPE[P]) */ @@ -396,6 +477,10 @@ merge(struct tape *dest) short fromtape; long tuplen; + Assert(node != (Sort *) NULL); + Assert(PS(node) != (Psortstate *) NULL); + Assert(PS(node)->using_tape_files == true); + lasttp = dest->tp_prev; times = lasttp->tp_fib; for (tp = lasttp ; tp != dest; tp = tp->tp_prev) @@ -403,95 +488,217 @@ merge(struct tape *dest) tp->tp_fib += times; /* Tape[].tp_fib (A[]) is set to proper exit values */ - if (TotalDummy < TapeRange) /* no complete dummy runs */ + if (PS(node)->TotalDummy < PS(node)->TapeRange)/* no complete dummy runs */ outdummy = 0; else { - outdummy = TotalDummy; /* a large positive number */ + outdummy = PS(node)->TotalDummy; /* a large positive number */ for (tp = lasttp; tp != dest; tp = tp->tp_prev) if (outdummy > tp->tp_dummy) outdummy = tp->tp_dummy; for (tp = lasttp; tp != dest; tp = tp->tp_prev) tp->tp_dummy -= outdummy; tp->tp_dummy += outdummy; - TotalDummy -= outdummy * TapeRange; + PS(node)->TotalDummy -= outdummy * PS(node)->TapeRange; /* do not add the outdummy runs yet */ times -= outdummy; } destfile = dest->tp_file; - while (times-- != 0) { /* merge one run */ + while (times-- != 0) { /* merge one run */ tuples = NULL; - if (TotalDummy == 0) + if (PS(node)->TotalDummy == 0) for (tp = dest->tp_prev; tp != dest; tp = tp->tp_prev) { GETLEN(tuplen, tp->tp_file); tup = ALLOCTUP(tuplen); - USEMEM(tuplen); + USEMEM(node,tuplen); TRACEMEM(merge); SETTUPLEN(tup, tuplen); - GETTUP(tup, tuplen, tp->tp_file); - puttuple(&tuples, tup, TAPENO(tp)); + GETTUP(node, tup, tuplen, tp->tp_file); + puttuple(&tuples, tup, tp - PS(node)->Tape, + &PS(node)->treeContext); } else { for (tp = dest->tp_prev; tp != dest; tp = tp->tp_prev) { if (tp->tp_dummy != 0) { tp->tp_dummy--; - TotalDummy--; + PS(node)->TotalDummy--; } else { GETLEN(tuplen, tp->tp_file); tup = ALLOCTUP(tuplen); - USEMEM(tuplen); + USEMEM(node,tuplen); TRACEMEM(merge); SETTUPLEN(tup, tuplen); - GETTUP(tup, tuplen, tp->tp_file); - puttuple(&tuples, tup, TAPENO(tp)); + GETTUP(node, tup, tuplen, tp->tp_file); + puttuple(&tuples, tup, tp - PS(node)->Tape, + &PS(node)->treeContext); } } } while (tuples != NULL) { /* possible optimization by using count in tuples */ - tup = gettuple(&tuples, &fromtape); - PUTTUP(tup, destfile); - FREEMEM(tup->t_len); + tup = gettuple(&tuples, &fromtape, &PS(node)->treeContext); + PUTTUP(node, tup, destfile); + FREEMEM(node,tup->t_len); FREE(tup); TRACEMEM(merge); - GETLEN(tuplen, Tape[fromtape].tp_file); + GETLEN(tuplen, PS(node)->Tape[fromtape].tp_file); if (tuplen == 0) ; else { tup = ALLOCTUP(tuplen); - USEMEM(tuplen); + USEMEM(node,tuplen); TRACEMEM(merge); SETTUPLEN(tup, tuplen); - GETTUP(tup, tuplen, Tape[fromtape].tp_file); - puttuple(&tuples, tup, fromtape); + GETTUP(node, tup, tuplen, PS(node)->Tape[fromtape].tp_file); + puttuple(&tuples, tup, fromtape, &PS(node)->treeContext); } - } + } ENDRUN(destfile); } - TotalDummy += outdummy; + PS(node)->TotalDummy += outdummy; } /* - * endpsort - creates the new relation and unlinks the tape files + * dumptuples - stores all the tuples in tree into file */ void -endpsort(Relation rdesc, FILE *file) +dumptuples(Sort *node) { - register struct tape *tp; - register HeapTuple tup; - long tuplen; + register struct leftist *tp; + register struct leftist *newp; + struct leftist **treep = &PS(node)->Tuples; + LeftistContext context = &PS(node)->treeContext; + HeapTuple tup; + int memtupindex = 0; + + if (! PS(node)->using_tape_files) { + Assert(PS(node)->memtuples == NULL); + PS(node)->memtuples = palloc(PS(node)->tupcount * sizeof(HeapTuple)); + } - if (! feof(file)) - while (GETLEN(tuplen, file) && tuplen != 0) { - tup = ALLOCTUP(tuplen); - SortMemory += tuplen; - SETTUPLEN(tup, tuplen); - GETTUP(tup, tuplen, file); - heap_insert(rdesc, tup); + tp = *treep; + while (tp != NULL) { + tup = tp->lt_tuple; + if (tp->lt_dist == 1) /* lt_right == NULL */ + newp = tp->lt_left; + else + newp = lmerge(tp->lt_left, tp->lt_right, context); + FREEMEM(node,sizeof (struct leftist)); + FREE(tp); + if (PS(node)->using_tape_files) { + PUTTUP(node, tup, PS(node)->Tape->tp_file); + FREEMEM(node,tup->t_len); FREE(tup); - SortMemory -= tuplen; } - for (tp = Tape + TapeRange; tp >= Tape; tp--) - destroytape(tp->tp_file); + else + PS(node)->memtuples[memtupindex++] = tup; + + tp = newp; + } + *treep = NULL; +} + +/* + * psort_grabtuple - gets a tuple from the sorted file and returns it. + * If there are no tuples left, returns NULL. + * Should not call psort_end unless this has returned + * a NULL indicating the last tuple has been processed. + */ +HeapTuple +psort_grabtuple(Sort *node) +{ + register HeapTuple tup; + long tuplen; + + Assert(node != (Sort *) NULL); + Assert(PS(node) != (Psortstate *) NULL); + + if (PS(node)->using_tape_files == true) { + if (!feof(PS(node)->psort_grab_file)) { + if (GETLEN(tuplen, PS(node)->psort_grab_file) && tuplen != 0) { + tup = (HeapTuple)palloc((unsigned)tuplen); + SETTUPLEN(tup, tuplen); + GETTUP(node, tup, tuplen, PS(node)->psort_grab_file); + + /* Update current merged sort file position */ + PS(node)->psort_current += tuplen; + + return tup; + } + else + return NULL; + } + else + return NULL; + } + else { + if (PS(node)->psort_current < PS(node)->tupcount) + return PS(node)->memtuples[PS(node)->psort_current++]; + else + return NULL; + } +} + +/* + * psort_markpos - saves current position in the merged sort file + */ +void +psort_markpos(Sort *node) +{ + Assert(node != (Sort *) NULL); + Assert(PS(node) != (Psortstate *) NULL); + + PS(node)->psort_saved = PS(node)->psort_current; +} + +/* + * psort_restorepos- restores current position in merged sort file to + * last saved position + */ +void +psort_restorepos(Sort *node) +{ + Assert(node != (Sort *) NULL); + Assert(PS(node) != (Psortstate *) NULL); + + if (PS(node)->using_tape_files == true) + fseek(PS(node)->psort_grab_file, PS(node)->psort_saved, SEEK_SET); + PS(node)->psort_current = PS(node)->psort_saved; +} + +/* + * psort_end - unlinks the tape files, and cleans up. Should not be + * called unless psort_grabtuple has returned a NULL. + */ +void +psort_end(Sort *node) +{ + register struct tape *tp; + + if (!node->cleaned) { + Assert(node != (Sort *) NULL); +/* Assert(PS(node) != (Psortstate *) NULL); */ + + /* + * I'm changing this because if we are sorting a relation + * with no tuples, psortstate is NULL. + */ + if (PS(node) != (Psortstate *) NULL) { + if (PS(node)->using_tape_files == true) + for (tp = PS(node)->Tape + PS(node)->TapeRange; tp >= PS(node)->Tape; tp--) + destroytape(tp->tp_file); + else if (PS(node)->memtuples) + pfree(PS(node)->memtuples); + + NDirectFileRead += + (int)ceil((double)PS(node)->BytesRead / BLCKSZ); + NDirectFileWrite += + (int)ceil((double)PS(node)->BytesWritten / BLCKSZ); + + pfree((void *)node->psortstate); + + node->cleaned = TRUE; + } + } } /* @@ -522,26 +729,34 @@ static char Tempfile[MAXPGPATH] = TEMPDIR; FILE * gettape() { - register struct tapelst *tp; - FILE *file; - static int tapeinit = 0; + register struct tapelst *tp; + FILE *file; + static int tapeinit = 0; + char *mktemp(); + static unsigned int uniqueFileId = 0; + extern int errno; + char uniqueName[MAXPGPATH]; tp = (struct tapelst *)palloc((unsigned)sizeof (struct tapelst)); - if (!tapeinit) { - Tempfile[sizeof (TEMPDIR) - 1] = '/'; - memmove(Tempfile + sizeof(TEMPDIR), TAPEEXT, sizeof (TAPEEXT)); - tapeinit = 1; - } - tp->tl_name = palloc((unsigned)sizeof(Tempfile)); + + sprintf(uniqueName, "%spg_psort.%d.%d", TEMPDIR, getpid(), uniqueFileId); + uniqueFileId++; + + tapeinit = 1; + + tp->tl_name = palloc((unsigned)sizeof(uniqueName)); + /* - * now, copy template with final null into malloc'd space + * now, copy template with final null into palloc'd space */ - memmove(tp->tl_name, Tempfile, sizeof (TEMPDIR) + sizeof (TAPEEXT)); - mktemp(tp->tl_name); + + memmove(tp->tl_name, uniqueName, strlen(uniqueName)); + AllocateFile(); file = fopen(tp->tl_name, "w+"); if (file == NULL) { + elog(NOTICE, "psort: gettape: fopen returned error code %i", errno); /* XXX this should not happen */ FreeFile(); FREE(tp->tl_name); diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index 3e01132faa..ff01ff5a62 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -6,7 +6,7 @@ * * Copyright (c) 1994, Regents of the University of California * - * $Id: execnodes.h,v 1.6 1996/11/04 08:52:54 scrappy Exp $ + * $Id: execnodes.h,v 1.7 1997/08/06 03:42:02 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -605,7 +605,7 @@ typedef struct SortState { CommonScanState csstate; /* its first field is NodeTag */ bool sort_Flag; ScanKey sort_Keys; - Relation sort_TempRelation; + bool cleaned; } SortState; /* ---------------- diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index b02a370fac..6e786c34f0 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -6,7 +6,7 @@ * * Copyright (c) 1994, Regents of the University of California * - * $Id: plannodes.h,v 1.5 1996/11/05 08:18:44 scrappy Exp $ + * $Id: plannodes.h,v 1.6 1997/08/06 03:42:04 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -265,6 +265,8 @@ typedef struct Sort { Oid tempid; int keycount; SortState *sortstate; + void *psortstate; + bool cleaned; } Sort; /* ---------------- diff --git a/src/include/utils/lselect.h b/src/include/utils/lselect.h index 7cb5f8d185..048ea932e2 100644 --- a/src/include/utils/lselect.h +++ b/src/include/utils/lselect.h @@ -6,15 +6,15 @@ * * Copyright (c) 1994, Regents of the University of California * - * $Id: lselect.h,v 1.3 1996/11/04 11:51:19 scrappy Exp $ - * + * $Id: lselect.h,v 1.4 1997/08/06 03:42:07 momjian Exp $ + * *------------------------------------------------------------------------- */ #ifndef LSELECT_H #define LSELECT_H #include -#include +#include "access/htup.h" struct leftist { short lt_dist; /* distance to leaf/empty node */ @@ -24,17 +24,26 @@ struct leftist { struct leftist *lt_right; }; -extern struct leftist *Tuples; +/* replaces global variables in lselect.c to make it reentrant */ +typedef struct { + TupleDesc tupDesc; + int nKeys; + ScanKey scanKeys; + int sortMem; /* needed for psort */ +} LeftistContextData; +typedef LeftistContextData *LeftistContext; -extern struct leftist *lmerge(struct leftist *pt, struct leftist *qt); -extern HeapTuple gettuple(struct leftist **treep, short *devnum); -extern int puttuple(struct leftist **treep, HeapTuple newtuple, int devnum); -extern void dumptuples(FILE *file); -extern int tuplecmp(HeapTuple ltup, HeapTuple rtup); +extern struct leftist *lmerge(struct leftist *pt, struct leftist *qt, + LeftistContext context); +extern HeapTuple gettuple(struct leftist **treep, short *devnum, + LeftistContext context); +extern void puttuple(struct leftist **treep, HeapTuple newtuple, short devnum, + LeftistContext context); +extern int tuplecmp(HeapTuple ltup, HeapTuple rtup, LeftistContext context); #ifdef EBUG -extern void checktree(struct leftist *tree); -extern int checktreer(struct leftist *tree, int level); +extern void checktree(struct leftist *tree, LeftistContext context); +extern int checktreer(struct leftist *tree, int level, LeftistContext context); #endif /* EBUG */ #endif /* LSELECT_H */ diff --git a/src/include/utils/psort.h b/src/include/utils/psort.h index 169c4bdc70..7ece609020 100644 --- a/src/include/utils/psort.h +++ b/src/include/utils/psort.h @@ -6,7 +6,7 @@ * * Copyright (c) 1994, Regents of the University of California * - * $Id: psort.h,v 1.3 1997/05/20 11:37:33 vadim Exp $ + * $Id: psort.h,v 1.4 1997/08/06 03:42:13 momjian Exp $ * *------------------------------------------------------------------------- */ @@ -14,11 +14,13 @@ #define PSORT_H #include -#include +#include "access/relscan.h" +#include "utils/lselect.h" +#include "nodes/plannodes.h" #define SORTMEM (1 << 18) /* 1/4 M - any static memory */ #define MAXTAPES 7 /* 7--See Fig. 70, p273 */ -#define TAPEEXT "pg_psort.XXXXXX" /* TEMPDIR/TAPEEXT */ +#define TAPEEXTLEN strlen("pg_psort.xxxxx.xxx") /* TEMPDIR/TAPEEXT */ #define FREE(x) pfree((char *) x) struct tape { @@ -35,13 +37,38 @@ struct cmplist { struct cmplist *cp_next; /* next in chain */ }; -extern int Nkeys; -extern ScanKey key; -extern int SortMemory; /* free memory */ -extern Relation SortRdesc; -extern struct leftist *Tuples; +/* This structure preserves the state of psort between calls from different + * nodes to its interface functions. Basically, it includes all of the global + * variables in psort. In case you were wondering, pointers to these structures + * are included in Sort node structures. -Rex 2.6.1995 + */ +typedef struct Psortstate { + LeftistContextData treeContext; + + int TapeRange; + int Level; + int TotalDummy; + struct tape Tape[MAXTAPES]; + + int BytesRead; + int BytesWritten; + int tupcount; + + struct leftist *Tuples; + + FILE *psort_grab_file; + long psort_current; /* could be file offset, or array index */ + long psort_saved; /* could be file offset, or array index */ + bool using_tape_files; + + HeapTuple *memtuples; +} Psortstate; #ifdef EBUG +#include +#include "utils/elog.h" +#include "storage/buf.h" +#include "storage/bufmgr.h" #define PDEBUG(PROC, S1)\ elog(DEBUG, "%s:%d>> PROC: %s.", __FILE__, __LINE__, S1) @@ -69,15 +96,21 @@ if (1) CODE; else #endif /* psort.c */ -extern void psort(Relation oldrel, Relation newrel, int nkeys, ScanKey key); -extern void initpsort(void); +extern bool psort_begin(Sort *node, int nkeys, ScanKey key); +extern void inittapes(Sort *node); extern void resetpsort(void); -extern void initialrun(Relation rdesc); -extern bool createrun(HeapScanDesc sdesc, FILE *file); -extern HeapTuple tuplecopy(HeapTuple tup, Relation rdesc, Buffer b); -extern FILE *mergeruns(void); -extern void merge(struct tape *dest); -extern void endpsort(Relation rdesc, FILE *file); +extern void initialrun(Sort *node, bool *empty); +extern bool createrun(Sort *node, FILE *file, bool *empty); +extern HeapTuple tuplecopy(HeapTuple tup); +extern FILE *mergeruns(Sort *node); +extern void merge(Sort *node, struct tape *dest); + +extern void dumptuples(Sort *node); +extern HeapTuple psort_grabtuple(Sort *node); +extern void psort_markpos(Sort *node); +extern void psort_restorepos(Sort *node); +extern void psort_end(Sort *node); + extern FILE *gettape(void); extern void resettape(FILE *file); extern void destroytape(FILE *file); diff --git a/src/man/postgres.1 b/src/man/postgres.1 index 940e1822f9..287e734793 100644 --- a/src/man/postgres.1 +++ b/src/man/postgres.1 @@ -1,6 +1,6 @@ .\" This is -*-nroff-*- .\" XXX standard disclaimer belongs here.... -.\" $Header: /cvsroot/pgsql/src/man/Attic/postgres.1,v 1.5 1997/01/26 15:32:20 scrappy Exp $ +.\" $Header: /cvsroot/pgsql/src/man/Attic/postgres.1,v 1.6 1997/08/06 03:42:18 momjian Exp $ .TH POSTGRES95 UNIX 12/08/96 Postgres95 Postgres95 .SH NAME postgres \(em the Postgres backend server @@ -79,7 +79,10 @@ is the number of shared-memory buffers that the .IR "postmaster" has allocated for the backend server processes that it starts. If the backend is running standalone, this specifies the number of buffers to -allocate. This value defaults to 64. +allocate. This value defaults to 64, and each buffer is 8k bytes. +.TP +.BR "-E" +Echo all queries. .TP .BR "-F" Disable automatic fsync() call after each transaction. @@ -89,15 +92,17 @@ while a transaction is in progress will probably cause data loss. .BR "-P" " filedes" .IR "filedes" specifies the file descriptor that corresponds to the socket (port) on -which to communicate to the frontend process. This option is +which to communicate to the frontend process. This option is .BR not useful for interactive use. .TP .BR "-Q" Specifies \*(lqquiet\*(rq mode. .TP -.BR "-E" -Echo all queries. +.BR "-S" +Specifies the amount of memory to be used by internal sorts before using +disk files for sorting. This value is specified in 1k bytes, and +defaults to 512. .TP .BR "-e" The @@ -154,15 +159,6 @@ Turns off the locking system. .BR "-N" Disables use of newline as a query delimiter. .TP -.BR "-S" -Indicates that the transaction system can run with the assumption of -stable main memory, thereby avoiding the necessary flushing of data -and log pages to disk at the end of each transaction system. This is -only used for performance comparisons for stable vs. non-stable -storage. Do not use this in other cases, as recovery after a system -crash may be impossible when this option is specified in the absence -of stable main memory. -.TP .BR "-b" Enables generation of bushy query plan trees (as opposed to left-deep query plans trees). These query plans are not intended for actual diff --git a/src/man/postmaster.1 b/src/man/postmaster.1 index 5b01b72702..41e7b73bfd 100644 --- a/src/man/postmaster.1 +++ b/src/man/postmaster.1 @@ -1,6 +1,6 @@ .\" This is -*-nroff-*- .\" XXX standard disclaimer belongs here.... -.\" $Header: /cvsroot/pgsql/src/man/Attic/postmaster.1,v 1.5 1997/02/19 01:31:30 momjian Exp $ +.\" $Header: /cvsroot/pgsql/src/man/Attic/postmaster.1,v 1.6 1997/08/06 03:42:21 momjian Exp $ .TH POSTMASTER UNIX 11/05/95 PostgreSQL PostgreSQL .SH "NAME" postmaster \(em run the Postgres postmaster @@ -60,7 +60,7 @@ understands the following command-line options: is the number of shared-memory buffers for the .IR "postmaster" to allocate and manage for the backend server processes that it -starts. This value defaults to 64. +starts. This value defaults to 64, and each buffer is 8k bytes. .TP .BR "-D" " data_dir" Specifies the directory to use as the root of the tree of database -- 2.11.0