From 99114a2473a57384246e2f61c79b97ccd376acc8 Mon Sep 17 00:00:00 2001 From: Neil Conway Date: Sun, 5 Mar 2006 21:34:34 +0000 Subject: [PATCH] Per recent discussion on -hackers, we should sometimes reorder the columns of the grouping clause to avoid redundant sorts. The optimizer is not currently capable of doing this, so this patch implements a simple hack in the analysis phase (transformGroupClause): if any subset of the GROUP BY clause matches a prefix of the ORDER BY list, that prefix is moved to the front of the GROUP BY clause. This shouldn't change the semantics of the query, and allows a redundant sort to be avoided for queries like "GROUP BY a, b ORDER BY b". --- src/backend/parser/parse_clause.c | 126 +++++++++++++++++++++++++------------- 1 file changed, 85 insertions(+), 41 deletions(-) diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c index 9fe07bb6fd..fdc2e41b3a 100644 --- a/src/backend/parser/parse_clause.c +++ b/src/backend/parser/parse_clause.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.146 2006/03/05 15:58:33 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.147 2006/03/05 21:34:34 neilc Exp $ * *------------------------------------------------------------------------- */ @@ -1296,6 +1296,16 @@ findTargetlistEntry(ParseState *pstate, Node *node, List **tlist, int clause) return target_result; } +static GroupClause * +make_group_clause(TargetEntry *tle, List *targetlist, Oid sortop) +{ + GroupClause *result; + + result = makeNode(GroupClause); + result->tleSortGroupRef = assignSortGroupRef(tle, targetlist); + result->sortop = sortop; + return result; +} /* * transformGroupClause - @@ -1303,71 +1313,105 @@ findTargetlistEntry(ParseState *pstate, Node *node, List **tlist, int clause) * * GROUP BY items will be added to the targetlist (as resjunk columns) * if not already present, so the targetlist must be passed by reference. + * + * The order of the elements of the grouping clause does not affect + * the semantics of the query. However, the optimizer is not currently + * smart enough to reorder the grouping clause, so we try to do some + * primitive reordering here. */ List * transformGroupClause(ParseState *pstate, List *grouplist, List **targetlist, List *sortClause) { - List *glist = NIL; - ListCell *gl; - ListCell *sortItem; - - sortItem = list_head(sortClause); + List *result = NIL; + List *tle_list = NIL; + ListCell *l; - foreach(gl, grouplist) + /* Preprocess the grouping clause, lookup TLEs */ + foreach (l, grouplist) { TargetEntry *tle; - Oid restype; - Oid ordering_op; - GroupClause *grpcl; + Oid restype; - tle = findTargetlistEntry(pstate, lfirst(gl), + tle = findTargetlistEntry(pstate, lfirst(l), targetlist, GROUP_CLAUSE); - /* avoid making duplicate grouplist entries */ - if (targetIsInSortList(tle, glist)) - continue; - /* if tlist item is an UNKNOWN literal, change it to TEXT */ restype = exprType((Node *) tle->expr); if (restype == UNKNOWNOID) - { tle->expr = (Expr *) coerce_type(pstate, (Node *) tle->expr, restype, TEXTOID, -1, COERCION_IMPLICIT, COERCE_IMPLICIT_CAST); - restype = TEXTOID; - } - /* - * If the GROUP BY clause matches the ORDER BY clause, we want to - * adopt the ordering operators from the latter rather than using the - * default ops. This allows "GROUP BY foo ORDER BY foo DESC" to be - * done with only one sort step. Note we are assuming that any - * user-supplied ordering operator will bring equal values together, - * which is all that GROUP BY needs. - */ - if (sortItem && - ((SortClause *) lfirst(sortItem))->tleSortGroupRef == - tle->ressortgroupref) - { - ordering_op = ((SortClause *) lfirst(sortItem))->sortop; - sortItem = lnext(sortItem); - } - else + tle_list = lappend(tle_list, tle); + } + + /* + * Now iterate through the ORDER BY clause. If we find a grouping + * element that matches the ORDER BY element, append the grouping + * element to the result set immediately. Otherwise, stop + * iterating. The effect of this is to look for a prefix of the + * ORDER BY list in the grouping clauses, and to move that prefix + * to the front of the GROUP BY. + */ + foreach (l, sortClause) + { + SortClause *sc = (SortClause *) lfirst(l); + ListCell *prev = NULL; + ListCell *tl; + bool found = false; + + foreach (tl, tle_list) { - ordering_op = ordering_oper_opid(restype); - sortItem = NULL; /* disregard ORDER BY once match fails */ + TargetEntry *tle = (TargetEntry *) lfirst(tl); + + if (sc->tleSortGroupRef == tle->ressortgroupref) + { + GroupClause *gc; + + tle_list = list_delete_cell(tle_list, tl, prev); + + /* Use the sort clause's sorting operator */ + gc = make_group_clause(tle, *targetlist, sc->sortop); + result = lappend(result, gc); + found = true; + break; + } + + prev = tl; } - grpcl = makeNode(GroupClause); - grpcl->tleSortGroupRef = assignSortGroupRef(tle, *targetlist); - grpcl->sortop = ordering_op; - glist = lappend(glist, grpcl); + /* As soon as we've failed to match an ORDER BY element, stop */ + if (!found) + break; } - return glist; + /* + * Now add any remaining elements of the GROUP BY list in the + * order we received them. + * + * XXX: are there any additional criteria to consider when + * ordering grouping clauses? + */ + foreach(l, tle_list) + { + TargetEntry *tle = (TargetEntry *) lfirst(l); + GroupClause *gc; + Oid sort_op; + + /* avoid making duplicate grouplist entries */ + if (targetIsInSortList(tle, result)) + continue; + + sort_op = ordering_oper_opid(exprType((Node *) tle->expr)); + gc = make_group_clause(tle, *targetlist, sort_op); + result = lappend(result, gc); + } + + list_free(tle_list); + return result; } /* -- 2.11.0