From 751a14e60c309068557e68c8f322007e0522947a Mon Sep 17 00:00:00 2001
From: Tom Lane <tgl@sss.pgh.pa.us>
Date: Sat, 19 Feb 2000 23:45:07 +0000
Subject: [PATCH] Repair longstanding violation of SQL92 semantics: GROUP BY
 would interpret a column name as an output column alias (targetlist AS name),
 ather than a real column name as it ought to.  According to the spec, only
 ORDER BY should look at output column names.  I left in GROUP BY's
 willingness to use an output column number ('GROUP BY 2'), even though this
 is also contrary to the spec --- again, only ORDER BY is supposed to accept
 that.  But there is no possible reason to want to GROUP BY an integer
 constant, so keeping this old behavior won't break any SQL-compliant queries.
  DISTINCT ON will behave the same as GROUP BY.

Change numerology regress test, which depended on the incorrect
behavior.
---
 src/backend/parser/parse_clause.c        | 25 +++++++++++++++++--------
 src/test/regress/expected/numerology.out | 28 ++++++++++++++--------------
 src/test/regress/sql/numerology.sql      | 13 +++++++++----
 3 files changed, 40 insertions(+), 26 deletions(-)

diff --git a/src/backend/parser/parse_clause.c b/src/backend/parser/parse_clause.c
index a6549015af..68da571f92 100644
--- a/src/backend/parser/parse_clause.c
+++ b/src/backend/parser/parse_clause.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/parser/parse_clause.c,v 1.54 2000/02/15 23:09:08 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/parser/parse_clause.c,v 1.55 2000/02/19 23:45:05 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -721,7 +721,7 @@ parseFromClause(ParseState *pstate, List *frmList)
  * node		the ORDER BY, GROUP BY, or DISTINCT ON expression to be matched
  * tlist	the existing target list (NB: this cannot be NIL, which is a
  *			good thing since we'd be unable to append to it...)
- * clause	identifies clause type for error messages.
+ * clause	identifies clause type (mainly for error messages).
  */
 static TargetEntry *
 findTargetlistEntry(ParseState *pstate, Node *node, List *tlist, int clause)
@@ -733,7 +733,7 @@ findTargetlistEntry(ParseState *pstate, Node *node, List *tlist, int clause)
 	/*----------
 	 * Handle two special cases as mandated by the SQL92 spec:
 	 *
-	 * 1. ORDER/GROUP BY ColumnName
+	 * 1. ORDER BY ColumnName
 	 *    For a bare identifier, we search for a matching column name
 	 *	  in the existing target list.  Multiple matches are an error
 	 *	  unless they refer to identical values; for example,
@@ -741,17 +741,26 @@ findTargetlistEntry(ParseState *pstate, Node *node, List *tlist, int clause)
 	 *	  but not   SELECT a AS b, b FROM table ORDER BY b
 	 *	  If no match is found, we fall through and treat the identifier
 	 *	  as an expression.
+	 *	  We do NOT attempt this match for GROUP BY, since it is clearly
+	 *	  contrary to the spec to use an output column name in preference
+	 *	  to an underlying column name in GROUP BY.  DISTINCT ON isn't in
+	 *	  the standard, so we can do what we like there; we choose to make
+	 *	  it work like GROUP BY.
 	 *
-	 * 2. ORDER/GROUP BY IntegerConstant
+	 * 2. ORDER BY/GROUP BY/DISTINCT ON IntegerConstant
 	 *	  This means to use the n'th item in the existing target list.
-	 *	  Note that it would make no sense to order/group by an actual
-	 *	  constant, so this does not create a conflict with our extension
-	 *	  to order/group by an expression.
+	 *	  Note that it would make no sense to order/group/distinct by an
+	 *	  actual constant, so this does not create a conflict with our
+	 *	  extension to order/group by an expression.
+	 *	  I believe that GROUP BY column-number is not sanctioned by SQL92,
+	 *	  but since the standard has no other behavior defined for this
+	 *	  syntax, we may as well continue to support our past behavior.
 	 *
 	 * Note that pre-existing resjunk targets must not be used in either case.
 	 *----------
 	 */
-	if (IsA(node, Ident) && ((Ident *) node)->indirection == NIL)
+	if (clause == ORDER_CLAUSE &&
+		IsA(node, Ident) && ((Ident *) node)->indirection == NIL)
 	{
 		char	   *name = ((Ident *) node)->name;
 		foreach(tl, tlist)
diff --git a/src/test/regress/expected/numerology.out b/src/test/regress/expected/numerology.out
index 131e5f7e64..8e13a9e6ac 100644
--- a/src/test/regress/expected/numerology.out
+++ b/src/test/regress/expected/numerology.out
@@ -88,7 +88,7 @@ SELECT DISTINCT f1 AS two FROM TEMP_GROUP;
 
 SELECT f1 AS two, max(f3) AS max_float, min(f3) as min_float
   FROM TEMP_GROUP
-  GROUP BY two
+  GROUP BY f1
   ORDER BY two, max_float, min_float;
  two |      max_float       |       min_float       
 -----+----------------------+-----------------------
@@ -96,19 +96,17 @@ SELECT f1 AS two, max(f3) AS max_float, min(f3) as min_float
    2 |                    0 | -1.2345678901234e+200
 (2 rows)
 
+-- Postgres used to accept this, but it is clearly against SQL92 to
+-- interpret GROUP BY arguments as result column names; they should
+-- be source column names *only*.  An error is expected.
 SELECT f1 AS two, max(f3) AS max_float, min(f3) AS min_float
   FROM TEMP_GROUP
   GROUP BY two
   ORDER BY two, max_float, min_float;
- two |      max_float       |       min_float       
------+----------------------+-----------------------
-   1 | 1.2345678901234e+200 |                     0
-   2 |                    0 | -1.2345678901234e+200
-(2 rows)
-
+ERROR:  Attribute 'two' not found
 SELECT f1 AS two, (max(f3) + 1) AS max_plus_1, (min(f3) - 1) AS min_minus_1
   FROM TEMP_GROUP
-  GROUP BY two
+  GROUP BY f1
   ORDER BY two, min_minus_1;
  two |      max_plus_1      |      min_minus_1      
 -----+----------------------+-----------------------
@@ -116,14 +114,16 @@ SELECT f1 AS two, (max(f3) + 1) AS max_plus_1, (min(f3) - 1) AS min_minus_1
    2 |                    1 | -1.2345678901234e+200
 (2 rows)
 
-SELECT f1 AS two, (max(f3) + 1) AS max_plus_1, (min(f3) - 1) AS min_minus_1
+SELECT f1 AS two,
+       max(f2) + min(f2) AS max_plus_min,
+       min(f3) - 1 AS min_minus_1
   FROM TEMP_GROUP
-  GROUP BY two
+  GROUP BY f1
   ORDER BY two, min_minus_1;
- two |      max_plus_1      |      min_minus_1      
------+----------------------+-----------------------
-   1 | 1.2345678901234e+200 |                    -1
-   2 |                    1 | -1.2345678901234e+200
+ two | max_plus_min |      min_minus_1      
+-----+--------------+-----------------------
+   1 |            0 |                    -1
+   2 |            0 | -1.2345678901234e+200
 (2 rows)
 
 DROP TABLE TEMP_INT2;
diff --git a/src/test/regress/sql/numerology.sql b/src/test/regress/sql/numerology.sql
index 35975f3bd6..b30f008bff 100644
--- a/src/test/regress/sql/numerology.sql
+++ b/src/test/regress/sql/numerology.sql
@@ -67,9 +67,12 @@ SELECT DISTINCT f1 AS two FROM TEMP_GROUP;
 
 SELECT f1 AS two, max(f3) AS max_float, min(f3) as min_float
   FROM TEMP_GROUP
-  GROUP BY two
+  GROUP BY f1
   ORDER BY two, max_float, min_float;
 
+-- Postgres used to accept this, but it is clearly against SQL92 to
+-- interpret GROUP BY arguments as result column names; they should
+-- be source column names *only*.  An error is expected.
 SELECT f1 AS two, max(f3) AS max_float, min(f3) AS min_float
   FROM TEMP_GROUP
   GROUP BY two
@@ -77,12 +80,14 @@ SELECT f1 AS two, max(f3) AS max_float, min(f3) AS min_float
 
 SELECT f1 AS two, (max(f3) + 1) AS max_plus_1, (min(f3) - 1) AS min_minus_1
   FROM TEMP_GROUP
-  GROUP BY two
+  GROUP BY f1
   ORDER BY two, min_minus_1;
 
-SELECT f1 AS two, (max(f3) + 1) AS max_plus_1, (min(f3) - 1) AS min_minus_1
+SELECT f1 AS two,
+       max(f2) + min(f2) AS max_plus_min,
+       min(f3) - 1 AS min_minus_1
   FROM TEMP_GROUP
-  GROUP BY two
+  GROUP BY f1
   ORDER BY two, min_minus_1;
 
 DROP TABLE TEMP_INT2;
-- 
2.11.0