From d395aecffad7cc6bd043e2d81a1bed5b3fe2f5fa Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sun, 26 Jun 2005 19:16:07 +0000 Subject: [PATCH] Code review for escape-strings patch. Sync psql and plpgsql lexers with main, avoid using a SQL-defined SQLSTATE for what is most definitely not a SQL-compatible error condition, fix documentation omissions, adhere to message style guidelines, don't use two GUC_REPORT variables when one is sufficient. Nothing done about pg_dump issues. --- doc/src/sgml/errcodes.sgml | 7 +++++- doc/src/sgml/libpq.sgml | 21 +++++++++++++---- doc/src/sgml/protocol.sgml | 11 +++++---- doc/src/sgml/runtime.sgml | 41 ++++++++++++-------------------- doc/src/sgml/syntax.sgml | 8 +++---- src/backend/parser/scan.l | 56 ++++++++++++++++++++++++++++++-------------- src/backend/utils/misc/guc.c | 17 +++----------- src/bin/psql/psqlscan.l | 17 +++++++++----- src/include/utils/errcodes.h | 3 ++- src/pl/plpgsql/src/scan.l | 35 ++++++++++++++++++++++++--- 10 files changed, 134 insertions(+), 82 deletions(-) diff --git a/doc/src/sgml/errcodes.sgml b/doc/src/sgml/errcodes.sgml index ad556d0151..71b4a8cd40 100644 --- a/doc/src/sgml/errcodes.sgml +++ b/doc/src/sgml/errcodes.sgml @@ -1,4 +1,4 @@ - + <productname>PostgreSQL</productname> Error Codes @@ -371,6 +371,11 @@ +22P06 +NONSTANDARD USE OF ESCAPE CHARACTER + + + 22010 INVALID INDICATOR PARAMETER VALUE diff --git a/doc/src/sgml/libpq.sgml b/doc/src/sgml/libpq.sgml index 9c1b94e2be..a5bde7fc76 100644 --- a/doc/src/sgml/libpq.sgml +++ b/doc/src/sgml/libpq.sgml @@ -1,5 +1,5 @@ @@ -286,7 +286,7 @@ PGconn *PQconnectdb(const char *conninfo); Kerberos service name to use when authenticating with Kerberos 4 or 5. This must match the service name specified in the server configuration for Kerberos authentication to succeed. (See also - .) + .) @@ -888,10 +888,13 @@ Parameters reported as of the current release include is_superuser, session_authorization, DateStyle, -TimeZone, and -integer_datetimes. +TimeZone, +integer_datetimes, and +standard_compliant_strings. (server_encoding, TimeZone, and -integer_datetimes were not reported by releases before 8.0.) +integer_datetimes were not reported by releases before 8.0; +standard_compliant_strings was not reported by releases +before 8.1.) Note that server_version, server_encoding and @@ -914,6 +917,14 @@ in a numeric form that is much easier to compare against. +If no value for standard_compliant_strings is reported, +applications may assume it is false, that is, backslashes +are treated as escapes in string literals. Also, the presence of this +parameter may be taken as an indication that the escape string syntax +(E'...') is accepted. + + + Although the returned pointer is declared const, it in fact points to mutable storage associated with the PGconn structure. It is unwise to assume the pointer will remain valid across queries. diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml index cd6fa0e94f..7ebcfc63a3 100644 --- a/doc/src/sgml/protocol.sgml +++ b/doc/src/sgml/protocol.sgml @@ -1,4 +1,4 @@ - + Frontend/Backend Protocol @@ -1072,10 +1072,13 @@ is_superuser, session_authorization, DateStyle, - TimeZone, and - integer_datetimes. + TimeZone, + integer_datetimes, and + standard_compliant_strings. (server_encoding, TimeZone, and - integer_datetimes were not reported by releases before 8.0.) + integer_datetimes were not reported by releases before 8.0; + standard_compliant_strings was not reported by releases + before 8.1.) Note that server_version, server_encoding and diff --git a/doc/src/sgml/runtime.sgml b/doc/src/sgml/runtime.sgml index 3841b0dee2..c0b3e65ba7 100644 --- a/doc/src/sgml/runtime.sgml +++ b/doc/src/sgml/runtime.sgml @@ -1,5 +1,5 @@ @@ -3766,13 +3766,11 @@ dynamic_library_path = 'C:\tools\postgresql;H:\my_project\lib;$libdir' When on, a warning is issued if a backslash - (\) appears in a ordinary, non-escape syntax - ('') string. To log the statement that generated the - warning, set log_min_error_statement to - error. The default is off. + (\) appears in an ordinary string literal + ('...' syntax). The default is off. - Escape string syntax (E'') should be used for + Escape string syntax (E'...') should be used for escapes, because in future versions of PostgreSQL ordinary strings will have the standard-compliant behavior of treating backslashes @@ -3988,22 +3986,7 @@ dynamic_library_path = 'C:\tools\postgresql;H:\my_project\lib;$libdir' - - escape_string_syntax (boolean) - stringsescape - - escape_string_syntax configuration parameter - - - - Reports whether escape string syntax (E'') is - supported. This variable is used by applications that need to - determine if escape string syntax can be used in their code. - - - - - + standard_compliant_strings (boolean) stringsescape @@ -4011,10 +3994,16 @@ dynamic_library_path = 'C:\tools\postgresql;H:\my_project\lib;$libdir' - Reports whether ordinary, non-escape syntax strings - ('') treat backslashes literally, as specified in - the SQL standard. This variable is used by applications that - need to know how ordinary strings are processed`. + Reports whether ordinary string literals + ('...') treat backslashes literally, as specified in + the SQL standard. The value is currently always false, + indicating that backslashes are treated as escapes. It is planned + that this will change to true in a future + PostgreSQL release when string literal + syntax changes to meet the standard. Applications may check this + parameter to determine how string literals will be processed. + The presence of this parameter can also be taken as an indication + that the escape string syntax (E'...') is supported. diff --git a/doc/src/sgml/syntax.sgml b/doc/src/sgml/syntax.sgml index 3d8d457c56..0d3d7f19f1 100644 --- a/doc/src/sgml/syntax.sgml +++ b/doc/src/sgml/syntax.sgml @@ -1,5 +1,5 @@ @@ -249,7 +249,7 @@ UPDATE "my_table" SET "a" = 5; PostgreSQL also allows single quotes to be escaped with a backslash (\'). However, future versions of PostgreSQL will not - support this so applications using this should convert to the + allow this, so applications using backslashes should convert to the standard-compliant method outlined above. @@ -276,8 +276,8 @@ UPDATE "my_table" SET "a" = 5; eventually treat backslashes as literal characters to be standard-compliant. The proper way to specify escape processing is to use the escape string syntax to indicate that escape - processing is desired. Escape string syntax is specified by placing - the the letter E (upper or lower case) before + processing is desired. Escape string syntax is specified by writing + the letter E (upper or lower case) just before the string, e.g. E'\041'. This method will work in all future versions of PostgreSQL. diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l index 4c556a5fba..f9b0dbdb75 100644 --- a/src/backend/parser/scan.l +++ b/src/backend/parser/scan.l @@ -24,7 +24,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.126 2005/06/26 03:03:38 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.127 2005/06/26 19:16:05 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -48,10 +48,19 @@ extern YYSTYPE yylval; static int xcdepth = 0; /* depth of nesting in slash-star comments */ -static char *dolqstart; /* current $foo$ quote start string */ -static bool warn_on_first_escape; +static char *dolqstart; /* current $foo$ quote start string */ + +/* + * GUC variable. This is a DIRECT violation of the warning given at the + * head of gram.y, ie flex/bison code must not depend on any GUC variables; + * as such, changing its value can induce very unintuitive behavior. + * But we shall have to live with it as a short-term thing until the switch + * to SQL-standard string syntax is complete. + */ bool escape_string_warning; +static bool warn_on_first_escape; + /* * literalbuf is used to accumulate literal values when multiple rules * are needed to parse a single literal. Call startlit to reset buffer @@ -66,6 +75,7 @@ static int literalalloc; /* current allocated buffer size */ static void addlit(char *ytext, int yleng); static void addlitchar(unsigned char ychar); static char *litbufdup(void); +static int pg_err_position(void); static void check_escape_warning(void); /* @@ -188,9 +198,8 @@ xhinside [^']* /* National character */ xnstart [nN]{quote} -/* Quote string does not warn about escapes */ +/* Quoted string that allows backslash escapes */ xestart [eE]{quote} -xeinside [^']* /* Extended quote * xqdouble implements embedded quote, '''' @@ -446,17 +455,21 @@ other . { if (warn_on_first_escape && escape_string_warning) ereport(WARNING, - (errcode(ERRCODE_INVALID_USE_OF_ESCAPE_CHARACTER), - errmsg("Invalid use of \\' in a normal string"), - errhint("Use '' to place quotes in strings, or use the escape string syntax (E'')."))); + (errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER), + errmsg("nonstandard use of \\' in a string literal"), + errhint("Use '' to write quotes in strings, or use the escape string syntax (E'...')."), + errposition(pg_err_position()))); + warn_on_first_escape = false; /* warn only once per string */ } else if (yytext[1] == '\\') { if (warn_on_first_escape && escape_string_warning) ereport(WARNING, - (errcode(ERRCODE_INVALID_USE_OF_ESCAPE_CHARACTER), - errmsg("Invalid use of \\\\ in a normal string"), - errhint("Use the escape string syntax for backslashes, e.g. E'\\\\'."))); + (errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER), + errmsg("nonstandard use of \\\\ in a string literal"), + errhint("Use the escape string syntax for backslashes, e.g., E'\\\\'."), + errposition(pg_err_position()))); + warn_on_first_escape = false; /* warn only once per string */ } else check_escape_warning(); @@ -707,14 +720,20 @@ other . %% -void -yyerror(const char *message) +static int +pg_err_position(void) { const char *loc = token_start ? token_start : yytext; - int cursorpos; /* in multibyte encodings, return index in characters not bytes */ - cursorpos = pg_mbstrlen_with_len(scanbuf, loc - scanbuf) + 1; + return pg_mbstrlen_with_len(scanbuf, loc - scanbuf) + 1; +} + +void +yyerror(const char *message) +{ + const char *loc = token_start ? token_start : yytext; + int cursorpos = pg_err_position(); if (*loc == YY_END_OF_BUFFER_CHAR) { @@ -852,8 +871,9 @@ check_escape_warning(void) { if (warn_on_first_escape && escape_string_warning) ereport(WARNING, - (errcode(ERRCODE_INVALID_USE_OF_ESCAPE_CHARACTER), - errmsg("Invalid use of escapes in an ordinary string"), - errhint("Use the escape string syntax for escapes, e.g. E'\\r\\n'."))); + (errcode(ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER), + errmsg("nonstandard use of escape in a string literal"), + errhint("Use the escape string syntax for escapes, e.g., E'\\r\\n'."), + errposition(pg_err_position()))); warn_on_first_escape = false; /* warn only once per string */ } diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 1738604942..0ab8e74233 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -10,7 +10,7 @@ * Written by Peter Eisentraut . * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.269 2005/06/26 03:03:41 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/utils/misc/guc.c,v 1.270 2005/06/26 19:16:06 tgl Exp $ * *-------------------------------------------------------------------- */ @@ -190,7 +190,6 @@ static int max_index_keys; static int max_identifier_length; static int block_size; static bool integer_datetimes; -static bool escape_string_syntax; static bool standard_compliant_strings; /* should be static, but commands/variable.c needs to get at it */ @@ -877,7 +876,7 @@ static struct config_bool ConfigureNamesBool[] = { {"escape_string_warning", PGC_USERSET, COMPAT_OPTIONS_PREVIOUS, - gettext_noop("Warn about backslash escapes in ordinary, non-escape-syntax strings."), + gettext_noop("Warn about backslash escapes in ordinary string literals."), NULL }, &escape_string_warning, @@ -885,18 +884,8 @@ static struct config_bool ConfigureNamesBool[] = }, { - {"escape_string_syntax", PGC_INTERNAL, PRESET_OPTIONS, - gettext_noop("Escape string syntax (E'') is supported."), - NULL, - GUC_REPORT | GUC_NOT_IN_SAMPLE | GUC_DISALLOW_IN_FILE - }, - &escape_string_syntax, - true, NULL, NULL - }, - - { {"standard_compliant_strings", PGC_INTERNAL, PRESET_OPTIONS, - gettext_noop("'' strings treat backslashes literally."), + gettext_noop("'...' strings treat backslashes literally."), NULL, GUC_REPORT | GUC_NOT_IN_SAMPLE | GUC_DISALLOW_IN_FILE }, diff --git a/src/bin/psql/psqlscan.l b/src/bin/psql/psqlscan.l index a093b3703f..f62fe6224f 100644 --- a/src/bin/psql/psqlscan.l +++ b/src/bin/psql/psqlscan.l @@ -33,7 +33,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/bin/psql/psqlscan.l,v 1.14 2005/06/02 17:45:19 tgl Exp $ + * $PostgreSQL: pgsql/src/bin/psql/psqlscan.l,v 1.15 2005/06/26 19:16:06 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -235,17 +235,18 @@ quotefail {quote}{whitespace}*"-" xbstart [bB]{quote} xbinside [^']* -/* Hexadecimal number - */ +/* Hexadecimal number */ xhstart [xX]{quote} xhinside [^']* -/* National character - */ +/* National character */ xnstart [nN]{quote} +/* Quoted string that allows backslash escapes */ +xestart [eE]{quote} + /* Extended quote - * xqdouble implements embedded quote + * xqdouble implements embedded quote, '''' */ xqstart {quote} xqdouble {quote}{quote} @@ -450,6 +451,10 @@ other . BEGIN(xq); ECHO; } +{xestart} { + BEGIN(xq); + ECHO; + } {quotestop} | {quotefail} { yyless(1); diff --git a/src/include/utils/errcodes.h b/src/include/utils/errcodes.h index 646193def4..636c3f7a37 100644 --- a/src/include/utils/errcodes.h +++ b/src/include/utils/errcodes.h @@ -11,7 +11,7 @@ * * Copyright (c) 2003-2005, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/include/utils/errcodes.h,v 1.17 2005/01/01 20:44:30 tgl Exp $ + * $PostgreSQL: pgsql/src/include/utils/errcodes.h,v 1.18 2005/06/26 19:16:06 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -126,6 +126,7 @@ #define ERRCODE_INVALID_ESCAPE_CHARACTER MAKE_SQLSTATE('2','2', '0','1','9') #define ERRCODE_INVALID_ESCAPE_OCTET MAKE_SQLSTATE('2','2', '0','0','D') #define ERRCODE_INVALID_ESCAPE_SEQUENCE MAKE_SQLSTATE('2','2', '0','2','5') +#define ERRCODE_NONSTANDARD_USE_OF_ESCAPE_CHARACTER MAKE_SQLSTATE('2','2', 'P','0','6') #define ERRCODE_INVALID_INDICATOR_PARAMETER_VALUE MAKE_SQLSTATE('2','2', '0','1','0') #define ERRCODE_INVALID_LIMIT_VALUE MAKE_SQLSTATE('2','2', '0','2','0') #define ERRCODE_INVALID_PARAMETER_VALUE MAKE_SQLSTATE('2','2', '0','2','3') diff --git a/src/pl/plpgsql/src/scan.l b/src/pl/plpgsql/src/scan.l index 680a58fc01..e69c8f17b0 100644 --- a/src/pl/plpgsql/src/scan.l +++ b/src/pl/plpgsql/src/scan.l @@ -4,7 +4,7 @@ * procedural language * * IDENTIFICATION - * $PostgreSQL: pgsql/src/pl/plpgsql/src/scan.l,v 1.41 2005/06/22 01:35:02 neilc Exp $ + * $PostgreSQL: pgsql/src/pl/plpgsql/src/scan.l,v 1.42 2005/06/26 19:16:07 tgl Exp $ * * This software is copyrighted by Jan Wieck - Hamburg. * @@ -291,6 +291,12 @@ dump { return O_DUMP; } start_charpos = yytext; BEGIN(IN_STRING); } +[eE]' { + /* for now, treat the same as a regular literal */ + start_lineno = plpgsql_scanner_lineno(); + start_charpos = yytext; + BEGIN(IN_STRING); + } \\. { } \\ { /* can only happen with \ at EOF */ } '' { } @@ -563,18 +569,41 @@ plpgsql_get_string_value(void) memcpy(result, yytext + dolqlen, len); result[len] = '\0'; } + else if (*yytext == 'E' || *yytext == 'e') + { + /* Token is an E'...' string */ + result = (char *) palloc(yyleng + 1); /* more than enough room */ + len = 0; + for (cp = yytext + 2; *cp; cp++) + { + if (*cp == '\'') + { + if (cp[1] == '\'') + result[len++] = *cp++; + /* else it must be string end quote */ + } + else if (*cp == '\\') + { + if (cp[1] != '\0') /* just a paranoid check */ + result[len++] = *(++cp); + } + else + result[len++] = *cp; + } + result[len] = '\0'; + } else { /* Token is a '...' string */ result = (char *) palloc(yyleng + 1); /* more than enough room */ len = 0; - for (cp = yytext; *cp; cp++) + for (cp = yytext + 1; *cp; cp++) { if (*cp == '\'') { if (cp[1] == '\'') result[len++] = *cp++; - /* else it must be string start or end quote */ + /* else it must be string end quote */ } else if (*cp == '\\') { -- 2.11.0