From 3a624e9200bbd8b88d7724e1448fe530f32e2c3c Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Sun, 19 Apr 2009 18:52:58 +0000
Subject: [PATCH] Revise plpgsql's scanner to process comments and string
 literals in a way more nearly matching the core SQL scanner.

The user-visible effects are:

* Block comments (slash-star comments) now nest, as per SQL spec.

* In standard_conforming_strings mode, backslash as the last character of
  a non-E string literal is now correctly taken as an ordinary character;
  formerly it was misinterpreted as escaping the ending quote.  (Since the
  string also had to pass through the core scanner, this invariably led to
  syntax errors.)

* Formerly, backslashes in the format string of RAISE were always treated
  as quoting the next character, regardless of mode.  Now, they are
  ordinary characters with standard_conforming_strings on, while with it
  off, they introduce the same set of escapes as in the core SQL scanner.
  Also, escape_string_warning is now effective for RAISE format strings.
  These changes make RAISE format strings work just like any other string
  literal.

This is implemented by copying and pasting a lot of logic from the core
scanner.  It would be a good idea to look into getting rid of plpgsql's
scanner entirely in favor of using the core scanner.  However, that
involves more change than I can justify making during beta --- in
particular, the core scanner would have to become re-entrant.

In passing, remove the kluge that made the plpgsql scanner emit T_FUNCTION
or T_TRIGGER as a made-up first token.  That presumably had some value
once upon a time, but now it's just useless complication for both the
scanner and the grammar.
---
 doc/src/sgml/plpgsql.sgml             |   8 +-
 src/pl/plpgsql/src/gram.y             |  63 ++++--
 src/pl/plpgsql/src/pl_comp.c          |  18 +-
 src/pl/plpgsql/src/pl_funcs.c         |  39 +++-
 src/pl/plpgsql/src/plpgsql.h          |   8 +-
 src/pl/plpgsql/src/scan.l             | 383 ++++++++++++++++------------------
 src/test/regress/expected/plpgsql.out |  71 +++++++
 src/test/regress/sql/plpgsql.sql      |  44 ++++
 8 files changed, 398 insertions(+), 236 deletions(-)

diff --git a/doc/src/sgml/plpgsql.sgml b/doc/src/sgml/plpgsql.sgml
index 6eb9b2b9e7..319547ec00 100644
--- a/doc/src/sgml/plpgsql.sgml
+++ b/doc/src/sgml/plpgsql.sgml
@@ -1,4 +1,4 @@
-
+
 <application>PL/pgSQL</application> - <acronym>SQL</acronym> Procedural Language
@@ -220,10 +220,8 @@ END label ;
     There are two types of comments in PL/pgSQL. A double dash (--)
     starts a comment that extends to the end of the line. A /* starts a
     block comment that extends to
-    the next occurrence of */. Block comments cannot be
-    nested, but double dash comments can be enclosed into a block comment and
-    a double dash can hide the block comment delimiters /*
-    and */.
+    the next occurrence of */. Block comments nest,
+    just as in ordinary SQL.
diff --git a/src/pl/plpgsql/src/gram.y b/src/pl/plpgsql/src/gram.y
index 79d4f61625..bb0ca75c64 100644
--- a/src/pl/plpgsql/src/gram.y
+++ b/src/pl/plpgsql/src/gram.y
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *    $PostgreSQL: pgsql/src/pl/plpgsql/src/gram.y,v 1.121 2009/02/18 11:33:04 petere Exp $
+ *    $PostgreSQL: pgsql/src/pl/plpgsql/src/gram.y,v 1.122 2009/04/19 18:52:57 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -62,6 +62,8 @@ static PLpgSQL_row *make_scalar_list1(const char *initial_name,
                                       int lineno);
 static void check_sql_expr(const char *stmt);
 static void plpgsql_sql_error_callback(void *arg);
+static char *parse_string_token(const char *token);
+static void plpgsql_string_error_callback(void *arg);
 static char *check_label(const char *yytxt);
 static void check_labels(const char *start_label,
                          const char *end_label);
@@ -228,8 +230,6 @@ static List *read_raise_options(void);
 /*
  * Other tokens
  */
-%token  T_FUNCTION
-%token  T_TRIGGER
 %token  T_STRING
 %token  T_NUMBER
 %token  T_SCALAR        /* a VAR, RECFIELD, or TRIGARG */
@@ -244,13 +244,9 @@ static List *read_raise_options(void);
 
 %%
 
-pl_function     : T_FUNCTION comp_optsect pl_block opt_semi
+pl_function     : comp_optsect pl_block opt_semi
                     {
-                        yylval.program = (PLpgSQL_stmt_block *)$3;
-                    }
-                | T_TRIGGER comp_optsect pl_block opt_semi
-                    {
-                        yylval.program = (PLpgSQL_stmt_block *)$3;
+                        yylval.program = (PLpgSQL_stmt_block *) $2;
                     }
                 ;
@@ -1403,7 +1399,7 @@ stmt_raise : K_RAISE lno
                         if (tok == T_STRING)
                         {
                             /* old style message and parameters */
-                            new->message = plpgsql_get_string_value();
+                            new->message = parse_string_token(yytext);
                             /*
                              * We expect either a semi-colon, which
                              * indicates no parameters, or a comma that
@@ -1435,7 +1431,7 @@ stmt_raise : K_RAISE lno
                             if (yylex() != T_STRING)
                                 yyerror("syntax error");
-                            sqlstatestr = plpgsql_get_string_value();
+                            sqlstatestr = parse_string_token(yytext);
                             if (strlen(sqlstatestr) != 5)
                                 yyerror("invalid SQLSTATE code");
@@ -1778,7 +1774,7 @@ proc_condition : opt_lblname
                         /* next token should be a string literal */
                         if (yylex() != T_STRING)
                             yyerror("syntax error");
-                        sqlstatestr = plpgsql_get_string_value();
+                        sqlstatestr = parse_string_token(yytext);
                         if (strlen(sqlstatestr) != 5)
                             yyerror("invalid SQLSTATE code");
@@ -2738,6 +2734,49 @@ plpgsql_sql_error_callback(void *arg)
     errposition(0);
 }
 
+/*
+ * Convert a string-literal token to the represented string value.
+ *
+ * To do this, we need to invoke the core lexer.  To avoid confusion between
+ * the core bison/flex definitions and our own, the actual invocation is in
+ * pl_funcs.c.  Here we are only concerned with setting up the right errcontext
+ * state, which is handled the same as in check_sql_expr().
+ */
+static char *
+parse_string_token(const char *token)
+{
+    char *result;
+    ErrorContextCallback syntax_errcontext;
+    ErrorContextCallback *previous_errcontext;
+
+    /* See comments in check_sql_expr() */
+    Assert(error_context_stack->callback == plpgsql_compile_error_callback);
+
+    previous_errcontext = error_context_stack;
+    syntax_errcontext.callback = plpgsql_string_error_callback;
+    syntax_errcontext.arg = (char *) token;
+    syntax_errcontext.previous = error_context_stack->previous;
+    error_context_stack = &syntax_errcontext;
+
+    result = plpgsql_parse_string_token(token);
+
+    /* Restore former ereport callback */
+    error_context_stack = previous_errcontext;
+
+    return result;
+}
+
+static void
+plpgsql_string_error_callback(void *arg)
+{
+    Assert(plpgsql_error_funcname);
+
+    errcontext("string literal in PL/PgSQL function \"%s\" near line %d",
+               plpgsql_error_funcname, plpgsql_error_lineno);
+    /* representing the string literal as internalquery seems overkill */
+    errposition(0);
+}
+
 static char *
 check_label(const char *yytxt)
 {
diff --git a/src/pl/plpgsql/src/pl_comp.c b/src/pl/plpgsql/src/pl_comp.c
index b2673dcd48..5f75818d7a 100644
--- a/src/pl/plpgsql/src/pl_comp.c
+++ b/src/pl/plpgsql/src/pl_comp.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *    $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_comp.c,v 1.134 2009/02/18 11:33:04 petere Exp $
+ *    $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_comp.c,v 1.135 2009/04/19 18:52:57 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -261,7 +261,7 @@ do_compile(FunctionCallInfo fcinfo,
            bool forValidator)
 {
     Form_pg_proc procStruct = (Form_pg_proc) GETSTRUCT(procTup);
-    int functype = CALLED_AS_TRIGGER(fcinfo) ? T_TRIGGER : T_FUNCTION;
+    bool is_trigger = CALLED_AS_TRIGGER(fcinfo);
     Datum prosrcdatum;
     bool isnull;
     char *proc_source;
@@ -293,7 +293,7 @@ do_compile(FunctionCallInfo fcinfo,
     if (isnull)
         elog(ERROR, "null prosrc");
     proc_source = TextDatumGetCString(prosrcdatum);
-    plpgsql_scanner_init(proc_source, functype);
+    plpgsql_scanner_init(proc_source);
 
     plpgsql_error_funcname = pstrdup(NameStr(procStruct->proname));
     plpgsql_error_lineno = 0;
@@ -359,13 +359,13 @@ do_compile(FunctionCallInfo fcinfo,
     function->fn_oid = fcinfo->flinfo->fn_oid;
     function->fn_xmin = HeapTupleHeaderGetXmin(procTup->t_data);
     function->fn_tid = procTup->t_self;
-    function->fn_functype = functype;
+    function->fn_is_trigger = is_trigger;
     function->fn_cxt = func_cxt;
     function->out_param_varno = -1;     /* set up for no OUT param */
 
-    switch (functype)
+    switch (is_trigger)
     {
-        case T_FUNCTION:
+        case false:
 
             /*
             * Fetch info about the procedure's parameters.  Allocations aren't
@@ -564,7 +564,7 @@ do_compile(FunctionCallInfo fcinfo,
             ReleaseSysCache(typeTup);
             break;
 
-        case T_TRIGGER:
+        case true:
             /* Trigger procedure's return type is unknown yet */
             function->fn_rettype = InvalidOid;
             function->fn_retbyval = false;
@@ -645,7 +645,7 @@ do_compile(FunctionCallInfo fcinfo,
             break;
 
         default:
-            elog(ERROR, "unrecognized function typecode: %u", functype);
+            elog(ERROR, "unrecognized function typecode: %d", (int) is_trigger);
             break;
     }
 
@@ -790,7 +790,7 @@ plpgsql_parse_word(const char *word)
     * Recognize tg_argv when compiling triggers
     * (XXX this sucks, it should be a regular variable in the namestack)
     */
-    if (plpgsql_curr_compile->fn_functype == T_TRIGGER)
+    if (plpgsql_curr_compile->fn_is_trigger)
     {
         if (strcmp(cp[0], "tg_argv") == 0)
         {
diff --git a/src/pl/plpgsql/src/pl_funcs.c b/src/pl/plpgsql/src/pl_funcs.c
index b6cd6c5150..9a3a9bab97 100644
--- a/src/pl/plpgsql/src/pl_funcs.c
+++ b/src/pl/plpgsql/src/pl_funcs.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *    $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_funcs.c,v 1.76 2009/02/18 11:33:04 petere Exp $
+ *    $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_funcs.c,v 1.77 2009/04/19 18:52:57 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -17,6 +17,8 @@
 
 #include 
 
+#include "parser/gramparse.h"
+#include "parser/gram.h"
 #include "parser/scansup.h"
@@ -460,6 +462,41 @@ plpgsql_convert_ident(const char *s, char **output, int numidents)
 
 
 /*
+ * plpgsql_parse_string_token - get the value represented by a string literal
+ *
+ * We do not make plpgsql's lexer produce the represented value, because
+ * in many cases we don't need it.  Instead this function is invoked when
+ * we do need it.  The input is the T_STRING token as identified by the lexer.
+ *
+ * The result is a palloc'd string.
+ *
+ * Note: this is called only from plpgsql's gram.y, but we can't just put it
+ * there because including parser/gram.h there would cause confusion.
+ */
+char *
+plpgsql_parse_string_token(const char *token)
+{
+    int ctoken;
+
+    /*
+     * We use the core lexer to do the dirty work.  Aside from getting the
+     * right results for escape sequences and so on, this helps us produce
+     * appropriate warnings for escape_string_warning etc.
+     */
+    scanner_init(token);
+
+    ctoken = base_yylex();
+
+    if (ctoken != SCONST)
+        elog(ERROR, "unexpected result from base lexer: %d", ctoken);
+
+    scanner_finish();
+
+    return base_yylval.str;
+}
+
+
+/*
  * Statement type as a string, for use in error messages etc.
  */
 const char *
diff --git a/src/pl/plpgsql/src/plpgsql.h b/src/pl/plpgsql/src/plpgsql.h
index d8a8a17f9a..e8a0736e2f 100644
--- a/src/pl/plpgsql/src/plpgsql.h
+++ b/src/pl/plpgsql/src/plpgsql.h
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *    $PostgreSQL: pgsql/src/pl/plpgsql/src/plpgsql.h,v 1.110 2009/04/09 02:57:53 tgl Exp $
+ *    $PostgreSQL: pgsql/src/pl/plpgsql/src/plpgsql.h,v 1.111 2009/04/19 18:52:57 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -650,7 +650,7 @@ typedef struct PLpgSQL_function
     Oid fn_oid;
     TransactionId fn_xmin;
     ItemPointerData fn_tid;
-    int fn_functype;
+    bool fn_is_trigger;
     PLpgSQL_func_hashkey *fn_hashkey;   /* back-link to hashtable key */
     MemoryContext fn_cxt;
@@ -880,6 +880,7 @@ extern void plpgsql_ns_rename(char *oldname, char *newname);
  * ----------
  */
 extern void plpgsql_convert_ident(const char *s, char **output, int numidents);
+extern char *plpgsql_parse_string_token(const char *token);
 extern const char *plpgsql_stmt_typename(PLpgSQL_stmt *stmt);
 extern void plpgsql_dumptree(PLpgSQL_function *func);
@@ -894,8 +895,7 @@ extern int plpgsql_yylex(void);
 extern void plpgsql_push_back_token(int token);
 extern void plpgsql_yyerror(const char *message);
 extern int plpgsql_scanner_lineno(void);
-extern void plpgsql_scanner_init(const char *str, int functype);
+extern void plpgsql_scanner_init(const char *str);
 extern void plpgsql_scanner_finish(void);
-extern char *plpgsql_get_string_value(void);
 
 #endif   /* PLPGSQL_H */
diff --git a/src/pl/plpgsql/src/scan.l b/src/pl/plpgsql/src/scan.l
index 9199af6726..3dc6d73c4b 100644
--- a/src/pl/plpgsql/src/scan.l
+++ b/src/pl/plpgsql/src/scan.l
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *    $PostgreSQL: pgsql/src/pl/plpgsql/src/scan.l,v 1.67 2009/02/18 11:33:04 petere Exp $
+ *    $PostgreSQL: pgsql/src/pl/plpgsql/src/scan.l,v 1.68 2009/04/19 18:52:57 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -19,27 +19,31 @@
 #include "mb/pg_wchar.h"
 
-/* No reason to constrain amount of data slurped */
-#define YY_READ_BUF_SIZE 16777216
-
 /* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */
 #undef fprintf
 #define fprintf(file, fmt, msg) ereport(ERROR, (errmsg_internal("%s", msg)))
 
+/*
+ * When we parse a token that requires multiple lexer rules to process,
+ * remember the token's starting position this way.
+ */
+#define SAVE_TOKEN_START() \
+    ( start_lineno = plpgsql_scanner_lineno(), start_charpos = yytext )
+
 /* Handles to the buffer that the lexer uses internally */
 static YY_BUFFER_STATE scanbufhandle;
 static char *scanbuf;
 static const char *scanstr;     /* original input string */
 
-static int scanner_functype;
-static bool scanner_typereported;
 static int pushback_token;
 static bool have_pushback_token;
 static const char *cur_line_start;
 static int cur_line_num;
+static int xcdepth = 0;         /* depth of nesting in slash-star comments */
 static char *dolqstart;         /* current $foo$ quote start string */
-static int dolqlen;             /* signal to plpgsql_get_string_value */
+
+extern bool standard_conforming_strings;
 
 bool plpgsql_SpaceScanned = false;
 %}
@@ -54,31 +58,73 @@ bool plpgsql_SpaceScanned = false;
 
 %option case-insensitive
 
+/*
+ * Exclusive states are a subset of the core lexer's:
+ *   extended C-style comments
+ *   standard quoted strings
+ *   extended quoted strings (support backslash escape sequences)
+ *   $foo$ quoted strings
+ */
-%x IN_STRING
-%x IN_COMMENT
-%x IN_DOLLARQUOTE
+%x xc
+%x xe
+%x xq
+%x xdolq
 
-digit           [0-9]
-ident_start     [A-Za-z\200-\377_]
-ident_cont      [A-Za-z\200-\377_0-9\$]
+/*
+ * Definitions --- these generally must match the core lexer, but in some
+ * cases we can simplify, since we only care about identifying the token
+ * boundaries and not about deriving the represented value.  Also, we
+ * aren't trying to lex multicharacter operators so their interactions
+ * with comments go away.
+ */
-quoted_ident    (\"[^\"]*\")+
+space           [ \t\n\r\f]
+horiz_space     [ \t\f]
+newline         [\n\r]
+non_newline     [^\n\r]
-identifier      ({ident_start}{ident_cont}*|{quoted_ident})
+comment         ("--"{non_newline}*)
-param           \${digit}+
+whitespace      ({space}+|{comment})
+special_whitespace      ({space}+|{comment}{newline})
+horiz_whitespace        ({horiz_space}|{comment})
+whitespace_with_newline ({horiz_whitespace}*{newline}{special_whitespace}*)
-space           [ \t\n\r\f]
+quote           '
+quotestop       {quote}{whitespace}*
+quotecontinue   {quote}{whitespace_with_newline}{quote}
+quotefail       {quote}{whitespace}*"-"
+
+xestart         [eE]{quote}
+xeinside        [^\\']+
+xeescape        [\\].
+
+xqstart         {quote}
+xqdouble        {quote}{quote}
+xqinside        [^']+
-/* $foo$ style quotes ("dollar quoting")
- * copied straight from the backend SQL parser
- */
 dolq_start      [A-Za-z\200-\377_]
 dolq_cont       [A-Za-z\200-\377_0-9]
 dolqdelim       \$({dolq_start}{dolq_cont}*)?\$
+dolqfailed      \${dolq_start}{dolq_cont}*
 dolqinside      [^$]+
+xcstart         \/\*
+xcstop          \*+\/
+xcinside        [^*/]+
+
+digit           [0-9]
+ident_start     [A-Za-z\200-\377_]
+ident_cont      [A-Za-z\200-\377_0-9\$]
+
+/* This is a simpler treatment of quoted identifiers than the core uses */
+quoted_ident    (\"[^\"]*\")+
+
+identifier      ({ident_start}{ident_cont}*|{quoted_ident})
+
+param           \${digit}+
+
 %%
     /* ----------
     * Local variables in scanner to remember where
@@ -96,17 +142,6 @@ dolqinside      [^$]+
     plpgsql_SpaceScanned = false;
 
     /* ----------
-    * On the first call to a new source report the
-    * function's type (T_FUNCTION or T_TRIGGER)
-    * ----------
-    */
-    if (!scanner_typereported)
-    {
-        scanner_typereported = true;
-        return scanner_functype;
-    }
-
-    /* ----------
     * The keyword rules
     * ----------
     */
@@ -225,119 +260,134 @@ dump { return O_DUMP; }
 {digit}+        { return T_NUMBER; }
 
-\".             {
-                    plpgsql_error_lineno = plpgsql_scanner_lineno();
-                    ereport(ERROR,
-                            (errcode(ERRCODE_DATATYPE_MISMATCH),
-                             errmsg("unterminated quoted identifier")));
-                }
-
-    /* ----------
-    * Ignore whitespaces but remember this happened
-    * ----------
-    */
-{space}+        { plpgsql_SpaceScanned = true; }
+\".             { yyerror("unterminated quoted identifier"); }
 
     /* ----------
-    * Eat up comments
+    * Ignore whitespace (including comments) but remember this happened
     * ----------
     */
---[^\r\n]*      ;
-
-\/\*            { start_lineno = plpgsql_scanner_lineno();
-                    BEGIN(IN_COMMENT);
-                }
-\*\/            { BEGIN(INITIAL); plpgsql_SpaceScanned = true; }
-\n              ;
-.               ;
-<>              {
-                    plpgsql_error_lineno = start_lineno;
-                    ereport(ERROR,
-                            (errcode(ERRCODE_DATATYPE_MISMATCH),
-                             errmsg("unterminated /* comment")));
-                }
+{whitespace}    { plpgsql_SpaceScanned = true; }
 
     /* ----------
-    * Collect anything inside of ''s and return one STRING token
-    *
-    * Hacking yytext/yyleng here lets us avoid using yymore(), which is
-    * a win for performance.  It's safe because we know the underlying
-    * input buffer is not changing.
+    * Comment and literal handling is mostly copied from the core lexer
     * ----------
     */
-'               {
-                    start_lineno = plpgsql_scanner_lineno();
-                    start_charpos = yytext;
-                    BEGIN(IN_STRING);
-                }
-[eE]'           {
-                    /* for now, treat the same as a regular literal */
-                    start_lineno = plpgsql_scanner_lineno();
-                    start_charpos = yytext;
-                    BEGIN(IN_STRING);
-                }
-\\.             { }
-\\              { /* can only happen with \ at EOF */ }
-''              { }
-'               {
-                    /* tell plpgsql_get_string_value it's not a dollar quote */
-                    dolqlen = 0;
-                    /* adjust yytext/yyleng to describe whole string token */
-                    yyleng += (yytext - start_charpos);
-                    yytext = start_charpos;
-                    BEGIN(INITIAL);
-                    return T_STRING;
-                }
-[^'\\]+         { }
-<>              {
-                    plpgsql_error_lineno = start_lineno;
-                    ereport(ERROR,
-                            (errcode(ERRCODE_DATATYPE_MISMATCH),
-                             errmsg("unterminated quoted string")));
-                }
-
-{dolqdelim}     {
-                    start_lineno = plpgsql_scanner_lineno();
-                    start_charpos = yytext;
-                    dolqstart = pstrdup(yytext);
-                    BEGIN(IN_DOLLARQUOTE);
-                }
-{dolqdelim}     {
-                    if (strcmp(yytext, dolqstart) == 0)
-                    {
-                        pfree(dolqstart);
-                        /* tell plpgsql_get_string_value it is a dollar quote */
-                        dolqlen = yyleng;
+{xcstart}       {
+                    /* Set location in case of syntax error in comment */
+                    SAVE_TOKEN_START();
+                    xcdepth = 0;
+                    BEGIN(xc);
+                    plpgsql_SpaceScanned = true;
+                }
+
+{xcstart}       {
+                    xcdepth++;
+                }
+
+{xcstop}        {
+                    if (xcdepth <= 0)
+                        BEGIN(INITIAL);
+                    else
+                        xcdepth--;
+                }
+
+{xcinside}      {
+                    /* ignore */
+                }
+
+\/+             {
+                    /* ignore */
+                }
+
+\*+             {
+                    /* ignore */
+                }
+
+<>              { yyerror("unterminated /* comment"); }
+
+{xqstart}       {
+                    SAVE_TOKEN_START();
+                    if (standard_conforming_strings)
+                        BEGIN(xq);
+                    else
+                        BEGIN(xe);
+                }
+{xestart}       {
+                    SAVE_TOKEN_START();
+                    BEGIN(xe);
+                }
+{quotestop}     |
+{quotefail}     {
+                    yyless(1);
+                    BEGIN(INITIAL);
                     /* adjust yytext/yyleng to describe whole string token */
                     yyleng += (yytext - start_charpos);
                     yytext = start_charpos;
-                        BEGIN(INITIAL);
                     return T_STRING;
-                    }
-                    else
-                    {
-                        /*
-                         * When we fail to match $...$ to dolqstart, transfer
-                         * the $... part to the output, but put back the final
-                         * $ for rescanning.  Consider $delim$...$junk$delim$
-                         */
-                        yyless(yyleng-1);
-                    }
-                }
-{dolqinside}    { }
-.               { /* needed for $ inside the quoted text */ }
-<>              {
-                    plpgsql_error_lineno = start_lineno;
-                    ereport(ERROR,
-                            (errcode(ERRCODE_DATATYPE_MISMATCH),
-                             errmsg("unterminated dollar-quoted string")));
-                }
+                }
+{xqdouble}      {
+                }
+{xqinside}      {
+                }
+{xeinside}      {
+                }
+{xeescape}      {
+                }
+{quotecontinue} {
+                    /* ignore */
+                }
+.               {
+                    /* This is only needed for \ just before EOF */
+                }
+<>              { yyerror("unterminated quoted string"); }
+
+{dolqdelim}     {
+                    SAVE_TOKEN_START();
+                    dolqstart = pstrdup(yytext);
+                    BEGIN(xdolq);
+                }
+{dolqfailed}    {
+                    /* throw back all but the initial "$" */
+                    yyless(1);
+                    /* and treat it as {other} */
+                    return yytext[0];
+                }
+{dolqdelim}     {
+                    if (strcmp(yytext, dolqstart) == 0)
+                    {
+                        pfree(dolqstart);
+                        BEGIN(INITIAL);
+                        /* adjust yytext/yyleng to describe whole string */
+                        yyleng += (yytext - start_charpos);
+                        yytext = start_charpos;
+                        return T_STRING;
+                    }
+                    else
+                    {
+                        /*
+                         * When we fail to match $...$ to dolqstart, transfer
+                         * the $... part to the output, but put back the final
+                         * $ for rescanning.  Consider $delim$...$junk$delim$
+                         */
+                        yyless(yyleng-1);
+                    }
+                }
+{dolqinside}    {
+                }
+{dolqfailed}    {
+                }
+.               {
+                    /* This is only needed for $ inside the quoted text */
+                }
+<>              { yyerror("unterminated dollar-quoted string"); }
 
     /* ----------
     * Any unmatched character is returned as is
     * ----------
     */
-.               { return yytext[0]; }
+.               {
+                    return yytext[0];
+                }
 
 %%
@@ -437,7 +487,7 @@ plpgsql_scanner_lineno(void)
  * to cite in error messages.
  */
 void
-plpgsql_scanner_init(const char *str, int functype)
+plpgsql_scanner_init(const char *str)
 {
     Size slen;
 
@@ -460,9 +510,6 @@ plpgsql_scanner_init(const char *str, int functype)
     /* Other setup */
     scanstr = str;
 
-    scanner_functype = functype;
-    scanner_typereported = false;
-
     have_pushback_token = false;
 
     cur_line_start = scanbuf;
@@ -493,77 +540,3 @@ plpgsql_scanner_finish(void)
     yy_delete_buffer(scanbufhandle);
     pfree(scanbuf);
 }
-
-/*
- * Called after a T_STRING token is read to get the string literal's value
- * as a palloc'd string.  (We make this a separate call because in many
- * scenarios there's no need to get the decoded value.)
- *
- * Note: we expect the literal to be the most recently lexed token.  This
- * would not work well if we supported multiple-token pushback or if
- * plpgsql_yylex() wanted to read ahead beyond a T_STRING token.
- */
-char *
-plpgsql_get_string_value(void)
-{
-    char *result;
-    const char *cp;
-    int len;
-
-    if (dolqlen > 0)
-    {
-        /* Token is a $foo$...$foo$ string */
-        len = yyleng - 2 * dolqlen;
-        Assert(len >= 0);
-        result = (char *) palloc(len + 1);
-        memcpy(result, yytext + dolqlen, len);
-        result[len] = '\0';
-    }
-    else if (*yytext == 'E' || *yytext == 'e')
-    {
-        /* Token is an E'...' string */
-        result = (char *) palloc(yyleng + 1);   /* more than enough room */
-        len = 0;
-        for (cp = yytext + 2; *cp; cp++)
-        {
-            if (*cp == '\'')
-            {
-                if (cp[1] == '\'')
-                    result[len++] = *cp++;
-                /* else it must be string end quote */
-            }
-            else if (*cp == '\\')
-            {
-                if (cp[1] != '\0')      /* just a paranoid check */
-                    result[len++] = *(++cp);
-            }
-            else
-                result[len++] = *cp;
-        }
-        result[len] = '\0';
-    }
-    else
-    {
-        /* Token is a '...' string */
-        result = (char *) palloc(yyleng + 1);   /* more than enough room */
-        len = 0;
-        for (cp = yytext + 1; *cp; cp++)
-        {
-            if (*cp == '\'')
-            {
-                if (cp[1] == '\'')
-                    result[len++] = *cp++;
-                /* else it must be string end quote */
-            }
-            else if (*cp == '\\')
-            {
-                if (cp[1] != '\0')      /* just a paranoid check */
-                    result[len++] = *(++cp);
-            }
-            else
-                result[len++] = *cp;
-        }
-        result[len] = '\0';
-    }
-    return result;
-}
diff --git a/src/test/regress/expected/plpgsql.out b/src/test/regress/expected/plpgsql.out
index 25be3857ab..0446f5193c 100644
--- a/src/test/regress/expected/plpgsql.out
+++ b/src/test/regress/expected/plpgsql.out
@@ -3737,3 +3737,74 @@ SELECT * FROM leaker_1(true);
 
 DROP FUNCTION leaker_1(bool);
 DROP FUNCTION leaker_2(bool);
+-- Test handling of string literals.
+set standard_conforming_strings = off;
+create or replace function strtest() returns text as $$
+begin
+  raise notice 'foo\\bar\041baz';
+  return 'foo\\bar\041baz';
+end
+$$ language plpgsql;
+WARNING: nonstandard use of \\ in a string literal
+HINT: Use the escape string syntax for backslashes, e.g., E'\\'.
+CONTEXT: string literal in PL/PgSQL function "strtest" near line 2
+WARNING: nonstandard use of \\ in a string literal
+LINE 1: SELECT 'foo\\bar\041baz'
+               ^
+HINT: Use the escape string syntax for backslashes, e.g., E'\\'.
+QUERY: SELECT 'foo\\bar\041baz'
+CONTEXT: SQL statement in PL/PgSQL function "strtest" near line 3
+select strtest();
+NOTICE: foo\bar!baz
+WARNING: nonstandard use of \\ in a string literal
+LINE 1: SELECT 'foo\\bar\041baz'
+               ^
+HINT: Use the escape string syntax for backslashes, e.g., E'\\'.
+QUERY: SELECT 'foo\\bar\041baz'
+CONTEXT: PL/pgSQL function "strtest" line 3 at RETURN
+   strtest   
+-------------
+ foo\bar!baz
+(1 row)
+
+create or replace function strtest() returns text as $$
+begin
+  raise notice E'foo\\bar\041baz';
+  return E'foo\\bar\041baz';
+end
+$$ language plpgsql;
+select strtest();
+NOTICE: foo\bar!baz
+   strtest   
+-------------
+ foo\bar!baz
+(1 row)
+
+set standard_conforming_strings = on;
+create or replace function strtest() returns text as $$
+begin
+  raise notice 'foo\\bar\041baz\';
+  return 'foo\\bar\041baz\';
+end
+$$ language plpgsql;
+select strtest();
+NOTICE: foo\\bar\041baz\
+     strtest      
+------------------
+ foo\\bar\041baz\
+(1 row)
+
+create or replace function strtest() returns text as $$
+begin
+  raise notice E'foo\\bar\041baz';
+  return E'foo\\bar\041baz';
+end
+$$ language plpgsql;
+select strtest();
+NOTICE: foo\bar!baz
+   strtest   
+-------------
+ foo\bar!baz
+(1 row)
+
+drop function strtest();
diff --git a/src/test/regress/sql/plpgsql.sql b/src/test/regress/sql/plpgsql.sql
index d9026bd117..3dcfc9e781 100644
--- a/src/test/regress/sql/plpgsql.sql
+++ b/src/test/regress/sql/plpgsql.sql
@@ -3005,3 +3005,47 @@ SELECT * FROM leaker_1(true);
 
 DROP FUNCTION leaker_1(bool);
 DROP FUNCTION leaker_2(bool);
+
+-- Test handling of string literals.
+
+set standard_conforming_strings = off;
+
+create or replace function strtest() returns text as $$
+begin
+  raise notice 'foo\\bar\041baz';
+  return 'foo\\bar\041baz';
+end
+$$ language plpgsql;
+
+select strtest();
+
+create or replace function strtest() returns text as $$
+begin
+  raise notice E'foo\\bar\041baz';
+  return E'foo\\bar\041baz';
+end
+$$ language plpgsql;
+
+select strtest();
+
+set standard_conforming_strings = on;
+
+create or replace function strtest() returns text as $$
+begin
+  raise notice 'foo\\bar\041baz\';
+  return 'foo\\bar\041baz\';
+end
+$$ language plpgsql;
+
+select strtest();
+
+create or replace function strtest() returns text as $$
+begin
+  raise notice E'foo\\bar\041baz';
+  return E'foo\\bar\041baz';
+end
+$$ language plpgsql;
+
+select strtest();
+
+drop function strtest();
-- 
2.11.0