OSDN Git Service

author: Tom Lane <tgl@sss.pgh.pa.us>
        Sun, 19 Apr 2009 18:52:58 +0000 (18:52 +0000)
committer: Tom Lane <tgl@sss.pgh.pa.us>
        Sun, 19 Apr 2009 18:52:58 +0000 (18:52 +0000)

Revise plpgsql's scanner to process comments and string literals in a way
more nearly matching the core SQL scanner.  The user-visible effects are:

* Block comments (slash-star comments) now nest, as per SQL spec.

* In standard_conforming_strings mode, backslash as the last character of a
  non-E string literal is now correctly taken as an ordinary character;
  formerly it was misinterpreted as escaping the ending quote.  (Since the
  string also had to pass through the core scanner, this invariably led
  to syntax errors.)

* Formerly, backslashes in the format string of RAISE were always treated as
  quoting the next character, regardless of mode.  Now, they are ordinary
  characters with standard_conforming_strings on, while with it off, they
  introduce the same set of escapes as in the core SQL scanner.  Also,
  escape_string_warning is now effective for RAISE format strings.  These
  changes make RAISE format strings work just like any other string literal.

This is implemented by copying and pasting a lot of logic from the core
scanner.  It would be a good idea to look into getting rid of plpgsql's
scanner entirely in favor of using the core scanner.  However, that involves
more change than I can justify making during beta --- in particular, the core
scanner would have to become re-entrant.

In passing, remove the kluge that made the plpgsql scanner emit T_FUNCTION or
T_TRIGGER as a made-up first token.  That presumably had some value once upon
a time, but now it's just useless complication for both the scanner and the
grammar.

doc/src/sgml/plpgsql.sgml
src/pl/plpgsql/src/gram.y
src/pl/plpgsql/src/pl_comp.c
src/pl/plpgsql/src/pl_funcs.c
src/pl/plpgsql/src/plpgsql.h
src/pl/plpgsql/src/scan.l
src/test/regress/expected/plpgsql.out
src/test/regress/sql/plpgsql.sql

index 6eb9b2b..319547e 100644 (file)
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/plpgsql.sgml,v 1.139 2009/04/02 19:20:45 momjian Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/plpgsql.sgml,v 1.140 2009/04/19 18:52:56 tgl Exp $ -->
 
 <chapter id="plpgsql">
   <title><application>PL/pgSQL</application> - <acronym>SQL</acronym> Procedural Language</title>
@@ -220,10 +220,8 @@ END <optional> <replaceable>label</replaceable> </optional>;
      There are two types of comments in <application>PL/pgSQL</>. A double
      dash (<literal>--</literal>) starts a comment that extends to the end of
      the line. A <literal>/*</literal> starts a block comment that extends to
-     the next occurrence of <literal>*/</literal>.  Block comments cannot be
-     nested, but double dash comments can be enclosed into a block comment and
-     a double dash can hide the block comment delimiters <literal>/*</literal>
-     and <literal>*/</literal>.
+     the next occurrence of <literal>*/</literal>.  Block comments nest,
+     just as in ordinary SQL.
     </para>
 
     <para>
index 79d4f61..bb0ca75 100644 (file)
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/pl/plpgsql/src/gram.y,v 1.121 2009/02/18 11:33:04 petere Exp $
+ *       $PostgreSQL: pgsql/src/pl/plpgsql/src/gram.y,v 1.122 2009/04/19 18:52:57 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -62,6 +62,8 @@ static PLpgSQL_row            *make_scalar_list1(const char *initial_name,
                                                                                   int lineno);
 static void                     check_sql_expr(const char *stmt);
 static void                     plpgsql_sql_error_callback(void *arg);
+static char                    *parse_string_token(const char *token);
+static void                     plpgsql_string_error_callback(void *arg);
 static char                    *check_label(const char *yytxt);
 static void                     check_labels(const char *start_label,
                                                                          const char *end_label);
@@ -228,8 +230,6 @@ static List                         *read_raise_options(void);
                /*
                 * Other tokens
                 */
-%token T_FUNCTION
-%token T_TRIGGER
 %token T_STRING
 %token T_NUMBER
 %token T_SCALAR                                /* a VAR, RECFIELD, or TRIGARG */
@@ -244,13 +244,9 @@ static List                                *read_raise_options(void);
 
 %%
 
-pl_function            : T_FUNCTION comp_optsect pl_block opt_semi
+pl_function            : comp_optsect pl_block opt_semi
                                        {
-                                               yylval.program = (PLpgSQL_stmt_block *)$3;
-                                       }
-                               | T_TRIGGER comp_optsect pl_block opt_semi
-                                       {
-                                               yylval.program = (PLpgSQL_stmt_block *)$3;
+                                               yylval.program = (PLpgSQL_stmt_block *) $2;
                                        }
                                ;
 
@@ -1403,7 +1399,7 @@ stmt_raise                : K_RAISE lno
                                                        if (tok == T_STRING)
                                                        {
                                                                /* old style message and parameters */
-                                                               new->message = plpgsql_get_string_value();
+                                                               new->message = parse_string_token(yytext);
                                                                /*
                                                                 * We expect either a semi-colon, which
                                                                 * indicates no parameters, or a comma that
@@ -1435,7 +1431,7 @@ stmt_raise                : K_RAISE lno
 
                                                                        if (yylex() != T_STRING)
                                                                                yyerror("syntax error");
-                                                                       sqlstatestr = plpgsql_get_string_value();
+                                                                       sqlstatestr = parse_string_token(yytext);
 
                                                                        if (strlen(sqlstatestr) != 5)
                                                                                yyerror("invalid SQLSTATE code");
@@ -1778,7 +1774,7 @@ proc_condition    : opt_lblname
                                                        /* next token should be a string literal */
                                                        if (yylex() != T_STRING)
                                                                yyerror("syntax error");
-                                                       sqlstatestr = plpgsql_get_string_value();
+                                                       sqlstatestr = parse_string_token(yytext);
 
                                                        if (strlen(sqlstatestr) != 5)
                                                                yyerror("invalid SQLSTATE code");
@@ -2738,6 +2734,49 @@ plpgsql_sql_error_callback(void *arg)
        errposition(0);
 }
 
+/*
+ * Convert a string-literal token to the represented string value.
+ *
+ * To do this, we need to invoke the core lexer.  To avoid confusion between
+ * the core bison/flex definitions and our own, the actual invocation is in
+ * pl_funcs.c.  Here we are only concerned with setting up the right errcontext
+ * state, which is handled the same as in check_sql_expr().
+ */
+static char *
+parse_string_token(const char *token)
+{
+       char       *result;
+       ErrorContextCallback  syntax_errcontext;
+       ErrorContextCallback *previous_errcontext;
+
+       /* See comments in check_sql_expr() */
+       Assert(error_context_stack->callback == plpgsql_compile_error_callback);
+
+       previous_errcontext = error_context_stack;
+       syntax_errcontext.callback = plpgsql_string_error_callback;
+       syntax_errcontext.arg = (char *) token;
+       syntax_errcontext.previous = error_context_stack->previous;
+       error_context_stack = &syntax_errcontext;
+
+       result = plpgsql_parse_string_token(token);
+
+       /* Restore former ereport callback */
+       error_context_stack = previous_errcontext;
+
+       return result;
+}
+
+static void
+plpgsql_string_error_callback(void *arg)
+{
+       Assert(plpgsql_error_funcname);
+
+       errcontext("string literal in PL/PgSQL function \"%s\" near line %d",
+                          plpgsql_error_funcname, plpgsql_error_lineno);
+       /* representing the string literal as internalquery seems overkill */
+       errposition(0);
+}
+
 static char *
 check_label(const char *yytxt)
 {
index b2673dc..5f75818 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_comp.c,v 1.134 2009/02/18 11:33:04 petere Exp $
+ *       $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_comp.c,v 1.135 2009/04/19 18:52:57 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -261,7 +261,7 @@ do_compile(FunctionCallInfo fcinfo,
                   bool forValidator)
 {
        Form_pg_proc procStruct = (Form_pg_proc) GETSTRUCT(procTup);
-       int                     functype = CALLED_AS_TRIGGER(fcinfo) ? T_TRIGGER : T_FUNCTION;
+       bool            is_trigger = CALLED_AS_TRIGGER(fcinfo);
        Datum           prosrcdatum;
        bool            isnull;
        char       *proc_source;
@@ -293,7 +293,7 @@ do_compile(FunctionCallInfo fcinfo,
        if (isnull)
                elog(ERROR, "null prosrc");
        proc_source = TextDatumGetCString(prosrcdatum);
-       plpgsql_scanner_init(proc_source, functype);
+       plpgsql_scanner_init(proc_source);
 
        plpgsql_error_funcname = pstrdup(NameStr(procStruct->proname));
        plpgsql_error_lineno = 0;
@@ -359,13 +359,13 @@ do_compile(FunctionCallInfo fcinfo,
        function->fn_oid = fcinfo->flinfo->fn_oid;
        function->fn_xmin = HeapTupleHeaderGetXmin(procTup->t_data);
        function->fn_tid = procTup->t_self;
-       function->fn_functype = functype;
+       function->fn_is_trigger = is_trigger;
        function->fn_cxt = func_cxt;
        function->out_param_varno = -1;         /* set up for no OUT param */
 
-       switch (functype)
+       switch (is_trigger)
        {
-               case T_FUNCTION:
+               case false:
 
                        /*
                         * Fetch info about the procedure's parameters. Allocations aren't
@@ -564,7 +564,7 @@ do_compile(FunctionCallInfo fcinfo,
                        ReleaseSysCache(typeTup);
                        break;
 
-               case T_TRIGGER:
+               case true:
                        /* Trigger procedure's return type is unknown yet */
                        function->fn_rettype = InvalidOid;
                        function->fn_retbyval = false;
@@ -645,7 +645,7 @@ do_compile(FunctionCallInfo fcinfo,
                        break;
 
                default:
-                       elog(ERROR, "unrecognized function typecode: %u", functype);
+                       elog(ERROR, "unrecognized function typecode: %d", (int) is_trigger);
                        break;
        }
 
@@ -790,7 +790,7 @@ plpgsql_parse_word(const char *word)
         * Recognize tg_argv when compiling triggers
         * (XXX this sucks, it should be a regular variable in the namestack)
         */
-       if (plpgsql_curr_compile->fn_functype == T_TRIGGER)
+       if (plpgsql_curr_compile->fn_is_trigger)
        {
                if (strcmp(cp[0], "tg_argv") == 0)
                {
index b6cd6c5..9a3a9ba 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_funcs.c,v 1.76 2009/02/18 11:33:04 petere Exp $
+ *       $PostgreSQL: pgsql/src/pl/plpgsql/src/pl_funcs.c,v 1.77 2009/04/19 18:52:57 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -17,6 +17,8 @@
 
 #include <ctype.h>
 
+#include "parser/gramparse.h"
+#include "parser/gram.h"
 #include "parser/scansup.h"
 
 
@@ -460,6 +462,41 @@ plpgsql_convert_ident(const char *s, char **output, int numidents)
 
 
 /*
+ * plpgsql_parse_string_token - get the value represented by a string literal
+ *
+ * We do not make plpgsql's lexer produce the represented value, because
+ * in many cases we don't need it.  Instead this function is invoked when
+ * we do need it.  The input is the T_STRING token as identified by the lexer.
+ *
+ * The result is a palloc'd string.
+ *
+ * Note: this is called only from plpgsql's gram.y, but we can't just put it
+ * there because including parser/gram.h there would cause confusion.
+ */
+char *
+plpgsql_parse_string_token(const char *token)
+{
+       int             ctoken;
+
+       /*
+        * We use the core lexer to do the dirty work.  Aside from getting the
+        * right results for escape sequences and so on, this helps us produce
+        * appropriate warnings for escape_string_warning etc.
+        */
+       scanner_init(token);
+
+       ctoken = base_yylex();
+
+       if (ctoken != SCONST)
+               elog(ERROR, "unexpected result from base lexer: %d", ctoken);
+
+       scanner_finish();
+
+       return base_yylval.str;
+}
+
+
+/*
  * Statement type as a string, for use in error messages etc.
  */
 const char *
index d8a8a17..e8a0736 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/pl/plpgsql/src/plpgsql.h,v 1.110 2009/04/09 02:57:53 tgl Exp $
+ *       $PostgreSQL: pgsql/src/pl/plpgsql/src/plpgsql.h,v 1.111 2009/04/19 18:52:57 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -650,7 +650,7 @@ typedef struct PLpgSQL_function
        Oid                     fn_oid;
        TransactionId fn_xmin;
        ItemPointerData fn_tid;
-       int                     fn_functype;
+       bool            fn_is_trigger;
        PLpgSQL_func_hashkey *fn_hashkey;       /* back-link to hashtable key */
        MemoryContext fn_cxt;
 
@@ -880,6 +880,7 @@ extern void plpgsql_ns_rename(char *oldname, char *newname);
  * ----------
  */
 extern void plpgsql_convert_ident(const char *s, char **output, int numidents);
+extern char *plpgsql_parse_string_token(const char *token);
 extern const char *plpgsql_stmt_typename(PLpgSQL_stmt *stmt);
 extern void plpgsql_dumptree(PLpgSQL_function *func);
 
@@ -894,8 +895,7 @@ extern int  plpgsql_yylex(void);
 extern void plpgsql_push_back_token(int token);
 extern void plpgsql_yyerror(const char *message);
 extern int     plpgsql_scanner_lineno(void);
-extern void plpgsql_scanner_init(const char *str, int functype);
+extern void plpgsql_scanner_init(const char *str);
 extern void plpgsql_scanner_finish(void);
-extern char *plpgsql_get_string_value(void);
 
 #endif   /* PLPGSQL_H */
index 9199af6..3dc6d73 100644 (file)
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/pl/plpgsql/src/scan.l,v 1.67 2009/02/18 11:33:04 petere Exp $
+ *       $PostgreSQL: pgsql/src/pl/plpgsql/src/scan.l,v 1.68 2009/04/19 18:52:57 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "mb/pg_wchar.h"
 
 
-/* No reason to constrain amount of data slurped */
-#define YY_READ_BUF_SIZE 16777216
-
 /* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */
 #undef fprintf
 #define fprintf(file, fmt, msg)  ereport(ERROR, (errmsg_internal("%s", msg)))
 
+/*
+ * When we parse a token that requires multiple lexer rules to process,
+ * remember the token's starting position this way.
+ */
+#define SAVE_TOKEN_START()  \
+       ( start_lineno = plpgsql_scanner_lineno(), start_charpos = yytext )
+
 /* Handles to the buffer that the lexer uses internally */
 static YY_BUFFER_STATE scanbufhandle;
 static char *scanbuf;
 
 static const char *scanstr;            /* original input string */
 
-static int     scanner_functype;
-static bool    scanner_typereported;
 static int     pushback_token;
 static bool have_pushback_token;
 static const char *cur_line_start;
 static int     cur_line_num;
+static int             xcdepth = 0;    /* depth of nesting in slash-star comments */
 static char    *dolqstart;      /* current $foo$ quote start string */
-static int     dolqlen;                        /* signal to plpgsql_get_string_value */
+
+extern bool            standard_conforming_strings;
 
 bool plpgsql_SpaceScanned = false;
 %}
@@ -54,31 +58,73 @@ bool plpgsql_SpaceScanned = false;
 
 %option case-insensitive
 
+/*
+ * Exclusive states are a subset of the core lexer's:
+ *  <xc> extended C-style comments
+ *  <xq> standard quoted strings
+ *  <xe> extended quoted strings (support backslash escape sequences)
+ *  <xdolq> $foo$ quoted strings
+ */
 
-%x     IN_STRING
-%x     IN_COMMENT
-%x     IN_DOLLARQUOTE
+%x xc
+%x xe
+%x xq
+%x xdolq
 
-digit                  [0-9]
-ident_start            [A-Za-z\200-\377_]
-ident_cont             [A-Za-z\200-\377_0-9\$]
+/*
+ * Definitions --- these generally must match the core lexer, but in some
+ * cases we can simplify, since we only care about identifying the token
+ * boundaries and not about deriving the represented value.  Also, we
+ * aren't trying to lex multicharacter operators so their interactions
+ * with comments go away.
+ */
 
-quoted_ident   (\"[^\"]*\")+
+space                  [ \t\n\r\f]
+horiz_space            [ \t\f]
+newline                        [\n\r]
+non_newline            [^\n\r]
 
-identifier             ({ident_start}{ident_cont}*|{quoted_ident})
+comment                        ("--"{non_newline}*)
 
-param                  \${digit}+
+whitespace             ({space}+|{comment})
+special_whitespace             ({space}+|{comment}{newline})
+horiz_whitespace               ({horiz_space}|{comment})
+whitespace_with_newline        ({horiz_whitespace}*{newline}{special_whitespace}*)
 
-space                  [ \t\n\r\f]
+quote                  '
+quotestop              {quote}{whitespace}*
+quotecontinue  {quote}{whitespace_with_newline}{quote}
+quotefail              {quote}{whitespace}*"-"
+
+xestart                        [eE]{quote}
+xeinside               [^\\']+
+xeescape               [\\].
+
+xqstart                        {quote}
+xqdouble               {quote}{quote}
+xqinside               [^']+
 
-/* $foo$ style quotes ("dollar quoting")
- * copied straight from the backend SQL parser
- */
 dolq_start             [A-Za-z\200-\377_]
 dolq_cont              [A-Za-z\200-\377_0-9]
 dolqdelim              \$({dolq_start}{dolq_cont}*)?\$
+dolqfailed             \${dolq_start}{dolq_cont}*
 dolqinside             [^$]+
 
+xcstart                        \/\*
+xcstop                 \*+\/
+xcinside               [^*/]+
+
+digit                  [0-9]
+ident_start            [A-Za-z\200-\377_]
+ident_cont             [A-Za-z\200-\377_0-9\$]
+
+/* This is a simpler treatment of quoted identifiers than the core uses */
+quoted_ident   (\"[^\"]*\")+
+
+identifier             ({ident_start}{ident_cont}*|{quoted_ident})
+
+param                  \${digit}+
+
 %%
     /* ----------
      * Local variables in scanner to remember where
@@ -96,17 +142,6 @@ dolqinside          [^$]+
     plpgsql_SpaceScanned = false;
 
     /* ----------
-     * On the first call to a new source report the
-     * function's type (T_FUNCTION or T_TRIGGER)
-     * ----------
-     */
-       if (!scanner_typereported)
-       {
-               scanner_typereported = true;
-               return scanner_functype;
-       }
-
-    /* ----------
      * The keyword rules
      * ----------
      */
@@ -225,119 +260,134 @@ dump                    { return O_DUMP;                        }
 
 {digit}+               { return T_NUMBER;                      }
 
-\".                            {
-                               plpgsql_error_lineno = plpgsql_scanner_lineno();
-                               ereport(ERROR,
-                                               (errcode(ERRCODE_DATATYPE_MISMATCH),
-                                                errmsg("unterminated quoted identifier")));
-                       }
-
-    /* ----------
-     * Ignore whitespaces but remember this happened
-     * ----------
-     */
-{space}+               { plpgsql_SpaceScanned = true;          }
+\".                            { yyerror("unterminated quoted identifier"); }
 
     /* ----------
-     * Eat up comments
+     * Ignore whitespace (including comments) but remember this happened
      * ----------
      */
---[^\r\n]*             ;
-
-\/\*                   { start_lineno = plpgsql_scanner_lineno();
-                         BEGIN(IN_COMMENT);
-                       }
-<IN_COMMENT>\*\/       { BEGIN(INITIAL); plpgsql_SpaceScanned = true; }
-<IN_COMMENT>\n         ;
-<IN_COMMENT>.          ;
-<IN_COMMENT><<EOF>>    {
-                               plpgsql_error_lineno = start_lineno;
-                               ereport(ERROR,
-                                               (errcode(ERRCODE_DATATYPE_MISMATCH),
-                                                errmsg("unterminated /* comment")));
-                       }
+{whitespace}   { plpgsql_SpaceScanned = true; }
 
     /* ----------
-     * Collect anything inside of ''s and return one STRING token
-        *
-        * Hacking yytext/yyleng here lets us avoid using yymore(), which is
-        * a win for performance.  It's safe because we know the underlying
-        * input buffer is not changing.
+     * Comment and literal handling is mostly copied from the core lexer
      * ----------
      */
-'                      {
-                         start_lineno = plpgsql_scanner_lineno();
-                         start_charpos = yytext;
-                         BEGIN(IN_STRING);
-                       }
-[eE]'          {
-                         /* for now, treat the same as a regular literal */
-                         start_lineno = plpgsql_scanner_lineno();
-                         start_charpos = yytext;
-                         BEGIN(IN_STRING);
-                       }
-<IN_STRING>\\.         { }
-<IN_STRING>\\          { /* can only happen with \ at EOF */ }
-<IN_STRING>''          { }
-<IN_STRING>'           {
-                         /* tell plpgsql_get_string_value it's not a dollar quote */
-                         dolqlen = 0;
-                         /* adjust yytext/yyleng to describe whole string token */
-                         yyleng += (yytext - start_charpos);
-                         yytext = start_charpos;
-                         BEGIN(INITIAL);
-                         return T_STRING;
-                       }
-<IN_STRING>[^'\\]+     { }
-<IN_STRING><<EOF>>     {
-                               plpgsql_error_lineno = start_lineno;
-                               ereport(ERROR,
-                                               (errcode(ERRCODE_DATATYPE_MISMATCH),
-                                                errmsg("unterminated quoted string")));
-                       }
-
-{dolqdelim}            {
-                         start_lineno = plpgsql_scanner_lineno();
-                         start_charpos = yytext;
-                         dolqstart = pstrdup(yytext);
-                         BEGIN(IN_DOLLARQUOTE);
-                       }
-<IN_DOLLARQUOTE>{dolqdelim} {
-                         if (strcmp(yytext, dolqstart) == 0)
-                         {
-                                       pfree(dolqstart);
-                                       /* tell plpgsql_get_string_value it is a dollar quote */
-                                       dolqlen = yyleng;
+{xcstart}              {
+                                       /* Set location in case of syntax error in comment */
+                                       SAVE_TOKEN_START();
+                                       xcdepth = 0;
+                                       BEGIN(xc);
+                                       plpgsql_SpaceScanned = true;
+                               }
+
+<xc>{xcstart}  {
+                                       xcdepth++;
+                               }
+
+<xc>{xcstop}   {
+                                       if (xcdepth <= 0)
+                                               BEGIN(INITIAL);
+                                       else
+                                               xcdepth--;
+                               }
+
+<xc>{xcinside} {
+                                       /* ignore */
+                               }
+
+<xc>\/+                        {
+                                       /* ignore */
+                               }
+
+<xc>\*+                        {
+                                       /* ignore */
+                               }
+
+<xc><<EOF>>            { yyerror("unterminated /* comment"); }
+
+{xqstart}              {
+                                       SAVE_TOKEN_START();
+                                       if (standard_conforming_strings)
+                                               BEGIN(xq);
+                                       else
+                                               BEGIN(xe);
+                               }
+{xestart}              {
+                                       SAVE_TOKEN_START();
+                                       BEGIN(xe);
+                               }
+<xq,xe>{quotestop}     |
+<xq,xe>{quotefail} {
+                                       yyless(1);
+                                       BEGIN(INITIAL);
                                        /* adjust yytext/yyleng to describe whole string token */
                                        yyleng += (yytext - start_charpos);
                                        yytext = start_charpos;
-                                       BEGIN(INITIAL);
                                        return T_STRING;
-                         }
-                         else
-                         {
-                                       /*
-                                        * When we fail to match $...$ to dolqstart, transfer
-                                        * the $... part to the output, but put back the final
-                                        * $ for rescanning.  Consider $delim$...$junk$delim$
-                                        */
-                                       yyless(yyleng-1);
-                         }
-                       }
-<IN_DOLLARQUOTE>{dolqinside} { }
-<IN_DOLLARQUOTE>.      { /* needed for $ inside the quoted text */ }
-<IN_DOLLARQUOTE><<EOF>>        {
-                               plpgsql_error_lineno = start_lineno;
-                               ereport(ERROR,
-                                               (errcode(ERRCODE_DATATYPE_MISMATCH),
-                                                errmsg("unterminated dollar-quoted string")));
-                       }
+                               }
+<xq,xe>{xqdouble} {
+                               }
+<xq>{xqinside}  {
+                               }
+<xe>{xeinside}  {
+                               }
+<xe>{xeescape}  {
+                               }
+<xq,xe>{quotecontinue} {
+                                       /* ignore */
+                               }
+<xe>.                  {
+                                       /* This is only needed for \ just before EOF */
+                               }
+<xq,xe><<EOF>>         { yyerror("unterminated quoted string"); }
+
+{dolqdelim}            {
+                                       SAVE_TOKEN_START();
+                                       dolqstart = pstrdup(yytext);
+                                       BEGIN(xdolq);
+                               }
+{dolqfailed}   {
+                                       /* throw back all but the initial "$" */
+                                       yyless(1);
+                                       /* and treat it as {other} */
+                                       return yytext[0];
+                               }
+<xdolq>{dolqdelim} {
+                                       if (strcmp(yytext, dolqstart) == 0)
+                                       {
+                                               pfree(dolqstart);
+                                               BEGIN(INITIAL);
+                                               /* adjust yytext/yyleng to describe whole string */
+                                               yyleng += (yytext - start_charpos);
+                                               yytext = start_charpos;
+                                               return T_STRING;
+                                       }
+                                       else
+                                       {
+                                               /*
+                                                * When we fail to match $...$ to dolqstart, transfer
+                                                * the $... part to the output, but put back the final
+                                                * $ for rescanning.  Consider $delim$...$junk$delim$
+                                                */
+                                               yyless(yyleng-1);
+                                       }
+                               }
+<xdolq>{dolqinside} {
+                               }
+<xdolq>{dolqfailed} {
+                               }
+<xdolq>.               {
+                                       /* This is only needed for $ inside the quoted text */
+                               }
+<xdolq><<EOF>> { yyerror("unterminated dollar-quoted string"); }
 
     /* ----------
      * Any unmatched character is returned as is
      * ----------
      */
-.                      { return yytext[0];                     }
+.                              {
+                                       return yytext[0];
+                               }
 
 %%
 
@@ -437,7 +487,7 @@ plpgsql_scanner_lineno(void)
  * to cite in error messages.
  */
 void
-plpgsql_scanner_init(const char *str, int functype)
+plpgsql_scanner_init(const char *str)
 {
        Size    slen;
 
@@ -460,9 +510,6 @@ plpgsql_scanner_init(const char *str, int functype)
        /* Other setup */
        scanstr = str;
 
-    scanner_functype = functype;
-    scanner_typereported = false;
-
        have_pushback_token = false;
 
        cur_line_start = scanbuf;
@@ -493,77 +540,3 @@ plpgsql_scanner_finish(void)
        yy_delete_buffer(scanbufhandle);
        pfree(scanbuf);
 }
-
-/*
- * Called after a T_STRING token is read to get the string literal's value
- * as a palloc'd string.  (We make this a separate call because in many
- * scenarios there's no need to get the decoded value.)
- *
- * Note: we expect the literal to be the most recently lexed token.  This
- * would not work well if we supported multiple-token pushback or if
- * plpgsql_yylex() wanted to read ahead beyond a T_STRING token.
- */
-char *
-plpgsql_get_string_value(void)
-{
-       char       *result;
-       const char *cp;
-       int                     len;
-
-       if (dolqlen > 0)
-       {
-               /* Token is a $foo$...$foo$ string */
-               len = yyleng - 2 * dolqlen;
-               Assert(len >= 0);
-               result = (char *) palloc(len + 1);
-               memcpy(result, yytext + dolqlen, len);
-               result[len] = '\0';
-       }
-       else if (*yytext == 'E' || *yytext == 'e')
-       {
-               /* Token is an E'...' string */
-               result = (char *) palloc(yyleng + 1);   /* more than enough room */
-               len = 0;
-               for (cp = yytext + 2; *cp; cp++)
-               {
-                       if (*cp == '\'')
-                       {
-                               if (cp[1] == '\'')
-                                       result[len++] = *cp++;
-                               /* else it must be string end quote */
-                       }
-                       else if (*cp == '\\')
-                       {
-                               if (cp[1] != '\0')      /* just a paranoid check */
-                                       result[len++] = *(++cp);
-                       }
-                       else
-                               result[len++] = *cp;
-               }
-               result[len] = '\0';
-       }
-       else
-       {
-               /* Token is a '...' string */
-               result = (char *) palloc(yyleng + 1);   /* more than enough room */
-               len = 0;
-               for (cp = yytext + 1; *cp; cp++)
-               {
-                       if (*cp == '\'')
-                       {
-                               if (cp[1] == '\'')
-                                       result[len++] = *cp++;
-                               /* else it must be string end quote */
-                       }
-                       else if (*cp == '\\')
-                       {
-                               if (cp[1] != '\0')      /* just a paranoid check */
-                                       result[len++] = *(++cp);
-                       }
-                       else
-                               result[len++] = *cp;
-               }
-               result[len] = '\0';
-       }
-       return result;
-}
index 25be385..0446f51 100644 (file)
@@ -3737,3 +3737,74 @@ SELECT * FROM leaker_1(true);
 
 DROP FUNCTION leaker_1(bool);
 DROP FUNCTION leaker_2(bool);
+-- Test handling of string literals.
+set standard_conforming_strings = off;
+create or replace function strtest() returns text as $$
+begin
+  raise notice 'foo\\bar\041baz';
+  return 'foo\\bar\041baz';
+end
+$$ language plpgsql;
+WARNING:  nonstandard use of \\ in a string literal
+HINT:  Use the escape string syntax for backslashes, e.g., E'\\'.
+CONTEXT:  string literal in PL/PgSQL function "strtest" near line 2
+WARNING:  nonstandard use of \\ in a string literal
+LINE 1: SELECT  'foo\\bar\041baz'
+                ^
+HINT:  Use the escape string syntax for backslashes, e.g., E'\\'.
+QUERY:  SELECT  'foo\\bar\041baz'
+CONTEXT:  SQL statement in PL/PgSQL function "strtest" near line 3
+select strtest();
+NOTICE:  foo\bar!baz
+WARNING:  nonstandard use of \\ in a string literal
+LINE 1: SELECT  'foo\\bar\041baz'
+                ^
+HINT:  Use the escape string syntax for backslashes, e.g., E'\\'.
+QUERY:  SELECT  'foo\\bar\041baz'
+CONTEXT:  PL/pgSQL function "strtest" line 3 at RETURN
+   strtest   
+-------------
+ foo\bar!baz
+(1 row)
+
+create or replace function strtest() returns text as $$
+begin
+  raise notice E'foo\\bar\041baz';
+  return E'foo\\bar\041baz';
+end
+$$ language plpgsql;
+select strtest();
+NOTICE:  foo\bar!baz
+   strtest   
+-------------
+ foo\bar!baz
+(1 row)
+
+set standard_conforming_strings = on;
+create or replace function strtest() returns text as $$
+begin
+  raise notice 'foo\\bar\041baz\';
+  return 'foo\\bar\041baz\';
+end
+$$ language plpgsql;
+select strtest();
+NOTICE:  foo\\bar\041baz\
+     strtest      
+------------------
+ foo\\bar\041baz\
+(1 row)
+
+create or replace function strtest() returns text as $$
+begin
+  raise notice E'foo\\bar\041baz';
+  return E'foo\\bar\041baz';
+end
+$$ language plpgsql;
+select strtest();
+NOTICE:  foo\bar!baz
+   strtest   
+-------------
+ foo\bar!baz
+(1 row)
+
+drop function strtest();
index d9026bd..3dcfc9e 100644 (file)
@@ -3005,3 +3005,47 @@ SELECT * FROM leaker_1(true);
 
 DROP FUNCTION leaker_1(bool);
 DROP FUNCTION leaker_2(bool);
+
+-- Test handling of string literals.
+
+set standard_conforming_strings = off;
+
+create or replace function strtest() returns text as $$
+begin
+  raise notice 'foo\\bar\041baz';
+  return 'foo\\bar\041baz';
+end
+$$ language plpgsql;
+
+select strtest();
+
+create or replace function strtest() returns text as $$
+begin
+  raise notice E'foo\\bar\041baz';
+  return E'foo\\bar\041baz';
+end
+$$ language plpgsql;
+
+select strtest();
+
+set standard_conforming_strings = on;
+
+create or replace function strtest() returns text as $$
+begin
+  raise notice 'foo\\bar\041baz\';
+  return 'foo\\bar\041baz\';
+end
+$$ language plpgsql;
+
+select strtest();
+
+create or replace function strtest() returns text as $$
+begin
+  raise notice E'foo\\bar\041baz';
+  return E'foo\\bar\041baz';
+end
+$$ language plpgsql;
+
+select strtest();
+
+drop function strtest();