From 20aea2ec7b278e664e420172160073ccc2ad24ad Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Mon, 5 May 2003 16:46:28 +0000 Subject: [PATCH] Alter plpgsql's lexer so that yylineno and yymore are not used. This avoids 'input buffer overflow' failure on long literals, improves performance, gives the right answer for line position in functions containing multiline literals, suppresses annoying compiler warnings, and generally is so much better I wonder why we didn't do it before. --- src/pl/plpgsql/src/gram.y | 51 ++++++++-------- src/pl/plpgsql/src/pl_comp.c | 17 +++--- src/pl/plpgsql/src/plpgsql.h | 13 ++-- src/pl/plpgsql/src/scan.l | 139 ++++++++++++++++++++++++++++--------------- 4 files changed, 132 insertions(+), 88 deletions(-) diff --git a/src/pl/plpgsql/src/gram.y b/src/pl/plpgsql/src/gram.y index dd15cf80a2..ef7b934f25 100644 --- a/src/pl/plpgsql/src/gram.y +++ b/src/pl/plpgsql/src/gram.y @@ -4,7 +4,7 @@ * procedural language * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/pl/plpgsql/src/gram.y,v 1.42 2003/04/27 22:21:22 tgl Exp $ + * $Header: /cvsroot/pgsql/src/pl/plpgsql/src/gram.y,v 1.43 2003/05/05 16:46:27 tgl Exp $ * * This software is copyrighted by Jan Wieck - Hamburg. 
* @@ -334,7 +334,7 @@ decl_statement : decl_varname decl_const decl_datatype decl_notnull decl_defval /* Composite type --- treat as rowtype */ PLpgSQL_row *row; - row = build_rowtype($3->typrelid); + row = plpgsql_build_rowtype($3->typrelid); row->dtype = PLPGSQL_DTYPE_ROW; row->refname = $1.name; row->lineno = $1.lineno; @@ -486,7 +486,7 @@ decl_cursor_arglist : decl_cursor_arg new->dtype = PLPGSQL_DTYPE_ROW; new->refname = strdup("*internal*"); - new->lineno = yylineno; + new->lineno = plpgsql_scanner_lineno(); new->rowtypeclass = InvalidOid; /* * We make temporary fieldnames/varnos arrays that @@ -553,7 +553,7 @@ decl_aliasitem : T_WORD nsi = plpgsql_ns_lookup(name, NULL); if (nsi == NULL) { - plpgsql_error_lineno = yylineno; + plpgsql_error_lineno = plpgsql_scanner_lineno(); elog(ERROR, "function has no parameter %s", name); } @@ -578,7 +578,7 @@ decl_varname : T_WORD plpgsql_convert_ident(yytext, &name, 1); /* name should be malloc'd for use as varname */ $$.name = strdup(name); - $$.lineno = yylineno; + $$.lineno = plpgsql_scanner_lineno(); pfree(name); } ; @@ -625,7 +625,7 @@ decl_defval : ';' PLpgSQL_dstring ds; PLpgSQL_expr *expr; - lno = yylineno; + lno = plpgsql_scanner_lineno(); expr = malloc(sizeof(PLpgSQL_expr)); plpgsql_dstring_init(&ds); plpgsql_dstring_append(&ds, "SELECT "); @@ -1034,7 +1034,7 @@ fori_varname : T_VARIABLE plpgsql_convert_ident(yytext, &name, 1); /* name should be malloc'd for use as varname */ $$.name = strdup(name); - $$.lineno = yylineno; + $$.lineno = plpgsql_scanner_lineno(); pfree(name); } | T_WORD @@ -1044,7 +1044,7 @@ fori_varname : T_VARIABLE plpgsql_convert_ident(yytext, &name, 1); /* name should be malloc'd for use as varname */ $$.name = strdup(name); - $$.lineno = yylineno; + $$.lineno = plpgsql_scanner_lineno(); pfree(name); } ; @@ -1405,7 +1405,7 @@ stmt_open : K_OPEN lno cursor_varptr if (tok != '(') { - plpgsql_error_lineno = yylineno; + plpgsql_error_lineno = plpgsql_scanner_lineno(); elog(ERROR, "cursor %s has 
arguments", $3->refname); } @@ -1427,7 +1427,7 @@ stmt_open : K_OPEN lno cursor_varptr if (strncmp(cp, "SELECT", 6) != 0) { - plpgsql_error_lineno = yylineno; + plpgsql_error_lineno = plpgsql_scanner_lineno(); elog(ERROR, "expected 'SELECT (', got '%s' (internal error)", new->argquery->query); } @@ -1436,7 +1436,7 @@ stmt_open : K_OPEN lno cursor_varptr cp++; if (*cp != '(') { - plpgsql_error_lineno = yylineno; + plpgsql_error_lineno = plpgsql_scanner_lineno(); elog(ERROR, "expected 'SELECT (', got '%s' (internal error)", new->argquery->query); } @@ -1454,13 +1454,13 @@ stmt_open : K_OPEN lno cursor_varptr if (tok == '(') { - plpgsql_error_lineno = yylineno; + plpgsql_error_lineno = plpgsql_scanner_lineno(); elog(ERROR, "cursor %s has no arguments", $3->refname); } if (tok != ';') { - plpgsql_error_lineno = yylineno; + plpgsql_error_lineno = plpgsql_scanner_lineno(); elog(ERROR, "syntax error at \"%s\"", yytext); } } @@ -1502,7 +1502,7 @@ cursor_varptr : T_VARIABLE if (((PLpgSQL_var *) yylval.variable)->datatype->typoid != REFCURSOROID) { - plpgsql_error_lineno = yylineno; + plpgsql_error_lineno = plpgsql_scanner_lineno(); elog(ERROR, "%s must be of type cursor or refcursor", ((PLpgSQL_var *) yylval.variable)->refname); } @@ -1517,7 +1517,7 @@ cursor_variable : T_VARIABLE if (((PLpgSQL_var *) yylval.variable)->datatype->typoid != REFCURSOROID) { - plpgsql_error_lineno = yylineno; + plpgsql_error_lineno = plpgsql_scanner_lineno(); elog(ERROR, "%s must be of type refcursor", ((PLpgSQL_var *) yylval.variable)->refname); } @@ -1583,8 +1583,7 @@ opt_lblname : T_WORD lno : { - plpgsql_error_lineno = yylineno; - $$ = yylineno; + $$ = plpgsql_error_lineno = plpgsql_scanner_lineno(); } ; @@ -1618,7 +1617,7 @@ read_sql_construct(int until, char buf[32]; PLpgSQL_expr *expr; - lno = yylineno; + lno = plpgsql_scanner_lineno(); plpgsql_dstring_init(&ds); plpgsql_dstring_append(&ds, (char *) sqlstart); @@ -1690,7 +1689,7 @@ read_datatype(int tok) bool needspace = false; int 
parenlevel = 0; - lno = yylineno; + lno = plpgsql_scanner_lineno(); /* Often there will be a lookahead token, but if not, get one */ if (tok == YYEMPTY) @@ -1769,14 +1768,14 @@ make_select_stmt(void) break; if (tok == 0) { - plpgsql_error_lineno = yylineno; + plpgsql_error_lineno = plpgsql_scanner_lineno(); elog(ERROR, "unexpected end of file"); } if (tok == K_INTO) { if (have_into) { - plpgsql_error_lineno = yylineno; + plpgsql_error_lineno = plpgsql_scanner_lineno(); elog(ERROR, "INTO specified more than once"); } tok = yylex(); @@ -1814,7 +1813,7 @@ make_select_stmt(void) break; default: - plpgsql_error_lineno = yylineno; + plpgsql_error_lineno = plpgsql_scanner_lineno(); elog(ERROR, "plpgsql: %s is not a variable", yytext); } @@ -1824,7 +1823,7 @@ make_select_stmt(void) row = malloc(sizeof(PLpgSQL_row)); row->dtype = PLPGSQL_DTYPE_ROW; row->refname = strdup("*internal*"); - row->lineno = yylineno; + row->lineno = plpgsql_scanner_lineno(); row->rowtypeclass = InvalidOid; row->nfields = nfields; row->fieldnames = malloc(sizeof(char *) * nfields); @@ -1945,7 +1944,7 @@ make_fetch_stmt(void) break; default: - plpgsql_error_lineno = yylineno; + plpgsql_error_lineno = plpgsql_scanner_lineno(); elog(ERROR, "plpgsql: %s is not a variable", yytext); } @@ -1955,7 +1954,7 @@ make_fetch_stmt(void) row = malloc(sizeof(PLpgSQL_row)); row->dtype = PLPGSQL_DTYPE_ROW; row->refname = strdup("*internal*"); - row->lineno = yylineno; + row->lineno = plpgsql_scanner_lineno(); row->rowtypeclass = InvalidOid; row->nfields = nfields; row->fieldnames = malloc(sizeof(char *) * nfields); @@ -2028,7 +2027,7 @@ check_assignable(PLpgSQL_datum *datum) case PLPGSQL_DTYPE_VAR: if (((PLpgSQL_var *) datum)->isconst) { - plpgsql_error_lineno = yylineno; + plpgsql_error_lineno = plpgsql_scanner_lineno(); elog(ERROR, "%s is declared CONSTANT", ((PLpgSQL_var *) datum)->refname); } diff --git a/src/pl/plpgsql/src/pl_comp.c b/src/pl/plpgsql/src/pl_comp.c index 5c88761e05..d62b237f11 100644 --- 
a/src/pl/plpgsql/src/pl_comp.c +++ b/src/pl/plpgsql/src/pl_comp.c @@ -3,7 +3,7 @@ * procedural language * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/pl/plpgsql/src/pl_comp.c,v 1.57 2003/04/27 22:21:22 tgl Exp $ + * $Header: /cvsroot/pgsql/src/pl/plpgsql/src/pl_comp.c,v 1.58 2003/05/05 16:46:27 tgl Exp $ * * This software is copyrighted by Jan Wieck - Hamburg. * @@ -141,7 +141,8 @@ plpgsql_compile(Oid fn_oid, int functype) procStruct = (Form_pg_proc) GETSTRUCT(procTup); proc_source = DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(&procStruct->prosrc))); - plpgsql_setinput(proc_source, functype); + plpgsql_scanner_init(proc_source, functype); + pfree(proc_source); plpgsql_error_funcname = pstrdup(NameStr(procStruct->proname)); plpgsql_error_lineno = 0; @@ -258,7 +259,7 @@ plpgsql_compile(Oid fn_oid, int functype) * For tuple type parameters, we set up a record of * that type */ - row = build_rowtype(typeStruct->typrelid); + row = plpgsql_build_rowtype(typeStruct->typrelid); row->refname = strdup(buf); @@ -496,6 +497,8 @@ plpgsql_compile(Oid fn_oid, int functype) if (parse_rc != 0) elog(ERROR, "plpgsql: parser returned %d ???", parse_rc); + plpgsql_scanner_finish(); + /* * If that was successful, complete the functions info. */ @@ -1200,7 +1203,7 @@ plpgsql_parse_wordrowtype(char *word) /* * Build and return the complete row definition */ - plpgsql_yylval.row = build_rowtype(classOid); + plpgsql_yylval.row = plpgsql_build_rowtype(classOid); pfree(cp[0]); pfree(cp[1]); @@ -1241,7 +1244,7 @@ plpgsql_parse_dblwordrowtype(char *word) /* * Build and return the complete row definition */ - plpgsql_yylval.row = build_rowtype(classOid); + plpgsql_yylval.row = plpgsql_build_rowtype(classOid); pfree(cp); @@ -1252,7 +1255,7 @@ plpgsql_parse_dblwordrowtype(char *word) * Build a rowtype data structure given the pg_class OID. 
*/ PLpgSQL_row * -build_rowtype(Oid classOid) +plpgsql_build_rowtype(Oid classOid) { PLpgSQL_row *row; HeapTuple classtup; @@ -1494,6 +1497,6 @@ plpgsql_add_initdatums(int **varnos) void plpgsql_yyerror(const char *s) { - plpgsql_error_lineno = plpgsql_yylineno; + plpgsql_error_lineno = plpgsql_scanner_lineno(); elog(ERROR, "%s at or near \"%s\"", s, plpgsql_yytext); } diff --git a/src/pl/plpgsql/src/plpgsql.h b/src/pl/plpgsql/src/plpgsql.h index 1140cebddc..e10dd18ac8 100644 --- a/src/pl/plpgsql/src/plpgsql.h +++ b/src/pl/plpgsql/src/plpgsql.h @@ -3,7 +3,7 @@ * procedural language * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/pl/plpgsql/src/plpgsql.h,v 1.35 2003/04/27 22:21:22 tgl Exp $ + * $Header: /cvsroot/pgsql/src/pl/plpgsql/src/plpgsql.h,v 1.36 2003/05/05 16:46:28 tgl Exp $ * * This software is copyrighted by Jan Wieck - Hamburg. * @@ -573,13 +573,10 @@ extern PLpgSQL_datum **plpgsql_Datums; extern int plpgsql_error_lineno; extern char *plpgsql_error_funcname; -/* linkage to the real yytext and yylineno variables */ +/* linkage to the real yytext variable */ extern char *plpgsql_base_yytext; #define plpgsql_yytext plpgsql_base_yytext -extern int plpgsql_base_yylineno; - -#define plpgsql_yylineno plpgsql_base_yylineno extern PLpgSQL_function *plpgsql_curr_compile; @@ -601,7 +598,7 @@ extern int plpgsql_parse_tripwordtype(char *word); extern int plpgsql_parse_wordrowtype(char *word); extern int plpgsql_parse_dblwordrowtype(char *word); extern PLpgSQL_type *plpgsql_parse_datatype(char *string); -extern PLpgSQL_row *build_rowtype(Oid classOid); +extern PLpgSQL_row *plpgsql_build_rowtype(Oid classOid); extern void plpgsql_adddatum(PLpgSQL_datum * new); extern int plpgsql_add_initdatums(int **varnos); extern void plpgsql_yyerror(const char *s); @@ -660,6 +657,8 @@ extern int plpgsql_yyparse(void); extern int plpgsql_base_yylex(void); extern int plpgsql_yylex(void); extern void plpgsql_push_back_token(int token); -extern void plpgsql_setinput(char *s, int 
functype); +extern int plpgsql_scanner_lineno(void); +extern void plpgsql_scanner_init(const char *str, int functype); +extern void plpgsql_scanner_finish(void); #endif /* PLPGSQL_H */ diff --git a/src/pl/plpgsql/src/scan.l b/src/pl/plpgsql/src/scan.l index 7e89ca8309..5f7e162f08 100644 --- a/src/pl/plpgsql/src/scan.l +++ b/src/pl/plpgsql/src/scan.l @@ -4,7 +4,7 @@ * procedural language * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/pl/plpgsql/src/Attic/scan.l,v 1.24 2002/11/07 06:06:17 tgl Exp $ + * $Header: /cvsroot/pgsql/src/pl/plpgsql/src/Attic/scan.l,v 1.25 2003/05/05 16:46:28 tgl Exp $ * * This software is copyrighted by Jan Wieck - Hamburg. * @@ -39,20 +39,26 @@ #include "plpgsql.h" -static char *plpgsql_source; -static int plpgsql_bytes_left; +/* No reason to constrain amount of data slurped */ +#define YY_READ_BUF_SIZE 16777216 + +/* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */ +#define fprintf(file, fmt, msg) ereport(FATAL, (errmsg_internal("%s", msg))) + +/* Handles to the buffer that the lexer uses internally */ +static YY_BUFFER_STATE scanbufhandle; +static char *scanbuf; + static int scanner_functype; static int scanner_typereported; static int pushback_token; static bool have_pushback_token; static int lookahead_token; static bool have_lookahead_token; +static const char *cur_line_start; +static int cur_line_num; int plpgsql_SpaceScanned = 0; - -static void plpgsql_input(char *buf, int *result, int max); - -#define YY_INPUT(buf,res,max) plpgsql_input(buf, &res, max) %} %option 8bit @@ -60,7 +66,6 @@ static void plpgsql_input(char *buf, int *result, int max); %option nounput %option noyywrap -%option yylineno %option case-insensitive @@ -78,11 +83,12 @@ space [ \t\n\r\f] %% /* ---------- - * Local variable in scanner to remember where + * Local variables in scanner to remember where * a string or comment started * ---------- */ int start_lineno = 0; + char *start_charpos = NULL; /* ---------- * Reset the state when 
entering the scanner @@ -185,7 +191,7 @@ dump { return O_DUMP; } {digit}+ { return T_NUMBER; } \". { - plpgsql_error_lineno = yylineno; + plpgsql_error_lineno = plpgsql_scanner_lineno(); elog(ERROR, "unterminated quoted identifier"); } @@ -201,7 +207,7 @@ dump { return O_DUMP; } */ --[^\r\n]* ; -\/\* { start_lineno = yylineno; +\/\* { start_lineno = plpgsql_scanner_lineno(); BEGIN IN_COMMENT; } <IN_COMMENT>\*\/ { BEGIN INITIAL; plpgsql_SpaceScanned = 1; } @@ -214,22 +220,30 @@ dump { return O_DUMP; } /* ---------- * Collect anything inside of ''s and return one STRING + * + * Hacking yytext/yyleng here lets us avoid using yymore(), which is + * a win for performance. It's safe because we know the underlying + * input buffer is not changing. * ---------- */ -' { start_lineno = yylineno; +' { + start_lineno = plpgsql_scanner_lineno(); + start_charpos = yytext; BEGIN IN_STRING; - yymore(); } -<IN_STRING>\\. | -<IN_STRING>'' { yymore(); } -<IN_STRING>' { BEGIN INITIAL; +<IN_STRING>\\. { } +<IN_STRING>'' { } +<IN_STRING>' { + yyleng += (yytext - start_charpos); /* widen the token so it starts at the opening quote */ + yytext = start_charpos; + BEGIN INITIAL; return T_STRING; } <IN_STRING><<EOF>> { plpgsql_error_lineno = start_lineno; elog(ERROR, "unterminated string"); } -<IN_STRING>[^'\\]* { yymore(); } +<IN_STRING>[^'\\]* { } /* ---------- * Any unmatched character is returned as is @@ -240,26 +254,6 @@ dump { return O_DUMP; } %% -static void -plpgsql_input(char *buf, int *result, int max) -{ - int n = max; - - if (n > plpgsql_bytes_left) - n = plpgsql_bytes_left; - - if (n == 0) - { - *result = YY_NULL; - return; - } - - *result = n; - memcpy(buf, plpgsql_source, n); - plpgsql_source += n; - plpgsql_bytes_left -= n; -} - /* * This is the yylex routine called from outside. It exists to provide * a pushback facility, as well as to allow us to parse syntax that @@ -319,17 +313,35 @@ plpgsql_push_back_token(int token) have_pushback_token = true; } +/* + * Get the line number at which the current token ends. This substitutes + * for flex's very poorly implemented yylineno facility. 
+ * + * We assume that flex has written a '\0' over the character following the + * current token in scanbuf. So, we just have to count the '\n' characters + * before that. We optimize this a little by keeping track of the last + * '\n' seen so far. + */ +int +plpgsql_scanner_lineno(void) +{ + const char *c; + + while ((c = strchr(cur_line_start, '\n')) != NULL) + { + cur_line_start = c + 1; + cur_line_num++; + } + return cur_line_num; +} /* - * Initialize the scanner for new input. + * Called before any actual parsing is done */ void -plpgsql_setinput(char *source, int functype) +plpgsql_scanner_init(const char *str, int functype) { - yyrestart(NULL); - yylineno = 1; - - plpgsql_source = source; + Size slen; /*---------- * Hack: skip any initial newline, so that in the common coding layout @@ -339,16 +351,47 @@ plpgsql_setinput(char *source, int functype) * we will think "line 1" is what the programmer thinks of as line 1. *---------- */ - if (*plpgsql_source == '\r') - plpgsql_source++; - if (*plpgsql_source == '\n') - plpgsql_source++; + if (*str == '\r') + str++; + if (*str == '\n') + str++; - plpgsql_bytes_left = strlen(plpgsql_source); + slen = strlen(str); + /* + * Might be left over after ereport() + */ + if (YY_CURRENT_BUFFER) + yy_delete_buffer(YY_CURRENT_BUFFER); + + /* + * Make a scan buffer with special termination needed by flex. + */ + scanbuf = palloc(slen + 2); + memcpy(scanbuf, str, slen); + scanbuf[slen] = scanbuf[slen + 1] = YY_END_OF_BUFFER_CHAR; + scanbufhandle = yy_scan_buffer(scanbuf, slen + 2); + + /* Other setup */ scanner_functype = functype; scanner_typereported = 0; have_pushback_token = false; have_lookahead_token = false; + + cur_line_start = scanbuf; + cur_line_num = 1; + + BEGIN(INITIAL); +} + + +/* + * Called after parsing is done to clean up after plpgsql_scanner_init() + */ +void +plpgsql_scanner_finish(void) +{ + yy_delete_buffer(scanbufhandle); + pfree(scanbuf); } -- 2.11.0