OSDN Git Service

Added new version of ecpg's parser generator script. This one was written by
authorMichael Meskes <meskes@postgresql.org>
Thu, 3 Mar 2011 12:43:50 +0000 (13:43 +0100)
committerMichael Meskes <meskes@postgresql.org>
Thu, 3 Mar 2011 12:43:50 +0000 (13:43 +0100)
Andy Colson <andy@squeakycode.net>.

src/interfaces/ecpg/preproc/Makefile
src/interfaces/ecpg/preproc/README.parser
src/interfaces/ecpg/preproc/ecpg.trailer
src/interfaces/ecpg/preproc/parse2.pl [new file with mode: 0644]

index 8e85fe9..36b3c26 100644 (file)
@@ -57,8 +57,8 @@ else
        @$(missing) flex $< $@
 endif
 
-preproc.y: ../../../backend/parser/gram.y parse.pl ecpg.addons ecpg.header ecpg.tokens ecpg.trailer ecpg.type
-       $(PERL) $(srcdir)/parse.pl $(srcdir) < $< > $@
+preproc.y: ../../../backend/parser/gram.y parse2.pl ecpg.addons ecpg.header ecpg.tokens ecpg.trailer ecpg.type
+       $(PERL) $(srcdir)/parse2.pl $(srcdir) < $< > $@
        $(PERL) $(srcdir)/check_rules.pl $(srcdir) $<
 
 ecpg_keywords.o c_keywords.o keywords.o preproc.o parser.o: preproc.h
index ddc3061..6d6daac 100644 (file)
@@ -3,7 +3,7 @@ ECPG modifies and extends the core grammar in a way that
    defined in ecpg.tokens, types are defined in ecpg.type
 2) most tokens from the core grammar are simply converted
    to literals concatenated together to form the SQL string
-   passed to the server, this is done by parse.pl.
+   passed to the server, this is done by parse2.pl.
 3) some rules need side-effects, actions are either added
    or completely overridden (compared to the basic token
    concatenation) for them, these are defined in ecpg.addons,
@@ -20,7 +20,7 @@ rules concatenated together. e.g. if gram.y has this:
        ruleA: tokenA tokenB tokenC {...}
 then "dumpedtokens" is "ruleAtokenAtokenBtokenC".
 "postfix" above can be:
-a) "block" - the automatic rule created by parse.pl is completely
+a) "block" - the automatic rule created by parse2.pl is completely
     overridden, the code block has to be written completely as
     it were in a plain bison grammar
 b) "rule" - the automatic rule is extended on, so new syntaxes
index 4e0c52c..3a941ce 100644 (file)
@@ -1591,7 +1591,7 @@ ECPGCKeywords: S_AUTO                             { $$ = mm_strdup("auto"); }
  * CONNECTION can be added back in all_unreserved_keyword, but CURRENT and
  * INPUT are reserved for ecpg purposes.
  *
- * The mentioned exclusions are done by $replace_line settings in parse.pl.
+ * The mentioned exclusions are done by $replace_line settings in parse2.pl.
  */
 all_unreserved_keyword: unreserved_keyword     { $$ = $1; }
                | ECPGunreserved_interval                       { $$ = $1; }
diff --git a/src/interfaces/ecpg/preproc/parse2.pl b/src/interfaces/ecpg/preproc/parse2.pl
new file mode 100644 (file)
index 0000000..dd50937
--- /dev/null
@@ -0,0 +1,648 @@
+#!/usr/bin/perl
+# src/interfaces/ecpg/preproc/parse2.pl
+# parser generater for ecpg version 2
+# call with backend parser as stdin
+#
+# Copyright (c) 2007-2011, PostgreSQL Global Development Group
+#
+# Written by Mike Aubury <mike.aubury@aubit.com>
+#           Michael Meskes <meskes@postgresql.org>
+#            Andy Colson <andy@squeakycode.net>   
+#
+# Placed under the same license as PostgreSQL.
+#
+
+use strict;
+use warnings;
+no warnings 'uninitialized';
+
+my $path = shift @ARGV;
+$path = "." unless $path;
+
+my $copymode              = 0;
+my $brace_indent          = 0;
+my $yaccmode              = 0;
+my $header_included       = 0;
+my $feature_not_supported = 0;
+my $tokenmode             = 0;
+
+my(%buff, $infield, $comment, %tokens, %addons );
+my($stmt_mode, @fields);
+my($line, $non_term_id);
+
+
+# some token have to be replaced by other symbols
+# either in the rule
+my %replace_token = (
+       'BCONST' => 'ecpg_bconst',
+       'FCONST' => 'ecpg_fconst',
+       'Sconst' => 'ecpg_sconst',
+       'IDENT'  => 'ecpg_ident',
+       'PARAM'  => 'ecpg_param',
+);
+
+# or in the block
+my %replace_string = (
+       'WITH_TIME'    => 'with time',
+       'NULLS_FIRST'  => 'nulls first',
+       'NULLS_LAST'   => 'nulls last',
+       'TYPECAST'     => '::',
+       'DOT_DOT'      => '..',
+       'COLON_EQUALS' => ':=',
+);
+
+# specific replace_types for specific non-terminals - never include the ':'
+# ECPG-only replace_types are defined in ecpg-replace_types
+my %replace_types = (
+       'PrepareStmt'      => '<prep>',
+       'opt_array_bounds' => '<index>',
+
+       # "ignore" means: do not create type and rules for this non-term-id
+       'stmtblock'          => 'ignore',
+       'stmtmulti'          => 'ignore',
+       'CreateAsStmt'       => 'ignore',
+       'DeallocateStmt'     => 'ignore',
+       'ColId'              => 'ignore',
+       'type_function_name' => 'ignore',
+       'ColLabel'           => 'ignore',
+       'Sconst'             => 'ignore',
+);
+
+# these replace_line commands excise certain keywords from the core keyword
+# lists.  Be sure to account for these in ColLabel and related productions.
+my %replace_line = (
+       'unreserved_keywordCONNECTION' => 'ignore',
+       'unreserved_keywordCURRENT_P'  => 'ignore',
+       'unreserved_keywordDAY_P'      => 'ignore',
+       'unreserved_keywordHOUR_P'     => 'ignore',
+       'unreserved_keywordINPUT_P'    => 'ignore',
+       'unreserved_keywordMINUTE_P'   => 'ignore',
+       'unreserved_keywordMONTH_P'    => 'ignore',
+       'unreserved_keywordSECOND_P'   => 'ignore',
+       'unreserved_keywordYEAR_P'     => 'ignore',
+       'col_name_keywordCHAR_P'       => 'ignore',
+       'col_name_keywordINT_P'        => 'ignore',
+       'col_name_keywordVALUES'       => 'ignore',
+       'reserved_keywordTO'           => 'ignore',
+       'reserved_keywordUNION'        => 'ignore',
+
+       # some other production rules have to be ignored or replaced
+       'fetch_argsFORWARDopt_from_incursor_name'      => 'ignore',
+       'fetch_argsBACKWARDopt_from_incursor_name'     => 'ignore',
+       "opt_array_boundsopt_array_bounds'['Iconst']'" => 'ignore',
+       'VariableShowStmtSHOWvar_name'                 => 'SHOW var_name ecpg_into',
+       'VariableShowStmtSHOWTIMEZONE' => 'SHOW TIME ZONE ecpg_into',
+       'VariableShowStmtSHOWTRANSACTIONISOLATIONLEVEL' => 'SHOW TRANSACTION ISOLATION LEVEL ecpg_into',
+       'VariableShowStmtSHOWSESSIONAUTHORIZATION' => 'SHOW SESSION AUTHORIZATION ecpg_into',
+       'returning_clauseRETURNINGtarget_list' => 'RETURNING target_list ecpg_into',
+       'ExecuteStmtEXECUTEnameexecute_param_clause' => 'EXECUTE prepared_name execute_param_clause execute_rest',
+       'ExecuteStmtCREATEOptTempTABLEcreate_as_targetASEXECUTEnameexecute_param_clause' => 
+                       'CREATE OptTemp TABLE create_as_target AS EXECUTE prepared_name execute_param_clause',
+       'PrepareStmtPREPAREnameprep_type_clauseASPreparableStmt' => 
+                       'PREPARE prepared_name prep_type_clause AS PreparableStmt',
+       'var_nameColId' => 'ECPGColId',
+);
+
+preload_addons();
+
+main();
+
+dump_buffer('header');
+dump_buffer('tokens');
+dump_buffer('types');
+dump_buffer('ecpgtype');
+dump_buffer('orig_tokens');
+print '%%', "\n";
+print 'prog: statements;', "\n";
+dump_buffer('rules');
+include_file( 'trailer', 'ecpg.trailer' );
+dump_buffer('trailer');
+
+sub main
+{
+       line: while (<>) 
+       {
+               if (/ERRCODE_FEATURE_NOT_SUPPORTED/) 
+               {
+                       $feature_not_supported = 1;
+                       next line;
+               }
+
+               chomp; 
+               
+               # comment out the line below to make the result file match (blank line wise)
+               # the prior version. 
+               #next if ($_ eq '');
+
+               # Dump the action for a rule -
+               # stmt_mode indicates if we are processing the 'stmt:'
+               # rule (mode==0 means normal,  mode==1 means stmt:)
+               # flds are the fields to use. These may start with a '$' - in
+               # which case they are the result of a previous non-terminal
+               #
+               # if they dont start with a '$' then they are token name
+               #
+               # len is the number of fields in flds...
+               # leadin is the padding to apply at the beginning (just use for formatting)
+
+               if (/^%%/) {
+                       $tokenmode = 2;
+                       $copymode  = 1;
+                       $yaccmode++;
+                       $infield    = 0;
+               }
+
+               my $prec = 0;
+
+               # Make sure any braces are split
+               s/{/ { /g;
+               s/}/ } /g;
+
+               # Any comments are split
+               s|\/\*| /* |g;
+               s|\*\/| */ |g;
+
+               # Now split the line into individual fields
+               my @arr = split(' ');
+
+               if ( $arr[0] eq '%token' && $tokenmode == 0 ) 
+               {
+                       $tokenmode = 1;
+                       include_file( 'tokens', 'ecpg.tokens' );
+               }
+               elsif ( $arr[0] eq '%type' && $header_included == 0 ) 
+               {
+                       include_file( 'header',   'ecpg.header' );
+                       include_file( 'ecpgtype', 'ecpg.type' );
+                       $header_included = 1;
+               }
+
+               if ( $tokenmode == 1 ) 
+               {
+                       my $str = '';
+                       my $prior = '';
+                       for my $a (@arr)
+                       {
+                               if ( $a eq '/*' ) 
+                               {
+                                       $comment++;
+                                       next;
+                               }
+                               if ( $a eq '*/' ) 
+                               {
+                                       $comment--;
+                                       next;
+                               }
+                               if ($comment) 
+                               {
+                                       next;
+                               }
+                               if ( substr( $a, 0, 1 ) eq '<' ) {
+                                       next;
+
+                                       # its a type
+                               }
+                               $tokens{ $a } = 1;
+
+                               $str = $str . ' ' . $a;
+                               if ( $a eq 'IDENT' && $prior eq '%nonassoc' ) 
+                               {
+                                       # add two more tokens to the list
+                                       $str = $str . "\n%nonassoc CSTRING\n%nonassoc UIDENT";
+                               }
+                               $prior = $a;
+                       }
+                       add_to_buffer( 'orig_tokens', $str );
+                       next line;
+               }
+
+               # Dont worry about anything if we're not in the right section of gram.y
+               if ( $yaccmode != 1 ) 
+               {
+                       next line;
+               }
+               
+
+               # Go through each field in turn
+               for (my $fieldIndexer = 0 ; $fieldIndexer < scalar(@arr) ; $fieldIndexer++ ) 
+               {
+                       if ( $arr[$fieldIndexer] eq '*/' && $comment ) 
+                       {
+                               $comment = 0;
+                               next;
+                       }
+                       elsif ($comment) 
+                       {
+                               next;
+                       }
+                       elsif ( $arr[$fieldIndexer] eq '/*' ) 
+                       {
+                               # start of a multiline comment
+                               $comment = 1;
+                               next;
+                       }
+                       elsif ( $arr[$fieldIndexer] eq '//' ) 
+                       {
+                               next line;
+                       }
+                       elsif ( $arr[$fieldIndexer] eq '}' ) 
+                       {
+                               $brace_indent--;
+                               next;
+                       }
+                       elsif ( $arr[$fieldIndexer] eq '{' ) 
+                       {
+                               $brace_indent++;
+                               next;
+                       }
+
+                       if ( $brace_indent > 0 ) 
+                       {
+                               next;
+                       }
+                       if ( $arr[$fieldIndexer] eq ';' ) 
+                       {
+                               if ($copymode) 
+                               {
+                                       if ( $infield ) 
+                                       {
+                                               dump_line( $stmt_mode, \@fields );
+                                       }
+                                       add_to_buffer( 'rules', ";\n\n" );
+                               }
+                               else 
+                               {
+                                       $copymode = 1;
+                               }
+                               @fields   = ();
+                               $infield  = 0;
+                               $line     = '';
+                               next;
+                       }
+
+                       if ( $arr[$fieldIndexer] eq '|' ) 
+                       {
+                               if ($copymode) 
+                               {
+                                       if ( $infield  ) 
+                                       {
+                                               $infield = $infield + dump_line( $stmt_mode, \@fields );
+                                       }
+                                       if ( $infield > 1 ) 
+                                       {
+                                               $line = '| ';
+                                       }
+                               }
+                               @fields = ();
+                               next;
+                       }
+
+                       if ( exists $replace_token{ $arr[$fieldIndexer] } ) 
+                       {
+                               $arr[$fieldIndexer] = $replace_token{ $arr[$fieldIndexer] };
+                       }
+
+                       # Are we looking at a declaration of a non-terminal ?
+                       if ( ( $arr[$fieldIndexer] =~ /[A-Za-z0-9]+:/ )
+                               || $arr[ $fieldIndexer + 1 ] eq ':' )
+                       {
+                               $non_term_id = $arr[$fieldIndexer];
+                               $non_term_id =~ tr/://d;
+
+                               if ( not defined $replace_types{$non_term_id} ) 
+                               {
+                                       $replace_types{$non_term_id} = '<str>';
+                                       $copymode = 1;
+                               }
+                               elsif ( $replace_types{$non_term_id} eq 'ignore' ) 
+                               {
+                                       $copymode = 0;
+                                       $line     = '';
+                                       next line;
+                               }
+                               $line = $line . ' ' . $arr[$fieldIndexer];
+
+                               # Do we have the : attached already ?
+                               # If yes, we'll have already printed the ':'
+                               if ( !( $arr[$fieldIndexer] =~ '[A-Za-z0-9]+:' ) ) 
+                               {
+                                       # Consume the ':' which is next...
+                                       $line = $line . ':';
+                                       $fieldIndexer++;
+                               }
+
+                               # Special mode?
+                               if ( $non_term_id eq 'stmt' ) 
+                               {
+                                       $stmt_mode = 1;
+                               }
+                               else 
+                               {
+                                       $stmt_mode = 0;
+                               }
+                               my $tstr = '%type ' . $replace_types{$non_term_id} . ' ' . $non_term_id;
+                               add_to_buffer( 'types', $tstr );
+
+                               if ($copymode) 
+                               {
+                                       add_to_buffer( 'rules', $line );
+                               }
+                               $line     = '';
+                               @fields   = ();
+                               $infield  = 1;
+                               next;
+                       }
+                       elsif ($copymode) {
+                               $line = $line . ' ' . $arr[$fieldIndexer];
+                       }
+                       if ( $arr[$fieldIndexer] eq '%prec' ) 
+                       {
+                               $prec = 1;
+                               next;
+                       }
+
+                       if (   $copymode
+                               && !$prec
+                               && !$comment
+                               && length( $arr[$fieldIndexer] )
+                               && $infield )
+                       {
+                               if (
+                                       $arr[$fieldIndexer] ne 'Op'
+                                       && ( $tokens{ $arr[$fieldIndexer] } > 0 || $arr[$fieldIndexer] =~ /'.+'/ )
+                                       || $stmt_mode == 1
+                                 )
+                               {
+                                       my $S;
+                                       if ( exists $replace_string{ $arr[$fieldIndexer] } ) 
+                                       {
+                                               $S = $replace_string{ $arr[$fieldIndexer] };
+                                       }
+                                       else 
+                                       {
+                                               $S = $arr[$fieldIndexer];
+                                       }
+                                       $S =~ s/_P//g;
+                                       $S =~ tr/'//d;
+                                       if ( $stmt_mode == 1 ) 
+                                       {
+                                               push(@fields, $S);
+                                       }
+                                       else 
+                                       {
+                                               push(@fields, lc($S));
+                                       }
+                               }
+                               else 
+                               {
+                                       push(@fields, '$' . (scalar(@fields)+1));
+                               }
+                       }
+               }
+       }
+}
+
+
+# append a file onto a buffer.
+# Arguments:  buffer_name, filename (without path)
+sub include_file 
+{
+       my ($buffer, $filename) = @_;
+       my $full = "$path/$filename";
+       open(my $fh, '<', $full) or die;
+       while ( <$fh> ) 
+       {
+               chomp;
+               add_to_buffer( $buffer, $_ );
+       }
+       close($fh);
+}
+
+sub include_addon
+{
+       my($buffer, $block, $fields, $stmt_mode) = @_;
+       my $rec = $addons{$block};
+       return 0 unless $rec;
+       
+       if ( $rec->{type} eq 'rule' ) 
+       {
+               dump_fields( $stmt_mode, $fields, ' { ' );
+       }
+       elsif ( $rec->{type} eq 'addon' ) 
+       {
+               add_to_buffer( 'rules', ' { ' );
+       }
+
+       #add_to_buffer( $stream, $_ );
+       #We have an array to add to the buffer, we'll add it ourself instead of 
+       #calling add_to_buffer, which does not know about arrays
+       
+       push( @{ $buff{$buffer} }, @{ $rec->{lines} } );
+
+       if ( $rec->{type} eq 'addon' ) 
+       {
+               dump_fields( $stmt_mode, $fields, '' );
+       }
+
+
+       # if we added something (ie there are lines in our array), return 1
+       return 1 if (scalar(@{ $rec->{lines} }) > 0);
+       return 0;
+}
+
+
+# include_addon does this same thing, but does not call this
+# sub... so if you change this, you need to fix include_addon too
+#   Pass:  buffer_name, string_to_append
+sub add_to_buffer 
+{
+       push( @{ $buff{$_[0]} }, "$_[1]\n" );
+}
+
+sub dump_buffer 
+{
+       my($buffer) = @_;
+       print '/* ', $buffer, ' */',"\n";
+       my $ref = $buff{$buffer};
+       print @$ref;
+}
+
+sub dump_fields 
+{
+       my ( $mode, $flds, $ln ) = @_;
+       my $len = scalar(@$flds);
+
+       if ( $mode == 0 ) 
+       {
+               #Normal
+               add_to_buffer( 'rules', $ln );
+               if ( $feature_not_supported == 1 ) 
+               {
+                       # we found an unsupported feature, but we have to
+                       # filter out ExecuteStmt: CREATE OptTemp TABLE ...
+                       # because the warning there is only valid in some situations
+                       if ( $flds->[0] ne 'create' || $flds->[2] ne 'table' ) 
+                       {
+                               add_to_buffer( 'rules', 
+                                       'mmerror(PARSE_ERROR, ET_WARNING, "unsupported feature will be passed to server");'
+                               );
+                       }
+                       $feature_not_supported = 0;
+               }
+
+               if ( $len == 0 ) 
+               {
+                       # We have no fields ?
+                       add_to_buffer( 'rules', ' $$=EMPTY; }' );
+               } 
+               else 
+               {
+                       # Go through each field and try to 'aggregate' the tokens 
+                       # into a single 'mm_strdup' where possible
+                       my @flds_new;
+                       my $str;
+                       for ( my $z = 0 ; $z < $len ; $z++ ) 
+                       {
+                               if ( substr( $flds->[$z], 0, 1 ) eq '$' ) 
+                               {
+                                       push(@flds_new, $flds->[$z]);
+                                       next;
+                               }
+
+                               $str = $flds->[$z];
+
+                               while (1) 
+                               {
+                                       if ( $z >= $len - 1 || substr( $flds->[ $z + 1 ], 0, 1 ) eq '$' )
+                                       {
+                                               # We're at the end...
+                                               push(@flds_new,  "mm_strdup(\"$str\")");
+                                               last;
+                                       }
+                                       $z++;
+                                       $str = $str . ' ' . $flds->[$z];
+                               }
+                       }
+
+                       # So - how many fields did we end up with ?
+                       $len = scalar(@flds_new);
+                       if ( $len == 1 ) 
+                       {
+                               # Straight assignement
+                               $str = ' $$ = ' . $flds_new[0] . ';';
+                               add_to_buffer( 'rules', $str );
+                       }
+                       else 
+                       {
+                               # Need to concatenate the results to form
+                               # our final string
+                               $str = ' $$ = cat_str(' . $len . ',' . join(',', @flds_new) . ');';
+                               add_to_buffer( 'rules', $str );
+                       }
+                       add_to_buffer( 'rules', '}' );
+               }
+       }
+       else
+       {
+               # we're in the stmt: rule
+               if ($len)
+               {
+                       # or just the statement ...
+                       add_to_buffer( 'rules', ' { output_statement($1, 0, ECPGst_normal); }' );
+               }
+               else
+               {
+                       add_to_buffer( 'rules', ' { $$ = NULL; }' );
+               }
+       }
+}
+
+
+sub dump_line 
+{
+       my($stmt_mode, $fields) = @_;
+       my $block = $non_term_id . $line;
+       $block =~ tr/ |//d;
+       my $rep = $replace_line{$block};
+       if ($rep)
+       {
+               if ($rep eq 'ignore' ) 
+               {
+                       return 0;
+               }
+
+               if ( index( $line, '|' ) != -1 ) 
+               {
+                       $line = '| ' . $rep;
+               }
+               else 
+               {
+                       $line = $rep;
+               }
+               $block = $non_term_id . $line;
+               $block =~ tr/ |//d;
+       }
+       add_to_buffer( 'rules', $line );
+       my $i = include_addon( 'rules', $block, $fields, $stmt_mode);
+       if ( $i == 0 ) 
+       {
+               dump_fields( $stmt_mode, $fields, ' { ' );
+       }
+       return 1;
+}
+
+=top
+       load addons into cache
+       %addons = { 
+               stmtClosePortalStmt => { 'type' => 'block', 'lines' => [ "{", "if (INFORMIX_MODE)" ..., "}" ] },
+               stmtViewStmt => { 'type' => 'rule', 'lines' => [ "| ECPGAllocateDescr", ... ] }
+       }
+
+=cut
+sub preload_addons
+{
+       my $filename = $path . "/ecpg.addons";
+       open(my $fh, '<', $filename) or die;
+       # there may be multple lines starting ECPG: and then multiple lines of code.
+       # the code need to be add to all prior ECPG records.
+       my (@needsRules, @code, $record);
+       # there may be comments before the first ECPG line, skip them
+       my $skip = 1;
+       while ( <$fh> ) 
+       {
+               if (/^ECPG:\s(\S+)\s?(\w+)?/)
+               {
+                       $skip = 0;
+                       if (@code)
+                       {
+                               for my $x (@needsRules)
+                               {
+                                       push(@{ $x->{lines} }, @code);
+                               }
+                               @code = ();
+                               @needsRules = ();
+                       }
+                       $record = {};
+                       $record->{type} = $2;
+                       $record->{lines} = [];
+                       if (exists $addons{$1}) { die "Ga! there are dups!\n"; }
+                       $addons{$1} = $record;
+                       push(@needsRules, $record);
+               } 
+               else
+               {
+                       next if $skip;
+                       push(@code, $_);
+               }
+       }
+       close($fh);
+       if (@code)
+       {
+               for my $x (@needsRules)
+               {
+                       push(@{ $x->{lines} }, @code);
+               }
+       }
+}
+
+