2 * Copyright (C) 1991,1992 Erik Schoenfelder (schoenfr@ibr.cs.tu-bs.de)
4 * This file is part of NASE A60.
6 * NASE A60 is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2, or (at your option)
11 * NASE A60 is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with NASE A60; see the file COPYING. If not, write to the Free
18 * Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
22 * Erik Schoenfelder (schoenfr@ibr.cs.tu-bs.de)
24 * The Algol 60 scanner.
26 * a big hack includes the scanning of non quoted keywords:
27 * begin vprint ("nase") end
28 * this cannot be used within identifiers with white spaces:
29 * bad: begin integer foo bar; end
30 * ok: begin integer foobar; end
32 * so scan_strict is set, if a60_strict is set, or if a quoted begin
44 * the linenumber of the scanner; reported as lineno is the last seen
45 * line; (delay for the look-ahead-token)
48 static int scan_lineno;
52 * character test and conversion.
54 #define mischar(c) (((c) >= 'a' && (c) <= 'z') \
55 || ((c) >= 'A' && (c) <= 'Z'))
56 #define misdigit(c) ((c) >= '0' && (c) <= '9')
57 #define misupper(c) ((c) >= 'A' && (c) <= 'Z')
58 #define mtolower(c) (misupper(c) ? (c) - 'A' + 'a' : (c))
59 #define mprintable(c) ((c) >= 32 && (c) <= 126)
65 static int scan_exp ();
66 static void skip_following_comment ();
67 static void skip_end_comment ();
68 static void skip_over_whites ();
69 static int s_getchar ();
70 static void s_unput ();
82 * translate a string into a readable ascii form:
90 static char *buf = (char *) 0;
104 /* maximum is a two char escape-sequence for one input char: */
105 buf = xmalloc ((long) (2 * strlen (s) + 1));
107 for (ptr = buf; len > 0; s++, ptr++, len--) {
110 *ptr++ = '\\', *ptr = 'n';
112 *ptr++ = '\\', *ptr = 'r';
113 else if (mprintable (*s))
116 *ptr++ = '^', *ptr = '@';
117 else if (*s >= 1 && *s <= 26)
118 *ptr++ = '^', *ptr = 'A' + *s - 1;
129 * return a printable string for a single char:
140 return asc_str (tmp, 1);
145 * give a readable string of the scanned input.
152 static char *rval = (char *) 0;
159 rval = xstrdup (asc_str (yytext, -1));
171 * if there is a ``parse error'' or a ``syntax error''
172 * reported from the skeleton, print the scanned string too.
174 if (! strcmp (s, "parse error")
175 || ! strcmp (s, "syntax error")) {
177 a60_error (infname, lineno, "%s (scanned: %s)\n",
181 a60_error (infname, lineno, "%s\n", s);
188 a60_error (infname, lineno, "warning: %s\n", s);
193 * the keywords. (they are expected to be enclosed in ').
196 #define fstrcmp(a, b) \
197 (*(a) != *(b) || strcmp (a, b))
207 { "boolean", TBOOL },
209 /*** { "comment", TCOMMENT }, ***/
219 { "greater", TGREATER },
222 { "integer", TINTEGER },
226 { "notequal", TNOTEQUAL },
227 { "notgreater", TNOTGREATER },
228 { "notless", TNOTLESS },
232 { "procedure", TPROC },
235 { "string", TSTRING },
236 { "switch", TSWITCH },
247 * look for a keyword in the keyword table; if found, return the token,
248 * if not found return 0.
259 lower_str = xmalloc ((long) strlen (s) + 1);
260 for (i = 0; i < strlen (s); i++)
261 lower_str [i] = mtolower(s[i]);
265 for (kp = keywords; kp->name && *kp->name; kp++)
266 if (! fstrcmp (lower_str, kp->name))
276 * the special strings; short constant strings, but no (real) keywords.
279 #define MAX_SPEC 2 /* maximum length of a special */
284 { "+", '+' }, { "-", '-' },
285 { "*", '*' }, { "/", '/' },
286 { ",", ',' }, { ".", '.' },
287 { ";", ';' }, { "(", '(' },
288 { ")", ')' }, { ":", ':' },
289 { "[", '[' }, { "]", ']' },
290 { "..", ':' }, { "(/", '[' },
295 { "<=", TNOTGREATER },
309 static int skip_white;
312 * current linenumber.
321 static char *inbuf, *ib_ptr;
322 static int ib_max, ib_len, ib_eof;
325 * character test (and conversion).
346 if (c == ' ' || c == '\t' || c == '\r' || c == '\n')
354 * case insensitive strncmp:
358 ci_strncmp (s1, s2, n)
369 for (; n > 0; s1++, s2++, n--) {
371 if (mtolower(*s1) != mtolower(*s2))
395 /* hmmmmm - what to do ... */
412 * called one time for initialisation.
421 * allocate the input buffer and the error-text buffer (yytext):
425 inbuf = xmalloc ((long) ib_max);
430 yytext = xmalloc ((long) 100);
434 scan_lineno = lineno = 1;
437 * skip leading whites; the following quote decides...
441 skip_over_whites (c);
446 fprintf (stderr, "will scan in strict a60 manner.\n");
450 s_unput (c); /* flush back */
452 skip_white = scan_strict;
460 int ib_offset = (int) (ib_ptr - inbuf); /* offset into inbuf */
465 printf ("++ inbuf expanded to %ld bytes.\n", (long) ib_max);
467 inbuf = xrealloc (inbuf, (long) ib_max);
469 ib_ptr = inbuf + ib_offset;
483 if (ib_ptr != inbuf) {
485 * cleanup buffer ptr:
488 for (i = 0; i < ib_len; i++)
489 inbuf [i] = ib_ptr [i];
493 fill_ptr = ib_ptr + ib_len;
495 for (i = 0; i < n; i++) {
497 if (ib_ptr + ib_len + 2 >= inbuf + ib_max)
498 expand_inbuf (n + 10);
515 * return the next input character; return 0 at eof.
516 * skip whites, if skip_white == 1;
526 if (! ib_eof && ib_len == 0)
529 if (ib_eof && ib_len == 0)
546 if (yyidx + 2 >= yylen) {
548 yytext = xrealloc (yytext, (long) yylen);
550 yytext [yyidx++] = c;
554 printf ("++ s_getchar: next one: `%s' (scan_lineno %d)\n",
555 ch_str (c), scan_lineno);
569 /* expand about 2 or 3 characters ... (don't care)
570 one for the unget char and one for a trailing '\0'
571 (better for debugging) */
573 if (ib_ptr + ib_len + 3 >= inbuf + ib_max)
576 if (ib_ptr == inbuf) {
578 * shift one char to right.
579 * (the + 1 is for a trailing 0)
581 for (i = ib_len + 1; i > 0; i--)
582 inbuf [i] = inbuf [i-1];
597 printf ("++ s_unput (%c): ib_len %d ; ib now `", c, ib_len);
598 for (i = 0; i < ib_len; i++)
599 printf ("%c", ib_ptr [i]);
600 printf ("' - honk.\n");
634 * still scanned a quote ('); look for keyword.
635 * no keyword is longer than 10 chars ('notgreater'). lets scan max
636 * 20 chars for some context in the yytext string.
644 int kw_len = 0, kwt, c;
648 printf ("++ looking for keyword ...\n");
651 while ((c = s_getchar ()) != 0) {
659 if (kw_len + 2 >= KW_MAX)
667 kwt = get_keyword (keyw);
670 * reported (hopefully) by parser-module:
673 * yyerror ("unknown keyword");
679 printf ("++ got %d from `%s'\n", kwt, keyw);
683 skip_following_comment ();
686 skip_end_comment (1); /* quotes active */
695 static int st_max = 0;
697 int st_len = 0, c, level = 1, krach = 0;
701 printf ("++ looking for string ...\n");
706 while ((c = s_getchar ()) != 0) {
708 if (! krach && (c == '\'' || c == '"'))
710 else if (! krach && c == '`')
716 if (st_len + 2 >= st_max) {
719 str = xmalloc ((long) st_max);
721 str = xrealloc (str, (long) st_max);
724 if (c == 'n') c = '\n';
725 else if (c == 't') c = '\t';
726 else if (c == '"') c = '"';
727 else if (c == '\'') c = '\'';
728 else if (c == '`') c = '`';
742 printf ("++ found `%s'.\n", str);
745 skip_white = scan_strict;
747 yylval.str = xstrdup (str);
757 static int id_max = 0;
763 printf ("++ looking for identifier ...\n");
768 while ((c = s_getchar ()) != 0) {
774 skip_over_whites (c);
778 if (! is_char (c) && ! is_digit (c)) {
783 if (id_len + 2 >= id_max) {
786 ident = xmalloc ((long) id_max);
788 ident = xrealloc (ident, (long) id_max);
790 ident [id_len++] = c;
797 printf ("++ found `%s'.\n", ident);
801 /* how to parse: begin integer a nase; end */
804 * if the string is a keyword, then return the keyword-token
807 int kwt = get_keyword (ident);
811 skip_following_comment ();
814 skip_end_comment (0);
820 /* found an identifier: */
822 yylval.str = xstrdup (ident);
829 * scan the fractional part; num is the full (sp?) part.
837 double frac = 0, pot = 10;
840 while (is_digit (c = s_getchar ())) {
841 frac = frac + (c - '0') / pot;
845 if (c == 'e' || c == 'E')
846 return scan_exp (rval + frac);
849 /* ok - still scanned a real value: */
853 printf ("++ got real %e\n", (double) rval + frac);
858 yylval.rtype = rval + frac;
865 * scan the exponential part; if it is expressed as X '10' Y its done in
866 * a60-parse.y; here a additional form X e Y (or X E Y) is scanned.
887 if (! is_digit (c)) {
888 a60_error (infname, lineno, "malformed exponent.\n");
891 /* scan the exponent : */
894 exp_val = 10 * exp_val + c - '0';
895 } while (is_digit (c = s_getchar ()));
897 rval = rval * pow10 (rsign * exp_val);
903 printf ("++ got real %e\n", (double) rval);
915 * here we have a dot or a digit:
927 printf ("++ looking for number...\n");
931 return scan_frac ((long) 0);
935 while (is_digit (c = s_getchar ()))
936 ival = 10 * ival + c - '0';
939 return scan_frac ((long) ival);
941 if (c == 'e' || c == 'E')
942 return scan_exp ((double) ival);
944 /* ok - still scanned a integer value: */
948 printf ("++ got integer %ld\n", (long) ival);
961 * handle this unknown char; skip input til end-of-line.
968 static int last_line = -1;
970 if (last_line == lineno)
979 if (mprintable(ch)) {
982 a60_error (infname, lineno,
983 "unknown char `%c' found (scanned: %s).\n",
986 a60_error (infname, lineno,
987 "unknown char `%c' found.\n", ch);
991 a60_error (infname, lineno,
992 "unknown char 0x%02x found (scanned: %s).\n",
995 a60_error (infname, lineno,
996 "unknown char 0x%02x found.\n", ch);
999 yyerror ("syntax error");
1001 a60_error (infname, lineno, " [ skipping to next line ]\n");
1009 } while (ch && ch != '\n');
1011 skip_white = scan_strict;
1016 * we've found a ';' or a 'begin'; now look about a following comment
1017 * and skip, if found, to the next semicolon.
1021 skip_following_comment ()
1023 char *str = xmalloc ((long) 100);
1025 int str_len = 0, quoted_comment = 0;
1030 printf ("++ looking for 'comment'...\n");
1035 if (! scan_strict) {
1036 skip_over_whites (c);
1040 quoted_comment = (c == '\'');
1042 if (scan_strict && ! quoted_comment) {
1045 printf ("++ no 'comment'.\n");
1052 if (str_len + 2 >= str_max) {
1054 str = xrealloc (str, str_max);
1056 str [str_len++] = c; /*** mtolower (c); ***/
1062 } while ((quoted_comment && c != '\'')
1063 || (! quoted_comment && c != 't'
1064 && str_len != 6 && ci_strncmp ("commen", str, 6)));
1066 str [str_len++] = c;
1067 str [str_len] = '\0';
1071 printf ("++ examining `%s'...\n", asc_str (str, -1));
1074 if ((quoted_comment && ! ci_strncmp (str+1, "comment", 7))
1075 || ! ci_strncmp ("comment", str, 7)) {
1077 * well done; skip anything til a ';':
1081 printf ("++ skipping 'comment': `");
1083 while ((c = s_getchar ()) != ';') {
1086 printf ("%s", ch_str (c));
1089 #endif /* ! DEBUG */
1094 printf ("' skipped.\n");
1098 * now do this again...
1101 skip_following_comment ();
1106 * no comment; unget and forget:
1110 printf ("++ forgetting `%s'...\n",
1111 asc_str (str, str_len));
1114 s_unput (str [--str_len]);
1122 * scan the end of the string for and 'end' delimiter: 'end' or 'else'
1123 * or ';'; return an ptr to the first char.
1127 end_delim (str, str_len, quoted_comment)
1132 static char *e_delim [4];
1133 char **ptr, *str_ptr;
1137 e_delim [1] = "'end'", e_delim [2] = "'else'";
1139 e_delim [1] = "end", e_delim [2] = "else";
1144 printf ("++ looking for end_delim in `%s': ", str);
1147 for (ptr = e_delim; ptr && *ptr && **ptr; ptr++) {
1148 str_ptr = str + str_len - strlen (*ptr);
1149 if (str_ptr >= str &&
1150 ! strncmp (str_ptr, *ptr, strlen (*ptr))) {
1153 printf (" found %s.\n", *ptr);
1161 printf (" not found.\n");
1169 * got a 'end'; now skip anything until 'end' or 'else' or ';':
1173 skip_end_comment (quoted_comment)
1176 char *str = xmalloc ((long) 10);
1179 char *end_str = str, *ptr;
1184 printf ("++ looking for 'end' comment...\n");
1187 while ((c = s_getchar ()) != 0) {
1189 if (str_len + 2 >= str_max) {
1191 str = xrealloc (str, str_max);
1193 str [str_len++] = mtolower (c);
1196 end_str = end_delim (str, str_len, quoted_comment);
1202 if (! c && str_len > 0) {
1203 yyerror ("EOF reached within 'end' ...");
1213 printf ("++ found behind 'end': `%s'\n",
1214 asc_str (str, str_len));
1216 for (ptr = str+str_len-1; ptr >= end_str; ptr--)
1224 * scan a special; two or one char long.
1232 char str [MAX_SPEC+1];
1233 int i, tok, max_spec_len;
1237 for (i=1; i<MAX_SPEC; i++) {
1238 str [i] = s_getchar ();
1239 if (is_white (str [i])) {
1249 printf ("++ looking for special `%s'...\n", str);
1252 for (i = max_spec_len; i > 0; i--) {
1253 for (kw = special; kw->name && *kw->name; kw++) {
1254 if (! strncmp (kw->name, str, i)) {
1258 printf ("++ got special %d\n", tok);
1261 skip_following_comment ();
1266 s_unput (str [i-1]);
1269 /* Oops - what to do... */
1274 skip_unknown (str[0]);
1281 * the common yylex entry;
1282 * return's the token number or 0 on end of file
1288 static int last_scan_lineno = 1;
1293 if (ib_eof && ib_len == 0) {
1296 printf ("+++ EOF reached...\n");
1303 if (! scan_strict) {
1304 skip_over_whites (ch);
1309 tok = scan_keyword ();
1310 else if (ch == '`' || ch == '"')
1311 tok = scan_string ();
1312 else if (is_char (ch))
1313 tok = scan_identifier (ch);
1314 else if (is_digit (ch) || ch == '.')
1315 tok = scan_number (ch);
1317 tok = scan_special (ch);
1323 if (tok >= 0 && tok <= 256)
1324 printf ("+++ yylex: returnig token %d (`%s')\n",
1327 printf ("+++ yylex: returnig token %d\n", tok);
1331 lineno = last_scan_lineno;
1332 last_scan_lineno = scan_lineno;
1337 /* end of a60-scan.c */