1 /* $OpenBSD: syn.c,v 1.29 2013/06/03 18:40:05 jca Exp $ */
4 * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009,
5 * 2011, 2012, 2013, 2014
6 * Thorsten Glaser <tg@mirbsd.org>
8 * Provided that these terms and disclaimer and all copyright notices
9 * are retained or reproduced in an accompanying document, permission
10 * is granted to deal in this work without restriction, including un-
11 * limited rights to use, publicly perform, distribute, sell, modify,
12 * merge, give away, or sublicence.
14 * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
15 * the utmost extent permitted by applicable law, neither express nor
16 * implied; without malicious intent or gross negligence. In no event
17 * may a licensor, author or contributor be held liable for indirect,
18 * direct, other damage, loss, or other issues arising in any way out
19 * of dealing in the work, even if advised of the possibility of such
20 * damage or existence of a defect, except proven that it results out
21 * of said person's immediate fault when using the work as intended.
26 __RCSID("$MirOS: src/bin/mksh/syn.c,v 1.94 2014/01/05 21:57:29 tg Exp $");
/*
 * Remembers where a nested construct was opened: nesting_push() fills
 * it from the opening token and source->line, and syntaxerr() uses it
 * to report the token and line of the construct.
 */
28 struct nesting_state {
29 int start_token; /* token that began nesting (eg, FOR) */
30 int start_line; /* line nesting began on */
33 struct yyrecursive_state {
34 struct yyrecursive_state *next;
35 struct ioword **old_herep;
42 static void yyparse(void);
43 static struct op *pipeline(int);
44 static struct op *andor(void);
45 static struct op *c_list(bool);
46 static struct ioword *synio(int);
47 static struct op *nested(int, int, int);
48 static struct op *get_command(int);
49 static struct op *dogroup(void);
50 static struct op *thenpart(void);
51 static struct op *elsepart(void);
52 static struct op *caselist(void);
53 static struct op *casepart(int);
54 static struct op *function_body(char *, bool);
55 static char **wordlist(void);
56 static struct op *block(int, struct op *, struct op *);
57 static struct op *newtp(int);
58 static void syntaxerr(const char *) MKSH_A_NORETURN;
59 static void nesting_push(struct nesting_state *, int);
60 static void nesting_pop(struct nesting_state *);
61 static int assign_command(const char *);
62 static int inalias(struct source *) MKSH_A_PURE;
63 static Test_op dbtestp_isa(Test_env *, Test_meta);
64 static const char *dbtestp_getopnd(Test_env *, Test_op, bool);
65 static int dbtestp_eval(Test_env *, Test_op, const char *,
67 static void dbtestp_error(Test_env *, int, const char *) MKSH_A_NORETURN;
69 static struct op *outtree; /* yyparse output */
/*
 * Start token/line of the construct currently being parsed: written by
 * nesting_push(), reset to 0 by compile(), read by syntaxerr().
 * NOTE(review): the trailing remark on the declaration below does not
 * seem to describe this variable -- confirm and drop it if stale.
 */
70 static struct nesting_state nesting; /* \n changed to ; */
72 static bool reject; /* token(cf) gets symbol again */
73 static int symbol; /* yylex value */
74 static int sALIAS = ALIAS; /* 0 in yyrecursive */
/*
 * One token of lookahead on top of yylex(), kept in 'symbol'/'reject':
 *  REJECT     marks the token held in 'symbol' as not yet consumed
 *  ACCEPT     clears that mark
 *  token(cf)  returns the held token (and accepts it) if one is pending,
 *             otherwise reads a fresh one with yylex(cf); note that cf
 *             is not used when a rejected token is pending
 *  tpeek(cf)  returns the next token but leaves it rejected, so the
 *             following token()/tpeek() yields the same token again
 *  musthave(c,cf)  consumes a token and calls syntaxerr(NULL) unless
 *             it equals c
 */
76 #define REJECT (reject = true)
77 #define ACCEPT (reject = false)
78 #define token(cf) ((reject) ? (ACCEPT, symbol) : (symbol = yylex(cf)))
79 #define tpeek(cf) ((reject) ? (symbol) : (REJECT, symbol = yylex(cf)))
80 #define musthave(c,cf) do { if (token(cf) != (c)) syntaxerr(NULL); } while (/* CONSTCOND */ 0)
/*
 * Spellings of the closing brace and of the esac keyword; shared by the
 * keyword table entries and by the parser code that picks one of the
 * two depending on whether symbol is '}'.
 */
82 static const char Tcbrace[] = "}";
83 static const char Tesac[] = "esac";
92 outtree = c_list(source->type == SSTRING);
94 if (c == 0 && !outtree)
95 outtree = newtp(TEOF);
96 else if (c != '\n' && c != 0)
103 struct op *t, *p, *tl = NULL;
107 while (token(0) == '|') {
108 if ((p = get_command(CONTIN)) == NULL)
111 t = tl = block(TPIPE, t, p);
113 tl = tl->right = block(TPIPE, tl->right, p);
128 while ((c = token(0)) == LOGAND || c == LOGOR) {
129 if ((p = pipeline(CONTIN)) == NULL)
131 t = block(c == LOGAND? TAND: TOR, t, p);
141 struct op *t = NULL, *p, *tl = NULL;
145 while (/* CONSTCOND */ 1) {
148 * Token has always been read/rejected at this point, so
149 * we don't worry about what flags to pass token()
153 if (c == '\n' && (multi || inalias(source))) {
155 /* ignore blank lines */
159 else if (c == '&' || c == COPROC)
160 p = block(c == '&' ? TASYNC : TCOPROC, p, NULL);
166 t = tl = block(TLIST, t, p);
168 tl = tl->right = block(TLIST, tl->right, p);
176 static struct ioword *
180 static struct ioword *nextiop;
183 if (nextiop != NULL) {
189 if (tpeek(cf) != REDIR)
193 if (iop->flag & IONDELIM)
195 ishere = (iop->flag & IOTYPE) == IOHERE;
196 musthave(LWORD, ishere ? HEREDELIM : 0);
198 iop->delim = yylval.cp;
203 if (herep > &heres[HERES - 1])
204 yyerror("too many %ss\n", "<<");
207 iop->name = yylval.cp;
209 if (iop->flag & IOBASH) {
212 nextiop = alloc(sizeof(*iop), ATEMP);
213 nextiop->name = cp = alloc(5, ATEMP);
217 *cp++ = '0' + (iop->unit / 10);
220 *cp++ = '0' + (iop->unit % 10);
223 iop->flag &= ~IOBASH;
225 nextiop->flag = IODUP;
226 nextiop->delim = NULL;
227 nextiop->heredoc = NULL;
233 nested(int type, int smark, int emark)
236 struct nesting_state old_nesting;
238 nesting_push(&old_nesting, smark);
240 musthave(emark, KEYWORD|sALIAS);
241 nesting_pop(&old_nesting);
242 return (block(type, t, NULL));
245 static const char let_cmd[] = {
246 CHAR, 'l', CHAR, 'e', CHAR, 't', CHAR, ']', EOS
248 static const char setA_cmd0[] = {
249 CHAR, 's', CHAR, 'e', CHAR, 't', EOS
251 static const char setA_cmd1[] = {
252 CHAR, '-', CHAR, 'A', EOS
254 static const char setA_cmd2[] = {
255 CHAR, '-', CHAR, '-', EOS
262 int c, iopn = 0, syniocf, lno;
263 struct ioword *iop, **iops;
266 struct nesting_state old_nesting;
268 /* NUFILE is small enough to leave this addition unchecked */
269 iops = alloc2((NUFILE + 1), sizeof(struct ioword *), ATEMP);
273 syniocf = KEYWORD|sALIAS;
274 switch (c = token(cf|KEYWORD|sALIAS|VARASN)) {
286 syniocf &= ~(KEYWORD|sALIAS);
288 t->lineno = source->line;
289 while (/* CONSTCOND */ 1) {
290 cf = (t->u.evalflags ? ARRAYVAR : 0) |
291 (XPsize(args) == 0 ? sALIAS|VARASN : CMDWORD);
294 while ((iop = synio(cf)) != NULL) {
296 yyerror("too many %ss\n",
305 * the iopn == 0 and XPsize(vars) == 0 are
306 * dubious but AT&T ksh acts this way
308 if (iopn == 0 && XPsize(vars) == 0 &&
310 assign_command(ident))
311 t->u.evalflags = DOVACHECK;
312 if ((XPsize(args) == 0 || Flag(FKEYWORD)) &&
313 is_wdvarassign(yylval.cp))
314 XPput(vars, yylval.cp);
316 XPput(args, yylval.cp);
320 if (XPsize(args) == 0 && XPsize(vars) == 1 &&
321 is_wdvarassign(yylval.cp)) {
322 /* wdarrassign: foo=(bar) */
325 /* manipulate the vars string */
326 tcp = XPptrv(vars)[(vars.len = 0)];
327 /* 'varname=' -> 'varname' */
328 tcp[wdscan(tcp, EOS) - tcp - 3] = EOS;
330 /* construct new args strings */
331 XPput(args, wdcopy(setA_cmd0, ATEMP));
332 XPput(args, wdcopy(setA_cmd1, ATEMP));
334 XPput(args, wdcopy(setA_cmd2, ATEMP));
336 /* slurp in words till closing paren */
337 while (token(CONTIN) == LWORD)
338 XPput(args, yylval.cp);
339 if (symbol != /*(*/ ')')
343 * Check for "> foo (echo hi)"
344 * which AT&T ksh allows (not
345 * POSIX, but not disallowed)
348 if (XPsize(args) == 0 &&
354 /* must be a function */
355 if (iopn != 0 || XPsize(args) != 1 ||
359 musthave(/*(*/')', 0);
360 t = function_body(XPptrv(args)[0], false);
372 int subshell_nesting_type_saved;
374 subshell_nesting_type_saved = subshell_nesting_type;
375 subshell_nesting_type = ')';
376 t = nested(TPAREN, '(', ')');
377 subshell_nesting_type = subshell_nesting_type_saved;
382 t = nested(TBRACE, '{', '}');
386 /* leave KEYWORD in syniocf (allow if (( 1 )) then ...) */
389 switch (token(LETEXPR)) {
399 XPput(args, wdcopy(let_cmd, ATEMP));
400 XPput(args, yylval.cp);
403 case DBRACKET: /* [[ .. ]] */
404 /* leave KEYWORD in syniocf (allow if [[ -n 1 ]] then ...) */
405 t = newtp(TDBRACKET);
410 te.flags = TEF_DBRACKET;
412 te.isa = dbtestp_isa;
413 te.getopnd = dbtestp_getopnd;
414 te.eval = dbtestp_eval;
415 te.error = dbtestp_error;
423 t = newtp((c == FOR) ? TFOR : TSELECT);
424 musthave(LWORD, ARRAYVAR);
425 if (!is_wdvarname(yylval.cp, true))
426 yyerror("%s: %s\n", c == FOR ? "for" : Tselect,
428 strdupx(t->str, ident, ATEMP);
429 nesting_push(&old_nesting, c);
430 t->vars = wordlist();
432 nesting_pop(&old_nesting);
437 nesting_push(&old_nesting, c);
438 t = newtp((c == WHILE) ? TWHILE : TUNTIL);
439 t->left = c_list(true);
440 t->right = dogroup();
441 nesting_pop(&old_nesting);
448 nesting_push(&old_nesting, c);
449 t->left = caselist();
450 nesting_pop(&old_nesting);
454 nesting_push(&old_nesting, c);
456 t->left = c_list(true);
457 t->right = thenpart();
458 musthave(FI, KEYWORD|sALIAS);
459 nesting_pop(&old_nesting);
463 syniocf &= ~(KEYWORD|sALIAS);
467 t = block(TBANG, NULL, t);
471 syniocf &= ~(KEYWORD|sALIAS);
473 if (t && t->type == TCOM) {
474 t->str = alloc(2, ATEMP);
479 t = block(TTIME, t, NULL);
484 t = function_body(yylval.cp, true);
488 while ((iop = synio(syniocf)) != NULL) {
490 yyerror("too many %ss\n", "redirection");
499 iops = aresize2(iops, iopn, sizeof(struct ioword *), ATEMP);
503 if (t->type == TCOM || t->type == TDBRACKET) {
505 t->args = (const char **)XPclose(args);
507 t->vars = (char **)XPclose(vars);
522 c = token(CONTIN|KEYWORD|sALIAS);
524 * A {...} can be used instead of do...done for for/select loops
525 * but not for while/until loops - we don't need to check if it
526 * is a while loop because it would have been parsed as part of
527 * the conditional command list...
536 musthave(c, KEYWORD|sALIAS);
545 musthave(THEN, KEYWORD|sALIAS);
547 t->left = c_list(true);
550 t->right = elsepart();
559 switch (token(KEYWORD|sALIAS|VARASN)) {
561 if ((t = c_list(true)) == NULL)
567 t->left = c_list(true);
568 t->right = thenpart();
583 c = token(CONTIN|KEYWORD|sALIAS);
584 /* A {...} can be used instead of in...esac for case statements */
593 while ((tpeek(CONTIN|KEYWORD|ESACONLY)) != c) {
594 struct op *tc = casepart(c);
596 t = tl = tc, tl->right = NULL;
598 tl->right = tc, tl = tc;
600 musthave(c, KEYWORD|sALIAS);
613 if (token(CONTIN | KEYWORD) != '(')
621 if (symbol != endtok) {
623 symbol == '}' ? Tcbrace : Tesac, ATEMP);
630 XPput(ptns, yylval.cp);
631 } while (token(0) == '|');
634 t->vars = (char **)XPclose(ptns);
637 t->left = c_list(true);
639 /* initialise to default for ;; or omitted */
641 /* SUSv4 requires the ;; except in the last casepart */
642 if ((tpeek(CONTIN|KEYWORD|sALIAS)) != endtok)
654 /* initialised above, but we need to eat the token */
661 function_body(char *name,
662 /* function foo { ... } vs foo() { .. } */
668 sname = wdstrip(name, 0);
670 * Check for valid characters in name. POSIX and AT&T ksh93 say
671 * only allow [a-zA-Z_0-9] but this allows more as old pdkshs
672 * have allowed more; the following were never allowed:
673 * NUL TAB NL SP " $ & ' ( ) ; < = > \ ` |
674 * C_QUOTE covers all but adds # * ? [ ]
676 for (p = sname; *p; p++)
677 if (ctype(*p, C_QUOTE))
678 yyerror("%s: %s\n", sname, "invalid function name");
681 * Note that POSIX allows only compound statements after foo(),
682 * sh and AT&T ksh allow any command, go with the later since it
683 * shouldn't break anything. However, for function foo, AT&T ksh
684 * only accepts an open-brace.
687 if (tpeek(CONTIN|KEYWORD|sALIAS) == '(' /*)*/) {
688 /* function foo () { //}*/
691 /* degrade to POSIX function */
694 musthave('{' /*}*/, CONTIN|KEYWORD|sALIAS);
700 t->u.ksh_func = tobool(ksh_func);
701 t->lineno = source->line;
703 if ((t->left = get_command(CONTIN)) == NULL) {
706 * Probably something like foo() followed by EOF or ';'.
707 * This is accepted by sh and ksh88.
708 * To make "typeset -f foo" work reliably (so its output can
709 * be used as input), we pretend there is a colon here.
711 t->left = newtp(TCOM);
712 /* (2 * sizeof(char *)) is small enough */
713 t->left->args = alloc(2 * sizeof(char *), ATEMP);
714 t->left->args[0] = tv = alloc(3, ATEMP);
718 t->left->args[1] = NULL;
719 t->left->vars = alloc(sizeof(char *), ATEMP);
720 t->left->vars[0] = NULL;
734 /* POSIX does not do alias expansion here... */
735 if ((c = token(CONTIN|KEYWORD|sALIAS)) != IN) {
737 /* non-POSIX, but AT&T ksh accepts a ; here */
741 while ((c = token(0)) == LWORD)
742 XPput(args, yylval.cp);
743 if (c != '\n' && c != ';')
746 return ((char **)XPclose(args));
750 * supporting functions
754 block(int type, struct op *t1, struct op *t2)
764 static const struct tokeninfo {
771 { "then", THEN, true },
772 { "else", ELSE, true },
773 { "elif", ELIF, true },
775 { "case", CASE, true },
776 { Tesac, ESAC, true },
777 { "for", FOR, true },
778 { Tselect, SELECT, true },
779 { "while", WHILE, true },
780 { "until", UNTIL, true },
782 { "done", DONE, true },
784 { Tfunction, FUNCTION, true },
785 { "time", TIME, true },
787 { Tcbrace, '}', true },
789 { "[[", DBRACKET, true },
790 /* Lexical tokens (0[EOF], LWORD and REDIR handled specially) */
791 { "&&", LOGAND, false },
792 { "||", LOGOR, false },
793 { ";;", BREAK, false },
794 { ";|", BRKEV, false },
795 { ";&", BRKFT, false },
796 { "((", MDPAREN, false },
797 { "|&", COPROC, false },
798 /* and some special cases... */
799 { "newline", '\n', false },
806 struct tokeninfo const *tt;
809 ktinit(APERM, &keywords,
810 /* currently 28 keywords: 75% of 64 = 2^6 */
812 for (tt = tokentab; tt->name; tt++) {
814 p = ktenter(&keywords, tt->name, hash(tt->name));
815 p->flag |= DEFINED|ISSET;
823 syntaxerr(const char *what)
825 /* 2<<- is the longest redirection, I think */
828 struct tokeninfo const *tt;
838 if (nesting.start_token) {
839 c = nesting.start_token;
840 source->errline = nesting.start_line;
844 /* don't quote the EOF */
845 yyerror("%s: %s %s\n", Tsynerr, "unexpected", "EOF");
849 s = snptreef(NULL, 32, "%S", yylval.cp);
853 s = snptreef(redir, sizeof(redir), "%R", yylval.iop);
857 for (tt = tokentab; tt->name; tt++)
863 if (c > 0 && c < 256) {
867 shf_snprintf(redir, sizeof(redir),
872 yyerror("%s: '%s' %s\n", Tsynerr, s, what);
876 nesting_push(struct nesting_state *save, int tok)
879 nesting.start_token = tok;
880 nesting.start_line = source->line;
884 nesting_pop(struct nesting_state *saved)
894 t = alloc(sizeof(struct op), ATEMP);
900 t->left = t->right = NULL;
906 compile(Source *s, bool skiputf8bom)
908 nesting.start_token = 0;
909 nesting.start_line = 0;
919 * This kludge exists to take care of sh/AT&T ksh oddity in which
920 * the arguments of alias/export/readonly/typeset have no field
921 * splitting, file globbing, or (normal) tilde expansion done.
922 * AT&T ksh seems to do something similar to this since
923 * $ touch a=a; typeset a=[ab]; echo "$a"
925 * $ x=typeset; $x a=[ab]; echo "$a"
930 assign_command(const char *s)
934 return ((strcmp(s, Talias) == 0) ||
935 (strcmp(s, Texport) == 0) ||
936 (strcmp(s, Treadonly) == 0) ||
937 (strcmp(s, Ttypeset) == 0));
940 /* Check if we are in the middle of reading an alias */
942 inalias(struct source *s)
944 for (; s && s->type == SALIAS; s = s->next)
945 if (!(s->flags & SF_ALIASEND))
952 * Order important - indexed by Test_meta values
953 * Note that ||, &&, ( and ) can't appear as unquoted strings
954 * in normal shell input, so these can be interpreted unambiguously
955 * in the evaluation pass.
957 static const char dbtest_or[] = { CHAR, '|', CHAR, '|', EOS };
958 static const char dbtest_and[] = { CHAR, '&', CHAR, '&', EOS };
959 static const char dbtest_not[] = { CHAR, '!', EOS };
960 static const char dbtest_oparen[] = { CHAR, '(', EOS };
961 static const char dbtest_cparen[] = { CHAR, ')', EOS };
962 const char * const dbtest_tokens[] = {
963 dbtest_or, dbtest_and, dbtest_not,
964 dbtest_oparen, dbtest_cparen
/*
 * "]]" as a CHAR-tagged word: dbtestp_isa() compares the current
 * unquoted word against it when asked for TM_END.
 */
966 static const char db_close[] = { CHAR, ']', CHAR, ']', EOS };
/*
 * "<" and ">" as CHAR-tagged words: when dbtestp_isa() is looking for a
 * binary operator and the lexer returned a REDIR token whose flag is
 * exactly IOREAD or IOWRITE, a wdcopy() of the matching word is used
 * in place of the redirection token.
 */
967 static const char db_lthan[] = { CHAR, '<', EOS };
968 static const char db_gthan[] = { CHAR, '>', EOS };
971 * Test if the current token is a whatever. Accepts the current token if
972 * it is. Returns 0 if it is not, non-zero if it is (in the case of
973 * TM_UNOP and TM_BINOP, the returned value is a Test_op).
976 dbtestp_isa(Test_env *te, Test_meta meta)
978 int c = tpeek(ARRAYVAR | (meta == TM_BINOP ? 0 : CONTIN));
981 Test_op ret = TO_NONOP;
984 uqword = c == LWORD && *ident;
987 ret = c == LOGOR ? TO_NONNULL : TO_NONOP;
988 else if (meta == TM_AND)
989 ret = c == LOGAND ? TO_NONNULL : TO_NONOP;
990 else if (meta == TM_NOT)
991 ret = (uqword && !strcmp(yylval.cp,
992 dbtest_tokens[(int)TM_NOT])) ? TO_NONNULL : TO_NONOP;
993 else if (meta == TM_OPAREN)
994 ret = c == '(' /*)*/ ? TO_NONNULL : TO_NONOP;
995 else if (meta == TM_CPAREN)
996 ret = c == /*(*/ ')' ? TO_NONNULL : TO_NONOP;
997 else if (meta == TM_UNOP || meta == TM_BINOP) {
998 if (meta == TM_BINOP && c == REDIR &&
999 (yylval.iop->flag == IOREAD || yylval.iop->flag == IOWRITE)) {
1001 save = wdcopy(yylval.iop->flag == IOREAD ?
1002 db_lthan : db_gthan, ATEMP);
1003 } else if (uqword && (ret = test_isop(meta, ident)))
1006 /* meta == TM_END */
1007 ret = (uqword && !strcmp(yylval.cp,
1008 db_close)) ? TO_NONNULL : TO_NONOP;
1009 if (ret != TO_NONOP) {
1011 if ((unsigned int)meta < NELEM(dbtest_tokens))
1012 save = wdcopy(dbtest_tokens[(int)meta], ATEMP);
1014 XPput(*te->pos.av, save);
1020 dbtestp_getopnd(Test_env *te, Test_op op MKSH_A_UNUSED,
1021 bool do_eval MKSH_A_UNUSED)
1023 int c = tpeek(ARRAYVAR);
1029 XPput(*te->pos.av, yylval.cp);
1035 dbtestp_eval(Test_env *te MKSH_A_UNUSED, Test_op op MKSH_A_UNUSED,
1036 const char *opnd1 MKSH_A_UNUSED, const char *opnd2 MKSH_A_UNUSED,
1037 bool do_eval MKSH_A_UNUSED)
1043 dbtestp_error(Test_env *te, int offset, const char *msg)
1045 te->flags |= TEF_ERROR;
1049 /* Kludgy to say the least... */
1051 yylval.cp = *(XPptrv(*te->pos.av) + XPsize(*te->pos.av) +
1061 #define EOVERFLOW ERANGE
1063 #define EOVERFLOW EINVAL
1068 parse_usec(const char *s, struct timeval *tv)
1074 /* parse integral part */
1075 while (ksh_isdigit(*s)) {
1076 tt.tv_sec = tv->tv_sec * 10 + (*s++ - '0');
1077 if (tt.tv_sec / 10 != tv->tv_sec) {
1081 tv->tv_sec = tt.tv_sec;
1086 /* no decimal fraction */
1088 else if (*s++ != '.') {
1089 /* junk after integral part */
1094 /* parse decimal fraction */
1096 while (ksh_isdigit(*s)) {
1097 tv->tv_usec += i * (*s++ - '0');
1102 /* check for junk after fractional part */
1103 while (ksh_isdigit(*s))
1110 /* end of input string reached, no errors */
1116 * Helper function called from within lex.c:yylex() to parse
1117 * a COMSUB recursively using the main shell parser and lexer
1120 yyrecursive(int subtype MKSH_A_UNUSED)
1124 struct yyrecursive_state *ys;
1127 if (subtype != COMSUB) {
1135 ys = alloc(sizeof(struct yyrecursive_state), ATEMP);
1137 /* tell the lexer to accept a closing parenthesis as EOD */
1138 ys->old_nesting_type = subshell_nesting_type;
1139 subshell_nesting_type = etok;
1141 /* push reject state, parse recursively, pop reject state */
1142 ys->old_reject = reject;
1143 ys->old_symbol = symbol;
1145 ys->old_herep = herep;
1146 ys->old_salias = sALIAS;
1148 ys->next = e->yyrecursive_statep;
1149 e->yyrecursive_statep = ys;
1150 /* we use TPAREN as a helper container here */
1151 t = nested(TPAREN, stok, etok);
1152 yyrecursive_pop(false);
1154 /* t->left because nested(TPAREN, ...) hides our goodies there */
1155 cp = snptreef(NULL, 0, "%T", t->left);
1162 yyrecursive_pop(bool popall)
1164 struct yyrecursive_state *ys;
1167 if (!(ys = e->yyrecursive_statep))
1169 e->yyrecursive_statep = ys->next;
1171 sALIAS = ys->old_salias;
1172 herep = ys->old_herep;
1173 reject = ys->old_reject;
1174 symbol = ys->old_symbol;
1176 subshell_nesting_type = ys->old_nesting_type;