OSDN Git Service

e58d8b8281fd4184f6f2694f4ec9600bcbe53db8
[android-x86/external-mksh.git] / src / lex.c
1 /*      $OpenBSD: lex.c,v 1.47 2013/03/03 19:11:34 guenther Exp $       */
2
3 /*-
4  * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5  *               2011, 2012, 2013
6  *      Thorsten Glaser <tg@mirbsd.org>
7  *
8  * Provided that these terms and disclaimer and all copyright notices
9  * are retained or reproduced in an accompanying document, permission
10  * is granted to deal in this work without restriction, including un-
11  * limited rights to use, publicly perform, distribute, sell, modify,
12  * merge, give away, or sublicence.
13  *
14  * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
15  * the utmost extent permitted by applicable law, neither express nor
16  * implied; without malicious intent or gross negligence. In no event
17  * may a licensor, author or contributor be held liable for indirect,
18  * direct, other damage, loss, or other issues arising in any way out
19  * of dealing in the work, even if advised of the possibility of such
20  * damage or existence of a defect, except proven that it results out
21  * of said person's immediate fault when using the work as intended.
22  */
23
24 #include "sh.h"
25
26 __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.188 2013/08/10 13:44:31 tg Exp $");
27
28 /*
29  * Lexer states used while scanning a word. PUSH_STATE() stores one of them in Lex_state.type and mirrors it in the local variable state of yylex().
30  */
31 #define SBASE           0       /* outside any lexical constructs; initial state for normal lexing */
32 #define SWORD           1       /* implicit quoting for substitute(); initial state when cf has ONEWORD */
33 #define SLETPAREN       2       /* inside (( )), implicit quoting; initial state when cf has LETEXPR */
34 #define SSQUOTE         3       /* inside '' */
35 #define SDQUOTE         4       /* inside "" */
36 #define SEQUOTE         5       /* inside $'' */
37 #define SBRACE          6       /* inside ${} */
38 #define SQBRACE         7       /* inside "${}" (pushed from SDQUOTE or SQBRACE) */
39 #define SBQUOTE         8       /* inside ``; ls_bool records an enclosing SDQUOTE */
40 #define SASPAREN        9       /* inside $(( )); nparen starts at 2 */
41 #define SHEREDELIM      10      /* parsing <<,<<-,<<< delimiter; initial state when cf has HEREDELIM */
42 #define SHEREDQUOTE     11      /* parsing " in <<,<<-,<<< delimiter */
43 #define SPATTERN        12      /* parsing *(...|...) pattern (*+?@!) */
44 #define SADELIM         13      /* like SBASE, looking for delimiter (see ls_adelim) */
45 #define STBRACEKORN     14      /* parsing ${...[#%]...} while Flag(FSH) is unset */
46 #define STBRACEBOURNE   15      /* parsing ${...[#%]...} while Flag(FSH) is set */
47 #define SINVALID        255     /* invalid state; type of the reserved slot states[0] in yylex() */
48 /* One entry of the retrace stack; the list head retrace_info is declared elsewhere. */
49 struct sretrace_info {
50         struct sretrace_info *next;     /* entry that was the list head before this one was pushed; o_getsc_r() walks the chain until NULL */
51         XString xs;                     /* expandable buffer receiving every character returned through o_getsc_r() */
52         char *xp;                       /* next write position inside xs */
53 };
54
55 /*
56  * One entry of the lexer state stack: the state type, a parenthesis counter and
57  * a union holding the extra datum the state needs (as a union, only one member is valid at a time).
58  */
59 typedef struct lex_state {
60         union {
61                 /* point to the next state block; kept in the base slot of a block, NULL in the block declared inside yylex() */
62                 struct lex_state *base;
63                 /* marks start of state output in output string: Xsavepos() value stored by PUSH_SRETRACE(), consumed by POP_SRETRACE() */
64                 int start;
65                 /* SBQUOTE: true if in double quotes: "`...`" */
66                 /* SEQUOTE: got NUL, ignore rest of string (nothing more is emitted until the closing quote) */
67                 bool abool;
68                 /* SADELIM information */
69                 struct {
70                         /* character to search for (yylex() sets ':' or '/') */
71                         unsigned char delimiter;
72                         /* max. number of delimiters; decremented on each match, the state is popped when it reaches 0 */
73                         unsigned char num;
74                 } adelim;
75         } u;
76         /* count open parentheses (used e.g. by SLETPAREN, SADELIM and SASPAREN) */
77         short nparen;
78         /* type of this state: one of the S* state constants */
79         uint8_t type;
80 } Lex_state;
81 #define ls_base         u.base          /* shorthands for the union members of Lex_state */
82 #define ls_start        u.start
83 #define ls_bool         u.abool
84 #define ls_adelim       u.adelim
85 /* Bounds of the state block currently in use; yylex() initialises it to cover its local states[] array. */
86 typedef struct {
87         Lex_state *base;        /* base slot of the block; POP_STATE() calls pop_state_i() when statep steps back onto it */
88         Lex_state *end;         /* one past the last slot (base + STATE_BSIZE in yylex()); PUSH_STATE() calls push_state_i() when statep reaches it */
89 } State_info;
90 /* Forward declarations of file-local helpers; notes describe only what the visible call sites show. */
91 static void readhere(struct ioword *);
92 static void ungetsc(int);               /* pushes back a character; yylex() uses it after reading one character too many */
93 static void ungetsc_i(int);
94 static int getsc_uu(void);              /* slow path of o_getsc_u() */
95 static void getsc_line(Source *);
96 static int getsc_bn(void);              /* slow path of o_getsc() */
97 static int s_get(void);                 /* passed with s_put to unbksl() for backslash escapes in SEQUOTE */
98 static void s_put(int);
99 static char *get_brace_var(XString *, char *);  /* called after ${ was emitted; the result becomes the new output pointer wp */
100 static bool arraysub(char **);          /* stores a string through its argument; on true yylex() copies and afree()s it, on false it is re-read as an SREREAD source */
101 static void gethere(bool);
102 static Lex_state *push_state_i(State_info *, Lex_state *);      /* called by PUSH_STATE() at the block end; result becomes statep */
103 static Lex_state *pop_state_i(State_info *, Lex_state *);       /* called by POP_STATE() at the block base; result becomes statep */
104 /* File-scope lexer flags; yylex() resets both to 0 on entry and at its Again label. */
105 static int backslash_skip;              /* while nonzero, o_getsc() always takes the getsc_bn() slow path */
106 static int ignore_backslash_newline;    /* counter raised by yylex() while skipping a # comment and inside single quotes; presumably consulted by getsc_bn() - TODO confirm */
107
108 /* optimised getsc_bn(): takes the next byte of source->str inline and advances the pointer, unless that byte is NUL or a backslash or backslash_skip is set; only then is getsc_bn() called */
109 #define o_getsc()       (*source->str != '\0' && *source->str != '\\' && \
110                             !backslash_skip ? *source->str++ : getsc_bn())
111 /* optimised getsc_uu(): takes the next byte of source->str inline and advances the pointer, unless that byte is NUL; only then is getsc_uu() called */
112 #define o_getsc_u()     ((*source->str != '\0') ? *source->str++ : getsc_uu())
113
114 /* retrace helper: expands to a complete function body that evaluates carg once, appends the result to the buffer of every entry on the retrace_info list (growing it via Xcheck) and returns it; because the expansion contains a return statement it must be the whole body of a function returning int */
115 #define o_getsc_r(carg) {                               \
116         int cev = (carg);                               \
117         struct sretrace_info *rp = retrace_info;        \
118                                                         \
119         while (rp) {                                    \
120                 Xcheck(rp->xs, rp->xp);                 \
121                 *rp->xp++ = cev;                        \
122                 rp = rp->next;                          \
123         }                                               \
124                                                         \
125         return (cev);                                   \
126 }
127
128 #if defined(MKSH_SMALL) && !defined(MKSH_SMALL_BUT_FAST) /* getsc() is a real function here, so o_getsc() is expanded in one place only (presumably to save code size) */
129 static int getsc(void);
130 /* getsc(): read one character via o_getsc(), record it in every active retrace buffer and return it */
131 static int
132 getsc(void)
133 {
134         o_getsc_r(o_getsc());   /* the macro supplies the whole body, including the return */
135 }
136 #else /* getsc() is a macro here, so the inline fast path of o_getsc() is expanded at every call site */
137 static int getsc_r(int);
138 /* getsc_r(c): record the already-read character c in every active retrace buffer and return it */
139 static int
140 getsc_r(int c)
141 {
142         o_getsc_r(c);           /* the macro supplies the whole body, including the return */
143 }
144 /* getsc(): same result as the function variant, built from o_getsc() plus getsc_r() */
145 #define getsc()         getsc_r(o_getsc())
146 #endif /* MKSH_SMALL && !MKSH_SMALL_BUT_FAST */
147
148 #define STATE_BSIZE     8       /* number of Lex_state slots in one state block; yylex() reserves slot 0 and starts at slot 1 */
149 /* PUSH_STATE(s): enter state s. Advances statep, lets push_state_i() supply a new position when the end of the block is reached, then stores s in statep->type and in state. Relies on the yylex() locals statep, state_info and state; no other field of the new entry is set here. */
150 #define PUSH_STATE(s)   do {                                    \
151         if (++statep == state_info.end)                         \
152                 statep = push_state_i(&state_info, statep);     \
153         state = statep->type = (s);                             \
154 } while (/* CONSTCOND */ 0)
155 /* POP_STATE(): leave the current state. Steps statep back, lets pop_state_i() supply a new position when the base slot of the block is reached, then reloads state from statep->type. */
156 #define POP_STATE()     do {                                    \
157         if (--statep == state_info.base)                        \
158                 statep = pop_state_i(&state_info, statep);      \
159         state = statep->type;                                   \
160 } while (/* CONSTCOND */ 0)
161 /* PUSH_SRETRACE(): start capturing input. Saves the current output position in statep->ls_start, allocates a sretrace_info from ATEMP with a buffer set up by Xinit (size argument 64) and makes it the new head of retrace_info, so that o_getsc_r() copies every character it returns into it. Relies on the yylex() locals statep, ws and wp. */
162 #define PUSH_SRETRACE() do {                                    \
163         struct sretrace_info *ri;                               \
164                                                                 \
165         statep->ls_start = Xsavepos(ws, wp);                    \
166         ri = alloc(sizeof(struct sretrace_info), ATEMP);        \
167         Xinit(ri->xs, ri->xp, 64, ATEMP);                       \
168         ri->next = retrace_info;                                \
169         retrace_info = ri;                                      \
170 } while (/* CONSTCOND */ 0)
171 /* POP_SRETRACE(): stop capturing. Rewinds wp to the position saved in statep->ls_start, NUL-terminates the captured text and leaves its start in sp, then unlinks the head entry and frees it, using dp as scratch. Must run before POP_STATE() because it reads statep->ls_start. The captured string in sp is not freed here. */
172 #define POP_SRETRACE()  do {                                    \
173         wp = Xrestpos(ws, wp, statep->ls_start);                \
174         *retrace_info->xp = '\0';                               \
175         sp = Xstring(retrace_info->xs, retrace_info->xp);       \
176         dp = (void *)retrace_info;                              \
177         retrace_info = retrace_info->next;                      \
178         afree(dp, ATEMP);                                       \
179 } while (/* CONSTCOND */ 0)
180
181 /**
182  * Lexical analyser
183  *
184  * Tokens are not regular expressions; they are LL(1), for example
185  * "${var:-${PWD}}" and "$(size $(whence ksh))", hence the state
186  * stack. Note that "$(...)" is now parsed recursively.
187  */
188
189 int
190 yylex(int cf)
191 {
192         Lex_state states[STATE_BSIZE], *statep, *s2, *base;
193         State_info state_info;
194         int c, c2, state;
195         size_t cz;
196         XString ws;             /* expandable output word */
197         char *wp;               /* output word pointer */
198         char *sp, *dp;
199
200  Again:
201         states[0].type = SINVALID;
202         states[0].ls_base = NULL;
203         statep = &states[1];
204         state_info.base = states;
205         state_info.end = &state_info.base[STATE_BSIZE];
206
207         Xinit(ws, wp, 64, ATEMP);
208
209         backslash_skip = 0;
210         ignore_backslash_newline = 0;
211
212         if (cf & ONEWORD)
213                 state = SWORD;
214         else if (cf & LETEXPR) {
215                 /* enclose arguments in (double) quotes */
216                 *wp++ = OQUOTE;
217                 state = SLETPAREN;
218                 statep->nparen = 0;
219         } else {
220                 /* normal lexing */
221                 state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
222                 while ((c = getsc()) == ' ' || c == '\t')
223                         ;
224                 if (c == '#') {
225                         ignore_backslash_newline++;
226                         while ((c = getsc()) != '\0' && c != '\n')
227                                 ;
228                         ignore_backslash_newline--;
229                 }
230                 ungetsc(c);
231         }
232         if (source->flags & SF_ALIAS) {
233                 /* trailing ' ' in alias definition */
234                 source->flags &= ~SF_ALIAS;
235                 cf |= ALIAS;
236         }
237
238         /* Initial state: one of SWORD SLETPAREN SHEREDELIM SBASE */
239         statep->type = state;
240
241         /* check for here string */
242         if (state == SHEREDELIM) {
243                 c = getsc();
244                 if (c == '<') {
245                         state = SHEREDELIM;
246                         while ((c = getsc()) == ' ' || c == '\t')
247                                 ;
248                         ungetsc(c);
249                         c = '<';
250                         goto accept_nonword;
251                 }
252                 ungetsc(c);
253         }
254
255         /* collect non-special or quoted characters to form word */
256         while (!((c = getsc()) == 0 ||
257             ((state == SBASE || state == SHEREDELIM) && ctype(c, C_LEX1)))) {
258                 if (state == SBASE &&
259                     subshell_nesting_type == /*{*/ '}' &&
260                     c == /*{*/ '}')
261                         /* possibly end ${ :;} */
262                         break;
263  accept_nonword:
264                 Xcheck(ws, wp);
265                 switch (state) {
266                 case SADELIM:
267                         if (c == '(')
268                                 statep->nparen++;
269                         else if (c == ')')
270                                 statep->nparen--;
271                         else if (statep->nparen == 0 && (c == /*{*/ '}' ||
272                             c == (int)statep->ls_adelim.delimiter)) {
273                                 *wp++ = ADELIM;
274                                 *wp++ = c;
275                                 if (c == /*{*/ '}' || --statep->ls_adelim.num == 0)
276                                         POP_STATE();
277                                 if (c == /*{*/ '}')
278                                         POP_STATE();
279                                 break;
280                         }
281                         /* FALLTHROUGH */
282                 case SBASE:
283                         if (c == '[' && (cf & (VARASN|ARRAYVAR))) {
284                                 /* temporary */
285                                 *wp = EOS;
286                                 if (is_wdvarname(Xstring(ws, wp), false)) {
287                                         char *p, *tmp;
288
289                                         if (arraysub(&tmp)) {
290                                                 *wp++ = CHAR;
291                                                 *wp++ = c;
292                                                 for (p = tmp; *p; ) {
293                                                         Xcheck(ws, wp);
294                                                         *wp++ = CHAR;
295                                                         *wp++ = *p++;
296                                                 }
297                                                 afree(tmp, ATEMP);
298                                                 break;
299                                         } else {
300                                                 Source *s;
301
302                                                 s = pushs(SREREAD,
303                                                     source->areap);
304                                                 s->start = s->str =
305                                                     s->u.freeme = tmp;
306                                                 s->next = source;
307                                                 source = s;
308                                         }
309                                 }
310                                 *wp++ = CHAR;
311                                 *wp++ = c;
312                                 break;
313                         }
314                         /* FALLTHROUGH */
315  Sbase1:                /* includes *(...|...) pattern (*+?@!) */
316                         if (c == '*' || c == '@' || c == '+' || c == '?' ||
317                             c == '!') {
318                                 c2 = getsc();
319                                 if (c2 == '(' /*)*/ ) {
320                                         *wp++ = OPAT;
321                                         *wp++ = c;
322                                         PUSH_STATE(SPATTERN);
323                                         break;
324                                 }
325                                 ungetsc(c2);
326                         }
327                         /* FALLTHROUGH */
328  Sbase2:                /* doesn't include *(...|...) pattern (*+?@!) */
329                         switch (c) {
330                         case '\\':
331  getsc_qchar:
332                                 if ((c = getsc())) {
333                                         /* trailing \ is lost */
334                                         *wp++ = QCHAR;
335                                         *wp++ = c;
336                                 }
337                                 break;
338                         case '\'':
339  open_ssquote_unless_heredoc:
340                                 if ((cf & HEREDOC))
341                                         goto store_char;
342                                 *wp++ = OQUOTE;
343                                 ignore_backslash_newline++;
344                                 PUSH_STATE(SSQUOTE);
345                                 break;
346                         case '"':
347  open_sdquote:
348                                 *wp++ = OQUOTE;
349                                 PUSH_STATE(SDQUOTE);
350                                 break;
351                         case '$':
352                                 /*
353                                  * processing of dollar sign belongs into
354                                  * Subst, except for those which can open
355                                  * a string: $'…' and $"…"
356                                  */
357  subst_dollar_ex:
358                                 c = getsc();
359                                 switch (c) {
360                                 case '"':
361                                         goto open_sdquote;
362                                 case '\'':
363                                         goto open_sequote;
364                                 default:
365                                         goto SubstS;
366                                 }
367                         default:
368                                 goto Subst;
369                         }
370                         break;
371
372  Subst:
373                         switch (c) {
374                         case '\\':
375                                 c = getsc();
376                                 switch (c) {
377                                 case '"':
378                                         if ((cf & HEREDOC))
379                                                 goto heredocquote;
380                                         /* FALLTHROUGH */
381                                 case '\\':
382                                 case '$': case '`':
383  store_qchar:
384                                         *wp++ = QCHAR;
385                                         *wp++ = c;
386                                         break;
387                                 default:
388  heredocquote:
389                                         Xcheck(ws, wp);
390                                         if (c) {
391                                                 /* trailing \ is lost */
392                                                 *wp++ = CHAR;
393                                                 *wp++ = '\\';
394                                                 *wp++ = CHAR;
395                                                 *wp++ = c;
396                                         }
397                                         break;
398                                 }
399                                 break;
400                         case '$':
401                                 c = getsc();
402  SubstS:
403                                 if (c == '(') /*)*/ {
404                                         c = getsc();
405                                         if (c == '(') /*)*/ {
406                                                 *wp++ = EXPRSUB;
407                                                 PUSH_STATE(SASPAREN);
408                                                 statep->nparen = 2;
409                                                 PUSH_SRETRACE();
410                                                 *retrace_info->xp++ = '(';
411                                         } else {
412                                                 ungetsc(c);
413  subst_command:
414                                                 c = COMSUB;
415  subst_command2:
416                                                 sp = yyrecursive(c);
417                                                 cz = strlen(sp) + 1;
418                                                 XcheckN(ws, wp, cz);
419                                                 *wp++ = c;
420                                                 memcpy(wp, sp, cz);
421                                                 wp += cz;
422                                         }
423                                 } else if (c == '{') /*}*/ {
424                                         if ((c = getsc()) == '|') {
425                                                 /*
426                                                  * non-subenvironment
427                                                  * value substitution
428                                                  */
429                                                 c = VALSUB;
430                                                 goto subst_command2;
431                                         } else if (ctype(c, C_IFSWS)) {
432                                                 /*
433                                                  * non-subenvironment
434                                                  * "command" substitution
435                                                  */
436                                                 c = FUNSUB;
437                                                 goto subst_command2;
438                                         }
439                                         ungetsc(c);
440                                         *wp++ = OSUBST;
441                                         *wp++ = '{'; /*}*/
442                                         wp = get_brace_var(&ws, wp);
443                                         c = getsc();
444                                         /* allow :# and :% (ksh88 compat) */
445                                         if (c == ':') {
446                                                 *wp++ = CHAR;
447                                                 *wp++ = c;
448                                                 c = getsc();
449                                                 if (c == ':') {
450                                                         *wp++ = CHAR;
451                                                         *wp++ = '0';
452                                                         *wp++ = ADELIM;
453                                                         *wp++ = ':';
454                                                         PUSH_STATE(SBRACE);
455                                                         PUSH_STATE(SADELIM);
456                                                         statep->ls_adelim.delimiter = ':';
457                                                         statep->ls_adelim.num = 1;
458                                                         statep->nparen = 0;
459                                                         break;
460                                                 } else if (ksh_isdigit(c) ||
461                                                     c == '('/*)*/ || c == ' ' ||
462                                                     /*XXX what else? */
463                                                     c == '$') {
464                                                         /* substring subst. */
465                                                         if (c != ' ') {
466                                                                 *wp++ = CHAR;
467                                                                 *wp++ = ' ';
468                                                         }
469                                                         ungetsc(c);
470                                                         PUSH_STATE(SBRACE);
471                                                         PUSH_STATE(SADELIM);
472                                                         statep->ls_adelim.delimiter = ':';
473                                                         statep->ls_adelim.num = 2;
474                                                         statep->nparen = 0;
475                                                         break;
476                                                 }
477                                         } else if (c == '/') {
478                                                 *wp++ = CHAR;
479                                                 *wp++ = c;
480                                                 if ((c = getsc()) == '/') {
481                                                         *wp++ = ADELIM;
482                                                         *wp++ = c;
483                                                 } else
484                                                         ungetsc(c);
485                                                 PUSH_STATE(SBRACE);
486                                                 PUSH_STATE(SADELIM);
487                                                 statep->ls_adelim.delimiter = '/';
488                                                 statep->ls_adelim.num = 1;
489                                                 statep->nparen = 0;
490                                                 break;
491                                         }
492                                         /*
493                                          * If this is a trim operation,
494                                          * treat (,|,) specially in STBRACE.
495                                          */
496                                         if (ctype(c, C_SUBOP2)) {
497                                                 ungetsc(c);
498                                                 if (Flag(FSH))
499                                                         PUSH_STATE(STBRACEBOURNE);
500                                                 else
501                                                         PUSH_STATE(STBRACEKORN);
502                                         } else {
503                                                 ungetsc(c);
504                                                 if (state == SDQUOTE ||
505                                                     state == SQBRACE)
506                                                         PUSH_STATE(SQBRACE);
507                                                 else
508                                                         PUSH_STATE(SBRACE);
509                                         }
510                                 } else if (ksh_isalphx(c)) {
511                                         *wp++ = OSUBST;
512                                         *wp++ = 'X';
513                                         do {
514                                                 Xcheck(ws, wp);
515                                                 *wp++ = c;
516                                                 c = getsc();
517                                         } while (ksh_isalnux(c));
518                                         *wp++ = '\0';
519                                         *wp++ = CSUBST;
520                                         *wp++ = 'X';
521                                         ungetsc(c);
522                                 } else if (ctype(c, C_VAR1 | C_DIGIT)) {
523                                         Xcheck(ws, wp);
524                                         *wp++ = OSUBST;
525                                         *wp++ = 'X';
526                                         *wp++ = c;
527                                         *wp++ = '\0';
528                                         *wp++ = CSUBST;
529                                         *wp++ = 'X';
530                                 } else {
531                                         *wp++ = CHAR;
532                                         *wp++ = '$';
533                                         ungetsc(c);
534                                 }
535                                 break;
536                         case '`':
537  subst_gravis:
538                                 PUSH_STATE(SBQUOTE);
539                                 *wp++ = COMSUB;
540                                 /*
541                                  * Need to know if we are inside double quotes
542                                  * since sh/AT&T-ksh translate the \" to " in
543                                  * "`...\"...`".
544                                  * This is not done in POSIX mode (section
545                                  * 3.2.3, Double Quotes: "The backquote shall
546                                  * retain its special meaning introducing the
547                                  * other form of command substitution (see
548                                  * 3.6.3). The portion of the quoted string
549                                  * from the initial backquote and the
550                                  * characters up to the next backquote that
551                                  * is not preceded by a backslash (having
552                                  * escape characters removed) defines that
553                                  * command whose output replaces `...` when
554                                  * the word is expanded."
555                                  * Section 3.6.3, Command Substitution:
556                                  * "Within the backquoted style of command
557                                  * substitution, backslash shall retain its
558                                  * literal meaning, except when followed by
559                                  * $ ` \.").
560                                  */
561                                 statep->ls_bool = false;
562                                 s2 = statep;
563                                 base = state_info.base;
564                                 while (/* CONSTCOND */ 1) {
565                                         for (; s2 != base; s2--) {
566                                                 if (s2->type == SDQUOTE) {
567                                                         statep->ls_bool = true;
568                                                         break;
569                                                 }
570                                         }
571                                         if (s2 != base)
572                                                 break;
573                                         if (!(s2 = s2->ls_base))
574                                                 break;
575                                         base = s2-- - STATE_BSIZE;
576                                 }
577                                 break;
578                         case QCHAR:
579                                 if (cf & LQCHAR) {
580                                         *wp++ = QCHAR;
581                                         *wp++ = getsc();
582                                         break;
583                                 }
584                                 /* FALLTHROUGH */
585                         default:
586  store_char:
587                                 *wp++ = CHAR;
588                                 *wp++ = c;
589                         }
590                         break;
591
592                 case SEQUOTE:
593                         if (c == '\'') {
594                                 POP_STATE();
595                                 *wp++ = CQUOTE;
596                                 ignore_backslash_newline--;
597                         } else if (c == '\\') {
598                                 if ((c2 = unbksl(true, s_get, s_put)) == -1)
599                                         c2 = s_get();
600                                 if (c2 == 0)
601                                         statep->ls_bool = true;
602                                 if (!statep->ls_bool) {
603                                         char ts[4];
604
605                                         if ((unsigned int)c2 < 0x100) {
606                                                 *wp++ = QCHAR;
607                                                 *wp++ = c2;
608                                         } else {
609                                                 cz = utf_wctomb(ts, c2 - 0x100);
610                                                 ts[cz] = 0;
611                                                 for (cz = 0; ts[cz]; ++cz) {
612                                                         *wp++ = QCHAR;
613                                                         *wp++ = ts[cz];
614                                                 }
615                                         }
616                                 }
617                         } else if (!statep->ls_bool) {
618                                 *wp++ = QCHAR;
619                                 *wp++ = c;
620                         }
621                         break;
622
623                 case SSQUOTE:
624                         if (c == '\'') {
625                                 POP_STATE();
626                                 if ((cf & HEREDOC) || state == SQBRACE)
627                                         goto store_char;
628                                 *wp++ = CQUOTE;
629                                 ignore_backslash_newline--;
630                         } else {
631                                 *wp++ = QCHAR;
632                                 *wp++ = c;
633                         }
634                         break;
635
636                 case SDQUOTE:
637                         if (c == '"') {
638                                 POP_STATE();
639                                 *wp++ = CQUOTE;
640                         } else
641                                 goto Subst;
642                         break;
643
644                 /* $(( ... )) */
645                 case SASPAREN:
646                         if (c == '(')
647                                 statep->nparen++;
648                         else if (c == ')') {
649                                 statep->nparen--;
650                                 if (statep->nparen == 1) {
651                                         /* end of EXPRSUB */
652                                         POP_SRETRACE();
653                                         POP_STATE();
654
655                                         if ((c2 = getsc()) == /*(*/ ')') {
656                                                 cz = strlen(sp) - 2;
657                                                 XcheckN(ws, wp, cz);
658                                                 memcpy(wp, sp + 1, cz);
659                                                 wp += cz;
660                                                 afree(sp, ATEMP);
661                                                 *wp++ = '\0';
662                                                 break;
663                                         } else {
664                                                 Source *s;
665
666                                                 ungetsc(c2);
667                                                 /*
668                                                  * mismatched parenthesis -
669                                                  * assume we were really
670                                                  * parsing a $(...) expression
671                                                  */
672                                                 --wp;
673                                                 s = pushs(SREREAD,
674                                                     source->areap);
675                                                 s->start = s->str =
676                                                     s->u.freeme = sp;
677                                                 s->next = source;
678                                                 source = s;
679                                                 goto subst_command;
680                                         }
681                                 }
682                         }
683                         /* reuse existing state machine */
684                         goto Sbase2;
685
686                 case SQBRACE:
687                         if (c == '\\') {
688                                 /*
689                                  * perform POSIX "quote removal" if the back-
690                                  * slash is "special", i.e. same cases as the
691                                  * {case '\\':} in Subst: plus closing brace;
692                                  * in mksh code "quote removal" on '\c' means
693                                  * write QCHAR+c, otherwise CHAR+\+CHAR+c are
694                                  * emitted (in heredocquote:)
695                                  */
696                                 if ((c = getsc()) == '"' || c == '\\' ||
697                                     c == '$' || c == '`' || c == /*{*/'}')
698                                         goto store_qchar;
699                                 goto heredocquote;
700                         }
701                         goto common_SQBRACE;
702
703                 case SBRACE:
704                         if (c == '\'')
705                                 goto open_ssquote_unless_heredoc;
706                         else if (c == '\\')
707                                 goto getsc_qchar;
708  common_SQBRACE:
709                         if (c == '"')
710                                 goto open_sdquote;
711                         else if (c == '$')
712                                 goto subst_dollar_ex;
713                         else if (c == '`')
714                                 goto subst_gravis;
715                         else if (c != /*{*/ '}')
716                                 goto store_char;
717                         POP_STATE();
718                         *wp++ = CSUBST;
719                         *wp++ = /*{*/ '}';
720                         break;
721
722                 /* Same as SBASE, except (,|,) treated specially */
723                 case STBRACEKORN:
724                         if (c == '|')
725                                 *wp++ = SPAT;
726                         else if (c == '(') {
727                                 *wp++ = OPAT;
728                                 /* simile for @ */
729                                 *wp++ = ' ';
730                                 PUSH_STATE(SPATTERN);
731                         } else /* FALLTHROUGH */
732                 case STBRACEBOURNE:
733                           if (c == /*{*/ '}') {
734                                 POP_STATE();
735                                 *wp++ = CSUBST;
736                                 *wp++ = /*{*/ '}';
737                         } else
738                                 goto Sbase1;
739                         break;
740
741                 case SBQUOTE:
742                         if (c == '`') {
743                                 *wp++ = 0;
744                                 POP_STATE();
745                         } else if (c == '\\') {
746                                 switch (c = getsc()) {
747                                 case 0:
748                                         /* trailing \ is lost */
749                                         break;
750                                 case '\\':
751                                 case '$': case '`':
752                                         *wp++ = c;
753                                         break;
754                                 case '"':
755                                         if (statep->ls_bool) {
756                                                 *wp++ = c;
757                                                 break;
758                                         }
759                                         /* FALLTHROUGH */
760                                 default:
761                                         *wp++ = '\\';
762                                         *wp++ = c;
763                                         break;
764                                 }
765                         } else
766                                 *wp++ = c;
767                         break;
768
769                 /* ONEWORD */
770                 case SWORD:
771                         goto Subst;
772
773                 /* LETEXPR: (( ... )) */
774                 case SLETPAREN:
775                         if (c == /*(*/ ')') {
776                                 if (statep->nparen > 0)
777                                         --statep->nparen;
778                                 else if ((c2 = getsc()) == /*(*/ ')') {
779                                         c = 0;
780                                         *wp++ = CQUOTE;
781                                         goto Done;
782                                 } else {
783                                         Source *s;
784
785                                         ungetsc(c2);
786                                         /*
787                                          * mismatched parenthesis -
788                                          * assume we were really
789                                          * parsing a (...) expression
790                                          */
791                                         *wp = EOS;
792                                         sp = Xstring(ws, wp);
793                                         dp = wdstrip(sp, WDS_KEEPQ);
794                                         s = pushs(SREREAD, source->areap);
795                                         s->start = s->str = s->u.freeme = dp;
796                                         s->next = source;
797                                         source = s;
798                                         return ('('/*)*/);
799                                 }
800                         } else if (c == '(')
801                                 /*
802                                  * parentheses inside quotes and
803                                  * backslashes are lost, but AT&T ksh
804                                  * doesn't count them either
805                                  */
806                                 ++statep->nparen;
807                         goto Sbase2;
808
809                 /* <<, <<-, <<< delimiter */
810                 case SHEREDELIM:
811                         /*
812                          * here delimiters need a special case since
813                          * $ and `...` are not to be treated specially
814                          */
815                         switch (c) {
816                         case '\\':
817                                 if ((c = getsc())) {
818                                         /* trailing \ is lost */
819                                         *wp++ = QCHAR;
820                                         *wp++ = c;
821                                 }
822                                 break;
823                         case '\'':
824                                 goto open_ssquote_unless_heredoc;
825                         case '$':
826                                 if ((c2 = getsc()) == '\'') {
827  open_sequote:
828                                         *wp++ = OQUOTE;
829                                         ignore_backslash_newline++;
830                                         PUSH_STATE(SEQUOTE);
831                                         statep->ls_bool = false;
832                                         break;
833                                 } else if (c2 == '"') {
834                                         /* FALLTHROUGH */
835                         case '"':
836                                         state = statep->type = SHEREDQUOTE;
837                                         PUSH_SRETRACE();
838                                         break;
839                                 }
840                                 ungetsc(c2);
841                                 /* FALLTHROUGH */
842                         default:
843                                 *wp++ = CHAR;
844                                 *wp++ = c;
845                         }
846                         break;
847
848                 /* " in <<, <<-, <<< delimiter */
849                 case SHEREDQUOTE:
850                         if (c != '"')
851                                 goto Subst;
852                         POP_SRETRACE();
853                         dp = strnul(sp) - 1;
854                         /* remove the trailing double quote */
855                         *dp = '\0';
856                         /* store the quoted string */
857                         *wp++ = OQUOTE;
858                         XcheckN(ws, wp, (dp - sp));
859                         dp = sp;
860                         while ((c = *dp++)) {
861                                 if (c == '\\') {
862                                         switch ((c = *dp++)) {
863                                         case '\\':
864                                         case '"':
865                                         case '$':
866                                         case '`':
867                                                 break;
868                                         default:
869                                                 *wp++ = CHAR;
870                                                 *wp++ = '\\';
871                                                 break;
872                                         }
873                                 }
874                                 *wp++ = CHAR;
875                                 *wp++ = c;
876                         }
877                         afree(sp, ATEMP);
878                         *wp++ = CQUOTE;
879                         state = statep->type = SHEREDELIM;
880                         break;
881
882                 /* in *(...|...) pattern (*+?@!) */
883                 case SPATTERN:
884                         if (c == /*(*/ ')') {
885                                 *wp++ = CPAT;
886                                 POP_STATE();
887                         } else if (c == '|') {
888                                 *wp++ = SPAT;
889                         } else if (c == '(') {
890                                 *wp++ = OPAT;
891                                 /* simile for @ */
892                                 *wp++ = ' ';
893                                 PUSH_STATE(SPATTERN);
894                         } else
895                                 goto Sbase1;
896                         break;
897                 }
898         }
899  Done:
900         Xcheck(ws, wp);
901         if (statep != &states[1])
902                 /* XXX figure out what is missing */
903                 yyerror("no closing quote\n");
904
905         /* This done to avoid tests for SHEREDELIM wherever SBASE tested */
906         if (state == SHEREDELIM)
907                 state = SBASE;
908
909         dp = Xstring(ws, wp);
910         if (state == SBASE && (
911 #ifndef MKSH_LEGACY_MODE
912             (c == '&' && !Flag(FSH) && !Flag(FPOSIX)) ||
913 #endif
914             c == '<' || c == '>')) {
915                 struct ioword *iop = alloc(sizeof(struct ioword), ATEMP);
916
917                 if (Xlength(ws, wp) == 0)
918                         iop->unit = c == '<' ? 0 : 1;
919                 else for (iop->unit = 0, c2 = 0; c2 < Xlength(ws, wp); c2 += 2) {
920                         if (dp[c2] != CHAR)
921                                 goto no_iop;
922                         if (!ksh_isdigit(dp[c2 + 1]))
923                                 goto no_iop;
924                         iop->unit = (iop->unit * 10) + dp[c2 + 1] - '0';
925                 }
926
927                 if (iop->unit >= FDBASE)
928                         goto no_iop;
929
930                 if (c == '&') {
931                         if ((c2 = getsc()) != '>') {
932                                 ungetsc(c2);
933                                 goto no_iop;
934                         }
935                         c = c2;
936                         iop->flag = IOBASH;
937                 } else
938                         iop->flag = 0;
939
940                 c2 = getsc();
941                 /* <<, >>, <> are ok, >< is not */
942                 if (c == c2 || (c == '<' && c2 == '>')) {
943                         iop->flag |= c == c2 ?
944                             (c == '>' ? IOCAT : IOHERE) : IORDWR;
945                         if (iop->flag == IOHERE) {
946                                 if ((c2 = getsc()) == '-') {
947                                         iop->flag |= IOSKIP;
948                                         c2 = getsc();
949                                 } else if (c2 == '<')
950                                         iop->flag |= IOHERESTR;
951                                 ungetsc(c2);
952                                 if (c2 == '\n')
953                                         iop->flag |= IONDELIM;
954                         }
955                 } else if (c2 == '&')
956                         iop->flag |= IODUP | (c == '<' ? IORDUP : 0);
957                 else {
958                         iop->flag |= c == '>' ? IOWRITE : IOREAD;
959                         if (c == '>' && c2 == '|')
960                                 iop->flag |= IOCLOB;
961                         else
962                                 ungetsc(c2);
963                 }
964
965                 iop->name = NULL;
966                 iop->delim = NULL;
967                 iop->heredoc = NULL;
968                 /* free word */
969                 Xfree(ws, wp);
970                 yylval.iop = iop;
971                 return (REDIR);
972  no_iop:
973                 afree(iop, ATEMP);
974         }
975
976         if (wp == dp && state == SBASE) {
977                 /* free word */
978                 Xfree(ws, wp);
979                 /* no word, process LEX1 character */
980                 if ((c == '|') || (c == '&') || (c == ';') || (c == '('/*)*/)) {
981                         if ((c2 = getsc()) == c)
982                                 c = (c == ';') ? BREAK :
983                                     (c == '|') ? LOGOR :
984                                     (c == '&') ? LOGAND :
985                                     /* c == '(' ) */ MDPAREN;
986                         else if (c == '|' && c2 == '&')
987                                 c = COPROC;
988                         else if (c == ';' && c2 == '|')
989                                 c = BRKEV;
990                         else if (c == ';' && c2 == '&')
991                                 c = BRKFT;
992                         else
993                                 ungetsc(c2);
994 #ifndef MKSH_SMALL
995                         if (c == BREAK) {
996                                 if ((c2 = getsc()) == '&')
997                                         c = BRKEV;
998                                 else
999                                         ungetsc(c2);
1000                         }
1001 #endif
1002                 } else if (c == '\n') {
1003                         gethere(false);
1004                         if (cf & CONTIN)
1005                                 goto Again;
1006                 } else if (c == '\0')
1007                         /* need here strings at EOF */
1008                         gethere(true);
1009                 return (c);
1010         }
1011
1012         /* terminate word */
1013         *wp++ = EOS;
1014         yylval.cp = Xclose(ws, wp);
1015         if (state == SWORD || state == SLETPAREN
1016             /* XXX ONEWORD? */)
1017                 return (LWORD);
1018
1019         /* unget terminator */
1020         ungetsc(c);
1021
1022         /*
1023          * note: the alias-vs-function code below depends on several
1024          * interna: starting from here, source->str is not modified;
1025          * the way getsc() and ungetsc() operate; etc.
1026          */
1027
1028         /* copy word to unprefixed string ident */
1029         sp = yylval.cp;
1030         dp = ident;
1031         if ((cf & HEREDELIM) && (sp[1] == '<'))
1032                 while ((dp - ident) < IDENT) {
1033                         if ((c = *sp++) == CHAR)
1034                                 *dp++ = *sp++;
1035                         else if ((c != OQUOTE) && (c != CQUOTE))
1036                                 break;
1037                 }
1038         else
1039                 while ((dp - ident) < IDENT && (c = *sp++) == CHAR)
1040                         *dp++ = *sp++;
1041         /* Make sure the ident array stays '\0' padded */
1042         memset(dp, 0, (ident + IDENT) - dp + 1);
1043         if (c != EOS)
1044                 /* word is not unquoted */
1045                 *ident = '\0';
1046
1047         if (*ident != '\0' && (cf & (KEYWORD | ALIAS))) {
1048                 struct tbl *p;
1049                 uint32_t h = hash(ident);
1050
1051                 if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) &&
1052                     (!(cf & ESACONLY) || p->val.i == ESAC ||
1053                     p->val.i == /*{*/ '}')) {
1054                         afree(yylval.cp, ATEMP);
1055                         return (p->val.i);
1056                 }
1057                 if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) &&
1058                     (p->flag & ISSET)) {
1059                         /*
1060                          * this still points to the same character as the
1061                          * ungetsc'd terminator from above
1062                          */
1063                         const char *cp = source->str;
1064
1065                         /* prefer POSIX but not Korn functions over aliases */
1066                         while (*cp == ' ' || *cp == '\t')
1067                                 /*
1068                                  * this is like getsc() without skipping
1069                                  * over Source boundaries (including not
1070                                  * parsing ungetsc'd characters that got
1071                                  * pushed into an SREREAD) which is what
1072                                  * we want here anyway: find out whether
1073                                  * the alias name is followed by a POSIX
1074                                  * function definition (only the opening
1075                                  * parenthesis is checked though)
1076                                  */
1077                                 ++cp;
1078                         /* prefer functions over aliases */
1079                         if (cp[0] != '(' || cp[1] != ')') {
1080                                 Source *s = source;
1081
1082                                 while (s && (s->flags & SF_HASALIAS))
1083                                         if (s->u.tblp == p)
1084                                                 return (LWORD);
1085                                         else
1086                                                 s = s->next;
1087                                 /* push alias expansion */
1088                                 s = pushs(SALIAS, source->areap);
1089                                 s->start = s->str = p->val.s;
1090                                 s->u.tblp = p;
1091                                 s->flags |= SF_HASALIAS;
1092                                 s->next = source;
1093                                 if (source->type == SEOF) {
1094                                         /* prevent infinite recursion at EOS */
1095                                         source->u.tblp = p;
1096                                         source->flags |= SF_HASALIAS;
1097                                 }
1098                                 source = s;
1099                                 afree(yylval.cp, ATEMP);
1100                                 goto Again;
1101                         }
1102                 }
1103         }
1104
1105         return (LWORD);
1106 }
1107
1108 static void
1109 gethere(bool iseof)
1110 {
1111         struct ioword **p;
1112
1113         for (p = heres; p < herep; p++)
1114                 if (iseof && !((*p)->flag & IOHERESTR))
1115                         /* only here strings at EOF */
1116                         return;
1117                 else
1118                         readhere(*p);
1119         herep = heres;
1120 }
1121
1122 /*
1123  * read "<<word" text into temp file
1124  */
1125
1126 static void
1127 readhere(struct ioword *iop)
1128 {
1129         int c;
1130         const char *eof, *eofp;
1131         XString xs;
1132         char *xp;
1133         int xpos;
1134
1135         if (iop->flag & IOHERESTR) {
1136                 /* process the here string */
1137                 iop->heredoc = xp = evalstr(iop->delim, DOBLANK);
1138                 xpos = strlen(xp) - 1;
1139                 memmove(xp, xp + 1, xpos);
1140                 xp[xpos] = '\n';
1141                 return;
1142         }
1143
1144         eof = iop->flag & IONDELIM ? "<<" : evalstr(iop->delim, 0);
1145
1146         if (!(iop->flag & IOEVAL))
1147                 ignore_backslash_newline++;
1148
1149         Xinit(xs, xp, 256, ATEMP);
1150
1151  heredoc_read_line:
1152         /* beginning of line */
1153         eofp = eof;
1154         xpos = Xsavepos(xs, xp);
1155         if (iop->flag & IOSKIP) {
1156                 /* skip over leading tabs */
1157                 while ((c = getsc()) == '\t')
1158                         /* nothing */;
1159                 goto heredoc_parse_char;
1160         }
1161  heredoc_read_char:
1162         c = getsc();
1163  heredoc_parse_char:
1164         /* compare with here document marker */
1165         if (!*eofp) {
1166                 /* end of here document marker, what to do? */
1167                 switch (c) {
1168                 case /*(*/ ')':
1169                         if (!subshell_nesting_type)
1170                                 /*-
1171                                  * not allowed outside $(...) or (...)
1172                                  * => mismatch
1173                                  */
1174                                 break;
1175                         /* allow $(...) or (...) to close here */
1176                         ungetsc(/*(*/ ')');
1177                         /* FALLTHROUGH */
1178                 case 0:
1179                         /*
1180                          * Allow EOF here to commands without trailing
1181                          * newlines (mksh -c '...') will work as well.
1182                          */
1183                 case '\n':
1184                         /* Newline terminates here document marker */
1185                         goto heredoc_found_terminator;
1186                 }
1187         } else if (c == *eofp++)
1188                 /* store; then read and compare next character */
1189                 goto heredoc_store_and_loop;
1190         /* nope, mismatch; read until end of line */
1191         while (c != '\n') {
1192                 if (!c)
1193                         /* oops, reached EOF */
1194                         yyerror("%s '%s' unclosed\n", "here document", eof);
1195                 /* store character */
1196                 Xcheck(xs, xp);
1197                 Xput(xs, xp, c);
1198                 /* read next character */
1199                 c = getsc();
1200         }
1201         /* we read a newline as last character */
1202  heredoc_store_and_loop:
1203         /* store character */
1204         Xcheck(xs, xp);
1205         Xput(xs, xp, c);
1206         if (c == '\n')
1207                 goto heredoc_read_line;
1208         goto heredoc_read_char;
1209
1210  heredoc_found_terminator:
1211         /* jump back to saved beginning of line */
1212         xp = Xrestpos(xs, xp, xpos);
1213         /* terminate, close and store */
1214         Xput(xs, xp, '\0');
1215         iop->heredoc = Xclose(xs, xp);
1216
1217         if (!(iop->flag & IOEVAL))
1218                 ignore_backslash_newline--;
1219 }
1220
1221 void
1222 yyerror(const char *fmt, ...)
1223 {
1224         va_list va;
1225
1226         /* pop aliases and re-reads */
1227         while (source->type == SALIAS || source->type == SREREAD)
1228                 source = source->next;
1229         /* zap pending input */
1230         source->str = null;
1231
1232         error_prefix(true);
1233         va_start(va, fmt);
1234         shf_vfprintf(shl_out, fmt, va);
1235         va_end(va);
1236         errorfz();
1237 }
1238
1239 /*
1240  * input for yylex with alias expansion
1241  */
1242
1243 Source *
1244 pushs(int type, Area *areap)
1245 {
1246         Source *s;
1247
1248         s = alloc(sizeof(Source), areap);
1249         memset(s, 0, sizeof(Source));
1250         s->type = type;
1251         s->str = null;
1252         s->areap = areap;
1253         if (type == SFILE || type == SSTDIN)
1254                 XinitN(s->xs, 256, s->areap);
1255         return (s);
1256 }
1257
1258 static int
1259 getsc_uu(void)
1260 {
1261         Source *s = source;
1262         int c;
1263
1264         while ((c = *s->str++) == 0) {
1265                 /* return 0 for EOF by default */
1266                 s->str = NULL;
1267                 switch (s->type) {
1268                 case SEOF:
1269                         s->str = null;
1270                         return (0);
1271
1272                 case SSTDIN:
1273                 case SFILE:
1274                         getsc_line(s);
1275                         break;
1276
1277                 case SWSTR:
1278                         break;
1279
1280                 case SSTRING:
1281                 case SSTRINGCMDLINE:
1282                         break;
1283
1284                 case SWORDS:
1285                         s->start = s->str = *s->u.strv++;
1286                         s->type = SWORDSEP;
1287                         break;
1288
1289                 case SWORDSEP:
1290                         if (*s->u.strv == NULL) {
1291                                 s->start = s->str = "\n";
1292                                 s->type = SEOF;
1293                         } else {
1294                                 s->start = s->str = " ";
1295                                 s->type = SWORDS;
1296                         }
1297                         break;
1298
1299                 case SALIAS:
1300                         if (s->flags & SF_ALIASEND) {
1301                                 /* pass on an unused SF_ALIAS flag */
1302                                 source = s->next;
1303                                 source->flags |= s->flags & SF_ALIAS;
1304                                 s = source;
1305                         } else if (*s->u.tblp->val.s &&
1306                             (c = strnul(s->u.tblp->val.s)[-1], ksh_isspace(c))) {
1307                                 /* pop source stack */
1308                                 source = s = s->next;
1309                                 /*
1310                                  * Note that this alias ended with a
1311                                  * space, enabling alias expansion on
1312                                  * the following word.
1313                                  */
1314                                 s->flags |= SF_ALIAS;
1315                         } else {
1316                                 /*
1317                                  * At this point, we need to keep the current
1318                                  * alias in the source list so recursive
1319                                  * aliases can be detected and we also need to
1320                                  * return the next character. Do this by
1321                                  * temporarily popping the alias to get the
1322                                  * next character and then put it back in the
1323                                  * source list with the SF_ALIASEND flag set.
1324                                  */
1325                                 /* pop source stack */
1326                                 source = s->next;
1327                                 source->flags |= s->flags & SF_ALIAS;
1328                                 c = getsc_uu();
1329                                 if (c) {
1330                                         s->flags |= SF_ALIASEND;
1331                                         s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1332                                         s->start = s->str = s->ugbuf;
1333                                         s->next = source;
1334                                         source = s;
1335                                 } else {
1336                                         s = source;
1337                                         /* avoid reading EOF twice */
1338                                         s->str = NULL;
1339                                         break;
1340                                 }
1341                         }
1342                         continue;
1343
1344                 case SREREAD:
1345                         if (s->start != s->ugbuf)
1346                                 /* yuck */
1347                                 afree(s->u.freeme, ATEMP);
1348                         source = s = s->next;
1349                         continue;
1350                 }
1351                 if (s->str == NULL) {
1352                         s->type = SEOF;
1353                         s->start = s->str = null;
1354                         return ('\0');
1355                 }
1356                 if (s->flags & SF_ECHO) {
1357                         shf_puts(s->str, shl_out);
1358                         shf_flush(shl_out);
1359                 }
1360         }
1361         return (c);
1362 }
1363
1364 static void
1365 getsc_line(Source *s)
1366 {
1367         char *xp = Xstring(s->xs, xp), *cp;
1368         bool interactive = Flag(FTALKING) && s->type == SSTDIN;
1369         bool have_tty = tobool(interactive && (s->flags & SF_TTY));
1370
1371         /* Done here to ensure nothing odd happens when a timeout occurs */
1372         XcheckN(s->xs, xp, LINE);
1373         *xp = '\0';
1374         s->start = s->str = xp;
1375
1376         if (have_tty && ksh_tmout) {
1377                 ksh_tmout_state = TMOUT_READING;
1378                 alarm(ksh_tmout);
1379         }
1380         if (interactive)
1381                 change_winsz();
1382 #ifndef MKSH_NO_CMDLINE_EDITING
1383         if (have_tty && (
1384 #if !MKSH_S_NOVI
1385             Flag(FVI) ||
1386 #endif
1387             Flag(FEMACS) || Flag(FGMACS))) {
1388                 int nread;
1389
1390                 nread = x_read(xp);
1391                 if (nread < 0)
1392                         /* read error */
1393                         nread = 0;
1394                 xp[nread] = '\0';
1395                 xp += nread;
1396         } else
1397 #endif
1398           {
1399                 if (interactive)
1400                         pprompt(prompt, 0);
1401                 else
1402                         s->line++;
1403
1404                 while (/* CONSTCOND */ 1) {
1405                         char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
1406
1407                         if (!p && shf_error(s->u.shf) &&
1408                             shf_errno(s->u.shf) == EINTR) {
1409                                 shf_clearerr(s->u.shf);
1410                                 if (trap)
1411                                         runtraps(0);
1412                                 continue;
1413                         }
1414                         if (!p || (xp = p, xp[-1] == '\n'))
1415                                 break;
1416                         /* double buffer size */
1417                         /* move past NUL so doubling works... */
1418                         xp++;
1419                         XcheckN(s->xs, xp, Xlength(s->xs, xp));
1420                         /* ...and move back again */
1421                         xp--;
1422                 }
1423                 /*
1424                  * flush any unwanted input so other programs/builtins
1425                  * can read it. Not very optimal, but less error prone
1426                  * than flushing else where, dealing with redirections,
1427                  * etc.
1428                  * TODO: reduce size of shf buffer (~128?) if SSTDIN
1429                  */
1430                 if (s->type == SSTDIN)
1431                         shf_flush(s->u.shf);
1432         }
1433         /*
1434          * XXX: temporary kludge to restore source after a
1435          * trap may have been executed.
1436          */
1437         source = s;
1438         if (have_tty && ksh_tmout) {
1439                 ksh_tmout_state = TMOUT_EXECUTING;
1440                 alarm(0);
1441         }
1442         cp = Xstring(s->xs, xp);
1443         s->start = s->str = cp;
1444         strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
1445         /* Note: if input is all nulls, this is not eof */
1446         if (Xlength(s->xs, xp) == 0) {
1447                 /* EOF */
1448                 if (s->type == SFILE)
1449                         shf_fdclose(s->u.shf);
1450                 s->str = NULL;
1451         } else if (interactive && *s->str &&
1452             (cur_prompt != PS1 || !ctype(*s->str, C_IFS | C_IFSWS))) {
1453                 histsave(&s->line, s->str, true, true);
1454 #if !defined(MKSH_SMALL) && HAVE_PERSISTENT_HISTORY
1455         } else if (interactive && cur_prompt == PS1) {
1456                 cp = Xstring(s->xs, xp);
1457                 while (*cp && ctype(*cp, C_IFSWS))
1458                         ++cp;
1459                 if (!*cp)
1460                         histsync();
1461 #endif
1462         }
1463         if (interactive)
1464                 set_prompt(PS2, NULL);
1465 }
1466
1467 void
1468 set_prompt(int to, Source *s)
1469 {
1470         cur_prompt = to;
1471
1472         switch (to) {
1473         /* command */
1474         case PS1:
1475                 /*
1476                  * Substitute ! and !! here, before substitutions are done
1477                  * so ! in expanded variables are not expanded.
1478                  * NOTE: this is not what AT&T ksh does (it does it after
1479                  * substitutions, POSIX doesn't say which is to be done.
1480                  */
1481                 {
1482                         struct shf *shf;
1483                         char * volatile ps1;
1484                         Area *saved_atemp;
1485
1486                         ps1 = str_val(global("PS1"));
1487                         shf = shf_sopen(NULL, strlen(ps1) * 2,
1488                             SHF_WR | SHF_DYNAMIC, NULL);
1489                         while (*ps1)
1490                                 if (*ps1 != '!' || *++ps1 == '!')
1491                                         shf_putchar(*ps1++, shf);
1492                                 else
1493                                         shf_fprintf(shf, "%d",
1494                                                 s ? s->line + 1 : 0);
1495                         ps1 = shf_sclose(shf);
1496                         saved_atemp = ATEMP;
1497                         newenv(E_ERRH);
1498                         if (kshsetjmp(e->jbuf)) {
1499                                 prompt = safe_prompt;
1500                                 /*
1501                                  * Don't print an error - assume it has already
1502                                  * been printed. Reason is we may have forked
1503                                  * to run a command and the child may be
1504                                  * unwinding its stack through this code as it
1505                                  * exits.
1506                                  */
1507                         } else {
1508                                 char *cp = substitute(ps1, 0);
1509                                 strdupx(prompt, cp, saved_atemp);
1510                         }
1511                         quitenv(NULL);
1512                 }
1513                 break;
1514         /* command continuation */
1515         case PS2:
1516                 prompt = str_val(global("PS2"));
1517                 break;
1518         }
1519 }
1520
1521 int
1522 pprompt(const char *cp, int ntruncate)
1523 {
1524         int columns = 0, lines = 0;
1525         bool indelimit = false;
1526         char delimiter = 0;
1527
1528         /*
1529          * Undocumented AT&T ksh feature:
1530          * If the second char in the prompt string is \r then the first
1531          * char is taken to be a non-printing delimiter and any chars
1532          * between two instances of the delimiter are not considered to
1533          * be part of the prompt length
1534          */
1535         if (*cp && cp[1] == '\r') {
1536                 delimiter = *cp;
1537                 cp += 2;
1538         }
1539         for (; *cp; cp++) {
1540                 if (indelimit && *cp != delimiter)
1541                         ;
1542                 else if (*cp == '\n' || *cp == '\r') {
1543                         lines += columns / x_cols + ((*cp == '\n') ? 1 : 0);
1544                         columns = 0;
1545                 } else if (*cp == '\t') {
1546                         columns = (columns | 7) + 1;
1547                 } else if (*cp == '\b') {
1548                         if (columns > 0)
1549                                 columns--;
1550                 } else if (*cp == delimiter)
1551                         indelimit = !indelimit;
1552                 else if (UTFMODE && ((unsigned char)*cp > 0x7F)) {
1553                         const char *cp2;
1554                         columns += utf_widthadj(cp, &cp2);
1555                         if (indelimit ||
1556                             (ntruncate < (x_cols * lines + columns)))
1557                                 shf_write(cp, cp2 - cp, shl_out);
1558                         cp = cp2 - /* loop increment */ 1;
1559                         continue;
1560                 } else
1561                         columns++;
1562                 if ((*cp != delimiter) &&
1563                     (indelimit || (ntruncate < (x_cols * lines + columns))))
1564                         shf_putc(*cp, shl_out);
1565         }
1566         shf_flush(shl_out);
1567         return (x_cols * lines + columns);
1568 }
1569
1570 /*
1571  * Read the variable part of a ${...} expression (i.e. up to but not
1572  * including the :[-+?=#%] or close-brace).
1573  */
1574 static char *
1575 get_brace_var(XString *wsp, char *wp)
1576 {
1577         char c;
1578         enum parse_state {
1579                 PS_INITIAL, PS_SAW_HASH, PS_IDENT,
1580                 PS_NUMBER, PS_VAR1
1581         } state = PS_INITIAL;
1582
1583         while (/* CONSTCOND */ 1) {
1584                 c = getsc();
1585                 /* State machine to figure out where the variable part ends. */
1586                 switch (state) {
1587                 case PS_INITIAL:
1588                         if (c == '#' || c == '!' || c == '%') {
1589                                 state = PS_SAW_HASH;
1590                                 break;
1591                         }
1592                         /* FALLTHROUGH */
1593                 case PS_SAW_HASH:
1594                         if (ksh_isalphx(c))
1595                                 state = PS_IDENT;
1596                         else if (ksh_isdigit(c))
1597                                 state = PS_NUMBER;
1598                         else if (c == '#') {
1599                                 if (state == PS_SAW_HASH) {
1600                                         char c2;
1601
1602                                         c2 = getsc();
1603                                         ungetsc(c2);
1604                                         if (c2 != /*{*/ '}') {
1605                                                 ungetsc(c);
1606                                                 goto out;
1607                                         }
1608                                 }
1609                                 state = PS_VAR1;
1610                         } else if (ctype(c, C_VAR1))
1611                                 state = PS_VAR1;
1612                         else
1613                                 goto out;
1614                         break;
1615                 case PS_IDENT:
1616                         if (!ksh_isalnux(c)) {
1617                                 if (c == '[') {
1618                                         char *tmp, *p;
1619
1620                                         if (!arraysub(&tmp))
1621                                                 yyerror("missing ]\n");
1622                                         *wp++ = c;
1623                                         for (p = tmp; *p; ) {
1624                                                 Xcheck(*wsp, wp);
1625                                                 *wp++ = *p++;
1626                                         }
1627                                         afree(tmp, ATEMP);
1628                                         /* the ] */
1629                                         c = getsc();
1630                                 }
1631                                 goto out;
1632                         }
1633                         break;
1634                 case PS_NUMBER:
1635                         if (!ksh_isdigit(c))
1636                                 goto out;
1637                         break;
1638                 case PS_VAR1:
1639                         goto out;
1640                 }
1641                 Xcheck(*wsp, wp);
1642                 *wp++ = c;
1643         }
1644  out:
1645         /* end of variable part */
1646         *wp++ = '\0';
1647         ungetsc(c);
1648         return (wp);
1649 }
1650
1651 /*
1652  * Save an array subscript - returns true if matching bracket found, false
1653  * if eof or newline was found.
1654  * (Returned string double null terminated)
1655  */
1656 static bool
1657 arraysub(char **strp)
1658 {
1659         XString ws;
1660         char *wp, c;
1661         /* we are just past the initial [ */
1662         unsigned int depth = 1;
1663
1664         Xinit(ws, wp, 32, ATEMP);
1665
1666         do {
1667                 c = getsc();
1668                 Xcheck(ws, wp);
1669                 *wp++ = c;
1670                 if (c == '[')
1671                         depth++;
1672                 else if (c == ']')
1673                         depth--;
1674         } while (depth > 0 && c && c != '\n');
1675
1676         *wp++ = '\0';
1677         *strp = Xclose(ws, wp);
1678
1679         return (tobool(depth == 0));
1680 }
1681
1682 /* Unget a char: handles case when we are already at the start of the buffer */
1683 static void
1684 ungetsc(int c)
1685 {
1686         struct sretrace_info *rp = retrace_info;
1687
1688         if (backslash_skip)
1689                 backslash_skip--;
1690         /* Don't unget EOF... */
1691         if (source->str == null && c == '\0')
1692                 return;
1693         while (rp) {
1694                 if (Xlength(rp->xs, rp->xp))
1695                         rp->xp--;
1696                 rp = rp->next;
1697         }
1698         ungetsc_i(c);
1699 }
1700 static void
1701 ungetsc_i(int c)
1702 {
1703         if (source->str > source->start)
1704                 source->str--;
1705         else {
1706                 Source *s;
1707
1708                 s = pushs(SREREAD, source->areap);
1709                 s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1710                 s->start = s->str = s->ugbuf;
1711                 s->next = source;
1712                 source = s;
1713         }
1714 }
1715
1716
1717 /* Called to get a char that isn't a \newline sequence. */
1718 static int
1719 getsc_bn(void)
1720 {
1721         int c, c2;
1722
1723         if (ignore_backslash_newline)
1724                 return (o_getsc_u());
1725
1726         if (backslash_skip == 1) {
1727                 backslash_skip = 2;
1728                 return (o_getsc_u());
1729         }
1730
1731         backslash_skip = 0;
1732
1733         while (/* CONSTCOND */ 1) {
1734                 c = o_getsc_u();
1735                 if (c == '\\') {
1736                         if ((c2 = o_getsc_u()) == '\n')
1737                                 /* ignore the \newline; get the next char... */
1738                                 continue;
1739                         ungetsc_i(c2);
1740                         backslash_skip = 1;
1741                 }
1742                 return (c);
1743         }
1744 }
1745
1746 void
1747 yyskiputf8bom(void)
1748 {
1749         int c;
1750
1751         if ((unsigned char)(c = o_getsc_u()) != 0xEF) {
1752                 ungetsc_i(c);
1753                 return;
1754         }
1755         if ((unsigned char)(c = o_getsc_u()) != 0xBB) {
1756                 ungetsc_i(c);
1757                 ungetsc_i(0xEF);
1758                 return;
1759         }
1760         if ((unsigned char)(c = o_getsc_u()) != 0xBF) {
1761                 ungetsc_i(c);
1762                 ungetsc_i(0xBB);
1763                 ungetsc_i(0xEF);
1764                 return;
1765         }
1766         UTFMODE |= 8;
1767 }
1768
1769 static Lex_state *
1770 push_state_i(State_info *si, Lex_state *old_end)
1771 {
1772         Lex_state *news = alloc2(STATE_BSIZE, sizeof(Lex_state), ATEMP);
1773
1774         news[0].ls_base = old_end;
1775         si->base = &news[0];
1776         si->end = &news[STATE_BSIZE];
1777         return (&news[1]);
1778 }
1779
1780 static Lex_state *
1781 pop_state_i(State_info *si, Lex_state *old_end)
1782 {
1783         Lex_state *old_base = si->base;
1784
1785         si->base = old_end->ls_base - STATE_BSIZE;
1786         si->end = old_end->ls_base;
1787
1788         afree(old_base, ATEMP);
1789
1790         return (si->base + STATE_BSIZE - 1);
1791 }
1792
/* Function wrapper around getsc(): read and return one character. */
static int
s_get(void)
{
        int c = getsc();

        return (c);
}
1798
/* Function wrapper around ungetsc(): push the character c back. */
static void
s_put(int c)
{
        ungetsc(c);
        return;
}