OSDN Git Service

Merge master@5428150 into git_qt-dev-plus-aosp.
[android-x86/external-mksh.git] / src / lex.c
1 /*      $OpenBSD: lex.c,v 1.51 2015/09/10 22:48:58 nicm Exp $   */
2
3 /*-
4  * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5  *               2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018
6  *      mirabilos <m@mirbsd.org>
7  *
8  * Provided that these terms and disclaimer and all copyright notices
9  * are retained or reproduced in an accompanying document, permission
10  * is granted to deal in this work without restriction, including un-
11  * limited rights to use, publicly perform, distribute, sell, modify,
12  * merge, give away, or sublicence.
13  *
14  * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
15  * the utmost extent permitted by applicable law, neither express nor
16  * implied; without malicious intent or gross negligence. In no event
17  * may a licensor, author or contributor be held liable for indirect,
18  * direct, other damage, loss, or other issues arising in any way out
19  * of dealing in the work, even if advised of the possibility of such
20  * damage or existence of a defect, except proven that it results out
21  * of said person's immediate fault when using the work as intended.
22  */
23
24 #include "sh.h"
25
26 __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.250 2018/10/20 18:34:14 tg Exp $");
27
/*
 * States of the word lexer.
 *
 * The active state is held in a local "state" variable and mirrored in
 * the "type" member of the topmost Lex_state entry. That member is an
 * uint8_t, so every value defined here has to fit into eight bits.
 */
#define SBASE		0	/* outside any lexical constructs */
#define SWORD		1	/* implicit quoting for substitute() */
#define SLETPAREN	2	/* inside (( )), implicit quoting */
#define SSQUOTE		3	/* inside '' */
#define SDQUOTE		4	/* inside "" */
#define SEQUOTE		5	/* inside $'' */
#define SBRACE		6	/* inside ${} */
#define SQBRACE		7	/* inside "${}" */
#define SBQUOTE		8	/* inside `` */
#define SASPAREN	9	/* inside $(( )) */
#define SHEREDELIM	10	/* parsing << or <<- delimiter */
#define SHEREDQUOTE	11	/* parsing " in << or <<- delimiter */
#define SPATTERN	12	/* parsing *(...|...) pattern (*+?@!) */
#define SADELIM		13	/* like SBASE, looking for delimiter */
#define STBRACEKORN	14	/* parsing ${...[#%]...} !FSH */
#define STBRACEBOURNE	15	/* parsing ${...[#%]...} FSH */
/* invalid state; yylex() stores it in the sentinel slot states[0] */
#define SINVALID	255
48
49 struct sretrace_info {
50         struct sretrace_info *next;
51         XString xs;
52         char *xp;
53 };
54
/*
 * Structure to keep track of the lexing state and the various pieces of info
 * needed for each particular state.
 *
 * The members of the union overlay each other; which one is meaningful
 * depends on the state type (and, for "base", on the slot being the
 * sentinel at the start of a state block). Use the ls_* accessor macros
 * below instead of spelling out the union member.
 */
typedef struct lex_state {
	union {
		/* point to the next state block */
		struct lex_state *base;
		/* marks start of state output in output string */
		size_t start;
		/* SBQUOTE: true if in double quotes: "`...`" */
		/* SEQUOTE: got NUL, ignore rest of string */
		bool abool;
		/* SADELIM information */
		struct {
			/* character to search for */
			unsigned char delimiter;
			/* max. number of delimiters */
			unsigned char num;
		} adelim;
	} u;
	/* count open parentheses */
	short nparen;
	/* type of this state (one of the S* constants) */
	uint8_t type;
	/* extra flags (LS_* bits) */
	uint8_t ls_flags;
} Lex_state;
/* shorthands for the union members, used as statep->ls_xxx */
#define ls_base		u.base
#define ls_start	u.start
#define ls_bool		u.abool
#define ls_adelim	u.adelim

/* ls_flags */
/* set on the initial state when yylex() is called with HEREDOC in cf */
#define LS_HEREDOC	BIT(0)
90
91 typedef struct {
92         Lex_state *base;
93         Lex_state *end;
94 } State_info;
95
96 static void readhere(struct ioword *);
97 static void ungetsc(int);
98 static void ungetsc_i(int);
99 static int getsc_uu(void);
100 static void getsc_line(Source *);
101 static int getsc_bn(void);
102 static int getsc_i(void);
103 static char *get_brace_var(XString *, char *);
104 static bool arraysub(char **);
105 static void gethere(void);
106 static Lex_state *push_state_i(State_info *, Lex_state *);
107 static Lex_state *pop_state_i(State_info *, Lex_state *);
108
109 static int backslash_skip;
110 static int ignore_backslash_newline;
111
/*
 * optimised getsc_bn(): as long as the next source byte is neither NUL
 * nor a backslash and backslash_skip is zero, take the byte straight
 * from source->str and advance; every other case is left to getsc_bn()
 */
#define o_getsc()	(*source->str != '\0' && *source->str != '\\' && \
			    !backslash_skip ? *source->str++ : getsc_bn())
/*
 * optimised getsc_uu(): take the next source byte directly unless it
 * is NUL, in which case getsc_uu() is called
 */
#define o_getsc_u()	((*source->str != '\0') ? *source->str++ : getsc_uu())
117
/*
 * retrace helper
 *
 * Expands to the complete body of a function returning int: carg is
 * evaluated exactly once, the resulting character is appended to the
 * string of every entry on the retrace_info list, and then returned.
 * Because it declares locals and ends in a return statement it is
 * meant to be the only content of the function using it.
 */
#define o_getsc_r(carg)					\
	int cev = (carg);				\
	struct sretrace_info *rp = retrace_info;	\
							\
	/* record the character in all active traces */	\
	while (rp) {					\
		Xcheck(rp->xs, rp->xp);			\
		*rp->xp++ = cev;			\
		rp = rp->next;				\
	}						\
							\
	return (cev);
130
/*
 * callback: function form of reading one character, for places that
 * need a function pointer (yylex() hands it to unbksl()).
 *
 * Fetches the next character via o_getsc(), converts it to a
 * non-negative value by way of unsigned char, records it in all active
 * retrace entries and returns it (all done by o_getsc_r()).
 */
static int
getsc_i(void)
{
	o_getsc_r((unsigned int)(unsigned char)o_getsc());
}
137
/*
 * getsc(): read the next character, with retrace recording.
 *
 * With MKSH_SMALL (and without MKSH_SMALL_BUT_FAST) every use is a
 * plain call to getsc_i(). Otherwise the o_getsc() fast path is
 * expanded at each call site and only the retrace recording is done
 * in a function, getsc_r().
 */
#if defined(MKSH_SMALL) && !defined(MKSH_SMALL_BUT_FAST)
#define getsc()		getsc_i()
#else
static int getsc_r(int);

/*
 * Record the already fetched character c in all active retrace
 * entries and return it unchanged.
 */
static int
getsc_r(int c)
{
	o_getsc_r(c);
}

#define getsc()		getsc_r((unsigned int)(unsigned char)o_getsc())
#endif
151
/* number of Lex_state slots per state block (size of yylex()'s array) */
#define STATE_BSIZE	8

/*
 * Enter lexer state s.
 *
 * Operates on the locals statep, state_info and state of the enclosing
 * function. Advances statep by one slot; when that reaches the end of
 * the current block, push_state_i() supplies the slot to use instead.
 * The new slot gets type s and inherits ls_flags from the slot that
 * was current before the push.
 */
#define PUSH_STATE(s)	do {					\
	uint8_t state_flags = statep->ls_flags;			\
	if (++statep == state_info.end)				\
		statep = push_state_i(&state_info, statep);	\
	state = statep->type = (s);				\
	statep->ls_flags = state_flags;				\
} while (/* CONSTCOND */ 0)

/*
 * Leave the current lexer state.
 *
 * Moves statep back by one slot; when that lands on the first slot of
 * the current block, pop_state_i() supplies the slot to continue with.
 * state is reloaded from the type of the slot that is now current.
 */
#define POP_STATE()	do {					\
	if (--statep == state_info.base)			\
		statep = pop_state_i(&state_info, statep);	\
	state = statep->type;					\
} while (/* CONSTCOND */ 0)
167
/*
 * Enter lexer state s and start recording the input read from now on.
 *
 * Besides the locals used by PUSH_STATE this needs ws and wp of the
 * enclosing function. The current output position is saved in the new
 * state's ls_start, and a fresh sretrace_info entry, allocated from
 * ATEMP, is put at the head of the retrace_info list.
 */
#define PUSH_SRETRACE(s) do {					\
	struct sretrace_info *ri;				\
								\
	PUSH_STATE(s);						\
	statep->ls_start = Xsavepos(ws, wp);			\
	ri = alloc(sizeof(struct sretrace_info), ATEMP);	\
	Xinit(ri->xs, ri->xp, 64, ATEMP);			\
	ri->next = retrace_info;				\
	retrace_info = ri;					\
} while (/* CONSTCOND */ 0)

/*
 * Stop the recording started by the matching PUSH_SRETRACE and leave
 * the state.
 *
 * wp is moved back to the output position saved in ls_start, the
 * recorded text is NUL-terminated and its start is handed to the
 * caller in sp. The list entry itself is unlinked and freed (dp is
 * used as scratch pointer for that); the recorded string is not freed
 * here.
 */
#define POP_SRETRACE()	do {					\
	wp = Xrestpos(ws, wp, statep->ls_start);		\
	*retrace_info->xp = '\0';				\
	sp = Xstring(retrace_info->xs, retrace_info->xp);	\
	dp = (void *)retrace_info;				\
	retrace_info = retrace_info->next;			\
	afree(dp, ATEMP);					\
	POP_STATE();						\
} while (/* CONSTCOND */ 0)
188
189 /**
190  * Lexical analyser
191  *
192  * tokens are not regular expressions, they are LL(1).
193  * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
194  * hence the state stack. Note "$(...)" are now parsed recursively.
195  */
196
197 int
198 yylex(int cf)
199 {
200         Lex_state states[STATE_BSIZE], *statep, *s2, *base;
201         State_info state_info;
202         int c, c2, state;
203         size_t cz;
204         XString ws;             /* expandable output word */
205         char *wp;               /* output word pointer */
206         char *sp, *dp;
207
208  Again:
209         states[0].type = SINVALID;
210         states[0].ls_base = NULL;
211         statep = &states[1];
212         state_info.base = states;
213         state_info.end = &state_info.base[STATE_BSIZE];
214
215         Xinit(ws, wp, 64, ATEMP);
216
217         backslash_skip = 0;
218         ignore_backslash_newline = 0;
219
220         if (cf & ONEWORD)
221                 state = SWORD;
222         else if (cf & LETEXPR) {
223                 /* enclose arguments in (double) quotes */
224                 *wp++ = OQUOTE;
225                 state = SLETPAREN;
226                 statep->nparen = 0;
227         } else {
228                 /* normal lexing */
229                 state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
230                 do {
231                         c = getsc();
232                 } while (ctype(c, C_BLANK));
233                 if (c == '#') {
234                         ignore_backslash_newline++;
235                         do {
236                                 c = getsc();
237                         } while (!ctype(c, C_NUL | C_LF));
238                         ignore_backslash_newline--;
239                 }
240                 ungetsc(c);
241         }
242         if (source->flags & SF_ALIAS) {
243                 /* trailing ' ' in alias definition */
244                 source->flags &= ~SF_ALIAS;
245                 /* POSIX: trailing space only counts if parsing simple cmd */
246                 if (!Flag(FPOSIX) || (cf & CMDWORD))
247                         cf |= ALIAS;
248         }
249
250         /* Initial state: one of SWORD SLETPAREN SHEREDELIM SBASE */
251         statep->type = state;
252         statep->ls_flags = (cf & HEREDOC) ? LS_HEREDOC : 0;
253
254         /* collect non-special or quoted characters to form word */
255         while (!((c = getsc()) == 0 ||
256             ((state == SBASE || state == SHEREDELIM) && ctype(c, C_LEX1)))) {
257                 if (state == SBASE &&
258                     subshell_nesting_type == ORD(/*{*/ '}') &&
259                     (unsigned int)c == ORD(/*{*/ '}'))
260                         /* possibly end ${ :;} */
261                         break;
262                 Xcheck(ws, wp);
263                 switch (state) {
264                 case SADELIM:
265                         if ((unsigned int)c == ORD('('))
266                                 statep->nparen++;
267                         else if ((unsigned int)c == ORD(')'))
268                                 statep->nparen--;
269                         else if (statep->nparen == 0 &&
270                             ((unsigned int)c == ORD(/*{*/ '}') ||
271                             c == (int)statep->ls_adelim.delimiter)) {
272                                 *wp++ = ADELIM;
273                                 *wp++ = c;
274                                 if ((unsigned int)c == ORD(/*{*/ '}') ||
275                                     --statep->ls_adelim.num == 0)
276                                         POP_STATE();
277                                 if ((unsigned int)c == ORD(/*{*/ '}'))
278                                         POP_STATE();
279                                 break;
280                         }
281                         /* FALLTHROUGH */
282                 case SBASE:
283                         if ((unsigned int)c == ORD('[') && (cf & CMDASN)) {
284                                 /* temporary */
285                                 *wp = EOS;
286                                 if (is_wdvarname(Xstring(ws, wp), false)) {
287                                         char *p, *tmp;
288
289                                         if (arraysub(&tmp)) {
290                                                 *wp++ = CHAR;
291                                                 *wp++ = c;
292                                                 for (p = tmp; *p; ) {
293                                                         Xcheck(ws, wp);
294                                                         *wp++ = CHAR;
295                                                         *wp++ = *p++;
296                                                 }
297                                                 afree(tmp, ATEMP);
298                                                 break;
299                                         }
300                                 }
301                                 *wp++ = CHAR;
302                                 *wp++ = c;
303                                 break;
304                         }
305                         /* FALLTHROUGH */
306  Sbase1:                /* includes *(...|...) pattern (*+?@!) */
307                         if (ctype(c, C_PATMO)) {
308                                 c2 = getsc();
309                                 if ((unsigned int)c2 == ORD('(' /*)*/)) {
310                                         *wp++ = OPAT;
311                                         *wp++ = c;
312                                         PUSH_STATE(SPATTERN);
313                                         break;
314                                 }
315                                 ungetsc(c2);
316                         }
317                         /* FALLTHROUGH */
318  Sbase2:                /* doesn't include *(...|...) pattern (*+?@!) */
319                         switch (c) {
320                         case ORD('\\'):
321  getsc_qchar:
322                                 if ((c = getsc())) {
323                                         /* trailing \ is lost */
324                                         *wp++ = QCHAR;
325                                         *wp++ = c;
326                                 }
327                                 break;
328                         case ORD('\''):
329  open_ssquote_unless_heredoc:
330                                 if ((statep->ls_flags & LS_HEREDOC))
331                                         goto store_char;
332                                 *wp++ = OQUOTE;
333                                 ignore_backslash_newline++;
334                                 PUSH_STATE(SSQUOTE);
335                                 break;
336                         case ORD('"'):
337  open_sdquote:
338                                 *wp++ = OQUOTE;
339                                 PUSH_STATE(SDQUOTE);
340                                 break;
341                         case ORD('$'):
342                                 /*
343                                  * processing of dollar sign belongs into
344                                  * Subst, except for those which can open
345                                  * a string: $'…' and $"…"
346                                  */
347  subst_dollar_ex:
348                                 c = getsc();
349                                 switch (c) {
350                                 case ORD('"'):
351                                         goto open_sdquote;
352                                 case ORD('\''):
353                                         goto open_sequote;
354                                 default:
355                                         goto SubstS;
356                                 }
357                         default:
358                                 goto Subst;
359                         }
360                         break;
361
362  Subst:
363                         switch (c) {
364                         case ORD('\\'):
365                                 c = getsc();
366                                 switch (c) {
367                                 case ORD('"'):
368                                         if ((statep->ls_flags & LS_HEREDOC))
369                                                 goto heredocquote;
370                                         /* FALLTHROUGH */
371                                 case ORD('\\'):
372                                 case ORD('$'):
373                                 case ORD('`'):
374  store_qchar:
375                                         *wp++ = QCHAR;
376                                         *wp++ = c;
377                                         break;
378                                 default:
379  heredocquote:
380                                         Xcheck(ws, wp);
381                                         if (c) {
382                                                 /* trailing \ is lost */
383                                                 *wp++ = CHAR;
384                                                 *wp++ = '\\';
385                                                 *wp++ = CHAR;
386                                                 *wp++ = c;
387                                         }
388                                         break;
389                                 }
390                                 break;
391                         case ORD('$'):
392                                 c = getsc();
393  SubstS:
394                                 if ((unsigned int)c == ORD('(' /*)*/)) {
395                                         c = getsc();
396                                         if ((unsigned int)c == ORD('(' /*)*/)) {
397                                                 *wp++ = EXPRSUB;
398                                                 PUSH_SRETRACE(SASPAREN);
399                                                 /* unneeded? */
400                                                 /*statep->ls_flags &= ~LS_HEREDOC;*/
401                                                 statep->nparen = 2;
402                                                 *retrace_info->xp++ = '(';
403                                         } else {
404                                                 ungetsc(c);
405  subst_command:
406                                                 c = COMSUB;
407  subst_command2:
408                                                 sp = yyrecursive(c);
409                                                 cz = strlen(sp) + 1;
410                                                 XcheckN(ws, wp, cz);
411                                                 *wp++ = c;
412                                                 memcpy(wp, sp, cz);
413                                                 wp += cz;
414                                         }
415                                 } else if ((unsigned int)c == ORD('{' /*}*/)) {
416                                         if ((unsigned int)(c = getsc()) == ORD('|')) {
417                                                 /*
418                                                  * non-subenvironment
419                                                  * value substitution
420                                                  */
421                                                 c = VALSUB;
422                                                 goto subst_command2;
423                                         } else if (ctype(c, C_IFSWS)) {
424                                                 /*
425                                                  * non-subenvironment
426                                                  * "command" substitution
427                                                  */
428                                                 c = FUNSUB;
429                                                 goto subst_command2;
430                                         }
431                                         ungetsc(c);
432                                         *wp++ = OSUBST;
433                                         *wp++ = '{' /*}*/;
434                                         wp = get_brace_var(&ws, wp);
435                                         c = getsc();
436                                         /* allow :# and :% (ksh88 compat) */
437                                         if ((unsigned int)c == ORD(':')) {
438                                                 *wp++ = CHAR;
439                                                 *wp++ = c;
440                                                 c = getsc();
441                                                 if ((unsigned int)c == ORD(':')) {
442                                                         *wp++ = CHAR;
443                                                         *wp++ = '0';
444                                                         *wp++ = ADELIM;
445                                                         *wp++ = ':';
446                                                         PUSH_STATE(SBRACE);
447                                                         /* perhaps unneeded? */
448                                                         statep->ls_flags &= ~LS_HEREDOC;
449                                                         PUSH_STATE(SADELIM);
450                                                         statep->ls_adelim.delimiter = ':';
451                                                         statep->ls_adelim.num = 1;
452                                                         statep->nparen = 0;
453                                                         break;
454                                                 } else if (ctype(c, C_DIGIT | C_DOLAR | C_SPC) ||
455                                                     /*XXX what else? */
456                                                     c == '(' /*)*/) {
457                                                         /* substring subst. */
458                                                         if (c != ' ') {
459                                                                 *wp++ = CHAR;
460                                                                 *wp++ = ' ';
461                                                         }
462                                                         ungetsc(c);
463                                                         PUSH_STATE(SBRACE);
464                                                         /* perhaps unneeded? */
465                                                         statep->ls_flags &= ~LS_HEREDOC;
466                                                         PUSH_STATE(SADELIM);
467                                                         statep->ls_adelim.delimiter = ':';
468                                                         statep->ls_adelim.num = 2;
469                                                         statep->nparen = 0;
470                                                         break;
471                                                 }
472                                         } else if (c == '/') {
473                                                 c2 = ADELIM;
474  parse_adelim_slash:
475                                                 *wp++ = CHAR;
476                                                 *wp++ = c;
477                                                 if ((unsigned int)(c = getsc()) == ORD('/')) {
478                                                         *wp++ = c2;
479                                                         *wp++ = c;
480                                                 } else
481                                                         ungetsc(c);
482                                                 PUSH_STATE(SBRACE);
483                                                 /* perhaps unneeded? */
484                                                 statep->ls_flags &= ~LS_HEREDOC;
485                                                 PUSH_STATE(SADELIM);
486                                                 statep->ls_adelim.delimiter = '/';
487                                                 statep->ls_adelim.num = 1;
488                                                 statep->nparen = 0;
489                                                 break;
490                                         } else if (c == '@') {
491                                                 c2 = getsc();
492                                                 ungetsc(c2);
493                                                 if ((unsigned int)c2 == ORD('/')) {
494                                                         c2 = CHAR;
495                                                         goto parse_adelim_slash;
496                                                 }
497                                         }
498                                         /*
499                                          * If this is a trim operation,
500                                          * treat (,|,) specially in STBRACE.
501                                          */
502                                         if (ctype(c, C_SUB2)) {
503                                                 ungetsc(c);
504                                                 if (Flag(FSH))
505                                                         PUSH_STATE(STBRACEBOURNE);
506                                                 else
507                                                         PUSH_STATE(STBRACEKORN);
508                                                 /* single-quotes-in-heredoc-trim */
509                                                 statep->ls_flags &= ~LS_HEREDOC;
510                                         } else {
511                                                 ungetsc(c);
512                                                 if (state == SDQUOTE ||
513                                                     state == SQBRACE)
514                                                         PUSH_STATE(SQBRACE);
515                                                 else
516                                                         PUSH_STATE(SBRACE);
517                                                 /* here no LS_HEREDOC removal */
518                                                 /* single-quotes-in-heredoc-braces */
519                                         }
520                                 } else if (ctype(c, C_ALPHX)) {
521                                         *wp++ = OSUBST;
522                                         *wp++ = 'X';
523                                         do {
524                                                 Xcheck(ws, wp);
525                                                 *wp++ = c;
526                                                 c = getsc();
527                                         } while (ctype(c, C_ALNUX));
528                                         *wp++ = '\0';
529                                         *wp++ = CSUBST;
530                                         *wp++ = 'X';
531                                         ungetsc(c);
532                                 } else if (ctype(c, C_VAR1 | C_DIGIT)) {
533                                         Xcheck(ws, wp);
534                                         *wp++ = OSUBST;
535                                         *wp++ = 'X';
536                                         *wp++ = c;
537                                         *wp++ = '\0';
538                                         *wp++ = CSUBST;
539                                         *wp++ = 'X';
540                                 } else {
541                                         *wp++ = CHAR;
542                                         *wp++ = '$';
543                                         ungetsc(c);
544                                 }
545                                 break;
546                         case ORD('`'):
547  subst_gravis:
548                                 PUSH_STATE(SBQUOTE);
549                                 *wp++ = COMASUB;
550                                 /*
551                                  * We need to know whether we are within double
552                                  * quotes in order to translate \" to " within
553                                  * "…`…\"…`…" because, unlike for COMSUBs, the
554                                  * outer double quoteing changes the backslash
555                                  * meaning for the inside. For more details:
556                                  * http://austingroupbugs.net/view.php?id=1015
557                                  */
558                                 statep->ls_bool = false;
559                                 s2 = statep;
560                                 base = state_info.base;
561                                 while (/* CONSTCOND */ 1) {
562                                         for (; s2 != base; s2--) {
563                                                 if (s2->type == SDQUOTE) {
564                                                         statep->ls_bool = true;
565                                                         break;
566                                                 }
567                                         }
568                                         if (s2 != base)
569                                                 break;
570                                         if (!(s2 = s2->ls_base))
571                                                 break;
572                                         base = s2-- - STATE_BSIZE;
573                                 }
574                                 break;
575                         case QCHAR:
576                                 if (cf & LQCHAR) {
577                                         *wp++ = QCHAR;
578                                         *wp++ = getsc();
579                                         break;
580                                 }
581                                 /* FALLTHROUGH */
582                         default:
583  store_char:
584                                 *wp++ = CHAR;
585                                 *wp++ = c;
586                         }
587                         break;
588
589                 case SEQUOTE:
590                         if ((unsigned int)c == ORD('\'')) {
591                                 POP_STATE();
592                                 *wp++ = CQUOTE;
593                                 ignore_backslash_newline--;
594                         } else if ((unsigned int)c == ORD('\\')) {
595                                 if ((c2 = unbksl(true, getsc_i, ungetsc)) == -1)
596                                         c2 = getsc();
597                                 if (c2 == 0)
598                                         statep->ls_bool = true;
599                                 if (!statep->ls_bool) {
600                                         char ts[4];
601
602                                         if ((unsigned int)c2 < 0x100) {
603                                                 *wp++ = QCHAR;
604                                                 *wp++ = c2;
605                                         } else {
606                                                 cz = utf_wctomb(ts, c2 - 0x100);
607                                                 ts[cz] = 0;
608                                                 cz = 0;
609                                                 do {
610                                                         *wp++ = QCHAR;
611                                                         *wp++ = ts[cz];
612                                                 } while (ts[++cz]);
613                                         }
614                                 }
615                         } else if (!statep->ls_bool) {
616                                 *wp++ = QCHAR;
617                                 *wp++ = c;
618                         }
619                         break;
620
621                 case SSQUOTE:
622                         if ((unsigned int)c == ORD('\'')) {
623                                 POP_STATE();
624                                 if ((statep->ls_flags & LS_HEREDOC) ||
625                                     state == SQBRACE)
626                                         goto store_char;
627                                 *wp++ = CQUOTE;
628                                 ignore_backslash_newline--;
629                         } else {
630                                 *wp++ = QCHAR;
631                                 *wp++ = c;
632                         }
633                         break;
634
635                 case SDQUOTE:
636                         if ((unsigned int)c == ORD('"')) {
637                                 POP_STATE();
638                                 *wp++ = CQUOTE;
639                         } else
640                                 goto Subst;
641                         break;
642
643                 /* $(( ... )) */
644                 case SASPAREN:
645                         if ((unsigned int)c == ORD('('))
646                                 statep->nparen++;
647                         else if ((unsigned int)c == ORD(')')) {
648                                 statep->nparen--;
649                                 if (statep->nparen == 1) {
650                                         /* end of EXPRSUB */
651                                         POP_SRETRACE();
652
653                                         if ((unsigned int)(c2 = getsc()) == ORD(/*(*/ ')')) {
654                                                 cz = strlen(sp) - 2;
655                                                 XcheckN(ws, wp, cz);
656                                                 memcpy(wp, sp + 1, cz);
657                                                 wp += cz;
658                                                 afree(sp, ATEMP);
659                                                 *wp++ = '\0';
660                                                 break;
661                                         } else {
662                                                 Source *s;
663
664                                                 ungetsc(c2);
665                                                 /*
666                                                  * mismatched parenthesis -
667                                                  * assume we were really
668                                                  * parsing a $(...) expression
669                                                  */
670                                                 --wp;
671                                                 s = pushs(SREREAD,
672                                                     source->areap);
673                                                 s->start = s->str =
674                                                     s->u.freeme = sp;
675                                                 s->next = source;
676                                                 source = s;
677                                                 goto subst_command;
678                                         }
679                                 }
680                         }
681                         /* reuse existing state machine */
682                         goto Sbase2;
683
684                 case SQBRACE:
685                         if ((unsigned int)c == ORD('\\')) {
686                                 /*
687                                  * perform POSIX "quote removal" if the back-
688                                  * slash is "special", i.e. same cases as the
689                                  * {case '\\':} in Subst: plus closing brace;
690                                  * in mksh code "quote removal" on '\c' means
691                                  * write QCHAR+c, otherwise CHAR+\+CHAR+c are
692                                  * emitted (in heredocquote:)
693                                  */
694                                 if ((unsigned int)(c = getsc()) == ORD('"') ||
695                                     (unsigned int)c == ORD('\\') ||
696                                     ctype(c, C_DOLAR | C_GRAVE) ||
697                                     (unsigned int)c == ORD(/*{*/ '}'))
698                                         goto store_qchar;
699                                 goto heredocquote;
700                         }
701                         goto common_SQBRACE;
702
703                 case SBRACE:
704                         if ((unsigned int)c == ORD('\''))
705                                 goto open_ssquote_unless_heredoc;
706                         else if ((unsigned int)c == ORD('\\'))
707                                 goto getsc_qchar;
708  common_SQBRACE:
709                         if ((unsigned int)c == ORD('"'))
710                                 goto open_sdquote;
711                         else if ((unsigned int)c == ORD('$'))
712                                 goto subst_dollar_ex;
713                         else if ((unsigned int)c == ORD('`'))
714                                 goto subst_gravis;
715                         else if ((unsigned int)c != ORD(/*{*/ '}'))
716                                 goto store_char;
717                         POP_STATE();
718                         *wp++ = CSUBST;
719                         *wp++ = /*{*/ '}';
720                         break;
721
722                 /* Same as SBASE, except (,|,) treated specially */
723                 case STBRACEKORN:
724                         if ((unsigned int)c == ORD('|'))
725                                 *wp++ = SPAT;
726                         else if ((unsigned int)c == ORD('(')) {
727                                 *wp++ = OPAT;
728                                 /* simile for @ */
729                                 *wp++ = ' ';
730                                 PUSH_STATE(SPATTERN);
731                         } else /* FALLTHROUGH */
732                 case STBRACEBOURNE:
733                           if ((unsigned int)c == ORD(/*{*/ '}')) {
734                                 POP_STATE();
735                                 *wp++ = CSUBST;
736                                 *wp++ = /*{*/ '}';
737                         } else
738                                 goto Sbase1;
739                         break;
740
741                 case SBQUOTE:
742                         if ((unsigned int)c == ORD('`')) {
743                                 *wp++ = 0;
744                                 POP_STATE();
745                         } else if ((unsigned int)c == ORD('\\')) {
746                                 switch (c = getsc()) {
747                                 case 0:
748                                         /* trailing \ is lost */
749                                         break;
750                                 case ORD('$'):
751                                 case ORD('`'):
752                                 case ORD('\\'):
753                                         *wp++ = c;
754                                         break;
755                                 case ORD('"'):
756                                         if (statep->ls_bool) {
757                                                 *wp++ = c;
758                                                 break;
759                                         }
760                                         /* FALLTHROUGH */
761                                 default:
762                                         *wp++ = '\\';
763                                         *wp++ = c;
764                                         break;
765                                 }
766                         } else
767                                 *wp++ = c;
768                         break;
769
770                 /* ONEWORD */
771                 case SWORD:
772                         goto Subst;
773
774                 /* LETEXPR: (( ... )) */
775                 case SLETPAREN:
776                         if ((unsigned int)c == ORD(/*(*/ ')')) {
777                                 if (statep->nparen > 0)
778                                         --statep->nparen;
779                                 else if ((unsigned int)(c2 = getsc()) == ORD(/*(*/ ')')) {
780                                         c = 0;
781                                         *wp++ = CQUOTE;
782                                         goto Done;
783                                 } else {
784                                         Source *s;
785
786                                         ungetsc(c2);
787                                         ungetsc(c);
788                                         /*
789                                          * mismatched parenthesis -
790                                          * assume we were really
791                                          * parsing a (...) expression
792                                          */
793                                         *wp = EOS;
794                                         sp = Xstring(ws, wp);
795                                         dp = wdstrip(sp + 1, WDS_TPUTS);
796                                         s = pushs(SREREAD, source->areap);
797                                         s->start = s->str = s->u.freeme = dp;
798                                         s->next = source;
799                                         source = s;
800                                         ungetsc('(' /*)*/);
801                                         return (ORD('(' /*)*/));
802                                 }
803                         } else if ((unsigned int)c == ORD('('))
804                                 /*
805                                  * parentheses inside quotes and
806                                  * backslashes are lost, but AT&T ksh
807                                  * doesn't count them either
808                                  */
809                                 ++statep->nparen;
810                         goto Sbase2;
811
812                 /* << or <<- delimiter */
813                 case SHEREDELIM:
814                         /*
815                          * here delimiters need a special case since
816                          * $ and `...` are not to be treated specially
817                          */
818                         switch (c) {
819                         case ORD('\\'):
820                                 if ((c = getsc())) {
821                                         /* trailing \ is lost */
822                                         *wp++ = QCHAR;
823                                         *wp++ = c;
824                                 }
825                                 break;
826                         case ORD('\''):
827                                 goto open_ssquote_unless_heredoc;
828                         case ORD('$'):
829                                 if ((unsigned int)(c2 = getsc()) == ORD('\'')) {
830  open_sequote:
831                                         *wp++ = OQUOTE;
832                                         ignore_backslash_newline++;
833                                         PUSH_STATE(SEQUOTE);
834                                         statep->ls_bool = false;
835                                         break;
836                                 } else if ((unsigned int)c2 == ORD('"')) {
837                                         /* FALLTHROUGH */
838                         case ORD('"'):
839                                         PUSH_SRETRACE(SHEREDQUOTE);
840                                         break;
841                                 }
842                                 ungetsc(c2);
843                                 /* FALLTHROUGH */
844                         default:
845                                 *wp++ = CHAR;
846                                 *wp++ = c;
847                         }
848                         break;
849
850                 /* " in << or <<- delimiter */
851                 case SHEREDQUOTE:
852                         if ((unsigned int)c != ORD('"'))
853                                 goto Subst;
854                         POP_SRETRACE();
855                         dp = strnul(sp) - 1;
856                         /* remove the trailing double quote */
857                         *dp = '\0';
858                         /* store the quoted string */
859                         *wp++ = OQUOTE;
860                         XcheckN(ws, wp, (dp - sp) * 2);
861                         dp = sp;
862                         while ((c = *dp++)) {
863                                 if (c == '\\') {
864                                         switch ((c = *dp++)) {
865                                         case ORD('\\'):
866                                         case ORD('"'):
867                                         case ORD('$'):
868                                         case ORD('`'):
869                                                 break;
870                                         default:
871                                                 *wp++ = CHAR;
872                                                 *wp++ = '\\';
873                                                 break;
874                                         }
875                                 }
876                                 *wp++ = CHAR;
877                                 *wp++ = c;
878                         }
879                         afree(sp, ATEMP);
880                         *wp++ = CQUOTE;
881                         state = statep->type = SHEREDELIM;
882                         break;
883
884                 /* in *(...|...) pattern (*+?@!) */
885                 case SPATTERN:
886                         if ((unsigned int)c == ORD(/*(*/ ')')) {
887                                 *wp++ = CPAT;
888                                 POP_STATE();
889                         } else if ((unsigned int)c == ORD('|')) {
890                                 *wp++ = SPAT;
891                         } else if ((unsigned int)c == ORD('(')) {
892                                 *wp++ = OPAT;
893                                 /* simile for @ */
894                                 *wp++ = ' ';
895                                 PUSH_STATE(SPATTERN);
896                         } else
897                                 goto Sbase1;
898                         break;
899                 }
900         }
901  Done:
902         Xcheck(ws, wp);
903         if (statep != &states[1])
904                 /* XXX figure out what is missing */
905                 yyerror("no closing quote");
906
907         /* This done to avoid tests for SHEREDELIM wherever SBASE tested */
908         if (state == SHEREDELIM)
909                 state = SBASE;
910
911         dp = Xstring(ws, wp);
912         if (state == SBASE && (
913             (c == '&' && !Flag(FSH) && !Flag(FPOSIX)) ||
914             ctype(c, C_ANGLE)) && ((c2 = Xlength(ws, wp)) == 0 ||
915             (c2 == 2 && dp[0] == CHAR && ctype(dp[1], C_DIGIT)))) {
916                 struct ioword *iop = alloc(sizeof(struct ioword), ATEMP);
917
918                 iop->unit = c2 == 2 ? ksh_numdig(dp[1]) : c == '<' ? 0 : 1;
919
920                 if (c == '&') {
921                         if ((unsigned int)(c2 = getsc()) != ORD('>')) {
922                                 ungetsc(c2);
923                                 goto no_iop;
924                         }
925                         c = c2;
926                         iop->ioflag = IOBASH;
927                 } else
928                         iop->ioflag = 0;
929
930                 c2 = getsc();
931                 /* <<, >>, <> are ok, >< is not */
932                 if (c == c2 || ((unsigned int)c == ORD('<') &&
933                     (unsigned int)c2 == ORD('>'))) {
934                         iop->ioflag |= c == c2 ?
935                             ((unsigned int)c == ORD('>') ? IOCAT : IOHERE) : IORDWR;
936                         if (iop->ioflag == IOHERE) {
937                                 if ((unsigned int)(c2 = getsc()) == ORD('-'))
938                                         iop->ioflag |= IOSKIP;
939                                 else if ((unsigned int)c2 == ORD('<'))
940                                         iop->ioflag |= IOHERESTR;
941                                 else
942                                         ungetsc(c2);
943                         }
944                 } else if ((unsigned int)c2 == ORD('&'))
945                         iop->ioflag |= IODUP | ((unsigned int)c == ORD('<') ? IORDUP : 0);
946                 else {
947                         iop->ioflag |= (unsigned int)c == ORD('>') ? IOWRITE : IOREAD;
948                         if ((unsigned int)c == ORD('>') && (unsigned int)c2 == ORD('|'))
949                                 iop->ioflag |= IOCLOB;
950                         else
951                                 ungetsc(c2);
952                 }
953
954                 iop->ioname = NULL;
955                 iop->delim = NULL;
956                 iop->heredoc = NULL;
957                 /* free word */
958                 Xfree(ws, wp);
959                 yylval.iop = iop;
960                 return (REDIR);
961  no_iop:
962                 afree(iop, ATEMP);
963         }
964
965         if (wp == dp && state == SBASE) {
966                 /* free word */
967                 Xfree(ws, wp);
968                 /* no word, process LEX1 character */
969                 if (((unsigned int)c == ORD('|')) ||
970                     ((unsigned int)c == ORD('&')) ||
971                     ((unsigned int)c == ORD(';')) ||
972                     ((unsigned int)c == ORD('(' /*)*/))) {
973                         if ((c2 = getsc()) == c)
974                                 c = ((unsigned int)c == ORD(';')) ? BREAK :
975                                     ((unsigned int)c == ORD('|')) ? LOGOR :
976                                     ((unsigned int)c == ORD('&')) ? LOGAND :
977                                     /* (unsigned int)c == ORD('(' )) */ MDPAREN;
978                         else if ((unsigned int)c == ORD('|') && (unsigned int)c2 == ORD('&'))
979                                 c = COPROC;
980                         else if ((unsigned int)c == ORD(';') && (unsigned int)c2 == ORD('|'))
981                                 c = BRKEV;
982                         else if ((unsigned int)c == ORD(';') && (unsigned int)c2 == ORD('&'))
983                                 c = BRKFT;
984                         else
985                                 ungetsc(c2);
986 #ifndef MKSH_SMALL
987                         if (c == BREAK) {
988                                 if ((unsigned int)(c2 = getsc()) == ORD('&'))
989                                         c = BRKEV;
990                                 else
991                                         ungetsc(c2);
992                         }
993 #endif
994                 } else if ((unsigned int)c == ORD('\n')) {
995                         if (cf & HEREDELIM)
996                                 ungetsc(c);
997                         else {
998                                 gethere();
999                                 if (cf & CONTIN)
1000                                         goto Again;
1001                         }
1002                 } else if (c == '\0' && !(cf & HEREDELIM)) {
1003                         struct ioword **p = heres;
1004
1005                         while (p < herep)
1006                                 if ((*p)->ioflag & IOHERESTR)
1007                                         ++p;
1008                                 else
1009                                         /* ksh -c 'cat <<EOF' can cause this */
1010                                         yyerror(Tf_heredoc,
1011                                             evalstr((*p)->delim, 0));
1012                 }
1013                 return (c);
1014         }
1015
1016         /* terminate word */
1017         *wp++ = EOS;
1018         yylval.cp = Xclose(ws, wp);
1019         if (state == SWORD || state == SLETPAREN
1020             /* XXX ONEWORD? */)
1021                 return (LWORD);
1022
1023         /* unget terminator */
1024         ungetsc(c);
1025
1026         /*
1027          * note: the alias-vs-function code below depends on several
1028          * interna: starting from here, source->str is not modified;
1029          * the way getsc() and ungetsc() operate; etc.
1030          */
1031
1032         /* copy word to unprefixed string ident */
1033         sp = yylval.cp;
1034         dp = ident;
1035         while ((dp - ident) < IDENT && (c = *sp++) == CHAR)
1036                 *dp++ = *sp++;
1037         if (c != EOS)
1038                 /* word is not unquoted, or space ran out */
1039                 dp = ident;
1040         /* make sure the ident array stays NUL padded */
1041         memset(dp, 0, (ident + IDENT) - dp + 1);
1042
1043         if (*ident != '\0' && (cf & (KEYWORD | ALIAS))) {
1044                 struct tbl *p;
1045                 uint32_t h = hash(ident);
1046
1047                 if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) &&
1048                     (!(cf & ESACONLY) || p->val.i == ESAC ||
1049                     (unsigned int)p->val.i == ORD(/*{*/ '}'))) {
1050                         afree(yylval.cp, ATEMP);
1051                         return (p->val.i);
1052                 }
1053                 if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) &&
1054                     (p->flag & ISSET)) {
1055                         /*
1056                          * this still points to the same character as the
1057                          * ungetsc'd terminator from above
1058                          */
1059                         const char *cp = source->str;
1060
1061                         /* prefer POSIX but not Korn functions over aliases */
1062                         while (ctype(*cp, C_BLANK))
1063                                 /*
1064                                  * this is like getsc() without skipping
1065                                  * over Source boundaries (including not
1066                                  * parsing ungetsc'd characters that got
1067                                  * pushed into an SREREAD) which is what
1068                                  * we want here anyway: find out whether
1069                                  * the alias name is followed by a POSIX
1070                                  * function definition
1071                                  */
1072                                 ++cp;
1073                         /* prefer functions over aliases */
1074                         if (cp[0] != '(' || cp[1] != ')') {
1075                                 Source *s = source;
1076
1077                                 while (s && (s->flags & SF_HASALIAS))
1078                                         if (s->u.tblp == p)
1079                                                 return (LWORD);
1080                                         else
1081                                                 s = s->next;
1082                                 /* push alias expansion */
1083                                 s = pushs(SALIAS, source->areap);
1084                                 s->start = s->str = p->val.s;
1085                                 s->u.tblp = p;
1086                                 s->flags |= SF_HASALIAS;
1087                                 s->line = source->line;
1088                                 s->next = source;
1089                                 if (source->type == SEOF) {
1090                                         /* prevent infinite recursion at EOS */
1091                                         source->u.tblp = p;
1092                                         source->flags |= SF_HASALIAS;
1093                                 }
1094                                 source = s;
1095                                 afree(yylval.cp, ATEMP);
1096                                 goto Again;
1097                         }
1098                 }
1099         } else if (*ident == '\0') {
1100                 /* retain typeset et al. even when quoted */
1101                 struct tbl *tt = get_builtin((dp = wdstrip(yylval.cp, 0)));
1102                 uint32_t flag = tt ? tt->flag : 0;
1103
1104                 if (flag & (DECL_UTIL | DECL_FWDR))
1105                         strlcpy(ident, dp, sizeof(ident));
1106                 afree(dp, ATEMP);
1107         }
1108
1109         return (LWORD);
1110 }
1111
1112 static void
1113 gethere(void)
1114 {
1115         struct ioword **p;
1116
1117         for (p = heres; p < herep; p++)
1118                 if (!((*p)->ioflag & IOHERESTR))
1119                         readhere(*p);
1120         herep = heres;
1121 }
1122
1123 /*
1124  * read "<<word" text into temp file
1125  */
1126
1127 static void
1128 readhere(struct ioword *iop)
1129 {
1130         int c;
1131         const char *eof, *eofp;
1132         XString xs;
1133         char *xp;
1134         size_t xpos;
1135
1136         eof = evalstr(iop->delim, 0);
1137
1138         if (!(iop->ioflag & IOEVAL))
1139                 ignore_backslash_newline++;
1140
1141         Xinit(xs, xp, 256, ATEMP);
1142
1143  heredoc_read_line:
1144         /* beginning of line */
1145         eofp = eof;
1146         xpos = Xsavepos(xs, xp);
1147         if (iop->ioflag & IOSKIP) {
1148                 /* skip over leading tabs */
1149                 while ((c = getsc()) == '\t')
1150                         ;       /* nothing */
1151                 goto heredoc_parse_char;
1152         }
1153  heredoc_read_char:
1154         c = getsc();
1155  heredoc_parse_char:
1156         /* compare with here document marker */
1157         if (!*eofp) {
1158                 /* end of here document marker, what to do? */
1159                 switch (c) {
1160                 case ORD(/*(*/ ')'):
1161                         if (!subshell_nesting_type)
1162                                 /*-
1163                                  * not allowed outside $(...) or (...)
1164                                  * => mismatch
1165                                  */
1166                                 break;
1167                         /* allow $(...) or (...) to close here */
1168                         ungetsc(/*(*/ ')');
1169                         /* FALLTHROUGH */
1170                 case 0:
1171                         /*
1172                          * Allow EOF here to commands without trailing
1173                          * newlines (mksh -c '...') will work as well.
1174                          */
1175                 case ORD('\n'):
1176                         /* Newline terminates here document marker */
1177                         goto heredoc_found_terminator;
1178                 }
1179         } else if ((unsigned int)c == ord(*eofp++))
1180                 /* store; then read and compare next character */
1181                 goto heredoc_store_and_loop;
1182         /* nope, mismatch; read until end of line */
1183         while (c != '\n') {
1184                 if (!c)
1185                         /* oops, reached EOF */
1186                         yyerror(Tf_heredoc, eof);
1187                 /* store character */
1188                 Xcheck(xs, xp);
1189                 Xput(xs, xp, c);
1190                 /* read next character */
1191                 c = getsc();
1192         }
1193         /* we read a newline as last character */
1194  heredoc_store_and_loop:
1195         /* store character */
1196         Xcheck(xs, xp);
1197         Xput(xs, xp, c);
1198         if (c == '\n')
1199                 goto heredoc_read_line;
1200         goto heredoc_read_char;
1201
1202  heredoc_found_terminator:
1203         /* jump back to saved beginning of line */
1204         xp = Xrestpos(xs, xp, xpos);
1205         /* terminate, close and store */
1206         Xput(xs, xp, '\0');
1207         iop->heredoc = Xclose(xs, xp);
1208
1209         if (!(iop->ioflag & IOEVAL))
1210                 ignore_backslash_newline--;
1211 }
1212
1213 void
1214 yyerror(const char *fmt, ...)
1215 {
1216         va_list va;
1217
1218         /* pop aliases and re-reads */
1219         while (source->type == SALIAS || source->type == SREREAD)
1220                 source = source->next;
1221         /* zap pending input */
1222         source->str = null;
1223
1224         error_prefix(true);
1225         va_start(va, fmt);
1226         shf_vfprintf(shl_out, fmt, va);
1227         shf_putc('\n', shl_out);
1228         va_end(va);
1229         errorfz();
1230 }
1231
1232 /*
1233  * input for yylex with alias expansion
1234  */
1235
1236 Source *
1237 pushs(int type, Area *areap)
1238 {
1239         Source *s;
1240
1241         s = alloc(sizeof(Source), areap);
1242         memset(s, 0, sizeof(Source));
1243         s->type = type;
1244         s->str = null;
1245         s->areap = areap;
1246         if (type == SFILE || type == SSTDIN)
1247                 XinitN(s->xs, 256, s->areap);
1248         return (s);
1249 }
1250
1251 static int
1252 getsc_uu(void)
1253 {
1254         Source *s = source;
1255         int c;
1256
1257         while ((c = ord(*s->str++)) == 0) {
1258                 /* return 0 for EOF by default */
1259                 s->str = NULL;
1260                 switch (s->type) {
1261                 case SEOF:
1262                         s->str = null;
1263                         return (0);
1264
1265                 case SSTDIN:
1266                 case SFILE:
1267                         getsc_line(s);
1268                         break;
1269
1270                 case SWSTR:
1271                         break;
1272
1273                 case SSTRING:
1274                 case SSTRINGCMDLINE:
1275                         break;
1276
1277                 case SWORDS:
1278                         s->start = s->str = *s->u.strv++;
1279                         s->type = SWORDSEP;
1280                         break;
1281
1282                 case SWORDSEP:
1283                         if (*s->u.strv == NULL) {
1284                                 s->start = s->str = "\n";
1285                                 s->type = SEOF;
1286                         } else {
1287                                 s->start = s->str = T1space;
1288                                 s->type = SWORDS;
1289                         }
1290                         break;
1291
1292                 case SALIAS:
1293                         if (s->flags & SF_ALIASEND) {
1294                                 /* pass on an unused SF_ALIAS flag */
1295                                 source = s->next;
1296                                 source->flags |= s->flags & SF_ALIAS;
1297                                 s = source;
1298                         } else if (*s->u.tblp->val.s &&
1299                             ctype((c = strnul(s->u.tblp->val.s)[-1]), C_SPACE)) {
1300                                 /* pop source stack */
1301                                 source = s = s->next;
1302                                 /*
1303                                  * Note that this alias ended with a
1304                                  * space, enabling alias expansion on
1305                                  * the following word.
1306                                  */
1307                                 s->flags |= SF_ALIAS;
1308                         } else {
1309                                 /*
1310                                  * At this point, we need to keep the current
1311                                  * alias in the source list so recursive
1312                                  * aliases can be detected and we also need to
1313                                  * return the next character. Do this by
1314                                  * temporarily popping the alias to get the
1315                                  * next character and then put it back in the
1316                                  * source list with the SF_ALIASEND flag set.
1317                                  */
1318                                 /* pop source stack */
1319                                 source = s->next;
1320                                 source->flags |= s->flags & SF_ALIAS;
1321                                 c = getsc_uu();
1322                                 if (c) {
1323                                         s->flags |= SF_ALIASEND;
1324                                         s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1325                                         s->start = s->str = s->ugbuf;
1326                                         s->next = source;
1327                                         source = s;
1328                                 } else {
1329                                         s = source;
1330                                         /* avoid reading EOF twice */
1331                                         s->str = NULL;
1332                                         break;
1333                                 }
1334                         }
1335                         continue;
1336
1337                 case SREREAD:
1338                         if (s->start != s->ugbuf)
1339                                 /* yuck */
1340                                 afree(s->u.freeme, ATEMP);
1341                         source = s = s->next;
1342                         continue;
1343                 }
1344                 if (s->str == NULL) {
1345                         s->type = SEOF;
1346                         s->start = s->str = null;
1347                         return ('\0');
1348                 }
1349                 if (s->flags & SF_ECHO) {
1350                         shf_puts(s->str, shl_out);
1351                         shf_flush(shl_out);
1352                 }
1353         }
1354         return (c);
1355 }
1356
1357 static void
1358 getsc_line(Source *s)
1359 {
1360         char *xp = Xstring(s->xs, xp), *cp;
1361         bool interactive = Flag(FTALKING) && s->type == SSTDIN;
1362         bool have_tty = interactive && (s->flags & SF_TTY) && tty_hasstate;
1363
1364         /* Done here to ensure nothing odd happens when a timeout occurs */
1365         XcheckN(s->xs, xp, LINE);
1366         *xp = '\0';
1367         s->start = s->str = xp;
1368
1369         if (have_tty && ksh_tmout) {
1370                 ksh_tmout_state = TMOUT_READING;
1371                 alarm(ksh_tmout);
1372         }
1373         if (interactive) {
1374                 if (cur_prompt == PS1)
1375                         histsave(&s->line, NULL, HIST_FLUSH, true);
1376                 change_winsz();
1377         }
1378 #ifndef MKSH_NO_CMDLINE_EDITING
1379         if (have_tty && (
1380 #if !MKSH_S_NOVI
1381             Flag(FVI) ||
1382 #endif
1383             Flag(FEMACS) || Flag(FGMACS))) {
1384                 int nread;
1385
1386                 nread = x_read(xp);
1387                 if (nread < 0)
1388                         /* read error */
1389                         nread = 0;
1390                 xp[nread] = '\0';
1391                 xp += nread;
1392         } else
1393 #endif
1394           {
1395                 if (interactive)
1396                         pprompt(prompt, 0);
1397                 else
1398                         s->line++;
1399
1400                 while (/* CONSTCOND */ 1) {
1401                         char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
1402
1403                         if (!p && shf_error(s->u.shf) &&
1404                             shf_errno(s->u.shf) == EINTR) {
1405                                 shf_clearerr(s->u.shf);
1406                                 if (trap)
1407                                         runtraps(0);
1408                                 continue;
1409                         }
1410                         if (!p || (xp = p, xp[-1] == '\n'))
1411                                 break;
1412                         /* double buffer size */
1413                         /* move past NUL so doubling works... */
1414                         xp++;
1415                         XcheckN(s->xs, xp, Xlength(s->xs, xp));
1416                         /* ...and move back again */
1417                         xp--;
1418                 }
1419                 /*
1420                  * flush any unwanted input so other programs/builtins
1421                  * can read it. Not very optimal, but less error prone
1422                  * than flushing else where, dealing with redirections,
1423                  * etc.
1424                  * TODO: reduce size of shf buffer (~128?) if SSTDIN
1425                  */
1426                 if (s->type == SSTDIN)
1427                         shf_flush(s->u.shf);
1428         }
1429         /*
1430          * XXX: temporary kludge to restore source after a
1431          * trap may have been executed.
1432          */
1433         source = s;
1434         if (have_tty && ksh_tmout) {
1435                 ksh_tmout_state = TMOUT_EXECUTING;
1436                 alarm(0);
1437         }
1438         cp = Xstring(s->xs, xp);
1439         rndpush(cp);
1440         s->start = s->str = cp;
1441         strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
1442         /* Note: if input is all nulls, this is not eof */
1443         if (Xlength(s->xs, xp) == 0) {
1444                 /* EOF */
1445                 if (s->type == SFILE)
1446                         shf_fdclose(s->u.shf);
1447                 s->str = NULL;
1448         } else if (interactive && *s->str) {
1449                 if (cur_prompt != PS1)
1450                         histsave(&s->line, s->str, HIST_APPEND, true);
1451                 else if (!ctype(*s->str, C_IFS | C_IFSWS))
1452                         histsave(&s->line, s->str, HIST_QUEUE, true);
1453 #if !defined(MKSH_SMALL) && HAVE_PERSISTENT_HISTORY
1454                 else
1455                         goto check_for_sole_return;
1456         } else if (interactive && cur_prompt == PS1) {
1457  check_for_sole_return:
1458                 cp = Xstring(s->xs, xp);
1459                 while (ctype(*cp, C_IFSWS))
1460                         ++cp;
1461                 if (!*cp) {
1462                         histsave(&s->line, NULL, HIST_FLUSH, true);
1463                         histsync();
1464                 }
1465 #endif
1466         }
1467         if (interactive)
1468                 set_prompt(PS2, NULL);
1469 }
1470
1471 void
1472 set_prompt(int to, Source *s)
1473 {
1474         cur_prompt = (uint8_t)to;
1475
1476         switch (to) {
1477         /* command */
1478         case PS1:
1479                 /*
1480                  * Substitute ! and !! here, before substitutions are done
1481                  * so ! in expanded variables are not expanded.
1482                  * NOTE: this is not what AT&T ksh does (it does it after
1483                  * substitutions, POSIX doesn't say which is to be done.
1484                  */
1485                 {
1486                         struct shf *shf;
1487                         char * volatile ps1;
1488                         Area *saved_atemp;
1489                         int saved_lineno;
1490
1491                         ps1 = str_val(global("PS1"));
1492                         shf = shf_sopen(NULL, strlen(ps1) * 2,
1493                             SHF_WR | SHF_DYNAMIC, NULL);
1494                         while (*ps1)
1495                                 if (*ps1 != '!' || *++ps1 == '!')
1496                                         shf_putchar(*ps1++, shf);
1497                                 else
1498                                         shf_fprintf(shf, Tf_lu, s ?
1499                                             (unsigned long)s->line + 1 : 0UL);
1500                         ps1 = shf_sclose(shf);
1501                         saved_lineno = current_lineno;
1502                         if (s)
1503                                 current_lineno = s->line + 1;
1504                         saved_atemp = ATEMP;
1505                         newenv(E_ERRH);
1506                         if (kshsetjmp(e->jbuf)) {
1507                                 prompt = safe_prompt;
1508                                 /*
1509                                  * Don't print an error - assume it has already
1510                                  * been printed. Reason is we may have forked
1511                                  * to run a command and the child may be
1512                                  * unwinding its stack through this code as it
1513                                  * exits.
1514                                  */
1515                         } else {
1516                                 char *cp = substitute(ps1, 0);
1517                                 strdupx(prompt, cp, saved_atemp);
1518                         }
1519                         current_lineno = saved_lineno;
1520                         quitenv(NULL);
1521                 }
1522                 break;
1523         /* command continuation */
1524         case PS2:
1525                 prompt = str_val(global("PS2"));
1526                 break;
1527         }
1528 }
1529
1530 int
1531 pprompt(const char *cp, int ntruncate)
1532 {
1533         char delimiter = 0;
1534         bool doprint = (ntruncate != -1);
1535         bool indelimit = false;
1536         int columns = 0, lines = 0;
1537
1538         /*
1539          * Undocumented AT&T ksh feature:
1540          * If the second char in the prompt string is \r then the first
1541          * char is taken to be a non-printing delimiter and any chars
1542          * between two instances of the delimiter are not considered to
1543          * be part of the prompt length
1544          */
1545         if (*cp && cp[1] == '\r') {
1546                 delimiter = *cp;
1547                 cp += 2;
1548         }
1549         for (; *cp; cp++) {
1550                 if (indelimit && *cp != delimiter)
1551                         ;
1552                 else if (ctype(*cp, C_CR | C_LF)) {
1553                         lines += columns / x_cols + ((*cp == '\n') ? 1 : 0);
1554                         columns = 0;
1555                 } else if (*cp == '\t') {
1556                         columns = (columns | 7) + 1;
1557                 } else if (*cp == '\b') {
1558                         if (columns > 0)
1559                                 columns--;
1560                 } else if (*cp == delimiter)
1561                         indelimit = !indelimit;
1562                 else if (UTFMODE && (rtt2asc(*cp) > 0x7F)) {
1563                         const char *cp2;
1564                         columns += utf_widthadj(cp, &cp2);
1565                         if (doprint && (indelimit ||
1566                             (ntruncate < (x_cols * lines + columns))))
1567                                 shf_write(cp, cp2 - cp, shl_out);
1568                         cp = cp2 - /* loop increment */ 1;
1569                         continue;
1570                 } else
1571                         columns++;
1572                 if (doprint && (*cp != delimiter) &&
1573                     (indelimit || (ntruncate < (x_cols * lines + columns))))
1574                         shf_putc(*cp, shl_out);
1575         }
1576         if (doprint)
1577                 shf_flush(shl_out);
1578         return (x_cols * lines + columns);
1579 }
1580
1581 /*
1582  * Read the variable part of a ${...} expression (i.e. up to but not
1583  * including the :[-+?=#%] or close-brace).
1584  */
1585 static char *
1586 get_brace_var(XString *wsp, char *wp)
1587 {
1588         char c;
1589         enum parse_state {
1590                 PS_INITIAL, PS_SAW_PERCENT, PS_SAW_HASH, PS_SAW_BANG,
1591                 PS_IDENT, PS_NUMBER, PS_VAR1
1592         } state = PS_INITIAL;
1593
1594         while (/* CONSTCOND */ 1) {
1595                 c = getsc();
1596                 /* State machine to figure out where the variable part ends. */
1597                 switch (state) {
1598                 case PS_SAW_HASH:
1599                         if (ctype(c, C_VAR1)) {
1600                                 char c2;
1601
1602                                 c2 = getsc();
1603                                 ungetsc(c2);
1604                                 if (ord(c2) != ORD(/*{*/ '}')) {
1605                                         ungetsc(c);
1606                                         goto out;
1607                                 }
1608                         }
1609                         goto ps_common;
1610                 case PS_SAW_BANG:
1611                         switch (ord(c)) {
1612                         case ORD('@'):
1613                         case ORD('#'):
1614                         case ORD('-'):
1615                         case ORD('?'):
1616                                 goto out;
1617                         }
1618                         goto ps_common;
1619                 case PS_INITIAL:
1620                         switch (ord(c)) {
1621                         case ORD('%'):
1622                                 state = PS_SAW_PERCENT;
1623                                 goto next;
1624                         case ORD('#'):
1625                                 state = PS_SAW_HASH;
1626                                 goto next;
1627                         case ORD('!'):
1628                                 state = PS_SAW_BANG;
1629                                 goto next;
1630                         }
1631                         /* FALLTHROUGH */
1632                 case PS_SAW_PERCENT:
1633  ps_common:
1634                         if (ctype(c, C_ALPHX))
1635                                 state = PS_IDENT;
1636                         else if (ctype(c, C_DIGIT))
1637                                 state = PS_NUMBER;
1638                         else if (ctype(c, C_VAR1))
1639                                 state = PS_VAR1;
1640                         else
1641                                 goto out;
1642                         break;
1643                 case PS_IDENT:
1644                         if (!ctype(c, C_ALNUX)) {
1645                                 if (ord(c) == ORD('[')) {
1646                                         char *tmp, *p;
1647
1648                                         if (!arraysub(&tmp))
1649                                                 yyerror("missing ]");
1650                                         *wp++ = c;
1651                                         p = tmp;
1652                                         while (*p) {
1653                                                 Xcheck(*wsp, wp);
1654                                                 *wp++ = *p++;
1655                                         }
1656                                         afree(tmp, ATEMP);
1657                                         /* the ] */
1658                                         c = getsc();
1659                                 }
1660                                 goto out;
1661                         }
1662  next:
1663                         break;
1664                 case PS_NUMBER:
1665                         if (!ctype(c, C_DIGIT))
1666                                 goto out;
1667                         break;
1668                 case PS_VAR1:
1669                         goto out;
1670                 }
1671                 Xcheck(*wsp, wp);
1672                 *wp++ = c;
1673         }
1674  out:
1675         /* end of variable part */
1676         *wp++ = '\0';
1677         ungetsc(c);
1678         return (wp);
1679 }
1680
1681 /*
1682  * Save an array subscript - returns true if matching bracket found, false
1683  * if eof or newline was found.
1684  * (Returned string double null terminated)
1685  */
1686 static bool
1687 arraysub(char **strp)
1688 {
1689         XString ws;
1690         char *wp, c;
1691         /* we are just past the initial [ */
1692         unsigned int depth = 1;
1693
1694         Xinit(ws, wp, 32, ATEMP);
1695
1696         do {
1697                 c = getsc();
1698                 Xcheck(ws, wp);
1699                 *wp++ = c;
1700                 if (ord(c) == ORD('['))
1701                         depth++;
1702                 else if (ord(c) == ORD(']'))
1703                         depth--;
1704         } while (depth > 0 && c && c != '\n');
1705
1706         *wp++ = '\0';
1707         *strp = Xclose(ws, wp);
1708
1709         return (tobool(depth == 0));
1710 }
1711
1712 /* Unget a char: handles case when we are already at the start of the buffer */
1713 static void
1714 ungetsc(int c)
1715 {
1716         struct sretrace_info *rp = retrace_info;
1717
1718         if (backslash_skip)
1719                 backslash_skip--;
1720         /* Don't unget EOF... */
1721         if (source->str == null && c == '\0')
1722                 return;
1723         while (rp) {
1724                 if (Xlength(rp->xs, rp->xp))
1725                         rp->xp--;
1726                 rp = rp->next;
1727         }
1728         ungetsc_i(c);
1729 }
1730 static void
1731 ungetsc_i(int c)
1732 {
1733         if (source->str > source->start)
1734                 source->str--;
1735         else {
1736                 Source *s;
1737
1738                 s = pushs(SREREAD, source->areap);
1739                 s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1740                 s->start = s->str = s->ugbuf;
1741                 s->next = source;
1742                 source = s;
1743         }
1744 }
1745
1746
1747 /* Called to get a char that isn't a \newline sequence. */
1748 static int
1749 getsc_bn(void)
1750 {
1751         int c, c2;
1752
1753         if (ignore_backslash_newline)
1754                 return (o_getsc_u());
1755
1756         if (backslash_skip == 1) {
1757                 backslash_skip = 2;
1758                 return (o_getsc_u());
1759         }
1760
1761         backslash_skip = 0;
1762
1763         while (/* CONSTCOND */ 1) {
1764                 c = o_getsc_u();
1765                 if (c == '\\') {
1766                         if ((c2 = o_getsc_u()) == '\n')
1767                                 /* ignore the \newline; get the next char... */
1768                                 continue;
1769                         ungetsc_i(c2);
1770                         backslash_skip = 1;
1771                 }
1772                 return (c);
1773         }
1774 }
1775
1776 void
1777 yyskiputf8bom(void)
1778 {
1779         int c;
1780
1781         if (rtt2asc((c = o_getsc_u())) != 0xEF) {
1782                 ungetsc_i(c);
1783                 return;
1784         }
1785         if (rtt2asc((c = o_getsc_u())) != 0xBB) {
1786                 ungetsc_i(c);
1787                 ungetsc_i(asc2rtt(0xEF));
1788                 return;
1789         }
1790         if (rtt2asc((c = o_getsc_u())) != 0xBF) {
1791                 ungetsc_i(c);
1792                 ungetsc_i(asc2rtt(0xBB));
1793                 ungetsc_i(asc2rtt(0xEF));
1794                 return;
1795         }
1796         UTFMODE |= 8;
1797 }
1798
1799 static Lex_state *
1800 push_state_i(State_info *si, Lex_state *old_end)
1801 {
1802         Lex_state *news = alloc2(STATE_BSIZE, sizeof(Lex_state), ATEMP);
1803
1804         news[0].ls_base = old_end;
1805         si->base = &news[0];
1806         si->end = &news[STATE_BSIZE];
1807         return (&news[1]);
1808 }
1809
1810 static Lex_state *
1811 pop_state_i(State_info *si, Lex_state *old_end)
1812 {
1813         Lex_state *old_base = si->base;
1814
1815         si->base = old_end->ls_base - STATE_BSIZE;
1816         si->end = old_end->ls_base;
1817
1818         afree(old_base, ATEMP);
1819
1820         return (si->base + STATE_BSIZE - 1);
1821 }