OSDN Git Service

9300311f5862b6dcd7edfe8e287e31b37acf610b
[android-x86/external-mksh.git] / src / lex.c
1 /*      $OpenBSD: lex.c,v 1.51 2015/09/10 22:48:58 nicm Exp $   */
2
3 /*-
4  * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5  *               2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018
6  *      mirabilos <m@mirbsd.org>
7  *
8  * Provided that these terms and disclaimer and all copyright notices
9  * are retained or reproduced in an accompanying document, permission
10  * is granted to deal in this work without restriction, including un-
11  * limited rights to use, publicly perform, distribute, sell, modify,
12  * merge, give away, or sublicence.
13  *
14  * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
15  * the utmost extent permitted by applicable law, neither express nor
16  * implied; without malicious intent or gross negligence. In no event
17  * may a licensor, author or contributor be held liable for indirect,
18  * direct, other damage, loss, or other issues arising in any way out
19  * of dealing in the work, even if advised of the possibility of such
20  * damage or existence of a defect, except proven that it results out
21  * of said person's immediate fault when using the work as intended.
22  */
23
24 #include "sh.h"
25
26 __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.247 2018/01/14 01:44:01 tg Exp $");
27
28 /*
29  * states while lexing word (kept in Lex_state.type and in yylex()'s "state")
30  */
31 #define SBASE           0       /* outside any lexical constructs */
32 #define SWORD           1       /* implicit quoting for substitute() */
33 #define SLETPAREN       2       /* inside (( )), implicit quoting */
34 #define SSQUOTE         3       /* inside '' */
35 #define SDQUOTE         4       /* inside "" */
36 #define SEQUOTE         5       /* inside $'' */
37 #define SBRACE          6       /* inside ${} */
38 #define SQBRACE         7       /* inside "${}" */
39 #define SBQUOTE         8       /* inside `` */
40 #define SASPAREN        9       /* inside $(( )) */
41 #define SHEREDELIM      10      /* parsing << or <<- delimiter */
42 #define SHEREDQUOTE     11      /* parsing " in << or <<- delimiter */
43 #define SPATTERN        12      /* parsing *(...|...) pattern (*+?@!) */
44 #define SADELIM         13      /* like SBASE, looking for delimiter */
45 #define STBRACEKORN     14      /* parsing ${...[#%]...} !FSH (pushed when Flag(FSH) is unset) */
46 #define STBRACEBOURNE   15      /* parsing ${...[#%]...} FSH (pushed when Flag(FSH) is set) */
47 #define SINVALID        255     /* invalid state; yylex() stores it in slot 0 of its states[] */
48
49 struct sretrace_info { /* one entry of the retrace_info list that o_getsc_r() writes to */
50         struct sretrace_info *next; /* entry that was active before; PUSH_SRETRACE links new entries at the head */
51         XString xs; /* buffer that receives a copy of every character read while this entry is listed */
52         char *xp; /* current write position inside xs */
53 };
54
55 /*
56  * Structure to keep track of the lexing state and the various pieces of info
57  * needed for each particular state. Which union member is meaningful depends
58  * on the state stored in the slot (ls_base is used in slot 0 of a block). */
59 typedef struct lex_state {
60         union {
61                 /* point to the next state block (NULL in slot 0 of yylex()'s own states[]) */
62                 struct lex_state *base;
63                 /* marks start of state output in output string (Xsavepos() value set by PUSH_SRETRACE) */
64                 size_t start;
65                 /* SBQUOTE: true if in double quotes: "`...`" */
66                 /* SEQUOTE: got NUL, ignore rest of string */
67                 bool abool;
68                 /* SADELIM information */
69                 struct {
70                         /* character to search for */
71                         unsigned char delimiter;
72                         /* max. number of delimiters; counted down per match, SADELIM is popped at 0 */
73                         unsigned char num;
74                 } adelim;
75         } u;
76         /* count open parentheses (e.g. in SLETPAREN, SADELIM, SASPAREN) */
77         short nparen;
78         /* type of this state: one of the S* constants */
79         uint8_t type;
80 } Lex_state;
81 #define ls_base         u.base /* shorthand accessors for the union members */
82 #define ls_start        u.start /* used with PUSH_SRETRACE / POP_SRETRACE */
83 #define ls_bool         u.abool /* used by the SBQUOTE and SEQUOTE states */
84 #define ls_adelim       u.adelim /* used by the SADELIM state */
85
86 typedef struct { /* bounds of a state block; presumably kept current by push_state_i()/pop_state_i(), which get a pointer to it */
87         Lex_state *base; /* slot 0 of the block; POP_STATE calls pop_state_i() when statep steps back onto it */
88         Lex_state *end; /* one past the last slot (base + STATE_BSIZE in yylex()); PUSH_STATE calls push_state_i() on reaching it */
89 } State_info;
90
91 static void readhere(struct ioword *);
92 static void ungetsc(int); /* yylex() hands look-ahead characters back through this */
93 static void ungetsc_i(int);
94 static int getsc_uu(void); /* fallback of o_getsc_u() when *source->str is NUL */
95 static void getsc_line(Source *);
96 static int getsc_bn(void); /* fallback of o_getsc() on NUL, backslash or backslash_skip */
97 static int getsc_i(void); /* reads one character and records it for retracing; also passed as a callback */
98 static char *get_brace_var(XString *, char *); /* yylex() passes &ws and wp and continues with the returned wp */
99 static bool arraysub(char **); /* when true, yylex() copies and then afree()s (ATEMP) the string stored via the argument */
100 static void gethere(void);
101 static Lex_state *push_state_i(State_info *, Lex_state *); /* invoked by PUSH_STATE when statep reaches state_info.end */
102 static Lex_state *pop_state_i(State_info *, Lex_state *); /* invoked by POP_STATE when statep reaches state_info.base */
103
104 static int backslash_skip; /* zeroed at the start of each yylex() pass; nonzero disables the o_getsc() fast path */
105 static int ignore_backslash_newline; /* zeroed at the start of each yylex() pass; raised e.g. while skipping a '#' comment or inside '...' */
106
107 /* optimised getsc_bn(): take the next byte of source->str inline unless it is NUL, a backslash, or backslash_skip is set */
108 #define o_getsc()       (*source->str != '\0' && *source->str != '\\' && \
109                             !backslash_skip ? *source->str++ : getsc_bn())
110 /* optimised getsc_uu(): take the next byte of source->str inline unless it is NUL */
111 #define o_getsc_u()     ((*source->str != '\0') ? *source->str++ : getsc_uu())
112 /* o_getsc_r(carg) expands to a complete function body (declarations plus return), not to an expression */
113 /* retrace helper: evaluate carg once, append it to the buffer of every entry on the retrace_info list, return it */
114 #define o_getsc_r(carg)                                 \
115         int cev = (carg);                               \
116         struct sretrace_info *rp = retrace_info;        \
117                                                         \
118         while (rp) {                                    \
119                 Xcheck(rp->xs, rp->xp);                 \
120                 *rp->xp++ = cev;                        \
121                 rp = rp->next;                          \
122         }                                               \
123                                                         \
124         return (cev);
125
126 /* callback */
127 static int
128 getsc_i(void)
129 {
130         o_getsc_r((unsigned int)(unsigned char)o_getsc());
131 }
132
133 #if defined(MKSH_SMALL) && !defined(MKSH_SMALL_BUT_FAST) /* selects how getsc() is implemented */
134 #define getsc()         getsc_i() /* each read is a plain call to getsc_i() */
135 #else
136 static int getsc_r(int);
137 /* getsc_r(c): record the already-read character c for retracing and return it unchanged */
138 static int
139 getsc_r(int c)
140 {
141         o_getsc_r(c);
142 }
143
144 #define getsc()         getsc_r((unsigned int)(unsigned char)o_getsc()) /* o_getsc() expands at the call site; only the recording is a function call */
145 #endif
146
147 #define STATE_BSIZE     8 /* number of Lex_state slots in yylex()'s states[]; also the distance from a block's base to its end */
148 /* PUSH_STATE(s): enter state s using yylex()'s locals statep, state and state_info; push_state_i() supplies the slot once the block end is reached */
149 #define PUSH_STATE(s)   do {                                    \
150         if (++statep == state_info.end)                         \
151                 statep = push_state_i(&state_info, statep);     \
152         state = statep->type = (s);                             \
153 } while (/* CONSTCOND */ 0)
154 /* POP_STATE(): leave the current state; step statep back (through pop_state_i() when it lands on the block base) and reload state from that slot */
155 #define POP_STATE()     do {                                    \
156         if (--statep == state_info.base)                        \
157                 statep = pop_state_i(&state_info, statep);      \
158         state = statep->type;                                   \
159 } while (/* CONSTCOND */ 0)
160 /* PUSH_SRETRACE(s): PUSH_STATE(s), remember the current output position in ls_start, and put a fresh ATEMP-allocated entry at the head of retrace_info */
161 #define PUSH_SRETRACE(s) do {                                   \
162         struct sretrace_info *ri;                               \
163                                                                 \
164         PUSH_STATE(s);                                          \
165         statep->ls_start = Xsavepos(ws, wp);                    \
166         ri = alloc(sizeof(struct sretrace_info), ATEMP);        \
167         Xinit(ri->xs, ri->xp, 64, ATEMP);                       \
168         ri->next = retrace_info;                                \
169         retrace_info = ri;                                      \
170 } while (/* CONSTCOND */ 0)
171 /* POP_SRETRACE(): rewind wp to the saved start, leave the NUL-terminated retraced text in sp (not freed here), unlink and free the head entry via dp, then POP_STATE() */
172 #define POP_SRETRACE()  do {                                    \
173         wp = Xrestpos(ws, wp, statep->ls_start);                \
174         *retrace_info->xp = '\0';                               \
175         sp = Xstring(retrace_info->xs, retrace_info->xp);       \
176         dp = (void *)retrace_info;                              \
177         retrace_info = retrace_info->next;                      \
178         afree(dp, ATEMP);                                       \
179         POP_STATE();                                            \
180 } while (/* CONSTCOND */ 0)
181
182 /**
183  * Lexical analyser
184  *
185  * tokens are not regular expressions, they are LL(1).
186  * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
187  * hence the state stack. Note "$(...)" are now parsed recursively.
188  */
189
190 int
191 yylex(int cf)
192 {
193         Lex_state states[STATE_BSIZE], *statep, *s2, *base;
194         State_info state_info;
195         int c, c2, state;
196         size_t cz;
197         XString ws;             /* expandable output word */
198         char *wp;               /* output word pointer */
199         char *sp, *dp;
200
201  Again:
202         states[0].type = SINVALID;
203         states[0].ls_base = NULL;
204         statep = &states[1];
205         state_info.base = states;
206         state_info.end = &state_info.base[STATE_BSIZE];
207
208         Xinit(ws, wp, 64, ATEMP);
209
210         backslash_skip = 0;
211         ignore_backslash_newline = 0;
212
213         if (cf & ONEWORD)
214                 state = SWORD;
215         else if (cf & LETEXPR) {
216                 /* enclose arguments in (double) quotes */
217                 *wp++ = OQUOTE;
218                 state = SLETPAREN;
219                 statep->nparen = 0;
220         } else {
221                 /* normal lexing */
222                 state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
223                 do {
224                         c = getsc();
225                 } while (ctype(c, C_BLANK));
226                 if (c == '#') {
227                         ignore_backslash_newline++;
228                         do {
229                                 c = getsc();
230                         } while (!ctype(c, C_NUL | C_LF));
231                         ignore_backslash_newline--;
232                 }
233                 ungetsc(c);
234         }
235         if (source->flags & SF_ALIAS) {
236                 /* trailing ' ' in alias definition */
237                 source->flags &= ~SF_ALIAS;
238                 /* POSIX: trailing space only counts if parsing simple cmd */
239                 if (!Flag(FPOSIX) || (cf & CMDWORD))
240                         cf |= ALIAS;
241         }
242
243         /* Initial state: one of SWORD SLETPAREN SHEREDELIM SBASE */
244         statep->type = state;
245
246         /* collect non-special or quoted characters to form word */
247         while (!((c = getsc()) == 0 ||
248             ((state == SBASE || state == SHEREDELIM) && ctype(c, C_LEX1)))) {
249                 if (state == SBASE &&
250                     subshell_nesting_type == ORD(/*{*/ '}') &&
251                     (unsigned int)c == ORD(/*{*/ '}'))
252                         /* possibly end ${ :;} */
253                         break;
254                 Xcheck(ws, wp);
255                 switch (state) {
256                 case SADELIM:
257                         if ((unsigned int)c == ORD('('))
258                                 statep->nparen++;
259                         else if ((unsigned int)c == ORD(')'))
260                                 statep->nparen--;
261                         else if (statep->nparen == 0 &&
262                             ((unsigned int)c == ORD(/*{*/ '}') ||
263                             c == (int)statep->ls_adelim.delimiter)) {
264                                 *wp++ = ADELIM;
265                                 *wp++ = c;
266                                 if ((unsigned int)c == ORD(/*{*/ '}') ||
267                                     --statep->ls_adelim.num == 0)
268                                         POP_STATE();
269                                 if ((unsigned int)c == ORD(/*{*/ '}'))
270                                         POP_STATE();
271                                 break;
272                         }
273                         /* FALLTHROUGH */
274                 case SBASE:
275                         if ((unsigned int)c == ORD('[') && (cf & CMDASN)) {
276                                 /* temporary */
277                                 *wp = EOS;
278                                 if (is_wdvarname(Xstring(ws, wp), false)) {
279                                         char *p, *tmp;
280
281                                         if (arraysub(&tmp)) {
282                                                 *wp++ = CHAR;
283                                                 *wp++ = c;
284                                                 for (p = tmp; *p; ) {
285                                                         Xcheck(ws, wp);
286                                                         *wp++ = CHAR;
287                                                         *wp++ = *p++;
288                                                 }
289                                                 afree(tmp, ATEMP);
290                                                 break;
291                                         }
292                                 }
293                                 *wp++ = CHAR;
294                                 *wp++ = c;
295                                 break;
296                         }
297                         /* FALLTHROUGH */
298  Sbase1:                /* includes *(...|...) pattern (*+?@!) */
299                         if (ctype(c, C_PATMO)) {
300                                 c2 = getsc();
301                                 if ((unsigned int)c2 == ORD('(' /*)*/)) {
302                                         *wp++ = OPAT;
303                                         *wp++ = c;
304                                         PUSH_STATE(SPATTERN);
305                                         break;
306                                 }
307                                 ungetsc(c2);
308                         }
309                         /* FALLTHROUGH */
310  Sbase2:                /* doesn't include *(...|...) pattern (*+?@!) */
311                         switch (c) {
312                         case ORD('\\'):
313  getsc_qchar:
314                                 if ((c = getsc())) {
315                                         /* trailing \ is lost */
316                                         *wp++ = QCHAR;
317                                         *wp++ = c;
318                                 }
319                                 break;
320                         case ORD('\''):
321  open_ssquote_unless_heredoc:
322                                 if ((cf & HEREDOC))
323                                         goto store_char;
324                                 *wp++ = OQUOTE;
325                                 ignore_backslash_newline++;
326                                 PUSH_STATE(SSQUOTE);
327                                 break;
328                         case ORD('"'):
329  open_sdquote:
330                                 *wp++ = OQUOTE;
331                                 PUSH_STATE(SDQUOTE);
332                                 break;
333                         case ORD('$'):
334                                 /*
335                                  * processing of dollar sign belongs into
336                                  * Subst, except for those which can open
337                                  * a string: $'…' and $"…"
338                                  */
339  subst_dollar_ex:
340                                 c = getsc();
341                                 switch (c) {
342                                 case ORD('"'):
343                                         goto open_sdquote;
344                                 case ORD('\''):
345                                         goto open_sequote;
346                                 default:
347                                         goto SubstS;
348                                 }
349                         default:
350                                 goto Subst;
351                         }
352                         break;
353
354  Subst:
355                         switch (c) {
356                         case ORD('\\'):
357                                 c = getsc();
358                                 switch (c) {
359                                 case ORD('"'):
360                                         if ((cf & HEREDOC))
361                                                 goto heredocquote;
362                                         /* FALLTHROUGH */
363                                 case ORD('\\'):
364                                 case ORD('$'):
365                                 case ORD('`'):
366  store_qchar:
367                                         *wp++ = QCHAR;
368                                         *wp++ = c;
369                                         break;
370                                 default:
371  heredocquote:
372                                         Xcheck(ws, wp);
373                                         if (c) {
374                                                 /* trailing \ is lost */
375                                                 *wp++ = CHAR;
376                                                 *wp++ = '\\';
377                                                 *wp++ = CHAR;
378                                                 *wp++ = c;
379                                         }
380                                         break;
381                                 }
382                                 break;
383                         case ORD('$'):
384                                 c = getsc();
385  SubstS:
386                                 if ((unsigned int)c == ORD('(' /*)*/)) {
387                                         c = getsc();
388                                         if ((unsigned int)c == ORD('(' /*)*/)) {
389                                                 *wp++ = EXPRSUB;
390                                                 PUSH_SRETRACE(SASPAREN);
391                                                 statep->nparen = 2;
392                                                 *retrace_info->xp++ = '(';
393                                         } else {
394                                                 ungetsc(c);
395  subst_command:
396                                                 c = COMSUB;
397  subst_command2:
398                                                 sp = yyrecursive(c);
399                                                 cz = strlen(sp) + 1;
400                                                 XcheckN(ws, wp, cz);
401                                                 *wp++ = c;
402                                                 memcpy(wp, sp, cz);
403                                                 wp += cz;
404                                         }
405                                 } else if ((unsigned int)c == ORD('{' /*}*/)) {
406                                         if ((unsigned int)(c = getsc()) == ORD('|')) {
407                                                 /*
408                                                  * non-subenvironment
409                                                  * value substitution
410                                                  */
411                                                 c = VALSUB;
412                                                 goto subst_command2;
413                                         } else if (ctype(c, C_IFSWS)) {
414                                                 /*
415                                                  * non-subenvironment
416                                                  * "command" substitution
417                                                  */
418                                                 c = FUNSUB;
419                                                 goto subst_command2;
420                                         }
421                                         ungetsc(c);
422                                         *wp++ = OSUBST;
423                                         *wp++ = '{' /*}*/;
424                                         wp = get_brace_var(&ws, wp);
425                                         c = getsc();
426                                         /* allow :# and :% (ksh88 compat) */
427                                         if ((unsigned int)c == ORD(':')) {
428                                                 *wp++ = CHAR;
429                                                 *wp++ = c;
430                                                 c = getsc();
431                                                 if ((unsigned int)c == ORD(':')) {
432                                                         *wp++ = CHAR;
433                                                         *wp++ = '0';
434                                                         *wp++ = ADELIM;
435                                                         *wp++ = ':';
436                                                         PUSH_STATE(SBRACE);
437                                                         PUSH_STATE(SADELIM);
438                                                         statep->ls_adelim.delimiter = ':';
439                                                         statep->ls_adelim.num = 1;
440                                                         statep->nparen = 0;
441                                                         break;
442                                                 } else if (ctype(c, C_DIGIT | C_DOLAR | C_SPC) ||
443                                                     /*XXX what else? */
444                                                     c == '(' /*)*/) {
445                                                         /* substring subst. */
446                                                         if (c != ' ') {
447                                                                 *wp++ = CHAR;
448                                                                 *wp++ = ' ';
449                                                         }
450                                                         ungetsc(c);
451                                                         PUSH_STATE(SBRACE);
452                                                         PUSH_STATE(SADELIM);
453                                                         statep->ls_adelim.delimiter = ':';
454                                                         statep->ls_adelim.num = 2;
455                                                         statep->nparen = 0;
456                                                         break;
457                                                 }
458                                         } else if (c == '/') {
459                                                 c2 = ADELIM;
460  parse_adelim_slash:
461                                                 *wp++ = CHAR;
462                                                 *wp++ = c;
463                                                 if ((unsigned int)(c = getsc()) == ORD('/')) {
464                                                         *wp++ = c2;
465                                                         *wp++ = c;
466                                                 } else
467                                                         ungetsc(c);
468                                                 PUSH_STATE(SBRACE);
469                                                 PUSH_STATE(SADELIM);
470                                                 statep->ls_adelim.delimiter = '/';
471                                                 statep->ls_adelim.num = 1;
472                                                 statep->nparen = 0;
473                                                 break;
474                                         } else if (c == '@') {
475                                                 c2 = getsc();
476                                                 ungetsc(c2);
477                                                 if ((unsigned int)c2 == ORD('/')) {
478                                                         c2 = CHAR;
479                                                         goto parse_adelim_slash;
480                                                 }
481                                         }
482                                         /*
483                                          * If this is a trim operation,
484                                          * treat (,|,) specially in STBRACE.
485                                          */
486                                         if (ctype(c, C_SUB2)) {
487                                                 ungetsc(c);
488                                                 if (Flag(FSH))
489                                                         PUSH_STATE(STBRACEBOURNE);
490                                                 else
491                                                         PUSH_STATE(STBRACEKORN);
492                                         } else {
493                                                 ungetsc(c);
494                                                 if (state == SDQUOTE ||
495                                                     state == SQBRACE)
496                                                         PUSH_STATE(SQBRACE);
497                                                 else
498                                                         PUSH_STATE(SBRACE);
499                                         }
500                                 } else if (ctype(c, C_ALPHX)) {
501                                         *wp++ = OSUBST;
502                                         *wp++ = 'X';
503                                         do {
504                                                 Xcheck(ws, wp);
505                                                 *wp++ = c;
506                                                 c = getsc();
507                                         } while (ctype(c, C_ALNUX));
508                                         *wp++ = '\0';
509                                         *wp++ = CSUBST;
510                                         *wp++ = 'X';
511                                         ungetsc(c);
512                                 } else if (ctype(c, C_VAR1 | C_DIGIT)) {
513                                         Xcheck(ws, wp);
514                                         *wp++ = OSUBST;
515                                         *wp++ = 'X';
516                                         *wp++ = c;
517                                         *wp++ = '\0';
518                                         *wp++ = CSUBST;
519                                         *wp++ = 'X';
520                                 } else {
521                                         *wp++ = CHAR;
522                                         *wp++ = '$';
523                                         ungetsc(c);
524                                 }
525                                 break;
526                         case ORD('`'):
527  subst_gravis:
528                                 PUSH_STATE(SBQUOTE);
529                                 *wp++ = COMASUB;
530                                 /*
531                                  * We need to know whether we are within double
532                                  * quotes in order to translate \" to " within
533                                  * "…`…\"…`…" because, unlike for COMSUBs, the
534                                  * outer double quoting changes the backslash
535                                  * meaning for the inside. For more details:
536                                  * http://austingroupbugs.net/view.php?id=1015
537                                  */
538                                 statep->ls_bool = false;
539                                 s2 = statep;
540                                 base = state_info.base;
541                                 while (/* CONSTCOND */ 1) {
542                                         for (; s2 != base; s2--) {
543                                                 if (s2->type == SDQUOTE) {
544                                                         statep->ls_bool = true;
545                                                         break;
546                                                 }
547                                         }
548                                         if (s2 != base)
549                                                 break;
550                                         if (!(s2 = s2->ls_base))
551                                                 break;
552                                         base = s2-- - STATE_BSIZE;
553                                 }
554                                 break;
555                         case QCHAR:
556                                 if (cf & LQCHAR) {
557                                         *wp++ = QCHAR;
558                                         *wp++ = getsc();
559                                         break;
560                                 }
561                                 /* FALLTHROUGH */
562                         default:
563  store_char:
564                                 *wp++ = CHAR;
565                                 *wp++ = c;
566                         }
567                         break;
568
569                 case SEQUOTE:
570                         if ((unsigned int)c == ORD('\'')) {
571                                 POP_STATE();
572                                 *wp++ = CQUOTE;
573                                 ignore_backslash_newline--;
574                         } else if ((unsigned int)c == ORD('\\')) {
575                                 if ((c2 = unbksl(true, getsc_i, ungetsc)) == -1)
576                                         c2 = getsc();
577                                 if (c2 == 0)
578                                         statep->ls_bool = true;
579                                 if (!statep->ls_bool) {
580                                         char ts[4];
581
582                                         if ((unsigned int)c2 < 0x100) {
583                                                 *wp++ = QCHAR;
584                                                 *wp++ = c2;
585                                         } else {
586                                                 cz = utf_wctomb(ts, c2 - 0x100);
587                                                 ts[cz] = 0;
588                                                 cz = 0;
589                                                 do {
590                                                         *wp++ = QCHAR;
591                                                         *wp++ = ts[cz];
592                                                 } while (ts[++cz]);
593                                         }
594                                 }
595                         } else if (!statep->ls_bool) {
596                                 *wp++ = QCHAR;
597                                 *wp++ = c;
598                         }
599                         break;
600
601                 case SSQUOTE:
602                         if ((unsigned int)c == ORD('\'')) {
603                                 POP_STATE();
604                                 if ((cf & HEREDOC) || state == SQBRACE)
605                                         goto store_char;
606                                 *wp++ = CQUOTE;
607                                 ignore_backslash_newline--;
608                         } else {
609                                 *wp++ = QCHAR;
610                                 *wp++ = c;
611                         }
612                         break;
613
614                 case SDQUOTE:
615                         if ((unsigned int)c == ORD('"')) {
616                                 POP_STATE();
617                                 *wp++ = CQUOTE;
618                         } else
619                                 goto Subst;
620                         break;
621
622                 /* $(( ... )) */
623                 case SASPAREN:
624                         if ((unsigned int)c == ORD('('))
625                                 statep->nparen++;
626                         else if ((unsigned int)c == ORD(')')) {
627                                 statep->nparen--;
628                                 if (statep->nparen == 1) {
629                                         /* end of EXPRSUB */
630                                         POP_SRETRACE();
631
632                                         if ((unsigned int)(c2 = getsc()) == ORD(/*(*/ ')')) {
633                                                 cz = strlen(sp) - 2;
634                                                 XcheckN(ws, wp, cz);
635                                                 memcpy(wp, sp + 1, cz);
636                                                 wp += cz;
637                                                 afree(sp, ATEMP);
638                                                 *wp++ = '\0';
639                                                 break;
640                                         } else {
641                                                 Source *s;
642
643                                                 ungetsc(c2);
644                                                 /*
645                                                  * mismatched parenthesis -
646                                                  * assume we were really
647                                                  * parsing a $(...) expression
648                                                  */
649                                                 --wp;
650                                                 s = pushs(SREREAD,
651                                                     source->areap);
652                                                 s->start = s->str =
653                                                     s->u.freeme = sp;
654                                                 s->next = source;
655                                                 source = s;
656                                                 goto subst_command;
657                                         }
658                                 }
659                         }
660                         /* reuse existing state machine */
661                         goto Sbase2;
662
663                 case SQBRACE:
664                         if ((unsigned int)c == ORD('\\')) {
665                                 /*
666                                  * perform POSIX "quote removal" if the back-
667                                  * slash is "special", i.e. same cases as the
668                                  * {case '\\':} in Subst: plus closing brace;
669                                  * in mksh code "quote removal" on '\c' means
670                                  * write QCHAR+c, otherwise CHAR+\+CHAR+c are
671                                  * emitted (in heredocquote:)
672                                  */
673                                 if ((unsigned int)(c = getsc()) == ORD('"') ||
674                                     (unsigned int)c == ORD('\\') ||
675                                     ctype(c, C_DOLAR | C_GRAVE) ||
676                                     (unsigned int)c == ORD(/*{*/ '}'))
677                                         goto store_qchar;
678                                 goto heredocquote;
679                         }
680                         goto common_SQBRACE;
681
682                 case SBRACE:
683                         if ((unsigned int)c == ORD('\''))
684                                 goto open_ssquote_unless_heredoc;
685                         else if ((unsigned int)c == ORD('\\'))
686                                 goto getsc_qchar;
687  common_SQBRACE:
688                         if ((unsigned int)c == ORD('"'))
689                                 goto open_sdquote;
690                         else if ((unsigned int)c == ORD('$'))
691                                 goto subst_dollar_ex;
692                         else if ((unsigned int)c == ORD('`'))
693                                 goto subst_gravis;
694                         else if ((unsigned int)c != ORD(/*{*/ '}'))
695                                 goto store_char;
696                         POP_STATE();
697                         *wp++ = CSUBST;
698                         *wp++ = /*{*/ '}';
699                         break;
700
701                 /* Same as SBASE, except (,|,) treated specially */
702                 case STBRACEKORN:
703                         if ((unsigned int)c == ORD('|'))
704                                 *wp++ = SPAT;
705                         else if ((unsigned int)c == ORD('(')) {
706                                 *wp++ = OPAT;
707                                 /* simile for @ */
708                                 *wp++ = ' ';
709                                 PUSH_STATE(SPATTERN);
710                         } else /* FALLTHROUGH */
711                 case STBRACEBOURNE:
712                           if ((unsigned int)c == ORD(/*{*/ '}')) {
713                                 POP_STATE();
714                                 *wp++ = CSUBST;
715                                 *wp++ = /*{*/ '}';
716                         } else
717                                 goto Sbase1;
718                         break;
719
720                 case SBQUOTE:
721                         if ((unsigned int)c == ORD('`')) {
722                                 *wp++ = 0;
723                                 POP_STATE();
724                         } else if ((unsigned int)c == ORD('\\')) {
725                                 switch (c = getsc()) {
726                                 case 0:
727                                         /* trailing \ is lost */
728                                         break;
729                                 case ORD('$'):
730                                 case ORD('`'):
731                                 case ORD('\\'):
732                                         *wp++ = c;
733                                         break;
734                                 case ORD('"'):
735                                         if (statep->ls_bool) {
736                                                 *wp++ = c;
737                                                 break;
738                                         }
739                                         /* FALLTHROUGH */
740                                 default:
741                                         *wp++ = '\\';
742                                         *wp++ = c;
743                                         break;
744                                 }
745                         } else
746                                 *wp++ = c;
747                         break;
748
749                 /* ONEWORD */
750                 case SWORD:
751                         goto Subst;
752
753                 /* LETEXPR: (( ... )) */
754                 case SLETPAREN:
755                         if ((unsigned int)c == ORD(/*(*/ ')')) {
756                                 if (statep->nparen > 0)
757                                         --statep->nparen;
758                                 else if ((unsigned int)(c2 = getsc()) == ORD(/*(*/ ')')) {
759                                         c = 0;
760                                         *wp++ = CQUOTE;
761                                         goto Done;
762                                 } else {
763                                         Source *s;
764
765                                         ungetsc(c2);
766                                         ungetsc(c);
767                                         /*
768                                          * mismatched parenthesis -
769                                          * assume we were really
770                                          * parsing a (...) expression
771                                          */
772                                         *wp = EOS;
773                                         sp = Xstring(ws, wp);
774                                         dp = wdstrip(sp + 1, WDS_TPUTS);
775                                         s = pushs(SREREAD, source->areap);
776                                         s->start = s->str = s->u.freeme = dp;
777                                         s->next = source;
778                                         source = s;
779                                         ungetsc('(' /*)*/);
780                                         return (ORD('(' /*)*/));
781                                 }
782                         } else if ((unsigned int)c == ORD('('))
783                                 /*
784                                  * parentheses inside quotes and
785                                  * backslashes are lost, but AT&T ksh
786                                  * doesn't count them either
787                                  */
788                                 ++statep->nparen;
789                         goto Sbase2;
790
791                 /* << or <<- delimiter */
792                 case SHEREDELIM:
793                         /*
794                          * here delimiters need a special case since
795                          * $ and `...` are not to be treated specially
796                          */
797                         switch (c) {
798                         case ORD('\\'):
799                                 if ((c = getsc())) {
800                                         /* trailing \ is lost */
801                                         *wp++ = QCHAR;
802                                         *wp++ = c;
803                                 }
804                                 break;
805                         case ORD('\''):
806                                 goto open_ssquote_unless_heredoc;
807                         case ORD('$'):
808                                 if ((unsigned int)(c2 = getsc()) == ORD('\'')) {
809  open_sequote:
810                                         *wp++ = OQUOTE;
811                                         ignore_backslash_newline++;
812                                         PUSH_STATE(SEQUOTE);
813                                         statep->ls_bool = false;
814                                         break;
815                                 } else if ((unsigned int)c2 == ORD('"')) {
816                                         /* FALLTHROUGH */
817                         case ORD('"'):
818                                         PUSH_SRETRACE(SHEREDQUOTE);
819                                         break;
820                                 }
821                                 ungetsc(c2);
822                                 /* FALLTHROUGH */
823                         default:
824                                 *wp++ = CHAR;
825                                 *wp++ = c;
826                         }
827                         break;
828
829                 /* " in << or <<- delimiter */
830                 case SHEREDQUOTE:
831                         if ((unsigned int)c != ORD('"'))
832                                 goto Subst;
833                         POP_SRETRACE();
834                         dp = strnul(sp) - 1;
835                         /* remove the trailing double quote */
836                         *dp = '\0';
837                         /* store the quoted string */
838                         *wp++ = OQUOTE;
839                         XcheckN(ws, wp, (dp - sp) * 2);
840                         dp = sp;
841                         while ((c = *dp++)) {
842                                 if (c == '\\') {
843                                         switch ((c = *dp++)) {
844                                         case ORD('\\'):
845                                         case ORD('"'):
846                                         case ORD('$'):
847                                         case ORD('`'):
848                                                 break;
849                                         default:
850                                                 *wp++ = CHAR;
851                                                 *wp++ = '\\';
852                                                 break;
853                                         }
854                                 }
855                                 *wp++ = CHAR;
856                                 *wp++ = c;
857                         }
858                         afree(sp, ATEMP);
859                         *wp++ = CQUOTE;
860                         state = statep->type = SHEREDELIM;
861                         break;
862
863                 /* in *(...|...) pattern (*+?@!) */
864                 case SPATTERN:
865                         if ((unsigned int)c == ORD(/*(*/ ')')) {
866                                 *wp++ = CPAT;
867                                 POP_STATE();
868                         } else if ((unsigned int)c == ORD('|')) {
869                                 *wp++ = SPAT;
870                         } else if ((unsigned int)c == ORD('(')) {
871                                 *wp++ = OPAT;
872                                 /* simile for @ */
873                                 *wp++ = ' ';
874                                 PUSH_STATE(SPATTERN);
875                         } else
876                                 goto Sbase1;
877                         break;
878                 }
879         }
880  Done:
881         Xcheck(ws, wp);
882         if (statep != &states[1])
883                 /* XXX figure out what is missing */
884                 yyerror("no closing quote");
885
886         /* This done to avoid tests for SHEREDELIM wherever SBASE tested */
887         if (state == SHEREDELIM)
888                 state = SBASE;
889
890         dp = Xstring(ws, wp);
891         if (state == SBASE && (
892             (c == '&' && !Flag(FSH) && !Flag(FPOSIX)) ||
893             ctype(c, C_ANGLE)) && ((c2 = Xlength(ws, wp)) == 0 ||
894             (c2 == 2 && dp[0] == CHAR && ctype(dp[1], C_DIGIT)))) {
895                 struct ioword *iop = alloc(sizeof(struct ioword), ATEMP);
896
897                 iop->unit = c2 == 2 ? ksh_numdig(dp[1]) : c == '<' ? 0 : 1;
898
899                 if (c == '&') {
900                         if ((unsigned int)(c2 = getsc()) != ORD('>')) {
901                                 ungetsc(c2);
902                                 goto no_iop;
903                         }
904                         c = c2;
905                         iop->ioflag = IOBASH;
906                 } else
907                         iop->ioflag = 0;
908
909                 c2 = getsc();
910                 /* <<, >>, <> are ok, >< is not */
911                 if (c == c2 || ((unsigned int)c == ORD('<') &&
912                     (unsigned int)c2 == ORD('>'))) {
913                         iop->ioflag |= c == c2 ?
914                             ((unsigned int)c == ORD('>') ? IOCAT : IOHERE) : IORDWR;
915                         if (iop->ioflag == IOHERE) {
916                                 if ((unsigned int)(c2 = getsc()) == ORD('-'))
917                                         iop->ioflag |= IOSKIP;
918                                 else if ((unsigned int)c2 == ORD('<'))
919                                         iop->ioflag |= IOHERESTR;
920                                 else
921                                         ungetsc(c2);
922                         }
923                 } else if ((unsigned int)c2 == ORD('&'))
924                         iop->ioflag |= IODUP | ((unsigned int)c == ORD('<') ? IORDUP : 0);
925                 else {
926                         iop->ioflag |= (unsigned int)c == ORD('>') ? IOWRITE : IOREAD;
927                         if ((unsigned int)c == ORD('>') && (unsigned int)c2 == ORD('|'))
928                                 iop->ioflag |= IOCLOB;
929                         else
930                                 ungetsc(c2);
931                 }
932
933                 iop->ioname = NULL;
934                 iop->delim = NULL;
935                 iop->heredoc = NULL;
936                 /* free word */
937                 Xfree(ws, wp);
938                 yylval.iop = iop;
939                 return (REDIR);
940  no_iop:
941                 afree(iop, ATEMP);
942         }
943
944         if (wp == dp && state == SBASE) {
945                 /* free word */
946                 Xfree(ws, wp);
947                 /* no word, process LEX1 character */
948                 if (((unsigned int)c == ORD('|')) ||
949                     ((unsigned int)c == ORD('&')) ||
950                     ((unsigned int)c == ORD(';')) ||
951                     ((unsigned int)c == ORD('(' /*)*/))) {
952                         if ((c2 = getsc()) == c)
953                                 c = ((unsigned int)c == ORD(';')) ? BREAK :
954                                     ((unsigned int)c == ORD('|')) ? LOGOR :
955                                     ((unsigned int)c == ORD('&')) ? LOGAND :
956                                     /* (unsigned int)c == ORD('(' )) */ MDPAREN;
957                         else if ((unsigned int)c == ORD('|') && (unsigned int)c2 == ORD('&'))
958                                 c = COPROC;
959                         else if ((unsigned int)c == ORD(';') && (unsigned int)c2 == ORD('|'))
960                                 c = BRKEV;
961                         else if ((unsigned int)c == ORD(';') && (unsigned int)c2 == ORD('&'))
962                                 c = BRKFT;
963                         else
964                                 ungetsc(c2);
965 #ifndef MKSH_SMALL
966                         if (c == BREAK) {
967                                 if ((unsigned int)(c2 = getsc()) == ORD('&'))
968                                         c = BRKEV;
969                                 else
970                                         ungetsc(c2);
971                         }
972 #endif
973                 } else if ((unsigned int)c == ORD('\n')) {
974                         if (cf & HEREDELIM)
975                                 ungetsc(c);
976                         else {
977                                 gethere();
978                                 if (cf & CONTIN)
979                                         goto Again;
980                         }
981                 } else if (c == '\0' && !(cf & HEREDELIM)) {
982                         struct ioword **p = heres;
983
984                         while (p < herep)
985                                 if ((*p)->ioflag & IOHERESTR)
986                                         ++p;
987                                 else
988                                         /* ksh -c 'cat <<EOF' can cause this */
989                                         yyerror(Tf_heredoc,
990                                             evalstr((*p)->delim, 0));
991                 }
992                 return (c);
993         }
994
995         /* terminate word */
996         *wp++ = EOS;
997         yylval.cp = Xclose(ws, wp);
998         if (state == SWORD || state == SLETPAREN
999             /* XXX ONEWORD? */)
1000                 return (LWORD);
1001
1002         /* unget terminator */
1003         ungetsc(c);
1004
1005         /*
1006          * note: the alias-vs-function code below depends on several
1007          * interna: starting from here, source->str is not modified;
1008          * the way getsc() and ungetsc() operate; etc.
1009          */
1010
1011         /* copy word to unprefixed string ident */
1012         sp = yylval.cp;
1013         dp = ident;
1014         while ((dp - ident) < IDENT && (c = *sp++) == CHAR)
1015                 *dp++ = *sp++;
1016         if (c != EOS)
1017                 /* word is not unquoted, or space ran out */
1018                 dp = ident;
1019         /* make sure the ident array stays NUL padded */
1020         memset(dp, 0, (ident + IDENT) - dp + 1);
1021
1022         if (*ident != '\0' && (cf & (KEYWORD | ALIAS))) {
1023                 struct tbl *p;
1024                 uint32_t h = hash(ident);
1025
1026                 if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) &&
1027                     (!(cf & ESACONLY) || p->val.i == ESAC ||
1028                     (unsigned int)p->val.i == ORD(/*{*/ '}'))) {
1029                         afree(yylval.cp, ATEMP);
1030                         return (p->val.i);
1031                 }
1032                 if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) &&
1033                     (p->flag & ISSET)) {
1034                         /*
1035                          * this still points to the same character as the
1036                          * ungetsc'd terminator from above
1037                          */
1038                         const char *cp = source->str;
1039
1040                         /* prefer POSIX but not Korn functions over aliases */
1041                         while (ctype(*cp, C_BLANK))
1042                                 /*
1043                                  * this is like getsc() without skipping
1044                                  * over Source boundaries (including not
1045                                  * parsing ungetsc'd characters that got
1046                                  * pushed into an SREREAD) which is what
1047                                  * we want here anyway: find out whether
1048                                  * the alias name is followed by a POSIX
1049                                  * function definition
1050                                  */
1051                                 ++cp;
1052                         /* prefer functions over aliases */
1053                         if (cp[0] != '(' || cp[1] != ')') {
1054                                 Source *s = source;
1055
1056                                 while (s && (s->flags & SF_HASALIAS))
1057                                         if (s->u.tblp == p)
1058                                                 return (LWORD);
1059                                         else
1060                                                 s = s->next;
1061                                 /* push alias expansion */
1062                                 s = pushs(SALIAS, source->areap);
1063                                 s->start = s->str = p->val.s;
1064                                 s->u.tblp = p;
1065                                 s->flags |= SF_HASALIAS;
1066                                 s->line = source->line;
1067                                 s->next = source;
1068                                 if (source->type == SEOF) {
1069                                         /* prevent infinite recursion at EOS */
1070                                         source->u.tblp = p;
1071                                         source->flags |= SF_HASALIAS;
1072                                 }
1073                                 source = s;
1074                                 afree(yylval.cp, ATEMP);
1075                                 goto Again;
1076                         }
1077                 }
1078         } else if (*ident == '\0') {
1079                 /* retain typeset et al. even when quoted */
1080                 struct tbl *tt = get_builtin((dp = wdstrip(yylval.cp, 0)));
1081                 uint32_t flag = tt ? tt->flag : 0;
1082
1083                 if (flag & (DECL_UTIL | DECL_FWDR))
1084                         strlcpy(ident, dp, sizeof(ident));
1085                 afree(dp, ATEMP);
1086         }
1087
1088         return (LWORD);
1089 }
1090
1091 static void
1092 gethere(void)
1093 {
1094         struct ioword **p;
1095
1096         for (p = heres; p < herep; p++)
1097                 if (!((*p)->ioflag & IOHERESTR))
1098                         readhere(*p);
1099         herep = heres;
1100 }
1101
1102 /*
1103  * read "<<word" text into temp file
1104  */
1105
1106 static void
1107 readhere(struct ioword *iop)
1108 {
1109         int c;
1110         const char *eof, *eofp;
1111         XString xs;
1112         char *xp;
1113         size_t xpos;
1114
1115         eof = evalstr(iop->delim, 0);
1116
1117         if (!(iop->ioflag & IOEVAL))
1118                 ignore_backslash_newline++;
1119
1120         Xinit(xs, xp, 256, ATEMP);
1121
1122  heredoc_read_line:
1123         /* beginning of line */
1124         eofp = eof;
1125         xpos = Xsavepos(xs, xp);
1126         if (iop->ioflag & IOSKIP) {
1127                 /* skip over leading tabs */
1128                 while ((c = getsc()) == '\t')
1129                         ;       /* nothing */
1130                 goto heredoc_parse_char;
1131         }
1132  heredoc_read_char:
1133         c = getsc();
1134  heredoc_parse_char:
1135         /* compare with here document marker */
1136         if (!*eofp) {
1137                 /* end of here document marker, what to do? */
1138                 switch (c) {
1139                 case ORD(/*(*/ ')'):
1140                         if (!subshell_nesting_type)
1141                                 /*-
1142                                  * not allowed outside $(...) or (...)
1143                                  * => mismatch
1144                                  */
1145                                 break;
1146                         /* allow $(...) or (...) to close here */
1147                         ungetsc(/*(*/ ')');
1148                         /* FALLTHROUGH */
1149                 case 0:
1150                         /*
1151                          * Allow EOF here to commands without trailing
1152                          * newlines (mksh -c '...') will work as well.
1153                          */
1154                 case ORD('\n'):
1155                         /* Newline terminates here document marker */
1156                         goto heredoc_found_terminator;
1157                 }
1158         } else if (c == *eofp++)
1159                 /* store; then read and compare next character */
1160                 goto heredoc_store_and_loop;
1161         /* nope, mismatch; read until end of line */
1162         while (c != '\n') {
1163                 if (!c)
1164                         /* oops, reached EOF */
1165                         yyerror(Tf_heredoc, eof);
1166                 /* store character */
1167                 Xcheck(xs, xp);
1168                 Xput(xs, xp, c);
1169                 /* read next character */
1170                 c = getsc();
1171         }
1172         /* we read a newline as last character */
1173  heredoc_store_and_loop:
1174         /* store character */
1175         Xcheck(xs, xp);
1176         Xput(xs, xp, c);
1177         if (c == '\n')
1178                 goto heredoc_read_line;
1179         goto heredoc_read_char;
1180
1181  heredoc_found_terminator:
1182         /* jump back to saved beginning of line */
1183         xp = Xrestpos(xs, xp, xpos);
1184         /* terminate, close and store */
1185         Xput(xs, xp, '\0');
1186         iop->heredoc = Xclose(xs, xp);
1187
1188         if (!(iop->ioflag & IOEVAL))
1189                 ignore_backslash_newline--;
1190 }
1191
1192 void
1193 yyerror(const char *fmt, ...)
1194 {
1195         va_list va;
1196
1197         /* pop aliases and re-reads */
1198         while (source->type == SALIAS || source->type == SREREAD)
1199                 source = source->next;
1200         /* zap pending input */
1201         source->str = null;
1202
1203         error_prefix(true);
1204         va_start(va, fmt);
1205         shf_vfprintf(shl_out, fmt, va);
1206         shf_putc('\n', shl_out);
1207         va_end(va);
1208         errorfz();
1209 }
1210
1211 /*
1212  * input for yylex with alias expansion
1213  */
1214
1215 Source *
1216 pushs(int type, Area *areap)
1217 {
1218         Source *s;
1219
1220         s = alloc(sizeof(Source), areap);
1221         memset(s, 0, sizeof(Source));
1222         s->type = type;
1223         s->str = null;
1224         s->areap = areap;
1225         if (type == SFILE || type == SSTDIN)
1226                 XinitN(s->xs, 256, s->areap);
1227         return (s);
1228 }
1229
1230 static int
1231 getsc_uu(void)
1232 {
1233         Source *s = source;
1234         int c;
1235
1236         while ((c = ord(*s->str++)) == 0) {
1237                 /* return 0 for EOF by default */
1238                 s->str = NULL;
1239                 switch (s->type) {
1240                 case SEOF:
1241                         s->str = null;
1242                         return (0);
1243
1244                 case SSTDIN:
1245                 case SFILE:
1246                         getsc_line(s);
1247                         break;
1248
1249                 case SWSTR:
1250                         break;
1251
1252                 case SSTRING:
1253                 case SSTRINGCMDLINE:
1254                         break;
1255
1256                 case SWORDS:
1257                         s->start = s->str = *s->u.strv++;
1258                         s->type = SWORDSEP;
1259                         break;
1260
1261                 case SWORDSEP:
1262                         if (*s->u.strv == NULL) {
1263                                 s->start = s->str = "\n";
1264                                 s->type = SEOF;
1265                         } else {
1266                                 s->start = s->str = T1space;
1267                                 s->type = SWORDS;
1268                         }
1269                         break;
1270
1271                 case SALIAS:
1272                         if (s->flags & SF_ALIASEND) {
1273                                 /* pass on an unused SF_ALIAS flag */
1274                                 source = s->next;
1275                                 source->flags |= s->flags & SF_ALIAS;
1276                                 s = source;
1277                         } else if (*s->u.tblp->val.s &&
1278                             ctype((c = strnul(s->u.tblp->val.s)[-1]), C_SPACE)) {
1279                                 /* pop source stack */
1280                                 source = s = s->next;
1281                                 /*
1282                                  * Note that this alias ended with a
1283                                  * space, enabling alias expansion on
1284                                  * the following word.
1285                                  */
1286                                 s->flags |= SF_ALIAS;
1287                         } else {
1288                                 /*
1289                                  * At this point, we need to keep the current
1290                                  * alias in the source list so recursive
1291                                  * aliases can be detected and we also need to
1292                                  * return the next character. Do this by
1293                                  * temporarily popping the alias to get the
1294                                  * next character and then put it back in the
1295                                  * source list with the SF_ALIASEND flag set.
1296                                  */
1297                                 /* pop source stack */
1298                                 source = s->next;
1299                                 source->flags |= s->flags & SF_ALIAS;
1300                                 c = getsc_uu();
1301                                 if (c) {
1302                                         s->flags |= SF_ALIASEND;
1303                                         s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1304                                         s->start = s->str = s->ugbuf;
1305                                         s->next = source;
1306                                         source = s;
1307                                 } else {
1308                                         s = source;
1309                                         /* avoid reading EOF twice */
1310                                         s->str = NULL;
1311                                         break;
1312                                 }
1313                         }
1314                         continue;
1315
1316                 case SREREAD:
1317                         if (s->start != s->ugbuf)
1318                                 /* yuck */
1319                                 afree(s->u.freeme, ATEMP);
1320                         source = s = s->next;
1321                         continue;
1322                 }
1323                 if (s->str == NULL) {
1324                         s->type = SEOF;
1325                         s->start = s->str = null;
1326                         return ('\0');
1327                 }
1328                 if (s->flags & SF_ECHO) {
1329                         shf_puts(s->str, shl_out);
1330                         shf_flush(shl_out);
1331                 }
1332         }
1333         return (c);
1334 }
1335
/*
 * Read one more line of input for source s into its expandable buffer
 * s->xs.
 *
 * On return s->start and s->str point at the start of the data that was
 * read; s->str is NULL instead when nothing at all was read (end of
 * input). "interactive" below means a talking shell (FTALKING) reading
 * from an SSTDIN source; in that case the function also arms and
 * disarms the ksh_tmout alarm (if the source is flagged SF_TTY), prints
 * the prompt or reads through x_read(), feeds the line to histsave()
 * and finally switches the prompt to PS2.
 */
1336 static void
1337 getsc_line(Source *s)
1338 {
1339         char *xp = Xstring(s->xs, xp), *cp;
1340         bool interactive = Flag(FTALKING) && s->type == SSTDIN;
1341         bool have_tty = tobool(interactive && (s->flags & SF_TTY));
1342
1343         /* Done here to ensure nothing odd happens when a timeout occurs */
1344         XcheckN(s->xs, xp, LINE);
1345         *xp = '\0';
1346         s->start = s->str = xp;
1347
             /* arm the read timeout; it is cancelled again once the line is in */
1348         if (have_tty && ksh_tmout) {
1349                 ksh_tmout_state = TMOUT_READING;
1350                 alarm(ksh_tmout);
1351         }
1352         if (interactive) {
1353                 if (cur_prompt == PS1)
1354                         histsave(&s->line, NULL, HIST_FLUSH, true);
1355                 change_winsz();
1356         }
1357 #ifndef MKSH_NO_CMDLINE_EDITING
             /* an editing mode is enabled on a tty: let x_read() fetch the line */
1358         if (have_tty && (
1359 #if !MKSH_S_NOVI
1360             Flag(FVI) ||
1361 #endif
1362             Flag(FEMACS) || Flag(FGMACS))) {
1363                 int nread;
1364
1365                 nread = x_read(xp);
1366                 if (nread < 0)
1367                         /* read error: treat it like an empty read */
1368                         nread = 0;
1369                 xp[nread] = '\0';
1370                 xp += nread;
1371         } else
1372 #endif
1373           {
1374                 if (interactive)
1375                         pprompt(prompt, 0);
1376                 else
1377                         s->line++;
1378
                     /* read chunk by chunk until a newline, EOF or a hard error */
1379                 while (/* CONSTCOND */ 1) {
1380                         char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
1381
                             /* interrupted read: clear the error, run pending traps, retry */
1382                         if (!p && shf_error(s->u.shf) &&
1383                             shf_errno(s->u.shf) == EINTR) {
1384                                 shf_clearerr(s->u.shf);
1385                                 if (trap)
1386                                         runtraps(0);
1387                                 continue;
1388                         }
                             /*
                              * Stop when nothing more could be read, or when the
                              * chunk just read ends in a newline; xp is moved to p
                              * (presumably the end of the data read - confirm
                              * against shf_getse).
                              */
1389                         if (!p || (xp = p, xp[-1] == '\n'))
1390                                 break;
1391                         /* double buffer size */
1392                         /* move past NUL so doubling works... */
1393                         xp++;
1394                         XcheckN(s->xs, xp, Xlength(s->xs, xp));
1395                         /* ...and move back again */
1396                         xp--;
1397                 }
1398                 /*
1399                  * flush any unwanted input so other programs/builtins
1400                  * can read it. Not very optimal, but less error prone
1401                  * than flushing elsewhere, dealing with redirections,
1402                  * etc.
1403                  * TODO: reduce size of shf buffer (~128?) if SSTDIN
1404                  */
1405                 if (s->type == SSTDIN)
1406                         shf_flush(s->u.shf);
1407         }
1408         /*
1409          * XXX: temporary kludge to restore source after a
1410          * trap may have been executed.
1411          */
1412         source = s;
             /* the line is in: cancel the read timeout armed above */
1413         if (have_tty && ksh_tmout) {
1414                 ksh_tmout_state = TMOUT_EXECUTING;
1415                 alarm(0);
1416         }
1417         cp = Xstring(s->xs, xp);
1418         rndpush(cp);
1419         s->start = s->str = cp;
1420         strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
1421         /* Note: if input is all nulls, this is not eof */
1422         if (Xlength(s->xs, xp) == 0) {
1423                 /* EOF: close file-backed sources and report it via s->str */
1424                 if (s->type == SFILE)
1425                         shf_fdclose(s->u.shf);
1426                 s->str = NULL;
1427         } else if (interactive && *s->str) {
                     /*
                      * Lines read at a prompt other than PS1 are appended to
                      * history; a PS1 line is queued unless its first character
                      * is in class C_IFS or C_IFSWS.
                      */
1428                 if (cur_prompt != PS1)
1429                         histsave(&s->line, s->str, HIST_APPEND, true);
1430                 else if (!ctype(*s->str, C_IFS | C_IFSWS))
1431                         histsave(&s->line, s->str, HIST_QUEUE, true);
1432 #if !defined(MKSH_SMALL) && HAVE_PERSISTENT_HISTORY
1433                 else
1434                         goto check_for_sole_return;
1435         } else if (interactive && cur_prompt == PS1) {
1436  check_for_sole_return:
                     /* a PS1 line made only of C_IFSWS characters: flush and sync history */
1437                 cp = Xstring(s->xs, xp);
1438                 while (ctype(*cp, C_IFSWS))
1439                         ++cp;
1440                 if (!*cp) {
1441                         histsave(&s->line, NULL, HIST_FLUSH, true);
1442                         histsync();
1443                 }
1444 #endif
1445         }
             /* any further line read for the same command uses the PS2 prompt */
1446         if (interactive)
1447                 set_prompt(PS2, NULL);
1448 }
1449
/*
 * Select the prompt string used for the next read.
 *
 * to: PS1 or PS2; always stored in cur_prompt. Any other value updates
 *     cur_prompt only and leaves the global prompt untouched.
 * s:  source whose line counter (s->line + 1) replaces a lone '!' in
 *     PS1 and serves as current_lineno while PS1 is expanded; may be
 *     NULL, in which case 0 is printed for '!' and current_lineno is
 *     not changed.
 */
1450 void
1451 set_prompt(int to, Source *s)
1452 {
1453         cur_prompt = (uint8_t)to;
1454
1455         switch (to) {
1456         /* command */
1457         case PS1:
1458                 /*
1459                  * Substitute ! and !! here, before substitutions are done
1460                  * so ! in expanded variables are not expanded.
1461                  * NOTE: this is not what AT&T ksh does (it does it after
1462                  * substitutions); POSIX doesn't say which is to be done.
1463                  */
1464                 {
1465                         struct shf *shf;
1466                         char * volatile ps1;
1467                         Area *saved_atemp;
1468                         int saved_lineno;
1469
1470                         ps1 = str_val(global("PS1"));
1471                         shf = shf_sopen(NULL, strlen(ps1) * 2,
1472                             SHF_WR | SHF_DYNAMIC, NULL);
                             /* "!!" is copied as one '!', a lone '!' becomes the line number */
1473                         while (*ps1)
1474                                 if (*ps1 != '!' || *++ps1 == '!')
1475                                         shf_putchar(*ps1++, shf);
1476                                 else
1477                                         shf_fprintf(shf, Tf_lu, s ?
1478                                             (unsigned long)s->line + 1 : 0UL);
1479                         ps1 = shf_sclose(shf);
1480                         saved_lineno = current_lineno;
1481                         if (s)
1482                                 current_lineno = s->line + 1;
                             /*
                              * Remember ATEMP before newenv() so the expanded prompt
                              * can be duplicated into it (presumably so that it
                              * survives the quitenv() below - confirm).
                              */
1483                         saved_atemp = ATEMP;
1484                         newenv(E_ERRH);
1485                         if (kshsetjmp(e->jbuf)) {
1486                                 prompt = safe_prompt;
1487                                 /*
1488                                  * Don't print an error - assume it has already
1489                                  * been printed. Reason is we may have forked
1490                                  * to run a command and the child may be
1491                                  * unwinding its stack through this code as it
1492                                  * exits.
1493                                  */
1494                         } else {
1495                                 char *cp = substitute(ps1, 0);
1496                                 strdupx(prompt, cp, saved_atemp);
1497                         }
1498                         current_lineno = saved_lineno;
1499                         quitenv(NULL);
1500                 }
1501                 break;
1502         /* command continuation */
1503         case PS2:
1504                 prompt = str_val(global("PS2"));
1505                 break;
1506         }
1507 }
1508
1509 int
1510 pprompt(const char *cp, int ntruncate)
1511 {
1512         char delimiter = 0;
1513         bool doprint = (ntruncate != -1);
1514         bool indelimit = false;
1515         int columns = 0, lines = 0;
1516
1517         /*
1518          * Undocumented AT&T ksh feature:
1519          * If the second char in the prompt string is \r then the first
1520          * char is taken to be a non-printing delimiter and any chars
1521          * between two instances of the delimiter are not considered to
1522          * be part of the prompt length
1523          */
1524         if (*cp && cp[1] == '\r') {
1525                 delimiter = *cp;
1526                 cp += 2;
1527         }
1528         for (; *cp; cp++) {
1529                 if (indelimit && *cp != delimiter)
1530                         ;
1531                 else if (ctype(*cp, C_CR | C_LF)) {
1532                         lines += columns / x_cols + ((*cp == '\n') ? 1 : 0);
1533                         columns = 0;
1534                 } else if (*cp == '\t') {
1535                         columns = (columns | 7) + 1;
1536                 } else if (*cp == '\b') {
1537                         if (columns > 0)
1538                                 columns--;
1539                 } else if (*cp == delimiter)
1540                         indelimit = !indelimit;
1541                 else if (UTFMODE && (rtt2asc(*cp) > 0x7F)) {
1542                         const char *cp2;
1543                         columns += utf_widthadj(cp, &cp2);
1544                         if (doprint && (indelimit ||
1545                             (ntruncate < (x_cols * lines + columns))))
1546                                 shf_write(cp, cp2 - cp, shl_out);
1547                         cp = cp2 - /* loop increment */ 1;
1548                         continue;
1549                 } else
1550                         columns++;
1551                 if (doprint && (*cp != delimiter) &&
1552                     (indelimit || (ntruncate < (x_cols * lines + columns))))
1553                         shf_putc(*cp, shl_out);
1554         }
1555         if (doprint)
1556                 shf_flush(shl_out);
1557         return (x_cols * lines + columns);
1558 }
1559
1560 /*
1561  * Read the variable part of a ${...} expression (i.e. up to but not
1562  * including the :[-+?=#%] or close-brace).
      *
      * wsp: expandable string being filled in
      * wp:  current write position inside *wsp
      *
      * Characters are fetched with getsc() and copied to wp while a small
      * state machine decides where the name ends. The text is then
      * NUL-terminated, the last character read is pushed back with
      * ungetsc(), and the write position just past the NUL is returned.
1563  */
1564 static char *
1565 get_brace_var(XString *wsp, char *wp)
1566 {
1567         char c;
1568         enum parse_state {
1569                 PS_INITIAL, PS_SAW_PERCENT, PS_SAW_HASH, PS_SAW_BANG,
1570                 PS_IDENT, PS_NUMBER, PS_VAR1
1571         } state = PS_INITIAL;
1572
1573         while (/* CONSTCOND */ 1) {
1574                 c = getsc();
1575                 /* State machine to figure out where the variable part ends. */
1576                 switch (state) {
1577                 case PS_SAW_HASH:
                             /*
                              * After a leading '#': a C_VAR1 character counts as
                              * the name only if the close-brace follows directly;
                              * otherwise it is pushed back and the name ends here.
                              */
1578                         if (ctype(c, C_VAR1)) {
1579                                 char c2;
1580
1581                                 c2 = getsc();
1582                                 ungetsc(c2);
1583                                 if (ord(c2) != ORD(/*{*/ '}')) {
1584                                         ungetsc(c);
1585                                         goto out;
1586                                 }
1587                         }
1588                         goto ps_common;
1589                 case PS_SAW_BANG:
                             /* after a leading '!': these characters end the name at once */
1590                         switch (ord(c)) {
1591                         case ORD('@'):
1592                         case ORD('#'):
1593                         case ORD('-'):
1594                         case ORD('?'):
1595                                 goto out;
1596                         }
1597                         goto ps_common;
1598                 case PS_INITIAL:
                             /* a leading '%', '#' or '!' is copied and remembered in state */
1599                         switch (ord(c)) {
1600                         case ORD('%'):
1601                                 state = PS_SAW_PERCENT;
1602                                 goto next;
1603                         case ORD('#'):
1604                                 state = PS_SAW_HASH;
1605                                 goto next;
1606                         case ORD('!'):
1607                                 state = PS_SAW_BANG;
1608                                 goto next;
1609                         }
1610                         /* FALLTHROUGH */
1611                 case PS_SAW_PERCENT:
1612  ps_common:
                             /* classify the first character of the name proper */
1613                         if (ctype(c, C_ALPHX))
1614                                 state = PS_IDENT;
1615                         else if (ctype(c, C_DIGIT))
1616                                 state = PS_NUMBER;
1617                         else if (ctype(c, C_VAR1))
1618                                 state = PS_VAR1;
1619                         else
1620                                 goto out;
1621                         break;
1622                 case PS_IDENT:
1623                         if (!ctype(c, C_ALNUX)) {
                                     /*
                                      * End of the identifier. If a '[' follows, copy
                                      * it and the subscript text collected by
                                      * arraysub() as well.
                                      */
1624                                 if (ord(c) == ORD('[')) {
1625                                         char *tmp, *p;
1626
1627                                         if (!arraysub(&tmp))
1628                                                 yyerror("missing ]");
1629                                         *wp++ = c;
1630                                         p = tmp;
1631                                         while (*p) {
1632                                                 Xcheck(*wsp, wp);
1633                                                 *wp++ = *p++;
1634                                         }
1635                                         afree(tmp, ATEMP);
1636                                         /* the ] */
1637                                         c = getsc();
1638                                 }
1639                                 goto out;
1640                         }
1641  next:
1642                         break;
1643                 case PS_NUMBER:
1644                         if (!ctype(c, C_DIGIT))
1645                                 goto out;
1646                         break;
1647                 case PS_VAR1:
                             /* a C_VAR1 name is a single character; c is not part of it */
1648                         goto out;
1649                 }
                     /* c belongs to the variable part: store it and read on */
1650                 Xcheck(*wsp, wp);
1651                 *wp++ = c;
1652         }
1653  out:
1654         /* end of variable part */
1655         *wp++ = '\0';
             /* c was not consumed as part of the name: give it back */
1656         ungetsc(c);
1657         return (wp);
1658 }
1659
1660 /*
1661  * Save an array subscript - returns true if matching bracket found, false
1662  * if eof or newline was found.
1663  * (Returned string double null terminated)
1664  */
1665 static bool
1666 arraysub(char **strp)
1667 {
1668         XString ws;
1669         char *wp, c;
1670         /* we are just past the initial [ */
1671         unsigned int depth = 1;
1672
1673         Xinit(ws, wp, 32, ATEMP);
1674
1675         do {
1676                 c = getsc();
1677                 Xcheck(ws, wp);
1678                 *wp++ = c;
1679                 if (ord(c) == ORD('['))
1680                         depth++;
1681                 else if (ord(c) == ORD(']'))
1682                         depth--;
1683         } while (depth > 0 && c && c != '\n');
1684
1685         *wp++ = '\0';
1686         *strp = Xclose(ws, wp);
1687
1688         return (tobool(depth == 0));
1689 }
1690
1691 /* Unget a char: handles case when we are already at the start of the buffer */
1692 static void
1693 ungetsc(int c)
1694 {
1695         struct sretrace_info *rp = retrace_info;
1696
1697         if (backslash_skip)
1698                 backslash_skip--;
1699         /* Don't unget EOF... */
1700         if (source->str == null && c == '\0')
1701                 return;
1702         while (rp) {
1703                 if (Xlength(rp->xs, rp->xp))
1704                         rp->xp--;
1705                 rp = rp->next;
1706         }
1707         ungetsc_i(c);
1708 }
1709 static void
1710 ungetsc_i(int c)
1711 {
1712         if (source->str > source->start)
1713                 source->str--;
1714         else {
1715                 Source *s;
1716
1717                 s = pushs(SREREAD, source->areap);
1718                 s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1719                 s->start = s->str = s->ugbuf;
1720                 s->next = source;
1721                 source = s;
1722         }
1723 }
1724
1725
1726 /* Called to get a char that isn't a \newline sequence. */
1727 static int
1728 getsc_bn(void)
1729 {
1730         int c, c2;
1731
1732         if (ignore_backslash_newline)
1733                 return (o_getsc_u());
1734
1735         if (backslash_skip == 1) {
1736                 backslash_skip = 2;
1737                 return (o_getsc_u());
1738         }
1739
1740         backslash_skip = 0;
1741
1742         while (/* CONSTCOND */ 1) {
1743                 c = o_getsc_u();
1744                 if (c == '\\') {
1745                         if ((c2 = o_getsc_u()) == '\n')
1746                                 /* ignore the \newline; get the next char... */
1747                                 continue;
1748                         ungetsc_i(c2);
1749                         backslash_skip = 1;
1750                 }
1751                 return (c);
1752         }
1753 }
1754
1755 void
1756 yyskiputf8bom(void)
1757 {
1758         int c;
1759
1760         if (rtt2asc((c = o_getsc_u())) != 0xEF) {
1761                 ungetsc_i(c);
1762                 return;
1763         }
1764         if (rtt2asc((c = o_getsc_u())) != 0xBB) {
1765                 ungetsc_i(c);
1766                 ungetsc_i(asc2rtt(0xEF));
1767                 return;
1768         }
1769         if (rtt2asc((c = o_getsc_u())) != 0xBF) {
1770                 ungetsc_i(c);
1771                 ungetsc_i(asc2rtt(0xBB));
1772                 ungetsc_i(asc2rtt(0xEF));
1773                 return;
1774         }
1775         UTFMODE |= 8;
1776 }
1777
1778 static Lex_state *
1779 push_state_i(State_info *si, Lex_state *old_end)
1780 {
1781         Lex_state *news = alloc2(STATE_BSIZE, sizeof(Lex_state), ATEMP);
1782
1783         news[0].ls_base = old_end;
1784         si->base = &news[0];
1785         si->end = &news[STATE_BSIZE];
1786         return (&news[1]);
1787 }
1788
1789 static Lex_state *
1790 pop_state_i(State_info *si, Lex_state *old_end)
1791 {
1792         Lex_state *old_base = si->base;
1793
1794         si->base = old_end->ls_base - STATE_BSIZE;
1795         si->end = old_end->ls_base;
1796
1797         afree(old_base, ATEMP);
1798
1799         return (si->base + STATE_BSIZE - 1);
1800 }