OSDN Git Service

305f5a480d849d21d242d71b2946af3007c30bb6
[android-x86/external-mksh.git] / src / lex.c
1 /*      $OpenBSD: lex.c,v 1.49 2013/12/17 16:37:06 deraadt Exp $        */
2
3 /*-
4  * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5  *               2011, 2012, 2013, 2014, 2015
6  *      Thorsten Glaser <tg@mirbsd.org>
7  *
8  * Provided that these terms and disclaimer and all copyright notices
9  * are retained or reproduced in an accompanying document, permission
10  * is granted to deal in this work without restriction, including un-
11  * limited rights to use, publicly perform, distribute, sell, modify,
12  * merge, give away, or sublicence.
13  *
14  * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
15  * the utmost extent permitted by applicable law, neither express nor
16  * implied; without malicious intent or gross negligence. In no event
17  * may a licensor, author or contributor be held liable for indirect,
18  * direct, other damage, loss, or other issues arising in any way out
19  * of dealing in the work, even if advised of the possibility of such
20  * damage or existence of a defect, except proven that it results out
21  * of said person's immediate fault when using the work as intended.
22  */
23
24 #include "sh.h"
25
26 __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.193.2.5 2015/04/19 19:18:19 tg Exp $");
27
28 /*
29  * states while lexing word; held in Lex_state.type and in yylex()'s state variable
30  */
31 #define SBASE           0       /* outside any lexical constructs */
32 #define SWORD           1       /* implicit quoting for substitute(); initial state when cf has ONEWORD */
33 #define SLETPAREN       2       /* inside (( )), implicit quoting; initial state when cf has LETEXPR */
34 #define SSQUOTE         3       /* inside '' */
35 #define SDQUOTE         4       /* inside "" */
36 #define SEQUOTE         5       /* inside $'' */
37 #define SBRACE          6       /* inside ${} */
38 #define SQBRACE         7       /* inside "${}" */
39 #define SBQUOTE         8       /* inside `` */
40 #define SASPAREN        9       /* inside $(( )) */
41 #define SHEREDELIM      10      /* parsing <<,<<-,<<< delimiter; initial state when cf has HEREDELIM */
42 #define SHEREDQUOTE     11      /* parsing " in <<,<<-,<<< delimiter */
43 #define SPATTERN        12      /* parsing *(...|...) pattern (*+?@!) */
44 #define SADELIM         13      /* like SBASE, looking for delimiter (or closing brace) first */
45 #define STBRACEKORN     14      /* parsing ${...[#%]...} !FSH, i.e. Flag(FSH) is unset */
46 #define STBRACEBOURNE   15      /* parsing ${...[#%]...} FSH, i.e. Flag(FSH) is set */
47 #define SINVALID        255     /* invalid state; yylex() stores it in states[0] */
48
49 struct sretrace_info {          /* one retrace buffer; nodes form the retrace_info list */
50         struct sretrace_info *next;     /* next node; o_getsc_r() walks the list until NULL */
51         XString xs;                     /* string buffer, set up by Xinit() in PUSH_SRETRACE */
52         char *xp;                       /* write position in xs; o_getsc_r() stores each character here */
53 };
54
55 /*
56  * Structure to keep track of the lexing state and the various pieces of info
57  * needed for each particular state. The members of u share storage; which one is meaningful depends on the state.
58  */
59 typedef struct lex_state {
60         union {
61                 /* point to the next state block; followed when walking down past slot 0, NULL in yylex()'s states[0] */
62                 struct lex_state *base;
63                 /* marks start of state output in output string (saved by PUSH_SRETRACE, restored by POP_SRETRACE) */
64                 int start;
65                 /* SBQUOTE: true if in double quotes: "`...`" */
66                 /* SEQUOTE: got NUL, ignore rest of string */
67                 bool abool;
68                 /* SADELIM information */
69                 struct {
70                         /* character to search for */
71                         unsigned char delimiter;
72                         /* max. number of delimiters; counted down per delimiter, state is popped at 0 */
73                         unsigned char num;
74                 } adelim;
75         } u;
76         /* count open parentheses (set in the SLETPAREN, SADELIM and SASPAREN states) */
77         short nparen;
78         /* type of this state: one of the S* constants */
79         uint8_t type;
80 } Lex_state;
81 #define ls_base         u.base          /* ls_*: shorthands for the members of u */
82 #define ls_start        u.start
83 #define ls_bool         u.abool
84 #define ls_adelim       u.adelim
85
86 typedef struct {                /* bounds of the state block currently in use */
87         Lex_state *base;        /* slot 0 of the block; POP_STATE calls pop_state_i() on reaching it */
88         Lex_state *end;         /* one past the last slot; PUSH_STATE calls push_state_i() on reaching it */
89 } State_info;
90
91 static void readhere(struct ioword *);
92 static void ungetsc(int);
93 static void ungetsc_i(int);
94 static int getsc_uu(void);              /* fallback used by o_getsc_u() */
95 static void getsc_line(Source *);
96 static int getsc_bn(void);              /* fallback used by o_getsc() */
97 static int s_get(void);                 /* passed to unbksl() together with s_put in the SEQUOTE state */
98 static void s_put(int);
99 static char *get_brace_var(XString *, char *);  /* called by yylex() right after emitting OSUBST '{' */
100 static bool arraysub(char **);          /* called by yylex() on '[' following a variable name */
101 static void gethere(bool);
102 static Lex_state *push_state_i(State_info *, Lex_state *);      /* called by PUSH_STATE at state_info.end */
103 static Lex_state *pop_state_i(State_info *, Lex_state *);       /* called by POP_STATE at state_info.base */
104 /* file-local lexer flags; yylex() resets both to 0 at its Again label */
105 static int backslash_skip;              /* non-zero forces o_getsc() onto the getsc_bn() path */
106 static int ignore_backslash_newline;    /* counter raised while skipping a # comment or inside ''; NOTE(review): presumably read by getsc_bn() - confirm */
107
108 /* optimised getsc_bn(): consume *source->str directly unless it is NUL or a backslash, or backslash_skip is set */
109 #define o_getsc()       (*source->str != '\0' && *source->str != '\\' && \
110                             !backslash_skip ? *source->str++ : getsc_bn())
111 /* optimised getsc_uu(): consume *source->str directly unless it is NUL */
112 #define o_getsc_u()     ((*source->str != '\0') ? *source->str++ : getsc_uu())
113
114 /* retrace helper: expands to a whole function body; evaluates carg once, appends it to every buffer on the retrace_info list, then returns it */
115 #define o_getsc_r(carg) {                               \
116         int cev = (carg);                               \
117         struct sretrace_info *rp = retrace_info;        \
118                                                         \
119         while (rp) {                                    \
120                 Xcheck(rp->xs, rp->xp);                 \
121                 *rp->xp++ = cev;                        \
122                 rp = rp->next;                          \
123         }                                               \
124                                                         \
125         return (cev);                                   \
126 }
127
128 #if defined(MKSH_SMALL) && !defined(MKSH_SMALL_BUT_FAST)
129 static int getsc(void);
130 /* this branch: getsc() is a real function; o_getsc() is expanded only here and the result is recorded by o_getsc_r() */
131 static int
132 getsc(void)
133 {
134         o_getsc_r(o_getsc());
135 }
136 #else
137 static int getsc_r(int);
138 /* this branch: getsc_r() only records and returns c; the getsc() macro below expands o_getsc() at every call site */
139 static int
140 getsc_r(int c)
141 {
142         o_getsc_r(c);
143 }
144
145 #define getsc()         getsc_r(o_getsc())
146 #endif
147
148 #define STATE_BSIZE     8       /* slots per state block; size of yylex()'s states[] array */
149 /* PUSH_STATE(s): advance statep (push_state_i() supplies the slot at state_info.end), then store s in the slot and in state; uses the expanding function's statep, state_info and state */
150 #define PUSH_STATE(s)   do {                                    \
151         if (++statep == state_info.end)                         \
152                 statep = push_state_i(&state_info, statep);     \
153         state = statep->type = (s);                             \
154 } while (/* CONSTCOND */ 0)
155 /* POP_STATE(): step statep back (pop_state_i() supplies the slot at state_info.base), then reload state from the slot's type */
156 #define POP_STATE()     do {                                    \
157         if (--statep == state_info.base)                        \
158                 statep = pop_state_i(&state_info, statep);      \
159         state = statep->type;                                   \
160 } while (/* CONSTCOND */ 0)
161 /* PUSH_SRETRACE(s): push state s, remember the current output position in ls_start, and put a fresh ATEMP retrace buffer at the head of the retrace_info list */
162 #define PUSH_SRETRACE(s) do {                                   \
163         struct sretrace_info *ri;                               \
164                                                                 \
165         PUSH_STATE(s);                                          \
166         statep->ls_start = Xsavepos(ws, wp);                    \
167         ri = alloc(sizeof(struct sretrace_info), ATEMP);        \
168         Xinit(ri->xs, ri->xp, 64, ATEMP);                       \
169         ri->next = retrace_info;                                \
170         retrace_info = ri;                                      \
171 } while (/* CONSTCOND */ 0)
172 /* POP_SRETRACE(): rewind wp to ls_start, NUL-terminate the head retrace buffer and leave its string in sp, unlink and free only the list node (via dp), then pop the state; uses the expanding function's wp, ws, sp and dp */
173 #define POP_SRETRACE()  do {                                    \
174         wp = Xrestpos(ws, wp, statep->ls_start);                \
175         *retrace_info->xp = '\0';                               \
176         sp = Xstring(retrace_info->xs, retrace_info->xp);       \
177         dp = (void *)retrace_info;                              \
178         retrace_info = retrace_info->next;                      \
179         afree(dp, ATEMP);                                       \
180         POP_STATE();                                            \
181 } while (/* CONSTCOND */ 0)
182
183 /**
184  * Lexical analyser
185  *
186  * tokens are not regular expressions, they are LL(1).
187  * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
188  * hence the state stack. Note "$(...)" are now parsed recursively.
189  */
190
191 int
192 yylex(int cf)
193 {
194         Lex_state states[STATE_BSIZE], *statep, *s2, *base;
195         State_info state_info;
196         int c, c2, state;
197         size_t cz;
198         XString ws;             /* expandable output word */
199         char *wp;               /* output word pointer */
200         char *sp, *dp;
201
202  Again:
203         states[0].type = SINVALID;
204         states[0].ls_base = NULL;
205         statep = &states[1];
206         state_info.base = states;
207         state_info.end = &state_info.base[STATE_BSIZE];
208
209         Xinit(ws, wp, 64, ATEMP);
210
211         backslash_skip = 0;
212         ignore_backslash_newline = 0;
213
214         if (cf & ONEWORD)
215                 state = SWORD;
216         else if (cf & LETEXPR) {
217                 /* enclose arguments in (double) quotes */
218                 *wp++ = OQUOTE;
219                 state = SLETPAREN;
220                 statep->nparen = 0;
221         } else {
222                 /* normal lexing */
223                 state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
224                 while ((c = getsc()) == ' ' || c == '\t')
225                         ;
226                 if (c == '#') {
227                         ignore_backslash_newline++;
228                         while ((c = getsc()) != '\0' && c != '\n')
229                                 ;
230                         ignore_backslash_newline--;
231                 }
232                 ungetsc(c);
233         }
234         if (source->flags & SF_ALIAS) {
235                 /* trailing ' ' in alias definition */
236                 source->flags &= ~SF_ALIAS;
237                 cf |= ALIAS;
238         }
239
240         /* Initial state: one of SWORD SLETPAREN SHEREDELIM SBASE */
241         statep->type = state;
242
243         /* check for here string */
244         if (state == SHEREDELIM) {
245                 c = getsc();
246                 if (c == '<') {
247                         state = SHEREDELIM;
248                         while ((c = getsc()) == ' ' || c == '\t')
249                                 ;
250                         ungetsc(c);
251                         c = '<';
252                         goto accept_nonword;
253                 }
254                 ungetsc(c);
255         }
256
257         /* collect non-special or quoted characters to form word */
258         while (!((c = getsc()) == 0 ||
259             ((state == SBASE || state == SHEREDELIM) && ctype(c, C_LEX1)))) {
260                 if (state == SBASE &&
261                     subshell_nesting_type == /*{*/ '}' &&
262                     c == /*{*/ '}')
263                         /* possibly end ${ :;} */
264                         break;
265  accept_nonword:
266                 Xcheck(ws, wp);
267                 switch (state) {
268                 case SADELIM:
269                         if (c == '(')
270                                 statep->nparen++;
271                         else if (c == ')')
272                                 statep->nparen--;
273                         else if (statep->nparen == 0 && (c == /*{*/ '}' ||
274                             c == (int)statep->ls_adelim.delimiter)) {
275                                 *wp++ = ADELIM;
276                                 *wp++ = c;
277                                 if (c == /*{*/ '}' || --statep->ls_adelim.num == 0)
278                                         POP_STATE();
279                                 if (c == /*{*/ '}')
280                                         POP_STATE();
281                                 break;
282                         }
283                         /* FALLTHROUGH */
284                 case SBASE:
285                         if (c == '[' && (cf & (VARASN|ARRAYVAR))) {
286                                 /* temporary */
287                                 *wp = EOS;
288                                 if (is_wdvarname(Xstring(ws, wp), false)) {
289                                         char *p, *tmp;
290
291                                         if (arraysub(&tmp)) {
292                                                 *wp++ = CHAR;
293                                                 *wp++ = c;
294                                                 for (p = tmp; *p; ) {
295                                                         Xcheck(ws, wp);
296                                                         *wp++ = CHAR;
297                                                         *wp++ = *p++;
298                                                 }
299                                                 afree(tmp, ATEMP);
300                                                 break;
301                                         } else {
302                                                 Source *s;
303
304                                                 s = pushs(SREREAD,
305                                                     source->areap);
306                                                 s->start = s->str =
307                                                     s->u.freeme = tmp;
308                                                 s->next = source;
309                                                 source = s;
310                                         }
311                                 }
312                                 *wp++ = CHAR;
313                                 *wp++ = c;
314                                 break;
315                         }
316                         /* FALLTHROUGH */
317  Sbase1:                /* includes *(...|...) pattern (*+?@!) */
318                         if (c == '*' || c == '@' || c == '+' || c == '?' ||
319                             c == '!') {
320                                 c2 = getsc();
321                                 if (c2 == '(' /*)*/ ) {
322                                         *wp++ = OPAT;
323                                         *wp++ = c;
324                                         PUSH_STATE(SPATTERN);
325                                         break;
326                                 }
327                                 ungetsc(c2);
328                         }
329                         /* FALLTHROUGH */
330  Sbase2:                /* doesn't include *(...|...) pattern (*+?@!) */
331                         switch (c) {
332                         case '\\':
333  getsc_qchar:
334                                 if ((c = getsc())) {
335                                         /* trailing \ is lost */
336                                         *wp++ = QCHAR;
337                                         *wp++ = c;
338                                 }
339                                 break;
340                         case '\'':
341  open_ssquote_unless_heredoc:
342                                 if ((cf & HEREDOC))
343                                         goto store_char;
344                                 *wp++ = OQUOTE;
345                                 ignore_backslash_newline++;
346                                 PUSH_STATE(SSQUOTE);
347                                 break;
348                         case '"':
349  open_sdquote:
350                                 *wp++ = OQUOTE;
351                                 PUSH_STATE(SDQUOTE);
352                                 break;
353                         case '$':
354                                 /*
355                                  * processing of dollar sign belongs into
356                                  * Subst, except for those which can open
357                                  * a string: $'…' and $"…"
358                                  */
359  subst_dollar_ex:
360                                 c = getsc();
361                                 switch (c) {
362                                 case '"':
363                                         goto open_sdquote;
364                                 case '\'':
365                                         goto open_sequote;
366                                 default:
367                                         goto SubstS;
368                                 }
369                         default:
370                                 goto Subst;
371                         }
372                         break;
373
374  Subst:
375                         switch (c) {
376                         case '\\':
377                                 c = getsc();
378                                 switch (c) {
379                                 case '"':
380                                         if ((cf & HEREDOC))
381                                                 goto heredocquote;
382                                         /* FALLTHROUGH */
383                                 case '\\':
384                                 case '$': case '`':
385  store_qchar:
386                                         *wp++ = QCHAR;
387                                         *wp++ = c;
388                                         break;
389                                 default:
390  heredocquote:
391                                         Xcheck(ws, wp);
392                                         if (c) {
393                                                 /* trailing \ is lost */
394                                                 *wp++ = CHAR;
395                                                 *wp++ = '\\';
396                                                 *wp++ = CHAR;
397                                                 *wp++ = c;
398                                         }
399                                         break;
400                                 }
401                                 break;
402                         case '$':
403                                 c = getsc();
404  SubstS:
405                                 if (c == '(') /*)*/ {
406                                         c = getsc();
407                                         if (c == '(') /*)*/ {
408                                                 *wp++ = EXPRSUB;
409                                                 PUSH_SRETRACE(SASPAREN);
410                                                 statep->nparen = 2;
411                                                 *retrace_info->xp++ = '(';
412                                         } else {
413                                                 ungetsc(c);
414  subst_command:
415                                                 c = COMSUB;
416  subst_command2:
417                                                 sp = yyrecursive(c);
418                                                 cz = strlen(sp) + 1;
419                                                 XcheckN(ws, wp, cz);
420                                                 *wp++ = c;
421                                                 memcpy(wp, sp, cz);
422                                                 wp += cz;
423                                         }
424                                 } else if (c == '{') /*}*/ {
425                                         if ((c = getsc()) == '|') {
426                                                 /*
427                                                  * non-subenvironment
428                                                  * value substitution
429                                                  */
430                                                 c = VALSUB;
431                                                 goto subst_command2;
432                                         } else if (ctype(c, C_IFSWS)) {
433                                                 /*
434                                                  * non-subenvironment
435                                                  * "command" substitution
436                                                  */
437                                                 c = FUNSUB;
438                                                 goto subst_command2;
439                                         }
440                                         ungetsc(c);
441                                         *wp++ = OSUBST;
442                                         *wp++ = '{'; /*}*/
443                                         wp = get_brace_var(&ws, wp);
444                                         c = getsc();
445                                         /* allow :# and :% (ksh88 compat) */
446                                         if (c == ':') {
447                                                 *wp++ = CHAR;
448                                                 *wp++ = c;
449                                                 c = getsc();
450                                                 if (c == ':') {
451                                                         *wp++ = CHAR;
452                                                         *wp++ = '0';
453                                                         *wp++ = ADELIM;
454                                                         *wp++ = ':';
455                                                         PUSH_STATE(SBRACE);
456                                                         PUSH_STATE(SADELIM);
457                                                         statep->ls_adelim.delimiter = ':';
458                                                         statep->ls_adelim.num = 1;
459                                                         statep->nparen = 0;
460                                                         break;
461                                                 } else if (ksh_isdigit(c) ||
462                                                     c == '('/*)*/ || c == ' ' ||
463                                                     /*XXX what else? */
464                                                     c == '$') {
465                                                         /* substring subst. */
466                                                         if (c != ' ') {
467                                                                 *wp++ = CHAR;
468                                                                 *wp++ = ' ';
469                                                         }
470                                                         ungetsc(c);
471                                                         PUSH_STATE(SBRACE);
472                                                         PUSH_STATE(SADELIM);
473                                                         statep->ls_adelim.delimiter = ':';
474                                                         statep->ls_adelim.num = 2;
475                                                         statep->nparen = 0;
476                                                         break;
477                                                 }
478                                         } else if (c == '/') {
479                                                 *wp++ = CHAR;
480                                                 *wp++ = c;
481                                                 if ((c = getsc()) == '/') {
482                                                         *wp++ = ADELIM;
483                                                         *wp++ = c;
484                                                 } else
485                                                         ungetsc(c);
486                                                 PUSH_STATE(SBRACE);
487                                                 PUSH_STATE(SADELIM);
488                                                 statep->ls_adelim.delimiter = '/';
489                                                 statep->ls_adelim.num = 1;
490                                                 statep->nparen = 0;
491                                                 break;
492                                         }
493                                         /*
494                                          * If this is a trim operation,
495                                          * treat (,|,) specially in STBRACE.
496                                          */
497                                         if (ctype(c, C_SUBOP2)) {
498                                                 ungetsc(c);
499                                                 if (Flag(FSH))
500                                                         PUSH_STATE(STBRACEBOURNE);
501                                                 else
502                                                         PUSH_STATE(STBRACEKORN);
503                                         } else {
504                                                 ungetsc(c);
505                                                 if (state == SDQUOTE ||
506                                                     state == SQBRACE)
507                                                         PUSH_STATE(SQBRACE);
508                                                 else
509                                                         PUSH_STATE(SBRACE);
510                                         }
511                                 } else if (ksh_isalphx(c)) {
512                                         *wp++ = OSUBST;
513                                         *wp++ = 'X';
514                                         do {
515                                                 Xcheck(ws, wp);
516                                                 *wp++ = c;
517                                                 c = getsc();
518                                         } while (ksh_isalnux(c));
519                                         *wp++ = '\0';
520                                         *wp++ = CSUBST;
521                                         *wp++ = 'X';
522                                         ungetsc(c);
523                                 } else if (ctype(c, C_VAR1 | C_DIGIT)) {
524                                         Xcheck(ws, wp);
525                                         *wp++ = OSUBST;
526                                         *wp++ = 'X';
527                                         *wp++ = c;
528                                         *wp++ = '\0';
529                                         *wp++ = CSUBST;
530                                         *wp++ = 'X';
531                                 } else {
532                                         *wp++ = CHAR;
533                                         *wp++ = '$';
534                                         ungetsc(c);
535                                 }
536                                 break;
537                         case '`':
538  subst_gravis:
539                                 PUSH_STATE(SBQUOTE);
540                                 *wp++ = COMSUB;
541                                 /*
542                                  * Need to know if we are inside double quotes
543                                  * since sh/AT&T-ksh translate the \" to " in
544                                  * "`...\"...`".
545                                  * This is not done in POSIX mode (section
546                                  * 3.2.3, Double Quotes: "The backquote shall
547                                  * retain its special meaning introducing the
548                                  * other form of command substitution (see
549                                  * 3.6.3). The portion of the quoted string
550                                  * from the initial backquote and the
551                                  * characters up to the next backquote that
552                                  * is not preceded by a backslash (having
553                                  * escape characters removed) defines that
554                                  * command whose output replaces `...` when
555                                  * the word is expanded."
556                                  * Section 3.6.3, Command Substitution:
557                                  * "Within the backquoted style of command
558                                  * substitution, backslash shall retain its
559                                  * literal meaning, except when followed by
560                                  * $ ` \.").
561                                  */
562                                 statep->ls_bool = false;
563                                 s2 = statep;
564                                 base = state_info.base;
565                                 while (/* CONSTCOND */ 1) {
566                                         for (; s2 != base; s2--) {
567                                                 if (s2->type == SDQUOTE) {
568                                                         statep->ls_bool = true;
569                                                         break;
570                                                 }
571                                         }
572                                         if (s2 != base)
573                                                 break;
574                                         if (!(s2 = s2->ls_base))
575                                                 break;
576                                         base = s2-- - STATE_BSIZE;
577                                 }
578                                 break;
579                         case QCHAR:
580                                 if (cf & LQCHAR) {
581                                         *wp++ = QCHAR;
582                                         *wp++ = getsc();
583                                         break;
584                                 }
585                                 /* FALLTHROUGH */
586                         default:
587  store_char:
588                                 *wp++ = CHAR;
589                                 *wp++ = c;
590                         }
591                         break;
592
593                 case SEQUOTE:
594                         if (c == '\'') {
595                                 POP_STATE();
596                                 *wp++ = CQUOTE;
597                                 ignore_backslash_newline--;
598                         } else if (c == '\\') {
599                                 if ((c2 = unbksl(true, s_get, s_put)) == -1)
600                                         c2 = s_get();
601                                 if (c2 == 0)
602                                         statep->ls_bool = true;
603                                 if (!statep->ls_bool) {
604                                         char ts[4];
605
606                                         if ((unsigned int)c2 < 0x100) {
607                                                 *wp++ = QCHAR;
608                                                 *wp++ = c2;
609                                         } else {
610                                                 cz = utf_wctomb(ts, c2 - 0x100);
611                                                 ts[cz] = 0;
612                                                 for (cz = 0; ts[cz]; ++cz) {
613                                                         *wp++ = QCHAR;
614                                                         *wp++ = ts[cz];
615                                                 }
616                                         }
617                                 }
618                         } else if (!statep->ls_bool) {
619                                 *wp++ = QCHAR;
620                                 *wp++ = c;
621                         }
622                         break;
623
624                 case SSQUOTE:
625                         if (c == '\'') {
626                                 POP_STATE();
627                                 if ((cf & HEREDOC) || state == SQBRACE)
628                                         goto store_char;
629                                 *wp++ = CQUOTE;
630                                 ignore_backslash_newline--;
631                         } else {
632                                 *wp++ = QCHAR;
633                                 *wp++ = c;
634                         }
635                         break;
636
637                 case SDQUOTE:
638                         if (c == '"') {
639                                 POP_STATE();
640                                 *wp++ = CQUOTE;
641                         } else
642                                 goto Subst;
643                         break;
644
645                 /* $(( ... )) */
646                 case SASPAREN:
647                         if (c == '(')
648                                 statep->nparen++;
649                         else if (c == ')') {
650                                 statep->nparen--;
651                                 if (statep->nparen == 1) {
652                                         /* end of EXPRSUB */
653                                         POP_SRETRACE();
654
655                                         if ((c2 = getsc()) == /*(*/ ')') {
656                                                 cz = strlen(sp) - 2;
657                                                 XcheckN(ws, wp, cz);
658                                                 memcpy(wp, sp + 1, cz);
659                                                 wp += cz;
660                                                 afree(sp, ATEMP);
661                                                 *wp++ = '\0';
662                                                 break;
663                                         } else {
664                                                 Source *s;
665
666                                                 ungetsc(c2);
667                                                 /*
668                                                  * mismatched parenthesis -
669                                                  * assume we were really
670                                                  * parsing a $(...) expression
671                                                  */
672                                                 --wp;
673                                                 s = pushs(SREREAD,
674                                                     source->areap);
675                                                 s->start = s->str =
676                                                     s->u.freeme = sp;
677                                                 s->next = source;
678                                                 source = s;
679                                                 goto subst_command;
680                                         }
681                                 }
682                         }
683                         /* reuse existing state machine */
684                         goto Sbase2;
685
686                 case SQBRACE:
687                         if (c == '\\') {
688                                 /*
689                                  * perform POSIX "quote removal" if the back-
690                                  * slash is "special", i.e. same cases as the
691                                  * {case '\\':} in Subst: plus closing brace;
692                                  * in mksh code "quote removal" on '\c' means
693                                  * write QCHAR+c, otherwise CHAR+\+CHAR+c are
694                                  * emitted (in heredocquote:)
695                                  */
696                                 if ((c = getsc()) == '"' || c == '\\' ||
697                                     c == '$' || c == '`' || c == /*{*/'}')
698                                         goto store_qchar;
699                                 goto heredocquote;
700                         }
701                         goto common_SQBRACE;
702
703                 case SBRACE:
704                         if (c == '\'')
705                                 goto open_ssquote_unless_heredoc;
706                         else if (c == '\\')
707                                 goto getsc_qchar;
708  common_SQBRACE:
709                         if (c == '"')
710                                 goto open_sdquote;
711                         else if (c == '$')
712                                 goto subst_dollar_ex;
713                         else if (c == '`')
714                                 goto subst_gravis;
715                         else if (c != /*{*/ '}')
716                                 goto store_char;
717                         POP_STATE();
718                         *wp++ = CSUBST;
719                         *wp++ = /*{*/ '}';
720                         break;
721
722                 /* Same as SBASE, except (,|,) treated specially */
723                 case STBRACEKORN:
724                         if (c == '|')
725                                 *wp++ = SPAT;
726                         else if (c == '(') {
727                                 *wp++ = OPAT;
728                                 /* simile for @ */
729                                 *wp++ = ' ';
730                                 PUSH_STATE(SPATTERN);
731                         } else /* FALLTHROUGH */
732                 case STBRACEBOURNE:
733                           if (c == /*{*/ '}') {
734                                 POP_STATE();
735                                 *wp++ = CSUBST;
736                                 *wp++ = /*{*/ '}';
737                         } else
738                                 goto Sbase1;
739                         break;
740
741                 case SBQUOTE:
742                         if (c == '`') {
743                                 *wp++ = 0;
744                                 POP_STATE();
745                         } else if (c == '\\') {
746                                 switch (c = getsc()) {
747                                 case 0:
748                                         /* trailing \ is lost */
749                                         break;
750                                 case '\\':
751                                 case '$': case '`':
752                                         *wp++ = c;
753                                         break;
754                                 case '"':
755                                         if (statep->ls_bool) {
756                                                 *wp++ = c;
757                                                 break;
758                                         }
759                                         /* FALLTHROUGH */
760                                 default:
761                                         *wp++ = '\\';
762                                         *wp++ = c;
763                                         break;
764                                 }
765                         } else
766                                 *wp++ = c;
767                         break;
768
769                 /* ONEWORD */
770                 case SWORD:
771                         goto Subst;
772
773                 /* LETEXPR: (( ... )) */
774                 case SLETPAREN:
775                         if (c == /*(*/ ')') {
776                                 if (statep->nparen > 0)
777                                         --statep->nparen;
778                                 else if ((c2 = getsc()) == /*(*/ ')') {
779                                         c = 0;
780                                         *wp++ = CQUOTE;
781                                         goto Done;
782                                 } else {
783                                         Source *s;
784
785                                         ungetsc(c2);
786                                         /*
787                                          * mismatched parenthesis -
788                                          * assume we were really
789                                          * parsing a (...) expression
790                                          */
791                                         *wp = EOS;
792                                         sp = Xstring(ws, wp);
793                                         dp = wdstrip(sp, WDS_KEEPQ);
794                                         s = pushs(SREREAD, source->areap);
795                                         s->start = s->str = s->u.freeme = dp;
796                                         s->next = source;
797                                         source = s;
798                                         return ('('/*)*/);
799                                 }
800                         } else if (c == '(')
801                                 /*
802                                  * parentheses inside quotes and
803                                  * backslashes are lost, but AT&T ksh
804                                  * doesn't count them either
805                                  */
806                                 ++statep->nparen;
807                         goto Sbase2;
808
809                 /* <<, <<-, <<< delimiter */
810                 case SHEREDELIM:
811                         /*
812                          * here delimiters need a special case since
813                          * $ and `...` are not to be treated specially
814                          */
815                         switch (c) {
816                         case '\\':
817                                 if ((c = getsc())) {
818                                         /* trailing \ is lost */
819                                         *wp++ = QCHAR;
820                                         *wp++ = c;
821                                 }
822                                 break;
823                         case '\'':
824                                 goto open_ssquote_unless_heredoc;
825                         case '$':
826                                 if ((c2 = getsc()) == '\'') {
827  open_sequote:
828                                         *wp++ = OQUOTE;
829                                         ignore_backslash_newline++;
830                                         PUSH_STATE(SEQUOTE);
831                                         statep->ls_bool = false;
832                                         break;
833                                 } else if (c2 == '"') {
834                                         /* FALLTHROUGH */
835                         case '"':
836                                         PUSH_SRETRACE(SHEREDQUOTE);
837                                         break;
838                                 }
839                                 ungetsc(c2);
840                                 /* FALLTHROUGH */
841                         default:
842                                 *wp++ = CHAR;
843                                 *wp++ = c;
844                         }
845                         break;
846
847                 /* " in <<, <<-, <<< delimiter */
848                 case SHEREDQUOTE:
849                         if (c != '"')
850                                 goto Subst;
851                         POP_SRETRACE();
852                         dp = strnul(sp) - 1;
853                         /* remove the trailing double quote */
854                         *dp = '\0';
855                         /* store the quoted string */
856                         *wp++ = OQUOTE;
857                         XcheckN(ws, wp, (dp - sp) * 2);
858                         dp = sp;
859                         while ((c = *dp++)) {
860                                 if (c == '\\') {
861                                         switch ((c = *dp++)) {
862                                         case '\\':
863                                         case '"':
864                                         case '$':
865                                         case '`':
866                                                 break;
867                                         default:
868                                                 *wp++ = CHAR;
869                                                 *wp++ = '\\';
870                                                 break;
871                                         }
872                                 }
873                                 *wp++ = CHAR;
874                                 *wp++ = c;
875                         }
876                         afree(sp, ATEMP);
877                         *wp++ = CQUOTE;
878                         state = statep->type = SHEREDELIM;
879                         break;
880
881                 /* in *(...|...) pattern (*+?@!) */
882                 case SPATTERN:
883                         if (c == /*(*/ ')') {
884                                 *wp++ = CPAT;
885                                 POP_STATE();
886                         } else if (c == '|') {
887                                 *wp++ = SPAT;
888                         } else if (c == '(') {
889                                 *wp++ = OPAT;
890                                 /* simile for @ */
891                                 *wp++ = ' ';
892                                 PUSH_STATE(SPATTERN);
893                         } else
894                                 goto Sbase1;
895                         break;
896                 }
897         }
898  Done:
899         Xcheck(ws, wp);
900         if (statep != &states[1])
901                 /* XXX figure out what is missing */
902                 yyerror("no closing quote\n");
903
904         /* This done to avoid tests for SHEREDELIM wherever SBASE tested */
905         if (state == SHEREDELIM)
906                 state = SBASE;
907
908         dp = Xstring(ws, wp);
909         if (state == SBASE && (
910 #ifndef MKSH_LEGACY_MODE
911             (c == '&' && !Flag(FSH) && !Flag(FPOSIX)) ||
912 #endif
913             c == '<' || c == '>')) {
914                 struct ioword *iop = alloc(sizeof(struct ioword), ATEMP);
915
916                 if (Xlength(ws, wp) == 0)
917                         iop->unit = c == '<' ? 0 : 1;
918                 else for (iop->unit = 0, c2 = 0; c2 < Xlength(ws, wp); c2 += 2) {
919                         if (dp[c2] != CHAR)
920                                 goto no_iop;
921                         if (!ksh_isdigit(dp[c2 + 1]))
922                                 goto no_iop;
923                         iop->unit = (iop->unit * 10) + dp[c2 + 1] - '0';
924                         if (iop->unit >= FDBASE)
925                                 goto no_iop;
926                 }
927
928                 if (c == '&') {
929                         if ((c2 = getsc()) != '>') {
930                                 ungetsc(c2);
931                                 goto no_iop;
932                         }
933                         c = c2;
934                         iop->ioflag = IOBASH;
935                 } else
936                         iop->ioflag = 0;
937
938                 c2 = getsc();
939                 /* <<, >>, <> are ok, >< is not */
940                 if (c == c2 || (c == '<' && c2 == '>')) {
941                         iop->ioflag |= c == c2 ?
942                             (c == '>' ? IOCAT : IOHERE) : IORDWR;
943                         if (iop->ioflag == IOHERE) {
944                                 if ((c2 = getsc()) == '-') {
945                                         iop->ioflag |= IOSKIP;
946                                         c2 = getsc();
947                                 } else if (c2 == '<')
948                                         iop->ioflag |= IOHERESTR;
949                                 ungetsc(c2);
950                                 if (c2 == '\n')
951                                         iop->ioflag |= IONDELIM;
952                         }
953                 } else if (c2 == '&')
954                         iop->ioflag |= IODUP | (c == '<' ? IORDUP : 0);
955                 else {
956                         iop->ioflag |= c == '>' ? IOWRITE : IOREAD;
957                         if (c == '>' && c2 == '|')
958                                 iop->ioflag |= IOCLOB;
959                         else
960                                 ungetsc(c2);
961                 }
962
963                 iop->name = NULL;
964                 iop->delim = NULL;
965                 iop->heredoc = NULL;
966                 /* free word */
967                 Xfree(ws, wp);
968                 yylval.iop = iop;
969                 return (REDIR);
970  no_iop:
971                 afree(iop, ATEMP);
972         }
973
974         if (wp == dp && state == SBASE) {
975                 /* free word */
976                 Xfree(ws, wp);
977                 /* no word, process LEX1 character */
978                 if ((c == '|') || (c == '&') || (c == ';') || (c == '('/*)*/)) {
979                         if ((c2 = getsc()) == c)
980                                 c = (c == ';') ? BREAK :
981                                     (c == '|') ? LOGOR :
982                                     (c == '&') ? LOGAND :
983                                     /* c == '(' ) */ MDPAREN;
984                         else if (c == '|' && c2 == '&')
985                                 c = COPROC;
986                         else if (c == ';' && c2 == '|')
987                                 c = BRKEV;
988                         else if (c == ';' && c2 == '&')
989                                 c = BRKFT;
990                         else
991                                 ungetsc(c2);
992 #ifndef MKSH_SMALL
993                         if (c == BREAK) {
994                                 if ((c2 = getsc()) == '&')
995                                         c = BRKEV;
996                                 else
997                                         ungetsc(c2);
998                         }
999 #endif
1000                 } else if (c == '\n') {
1001                         gethere(false);
1002                         if (cf & CONTIN)
1003                                 goto Again;
1004                 } else if (c == '\0')
1005                         /* need here strings at EOF */
1006                         gethere(true);
1007                 return (c);
1008         }
1009
1010         /* terminate word */
1011         *wp++ = EOS;
1012         yylval.cp = Xclose(ws, wp);
1013         if (state == SWORD || state == SLETPAREN
1014             /* XXX ONEWORD? */)
1015                 return (LWORD);
1016
1017         /* unget terminator */
1018         ungetsc(c);
1019
1020         /*
1021          * note: the alias-vs-function code below depends on several
1022          * interna: starting from here, source->str is not modified;
1023          * the way getsc() and ungetsc() operate; etc.
1024          */
1025
1026         /* copy word to unprefixed string ident */
1027         sp = yylval.cp;
1028         dp = ident;
1029         if ((cf & HEREDELIM) && (sp[1] == '<')) {
1030  herestringloop:
1031                 switch ((c = *sp++)) {
1032                 case CHAR:
1033                         ++sp;
1034                         /* FALLTHROUGH */
1035                 case OQUOTE:
1036                 case CQUOTE:
1037                         goto herestringloop;
1038                 default:
1039                         break;
1040                 }
1041                 /* dummy value */
1042                 *dp++ = 'x';
1043         } else
1044                 while ((dp - ident) < IDENT && (c = *sp++) == CHAR)
1045                         *dp++ = *sp++;
1046         if (c != EOS)
1047                 /* word is not unquoted */
1048                 dp = ident;
1049         /* make sure the ident array stays NUL padded */
1050         memset(dp, 0, (ident + IDENT) - dp + 1);
1051
1052         if (!(cf & (KEYWORD | ALIAS)))
1053                 return (LWORD);
1054
1055         if (*ident != '\0') {
1056                 struct tbl *p;
1057                 uint32_t h = hash(ident);
1058
1059                 if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) &&
1060                     (!(cf & ESACONLY) || p->val.i == ESAC ||
1061                     p->val.i == /*{*/ '}')) {
1062                         afree(yylval.cp, ATEMP);
1063                         return (p->val.i);
1064                 }
1065                 if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) &&
1066                     (p->flag & ISSET)) {
1067                         /*
1068                          * this still points to the same character as the
1069                          * ungetsc'd terminator from above
1070                          */
1071                         const char *cp = source->str;
1072
1073                         /* prefer POSIX but not Korn functions over aliases */
1074                         while (*cp == ' ' || *cp == '\t')
1075                                 /*
1076                                  * this is like getsc() without skipping
1077                                  * over Source boundaries (including not
1078                                  * parsing ungetsc'd characters that got
1079                                  * pushed into an SREREAD) which is what
1080                                  * we want here anyway: find out whether
1081                                  * the alias name is followed by a POSIX
1082                                  * function definition
1083                                  */
1084                                 ++cp;
1085                         /* prefer functions over aliases */
1086                         if (cp[0] != '(' || cp[1] != ')') {
1087                                 Source *s = source;
1088
1089                                 while (s && (s->flags & SF_HASALIAS))
1090                                         if (s->u.tblp == p)
1091                                                 return (LWORD);
1092                                         else
1093                                                 s = s->next;
1094                                 /* push alias expansion */
1095                                 s = pushs(SALIAS, source->areap);
1096                                 s->start = s->str = p->val.s;
1097                                 s->u.tblp = p;
1098                                 s->flags |= SF_HASALIAS;
1099                                 s->next = source;
1100                                 if (source->type == SEOF) {
1101                                         /* prevent infinite recursion at EOS */
1102                                         source->u.tblp = p;
1103                                         source->flags |= SF_HASALIAS;
1104                                 }
1105                                 source = s;
1106                                 afree(yylval.cp, ATEMP);
1107                                 goto Again;
1108                         }
1109                 }
1110         } else if (cf & ALIAS) {
1111                 /* retain typeset et al. even when quoted */
1112                 if (assign_command((dp = wdstrip(yylval.cp, 0))))
1113                         strlcpy(ident, dp, sizeof(ident));
1114                 afree(dp, ATEMP);
1115         }
1116
1117         return (LWORD);
1118 }
1119
1120 static void
1121 gethere(bool iseof)
1122 {
1123         struct ioword **p;
1124
1125         for (p = heres; p < herep; p++)
1126                 if (iseof && !((*p)->ioflag & IOHERESTR))
1127                         /* only here strings at EOF */
1128                         return;
1129                 else
1130                         readhere(*p);
1131         herep = heres;
1132 }
1133
1134 /*
1135  * read "<<word" text into temp file
1136  */
1137
1138 static void
1139 readhere(struct ioword *iop)
1140 {
1141         int c;
1142         const char *eof, *eofp;
1143         XString xs;
1144         char *xp;
1145         int xpos;
1146
1147         if (iop->ioflag & IOHERESTR) {
1148                 /* process the here string */
1149                 iop->heredoc = xp = evalstr(iop->delim, DOBLANK);
1150                 xpos = strlen(xp) - 1;
1151                 memmove(xp, xp + 1, xpos);
1152                 xp[xpos] = '\n';
1153                 return;
1154         }
1155
1156         eof = iop->ioflag & IONDELIM ? "<<" : evalstr(iop->delim, 0);
1157
1158         if (!(iop->ioflag & IOEVAL))
1159                 ignore_backslash_newline++;
1160
1161         Xinit(xs, xp, 256, ATEMP);
1162
1163  heredoc_read_line:
1164         /* beginning of line */
1165         eofp = eof;
1166         xpos = Xsavepos(xs, xp);
1167         if (iop->ioflag & IOSKIP) {
1168                 /* skip over leading tabs */
1169                 while ((c = getsc()) == '\t')
1170                         ;       /* nothing */
1171                 goto heredoc_parse_char;
1172         }
1173  heredoc_read_char:
1174         c = getsc();
1175  heredoc_parse_char:
1176         /* compare with here document marker */
1177         if (!*eofp) {
1178                 /* end of here document marker, what to do? */
1179                 switch (c) {
1180                 case /*(*/ ')':
1181                         if (!subshell_nesting_type)
1182                                 /*-
1183                                  * not allowed outside $(...) or (...)
1184                                  * => mismatch
1185                                  */
1186                                 break;
1187                         /* allow $(...) or (...) to close here */
1188                         ungetsc(/*(*/ ')');
1189                         /* FALLTHROUGH */
1190                 case 0:
1191                         /*
1192                          * Allow EOF here to commands without trailing
1193                          * newlines (mksh -c '...') will work as well.
1194                          */
1195                 case '\n':
1196                         /* Newline terminates here document marker */
1197                         goto heredoc_found_terminator;
1198                 }
1199         } else if (c == *eofp++)
1200                 /* store; then read and compare next character */
1201                 goto heredoc_store_and_loop;
1202         /* nope, mismatch; read until end of line */
1203         while (c != '\n') {
1204                 if (!c)
1205                         /* oops, reached EOF */
1206                         yyerror("%s '%s' unclosed\n", "here document", eof);
1207                 /* store character */
1208                 Xcheck(xs, xp);
1209                 Xput(xs, xp, c);
1210                 /* read next character */
1211                 c = getsc();
1212         }
1213         /* we read a newline as last character */
1214  heredoc_store_and_loop:
1215         /* store character */
1216         Xcheck(xs, xp);
1217         Xput(xs, xp, c);
1218         if (c == '\n')
1219                 goto heredoc_read_line;
1220         goto heredoc_read_char;
1221
1222  heredoc_found_terminator:
1223         /* jump back to saved beginning of line */
1224         xp = Xrestpos(xs, xp, xpos);
1225         /* terminate, close and store */
1226         Xput(xs, xp, '\0');
1227         iop->heredoc = Xclose(xs, xp);
1228
1229         if (!(iop->ioflag & IOEVAL))
1230                 ignore_backslash_newline--;
1231 }
1232
1233 void
1234 yyerror(const char *fmt, ...)
1235 {
1236         va_list va;
1237
1238         /* pop aliases and re-reads */
1239         while (source->type == SALIAS || source->type == SREREAD)
1240                 source = source->next;
1241         /* zap pending input */
1242         source->str = null;
1243
1244         error_prefix(true);
1245         va_start(va, fmt);
1246         shf_vfprintf(shl_out, fmt, va);
1247         va_end(va);
1248         errorfz();
1249 }
1250
1251 /*
1252  * input for yylex with alias expansion
1253  */
1254
1255 Source *
1256 pushs(int type, Area *areap)
1257 {
1258         Source *s;
1259
1260         s = alloc(sizeof(Source), areap);
1261         memset(s, 0, sizeof(Source));
1262         s->type = type;
1263         s->str = null;
1264         s->areap = areap;
1265         if (type == SFILE || type == SSTDIN)
1266                 XinitN(s->xs, 256, s->areap);
1267         return (s);
1268 }
1269
1270 static int
1271 getsc_uu(void)
1272 {
1273         Source *s = source;
1274         int c;
1275
1276         while ((c = *s->str++) == 0) {
1277                 /* return 0 for EOF by default */
1278                 s->str = NULL;
1279                 switch (s->type) {
1280                 case SEOF:
1281                         s->str = null;
1282                         return (0);
1283
1284                 case SSTDIN:
1285                 case SFILE:
1286                         getsc_line(s);
1287                         break;
1288
1289                 case SWSTR:
1290                         break;
1291
1292                 case SSTRING:
1293                 case SSTRINGCMDLINE:
1294                         break;
1295
1296                 case SWORDS:
1297                         s->start = s->str = *s->u.strv++;
1298                         s->type = SWORDSEP;
1299                         break;
1300
1301                 case SWORDSEP:
1302                         if (*s->u.strv == NULL) {
1303                                 s->start = s->str = "\n";
1304                                 s->type = SEOF;
1305                         } else {
1306                                 s->start = s->str = " ";
1307                                 s->type = SWORDS;
1308                         }
1309                         break;
1310
1311                 case SALIAS:
1312                         if (s->flags & SF_ALIASEND) {
1313                                 /* pass on an unused SF_ALIAS flag */
1314                                 source = s->next;
1315                                 source->flags |= s->flags & SF_ALIAS;
1316                                 s = source;
1317                         } else if (*s->u.tblp->val.s &&
1318                             (c = strnul(s->u.tblp->val.s)[-1], ksh_isspace(c))) {
1319                                 /* pop source stack */
1320                                 source = s = s->next;
1321                                 /*
1322                                  * Note that this alias ended with a
1323                                  * space, enabling alias expansion on
1324                                  * the following word.
1325                                  */
1326                                 s->flags |= SF_ALIAS;
1327                         } else {
1328                                 /*
1329                                  * At this point, we need to keep the current
1330                                  * alias in the source list so recursive
1331                                  * aliases can be detected and we also need to
1332                                  * return the next character. Do this by
1333                                  * temporarily popping the alias to get the
1334                                  * next character and then put it back in the
1335                                  * source list with the SF_ALIASEND flag set.
1336                                  */
1337                                 /* pop source stack */
1338                                 source = s->next;
1339                                 source->flags |= s->flags & SF_ALIAS;
1340                                 c = getsc_uu();
1341                                 if (c) {
1342                                         s->flags |= SF_ALIASEND;
1343                                         s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1344                                         s->start = s->str = s->ugbuf;
1345                                         s->next = source;
1346                                         source = s;
1347                                 } else {
1348                                         s = source;
1349                                         /* avoid reading EOF twice */
1350                                         s->str = NULL;
1351                                         break;
1352                                 }
1353                         }
1354                         continue;
1355
1356                 case SREREAD:
1357                         if (s->start != s->ugbuf)
1358                                 /* yuck */
1359                                 afree(s->u.freeme, ATEMP);
1360                         source = s = s->next;
1361                         continue;
1362                 }
1363                 if (s->str == NULL) {
1364                         s->type = SEOF;
1365                         s->start = s->str = null;
1366                         return ('\0');
1367                 }
1368                 if (s->flags & SF_ECHO) {
1369                         shf_puts(s->str, shl_out);
1370                         shf_flush(shl_out);
1371                 }
1372         }
1373         return (c);
1374 }
1375
1376 static void
1377 getsc_line(Source *s)
1378 {
1379         char *xp = Xstring(s->xs, xp), *cp;
1380         bool interactive = Flag(FTALKING) && s->type == SSTDIN;
1381         bool have_tty = tobool(interactive && (s->flags & SF_TTY));
1382
1383         /* Done here to ensure nothing odd happens when a timeout occurs */
1384         XcheckN(s->xs, xp, LINE);
1385         *xp = '\0';
1386         s->start = s->str = xp;
1387
1388         if (have_tty && ksh_tmout) {
1389                 ksh_tmout_state = TMOUT_READING;
1390                 alarm(ksh_tmout);
1391         }
1392         if (interactive)
1393                 change_winsz();
1394 #ifndef MKSH_NO_CMDLINE_EDITING
1395         if (have_tty && (
1396 #if !MKSH_S_NOVI
1397             Flag(FVI) ||
1398 #endif
1399             Flag(FEMACS) || Flag(FGMACS))) {
1400                 int nread;
1401
1402                 nread = x_read(xp);
1403                 if (nread < 0)
1404                         /* read error */
1405                         nread = 0;
1406                 xp[nread] = '\0';
1407                 xp += nread;
1408         } else
1409 #endif
1410           {
1411                 if (interactive)
1412                         pprompt(prompt, 0);
1413                 else
1414                         s->line++;
1415
1416                 while (/* CONSTCOND */ 1) {
1417                         char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
1418
1419                         if (!p && shf_error(s->u.shf) &&
1420                             shf_errno(s->u.shf) == EINTR) {
1421                                 shf_clearerr(s->u.shf);
1422                                 if (trap)
1423                                         runtraps(0);
1424                                 continue;
1425                         }
1426                         if (!p || (xp = p, xp[-1] == '\n'))
1427                                 break;
1428                         /* double buffer size */
1429                         /* move past NUL so doubling works... */
1430                         xp++;
1431                         XcheckN(s->xs, xp, Xlength(s->xs, xp));
1432                         /* ...and move back again */
1433                         xp--;
1434                 }
1435                 /*
1436                  * flush any unwanted input so other programs/builtins
1437                  * can read it. Not very optimal, but less error prone
1438                  * than flushing else where, dealing with redirections,
1439                  * etc.
1440                  * TODO: reduce size of shf buffer (~128?) if SSTDIN
1441                  */
1442                 if (s->type == SSTDIN)
1443                         shf_flush(s->u.shf);
1444         }
1445         /*
1446          * XXX: temporary kludge to restore source after a
1447          * trap may have been executed.
1448          */
1449         source = s;
1450         if (have_tty && ksh_tmout) {
1451                 ksh_tmout_state = TMOUT_EXECUTING;
1452                 alarm(0);
1453         }
1454         cp = Xstring(s->xs, xp);
1455         rndpush(cp);
1456         s->start = s->str = cp;
1457         strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
1458         /* Note: if input is all nulls, this is not eof */
1459         if (Xlength(s->xs, xp) == 0) {
1460                 /* EOF */
1461                 if (s->type == SFILE)
1462                         shf_fdclose(s->u.shf);
1463                 s->str = NULL;
1464         } else if (interactive && *s->str &&
1465             (cur_prompt != PS1 || !ctype(*s->str, C_IFS | C_IFSWS))) {
1466                 histsave(&s->line, s->str, true, true);
1467 #if !defined(MKSH_SMALL) && HAVE_PERSISTENT_HISTORY
1468         } else if (interactive && cur_prompt == PS1) {
1469                 cp = Xstring(s->xs, xp);
1470                 while (*cp && ctype(*cp, C_IFSWS))
1471                         ++cp;
1472                 if (!*cp)
1473                         histsync();
1474 #endif
1475         }
1476         if (interactive)
1477                 set_prompt(PS2, NULL);
1478 }
1479
1480 void
1481 set_prompt(int to, Source *s)
1482 {
1483         cur_prompt = (uint8_t)to;
1484
1485         switch (to) {
1486         /* command */
1487         case PS1:
1488                 /*
1489                  * Substitute ! and !! here, before substitutions are done
1490                  * so ! in expanded variables are not expanded.
1491                  * NOTE: this is not what AT&T ksh does (it does it after
1492                  * substitutions, POSIX doesn't say which is to be done.
1493                  */
1494                 {
1495                         struct shf *shf;
1496                         char * volatile ps1;
1497                         Area *saved_atemp;
1498
1499                         ps1 = str_val(global("PS1"));
1500                         shf = shf_sopen(NULL, strlen(ps1) * 2,
1501                             SHF_WR | SHF_DYNAMIC, NULL);
1502                         while (*ps1)
1503                                 if (*ps1 != '!' || *++ps1 == '!')
1504                                         shf_putchar(*ps1++, shf);
1505                                 else
1506                                         shf_fprintf(shf, "%lu", s ?
1507                                             (unsigned long)s->line + 1 : 0UL);
1508                         ps1 = shf_sclose(shf);
1509                         saved_atemp = ATEMP;
1510                         newenv(E_ERRH);
1511                         if (kshsetjmp(e->jbuf)) {
1512                                 prompt = safe_prompt;
1513                                 /*
1514                                  * Don't print an error - assume it has already
1515                                  * been printed. Reason is we may have forked
1516                                  * to run a command and the child may be
1517                                  * unwinding its stack through this code as it
1518                                  * exits.
1519                                  */
1520                         } else {
1521                                 char *cp = substitute(ps1, 0);
1522                                 strdupx(prompt, cp, saved_atemp);
1523                         }
1524                         quitenv(NULL);
1525                 }
1526                 break;
1527         /* command continuation */
1528         case PS2:
1529                 prompt = str_val(global("PS2"));
1530                 break;
1531         }
1532 }
1533
1534 int
1535 pprompt(const char *cp, int ntruncate)
1536 {
1537         char delimiter = 0;
1538         bool doprint = (ntruncate != -1);
1539         bool indelimit = false;
1540         int columns = 0, lines = 0;
1541
1542         /*
1543          * Undocumented AT&T ksh feature:
1544          * If the second char in the prompt string is \r then the first
1545          * char is taken to be a non-printing delimiter and any chars
1546          * between two instances of the delimiter are not considered to
1547          * be part of the prompt length
1548          */
1549         if (*cp && cp[1] == '\r') {
1550                 delimiter = *cp;
1551                 cp += 2;
1552         }
1553         for (; *cp; cp++) {
1554                 if (indelimit && *cp != delimiter)
1555                         ;
1556                 else if (*cp == '\n' || *cp == '\r') {
1557                         lines += columns / x_cols + ((*cp == '\n') ? 1 : 0);
1558                         columns = 0;
1559                 } else if (*cp == '\t') {
1560                         columns = (columns | 7) + 1;
1561                 } else if (*cp == '\b') {
1562                         if (columns > 0)
1563                                 columns--;
1564                 } else if (*cp == delimiter)
1565                         indelimit = !indelimit;
1566                 else if (UTFMODE && ((unsigned char)*cp > 0x7F)) {
1567                         const char *cp2;
1568                         columns += utf_widthadj(cp, &cp2);
1569                         if (doprint && (indelimit ||
1570                             (ntruncate < (x_cols * lines + columns))))
1571                                 shf_write(cp, cp2 - cp, shl_out);
1572                         cp = cp2 - /* loop increment */ 1;
1573                         continue;
1574                 } else
1575                         columns++;
1576                 if (doprint && (*cp != delimiter) &&
1577                     (indelimit || (ntruncate < (x_cols * lines + columns))))
1578                         shf_putc(*cp, shl_out);
1579         }
1580         if (doprint)
1581                 shf_flush(shl_out);
1582         return (x_cols * lines + columns);
1583 }
1584
1585 /*
1586  * Read the variable part of a ${...} expression (i.e. up to but not
1587  * including the :[-+?=#%] or close-brace).
1588  */
1589 static char *
1590 get_brace_var(XString *wsp, char *wp)
1591 {
1592         char c;
1593         enum parse_state {
1594                 PS_INITIAL, PS_SAW_HASH, PS_IDENT,
1595                 PS_NUMBER, PS_VAR1
1596         } state = PS_INITIAL;
1597
1598         while (/* CONSTCOND */ 1) {
1599                 c = getsc();
1600                 /* State machine to figure out where the variable part ends. */
1601                 switch (state) {
1602                 case PS_INITIAL:
1603                         if (c == '#' || c == '!' || c == '%') {
1604                                 state = PS_SAW_HASH;
1605                                 break;
1606                         }
1607                         /* FALLTHROUGH */
1608                 case PS_SAW_HASH:
1609                         if (ksh_isalphx(c))
1610                                 state = PS_IDENT;
1611                         else if (ksh_isdigit(c))
1612                                 state = PS_NUMBER;
1613                         else if (c == '#') {
1614                                 if (state == PS_SAW_HASH) {
1615                                         char c2;
1616
1617                                         c2 = getsc();
1618                                         ungetsc(c2);
1619                                         if (c2 != /*{*/ '}') {
1620                                                 ungetsc(c);
1621                                                 goto out;
1622                                         }
1623                                 }
1624                                 state = PS_VAR1;
1625                         } else if (ctype(c, C_VAR1))
1626                                 state = PS_VAR1;
1627                         else
1628                                 goto out;
1629                         break;
1630                 case PS_IDENT:
1631                         if (!ksh_isalnux(c)) {
1632                                 if (c == '[') {
1633                                         char *tmp, *p;
1634
1635                                         if (!arraysub(&tmp))
1636                                                 yyerror("missing ]\n");
1637                                         *wp++ = c;
1638                                         for (p = tmp; *p; ) {
1639                                                 Xcheck(*wsp, wp);
1640                                                 *wp++ = *p++;
1641                                         }
1642                                         afree(tmp, ATEMP);
1643                                         /* the ] */
1644                                         c = getsc();
1645                                 }
1646                                 goto out;
1647                         }
1648                         break;
1649                 case PS_NUMBER:
1650                         if (!ksh_isdigit(c))
1651                                 goto out;
1652                         break;
1653                 case PS_VAR1:
1654                         goto out;
1655                 }
1656                 Xcheck(*wsp, wp);
1657                 *wp++ = c;
1658         }
1659  out:
1660         /* end of variable part */
1661         *wp++ = '\0';
1662         ungetsc(c);
1663         return (wp);
1664 }
1665
1666 /*
1667  * Save an array subscript - returns true if matching bracket found, false
1668  * if eof or newline was found.
1669  * (Returned string double null terminated)
1670  */
1671 static bool
1672 arraysub(char **strp)
1673 {
1674         XString ws;
1675         char *wp, c;
1676         /* we are just past the initial [ */
1677         unsigned int depth = 1;
1678
1679         Xinit(ws, wp, 32, ATEMP);
1680
1681         do {
1682                 c = getsc();
1683                 Xcheck(ws, wp);
1684                 *wp++ = c;
1685                 if (c == '[')
1686                         depth++;
1687                 else if (c == ']')
1688                         depth--;
1689         } while (depth > 0 && c && c != '\n');
1690
1691         *wp++ = '\0';
1692         *strp = Xclose(ws, wp);
1693
1694         return (tobool(depth == 0));
1695 }
1696
1697 /* Unget a char: handles case when we are already at the start of the buffer */
1698 static void
1699 ungetsc(int c)
1700 {
1701         struct sretrace_info *rp = retrace_info;
1702
1703         if (backslash_skip)
1704                 backslash_skip--;
1705         /* Don't unget EOF... */
1706         if (source->str == null && c == '\0')
1707                 return;
1708         while (rp) {
1709                 if (Xlength(rp->xs, rp->xp))
1710                         rp->xp--;
1711                 rp = rp->next;
1712         }
1713         ungetsc_i(c);
1714 }
1715 static void
1716 ungetsc_i(int c)
1717 {
1718         if (source->str > source->start)
1719                 source->str--;
1720         else {
1721                 Source *s;
1722
1723                 s = pushs(SREREAD, source->areap);
1724                 s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1725                 s->start = s->str = s->ugbuf;
1726                 s->next = source;
1727                 source = s;
1728         }
1729 }
1730
1731
1732 /* Called to get a char that isn't a \newline sequence. */
1733 static int
1734 getsc_bn(void)
1735 {
1736         int c, c2;
1737
1738         if (ignore_backslash_newline)
1739                 return (o_getsc_u());
1740
1741         if (backslash_skip == 1) {
1742                 backslash_skip = 2;
1743                 return (o_getsc_u());
1744         }
1745
1746         backslash_skip = 0;
1747
1748         while (/* CONSTCOND */ 1) {
1749                 c = o_getsc_u();
1750                 if (c == '\\') {
1751                         if ((c2 = o_getsc_u()) == '\n')
1752                                 /* ignore the \newline; get the next char... */
1753                                 continue;
1754                         ungetsc_i(c2);
1755                         backslash_skip = 1;
1756                 }
1757                 return (c);
1758         }
1759 }
1760
1761 void
1762 yyskiputf8bom(void)
1763 {
1764         int c;
1765
1766         if ((unsigned char)(c = o_getsc_u()) != 0xEF) {
1767                 ungetsc_i(c);
1768                 return;
1769         }
1770         if ((unsigned char)(c = o_getsc_u()) != 0xBB) {
1771                 ungetsc_i(c);
1772                 ungetsc_i(0xEF);
1773                 return;
1774         }
1775         if ((unsigned char)(c = o_getsc_u()) != 0xBF) {
1776                 ungetsc_i(c);
1777                 ungetsc_i(0xBB);
1778                 ungetsc_i(0xEF);
1779                 return;
1780         }
1781         UTFMODE |= 8;
1782 }
1783
1784 static Lex_state *
1785 push_state_i(State_info *si, Lex_state *old_end)
1786 {
1787         Lex_state *news = alloc2(STATE_BSIZE, sizeof(Lex_state), ATEMP);
1788
1789         news[0].ls_base = old_end;
1790         si->base = &news[0];
1791         si->end = &news[STATE_BSIZE];
1792         return (&news[1]);
1793 }
1794
1795 static Lex_state *
1796 pop_state_i(State_info *si, Lex_state *old_end)
1797 {
1798         Lex_state *old_base = si->base;
1799
1800         si->base = old_end->ls_base - STATE_BSIZE;
1801         si->end = old_end->ls_base;
1802
1803         afree(old_base, ATEMP);
1804
1805         return (si->base + STATE_BSIZE - 1);
1806 }
1807
/* Function wrapper around getsc(): return the next input character. */
static int
s_get(void)
{
	int c = getsc();

	return (c);
}
1813
/* Function wrapper around ungetsc(): push c back onto the input. */
static void
s_put(int c)
{
	ungetsc(c);
}