OSDN Git Service

Updated mksh to ToT as of 12 October 2011.
[android-x86/external-mksh.git] / src / lex.c
1 /*      $OpenBSD: lex.c,v 1.45 2011/03/09 09:30:39 okan Exp $   */
2
3 /*-
4  * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
5  *      Thorsten Glaser <tg@mirbsd.org>
6  *
7  * Provided that these terms and disclaimer and all copyright notices
8  * are retained or reproduced in an accompanying document, permission
9  * is granted to deal in this work without restriction, including un-
10  * limited rights to use, publicly perform, distribute, sell, modify,
11  * merge, give away, or sublicence.
12  *
13  * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
14  * the utmost extent permitted by applicable law, neither express nor
15  * implied; without malicious intent or gross negligence. In no event
16  * may a licensor, author or contributor be held liable for indirect,
17  * direct, other damage, loss, or other issues arising in any way out
18  * of dealing in the work, even if advised of the possibility of such
19  * damage or existence of a defect, except proven that it results out
20  * of said person's immediate fault when using the work as intended.
21  */
22
23 #include "sh.h"
24
25 __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.156 2011/09/07 15:24:16 tg Exp $");
26
27 /*
28  * states while lexing word
    *
    * Each value is stored in the uint8_t "type" member of a Lex_state entry
    * and mirrored in yylex()'s local "state" variable by PUSH_STATE() and
    * POP_STATE(). SINVALID is what yylex() assigns to states[0], the entry
    * below the first slot it actually uses (&states[1]).
29  */
30 #define SBASE           0       /* outside any lexical constructs */
31 #define SWORD           1       /* implicit quoting for substitute() */
32 #define SLETPAREN       2       /* inside (( )), implicit quoting */
33 #define SSQUOTE         3       /* inside '' */
34 #define SDQUOTE         4       /* inside "" */
35 #define SEQUOTE         5       /* inside $'' */
36 #define SBRACE          6       /* inside ${} */
37 #define SQBRACE         7       /* inside "${}" */
38 #define SBQUOTE         8       /* inside `` */
39 #define SASPAREN        9       /* inside $(( )) */
40 #define SHEREDELIM      10      /* parsing <<,<<- delimiter */
41 #define SHEREDQUOTE     11      /* parsing " in <<,<<- delimiter */
42 #define SPATTERN        12      /* parsing *(...|...) pattern (*+?@!) */
43 #define SADELIM         13      /* like SBASE, looking for delimiter */
44 #define SHERESTRING     14      /* parsing <<< string */
45 #define STBRACEKORN     15      /* parsing ${...[#%]...} !FSH */
46 #define STBRACEBOURNE   16      /* parsing ${...[#%]...} FSH */
47 #define SINVALID        255     /* invalid state */
48
/*
 * One active "retrace" capture buffer. While an entry is linked on the
 * retrace_info list, every character returned by getsc() is also appended
 * to its buffer. Entries are pushed by PUSH_SRETRACE() and popped by
 * POP_SRETRACE().
 */
49 struct sretrace_info {
50         struct sretrace_info *next;     /* entry that was the list head when this one was pushed */
51         XString xs;                     /* buffer receiving the captured characters (Xcheck()ed before each store) */
52         char *xp;                       /* next write position inside xs */
53 };
54
55 /*
56  * Structure to keep track of the lexing state and the various pieces of info
57  * needed for each particular state.
    *
    * The members of the union u share storage; they are reached through the
    * ls_* accessor macros defined right after the structure. nparen and
    * type live outside the union and are therefore always available.
58  */
59 typedef struct lex_state {
60         union {
61                 /* point to the next state block; yylex() sets this to NULL in states[0] */
62                 struct lex_state *base;
63                 /* marks start of state output in output string (saved by PUSH_SRETRACE(), restored by POP_SRETRACE()) */
64                 int start;
65                 /* SBQUOTE: true if in double quotes: "`...`" */
66                 /* SEQUOTE: got NUL, ignore rest of string */
67                 bool abool;
68                 /* SADELIM information */
69                 struct {
70                         /* character to search for */
71                         unsigned char delimiter;
72                         /* max. number of delimiters */
73                         unsigned char num;
74                 } adelim;
75         } u;
76         /* count open parentheses (used by the SADELIM, SASPAREN and SLETPAREN code in yylex()) */
77         short nparen;
78         /* type of this state: one of the S* state constants */
79         uint8_t type;
80 } Lex_state;
/* shorthands for the members of Lex_state.u */
81 #define ls_base         u.base
82 #define ls_start        u.start
83 #define ls_bool         u.abool
84 #define ls_adelim       u.adelim
85
/*
 * Bounds of the block of Lex_state entries currently in use. yylex()
 * initialises base to its local states[] array and end to
 * &base[STATE_BSIZE]. PUSH_STATE() tests statep against end and
 * POP_STATE() tests it against base; when a bound is hit this structure
 * is passed to push_state_() or pop_state_() respectively.
 */
86 typedef struct {
87         Lex_state *base;        /* first entry of the current block */
88         Lex_state *end;         /* one past the last entry of the current block */
89 } State_info;
90
91 static void readhere(struct ioword *);
92 static void ungetsc(int);
93 static void ungetsc_(int);
94 static int getsc_uu(void);
95 static void getsc_line(Source *);
96 static int getsc_bn(void);
97 static int s_get(void);
98 static void s_put(int);
99 static char *get_brace_var(XString *, char *);
100 static bool arraysub(char **);
101 static void gethere(bool);
102 static Lex_state *push_state_(State_info *, Lex_state *);
103 static Lex_state *pop_state_(State_info *, Lex_state *);
104
105 static int dopprompt(const char *, int, bool);
106 void yyskiputf8bom(void);
107
/* reset to 0 by yylex() for each token; while non-zero, o_getsc() always takes the getsc_bn() path */
108 static int backslash_skip;
/*
 * reset to 0 by yylex() for each token; raised while yylex() skips a '#'
 * comment and while it is inside '...' or $'...', lowered again afterwards.
 * NOTE(review): the code that consults it is not in this part of the file.
 */
109 static int ignore_backslash_newline;
/* head of the list of active retrace buffers; NULL-terminated via ->next */
110 static struct sretrace_info *retrace_info;
/*
 * NOTE(review): not referenced in the visible part of this file; the name
 * suggests a subshell depth counter -- confirm against its users.
 */
111 short subshell_nesting_level = 0;
112
113 /*
     * optimised getsc_bn(): when the next byte of source->str is neither NUL
     * nor a backslash and backslash_skip is zero, consume and yield that byte
     * directly; in every other case defer to getsc_bn().
     */
114 #define o_getsc()       (*source->str != '\0' && *source->str != '\\' && \
115                             !backslash_skip ? *source->str++ : getsc_bn())
116 /*
     * optimised getsc_uu(): consume and yield the next byte of source->str
     * unless it is NUL, in which case getsc_uu() is called instead.
     */
117 #define o_getsc_u()     ((*source->str != '\0') ? *source->str++ : getsc_uu())
118
119 /*
     * retrace helper
     *
     * Evaluates carg exactly once into cev, appends cev to the buffer of
     * every entry on the retrace_info list (Xcheck() is applied before each
     * store) and finally returns cev. The expansion is a complete { ... }
     * block ending in a return statement, so it returns from the enclosing
     * function; it is meant to form the whole body of a function returning
     * int.
     */
120 #define o_getsc_r(carg) {                               \
121         int cev = (carg);                               \
122         struct sretrace_info *rp = retrace_info;        \
123                                                         \
124         while (rp) {                                    \
125                 Xcheck(rp->xs, rp->xp);                 \
126                 *rp->xp++ = cev;                        \
127                 rp = rp->next;                          \
128         }                                               \
129                                                         \
130         return (cev);                                   \
131 }
132
133 #ifdef MKSH_SMALL
134 static int getsc(void);
135
136 static int
137 getsc(void)
138 {
139         o_getsc_r(o_getsc());
140 }
141 #else
142 static int getsc_r(int);
143
144 static int
145 getsc_r(int c)
146 {
147         o_getsc_r(c);
148 }
149
150 #define getsc()         getsc_r(o_getsc())
151 #endif
152
/* number of Lex_state entries in one state block (size of yylex()'s states[] array) */
153 #define STATE_BSIZE     8
154
/*
 * Enter lexer state (s): advance statep by one entry; if it then equals
 * state_info.end, take the new statep from push_state_() instead
 * (NOTE(review): presumably that links in a further block -- its body is
 * not visible here). Afterwards (s) is stored both in the entry's type
 * and in the local "state". Expects statep, state and state_info to be in
 * scope at the point of expansion.
 */
155 #define PUSH_STATE(s)   do {                                    \
156         if (++statep == state_info.end)                         \
157                 statep = push_state_(&state_info, statep);      \
158         state = statep->type = (s);                             \
159 } while (/* CONSTCOND */ 0)
160
/*
 * Leave the current lexer state: step statep back by one entry; if it
 * then equals state_info.base, take the new statep from pop_state_()
 * instead (its body is not visible here). Afterwards the local "state" is
 * reloaded from the type of the entry statep now points to. Expects
 * statep, state and state_info to be in scope at the point of expansion.
 */
161 #define POP_STATE()     do {                                    \
162         if (--statep == state_info.base)                        \
163                 statep = pop_state_(&state_info, statep);       \
164         state = statep->type;                                   \
165 } while (/* CONSTCOND */ 0)
166
/*
 * Start capturing the characters read from here on: remember the current
 * output position (Xsavepos(ws, wp)) in statep->ls_start, allocate a new
 * sretrace_info from ATEMP, initialise its buffer with Xinit() (size
 * argument 64) and push it on the head of the retrace_info list.
 * Expects ws, wp and statep to be in scope at the point of expansion.
 */
167 #define PUSH_SRETRACE() do {                                    \
168         struct sretrace_info *ri;                               \
169                                                                 \
170         statep->ls_start = Xsavepos(ws, wp);                    \
171         ri = alloc(sizeof(struct sretrace_info), ATEMP);        \
172         Xinit(ri->xs, ri->xp, 64, ATEMP);                       \
173         ri->next = retrace_info;                                \
174         retrace_info = ri;                                      \
175 } while (/* CONSTCOND */ 0)
176
/*
 * Finish the innermost capture: rewind wp to the output position saved in
 * statep->ls_start, NUL-terminate the captured text and leave a pointer
 * to its start in sp, then unlink the head entry of retrace_info and free
 * that entry (through dp). The captured string itself is not freed here;
 * in yylex() the caller either afree()s sp or hands it to a Source via
 * u.freeme. Reads statep->ls_start, so yylex() expands it before
 * POP_STATE(). Expects ws, wp, sp, dp and statep to be in scope.
 */
177 #define POP_SRETRACE()  do {                                    \
178         wp = Xrestpos(ws, wp, statep->ls_start);                \
179         *retrace_info->xp = '\0';                               \
180         sp = Xstring(retrace_info->xs, retrace_info->xp);       \
181         dp = (void *)retrace_info;                              \
182         retrace_info = retrace_info->next;                      \
183         afree(dp, ATEMP);                                       \
184 } while (/* CONSTCOND */ 0)
185
186 /**
187  * Lexical analyser
188  *
189  * Tokens are not regular expressions; they are LL(1), for example
190  * "${var:-${PWD}}" and "$(size $(whence ksh))", hence the state
191  * stack. Note that "$(...)" is now parsed recursively.
192  */
193
194 int
195 yylex(int cf)
196 {
197         Lex_state states[STATE_BSIZE], *statep, *s2, *base;
198         State_info state_info;
199         int c, c2, state;
200         size_t cz;
201         XString ws;             /* expandable output word */
202         char *wp;               /* output word pointer */
203         char *sp, *dp;
204
205  Again:
206         states[0].type = SINVALID;
207         states[0].ls_base = NULL;
208         statep = &states[1];
209         state_info.base = states;
210         state_info.end = &state_info.base[STATE_BSIZE];
211
212         Xinit(ws, wp, 64, ATEMP);
213
214         backslash_skip = 0;
215         ignore_backslash_newline = 0;
216
217         if (cf & ONEWORD)
218                 state = SWORD;
219         else if (cf & LETEXPR) {
220                 /* enclose arguments in (double) quotes */
221                 *wp++ = OQUOTE;
222                 state = SLETPAREN;
223                 statep->nparen = 0;
224         } else {
225                 /* normal lexing */
226                 state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
227                 while ((c = getsc()) == ' ' || c == '\t')
228                         ;
229                 if (c == '#') {
230                         ignore_backslash_newline++;
231                         while ((c = getsc()) != '\0' && c != '\n')
232                                 ;
233                         ignore_backslash_newline--;
234                 }
235                 ungetsc(c);
236         }
237         if (source->flags & SF_ALIAS) {
238                 /* trailing ' ' in alias definition */
239                 source->flags &= ~SF_ALIAS;
240                 cf |= ALIAS;
241         }
242
243         /* Initial state: one of SWORD SLETPAREN SHEREDELIM SBASE */
244         statep->type = state;
245
246         /* check for here string */
247         if (state == SHEREDELIM) {
248                 c = getsc();
249                 if (c == '<') {
250                         state = SHERESTRING;
251                         while ((c = getsc()) == ' ' || c == '\t')
252                                 ;
253                         ungetsc(c);
254                         c = '<';
255                         goto accept_nonword;
256                 }
257                 ungetsc(c);
258         }
259
260         /* collect non-special or quoted characters to form word */
261         while (!((c = getsc()) == 0 ||
262             ((state == SBASE || state == SHEREDELIM || state == SHERESTRING) &&
263             ctype(c, C_LEX1)))) {
264  accept_nonword:
265                 Xcheck(ws, wp);
266                 switch (state) {
267                 case SADELIM:
268                         if (c == '(')
269                                 statep->nparen++;
270                         else if (c == ')')
271                                 statep->nparen--;
272                         else if (statep->nparen == 0 &&
273                             (c == /*{*/ '}' || c == statep->ls_adelim.delimiter)) {
274                                 *wp++ = ADELIM;
275                                 *wp++ = c;
276                                 if (c == /*{*/ '}' || --statep->ls_adelim.num == 0)
277                                         POP_STATE();
278                                 if (c == /*{*/ '}')
279                                         POP_STATE();
280                                 break;
281                         }
282                         /* FALLTHROUGH */
283                 case SBASE:
284                         if (c == '[' && (cf & (VARASN|ARRAYVAR))) {
285                                 /* temporary */
286                                 *wp = EOS;
287                                 if (is_wdvarname(Xstring(ws, wp), false)) {
288                                         char *p, *tmp;
289
290                                         if (arraysub(&tmp)) {
291                                                 *wp++ = CHAR;
292                                                 *wp++ = c;
293                                                 for (p = tmp; *p; ) {
294                                                         Xcheck(ws, wp);
295                                                         *wp++ = CHAR;
296                                                         *wp++ = *p++;
297                                                 }
298                                                 afree(tmp, ATEMP);
299                                                 break;
300                                         } else {
301                                                 Source *s;
302
303                                                 s = pushs(SREREAD,
304                                                     source->areap);
305                                                 s->start = s->str =
306                                                     s->u.freeme = tmp;
307                                                 s->next = source;
308                                                 source = s;
309                                         }
310                                 }
311                                 *wp++ = CHAR;
312                                 *wp++ = c;
313                                 break;
314                         }
315                         /* FALLTHROUGH */
316  Sbase1:                /* includes *(...|...) pattern (*+?@!) */
317                         if (c == '*' || c == '@' || c == '+' || c == '?' ||
318                             c == '!') {
319                                 c2 = getsc();
320                                 if (c2 == '(' /*)*/ ) {
321                                         *wp++ = OPAT;
322                                         *wp++ = c;
323                                         PUSH_STATE(SPATTERN);
324                                         break;
325                                 }
326                                 ungetsc(c2);
327                         }
328                         /* FALLTHROUGH */
329  Sbase2:                /* doesn't include *(...|...) pattern (*+?@!) */
330                         switch (c) {
331                         case '\\':
332  getsc_qchar:
333                                 if ((c = getsc())) {
334                                         /* trailing \ is lost */
335                                         *wp++ = QCHAR;
336                                         *wp++ = c;
337                                 }
338                                 break;
339                         case '\'':
340  open_ssquote:
341                                 *wp++ = OQUOTE;
342                                 ignore_backslash_newline++;
343                                 PUSH_STATE(SSQUOTE);
344                                 break;
345                         case '"':
346  open_sdquote:
347                                 *wp++ = OQUOTE;
348                                 PUSH_STATE(SDQUOTE);
349                                 break;
350                         default:
351                                 goto Subst;
352                         }
353                         break;
354
355  Subst:
356                         switch (c) {
357                         case '\\':
358                                 c = getsc();
359                                 switch (c) {
360                                 case '"':
361                                         if ((cf & HEREDOC))
362                                                 goto heredocquote;
363                                         /* FALLTHROUGH */
364                                 case '\\':
365                                 case '$': case '`':
366  store_qchar:
367                                         *wp++ = QCHAR;
368                                         *wp++ = c;
369                                         break;
370                                 default:
371  heredocquote:
372                                         Xcheck(ws, wp);
373                                         if (c) {
374                                                 /* trailing \ is lost */
375                                                 *wp++ = CHAR;
376                                                 *wp++ = '\\';
377                                                 *wp++ = CHAR;
378                                                 *wp++ = c;
379                                         }
380                                         break;
381                                 }
382                                 break;
383                         case '$':
384  subst_dollar:
385                                 c = getsc();
386                                 if (c == '(') /*)*/ {
387                                         c = getsc();
388                                         if (c == '(') /*)*/ {
389                                                 *wp++ = EXPRSUB;
390                                                 PUSH_STATE(SASPAREN);
391                                                 statep->nparen = 2;
392                                                 PUSH_SRETRACE();
393                                                 *retrace_info->xp++ = '(';
394                                         } else {
395                                                 ungetsc(c);
396  subst_command:
397                                                 sp = yyrecursive();
398                                                 cz = strlen(sp) + 1;
399                                                 XcheckN(ws, wp, cz);
400                                                 *wp++ = COMSUB;
401                                                 memcpy(wp, sp, cz);
402                                                 wp += cz;
403                                         }
404                                 } else if (c == '{') /*}*/ {
405                                         *wp++ = OSUBST;
406                                         *wp++ = '{'; /*}*/
407                                         wp = get_brace_var(&ws, wp);
408                                         c = getsc();
409                                         /* allow :# and :% (ksh88 compat) */
410                                         if (c == ':') {
411                                                 *wp++ = CHAR;
412                                                 *wp++ = c;
413                                                 c = getsc();
414                                                 if (c == ':') {
415                                                         *wp++ = CHAR;
416                                                         *wp++ = '0';
417                                                         *wp++ = ADELIM;
418                                                         *wp++ = ':';
419                                                         PUSH_STATE(SBRACE);
420                                                         PUSH_STATE(SADELIM);
421                                                         statep->ls_adelim.delimiter = ':';
422                                                         statep->ls_adelim.num = 1;
423                                                         statep->nparen = 0;
424                                                         break;
425                                                 } else if (ksh_isdigit(c) ||
426                                                     c == '('/*)*/ || c == ' ' ||
427                                                     /*XXX what else? */
428                                                     c == '$') {
429                                                         /* substring subst. */
430                                                         if (c != ' ') {
431                                                                 *wp++ = CHAR;
432                                                                 *wp++ = ' ';
433                                                         }
434                                                         ungetsc(c);
435                                                         PUSH_STATE(SBRACE);
436                                                         PUSH_STATE(SADELIM);
437                                                         statep->ls_adelim.delimiter = ':';
438                                                         statep->ls_adelim.num = 2;
439                                                         statep->nparen = 0;
440                                                         break;
441                                                 }
442                                         } else if (c == '/') {
443                                                 *wp++ = CHAR;
444                                                 *wp++ = c;
445                                                 if ((c = getsc()) == '/') {
446                                                         *wp++ = ADELIM;
447                                                         *wp++ = c;
448                                                 } else
449                                                         ungetsc(c);
450                                                 PUSH_STATE(SBRACE);
451                                                 PUSH_STATE(SADELIM);
452                                                 statep->ls_adelim.delimiter = '/';
453                                                 statep->ls_adelim.num = 1;
454                                                 statep->nparen = 0;
455                                                 break;
456                                         }
457                                         /*
458                                          * If this is a trim operation,
459                                          * treat (,|,) specially in STBRACE.
460                                          */
461                                         if (ctype(c, C_SUBOP2)) {
462                                                 ungetsc(c);
463                                                 if (Flag(FSH))
464                                                         PUSH_STATE(STBRACEBOURNE);
465                                                 else
466                                                         PUSH_STATE(STBRACEKORN);
467                                         } else {
468                                                 ungetsc(c);
469                                                 if (state == SDQUOTE)
470                                                         PUSH_STATE(SQBRACE);
471                                                 else
472                                                         PUSH_STATE(SBRACE);
473                                         }
474                                 } else if (ksh_isalphx(c)) {
475                                         *wp++ = OSUBST;
476                                         *wp++ = 'X';
477                                         do {
478                                                 Xcheck(ws, wp);
479                                                 *wp++ = c;
480                                                 c = getsc();
481                                         } while (ksh_isalnux(c));
482                                         *wp++ = '\0';
483                                         *wp++ = CSUBST;
484                                         *wp++ = 'X';
485                                         ungetsc(c);
486                                 } else if (ctype(c, C_VAR1 | C_DIGIT)) {
487                                         Xcheck(ws, wp);
488                                         *wp++ = OSUBST;
489                                         *wp++ = 'X';
490                                         *wp++ = c;
491                                         *wp++ = '\0';
492                                         *wp++ = CSUBST;
493                                         *wp++ = 'X';
494                                 } else if (c == '\'' && (state == SBASE)) {
495                                         /* XXX which other states are valid? */
496                                         *wp++ = OQUOTE;
497                                         ignore_backslash_newline++;
498                                         PUSH_STATE(SEQUOTE);
499                                         statep->ls_bool = false;
500                                         break;
501                                 } else if (c == '"' && (state == SBASE)) {
502                                         /* XXX which other states are valid? */
503                                         goto DEQUOTE;
504                                 } else {
505                                         *wp++ = CHAR;
506                                         *wp++ = '$';
507  DEQUOTE:
508                                         ungetsc(c);
509                                 }
510                                 break;
511                         case '`':
512  subst_gravis:
513                                 PUSH_STATE(SBQUOTE);
514                                 *wp++ = COMSUB;
515                                 /*
516                                  * Need to know if we are inside double quotes
517                                  * since sh/AT&T-ksh translate the \" to " in
518                                  * "`...\"...`".
519                                  * This is not done in POSIX mode (section
520                                  * 3.2.3, Double Quotes: "The backquote shall
521                                  * retain its special meaning introducing the
522                                  * other form of command substitution (see
523                                  * 3.6.3). The portion of the quoted string
524                                  * from the initial backquote and the
525                                  * characters up to the next backquote that
526                                  * is not preceded by a backslash (having
527                                  * escape characters removed) defines that
528                                  * command whose output replaces `...` when
529                                  * the word is expanded."
530                                  * Section 3.6.3, Command Substitution:
531                                  * "Within the backquoted style of command
532                                  * substitution, backslash shall retain its
533                                  * literal meaning, except when followed by
534                                  * $ ` \.").
535                                  */
536                                 statep->ls_bool = false;
537                                 s2 = statep;
538                                 base = state_info.base;
539                                 while (/* CONSTCOND */ 1) {
540                                         for (; s2 != base; s2--) {
541                                                 if (s2->type == SDQUOTE) {
542                                                         statep->ls_bool = true;
543                                                         break;
544                                                 }
545                                         }
546                                         if (s2 != base)
547                                                 break;
548                                         if (!(s2 = s2->ls_base))
549                                                 break;
550                                         base = s2-- - STATE_BSIZE;
551                                 }
552                                 break;
553                         case QCHAR:
554                                 if (cf & LQCHAR) {
555                                         *wp++ = QCHAR;
556                                         *wp++ = getsc();
557                                         break;
558                                 }
559                                 /* FALLTHROUGH */
560                         default:
561  store_char:
562                                 *wp++ = CHAR;
563                                 *wp++ = c;
564                         }
565                         break;
566
567                 case SEQUOTE:
568                         if (c == '\'') {
569                                 POP_STATE();
570                                 *wp++ = CQUOTE;
571                                 ignore_backslash_newline--;
572                         } else if (c == '\\') {
573                                 if ((c2 = unbksl(true, s_get, s_put)) == -1)
574                                         c2 = s_get();
575                                 if (c2 == 0)
576                                         statep->ls_bool = true;
577                                 if (!statep->ls_bool) {
578                                         char ts[4];
579
580                                         if ((unsigned int)c2 < 0x100) {
581                                                 *wp++ = QCHAR;
582                                                 *wp++ = c2;
583                                         } else {
584                                                 cz = utf_wctomb(ts, c2 - 0x100);
585                                                 ts[cz] = 0;
586                                                 for (cz = 0; ts[cz]; ++cz) {
587                                                         *wp++ = QCHAR;
588                                                         *wp++ = ts[cz];
589                                                 }
590                                         }
591                                 }
592                         } else if (!statep->ls_bool) {
593                                 *wp++ = QCHAR;
594                                 *wp++ = c;
595                         }
596                         break;
597
598                 case SSQUOTE:
599                         if (c == '\'') {
600                                 POP_STATE();
601                                 *wp++ = CQUOTE;
602                                 ignore_backslash_newline--;
603                         } else {
604                                 *wp++ = QCHAR;
605                                 *wp++ = c;
606                         }
607                         break;
608
609                 case SDQUOTE:
610                         if (c == '"') {
611                                 POP_STATE();
612                                 *wp++ = CQUOTE;
613                         } else
614                                 goto Subst;
615                         break;
616
617                 /* $(( ... )) */
618                 case SASPAREN:
619                         if (c == '(')
620                                 statep->nparen++;
621                         else if (c == ')') {
622                                 statep->nparen--;
623                                 if (statep->nparen == 1) {
624                                         /* end of EXPRSUB */
625                                         POP_SRETRACE();
626                                         POP_STATE();
627
628                                         if ((c2 = getsc()) == /*(*/ ')') {
629                                                 cz = strlen(sp) - 2;
630                                                 XcheckN(ws, wp, cz);
631                                                 memcpy(wp, sp + 1, cz);
632                                                 wp += cz;
633                                                 afree(sp, ATEMP);
634                                                 *wp++ = '\0';
635                                                 break;
636                                         } else {
637                                                 Source *s;
638
639                                                 ungetsc(c2);
640                                                 /*
641                                                  * mismatched parenthesis -
642                                                  * assume we were really
643                                                  * parsing a $(...) expression
644                                                  */
645                                                 --wp;
646                                                 s = pushs(SREREAD,
647                                                     source->areap);
648                                                 s->start = s->str =
649                                                     s->u.freeme = sp;
650                                                 s->next = source;
651                                                 source = s;
652                                                 goto subst_command;
653                                         }
654                                 }
655                         }
656                         /* reuse existing state machine */
657                         goto Sbase2;
658
659                 case SQBRACE:
660                         if (c == '\\') {
661                                 /*
662                                  * perform POSIX "quote removal" if the back-
663                                  * slash is "special", i.e. same cases as the
664                                  * {case '\\':} in Subst: plus closing brace;
665                                  * in mksh code "quote removal" on '\c' means
666                                  * write QCHAR+c, otherwise CHAR+\+CHAR+c are
667                                  * emitted (in heredocquote:)
668                                  */
669                                 if ((c = getsc()) == '"' || c == '\\' ||
670                                     c == '$' || c == '`' || c == /*{*/'}')
671                                         goto store_qchar;
672                                 goto heredocquote;
673                         }
674                         goto common_SQBRACE;
675
676                 case SBRACE:
677                         if (c == '\'')
678                                 goto open_ssquote;
679                         else if (c == '\\')
680                                 goto getsc_qchar;
681  common_SQBRACE:
682                         if (c == '"')
683                                 goto open_sdquote;
684                         else if (c == '$')
685                                 goto subst_dollar;
686                         else if (c == '`')
687                                 goto subst_gravis;
688                         else if (c != /*{*/ '}')
689                                 goto store_char;
690                         POP_STATE();
691                         *wp++ = CSUBST;
692                         *wp++ = /*{*/ '}';
693                         break;
694
695                 /* Same as SBASE, except (,|,) treated specially */
696                 case STBRACEKORN:
697                         if (c == '|')
698                                 *wp++ = SPAT;
699                         else if (c == '(') {
700                                 *wp++ = OPAT;
701                                 /* simile for @ */
702                                 *wp++ = ' ';
703                                 PUSH_STATE(SPATTERN);
704                         } else /* FALLTHROUGH */
705                 case STBRACEBOURNE:
706                           if (c == /*{*/ '}') {
707                                 POP_STATE();
708                                 *wp++ = CSUBST;
709                                 *wp++ = /*{*/ '}';
710                         } else
711                                 goto Sbase1;
712                         break;
713
714                 case SBQUOTE:
715                         if (c == '`') {
716                                 *wp++ = 0;
717                                 POP_STATE();
718                         } else if (c == '\\') {
719                                 switch (c = getsc()) {
720                                 case 0:
721                                         /* trailing \ is lost */
722                                         break;
723                                 case '\\':
724                                 case '$': case '`':
725                                         *wp++ = c;
726                                         break;
727                                 case '"':
728                                         if (statep->ls_bool) {
729                                                 *wp++ = c;
730                                                 break;
731                                         }
732                                         /* FALLTHROUGH */
733                                 default:
734                                         *wp++ = '\\';
735                                         *wp++ = c;
736                                         break;
737                                 }
738                         } else
739                                 *wp++ = c;
740                         break;
741
742                 /* ONEWORD */
743                 case SWORD:
744                         goto Subst;
745
746                 /* LETEXPR: (( ... )) */
747                 case SLETPAREN:
748                         if (c == /*(*/ ')') {
749                                 if (statep->nparen > 0)
750                                         --statep->nparen;
751                                 else if ((c2 = getsc()) == /*(*/ ')') {
752                                         c = 0;
753                                         *wp++ = CQUOTE;
754                                         goto Done;
755                                 } else {
756                                         Source *s;
757
758                                         ungetsc(c2);
759                                         /*
760                                          * mismatched parenthesis -
761                                          * assume we were really
762                                          * parsing a (...) expression
763                                          */
764                                         *wp = EOS;
765                                         sp = Xstring(ws, wp);
766                                         dp = wdstrip(sp, WDS_KEEPQ);
767                                         s = pushs(SREREAD, source->areap);
768                                         s->start = s->str = s->u.freeme = dp;
769                                         s->next = source;
770                                         source = s;
771                                         return ('('/*)*/);
772                                 }
773                         } else if (c == '(')
774                                 /*
775                                  * parentheses inside quotes and
776                                  * backslashes are lost, but AT&T ksh
777                                  * doesn't count them either
778                                  */
779                                 ++statep->nparen;
780                         goto Sbase2;
781
782                 /* <<< delimiter */
783                 case SHERESTRING:
784                         if (c == '\\') {
785                                 c = getsc();
786                                 if (c) {
787                                         /* trailing \ is lost */
788                                         *wp++ = QCHAR;
789                                         *wp++ = c;
790                                 }
791                         } else if (c == '$') {
792                                 if ((c2 = getsc()) == '\'') {
793                                         PUSH_STATE(SEQUOTE);
794                                         statep->ls_bool = false;
795                                         goto sherestring_quoted;
796                                 } else if (c2 == '"')
797                                         goto sherestring_dquoted;
798                                 ungetsc(c2);
799                                 goto sherestring_regular;
800                         } else if (c == '\'') {
801                                 PUSH_STATE(SSQUOTE);
802  sherestring_quoted:
803                                 *wp++ = OQUOTE;
804                                 ignore_backslash_newline++;
805                         } else if (c == '"') {
806  sherestring_dquoted:
807                                 state = statep->type = SHEREDQUOTE;
808                                 *wp++ = OQUOTE;
809                                 /* just don't IFS split; no quoting mode */
810                         } else {
811  sherestring_regular:
812                                 *wp++ = CHAR;
813                                 *wp++ = c;
814                         }
815                         break;
816
817                 /* <<,<<- delimiter */
818                 case SHEREDELIM:
819                         /*
820                          * XXX chuck this state (and the next) - use
821                          * the existing states ($ and \`...` should be
822                          * stripped of their specialness after the
823                          * fact).
824                          */
825                         /*
826                          * here delimiters need a special case since
827                          * $ and `...` are not to be treated specially
828                          */
829                         if (c == '\\') {
830                                 c = getsc();
831                                 if (c) {
832                                         /* trailing \ is lost */
833                                         *wp++ = QCHAR;
834                                         *wp++ = c;
835                                 }
836                         } else if (c == '$') {
837                                 if ((c2 = getsc()) == '\'') {
838                                         PUSH_STATE(SEQUOTE);
839                                         statep->ls_bool = false;
840                                         goto sheredelim_quoted;
841                                 } else if (c2 == '"')
842                                         goto sheredelim_dquoted;
843                                 ungetsc(c2);
844                                 goto sheredelim_regular;
845                         } else if (c == '\'') {
846                                 PUSH_STATE(SSQUOTE);
847  sheredelim_quoted:
848                                 *wp++ = OQUOTE;
849                                 ignore_backslash_newline++;
850                         } else if (c == '"') {
851  sheredelim_dquoted:
852                                 state = statep->type = SHEREDQUOTE;
853                                 *wp++ = OQUOTE;
854                         } else {
855  sheredelim_regular:
856                                 *wp++ = CHAR;
857                                 *wp++ = c;
858                         }
859                         break;
860
861                 /* " in <<,<<- delimiter */
862                 case SHEREDQUOTE:
863                         if (c == '"') {
864                                 *wp++ = CQUOTE;
865                                 state = statep->type =
866                                     /* dp[1] == '<' means here string */
867                                     Xstring(ws, wp)[1] == '<' ?
868                                     SHERESTRING : SHEREDELIM;
869                         } else {
870                                 if (c == '\\') {
871                                         switch (c = getsc()) {
872                                         case 0:
873                                                 /* trailing \ is lost */
874                                         case '\\':
875                                         case '"':
876                                         case '$':
877                                         case '`':
878                                                 break;
879                                         default:
880                                                 *wp++ = CHAR;
881                                                 *wp++ = '\\';
882                                                 break;
883                                         }
884                                 }
885                                 *wp++ = CHAR;
886                                 *wp++ = c;
887                         }
888                         break;
889
890                 /* in *(...|...) pattern (*+?@!) */
891                 case SPATTERN:
892                         if (c == /*(*/ ')') {
893                                 *wp++ = CPAT;
894                                 POP_STATE();
895                         } else if (c == '|') {
896                                 *wp++ = SPAT;
897                         } else if (c == '(') {
898                                 *wp++ = OPAT;
899                                 /* simile for @ */
900                                 *wp++ = ' ';
901                                 PUSH_STATE(SPATTERN);
902                         } else
903                                 goto Sbase1;
904                         break;
905                 }
906         }
907  Done:
908         Xcheck(ws, wp);
909         if (statep != &states[1])
910                 /* XXX figure out what is missing */
911                 yyerror("no closing quote\n");
912
913         /* This done to avoid tests for SHEREDELIM wherever SBASE tested */
914         if (state == SHEREDELIM || state == SHERESTRING)
915                 state = SBASE;
916
917         dp = Xstring(ws, wp);
918         if ((c == '<' || c == '>' || c == '&') && state == SBASE) {
919                 struct ioword *iop = alloc(sizeof(struct ioword), ATEMP);
920
921                 if (Xlength(ws, wp) == 0)
922                         iop->unit = c == '<' ? 0 : 1;
923                 else for (iop->unit = 0, c2 = 0; c2 < Xlength(ws, wp); c2 += 2) {
924                         if (dp[c2] != CHAR)
925                                 goto no_iop;
926                         if (!ksh_isdigit(dp[c2 + 1]))
927                                 goto no_iop;
928                         iop->unit = (iop->unit * 10) + dp[c2 + 1] - '0';
929                 }
930
931                 if (iop->unit >= FDBASE)
932                         goto no_iop;
933
934                 if (c == '&') {
935                         if ((c2 = getsc()) != '>') {
936                                 ungetsc(c2);
937                                 goto no_iop;
938                         }
939                         c = c2;
940                         iop->flag = IOBASH;
941                 } else
942                         iop->flag = 0;
943
944                 c2 = getsc();
945                 /* <<, >>, <> are ok, >< is not */
946                 if (c == c2 || (c == '<' && c2 == '>')) {
947                         iop->flag |= c == c2 ?
948                             (c == '>' ? IOCAT : IOHERE) : IORDWR;
949                         if (iop->flag == IOHERE) {
950                                 if ((c2 = getsc()) == '-') {
951                                         iop->flag |= IOSKIP;
952                                         c2 = getsc();
953                                 } else if (c2 == '<')
954                                         iop->flag |= IOHERESTR;
955                                 ungetsc(c2);
956                                 if (c2 == '\n')
957                                         iop->flag |= IONDELIM;
958                         }
959                 } else if (c2 == '&')
960                         iop->flag |= IODUP | (c == '<' ? IORDUP : 0);
961                 else {
962                         iop->flag |= c == '>' ? IOWRITE : IOREAD;
963                         if (c == '>' && c2 == '|')
964                                 iop->flag |= IOCLOB;
965                         else
966                                 ungetsc(c2);
967                 }
968
969                 iop->name = NULL;
970                 iop->delim = NULL;
971                 iop->heredoc = NULL;
972                 /* free word */
973                 Xfree(ws, wp);
974                 yylval.iop = iop;
975                 return (REDIR);
976  no_iop:
977                 afree(iop, ATEMP);
978         }
979
980         if (wp == dp && state == SBASE) {
981                 /* free word */
982                 Xfree(ws, wp);
983                 /* no word, process LEX1 character */
984                 if ((c == '|') || (c == '&') || (c == ';') || (c == '('/*)*/)) {
985                         if ((c2 = getsc()) == c)
986                                 c = (c == ';') ? BREAK :
987                                     (c == '|') ? LOGOR :
988                                     (c == '&') ? LOGAND :
989                                     /* c == '(' ) */ MDPAREN;
990                         else if (c == '|' && c2 == '&')
991                                 c = COPROC;
992                         else if (c == ';' && c2 == '|')
993                                 c = BRKEV;
994                         else if (c == ';' && c2 == '&')
995                                 c = BRKFT;
996                         else
997                                 ungetsc(c2);
998 #ifndef MKSH_SMALL
999                         if (c == BREAK) {
1000                                 if ((c2 = getsc()) == '&')
1001                                         c = BRKEV;
1002                                 else
1003                                         ungetsc(c2);
1004                         }
1005 #endif
1006                 } else if (c == '\n') {
1007                         gethere(false);
1008                         if (cf & CONTIN)
1009                                 goto Again;
1010                 } else if (c == '\0')
1011                         /* need here strings at EOF */
1012                         gethere(true);
1013                 return (c);
1014         }
1015
1016         /* terminate word */
1017         *wp++ = EOS;
1018         yylval.cp = Xclose(ws, wp);
1019         if (state == SWORD || state == SLETPAREN
1020             /* XXX ONEWORD? */)
1021                 return (LWORD);
1022
1023         /* unget terminator */
1024         ungetsc(c);
1025
1026         /*
1027          * note: the alias-vs-function code below depends on several
1028          * interna: starting from here, source->str is not modified;
1029          * the way getsc() and ungetsc() operate; etc.
1030          */
1031
1032         /* copy word to unprefixed string ident */
1033         sp = yylval.cp;
1034         dp = ident;
1035         if ((cf & HEREDELIM) && (sp[1] == '<'))
1036                 while (dp < ident+IDENT) {
1037                         if ((c = *sp++) == CHAR)
1038                                 *dp++ = *sp++;
1039                         else if ((c != OQUOTE) && (c != CQUOTE))
1040                                 break;
1041                 }
1042         else
1043                 while (dp < ident+IDENT && (c = *sp++) == CHAR)
1044                         *dp++ = *sp++;
1045         /* Make sure the ident array stays '\0' padded */
1046         memset(dp, 0, (ident+IDENT) - dp + 1);
1047         if (c != EOS)
1048                 /* word is not unquoted */
1049                 *ident = '\0';
1050
1051         if (*ident != '\0' && (cf & (KEYWORD | ALIAS))) {
1052                 struct tbl *p;
1053                 uint32_t h = hash(ident);
1054
1055                 if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) &&
1056                     (!(cf & ESACONLY) || p->val.i == ESAC ||
1057                     p->val.i == /*{*/ '}')) {
1058                         afree(yylval.cp, ATEMP);
1059                         return (p->val.i);
1060                 }
1061                 if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) &&
1062                     (p->flag & ISSET)) {
1063                         /*
1064                          * this still points to the same character as the
1065                          * ungetsc'd terminator from above
1066                          */
1067                         const char *cp = source->str;
1068
1069                         /* prefer POSIX but not Korn functions over aliases */
1070                         while (*cp == ' ' || *cp == '\t')
1071                                 /*
1072                                  * this is like getsc() without skipping
1073                                  * over Source boundaries (including not
1074                                  * parsing ungetsc'd characters that got
1075                                  * pushed into an SREREAD) which is what
1076                                  * we want here anyway: find out whether
1077                                  * the alias name is followed by a POSIX
1078                                  * function definition (only the opening
1079                                  * parenthesis is checked though)
1080                                  */
1081                                 ++cp;
1082                         /* prefer functions over aliases */
1083                         if (cp[0] != '(' || cp[1] != ')') {
1084                                 Source *s = source;
1085
1086                                 while (s && (s->flags & SF_HASALIAS))
1087                                         if (s->u.tblp == p)
1088                                                 return (LWORD);
1089                                         else
1090                                                 s = s->next;
1091                                 /* push alias expansion */
1092                                 s = pushs(SALIAS, source->areap);
1093                                 s->start = s->str = p->val.s;
1094                                 s->u.tblp = p;
1095                                 s->flags |= SF_HASALIAS;
1096                                 s->next = source;
1097                                 if (source->type == SEOF) {
1098                                         /* prevent infinite recursion at EOS */
1099                                         source->u.tblp = p;
1100                                         source->flags |= SF_HASALIAS;
1101                                 }
1102                                 source = s;
1103                                 afree(yylval.cp, ATEMP);
1104                                 goto Again;
1105                         }
1106                 }
1107         }
1108
1109         return (LWORD);
1110 }
1111
1112 static void
1113 gethere(bool iseof)
1114 {
1115         struct ioword **p;
1116
1117         for (p = heres; p < herep; p++)
1118                 if (iseof && !((*p)->flag & IOHERESTR))
1119                         /* only here strings at EOF */
1120                         return;
1121                 else
1122                         readhere(*p);
1123         herep = heres;
1124 }
1125
1126 /*
1127  * read "<<word" text into temp file
1128  */
1129
1130 static void
1131 readhere(struct ioword *iop)
1132 {
1133         int c;
1134         const char *eof, *eofp;
1135         XString xs;
1136         char *xp;
1137         int xpos;
1138
1139         if (iop->flag & IOHERESTR) {
1140                 /* process the here string */
1141                 iop->heredoc = xp = evalstr(iop->delim, DOBLANK);
1142                 xpos = strlen(xp) - 1;
1143                 memmove(xp, xp + 1, xpos);
1144                 xp[xpos] = '\n';
1145                 return;
1146         }
1147
1148         eof = iop->flag & IONDELIM ? "<<" : evalstr(iop->delim, 0);
1149
1150         if (!(iop->flag & IOEVAL))
1151                 ignore_backslash_newline++;
1152
1153         Xinit(xs, xp, 256, ATEMP);
1154
1155  heredoc_read_line:
1156         /* beginning of line */
1157         eofp = eof;
1158         xpos = Xsavepos(xs, xp);
1159         if (iop->flag & IOSKIP) {
1160                 /* skip over leading tabs */
1161                 while ((c = getsc()) == '\t')
1162                         /* nothing */;
1163                 goto heredoc_parse_char;
1164         }
1165  heredoc_read_char:
1166         c = getsc();
1167  heredoc_parse_char:
1168         /* compare with here document marker */
1169         if (!*eofp) {
1170                 /* end of here document marker, what to do? */
1171                 switch (c) {
1172                 case /*(*/ ')':
1173                         if (!subshell_nesting_level)
1174                                 /*-
1175                                  * not allowed outside $(...) or (...)
1176                                  * => mismatch
1177                                  */
1178                                 break;
1179                         /* allow $(...) or (...) to close here */
1180                         ungetsc(/*(*/ ')');
1181                         /* FALLTHROUGH */
1182                 case 0:
1183                         /*
1184                          * Allow EOF here to commands without trailing
1185                          * newlines (mksh -c '...') will work as well.
1186                          */
1187                 case '\n':
1188                         /* Newline terminates here document marker */
1189                         goto heredoc_found_terminator;
1190                 }
1191         } else if (c == *eofp++)
1192                 /* store; then read and compare next character */
1193                 goto heredoc_store_and_loop;
1194         /* nope, mismatch; read until end of line */
1195         while (c != '\n') {
1196                 if (!c)
1197                         /* oops, reached EOF */
1198                         yyerror("%s '%s' unclosed\n", "here document", eof);
1199                 /* store character */
1200                 Xcheck(xs, xp);
1201                 Xput(xs, xp, c);
1202                 /* read next character */
1203                 c = getsc();
1204         }
1205         /* we read a newline as last character */
1206  heredoc_store_and_loop:
1207         /* store character */
1208         Xcheck(xs, xp);
1209         Xput(xs, xp, c);
1210         if (c == '\n')
1211                 goto heredoc_read_line;
1212         goto heredoc_read_char;
1213
1214  heredoc_found_terminator:
1215         /* jump back to saved beginning of line */
1216         xp = Xrestpos(xs, xp, xpos);
1217         /* terminate, close and store */
1218         Xput(xs, xp, '\0');
1219         iop->heredoc = Xclose(xs, xp);
1220
1221         if (!(iop->flag & IOEVAL))
1222                 ignore_backslash_newline--;
1223 }
1224
1225 void
1226 yyerror(const char *fmt, ...)
1227 {
1228         va_list va;
1229
1230         /* pop aliases and re-reads */
1231         while (source->type == SALIAS || source->type == SREREAD)
1232                 source = source->next;
1233         /* zap pending input */
1234         source->str = null;
1235
1236         error_prefix(true);
1237         va_start(va, fmt);
1238         shf_vfprintf(shl_out, fmt, va);
1239         va_end(va);
1240         errorfz();
1241 }
1242
1243 /*
1244  * input for yylex with alias expansion
1245  */
1246
1247 Source *
1248 pushs(int type, Area *areap)
1249 {
1250         Source *s;
1251
1252         s = alloc(sizeof(Source), areap);
1253         memset(s, 0, sizeof(Source));
1254         s->type = type;
1255         s->str = null;
1256         s->areap = areap;
1257         if (type == SFILE || type == SSTDIN)
1258                 XinitN(s->xs, 256, s->areap);
1259         return (s);
1260 }
1261
1262 static int
1263 getsc_uu(void)
1264 {
1265         Source *s = source;
1266         int c;
1267
1268         while ((c = *s->str++) == 0) {
1269                 /* return 0 for EOF by default */
1270                 s->str = NULL;
1271                 switch (s->type) {
1272                 case SEOF:
1273                         s->str = null;
1274                         return (0);
1275
1276                 case SSTDIN:
1277                 case SFILE:
1278                         getsc_line(s);
1279                         break;
1280
1281                 case SWSTR:
1282                         break;
1283
1284                 case SSTRING:
1285                         break;
1286
1287                 case SWORDS:
1288                         s->start = s->str = *s->u.strv++;
1289                         s->type = SWORDSEP;
1290                         break;
1291
1292                 case SWORDSEP:
1293                         if (*s->u.strv == NULL) {
1294                                 s->start = s->str = "\n";
1295                                 s->type = SEOF;
1296                         } else {
1297                                 s->start = s->str = " ";
1298                                 s->type = SWORDS;
1299                         }
1300                         break;
1301
1302                 case SALIAS:
1303                         if (s->flags & SF_ALIASEND) {
1304                                 /* pass on an unused SF_ALIAS flag */
1305                                 source = s->next;
1306                                 source->flags |= s->flags & SF_ALIAS;
1307                                 s = source;
1308                         } else if (*s->u.tblp->val.s &&
1309                             (c = strnul(s->u.tblp->val.s)[-1], ksh_isspace(c))) {
1310                                 /* pop source stack */
1311                                 source = s = s->next;
1312                                 /*
1313                                  * Note that this alias ended with a
1314                                  * space, enabling alias expansion on
1315                                  * the following word.
1316                                  */
1317                                 s->flags |= SF_ALIAS;
1318                         } else {
1319                                 /*
1320                                  * At this point, we need to keep the current
1321                                  * alias in the source list so recursive
1322                                  * aliases can be detected and we also need to
1323                                  * return the next character. Do this by
1324                                  * temporarily popping the alias to get the
1325                                  * next character and then put it back in the
1326                                  * source list with the SF_ALIASEND flag set.
1327                                  */
1328                                 /* pop source stack */
1329                                 source = s->next;
1330                                 source->flags |= s->flags & SF_ALIAS;
1331                                 c = getsc_uu();
1332                                 if (c) {
1333                                         s->flags |= SF_ALIASEND;
1334                                         s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1335                                         s->start = s->str = s->ugbuf;
1336                                         s->next = source;
1337                                         source = s;
1338                                 } else {
1339                                         s = source;
1340                                         /* avoid reading EOF twice */
1341                                         s->str = NULL;
1342                                         break;
1343                                 }
1344                         }
1345                         continue;
1346
1347                 case SREREAD:
1348                         if (s->start != s->ugbuf)
1349                                 /* yuck */
1350                                 afree(s->u.freeme, ATEMP);
1351                         source = s = s->next;
1352                         continue;
1353                 }
1354                 if (s->str == NULL) {
1355                         s->type = SEOF;
1356                         s->start = s->str = null;
1357                         return ('\0');
1358                 }
1359                 if (s->flags & SF_ECHO) {
1360                         shf_puts(s->str, shl_out);
1361                         shf_flush(shl_out);
1362                 }
1363         }
1364         return (c);
1365 }
1366
1367 static void
1368 getsc_line(Source *s)
1369 {
1370         char *xp = Xstring(s->xs, xp), *cp;
1371         bool interactive = Flag(FTALKING) && s->type == SSTDIN;
1372         int have_tty = interactive && (s->flags & SF_TTY);
1373
1374         /* Done here to ensure nothing odd happens when a timeout occurs */
1375         XcheckN(s->xs, xp, LINE);
1376         *xp = '\0';
1377         s->start = s->str = xp;
1378
1379         if (have_tty && ksh_tmout) {
1380                 ksh_tmout_state = TMOUT_READING;
1381                 alarm(ksh_tmout);
1382         }
1383         if (interactive)
1384                 change_winsz();
1385         if (have_tty && (
1386 #if !MKSH_S_NOVI
1387             Flag(FVI) ||
1388 #endif
1389             Flag(FEMACS) || Flag(FGMACS))) {
1390                 int nread;
1391
1392                 nread = x_read(xp, LINE);
1393                 if (nread < 0)
1394                         /* read error */
1395                         nread = 0;
1396                 xp[nread] = '\0';
1397                 xp += nread;
1398         } else {
1399                 if (interactive)
1400                         pprompt(prompt, 0);
1401                 else
1402                         s->line++;
1403
1404                 while (/* CONSTCOND */ 1) {
1405                         char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
1406
1407                         if (!p && shf_error(s->u.shf) &&
1408                             shf_errno(s->u.shf) == EINTR) {
1409                                 shf_clearerr(s->u.shf);
1410                                 if (trap)
1411                                         runtraps(0);
1412                                 continue;
1413                         }
1414                         if (!p || (xp = p, xp[-1] == '\n'))
1415                                 break;
1416                         /* double buffer size */
1417                         /* move past NUL so doubling works... */
1418                         xp++;
1419                         XcheckN(s->xs, xp, Xlength(s->xs, xp));
1420                         /* ...and move back again */
1421                         xp--;
1422                 }
1423                 /*
1424                  * flush any unwanted input so other programs/builtins
1425                  * can read it. Not very optimal, but less error prone
1426                  * than flushing else where, dealing with redirections,
1427                  * etc.
1428                  * TODO: reduce size of shf buffer (~128?) if SSTDIN
1429                  */
1430                 if (s->type == SSTDIN)
1431                         shf_flush(s->u.shf);
1432         }
1433         /*
1434          * XXX: temporary kludge to restore source after a
1435          * trap may have been executed.
1436          */
1437         source = s;
1438         if (have_tty && ksh_tmout) {
1439                 ksh_tmout_state = TMOUT_EXECUTING;
1440                 alarm(0);
1441         }
1442         cp = Xstring(s->xs, xp);
1443 #ifndef MKSH_SMALL
1444         if (interactive && *cp == '!' && cur_prompt == PS1) {
1445                 int linelen;
1446
1447                 linelen = Xlength(s->xs, xp);
1448                 XcheckN(s->xs, xp, Zfc_e_dash + /* NUL */ 1);
1449                 /* reload after potential realloc */
1450                 cp = Xstring(s->xs, xp);
1451                 /* change initial '!' into space */
1452                 *cp = ' ';
1453                 /* NUL terminate the current string */
1454                 *xp = '\0';
1455                 /* move the actual string forward */
1456                 memmove(cp + Zfc_e_dash, cp, linelen + /* NUL */ 1);
1457                 xp += Zfc_e_dash;
1458                 /* prepend it with "fc -e -" */
1459                 memcpy(cp, Tfc_e_dash, Zfc_e_dash);
1460         }
1461 #endif
1462         s->start = s->str = cp;
1463         strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
1464         /* Note: if input is all nulls, this is not eof */
1465         if (Xlength(s->xs, xp) == 0) {
1466                 /* EOF */
1467                 if (s->type == SFILE)
1468                         shf_fdclose(s->u.shf);
1469                 s->str = NULL;
1470         } else if (interactive && *s->str &&
1471             (cur_prompt != PS1 || !ctype(*s->str, C_IFS | C_IFSWS))) {
1472                 histsave(&s->line, s->str, true, true);
1473 #if !defined(MKSH_SMALL) && HAVE_PERSISTENT_HISTORY
1474         } else if (interactive && cur_prompt == PS1) {
1475                 cp = Xstring(s->xs, xp);
1476                 while (*cp && ctype(*cp, C_IFSWS))
1477                         ++cp;
1478                 if (!*cp)
1479                         histsync();
1480 #endif
1481         }
1482         if (interactive)
1483                 set_prompt(PS2, NULL);
1484 }
1485
1486 void
1487 set_prompt(int to, Source *s)
1488 {
1489         cur_prompt = to;
1490
1491         switch (to) {
1492         /* command */
1493         case PS1:
1494                 /*
1495                  * Substitute ! and !! here, before substitutions are done
1496                  * so ! in expanded variables are not expanded.
1497                  * NOTE: this is not what AT&T ksh does (it does it after
1498                  * substitutions, POSIX doesn't say which is to be done.
1499                  */
1500                 {
1501                         struct shf *shf;
1502                         char * volatile ps1;
1503                         Area *saved_atemp;
1504
1505                         ps1 = str_val(global("PS1"));
1506                         shf = shf_sopen(NULL, strlen(ps1) * 2,
1507                             SHF_WR | SHF_DYNAMIC, NULL);
1508                         while (*ps1)
1509                                 if (*ps1 != '!' || *++ps1 == '!')
1510                                         shf_putchar(*ps1++, shf);
1511                                 else
1512                                         shf_fprintf(shf, "%d",
1513                                                 s ? s->line + 1 : 0);
1514                         ps1 = shf_sclose(shf);
1515                         saved_atemp = ATEMP;
1516                         newenv(E_ERRH);
1517                         if (sigsetjmp(e->jbuf, 0)) {
1518                                 prompt = safe_prompt;
1519                                 /*
1520                                  * Don't print an error - assume it has already
1521                                  * been printed. Reason is we may have forked
1522                                  * to run a command and the child may be
1523                                  * unwinding its stack through this code as it
1524                                  * exits.
1525                                  */
1526                         } else {
1527                                 char *cp = substitute(ps1, 0);
1528                                 strdupx(prompt, cp, saved_atemp);
1529                         }
1530                         quitenv(NULL);
1531                 }
1532                 break;
1533         /* command continuation */
1534         case PS2:
1535                 prompt = str_val(global("PS2"));
1536                 break;
1537         }
1538 }
1539
1540 static int
1541 dopprompt(const char *cp, int ntruncate, bool doprint)
1542 {
1543         int columns = 0, lines = 0, indelimit = 0;
1544         char delimiter = 0;
1545
1546         /*
1547          * Undocumented AT&T ksh feature:
1548          * If the second char in the prompt string is \r then the first
1549          * char is taken to be a non-printing delimiter and any chars
1550          * between two instances of the delimiter are not considered to
1551          * be part of the prompt length
1552          */
1553         if (*cp && cp[1] == '\r') {
1554                 delimiter = *cp;
1555                 cp += 2;
1556         }
1557         for (; *cp; cp++) {
1558                 if (indelimit && *cp != delimiter)
1559                         ;
1560                 else if (*cp == '\n' || *cp == '\r') {
1561                         lines += columns / x_cols + ((*cp == '\n') ? 1 : 0);
1562                         columns = 0;
1563                 } else if (*cp == '\t') {
1564                         columns = (columns | 7) + 1;
1565                 } else if (*cp == '\b') {
1566                         if (columns > 0)
1567                                 columns--;
1568                 } else if (*cp == delimiter)
1569                         indelimit = !indelimit;
1570                 else if (UTFMODE && ((unsigned char)*cp > 0x7F)) {
1571                         const char *cp2;
1572                         columns += utf_widthadj(cp, &cp2);
1573                         if (doprint && (indelimit ||
1574                             (ntruncate < (x_cols * lines + columns))))
1575                                 shf_write(cp, cp2 - cp, shl_out);
1576                         cp = cp2 - /* loop increment */ 1;
1577                         continue;
1578                 } else
1579                         columns++;
1580                 if (doprint && (*cp != delimiter) &&
1581                     (indelimit || (ntruncate < (x_cols * lines + columns))))
1582                         shf_putc(*cp, shl_out);
1583         }
1584         if (doprint)
1585                 shf_flush(shl_out);
1586         return (x_cols * lines + columns);
1587 }
1588
1589
/*
 * Print the prompt string cp via dopprompt() with printing enabled,
 * passing ntruncate through; the computed length is discarded.
 */
void
pprompt(const char *cp, int ntruncate)
{
	(void)dopprompt(cp, ntruncate, true);
}
1595
/*
 * Return the length dopprompt() computes for the prompt string cp,
 * without printing anything.
 */
int
promptlen(const char *cp)
{
	int len;

	len = dopprompt(cp, 0, false);
	return (len);
}
1601
1602 /*
1603  * Read the variable part of a ${...} expression (i.e. up to but not
1604  * including the :[-+?=#%] or close-brace).
1605  */
1606 static char *
1607 get_brace_var(XString *wsp, char *wp)
1608 {
1609         char c;
1610         enum parse_state {
1611                 PS_INITIAL, PS_SAW_HASH, PS_IDENT,
1612                 PS_NUMBER, PS_VAR1
1613         } state = PS_INITIAL;
1614
1615         while (/* CONSTCOND */ 1) {
1616                 c = getsc();
1617                 /* State machine to figure out where the variable part ends. */
1618                 switch (state) {
1619                 case PS_INITIAL:
1620                         if (c == '#' || c == '!' || c == '%') {
1621                                 state = PS_SAW_HASH;
1622                                 break;
1623                         }
1624                         /* FALLTHROUGH */
1625                 case PS_SAW_HASH:
1626                         if (ksh_isalphx(c))
1627                                 state = PS_IDENT;
1628                         else if (ksh_isdigit(c))
1629                                 state = PS_NUMBER;
1630                         else if (c == '#') {
1631                                 if (state == PS_SAW_HASH) {
1632                                         char c2;
1633
1634                                         c2 = getsc();
1635                                         ungetsc(c2);
1636                                         if (c2 != '}') {
1637                                                 ungetsc(c);
1638                                                 goto out;
1639                                         }
1640                                 }
1641                                 state = PS_VAR1;
1642                         } else if (ctype(c, C_VAR1))
1643                                 state = PS_VAR1;
1644                         else
1645                                 goto out;
1646                         break;
1647                 case PS_IDENT:
1648                         if (!ksh_isalnux(c)) {
1649                                 if (c == '[') {
1650                                         char *tmp, *p;
1651
1652                                         if (!arraysub(&tmp))
1653                                                 yyerror("missing ]\n");
1654                                         *wp++ = c;
1655                                         for (p = tmp; *p; ) {
1656                                                 Xcheck(*wsp, wp);
1657                                                 *wp++ = *p++;
1658                                         }
1659                                         afree(tmp, ATEMP);
1660                                         /* the ] */
1661                                         c = getsc();
1662                                 }
1663                                 goto out;
1664                         }
1665                         break;
1666                 case PS_NUMBER:
1667                         if (!ksh_isdigit(c))
1668                                 goto out;
1669                         break;
1670                 case PS_VAR1:
1671                         goto out;
1672                 }
1673                 Xcheck(*wsp, wp);
1674                 *wp++ = c;
1675         }
1676  out:
1677         /* end of variable part */
1678         *wp++ = '\0';
1679         ungetsc(c);
1680         return (wp);
1681 }
1682
1683 /*
1684  * Save an array subscript - returns true if matching bracket found, false
1685  * if eof or newline was found.
1686  * (Returned string double null terminated)
1687  */
1688 static bool
1689 arraysub(char **strp)
1690 {
1691         XString ws;
1692         char *wp, c;
1693         /* we are just past the initial [ */
1694         int depth = 1;
1695
1696         Xinit(ws, wp, 32, ATEMP);
1697
1698         do {
1699                 c = getsc();
1700                 Xcheck(ws, wp);
1701                 *wp++ = c;
1702                 if (c == '[')
1703                         depth++;
1704                 else if (c == ']')
1705                         depth--;
1706         } while (depth > 0 && c && c != '\n');
1707
1708         *wp++ = '\0';
1709         *strp = Xclose(ws, wp);
1710
1711         return (tobool(depth == 0));
1712 }
1713
1714 /* Unget a char: handles case when we are already at the start of the buffer */
1715 static void
1716 ungetsc(int c)
1717 {
1718         struct sretrace_info *rp = retrace_info;
1719
1720         if (backslash_skip)
1721                 backslash_skip--;
1722         /* Don't unget EOF... */
1723         if (source->str == null && c == '\0')
1724                 return;
1725         while (rp) {
1726                 if (Xlength(rp->xs, rp->xp))
1727                         rp->xp--;
1728                 rp = rp->next;
1729         }
1730         ungetsc_(c);
1731 }
1732 static void
1733 ungetsc_(int c)
1734 {
1735         if (source->str > source->start)
1736                 source->str--;
1737         else {
1738                 Source *s;
1739
1740                 s = pushs(SREREAD, source->areap);
1741                 s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1742                 s->start = s->str = s->ugbuf;
1743                 s->next = source;
1744                 source = s;
1745         }
1746 }
1747
1748
1749 /* Called to get a char that isn't a \newline sequence. */
1750 static int
1751 getsc_bn(void)
1752 {
1753         int c, c2;
1754
1755         if (ignore_backslash_newline)
1756                 return (o_getsc_u());
1757
1758         if (backslash_skip == 1) {
1759                 backslash_skip = 2;
1760                 return (o_getsc_u());
1761         }
1762
1763         backslash_skip = 0;
1764
1765         while (/* CONSTCOND */ 1) {
1766                 c = o_getsc_u();
1767                 if (c == '\\') {
1768                         if ((c2 = o_getsc_u()) == '\n')
1769                                 /* ignore the \newline; get the next char... */
1770                                 continue;
1771                         ungetsc_(c2);
1772                         backslash_skip = 1;
1773                 }
1774                 return (c);
1775         }
1776 }
1777
1778 void
1779 yyskiputf8bom(void)
1780 {
1781         int c;
1782
1783         if ((unsigned char)(c = o_getsc_u()) != 0xEF) {
1784                 ungetsc_(c);
1785                 return;
1786         }
1787         if ((unsigned char)(c = o_getsc_u()) != 0xBB) {
1788                 ungetsc_(c);
1789                 ungetsc_(0xEF);
1790                 return;
1791         }
1792         if ((unsigned char)(c = o_getsc_u()) != 0xBF) {
1793                 ungetsc_(c);
1794                 ungetsc_(0xBB);
1795                 ungetsc_(0xEF);
1796                 return;
1797         }
1798         UTFMODE |= 8;
1799 }
1800
1801 static Lex_state *
1802 push_state_(State_info *si, Lex_state *old_end)
1803 {
1804         Lex_state *news = alloc2(STATE_BSIZE, sizeof(Lex_state), ATEMP);
1805
1806         news[0].ls_base = old_end;
1807         si->base = &news[0];
1808         si->end = &news[STATE_BSIZE];
1809         return (&news[1]);
1810 }
1811
1812 static Lex_state *
1813 pop_state_(State_info *si, Lex_state *old_end)
1814 {
1815         Lex_state *old_base = si->base;
1816
1817         si->base = old_end->ls_base - STATE_BSIZE;
1818         si->end = old_end->ls_base;
1819
1820         afree(old_base, ATEMP);
1821
1822         return (si->base + STATE_BSIZE - 1);
1823 }
1824
/* Function form of getsc(): return the next input character. */
static int
s_get(void)
{
	int ch;

	ch = getsc();
	return (ch);
}
1830
/* Function form of ungetsc(): push the character c back onto the input. */
static void
s_put(int c)
{
	ungetsc(c);
}