OSDN Git Service

78c2ee798808424dfb7cd06f107bbfac1b90445c
[android-x86/external-mksh.git] / src / lex.c
1 /*      $OpenBSD: lex.c,v 1.51 2015/09/10 22:48:58 nicm Exp $   */
2
3 /*-
4  * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5  *               2011, 2012, 2013, 2014, 2015, 2016, 2017
6  *      mirabilos <m@mirbsd.org>
7  *
8  * Provided that these terms and disclaimer and all copyright notices
9  * are retained or reproduced in an accompanying document, permission
10  * is granted to deal in this work without restriction, including un-
11  * limited rights to use, publicly perform, distribute, sell, modify,
12  * merge, give away, or sublicence.
13  *
14  * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
15  * the utmost extent permitted by applicable law, neither express nor
16  * implied; without malicious intent or gross negligence. In no event
17  * may a licensor, author or contributor be held liable for indirect,
18  * direct, other damage, loss, or other issues arising in any way out
19  * of dealing in the work, even if advised of the possibility of such
20  * damage or existence of a defect, except proven that it results out
21  * of said person's immediate fault when using the work as intended.
22  */
23
24 #include "sh.h"
25
26 __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.234 2017/04/06 01:59:55 tg Exp $");
27
28 /*
29  * states while lexing word; one of these values is kept in Lex_state.type
30  */
31 #define SBASE           0       /* outside any lexical constructs */
32 #define SWORD           1       /* implicit quoting for substitute(); initial state for ONEWORD */
33 #define SLETPAREN       2       /* inside (( )), implicit quoting; initial state for LETEXPR */
34 #define SSQUOTE         3       /* inside '' */
35 #define SDQUOTE         4       /* inside "" */
36 #define SEQUOTE         5       /* inside $'' */
37 #define SBRACE          6       /* inside ${} */
38 #define SQBRACE         7       /* inside "${}" */
39 #define SBQUOTE         8       /* inside `` */
40 #define SASPAREN        9       /* inside $(( )) */
41 #define SHEREDELIM      10      /* parsing << or <<- delimiter; initial state for HEREDELIM */
42 #define SHEREDQUOTE     11      /* parsing " in << or <<- delimiter */
43 #define SPATTERN        12      /* parsing *(...|...) pattern (*+?@!) */
44 #define SADELIM         13      /* like SBASE, looking for delimiter */
45 #define STBRACEKORN     14      /* parsing ${...[#%]...} !FSH */
46 #define STBRACEBOURNE   15      /* parsing ${...[#%]...} FSH */
47 #define SINVALID        255     /* invalid state; yylex stores it in the unused slot states[0] */
48
49 struct sretrace_info {          /* one capture buffer on the retrace_info list */
50         struct sretrace_info *next;     /* entry that was the list head when this one was pushed */
51         XString xs;                     /* buffer that o_getsc_r() appends each read character to */
52         char *xp;                       /* current write position inside xs */
53 };
54
55 /*
56  * Structure to keep track of the lexing state and the various pieces of info
57  * needed for each particular state. Which union member is meaningful depends on the state.
58  */
59 typedef struct lex_state {
60         union {
61                 /* point to the next state block (yylex sets this to NULL in states[0]) */
62                 struct lex_state *base;
63                 /* marks start of state output in output string (set by PUSH_SRETRACE, used by POP_SRETRACE) */
64                 size_t start;
65                 /* SBQUOTE: true if in double quotes: "`...`" */
66                 /* SEQUOTE: got NUL, ignore rest of string */
67                 bool abool;
68                 /* SADELIM information */
69                 struct {
70                         /* character to search for */
71                         unsigned char delimiter;
72                         /* max. number of delimiters; the SADELIM state is popped when this reaches 0 */
73                         unsigned char num;
74                 } adelim;
75         } u;
76         /* count open parentheses */
77         short nparen;
78         /* type of this state: one of the S* state constants */
79         uint8_t type;
80 } Lex_state;
81 #define ls_base         u.base          /* shorthands for the union members of Lex_state */
82 #define ls_start        u.start
83 #define ls_bool         u.abool
84 #define ls_adelim       u.adelim
85
86 typedef struct {                /* bounds of the state block currently in use */
87         Lex_state *base;        /* first slot of the block; POP_STATE() compares statep against it */
88         Lex_state *end;         /* one past the last slot; PUSH_STATE() compares statep against it */
89 } State_info;
90
91 static void readhere(struct ioword *);
92 static void ungetsc(int);
93 static void ungetsc_i(int);
94 static int getsc_uu(void);      /* slow path taken by o_getsc_u() */
95 static void getsc_line(Source *);
96 static int getsc_bn(void);      /* slow path taken by o_getsc() */
97 static int getsc_i(void);       /* function form of getsc, usable as a callback */
98 static char *get_brace_var(XString *, char *);
99 static bool arraysub(char **);
100 static void gethere(void);
101 static Lex_state *push_state_i(State_info *, Lex_state *);      /* called by PUSH_STATE() when statep reaches the block end */
102 static Lex_state *pop_state_i(State_info *, Lex_state *);       /* called by POP_STATE() when statep reaches the block base */
103
104 static int backslash_skip;      /* non-zero forces o_getsc() onto the getsc_bn() path; yylex resets it to 0 */
105 static int ignore_backslash_newline;    /* counter, reset by yylex; raised while skipping a # comment and while inside single quotes */
106
107 /* optimised getsc_bn(): take the next byte straight from source->str unless it is NUL or a backslash, or backslash_skip is set */
108 #define o_getsc()       (*source->str != '\0' && *source->str != '\\' && \
109                             !backslash_skip ? *source->str++ : getsc_bn())
110 /* optimised getsc_uu(): take the next byte straight from source->str unless it is NUL */
111 #define o_getsc_u()     ((*source->str != '\0') ? *source->str++ : getsc_uu())
112
113 /* retrace helper: a complete body for an int function; stores carg in every buffer on the retrace_info list, then returns it */
114 #define o_getsc_r(carg)                                 \
115         int cev = (carg);       /* carg evaluated once */ \
116         struct sretrace_info *rp = retrace_info;        \
117                                                         \
118         while (rp) {                                    \
119                 Xcheck(rp->xs, rp->xp);                 \
120                 *rp->xp++ = cev;                        \
121                 rp = rp->next;                          \
122         }                                               \
123                                                         \
124         return (cev);
125
126 /* callback: getsc as a real function; yylex passes it to unbksl() while lexing $'...' */
127 static int
128 getsc_i(void)
129 {
130         o_getsc_r(o_getsc());   /* expands to the whole function body, including the return */
131 }
132
133 #if defined(MKSH_SMALL) && !defined(MKSH_SMALL_BUT_FAST)
134 #define getsc getsc_i           /* every read is a call to getsc_i() */
135 #else
136 static int getsc_r(int);
137
138 static int                      /* records c in the retrace buffers and returns it */
139 getsc_r(int c)
140 {
141         o_getsc_r(c);
142 }
143
144 #define getsc()         getsc_r(o_getsc())      /* o_getsc() expands at the call site; only the recording is a function call */
145 #endif
146
147 #define STATE_BSIZE     8       /* Lex_state slots per state block; size of yylex's states[] array */
148
149 #define PUSH_STATE(s)   do {    /* enter state s; uses yylex's statep, state_info and state */ \
150         if (++statep == state_info.end) /* ran past the last slot of the block */ \
151                 statep = push_state_i(&state_info, statep);     \
152         state = statep->type = (s);                             \
153 } while (/* CONSTCOND */ 0)
154
155 #define POP_STATE()     do {    /* return to the enclosing state; uses yylex's statep, state_info and state */ \
156         if (--statep == state_info.base) /* stepped back onto the block's first slot */ \
157                 statep = pop_state_i(&state_info, statep);      \
158         state = statep->type;                                   \
159 } while (/* CONSTCOND */ 0)
160
161 #define PUSH_SRETRACE(s) do {   /* enter state s and start recording the characters read */ \
162         struct sretrace_info *ri;                               \
163                                                                 \
164         PUSH_STATE(s);                                          \
165         statep->ls_start = Xsavepos(ws, wp); /* remember the output position */ \
166         ri = alloc(sizeof(struct sretrace_info), ATEMP);        \
167         Xinit(ri->xs, ri->xp, 64, ATEMP);                       \
168         ri->next = retrace_info; /* link in as the new list head */ \
169         retrace_info = ri;                                      \
170 } while (/* CONSTCOND */ 0)
171
172 #define POP_SRETRACE()  do {    /* stop recording: rewinds wp, leaves the recorded text in sp, pops the state */ \
173         wp = Xrestpos(ws, wp, statep->ls_start);                \
174         *retrace_info->xp = '\0';                               \
175         sp = Xstring(retrace_info->xs, retrace_info->xp);       \
176         dp = (void *)retrace_info; /* dp is only scratch here */ \
177         retrace_info = retrace_info->next;                      \
178         afree(dp, ATEMP); /* releases the list node; sp stays valid for the caller */ \
179         POP_STATE();                                            \
180 } while (/* CONSTCOND */ 0)
181
182 /**
183  * Lexical analyser
184  *
185  * Tokens are not regular expressions, they are LL(1);
186  * for example, "${var:-${PWD}}" and "$(size $(whence ksh))",
187  * hence the state stack. Note that "$(...)" is now parsed recursively.
188  */
189
190 int
191 yylex(int cf)
192 {
193         Lex_state states[STATE_BSIZE], *statep, *s2, *base;
194         State_info state_info;
195         int c, c2, state;
196         size_t cz;
197         XString ws;             /* expandable output word */
198         char *wp;               /* output word pointer */
199         char *sp, *dp;
200
201  Again:
202         states[0].type = SINVALID;
203         states[0].ls_base = NULL;
204         statep = &states[1];
205         state_info.base = states;
206         state_info.end = &state_info.base[STATE_BSIZE];
207
208         Xinit(ws, wp, 64, ATEMP);
209
210         backslash_skip = 0;
211         ignore_backslash_newline = 0;
212
213         if (cf & ONEWORD)
214                 state = SWORD;
215         else if (cf & LETEXPR) {
216                 /* enclose arguments in (double) quotes */
217                 *wp++ = OQUOTE;
218                 state = SLETPAREN;
219                 statep->nparen = 0;
220         } else {
221                 /* normal lexing */
222                 state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
223                 while ((c = getsc()) == ' ' || c == '\t')
224                         ;
225                 if (c == '#') {
226                         ignore_backslash_newline++;
227                         while ((c = getsc()) != '\0' && c != '\n')
228                                 ;
229                         ignore_backslash_newline--;
230                 }
231                 ungetsc(c);
232         }
233         if (source->flags & SF_ALIAS) {
234                 /* trailing ' ' in alias definition */
235                 source->flags &= ~SF_ALIAS;
236                 /* POSIX: trailing space only counts if parsing simple cmd */
237                 if (!Flag(FPOSIX) || (cf & CMDWORD))
238                         cf |= ALIAS;
239         }
240
241         /* Initial state: one of SWORD SLETPAREN SHEREDELIM SBASE */
242         statep->type = state;
243
244         /* collect non-special or quoted characters to form word */
245         while (!((c = getsc()) == 0 ||
246             ((state == SBASE || state == SHEREDELIM) && ctype(c, C_LEX1)))) {
247                 if (state == SBASE &&
248                     subshell_nesting_type == /*{*/ '}' &&
249                     c == /*{*/ '}')
250                         /* possibly end ${ :;} */
251                         break;
252                 Xcheck(ws, wp);
253                 switch (state) {
254                 case SADELIM:
255                         if (c == '(')
256                                 statep->nparen++;
257                         else if (c == ')')
258                                 statep->nparen--;
259                         else if (statep->nparen == 0 && (c == /*{*/ '}' ||
260                             c == (int)statep->ls_adelim.delimiter)) {
261                                 *wp++ = ADELIM;
262                                 *wp++ = c;
263                                 if (c == /*{*/ '}' || --statep->ls_adelim.num == 0)
264                                         POP_STATE();
265                                 if (c == /*{*/ '}')
266                                         POP_STATE();
267                                 break;
268                         }
269                         /* FALLTHROUGH */
270                 case SBASE:
271                         if (c == '[' && (cf & CMDASN)) {
272                                 /* temporary */
273                                 *wp = EOS;
274                                 if (is_wdvarname(Xstring(ws, wp), false)) {
275                                         char *p, *tmp;
276
277                                         if (arraysub(&tmp)) {
278                                                 *wp++ = CHAR;
279                                                 *wp++ = c;
280                                                 for (p = tmp; *p; ) {
281                                                         Xcheck(ws, wp);
282                                                         *wp++ = CHAR;
283                                                         *wp++ = *p++;
284                                                 }
285                                                 afree(tmp, ATEMP);
286                                                 break;
287                                         } else {
288                                                 Source *s;
289
290                                                 s = pushs(SREREAD,
291                                                     source->areap);
292                                                 s->start = s->str =
293                                                     s->u.freeme = tmp;
294                                                 s->next = source;
295                                                 source = s;
296                                         }
297                                 }
298                                 *wp++ = CHAR;
299                                 *wp++ = c;
300                                 break;
301                         }
302                         /* FALLTHROUGH */
303  Sbase1:                /* includes *(...|...) pattern (*+?@!) */
304                         if (c == '*' || c == '@' || c == '+' || c == '?' ||
305                             c == '!') {
306                                 c2 = getsc();
307                                 if (c2 == '(' /*)*/ ) {
308                                         *wp++ = OPAT;
309                                         *wp++ = c;
310                                         PUSH_STATE(SPATTERN);
311                                         break;
312                                 }
313                                 ungetsc(c2);
314                         }
315                         /* FALLTHROUGH */
316  Sbase2:                /* doesn't include *(...|...) pattern (*+?@!) */
317                         switch (c) {
318                         case '\\':
319  getsc_qchar:
320                                 if ((c = getsc())) {
321                                         /* trailing \ is lost */
322                                         *wp++ = QCHAR;
323                                         *wp++ = c;
324                                 }
325                                 break;
326                         case '\'':
327  open_ssquote_unless_heredoc:
328                                 if ((cf & HEREDOC))
329                                         goto store_char;
330                                 *wp++ = OQUOTE;
331                                 ignore_backslash_newline++;
332                                 PUSH_STATE(SSQUOTE);
333                                 break;
334                         case '"':
335  open_sdquote:
336                                 *wp++ = OQUOTE;
337                                 PUSH_STATE(SDQUOTE);
338                                 break;
339                         case '$':
340                                 /*
341                                  * processing of dollar sign belongs into
342                                  * Subst, except for those which can open
343                                  * a string: $'…' and $"…"
344                                  */
345  subst_dollar_ex:
346                                 c = getsc();
347                                 switch (c) {
348                                 case '"':
349                                         goto open_sdquote;
350                                 case '\'':
351                                         goto open_sequote;
352                                 default:
353                                         goto SubstS;
354                                 }
355                         default:
356                                 goto Subst;
357                         }
358                         break;
359
360  Subst:
361                         switch (c) {
362                         case '\\':
363                                 c = getsc();
364                                 switch (c) {
365                                 case '"':
366                                         if ((cf & HEREDOC))
367                                                 goto heredocquote;
368                                         /* FALLTHROUGH */
369                                 case '\\':
370                                 case '$': case '`':
371  store_qchar:
372                                         *wp++ = QCHAR;
373                                         *wp++ = c;
374                                         break;
375                                 default:
376  heredocquote:
377                                         Xcheck(ws, wp);
378                                         if (c) {
379                                                 /* trailing \ is lost */
380                                                 *wp++ = CHAR;
381                                                 *wp++ = '\\';
382                                                 *wp++ = CHAR;
383                                                 *wp++ = c;
384                                         }
385                                         break;
386                                 }
387                                 break;
388                         case '$':
389                                 c = getsc();
390  SubstS:
391                                 if (c == '(') /*)*/ {
392                                         c = getsc();
393                                         if (c == '(') /*)*/ {
394                                                 *wp++ = EXPRSUB;
395                                                 PUSH_SRETRACE(SASPAREN);
396                                                 statep->nparen = 2;
397                                                 *retrace_info->xp++ = '(';
398                                         } else {
399                                                 ungetsc(c);
400  subst_command:
401                                                 c = COMSUB;
402  subst_command2:
403                                                 sp = yyrecursive(c);
404                                                 cz = strlen(sp) + 1;
405                                                 XcheckN(ws, wp, cz);
406                                                 *wp++ = c;
407                                                 memcpy(wp, sp, cz);
408                                                 wp += cz;
409                                         }
410                                 } else if (c == '{') /*}*/ {
411                                         if ((c = getsc()) == '|') {
412                                                 /*
413                                                  * non-subenvironment
414                                                  * value substitution
415                                                  */
416                                                 c = VALSUB;
417                                                 goto subst_command2;
418                                         } else if (ctype(c, C_IFSWS)) {
419                                                 /*
420                                                  * non-subenvironment
421                                                  * "command" substitution
422                                                  */
423                                                 c = FUNSUB;
424                                                 goto subst_command2;
425                                         }
426                                         ungetsc(c);
427                                         *wp++ = OSUBST;
428                                         *wp++ = '{'; /*}*/
429                                         wp = get_brace_var(&ws, wp);
430                                         c = getsc();
431                                         /* allow :# and :% (ksh88 compat) */
432                                         if (c == ':') {
433                                                 *wp++ = CHAR;
434                                                 *wp++ = c;
435                                                 c = getsc();
436                                                 if (c == ':') {
437                                                         *wp++ = CHAR;
438                                                         *wp++ = '0';
439                                                         *wp++ = ADELIM;
440                                                         *wp++ = ':';
441                                                         PUSH_STATE(SBRACE);
442                                                         PUSH_STATE(SADELIM);
443                                                         statep->ls_adelim.delimiter = ':';
444                                                         statep->ls_adelim.num = 1;
445                                                         statep->nparen = 0;
446                                                         break;
447                                                 } else if (ksh_isdigit(c) ||
448                                                     c == '('/*)*/ || c == ' ' ||
449                                                     /*XXX what else? */
450                                                     c == '$') {
451                                                         /* substring subst. */
452                                                         if (c != ' ') {
453                                                                 *wp++ = CHAR;
454                                                                 *wp++ = ' ';
455                                                         }
456                                                         ungetsc(c);
457                                                         PUSH_STATE(SBRACE);
458                                                         PUSH_STATE(SADELIM);
459                                                         statep->ls_adelim.delimiter = ':';
460                                                         statep->ls_adelim.num = 2;
461                                                         statep->nparen = 0;
462                                                         break;
463                                                 }
464                                         } else if (c == '/') {
465                                                 c2 = ADELIM;
466  parse_adelim_slash:
467                                                 *wp++ = CHAR;
468                                                 *wp++ = c;
469                                                 if ((c = getsc()) == '/') {
470                                                         *wp++ = c2;
471                                                         *wp++ = c;
472                                                 } else
473                                                         ungetsc(c);
474                                                 PUSH_STATE(SBRACE);
475                                                 PUSH_STATE(SADELIM);
476                                                 statep->ls_adelim.delimiter = '/';
477                                                 statep->ls_adelim.num = 1;
478                                                 statep->nparen = 0;
479                                                 break;
480                                         } else if (c == '@') {
481                                                 c2 = getsc();
482                                                 ungetsc(c2);
483                                                 if (c2 == '/') {
484                                                         c2 = CHAR;
485                                                         goto parse_adelim_slash;
486                                                 }
487                                         }
488                                         /*
489                                          * If this is a trim operation,
490                                          * treat (,|,) specially in STBRACE.
491                                          */
492                                         if (ksh_issubop2(c)) {
493                                                 ungetsc(c);
494                                                 if (Flag(FSH))
495                                                         PUSH_STATE(STBRACEBOURNE);
496                                                 else
497                                                         PUSH_STATE(STBRACEKORN);
498                                         } else {
499                                                 ungetsc(c);
500                                                 if (state == SDQUOTE ||
501                                                     state == SQBRACE)
502                                                         PUSH_STATE(SQBRACE);
503                                                 else
504                                                         PUSH_STATE(SBRACE);
505                                         }
506                                 } else if (ksh_isalphx(c)) {
507                                         *wp++ = OSUBST;
508                                         *wp++ = 'X';
509                                         do {
510                                                 Xcheck(ws, wp);
511                                                 *wp++ = c;
512                                                 c = getsc();
513                                         } while (ksh_isalnux(c));
514                                         *wp++ = '\0';
515                                         *wp++ = CSUBST;
516                                         *wp++ = 'X';
517                                         ungetsc(c);
518                                 } else if (ctype(c, C_VAR1 | C_DIGIT)) {
519                                         Xcheck(ws, wp);
520                                         *wp++ = OSUBST;
521                                         *wp++ = 'X';
522                                         *wp++ = c;
523                                         *wp++ = '\0';
524                                         *wp++ = CSUBST;
525                                         *wp++ = 'X';
526                                 } else {
527                                         *wp++ = CHAR;
528                                         *wp++ = '$';
529                                         ungetsc(c);
530                                 }
531                                 break;
532                         case '`':
533  subst_gravis:
534                                 PUSH_STATE(SBQUOTE);
535                                 *wp++ = COMASUB;
536                                 /*
537                                  * We need to know whether we are within double
538                                  * quotes in order to translate \" to " within
539                                  * "…`…\"…`…" because, unlike for COMSUBs, the
540                                  * outer double quoteing changes the backslash
541                                  * meaning for the inside. For more details:
542                                  * http://austingroupbugs.net/view.php?id=1015
543                                  */
544                                 statep->ls_bool = false;
545                                 s2 = statep;
546                                 base = state_info.base;
547                                 while (/* CONSTCOND */ 1) {
548                                         for (; s2 != base; s2--) {
549                                                 if (s2->type == SDQUOTE) {
550                                                         statep->ls_bool = true;
551                                                         break;
552                                                 }
553                                         }
554                                         if (s2 != base)
555                                                 break;
556                                         if (!(s2 = s2->ls_base))
557                                                 break;
558                                         base = s2-- - STATE_BSIZE;
559                                 }
560                                 break;
561                         case QCHAR:
562                                 if (cf & LQCHAR) {
563                                         *wp++ = QCHAR;
564                                         *wp++ = getsc();
565                                         break;
566                                 }
567                                 /* FALLTHROUGH */
568                         default:
569  store_char:
570                                 *wp++ = CHAR;
571                                 *wp++ = c;
572                         }
573                         break;
574
575                 case SEQUOTE:
576                         if (c == '\'') {
577                                 POP_STATE();
578                                 *wp++ = CQUOTE;
579                                 ignore_backslash_newline--;
580                         } else if (c == '\\') {
581                                 if ((c2 = unbksl(true, getsc_i, ungetsc)) == -1)
582                                         c2 = getsc();
583                                 if (c2 == 0)
584                                         statep->ls_bool = true;
585                                 if (!statep->ls_bool) {
586                                         char ts[4];
587
588                                         if ((unsigned int)c2 < 0x100) {
589                                                 *wp++ = QCHAR;
590                                                 *wp++ = c2;
591                                         } else {
592                                                 cz = utf_wctomb(ts, c2 - 0x100);
593                                                 ts[cz] = 0;
594                                                 cz = 0;
595                                                 do {
596                                                         *wp++ = QCHAR;
597                                                         *wp++ = ts[cz];
598                                                 } while (ts[++cz]);
599                                         }
600                                 }
601                         } else if (!statep->ls_bool) {
602                                 *wp++ = QCHAR;
603                                 *wp++ = c;
604                         }
605                         break;
606
607                 case SSQUOTE:
608                         if (c == '\'') {
609                                 POP_STATE();
610                                 if ((cf & HEREDOC) || state == SQBRACE)
611                                         goto store_char;
612                                 *wp++ = CQUOTE;
613                                 ignore_backslash_newline--;
614                         } else {
615                                 *wp++ = QCHAR;
616                                 *wp++ = c;
617                         }
618                         break;
619
620                 case SDQUOTE:
621                         if (c == '"') {
622                                 POP_STATE();
623                                 *wp++ = CQUOTE;
624                         } else
625                                 goto Subst;
626                         break;
627
628                 /* $(( ... )) */
629                 case SASPAREN:
630                         if (c == '(')
631                                 statep->nparen++;
632                         else if (c == ')') {
633                                 statep->nparen--;
634                                 if (statep->nparen == 1) {
635                                         /* end of EXPRSUB */
636                                         POP_SRETRACE();
637
638                                         if ((c2 = getsc()) == /*(*/ ')') {
639                                                 cz = strlen(sp) - 2;
640                                                 XcheckN(ws, wp, cz);
641                                                 memcpy(wp, sp + 1, cz);
642                                                 wp += cz;
643                                                 afree(sp, ATEMP);
644                                                 *wp++ = '\0';
645                                                 break;
646                                         } else {
647                                                 Source *s;
648
649                                                 ungetsc(c2);
650                                                 /*
651                                                  * mismatched parenthesis -
652                                                  * assume we were really
653                                                  * parsing a $(...) expression
654                                                  */
655                                                 --wp;
656                                                 s = pushs(SREREAD,
657                                                     source->areap);
658                                                 s->start = s->str =
659                                                     s->u.freeme = sp;
660                                                 s->next = source;
661                                                 source = s;
662                                                 goto subst_command;
663                                         }
664                                 }
665                         }
666                         /* reuse existing state machine */
667                         goto Sbase2;
668
669                 case SQBRACE:
670                         if (c == '\\') {
671                                 /*
672                                  * perform POSIX "quote removal" if the back-
673                                  * slash is "special", i.e. same cases as the
674                                  * {case '\\':} in Subst: plus closing brace;
675                                  * in mksh code "quote removal" on '\c' means
676                                  * write QCHAR+c, otherwise CHAR+\+CHAR+c are
677                                  * emitted (in heredocquote:)
678                                  */
679                                 if ((c = getsc()) == '"' || c == '\\' ||
680                                     c == '$' || c == '`' || c == /*{*/'}')
681                                         goto store_qchar;
682                                 goto heredocquote;
683                         }
684                         goto common_SQBRACE;
685
686                 case SBRACE:
687                         if (c == '\'')
688                                 goto open_ssquote_unless_heredoc;
689                         else if (c == '\\')
690                                 goto getsc_qchar;
691  common_SQBRACE:
692                         if (c == '"')
693                                 goto open_sdquote;
694                         else if (c == '$')
695                                 goto subst_dollar_ex;
696                         else if (c == '`')
697                                 goto subst_gravis;
698                         else if (c != /*{*/ '}')
699                                 goto store_char;
700                         POP_STATE();
701                         *wp++ = CSUBST;
702                         *wp++ = /*{*/ '}';
703                         break;
704
705                 /* Same as SBASE, except (,|,) treated specially */
706                 case STBRACEKORN:
707                         if (c == '|')
708                                 *wp++ = SPAT;
709                         else if (c == '(') {
710                                 *wp++ = OPAT;
711                                 /* simile for @ */
712                                 *wp++ = ' ';
713                                 PUSH_STATE(SPATTERN);
714                         } else /* FALLTHROUGH */
715                 case STBRACEBOURNE:
716                           if (c == /*{*/ '}') {
717                                 POP_STATE();
718                                 *wp++ = CSUBST;
719                                 *wp++ = /*{*/ '}';
720                         } else
721                                 goto Sbase1;
722                         break;
723
724                 case SBQUOTE:
725                         if (c == '`') {
726                                 *wp++ = 0;
727                                 POP_STATE();
728                         } else if (c == '\\') {
729                                 switch (c = getsc()) {
730                                 case 0:
731                                         /* trailing \ is lost */
732                                         break;
733                                 case '$':
734                                 case '`':
735                                 case '\\':
736                                         *wp++ = c;
737                                         break;
738                                 case '"':
739                                         if (statep->ls_bool) {
740                                                 *wp++ = c;
741                                                 break;
742                                         }
743                                         /* FALLTHROUGH */
744                                 default:
745                                         *wp++ = '\\';
746                                         *wp++ = c;
747                                         break;
748                                 }
749                         } else
750                                 *wp++ = c;
751                         break;
752
753                 /* ONEWORD */
754                 case SWORD:
755                         goto Subst;
756
757                 /* LETEXPR: (( ... )) */
758                 case SLETPAREN:
759                         if (c == /*(*/ ')') {
760                                 if (statep->nparen > 0)
761                                         --statep->nparen;
762                                 else if ((c2 = getsc()) == /*(*/ ')') {
763                                         c = 0;
764                                         *wp++ = CQUOTE;
765                                         goto Done;
766                                 } else {
767                                         Source *s;
768
769                                         ungetsc(c2);
770                                         ungetsc(c);
771                                         /*
772                                          * mismatched parenthesis -
773                                          * assume we were really
774                                          * parsing a (...) expression
775                                          */
776                                         *wp = EOS;
777                                         sp = Xstring(ws, wp);
778                                         dp = wdstrip(sp + 1, WDS_TPUTS);
779                                         s = pushs(SREREAD, source->areap);
780                                         s->start = s->str = s->u.freeme = dp;
781                                         s->next = source;
782                                         source = s;
783                                         ungetsc('('/*)*/);
784                                         return ('('/*)*/);
785                                 }
786                         } else if (c == '(')
787                                 /*
788                                  * parentheses inside quotes and
789                                  * backslashes are lost, but AT&T ksh
790                                  * doesn't count them either
791                                  */
792                                 ++statep->nparen;
793                         goto Sbase2;
794
795                 /* << or <<- delimiter */
796                 case SHEREDELIM:
797                         /*
798                          * here delimiters need a special case since
799                          * $ and `...` are not to be treated specially
800                          */
801                         switch (c) {
802                         case '\\':
803                                 if ((c = getsc())) {
804                                         /* trailing \ is lost */
805                                         *wp++ = QCHAR;
806                                         *wp++ = c;
807                                 }
808                                 break;
809                         case '\'':
810                                 goto open_ssquote_unless_heredoc;
811                         case '$':
812                                 if ((c2 = getsc()) == '\'') {
813  open_sequote:
814                                         *wp++ = OQUOTE;
815                                         ignore_backslash_newline++;
816                                         PUSH_STATE(SEQUOTE);
817                                         statep->ls_bool = false;
818                                         break;
819                                 } else if (c2 == '"') {
820                                         /* FALLTHROUGH */
821                         case '"':
822                                         PUSH_SRETRACE(SHEREDQUOTE);
823                                         break;
824                                 }
825                                 ungetsc(c2);
826                                 /* FALLTHROUGH */
827                         default:
828                                 *wp++ = CHAR;
829                                 *wp++ = c;
830                         }
831                         break;
832
833                 /* " in << or <<- delimiter */
834                 case SHEREDQUOTE:
835                         if (c != '"')
836                                 goto Subst;
837                         POP_SRETRACE();
838                         dp = strnul(sp) - 1;
839                         /* remove the trailing double quote */
840                         *dp = '\0';
841                         /* store the quoted string */
842                         *wp++ = OQUOTE;
843                         XcheckN(ws, wp, (dp - sp) * 2);
844                         dp = sp;
845                         while ((c = *dp++)) {
846                                 if (c == '\\') {
847                                         switch ((c = *dp++)) {
848                                         case '\\':
849                                         case '"':
850                                         case '$':
851                                         case '`':
852                                                 break;
853                                         default:
854                                                 *wp++ = CHAR;
855                                                 *wp++ = '\\';
856                                                 break;
857                                         }
858                                 }
859                                 *wp++ = CHAR;
860                                 *wp++ = c;
861                         }
862                         afree(sp, ATEMP);
863                         *wp++ = CQUOTE;
864                         state = statep->type = SHEREDELIM;
865                         break;
866
867                 /* in *(...|...) pattern (*+?@!) */
868                 case SPATTERN:
869                         if (c == /*(*/ ')') {
870                                 *wp++ = CPAT;
871                                 POP_STATE();
872                         } else if (c == '|') {
873                                 *wp++ = SPAT;
874                         } else if (c == '(') {
875                                 *wp++ = OPAT;
876                                 /* simile for @ */
877                                 *wp++ = ' ';
878                                 PUSH_STATE(SPATTERN);
879                         } else
880                                 goto Sbase1;
881                         break;
882                 }
883         }
884  Done:
885         Xcheck(ws, wp);
886         if (statep != &states[1])
887                 /* XXX figure out what is missing */
888                 yyerror("no closing quote");
889
890         /* This done to avoid tests for SHEREDELIM wherever SBASE tested */
891         if (state == SHEREDELIM)
892                 state = SBASE;
893
894         dp = Xstring(ws, wp);
895         if (state == SBASE && (
896             (c == '&' && !Flag(FSH) && !Flag(FPOSIX)) ||
897             c == '<' || c == '>') && ((c2 = Xlength(ws, wp)) == 0 ||
898             (c2 == 2 && dp[0] == CHAR && ksh_isdigit(dp[1])))) {
899                 struct ioword *iop = alloc(sizeof(struct ioword), ATEMP);
900
901                 iop->unit = c2 == 2 ? ksh_numdig(dp[1]) : c == '<' ? 0 : 1;
902
903                 if (c == '&') {
904                         if ((c2 = getsc()) != '>') {
905                                 ungetsc(c2);
906                                 goto no_iop;
907                         }
908                         c = c2;
909                         iop->ioflag = IOBASH;
910                 } else
911                         iop->ioflag = 0;
912
913                 c2 = getsc();
914                 /* <<, >>, <> are ok, >< is not */
915                 if (c == c2 || (c == '<' && c2 == '>')) {
916                         iop->ioflag |= c == c2 ?
917                             (c == '>' ? IOCAT : IOHERE) : IORDWR;
918                         if (iop->ioflag == IOHERE) {
919                                 if ((c2 = getsc()) == '-')
920                                         iop->ioflag |= IOSKIP;
921                                 else if (c2 == '<')
922                                         iop->ioflag |= IOHERESTR;
923                                 else
924                                         ungetsc(c2);
925                         }
926                 } else if (c2 == '&')
927                         iop->ioflag |= IODUP | (c == '<' ? IORDUP : 0);
928                 else {
929                         iop->ioflag |= c == '>' ? IOWRITE : IOREAD;
930                         if (c == '>' && c2 == '|')
931                                 iop->ioflag |= IOCLOB;
932                         else
933                                 ungetsc(c2);
934                 }
935
936                 iop->ioname = NULL;
937                 iop->delim = NULL;
938                 iop->heredoc = NULL;
939                 /* free word */
940                 Xfree(ws, wp);
941                 yylval.iop = iop;
942                 return (REDIR);
943  no_iop:
944                 afree(iop, ATEMP);
945         }
946
947         if (wp == dp && state == SBASE) {
948                 /* free word */
949                 Xfree(ws, wp);
950                 /* no word, process LEX1 character */
951                 if ((c == '|') || (c == '&') || (c == ';') || (c == '('/*)*/)) {
952                         if ((c2 = getsc()) == c)
953                                 c = (c == ';') ? BREAK :
954                                     (c == '|') ? LOGOR :
955                                     (c == '&') ? LOGAND :
956                                     /* c == '(' ) */ MDPAREN;
957                         else if (c == '|' && c2 == '&')
958                                 c = COPROC;
959                         else if (c == ';' && c2 == '|')
960                                 c = BRKEV;
961                         else if (c == ';' && c2 == '&')
962                                 c = BRKFT;
963                         else
964                                 ungetsc(c2);
965 #ifndef MKSH_SMALL
966                         if (c == BREAK) {
967                                 if ((c2 = getsc()) == '&')
968                                         c = BRKEV;
969                                 else
970                                         ungetsc(c2);
971                         }
972 #endif
973                 } else if (c == '\n') {
974                         if (cf & HEREDELIM)
975                                 ungetsc(c);
976                         else {
977                                 gethere();
978                                 if (cf & CONTIN)
979                                         goto Again;
980                         }
981                 } else if (c == '\0' && !(cf & HEREDELIM)) {
982                         struct ioword **p = heres;
983
984                         while (p < herep)
985                                 if ((*p)->ioflag & IOHERESTR)
986                                         ++p;
987                                 else
988                                         /* ksh -c 'cat <<EOF' can cause this */
989                                         yyerror(Tf_heredoc,
990                                             evalstr((*p)->delim, 0));
991                 }
992                 return (c);
993         }
994
995         /* terminate word */
996         *wp++ = EOS;
997         yylval.cp = Xclose(ws, wp);
998         if (state == SWORD || state == SLETPAREN
999             /* XXX ONEWORD? */)
1000                 return (LWORD);
1001
1002         /* unget terminator */
1003         ungetsc(c);
1004
1005         /*
1006          * note: the alias-vs-function code below depends on several
1007          * interna: starting from here, source->str is not modified;
1008          * the way getsc() and ungetsc() operate; etc.
1009          */
1010
1011         /* copy word to unprefixed string ident */
1012         sp = yylval.cp;
1013         dp = ident;
1014         while ((dp - ident) < IDENT && (c = *sp++) == CHAR)
1015                 *dp++ = *sp++;
1016         if (c != EOS)
1017                 /* word is not unquoted, or space ran out */
1018                 dp = ident;
1019         /* make sure the ident array stays NUL padded */
1020         memset(dp, 0, (ident + IDENT) - dp + 1);
1021
1022         if (*ident != '\0' && (cf & (KEYWORD | ALIAS))) {
1023                 struct tbl *p;
1024                 uint32_t h = hash(ident);
1025
1026                 if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) &&
1027                     (!(cf & ESACONLY) || p->val.i == ESAC ||
1028                     p->val.i == /*{*/ '}')) {
1029                         afree(yylval.cp, ATEMP);
1030                         return (p->val.i);
1031                 }
1032                 if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) &&
1033                     (p->flag & ISSET)) {
1034                         /*
1035                          * this still points to the same character as the
1036                          * ungetsc'd terminator from above
1037                          */
1038                         const char *cp = source->str;
1039
1040                         /* prefer POSIX but not Korn functions over aliases */
1041                         while (*cp == ' ' || *cp == '\t')
1042                                 /*
1043                                  * this is like getsc() without skipping
1044                                  * over Source boundaries (including not
1045                                  * parsing ungetsc'd characters that got
1046                                  * pushed into an SREREAD) which is what
1047                                  * we want here anyway: find out whether
1048                                  * the alias name is followed by a POSIX
1049                                  * function definition
1050                                  */
1051                                 ++cp;
1052                         /* prefer functions over aliases */
1053                         if (cp[0] != '(' || cp[1] != ')') {
1054                                 Source *s = source;
1055
1056                                 while (s && (s->flags & SF_HASALIAS))
1057                                         if (s->u.tblp == p)
1058                                                 return (LWORD);
1059                                         else
1060                                                 s = s->next;
1061                                 /* push alias expansion */
1062                                 s = pushs(SALIAS, source->areap);
1063                                 s->start = s->str = p->val.s;
1064                                 s->u.tblp = p;
1065                                 s->flags |= SF_HASALIAS;
1066                                 s->line = source->line;
1067                                 s->next = source;
1068                                 if (source->type == SEOF) {
1069                                         /* prevent infinite recursion at EOS */
1070                                         source->u.tblp = p;
1071                                         source->flags |= SF_HASALIAS;
1072                                 }
1073                                 source = s;
1074                                 afree(yylval.cp, ATEMP);
1075                                 goto Again;
1076                         }
1077                 }
1078         } else if (*ident == '\0') {
1079                 /* retain typeset et al. even when quoted */
1080                 struct tbl *tt = get_builtin((dp = wdstrip(yylval.cp, 0)));
1081                 uint32_t flag = tt ? tt->flag : 0;
1082
1083                 if (flag & (DECL_UTIL | DECL_FWDR))
1084                         strlcpy(ident, dp, sizeof(ident));
1085                 afree(dp, ATEMP);
1086         }
1087
1088         return (LWORD);
1089 }
1090
1091 static void
1092 gethere(void)
1093 {
1094         struct ioword **p;
1095
1096         for (p = heres; p < herep; p++)
1097                 if (!((*p)->ioflag & IOHERESTR))
1098                         readhere(*p);
1099         herep = heres;
1100 }
1101
1102 /*
1103  * read "<<word" text into temp file
1104  */
1105
1106 static void
1107 readhere(struct ioword *iop)
1108 {
1109         int c;
1110         const char *eof, *eofp;
1111         XString xs;
1112         char *xp;
1113         size_t xpos;
1114
1115         eof = evalstr(iop->delim, 0);
1116
1117         if (!(iop->ioflag & IOEVAL))
1118                 ignore_backslash_newline++;
1119
1120         Xinit(xs, xp, 256, ATEMP);
1121
1122  heredoc_read_line:
1123         /* beginning of line */
1124         eofp = eof;
1125         xpos = Xsavepos(xs, xp);
1126         if (iop->ioflag & IOSKIP) {
1127                 /* skip over leading tabs */
1128                 while ((c = getsc()) == '\t')
1129                         ;       /* nothing */
1130                 goto heredoc_parse_char;
1131         }
1132  heredoc_read_char:
1133         c = getsc();
1134  heredoc_parse_char:
1135         /* compare with here document marker */
1136         if (!*eofp) {
1137                 /* end of here document marker, what to do? */
1138                 switch (c) {
1139                 case /*(*/ ')':
1140                         if (!subshell_nesting_type)
1141                                 /*-
1142                                  * not allowed outside $(...) or (...)
1143                                  * => mismatch
1144                                  */
1145                                 break;
1146                         /* allow $(...) or (...) to close here */
1147                         ungetsc(/*(*/ ')');
1148                         /* FALLTHROUGH */
1149                 case 0:
1150                         /*
1151                          * Allow EOF here to commands without trailing
1152                          * newlines (mksh -c '...') will work as well.
1153                          */
1154                 case '\n':
1155                         /* Newline terminates here document marker */
1156                         goto heredoc_found_terminator;
1157                 }
1158         } else if (c == *eofp++)
1159                 /* store; then read and compare next character */
1160                 goto heredoc_store_and_loop;
1161         /* nope, mismatch; read until end of line */
1162         while (c != '\n') {
1163                 if (!c)
1164                         /* oops, reached EOF */
1165                         yyerror(Tf_heredoc, eof);
1166                 /* store character */
1167                 Xcheck(xs, xp);
1168                 Xput(xs, xp, c);
1169                 /* read next character */
1170                 c = getsc();
1171         }
1172         /* we read a newline as last character */
1173  heredoc_store_and_loop:
1174         /* store character */
1175         Xcheck(xs, xp);
1176         Xput(xs, xp, c);
1177         if (c == '\n')
1178                 goto heredoc_read_line;
1179         goto heredoc_read_char;
1180
1181  heredoc_found_terminator:
1182         /* jump back to saved beginning of line */
1183         xp = Xrestpos(xs, xp, xpos);
1184         /* terminate, close and store */
1185         Xput(xs, xp, '\0');
1186         iop->heredoc = Xclose(xs, xp);
1187
1188         if (!(iop->ioflag & IOEVAL))
1189                 ignore_backslash_newline--;
1190 }
1191
1192 void
1193 yyerror(const char *fmt, ...)
1194 {
1195         va_list va;
1196
1197         /* pop aliases and re-reads */
1198         while (source->type == SALIAS || source->type == SREREAD)
1199                 source = source->next;
1200         /* zap pending input */
1201         source->str = null;
1202
1203         error_prefix(true);
1204         va_start(va, fmt);
1205         shf_vfprintf(shl_out, fmt, va);
1206         shf_putc('\n', shl_out);
1207         va_end(va);
1208         errorfz();
1209 }
1210
1211 /*
1212  * input for yylex with alias expansion
1213  */
1214
1215 Source *
1216 pushs(int type, Area *areap)
1217 {
1218         Source *s;
1219
1220         s = alloc(sizeof(Source), areap);
1221         memset(s, 0, sizeof(Source));
1222         s->type = type;
1223         s->str = null;
1224         s->areap = areap;
1225         if (type == SFILE || type == SSTDIN)
1226                 XinitN(s->xs, 256, s->areap);
1227         return (s);
1228 }
1229
1230 static int
1231 getsc_uu(void)
1232 {
1233         Source *s = source;
1234         int c;
1235
1236         while ((c = *s->str++) == 0) {
1237                 /* return 0 for EOF by default */
1238                 s->str = NULL;
1239                 switch (s->type) {
1240                 case SEOF:
1241                         s->str = null;
1242                         return (0);
1243
1244                 case SSTDIN:
1245                 case SFILE:
1246                         getsc_line(s);
1247                         break;
1248
1249                 case SWSTR:
1250                         break;
1251
1252                 case SSTRING:
1253                 case SSTRINGCMDLINE:
1254                         break;
1255
1256                 case SWORDS:
1257                         s->start = s->str = *s->u.strv++;
1258                         s->type = SWORDSEP;
1259                         break;
1260
1261                 case SWORDSEP:
1262                         if (*s->u.strv == NULL) {
1263                                 s->start = s->str = "\n";
1264                                 s->type = SEOF;
1265                         } else {
1266                                 s->start = s->str = T1space;
1267                                 s->type = SWORDS;
1268                         }
1269                         break;
1270
1271                 case SALIAS:
1272                         if (s->flags & SF_ALIASEND) {
1273                                 /* pass on an unused SF_ALIAS flag */
1274                                 source = s->next;
1275                                 source->flags |= s->flags & SF_ALIAS;
1276                                 s = source;
1277                         } else if (*s->u.tblp->val.s &&
1278                             (c = strnul(s->u.tblp->val.s)[-1], ksh_isspace(c))) {
1279                                 /* pop source stack */
1280                                 source = s = s->next;
1281                                 /*
1282                                  * Note that this alias ended with a
1283                                  * space, enabling alias expansion on
1284                                  * the following word.
1285                                  */
1286                                 s->flags |= SF_ALIAS;
1287                         } else {
1288                                 /*
1289                                  * At this point, we need to keep the current
1290                                  * alias in the source list so recursive
1291                                  * aliases can be detected and we also need to
1292                                  * return the next character. Do this by
1293                                  * temporarily popping the alias to get the
1294                                  * next character and then put it back in the
1295                                  * source list with the SF_ALIASEND flag set.
1296                                  */
1297                                 /* pop source stack */
1298                                 source = s->next;
1299                                 source->flags |= s->flags & SF_ALIAS;
1300                                 c = getsc_uu();
1301                                 if (c) {
1302                                         s->flags |= SF_ALIASEND;
1303                                         s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1304                                         s->start = s->str = s->ugbuf;
1305                                         s->next = source;
1306                                         source = s;
1307                                 } else {
1308                                         s = source;
1309                                         /* avoid reading EOF twice */
1310                                         s->str = NULL;
1311                                         break;
1312                                 }
1313                         }
1314                         continue;
1315
1316                 case SREREAD:
1317                         if (s->start != s->ugbuf)
1318                                 /* yuck */
1319                                 afree(s->u.freeme, ATEMP);
1320                         source = s = s->next;
1321                         continue;
1322                 }
1323                 if (s->str == NULL) {
1324                         s->type = SEOF;
1325                         s->start = s->str = null;
1326                         return ('\0');
1327                 }
1328                 if (s->flags & SF_ECHO) {
1329                         shf_puts(s->str, shl_out);
1330                         shf_flush(shl_out);
1331                 }
1332         }
1333         return (c);
1334 }
1335
1336 static void
1337 getsc_line(Source *s)
1338 {
1339         char *xp = Xstring(s->xs, xp), *cp;
1340         bool interactive = Flag(FTALKING) && s->type == SSTDIN;
1341         bool have_tty = tobool(interactive && (s->flags & SF_TTY));
1342
1343         /* Done here to ensure nothing odd happens when a timeout occurs */
1344         XcheckN(s->xs, xp, LINE);
1345         *xp = '\0';
1346         s->start = s->str = xp;
1347
1348         if (have_tty && ksh_tmout) {
1349                 ksh_tmout_state = TMOUT_READING;
1350                 alarm(ksh_tmout);
1351         }
1352         if (interactive) {
1353                 if (cur_prompt == PS1)
1354                         histsave(&s->line, NULL, HIST_FLUSH, true);
1355                 change_winsz();
1356         }
1357 #ifndef MKSH_NO_CMDLINE_EDITING
1358         if (have_tty && (
1359 #if !MKSH_S_NOVI
1360             Flag(FVI) ||
1361 #endif
1362             Flag(FEMACS) || Flag(FGMACS))) {
1363                 int nread;
1364
1365                 nread = x_read(xp);
1366                 if (nread < 0)
1367                         /* read error */
1368                         nread = 0;
1369                 xp[nread] = '\0';
1370                 xp += nread;
1371         } else
1372 #endif
1373           {
1374                 if (interactive)
1375                         pprompt(prompt, 0);
1376                 else
1377                         s->line++;
1378
1379                 while (/* CONSTCOND */ 1) {
1380                         char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
1381
1382                         if (!p && shf_error(s->u.shf) &&
1383                             shf_errno(s->u.shf) == EINTR) {
1384                                 shf_clearerr(s->u.shf);
1385                                 if (trap)
1386                                         runtraps(0);
1387                                 continue;
1388                         }
1389                         if (!p || (xp = p, xp[-1] == '\n'))
1390                                 break;
1391                         /* double buffer size */
1392                         /* move past NUL so doubling works... */
1393                         xp++;
1394                         XcheckN(s->xs, xp, Xlength(s->xs, xp));
1395                         /* ...and move back again */
1396                         xp--;
1397                 }
1398                 /*
1399                  * flush any unwanted input so other programs/builtins
1400                  * can read it. Not very optimal, but less error prone
1401                  * than flushing else where, dealing with redirections,
1402                  * etc.
1403                  * TODO: reduce size of shf buffer (~128?) if SSTDIN
1404                  */
1405                 if (s->type == SSTDIN)
1406                         shf_flush(s->u.shf);
1407         }
1408         /*
1409          * XXX: temporary kludge to restore source after a
1410          * trap may have been executed.
1411          */
1412         source = s;
1413         if (have_tty && ksh_tmout) {
1414                 ksh_tmout_state = TMOUT_EXECUTING;
1415                 alarm(0);
1416         }
1417         cp = Xstring(s->xs, xp);
1418         rndpush(cp);
1419         s->start = s->str = cp;
1420         strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
1421         /* Note: if input is all nulls, this is not eof */
1422         if (Xlength(s->xs, xp) == 0) {
1423                 /* EOF */
1424                 if (s->type == SFILE)
1425                         shf_fdclose(s->u.shf);
1426                 s->str = NULL;
1427         } else if (interactive && *s->str) {
1428                 if (cur_prompt != PS1)
1429                         histsave(&s->line, s->str, HIST_APPEND, true);
1430                 else if (!ctype(*s->str, C_IFS | C_IFSWS))
1431                         histsave(&s->line, s->str, HIST_QUEUE, true);
1432 #if !defined(MKSH_SMALL) && HAVE_PERSISTENT_HISTORY
1433                 else
1434                         goto check_for_sole_return;
1435         } else if (interactive && cur_prompt == PS1) {
1436  check_for_sole_return:
1437                 cp = Xstring(s->xs, xp);
1438                 while (*cp && ctype(*cp, C_IFSWS))
1439                         ++cp;
1440                 if (!*cp) {
1441                         histsave(&s->line, NULL, HIST_FLUSH, true);
1442                         histsync();
1443                 }
1444 #endif
1445         }
1446         if (interactive)
1447                 set_prompt(PS2, NULL);
1448 }
1449
1450 void
1451 set_prompt(int to, Source *s)
1452 {
1453         cur_prompt = (uint8_t)to;
1454
1455         switch (to) {
1456         /* command */
1457         case PS1:
1458                 /*
1459                  * Substitute ! and !! here, before substitutions are done
1460                  * so ! in expanded variables are not expanded.
1461                  * NOTE: this is not what AT&T ksh does (it does it after
1462                  * substitutions, POSIX doesn't say which is to be done.
1463                  */
1464                 {
1465                         struct shf *shf;
1466                         char * volatile ps1;
1467                         Area *saved_atemp;
1468                         int saved_lineno;
1469
1470                         ps1 = str_val(global("PS1"));
1471                         shf = shf_sopen(NULL, strlen(ps1) * 2,
1472                             SHF_WR | SHF_DYNAMIC, NULL);
1473                         while (*ps1)
1474                                 if (*ps1 != '!' || *++ps1 == '!')
1475                                         shf_putchar(*ps1++, shf);
1476                                 else
1477                                         shf_fprintf(shf, Tf_lu, s ?
1478                                             (unsigned long)s->line + 1 : 0UL);
1479                         ps1 = shf_sclose(shf);
1480                         saved_lineno = current_lineno;
1481                         if (s)
1482                                 current_lineno = s->line + 1;
1483                         saved_atemp = ATEMP;
1484                         newenv(E_ERRH);
1485                         if (kshsetjmp(e->jbuf)) {
1486                                 prompt = safe_prompt;
1487                                 /*
1488                                  * Don't print an error - assume it has already
1489                                  * been printed. Reason is we may have forked
1490                                  * to run a command and the child may be
1491                                  * unwinding its stack through this code as it
1492                                  * exits.
1493                                  */
1494                         } else {
1495                                 char *cp = substitute(ps1, 0);
1496                                 strdupx(prompt, cp, saved_atemp);
1497                         }
1498                         current_lineno = saved_lineno;
1499                         quitenv(NULL);
1500                 }
1501                 break;
1502         /* command continuation */
1503         case PS2:
1504                 prompt = str_val(global("PS2"));
1505                 break;
1506         }
1507 }
1508
1509 int
1510 pprompt(const char *cp, int ntruncate)
1511 {
1512         char delimiter = 0;
1513         bool doprint = (ntruncate != -1);
1514         bool indelimit = false;
1515         int columns = 0, lines = 0;
1516
1517         /*
1518          * Undocumented AT&T ksh feature:
1519          * If the second char in the prompt string is \r then the first
1520          * char is taken to be a non-printing delimiter and any chars
1521          * between two instances of the delimiter are not considered to
1522          * be part of the prompt length
1523          */
1524         if (*cp && cp[1] == '\r') {
1525                 delimiter = *cp;
1526                 cp += 2;
1527         }
1528         for (; *cp; cp++) {
1529                 if (indelimit && *cp != delimiter)
1530                         ;
1531                 else if (*cp == '\n' || *cp == '\r') {
1532                         lines += columns / x_cols + ((*cp == '\n') ? 1 : 0);
1533                         columns = 0;
1534                 } else if (*cp == '\t') {
1535                         columns = (columns | 7) + 1;
1536                 } else if (*cp == '\b') {
1537                         if (columns > 0)
1538                                 columns--;
1539                 } else if (*cp == delimiter)
1540                         indelimit = !indelimit;
1541                 else if (UTFMODE && ((unsigned char)*cp > 0x7F)) {
1542                         const char *cp2;
1543                         columns += utf_widthadj(cp, &cp2);
1544                         if (doprint && (indelimit ||
1545                             (ntruncate < (x_cols * lines + columns))))
1546                                 shf_write(cp, cp2 - cp, shl_out);
1547                         cp = cp2 - /* loop increment */ 1;
1548                         continue;
1549                 } else
1550                         columns++;
1551                 if (doprint && (*cp != delimiter) &&
1552                     (indelimit || (ntruncate < (x_cols * lines + columns))))
1553                         shf_putc(*cp, shl_out);
1554         }
1555         if (doprint)
1556                 shf_flush(shl_out);
1557         return (x_cols * lines + columns);
1558 }
1559
1560 /*
1561  * Read the variable part of a ${...} expression (i.e. up to but not
1562  * including the :[-+?=#%] or close-brace).
1563  */
1564 static char *
1565 get_brace_var(XString *wsp, char *wp)
1566 {
1567         char c;
1568         enum parse_state {
1569                 PS_INITIAL, PS_SAW_PERCENT, PS_SAW_HASH, PS_SAW_BANG,
1570                 PS_IDENT, PS_NUMBER, PS_VAR1
1571         } state = PS_INITIAL;
1572
1573         while (/* CONSTCOND */ 1) {
1574                 c = getsc();
1575                 /* State machine to figure out where the variable part ends. */
1576                 switch (state) {
1577                 case PS_SAW_HASH:
1578                         if (ctype(c, C_VAR1)) {
1579                                 char c2;
1580
1581                                 c2 = getsc();
1582                                 ungetsc(c2);
1583                                 if (c2 != /*{*/ '}') {
1584                                         ungetsc(c);
1585                                         goto out;
1586                                 }
1587                         }
1588                         goto ps_common;
1589                 case PS_SAW_BANG:
1590                         switch (c) {
1591                         case '@':
1592                         case '#':
1593                         case '-':
1594                         case '?':
1595                                 goto out;
1596                         }
1597                         goto ps_common;
1598                 case PS_INITIAL:
1599                         switch (c) {
1600                         case '%':
1601                                 state = PS_SAW_PERCENT;
1602                                 goto next;
1603                         case '#':
1604                                 state = PS_SAW_HASH;
1605                                 goto next;
1606                         case '!':
1607                                 state = PS_SAW_BANG;
1608                                 goto next;
1609                         }
1610                         /* FALLTHROUGH */
1611                 case PS_SAW_PERCENT:
1612  ps_common:
1613                         if (ksh_isalphx(c))
1614                                 state = PS_IDENT;
1615                         else if (ksh_isdigit(c))
1616                                 state = PS_NUMBER;
1617                         else if (ctype(c, C_VAR1))
1618                                 state = PS_VAR1;
1619                         else
1620                                 goto out;
1621                         break;
1622                 case PS_IDENT:
1623                         if (!ksh_isalnux(c)) {
1624                                 if (c == '[') {
1625                                         char *tmp, *p;
1626
1627                                         if (!arraysub(&tmp))
1628                                                 yyerror("missing ]");
1629                                         *wp++ = c;
1630                                         for (p = tmp; *p; ) {
1631                                                 Xcheck(*wsp, wp);
1632                                                 *wp++ = *p++;
1633                                         }
1634                                         afree(tmp, ATEMP);
1635                                         /* the ] */
1636                                         c = getsc();
1637                                 }
1638                                 goto out;
1639                         }
1640  next:
1641                         break;
1642                 case PS_NUMBER:
1643                         if (!ksh_isdigit(c))
1644                                 goto out;
1645                         break;
1646                 case PS_VAR1:
1647                         goto out;
1648                 }
1649                 Xcheck(*wsp, wp);
1650                 *wp++ = c;
1651         }
1652  out:
1653         /* end of variable part */
1654         *wp++ = '\0';
1655         ungetsc(c);
1656         return (wp);
1657 }
1658
1659 /*
1660  * Save an array subscript - returns true if matching bracket found, false
1661  * if eof or newline was found.
1662  * (Returned string double null terminated)
1663  */
1664 static bool
1665 arraysub(char **strp)
1666 {
1667         XString ws;
1668         char *wp, c;
1669         /* we are just past the initial [ */
1670         unsigned int depth = 1;
1671
1672         Xinit(ws, wp, 32, ATEMP);
1673
1674         do {
1675                 c = getsc();
1676                 Xcheck(ws, wp);
1677                 *wp++ = c;
1678                 if (c == '[')
1679                         depth++;
1680                 else if (c == ']')
1681                         depth--;
1682         } while (depth > 0 && c && c != '\n');
1683
1684         *wp++ = '\0';
1685         *strp = Xclose(ws, wp);
1686
1687         return (tobool(depth == 0));
1688 }
1689
1690 /* Unget a char: handles case when we are already at the start of the buffer */
1691 static void
1692 ungetsc(int c)
1693 {
1694         struct sretrace_info *rp = retrace_info;
1695
1696         if (backslash_skip)
1697                 backslash_skip--;
1698         /* Don't unget EOF... */
1699         if (source->str == null && c == '\0')
1700                 return;
1701         while (rp) {
1702                 if (Xlength(rp->xs, rp->xp))
1703                         rp->xp--;
1704                 rp = rp->next;
1705         }
1706         ungetsc_i(c);
1707 }
1708 static void
1709 ungetsc_i(int c)
1710 {
1711         if (source->str > source->start)
1712                 source->str--;
1713         else {
1714                 Source *s;
1715
1716                 s = pushs(SREREAD, source->areap);
1717                 s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1718                 s->start = s->str = s->ugbuf;
1719                 s->next = source;
1720                 source = s;
1721         }
1722 }
1723
1724
1725 /* Called to get a char that isn't a \newline sequence. */
1726 static int
1727 getsc_bn(void)
1728 {
1729         int c, c2;
1730
1731         if (ignore_backslash_newline)
1732                 return (o_getsc_u());
1733
1734         if (backslash_skip == 1) {
1735                 backslash_skip = 2;
1736                 return (o_getsc_u());
1737         }
1738
1739         backslash_skip = 0;
1740
1741         while (/* CONSTCOND */ 1) {
1742                 c = o_getsc_u();
1743                 if (c == '\\') {
1744                         if ((c2 = o_getsc_u()) == '\n')
1745                                 /* ignore the \newline; get the next char... */
1746                                 continue;
1747                         ungetsc_i(c2);
1748                         backslash_skip = 1;
1749                 }
1750                 return (c);
1751         }
1752 }
1753
1754 void
1755 yyskiputf8bom(void)
1756 {
1757         int c;
1758
1759         if ((unsigned char)(c = o_getsc_u()) != 0xEF) {
1760                 ungetsc_i(c);
1761                 return;
1762         }
1763         if ((unsigned char)(c = o_getsc_u()) != 0xBB) {
1764                 ungetsc_i(c);
1765                 ungetsc_i(0xEF);
1766                 return;
1767         }
1768         if ((unsigned char)(c = o_getsc_u()) != 0xBF) {
1769                 ungetsc_i(c);
1770                 ungetsc_i(0xBB);
1771                 ungetsc_i(0xEF);
1772                 return;
1773         }
1774         UTFMODE |= 8;
1775 }
1776
1777 static Lex_state *
1778 push_state_i(State_info *si, Lex_state *old_end)
1779 {
1780         Lex_state *news = alloc2(STATE_BSIZE, sizeof(Lex_state), ATEMP);
1781
1782         news[0].ls_base = old_end;
1783         si->base = &news[0];
1784         si->end = &news[STATE_BSIZE];
1785         return (&news[1]);
1786 }
1787
1788 static Lex_state *
1789 pop_state_i(State_info *si, Lex_state *old_end)
1790 {
1791         Lex_state *old_base = si->base;
1792
1793         si->base = old_end->ls_base - STATE_BSIZE;
1794         si->end = old_end->ls_base;
1795
1796         afree(old_base, ATEMP);
1797
1798         return (si->base + STATE_BSIZE - 1);
1799 }