OSDN Git Service

e5305ae71a12a1778a902480a8387cfd985acb8a
[android-x86/external-mksh.git] / src / lex.c
1 /*      $OpenBSD: lex.c,v 1.51 2015/09/10 22:48:58 nicm Exp $   */
2
3 /*-
4  * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5  *               2011, 2012, 2013, 2014, 2015, 2016
6  *      mirabilos <m@mirbsd.org>
7  *
8  * Provided that these terms and disclaimer and all copyright notices
9  * are retained or reproduced in an accompanying document, permission
10  * is granted to deal in this work without restriction, including un-
11  * limited rights to use, publicly perform, distribute, sell, modify,
12  * merge, give away, or sublicence.
13  *
14  * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
15  * the utmost extent permitted by applicable law, neither express nor
16  * implied; without malicious intent or gross negligence. In no event
17  * may a licensor, author or contributor be held liable for indirect,
18  * direct, other damage, loss, or other issues arising in any way out
19  * of dealing in the work, even if advised of the possibility of such
20  * damage or existence of a defect, except proven that it results out
21  * of said person's immediate fault when using the work as intended.
22  */
23
24 #include "sh.h"
25
26 __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.218 2016/01/20 21:34:12 tg Exp $");
27
28 /*
29  * states while lexing word
    *
    * One of these values is kept in Lex_state.type for every entry on the
    * state stack; yylex() mirrors the topmost one in its local "state".
    * SINVALID is what yylex() stores in slot 0 of its state array, the
    * entry that sits below the first real state.
30  */
31 #define SBASE           0       /* outside any lexical constructs */
32 #define SWORD           1       /* implicit quoting for substitute() */
33 #define SLETPAREN       2       /* inside (( )), implicit quoting */
34 #define SSQUOTE         3       /* inside '' */
35 #define SDQUOTE         4       /* inside "" */
36 #define SEQUOTE         5       /* inside $'' */
37 #define SBRACE          6       /* inside ${} */
38 #define SQBRACE         7       /* inside "${}" */
39 #define SBQUOTE         8       /* inside `` */
40 #define SASPAREN        9       /* inside $(( )) */
41 #define SHEREDELIM      10      /* parsing << or <<- delimiter */
42 #define SHEREDQUOTE     11      /* parsing " in << or <<- delimiter */
43 #define SPATTERN        12      /* parsing *(...|...) pattern (*+?@!) */
44 #define SADELIM         13      /* like SBASE, looking for delimiter */
45 #define STBRACEKORN     14      /* parsing ${...[#%]...}, Flag(FSH) unset */
46 #define STBRACEBOURNE   15      /* parsing ${...[#%]...}, Flag(FSH) set */
47 #define SINVALID        255     /* invalid state (bottom-of-block sentinel) */
48
/*
 * One node of the retrace list whose head is retrace_info. While a node
 * is linked, o_getsc_r appends every character it returns to the node's
 * buffer. PUSH_SRETRACE links a new node at the head; POP_SRETRACE
 * unlinks it again and hands the collected text over in sp.
 */
49 struct sretrace_info {
50         struct sretrace_info *next;     /* following node; NULL ends the list */
51         XString xs;                     /* buffer receiving the copied input */
52         char *xp;                       /* current write position within xs */
53 };
54
55 /*
56  * Structure to keep track of the lexing state and the various pieces of info
57  * needed for each particular state.
    *
    * The members of the union share storage, so only the one belonging to
    * the entry's current use is meaningful. nparen and type live outside
    * the union and can be combined with any of them (SADELIM, for example,
    * uses both ls_adelim and nparen).
58  */
59 typedef struct lex_state {
60         union {
61                 /* slot 0 of a block: link to another state block (NULL in yylex()'s initial block) */
62                 struct lex_state *base;
63                 /* marks start of state output in output string (saved by PUSH_SRETRACE) */
64                 size_t start;
65                 /* SBQUOTE: true if in double quotes: "`...`" */
66                 /* SEQUOTE: got NUL, ignore rest of string */
67                 bool abool;
68                 /* SADELIM information */
69                 struct {
70                         /* character to search for */
71                         unsigned char delimiter;
72                         /* delimiters still to find; SADELIM is popped when this reaches 0 */
73                         unsigned char num;
74                 } adelim;
75         } u;
76         /* count open parentheses (e.g. in SLETPAREN, SADELIM, SASPAREN) */
77         short nparen;
78         /* type of this state: one of the S* constants */
79         uint8_t type;
80 } Lex_state;
   /* shorthands for the members of the union in Lex_state */
81 #define ls_base         u.base
82 #define ls_start        u.start
83 #define ls_bool         u.abool
84 #define ls_adelim       u.adelim
85
/*
 * Bounds of the state block currently in use. yylex() points base at the
 * block's first entry and end STATE_BSIZE entries further, i.e. one past
 * the last. PUSH_STATE and POP_STATE compare statep against these to
 * notice when they step off either side of the block.
 */
86 typedef struct {
87         Lex_state *base;        /* first entry of the block (the slot holding ls_base) */
88         Lex_state *end;         /* one past the last entry of the block */
89 } State_info;
90
/* forward declarations of the file-local helpers */
91 static void readhere(struct ioword *);
92 static void ungetsc(int);
93 static void ungetsc_i(int);
94 static int getsc_uu(void);
95 static void getsc_line(Source *);
96 static int getsc_bn(void);
97 static int getsc_i(void);
98 static char *get_brace_var(XString *, char *);
99 static bool arraysub(char **);
100 static void gethere(void);
    /* called by PUSH_STATE / POP_STATE when statep reaches a block boundary */
101 static Lex_state *push_state_i(State_info *, Lex_state *);
102 static Lex_state *pop_state_i(State_info *, Lex_state *);
103
    /*
     * Both counters are reset to 0 at the top of yylex(). A nonzero
     * backslash_skip forces o_getsc() onto its getsc_bn() slow path.
     * yylex() increments ignore_backslash_newline around the skipping of
     * a # comment and when it opens a '...' quote.
     * NOTE(review): presumably getsc_bn() consults it; that function is
     * not visible in this part of the file -- confirm there.
     */
104 static int backslash_skip;
105 static int ignore_backslash_newline;
106
107 /*
     * optimised getsc_bn(): takes the next byte straight from source->str
     * (advancing the pointer) unless that byte is NUL or a backslash, or
     * backslash_skip is nonzero; in those cases getsc_bn() is called
     */
108 #define o_getsc()       (*source->str != '\0' && *source->str != '\\' && \
109                             !backslash_skip ? *source->str++ : getsc_bn())
110 /* optimised getsc_uu(): falls back to getsc_uu() only when the next byte is NUL */
111 #define o_getsc_u()     ((*source->str != '\0') ? *source->str++ : getsc_uu())
112
113 /*
     * retrace helper: evaluates carg once into cev, appends cev to the
     * buffer of every node on the retrace_info list (after an Xcheck on
     * that buffer), then returns cev.
     *
     * The expansion declares locals and ends in a return statement, so it
     * is only usable as the complete body of a function returning int.
     */
114 #define o_getsc_r(carg)                                 \
115         int cev = (carg);                               \
116         struct sretrace_info *rp = retrace_info;        \
117                                                         \
118         while (rp) {                                    \
119                 Xcheck(rp->xs, rp->xp);                 \
120                 *rp->xp++ = cev;                        \
121                 rp = rp->next;                          \
122         }                                               \
123                                                         \
124         return (cev);
125
126 /*
     * callback: real-function form of the character reader (yylex() passes
     * it to unbksl()). Reads one character with o_getsc() and records it
     * on all active retrace buffers through o_getsc_r, returning it.
     */
127 static int
128 getsc_i(void)
129 {
130         o_getsc_r(o_getsc());
131 }
132
/*
 * getsc reads one character and records it for retracing. With
 * MKSH_SMALL defined and MKSH_SMALL_BUT_FAST not defined it is simply
 * another name for getsc_i. Otherwise o_getsc() is expanded at every
 * call site and only the retrace bookkeeping goes through getsc_r().
 */
133 #if defined(MKSH_SMALL) && !defined(MKSH_SMALL_BUT_FAST)
134 #define getsc getsc_i
135 #else
136 static int getsc_r(int);
137
    /* record the already-read character c on the retrace buffers and return it */
138 static int
139 getsc_r(int c)
140 {
141         o_getsc_r(c);
142 }
143
144 #define getsc()         getsc_r(o_getsc())
145 #endif
146
/* number of Lex_state entries in one state block (size of the states array in yylex()) */
147 #define STATE_BSIZE     8
148
    /*
     * Enter lexing state s. Advances statep to the next entry; if that
     * runs into state_info.end, push_state_i() supplies the entry to use
     * instead. The entry's type and the local variable state are both set
     * to s. Expects statep, state_info and state to be in scope.
     */
149 #define PUSH_STATE(s)   do {                                    \
150         if (++statep == state_info.end)                         \
151                 statep = push_state_i(&state_info, statep);     \
152         state = statep->type = (s);                             \
153 } while (/* CONSTCOND */ 0)
154
    /*
     * Leave the current lexing state. Steps statep back one entry; if that
     * lands on state_info.base, pop_state_i() supplies the entry to
     * continue with. The local variable state is reloaded from the type
     * of the entry that is now current.
     */
155 #define POP_STATE()     do {                                    \
156         if (--statep == state_info.base)                        \
157                 statep = pop_state_i(&state_info, statep);      \
158         state = statep->type;                                   \
159 } while (/* CONSTCOND */ 0)
160
/*
 * Enter state s and start retracing input: the current output position
 * (ws, wp) is saved in statep->ls_start and a newly allocated
 * sretrace_info, set up with Xinit(..., 64, ATEMP), is linked at the head
 * of retrace_info. Expects ws, wp and the PUSH_STATE locals in scope.
 */
161 #define PUSH_SRETRACE(s) do {                                   \
162         struct sretrace_info *ri;                               \
163                                                                 \
164         PUSH_STATE(s);                                          \
165         statep->ls_start = Xsavepos(ws, wp);                    \
166         ri = alloc(sizeof(struct sretrace_info), ATEMP);        \
167         Xinit(ri->xs, ri->xp, 64, ATEMP);                       \
168         ri->next = retrace_info;                                \
169         retrace_info = ri;                                      \
170 } while (/* CONSTCOND */ 0)
171
    /*
     * End the innermost retrace: wp is rewound to the position saved in
     * ls_start, the retrace buffer is NUL-terminated and left in sp, the
     * list node is unlinked and freed (dp serves as scratch), and finally
     * the state is popped. The string in sp is not freed here; the caller
     * visible in yylex() afree()s it from ATEMP once done with it.
     */
172 #define POP_SRETRACE()  do {                                    \
173         wp = Xrestpos(ws, wp, statep->ls_start);                \
174         *retrace_info->xp = '\0';                               \
175         sp = Xstring(retrace_info->xs, retrace_info->xp);       \
176         dp = (void *)retrace_info;                              \
177         retrace_info = retrace_info->next;                      \
178         afree(dp, ATEMP);                                       \
179         POP_STATE();                                            \
180 } while (/* CONSTCOND */ 0)
181
182 /**
183  * Lexical analyser
184  *
185  * tokens are not regular expressions, they are LL(1).
186  * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
187  * hence the state stack. Note "$(...)" are now parsed recursively.
188  */
189
190 int
191 yylex(int cf)
192 {
193         Lex_state states[STATE_BSIZE], *statep, *s2, *base;
194         State_info state_info;
195         int c, c2, state;
196         size_t cz;
197         XString ws;             /* expandable output word */
198         char *wp;               /* output word pointer */
199         char *sp, *dp;
200
201  Again:
202         states[0].type = SINVALID;
203         states[0].ls_base = NULL;
204         statep = &states[1];
205         state_info.base = states;
206         state_info.end = &state_info.base[STATE_BSIZE];
207
208         Xinit(ws, wp, 64, ATEMP);
209
210         backslash_skip = 0;
211         ignore_backslash_newline = 0;
212
213         if (cf & ONEWORD)
214                 state = SWORD;
215         else if (cf & LETEXPR) {
216                 /* enclose arguments in (double) quotes */
217                 *wp++ = OQUOTE;
218                 state = SLETPAREN;
219                 statep->nparen = 0;
220         } else {
221                 /* normal lexing */
222                 state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
223                 while ((c = getsc()) == ' ' || c == '\t')
224                         ;
225                 if (c == '#') {
226                         ignore_backslash_newline++;
227                         while ((c = getsc()) != '\0' && c != '\n')
228                                 ;
229                         ignore_backslash_newline--;
230                 }
231                 ungetsc(c);
232         }
233         if (source->flags & SF_ALIAS) {
234                 /* trailing ' ' in alias definition */
235                 source->flags &= ~SF_ALIAS;
236                 /* POSIX: trailing space only counts if parsing simple cmd */
237                 if (!Flag(FPOSIX) || (cf & CMDWORD))
238                         cf |= ALIAS;
239         }
240
241         /* Initial state: one of SWORD SLETPAREN SHEREDELIM SBASE */
242         statep->type = state;
243
244         /* collect non-special or quoted characters to form word */
245         while (!((c = getsc()) == 0 ||
246             ((state == SBASE || state == SHEREDELIM) && ctype(c, C_LEX1)))) {
247                 if (state == SBASE &&
248                     subshell_nesting_type == /*{*/ '}' &&
249                     c == /*{*/ '}')
250                         /* possibly end ${ :;} */
251                         break;
252                 Xcheck(ws, wp);
253                 switch (state) {
254                 case SADELIM:
255                         if (c == '(')
256                                 statep->nparen++;
257                         else if (c == ')')
258                                 statep->nparen--;
259                         else if (statep->nparen == 0 && (c == /*{*/ '}' ||
260                             c == (int)statep->ls_adelim.delimiter)) {
261                                 *wp++ = ADELIM;
262                                 *wp++ = c;
263                                 if (c == /*{*/ '}' || --statep->ls_adelim.num == 0)
264                                         POP_STATE();
265                                 if (c == /*{*/ '}')
266                                         POP_STATE();
267                                 break;
268                         }
269                         /* FALLTHROUGH */
270                 case SBASE:
271                         if (c == '[' && (cf & CMDASN)) {
272                                 /* temporary */
273                                 *wp = EOS;
274                                 if (is_wdvarname(Xstring(ws, wp), false)) {
275                                         char *p, *tmp;
276
277                                         if (arraysub(&tmp)) {
278                                                 *wp++ = CHAR;
279                                                 *wp++ = c;
280                                                 for (p = tmp; *p; ) {
281                                                         Xcheck(ws, wp);
282                                                         *wp++ = CHAR;
283                                                         *wp++ = *p++;
284                                                 }
285                                                 afree(tmp, ATEMP);
286                                                 break;
287                                         } else {
288                                                 Source *s;
289
290                                                 s = pushs(SREREAD,
291                                                     source->areap);
292                                                 s->start = s->str =
293                                                     s->u.freeme = tmp;
294                                                 s->next = source;
295                                                 source = s;
296                                         }
297                                 }
298                                 *wp++ = CHAR;
299                                 *wp++ = c;
300                                 break;
301                         }
302                         /* FALLTHROUGH */
303  Sbase1:                /* includes *(...|...) pattern (*+?@!) */
304                         if (c == '*' || c == '@' || c == '+' || c == '?' ||
305                             c == '!') {
306                                 c2 = getsc();
307                                 if (c2 == '(' /*)*/ ) {
308                                         *wp++ = OPAT;
309                                         *wp++ = c;
310                                         PUSH_STATE(SPATTERN);
311                                         break;
312                                 }
313                                 ungetsc(c2);
314                         }
315                         /* FALLTHROUGH */
316  Sbase2:                /* doesn't include *(...|...) pattern (*+?@!) */
317                         switch (c) {
318                         case '\\':
319  getsc_qchar:
320                                 if ((c = getsc())) {
321                                         /* trailing \ is lost */
322                                         *wp++ = QCHAR;
323                                         *wp++ = c;
324                                 }
325                                 break;
326                         case '\'':
327  open_ssquote_unless_heredoc:
328                                 if ((cf & HEREDOC))
329                                         goto store_char;
330                                 *wp++ = OQUOTE;
331                                 ignore_backslash_newline++;
332                                 PUSH_STATE(SSQUOTE);
333                                 break;
334                         case '"':
335  open_sdquote:
336                                 *wp++ = OQUOTE;
337                                 PUSH_STATE(SDQUOTE);
338                                 break;
339                         case '$':
340                                 /*
341                                  * processing of dollar sign belongs into
342                                  * Subst, except for those which can open
343                                  * a string: $'…' and $"…"
344                                  */
345  subst_dollar_ex:
346                                 c = getsc();
347                                 switch (c) {
348                                 case '"':
349                                         goto open_sdquote;
350                                 case '\'':
351                                         goto open_sequote;
352                                 default:
353                                         goto SubstS;
354                                 }
355                         default:
356                                 goto Subst;
357                         }
358                         break;
359
360  Subst:
361                         switch (c) {
362                         case '\\':
363                                 c = getsc();
364                                 switch (c) {
365                                 case '"':
366                                         if ((cf & HEREDOC))
367                                                 goto heredocquote;
368                                         /* FALLTHROUGH */
369                                 case '\\':
370                                 case '$': case '`':
371  store_qchar:
372                                         *wp++ = QCHAR;
373                                         *wp++ = c;
374                                         break;
375                                 default:
376  heredocquote:
377                                         Xcheck(ws, wp);
378                                         if (c) {
379                                                 /* trailing \ is lost */
380                                                 *wp++ = CHAR;
381                                                 *wp++ = '\\';
382                                                 *wp++ = CHAR;
383                                                 *wp++ = c;
384                                         }
385                                         break;
386                                 }
387                                 break;
388                         case '$':
389                                 c = getsc();
390  SubstS:
391                                 if (c == '(') /*)*/ {
392                                         c = getsc();
393                                         if (c == '(') /*)*/ {
394                                                 *wp++ = EXPRSUB;
395                                                 PUSH_SRETRACE(SASPAREN);
396                                                 statep->nparen = 2;
397                                                 *retrace_info->xp++ = '(';
398                                         } else {
399                                                 ungetsc(c);
400  subst_command:
401                                                 c = COMSUB;
402  subst_command2:
403                                                 sp = yyrecursive(c);
404                                                 cz = strlen(sp) + 1;
405                                                 XcheckN(ws, wp, cz);
406                                                 *wp++ = c;
407                                                 memcpy(wp, sp, cz);
408                                                 wp += cz;
409                                         }
410                                 } else if (c == '{') /*}*/ {
411                                         if ((c = getsc()) == '|') {
412                                                 /*
413                                                  * non-subenvironment
414                                                  * value substitution
415                                                  */
416                                                 c = VALSUB;
417                                                 goto subst_command2;
418                                         } else if (ctype(c, C_IFSWS)) {
419                                                 /*
420                                                  * non-subenvironment
421                                                  * "command" substitution
422                                                  */
423                                                 c = FUNSUB;
424                                                 goto subst_command2;
425                                         }
426                                         ungetsc(c);
427                                         *wp++ = OSUBST;
428                                         *wp++ = '{'; /*}*/
429                                         wp = get_brace_var(&ws, wp);
430                                         c = getsc();
431                                         /* allow :# and :% (ksh88 compat) */
432                                         if (c == ':') {
433                                                 *wp++ = CHAR;
434                                                 *wp++ = c;
435                                                 c = getsc();
436                                                 if (c == ':') {
437                                                         *wp++ = CHAR;
438                                                         *wp++ = '0';
439                                                         *wp++ = ADELIM;
440                                                         *wp++ = ':';
441                                                         PUSH_STATE(SBRACE);
442                                                         PUSH_STATE(SADELIM);
443                                                         statep->ls_adelim.delimiter = ':';
444                                                         statep->ls_adelim.num = 1;
445                                                         statep->nparen = 0;
446                                                         break;
447                                                 } else if (ksh_isdigit(c) ||
448                                                     c == '('/*)*/ || c == ' ' ||
449                                                     /*XXX what else? */
450                                                     c == '$') {
451                                                         /* substring subst. */
452                                                         if (c != ' ') {
453                                                                 *wp++ = CHAR;
454                                                                 *wp++ = ' ';
455                                                         }
456                                                         ungetsc(c);
457                                                         PUSH_STATE(SBRACE);
458                                                         PUSH_STATE(SADELIM);
459                                                         statep->ls_adelim.delimiter = ':';
460                                                         statep->ls_adelim.num = 2;
461                                                         statep->nparen = 0;
462                                                         break;
463                                                 }
464                                         } else if (c == '/') {
465                                                 *wp++ = CHAR;
466                                                 *wp++ = c;
467                                                 if ((c = getsc()) == '/') {
468                                                         *wp++ = ADELIM;
469                                                         *wp++ = c;
470                                                 } else
471                                                         ungetsc(c);
472                                                 PUSH_STATE(SBRACE);
473                                                 PUSH_STATE(SADELIM);
474                                                 statep->ls_adelim.delimiter = '/';
475                                                 statep->ls_adelim.num = 1;
476                                                 statep->nparen = 0;
477                                                 break;
478                                         }
479                                         /*
480                                          * If this is a trim operation,
481                                          * treat (,|,) specially in STBRACE.
482                                          */
483                                         if (ctype(c, C_SUBOP2)) {
484                                                 ungetsc(c);
485                                                 if (Flag(FSH))
486                                                         PUSH_STATE(STBRACEBOURNE);
487                                                 else
488                                                         PUSH_STATE(STBRACEKORN);
489                                         } else {
490                                                 ungetsc(c);
491                                                 if (state == SDQUOTE ||
492                                                     state == SQBRACE)
493                                                         PUSH_STATE(SQBRACE);
494                                                 else
495                                                         PUSH_STATE(SBRACE);
496                                         }
497                                 } else if (ksh_isalphx(c)) {
498                                         *wp++ = OSUBST;
499                                         *wp++ = 'X';
500                                         do {
501                                                 Xcheck(ws, wp);
502                                                 *wp++ = c;
503                                                 c = getsc();
504                                         } while (ksh_isalnux(c));
505                                         *wp++ = '\0';
506                                         *wp++ = CSUBST;
507                                         *wp++ = 'X';
508                                         ungetsc(c);
509                                 } else if (ctype(c, C_VAR1 | C_DIGIT)) {
510                                         Xcheck(ws, wp);
511                                         *wp++ = OSUBST;
512                                         *wp++ = 'X';
513                                         *wp++ = c;
514                                         *wp++ = '\0';
515                                         *wp++ = CSUBST;
516                                         *wp++ = 'X';
517                                 } else {
518                                         *wp++ = CHAR;
519                                         *wp++ = '$';
520                                         ungetsc(c);
521                                 }
522                                 break;
523                         case '`':
524  subst_gravis:
525                                 PUSH_STATE(SBQUOTE);
526                                 *wp++ = COMSUB;
527                                 /*
528                                  * We need to know whether we are within double
529                                  * quotes, since most shells translate \" to "
530                                  * within "…`…\"…`…". This is not done in POSIX
531                                  * mode (§2.2.3 Double-Quotes: “The backquote
532                                  * shall retain its special meaning introducing
533                                  * the other form of command substitution (see
534                                  * Command Substitution). The portion of the
535                                  * quoted string from the initial backquote and
536                                  * the characters up to the next backquote that
537                                  * is not preceded by a <backslash>, having
538                                  * escape characters removed, defines that
539                                  * command whose output replaces "`...`" when
540                                  * the word is expanded.”; §2.6.3 Command
541                                  * Substitution: “Within the backquoted style
542                                  * of command substitution, <backslash> shall
543                                  * retain its literal meaning, except when
544                                  * followed by: '$', '`', or <backslash>. The
545                                  * search for the matching backquote shall be
546                                  * satisfied by the first unquoted non-escaped
547                                  * backquote; during this search, if a
548                                  * non-escaped backquote is encountered[…],
549                                  * undefined results occur.”).
550                                  */
551                                 statep->ls_bool = false;
552 #ifdef austingroupbugs1015_is_still_not_resolved
553                                 if (Flag(FPOSIX))
554                                         break;
555 #endif
556                                 s2 = statep;
557                                 base = state_info.base;
558                                 while (/* CONSTCOND */ 1) {
559                                         for (; s2 != base; s2--) {
560                                                 if (s2->type == SDQUOTE) {
561                                                         statep->ls_bool = true;
562                                                         break;
563                                                 }
564                                         }
565                                         if (s2 != base)
566                                                 break;
567                                         if (!(s2 = s2->ls_base))
568                                                 break;
569                                         base = s2-- - STATE_BSIZE;
570                                 }
571                                 break;
572                         case QCHAR:
573                                 if (cf & LQCHAR) {
574                                         *wp++ = QCHAR;
575                                         *wp++ = getsc();
576                                         break;
577                                 }
578                                 /* FALLTHROUGH */
579                         default:
580  store_char:
581                                 *wp++ = CHAR;
582                                 *wp++ = c;
583                         }
584                         break;
585
586                 case SEQUOTE:
587                         if (c == '\'') {
588                                 POP_STATE();
589                                 *wp++ = CQUOTE;
590                                 ignore_backslash_newline--;
591                         } else if (c == '\\') {
592                                 if ((c2 = unbksl(true, getsc_i, ungetsc)) == -1)
593                                         c2 = getsc();
594                                 if (c2 == 0)
595                                         statep->ls_bool = true;
596                                 if (!statep->ls_bool) {
597                                         char ts[4];
598
599                                         if ((unsigned int)c2 < 0x100) {
600                                                 *wp++ = QCHAR;
601                                                 *wp++ = c2;
602                                         } else {
603                                                 cz = utf_wctomb(ts, c2 - 0x100);
604                                                 ts[cz] = 0;
605                                                 cz = 0;
606                                                 do {
607                                                         *wp++ = QCHAR;
608                                                         *wp++ = ts[cz];
609                                                 } while (ts[++cz]);
610                                         }
611                                 }
612                         } else if (!statep->ls_bool) {
613                                 *wp++ = QCHAR;
614                                 *wp++ = c;
615                         }
616                         break;
617
618                 case SSQUOTE:
619                         if (c == '\'') {
620                                 POP_STATE();
621                                 if ((cf & HEREDOC) || state == SQBRACE)
622                                         goto store_char;
623                                 *wp++ = CQUOTE;
624                                 ignore_backslash_newline--;
625                         } else {
626                                 *wp++ = QCHAR;
627                                 *wp++ = c;
628                         }
629                         break;
630
631                 case SDQUOTE:
632                         if (c == '"') {
633                                 POP_STATE();
634                                 *wp++ = CQUOTE;
635                         } else
636                                 goto Subst;
637                         break;
638
639                 /* $(( ... )) */
640                 case SASPAREN:
641                         if (c == '(')
642                                 statep->nparen++;
643                         else if (c == ')') {
644                                 statep->nparen--;
645                                 if (statep->nparen == 1) {
646                                         /* end of EXPRSUB */
647                                         POP_SRETRACE();
648
649                                         if ((c2 = getsc()) == /*(*/ ')') {
650                                                 cz = strlen(sp) - 2;
651                                                 XcheckN(ws, wp, cz);
652                                                 memcpy(wp, sp + 1, cz);
653                                                 wp += cz;
654                                                 afree(sp, ATEMP);
655                                                 *wp++ = '\0';
656                                                 break;
657                                         } else {
658                                                 Source *s;
659
660                                                 ungetsc(c2);
661                                                 /*
662                                                  * mismatched parenthesis -
663                                                  * assume we were really
664                                                  * parsing a $(...) expression
665                                                  */
666                                                 --wp;
667                                                 s = pushs(SREREAD,
668                                                     source->areap);
669                                                 s->start = s->str =
670                                                     s->u.freeme = sp;
671                                                 s->next = source;
672                                                 source = s;
673                                                 goto subst_command;
674                                         }
675                                 }
676                         }
677                         /* reuse existing state machine */
678                         goto Sbase2;
679
680                 case SQBRACE:
681                         if (c == '\\') {
682                                 /*
683                                  * perform POSIX "quote removal" if the back-
684                                  * slash is "special", i.e. same cases as the
685                                  * {case '\\':} in Subst: plus closing brace;
686                                  * in mksh code "quote removal" on '\c' means
687                                  * write QCHAR+c, otherwise CHAR+\+CHAR+c are
688                                  * emitted (in heredocquote:)
689                                  */
690                                 if ((c = getsc()) == '"' || c == '\\' ||
691                                     c == '$' || c == '`' || c == /*{*/'}')
692                                         goto store_qchar;
693                                 goto heredocquote;
694                         }
695                         goto common_SQBRACE;
696
697                 case SBRACE:
698                         if (c == '\'')
699                                 goto open_ssquote_unless_heredoc;
700                         else if (c == '\\')
701                                 goto getsc_qchar;
702  common_SQBRACE:
703                         if (c == '"')
704                                 goto open_sdquote;
705                         else if (c == '$')
706                                 goto subst_dollar_ex;
707                         else if (c == '`')
708                                 goto subst_gravis;
709                         else if (c != /*{*/ '}')
710                                 goto store_char;
711                         POP_STATE();
712                         *wp++ = CSUBST;
713                         *wp++ = /*{*/ '}';
714                         break;
715
716                 /* Same as SBASE, except (,|,) treated specially */
717                 case STBRACEKORN:
718                         if (c == '|')
719                                 *wp++ = SPAT;
720                         else if (c == '(') {
721                                 *wp++ = OPAT;
722                                 /* simile for @ */
723                                 *wp++ = ' ';
724                                 PUSH_STATE(SPATTERN);
725                         } else /* FALLTHROUGH */
726                 case STBRACEBOURNE:
727                           if (c == /*{*/ '}') {
728                                 POP_STATE();
729                                 *wp++ = CSUBST;
730                                 *wp++ = /*{*/ '}';
731                         } else
732                                 goto Sbase1;
733                         break;
734
735                 case SBQUOTE:
736                         if (c == '`') {
737                                 *wp++ = 0;
738                                 POP_STATE();
739                         } else if (c == '\\') {
740                                 switch (c = getsc()) {
741                                 case 0:
742                                         /* trailing \ is lost */
743                                         break;
744                                 case '$':
745                                 case '`':
746                                 case '\\':
747                                         *wp++ = c;
748                                         break;
749                                 case '"':
750                                         if (statep->ls_bool) {
751                                                 *wp++ = c;
752                                                 break;
753                                         }
754                                         /* FALLTHROUGH */
755                                 default:
756                                         *wp++ = '\\';
757                                         *wp++ = c;
758                                         break;
759                                 }
760                         } else
761                                 *wp++ = c;
762                         break;
763
764                 /* ONEWORD */
765                 case SWORD:
766                         goto Subst;
767
768                 /* LETEXPR: (( ... )) */
769                 case SLETPAREN:
770                         if (c == /*(*/ ')') {
771                                 if (statep->nparen > 0)
772                                         --statep->nparen;
773                                 else if ((c2 = getsc()) == /*(*/ ')') {
774                                         c = 0;
775                                         *wp++ = CQUOTE;
776                                         goto Done;
777                                 } else {
778                                         Source *s;
779
780                                         ungetsc(c2);
781                                         ungetsc(c);
782                                         /*
783                                          * mismatched parenthesis -
784                                          * assume we were really
785                                          * parsing a (...) expression
786                                          */
787                                         *wp = EOS;
788                                         sp = Xstring(ws, wp);
789                                         dp = wdstrip(sp + 1, WDS_TPUTS);
790                                         s = pushs(SREREAD, source->areap);
791                                         s->start = s->str = s->u.freeme = dp;
792                                         s->next = source;
793                                         source = s;
794                                         ungetsc('('/*)*/);
795                                         return ('('/*)*/);
796                                 }
797                         } else if (c == '(')
798                                 /*
799                                  * parentheses inside quotes and
800                                  * backslashes are lost, but AT&T ksh
801                                  * doesn't count them either
802                                  */
803                                 ++statep->nparen;
804                         goto Sbase2;
805
806                 /* << or <<- delimiter */
807                 case SHEREDELIM:
808                         /*
809                          * here delimiters need a special case since
810                          * $ and `...` are not to be treated specially
811                          */
812                         switch (c) {
813                         case '\\':
814                                 if ((c = getsc())) {
815                                         /* trailing \ is lost */
816                                         *wp++ = QCHAR;
817                                         *wp++ = c;
818                                 }
819                                 break;
820                         case '\'':
821                                 goto open_ssquote_unless_heredoc;
822                         case '$':
823                                 if ((c2 = getsc()) == '\'') {
824  open_sequote:
825                                         *wp++ = OQUOTE;
826                                         ignore_backslash_newline++;
827                                         PUSH_STATE(SEQUOTE);
828                                         statep->ls_bool = false;
829                                         break;
830                                 } else if (c2 == '"') {
831                                         /* FALLTHROUGH */
832                         case '"':
833                                         PUSH_SRETRACE(SHEREDQUOTE);
834                                         break;
835                                 }
836                                 ungetsc(c2);
837                                 /* FALLTHROUGH */
838                         default:
839                                 *wp++ = CHAR;
840                                 *wp++ = c;
841                         }
842                         break;
843
844                 /* " in << or <<- delimiter */
845                 case SHEREDQUOTE:
846                         if (c != '"')
847                                 goto Subst;
848                         POP_SRETRACE();
849                         dp = strnul(sp) - 1;
850                         /* remove the trailing double quote */
851                         *dp = '\0';
852                         /* store the quoted string */
853                         *wp++ = OQUOTE;
854                         XcheckN(ws, wp, (dp - sp) * 2);
855                         dp = sp;
856                         while ((c = *dp++)) {
857                                 if (c == '\\') {
858                                         switch ((c = *dp++)) {
859                                         case '\\':
860                                         case '"':
861                                         case '$':
862                                         case '`':
863                                                 break;
864                                         default:
865                                                 *wp++ = CHAR;
866                                                 *wp++ = '\\';
867                                                 break;
868                                         }
869                                 }
870                                 *wp++ = CHAR;
871                                 *wp++ = c;
872                         }
873                         afree(sp, ATEMP);
874                         *wp++ = CQUOTE;
875                         state = statep->type = SHEREDELIM;
876                         break;
877
878                 /* in *(...|...) pattern (*+?@!) */
879                 case SPATTERN:
880                         if (c == /*(*/ ')') {
881                                 *wp++ = CPAT;
882                                 POP_STATE();
883                         } else if (c == '|') {
884                                 *wp++ = SPAT;
885                         } else if (c == '(') {
886                                 *wp++ = OPAT;
887                                 /* simile for @ */
888                                 *wp++ = ' ';
889                                 PUSH_STATE(SPATTERN);
890                         } else
891                                 goto Sbase1;
892                         break;
893                 }
894         }
895  Done:
896         Xcheck(ws, wp);
897         if (statep != &states[1])
898                 /* XXX figure out what is missing */
899                 yyerror("no closing quote\n");
900
901         /* This done to avoid tests for SHEREDELIM wherever SBASE tested */
902         if (state == SHEREDELIM)
903                 state = SBASE;
904
905         dp = Xstring(ws, wp);
906         if (state == SBASE && (
907 #ifndef MKSH_LEGACY_MODE
908             (c == '&' && !Flag(FSH) && !Flag(FPOSIX)) ||
909 #endif
910             c == '<' || c == '>')) {
911                 struct ioword *iop = alloc(sizeof(struct ioword), ATEMP);
912
913                 if (Xlength(ws, wp) == 0)
914                         iop->unit = c == '<' ? 0 : 1;
915                 else for (iop->unit = 0, c2 = 0; c2 < Xlength(ws, wp); c2 += 2) {
916                         if (dp[c2] != CHAR)
917                                 goto no_iop;
918                         if (!ksh_isdigit(dp[c2 + 1]))
919                                 goto no_iop;
920                         iop->unit = iop->unit * 10 + ksh_numdig(dp[c2 + 1]);
921                         if (iop->unit >= FDBASE)
922                                 goto no_iop;
923                 }
924
925                 if (c == '&') {
926                         if ((c2 = getsc()) != '>') {
927                                 ungetsc(c2);
928                                 goto no_iop;
929                         }
930                         c = c2;
931                         iop->ioflag = IOBASH;
932                 } else
933                         iop->ioflag = 0;
934
935                 c2 = getsc();
936                 /* <<, >>, <> are ok, >< is not */
937                 if (c == c2 || (c == '<' && c2 == '>')) {
938                         iop->ioflag |= c == c2 ?
939                             (c == '>' ? IOCAT : IOHERE) : IORDWR;
940                         if (iop->ioflag == IOHERE) {
941                                 if ((c2 = getsc()) == '-')
942                                         iop->ioflag |= IOSKIP;
943                                 else if (c2 == '<')
944                                         iop->ioflag |= IOHERESTR;
945                                 else
946                                         ungetsc(c2);
947                         }
948                 } else if (c2 == '&')
949                         iop->ioflag |= IODUP | (c == '<' ? IORDUP : 0);
950                 else {
951                         iop->ioflag |= c == '>' ? IOWRITE : IOREAD;
952                         if (c == '>' && c2 == '|')
953                                 iop->ioflag |= IOCLOB;
954                         else
955                                 ungetsc(c2);
956                 }
957
958                 iop->ioname = NULL;
959                 iop->delim = NULL;
960                 iop->heredoc = NULL;
961                 /* free word */
962                 Xfree(ws, wp);
963                 yylval.iop = iop;
964                 return (REDIR);
965  no_iop:
966                 afree(iop, ATEMP);
967         }
968
969         if (wp == dp && state == SBASE) {
970                 /* free word */
971                 Xfree(ws, wp);
972                 /* no word, process LEX1 character */
973                 if ((c == '|') || (c == '&') || (c == ';') || (c == '('/*)*/)) {
974                         if ((c2 = getsc()) == c)
975                                 c = (c == ';') ? BREAK :
976                                     (c == '|') ? LOGOR :
977                                     (c == '&') ? LOGAND :
978                                     /* c == '(' ) */ MDPAREN;
979                         else if (c == '|' && c2 == '&')
980                                 c = COPROC;
981                         else if (c == ';' && c2 == '|')
982                                 c = BRKEV;
983                         else if (c == ';' && c2 == '&')
984                                 c = BRKFT;
985                         else
986                                 ungetsc(c2);
987 #ifndef MKSH_SMALL
988                         if (c == BREAK) {
989                                 if ((c2 = getsc()) == '&')
990                                         c = BRKEV;
991                                 else
992                                         ungetsc(c2);
993                         }
994 #endif
995                 } else if (c == '\n') {
996                         if (cf & HEREDELIM)
997                                 ungetsc(c);
998                         else {
999                                 gethere();
1000                                 if (cf & CONTIN)
1001                                         goto Again;
1002                         }
1003                 }
1004                 return (c);
1005         }
1006
1007         /* terminate word */
1008         *wp++ = EOS;
1009         yylval.cp = Xclose(ws, wp);
1010         if (state == SWORD || state == SLETPAREN
1011             /* XXX ONEWORD? */)
1012                 return (LWORD);
1013
1014         /* unget terminator */
1015         ungetsc(c);
1016
1017         /*
1018          * note: the alias-vs-function code below depends on several
1019          * interna: starting from here, source->str is not modified;
1020          * the way getsc() and ungetsc() operate; etc.
1021          */
1022
1023         /* copy word to unprefixed string ident */
1024         sp = yylval.cp;
1025         dp = ident;
1026         while ((dp - ident) < IDENT && (c = *sp++) == CHAR)
1027                 *dp++ = *sp++;
1028         if (c != EOS)
1029                 /* word is not unquoted */
1030                 dp = ident;
1031         /* make sure the ident array stays NUL padded */
1032         memset(dp, 0, (ident + IDENT) - dp + 1);
1033
1034         if (!(cf & (KEYWORD | ALIAS)))
1035                 return (LWORD);
1036
1037         if (*ident != '\0') {
1038                 struct tbl *p;
1039                 uint32_t h = hash(ident);
1040
1041                 if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) &&
1042                     (!(cf & ESACONLY) || p->val.i == ESAC ||
1043                     p->val.i == /*{*/ '}')) {
1044                         afree(yylval.cp, ATEMP);
1045                         return (p->val.i);
1046                 }
1047                 if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) &&
1048                     (p->flag & ISSET)) {
1049                         /*
1050                          * this still points to the same character as the
1051                          * ungetsc'd terminator from above
1052                          */
1053                         const char *cp = source->str;
1054
1055                         /* prefer POSIX but not Korn functions over aliases */
1056                         while (*cp == ' ' || *cp == '\t')
1057                                 /*
1058                                  * this is like getsc() without skipping
1059                                  * over Source boundaries (including not
1060                                  * parsing ungetsc'd characters that got
1061                                  * pushed into an SREREAD) which is what
1062                                  * we want here anyway: find out whether
1063                                  * the alias name is followed by a POSIX
1064                                  * function definition
1065                                  */
1066                                 ++cp;
1067                         /* prefer functions over aliases */
1068                         if (cp[0] != '(' || cp[1] != ')') {
1069                                 Source *s = source;
1070
1071                                 while (s && (s->flags & SF_HASALIAS))
1072                                         if (s->u.tblp == p)
1073                                                 return (LWORD);
1074                                         else
1075                                                 s = s->next;
1076                                 /* push alias expansion */
1077                                 s = pushs(SALIAS, source->areap);
1078                                 s->start = s->str = p->val.s;
1079                                 s->u.tblp = p;
1080                                 s->flags |= SF_HASALIAS;
1081                                 s->next = source;
1082                                 if (source->type == SEOF) {
1083                                         /* prevent infinite recursion at EOS */
1084                                         source->u.tblp = p;
1085                                         source->flags |= SF_HASALIAS;
1086                                 }
1087                                 source = s;
1088                                 afree(yylval.cp, ATEMP);
1089                                 goto Again;
1090                         }
1091                 }
1092         } else if (cf & ALIAS) {
1093                 /* retain typeset et al. even when quoted */
1094                 if (assign_command((dp = wdstrip(yylval.cp, 0)), true))
1095                         strlcpy(ident, dp, sizeof(ident));
1096                 afree(dp, ATEMP);
1097         }
1098
1099         return (LWORD);
1100 }
1101
1102 static void
1103 gethere(void)
1104 {
1105         struct ioword **p;
1106
1107         for (p = heres; p < herep; p++)
1108                 if (!((*p)->ioflag & IOHERESTR))
1109                         readhere(*p);
1110         herep = heres;
1111 }
1112
1113 /*
1114  * read "<<word" text into temp file
1115  */
1116
1117 static void
1118 readhere(struct ioword *iop)
1119 {
1120         int c;
1121         const char *eof, *eofp;
1122         XString xs;
1123         char *xp;
1124         size_t xpos;
1125
1126         eof = evalstr(iop->delim, 0);
1127
1128         if (!(iop->ioflag & IOEVAL))
1129                 ignore_backslash_newline++;
1130
1131         Xinit(xs, xp, 256, ATEMP);
1132
1133  heredoc_read_line:
1134         /* beginning of line */
1135         eofp = eof;
1136         xpos = Xsavepos(xs, xp);
1137         if (iop->ioflag & IOSKIP) {
1138                 /* skip over leading tabs */
1139                 while ((c = getsc()) == '\t')
1140                         ;       /* nothing */
1141                 goto heredoc_parse_char;
1142         }
1143  heredoc_read_char:
1144         c = getsc();
1145  heredoc_parse_char:
1146         /* compare with here document marker */
1147         if (!*eofp) {
1148                 /* end of here document marker, what to do? */
1149                 switch (c) {
1150                 case /*(*/ ')':
1151                         if (!subshell_nesting_type)
1152                                 /*-
1153                                  * not allowed outside $(...) or (...)
1154                                  * => mismatch
1155                                  */
1156                                 break;
1157                         /* allow $(...) or (...) to close here */
1158                         ungetsc(/*(*/ ')');
1159                         /* FALLTHROUGH */
1160                 case 0:
1161                         /*
1162                          * Allow EOF here to commands without trailing
1163                          * newlines (mksh -c '...') will work as well.
1164                          */
1165                 case '\n':
1166                         /* Newline terminates here document marker */
1167                         goto heredoc_found_terminator;
1168                 }
1169         } else if (c == *eofp++)
1170                 /* store; then read and compare next character */
1171                 goto heredoc_store_and_loop;
1172         /* nope, mismatch; read until end of line */
1173         while (c != '\n') {
1174                 if (!c)
1175                         /* oops, reached EOF */
1176                         yyerror("%s '%s' unclosed\n", "here document", eof);
1177                 /* store character */
1178                 Xcheck(xs, xp);
1179                 Xput(xs, xp, c);
1180                 /* read next character */
1181                 c = getsc();
1182         }
1183         /* we read a newline as last character */
1184  heredoc_store_and_loop:
1185         /* store character */
1186         Xcheck(xs, xp);
1187         Xput(xs, xp, c);
1188         if (c == '\n')
1189                 goto heredoc_read_line;
1190         goto heredoc_read_char;
1191
1192  heredoc_found_terminator:
1193         /* jump back to saved beginning of line */
1194         xp = Xrestpos(xs, xp, xpos);
1195         /* terminate, close and store */
1196         Xput(xs, xp, '\0');
1197         iop->heredoc = Xclose(xs, xp);
1198
1199         if (!(iop->ioflag & IOEVAL))
1200                 ignore_backslash_newline--;
1201 }
1202
1203 void
1204 yyerror(const char *fmt, ...)
1205 {
1206         va_list va;
1207
1208         /* pop aliases and re-reads */
1209         while (source->type == SALIAS || source->type == SREREAD)
1210                 source = source->next;
1211         /* zap pending input */
1212         source->str = null;
1213
1214         error_prefix(true);
1215         va_start(va, fmt);
1216         shf_vfprintf(shl_out, fmt, va);
1217         va_end(va);
1218         errorfz();
1219 }
1220
1221 /*
1222  * input for yylex with alias expansion
1223  */
1224
1225 Source *
1226 pushs(int type, Area *areap)
1227 {
1228         Source *s;
1229
1230         s = alloc(sizeof(Source), areap);
1231         memset(s, 0, sizeof(Source));
1232         s->type = type;
1233         s->str = null;
1234         s->areap = areap;
1235         if (type == SFILE || type == SSTDIN)
1236                 XinitN(s->xs, 256, s->areap);
1237         return (s);
1238 }
1239
1240 static int
1241 getsc_uu(void)
1242 {
1243         Source *s = source;
1244         int c;
1245
1246         while ((c = *s->str++) == 0) {
1247                 /* return 0 for EOF by default */
1248                 s->str = NULL;
1249                 switch (s->type) {
1250                 case SEOF:
1251                         s->str = null;
1252                         return (0);
1253
1254                 case SSTDIN:
1255                 case SFILE:
1256                         getsc_line(s);
1257                         break;
1258
1259                 case SWSTR:
1260                         break;
1261
1262                 case SSTRING:
1263                 case SSTRINGCMDLINE:
1264                         break;
1265
1266                 case SWORDS:
1267                         s->start = s->str = *s->u.strv++;
1268                         s->type = SWORDSEP;
1269                         break;
1270
1271                 case SWORDSEP:
1272                         if (*s->u.strv == NULL) {
1273                                 s->start = s->str = "\n";
1274                                 s->type = SEOF;
1275                         } else {
1276                                 s->start = s->str = " ";
1277                                 s->type = SWORDS;
1278                         }
1279                         break;
1280
1281                 case SALIAS:
1282                         if (s->flags & SF_ALIASEND) {
1283                                 /* pass on an unused SF_ALIAS flag */
1284                                 source = s->next;
1285                                 source->flags |= s->flags & SF_ALIAS;
1286                                 s = source;
1287                         } else if (*s->u.tblp->val.s &&
1288                             (c = strnul(s->u.tblp->val.s)[-1], ksh_isspace(c))) {
1289                                 /* pop source stack */
1290                                 source = s = s->next;
1291                                 /*
1292                                  * Note that this alias ended with a
1293                                  * space, enabling alias expansion on
1294                                  * the following word.
1295                                  */
1296                                 s->flags |= SF_ALIAS;
1297                         } else {
1298                                 /*
1299                                  * At this point, we need to keep the current
1300                                  * alias in the source list so recursive
1301                                  * aliases can be detected and we also need to
1302                                  * return the next character. Do this by
1303                                  * temporarily popping the alias to get the
1304                                  * next character and then put it back in the
1305                                  * source list with the SF_ALIASEND flag set.
1306                                  */
1307                                 /* pop source stack */
1308                                 source = s->next;
1309                                 source->flags |= s->flags & SF_ALIAS;
1310                                 c = getsc_uu();
1311                                 if (c) {
1312                                         s->flags |= SF_ALIASEND;
1313                                         s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1314                                         s->start = s->str = s->ugbuf;
1315                                         s->next = source;
1316                                         source = s;
1317                                 } else {
1318                                         s = source;
1319                                         /* avoid reading EOF twice */
1320                                         s->str = NULL;
1321                                         break;
1322                                 }
1323                         }
1324                         continue;
1325
1326                 case SREREAD:
1327                         if (s->start != s->ugbuf)
1328                                 /* yuck */
1329                                 afree(s->u.freeme, ATEMP);
1330                         source = s = s->next;
1331                         continue;
1332                 }
1333                 if (s->str == NULL) {
1334                         s->type = SEOF;
1335                         s->start = s->str = null;
1336                         return ('\0');
1337                 }
1338                 if (s->flags & SF_ECHO) {
1339                         shf_puts(s->str, shl_out);
1340                         shf_flush(shl_out);
1341                 }
1342         }
1343         return (c);
1344 }
1345
/*
 * Read the next chunk of input for source s into its expandable buffer
 * s->xs and point s->start / s->str at the beginning of that buffer.
 *
 * When reading from a tty with one of the editing modes set (and command
 * line editing compiled in) the text comes from x_read(); otherwise it is
 * read with shf_getse() from s->u.shf, growing the buffer until a newline
 * or end of input is seen.
 *
 * If nothing at all was read, s->str is set to NULL (and an SFILE source
 * has its shf closed).  For interactive input the line is passed to
 * histsave() and the prompt is switched to PS2 before returning.
 */
1346 static void
1347 getsc_line(Source *s)
1348 {
1349         char *xp = Xstring(s->xs, xp), *cp;
             /* interactive: FTALKING is set and this source is SSTDIN */
1350         bool interactive = Flag(FTALKING) && s->type == SSTDIN;
             /* have_tty: interactive and the source additionally has SF_TTY */
1351         bool have_tty = tobool(interactive && (s->flags & SF_TTY));
1352
1353         /* Done here to ensure nothing odd happens when a timeout occurs */
1354         XcheckN(s->xs, xp, LINE);
1355         *xp = '\0';
1356         s->start = s->str = xp;
1357
             /* arm an alarm of ksh_tmout seconds while waiting on the tty */
1358         if (have_tty && ksh_tmout) {
1359                 ksh_tmout_state = TMOUT_READING;
1360                 alarm(ksh_tmout);
1361         }
1362         if (interactive)
1363                 change_winsz();
1364 #ifndef MKSH_NO_CMDLINE_EDITING
1365         if (have_tty && (
1366 #if !MKSH_S_NOVI
1367             Flag(FVI) ||
1368 #endif
1369             Flag(FEMACS) || Flag(FGMACS))) {
1370                 int nread;
1371
                     /*
                      * x_read() fills the buffer at xp; its return value is
                      * used as the number of bytes stored (negative is
                      * treated as an error and mapped to an empty read).
                      */
1372                 nread = x_read(xp);
1373                 if (nread < 0)
1374                         /* read error */
1375                         nread = 0;
1376                 xp[nread] = '\0';
1377                 xp += nread;
1378         } else
1379 #endif
1380           {
                     /* interactive input gets a prompt; otherwise count the line */
1381                 if (interactive)
1382                         pprompt(prompt, 0);
1383                 else
1384                         s->line++;
1385
1386                 while (/* CONSTCOND */ 1) {
1387                         char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
1388
                             /* interrupted read: clear the error, run pending traps, retry */
1389                         if (!p && shf_error(s->u.shf) &&
1390                             shf_errno(s->u.shf) == EINTR) {
1391                                 shf_clearerr(s->u.shf);
1392                                 if (trap)
1393                                         runtraps(0);
1394                                 continue;
1395                         }
                             /*
                              * Stop when nothing was returned, or when the
                              * chunk just read ends in a newline.  On a
                              * non-NULL return xp is advanced to p, which
                              * appears to point just past the last byte
                              * stored (at the NUL) - confirm in shf_getse().
                              */
1396                         if (!p || (xp = p, xp[-1] == '\n'))
1397                                 break;
1398                         /* double buffer size */
1399                         /* move past NUL so doubling works... */
1400                         xp++;
1401                         XcheckN(s->xs, xp, Xlength(s->xs, xp));
1402                         /* ...and move back again */
1403                         xp--;
1404                 }
1405                 /*
1406                  * flush any unwanted input so other programs/builtins
1407                  * can read it. Not very optimal, but less error prone
1408                  * than flushing else where, dealing with redirections,
1409                  * etc.
1410                  * TODO: reduce size of shf buffer (~128?) if SSTDIN
1411                  */
1412                 if (s->type == SSTDIN)
1413                         shf_flush(s->u.shf);
1414         }
1415         /*
1416          * XXX: temporary kludge to restore source after a
1417          * trap may have been executed.
1418          */
1419         source = s;
             /* reading is over: leave the TMOUT_READING state and cancel the alarm */
1420         if (have_tty && ksh_tmout) {
1421                 ksh_tmout_state = TMOUT_EXECUTING;
1422                 alarm(0);
1423         }
             /* cp = start of the buffer; xp = end of what was read */
1424         cp = Xstring(s->xs, xp);
             /* NOTE(review): rndpush() presumably feeds the line to the RNG pool - confirm */
1425         rndpush(cp);
1426         s->start = s->str = cp;
1427         strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
1428         /* Note: if input is all nulls, this is not eof */
1429         if (Xlength(s->xs, xp) == 0) {
1430                 /* EOF */
1431                 if (s->type == SFILE)
1432                         shf_fdclose(s->u.shf);
1433                 s->str = NULL;
1434         } else if (interactive && *s->str) {
                     /*
                      * Not at PS1 (continuation line): append to the history
                      * entry.  At PS1: queue the line unless its first
                      * character is IFS (white)space.
                      */
1435                 if (cur_prompt != PS1)
1436                         histsave(&s->line, s->str, HIST_APPEND, true);
1437                 else if (!ctype(*s->str, C_IFS | C_IFSWS))
1438                         histsave(&s->line, s->str, HIST_QUEUE, true);
1439 #if !defined(MKSH_SMALL) && HAVE_PERSISTENT_HISTORY
                     /* PS1 line starting with IFS space: run the blank-line check below */
1440                 else
1441                         goto check_for_sole_return;
1442         } else if (interactive && cur_prompt == PS1) {
1443  check_for_sole_return:
                     /*
                      * If the line consists solely of C_IFSWS characters,
                      * flush the history and call histsync().
                      */
1444                 cp = Xstring(s->xs, xp);
1445                 while (*cp && ctype(*cp, C_IFSWS))
1446                         ++cp;
1447                 if (!*cp) {
1448                         histsave(&s->line, NULL, HIST_FLUSH, true);
1449                         histsync();
1450                 }
1451 #endif
1452         }
             /* any further line read interactively is a continuation line */
1453         if (interactive)
1454                 set_prompt(PS2, NULL);
1455 }
1456
1457 void
1458 set_prompt(int to, Source *s)
1459 {
1460         cur_prompt = (uint8_t)to;
1461
1462         switch (to) {
1463         /* command */
1464         case PS1:
1465                 /*
1466                  * Substitute ! and !! here, before substitutions are done
1467                  * so ! in expanded variables are not expanded.
1468                  * NOTE: this is not what AT&T ksh does (it does it after
1469                  * substitutions, POSIX doesn't say which is to be done.
1470                  */
1471                 {
1472                         struct shf *shf;
1473                         char * volatile ps1;
1474                         Area *saved_atemp;
1475                         int saved_lineno;
1476
1477                         ps1 = str_val(global("PS1"));
1478                         shf = shf_sopen(NULL, strlen(ps1) * 2,
1479                             SHF_WR | SHF_DYNAMIC, NULL);
1480                         while (*ps1)
1481                                 if (*ps1 != '!' || *++ps1 == '!')
1482                                         shf_putchar(*ps1++, shf);
1483                                 else
1484                                         shf_fprintf(shf, "%lu", s ?
1485                                             (unsigned long)s->line + 1 : 0UL);
1486                         ps1 = shf_sclose(shf);
1487                         saved_lineno = current_lineno;
1488                         if (s)
1489                                 current_lineno = s->line + 1;
1490                         saved_atemp = ATEMP;
1491                         newenv(E_ERRH);
1492                         if (kshsetjmp(e->jbuf)) {
1493                                 prompt = safe_prompt;
1494                                 /*
1495                                  * Don't print an error - assume it has already
1496                                  * been printed. Reason is we may have forked
1497                                  * to run a command and the child may be
1498                                  * unwinding its stack through this code as it
1499                                  * exits.
1500                                  */
1501                         } else {
1502                                 char *cp = substitute(ps1, 0);
1503                                 strdupx(prompt, cp, saved_atemp);
1504                         }
1505                         current_lineno = saved_lineno;
1506                         quitenv(NULL);
1507                 }
1508                 break;
1509         /* command continuation */
1510         case PS2:
1511                 prompt = str_val(global("PS2"));
1512                 break;
1513         }
1514 }
1515
1516 int
1517 pprompt(const char *cp, int ntruncate)
1518 {
1519         char delimiter = 0;
1520         bool doprint = (ntruncate != -1);
1521         bool indelimit = false;
1522         int columns = 0, lines = 0;
1523
1524         /*
1525          * Undocumented AT&T ksh feature:
1526          * If the second char in the prompt string is \r then the first
1527          * char is taken to be a non-printing delimiter and any chars
1528          * between two instances of the delimiter are not considered to
1529          * be part of the prompt length
1530          */
1531         if (*cp && cp[1] == '\r') {
1532                 delimiter = *cp;
1533                 cp += 2;
1534         }
1535         for (; *cp; cp++) {
1536                 if (indelimit && *cp != delimiter)
1537                         ;
1538                 else if (*cp == '\n' || *cp == '\r') {
1539                         lines += columns / x_cols + ((*cp == '\n') ? 1 : 0);
1540                         columns = 0;
1541                 } else if (*cp == '\t') {
1542                         columns = (columns | 7) + 1;
1543                 } else if (*cp == '\b') {
1544                         if (columns > 0)
1545                                 columns--;
1546                 } else if (*cp == delimiter)
1547                         indelimit = !indelimit;
1548                 else if (UTFMODE && ((unsigned char)*cp > 0x7F)) {
1549                         const char *cp2;
1550                         columns += utf_widthadj(cp, &cp2);
1551                         if (doprint && (indelimit ||
1552                             (ntruncate < (x_cols * lines + columns))))
1553                                 shf_write(cp, cp2 - cp, shl_out);
1554                         cp = cp2 - /* loop increment */ 1;
1555                         continue;
1556                 } else
1557                         columns++;
1558                 if (doprint && (*cp != delimiter) &&
1559                     (indelimit || (ntruncate < (x_cols * lines + columns))))
1560                         shf_putc(*cp, shl_out);
1561         }
1562         if (doprint)
1563                 shf_flush(shl_out);
1564         return (x_cols * lines + columns);
1565 }
1566
1567 /*
1568  * Read the variable part of a ${...} expression (i.e. up to but not
1569  * including the :[-+?=#%] or close-brace).
1570  */
/*
 * wsp: the expandable string wp points into (grown with Xcheck as needed).
 * wp:  current write position inside *wsp.
 *
 * Characters are read with getsc() and copied to wp for as long as they
 * belong to the variable part.  The copied text is NUL-terminated, the
 * character that ended it is pushed back with ungetsc(), and the write
 * position just past the NUL is returned.
 */
1571 static char *
1572 get_brace_var(XString *wsp, char *wp)
1573 {
1574         char c;
1575         enum parse_state {
1576                 PS_INITIAL, PS_SAW_HASH, PS_IDENT,
1577                 PS_NUMBER, PS_VAR1
1578         } state = PS_INITIAL;
1579
1580         while (/* CONSTCOND */ 1) {
1581                 c = getsc();
1582                 /* State machine to figure out where the variable part ends. */
1583                 switch (state) {
1584                 case PS_INITIAL:
                             /* a leading '#', '!' or '%' is copied as a prefix */
1585                         if (c == '#' || c == '!' || c == '%') {
1586                                 state = PS_SAW_HASH;
1587                                 break;
1588                         }
1589                         /* FALLTHROUGH */
1590                 case PS_SAW_HASH:
                             /* first character of the name proper decides its kind */
1591                         if (ksh_isalphx(c))
1592                                 state = PS_IDENT;
1593                         else if (ksh_isdigit(c))
1594                                 state = PS_NUMBER;
1595                         else if (c == '#') {
                                     /*
                                      * After a prefix character, '#' is only
                                      * taken as the variable name when the
                                      * close-brace follows directly; the
                                      * lookahead c2 is pushed back either way.
                                      */
1596                                 if (state == PS_SAW_HASH) {
1597                                         char c2;
1598
1599                                         c2 = getsc();
1600                                         ungetsc(c2);
1601                                         if (c2 != /*{*/ '}') {
                                                     /*
                                                      * NOTE(review): c is pushed
                                                      * back here and once more at
                                                      * out: - confirm the double
                                                      * unget is intended.
                                                      */
1602                                                 ungetsc(c);
1603                                                 goto out;
1604                                         }
1605                                 }
1606                                 state = PS_VAR1;
1607                         } else if (ctype(c, C_VAR1))
1608                                 state = PS_VAR1;
1609                         else
1610                                 goto out;
1611                         break;
1612                 case PS_IDENT:
1613                         if (!ksh_isalnux(c)) {
1614                                 if (c == '[') {
1615                                         char *tmp, *p;
1616
                                             /* collect the subscript text into tmp (ATEMP) */
1617                                         if (!arraysub(&tmp))
1618                                                 yyerror("missing ]\n");
                                             /* copy '[' followed by everything arraysub() saved */
1619                                         *wp++ = c;
1620                                         for (p = tmp; *p; ) {
1621                                                 Xcheck(*wsp, wp);
1622                                                 *wp++ = *p++;
1623                                         }
1624                                         afree(tmp, ATEMP);
                                             /*
                                              * NOTE(review): arraysub() stores every
                                              * character it reads, including the
                                              * closing ']', so this getsc() appears to
                                              * read the character after the ']'; it is
                                              * pushed back again at out:.
                                              */
1625                                         /* the ] */
1626                                         c = getsc();
1627                                 }
1628                                 goto out;
1629                         }
1630                         break;
1631                 case PS_NUMBER:
1632                         if (!ksh_isdigit(c))
1633                                 goto out;
1634                         break;
1635                 case PS_VAR1:
                             /* single-character names end after that one character */
1636                         goto out;
1637                 }
                     /* c belongs to the variable part: append it */
1638                 Xcheck(*wsp, wp);
1639                 *wp++ = c;
1640         }
1641  out:
1642         /* end of variable part */
1643         *wp++ = '\0';
1644         ungetsc(c);
1645         return (wp);
1646 }
1647
1648 /*
1649  * Save an array subscript - returns true if matching bracket found, false
1650  * if eof or newline was found.
1651  * (Returned string double null terminated)
1652  */
1653 static bool
1654 arraysub(char **strp)
1655 {
1656         XString ws;
1657         char *wp, c;
1658         /* we are just past the initial [ */
1659         unsigned int depth = 1;
1660
1661         Xinit(ws, wp, 32, ATEMP);
1662
1663         do {
1664                 c = getsc();
1665                 Xcheck(ws, wp);
1666                 *wp++ = c;
1667                 if (c == '[')
1668                         depth++;
1669                 else if (c == ']')
1670                         depth--;
1671         } while (depth > 0 && c && c != '\n');
1672
1673         *wp++ = '\0';
1674         *strp = Xclose(ws, wp);
1675
1676         return (tobool(depth == 0));
1677 }
1678
1679 /* Unget a char: handles case when we are already at the start of the buffer */
1680 static void
1681 ungetsc(int c)
1682 {
1683         struct sretrace_info *rp = retrace_info;
1684
1685         if (backslash_skip)
1686                 backslash_skip--;
1687         /* Don't unget EOF... */
1688         if (source->str == null && c == '\0')
1689                 return;
1690         while (rp) {
1691                 if (Xlength(rp->xs, rp->xp))
1692                         rp->xp--;
1693                 rp = rp->next;
1694         }
1695         ungetsc_i(c);
1696 }
1697 static void
1698 ungetsc_i(int c)
1699 {
1700         if (source->str > source->start)
1701                 source->str--;
1702         else {
1703                 Source *s;
1704
1705                 s = pushs(SREREAD, source->areap);
1706                 s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1707                 s->start = s->str = s->ugbuf;
1708                 s->next = source;
1709                 source = s;
1710         }
1711 }
1712
1713
1714 /* Called to get a char that isn't a \newline sequence. */
1715 static int
1716 getsc_bn(void)
1717 {
1718         int c, c2;
1719
1720         if (ignore_backslash_newline)
1721                 return (o_getsc_u());
1722
1723         if (backslash_skip == 1) {
1724                 backslash_skip = 2;
1725                 return (o_getsc_u());
1726         }
1727
1728         backslash_skip = 0;
1729
1730         while (/* CONSTCOND */ 1) {
1731                 c = o_getsc_u();
1732                 if (c == '\\') {
1733                         if ((c2 = o_getsc_u()) == '\n')
1734                                 /* ignore the \newline; get the next char... */
1735                                 continue;
1736                         ungetsc_i(c2);
1737                         backslash_skip = 1;
1738                 }
1739                 return (c);
1740         }
1741 }
1742
1743 void
1744 yyskiputf8bom(void)
1745 {
1746         int c;
1747
1748         if ((unsigned char)(c = o_getsc_u()) != 0xEF) {
1749                 ungetsc_i(c);
1750                 return;
1751         }
1752         if ((unsigned char)(c = o_getsc_u()) != 0xBB) {
1753                 ungetsc_i(c);
1754                 ungetsc_i(0xEF);
1755                 return;
1756         }
1757         if ((unsigned char)(c = o_getsc_u()) != 0xBF) {
1758                 ungetsc_i(c);
1759                 ungetsc_i(0xBB);
1760                 ungetsc_i(0xEF);
1761                 return;
1762         }
1763         UTFMODE |= 8;
1764 }
1765
1766 static Lex_state *
1767 push_state_i(State_info *si, Lex_state *old_end)
1768 {
1769         Lex_state *news = alloc2(STATE_BSIZE, sizeof(Lex_state), ATEMP);
1770
1771         news[0].ls_base = old_end;
1772         si->base = &news[0];
1773         si->end = &news[STATE_BSIZE];
1774         return (&news[1]);
1775 }
1776
1777 static Lex_state *
1778 pop_state_i(State_info *si, Lex_state *old_end)
1779 {
1780         Lex_state *old_base = si->base;
1781
1782         si->base = old_end->ls_base - STATE_BSIZE;
1783         si->end = old_end->ls_base;
1784
1785         afree(old_base, ATEMP);
1786
1787         return (si->base + STATE_BSIZE - 1);
1788 }