1 /* $OpenBSD: lex.c,v 1.51 2015/09/10 22:48:58 nicm Exp $ */
4 * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5 * 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018
6 * mirabilos <m@mirbsd.org>
8 * Provided that these terms and disclaimer and all copyright notices
9 * are retained or reproduced in an accompanying document, permission
10 * is granted to deal in this work without restriction, including un-
11 * limited rights to use, publicly perform, distribute, sell, modify,
12 * merge, give away, or sublicence.
14 * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
15 * the utmost extent permitted by applicable law, neither express nor
16 * implied; without malicious intent or gross negligence. In no event
17 * may a licensor, author or contributor be held liable for indirect,
18 * direct, other damage, loss, or other issues arising in any way out
19 * of dealing in the work, even if advised of the possibility of such
20 * damage or existence of a defect, except proven that it results out
21 * of said person's immediate fault when using the work as intended.
26 __RCSID("$MirOS: src/bin/mksh/lex.c,v 1.250 2018/10/20 18:34:14 tg Exp $");
29 * states while lexing word
31 #define SBASE 0 /* outside any lexical constructs */
32 #define SWORD 1 /* implicit quoting for substitute() */
33 #define SLETPAREN 2 /* inside (( )), implicit quoting */
34 #define SSQUOTE 3 /* inside '' */
35 #define SDQUOTE 4 /* inside "" */
36 #define SEQUOTE 5 /* inside $'' */
37 #define SBRACE 6 /* inside ${} */
38 #define SQBRACE 7 /* inside "${}" */
39 #define SBQUOTE 8 /* inside `` */
40 #define SASPAREN 9 /* inside $(( )) */
41 #define SHEREDELIM 10 /* parsing << or <<- delimiter */
42 #define SHEREDQUOTE 11 /* parsing " in << or <<- delimiter */
43 #define SPATTERN 12 /* parsing *(...|...) pattern (*+?@!) */
44 #define SADELIM 13 /* like SBASE, looking for delimiter */
45 #define STBRACEKORN 14 /* parsing ${...[#%]...} !FSH */
46 #define STBRACEBOURNE 15 /* parsing ${...[#%]...} FSH */
47 #define SINVALID 255 /* invalid state */
49 struct sretrace_info {
50 struct sretrace_info *next;
56 * Structure to keep track of the lexing state and the various pieces of info
57 * needed for each particular state.
59 typedef struct lex_state {
61 /* point to the next state block */
62 struct lex_state *base;
63 /* marks start of state output in output string */
65 /* SBQUOTE: true if in double quotes: "`...`" */
66 /* SEQUOTE: got NUL, ignore rest of string */
68 /* SADELIM information */
70 /* character to search for */
71 unsigned char delimiter;
72 /* max. number of delimiters */
76 /* count open parentheses */
78 /* type of this state */
83 #define ls_base u.base
84 #define ls_start u.start
85 #define ls_bool u.abool
86 #define ls_adelim u.adelim
89 #define LS_HEREDOC BIT(0)
96 static void readhere(struct ioword *);
97 static void ungetsc(int);
98 static void ungetsc_i(int);
99 static int getsc_uu(void);
100 static void getsc_line(Source *);
101 static int getsc_bn(void);
102 static int getsc_i(void);
103 static char *get_brace_var(XString *, char *);
104 static bool arraysub(char **);
105 static void gethere(void);
106 static Lex_state *push_state_i(State_info *, Lex_state *);
107 static Lex_state *pop_state_i(State_info *, Lex_state *);
109 static int backslash_skip;
110 static int ignore_backslash_newline;
112 /* optimised getsc_bn() */
113 #define o_getsc() (*source->str != '\0' && *source->str != '\\' && \
114 !backslash_skip ? *source->str++ : getsc_bn())
115 /* optimised getsc_uu() */
116 #define o_getsc_u() ((*source->str != '\0') ? *source->str++ : getsc_uu())
119 #define o_getsc_r(carg) \
121 struct sretrace_info *rp = retrace_info; \
124 Xcheck(rp->xs, rp->xp); \
135 o_getsc_r((unsigned int)(unsigned char)o_getsc());
138 #if defined(MKSH_SMALL) && !defined(MKSH_SMALL_BUT_FAST)
139 #define getsc() getsc_i()
141 static int getsc_r(int);
149 #define getsc() getsc_r((unsigned int)(unsigned char)o_getsc())
152 #define STATE_BSIZE 8
/*
 * PUSH_STATE(s): enter lexer state (s).
 *
 * Advances statep to the next slot of the state stack; when that slot is
 * the end of the current block (state_info.end), push_state_i() supplies
 * the slot to use instead. The new slot gets type (s), which is also
 * copied into the local 'state', and inherits ls_flags from the state
 * that was current before the push.
 *
 * Expands to a single statement; expects 'statep', 'state' and
 * 'state_info' to be in scope at the point of use.
 */
154 #define PUSH_STATE(s) do { \
155 uint8_t state_flags = statep->ls_flags; \
156 if (++statep == state_info.end) \
157 statep = push_state_i(&state_info, statep); \
158 state = statep->type = (s); \
159 statep->ls_flags = state_flags; \
160 } while (/* CONSTCOND */ 0)
/*
 * POP_STATE(): leave the current lexer state.
 *
 * Steps statep back by one slot. If that lands on state_info.base (the
 * first slot of the current block, which carries the ls_base link rather
 * than a real state), pop_state_i() releases the block and yields the
 * slot to continue with. Finally the local 'state' is reloaded from the
 * slot that is now on top.
 *
 * Expands to a single statement; expects 'statep', 'state' and
 * 'state_info' to be in scope at the point of use.
 */
162 #define POP_STATE() do { \
163 if (--statep == state_info.base) \
164 statep = pop_state_i(&state_info, statep); \
165 state = statep->type; \
166 } while (/* CONSTCOND */ 0)
168 #define PUSH_SRETRACE(s) do { \
169 struct sretrace_info *ri; \
172 statep->ls_start = Xsavepos(ws, wp); \
173 ri = alloc(sizeof(struct sretrace_info), ATEMP); \
174 Xinit(ri->xs, ri->xp, 64, ATEMP); \
175 ri->next = retrace_info; \
177 } while (/* CONSTCOND */ 0)
179 #define POP_SRETRACE() do { \
180 wp = Xrestpos(ws, wp, statep->ls_start); \
181 *retrace_info->xp = '\0'; \
182 sp = Xstring(retrace_info->xs, retrace_info->xp); \
183 dp = (void *)retrace_info; \
184 retrace_info = retrace_info->next; \
187 } while (/* CONSTCOND */ 0)
192 * tokens are not regular expressions, they are LL(1).
193 * for example, "${var:-${PWD}}", and "$(size $(whence ksh))".
194 * hence the state stack. Note "$(...)" are now parsed recursively.
200 Lex_state states[STATE_BSIZE], *statep, *s2, *base;
201 State_info state_info;
204 XString ws; /* expandable output word */
205 char *wp; /* output word pointer */
209 states[0].type = SINVALID;
210 states[0].ls_base = NULL;
212 state_info.base = states;
213 state_info.end = &state_info.base[STATE_BSIZE];
215 Xinit(ws, wp, 64, ATEMP);
218 ignore_backslash_newline = 0;
222 else if (cf & LETEXPR) {
223 /* enclose arguments in (double) quotes */
229 state = (cf & HEREDELIM) ? SHEREDELIM : SBASE;
232 } while (ctype(c, C_BLANK));
234 ignore_backslash_newline++;
237 } while (!ctype(c, C_NUL | C_LF));
238 ignore_backslash_newline--;
242 if (source->flags & SF_ALIAS) {
243 /* trailing ' ' in alias definition */
244 source->flags &= ~SF_ALIAS;
245 /* POSIX: trailing space only counts if parsing simple cmd */
246 if (!Flag(FPOSIX) || (cf & CMDWORD))
250 /* Initial state: one of SWORD SLETPAREN SHEREDELIM SBASE */
251 statep->type = state;
252 statep->ls_flags = (cf & HEREDOC) ? LS_HEREDOC : 0;
254 /* collect non-special or quoted characters to form word */
255 while (!((c = getsc()) == 0 ||
256 ((state == SBASE || state == SHEREDELIM) && ctype(c, C_LEX1)))) {
257 if (state == SBASE &&
258 subshell_nesting_type == ORD(/*{*/ '}') &&
259 (unsigned int)c == ORD(/*{*/ '}'))
260 /* possibly end ${ :;} */
265 if ((unsigned int)c == ORD('('))
267 else if ((unsigned int)c == ORD(')'))
269 else if (statep->nparen == 0 &&
270 ((unsigned int)c == ORD(/*{*/ '}') ||
271 c == (int)statep->ls_adelim.delimiter)) {
274 if ((unsigned int)c == ORD(/*{*/ '}') ||
275 --statep->ls_adelim.num == 0)
277 if ((unsigned int)c == ORD(/*{*/ '}'))
283 if ((unsigned int)c == ORD('[') && (cf & CMDASN)) {
286 if (is_wdvarname(Xstring(ws, wp), false)) {
289 if (arraysub(&tmp)) {
292 for (p = tmp; *p; ) {
306 Sbase1: /* includes *(...|...) pattern (*+?@!) */
307 if (ctype(c, C_PATMO)) {
309 if ((unsigned int)c2 == ORD('(' /*)*/)) {
312 PUSH_STATE(SPATTERN);
318 Sbase2: /* doesn't include *(...|...) pattern (*+?@!) */
323 /* trailing \ is lost */
329 open_ssquote_unless_heredoc:
330 if ((statep->ls_flags & LS_HEREDOC))
333 ignore_backslash_newline++;
343 * processing of dollar sign belongs in
344 * Subst, except for those which can open
345 * a string: $'…' and $"…"
368 if ((statep->ls_flags & LS_HEREDOC))
382 /* trailing \ is lost */
394 if ((unsigned int)c == ORD('(' /*)*/)) {
396 if ((unsigned int)c == ORD('(' /*)*/)) {
398 PUSH_SRETRACE(SASPAREN);
400 /*statep->ls_flags &= ~LS_HEREDOC;*/
402 *retrace_info->xp++ = '(';
415 } else if ((unsigned int)c == ORD('{' /*}*/)) {
416 if ((unsigned int)(c = getsc()) == ORD('|')) {
423 } else if (ctype(c, C_IFSWS)) {
426 * "command" substitution
434 wp = get_brace_var(&ws, wp);
436 /* allow :# and :% (ksh88 compat) */
437 if ((unsigned int)c == ORD(':')) {
441 if ((unsigned int)c == ORD(':')) {
447 /* perhaps unneeded? */
448 statep->ls_flags &= ~LS_HEREDOC;
450 statep->ls_adelim.delimiter = ':';
451 statep->ls_adelim.num = 1;
454 } else if (ctype(c, C_DIGIT | C_DOLAR | C_SPC) ||
457 /* substring subst. */
464 /* perhaps unneeded? */
465 statep->ls_flags &= ~LS_HEREDOC;
467 statep->ls_adelim.delimiter = ':';
468 statep->ls_adelim.num = 2;
472 } else if (c == '/') {
477 if ((unsigned int)(c = getsc()) == ORD('/')) {
483 /* perhaps unneeded? */
484 statep->ls_flags &= ~LS_HEREDOC;
486 statep->ls_adelim.delimiter = '/';
487 statep->ls_adelim.num = 1;
490 } else if (c == '@') {
493 if ((unsigned int)c2 == ORD('/')) {
495 goto parse_adelim_slash;
499 * If this is a trim operation,
500 * treat (,|,) specially in STBRACE.
502 if (ctype(c, C_SUB2)) {
505 PUSH_STATE(STBRACEBOURNE);
507 PUSH_STATE(STBRACEKORN);
508 /* single-quotes-in-heredoc-trim */
509 statep->ls_flags &= ~LS_HEREDOC;
512 if (state == SDQUOTE ||
517 /* here no LS_HEREDOC removal */
518 /* single-quotes-in-heredoc-braces */
520 } else if (ctype(c, C_ALPHX)) {
527 } while (ctype(c, C_ALNUX));
532 } else if (ctype(c, C_VAR1 | C_DIGIT)) {
551 * We need to know whether we are within double
552 * quotes in order to translate \" to " within
553 * "…`…\"…`…" because, unlike for COMSUBs, the
554 * outer double quoting changes the backslash
555 * meaning for the inside. For more details:
556 * http://austingroupbugs.net/view.php?id=1015
558 statep->ls_bool = false;
560 base = state_info.base;
561 while (/* CONSTCOND */ 1) {
562 for (; s2 != base; s2--) {
563 if (s2->type == SDQUOTE) {
564 statep->ls_bool = true;
570 if (!(s2 = s2->ls_base))
572 base = s2-- - STATE_BSIZE;
590 if ((unsigned int)c == ORD('\'')) {
593 ignore_backslash_newline--;
594 } else if ((unsigned int)c == ORD('\\')) {
595 if ((c2 = unbksl(true, getsc_i, ungetsc)) == -1)
598 statep->ls_bool = true;
599 if (!statep->ls_bool) {
602 if ((unsigned int)c2 < 0x100) {
606 cz = utf_wctomb(ts, c2 - 0x100);
615 } else if (!statep->ls_bool) {
622 if ((unsigned int)c == ORD('\'')) {
624 if ((statep->ls_flags & LS_HEREDOC) ||
628 ignore_backslash_newline--;
636 if ((unsigned int)c == ORD('"')) {
645 if ((unsigned int)c == ORD('('))
647 else if ((unsigned int)c == ORD(')')) {
649 if (statep->nparen == 1) {
653 if ((unsigned int)(c2 = getsc()) == ORD(/*(*/ ')')) {
656 memcpy(wp, sp + 1, cz);
666 * mismatched parenthesis -
667 * assume we were really
668 * parsing a $(...) expression
681 /* reuse existing state machine */
685 if ((unsigned int)c == ORD('\\')) {
687 * perform POSIX "quote removal" if the back-
688 * slash is "special", i.e. same cases as the
689 * {case '\\':} in Subst: plus closing brace;
690 * in mksh code "quote removal" on '\c' means
691 * write QCHAR+c, otherwise CHAR+\+CHAR+c are
692 * emitted (in heredocquote:)
694 if ((unsigned int)(c = getsc()) == ORD('"') ||
695 (unsigned int)c == ORD('\\') ||
696 ctype(c, C_DOLAR | C_GRAVE) ||
697 (unsigned int)c == ORD(/*{*/ '}'))
704 if ((unsigned int)c == ORD('\''))
705 goto open_ssquote_unless_heredoc;
706 else if ((unsigned int)c == ORD('\\'))
709 if ((unsigned int)c == ORD('"'))
711 else if ((unsigned int)c == ORD('$'))
712 goto subst_dollar_ex;
713 else if ((unsigned int)c == ORD('`'))
715 else if ((unsigned int)c != ORD(/*{*/ '}'))
722 /* Same as SBASE, except (,|,) treated specially */
724 if ((unsigned int)c == ORD('|'))
726 else if ((unsigned int)c == ORD('(')) {
730 PUSH_STATE(SPATTERN);
731 } else /* FALLTHROUGH */
733 if ((unsigned int)c == ORD(/*{*/ '}')) {
742 if ((unsigned int)c == ORD('`')) {
745 } else if ((unsigned int)c == ORD('\\')) {
746 switch (c = getsc()) {
748 /* trailing \ is lost */
756 if (statep->ls_bool) {
774 /* LETEXPR: (( ... )) */
776 if ((unsigned int)c == ORD(/*(*/ ')')) {
777 if (statep->nparen > 0)
779 else if ((unsigned int)(c2 = getsc()) == ORD(/*(*/ ')')) {
789 * mismatched parenthesis -
790 * assume we were really
791 * parsing a (...) expression
794 sp = Xstring(ws, wp);
795 dp = wdstrip(sp + 1, WDS_TPUTS);
796 s = pushs(SREREAD, source->areap);
797 s->start = s->str = s->u.freeme = dp;
801 return (ORD('(' /*)*/));
803 } else if ((unsigned int)c == ORD('('))
805 * parentheses inside quotes and
806 * backslashes are lost, but AT&T ksh
807 * doesn't count them either
812 /* << or <<- delimiter */
815 * here delimiters need a special case since
816 * $ and `...` are not to be treated specially
821 /* trailing \ is lost */
827 goto open_ssquote_unless_heredoc;
829 if ((unsigned int)(c2 = getsc()) == ORD('\'')) {
832 ignore_backslash_newline++;
834 statep->ls_bool = false;
836 } else if ((unsigned int)c2 == ORD('"')) {
839 PUSH_SRETRACE(SHEREDQUOTE);
850 /* " in << or <<- delimiter */
852 if ((unsigned int)c != ORD('"'))
856 /* remove the trailing double quote */
858 /* store the quoted string */
860 XcheckN(ws, wp, (dp - sp) * 2);
862 while ((c = *dp++)) {
864 switch ((c = *dp++)) {
881 state = statep->type = SHEREDELIM;
884 /* in *(...|...) pattern (*+?@!) */
886 if ((unsigned int)c == ORD(/*(*/ ')')) {
889 } else if ((unsigned int)c == ORD('|')) {
891 } else if ((unsigned int)c == ORD('(')) {
895 PUSH_STATE(SPATTERN);
903 if (statep != &states[1])
904 /* XXX figure out what is missing */
905 yyerror("no closing quote");
907 /* This is done to avoid tests for SHEREDELIM wherever SBASE is tested */
908 if (state == SHEREDELIM)
911 dp = Xstring(ws, wp);
912 if (state == SBASE && (
913 (c == '&' && !Flag(FSH) && !Flag(FPOSIX)) ||
914 ctype(c, C_ANGLE)) && ((c2 = Xlength(ws, wp)) == 0 ||
915 (c2 == 2 && dp[0] == CHAR && ctype(dp[1], C_DIGIT)))) {
916 struct ioword *iop = alloc(sizeof(struct ioword), ATEMP);
918 iop->unit = c2 == 2 ? ksh_numdig(dp[1]) : c == '<' ? 0 : 1;
921 if ((unsigned int)(c2 = getsc()) != ORD('>')) {
926 iop->ioflag = IOBASH;
931 /* <<, >>, <> are ok, >< is not */
932 if (c == c2 || ((unsigned int)c == ORD('<') &&
933 (unsigned int)c2 == ORD('>'))) {
934 iop->ioflag |= c == c2 ?
935 ((unsigned int)c == ORD('>') ? IOCAT : IOHERE) : IORDWR;
936 if (iop->ioflag == IOHERE) {
937 if ((unsigned int)(c2 = getsc()) == ORD('-'))
938 iop->ioflag |= IOSKIP;
939 else if ((unsigned int)c2 == ORD('<'))
940 iop->ioflag |= IOHERESTR;
944 } else if ((unsigned int)c2 == ORD('&'))
945 iop->ioflag |= IODUP | ((unsigned int)c == ORD('<') ? IORDUP : 0);
947 iop->ioflag |= (unsigned int)c == ORD('>') ? IOWRITE : IOREAD;
948 if ((unsigned int)c == ORD('>') && (unsigned int)c2 == ORD('|'))
949 iop->ioflag |= IOCLOB;
965 if (wp == dp && state == SBASE) {
968 /* no word, process LEX1 character */
969 if (((unsigned int)c == ORD('|')) ||
970 ((unsigned int)c == ORD('&')) ||
971 ((unsigned int)c == ORD(';')) ||
972 ((unsigned int)c == ORD('(' /*)*/))) {
973 if ((c2 = getsc()) == c)
974 c = ((unsigned int)c == ORD(';')) ? BREAK :
975 ((unsigned int)c == ORD('|')) ? LOGOR :
976 ((unsigned int)c == ORD('&')) ? LOGAND :
977 /* (unsigned int)c == ORD('(' )) */ MDPAREN;
978 else if ((unsigned int)c == ORD('|') && (unsigned int)c2 == ORD('&'))
980 else if ((unsigned int)c == ORD(';') && (unsigned int)c2 == ORD('|'))
982 else if ((unsigned int)c == ORD(';') && (unsigned int)c2 == ORD('&'))
988 if ((unsigned int)(c2 = getsc()) == ORD('&'))
994 } else if ((unsigned int)c == ORD('\n')) {
1002 } else if (c == '\0' && !(cf & HEREDELIM)) {
1003 struct ioword **p = heres;
1006 if ((*p)->ioflag & IOHERESTR)
1009 /* ksh -c 'cat <<EOF' can cause this */
1011 evalstr((*p)->delim, 0));
1016 /* terminate word */
1018 yylval.cp = Xclose(ws, wp);
1019 if (state == SWORD || state == SLETPAREN
1023 /* unget terminator */
1027 * note: the alias-vs-function code below depends on several
1028 * interna: starting from here, source->str is not modified;
1029 * the way getsc() and ungetsc() operate; etc.
1032 /* copy word to unprefixed string ident */
1035 while ((dp - ident) < IDENT && (c = *sp++) == CHAR)
1038 /* word is not unquoted, or space ran out */
1040 /* make sure the ident array stays NUL padded */
1041 memset(dp, 0, (ident + IDENT) - dp + 1);
1043 if (*ident != '\0' && (cf & (KEYWORD | ALIAS))) {
1045 uint32_t h = hash(ident);
1047 if ((cf & KEYWORD) && (p = ktsearch(&keywords, ident, h)) &&
1048 (!(cf & ESACONLY) || p->val.i == ESAC ||
1049 (unsigned int)p->val.i == ORD(/*{*/ '}'))) {
1050 afree(yylval.cp, ATEMP);
1053 if ((cf & ALIAS) && (p = ktsearch(&aliases, ident, h)) &&
1054 (p->flag & ISSET)) {
1056 * this still points to the same character as the
1057 * ungetsc'd terminator from above
1059 const char *cp = source->str;
1061 /* prefer POSIX but not Korn functions over aliases */
1062 while (ctype(*cp, C_BLANK))
1064 * this is like getsc() without skipping
1065 * over Source boundaries (including not
1066 * parsing ungetsc'd characters that got
1067 * pushed into an SREREAD) which is what
1068 * we want here anyway: find out whether
1069 * the alias name is followed by a POSIX
1070 * function definition
1073 /* prefer functions over aliases */
1074 if (cp[0] != '(' || cp[1] != ')') {
1077 while (s && (s->flags & SF_HASALIAS))
1082 /* push alias expansion */
1083 s = pushs(SALIAS, source->areap);
1084 s->start = s->str = p->val.s;
1086 s->flags |= SF_HASALIAS;
1087 s->line = source->line;
1089 if (source->type == SEOF) {
1090 /* prevent infinite recursion at EOS */
1092 source->flags |= SF_HASALIAS;
1095 afree(yylval.cp, ATEMP);
1099 } else if (*ident == '\0') {
1100 /* retain typeset et al. even when quoted */
1101 struct tbl *tt = get_builtin((dp = wdstrip(yylval.cp, 0)));
1102 uint32_t flag = tt ? tt->flag : 0;
1104 if (flag & (DECL_UTIL | DECL_FWDR))
1105 strlcpy(ident, dp, sizeof(ident));
1117 for (p = heres; p < herep; p++)
1118 if (!((*p)->ioflag & IOHERESTR))
1124 * read "<<word" text into temp file
1128 readhere(struct ioword *iop)
1131 const char *eof, *eofp;
1136 eof = evalstr(iop->delim, 0);
1138 if (!(iop->ioflag & IOEVAL))
1139 ignore_backslash_newline++;
1141 Xinit(xs, xp, 256, ATEMP);
1144 /* beginning of line */
1146 xpos = Xsavepos(xs, xp);
1147 if (iop->ioflag & IOSKIP) {
1148 /* skip over leading tabs */
1149 while ((c = getsc()) == '\t')
1151 goto heredoc_parse_char;
1156 /* compare with here document marker */
1158 /* end of here document marker, what to do? */
1160 case ORD(/*(*/ ')'):
1161 if (!subshell_nesting_type)
1163 * not allowed outside $(...) or (...)
1167 /* allow $(...) or (...) to close here */
1172 * Allow EOF here so that commands without trailing
1173 * newlines (mksh -c '...') will work as well.
1176 /* Newline terminates here document marker */
1177 goto heredoc_found_terminator;
1179 } else if ((unsigned int)c == ord(*eofp++))
1180 /* store; then read and compare next character */
1181 goto heredoc_store_and_loop;
1182 /* nope, mismatch; read until end of line */
1185 /* oops, reached EOF */
1186 yyerror(Tf_heredoc, eof);
1187 /* store character */
1190 /* read next character */
1193 /* we read a newline as last character */
1194 heredoc_store_and_loop:
1195 /* store character */
1199 goto heredoc_read_line;
1200 goto heredoc_read_char;
1202 heredoc_found_terminator:
1203 /* jump back to saved beginning of line */
1204 xp = Xrestpos(xs, xp, xpos);
1205 /* terminate, close and store */
1207 iop->heredoc = Xclose(xs, xp);
1209 if (!(iop->ioflag & IOEVAL))
1210 ignore_backslash_newline--;
/*
 * Report a lexing/parsing error.
 *
 * fmt and the variadic arguments are formatted printf-style with
 * shf_vfprintf() onto shl_out, followed by a newline. Before that, the
 * source stack is unwound past any alias expansion and re-read sources.
 */
1214 yyerror(const char *fmt, ...)
1218 /* pop aliases and re-reads: skip every SALIAS and SREREAD source on top of the stack */
1219 while (source->type == SALIAS || source->type == SREREAD)
1220 source = source->next;
1221 /* zap pending input */
1226 shf_vfprintf(shl_out, fmt, va);
1227 shf_putc('\n', shl_out);
1233 * input for yylex with alias expansion
/*
 * Create a new input Source.
 *
 * type:  source kind; SFILE and SSTDIN additionally get their expandable
 *        buffer s->xs initialised (initial size 256).
 * areap: allocation area the Source structure itself is taken from.
 *
 * The structure is zero-filled right after allocation. Callers in this
 * file use the result as a Source pointer (e.g. s = pushs(SREREAD, ...)).
 */
1237 pushs(int type, Area *areap)
1241 s = alloc(sizeof(Source), areap);
1242 memset(s, 0, sizeof(Source)); /* every field starts out as zero/NULL */
1246 if (type == SFILE || type == SSTDIN)
1247 XinitN(s->xs, 256, s->areap); /* NOTE(review): relies on s->areap having been assigned after the memset - confirm */
1257 while ((c = ord(*s->str++)) == 0) {
1258 /* return 0 for EOF by default */
1274 case SSTRINGCMDLINE:
1278 s->start = s->str = *s->u.strv++;
1283 if (*s->u.strv == NULL) {
1284 s->start = s->str = "\n";
1287 s->start = s->str = T1space;
1293 if (s->flags & SF_ALIASEND) {
1294 /* pass on an unused SF_ALIAS flag */
1296 source->flags |= s->flags & SF_ALIAS;
1298 } else if (*s->u.tblp->val.s &&
1299 ctype((c = strnul(s->u.tblp->val.s)[-1]), C_SPACE)) {
1300 /* pop source stack */
1301 source = s = s->next;
1303 * Note that this alias ended with a
1304 * space, enabling alias expansion on
1305 * the following word.
1307 s->flags |= SF_ALIAS;
1310 * At this point, we need to keep the current
1311 * alias in the source list so recursive
1312 * aliases can be detected and we also need to
1313 * return the next character. Do this by
1314 * temporarily popping the alias to get the
1315 * next character and then put it back in the
1316 * source list with the SF_ALIASEND flag set.
1318 /* pop source stack */
1320 source->flags |= s->flags & SF_ALIAS;
1323 s->flags |= SF_ALIASEND;
1324 s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1325 s->start = s->str = s->ugbuf;
1330 /* avoid reading EOF twice */
1338 if (s->start != s->ugbuf)
1340 afree(s->u.freeme, ATEMP);
1341 source = s = s->next;
1344 if (s->str == NULL) {
1346 s->start = s->str = null;
1349 if (s->flags & SF_ECHO) {
1350 shf_puts(s->str, shl_out);
1358 getsc_line(Source *s)
1360 char *xp = Xstring(s->xs, xp), *cp;
1361 bool interactive = Flag(FTALKING) && s->type == SSTDIN;
1362 bool have_tty = interactive && (s->flags & SF_TTY) && tty_hasstate;
1364 /* Done here to ensure nothing odd happens when a timeout occurs */
1365 XcheckN(s->xs, xp, LINE);
1367 s->start = s->str = xp;
1369 if (have_tty && ksh_tmout) {
1370 ksh_tmout_state = TMOUT_READING;
1374 if (cur_prompt == PS1)
1375 histsave(&s->line, NULL, HIST_FLUSH, true);
1378 #ifndef MKSH_NO_CMDLINE_EDITING
1383 Flag(FEMACS) || Flag(FGMACS))) {
1400 while (/* CONSTCOND */ 1) {
1401 char *p = shf_getse(xp, Xnleft(s->xs, xp), s->u.shf);
1403 if (!p && shf_error(s->u.shf) &&
1404 shf_errno(s->u.shf) == EINTR) {
1405 shf_clearerr(s->u.shf);
1410 if (!p || (xp = p, xp[-1] == '\n'))
1412 /* double buffer size */
1413 /* move past NUL so doubling works... */
1415 XcheckN(s->xs, xp, Xlength(s->xs, xp));
1416 /* ...and move back again */
1420 * flush any unwanted input so other programs/builtins
1421 * can read it. Not very optimal, but less error prone
1422 * than flushing elsewhere, dealing with redirections,
1424 * TODO: reduce size of shf buffer (~128?) if SSTDIN
1426 if (s->type == SSTDIN)
1427 shf_flush(s->u.shf);
1430 * XXX: temporary kludge to restore source after a
1431 * trap may have been executed.
1434 if (have_tty && ksh_tmout) {
1435 ksh_tmout_state = TMOUT_EXECUTING;
1438 cp = Xstring(s->xs, xp);
1440 s->start = s->str = cp;
1441 strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
1442 /* Note: if input is all nulls, this is not eof */
1443 if (Xlength(s->xs, xp) == 0) {
1445 if (s->type == SFILE)
1446 shf_fdclose(s->u.shf);
1448 } else if (interactive && *s->str) {
1449 if (cur_prompt != PS1)
1450 histsave(&s->line, s->str, HIST_APPEND, true);
1451 else if (!ctype(*s->str, C_IFS | C_IFSWS))
1452 histsave(&s->line, s->str, HIST_QUEUE, true);
1453 #if !defined(MKSH_SMALL) && HAVE_PERSISTENT_HISTORY
1455 goto check_for_sole_return;
1456 } else if (interactive && cur_prompt == PS1) {
1457 check_for_sole_return:
1458 cp = Xstring(s->xs, xp);
1459 while (ctype(*cp, C_IFSWS))
1462 histsave(&s->line, NULL, HIST_FLUSH, true);
1468 set_prompt(PS2, NULL);
1472 set_prompt(int to, Source *s)
1474 cur_prompt = (uint8_t)to;
1480 * Substitute ! and !! here, before substitutions are done
1481 * so ! in expanded variables are not expanded.
1482 * NOTE: this is not what AT&T ksh does (it does it after
1483 * substitutions); POSIX doesn't say which is to be done.
1487 char * volatile ps1;
1491 ps1 = str_val(global("PS1"));
1492 shf = shf_sopen(NULL, strlen(ps1) * 2,
1493 SHF_WR | SHF_DYNAMIC, NULL);
1495 if (*ps1 != '!' || *++ps1 == '!')
1496 shf_putchar(*ps1++, shf);
1498 shf_fprintf(shf, Tf_lu, s ?
1499 (unsigned long)s->line + 1 : 0UL);
1500 ps1 = shf_sclose(shf);
1501 saved_lineno = current_lineno;
1503 current_lineno = s->line + 1;
1504 saved_atemp = ATEMP;
1506 if (kshsetjmp(e->jbuf)) {
1507 prompt = safe_prompt;
1509 * Don't print an error - assume it has already
1510 * been printed. Reason is we may have forked
1511 * to run a command and the child may be
1512 * unwinding its stack through this code as it
1516 char *cp = substitute(ps1, 0);
1517 strdupx(prompt, cp, saved_atemp);
1519 current_lineno = saved_lineno;
1523 /* command continuation */
1525 prompt = str_val(global("PS2"));
/*
 * Walk the prompt string cp, tracking the screen position it produces,
 * and optionally write it to shl_out.
 *
 * cp:        prompt text.
 * ntruncate: -1 means "measure only" (nothing is written). Otherwise a
 *            character is written when the running position
 *            (x_cols * lines + columns) is greater than ntruncate, or
 *            when it lies inside a delimiter-enclosed region.
 *
 * Returns x_cols * lines + columns as accumulated over the string.
 */
1531 pprompt(const char *cp, int ntruncate)
1534 bool doprint = (ntruncate != -1); /* false: only compute the position */
1535 bool indelimit = false; /* true while between two delimiter characters */
1536 int columns = 0, lines = 0;
1539 * Undocumented AT&T ksh feature:
1540 * If the second char in the prompt string is \r then the first
1541 * char is taken to be a non-printing delimiter and any chars
1542 * between two instances of the delimiter are not considered to
1543 * be part of the prompt length
1545 if (*cp && cp[1] == '\r') {
1550 if (indelimit && *cp != delimiter)
1552 else if (ctype(*cp, C_CR | C_LF)) {
1553 lines += columns / x_cols + ((*cp == '\n') ? 1 : 0); /* rows filled by wrapping, plus one more for LF */
1555 } else if (*cp == '\t') {
1556 columns = (columns | 7) + 1; /* advance to the next multiple of 8 */
1557 } else if (*cp == '\b') {
1560 } else if (*cp == delimiter)
1561 indelimit = !indelimit; /* enter or leave the non-counted region */
1562 else if (UTFMODE && (rtt2asc(*cp) > 0x7F)) {
1564 columns += utf_widthadj(cp, &cp2);
1565 if (doprint && (indelimit ||
1566 (ntruncate < (x_cols * lines + columns))))
1567 shf_write(cp, cp2 - cp, shl_out); /* emit all bytes from cp up to cp2 at once */
1568 cp = cp2 - /* loop increment */ 1;
1572 if (doprint && (*cp != delimiter) &&
1573 (indelimit || (ntruncate < (x_cols * lines + columns))))
1574 shf_putc(*cp, shl_out); /* the delimiter character itself is never written */
1578 return (x_cols * lines + columns);
1582 * Read the variable part of a ${...} expression (i.e. up to but not
1583 * including the :[-+?=#%] or close-brace).
/*
 * wsp: expandable string being built; yylex passes &ws, its output word.
 * wp:  current write position in that string; yylex assigns the return
 *      value back to its own wp.
 *
 * An array subscript without a matching ']' is reported through
 * yyerror("missing ]").
 */
1586 get_brace_var(XString *wsp, char *wp)
1590 PS_INITIAL, PS_SAW_PERCENT, PS_SAW_HASH, PS_SAW_BANG,
1591 PS_IDENT, PS_NUMBER, PS_VAR1
1592 } state = PS_INITIAL; /* scanning always begins in PS_INITIAL */
1594 while (/* CONSTCOND */ 1) {
1596 /* State machine to figure out where the variable part ends. */
1599 if (ctype(c, C_VAR1)) {
1604 if (ord(c2) != ORD(/*{*/ '}')) {
1622 state = PS_SAW_PERCENT;
1625 state = PS_SAW_HASH;
1628 state = PS_SAW_BANG;
1632 case PS_SAW_PERCENT:
1634 if (ctype(c, C_ALPHX))
1636 else if (ctype(c, C_DIGIT))
1638 else if (ctype(c, C_VAR1))
1644 if (!ctype(c, C_ALNUX)) {
1645 if (ord(c) == ORD('[')) {
1648 if (!arraysub(&tmp)) /* false: no matching ']' was found */
1649 yyerror("missing ]");
1665 if (!ctype(c, C_DIGIT))
1675 /* end of variable part */
1682 * Save an array subscript - returns true if matching bracket found, false
1683 * if eof or newline was found.
1684 * (Returned string double null terminated)
/*
 * strp: out parameter; always receives the closed scratch string that
 *       was collected, whether or not the brackets balanced.
 *
 * Returns true exactly when the bracket depth dropped back to zero.
 */
1687 arraysub(char **strp)
1691 /* we are just past the initial [ */
1692 unsigned int depth = 1; /* the '[' already consumed by the caller counts as one open level */
1694 Xinit(ws, wp, 32, ATEMP); /* scratch string, 32 bytes initially, from ATEMP */
1700 if (ord(c) == ORD('['))
1702 else if (ord(c) == ORD(']'))
1704 } while (depth > 0 && c && c != '\n'); /* stop when balanced, at NUL, or at newline */
1707 *strp = Xclose(ws, wp);
1709 return (tobool(depth == 0));
1712 /* Unget a char: handles case when we are already at the start of the buffer */
1716 struct sretrace_info *rp = retrace_info;
1720 /* Don't unget EOF... */
1721 if (source->str == null && c == '\0')
1724 if (Xlength(rp->xs, rp->xp))
1733 if (source->str > source->start)
1738 s = pushs(SREREAD, source->areap);
1739 s->ugbuf[0] = c; s->ugbuf[1] = '\0';
1740 s->start = s->str = s->ugbuf;
1747 /* Called to get a char that isn't a \newline sequence. */
1753 if (ignore_backslash_newline)
1754 return (o_getsc_u());
1756 if (backslash_skip == 1) {
1758 return (o_getsc_u());
1763 while (/* CONSTCOND */ 1) {
1766 if ((c2 = o_getsc_u()) == '\n')
1767 /* ignore the \newline; get the next char... */
1781 if (rtt2asc((c = o_getsc_u())) != 0xEF) {
1785 if (rtt2asc((c = o_getsc_u())) != 0xBB) {
1787 ungetsc_i(asc2rtt(0xEF));
1790 if (rtt2asc((c = o_getsc_u())) != 0xBF) {
1792 ungetsc_i(asc2rtt(0xBB));
1793 ungetsc_i(asc2rtt(0xEF));
/*
 * Slow path of PUSH_STATE: the current block of the state stack is full.
 *
 * si:      stack bookkeeping; updated to describe the new block.
 * old_end: end pointer of the block that just overflowed.
 *
 * A fresh block of STATE_BSIZE entries is allocated from ATEMP. Slot 0
 * of the new block does not hold a state; its ls_base records old_end so
 * that pop_state_i() can find the previous block again.
 */
1800 push_state_i(State_info *si, Lex_state *old_end)
1802 Lex_state *news = alloc2(STATE_BSIZE, sizeof(Lex_state), ATEMP);
1804 news[0].ls_base = old_end; /* back-link to the previous block */
1805 si->base = &news[0];
1806 si->end = &news[STATE_BSIZE]; /* one past the last slot */
/*
 * Slow path of POP_STATE: statep has stepped back onto slot 0 of the
 * current block, so the previous block becomes current again.
 *
 * si:      stack bookkeeping; restored to describe the previous block.
 * old_end: the slot whose ls_base holds the previous block's end pointer
 *          (stored there by push_state_i()).
 *
 * Frees the abandoned block and returns the last slot of the previous
 * block.
 */
1811 pop_state_i(State_info *si, Lex_state *old_end)
1813 Lex_state *old_base = si->base; /* block to release once the link has been read */
1815 si->base = old_end->ls_base - STATE_BSIZE;
1816 si->end = old_end->ls_base;
1818 afree(old_base, ATEMP); /* must come after the ls_base reads above */
1820 return (si->base + STATE_BSIZE - 1);