-/* $OpenBSD: lex.c,v 1.46 2013/01/20 14:47:46 stsp Exp $ */
+/* $OpenBSD: lex.c,v 1.51 2015/09/10 22:48:58 nicm Exp $ */
/*-
* Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
- * 2011, 2012, 2013
- * Thorsten Glaser <tg@mirbsd.org>
+ * 2011, 2012, 2013, 2014, 2015, 2016
+ * mirabilos <m@mirbsd.org>
*
* Provided that these terms and disclaimer and all copyright notices
* are retained or reproduced in an accompanying document, permission
#include "sh.h"
-__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.182 2013/02/19 18:45:20 tg Exp $");
+__RCSID("$MirOS: src/bin/mksh/lex.c,v 1.228 2016/08/01 21:38:03 tg Exp $");
/*
* states while lexing word
#define SQBRACE 7 /* inside "${}" */
#define SBQUOTE 8 /* inside `` */
#define SASPAREN 9 /* inside $(( )) */
-#define SHEREDELIM 10 /* parsing <<,<<-,<<< delimiter */
-#define SHEREDQUOTE 11 /* parsing " in <<,<<-,<<< delimiter */
+#define SHEREDELIM 10 /* parsing << or <<- delimiter */
+#define SHEREDQUOTE 11 /* parsing " in << or <<- delimiter */
#define SPATTERN 12 /* parsing *(...|...) pattern (*+?@!) */
#define SADELIM 13 /* like SBASE, looking for delimiter */
#define STBRACEKORN 14 /* parsing ${...[#%]...} !FSH */
/* point to the next state block */
struct lex_state *base;
/* marks start of state output in output string */
- int start;
+ size_t start;
/* SBQUOTE: true if in double quotes: "`...`" */
/* SEQUOTE: got NUL, ignore rest of string */
bool abool;
static int getsc_uu(void);
static void getsc_line(Source *);
static int getsc_bn(void);
-static int s_get(void);
-static void s_put(int);
+static int getsc_i(void);
static char *get_brace_var(XString *, char *);
static bool arraysub(char **);
-static void gethere(bool);
+static void gethere(void);
static Lex_state *push_state_i(State_info *, Lex_state *);
static Lex_state *pop_state_i(State_info *, Lex_state *);
-static int dopprompt(const char *, int, bool);
-
static int backslash_skip;
static int ignore_backslash_newline;
#define o_getsc_u() ((*source->str != '\0') ? *source->str++ : getsc_uu())
/* retrace helper */
-#define o_getsc_r(carg) { \
+#define o_getsc_r(carg) \
int cev = (carg); \
struct sretrace_info *rp = retrace_info; \
\
rp = rp->next; \
} \
\
- return (cev); \
-}
-
-#if defined(MKSH_SMALL) && !defined(MKSH_SMALL_BUT_FAST)
-static int getsc(void);
+ return (cev);
+/*
+ * callback: real-function form of the character getter, so that its
+ * address can be handed to unbksl() as the "get" routine; when built
+ * with MKSH_SMALL (and not MKSH_SMALL_BUT_FAST) getsc is simply
+ * defined to this function. The o_getsc_r() macro expands to the
+ * statements of the body, including the final return of the character.
+ */
static int
-getsc(void)
+getsc_i(void)
{
o_getsc_r(o_getsc());
}
+
+#if defined(MKSH_SMALL) && !defined(MKSH_SMALL_BUT_FAST)
+#define getsc getsc_i
#else
static int getsc_r(int);
state = statep->type; \
} while (/* CONSTCOND */ 0)
-#define PUSH_SRETRACE() do { \
+#define PUSH_SRETRACE(s) do { \
struct sretrace_info *ri; \
\
+ PUSH_STATE(s); \
statep->ls_start = Xsavepos(ws, wp); \
ri = alloc(sizeof(struct sretrace_info), ATEMP); \
Xinit(ri->xs, ri->xp, 64, ATEMP); \
dp = (void *)retrace_info; \
retrace_info = retrace_info->next; \
afree(dp, ATEMP); \
+ POP_STATE(); \
} while (/* CONSTCOND */ 0)
/**
if (source->flags & SF_ALIAS) {
/* trailing ' ' in alias definition */
source->flags &= ~SF_ALIAS;
- cf |= ALIAS;
+ /* POSIX: trailing space only counts if parsing simple cmd */
+ if (!Flag(FPOSIX) || (cf & CMDWORD))
+ cf |= ALIAS;
}
/* Initial state: one of SWORD SLETPAREN SHEREDELIM SBASE */
statep->type = state;
- /* check for here string */
- if (state == SHEREDELIM) {
- c = getsc();
- if (c == '<') {
- state = SHEREDELIM;
- while ((c = getsc()) == ' ' || c == '\t')
- ;
- ungetsc(c);
- c = '<';
- goto accept_nonword;
- }
- ungetsc(c);
- }
-
/* collect non-special or quoted characters to form word */
while (!((c = getsc()) == 0 ||
((state == SBASE || state == SHEREDELIM) && ctype(c, C_LEX1)))) {
c == /*{*/ '}')
/* possibly end ${ :;} */
break;
- accept_nonword:
Xcheck(ws, wp);
switch (state) {
case SADELIM:
}
/* FALLTHROUGH */
case SBASE:
- if (c == '[' && (cf & (VARASN|ARRAYVAR))) {
+ if (c == '[' && (cf & CMDASN)) {
/* temporary */
*wp = EOS;
if (is_wdvarname(Xstring(ws, wp), false)) {
}
break;
case '\'':
- open_ssquote:
+ open_ssquote_unless_heredoc:
+ if ((cf & HEREDOC))
+ goto store_char;
*wp++ = OQUOTE;
ignore_backslash_newline++;
PUSH_STATE(SSQUOTE);
c = getsc();
if (c == '(') /*)*/ {
*wp++ = EXPRSUB;
- PUSH_STATE(SASPAREN);
+ PUSH_SRETRACE(SASPAREN);
statep->nparen = 2;
- PUSH_SRETRACE();
*retrace_info->xp++ = '(';
} else {
ungetsc(c);
wp += cz;
}
} else if (c == '{') /*}*/ {
- c = getsc();
- if (ctype(c, C_IFSWS)) {
+ if ((c = getsc()) == '|') {
+ /*
+ * non-subenvironment
+ * value substitution
+ */
+ c = VALSUB;
+ goto subst_command2;
+ } else if (ctype(c, C_IFSWS)) {
/*
* non-subenvironment
* "command" substitution
break;
}
} else if (c == '/') {
+ c2 = ADELIM;
+ parse_adelim_slash:
*wp++ = CHAR;
*wp++ = c;
if ((c = getsc()) == '/') {
- *wp++ = ADELIM;
+ *wp++ = c2;
*wp++ = c;
} else
ungetsc(c);
statep->ls_adelim.num = 1;
statep->nparen = 0;
break;
+ } else if (c == '@') {
+ c2 = getsc();
+ ungetsc(c2);
+ if (c2 == '/') {
+ c2 = CHAR;
+ goto parse_adelim_slash;
+ }
}
/*
* If this is a trim operation,
PUSH_STATE(STBRACEKORN);
} else {
ungetsc(c);
- if (state == SDQUOTE)
+ if (state == SDQUOTE ||
+ state == SQBRACE)
PUSH_STATE(SQBRACE);
else
PUSH_STATE(SBRACE);
PUSH_STATE(SBQUOTE);
*wp++ = COMSUB;
/*
- * Need to know if we are inside double quotes
- * since sh/AT&T-ksh translate the \" to " in
- * "`...\"...`".
- * This is not done in POSIX mode (section
- * 3.2.3, Double Quotes: "The backquote shall
- * retain its special meaning introducing the
- * other form of command substitution (see
- * 3.6.3). The portion of the quoted string
- * from the initial backquote and the
- * characters up to the next backquote that
- * is not preceded by a backslash (having
- * escape characters removed) defines that
- * command whose output replaces `...` when
- * the word is expanded."
- * Section 3.6.3, Command Substitution:
- * "Within the backquoted style of command
- * substitution, backslash shall retain its
- * literal meaning, except when followed by
- * $ ` \.").
+ * We need to know whether we are within double
+ * quotes in order to translate \" to " within
+ * "…`…\"…`…" because, unlike for COMSUBs, the
+ * outer double quoting changes the backslash
+ * meaning for the inside. For more details:
+ * http://austingroupbugs.net/view.php?id=1015
*/
statep->ls_bool = false;
s2 = statep;
*wp++ = CQUOTE;
ignore_backslash_newline--;
} else if (c == '\\') {
- if ((c2 = unbksl(true, s_get, s_put)) == -1)
- c2 = s_get();
+ if ((c2 = unbksl(true, getsc_i, ungetsc)) == -1)
+ c2 = getsc();
if (c2 == 0)
statep->ls_bool = true;
if (!statep->ls_bool) {
} else {
cz = utf_wctomb(ts, c2 - 0x100);
ts[cz] = 0;
- for (cz = 0; ts[cz]; ++cz) {
+ cz = 0;
+ do {
*wp++ = QCHAR;
*wp++ = ts[cz];
- }
+ } while (ts[++cz]);
}
}
} else if (!statep->ls_bool) {
case SSQUOTE:
if (c == '\'') {
POP_STATE();
+ if ((cf & HEREDOC) || state == SQBRACE)
+ goto store_char;
*wp++ = CQUOTE;
ignore_backslash_newline--;
} else {
if (statep->nparen == 1) {
/* end of EXPRSUB */
POP_SRETRACE();
- POP_STATE();
if ((c2 = getsc()) == /*(*/ ')') {
cz = strlen(sp) - 2;
case SBRACE:
if (c == '\'')
- goto open_ssquote;
+ goto open_ssquote_unless_heredoc;
else if (c == '\\')
goto getsc_qchar;
common_SQBRACE:
case 0:
/* trailing \ is lost */
break;
+ case '$':
+ case '`':
case '\\':
- case '$': case '`':
*wp++ = c;
break;
case '"':
Source *s;
ungetsc(c2);
+ ungetsc(c);
/*
* mismatched parenthesis -
* assume we were really
*/
*wp = EOS;
sp = Xstring(ws, wp);
- dp = wdstrip(sp, WDS_KEEPQ);
+ dp = wdstrip(sp + 1, WDS_TPUTS);
s = pushs(SREREAD, source->areap);
s->start = s->str = s->u.freeme = dp;
s->next = source;
source = s;
+ ungetsc('('/*)*/);
return ('('/*)*/);
}
} else if (c == '(')
++statep->nparen;
goto Sbase2;
- /* <<, <<-, <<< delimiter */
+ /* << or <<- delimiter */
case SHEREDELIM:
/*
* here delimiters need a special case since
}
break;
case '\'':
- goto open_ssquote;
+ goto open_ssquote_unless_heredoc;
case '$':
if ((c2 = getsc()) == '\'') {
open_sequote:
} else if (c2 == '"') {
/* FALLTHROUGH */
case '"':
- state = statep->type = SHEREDQUOTE;
- PUSH_SRETRACE();
+ PUSH_SRETRACE(SHEREDQUOTE);
break;
}
ungetsc(c2);
}
break;
- /* " in <<, <<-, <<< delimiter */
+ /* " in << or <<- delimiter */
case SHEREDQUOTE:
if (c != '"')
goto Subst;
*dp = '\0';
/* store the quoted string */
*wp++ = OQUOTE;
- XcheckN(ws, wp, (dp - sp));
+ XcheckN(ws, wp, (dp - sp) * 2);
dp = sp;
while ((c = *dp++)) {
if (c == '\\') {
state = SBASE;
dp = Xstring(ws, wp);
- if ((c == '<' || c == '>' || c == '&') && state == SBASE) {
+ if (state == SBASE && (
+#ifndef MKSH_LEGACY_MODE
+ (c == '&' && !Flag(FSH) && !Flag(FPOSIX)) ||
+#endif
+ c == '<' || c == '>') && ((c2 = Xlength(ws, wp)) == 0 ||
+ (c2 == 2 && dp[0] == CHAR && ksh_isdigit(dp[1])))) {
struct ioword *iop = alloc(sizeof(struct ioword), ATEMP);
- if (Xlength(ws, wp) == 0)
- iop->unit = c == '<' ? 0 : 1;
- else for (iop->unit = 0, c2 = 0; c2 < Xlength(ws, wp); c2 += 2) {
- if (dp[c2] != CHAR)
- goto no_iop;
- if (!ksh_isdigit(dp[c2 + 1]))
- goto no_iop;
- iop->unit = (iop->unit * 10) + dp[c2 + 1] - '0';
- }
-
- if (iop->unit >= FDBASE)
- goto no_iop;
+ iop->unit = c2 == 2 ? ksh_numdig(dp[1]) : c == '<' ? 0 : 1;
if (c == '&') {
if ((c2 = getsc()) != '>') {
goto no_iop;
}
c = c2;
- iop->flag = IOBASH;
+ iop->ioflag = IOBASH;
} else
- iop->flag = 0;
+ iop->ioflag = 0;
c2 = getsc();
/* <<, >>, <> are ok, >< is not */
if (c == c2 || (c == '<' && c2 == '>')) {
- iop->flag |= c == c2 ?
+ iop->ioflag |= c == c2 ?
(c == '>' ? IOCAT : IOHERE) : IORDWR;
- if (iop->flag == IOHERE) {
- if ((c2 = getsc()) == '-') {
- iop->flag |= IOSKIP;
- c2 = getsc();
- } else if (c2 == '<')
- iop->flag |= IOHERESTR;
- ungetsc(c2);
- if (c2 == '\n')
- iop->flag |= IONDELIM;
+ if (iop->ioflag == IOHERE) {
+ if ((c2 = getsc()) == '-')
+ iop->ioflag |= IOSKIP;
+ else if (c2 == '<')
+ iop->ioflag |= IOHERESTR;
+ else
+ ungetsc(c2);
}
} else if (c2 == '&')
- iop->flag |= IODUP | (c == '<' ? IORDUP : 0);
+ iop->ioflag |= IODUP | (c == '<' ? IORDUP : 0);
else {
- iop->flag |= c == '>' ? IOWRITE : IOREAD;
+ iop->ioflag |= c == '>' ? IOWRITE : IOREAD;
if (c == '>' && c2 == '|')
- iop->flag |= IOCLOB;
+ iop->ioflag |= IOCLOB;
else
ungetsc(c2);
}
- iop->name = NULL;
+ iop->ioname = NULL;
iop->delim = NULL;
iop->heredoc = NULL;
/* free word */
}
#endif
} else if (c == '\n') {
- gethere(false);
- if (cf & CONTIN)
- goto Again;
- } else if (c == '\0')
- /* need here strings at EOF */
- gethere(true);
+ if (cf & HEREDELIM)
+ ungetsc(c);
+ else {
+ gethere();
+ if (cf & CONTIN)
+ goto Again;
+ }
+ } else if (c == '\0' && !(cf & HEREDELIM)) {
+ struct ioword **p = heres;
+
+ while (p < herep)
+ if ((*p)->ioflag & IOHERESTR)
+ ++p;
+ else
+ /* ksh -c 'cat <<EOF' can cause this */
+ yyerror(Tf_heredoc,
+ evalstr((*p)->delim, 0));
+ }
return (c);
}
/* copy word to unprefixed string ident */
sp = yylval.cp;
dp = ident;
- if ((cf & HEREDELIM) && (sp[1] == '<'))
- while ((dp - ident) < IDENT) {
- if ((c = *sp++) == CHAR)
- *dp++ = *sp++;
- else if ((c != OQUOTE) && (c != CQUOTE))
- break;
- }
- else
- while ((dp - ident) < IDENT && (c = *sp++) == CHAR)
- *dp++ = *sp++;
- /* Make sure the ident array stays '\0' padded */
- memset(dp, 0, (ident + IDENT) - dp + 1);
+ while ((dp - ident) < IDENT && (c = *sp++) == CHAR)
+ *dp++ = *sp++;
if (c != EOS)
/* word is not unquoted */
- *ident = '\0';
+ dp = ident;
+ /* make sure the ident array stays NUL padded */
+ memset(dp, 0, (ident + IDENT) - dp + 1);
- if (*ident != '\0' && (cf & (KEYWORD | ALIAS))) {
+ if (!(cf & (KEYWORD | ALIAS)))
+ return (LWORD);
+
+ if (*ident != '\0') {
struct tbl *p;
uint32_t h = hash(ident);
* pushed into an SREREAD) which is what
* we want here anyway: find out whether
* the alias name is followed by a POSIX
- * function definition (only the opening
- * parenthesis is checked though)
+ * function definition
*/
++cp;
/* prefer functions over aliases */
goto Again;
}
}
+ } else if (cf & ALIAS) {
+ /* retain typeset et al. even when quoted */
+ if (assign_command((dp = wdstrip(yylval.cp, 0)), true))
+ strlcpy(ident, dp, sizeof(ident));
+ afree(dp, ATEMP);
}
return (LWORD);
}
static void
-gethere(bool iseof)
+gethere(void) /* call readhere() for each pending entry in heres..herep, then reset herep */
{
struct ioword **p;
for (p = heres; p < herep; p++)
- if (iseof && !((*p)->flag & IOHERESTR))
- /* only here strings at EOF */
- return;
- else
+ if (!((*p)->ioflag & IOHERESTR)) /* entries flagged as here strings are skipped */
readhere(*p);
herep = heres;
}
const char *eof, *eofp;
XString xs;
char *xp;
- int xpos;
-
- if (iop->flag & IOHERESTR) {
- /* process the here string */
- iop->heredoc = xp = evalstr(iop->delim, DOBLANK);
- xpos = strlen(xp) - 1;
- memmove(xp, xp + 1, xpos);
- xp[xpos] = '\n';
- return;
- }
+ size_t xpos;
- eof = iop->flag & IONDELIM ? "<<" : evalstr(iop->delim, 0);
+ eof = evalstr(iop->delim, 0);
- if (!(iop->flag & IOEVAL))
+ if (!(iop->ioflag & IOEVAL))
ignore_backslash_newline++;
Xinit(xs, xp, 256, ATEMP);
/* beginning of line */
eofp = eof;
xpos = Xsavepos(xs, xp);
- if (iop->flag & IOSKIP) {
+ if (iop->ioflag & IOSKIP) {
/* skip over leading tabs */
while ((c = getsc()) == '\t')
- /* nothing */;
+ ; /* nothing */
goto heredoc_parse_char;
}
heredoc_read_char:
while (c != '\n') {
if (!c)
/* oops, reached EOF */
- yyerror("%s '%s' unclosed\n", "here document", eof);
+ yyerror(Tf_heredoc, eof);
/* store character */
Xcheck(xs, xp);
Xput(xs, xp, c);
Xput(xs, xp, '\0');
iop->heredoc = Xclose(xs, xp);
- if (!(iop->flag & IOEVAL))
+ if (!(iop->ioflag & IOEVAL))
ignore_backslash_newline--;
}
s->start = s->str = "\n";
s->type = SEOF;
} else {
- s->start = s->str = " ";
+ s->start = s->str = T1space;
s->type = SWORDS;
}
break;
ksh_tmout_state = TMOUT_READING;
alarm(ksh_tmout);
}
- if (interactive)
+ if (interactive) {
+ if (cur_prompt == PS1)
+ histsave(&s->line, NULL, HIST_FLUSH, true);
change_winsz();
+ }
#ifndef MKSH_NO_CMDLINE_EDITING
if (have_tty && (
#if !MKSH_S_NOVI
Flag(FEMACS) || Flag(FGMACS))) {
int nread;
- nread = x_read(xp, LINE);
+ nread = x_read(xp);
if (nread < 0)
/* read error */
nread = 0;
alarm(0);
}
cp = Xstring(s->xs, xp);
+ rndpush(cp);
s->start = s->str = cp;
strip_nuls(Xstring(s->xs, xp), Xlength(s->xs, xp));
/* Note: if input is all nulls, this is not eof */
if (s->type == SFILE)
shf_fdclose(s->u.shf);
s->str = NULL;
- } else if (interactive && *s->str &&
- (cur_prompt != PS1 || !ctype(*s->str, C_IFS | C_IFSWS))) {
- histsave(&s->line, s->str, true, true);
+ } else if (interactive && *s->str) {
+ if (cur_prompt != PS1)
+ histsave(&s->line, s->str, HIST_APPEND, true);
+ else if (!ctype(*s->str, C_IFS | C_IFSWS))
+ histsave(&s->line, s->str, HIST_QUEUE, true);
#if !defined(MKSH_SMALL) && HAVE_PERSISTENT_HISTORY
+ else
+ goto check_for_sole_return;
} else if (interactive && cur_prompt == PS1) {
+ check_for_sole_return:
cp = Xstring(s->xs, xp);
while (*cp && ctype(*cp, C_IFSWS))
++cp;
- if (!*cp)
+ if (!*cp) {
+ histsave(&s->line, NULL, HIST_FLUSH, true);
histsync();
+ }
#endif
}
if (interactive)
void
set_prompt(int to, Source *s)
{
- cur_prompt = to;
+ cur_prompt = (uint8_t)to;
switch (to) {
/* command */
struct shf *shf;
char * volatile ps1;
Area *saved_atemp;
+ int saved_lineno;
ps1 = str_val(global("PS1"));
shf = shf_sopen(NULL, strlen(ps1) * 2,
if (*ps1 != '!' || *++ps1 == '!')
shf_putchar(*ps1++, shf);
else
- shf_fprintf(shf, "%d",
- s ? s->line + 1 : 0);
+ shf_fprintf(shf, Tf_lu, s ?
+ (unsigned long)s->line + 1 : 0UL);
ps1 = shf_sclose(shf);
+ saved_lineno = current_lineno;
+ if (s)
+ current_lineno = s->line + 1;
saved_atemp = ATEMP;
newenv(E_ERRH);
if (kshsetjmp(e->jbuf)) {
char *cp = substitute(ps1, 0);
strdupx(prompt, cp, saved_atemp);
}
+ current_lineno = saved_lineno;
quitenv(NULL);
}
break;
}
}
-static int
-dopprompt(const char *cp, int ntruncate, bool doprint)
+int
+pprompt(const char *cp, int ntruncate)
{
- int columns = 0, lines = 0;
- bool indelimit = false;
char delimiter = 0;
+ bool doprint = (ntruncate != -1);
+ bool indelimit = false;
+ int columns = 0, lines = 0;
/*
* Undocumented AT&T ksh feature:
return (x_cols * lines + columns);
}
-
-void
-pprompt(const char *cp, int ntruncate)
-{
- dopprompt(cp, ntruncate, true);
-}
-
-int
-promptlen(const char *cp)
-{
- return (dopprompt(cp, 0, false));
-}
-
/*
* Read the variable part of a ${...} expression (i.e. up to but not
* including the :[-+?=#%] or close-brace).
{
char c;
enum parse_state {
- PS_INITIAL, PS_SAW_HASH, PS_IDENT,
- PS_NUMBER, PS_VAR1
+ PS_INITIAL, PS_SAW_PERCENT, PS_SAW_HASH, PS_SAW_BANG,
+ PS_IDENT, PS_NUMBER, PS_VAR1
} state = PS_INITIAL;
while (/* CONSTCOND */ 1) {
c = getsc();
/* State machine to figure out where the variable part ends. */
switch (state) {
+ case PS_SAW_HASH:
+ if (ctype(c, C_VAR1)) {
+ char c2;
+
+ c2 = getsc();
+ ungetsc(c2);
+ if (c2 != /*{*/ '}') {
+ ungetsc(c);
+ goto out;
+ }
+ }
+ goto ps_common;
+ case PS_SAW_BANG:
+ switch (c) {
+ case '@':
+ case '#':
+ case '-':
+ case '?':
+ goto out;
+ }
+ goto ps_common;
case PS_INITIAL:
- if (c == '#' || c == '!' || c == '%') {
+ switch (c) {
+ case '%':
+ state = PS_SAW_PERCENT;
+ goto next;
+ case '#':
state = PS_SAW_HASH;
- break;
+ goto next;
+ case '!':
+ state = PS_SAW_BANG;
+ goto next;
}
/* FALLTHROUGH */
- case PS_SAW_HASH:
+ case PS_SAW_PERCENT:
+ ps_common:
if (ksh_isalphx(c))
state = PS_IDENT;
else if (ksh_isdigit(c))
state = PS_NUMBER;
- else if (c == '#') {
- if (state == PS_SAW_HASH) {
- char c2;
-
- c2 = getsc();
- ungetsc(c2);
- if (c2 != /*{*/ '}') {
- ungetsc(c);
- goto out;
- }
- }
- state = PS_VAR1;
- } else if (ctype(c, C_VAR1))
+ else if (ctype(c, C_VAR1))
state = PS_VAR1;
else
goto out;
}
goto out;
}
+ next:
break;
case PS_NUMBER:
if (!ksh_isdigit(c))
return (si->base + STATE_BSIZE - 1);
}
-
-static int
-s_get(void)
-{
- return (getsc());
-}
-
-static void
-s_put(int c)
-{
- ungetsc(c);
-}