1 /* $OpenBSD: expr.c,v 1.24 2014/12/08 14:26:31 otto Exp $ */
4 * Copyright (c) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5 * 2011, 2012, 2013, 2014, 2016
6 * mirabilos <m@mirbsd.org>
8 * Provided that these terms and disclaimer and all copyright notices
9 * are retained or reproduced in an accompanying document, permission
10 * is granted to deal in this work without restriction, including un-
11 * limited rights to use, publicly perform, distribute, sell, modify,
12 * merge, give away, or sublicence.
14 * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
15 * the utmost extent permitted by applicable law, neither express nor
16 * implied; without malicious intent or gross negligence. In no event
17 * may a licensor, author or contributor be held liable for indirect,
18 * direct, other damage, loss, or other issues arising in any way out
19 * of dealing in the work, even if advised of the possibility of such
20 * damage or existence of a defect, except proven that it results out
21 * of said person's immediate fault when using the work as intended.
26 __RCSID("$MirOS: src/bin/mksh/expr.c,v 1.88 2016/07/27 00:55:27 tg Exp $");
31 /* precedence levels; used to be enum prec but we do arithmetic on them */
32 #define P_PRIMARY 0 /* VAR, LIT, (), ! ~ ++ -- */
33 #define P_MULT 1 /* * / % */
34 #define P_ADD 2 /* + - */
35 #define P_SHIFT 3 /* ^< ^> << >> */
36 #define P_RELATION 4 /* < <= > >= */
37 #define P_EQUALITY 5 /* == != */
38 #define P_BAND 6 /* & */
39 #define P_BXOR 7 /* ^ */
40 #define P_BOR 8 /* | */
41 #define P_LAND 9 /* && */
42 #define P_LOR 10 /* || */
43 #define P_TERN 11 /* ?: */
44 /* = += -= *= /= %= ^<= ^>= <<= >>= &= ^= |= */
46 #define P_COMMA 13 /* , */
47 #define MAX_PREC P_COMMA
54 static const char opname[][4] = {
59 static const uint8_t oplen[] = {
64 static const uint8_t opprec[] = {
69 typedef struct expr_state {
70 /* expression being evaluated */
71 const char *expression;
72 /* lexical position */
74 /* value from token() */
76 /* variable that is being recursively expanded (EXPRINEVAL flag set) */
78 /* token from token() */
80 /* don't do assignments (for ?:, &&, ||) */
82 /* evaluating an $(()) expression? */
84 /* unsigned arithmetic calculation */
89 ET_UNEXPECTED, ET_BADLIT, ET_RECURSIVE,
90 ET_LVALUE, ET_RDONLY, ET_STR
93 static void evalerr(Expr_state *, enum error_type, const char *)
95 static struct tbl *evalexpr(Expr_state *, unsigned int);
96 static void exprtoken(Expr_state *);
97 static struct tbl *do_ppmm(Expr_state *, enum token, struct tbl *, bool);
98 static void assign_check(Expr_state *, enum token, struct tbl *);
99 static struct tbl *intvar(Expr_state *, struct tbl *);
102 * parse and evaluate expression
105 evaluate(const char *expr, mksh_ari_t *rval, int error_ok, bool arith)
110 v.flag = DEFINED | INTEGER;
112 ret = v_evaluate(&v, expr, error_ok, arith);
118 * parse and evaluate expression, storing result in vp.
121 v_evaluate(struct tbl *vp, const char *expr, volatile int error_ok,
126 Expr_state * const es = &curstate;
129 /* save state to allow recursive calls */
130 memset(&curstate, 0, sizeof(curstate));
131 curstate.expression = curstate.tokp = expr;
133 curstate.arith = arith;
136 if ((i = kshsetjmp(e->jbuf))) {
137 /* Clear EXPRINEVAL in any variables we were playing with */
138 if (curstate.evaling)
139 curstate.evaling->flag &= ~EXPRINEVAL;
142 if (error_ok == KSH_RETURN_ERROR)
151 if (es->tok == END) {
153 es->val = tempvar("");
155 v = intvar(es, evalexpr(es, MAX_PREC));
158 evalerr(es, ET_UNEXPECTED, NULL);
160 if (es->arith && es->natural)
162 if (vp->flag & INTEGER)
163 setint_v(vp, v, es->arith);
165 /* can fail if readonly */
166 setstr(vp, str_val(v), error_ok);
174 evalerr(Expr_state *es, enum error_type type, const char *str)
187 s = str_val(es->val);
190 s = "end of expression";
198 s = opname[(int)es->tok];
200 warningf(true, Tf_sD_s_qs, es->expression,
205 warningf(true, Tf_sD_s_qs, es->expression,
210 warningf(true, Tf_sD_s_qs, es->expression,
211 "expression recurses on parameter", str);
215 warningf(true, Tf_sD_s_s,
216 es->expression, str, "requires lvalue");
220 warningf(true, Tf_sD_s_s,
221 es->expression, str, "applied to read-only variable");
224 default: /* keep gcc happy */
226 warningf(true, Tf_sD_s, es->expression, str);
232 /* do a ++ or -- operation */
234 do_ppmm(Expr_state *es, enum token op, struct tbl *vasn, bool is_prefix)
239 assign_check(es, op, vasn);
241 vl = intvar(es, vasn);
243 if (op == O_PLUSPLUS)
248 if (vasn->flag & INTEGER)
249 setint_v(vasn, vl, es->arith);
251 setint(vasn, vl->val.i);
254 /* undo the increment/decrement */
261 evalexpr(Expr_state *es, unsigned int prec)
263 struct tbl *vl, *vr = NULL, *vasn;
265 mksh_uari_t res = 0, t1, t2, t3;
267 if (prec == P_PRIMARY) {
268 switch ((int)(op = es->tok)) {
274 vl = intvar(es, evalexpr(es, P_PRIMARY));
277 vl->val.u = ~vl->val.u;
280 vl->val.u = !vl->val.u;
283 vl->val.u = -vl->val.u;
293 vl = evalexpr(es, MAX_PREC);
294 if (es->tok != CLOSE_PAREN)
295 evalerr(es, ET_STR, "missing )");
302 vl = do_ppmm(es, op, es->val, true);
313 evalerr(es, ET_UNEXPECTED, NULL);
317 if (es->tok == O_PLUSPLUS || es->tok == O_MINUSMINUS) {
318 vl = do_ppmm(es, es->tok, vl, false);
323 /* prec == P_PRIMARY */
326 vl = evalexpr(es, prec - 1);
327 while ((int)(op = es->tok) >= (int)O_EQ && (int)op <= (int)O_COMMA &&
328 opprec[(int)op] == prec) {
332 /* vl may not have a value yet */
334 if (IS_ASSIGNOP(op)) {
336 assign_check(es, op, vasn);
337 vr = intvar(es, evalexpr(es, P_ASSIGN));
338 } else if (op == O_TERN) {
339 bool ev = vl->val.u != 0;
343 vl = evalexpr(es, MAX_PREC);
346 if (es->tok != CTERN)
347 evalerr(es, ET_STR, "missing :");
351 vr = evalexpr(es, P_TERN);
356 } else if (op != O_LAND && op != O_LOR)
357 vr = intvar(es, evalexpr(es, prec - 1));
359 /* common ops setup */
365 if (vr->val.u == 0) {
367 evalerr(es, ET_STR, "zero divisor");
370 /* calculate the absolute values */
371 t1 = vl->val.i < 0 ? -vl->val.u : vl->val.u;
372 t2 = vr->val.i < 0 ? -vr->val.u : vr->val.u;
374 #ifndef MKSH_LEGACY_MODE
398 #define cmpop(op) (es->natural ? \
399 (mksh_uari_t)(vl->val.u op vr->val.u) : \
400 (mksh_uari_t)(vl->val.i op vr->val.i) \
412 res = vl->val.u % vr->val.u;
415 goto signed_division;
419 res = vl->val.u / vr->val.u;
424 * a / b = abs(a) / abs(b) * sgn((u)a^(u)b)
427 #ifndef MKSH_LEGACY_MODE
428 res = ((vl->val.u ^ vr->val.u) & 0x80000000) ? -t3 : t3;
430 res = ((t1 == vl->val.u ? 0 : 1) ^
431 (t2 == vr->val.u ? 0 : 1)) ? -t3 : t3;
433 if (op == O_MOD || op == O_MODASN) {
435 * primitive modulo, to get the sign of
436 * the result correct:
437 * (a % b) = a - ((a / b) * b)
438 * the subtraction and multiplication
439 * are, amazingly enough, sign ignorant
441 res = vl->val.u - (res * vr->val.u);
452 #ifndef MKSH_LEGACY_MODE
455 res = (t1 << t2) | (t1 >> (32 - t2));
459 res = (t1 >> t2) | (t1 << (32 - t2));
468 res = es->natural || vl->val.i >= 0 ?
505 vr = intvar(es, evalexpr(es, prec - 1));
506 res = t1 && vr->val.u;
513 vr = intvar(es, evalexpr(es, prec - 1));
514 res = t1 || vr->val.u;
526 if (IS_ASSIGNOP(op)) {
529 if (vasn->flag & INTEGER)
530 setint_v(vasn, vr, es->arith);
532 setint(vasn, vr->val.i);
542 exprtoken(Expr_state *es)
544 const char *cp = es->tokp;
548 /* skip whitespace */
550 while ((c = *cp), ksh_isspace(c))
552 if (es->tokp == es->expression && c == '#') {
553 /* expression begins with # */
554 /* switch to unsigned */
563 else if (ksh_isalphx(c)) {
564 for (; ksh_isalnux(c); c = *cp)
569 len = array_ref_len(cp);
571 evalerr(es, ET_STR, "missing ]");
575 es->val = tempvar("");
576 es->val->flag |= EXPRLVALUE;
578 strndupx(tvar, es->tokp, cp - es->tokp, ATEMP);
579 es->val = global(tvar);
583 } else if (c == '1' && cp[1] == '#') {
586 cp += utf_ptradj(cp);
587 strndupx(tvar, es->tokp, cp - es->tokp, ATEMP);
590 } else if (c == '\'') {
593 evalerr(es, ET_UNEXPECTED, NULL);
595 cp += utf_ptradj(cp);
598 "multi-character character constant");
599 /* 'x' -> 1#x (x = one multibyte character) */
601 tvar = alloc(c + /* NUL */ 1, ATEMP);
604 memcpy(tvar + 2, es->tokp + 1, c - 2);
608 } else if (ksh_isdigit(c)) {
609 while (c != '_' && (ksh_isalnux(c) || c == '#'))
611 strndupx(tvar, es->tokp, --cp - es->tokp, ATEMP);
613 es->val = tempvar("");
614 es->val->flag &= ~INTEGER;
616 es->val->val.s = tvar;
617 if (setint_v(es->val, es->val, es->arith) == NULL)
618 evalerr(es, ET_BADLIT, tvar);
624 for (i = 0; (n0 = opname[i][0]); i++)
625 if (c == n0 && strncmp(cp, opname[i],
626 (size_t)oplen[i]) == 0) {
627 es->tok = (enum token)i;
638 assign_check(Expr_state *es, enum token op, struct tbl *vasn)
640 if (es->tok == END || !vasn ||
641 (vasn->name[0] == '\0' && !(vasn->flag & EXPRLVALUE)))
642 evalerr(es, ET_LVALUE, opname[(int)op]);
643 else if (vasn->flag & RDONLY)
644 evalerr(es, ET_RDONLY, opname[(int)op]);
648 tempvar(const char *vname)
653 vsize = strlen(vname) + 1;
654 vp = alloc(offsetof(struct tbl, name[0]) + vsize, ATEMP);
655 memcpy(vp->name, vname, vsize);
656 vp->flag = ISSET|INTEGER;
664 /* cast (string) variable to temporary integer variable */
666 intvar(Expr_state *es, struct tbl *vp)
670 /* try to avoid replacing a temp var with another temp var */
671 if (vp->name[0] == '\0' &&
672 (vp->flag & (ISSET|INTEGER|EXPRLVALUE)) == (ISSET|INTEGER))
676 if (setint_v(vq, vp, es->arith) == NULL) {
677 if (vp->flag & EXPRINEVAL)
678 evalerr(es, ET_RECURSIVE, vp->name);
680 vp->flag |= EXPRINEVAL;
681 v_evaluate(vq, str_val(vp), KSH_UNWIND_ERROR, es->arith);
682 vp->flag &= ~EXPRINEVAL;
690 * UTF-8 support code: high-level functions
694 utf_widthadj(const char *src, const char **dst)
700 if (!UTFMODE || (len = utf_mbtowc(&wc, src)) == (size_t)-1 ||
703 else if ((width = utf_wcwidth(wc)) < 0)
704 /* XXX use 2 for x_zotc3 here? */
713 utf_mbswidth(const char *s)
715 size_t len, width = 0;
723 if (((len = utf_mbtowc(&wc, s)) == (size_t)-1) ||
724 ((cw = utf_wcwidth(wc)) == -1)) {
735 utf_skipcols(const char *p, int cols, int *colp)
742 /* end of input; special handling for edit.c */
744 return (p + cols - c);
748 c += utf_widthadj(p, &p);
751 while (utf_widthadj(p, &q) == 0)
759 utf_ptradj(const char *src)
764 *(const unsigned char *)(src) < 0xC2 ||
765 (n = utf_mbtowc(NULL, src)) == (size_t)-1)
771 * UTF-8 support code: low-level functions
774 /* CESU-8 multibyte and wide character conversion crafted for mksh */
777 utf_mbtowc(unsigned int *dst, const char *src)
779 const unsigned char *s = (const unsigned char *)src;
782 if ((wc = *s++) < 0x80) {
786 return (wc ? ((const char *)s - src) : 0);
788 if (wc < 0xC2 || wc >= 0xF0)
789 /* < 0xC0: spurious second byte */
790 /* < 0xC2: non-minimalistic mapping error in 2-byte seqs */
791 /* > 0xEF: beyond BMP */
795 wc = (wc & 0x1F) << 6;
796 if (((c = *s++) & 0xC0) != 0x80)
802 wc = (wc & 0x0F) << 12;
804 if (((c = *s++) & 0xC0) != 0x80)
806 wc |= (c & 0x3F) << 6;
808 if (((c = *s++) & 0xC0) != 0x80)
812 /* Check for non-minimalistic mapping error in 3-byte seqs */
813 if (wc >= 0x0800 && wc <= 0xFFFD)
816 return ((size_t)(-1));
820 utf_wctomb(char *dst, unsigned int wc)
829 d = (unsigned char *)dst;
831 *d++ = (wc >> 6) | 0xC0;
833 *d++ = ((wc = wc > 0xFFFD ? 0xFFFD : wc) >> 12) | 0xE0;
834 *d++ = ((wc >> 6) & 0x3F) | 0x80;
836 *d++ = (wc & 0x3F) | 0x80;
837 return ((char *)d - dst);
841 * Wrapper around access(2) because it says root can execute everything
842 * on some operating systems. Does not set errno, no user needs it. Use
843 * this iff mode can have the X_OK bit set, access otherwise.
846 ksh_access(const char *fn, int mode)
851 if ((rv = access(fn, mode)) == 0 && kshuid == 0 && (mode & X_OK) &&
852 (rv = stat(fn, &sb)) == 0 && !S_ISDIR(sb.st_mode) &&
853 (sb.st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0)
859 #ifndef MIRBSD_BOOTFLOPPY
860 /* From: X11/xc/programs/xterm/wcwidth.c,v 1.8 2014/06/24 19:53:53 tg Exp $ */
867 static int mb_ucsbsearch(const struct mb_ucsrange arr[], size_t elems,
868 unsigned int val) MKSH_A_PURE;
871 * Generated by MirOS: contrib/code/Snippets/eawparse,v 1.2 2013/11/30 13:45:17 tg Exp $
872 * from the Unicode Character Database, Version 7.0.0
875 static const struct mb_ucsrange mb_ucs_combining[] = {
1086 static const struct mb_ucsrange mb_ucs_fullwidth[] = {
1100 /* simple binary search in ranges, with bounds optimisation */
1102 mb_ucsbsearch(const struct mb_ucsrange arr[], size_t elems, unsigned int val)
1104 size_t min = 0, mid, max = elems;
1106 if (val < arr[min].beg || val > arr[max - 1].end)
1110 mid = (min + max) / 2;
1112 if (val < arr[mid].beg)
1114 else if (val > arr[mid].end)
1122 /* Unix column width of a wide character (Unicode code point, really) */
1124 utf_wcwidth(unsigned int wc)
1126 /* C0/C1 control characters and DEL yield -1, except NUL which yields 0 */
1127 if (wc < 0x20 || (wc >= 0x7F && wc < 0xA0))
1128 return (wc ? -1 : 0);
1130 /* combining characters use 0 screen columns */
1131 if (mb_ucsbsearch(mb_ucs_combining, NELEM(mb_ucs_combining), wc))
1134 /* all others use 1 or 2 screen columns */
1135 if (mb_ucsbsearch(mb_ucs_fullwidth, NELEM(mb_ucs_fullwidth), wc))