From 9bc3df373ae6f4f7e6e97d554884d4e0dbad5494 Mon Sep 17 00:00:00 2001 From: Bill Buzbee Date: Thu, 30 Jul 2009 10:52:29 -0700 Subject: [PATCH] Stage 3 of Thumb2 support. armv7-a now generates vfp code inline. --- vm/compiler/Compiler.h | 2 +- vm/compiler/codegen/arm/ArmLIR.h | 135 +++++++++- vm/compiler/codegen/arm/Assemble.c | 353 ++++++++++++++++++------- vm/compiler/codegen/arm/Codegen-armv5te-vfp.c | 1 + vm/compiler/codegen/arm/Codegen-armv5te.c | 1 + vm/compiler/codegen/arm/Codegen-armv7-a.c | 5 +- vm/compiler/codegen/arm/Codegen.c | 286 +-------------------- vm/compiler/codegen/arm/Codegen.h | 57 +++++ vm/compiler/codegen/arm/LocalOptimizations.c | 6 + vm/compiler/codegen/arm/Thumb2Util.c | 354 ++++++++++++++++++++++++++ vm/compiler/codegen/arm/ThumbUtil.c | 323 +++++++++++++++++++++++ vm/compiler/codegen/arm/armv7-a/ArchVariant.c | 312 +++++++++++++++++++++++ vm/compiler/codegen/arm/armv7-a/ArchVariant.h | 34 +++ 13 files changed, 1485 insertions(+), 384 deletions(-) create mode 100644 vm/compiler/codegen/arm/Codegen.h create mode 100644 vm/compiler/codegen/arm/Thumb2Util.c create mode 100644 vm/compiler/codegen/arm/ThumbUtil.c create mode 100644 vm/compiler/codegen/arm/armv7-a/ArchVariant.c create mode 100644 vm/compiler/codegen/arm/armv7-a/ArchVariant.h diff --git a/vm/compiler/Compiler.h b/vm/compiler/Compiler.h index 2cd112bb3..3b7ae542c 100644 --- a/vm/compiler/Compiler.h +++ b/vm/compiler/Compiler.h @@ -30,7 +30,7 @@ typedef enum JitInstructionSetType { DALVIK_JIT_ARM, DALVIK_JIT_THUMB, DALVIK_JIT_THUMB2, - DALVIK_JIT_THUMBEE, + DALVIK_JIT_THUMB2EE, DALVIK_JIT_X86 } JitInstructionSetType; diff --git a/vm/compiler/codegen/arm/ArmLIR.h b/vm/compiler/codegen/arm/ArmLIR.h index 81e7346db..ed02bae26 100644 --- a/vm/compiler/codegen/arm/ArmLIR.h +++ b/vm/compiler/codegen/arm/ArmLIR.h @@ -21,12 +21,46 @@ #define _DALVIK_VM_COMPILER_CODEGEN_ARM_ARMLIR_H /* - * r0, r1, r2, r3, and r7 are always scratch - * r4PC is scratch if used solely in the compiled land. 
Otherwise it holds the - * Dalvik PC. - * rFP holds the current frame pointer - * rGLUE holds &InterpState + * r0, r1, r2, r3 are always scratch + * r4 (rPC) is scratch for Jit, but must be restored when resuming interp + * r5 (rFP) is reserved [holds Dalvik frame pointer] + * r6 (rGLUE) is reserved [holds current &interpState] + * r7 (rINST) is scratch for Jit + * r8 (rIBASE) is scratch for Jit, but must be restored when resuming interp + * r9 is always scratch + * r10 is always scratch + * r11 (fp) used by gcc unless -fomit-frame-pointer set [available for jit?] + * r12 is always scratch + * r13 (sp) is reserved + * r14 (lr) is scratch for Jit + * r15 (pc) is reserved + * + * For Thumb code use: + * r0, r1, r2, r3 to hold operands/results via scoreboard + * r4, r7 for temps + * + * For Thumb2 code use: + * r0, r1, r2, r3, r8, r9, r10, r11 for operands/results via scoreboard + * r4, r7, r14 for temps + * + * When transitioning from code cache to interp: + * restore rIBASE + * restore rPC + * restore r11 (fp)? + * + * Double precision values are stored in consecutive single precision registers + * such that dr0 -> (sr0,sr1), dr1 -> (sr2,sr3) ... dr15 -> (sr30,sr31) */ + +/* Offset to distinguish FP regs */ +#define FP_REG_OFFSET 32 +/* Is reg fp?
*/ +#define IS_FP_REG(x) (x & FP_REG_OFFSET) +/* Mask to strip off fp flags */ +#define FP_REG_MASK (FP_REG_OFFSET-1) +/* Mask to convert high reg to low for Thumb */ +#define THUMB_REG_MASK 0x7 + typedef enum NativeRegisterPool { r0 = 0, r1 = 1, @@ -43,12 +77,41 @@ typedef enum NativeRegisterPool { r12 = 12, r13 = 13, rlr = 14, - rpc = 15 + rpc = 15, + fr0 = 0 + FP_REG_OFFSET, + fr1 = 1 + FP_REG_OFFSET, + fr2 = 2 + FP_REG_OFFSET, + fr3 = 3 + FP_REG_OFFSET, + fr4 = 4 + FP_REG_OFFSET, + fr5 = 5 + FP_REG_OFFSET, + fr6 = 6 + FP_REG_OFFSET, + fr7 = 7 + FP_REG_OFFSET, + fr8 = 8 + FP_REG_OFFSET, + fr9 = 9 + FP_REG_OFFSET, + fr10 = 10 + FP_REG_OFFSET, + fr11 = 11 + FP_REG_OFFSET, + fr12 = 12 + FP_REG_OFFSET, + fr13 = 13 + FP_REG_OFFSET, + fr14 = 14 + FP_REG_OFFSET, + fr15 = 15 + FP_REG_OFFSET, + fr16 = 16 + FP_REG_OFFSET, + fr17 = 17 + FP_REG_OFFSET, + fr18 = 18 + FP_REG_OFFSET, + fr19 = 19 + FP_REG_OFFSET, + fr20 = 20 + FP_REG_OFFSET, + fr21 = 21 + FP_REG_OFFSET, + fr22 = 22 + FP_REG_OFFSET, + fr23 = 23 + FP_REG_OFFSET, + fr24 = 24 + FP_REG_OFFSET, + fr25 = 25 + FP_REG_OFFSET, + fr26 = 26 + FP_REG_OFFSET, + fr27 = 27 + FP_REG_OFFSET, + fr28 = 28 + FP_REG_OFFSET, + fr29 = 29 + FP_REG_OFFSET, + fr30 = 30 + FP_REG_OFFSET, + fr31 = 31 + FP_REG_OFFSET, } NativeRegisterPool; -/* Mask to convert high reg to low for Thumb */ -#define THUMB_REG_MASK 0x7 - /* Thumb condition encodings */ typedef enum ArmConditionCode { ARM_COND_EQ = 0x0, /* 0000 */ @@ -154,6 +217,43 @@ typedef enum ArmOpCode { THUMB_SUB_SPI7, /* sub(4) [101100001] imm_7[6..0] */ THUMB_SWI, /* swi [11011111] imm_8[7..0] */ THUMB_TST, /* tst [0100001000] rm[5..3] rn[2..0] */ +// FIXME: Enhance assembly encoding. 
Only low fp regs supported here + THUMB2_VLDRS, /* vldr low sx [111011011001] rn[19..16] rd[15-12] + [1010] imm_8[7..0] */ + THUMB2_VLDRD, /* vldr low dx [111011011001] rn[19..16] rd[15-12] + [1011] imm_8[7..0] */ + THUMB2_VMULS, /* vmul vd, vn, vm [111011100010] rn[19..16] + rd[15-12] [10100000] rm[3..0] */ + THUMB2_VMULD, /* vmul vd, vn, vm [111011100010] rn[19..16] + rd[15-12] [10110000] rm[3..0] */ + THUMB2_VSTRS, /* vstr low sx [111011011000] rn[19..16] rd[15-12] + [1010] imm_8[7..0] */ + THUMB2_VSTRD, /* vstr low dx [111011011000] rn[19..16] rd[15-12] + [1011] imm_8[7..0] */ + THUMB2_VSUBS, /* vsub vd, vn, vm [111011100011] rn[19..16] + rd[15-12] [10100100] rm[3..0] */ + THUMB2_VSUBD, /* vsub vd, vn, vm [111011100011] rn[19..16] + rd[15-12] [10110100] rm[3..0] */ + THUMB2_VADDS, /* vadd vd, vn, vm [111011100011] rn[19..16] + rd[15-12] [10100000] rm[3..0] */ + THUMB2_VADDD, /* vadd vd, vn, vm [111011100011] rn[19..16] + rd[15-12] [10110000] rm[3..0] */ + THUMB2_VDIVS, /* vdiv vd, vn, vm [111011101000] rn[19..16] + rd[15-12] [10100000] rm[3..0] */ + THUMB2_VDIVD, /* vdiv vd, vn, vm [111011101000] rn[19..16] + rd[15-12] [10110000] rm[3..0] */ + THUMB2_VCVTIF, /* vcvt.F32 vd, vm [1110111010111000] vd[15..12] + [10101100] vm[3..0] */ + THUMB2_VCVTID, /* vcvt.F64 vd, vm [1110111010111000] vd[15..12] + [10111100] vm[3..0] */ + THUMB2_VCVTFI, /* vcvt.S32.F32 vd, vm [1110111010111101] vd[15..12] + [10101100] vm[3..0] */ + THUMB2_VCVTDI, /* vcvt.S32.F64 vd, vm [1110111010111101] vd[15..12] + [10111100] vm[3..0] */ + THUMB2_VCVTFD, /* vcvt.F64.F32 vd, vm [1110111010110111] vd[15..12] + [10101100] vm[3..0] */ + THUMB2_VCVTDF, /* vcvt.F32.F64 vd, vm [1110111010110111] vd[15..12] + [10111100] vm[3..0] */ ARM_LAST, } ArmOpCode; @@ -168,12 +268,23 @@ typedef enum ArmOpFeatureFlags { IS_TERTIARY_OP = 1 << 7, } ArmOpFeatureFlags; +/* Instruction assembly fieldLoc kind */ +typedef enum ArmEncodingKind { + UNUSED, + BITBLT, /* Bit string using end/start */ + DFP, /* Double FP
reg */ + SFP, /* Single FP reg */ + IMMSHIFT8, /* Shifted 8-bit immed field using [26,14..12,7..0] */ + IMM12, /* Zero-extended 12-bit immediate using [26,14..12,7..0] */ +} ArmEncodingKind; + /* Struct used to define the snippet positions for each Thumb opcode */ typedef struct ArmEncodingMap { - short skeleton; + u4 skeleton; struct { - int end; - int start; + ArmEncodingKind kind; + int end; /* end for BITBLT, 1-bit slice position for FP regs */ + int start; /* start for BITBLT, 4-bit slice start for FP regs */ } fieldLoc[3]; ArmOpCode opCode; int flags; diff --git a/vm/compiler/codegen/arm/Assemble.c b/vm/compiler/codegen/arm/Assemble.c index f9961cef5..5a870147f 100644 --- a/vm/compiler/codegen/arm/Assemble.c +++ b/vm/compiler/codegen/arm/Assemble.c @@ -25,20 +25,23 @@ /* * opcode: ArmOpCode enum * skeleton: pre-designated bit-pattern for this opcode + * k0: key to applying ds/de * ds: dest start bit position * de: dest end bit position + * k1: key to applying s1s/s1e * s1s: src1 start bit position * s1e: src1 end bit position + * k2: key to applying s2s/s2e * s2s: src2 start bit position * s2e: src2 end bit position * operands: number of operands (for sanity check purposes) * name: mnemonic name * fmt: for pretty-prining */ -#define ENCODING_MAP(opcode, skeleton, ds, de, s1s, s1e, s2s, s2e, operands, \ - name, fmt, size) \ - {skeleton, {{ds, de}, {s1s, s1e}, {s2s, s2e}}, opcode, operands, name, \ - fmt, size} +#define ENCODING_MAP(opcode, skeleton, k0, ds, de, k1, s1s, s1e, k2, s2s, s2e, \ + operands, name, fmt, size) \ + {skeleton, {{k0, ds, de}, {k1, s1s, s1e}, {k2, s2s, s2e}}, \ + opcode, operands, name, fmt, size} /* Instruction dump string format keys: !pf, where "!"
is the start * of the key, "p" is which numeric operand to use and "f" is the @@ -65,226 +68,371 @@ */ /* NOTE: must be kept in sync with enum ArmOpcode from ArmLIR.h */ ArmEncodingMap EncodingMap[ARM_LAST] = { - ENCODING_MAP(ARM_16BIT_DATA, 0x0000, 15, 0, -1, -1, -1, -1, + ENCODING_MAP(ARM_16BIT_DATA, 0x0000, + BITBLT, 15, 0, UNUSED, -1, -1, UNUSED, -1, -1, IS_UNARY_OP, "data", "0x!0h(!0d)", 1), - ENCODING_MAP(THUMB_ADC, 0x4140, 2, 0, 5, 3, -1, -1, + ENCODING_MAP(THUMB_ADC, 0x4140, + BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, IS_BINARY_OP | CLOBBER_DEST, "adc", "r!0d, r!1d", 1), - ENCODING_MAP(THUMB_ADD_RRI3, 0x1c00, 2, 0, 5, 3, 8, 6, + ENCODING_MAP(THUMB_ADD_RRI3, 0x1c00, + BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6, IS_TERTIARY_OP | CLOBBER_DEST, "add", "r!0d, r!1d, #!2d", 1), - ENCODING_MAP(THUMB_ADD_RI8, 0x3000, 10, 8, 7, 0, -1, -1, + ENCODING_MAP(THUMB_ADD_RI8, 0x3000, + BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1, IS_BINARY_OP | CLOBBER_DEST, "add", "r!0d, r!0d, #!1d", 1), - ENCODING_MAP(THUMB_ADD_RRR, 0x1800, 2, 0, 5, 3, 8, 6, + ENCODING_MAP(THUMB_ADD_RRR, 0x1800, + BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6, IS_TERTIARY_OP | CLOBBER_DEST, "add", "r!0d, r!1d, r!2d", 1), - ENCODING_MAP(THUMB_ADD_RR_LH, 0x4440, 2, 0, 5, 3, -1, -1, + ENCODING_MAP(THUMB_ADD_RR_LH, 0x4440, + BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, IS_BINARY_OP | CLOBBER_DEST, "add", "r!0d, r!1d", 1), - ENCODING_MAP(THUMB_ADD_RR_HL, 0x4480, 2, 0, 5, 3, -1, -1, + ENCODING_MAP(THUMB_ADD_RR_HL, 0x4480, + BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, IS_BINARY_OP | CLOBBER_DEST, "add", "r!0d, r!1d", 1), - ENCODING_MAP(THUMB_ADD_RR_HH, 0x44c0, 2, 0, 5, 3, -1, -1, + ENCODING_MAP(THUMB_ADD_RR_HH, 0x44c0, + BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, IS_BINARY_OP | CLOBBER_DEST, "add", "r!0d, r!1d", 1), - ENCODING_MAP(THUMB_ADD_PC_REL, 0xa000, 10, 8, 7, 0, -1, -1, + ENCODING_MAP(THUMB_ADD_PC_REL, 0xa000, + BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1, IS_TERTIARY_OP | CLOBBER_DEST, "add", "r!0d, pc, #!1E", 
1), - ENCODING_MAP(THUMB_ADD_SP_REL, 0xa800, 10, 8, 7, 0, -1, -1, + ENCODING_MAP(THUMB_ADD_SP_REL, 0xa800, + BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1, IS_BINARY_OP | CLOBBER_DEST, "add", "r!0d, sp, #!1E", 1), - ENCODING_MAP(THUMB_ADD_SPI7, 0xb000, 6, 0, -1, -1, -1, -1, + ENCODING_MAP(THUMB_ADD_SPI7, 0xb000, + BITBLT, 6, 0, UNUSED, -1, -1, UNUSED, -1, -1, IS_UNARY_OP | CLOBBER_DEST, "add", "sp, #!0d*4", 1), - ENCODING_MAP(THUMB_AND_RR, 0x4000, 2, 0, 5, 3, -1, -1, + ENCODING_MAP(THUMB_AND_RR, 0x4000, + BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, IS_BINARY_OP | CLOBBER_DEST, "and", "r!0d, r!1d", 1), - ENCODING_MAP(THUMB_ASR, 0x1000, 2, 0, 5, 3, 10, 6, + ENCODING_MAP(THUMB_ASR, 0x1000, + BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 10, 6, IS_TERTIARY_OP | CLOBBER_DEST, "asr", "r!0d, r!1d, #!2d", 1), - ENCODING_MAP(THUMB_ASRV, 0x4100, 2, 0, 5, 3, -1, -1, + ENCODING_MAP(THUMB_ASRV, 0x4100, + BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, IS_BINARY_OP | CLOBBER_DEST, "asr", "r!0d, r!1d", 1), - ENCODING_MAP(THUMB_B_COND, 0xd000, 7, 0, 11, 8, -1, -1, + ENCODING_MAP(THUMB_B_COND, 0xd000, + BITBLT, 7, 0, BITBLT, 11, 8, UNUSED, -1, -1, IS_BINARY_OP | IS_BRANCH, "!1c", "!0t", 1), - ENCODING_MAP(THUMB_B_UNCOND, 0xe000, 10, 0, -1, -1, -1, -1, + ENCODING_MAP(THUMB_B_UNCOND, 0xe000, + BITBLT, 10, 0, UNUSED, -1, -1, UNUSED, -1, -1, NO_OPERAND | IS_BRANCH, "b", "!0t", 1), - ENCODING_MAP(THUMB_BIC, 0x4380, 2, 0, 5, 3, -1, -1, + ENCODING_MAP(THUMB_BIC, 0x4380, + BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, IS_BINARY_OP | CLOBBER_DEST, "bic", "r!0d, r!1d", 1), - ENCODING_MAP(THUMB_BKPT, 0xbe00, 7, 0, -1, -1, -1, -1, + ENCODING_MAP(THUMB_BKPT, 0xbe00, + BITBLT, 7, 0, UNUSED, -1, -1, UNUSED, -1, -1, IS_UNARY_OP | IS_BRANCH, "bkpt", "!0d", 1), - ENCODING_MAP(THUMB_BLX_1, 0xf000, 10, 0, -1, -1, -1, -1, + ENCODING_MAP(THUMB_BLX_1, 0xf000, + BITBLT, 10, 0, UNUSED, -1, -1, UNUSED, -1, -1, IS_BINARY_OP | IS_BRANCH, "blx_1", "!0u", 1), - ENCODING_MAP(THUMB_BLX_2, 0xe800, 10, 0, -1, -1, -1, -1, + 
ENCODING_MAP(THUMB_BLX_2, 0xe800, + BITBLT, 10, 0, UNUSED, -1, -1, UNUSED, -1, -1, IS_BINARY_OP | IS_BRANCH, "blx_2", "!0v", 1), - ENCODING_MAP(THUMB_BL_1, 0xf000, 10, 0, -1, -1, -1, -1, + ENCODING_MAP(THUMB_BL_1, 0xf000, + BITBLT, 10, 0, UNUSED, -1, -1, UNUSED, -1, -1, IS_UNARY_OP | IS_BRANCH, "bl_1", "!0u", 1), - ENCODING_MAP(THUMB_BL_2, 0xf800, 10, 0, -1, -1, -1, -1, + ENCODING_MAP(THUMB_BL_2, 0xf800, + BITBLT, 10, 0, UNUSED, -1, -1, UNUSED, -1, -1, IS_UNARY_OP | IS_BRANCH, "bl_2", "!0v", 1), - ENCODING_MAP(THUMB_BLX_R, 0x4780, 6, 3, -1, -1, -1, -1, + ENCODING_MAP(THUMB_BLX_R, 0x4780, + BITBLT, 6, 3, UNUSED, -1, -1, UNUSED, -1, -1, IS_UNARY_OP | IS_BRANCH, "blx", "r!0d", 1), - ENCODING_MAP(THUMB_BX, 0x4700, 6, 3, -1, -1, -1, -1, + ENCODING_MAP(THUMB_BX, 0x4700, + BITBLT, 6, 3, UNUSED, -1, -1, UNUSED, -1, -1, IS_UNARY_OP | IS_BRANCH, "bx", "r!0d", 1), - ENCODING_MAP(THUMB_CMN, 0x42c0, 2, 0, 5, 3, -1, -1, + ENCODING_MAP(THUMB_CMN, 0x42c0, + BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, IS_BINARY_OP, "cmn", "r!0d, r!1d", 1), - ENCODING_MAP(THUMB_CMP_RI8, 0x2800, 10, 8, 7, 0, -1, -1, + ENCODING_MAP(THUMB_CMP_RI8, 0x2800, + BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1, IS_BINARY_OP, "cmp", "r!0d, #!1d", 1), - ENCODING_MAP(THUMB_CMP_RR, 0x4280, 2, 0, 5, 3, -1, -1, + ENCODING_MAP(THUMB_CMP_RR, 0x4280, + BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, IS_BINARY_OP, "cmp", "r!0d, r!1d", 1), - ENCODING_MAP(THUMB_CMP_LH, 0x4540, 2, 0, 5, 3, -1, -1, + ENCODING_MAP(THUMB_CMP_LH, 0x4540, + BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, IS_BINARY_OP, "cmp", "r!0d, r!1D", 1), - ENCODING_MAP(THUMB_CMP_HL, 0x4580, 2, 0, 5, 3, -1, -1, + ENCODING_MAP(THUMB_CMP_HL, 0x4580, + BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, IS_BINARY_OP, "cmp", "r!0D, r!1d", 1), - ENCODING_MAP(THUMB_CMP_HH, 0x45c0, 2, 0, 5, 3, -1, -1, + ENCODING_MAP(THUMB_CMP_HH, 0x45c0, + BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, IS_BINARY_OP, "cmp", "r!0D, r!1D", 1), - ENCODING_MAP(THUMB_EOR, 0x4040, 2, 0, 5, 3, -1, -1, + 
ENCODING_MAP(THUMB_EOR, 0x4040, + BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, IS_BINARY_OP | CLOBBER_DEST, "eor", "r!0d, r!1d", 1), - ENCODING_MAP(THUMB_LDMIA, 0xc800, 10, 8, 7, 0, -1, -1, + ENCODING_MAP(THUMB_LDMIA, 0xc800, + BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1, IS_BINARY_OP | CLOBBER_DEST | CLOBBER_SRC1, "ldmia", "r!0d!!, ", 1), - ENCODING_MAP(THUMB_LDR_RRI5, 0x6800, 2, 0, 5, 3, 10, 6, + ENCODING_MAP(THUMB_LDR_RRI5, 0x6800, + BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 10, 6, IS_TERTIARY_OP | CLOBBER_DEST, "ldr", "r!0d, [r!1d, #!2E]", 1), - ENCODING_MAP(THUMB_LDR_RRR, 0x5800, 2, 0, 5, 3, 8, 6, + ENCODING_MAP(THUMB_LDR_RRR, 0x5800, + BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6, IS_TERTIARY_OP | CLOBBER_DEST, "ldr", "r!0d, [r!1d, r!2d]", 1), - ENCODING_MAP(THUMB_LDR_PC_REL, 0x4800, 10, 8, 7, 0, -1, -1, + ENCODING_MAP(THUMB_LDR_PC_REL, 0x4800, + BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1, IS_TERTIARY_OP | CLOBBER_DEST, "ldr", "r!0d, [pc, #!1E]", 1), - ENCODING_MAP(THUMB_LDR_SP_REL, 0x9800, 10, 8, 7, 0, -1, -1, + ENCODING_MAP(THUMB_LDR_SP_REL, 0x9800, + BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1, IS_BINARY_OP | CLOBBER_DEST, "ldr", "r!0d, [sp, #!1E]", 1), - ENCODING_MAP(THUMB_LDRB_RRI5, 0x7800, 2, 0, 5, 3, 10, 6, + ENCODING_MAP(THUMB_LDRB_RRI5, 0x7800, + BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 10, 6, IS_TERTIARY_OP | CLOBBER_DEST, "ldrb", "r!0d, [r!1d, #2d]", 1), - ENCODING_MAP(THUMB_LDRB_RRR, 0x5c00, 2, 0, 5, 3, 8, 6, + ENCODING_MAP(THUMB_LDRB_RRR, 0x5c00, + BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6, IS_TERTIARY_OP | CLOBBER_DEST, "ldrb", "r!0d, [r!1d, r!2d]", 1), - ENCODING_MAP(THUMB_LDRH_RRI5, 0x8800, 2, 0, 5, 3, 10, 6, + ENCODING_MAP(THUMB_LDRH_RRI5, 0x8800, + BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 10, 6, IS_TERTIARY_OP | CLOBBER_DEST, "ldrh", "r!0d, [r!1d, #!2F]", 1), - ENCODING_MAP(THUMB_LDRH_RRR, 0x5a00, 2, 0, 5, 3, 8, 6, + ENCODING_MAP(THUMB_LDRH_RRR, 0x5a00, + BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6, IS_TERTIARY_OP | CLOBBER_DEST, "ldrh", "r!0d, [r!1d, r!2d]", 1), - 
ENCODING_MAP(THUMB_LDRSB_RRR, 0x5600, 2, 0, 5, 3, 8, 6, + ENCODING_MAP(THUMB_LDRSB_RRR, 0x5600, + BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6, IS_TERTIARY_OP | CLOBBER_DEST, "ldrsb", "r!0d, [r!1d, r!2d]", 1), - ENCODING_MAP(THUMB_LDRSH_RRR, 0x5e00, 2, 0, 5, 3, 8, 6, + ENCODING_MAP(THUMB_LDRSH_RRR, 0x5e00, + BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6, IS_TERTIARY_OP | CLOBBER_DEST, "ldrsh", "r!0d, [r!1d, r!2d]", 1), - ENCODING_MAP(THUMB_LSL, 0x0000, 2, 0, 5, 3, 10, 6, + ENCODING_MAP(THUMB_LSL, 0x0000, + BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 10, 6, IS_TERTIARY_OP | CLOBBER_DEST, "lsl", "r!0d, r!1d, #!2d", 1), - ENCODING_MAP(THUMB_LSLV, 0x4080, 2, 0, 5, 3, -1, -1, + ENCODING_MAP(THUMB_LSLV, 0x4080, + BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, IS_BINARY_OP | CLOBBER_DEST, "lsl", "r!0d, r!1d", 1), - ENCODING_MAP(THUMB_LSR, 0x0800, 2, 0, 5, 3, 10, 6, + ENCODING_MAP(THUMB_LSR, 0x0800, + BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 10, 6, IS_TERTIARY_OP | CLOBBER_DEST, "lsr", "r!0d, r!1d, #!2d", 1), - ENCODING_MAP(THUMB_LSRV, 0x40c0, 2, 0, 5, 3, -1, -1, + ENCODING_MAP(THUMB_LSRV, 0x40c0, + BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, IS_BINARY_OP | CLOBBER_DEST, "lsr", "r!0d, r!1d", 1), - ENCODING_MAP(THUMB_MOV_IMM, 0x2000, 10, 8, 7, 0, -1, -1, + ENCODING_MAP(THUMB_MOV_IMM, 0x2000, + BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1, IS_BINARY_OP | CLOBBER_DEST, "mov", "r!0d, #!1d", 1), - ENCODING_MAP(THUMB_MOV_RR, 0x1c00, 2, 0, 5, 3, -1, -1, + ENCODING_MAP(THUMB_MOV_RR, 0x1c00, + BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, IS_BINARY_OP | CLOBBER_DEST, "mov", "r!0d, r!1d", 1), - ENCODING_MAP(THUMB_MOV_RR_H2H, 0x46c0, 2, 0, 5, 3, -1, -1, + ENCODING_MAP(THUMB_MOV_RR_H2H, 0x46c0, + BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, IS_BINARY_OP | CLOBBER_DEST, "mov", "r!0D, r!1D", 1), - ENCODING_MAP(THUMB_MOV_RR_H2L, 0x4640, 2, 0, 5, 3, -1, -1, + ENCODING_MAP(THUMB_MOV_RR_H2L, 0x4640, + BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, IS_BINARY_OP | CLOBBER_DEST, "mov", "r!0d, r!1D", 1), - 
ENCODING_MAP(THUMB_MOV_RR_L2H, 0x4680, 2, 0, 5, 3, -1, -1, + ENCODING_MAP(THUMB_MOV_RR_L2H, 0x4680, + BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, IS_BINARY_OP | CLOBBER_DEST, "mov", "r!0D, r!1d", 1), - ENCODING_MAP(THUMB_MUL, 0x4340, 2, 0, 5, 3, -1, -1, + ENCODING_MAP(THUMB_MUL, 0x4340, + BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, IS_BINARY_OP | CLOBBER_DEST, "mul", "r!0d, r!1d", 1), - ENCODING_MAP(THUMB_MVN, 0x43c0, 2, 0, 5, 3, -1, -1, + ENCODING_MAP(THUMB_MVN, 0x43c0, + BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, IS_BINARY_OP | CLOBBER_DEST, "mvn", "r!0d, r!1d", 1), - ENCODING_MAP(THUMB_NEG, 0x4240, 2, 0, 5, 3, -1, -1, + ENCODING_MAP(THUMB_NEG, 0x4240, + BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, IS_BINARY_OP | CLOBBER_DEST, "neg", "r!0d, r!1d", 1), - ENCODING_MAP(THUMB_ORR, 0x4300, 2, 0, 5, 3, -1, -1, + ENCODING_MAP(THUMB_ORR, 0x4300, + BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, IS_BINARY_OP | CLOBBER_DEST, "orr", "r!0d, r!1d", 1), - ENCODING_MAP(THUMB_POP, 0xbc00, 8, 0, -1, -1, -1, -1, + ENCODING_MAP(THUMB_POP, 0xbc00, + BITBLT, 8, 0, UNUSED, -1, -1, UNUSED, -1, -1, IS_UNARY_OP, "pop", "", 1), - ENCODING_MAP(THUMB_PUSH, 0xb400, 8, 0, -1, -1, -1, -1, + ENCODING_MAP(THUMB_PUSH, 0xb400, + BITBLT, 8, 0, UNUSED, -1, -1, UNUSED, -1, -1, IS_UNARY_OP, "push", "", 1), - ENCODING_MAP(THUMB_ROR, 0x41c0, 2, 0, 5, 3, -1, -1, + ENCODING_MAP(THUMB_ROR, 0x41c0, + BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, IS_BINARY_OP | CLOBBER_DEST, "ror", "r!0d, r!1d", 1), - ENCODING_MAP(THUMB_SBC, 0x4180, 2, 0, 5, 3, -1, -1, + ENCODING_MAP(THUMB_SBC, 0x4180, + BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, IS_BINARY_OP | CLOBBER_DEST, "sbc", "r!0d, r!1d", 1), - ENCODING_MAP(THUMB_STMIA, 0xc000, 10, 8, 7, 0, -1, -1, + ENCODING_MAP(THUMB_STMIA, 0xc000, + BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1, IS_BINARY_OP | CLOBBER_SRC1, "stmia", "r!0d!!, ", 1), - ENCODING_MAP(THUMB_STR_RRI5, 0x6000, 2, 0, 5, 3, 10, 6, + ENCODING_MAP(THUMB_STR_RRI5, 0x6000, + BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 10, 6, 
IS_TERTIARY_OP, "str", "r!0d, [r!1d, #!2E]", 1), - ENCODING_MAP(THUMB_STR_RRR, 0x5000, 2, 0, 5, 3, 8, 6, + ENCODING_MAP(THUMB_STR_RRR, 0x5000, + BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6, IS_TERTIARY_OP, "str", "r!0d, [r!1d, r!2d]", 1), - ENCODING_MAP(THUMB_STR_SP_REL, 0x9000, 10, 8, 7, 0, -1, -1, + ENCODING_MAP(THUMB_STR_SP_REL, 0x9000, + BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1, IS_BINARY_OP, "str", "r!0d, [sp, #!1E]", 1), - ENCODING_MAP(THUMB_STRB_RRI5, 0x7000, 2, 0, 5, 3, 10, 6, + ENCODING_MAP(THUMB_STRB_RRI5, 0x7000, + BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 10, 6, IS_TERTIARY_OP, "strb", "r!0d, [r!1d, #!2d]", 1), - ENCODING_MAP(THUMB_STRB_RRR, 0x5400, 2, 0, 5, 3, 8, 6, + ENCODING_MAP(THUMB_STRB_RRR, 0x5400, + BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6, IS_TERTIARY_OP, "strb", "r!0d, [r!1d, r!2d]", 1), - ENCODING_MAP(THUMB_STRH_RRI5, 0x8000, 2, 0, 5, 3, 10, 6, + ENCODING_MAP(THUMB_STRH_RRI5, 0x8000, + BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 10, 6, IS_TERTIARY_OP, "strh", "r!0d, [r!1d, #!2F]", 1), - ENCODING_MAP(THUMB_STRH_RRR, 0x5200, 2, 0, 5, 3, 8, 6, + ENCODING_MAP(THUMB_STRH_RRR, 0x5200, + BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6, IS_TERTIARY_OP, "strh", "r!0d, [r!1d, r!2d]", 1), - ENCODING_MAP(THUMB_SUB_RRI3, 0x1e00, 2, 0, 5, 3, 8, 6, + ENCODING_MAP(THUMB_SUB_RRI3, 0x1e00, + BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6, IS_TERTIARY_OP | CLOBBER_DEST, "sub", "r!0d, r!1d, #!2d]", 1), - ENCODING_MAP(THUMB_SUB_RI8, 0x3800, 10, 8, 7, 0, -1, -1, + ENCODING_MAP(THUMB_SUB_RI8, 0x3800, + BITBLT, 10, 8, BITBLT, 7, 0, UNUSED, -1, -1, IS_BINARY_OP | CLOBBER_DEST, "sub", "r!0d, #!1d", 1), - ENCODING_MAP(THUMB_SUB_RRR, 0x1a00, 2, 0, 5, 3, 8, 6, + ENCODING_MAP(THUMB_SUB_RRR, 0x1a00, + BITBLT, 2, 0, BITBLT, 5, 3, BITBLT, 8, 6, IS_TERTIARY_OP | CLOBBER_DEST, "sub", "r!0d, r!1d, r!2d", 1), - ENCODING_MAP(THUMB_SUB_SPI7, 0xb080, 6, 0, -1, -1, -1, -1, + ENCODING_MAP(THUMB_SUB_SPI7, 0xb080, + BITBLT, 6, 0, UNUSED, -1, -1, UNUSED, -1, -1, IS_UNARY_OP | CLOBBER_DEST, "sub", "sp, #!0d", 1), 
- ENCODING_MAP(THUMB_SWI, 0xdf00, 7, 0, -1, -1, -1, -1, + ENCODING_MAP(THUMB_SWI, 0xdf00, + BITBLT, 7, 0, UNUSED, -1, -1, UNUSED, -1, -1, IS_UNARY_OP | IS_BRANCH, "swi", "!0d", 1), - ENCODING_MAP(THUMB_TST, 0x4200, 2, 0, 5, 3, -1, -1, + ENCODING_MAP(THUMB_TST, 0x4200, + BITBLT, 2, 0, BITBLT, 5, 3, UNUSED, -1, -1, IS_UNARY_OP, "tst", "r!0d, r!1d", 1), + ENCODING_MAP(THUMB2_VLDRS, 0xed900a00, + SFP, 22, 12, BITBLT, 19, 16, BITBLT, 7, 0, + IS_TERTIARY_OP, + "vldr", "s!0d, [r!1d, #!2E]", 2), + ENCODING_MAP(THUMB2_VLDRD, 0xed900b00, + DFP, 22, 12, BITBLT, 19, 16, BITBLT, 7, 0, + IS_TERTIARY_OP, + "vldr", "d!0d, [r!1d, #!2E]", 2), + ENCODING_MAP(THUMB2_VMULS, 0xee200a00, + SFP, 22, 12, SFP, 7, 16, SFP, 5, 0, + IS_TERTIARY_OP, + "vmuls", "s!0d, s!1d, s!2d", 2), + ENCODING_MAP(THUMB2_VMULD, 0xee200b00, + DFP, 22, 12, DFP, 7, 16, DFP, 5, 0, + IS_TERTIARY_OP, + "vmuld", "d!0d, d!1d, d!2d", 2), + ENCODING_MAP(THUMB2_VSTRS, 0xed800a00, + SFP, 22, 12, BITBLT, 19, 16, BITBLT, 7, 0, + IS_TERTIARY_OP, + "vstr", "s!0d, [r!1d, #!2E]", 2), + ENCODING_MAP(THUMB2_VSTRD, 0xed800b00, + DFP, 22, 12, BITBLT, 19, 16, BITBLT, 7, 0, + IS_TERTIARY_OP, + "vstr", "d!0d, [r!1d, #!2E]", 2), + ENCODING_MAP(THUMB2_VSUBS, 0xee300a40, + SFP, 22, 12, SFP, 7, 16, SFP, 5, 0, + IS_TERTIARY_OP, + "vsub", "s!0d, s!1d, s!2d", 2), + ENCODING_MAP(THUMB2_VSUBD, 0xee300b40, + DFP, 22, 12, DFP, 7, 16, DFP, 5, 0, + IS_TERTIARY_OP, + "vsub", "d!0d, s!1d, s!2d", 2), + ENCODING_MAP(THUMB2_VADDS, 0xee300a00, + SFP, 22, 12, SFP, 7, 16, SFP, 5, 0, + IS_TERTIARY_OP, + "vadd", "s!0d, s!1d, s!2d", 2), + ENCODING_MAP(THUMB2_VADDD, 0xee300b00, + DFP, 22, 12, DFP, 7, 16, DFP, 5, 0, + IS_TERTIARY_OP, + "vadd", "d!0d, s!1d, s!2d", 2), + ENCODING_MAP(THUMB2_VDIVS, 0xee800a00, + SFP, 22, 12, SFP, 7, 16, SFP, 5, 0, + IS_TERTIARY_OP, + "vdivs", "s!0d, s!1d, s!2d", 2), + ENCODING_MAP(THUMB2_VDIVD, 0xee800b00, + DFP, 22, 12, DFP, 7, 16, DFP, 5, 0, + IS_TERTIARY_OP, + "vdivs", "s!0d, s!1d, s!2d", 2), + ENCODING_MAP(THUMB2_VCVTIF, 
0xeeb80ac0, + SFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1, + IS_BINARY_OP, + "vcvf.f32", "s!0d, s!1d", 2), + ENCODING_MAP(THUMB2_VCVTID, 0xeeb80bc0, + DFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1, + IS_BINARY_OP, + "vcvf.f64", "s!0d, s!1d", 2), + ENCODING_MAP(THUMB2_VCVTFI, 0xeebd0ac0, + SFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1, + IS_BINARY_OP, + "vcvf.s32.f32", "s!0d, s!1d", 2), + ENCODING_MAP(THUMB2_VCVTDI, 0xeebd0bc0, + SFP, 22, 12, DFP, 5, 0, UNUSED, -1, -1, + IS_BINARY_OP, + "vcvf.s32.f64", "s!0d, s!1d", 2), + ENCODING_MAP(THUMB2_VCVTFD, 0xeeb70ac0, + DFP, 22, 12, SFP, 5, 0, UNUSED, -1, -1, + IS_BINARY_OP, + "vcvf.f64.f32", "s!0d, s!1d", 2), + ENCODING_MAP(THUMB2_VCVTDF, 0xeeb70bc0, + SFP, 22, 12, DFP, 5, 0, UNUSED, -1, -1, + IS_BINARY_OP, + "vcvf.f32.f64", "s!0d, s!1d", 2), }; #define PADDING_MOV_R0_R0 0x1C00 @@ -387,18 +535,50 @@ static bool assembleInstructions(CompilationUnit *cUnit, intptr_t startAddr) } ArmEncodingMap *encoder = &EncodingMap[lir->opCode]; - short bits = encoder->skeleton; + u4 bits = encoder->skeleton; int i; for (i = 0; i < 3; i++) { - short value; - if (encoder->fieldLoc[i].end != -1) { - value = (lir->operands[i] << encoder->fieldLoc[i].start) & - ((1 << (encoder->fieldLoc[i].end + 1)) - 1); - bits |= value; - + u4 value; + switch(encoder->fieldLoc[i].kind) { + case UNUSED: + break; + case BITBLT: + value = (lir->operands[i] << encoder->fieldLoc[i].start) & + ((1 << (encoder->fieldLoc[i].end + 1)) - 1); + bits |= value; + break; + case DFP: + /* Snag the 1-bit slice and position it */ + value = ((lir->operands[i] & 0x10) >> 4) << + encoder->fieldLoc[i].end; + /* Extract and position the 4-bit slice */ + value |= (lir->operands[i] & 0x0f) << + encoder->fieldLoc[i].start; + bits |= value; + break; + case SFP: + /* Snag the 1-bit slice and position it */ + value = (lir->operands[i] & 0x1) << + encoder->fieldLoc[i].end; + /* Extract and position the 4-bit slice */ + value |= ((lir->operands[i] & 0x1e) >> 1) << + encoder->fieldLoc[i].start; + bits |= 
value; + break; + case IMMSHIFT8: + case IMM12: + value = ((lir->operands[i] & 0x800) >> 11) << 26; + value |= ((lir->operands[i] & 0x700) >> 8) << 12; + value |= lir->operands[i] & 0x0ff; + break; + default: + assert(0); } } - *bufferAddr++ = bits; + if (encoder->size == 2) { + *bufferAddr++ = (bits >> 16) & 0xffff; + } + *bufferAddr++ = bits & 0xffff; } return false; } @@ -459,7 +639,8 @@ void dvmCompilerAssembleLIR(CompilationUnit *cUnit, JitTranslationInfo *info) armLIR = NEXT_LIR(armLIR)) { armLIR->generic.offset = offset; if (armLIR->opCode >= 0 && !armLIR->isNop) { - offset += 2; + armLIR->size = EncodingMap[armLIR->opCode].size * 2; + offset += armLIR->size; } else if (armLIR->opCode == ARM_PSEUDO_ALIGN4) { if (offset & 0x2) { offset += 2; diff --git a/vm/compiler/codegen/arm/Codegen-armv5te-vfp.c b/vm/compiler/codegen/arm/Codegen-armv5te-vfp.c index bbe7541e1..c3a60e40b 100644 --- a/vm/compiler/codegen/arm/Codegen-armv5te-vfp.c +++ b/vm/compiler/codegen/arm/Codegen-armv5te-vfp.c @@ -24,5 +24,6 @@ #include "armv5te-vfp/ArchVariant.h" +#include "ThumbUtil.c" #include "Codegen.c" #include "armv5te-vfp/ArchVariant.c" diff --git a/vm/compiler/codegen/arm/Codegen-armv5te.c b/vm/compiler/codegen/arm/Codegen-armv5te.c index ba94d239d..baf9dc903 100644 --- a/vm/compiler/codegen/arm/Codegen-armv5te.c +++ b/vm/compiler/codegen/arm/Codegen-armv5te.c @@ -24,5 +24,6 @@ #include "armv5te/ArchVariant.h" +#include "ThumbUtil.c" #include "Codegen.c" #include "armv5te/ArchVariant.c" diff --git a/vm/compiler/codegen/arm/Codegen-armv7-a.c b/vm/compiler/codegen/arm/Codegen-armv7-a.c index bbe7541e1..a69123122 100644 --- a/vm/compiler/codegen/arm/Codegen-armv7-a.c +++ b/vm/compiler/codegen/arm/Codegen-armv7-a.c @@ -22,7 +22,8 @@ #include "ArmLIR.h" #include "vm/mterp/common/FindInterface.h" -#include "armv5te-vfp/ArchVariant.h" +#include "armv7-a/ArchVariant.h" +#include "Thumb2Util.c" #include "Codegen.c" -#include "armv5te-vfp/ArchVariant.c" +#include "armv7-a/ArchVariant.c" 
diff --git a/vm/compiler/codegen/arm/Codegen.c b/vm/compiler/codegen/arm/Codegen.c index e03423846..5b3f80210 100644 --- a/vm/compiler/codegen/arm/Codegen.c +++ b/vm/compiler/codegen/arm/Codegen.c @@ -24,19 +24,6 @@ * applicable directory below this one. */ -/* Routines which must be supplied by the variant-specific code */ -static void genDispatchToHandler(CompilationUnit *cUnit, TemplateOpCode opCode); -bool dvmCompilerArchInit(void); -static bool genInlineSqrt(CompilationUnit *cUnit, MIR *mir); -static bool genInlineCos(CompilationUnit *cUnit, MIR *mir); -static bool genInlineSin(CompilationUnit *cUnit, MIR *mir); -static bool genConversion(CompilationUnit *cUnit, MIR *mir); -static bool genArithOpFloat(CompilationUnit *cUnit, MIR *mir, int vDest, - int vSrc1, int vSrc2); -static bool genArithOpDouble(CompilationUnit *cUnit, MIR *mir, int vDest, - int vSrc1, int vSrc2); -static bool genCmpX(CompilationUnit *cUnit, MIR *mir, int vDest, int vSrc1, - int vSrc2); /* Array holding the entry offset of each template relative to the first one */ static intptr_t templateEntryOffsets[TEMPLATE_LAST_MARK]; @@ -44,12 +31,6 @@ static intptr_t templateEntryOffsets[TEMPLATE_LAST_MARK]; /* Track exercised opcodes */ static int opcodeCoverage[256]; -/* non-existent register */ -#define vNone (-1) - -/* get the next register in r0..r3 in a round-robin fashion */ -#define NEXT_REG(reg) ((reg + 1) & 3) - /*****************************************************************************/ /* @@ -116,80 +97,6 @@ static ArmLIR *newLIR23(CompilationUnit *cUnit, ArmOpCode opCode, /*****************************************************************************/ /* - * The following are utility routines to help maintain the RegisterScoreboard - * state to facilitate register renaming. 
- */ - -/* Reset the tracker to unknown state */ -static inline void resetRegisterScoreboard(CompilationUnit *cUnit) -{ - RegisterScoreboard *registerScoreboard = &cUnit->registerScoreboard; - - dvmClearAllBits(registerScoreboard->nullCheckedRegs); - registerScoreboard->liveDalvikReg = vNone; - registerScoreboard->nativeReg = vNone; - registerScoreboard->nativeRegHi = vNone; -} - -/* Kill the corresponding bit in the null-checked register list */ -static inline void killNullCheckedRegister(CompilationUnit *cUnit, int vReg) -{ - dvmClearBit(cUnit->registerScoreboard.nullCheckedRegs, vReg); -} - -/* The Dalvik register pair held in native registers have changed */ -static inline void updateLiveRegisterPair(CompilationUnit *cUnit, - int vReg, int mRegLo, int mRegHi) -{ - cUnit->registerScoreboard.liveDalvikReg = vReg; - cUnit->registerScoreboard.nativeReg = mRegLo; - cUnit->registerScoreboard.nativeRegHi = mRegHi; - cUnit->registerScoreboard.isWide = true; -} - -/* The Dalvik register held in a native register has changed */ -static inline void updateLiveRegister(CompilationUnit *cUnit, - int vReg, int mReg) -{ - cUnit->registerScoreboard.liveDalvikReg = vReg; - cUnit->registerScoreboard.nativeReg = mReg; - cUnit->registerScoreboard.isWide = false; -} - -/* - * Given a Dalvik register id vSrc, use a very simple algorithm to increase - * the lifetime of cached Dalvik value in a native register. 
- */ -static inline int selectFirstRegister(CompilationUnit *cUnit, int vSrc, - bool isWide) -{ - RegisterScoreboard *registerScoreboard = &cUnit->registerScoreboard; - - /* No live value - suggest to use r0 */ - if (registerScoreboard->liveDalvikReg == vNone) - return r0; - - /* Reuse the previously used native reg */ - if (registerScoreboard->liveDalvikReg == vSrc) { - if (isWide != true) { - return registerScoreboard->nativeReg; - } else { - /* Return either r0 or r2 */ - return (registerScoreboard->nativeReg + 1) & 2; - } - } - - /* No reuse - choose the next one among r0..r3 in the round-robin fashion */ - if (isWide) { - return (registerScoreboard->nativeReg + 2) & 2; - } else { - return (registerScoreboard->nativeReg + 1) & 3; - } - -} -/*****************************************************************************/ - -/* * The following are building blocks to insert constants into the pool or * instruction streams. */ @@ -229,67 +136,6 @@ static ArmLIR *scanLiteralPool(CompilationUnit *cUnit, int value, return NULL; } -/* - * Load a immediate using a shortcut if possible; otherwise - * grab from the per-translation literal pool - */ -void loadConstant(CompilationUnit *cUnit, int rDest, int value) -{ - /* See if the value can be constructed cheaply */ - if ((value >= 0) && (value <= 255)) { - newLIR2(cUnit, THUMB_MOV_IMM, rDest, value); - return; - } else if ((value & 0xFFFFFF00) == 0xFFFFFF00) { - newLIR2(cUnit, THUMB_MOV_IMM, rDest, ~value); - newLIR2(cUnit, THUMB_MVN, rDest, rDest); - return; - } - /* No shortcut - go ahead and use literal pool */ - ArmLIR *dataTarget = scanLiteralPool(cUnit, value, 255); - if (dataTarget == NULL) { - dataTarget = addWordData(cUnit, value, false); - } - ArmLIR *loadPcRel = dvmCompilerNew(sizeof(ArmLIR), true); - loadPcRel->opCode = THUMB_LDR_PC_REL; - loadPcRel->generic.target = (LIR *) dataTarget; - loadPcRel->operands[0] = rDest; - dvmCompilerAppendLIR(cUnit, (LIR *) loadPcRel); - - /* - * To save space in the constant 
pool, we use the ADD_RRI8 instruction to - * add up to 255 to an existing constant value. - */ - if (dataTarget->operands[0] != value) { - newLIR2(cUnit, THUMB_ADD_RI8, rDest, value - dataTarget->operands[0]); - } -} - -/* Export the Dalvik PC assicated with an instruction to the StackSave area */ -static void genExportPC(CompilationUnit *cUnit, MIR *mir, int rDPC, int rAddr) -{ - int offset = offsetof(StackSaveArea, xtra.currentPc); - loadConstant(cUnit, rDPC, (int) (cUnit->method->insns + mir->offset)); - newLIR2(cUnit, THUMB_MOV_RR, rAddr, rFP); - newLIR2(cUnit, THUMB_SUB_RI8, rAddr, sizeof(StackSaveArea) - offset); - newLIR3(cUnit, THUMB_STR_RRI5, rDPC, rAddr, 0); -} - -/* Generate conditional branch instructions */ -static void genConditionalBranch(CompilationUnit *cUnit, - ArmConditionCode cond, - ArmLIR *target) -{ - ArmLIR *branch = newLIR2(cUnit, THUMB_B_COND, 0, cond); - branch->generic.target = (LIR *) target; -} - -/* Generate unconditional branch instructions */ -static void genUnconditionalBranch(CompilationUnit *cUnit, ArmLIR *target) -{ - ArmLIR *branch = newLIR0(cUnit, THUMB_B_UNCOND); - branch->generic.target = (LIR *) target; -} - /* Perform the actual operation for OP_RETURN_* */ static void genReturnCommon(CompilationUnit *cUnit, MIR *mir) { @@ -298,7 +144,8 @@ static void genReturnCommon(CompilationUnit *cUnit, MIR *mir) gDvmJit.returnOp++; #endif int dPC = (int) (cUnit->method->insns + mir->offset); - ArmLIR *branch = newLIR0(cUnit, THUMB_B_UNCOND); + /* Insert branch, but defer setting of target */ + ArmLIR *branch = genUnconditionalBranch(cUnit, NULL); /* Set up the place holder to reconstruct this Dalvik PC */ ArmLIR *pcrLabel = dvmCompilerNew(sizeof(ArmLIR), true); pcrLabel->opCode = ARM_PSEUDO_PC_RECONSTRUCTION_CELL; @@ -311,120 +158,6 @@ static void genReturnCommon(CompilationUnit *cUnit, MIR *mir) } /* - * Load a pair of values of rFP[src..src+1] and store them into rDestLo and - * rDestHi - */ -static void 
loadValuePair(CompilationUnit *cUnit, int vSrc, int rDestLo, - int rDestHi) -{ - /* Use reg + imm5*4 to load the values if possible */ - if (vSrc <= 30) { - newLIR3(cUnit, THUMB_LDR_RRI5, rDestLo, rFP, vSrc); - newLIR3(cUnit, THUMB_LDR_RRI5, rDestHi, rFP, vSrc+1); - } else { - if (vSrc <= 64) { - /* Sneak 4 into the base address first */ - newLIR3(cUnit, THUMB_ADD_RRI3, rDestLo, rFP, 4); - newLIR2(cUnit, THUMB_ADD_RI8, rDestLo, (vSrc-1)*4); - } else { - /* Offset too far from rFP */ - loadConstant(cUnit, rDestLo, vSrc*4); - newLIR3(cUnit, THUMB_ADD_RRR, rDestLo, rFP, rDestLo); - } - assert(rDestLo < rDestHi); - newLIR2(cUnit, THUMB_LDMIA, rDestLo, (1<> 2); - } else { - loadConstant(cUnit, rDest, displacement); - newLIR3(cUnit, THUMB_LDR_RRR, rDest, rBase, rDest); - } -} - -/* Store a value from rSrc to vDest */ -static void storeValue(CompilationUnit *cUnit, int rSrc, int vDest, - int rScratch) -{ - killNullCheckedRegister(cUnit, vDest); - updateLiveRegister(cUnit, vDest, rSrc); - - /* Use reg + imm5*4 to store the value if possible */ - if (vDest <= 31) { - newLIR3(cUnit, THUMB_STR_RRI5, rSrc, rFP, vDest); - } else { - loadConstant(cUnit, rScratch, vDest*4); - newLIR3(cUnit, THUMB_STR_RRR, rSrc, rFP, rScratch); - } -} - -/* * Perform a binary operation on 64-bit operands and leave the results in the * r0/r1 pair. */ @@ -472,19 +205,6 @@ static inline ArmLIR *genCheckCommon(CompilationUnit *cUnit, int dOffset, return pcrLabel; } -/* - * Perform a "reg cmp imm" operation and jump to the PCR region if condition - * satisfies. 
- */ -static inline ArmLIR *genRegImmCheck(CompilationUnit *cUnit, - ArmConditionCode cond, int reg, - int checkValue, int dOffset, - ArmLIR *pcrLabel) -{ - newLIR2(cUnit, THUMB_CMP_RI8, reg, checkValue); - ArmLIR *branch = newLIR2(cUnit, THUMB_B_COND, 0, cond); - return genCheckCommon(cUnit, dOffset, branch, pcrLabel); -} /* * Perform a "reg cmp reg" operation and jump to the PCR region if condition @@ -2714,7 +2434,7 @@ static bool handleFmt31t(CompilationUnit *cUnit, MIR *mir) static bool handleFmt35c_3rc(CompilationUnit *cUnit, MIR *mir, BasicBlock *bb, ArmLIR *labelList) { - ArmLIR *retChainingCell; + ArmLIR *retChainingCell = NULL; ArmLIR *pcrLabel = NULL; if (bb->fallThrough != NULL) diff --git a/vm/compiler/codegen/arm/Codegen.h b/vm/compiler/codegen/arm/Codegen.h new file mode 100644 index 000000000..401607561 --- /dev/null +++ b/vm/compiler/codegen/arm/Codegen.h @@ -0,0 +1,57 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef _DALVIK_VM_COMPILER_CODEGEN_ARM_CODEGEN_H +#define _DALVIK_VM_COMPILER_CODEGEN_ARM_CODEGEN_H + +/* + * Forward declarations for common routines in Codegen.c used by ISA + * variant code such as ThumbUtil.c + */ + +static ArmLIR *newLIR0(CompilationUnit *cUnit, ArmOpCode opCode); +static ArmLIR *newLIR1(CompilationUnit *cUnit, ArmOpCode opCode, + int dest); +static ArmLIR *newLIR2(CompilationUnit *cUnit, ArmOpCode opCode, + int dest, int src1); +static ArmLIR *newLIR3(CompilationUnit *cUnit, ArmOpCode opCode, + int dest, int src1, int src2); +static ArmLIR *newLIR23(CompilationUnit *cUnit, ArmOpCode opCode, + int srcdest, int src2); +static ArmLIR *scanLiteralPool(CompilationUnit *cUnit, int value, + unsigned int delta); +static ArmLIR *addWordData(CompilationUnit *cUnit, int value, bool inPlace); +static inline ArmLIR *genCheckCommon(CompilationUnit *cUnit, int dOffset, + ArmLIR *branch, + ArmLIR *pcrLabel); + +/* Routines which must be supplied by the variant-specific code */ +static void genDispatchToHandler(CompilationUnit *cUnit, TemplateOpCode opCode); +bool dvmCompilerArchInit(void); +static bool genInlineSqrt(CompilationUnit *cUnit, MIR *mir); +static bool genInlineCos(CompilationUnit *cUnit, MIR *mir); +static bool genInlineSin(CompilationUnit *cUnit, MIR *mir); +static bool genConversion(CompilationUnit *cUnit, MIR *mir); +static bool genArithOpFloat(CompilationUnit *cUnit, MIR *mir, int vDest, + int vSrc1, int vSrc2); +static bool genArithOpDouble(CompilationUnit *cUnit, MIR *mir, int vDest, + int vSrc1, int vSrc2); +static bool genCmpX(CompilationUnit *cUnit, MIR *mir, int vDest, int vSrc1, + int vSrc2); + + + +#endif /* _DALVIK_VM_COMPILER_CODEGEN_ARM_CODEGEN_H */ diff --git a/vm/compiler/codegen/arm/LocalOptimizations.c b/vm/compiler/codegen/arm/LocalOptimizations.c index 30b9d8610..5f43b870a 100644 --- a/vm/compiler/codegen/arm/LocalOptimizations.c +++ b/vm/compiler/codegen/arm/LocalOptimizations.c @@ -95,6 +95,12 @@ static void
applyLoadStoreElimination(CompilationUnit *cUnit, checkLIR->opCode == THUMB_STR_RRR || checkLIR->opCode == THUMB_LDR_RRR; +// FIXME: need to enhance this code to sink & play well with coprocessor ld/str + stopHere |= checkLIR->opCode == THUMB2_VSTRS || + checkLIR->opCode == THUMB2_VSTRD || + checkLIR->opCode == THUMB2_VLDRS || + checkLIR->opCode == THUMB2_VLDRD; + stopHere |= (EncodingMap[checkLIR->opCode].flags & IS_BRANCH) != 0; diff --git a/vm/compiler/codegen/arm/Thumb2Util.c b/vm/compiler/codegen/arm/Thumb2Util.c new file mode 100644 index 000000000..1dd009b4e --- /dev/null +++ b/vm/compiler/codegen/arm/Thumb2Util.c @@ -0,0 +1,354 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * This file contains codegen for the Thumb ISA and is intended to be + * included by: + * + * Codegen-$(TARGET_ARCH_VARIANT).c + * + */ + +#include "Codegen.h" + +/* Routines which must be supplied here */ +static void loadConstant(CompilationUnit *cUnit, int rDest, int value); +static void genExportPC(CompilationUnit *cUnit, MIR *mir, int rDPC, int rAddr); +static void genConditionalBranch(CompilationUnit *cUnit, + ArmConditionCode cond, + ArmLIR *target); +static ArmLIR *genUnconditionalBranch(CompilationUnit *cUnit, ArmLIR *target); +static void loadValuePair(CompilationUnit *cUnit, int vSrc, int rDestLo, + int rDestHi); +static void storeValuePair(CompilationUnit *cUnit, int rSrcLo, int rSrcHi, + int vDest, int rScratch); +static void loadValueAddress(CompilationUnit *cUnit, int vSrc, int vDest); +static void loadValue(CompilationUnit *cUnit, int vSrc, int rDest); +static void loadWordDisp(CompilationUnit *cUnit, int rBase, int displacement, + int rDest); +static void storeValue(CompilationUnit *cUnit, int rSrc, int vDest, + int rScratch); +static inline ArmLIR *genRegImmCheck(CompilationUnit *cUnit, + ArmConditionCode cond, int reg, + int checkValue, int dOffset, + ArmLIR *pcrLabel); + +/*****************************************************************************/ + +/* + * Support for register allocation + */ + +/* non-existent register */ +#define vNone (-1) + +/* get the next register in r0..r3 in a round-robin fashion */ +#define NEXT_REG(reg) ((reg + 1) & 3) +/* + * The following are utility routines to help maintain the RegisterScoreboard + * state to facilitate register renaming.
+ */ + +/* Reset the tracker to unknown state */ +static inline void resetRegisterScoreboard(CompilationUnit *cUnit) +{ + RegisterScoreboard *registerScoreboard = &cUnit->registerScoreboard; + + dvmClearAllBits(registerScoreboard->nullCheckedRegs); + registerScoreboard->liveDalvikReg = vNone; + registerScoreboard->nativeReg = vNone; + registerScoreboard->nativeRegHi = vNone; +} + +/* Kill the corresponding bit in the null-checked register list */ +static inline void killNullCheckedRegister(CompilationUnit *cUnit, int vReg) +{ + dvmClearBit(cUnit->registerScoreboard.nullCheckedRegs, vReg); +} + +/* The Dalvik register pair held in native registers have changed */ +static inline void updateLiveRegisterPair(CompilationUnit *cUnit, + int vReg, int mRegLo, int mRegHi) +{ + cUnit->registerScoreboard.liveDalvikReg = vReg; + cUnit->registerScoreboard.nativeReg = mRegLo; + cUnit->registerScoreboard.nativeRegHi = mRegHi; + cUnit->registerScoreboard.isWide = true; +} + +/* The Dalvik register held in a native register has changed */ +static inline void updateLiveRegister(CompilationUnit *cUnit, + int vReg, int mReg) +{ + cUnit->registerScoreboard.liveDalvikReg = vReg; + cUnit->registerScoreboard.nativeReg = mReg; + cUnit->registerScoreboard.isWide = false; +} + +/* + * Given a Dalvik register id vSrc, use a very simple algorithm to increase + * the lifetime of cached Dalvik value in a native register. 
+ */ +static inline int selectFirstRegister(CompilationUnit *cUnit, int vSrc, + bool isWide) +{ + RegisterScoreboard *registerScoreboard = &cUnit->registerScoreboard; + + /* No live value - suggest to use r0 */ + if (registerScoreboard->liveDalvikReg == vNone) + return r0; + + /* Reuse the previously used native reg */ + if (registerScoreboard->liveDalvikReg == vSrc) { + if (isWide != true) { + return registerScoreboard->nativeReg; + } else { + /* Return either r0 or r2 */ + return (registerScoreboard->nativeReg + 1) & 2; + } + } + + /* No reuse - choose the next one among r0..r3 in the round-robin fashion */ + if (isWide) { + return (registerScoreboard->nativeReg + 2) & 2; + } else { + return (registerScoreboard->nativeReg + 1) & 3; + } + +} + +/*****************************************************************************/ + +/* + * Load a immediate using a shortcut if possible; otherwise + * grab from the per-translation literal pool + */ +static void loadConstant(CompilationUnit *cUnit, int rDest, int value) +{ + /* See if the value can be constructed cheaply */ + if ((value >= 0) && (value <= 255)) { + newLIR2(cUnit, THUMB_MOV_IMM, rDest, value); + return; + } else if ((value & 0xFFFFFF00) == 0xFFFFFF00) { + newLIR2(cUnit, THUMB_MOV_IMM, rDest, ~value); + newLIR2(cUnit, THUMB_MVN, rDest, rDest); + return; + } + /* No shortcut - go ahead and use literal pool */ + ArmLIR *dataTarget = scanLiteralPool(cUnit, value, 255); + if (dataTarget == NULL) { + dataTarget = addWordData(cUnit, value, false); + } + ArmLIR *loadPcRel = dvmCompilerNew(sizeof(ArmLIR), true); + loadPcRel->opCode = THUMB_LDR_PC_REL; + loadPcRel->generic.target = (LIR *) dataTarget; + loadPcRel->operands[0] = rDest; + dvmCompilerAppendLIR(cUnit, (LIR *) loadPcRel); + + /* + * To save space in the constant pool, we use the ADD_RRI8 instruction to + * add up to 255 to an existing constant value. 
+ */ + if (dataTarget->operands[0] != value) { + newLIR2(cUnit, THUMB_ADD_RI8, rDest, value - dataTarget->operands[0]); + } +} + +/* Export the Dalvik PC assicated with an instruction to the StackSave area */ +static void genExportPC(CompilationUnit *cUnit, MIR *mir, int rDPC, int rAddr) +{ + int offset = offsetof(StackSaveArea, xtra.currentPc); + loadConstant(cUnit, rDPC, (int) (cUnit->method->insns + mir->offset)); + newLIR2(cUnit, THUMB_MOV_RR, rAddr, rFP); + newLIR2(cUnit, THUMB_SUB_RI8, rAddr, sizeof(StackSaveArea) - offset); + newLIR3(cUnit, THUMB_STR_RRI5, rDPC, rAddr, 0); +} + +/* Generate conditional branch instructions */ +static void genConditionalBranch(CompilationUnit *cUnit, + ArmConditionCode cond, + ArmLIR *target) +{ + ArmLIR *branch = newLIR2(cUnit, THUMB_B_COND, 0, cond); + branch->generic.target = (LIR *) target; +} + +/* Generate unconditional branch instructions */ +static ArmLIR *genUnconditionalBranch(CompilationUnit *cUnit, ArmLIR *target) +{ + ArmLIR *branch = newLIR0(cUnit, THUMB_B_UNCOND); + branch->generic.target = (LIR *) target; + return branch; +} + +/* + * Load a pair of values of rFP[src..src+1] and store them into rDestLo and + * rDestHi + */ +static void loadValuePair(CompilationUnit *cUnit, int vSrc, int rDestLo, + int rDestHi) +{ + /* Use reg + imm5*4 to load the values if possible */ + if (vSrc <= 30) { + newLIR3(cUnit, THUMB_LDR_RRI5, rDestLo, rFP, vSrc); + newLIR3(cUnit, THUMB_LDR_RRI5, rDestHi, rFP, vSrc+1); + } else { + if (vSrc <= 64) { + /* Sneak 4 into the base address first */ + newLIR3(cUnit, THUMB_ADD_RRI3, rDestLo, rFP, 4); + newLIR2(cUnit, THUMB_ADD_RI8, rDestLo, (vSrc-1)*4); + } else { + /* Offset too far from rFP */ + loadConstant(cUnit, rDestLo, vSrc*4); + newLIR3(cUnit, THUMB_ADD_RRR, rDestLo, rFP, rDestLo); + } + assert(rDestLo < rDestHi); + newLIR2(cUnit, THUMB_LDMIA, rDestLo, (1<> 2); + } else { + loadConstant(cUnit, rDest, displacement); + newLIR3(cUnit, THUMB_LDR_RRR, rDest, rBase, rDest); + } +} + +/* 
Store a value from rSrc to vDest */ +static void storeValue(CompilationUnit *cUnit, int rSrc, int vDest, + int rScratch) +{ + killNullCheckedRegister(cUnit, vDest); + updateLiveRegister(cUnit, vDest, rSrc); + + /* Use reg + imm5*4 to store the value if possible */ + if (vDest <= 31) { + newLIR3(cUnit, THUMB_STR_RRI5, rSrc, rFP, vDest); + } else { + loadConstant(cUnit, rScratch, vDest*4); + newLIR3(cUnit, THUMB_STR_RRR, rSrc, rFP, rScratch); + } +} + +/* + * Perform a "reg cmp imm" operation and jump to the PCR region if condition + * satisfies. + */ +static inline ArmLIR *genRegImmCheck(CompilationUnit *cUnit, + ArmConditionCode cond, int reg, + int checkValue, int dOffset, + ArmLIR *pcrLabel) +{ + newLIR2(cUnit, THUMB_CMP_RI8, reg, checkValue); + ArmLIR *branch = newLIR2(cUnit, THUMB_B_COND, 0, cond); + return genCheckCommon(cUnit, dOffset, branch, pcrLabel); +} diff --git a/vm/compiler/codegen/arm/ThumbUtil.c b/vm/compiler/codegen/arm/ThumbUtil.c new file mode 100644 index 000000000..69bb0f75d --- /dev/null +++ b/vm/compiler/codegen/arm/ThumbUtil.c @@ -0,0 +1,323 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * This file contains codegen for the Thumb ISA and is intended to be + * included by: + * + * Codegen-$(TARGET_ARCH_VARIANT).c + * + */ + +#include "Codegen.h" + +/* Routines which must be supplied here */ +static void loadConstant(CompilationUnit *cUnit, int rDest, int value); +static void genExportPC(CompilationUnit *cUnit, MIR *mir, int rDPC, int rAddr); +static void genConditionalBranch(CompilationUnit *cUnit, + ArmConditionCode cond, + ArmLIR *target); +static ArmLIR *genUnconditionalBranch(CompilationUnit *cUnit, ArmLIR *target); +static void loadValuePair(CompilationUnit *cUnit, int vSrc, int rDestLo, + int rDestHi); +static void storeValuePair(CompilationUnit *cUnit, int rSrcLo, int rSrcHi, + int vDest, int rScratch); +static void loadValueAddress(CompilationUnit *cUnit, int vSrc, int vDest); +static void loadValue(CompilationUnit *cUnit, int vSrc, int rDest); +static void loadWordDisp(CompilationUnit *cUnit, int rBase, int displacement, + int rDest); +static void storeValue(CompilationUnit *cUnit, int rSrc, int vDest, + int rScratch); +static inline ArmLIR *genRegImmCheck(CompilationUnit *cUnit, + ArmConditionCode cond, int reg, + int checkValue, int dOffset, + ArmLIR *pcrLabel); + +/*****************************************************************************/ + +/* + * Support for register allocation + */ + +/* non-existent register */ +#define vNone (-1) + +/* get the next register in r0..r3 in a round-robin fashion */ +#define NEXT_REG(reg) ((reg + 1) & 3) +/* + * The following are utility routines to help maintain the RegisterScoreboard + * state to facilitate register renaming.
+ */ + +/* Reset the tracker to unknown state */ +static inline void resetRegisterScoreboard(CompilationUnit *cUnit) +{ + RegisterScoreboard *registerScoreboard = &cUnit->registerScoreboard; + + dvmClearAllBits(registerScoreboard->nullCheckedRegs); + registerScoreboard->liveDalvikReg = vNone; + registerScoreboard->nativeReg = vNone; + registerScoreboard->nativeRegHi = vNone; +} + +/* Kill the corresponding bit in the null-checked register list */ +static inline void killNullCheckedRegister(CompilationUnit *cUnit, int vReg) +{ + dvmClearBit(cUnit->registerScoreboard.nullCheckedRegs, vReg); +} + +/* The Dalvik register pair held in native registers have changed */ +static inline void updateLiveRegisterPair(CompilationUnit *cUnit, + int vReg, int mRegLo, int mRegHi) +{ + cUnit->registerScoreboard.liveDalvikReg = vReg; + cUnit->registerScoreboard.nativeReg = mRegLo; + cUnit->registerScoreboard.nativeRegHi = mRegHi; + cUnit->registerScoreboard.isWide = true; +} + +/* The Dalvik register held in a native register has changed */ +static inline void updateLiveRegister(CompilationUnit *cUnit, + int vReg, int mReg) +{ + cUnit->registerScoreboard.liveDalvikReg = vReg; + cUnit->registerScoreboard.nativeReg = mReg; + cUnit->registerScoreboard.isWide = false; +} + +/* + * Given a Dalvik register id vSrc, use a very simple algorithm to increase + * the lifetime of cached Dalvik value in a native register. 
+ */ +static inline int selectFirstRegister(CompilationUnit *cUnit, int vSrc, + bool isWide) +{ + RegisterScoreboard *registerScoreboard = &cUnit->registerScoreboard; + + /* No live value - suggest to use r0 */ + if (registerScoreboard->liveDalvikReg == vNone) + return r0; + + /* Reuse the previously used native reg */ + if (registerScoreboard->liveDalvikReg == vSrc) { + if (isWide != true) { + return registerScoreboard->nativeReg; + } else { + /* Return either r0 or r2 */ + return (registerScoreboard->nativeReg + 1) & 2; + } + } + + /* No reuse - choose the next one among r0..r3 in the round-robin fashion */ + if (isWide) { + return (registerScoreboard->nativeReg + 2) & 2; + } else { + return (registerScoreboard->nativeReg + 1) & 3; + } + +} + +/*****************************************************************************/ + +/* + * Load a immediate using a shortcut if possible; otherwise + * grab from the per-translation literal pool + */ +static void loadConstant(CompilationUnit *cUnit, int rDest, int value) +{ + /* See if the value can be constructed cheaply */ + if ((value >= 0) && (value <= 255)) { + newLIR2(cUnit, THUMB_MOV_IMM, rDest, value); + return; + } else if ((value & 0xFFFFFF00) == 0xFFFFFF00) { + newLIR2(cUnit, THUMB_MOV_IMM, rDest, ~value); + newLIR2(cUnit, THUMB_MVN, rDest, rDest); + return; + } + /* No shortcut - go ahead and use literal pool */ + ArmLIR *dataTarget = scanLiteralPool(cUnit, value, 255); + if (dataTarget == NULL) { + dataTarget = addWordData(cUnit, value, false); + } + ArmLIR *loadPcRel = dvmCompilerNew(sizeof(ArmLIR), true); + loadPcRel->opCode = THUMB_LDR_PC_REL; + loadPcRel->generic.target = (LIR *) dataTarget; + loadPcRel->operands[0] = rDest; + dvmCompilerAppendLIR(cUnit, (LIR *) loadPcRel); + + /* + * To save space in the constant pool, we use the ADD_RRI8 instruction to + * add up to 255 to an existing constant value. 
+ */ + if (dataTarget->operands[0] != value) { + newLIR2(cUnit, THUMB_ADD_RI8, rDest, value - dataTarget->operands[0]); + } +} + +/* Export the Dalvik PC assicated with an instruction to the StackSave area */ +static void genExportPC(CompilationUnit *cUnit, MIR *mir, int rDPC, int rAddr) +{ + int offset = offsetof(StackSaveArea, xtra.currentPc); + loadConstant(cUnit, rDPC, (int) (cUnit->method->insns + mir->offset)); + newLIR2(cUnit, THUMB_MOV_RR, rAddr, rFP); + newLIR2(cUnit, THUMB_SUB_RI8, rAddr, sizeof(StackSaveArea) - offset); + newLIR3(cUnit, THUMB_STR_RRI5, rDPC, rAddr, 0); +} + +/* Generate conditional branch instructions */ +static void genConditionalBranch(CompilationUnit *cUnit, + ArmConditionCode cond, + ArmLIR *target) +{ + ArmLIR *branch = newLIR2(cUnit, THUMB_B_COND, 0, cond); + branch->generic.target = (LIR *) target; +} + +/* Generate unconditional branch instructions */ +static ArmLIR *genUnconditionalBranch(CompilationUnit *cUnit, ArmLIR *target) +{ + ArmLIR *branch = newLIR0(cUnit, THUMB_B_UNCOND); + branch->generic.target = (LIR *) target; + return branch; +} + +/* + * Load a pair of values of rFP[src..src+1] and store them into rDestLo and + * rDestHi + */ +static void loadValuePair(CompilationUnit *cUnit, int vSrc, int rDestLo, + int rDestHi) +{ + /* Use reg + imm5*4 to load the values if possible */ + if (vSrc <= 30) { + newLIR3(cUnit, THUMB_LDR_RRI5, rDestLo, rFP, vSrc); + newLIR3(cUnit, THUMB_LDR_RRI5, rDestHi, rFP, vSrc+1); + } else { + if (vSrc <= 64) { + /* Sneak 4 into the base address first */ + newLIR3(cUnit, THUMB_ADD_RRI3, rDestLo, rFP, 4); + newLIR2(cUnit, THUMB_ADD_RI8, rDestLo, (vSrc-1)*4); + } else { + /* Offset too far from rFP */ + loadConstant(cUnit, rDestLo, vSrc*4); + newLIR3(cUnit, THUMB_ADD_RRR, rDestLo, rFP, rDestLo); + } + assert(rDestLo < rDestHi); + newLIR2(cUnit, THUMB_LDMIA, rDestLo, (1<> 2); + } else { + loadConstant(cUnit, rDest, displacement); + newLIR3(cUnit, THUMB_LDR_RRR, rDest, rBase, rDest); + } +} + +/* 
Store a value from rSrc to vDest */ +static void storeValue(CompilationUnit *cUnit, int rSrc, int vDest, + int rScratch) +{ + killNullCheckedRegister(cUnit, vDest); + updateLiveRegister(cUnit, vDest, rSrc); + + /* Use reg + imm5*4 to store the value if possible */ + if (vDest <= 31) { + newLIR3(cUnit, THUMB_STR_RRI5, rSrc, rFP, vDest); + } else { + loadConstant(cUnit, rScratch, vDest*4); + newLIR3(cUnit, THUMB_STR_RRR, rSrc, rFP, rScratch); + } +} + +/* + * Perform a "reg cmp imm" operation and jump to the PCR region if condition + * satisfies. + */ +static inline ArmLIR *genRegImmCheck(CompilationUnit *cUnit, + ArmConditionCode cond, int reg, + int checkValue, int dOffset, + ArmLIR *pcrLabel) +{ + newLIR2(cUnit, THUMB_CMP_RI8, reg, checkValue); + ArmLIR *branch = newLIR2(cUnit, THUMB_B_COND, 0, cond); + return genCheckCommon(cUnit, dOffset, branch, pcrLabel); +} diff --git a/vm/compiler/codegen/arm/armv7-a/ArchVariant.c b/vm/compiler/codegen/arm/armv7-a/ArchVariant.c new file mode 100644 index 000000000..15ed0781f --- /dev/null +++ b/vm/compiler/codegen/arm/armv7-a/ArchVariant.c @@ -0,0 +1,312 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +static void loadFloat(CompilationUnit *cUnit, int vSrc, int rDest); + +/* + * This file is included by Codegen-armv5te-vfp.c, and implements architecture + * variant-specific code. 
+ */ + +#define USE_IN_CACHE_HANDLER 1 + +/* + * Determine the initial instruction set to be used for this trace. + * Later components may decide to change this. + */ +JitInstructionSetType dvmCompilerInstructionSet(CompilationUnit *cUnit) +{ + return DALVIK_JIT_THUMB2; +} + +/* + * Jump to the out-of-line handler in ARM mode to finish executing the + * remaining of more complex instructions. + */ +static void genDispatchToHandler(CompilationUnit *cUnit, TemplateOpCode opCode) +{ +#if USE_IN_CACHE_HANDLER + /* + * NOTE - In practice BLX only needs one operand, but since the assembler + * may abort itself and retry due to other out-of-range conditions we + * cannot really use operand[0] to store the absolute target address since + * it may get clobbered by the final relative offset. Therefore, + * we fake BLX_1 is a two operand instruction and the absolute target + * address is stored in operand[1]. + */ + newLIR2(cUnit, THUMB_BLX_1, + (int) gDvmJit.codeCache + templateEntryOffsets[opCode], + (int) gDvmJit.codeCache + templateEntryOffsets[opCode]); + newLIR2(cUnit, THUMB_BLX_2, + (int) gDvmJit.codeCache + templateEntryOffsets[opCode], + (int) gDvmJit.codeCache + templateEntryOffsets[opCode]); +#else + /* + * In case we want to access the statically compiled handlers for + * debugging purposes, define USE_IN_CACHE_HANDLER to 0 + */ + void *templatePtr; + +#define JIT_TEMPLATE(X) extern void dvmCompiler_TEMPLATE_##X(); +#include "../../../template/armv5te-vfp/TemplateOpList.h" +#undef JIT_TEMPLATE + switch (opCode) { +#define JIT_TEMPLATE(X) \ + case TEMPLATE_##X: { templatePtr = dvmCompiler_TEMPLATE_##X; break; } +#include "../../../template/armv5te-vfp/TemplateOpList.h" +#undef JIT_TEMPLATE + default: templatePtr = NULL; + } + loadConstant(cUnit, r7, (int) templatePtr); + newLIR1(cUnit, THUMB_BLX_R, r7); +#endif +} + +/* Architecture-specific initializations and checks go here */ +bool dvmCompilerArchInit(void) +{ + /* First, declare dvmCompiler_TEMPLATE_XXX for 
each template */ +#define JIT_TEMPLATE(X) extern void dvmCompiler_TEMPLATE_##X(); +#include "../../../template/armv5te-vfp/TemplateOpList.h" +#undef JIT_TEMPLATE + + int i = 0; + extern void dvmCompilerTemplateStart(void); + + /* + * Then, populate the templateEntryOffsets array with the offsets from the + * dvmCompilerTemplateStart symbol for each template. + */ +#define JIT_TEMPLATE(X) templateEntryOffsets[i++] = \ + (intptr_t) dvmCompiler_TEMPLATE_##X - (intptr_t) dvmCompilerTemplateStart; +#include "../../../template/armv5te-vfp/TemplateOpList.h" +#undef JIT_TEMPLATE + + /* Codegen-specific assumptions */ + assert(offsetof(ClassObject, vtable) < 128 && + (offsetof(ClassObject, vtable) & 0x3) == 0); + assert(offsetof(ArrayObject, length) < 128 && + (offsetof(ArrayObject, length) & 0x3) == 0); + assert(offsetof(ArrayObject, contents) < 256); + + /* Up to 5 args are pushed on top of FP - sizeofStackSaveArea */ + assert(sizeof(StackSaveArea) < 236); + + /* + * EA is calculated by doing "Rn + imm5 << 2", and there are 5 entry points + * that codegen may access, make sure that the offset from the top of the + * struct is less than 108.
+ */ + assert(offsetof(InterpState, jitToInterpEntries) < 108); + return true; +} + +static bool genInlineSqrt(CompilationUnit *cUnit, MIR *mir) +{ + int offset = offsetof(InterpState, retval); + OpCode opCode = mir->dalvikInsn.opCode; + int vSrc = mir->dalvikInsn.vA; + loadValueAddress(cUnit, vSrc, r2); + genDispatchToHandler(cUnit, TEMPLATE_SQRT_DOUBLE_VFP); + newLIR3(cUnit, THUMB_STR_RRI5, r0, rGLUE, offset >> 2); + newLIR3(cUnit, THUMB_STR_RRI5, r1, rGLUE, (offset >> 2) + 1); + return false; +} + +static bool genInlineCos(CompilationUnit *cUnit, MIR *mir) +{ + return false; +} + +static bool genInlineSin(CompilationUnit *cUnit, MIR *mir) +{ + return false; +} + +static bool genArithOpFloat(CompilationUnit *cUnit, MIR *mir, int vDest, + int vSrc1, int vSrc2) +{ + int op = THUMB_BKPT; + + /* + * Don't attempt to optimize register usage since these opcodes call out to + * the handlers. + */ + switch (mir->dalvikInsn.opCode) { + case OP_ADD_FLOAT_2ADDR: + case OP_ADD_FLOAT: + op = THUMB2_VADDS; + break; + case OP_SUB_FLOAT_2ADDR: + case OP_SUB_FLOAT: + op = THUMB2_VSUBS; + break; + case OP_DIV_FLOAT_2ADDR: + case OP_DIV_FLOAT: + op = THUMB2_VDIVS; + break; + case OP_MUL_FLOAT_2ADDR: + case OP_MUL_FLOAT: + op = THUMB2_VMULS; + break; + case OP_REM_FLOAT_2ADDR: + case OP_REM_FLOAT: + case OP_NEG_FLOAT: { + return genArithOpFloatPortable(cUnit, mir, vDest, vSrc1, vSrc2); + } + default: + return true; + } + loadFloat(cUnit, vSrc1, fr2); + loadFloat(cUnit, vSrc2, fr4); + newLIR3(cUnit, op, fr0, fr2, fr4); + storeFloat(cUnit, fr0, vDest, 0); + return false; +} + +static bool genArithOpDouble(CompilationUnit *cUnit, MIR *mir, int vDest, + int vSrc1, int vSrc2) +{ + int op = THUMB_BKPT; + + /* + * Don't attempt to optimize register usage since these opcodes call out to + * the handlers. 
+ */ + switch (mir->dalvikInsn.opCode) { + case OP_ADD_DOUBLE_2ADDR: + case OP_ADD_DOUBLE: + op = THUMB2_VADDD; + break; + case OP_SUB_DOUBLE_2ADDR: + case OP_SUB_DOUBLE: + op = THUMB2_VSUBD; + break; + case OP_DIV_DOUBLE_2ADDR: + case OP_DIV_DOUBLE: + op = THUMB2_VDIVD; + break; + case OP_MUL_DOUBLE_2ADDR: + case OP_MUL_DOUBLE: + op = THUMB2_VMULD; + break; + case OP_REM_DOUBLE_2ADDR: + case OP_REM_DOUBLE: + case OP_NEG_DOUBLE: { + return genArithOpDoublePortable(cUnit, mir, vDest, vSrc1, vSrc2); + } + default: + return true; + } + loadDouble(cUnit, vSrc1, fr2); + loadDouble(cUnit, vSrc2, fr4); + newLIR3(cUnit, op, fr0, fr2, fr4); + storeDouble(cUnit, fr0, vDest, 0); + return false; +} + +static bool genConversion(CompilationUnit *cUnit, MIR *mir) +{ + OpCode opCode = mir->dalvikInsn.opCode; + int vSrc1Dest = mir->dalvikInsn.vA; + int vSrc2 = mir->dalvikInsn.vB; + int op = THUMB_BKPT; + bool longSrc = false; + bool longDest = false; + + switch (opCode) { + case OP_INT_TO_FLOAT: + longSrc = false; + longDest = false; + op = THUMB2_VCVTIF; + break; + case OP_FLOAT_TO_INT: + longSrc = false; + longDest = false; + op = THUMB2_VCVTFI; + break; + case OP_DOUBLE_TO_FLOAT: + longSrc = true; + longDest = false; + op = THUMB2_VCVTDF; + break; + case OP_FLOAT_TO_DOUBLE: + longSrc = false; + longDest = true; + op = THUMB2_VCVTFD; + break; + case OP_INT_TO_DOUBLE: + longSrc = false; + longDest = true; + op = THUMB2_VCVTID; + break; + case OP_DOUBLE_TO_INT: + longSrc = true; + longDest = false; + op = THUMB2_VCVTDI; + break; + case OP_FLOAT_TO_LONG: + case OP_LONG_TO_FLOAT: + case OP_DOUBLE_TO_LONG: + case OP_LONG_TO_DOUBLE: + return genConversionPortable(cUnit, mir); + default: + return true; + } + if (longSrc) /* load width follows the source type */ + loadDouble(cUnit, vSrc2, fr2); + else + loadFloat(cUnit, vSrc2, fr2); + newLIR2(cUnit, op, fr0, fr2); + if (longDest) /* store width follows the destination type, not the source */ + storeDouble(cUnit, fr0, vSrc1Dest, 0); + else + storeFloat(cUnit, fr0, vSrc1Dest, 0); + return false; +} + +static bool genCmpX(CompilationUnit
*cUnit, MIR *mir, int vDest, int vSrc1, + int vSrc2) +{ + TemplateOpCode template; + + /* + * Don't attempt to optimize register usage since these opcodes call out to + * the handlers. + */ + switch(mir->dalvikInsn.opCode) { + case OP_CMPL_FLOAT: + template = TEMPLATE_CMPL_FLOAT_VFP; + break; + case OP_CMPG_FLOAT: + template = TEMPLATE_CMPG_FLOAT_VFP; + break; + case OP_CMPL_DOUBLE: + template = TEMPLATE_CMPL_DOUBLE_VFP; + break; + case OP_CMPG_DOUBLE: + template = TEMPLATE_CMPG_DOUBLE_VFP; + break; + default: + return true; + } + loadValueAddress(cUnit, vSrc1, r0); + loadValueAddress(cUnit, vSrc2, r1); + genDispatchToHandler(cUnit, template); + storeValue(cUnit, r0, vDest, r1); + return false; +} diff --git a/vm/compiler/codegen/arm/armv7-a/ArchVariant.h b/vm/compiler/codegen/arm/armv7-a/ArchVariant.h new file mode 100644 index 000000000..9f862e840 --- /dev/null +++ b/vm/compiler/codegen/arm/armv7-a/ArchVariant.h @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2009 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _DALVIK_VM_COMPILER_CODEGEN_ARM_ARMV5TE_VFP_ARCHVARIANT_H +#define _DALVIK_VM_COMPILER_CODEGEN_ARM_ARMV5TE_VFP_ARCHVARIANT_H + +/* Create the TemplateOpcode enum */ +#define JIT_TEMPLATE(X) TEMPLATE_##X, +typedef enum { +#include "../../../template/armv5te-vfp/TemplateOpList.h" +/* + * For example, + * TEMPLATE_CMP_LONG, + * TEMPLATE_RETURN, + * ... 
+ */ + TEMPLATE_LAST_MARK, +} TemplateOpCode; +#undef JIT_TEMPLATE + +#endif /* _DALVIK_VM_COMPILER_CODEGEN_ARM_ARMV5TE_VFP_ARCHVARIANT_H */ -- 2.11.0