From 38ac6bee58b7ef19cbc2b0540315c3c299ca77a9 Mon Sep 17 00:00:00 2001
From: John Porto
Date: Fri, 4 Dec 2015 06:51:38 -0800
Subject: [PATCH] Subzero. ARM32. Initial sandboxing code.

BUG= https://code.google.com/p/nativeclient/issues/detail?id=4076
Review URL: https://codereview.chromium.org/1491473002 .
---
 Makefile.standalone                     |   5 +-
 pydir/crosstest.py                      |   4 +-
 pydir/crosstest_generator.py            |   2 +-
 pydir/run-pnacl-sz.py                   |  14 ++-
 pydir/szbuild.py                        |   8 +-
 pydir/targets.py                        |   7 +-
 src/IceTargetLoweringARM32.cpp          | 106 ++++++++++++++---------
 src/IceTargetLoweringARM32.h            | 122 ++++++++++++++++++++++
 tests_lit/assembler/arm32/sandboxing.ll | 149 ++++++++++++++++++++++++++++
 tests_lit/assembler/x86/sandboxing.ll   |   4 +-
 10 files changed, 367 insertions(+), 54 deletions(-)
 create mode 100644 tests_lit/assembler/arm32/sandboxing.ll

diff --git a/Makefile.standalone b/Makefile.standalone
index 27578be02..1f18f006e 100644
--- a/Makefile.standalone
+++ b/Makefile.standalone
@@ -404,7 +404,10 @@ check-xtest: $(OBJDIR)/pnacl-sz make_symlink runtime
 	  -e x8664,native,sse2,test_global \
 	  -i arm32,native,neon \
 	  -e arm32,native,neon,test_vector_ops \
-	  -e arm32,native,neon,test_select
+	  -e arm32,native,neon,test_select \
+	  -i arm32,sandbox,neon \
+	  -e arm32,sandbox,neon,test_vector_ops \
+	  -e arm32,sandbox,neon,test_select
 	PNACL_BIN_PATH=$(PNACL_BIN_PATH) \
 	$(LLVM_SRC_PATH)/utils/lit/lit.py -sv crosstest/Output
 endif
diff --git a/pydir/crosstest.py b/pydir/crosstest.py
index bd1c89e38..67333de7a 100755
--- a/pydir/crosstest.py
+++ b/pydir/crosstest.py
@@ -182,7 +182,7 @@ def main():
             ).format(root=nacl_root, sb='sb' if args.sandbox else 'native'))
     pure_c = os.path.splitext(args.driver)[1] == '.c'

-    # TargetX8664 is ilp32, but clang does not currently support such
+    # TargetX8664 is ilp32, but pnacl-clang does not currently support such a
     # configuration. In order to run the crosstests we play nasty, dangerous
     # tricks with the stack pointer.
     needs_stack_hack = (args.target == 'x8664')
@@ -202,7 +202,7 @@ def main():
                 bin=bindir, prefix='pnacl-' if args.sandbox else '',
                 cc='clang' if pure_c else 'clang++')
     sb_native_args = (['-O0', '--pnacl-allow-native',
-                       '-arch', target_info.target,
+                       '-arch', target_info.compiler_arch,
                        '-Wn,-defsym=__Sz_AbsoluteZero=0']
                       if args.sandbox
                       else ['-g', '-target=' + triple,
diff --git a/pydir/crosstest_generator.py b/pydir/crosstest_generator.py
index 2dbd6e0e3..6c2dd73d6 100755
--- a/pydir/crosstest_generator.py
+++ b/pydir/crosstest_generator.py
@@ -67,7 +67,7 @@ def main():
   arch_flags = { 'x8632': [],
                  'x8664': [],
                  # ARM doesn't have an ELF writer yet.
-                 'arm32': ['--filetype=iasm'] }
+                 'arm32': ['--filetype=asm'] }
   # all_keys is only used in the help text.
   all_keys = '; '.join([' '.join(targets), ' '.join(sandboxing),
                         ' '.join(opt_levels), ' '.join(flat_attrs)])
diff --git a/pydir/run-pnacl-sz.py b/pydir/run-pnacl-sz.py
index cefd47542..2de915ad5 100755
--- a/pydir/run-pnacl-sz.py
+++ b/pydir/run-pnacl-sz.py
@@ -11,14 +11,16 @@ import tempfile
 from utils import shellcmd


-def TargetAssemblerFlags(target):
+def TargetAssemblerFlags(target, sandboxed):
   # TODO(stichnot): -triple=i686-nacl should be used for a
   # sandboxing test. This means there should be an args.sandbox
   # argument that also gets passed through to pnacl-sz.
   # TODO(reed kotler). Need to find out exactly what we need to
   # add here for Mips32.
-  flags = { 'x8632': ['-triple=i686'],
-            'arm32': ['-triple=armv7a', '-mcpu=cortex-a9', '-mattr=+neon'],
+  flags = { 'x8632': ['-triple=%s' % ('i686' if not sandboxed else 'i686-nacl')],
+            'arm32': ['-triple=%s' % (
+                          'armv7a' if not sandboxed else 'armv7a-nacl'),
+                      '-mcpu=cortex-a9', '-mattr=+neon'],
             'mips32': ['-triple=mipsel' ] }
   return flags[target]
@@ -89,6 +91,8 @@ def main():
   argparser.add_argument('--args', '-a', nargs=argparse.REMAINDER,
                          default=[],
                          help='Remaining arguments are passed to pnacl-sz')
+  argparser.add_argument('--sandbox', required=False, action='store_true',
+                         help='Sandboxes the generated code.')

   args = argparser.parse_args()
   pnacl_bin_path = args.pnacl_bin_path
@@ -121,6 +125,8 @@ def main():
     cmd += [os.path.join(pnacl_bin_path, 'not')]
   cmd += [args.pnacl_sz]
   cmd += ['--target', args.target]
+  if args.sandbox:
+    cmd += ['-sandbox']
   if args.insts:
     # If the tests are based on '-verbose inst' output, force
     # single-threaded translation because dump output does not get
@@ -147,7 +153,7 @@ def main():
       asm_temp.close()
   if args.assemble and args.filetype != 'obj':
     cmd += (['|', os.path.join(pnacl_bin_path, 'llvm-mc')] +
-            TargetAssemblerFlags(args.target) +
+            TargetAssemblerFlags(args.target, args.sandbox) +
             ['-filetype=obj', '-o', asm_temp.name])
   elif asm_temp:
     cmd += ['-o', asm_temp.name]
diff --git a/pydir/szbuild.py b/pydir/szbuild.py
index 569c8caa6..10ce8c1e5 100755
--- a/pydir/szbuild.py
+++ b/pydir/szbuild.py
@@ -318,10 +318,14 @@ def ProcessPexe(args, pexe, exe):

     # Run the linker regardless of hybrid mode.
     if args.sandbox:
-        assert args.target in ['x8632'], \
+        assert args.target in ('x8632', 'arm32'), \
             '-sandbox is not available for %s' % args.target
+        target_lib_dir = {
+          'arm32': 'arm',
+          'x8632': 'x86-32',
+        }[args.target]
         linklib = ('{root}/toolchain/linux_x86/pnacl_newlib_raw/translator/' +
-                   'x86-32/lib').format(root=nacl_root)
+                   '{target_dir}/lib').format(root=nacl_root, target_dir=target_lib_dir)
         shellcmd((
             '{gold} -nostdlib --no-fix-cortex-a8 --eh-frame-hdr -z text ' +
             '--build-id --entry=__pnacl_start -static ' +
diff --git a/pydir/targets.py b/pydir/targets.py
index 3635e1314..7e2222ddc 100644
--- a/pydir/targets.py
+++ b/pydir/targets.py
@@ -17,22 +17,25 @@ def FindARMCrossInclude():


 TargetInfo = namedtuple('TargetInfo',
-                        ['target', 'triple', 'llc_flags', 'ld_emu',
-                         'cross_headers'])
+                        ['target', 'compiler_arch', 'triple', 'llc_flags',
+                         'ld_emu', 'cross_headers'])

 X8632Target = TargetInfo(target='x8632',
+                         compiler_arch='x8632',
                          triple='i686-none-linux',
                          llc_flags=['-mcpu=pentium4m'],
                          ld_emu='elf_i386_nacl',
                          cross_headers=[])

 X8664Target = TargetInfo(target='x8664',
+                         compiler_arch='x8664',
                          triple='x86_64-none-linux',
                          llc_flags=['-mcpu=x86-64'],
                          ld_emu='elf_x86_64_nacl',
                          cross_headers=[])

 ARM32Target = TargetInfo(target='arm32',
+                         compiler_arch='armv7',
                          triple='armv7a-none-linux-gnueabihf',
                          llc_flags=['-mcpu=cortex-a9',
                                     '-float-abi=hard',
diff --git a/src/IceTargetLoweringARM32.cpp b/src/IceTargetLoweringARM32.cpp
index d260db0e6..5469db1d5 100644
--- a/src/IceTargetLoweringARM32.cpp
+++ b/src/IceTargetLoweringARM32.cpp
@@ -161,7 +161,8 @@ TargetARM32Features::TargetARM32Features(const ClFlags &Flags) {
 }

 TargetARM32::TargetARM32(Cfg *Func)
-    : TargetLowering(Func), CPUFeatures(Func->getContext()->getFlags()) {}
+    : TargetLowering(Func), NeedSandboxing(Ctx->getFlags().getUseSandboxing()),
+      CPUFeatures(Func->getContext()->getFlags()) {}

 void TargetARM32::staticInit() {
   // Limit this size (or do all bitsets need to be the same width)???
@@ -544,8 +545,7 @@ void TargetARM32::genTargetHelperCallFor(Inst *Instr) {
     return;
   }
   case Intrinsics::NaClReadTP: {
-    if (Ctx->getFlags().getUseSandboxing()) {
-      UnimplementedError(Func->getContext()->getFlags());
+    if (NeedSandboxing) {
       return;
     }
     static constexpr SizeT MaxArgs = 0;
@@ -1120,6 +1120,10 @@ void TargetARM32::addProlog(CfgNode *Node) {
       continue;
     }
     if (CalleeSaves[i] && RegsUsed[i]) {
+      if (NeedSandboxing && i == RegARM32::Reg_r9) {
+        // r9 is never updated in sandboxed code.
+        continue;
+      }
       ++NumCallee;
       Variable *PhysicalRegister = getPhysicalRegister(i);
       PreservedRegsSizeBytes +=
@@ -1173,10 +1177,9 @@ void TargetARM32::addProlog(CfgNode *Node) {
     // Use the scratch register if needed to legalize the immediate.
     Operand *SubAmount = legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
                                   Legal_Reg | Legal_Flex, getReservedTmpReg());
-    Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
-    _sub(SP, SP, SubAmount);
+    AutoSandboxer(this).sub_sp(SubAmount);
     if (FixedAllocaAlignBytes > ARM32_STACK_ALIGNMENT_BYTES) {
-      alignRegisterPow2(SP, FixedAllocaAlignBytes);
+      AutoSandboxer(this).align_sp(FixedAllocaAlignBytes);
     }
   }
@@ -1270,7 +1273,7 @@ void TargetARM32::addEpilog(CfgNode *Node) {
     // use of SP before the assignment of SP=FP keeps previous SP adjustments
     // from being dead-code eliminated.
     Context.insert(InstFakeUse::create(Func, SP));
-    _mov(SP, FP);
+    AutoSandboxer(this).reset_sp(FP);
   } else {
     // add SP, SpillAreaSizeBytes
     if (SpillAreaSizeBytes) {
@@ -1278,7 +1281,7 @@ void TargetARM32::addEpilog(CfgNode *Node) {
       Operand *AddAmount =
           legalize(Ctx->getConstantInt32(SpillAreaSizeBytes),
                    Legal_Reg | Legal_Flex, getReservedTmpReg());
-      _add(SP, SP, AddAmount);
+      AutoSandboxer(this).add_sp(AddAmount);
     }
   }
@@ -1302,6 +1305,9 @@ void TargetARM32::addEpilog(CfgNode *Node) {
     }

     if (CalleeSaves[i] && RegsUsed[i]) {
+      if (NeedSandboxing && i == RegARM32::Reg_r9) {
+        continue;
+      }
       GPRsToRestore.push_back(getPhysicalRegister(i));
     }
   }
@@ -1318,16 +1324,13 @@ void TargetARM32::addEpilog(CfgNode *Node) {
   //   bundle_unlock
   // This isn't just aligning to the getBundleAlignLog2Bytes(). It needs to
   // restrict to the lower 1GB as well.
-  Operand *RetMask =
-      legalize(Ctx->getConstantInt32(0xc000000f), Legal_Reg | Legal_Flex);
-  Variable *LR = makeReg(IceType_i32, RegARM32::Reg_lr);
+  Variable *LR = getPhysicalRegister(RegARM32::Reg_lr);
   Variable *RetValue = nullptr;
   if (RI->getSrcSize())
     RetValue = llvm::cast<Variable>(RI->getSrc(0));
-  _bundle_lock();
-  _bic(LR, LR, RetMask);
-  _ret(LR, RetValue);
-  _bundle_unlock();
+
+  AutoSandboxer(this).ret(LR, RetValue);
+
   RI->setDeleted();
@@ -1378,7 +1381,7 @@ Variable *TargetARM32::PostLoweringLegalizer::newBaseRegister(

 OperandARM32Mem *TargetARM32::PostLoweringLegalizer::createMemOperand(
     Type Ty, Variable *Base, int32_t Offset, bool AllowOffsets) {
   assert(!Base->isRematerializable());
-  if (AllowOffsets && Target->isLegalMemOffset(Ty, Offset)) {
+  if (Offset == 0 || (AllowOffsets && Target->isLegalMemOffset(Ty, Offset))) {
     return OperandARM32Mem::create(
         Target->Func, Ty, Base,
         llvm::cast<ConstantInteger32>(Target->Ctx->getConstantInt32(Offset)),
@@ -1451,8 +1454,9 @@ void TargetARM32::PostLoweringLegalizer::legalizeMov(InstARM32Mov *MovInstr) {
       assert(!SrcR->isRematerializable());
       const int32_t Offset = Dest->getStackOffset();
       // This is a _mov(Mem(), Variable), i.e., a store.
-      Target->_str(SrcR, createMemOperand(DestTy, StackOrFrameReg, Offset),
-                   MovInstr->getPredicate());
+      TargetARM32::AutoSandboxer(Target)
+          .str(SrcR, createMemOperand(DestTy, StackOrFrameReg, Offset),
+               MovInstr->getPredicate());
       // _str() does not have a Dest, so we add a fake-def(Dest).
       Target->Context.insert(InstFakeDef::create(Target->Func, Dest));
       Legalized = true;
@@ -1476,8 +1480,9 @@ void TargetARM32::PostLoweringLegalizer::legalizeMov(InstARM32Mov *MovInstr) {
       if (!Var->hasReg()) {
         // This is a _mov(Variable, Mem()), i.e., a load.
         const int32_t Offset = Var->getStackOffset();
-        Target->_ldr(Dest, createMemOperand(DestTy, StackOrFrameReg, Offset),
-                     MovInstr->getPredicate());
+        TargetARM32::AutoSandboxer(Target)
+            .ldr(Dest, createMemOperand(DestTy, StackOrFrameReg, Offset),
+                 MovInstr->getPredicate());
         Legalized = true;
       }
     }
@@ -1542,7 +1547,15 @@ TargetARM32::PostLoweringLegalizer::legalizeMemOperand(OperandARM32Mem *Mem,
     Legalized = true;
   }

-  if (!Legalized) {
+  if (!Legalized && !Target->NeedSandboxing) {
+    return nullptr;
+  }
+
+  if (Target->NeedSandboxing && Base->getRegNum() == RegARM32::Reg_r9) {
+    if (Legalized) {
+      llvm::report_fatal_error("r9-based mem operand should not need to be "
+                               "legalized.");
+    }
     return nullptr;
   }

@@ -1550,6 +1563,7 @@ TargetARM32::PostLoweringLegalizer::legalizeMemOperand(OperandARM32Mem *Mem,
     return createMemOperand(Mem->getType(), Base, Offset, AllowOffsets);
   }

+  assert(!Target->NeedSandboxing);
   assert(MemTraits[Mem->getType()].CanHaveIndex);

   if (Offset != 0) {
@@ -1621,7 +1635,8 @@ void TargetARM32::postLowerLegalization() {
       } else if (auto *LdrInstr = llvm::dyn_cast<InstARM32Ldr>(CurInstr)) {
         if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand(
                 llvm::cast<OperandARM32Mem>(LdrInstr->getSrc(0)))) {
-          _ldr(CurInstr->getDest(), LegalMem, LdrInstr->getPredicate());
+          AutoSandboxer(this)
+              .ldr(CurInstr->getDest(), LegalMem, LdrInstr->getPredicate());
           CurInstr->setDeleted();
         }
       } else if (auto *LdrexInstr = llvm::dyn_cast<InstARM32Ldrex>(CurInstr)) {
@@ -1629,14 +1644,16 @@ void TargetARM32::postLowerLegalization() {
         if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand(
                 llvm::cast<OperandARM32Mem>(LdrexInstr->getSrc(0)),
                 DisallowOffsetsBecauseLdrex)) {
-          _ldrex(CurInstr->getDest(), LegalMem, LdrexInstr->getPredicate());
+          AutoSandboxer(this)
+              .ldrex(CurInstr->getDest(), LegalMem, LdrexInstr->getPredicate());
           CurInstr->setDeleted();
         }
       } else if (auto *StrInstr = llvm::dyn_cast<InstARM32Str>(CurInstr)) {
+        AutoSandboxer Bundle(this);
         if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand(
                 llvm::cast<OperandARM32Mem>(StrInstr->getSrc(1)))) {
-          _str(llvm::cast<Variable>(CurInstr->getSrc(0)), LegalMem,
-               StrInstr->getPredicate());
+          AutoSandboxer(this).str(llvm::cast<Variable>(CurInstr->getSrc(0)),
+                                  LegalMem, StrInstr->getPredicate());
           CurInstr->setDeleted();
         }
       } else if (auto *StrexInstr = llvm::dyn_cast<InstARM32Strex>(CurInstr)) {
@@ -1644,8 +1661,9 @@ void TargetARM32::postLowerLegalization() {
         if (OperandARM32Mem *LegalMem = Legalizer.legalizeMemOperand(
                 llvm::cast<OperandARM32Mem>(StrexInstr->getSrc(1)),
                 DisallowOffsetsBecauseStrex)) {
-          _strex(CurInstr->getDest(), llvm::cast<Variable>(CurInstr->getSrc(0)),
-                 LegalMem, StrexInstr->getPredicate());
+          AutoSandboxer(this).strex(CurInstr->getDest(),
+                                    llvm::cast<Variable>(CurInstr->getSrc(0)),
+                                    LegalMem, StrexInstr->getPredicate());
           CurInstr->setDeleted();
         }
       }
@@ -1803,7 +1821,7 @@ void TargetARM32::lowerAlloca(const InstAlloca *Inst) {
   Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
   if (OverAligned) {
-    alignRegisterPow2(SP, Alignment);
+    AutoSandboxer(this).align_sp(Alignment);
   }

   Variable *Dest = Inst->getDest();
@@ -1828,7 +1846,7 @@ void TargetARM32::lowerAlloca(const InstAlloca *Inst) {
     // in Dest.
     Operand *SubAmountRF =
         legalize(Ctx->getConstantInt32(Value), Legal_Reg | Legal_Flex);
-    _sub(SP, SP, SubAmountRF);
+    AutoSandboxer(this).sub_sp(SubAmountRF);
   } else {
     // Non-constant sizes need to be adjusted to the next highest multiple of
     // the required alignment at runtime.
@@ -1838,7 +1856,7 @@ void TargetARM32::lowerAlloca(const InstAlloca *Inst) {
     Operand *AddAmount = legalize(Ctx->getConstantInt32(Alignment - 1));
     _add(T, T, AddAmount);
     alignRegisterPow2(T, Alignment);
-    _sub(SP, SP, T);
+    AutoSandboxer(this).sub_sp(T);
   }

   // Adds back a few bytes to SP to account for the out args area.
@@ -3249,8 +3267,6 @@ void TargetARM32::lowerCall(const InstCall *Instr) {
       break;
     }
   }
-  // TODO(jvoung): Handle sandboxing. const bool NeedSandboxing =
-  //     Ctx->getFlags().getUseSandboxing();

   // Allow ConstantRelocatable to be left alone as a direct call, but force
   // other constants like ConstantInteger32 to be in a register and make it an
@@ -3271,8 +3287,10 @@ void TargetARM32::lowerCall(const InstCall *Instr) {
     // the call.
     Context.insert(InstFakeUse::create(Func, Reg));
   }
-  Inst *NewCall = InstARM32Call::create(Func, ReturnReg, CallTarget);
-  Context.insert(NewCall);
+
+  InstARM32Call *NewCall = AutoSandboxer(this, InstBundleLock::Opt_AlignToEnd)
+                               .bl(ReturnReg, CallTarget);
+
   if (ReturnRegHi)
     Context.insert(InstFakeDef::create(Func, ReturnRegHi));
@@ -4612,7 +4630,14 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
     llvm::report_fatal_error("memmove should have been prelowered.");
   }
   case Intrinsics::NaClReadTP: {
-    llvm::report_fatal_error("nacl-read-tp should have been prelowered.");
+    if (!NeedSandboxing) {
+      llvm::report_fatal_error("nacl-read-tp should have been prelowered.");
+    }
+    Variable *TP = legalizeToReg(OperandARM32Mem::create(
+        Func, getPointerType(), getPhysicalRegister(RegARM32::Reg_r9),
+        llvm::cast<ConstantInteger32>(Ctx->getConstantZero(IceType_i32))));
+    _mov(Dest, TP);
+    return;
   }
   case Intrinsics::Setjmp: {
     llvm::report_fatal_error("setjmp should have been prelowered.");
@@ -4630,9 +4655,8 @@ void TargetARM32::lowerIntrinsicCall(const InstIntrinsicCall *Instr) {
     return;
   }
   case Intrinsics::Stackrestore: {
-    Variable *SP = getPhysicalRegister(RegARM32::Reg_sp);
-    Operand *Val = legalize(Instr->getArg(0), Legal_Reg | Legal_Flex);
-    _mov_redefined(SP, Val);
+    Variable *Val = legalizeToReg(Instr->getArg(0));
+    AutoSandboxer(this).reset_sp(Val);
     return;
   }
   case Intrinsics::Trap:
@@ -4987,8 +5011,9 @@ OperandARM32Mem *TargetARM32::formAddressingMode(Type Ty, Cfg *Func,
   (void)MemTraitsSize;
   assert(Ty < MemTraitsSize);
   auto *TypeTraits = &MemTraits[Ty];
-  const bool CanHaveIndex = TypeTraits->CanHaveIndex;
-  const bool CanHaveShiftedIndex = TypeTraits->CanHaveShiftedIndex;
+  const bool CanHaveIndex = !NeedSandboxing && TypeTraits->CanHaveIndex;
+  const bool CanHaveShiftedIndex =
+      !NeedSandboxing && TypeTraits->CanHaveShiftedIndex;
   const bool CanHaveImm = TypeTraits->CanHaveImm;
   const int32_t ValidImmMask = TypeTraits->ValidImmMask;
   (void)ValidImmMask;
@@ -5160,6 +5185,7 @@ void TargetARM32::lowerRet(const InstRet *Inst) {
   // frame removal instructions. addEpilog is responsible for restoring the
   // "lr" register as needed prior to this ret instruction.
   _ret(getPhysicalRegister(RegARM32::Reg_lr), Reg);
+
   // Add a fake use of sp to make sure sp stays alive for the entire function.
   // Otherwise post-call sp adjustments get dead-code eliminated.
   // TODO: Are there more places where the fake use should be inserted? E.g.
diff --git a/src/IceTargetLoweringARM32.h b/src/IceTargetLoweringARM32.h
index ddd10f108..a10b575b5 100644
--- a/src/IceTargetLoweringARM32.h
+++ b/src/IceTargetLoweringARM32.h
@@ -162,6 +162,18 @@ public:
         llvm::cast<ConstantInteger32>(Ctx->getConstantInt32(ShAmtImm & 0x1F)));
   }

+  OperandARM32FlexImm *indirectBranchBicMask() const {
+    constexpr uint32_t Imm8 = 0xFC; // 0xC000000F
+    constexpr uint32_t RotateAmt = 2;
+    return OperandARM32FlexImm::create(Func, IceType_i32, Imm8, RotateAmt);
+  }
+
+  OperandARM32FlexImm *memOpBicMask() const {
+    constexpr uint32_t Imm8 = 0x0C; // 0xC0000000
+    constexpr uint32_t RotateAmt = 2;
+    return OperandARM32FlexImm::create(Func, IceType_i32, Imm8, RotateAmt);
+  }
+
   GlobalContext *getCtx() const { return Ctx; }

 protected:
@@ -822,6 +834,115 @@ protected:

   void postLowerLegalization();

+  class AutoSandboxer {
+  public:
+    explicit AutoSandboxer(
+        TargetARM32 *Target,
+        InstBundleLock::Option BundleOption = InstBundleLock::Opt_None)
+        : Target(Target) {
+      if (Target->NeedSandboxing) {
+        Target->_bundle_lock(BundleOption);
+      }
+    }
+
+    void add_sp(Operand *AddAmount) {
+      Variable *SP = Target->getPhysicalRegister(RegARM32::Reg_sp);
+      Target->_add(SP, SP, AddAmount);
+      if (Target->NeedSandboxing) {
+        Target->_bic(SP, SP, Target->memOpBicMask());
+      }
+    }
+
+    void align_sp(size_t Alignment) {
+      Variable *SP = Target->getPhysicalRegister(RegARM32::Reg_sp);
+      Target->alignRegisterPow2(SP, Alignment);
+      if (Target->NeedSandboxing) {
+        Target->_bic(SP, SP, Target->memOpBicMask());
+      }
+    }
+
+    InstARM32Call *bl(Variable *ReturnReg, Operand *CallTarget) {
+      if (Target->NeedSandboxing) {
+        if (auto *CallTargetR = llvm::dyn_cast<Variable>(CallTarget)) {
+          Target->_bic(CallTargetR, CallTargetR,
+                       Target->indirectBranchBicMask());
+        }
+      }
+      auto *Call = InstARM32Call::create(Target->Func, ReturnReg, CallTarget);
+      Target->Context.insert(Call);
+      return Call;
+    }
+
+    void ldr(Variable *Dest, OperandARM32Mem *Mem, CondARM32::Cond Pred) {
+      if (Target->NeedSandboxing) {
+        assert(!Mem->isRegReg());
+        Variable *MemBase = Mem->getBase();
+        Target->_bic(MemBase, MemBase, Target->memOpBicMask(), Pred);
+      }
+      Target->_ldr(Dest, Mem, Pred);
+    }
+
+    void ldrex(Variable *Dest, OperandARM32Mem *Mem, CondARM32::Cond Pred) {
+      if (Target->NeedSandboxing) {
+        assert(!Mem->isRegReg());
+        Variable *MemBase = Mem->getBase();
+        Target->_bic(MemBase, MemBase, Target->memOpBicMask(), Pred);
+      }
+      Target->_ldrex(Dest, Mem, Pred);
+    }
+
+    void reset_sp(Variable *Src) {
+      Variable *SP = Target->getPhysicalRegister(RegARM32::Reg_sp);
+      Target->_mov_redefined(SP, Src);
+      if (Target->NeedSandboxing) {
+        Target->_bic(SP, SP, Target->memOpBicMask());
+      }
+    }
+
+    void ret(Variable *RetAddr, Variable *RetValue) {
+      if (Target->NeedSandboxing) {
+        Target->_bic(RetAddr, RetAddr, Target->indirectBranchBicMask());
+      }
+      Target->_ret(RetAddr, RetValue);
+    }
+
+    void str(Variable *Src, OperandARM32Mem *Mem, CondARM32::Cond Pred) {
+      if (Target->NeedSandboxing) {
+        assert(!Mem->isRegReg());
+        Variable *MemBase = Mem->getBase();
+        Target->_bic(MemBase, MemBase, Target->memOpBicMask(), Pred);
+      }
+      Target->_str(Src, Mem, Pred);
+    }
+
+    void strex(Variable *Dest, Variable *Src, OperandARM32Mem *Mem,
+               CondARM32::Cond Pred) {
+      if (Target->NeedSandboxing) {
+        assert(!Mem->isRegReg());
+        Variable *MemBase = Mem->getBase();
+        Target->_bic(MemBase, MemBase, Target->memOpBicMask(), Pred);
+      }
+      Target->_strex(Dest, Src, Mem, Pred);
+    }
+
+    void sub_sp(Operand *SubAmount) {
+      Variable *SP = Target->getPhysicalRegister(RegARM32::Reg_sp);
+      Target->_sub(SP, SP, SubAmount);
+      if (Target->NeedSandboxing) {
+        Target->_bic(SP, SP, Target->memOpBicMask());
+      }
+    }
+
+    ~AutoSandboxer() {
+      if (Target->NeedSandboxing) {
+        Target->_bundle_unlock();
+      }
+    }
+
+  private:
+    TargetARM32 *Target;
+  };
+
   class PostLoweringLegalizer {
     PostLoweringLegalizer() = delete;
     PostLoweringLegalizer(const PostLoweringLegalizer &) = delete;
@@ -878,6 +999,7 @@ protected:
     int32_t TempBaseOffset = 0;
   };

+  const bool NeedSandboxing;
   TargetARM32Features CPUFeatures;
   bool UsesFramePointer = false;
   bool NeedsStackAlignment = false;
diff --git a/tests_lit/assembler/arm32/sandboxing.ll b/tests_lit/assembler/arm32/sandboxing.ll
new file mode 100644
index 000000000..bf9935129
--- /dev/null
+++ b/tests_lit/assembler/arm32/sandboxing.ll
@@ -0,0 +1,149 @@
+; Tests basics and corner cases of arm32 sandboxing, using -Om1 in
+; the hope that the output will remain stable. When packing bundles,
+; we try to limit to a few instructions with well known sizes and
+; minimal use of registers and stack slots in the lowering sequence.
+
+; RUN: %p2i -i %s --sandbox --filetype=asm --target=arm32 --assemble \
+; RUN:   --disassemble --args -Om1 -allow-externally-defined-symbols \
+; RUN:   -ffunction-sections | FileCheck %s
+
+declare void @call_target()
+@global_short = internal global [2 x i8] zeroinitializer
+
+; A direct call sequence uses the right mask and register-call sequence.
+define internal void @test_direct_call() {
+entry:
+  call void @call_target()
+  ret void
+}
+; CHECK-LABEL: test_direct_call
+; CHECK: nop
+; CHECK: c: {{.*}} bl {{.*}} call_target
+; CHECK-NEXT: 10:
+
+; An indirect call sequence uses the right mask and register-call sequence.
+define internal void @test_indirect_call(i32 %target) {
+entry:
+  %__1 = inttoptr i32 %target to void ()*
+  call void %__1()
+  ret void
+}
+; CHECK-LABEL: test_indirect_call
+; CHECK: ldr [[REG:.*]], [sp,
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK: 18: {{.*}} bic [[REG]], [[REG]], {{.*}} 0xc000000f
+; CHECK-NEXT: blx [[REG]]
+; CHECK-NEXT: 20:
+
+; A return sequence uses the right mask and bx sequence.
+define internal void @test_ret() {
+entry:
+  ret void
+}
+; CHECK-LABEL: test_ret
+; CHECK: 0: {{.*}} bic lr, lr, {{.*}} 0xc000000f
+; CHECK-NEXT: bx lr
+
+; Bundle lock without padding.
+define internal void @bundle_lock_without_padding() {
+entry:
+  %addr_short = bitcast [2 x i8]* @global_short to i16*
+  store i16 0, i16* %addr_short, align 1
+  ret void
+}
+; CHECK-LABEL: bundle_lock_without_padding
+; CHECK: 0: {{.*}} movw
+; CHECK-NEXT: movt
+; CHECK-NEXT: movw
+; CHECK-NEXT: strh
+; CHECK-NEXT: bic lr, lr, {{.*}} 0xc000000f
+; CHECK-NEXT: {{.*}} bx lr
+
+; Bundle lock with padding.
+define internal void @bundle_lock_with_padding() {
+entry:
+  call void @call_target()
+  ; bundle boundary
+  store i16 0, i16* undef, align 1 ; 3 insts
+  store i16 0, i16* undef, align 1 ; 3 insts
+  store i16 0, i16* undef, align 1 ; 3 insts
+                                   ; SP adjustment + pop
+                                   ; nop
+  ; bundle boundary
+  ret void
+}
+; CHECK-LABEL: bundle_lock_with_padding
+; CHECK: 38: {{.*}} pop
+; CHECK-NEXT: nop
+; CHECK-NEXT: bic lr, lr, {{.*}} 0xc000000f
+; CHECK-NEXT: {{.*}} bx lr
+
+; Bundle lock align_to_end without any padding.
+define internal void @bundle_lock_align_to_end_padding_0() { +entry: + call void @call_target() + ; bundle boundary + store i16 0, i16* undef, align 1 + call void @call_target() + ; bundle boundary + ret void +} +; CHECK-LABEL: bundle_lock_align_to_end_padding_0 +; CHECK: c: {{.*}} bl {{.*}} call_target +; CHECK-NEXT: movw +; CHECK-NEXT: movw +; CHECK-NEXT: strh +; CHECK-NEXT: bl {{.*}} call_target +; CHECK-NEXT: add +; CHECK-NEXT: pop +; CHECK-NEXT: bic lr, lr, {{.*}} 0xc000000f +; CHECK-NEXT: {{.*}} bx lr + +; Bundle lock align_to_end with one bunch of padding. +define internal void @bundle_lock_align_to_end_padding_1() { +entry: + call void @call_target() + ; bundle boundary + store i16 0, i16* undef, align 1 + store i16 0, i16* undef, align 1 + ; bundle boundary + call void @call_target() + ; bundle boundary + ret void +} +; CHECK-LABEL: bundle_lock_align_to_end_padding_1 +; CHECK: c: {{.*}} bl {{.*}} call_target +; CHECK-NEXT: movw +; CHECK-NEXT: movw +; CHECK-NEXT: strh +; CHECK-NEXT: movw +; CHECK-NEXT: movw +; CHECK-NEXT: strh +; CHECK-NEXT: nop +; CHECK-NEXT: bl {{.*}} call_target +; CHECK-NEXT: add +; CHECK-NEXT: pop +; CHECK-NEXT: bic lr, lr, {{.*}} 0xc000000f +; CHECK-NEXT: {{.*}} bx lr + +; Bundle lock align_to_end with two bunches of padding. +define internal void @bundle_lock_align_to_end_padding_2(i32 %target) { +entry: + call void @call_target() + ; bundle boundary + %__1 = inttoptr i32 %target to void ()* + store i8 0, i8* undef, align 1 + call void %__1() + ret void +} +; CHECK-LABEL: bundle_lock_align_to_end_padding_2 +; CHECK: c: {{.*}} bl {{.*}} call_target +; CHECK-NEXT: movw +; CHECK-NEXT: movw +; CHECK-NEXT: strb +; CHECK: 20: {{.*}} nop +; CHECK-NEXT: nop +; CHECK-NEXT: bic [[REG:r[0-9]+]], [[REG]], {{.*}} 0xc000000f +; CHECK-NEXT: {{.*}} blx [[REG]] + diff --git a/tests_lit/assembler/x86/sandboxing.ll b/tests_lit/assembler/x86/sandboxing.ll index c03d335d0..3233d574a 100644 --- a/tests_lit/assembler/x86/sandboxing.ll +++ b/tests_lit/assembler/x86/sandboxing.ll @@ -3,9 +3,9 @@ ; we try to limit to a few instructions with well known sizes and ; minimal use of registers and stack slots in the lowering sequence. -; RUN: %p2i -i %s --filetype=obj --disassemble --args -Om1 \ +; RUN: %p2i -i %s --sandbox --filetype=obj --disassemble --args -Om1 \ ; RUN: -allow-externally-defined-symbols \ -; RUN: -ffunction-sections -sandbox | FileCheck %s +; RUN: -ffunction-sections | FileCheck %s declare void @call_target() @global_byte = internal global [1 x i8] zeroinitializer -- 2.11.0
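Editor's note (not part of the patch): the sandboxing scheme above hinges on the two
bic masks defined in indirectBranchBicMask() and memOpBicMask(). The C++ sketch below
is illustrative only; it uses nothing beyond the constants visible in the diff, and the
flexImm helper name is hypothetical. It shows how the ARM flexible immediate (an 8-bit
value rotated right by twice the 4-bit rotate field, the pair passed to
OperandARM32FlexImm::create) reconstructs each 32-bit mask, and what bic does to a
branch target versus a load/store base register.

  #include <cassert>
  #include <cstdint>
  #include <cstdio>

  // ARM flexible immediate: Imm8 rotated right by 2 * RotateAmt bits.
  static uint32_t flexImm(uint32_t Imm8, uint32_t RotateAmt) {
    const uint32_t R = (2 * RotateAmt) % 32;
    return R == 0 ? Imm8 : ((Imm8 >> R) | (Imm8 << (32 - R)));
  }

  int main() {
    // indirectBranchBicMask(): Imm8 = 0xFC, RotateAmt = 2 -> 0xC000000F.
    // memOpBicMask():          Imm8 = 0x0C, RotateAmt = 2 -> 0xC0000000.
    const uint32_t BranchMask = flexImm(0xFC, 2);
    const uint32_t MemOpMask = flexImm(0x0C, 2);
    assert(BranchMask == 0xC000000Fu);
    assert(MemOpMask == 0xC0000000u);

    // bic Rd, Rn, #Mask computes Rn & ~Mask. For branch targets (blx, bx lr)
    // this clears the top two bits, confining the target to the low 1GB, and
    // the low four bits, forcing 16-byte bundle alignment.
    const uint32_t Addr = 0xDEADBEEFu;
    printf("branch target: 0x%08X -> 0x%08X\n", Addr, Addr & ~BranchMask);

    // For load/store bases (and sp after adjustment) only the top two bits
    // are cleared; data accesses need not be bundle aligned.
    printf("memory base:   0x%08X -> 0x%08X\n", Addr, Addr & ~MemOpMask);
    return 0;
  }

Two design points visible in AutoSandboxer follow from this: each masked operation is
emitted inside a bundle_lock/bundle_unlock pair so a bundle boundary can never separate
the bic from the instruction it guards, and r9 (which holds the thread pointer and is
never written in sandboxed code) is exempt from masking, which is why legalizeMemOperand
leaves r9-based operands alone and NaClReadTP lowers to a plain load from [r9].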