From e33d953beac93a595a11f11ca134df7049103851 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Tue, 5 Dec 2017 20:22:20 +0000 Subject: [PATCH] Re-commit r319490 "XOR the frame pointer with the stack cookie when protecting the stack" The patch originally broke Chromium (crbug.com/791714) due to its failing to specify that the new pseudo instructions clobber EFLAGS. This commit fixes that. > Summary: This strengthens the guard and matches MSVC. > > Reviewers: hans, etienneb > > Subscribers: hiraditya, JDevlieghere, vlad.tsyrklevich, llvm-commits > > Differential Revision: https://reviews.llvm.org/D40622 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@319824 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/TargetLowering.h | 11 ++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 13 +- lib/CodeGen/StackProtector.cpp | 6 +- lib/Target/X86/X86ISelLowering.cpp | 13 ++ lib/Target/X86/X86ISelLowering.h | 4 + lib/Target/X86/X86InstrCompiler.td | 9 ++ lib/Target/X86/X86InstrInfo.cpp | 15 +++ test/CodeGen/X86/stack-protector-msvc.ll | 154 ++++++++++++++++++++--- test/CodeGen/X86/stack-protector-weight.ll | 3 +- 9 files changed, 206 insertions(+), 22 deletions(-) diff --git a/include/llvm/CodeGen/TargetLowering.h b/include/llvm/CodeGen/TargetLowering.h index 4210f58ddb0..f9a61b8bf1a 100644 --- a/include/llvm/CodeGen/TargetLowering.h +++ b/include/llvm/CodeGen/TargetLowering.h @@ -1360,6 +1360,12 @@ public: /// getIRStackGuard returns nullptr. virtual Value *getSDagStackGuard(const Module &M) const; + /// If this function returns true, stack protection checks should XOR the + /// frame pointer (or whichever pointer is used to address locals) into the + /// stack guard value before checking it. getIRStackGuard must return nullptr + /// if this returns true. + virtual bool useStackGuardXorFP() const { return false; } + /// If the target has a standard stack protection check function that /// performs validation and error handling, returns the function. Otherwise, /// returns nullptr. Must be previously inserted by insertSSPDeclarations. @@ -3487,6 +3493,11 @@ public: return false; } + virtual SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val, + const SDLoc &DL) const { + llvm_unreachable("not implemented for this target"); + } + /// Lower TLS global address SDNode for target independent emulated TLS model. virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 1c31eca3ec9..f3addf05566 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -2148,11 +2148,14 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, unsigned Align = DL->getPrefTypeAlignment(Type::getInt8PtrTy(M.getContext())); // Generate code to load the content of the guard slot. - SDValue StackSlot = DAG.getLoad( + SDValue GuardVal = DAG.getLoad( PtrTy, dl, DAG.getEntryNode(), StackSlotPtr, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), Align, MachineMemOperand::MOVolatile); + if (TLI.useStackGuardXorFP()) + GuardVal = TLI.emitStackGuardXorFP(DAG, GuardVal, dl); + // Retrieve guard check function, nullptr if instrumentation is inlined. if (const Value *GuardCheck = TLI.getSSPStackGuardCheck(M)) { // The target provides a guard check function to validate the guard value. @@ -2164,7 +2167,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - Entry.Node = StackSlot; + Entry.Node = GuardVal; Entry.Ty = FnTy->getParamType(0); if (Fn->hasAttribute(1, Attribute::AttrKind::InReg)) Entry.IsInReg = true; @@ -2197,7 +2200,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, // Perform the comparison via a subtract/getsetcc. EVT VT = Guard.getValueType(); - SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, StackSlot); + SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, GuardVal); SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), @@ -2207,7 +2210,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, // If the sub is not 0, then we know the guard/stackslot do not equal, so // branch to failure MBB. SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, - MVT::Other, StackSlot.getOperand(0), + MVT::Other, GuardVal.getOperand(0), Cmp, DAG.getBasicBlock(SPD.getFailureMBB())); // Otherwise branch to success MBB. SDValue Br = DAG.getNode(ISD::BR, dl, @@ -5646,6 +5649,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { MachinePointerInfo(Global, 0), Align, MachineMemOperand::MOVolatile); } + if (TLI.useStackGuardXorFP()) + Res = TLI.emitStackGuardXorFP(DAG, Res, sdl); DAG.setRoot(Chain); setValue(&I, Res); return nullptr; diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index e3340028863..62cef95a4af 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -385,8 +385,12 @@ static bool CreatePrologue(Function *F, Module *M, ReturnInst *RI, /// - The epilogue checks the value stored in the prologue against the original /// value. It calls __stack_chk_fail if they differ. bool StackProtector::InsertStackProtectors() { + // If the target wants to XOR the frame pointer into the guard value, it's + // impossible to emit the check in IR, so the target *must* support stack + // protection in SDAG. bool SupportsSelectionDAGSP = - EnableSelectionDAGSP && !TM->Options.EnableFastISel; + TLI->useStackGuardXorFP() || + (EnableSelectionDAGSP && !TM->Options.EnableFastISel); AllocaInst *AI = nullptr; // Place on stack that stores the stack guard. for (Function::iterator I = F->begin(), E = F->end(); I != E;) { diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 6d5712dbf67..5f013753ea8 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1687,6 +1687,19 @@ bool X86TargetLowering::useLoadStackGuardNode() const { return Subtarget.isTargetMachO() && Subtarget.is64Bit(); } +bool X86TargetLowering::useStackGuardXorFP() const { + // Currently only MSVC CRTs XOR the frame pointer into the stack guard value. + return Subtarget.getTargetTriple().isOSMSVCRT(); +} + +SDValue X86TargetLowering::emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val, + const SDLoc &DL) const { + EVT PtrTy = getPointerTy(DAG.getDataLayout()); + unsigned XorOp = Subtarget.is64Bit() ? X86::XOR64_FP : X86::XOR32_FP; + MachineSDNode *Node = DAG.getMachineNode(XorOp, DL, PtrTy, Val); + return SDValue(Node, 0); +} + TargetLoweringBase::LegalizeTypeAction X86TargetLowering::getPreferredVectorAction(EVT VT) const { if (ExperimentalVectorWideningLegalization && diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 90830f4d5d1..d31104f9433 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -1055,9 +1055,13 @@ namespace llvm { Value *getIRStackGuard(IRBuilder<> &IRB) const override; bool useLoadStackGuardNode() const override; + bool useStackGuardXorFP() const override; void insertSSPDeclarations(Module &M) const override; Value *getSDagStackGuard(const Module &M) const override; Value *getSSPStackGuardCheck(const Module &M) const override; + SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val, + const SDLoc &DL) const override; + /// Return true if the target stores SafeStack pointer at a fixed offset in /// some non-standard address space, and populates the address space and diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index 82885687bb4..0b63f376302 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -142,6 +142,15 @@ def WIN_ALLOCA_64 : I<0, Pseudo, (outs), (ins GR64:$size), [(X86WinAlloca GR64:$size)]>, Requires<[In64BitMode]>; +// These instructions XOR the frame pointer into a GPR. They are used in some +// stack protection schemes. These are post-RA pseudos because we only know the +// frame register after register allocation. +let Constraints = "$src = $dst", isPseudo = 1, Defs = [EFLAGS] in { + def XOR32_FP : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$src), + "xorl\t$$FP, $src", []>, Requires<[NotLP64]>; + def XOR64_FP : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$src), + "xorq\t$$FP $src", []>, Requires<[In64BitMode]>; +} //===----------------------------------------------------------------------===// // EH Pseudo Instructions diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index a5bff06e70b..96f19d35815 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -7762,6 +7762,18 @@ static void expandLoadStackGuard(MachineInstrBuilder &MIB, MIB.addReg(Reg, RegState::Kill).addImm(1).addReg(0).addImm(0).addReg(0); } +static bool expandXorFP(MachineInstrBuilder &MIB, const TargetInstrInfo &TII) { + MachineBasicBlock &MBB = *MIB->getParent(); + MachineFunction &MF = *MBB.getParent(); + const X86Subtarget &Subtarget = MF.getSubtarget(); + const X86RegisterInfo *TRI = Subtarget.getRegisterInfo(); + unsigned XorOp = + MIB->getOpcode() == X86::XOR64_FP ? X86::XOR64rr : X86::XOR32rr; + MIB->setDesc(TII.get(XorOp)); + MIB.addReg(TRI->getFrameRegister(MF), RegState::Undef); + return true; +} + // This is used to handle spills for 128/256-bit registers when we have AVX512, // but not VLX. If it uses an extended register we need to use an instruction // that loads the lower 128/256-bit, but is available with only AVX512F. @@ -7956,6 +7968,9 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { case TargetOpcode::LOAD_STACK_GUARD: expandLoadStackGuard(MIB, *this); return true; + case X86::XOR64_FP: + case X86::XOR32_FP: + return expandXorFP(MIB, *this); } return false; } diff --git a/test/CodeGen/X86/stack-protector-msvc.ll b/test/CodeGen/X86/stack-protector-msvc.ll index 5eccc65f2de..c1f79f9db2f 100644 --- a/test/CodeGen/X86/stack-protector-msvc.ll +++ b/test/CodeGen/X86/stack-protector-msvc.ll @@ -1,19 +1,9 @@ +; RUN: llc -mtriple=i386-pc-windows-msvc < %s -o - | FileCheck -check-prefix=MSVC-X86 %s +; RUN: llc -mtriple=x86_64-pc-windows-msvc < %s -o - | FileCheck -check-prefix=MSVC-X64 %s -; RUN: llc -mtriple=i386-pc-windows-msvc < %s -o - | FileCheck -check-prefix=MSVC-I386 %s -; RUN: llc -mtriple=x86_64-pc-windows-msvc < %s -o - | FileCheck -check-prefix=MSVC-64 %s - -; MSVC-I386: movl ___security_cookie, %[[REG1:[a-z]*]] -; MSVC-I386: movl %[[REG1]], [[SLOT:[0-9]*]](%esp) -; MSVC-I386: calll _strcpy -; MSVC-I386: movl [[SLOT]](%esp), %ecx -; MSVC-I386: calll @__security_check_cookie@4 -; MSVC-I386: retl - -; MSVC-64: movq __security_cookie(%rip), %[[REG1:[a-z]*]] -; MSVC-64: movq %[[REG1]], [[SLOT:[0-9]*]](%rsp) -; MSVC-64: callq strcpy -; MSVC-64: movq [[SLOT]](%rsp), %rcx -; MSVC-64: callq __security_check_cookie +; Make sure fastisel falls back and does something secure. +; RUN: llc -mtriple=i686-pc-windows-msvc -O0 < %s -o - | FileCheck -check-prefix=MSVC-X86-O0 %s +; RUN: llc -mtriple=x86_64-pc-windows-msvc -O0 < %s -o - | FileCheck -check-prefix=MSVC-X64-O0 %s @"\01LC" = internal constant [11 x i8] c"buf == %s\0A\00" ; <[11 x i8]*> [#uses=1] @@ -21,7 +11,6 @@ define void @test(i8* %a) nounwind ssp { entry: %a_addr = alloca i8* ; [#uses=2] %buf = alloca [8 x i8] ; <[8 x i8]*> [#uses=2] - %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] store i8* %a, i8** %a_addr %buf1 = bitcast [8 x i8]* %buf to i8* ; [#uses=1] %0 = load i8*, i8** %a_addr, align 4 ; [#uses=1] @@ -34,6 +23,139 @@ return: ; preds = %entry ret void } +; MSVC-X86-LABEL: _test: +; MSVC-X86: movl ___security_cookie, %[[REG1:[^ ]*]] +; MSVC-X86: xorl %esp, %[[REG1]] +; MSVC-X86: movl %[[REG1]], [[SLOT:[0-9]*]](%esp) +; MSVC-X86: calll _strcpy +; MSVC-X86: movl [[SLOT]](%esp), %ecx +; MSVC-X86: xorl %esp, %ecx +; MSVC-X86: calll @__security_check_cookie@4 +; MSVC-X86: retl + +; MSVC-X64-LABEL: test: +; MSVC-X64: movq __security_cookie(%rip), %[[REG1:[^ ]*]] +; MSVC-X64: xorq %rsp, %[[REG1]] +; MSVC-X64: movq %[[REG1]], [[SLOT:[0-9]*]](%rsp) +; MSVC-X64: callq strcpy +; MSVC-X64: movq [[SLOT]](%rsp), %rcx +; MSVC-X64: xorq %rsp, %rcx +; MSVC-X64: callq __security_check_cookie +; MSVC-X64: retq + +; MSVC-X86-O0-LABEL: _test: +; MSVC-X86-O0: movl ___security_cookie, %[[REG1:[^ ]*]] +; MSVC-X86-O0: xorl %esp, %[[REG1]] +; MSVC-X86-O0: movl %[[REG1]], [[SLOT:[0-9]*]](%esp) +; MSVC-X86-O0: calll _strcpy +; MSVC-X86-O0: movl [[SLOT]](%esp), %[[REG1:[^ ]*]] +; MSVC-X86-O0: xorl %esp, %[[REG1]] +; MSVC-X86-O0: movl %[[REG1]], %ecx +; MSVC-X86-O0: calll @__security_check_cookie@4 +; MSVC-X86-O0: retl + +; MSVC-X64-O0-LABEL: test: +; MSVC-X64-O0: movq __security_cookie(%rip), %[[REG1:[^ ]*]] +; MSVC-X64-O0: xorq %rsp, %[[REG1]] +; MSVC-X64-O0: movq %[[REG1]], [[SLOT:[0-9]*]](%rsp) +; MSVC-X64-O0: callq strcpy +; MSVC-X64-O0: movq [[SLOT]](%rsp), %[[REG1:[^ ]*]] +; MSVC-X64-O0: xorq %rsp, %[[REG1]] +; MSVC-X64-O0: movq %[[REG1]], %rcx +; MSVC-X64-O0: callq __security_check_cookie +; MSVC-X64-O0: retq + + +declare void @escape(i32*) + +define void @test_vla(i32 %n) nounwind ssp { + %vla = alloca i32, i32 %n + call void @escape(i32* %vla) + ret void +} + +; MSVC-X86-LABEL: _test_vla: +; MSVC-X86: pushl %ebp +; MSVC-X86: movl %esp, %ebp +; MSVC-X86: movl ___security_cookie, %[[REG1:[^ ]*]] +; MSVC-X86: xorl %ebp, %[[REG1]] +; MSVC-X86: movl %[[REG1]], [[SLOT:-[0-9]*]](%ebp) +; MSVC-X86: calll __chkstk +; MSVC-X86: pushl +; MSVC-X86: calll _escape +; MSVC-X86: movl [[SLOT]](%ebp), %ecx +; MSVC-X86: xorl %ebp, %ecx +; MSVC-X86: calll @__security_check_cookie@4 +; MSVC-X86: movl %ebp, %esp +; MSVC-X86: popl %ebp +; MSVC-X86: retl + +; MSVC-X64-LABEL: test_vla: +; MSVC-X64: pushq %rbp +; MSVC-X64: subq $16, %rsp +; MSVC-X64: leaq 16(%rsp), %rbp +; MSVC-X64: movq __security_cookie(%rip), %[[REG1:[^ ]*]] +; MSVC-X64: xorq %rbp, %[[REG1]] +; MSVC-X64: movq %[[REG1]], [[SLOT:-[0-9]*]](%rbp) +; MSVC-X64: callq __chkstk +; MSVC-X64: callq escape +; MSVC-X64: movq [[SLOT]](%rbp), %rcx +; MSVC-X64: xorq %rbp, %rcx +; MSVC-X64: callq __security_check_cookie +; MSVC-X64: retq + + +; This case is interesting because we address local variables with RBX but XOR +; the guard value with RBP. That's fine, either value will do, as long as they +; are the same across the life of the frame. + +define void @test_vla_realign(i32 %n) nounwind ssp { + %realign = alloca i32, align 32 + %vla = alloca i32, i32 %n + call void @escape(i32* %realign) + call void @escape(i32* %vla) + ret void +} + +; MSVC-X86-LABEL: _test_vla_realign: +; MSVC-X86: pushl %ebp +; MSVC-X86: movl %esp, %ebp +; MSVC-X86: pushl %esi +; MSVC-X86: andl $-32, %esp +; MSVC-X86: subl $32, %esp +; MSVC-X86: movl %esp, %esi +; MSVC-X86: movl ___security_cookie, %[[REG1:[^ ]*]] +; MSVC-X86: xorl %ebp, %[[REG1]] +; MSVC-X86: movl %[[REG1]], [[SLOT:[0-9]*]](%esi) +; MSVC-X86: calll __chkstk +; MSVC-X86: pushl +; MSVC-X86: calll _escape +; MSVC-X86: movl [[SLOT]](%esi), %ecx +; MSVC-X86: xorl %ebp, %ecx +; MSVC-X86: calll @__security_check_cookie@4 +; MSVC-X86: leal -8(%ebp), %esp +; MSVC-X86: popl %esi +; MSVC-X86: popl %ebp +; MSVC-X86: retl + +; MSVC-X64-LABEL: test_vla_realign: +; MSVC-X64: pushq %rbp +; MSVC-X64: pushq %rbx +; MSVC-X64: subq $32, %rsp +; MSVC-X64: leaq 32(%rsp), %rbp +; MSVC-X64: andq $-32, %rsp +; MSVC-X64: movq %rsp, %rbx +; MSVC-X64: movq __security_cookie(%rip), %[[REG1:[^ ]*]] +; MSVC-X64: xorq %rbp, %[[REG1]] +; MSVC-X64: movq %[[REG1]], [[SLOT:[0-9]*]](%rbx) +; MSVC-X64: callq __chkstk +; MSVC-X64: callq escape +; MSVC-X64: movq [[SLOT]](%rbx), %rcx +; MSVC-X64: xorq %rbp, %rcx +; MSVC-X64: callq __security_check_cookie +; MSVC-X64: retq + + declare i8* @strcpy(i8*, i8*) nounwind declare i32 @printf(i8*, ...) nounwind diff --git a/test/CodeGen/X86/stack-protector-weight.ll b/test/CodeGen/X86/stack-protector-weight.ll index de40d30cc48..3708d216f8d 100644 --- a/test/CodeGen/X86/stack-protector-weight.ll +++ b/test/CodeGen/X86/stack-protector-weight.ll @@ -21,10 +21,11 @@ ; MSVC-SELDAG: LD4[FixedStack0] ; MSVC-SELDAG: CALLpcrel32 +; MSVC always uses selection DAG now. ; MSVC-IR: # Machine code for function test_branch_weights: ; MSVC-IR: mem:Volatile LD4[@__security_cookie] ; MSVC-IR: ST4[FixedStack0] -; MSVC-IR: LD4[%StackGuardSlot] +; MSVC-IR: LD4[FixedStack0] ; MSVC-IR: CALLpcrel32 define i32 @test_branch_weights(i32 %n) #0 { -- 2.11.0