GlobalISel: introduce G_PTR_MASK to simplify alloca handling.

author Tim Northover <tnorthover@apple.com>

Tue, 14 Feb 2017 20:56:18 +0000 (20:56 +0000)

committer Tim Northover <tnorthover@apple.com>

Tue, 14 Feb 2017 20:56:18 +0000 (20:56 +0000)
author Tim Northover <tnorthover@apple.com>
Tue, 14 Feb 2017 20:56:18 +0000 (20:56 +0000)
committer Tim Northover <tnorthover@apple.com>
Tue, 14 Feb 2017 20:56:18 +0000 (20:56 +0000)
diff --git a/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h

index 008494e..a3fa6c2 100644 (file)
--- a/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -229,6 +229,22 @@ public:
    MachineInstrBuilder buildGEP(unsigned Res, unsigned Op0,
                                 unsigned Op1);
  
+  /// Build and insert \p Res<def> = G_PTR_MASK \p Op0, \p NumBits
+  ///
+  /// G_PTR_MASK clears the low \p NumBits bits of a pointer operand without
+  /// destroying its pointer properties. This has the effect of rounding the
+  /// address *down* to a multiple of 2^NumBits.
+  ///
+  /// \pre setBasicBlock or setMI must have been called.
+  /// \pre \p Res and \p Op0 must be generic virtual registers with pointer
+  ///      type.
+  /// \pre \p NumBits must be an integer representing the number of low bits to
+  ///      be cleared in \p Op0.
+  ///
+  /// \return a MachineInstrBuilder for the newly created instruction.
+  MachineInstrBuilder buildPtrMask(unsigned Res, unsigned Op0,
+                                   uint32_t NumBits);
+
    /// Build and insert \p Res<def>, \p CarryOut<def> = G_UADDE \p Op0,
    /// \p Op1, \p CarryIn
    ///
diff --git a/include/llvm/Target/GenericOpcodes.td b/include/llvm/Target/GenericOpcodes.td

index 09b03bc..4c5d617 100644 (file)
--- a/include/llvm/Target/GenericOpcodes.td
+++ b/include/llvm/Target/GenericOpcodes.td
@@ -110,13 +110,6 @@ def G_ADD : Instruction {
    let isCommutable = 1;
  }
  
-// Generic pointer offset.
-def G_GEP : Instruction {
-  let OutOperandList = (outs type0:$dst);
-  let InOperandList = (ins type0:$src1, type1:$src2);
-  let hasSideEffects = 0;
-}
-
  // Generic subtraction.
  def G_SUB : Instruction {
    let OutOperandList = (outs type0:$dst);
@@ -231,6 +224,19 @@ def G_SELECT : Instruction {
    let hasSideEffects = 0;
  }
  
+// Generic pointer offset.
+def G_GEP : Instruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src1, type1:$src2);
+  let hasSideEffects = 0;
+}
+
+def G_PTR_MASK : Instruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src, unknown:$bits);
+  let hasSideEffects = 0;
+}
+
  //------------------------------------------------------------------------------
  // Overflow ops
  //------------------------------------------------------------------------------
diff --git a/include/llvm/Target/TargetOpcodes.def b/include/llvm/Target/TargetOpcodes.def

index 7987ddd..b38e0cf 100644 (file)
--- a/include/llvm/Target/TargetOpcodes.def
+++ b/include/llvm/Target/TargetOpcodes.def
@@ -378,6 +378,10 @@ HANDLE_TARGET_OPCODE(G_UITOFP)
  /// Generic pointer offset
  HANDLE_TARGET_OPCODE(G_GEP)
  
+/// Clear the specified number (N) of low bits in a pointer. This rounds the
+/// value *down* to a multiple of 2^N.
+HANDLE_TARGET_OPCODE(G_PTR_MASK)
+
  /// Generic BRANCH instruction. This is an unconditional branch.
  HANDLE_TARGET_OPCODE(G_BR)
  
diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp

index 1431347..c5ac42f 100644 (file)
--- a/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -870,7 +870,7 @@ bool IRTranslator::translateAlloca(const User &U,
  
    unsigned AllocSize = MRI->createGenericVirtualRegister(IntPtrTy);
    unsigned TySize = MRI->createGenericVirtualRegister(IntPtrTy);
-  MIRBuilder.buildConstant(TySize, DL->getTypeAllocSize(Ty));
+  MIRBuilder.buildConstant(TySize, -DL->getTypeAllocSize(Ty));
    MIRBuilder.buildMul(AllocSize, NumElts, TySize);
  
    LLT PtrTy = LLT{*AI.getType(), *DL};
@@ -880,11 +880,8 @@ bool IRTranslator::translateAlloca(const User &U,
    unsigned SPTmp = MRI->createGenericVirtualRegister(PtrTy);
    MIRBuilder.buildCopy(SPTmp, SPReg);
  
-  unsigned SPInt = MRI->createGenericVirtualRegister(IntPtrTy);
-  MIRBuilder.buildInstr(TargetOpcode::G_PTRTOINT).addDef(SPInt).addUse(SPTmp);
-
-  unsigned AllocInt = MRI->createGenericVirtualRegister(IntPtrTy);
-  MIRBuilder.buildSub(AllocInt, SPInt, AllocSize);
+  unsigned AllocTmp = MRI->createGenericVirtualRegister(PtrTy);
+  MIRBuilder.buildGEP(AllocTmp, SPTmp, AllocSize);
  
    // Handle alignment. We have to realign if the allocation granule was smaller
    // than stack alignment, or the specific alloca requires more than stack
@@ -896,25 +893,13 @@ bool IRTranslator::translateAlloca(const User &U,
      // Round the size of the allocation up to the stack alignment size
      // by add SA-1 to the size. This doesn't overflow because we're computing
      // an address inside an alloca.
-    unsigned TmpSize = MRI->createGenericVirtualRegister(IntPtrTy);
-    unsigned AlignMinus1 = MRI->createGenericVirtualRegister(IntPtrTy);
-    MIRBuilder.buildConstant(AlignMinus1, Align - 1);
-    MIRBuilder.buildSub(TmpSize, AllocInt, AlignMinus1);
-
-    unsigned AlignedAlloc = MRI->createGenericVirtualRegister(IntPtrTy);
-    unsigned AlignMask = MRI->createGenericVirtualRegister(IntPtrTy);
-    MIRBuilder.buildConstant(AlignMask, -(uint64_t)Align);
-    MIRBuilder.buildAnd(AlignedAlloc, TmpSize, AlignMask);
-
-    AllocInt = AlignedAlloc;
+    unsigned AlignedAlloc = MRI->createGenericVirtualRegister(PtrTy);
+    MIRBuilder.buildPtrMask(AlignedAlloc, AllocTmp, Log2_32(Align));
+    AllocTmp = AlignedAlloc;
    }
  
-  unsigned DstReg = getOrCreateVReg(AI);
-  MIRBuilder.buildInstr(TargetOpcode::G_INTTOPTR)
-      .addDef(DstReg)
-      .addUse(AllocInt);
-
-  MIRBuilder.buildCopy(SPReg, DstReg);
+  MIRBuilder.buildCopy(SPReg, AllocTmp);
+  MIRBuilder.buildCopy(getOrCreateVReg(AI), AllocTmp);
  
    MF->getFrameInfo().CreateVariableSizedObject(Align ? Align : 1, &AI);
    assert(MF->getFrameInfo().hasVarSizedObjects());
diff --git a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp

index 665fb70..d92bbc1 100644 (file)
--- a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -187,6 +187,17 @@ MachineInstrBuilder MachineIRBuilder::buildGEP(unsigned Res, unsigned Op0,
        .addUse(Op1);
  }
  
+MachineInstrBuilder MachineIRBuilder::buildPtrMask(unsigned Res, unsigned Op0,
+                                                   uint32_t NumBits) {
+  assert(MRI->getType(Res).isPointer() &&
+         MRI->getType(Res) == MRI->getType(Op0) && "type mismatch");
+
+  return buildInstr(TargetOpcode::G_PTR_MASK)
+      .addDef(Res)
+      .addUse(Op0)
+      .addImm(NumBits);
+}
+
  MachineInstrBuilder MachineIRBuilder::buildSub(unsigned Res, unsigned Op0,
                                                 unsigned Op1) {
    assert((MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()) &&
diff --git a/test/CodeGen/AArch64/GlobalISel/dynamic-alloca.ll b/test/CodeGen/AArch64/GlobalISel/dynamic-alloca.ll

index df608b6..c52a575 100644 (file)
--- a/test/CodeGen/AArch64/GlobalISel/dynamic-alloca.ll
+++ b/test/CodeGen/AArch64/GlobalISel/dynamic-alloca.ll
@@ -3,18 +3,14 @@
  ; CHECK-LABEL: name: test_simple_alloca
  ; CHECK: [[NUMELTS:%[0-9]+]](s32) = COPY %w0
  ; CHECK: [[NUMELTS_64:%[0-9]+]](s64) = G_ZEXT [[NUMELTS]](s32)
-; CHECK: [[TYPE_SIZE:%[0-9]+]](s64) = G_CONSTANT i64 1
+; CHECK: [[TYPE_SIZE:%[0-9]+]](s64) = G_CONSTANT i64 -1
  ; CHECK: [[NUMBYTES:%[0-9]+]](s64) = G_MUL [[NUMELTS_64]], [[TYPE_SIZE]]
  ; CHECK: [[SP_TMP:%[0-9]+]](p0) = COPY %sp
-; CHECK: [[SP_INT:%[0-9]+]](s64) = G_PTRTOINT [[SP_TMP]](p0)
-; CHECK: [[ALLOC:%[0-9]+]](s64) = G_SUB [[SP_INT]], [[NUMBYTES]]
-; CHECK: [[ALIGN_M_1:%[0-9]+]](s64) = G_CONSTANT i64 15
-; CHECK: [[ALIGN_TMP:%[0-9]+]](s64) = G_SUB [[ALLOC]], [[ALIGN_M_1]]
-; CHECK: [[ALIGN_MASK:%[0-9]+]](s64) = G_CONSTANT i64 -16
-; CHECK: [[ALIGNED_ALLOC:%[0-9]+]](s64) = G_AND [[ALIGN_TMP]], [[ALIGN_MASK]]
-; CHECK: [[ALLOC_PTR:%[0-9]+]](p0) = G_INTTOPTR [[ALIGNED_ALLOC]](s64)
-; CHECK: %sp = COPY [[ALLOC_PTR]]
-; CHECK: %x0 = COPY [[ALLOC_PTR]]
+; CHECK: [[ALLOC:%[0-9]+]](p0) = G_GEP [[SP_TMP]], [[NUMBYTES]]
+; CHECK: [[ALIGNED_ALLOC:%[0-9]+]](p0) = G_PTR_MASK [[ALLOC]], 4
+; CHECK: %sp = COPY [[ALIGNED_ALLOC]]
+; CHECK: [[ALLOC:%[0-9]+]](p0) = COPY [[ALIGNED_ALLOC]]
+; CHECK: %x0 = COPY [[ALLOC]]
  define i8* @test_simple_alloca(i32 %numelts) {
    %addr = alloca i8, i32 %numelts
    ret i8* %addr
@@ -23,18 +19,14 @@ define i8* @test_simple_alloca(i32 %numelts) {
  ; CHECK-LABEL: name: test_aligned_alloca
  ; CHECK: [[NUMELTS:%[0-9]+]](s32) = COPY %w0
  ; CHECK: [[NUMELTS_64:%[0-9]+]](s64) = G_ZEXT [[NUMELTS]](s32)
-; CHECK: [[TYPE_SIZE:%[0-9]+]](s64) = G_CONSTANT i64 1
+; CHECK: [[TYPE_SIZE:%[0-9]+]](s64) = G_CONSTANT i64 -1
  ; CHECK: [[NUMBYTES:%[0-9]+]](s64) = G_MUL [[NUMELTS_64]], [[TYPE_SIZE]]
  ; CHECK: [[SP_TMP:%[0-9]+]](p0) = COPY %sp
-; CHECK: [[SP_INT:%[0-9]+]](s64) = G_PTRTOINT [[SP_TMP]](p0)
-; CHECK: [[ALLOC:%[0-9]+]](s64) = G_SUB [[SP_INT]], [[NUMBYTES]]
-; CHECK: [[ALIGN_M_1:%[0-9]+]](s64) = G_CONSTANT i64 31
-; CHECK: [[ALIGN_TMP:%[0-9]+]](s64) = G_SUB [[ALLOC]], [[ALIGN_M_1]]
-; CHECK: [[ALIGN_MASK:%[0-9]+]](s64) = G_CONSTANT i64 -32
-; CHECK: [[ALIGNED_ALLOC:%[0-9]+]](s64) = G_AND [[ALIGN_TMP]], [[ALIGN_MASK]]
-; CHECK: [[ALLOC_PTR:%[0-9]+]](p0) = G_INTTOPTR [[ALIGNED_ALLOC]](s64)
-; CHECK: %sp = COPY [[ALLOC_PTR]]
-; CHECK: %x0 = COPY [[ALLOC_PTR]]
+; CHECK: [[ALLOC:%[0-9]+]](p0) = G_GEP [[SP_TMP]], [[NUMBYTES]]
+; CHECK: [[ALIGNED_ALLOC:%[0-9]+]](p0) = G_PTR_MASK [[ALLOC]], 5
+; CHECK: %sp = COPY [[ALIGNED_ALLOC]]
+; CHECK: [[ALLOC:%[0-9]+]](p0) = COPY [[ALIGNED_ALLOC]]
+; CHECK: %x0 = COPY [[ALLOC]]
  define i8* @test_aligned_alloca(i32 %numelts) {
    %addr = alloca i8, i32 %numelts, align 32
    ret i8* %addr
@@ -43,14 +35,13 @@ define i8* @test_aligned_alloca(i32 %numelts) {
  ; CHECK-LABEL: name: test_natural_alloca
  ; CHECK: [[NUMELTS:%[0-9]+]](s32) = COPY %w0
  ; CHECK: [[NUMELTS_64:%[0-9]+]](s64) = G_ZEXT [[NUMELTS]](s32)
-; CHECK: [[TYPE_SIZE:%[0-9]+]](s64) = G_CONSTANT i64 16
+; CHECK: [[TYPE_SIZE:%[0-9]+]](s64) = G_CONSTANT i64 -16
  ; CHECK: [[NUMBYTES:%[0-9]+]](s64) = G_MUL [[NUMELTS_64]], [[TYPE_SIZE]]
  ; CHECK: [[SP_TMP:%[0-9]+]](p0) = COPY %sp
-; CHECK: [[SP_INT:%[0-9]+]](s64) = G_PTRTOINT [[SP_TMP]](p0)
-; CHECK: [[ALLOC:%[0-9]+]](s64) = G_SUB [[SP_INT]], [[NUMBYTES]]
-; CHECK: [[ALLOC_PTR:%[0-9]+]](p0) = G_INTTOPTR [[ALLOC]](s64)
-; CHECK: %sp = COPY [[ALLOC_PTR]]
-; CHECK: %x0 = COPY [[ALLOC_PTR]]
+; CHECK: [[ALLOC:%[0-9]+]](p0) = G_GEP [[SP_TMP]], [[NUMBYTES]]
+; CHECK: %sp = COPY [[ALLOC]]
+; CHECK: [[ALLOC_TMP:%[0-9]+]](p0) = COPY [[ALLOC]]
+; CHECK: %x0 = COPY [[ALLOC_TMP]]
  define i128* @test_natural_alloca(i32 %numelts) {
    %addr = alloca i128, i32 %numelts
    ret i128* %addr
author	Tim Northover <tnorthover@apple.com>
	Tue, 14 Feb 2017 20:56:18 +0000 (20:56 +0000)
committer	Tim Northover <tnorthover@apple.com>
	Tue, 14 Feb 2017 20:56:18 +0000 (20:56 +0000)
include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h		patch \| blob \| history
include/llvm/Target/GenericOpcodes.td		patch \| blob \| history
include/llvm/Target/TargetOpcodes.def		patch \| blob \| history
lib/CodeGen/GlobalISel/IRTranslator.cpp		patch \| blob \| history
lib/CodeGen/GlobalISel/MachineIRBuilder.cpp		patch \| blob \| history
test/CodeGen/AArch64/GlobalISel/dynamic-alloca.ll		patch \| blob \| history