bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
bool matchWrapper(SDValue N, X86ISelAddressMode &AM);
bool matchAddress(SDValue N, X86ISelAddressMode &AM);
+ bool matchVectorAddress(SDValue N, X86ISelAddressMode &AM);
bool matchAdd(SDValue N, X86ISelAddressMode &AM, unsigned Depth);
bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
unsigned Depth);
return false;
}
+/// Helper for selectVectorAddr. Handles things that can be folded into a
+/// gather/scatter address. The index register and scale should have already
+/// been handled.
+bool X86DAGToDAGISel::matchVectorAddress(SDValue N, X86ISelAddressMode &AM) {
+  // TODO: Support other operations.
+  switch (N.getOpcode()) {
+  case X86ISD::Wrapper:
+    // Try folding the wrapped symbol into the displacement field; if that
+    // fails, fall through to the generic base-register match below.
+    if (!matchWrapper(N, AM))
+      return false;
+    break;
+  }
+
+  // Default: use N itself as the base register of the address.
+  return matchAddressBase(N, AM);
+}
+
bool X86DAGToDAGISel::selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
                                       SDValue &Scale, SDValue &Index,
                                       SDValue &Disp, SDValue &Segment) {
-  unsigned ScalarSize;
+  X86ISelAddressMode AM;
  if (auto Mgs = dyn_cast<MaskedGatherScatterSDNode>(Parent)) {
-    Base = Mgs->getBasePtr();
-    Index = Mgs->getIndex();
-    ScalarSize = Mgs->getValue().getScalarValueSizeInBits();
+    // Index register and scale come straight from the gather/scatter node;
+    // scale is the element width in bytes.
+    AM.IndexReg = Mgs->getIndex();
+    AM.Scale = Mgs->getValue().getScalarValueSizeInBits() / 8;
  } else {
    auto X86Gather = cast<X86MaskedGatherSDNode>(Parent);
-    Base = X86Gather->getBasePtr();
-    Index = X86Gather->getIndex();
-    ScalarSize = X86Gather->getValue().getScalarValueSizeInBits();
+    AM.IndexReg = X86Gather->getIndex();
+    AM.Scale = X86Gather->getValue().getScalarValueSizeInBits() / 8;
  }
  unsigned AddrSpace = cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
  // AddrSpace 256 -> GS, 257 -> FS, 258 -> SS.
  // NOTE(review): the excerpt showed "if (AddrSpace == 256)" directly guarding
  // the 258 check, which contradicts the mapping documented above; restored the
  // three independent assignments so each address space selects its segment.
  if (AddrSpace == 256)
    AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
  if (AddrSpace == 257)
    AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
  if (AddrSpace == 258)
    AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16);
-  SDLoc DL(N);
-  Scale = getI8Imm(ScalarSize/8, DL);
-
  // If Base is 0, the whole address is in index and the Scale is 1
-  if (isa<ConstantSDNode>(Base)) {
-    assert(cast<ConstantSDNode>(Base)->isNullValue() &&
+  if (isa<ConstantSDNode>(N)) {
+    assert(cast<ConstantSDNode>(N)->isNullValue() &&
           "Unexpected base in gather/scatter");
-    Scale = getI8Imm(1, DL);
-    Base = CurDAG->getRegister(0, MVT::i32);
+    AM.Scale = 1;
  }
-  if (AM.Segment.getNode())
-    Segment = AM.Segment;
-  else
-    Segment = CurDAG->getRegister(0, MVT::i32);
-  Disp = CurDAG->getTargetConstant(0, DL, MVT::i32);
+  // Otherwise, try to match into the base and displacement fields.
+  else if (matchVectorAddress(N, AM))
+    return false;
+
+  // Materialize a zero base register when nothing was matched into the base.
+  MVT VT = N.getSimpleValueType();
+  if (AM.BaseType == X86ISelAddressMode::RegBase) {
+    if (!AM.Base_Reg.getNode())
+      AM.Base_Reg = CurDAG->getRegister(0, VT);
+  }
+
+  getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
  return true;
}
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f < %s | FileCheck %s --check-prefix=ALL --check-prefix=KNL_64
; RUN: llc -mtriple=i386-unknown-linux-gnu -mattr=+avx512f < %s | FileCheck %s --check-prefix=ALL --check-prefix=KNL_32
-; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512vl -mattr=+avx512dq < %s | FileCheck %s --check-prefix=ALL --check-prefix=SKX
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512vl -mattr=+avx512dq < %s | FileCheck %s --check-prefix=ALL --check-prefix=SKX --check-prefix=SKX_SMALL
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512vl -mattr=+avx512dq -code-model=large < %s | FileCheck %s --check-prefix=ALL --check-prefix=SKX --check-prefix=SKX_LARGE
; RUN: llc -mtriple=i386-unknown-linux-gnu -mattr=+avx512vl -mattr=+avx512dq < %s | FileCheck %s --check-prefix=ALL --check-prefix=SKX_32
; RUN: opt -mtriple=x86_64-apple-darwin -scalarize-masked-mem-intrin -mcpu=corei7-avx -S < %s | FileCheck %s -check-prefix=SCALAR
; RUN: llc -O0 -mtriple=x86_64-unknown-linux-gnu -mcpu=skx < %s -o /dev/null
; KNL_32-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
; KNL_32-NEXT: retl
;
-; SKX-LABEL: test9:
-; SKX: # BB#0: # %entry
-; SKX-NEXT: vpbroadcastq %rdi, %zmm2
-; SKX-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; SKX-NEXT: vpmovsxdq %ymm1, %zmm1
-; SKX-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm1, %zmm1
-; SKX-NEXT: vpaddq %zmm1, %zmm0, %zmm0
-; SKX-NEXT: vpaddq %zmm0, %zmm2, %zmm0
-; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
-; SKX-NEXT: kxnorw %k0, %k0, %k1
-; SKX-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
-; SKX-NEXT: retq
+; SKX_SMALL-LABEL: test9:
+; SKX_SMALL: # BB#0: # %entry
+; SKX_SMALL-NEXT: vpbroadcastq %rdi, %zmm2
+; SKX_SMALL-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; SKX_SMALL-NEXT: vpmovsxdq %ymm1, %zmm1
+; SKX_SMALL-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm1, %zmm1
+; SKX_SMALL-NEXT: vpaddq %zmm1, %zmm0, %zmm0
+; SKX_SMALL-NEXT: vpaddq %zmm0, %zmm2, %zmm0
+; SKX_SMALL-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
+; SKX_SMALL-NEXT: kxnorw %k0, %k0, %k1
+; SKX_SMALL-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
+; SKX_SMALL-NEXT: retq
+;
+; SKX_LARGE-LABEL: test9:
+; SKX_LARGE: # BB#0: # %entry
+; SKX_LARGE-NEXT: vpbroadcastq %rdi, %zmm2
+; SKX_LARGE-NEXT: vpmovsxdq %ymm1, %zmm1
+; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax
+; SKX_LARGE-NEXT: vpmullq (%rax){1to8}, %zmm1, %zmm1
+; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax
+; SKX_LARGE-NEXT: vpmullq (%rax){1to8}, %zmm0, %zmm0
+; SKX_LARGE-NEXT: vpaddq %zmm1, %zmm0, %zmm0
+; SKX_LARGE-NEXT: vpaddq %zmm0, %zmm2, %zmm0
+; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax
+; SKX_LARGE-NEXT: vpaddq (%rax){1to8}, %zmm0, %zmm1
+; SKX_LARGE-NEXT: kxnorw %k0, %k0, %k1
+; SKX_LARGE-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
+; SKX_LARGE-NEXT: retq
;
; SKX_32-LABEL: test9:
; SKX_32: # BB#0: # %entry
; KNL_32-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
; KNL_32-NEXT: retl
;
-; SKX-LABEL: test10:
-; SKX: # BB#0: # %entry
-; SKX-NEXT: vpbroadcastq %rdi, %zmm2
-; SKX-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; SKX-NEXT: vpmovsxdq %ymm1, %zmm1
-; SKX-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm1, %zmm1
-; SKX-NEXT: vpaddq %zmm1, %zmm0, %zmm0
-; SKX-NEXT: vpaddq %zmm0, %zmm2, %zmm0
-; SKX-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
-; SKX-NEXT: kxnorw %k0, %k0, %k1
-; SKX-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
-; SKX-NEXT: retq
+; SKX_SMALL-LABEL: test10:
+; SKX_SMALL: # BB#0: # %entry
+; SKX_SMALL-NEXT: vpbroadcastq %rdi, %zmm2
+; SKX_SMALL-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; SKX_SMALL-NEXT: vpmovsxdq %ymm1, %zmm1
+; SKX_SMALL-NEXT: vpmullq {{.*}}(%rip){1to8}, %zmm1, %zmm1
+; SKX_SMALL-NEXT: vpaddq %zmm1, %zmm0, %zmm0
+; SKX_SMALL-NEXT: vpaddq %zmm0, %zmm2, %zmm0
+; SKX_SMALL-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
+; SKX_SMALL-NEXT: kxnorw %k0, %k0, %k1
+; SKX_SMALL-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
+; SKX_SMALL-NEXT: retq
+;
+; SKX_LARGE-LABEL: test10:
+; SKX_LARGE: # BB#0: # %entry
+; SKX_LARGE-NEXT: vpbroadcastq %rdi, %zmm2
+; SKX_LARGE-NEXT: vpmovsxdq %ymm1, %zmm1
+; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax
+; SKX_LARGE-NEXT: vpmullq (%rax){1to8}, %zmm1, %zmm1
+; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax
+; SKX_LARGE-NEXT: vpmullq (%rax){1to8}, %zmm0, %zmm0
+; SKX_LARGE-NEXT: vpaddq %zmm1, %zmm0, %zmm0
+; SKX_LARGE-NEXT: vpaddq %zmm0, %zmm2, %zmm0
+; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax
+; SKX_LARGE-NEXT: vpaddq (%rax){1to8}, %zmm0, %zmm1
+; SKX_LARGE-NEXT: kxnorw %k0, %k0, %k1
+; SKX_LARGE-NEXT: vpgatherqd (,%zmm1), %ymm0 {%k1}
+; SKX_LARGE-NEXT: retq
;
; SKX_32-LABEL: test10:
; SKX_32: # BB#0: # %entry
define <8 x i32> @test_global_array(<8 x i64> %indxs) {
; KNL_64-LABEL: test_global_array:
; KNL_64: # BB#0:
-; KNL_64-NEXT: movl $glob_array, %eax
; KNL_64-NEXT: kxnorw %k0, %k0, %k1
-; KNL_64-NEXT: vpgatherqd (%rax,%zmm0,4), %ymm1 {%k1}
+; KNL_64-NEXT: vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1}
; KNL_64-NEXT: vmovdqa %ymm1, %ymm0
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: test_global_array:
; KNL_32: # BB#0:
-; KNL_32-NEXT: movl $glob_array, %eax
; KNL_32-NEXT: kxnorw %k0, %k0, %k1
-; KNL_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm1 {%k1}
+; KNL_32-NEXT: vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1}
; KNL_32-NEXT: vmovdqa %ymm1, %ymm0
; KNL_32-NEXT: retl
;
-; SKX-LABEL: test_global_array:
-; SKX: # BB#0:
-; SKX-NEXT: movl $glob_array, %eax
-; SKX-NEXT: kxnorw %k0, %k0, %k1
-; SKX-NEXT: vpgatherqd (%rax,%zmm0,4), %ymm1 {%k1}
-; SKX-NEXT: vmovdqa %ymm1, %ymm0
-; SKX-NEXT: retq
+; SKX_SMALL-LABEL: test_global_array:
+; SKX_SMALL: # BB#0:
+; SKX_SMALL-NEXT: kxnorw %k0, %k0, %k1
+; SKX_SMALL-NEXT: vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1}
+; SKX_SMALL-NEXT: vmovdqa %ymm1, %ymm0
+; SKX_SMALL-NEXT: retq
+;
+; SKX_LARGE-LABEL: test_global_array:
+; SKX_LARGE: # BB#0:
+; SKX_LARGE-NEXT: movabsq $glob_array, %rax
+; SKX_LARGE-NEXT: kxnorw %k0, %k0, %k1
+; SKX_LARGE-NEXT: vpgatherqd (%rax,%zmm0,4), %ymm1 {%k1}
+; SKX_LARGE-NEXT: vmovdqa %ymm1, %ymm0
+; SKX_LARGE-NEXT: retq
;
; SKX_32-LABEL: test_global_array:
; SKX_32: # BB#0:
-; SKX_32-NEXT: movl $glob_array, %eax
; SKX_32-NEXT: kxnorw %k0, %k0, %k1
-; SKX_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm1 {%k1}
+; SKX_32-NEXT: vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1}
; SKX_32-NEXT: vmovdqa %ymm1, %ymm0
; SKX_32-NEXT: retl
%p = getelementptr inbounds [16 x i32], [16 x i32]* @glob_array, i64 0, <8 x i64> %indxs