bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
MachineMemOperand *MMO, unsigned &ResultReg,
unsigned Alignment) {
+ bool HasAVX = Subtarget->hasAVX();
// Get opcode and regclass of the output for the given load instruction.
unsigned Opc = 0;
const TargetRegisterClass *RC = nullptr;
break;
case MVT::f32:
if (X86ScalarSSEf32) {
- Opc = Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
+ Opc = HasAVX ? X86::VMOVSSrm : X86::MOVSSrm;
RC = &X86::FR32RegClass;
} else {
Opc = X86::LD_Fp32m;
break;
case MVT::f64:
if (X86ScalarSSEf64) {
- Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
+ Opc = HasAVX ? X86::VMOVSDrm : X86::MOVSDrm;
RC = &X86::FR64RegClass;
} else {
Opc = X86::LD_Fp64m;
return false;
case MVT::v4f32:
if (Alignment >= 16)
- Opc = Subtarget->hasAVX() ? X86::VMOVAPSrm : X86::MOVAPSrm;
+ Opc = HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm;
else
- Opc = Subtarget->hasAVX() ? X86::VMOVUPSrm : X86::MOVUPSrm;
+ Opc = HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm;
RC = &X86::VR128RegClass;
break;
case MVT::v2f64:
if (Alignment >= 16)
- Opc = Subtarget->hasAVX() ? X86::VMOVAPDrm : X86::MOVAPDrm;
+ Opc = HasAVX ? X86::VMOVAPDrm : X86::MOVAPDrm;
else
- Opc = Subtarget->hasAVX() ? X86::VMOVUPDrm : X86::MOVUPDrm;
+ Opc = HasAVX ? X86::VMOVUPDrm : X86::MOVUPDrm;
RC = &X86::VR128RegClass;
break;
case MVT::v4i32:
case MVT::v8i16:
case MVT::v16i8:
if (Alignment >= 16)
- Opc = Subtarget->hasAVX() ? X86::VMOVDQArm : X86::MOVDQArm;
+ Opc = HasAVX ? X86::VMOVDQArm : X86::MOVDQArm;
else
- Opc = Subtarget->hasAVX() ? X86::VMOVDQUrm : X86::MOVDQUrm;
+ Opc = HasAVX ? X86::VMOVDQUrm : X86::MOVDQUrm;
RC = &X86::VR128RegClass;
break;
+ case MVT::v8f32:
+ assert(HasAVX);
+ Opc = (Alignment >= 32) ? X86::VMOVAPSYrm : X86::VMOVUPSYrm;
+ RC = &X86::VR256RegClass;
+ break;
+ case MVT::v4f64:
+ assert(HasAVX);
+ Opc = (Alignment >= 32) ? X86::VMOVAPDYrm : X86::VMOVUPDYrm;
+ RC = &X86::VR256RegClass;
+ break;
+ case MVT::v8i32:
+ case MVT::v4i64:
+ case MVT::v16i16:
+ case MVT::v32i8:
+ assert(HasAVX);
+ Opc = (Alignment >= 32) ? X86::VMOVDQAYrm : X86::VMOVDQUYrm;
+ RC = &X86::VR256RegClass;
+ break;
}
ResultReg = createResultReg(RC);
else
Opc = HasAVX ? X86::VMOVDQAmr : X86::MOVDQAmr;
} else
- Opc = Subtarget->hasAVX() ? X86::VMOVDQUmr : X86::MOVDQUmr;
+ Opc = HasAVX ? X86::VMOVDQUmr : X86::MOVDQUmr;
+ break;
+ case MVT::v8f32:
+ assert(HasAVX);
+ if (Aligned)
+ Opc = IsNonTemporal ? X86::VMOVNTPSYmr : X86::VMOVAPSYmr;
+ else
+ Opc = X86::VMOVUPSYmr;
+ break;
+ case MVT::v4f64:
+ assert(HasAVX);
+ if (Aligned) {
+ Opc = IsNonTemporal ? X86::VMOVNTPDYmr : X86::VMOVAPDYmr;
+ } else
+ Opc = X86::VMOVUPDYmr;
+ break;
+ case MVT::v8i32:
+ case MVT::v4i64:
+ case MVT::v16i16:
+ case MVT::v32i8:
+ assert(HasAVX);
+ if (Aligned)
+ Opc = IsNonTemporal ? X86::VMOVNTDQYmr : X86::VMOVDQAYmr;
+ else
+ Opc = X86::VMOVDQUYmr;
break;
}
;
; AVX-LABEL: test_nt32xi8:
; AVX: # BB#0: # %entry
-; AVX-NEXT: vmovntps %ymm0, (%rdi)
+; AVX-NEXT: vmovntdq %ymm0, (%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
;
; AVX-LABEL: test_nt16xi16:
; AVX: # BB#0: # %entry
-; AVX-NEXT: vmovntps %ymm0, (%rdi)
+; AVX-NEXT: vmovntdq %ymm0, (%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
;
; AVX-LABEL: test_nt8xi32:
; AVX: # BB#0: # %entry
-; AVX-NEXT: vmovntps %ymm0, (%rdi)
+; AVX-NEXT: vmovntdq %ymm0, (%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
;
; AVX-LABEL: test_nt64xi8:
; AVX: # BB#0: # %entry
-; AVX-NEXT: vmovntps %ymm0, (%rdi)
-; AVX-NEXT: vmovntps %ymm1, 32(%rdi)
+; AVX-NEXT: vmovntdq %ymm0, (%rdi)
+; AVX-NEXT: vmovntdq %ymm1, 32(%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
;
; AVX-LABEL: test_nt32xi16:
; AVX: # BB#0: # %entry
-; AVX-NEXT: vmovntps %ymm0, (%rdi)
-; AVX-NEXT: vmovntps %ymm1, 32(%rdi)
+; AVX-NEXT: vmovntdq %ymm0, (%rdi)
+; AVX-NEXT: vmovntdq %ymm1, 32(%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
;
; AVX-LABEL: test_nt16xi32:
; AVX: # BB#0: # %entry
-; AVX-NEXT: vmovntps %ymm0, (%rdi)
-; AVX-NEXT: vmovntps %ymm1, 32(%rdi)
+; AVX-NEXT: vmovntdq %ymm0, (%rdi)
+; AVX-NEXT: vmovntdq %ymm1, 32(%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;