[X86] Fix a bug in the lowering of BLENDI introduced in r209043.

author Quentin Colombet <qcolombet@apple.com>

Wed, 21 May 2014 22:00:39 +0000 (22:00 +0000)

committer Quentin Colombet <qcolombet@apple.com>

Wed, 21 May 2014 22:00:39 +0000 (22:00 +0000)
author Quentin Colombet <qcolombet@apple.com>
Wed, 21 May 2014 22:00:39 +0000 (22:00 +0000)
committer Quentin Colombet <qcolombet@apple.com>
Wed, 21 May 2014 22:00:39 +0000 (22:00 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 72743a9..6182875 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -7980,7 +7980,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
    return SDValue();
  }
  
-// This function assumes its argument is a BUILD_VECTOR of constand or
+// This function assumes its argument is a BUILD_VECTOR of constants or
  // undef SDNodes. i.e: ISD::isBuildVectorOfConstantSDNodes(BuildVector) is
  // true.
  static bool BUILD_VECTORtoBlendMask(BuildVectorSDNode *BuildVector,
@@ -8004,9 +8004,13 @@ static bool BUILD_VECTORtoBlendMask(BuildVectorSDNode *BuildVector,
        Lane2Cond = !isZero(SndLaneEltCond);
  
      if (Lane1Cond == Lane2Cond || Lane2Cond < 0)
-      MaskValue |= !!Lane1Cond << i;
+      // Lane1Cond != 0 means we want the first argument.
+      // Lane1Cond == 0 means we want the second argument.
+      // The blend mask encodes each lane as 0 for the first argument, 1
+      // for the second. Therefore, invert the condition.
+      MaskValue |= !Lane1Cond << i;
      else if (Lane1Cond < 0)
-      MaskValue |= !!Lane2Cond << i;
+      MaskValue |= !Lane2Cond << i;
      else
        return false;
    }
diff --git a/test/CodeGen/X86/avx-blend.ll b/test/CodeGen/X86/avx-blend.ll

index 8577a61..4d4f6c1 100644 (file)
--- a/test/CodeGen/X86/avx-blend.ll
+++ b/test/CodeGen/X86/avx-blend.ll
@@ -3,7 +3,16 @@
  ; AVX128 tests:
  
  ;CHECK-LABEL: vsel_float:
-;CHECK: vblendps    $5
+; select mask is <i1 true, i1 false, i1 true, i1 false>.
+; Written most-significant bit first (element 0 is the lowest bit), this is 0101 = 5.
+; '1' means take the first argument, '0' means take the second argument.
+; This is the opposite of the Intel encoding of the blend immediate, thus we expect
+; the inverted mask: 1010 = 10.
+; According to the ABI:
+; v1 is in xmm0 => first argument is xmm0.
+; v2 is in xmm1 => second argument is xmm1.
+; result is in xmm0 => destination argument.
+;CHECK: vblendps    $10, %xmm1, %xmm0, %xmm0
  ;CHECK: ret
  define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
    %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x float> %v1, <4 x float> %v2
@@ -12,7 +21,7 @@ define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
  
  
  ;CHECK-LABEL: vsel_i32:
-;CHECK: vblendps   $5
+;CHECK: vblendps   $10, %xmm1, %xmm0, %xmm0
  ;CHECK: ret
  define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) {
    %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> %v1, <4 x i32> %v2
@@ -52,7 +61,13 @@ define <16 x i8> @vsel_i8(<16 x i8> %v1, <16 x i8> %v2) {
  
  ;CHECK-LABEL: vsel_float8:
  ;CHECK-NOT: vinsertf128
-;CHECK: vblendps    $17
+; <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>
+; which translates into the boolean mask (big endian representation):
+; 00010001 = 17.
+; '1' means take the first argument, '0' means take the second argument.
+; This is the opposite of the Intel encoding of the blend immediate, thus we expect
+; the inverted mask: 11101110 = 238.
+;CHECK: vblendps    $238, %ymm1, %ymm0, %ymm0
  ;CHECK: ret
  define <8 x float> @vsel_float8(<8 x float> %v1, <8 x float> %v2) {
    %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x float> %v1, <8 x float> %v2
@@ -61,7 +76,7 @@ define <8 x float> @vsel_float8(<8 x float> %v1, <8 x float> %v2) {
  
  ;CHECK-LABEL: vsel_i328:
  ;CHECK-NOT: vinsertf128
-;CHECK: vblendps    $17
+;CHECK: vblendps    $238, %ymm1, %ymm0, %ymm0
  ;CHECK-NEXT: ret
  define <8 x i32> @vsel_i328(<8 x i32> %v1, <8 x i32> %v2) {
    %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i32> %v1, <8 x i32> %v2
@@ -69,8 +84,15 @@ define <8 x i32> @vsel_i328(<8 x i32> %v1, <8 x i32> %v2) {
  }
  
  ;CHECK-LABEL: vsel_double8:
-;CHECK: vblendpd    $1
-;CHECK: vblendpd    $1
+; select mask is 2x: 0001 => intel mask: ~0001 = 14
+; ABI:
+; v1 is in ymm0 and ymm1.
+; v2 is in ymm2 and ymm3.
+; result is in ymm0 and ymm1.
+; Compute the low part: res.low = blend v1.low, v2.low, blendmask
+;CHECK: vblendpd    $14, %ymm2, %ymm0, %ymm0
+; Compute the high part.
+;CHECK: vblendpd    $14, %ymm3, %ymm1, %ymm1
  ;CHECK: ret
  define <8 x double> @vsel_double8(<8 x double> %v1, <8 x double> %v2) {
    %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x double> %v1, <8 x double> %v2
@@ -78,8 +100,8 @@ define <8 x double> @vsel_double8(<8 x double> %v1, <8 x double> %v2) {
  }
  
  ;CHECK-LABEL: vsel_i648:
-;CHECK: vblendpd    $1
-;CHECK: vblendpd    $1
+;CHECK: vblendpd    $14, %ymm2, %ymm0, %ymm0
+;CHECK: vblendpd    $14, %ymm3, %ymm1, %ymm1
  ;CHECK: ret
  define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) {
    %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i64> %v1, <8 x i64> %v2
diff --git a/test/CodeGen/X86/blend-msb.ll b/test/CodeGen/X86/blend-msb.ll

index 4e17a71..34aaf2c 100644 (file)
--- a/test/CodeGen/X86/blend-msb.ll
+++ b/test/CodeGen/X86/blend-msb.ll
@@ -22,7 +22,17 @@ define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
  }
  
  ;CHECK-LABEL: vsel_8xi16:
-;CHECK: pblendw $17
+; The select mask is
+; <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>
+; which translates into the boolean mask (big endian representation):
+; 00010001 = 17.
+; '1' means take the first argument, '0' means take the second argument.
+; This is the opposite of the Intel encoding of the blend immediate, thus we expect
+; the inverted mask: 11101110 = 238.
+; According to the ABI:
+; v1 is in xmm0 => first argument is xmm0.
+; v2 is in xmm1 => second argument is xmm1.
+;CHECK: pblendw $238, %xmm1, %xmm0
  ;CHECK: ret
  define <8 x i16> @vsel_8xi16(<8 x i16> %v1, <8 x i16> %v2) {
    %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i16> %v1, <8 x i16> %v2
author	Quentin Colombet <qcolombet@apple.com>
	Wed, 21 May 2014 22:00:39 +0000 (22:00 +0000)
committer	Quentin Colombet <qcolombet@apple.com>
	Wed, 21 May 2014 22:00:39 +0000 (22:00 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch | blob | history
test/CodeGen/X86/avx-blend.ll		patch | blob | history
test/CodeGen/X86/blend-msb.ll		patch | blob | history