From 81aa3c01a5290ad1245d5a221d5098e3f57e2954 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 15 Nov 2018 16:34:15 +0000 Subject: [PATCH] [SLPVectorizer][X86] Regenerate reduction minmax tests and cleanup check prefixes git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@346965 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../SLPVectorizer/X86/horizontal-minmax.ll | 931 ++++++--------------- 1 file changed, 272 insertions(+), 659 deletions(-) diff --git a/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll b/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll index 122c47a91de..41825a67d0c 100644 --- a/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll +++ b/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -mtriple=x86_64-unknown-linux -slp-vectorizer -S | FileCheck %s -; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=corei7-avx -slp-vectorizer -S | FileCheck %s --check-prefix=AVX -; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=core-avx2 -slp-vectorizer -S | FileCheck %s --check-prefix=AVX2 -; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=skx -slp-vectorizer -S -slp-threshold=-100 | FileCheck %s --check-prefix=SKX +; RUN: opt < %s -mtriple=x86_64-unknown-linux -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,SSE +; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=corei7-avx -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX +; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=core-avx2 -slp-vectorizer -S | FileCheck %s --check-prefixes=CHECK,AVX2 +; RUN: opt < %s -mtriple=x86_64-unknown-linux -mcpu=skx -slp-vectorizer -S -slp-threshold=-100 | FileCheck %s --check-prefixes=CHECK,SKX @arr = local_unnamed_addr global [32 x i32] zeroinitializer, align 16 @arr1 = local_unnamed_addr global [32 x float] zeroinitializer, align 16 @@ -10,30 +10,30 @@ @var = global i32 zeroinitializer, align 8 define i32 @maxi8(i32) { -; CHECK-LABEL: @maxi8( -; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16 -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4 -; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8 -; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4 -; CHECK-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]] -; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16 -; CHECK-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP11]], i32 [[TMP12]] -; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4 -; CHECK-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]] -; CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 -; CHECK-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 [[TMP18]] -; CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 -; CHECK-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP20]], [[TMP21]] -; CHECK-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[TMP20]], i32 [[TMP21]] -; CHECK-NEXT: ret i32 [[TMP23]] +; SSE-LABEL: @maxi8( +; SSE-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16 +; SSE-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4 +; SSE-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]] +; SSE-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]] +; SSE-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8 +; SSE-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] +; SSE-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 [[TMP6]] +; SSE-NEXT: [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4 +; SSE-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +; SSE-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]] +; SSE-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16 +; SSE-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +; SSE-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP11]], i32 [[TMP12]] +; SSE-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4 +; SSE-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] +; SSE-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]] +; SSE-NEXT: [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 +; SSE-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] +; SSE-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 [[TMP18]] +; SSE-NEXT: [[TMP21:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 +; SSE-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP20]], [[TMP21]] +; SSE-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[TMP20]], i32 [[TMP21]] +; SSE-NEXT: ret i32 [[TMP23]] ; ; AVX-LABEL: @maxi8( ; AVX-NEXT: [[TMP2:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([32 x i32]* @arr to <8 x i32>*), align 16 @@ -192,147 +192,6 @@ define i32 @maxi16(i32) { ; CHECK-NEXT: [[TMP33:%.*]] = select i1 [[TMP31]], i32 [[TMP30]], i32 undef ; CHECK-NEXT: ret i32 [[TMP32]] ; -; AVX-LABEL: @maxi16( -; AVX-NEXT: [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([32 x i32]* @arr to <16 x i32>*), align 16 -; AVX-NEXT: [[TMP3:%.*]] = icmp sgt i32 undef, undef -; AVX-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 undef, i32 undef -; AVX-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], undef -; AVX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 undef -; AVX-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP6]], undef -; AVX-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32 undef -; AVX-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef -; AVX-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef -; AVX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef -; AVX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef -; AVX-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef -; AVX-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef -; AVX-NEXT: [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], undef -; AVX-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32 undef -; AVX-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP16]], undef -; AVX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP16]], i32 undef -; AVX-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP18]], undef -; AVX-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP18]], i32 undef -; AVX-NEXT: [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], undef -; AVX-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 undef -; AVX-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], undef -; AVX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 undef -; AVX-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], undef -; AVX-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 undef -; AVX-NEXT: [[TMP27:%.*]] = icmp sgt i32 [[TMP26]], undef -; AVX-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], i32 [[TMP26]], i32 undef -; AVX-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP28]], undef -; AVX-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP28]], i32 undef -; AVX-NEXT: [[TMP31:%.*]] = icmp sgt i32 [[TMP30]], undef -; AVX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> undef, <16 x i32> -; AVX-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <16 x i32> [[TMP2]], [[RDX_SHUF]] -; AVX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP]], <16 x i32> [[TMP2]], <16 x i32> [[RDX_SHUF]] -; AVX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x i32> [[RDX_MINMAX_SELECT]], <16 x i32> undef, <16 x i32> -; AVX-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <16 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] -; AVX-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP2]], <16 x i32> [[RDX_MINMAX_SELECT]], <16 x i32> [[RDX_SHUF1]] -; AVX-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <16 x i32> [[RDX_MINMAX_SELECT3]], <16 x i32> undef, <16 x i32> -; AVX-NEXT: [[RDX_MINMAX_CMP5:%.*]] = icmp sgt <16 x i32> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] -; AVX-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP5]], <16 x i32> [[RDX_MINMAX_SELECT3]], <16 x i32> [[RDX_SHUF4]] -; AVX-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <16 x i32> [[RDX_MINMAX_SELECT6]], <16 x i32> undef, <16 x i32> -; AVX-NEXT: [[RDX_MINMAX_CMP8:%.*]] = icmp sgt <16 x i32> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]] -; AVX-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP8]], <16 x i32> [[RDX_MINMAX_SELECT6]], <16 x i32> [[RDX_SHUF7]] -; AVX-NEXT: [[TMP32:%.*]] = extractelement <16 x i32> [[RDX_MINMAX_SELECT9]], i32 0 -; AVX-NEXT: [[TMP33:%.*]] = select i1 [[TMP31]], i32 [[TMP30]], i32 undef -; AVX-NEXT: ret i32 [[TMP32]] -; -; AVX2-LABEL: @maxi16( -; AVX2-NEXT: [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([32 x i32]* @arr to <16 x i32>*), align 16 -; AVX2-NEXT: [[TMP3:%.*]] = icmp sgt i32 undef, undef -; AVX2-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 undef, i32 undef -; AVX2-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], undef -; AVX2-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 undef -; AVX2-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP6]], undef -; AVX2-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32 undef -; AVX2-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef -; AVX2-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef -; AVX2-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef -; AVX2-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef -; AVX2-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef -; AVX2-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef -; AVX2-NEXT: [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], undef -; AVX2-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32 undef -; AVX2-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP16]], undef -; AVX2-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP16]], i32 undef -; AVX2-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP18]], undef -; AVX2-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP18]], i32 undef -; AVX2-NEXT: [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], undef -; AVX2-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 undef -; AVX2-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], undef -; AVX2-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 undef -; AVX2-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], undef -; AVX2-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 undef -; AVX2-NEXT: [[TMP27:%.*]] = icmp sgt i32 [[TMP26]], undef -; AVX2-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], i32 [[TMP26]], i32 undef -; AVX2-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP28]], undef -; AVX2-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP28]], i32 undef -; AVX2-NEXT: [[TMP31:%.*]] = icmp sgt i32 [[TMP30]], undef -; AVX2-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> undef, <16 x i32> -; AVX2-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <16 x i32> [[TMP2]], [[RDX_SHUF]] -; AVX2-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP]], <16 x i32> [[TMP2]], <16 x i32> [[RDX_SHUF]] -; AVX2-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x i32> [[RDX_MINMAX_SELECT]], <16 x i32> undef, <16 x i32> -; AVX2-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <16 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] -; AVX2-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP2]], <16 x i32> [[RDX_MINMAX_SELECT]], <16 x i32> [[RDX_SHUF1]] -; AVX2-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <16 x i32> [[RDX_MINMAX_SELECT3]], <16 x i32> undef, <16 x i32> -; AVX2-NEXT: [[RDX_MINMAX_CMP5:%.*]] = icmp sgt <16 x i32> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] -; AVX2-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP5]], <16 x i32> [[RDX_MINMAX_SELECT3]], <16 x i32> [[RDX_SHUF4]] -; AVX2-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <16 x i32> [[RDX_MINMAX_SELECT6]], <16 x i32> undef, <16 x i32> -; AVX2-NEXT: [[RDX_MINMAX_CMP8:%.*]] = icmp sgt <16 x i32> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]] -; AVX2-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP8]], <16 x i32> [[RDX_MINMAX_SELECT6]], <16 x i32> [[RDX_SHUF7]] -; AVX2-NEXT: [[TMP32:%.*]] = extractelement <16 x i32> [[RDX_MINMAX_SELECT9]], i32 0 -; AVX2-NEXT: [[TMP33:%.*]] = select i1 [[TMP31]], i32 [[TMP30]], i32 undef -; AVX2-NEXT: ret i32 [[TMP32]] -; -; SKX-LABEL: @maxi16( -; SKX-NEXT: [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* bitcast ([32 x i32]* @arr to <16 x i32>*), align 16 -; SKX-NEXT: [[TMP3:%.*]] = icmp sgt i32 undef, undef -; SKX-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 undef, i32 undef -; SKX-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], undef -; SKX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 undef -; SKX-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP6]], undef -; SKX-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32 undef -; SKX-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef -; SKX-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef -; SKX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef -; SKX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef -; SKX-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef -; SKX-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef -; SKX-NEXT: [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], undef -; SKX-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32 undef -; SKX-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP16]], undef -; SKX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP16]], i32 undef -; SKX-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP18]], undef -; SKX-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP18]], i32 undef -; SKX-NEXT: [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], undef -; SKX-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 undef -; SKX-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], undef -; SKX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 undef -; SKX-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], undef -; SKX-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 undef -; SKX-NEXT: [[TMP27:%.*]] = icmp sgt i32 [[TMP26]], undef -; SKX-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], i32 [[TMP26]], i32 undef -; SKX-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP28]], undef -; SKX-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP28]], i32 undef -; SKX-NEXT: [[TMP31:%.*]] = icmp sgt i32 [[TMP30]], undef -; SKX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <16 x i32> [[TMP2]], <16 x i32> undef, <16 x i32> -; SKX-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <16 x i32> [[TMP2]], [[RDX_SHUF]] -; SKX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP]], <16 x i32> [[TMP2]], <16 x i32> [[RDX_SHUF]] -; SKX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <16 x i32> [[RDX_MINMAX_SELECT]], <16 x i32> undef, <16 x i32> -; SKX-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <16 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] -; SKX-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP2]], <16 x i32> [[RDX_MINMAX_SELECT]], <16 x i32> [[RDX_SHUF1]] -; SKX-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <16 x i32> [[RDX_MINMAX_SELECT3]], <16 x i32> undef, <16 x i32> -; SKX-NEXT: [[RDX_MINMAX_CMP5:%.*]] = icmp sgt <16 x i32> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] -; SKX-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP5]], <16 x i32> [[RDX_MINMAX_SELECT3]], <16 x i32> [[RDX_SHUF4]] -; SKX-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <16 x i32> [[RDX_MINMAX_SELECT6]], <16 x i32> undef, <16 x i32> -; SKX-NEXT: [[RDX_MINMAX_CMP8:%.*]] = icmp sgt <16 x i32> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]] -; SKX-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <16 x i1> [[RDX_MINMAX_CMP8]], <16 x i32> [[RDX_MINMAX_SELECT6]], <16 x i32> [[RDX_SHUF7]] -; SKX-NEXT: [[TMP32:%.*]] = extractelement <16 x i32> [[RDX_MINMAX_SELECT9]], i32 0 -; SKX-NEXT: [[TMP33:%.*]] = select i1 [[TMP31]], i32 [[TMP30]], i32 undef -; SKX-NEXT: ret i32 [[TMP32]] -; %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16 %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4 %4 = icmp sgt i32 %2, %3 @@ -465,252 +324,6 @@ define i32 @maxi32(i32) { ; CHECK-NEXT: [[TMP65:%.*]] = select i1 [[TMP63]], i32 [[TMP62]], i32 undef ; CHECK-NEXT: ret i32 [[TMP64]] ; -; AVX-LABEL: @maxi32( -; AVX-NEXT: [[TMP2:%.*]] = load <32 x i32>, <32 x i32>* bitcast ([32 x i32]* @arr to <32 x i32>*), align 16 -; AVX-NEXT: [[TMP3:%.*]] = icmp sgt i32 undef, undef -; AVX-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 undef, i32 undef -; AVX-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], undef -; AVX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 undef -; AVX-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP6]], undef -; AVX-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32 undef -; AVX-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef -; AVX-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef -; AVX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef -; AVX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef -; AVX-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef -; AVX-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef -; AVX-NEXT: [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], undef -; AVX-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32 undef -; AVX-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP16]], undef -; AVX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP16]], i32 undef -; AVX-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP18]], undef -; AVX-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP18]], i32 undef -; AVX-NEXT: [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], undef -; AVX-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 undef -; AVX-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], undef -; AVX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 undef -; AVX-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], undef -; AVX-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 undef -; AVX-NEXT: [[TMP27:%.*]] = icmp sgt i32 [[TMP26]], undef -; AVX-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], i32 [[TMP26]], i32 undef -; AVX-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP28]], undef -; AVX-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP28]], i32 undef -; AVX-NEXT: [[TMP31:%.*]] = icmp sgt i32 [[TMP30]], undef -; AVX-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], i32 [[TMP30]], i32 undef -; AVX-NEXT: [[TMP33:%.*]] = icmp sgt i32 [[TMP32]], undef -; AVX-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], i32 [[TMP32]], i32 undef -; AVX-NEXT: [[TMP35:%.*]] = icmp sgt i32 [[TMP34]], undef -; AVX-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 [[TMP34]], i32 undef -; AVX-NEXT: [[TMP37:%.*]] = icmp sgt i32 [[TMP36]], undef -; AVX-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i32 [[TMP36]], i32 undef -; AVX-NEXT: [[TMP39:%.*]] = icmp sgt i32 [[TMP38]], undef -; AVX-NEXT: [[TMP40:%.*]] = select i1 [[TMP39]], i32 [[TMP38]], i32 undef -; AVX-NEXT: [[TMP41:%.*]] = icmp sgt i32 [[TMP40]], undef -; AVX-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[TMP40]], i32 undef -; AVX-NEXT: [[TMP43:%.*]] = icmp sgt i32 [[TMP42]], undef -; AVX-NEXT: [[TMP44:%.*]] = select i1 [[TMP43]], i32 [[TMP42]], i32 undef -; AVX-NEXT: [[TMP45:%.*]] = icmp sgt i32 [[TMP44]], undef -; AVX-NEXT: [[TMP46:%.*]] = select i1 [[TMP45]], i32 [[TMP44]], i32 undef -; AVX-NEXT: [[TMP47:%.*]] = icmp sgt i32 [[TMP46]], undef -; AVX-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], i32 [[TMP46]], i32 undef -; AVX-NEXT: [[TMP49:%.*]] = icmp sgt i32 [[TMP48]], undef -; AVX-NEXT: [[TMP50:%.*]] = select i1 [[TMP49]], i32 [[TMP48]], i32 undef -; AVX-NEXT: [[TMP51:%.*]] = icmp sgt i32 [[TMP50]], undef -; AVX-NEXT: [[TMP52:%.*]] = select i1 [[TMP51]], i32 [[TMP50]], i32 undef -; AVX-NEXT: [[TMP53:%.*]] = icmp sgt i32 [[TMP52]], undef -; AVX-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], i32 [[TMP52]], i32 undef -; AVX-NEXT: [[TMP55:%.*]] = icmp sgt i32 [[TMP54]], undef -; AVX-NEXT: [[TMP56:%.*]] = select i1 [[TMP55]], i32 [[TMP54]], i32 undef -; AVX-NEXT: [[TMP57:%.*]] = icmp sgt i32 [[TMP56]], undef -; AVX-NEXT: [[TMP58:%.*]] = select i1 [[TMP57]], i32 [[TMP56]], i32 undef -; AVX-NEXT: [[TMP59:%.*]] = icmp sgt i32 [[TMP58]], undef -; AVX-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], i32 [[TMP58]], i32 undef -; AVX-NEXT: [[TMP61:%.*]] = icmp sgt i32 [[TMP60]], undef -; AVX-NEXT: [[TMP62:%.*]] = select i1 [[TMP61]], i32 [[TMP60]], i32 undef -; AVX-NEXT: [[TMP63:%.*]] = icmp sgt i32 [[TMP62]], undef -; AVX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x i32> [[TMP2]], <32 x i32> undef, <32 x i32> -; AVX-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <32 x i32> [[TMP2]], [[RDX_SHUF]] -; AVX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP]], <32 x i32> [[TMP2]], <32 x i32> [[RDX_SHUF]] -; AVX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x i32> [[RDX_MINMAX_SELECT]], <32 x i32> undef, <32 x i32> -; AVX-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <32 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] -; AVX-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP2]], <32 x i32> [[RDX_MINMAX_SELECT]], <32 x i32> [[RDX_SHUF1]] -; AVX-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <32 x i32> [[RDX_MINMAX_SELECT3]], <32 x i32> undef, <32 x i32> -; AVX-NEXT: [[RDX_MINMAX_CMP5:%.*]] = icmp sgt <32 x i32> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] -; AVX-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP5]], <32 x i32> [[RDX_MINMAX_SELECT3]], <32 x i32> [[RDX_SHUF4]] -; AVX-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <32 x i32> [[RDX_MINMAX_SELECT6]], <32 x i32> undef, <32 x i32> -; AVX-NEXT: [[RDX_MINMAX_CMP8:%.*]] = icmp sgt <32 x i32> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]] -; AVX-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP8]], <32 x i32> [[RDX_MINMAX_SELECT6]], <32 x i32> [[RDX_SHUF7]] -; AVX-NEXT: [[RDX_SHUF10:%.*]] = shufflevector <32 x i32> [[RDX_MINMAX_SELECT9]], <32 x i32> undef, <32 x i32> -; AVX-NEXT: [[RDX_MINMAX_CMP11:%.*]] = icmp sgt <32 x i32> [[RDX_MINMAX_SELECT9]], [[RDX_SHUF10]] -; AVX-NEXT: [[RDX_MINMAX_SELECT12:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP11]], <32 x i32> [[RDX_MINMAX_SELECT9]], <32 x i32> [[RDX_SHUF10]] -; AVX-NEXT: [[TMP64:%.*]] = extractelement <32 x i32> [[RDX_MINMAX_SELECT12]], i32 0 -; AVX-NEXT: [[TMP65:%.*]] = select i1 [[TMP63]], i32 [[TMP62]], i32 undef -; AVX-NEXT: ret i32 [[TMP64]] -; -; AVX2-LABEL: @maxi32( -; AVX2-NEXT: [[TMP2:%.*]] = load <32 x i32>, <32 x i32>* bitcast ([32 x i32]* @arr to <32 x i32>*), align 16 -; AVX2-NEXT: [[TMP3:%.*]] = icmp sgt i32 undef, undef -; AVX2-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 undef, i32 undef -; AVX2-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], undef -; AVX2-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 undef -; AVX2-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP6]], undef -; AVX2-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32 undef -; AVX2-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef -; AVX2-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef -; AVX2-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef -; AVX2-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef -; AVX2-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef -; AVX2-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef -; AVX2-NEXT: [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], undef -; AVX2-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32 undef -; AVX2-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP16]], undef -; AVX2-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP16]], i32 undef -; AVX2-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP18]], undef -; AVX2-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP18]], i32 undef -; AVX2-NEXT: [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], undef -; AVX2-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 undef -; AVX2-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], undef -; AVX2-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 undef -; AVX2-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], undef -; AVX2-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 undef -; AVX2-NEXT: [[TMP27:%.*]] = icmp sgt i32 [[TMP26]], undef -; AVX2-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], i32 [[TMP26]], i32 undef -; AVX2-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP28]], undef -; AVX2-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP28]], i32 undef -; AVX2-NEXT: [[TMP31:%.*]] = icmp sgt i32 [[TMP30]], undef -; AVX2-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], i32 [[TMP30]], i32 undef -; AVX2-NEXT: [[TMP33:%.*]] = icmp sgt i32 [[TMP32]], undef -; AVX2-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], i32 [[TMP32]], i32 undef -; AVX2-NEXT: [[TMP35:%.*]] = icmp sgt i32 [[TMP34]], undef -; AVX2-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 [[TMP34]], i32 undef -; AVX2-NEXT: [[TMP37:%.*]] = icmp sgt i32 [[TMP36]], undef -; AVX2-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i32 [[TMP36]], i32 undef -; AVX2-NEXT: [[TMP39:%.*]] = icmp sgt i32 [[TMP38]], undef -; AVX2-NEXT: [[TMP40:%.*]] = select i1 [[TMP39]], i32 [[TMP38]], i32 undef -; AVX2-NEXT: [[TMP41:%.*]] = icmp sgt i32 [[TMP40]], undef -; AVX2-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[TMP40]], i32 undef -; AVX2-NEXT: [[TMP43:%.*]] = icmp sgt i32 [[TMP42]], undef -; AVX2-NEXT: [[TMP44:%.*]] = select i1 [[TMP43]], i32 [[TMP42]], i32 undef -; AVX2-NEXT: [[TMP45:%.*]] = icmp sgt i32 [[TMP44]], undef -; AVX2-NEXT: [[TMP46:%.*]] = select i1 [[TMP45]], i32 [[TMP44]], i32 undef -; AVX2-NEXT: [[TMP47:%.*]] = icmp sgt i32 [[TMP46]], undef -; AVX2-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], i32 [[TMP46]], i32 undef -; AVX2-NEXT: [[TMP49:%.*]] = icmp sgt i32 [[TMP48]], undef -; AVX2-NEXT: [[TMP50:%.*]] = select i1 [[TMP49]], i32 [[TMP48]], i32 undef -; AVX2-NEXT: [[TMP51:%.*]] = icmp sgt i32 [[TMP50]], undef -; AVX2-NEXT: [[TMP52:%.*]] = select i1 [[TMP51]], i32 [[TMP50]], i32 undef -; AVX2-NEXT: [[TMP53:%.*]] = icmp sgt i32 [[TMP52]], undef -; AVX2-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], i32 [[TMP52]], i32 undef -; AVX2-NEXT: [[TMP55:%.*]] = icmp sgt i32 [[TMP54]], undef -; AVX2-NEXT: [[TMP56:%.*]] = select i1 [[TMP55]], i32 [[TMP54]], i32 undef -; AVX2-NEXT: [[TMP57:%.*]] = icmp sgt i32 [[TMP56]], undef -; AVX2-NEXT: [[TMP58:%.*]] = select i1 [[TMP57]], i32 [[TMP56]], i32 undef -; AVX2-NEXT: [[TMP59:%.*]] = icmp sgt i32 [[TMP58]], undef -; AVX2-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], i32 [[TMP58]], i32 undef -; AVX2-NEXT: [[TMP61:%.*]] = icmp sgt i32 [[TMP60]], undef -; AVX2-NEXT: [[TMP62:%.*]] = select i1 [[TMP61]], i32 [[TMP60]], i32 undef -; AVX2-NEXT: [[TMP63:%.*]] = icmp sgt i32 [[TMP62]], undef -; AVX2-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x i32> [[TMP2]], <32 x i32> undef, <32 x i32> -; AVX2-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <32 x i32> [[TMP2]], [[RDX_SHUF]] -; AVX2-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP]], <32 x i32> [[TMP2]], <32 x i32> [[RDX_SHUF]] -; AVX2-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x i32> [[RDX_MINMAX_SELECT]], <32 x i32> undef, <32 x i32> -; AVX2-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <32 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] -; AVX2-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP2]], <32 x i32> [[RDX_MINMAX_SELECT]], <32 x i32> [[RDX_SHUF1]] -; AVX2-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <32 x i32> [[RDX_MINMAX_SELECT3]], <32 x i32> undef, <32 x i32> -; AVX2-NEXT: [[RDX_MINMAX_CMP5:%.*]] = icmp sgt <32 x i32> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] -; AVX2-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP5]], <32 x i32> [[RDX_MINMAX_SELECT3]], <32 x i32> [[RDX_SHUF4]] -; AVX2-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <32 x i32> [[RDX_MINMAX_SELECT6]], <32 x i32> undef, <32 x i32> -; AVX2-NEXT: [[RDX_MINMAX_CMP8:%.*]] = icmp sgt <32 x i32> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]] -; AVX2-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP8]], <32 x i32> [[RDX_MINMAX_SELECT6]], <32 x i32> [[RDX_SHUF7]] -; AVX2-NEXT: [[RDX_SHUF10:%.*]] = shufflevector <32 x i32> [[RDX_MINMAX_SELECT9]], <32 x i32> undef, <32 x i32> -; AVX2-NEXT: [[RDX_MINMAX_CMP11:%.*]] = icmp sgt <32 x i32> [[RDX_MINMAX_SELECT9]], [[RDX_SHUF10]] -; AVX2-NEXT: [[RDX_MINMAX_SELECT12:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP11]], <32 x i32> [[RDX_MINMAX_SELECT9]], <32 x i32> [[RDX_SHUF10]] -; AVX2-NEXT: [[TMP64:%.*]] = extractelement <32 x i32> [[RDX_MINMAX_SELECT12]], i32 0 -; AVX2-NEXT: [[TMP65:%.*]] = select i1 [[TMP63]], i32 [[TMP62]], i32 undef -; AVX2-NEXT: ret i32 [[TMP64]] -; -; SKX-LABEL: @maxi32( -; SKX-NEXT: [[TMP2:%.*]] = load <32 x i32>, <32 x i32>* bitcast ([32 x i32]* @arr to <32 x i32>*), align 16 -; SKX-NEXT: [[TMP3:%.*]] = icmp sgt i32 undef, undef -; SKX-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i32 undef, i32 undef -; SKX-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], undef -; SKX-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 undef -; SKX-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP6]], undef -; SKX-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32 undef -; SKX-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], undef -; SKX-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP8]], i32 undef -; SKX-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP10]], undef -; SKX-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP10]], i32 undef -; SKX-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], undef -; SKX-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 undef -; SKX-NEXT: [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], undef -; SKX-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i32 [[TMP14]], i32 undef -; SKX-NEXT: [[TMP17:%.*]] = icmp sgt i32 [[TMP16]], undef -; SKX-NEXT: [[TMP18:%.*]] = select i1 [[TMP17]], i32 [[TMP16]], i32 undef -; SKX-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP18]], undef -; SKX-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP18]], i32 undef -; SKX-NEXT: [[TMP21:%.*]] = icmp sgt i32 [[TMP20]], undef -; SKX-NEXT: [[TMP22:%.*]] = select i1 [[TMP21]], i32 [[TMP20]], i32 undef -; SKX-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP22]], undef -; SKX-NEXT: [[TMP24:%.*]] = select i1 [[TMP23]], i32 [[TMP22]], i32 undef -; SKX-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP24]], undef -; SKX-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i32 [[TMP24]], i32 undef -; SKX-NEXT: [[TMP27:%.*]] = icmp sgt i32 [[TMP26]], undef -; SKX-NEXT: [[TMP28:%.*]] = select i1 [[TMP27]], i32 [[TMP26]], i32 undef -; SKX-NEXT: [[TMP29:%.*]] = icmp sgt i32 [[TMP28]], undef -; SKX-NEXT: [[TMP30:%.*]] = select i1 [[TMP29]], i32 [[TMP28]], i32 undef -; SKX-NEXT: [[TMP31:%.*]] = icmp sgt i32 [[TMP30]], undef -; SKX-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], i32 [[TMP30]], i32 undef -; SKX-NEXT: [[TMP33:%.*]] = icmp sgt i32 [[TMP32]], undef -; SKX-NEXT: [[TMP34:%.*]] = select i1 [[TMP33]], i32 [[TMP32]], i32 undef -; SKX-NEXT: [[TMP35:%.*]] = icmp sgt i32 [[TMP34]], undef -; SKX-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 [[TMP34]], i32 undef -; SKX-NEXT: [[TMP37:%.*]] = icmp sgt i32 [[TMP36]], undef -; SKX-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], i32 [[TMP36]], i32 undef -; SKX-NEXT: [[TMP39:%.*]] = icmp sgt i32 [[TMP38]], undef -; SKX-NEXT: [[TMP40:%.*]] = select i1 [[TMP39]], i32 [[TMP38]], i32 undef -; SKX-NEXT: [[TMP41:%.*]] = icmp sgt i32 [[TMP40]], undef -; SKX-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 [[TMP40]], i32 undef -; SKX-NEXT: [[TMP43:%.*]] = icmp sgt i32 [[TMP42]], undef -; SKX-NEXT: [[TMP44:%.*]] = select i1 [[TMP43]], i32 [[TMP42]], i32 undef -; SKX-NEXT: [[TMP45:%.*]] = icmp sgt i32 [[TMP44]], undef -; SKX-NEXT: [[TMP46:%.*]] = select i1 [[TMP45]], i32 [[TMP44]], i32 undef -; SKX-NEXT: [[TMP47:%.*]] = icmp sgt i32 [[TMP46]], undef -; SKX-NEXT: [[TMP48:%.*]] = select i1 [[TMP47]], i32 [[TMP46]], i32 undef -; SKX-NEXT: [[TMP49:%.*]] = icmp sgt i32 [[TMP48]], undef -; SKX-NEXT: [[TMP50:%.*]] = select i1 [[TMP49]], i32 [[TMP48]], i32 undef -; SKX-NEXT: [[TMP51:%.*]] = icmp sgt i32 [[TMP50]], undef -; SKX-NEXT: [[TMP52:%.*]] = select i1 [[TMP51]], i32 [[TMP50]], i32 undef -; SKX-NEXT: [[TMP53:%.*]] = icmp sgt i32 [[TMP52]], undef -; SKX-NEXT: [[TMP54:%.*]] = select i1 [[TMP53]], i32 [[TMP52]], i32 undef -; SKX-NEXT: [[TMP55:%.*]] = icmp sgt i32 [[TMP54]], undef -; SKX-NEXT: [[TMP56:%.*]] = select i1 [[TMP55]], i32 [[TMP54]], i32 undef -; SKX-NEXT: [[TMP57:%.*]] = icmp sgt i32 [[TMP56]], undef -; SKX-NEXT: [[TMP58:%.*]] = select i1 [[TMP57]], i32 [[TMP56]], i32 undef -; SKX-NEXT: [[TMP59:%.*]] = icmp sgt i32 [[TMP58]], undef -; SKX-NEXT: [[TMP60:%.*]] = select i1 [[TMP59]], i32 [[TMP58]], i32 undef -; SKX-NEXT: [[TMP61:%.*]] = icmp sgt i32 [[TMP60]], undef -; SKX-NEXT: [[TMP62:%.*]] = select i1 [[TMP61]], i32 [[TMP60]], i32 undef -; SKX-NEXT: [[TMP63:%.*]] = icmp sgt i32 [[TMP62]], undef -; SKX-NEXT: [[RDX_SHUF:%.*]] = shufflevector <32 x i32> [[TMP2]], <32 x i32> undef, <32 x i32> -; SKX-NEXT: [[RDX_MINMAX_CMP:%.*]] = icmp sgt <32 x i32> [[TMP2]], [[RDX_SHUF]] -; SKX-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP]], <32 x i32> [[TMP2]], <32 x i32> [[RDX_SHUF]] -; SKX-NEXT: [[RDX_SHUF1:%.*]] = shufflevector <32 x i32> [[RDX_MINMAX_SELECT]], <32 x i32> undef, <32 x i32> -; SKX-NEXT: [[RDX_MINMAX_CMP2:%.*]] = icmp sgt <32 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF1]] -; SKX-NEXT: [[RDX_MINMAX_SELECT3:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP2]], <32 x i32> [[RDX_MINMAX_SELECT]], <32 x i32> [[RDX_SHUF1]] -; SKX-NEXT: [[RDX_SHUF4:%.*]] = shufflevector <32 x i32> [[RDX_MINMAX_SELECT3]], <32 x i32> undef, <32 x i32> -; SKX-NEXT: [[RDX_MINMAX_CMP5:%.*]] = icmp sgt <32 x i32> [[RDX_MINMAX_SELECT3]], [[RDX_SHUF4]] -; SKX-NEXT: [[RDX_MINMAX_SELECT6:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP5]], <32 x i32> [[RDX_MINMAX_SELECT3]], <32 x i32> [[RDX_SHUF4]] -; SKX-NEXT: [[RDX_SHUF7:%.*]] = shufflevector <32 x i32> [[RDX_MINMAX_SELECT6]], <32 x i32> undef, <32 x i32> -; SKX-NEXT: [[RDX_MINMAX_CMP8:%.*]] = icmp sgt <32 x i32> [[RDX_MINMAX_SELECT6]], [[RDX_SHUF7]] -; SKX-NEXT: [[RDX_MINMAX_SELECT9:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP8]], <32 x i32> [[RDX_MINMAX_SELECT6]], <32 x i32> [[RDX_SHUF7]] -; SKX-NEXT: [[RDX_SHUF10:%.*]] = shufflevector <32 x i32> [[RDX_MINMAX_SELECT9]], <32 x i32> undef, <32 x i32> -; SKX-NEXT: [[RDX_MINMAX_CMP11:%.*]] = icmp sgt <32 x i32> [[RDX_MINMAX_SELECT9]], [[RDX_SHUF10]] -; SKX-NEXT: [[RDX_MINMAX_SELECT12:%.*]] = select <32 x i1> [[RDX_MINMAX_CMP11]], <32 x i32> [[RDX_MINMAX_SELECT9]], <32 x i32> [[RDX_SHUF10]] -; SKX-NEXT: [[TMP64:%.*]] = extractelement <32 x i32> [[RDX_MINMAX_SELECT12]], i32 0 -; SKX-NEXT: [[TMP65:%.*]] = select i1 [[TMP63]], i32 [[TMP62]], i32 undef -; SKX-NEXT: ret i32 [[TMP64]] -; %2 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16 %3 = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4 %4 = icmp sgt i32 %2, %3 @@ -809,30 +422,30 @@ define i32 @maxi32(i32) { } define float @maxf8(float) { -; CHECK-LABEL: @maxf8( -; CHECK-NEXT: [[TMP2:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16 -; CHECK-NEXT: [[TMP3:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4 -; CHECK-NEXT: [[TMP4:%.*]] = fcmp fast ogt float [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], float [[TMP2]], float [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 2), align 8 -; CHECK-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP5]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP5]], float [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 3), align 4 -; CHECK-NEXT: [[TMP10:%.*]] = fcmp fast ogt float [[TMP8]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], float [[TMP8]], float [[TMP9]] -; CHECK-NEXT: [[TMP12:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 4), align 16 -; CHECK-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP11]], [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP11]], float [[TMP12]] -; CHECK-NEXT: [[TMP15:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 5), align 4 -; CHECK-NEXT: [[TMP16:%.*]] = fcmp fast ogt float [[TMP14]], [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], float [[TMP14]], float [[TMP15]] -; CHECK-NEXT: [[TMP18:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 6), align 8 -; CHECK-NEXT: [[TMP19:%.*]] = fcmp fast ogt float [[TMP17]], [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP17]], float [[TMP18]] -; CHECK-NEXT: [[TMP21:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 7), align 4 -; CHECK-NEXT: [[TMP22:%.*]] = fcmp fast ogt float [[TMP20]], [[TMP21]] -; CHECK-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], float [[TMP20]], float [[TMP21]] -; CHECK-NEXT: ret float [[TMP23]] +; SSE-LABEL: @maxf8( +; SSE-NEXT: [[TMP2:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16 +; SSE-NEXT: [[TMP3:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4 +; SSE-NEXT: [[TMP4:%.*]] = fcmp fast ogt float [[TMP2]], [[TMP3]] +; SSE-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], float [[TMP2]], float [[TMP3]] +; SSE-NEXT: [[TMP6:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 2), align 8 +; SSE-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP5]], [[TMP6]] +; SSE-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP5]], float [[TMP6]] +; SSE-NEXT: [[TMP9:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 3), align 4 +; SSE-NEXT: [[TMP10:%.*]] = fcmp fast ogt float [[TMP8]], [[TMP9]] +; SSE-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], float [[TMP8]], float [[TMP9]] +; SSE-NEXT: [[TMP12:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 4), align 16 +; SSE-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP11]], [[TMP12]] +; SSE-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP11]], float [[TMP12]] +; SSE-NEXT: [[TMP15:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 5), align 4 +; SSE-NEXT: [[TMP16:%.*]] = fcmp fast ogt float [[TMP14]], [[TMP15]] +; SSE-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], float [[TMP14]], float [[TMP15]] +; SSE-NEXT: [[TMP18:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 6), align 8 +; SSE-NEXT: [[TMP19:%.*]] = fcmp fast ogt float [[TMP17]], [[TMP18]] +; SSE-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP17]], float [[TMP18]] +; SSE-NEXT: [[TMP21:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 7), align 4 +; SSE-NEXT: [[TMP22:%.*]] = fcmp fast ogt float [[TMP20]], [[TMP21]] +; SSE-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], float [[TMP20]], float [[TMP21]] +; SSE-NEXT: ret float [[TMP23]] ; ; AVX-LABEL: @maxf8( ; AVX-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([32 x float]* @arr1 to <8 x float>*), align 16 @@ -944,54 +557,54 @@ define float @maxf8(float) { } define float @maxf16(float) { -; CHECK-LABEL: @maxf16( -; CHECK-NEXT: [[TMP2:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16 -; CHECK-NEXT: [[TMP3:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4 -; CHECK-NEXT: [[TMP4:%.*]] = fcmp fast ogt float [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], float [[TMP2]], float [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 2), align 8 -; CHECK-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP5]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP5]], float [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 3), align 4 -; CHECK-NEXT: [[TMP10:%.*]] = fcmp fast ogt float [[TMP8]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], float [[TMP8]], float [[TMP9]] -; CHECK-NEXT: [[TMP12:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 4), align 16 -; CHECK-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP11]], [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP11]], float [[TMP12]] -; CHECK-NEXT: [[TMP15:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 5), align 4 -; CHECK-NEXT: [[TMP16:%.*]] = fcmp fast ogt float [[TMP14]], [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], float [[TMP14]], float [[TMP15]] -; CHECK-NEXT: [[TMP18:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 6), align 8 -; CHECK-NEXT: [[TMP19:%.*]] = fcmp fast ogt float [[TMP17]], [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP17]], float [[TMP18]] -; CHECK-NEXT: [[TMP21:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 7), align 4 -; CHECK-NEXT: [[TMP22:%.*]] = fcmp fast ogt float [[TMP20]], [[TMP21]] -; CHECK-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], float [[TMP20]], float [[TMP21]] -; CHECK-NEXT: [[TMP24:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 8), align 16 -; CHECK-NEXT: [[TMP25:%.*]] = fcmp fast ogt float [[TMP23]], [[TMP24]] -; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], float [[TMP23]], float [[TMP24]] -; CHECK-NEXT: [[TMP27:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 9), align 4 -; CHECK-NEXT: [[TMP28:%.*]] = fcmp fast ogt float [[TMP26]], [[TMP27]] -; CHECK-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], float [[TMP26]], float [[TMP27]] -; CHECK-NEXT: [[TMP30:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 10), align 8 -; CHECK-NEXT: [[TMP31:%.*]] = fcmp fast ogt float [[TMP29]], [[TMP30]] -; CHECK-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], float [[TMP29]], float [[TMP30]] -; CHECK-NEXT: [[TMP33:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 11), align 4 -; CHECK-NEXT: [[TMP34:%.*]] = fcmp fast ogt float [[TMP32]], [[TMP33]] -; CHECK-NEXT: [[TMP35:%.*]] = select i1 [[TMP34]], float [[TMP32]], float [[TMP33]] -; CHECK-NEXT: [[TMP36:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 12), align 16 -; CHECK-NEXT: [[TMP37:%.*]] = fcmp fast ogt float [[TMP35]], [[TMP36]] -; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], float [[TMP35]], float [[TMP36]] -; CHECK-NEXT: [[TMP39:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 13), align 4 -; CHECK-NEXT: [[TMP40:%.*]] = fcmp fast ogt float [[TMP38]], [[TMP39]] -; CHECK-NEXT: [[TMP41:%.*]] = select i1 [[TMP40]], float [[TMP38]], float [[TMP39]] -; CHECK-NEXT: [[TMP42:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 14), align 8 -; CHECK-NEXT: [[TMP43:%.*]] = fcmp fast ogt float [[TMP41]], [[TMP42]] -; CHECK-NEXT: [[TMP44:%.*]] = select i1 [[TMP43]], float [[TMP41]], float [[TMP42]] -; CHECK-NEXT: [[TMP45:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 15), align 4 -; CHECK-NEXT: [[TMP46:%.*]] = fcmp fast ogt float [[TMP44]], [[TMP45]] -; CHECK-NEXT: [[TMP47:%.*]] = select i1 [[TMP46]], float [[TMP44]], float [[TMP45]] -; CHECK-NEXT: ret float [[TMP47]] +; SSE-LABEL: @maxf16( +; SSE-NEXT: [[TMP2:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16 +; SSE-NEXT: [[TMP3:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4 +; SSE-NEXT: [[TMP4:%.*]] = fcmp fast ogt float [[TMP2]], [[TMP3]] +; SSE-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], float [[TMP2]], float [[TMP3]] +; SSE-NEXT: [[TMP6:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 2), align 8 +; SSE-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP5]], [[TMP6]] +; SSE-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP5]], float [[TMP6]] +; SSE-NEXT: [[TMP9:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 3), align 4 +; SSE-NEXT: [[TMP10:%.*]] = fcmp fast ogt float [[TMP8]], [[TMP9]] +; SSE-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], float [[TMP8]], float [[TMP9]] +; SSE-NEXT: [[TMP12:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 4), align 16 +; SSE-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP11]], [[TMP12]] +; SSE-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP11]], float [[TMP12]] +; SSE-NEXT: [[TMP15:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 5), align 4 +; SSE-NEXT: [[TMP16:%.*]] = fcmp fast ogt float [[TMP14]], [[TMP15]] +; SSE-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], float [[TMP14]], float [[TMP15]] +; SSE-NEXT: [[TMP18:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 6), align 8 +; SSE-NEXT: [[TMP19:%.*]] = fcmp fast ogt float [[TMP17]], [[TMP18]] +; SSE-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP17]], float [[TMP18]] +; SSE-NEXT: [[TMP21:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 7), align 4 +; SSE-NEXT: [[TMP22:%.*]] = fcmp fast ogt float [[TMP20]], [[TMP21]] +; SSE-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], float [[TMP20]], float [[TMP21]] +; SSE-NEXT: [[TMP24:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 8), align 16 +; SSE-NEXT: [[TMP25:%.*]] = fcmp fast ogt float [[TMP23]], [[TMP24]] +; SSE-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], float [[TMP23]], float [[TMP24]] +; SSE-NEXT: [[TMP27:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 9), align 4 +; SSE-NEXT: [[TMP28:%.*]] = fcmp fast ogt float [[TMP26]], [[TMP27]] +; SSE-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], float [[TMP26]], float [[TMP27]] +; SSE-NEXT: [[TMP30:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 10), align 8 +; SSE-NEXT: [[TMP31:%.*]] = fcmp fast ogt float [[TMP29]], [[TMP30]] +; SSE-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], float [[TMP29]], float [[TMP30]] +; SSE-NEXT: [[TMP33:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 11), align 4 +; SSE-NEXT: [[TMP34:%.*]] = fcmp fast ogt float [[TMP32]], [[TMP33]] +; SSE-NEXT: [[TMP35:%.*]] = select i1 [[TMP34]], float [[TMP32]], float [[TMP33]] +; SSE-NEXT: [[TMP36:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 12), align 16 +; SSE-NEXT: [[TMP37:%.*]] = fcmp fast ogt float [[TMP35]], [[TMP36]] +; SSE-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], float [[TMP35]], float [[TMP36]] +; SSE-NEXT: [[TMP39:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 13), align 4 +; SSE-NEXT: [[TMP40:%.*]] = fcmp fast ogt float [[TMP38]], [[TMP39]] +; SSE-NEXT: [[TMP41:%.*]] = select i1 [[TMP40]], float [[TMP38]], float [[TMP39]] +; SSE-NEXT: [[TMP42:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 14), align 8 +; SSE-NEXT: [[TMP43:%.*]] = fcmp fast ogt float [[TMP41]], [[TMP42]] +; SSE-NEXT: [[TMP44:%.*]] = select i1 [[TMP43]], float [[TMP41]], float [[TMP42]] +; SSE-NEXT: [[TMP45:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 15), align 4 +; SSE-NEXT: [[TMP46:%.*]] = fcmp fast ogt float [[TMP44]], [[TMP45]] +; SSE-NEXT: [[TMP47:%.*]] = select i1 [[TMP46]], float [[TMP44]], float [[TMP45]] +; SSE-NEXT: ret float [[TMP47]] ; ; AVX-LABEL: @maxf16( ; AVX-NEXT: [[TMP2:%.*]] = load <16 x float>, <16 x float>* bitcast ([32 x float]* @arr1 to <16 x float>*), align 16 @@ -1184,102 +797,102 @@ define float @maxf16(float) { } define float @maxf32(float) { -; CHECK-LABEL: @maxf32( -; CHECK-NEXT: [[TMP2:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16 -; CHECK-NEXT: [[TMP3:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4 -; CHECK-NEXT: [[TMP4:%.*]] = fcmp fast ogt float [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], float [[TMP2]], float [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 2), align 8 -; CHECK-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP5]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP5]], float [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 3), align 4 -; CHECK-NEXT: [[TMP10:%.*]] = fcmp fast ogt float [[TMP8]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], float [[TMP8]], float [[TMP9]] -; CHECK-NEXT: [[TMP12:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 4), align 16 -; CHECK-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP11]], [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP11]], float [[TMP12]] -; CHECK-NEXT: [[TMP15:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 5), align 4 -; CHECK-NEXT: [[TMP16:%.*]] = fcmp fast ogt float [[TMP14]], [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], float [[TMP14]], float [[TMP15]] -; CHECK-NEXT: [[TMP18:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 6), align 8 -; CHECK-NEXT: [[TMP19:%.*]] = fcmp fast ogt float [[TMP17]], [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP17]], float [[TMP18]] -; CHECK-NEXT: [[TMP21:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 7), align 4 -; CHECK-NEXT: [[TMP22:%.*]] = fcmp fast ogt float [[TMP20]], [[TMP21]] -; CHECK-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], float [[TMP20]], float [[TMP21]] -; CHECK-NEXT: [[TMP24:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 8), align 16 -; CHECK-NEXT: [[TMP25:%.*]] = fcmp fast ogt float [[TMP23]], [[TMP24]] -; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], float [[TMP23]], float [[TMP24]] -; CHECK-NEXT: [[TMP27:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 9), align 4 -; CHECK-NEXT: [[TMP28:%.*]] = fcmp fast ogt float [[TMP26]], [[TMP27]] -; CHECK-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], float [[TMP26]], float [[TMP27]] -; CHECK-NEXT: [[TMP30:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 10), align 8 -; CHECK-NEXT: [[TMP31:%.*]] = fcmp fast ogt float [[TMP29]], [[TMP30]] -; CHECK-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], float [[TMP29]], float [[TMP30]] -; CHECK-NEXT: [[TMP33:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 11), align 4 -; CHECK-NEXT: [[TMP34:%.*]] = fcmp fast ogt float [[TMP32]], [[TMP33]] -; CHECK-NEXT: [[TMP35:%.*]] = select i1 [[TMP34]], float [[TMP32]], float [[TMP33]] -; CHECK-NEXT: [[TMP36:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 12), align 16 -; CHECK-NEXT: [[TMP37:%.*]] = fcmp fast ogt float [[TMP35]], [[TMP36]] -; CHECK-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], float [[TMP35]], float [[TMP36]] -; CHECK-NEXT: [[TMP39:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 13), align 4 -; CHECK-NEXT: [[TMP40:%.*]] = fcmp fast ogt float [[TMP38]], [[TMP39]] -; CHECK-NEXT: [[TMP41:%.*]] = select i1 [[TMP40]], float [[TMP38]], float [[TMP39]] -; CHECK-NEXT: [[TMP42:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 14), align 8 -; CHECK-NEXT: [[TMP43:%.*]] = fcmp fast ogt float [[TMP41]], [[TMP42]] -; CHECK-NEXT: [[TMP44:%.*]] = select i1 [[TMP43]], float [[TMP41]], float [[TMP42]] -; CHECK-NEXT: [[TMP45:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 15), align 4 -; CHECK-NEXT: [[TMP46:%.*]] = fcmp fast ogt float [[TMP44]], [[TMP45]] -; CHECK-NEXT: [[TMP47:%.*]] = select i1 [[TMP46]], float [[TMP44]], float [[TMP45]] -; CHECK-NEXT: [[TMP48:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 16), align 16 -; CHECK-NEXT: [[TMP49:%.*]] = fcmp fast ogt float [[TMP47]], [[TMP48]] -; CHECK-NEXT: [[TMP50:%.*]] = select i1 [[TMP49]], float [[TMP47]], float [[TMP48]] -; CHECK-NEXT: [[TMP51:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 17), align 4 -; CHECK-NEXT: [[TMP52:%.*]] = fcmp fast ogt float [[TMP50]], [[TMP51]] -; CHECK-NEXT: [[TMP53:%.*]] = select i1 [[TMP52]], float [[TMP50]], float [[TMP51]] -; CHECK-NEXT: [[TMP54:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 18), align 8 -; CHECK-NEXT: [[TMP55:%.*]] = fcmp fast ogt float [[TMP53]], [[TMP54]] -; CHECK-NEXT: [[TMP56:%.*]] = select i1 [[TMP55]], float [[TMP53]], float [[TMP54]] -; CHECK-NEXT: [[TMP57:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 19), align 4 -; CHECK-NEXT: [[TMP58:%.*]] = fcmp fast ogt float [[TMP56]], [[TMP57]] -; CHECK-NEXT: [[TMP59:%.*]] = select i1 [[TMP58]], float [[TMP56]], float [[TMP57]] -; CHECK-NEXT: [[TMP60:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 20), align 16 -; CHECK-NEXT: [[TMP61:%.*]] = fcmp fast ogt float [[TMP59]], [[TMP60]] -; CHECK-NEXT: [[TMP62:%.*]] = select i1 [[TMP61]], float [[TMP59]], float [[TMP60]] -; CHECK-NEXT: [[TMP63:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 21), align 4 -; CHECK-NEXT: [[TMP64:%.*]] = fcmp fast ogt float [[TMP62]], [[TMP63]] -; CHECK-NEXT: [[TMP65:%.*]] = select i1 [[TMP64]], float [[TMP62]], float [[TMP63]] -; CHECK-NEXT: [[TMP66:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 22), align 8 -; CHECK-NEXT: [[TMP67:%.*]] = fcmp fast ogt float [[TMP65]], [[TMP66]] -; CHECK-NEXT: [[TMP68:%.*]] = select i1 [[TMP67]], float [[TMP65]], float [[TMP66]] -; CHECK-NEXT: [[TMP69:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 23), align 4 -; CHECK-NEXT: [[TMP70:%.*]] = fcmp fast ogt float [[TMP68]], [[TMP69]] -; CHECK-NEXT: [[TMP71:%.*]] = select i1 [[TMP70]], float [[TMP68]], float [[TMP69]] -; CHECK-NEXT: [[TMP72:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 24), align 16 -; CHECK-NEXT: [[TMP73:%.*]] = fcmp fast ogt float [[TMP71]], [[TMP72]] -; CHECK-NEXT: [[TMP74:%.*]] = select i1 [[TMP73]], float [[TMP71]], float [[TMP72]] -; CHECK-NEXT: [[TMP75:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 25), align 4 -; CHECK-NEXT: [[TMP76:%.*]] = fcmp fast ogt float [[TMP74]], [[TMP75]] -; CHECK-NEXT: [[TMP77:%.*]] = select i1 [[TMP76]], float [[TMP74]], float [[TMP75]] -; CHECK-NEXT: [[TMP78:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 26), align 8 -; CHECK-NEXT: [[TMP79:%.*]] = fcmp fast ogt float [[TMP77]], [[TMP78]] -; CHECK-NEXT: [[TMP80:%.*]] = select i1 [[TMP79]], float [[TMP77]], float [[TMP78]] -; CHECK-NEXT: [[TMP81:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 27), align 4 -; CHECK-NEXT: [[TMP82:%.*]] = fcmp fast ogt float [[TMP80]], [[TMP81]] -; CHECK-NEXT: [[TMP83:%.*]] = select i1 [[TMP82]], float [[TMP80]], float [[TMP81]] -; CHECK-NEXT: [[TMP84:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 28), align 16 -; CHECK-NEXT: [[TMP85:%.*]] = fcmp fast ogt float [[TMP83]], [[TMP84]] -; CHECK-NEXT: [[TMP86:%.*]] = select i1 [[TMP85]], float [[TMP83]], float [[TMP84]] -; CHECK-NEXT: [[TMP87:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 29), align 4 -; CHECK-NEXT: [[TMP88:%.*]] = fcmp fast ogt float [[TMP86]], [[TMP87]] -; CHECK-NEXT: [[TMP89:%.*]] = select i1 [[TMP88]], float [[TMP86]], float [[TMP87]] -; CHECK-NEXT: [[TMP90:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 30), align 8 -; CHECK-NEXT: [[TMP91:%.*]] = fcmp fast ogt float [[TMP89]], [[TMP90]] -; CHECK-NEXT: [[TMP92:%.*]] = select i1 [[TMP91]], float [[TMP89]], float [[TMP90]] -; CHECK-NEXT: [[TMP93:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 31), align 4 -; CHECK-NEXT: [[TMP94:%.*]] = fcmp fast ogt float [[TMP92]], [[TMP93]] -; CHECK-NEXT: [[TMP95:%.*]] = select i1 [[TMP94]], float [[TMP92]], float [[TMP93]] -; CHECK-NEXT: ret float [[TMP95]] +; SSE-LABEL: @maxf32( +; SSE-NEXT: [[TMP2:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 0), align 16 +; SSE-NEXT: [[TMP3:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 1), align 4 +; SSE-NEXT: [[TMP4:%.*]] = fcmp fast ogt float [[TMP2]], [[TMP3]] +; SSE-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], float [[TMP2]], float [[TMP3]] +; SSE-NEXT: [[TMP6:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 2), align 8 +; SSE-NEXT: [[TMP7:%.*]] = fcmp fast ogt float [[TMP5]], [[TMP6]] +; SSE-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], float [[TMP5]], float [[TMP6]] +; SSE-NEXT: [[TMP9:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 3), align 4 +; SSE-NEXT: [[TMP10:%.*]] = fcmp fast ogt float [[TMP8]], [[TMP9]] +; SSE-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], float [[TMP8]], float [[TMP9]] +; SSE-NEXT: [[TMP12:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 4), align 16 +; SSE-NEXT: [[TMP13:%.*]] = fcmp fast ogt float [[TMP11]], [[TMP12]] +; SSE-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], float [[TMP11]], float [[TMP12]] +; SSE-NEXT: [[TMP15:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 5), align 4 +; SSE-NEXT: [[TMP16:%.*]] = fcmp fast ogt float [[TMP14]], [[TMP15]] +; SSE-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], float [[TMP14]], float [[TMP15]] +; SSE-NEXT: [[TMP18:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 6), align 8 +; SSE-NEXT: [[TMP19:%.*]] = fcmp fast ogt float [[TMP17]], [[TMP18]] +; SSE-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], float [[TMP17]], float [[TMP18]] +; SSE-NEXT: [[TMP21:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 7), align 4 +; SSE-NEXT: [[TMP22:%.*]] = fcmp fast ogt float [[TMP20]], [[TMP21]] +; SSE-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], float [[TMP20]], float [[TMP21]] +; SSE-NEXT: [[TMP24:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 8), align 16 +; SSE-NEXT: [[TMP25:%.*]] = fcmp fast ogt float [[TMP23]], [[TMP24]] +; SSE-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], float [[TMP23]], float [[TMP24]] +; SSE-NEXT: [[TMP27:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 9), align 4 +; SSE-NEXT: [[TMP28:%.*]] = fcmp fast ogt float [[TMP26]], [[TMP27]] +; SSE-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], float [[TMP26]], float [[TMP27]] +; SSE-NEXT: [[TMP30:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 10), align 8 +; SSE-NEXT: [[TMP31:%.*]] = fcmp fast ogt float [[TMP29]], [[TMP30]] +; SSE-NEXT: [[TMP32:%.*]] = select i1 [[TMP31]], float [[TMP29]], float [[TMP30]] +; SSE-NEXT: [[TMP33:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 11), align 4 +; SSE-NEXT: [[TMP34:%.*]] = fcmp fast ogt float [[TMP32]], [[TMP33]] +; SSE-NEXT: [[TMP35:%.*]] = select i1 [[TMP34]], float [[TMP32]], float [[TMP33]] +; SSE-NEXT: [[TMP36:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 12), align 16 +; SSE-NEXT: [[TMP37:%.*]] = fcmp fast ogt float [[TMP35]], [[TMP36]] +; SSE-NEXT: [[TMP38:%.*]] = select i1 [[TMP37]], float [[TMP35]], float [[TMP36]] +; SSE-NEXT: [[TMP39:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 13), align 4 +; SSE-NEXT: [[TMP40:%.*]] = fcmp fast ogt float [[TMP38]], [[TMP39]] +; SSE-NEXT: [[TMP41:%.*]] = select i1 [[TMP40]], float [[TMP38]], float [[TMP39]] +; SSE-NEXT: [[TMP42:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 14), align 8 +; SSE-NEXT: [[TMP43:%.*]] = fcmp fast ogt float [[TMP41]], [[TMP42]] +; SSE-NEXT: [[TMP44:%.*]] = select i1 [[TMP43]], float [[TMP41]], float [[TMP42]] +; SSE-NEXT: [[TMP45:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 15), align 4 +; SSE-NEXT: [[TMP46:%.*]] = fcmp fast ogt float [[TMP44]], [[TMP45]] +; SSE-NEXT: [[TMP47:%.*]] = select i1 [[TMP46]], float [[TMP44]], float [[TMP45]] +; SSE-NEXT: [[TMP48:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 16), align 16 +; SSE-NEXT: [[TMP49:%.*]] = fcmp fast ogt float [[TMP47]], [[TMP48]] +; SSE-NEXT: [[TMP50:%.*]] = select i1 [[TMP49]], float [[TMP47]], float [[TMP48]] +; SSE-NEXT: [[TMP51:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 17), align 4 +; SSE-NEXT: [[TMP52:%.*]] = fcmp fast ogt float [[TMP50]], [[TMP51]] +; SSE-NEXT: [[TMP53:%.*]] = select i1 [[TMP52]], float [[TMP50]], float [[TMP51]] +; SSE-NEXT: [[TMP54:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 18), align 8 +; SSE-NEXT: [[TMP55:%.*]] = fcmp fast ogt float [[TMP53]], [[TMP54]] +; SSE-NEXT: [[TMP56:%.*]] = select i1 [[TMP55]], float [[TMP53]], float [[TMP54]] +; SSE-NEXT: [[TMP57:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 19), align 4 +; SSE-NEXT: [[TMP58:%.*]] = fcmp fast ogt float [[TMP56]], [[TMP57]] +; SSE-NEXT: [[TMP59:%.*]] = select i1 [[TMP58]], float [[TMP56]], float [[TMP57]] +; SSE-NEXT: [[TMP60:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 20), align 16 +; SSE-NEXT: [[TMP61:%.*]] = fcmp fast ogt float [[TMP59]], [[TMP60]] +; SSE-NEXT: [[TMP62:%.*]] = select i1 [[TMP61]], float [[TMP59]], float [[TMP60]] +; SSE-NEXT: [[TMP63:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 21), align 4 +; SSE-NEXT: [[TMP64:%.*]] = fcmp fast ogt float [[TMP62]], [[TMP63]] +; SSE-NEXT: [[TMP65:%.*]] = select i1 [[TMP64]], float [[TMP62]], float [[TMP63]] +; SSE-NEXT: [[TMP66:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 22), align 8 +; SSE-NEXT: [[TMP67:%.*]] = fcmp fast ogt float [[TMP65]], [[TMP66]] +; SSE-NEXT: [[TMP68:%.*]] = select i1 [[TMP67]], float [[TMP65]], float [[TMP66]] +; SSE-NEXT: [[TMP69:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 23), align 4 +; SSE-NEXT: [[TMP70:%.*]] = fcmp fast ogt float [[TMP68]], [[TMP69]] +; SSE-NEXT: [[TMP71:%.*]] = select i1 [[TMP70]], float [[TMP68]], float [[TMP69]] +; SSE-NEXT: [[TMP72:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 24), align 16 +; SSE-NEXT: [[TMP73:%.*]] = fcmp fast ogt float [[TMP71]], [[TMP72]] +; SSE-NEXT: [[TMP74:%.*]] = select i1 [[TMP73]], float [[TMP71]], float [[TMP72]] +; SSE-NEXT: [[TMP75:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 25), align 4 +; SSE-NEXT: [[TMP76:%.*]] = fcmp fast ogt float [[TMP74]], [[TMP75]] +; SSE-NEXT: [[TMP77:%.*]] = select i1 [[TMP76]], float [[TMP74]], float [[TMP75]] +; SSE-NEXT: [[TMP78:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 26), align 8 +; SSE-NEXT: [[TMP79:%.*]] = fcmp fast ogt float [[TMP77]], [[TMP78]] +; SSE-NEXT: [[TMP80:%.*]] = select i1 [[TMP79]], float [[TMP77]], float [[TMP78]] +; SSE-NEXT: [[TMP81:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 27), align 4 +; SSE-NEXT: [[TMP82:%.*]] = fcmp fast ogt float [[TMP80]], [[TMP81]] +; SSE-NEXT: [[TMP83:%.*]] = select i1 [[TMP82]], float [[TMP80]], float [[TMP81]] +; SSE-NEXT: [[TMP84:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 28), align 16 +; SSE-NEXT: [[TMP85:%.*]] = fcmp fast ogt float [[TMP83]], [[TMP84]] +; SSE-NEXT: [[TMP86:%.*]] = select i1 [[TMP85]], float [[TMP83]], float [[TMP84]] +; SSE-NEXT: [[TMP87:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 29), align 4 +; SSE-NEXT: [[TMP88:%.*]] = fcmp fast ogt float [[TMP86]], [[TMP87]] +; SSE-NEXT: [[TMP89:%.*]] = select i1 [[TMP88]], float [[TMP86]], float [[TMP87]] +; SSE-NEXT: [[TMP90:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 30), align 8 +; SSE-NEXT: [[TMP91:%.*]] = fcmp fast ogt float [[TMP89]], [[TMP90]] +; SSE-NEXT: [[TMP92:%.*]] = select i1 [[TMP91]], float [[TMP89]], float [[TMP90]] +; SSE-NEXT: [[TMP93:%.*]] = load float, float* getelementptr inbounds ([32 x float], [32 x float]* @arr1, i64 0, i64 31), align 4 +; SSE-NEXT: [[TMP94:%.*]] = fcmp fast ogt float [[TMP92]], [[TMP93]] +; SSE-NEXT: [[TMP95:%.*]] = select i1 [[TMP94]], float [[TMP92]], float [[TMP93]] +; SSE-NEXT: ret float [[TMP95]] ; ; AVX-LABEL: @maxf32( ; AVX-NEXT: [[TMP2:%.*]] = load <32 x float>, <32 x float>* bitcast ([32 x float]* @arr1 to <32 x float>*), align 16 @@ -1625,32 +1238,32 @@ define float @maxf32(float) { } define i32 @maxi8_mutiple_uses(i32) { -; CHECK-LABEL: @maxi8_mutiple_uses( -; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16 -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4 -; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8 -; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4 -; CHECK-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]] -; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16 -; CHECK-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP11]], i32 [[TMP12]] -; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4 -; CHECK-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]] -; CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 -; CHECK-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 [[TMP18]] -; CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 -; CHECK-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP20]], [[TMP21]] -; CHECK-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[TMP20]], i32 [[TMP21]] -; CHECK-NEXT: [[TMP24:%.*]] = select i1 [[TMP4]], i32 3, i32 4 -; CHECK-NEXT: store i32 [[TMP24]], i32* @var, align 8 -; CHECK-NEXT: ret i32 [[TMP23]] +; SSE-LABEL: @maxi8_mutiple_uses( +; SSE-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16 +; SSE-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4 +; SSE-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]] +; SSE-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]] +; SSE-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8 +; SSE-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] +; SSE-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 [[TMP6]] +; SSE-NEXT: [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4 +; SSE-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +; SSE-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]] +; SSE-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16 +; SSE-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +; SSE-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP11]], i32 [[TMP12]] +; SSE-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4 +; SSE-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] +; SSE-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]] +; SSE-NEXT: [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 +; SSE-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] +; SSE-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 [[TMP18]] +; SSE-NEXT: [[TMP21:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 +; SSE-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP20]], [[TMP21]] +; SSE-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[TMP20]], i32 [[TMP21]] +; SSE-NEXT: [[TMP24:%.*]] = select i1 [[TMP4]], i32 3, i32 4 +; SSE-NEXT: store i32 [[TMP24]], i32* @var, align 8 +; SSE-NEXT: ret i32 [[TMP23]] ; ; AVX-LABEL: @maxi8_mutiple_uses( ; AVX-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16 @@ -1791,32 +1404,32 @@ define i32 @maxi8_mutiple_uses(i32) { } define i32 @maxi8_wrong_parent(i32) { -; CHECK-LABEL: @maxi8_wrong_parent( -; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16 -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4 -; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]] -; CHECK-NEXT: br label [[PP:%.*]] -; CHECK: pp: -; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8 -; CHECK-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4 -; CHECK-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]] -; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16 -; CHECK-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP11]], i32 [[TMP12]] -; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4 -; CHECK-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]] -; CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 -; CHECK-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 [[TMP18]] -; CHECK-NEXT: [[TMP21:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 -; CHECK-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP20]], [[TMP21]] -; CHECK-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[TMP20]], i32 [[TMP21]] -; CHECK-NEXT: ret i32 [[TMP23]] +; SSE-LABEL: @maxi8_wrong_parent( +; SSE-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16 +; SSE-NEXT: [[TMP3:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 1), align 4 +; SSE-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP2]], [[TMP3]] +; SSE-NEXT: br label [[PP:%.*]] +; SSE: pp: +; SSE-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP2]], i32 [[TMP3]] +; SSE-NEXT: [[TMP6:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 2), align 8 +; SSE-NEXT: [[TMP7:%.*]] = icmp sgt i32 [[TMP5]], [[TMP6]] +; SSE-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 [[TMP6]] +; SSE-NEXT: [[TMP9:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 3), align 4 +; SSE-NEXT: [[TMP10:%.*]] = icmp sgt i32 [[TMP8]], [[TMP9]] +; SSE-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]] +; SSE-NEXT: [[TMP12:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 4), align 16 +; SSE-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP11]], [[TMP12]] +; SSE-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP11]], i32 [[TMP12]] +; SSE-NEXT: [[TMP15:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 5), align 4 +; SSE-NEXT: [[TMP16:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] +; SSE-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[TMP14]], i32 [[TMP15]] +; SSE-NEXT: [[TMP18:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 6), align 8 +; SSE-NEXT: [[TMP19:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] +; SSE-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 [[TMP18]] +; SSE-NEXT: [[TMP21:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 7), align 4 +; SSE-NEXT: [[TMP22:%.*]] = icmp sgt i32 [[TMP20]], [[TMP21]] +; SSE-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i32 [[TMP20]], i32 [[TMP21]] +; SSE-NEXT: ret i32 [[TMP23]] ; ; AVX-LABEL: @maxi8_wrong_parent( ; AVX-NEXT: [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @arr, i64 0, i64 0), align 16 @@ -1967,30 +1580,30 @@ pp: ; PR38191 - We don't handle array-of-pointer reductions. define i32* @maxp8(i32) { -; CHECK-LABEL: @maxp8( -; CHECK-NEXT: [[TMP2:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 0), align 16 -; CHECK-NEXT: [[TMP3:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 1), align 4 -; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i32* [[TMP2]], [[TMP3]] -; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32* [[TMP2]], i32* [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 2), align 8 -; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32* [[TMP5]], [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32* [[TMP5]], i32* [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 3), align 4 -; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i32* [[TMP8]], [[TMP9]] -; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32* [[TMP8]], i32* [[TMP9]] -; CHECK-NEXT: [[TMP12:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 4), align 16 -; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i32* [[TMP11]], [[TMP12]] -; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32* [[TMP11]], i32* [[TMP12]] -; CHECK-NEXT: [[TMP15:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 5), align 4 -; CHECK-NEXT: [[TMP16:%.*]] = icmp ugt i32* [[TMP14]], [[TMP15]] -; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32* [[TMP14]], i32* [[TMP15]] -; CHECK-NEXT: [[TMP18:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 6), align 8 -; CHECK-NEXT: [[TMP19:%.*]] = icmp ugt i32* [[TMP17]], [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32* [[TMP17]], i32* [[TMP18]] -; CHECK-NEXT: [[TMP21:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 7), align 4 -; CHECK-NEXT: [[TMP22:%.*]] = icmp ugt i32* [[TMP20]], [[TMP21]] -; CHECK-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i32* [[TMP20]], i32* [[TMP21]] -; CHECK-NEXT: ret i32* [[TMP23]] +; SSE-LABEL: @maxp8( +; SSE-NEXT: [[TMP2:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 0), align 16 +; SSE-NEXT: [[TMP3:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 1), align 4 +; SSE-NEXT: [[TMP4:%.*]] = icmp ugt i32* [[TMP2]], [[TMP3]] +; SSE-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i32* [[TMP2]], i32* [[TMP3]] +; SSE-NEXT: [[TMP6:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 2), align 8 +; SSE-NEXT: [[TMP7:%.*]] = icmp ugt i32* [[TMP5]], [[TMP6]] +; SSE-NEXT: [[TMP8:%.*]] = select i1 [[TMP7]], i32* [[TMP5]], i32* [[TMP6]] +; SSE-NEXT: [[TMP9:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 3), align 4 +; SSE-NEXT: [[TMP10:%.*]] = icmp ugt i32* [[TMP8]], [[TMP9]] +; SSE-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i32* [[TMP8]], i32* [[TMP9]] +; SSE-NEXT: [[TMP12:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 4), align 16 +; SSE-NEXT: [[TMP13:%.*]] = icmp ugt i32* [[TMP11]], [[TMP12]] +; SSE-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32* [[TMP11]], i32* [[TMP12]] +; SSE-NEXT: [[TMP15:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 5), align 4 +; SSE-NEXT: [[TMP16:%.*]] = icmp ugt i32* [[TMP14]], [[TMP15]] +; SSE-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32* [[TMP14]], i32* [[TMP15]] +; SSE-NEXT: [[TMP18:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 6), align 8 +; SSE-NEXT: [[TMP19:%.*]] = icmp ugt i32* [[TMP17]], [[TMP18]] +; SSE-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32* [[TMP17]], i32* [[TMP18]] +; SSE-NEXT: [[TMP21:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 7), align 4 +; SSE-NEXT: [[TMP22:%.*]] = icmp ugt i32* [[TMP20]], [[TMP21]] +; SSE-NEXT: [[TMP23:%.*]] = select i1 [[TMP22]], i32* [[TMP20]], i32* [[TMP21]] +; SSE-NEXT: ret i32* [[TMP23]] ; ; AVX-LABEL: @maxp8( ; AVX-NEXT: [[TMP2:%.*]] = load i32*, i32** getelementptr inbounds ([32 x i32*], [32 x i32*]* @arrp, i64 0, i64 0), align 16 -- 2.11.0