test/CodeGen/AMDGPU/constant-address-space-32bit.ll

   1 ; RUN: llc -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,SICI,SI %s
   2 ; RUN: llc -march=amdgcn -mcpu=bonaire < %s | FileCheck -check-prefixes=GCN,SICI %s
   3 ; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,VIGFX9 %s
   4 ; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,VIGFX9 %s
   5
   6 ; GCN-LABEL: {{^}}load_i32:
   7 ; GCN-DAG: s_mov_b32 s3, 0
   8 ; GCN-DAG: s_mov_b32 s2, s1
   9 ; GCN-DAG: s_mov_b32 s1, s3
  10 ; SICI-DAG: s_load_dword s{{[0-9]}}, s[0:1], 0x0
  11 ; SICI-DAG: s_load_dword s{{[0-9]}}, s[2:3], 0x2
  12 ; VIGFX9-DAG: s_load_dword s{{[0-9]}}, s[0:1], 0x0
  13 ; VIGFX9-DAG: s_load_dword s{{[0-9]}}, s[2:3], 0x8
     ; Scalar i32 case: loads through two addrspace(6) pointers passed inreg and
     ; returns their sum bitcast to float. The checks above expect the two
     ; pointers to end up in the register pairs s[0:1] and s[2:3] with a zero
     ; upper register (s3 = 0, s2 = s1, s1 = s3). The second load is at element 2
     ; (8 bytes): the SICI checks expect immediate 0x2 and the VIGFX9 checks 0x8,
     ; which is consistent with a dword-scaled vs. byte-scaled immediate.
  14 define amdgpu_vs float @load_i32(i32 addrspace(6)* inreg %p0, i32 addrspace(6)* inreg %p1) #0 {
  15   %gep1 = getelementptr i32, i32 addrspace(6)* %p1, i64 2
  16   %r0 = load i32, i32 addrspace(6)* %p0
  17   %r1 = load i32, i32 addrspace(6)* %gep1
  18   %r = add i32 %r0, %r1
  19   %r2 = bitcast i32 %r to float
  20   ret float %r2
  21 }
  22
  23 ; GCN-LABEL: {{^}}load_v2i32:
  24 ; GCN-DAG: s_mov_b32 s3, 0
  25 ; GCN-DAG: s_mov_b32 s2, s1
  26 ; GCN-DAG: s_mov_b32 s1, s3
  27 ; SICI-DAG: s_load_dwordx2 s[{{.*}}], s[0:1], 0x0
  28 ; SICI-DAG: s_load_dwordx2 s[{{.*}}], s[2:3], 0x4
  29 ; VIGFX9-DAG: s_load_dwordx2 s[{{.*}}], s[0:1], 0x0
  30 ; VIGFX9-DAG: s_load_dwordx2 s[{{.*}}], s[2:3], 0x10
     ; <2 x i32> case: same shape as load_i32 but each load is expected to be a
     ; single s_load_dwordx2. Element 2 of an 8-byte vector is byte offset 16:
     ; the SICI checks expect immediate 0x4 and the VIGFX9 checks 0x10.
  31 define amdgpu_vs <2 x float> @load_v2i32(<2 x i32> addrspace(6)* inreg %p0, <2 x i32> addrspace(6)* inreg %p1) #0 {
  32   %gep1 = getelementptr <2 x i32>, <2 x i32> addrspace(6)* %p1, i64 2
  33   %r0 = load <2 x i32>, <2 x i32> addrspace(6)* %p0
  34   %r1 = load <2 x i32>, <2 x i32> addrspace(6)* %gep1
  35   %r = add <2 x i32> %r0, %r1
  36   %r2 = bitcast <2 x i32> %r to <2 x float>
  37   ret <2 x float> %r2
  38 }
  39
  40 ; GCN-LABEL: {{^}}load_v4i32:
  41 ; GCN-DAG: s_mov_b32 s3, 0
  42 ; GCN-DAG: s_mov_b32 s2, s1
  43 ; GCN-DAG: s_mov_b32 s1, s3
  44 ; SICI-DAG: s_load_dwordx4 s[{{.*}}], s[0:1], 0x0
  45 ; SICI-DAG: s_load_dwordx4 s[{{.*}}], s[2:3], 0x8
  46 ; VIGFX9-DAG: s_load_dwordx4 s[{{.*}}], s[0:1], 0x0
  47 ; VIGFX9-DAG: s_load_dwordx4 s[{{.*}}], s[2:3], 0x20
     ; <4 x i32> case: each load is expected to be a single s_load_dwordx4.
     ; Element 2 of a 16-byte vector is byte offset 32: the SICI checks expect
     ; immediate 0x8 and the VIGFX9 checks 0x20.
  48 define amdgpu_vs <4 x float> @load_v4i32(<4 x i32> addrspace(6)* inreg %p0, <4 x i32> addrspace(6)* inreg %p1) #0 {
  49   %gep1 = getelementptr <4 x i32>, <4 x i32> addrspace(6)* %p1, i64 2
  50   %r0 = load <4 x i32>, <4 x i32> addrspace(6)* %p0
  51   %r1 = load <4 x i32>, <4 x i32> addrspace(6)* %gep1
  52   %r = add <4 x i32> %r0, %r1
  53   %r2 = bitcast <4 x i32> %r to <4 x float>
  54   ret <4 x float> %r2
  55 }
  56
  57 ; GCN-LABEL: {{^}}load_v8i32:
  58 ; GCN-DAG: s_mov_b32 s3, 0
  59 ; GCN-DAG: s_mov_b32 s2, s1
  60 ; GCN-DAG: s_mov_b32 s1, s3
  61 ; SICI-DAG: s_load_dwordx8 s[{{.*}}], s[0:1], 0x0
  62 ; SICI-DAG: s_load_dwordx8 s[{{.*}}], s[2:3], 0x10
  63 ; VIGFX9-DAG: s_load_dwordx8 s[{{.*}}], s[0:1], 0x0
  64 ; VIGFX9-DAG: s_load_dwordx8 s[{{.*}}], s[2:3], 0x40
     ; <8 x i32> case: each load is expected to be a single s_load_dwordx8.
     ; Element 2 of a 32-byte vector is byte offset 64: the SICI checks expect
     ; immediate 0x10 and the VIGFX9 checks 0x40.
  65 define amdgpu_vs <8 x float> @load_v8i32(<8 x i32> addrspace(6)* inreg %p0, <8 x i32> addrspace(6)* inreg %p1) #0 {
  66   %gep1 = getelementptr <8 x i32>, <8 x i32> addrspace(6)* %p1, i64 2
  67   %r0 = load <8 x i32>, <8 x i32> addrspace(6)* %p0
  68   %r1 = load <8 x i32>, <8 x i32> addrspace(6)* %gep1
  69   %r = add <8 x i32> %r0, %r1
  70   %r2 = bitcast <8 x i32> %r to <8 x float>
  71   ret <8 x float> %r2
  72 }
  73
  74 ; GCN-LABEL: {{^}}load_v16i32:
  75 ; GCN-DAG: s_mov_b32 s3, 0
  76 ; GCN-DAG: s_mov_b32 s2, s1
  77 ; GCN-DAG: s_mov_b32 s1, s3
  78 ; SICI-DAG: s_load_dwordx16 s[{{.*}}], s[0:1], 0x0
  79 ; SICI-DAG: s_load_dwordx16 s[{{.*}}], s[2:3], 0x20
  80 ; VIGFX9-DAG: s_load_dwordx16 s[{{.*}}], s[0:1], 0x0
  81 ; VIGFX9-DAG: s_load_dwordx16 s[{{.*}}], s[2:3], 0x80
     ; <16 x i32> case: each load is expected to be a single s_load_dwordx16.
     ; Element 2 of a 64-byte vector is byte offset 128: the SICI checks expect
     ; immediate 0x20 and the VIGFX9 checks 0x80.
  82 define amdgpu_vs <16 x float> @load_v16i32(<16 x i32> addrspace(6)* inreg %p0, <16 x i32> addrspace(6)* inreg %p1) #0 {
  83   %gep1 = getelementptr <16 x i32>, <16 x i32> addrspace(6)* %p1, i64 2
  84   %r0 = load <16 x i32>, <16 x i32> addrspace(6)* %p0
  85   %r1 = load <16 x i32>, <16 x i32> addrspace(6)* %gep1
  86   %r = add <16 x i32> %r0, %r1
  87   %r2 = bitcast <16 x i32> %r to <16 x float>
  88   ret <16 x float> %r2
  89 }
  90
  91 ; GCN-LABEL: {{^}}load_float:
  92 ; GCN-DAG: s_mov_b32 s3, 0
  93 ; GCN-DAG: s_mov_b32 s2, s1
  94 ; GCN-DAG: s_mov_b32 s1, s3
  95 ; SICI-DAG: s_load_dword s{{[0-9]}}, s[0:1], 0x0
  96 ; SICI-DAG: s_load_dword s{{[0-9]}}, s[2:3], 0x2
  97 ; VIGFX9-DAG: s_load_dword s{{[0-9]}}, s[0:1], 0x0
  98 ; VIGFX9-DAG: s_load_dword s{{[0-9]}}, s[2:3], 0x8
     ; Floating-point twin of load_i32: the loaded type is float and the values
     ; are combined with fadd, but the expected pointer setup, load instruction
     ; and offsets (0x2 for SICI, 0x8 for VIGFX9 at element 2) are the same.
  99 define amdgpu_vs float @load_float(float addrspace(6)* inreg %p0, float addrspace(6)* inreg %p1) #0 {
 100   %gep1 = getelementptr float, float addrspace(6)* %p1, i64 2
 101   %r0 = load float, float addrspace(6)* %p0
 102   %r1 = load float, float addrspace(6)* %gep1
 103   %r = fadd float %r0, %r1
 104   ret float %r
 105 }
 106
 107 ; GCN-LABEL: {{^}}load_v2float:
 108 ; GCN-DAG: s_mov_b32 s3, 0
 109 ; GCN-DAG: s_mov_b32 s2, s1
 110 ; GCN-DAG: s_mov_b32 s1, s3
 111 ; SICI-DAG: s_load_dwordx2 s[{{.*}}], s[0:1], 0x0
 112 ; SICI-DAG: s_load_dwordx2 s[{{.*}}], s[2:3], 0x4
 113 ; VIGFX9-DAG: s_load_dwordx2 s[{{.*}}], s[0:1], 0x0
 114 ; VIGFX9-DAG: s_load_dwordx2 s[{{.*}}], s[2:3], 0x10
     ; <2 x float> twin of load_v2i32: values are combined with fadd; the
     ; expected s_load_dwordx2 instructions and offsets (0x4 for SICI, 0x10 for
     ; VIGFX9 at element 2, i.e. byte offset 16) are the same.
 115 define amdgpu_vs <2 x float> @load_v2float(<2 x float> addrspace(6)* inreg %p0, <2 x float> addrspace(6)* inreg %p1) #0 {
 116   %gep1 = getelementptr <2 x float>, <2 x float> addrspace(6)* %p1, i64 2
 117   %r0 = load <2 x float>, <2 x float> addrspace(6)* %p0
 118   %r1 = load <2 x float>, <2 x float> addrspace(6)* %gep1
 119   %r = fadd <2 x float> %r0, %r1
 120   ret <2 x float> %r
 121 }
 122
 123 ; GCN-LABEL: {{^}}load_v4float:
 124 ; GCN-DAG: s_mov_b32 s3, 0
 125 ; GCN-DAG: s_mov_b32 s2, s1
 126 ; GCN-DAG: s_mov_b32 s1, s3
 127 ; SICI-DAG: s_load_dwordx4 s[{{.*}}], s[0:1], 0x0
 128 ; SICI-DAG: s_load_dwordx4 s[{{.*}}], s[2:3], 0x8
 129 ; VIGFX9-DAG: s_load_dwordx4 s[{{.*}}], s[0:1], 0x0
 130 ; VIGFX9-DAG: s_load_dwordx4 s[{{.*}}], s[2:3], 0x20
     ; <4 x float> twin of load_v4i32: values are combined with fadd; the
     ; expected s_load_dwordx4 instructions and offsets (0x8 for SICI, 0x20 for
     ; VIGFX9 at element 2, i.e. byte offset 32) are the same.
 131 define amdgpu_vs <4 x float> @load_v4float(<4 x float> addrspace(6)* inreg %p0, <4 x float> addrspace(6)* inreg %p1) #0 {
 132   %gep1 = getelementptr <4 x float>, <4 x float> addrspace(6)* %p1, i64 2
 133   %r0 = load <4 x float>, <4 x float> addrspace(6)* %p0
 134   %r1 = load <4 x float>, <4 x float> addrspace(6)* %gep1
 135   %r = fadd <4 x float> %r0, %r1
 136   ret <4 x float> %r
 137 }
 138
 139 ; GCN-LABEL: {{^}}load_v8float:
 140 ; GCN-DAG: s_mov_b32 s3, 0
 141 ; GCN-DAG: s_mov_b32 s2, s1
 142 ; GCN-DAG: s_mov_b32 s1, s3
 143 ; SICI-DAG: s_load_dwordx8 s[{{.*}}], s[0:1], 0x0
 144 ; SICI-DAG: s_load_dwordx8 s[{{.*}}], s[2:3], 0x10
 145 ; VIGFX9-DAG: s_load_dwordx8 s[{{.*}}], s[0:1], 0x0
 146 ; VIGFX9-DAG: s_load_dwordx8 s[{{.*}}], s[2:3], 0x40
     ; <8 x float> twin of load_v8i32: values are combined with fadd; the
     ; expected s_load_dwordx8 instructions and offsets (0x10 for SICI, 0x40 for
     ; VIGFX9 at element 2, i.e. byte offset 64) are the same.
 147 define amdgpu_vs <8 x float> @load_v8float(<8 x float> addrspace(6)* inreg %p0, <8 x float> addrspace(6)* inreg %p1) #0 {
 148   %gep1 = getelementptr <8 x float>, <8 x float> addrspace(6)* %p1, i64 2
 149   %r0 = load <8 x float>, <8 x float> addrspace(6)* %p0
 150   %r1 = load <8 x float>, <8 x float> addrspace(6)* %gep1
 151   %r = fadd <8 x float> %r0, %r1
 152   ret <8 x float> %r
 153 }
 154
 155 ; GCN-LABEL: {{^}}load_v16float:
 156 ; GCN-DAG: s_mov_b32 s3, 0
 157 ; GCN-DAG: s_mov_b32 s2, s1
 158 ; GCN-DAG: s_mov_b32 s1, s3
 159 ; SICI-DAG: s_load_dwordx16 s[{{.*}}], s[0:1], 0x0
 160 ; SICI-DAG: s_load_dwordx16 s[{{.*}}], s[2:3], 0x20
 161 ; VIGFX9-DAG: s_load_dwordx16 s[{{.*}}], s[0:1], 0x0
 162 ; VIGFX9-DAG: s_load_dwordx16 s[{{.*}}], s[2:3], 0x80
     ; <16 x float> twin of load_v16i32: values are combined with fadd; the
     ; expected s_load_dwordx16 instructions and offsets (0x20 for SICI, 0x80 for
     ; VIGFX9 at element 2, i.e. byte offset 128) are the same.
 163 define amdgpu_vs <16 x float> @load_v16float(<16 x float> addrspace(6)* inreg %p0, <16 x float> addrspace(6)* inreg %p1) #0 {
 164   %gep1 = getelementptr <16 x float>, <16 x float> addrspace(6)* %p1, i64 2
 165   %r0 = load <16 x float>, <16 x float> addrspace(6)* %p0
 166   %r1 = load <16 x float>, <16 x float> addrspace(6)* %gep1
 167   %r = fadd <16 x float> %r0, %r1
 168   ret <16 x float> %r
 169 }
 170
 171 ; GCN-LABEL: {{^}}load_i32_hi0:
 172 ; GCN: s_mov_b32 s1, 0
 173 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x0
     ; Uses attribute group #1, which sets "amdgpu-32bit-address-high-bits"="0".
     ; The checks expect 0 to be moved into s1 immediately before the load that
     ; takes its address from the s[0:1] pair.
 174 define amdgpu_vs i32 @load_i32_hi0(i32 addrspace(6)* inreg %p) #1 {
 175   %r0 = load i32, i32 addrspace(6)* %p
 176   ret i32 %r0
 177 }
 178
 179 ; GCN-LABEL: {{^}}load_i32_hi1:
 180 ; GCN: s_mov_b32 s1, 1
 181 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x0
     ; Uses attribute group #2, which sets "amdgpu-32bit-address-high-bits"="1".
     ; The checks expect 1 to be moved into s1 immediately before the load that
     ; takes its address from the s[0:1] pair.
 182 define amdgpu_vs i32 @load_i32_hi1(i32 addrspace(6)* inreg %p) #2 {
 183   %r0 = load i32, i32 addrspace(6)* %p
 184   ret i32 %r0
 185 }
 186
 187 ; GCN-LABEL: {{^}}load_i32_hiffff8000:
 188 ; GCN: s_movk_i32 s1, 0x8000
 189 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x0
     ; Uses attribute group #3, which sets
     ; "amdgpu-32bit-address-high-bits"="0xffff8000". The checks expect s1 to be
     ; written with s_movk_i32 and the 16-bit immediate 0x8000.
     ; NOTE(review): this presumably relies on s_movk_i32 sign-extending its
     ; immediate to produce 0xffff8000 - confirm against the ISA manual.
 190 define amdgpu_vs i32 @load_i32_hiffff8000(i32 addrspace(6)* inreg %p) #3 {
 191   %r0 = load i32, i32 addrspace(6)* %p
 192   ret i32 %r0
 193 }
 194
 195 ; GCN-LABEL: {{^}}load_i32_hifffffff0:
 196 ; GCN: s_mov_b32 s1, -16
 197 ; GCN-NEXT: s_load_dword s0, s[0:1], 0x0
     ; Uses attribute group #4, which sets
     ; "amdgpu-32bit-address-high-bits"="0xfffffff0". The checks expect that
     ; value in s1 printed as -16 (0xfffffff0 as a signed 32-bit integer).
 198 define amdgpu_vs i32 @load_i32_hifffffff0(i32 addrspace(6)* inreg %p) #4 {
 199   %r0 = load i32, i32 addrspace(6)* %p
 200   ret i32 %r0
 201 }
 202
 203 ; GCN-LABEL: {{^}}load_sampler:
 204 ; GCN: v_readfirstlane_b32
 205 ; GCN-NEXT: v_readfirstlane_b32
 206 ; SI: s_nop
 207 ; GCN-NEXT: s_load_dwordx8
 208 ; GCN-NEXT: s_load_dwordx4
 209 ; GCN: image_sample
     ; Pixel-shader case. An index derived from llvm.amdgcn.interp.mov selects an
     ; <8 x i32> and a <4 x i32> element behind the addrspace(6) argument %1; both
     ; are loaded and passed to the image.sample intrinsic. Both GEPs carry
     ; !amdgpu.uniform metadata. The checks expect two v_readfirstlane_b32
     ; followed by scalar s_load_dwordx8 / s_load_dwordx4 (with an extra s_nop
     ; under the tahiti-only SI prefix) and then an image_sample.
     ; The label pattern ends in a colon so it cannot anchor on the longer
     ; load_sampler_nouniform symbol.
 210 define amdgpu_ps <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @load_sampler([0 x <4 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), [0 x <4 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #5 {
 211 main_body:
 212   %22 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %5) #8
 213   %23 = bitcast float %22 to i32
 214   %24 = shl i32 %23, 1
 215   %25 = getelementptr [0 x <8 x i32>], [0 x <8 x i32>] addrspace(6)* %1, i32 0, i32 %24, !amdgpu.uniform !0
 216   %26 = load <8 x i32>, <8 x i32> addrspace(6)* %25, align 32, !invariant.load !0
 217   %27 = shl i32 %23, 2
 218   %28 = or i32 %27, 3
 219   %29 = bitcast [0 x <8 x i32>] addrspace(6)* %1 to [0 x <4 x i32>] addrspace(6)*
 220   %30 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(6)* %29, i32 0, i32 %28, !amdgpu.uniform !0
 221   %31 = load <4 x i32>, <4 x i32> addrspace(6)* %30, align 16, !invariant.load !0
 222   %32 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> zeroinitializer, <8 x i32> %26, <4 x i32> %31, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #8
 223   %33 = extractelement <4 x float> %32, i32 0
 224   %34 = extractelement <4 x float> %32, i32 1
 225   %35 = extractelement <4 x float> %32, i32 2
 226   %36 = extractelement <4 x float> %32, i32 3
 227   %37 = bitcast float %4 to i32
 228   %38 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %37, 4
 229   %39 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %38, float %33, 5
 230   %40 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %39, float %34, 6
 231   %41 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %40, float %35, 7
 232   %42 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %41, float %36, 8
 233   %43 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %42, float %20, 19
 234   ret <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %43
 235 }
 236
 237 ; GCN-LABEL: {{^}}load_sampler_nouniform:
 238 ; GCN: v_readfirstlane_b32
 239 ; GCN-NEXT: v_readfirstlane_b32
 240 ; SI: s_nop
 241 ; GCN-NEXT: s_load_dwordx8
 242 ; GCN-NEXT: s_load_dwordx4
 243 ; GCN: image_sample
     ; Same body as load_sampler except that the two GEPs carry no
     ; !amdgpu.uniform metadata. The checks are identical: two
     ; v_readfirstlane_b32 followed by scalar s_load_dwordx8 / s_load_dwordx4
     ; (with an extra s_nop under the tahiti-only SI prefix) and then an
     ; image_sample, i.e. scalar loads are still expected without the annotation.
     ; The label pattern ends in a colon, matching the other labels in this file.
 244 define amdgpu_ps <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @load_sampler_nouniform([0 x <4 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), [0 x <4 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), [0 x <8 x i32>] addrspace(6)* inreg noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #5 {
 245 main_body:
 246   %22 = call nsz float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %5) #8
 247   %23 = bitcast float %22 to i32
 248   %24 = shl i32 %23, 1
 249   %25 = getelementptr [0 x <8 x i32>], [0 x <8 x i32>] addrspace(6)* %1, i32 0, i32 %24
 250   %26 = load <8 x i32>, <8 x i32> addrspace(6)* %25, align 32, !invariant.load !0
 251   %27 = shl i32 %23, 2
 252   %28 = or i32 %27, 3
 253   %29 = bitcast [0 x <8 x i32>] addrspace(6)* %1 to [0 x <4 x i32>] addrspace(6)*
 254   %30 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(6)* %29, i32 0, i32 %28
 255   %31 = load <4 x i32>, <4 x i32> addrspace(6)* %30, align 16, !invariant.load !0
 256   %32 = call nsz <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> zeroinitializer, <8 x i32> %26, <4 x i32> %31, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #8
 257   %33 = extractelement <4 x float> %32, i32 0
 258   %34 = extractelement <4 x float> %32, i32 1
 259   %35 = extractelement <4 x float> %32, i32 2
 260   %36 = extractelement <4 x float> %32, i32 3
 261   %37 = bitcast float %4 to i32
 262   %38 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %37, 4
 263   %39 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %38, float %33, 5
 264   %40 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %39, float %34, 6
 265   %41 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %40, float %35, 7
 266   %42 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %41, float %36, 8
 267   %43 = insertvalue <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %42, float %20, 19
 268   ret <{ i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %43
 269 }
 270
 271 ; Function Attrs: nounwind readnone speculatable
     ; Called first in main_body of load_sampler and load_sampler_nouniform; its
     ; float result is bitcast to i32 and used to compute both GEP indices.
 272 declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #6
 273
 274 ; Function Attrs: nounwind readonly
     ; Called in load_sampler and load_sampler_nouniform with the <8 x i32> and
     ; <4 x i32> values those functions load through addrspace(6).
 275 declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #7
 276
 277
     ; Empty metadata node, referenced as the operand of !amdgpu.uniform and
     ; !invariant.load in the load_sampler tests.
 278 !0 = !{}
 279
     ; Attribute groups:
     ;   #0      - the plain load_<type> tests.
     ;   #1..#4  - the load_i32_hi* tests; each sets the string attribute
     ;             "amdgpu-32bit-address-high-bits" to the value that test
     ;             expects to see written to s1.
     ;   #5      - both load_sampler tests.
     ;   #6, #7  - the two intrinsic declarations.
     ;   #8      - call-site attributes on the intrinsic calls.
 280 attributes #0 = { nounwind }
 281 attributes #1 = { nounwind "amdgpu-32bit-address-high-bits"="0" }
 282 attributes #2 = { nounwind "amdgpu-32bit-address-high-bits"="1" }
 283 attributes #3 = { nounwind "amdgpu-32bit-address-high-bits"="0xffff8000" }
 284 attributes #4 = { nounwind "amdgpu-32bit-address-high-bits"="0xfffffff0" }
 285 attributes #5 = { "InitialPSInputAddr"="45175" }
 286 attributes #6 = { nounwind readnone speculatable }
 287 attributes #7 = { nounwind readonly }
 288 attributes #8 = { nounwind readnone }