} {
gpu.module @kernels {
gpu.func @kernel_add(%arg0 : memref<8xf32>, %arg1 : memref<8xf32>, %arg2 : memref<8xf32>)
- attributes { spv.entry_point_abi = {local_size = dense<[1, 1, 1]>: vector<3xi32>} } kernel {
+ kernel attributes { spv.entry_point_abi = {local_size = dense<[1, 1, 1]>: vector<3xi32> }} {
%0 = "gpu.block_id"() {dimension = "x"} : () -> index
%1 = load %arg0[%0] : memref<8xf32>
%2 = load %arg1[%0] : memref<8xf32>
%cst1 = constant 1 : index
%cst8 = constant 8 : index
- gpu.launch_func"(%cst8, %cst1, %cst1, %cst1, %cst1, %cst1, %arg0, %arg1, %arg2) { kernel = @kernels::@kernel_add }
+ "gpu.launch_func"(%cst8, %cst1, %cst1, %cst1, %cst1, %cst1, %arg0, %arg1, %arg2) { kernel = @kernels::@kernel_add }
: (index, index, index, index, index, index, memref<8xf32>, memref<8xf32>, memref<8xf32>) -> ()
%arg6 = memref_cast %arg5 : memref<?xf32> to memref<*xf32>
call @print_memref_f32(%arg6) : (memref<*xf32>) -> ()
} {
gpu.module @kernels {
gpu.func @kernel_mul(%arg0 : memref<4x4xf32>, %arg1 : memref<4x4xf32>, %arg2 : memref<4x4xf32>)
- attributes { spv.entry_point_abi = {local_size = dense<[1, 1, 1]>: vector<3xi32>} } kernel {
+ kernel attributes { spv.entry_point_abi = {local_size = dense<[1, 1, 1]>: vector<3xi32> }} {
%x = "gpu.block_id"() {dimension = "x"} : () -> index
%y = "gpu.block_id"() {dimension = "y"} : () -> index
%1 = load %arg0[%x, %y] : memref<4x4xf32>
} {
gpu.module @kernels {
gpu.func @kernel_sub(%arg0 : memref<8x4x4xf32>, %arg1 : memref<4x4xf32>, %arg2 : memref<8x4x4xf32>)
- attributes { spv.entry_point_abi = {local_size = dense<[1, 1, 1]>: vector<3xi32>} } kernel {
+ kernel attributes { spv.entry_point_abi = {local_size = dense<[1, 1, 1]>: vector<3xi32> }} {
%x = "gpu.block_id"() {dimension = "x"} : () -> index
%y = "gpu.block_id"() {dimension = "y"} : () -> index
%z = "gpu.block_id"() {dimension = "z"} : () -> index
} {
gpu.module @kernels {
gpu.func @kernel_add(%arg0 : memref<16384xf32>, %arg1 : memref<16384xf32>, %arg2 : memref<16384xf32>)
- attributes { spv.entry_point_abi = {local_size = dense<[128, 1, 1]>: vector<3xi32>} } kernel {
+ kernel attributes { spv.entry_point_abi = {local_size = dense<[128, 1, 1]>: vector<3xi32> }} {
%bid = "gpu.block_id"() {dimension = "x"} : () -> index
%tid = "gpu.thread_id"() {dimension = "x"} : () -> index
%cst = constant 128 : index