From e8c9e65185de3e821e1e482e77906d1d51efa3ec Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 2 Nov 2017 14:52:49 -0700 Subject: [PATCH] intel/fs: Use a pure vertical stride for large register strides MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Register strides higher than 4 are uncommon but they can happen. For instance, if you have a 64-bit extract_u8 operation, we turn that into UB -> UQ MOV with a source stride of 8. Our previous calculation would try to generate a stride of <32;8,8>:ub which is invalid because the maximum horizontal stride is 4. To solve this problem, we instead use a stride of <8;1,0>. As noted in the comment, this does not work as a destination but that's ok as very few things actually generate that stride. Reviewed-by: Samuel Iglesias Gonsálvez Cc: mesa-stable@lists.freedesktop.org --- src/intel/compiler/brw_fs_generator.cpp | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index 0558c82dd32..f9ea4eaedeb 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -90,9 +90,19 @@ brw_reg_from_fs_reg(const struct gen_device_info *devinfo, fs_inst *inst, * different execution size when the number of components * written to each destination GRF is not the same. */ - const unsigned width = MIN2(reg_width, phys_width); - brw_reg = brw_vecn_reg(width, brw_file_from_reg(reg), reg->nr, 0); - brw_reg = stride(brw_reg, width * reg->stride, width, reg->stride); + if (reg->stride > 4) { + /* For registers with an exceptionally large stride, we use a + * width of 1 and only use the vertical stride. This only works + * for sources since destinations require hstride == 1. + */ + assert(reg != &inst->dst); + brw_reg = brw_vec1_reg(brw_file_from_reg(reg), reg->nr, 0); + brw_reg = stride(brw_reg, reg->stride, 1, 0); + } else { + const unsigned width = MIN2(reg_width, phys_width); + brw_reg = brw_vecn_reg(width, brw_file_from_reg(reg), reg->nr, 0); + brw_reg = stride(brw_reg, width * reg->stride, width, reg->stride); + } if (devinfo->gen == 7 && !devinfo->is_haswell) { /* From the IvyBridge PRM (EU Changes by Processor Generation, page 13): -- 2.11.0