2 * Copyright 2014 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
23 #include <linux/firmware.h>
26 #include "amdgpu_gfx.h"
28 #include "vi_structs.h"
30 #include "amdgpu_ucode.h"
31 #include "amdgpu_atombios.h"
32 #include "atombios_i2c.h"
33 #include "clearstate_vi.h"
35 #include "gmc/gmc_8_2_d.h"
36 #include "gmc/gmc_8_2_sh_mask.h"
38 #include "oss/oss_3_0_d.h"
39 #include "oss/oss_3_0_sh_mask.h"
41 #include "bif/bif_5_0_d.h"
42 #include "bif/bif_5_0_sh_mask.h"
44 #include "gca/gfx_8_0_d.h"
45 #include "gca/gfx_8_0_enum.h"
46 #include "gca/gfx_8_0_sh_mask.h"
47 #include "gca/gfx_8_0_enum.h"
49 #include "dce/dce_10_0_d.h"
50 #include "dce/dce_10_0_sh_mask.h"
52 #include "smu/smu_7_1_3_d.h"
/* Number of hardware rings exposed by GFX8: one graphics ring, eight compute. */
54 #define GFX8_NUM_GFX_RINGS 1
55 #define GFX8_NUM_COMPUTE_RINGS 8
/* Per-ASIC "golden" values programmed into GB_ADDR_CONFIG. */
57 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
58 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
59 #define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
60 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
/* Field-encoding helpers: shift a value into its bitfield position within
 * the GB_TILE_MODE0 / GB_MACROTILE_MODE0 register layout. */
62 #define ARRAY_MODE(x) ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
63 #define PIPE_CONFIG(x) ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
64 #define TILE_SPLIT(x) ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
65 #define MICRO_TILE_MODE_NEW(x) ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
66 #define SAMPLE_SPLIT(x) ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
67 #define BANK_WIDTH(x) ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
68 #define BANK_HEIGHT(x) ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
69 #define MACRO_TILE_ASPECT(x) ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
70 #define NUM_BANKS(x) ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
/* Per-block clockgating override bits within RLC_CGTT_MGCG_OVERRIDE. */
72 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK 0x00000001L
73 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK 0x00000002L
74 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK 0x00000004L
75 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK 0x00000008L
76 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK 0x00000010L
77 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK 0x00000020L
/* BPM (bus performance monitor) SerDes command values: set vs. clear.
 * NOTE(review): "CLE" looks like a typo for "CLR" but is kept as-is since
 * callers elsewhere in the file reference this exact name. */
80 #define SET_BPM_SERDES_CMD 1
81 #define CLE_BPM_SERDES_CMD 0
83 /* BPM Register Address*/
/* NOTE(review): the enum's opening declaration and closing brace are not
 * visible in this chunk of the file — these are its member constants. */
85 BPM_REG_CGLS_EN = 0, /* Enable/Disable CGLS */
86 BPM_REG_CGLS_ON, /* ON/OFF CGLS: shall be controlled by RLC FW */
87 BPM_REG_CGCG_OVERRIDE, /* Set/Clear CGCG Override */
88 BPM_REG_MGCG_OVERRIDE, /* Set/Clear MGCG Override */
89 BPM_REG_FGCG_OVERRIDE, /* Set/Clear FGCG Override */
/* Number of entries in the RLC "direct register list" format. */
93 #define RLC_FormatDirectRegListLength 14
/* Firmware images required per ASIC: CE, PFP, ME, MEC (and MEC2 where the
 * part has a second compute microengine) plus the RLC microcode. These
 * declarations let userspace tooling discover the files the module needs. */
95 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
96 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
97 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
98 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
99 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
100 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
/* Stoney has no MEC2 image. */
102 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
103 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
104 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
105 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
106 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
108 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
109 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
110 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
111 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
112 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
113 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
/* Topaz has no MEC2 image. */
115 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
116 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
117 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
118 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
119 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
121 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
122 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
123 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
124 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
125 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
126 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
128 MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
129 MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
130 MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
131 MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
132 MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
133 MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");
135 MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
136 MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
137 MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
138 MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
139 MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
140 MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");
142 MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
143 MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
144 MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
145 MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
146 MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
147 MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
/* Per-VMID GDS register offsets: {base, size, GWS, OA} for VMIDs 0-15.
 * NOTE(review): the initializer's opening brace and terminating "};" are not
 * visible in this chunk. */
149 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
151 {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
152 {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
153 {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
154 {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
155 {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
156 {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
157 {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
158 {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
159 {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
160 {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
161 {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
162 {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
163 {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
164 {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
165 {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
166 {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
/* Tonga A11 golden register settings, consumed as {reg, and_mask, or_value}
 * triples by amdgpu_program_register_sequence(). */
169 static const u32 golden_settings_tonga_a11[] =
171 mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
172 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
173 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
174 mmGB_GPU_ID, 0x0000000f, 0x00000000,
175 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
176 mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
177 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
178 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
179 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
180 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
181 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
182 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
183 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
184 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
185 mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
186 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
/* Tonga common golden registers: raster config, GB_ADDR_CONFIG and SPI CU
 * resource reservation, as {reg, and_mask, or_value} triples. */
189 static const u32 tonga_golden_common_all[] =
191 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
192 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
193 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
194 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
195 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
196 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
197 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
198 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
/* Tonga medium-grain / coarse-grain clockgating init sequence. Triples of
 * {reg, and_mask, or_value}; GRBM_GFX_INDEX is broadcast (0xe0000000) so the
 * CGTT/CGTS writes apply to all shader engines and CUs. */
201 static const u32 tonga_mgcg_cgcg_init[] =
203 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
204 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
205 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
206 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
207 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
208 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
209 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
210 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
211 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
212 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
213 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
214 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
215 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
216 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
217 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
218 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
219 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
220 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
221 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
222 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
223 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
224 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
225 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
226 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
227 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
228 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
229 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
230 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
231 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
232 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
233 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
/* Per-CU (CU0..CU7) CGTS control values. */
234 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
235 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
236 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
237 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
238 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
239 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
240 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
241 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
242 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
243 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
244 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
245 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
246 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
247 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
248 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
249 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
250 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
251 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
252 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
253 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
254 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
255 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
256 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
257 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
258 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
259 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
260 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
261 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
262 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
263 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
264 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
265 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
266 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
267 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
268 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
269 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
270 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
271 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
272 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
273 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
274 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
275 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
276 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
277 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
/* Polaris11 A11 golden register settings ({reg, and_mask, or_value}). */
280 static const u32 golden_settings_polaris11_a11[] =
282 mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
283 mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
284 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
285 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
286 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
287 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
288 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
289 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
290 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
291 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
292 mmSQ_CONFIG, 0x07f80000, 0x01180000,
293 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
294 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
295 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
296 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
297 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
298 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
/* Polaris11 common golden registers ({reg, and_mask, or_value}). */
301 static const u32 polaris11_golden_common_all[] =
303 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
304 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
305 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
306 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
307 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
308 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
/* Polaris10 A11 golden register settings ({reg, and_mask, or_value}). */
311 static const u32 golden_settings_polaris10_a11[] =
313 mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
314 mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
315 mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
316 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
317 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
318 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
319 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
320 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
321 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
322 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
323 mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
324 mmSQ_CONFIG, 0x07f80000, 0x07180000,
325 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
326 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
327 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
328 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
329 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
/* Polaris10 common golden registers ({reg, and_mask, or_value}). */
332 static const u32 polaris10_golden_common_all[] =
334 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
335 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
336 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
337 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
338 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
339 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
340 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
341 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
/* Fiji common golden registers ({reg, and_mask, or_value}). */
344 static const u32 fiji_golden_common_all[] =
346 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
347 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
348 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
349 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
350 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
351 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
352 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
353 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
354 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
355 mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
/* Fiji A10 golden register settings ({reg, and_mask, or_value}). */
358 static const u32 golden_settings_fiji_a10[] =
360 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
361 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
362 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
363 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
364 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
365 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
366 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
367 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
368 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
369 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
370 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
/* Fiji MGCG/CGCG clockgating init sequence ({reg, and_mask, or_value});
 * unlike Tonga's table it carries no per-CU CGTS entries. */
373 static const u32 fiji_mgcg_cgcg_init[] =
375 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
376 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
377 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
378 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
379 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
380 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
381 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
382 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
383 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
384 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
385 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
386 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
387 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
388 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
389 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
390 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
391 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
392 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
393 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
394 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
395 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
396 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
397 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
398 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
399 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
400 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
401 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
402 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
403 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
404 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
405 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
406 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
407 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
408 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
409 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
/* Iceland (Topaz) A11 golden register settings ({reg, and_mask, or_value}). */
412 static const u32 golden_settings_iceland_a11[] =
414 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
415 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
416 mmDB_DEBUG3, 0xc0000000, 0xc0000000,
417 mmGB_GPU_ID, 0x0000000f, 0x00000000,
418 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
419 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
420 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
421 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
422 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
423 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
424 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
425 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
426 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
427 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
428 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
429 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
/* Iceland common golden registers ({reg, and_mask, or_value}). */
432 static const u32 iceland_golden_common_all[] =
434 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
435 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
436 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
437 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
438 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
439 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
440 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
441 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
/* Iceland MGCG/CGCG clockgating init sequence ({reg, and_mask, or_value}).
 * Iceland only has six CUs, hence CGTS entries stop at CU5. */
444 static const u32 iceland_mgcg_cgcg_init[] =
446 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
447 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
448 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
449 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
450 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
451 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
452 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
453 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
454 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
455 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
456 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
457 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
458 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
459 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
460 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
461 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
462 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
463 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
464 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
465 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
466 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
467 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
468 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
469 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
470 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
471 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
472 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
473 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
474 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
475 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
476 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
/* Per-CU (CU0..CU5) CGTS control values. */
477 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
478 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
479 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
480 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
481 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
482 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
483 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
484 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
485 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
486 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
487 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
488 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
489 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
490 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
491 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
492 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
493 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
494 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
495 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
496 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
497 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
498 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
499 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
500 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
501 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
502 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
503 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
504 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
505 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
506 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
507 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
508 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
509 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
/* Carrizo A11 golden register settings ({reg, and_mask, or_value}). */
512 static const u32 cz_golden_settings_a11[] =
514 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
515 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
516 mmGB_GPU_ID, 0x0000000f, 0x00000000,
517 mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
518 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
519 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
520 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
521 mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
522 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
523 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
524 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
525 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
/* Carrizo common golden registers ({reg, and_mask, or_value}). */
528 static const u32 cz_golden_common_all[] =
530 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
531 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
532 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
533 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
534 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
535 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
536 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
537 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
/* Carrizo MGCG/CGCG clockgating init sequence ({reg, and_mask, or_value});
 * CGTS entries cover CU0..CU7. */
540 static const u32 cz_mgcg_cgcg_init[] =
542 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
543 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
544 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
545 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
546 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
547 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
548 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
549 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
550 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
551 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
552 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
553 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
554 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
555 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
556 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
557 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
558 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
559 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
560 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
561 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
562 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
563 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
564 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
565 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
566 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
567 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
568 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
569 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
570 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
571 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
572 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
/* Per-CU (CU0..CU7) CGTS control values. */
573 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
574 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
575 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
576 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
577 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
578 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
579 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
580 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
581 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
582 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
583 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
584 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
585 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
586 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
587 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
588 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
589 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
590 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
591 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
592 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
593 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
594 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
595 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
596 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
597 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
598 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
599 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
600 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
601 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
602 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
603 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
604 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
605 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
606 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
607 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
608 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
609 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
610 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
611 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
612 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
613 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
614 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
615 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
616 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
/* Stoney A11 golden register settings ({reg, and_mask, or_value}). */
619 static const u32 stoney_golden_settings_a11[] =
621 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
622 mmGB_GPU_ID, 0x0000000f, 0x00000000,
623 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
624 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
625 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
626 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
627 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
628 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
629 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
630 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
/* Stoney common golden registers ({reg, and_mask, or_value}). */
633 static const u32 stoney_golden_common_all[] =
635 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
636 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
637 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
638 mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
639 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
640 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
641 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
642 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
/* Stoney MGCG/CGCG clockgating init sequence ({reg, and_mask, or_value});
 * much shorter than the discrete-GPU tables. */
645 static const u32 stoney_mgcg_cgcg_init[] =
647 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
648 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
649 mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
650 mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
651 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
/* Forward declarations for helpers defined later in this file. */
654 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
655 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
656 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
657 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
658 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
659 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
/*
 * gfx_v8_0_init_golden_registers - program the per-ASIC "golden" register
 * tables defined above, dispatching on adev->asic_type.
 *
 * Each amdgpu_program_register_sequence() call applies one of the static
 * {reg, and_mask, or_value} tables. NOTE(review): the switch's case labels
 * and break statements are not visible in this chunk of the file; the
 * segment boundaries below are inferred from which tables each segment
 * programs — confirm against the full source.
 */
661 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
663 	switch (adev->asic_type) {
	/* Iceland (Topaz) segment. */
665 	amdgpu_program_register_sequence(adev,
666 	iceland_mgcg_cgcg_init,
667 	(const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
668 	amdgpu_program_register_sequence(adev,
669 	golden_settings_iceland_a11,
670 	(const u32)ARRAY_SIZE(golden_settings_iceland_a11));
671 	amdgpu_program_register_sequence(adev,
672 	iceland_golden_common_all,
673 	(const u32)ARRAY_SIZE(iceland_golden_common_all));
	/* Fiji segment. */
676 	amdgpu_program_register_sequence(adev,
678 	(const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
679 	amdgpu_program_register_sequence(adev,
680 	golden_settings_fiji_a10,
681 	(const u32)ARRAY_SIZE(golden_settings_fiji_a10));
682 	amdgpu_program_register_sequence(adev,
683 	fiji_golden_common_all,
684 	(const u32)ARRAY_SIZE(fiji_golden_common_all));
	/* Tonga segment. */
688 	amdgpu_program_register_sequence(adev,
689 	tonga_mgcg_cgcg_init,
690 	(const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
691 	amdgpu_program_register_sequence(adev,
692 	golden_settings_tonga_a11,
693 	(const u32)ARRAY_SIZE(golden_settings_tonga_a11));
694 	amdgpu_program_register_sequence(adev,
695 	tonga_golden_common_all,
696 	(const u32)ARRAY_SIZE(tonga_golden_common_all));
	/* Polaris11 (and presumably Polaris12) segment. */
700 	amdgpu_program_register_sequence(adev,
701 	golden_settings_polaris11_a11,
702 	(const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
703 	amdgpu_program_register_sequence(adev,
704 	polaris11_golden_common_all,
705 	(const u32)ARRAY_SIZE(polaris11_golden_common_all));
	/* Polaris10 segment. */
708 	amdgpu_program_register_sequence(adev,
709 	golden_settings_polaris10_a11,
710 	(const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
711 	amdgpu_program_register_sequence(adev,
712 	polaris10_golden_common_all,
713 	(const u32)ARRAY_SIZE(polaris10_golden_common_all));
714 	WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
	/* Board-specific quirk: for revision 0xc7 cards from these three
	 * subsystem vendor/device pairs, issue two I2C transactions to the
	 * device at address 0x96 — presumably a board-controller fixup;
	 * confirm intent against the full driver history. */
715 	if (adev->pdev->revision == 0xc7 &&
716 	((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
717 	(adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
718 	(adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
719 	amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
720 	amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
	/* Carrizo segment. */
724 	amdgpu_program_register_sequence(adev,
726 	(const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
727 	amdgpu_program_register_sequence(adev,
728 	cz_golden_settings_a11,
729 	(const u32)ARRAY_SIZE(cz_golden_settings_a11));
730 	amdgpu_program_register_sequence(adev,
731 	cz_golden_common_all,
732 	(const u32)ARRAY_SIZE(cz_golden_common_all));
	/* Stoney segment. */
735 	amdgpu_program_register_sequence(adev,
736 	stoney_mgcg_cgcg_init,
737 	(const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
738 	amdgpu_program_register_sequence(adev,
739 	stoney_golden_settings_a11,
740 	(const u32)ARRAY_SIZE(stoney_golden_settings_a11));
741 	amdgpu_program_register_sequence(adev,
742 	stoney_golden_common_all,
743 	(const u32)ARRAY_SIZE(stoney_golden_common_all));
/*
 * gfx_v8_0_scratch_init - set up the CP scratch-register allocator.
 *
 * Exposes 7 scratch registers starting at mmSCRATCH_REG0 and marks each one
 * free. NOTE(review): the function's opening brace and the declaration of
 * loop index i are not visible in this chunk.
 */
750 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
754 	adev->gfx.scratch.num_reg = 7;
755 	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
756 	for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
757 	adev->gfx.scratch.free[i] = true;
758 	adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
762 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
764 struct amdgpu_device *adev = ring->adev;
770 r = amdgpu_gfx_scratch_get(adev, &scratch);
772 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
775 WREG32(scratch, 0xCAFEDEAD);
776 r = amdgpu_ring_alloc(ring, 3);
778 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
780 amdgpu_gfx_scratch_free(adev, scratch);
783 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
784 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
785 amdgpu_ring_write(ring, 0xDEADBEEF);
786 amdgpu_ring_commit(ring);
788 for (i = 0; i < adev->usec_timeout; i++) {
789 tmp = RREG32(scratch);
790 if (tmp == 0xDEADBEEF)
794 if (i < adev->usec_timeout) {
795 DRM_INFO("ring test on %d succeeded in %d usecs\n",
798 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
799 ring->idx, scratch, tmp);
802 amdgpu_gfx_scratch_free(adev, scratch);
806 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
808 struct amdgpu_device *adev = ring->adev;
810 struct dma_fence *f = NULL;
815 r = amdgpu_gfx_scratch_get(adev, &scratch);
817 DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
820 WREG32(scratch, 0xCAFEDEAD);
821 memset(&ib, 0, sizeof(ib));
822 r = amdgpu_ib_get(adev, NULL, 256, &ib);
824 DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
827 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
828 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
829 ib.ptr[2] = 0xDEADBEEF;
832 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
836 r = dma_fence_wait_timeout(f, false, timeout);
838 DRM_ERROR("amdgpu: IB test timed out.\n");
842 DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
845 tmp = RREG32(scratch);
846 if (tmp == 0xDEADBEEF) {
847 DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
850 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
855 amdgpu_ib_free(adev, &ib, NULL);
858 amdgpu_gfx_scratch_free(adev, scratch);
863 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) {
864 release_firmware(adev->gfx.pfp_fw);
865 adev->gfx.pfp_fw = NULL;
866 release_firmware(adev->gfx.me_fw);
867 adev->gfx.me_fw = NULL;
868 release_firmware(adev->gfx.ce_fw);
869 adev->gfx.ce_fw = NULL;
870 release_firmware(adev->gfx.rlc_fw);
871 adev->gfx.rlc_fw = NULL;
872 release_firmware(adev->gfx.mec_fw);
873 adev->gfx.mec_fw = NULL;
874 if ((adev->asic_type != CHIP_STONEY) &&
875 (adev->asic_type != CHIP_TOPAZ))
876 release_firmware(adev->gfx.mec2_fw);
877 adev->gfx.mec2_fw = NULL;
879 kfree(adev->gfx.rlc.register_list_format);
/*
 * gfx_v8_0_init_microcode - request, validate and register all GFX firmware.
 *
 * Loads PFP, ME, CE, RLC, MEC and (where available) MEC2 images for the
 * detected ASIC, caches version/feature numbers and the RLC register-list
 * metadata, and, when the SMU loads firmware, fills the common ucode table
 * and accumulates the total firmware size.
 *
 * NOTE(review): this chunk is an extraction with lines dropped between the
 * numbered statements (switch cases, error checks, labels, braces); the
 * visible statements are kept byte-identical below.
 */
882 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
884 const char *chip_name;
887 struct amdgpu_firmware_info *info = NULL;
888 const struct common_firmware_header *header = NULL;
889 const struct gfx_firmware_header_v1_0 *cp_hdr;
890 const struct rlc_firmware_header_v2_0 *rlc_hdr;
891 unsigned int *tmp = NULL, i;
/* pick the firmware file prefix from the ASIC type */
895 switch (adev->asic_type) {
903 chip_name = "carrizo";
909 chip_name = "polaris11";
912 chip_name = "polaris10";
915 chip_name = "polaris12";
918 chip_name = "stoney";
/* PFP (prefetch parser) microcode */
924 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
925 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
928 err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
931 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
932 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
933 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
/* ME (micro engine) microcode */
935 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
936 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
939 err = amdgpu_ucode_validate(adev->gfx.me_fw);
942 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
943 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
944 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
/* CE (constant engine) microcode */
946 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
947 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
950 err = amdgpu_ucode_validate(adev->gfx.ce_fw);
953 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
954 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
955 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
/* RLC microcode plus its v2 header metadata */
957 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
958 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
961 err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
962 rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
963 adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
964 adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
/* copy the RLC save/restore layout out of the firmware header */
966 adev->gfx.rlc.save_and_restore_offset =
967 le32_to_cpu(rlc_hdr->save_and_restore_offset);
968 adev->gfx.rlc.clear_state_descriptor_offset =
969 le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
970 adev->gfx.rlc.avail_scratch_ram_locations =
971 le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
972 adev->gfx.rlc.reg_restore_list_size =
973 le32_to_cpu(rlc_hdr->reg_restore_list_size);
974 adev->gfx.rlc.reg_list_format_start =
975 le32_to_cpu(rlc_hdr->reg_list_format_start);
976 adev->gfx.rlc.reg_list_format_separate_start =
977 le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
978 adev->gfx.rlc.starting_offsets_start =
979 le32_to_cpu(rlc_hdr->starting_offsets_start);
980 adev->gfx.rlc.reg_list_format_size_bytes =
981 le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
982 adev->gfx.rlc.reg_list_size_bytes =
983 le32_to_cpu(rlc_hdr->reg_list_size_bytes);
/* single allocation holds both the format list and the restore list */
985 adev->gfx.rlc.register_list_format =
986 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
987 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
989 if (!adev->gfx.rlc.register_list_format) {
/* endian-convert the format array out of the firmware blob */
994 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
995 le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
996 for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
997 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
/* restore list lives immediately after the format list */
999 adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1001 tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1002 le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1003 for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
1004 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
/* MEC (compute micro engine) microcode */
1006 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1007 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1010 err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1013 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1014 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1015 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
/* MEC2 exists on everything except Stoney and Topaz */
1017 if ((adev->asic_type != CHIP_STONEY) &&
1018 (adev->asic_type != CHIP_TOPAZ)) {
1019 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1020 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1022 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1025 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1026 adev->gfx.mec2_fw->data;
1027 adev->gfx.mec2_fw_version =
1028 le32_to_cpu(cp_hdr->header.ucode_version);
1029 adev->gfx.mec2_feature_version =
1030 le32_to_cpu(cp_hdr->ucode_feature_version);
1033 adev->gfx.mec2_fw = NULL;
/* when the SMU loads firmware, publish every image in the ucode table */
1037 if (adev->firmware.smu_load) {
1038 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1039 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1040 info->fw = adev->gfx.pfp_fw;
1041 header = (const struct common_firmware_header *)info->fw->data;
1042 adev->firmware.fw_size +=
1043 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1045 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1046 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1047 info->fw = adev->gfx.me_fw;
1048 header = (const struct common_firmware_header *)info->fw->data;
1049 adev->firmware.fw_size +=
1050 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1052 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1053 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1054 info->fw = adev->gfx.ce_fw;
1055 header = (const struct common_firmware_header *)info->fw->data;
1056 adev->firmware.fw_size +=
1057 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1059 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1060 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1061 info->fw = adev->gfx.rlc_fw;
1062 header = (const struct common_firmware_header *)info->fw->data;
1063 adev->firmware.fw_size +=
1064 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1066 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1067 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1068 info->fw = adev->gfx.mec_fw;
1069 header = (const struct common_firmware_header *)info->fw->data;
1070 adev->firmware.fw_size +=
1071 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1073 /* we need account JT in */
1074 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1075 adev->firmware.fw_size +=
1076 ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1078 if (amdgpu_sriov_vf(adev)) {
1079 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1080 info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1081 info->fw = adev->gfx.mec_fw;
/*
 * NOTE(review): le32_to_cpu() is applied to the host constant
 * 64 * PAGE_SIZE, not to data read from a firmware header; on a
 * big-endian kernel this byte-swaps the size.  Looks like a bug —
 * confirm against upstream and drop the conversion.
 */
1082 adev->firmware.fw_size +=
1083 ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1086 if (adev->gfx.mec2_fw) {
1087 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1088 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1089 info->fw = adev->gfx.mec2_fw;
1090 header = (const struct common_firmware_header *)info->fw->data;
1091 adev->firmware.fw_size +=
1092 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
/* error path: report the failing file and drop everything loaded so far */
1100 "gfx8: Failed to load firmware \"%s\"\n",
1102 release_firmware(adev->gfx.pfp_fw);
1103 adev->gfx.pfp_fw = NULL;
1104 release_firmware(adev->gfx.me_fw);
1105 adev->gfx.me_fw = NULL;
1106 release_firmware(adev->gfx.ce_fw);
1107 adev->gfx.ce_fw = NULL;
1108 release_firmware(adev->gfx.rlc_fw);
1109 adev->gfx.rlc_fw = NULL;
1110 release_firmware(adev->gfx.mec_fw);
1111 adev->gfx.mec_fw = NULL;
1112 release_firmware(adev->gfx.mec2_fw);
1113 adev->gfx.mec2_fw = NULL;
/*
 * gfx_v8_0_get_csb_buffer - emit the clear-state PM4 stream into @buffer.
 *
 * Builds the clear-state buffer consumed by the RLC: preamble begin,
 * context control, every SECT_CONTEXT register extent from
 * adev->gfx.rlc.cs_data, the per-ASIC raster config pair, preamble end,
 * and a final CLEAR_STATE packet.  All dwords are stored little-endian.
 *
 * NOTE(review): extraction dropped interior lines (count init, closing
 * braces); visible statements kept byte-identical.
 */
1118 static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
1119 volatile u32 *buffer)
1122 const struct cs_section_def *sect = NULL;
1123 const struct cs_extent_def *ext = NULL;
/* nothing to emit without a clear-state table */
1125 if (adev->gfx.rlc.cs_data == NULL)
1130 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1131 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1133 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1134 buffer[count++] = cpu_to_le32(0x80000000);
1135 buffer[count++] = cpu_to_le32(0x80000000);
/* replay every context-register extent from the cs_data table */
1137 for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1138 for (ext = sect->section; ext->extent != NULL; ++ext) {
1139 if (sect->id == SECT_CONTEXT) {
1141 cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1142 buffer[count++] = cpu_to_le32(ext->reg_index -
1143 PACKET3_SET_CONTEXT_REG_START);
1144 for (i = 0; i < ext->reg_count; i++)
1145 buffer[count++] = cpu_to_le32(ext->extent[i]);
/* raster config for SE0/SH0 — values computed elsewhere per ASIC */
1152 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
1153 buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
1154 PACKET3_SET_CONTEXT_REG_START);
1155 buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
1156 buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);
1158 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1159 buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1161 buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1162 buffer[count++] = cpu_to_le32(0);
/*
 * cz_init_cp_jump_table - copy each CP engine's jump table into the RLC
 * cp_table BO (Carrizo/Stoney power-gating support).
 *
 * Walks engines me=0..max_me-1 (CE, PFP, ME, MEC, optionally MEC2) and
 * copies each firmware image's jump table (jt_offset/jt_size dwords from
 * the gfx v1 header) into consecutive regions of cp_table_ptr.
 *
 * NOTE(review): the dropped line after the CHIP_CARRIZO check presumably
 * raises max_me to 5 so the me == 4 (MEC2) branch is reachable there —
 * confirm; with max_me = 4 that branch never runs.
 */
1165 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1167 const __le32 *fw_data;
1168 volatile u32 *dst_ptr;
1169 int me, i, max_me = 4;
1171 u32 table_offset, table_size;
1173 if (adev->asic_type == CHIP_CARRIZO)
1176 /* write the cp table buffer */
1177 dst_ptr = adev->gfx.rlc.cp_table_ptr;
1178 for (me = 0; me < max_me; me++) {
/* me == 0: constant engine */
1180 const struct gfx_firmware_header_v1_0 *hdr =
1181 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1182 fw_data = (const __le32 *)
1183 (adev->gfx.ce_fw->data +
1184 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1185 table_offset = le32_to_cpu(hdr->jt_offset);
1186 table_size = le32_to_cpu(hdr->jt_size);
1187 } else if (me == 1) {
/* prefetch parser */
1188 const struct gfx_firmware_header_v1_0 *hdr =
1189 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1190 fw_data = (const __le32 *)
1191 (adev->gfx.pfp_fw->data +
1192 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1193 table_offset = le32_to_cpu(hdr->jt_offset);
1194 table_size = le32_to_cpu(hdr->jt_size);
1195 } else if (me == 2) {
/* micro engine */
1196 const struct gfx_firmware_header_v1_0 *hdr =
1197 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1198 fw_data = (const __le32 *)
1199 (adev->gfx.me_fw->data +
1200 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1201 table_offset = le32_to_cpu(hdr->jt_offset);
1202 table_size = le32_to_cpu(hdr->jt_size);
1203 } else if (me == 3) {
/* compute micro engine 1 */
1204 const struct gfx_firmware_header_v1_0 *hdr =
1205 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1206 fw_data = (const __le32 *)
1207 (adev->gfx.mec_fw->data +
1208 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1209 table_offset = le32_to_cpu(hdr->jt_offset);
1210 table_size = le32_to_cpu(hdr->jt_size);
1211 } else if (me == 4) {
/* compute micro engine 2 */
1212 const struct gfx_firmware_header_v1_0 *hdr =
1213 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1214 fw_data = (const __le32 *)
1215 (adev->gfx.mec2_fw->data +
1216 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1217 table_offset = le32_to_cpu(hdr->jt_offset);
1218 table_size = le32_to_cpu(hdr->jt_size);
/* copy this engine's table, dword by dword, at the running offset */
1221 for (i = 0; i < table_size; i ++) {
1222 dst_ptr[bo_offset + i] =
1223 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1226 bo_offset += table_size;
1230 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
1234 /* clear state block */
1235 if (adev->gfx.rlc.clear_state_obj) {
1236 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1237 if (unlikely(r != 0))
1238 dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
1239 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1240 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1241 amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
1242 adev->gfx.rlc.clear_state_obj = NULL;
1245 /* jump table block */
1246 if (adev->gfx.rlc.cp_table_obj) {
1247 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1248 if (unlikely(r != 0))
1249 dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1250 amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
1251 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1252 amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
1253 adev->gfx.rlc.cp_table_obj = NULL;
/*
 * gfx_v8_0_rlc_init - allocate and populate the RLC buffer objects.
 *
 * Creates, pins and maps a VRAM BO for the clear-state buffer, fills it
 * via gfx_v8_0_get_csb_buffer(), and on Carrizo/Stoney additionally
 * allocates the CP jump-table BO and fills it via cz_init_cp_jump_table().
 *
 * NOTE(review): extraction dropped the error-check/return lines between
 * the numbered statements; visible statements kept byte-identical.
 */
1257 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1259 volatile u32 *dst_ptr;
1261 const struct cs_section_def *cs_data;
1264 adev->gfx.rlc.cs_data = vi_cs_data;
1266 cs_data = adev->gfx.rlc.cs_data;
1269 /* clear state block */
1270 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1272 if (adev->gfx.rlc.clear_state_obj == NULL) {
/* CPU-accessible, contiguous VRAM so the CSB can be written directly */
1273 r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
1274 AMDGPU_GEM_DOMAIN_VRAM,
1275 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1276 AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
1278 &adev->gfx.rlc.clear_state_obj);
1280 dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1281 gfx_v8_0_rlc_fini(adev);
1285 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1286 if (unlikely(r != 0)) {
1287 gfx_v8_0_rlc_fini(adev);
1290 r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
1291 &adev->gfx.rlc.clear_state_gpu_addr);
1293 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1294 dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
1295 gfx_v8_0_rlc_fini(adev);
1299 r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
1301 dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
1302 gfx_v8_0_rlc_fini(adev);
1305 /* set up the cs buffer */
1306 dst_ptr = adev->gfx.rlc.cs_ptr;
1307 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1308 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1309 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
/* CP jump table is only needed on the APUs with GFX power gating */
1312 if ((adev->asic_type == CHIP_CARRIZO) ||
1313 (adev->asic_type == CHIP_STONEY)) {
1314 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1315 if (adev->gfx.rlc.cp_table_obj == NULL) {
1316 r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
1317 AMDGPU_GEM_DOMAIN_VRAM,
1318 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1319 AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
1321 &adev->gfx.rlc.cp_table_obj);
1323 dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1328 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1329 if (unlikely(r != 0)) {
1330 dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1333 r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
1334 &adev->gfx.rlc.cp_table_gpu_addr);
1336 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1337 dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
1340 r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
1342 dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
1346 cz_init_cp_jump_table(adev);
1348 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1349 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1355 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1359 if (adev->gfx.mec.hpd_eop_obj) {
1360 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1361 if (unlikely(r != 0))
1362 dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1363 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1364 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1365 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1366 adev->gfx.mec.hpd_eop_obj = NULL;
/*
 * gfx_v8_0_kiq_init_ring - initialize the kernel interface queue ring.
 *
 * Configures the KIQ as a doorbell-driven ring on the KIQ doorbell slot
 * and registers it with the ring layer under the KIQ interrupt source.
 *
 * NOTE(review): extraction dropped the body of the mec2_fw branch —
 * presumably it selects me/pipe/queue placement depending on whether
 * MEC2 firmware is present; confirm against the full file.
 */
1370 static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev,
1371 struct amdgpu_ring *ring,
1372 struct amdgpu_irq_src *irq)
1377 ring->ring_obj = NULL;
1378 ring->use_doorbell = true;
1379 ring->doorbell_index = AMDGPU_DOORBELL_KIQ;
1380 if (adev->gfx.mec2_fw) {
1390 sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue);
1391 r = amdgpu_ring_init(adev, ring, 1024,
1392 irq, AMDGPU_CP_KIQ_IRQ_DRIVER0);
1394 dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
/*
 * gfx_v8_0_kiq_free_ring - tear down the KIQ ring.
 *
 * NOTE(review): one interior line was dropped by extraction after
 * amdgpu_ring_fini() — likely clearing the irq association; confirm.
 */
1399 static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring,
1400 struct amdgpu_irq_src *irq)
1402 amdgpu_ring_fini(ring);
/* per-queue HPD (hardware packet descriptor) EOP area size, in bytes */
1406 #define MEC_HPD_SIZE 2048

/*
 * gfx_v8_0_mec_init - allocate and zero the MEC HPD EOP buffer.
 *
 * Creates one GTT BO sized num_queue * MEC_HPD_SIZE, pins it, maps it,
 * clears it and unmaps it again; gfx_v8_0_mec_fini() is the rollback on
 * every failure path (dropped lines presumably return r after it).
 */
1408 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1414 * we assign only 1 pipe because all other pipes will
1417 adev->gfx.mec.num_mec = 1;
1418 adev->gfx.mec.num_pipe = 1;
/* 8 queues per pipe */
1419 adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
1421 if (adev->gfx.mec.hpd_eop_obj == NULL) {
1422 r = amdgpu_bo_create(adev,
1423 adev->gfx.mec.num_queue * MEC_HPD_SIZE,
1425 AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1426 &adev->gfx.mec.hpd_eop_obj);
1428 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1433 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1434 if (unlikely(r != 0)) {
1435 gfx_v8_0_mec_fini(adev);
1438 r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1439 &adev->gfx.mec.hpd_eop_gpu_addr);
1441 dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
1442 gfx_v8_0_mec_fini(adev);
1445 r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1447 dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
1448 gfx_v8_0_mec_fini(adev);
/* start with a clean EOP area for every queue */
1452 memset(hpd, 0, adev->gfx.mec.num_queue * MEC_HPD_SIZE);
1454 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1455 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1460 static void gfx_v8_0_kiq_fini(struct amdgpu_device *adev)
1462 struct amdgpu_kiq *kiq = &adev->gfx.kiq;
1464 amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
1465 kiq->eop_obj = NULL;
1468 static int gfx_v8_0_kiq_init(struct amdgpu_device *adev)
1472 struct amdgpu_kiq *kiq = &adev->gfx.kiq;
1474 r = amdgpu_bo_create_kernel(adev, MEC_HPD_SIZE, PAGE_SIZE,
1475 AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
1476 &kiq->eop_gpu_addr, (void **)&hpd);
1478 dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
1482 memset(hpd, 0, MEC_HPD_SIZE);
1484 amdgpu_bo_kunmap(kiq->eop_obj);
/*
 * Pre-assembled GFX8 compute shader machine code used by the EDC GPR
 * workaround (gfx_v8_0_do_edc_gpr_workarounds): initializes a range of
 * VGPRs.  Opaque binary — do not edit by hand.
 */
1489 static const u32 vgpr_init_compute_shader[] =
1491 0x7e000209, 0x7e020208,
1492 0x7e040207, 0x7e060206,
1493 0x7e080205, 0x7e0a0204,
1494 0x7e0c0203, 0x7e0e0202,
1495 0x7e100201, 0x7e120200,
1496 0x7e140209, 0x7e160208,
1497 0x7e180207, 0x7e1a0206,
1498 0x7e1c0205, 0x7e1e0204,
1499 0x7e200203, 0x7e220202,
1500 0x7e240201, 0x7e260200,
1501 0x7e280209, 0x7e2a0208,
1502 0x7e2c0207, 0x7e2e0206,
1503 0x7e300205, 0x7e320204,
1504 0x7e340203, 0x7e360202,
1505 0x7e380201, 0x7e3a0200,
1506 0x7e3c0209, 0x7e3e0208,
1507 0x7e400207, 0x7e420206,
1508 0x7e440205, 0x7e460204,
1509 0x7e480203, 0x7e4a0202,
1510 0x7e4c0201, 0x7e4e0200,
1511 0x7e500209, 0x7e520208,
1512 0x7e540207, 0x7e560206,
1513 0x7e580205, 0x7e5a0204,
1514 0x7e5c0203, 0x7e5e0202,
1515 0x7e600201, 0x7e620200,
1516 0x7e640209, 0x7e660208,
1517 0x7e680207, 0x7e6a0206,
1518 0x7e6c0205, 0x7e6e0204,
1519 0x7e700203, 0x7e720202,
1520 0x7e740201, 0x7e760200,
1521 0x7e780209, 0x7e7a0208,
1522 0x7e7c0207, 0x7e7e0206,
1523 0xbf8a0000, 0xbf810000,

/*
 * Companion shader for the same workaround: initializes a range of
 * SGPRs.  Opaque binary — do not edit by hand.
 */
1526 static const u32 sgpr_init_compute_shader[] =
1528 0xbe8a0100, 0xbe8c0102,
1529 0xbe8e0104, 0xbe900106,
1530 0xbe920108, 0xbe940100,
1531 0xbe960102, 0xbe980104,
1532 0xbe9a0106, 0xbe9c0108,
1533 0xbe9e0100, 0xbea00102,
1534 0xbea20104, 0xbea40106,
1535 0xbea60108, 0xbea80100,
1536 0xbeaa0102, 0xbeac0104,
1537 0xbeae0106, 0xbeb00108,
1538 0xbeb20100, 0xbeb40102,
1539 0xbeb60104, 0xbeb80106,
1540 0xbeba0108, 0xbebc0100,
1541 0xbebe0102, 0xbec00104,
1542 0xbec20106, 0xbec40108,
1543 0xbec60100, 0xbec80102,
1544 0xbee60004, 0xbee70005,
1545 0xbeea0006, 0xbeeb0007,
1546 0xbee80008, 0xbee90009,
1547 0xbefc0000, 0xbf8a0000,
1548 0xbf810000, 0x00000000,
/*
 * Register/value pairs programmed (via SET_SH_REG) before dispatching
 * the VGPR-init shader in gfx_v8_0_do_edc_gpr_workarounds().
 * Layout: alternating register offset, value.
 */
1551 static const u32 vgpr_init_regs[] =
1553 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1554 mmCOMPUTE_RESOURCE_LIMITS, 0,
1555 mmCOMPUTE_NUM_THREAD_X, 256*4,
1556 mmCOMPUTE_NUM_THREAD_Y, 1,
1557 mmCOMPUTE_NUM_THREAD_Z, 1,
1558 mmCOMPUTE_PGM_RSRC2, 20,
1559 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1560 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1561 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1562 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1563 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1564 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1565 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1566 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1567 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1568 mmCOMPUTE_USER_DATA_9, 0xedcedc09,

/*
 * Same layout, for the first SGPR-init dispatch (SE mask 0x0f —
 * first group of shader engines).
 */
1571 static const u32 sgpr1_init_regs[] =
1573 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1574 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1575 mmCOMPUTE_NUM_THREAD_X, 256*5,
1576 mmCOMPUTE_NUM_THREAD_Y, 1,
1577 mmCOMPUTE_NUM_THREAD_Z, 1,
1578 mmCOMPUTE_PGM_RSRC2, 20,
1579 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1580 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1581 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1582 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1583 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1584 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1585 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1586 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1587 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1588 mmCOMPUTE_USER_DATA_9, 0xedcedc09,

/*
 * Same layout, for the second SGPR-init dispatch (SE mask 0xf0 —
 * remaining shader engines).
 */
1591 static const u32 sgpr2_init_regs[] =
1593 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1594 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1595 mmCOMPUTE_NUM_THREAD_X, 256*5,
1596 mmCOMPUTE_NUM_THREAD_Y, 1,
1597 mmCOMPUTE_NUM_THREAD_Z, 1,
1598 mmCOMPUTE_PGM_RSRC2, 20,
1599 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1600 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1601 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1602 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1603 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1604 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1605 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1606 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1607 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1608 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
/*
 * SEC/DED (single-error-correct / double-error-detect) EDC counter
 * registers, read back at the end of the GPR workaround to clear them.
 * NOTE(review): extraction dropped several entries from this table.
 */
1611 static const u32 sec_ded_counter_registers[] =
1614 mmCPC_EDC_SCRATCH_CNT,
1615 mmCPC_EDC_UCODE_CNT,
1622 mmDC_EDC_CSINVOC_CNT,
1623 mmDC_EDC_RESTORE_CNT,
1629 mmSQC_ATC_EDC_GATCL1_CNT,
1635 mmTCP_ATC_EDC_GATCL1_CNT,
/*
 * gfx_v8_0_do_edc_gpr_workarounds - initialize all GPRs to clear stale
 * EDC state (Carrizo only).
 *
 * Builds one IB containing three register-setup + dispatch sequences
 * (VGPR init, then SGPR init on each shader-engine group), runs it on
 * compute ring 0, waits for the fence, re-enables EDC modes, and reads
 * back every SEC/DED counter to clear it.
 *
 * NOTE(review): extraction dropped interior lines (declarations, error
 * checks, goto labels); visible statements kept byte-identical.
 */
1640 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1642 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1643 struct amdgpu_ib ib;
1644 struct dma_fence *f = NULL;
1647 unsigned total_size, vgpr_offset, sgpr_offset;
1650 /* only supported on CZ */
1651 if (adev->asic_type != CHIP_CARRIZO)
1654 /* bail if the compute ring is not ready */
/* disable EDC while the init shaders run; restored below */
1658 tmp = RREG32(mmGB_EDC_MODE);
1659 WREG32(mmGB_EDC_MODE, 0);
/* size of each dispatch sequence: SET_SH_REG triples + PGM address + dispatch + flush */
1662 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1664 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1666 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1667 total_size = ALIGN(total_size, 256);
1668 vgpr_offset = total_size;
1669 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1670 sgpr_offset = total_size;
1671 total_size += sizeof(sgpr_init_compute_shader);
1673 /* allocate an indirect buffer to put the commands in */
1674 memset(&ib, 0, sizeof(ib));
1675 r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1677 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1681 /* load the compute shaders */
1682 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1683 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1685 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1686 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1688 /* init the ib length to 0 */
/* --- sequence 1: VGPR init dispatch --- */
1692 /* write the register state for the compute dispatch */
1693 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1694 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1695 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1696 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1698 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1699 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1700 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1701 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1702 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1703 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1705 /* write dispatch packet */
1706 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1707 ib.ptr[ib.length_dw++] = 8; /* x */
1708 ib.ptr[ib.length_dw++] = 1; /* y */
1709 ib.ptr[ib.length_dw++] = 1; /* z */
1710 ib.ptr[ib.length_dw++] =
1711 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1713 /* write CS partial flush packet */
1714 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1715 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
/* --- sequence 2: SGPR init, first SE group --- */
1718 /* write the register state for the compute dispatch */
1719 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1720 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1721 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1722 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1724 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1725 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1726 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1727 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1728 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1729 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1731 /* write dispatch packet */
1732 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1733 ib.ptr[ib.length_dw++] = 8; /* x */
1734 ib.ptr[ib.length_dw++] = 1; /* y */
1735 ib.ptr[ib.length_dw++] = 1; /* z */
1736 ib.ptr[ib.length_dw++] =
1737 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1739 /* write CS partial flush packet */
1740 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1741 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
/* --- sequence 3: SGPR init, second SE group --- */
1744 /* write the register state for the compute dispatch */
1745 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1746 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1747 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1748 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1750 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1751 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1752 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1753 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1754 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1755 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1757 /* write dispatch packet */
1758 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1759 ib.ptr[ib.length_dw++] = 8; /* x */
1760 ib.ptr[ib.length_dw++] = 1; /* y */
1761 ib.ptr[ib.length_dw++] = 1; /* z */
1762 ib.ptr[ib.length_dw++] =
1763 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1765 /* write CS partial flush packet */
1766 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1767 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1769 /* shedule the ib on the ring */
1770 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
1772 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1776 /* wait for the GPU to finish processing the IB */
1777 r = dma_fence_wait(f, false);
1779 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
/* re-enable EDC with DED mode and FED propagation */
1783 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1784 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1785 WREG32(mmGB_EDC_MODE, tmp);
1787 tmp = RREG32(mmCC_GC_EDC_CONFIG);
1788 tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1789 WREG32(mmCC_GC_EDC_CONFIG, tmp);
1792 /* read back registers to clear the counters */
1793 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1794 RREG32(sec_ded_counter_registers[i]);
1797 amdgpu_ib_free(adev, &ib, NULL);
1803 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1806 u32 mc_shared_chmap, mc_arb_ramcfg;
1807 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1811 switch (adev->asic_type) {
1813 adev->gfx.config.max_shader_engines = 1;
1814 adev->gfx.config.max_tile_pipes = 2;
1815 adev->gfx.config.max_cu_per_sh = 6;
1816 adev->gfx.config.max_sh_per_se = 1;
1817 adev->gfx.config.max_backends_per_se = 2;
1818 adev->gfx.config.max_texture_channel_caches = 2;
1819 adev->gfx.config.max_gprs = 256;
1820 adev->gfx.config.max_gs_threads = 32;
1821 adev->gfx.config.max_hw_contexts = 8;
1823 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1824 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1825 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1826 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1827 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1830 adev->gfx.config.max_shader_engines = 4;
1831 adev->gfx.config.max_tile_pipes = 16;
1832 adev->gfx.config.max_cu_per_sh = 16;
1833 adev->gfx.config.max_sh_per_se = 1;
1834 adev->gfx.config.max_backends_per_se = 4;
1835 adev->gfx.config.max_texture_channel_caches = 16;
1836 adev->gfx.config.max_gprs = 256;
1837 adev->gfx.config.max_gs_threads = 32;
1838 adev->gfx.config.max_hw_contexts = 8;
1840 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1841 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1842 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1843 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1844 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1846 case CHIP_POLARIS11:
1847 case CHIP_POLARIS12:
1848 ret = amdgpu_atombios_get_gfx_info(adev);
1851 adev->gfx.config.max_gprs = 256;
1852 adev->gfx.config.max_gs_threads = 32;
1853 adev->gfx.config.max_hw_contexts = 8;
1855 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1856 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1857 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1858 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1859 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1861 case CHIP_POLARIS10:
1862 ret = amdgpu_atombios_get_gfx_info(adev);
1865 adev->gfx.config.max_gprs = 256;
1866 adev->gfx.config.max_gs_threads = 32;
1867 adev->gfx.config.max_hw_contexts = 8;
1869 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1870 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1871 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1872 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1873 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1876 adev->gfx.config.max_shader_engines = 4;
1877 adev->gfx.config.max_tile_pipes = 8;
1878 adev->gfx.config.max_cu_per_sh = 8;
1879 adev->gfx.config.max_sh_per_se = 1;
1880 adev->gfx.config.max_backends_per_se = 2;
1881 adev->gfx.config.max_texture_channel_caches = 8;
1882 adev->gfx.config.max_gprs = 256;
1883 adev->gfx.config.max_gs_threads = 32;
1884 adev->gfx.config.max_hw_contexts = 8;
1886 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1887 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1888 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1889 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1890 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1893 adev->gfx.config.max_shader_engines = 1;
1894 adev->gfx.config.max_tile_pipes = 2;
1895 adev->gfx.config.max_sh_per_se = 1;
1896 adev->gfx.config.max_backends_per_se = 2;
1898 switch (adev->pdev->revision) {
1906 adev->gfx.config.max_cu_per_sh = 8;
1916 adev->gfx.config.max_cu_per_sh = 6;
1923 adev->gfx.config.max_cu_per_sh = 6;
1932 adev->gfx.config.max_cu_per_sh = 4;
1936 adev->gfx.config.max_texture_channel_caches = 2;
1937 adev->gfx.config.max_gprs = 256;
1938 adev->gfx.config.max_gs_threads = 32;
1939 adev->gfx.config.max_hw_contexts = 8;
1941 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1942 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1943 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1944 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1945 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1948 adev->gfx.config.max_shader_engines = 1;
1949 adev->gfx.config.max_tile_pipes = 2;
1950 adev->gfx.config.max_sh_per_se = 1;
1951 adev->gfx.config.max_backends_per_se = 1;
1953 switch (adev->pdev->revision) {
1960 adev->gfx.config.max_cu_per_sh = 3;
1966 adev->gfx.config.max_cu_per_sh = 2;
1970 adev->gfx.config.max_texture_channel_caches = 2;
1971 adev->gfx.config.max_gprs = 256;
1972 adev->gfx.config.max_gs_threads = 16;
1973 adev->gfx.config.max_hw_contexts = 8;
1975 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1976 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1977 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1978 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1979 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1982 adev->gfx.config.max_shader_engines = 2;
1983 adev->gfx.config.max_tile_pipes = 4;
1984 adev->gfx.config.max_cu_per_sh = 2;
1985 adev->gfx.config.max_sh_per_se = 1;
1986 adev->gfx.config.max_backends_per_se = 2;
1987 adev->gfx.config.max_texture_channel_caches = 4;
1988 adev->gfx.config.max_gprs = 256;
1989 adev->gfx.config.max_gs_threads = 32;
1990 adev->gfx.config.max_hw_contexts = 8;
1992 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1993 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1994 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1995 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1996 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
2000 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
2001 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
2002 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
2004 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
2005 adev->gfx.config.mem_max_burst_length_bytes = 256;
2006 if (adev->flags & AMD_IS_APU) {
2007 /* Get memory bank mapping mode. */
2008 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
2009 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
2010 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
2012 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
2013 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
2014 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
2016 /* Validate settings in case only one DIMM installed. */
2017 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
2018 dimm00_addr_map = 0;
2019 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
2020 dimm01_addr_map = 0;
2021 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
2022 dimm10_addr_map = 0;
2023 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
2024 dimm11_addr_map = 0;
2026 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
2027 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
2028 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
2029 adev->gfx.config.mem_row_size_in_kb = 2;
2031 adev->gfx.config.mem_row_size_in_kb = 1;
2033 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
2034 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2035 if (adev->gfx.config.mem_row_size_in_kb > 4)
2036 adev->gfx.config.mem_row_size_in_kb = 4;
2039 adev->gfx.config.shader_engine_tile_size = 32;
2040 adev->gfx.config.num_gpus = 1;
2041 adev->gfx.config.multi_gpu_tile_size = 64;
2043 /* fix up row size */
2044 switch (adev->gfx.config.mem_row_size_in_kb) {
2047 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
2050 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
2053 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
2056 adev->gfx.config.gb_addr_config = gb_addr_config;
2061 static int gfx_v8_0_sw_init(void *handle)
2064 struct amdgpu_ring *ring;
2065 struct amdgpu_kiq *kiq;
2066 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2069 r = amdgpu_irq_add_id(adev, 178, &adev->gfx.kiq.irq);
2074 r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
2078 /* Privileged reg */
2079 r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
2083 /* Privileged inst */
2084 r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
2088 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2090 gfx_v8_0_scratch_init(adev);
2092 r = gfx_v8_0_init_microcode(adev);
2094 DRM_ERROR("Failed to load gfx firmware!\n");
2098 r = gfx_v8_0_rlc_init(adev);
2100 DRM_ERROR("Failed to init rlc BOs!\n");
2104 r = gfx_v8_0_mec_init(adev);
2106 DRM_ERROR("Failed to init MEC BOs!\n");
2110 r = gfx_v8_0_kiq_init(adev);
2112 DRM_ERROR("Failed to init KIQ BOs!\n");
2116 kiq = &adev->gfx.kiq;
2117 r = gfx_v8_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2121 /* set up the gfx ring */
2122 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2123 ring = &adev->gfx.gfx_ring[i];
2124 ring->ring_obj = NULL;
2125 sprintf(ring->name, "gfx");
2126 /* no gfx doorbells on iceland */
2127 if (adev->asic_type != CHIP_TOPAZ) {
2128 ring->use_doorbell = true;
2129 ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2132 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2133 AMDGPU_CP_IRQ_GFX_EOP);
2138 /* set up the compute queues */
2139 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2142 /* max 32 queues per MEC */
2143 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
2144 DRM_ERROR("Too many (%d) compute rings!\n", i);
2147 ring = &adev->gfx.compute_ring[i];
2148 ring->ring_obj = NULL;
2149 ring->use_doorbell = true;
2150 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
2151 ring->me = 1; /* first MEC */
2153 ring->queue = i % 8;
2154 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2155 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
2156 /* type-2 packets are deprecated on MEC, use type-3 instead */
2157 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2163 /* reserve GDS, GWS and OA resource for gfx */
2164 r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
2165 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
2166 &adev->gds.gds_gfx_bo, NULL, NULL);
2170 r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
2171 PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
2172 &adev->gds.gws_gfx_bo, NULL, NULL);
2176 r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
2177 PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
2178 &adev->gds.oa_gfx_bo, NULL, NULL);
2182 adev->gfx.ce_ram_size = 0x8000;
2184 r = gfx_v8_0_gpu_early_init(adev);
2191 static int gfx_v8_0_sw_fini(void *handle)
2194 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2196 amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2197 amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2198 amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2200 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2201 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2202 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2203 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2204 gfx_v8_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2206 gfx_v8_0_kiq_fini(adev);
2207 gfx_v8_0_mec_fini(adev);
2208 gfx_v8_0_rlc_fini(adev);
2209 gfx_v8_0_free_microcode(adev);
2214 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2216 uint32_t *modearray, *mod2array;
2217 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2218 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2221 modearray = adev->gfx.config.tile_mode_array;
2222 mod2array = adev->gfx.config.macrotile_mode_array;
2224 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2225 modearray[reg_offset] = 0;
2227 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2228 mod2array[reg_offset] = 0;
2230 switch (adev->asic_type) {
2232 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2233 PIPE_CONFIG(ADDR_SURF_P2) |
2234 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2235 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2236 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2237 PIPE_CONFIG(ADDR_SURF_P2) |
2238 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2239 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2240 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2241 PIPE_CONFIG(ADDR_SURF_P2) |
2242 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2243 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2244 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2245 PIPE_CONFIG(ADDR_SURF_P2) |
2246 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2247 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2248 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2249 PIPE_CONFIG(ADDR_SURF_P2) |
2250 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2251 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2252 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2253 PIPE_CONFIG(ADDR_SURF_P2) |
2254 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2255 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2256 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2257 PIPE_CONFIG(ADDR_SURF_P2) |
2258 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2259 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2260 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2261 PIPE_CONFIG(ADDR_SURF_P2));
2262 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2263 PIPE_CONFIG(ADDR_SURF_P2) |
2264 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2265 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2266 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2267 PIPE_CONFIG(ADDR_SURF_P2) |
2268 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2269 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2270 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2271 PIPE_CONFIG(ADDR_SURF_P2) |
2272 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2273 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2274 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2275 PIPE_CONFIG(ADDR_SURF_P2) |
2276 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2277 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2278 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2279 PIPE_CONFIG(ADDR_SURF_P2) |
2280 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2281 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2282 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2283 PIPE_CONFIG(ADDR_SURF_P2) |
2284 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2285 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2286 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2287 PIPE_CONFIG(ADDR_SURF_P2) |
2288 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2289 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2290 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2291 PIPE_CONFIG(ADDR_SURF_P2) |
2292 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2293 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2294 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2295 PIPE_CONFIG(ADDR_SURF_P2) |
2296 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2297 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2298 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2299 PIPE_CONFIG(ADDR_SURF_P2) |
2300 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2301 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2302 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2303 PIPE_CONFIG(ADDR_SURF_P2) |
2304 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2305 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2306 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2307 PIPE_CONFIG(ADDR_SURF_P2) |
2308 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2309 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2310 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2311 PIPE_CONFIG(ADDR_SURF_P2) |
2312 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2313 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2314 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2315 PIPE_CONFIG(ADDR_SURF_P2) |
2316 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2317 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2318 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2319 PIPE_CONFIG(ADDR_SURF_P2) |
2320 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2321 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2322 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2323 PIPE_CONFIG(ADDR_SURF_P2) |
2324 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2325 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2326 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2327 PIPE_CONFIG(ADDR_SURF_P2) |
2328 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2329 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2330 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2331 PIPE_CONFIG(ADDR_SURF_P2) |
2332 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2333 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2335 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2336 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2337 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2338 NUM_BANKS(ADDR_SURF_8_BANK));
2339 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2340 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2341 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2342 NUM_BANKS(ADDR_SURF_8_BANK));
2343 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2344 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2345 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2346 NUM_BANKS(ADDR_SURF_8_BANK));
2347 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2348 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2349 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2350 NUM_BANKS(ADDR_SURF_8_BANK));
2351 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2352 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2353 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2354 NUM_BANKS(ADDR_SURF_8_BANK));
2355 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2356 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2357 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2358 NUM_BANKS(ADDR_SURF_8_BANK));
2359 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2360 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2361 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2362 NUM_BANKS(ADDR_SURF_8_BANK));
2363 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2364 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2365 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2366 NUM_BANKS(ADDR_SURF_16_BANK));
2367 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2368 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2369 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2370 NUM_BANKS(ADDR_SURF_16_BANK));
2371 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2372 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2373 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2374 NUM_BANKS(ADDR_SURF_16_BANK));
2375 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2376 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2377 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2378 NUM_BANKS(ADDR_SURF_16_BANK));
2379 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2380 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2381 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2382 NUM_BANKS(ADDR_SURF_16_BANK));
2383 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2384 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2385 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2386 NUM_BANKS(ADDR_SURF_16_BANK));
2387 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2388 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2389 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2390 NUM_BANKS(ADDR_SURF_8_BANK));
2392 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2393 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2395 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2397 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2398 if (reg_offset != 7)
2399 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2403 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2404 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2405 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2406 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2407 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2408 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2409 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2410 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2411 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2412 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2413 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2414 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2415 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2416 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2417 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2418 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2419 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2420 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2421 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2422 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2423 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2424 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2425 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2426 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2427 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2428 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2429 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2430 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2431 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2432 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2433 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2434 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2435 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2436 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2437 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2438 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2439 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2440 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2441 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2442 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2443 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2444 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2445 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2446 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2447 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2448 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2449 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2450 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2451 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2452 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2453 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2454 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2455 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2456 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2457 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2458 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2459 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2460 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2461 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2462 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2463 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2464 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2465 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2466 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2467 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2468 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2469 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2470 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2471 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2472 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2473 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2474 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2475 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2476 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2477 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2478 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2479 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2480 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2481 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2482 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2483 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2484 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2485 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2486 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2487 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2488 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2489 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2490 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2491 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2492 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2493 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2494 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2495 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2496 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2497 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2498 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2499 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2500 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2501 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2502 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2503 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2504 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2505 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2506 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2507 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2508 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2509 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2510 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2511 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2512 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2513 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2514 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2515 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2516 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2517 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2518 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2519 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2520 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2521 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2522 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2523 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2524 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2526 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2527 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2528 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2529 NUM_BANKS(ADDR_SURF_8_BANK));
2530 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2531 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2532 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2533 NUM_BANKS(ADDR_SURF_8_BANK));
2534 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2535 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2536 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2537 NUM_BANKS(ADDR_SURF_8_BANK));
2538 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2539 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2540 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2541 NUM_BANKS(ADDR_SURF_8_BANK));
2542 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2543 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2544 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2545 NUM_BANKS(ADDR_SURF_8_BANK));
2546 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2547 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2548 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2549 NUM_BANKS(ADDR_SURF_8_BANK));
2550 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2551 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2552 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2553 NUM_BANKS(ADDR_SURF_8_BANK));
2554 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2555 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2556 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2557 NUM_BANKS(ADDR_SURF_8_BANK));
2558 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2559 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2560 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2561 NUM_BANKS(ADDR_SURF_8_BANK));
2562 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2563 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2564 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2565 NUM_BANKS(ADDR_SURF_8_BANK));
2566 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2567 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2568 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2569 NUM_BANKS(ADDR_SURF_8_BANK));
2570 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2571 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2572 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2573 NUM_BANKS(ADDR_SURF_8_BANK));
2574 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2575 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2576 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2577 NUM_BANKS(ADDR_SURF_8_BANK));
2578 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2579 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2580 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2581 NUM_BANKS(ADDR_SURF_4_BANK));
2583 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2584 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2586 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2587 if (reg_offset != 7)
2588 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2592 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2593 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2594 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2595 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2596 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2597 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2598 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2599 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2600 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2601 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2602 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2603 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2604 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2605 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2606 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2607 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2608 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2609 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2610 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2611 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2612 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2613 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2614 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2615 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2616 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2617 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2618 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2619 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2620 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2621 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2622 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2623 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2624 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2625 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2626 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2627 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2628 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2629 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2630 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2631 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2632 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2633 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2634 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2635 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2636 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2637 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2638 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2639 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2640 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2641 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2642 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2643 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2644 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2645 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2646 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2647 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2648 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2649 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2650 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2651 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2652 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2653 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2654 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2655 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2656 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2657 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2658 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2659 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2660 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2661 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2662 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2663 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2664 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2665 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2666 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2667 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2668 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2669 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2670 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2671 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2672 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2673 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2674 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2675 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2676 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2677 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2678 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2679 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2680 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2681 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2682 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2683 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2684 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2685 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2686 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2687 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2688 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2689 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2690 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2691 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2692 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2693 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2694 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2695 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2696 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2697 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2698 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2699 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2700 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2701 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2702 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2703 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2704 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2705 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2706 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2707 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2708 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2709 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2710 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2711 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2712 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2713 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2715 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2716 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2717 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2718 NUM_BANKS(ADDR_SURF_16_BANK));
2719 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2720 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2721 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2722 NUM_BANKS(ADDR_SURF_16_BANK));
2723 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2724 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2725 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2726 NUM_BANKS(ADDR_SURF_16_BANK));
2727 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2728 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2729 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2730 NUM_BANKS(ADDR_SURF_16_BANK));
2731 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2732 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2733 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2734 NUM_BANKS(ADDR_SURF_16_BANK));
2735 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2736 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2737 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2738 NUM_BANKS(ADDR_SURF_16_BANK));
2739 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2740 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2741 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2742 NUM_BANKS(ADDR_SURF_16_BANK));
2743 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2744 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2745 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2746 NUM_BANKS(ADDR_SURF_16_BANK));
2747 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2748 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2749 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2750 NUM_BANKS(ADDR_SURF_16_BANK));
2751 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2752 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2753 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2754 NUM_BANKS(ADDR_SURF_16_BANK));
2755 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2756 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2757 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2758 NUM_BANKS(ADDR_SURF_16_BANK));
2759 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2760 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2761 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2762 NUM_BANKS(ADDR_SURF_8_BANK));
2763 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2764 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2765 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2766 NUM_BANKS(ADDR_SURF_4_BANK));
2767 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2768 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2769 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2770 NUM_BANKS(ADDR_SURF_4_BANK));
2772 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2773 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2775 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2776 if (reg_offset != 7)
2777 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2780 case CHIP_POLARIS11:
2781 case CHIP_POLARIS12:
2782 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2783 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2784 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2785 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2786 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2787 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2788 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2789 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2790 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2791 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2792 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2793 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2794 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2795 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2796 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2797 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2798 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2799 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2800 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2801 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2802 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2803 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2804 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2805 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2806 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2807 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2808 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2809 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2810 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2811 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2812 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2813 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2814 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2815 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2816 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2817 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2818 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2819 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2820 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2821 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2822 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2823 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2824 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2825 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2826 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2827 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2828 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2829 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2830 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2831 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2832 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2833 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2834 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2835 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2836 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2837 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2838 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2839 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2840 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2841 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2842 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2843 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2844 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2845 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2846 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2847 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2848 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2849 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2850 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2851 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2852 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2853 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2854 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2855 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2856 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2857 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2858 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2859 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2860 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2861 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2862 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2863 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2864 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2865 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2866 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2867 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2868 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2869 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2870 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2871 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2872 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2873 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2874 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2875 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2876 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2877 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2878 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2879 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2880 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2881 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2882 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2883 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2884 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2885 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2886 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2887 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2888 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2889 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2890 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2891 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2892 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2893 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2894 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2895 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2896 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2897 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2898 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2899 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2900 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2901 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2902 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2903 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2905 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2906 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2907 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2908 NUM_BANKS(ADDR_SURF_16_BANK));
2910 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2911 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2912 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2913 NUM_BANKS(ADDR_SURF_16_BANK));
2915 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2916 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2917 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2918 NUM_BANKS(ADDR_SURF_16_BANK));
2920 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2921 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2922 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2923 NUM_BANKS(ADDR_SURF_16_BANK));
2925 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2926 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2927 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2928 NUM_BANKS(ADDR_SURF_16_BANK));
2930 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2931 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2932 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2933 NUM_BANKS(ADDR_SURF_16_BANK));
2935 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2936 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2937 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2938 NUM_BANKS(ADDR_SURF_16_BANK));
2940 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2941 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2942 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2943 NUM_BANKS(ADDR_SURF_16_BANK));
2945 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2946 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2947 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2948 NUM_BANKS(ADDR_SURF_16_BANK));
2950 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2951 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2952 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2953 NUM_BANKS(ADDR_SURF_16_BANK));
2955 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2956 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2957 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2958 NUM_BANKS(ADDR_SURF_16_BANK));
2960 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2961 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2962 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2963 NUM_BANKS(ADDR_SURF_16_BANK));
2965 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2966 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2967 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2968 NUM_BANKS(ADDR_SURF_8_BANK));
2970 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2971 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2972 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2973 NUM_BANKS(ADDR_SURF_4_BANK));
2975 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2976 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2978 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2979 if (reg_offset != 7)
2980 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2983 case CHIP_POLARIS10:
2984 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2985 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2986 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2987 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2988 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2989 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2990 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2991 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2992 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2993 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2994 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2995 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2996 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2997 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2998 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2999 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3000 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3001 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3002 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3003 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3004 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3005 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3006 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3007 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3008 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3009 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3010 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3011 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3012 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3013 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3014 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3015 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3016 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3017 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
3018 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3019 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3020 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3021 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3022 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3023 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3024 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3025 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3026 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3027 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3028 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3029 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3030 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3031 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3032 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3033 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3034 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3035 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3036 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3037 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3038 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3039 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3040 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3041 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3042 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3043 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3044 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3045 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3046 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3047 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3048 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3049 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3050 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3051 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3052 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3053 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3054 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3055 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3056 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3057 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3058 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3059 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3060 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3061 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3062 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3063 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3064 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3065 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3066 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3067 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3068 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3069 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3070 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3071 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3072 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3073 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3074 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3075 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3076 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3077 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3078 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3079 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3080 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3081 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3082 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3083 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3084 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3085 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3086 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3087 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3088 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3089 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3090 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3091 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3092 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3093 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3094 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3095 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3096 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3097 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3098 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3099 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3100 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3101 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3102 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3103 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3104 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3105 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3107 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3108 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3109 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3110 NUM_BANKS(ADDR_SURF_16_BANK));
3112 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3113 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3114 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3115 NUM_BANKS(ADDR_SURF_16_BANK));
3117 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3118 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3119 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3120 NUM_BANKS(ADDR_SURF_16_BANK));
3122 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3123 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3124 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3125 NUM_BANKS(ADDR_SURF_16_BANK));
3127 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3128 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3129 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3130 NUM_BANKS(ADDR_SURF_16_BANK));
3132 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3133 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3134 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3135 NUM_BANKS(ADDR_SURF_16_BANK));
3137 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3138 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3139 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3140 NUM_BANKS(ADDR_SURF_16_BANK));
3142 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3143 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3144 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3145 NUM_BANKS(ADDR_SURF_16_BANK));
3147 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3148 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3149 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3150 NUM_BANKS(ADDR_SURF_16_BANK));
3152 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3153 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3154 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3155 NUM_BANKS(ADDR_SURF_16_BANK));
3157 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3158 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3159 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3160 NUM_BANKS(ADDR_SURF_16_BANK));
3162 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3163 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3164 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3165 NUM_BANKS(ADDR_SURF_8_BANK));
3167 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3168 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3169 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3170 NUM_BANKS(ADDR_SURF_4_BANK));
3172 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3173 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3174 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3175 NUM_BANKS(ADDR_SURF_4_BANK));
3177 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3178 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3180 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3181 if (reg_offset != 7)
3182 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3186 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3187 PIPE_CONFIG(ADDR_SURF_P2) |
3188 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3189 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3190 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3191 PIPE_CONFIG(ADDR_SURF_P2) |
3192 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3193 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3194 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3195 PIPE_CONFIG(ADDR_SURF_P2) |
3196 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3197 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3198 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3199 PIPE_CONFIG(ADDR_SURF_P2) |
3200 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3201 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3202 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3203 PIPE_CONFIG(ADDR_SURF_P2) |
3204 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3205 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3206 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3207 PIPE_CONFIG(ADDR_SURF_P2) |
3208 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3209 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3210 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3211 PIPE_CONFIG(ADDR_SURF_P2) |
3212 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3213 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3214 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3215 PIPE_CONFIG(ADDR_SURF_P2));
3216 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3217 PIPE_CONFIG(ADDR_SURF_P2) |
3218 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3219 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3220 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3221 PIPE_CONFIG(ADDR_SURF_P2) |
3222 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3223 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3224 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3225 PIPE_CONFIG(ADDR_SURF_P2) |
3226 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3227 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3228 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3229 PIPE_CONFIG(ADDR_SURF_P2) |
3230 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3231 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3232 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3233 PIPE_CONFIG(ADDR_SURF_P2) |
3234 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3235 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3236 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3237 PIPE_CONFIG(ADDR_SURF_P2) |
3238 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3239 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3240 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3241 PIPE_CONFIG(ADDR_SURF_P2) |
3242 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3243 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3244 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3245 PIPE_CONFIG(ADDR_SURF_P2) |
3246 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3247 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3248 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3249 PIPE_CONFIG(ADDR_SURF_P2) |
3250 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3251 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3252 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3253 PIPE_CONFIG(ADDR_SURF_P2) |
3254 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3255 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3256 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3257 PIPE_CONFIG(ADDR_SURF_P2) |
3258 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3259 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3260 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3261 PIPE_CONFIG(ADDR_SURF_P2) |
3262 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3263 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3264 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3265 PIPE_CONFIG(ADDR_SURF_P2) |
3266 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3267 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3268 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3269 PIPE_CONFIG(ADDR_SURF_P2) |
3270 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3271 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3272 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3273 PIPE_CONFIG(ADDR_SURF_P2) |
3274 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3275 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3276 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3277 PIPE_CONFIG(ADDR_SURF_P2) |
3278 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3279 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3280 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3281 PIPE_CONFIG(ADDR_SURF_P2) |
3282 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3283 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3284 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3285 PIPE_CONFIG(ADDR_SURF_P2) |
3286 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3287 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3289 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3290 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3291 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3292 NUM_BANKS(ADDR_SURF_8_BANK));
3293 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3294 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3295 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3296 NUM_BANKS(ADDR_SURF_8_BANK));
3297 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3298 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3299 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3300 NUM_BANKS(ADDR_SURF_8_BANK));
3301 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3302 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3303 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3304 NUM_BANKS(ADDR_SURF_8_BANK));
3305 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3306 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3307 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3308 NUM_BANKS(ADDR_SURF_8_BANK));
3309 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3310 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3311 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3312 NUM_BANKS(ADDR_SURF_8_BANK));
3313 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3314 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3315 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3316 NUM_BANKS(ADDR_SURF_8_BANK));
3317 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3318 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3319 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3320 NUM_BANKS(ADDR_SURF_16_BANK));
3321 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3322 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3323 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3324 NUM_BANKS(ADDR_SURF_16_BANK));
3325 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3326 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3327 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3328 NUM_BANKS(ADDR_SURF_16_BANK));
3329 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3330 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3331 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3332 NUM_BANKS(ADDR_SURF_16_BANK));
3333 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3334 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3335 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3336 NUM_BANKS(ADDR_SURF_16_BANK));
3337 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3338 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3339 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3340 NUM_BANKS(ADDR_SURF_16_BANK));
3341 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3342 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3343 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3344 NUM_BANKS(ADDR_SURF_8_BANK));
3346 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3347 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3349 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3351 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3352 if (reg_offset != 7)
3353 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3358 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3362 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3363 PIPE_CONFIG(ADDR_SURF_P2) |
3364 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3365 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3366 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3367 PIPE_CONFIG(ADDR_SURF_P2) |
3368 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3369 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3370 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3371 PIPE_CONFIG(ADDR_SURF_P2) |
3372 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3373 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3374 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3375 PIPE_CONFIG(ADDR_SURF_P2) |
3376 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3377 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3378 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3379 PIPE_CONFIG(ADDR_SURF_P2) |
3380 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3381 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3382 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3383 PIPE_CONFIG(ADDR_SURF_P2) |
3384 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3385 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3386 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3387 PIPE_CONFIG(ADDR_SURF_P2) |
3388 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3389 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3390 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3391 PIPE_CONFIG(ADDR_SURF_P2));
3392 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3393 PIPE_CONFIG(ADDR_SURF_P2) |
3394 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3395 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3396 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3397 PIPE_CONFIG(ADDR_SURF_P2) |
3398 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3399 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3400 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3401 PIPE_CONFIG(ADDR_SURF_P2) |
3402 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3403 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3404 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3405 PIPE_CONFIG(ADDR_SURF_P2) |
3406 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3407 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3408 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3409 PIPE_CONFIG(ADDR_SURF_P2) |
3410 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3411 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3412 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3413 PIPE_CONFIG(ADDR_SURF_P2) |
3414 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3415 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3416 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3417 PIPE_CONFIG(ADDR_SURF_P2) |
3418 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3419 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3420 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3421 PIPE_CONFIG(ADDR_SURF_P2) |
3422 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3423 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3424 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3425 PIPE_CONFIG(ADDR_SURF_P2) |
3426 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3427 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3428 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3429 PIPE_CONFIG(ADDR_SURF_P2) |
3430 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3431 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3432 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3433 PIPE_CONFIG(ADDR_SURF_P2) |
3434 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3435 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3436 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3437 PIPE_CONFIG(ADDR_SURF_P2) |
3438 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3439 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3440 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3441 PIPE_CONFIG(ADDR_SURF_P2) |
3442 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3443 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3444 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3445 PIPE_CONFIG(ADDR_SURF_P2) |
3446 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3447 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3448 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3449 PIPE_CONFIG(ADDR_SURF_P2) |
3450 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3451 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3452 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3453 PIPE_CONFIG(ADDR_SURF_P2) |
3454 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3455 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3456 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3457 PIPE_CONFIG(ADDR_SURF_P2) |
3458 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3459 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3460 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3461 PIPE_CONFIG(ADDR_SURF_P2) |
3462 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3463 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3465 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3466 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3467 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3468 NUM_BANKS(ADDR_SURF_8_BANK));
3469 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3470 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3471 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3472 NUM_BANKS(ADDR_SURF_8_BANK));
3473 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3474 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3475 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3476 NUM_BANKS(ADDR_SURF_8_BANK));
3477 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3478 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3479 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3480 NUM_BANKS(ADDR_SURF_8_BANK));
3481 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3482 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3483 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3484 NUM_BANKS(ADDR_SURF_8_BANK));
3485 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3486 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3487 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3488 NUM_BANKS(ADDR_SURF_8_BANK));
3489 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3490 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3491 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3492 NUM_BANKS(ADDR_SURF_8_BANK));
3493 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3494 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3495 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3496 NUM_BANKS(ADDR_SURF_16_BANK));
3497 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3498 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3499 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3500 NUM_BANKS(ADDR_SURF_16_BANK));
3501 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3502 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3503 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3504 NUM_BANKS(ADDR_SURF_16_BANK));
3505 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3506 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3507 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3508 NUM_BANKS(ADDR_SURF_16_BANK));
3509 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3510 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3511 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3512 NUM_BANKS(ADDR_SURF_16_BANK));
3513 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3514 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3515 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3516 NUM_BANKS(ADDR_SURF_16_BANK));
3517 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3518 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3519 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3520 NUM_BANKS(ADDR_SURF_8_BANK));
3522 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3523 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3525 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3527 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3528 if (reg_offset != 7)
3529 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3535 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3536 u32 se_num, u32 sh_num, u32 instance)
3540 if (instance == 0xffffffff)
3541 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3543 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3545 if (se_num == 0xffffffff)
3546 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3548 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3550 if (sh_num == 0xffffffff)
3551 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3553 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3555 WREG32(mmGRBM_GFX_INDEX, data);
3558 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3560 return (u32)((1ULL << bit_width) - 1);
3563 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3567 data = RREG32(mmCC_RB_BACKEND_DISABLE) |
3568 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3570 data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3572 mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3573 adev->gfx.config.max_sh_per_se);
3575 return (~data) & mask;
gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
	/* Produce the golden PA_SC_RASTER_CONFIG / PA_SC_RASTER_CONFIG_1
	 * values for this ASIC.  Both outputs are OR-accumulated into the
	 * caller's variables rather than assigned.
	 */
	switch (adev->asic_type) {
		/* Big 4-SE parts: full SE/PKR/RB mapping. */
		*rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
			  RB_XSEL2(1) | PKR_MAP(2) |
			  PKR_XSEL(1) | PKR_YSEL(1) |
			  SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
	case CHIP_POLARIS10:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		*rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
		/* Small parts only need the PKR0 RB map. */
		*rconf |= RB_MAP_PKR0(2);
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		*rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
			  SE_XSEL(1) | SE_YSEL(1);
		/* Unhandled ASIC: leave the caller's values untouched. */
		DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
	/* Adjust and program per-SE raster configs when some render
	 * backends are harvested (fused off).  rb_mask is the bitmap of
	 * RBs that actually exist; num_rb is its population count.
	 */
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];

	/* Slice the global RB mask into one sub-mask per shader engine. */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	/* VI parts only ship in these topologies. */
	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/* If an entire SE pair is harvested, remap SE_PAIR to the live pair. */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		/* idx is the first SE of the pair this SE belongs to. */
		int idx = (se / 2) * 2;

		/* If one SE of the pair is dead, route SE work to the live one. */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);

		/* Same idea one level down: remap packers within this SE. */
		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);

		/* Finally remap individual RBs inside each packer. */
		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;
					RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
					RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);

			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;
						RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
						RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);

	/* GRBM_GFX_INDEX has a different offset on VI */
	/* Restore broadcast mode so later register writes hit all units. */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
	/* Discover which render backends survived harvesting, program the
	 * raster configuration accordingly, and cache the per-SE/SH
	 * register values so they can be reported to userspace.
	 */
	u32 raster_config = 0, raster_config_1 = 0;
	/* Number of bitmap bits contributed by one SH's worth of RBs. */
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
				     adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	/* Walk every SE/SH and accumulate the global active-RB bitmap. */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	/* No harvesting detected (or no RBs at all): broadcast the golden
	 * values; otherwise write per-SE configs adjusted for dead RBs.
	 */
	if (!adev->gfx.config.backend_enable_mask ||
	    adev->gfx.config.num_rbs >= num_rb_pipes) {
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
 * gfx_v8_0_init_compute_vmid - initialize the compute VMIDs
 * @adev: amdgpu_device pointer
 * Initialize compute vmid sh_mem registers
3792 #define DEFAULT_SH_MEM_BASES (0x6000)
3793 #define FIRST_COMPUTE_VMID (8)
3794 #define LAST_COMPUTE_VMID (16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	/* 64-bit HSA addressing, unaligned access allowed, cache-coherent
	 * default MTYPE, private apertures routed through the ATC.
	 */
	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	mutex_lock(&adev->srbm_mutex);
	/* Program every compute VMID (8..15) identically. */
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		/* APE1 base written as 1 with limit 0 (base > limit). */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	/* Back to VMID 0 so later SRBM-indexed writes are not misdirected. */
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
	/* One-time golden GFX setup: address config registers, tiling
	 * tables, RB layout, CU info and per-VMID SH_MEM state.
	 */
	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
			/* Uncached default/APE1 MTYPE variant. */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);

			/* Non-coherent cached MTYPE variant. */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);

		/* APE1 base written as 1 with limit 0 (base > limit). */
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, 0);
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
	       (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
	       (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
	mutex_unlock(&adev->grbm_idx_mutex);
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
	/* Poll (bounded by adev->usec_timeout) until the RLC serdes
	 * reports all CU masters idle on every SE/SH, then wait for the
	 * non-CU masters (SE/GC/TC0/TC1) as well.
	 */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
	/* Restore broadcast selection before dropping the mutex. */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3922 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3925 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3927 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3928 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3929 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3930 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3932 WREG32(mmCP_INT_CNTL_RING0, tmp);
3935 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3938 WREG32(mmRLC_CSIB_ADDR_HI,
3939 adev->gfx.rlc.clear_state_gpu_addr >> 32);
3940 WREG32(mmRLC_CSIB_ADDR_LO,
3941 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3942 WREG32(mmRLC_CSIB_LENGTH,
3943 adev->gfx.rlc.clear_state_size);
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
	/* Scan the indirect portion of the RLC register list format:
	 * record where each indirect entry starts in ind_start_offsets,
	 * collect the distinct register indices into unique_indices, and
	 * rewrite each list entry in place to reference its position in
	 * the unique_indices table.
	 */
	int *unique_indices,
	int *ind_start_offsets,
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {
			/* Start of a new indirect entry: remember its offset. */
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		/* 0xFFFFFFFF terminates an entry. */
		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
		/* look for the matching indice */
			indices < *indices_count;
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
		/* Not seen before: append to the unique table. */
		if (indices >= *indices_count) {
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		/* Replace the raw value with its unique-table index. */
		register_list_format[ind_offset] = indices;
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
	/* Upload the RLC save/restore register lists into the SRM ARAM and
	 * GPM scratch memories so the save/restore machine can preserve
	 * GFX state across power gating.  Returns 0 on success or a
	 * negative errno.
	 */
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	/* Work on a scratch copy: parsing rewrites entries in place. */
	unsigned int *register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (!register_list_format)
	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
		adev->gfx.rlc.reg_list_format_size_bytes);

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				RLC_FormatDirectRegListLength,
				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				sizeof(unique_indices) / sizeof(int),
				indirect_start_offsets,
				sizeof(indirect_start_offsets)/sizeof(int));

	/* save and restore list */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	/* Restore values stream into ARAM starting at offset 0. */
	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* The (rewritten) format list goes into GPM scratch. */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	/* List size is recorded in pairs of dwords (hence the >> 1). */
	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets starts */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
		adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
				indirect_start_offsets[i]);

	/* unique indices */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
		if (unique_indices[i] != 0) {
			/* Low 18 bits are the address; high bits the data. */
			amdgpu_mm_wreg(adev, temp + i,
					unique_indices[i] & 0x3FFFF, false);
			amdgpu_mm_wreg(adev, data + i,
					unique_indices[i] >> 20, false);
	kfree(register_list_format);
/* Turn on the RLC save/restore machine (SRM) so the uploaded
 * save/restore lists take effect.
 */
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
	/* Program the common RLC power-gating tunables: WPTR poll
	 * interval, the four PG transition delays, the serdes command
	 * delay and the auto-PG GFX-idle threshold.
	 */
	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
	WREG32(mmRLC_PG_DELAY, data);

	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);
/* Enable/disable SMU clock slow-down while the GFX block powers up. */
static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
/* Enable/disable SMU clock slow-down while the GFX block powers down. */
static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
/* Enable/disable CP power gating.  Note the inverted sense: the
 * hardware field is CP_PG_DISABLE, so enable == true clears the bit.
 */
static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
	/* Per-ASIC power-gating bring-up: CSB, save/restore lists and the
	 * PG tunables.  Carrizo/Stoney additionally program the RLC jump
	 * table and the always-on CU mask.
	 */
	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		gfx_v8_0_init_csb(adev);
		gfx_v8_0_init_save_restore_list(adev);
		gfx_v8_0_enable_save_restore_machine(adev);
		/* Jump table address is programmed in units of 256 bytes. */
		WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
		gfx_v8_0_init_power_gating(adev);
		WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
	} else if ((adev->asic_type == CHIP_POLARIS11) ||
		   (adev->asic_type == CHIP_POLARIS12)) {
		gfx_v8_0_init_csb(adev);
		gfx_v8_0_init_save_restore_list(adev);
		gfx_v8_0_enable_save_restore_machine(adev);
		gfx_v8_0_init_power_gating(adev);
/* Halt the RLC F32 core, mask the GUI-idle interrupts and wait for the
 * serdes masters to drain.
 */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
/* Pulse the RLC soft-reset bit: assert, then deassert. */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
/* Re-enable the RLC F32 core and, on dGPUs, the GUI-idle interrupts. */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
	/* Legacy (non-SMU) upload of the RLC microcode into RLC_GPM ucode
	 * memory.  Returns 0 on success or a negative errno.
	 */
	const struct rlc_firmware_header_v2_0 *hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	/* Firmware must have been fetched earlier during init. */
	if (!adev->gfx.rlc_fw)

	hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	amdgpu_ucode_print_rlc_hdr(&hdr->header);

	/* The ucode payload follows the header at the recorded offset;
	 * its byte size is converted to a dword count.
	 */
	fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
			   le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

	/* Stream from address 0, then leave the fw version in the
	 * address register.
	 */
	WREG32(mmRLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
	/* Full RLC restart sequence: stop, disable clock/power gating,
	 * reset, re-init PG state, (re)load microcode and start.
	 */
	gfx_v8_0_rlc_stop(adev);

	/* Clear the CGCG/CGLS clock-gating enables while the RLC is down. */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
	if (adev->asic_type == CHIP_POLARIS11 ||
	    adev->asic_type == CHIP_POLARIS10 ||
	    adev->asic_type == CHIP_POLARIS12) {
		/* Polaris also has a 3D variant of the CGCG/CGLS control. */
		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);

	/* Disable power gating entirely before the reset. */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);

	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy rlc firmware loading */
			r = gfx_v8_0_rlc_load_microcode(adev);
			/* SMU-driven loading: just wait for completion. */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
					AMDGPU_UCODE_ID_RLC_G);

	gfx_v8_0_rlc_start(adev);
static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
	/* Run or halt the three CP graphics front-end engines (ME, PFP,
	 * CE) together.  The HALT bits are active-high.
	 */
	u32 tmp = RREG32(mmCP_ME_CNTL);

		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
		/* Rings cannot accept work while the CP is halted. */
		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
			adev->gfx.gfx_ring[i].ready = false;
	WREG32(mmCP_ME_CNTL, tmp);
static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
	/* Legacy upload of the three CP graphics firmwares (PFP, CE, ME)
	 * into their respective ucode memories.  Returns 0 on success or
	 * a negative errno.
	 */
	const struct gfx_firmware_header_v1_0 *pfp_hdr;
	const struct gfx_firmware_header_v1_0 *ce_hdr;
	const struct gfx_firmware_header_v1_0 *me_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	/* All three firmwares must be present. */
	if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)

	pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.pfp_fw->data;
	ce_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.ce_fw->data;
	me_hdr = (const struct gfx_firmware_header_v1_0 *)
		adev->gfx.me_fw->data;

	amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
	amdgpu_ucode_print_gfx_hdr(&me_hdr->header);

	/* The CP must be halted while its ucode memories are rewritten. */
	gfx_v8_0_cp_gfx_enable(adev, false);

	/* PFP: stream the words, then leave the fw version in ADDR. */
	fw_data = (const __le32 *)
		(adev->gfx.pfp_fw->data +
		 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);

	/* CE: same pattern. */
	fw_data = (const __le32 *)
		(adev->gfx.ce_fw->data +
		 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_CE_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);

	/* ME: same pattern, via the RAM write-address register. */
	fw_data = (const __le32 *)
		(adev->gfx.me_fw->data +
		 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
	WREG32(mmCP_ME_RAM_WADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
	WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
	/* Compute, in dwords, how much ring space the clear-state
	 * sequence emitted by gfx_v8_0_cp_gfx_start() will need.
	 */
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	/* begin clear state */
	/* context control state */

	/* Each SECT_CONTEXT extent costs two header dwords plus payload. */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT)
				count += 2 + ext->reg_count;

	/* pa_sc_raster_config/pa_sc_raster_config1 */
	/* end clear state */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
	/* Bring up the gfx ring: enable the CP and feed it the golden
	 * clear-state / context-init packet sequence.  Returns 0 on
	 * success or a negative errno.
	 */
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* Room for the full clear-state buffer plus a few extra dwords. */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* Emit every SECT_CONTEXT extent from the golden clear state. */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);

	/* Per-ASIC PA_SC_RASTER_CONFIG/_1 golden values. */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_POLARIS10:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		/* Value depends on whether more than one RB is active. */
		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
				  0x00000000 : 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
	/* Program the gfx ring buffer registers (size, pointers, base,
	 * doorbell) and kick off the ring.  Returns 0 on success or a
	 * negative errno from the ring test.
	 */
	struct amdgpu_ring *ring;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
	/* Big-endian hosts additionally need the byte-swap bit. */
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	WREG32(mmCP_RB0_WPTR, ring->wptr);

	/* set the wb address wether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Ring base is programmed in units of 256 bytes. */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* no gfx doorbells on iceland */
	if (adev->asic_type != CHIP_TOPAZ) {
		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
		if (ring->use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

		if (adev->asic_type == CHIP_TONGA) {
			tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					    DOORBELL_RANGE_LOWER,
					    AMDGPU_DOORBELL_GFX_RING0);
			WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

			WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
			       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);

	/* start the ring */
	gfx_v8_0_cp_gfx_start(adev);
	r = amdgpu_ring_test_ring(ring);
		/* Mark the ring unusable if the smoke test failed. */
		ring->ready = false;
/* Enable (clear CP_MEC_CNTL) or halt (set ME1/ME2 halt bits) the compute
 * micro-engines; when halting, mark every compute ring not-ready so no
 * further submissions are attempted. */
4501 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4506 WREG32(mmCP_MEC_CNTL, 0);
4508 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4509 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4510 adev->gfx.compute_ring[i].ready = false;
/* Legacy (non-SMU) MEC firmware load: halt the compute MEs, then write the
 * MEC1 (and optionally MEC2) ucode words one at a time through the
 * UCODE_ADDR/UCODE_DATA register pair.  Returns early (error path not
 * visible in this extract) if no MEC firmware was fetched. */
4515 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4517 const struct gfx_firmware_header_v1_0 *mec_hdr;
4518 const __le32 *fw_data;
4519 unsigned i, fw_size;
4521 if (!adev->gfx.mec_fw)
4524 gfx_v8_0_cp_compute_enable(adev, false);
4526 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4527 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4529 fw_data = (const __le32 *)
4530 (adev->gfx.mec_fw->data +
4531 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4532 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
/* MEC1: reset the ucode address, stream the words, then store the fw version */
4535 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4536 for (i = 0; i < fw_size; i++)
4537 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4538 WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4540 /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4541 if (adev->gfx.mec2_fw) {
4542 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4544 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4545 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4547 fw_data = (const __le32 *)
4548 (adev->gfx.mec2_fw->data +
4549 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4550 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4552 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4553 for (i = 0; i < fw_size; i++)
4554 WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4555 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
/* Tear down per-ring MQD buffer objects for all compute rings:
 * reserve, unpin, unreserve, then drop the reference.  A failed reserve
 * is only warned about (teardown is best-effort). */
4561 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4565 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4566 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4568 if (ring->mqd_obj) {
4569 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4570 if (unlikely(r != 0))
4571 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4573 amdgpu_bo_unpin(ring->mqd_obj);
4574 amdgpu_bo_unreserve(ring->mqd_obj);
/* release the buffer object and clear the stale pointer */
4576 amdgpu_bo_unref(&ring->mqd_obj);
4577 ring->mqd_obj = NULL;
/* Tell the RLC which me/pipe/queue is the KIQ by encoding them into
 * RLC_CP_SCHEDULERS.
 * NOTE(review): the register is written twice here; a tmp modification
 * between the two writes appears to be missing from this extract —
 * confirm against the full source. */
4583 static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
4586 struct amdgpu_device *adev = ring->adev;
4588 /* tell RLC which is KIQ queue */
4589 tmp = RREG32(mmRLC_CP_SCHEDULERS);
4591 tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
4592 WREG32(mmRLC_CP_SCHEDULERS, tmp);
4594 WREG32(mmRLC_CP_SCHEDULERS, tmp);
/* Submit a PACKET3_SET_RESOURCES on the KIQ ring to claim queues 0-7
 * (queue mask 0xFF) for kernel scheduling; gws/oac/gds fields are left
 * at zero. */
4597 static void gfx_v8_0_kiq_enable(struct amdgpu_ring *ring)
4599 amdgpu_ring_alloc(ring, 8);
4601 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4602 amdgpu_ring_write(ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
4603 amdgpu_ring_write(ring, 0x000000FF); /* queue mask lo */
4604 amdgpu_ring_write(ring, 0); /* queue mask hi */
4605 amdgpu_ring_write(ring, 0); /* gws mask lo */
4606 amdgpu_ring_write(ring, 0); /* gws mask hi */
4607 amdgpu_ring_write(ring, 0); /* oac mask */
4608 amdgpu_ring_write(ring, 0); /* gds heap base:0, gds heap size:0 */
4609 amdgpu_ring_commit(ring);
/* Ask the KIQ to map a compute queue: emit PACKET3_MAP_QUEUES carrying the
 * target queue's doorbell/queue/pipe/engine selector plus the GPU addresses
 * of its MQD and wptr writeback slot. */
4613 static void gfx_v8_0_map_queue_enable(struct amdgpu_ring *kiq_ring,
4614 struct amdgpu_ring *ring)
4616 struct amdgpu_device *adev = kiq_ring->adev;
4617 uint64_t mqd_addr, wptr_addr;
4619 mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4620 wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4621 amdgpu_ring_alloc(kiq_ring, 8);
4623 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4624 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
4625 amdgpu_ring_write(kiq_ring, 0x21010000);
/* bit31 selects MEC engine: 0 for ME1, 1 for ME2 */
4626 amdgpu_ring_write(kiq_ring, (ring->doorbell_index << 2) |
4627 (ring->queue << 26) |
4628 (ring->pipe << 29) |
4629 ((ring->me == 1 ? 0 : 1) << 31)); /* doorbell */
4630 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4631 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4632 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4633 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4634 amdgpu_ring_commit(kiq_ring);
/* Fill in a VI memory queue descriptor (MQD) for one compute queue purely
 * in memory: header, thread-mgmt masks, EOP base/size, doorbell control,
 * MQD and HQD base addresses, PQ control, rptr/wptr writeback addresses,
 * vmid and persistent state.  Registers are only read here to seed field
 * values; the MQD is committed to hardware elsewhere.
 * NOTE(review): the 'mqd' parameter declaration is not visible in this
 * extract — confirm the full signature against the source. */
4638 static int gfx_v8_0_mqd_init(struct amdgpu_device *adev,
4640 uint64_t mqd_gpu_addr,
4641 uint64_t eop_gpu_addr,
4642 struct amdgpu_ring *ring)
4644 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4647 mqd->header = 0xC0310800;
4648 mqd->compute_pipelinestat_enable = 0x00000001;
/* enable all CUs on every shader engine for this queue */
4649 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4650 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4651 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4652 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4653 mqd->compute_misc_reserved = 0x00000003;
/* EOP base is a 256-byte-aligned address (shifted right by 8) */
4655 eop_base_addr = eop_gpu_addr >> 8;
4656 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4657 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
4659 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4660 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4661 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4662 (order_base_2(MEC_HPD_SIZE / 4) - 1));
4664 mqd->cp_hqd_eop_control = tmp;
4666 /* enable doorbell? */
4667 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4669 if (ring->use_doorbell)
4670 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4673 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4676 mqd->cp_hqd_pq_doorbell_control = tmp;
4678 /* disable the queue if it's active */
4679 mqd->cp_hqd_dequeue_request = 0;
4680 mqd->cp_hqd_pq_rptr = 0;
4681 mqd->cp_hqd_pq_wptr = 0;
4683 /* set the pointer to the MQD */
4684 mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
4685 mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4687 /* set MQD vmid to 0 */
4688 tmp = RREG32(mmCP_MQD_CONTROL);
4689 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4690 mqd->cp_mqd_control = tmp;
4692 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4693 hqd_gpu_addr = ring->gpu_addr >> 8;
4694 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4695 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4697 /* set up the HQD, this is similar to CP_RB0_CNTL */
4698 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4699 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4700 (order_base_2(ring->ring_size / 4) - 1));
4701 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4702 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
/* ENDIAN_SWAP presumably big-endian only — likely under #ifdef in the full source */
4704 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4706 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4707 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4708 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4709 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4710 mqd->cp_hqd_pq_control = tmp;
4712 /* set the wb address whether it's enabled or not */
4713 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4714 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4715 mqd->cp_hqd_pq_rptr_report_addr_hi =
4716 upper_32_bits(wb_gpu_addr) & 0xffff;
4718 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4719 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4720 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
4721 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4724 /* enable the doorbell if requested */
4725 if (ring->use_doorbell) {
4726 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4727 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4728 DOORBELL_OFFSET, ring->doorbell_index);
4730 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4732 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4733 DOORBELL_SOURCE, 0);
4734 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4738 mqd->cp_hqd_pq_doorbell_control = tmp;
4740 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4742 mqd->cp_hqd_pq_wptr = ring->wptr;
4743 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4745 /* set the vmid for the queue */
4746 mqd->cp_hqd_vmid = 0;
4748 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4749 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4750 mqd->cp_hqd_persistent_state = tmp;
4752 /* activate the queue */
4753 mqd->cp_hqd_active = 1;
/* Commit a previously-initialized MQD to the HQD registers of the currently
 * SRBM-selected queue: disable wptr polling, stop the queue if active, then
 * write every MQD field out and finally set CP_HQD_ACTIVE.  Caller must hold
 * srbm_mutex and have done vi_srbm_select() for the right me/pipe/queue. */
4758 static int gfx_v8_0_kiq_init_register(struct amdgpu_device *adev,
4760 struct amdgpu_ring *ring)
4765 /* disable wptr polling */
4766 tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
4767 tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4768 WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
4770 WREG32(mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo);
4771 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi);
4773 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4774 WREG32(mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control);
4776 /* enable doorbell? */
4777 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);
4779 /* disable the queue if it's active */
4780 if (RREG32(mmCP_HQD_ACTIVE) & 1) {
4781 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
/* busy-wait for the dequeue to complete before reprogramming */
4782 for (j = 0; j < adev->usec_timeout; j++) {
4783 if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
4787 WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
4788 WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
4789 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4792 /* set the pointer to the MQD */
4793 WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
4794 WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
4796 /* set MQD vmid to 0 */
4797 WREG32(mmCP_MQD_CONTROL, mqd->cp_mqd_control);
4799 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4800 WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
4801 WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
4803 /* set up the HQD, this is similar to CP_RB0_CNTL */
4804 WREG32(mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);
4806 /* set the wb address whether it's enabled or not */
4807 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
4808 mqd->cp_hqd_pq_rptr_report_addr_lo);
4809 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4810 mqd->cp_hqd_pq_rptr_report_addr_hi);
4812 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4813 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
4814 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi);
4816 /* enable the doorbell if requested */
4817 if (ring->use_doorbell) {
/* APU-family parts (plus Fiji) need the MEC doorbell aperture set here */
4818 if ((adev->asic_type == CHIP_CARRIZO) ||
4819 (adev->asic_type == CHIP_FIJI) ||
4820 (adev->asic_type == CHIP_STONEY)) {
4821 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
4822 AMDGPU_DOORBELL_KIQ << 2);
4823 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
4824 AMDGPU_DOORBELL_MEC_RING7 << 2);
4827 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);
4829 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4830 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4832 /* set the vmid for the queue */
4833 WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
4835 WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);
4837 /* activate the queue */
4838 WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
4840 if (ring->use_doorbell) {
4841 tmp = RREG32(mmCP_PQ_STATUS);
4842 tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4843 WREG32(mmCP_PQ_STATUS, tmp);
/* Initialize one queue's MQD under srbm_mutex with the proper SRBM
 * selection.  For the KIQ itself: use the KIQ EOP buffer, program the RLC
 * scheduler, then run the in-register init; for ordinary compute queues:
 * use the per-queue slice of the shared HPD EOP buffer and later ask the
 * KIQ to map the queue (SET_RESOURCES + MAP_QUEUES). */
4849 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring,
4853 struct amdgpu_device *adev = ring->adev;
4854 struct amdgpu_kiq *kiq = &adev->gfx.kiq;
4855 uint64_t eop_gpu_addr;
4856 bool is_kiq = false;
4858 if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
4862 eop_gpu_addr = kiq->eop_gpu_addr;
4863 gfx_v8_0_kiq_setting(&kiq->ring);
/* non-KIQ queues index into the shared EOP backing store */
4865 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr +
4866 ring->queue * MEC_HPD_SIZE;
4868 mutex_lock(&adev->srbm_mutex);
4869 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4871 gfx_v8_0_mqd_init(adev, mqd, mqd_gpu_addr, eop_gpu_addr, ring);
4874 gfx_v8_0_kiq_init_register(adev, mqd, ring);
/* restore default SRBM selection before releasing the lock */
4876 vi_srbm_select(adev, 0, 0, 0, 0);
4877 mutex_unlock(&adev->srbm_mutex);
4880 gfx_v8_0_kiq_enable(ring);
4882 gfx_v8_0_map_queue_enable(&kiq->ring, ring);
/* Free the MQD buffer objects of every compute ring and of the KIQ ring
 * itself (SR-IOV teardown path). */
4887 static void gfx_v8_0_kiq_free_queue(struct amdgpu_device *adev)
4889 struct amdgpu_ring *ring = NULL;
4892 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4893 ring = &adev->gfx.compute_ring[i];
4894 amdgpu_bo_free_kernel(&ring->mqd_obj, NULL, NULL);
4895 ring->mqd_obj = NULL;
4898 ring = &adev->gfx.kiq.ring;
4899 amdgpu_bo_free_kernel(&ring->mqd_obj, NULL, NULL);
4900 ring->mqd_obj = NULL;
/* Allocate a page-aligned, GTT-backed, CPU-mapped MQD buffer for one ring,
 * zero it, run the queue initialization, then drop the CPU mapping. */
4903 static int gfx_v8_0_kiq_setup_queue(struct amdgpu_device *adev,
4904 struct amdgpu_ring *ring)
4911 r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE,
4912 AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
4913 &mqd_gpu_addr, (void **)&buf);
4915 dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
4919 /* init the mqd struct */
4920 memset(buf, 0, sizeof(struct vi_mqd));
4921 mqd = (struct vi_mqd *)buf;
4923 r = gfx_v8_0_kiq_init_queue(ring, mqd, mqd_gpu_addr);
/* CPU mapping no longer needed once the MQD is programmed */
4927 amdgpu_bo_kunmap(ring->mqd_obj);
/* KIQ-based resume (SR-IOV path): set up the KIQ queue first, then every
 * compute queue, enable the compute MECs, and ring-test each compute ring
 * and finally the KIQ ring, marking failures not-ready. */
4932 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4934 struct amdgpu_ring *ring = NULL;
4937 ring = &adev->gfx.kiq.ring;
4938 r = gfx_v8_0_kiq_setup_queue(adev, ring);
4942 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4943 ring = &adev->gfx.compute_ring[i];
4944 r = gfx_v8_0_kiq_setup_queue(adev, ring);
4949 gfx_v8_0_cp_compute_enable(adev, true);
4951 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4952 ring = &adev->gfx.compute_ring[i];
4955 r = amdgpu_ring_test_ring(ring);
4957 ring->ready = false;
4960 ring = &adev->gfx.kiq.ring;
4962 r = amdgpu_ring_test_ring(ring);
4964 ring->ready = false;
/* Non-KIQ compute resume path: for each compute ring, allocate/pin/map an
 * MQD BO if needed, build the MQD while programming the HQD registers
 * directly under srbm_mutex (duplicating much of gfx_v8_0_mqd_init /
 * gfx_v8_0_kiq_init_register inline), then enable the MECs and ring-test
 * every compute ring.
 * NOTE(review): several declarations and error-path lines are not visible
 * in this extract — confirm against the full source before modifying. */
4969 static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
4973 bool use_doorbell = true;
4981 /* init the queues. */
4982 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4983 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4985 if (ring->mqd_obj == NULL) {
4986 r = amdgpu_bo_create(adev,
4987 sizeof(struct vi_mqd),
4989 AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
4990 NULL, &ring->mqd_obj);
4992 dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
4997 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4998 if (unlikely(r != 0)) {
4999 gfx_v8_0_cp_compute_fini(adev);
5002 r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
5005 dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
5006 gfx_v8_0_cp_compute_fini(adev);
5009 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
5011 dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
5012 gfx_v8_0_cp_compute_fini(adev);
5016 /* init the mqd struct */
5017 memset(buf, 0, sizeof(struct vi_mqd));
5019 mqd = (struct vi_mqd *)buf;
5020 mqd->header = 0xC0310800;
5021 mqd->compute_pipelinestat_enable = 0x00000001;
/* enable all CUs on every shader engine for this queue */
5022 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
5023 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
5024 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
5025 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
5026 mqd->compute_misc_reserved = 0x00000003;
/* HQD registers below are per me/pipe/queue: select via SRBM first */
5028 mutex_lock(&adev->srbm_mutex);
5029 vi_srbm_select(adev, ring->me,
5033 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
5036 /* write the EOP addr */
5037 WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
5038 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
5040 /* set the VMID assigned */
5041 WREG32(mmCP_HQD_VMID, 0);
5043 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
5044 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
5045 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
5046 (order_base_2(MEC_HPD_SIZE / 4) - 1));
5047 WREG32(mmCP_HQD_EOP_CONTROL, tmp);
5049 /* disable wptr polling */
5050 tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
5051 tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
5052 WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
/* snapshot what was actually programmed back into the MQD */
5054 mqd->cp_hqd_eop_base_addr_lo =
5055 RREG32(mmCP_HQD_EOP_BASE_ADDR);
5056 mqd->cp_hqd_eop_base_addr_hi =
5057 RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
5059 /* enable doorbell? */
5060 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
5062 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
5064 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
5066 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
5067 mqd->cp_hqd_pq_doorbell_control = tmp;
5069 /* disable the queue if it's active */
5070 mqd->cp_hqd_dequeue_request = 0;
5071 mqd->cp_hqd_pq_rptr = 0;
5072 mqd->cp_hqd_pq_wptr= 0;
5073 if (RREG32(mmCP_HQD_ACTIVE) & 1) {
5074 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
/* busy-wait for the dequeue to complete before reprogramming */
5075 for (j = 0; j < adev->usec_timeout; j++) {
5076 if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
5080 WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
5081 WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
5082 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
5085 /* set the pointer to the MQD */
5086 mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
5087 mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
5088 WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
5089 WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
5091 /* set MQD vmid to 0 */
5092 tmp = RREG32(mmCP_MQD_CONTROL);
5093 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
5094 WREG32(mmCP_MQD_CONTROL, tmp);
5095 mqd->cp_mqd_control = tmp;
5097 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
5098 hqd_gpu_addr = ring->gpu_addr >> 8;
5099 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
5100 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
5101 WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
5102 WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
5104 /* set up the HQD, this is similar to CP_RB0_CNTL */
5105 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
5106 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
5107 (order_base_2(ring->ring_size / 4) - 1));
5108 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
5109 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
/* ENDIAN_SWAP presumably big-endian only — likely under #ifdef in the full source */
5111 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
5113 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
5114 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
5115 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
5116 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
5117 WREG32(mmCP_HQD_PQ_CONTROL, tmp);
5118 mqd->cp_hqd_pq_control = tmp;
5120 /* set the wb address whether it's enabled or not */
5121 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
5122 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
5123 mqd->cp_hqd_pq_rptr_report_addr_hi =
5124 upper_32_bits(wb_gpu_addr) & 0xffff;
5125 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
5126 mqd->cp_hqd_pq_rptr_report_addr_lo);
5127 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
5128 mqd->cp_hqd_pq_rptr_report_addr_hi);
5130 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
5131 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
5132 mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
5133 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
5134 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
5135 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
5136 mqd->cp_hqd_pq_wptr_poll_addr_hi);
5138 /* enable the doorbell if requested */
5140 if ((adev->asic_type == CHIP_CARRIZO) ||
5141 (adev->asic_type == CHIP_FIJI) ||
5142 (adev->asic_type == CHIP_STONEY) ||
5143 (adev->asic_type == CHIP_POLARIS11) ||
5144 (adev->asic_type == CHIP_POLARIS10) ||
5145 (adev->asic_type == CHIP_POLARIS12)) {
5146 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
5147 AMDGPU_DOORBELL_KIQ << 2);
5148 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
5149 AMDGPU_DOORBELL_MEC_RING7 << 2);
5151 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
5152 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
5153 DOORBELL_OFFSET, ring->doorbell_index);
5154 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
5155 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
5156 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
5157 mqd->cp_hqd_pq_doorbell_control = tmp;
5160 mqd->cp_hqd_pq_doorbell_control = 0;
5162 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
5163 mqd->cp_hqd_pq_doorbell_control);
5165 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
5167 mqd->cp_hqd_pq_wptr = ring->wptr;
5168 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
5169 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
5171 /* set the vmid for the queue */
5172 mqd->cp_hqd_vmid = 0;
5173 WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
5175 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
5176 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
5177 WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
5178 mqd->cp_hqd_persistent_state = tmp;
/* ME1 pipe3 GENERIC2 interrupt needed on these parts for compute */
5179 if (adev->asic_type == CHIP_STONEY ||
5180 adev->asic_type == CHIP_POLARIS11 ||
5181 adev->asic_type == CHIP_POLARIS10 ||
5182 adev->asic_type == CHIP_POLARIS12) {
5183 tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
5184 tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
5185 WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
5188 /* activate the queue */
5189 mqd->cp_hqd_active = 1;
5190 WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
5192 vi_srbm_select(adev, 0, 0, 0, 0);
5193 mutex_unlock(&adev->srbm_mutex);
5195 amdgpu_bo_kunmap(ring->mqd_obj);
5196 amdgpu_bo_unreserve(ring->mqd_obj);
5200 tmp = RREG32(mmCP_PQ_STATUS);
5201 tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
5202 WREG32(mmCP_PQ_STATUS, tmp);
5205 gfx_v8_0_cp_compute_enable(adev, true);
5207 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5208 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5211 r = amdgpu_ring_test_ring(ring);
5213 ring->ready = false;
/* Top-level CP bring-up: load firmware (legacy register writes when the SMU
 * is not loading it, otherwise wait for SMU-loaded CE/PFP/ME — and MEC via
 * either path), then resume the GFX ring and either the KIQ (SR-IOV) or
 * the direct compute path, re-enabling the GUI idle interrupt at the end. */
5219 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
5223 if (!(adev->flags & AMD_IS_APU))
5224 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5226 if (!adev->pp_enabled) {
5227 if (!adev->firmware.smu_load) {
5228 /* legacy firmware loading */
5229 r = gfx_v8_0_cp_gfx_load_microcode(adev);
5233 r = gfx_v8_0_cp_compute_load_microcode(adev);
/* SMU is loading firmware: just wait for each CP block to finish */
5237 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5238 AMDGPU_UCODE_ID_CP_CE);
5242 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5243 AMDGPU_UCODE_ID_CP_PFP);
5247 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5248 AMDGPU_UCODE_ID_CP_ME);
/* Topaz loads MEC through the legacy register path even with SMU */
5252 if (adev->asic_type == CHIP_TOPAZ) {
5253 r = gfx_v8_0_cp_compute_load_microcode(adev);
5257 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5258 AMDGPU_UCODE_ID_CP_MEC1);
5265 r = gfx_v8_0_cp_gfx_resume(adev);
5269 if (amdgpu_sriov_vf(adev))
5270 r = gfx_v8_0_kiq_resume(adev);
5272 r = gfx_v8_0_cp_compute_resume(adev);
5276 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
/* Enable/disable both CP halves (gfx and compute) together. */
5281 static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
5283 gfx_v8_0_cp_gfx_enable(adev, enable);
5284 gfx_v8_0_cp_compute_enable(adev, enable);
/* IP-block hw_init: program golden registers, run core GPU init, then
 * bring up the RLC and the CP; returns the CP resume result. */
5287 static int gfx_v8_0_hw_init(void *handle)
5290 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5292 gfx_v8_0_init_golden_registers(adev);
5293 gfx_v8_0_gpu_init(adev);
5295 r = gfx_v8_0_rlc_resume(adev);
5299 r = gfx_v8_0_cp_resume(adev);
/* IP-block hw_fini: drop priv-reg/priv-inst interrupts; under SR-IOV only
 * free the KIQ queues (host owns the hardware), otherwise stop the CP and
 * RLC, tear down compute MQDs, and ungate GFX power gating. */
5304 static int gfx_v8_0_hw_fini(void *handle)
5306 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5308 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
5309 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
5310 if (amdgpu_sriov_vf(adev)) {
5311 gfx_v8_0_kiq_free_queue(adev);
5312 pr_debug("For SRIOV client, shouldn't do anything.\n");
5315 gfx_v8_0_cp_enable(adev, false);
5316 gfx_v8_0_rlc_stop(adev);
5317 gfx_v8_0_cp_compute_fini(adev);
5319 amdgpu_set_powergating_state(adev,
5320 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);
/* Suspend simply reuses the hw_fini path. */
5325 static int gfx_v8_0_suspend(void *handle)
5327 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5329 return gfx_v8_0_hw_fini(adev);
/* Resume simply reuses the hw_init path. */
5332 static int gfx_v8_0_resume(void *handle)
5334 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5336 return gfx_v8_0_hw_init(adev);
/* Idle check: the block is busy while GRBM_STATUS.GUI_ACTIVE is set. */
5339 static bool gfx_v8_0_is_idle(void *handle)
5341 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5343 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
/* Poll gfx_v8_0_is_idle() up to adev->usec_timeout iterations
 * (delay between polls not visible in this extract). */
5349 static int gfx_v8_0_wait_for_idle(void *handle)
5352 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5354 for (i = 0; i < adev->usec_timeout; i++) {
5355 if (gfx_v8_0_is_idle(handle))
/* Inspect GRBM_STATUS/GRBM_STATUS2/SRBM_STATUS busy bits and build the
 * GRBM/SRBM soft-reset masks accordingly, caching them in adev->gfx for
 * the pre/soft/post reset stages; clears the cache when nothing is hung. */
5363 static bool gfx_v8_0_check_soft_reset(void *handle)
5365 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5366 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
/* any busy graphics sub-block → reset CP + GFX + GRBM */
5370 tmp = RREG32(mmGRBM_STATUS);
5371 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
5372 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
5373 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
5374 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
5375 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
5376 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
5377 GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
5378 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5379 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5380 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5381 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
5382 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5383 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
/* RLC busy → RLC reset; CP fetch/compute/gfx busy → CP-family resets */
5387 tmp = RREG32(mmGRBM_STATUS2);
5388 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
5389 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5390 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
5392 if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
5393 REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
5394 REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
5395 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5397 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5399 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
5401 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
5402 SOFT_RESET_GRBM, 1);
/* SRBM-level pending requests / semaphore busy */
5406 tmp = RREG32(mmSRBM_STATUS);
5407 if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5408 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5409 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5410 if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
5411 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5412 SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
5414 if (grbm_soft_reset || srbm_soft_reset) {
5415 adev->gfx.grbm_soft_reset = grbm_soft_reset;
5416 adev->gfx.srbm_soft_reset = srbm_soft_reset;
5419 adev->gfx.grbm_soft_reset = 0;
5420 adev->gfx.srbm_soft_reset = 0;
/* Deactivate one HQD before a soft reset: SRBM-select the ring's queue
 * and, if it is active, request a dequeue and poll until it goes idle
 * (bounded by adev->usec_timeout).  Caller presumably holds srbm_mutex —
 * TODO confirm in the full source. */
5425 static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev,
5426 struct amdgpu_ring *ring)
5430 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5431 if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
5433 tmp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
5434 tmp = REG_SET_FIELD(tmp, CP_HQD_DEQUEUE_REQUEST,
5436 WREG32(mmCP_HQD_DEQUEUE_REQUEST, tmp);
5437 for (i = 0; i < adev->usec_timeout; i++) {
5438 if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
/* Stage before asserting soft reset: no-op when check_soft_reset found
 * nothing; otherwise stop the RLC, halt GFX parsing if the GFX/CP bits are
 * set, and quiesce every compute HQD then halt the MECs if any CP bits
 * are set. */
5445 static int gfx_v8_0_pre_soft_reset(void *handle)
5447 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5448 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5450 if ((!adev->gfx.grbm_soft_reset) &&
5451 (!adev->gfx.srbm_soft_reset))
5454 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5455 srbm_soft_reset = adev->gfx.srbm_soft_reset;
5458 gfx_v8_0_rlc_stop(adev);
5460 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5461 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5462 /* Disable GFX parsing/prefetching */
5463 gfx_v8_0_cp_gfx_enable(adev, false);
5465 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5466 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5467 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5468 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5471 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5472 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5474 gfx_v8_0_inactive_hqd(adev, ring);
5476 /* Disable MEC parsing/prefetching */
5477 gfx_v8_0_cp_compute_enable(adev, false);
/* Perform the actual soft reset using the cached GRBM/SRBM masks: stall
 * GFX via GMCON_DEBUG, pulse the reset bits high then low (read-back after
 * each write to post it; delays between pulses not visible in this
 * extract), then release the GMCON stall. */
5483 static int gfx_v8_0_soft_reset(void *handle)
5485 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5486 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5489 if ((!adev->gfx.grbm_soft_reset) &&
5490 (!adev->gfx.srbm_soft_reset))
5493 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5494 srbm_soft_reset = adev->gfx.srbm_soft_reset;
/* stall GFX traffic through the memory controller during the reset */
5496 if (grbm_soft_reset || srbm_soft_reset) {
5497 tmp = RREG32(mmGMCON_DEBUG);
5498 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
5499 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
5500 WREG32(mmGMCON_DEBUG, tmp);
5504 if (grbm_soft_reset) {
5505 tmp = RREG32(mmGRBM_SOFT_RESET);
5506 tmp |= grbm_soft_reset;
5507 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5508 WREG32(mmGRBM_SOFT_RESET, tmp);
5509 tmp = RREG32(mmGRBM_SOFT_RESET);
5513 tmp &= ~grbm_soft_reset;
5514 WREG32(mmGRBM_SOFT_RESET, tmp);
5515 tmp = RREG32(mmGRBM_SOFT_RESET);
5518 if (srbm_soft_reset) {
5519 tmp = RREG32(mmSRBM_SOFT_RESET);
5520 tmp |= srbm_soft_reset;
5521 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5522 WREG32(mmSRBM_SOFT_RESET, tmp);
5523 tmp = RREG32(mmSRBM_SOFT_RESET);
5527 tmp &= ~srbm_soft_reset;
5528 WREG32(mmSRBM_SOFT_RESET, tmp);
5529 tmp = RREG32(mmSRBM_SOFT_RESET);
5532 if (grbm_soft_reset || srbm_soft_reset) {
5533 tmp = RREG32(mmGMCON_DEBUG);
5534 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
5535 tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
5536 WREG32(mmGMCON_DEBUG, tmp);
5539 /* Wait a little for things to settle down */
/* After a soft reset, zero one HQD's dequeue request and PQ read/write
 * pointers via SRBM selection, then restore the default selection. */
5545 static void gfx_v8_0_init_hqd(struct amdgpu_device *adev,
5546 struct amdgpu_ring *ring)
5548 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5549 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
5550 WREG32(mmCP_HQD_PQ_RPTR, 0);
5551 WREG32(mmCP_HQD_PQ_WPTR, 0);
5552 vi_srbm_select(adev, 0, 0, 0, 0);
/* Stage after soft reset: mirror of pre_soft_reset — resume the GFX ring
 * if GFX/CP was reset, reinitialize each compute HQD and resume compute if
 * any CP bits were reset, then restart the RLC. */
5555 static int gfx_v8_0_post_soft_reset(void *handle)
5557 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5558 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5560 if ((!adev->gfx.grbm_soft_reset) &&
5561 (!adev->gfx.srbm_soft_reset))
5564 grbm_soft_reset = adev->gfx.grbm_soft_reset;
5565 srbm_soft_reset = adev->gfx.srbm_soft_reset;
5567 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5568 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5569 gfx_v8_0_cp_gfx_resume(adev);
5571 if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5572 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5573 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5574 REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5577 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5578 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5580 gfx_v8_0_init_hqd(adev, ring);
5582 gfx_v8_0_cp_compute_resume(adev);
5584 gfx_v8_0_rlc_start(adev);
5590 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5592 * @adev: amdgpu_device pointer
5594 * Fetches a GPU clock counter snapshot.
5595 * Returns the 64 bit clock counter snapshot.
5597 static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5601 mutex_lock(&adev->gfx.gpu_clock_mutex);
5602 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5603 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5604 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5605 mutex_unlock(&adev->gfx.gpu_clock_mutex);
5609 static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5611 uint32_t gds_base, uint32_t gds_size,
5612 uint32_t gws_base, uint32_t gws_size,
5613 uint32_t oa_base, uint32_t oa_size)
5615 gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5616 gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5618 gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5619 gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5621 oa_base = oa_base >> AMDGPU_OA_SHIFT;
5622 oa_size = oa_size >> AMDGPU_OA_SHIFT;
5625 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5626 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5627 WRITE_DATA_DST_SEL(0)));
5628 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5629 amdgpu_ring_write(ring, 0);
5630 amdgpu_ring_write(ring, gds_base);
5633 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5634 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5635 WRITE_DATA_DST_SEL(0)));
5636 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5637 amdgpu_ring_write(ring, 0);
5638 amdgpu_ring_write(ring, gds_size);
5641 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5642 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5643 WRITE_DATA_DST_SEL(0)));
5644 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5645 amdgpu_ring_write(ring, 0);
5646 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5649 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5650 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5651 WRITE_DATA_DST_SEL(0)));
5652 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5653 amdgpu_ring_write(ring, 0);
5654 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5657 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
5659 WREG32(mmSQ_IND_INDEX,
5660 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5661 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5662 (address << SQ_IND_INDEX__INDEX__SHIFT) |
5663 (SQ_IND_INDEX__FORCE_READ_MASK));
5664 return RREG32(mmSQ_IND_DATA);
5667 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
5668 uint32_t wave, uint32_t thread,
5669 uint32_t regno, uint32_t num, uint32_t *out)
5671 WREG32(mmSQ_IND_INDEX,
5672 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
5673 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
5674 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
5675 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
5676 (SQ_IND_INDEX__FORCE_READ_MASK) |
5677 (SQ_IND_INDEX__AUTO_INCR_MASK));
5679 *(out++) = RREG32(mmSQ_IND_DATA);
5682 static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
5684 /* type 0 wave data */
5685 dst[(*no_fields)++] = 0;
5686 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
5687 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
5688 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
5689 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
5690 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
5691 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
5692 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
5693 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
5694 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
5695 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
5696 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
5697 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
5698 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
5699 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
5700 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
5701 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
5702 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
5703 dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
5706 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5707 uint32_t wave, uint32_t start,
5708 uint32_t size, uint32_t *dst)
5711 adev, simd, wave, 0,
5712 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5716 static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5717 .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
5718 .select_se_sh = &gfx_v8_0_select_se_sh,
5719 .read_wave_data = &gfx_v8_0_read_wave_data,
5720 .read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
5723 static int gfx_v8_0_early_init(void *handle)
5725 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5727 adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5728 adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
5729 adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5730 gfx_v8_0_set_ring_funcs(adev);
5731 gfx_v8_0_set_irq_funcs(adev);
5732 gfx_v8_0_set_gds_init(adev);
5733 gfx_v8_0_set_rlc_funcs(adev);
5738 static int gfx_v8_0_late_init(void *handle)
5740 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5743 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5747 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5751 /* requires IBs so do in late init after IB pool is initialized */
5752 r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5756 amdgpu_set_powergating_state(adev,
5757 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);
5762 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5765 if ((adev->asic_type == CHIP_POLARIS11) ||
5766 (adev->asic_type == CHIP_POLARIS12))
5767 /* Send msg to SMU via Powerplay */
5768 amdgpu_set_powergating_state(adev,
5769 AMD_IP_BLOCK_TYPE_SMC,
5771 AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5773 WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5776 static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5779 WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
5782 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5785 WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
5788 static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5791 WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
5794 static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5797 WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);
5799 /* Read any GFX register to wake up GFX. */
5801 RREG32(mmDB_RENDER_CONTROL);
5804 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5807 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5808 cz_enable_gfx_cg_power_gating(adev, true);
5809 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5810 cz_enable_gfx_pipeline_power_gating(adev, true);
5812 cz_enable_gfx_cg_power_gating(adev, false);
5813 cz_enable_gfx_pipeline_power_gating(adev, false);
5817 static int gfx_v8_0_set_powergating_state(void *handle,
5818 enum amd_powergating_state state)
5820 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5821 bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
5823 switch (adev->asic_type) {
5827 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5828 cz_enable_sck_slow_down_on_power_up(adev, true);
5829 cz_enable_sck_slow_down_on_power_down(adev, true);
5831 cz_enable_sck_slow_down_on_power_up(adev, false);
5832 cz_enable_sck_slow_down_on_power_down(adev, false);
5834 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5835 cz_enable_cp_power_gating(adev, true);
5837 cz_enable_cp_power_gating(adev, false);
5839 cz_update_gfx_cg_power_gating(adev, enable);
5841 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5842 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5844 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5846 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5847 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5849 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5851 case CHIP_POLARIS11:
5852 case CHIP_POLARIS12:
5853 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5854 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5856 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5858 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5859 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5861 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5863 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5864 polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5866 polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5875 static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
5876 uint32_t reg_addr, uint32_t cmd)
5880 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5882 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5883 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5885 data = RREG32(mmRLC_SERDES_WR_CTRL);
5886 if (adev->asic_type == CHIP_STONEY)
5887 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5888 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5889 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5890 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5891 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5892 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5893 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5894 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5895 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5897 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5898 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5899 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5900 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5901 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5902 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5903 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5904 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5905 RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5906 RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5907 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5908 data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5909 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5910 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5911 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5913 WREG32(mmRLC_SERDES_WR_CTRL, data);
/* RLC safe-mode mailbox protocol: message codes written through
 * RLC_GPR_REG2.  REQ is the handshake bit; MESSAGE carries the
 * enter/exit command.
 */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK      0x00000001
#define RLC_GPR_REG2__REQ__SHIFT    0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK  0x0000001e
5923 static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
5928 data = RREG32(mmRLC_CNTL);
5929 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5932 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5933 data |= RLC_SAFE_MODE__CMD_MASK;
5934 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5935 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5936 WREG32(mmRLC_SAFE_MODE, data);
5938 for (i = 0; i < adev->usec_timeout; i++) {
5939 if ((RREG32(mmRLC_GPM_STAT) &
5940 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5941 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5942 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5943 RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5948 for (i = 0; i < adev->usec_timeout; i++) {
5949 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5953 adev->gfx.rlc.in_safe_mode = true;
5957 static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
5962 data = RREG32(mmRLC_CNTL);
5963 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5966 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5967 if (adev->gfx.rlc.in_safe_mode) {
5968 data |= RLC_SAFE_MODE__CMD_MASK;
5969 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5970 WREG32(mmRLC_SAFE_MODE, data);
5971 adev->gfx.rlc.in_safe_mode = false;
5975 for (i = 0; i < adev->usec_timeout; i++) {
5976 if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
5982 static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5983 .enter_safe_mode = iceland_enter_rlc_safe_mode,
5984 .exit_safe_mode = iceland_exit_rlc_safe_mode
5987 static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5990 uint32_t temp, data;
5992 adev->gfx.rlc.funcs->enter_safe_mode(adev);
5994 /* It is disabled by HW by default */
5995 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5996 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5997 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
5998 /* 1 - RLC memory Light sleep */
5999 WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);
6001 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
6002 WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
6005 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
6006 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
6007 if (adev->flags & AMD_IS_APU)
6008 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
6009 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
6010 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
6012 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
6013 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
6014 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
6015 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
6018 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
6020 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6021 gfx_v8_0_wait_for_rlc_serdes(adev);
6023 /* 5 - clear mgcg override */
6024 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
6026 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
6027 /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
6028 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
6029 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
6030 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
6031 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
6032 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
6033 if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
6034 (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
6035 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
6036 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
6037 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
6039 WREG32(mmCGTS_SM_CTRL_REG, data);
6043 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6044 gfx_v8_0_wait_for_rlc_serdes(adev);
6046 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
6047 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
6048 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
6049 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
6050 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
6051 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
6053 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
6055 /* 2 - disable MGLS in RLC */
6056 data = RREG32(mmRLC_MEM_SLP_CNTL);
6057 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
6058 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
6059 WREG32(mmRLC_MEM_SLP_CNTL, data);
6062 /* 3 - disable MGLS in CP */
6063 data = RREG32(mmCP_MEM_SLP_CNTL);
6064 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
6065 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
6066 WREG32(mmCP_MEM_SLP_CNTL, data);
6069 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
6070 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
6071 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
6072 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
6074 WREG32(mmCGTS_SM_CTRL_REG, data);
6076 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6077 gfx_v8_0_wait_for_rlc_serdes(adev);
6079 /* 6 - set mgcg override */
6080 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
6084 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6085 gfx_v8_0_wait_for_rlc_serdes(adev);
6088 adev->gfx.rlc.funcs->exit_safe_mode(adev);
6091 static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
6094 uint32_t temp, temp1, data, data1;
6096 temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
6098 adev->gfx.rlc.funcs->enter_safe_mode(adev);
6100 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
6101 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
6102 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
6104 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
6106 /* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6107 gfx_v8_0_wait_for_rlc_serdes(adev);
6109 /* 2 - clear cgcg override */
6110 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
6112 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6113 gfx_v8_0_wait_for_rlc_serdes(adev);
6115 /* 3 - write cmd to set CGLS */
6116 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
6118 /* 4 - enable cgcg */
6119 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
6121 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6123 data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
6125 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
6126 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
6129 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
6131 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
6135 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
6137 /* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
6138 * Cmp_busy/GFX_Idle interrupts
6140 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
6142 /* disable cntx_empty_int_enable & GFX Idle interrupt */
6143 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
6146 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
6147 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
6148 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
6150 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
6152 /* read gfx register to wake up cgcg */
6153 RREG32(mmCB_CGTT_SCLK_CTRL);
6154 RREG32(mmCB_CGTT_SCLK_CTRL);
6155 RREG32(mmCB_CGTT_SCLK_CTRL);
6156 RREG32(mmCB_CGTT_SCLK_CTRL);
6158 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6159 gfx_v8_0_wait_for_rlc_serdes(adev);
6161 /* write cmd to Set CGCG Overrride */
6162 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
6164 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
6165 gfx_v8_0_wait_for_rlc_serdes(adev);
6167 /* write cmd to Clear CGLS */
6168 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
6170 /* disable cgcg, cgls should be disabled too. */
6171 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
6172 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
6174 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
6177 gfx_v8_0_wait_for_rlc_serdes(adev);
6179 adev->gfx.rlc.funcs->exit_safe_mode(adev);
6181 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
6185 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
6186 * === MGCG + MGLS + TS(CG/LS) ===
6188 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6189 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6191 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
6192 * === CGCG + CGLS ===
6194 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6195 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6200 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
6201 enum amd_clockgating_state state)
6203 uint32_t msg_id, pp_state = 0;
6204 uint32_t pp_support_state = 0;
6205 void *pp_handle = adev->powerplay.pp_handle;
6207 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6208 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6209 pp_support_state = PP_STATE_SUPPORT_LS;
6210 pp_state = PP_STATE_LS;
6212 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6213 pp_support_state |= PP_STATE_SUPPORT_CG;
6214 pp_state |= PP_STATE_CG;
6216 if (state == AMD_CG_STATE_UNGATE)
6219 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6223 amd_set_clockgating_by_smu(pp_handle, msg_id);
6226 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6227 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6228 pp_support_state = PP_STATE_SUPPORT_LS;
6229 pp_state = PP_STATE_LS;
6232 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6233 pp_support_state |= PP_STATE_SUPPORT_CG;
6234 pp_state |= PP_STATE_CG;
6237 if (state == AMD_CG_STATE_UNGATE)
6240 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6244 amd_set_clockgating_by_smu(pp_handle, msg_id);
6250 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
6251 enum amd_clockgating_state state)
6254 uint32_t msg_id, pp_state = 0;
6255 uint32_t pp_support_state = 0;
6256 void *pp_handle = adev->powerplay.pp_handle;
6258 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6259 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6260 pp_support_state = PP_STATE_SUPPORT_LS;
6261 pp_state = PP_STATE_LS;
6263 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6264 pp_support_state |= PP_STATE_SUPPORT_CG;
6265 pp_state |= PP_STATE_CG;
6267 if (state == AMD_CG_STATE_UNGATE)
6270 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6274 amd_set_clockgating_by_smu(pp_handle, msg_id);
6277 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
6278 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
6279 pp_support_state = PP_STATE_SUPPORT_LS;
6280 pp_state = PP_STATE_LS;
6282 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
6283 pp_support_state |= PP_STATE_SUPPORT_CG;
6284 pp_state |= PP_STATE_CG;
6286 if (state == AMD_CG_STATE_UNGATE)
6289 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6293 amd_set_clockgating_by_smu(pp_handle, msg_id);
6296 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6297 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6298 pp_support_state = PP_STATE_SUPPORT_LS;
6299 pp_state = PP_STATE_LS;
6302 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6303 pp_support_state |= PP_STATE_SUPPORT_CG;
6304 pp_state |= PP_STATE_CG;
6307 if (state == AMD_CG_STATE_UNGATE)
6310 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6314 amd_set_clockgating_by_smu(pp_handle, msg_id);
6317 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
6318 pp_support_state = PP_STATE_SUPPORT_LS;
6320 if (state == AMD_CG_STATE_UNGATE)
6323 pp_state = PP_STATE_LS;
6325 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6329 amd_set_clockgating_by_smu(pp_handle, msg_id);
6332 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6333 pp_support_state = PP_STATE_SUPPORT_LS;
6335 if (state == AMD_CG_STATE_UNGATE)
6338 pp_state = PP_STATE_LS;
6339 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6343 amd_set_clockgating_by_smu(pp_handle, msg_id);
6349 static int gfx_v8_0_set_clockgating_state(void *handle,
6350 enum amd_clockgating_state state)
6352 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6354 switch (adev->asic_type) {
6358 gfx_v8_0_update_gfx_clock_gating(adev,
6359 state == AMD_CG_STATE_GATE ? true : false);
6362 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6364 case CHIP_POLARIS10:
6365 case CHIP_POLARIS11:
6366 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6374 static u32 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6376 return ring->adev->wb.wb[ring->rptr_offs];
6379 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6381 struct amdgpu_device *adev = ring->adev;
6383 if (ring->use_doorbell)
6384 /* XXX check if swapping is necessary on BE */
6385 return ring->adev->wb.wb[ring->wptr_offs];
6387 return RREG32(mmCP_RB0_WPTR);
6390 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6392 struct amdgpu_device *adev = ring->adev;
6394 if (ring->use_doorbell) {
6395 /* XXX check if swapping is necessary on BE */
6396 adev->wb.wb[ring->wptr_offs] = ring->wptr;
6397 WDOORBELL32(ring->doorbell_index, ring->wptr);
6399 WREG32(mmCP_RB0_WPTR, ring->wptr);
6400 (void)RREG32(mmCP_RB0_WPTR);
6404 static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
6406 u32 ref_and_mask, reg_mem_engine;
6408 if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6409 (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
6412 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
6415 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
6422 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
6423 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
6426 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6427 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
6428 WAIT_REG_MEM_FUNCTION(3) | /* == */
6430 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
6431 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
6432 amdgpu_ring_write(ring, ref_and_mask);
6433 amdgpu_ring_write(ring, ref_and_mask);
6434 amdgpu_ring_write(ring, 0x20); /* poll interval */
6437 static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
6439 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6440 amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
6443 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
6444 amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
6449 static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
6451 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6452 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6453 WRITE_DATA_DST_SEL(0) |
6455 amdgpu_ring_write(ring, mmHDP_DEBUG0);
6456 amdgpu_ring_write(ring, 0);
6457 amdgpu_ring_write(ring, 1);
6461 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6462 struct amdgpu_ib *ib,
6463 unsigned vm_id, bool ctx_switch)
6465 u32 header, control = 0;
6467 if (ib->flags & AMDGPU_IB_FLAG_CE)
6468 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6470 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6472 control |= ib->length_dw | (vm_id << 24);
6474 amdgpu_ring_write(ring, header);
6475 amdgpu_ring_write(ring,
6479 (ib->gpu_addr & 0xFFFFFFFC));
6480 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6481 amdgpu_ring_write(ring, control);
6484 static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
6485 struct amdgpu_ib *ib,
6486 unsigned vm_id, bool ctx_switch)
6488 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);
6490 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
6491 amdgpu_ring_write(ring,
6495 (ib->gpu_addr & 0xFFFFFFFC));
6496 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6497 amdgpu_ring_write(ring, control);
6500 static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
6501 u64 seq, unsigned flags)
6503 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6504 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6506 /* EVENT_WRITE_EOP - flush caches, send int */
6507 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
6508 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6510 EOP_TC_WB_ACTION_EN |
6511 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6513 amdgpu_ring_write(ring, addr & 0xfffffffc);
6514 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
6515 DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6516 amdgpu_ring_write(ring, lower_32_bits(seq));
6517 amdgpu_ring_write(ring, upper_32_bits(seq));
6521 static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
6523 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6524 uint32_t seq = ring->fence_drv.sync_seq;
6525 uint64_t addr = ring->fence_drv.gpu_addr;
6527 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6528 amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
6529 WAIT_REG_MEM_FUNCTION(3) | /* equal */
6530 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
6531 amdgpu_ring_write(ring, addr & 0xfffffffc);
6532 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6533 amdgpu_ring_write(ring, seq);
6534 amdgpu_ring_write(ring, 0xffffffff);
6535 amdgpu_ring_write(ring, 4); /* poll interval */
6538 static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6539 unsigned vm_id, uint64_t pd_addr)
6541 int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
6543 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6544 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6545 WRITE_DATA_DST_SEL(0)) |
6548 amdgpu_ring_write(ring,
6549 (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
6551 amdgpu_ring_write(ring,
6552 (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
6554 amdgpu_ring_write(ring, 0);
6555 amdgpu_ring_write(ring, pd_addr >> 12);
6557 /* bits 0-15 are the VM contexts0-15 */
6558 /* invalidate the cache */
6559 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6560 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6561 WRITE_DATA_DST_SEL(0)));
6562 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6563 amdgpu_ring_write(ring, 0);
6564 amdgpu_ring_write(ring, 1 << vm_id);
6566 /* wait for the invalidate to complete */
6567 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6568 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6569 WAIT_REG_MEM_FUNCTION(0) | /* always */
6570 WAIT_REG_MEM_ENGINE(0))); /* me */
6571 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6572 amdgpu_ring_write(ring, 0);
6573 amdgpu_ring_write(ring, 0); /* ref */
6574 amdgpu_ring_write(ring, 0); /* mask */
6575 amdgpu_ring_write(ring, 0x20); /* poll interval */
6577 /* compute doesn't have PFP */
6579 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6580 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6581 amdgpu_ring_write(ring, 0x0);
6582 /* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush finish */
6583 amdgpu_ring_insert_nop(ring, 128);
6587 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6589 return ring->adev->wb.wb[ring->wptr_offs];
6592 static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6594 struct amdgpu_device *adev = ring->adev;
6596 /* XXX check if swapping is necessary on BE */
6597 adev->wb.wb[ring->wptr_offs] = ring->wptr;
6598 WDOORBELL32(ring->doorbell_index, ring->wptr);
6601 static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6605 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6606 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6608 /* RELEASE_MEM - flush caches, send int */
6609 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6610 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6612 EOP_TC_WB_ACTION_EN |
6613 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6615 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
6616 amdgpu_ring_write(ring, addr & 0xfffffffc);
6617 amdgpu_ring_write(ring, upper_32_bits(addr));
6618 amdgpu_ring_write(ring, lower_32_bits(seq));
6619 amdgpu_ring_write(ring, upper_32_bits(seq));
/* Emit a fence on the KIQ ring: write the (32-bit only) @seq to @addr via
 * WRITE_DATA, then optionally poke mmCPC_INT_STATUS to raise an interrupt.
 * NOTE(review): the statement guarded by the 64-bit check (likely a
 * warn/return) is not visible in this excerpt.
 */
6622 static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
6623 u64 seq, unsigned int flags)
6625 /* we only allocate 32bit for each seq wb address */
6626 if (flags & AMDGPU_FENCE_FLAG_64BIT)
6629 /* write fence seq to the "addr" */
6630 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
/* DST_SEL(5) = memory destination; WR_CONFIRM waits for the write to land */
6631 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6632 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
6633 amdgpu_ring_write(ring, lower_32_bits(addr));
6634 amdgpu_ring_write(ring, upper_32_bits(addr));
6635 amdgpu_ring_write(ring, lower_32_bits(seq));
6637 if (flags & AMDGPU_FENCE_FLAG_INT) {
6638 /* set register to trigger INT */
6639 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
/* DST_SEL(0) = register destination: write into mmCPC_INT_STATUS */
6640 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6641 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
6642 amdgpu_ring_write(ring, mmCPC_INT_STATUS);
6643 amdgpu_ring_write(ring, 0);
6644 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
/* Emit a SWITCH_BUFFER packet (2 dwords) on the gfx ring. */
6648 static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6650 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6651 amdgpu_ring_write(ring, 0);
/* Emit a CONTEXT_CONTROL packet whose load bits (dw2) depend on @flags:
 * on a context switch, request reloading of global/per-context register
 * state, and load CE RAM when a preamble IB is present.
 * NOTE(review): the dw2 declaration and the individual bit assignments are
 * not visible in this excerpt; only the visible structure is documented.
 */
6654 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6658 dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6659 if (flags & AMDGPU_HAVE_CTX_SWITCH) {
/* flush VGT state before asking the CP to reload context registers */
6660 gfx_v8_0_ring_emit_vgt_flush(ring);
6661 /* set load_global_config & load_global_uconfig */
6663 /* set load_cs_sh_regs */
6665 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6668 /* set load_ce_ram if preamble presented */
6669 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6672 /* still load_ce_ram if this is the first time preamble presented
6673 * although there is no context switch happens.
6675 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6679 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6680 amdgpu_ring_write(ring, dw2);
6681 amdgpu_ring_write(ring, 0);
/* Enable/disable the gfx ring0 end-of-pipe timestamp interrupt. */
6684 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6685 enum amdgpu_interrupt_state state)
6687 WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
6688 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
/* Enable/disable the compute EOP timestamp interrupt for MEC1 pipe 0.
 * NOTE(review): the me/pipe validation branches are only partially visible
 * here (the DRM_DEBUG lines); the conditions themselves are not shown.
 */
6691 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6693 enum amdgpu_interrupt_state state)
6696 * amdgpu controls only pipe 0 of MEC1. That's why this function only
6697 * handles the setting of interrupts for this specific pipe. All other
6698 * pipes' interrupts are set by amdkfd.
6706 DRM_DEBUG("invalid pipe %d\n", pipe);
6710 DRM_DEBUG("invalid me %d\n", me);
6714 WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE,
6715 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
/* Enable/disable the privileged-register-access fault interrupt on ring0. */
6718 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6719 struct amdgpu_irq_src *source,
6721 enum amdgpu_interrupt_state state)
6723 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
6724 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
/* Enable/disable the privileged-instruction fault interrupt on ring0. */
6729 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6730 struct amdgpu_irq_src *source,
6732 enum amdgpu_interrupt_state state)
6734 WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
6735 state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
/* Dispatch an EOP interrupt state change to the right ring: the gfx ring,
 * or one of the eight MEC1/MEC2 compute pipes, selected by the irq type.
 */
6740 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6741 struct amdgpu_irq_src *src,
6743 enum amdgpu_interrupt_state state)
6746 case AMDGPU_CP_IRQ_GFX_EOP:
6747 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6749 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6750 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6752 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6753 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6755 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6756 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6758 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6759 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6761 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6762 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6764 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6765 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6767 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6768 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6770 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6771 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
/* EOP interrupt handler: decode me/pipe/queue from the IV ring_id, then
 * signal fence completion on the matching gfx or compute ring.
 */
6779 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6780 struct amdgpu_irq_src *source,
6781 struct amdgpu_iv_entry *entry)
6784 u8 me_id, pipe_id, queue_id;
6785 struct amdgpu_ring *ring;
6787 DRM_DEBUG("IH: CP EOP\n");
/* ring_id layout: bits[3:2]=me, bits[1:0]=pipe, bits[6:4]=queue */
6788 me_id = (entry->ring_id & 0x0c) >> 2;
6789 pipe_id = (entry->ring_id & 0x03) >> 0;
6790 queue_id = (entry->ring_id & 0x70) >> 4;
6794 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6798 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6799 ring = &adev->gfx.compute_ring[i];
6800 /* Per-queue interrupt is supported for MEC starting from VI.
6801 * The interrupt can only be enabled/disabled per pipe instead of per queue.
6803 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6804 amdgpu_fence_process(ring);
/* Privileged register fault handler: log and schedule a GPU reset. */
6811 static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6812 struct amdgpu_irq_src *source,
6813 struct amdgpu_iv_entry *entry)
6815 DRM_ERROR("Illegal register access in command stream\n");
6816 schedule_work(&adev->reset_work);
/* Privileged instruction fault handler: log and schedule a GPU reset. */
6820 static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6821 struct amdgpu_irq_src *source,
6822 struct amdgpu_iv_entry *entry)
6824 DRM_ERROR("Illegal instruction in command stream\n");
6825 schedule_work(&adev->reset_work);
/* Enable/disable the KIQ GENERIC2 interrupt: toggle the enable bit both in
 * the global CPC interrupt control and in the per-pipe ME{1,2}_PIPEx
 * control selected from the KIQ ring's me/pipe.
 * NOTE(review): the me-based if/else selecting ME1 vs ME2 and the
 * else-branch structure are only partially visible in this excerpt.
 */
6829 static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
6830 struct amdgpu_irq_src *src,
6832 enum amdgpu_interrupt_state state)
6834 uint32_t tmp, target;
6835 struct amdgpu_ring *ring = (struct amdgpu_ring *)src->data;
6837 BUG_ON(!ring || (ring->funcs->type != AMDGPU_RING_TYPE_KIQ));
6840 target = mmCP_ME1_PIPE0_INT_CNTL;
6842 target = mmCP_ME2_PIPE0_INT_CNTL;
/* consecutive per-pipe registers: offset by the ring's pipe index */
6843 target += ring->pipe;
6846 case AMDGPU_CP_KIQ_IRQ_DRIVER0:
6847 if (state == AMDGPU_IRQ_STATE_DISABLE) {
6848 tmp = RREG32(mmCPC_INT_CNTL);
6849 tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
6850 GENERIC2_INT_ENABLE, 0);
6851 WREG32(mmCPC_INT_CNTL, tmp);
/* per-pipe control; CP_ME2_PIPE0_INT_CNTL field layout is reused for
 * whichever ME/pipe register "target" points at
 */
6853 tmp = RREG32(target);
6854 tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
6855 GENERIC2_INT_ENABLE, 0);
6856 WREG32(target, tmp);
6858 tmp = RREG32(mmCPC_INT_CNTL);
6859 tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
6860 GENERIC2_INT_ENABLE, 1);
6861 WREG32(mmCPC_INT_CNTL, tmp);
6863 tmp = RREG32(target);
6864 tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
6865 GENERIC2_INT_ENABLE, 1);
6866 WREG32(target, tmp);
6870 BUG(); /* kiq only support GENERIC2_INT now */
/* KIQ GENERIC2 interrupt handler: decode me/pipe/queue for debug output and
 * signal fence completion on the KIQ ring stored in source->data.
 */
6876 static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
6877 struct amdgpu_irq_src *source,
6878 struct amdgpu_iv_entry *entry)
6880 u8 me_id, pipe_id, queue_id;
6881 struct amdgpu_ring *ring = (struct amdgpu_ring *)source->data;
6883 BUG_ON(!ring || (ring->funcs->type != AMDGPU_RING_TYPE_KIQ));
/* same ring_id decode as gfx_v8_0_eop_irq */
6885 me_id = (entry->ring_id & 0x0c) >> 2;
6886 pipe_id = (entry->ring_id & 0x03) >> 0;
6887 queue_id = (entry->ring_id & 0x70) >> 4;
6888 DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
6889 me_id, pipe_id, queue_id);
6891 amdgpu_fence_process(ring);
/* IP-block callback table for the GFX8 block: lifecycle (init/fini,
 * suspend/resume), idle/reset handling and clock/power gating hooks.
 */
6895 static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6897 .early_init = gfx_v8_0_early_init,
6898 .late_init = gfx_v8_0_late_init,
6899 .sw_init = gfx_v8_0_sw_init,
6900 .sw_fini = gfx_v8_0_sw_fini,
6901 .hw_init = gfx_v8_0_hw_init,
6902 .hw_fini = gfx_v8_0_hw_fini,
6903 .suspend = gfx_v8_0_suspend,
6904 .resume = gfx_v8_0_resume,
6905 .is_idle = gfx_v8_0_is_idle,
6906 .wait_for_idle = gfx_v8_0_wait_for_idle,
6907 .check_soft_reset = gfx_v8_0_check_soft_reset,
6908 .pre_soft_reset = gfx_v8_0_pre_soft_reset,
6909 .soft_reset = gfx_v8_0_soft_reset,
6910 .post_soft_reset = gfx_v8_0_post_soft_reset,
6911 .set_clockgating_state = gfx_v8_0_set_clockgating_state,
6912 .set_powergating_state = gfx_v8_0_set_powergating_state,
/* Ring function table for the GFX (graphics) ring. The emit_frame_size
 * terms are per-helper dword budgets used to reserve ring space.
 */
6915 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6916 .type = AMDGPU_RING_TYPE_GFX,
6918 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6919 .get_rptr = gfx_v8_0_ring_get_rptr,
6920 .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6921 .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6923 20 + /* gfx_v8_0_ring_emit_gds_switch */
6924 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6925 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
6926 6 + 6 + 6 +/* gfx_v8_0_ring_emit_fence_gfx x3 for user fence, vm fence */
6927 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
/* 128 covers the post-flush NOP burst that keeps CE from racing vm_flush */
6928 128 + 19 + /* gfx_v8_0_ring_emit_vm_flush */
6929 2 + /* gfx_v8_ring_emit_sb */
6930 3 + 4, /* gfx_v8_ring_emit_cntxcntl including vgt flush */
6931 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
6932 .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
6933 .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
6934 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6935 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6936 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6937 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6938 .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6939 .test_ring = gfx_v8_0_ring_test_ring,
6940 .test_ib = gfx_v8_0_ring_test_ib,
6941 .insert_nop = amdgpu_ring_insert_nop,
6942 .pad_ib = amdgpu_ring_generic_pad_ib,
6943 .emit_switch_buffer = gfx_v8_ring_emit_sb,
6944 .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
/* Ring function table for the compute (MEC) rings; uses the writeback-slot
 * wptr helpers and the RELEASE_MEM-based compute fence.
 */
6947 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6948 .type = AMDGPU_RING_TYPE_COMPUTE,
6950 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6951 .get_rptr = gfx_v8_0_ring_get_rptr,
6952 .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6953 .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6955 20 + /* gfx_v8_0_ring_emit_gds_switch */
6956 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6957 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
6958 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6959 17 + /* gfx_v8_0_ring_emit_vm_flush */
6960 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
6961 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
6962 .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6963 .emit_fence = gfx_v8_0_ring_emit_fence_compute,
6964 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
6965 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6966 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
6967 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6968 .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6969 .test_ring = gfx_v8_0_ring_test_ring,
6970 .test_ib = gfx_v8_0_ring_test_ib,
6971 .insert_nop = amdgpu_ring_insert_nop,
6972 .pad_ib = amdgpu_ring_generic_pad_ib,
/* Ring function table for the KIQ (kernel interface queue). It shares the
 * compute rptr/wptr and IB helpers but emits fences via WRITE_DATA
 * (gfx_v8_0_ring_emit_fence_kiq) and has no VM-flush/GDS hooks.
 */
6975 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
6976 .type = AMDGPU_RING_TYPE_KIQ,
6978 .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6979 .get_rptr = gfx_v8_0_ring_get_rptr,
6980 .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6981 .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6983 20 + /* gfx_v8_0_ring_emit_gds_switch */
6984 7 + /* gfx_v8_0_ring_emit_hdp_flush */
6985 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
6986 7 + /* gfx_v8_0_ring_emit_pipeline_sync */
6987 17 + /* gfx_v8_0_ring_emit_vm_flush */
6988 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6989 .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
6990 .emit_ib = gfx_v8_0_ring_emit_ib_compute,
6991 .emit_fence = gfx_v8_0_ring_emit_fence_kiq,
6992 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
6993 .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
6994 .test_ring = gfx_v8_0_ring_test_ring,
6995 .test_ib = gfx_v8_0_ring_test_ib,
6996 .insert_nop = amdgpu_ring_insert_nop,
6997 .pad_ib = amdgpu_ring_generic_pad_ib,
/* Install the KIQ, gfx and compute ring function tables on all rings. */
7000 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
7004 adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7006 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7007 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7009 for (i = 0; i < adev->gfx.num_compute_rings; i++)
7010 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
/* IRQ source ops: EOP interrupt enable + handler. */
7013 static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
7014 .set = gfx_v8_0_set_eop_interrupt_state,
7015 .process = gfx_v8_0_eop_irq,
/* IRQ source ops: privileged register fault enable + handler. */
7018 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
7019 .set = gfx_v8_0_set_priv_reg_fault_state,
7020 .process = gfx_v8_0_priv_reg_irq,
/* IRQ source ops: privileged instruction fault enable + handler. */
7023 static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
7024 .set = gfx_v8_0_set_priv_inst_fault_state,
7025 .process = gfx_v8_0_priv_inst_irq,
/* IRQ source ops: KIQ GENERIC2 interrupt enable + handler. */
7028 static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
7029 .set = gfx_v8_0_kiq_set_interrupt_state,
7030 .process = gfx_v8_0_kiq_irq,
/* Register the GFX8 interrupt sources (EOP, priv reg/inst faults, KIQ)
 * with their type counts and function tables.
 */
7033 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
7035 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7036 adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
7038 adev->gfx.priv_reg_irq.num_types = 1;
7039 adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
7041 adev->gfx.priv_inst_irq.num_types = 1;
7042 adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
7044 adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
7045 adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
/* Install the RLC function table (iceland_rlc_funcs is used for all GFX8
 * parts handled by this file).
 */
7048 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
7050 adev->gfx.rlc.funcs = &iceland_rlc_funcs;
/* Initialize GDS/GWS/OA totals and per-partition sizes. Total GDS memory
 * comes from GDS_VMID0_SIZE; partition sizes differ for 64KB parts vs the
 * (implied, not visible here) else branch for smaller GDS.
 */
7053 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
7055 /* init asci gds info */
7056 adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
7057 adev->gds.gws.total_size = 64;
7058 adev->gds.oa.total_size = 16;
7060 if (adev->gds.mem.total_size == 64 * 1024) {
7061 adev->gds.mem.gfx_partition_size = 4096;
7062 adev->gds.mem.cs_partition_size = 4096;
7064 adev->gds.gws.gfx_partition_size = 4;
7065 adev->gds.gws.cs_partition_size = 4;
7067 adev->gds.oa.gfx_partition_size = 4;
7068 adev->gds.oa.cs_partition_size = 1;
7070 adev->gds.mem.gfx_partition_size = 1024;
7071 adev->gds.mem.cs_partition_size = 1024;
7073 adev->gds.gws.gfx_partition_size = 16;
7074 adev->gds.gws.cs_partition_size = 16;
7076 adev->gds.oa.gfx_partition_size = 4;
7077 adev->gds.oa.cs_partition_size = 4;
/* Write a user-supplied CU-disable bitmap into
 * GC_USER_SHADER_ARRAY_CONFIG.INACTIVE_CUS for the currently selected SE/SH.
 * NOTE(review): the early-return for an empty bitmap (if present in the
 * full source) is not visible in this excerpt.
 */
7081 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7089 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7090 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7092 WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
/* Return the active-CU bitmap for the currently selected SE/SH: invert the
 * union of fuse-inactive and user-inactive CUs, masked to max_cu_per_sh.
 */
7095 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7099 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7100 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7102 mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
7104 return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
/* Populate adev->gfx.cu_info: walk every SE/SH under grbm_idx_mutex,
 * apply the per-SH user disable mask, record each SH's active-CU bitmap,
 * and accumulate the total active CU count and the always-on CU mask.
 * NOTE(review): the counter/ao_bitmap reset and the per-bit counting inside
 * the k-loop are not visible in this excerpt.
 */
7107 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
7109 int i, j, k, counter, active_cu_number = 0;
7110 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7111 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
7112 unsigned disable_masks[4 * 2];
7114 memset(cu_info, 0, sizeof(*cu_info));
/* parse module-parameter CU-disable masks for up to 4 SEs x 2 SHs */
7116 amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
7118 mutex_lock(&adev->grbm_idx_mutex);
7119 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
7120 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
7124 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
7126 gfx_v8_0_set_user_cu_inactive_bitmap(
7127 adev, disable_masks[i * 2 + j]);
7128 bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
7129 cu_info->bitmap[i][j] = bitmap;
7131 for (k = 0; k < 16; k ++) {
7132 if (bitmap & mask) {
7139 active_cu_number += counter;
7140 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
/* restore broadcast SE/SH selection before releasing the mutex */
7143 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
7144 mutex_unlock(&adev->grbm_idx_mutex);
7146 cu_info->number = active_cu_number;
7147 cu_info->ao_cu_mask = ao_cu_mask;
/* Exported IP block descriptor for GFX v8.0 parts. */
7150 const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
7152 .type = AMD_IP_BLOCK_TYPE_GFX,
7156 .funcs = &gfx_v8_0_ip_funcs,
/* Exported IP block descriptor for GFX v8.1 parts; shares the v8.0
 * callback table (version fields not visible in this excerpt).
 */
7159 const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
7161 .type = AMD_IP_BLOCK_TYPE_GFX,
7165 .funcs = &gfx_v8_0_ip_funcs,