/*
 * Source: OSDN Git Service mirror, commit
 * d604ba37541f3a52eb38fca27baf4f10ebf290b1 (tomoyo/tomoyo-test1.git),
 * drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
 */
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vi_structs.h"
29 #include "vid.h"
30 #include "amdgpu_ucode.h"
31 #include "amdgpu_atombios.h"
32 #include "atombios_i2c.h"
33 #include "clearstate_vi.h"
34
35 #include "gmc/gmc_8_2_d.h"
36 #include "gmc/gmc_8_2_sh_mask.h"
37
38 #include "oss/oss_3_0_d.h"
39 #include "oss/oss_3_0_sh_mask.h"
40
41 #include "bif/bif_5_0_d.h"
42 #include "bif/bif_5_0_sh_mask.h"
43
44 #include "gca/gfx_8_0_d.h"
45 #include "gca/gfx_8_0_enum.h"
46 #include "gca/gfx_8_0_sh_mask.h"
47 #include "gca/gfx_8_0_enum.h"
48
49 #include "dce/dce_10_0_d.h"
50 #include "dce/dce_10_0_sh_mask.h"
51
52 #include "smu/smu_7_1_3_d.h"
53
/* Ring counts exposed by the gfx v8 IP block. */
#define GFX8_NUM_GFX_RINGS     1
#define GFX8_NUM_COMPUTE_RINGS 8

/*
 * Per-ASIC golden values for mmGB_ADDR_CONFIG; e.g. the Tonga value
 * matches the mmGB_ADDR_CONFIG entry in tonga_golden_common_all below.
 */
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

/* Helpers that shift a value into GB_TILE_MODE0 / GB_MACROTILE_MODE0 fields. */
#define ARRAY_MODE(x)                                   ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)                                  ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)                                   ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)                          ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)                                 ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x)                                   ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)                                  ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)                            ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)                                    ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

/* Per-feature bit masks for mmRLC_CGTT_MGCG_OVERRIDE (defined locally). */
#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK            0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK            0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK           0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK           0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK           0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK           0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD    1
#define CLE_BPM_SERDES_CMD    0

/* BPM Register Address*/
enum {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

/* Entry count of the RLC "direct register list" format. */
#define RLC_FormatDirectRegListLength        14
94
/*
 * Firmware images (CE/PFP/ME/MEC/[MEC2]/RLC) requested per supported
 * VI-family ASIC; MODULE_FIRMWARE tags them for module metadata.
 */
/* Carrizo */
MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

/* Stoney (note: no mec2 image) */
MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

/* Tonga */
MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

/* Topaz (note: no mec2 image) */
MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

/* Fiji */
MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

/* Polaris11 */
MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

/* Polaris10 */
MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

/* Polaris12 */
MODULE_FIRMWARE("amdgpu/polaris12_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris12_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris12_me.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris12_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris12_rlc.bin");
148
/*
 * GDS register offsets, one entry per VMID (0..15):
 * {mmGDS_VMIDn_BASE, mmGDS_VMIDn_SIZE, mmGDS_GWS_VMIDn, mmGDS_OA_VMIDn}.
 */
static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};
168
/*
 * Tonga A11 golden register settings as flat {register, mask, value}
 * triples.  NOTE(review): presumably applied via
 * amdgpu_program_register_sequence() like the iceland tables are in
 * gfx_v8_0_init_golden_registers() — confirm mask/value semantics there.
 */
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
188
/*
 * Tonga common golden settings ({register, mask, value} triples); the
 * mmGB_ADDR_CONFIG value matches TONGA_GB_ADDR_CONFIG_GOLDEN.
 */
static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
200
/*
 * Tonga MGCG/CGCG (clock gating) init sequence, {register, mask, value}
 * triples.  NOTE(review): presumably applied via
 * amdgpu_program_register_sequence(); confirm mask/value semantics there.
 */
static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	/* per-CU CGTS setup, CU0..CU7 */
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
279
/*
 * Polaris11 A11 golden register settings ({register, mask, value}
 * triples; presumably applied via amdgpu_program_register_sequence()).
 */
static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0x0000f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x01180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
300
/*
 * Polaris11 common golden settings; mmGB_ADDR_CONFIG matches
 * POLARIS11_GB_ADDR_CONFIG_GOLDEN (0x22011002).
 */
static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
310
/*
 * Polaris10 A11 golden register settings ({register, mask, value}
 * triples; presumably applied via amdgpu_program_register_sequence()).
 */
static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0x0001f3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0x0f000000, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
331
/* Polaris10 common golden settings ({register, mask, value} triples). */
static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
343
/* Fiji common golden settings ({register, mask, value} triples). */
static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};
357
/* Fiji A10 golden register settings ({register, mask, value} triples). */
static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};
372
/*
 * Fiji MGCG/CGCG init sequence ({register, mask, value} triples).
 * Unlike the Tonga/Iceland/Carrizo tables it carries no per-CU CGTS
 * entries.
 */
static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
411
/*
 * Iceland (Topaz) A11 golden register settings, {register, mask, value}
 * triples; applied via amdgpu_program_register_sequence() from
 * gfx_v8_0_init_golden_registers() for CHIP_TOPAZ.
 */
static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};
431
/*
 * Iceland common golden settings; mmGB_ADDR_CONFIG matches
 * TOPAZ_GB_ADDR_CONFIG_GOLDEN (0x22010001).
 */
static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
443
/*
 * Iceland MGCG/CGCG init sequence ({register, mask, value} triples);
 * applied via amdgpu_program_register_sequence() for CHIP_TOPAZ in
 * gfx_v8_0_init_golden_registers().  Per-CU CGTS entries cover CU0..CU5
 * only.
 */
static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	/* per-CU CGTS setup, CU0..CU5 */
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};
511
/* Carrizo A11 golden register settings ({register, mask, value} triples). */
static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0000003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};
527
/*
 * Carrizo common golden settings; mmGB_ADDR_CONFIG matches
 * CARRIZO_GB_ADDR_CONFIG_GOLDEN (0x22010001).
 */
static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};
539
/*
 * Carrizo MGCG/CGCG init sequence ({register, mask, value} triples).
 * Per-CU CGTS entries cover CU0..CU7; final mmRLC_CGCG_CGLS_CTRL value
 * (0x0020003f) differs from the Tonga/Fiji tables (0x0020003c).
 */
static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	/* per-CU CGTS setup, CU0..CU7 */
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};
618
/* Stoney A11 golden register settings ({register, mask, value} triples). */
static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};
632
/* Stoney common golden registers ({register, mask, value} triples):
 * GRBM broadcast index, raster config, address config and the SPI
 * CU resource reservation setup. */
static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};
644
/* Stoney medium-grain / coarse-grain clockgating init sequence
 * ({register, mask, value} triples). */
static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
};
653
654 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
655 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
656 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
657 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
658 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
659 static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);
660
/*
 * gfx_v8_0_init_golden_registers - program the per-ASIC "golden" registers
 * @adev: amdgpu device
 *
 * Applies the ASIC-specific clockgating init, golden settings and
 * common register sequences.  Polaris10 additionally programs an SMC
 * ACLK register and, on three specific board SKUs (matched by PCI
 * revision and subsystem IDs), issues two I2C transactions —
 * presumably a board-level workaround; confirm against the commit
 * that introduced it.
 */
static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_program_register_sequence(adev,
						 iceland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_iceland_a11,
						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_program_register_sequence(adev,
						 iceland_golden_common_all,
						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_program_register_sequence(adev,
						 fiji_golden_common_all,
						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_program_register_sequence(adev,
						 tonga_golden_common_all,
						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris11_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_program_register_sequence(adev,
						 polaris11_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris10_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_program_register_sequence(adev,
						 polaris10_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		/* board-specific I2C workaround, keyed on PCI subsystem IDs */
		if (adev->pdev->revision == 0xc7 &&
		    ((adev->pdev->subsystem_device == 0xb37 && adev->pdev->subsystem_vendor == 0x1002) ||
		     (adev->pdev->subsystem_device == 0x4a8 && adev->pdev->subsystem_vendor == 0x1043) ||
		     (adev->pdev->subsystem_device == 0x9480 && adev->pdev->subsystem_vendor == 0x1682))) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 cz_golden_common_all,
						 (const u32)ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_common_all,
						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}
749
750 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
751 {
752         int i;
753
754         adev->gfx.scratch.num_reg = 7;
755         adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
756         for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
757                 adev->gfx.scratch.free[i] = true;
758                 adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
759         }
760 }
761
/*
 * gfx_v8_0_ring_test_ring - basic ring liveness test
 * @ring: ring to test
 *
 * Seeds a scratch register with 0xCAFEDEAD, submits a SET_UCONFIG_REG
 * packet that rewrites it to 0xDEADBEEF, then polls (up to
 * adev->usec_timeout microseconds) for the CP write to land.
 *
 * Returns 0 on success, -EINVAL on timeout, or the error from
 * scratch-register / ring allocation.
 */
static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* seed with a sentinel so we can detect the CP's write */
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	/* have the CP write 0xDEADBEEF into the scratch register */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	/* busy-wait for the CP to consume the packet */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
805
/*
 * gfx_v8_0_ring_test_ib - indirect buffer execution test
 * @ring: ring to test
 * @timeout: fence wait timeout in jiffies
 *
 * Like the ring test, but routes the scratch-register write through an
 * IB: seeds the scratch register with 0xCAFEDEAD, schedules a 3-dword
 * IB containing a SET_UCONFIG_REG packet that writes 0xDEADBEEF, waits
 * for the completion fence, then checks the register.
 *
 * Returns 0 on success, -ETIMEDOUT if the fence never signals,
 * -EINVAL if the scratch value is wrong, or a negative error from
 * allocation/scheduling/waiting.
 */
static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	long r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r);
		return r;
	}
	/* sentinel value; the IB must overwrite it */
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
		goto err1;
	}
	/* IB payload: write 0xDEADBEEF to the scratch register */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
	if (r)
		goto err2;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		DRM_ERROR("amdgpu: IB test timed out.\n");
		r = -ETIMEDOUT;
		goto err2;
	} else if (r < 0) {
		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
		r = 0;
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}
861
862
863 static void gfx_v8_0_free_microcode(struct amdgpu_device *adev) {
864         release_firmware(adev->gfx.pfp_fw);
865         adev->gfx.pfp_fw = NULL;
866         release_firmware(adev->gfx.me_fw);
867         adev->gfx.me_fw = NULL;
868         release_firmware(adev->gfx.ce_fw);
869         adev->gfx.ce_fw = NULL;
870         release_firmware(adev->gfx.rlc_fw);
871         adev->gfx.rlc_fw = NULL;
872         release_firmware(adev->gfx.mec_fw);
873         adev->gfx.mec_fw = NULL;
874         if ((adev->asic_type != CHIP_STONEY) &&
875             (adev->asic_type != CHIP_TOPAZ))
876                 release_firmware(adev->gfx.mec2_fw);
877         adev->gfx.mec2_fw = NULL;
878
879         kfree(adev->gfx.rlc.register_list_format);
880 }
881
882 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
883 {
884         const char *chip_name;
885         char fw_name[30];
886         int err;
887         struct amdgpu_firmware_info *info = NULL;
888         const struct common_firmware_header *header = NULL;
889         const struct gfx_firmware_header_v1_0 *cp_hdr;
890         const struct rlc_firmware_header_v2_0 *rlc_hdr;
891         unsigned int *tmp = NULL, i;
892
893         DRM_DEBUG("\n");
894
895         switch (adev->asic_type) {
896         case CHIP_TOPAZ:
897                 chip_name = "topaz";
898                 break;
899         case CHIP_TONGA:
900                 chip_name = "tonga";
901                 break;
902         case CHIP_CARRIZO:
903                 chip_name = "carrizo";
904                 break;
905         case CHIP_FIJI:
906                 chip_name = "fiji";
907                 break;
908         case CHIP_POLARIS11:
909                 chip_name = "polaris11";
910                 break;
911         case CHIP_POLARIS10:
912                 chip_name = "polaris10";
913                 break;
914         case CHIP_POLARIS12:
915                 chip_name = "polaris12";
916                 break;
917         case CHIP_STONEY:
918                 chip_name = "stoney";
919                 break;
920         default:
921                 BUG();
922         }
923
924         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
925         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
926         if (err)
927                 goto out;
928         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
929         if (err)
930                 goto out;
931         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
932         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
933         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
934
935         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
936         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
937         if (err)
938                 goto out;
939         err = amdgpu_ucode_validate(adev->gfx.me_fw);
940         if (err)
941                 goto out;
942         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
943         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
944         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
945
946         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
947         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
948         if (err)
949                 goto out;
950         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
951         if (err)
952                 goto out;
953         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
954         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
955         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
956
957         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
958         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
959         if (err)
960                 goto out;
961         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
962         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
963         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
964         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
965
966         adev->gfx.rlc.save_and_restore_offset =
967                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
968         adev->gfx.rlc.clear_state_descriptor_offset =
969                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
970         adev->gfx.rlc.avail_scratch_ram_locations =
971                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
972         adev->gfx.rlc.reg_restore_list_size =
973                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
974         adev->gfx.rlc.reg_list_format_start =
975                         le32_to_cpu(rlc_hdr->reg_list_format_start);
976         adev->gfx.rlc.reg_list_format_separate_start =
977                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
978         adev->gfx.rlc.starting_offsets_start =
979                         le32_to_cpu(rlc_hdr->starting_offsets_start);
980         adev->gfx.rlc.reg_list_format_size_bytes =
981                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
982         adev->gfx.rlc.reg_list_size_bytes =
983                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
984
985         adev->gfx.rlc.register_list_format =
986                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
987                                         adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
988
989         if (!adev->gfx.rlc.register_list_format) {
990                 err = -ENOMEM;
991                 goto out;
992         }
993
994         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
995                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
996         for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
997                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
998
999         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1000
1001         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1002                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1003         for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
1004                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1005
1006         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1007         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1008         if (err)
1009                 goto out;
1010         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1011         if (err)
1012                 goto out;
1013         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1014         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1015         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1016
1017         if ((adev->asic_type != CHIP_STONEY) &&
1018             (adev->asic_type != CHIP_TOPAZ)) {
1019                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1020                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1021                 if (!err) {
1022                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1023                         if (err)
1024                                 goto out;
1025                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1026                                 adev->gfx.mec2_fw->data;
1027                         adev->gfx.mec2_fw_version =
1028                                 le32_to_cpu(cp_hdr->header.ucode_version);
1029                         adev->gfx.mec2_feature_version =
1030                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1031                 } else {
1032                         err = 0;
1033                         adev->gfx.mec2_fw = NULL;
1034                 }
1035         }
1036
1037         if (adev->firmware.smu_load) {
1038                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1039                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1040                 info->fw = adev->gfx.pfp_fw;
1041                 header = (const struct common_firmware_header *)info->fw->data;
1042                 adev->firmware.fw_size +=
1043                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1044
1045                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1046                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1047                 info->fw = adev->gfx.me_fw;
1048                 header = (const struct common_firmware_header *)info->fw->data;
1049                 adev->firmware.fw_size +=
1050                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1051
1052                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1053                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1054                 info->fw = adev->gfx.ce_fw;
1055                 header = (const struct common_firmware_header *)info->fw->data;
1056                 adev->firmware.fw_size +=
1057                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1058
1059                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1060                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1061                 info->fw = adev->gfx.rlc_fw;
1062                 header = (const struct common_firmware_header *)info->fw->data;
1063                 adev->firmware.fw_size +=
1064                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1065
1066                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1067                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1068                 info->fw = adev->gfx.mec_fw;
1069                 header = (const struct common_firmware_header *)info->fw->data;
1070                 adev->firmware.fw_size +=
1071                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1072
1073                 /* we need account JT in */
1074                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1075                 adev->firmware.fw_size +=
1076                         ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
1077
1078                 if (amdgpu_sriov_vf(adev)) {
1079                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE];
1080                         info->ucode_id = AMDGPU_UCODE_ID_STORAGE;
1081                         info->fw = adev->gfx.mec_fw;
1082                         adev->firmware.fw_size +=
1083                                 ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE);
1084                 }
1085
1086                 if (adev->gfx.mec2_fw) {
1087                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1088                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1089                         info->fw = adev->gfx.mec2_fw;
1090                         header = (const struct common_firmware_header *)info->fw->data;
1091                         adev->firmware.fw_size +=
1092                                 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1093                 }
1094
1095         }
1096
1097 out:
1098         if (err) {
1099                 dev_err(adev->dev,
1100                         "gfx8: Failed to load firmware \"%s\"\n",
1101                         fw_name);
1102                 release_firmware(adev->gfx.pfp_fw);
1103                 adev->gfx.pfp_fw = NULL;
1104                 release_firmware(adev->gfx.me_fw);
1105                 adev->gfx.me_fw = NULL;
1106                 release_firmware(adev->gfx.ce_fw);
1107                 adev->gfx.ce_fw = NULL;
1108                 release_firmware(adev->gfx.rlc_fw);
1109                 adev->gfx.rlc_fw = NULL;
1110                 release_firmware(adev->gfx.mec_fw);
1111                 adev->gfx.mec_fw = NULL;
1112                 release_firmware(adev->gfx.mec2_fw);
1113                 adev->gfx.mec2_fw = NULL;
1114         }
1115         return err;
1116 }
1117
/*
 * gfx_v8_0_get_csb_buffer - build the RLC clear-state PM4 stream
 * @adev: amdgpu device
 * @buffer: CPU pointer into the (mapped) clear-state buffer object
 *
 * Emits, in order: PREAMBLE begin, CONTEXT_CONTROL, every SECT_CONTEXT
 * register extent from adev->gfx.rlc.cs_data, the raster-config pair
 * for the first SE/SH, PREAMBLE end and a terminating CLEAR_STATE
 * packet.  All dwords are written little-endian.  Returns early (with
 * a partially written buffer) if a non-SECT_CONTEXT section is
 * encountered.
 */
static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* copy every context-register extent into the stream */
	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	/* raster config for the first SE/SH */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config);
	buffer[count++] = cpu_to_le32(adev->gfx.config.rb_config[0][0].raster_config_1);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
1164
1165 static void cz_init_cp_jump_table(struct amdgpu_device *adev)
1166 {
1167         const __le32 *fw_data;
1168         volatile u32 *dst_ptr;
1169         int me, i, max_me = 4;
1170         u32 bo_offset = 0;
1171         u32 table_offset, table_size;
1172
1173         if (adev->asic_type == CHIP_CARRIZO)
1174                 max_me = 5;
1175
1176         /* write the cp table buffer */
1177         dst_ptr = adev->gfx.rlc.cp_table_ptr;
1178         for (me = 0; me < max_me; me++) {
1179                 if (me == 0) {
1180                         const struct gfx_firmware_header_v1_0 *hdr =
1181                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1182                         fw_data = (const __le32 *)
1183                                 (adev->gfx.ce_fw->data +
1184                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1185                         table_offset = le32_to_cpu(hdr->jt_offset);
1186                         table_size = le32_to_cpu(hdr->jt_size);
1187                 } else if (me == 1) {
1188                         const struct gfx_firmware_header_v1_0 *hdr =
1189                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1190                         fw_data = (const __le32 *)
1191                                 (adev->gfx.pfp_fw->data +
1192                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1193                         table_offset = le32_to_cpu(hdr->jt_offset);
1194                         table_size = le32_to_cpu(hdr->jt_size);
1195                 } else if (me == 2) {
1196                         const struct gfx_firmware_header_v1_0 *hdr =
1197                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1198                         fw_data = (const __le32 *)
1199                                 (adev->gfx.me_fw->data +
1200                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1201                         table_offset = le32_to_cpu(hdr->jt_offset);
1202                         table_size = le32_to_cpu(hdr->jt_size);
1203                 } else if (me == 3) {
1204                         const struct gfx_firmware_header_v1_0 *hdr =
1205                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1206                         fw_data = (const __le32 *)
1207                                 (adev->gfx.mec_fw->data +
1208                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1209                         table_offset = le32_to_cpu(hdr->jt_offset);
1210                         table_size = le32_to_cpu(hdr->jt_size);
1211                 } else  if (me == 4) {
1212                         const struct gfx_firmware_header_v1_0 *hdr =
1213                                 (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
1214                         fw_data = (const __le32 *)
1215                                 (adev->gfx.mec2_fw->data +
1216                                  le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1217                         table_offset = le32_to_cpu(hdr->jt_offset);
1218                         table_size = le32_to_cpu(hdr->jt_size);
1219                 }
1220
1221                 for (i = 0; i < table_size; i ++) {
1222                         dst_ptr[bo_offset + i] =
1223                                 cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
1224                 }
1225
1226                 bo_offset += table_size;
1227         }
1228 }
1229
/*
 * gfx_v8_0_rlc_fini - free the RLC buffer objects
 * @adev: amdgpu device
 *
 * Tears down the clear-state BO and the CP jump-table BO, if present:
 * each is reserved, unpinned, unreserved and unreferenced, and the
 * pointer cleared so a repeat call becomes a no-op.  A failed reserve
 * only warns; teardown continues regardless.
 */
static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	int r;

	/* clear state block */
	if (adev->gfx.rlc.clear_state_obj) {
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve RLC cbs bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
		adev->gfx.rlc.clear_state_obj = NULL;
	}

	/* jump table block */
	if (adev->gfx.rlc.cp_table_obj) {
		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
		adev->gfx.rlc.cp_table_obj = NULL;
	}
}
1256
1257 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
1258 {
1259         volatile u32 *dst_ptr;
1260         u32 dws;
1261         const struct cs_section_def *cs_data;
1262         int r;
1263
1264         adev->gfx.rlc.cs_data = vi_cs_data;
1265
1266         cs_data = adev->gfx.rlc.cs_data;
1267
1268         if (cs_data) {
1269                 /* clear state block */
1270                 adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);
1271
1272                 if (adev->gfx.rlc.clear_state_obj == NULL) {
1273                         r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
1274                                              AMDGPU_GEM_DOMAIN_VRAM,
1275                                              AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1276                                              AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
1277                                              NULL, NULL,
1278                                              &adev->gfx.rlc.clear_state_obj);
1279                         if (r) {
1280                                 dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
1281                                 gfx_v8_0_rlc_fini(adev);
1282                                 return r;
1283                         }
1284                 }
1285                 r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1286                 if (unlikely(r != 0)) {
1287                         gfx_v8_0_rlc_fini(adev);
1288                         return r;
1289                 }
1290                 r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
1291                                   &adev->gfx.rlc.clear_state_gpu_addr);
1292                 if (r) {
1293                         amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1294                         dev_warn(adev->dev, "(%d) pin RLC cbs bo failed\n", r);
1295                         gfx_v8_0_rlc_fini(adev);
1296                         return r;
1297                 }
1298
1299                 r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
1300                 if (r) {
1301                         dev_warn(adev->dev, "(%d) map RLC cbs bo failed\n", r);
1302                         gfx_v8_0_rlc_fini(adev);
1303                         return r;
1304                 }
1305                 /* set up the cs buffer */
1306                 dst_ptr = adev->gfx.rlc.cs_ptr;
1307                 gfx_v8_0_get_csb_buffer(adev, dst_ptr);
1308                 amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
1309                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1310         }
1311
1312         if ((adev->asic_type == CHIP_CARRIZO) ||
1313             (adev->asic_type == CHIP_STONEY)) {
1314                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1315                 if (adev->gfx.rlc.cp_table_obj == NULL) {
1316                         r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
1317                                              AMDGPU_GEM_DOMAIN_VRAM,
1318                                              AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
1319                                              AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
1320                                              NULL, NULL,
1321                                              &adev->gfx.rlc.cp_table_obj);
1322                         if (r) {
1323                                 dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
1324                                 return r;
1325                         }
1326                 }
1327
1328                 r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
1329                 if (unlikely(r != 0)) {
1330                         dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
1331                         return r;
1332                 }
1333                 r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
1334                                   &adev->gfx.rlc.cp_table_gpu_addr);
1335                 if (r) {
1336                         amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1337                         dev_warn(adev->dev, "(%d) pin RLC cp table bo failed\n", r);
1338                         return r;
1339                 }
1340                 r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
1341                 if (r) {
1342                         dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
1343                         return r;
1344                 }
1345
1346                 cz_init_cp_jump_table(adev);
1347
1348                 amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
1349                 amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
1350         }
1351
1352         return 0;
1353 }
1354
1355 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1356 {
1357         int r;
1358
1359         if (adev->gfx.mec.hpd_eop_obj) {
1360                 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1361                 if (unlikely(r != 0))
1362                         dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
1363                 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
1364                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1365                 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
1366                 adev->gfx.mec.hpd_eop_obj = NULL;
1367         }
1368 }
1369
1370 static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev,
1371                                   struct amdgpu_ring *ring,
1372                                   struct amdgpu_irq_src *irq)
1373 {
1374         int r = 0;
1375
1376         ring->adev = NULL;
1377         ring->ring_obj = NULL;
1378         ring->use_doorbell = true;
1379         ring->doorbell_index = AMDGPU_DOORBELL_KIQ;
1380         if (adev->gfx.mec2_fw) {
1381                 ring->me = 2;
1382                 ring->pipe = 0;
1383         } else {
1384                 ring->me = 1;
1385                 ring->pipe = 1;
1386         }
1387
1388         irq->data = ring;
1389         ring->queue = 0;
1390         sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue);
1391         r = amdgpu_ring_init(adev, ring, 1024,
1392                              irq, AMDGPU_CP_KIQ_IRQ_DRIVER0);
1393         if (r)
1394                 dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
1395
1396         return r;
1397 }
1398
/* Tear down the KIQ ring and detach it from its interrupt source. */
static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring,
				   struct amdgpu_irq_src *irq)
{
	amdgpu_ring_fini(ring);
	irq->data = NULL;	/* undo the attach done in gfx_v8_0_kiq_init_ring() */
}
1405
1406 #define MEC_HPD_SIZE 2048
1407
1408 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1409 {
1410         int r;
1411         u32 *hpd;
1412
1413         /*
1414          * we assign only 1 pipe because all other pipes will
1415          * be handled by KFD
1416          */
1417         adev->gfx.mec.num_mec = 1;
1418         adev->gfx.mec.num_pipe = 1;
1419         adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
1420
1421         if (adev->gfx.mec.hpd_eop_obj == NULL) {
1422                 r = amdgpu_bo_create(adev,
1423                                      adev->gfx.mec.num_queue * MEC_HPD_SIZE,
1424                                      PAGE_SIZE, true,
1425                                      AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
1426                                      &adev->gfx.mec.hpd_eop_obj);
1427                 if (r) {
1428                         dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
1429                         return r;
1430                 }
1431         }
1432
1433         r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
1434         if (unlikely(r != 0)) {
1435                 gfx_v8_0_mec_fini(adev);
1436                 return r;
1437         }
1438         r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
1439                           &adev->gfx.mec.hpd_eop_gpu_addr);
1440         if (r) {
1441                 dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
1442                 gfx_v8_0_mec_fini(adev);
1443                 return r;
1444         }
1445         r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
1446         if (r) {
1447                 dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
1448                 gfx_v8_0_mec_fini(adev);
1449                 return r;
1450         }
1451
1452         memset(hpd, 0, adev->gfx.mec.num_queue * MEC_HPD_SIZE);
1453
1454         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1455         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1456
1457         return 0;
1458 }
1459
/* Free the KIQ EOP buffer object allocated by gfx_v8_0_kiq_init(). */
static void gfx_v8_0_kiq_fini(struct amdgpu_device *adev)
{
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;

	amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
	/* NOTE(review): free_kernel takes &eop_obj, so this is presumably
	 * already NULL — kept as an explicit belt-and-braces clear */
	kiq->eop_obj = NULL;
}
1467
/*
 * gfx_v8_0_kiq_init - allocate and zero the KIQ EOP buffer
 *
 * Creates a MEC_HPD_SIZE kernel BO in GTT (pinned and CPU-mapped by
 * amdgpu_bo_create_kernel()), clears it, and drops the CPU mapping.
 * Returns 0 on success or the creation error code.
 */
static int gfx_v8_0_kiq_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;
	struct amdgpu_kiq *kiq = &adev->gfx.kiq;

	r = amdgpu_bo_create_kernel(adev, MEC_HPD_SIZE, PAGE_SIZE,
				    AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
				    &kiq->eop_gpu_addr, (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
		return r;
	}

	/* clear the EOP area before it is handed to the hardware */
	memset(hpd, 0, MEC_HPD_SIZE);

	amdgpu_bo_kunmap(kiq->eop_obj);

	return 0;
}
1488
/* Raw GCN machine code for the VGPR-initializing compute shader; copied
 * into the indirect buffer and dispatched by
 * gfx_v8_0_do_edc_gpr_workarounds() to touch the vector register file. */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1525
/* Raw GCN machine code for the SGPR-initializing compute shader; used
 * for both SGPR dispatches in gfx_v8_0_do_edc_gpr_workarounds() to
 * touch the scalar register file. */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1550
/* Register/value pairs programmed via PACKET3_SET_SH_REG (two entries
 * per packet) before dispatching the VGPR init shader in
 * gfx_v8_0_do_edc_gpr_workarounds(). */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1570
/* Register/value pairs for the first SGPR init dispatch; differs from
 * sgpr2_init_regs only in the SE0 thread-management mask (0x0f). */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1590
/* Register/value pairs for the second SGPR init dispatch; differs from
 * sgpr1_init_regs only in the SE0 thread-management mask (0xf0). */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1610
/* SEC/DED (EDC) error counter registers; read back at the end of
 * gfx_v8_0_do_edc_gpr_workarounds() to clear the counters. */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1639
/*
 * gfx_v8_0_do_edc_gpr_workarounds - run the CZ EDC GPR init workaround
 *
 * Carrizo only.  Builds a single IB that dispatches the VGPR init shader
 * once and the SGPR init shader twice (sgpr1/sgpr2 register sets),
 * submits it on compute ring 0 and waits for completion, then enables
 * EDC (DED_MODE/PROP_FED) and reads back the SEC/DED counter registers
 * to clear them.  Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	int r, i;
	u32 tmp;
	unsigned total_size, vgpr_offset, sgpr_offset;
	u64 gpu_addr;

	/* only supported on CZ */
	if (adev->asic_type != CHIP_CARRIZO)
		return 0;

	/* bail if the compute ring is not ready */
	if (!ring->ready)
		return 0;

	/* save EDC mode and disable it while the shaders run */
	tmp = RREG32(mmGB_EDC_MODE);
	WREG32(mmGB_EDC_MODE, 0);

	/* per dispatch: 3 dwords per SET_SH_REG reg/value pair, plus
	 * 4 (PGM_LO/HI write) + 5 (dispatch) + 2 (event write) dwords,
	 * converted to bytes (*4) */
	total_size =
		(((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size +=
		(((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
	total_size = ALIGN(total_size, 256);
	vgpr_offset = total_size;
	total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
	sgpr_offset = total_size;
	total_size += sizeof(sgpr_init_compute_shader);

	/* allocate an indirect buffer to put the commands in */
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, total_size, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* load the compute shaders */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
		ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];

	for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
		ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];

	/* init the ib length to 0 */
	ib.length_dw = 0;

	/* VGPR */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR1 */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* SGPR2 - same shader as SGPR1, different static thread mgmt mask */
	/* write the register state for the compute dispatch */
	for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
		ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
		ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
	}
	/* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
	gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
	ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
	ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
	ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);

	/* write dispatch packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
	ib.ptr[ib.length_dw++] = 8; /* x */
	ib.ptr[ib.length_dw++] = 1; /* y */
	ib.ptr[ib.length_dw++] = 1; /* z */
	ib.ptr[ib.length_dw++] =
		REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);

	/* write CS partial flush packet */
	ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
	ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);

	/* schedule the ib on the ring */
	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
	if (r) {
		DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
		goto fail;
	}

	/* wait for the GPU to finish processing the IB */
	r = dma_fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto fail;
	}

	/* re-enable EDC with DED mode and fault propagation */
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
	tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
	WREG32(mmGB_EDC_MODE, tmp);

	tmp = RREG32(mmCC_GC_EDC_CONFIG);
	tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
	WREG32(mmCC_GC_EDC_CONFIG, tmp);


	/* read back registers to clear the counters */
	for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
		RREG32(sec_ded_counter_registers[i]);

fail:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);

	return r;
}
1802
1803 static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1804 {
1805         u32 gb_addr_config;
1806         u32 mc_shared_chmap, mc_arb_ramcfg;
1807         u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1808         u32 tmp;
1809         int ret;
1810
1811         switch (adev->asic_type) {
1812         case CHIP_TOPAZ:
1813                 adev->gfx.config.max_shader_engines = 1;
1814                 adev->gfx.config.max_tile_pipes = 2;
1815                 adev->gfx.config.max_cu_per_sh = 6;
1816                 adev->gfx.config.max_sh_per_se = 1;
1817                 adev->gfx.config.max_backends_per_se = 2;
1818                 adev->gfx.config.max_texture_channel_caches = 2;
1819                 adev->gfx.config.max_gprs = 256;
1820                 adev->gfx.config.max_gs_threads = 32;
1821                 adev->gfx.config.max_hw_contexts = 8;
1822
1823                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1824                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1825                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1826                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1827                 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1828                 break;
1829         case CHIP_FIJI:
1830                 adev->gfx.config.max_shader_engines = 4;
1831                 adev->gfx.config.max_tile_pipes = 16;
1832                 adev->gfx.config.max_cu_per_sh = 16;
1833                 adev->gfx.config.max_sh_per_se = 1;
1834                 adev->gfx.config.max_backends_per_se = 4;
1835                 adev->gfx.config.max_texture_channel_caches = 16;
1836                 adev->gfx.config.max_gprs = 256;
1837                 adev->gfx.config.max_gs_threads = 32;
1838                 adev->gfx.config.max_hw_contexts = 8;
1839
1840                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1841                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1842                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1843                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1844                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1845                 break;
1846         case CHIP_POLARIS11:
1847         case CHIP_POLARIS12:
1848                 ret = amdgpu_atombios_get_gfx_info(adev);
1849                 if (ret)
1850                         return ret;
1851                 adev->gfx.config.max_gprs = 256;
1852                 adev->gfx.config.max_gs_threads = 32;
1853                 adev->gfx.config.max_hw_contexts = 8;
1854
1855                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1856                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1857                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1858                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1859                 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
1860                 break;
1861         case CHIP_POLARIS10:
1862                 ret = amdgpu_atombios_get_gfx_info(adev);
1863                 if (ret)
1864                         return ret;
1865                 adev->gfx.config.max_gprs = 256;
1866                 adev->gfx.config.max_gs_threads = 32;
1867                 adev->gfx.config.max_hw_contexts = 8;
1868
1869                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1870                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1871                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1872                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1873                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1874                 break;
1875         case CHIP_TONGA:
1876                 adev->gfx.config.max_shader_engines = 4;
1877                 adev->gfx.config.max_tile_pipes = 8;
1878                 adev->gfx.config.max_cu_per_sh = 8;
1879                 adev->gfx.config.max_sh_per_se = 1;
1880                 adev->gfx.config.max_backends_per_se = 2;
1881                 adev->gfx.config.max_texture_channel_caches = 8;
1882                 adev->gfx.config.max_gprs = 256;
1883                 adev->gfx.config.max_gs_threads = 32;
1884                 adev->gfx.config.max_hw_contexts = 8;
1885
1886                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1887                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1888                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1889                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1890                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1891                 break;
1892         case CHIP_CARRIZO:
1893                 adev->gfx.config.max_shader_engines = 1;
1894                 adev->gfx.config.max_tile_pipes = 2;
1895                 adev->gfx.config.max_sh_per_se = 1;
1896                 adev->gfx.config.max_backends_per_se = 2;
1897
1898                 switch (adev->pdev->revision) {
1899                 case 0xc4:
1900                 case 0x84:
1901                 case 0xc8:
1902                 case 0xcc:
1903                 case 0xe1:
1904                 case 0xe3:
1905                         /* B10 */
1906                         adev->gfx.config.max_cu_per_sh = 8;
1907                         break;
1908                 case 0xc5:
1909                 case 0x81:
1910                 case 0x85:
1911                 case 0xc9:
1912                 case 0xcd:
1913                 case 0xe2:
1914                 case 0xe4:
1915                         /* B8 */
1916                         adev->gfx.config.max_cu_per_sh = 6;
1917                         break;
1918                 case 0xc6:
1919                 case 0xca:
1920                 case 0xce:
1921                 case 0x88:
1922                         /* B6 */
1923                         adev->gfx.config.max_cu_per_sh = 6;
1924                         break;
1925                 case 0xc7:
1926                 case 0x87:
1927                 case 0xcb:
1928                 case 0xe5:
1929                 case 0x89:
1930                 default:
1931                         /* B4 */
1932                         adev->gfx.config.max_cu_per_sh = 4;
1933                         break;
1934                 }
1935
1936                 adev->gfx.config.max_texture_channel_caches = 2;
1937                 adev->gfx.config.max_gprs = 256;
1938                 adev->gfx.config.max_gs_threads = 32;
1939                 adev->gfx.config.max_hw_contexts = 8;
1940
1941                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1942                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1943                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1944                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1945                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1946                 break;
1947         case CHIP_STONEY:
1948                 adev->gfx.config.max_shader_engines = 1;
1949                 adev->gfx.config.max_tile_pipes = 2;
1950                 adev->gfx.config.max_sh_per_se = 1;
1951                 adev->gfx.config.max_backends_per_se = 1;
1952
1953                 switch (adev->pdev->revision) {
1954                 case 0xc0:
1955                 case 0xc1:
1956                 case 0xc2:
1957                 case 0xc4:
1958                 case 0xc8:
1959                 case 0xc9:
1960                         adev->gfx.config.max_cu_per_sh = 3;
1961                         break;
1962                 case 0xd0:
1963                 case 0xd1:
1964                 case 0xd2:
1965                 default:
1966                         adev->gfx.config.max_cu_per_sh = 2;
1967                         break;
1968                 }
1969
1970                 adev->gfx.config.max_texture_channel_caches = 2;
1971                 adev->gfx.config.max_gprs = 256;
1972                 adev->gfx.config.max_gs_threads = 16;
1973                 adev->gfx.config.max_hw_contexts = 8;
1974
1975                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1976                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1977                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1978                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1979                 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1980                 break;
1981         default:
1982                 adev->gfx.config.max_shader_engines = 2;
1983                 adev->gfx.config.max_tile_pipes = 4;
1984                 adev->gfx.config.max_cu_per_sh = 2;
1985                 adev->gfx.config.max_sh_per_se = 1;
1986                 adev->gfx.config.max_backends_per_se = 2;
1987                 adev->gfx.config.max_texture_channel_caches = 4;
1988                 adev->gfx.config.max_gprs = 256;
1989                 adev->gfx.config.max_gs_threads = 32;
1990                 adev->gfx.config.max_hw_contexts = 8;
1991
1992                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1993                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1994                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1995                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1996                 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1997                 break;
1998         }
1999
2000         mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
2001         adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
2002         mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
2003
2004         adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
2005         adev->gfx.config.mem_max_burst_length_bytes = 256;
2006         if (adev->flags & AMD_IS_APU) {
2007                 /* Get memory bank mapping mode. */
2008                 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
2009                 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
2010                 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
2011
2012                 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
2013                 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
2014                 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
2015
2016                 /* Validate settings in case only one DIMM installed. */
2017                 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
2018                         dimm00_addr_map = 0;
2019                 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
2020                         dimm01_addr_map = 0;
2021                 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
2022                         dimm10_addr_map = 0;
2023                 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
2024                         dimm11_addr_map = 0;
2025
2026                 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
2027                 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
2028                 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
2029                         adev->gfx.config.mem_row_size_in_kb = 2;
2030                 else
2031                         adev->gfx.config.mem_row_size_in_kb = 1;
2032         } else {
2033                 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
2034                 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2035                 if (adev->gfx.config.mem_row_size_in_kb > 4)
2036                         adev->gfx.config.mem_row_size_in_kb = 4;
2037         }
2038
2039         adev->gfx.config.shader_engine_tile_size = 32;
2040         adev->gfx.config.num_gpus = 1;
2041         adev->gfx.config.multi_gpu_tile_size = 64;
2042
2043         /* fix up row size */
2044         switch (adev->gfx.config.mem_row_size_in_kb) {
2045         case 1:
2046         default:
2047                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
2048                 break;
2049         case 2:
2050                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
2051                 break;
2052         case 4:
2053                 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
2054                 break;
2055         }
2056         adev->gfx.config.gb_addr_config = gb_addr_config;
2057
2058         return 0;
2059 }
2060
/*
 * gfx_v8_0_sw_init - software-side setup for the GFX v8 IP block.
 *
 * Registers the interrupt sources the block consumes, loads the gfx
 * microcode, allocates the RLC/MEC/KIQ buffer objects, creates the gfx
 * and compute rings, and reserves the GDS/GWS/OA partitions for gfx.
 *
 * @handle: opaque IP-block handle; cast to struct amdgpu_device below.
 *
 * Returns 0 on success or a negative error code from the first failing
 * step (earlier allocations are left for sw_fini to release).
 */
static int gfx_v8_0_sw_init(void *handle)
{
	int i, r;
	struct amdgpu_ring *ring;
	struct amdgpu_kiq *kiq;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* KIQ event (interrupt source id 178) */
	r = amdgpu_irq_add_id(adev, 178, &adev->gfx.kiq.irq);
	if (r)
		return r;

	/* EOP Event (interrupt source id 181) — fence/EOP interrupts for all rings */
	r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg (interrupt source id 184) */
	r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst (interrupt source id 185) */
	r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_rlc_init(adev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	r = gfx_v8_0_kiq_init(adev);
	if (r) {
		DRM_ERROR("Failed to init KIQ BOs!\n");
		return r;
	}

	/* The KIQ ring is created separately from the regular gfx/compute rings */
	kiq = &adev->gfx.kiq;
	r = gfx_v8_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
	if (r)
		return r;

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     AMDGPU_CP_IRQ_GFX_EOP);
		if (r)
			return r;
	}

	/* set up the compute queues */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		unsigned irq_type;

		/* max 32 queues per MEC (4 pipes x 8 queues) */
		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
			DRM_ERROR("Too many (%d) compute rings!\n", i);
			break;
		}
		ring = &adev->gfx.compute_ring[i];
		ring->ring_obj = NULL;
		ring->use_doorbell = true;
		/* compute rings get consecutive doorbells after MEC_RING0 */
		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
		ring->me = 1; /* first MEC */
		/* map ring index onto pipe/queue: 8 queues per pipe */
		ring->pipe = i / 8;
		ring->queue = i % 8;
		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
		/* each pipe has its own EOP interrupt source */
		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
		/* type-2 packets are deprecated on MEC, use type-3 instead */
		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
				     irq_type);
		if (r)
			return r;
	}

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
				    &adev->gds.gds_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
				    &adev->gds.gws_gfx_bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
				    PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
				    &adev->gds.oa_gfx_bo, NULL, NULL);
	if (r)
		return r;

	/* constant-engine RAM size, 32KB on gfx v8 parts handled here */
	adev->gfx.ce_ram_size = 0x8000;

	r = gfx_v8_0_gpu_early_init(adev);
	if (r)
		return r;

	return 0;
}
2190
2191 static int gfx_v8_0_sw_fini(void *handle)
2192 {
2193         int i;
2194         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2195
2196         amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
2197         amdgpu_bo_free_kernel(&adev->gds.gws_gfx_bo, NULL, NULL);
2198         amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
2199
2200         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2201                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2202         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2203                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2204         gfx_v8_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2205
2206         gfx_v8_0_kiq_fini(adev);
2207         gfx_v8_0_mec_fini(adev);
2208         gfx_v8_0_rlc_fini(adev);
2209         gfx_v8_0_free_microcode(adev);
2210
2211         return 0;
2212 }
2213
2214 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2215 {
2216         uint32_t *modearray, *mod2array;
2217         const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2218         const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
2219         u32 reg_offset;
2220
2221         modearray = adev->gfx.config.tile_mode_array;
2222         mod2array = adev->gfx.config.macrotile_mode_array;
2223
2224         for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2225                 modearray[reg_offset] = 0;
2226
2227         for (reg_offset = 0; reg_offset <  num_secondary_tile_mode_states; reg_offset++)
2228                 mod2array[reg_offset] = 0;
2229
2230         switch (adev->asic_type) {
2231         case CHIP_TOPAZ:
2232                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2233                                 PIPE_CONFIG(ADDR_SURF_P2) |
2234                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2235                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2236                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2237                                 PIPE_CONFIG(ADDR_SURF_P2) |
2238                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2239                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2240                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2241                                 PIPE_CONFIG(ADDR_SURF_P2) |
2242                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2243                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2244                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2245                                 PIPE_CONFIG(ADDR_SURF_P2) |
2246                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2247                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2248                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2249                                 PIPE_CONFIG(ADDR_SURF_P2) |
2250                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2251                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2252                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2253                                 PIPE_CONFIG(ADDR_SURF_P2) |
2254                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2255                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2256                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2257                                 PIPE_CONFIG(ADDR_SURF_P2) |
2258                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2259                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2260                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2261                                 PIPE_CONFIG(ADDR_SURF_P2));
2262                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2263                                 PIPE_CONFIG(ADDR_SURF_P2) |
2264                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2265                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2266                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2267                                  PIPE_CONFIG(ADDR_SURF_P2) |
2268                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2269                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2270                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2271                                  PIPE_CONFIG(ADDR_SURF_P2) |
2272                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2273                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2274                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2275                                  PIPE_CONFIG(ADDR_SURF_P2) |
2276                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2277                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2278                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2279                                  PIPE_CONFIG(ADDR_SURF_P2) |
2280                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2281                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2282                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2283                                  PIPE_CONFIG(ADDR_SURF_P2) |
2284                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2285                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2286                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2287                                  PIPE_CONFIG(ADDR_SURF_P2) |
2288                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2289                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2290                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2291                                  PIPE_CONFIG(ADDR_SURF_P2) |
2292                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2293                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2294                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2295                                  PIPE_CONFIG(ADDR_SURF_P2) |
2296                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2297                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2298                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2299                                  PIPE_CONFIG(ADDR_SURF_P2) |
2300                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2301                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2302                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2303                                  PIPE_CONFIG(ADDR_SURF_P2) |
2304                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2305                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2306                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2307                                  PIPE_CONFIG(ADDR_SURF_P2) |
2308                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2309                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2310                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2311                                  PIPE_CONFIG(ADDR_SURF_P2) |
2312                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2313                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2314                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2315                                  PIPE_CONFIG(ADDR_SURF_P2) |
2316                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2317                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2318                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2319                                  PIPE_CONFIG(ADDR_SURF_P2) |
2320                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2321                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2322                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2323                                  PIPE_CONFIG(ADDR_SURF_P2) |
2324                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2325                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2326                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2327                                  PIPE_CONFIG(ADDR_SURF_P2) |
2328                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2329                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2330                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2331                                  PIPE_CONFIG(ADDR_SURF_P2) |
2332                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2333                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2334
2335                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2336                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2337                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2338                                 NUM_BANKS(ADDR_SURF_8_BANK));
2339                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2340                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2341                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2342                                 NUM_BANKS(ADDR_SURF_8_BANK));
2343                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2344                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2345                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2346                                 NUM_BANKS(ADDR_SURF_8_BANK));
2347                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2348                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2349                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2350                                 NUM_BANKS(ADDR_SURF_8_BANK));
2351                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2352                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2353                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2354                                 NUM_BANKS(ADDR_SURF_8_BANK));
2355                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2356                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2357                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2358                                 NUM_BANKS(ADDR_SURF_8_BANK));
2359                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2360                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2361                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2362                                 NUM_BANKS(ADDR_SURF_8_BANK));
2363                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2364                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2365                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2366                                 NUM_BANKS(ADDR_SURF_16_BANK));
2367                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2368                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2369                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2370                                 NUM_BANKS(ADDR_SURF_16_BANK));
2371                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2372                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2373                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2374                                  NUM_BANKS(ADDR_SURF_16_BANK));
2375                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2376                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2377                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2378                                  NUM_BANKS(ADDR_SURF_16_BANK));
2379                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2380                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2381                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2382                                  NUM_BANKS(ADDR_SURF_16_BANK));
2383                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2384                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2385                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2386                                  NUM_BANKS(ADDR_SURF_16_BANK));
2387                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2388                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2389                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2390                                  NUM_BANKS(ADDR_SURF_8_BANK));
2391
2392                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2393                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2394                             reg_offset != 23)
2395                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2396
2397                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2398                         if (reg_offset != 7)
2399                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2400
2401                 break;
2402         case CHIP_FIJI:
2403                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2404                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2405                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2406                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2407                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2408                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2409                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2410                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2411                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2412                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2413                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2414                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2415                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2416                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2417                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2418                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2419                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2420                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2421                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2422                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2423                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2424                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2425                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2426                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2427                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2428                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2429                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2430                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2431                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2432                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2433                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2434                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2435                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2436                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2437                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2438                                 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2439                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2440                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2441                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2442                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2443                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2444                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2445                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2446                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2447                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2448                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2449                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2450                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2451                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2452                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2453                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2454                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2455                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2456                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2457                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2458                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2459                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2460                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2461                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2462                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2463                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2464                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2465                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2466                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2467                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2468                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2469                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2470                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2471                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2472                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2473                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2474                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2475                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2476                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2477                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2478                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2479                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2480                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2481                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2482                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2483                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2484                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2485                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2486                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2487                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2488                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2489                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2490                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2491                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2492                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2493                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2494                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2495                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2496                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2497                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2498                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2499                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2500                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2501                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2502                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2503                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2504                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2505                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2506                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2507                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2508                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2509                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2510                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2511                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2512                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2513                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2514                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2515                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2516                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2517                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2518                                  PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2519                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2520                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2521                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2522                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2523                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2524                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2525
2526                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2527                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2528                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2529                                 NUM_BANKS(ADDR_SURF_8_BANK));
2530                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2531                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2532                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2533                                 NUM_BANKS(ADDR_SURF_8_BANK));
2534                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2535                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2536                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2537                                 NUM_BANKS(ADDR_SURF_8_BANK));
2538                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2539                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2540                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2541                                 NUM_BANKS(ADDR_SURF_8_BANK));
2542                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2543                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2544                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2545                                 NUM_BANKS(ADDR_SURF_8_BANK));
2546                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2547                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2548                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2549                                 NUM_BANKS(ADDR_SURF_8_BANK));
2550                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2551                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2552                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2553                                 NUM_BANKS(ADDR_SURF_8_BANK));
2554                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2555                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2556                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2557                                 NUM_BANKS(ADDR_SURF_8_BANK));
2558                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2559                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2560                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2561                                 NUM_BANKS(ADDR_SURF_8_BANK));
2562                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2563                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2564                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2565                                  NUM_BANKS(ADDR_SURF_8_BANK));
2566                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2567                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2568                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2569                                  NUM_BANKS(ADDR_SURF_8_BANK));
2570                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2571                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2572                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2573                                  NUM_BANKS(ADDR_SURF_8_BANK));
2574                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2575                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2576                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2577                                  NUM_BANKS(ADDR_SURF_8_BANK));
2578                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2579                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2580                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2581                                  NUM_BANKS(ADDR_SURF_4_BANK));
2582
2583                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2584                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2585
2586                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2587                         if (reg_offset != 7)
2588                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2589
2590                 break;
2591         case CHIP_TONGA:
2592                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2593                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2594                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2595                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2596                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2597                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2598                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2599                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2600                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2601                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2602                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2603                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2604                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2605                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2606                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2607                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2608                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2609                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2610                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2611                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2612                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2613                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2614                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2615                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2616                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2617                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2618                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2619                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2620                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2621                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2622                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2623                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2624                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2625                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2626                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2627                                 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2628                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2629                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2630                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2631                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2632                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2633                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2634                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2635                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2636                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2637                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2638                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2639                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2640                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2641                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2642                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2643                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2644                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2645                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2646                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2647                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2648                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2649                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2650                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2651                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2652                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2653                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2654                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2655                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2656                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2657                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2658                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2659                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2660                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2661                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2662                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2663                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2664                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2665                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2666                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2667                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2668                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2669                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2670                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2671                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2672                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2673                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2674                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2675                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2676                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2677                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2678                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2679                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2680                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2681                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2682                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2683                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2684                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2685                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2686                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2687                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2688                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2689                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2690                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2691                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2692                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2693                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2694                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2695                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2696                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2697                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2698                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2699                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2700                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2701                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2702                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2703                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2704                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2705                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2706                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2707                                  PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2708                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2709                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2710                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2711                                  PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2712                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2713                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2714
2715                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2716                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2717                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2718                                 NUM_BANKS(ADDR_SURF_16_BANK));
2719                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2720                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2721                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2722                                 NUM_BANKS(ADDR_SURF_16_BANK));
2723                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2724                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2725                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2726                                 NUM_BANKS(ADDR_SURF_16_BANK));
2727                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2728                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2729                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2730                                 NUM_BANKS(ADDR_SURF_16_BANK));
2731                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2732                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2733                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2734                                 NUM_BANKS(ADDR_SURF_16_BANK));
2735                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2736                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2737                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2738                                 NUM_BANKS(ADDR_SURF_16_BANK));
2739                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2740                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2741                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2742                                 NUM_BANKS(ADDR_SURF_16_BANK));
2743                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2744                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2745                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2746                                 NUM_BANKS(ADDR_SURF_16_BANK));
2747                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2748                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2749                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2750                                 NUM_BANKS(ADDR_SURF_16_BANK));
2751                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2752                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2753                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2754                                  NUM_BANKS(ADDR_SURF_16_BANK));
2755                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2756                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2757                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2758                                  NUM_BANKS(ADDR_SURF_16_BANK));
2759                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2760                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2761                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2762                                  NUM_BANKS(ADDR_SURF_8_BANK));
2763                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2764                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2765                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2766                                  NUM_BANKS(ADDR_SURF_4_BANK));
2767                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2768                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2769                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2770                                  NUM_BANKS(ADDR_SURF_4_BANK));
2771
2772                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2773                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2774
2775                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2776                         if (reg_offset != 7)
2777                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2778
2779                 break;
2780         case CHIP_POLARIS11:
2781         case CHIP_POLARIS12:
2782                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2783                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2784                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2785                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2786                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2787                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2788                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2789                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2790                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2791                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2792                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2793                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2794                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2795                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2796                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2797                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2798                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2799                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2800                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2801                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2802                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2803                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2804                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2805                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2806                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2807                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2808                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2809                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2810                 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2811                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2812                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2813                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2814                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2815                                 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2816                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2817                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2818                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2819                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2820                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2821                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2822                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2823                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2824                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2825                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2826                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2827                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2828                 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2829                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2830                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2831                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2832                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2833                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2834                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2835                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2836                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2837                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2838                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2839                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2840                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2841                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2842                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2843                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2844                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2845                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2846                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2847                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2848                 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2849                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2850                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2851                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2852                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2853                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2854                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2855                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2856                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2857                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2858                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2859                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2860                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2861                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2862                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2863                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2864                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2865                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2866                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2867                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2868                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2869                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2870                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2871                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2872                 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2873                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2874                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2875                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2876                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2877                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2878                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2879                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2880                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2881                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2882                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2883                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2884                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2885                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2886                                 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2887                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2888                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2889                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2890                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2891                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2892                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2893                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2894                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2895                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2896                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2897                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2898                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2899                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2900                 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2901                                 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2902                                 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2903                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2904
2905                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2906                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2907                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2908                                 NUM_BANKS(ADDR_SURF_16_BANK));
2909
2910                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2911                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2912                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2913                                 NUM_BANKS(ADDR_SURF_16_BANK));
2914
2915                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2916                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2917                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2918                                 NUM_BANKS(ADDR_SURF_16_BANK));
2919
2920                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2921                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2922                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2923                                 NUM_BANKS(ADDR_SURF_16_BANK));
2924
2925                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2926                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2927                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2928                                 NUM_BANKS(ADDR_SURF_16_BANK));
2929
2930                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2931                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2932                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2933                                 NUM_BANKS(ADDR_SURF_16_BANK));
2934
2935                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2936                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2937                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2938                                 NUM_BANKS(ADDR_SURF_16_BANK));
2939
2940                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2941                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2942                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2943                                 NUM_BANKS(ADDR_SURF_16_BANK));
2944
2945                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2946                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2947                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2948                                 NUM_BANKS(ADDR_SURF_16_BANK));
2949
2950                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2951                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2952                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2953                                 NUM_BANKS(ADDR_SURF_16_BANK));
2954
2955                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2956                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2957                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2958                                 NUM_BANKS(ADDR_SURF_16_BANK));
2959
2960                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2961                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2962                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2963                                 NUM_BANKS(ADDR_SURF_16_BANK));
2964
2965                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2966                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2967                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2968                                 NUM_BANKS(ADDR_SURF_8_BANK));
2969
2970                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2971                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2972                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2973                                 NUM_BANKS(ADDR_SURF_4_BANK));
2974
2975                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2976                         WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2977
2978                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2979                         if (reg_offset != 7)
2980                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2981
2982                 break;
	case CHIP_POLARIS10:
		/*
		 * Polaris 10 tiling tables.  modearray[] entries are written
		 * to GB_TILE_MODE0 + n below (array mode, pipe config, tile
		 * split and micro-tile mode per surface type); mod2array[]
		 * entries are written to GB_MACROTILE_MODE0 + n (bank
		 * width/height, macro-tile aspect and bank count).  Most
		 * entries use the ADDR_SURF_P8_32x32_16x16 pipe config; the
		 * PRT entries at indices 7/12/17/23/30 use ADDR_SURF_P4_16x16.
		 */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P4_16x16) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/*
		 * Macro tile modes.  Note mod2array[7] is never initialized
		 * here; the write loop below skips register index 7 as well.
		 */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_16_BANK));

		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_8_BANK));

		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_4_BANK));

		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
				NUM_BANKS(ADDR_SURF_4_BANK));

		/* Flush the tables into the tile/macrotile mode registers;
		 * macrotile index 7 is skipped (not programmed above either).
		 */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	case CHIP_STONEY:
		/*
		 * Stoney tiling tables, using the ADDR_SURF_P2 pipe config
		 * throughout.  modearray[] feeds GB_TILE_MODE0 + n and
		 * mod2array[] feeds GB_MACROTILE_MODE0 + n in the loops below.
		 * Tile mode indices 7, 12, 17 and 23 and macrotile index 7 are
		 * intentionally left unset and skipped when writing.
		 */
		modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
		modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
				PIPE_CONFIG(ADDR_SURF_P2));
		modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				PIPE_CONFIG(ADDR_SURF_P2) |
				MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
		modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
		modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
		modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
				 PIPE_CONFIG(ADDR_SURF_P2) |
				 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
				 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));

		/* Macro tile modes (index 7 unused, skipped when written). */
		mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				NUM_BANKS(ADDR_SURF_8_BANK));
		mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
				BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
				 NUM_BANKS(ADDR_SURF_16_BANK));
		mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
				 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
				 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
				 NUM_BANKS(ADDR_SURF_8_BANK));

		/* Program the registers, skipping the unset indices above. */
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
			if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
			    reg_offset != 23)
				WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);

		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
			if (reg_offset != 7)
				WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);

		break;
	default:
		/*
		 * Unrecognized ASIC: warn, then deliberately fall through and
		 * program the CHIP_CARRIZO tables as a best-effort default
		 * (the warning text states this explicitly).
		 */
		dev_warn(adev->dev,
			 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
			 adev->asic_type);
		/* fall through */

3361         case CHIP_CARRIZO:
3362                 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3363                                 PIPE_CONFIG(ADDR_SURF_P2) |
3364                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3365                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3366                 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3367                                 PIPE_CONFIG(ADDR_SURF_P2) |
3368                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3369                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3370                 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3371                                 PIPE_CONFIG(ADDR_SURF_P2) |
3372                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3373                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3374                 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3375                                 PIPE_CONFIG(ADDR_SURF_P2) |
3376                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3377                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3378                 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3379                                 PIPE_CONFIG(ADDR_SURF_P2) |
3380                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3381                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3382                 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3383                                 PIPE_CONFIG(ADDR_SURF_P2) |
3384                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3385                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3386                 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3387                                 PIPE_CONFIG(ADDR_SURF_P2) |
3388                                 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3389                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3390                 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3391                                 PIPE_CONFIG(ADDR_SURF_P2));
3392                 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3393                                 PIPE_CONFIG(ADDR_SURF_P2) |
3394                                 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3395                                 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3396                 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3397                                  PIPE_CONFIG(ADDR_SURF_P2) |
3398                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3399                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3400                 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3401                                  PIPE_CONFIG(ADDR_SURF_P2) |
3402                                  MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3403                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3404                 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3405                                  PIPE_CONFIG(ADDR_SURF_P2) |
3406                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3407                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3408                 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3409                                  PIPE_CONFIG(ADDR_SURF_P2) |
3410                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3411                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3412                 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3413                                  PIPE_CONFIG(ADDR_SURF_P2) |
3414                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3415                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3416                 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3417                                  PIPE_CONFIG(ADDR_SURF_P2) |
3418                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3419                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3420                 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3421                                  PIPE_CONFIG(ADDR_SURF_P2) |
3422                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3423                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3424                 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3425                                  PIPE_CONFIG(ADDR_SURF_P2) |
3426                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3427                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3428                 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3429                                  PIPE_CONFIG(ADDR_SURF_P2) |
3430                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3431                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3432                 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3433                                  PIPE_CONFIG(ADDR_SURF_P2) |
3434                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3435                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3436                 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3437                                  PIPE_CONFIG(ADDR_SURF_P2) |
3438                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3439                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3440                 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3441                                  PIPE_CONFIG(ADDR_SURF_P2) |
3442                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3443                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3444                 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3445                                  PIPE_CONFIG(ADDR_SURF_P2) |
3446                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3447                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3448                 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3449                                  PIPE_CONFIG(ADDR_SURF_P2) |
3450                                  MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3451                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3452                 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3453                                  PIPE_CONFIG(ADDR_SURF_P2) |
3454                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3455                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3456                 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3457                                  PIPE_CONFIG(ADDR_SURF_P2) |
3458                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3459                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3460                 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3461                                  PIPE_CONFIG(ADDR_SURF_P2) |
3462                                  MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3463                                  SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3464
3465                 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3466                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3467                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3468                                 NUM_BANKS(ADDR_SURF_8_BANK));
3469                 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3470                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3471                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3472                                 NUM_BANKS(ADDR_SURF_8_BANK));
3473                 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3474                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3475                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3476                                 NUM_BANKS(ADDR_SURF_8_BANK));
3477                 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3478                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3479                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3480                                 NUM_BANKS(ADDR_SURF_8_BANK));
3481                 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3482                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3483                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3484                                 NUM_BANKS(ADDR_SURF_8_BANK));
3485                 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3486                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3487                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3488                                 NUM_BANKS(ADDR_SURF_8_BANK));
3489                 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3490                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3491                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3492                                 NUM_BANKS(ADDR_SURF_8_BANK));
3493                 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3494                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3495                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3496                                 NUM_BANKS(ADDR_SURF_16_BANK));
3497                 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3498                                 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3499                                 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3500                                 NUM_BANKS(ADDR_SURF_16_BANK));
3501                 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3502                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3503                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3504                                  NUM_BANKS(ADDR_SURF_16_BANK));
3505                 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3506                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3507                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3508                                  NUM_BANKS(ADDR_SURF_16_BANK));
3509                 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3510                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3511                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3512                                  NUM_BANKS(ADDR_SURF_16_BANK));
3513                 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3514                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3515                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3516                                  NUM_BANKS(ADDR_SURF_16_BANK));
3517                 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3518                                  BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3519                                  MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3520                                  NUM_BANKS(ADDR_SURF_8_BANK));
3521
3522                 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3523                         if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3524                             reg_offset != 23)
3525                                 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3526
3527                 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3528                         if (reg_offset != 7)
3529                                 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3530
3531                 break;
3532         }
3533 }
3534
3535 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3536                                   u32 se_num, u32 sh_num, u32 instance)
3537 {
3538         u32 data;
3539
3540         if (instance == 0xffffffff)
3541                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3542         else
3543                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3544
3545         if (se_num == 0xffffffff)
3546                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3547         else
3548                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3549
3550         if (sh_num == 0xffffffff)
3551                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3552         else
3553                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3554
3555         WREG32(mmGRBM_GFX_INDEX, data);
3556 }
3557
3558 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3559 {
3560         return (u32)((1ULL << bit_width) - 1);
3561 }
3562
3563 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3564 {
3565         u32 data, mask;
3566
3567         data =  RREG32(mmCC_RB_BACKEND_DISABLE) |
3568                 RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3569
3570         data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
3571
3572         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3573                                        adev->gfx.config.max_sh_per_se);
3574
3575         return (~data) & mask;
3576 }
3577
3578 static void
3579 gfx_v8_0_raster_config(struct amdgpu_device *adev, u32 *rconf, u32 *rconf1)
3580 {
3581         switch (adev->asic_type) {
3582         case CHIP_FIJI:
3583                 *rconf |= RB_MAP_PKR0(2) | RB_MAP_PKR1(2) |
3584                           RB_XSEL2(1) | PKR_MAP(2) |
3585                           PKR_XSEL(1) | PKR_YSEL(1) |
3586                           SE_MAP(2) | SE_XSEL(2) | SE_YSEL(3);
3587                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(3) |
3588                            SE_PAIR_YSEL(2);
3589                 break;
3590         case CHIP_TONGA:
3591         case CHIP_POLARIS10:
3592                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3593                           SE_XSEL(1) | SE_YSEL(1);
3594                 *rconf1 |= SE_PAIR_MAP(2) | SE_PAIR_XSEL(2) |
3595                            SE_PAIR_YSEL(2);
3596                 break;
3597         case CHIP_TOPAZ:
3598         case CHIP_CARRIZO:
3599                 *rconf |= RB_MAP_PKR0(2);
3600                 *rconf1 |= 0x0;
3601                 break;
3602         case CHIP_POLARIS11:
3603         case CHIP_POLARIS12:
3604                 *rconf |= RB_MAP_PKR0(2) | RB_XSEL2(1) | SE_MAP(2) |
3605                           SE_XSEL(1) | SE_YSEL(1);
3606                 *rconf1 |= 0x0;
3607                 break;
3608         case CHIP_STONEY:
3609                 *rconf |= 0x0;
3610                 *rconf1 |= 0x0;
3611                 break;
3612         default:
3613                 DRM_ERROR("unknown asic: 0x%x\n", adev->asic_type);
3614                 break;
3615         }
3616 }
3617
/*
 * gfx_v8_0_write_harvested_raster_configs - per-SE raster config when RBs
 * are harvested
 *
 * @adev: amdgpu_device pointer
 * @raster_config: base PA_SC_RASTER_CONFIG value (fully-populated layout)
 * @raster_config_1: base PA_SC_RASTER_CONFIG_1 value
 * @rb_mask: bitmap of the render backends that are actually active
 * @num_rb: number of RB pipes being accounted for
 *
 * Patches the SE/PKR/RB mapping fields on a per-shader-engine basis so
 * rasterizer work is steered away from disabled (harvested) backends, and
 * writes the result to each SE individually via GRBM_GFX_INDEX.  Leaves
 * GRBM_GFX_INDEX in broadcast mode on return.  Called with
 * adev->grbm_idx_mutex held (see gfx_v8_0_setup_rb()).
 */
static void
gfx_v8_0_write_harvested_raster_configs(struct amdgpu_device *adev,
					u32 raster_config, u32 raster_config_1,
					unsigned rb_mask, unsigned num_rb)
{
	unsigned sh_per_se = max_t(unsigned, adev->gfx.config.max_sh_per_se, 1);
	unsigned num_se = max_t(unsigned, adev->gfx.config.max_shader_engines, 1);
	unsigned rb_per_pkr = min_t(unsigned, num_rb / num_se / sh_per_se, 2);
	unsigned rb_per_se = num_rb / num_se;
	unsigned se_mask[4];
	unsigned se;

	/* Slice the global RB mask into per-SE masks (up to 4 SEs). */
	se_mask[0] = ((1 << rb_per_se) - 1) & rb_mask;
	se_mask[1] = (se_mask[0] << rb_per_se) & rb_mask;
	se_mask[2] = (se_mask[1] << rb_per_se) & rb_mask;
	se_mask[3] = (se_mask[2] << rb_per_se) & rb_mask;

	WARN_ON(!(num_se == 1 || num_se == 2 || num_se == 4));
	WARN_ON(!(sh_per_se == 1 || sh_per_se == 2));
	WARN_ON(!(rb_per_pkr == 1 || rb_per_pkr == 2));

	/* If one SE pair is completely empty, remap to the populated pair. */
	if ((num_se > 2) && ((!se_mask[0] && !se_mask[1]) ||
			     (!se_mask[2] && !se_mask[3]))) {
		raster_config_1 &= ~SE_PAIR_MAP_MASK;

		if (!se_mask[0] && !se_mask[1]) {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_3);
		} else {
			raster_config_1 |=
				SE_PAIR_MAP(RASTER_CONFIG_SE_PAIR_MAP_0);
		}
	}

	for (se = 0; se < num_se; se++) {
		unsigned raster_config_se = raster_config;
		unsigned pkr0_mask = ((1 << rb_per_pkr) - 1) << (se * rb_per_se);
		unsigned pkr1_mask = pkr0_mask << rb_per_pkr;
		int idx = (se / 2) * 2;	/* first SE of the pair this SE is in */

		/* Steer the SE mapping toward the populated SE of the pair. */
		if ((num_se > 1) && (!se_mask[idx] || !se_mask[idx + 1])) {
			raster_config_se &= ~SE_MAP_MASK;

			if (!se_mask[idx]) {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_3);
			} else {
				raster_config_se |= SE_MAP(RASTER_CONFIG_SE_MAP_0);
			}
		}

		pkr0_mask &= rb_mask;
		pkr1_mask &= rb_mask;
		/* Same for the packer mapping when one packer has no RBs. */
		if (rb_per_se > 2 && (!pkr0_mask || !pkr1_mask)) {
			raster_config_se &= ~PKR_MAP_MASK;

			if (!pkr0_mask) {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_3);
			} else {
				raster_config_se |= PKR_MAP(RASTER_CONFIG_PKR_MAP_0);
			}
		}

		/* Finally patch the RB-within-packer maps (PKR0, then PKR1). */
		if (rb_per_se >= 2) {
			unsigned rb0_mask = 1 << (se * rb_per_se);
			unsigned rb1_mask = rb0_mask << 1;

			rb0_mask &= rb_mask;
			rb1_mask &= rb_mask;
			if (!rb0_mask || !rb1_mask) {
				raster_config_se &= ~RB_MAP_PKR0_MASK;

				if (!rb0_mask) {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_3);
				} else {
					raster_config_se |=
						RB_MAP_PKR0(RASTER_CONFIG_RB_MAP_0);
				}
			}

			if (rb_per_se > 2) {
				rb0_mask = 1 << (se * rb_per_se + rb_per_pkr);
				rb1_mask = rb0_mask << 1;
				rb0_mask &= rb_mask;
				rb1_mask &= rb_mask;
				if (!rb0_mask || !rb1_mask) {
					raster_config_se &= ~RB_MAP_PKR1_MASK;

					if (!rb0_mask) {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_3);
					} else {
						raster_config_se |=
							RB_MAP_PKR1(RASTER_CONFIG_RB_MAP_0);
					}
				}
			}
		}

		/* GRBM_GFX_INDEX has a different offset on VI */
		gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	}

	/* GRBM_GFX_INDEX has a different offset on VI */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
}
3726
/*
 * gfx_v8_0_setup_rb - discover active render backends and program the
 * raster configuration
 *
 * @adev: amdgpu_device pointer
 *
 * Walks every SE/SH to build a global active-RB bitmap, then writes either
 * the default raster config (nothing harvested, or all RBs present) or the
 * per-SE harvested variant.  The resulting per-SE/SH register values are
 * cached in adev->gfx.config.rb_config[][] for later queries.
 */
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
	int i, j;
	u32 data;
	u32 raster_config = 0, raster_config_1 = 0;
	u32 active_rbs = 0;
	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
					adev->gfx.config.max_sh_per_se;
	unsigned num_rb_pipes;

	mutex_lock(&adev->grbm_idx_mutex);
	/* Pack each SE/SH's active-RB bits into one global bitmap. */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			data = gfx_v8_0_get_rb_active_bitmap(adev);
			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
					       rb_bitmap_width_per_sh);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	adev->gfx.config.backend_enable_mask = active_rbs;
	adev->gfx.config.num_rbs = hweight32(active_rbs);

	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
			     adev->gfx.config.max_shader_engines, 16);

	gfx_v8_0_raster_config(adev, &raster_config, &raster_config_1);

	if (!adev->gfx.config.backend_enable_mask ||
			adev->gfx.config.num_rbs >= num_rb_pipes) {
		/* No harvesting to compensate for: broadcast the defaults. */
		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
		WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
	} else {
		gfx_v8_0_write_harvested_raster_configs(adev, raster_config, raster_config_1,
							adev->gfx.config.backend_enable_mask,
							num_rb_pipes);
	}

	/* cache the values for userspace */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			adev->gfx.config.rb_config[i][j].rb_backend_disable =
				RREG32(mmCC_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
			adev->gfx.config.rb_config[i][j].raster_config =
				RREG32(mmPA_SC_RASTER_CONFIG);
			adev->gfx.config.rb_config[i][j].raster_config_1 =
				RREG32(mmPA_SC_RASTER_CONFIG_1);
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);
}
3783
/**
 * gfx_v8_0_init_compute_vmid - initialize the compute VMIDs
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize the SH_MEM registers of the VMIDs reserved for compute
 * (FIRST_COMPUTE_VMID .. LAST_COMPUTE_VMID - 1, i.e. 8-15).
 *
 */
#define DEFAULT_SH_MEM_BASES	(0x6000)
#define FIRST_COMPUTE_VMID	(8)
#define LAST_COMPUTE_VMID	(16)
static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
{
	int i;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;

	/*
	 * Configure apertures:
	 * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
	 * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
	 * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
	 */
	sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);

	/* 64-bit HSA addressing, unaligned access allowed, MTYPE_CC default,
	 * private apertures routed through the ATC. */
	sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
			SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
			SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
			SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
			MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
			SH_MEM_CONFIG__PRIVATE_ATC_MASK;

	mutex_lock(&adev->srbm_mutex);
	for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(mmSH_MEM_CONFIG, sh_mem_config);
		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, sh_mem_bases);
	}
	/* Restore VMID 0 selection before releasing the SRBM lock. */
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
3828
/*
 * gfx_v8_0_gpu_init - base GFX engine setup
 *
 * @adev: amdgpu_device pointer
 *
 * Programs the address config registers, initializes the tiling tables,
 * RB/raster config and CU info, sets up the SH_MEM registers for all 16
 * VMIDs (graphics and compute), and finally programs the PA_SC FIFO sizes
 * broadcast to all shader engines.
 */
static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
{
	u32 tmp;
	int i;

	WREG32_FIELD(GRBM_CNTL, READ_TIMEOUT, 0xFF);
	WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
	WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);

	gfx_v8_0_tiling_mode_table_init(adev);
	gfx_v8_0_setup_rb(adev);
	gfx_v8_0_get_cu_info(adev);

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		/* CP and shaders */
		if (i == 0) {
			/* VMID 0 gets MTYPE_UC for default and APE1 */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
		} else {
			/* all other VMIDs get MTYPE_NC */
			tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
			WREG32(mmSH_MEM_CONFIG, tmp);
		}

		WREG32(mmSH_MEM_APE1_BASE, 1);
		WREG32(mmSH_MEM_APE1_LIMIT, 0);
		WREG32(mmSH_MEM_BASES, 0);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* Re-programs the compute VMIDs (8-15) with their own apertures. */
	gfx_v8_0_init_compute_vmid(adev);

	mutex_lock(&adev->grbm_idx_mutex);
	/*
	 * making sure that the following register writes will be broadcasted
	 * to all the shaders
	 */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmPA_SC_FIFO_SIZE,
		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
			PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_prim_fifo_size_backend <<
			PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_hiz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
	mutex_unlock(&adev->grbm_idx_mutex);

}
3891
/*
 * gfx_v8_0_wait_for_rlc_serdes - wait for the RLC serdes masters to go idle
 *
 * @adev: amdgpu_device pointer
 *
 * Polls the per-CU master-busy register for every SE/SH, then the non-CU
 * (SE/GC/TC0/TC1) master-busy bits.  Each poll gives up silently after
 * adev->usec_timeout microseconds.
 */
static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			for (k = 0; k < adev->usec_timeout; k++) {
				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	/* Now wait for the non-CU masters, which are not per-SE/SH. */
	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
		RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
	for (k = 0; k < adev->usec_timeout; k++) {
		if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
3921
3922 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3923                                                bool enable)
3924 {
3925         u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3926
3927         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3928         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3929         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3930         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3931
3932         WREG32(mmCP_INT_CNTL_RING0, tmp);
3933 }
3934
/*
 * Point the RLC at the clear-state indirect buffer (CSIB): GPU address
 * (split into HI/LO, LO aligned down to 4 bytes) and size in bytes.
 */
static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
{
	/* csib */
	WREG32(mmRLC_CSIB_ADDR_HI,
			adev->gfx.rlc.clear_state_gpu_addr >> 32);
	WREG32(mmRLC_CSIB_ADDR_LO,
			adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
	WREG32(mmRLC_CSIB_LENGTH,
			adev->gfx.rlc.clear_state_size);
}
3945
/*
 * gfx_v8_0_parse_ind_reg_list - compact the index values in an RLC
 * indirect register list
 *
 * @register_list_format: the list, rewritten in place
 * @ind_offset: offset of the first indirect entry within the list
 * @list_size: total number of dwords in the list
 * @unique_indices: out: table of distinct raw index values found
 * @indices_count: in/out: number of valid entries in @unique_indices
 * @max_indices: capacity of @unique_indices
 * @ind_start_offsets: out: starting offset of each sub-list
 * @offset_count: in/out: number of valid entries in @ind_start_offsets
 * @max_offset: capacity of @ind_start_offsets
 *
 * Sub-lists are delimited by a 0xFFFFFFFF dword.  Within a sub-list the
 * code skips two dwords and treats the third as a raw index value --
 * presumably (register, value, index) records per the RLC firmware list
 * format; TODO confirm against the firmware spec.  Each raw index is
 * replaced by its slot number in @unique_indices so the hardware only
 * needs a small index table.  BUG_ON fires if either output table fills
 * up.
 */
static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
				int ind_offset,
				int list_size,
				int *unique_indices,
				int *indices_count,
				int max_indices,
				int *ind_start_offsets,
				int *offset_count,
				int max_offset)
{
	int indices;
	bool new_entry = true;

	for (; ind_offset < list_size; ind_offset++) {

		if (new_entry) {
			/* Record where this sub-list starts. */
			new_entry = false;
			ind_start_offsets[*offset_count] = ind_offset;
			*offset_count = *offset_count + 1;
			BUG_ON(*offset_count >= max_offset);
		}

		/* 0xFFFFFFFF terminates the current sub-list. */
		if (register_list_format[ind_offset] == 0xFFFFFFFF) {
			new_entry = true;
			continue;
		}

		/* Skip to the index dword of this record. */
		ind_offset += 2;

		/* look for the matching indice */
		for (indices = 0;
			indices < *indices_count;
			indices++) {
			if (unique_indices[indices] ==
				register_list_format[ind_offset])
				break;
		}

		/* Not seen before: append it to the unique-index table. */
		if (indices >= *indices_count) {
			unique_indices[*indices_count] =
				register_list_format[ind_offset];
			indices = *indices_count;
			*indices_count = *indices_count + 1;
			BUG_ON(*indices_count >= max_indices);
		}

		/* Replace the raw index with its compacted slot number. */
		register_list_format[ind_offset] = indices;
	}
}
3995
/*
 * gfx_v8_0_init_save_restore_list - upload the RLC save/restore lists
 *
 * @adev: amdgpu_device pointer
 *
 * Writes the register-restore list into RLC ARAM, loads a scratch copy of
 * the (index-compacted) indirect register-list format plus the per-entry
 * start offsets into RLC GPM scratch memory, and programs the SRM
 * index/data control register pairs for each unique index found by
 * gfx_v8_0_parse_ind_reg_list().
 *
 * Return: 0 on success, -ENOMEM if the scratch copy of the format list
 * cannot be allocated.
 */
static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
{
	int i, temp, data;
	int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
	int indices_count = 0;
	int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
	int offset_count = 0;

	int list_size;
	/* Work on a copy: the parser rewrites the list in place. */
	unsigned int *register_list_format =
		kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
	if (!register_list_format)
		return -ENOMEM;
	memcpy(register_list_format, adev->gfx.rlc.register_list_format,
			adev->gfx.rlc.reg_list_format_size_bytes);

	gfx_v8_0_parse_ind_reg_list(register_list_format,
				RLC_FormatDirectRegListLength,
				adev->gfx.rlc.reg_list_format_size_bytes >> 2,
				unique_indices,
				&indices_count,
				sizeof(unique_indices) / sizeof(int),
				indirect_start_offsets,
				&offset_count,
				sizeof(indirect_start_offsets)/sizeof(int));

	/* save and restore list */
	WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1);

	WREG32(mmRLC_SRM_ARAM_ADDR, 0);
	for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
		WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);

	/* indirect list */
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
	for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);

	/* list size in units of dword pairs, apparently -- TODO confirm */
	list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
	list_size = list_size >> 1;
	WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
	WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);

	/* starting offsets starts */
	WREG32(mmRLC_GPM_SCRATCH_ADDR,
		adev->gfx.rlc.starting_offsets_start);
	for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
		WREG32(mmRLC_GPM_SCRATCH_DATA,
				indirect_start_offsets[i]);

	/* unique indices */
	temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
	data = mmRLC_SRM_INDEX_CNTL_DATA_0;
	for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
		if (unique_indices[i] != 0) {
			/* low bits carry the register offset, high bits the
			 * data select -- NOTE(review): the 0x3FFFF/>>20 split
			 * looks like the SRM INDEX_CNTL field layout; confirm
			 * against the register spec. */
			amdgpu_mm_wreg(adev, temp + i,
					unique_indices[i] & 0x3FFFF, false);
			amdgpu_mm_wreg(adev, data + i,
					unique_indices[i] >> 20, false);
		}
	}
	kfree(register_list_format);

	return 0;
}
4061
/* Turn on the RLC save/restore machine (SRM) so the RLC can save and
 * restore register state programmed by gfx_v8_0_init_save_restore_list().
 */
static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_SRM_CNTL, SRM_ENABLE, 1);
}
4066
/*
 * Program static power-gating tuning parameters: the CP ring-buffer
 * wptr-poll idle count, the RLC power up/down/propagate/mem-sleep
 * delays, the SERDES command delay and the GFX-idle threshold for the
 * auto power-gating logic.  All values are fixed magic constants.
 */
static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
{
	uint32_t data;

	WREG32_FIELD(CP_RB_WPTR_POLL_CNTL, IDLE_POLL_COUNT, 0x60);

	/* build RLC_PG_DELAY in one value, then write it once */
	data = REG_SET_FIELD(0, RLC_PG_DELAY, POWER_UP_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, POWER_DOWN_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, CMD_PROPAGATE_DELAY, 0x10);
	data = REG_SET_FIELD(data, RLC_PG_DELAY, MEM_SLEEP_DELAY, 0x10);
	WREG32(mmRLC_PG_DELAY, data);

	WREG32_FIELD(RLC_PG_DELAY_2, SERDES_CMD_DELAY, 0x3);
	WREG32_FIELD(RLC_AUTO_PG_CTRL, GRBM_REG_SAVE_GFX_IDLE_THRESHOLD, 0x55f0);

}
4083
/* Enable/disable SMU clock slow-down while a block is powering up (CZ). */
static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PU_ENABLE, enable ? 1 : 0);
}
4089
/* Enable/disable SMU clock slow-down while a block is powering down (CZ). */
static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
						  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, SMU_CLK_SLOWDOWN_ON_PD_ENABLE, enable ? 1 : 0);
}
4095
/* Enable/disable CP power gating.  Note the field is a *disable* bit,
 * so enable==true clears it and enable==false sets it.
 */
static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, CP_PG_DISABLE, enable ? 0 : 1);
}
4100
4101 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
4102 {
4103         if ((adev->asic_type == CHIP_CARRIZO) ||
4104             (adev->asic_type == CHIP_STONEY)) {
4105                 gfx_v8_0_init_csb(adev);
4106                 gfx_v8_0_init_save_restore_list(adev);
4107                 gfx_v8_0_enable_save_restore_machine(adev);
4108                 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
4109                 gfx_v8_0_init_power_gating(adev);
4110                 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
4111         } else if ((adev->asic_type == CHIP_POLARIS11) ||
4112                    (adev->asic_type == CHIP_POLARIS12)) {
4113                 gfx_v8_0_init_csb(adev);
4114                 gfx_v8_0_init_save_restore_list(adev);
4115                 gfx_v8_0_enable_save_restore_machine(adev);
4116                 gfx_v8_0_init_power_gating(adev);
4117         }
4118
4119 }
4120
/* Halt the RLC F32 core, mask GUI-idle interrupts, and wait for the
 * RLC serdes to go idle before returning.
 */
static void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 0);

	gfx_v8_0_enable_gui_idle_interrupt(adev, false);
	gfx_v8_0_wait_for_rlc_serdes(adev);
}
4128
/* Pulse the GRBM soft-reset line for the RLC: assert, hold 50us,
 * deassert, then wait another 50us for the block to come back.
 */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	udelay(50);

	WREG32_FIELD(GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	udelay(50);
}
4137
/* Re-enable the RLC F32 core.  On dGPUs the GUI-idle interrupt is
 * unmasked here; APUs defer that until after CP init (see comment).
 */
static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
{
	WREG32_FIELD(RLC_CNTL, RLC_ENABLE_F32, 1);

	/* carrizo do enable cp interrupt after cp inited */
	if (!(adev->flags & AMD_IS_APU))
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

	udelay(50);
}
4148
4149 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
4150 {
4151         const struct rlc_firmware_header_v2_0 *hdr;
4152         const __le32 *fw_data;
4153         unsigned i, fw_size;
4154
4155         if (!adev->gfx.rlc_fw)
4156                 return -EINVAL;
4157
4158         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
4159         amdgpu_ucode_print_rlc_hdr(&hdr->header);
4160
4161         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
4162                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
4163         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
4164
4165         WREG32(mmRLC_GPM_UCODE_ADDR, 0);
4166         for (i = 0; i < fw_size; i++)
4167                 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
4168         WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
4169
4170         return 0;
4171 }
4172
/*
 * Bring the RLC up: stop it, disable clock gating and power gating,
 * soft-reset it, re-initialize power-gating state, load its microcode
 * (unless the SMU is responsible for that) and start it again.
 *
 * Returns 0 on success or a negative error code if microcode loading
 * (or the SMU load check) failed.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;
	u32 tmp;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	tmp = RREG32(mmRLC_CGCG_CGLS_CTRL);
	tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
		 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
	WREG32(mmRLC_CGCG_CGLS_CTRL, tmp);
	if (adev->asic_type == CHIP_POLARIS11 ||
	    adev->asic_type == CHIP_POLARIS10 ||
	    adev->asic_type == CHIP_POLARIS12) {
		/* Polaris also has a 3D CGCG/CGLS control; clear its low
		 * two enable bits as well */
		tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D);
		tmp &= ~0x3;
		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp);
	}

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);
	gfx_v8_0_init_pg(adev);

	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy rlc firmware loading */
			r = gfx_v8_0_rlc_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU loads the RLC firmware; just wait for it */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
							AMDGPU_UCODE_ID_RLC_G);
			if (r)
				return -EINVAL;
		}
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}
4217
4218 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4219 {
4220         int i;
4221         u32 tmp = RREG32(mmCP_ME_CNTL);
4222
4223         if (enable) {
4224                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4225                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4226                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4227         } else {
4228                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4229                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4230                 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4231                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4232                         adev->gfx.gfx_ring[i].ready = false;
4233         }
4234         WREG32(mmCP_ME_CNTL, tmp);
4235         udelay(50);
4236 }
4237
4238 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4239 {
4240         const struct gfx_firmware_header_v1_0 *pfp_hdr;
4241         const struct gfx_firmware_header_v1_0 *ce_hdr;
4242         const struct gfx_firmware_header_v1_0 *me_hdr;
4243         const __le32 *fw_data;
4244         unsigned i, fw_size;
4245
4246         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4247                 return -EINVAL;
4248
4249         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4250                 adev->gfx.pfp_fw->data;
4251         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4252                 adev->gfx.ce_fw->data;
4253         me_hdr = (const struct gfx_firmware_header_v1_0 *)
4254                 adev->gfx.me_fw->data;
4255
4256         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4257         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4258         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4259
4260         gfx_v8_0_cp_gfx_enable(adev, false);
4261
4262         /* PFP */
4263         fw_data = (const __le32 *)
4264                 (adev->gfx.pfp_fw->data +
4265                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4266         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4267         WREG32(mmCP_PFP_UCODE_ADDR, 0);
4268         for (i = 0; i < fw_size; i++)
4269                 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4270         WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4271
4272         /* CE */
4273         fw_data = (const __le32 *)
4274                 (adev->gfx.ce_fw->data +
4275                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4276         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4277         WREG32(mmCP_CE_UCODE_ADDR, 0);
4278         for (i = 0; i < fw_size; i++)
4279                 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4280         WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4281
4282         /* ME */
4283         fw_data = (const __le32 *)
4284                 (adev->gfx.me_fw->data +
4285                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4286         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4287         WREG32(mmCP_ME_RAM_WADDR, 0);
4288         for (i = 0; i < fw_size; i++)
4289                 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4290         WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4291
4292         return 0;
4293 }
4294
4295 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4296 {
4297         u32 count = 0;
4298         const struct cs_section_def *sect = NULL;
4299         const struct cs_extent_def *ext = NULL;
4300
4301         /* begin clear state */
4302         count += 2;
4303         /* context control state */
4304         count += 3;
4305
4306         for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4307                 for (ext = sect->section; ext->extent != NULL; ++ext) {
4308                         if (sect->id == SECT_CONTEXT)
4309                                 count += 2 + ext->reg_count;
4310                         else
4311                                 return 0;
4312                 }
4313         }
4314         /* pa_sc_raster_config/pa_sc_raster_config1 */
4315         count += 4;
4316         /* end clear state */
4317         count += 2;
4318         /* clear state */
4319         count += 2;
4320
4321         return count;
4322 }
4323
/*
 * Initialize the CP and emit the clear-state packet stream on gfx
 * ring 0: begin-clear-state preamble, context control, the context
 * registers from vi_cs_data, the per-ASIC raster config, the
 * end-clear-state preamble, a CLEAR_STATE packet and the CE partition
 * bases.  The allocation size must match gfx_v8_0_get_csb_size().
 *
 * Returns 0 on success or the error from amdgpu_ring_alloc().
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* +4 dwords for the SET_BASE packet emitted at the end */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit every context-register extent from the golden CS data */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* per-ASIC PA_SC_RASTER_CONFIG/_1 values (RB/SE layout constants) */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_TOPAZ:
		/* Topaz config depends on whether one or two RBs are present */
		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
				0x00000000 : 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
4416
/*
 * Program gfx ring 0's ring buffer (size, rptr/wptr writeback
 * addresses, base address) and its doorbell, then start the ring via
 * gfx_v8_0_cp_gfx_start() and verify it with a ring test.
 *
 * Returns 0 on success or the ring-test error; on failure the ring is
 * marked not ready.
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr, wptr_gpu_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32(mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
	WREG32(mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
	mdelay(1);
	/* rewrite CNTL without RB_RPTR_WR_ENA to lock in the rptr reset */
	WREG32(mmCP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* no gfx doorbells on iceland */
	if (adev->asic_type != CHIP_TOPAZ) {
		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
		if (ring->use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_HIT, 0);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					    DOORBELL_EN, 0);
		}
		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

		/* Tonga additionally needs the doorbell aperture range set */
		if (adev->asic_type == CHIP_TONGA) {
			tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					    DOORBELL_RANGE_LOWER,
					    AMDGPU_DOORBELL_GFX_RING0);
			WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

			WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
			       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
		}

	}

	/* start the ring */
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r)
		ring->ready = false;

	return r;
}
4500
4501 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4502 {
4503         int i;
4504
4505         if (enable) {
4506                 WREG32(mmCP_MEC_CNTL, 0);
4507         } else {
4508                 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4509                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4510                         adev->gfx.compute_ring[i].ready = false;
4511         }
4512         udelay(50);
4513 }
4514
/*
 * Load the MEC firmware into MEC1's ucode memory, and into MEC2's as
 * well when a separate MEC2 image was fetched.  The compute engines
 * are halted first.
 *
 * Returns 0 on success, -EINVAL if no MEC firmware is available.
 */
static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
	const struct gfx_firmware_header_v1_0 *mec_hdr;
	const __le32 *fw_data;
	unsigned i, fw_size;

	if (!adev->gfx.mec_fw)
		return -EINVAL;

	gfx_v8_0_cp_compute_enable(adev, false);

	mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);

	fw_data = (const __le32 *)
		(adev->gfx.mec_fw->data +
		 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
	fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;

	/* MEC1 */
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < fw_size; i++)
		WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
	WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);

	/* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
	if (adev->gfx.mec2_fw) {
		const struct gfx_firmware_header_v1_0 *mec2_hdr;

		mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
		amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);

		fw_data = (const __le32 *)
			(adev->gfx.mec2_fw->data +
			 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
		fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;

		WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < fw_size; i++)
			WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
		WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
	}

	return 0;
}
4560
4561 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4562 {
4563         int i, r;
4564
4565         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4566                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4567
4568                 if (ring->mqd_obj) {
4569                         r = amdgpu_bo_reserve(ring->mqd_obj, false);
4570                         if (unlikely(r != 0))
4571                                 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4572
4573                         amdgpu_bo_unpin(ring->mqd_obj);
4574                         amdgpu_bo_unreserve(ring->mqd_obj);
4575
4576                         amdgpu_bo_unref(&ring->mqd_obj);
4577                         ring->mqd_obj = NULL;
4578                 }
4579         }
4580 }
4581
4582 /* KIQ functions */
/*
 * Tell the RLC scheduler which me/pipe/queue is the KIQ.  The id is
 * written once without bit 0x80 and then again with it set —
 * presumably the RLC needs the id latched before the activation bit;
 * confirm against the RLC programming docs.
 */
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32(mmRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
	tmp |= 0x80;
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
}
4596
4597 static void gfx_v8_0_kiq_enable(struct amdgpu_ring *ring)
4598 {
4599         amdgpu_ring_alloc(ring, 8);
4600         /* set resources */
4601         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4602         amdgpu_ring_write(ring, 0);     /* vmid_mask:0 queue_type:0 (KIQ) */
4603         amdgpu_ring_write(ring, 0x000000FF);    /* queue mask lo */
4604         amdgpu_ring_write(ring, 0);     /* queue mask hi */
4605         amdgpu_ring_write(ring, 0);     /* gws mask lo */
4606         amdgpu_ring_write(ring, 0);     /* gws mask hi */
4607         amdgpu_ring_write(ring, 0);     /* oac mask */
4608         amdgpu_ring_write(ring, 0);     /* gds heap base:0, gds heap size:0 */
4609         amdgpu_ring_commit(ring);
4610         udelay(50);
4611 }
4612
4613 static void gfx_v8_0_map_queue_enable(struct amdgpu_ring *kiq_ring,
4614                                    struct amdgpu_ring *ring)
4615 {
4616         struct amdgpu_device *adev = kiq_ring->adev;
4617         uint64_t mqd_addr, wptr_addr;
4618
4619         mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4620         wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4621         amdgpu_ring_alloc(kiq_ring, 8);
4622
4623         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4624         /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
4625         amdgpu_ring_write(kiq_ring, 0x21010000);
4626         amdgpu_ring_write(kiq_ring, (ring->doorbell_index << 2) |
4627                         (ring->queue << 26) |
4628                         (ring->pipe << 29) |
4629                         ((ring->me == 1 ? 0 : 1) << 31)); /* doorbell */
4630         amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4631         amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4632         amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4633         amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4634         amdgpu_ring_commit(kiq_ring);
4635         udelay(50);
4636 }
4637
/*
 * Fill in a VI memory queue descriptor (MQD) for a compute ring:
 * header, EOP buffer address/size, MQD and HQD base addresses, queue
 * size/control bits, rptr/wptr writeback addresses and the doorbell
 * setup.  Several CP_HQD_* fields are seeded from current register
 * values via RREG32 before being modified.
 *
 * Always returns 0.
 */
static int gfx_v8_0_mqd_init(struct amdgpu_device *adev,
			     struct vi_mqd *mqd,
			     uint64_t mqd_gpu_addr,
			     uint64_t eop_gpu_addr,
			     struct amdgpu_ring *ring)
{
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;

	eop_base_addr = eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			(order_base_2(MEC_HPD_SIZE / 4) - 1));

	mqd->cp_hqd_eop_control = tmp;

	/* enable doorbell? */
	tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);

	if (ring->use_doorbell)
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_EN, 1);
	else
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_EN, 0);

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* disable the queue if it's active */
	mqd->cp_hqd_dequeue_request = 0;
	mqd->cp_hqd_pq_rptr = 0;
	mqd->cp_hqd_pq_wptr = 0;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = RREG32(mmCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	tmp = 0;
	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);

		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_HIT, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_wptr = ring->wptr;
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	mqd->cp_hqd_persistent_state = tmp;

	/* activate the queue */
	mqd->cp_hqd_active = 1;

	return 0;
}
4757
/*
 * gfx_v8_0_kiq_init_register() - program the HQD registers for the KIQ
 * @adev: amdgpu device pointer
 * @mqd:  CPU pointer to the already-initialized MQD; source of all values
 * @ring: the KIQ ring being brought up
 *
 * Copies the queue state from the MQD image into the per-queue (HQD)
 * registers and activates the queue.  Caller must hold adev->srbm_mutex
 * and have selected this queue's me/pipe/queue via vi_srbm_select()
 * (see gfx_v8_0_kiq_init_queue()).
 *
 * Return: always 0.
 */
static int gfx_v8_0_kiq_init_register(struct amdgpu_device *adev,
				      struct vi_mqd *mqd,
				      struct amdgpu_ring *ring)
{
	uint32_t tmp;
	int j;

	/* disable wptr polling */
	tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
	WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);

	WREG32(mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo);
	WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	WREG32(mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control);

	/* enable doorbell? */
	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);

	/* disable the queue if it's active; a DEQUEUE_REQUEST of 1 asks the
	 * CP to drain the queue, then we poll ACTIVE until it drops (bounded
	 * by usec_timeout) before overwriting the queue state */
	if (RREG32(mmCP_HQD_ACTIVE) & 1) {
		WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
		for (j = 0; j < adev->usec_timeout; j++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
		WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
	}

	/* set the pointer to the MQD */
	WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
	WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

	/* set MQD vmid to 0 */
	WREG32(mmCP_MQD_CONTROL, mqd->cp_mqd_control);

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
	WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	WREG32(mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);

	/* set the wb address whether it's enabled or not */
	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
				mqd->cp_hqd_pq_rptr_report_addr_lo);
	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
				mqd->cp_hqd_pq_rptr_report_addr_hi);

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi);

	/* enable the doorbell if requested; the MEC doorbell aperture is
	 * only programmed here on the APU/Fiji parts (presumably the other
	 * ASICs get it set elsewhere -- NOTE(review): confirm) */
	if (ring->use_doorbell) {
		if ((adev->asic_type == CHIP_CARRIZO) ||
				(adev->asic_type == CHIP_FIJI) ||
				(adev->asic_type == CHIP_STONEY)) {
			WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
						AMDGPU_DOORBELL_KIQ << 2);
			WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
						AMDGPU_DOORBELL_MEC_RING7 << 2);
		}
	}
	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);

	/* set the vmid for the queue */
	WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);

	WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);

	/* activate the queue */
	WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

	/* globally enable doorbell processing for the PQ */
	if (ring->use_doorbell) {
		tmp = RREG32(mmCP_PQ_STATUS);
		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
		WREG32(mmCP_PQ_STATUS, tmp);
	}

	return 0;
}
4848
4849 static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring,
4850                                    struct vi_mqd *mqd,
4851                                    u64 mqd_gpu_addr)
4852 {
4853         struct amdgpu_device *adev = ring->adev;
4854         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
4855         uint64_t eop_gpu_addr;
4856         bool is_kiq = false;
4857
4858         if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
4859                 is_kiq = true;
4860
4861         if (is_kiq) {
4862                 eop_gpu_addr = kiq->eop_gpu_addr;
4863                 gfx_v8_0_kiq_setting(&kiq->ring);
4864         } else
4865                 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr +
4866                                         ring->queue * MEC_HPD_SIZE;
4867
4868         mutex_lock(&adev->srbm_mutex);
4869         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4870
4871         gfx_v8_0_mqd_init(adev, mqd, mqd_gpu_addr, eop_gpu_addr, ring);
4872
4873         if (is_kiq)
4874                 gfx_v8_0_kiq_init_register(adev, mqd, ring);
4875
4876         vi_srbm_select(adev, 0, 0, 0, 0);
4877         mutex_unlock(&adev->srbm_mutex);
4878
4879         if (is_kiq)
4880                 gfx_v8_0_kiq_enable(ring);
4881         else
4882                 gfx_v8_0_map_queue_enable(&kiq->ring, ring);
4883
4884         return 0;
4885 }
4886
4887 static void gfx_v8_0_kiq_free_queue(struct amdgpu_device *adev)
4888 {
4889         struct amdgpu_ring *ring = NULL;
4890         int i;
4891
4892         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4893                 ring = &adev->gfx.compute_ring[i];
4894                 amdgpu_bo_free_kernel(&ring->mqd_obj, NULL, NULL);
4895                 ring->mqd_obj = NULL;
4896         }
4897
4898         ring = &adev->gfx.kiq.ring;
4899         amdgpu_bo_free_kernel(&ring->mqd_obj, NULL, NULL);
4900         ring->mqd_obj = NULL;
4901 }
4902
4903 static int gfx_v8_0_kiq_setup_queue(struct amdgpu_device *adev,
4904                                     struct amdgpu_ring *ring)
4905 {
4906         struct vi_mqd *mqd;
4907         u64 mqd_gpu_addr;
4908         u32 *buf;
4909         int r = 0;
4910
4911         r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE,
4912                                     AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
4913                                     &mqd_gpu_addr, (void **)&buf);
4914         if (r) {
4915                 dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
4916                 return r;
4917         }
4918
4919         /* init the mqd struct */
4920         memset(buf, 0, sizeof(struct vi_mqd));
4921         mqd = (struct vi_mqd *)buf;
4922
4923         r = gfx_v8_0_kiq_init_queue(ring, mqd, mqd_gpu_addr);
4924         if (r)
4925                 return r;
4926
4927         amdgpu_bo_kunmap(ring->mqd_obj);
4928
4929         return 0;
4930 }
4931
4932 static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4933 {
4934         struct amdgpu_ring *ring = NULL;
4935         int r, i;
4936
4937         ring = &adev->gfx.kiq.ring;
4938         r = gfx_v8_0_kiq_setup_queue(adev, ring);
4939         if (r)
4940                 return r;
4941
4942         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4943                 ring = &adev->gfx.compute_ring[i];
4944                 r = gfx_v8_0_kiq_setup_queue(adev, ring);
4945                 if (r)
4946                         return r;
4947         }
4948
4949         gfx_v8_0_cp_compute_enable(adev, true);
4950
4951         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4952                 ring = &adev->gfx.compute_ring[i];
4953
4954                 ring->ready = true;
4955                 r = amdgpu_ring_test_ring(ring);
4956                 if (r)
4957                         ring->ready = false;
4958         }
4959
4960         ring = &adev->gfx.kiq.ring;
4961         ring->ready = true;
4962         r = amdgpu_ring_test_ring(ring);
4963         if (r)
4964                 ring->ready = false;
4965
4966         return 0;
4967 }
4968
/*
 * gfx_v8_0_cp_compute_resume() - bring up all compute queues (bare metal)
 * @adev: amdgpu device pointer
 *
 * For every compute ring: allocates/pins/maps its MQD BO, builds the MQD
 * image while mirroring each value into the live HQD registers under SRBM
 * selection, then activates the queue.  Afterwards the compute CP is
 * enabled and each ring is ring-tested (failures only clear ring->ready).
 *
 * Return: 0 on success (including failed ring tests), negative errno on
 * allocation/mapping failure.
 */
static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
{
	int r, i, j;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct vi_mqd *mqd;

	/* init the queues.  */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		/* lazily create the MQD BO; it survives suspend/resume */
		if (ring->mqd_obj == NULL) {
			r = amdgpu_bo_create(adev,
					     sizeof(struct vi_mqd),
					     PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &ring->mqd_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct vi_mqd));

		mqd = (struct vi_mqd *)buf;
		/* fixed MQD header/dispatch-setup values for gfx8 compute */
		mqd->header = 0xC0310800;
		mqd->compute_pipelinestat_enable = 0x00000001;
		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
		mqd->compute_misc_reserved = 0x00000003;

		/* all HQD register accesses below are routed to this queue
		 * by the SRBM selection; hold the mutex for the duration */
		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me,
			       ring->pipe,
			       ring->queue, 0);

		/* one MEC_HPD_SIZE EOP slot per queue, 256-byte aligned */
		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
		eop_gpu_addr >>= 8;

		/* write the EOP addr */
		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));

		/* set the VMID assigned */
		WREG32(mmCP_HQD_VMID, 0);

		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
		WREG32(mmCP_HQD_EOP_CONTROL, tmp);

		/* disable wptr polling */
		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);

		/* mirror the programmed EOP address back into the MQD */
		mqd->cp_hqd_eop_base_addr_lo =
			RREG32(mmCP_HQD_EOP_BASE_ADDR);
		mqd->cp_hqd_eop_base_addr_hi =
			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);

		/* enable doorbell? */
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
		mqd->cp_hqd_pq_doorbell_control = tmp;

		/* disable the queue if it's active: request a dequeue and
		 * poll ACTIVE (bounded by usec_timeout) before reprogramming */
		mqd->cp_hqd_dequeue_request = 0;
		mqd->cp_hqd_pq_rptr = 0;
		mqd->cp_hqd_pq_wptr= 0;
		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
			for (j = 0; j < adev->usec_timeout; j++) {
				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

		/* set MQD vmid to 0 */
		tmp = RREG32(mmCP_MQD_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
		WREG32(mmCP_MQD_CONTROL, tmp);
		mqd->cp_mqd_control = tmp;

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = ring->gpu_addr >> 8;
		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
				    (order_base_2(ring->ring_size / 4) - 1));
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			       ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
		mqd->cp_hqd_pq_control = tmp;

		/* set the wb address whether it's enabled or not */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->cp_hqd_pq_rptr_report_addr_lo);
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->cp_hqd_pq_rptr_report_addr_hi);

		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
		mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->cp_hqd_pq_wptr_poll_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			if ((adev->asic_type == CHIP_CARRIZO) ||
			    (adev->asic_type == CHIP_FIJI) ||
			    (adev->asic_type == CHIP_STONEY) ||
			    (adev->asic_type == CHIP_POLARIS11) ||
			    (adev->asic_type == CHIP_POLARIS10) ||
			    (adev->asic_type == CHIP_POLARIS12)) {
				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
				       AMDGPU_DOORBELL_KIQ << 2);
				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
				       AMDGPU_DOORBELL_MEC_RING7 << 2);
			}
			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
			mqd->cp_hqd_pq_doorbell_control = tmp;

		} else {
			mqd->cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->cp_hqd_pq_doorbell_control);

		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		ring->wptr = 0;
		mqd->cp_hqd_pq_wptr = ring->wptr;
		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->cp_hqd_vmid = 0;
		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);

		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
		mqd->cp_hqd_persistent_state = tmp;
		/* newer ASICs additionally enable the GENERIC2 interrupt on
		 * ME1 pipe 3 -- NOTE(review): presumably for queue events;
		 * confirm against the IH handler */
		if (adev->asic_type == CHIP_STONEY ||
			adev->asic_type == CHIP_POLARIS11 ||
			adev->asic_type == CHIP_POLARIS10 ||
			adev->asic_type == CHIP_POLARIS12) {
			tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
			WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
		}

		/* activate the queue */
		mqd->cp_hqd_active = 1;
		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		amdgpu_bo_kunmap(ring->mqd_obj);
		amdgpu_bo_unreserve(ring->mqd_obj);
	}

	/* globally enable doorbell processing for the PQ */
	if (use_doorbell) {
		tmp = RREG32(mmCP_PQ_STATUS);
		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
		WREG32(mmCP_PQ_STATUS, tmp);
	}

	gfx_v8_0_cp_compute_enable(adev, true);

	/* ring-test every queue; failure just marks the ring unusable */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

	return 0;
}
5218
5219 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
5220 {
5221         int r;
5222
5223         if (!(adev->flags & AMD_IS_APU))
5224                 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5225
5226         if (!adev->pp_enabled) {
5227                 if (!adev->firmware.smu_load) {
5228                         /* legacy firmware loading */
5229                         r = gfx_v8_0_cp_gfx_load_microcode(adev);
5230                         if (r)
5231                                 return r;
5232
5233                         r = gfx_v8_0_cp_compute_load_microcode(adev);
5234                         if (r)
5235                                 return r;
5236                 } else {
5237                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5238                                                         AMDGPU_UCODE_ID_CP_CE);
5239                         if (r)
5240                                 return -EINVAL;
5241
5242                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5243                                                         AMDGPU_UCODE_ID_CP_PFP);
5244                         if (r)
5245                                 return -EINVAL;
5246
5247                         r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5248                                                         AMDGPU_UCODE_ID_CP_ME);
5249                         if (r)
5250                                 return -EINVAL;
5251
5252                         if (adev->asic_type == CHIP_TOPAZ) {
5253                                 r = gfx_v8_0_cp_compute_load_microcode(adev);
5254                                 if (r)
5255                                         return r;
5256                         } else {
5257                                 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
5258                                                                                  AMDGPU_UCODE_ID_CP_MEC1);
5259                                 if (r)
5260                                         return -EINVAL;
5261                         }
5262                 }
5263         }
5264
5265         r = gfx_v8_0_cp_gfx_resume(adev);
5266         if (r)
5267                 return r;
5268
5269         if (amdgpu_sriov_vf(adev))
5270                 r = gfx_v8_0_kiq_resume(adev);
5271         else
5272                 r = gfx_v8_0_cp_compute_resume(adev);
5273         if (r)
5274                 return r;
5275
5276         gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5277
5278         return 0;
5279 }
5280
/* Enable or disable both CP front ends (GFX and compute) together. */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
5286
/*
 * gfx_v8_0_hw_init() - IP-block hw_init callback for GFX v8
 * @handle: opaque amdgpu_device pointer
 *
 * Programs golden registers, initializes the GPU state, then brings up
 * the RLC followed by the CP.
 *
 * Return: 0 on success, negative errno on failure.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	/* the RLC must be running before the CP comes up */
	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	return gfx_v8_0_cp_resume(adev);
}
5303
5304 static int gfx_v8_0_hw_fini(void *handle)
5305 {
5306         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5307
5308         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
5309         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
5310         if (amdgpu_sriov_vf(adev)) {
5311                 gfx_v8_0_kiq_free_queue(adev);
5312                 pr_debug("For SRIOV client, shouldn't do anything.\n");
5313                 return 0;
5314         }
5315         gfx_v8_0_cp_enable(adev, false);
5316         gfx_v8_0_rlc_stop(adev);
5317         gfx_v8_0_cp_compute_fini(adev);
5318
5319         amdgpu_set_powergating_state(adev,
5320                         AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);
5321
5322         return 0;
5323 }
5324
/* Suspend is a full hardware teardown; just forward the handle. */
static int gfx_v8_0_suspend(void *handle)
{
	return gfx_v8_0_hw_fini(handle);
}
5331
/* Resume is a full hardware re-init; just forward the handle. */
static int gfx_v8_0_resume(void *handle)
{
	return gfx_v8_0_hw_init(handle);
}
5338
5339 static bool gfx_v8_0_is_idle(void *handle)
5340 {
5341         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5342
5343         if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5344                 return false;
5345         else
5346                 return true;
5347 }
5348
5349 static int gfx_v8_0_wait_for_idle(void *handle)
5350 {
5351         unsigned i;
5352         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5353
5354         for (i = 0; i < adev->usec_timeout; i++) {
5355                 if (gfx_v8_0_is_idle(handle))
5356                         return 0;
5357
5358                 udelay(1);
5359         }
5360         return -ETIMEDOUT;
5361 }
5362
/*
 * gfx_v8_0_check_soft_reset() - decide whether GFX needs a soft reset
 * @handle: opaque amdgpu_device pointer
 *
 * Inspects GRBM_STATUS/GRBM_STATUS2/SRBM_STATUS and accumulates the
 * GRBM/SRBM soft-reset field masks for every hung unit.  The masks are
 * cached in adev->gfx.{grbm,srbm}_soft_reset for the subsequent
 * pre_soft_reset/soft_reset/post_soft_reset steps.
 *
 * Return: true if any reset bit was flagged, false otherwise.
 */
static bool gfx_v8_0_check_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	/* GRBM_STATUS: any busy gfx-pipeline unit -> reset CP + GFX + GRBM */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK |
		   GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2: RLC busy -> reset the RLC */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	/* any CP micro-engine (fetcher/compute/gfx) busy -> reset them all */
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, CPF_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPC_BUSY) ||
	    REG_GET_FIELD(tmp, GRBM_STATUS2, CPG_BUSY)) {
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPF, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPC, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
						SOFT_RESET_CPG, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET,
						SOFT_RESET_GRBM, 1);
	}

	/* SRBM_STATUS: pending GRBM requests or busy semaphore block */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, SEM_BUSY))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);

	/* stash the masks for the later soft-reset phases */
	if (grbm_soft_reset || srbm_soft_reset) {
		adev->gfx.grbm_soft_reset = grbm_soft_reset;
		adev->gfx.srbm_soft_reset = srbm_soft_reset;
		return true;
	} else {
		adev->gfx.grbm_soft_reset = 0;
		adev->gfx.srbm_soft_reset = 0;
		return false;
	}
}
5424
5425 static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev,
5426                                   struct amdgpu_ring *ring)
5427 {
5428         int i;
5429
5430         vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5431         if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
5432                 u32 tmp;
5433                 tmp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
5434                 tmp = REG_SET_FIELD(tmp, CP_HQD_DEQUEUE_REQUEST,
5435                                     DEQUEUE_REQ, 2);
5436                 WREG32(mmCP_HQD_DEQUEUE_REQUEST, tmp);
5437                 for (i = 0; i < adev->usec_timeout; i++) {
5438                         if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
5439                                 break;
5440                         udelay(1);
5441                 }
5442         }
5443 }
5444
5445 static int gfx_v8_0_pre_soft_reset(void *handle)
5446 {
5447         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5448         u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5449
5450         if ((!adev->gfx.grbm_soft_reset) &&
5451             (!adev->gfx.srbm_soft_reset))
5452                 return 0;
5453
5454         grbm_soft_reset = adev->gfx.grbm_soft_reset;
5455         srbm_soft_reset = adev->gfx.srbm_soft_reset;
5456
5457         /* stop the rlc */
5458         gfx_v8_0_rlc_stop(adev);
5459
5460         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5461             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
5462                 /* Disable GFX parsing/prefetching */
5463                 gfx_v8_0_cp_gfx_enable(adev, false);
5464
5465         if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
5466             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
5467             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
5468             REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
5469                 int i;
5470
5471                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5472                         struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
5473
5474                         gfx_v8_0_inactive_hqd(adev, ring);
5475                 }
5476                 /* Disable MEC parsing/prefetching */
5477                 gfx_v8_0_cp_compute_enable(adev, false);
5478         }
5479
5480        return 0;
5481 }
5482
/*
 * gfx_v8_0_soft_reset() - perform the GRBM/SRBM soft reset sequence
 * @handle: opaque amdgpu_device pointer
 *
 * Using the masks cached by gfx_v8_0_check_soft_reset(): stalls the
 * GFX-to-memory interface, pulses the flagged GRBM and SRBM reset bits
 * (set, settle 50 us, clear), then releases the stall.  A no-op when no
 * reset bits were flagged.
 *
 * Return: always 0.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* stall GFX traffic into the memory controller during the reset */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 1);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 1);
		WREG32(mmGMCON_DEBUG, tmp);
		udelay(50);
	}

	/* pulse the GRBM reset bits; the read-backs after each write
	 * post/flush the register access before the delay */
	if (grbm_soft_reset) {
		tmp = RREG32(mmGRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(mmGRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmGRBM_SOFT_RESET);
	}

	/* same pulse for the SRBM-level reset bits */
	if (srbm_soft_reset) {
		tmp = RREG32(mmSRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(mmSRBM_SOFT_RESET, tmp);
		tmp = RREG32(mmSRBM_SOFT_RESET);
	}

	/* release the GFX stall */
	if (grbm_soft_reset || srbm_soft_reset) {
		tmp = RREG32(mmGMCON_DEBUG);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_STALL, 0);
		tmp = REG_SET_FIELD(tmp, GMCON_DEBUG, GFX_CLEAR, 0);
		WREG32(mmGMCON_DEBUG, tmp);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	return 0;
}
5544
/* Reset the HQD (hardware queue descriptor) state of a compute ring after a
 * soft reset: clear any pending dequeue request and zero the PQ read/write
 * pointers.  The SRBM select brackets the writes so they target this ring's
 * me/pipe/queue instance; VMID 0 selects the kernel context.
 */
static void gfx_v8_0_init_hqd(struct amdgpu_device *adev,
			      struct amdgpu_ring *ring)
{
	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
	WREG32(mmCP_HQD_PQ_RPTR, 0);
	WREG32(mmCP_HQD_PQ_WPTR, 0);
	/* restore the default (broadcast) selection */
	vi_srbm_select(adev, 0, 0, 0, 0);
}
5554
/* Bring the GFX block back up after gfx_v8_0_soft_reset(): resume only the
 * engines whose reset bits were recorded, then restart the RLC.
 */
static int gfx_v8_0_post_soft_reset(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;

	/* No reset was pending, so there is nothing to resume. */
	if ((!adev->gfx.grbm_soft_reset) &&
	    (!adev->gfx.srbm_soft_reset))
		return 0;

	grbm_soft_reset = adev->gfx.grbm_soft_reset;
	srbm_soft_reset = adev->gfx.srbm_soft_reset;

	/* CP or GFX was reset: bring the gfx ring back up. */
	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
		gfx_v8_0_cp_gfx_resume(adev);

	/* Any compute-related engine (CP front end, compute, gfx pipe of
	 * the CP) was reset: reinitialize every compute ring's HQD, then
	 * resume the MEC.
	 */
	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPC) ||
	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPG)) {
		int i;

		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

			gfx_v8_0_init_hqd(adev, ring);
		}
		gfx_v8_0_cp_compute_resume(adev);
	}
	/* RLC was stopped in pre_soft_reset; restart it unconditionally. */
	gfx_v8_0_rlc_start(adev);

	return 0;
}
5588
/**
 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @adev: amdgpu_device pointer
 *
 * Fetches a GPU clock counter snapshot.
 * Returns the 64 bit clock counter snapshot.
 */
static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
{
	uint64_t clock;

	/* The capture write latches the counter so that the subsequent
	 * LSB/MSB reads are a consistent pair; the mutex serializes
	 * concurrent snapshots against each other.
	 */
	mutex_lock(&adev->gfx.gpu_clock_mutex);
	WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&adev->gfx.gpu_clock_mutex);
	return clock;
}
5608
/* Emit WRITE_DATA packets that program the per-VMID GDS, GWS and OA
 * allocations.  Byte-based inputs are first converted to the hardware's
 * block granularity via the AMDGPU_*_SHIFT constants.  Each register is
 * written with a 5-dword PACKET3_WRITE_DATA (header, control, reg offset,
 * hi-addr 0, value) targeting the register bus (DST_SEL 0, engine ME).
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
	gds_size = gds_size >> AMDGPU_GDS_SHIFT;

	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
	gws_size = gws_size >> AMDGPU_GWS_SHIFT;

	oa_base = oa_base >> AMDGPU_OA_SHIFT;
	oa_size = oa_size >> AMDGPU_OA_SHIFT;

	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS: size and base share one register, size in the upper field */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA: programmed as a bitmask of oa_size bits starting at oa_base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
5656
/* Read one dword from the SQ indirect register space of a given SIMD/wave.
 * SQ_IND_INDEX selects the target (FORCE_READ makes the read unconditional);
 * the data is then returned through SQ_IND_DATA.
 */
static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
{
	WREG32(mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(address << SQ_IND_INDEX__INDEX__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK));
	return RREG32(mmSQ_IND_DATA);
}
5666
/* Read @num consecutive dwords starting at @regno from the SQ indirect
 * space of a SIMD/wave/thread into @out.  AUTO_INCR makes the hardware
 * advance the index after every SQ_IND_DATA read, so a single index write
 * suffices for the whole range.
 */
static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
			   uint32_t wave, uint32_t thread,
			   uint32_t regno, uint32_t num, uint32_t *out)
{
	WREG32(mmSQ_IND_INDEX,
		(wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
		(simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
		(regno << SQ_IND_INDEX__INDEX__SHIFT) |
		(thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
		(SQ_IND_INDEX__FORCE_READ_MASK) |
		(SQ_IND_INDEX__AUTO_INCR_MASK));
	while (num--)
		*(out++) = RREG32(mmSQ_IND_DATA);
}
5681
/* Dump the status registers of one wave into @dst, appending at the index
 * tracked by @no_fields.  The leading 0 tags the record as "type 0" wave
 * data; the field order is fixed and consumed by the wave debug interface,
 * so do not reorder these reads.
 */
static void gfx_v8_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
{
	/* type 0 wave data */
	dst[(*no_fields)++] = 0;
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TBA_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_LO);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TMA_HI);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
	dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
}
5705
5706 static void gfx_v8_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
5707                                      uint32_t wave, uint32_t start,
5708                                      uint32_t size, uint32_t *dst)
5709 {
5710         wave_read_regs(
5711                 adev, simd, wave, 0,
5712                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
5713 }
5714
5715
/* GFX IP callback table exposed through adev->gfx.funcs (installed in
 * gfx_v8_0_early_init): clock-counter snapshot, SE/SH bank selection and
 * the wave/SGPR debug readers defined above.
 */
static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
	.select_se_sh = &gfx_v8_0_select_se_sh,
	.read_wave_data = &gfx_v8_0_read_wave_data,
	.read_wave_sgprs = &gfx_v8_0_read_wave_sgprs,
};
5722
/* IP-block early_init: set the ring counts and install the gfx, ring, irq,
 * gds and rlc function tables before any hardware is touched.
 */
static int gfx_v8_0_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);
	gfx_v8_0_set_rlc_funcs(adev);

	return 0;
}
5737
/* IP-block late_init: enable the privileged register/instruction fault
 * interrupts, run the EDC GPR workarounds (which need the IB pool, hence
 * late init), and gate GFX power.  Returns 0 on success or the first
 * error encountered.
 */
static int gfx_v8_0_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
	if (r)
		return r;

	r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
	if (r)
		return r;

	/* requires IBs so do in late init after IB pool is initialized */
	r = gfx_v8_0_do_edc_gpr_workarounds(adev);
	if (r)
		return r;

	amdgpu_set_powergating_state(adev,
			AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);

	return 0;
}
5761
5762 static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5763                                                        bool enable)
5764 {
5765         if ((adev->asic_type == CHIP_POLARIS11) ||
5766             (adev->asic_type == CHIP_POLARIS12))
5767                 /* Send msg to SMU via Powerplay */
5768                 amdgpu_set_powergating_state(adev,
5769                                              AMD_IP_BLOCK_TYPE_SMC,
5770                                              enable ?
5771                                              AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
5772
5773         WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0);
5774 }
5775
/* Enable/disable dynamic per-CU medium-grain power gating via RLC_PG_CNTL. */
static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
							bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, DYN_PER_CU_PG_ENABLE, enable ? 1 : 0);
}
5781
/* Enable/disable quick medium-grain power gating (Polaris11 family). */
static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
		bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, QUICK_PG_ENABLE, enable ? 1 : 0);
}
5787
/* Enable/disable coarse-grain GFX power gating (Carrizo/Stoney path). */
static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
					  bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_POWER_GATING_ENABLE, enable ? 1 : 0);
}
5793
/* Enable/disable GFX pipeline power gating (Carrizo/Stoney path). */
static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
						bool enable)
{
	WREG32_FIELD(RLC_PG_CNTL, GFX_PIPELINE_PG_ENABLE, enable ? 1 : 0);

	/* Read any GFX register to wake up GFX. */
	if (!enable)
		RREG32(mmDB_RENDER_CONTROL);
}
5803
5804 static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5805                                           bool enable)
5806 {
5807         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5808                 cz_enable_gfx_cg_power_gating(adev, true);
5809                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5810                         cz_enable_gfx_pipeline_power_gating(adev, true);
5811         } else {
5812                 cz_enable_gfx_cg_power_gating(adev, false);
5813                 cz_enable_gfx_pipeline_power_gating(adev, false);
5814         }
5815 }
5816
5817 static int gfx_v8_0_set_powergating_state(void *handle,
5818                                           enum amd_powergating_state state)
5819 {
5820         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5821         bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
5822
5823         switch (adev->asic_type) {
5824         case CHIP_CARRIZO:
5825         case CHIP_STONEY:
5826
5827                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5828                         cz_enable_sck_slow_down_on_power_up(adev, true);
5829                         cz_enable_sck_slow_down_on_power_down(adev, true);
5830                 } else {
5831                         cz_enable_sck_slow_down_on_power_up(adev, false);
5832                         cz_enable_sck_slow_down_on_power_down(adev, false);
5833                 }
5834                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5835                         cz_enable_cp_power_gating(adev, true);
5836                 else
5837                         cz_enable_cp_power_gating(adev, false);
5838
5839                 cz_update_gfx_cg_power_gating(adev, enable);
5840
5841                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5842                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5843                 else
5844                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5845
5846                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5847                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5848                 else
5849                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5850                 break;
5851         case CHIP_POLARIS11:
5852         case CHIP_POLARIS12:
5853                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5854                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5855                 else
5856                         gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5857
5858                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5859                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5860                 else
5861                         gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5862
5863                 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5864                         polaris11_enable_gfx_quick_mg_power_gating(adev, true);
5865                 else
5866                         polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5867                 break;
5868         default:
5869                 break;
5870         }
5871
5872         return 0;
5873 }
5874
/* Broadcast a BPM serdes command (@cmd) for register @reg_addr to every
 * CU/non-CU serdes master: select all SE/SH banks, enable all master
 * masks, then build RLC_SERDES_WR_CTRL by clearing the command/select
 * fields (Stoney keeps its BPM_DATA/REG_ADDR bits, other ASICs clear
 * them too) and OR-ing in the command, register address and a broadcast
 * BPM address.
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	/* 0xffffffff selects/broadcasts to all SEs and SHs */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
5915
/* RLC safe-mode handshake constants: the enter/exit message values and a
 * local definition of the RLC_GPR_REG2 REQ/MESSAGE field layout.
 * NOTE(review): these masks presumably mirror the hardware register
 * definition - confirm against the GFX8 register headers.
 */
#define MSG_ENTER_RLC_SAFE_MODE     1
#define MSG_EXIT_RLC_SAFE_MODE      0
#define RLC_GPR_REG2__REQ_MASK 0x00000001
#define RLC_GPR_REG2__REQ__SHIFT 0
#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5922
/* Put the RLC into safe mode before clock-gating reconfiguration.  A no-op
 * when the RLC F32 core is disabled or neither CGCG nor MGCG is supported.
 * Writes CMD=1/MESSAGE=1 to RLC_SAFE_MODE, then polls (bounded by
 * adev->usec_timeout) for the GFX clock/power status bits and for the RLC
 * to clear the CMD bit, and finally records the mode in
 * adev->gfx.rlc.in_safe_mode.
 */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	/* safe mode only applies while the RLC F32 core is running */
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		/* wait until both GFX clock and power report active */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* the RLC acknowledges the request by clearing CMD */
		for (i = 0; i < adev->usec_timeout; i++) {
			if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
5956
/* Leave RLC safe mode (counterpart of iceland_enter_rlc_safe_mode).  A
 * no-op when the RLC F32 core is disabled.  Writes CMD=1 with MESSAGE=0
 * (exit) if safe mode was recorded as entered, then waits for the RLC to
 * acknowledge by clearing the CMD bit.
 */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	/* NOTE(review): this ack poll runs even when no exit command was
	 * issued above; it then just reads CMD==0 immediately and stops.
	 */
	for (i = 0; i < adev->usec_timeout; i++) {
		if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
			break;
		udelay(1);
	}
}
5981
/* RLC safe-mode entry points installed via gfx_v8_0_set_rlc_funcs(). */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};
5986
/* Enable or disable medium-grain clock gating (MGCG) and the related
 * memory light-sleep / tree-shade features.  The whole sequence runs with
 * the RLC in safe mode; the numbered step comments preserve the required
 * programming order.  Register writes are skipped when the read-back value
 * already matches (temp != data checks) to avoid redundant traffic.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS)
				/* 1 - RLC memory Light sleep */
				WREG32_FIELD(RLC_MEM_SLP_CNTL, RLC_MEM_LS_EN, 1);

			/* 2 - CP memory Light sleep */
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS)
				WREG32_FIELD(CP_MEM_SLP_CNTL, CP_MEM_LS_EN, 1);
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		/* APUs keep the GRBM override bit set; dGPUs clear it too */
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* disable path: re-assert overrides, then turn off MGLS/CGTS */
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
				CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
6090
/* Enable or disable coarse-grain clock gating (CGCG) and coarse-grain
 * light sleep (CGLS).  Runs with the RLC in safe mode; the override bits
 * in RLC_CGTT_MGCG_OVERRIDE and the serdes BPM commands must be sequenced
 * exactly as written.  Writes are skipped when the register already holds
 * the desired value.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 - drop the CGCG override */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* : wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 2 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 4 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			/* also drop the CGLS override */
			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);

		/* 5 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
				RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Overrride */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	}

	gfx_v8_0_wait_for_rlc_serdes(adev);

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
6181 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
6182                                             bool enable)
6183 {
6184         if (enable) {
6185                 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
6186                  * ===  MGCG + MGLS + TS(CG/LS) ===
6187                  */
6188                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6189                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6190         } else {
6191                 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
6192                  * ===  CGCG + CGLS ===
6193                  */
6194                 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
6195                 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
6196         }
6197         return 0;
6198 }
6199
6200 static int gfx_v8_0_tonga_update_gfx_clock_gating(struct amdgpu_device *adev,
6201                                           enum amd_clockgating_state state)
6202 {
6203         uint32_t msg_id, pp_state = 0;
6204         uint32_t pp_support_state = 0;
6205         void *pp_handle = adev->powerplay.pp_handle;
6206
6207         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6208                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6209                         pp_support_state = PP_STATE_SUPPORT_LS;
6210                         pp_state = PP_STATE_LS;
6211                 }
6212                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6213                         pp_support_state |= PP_STATE_SUPPORT_CG;
6214                         pp_state |= PP_STATE_CG;
6215                 }
6216                 if (state == AMD_CG_STATE_UNGATE)
6217                         pp_state = 0;
6218
6219                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6220                                 PP_BLOCK_GFX_CG,
6221                                 pp_support_state,
6222                                 pp_state);
6223                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6224         }
6225
6226         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6227                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6228                         pp_support_state = PP_STATE_SUPPORT_LS;
6229                         pp_state = PP_STATE_LS;
6230                 }
6231
6232                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6233                         pp_support_state |= PP_STATE_SUPPORT_CG;
6234                         pp_state |= PP_STATE_CG;
6235                 }
6236
6237                 if (state == AMD_CG_STATE_UNGATE)
6238                         pp_state = 0;
6239
6240                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6241                                 PP_BLOCK_GFX_MG,
6242                                 pp_support_state,
6243                                 pp_state);
6244                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6245         }
6246
6247         return 0;
6248 }
6249
6250 static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev,
6251                                           enum amd_clockgating_state state)
6252 {
6253
6254         uint32_t msg_id, pp_state = 0;
6255         uint32_t pp_support_state = 0;
6256         void *pp_handle = adev->powerplay.pp_handle;
6257
6258         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_CGLS)) {
6259                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
6260                         pp_support_state = PP_STATE_SUPPORT_LS;
6261                         pp_state = PP_STATE_LS;
6262                 }
6263                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG) {
6264                         pp_support_state |= PP_STATE_SUPPORT_CG;
6265                         pp_state |= PP_STATE_CG;
6266                 }
6267                 if (state == AMD_CG_STATE_UNGATE)
6268                         pp_state = 0;
6269
6270                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6271                                 PP_BLOCK_GFX_CG,
6272                                 pp_support_state,
6273                                 pp_state);
6274                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6275         }
6276
6277         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_GFX_3D_CGLS)) {
6278                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS) {
6279                         pp_support_state = PP_STATE_SUPPORT_LS;
6280                         pp_state = PP_STATE_LS;
6281                 }
6282                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG) {
6283                         pp_support_state |= PP_STATE_SUPPORT_CG;
6284                         pp_state |= PP_STATE_CG;
6285                 }
6286                 if (state == AMD_CG_STATE_UNGATE)
6287                         pp_state = 0;
6288
6289                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6290                                 PP_BLOCK_GFX_3D,
6291                                 pp_support_state,
6292                                 pp_state);
6293                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6294         }
6295
6296         if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS)) {
6297                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
6298                         pp_support_state = PP_STATE_SUPPORT_LS;
6299                         pp_state = PP_STATE_LS;
6300                 }
6301
6302                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG) {
6303                         pp_support_state |= PP_STATE_SUPPORT_CG;
6304                         pp_state |= PP_STATE_CG;
6305                 }
6306
6307                 if (state == AMD_CG_STATE_UNGATE)
6308                         pp_state = 0;
6309
6310                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6311                                 PP_BLOCK_GFX_MG,
6312                                 pp_support_state,
6313                                 pp_state);
6314                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6315         }
6316
6317         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
6318                 pp_support_state = PP_STATE_SUPPORT_LS;
6319
6320                 if (state == AMD_CG_STATE_UNGATE)
6321                         pp_state = 0;
6322                 else
6323                         pp_state = PP_STATE_LS;
6324
6325                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6326                                 PP_BLOCK_GFX_RLC,
6327                                 pp_support_state,
6328                                 pp_state);
6329                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6330         }
6331
6332         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
6333                 pp_support_state = PP_STATE_SUPPORT_LS;
6334
6335                 if (state == AMD_CG_STATE_UNGATE)
6336                         pp_state = 0;
6337                 else
6338                         pp_state = PP_STATE_LS;
6339                 msg_id = PP_CG_MSG_ID(PP_GROUP_GFX,
6340                         PP_BLOCK_GFX_CP,
6341                         pp_support_state,
6342                         pp_state);
6343                 amd_set_clockgating_by_smu(pp_handle, msg_id);
6344         }
6345
6346         return 0;
6347 }
6348
6349 static int gfx_v8_0_set_clockgating_state(void *handle,
6350                                           enum amd_clockgating_state state)
6351 {
6352         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
6353
6354         switch (adev->asic_type) {
6355         case CHIP_FIJI:
6356         case CHIP_CARRIZO:
6357         case CHIP_STONEY:
6358                 gfx_v8_0_update_gfx_clock_gating(adev,
6359                                                  state == AMD_CG_STATE_GATE ? true : false);
6360                 break;
6361         case CHIP_TONGA:
6362                 gfx_v8_0_tonga_update_gfx_clock_gating(adev, state);
6363                 break;
6364         case CHIP_POLARIS10:
6365         case CHIP_POLARIS11:
6366                 gfx_v8_0_polaris_update_gfx_clock_gating(adev, state);
6367                 break;
6368         default:
6369                 break;
6370         }
6371         return 0;
6372 }
6373
6374 static u32 gfx_v8_0_ring_get_rptr(struct amdgpu_ring *ring)
6375 {
6376         return ring->adev->wb.wb[ring->rptr_offs];
6377 }
6378
6379 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
6380 {
6381         struct amdgpu_device *adev = ring->adev;
6382
6383         if (ring->use_doorbell)
6384                 /* XXX check if swapping is necessary on BE */
6385                 return ring->adev->wb.wb[ring->wptr_offs];
6386         else
6387                 return RREG32(mmCP_RB0_WPTR);
6388 }
6389
6390 static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
6391 {
6392         struct amdgpu_device *adev = ring->adev;
6393
6394         if (ring->use_doorbell) {
6395                 /* XXX check if swapping is necessary on BE */
6396                 adev->wb.wb[ring->wptr_offs] = ring->wptr;
6397                 WDOORBELL32(ring->doorbell_index, ring->wptr);
6398         } else {
6399                 WREG32(mmCP_RB0_WPTR, ring->wptr);
6400                 (void)RREG32(mmCP_RB0_WPTR);
6401         }
6402 }
6403
/* Emit a WAIT_REG_MEM packet that requests an HDP flush and waits for the
 * matching done bit for this ring's CP before continuing.  The ref/mask pair
 * selects the CP0/CP2/CP6 done bit depending on ring type, ME and pipe.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
	    (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
		/* compute/KIQ: pick the done bit for this ME/pipe */
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			/* no other MEs are expected here; emit nothing */
			return;
		}
		reg_mem_engine = 0;
	} else {
		/* gfx ring: CP0, waited on by the PFP engine */
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	/* write ref to FLUSH_REQ, then poll FLUSH_DONE until it equals ref */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
6436
/* Emit the two EVENT_WRITE packets that flush the VGT:
 * a VS partial flush followed by a VGT flush.
 */
static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VS_PARTIAL_FLUSH) |
		EVENT_INDEX(4));

	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE, 0));
	amdgpu_ring_write(ring, EVENT_TYPE(VGT_FLUSH) |
		EVENT_INDEX(0));
}
6447
6448
/* Invalidate HDP by writing 1 to mmHDP_DEBUG0 via a confirmed
 * WRITE_DATA packet from the ME engine.
 */
static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0) |
				 WR_CONFIRM));
	amdgpu_ring_write(ring, mmHDP_DEBUG0);
	amdgpu_ring_write(ring, 0);	/* dst address high: register write */
	amdgpu_ring_write(ring, 1);	/* value written to HDP_DEBUG0 */

}
6460
6461 static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
6462                                       struct amdgpu_ib *ib,
6463                                       unsigned vm_id, bool ctx_switch)
6464 {
6465         u32 header, control = 0;
6466
6467         if (ib->flags & AMDGPU_IB_FLAG_CE)
6468                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
6469         else
6470                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
6471
6472         control |= ib->length_dw | (vm_id << 24);
6473
6474         amdgpu_ring_write(ring, header);
6475         amdgpu_ring_write(ring,
6476 #ifdef __BIG_ENDIAN
6477                           (2 << 0) |
6478 #endif
6479                           (ib->gpu_addr & 0xFFFFFFFC));
6480         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
6481         amdgpu_ring_write(ring, control);
6482 }
6483
/* Emit an indirect buffer on a compute ring.  Unlike the gfx variant the
 * control word also sets INDIRECT_BUFFER_VALID.
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib,
					  unsigned vm_id, bool ctx_switch)
{
	/* control word: valid bit, IB size in dwords, VMID in bits 24+ */
	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);

	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
				(2 << 0) |
#endif
				(ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
6499
/* Emit a fence on the gfx ring: an EVENT_WRITE_EOP packet that flushes
 * TC/TCL1 caches, writes @seq to @addr and optionally raises an interrupt,
 * as selected by AMDGPU_FENCE_FLAG_64BIT / AMDGPU_FENCE_FLAG_INT in @flags.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
6520
/* Emit a WAIT_REG_MEM packet that stalls the ring (PFP for gfx, ME for
 * compute) until the fence memory at the ring's sync address equals the
 * last synced sequence number.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);	/* compare mask: full dword */
	amdgpu_ring_write(ring, 4); /* poll interval */
}
6537
/* Emit a VM flush for @vm_id: update the per-context page table base
 * register with @pd_addr, request a TLB invalidate for that VMID, wait
 * for it to complete, and (on gfx rings) resync PFP with ME.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);

	/* write the page directory base for this VMID; contexts 0-7 and
	 * 8-15 live in two separate register banks */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, pd_addr >> 12);

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
		/* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush finish */
		amdgpu_ring_insert_nop(ring, 128);
	}
}
6586
6587 static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6588 {
6589         return ring->adev->wb.wb[ring->wptr_offs];
6590 }
6591
/* Publish the new write pointer: update the writeback mirror first,
 * then ring the doorbell (compute rings always use doorbells).
 */
static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	adev->wb.wb[ring->wptr_offs] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
6600
/* Emit a fence on a compute ring: a RELEASE_MEM packet that flushes
 * TC/TCL1 caches, writes @seq to @addr and optionally raises an interrupt,
 * as selected by AMDGPU_FENCE_FLAG_64BIT / AMDGPU_FENCE_FLAG_INT in @flags.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
6621
/* Emit a fence on the KIQ ring: write the 32-bit @seq to @addr via
 * WRITE_DATA, and if AMDGPU_FENCE_FLAG_INT is set trigger an interrupt
 * by writing GENERIC2_INT to CPC_INT_STATUS.  64-bit fences are not
 * supported on KIQ.
 */
static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	if (flags & AMDGPU_FENCE_FLAG_64BIT)
		BUG();

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}
6647
/* Emit a SWITCH_BUFFER packet (used for CE/DE buffer swapping). */
static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
	amdgpu_ring_write(ring, 0);
}
6653
6654 static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
6655 {
6656         uint32_t dw2 = 0;
6657
6658         dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
6659         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
6660                 gfx_v8_0_ring_emit_vgt_flush(ring);
6661                 /* set load_global_config & load_global_uconfig */
6662                 dw2 |= 0x8001;
6663                 /* set load_cs_sh_regs */
6664                 dw2 |= 0x01000000;
6665                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
6666                 dw2 |= 0x10002;
6667
6668                 /* set load_ce_ram if preamble presented */
6669                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
6670                         dw2 |= 0x10000000;
6671         } else {
6672                 /* still load_ce_ram if this is the first time preamble presented
6673                  * although there is no context switch happens.
6674                  */
6675                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
6676                         dw2 |= 0x10000000;
6677         }
6678
6679         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6680         amdgpu_ring_write(ring, dw2);
6681         amdgpu_ring_write(ring, 0);
6682 }
6683
/* Enable/disable the gfx ring EOP timestamp interrupt in CP_INT_CNTL_RING0. */
static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
						 enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, TIME_STAMP_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
}
6690
6691 static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6692                                                      int me, int pipe,
6693                                                      enum amdgpu_interrupt_state state)
6694 {
6695         /*
6696          * amdgpu controls only pipe 0 of MEC1. That's why this function only
6697          * handles the setting of interrupts for this specific pipe. All other
6698          * pipes' interrupts are set by amdkfd.
6699          */
6700
6701         if (me == 1) {
6702                 switch (pipe) {
6703                 case 0:
6704                         break;
6705                 default:
6706                         DRM_DEBUG("invalid pipe %d\n", pipe);
6707                         return;
6708                 }
6709         } else {
6710                 DRM_DEBUG("invalid me %d\n", me);
6711                 return;
6712         }
6713
6714         WREG32_FIELD(CP_ME1_PIPE0_INT_CNTL, TIME_STAMP_INT_ENABLE,
6715                      state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
6716 }
6717
/* Enable/disable the privileged-register-access fault interrupt. */
static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
					     struct amdgpu_irq_src *source,
					     unsigned type,
					     enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}
6728
/* Enable/disable the privileged-instruction fault interrupt. */
static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      unsigned type,
					      enum amdgpu_interrupt_state state)
{
	WREG32_FIELD(CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE,
		     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);

	return 0;
}
6739
/* Dispatch an EOP interrupt enable/disable request to the gfx ring or the
 * matching MEC (me, pipe) pair.  Unknown types are silently ignored.
 * Always returns 0.
 */
static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
					    struct amdgpu_irq_src *src,
					    unsigned type,
					    enum amdgpu_interrupt_state state)
{
	switch (type) {
	case AMDGPU_CP_IRQ_GFX_EOP:
		gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
		break;
	case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
		gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
		break;
	default:
		break;
	}
	return 0;
}
6778
/* EOP interrupt handler: decode me/pipe/queue from the IV ring_id and run
 * fence processing on the matching gfx or compute ring.  Always returns 0.
 */
static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	int i;
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring;

	DRM_DEBUG("IH: CP EOP\n");
	/* ring_id layout: bits [3:2] me, [1:0] pipe, [6:4] queue */
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;

	switch (me_id) {
	case 0:
		/* ME 0 is the gfx ring */
		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
		break;
	case 1:
	case 2:
		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
			ring = &adev->gfx.compute_ring[i];
			/* Per-queue interrupt is supported for MEC starting from VI.
			  * The interrupt can only be enabled/disabled per pipe instead of per queue.
			  */
			if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
				amdgpu_fence_process(ring);
		}
		break;
	}
	return 0;
}
6810
/* Privileged-register fault handler: log and schedule a GPU reset. */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6819
/* Privileged-instruction fault handler: log and schedule a GPU reset. */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
6828
6829 static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
6830                                             struct amdgpu_irq_src *src,
6831                                             unsigned int type,
6832                                             enum amdgpu_interrupt_state state)
6833 {
6834         uint32_t tmp, target;
6835         struct amdgpu_ring *ring = (struct amdgpu_ring *)src->data;
6836
6837         BUG_ON(!ring || (ring->funcs->type != AMDGPU_RING_TYPE_KIQ));
6838
6839         if (ring->me == 1)
6840                 target = mmCP_ME1_PIPE0_INT_CNTL;
6841         else
6842                 target = mmCP_ME2_PIPE0_INT_CNTL;
6843         target += ring->pipe;
6844
6845         switch (type) {
6846         case AMDGPU_CP_KIQ_IRQ_DRIVER0:
6847                 if (state == AMDGPU_IRQ_STATE_DISABLE) {
6848                         tmp = RREG32(mmCPC_INT_CNTL);
6849                         tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
6850                                                  GENERIC2_INT_ENABLE, 0);
6851                         WREG32(mmCPC_INT_CNTL, tmp);
6852
6853                         tmp = RREG32(target);
6854                         tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
6855                                                  GENERIC2_INT_ENABLE, 0);
6856                         WREG32(target, tmp);
6857                 } else {
6858                         tmp = RREG32(mmCPC_INT_CNTL);
6859                         tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
6860                                                  GENERIC2_INT_ENABLE, 1);
6861                         WREG32(mmCPC_INT_CNTL, tmp);
6862
6863                         tmp = RREG32(target);
6864                         tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
6865                                                  GENERIC2_INT_ENABLE, 1);
6866                         WREG32(target, tmp);
6867                 }
6868                 break;
6869         default:
6870                 BUG(); /* kiq only support GENERIC2_INT now */
6871                 break;
6872         }
6873         return 0;
6874 }
6875
/* KIQ GENERIC2 interrupt handler: decode me/pipe/queue for debugging and
 * run fence processing on the KIQ ring.  Always returns 0.
 */
static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
			    struct amdgpu_irq_src *source,
			    struct amdgpu_iv_entry *entry)
{
	u8 me_id, pipe_id, queue_id;
	struct amdgpu_ring *ring = (struct amdgpu_ring *)source->data;

	BUG_ON(!ring || (ring->funcs->type != AMDGPU_RING_TYPE_KIQ));

	/* ring_id layout: bits [3:2] me, [1:0] pipe, [6:4] queue */
	me_id = (entry->ring_id & 0x0c) >> 2;
	pipe_id = (entry->ring_id & 0x03) >> 0;
	queue_id = (entry->ring_id & 0x70) >> 4;
	DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
		   me_id, pipe_id, queue_id);

	amdgpu_fence_process(ring);
	return 0;
}
6894
/* IP-block level callbacks for the GFX v8 block (init/teardown, power and
 * clock state, reset handling), registered with the amdgpu IP framework.
 */
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.name = "gfx_v8_0",
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.check_soft_reset = gfx_v8_0_check_soft_reset,
	.pre_soft_reset = gfx_v8_0_pre_soft_reset,
	.soft_reset = gfx_v8_0_soft_reset,
	.post_soft_reset = gfx_v8_0_post_soft_reset,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
};
6914
/* Ring callbacks for the graphics (GFX) ring.  emit_frame_size is the
 * worst-case dword count of all per-submission packets, itemized below.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.type = AMDGPU_RING_TYPE_GFX,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		6 + 6 + 6 +/* gfx_v8_0_ring_emit_fence_gfx x3 for user fence, vm fence */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		128 + 19 + /* gfx_v8_0_ring_emit_vm_flush */
		2 + /* gfx_v8_ring_emit_sb */
		3 + 4, /* gfx_v8_ring_emit_cntxcntl including vgt flush */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.emit_switch_buffer = gfx_v8_ring_emit_sb,
	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
};
6946
/* Ring callbacks shared by all compute (MEC) rings.  Uses the compute
 * wptr/fence/ib emit paths; no switch-buffer/cntxcntl hooks (GFX only).
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.type = AMDGPU_RING_TYPE_COMPUTE,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};
6974
/* Ring callbacks for the Kernel Interface Queue (KIQ).  Mirrors the
 * compute table but uses the KIQ fence path and deliberately omits the
 * pipeline-sync/vm-flush/gds-switch hooks.
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};
6999
7000 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
7001 {
7002         int i;
7003
7004         adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7005
7006         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7007                 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
7008
7009         for (i = 0; i < adev->gfx.num_compute_rings; i++)
7010                 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
7011 }
7012
/* End-of-pipe (fence) interrupt: state control + IH entry handler. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};
7017
/* Privileged register access fault interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};
7022
/* Privileged instruction fault interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};
7027
/* KIQ (GENERIC2_INT) interrupt source. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
	.set = gfx_v8_0_kiq_set_interrupt_state,
	.process = gfx_v8_0_kiq_irq,
};
7032
7033 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
7034 {
7035         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7036         adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
7037
7038         adev->gfx.priv_reg_irq.num_types = 1;
7039         adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
7040
7041         adev->gfx.priv_inst_irq.num_types = 1;
7042         adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
7043
7044         adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
7045         adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
7046 }
7047
/* Hook up the RLC callback table (shared iceland_rlc_funcs table). */
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
{
	adev->gfx.rlc.funcs = &iceland_rlc_funcs;
}
7052
7053 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
7054 {
7055         /* init asci gds info */
7056         adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
7057         adev->gds.gws.total_size = 64;
7058         adev->gds.oa.total_size = 16;
7059
7060         if (adev->gds.mem.total_size == 64 * 1024) {
7061                 adev->gds.mem.gfx_partition_size = 4096;
7062                 adev->gds.mem.cs_partition_size = 4096;
7063
7064                 adev->gds.gws.gfx_partition_size = 4;
7065                 adev->gds.gws.cs_partition_size = 4;
7066
7067                 adev->gds.oa.gfx_partition_size = 4;
7068                 adev->gds.oa.cs_partition_size = 1;
7069         } else {
7070                 adev->gds.mem.gfx_partition_size = 1024;
7071                 adev->gds.mem.cs_partition_size = 1024;
7072
7073                 adev->gds.gws.gfx_partition_size = 16;
7074                 adev->gds.gws.cs_partition_size = 16;
7075
7076                 adev->gds.oa.gfx_partition_size = 4;
7077                 adev->gds.oa.cs_partition_size = 4;
7078         }
7079 }
7080
7081 static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
7082                                                  u32 bitmap)
7083 {
7084         u32 data;
7085
7086         if (!bitmap)
7087                 return;
7088
7089         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
7090         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
7091
7092         WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
7093 }
7094
7095 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
7096 {
7097         u32 data, mask;
7098
7099         data =  RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
7100                 RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
7101
7102         mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
7103
7104         return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
7105 }
7106
/*
 * Populate adev->gfx.cu_info: per-SE/per-SH active-CU bitmaps, the total
 * number of active CUs, and an "always on" CU mask built from up to the
 * first two active CUs of each shader array.
 */
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
{
	int i, j, k, counter, active_cu_number = 0;
	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
	unsigned disable_masks[4 * 2];

	memset(cu_info, 0, sizeof(*cu_info));

	/* Fetch user-provided CU-disable masks for up to 4 SEs x 2 SHs. */
	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);

	/* Serialize SE/SH index selection against other GRBM users. */
	mutex_lock(&adev->grbm_idx_mutex);
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
			mask = 1;
			ao_bitmap = 0;
			counter = 0;
			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
			/* Apply the disable mask only for SE/SH indices the
			 * 4x2 disable_masks array actually covers.
			 */
			if (i < 4 && j < 2)
				gfx_v8_0_set_user_cu_inactive_bitmap(
					adev, disable_masks[i * 2 + j]);
			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
			cu_info->bitmap[i][j] = bitmap;

			/* Count active CUs; the first two active ones per SH
			 * go into the always-on bitmap.
			 */
			for (k = 0; k < 16; k ++) {
				if (bitmap & mask) {
					if (counter < 2)
						ao_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}
			active_cu_number += counter;
			/* NOTE(review): shift amount i * 16 + j * 8 reaches 32
			 * once i >= 2, which is undefined on a 32-bit value —
			 * confirm the SE/SH limits for parts using this path.
			 */
			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
		}
	}
	/* Restore broadcast (all SEs/SHs) before dropping the lock. */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
	mutex_unlock(&adev->grbm_idx_mutex);

	cu_info->number = active_cu_number;
	cu_info->ao_cu_mask = ao_cu_mask;
}
7149
/* Exported IP block descriptor for GFX 8.0 parts. */
const struct amdgpu_ip_block_version gfx_v8_0_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 0,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};
7158
/* Exported IP block descriptor for GFX 8.1 parts (same callbacks as 8.0). */
const struct amdgpu_ip_block_version gfx_v8_1_ip_block =
{
	.type = AMD_IP_BLOCK_TYPE_GFX,
	.major = 8,
	.minor = 1,
	.rev = 0,
	.funcs = &gfx_v8_0_ip_funcs,
};