drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/delay.h>
#include <linux/kernel.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "soc15.h"
#include "soc15d.h"
#include "amdgpu_atomfirmware.h"
#include "amdgpu_pm.h"

#include "gc/gc_9_0_offset.h"
#include "gc/gc_9_0_sh_mask.h"

#include "vega10_enum.h"

#include "soc15_common.h"
#include "clearstate_gfx9.h"
#include "v9_structs.h"

#include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"

#include "amdgpu_ras.h"

#include "gfx_v9_4.h"
#include "gfx_v9_0.h"
#include "gfx_v9_4_2.h"

#include "asic_reg/pwr/pwr_10_0_offset.h"
#include "asic_reg/pwr/pwr_10_0_sh_mask.h"
#include "asic_reg/gc/gc_9_0_default.h"

#define GFX9_NUM_GFX_RINGS     1
#define GFX9_MEC_HPD_SIZE 4096
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L

#define mmGCEA_PROBE_MAP                        0x070c
#define mmGCEA_PROBE_MAP_BASE_IDX               0

MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega10_me.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega12_me.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");

MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
MODULE_FIRMWARE("amdgpu/vega20_me.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");

MODULE_FIRMWARE("amdgpu/raven_ce.bin");
MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven_me.bin");
MODULE_FIRMWARE("amdgpu/raven_mec.bin");
MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven_rlc.bin");

MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
MODULE_FIRMWARE("amdgpu/picasso_me.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");

MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
MODULE_FIRMWARE("amdgpu/raven2_me.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");

MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");

MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
MODULE_FIRMWARE("amdgpu/renoir_me.bin");
MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");

MODULE_FIRMWARE("amdgpu/green_sardine_ce.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_pfp.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_me.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_mec.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_mec2.bin");
MODULE_FIRMWARE("amdgpu/green_sardine_rlc.bin");

MODULE_FIRMWARE("amdgpu/aldebaran_mec.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_mec2.bin");
MODULE_FIRMWARE("amdgpu/aldebaran_rlc.bin");

#define mmTCP_CHAN_STEER_0_ARCT                 0x0b03
#define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX        0
#define mmTCP_CHAN_STEER_1_ARCT                 0x0b04
#define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX        0
#define mmTCP_CHAN_STEER_2_ARCT                 0x0b09
#define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX        0
#define mmTCP_CHAN_STEER_3_ARCT                 0x0b0a
#define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX        0
#define mmTCP_CHAN_STEER_4_ARCT                 0x0b0b
#define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX        0
#define mmTCP_CHAN_STEER_5_ARCT                 0x0b0c
#define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX        0

#define mmGOLDEN_TSC_COUNT_UPPER_Renoir                0x0025
#define mmGOLDEN_TSC_COUNT_UPPER_Renoir_BASE_IDX       1
#define mmGOLDEN_TSC_COUNT_LOWER_Renoir                0x0026
#define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX       1

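/*
 * Fine-grained RAS sub-block indices for the GFX IP as understood by the
 * RAS trusted application (TA).  Each hardware block is delimited with
 * *_INDEX_START / *_INDEX_END markers so a whole block, or one of its
 * sub-ranges, can be iterated by index.
 */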
enum ta_ras_gfx_subblock {
        /* CPC */
        TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
        TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
        TA_RAS_BLOCK__GFX_CPC_UCODE,
        TA_RAS_BLOCK__GFX_DC_STATE_ME1,
        TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
        TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
        TA_RAS_BLOCK__GFX_DC_STATE_ME2,
        TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
        TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
        TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
        /* CPF */
        TA_RAS_BLOCK__GFX_CPF_INDEX_START,
        TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
        TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
        TA_RAS_BLOCK__GFX_CPF_TAG,
        TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
        /* CPG */
        TA_RAS_BLOCK__GFX_CPG_INDEX_START,
        TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
        TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
        TA_RAS_BLOCK__GFX_CPG_TAG,
        TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
        /* GDS */
        TA_RAS_BLOCK__GFX_GDS_INDEX_START,
        TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
        TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
        TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
        TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
        TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
        TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
        /* SPI */
        TA_RAS_BLOCK__GFX_SPI_SR_MEM,
        /* SQ */
        TA_RAS_BLOCK__GFX_SQ_INDEX_START,
        TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
        TA_RAS_BLOCK__GFX_SQ_LDS_D,
        TA_RAS_BLOCK__GFX_SQ_LDS_I,
        TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP */
        TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
        /* SQC (3 ranges) */
        TA_RAS_BLOCK__GFX_SQC_INDEX_START,
        /* SQC range 0 */
        TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
        TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
                TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
        TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
        TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
                TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
        /* SQC range 1 */
        TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
                TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
                TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
        /* SQC range 2 */
        TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
                TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
        TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
                TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
        TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
        /* TA */
        TA_RAS_BLOCK__GFX_TA_INDEX_START,
        TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
        TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
        TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
        TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
        TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
        TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
        /* TCA */
        TA_RAS_BLOCK__GFX_TCA_INDEX_START,
        TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
        TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
        TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
        /* TCC (5 sub-ranges) */
        TA_RAS_BLOCK__GFX_TCC_INDEX_START,
        /* TCC range 0 */
        TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
        TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
        TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
        TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
        TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
        /* TCC range 1 */
        TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
        TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
        TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
        TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
                TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
        /* TCC range 2 */
        TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
        TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
        TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
        TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
        TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
        TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
        TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
        TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
        TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
        TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
                TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
        /* TCC range 3 */
        TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
        TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
        TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
        TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
                TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
        /* TCC range 4 */
        TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
        TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
                TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
        TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
        TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
                TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
        TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
        /* TCI */
        TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
        /* TCP */
        TA_RAS_BLOCK__GFX_TCP_INDEX_START,
        TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
        TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
        TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
        TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
        TA_RAS_BLOCK__GFX_TCP_DB_RAM,
        TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
        TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
        TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
        /* TD */
        TA_RAS_BLOCK__GFX_TD_INDEX_START,
        TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
        TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
        TA_RAS_BLOCK__GFX_TD_CS_FIFO,
        TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
        /* EA (3 sub-ranges) */
        TA_RAS_BLOCK__GFX_EA_INDEX_START,
        /* EA range 0 */
        TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
        TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
        TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
        TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
        TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
        TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
        TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
        /* EA range 1 */
        TA_RAS_BLOCK__GFX_EA_INDEX1_START,
        TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
        TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
        TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
        TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
        TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
        TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
        TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
        /* EA range 2 */
        TA_RAS_BLOCK__GFX_EA_INDEX2_START,
        TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
        TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
        TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
        TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
        TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
        TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
        /* UTC VM L2 bank */
        TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
        /* UTC VM walker */
        TA_RAS_BLOCK__UTC_VML2_WALKER,
        /* UTC ATC L2 2MB cache */
        TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
        /* UTC ATC L2 4KB cache */
        TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
        TA_RAS_BLOCK__GFX_MAX
};

struct ras_gfx_subblock {
        unsigned char *name;
        int ta_subblock;
        int hw_supported_error_type;
        int sw_supported_error_type;
};

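/*
 * Pack one RAS sub-block descriptor.  The flags a..d are folded into the
 * hw_supported_error_type bitmask and e..h into sw_supported_error_type;
 * each bit marks one error type the sub-block supports in hardware or
 * software respectively.
 */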
#define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                 \
        [AMDGPU_RAS_BLOCK__##subblock] = {                                     \
                #subblock,                                                     \
                TA_RAS_BLOCK__##subblock,                                      \
                ((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
                (((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
        }

static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
        AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
                             0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
                             0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
                             1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
                             0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
                             0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
        AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
};

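/*
 * "Golden" register tables: each SOC15_REG_GOLDEN_VALUE() entry names a
 * register, an AND mask of the bits to touch and the value to OR in, and
 * soc15_program_register_sequence() applies them as read-modify-write
 * updates during hardware init.  One common table plus per-ASIC overrides.
 */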
static const struct soc15_reg_golden golden_settings_gc_9_0[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x00ffff87),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x00ffff8f),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
};

static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
};

static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
};

static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
};

static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000800, 0x00000800),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00008000, 0x00008000)
};

static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
{
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_CONFIG, 0xffffffff, 0x011A0000),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQ_FIFO_SIZES, 0xffffffff, 0x00000f00),
        SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_UTCL1_CNTL1, 0x30000000, 0x30000000)
};

static const struct soc15_reg_rlcg rlcg_access_gc_9_0[] = {
        {SOC15_REG_ENTRY(GC, 0, mmGRBM_GFX_INDEX)},
        {SOC15_REG_ENTRY(GC, 0, mmSQ_IND_INDEX)},
};

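/*
 * Offsets of the RLC_SRM_INDEX_CNTL_ADDR/DATA register pairs relative to
 * instance 0, used when programming the RLC save/restore machine's index
 * control registers by index.
 */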
static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
{
        mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
        mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
};

static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
{
        mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
        mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
};

#define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
#define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
#define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041

static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
                                struct amdgpu_cu_info *cu_info);
static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
                                          void *ras_error_status);
static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
                                     void *inject_if);
static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev);

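/*
 * KIQ (kernel interface queue) helpers: each builds one PM4 packet that the
 * KIQ submits to the CP, to hand over queue resources and to map, unmap or
 * query compute queues.
 */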
static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring,
                                uint64_t queue_mask)
{
        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
        amdgpu_ring_write(kiq_ring,
                PACKET3_SET_RESOURCES_VMID_MASK(0) |
                /* vmid_mask: 0, queue_type: 0 (KIQ) */
                PACKET3_SET_RESOURCES_QUEUE_TYPE(0));
        amdgpu_ring_write(kiq_ring,
                        lower_32_bits(queue_mask));     /* queue mask lo */
        amdgpu_ring_write(kiq_ring,
                        upper_32_bits(queue_mask));     /* queue mask hi */
        amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
        amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
        amdgpu_ring_write(kiq_ring, 0); /* oac mask */
        amdgpu_ring_write(kiq_ring, 0); /* gds heap base: 0, gds heap size: 0 */
}

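/* Ask the KIQ to map a ring's MQD onto a hardware queue slot via MAP_QUEUES. */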
static void gfx_v9_0_kiq_map_queues(struct amdgpu_ring *kiq_ring,
                                 struct amdgpu_ring *ring)
{
        struct amdgpu_device *adev = kiq_ring->adev;
        uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
        uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
        uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
        /* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
        amdgpu_ring_write(kiq_ring,
                         PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
                         PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
                         PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
                         PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
                         PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
                         /* queue_type: normal compute queue */
                         PACKET3_MAP_QUEUES_QUEUE_TYPE(0) |
                         /* alloc format: all_on_one_pipe */
                         PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) |
                         PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
                         /* num_queues: must be 1 */
                         PACKET3_MAP_QUEUES_NUM_QUEUES(1));
        amdgpu_ring_write(kiq_ring,
                        PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
        amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
        amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
        amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
        amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
}

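/*
 * Unmap a queue through the KIQ.  For PREEMPT_QUEUES_NO_UNMAP the trailing
 * dwords carry a GPU address and fence value the CP uses to signal that
 * preemption has completed.
 */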
static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
                                   struct amdgpu_ring *ring,
                                   enum amdgpu_unmap_queues_action action,
                                   u64 gpu_addr, u64 seq)
{
        uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
        amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
                          PACKET3_UNMAP_QUEUES_ACTION(action) |
                          PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
                          PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
                          PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
        amdgpu_ring_write(kiq_ring,
                        PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));

        if (action == PREEMPT_QUEUES_NO_UNMAP) {
                amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
                amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
                amdgpu_ring_write(kiq_ring, seq);
        } else {
                amdgpu_ring_write(kiq_ring, 0);
                amdgpu_ring_write(kiq_ring, 0);
                amdgpu_ring_write(kiq_ring, 0);
        }
}

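/* Emit a QUERY_STATUS packet for the given ring; the result is reported
 * back through the addr/seq pair supplied by the caller.
 */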
static void gfx_v9_0_kiq_query_status(struct amdgpu_ring *kiq_ring,
                                   struct amdgpu_ring *ring,
                                   u64 addr,
                                   u64 seq)
{
        uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;

        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
        amdgpu_ring_write(kiq_ring,
                          PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
                          PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
                          PACKET3_QUERY_STATUS_COMMAND(2));
        amdgpu_ring_write(kiq_ring,
                        PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
                        PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
        amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
        amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
        amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
        amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
}

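/* Flush the GPU TLBs for a PASID through the KIQ, optionally on all hubs. */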
static void gfx_v9_0_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
                                uint16_t pasid, uint32_t flush_type,
                                bool all_hub)
{
        amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
        amdgpu_ring_write(kiq_ring,
                        PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
                        PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
                        PACKET3_INVALIDATE_TLBS_PASID(pasid) |
                        PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
}

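/* The *_size fields below are the lengths, in dwords, of the packets the
 * corresponding helpers above emit (PACKET3 header included).
 */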
static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = {
        .kiq_set_resources = gfx_v9_0_kiq_set_resources,
        .kiq_map_queues = gfx_v9_0_kiq_map_queues,
        .kiq_unmap_queues = gfx_v9_0_kiq_unmap_queues,
        .kiq_query_status = gfx_v9_0_kiq_query_status,
        .kiq_invalidate_tlbs = gfx_v9_0_kiq_invalidate_tlbs,
        .set_resources_size = 8,
        .map_queues_size = 7,
        .unmap_queues_size = 6,
        .query_status_size = 7,
        .invalidate_tlbs_size = 2,
};

static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev)
{
        adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs;
}

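/* Apply the per-ASIC golden register sequences selected by GC IP version. */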
static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
{
        switch (adev->ip_versions[GC_HWIP][0]) {
        case IP_VERSION(9, 0, 1):
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_0,
                                                ARRAY_SIZE(golden_settings_gc_9_0));
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_0_vg10,
                                                ARRAY_SIZE(golden_settings_gc_9_0_vg10));
                break;
        case IP_VERSION(9, 2, 1):
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_2_1,
                                                ARRAY_SIZE(golden_settings_gc_9_2_1));
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_2_1_vg12,
                                                ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
                break;
        case IP_VERSION(9, 4, 0):
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_0,
                                                ARRAY_SIZE(golden_settings_gc_9_0));
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_0_vg20,
                                                ARRAY_SIZE(golden_settings_gc_9_0_vg20));
                break;
        case IP_VERSION(9, 4, 1):
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_4_1_arct,
                                                ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
                break;
        case IP_VERSION(9, 2, 2):
        case IP_VERSION(9, 1, 0):
                soc15_program_register_sequence(adev, golden_settings_gc_9_1,
                                                ARRAY_SIZE(golden_settings_gc_9_1));
                if (adev->apu_flags & AMD_APU_IS_RAVEN2)
                        soc15_program_register_sequence(adev,
                                                        golden_settings_gc_9_1_rv2,
                                                        ARRAY_SIZE(golden_settings_gc_9_1_rv2));
                else
                        soc15_program_register_sequence(adev,
                                                        golden_settings_gc_9_1_rv1,
                                                        ARRAY_SIZE(golden_settings_gc_9_1_rv1));
                break;
        case IP_VERSION(9, 3, 0):
                soc15_program_register_sequence(adev,
                                                golden_settings_gc_9_1_rn,
                                                ARRAY_SIZE(golden_settings_gc_9_1_rn));
                return; /* for renoir, the common golden settings are not needed */
        case IP_VERSION(9, 4, 2):
                gfx_v9_4_2_init_golden_registers(adev,
                                                 adev->smuio.funcs->get_die_id(adev));
                break;
        default:
                break;
        }

        if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
            (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2)))
                soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
                                                (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
}

static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
{
        adev->gfx.scratch.num_reg = 8;
        adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
        adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
}

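/* Emit a WRITE_DATA packet that writes 'val' to register 'reg'; 'wc' requests
 * a write confirmation from the CP before proceeding.
 */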
static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
                                       bool wc, uint32_t reg, uint32_t val)
{
        amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
        amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
                                WRITE_DATA_DST_SEL(0) |
                                (wc ? WR_CONFIRM : 0));
        amdgpu_ring_write(ring, reg);
        amdgpu_ring_write(ring, 0);
        amdgpu_ring_write(ring, val);
}

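/* Emit a WAIT_REG_MEM packet: stall until the dword at addr0/addr1 (memory
 * if mem_space is set, otherwise a register), masked with 'mask', equals
 * 'ref'; 'inv' is the poll interval.
 */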
973 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
974                                   int mem_space, int opt, uint32_t addr0,
975                                   uint32_t addr1, uint32_t ref, uint32_t mask,
976                                   uint32_t inv)
977 {
978         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
979         amdgpu_ring_write(ring,
980                                  /* memory (1) or register (0) */
981                                  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
982                                  WAIT_REG_MEM_OPERATION(opt) | /* wait */
983                                  WAIT_REG_MEM_FUNCTION(3) |  /* equal */
984                                  WAIT_REG_MEM_ENGINE(eng_sel)));
985
986         if (mem_space)
987                 BUG_ON(addr0 & 0x3); /* Dword align */
988         amdgpu_ring_write(ring, addr0);
989         amdgpu_ring_write(ring, addr1);
990         amdgpu_ring_write(ring, ref);
991         amdgpu_ring_write(ring, mask);
992         amdgpu_ring_write(ring, inv); /* poll interval */
993 }
994
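/*
 * Basic ring test: seed a scratch register with 0xCAFEDEAD, ask the ring
 * to write 0xDEADBEEF to it, and poll until the value lands or the
 * timeout expires.
 */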
995 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
996 {
997         struct amdgpu_device *adev = ring->adev;
998         uint32_t scratch;
999         uint32_t tmp = 0;
1000         unsigned i;
1001         int r;
1002
1003         r = amdgpu_gfx_scratch_get(adev, &scratch);
1004         if (r)
1005                 return r;
1006
1007         WREG32(scratch, 0xCAFEDEAD);
1008         r = amdgpu_ring_alloc(ring, 3);
1009         if (r)
1010                 goto error_free_scratch;
1011
1012         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
1013         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
1014         amdgpu_ring_write(ring, 0xDEADBEEF);
1015         amdgpu_ring_commit(ring);
1016
1017         for (i = 0; i < adev->usec_timeout; i++) {
1018                 tmp = RREG32(scratch);
1019                 if (tmp == 0xDEADBEEF)
1020                         break;
1021                 udelay(1);
1022         }
1023
1024         if (i >= adev->usec_timeout)
1025                 r = -ETIMEDOUT;
1026
1027 error_free_scratch:
1028         amdgpu_gfx_scratch_free(adev, scratch);
1029         return r;
1030 }
1031
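/*
 * IB test: submit an indirect buffer that writes 0xDEADBEEF to a
 * writeback slot, wait on the fence, then verify the value arrived.
 */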
1032 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
1033 {
1034         struct amdgpu_device *adev = ring->adev;
1035         struct amdgpu_ib ib;
1036         struct dma_fence *f = NULL;
1037
1038         unsigned index;
1039         uint64_t gpu_addr;
1040         uint32_t tmp;
1041         long r;
1042
1043         r = amdgpu_device_wb_get(adev, &index);
1044         if (r)
1045                 return r;
1046
1047         gpu_addr = adev->wb.gpu_addr + (index * 4);
1048         adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
1049         memset(&ib, 0, sizeof(ib));
1050         r = amdgpu_ib_get(adev, NULL, 16,
1051                           AMDGPU_IB_POOL_DIRECT, &ib);
1052         if (r)
1053                 goto err1;
1054
1055         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
1056         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1057         ib.ptr[2] = lower_32_bits(gpu_addr);
1058         ib.ptr[3] = upper_32_bits(gpu_addr);
1059         ib.ptr[4] = 0xDEADBEEF;
1060         ib.length_dw = 5;
1061
1062         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1063         if (r)
1064                 goto err2;
1065
1066         r = dma_fence_wait_timeout(f, false, timeout);
1067         if (r == 0) {
1068                 r = -ETIMEDOUT;
1069                 goto err2;
1070         } else if (r < 0) {
1071                 goto err2;
1072         }
1073
1074         tmp = adev->wb.wb[index];
1075         if (tmp == 0xDEADBEEF)
1076                 r = 0;
1077         else
1078                 r = -EINVAL;
1079
1080 err2:
1081         amdgpu_ib_free(adev, &ib, NULL);
1082         dma_fence_put(f);
1083 err1:
1084         amdgpu_device_wb_free(adev, index);
1085         return r;
1086 }
1087
1088
1089 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
1090 {
1091         release_firmware(adev->gfx.pfp_fw);
1092         adev->gfx.pfp_fw = NULL;
1093         release_firmware(adev->gfx.me_fw);
1094         adev->gfx.me_fw = NULL;
1095         release_firmware(adev->gfx.ce_fw);
1096         adev->gfx.ce_fw = NULL;
1097         release_firmware(adev->gfx.rlc_fw);
1098         adev->gfx.rlc_fw = NULL;
1099         release_firmware(adev->gfx.mec_fw);
1100         adev->gfx.mec_fw = NULL;
1101         release_firmware(adev->gfx.mec2_fw);
1102         adev->gfx.mec2_fw = NULL;
1103
1104         kfree(adev->gfx.rlc.register_list_format);
1105 }
1106
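/*
 * Parse the v2.1 RLC firmware header: record versions, sizes and payload
 * pointers for the save/restore list CNTL, GPM and SRM sections.
 */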
1107 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
1108 {
1109         const struct rlc_firmware_header_v2_1 *rlc_hdr;
1110
1111         rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
1112         adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
1113         adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
1114         adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
1115         adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
1116         adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
1117         adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
1118         adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
1119         adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
1120         adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
1121         adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
1122         adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
1123         adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
1124         adev->gfx.rlc.reg_list_format_direct_reg_list_length =
1125                         le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
1126 }
1127
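/*
 * Record whether the ME and MEC firmware are new enough to support
 * register write-and-wait from the CP, and warn once when the CP
 * firmware is too old.
 */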
1128 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
1129 {
1130         adev->gfx.me_fw_write_wait = false;
1131         adev->gfx.mec_fw_write_wait = false;
1132
1133         if ((adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) &&
1134             ((adev->gfx.mec_fw_version < 0x000001a5) ||
1135             (adev->gfx.mec_feature_version < 46) ||
1136             (adev->gfx.pfp_fw_version < 0x000000b7) ||
1137             (adev->gfx.pfp_feature_version < 46)))
1138                 DRM_WARN_ONCE("CP firmware version too old, please update!");
1139
1140         switch (adev->ip_versions[GC_HWIP][0]) {
1141         case IP_VERSION(9, 0, 1):
1142                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1143                     (adev->gfx.me_feature_version >= 42) &&
1144                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1145                     (adev->gfx.pfp_feature_version >= 42))
1146                         adev->gfx.me_fw_write_wait = true;
1147
1148                 if ((adev->gfx.mec_fw_version >=  0x00000193) &&
1149                     (adev->gfx.mec_feature_version >= 42))
1150                         adev->gfx.mec_fw_write_wait = true;
1151                 break;
1152         case IP_VERSION(9, 2, 1):
1153                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1154                     (adev->gfx.me_feature_version >= 44) &&
1155                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1156                     (adev->gfx.pfp_feature_version >= 44))
1157                         adev->gfx.me_fw_write_wait = true;
1158
1159                 if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1160                     (adev->gfx.mec_feature_version >= 44))
1161                         adev->gfx.mec_fw_write_wait = true;
1162                 break;
1163         case IP_VERSION(9, 4, 0):
1164                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1165                     (adev->gfx.me_feature_version >= 44) &&
1166                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1167                     (adev->gfx.pfp_feature_version >= 44))
1168                         adev->gfx.me_fw_write_wait = true;
1169
1170                 if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1171                     (adev->gfx.mec_feature_version >= 44))
1172                         adev->gfx.mec_fw_write_wait = true;
1173                 break;
1174         case IP_VERSION(9, 1, 0):
1175         case IP_VERSION(9, 2, 2):
1176                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1177                     (adev->gfx.me_feature_version >= 42) &&
1178                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1179                     (adev->gfx.pfp_feature_version >= 42))
1180                         adev->gfx.me_fw_write_wait = true;
1181
1182                 if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1183                     (adev->gfx.mec_feature_version >= 42))
1184                         adev->gfx.mec_fw_write_wait = true;
1185                 break;
1186         default:
1187                 adev->gfx.me_fw_write_wait = true;
1188                 adev->gfx.mec_fw_write_wait = true;
1189                 break;
1190         }
1191 }
1192
1193 struct amdgpu_gfxoff_quirk {
1194         u16 chip_vendor;
1195         u16 chip_device;
1196         u16 subsys_vendor;
1197         u16 subsys_device;
1198         u8 revision;
1199 };
1200
1201 static const struct amdgpu_gfxoff_quirk amdgpu_gfxoff_quirk_list[] = {
1202         /* https://bugzilla.kernel.org/show_bug.cgi?id=204689 */
1203         { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc8 },
1204         /* https://bugzilla.kernel.org/show_bug.cgi?id=207171 */
1205         { 0x1002, 0x15dd, 0x103c, 0x83e7, 0xd3 },
1206         /* GFXOFF is unstable on C6 parts with a VBIOS 113-RAVEN-114 */
1207         { 0x1002, 0x15dd, 0x1002, 0x15dd, 0xc6 },
1208         { 0, 0, 0, 0, 0 },
1209 };
1210
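/*
 * Match the PCI IDs and revision against the quirk list above; a hit
 * means GFXOFF is known to be unstable on this board.
 */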
1211 static bool gfx_v9_0_should_disable_gfxoff(struct pci_dev *pdev)
1212 {
1213         const struct amdgpu_gfxoff_quirk *p = amdgpu_gfxoff_quirk_list;
1214
1215         while (p && p->chip_device != 0) {
1216                 if (pdev->vendor == p->chip_vendor &&
1217                     pdev->device == p->chip_device &&
1218                     pdev->subsystem_vendor == p->subsys_vendor &&
1219                     pdev->subsystem_device == p->subsys_device &&
1220                     pdev->revision == p->revision) {
1221                         return true;
1222                 }
1223                 ++p;
1224         }
1225         return false;
1226 }
1227
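/* "Kicker" Raven parts are identified by their newer SMC firmware */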
1228 static bool is_raven_kicker(struct amdgpu_device *adev)
1229 {
1230         return adev->pm.fw_version >= 0x41e2b;
1234 }
1235
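/*
 * Renoir with new enough ME firmware uses an enlarged doorbell range;
 * report whether that applies here.
 */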
1236 static bool check_if_enlarge_doorbell_range(struct amdgpu_device *adev)
1237 {
1238         return (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0)) &&
1239                (adev->gfx.me_fw_version >= 0x000000a5) &&
1240                (adev->gfx.me_feature_version >= 52);
1244 }
1245
1246 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1247 {
1248         if (gfx_v9_0_should_disable_gfxoff(adev->pdev))
1249                 adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1250
1251         switch (adev->ip_versions[GC_HWIP][0]) {
1252         case IP_VERSION(9, 0, 1):
1253         case IP_VERSION(9, 2, 1):
1254         case IP_VERSION(9, 4, 0):
1255                 break;
1256         case IP_VERSION(9, 2, 2):
1257         case IP_VERSION(9, 1, 0):
1258                 if (!((adev->apu_flags & AMD_APU_IS_RAVEN2) ||
1259                       (adev->apu_flags & AMD_APU_IS_PICASSO)) &&
1260                     ((!is_raven_kicker(adev) &&
1261                       adev->gfx.rlc_fw_version < 531) ||
1262                      (adev->gfx.rlc_feature_version < 1) ||
1263                      !adev->gfx.rlc.is_rlc_v2_1))
1264                         adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1265
1266                 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1267                         adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1268                                 AMD_PG_SUPPORT_CP |
1269                                 AMD_PG_SUPPORT_RLC_SMU_HS;
1270                 break;
1271         case IP_VERSION(9, 3, 0):
1272                 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1273                         adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1274                                 AMD_PG_SUPPORT_CP |
1275                                 AMD_PG_SUPPORT_RLC_SMU_HS;
1276                 break;
1277         default:
1278                 break;
1279         }
1280 }
1281
1282 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1283                                           const char *chip_name)
1284 {
1285         char fw_name[30];
1286         int err;
1287         struct amdgpu_firmware_info *info = NULL;
1288         const struct common_firmware_header *header = NULL;
1289         const struct gfx_firmware_header_v1_0 *cp_hdr;
1290
1291         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1292         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1293         if (err)
1294                 goto out;
1295         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1296         if (err)
1297                 goto out;
1298         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1299         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1300         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1301
1302         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1303         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1304         if (err)
1305                 goto out;
1306         err = amdgpu_ucode_validate(adev->gfx.me_fw);
1307         if (err)
1308                 goto out;
1309         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1310         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1311         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1312
1313         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1314         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1315         if (err)
1316                 goto out;
1317         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1318         if (err)
1319                 goto out;
1320         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1321         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1322         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1323
1324         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1325                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1326                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1327                 info->fw = adev->gfx.pfp_fw;
1328                 header = (const struct common_firmware_header *)info->fw->data;
1329                 adev->firmware.fw_size +=
1330                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1331
1332                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1333                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1334                 info->fw = adev->gfx.me_fw;
1335                 header = (const struct common_firmware_header *)info->fw->data;
1336                 adev->firmware.fw_size +=
1337                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1338
1339                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1340                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1341                 info->fw = adev->gfx.ce_fw;
1342                 header = (const struct common_firmware_header *)info->fw->data;
1343                 adev->firmware.fw_size +=
1344                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1345         }
1346
1347 out:
1348         if (err) {
1349                 dev_err(adev->dev,
1350                         "gfx9: Failed to load firmware \"%s\"\n",
1351                         fw_name);
1352                 release_firmware(adev->gfx.pfp_fw);
1353                 adev->gfx.pfp_fw = NULL;
1354                 release_firmware(adev->gfx.me_fw);
1355                 adev->gfx.me_fw = NULL;
1356                 release_firmware(adev->gfx.ce_fw);
1357                 adev->gfx.ce_fw = NULL;
1358         }
1359         return err;
1360 }
1361
1362 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1363                                           const char *chip_name)
1364 {
1365         char fw_name[30];
1366         int err;
1367         struct amdgpu_firmware_info *info = NULL;
1368         const struct common_firmware_header *header = NULL;
1369         const struct rlc_firmware_header_v2_0 *rlc_hdr;
1370         unsigned int *tmp = NULL;
1371         unsigned int i = 0;
1372         uint16_t version_major;
1373         uint16_t version_minor;
1374         uint32_t smu_version;
1375
1376         /*
1377          * For Picasso on an AM4 socket board, use picasso_rlc_am4.bin
1378          * instead of picasso_rlc.bin. The package is identified by the
1379          * PCI revision ID:
1380          * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1381          *          or revision >= 0xD8 && revision <= 0xDF
1382          * otherwise the part is PCO FP5.
1383          */
1384         if (!strcmp(chip_name, "picasso") &&
1385                 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1386                 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1387                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1388         else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1389                 (smu_version >= 0x41e2b))
1390                 /*
1391                  * SMC is loaded by SBIOS on APUs; the SMU version can be read directly.
1392                  */
1393                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1394         else
1395                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1396         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1397         if (err)
1398                 goto out;
1399         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
        if (err)
                goto out;
1400         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1401
1402         version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1403         version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1404         if (version_major == 2 && version_minor == 1)
1405                 adev->gfx.rlc.is_rlc_v2_1 = true;
1406
1407         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1408         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1409         adev->gfx.rlc.save_and_restore_offset =
1410                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1411         adev->gfx.rlc.clear_state_descriptor_offset =
1412                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1413         adev->gfx.rlc.avail_scratch_ram_locations =
1414                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1415         adev->gfx.rlc.reg_restore_list_size =
1416                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1417         adev->gfx.rlc.reg_list_format_start =
1418                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1419         adev->gfx.rlc.reg_list_format_separate_start =
1420                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1421         adev->gfx.rlc.starting_offsets_start =
1422                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1423         adev->gfx.rlc.reg_list_format_size_bytes =
1424                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1425         adev->gfx.rlc.reg_list_size_bytes =
1426                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1427         adev->gfx.rlc.register_list_format =
1428                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1429                                 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1430         if (!adev->gfx.rlc.register_list_format) {
1431                 err = -ENOMEM;
1432                 goto out;
1433         }
1434
1435         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1436                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1437         for (i = 0; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1438                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1439
1440         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1441
1442         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1443                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1444         for (i = 0; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1445                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1446
1447         if (adev->gfx.rlc.is_rlc_v2_1)
1448                 gfx_v9_0_init_rlc_ext_microcode(adev);
1449
1450         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1451                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1452                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1453                 info->fw = adev->gfx.rlc_fw;
1454                 header = (const struct common_firmware_header *)info->fw->data;
1455                 adev->firmware.fw_size +=
1456                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1457
1458                 if (adev->gfx.rlc.is_rlc_v2_1 &&
1459                     adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1460                     adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1461                     adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1462                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1463                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1464                         info->fw = adev->gfx.rlc_fw;
1465                         adev->firmware.fw_size +=
1466                                 ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1467
1468                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1469                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1470                         info->fw = adev->gfx.rlc_fw;
1471                         adev->firmware.fw_size +=
1472                                 ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1473
1474                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1475                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1476                         info->fw = adev->gfx.rlc_fw;
1477                         adev->firmware.fw_size +=
1478                                 ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1479                 }
1480         }
1481
1482 out:
1483         if (err) {
1484                 dev_err(adev->dev,
1485                         "gfx9: Failed to load firmware \"%s\"\n",
1486                         fw_name);
1487                 release_firmware(adev->gfx.rlc_fw);
1488                 adev->gfx.rlc_fw = NULL;
1489         }
1490         return err;
1491 }
1492
1493 static bool gfx_v9_0_load_mec2_fw_bin_support(struct amdgpu_device *adev)
1494 {
1495         if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2) ||
1496             adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
1497             adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 3, 0))
1498                 return false;
1499
1500         return true;
1501 }
1502
1503 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1504                                           const char *chip_name)
1505 {
1506         char fw_name[30];
1507         int err;
1508         struct amdgpu_firmware_info *info = NULL;
1509         const struct common_firmware_header *header = NULL;
1510         const struct gfx_firmware_header_v1_0 *cp_hdr;
1511
1512         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1513         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1514         if (err)
1515                 goto out;
1516         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1517         if (err)
1518                 goto out;
1519         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1520         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1521         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1522
1524         if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1525                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1526                 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1527                 if (!err) {
1528                         err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1529                         if (err)
1530                                 goto out;
1531                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1532                                 adev->gfx.mec2_fw->data;
1533                         adev->gfx.mec2_fw_version =
1534                                 le32_to_cpu(cp_hdr->header.ucode_version);
1535                         adev->gfx.mec2_feature_version =
1536                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1537                 } else {
1538                         err = 0;
1539                         adev->gfx.mec2_fw = NULL;
1540                 }
1541         } else {
1542                 adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version;
1543                 adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version;
1544         }
1545
1546         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1547                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1548                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1549                 info->fw = adev->gfx.mec_fw;
1550                 header = (const struct common_firmware_header *)info->fw->data;
1551                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1552                 adev->firmware.fw_size +=
1553                         ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1554
1555                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1556                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1557                 info->fw = adev->gfx.mec_fw;
1558                 adev->firmware.fw_size +=
1559                         ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1560
1561                 if (adev->gfx.mec2_fw) {
1562                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1563                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1564                         info->fw = adev->gfx.mec2_fw;
1565                         header = (const struct common_firmware_header *)info->fw->data;
1566                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1567                         adev->firmware.fw_size +=
1568                                 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1569
1570                         /* TODO: Determine if MEC2 JT FW loading can be removed
1571                          * for all GFX v9 ASICs and above */
1572                         if (gfx_v9_0_load_mec2_fw_bin_support(adev)) {
1573                                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1574                                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1575                                 info->fw = adev->gfx.mec2_fw;
1576                                 adev->firmware.fw_size +=
1577                                         ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
1578                                         PAGE_SIZE);
1579                         }
1580                 }
1581         }
1582
1583 out:
1584         gfx_v9_0_check_if_need_gfxoff(adev);
1585         gfx_v9_0_check_fw_write_wait(adev);
1586         if (err) {
1587                 dev_err(adev->dev,
1588                         "gfx9: Failed to load firmware \"%s\"\n",
1589                         fw_name);
1590                 release_firmware(adev->gfx.mec_fw);
1591                 adev->gfx.mec_fw = NULL;
1592                 release_firmware(adev->gfx.mec2_fw);
1593                 adev->gfx.mec2_fw = NULL;
1594         }
1595         return err;
1596 }
1597
1598 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1599 {
1600         const char *chip_name;
1601         int r;
1602
1603         DRM_DEBUG("\n");
1604
1605         switch (adev->ip_versions[GC_HWIP][0]) {
1606         case IP_VERSION(9, 0, 1):
1607                 chip_name = "vega10";
1608                 break;
1609         case IP_VERSION(9, 2, 1):
1610                 chip_name = "vega12";
1611                 break;
1612         case IP_VERSION(9, 4, 0):
1613                 chip_name = "vega20";
1614                 break;
1615         case IP_VERSION(9, 2, 2):
1616         case IP_VERSION(9, 1, 0):
1617                 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
1618                         chip_name = "raven2";
1619                 else if (adev->apu_flags & AMD_APU_IS_PICASSO)
1620                         chip_name = "picasso";
1621                 else
1622                         chip_name = "raven";
1623                 break;
1624         case IP_VERSION(9, 4, 1):
1625                 chip_name = "arcturus";
1626                 break;
1627         case IP_VERSION(9, 3, 0):
1628                 if (adev->apu_flags & AMD_APU_IS_RENOIR)
1629                         chip_name = "renoir";
1630                 else
1631                         chip_name = "green_sardine";
1632                 break;
1633         case IP_VERSION(9, 4, 2):
1634                 chip_name = "aldebaran";
1635                 break;
1636         default:
1637                 BUG();
1638         }
1639
1640         /* No CPG in Arcturus */
1641         if (adev->gfx.num_gfx_rings) {
1642                 r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1643                 if (r)
1644                         return r;
1645         }
1646
1647         r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1648         if (r)
1649                 return r;
1650
1651         return gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1656 }
1657
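/*
 * Size of the clear-state buffer in dwords: preamble and context-control
 * packets, one SET_CONTEXT_REG header plus payload per extent, and the
 * trailing end-of-clear-state and clear-state packets.
 */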
1658 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1659 {
1660         u32 count = 0;
1661         const struct cs_section_def *sect = NULL;
1662         const struct cs_extent_def *ext = NULL;
1663
1664         /* begin clear state */
1665         count += 2;
1666         /* context control state */
1667         count += 3;
1668
1669         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1670                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1671                         if (sect->id == SECT_CONTEXT)
1672                                 count += 2 + ext->reg_count;
1673                         else
1674                                 return 0;
1675                 }
1676         }
1677
1678         /* end clear state */
1679         count += 2;
1680         /* clear state */
1681         count += 2;
1682
1683         return count;
1684 }
1685
1686 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1687                                     volatile u32 *buffer)
1688 {
1689         u32 count = 0, i;
1690         const struct cs_section_def *sect = NULL;
1691         const struct cs_extent_def *ext = NULL;
1692
1693         if (adev->gfx.rlc.cs_data == NULL)
1694                 return;
1695         if (buffer == NULL)
1696                 return;
1697
1698         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1699         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1700
1701         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1702         buffer[count++] = cpu_to_le32(0x80000000);
1703         buffer[count++] = cpu_to_le32(0x80000000);
1704
1705         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1706                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1707                         if (sect->id == SECT_CONTEXT) {
1708                                 buffer[count++] =
1709                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1710                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1711                                                 PACKET3_SET_CONTEXT_REG_START);
1712                                 for (i = 0; i < ext->reg_count; i++)
1713                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1714                         } else {
1715                                 return;
1716                         }
1717                 }
1718         }
1719
1720         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1721         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1722
1723         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1724         buffer[count++] = cpu_to_le32(0);
1725 }
1726
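/*
 * Program the per-SE/SH always-on CU bitmaps so that power gating keeps
 * a minimum number of CUs active (more on discrete parts than on APUs).
 */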
1727 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1728 {
1729         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1730         uint32_t pg_always_on_cu_num = 2;
1731         uint32_t always_on_cu_num;
1732         uint32_t i, j, k;
1733         uint32_t mask, cu_bitmap, counter;
1734
1735         if (adev->flags & AMD_IS_APU)
1736                 always_on_cu_num = 4;
1737         else if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1))
1738                 always_on_cu_num = 8;
1739         else
1740                 always_on_cu_num = 12;
1741
1742         mutex_lock(&adev->grbm_idx_mutex);
1743         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1744                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1745                         mask = 1;
1746                         cu_bitmap = 0;
1747                         counter = 0;
1748                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1749
1750                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
1751                                 if (cu_info->bitmap[i][j] & mask) {
1752                                         if (counter == pg_always_on_cu_num)
1753                                                 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1754                                         if (counter < always_on_cu_num)
1755                                                 cu_bitmap |= mask;
1756                                         else
1757                                                 break;
1758                                         counter++;
1759                                 }
1760                                 mask <<= 1;
1761                         }
1762
1763                         WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1764                         cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1765                 }
1766         }
1767         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1768         mutex_unlock(&adev->grbm_idx_mutex);
1769 }
1770
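/* Program the RLC load-balancing (LBPW) thresholds and CU masks for Raven */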
1771 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1772 {
1773         uint32_t data;
1774
1775         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1776         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1777         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1778         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1779         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1780
1781         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1782         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1783
1784         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1785         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1786
1787         mutex_lock(&adev->grbm_idx_mutex);
1788         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1789         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1790         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1791
1792         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1793         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1794         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1795         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1796         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1797
1798         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1799         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1800         data &= 0x0000FFFF;
1801         data |= 0x00C00000;
1802         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1803
1804         /*
1805          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1806          * programmed in gfx_v9_0_init_always_on_cu_mask()
1807          */
1808
1809         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1810          * but is used here as part of the RLC_LB_CNTL configuration */
1811         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1812         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1813         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1814         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1815         mutex_unlock(&adev->grbm_idx_mutex);
1816
1817         gfx_v9_0_init_always_on_cu_mask(adev);
1818 }
1819
1820 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1821 {
1822         uint32_t data;
1823
1824         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1825         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1826         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1827         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1828         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1829
1830         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1831         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1832
1833         /* set mmRLC_LB_CNTR_MAX = 0x0000_0800 */
1834         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1835
1836         mutex_lock(&adev->grbm_idx_mutex);
1837         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1838         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1839         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1840
1841         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1842         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1843         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1844         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1845         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1846
1847         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1848         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1849         data &= 0x0000FFFF;
1850         data |= 0x00C00000;
1851         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1852
1853         /*
1854          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1855          * programmed in gfx_v9_0_init_always_on_cu_mask()
1856          */
1857
1858         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1859          * but is used here as part of the RLC_LB_CNTL configuration */
1860         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1861         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1862         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1863         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1864         mutex_unlock(&adev->grbm_idx_mutex);
1865
1866         gfx_v9_0_init_always_on_cu_mask(adev);
1867 }
1868
1869 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1870 {
1871         WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1872 }
1873
1874 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1875 {
1876         if (gfx_v9_0_load_mec2_fw_bin_support(adev))
1877                 return 5;
1878         else
1879                 return 4;
1880 }
1881
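/*
 * Record the scratch and GRBM register offsets used by the RLCG
 * register-access helpers.
 */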
1882 static void gfx_v9_0_init_rlcg_reg_access_ctrl(struct amdgpu_device *adev)
1883 {
1884         struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
1885
1886         reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl;
1887         reg_access_ctrl->scratch_reg0 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
1888         reg_access_ctrl->scratch_reg1 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG1);
1889         reg_access_ctrl->scratch_reg2 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG2);
1890         reg_access_ctrl->scratch_reg3 = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG3);
1891         reg_access_ctrl->grbm_cntl = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_CNTL);
1892         reg_access_ctrl->grbm_idx = SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX);
1893         reg_access_ctrl->spare_int = SOC15_REG_OFFSET(GC, 0, mmRLC_SPARE_INT);
1894         adev->gfx.rlc.rlcg_reg_access_supported = true;
1895 }
1896
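/*
 * One-time RLC setup: build the clear-state buffer, allocate the APU CP
 * jump table, program load balancing per ASIC and seed the SPM VMID.
 */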
1897 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1898 {
1899         const struct cs_section_def *cs_data;
1900         int r;
1901
1902         adev->gfx.rlc.cs_data = gfx9_cs_data;
1903
1904         cs_data = adev->gfx.rlc.cs_data;
1905
1906         if (cs_data) {
1907                 /* init clear state block */
1908                 r = amdgpu_gfx_rlc_init_csb(adev);
1909                 if (r)
1910                         return r;
1911         }
1912
1913         if (adev->flags & AMD_IS_APU) {
1914                 /* TODO: double check the cp_table_size for RV */
1915                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1916                 r = amdgpu_gfx_rlc_init_cpt(adev);
1917                 if (r)
1918                         return r;
1919         }
1920
1921         switch (adev->ip_versions[GC_HWIP][0]) {
1922         case IP_VERSION(9, 2, 2):
1923         case IP_VERSION(9, 1, 0):
1924                 gfx_v9_0_init_lbpw(adev);
1925                 break;
1926         case IP_VERSION(9, 4, 0):
1927                 gfx_v9_4_init_lbpw(adev);
1928                 break;
1929         default:
1930                 break;
1931         }
1932
1933         /* init spm vmid with 0xf */
1934         if (adev->gfx.rlc.funcs->update_spm_vmid)
1935                 adev->gfx.rlc.funcs->update_spm_vmid(adev, 0xf);
1936
1937         return 0;
1938 }
1939
1940 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1941 {
1942         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1943         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1944 }
1945
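/*
 * Allocate the HPD EOP buffer in VRAM for the acquired compute queues
 * and stage the MEC microcode in a GTT buffer for the CP to fetch.
 */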
1946 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1947 {
1948         int r;
1949         u32 *hpd;
1950         const __le32 *fw_data;
1951         unsigned fw_size;
1952         u32 *fw;
1953         size_t mec_hpd_size;
1954
1955         const struct gfx_firmware_header_v1_0 *mec_hdr;
1956
1957         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1958
1959         /* take ownership of the relevant compute queues */
1960         amdgpu_gfx_compute_queue_acquire(adev);
1961         mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1962         if (mec_hpd_size) {
1963                 r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1964                                               AMDGPU_GEM_DOMAIN_VRAM,
1965                                               &adev->gfx.mec.hpd_eop_obj,
1966                                               &adev->gfx.mec.hpd_eop_gpu_addr,
1967                                               (void **)&hpd);
1968                 if (r) {
1969                         dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1970                         gfx_v9_0_mec_fini(adev);
1971                         return r;
1972                 }
1973
1974                 memset(hpd, 0, mec_hpd_size);
1975
1976                 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1977                 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1978         }
1979
1980         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1981
1982         fw_data = (const __le32 *)
1983                 (adev->gfx.mec_fw->data +
1984                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1985         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes);
1986
1987         r = amdgpu_bo_create_reserved(adev, fw_size,
1988                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1989                                       &adev->gfx.mec.mec_fw_obj,
1990                                       &adev->gfx.mec.mec_fw_gpu_addr,
1991                                       (void **)&fw);
1992         if (r) {
1993                 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1994                 gfx_v9_0_mec_fini(adev);
1995                 return r;
1996         }
1997
1998         memcpy(fw, fw_data, fw_size);
1999
2000         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
2001         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
2002
2003         return 0;
2004 }
2005
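/* Read one dword of wave state through the SQ indexed register pair */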
2006 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
2007 {
2008         WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
2009                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
2010                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
2011                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
2012                 (SQ_IND_INDEX__FORCE_READ_MASK));
2013         return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
2014 }
2015
2016 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
2017                            uint32_t wave, uint32_t thread,
2018                            uint32_t regno, uint32_t num, uint32_t *out)
2019 {
2020         WREG32_SOC15_RLC(GC, 0, mmSQ_IND_INDEX,
2021                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
2022                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
2023                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
2024                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
2025                 (SQ_IND_INDEX__FORCE_READ_MASK) |
2026                 (SQ_IND_INDEX__AUTO_INCR_MASK));
2027         while (num--)
2028                 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
2029 }
2030
2031 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
2032 {
2033         /* type 1 wave data */
2034         dst[(*no_fields)++] = 1;
2035         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
2036         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
2037         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
2038         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
2039         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
2040         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
2041         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
2042         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
2043         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
2044         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
2045         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
2046         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
2047         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
2048         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
2049         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE);
2050 }
2051
2052 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
2053                                      uint32_t wave, uint32_t start,
2054                                      uint32_t size, uint32_t *dst)
2055 {
2056         wave_read_regs(
2057                 adev, simd, wave, 0,
2058                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
2059 }
2060
2061 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
2062                                      uint32_t wave, uint32_t thread,
2063                                      uint32_t start, uint32_t size,
2064                                      uint32_t *dst)
2065 {
2066         wave_read_regs(
2067                 adev, simd, wave, thread,
2068                 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
2069 }
2070
2071 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
2072                                   u32 me, u32 pipe, u32 q, u32 vm)
2073 {
2074         soc15_grbm_select(adev, me, pipe, q, vm);
2075 }
2076
2077 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
2078         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
2079         .select_se_sh = &gfx_v9_0_select_se_sh,
2080         .read_wave_data = &gfx_v9_0_read_wave_data,
2081         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
2082         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
2083         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
2084 };
2085
2086 const struct amdgpu_ras_block_hw_ops gfx_v9_0_ras_ops = {
2087         .ras_error_inject = &gfx_v9_0_ras_error_inject,
2088         .query_ras_error_count = &gfx_v9_0_query_ras_error_count,
2089         .reset_ras_error_count = &gfx_v9_0_reset_ras_error_count,
2090 };
2091
2092 static struct amdgpu_gfx_ras gfx_v9_0_ras = {
2093         .ras_block = {
2094                 .hw_ops = &gfx_v9_0_ras_ops,
2095         },
2096 };
2097
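/*
 * Early per-ASIC gfx configuration: fifo sizes, gb_addr_config, the RAS
 * block where applicable, and the decoded gb_addr_config fields.
 */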
2098 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
2099 {
2100         u32 gb_addr_config;
2101         int err;
2102
2103         adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
2104
2105         switch (adev->ip_versions[GC_HWIP][0]) {
2106         case IP_VERSION(9, 0, 1):
2107                 adev->gfx.config.max_hw_contexts = 8;
2108                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2109                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2110                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2111                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2112                 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
2113                 break;
2114         case IP_VERSION(9, 2, 1):
2115                 adev->gfx.config.max_hw_contexts = 8;
2116                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2117                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2118                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2119                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2120                 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
2121                 DRM_INFO("fix gfx.config for vega12\n");
2122                 break;
2123         case IP_VERSION(9, 4, 0):
2124                 adev->gfx.ras = &gfx_v9_0_ras;
2125                 adev->gfx.config.max_hw_contexts = 8;
2126                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2127                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2128                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2129                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2130                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2131                 gb_addr_config &= ~0xf3e777ff;
2132                 gb_addr_config |= 0x22014042;
2133                 /* check vbios table if gpu info is not available */
2134                 err = amdgpu_atomfirmware_get_gfx_info(adev);
2135                 if (err)
2136                         return err;
2137                 break;
2138         case IP_VERSION(9, 2, 2):
2139         case IP_VERSION(9, 1, 0):
2140                 adev->gfx.config.max_hw_contexts = 8;
2141                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2142                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2143                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2144                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2145                 if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2146                         gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
2147                 else
2148                         gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
2149                 break;
2150         case IP_VERSION(9, 4, 1):
2151                 adev->gfx.ras = &gfx_v9_4_ras;
2152                 adev->gfx.config.max_hw_contexts = 8;
2153                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2154                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2155                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2156                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2157                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2158                 gb_addr_config &= ~0xf3e777ff;
2159                 gb_addr_config |= 0x22014042;
2160                 break;
2161         case IP_VERSION(9, 3, 0):
2162                 adev->gfx.config.max_hw_contexts = 8;
2163                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2164                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2165                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
2166                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2167                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2168                 gb_addr_config &= ~0xf3e777ff;
2169                 gb_addr_config |= 0x22010042;
2170                 break;
2171         case IP_VERSION(9, 4, 2):
2172                 adev->gfx.ras = &gfx_v9_4_2_ras;
2173                 adev->gfx.config.max_hw_contexts = 8;
2174                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
2175                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
2176                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
2177                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
2178                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
2179                 gb_addr_config &= ~0xf3e777ff;
2180                 gb_addr_config |= 0x22014042;
2181                 /* check vbios table if gpu info is not available */
2182                 err = amdgpu_atomfirmware_get_gfx_info(adev);
2183                 if (err)
2184                         return err;
2185                 break;
2186         default:
2187                 BUG();
2188                 break;
2189         }
2190
2191         if (adev->gfx.ras) {
2192                 err = amdgpu_ras_register_ras_block(adev, &adev->gfx.ras->ras_block);
2193                 if (err) {
2194                         DRM_ERROR("Failed to register gfx ras block!\n");
2195                         return err;
2196                 }
2197
2198                 strcpy(adev->gfx.ras->ras_block.ras_comm.name, "gfx");
2199                 adev->gfx.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__GFX;
2200                 adev->gfx.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
2201                 adev->gfx.ras_if = &adev->gfx.ras->ras_block.ras_comm;
2202
2203                 /* If no special ras_late_init function is defined, use the gfx default ras_late_init */
2204                 if (!adev->gfx.ras->ras_block.ras_late_init)
2205                         adev->gfx.ras->ras_block.ras_late_init = amdgpu_gfx_ras_late_init;
2206
2207                 /* If no special ras_fini function is defined, use the gfx default ras_fini */
2208                 if (!adev->gfx.ras->ras_block.ras_fini)
2209                         adev->gfx.ras->ras_block.ras_fini = amdgpu_gfx_ras_fini;
2210
2211                 /* If no special ras_cb function is defined, use the default ras_cb */
2212                 if (!adev->gfx.ras->ras_block.ras_cb)
2213                         adev->gfx.ras->ras_block.ras_cb = amdgpu_gfx_process_ras_data_cb;
2214         }
2215
2216         adev->gfx.config.gb_addr_config = gb_addr_config;
2217
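	/*
	 * GB_ADDR_CONFIG stores each topology parameter as a log2 value,
	 * so 1 << field recovers the actual count.  PIPE_INTERLEAVE_SIZE
	 * is encoded relative to 256 bytes, hence the additional shift
	 * by 8 below.
	 */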
2218         adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
2219                         REG_GET_FIELD(
2220                                         adev->gfx.config.gb_addr_config,
2221                                         GB_ADDR_CONFIG,
2222                                         NUM_PIPES);
2223
2224         adev->gfx.config.max_tile_pipes =
2225                 adev->gfx.config.gb_addr_config_fields.num_pipes;
2226
2227         adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
2228                         REG_GET_FIELD(
2229                                         adev->gfx.config.gb_addr_config,
2230                                         GB_ADDR_CONFIG,
2231                                         NUM_BANKS);
2232         adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
2233                         REG_GET_FIELD(
2234                                         adev->gfx.config.gb_addr_config,
2235                                         GB_ADDR_CONFIG,
2236                                         MAX_COMPRESSED_FRAGS);
2237         adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
2238                         REG_GET_FIELD(
2239                                         adev->gfx.config.gb_addr_config,
2240                                         GB_ADDR_CONFIG,
2241                                         NUM_RB_PER_SE);
2242         adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
2243                         REG_GET_FIELD(
2244                                         adev->gfx.config.gb_addr_config,
2245                                         GB_ADDR_CONFIG,
2246                                         NUM_SHADER_ENGINES);
2247         adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
2248                         REG_GET_FIELD(
2249                                         adev->gfx.config.gb_addr_config,
2250                                         GB_ADDR_CONFIG,
2251                                         PIPE_INTERLEAVE_SIZE));
2252
2253         return 0;
2254 }
2255
2256 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2257                                       int mec, int pipe, int queue)
2258 {
2259         unsigned irq_type;
2260         struct amdgpu_ring *ring;
2261         unsigned int hw_prio;
2262
2263         ring = &adev->gfx.compute_ring[ring_id];
2264
2265         /* mec0 is me1 */
2266         ring->me = mec + 1;
2267         ring->pipe = pipe;
2268         ring->queue = queue;
2269
2270         ring->ring_obj = NULL;
2271         ring->use_doorbell = true;
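	/* the mec_ring0 doorbell assignment counts 64-bit doorbell slots,
	 * so it is doubled here to get the ring's 32-bit dword index
	 */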
2272         ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2273         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2274                                 + (ring_id * GFX9_MEC_HPD_SIZE);
2275         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2276
2277         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2278                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2279                 + ring->pipe;
2280         hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ?
2281                         AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_GFX_PIPE_PRIO_NORMAL;
2282         /* type-2 packets are deprecated on MEC, use type-3 instead */
2283         return amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, irq_type,
2284                                 hw_prio, NULL);
2285 }
2286
2287 static int gfx_v9_0_sw_init(void *handle)
2288 {
2289         int i, j, k, r, ring_id;
2290         struct amdgpu_ring *ring;
2291         struct amdgpu_kiq *kiq;
2292         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2293
2294         switch (adev->ip_versions[GC_HWIP][0]) {
2295         case IP_VERSION(9, 0, 1):
2296         case IP_VERSION(9, 2, 1):
2297         case IP_VERSION(9, 4, 0):
2298         case IP_VERSION(9, 2, 2):
2299         case IP_VERSION(9, 1, 0):
2300         case IP_VERSION(9, 4, 1):
2301         case IP_VERSION(9, 3, 0):
2302         case IP_VERSION(9, 4, 2):
2303                 adev->gfx.mec.num_mec = 2;
2304                 break;
2305         default:
2306                 adev->gfx.mec.num_mec = 1;
2307                 break;
2308         }
2309
2310         adev->gfx.mec.num_pipe_per_mec = 4;
2311         adev->gfx.mec.num_queue_per_pipe = 8;
2312
2313         /* EOP Event */
2314         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2315         if (r)
2316                 return r;
2317
2318         /* Privileged reg */
2319         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2320                               &adev->gfx.priv_reg_irq);
2321         if (r)
2322                 return r;
2323
2324         /* Privileged inst */
2325         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2326                               &adev->gfx.priv_inst_irq);
2327         if (r)
2328                 return r;
2329
2330         /* ECC error */
2331         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2332                               &adev->gfx.cp_ecc_error_irq);
2333         if (r)
2334                 return r;
2335
2336         /* FUE error */
2337         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2338                               &adev->gfx.cp_ecc_error_irq);
2339         if (r)
2340                 return r;
2341
2342         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2343
2344         gfx_v9_0_scratch_init(adev);
2345
2346         r = gfx_v9_0_init_microcode(adev);
2347         if (r) {
2348                 DRM_ERROR("Failed to load gfx firmware!\n");
2349                 return r;
2350         }
2351
2352         if (adev->gfx.rlc.funcs) {
2353                 if (adev->gfx.rlc.funcs->init) {
2354                         r = adev->gfx.rlc.funcs->init(adev);
2355                         if (r) {
2356                                 dev_err(adev->dev, "Failed to init rlc BOs!\n");
2357                                 return r;
2358                         }
2359                 }
2360         }
2361
2362         r = gfx_v9_0_mec_init(adev);
2363         if (r) {
2364                 DRM_ERROR("Failed to init MEC BOs!\n");
2365                 return r;
2366         }
2367
2368         /* set up the gfx ring */
2369         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2370                 ring = &adev->gfx.gfx_ring[i];
2371                 ring->ring_obj = NULL;
2372                 if (!i)
2373                         sprintf(ring->name, "gfx");
2374                 else
2375                         sprintf(ring->name, "gfx_%d", i);
2376                 ring->use_doorbell = true;
2377                 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2378                 r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
2379                                      AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
2380                                      AMDGPU_RING_PRIO_DEFAULT, NULL);
2381                 if (r)
2382                         return r;
2383         }
2384
2385         /* set up the compute queues - allocate horizontally across pipes */
2386         ring_id = 0;
2387         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2388                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2389                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2390                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2391                                         continue;
2392
2393                                 r = gfx_v9_0_compute_ring_init(adev,
2394                                                                ring_id,
2395                                                                i, k, j);
2396                                 if (r)
2397                                         return r;
2398
2399                                 ring_id++;
2400                         }
2401                 }
2402         }
2403
2404         r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2405         if (r) {
2406                 DRM_ERROR("Failed to init KIQ BOs!\n");
2407                 return r;
2408         }
2409
2410         kiq = &adev->gfx.kiq;
2411         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2412         if (r)
2413                 return r;
2414
2415         /* create MQD for all compute queues as well as KIQ for the SRIOV case */
2416         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2417         if (r)
2418                 return r;
2419
2420         adev->gfx.ce_ram_size = 0x8000;
2421
2422         r = gfx_v9_0_gpu_early_init(adev);
2423         if (r)
2424                 return r;
2425
2426         return 0;
2427 }
2428
2429
2430 static int gfx_v9_0_sw_fini(void *handle)
2431 {
2432         int i;
2433         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2434
2435         if (adev->gfx.ras && adev->gfx.ras->ras_block.ras_fini)
2436                 adev->gfx.ras->ras_block.ras_fini(adev);
2437
2438         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2439                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2440         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2441                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2442
2443         amdgpu_gfx_mqd_sw_fini(adev);
2444         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring);
2445         amdgpu_gfx_kiq_fini(adev);
2446
2447         gfx_v9_0_mec_fini(adev);
2448         amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj,
2449                                 &adev->gfx.rlc.clear_state_gpu_addr,
2450                                 (void **)&adev->gfx.rlc.cs_ptr);
2451         if (adev->flags & AMD_IS_APU) {
2452                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2453                                 &adev->gfx.rlc.cp_table_gpu_addr,
2454                                 (void **)&adev->gfx.rlc.cp_table_ptr);
2455         }
2456         gfx_v9_0_free_microcode(adev);
2457
2458         return 0;
2459 }
2460
2461
2462 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2463 {
2464         /* TODO */
2465 }
2466
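/*
 * Point GRBM_GFX_INDEX at one SE/SH/instance, or broadcast when the
 * corresponding argument is 0xffffffff.  Callers serialize on
 * grbm_idx_mutex, e.g. (see gfx_v9_0_setup_rb() below):
 *
 *	mutex_lock(&adev->grbm_idx_mutex);
 *	gfx_v9_0_select_se_sh(adev, se, sh, 0xffffffff);
 *	... per-instance register access ...
 *	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 *	mutex_unlock(&adev->grbm_idx_mutex);
 */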
2467 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
2468                            u32 instance)
2469 {
2470         u32 data;
2471
2472         if (instance == 0xffffffff)
2473                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2474         else
2475                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2476
2477         if (se_num == 0xffffffff)
2478                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2479         else
2480                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2481
2482         if (sh_num == 0xffffffff)
2483                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2484         else
2485                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2486
2487         WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2488 }
2489
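/*
 * Build the active-RB bitmap for the currently selected SE/SH: combine
 * the fuse (CC) and user (GC_USER) backend-disable masks, then invert
 * and clip to the number of backends per SH.
 */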
2490 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2491 {
2492         u32 data, mask;
2493
2494         data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2495         data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2496
2497         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2498         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2499
2500         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2501                                          adev->gfx.config.max_sh_per_se);
2502
2503         return (~data) & mask;
2504 }
2505
2506 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2507 {
2508         int i, j;
2509         u32 data;
2510         u32 active_rbs = 0;
2511         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2512                                         adev->gfx.config.max_sh_per_se;
2513
2514         mutex_lock(&adev->grbm_idx_mutex);
2515         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2516                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2517                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2518                         data = gfx_v9_0_get_rb_active_bitmap(adev);
2519                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2520                                                rb_bitmap_width_per_sh);
2521                 }
2522         }
2523         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2524         mutex_unlock(&adev->grbm_idx_mutex);
2525
2526         adev->gfx.config.backend_enable_mask = active_rbs;
2527         adev->gfx.config.num_rbs = hweight32(active_rbs);
2528 }
2529
2530 #define DEFAULT_SH_MEM_BASES    (0x6000)
2531 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2532 {
2533         int i;
2534         uint32_t sh_mem_config;
2535         uint32_t sh_mem_bases;
2536
2537         /*
2538          * Configure apertures:
2539          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2540          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2541          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2542          */
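	/* Each 16-bit SH_MEM_BASES field holds bits 63:48 of an aperture
	 * base (see the PRIVATE_BASE/SHARED_BASE setup in
	 * gfx_v9_0_constants_init() below), so 0x6000 places both
	 * apertures in the 0x60000000'00000000 range listed above.
	 */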
2543         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2544
2545         sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2546                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2547                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2548
2549         mutex_lock(&adev->srbm_mutex);
2550         for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2551                 soc15_grbm_select(adev, 0, 0, 0, i);
2552                 /* CP and shaders */
2553                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2554                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2555         }
2556         soc15_grbm_select(adev, 0, 0, 0, 0);
2557         mutex_unlock(&adev->srbm_mutex);
2558
2559         /* Initialize all compute VMIDs to have no GDS, GWS, or OA
2560            access. These should be enabled by FW for target VMIDs. */
2561         for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) {
2562                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2563                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2564                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2565                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2566         }
2567 }
2568
2569 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2570 {
2571         int vmid;
2572
2573         /*
2574          * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2575          * access. Compute VMIDs should be enabled by FW for target VMIDs;
2576          * the driver can enable them for graphics. VMID0 should maintain
2577          * access so that HWS firmware can save/restore entries.
2578          */
2579         for (vmid = 1; vmid < AMDGPU_NUM_VMID; vmid++) {
2580                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2581                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2582                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2583                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2584         }
2585 }
2586
2587 static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev)
2588 {
2589         uint32_t tmp;
2590
2591         switch (adev->ip_versions[GC_HWIP][0]) {
2592         case IP_VERSION(9, 4, 1):
2593                 tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG);
2594                 tmp = REG_SET_FIELD(tmp, SQ_CONFIG,
2595                                         DISABLE_BARRIER_WAITCNT, 1);
2596                 WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp);
2597                 break;
2598         default:
2599                 break;
2600         }
2601 }
2602
2603 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2604 {
2605         u32 tmp;
2606         int i;
2607
2608         WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2609
2610         gfx_v9_0_tiling_mode_table_init(adev);
2611
2612         gfx_v9_0_setup_rb(adev);
2613         gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2614         adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2615
2616         /* XXX SH_MEM regs */
2617         /* where to put LDS, scratch, GPUVM in FSA64 space */
2618         mutex_lock(&adev->srbm_mutex);
2619         for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2620                 soc15_grbm_select(adev, 0, 0, 0, i);
2621                 /* CP and shaders */
2622                 if (i == 0) {
2623                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2624                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2625                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2626                                             !!adev->gmc.noretry);
2627                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2628                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2629                 } else {
2630                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2631                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2632                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2633                                             !!adev->gmc.noretry);
2634                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2635                         tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2636                                 (adev->gmc.private_aperture_start >> 48));
2637                         tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2638                                 (adev->gmc.shared_aperture_start >> 48));
2639                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2640                 }
2641         }
2642         soc15_grbm_select(adev, 0, 0, 0, 0);
2643
2644         mutex_unlock(&adev->srbm_mutex);
2645
2646         gfx_v9_0_init_compute_vmid(adev);
2647         gfx_v9_0_init_gds_vmid(adev);
2648         gfx_v9_0_init_sq_config(adev);
2649 }
2650
2651 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2652 {
2653         u32 i, j, k;
2654         u32 mask;
2655
2656         mutex_lock(&adev->grbm_idx_mutex);
2657         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2658                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2659                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2660                         for (k = 0; k < adev->usec_timeout; k++) {
2661                                 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2662                                         break;
2663                                 udelay(1);
2664                         }
2665                         if (k == adev->usec_timeout) {
2666                                 gfx_v9_0_select_se_sh(adev, 0xffffffff,
2667                                                       0xffffffff, 0xffffffff);
2668                                 mutex_unlock(&adev->grbm_idx_mutex);
2669                                 DRM_INFO("Timeout waiting for RLC serdes %u,%u\n",
2670                                          i, j);
2671                                 return;
2672                         }
2673                 }
2674         }
2675         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2676         mutex_unlock(&adev->grbm_idx_mutex);
2677
2678         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2679                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2680                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2681                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2682         for (k = 0; k < adev->usec_timeout; k++) {
2683                 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2684                         break;
2685                 udelay(1);
2686         }
2687 }
2688
2689 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2690                                                bool enable)
2691 {
2692         u32 tmp;
2693
2694         /* These interrupts should be enabled to drive DS clock */
2695
2696         tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2697
2698         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2699         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2700         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2701         if (adev->gfx.num_gfx_rings)
2702                 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2703
2704         WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2705 }
2706
2707 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2708 {
2709         adev->gfx.rlc.funcs->get_csb_buffer(adev, adev->gfx.rlc.cs_ptr);
2710         /* csib */
2711         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2712                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
2713         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2714                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2715         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2716                         adev->gfx.rlc.clear_state_size);
2717 }
2718
2719 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2720                                 int indirect_offset,
2721                                 int list_size,
2722                                 int *unique_indirect_regs,
2723                                 int unique_indirect_reg_count,
2724                                 int *indirect_start_offsets,
2725                                 int *indirect_start_offsets_count,
2726                                 int max_start_offsets_count)
2727 {
2728         int idx;
2729
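	/*
	 * Each run starting here is a list of three-word entries whose
	 * third word names an indirect register, closed by an 0xFFFFFFFF
	 * marker; record where every run starts and collect the set of
	 * unique indirect registers referenced.
	 */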
2730         for (; indirect_offset < list_size; indirect_offset++) {
2731                 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2732                 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2733                 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2734
2735                 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2736                         indirect_offset += 2;
2737
2738                         /* look for a matching index */
2739                         for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2740                                 if (unique_indirect_regs[idx] ==
2741                                         register_list_format[indirect_offset] ||
2742                                         !unique_indirect_regs[idx])
2743                                         break;
2744                         }
2745
2746                         BUG_ON(idx >= unique_indirect_reg_count);
2747
2748                         if (!unique_indirect_regs[idx])
2749                                 unique_indirect_regs[idx] = register_list_format[indirect_offset];
2750
2751                         indirect_offset++;
2752                 }
2753         }
2754 }
2755
2756 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2757 {
2758         int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2759         int unique_indirect_reg_count = 0;
2760
2761         int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2762         int indirect_start_offsets_count = 0;
2763
2764         int list_size = 0;
2765         int i = 0, j = 0;
2766         u32 tmp = 0;
2767
2768         u32 *register_list_format =
2769                 kmemdup(adev->gfx.rlc.register_list_format,
2770                         adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2771         if (!register_list_format)
2772                 return -ENOMEM;
2773
2774         /* setup unique_indirect_regs array and indirect_start_offsets array */
2775         unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2776         gfx_v9_1_parse_ind_reg_list(register_list_format,
2777                                     adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2778                                     adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2779                                     unique_indirect_regs,
2780                                     unique_indirect_reg_count,
2781                                     indirect_start_offsets,
2782                                     &indirect_start_offsets_count,
2783                                     ARRAY_SIZE(indirect_start_offsets));
2784
2785         /* enable auto inc in case it is disabled */
2786         tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2787         tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2788         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2789
2790         /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2791         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2792                 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2793         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2794                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2795                         adev->gfx.rlc.register_restore[i]);
2796
2797         /* load indirect register */
2798         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2799                 adev->gfx.rlc.reg_list_format_start);
2800
2801         /* direct register portion */
2802         for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2803                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2804                         register_list_format[i]);
2805
2806         /* indirect register portion */
2807         while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2808                 if (register_list_format[i] == 0xFFFFFFFF) {
2809                         WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2810                         continue;
2811                 }
2812
2813                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2814                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2815
2816                 for (j = 0; j < unique_indirect_reg_count; j++) {
2817                         if (register_list_format[i] == unique_indirect_regs[j]) {
2818                                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2819                                 break;
2820                         }
2821                 }
2822
2823                 BUG_ON(j >= unique_indirect_reg_count);
2824
2825                 i++;
2826         }
2827
2828         /* set save/restore list size */
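	/* the dword count is halved, i.e. the list is counted in
	 * (reg, value) pairs
	 */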
2829         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2830         list_size = list_size >> 1;
2831         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2832                 adev->gfx.rlc.reg_restore_list_size);
2833         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2834
2835         /* write the starting offsets to RLC scratch ram */
2836         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2837                 adev->gfx.rlc.starting_offsets_start);
2838         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2839                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2840                        indirect_start_offsets[i]);
2841
2842         /* load unique indirect regs */
2843         for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2844                 if (unique_indirect_regs[i] != 0) {
2845                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2846                                + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2847                                unique_indirect_regs[i] & 0x3FFFF);
2848
2849                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2850                                + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2851                                unique_indirect_regs[i] >> 20);
2852                 }
2853         }
2854
2855         kfree(register_list_format);
2856         return 0;
2857 }
2858
2859 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2860 {
2861         WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2862 }
2863
2864 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2865                                              bool enable)
2866 {
2867         uint32_t data = 0;
2868         uint32_t default_data = 0;
2869
2870         default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2871         if (enable) {
2872                 /* enable GFXIP control over CGPG */
2873                 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2874                 if (default_data != data)
2875                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2876
2877                 /* update status */
2878                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2879                 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2880                 if (default_data != data)
2881                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2882         } else {
2883                 /* restore GFXIP control over CGPG */
2884                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2885                 if (default_data != data)
2886                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2887         }
2888 }
2889
2890 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2891 {
2892         uint32_t data = 0;
2893
2894         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2895                               AMD_PG_SUPPORT_GFX_SMG |
2896                               AMD_PG_SUPPORT_GFX_DMG)) {
2897                 /* init IDLE_POLL_COUNT = 60 */
2898                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2899                 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2900                 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2901                 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2902
2903                 /* init RLC PG Delay */
2904                 data = 0;
2905                 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2906                 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2907                 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2908                 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2909                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2910
2911                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2912                 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2913                 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2914                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2915
2916                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2917                 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2918                 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2919                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2920
2921                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2922                 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2923
2924                 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2925                 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2926                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2927                 if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 3, 0))
2928                         pwr_10_0_gfxip_control_over_cgpg(adev, true);
2929         }
2930 }
2931
2932 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2933                                                 bool enable)
2934 {
2935         uint32_t data = 0;
2936         uint32_t default_data = 0;
2937
2938         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2939         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2940                              SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2941                              enable ? 1 : 0);
2942         if (default_data != data)
2943                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2944 }
2945
2946 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2947                                                 bool enable)
2948 {
2949         uint32_t data = 0;
2950         uint32_t default_data = 0;
2951
2952         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2953         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2954                              SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2955                              enable ? 1 : 0);
2956         if (default_data != data)
2957                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2958 }
2959
2960 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2961                                         bool enable)
2962 {
2963         uint32_t data = 0;
2964         uint32_t default_data = 0;
2965
2966         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2967         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2968                              CP_PG_DISABLE,
2969                              enable ? 0 : 1);
2970         if (default_data != data)
2971                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2972 }
2973
2974 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2975                                                 bool enable)
2976 {
2977         uint32_t data, default_data;
2978
2979         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2980         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2981                              GFX_POWER_GATING_ENABLE,
2982                              enable ? 1 : 0);
2983         if (default_data != data)
2984                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2985 }
2986
2987 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2988                                                 bool enable)
2989 {
2990         uint32_t data, default_data;
2991
2992         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2993         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2994                              GFX_PIPELINE_PG_ENABLE,
2995                              enable ? 1 : 0);
2996         if (default_data != data)
2997                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2998
2999         if (!enable)
3000                 /* read any GFX register to wake up GFX */
3001                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
3002 }
3003
3004 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
3005                                                        bool enable)
3006 {
3007         uint32_t data, default_data;
3008
3009         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3010         data = REG_SET_FIELD(data, RLC_PG_CNTL,
3011                              STATIC_PER_CU_PG_ENABLE,
3012                              enable ? 1 : 0);
3013         if (default_data != data)
3014                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3015 }
3016
3017 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
3018                                                 bool enable)
3019 {
3020         uint32_t data, default_data;
3021
3022         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
3023         data = REG_SET_FIELD(data, RLC_PG_CNTL,
3024                              DYN_PER_CU_PG_ENABLE,
3025                              enable ? 1 : 0);
3026         if (default_data != data)
3027                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
3028 }
3029
3030 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
3031 {
3032         gfx_v9_0_init_csb(adev);
3033
3034         /*
3035          * The RLC save/restore list is supported since RLC v2_1
3036          * and is needed by the gfxoff feature.
3037          */
3038         if (adev->gfx.rlc.is_rlc_v2_1) {
3039                 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 2, 1) ||
3040                     (adev->apu_flags & AMD_APU_IS_RAVEN2))
3041                         gfx_v9_1_init_rlc_save_restore_list(adev);
3042                 gfx_v9_0_enable_save_restore_machine(adev);
3043         }
3044
3045         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3046                               AMD_PG_SUPPORT_GFX_SMG |
3047                               AMD_PG_SUPPORT_GFX_DMG |
3048                               AMD_PG_SUPPORT_CP |
3049                               AMD_PG_SUPPORT_GDS |
3050                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
3051                 WREG32_SOC15(GC, 0, mmRLC_JUMP_TABLE_RESTORE,
3052                              adev->gfx.rlc.cp_table_gpu_addr >> 8);
3053                 gfx_v9_0_init_gfx_power_gating(adev);
3054         }
3055 }
3056
3057 static void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
3058 {
3059         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
3060         gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3061         gfx_v9_0_wait_for_rlc_serdes(adev);
3062 }
3063
3064 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
3065 {
3066         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3067         udelay(50);
3068         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3069         udelay(50);
3070 }
3071
3072 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
3073 {
3074 #ifdef AMDGPU_RLC_DEBUG_RETRY
3075         u32 rlc_ucode_ver;
3076 #endif
3077
3078         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
3079         udelay(50);
3080
3081         /* on dGPUs enable the CP interrupt here; APUs (e.g. carrizo) enable it only after the CP is initialized */
3082         if (!(adev->flags & AMD_IS_APU)) {
3083                 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3084                 udelay(50);
3085         }
3086
3087 #ifdef AMDGPU_RLC_DEBUG_RETRY
3088         /* RLC_GPM_GENERAL_6 : RLC Ucode version */
3089         rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
3090         if (rlc_ucode_ver == 0x108) {
3091                 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
3092                                 rlc_ucode_ver, adev->gfx.rlc_fw_version);
3093                 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
3094                  * default is 0x9C4 to create a 100us interval */
3095                 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
3096                 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
3097                  * to disable the page fault retry interrupts, default is
3098                  * 0x100 (256) */
3099                 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
3100         }
3101 #endif
3102 }
3103
3104 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
3105 {
3106         const struct rlc_firmware_header_v2_0 *hdr;
3107         const __le32 *fw_data;
3108         unsigned i, fw_size;
3109
3110         if (!adev->gfx.rlc_fw)
3111                 return -EINVAL;
3112
3113         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3114         amdgpu_ucode_print_rlc_hdr(&hdr->header);
3115
3116         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3117                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3118         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3119
3120         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
3121                         RLCG_UCODE_LOADING_START_ADDRESS);
3122         for (i = 0; i < fw_size; i++)
3123                 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3124         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3125
3126         return 0;
3127 }
3128
3129 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
3130 {
3131         int r;
3132
3133         if (amdgpu_sriov_vf(adev)) {
3134                 gfx_v9_0_init_csb(adev);
3135                 return 0;
3136         }
3137
3138         adev->gfx.rlc.funcs->stop(adev);
3139
3140         /* disable CG */
3141         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3142
3143         gfx_v9_0_init_pg(adev);
3144
3145         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3146                 /* legacy rlc firmware loading */
3147                 r = gfx_v9_0_rlc_load_microcode(adev);
3148                 if (r)
3149                         return r;
3150         }
3151
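	/* amdgpu_lbpw < 0 means auto: LBPW then defaults to enabled on
	 * GC 9.1.0/9.2.2 but stays disabled on GC 9.4.0
	 */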
3152         switch (adev->ip_versions[GC_HWIP][0]) {
3153         case IP_VERSION(9, 2, 2):
3154         case IP_VERSION(9, 1, 0):
3155                 if (amdgpu_lbpw == 0)
3156                         gfx_v9_0_enable_lbpw(adev, false);
3157                 else
3158                         gfx_v9_0_enable_lbpw(adev, true);
3159                 break;
3160         case IP_VERSION(9, 4, 0):
3161                 if (amdgpu_lbpw > 0)
3162                         gfx_v9_0_enable_lbpw(adev, true);
3163                 else
3164                         gfx_v9_0_enable_lbpw(adev, false);
3165                 break;
3166         default:
3167                 break;
3168         }
3169
3170         adev->gfx.rlc.funcs->start(adev);
3171
3172         return 0;
3173 }
3174
3175 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3176 {
3177         u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3178
3179         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3180         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3181         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3182         WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3183         udelay(50);
3184 }
3185
3186 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3187 {
3188         const struct gfx_firmware_header_v1_0 *pfp_hdr;
3189         const struct gfx_firmware_header_v1_0 *ce_hdr;
3190         const struct gfx_firmware_header_v1_0 *me_hdr;
3191         const __le32 *fw_data;
3192         unsigned i, fw_size;
3193
3194         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3195                 return -EINVAL;
3196
3197         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3198                 adev->gfx.pfp_fw->data;
3199         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3200                 adev->gfx.ce_fw->data;
3201         me_hdr = (const struct gfx_firmware_header_v1_0 *)
3202                 adev->gfx.me_fw->data;
3203
3204         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3205         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3206         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3207
3208         gfx_v9_0_cp_gfx_enable(adev, false);
3209
3210         /* PFP */
3211         fw_data = (const __le32 *)
3212                 (adev->gfx.pfp_fw->data +
3213                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3214         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3215         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3216         for (i = 0; i < fw_size; i++)
3217                 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3218         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3219
3220         /* CE */
3221         fw_data = (const __le32 *)
3222                 (adev->gfx.ce_fw->data +
3223                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3224         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3225         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3226         for (i = 0; i < fw_size; i++)
3227                 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3228         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3229
3230         /* ME */
3231         fw_data = (const __le32 *)
3232                 (adev->gfx.me_fw->data +
3233                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3234         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3235         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3236         for (i = 0; i < fw_size; i++)
3237                 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3238         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3239
3240         return 0;
3241 }
3242
3243 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3244 {
3245         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3246         const struct cs_section_def *sect = NULL;
3247         const struct cs_extent_def *ext = NULL;
3248         int r, i, tmp;
3249
3250         /* init the CP */
3251         WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3252         WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3253
3254         gfx_v9_0_cp_gfx_enable(adev, true);
3255
3256         r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3257         if (r) {
3258                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3259                 return r;
3260         }
3261
3262         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3263         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3264
3265         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3266         amdgpu_ring_write(ring, 0x80000000);
3267         amdgpu_ring_write(ring, 0x80000000);
3268
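	/* replay the golden context-register state section by section so
	 * the CP starts from known values
	 */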
3269         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3270                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3271                         if (sect->id == SECT_CONTEXT) {
3272                                 amdgpu_ring_write(ring,
3273                                        PACKET3(PACKET3_SET_CONTEXT_REG,
3274                                                ext->reg_count));
3275                                 amdgpu_ring_write(ring,
3276                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3277                                 for (i = 0; i < ext->reg_count; i++)
3278                                         amdgpu_ring_write(ring, ext->extent[i]);
3279                         }
3280                 }
3281         }
3282
3283         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3284         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3285
3286         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3287         amdgpu_ring_write(ring, 0);
3288
3289         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3290         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3291         amdgpu_ring_write(ring, 0x8000);
3292         amdgpu_ring_write(ring, 0x8000);
3293
3294         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3295         tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3296                 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3297         amdgpu_ring_write(ring, tmp);
3298         amdgpu_ring_write(ring, 0);
3299
3300         amdgpu_ring_commit(ring);
3301
3302         return 0;
3303 }
3304
3305 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3306 {
3307         struct amdgpu_ring *ring;
3308         u32 tmp;
3309         u32 rb_bufsz;
3310         u64 rb_addr, rptr_addr, wptr_gpu_addr;
3311
3312         /* Set the write pointer delay */
3313         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3314
3315         /* set the RB to use vmid 0 */
3316         WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3317
3318         /* Set ring buffer size */
3319         ring = &adev->gfx.gfx_ring[0];
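	/* RB_BUFSZ is log2 of the ring size in 8-byte units */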
3320         rb_bufsz = order_base_2(ring->ring_size / 8);
3321         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3322         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3323 #ifdef __BIG_ENDIAN
3324         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3325 #endif
3326         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3327
3328         /* Initialize the ring buffer's write pointers */
3329         ring->wptr = 0;
3330         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3331         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3332
3333         /* set the wb address whether it's enabled or not */
3334         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3335         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3336         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3337
3338         wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3339         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3340         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3341
3342         mdelay(1);
3343         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3344
3345         rb_addr = ring->gpu_addr >> 8;
3346         WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3347         WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3348
3349         tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3350         if (ring->use_doorbell) {
3351                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3352                                     DOORBELL_OFFSET, ring->doorbell_index);
3353                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3354                                     DOORBELL_EN, 1);
3355         } else {
3356                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3357         }
3358         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3359
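	/* open the CP doorbell range from this ring's doorbell up to the
	 * maximum offset
	 */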
3360         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3361                         DOORBELL_RANGE_LOWER, ring->doorbell_index);
3362         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3363
3364         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3365                        CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3366
3367
3368         /* start the ring */
3369         gfx_v9_0_cp_gfx_start(adev);
3370         ring->sched.ready = true;
3371
3372         return 0;
3373 }
3374
3375 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3376 {
3377         if (enable) {
3378                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3379         } else {
3380                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3381                         (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3382                 adev->gfx.kiq.ring.sched.ready = false;
3383         }
3384         udelay(50);
3385 }
3386
3387 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3388 {
3389         const struct gfx_firmware_header_v1_0 *mec_hdr;
3390         const __le32 *fw_data;
3391         unsigned i;
3392         u32 tmp;
3393
3394         if (!adev->gfx.mec_fw)
3395                 return -EINVAL;
3396
3397         gfx_v9_0_cp_compute_enable(adev, false);
3398
3399         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3400         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3401
3402         fw_data = (const __le32 *)
3403                 (adev->gfx.mec_fw->data +
3404                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3405         tmp = 0;
3406         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3407         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3408         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3409
3410         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3411                 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3412         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3413                 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3414
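	/* the IC base above points at the full MEC ucode BO, so only the
	 * jump table entries need to be written through the UCODE port
	 */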
3415         /* MEC1 */
3416         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3417                          mec_hdr->jt_offset);
3418         for (i = 0; i < mec_hdr->jt_size; i++)
3419                 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3420                         le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3421
3422         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3423                         adev->gfx.mec_fw_version);
3424         /* TODO: Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3425
3426         return 0;
3427 }
3428
3429 /* KIQ functions */
3430 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3431 {
3432         uint32_t tmp;
3433         struct amdgpu_device *adev = ring->adev;
3434
3435         /* tell RLC which is KIQ queue */
3436         tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3437         tmp &= 0xffffff00;
3438         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3439         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3440         tmp |= 0x80;
3441         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3442 }
3443
3444 static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd)
3445 {
3446         struct amdgpu_device *adev = ring->adev;
3447
3448         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
3449                 if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) {
3450                         mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
3451                         mqd->cp_hqd_queue_priority =
3452                                 AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
3453                 }
3454         }
3455 }
3456
3457 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3458 {
3459         struct amdgpu_device *adev = ring->adev;
3460         struct v9_mqd *mqd = ring->mqd_ptr;
3461         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3462         uint32_t tmp;
3463
3464         mqd->header = 0xC0310800;
3465         mqd->compute_pipelinestat_enable = 0x00000001;
3466         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3467         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3468         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3469         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3470         mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3471         mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3472         mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3473         mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3474         mqd->compute_misc_reserved = 0x00000003;
3475
3476         mqd->dynamic_cu_mask_addr_lo =
3477                 lower_32_bits(ring->mqd_gpu_addr
3478                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3479         mqd->dynamic_cu_mask_addr_hi =
3480                 upper_32_bits(ring->mqd_gpu_addr
3481                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3482
3483         eop_base_addr = ring->eop_gpu_addr >> 8;
3484         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3485         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3486
3487         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3488         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3489         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3490                         (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3491
3492         mqd->cp_hqd_eop_control = tmp;
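        /*
         * Worked example of the encoding above: GFX9_MEC_HPD_SIZE is 4096
         * bytes = 1024 dwords, so order_base_2(1024) - 1 = 9 and the
         * hardware decodes 2^(9+1) = 1024 dwords, exactly the allocation.
         */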
3493
3494         /* enable doorbell? */
3495         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3496
3497         if (ring->use_doorbell) {
3498                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3499                                     DOORBELL_OFFSET, ring->doorbell_index);
3500                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3501                                     DOORBELL_EN, 1);
3502                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3503                                     DOORBELL_SOURCE, 0);
3504                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3505                                     DOORBELL_HIT, 0);
3506         } else {
3507                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3508                                          DOORBELL_EN, 0);
3509         }
3510
3511         mqd->cp_hqd_pq_doorbell_control = tmp;
3512
3513         /* disable the queue if it's active */
3514         ring->wptr = 0;
3515         mqd->cp_hqd_dequeue_request = 0;
3516         mqd->cp_hqd_pq_rptr = 0;
3517         mqd->cp_hqd_pq_wptr_lo = 0;
3518         mqd->cp_hqd_pq_wptr_hi = 0;
3519
3520         /* set the pointer to the MQD */
3521         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3522         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3523
3524         /* set MQD vmid to 0 */
3525         tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3526         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3527         mqd->cp_mqd_control = tmp;
3528
3529         /* set the pointer to the HQD; this is similar to CP_RB0_BASE/_HI */
3530         hqd_gpu_addr = ring->gpu_addr >> 8;
3531         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3532         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3533
3534         /* set up the HQD, this is similar to CP_RB0_CNTL */
3535         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3536         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3537                             (order_base_2(ring->ring_size / 4) - 1));
3538         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3539                         ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3540 #ifdef __BIG_ENDIAN
3541         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3542 #endif
3543         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3544         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3545         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3546         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3547         mqd->cp_hqd_pq_control = tmp;
3548
3549         /* set the wb address whether it's enabled or not */
3550         wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3551         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3552         mqd->cp_hqd_pq_rptr_report_addr_hi =
3553                 upper_32_bits(wb_gpu_addr) & 0xffff;
3554
3555         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3556         wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3557         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3558         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3559
3560         tmp = 0;
3561         /* enable the doorbell if requested */
3562         if (ring->use_doorbell) {
3563                 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3564                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3565                                 DOORBELL_OFFSET, ring->doorbell_index);
3566
3567                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3568                                          DOORBELL_EN, 1);
3569                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3570                                          DOORBELL_SOURCE, 0);
3571                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3572                                          DOORBELL_HIT, 0);
3573         }
3574
3575         mqd->cp_hqd_pq_doorbell_control = tmp;
3576
3577         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3578         ring->wptr = 0;
3579         mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3580
3581         /* set the vmid for the queue */
3582         mqd->cp_hqd_vmid = 0;
3583
3584         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3585         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3586         mqd->cp_hqd_persistent_state = tmp;
3587
3588         /* set MIN_IB_AVAIL_SIZE */
3589         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3590         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3591         mqd->cp_hqd_ib_control = tmp;
3592
3593         /* set static priority for a queue/ring */
3594         gfx_v9_0_mqd_set_priority(ring, mqd);
3595         mqd->cp_hqd_quantum = RREG32_SOC15(GC, 0, mmCP_HQD_QUANTUM);
3596
3597         /* the map_queues packet doesn't need to activate the queue,
3598          * so only the KIQ needs to set this field.
3599          */
3600         if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
3601                 mqd->cp_hqd_active = 1;
3602
3603         return 0;
3604 }
3605
3606 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3607 {
3608         struct amdgpu_device *adev = ring->adev;
3609         struct v9_mqd *mqd = ring->mqd_ptr;
3610         int j;
3611
3612         /* disable wptr polling */
3613         WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3614
3615         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3616                mqd->cp_hqd_eop_base_addr_lo);
3617         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3618                mqd->cp_hqd_eop_base_addr_hi);
3619
3620         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3621         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3622                mqd->cp_hqd_eop_control);
3623
3624         /* enable doorbell? */
3625         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3626                mqd->cp_hqd_pq_doorbell_control);
3627
3628         /* disable the queue if it's active */
3629         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3630                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3631                 for (j = 0; j < adev->usec_timeout; j++) {
3632                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3633                                 break;
3634                         udelay(1);
3635                 }
3636                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3637                        mqd->cp_hqd_dequeue_request);
3638                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3639                        mqd->cp_hqd_pq_rptr);
3640                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3641                        mqd->cp_hqd_pq_wptr_lo);
3642                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3643                        mqd->cp_hqd_pq_wptr_hi);
3644         }
3645
3646         /* set the pointer to the MQD */
3647         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3648                mqd->cp_mqd_base_addr_lo);
3649         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3650                mqd->cp_mqd_base_addr_hi);
3651
3652         /* set MQD vmid to 0 */
3653         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3654                mqd->cp_mqd_control);
3655
3656         /* set the pointer to the HQD; this is similar to CP_RB0_BASE/_HI */
3657         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3658                mqd->cp_hqd_pq_base_lo);
3659         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3660                mqd->cp_hqd_pq_base_hi);
3661
3662         /* set up the HQD, this is similar to CP_RB0_CNTL */
3663         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3664                mqd->cp_hqd_pq_control);
3665
3666         /* set the wb address whether it's enabled or not */
3667         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3668                                 mqd->cp_hqd_pq_rptr_report_addr_lo);
3669         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3670                                 mqd->cp_hqd_pq_rptr_report_addr_hi);
3671
3672         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3673         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3674                mqd->cp_hqd_pq_wptr_poll_addr_lo);
3675         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3676                mqd->cp_hqd_pq_wptr_poll_addr_hi);
3677
3678         /* enable the doorbell if requested */
3679         if (ring->use_doorbell) {
3680                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3681                                         (adev->doorbell_index.kiq * 2) << 2);
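                /*
                 * doorbell_index values are in 64-bit doorbell units, so
                 * "* 2" converts to a dword index and "<< 2" to the byte
                 * offset the range register takes (our reading of the
                 * encoding; the userqueue_end write below matches it).
                 */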
3682                 /* If the GC has entered CGPG, ringing a doorbell beyond the
3683                  * first page does not wake it up. Enlarge
3684                  * CP_MEC_DOORBELL_RANGE_UPPER to work around this; the change
3685                  * has to stay aligned with the matching firmware update.
3686                  */
3687                 if (check_if_enlarge_doorbell_range(adev))
3688                         WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3689                                         (adev->doorbell.size - 4));
3690                 else
3691                         WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3692                                         (adev->doorbell_index.userqueue_end * 2) << 2);
3693         }
3694
3695         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3696                mqd->cp_hqd_pq_doorbell_control);
3697
3698         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3699         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3700                mqd->cp_hqd_pq_wptr_lo);
3701         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3702                mqd->cp_hqd_pq_wptr_hi);
3703
3704         /* set the vmid for the queue */
3705         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3706
3707         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3708                mqd->cp_hqd_persistent_state);
3709
3710         /* activate the queue */
3711         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3712                mqd->cp_hqd_active);
3713
3714         if (ring->use_doorbell)
3715                 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3716
3717         return 0;
3718 }
3719
3720 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3721 {
3722         struct amdgpu_device *adev = ring->adev;
3723         int j;
3724
3725         /* disable the queue if it's active */
3726         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3727
3728                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3729
3730                 for (j = 0; j < adev->usec_timeout; j++) {
3731                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3732                                 break;
3733                         udelay(1);
3734                 }
3735
3736                 if (j == adev->usec_timeout) {
3737                         DRM_DEBUG("KIQ dequeue request failed.\n");
3738
3739                         /* Manually disable the queue if the dequeue request times out */
3740                         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3741                 }
3742
3743                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3744                       0);
3745         }
3746
3747         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3748         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3749         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
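        /* 0x40000000 is the DOORBELL_HIT bit (bit 30); presumably this
         * write clears any pending doorbell state before the register is
         * zeroed on the next line.
         */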
3750         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3751         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3752         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3753         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3754         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3755
3756         return 0;
3757 }
3758
3759 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3760 {
3761         struct amdgpu_device *adev = ring->adev;
3762         struct v9_mqd *mqd = ring->mqd_ptr;
3763         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3764         struct v9_mqd *tmp_mqd;
3765
3766         gfx_v9_0_kiq_setting(ring);
3767
3768         /* The GPU can be in a bad state during probe: the driver may trigger
3769          * a reset after loading the SMU, in which case the MQD was never
3770          * initialized and must be re-initialized here. Check
3771          * mqd->cp_hqd_pq_control, since that value is never 0 once initialized.
3772          */
3773         tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3774         if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control) {
3775                 /* for the GPU_RESET case, reset the MQD to a clean state */
3776                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3777                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3778
3779                 /* reset ring buffer */
3780                 ring->wptr = 0;
3781                 amdgpu_ring_clear_ring(ring);
3782
3783                 mutex_lock(&adev->srbm_mutex);
3784                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3785                 gfx_v9_0_kiq_init_register(ring);
3786                 soc15_grbm_select(adev, 0, 0, 0, 0);
3787                 mutex_unlock(&adev->srbm_mutex);
3788         } else {
3789                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3790                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3791                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3792                 mutex_lock(&adev->srbm_mutex);
3793                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3794                 gfx_v9_0_mqd_init(ring);
3795                 gfx_v9_0_kiq_init_register(ring);
3796                 soc15_grbm_select(adev, 0, 0, 0, 0);
3797                 mutex_unlock(&adev->srbm_mutex);
3798
3799                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3800                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3801         }
3802
3803         return 0;
3804 }
3805
3806 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3807 {
3808         struct amdgpu_device *adev = ring->adev;
3809         struct v9_mqd *mqd = ring->mqd_ptr;
3810         int mqd_idx = ring - &adev->gfx.compute_ring[0];
3811         struct v9_mqd *tmp_mqd;
3812
3813         /* As in the KIQ init above, the driver needs to re-init the MQD if
3814          * mqd->cp_hqd_pq_control shows it was never initialized.
3815          */
3816         tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx];
3817
3818         if (!tmp_mqd->cp_hqd_pq_control ||
3819             (!amdgpu_in_reset(adev) && !adev->in_suspend)) {
3820                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3821                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3822                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3823                 mutex_lock(&adev->srbm_mutex);
3824                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3825                 gfx_v9_0_mqd_init(ring);
3826                 soc15_grbm_select(adev, 0, 0, 0, 0);
3827                 mutex_unlock(&adev->srbm_mutex);
3828
3829                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3830                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3831         } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */
3832                 /* reset the MQD to a clean state */
3833                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3834                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3835
3836                 /* reset ring buffer */
3837                 ring->wptr = 0;
3838                 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], 0);
3839                 amdgpu_ring_clear_ring(ring);
3840         } else {
3841                 amdgpu_ring_clear_ring(ring);
3842         }
3843
3844         return 0;
3845 }
3846
3847 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3848 {
3849         struct amdgpu_ring *ring;
3850         int r;
3851
3852         ring = &adev->gfx.kiq.ring;
3853
3854         r = amdgpu_bo_reserve(ring->mqd_obj, false);
3855         if (unlikely(r != 0))
3856                 return r;
3857
3858         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3859         if (unlikely(r != 0)) {
3860                 /* don't leak the reservation taken above */
3861                 amdgpu_bo_unreserve(ring->mqd_obj);
3862                 return r;
3863         }
3861
3862         gfx_v9_0_kiq_init_queue(ring);
3863         amdgpu_bo_kunmap(ring->mqd_obj);
3864         ring->mqd_ptr = NULL;
3865         amdgpu_bo_unreserve(ring->mqd_obj);
3866         ring->sched.ready = true;
3867         return 0;
3868 }
3869
3870 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3871 {
3872         struct amdgpu_ring *ring = NULL;
3873         int r = 0, i;
3874
3875         gfx_v9_0_cp_compute_enable(adev, true);
3876
3877         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3878                 ring = &adev->gfx.compute_ring[i];
3879
3880                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3881                 if (unlikely(r != 0))
3882                         goto done;
3883                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3884                 if (!r) {
3885                         r = gfx_v9_0_kcq_init_queue(ring);
3886                         amdgpu_bo_kunmap(ring->mqd_obj);
3887                         ring->mqd_ptr = NULL;
3888                 }
3889                 amdgpu_bo_unreserve(ring->mqd_obj);
3890                 if (r)
3891                         goto done;
3892         }
3893
3894         r = amdgpu_gfx_enable_kcq(adev);
3895 done:
3896         return r;
3897 }
3898
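/*
 * Bring-up order below matters: microcode is loaded first (legacy,
 * non-PSP path only), then the KIQ is resumed so it can service the
 * MAP_QUEUES requests that amdgpu_gfx_enable_kcq() issues for the
 * compute rings, then the GFX ring, then the KCQs, and finally every
 * ring is tested.
 */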
3899 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3900 {
3901         int r, i;
3902         struct amdgpu_ring *ring;
3903
3904         if (!(adev->flags & AMD_IS_APU))
3905                 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3906
3907         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3908                 if (adev->gfx.num_gfx_rings) {
3909                         /* legacy firmware loading */
3910                         r = gfx_v9_0_cp_gfx_load_microcode(adev);
3911                         if (r)
3912                                 return r;
3913                 }
3914
3915                 r = gfx_v9_0_cp_compute_load_microcode(adev);
3916                 if (r)
3917                         return r;
3918         }
3919
3920         r = gfx_v9_0_kiq_resume(adev);
3921         if (r)
3922                 return r;
3923
3924         if (adev->gfx.num_gfx_rings) {
3925                 r = gfx_v9_0_cp_gfx_resume(adev);
3926                 if (r)
3927                         return r;
3928         }
3929
3930         r = gfx_v9_0_kcq_resume(adev);
3931         if (r)
3932                 return r;
3933
3934         if (adev->gfx.num_gfx_rings) {
3935                 ring = &adev->gfx.gfx_ring[0];
3936                 r = amdgpu_ring_test_helper(ring);
3937                 if (r)
3938                         return r;
3939         }
3940
3941         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3942                 ring = &adev->gfx.compute_ring[i];
3943                 amdgpu_ring_test_helper(ring);
3944         }
3945
3946         gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3947
3948         return 0;
3949 }
3950
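/*
 * Mirror the DF's address-hashing status (64K/2M/1G) into TCP_ADDR_CONFIG
 * so the GC side decodes addresses consistently with the data fabric;
 * per the check below, only GC 9.4.1 and 9.4.2 need this.
 */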
3951 static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
3952 {
3953         u32 tmp;
3954
3955         if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1) &&
3956             adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 2))
3957                 return;
3958
3959         tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
3960         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE64KHASH,
3961                                 adev->df.hash_status.hash_64k);
3962         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE2MHASH,
3963                                 adev->df.hash_status.hash_2m);
3964         tmp = REG_SET_FIELD(tmp, TCP_ADDR_CONFIG, ENABLE1GHASH,
3965                                 adev->df.hash_status.hash_1g);
3966         WREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG, tmp);
3967 }
3968
3969 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3970 {
3971         if (adev->gfx.num_gfx_rings)
3972                 gfx_v9_0_cp_gfx_enable(adev, enable);
3973         gfx_v9_0_cp_compute_enable(adev, enable);
3974 }
3975
3976 static int gfx_v9_0_hw_init(void *handle)
3977 {
3978         int r;
3979         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3980
3981         if (!amdgpu_sriov_vf(adev))
3982                 gfx_v9_0_init_golden_registers(adev);
3983
3984         gfx_v9_0_constants_init(adev);
3985
3986         gfx_v9_0_init_tcp_config(adev);
3987
3988         r = adev->gfx.rlc.funcs->resume(adev);
3989         if (r)
3990                 return r;
3991
3992         r = gfx_v9_0_cp_resume(adev);
3993         if (r)
3994                 return r;
3995
3996         if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
3997                 gfx_v9_4_2_set_power_brake_sequence(adev);
3998
3999         return r;
4000 }
4001
4002 static int gfx_v9_0_hw_fini(void *handle)
4003 {
4004         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4005
4006         amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
4007         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4008         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4009
4010         /* if a RAS interrupt was triggered, the DF freeze and KCQ disable would fail */
4011         if (!amdgpu_ras_intr_triggered())
4012                 /* disable the KCQs so the CPC stops touching memory that is no longer valid */
4013                 amdgpu_gfx_disable_kcq(adev);
4014
4015         if (amdgpu_sriov_vf(adev)) {
4016                 gfx_v9_0_cp_gfx_enable(adev, false);
4017                 /* must disable wptr polling for SRIOV once the hw is finished;
4018                  * otherwise the CPC engine may keep fetching a WB address that
4019                  * is already invalid after sw teardown, triggering a DMAR read
4020                  * error on the hypervisor side.
4021                  */
4022                 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4023                 return 0;
4024         }
4025
4026         /* Use the deinitialize sequence from CAIL when unbinding the device
4027          * from the driver, otherwise the KIQ hangs when binding back
4028          */
4029         if (!amdgpu_in_reset(adev) && !adev->in_suspend) {
4030                 mutex_lock(&adev->srbm_mutex);
4031                 soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
4032                                 adev->gfx.kiq.ring.pipe,
4033                                 adev->gfx.kiq.ring.queue, 0);
4034                 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
4035                 soc15_grbm_select(adev, 0, 0, 0, 0);
4036                 mutex_unlock(&adev->srbm_mutex);
4037         }
4038
4039         gfx_v9_0_cp_enable(adev, false);
4040
4041         /* Skip stopping RLC with A+A reset or when RLC controls GFX clock */
4042         if ((adev->gmc.xgmi.connected_to_cpu && amdgpu_in_reset(adev)) ||
4043             (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 4, 2))) {
4044                 dev_dbg(adev->dev, "Skipping RLC halt\n");
4045                 return 0;
4046         }
4047
4048         adev->gfx.rlc.funcs->stop(adev);
4049         return 0;
4050 }
4051
4052 static int gfx_v9_0_suspend(void *handle)
4053 {
4054         return gfx_v9_0_hw_fini(handle);
4055 }
4056
4057 static int gfx_v9_0_resume(void *handle)
4058 {
4059         return gfx_v9_0_hw_init(handle);
4060 }
4061
4062 static bool gfx_v9_0_is_idle(void *handle)
4063 {
4064         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4065
4066         if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
4067                                 GRBM_STATUS, GUI_ACTIVE))
4068                 return false;
4069         else
4070                 return true;
4071 }
4072
4073 static int gfx_v9_0_wait_for_idle(void *handle)
4074 {
4075         unsigned i;
4076         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4077
4078         for (i = 0; i < adev->usec_timeout; i++) {
4079                 if (gfx_v9_0_is_idle(handle))
4080                         return 0;
4081                 udelay(1);
4082         }
4083         return -ETIMEDOUT;
4084 }
4085
4086 static int gfx_v9_0_soft_reset(void *handle)
4087 {
4088         u32 grbm_soft_reset = 0;
4089         u32 tmp;
4090         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4091
4092         /* GRBM_STATUS */
4093         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
4094         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4095                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4096                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4097                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4098                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4099                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4100                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4101                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4102                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4103                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4104         }
4105
4106         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4107                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4108                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4109         }
4110
4111         /* GRBM_STATUS2 */
4112         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
4113         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4114                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4115                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4116
4117
4118         if (grbm_soft_reset) {
4119                 /* stop the rlc */
4120                 adev->gfx.rlc.funcs->stop(adev);
4121
4122                 if (adev->gfx.num_gfx_rings)
4123                         /* Disable GFX parsing/prefetching */
4124                         gfx_v9_0_cp_gfx_enable(adev, false);
4125
4126                 /* Disable MEC parsing/prefetching */
4127                 gfx_v9_0_cp_compute_enable(adev, false);
4128
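                /*
                 * Assert-then-deassert sequence: set the SOFT_RESET_* bits,
                 * read the register back to post the write, hold ~50us,
                 * then clear the bits and read back again to release the
                 * blocks from reset.
                 */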
4129                 if (grbm_soft_reset) {
4130                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4131                         tmp |= grbm_soft_reset;
4132                         dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4133                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4134                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4135
4136                         udelay(50);
4137
4138                         tmp &= ~grbm_soft_reset;
4139                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4140                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4141                 }
4142
4143                 /* Wait a little for things to settle down */
4144                 udelay(50);
4145         }
4146         return 0;
4147 }
4148
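/*
 * Read the 64-bit RLC GPU clock through the KIQ: emit a COPY_DATA packet
 * that copies the register pair into a writeback (WB) slot, poll the
 * fence for completion, then pick the two dwords out of WB memory.  Used
 * below for the SRIOV case, where a direct MMIO capture is not usable.
 */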
4149 static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev)
4150 {
4151         signed long r, cnt = 0;
4152         unsigned long flags;
4153         uint32_t seq, reg_val_offs = 0;
4154         uint64_t value = 0;
4155         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
4156         struct amdgpu_ring *ring = &kiq->ring;
4157
4158         BUG_ON(!ring->funcs->emit_rreg);
4159
4160         spin_lock_irqsave(&kiq->ring_lock, flags);
4161         if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
4162                 pr_err("critical bug! too many kiq readers\n");
4163                 goto failed_unlock;
4164         }
4165         amdgpu_ring_alloc(ring, 32);
4166         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
4167         amdgpu_ring_write(ring, 9 |     /* src: register*/
4168                                 (5 << 8) |      /* dst: memory */
4169                                 (1 << 16) |     /* count sel */
4170                                 (1 << 20));     /* write confirm */
4171         amdgpu_ring_write(ring, 0);
4172         amdgpu_ring_write(ring, 0);
4173         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
4174                                 reg_val_offs * 4));
4175         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
4176                                 reg_val_offs * 4));
4177         r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
4178         if (r)
4179                 goto failed_undo;
4180
4181         amdgpu_ring_commit(ring);
4182         spin_unlock_irqrestore(&kiq->ring_lock, flags);
4183
4184         r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4185
4186         /* Don't wait any longer in the GPU-reset case, because doing so
4187          * may block the gpu_recover() routine forever: e.g. if this read
4188          * is triggered from TTM, ttm_bo_lock_delayed_workqueue() never
4189          * returns while we keep waiting here, which leaves gpu_recover()
4190          * hanging.
4191          *
4192          * Also don't keep waiting when called from IRQ context.
4193          */
4194         if (r < 1 && amdgpu_in_reset(adev))
4195                 goto failed_kiq_read;
4196
4197         might_sleep();
4198         while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
4199                 msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
4200                 r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
4201         }
4202
4203         if (cnt > MAX_KIQ_REG_TRY)
4204                 goto failed_kiq_read;
4205
4206         mb();
4207         value = (uint64_t)adev->wb.wb[reg_val_offs] |
4208                 (uint64_t)adev->wb.wb[reg_val_offs + 1] << 32ULL;
4209         amdgpu_device_wb_free(adev, reg_val_offs);
4210         return value;
4211
4212 failed_undo:
4213         amdgpu_ring_undo(ring);
4214 failed_unlock:
4215         spin_unlock_irqrestore(&kiq->ring_lock, flags);
4216 failed_kiq_read:
4217         if (reg_val_offs)
4218                 amdgpu_device_wb_free(adev, reg_val_offs);
4219         pr_err("failed to read gpu clock\n");
4220         return ~0;
4221 }
4222
4223 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4224 {
4225         uint64_t clock, clock_lo, clock_hi, hi_check;
4226
4227         switch (adev->ip_versions[GC_HWIP][0]) {
4228         case IP_VERSION(9, 3, 0):
4229                 preempt_disable();
4230                 clock_hi = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4231                 clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4232                 hi_check = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_UPPER_Renoir);
4233                 /* The SMUIO TSC runs at 100MHz, so its 32-bit low word wraps
4234                  * roughly every 2^32 / 100MHz ~= 42.9s; re-read the low word
4235                  * if the high word changed between the reads. */
4236                 if (hi_check != clock_hi) {
4237                         clock_lo = RREG32_SOC15_NO_KIQ(SMUIO, 0, mmGOLDEN_TSC_COUNT_LOWER_Renoir);
4238                         clock_hi = hi_check;
4239                 }
4240                 preempt_enable();
4241                 clock = clock_lo | (clock_hi << 32ULL);
4242                 break;
4243         default:
4244                 amdgpu_gfx_off_ctrl(adev, false);
4245                 mutex_lock(&adev->gfx.gpu_clock_mutex);
4246                 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 0, 1) && amdgpu_sriov_runtime(adev)) {
4247                         clock = gfx_v9_0_kiq_read_clock(adev);
4248                 } else {
4249                         WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4250                         clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4251                                 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4252                 }
4253                 mutex_unlock(&adev->gfx.gpu_clock_mutex);
4254                 amdgpu_gfx_off_ctrl(adev, true);
4255                 break;
4256         }
4257         return clock;
4258 }
4259
4260 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4261                                           uint32_t vmid,
4262                                           uint32_t gds_base, uint32_t gds_size,
4263                                           uint32_t gws_base, uint32_t gws_size,
4264                                           uint32_t oa_base, uint32_t oa_size)
4265 {
4266         struct amdgpu_device *adev = ring->adev;
4267
4268         /* GDS Base */
4269         gfx_v9_0_write_data_to_reg(ring, 0, false,
4270                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4271                                    gds_base);
4272
4273         /* GDS Size */
4274         gfx_v9_0_write_data_to_reg(ring, 0, false,
4275                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4276                                    gds_size);
4277
4278         /* GWS */
4279         gfx_v9_0_write_data_to_reg(ring, 0, false,
4280                                    SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4281                                    gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4282
4283         /* OA */
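        /*
         * (1 << (oa_size + oa_base)) - (1 << oa_base) builds a contiguous
         * mask of oa_size bits starting at bit oa_base; e.g. base 4 and
         * size 4 yield 0xf0.
         */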
4284         gfx_v9_0_write_data_to_reg(ring, 0, false,
4285                                    SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4286                                    (1 << (oa_size + oa_base)) - (1 << oa_base));
4287 }
4288
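/*
 * Pre-assembled GFX9 compute shader binaries used by the EDC GPR
 * workarounds below; they write every VGPR/SGPR so that the GPR SRAM
 * contents, and with them the error-detection state, start out clean.
 */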
4289 static const u32 vgpr_init_compute_shader[] =
4290 {
4291         0xb07c0000, 0xbe8000ff,
4292         0x000000f8, 0xbf110800,
4293         0x7e000280, 0x7e020280,
4294         0x7e040280, 0x7e060280,
4295         0x7e080280, 0x7e0a0280,
4296         0x7e0c0280, 0x7e0e0280,
4297         0x80808800, 0xbe803200,
4298         0xbf84fff5, 0xbf9c0000,
4299         0xd28c0001, 0x0001007f,
4300         0xd28d0001, 0x0002027e,
4301         0x10020288, 0xb8810904,
4302         0xb7814000, 0xd1196a01,
4303         0x00000301, 0xbe800087,
4304         0xbefc00c1, 0xd89c4000,
4305         0x00020201, 0xd89cc080,
4306         0x00040401, 0x320202ff,
4307         0x00000800, 0x80808100,
4308         0xbf84fff8, 0x7e020280,
4309         0xbf810000, 0x00000000,
4310 };
4311
4312 static const u32 sgpr_init_compute_shader[] =
4313 {
4314         0xb07c0000, 0xbe8000ff,
4315         0x0000005f, 0xbee50080,
4316         0xbe812c65, 0xbe822c65,
4317         0xbe832c65, 0xbe842c65,
4318         0xbe852c65, 0xb77c0005,
4319         0x80808500, 0xbf84fff8,
4320         0xbe800080, 0xbf810000,
4321 };
4322
4323 static const u32 vgpr_init_compute_shader_arcturus[] = {
4324         0xd3d94000, 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080,
4325         0xd3d94003, 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080,
4326         0xd3d94006, 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080,
4327         0xd3d94009, 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080,
4328         0xd3d9400c, 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080,
4329         0xd3d9400f, 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080,
4330         0xd3d94012, 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080,
4331         0xd3d94015, 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080,
4332         0xd3d94018, 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080,
4333         0xd3d9401b, 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080,
4334         0xd3d9401e, 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080,
4335         0xd3d94021, 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080,
4336         0xd3d94024, 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080,
4337         0xd3d94027, 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080,
4338         0xd3d9402a, 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080,
4339         0xd3d9402d, 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080,
4340         0xd3d94030, 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080,
4341         0xd3d94033, 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080,
4342         0xd3d94036, 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080,
4343         0xd3d94039, 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080,
4344         0xd3d9403c, 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080,
4345         0xd3d9403f, 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080,
4346         0xd3d94042, 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080,
4347         0xd3d94045, 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080,
4348         0xd3d94048, 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080,
4349         0xd3d9404b, 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080,
4350         0xd3d9404e, 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080,
4351         0xd3d94051, 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080,
4352         0xd3d94054, 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080,
4353         0xd3d94057, 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080,
4354         0xd3d9405a, 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080,
4355         0xd3d9405d, 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080,
4356         0xd3d94060, 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080,
4357         0xd3d94063, 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080,
4358         0xd3d94066, 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080,
4359         0xd3d94069, 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080,
4360         0xd3d9406c, 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080,
4361         0xd3d9406f, 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080,
4362         0xd3d94072, 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080,
4363         0xd3d94075, 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080,
4364         0xd3d94078, 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080,
4365         0xd3d9407b, 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080,
4366         0xd3d9407e, 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080,
4367         0xd3d94081, 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080,
4368         0xd3d94084, 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080,
4369         0xd3d94087, 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080,
4370         0xd3d9408a, 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080,
4371         0xd3d9408d, 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080,
4372         0xd3d94090, 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080,
4373         0xd3d94093, 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080,
4374         0xd3d94096, 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080,
4375         0xd3d94099, 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080,
4376         0xd3d9409c, 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080,
4377         0xd3d9409f, 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080,
4378         0xd3d940a2, 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080,
4379         0xd3d940a5, 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080,
4380         0xd3d940a8, 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080,
4381         0xd3d940ab, 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080,
4382         0xd3d940ae, 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080,
4383         0xd3d940b1, 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080,
4384         0xd3d940b4, 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080,
4385         0xd3d940b7, 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080,
4386         0xd3d940ba, 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080,
4387         0xd3d940bd, 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080,
4388         0xd3d940c0, 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080,
4389         0xd3d940c3, 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080,
4390         0xd3d940c6, 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080,
4391         0xd3d940c9, 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080,
4392         0xd3d940cc, 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080,
4393         0xd3d940cf, 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080,
4394         0xd3d940d2, 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080,
4395         0xd3d940d5, 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080,
4396         0xd3d940d8, 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080,
4397         0xd3d940db, 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080,
4398         0xd3d940de, 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080,
4399         0xd3d940e1, 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080,
4400         0xd3d940e4, 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080,
4401         0xd3d940e7, 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080,
4402         0xd3d940ea, 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080,
4403         0xd3d940ed, 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080,
4404         0xd3d940f0, 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080,
4405         0xd3d940f3, 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080,
4406         0xd3d940f6, 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080,
4407         0xd3d940f9, 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080,
4408         0xd3d940fc, 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080,
4409         0xd3d940ff, 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a,
4410         0x7e000280, 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280,
4411         0x7e0c0280, 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000,
4412         0xd28c0001, 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xb88b0904,
4413         0xb78b4000, 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000,
4414         0x00020201, 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a,
4415         0xbf84fff8, 0xbf810000,
4416 };
4417
4418 /* When the register arrays below are changed, please update gpr_reg_size
4419  * and sec_ded_counter_reg_size in gfx_v9_0_do_edc_gpr_workarounds()
4420  * to cover all gfx9 ASICs */
4421 static const struct soc15_reg_entry vgpr_init_regs[] = {
4422    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4423    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4424    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4425    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4426    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
4427    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4428    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4429    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4430    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4431    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4432    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4433    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4434    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4435    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4436 };
4437
4438 static const struct soc15_reg_entry vgpr_init_regs_arcturus[] = {
4439    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4440    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4441    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
4442    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4443    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0xbf },
4444    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4445    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4446    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4447    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4448    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4449    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
4450    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
4451    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
4452    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
4453 };
4454
4455 static const struct soc15_reg_entry sgpr1_init_regs[] = {
4456    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4457    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4458    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4459    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4460    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4461    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4462    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x000000ff },
4463    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x000000ff },
4464    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x000000ff },
4465    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x000000ff },
4466    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x000000ff },
4467    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x000000ff },
4468    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x000000ff },
4469    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x000000ff },
4470 };
4471
4472 static const struct soc15_reg_entry sgpr2_init_regs[] = {
4473    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
4474    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
4475    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
4476    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4477    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
4478    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4479    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x0000ff00 },
4480    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x0000ff00 },
4481    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x0000ff00 },
4482    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x0000ff00 },
4483    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE4), 0x0000ff00 },
4484    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE5), 0x0000ff00 },
4485    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE6), 0x0000ff00 },
4486    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE7), 0x0000ff00 },
4487 };
4488
4489 static const struct soc15_reg_entry gfx_v9_0_edc_counter_regs[] = {
4490    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4491    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4492    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4493    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4494    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4495    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4496    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4497    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4498    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4499    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4500    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4501    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4502    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4503    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4504    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4505    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4506    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4507    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4508    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4509    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4510    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 0, 4, 16},
4511    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4512    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4513    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4514    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4515    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4516    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4517    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4518    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4519    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4520    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4521    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4522    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4523 };
4524
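/*
 * Clear the whole GDS block with a CP DMA_DATA fill (DST_SEL(1) selects
 * GDS as the destination) so its error-detection state starts clean;
 * only run when GFX RAS is enabled.
 */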
4525 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4526 {
4527         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4528         int i, r;
4529
4530         /* only supported when RAS is enabled */
4531         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4532                 return 0;
4533
4534         r = amdgpu_ring_alloc(ring, 7);
4535         if (r) {
4536                 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4537                         ring->name, r);
4538                 return r;
4539         }
4540
4541         WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4542         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4543
4544         amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4545         amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4546                                 PACKET3_DMA_DATA_DST_SEL(1) |
4547                                 PACKET3_DMA_DATA_SRC_SEL(2) |
4548                                 PACKET3_DMA_DATA_ENGINE(0)));
4549         amdgpu_ring_write(ring, 0);
4550         amdgpu_ring_write(ring, 0);
4551         amdgpu_ring_write(ring, 0);
4552         amdgpu_ring_write(ring, 0);
4553         amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4554                                 adev->gds.gds_size);
4555
4556         amdgpu_ring_commit(ring);
4557
4558         for (i = 0; i < adev->usec_timeout; i++) {
4559                 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4560                         break;
4561                 udelay(1);
4562         }
4563
4564         if (i >= adev->usec_timeout)
4565                 r = -ETIMEDOUT;
4566
4567         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4568
4569         return r;
4570 }
4571
4572 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4573 {
4574         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4575         struct amdgpu_ib ib;
4576         struct dma_fence *f = NULL;
4577         int r, i;
4578         unsigned total_size, vgpr_offset, sgpr_offset;
4579         u64 gpu_addr;
4580
4581         int compute_dim_x = adev->gfx.config.max_shader_engines *
4582                                                 adev->gfx.config.max_cu_per_sh *
4583                                                 adev->gfx.config.max_sh_per_se;
4584         int sgpr_work_group_size = 5;
4585         int gpr_reg_size = adev->gfx.config.max_shader_engines + 6;
4586         int vgpr_init_shader_size;
4587         const u32 *vgpr_init_shader_ptr;
4588         const struct soc15_reg_entry *vgpr_init_regs_ptr;
4589
4590         /* only supported when RAS is enabled */
4591         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4592                 return 0;
4593
4594         /* bail if the compute ring is not ready */
4595         if (!ring->sched.ready)
4596                 return 0;
4597
4598         if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1)) {
4599                 vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
4600                 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
4601                 vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
4602         } else {
4603                 vgpr_init_shader_ptr = vgpr_init_compute_shader;
4604                 vgpr_init_shader_size = sizeof(vgpr_init_compute_shader);
4605                 vgpr_init_regs_ptr = vgpr_init_regs;
4606         }
4607
4608         total_size =
4609                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* VGPRS */
4610         total_size +=
4611                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS1 */
4612         total_size +=
4613                 (gpr_reg_size * 3 + 4 + 5 + 2) * 4; /* SGPRS2 */
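        /*
         * Per dispatch section the IB holds, for each register, a 3-dword
         * SET_SH_REG packet, plus 4 dwords for COMPUTE_PGM_LO/HI, 5 for
         * DISPATCH_DIRECT and 2 for the CS-partial-flush event: hence
         * (gpr_reg_size * 3 + 4 + 5 + 2) dwords, times 4 bytes each.
         */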
4614         total_size = ALIGN(total_size, 256);
4615         vgpr_offset = total_size;
4616         total_size += ALIGN(vgpr_init_shader_size, 256);
4617         sgpr_offset = total_size;
4618         total_size += sizeof(sgpr_init_compute_shader);
4619
4620         /* allocate an indirect buffer to put the commands in */
4621         memset(&ib, 0, sizeof(ib));
4622         r = amdgpu_ib_get(adev, NULL, total_size,
4623                                         AMDGPU_IB_POOL_DIRECT, &ib);
4624         if (r) {
4625                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4626                 return r;
4627         }
4628
4629         /* load the compute shaders */
4630         for (i = 0; i < vgpr_init_shader_size/sizeof(u32); i++)
4631                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_shader_ptr[i];
4632
4633         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4634                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4635
4636         /* init the ib length to 0 */
4637         ib.length_dw = 0;
4638
4639         /* VGPR */
4640         /* write the register state for the compute dispatch */
4641         for (i = 0; i < gpr_reg_size; i++) {
4642                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4643                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs_ptr[i])
4644                                                                 - PACKET3_SET_SH_REG_START;
4645                 ib.ptr[ib.length_dw++] = vgpr_init_regs_ptr[i].reg_value;
4646         }
4647         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4648         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
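             /* COMPUTE_PGM_LO/HI take the shader address in 256-byte units,
              * hence the >> 8 here and the 256-byte alignment of vgpr_offset.
              */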
4649         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4650         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4651                                                         - PACKET3_SET_SH_REG_START;
4652         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4653         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4654
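             /* compute_dim_x is the total CU count; dispatching two workgroups
              * per CU should let the init shader reach every CU on the chip.
              */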
4655         /* write dispatch packet */
4656         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4657         ib.ptr[ib.length_dw++] = compute_dim_x * 2; /* x */
4658         ib.ptr[ib.length_dw++] = 1; /* y */
4659         ib.ptr[ib.length_dw++] = 1; /* z */
4660         ib.ptr[ib.length_dw++] =
4661                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4662
4663         /* write CS partial flush packet */
4664         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4665         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4666
4667         /* SGPR1 */
4668         /* write the register state for the compute dispatch */
4669         for (i = 0; i < gpr_reg_size; i++) {
4670                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4671                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr1_init_regs[i])
4672                                                                 - PACKET3_SET_SH_REG_START;
4673                 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i].reg_value;
4674         }
4675         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4676         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4677         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4678         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4679                                                         - PACKET3_SET_SH_REG_START;
4680         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4681         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4682
4683         /* write dispatch packet */
4684         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4685         ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4686         ib.ptr[ib.length_dw++] = 1; /* y */
4687         ib.ptr[ib.length_dw++] = 1; /* z */
4688         ib.ptr[ib.length_dw++] =
4689                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4690
4691         /* write CS partial flush packet */
4692         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4693         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4694
4695         /* SGPR2 */
4696         /* write the register state for the compute dispatch */
4697         for (i = 0; i < gpr_reg_size; i++) {
4698                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4699                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr2_init_regs[i])
4700                                                                 - PACKET3_SET_SH_REG_START;
4701                 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i].reg_value;
4702         }
4703         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4704         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4705         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4706         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4707                                                         - PACKET3_SET_SH_REG_START;
4708         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4709         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4710
4711         /* write dispatch packet */
4712         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4713         ib.ptr[ib.length_dw++] = compute_dim_x / 2 * sgpr_work_group_size; /* x */
4714         ib.ptr[ib.length_dw++] = 1; /* y */
4715         ib.ptr[ib.length_dw++] = 1; /* z */
4716         ib.ptr[ib.length_dw++] =
4717                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4718
4719         /* write CS partial flush packet */
4720         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4721         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4722
4723         /* schedule the IB on the ring */
4724         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4725         if (r) {
4726                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4727                 goto fail;
4728         }
4729
4730         /* wait for the GPU to finish processing the IB */
4731         r = dma_fence_wait(f, false);
4732         if (r) {
4733                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4734                 goto fail;
4735         }
4736
4737 fail:
4738         amdgpu_ib_free(adev, &ib, NULL);
4739         dma_fence_put(f);
4740
4741         return r;
4742 }
4743
4744 static int gfx_v9_0_early_init(void *handle)
4745 {
4746         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4747
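             /* Arcturus (GC 9.4.1) and Aldebaran (GC 9.4.2) are compute-only
              * ASICs, so they expose no graphics rings.
              */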
4748         if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
4749             adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
4750                 adev->gfx.num_gfx_rings = 0;
4751         else
4752                 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4753         adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
4754                                           AMDGPU_MAX_COMPUTE_RINGS);
4755         gfx_v9_0_set_kiq_pm4_funcs(adev);
4756         gfx_v9_0_set_ring_funcs(adev);
4757         gfx_v9_0_set_irq_funcs(adev);
4758         gfx_v9_0_set_gds_init(adev);
4759         gfx_v9_0_set_rlc_funcs(adev);
4760
4761         /* init rlcg reg access ctrl */
4762         gfx_v9_0_init_rlcg_reg_access_ctrl(adev);
4763
4764         return 0;
4765 }
4766
4767 static int gfx_v9_0_ecc_late_init(void *handle)
4768 {
4769         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4770         int r;
4771
4772         /*
4773          * Temporary workaround: on several cards the CP firmware fails to
4774          * update the read pointer while CPDMA writes the GDS clearing
4775          * operation during the suspend/resume sequence, so limit this
4776          * operation to the cold boot sequence.
4777          */
4778         if ((!adev->in_suspend) &&
4779             (adev->gds.gds_size)) {
4780                 r = gfx_v9_0_do_edc_gds_workarounds(adev);
4781                 if (r)
4782                         return r;
4783         }
4784
4785         /* requires IBs so do in late init after IB pool is initialized */
4786         if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
4787                 r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
4788         else
4789                 r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4790
4791         if (r)
4792                 return r;
4793
4794         if (adev->gfx.ras && adev->gfx.ras->ras_block.ras_late_init) {
4795                 r = adev->gfx.ras->ras_block.ras_late_init(adev, adev->gfx.ras_if);
4796                 if (r)
4797                         return r;
4798         }
4799
4800         if (adev->gfx.ras &&
4801             adev->gfx.ras->enable_watchdog_timer)
4802                 adev->gfx.ras->enable_watchdog_timer(adev);
4803
4804         return 0;
4805 }
4806
4807 static int gfx_v9_0_late_init(void *handle)
4808 {
4809         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4810         int r;
4811
4812         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4813         if (r)
4814                 return r;
4815
4816         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4817         if (r)
4818                 return r;
4819
4820         r = gfx_v9_0_ecc_late_init(handle);
4821         if (r)
4822                 return r;
4823
4824         return 0;
4825 }
4826
4827 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4828 {
4829         uint32_t rlc_setting;
4830
4831         /* if RLC is not enabled, do nothing */
4832         rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4833         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4834                 return false;
4835
4836         return true;
4837 }
4838
4839 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4840 {
4841         uint32_t data;
4842         unsigned i;
4843
4844         data = RLC_SAFE_MODE__CMD_MASK;
4845         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4846         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4847
4848         /* wait for RLC ack: the CMD bit clears once safe mode is entered */
4849         for (i = 0; i < adev->usec_timeout; i++) {
4850                 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4851                         break;
4852                 udelay(1);
4853         }
4854 }
4855
4856 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4857 {
4858         uint32_t data;
4859
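             /* CMD with the MESSAGE field left at 0 requests leaving safe
              * mode; set_safe_mode above uses MESSAGE = 1 to enter it.
              */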
4860         data = RLC_SAFE_MODE__CMD_MASK;
4861         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4862 }
4863
4864 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4865                                                 bool enable)
4866 {
4867         amdgpu_gfx_rlc_enter_safe_mode(adev);
4868
4869         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4870                 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4871                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4872                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4873         } else {
4874                 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4875                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4876                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4877         }
4878
4879         amdgpu_gfx_rlc_exit_safe_mode(adev);
4880 }
4881
4882 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4883                                                 bool enable)
4884 {
4885         /* TODO: double check if we need to perform under safe mode */
4886         /* gfx_v9_0_enter_rlc_safe_mode(adev); */
4887
4888         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4889                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4890         else
4891                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4892
4893         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4894                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4895         else
4896                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4897
4898         /* gfx_v9_0_exit_rlc_safe_mode(adev); */
4899 }
4900
4901 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4902                                                       bool enable)
4903 {
4904         uint32_t data, def;
4905
4906         amdgpu_gfx_rlc_enter_safe_mode(adev);
4907
4908         /* It is disabled by HW by default */
4909         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4910                 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4911                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4912
4913                 if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1))
4914                         data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4915
4916                 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4917                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4918                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4919
4920                 /* only for Vega10 & Raven1 */
4921                 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4922
4923                 if (def != data)
4924                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4925
4926                 /* MGLS is a global flag to control all MGLS in GFX */
4927                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4928                         /* 2 - RLC memory Light sleep */
4929                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4930                                 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4931                                 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4932                                 if (def != data)
4933                                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4934                         }
4935                         /* 3 - CP memory Light sleep */
4936                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4937                                 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4938                                 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4939                                 if (def != data)
4940                                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4941                         }
4942                 }
4943         } else {
4944                 /* 1 - MGCG_OVERRIDE */
4945                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4946
4947                 if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 2, 1))
4948                         data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4949
4950                 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4951                          RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4952                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4953                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4954
4955                 if (def != data)
4956                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4957
4958                 /* 2 - disable MGLS in RLC */
4959                 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4960                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4961                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4962                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4963                 }
4964
4965                 /* 3 - disable MGLS in CP */
4966                 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4967                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4968                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4969                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4970                 }
4971         }
4972
4973         amdgpu_gfx_rlc_exit_safe_mode(adev);
4974 }
4975
4976 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4977                                            bool enable)
4978 {
4979         uint32_t data, def;
4980
4981         if (!adev->gfx.num_gfx_rings)
4982                 return;
4983
4984         amdgpu_gfx_rlc_enter_safe_mode(adev);
4985
4986         /* Enable 3D CGCG/CGLS */
4987         if (enable) {
4988                 /* write cmd to clear cgcg/cgls ov */
4989                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4990                 /* unset CGCG override */
4991                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4992                 /* update CGCG and CGLS override bits */
4993                 if (def != data)
4994                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4995
4996                 /* enable 3D CGCG FSM (0x0000363f) */
4997                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4998
4999                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)
5000                         data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5001                                 RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
5002                 else
5003                         data = 0x0 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT;
5004
5005                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
5006                         data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5007                                 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
5008                 if (def != data)
5009                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
5010
5011                 /* set IDLE_POLL_COUNT(0x00900100) */
5012                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5013                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5014                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5015                 if (def != data)
5016                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5017         } else {
5018                 /* Disable CGCG/CGLS */
5019                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
5020                 /* disable cgcg, cgls should be disabled */
5021                 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
5022                           RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
5023                 /* disable cgcg and cgls in FSM */
5024                 if (def != data)
5025                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
5026         }
5027
5028         amdgpu_gfx_rlc_exit_safe_mode(adev);
5029 }
5030
5031 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5032                                                       bool enable)
5033 {
5034         uint32_t def, data;
5035
5036         amdgpu_gfx_rlc_enter_safe_mode(adev);
5037
5038         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5039                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
5040                 /* unset CGCG override */
5041                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
5042                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5043                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5044                 else
5045                         data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
5046                 /* update CGCG and CGLS override bits */
5047                 if (def != data)
5048                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
5049
5050                 /* enable cgcg FSM (0x0000363F) */
5051                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5052
5053                 if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1))
5054                         data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5055                                 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5056                 else
5057                         data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
5058                                 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5059                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
5060                         data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
5061                                 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5062                 if (def != data)
5063                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5064
5065                 /* set IDLE_POLL_COUNT(0x00900100) */
5066                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
5067                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
5068                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
5069                 if (def != data)
5070                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
5071         } else {
5072                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
5073                 /* reset CGCG/CGLS bits */
5074                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5075                 /* disable cgcg and cgls in FSM */
5076                 if (def != data)
5077                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
5078         }
5079
5080         amdgpu_gfx_rlc_exit_safe_mode(adev);
5081 }
5082
5083 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5084                                             bool enable)
5085 {
5086         if (enable) {
5087                 /* CGCG/CGLS should be enabled after MGCG/MGLS
5088                  * ===  MGCG + MGLS ===
5089                  */
5090                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5091                 /* ===  CGCG /CGLS for GFX 3D Only === */
5092                 gfx_v9_0_update_3d_clock_gating(adev, enable);
5093                 /* ===  CGCG + CGLS === */
5094                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5095         } else {
5096                 /* CGCG/CGLS should be disabled before MGCG/MGLS
5097                  * ===  CGCG + CGLS ===
5098                  */
5099                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
5100                 /* ===  CGCG /CGLS for GFX 3D Only === */
5101                 gfx_v9_0_update_3d_clock_gating(adev, enable);
5102                 /* ===  MGCG + MGLS === */
5103                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
5104         }
5105         return 0;
5106 }
5107
5108 static void gfx_v9_0_update_spm_vmid(struct amdgpu_device *adev, unsigned vmid)
5109 {
5110         u32 reg, data;
5111
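             /* Keep GFXOFF disallowed for the read-modify-write below: RLC
              * registers are not reliably accessible while GFX is powered off.
              */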
5112         amdgpu_gfx_off_ctrl(adev, false);
5113
5114         reg = SOC15_REG_OFFSET(GC, 0, mmRLC_SPM_MC_CNTL);
5115         if (amdgpu_sriov_is_pp_one_vf(adev))
5116                 data = RREG32_NO_KIQ(reg);
5117         else
5118                 data = RREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL);
5119
5120         data &= ~RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK;
5121         data |= (vmid & RLC_SPM_MC_CNTL__RLC_SPM_VMID_MASK) << RLC_SPM_MC_CNTL__RLC_SPM_VMID__SHIFT;
5122
5123         if (amdgpu_sriov_is_pp_one_vf(adev))
5124                 WREG32_SOC15_NO_KIQ(GC, 0, mmRLC_SPM_MC_CNTL, data);
5125         else
5126                 WREG32_SOC15(GC, 0, mmRLC_SPM_MC_CNTL, data);
5127
5128         amdgpu_gfx_off_ctrl(adev, true);
5129 }
5130
5131 static bool gfx_v9_0_check_rlcg_range(struct amdgpu_device *adev,
5132                                         uint32_t offset,
5133                                         struct soc15_reg_rlcg *entries, int arr_size)
5134 {
5135         int i;
5136         uint32_t reg;
5137
5138         if (!entries)
5139                 return false;
5140
5141         for (i = 0; i < arr_size; i++) {
5142                 const struct soc15_reg_rlcg *entry;
5143
5144                 entry = &entries[i];
5145                 reg = adev->reg_offset[entry->hwip][entry->instance][entry->segment] + entry->reg;
5146                 if (offset == reg)
5147                         return true;
5148         }
5149
5150         return false;
5151 }
5152
5153 static bool gfx_v9_0_is_rlcg_access_range(struct amdgpu_device *adev, u32 offset)
5154 {
5155         return gfx_v9_0_check_rlcg_range(adev, offset,
5156                                         (void *)rlcg_access_gc_9_0,
5157                                         ARRAY_SIZE(rlcg_access_gc_9_0));
5158 }
5159
5160 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
5161         .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
5162         .set_safe_mode = gfx_v9_0_set_safe_mode,
5163         .unset_safe_mode = gfx_v9_0_unset_safe_mode,
5164         .init = gfx_v9_0_rlc_init,
5165         .get_csb_size = gfx_v9_0_get_csb_size,
5166         .get_csb_buffer = gfx_v9_0_get_csb_buffer,
5167         .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
5168         .resume = gfx_v9_0_rlc_resume,
5169         .stop = gfx_v9_0_rlc_stop,
5170         .reset = gfx_v9_0_rlc_reset,
5171         .start = gfx_v9_0_rlc_start,
5172         .update_spm_vmid = gfx_v9_0_update_spm_vmid,
5173         .is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range,
5174 };
5175
5176 static int gfx_v9_0_set_powergating_state(void *handle,
5177                                           enum amd_powergating_state state)
5178 {
5179         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5180         bool enable = (state == AMD_PG_STATE_GATE);
5181
5182         switch (adev->ip_versions[GC_HWIP][0]) {
5183         case IP_VERSION(9, 2, 2):
5184         case IP_VERSION(9, 1, 0):
5185         case IP_VERSION(9, 3, 0):
5186                 if (!enable)
5187                         amdgpu_gfx_off_ctrl(adev, false);
5188
5189                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
5190                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
5191                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
5192                 } else {
5193                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
5194                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
5195                 }
5196
5197                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
5198                         gfx_v9_0_enable_cp_power_gating(adev, true);
5199                 else
5200                         gfx_v9_0_enable_cp_power_gating(adev, false);
5201
5202                 /* update gfx cgpg state */
5203                 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
5204
5205                 /* update mgcg state */
5206                 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
5207
5208                 if (enable)
5209                         amdgpu_gfx_off_ctrl(adev, true);
5210                 break;
5211         case IP_VERSION(9, 2, 1):
5212                 amdgpu_gfx_off_ctrl(adev, enable);
5213                 break;
5214         default:
5215                 break;
5216         }
5217
5218         return 0;
5219 }
5220
5221 static int gfx_v9_0_set_clockgating_state(void *handle,
5222                                           enum amd_clockgating_state state)
5223 {
5224         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5225
5226         if (amdgpu_sriov_vf(adev))
5227                 return 0;
5228
5229         switch (adev->ip_versions[GC_HWIP][0]) {
5230         case IP_VERSION(9, 0, 1):
5231         case IP_VERSION(9, 2, 1):
5232         case IP_VERSION(9, 4, 0):
5233         case IP_VERSION(9, 2, 2):
5234         case IP_VERSION(9, 1, 0):
5235         case IP_VERSION(9, 4, 1):
5236         case IP_VERSION(9, 3, 0):
5237         case IP_VERSION(9, 4, 2):
5238                 gfx_v9_0_update_gfx_clock_gating(adev,
5239                                                  state == AMD_CG_STATE_GATE);
5240                 break;
5241         default:
5242                 break;
5243         }
5244         return 0;
5245 }
5246
5247 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
5248 {
5249         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5250         int data;
5251
5252         if (amdgpu_sriov_vf(adev))
5253                 *flags = 0;
5254
5255         /* AMD_CG_SUPPORT_GFX_MGCG */
5256         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE));
5257         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
5258                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
5259
5260         /* AMD_CG_SUPPORT_GFX_CGCG */
5261         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL));
5262         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
5263                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
5264
5265         /* AMD_CG_SUPPORT_GFX_CGLS */
5266         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
5267                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
5268
5269         /* AMD_CG_SUPPORT_GFX_RLC_LS */
5270         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_MEM_SLP_CNTL));
5271         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
5272                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
5273
5274         /* AMD_CG_SUPPORT_GFX_CP_LS */
5275         data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmCP_MEM_SLP_CNTL));
5276         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
5277                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
5278
5279         if (adev->ip_versions[GC_HWIP][0] != IP_VERSION(9, 4, 1)) {
5280                 /* AMD_CG_SUPPORT_GFX_3D_CGCG */
5281                 data = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D));
5282                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
5283                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
5284
5285                 /* AMD_CG_SUPPORT_GFX_3D_CGLS */
5286                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
5287                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
5288         }
5289 }
5290
5291 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5292 {
5293         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
5294 }
5295
5296 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5297 {
5298         struct amdgpu_device *adev = ring->adev;
5299         u64 wptr;
5300
5301         /* XXX check if swapping is necessary on BE */
5302         if (ring->use_doorbell) {
5303                 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
5304         } else {
5305                 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
5306                 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
5307         }
5308
5309         return wptr;
5310 }
5311
5312 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5313 {
5314         struct amdgpu_device *adev = ring->adev;
5315
5316         if (ring->use_doorbell) {
5317                 /* XXX check if swapping is necessary on BE */
5318                 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5319                 WDOORBELL64(ring->doorbell_index, ring->wptr);
5320         } else {
5321                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
5322                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
5323         }
5324 }
5325
5326 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5327 {
5328         struct amdgpu_device *adev = ring->adev;
5329         u32 ref_and_mask, reg_mem_engine;
5330         const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
5331
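             /* Each engine owns one request/ack bit in the NBIO HDP flush
              * registers: gfx uses the CP0 bit, while the MECs get blocks of
              * per-pipe bits starting at CP2 (ME1) and CP6 (ME2).
              */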
5332         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
5333                 switch (ring->me) {
5334                 case 1:
5335                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
5336                         break;
5337                 case 2:
5338                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
5339                         break;
5340                 default:
5341                         return;
5342                 }
5343                 reg_mem_engine = 0;
5344         } else {
5345                 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5346                 reg_mem_engine = 1; /* pfp */
5347         }
5348
5349         gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5350                               adev->nbio.funcs->get_hdp_flush_req_offset(adev),
5351                               adev->nbio.funcs->get_hdp_flush_done_offset(adev),
5352                               ref_and_mask, ref_and_mask, 0x20);
5353 }
5354
5355 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5356                                         struct amdgpu_job *job,
5357                                         struct amdgpu_ib *ib,
5358                                         uint32_t flags)
5359 {
5360         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5361         u32 header, control = 0;
5362
5363         if (ib->flags & AMDGPU_IB_FLAG_CE)
5364                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5365         else
5366                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5367
5368         control |= ib->length_dw | (vmid << 24);
5369
5370         if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
5371                 control |= INDIRECT_BUFFER_PRE_ENB(1);
5372
5373                 if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
5374                         gfx_v9_0_ring_emit_de_meta(ring);
5375         }
5376
5377         amdgpu_ring_write(ring, header);
5378         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5379         amdgpu_ring_write(ring,
5380 #ifdef __BIG_ENDIAN
5381                 (2 << 0) |
5382 #endif
5383                 lower_32_bits(ib->gpu_addr));
5384         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5385         amdgpu_ring_write(ring, control);
5386 }
5387
5388 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5389                                           struct amdgpu_job *job,
5390                                           struct amdgpu_ib *ib,
5391                                           uint32_t flags)
5392 {
5393         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5394         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5395
5396         /* Currently, there is a high probability of a wave ID mismatch
5397          * between ME and GDS, leading to a hw deadlock, because ME generates
5398          * different wave IDs than the GDS expects. This situation happens
5399          * randomly when at least 5 compute pipes use GDS ordered append.
5400          * The wave IDs generated by ME are also wrong after suspend/resume.
5401          * Those are probably bugs somewhere else in the kernel driver.
5402          *
5403          * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5404          * GDS to 0 for this ring (me/pipe).
5405          */
5406         if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5407                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5408                 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5409                 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5410         }
5411
5412         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5413         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5414         amdgpu_ring_write(ring,
5415 #ifdef __BIG_ENDIAN
5416                                 (2 << 0) |
5417 #endif
5418                                 lower_32_bits(ib->gpu_addr));
5419         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5420         amdgpu_ring_write(ring, control);
5421 }
5422
5423 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5424                                      u64 seq, unsigned flags)
5425 {
5426         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5427         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5428         bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5429
5430         /* RELEASE_MEM - flush caches, send int */
5431         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5432         amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
5433                                                EOP_TC_NC_ACTION_EN) :
5434                                               (EOP_TCL1_ACTION_EN |
5435                                                EOP_TC_ACTION_EN |
5436                                                EOP_TC_WB_ACTION_EN |
5437                                                EOP_TC_MD_ACTION_EN)) |
5438                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5439                                  EVENT_INDEX(5)));
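             /* DATA_SEL: 1 = write the low 32 bits of seq, 2 = write all 64
              * bits; INT_SEL: 2 = raise the interrupt on write confirmation.
              */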
5440         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5441
5442         /*
5443          * the address should be Qword aligned for a 64bit write, Dword
5444          * aligned if we only send the low 32 bits (the high half is discarded)
5445          */
5446         if (write64bit)
5447                 BUG_ON(addr & 0x7);
5448         else
5449                 BUG_ON(addr & 0x3);
5450         amdgpu_ring_write(ring, lower_32_bits(addr));
5451         amdgpu_ring_write(ring, upper_32_bits(addr));
5452         amdgpu_ring_write(ring, lower_32_bits(seq));
5453         amdgpu_ring_write(ring, upper_32_bits(seq));
5454         amdgpu_ring_write(ring, 0);
5455 }
5456
5457 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5458 {
5459         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5460         uint32_t seq = ring->fence_drv.sync_seq;
5461         uint64_t addr = ring->fence_drv.gpu_addr;
5462
5463         gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5464                               lower_32_bits(addr), upper_32_bits(addr),
5465                               seq, 0xffffffff, 4);
5466 }
5467
5468 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5469                                         unsigned vmid, uint64_t pd_addr)
5470 {
5471         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5472
5473         /* compute doesn't have PFP */
5474         if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5475                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5476                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5477                 amdgpu_ring_write(ring, 0x0);
5478         }
5479 }
5480
5481 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5482 {
5483         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
5484 }
5485
5486 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5487 {
5488         u64 wptr;
5489
5490         /* XXX check if swapping is necessary on BE */
5491         if (ring->use_doorbell)
5492                 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
5493         else
5494                 BUG();
5495         return wptr;
5496 }
5497
5498 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5499 {
5500         struct amdgpu_device *adev = ring->adev;
5501
5502         /* XXX check if swapping is necessary on BE */
5503         if (ring->use_doorbell) {
5504                 atomic64_set((atomic64_t *)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5505                 WDOORBELL64(ring->doorbell_index, ring->wptr);
5506         } else {
5507                 BUG(); /* only DOORBELL method supported on gfx9 now */
5508         }
5509 }
5510
5511 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5512                                          u64 seq, unsigned int flags)
5513 {
5514         struct amdgpu_device *adev = ring->adev;
5515
5516         /* we only allocate 32 bits of wb space for each fence seq */
5517         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5518
5519         /* write fence seq to the "addr" */
5520         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5521         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5522                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5523         amdgpu_ring_write(ring, lower_32_bits(addr));
5524         amdgpu_ring_write(ring, upper_32_bits(addr));
5525         amdgpu_ring_write(ring, lower_32_bits(seq));
5526
5527         if (flags & AMDGPU_FENCE_FLAG_INT) {
5528                 /* set register to trigger INT */
5529                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5530                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5531                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5532                 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5533                 amdgpu_ring_write(ring, 0);
5534                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5535         }
5536 }
5537
5538 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5539 {
5540         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5541         amdgpu_ring_write(ring, 0);
5542 }
5543
5544 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5545 {
5546         struct v9_ce_ib_state ce_payload = {0};
5547         uint64_t csa_addr;
5548         int cnt;
5549
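             /* PACKET3 count is the total packet size in dwords minus two:
              * here 1 header + 3 control/address dwords + the payload.
              */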
5550         cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5551         csa_addr = amdgpu_csa_vaddr(ring->adev);
5552
5553         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5554         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5555                                  WRITE_DATA_DST_SEL(8) |
5556                                  WR_CONFIRM) |
5557                                  WRITE_DATA_CACHE_POLICY(0));
5558         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5559         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5560         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5561 }
5562
5563 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5564 {
5565         struct v9_de_ib_state de_payload = {0};
5566         uint64_t csa_addr, gds_addr;
5567         int cnt;
5568
5569         csa_addr = amdgpu_csa_vaddr(ring->adev);
5570         gds_addr = csa_addr + 4096;
5571         de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5572         de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5573
5574         cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5575         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5576         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5577                                  WRITE_DATA_DST_SEL(8) |
5578                                  WR_CONFIRM) |
5579                                  WRITE_DATA_CACHE_POLICY(0));
5580         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5581         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5582         amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5583 }
5584
5585 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
5586                                    bool secure)
5587 {
5588         uint32_t v = secure ? FRAME_TMZ : 0;
5589
5590         amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5591         amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
5592 }
5593
5594 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5595 {
5596         uint32_t dw2 = 0;
5597
5598         if (amdgpu_sriov_vf(ring->adev))
5599                 gfx_v9_0_ring_emit_ce_meta(ring);
5600
5601         dw2 |= 0x80000000; /* set load_enable otherwise this packet is just NOPs */
5602         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5603                 /* set load_global_config & load_global_uconfig */
5604                 dw2 |= 0x8001;
5605                 /* set load_cs_sh_regs */
5606                 dw2 |= 0x01000000;
5607                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
5608                 dw2 |= 0x10002;
5609
5610                 /* set load_ce_ram if a preamble is present */
5611                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5612                         dw2 |= 0x10000000;
5613         } else {
5614                 /* still load_ce_ram if this is the first time a preamble is
5615                  * presented, even though no context switch happens.
5616                  */
5617                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5618                         dw2 |= 0x10000000;
5619         }
5620
5621         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5622         amdgpu_ring_write(ring, dw2);
5623         amdgpu_ring_write(ring, 0);
5624 }
5625
5626 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5627 {
5628         unsigned ret;
5629         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5630         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5631         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5632         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
5633         ret = ring->wptr & ring->buf_mask;
5634         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5635         return ret;
5636 }
5637
5638 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5639 {
5640         unsigned cur;
5641         BUG_ON(offset > ring->buf_mask);
5642         BUG_ON(ring->ring[offset] != 0x55aa55aa);
5643
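             /* Patch the placeholder with the number of dwords the CP should
              * skip; if the write pointer wrapped past the patch point, the
              * distance includes a full trip around the ring.
              */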
5644         cur = (ring->wptr & ring->buf_mask) - 1;
5645         if (likely(cur > offset))
5646                 ring->ring[offset] = cur - offset;
5647         else
5648                 ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5649 }
5650
5651 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
5652                                     uint32_t reg_val_offs)
5653 {
5654         struct amdgpu_device *adev = ring->adev;
5655
5656         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5657         amdgpu_ring_write(ring, 0 |     /* src: register*/
5658                                 (5 << 8) |      /* dst: memory */
5659                                 (1 << 20));     /* write confirm */
5660         amdgpu_ring_write(ring, reg);
5661         amdgpu_ring_write(ring, 0);
5662         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5663                                 reg_val_offs * 4));
5664         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5665                                 reg_val_offs * 4));
5666 }
5667
5668 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5669                                     uint32_t val)
5670 {
5671         uint32_t cmd = 0;
5672
5673         switch (ring->funcs->type) {
5674         case AMDGPU_RING_TYPE_GFX:
5675                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5676                 break;
5677         case AMDGPU_RING_TYPE_KIQ:
5678                 cmd = (1 << 16); /* no inc addr */
5679                 break;
5680         default:
5681                 cmd = WR_CONFIRM;
5682                 break;
5683         }
5684         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5685         amdgpu_ring_write(ring, cmd);
5686         amdgpu_ring_write(ring, reg);
5687         amdgpu_ring_write(ring, 0);
5688         amdgpu_ring_write(ring, val);
5689 }
5690
5691 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5692                                         uint32_t val, uint32_t mask)
5693 {
5694         gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5695 }
5696
5697 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5698                                                   uint32_t reg0, uint32_t reg1,
5699                                                   uint32_t ref, uint32_t mask)
5700 {
5701         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5702         struct amdgpu_device *adev = ring->adev;
5703         bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5704                 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5705
5706         if (fw_version_ok)
5707                 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5708                                       ref, mask, 0x20);
5709         else
5710                 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5711                                                            ref, mask);
5712 }
5713
5714 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5715 {
5716         struct amdgpu_device *adev = ring->adev;
5717         uint32_t value = 0;
5718
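             /* CMD 0x03 is understood to be the SQ "kill waves" command; MODE
              * 0x01 broadcasts it, and CHECK_VMID/VM_ID limit it to the hung
              * VMID's waves.
              */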
5719         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5720         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5721         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5722         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5723         WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5724 }
5725
5726 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5727                                                  enum amdgpu_interrupt_state state)
5728 {
5729         switch (state) {
5730         case AMDGPU_IRQ_STATE_DISABLE:
5731         case AMDGPU_IRQ_STATE_ENABLE:
5732                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5733                                TIME_STAMP_INT_ENABLE,
5734                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5735                 break;
5736         default:
5737                 break;
5738         }
5739 }
5740
5741 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5742                                                      int me, int pipe,
5743                                                      enum amdgpu_interrupt_state state)
5744 {
5745         u32 mec_int_cntl, mec_int_cntl_reg;
5746
5747         /*
5748          * amdgpu controls only the first MEC. That's why this function only
5749          * handles the setting of interrupts for this specific MEC. All other
5750          * pipes' interrupts are set by amdkfd.
5751          */
5752
5753         if (me == 1) {
5754                 switch (pipe) {
5755                 case 0:
5756                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5757                         break;
5758                 case 1:
5759                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5760                         break;
5761                 case 2:
5762                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5763                         break;
5764                 case 3:
5765                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5766                         break;
5767                 default:
5768                         DRM_DEBUG("invalid pipe %d\n", pipe);
5769                         return;
5770                 }
5771         } else {
5772                 DRM_DEBUG("invalid me %d\n", me);
5773                 return;
5774         }
5775
5776         switch (state) {
5777         case AMDGPU_IRQ_STATE_DISABLE:
5778                 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5779                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5780                                              TIME_STAMP_INT_ENABLE, 0);
5781                 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5782                 break;
5783         case AMDGPU_IRQ_STATE_ENABLE:
5784                 mec_int_cntl = RREG32_SOC15_IP(GC, mec_int_cntl_reg);
5785                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5786                                              TIME_STAMP_INT_ENABLE, 1);
5787                 WREG32_SOC15_IP(GC, mec_int_cntl_reg, mec_int_cntl);
5788                 break;
5789         default:
5790                 break;
5791         }
5792 }
5793
5794 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5795                                              struct amdgpu_irq_src *source,
5796                                              unsigned type,
5797                                              enum amdgpu_interrupt_state state)
5798 {
5799         switch (state) {
5800         case AMDGPU_IRQ_STATE_DISABLE:
5801         case AMDGPU_IRQ_STATE_ENABLE:
5802                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5803                                PRIV_REG_INT_ENABLE,
5804                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5805                 break;
5806         default:
5807                 break;
5808         }
5809
5810         return 0;
5811 }
5812
5813 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5814                                               struct amdgpu_irq_src *source,
5815                                               unsigned type,
5816                                               enum amdgpu_interrupt_state state)
5817 {
5818         switch (state) {
5819         case AMDGPU_IRQ_STATE_DISABLE:
5820         case AMDGPU_IRQ_STATE_ENABLE:
5821                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5822                                PRIV_INSTR_INT_ENABLE,
5823                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5824                 break;
5825         default:
5826                 break;
5827         }
5828
5829         return 0;
5830 }
5831
5832 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)                         \
5833         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5834                         CP_ECC_ERROR_INT_ENABLE, 1)
5835
5836 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)                        \
5837         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5838                         CP_ECC_ERROR_INT_ENABLE, 0)
5839
5840 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5841                                               struct amdgpu_irq_src *source,
5842                                               unsigned type,
5843                                               enum amdgpu_interrupt_state state)
5844 {
5845         switch (state) {
5846         case AMDGPU_IRQ_STATE_DISABLE:
5847                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5848                                 CP_ECC_ERROR_INT_ENABLE, 0);
5849                 DISABLE_ECC_ON_ME_PIPE(1, 0);
5850                 DISABLE_ECC_ON_ME_PIPE(1, 1);
5851                 DISABLE_ECC_ON_ME_PIPE(1, 2);
5852                 DISABLE_ECC_ON_ME_PIPE(1, 3);
5853                 break;
5854
5855         case AMDGPU_IRQ_STATE_ENABLE:
5856                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5857                                 CP_ECC_ERROR_INT_ENABLE, 1);
5858                 ENABLE_ECC_ON_ME_PIPE(1, 0);
5859                 ENABLE_ECC_ON_ME_PIPE(1, 1);
5860                 ENABLE_ECC_ON_ME_PIPE(1, 2);
5861                 ENABLE_ECC_ON_ME_PIPE(1, 3);
5862                 break;
5863         default:
5864                 break;
5865         }
5866
5867         return 0;
5868 }
5869
5870
5871 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5872                                             struct amdgpu_irq_src *src,
5873                                             unsigned type,
5874                                             enum amdgpu_interrupt_state state)
5875 {
5876         switch (type) {
5877         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5878                 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5879                 break;
5880         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5881                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5882                 break;
5883         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5884                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5885                 break;
5886         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5887                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5888                 break;
5889         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5890                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5891                 break;
5892         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5893                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5894                 break;
5895         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5896                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5897                 break;
5898         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5899                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5900                 break;
5901         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5902                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5903                 break;
5904         default:
5905                 break;
5906         }
5907         return 0;
5908 }
5909
5910 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5911                             struct amdgpu_irq_src *source,
5912                             struct amdgpu_iv_entry *entry)
5913 {
5914         int i;
5915         u8 me_id, pipe_id, queue_id;
5916         struct amdgpu_ring *ring;
5917
5918         DRM_DEBUG("IH: CP EOP\n");
5919         me_id = (entry->ring_id & 0x0c) >> 2;
5920         pipe_id = (entry->ring_id & 0x03) >> 0;
5921         queue_id = (entry->ring_id & 0x70) >> 4;
5922
5923         switch (me_id) {
5924         case 0:
5925                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5926                 break;
5927         case 1:
5928         case 2:
5929                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5930                         ring = &adev->gfx.compute_ring[i];
5931                         /* Per-queue interrupt is supported for MEC starting from VI.
5932                          * The interrupt can only be enabled/disabled per pipe instead of per queue.
5933                          */
5934                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5935                                 amdgpu_fence_process(ring);
5936                 }
5937                 break;
5938         }
5939         return 0;
5940 }
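
The decode above packs the interrupt's ring_id as pipe in bits [1:0], ME in bits [3:2] and queue in bits [6:4]. A worked example with a hypothetical payload:

/* Illustrative decode of a sample ring_id of 0x25:
 *   me_id    = (0x25 & 0x0c) >> 2 = 1    -> MEC1
 *   pipe_id  = (0x25 & 0x03) >> 0 = 1
 *   queue_id = (0x25 & 0x70) >> 4 = 2
 * so the fence of the compute ring bound to MEC1, pipe 1, queue 2 is
 * processed, provided such a ring was created.
 */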
5941
5942 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5943                            struct amdgpu_iv_entry *entry)
5944 {
5945         u8 me_id, pipe_id, queue_id;
5946         struct amdgpu_ring *ring;
5947         int i;
5948
5949         me_id = (entry->ring_id & 0x0c) >> 2;
5950         pipe_id = (entry->ring_id & 0x03) >> 0;
5951         queue_id = (entry->ring_id & 0x70) >> 4;
5952
5953         switch (me_id) {
5954         case 0:
5955                 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5956                 break;
5957         case 1:
5958         case 2:
5959                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5960                         ring = &adev->gfx.compute_ring[i];
5961                         if (ring->me == me_id && ring->pipe == pipe_id &&
5962                             ring->queue == queue_id)
5963                                 drm_sched_fault(&ring->sched);
5964                 }
5965                 break;
5966         }
5967 }
5968
5969 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5970                                  struct amdgpu_irq_src *source,
5971                                  struct amdgpu_iv_entry *entry)
5972 {
5973         DRM_ERROR("Illegal register access in command stream\n");
5974         gfx_v9_0_fault(adev, entry);
5975         return 0;
5976 }
5977
5978 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5979                                   struct amdgpu_irq_src *source,
5980                                   struct amdgpu_iv_entry *entry)
5981 {
5982         DRM_ERROR("Illegal instruction in command stream\n");
5983         gfx_v9_0_fault(adev, entry);
5984         return 0;
5985 }
5986
5987
5988 static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = {
5989         { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT),
5990           SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5991           SOC15_REG_FIELD(CPC_EDC_SCRATCH_CNT, DED_COUNT)
5992         },
5993         { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT),
5994           SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, SEC_COUNT),
5995           SOC15_REG_FIELD(CPC_EDC_UCODE_CNT, DED_COUNT)
5996         },
5997         { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
5998           SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME1),
5999           0, 0
6000         },
6001         { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT),
6002           SOC15_REG_FIELD(CPF_EDC_ROQ_CNT, COUNT_ME2),
6003           0, 0
6004         },
6005         { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT),
6006           SOC15_REG_FIELD(CPF_EDC_TAG_CNT, SEC_COUNT),
6007           SOC15_REG_FIELD(CPF_EDC_TAG_CNT, DED_COUNT)
6008         },
6009         { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6010           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, ROQ_COUNT),
6011           0, 0
6012         },
6013         { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT),
6014           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
6015           SOC15_REG_FIELD(CPG_EDC_DMA_CNT, TAG_DED_COUNT)
6016         },
6017         { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT),
6018           SOC15_REG_FIELD(CPG_EDC_TAG_CNT, SEC_COUNT),
6019           SOC15_REG_FIELD(CPG_EDC_TAG_CNT, DED_COUNT)
6020         },
6021         { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT),
6022           SOC15_REG_FIELD(DC_EDC_CSINVOC_CNT, COUNT_ME1),
6023           0, 0
6024         },
6025         { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT),
6026           SOC15_REG_FIELD(DC_EDC_RESTORE_CNT, COUNT_ME1),
6027           0, 0
6028         },
6029         { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT),
6030           SOC15_REG_FIELD(DC_EDC_STATE_CNT, COUNT_ME1),
6031           0, 0
6032         },
6033         { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6034           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_SEC),
6035           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_MEM_DED)
6036         },
6037         { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT),
6038           SOC15_REG_FIELD(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED),
6039           0, 0
6040         },
6041         { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6042           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
6043           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED)
6044         },
6045         { "GDS_OA_PHY_PHY_CMD_RAM_MEM",
6046           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6047           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
6048           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED)
6049         },
6050         { "GDS_OA_PHY_PHY_DATA_RAM_MEM",
6051           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
6052           SOC15_REG_FIELD(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED),
6053           0, 0
6054         },
6055         { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
6056           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6057           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
6058           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED)
6059         },
6060         { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
6061           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6062           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
6063           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED)
6064         },
6065         { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
6066           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6067           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
6068           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED)
6069         },
6070         { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
6071           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT),
6072           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
6073           SOC15_REG_FIELD(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED)
6074         },
6075         { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT),
6076           SOC15_REG_FIELD(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT),
6077           0, 0
6078         },
6079         { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6080           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
6081           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT)
6082         },
6083         { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6084           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT),
6085           0, 0
6086         },
6087         { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6088           SOC15_REG_FIELD(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT),
6089           0, 0
6090         },
6091         { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6092           SOC15_REG_FIELD(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT),
6093           0, 0
6094         },
6095         { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT),
6096           SOC15_REG_FIELD(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT),
6097           0, 0
6098         },
6099         { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6100           SOC15_REG_FIELD(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT),
6101           0, 0
6102         },
6103         { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT),
6104           SOC15_REG_FIELD(TCA_EDC_CNT, REQ_FIFO_SED_COUNT),
6105           0, 0
6106         },
6107         { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6108           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
6109           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DATA_DED_COUNT)
6110         },
6111         { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6112           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
6113           SOC15_REG_FIELD(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT)
6114         },
6115         { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6116           SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
6117           SOC15_REG_FIELD(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT)
6118         },
6119         { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6120           SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
6121           SOC15_REG_FIELD(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT)
6122         },
6123         { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6124           SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
6125           SOC15_REG_FIELD(TCC_EDC_CNT, SRC_FIFO_DED_COUNT)
6126         },
6127         { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6128           SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT),
6129           0, 0
6130         },
6131         { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6132           SOC15_REG_FIELD(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT),
6133           0, 0
6134         },
6135         { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6136           SOC15_REG_FIELD(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT),
6137           0, 0
6138         },
6139         { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6140           SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_DATA_SED_COUNT),
6141           0, 0
6142         },
6143         { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6144           SOC15_REG_FIELD(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT),
6145           0, 0
6146         },
6147         { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT),
6148           SOC15_REG_FIELD(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT),
6149           0, 0
6150         },
6151         { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6152           SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT),
6153           0, 0
6154         },
6155         { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6156           SOC15_REG_FIELD(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT),
6157           0, 0
6158         },
6159         { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6160           SOC15_REG_FIELD(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT),
6161           0, 0
6162         },
6163         { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6164           SOC15_REG_FIELD(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
6165           0, 0
6166         },
6167         { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6168           SOC15_REG_FIELD(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT),
6169           0, 0
6170         },
6171         { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6172           SOC15_REG_FIELD(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
6173           0, 0
6174         },
6175         { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
6176           SOC15_REG_FIELD(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT),
6177           0, 0
6178         },
6179         { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT),
6180           SOC15_REG_FIELD(TCI_EDC_CNT, WRITE_RAM_SED_COUNT),
6181           0, 0
6182         },
6183         { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6184           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
6185           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT)
6186         },
6187         { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6188           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
6189           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT)
6190         },
6191         { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6192           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT),
6193           0, 0
6194         },
6195         { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6196           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT),
6197           0, 0
6198         },
6199         { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6200           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT),
6201           0, 0
6202         },
6203         { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6204           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
6205           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT)
6206         },
6207         { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW),
6208           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
6209           SOC15_REG_FIELD(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT)
6210         },
6211         { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6212           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
6213           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT)
6214         },
6215         { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6216           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
6217           SOC15_REG_FIELD(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT)
6218         },
6219         { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT),
6220           SOC15_REG_FIELD(TD_EDC_CNT, CS_FIFO_SED_COUNT),
6221           0, 0
6222         },
6223         { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6224           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_SEC_COUNT),
6225           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_D_DED_COUNT)
6226         },
6227         { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6228           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_SEC_COUNT),
6229           SOC15_REG_FIELD(SQ_EDC_CNT, LDS_I_DED_COUNT)
6230         },
6231         { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6232           SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_SEC_COUNT),
6233           SOC15_REG_FIELD(SQ_EDC_CNT, SGPR_DED_COUNT)
6234         },
6235         { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6236           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_SEC_COUNT),
6237           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR0_DED_COUNT)
6238         },
6239         { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6240           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_SEC_COUNT),
6241           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR1_DED_COUNT)
6242         },
6243         { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6244           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_SEC_COUNT),
6245           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR2_DED_COUNT)
6246         },
6247         { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT),
6248           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_SEC_COUNT),
6249           SOC15_REG_FIELD(SQ_EDC_CNT, VGPR3_DED_COUNT)
6250         },
6251         { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6252           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
6253           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT)
6254         },
6255         { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6256           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
6257           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT)
6258         },
6259         { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6260           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
6261           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT)
6262         },
6263         { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6264           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
6265           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT)
6266         },
6267         { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6268           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
6269           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT)
6270         },
6271         { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
6272           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
6273           SOC15_REG_FIELD(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT)
6274         },
6275         { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6276           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
6277           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT)
6278         },
6279         { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6280           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
6281           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT)
6282         },
6283         { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6284           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
6285           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT)
6286         },
6287         { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6288           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
6289           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT)
6290         },
6291         { "SQC_INST_BANKA_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6292           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
6293           0, 0
6294         },
6295         { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6296           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT),
6297           0, 0
6298         },
6299         { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6300           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT),
6301           0, 0
6302         },
6303         { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6304           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT),
6305           0, 0
6306         },
6307         { "SQC_DATA_BANKA_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6308           SOC15_REG_FIELD(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT),
6309           0, 0
6310         },
6311         { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2),
6312           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
6313           SOC15_REG_FIELD(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT)
6314         },
6315         { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6316           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
6317           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT)
6318         },
6319         { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6320           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
6321           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT)
6322         },
6323         { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6324           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
6325           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT)
6326         },
6327         { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6328           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
6329           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT)
6330         },
6331         { "SQC_INST_BANKB_UTCL1_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6332           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
6333           0, 0
6334         },
6335         { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6336           SOC15_REG_FIELD(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT),
6337           0, 0
6338         },
6339         { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6340           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT),
6341           0, 0
6342         },
6343         { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6344           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT),
6345           0, 0
6346         },
6347         { "SQC_DATA_BANKB_DIRTY_BIT_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3),
6348           SOC15_REG_FIELD(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT),
6349           0, 0
6350         },
6351         { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6352           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
6353           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT)
6354         },
6355         { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6356           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6357           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT)
6358         },
6359         { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6360           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6361           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT)
6362         },
6363         { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6364           SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6365           SOC15_REG_FIELD(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT)
6366         },
6367         { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6368           SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6369           SOC15_REG_FIELD(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT)
6370         },
6371         { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6372           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT),
6373           0, 0
6374         },
6375         { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6376           SOC15_REG_FIELD(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT),
6377           0, 0
6378         },
6379         { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6380           SOC15_REG_FIELD(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT),
6381           0, 0
6382         },
6383         { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6384           SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT),
6385           0, 0
6386         },
6387         { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT),
6388           SOC15_REG_FIELD(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT),
6389           0, 0
6390         },
6391         { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6392           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6393           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT)
6394         },
6395         { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6396           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6397           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT)
6398         },
6399         { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6400           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6401           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT)
6402         },
6403         { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6404           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT),
6405           0, 0
6406         },
6407         { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6408           SOC15_REG_FIELD(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT),
6409           0, 0
6410         },
6411         { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6412           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT),
6413           0, 0
6414         },
6415         { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6416           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT),
6417           0, 0
6418         },
6419         { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6420           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT),
6421           0, 0
6422         },
6423         { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2),
6424           SOC15_REG_FIELD(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT),
6425           0, 0
6426         }
6427 };
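
Each gfx_v9_0_ras_fields entry pairs an EDC counter register with the mask/shift of its SEC (single-error-corrected) and DED (double-error-detected) fields; SED-only counters leave the DED slot as 0, 0. As a sketch of what the stored mask/shift encodes, here is the manual equivalent for one entry, assuming the usual *_MASK/*__SHIFT names generated in gc_9_0_sh_mask.h:

/* Sketch only: hand-rolled version of the table-driven lookup that
 * gfx_v9_0_ras_error_count() performs further below for "CPC_SCRATCH".
 */
uint32_t data = RREG32_SOC15(GC, 0, mmCPC_EDC_SCRATCH_CNT);
uint32_t sec_count = (data & CPC_EDC_SCRATCH_CNT__SEC_COUNT_MASK) >>
		     CPC_EDC_SCRATCH_CNT__SEC_COUNT__SHIFT;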
6428
6429 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6430                                      void *inject_if)
6431 {
6432         struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6433         int ret;
6434         struct ta_ras_trigger_error_input block_info = { 0 };
6435
6436         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6437                 return -EINVAL;
6438
6439         if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6440                 return -EINVAL;
6441
6442         if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6443                 return -EPERM;
6444
6445         if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6446               info->head.type)) {
6447                 DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
6448                         ras_gfx_subblocks[info->head.sub_block_index].name,
6449                         info->head.type);
6450                 return -EPERM;
6451         }
6452
6453         if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6454               info->head.type)) {
6455                 DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
6456                         ras_gfx_subblocks[info->head.sub_block_index].name,
6457                         info->head.type);
6458                 return -EPERM;
6459         }
6460
6461         block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6462         block_info.sub_block_index =
6463                 ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6464         block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6465         block_info.address = info->address;
6466         block_info.value = info->value;
6467
6468         mutex_lock(&adev->grbm_idx_mutex);
6469         ret = psp_ras_trigger_error(&adev->psp, &block_info);
6470         mutex_unlock(&adev->grbm_idx_mutex);
6471
6472         return ret;
6473 }
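
The injector validates the request against ras_gfx_subblocks (defined earlier in this file) before forwarding it to the PSP RAS trusted application. A hedged usage sketch; the sub_block_index chosen here is hypothetical:

struct ras_inject_if inject = {
	.head = {
		.block = AMDGPU_RAS_BLOCK__GFX,
		.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
		.sub_block_index = 0,	/* hypothetical table index */
	},
	.address = 0,
	.value = 0,
};
int r = gfx_v9_0_ras_error_inject(adev, &inject);
/* r is -EINVAL or -EPERM when the block, index or error type is
 * rejected, otherwise the PSP trigger-error return code. */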
6474
6475 static const char *vml2_mems[] = {
6476         "UTC_VML2_BANK_CACHE_0_BIGK_MEM0",
6477         "UTC_VML2_BANK_CACHE_0_BIGK_MEM1",
6478         "UTC_VML2_BANK_CACHE_0_4K_MEM0",
6479         "UTC_VML2_BANK_CACHE_0_4K_MEM1",
6480         "UTC_VML2_BANK_CACHE_1_BIGK_MEM0",
6481         "UTC_VML2_BANK_CACHE_1_BIGK_MEM1",
6482         "UTC_VML2_BANK_CACHE_1_4K_MEM0",
6483         "UTC_VML2_BANK_CACHE_1_4K_MEM1",
6484         "UTC_VML2_BANK_CACHE_2_BIGK_MEM0",
6485         "UTC_VML2_BANK_CACHE_2_BIGK_MEM1",
6486         "UTC_VML2_BANK_CACHE_2_4K_MEM0",
6487         "UTC_VML2_BANK_CACHE_2_4K_MEM1",
6488         "UTC_VML2_BANK_CACHE_3_BIGK_MEM0",
6489         "UTC_VML2_BANK_CACHE_3_BIGK_MEM1",
6490         "UTC_VML2_BANK_CACHE_3_4K_MEM0",
6491         "UTC_VML2_BANK_CACHE_3_4K_MEM1",
6492 };
6493
6494 static const char *vml2_walker_mems[] = {
6495         "UTC_VML2_CACHE_PDE0_MEM0",
6496         "UTC_VML2_CACHE_PDE0_MEM1",
6497         "UTC_VML2_CACHE_PDE1_MEM0",
6498         "UTC_VML2_CACHE_PDE1_MEM1",
6499         "UTC_VML2_CACHE_PDE2_MEM0",
6500         "UTC_VML2_CACHE_PDE2_MEM1",
6501         "UTC_VML2_RDIF_LOG_FIFO",
6502 };
6503
6504 static const char *atc_l2_cache_2m_mems[] = {
6505         "UTC_ATCL2_CACHE_2M_BANK0_WAY0_MEM",
6506         "UTC_ATCL2_CACHE_2M_BANK0_WAY1_MEM",
6507         "UTC_ATCL2_CACHE_2M_BANK1_WAY0_MEM",
6508         "UTC_ATCL2_CACHE_2M_BANK1_WAY1_MEM",
6509 };
6510
6511 static const char *atc_l2_cache_4k_mems[] = {
6512         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM0",
6513         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM1",
6514         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM2",
6515         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM3",
6516         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM4",
6517         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM5",
6518         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM6",
6519         "UTC_ATCL2_CACHE_4K_BANK0_WAY0_MEM7",
6520         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM0",
6521         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM1",
6522         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM2",
6523         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM3",
6524         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM4",
6525         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM5",
6526         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM6",
6527         "UTC_ATCL2_CACHE_4K_BANK0_WAY1_MEM7",
6528         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM0",
6529         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM1",
6530         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM2",
6531         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM3",
6532         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM4",
6533         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM5",
6534         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM6",
6535         "UTC_ATCL2_CACHE_4K_BANK1_WAY0_MEM7",
6536         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM0",
6537         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM1",
6538         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM2",
6539         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM3",
6540         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM4",
6541         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM5",
6542         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM6",
6543         "UTC_ATCL2_CACHE_4K_BANK1_WAY1_MEM7",
6544 };
6545
6546 static int gfx_v9_0_query_utc_edc_status(struct amdgpu_device *adev,
6547                                          struct ras_err_data *err_data)
6548 {
6549         uint32_t i, data;
6550         uint32_t sec_count, ded_count;
6551
6552         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6553         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6554         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6555         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6556         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6557         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6558         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6559         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6560
6561         for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6562                 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6563                 data = RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6564
6565                 sec_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, SEC_COUNT);
6566                 if (sec_count) {
6567                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6568                                 "SEC %d\n", i, vml2_mems[i], sec_count);
6569                         err_data->ce_count += sec_count;
6570                 }
6571
6572                 ded_count = REG_GET_FIELD(data, VM_L2_MEM_ECC_CNT, DED_COUNT);
6573                 if (ded_count) {
6574                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6575                                 "DED %d\n", i, vml2_mems[i], ded_count);
6576                         err_data->ue_count += ded_count;
6577                 }
6578         }
6579
6580         for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6581                 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6582                 data = RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6583
6584                 sec_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6585                                                 SEC_COUNT);
6586                 if (sec_count) {
6587                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6588                                 "SEC %d\n", i, vml2_walker_mems[i], sec_count);
6589                         err_data->ce_count += sec_count;
6590                 }
6591
6592                 ded_count = REG_GET_FIELD(data, VM_L2_WALKER_MEM_ECC_CNT,
6593                                                 DED_COUNT);
6594                 if (ded_count) {
6595                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6596                                 "DED %d\n", i, vml2_walker_mems[i], ded_count);
6597                         err_data->ue_count += ded_count;
6598                 }
6599         }
6600
6601         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6602                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6603                 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6604
6605                 sec_count = (data & 0x00006000L) >> 0xd;
6606                 if (sec_count) {
6607                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6608                                 "SEC %d\n", i, atc_l2_cache_2m_mems[i],
6609                                 sec_count);
6610                         err_data->ce_count += sec_count;
6611                 }
6612         }
6613
6614         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6615                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6616                 data = RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6617
6618                 sec_count = (data & 0x00006000L) >> 0xd;
6619                 if (sec_count) {
6620                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6621                                 "SEC %d\n", i, atc_l2_cache_4k_mems[i],
6622                                 sec_count);
6623                         err_data->ce_count += sec_count;
6624                 }
6625
6626                 ded_count = (data & 0x00018000L) >> 0xf;
6627                 if (ded_count) {
6628                         dev_info(adev->dev, "Instance[%d]: SubBlock %s, "
6629                                 "DED %d\n", i, atc_l2_cache_4k_mems[i],
6630                                 ded_count);
6631                         err_data->ue_count += ded_count;
6632                 }
6633         }
6634
6635         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6636         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6637         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6638         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6639
6640         return 0;
6641 }
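
Unlike the VML2 counters, the ATC L2 counts are extracted with raw masks: bits [14:13] hold the SEC count and bits [16:15] the DED count. A worked example with an assumed readback value:

/* Illustrative: if mmATC_L2_CACHE_4K_EDC_CNT reads back 0x0000a000,
 *   sec_count = (0x0000a000 & 0x00006000) >> 13 = 1
 *   ded_count = (0x0000a000 & 0x00018000) >> 15 = 1
 * so one correctable and one uncorrectable error are accumulated.
 */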
6642
6643 static int gfx_v9_0_ras_error_count(struct amdgpu_device *adev,
6644         const struct soc15_reg_entry *reg,
6645         uint32_t se_id, uint32_t inst_id, uint32_t value,
6646         uint32_t *sec_count, uint32_t *ded_count)
6647 {
6648         uint32_t i;
6649         uint32_t sec_cnt, ded_cnt;
6650
6651         for (i = 0; i < ARRAY_SIZE(gfx_v9_0_ras_fields); i++) {
6652                 if (gfx_v9_0_ras_fields[i].reg_offset != reg->reg_offset ||
6653                         gfx_v9_0_ras_fields[i].seg != reg->seg ||
6654                         gfx_v9_0_ras_fields[i].inst != reg->inst)
6655                         continue;
6656
6657                 sec_cnt = (value &
6658                                 gfx_v9_0_ras_fields[i].sec_count_mask) >>
6659                                 gfx_v9_0_ras_fields[i].sec_count_shift;
6660                 if (sec_cnt) {
6661                         dev_info(adev->dev, "GFX SubBlock %s, "
6662                                 "Instance[%d][%d], SEC %d\n",
6663                                 gfx_v9_0_ras_fields[i].name,
6664                                 se_id, inst_id,
6665                                 sec_cnt);
6666                         *sec_count += sec_cnt;
6667                 }
6668
6669                 ded_cnt = (value &
6670                                 gfx_v9_0_ras_fields[i].ded_count_mask) >>
6671                                 gfx_v9_0_ras_fields[i].ded_count_shift;
6672                 if (ded_cnt) {
6673                         dev_info(adev->dev, "GFX SubBlock %s, "
6674                                 "Instance[%d][%d], DED %d\n",
6675                                 gfx_v9_0_ras_fields[i].name,
6676                                 se_id, inst_id,
6677                                 ded_cnt);
6678                         *ded_count += ded_cnt;
6679                 }
6680         }
6681
6682         return 0;
6683 }
6684
6685 static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
6686 {
6687         int i, j, k;
6688
6689         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6690                 return;
6691
6692         /* read back registers to clear the counters */
6693         mutex_lock(&adev->grbm_idx_mutex);
6694         for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6695                 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6696                         for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6697                                 gfx_v9_0_select_se_sh(adev, j, 0x0, k);
6698                                 RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6699                         }
6700                 }
6701         }
6702         WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
6703         mutex_unlock(&adev->grbm_idx_mutex);
6704
6705         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6706         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT, 0);
6707         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6708         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT, 0);
6709         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6710         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT, 0);
6711         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6712         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT, 0);
6713
6714         for (i = 0; i < ARRAY_SIZE(vml2_mems); i++) {
6715                 WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, i);
6716                 RREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_CNT);
6717         }
6718
6719         for (i = 0; i < ARRAY_SIZE(vml2_walker_mems); i++) {
6720                 WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, i);
6721                 RREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_CNT);
6722         }
6723
6724         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_2m_mems); i++) {
6725                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, i);
6726                 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_CNT);
6727         }
6728
6729         for (i = 0; i < ARRAY_SIZE(atc_l2_cache_4k_mems); i++) {
6730                 WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, i);
6731                 RREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_CNT);
6732         }
6733
6734         WREG32_SOC15(GC, 0, mmVM_L2_MEM_ECC_INDEX, 255);
6735         WREG32_SOC15(GC, 0, mmVM_L2_WALKER_MEM_ECC_INDEX, 255);
6736         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_2M_EDC_INDEX, 255);
6737         WREG32_SOC15(GC, 0, mmATC_L2_CACHE_4K_EDC_INDEX, 255);
6738 }
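
The closing GRBM_GFX_INDEX write of 0xe0000000 restores broadcast addressing after the per-SE/per-instance walk. A more self-describing equivalent, assuming the usual mask names from gc_9_0_sh_mask.h:

WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX,
	     GRBM_GFX_INDEX__SE_BROADCAST_WRITES_MASK |
	     GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK |
	     GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK);
/* 0x80000000 | 0x20000000 | 0x40000000 == 0xe0000000 */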
6739
6740 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6741                                           void *ras_error_status)
6742 {
6743         struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6744         uint32_t sec_count = 0, ded_count = 0;
6745         uint32_t i, j, k;
6746         uint32_t reg_value;
6747
6748         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
6749                 return;
6750
6751         err_data->ue_count = 0;
6752         err_data->ce_count = 0;
6753
6754         mutex_lock(&adev->grbm_idx_mutex);
6755
6756         for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
6757                 for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
6758                         for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
6759                                 gfx_v9_0_select_se_sh(adev, j, 0, k);
6760                                 reg_value =
6761                                         RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
6762                                 if (reg_value)
6763                                         gfx_v9_0_ras_error_count(adev,
6764                                                 &gfx_v9_0_edc_counter_regs[i],
6765                                                 j, k, reg_value,
6766                                                 &sec_count, &ded_count);
6767                         }
6768                 }
6769         }
6770
6771         err_data->ce_count += sec_count;
6772         err_data->ue_count += ded_count;
6773
6774         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6775         mutex_unlock(&adev->grbm_idx_mutex);
6776
6777         gfx_v9_0_query_utc_edc_status(adev, err_data);
6778 }
6779
6780 static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
6781 {
6782         const unsigned int cp_coher_cntl =
6783                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(1) |
6784                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_ACTION_ENA(1) |
6785                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_ACTION_ENA(1) |
6786                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TCL1_ACTION_ENA(1) |
6787                         PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_TC_WB_ACTION_ENA(1);
6788
6789         /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
6790         amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 5));
6791         amdgpu_ring_write(ring, cp_coher_cntl); /* CP_COHER_CNTL */
6792         amdgpu_ring_write(ring, 0xffffffff);  /* CP_COHER_SIZE */
6793         amdgpu_ring_write(ring, 0xffffff);  /* CP_COHER_SIZE_HI */
6794         amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
6795         amdgpu_ring_write(ring, 0);  /* CP_COHER_BASE_HI */
6796         amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
6797 }
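
In a type-3 PM4 header the count field is the number of payload dwords minus one, so PACKET3(PACKET3_ACQUIRE_MEM, 5) announces the six dwords that follow; the full packet is the 7 dwords budgeted for gfx_v9_0_emit_mem_sync in the emit_frame_size tallies below. A caller-side sketch, with ring-space handling assumed:

/* Illustrative only: the function is normally reached through
 * ring->funcs->emit_mem_sync with ring space already reserved.
 */
if (amdgpu_ring_alloc(ring, 7) == 0) {	/* 1 header + 6 payload dwords */
	gfx_v9_0_emit_mem_sync(ring);
	amdgpu_ring_commit(ring);
}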
6798
6799 static void gfx_v9_0_emit_wave_limit_cs(struct amdgpu_ring *ring,
6800                                         uint32_t pipe, bool enable)
6801 {
6802         struct amdgpu_device *adev = ring->adev;
6803         uint32_t val;
6804         uint32_t wcl_cs_reg;
6805
6806         /* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are the same */
6807         val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS0_DEFAULT;
6808
6809         switch (pipe) {
6810         case 0:
6811                 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS0);
6812                 break;
6813         case 1:
6814                 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS1);
6815                 break;
6816         case 2:
6817                 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS2);
6818                 break;
6819         case 3:
6820                 wcl_cs_reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_CS3);
6821                 break;
6822         default:
6823                 DRM_DEBUG("invalid pipe %d\n", pipe);
6824                 return;
6825         }
6826
6827         amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val);
6828 }
6829
6830 static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
6831 {
6832         struct amdgpu_device *adev = ring->adev;
6833         uint32_t val;
6834         int i;
6835
6836
6837         /* mmSPI_WCL_PIPE_PERCENT_GFX is a 7-bit multiplier register used to
6838          * limit the number of gfx waves. Setting the low 5 bits (0x1f) makes
6839          * sure gfx gets only around 25% of GPU resources.
6840          */
6841         val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
6842         amdgpu_ring_emit_wreg(ring,
6843                               SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX),
6844                               val);
6845
6846         /* Restrict waves for normal/low priority compute queues as well
6847          * to get the best QoS for high priority compute jobs.
6848          *
6849          * amdgpu controls only the 1st ME (CS pipes 0-3).
6850          */
6851         for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) {
6852                 if (i != ring->pipe)
6853                         gfx_v9_0_emit_wave_limit_cs(ring, i, enable);
6854
6855         }
6856 }
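
A back-of-envelope check of the "around 25%" figure in the comment above (illustrative):

/* SPI_WCL_PIPE_PERCENT_GFX is a 7-bit multiplier, so writing 0x1f out
 * of the full-scale 0x7f caps gfx at 31 / 127 ~= 24.4% of the wave
 * slots, i.e. roughly a quarter of the GPU.
 */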
6857
6858 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6859         .name = "gfx_v9_0",
6860         .early_init = gfx_v9_0_early_init,
6861         .late_init = gfx_v9_0_late_init,
6862         .sw_init = gfx_v9_0_sw_init,
6863         .sw_fini = gfx_v9_0_sw_fini,
6864         .hw_init = gfx_v9_0_hw_init,
6865         .hw_fini = gfx_v9_0_hw_fini,
6866         .suspend = gfx_v9_0_suspend,
6867         .resume = gfx_v9_0_resume,
6868         .is_idle = gfx_v9_0_is_idle,
6869         .wait_for_idle = gfx_v9_0_wait_for_idle,
6870         .soft_reset = gfx_v9_0_soft_reset,
6871         .set_clockgating_state = gfx_v9_0_set_clockgating_state,
6872         .set_powergating_state = gfx_v9_0_set_powergating_state,
6873         .get_clockgating_state = gfx_v9_0_get_clockgating_state,
6874 };
6875
6876 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6877         .type = AMDGPU_RING_TYPE_GFX,
6878         .align_mask = 0xff,
6879         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6880         .support_64bit_ptrs = true,
6881         .vmhub = AMDGPU_GFXHUB_0,
6882         .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6883         .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6884         .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6885         .emit_frame_size = /* 242 dwords maximum in total, if 16 IBs */
6886                 5 +  /* COND_EXEC */
6887                 7 +  /* PIPELINE_SYNC */
6888                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6889                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6890                 2 + /* VM_FLUSH */
6891                 8 +  /* FENCE for VM_FLUSH */
6892                 20 + /* GDS switch */
6893                 4 + /* double SWITCH_BUFFER,
6894                        the first COND_EXEC jumps to the place just
6895                        prior to this double SWITCH_BUFFER */
6896                 5 + /* COND_EXEC */
6897                 7 +      /*     HDP_flush */
6898                 4 +      /*     VGT_flush */
6899                 14 + /* CE_META */
6900                 31 + /* DE_META */
6901                 3 + /* CNTX_CTRL */
6902                 5 + /* HDP_INVL */
6903                 8 + 8 + /* FENCE x2 */
6904                 2 + /* SWITCH_BUFFER */
6905                 7, /* gfx_v9_0_emit_mem_sync */
6906         .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
6907         .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6908         .emit_fence = gfx_v9_0_ring_emit_fence,
6909         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6910         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6911         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6912         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6913         .test_ring = gfx_v9_0_ring_test_ring,
6914         .test_ib = gfx_v9_0_ring_test_ib,
6915         .insert_nop = amdgpu_ring_insert_nop,
6916         .pad_ib = amdgpu_ring_generic_pad_ib,
6917         .emit_switch_buffer = gfx_v9_ring_emit_sb,
6918         .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6919         .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6920         .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6921         .emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
6922         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6923         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6924         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6925         .soft_recovery = gfx_v9_0_ring_soft_recovery,
6926         .emit_mem_sync = gfx_v9_0_emit_mem_sync,
6927 };
6928
6929 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6930         .type = AMDGPU_RING_TYPE_COMPUTE,
6931         .align_mask = 0xff,
6932         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6933         .support_64bit_ptrs = true,
6934         .vmhub = AMDGPU_GFXHUB_0,
6935         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
6936         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
6937         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
6938         .emit_frame_size =
6939                 20 + /* gfx_v9_0_ring_emit_gds_switch */
6940                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
6941                 5 + /* hdp invalidate */
6942                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6943                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6944                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6945                 2 + /* gfx_v9_0_ring_emit_vm_flush */
6946                 8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6947                 7 + /* gfx_v9_0_emit_mem_sync */
6948                 5 + /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
6949                 15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */
6950         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
6951         .emit_ib = gfx_v9_0_ring_emit_ib_compute,
6952         .emit_fence = gfx_v9_0_ring_emit_fence,
6953         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6954         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6955         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6956         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6957         .test_ring = gfx_v9_0_ring_test_ring,
6958         .test_ib = gfx_v9_0_ring_test_ib,
6959         .insert_nop = amdgpu_ring_insert_nop,
6960         .pad_ib = amdgpu_ring_generic_pad_ib,
6961         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6962         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6963         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6964         .emit_mem_sync = gfx_v9_0_emit_mem_sync,
6965         .emit_wave_limit = gfx_v9_0_emit_wave_limit,
6966 };
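
The last two emit_frame_size entries above budget for gfx_v9_0_emit_wave_limit(): one WREG for mmSPI_WCL_PIPE_PERCENT_GFX plus, with num_pipe_per_mec == 4, WREGs for the CS registers of the three pipes other than the ring's own (each emit_wreg costing 5 dwords here):

/* Budget check (illustrative):
 *   1 WREG  * 5 dwords = 5    -> mmSPI_WCL_PIPE_PERCENT_GFX
 *   3 WREGs * 5 dwords = 15   -> mmSPI_WCL_PIPE_PERCENT_CS[n] for the
 *                                three pipes != ring->pipe
 */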
6967
6968 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6969         .type = AMDGPU_RING_TYPE_KIQ,
6970         .align_mask = 0xff,
6971         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6972         .support_64bit_ptrs = true,
6973         .vmhub = AMDGPU_GFXHUB_0,
6974         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
6975         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
6976         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
6977         .emit_frame_size =
6978                 20 + /* gfx_v9_0_ring_emit_gds_switch */
6979                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
6980                 5 + /* hdp invalidate */
6981                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6982                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6983                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6984                 2 + /* gfx_v9_0_ring_emit_vm_flush */
6985                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6986         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
6987         .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
6988         .test_ring = gfx_v9_0_ring_test_ring,
6989         .insert_nop = amdgpu_ring_insert_nop,
6990         .pad_ib = amdgpu_ring_generic_pad_ib,
6991         .emit_rreg = gfx_v9_0_ring_emit_rreg,
6992         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6993         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6994         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6995 };
6996
6997 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
6998 {
6999         int i;
7000
7001         adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
7002
7003         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
7004                 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
7005
7006         for (i = 0; i < adev->gfx.num_compute_rings; i++)
7007                 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
7008 }
7009
7010 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
7011         .set = gfx_v9_0_set_eop_interrupt_state,
7012         .process = gfx_v9_0_eop_irq,
7013 };
7014
7015 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
7016         .set = gfx_v9_0_set_priv_reg_fault_state,
7017         .process = gfx_v9_0_priv_reg_irq,
7018 };
7019
7020 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
7021         .set = gfx_v9_0_set_priv_inst_fault_state,
7022         .process = gfx_v9_0_priv_inst_irq,
7023 };
7024
7025 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
7026         .set = gfx_v9_0_set_cp_ecc_error_state,
7027         .process = amdgpu_gfx_cp_ecc_error_irq,
7028 };
7029
7030
7031 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
7032 {
7033         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
7034         adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
7035
7036         adev->gfx.priv_reg_irq.num_types = 1;
7037         adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
7038
7039         adev->gfx.priv_inst_irq.num_types = 1;
7040         adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
7041
7042         adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
7043         adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
7044 }
7045
7046 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
7047 {
7048         switch (adev->ip_versions[GC_HWIP][0]) {
7049         case IP_VERSION(9, 0, 1):
7050         case IP_VERSION(9, 2, 1):
7051         case IP_VERSION(9, 4, 0):
7052         case IP_VERSION(9, 2, 2):
7053         case IP_VERSION(9, 1, 0):
7054         case IP_VERSION(9, 4, 1):
7055         case IP_VERSION(9, 3, 0):
7056         case IP_VERSION(9, 4, 2):
7057                 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
7058                 break;
7059         default:
7060                 break;
7061         }
7062 }
7063
7064 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
7065 {
7066         /* init asic gds info */
7067         switch (adev->ip_versions[GC_HWIP][0]) {
7068         case IP_VERSION(9, 0, 1):
7069         case IP_VERSION(9, 2, 1):
7070         case IP_VERSION(9, 4, 0):
7071                 adev->gds.gds_size = 0x10000;
7072                 break;
7073         case IP_VERSION(9, 2, 2):
7074         case IP_VERSION(9, 1, 0):
7075         case IP_VERSION(9, 4, 1):
7076                 adev->gds.gds_size = 0x1000;
7077                 break;
7078         case IP_VERSION(9, 4, 2):
7079                 /* Aldebaran removed all of the GDS internal memory;
7080                  * only GWS opcodes used in the kernel, like barrier
7081                  * and semaphore, are supported. */
                adev->gds.gds_size = 0;
                break;
        default:
                adev->gds.gds_size = 0x10000;
                break;
        }

        switch (adev->ip_versions[GC_HWIP][0]) {
        case IP_VERSION(9, 0, 1):
        case IP_VERSION(9, 4, 0):
                adev->gds.gds_compute_max_wave_id = 0x7ff;
                break;
        case IP_VERSION(9, 2, 1):
                adev->gds.gds_compute_max_wave_id = 0x27f;
                break;
        case IP_VERSION(9, 2, 2):
        case IP_VERSION(9, 1, 0):
                if (adev->apu_flags & AMD_APU_IS_RAVEN2)
                        adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
                else
                        adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
                break;
        case IP_VERSION(9, 4, 1):
                adev->gds.gds_compute_max_wave_id = 0xfff;
                break;
        case IP_VERSION(9, 4, 2):
                /* deprecated for Aldebaran, no usage at all */
                adev->gds.gds_compute_max_wave_id = 0;
                break;
        default:
                /* this really depends on the chip */
                adev->gds.gds_compute_max_wave_id = 0x7ff;
                break;
        }

        adev->gds.gws_size = 64;
        adev->gds.oa_size = 16;
}

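/*
 * Program the user-configurable inactive-CU bitmap for the currently
 * selected shader array. A zero bitmap leaves the register untouched.
 */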
static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
                                                 u32 bitmap)
{
        u32 data;

        if (!bitmap)
                return;

        data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
        data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;

        WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
}

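/*
 * Compute the active-CU bitmap for the currently selected shader array:
 * OR the fused-off CUs (CC_GC_SHADER_ARRAY_CONFIG) with the user-disabled
 * CUs (GC_USER_SHADER_ARRAY_CONFIG), then invert and clamp the result to
 * max_cu_per_sh bits so only valid CU positions remain.
 */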
static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
        u32 data, mask;

        data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
        data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);

        data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
        data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;

        mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);

        return (~data) & mask;
}

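/*
 * Fill @cu_info with the number of active CUs, the per-SE/SH active-CU
 * bitmaps and the always-on (AO) CU mask for this device. Returns 0 on
 * success or -EINVAL on a NULL pointer or an unexpected SE/SH layout.
 */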
static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
                                 struct amdgpu_cu_info *cu_info)
{
        int i, j, k, counter, active_cu_number = 0;
        u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
        unsigned disable_masks[4 * 4];

        if (!adev || !cu_info)
                return -EINVAL;

        /*
         * The limit of 16 comes from the 4 * 4 bitmap array size,
         * which covers all gfx9 ASICs.
         */
        if (adev->gfx.config.max_shader_engines *
                adev->gfx.config.max_sh_per_se > 16)
                return -EINVAL;

        amdgpu_gfx_parse_disable_cu(disable_masks,
                                    adev->gfx.config.max_shader_engines,
                                    adev->gfx.config.max_sh_per_se);

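        /*
         * Walk every SE/SH pair. GRBM index selection is a global
         * setting, so the walk must run under grbm_idx_mutex to keep
         * other register accesses from observing a narrowed SE/SH
         * selection.
         */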
        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
                        mask = 1;
                        ao_bitmap = 0;
                        counter = 0;
                        gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
                        gfx_v9_0_set_user_cu_inactive_bitmap(
                                adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
                        bitmap = gfx_v9_0_get_cu_active_bitmap(adev);

                        /*
                         * The bitmap (and ao_cu_bitmap) in the cu_info
                         * structure is a 4x4 array, which suits the Vega
                         * ASICs with their 4*2 SE/SH layout.
                         * For Arcturus, however, the SE/SH layout changed
                         * to 8*1. To minimize the impact, we map it onto
                         * the existing bitmap array as below:
                         *    SE4,SH0 --> bitmap[0][1]
                         *    SE5,SH0 --> bitmap[1][1]
                         *    SE6,SH0 --> bitmap[2][1]
                         *    SE7,SH0 --> bitmap[3][1]
                         */
                        cu_info->bitmap[i % 4][j + i / 4] = bitmap;

                        for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
                                if (bitmap & mask) {
                                        if (counter < adev->gfx.config.max_cu_per_sh)
                                                ao_bitmap |= mask;
                                        counter++;
                                }
                                mask <<= 1;
                        }
                        active_cu_number += counter;
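                        /*
                         * ao_cu_mask is only 32 bits wide: 16 bits per SE
                         * and 8 bits per SH, so only SE0/SE1 with SH0/SH1
                         * can be represented in it.
                         */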
                        if (i < 2 && j < 2)
                                ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
                        cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
                }
        }
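        /* Restore broadcast mode so later register writes hit all SEs/SHs. */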
        gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
        mutex_unlock(&adev->grbm_idx_mutex);

        cu_info->number = active_cu_number;
        cu_info->ao_cu_mask = ao_cu_mask;
        cu_info->simd_per_cu = NUM_SIMD_PER_CU;

        return 0;
}

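/*
 * IP block registration record for GFX v9.0; added to a device's IP
 * block list during ASIC init so the common amdgpu code can drive this
 * block through the gfx_v9_0_ip_funcs callbacks.
 */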
const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
{
        .type = AMD_IP_BLOCK_TYPE_GFX,
        .major = 9,
        .minor = 0,
        .rev = 0,
        .funcs = &gfx_v9_0_ip_funcs,
};