drm/amdgpu/renoir: move gfxoff handling into gfx9 module
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
1 /*
2  * Copyright 2016 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  */
23
24 #include <linux/delay.h>
25 #include <linux/kernel.h>
26 #include <linux/firmware.h>
27 #include <linux/module.h>
28 #include <linux/pci.h>
29
30 #include "amdgpu.h"
31 #include "amdgpu_gfx.h"
32 #include "soc15.h"
33 #include "soc15d.h"
34 #include "amdgpu_atomfirmware.h"
35 #include "amdgpu_pm.h"
36
37 #include "gc/gc_9_0_offset.h"
38 #include "gc/gc_9_0_sh_mask.h"
39
40 #include "vega10_enum.h"
41 #include "hdp/hdp_4_0_offset.h"
42
43 #include "soc15_common.h"
44 #include "clearstate_gfx9.h"
45 #include "v9_structs.h"
46
47 #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h"
48
49 #include "amdgpu_ras.h"
50
51 #define GFX9_NUM_GFX_RINGS     1
52 #define GFX9_MEC_HPD_SIZE 4096
53 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
54 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
55
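/*
 * Register offsets used below but apparently not provided by the generated
 * GC 9.0 headers, so they are defined locally: PWR_MISC_CNTL_STATUS is read
 * later in this file when checking the RLC CGPG/GFXOFF state, and
 * GCEA_PROBE_MAP is programmed by the Renoir golden settings.
 */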
56 #define mmPWR_MISC_CNTL_STATUS                                  0x0183
57 #define mmPWR_MISC_CNTL_STATUS_BASE_IDX                         0
58 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN__SHIFT        0x0
59 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT          0x1
60 #define PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK          0x00000001L
61 #define PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK            0x00000006L
62
63 #define mmGCEA_PROBE_MAP                        0x070c
64 #define mmGCEA_PROBE_MAP_BASE_IDX               0
65
66 MODULE_FIRMWARE("amdgpu/vega10_ce.bin");
67 MODULE_FIRMWARE("amdgpu/vega10_pfp.bin");
68 MODULE_FIRMWARE("amdgpu/vega10_me.bin");
69 MODULE_FIRMWARE("amdgpu/vega10_mec.bin");
70 MODULE_FIRMWARE("amdgpu/vega10_mec2.bin");
71 MODULE_FIRMWARE("amdgpu/vega10_rlc.bin");
72
73 MODULE_FIRMWARE("amdgpu/vega12_ce.bin");
74 MODULE_FIRMWARE("amdgpu/vega12_pfp.bin");
75 MODULE_FIRMWARE("amdgpu/vega12_me.bin");
76 MODULE_FIRMWARE("amdgpu/vega12_mec.bin");
77 MODULE_FIRMWARE("amdgpu/vega12_mec2.bin");
78 MODULE_FIRMWARE("amdgpu/vega12_rlc.bin");
79
80 MODULE_FIRMWARE("amdgpu/vega20_ce.bin");
81 MODULE_FIRMWARE("amdgpu/vega20_pfp.bin");
82 MODULE_FIRMWARE("amdgpu/vega20_me.bin");
83 MODULE_FIRMWARE("amdgpu/vega20_mec.bin");
84 MODULE_FIRMWARE("amdgpu/vega20_mec2.bin");
85 MODULE_FIRMWARE("amdgpu/vega20_rlc.bin");
86
87 MODULE_FIRMWARE("amdgpu/raven_ce.bin");
88 MODULE_FIRMWARE("amdgpu/raven_pfp.bin");
89 MODULE_FIRMWARE("amdgpu/raven_me.bin");
90 MODULE_FIRMWARE("amdgpu/raven_mec.bin");
91 MODULE_FIRMWARE("amdgpu/raven_mec2.bin");
92 MODULE_FIRMWARE("amdgpu/raven_rlc.bin");
93
94 MODULE_FIRMWARE("amdgpu/picasso_ce.bin");
95 MODULE_FIRMWARE("amdgpu/picasso_pfp.bin");
96 MODULE_FIRMWARE("amdgpu/picasso_me.bin");
97 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
98 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
99 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
100 MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
101
102 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
103 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
104 MODULE_FIRMWARE("amdgpu/raven2_me.bin");
105 MODULE_FIRMWARE("amdgpu/raven2_mec.bin");
106 MODULE_FIRMWARE("amdgpu/raven2_mec2.bin");
107 MODULE_FIRMWARE("amdgpu/raven2_rlc.bin");
108 MODULE_FIRMWARE("amdgpu/raven_kicker_rlc.bin");
109
110 MODULE_FIRMWARE("amdgpu/arcturus_mec.bin");
111 MODULE_FIRMWARE("amdgpu/arcturus_mec2.bin");
112 MODULE_FIRMWARE("amdgpu/arcturus_rlc.bin");
113
114 MODULE_FIRMWARE("amdgpu/renoir_ce.bin");
115 MODULE_FIRMWARE("amdgpu/renoir_pfp.bin");
116 MODULE_FIRMWARE("amdgpu/renoir_me.bin");
117 MODULE_FIRMWARE("amdgpu/renoir_mec.bin");
118 MODULE_FIRMWARE("amdgpu/renoir_mec2.bin");
119 MODULE_FIRMWARE("amdgpu/renoir_rlc.bin");
120
121 #define mmTCP_CHAN_STEER_0_ARCT                                                         0x0b03
122 #define mmTCP_CHAN_STEER_0_ARCT_BASE_IDX                                                        0
123 #define mmTCP_CHAN_STEER_1_ARCT                                                         0x0b04
124 #define mmTCP_CHAN_STEER_1_ARCT_BASE_IDX                                                        0
125 #define mmTCP_CHAN_STEER_2_ARCT                                                         0x0b09
126 #define mmTCP_CHAN_STEER_2_ARCT_BASE_IDX                                                        0
127 #define mmTCP_CHAN_STEER_3_ARCT                                                         0x0b0a
128 #define mmTCP_CHAN_STEER_3_ARCT_BASE_IDX                                                        0
129 #define mmTCP_CHAN_STEER_4_ARCT                                                         0x0b0b
130 #define mmTCP_CHAN_STEER_4_ARCT_BASE_IDX                                                        0
131 #define mmTCP_CHAN_STEER_5_ARCT                                                         0x0b0c
132 #define mmTCP_CHAN_STEER_5_ARCT_BASE_IDX                                                        0
133
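/*
 * GFX sub-block indices as used by the RAS trusted application (TA).  The
 * *_INDEX_START/*_INDEX_END entries bracket the sub-blocks that belong to
 * each hardware block so a driver-side sub-block can be translated into the
 * matching TA index range.
 */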
134 enum ta_ras_gfx_subblock {
135         /*CPC*/
136         TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
137         TA_RAS_BLOCK__GFX_CPC_SCRATCH = TA_RAS_BLOCK__GFX_CPC_INDEX_START,
138         TA_RAS_BLOCK__GFX_CPC_UCODE,
139         TA_RAS_BLOCK__GFX_DC_STATE_ME1,
140         TA_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
141         TA_RAS_BLOCK__GFX_DC_RESTORE_ME1,
142         TA_RAS_BLOCK__GFX_DC_STATE_ME2,
143         TA_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
144         TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
145         TA_RAS_BLOCK__GFX_CPC_INDEX_END = TA_RAS_BLOCK__GFX_DC_RESTORE_ME2,
146         /* CPF*/
147         TA_RAS_BLOCK__GFX_CPF_INDEX_START,
148         TA_RAS_BLOCK__GFX_CPF_ROQ_ME2 = TA_RAS_BLOCK__GFX_CPF_INDEX_START,
149         TA_RAS_BLOCK__GFX_CPF_ROQ_ME1,
150         TA_RAS_BLOCK__GFX_CPF_TAG,
151         TA_RAS_BLOCK__GFX_CPF_INDEX_END = TA_RAS_BLOCK__GFX_CPF_TAG,
152         /* CPG*/
153         TA_RAS_BLOCK__GFX_CPG_INDEX_START,
154         TA_RAS_BLOCK__GFX_CPG_DMA_ROQ = TA_RAS_BLOCK__GFX_CPG_INDEX_START,
155         TA_RAS_BLOCK__GFX_CPG_DMA_TAG,
156         TA_RAS_BLOCK__GFX_CPG_TAG,
157         TA_RAS_BLOCK__GFX_CPG_INDEX_END = TA_RAS_BLOCK__GFX_CPG_TAG,
158         /* GDS*/
159         TA_RAS_BLOCK__GFX_GDS_INDEX_START,
160         TA_RAS_BLOCK__GFX_GDS_MEM = TA_RAS_BLOCK__GFX_GDS_INDEX_START,
161         TA_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
162         TA_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
163         TA_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
164         TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
165         TA_RAS_BLOCK__GFX_GDS_INDEX_END = TA_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
166         /* SPI*/
167         TA_RAS_BLOCK__GFX_SPI_SR_MEM,
168         /* SQ*/
169         TA_RAS_BLOCK__GFX_SQ_INDEX_START,
170         TA_RAS_BLOCK__GFX_SQ_SGPR = TA_RAS_BLOCK__GFX_SQ_INDEX_START,
171         TA_RAS_BLOCK__GFX_SQ_LDS_D,
172         TA_RAS_BLOCK__GFX_SQ_LDS_I,
173         TA_RAS_BLOCK__GFX_SQ_VGPR, /* VGPR = SP*/
174         TA_RAS_BLOCK__GFX_SQ_INDEX_END = TA_RAS_BLOCK__GFX_SQ_VGPR,
175         /* SQC (3 ranges)*/
176         TA_RAS_BLOCK__GFX_SQC_INDEX_START,
177         /* SQC range 0*/
178         TA_RAS_BLOCK__GFX_SQC_INDEX0_START = TA_RAS_BLOCK__GFX_SQC_INDEX_START,
179         TA_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
180                 TA_RAS_BLOCK__GFX_SQC_INDEX0_START,
181         TA_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
182         TA_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
183         TA_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
184         TA_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
185         TA_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
186         TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
187         TA_RAS_BLOCK__GFX_SQC_INDEX0_END =
188                 TA_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
189         /* SQC range 1*/
190         TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
191         TA_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
192                 TA_RAS_BLOCK__GFX_SQC_INDEX1_START,
193         TA_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
194         TA_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
195         TA_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
196         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
197         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
198         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
199         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
200         TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
201         TA_RAS_BLOCK__GFX_SQC_INDEX1_END =
202                 TA_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
203         /* SQC range 2*/
204         TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
205         TA_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
206                 TA_RAS_BLOCK__GFX_SQC_INDEX2_START,
207         TA_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
208         TA_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
209         TA_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
210         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
211         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
212         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
213         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
214         TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
215         TA_RAS_BLOCK__GFX_SQC_INDEX2_END =
216                 TA_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
217         TA_RAS_BLOCK__GFX_SQC_INDEX_END = TA_RAS_BLOCK__GFX_SQC_INDEX2_END,
218         /* TA*/
219         TA_RAS_BLOCK__GFX_TA_INDEX_START,
220         TA_RAS_BLOCK__GFX_TA_FS_DFIFO = TA_RAS_BLOCK__GFX_TA_INDEX_START,
221         TA_RAS_BLOCK__GFX_TA_FS_AFIFO,
222         TA_RAS_BLOCK__GFX_TA_FL_LFIFO,
223         TA_RAS_BLOCK__GFX_TA_FX_LFIFO,
224         TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
225         TA_RAS_BLOCK__GFX_TA_INDEX_END = TA_RAS_BLOCK__GFX_TA_FS_CFIFO,
226         /* TCA*/
227         TA_RAS_BLOCK__GFX_TCA_INDEX_START,
228         TA_RAS_BLOCK__GFX_TCA_HOLE_FIFO = TA_RAS_BLOCK__GFX_TCA_INDEX_START,
229         TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
230         TA_RAS_BLOCK__GFX_TCA_INDEX_END = TA_RAS_BLOCK__GFX_TCA_REQ_FIFO,
231         /* TCC (5 sub-ranges)*/
232         TA_RAS_BLOCK__GFX_TCC_INDEX_START,
233         /* TCC range 0*/
234         TA_RAS_BLOCK__GFX_TCC_INDEX0_START = TA_RAS_BLOCK__GFX_TCC_INDEX_START,
235         TA_RAS_BLOCK__GFX_TCC_CACHE_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX0_START,
236         TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
237         TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
238         TA_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
239         TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
240         TA_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
241         TA_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
242         TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
243         TA_RAS_BLOCK__GFX_TCC_INDEX0_END = TA_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
244         /* TCC range 1*/
245         TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
246         TA_RAS_BLOCK__GFX_TCC_IN_USE_DEC = TA_RAS_BLOCK__GFX_TCC_INDEX1_START,
247         TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
248         TA_RAS_BLOCK__GFX_TCC_INDEX1_END =
249                 TA_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
250         /* TCC range 2*/
251         TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
252         TA_RAS_BLOCK__GFX_TCC_RETURN_DATA = TA_RAS_BLOCK__GFX_TCC_INDEX2_START,
253         TA_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
254         TA_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
255         TA_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
256         TA_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
257         TA_RAS_BLOCK__GFX_TCC_SRC_FIFO,
258         TA_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
259         TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
260         TA_RAS_BLOCK__GFX_TCC_INDEX2_END =
261                 TA_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
262         /* TCC range 3*/
263         TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
264         TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = TA_RAS_BLOCK__GFX_TCC_INDEX3_START,
265         TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
266         TA_RAS_BLOCK__GFX_TCC_INDEX3_END =
267                 TA_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
268         /* TCC range 4*/
269         TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
270         TA_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
271                 TA_RAS_BLOCK__GFX_TCC_INDEX4_START,
272         TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
273         TA_RAS_BLOCK__GFX_TCC_INDEX4_END =
274                 TA_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
275         TA_RAS_BLOCK__GFX_TCC_INDEX_END = TA_RAS_BLOCK__GFX_TCC_INDEX4_END,
276         /* TCI*/
277         TA_RAS_BLOCK__GFX_TCI_WRITE_RAM,
278         /* TCP*/
279         TA_RAS_BLOCK__GFX_TCP_INDEX_START,
280         TA_RAS_BLOCK__GFX_TCP_CACHE_RAM = TA_RAS_BLOCK__GFX_TCP_INDEX_START,
281         TA_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
282         TA_RAS_BLOCK__GFX_TCP_CMD_FIFO,
283         TA_RAS_BLOCK__GFX_TCP_VM_FIFO,
284         TA_RAS_BLOCK__GFX_TCP_DB_RAM,
285         TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
286         TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
287         TA_RAS_BLOCK__GFX_TCP_INDEX_END = TA_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
288         /* TD*/
289         TA_RAS_BLOCK__GFX_TD_INDEX_START,
290         TA_RAS_BLOCK__GFX_TD_SS_FIFO_LO = TA_RAS_BLOCK__GFX_TD_INDEX_START,
291         TA_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
292         TA_RAS_BLOCK__GFX_TD_CS_FIFO,
293         TA_RAS_BLOCK__GFX_TD_INDEX_END = TA_RAS_BLOCK__GFX_TD_CS_FIFO,
294         /* EA (3 sub-ranges)*/
295         TA_RAS_BLOCK__GFX_EA_INDEX_START,
296         /* EA range 0*/
297         TA_RAS_BLOCK__GFX_EA_INDEX0_START = TA_RAS_BLOCK__GFX_EA_INDEX_START,
298         TA_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = TA_RAS_BLOCK__GFX_EA_INDEX0_START,
299         TA_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
300         TA_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
301         TA_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
302         TA_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
303         TA_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
304         TA_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
305         TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
306         TA_RAS_BLOCK__GFX_EA_INDEX0_END = TA_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
307         /* EA range 1*/
308         TA_RAS_BLOCK__GFX_EA_INDEX1_START,
309         TA_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = TA_RAS_BLOCK__GFX_EA_INDEX1_START,
310         TA_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
311         TA_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
312         TA_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
313         TA_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
314         TA_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
315         TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
316         TA_RAS_BLOCK__GFX_EA_INDEX1_END = TA_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
317         /* EA range 2*/
318         TA_RAS_BLOCK__GFX_EA_INDEX2_START,
319         TA_RAS_BLOCK__GFX_EA_MAM_D0MEM = TA_RAS_BLOCK__GFX_EA_INDEX2_START,
320         TA_RAS_BLOCK__GFX_EA_MAM_D1MEM,
321         TA_RAS_BLOCK__GFX_EA_MAM_D2MEM,
322         TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
323         TA_RAS_BLOCK__GFX_EA_INDEX2_END = TA_RAS_BLOCK__GFX_EA_MAM_D3MEM,
324         TA_RAS_BLOCK__GFX_EA_INDEX_END = TA_RAS_BLOCK__GFX_EA_INDEX2_END,
325         /* UTC VM L2 bank*/
326         TA_RAS_BLOCK__UTC_VML2_BANK_CACHE,
327         /* UTC VM walker*/
328         TA_RAS_BLOCK__UTC_VML2_WALKER,
329         /* UTC ATC L2 2MB cache*/
330         TA_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
331         /* UTC ATC L2 4KB cache*/
332         TA_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
333         TA_RAS_BLOCK__GFX_MAX
334 };
335
336 struct ras_gfx_subblock {
337         unsigned char *name;
338         int ta_subblock;
339         int hw_supported_error_type;
340         int sw_supported_error_type;
341 };
342
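/*
 * Build one ras_gfx_subblocks[] entry: tie the driver sub-block to its
 * TA_RAS_BLOCK__* index and pack two capability bitfields.  Arguments a-d
 * go into hw_supported_error_type (bits 0-3 in order) and e-h into
 * sw_supported_error_type; both fields appear to be interpreted against the
 * amdgpu_ras error-type bits (parity, single-correctable,
 * multi-uncorrectable, poison).
 */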
343 #define AMDGPU_RAS_SUB_BLOCK(subblock, a, b, c, d, e, f, g, h)                             \
344         [AMDGPU_RAS_BLOCK__##subblock] = {                                     \
345                 #subblock,                                                     \
346                 TA_RAS_BLOCK__##subblock,                                      \
347                 ((a) | ((b) << 1) | ((c) << 2) | ((d) << 3)),                  \
348                 (((e) << 1) | ((f) << 3) | (g) | ((h) << 2)),                  \
349         }
350
351 static const struct ras_gfx_subblock ras_gfx_subblocks[] = {
352         AMDGPU_RAS_SUB_BLOCK(GFX_CPC_SCRATCH, 0, 1, 1, 1, 1, 0, 0, 1),
353         AMDGPU_RAS_SUB_BLOCK(GFX_CPC_UCODE, 0, 1, 1, 1, 1, 0, 0, 1),
354         AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
355         AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
356         AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME1, 1, 0, 0, 1, 0, 0, 0, 0),
357         AMDGPU_RAS_SUB_BLOCK(GFX_DC_STATE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
358         AMDGPU_RAS_SUB_BLOCK(GFX_DC_CSINVOC_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
359         AMDGPU_RAS_SUB_BLOCK(GFX_DC_RESTORE_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
360         AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME2, 1, 0, 0, 1, 0, 0, 0, 0),
361         AMDGPU_RAS_SUB_BLOCK(GFX_CPF_ROQ_ME1, 1, 0, 0, 1, 0, 0, 1, 0),
362         AMDGPU_RAS_SUB_BLOCK(GFX_CPF_TAG, 0, 1, 1, 1, 1, 0, 0, 1),
363         AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_ROQ, 1, 0, 0, 1, 0, 0, 1, 0),
364         AMDGPU_RAS_SUB_BLOCK(GFX_CPG_DMA_TAG, 0, 1, 1, 1, 0, 1, 0, 1),
365         AMDGPU_RAS_SUB_BLOCK(GFX_CPG_TAG, 0, 1, 1, 1, 1, 1, 0, 1),
366         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
367         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_INPUT_QUEUE, 1, 0, 0, 1, 0, 0, 0, 0),
368         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_CMD_RAM_MEM, 0, 1, 1, 1, 0, 0, 0,
369                              0),
370         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PHY_DATA_RAM_MEM, 1, 0, 0, 1, 0, 0, 0,
371                              0),
372         AMDGPU_RAS_SUB_BLOCK(GFX_GDS_OA_PIPE_MEM, 0, 1, 1, 1, 0, 0, 0, 0),
373         AMDGPU_RAS_SUB_BLOCK(GFX_SPI_SR_MEM, 1, 0, 0, 1, 0, 0, 0, 0),
374         AMDGPU_RAS_SUB_BLOCK(GFX_SQ_SGPR, 0, 1, 1, 1, 0, 0, 0, 0),
375         AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_D, 0, 1, 1, 1, 1, 0, 0, 1),
376         AMDGPU_RAS_SUB_BLOCK(GFX_SQ_LDS_I, 0, 1, 1, 1, 0, 0, 0, 0),
377         AMDGPU_RAS_SUB_BLOCK(GFX_SQ_VGPR, 0, 1, 1, 1, 0, 0, 0, 0),
378         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0, 1),
379         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
380                              0, 0),
381         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU0_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
382                              0),
383         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
384                              0, 0),
385         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU1_UTCL1_LFIFO, 0, 1, 1, 1, 1, 0, 0,
386                              0),
387         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_WRITE_DATA_BUF, 0, 1, 1, 1, 0, 0,
388                              0, 0),
389         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_CU2_UTCL1_LFIFO, 0, 1, 1, 1, 0, 0, 0,
390                              0),
391         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
392                              1),
393         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
394                              0, 0, 0),
395         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
396                              0),
397         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
398                              0),
399         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
400                              0),
401         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
402                              0),
403         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
404                              0),
405         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
406                              0, 0),
407         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKA_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
408                              0),
409         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_TAG_RAM, 0, 1, 1, 1, 1, 0, 0,
410                              0),
411         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, 1, 0, 0, 1, 0,
412                              0, 0, 0),
413         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
414                              0),
415         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_INST_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
416                              0),
417         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_TAG_RAM, 0, 1, 1, 1, 0, 0, 0,
418                              0),
419         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_HIT_FIFO, 1, 0, 0, 1, 0, 0, 0,
420                              0),
421         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_MISS_FIFO, 1, 0, 0, 1, 0, 0, 0,
422                              0),
423         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, 1, 0, 0, 1, 0, 0,
424                              0, 0),
425         AMDGPU_RAS_SUB_BLOCK(GFX_SQC_DATA_BANKB_BANK_RAM, 0, 1, 1, 1, 0, 0, 0,
426                              0),
427         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_DFIFO, 0, 1, 1, 1, 1, 0, 0, 1),
428         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_AFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
429         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FL_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
430         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FX_LFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
431         AMDGPU_RAS_SUB_BLOCK(GFX_TA_FS_CFIFO, 1, 0, 0, 1, 0, 0, 0, 0),
432         AMDGPU_RAS_SUB_BLOCK(GFX_TCA_HOLE_FIFO, 1, 0, 0, 1, 0, 1, 1, 0),
433         AMDGPU_RAS_SUB_BLOCK(GFX_TCA_REQ_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
434         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA, 0, 1, 1, 1, 1, 0, 0, 1),
435         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_0_1, 0, 1, 1, 1, 1, 0, 0,
436                              1),
437         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_0, 0, 1, 1, 1, 1, 0, 0,
438                              1),
439         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DATA_BANK_1_1, 0, 1, 1, 1, 1, 0, 0,
440                              1),
441         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_0, 0, 1, 1, 1, 0, 0, 0,
442                              0),
443         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_DIRTY_BANK_1, 0, 1, 1, 1, 0, 0, 0,
444                              0),
445         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_HIGH_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
446         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LOW_RATE_TAG, 0, 1, 1, 1, 0, 0, 0, 0),
447         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_DEC, 1, 0, 0, 1, 0, 0, 0, 0),
448         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_IN_USE_TRANSFER, 1, 0, 0, 1, 0, 0, 0, 0),
449         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_DATA, 1, 0, 0, 1, 0, 0, 0, 0),
450         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_RETURN_CONTROL, 1, 0, 0, 1, 0, 0, 0, 0),
451         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_UC_ATOMIC_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
452         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_RETURN, 1, 0, 0, 1, 0, 1, 1, 0),
453         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRITE_CACHE_READ, 1, 0, 0, 1, 0, 0, 0, 0),
454         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
455         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_SRC_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 1, 0),
456         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_CACHE_TAG_PROBE_FIFO, 1, 0, 0, 1, 0, 0, 0,
457                              0),
458         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
459         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_LATENCY_FIFO_NEXT_RAM, 1, 0, 0, 1, 0, 0, 0,
460                              0),
461         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_WRRET_TAG_WRITE_RETURN, 1, 0, 0, 1, 0, 0,
462                              0, 0),
463         AMDGPU_RAS_SUB_BLOCK(GFX_TCC_ATOMIC_RETURN_BUFFER, 1, 0, 0, 1, 0, 0, 0,
464                              0),
465         AMDGPU_RAS_SUB_BLOCK(GFX_TCI_WRITE_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
466         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CACHE_RAM, 0, 1, 1, 1, 1, 0, 0, 1),
467         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_LFIFO_RAM, 0, 1, 1, 1, 0, 0, 0, 0),
468         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_CMD_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
469         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_VM_FIFO, 0, 1, 1, 1, 0, 0, 0, 0),
470         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_DB_RAM, 1, 0, 0, 1, 0, 0, 0, 0),
471         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO0, 0, 1, 1, 1, 0, 0, 0, 0),
472         AMDGPU_RAS_SUB_BLOCK(GFX_TCP_UTCL1_LFIFO1, 0, 1, 1, 1, 0, 0, 0, 0),
473         AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_LO, 0, 1, 1, 1, 1, 0, 0, 1),
474         AMDGPU_RAS_SUB_BLOCK(GFX_TD_SS_FIFO_HI, 0, 1, 1, 1, 0, 0, 0, 0),
475         AMDGPU_RAS_SUB_BLOCK(GFX_TD_CS_FIFO, 1, 0, 0, 1, 0, 0, 0, 0),
476         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_CMDMEM, 0, 1, 1, 1, 1, 0, 0, 1),
477         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
478         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
479         AMDGPU_RAS_SUB_BLOCK(GFX_EA_RRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
480         AMDGPU_RAS_SUB_BLOCK(GFX_EA_WRET_TAGMEM, 0, 1, 1, 1, 0, 0, 0, 0),
481         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
482         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_CMDMEM, 0, 1, 1, 1, 0, 0, 0, 0),
483         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_DATAMEM, 0, 1, 1, 1, 0, 0, 0, 0),
484         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
485         AMDGPU_RAS_SUB_BLOCK(GFX_EA_DRAMWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
486         AMDGPU_RAS_SUB_BLOCK(GFX_EA_IORD_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
487         AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_CMDMEM, 1, 0, 0, 1, 0, 0, 0, 0),
488         AMDGPU_RAS_SUB_BLOCK(GFX_EA_IOWR_DATAMEM, 1, 0, 0, 1, 0, 0, 0, 0),
489         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIRD_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
490         AMDGPU_RAS_SUB_BLOCK(GFX_EA_GMIWR_PAGEMEM, 1, 0, 0, 1, 0, 0, 0, 0),
491         AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D0MEM, 1, 0, 0, 1, 0, 0, 0, 0),
492         AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D1MEM, 1, 0, 0, 1, 0, 0, 0, 0),
493         AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D2MEM, 1, 0, 0, 1, 0, 0, 0, 0),
494         AMDGPU_RAS_SUB_BLOCK(GFX_EA_MAM_D3MEM, 1, 0, 0, 1, 0, 0, 0, 0),
495         AMDGPU_RAS_SUB_BLOCK(UTC_VML2_BANK_CACHE, 0, 1, 1, 1, 0, 0, 0, 0),
496         AMDGPU_RAS_SUB_BLOCK(UTC_VML2_WALKER, 0, 1, 1, 1, 0, 0, 0, 0),
497         AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_2M_BANK, 1, 0, 0, 1, 0, 0, 0, 0),
498         AMDGPU_RAS_SUB_BLOCK(UTC_ATCL2_CACHE_4K_BANK, 0, 1, 1, 1, 0, 0, 0, 0),
499 };
500
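/*
 * Golden register settings.  Each SOC15_REG_GOLDEN_VALUE() entry is
 * (IP block, instance, register, and_mask, or_value): when the sequence is
 * programmed, the bits covered by and_mask are cleared and or_value is
 * OR-ed in, or the register is written outright when the mask is
 * 0xffffffff.
 */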
501 static const struct soc15_reg_golden golden_settings_gc_9_0[] =
502 {
503         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
504         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000),
505         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
506         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
507         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
508         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
509         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
510         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
511         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
512         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
513         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
514         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
515         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
516         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
517         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
518         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
519         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
520         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
521         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
522         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
523 };
524
525 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
526 {
527         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0x0000f000, 0x00012107),
528         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
529         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
530         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
531         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
532         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
533         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x2a114042),
534         SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
535         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0x00008000, 0x00048000),
536         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
537         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
538         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
539         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
540         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
541         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
542         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
543         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
544         SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
545 };
546
547 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
548 {
549         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x0f000080, 0x04000080),
550         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
551         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
552         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x22014042),
553         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x22014042),
554         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0x00003e00, 0x00000400),
555         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xff840000, 0x04040000),
556         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00030000),
557         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff010f, 0x01000107),
558         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0x000b0000, 0x000b0000),
559         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01000000, 0x01000000)
560 };
561
562 static const struct soc15_reg_golden golden_settings_gc_9_1[] =
563 {
564         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
565         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x08000000, 0x08000080),
566         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0x08000000, 0x08000080),
567         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x08000000, 0x08000080),
568         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
569         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
570         SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x08000000, 0x08000080),
571         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
572         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
573         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
574         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0x08000000, 0x08000080),
575         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0x08000000, 0x08000080),
576         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0x08000000, 0x08000080),
577         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0x08000000, 0x08000080),
578         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0x08000000, 0x08000080),
579         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
580         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
581         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
582         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
583         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff),
584         SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
585         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
586         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
587         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
588 };
589
590 static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] =
591 {
592         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_3, 0x30000000, 0x10000000),
593         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24000042),
594         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24000042),
595         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04048000),
596         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_MODE_CNTL_1, 0x06000000, 0x06000000),
597         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
598         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800)
599 };
600
601 static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] =
602 {
603         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000),
604         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
605         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
606         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080),
607         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080),
608         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080),
609         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041),
610         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041),
611         SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
612         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
613         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080),
614         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080),
615         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080),
616         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080),
617         SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080),
618         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
619         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010),
620         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
621         SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080),
622 };
623
624 static const struct soc15_reg_golden golden_settings_gc_9_1_rn[] =
625 {
626         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
627         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000),
628         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400),
629         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xf3e777ff, 0x24000042),
630         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xf3e777ff, 0x24000042),
631         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
632         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
633         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
634         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
635         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
636         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120),
637         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGCEA_PROBE_MAP, 0xffffffff, 0x0000cccc),
638 };
639
640 static const struct soc15_reg_golden golden_settings_gc_9_x_common[] =
641 {
642         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_SD_CNTL, 0xffffffff, 0x000001ff),
643         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000),
644         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_DATA, 0xffffffff, 0x2544c382)
645 };
646
647 static const struct soc15_reg_golden golden_settings_gc_9_2_1[] =
648 {
649         SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420),
650         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000),
651         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024),
652         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001),
653         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000),
654         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSH_MEM_CONFIG, 0x00001000, 0x00001000),
655         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_0, 0x0007ffff, 0x00000800),
656         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_CU_1, 0x0007ffff, 0x00000800),
657         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_0, 0x01ffffff, 0x0000ff87),
658         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_RESOURCE_RESERVE_EN_CU_1, 0x01ffffff, 0x0000ff8f),
659         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSQC_CONFIG, 0x03000000, 0x020a2000),
660         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x010b0000),
661         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
662         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
663         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
664         SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
665 };
666
667 static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] =
668 {
669         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0x00000080, 0x04000080),
670         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104),
671         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0x0f000000, 0x0a000000),
672         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x24104041),
673         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x24104041),
674         SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000),
675         SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107),
676         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000),
677         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410),
678         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000),
679         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
680         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
681         SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
682 };
683
684 static const struct soc15_reg_golden golden_settings_gc_9_4_1_arct[] =
685 {
686         SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x2a114042),
687         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTA_CNTL_AUX, 0xfffffeef, 0x10b0000),
688         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_0_ARCT, 0x3fffffff, 0x346f0a4e),
689         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_1_ARCT, 0x3fffffff, 0x1c642ca),
690         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_2_ARCT, 0x3fffffff, 0x26f45098),
691         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_3_ARCT, 0x3fffffff, 0x2ebd9fe3),
692         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_4_ARCT, 0x3fffffff, 0xb90f5b1),
693         SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_5_ARCT, 0x3ff, 0x135),
694 };
695
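/*
 * Offsets of the RLC_SRM_INDEX_CNTL_ADDR/DATA register instances relative to
 * instance 0, so the save-restore-machine index control registers can be
 * programmed in a loop; the first entry is deliberately zero.
 */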
696 static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] =
697 {
698         mmRLC_SRM_INDEX_CNTL_ADDR_0 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
699         mmRLC_SRM_INDEX_CNTL_ADDR_1 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
700         mmRLC_SRM_INDEX_CNTL_ADDR_2 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
701         mmRLC_SRM_INDEX_CNTL_ADDR_3 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
702         mmRLC_SRM_INDEX_CNTL_ADDR_4 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
703         mmRLC_SRM_INDEX_CNTL_ADDR_5 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
704         mmRLC_SRM_INDEX_CNTL_ADDR_6 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
705         mmRLC_SRM_INDEX_CNTL_ADDR_7 - mmRLC_SRM_INDEX_CNTL_ADDR_0,
706 };
707
708 static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] =
709 {
710         mmRLC_SRM_INDEX_CNTL_DATA_0 - mmRLC_SRM_INDEX_CNTL_DATA_0,
711         mmRLC_SRM_INDEX_CNTL_DATA_1 - mmRLC_SRM_INDEX_CNTL_DATA_0,
712         mmRLC_SRM_INDEX_CNTL_DATA_2 - mmRLC_SRM_INDEX_CNTL_DATA_0,
713         mmRLC_SRM_INDEX_CNTL_DATA_3 - mmRLC_SRM_INDEX_CNTL_DATA_0,
714         mmRLC_SRM_INDEX_CNTL_DATA_4 - mmRLC_SRM_INDEX_CNTL_DATA_0,
715         mmRLC_SRM_INDEX_CNTL_DATA_5 - mmRLC_SRM_INDEX_CNTL_DATA_0,
716         mmRLC_SRM_INDEX_CNTL_DATA_6 - mmRLC_SRM_INDEX_CNTL_DATA_0,
717         mmRLC_SRM_INDEX_CNTL_DATA_7 - mmRLC_SRM_INDEX_CNTL_DATA_0,
718 };
719
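/*
 * Expected GB_ADDR_CONFIG values per ASIC, consumed later in this file when
 * the gfx address configuration is set up.
 */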
720 #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042
721 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041
722 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042
723 #define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041
724
725 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev);
726 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev);
727 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev);
728 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
729 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
730                                  struct amdgpu_cu_info *cu_info);
731 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
732 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
733 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
734 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
735 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
736                                           void *ras_error_status);
737 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
738                                      void *inject_if);
739
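/*
 * Program the per-ASIC golden register sequences defined above.  Raven picks
 * the rv1 or rv2 list based on the revision id, and Renoir returns early
 * because it does not use the common gc_9_x settings.
 */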
740 static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev)
741 {
742         switch (adev->asic_type) {
743         case CHIP_VEGA10:
744                 soc15_program_register_sequence(adev,
745                                                 golden_settings_gc_9_0,
746                                                 ARRAY_SIZE(golden_settings_gc_9_0));
747                 soc15_program_register_sequence(adev,
748                                                 golden_settings_gc_9_0_vg10,
749                                                 ARRAY_SIZE(golden_settings_gc_9_0_vg10));
750                 break;
751         case CHIP_VEGA12:
752                 soc15_program_register_sequence(adev,
753                                                 golden_settings_gc_9_2_1,
754                                                 ARRAY_SIZE(golden_settings_gc_9_2_1));
755                 soc15_program_register_sequence(adev,
756                                                 golden_settings_gc_9_2_1_vg12,
757                                                 ARRAY_SIZE(golden_settings_gc_9_2_1_vg12));
758                 break;
759         case CHIP_VEGA20:
760                 soc15_program_register_sequence(adev,
761                                                 golden_settings_gc_9_0,
762                                                 ARRAY_SIZE(golden_settings_gc_9_0));
763                 soc15_program_register_sequence(adev,
764                                                 golden_settings_gc_9_0_vg20,
765                                                 ARRAY_SIZE(golden_settings_gc_9_0_vg20));
766                 break;
767         case CHIP_ARCTURUS:
768                 soc15_program_register_sequence(adev,
769                                                 golden_settings_gc_9_4_1_arct,
770                                                 ARRAY_SIZE(golden_settings_gc_9_4_1_arct));
771                 break;
772         case CHIP_RAVEN:
773                 soc15_program_register_sequence(adev, golden_settings_gc_9_1,
774                                                 ARRAY_SIZE(golden_settings_gc_9_1));
775                 if (adev->rev_id >= 8)
776                         soc15_program_register_sequence(adev,
777                                                         golden_settings_gc_9_1_rv2,
778                                                         ARRAY_SIZE(golden_settings_gc_9_1_rv2));
779                 else
780                         soc15_program_register_sequence(adev,
781                                                         golden_settings_gc_9_1_rv1,
782                                                         ARRAY_SIZE(golden_settings_gc_9_1_rv1));
783                 break;
784         case CHIP_RENOIR:
785                 soc15_program_register_sequence(adev,
786                                                 golden_settings_gc_9_1_rn,
787                                                 ARRAY_SIZE(golden_settings_gc_9_1_rn));
788                 return; /* for renoir, don't need the common golden settings */
789         default:
790                 break;
791         }
792
793         if (adev->asic_type != CHIP_ARCTURUS)
794                 soc15_program_register_sequence(adev, golden_settings_gc_9_x_common,
795                                                 (const u32)ARRAY_SIZE(golden_settings_gc_9_x_common));
796 }
797
798 static void gfx_v9_0_scratch_init(struct amdgpu_device *adev)
799 {
800         adev->gfx.scratch.num_reg = 8;
801         adev->gfx.scratch.reg_base = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
802         adev->gfx.scratch.free_mask = (1u << adev->gfx.scratch.num_reg) - 1;
803 }
804
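/*
 * Emit a PACKET3_WRITE_DATA packet that writes @val to register @reg from
 * the selected engine; @wc asks the CP to confirm the write before
 * completing the packet.
 */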
805 static void gfx_v9_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
806                                        bool wc, uint32_t reg, uint32_t val)
807 {
808         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
809         amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
810                                 WRITE_DATA_DST_SEL(0) |
811                                 (wc ? WR_CONFIRM : 0));
812         amdgpu_ring_write(ring, reg);
813         amdgpu_ring_write(ring, 0);
814         amdgpu_ring_write(ring, val);
815 }
816
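/*
 * Emit a PACKET3_WAIT_REG_MEM packet: poll a register (mem_space == 0) or a
 * memory location (mem_space == 1) until the masked value equals @ref;
 * @inv sets the poll interval.
 */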
817 static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
818                                   int mem_space, int opt, uint32_t addr0,
819                                   uint32_t addr1, uint32_t ref, uint32_t mask,
820                                   uint32_t inv)
821 {
822         amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
823         amdgpu_ring_write(ring,
824                                  /* memory (1) or register (0) */
825                                  (WAIT_REG_MEM_MEM_SPACE(mem_space) |
826                                  WAIT_REG_MEM_OPERATION(opt) | /* wait */
827                                  WAIT_REG_MEM_FUNCTION(3) |  /* equal */
828                                  WAIT_REG_MEM_ENGINE(eng_sel)));
829
830         if (mem_space)
831                 BUG_ON(addr0 & 0x3); /* Dword align */
832         amdgpu_ring_write(ring, addr0);
833         amdgpu_ring_write(ring, addr1);
834         amdgpu_ring_write(ring, ref);
835         amdgpu_ring_write(ring, mask);
836         amdgpu_ring_write(ring, inv); /* poll interval */
837 }
838
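/*
 * Basic ring test: seed a scratch register with 0xCAFEDEAD, write 0xDEADBEEF
 * to it through the ring, then poll until the new value shows up or the
 * timeout expires.
 */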
839 static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
840 {
841         struct amdgpu_device *adev = ring->adev;
842         uint32_t scratch;
843         uint32_t tmp = 0;
844         unsigned i;
845         int r;
846
847         r = amdgpu_gfx_scratch_get(adev, &scratch);
848         if (r)
849                 return r;
850
851         WREG32(scratch, 0xCAFEDEAD);
852         r = amdgpu_ring_alloc(ring, 3);
853         if (r)
854                 goto error_free_scratch;
855
856         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
857         amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
858         amdgpu_ring_write(ring, 0xDEADBEEF);
859         amdgpu_ring_commit(ring);
860
861         for (i = 0; i < adev->usec_timeout; i++) {
862                 tmp = RREG32(scratch);
863                 if (tmp == 0xDEADBEEF)
864                         break;
865                 udelay(1);
866         }
867
868         if (i >= adev->usec_timeout)
869                 r = -ETIMEDOUT;
870
871 error_free_scratch:
872         amdgpu_gfx_scratch_free(adev, scratch);
873         return r;
874 }
875
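/*
 * Indirect buffer test: build a small IB that writes 0xDEADBEEF to a
 * writeback slot, schedule it on the ring, wait for its fence and verify
 * that the slot was updated.
 */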
876 static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout)
877 {
878         struct amdgpu_device *adev = ring->adev;
879         struct amdgpu_ib ib;
880         struct dma_fence *f = NULL;
881
882         unsigned index;
883         uint64_t gpu_addr;
884         uint32_t tmp;
885         long r;
886
887         r = amdgpu_device_wb_get(adev, &index);
888         if (r)
889                 return r;
890
891         gpu_addr = adev->wb.gpu_addr + (index * 4);
892         adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD);
893         memset(&ib, 0, sizeof(ib));
894         r = amdgpu_ib_get(adev, NULL, 16, &ib);
895         if (r)
896                 goto err1;
897
898         ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3);
899         ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
900         ib.ptr[2] = lower_32_bits(gpu_addr);
901         ib.ptr[3] = upper_32_bits(gpu_addr);
902         ib.ptr[4] = 0xDEADBEEF;
903         ib.length_dw = 5;
904
905         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
906         if (r)
907                 goto err2;
908
909         r = dma_fence_wait_timeout(f, false, timeout);
910         if (r == 0) {
911                 r = -ETIMEDOUT;
912                 goto err2;
913         } else if (r < 0) {
914                 goto err2;
915         }
916
917         tmp = adev->wb.wb[index];
918         if (tmp == 0xDEADBEEF)
919                 r = 0;
920         else
921                 r = -EINVAL;
922
923 err2:
924         amdgpu_ib_free(adev, &ib, NULL);
925         dma_fence_put(f);
926 err1:
927         amdgpu_device_wb_free(adev, index);
928         return r;
929 }
930
931
932 static void gfx_v9_0_free_microcode(struct amdgpu_device *adev)
933 {
934         release_firmware(adev->gfx.pfp_fw);
935         adev->gfx.pfp_fw = NULL;
936         release_firmware(adev->gfx.me_fw);
937         adev->gfx.me_fw = NULL;
938         release_firmware(adev->gfx.ce_fw);
939         adev->gfx.ce_fw = NULL;
940         release_firmware(adev->gfx.rlc_fw);
941         adev->gfx.rlc_fw = NULL;
942         release_firmware(adev->gfx.mec_fw);
943         adev->gfx.mec_fw = NULL;
944         release_firmware(adev->gfx.mec2_fw);
945         adev->gfx.mec2_fw = NULL;
946
947         kfree(adev->gfx.rlc.register_list_format);
948 }
949
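/*
 * Parse the v2.1 RLC firmware header and record the save/restore list
 * cntl/GPM/SRM blobs: their ucode and feature versions, sizes and offsets
 * into the firmware image.
 */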
950 static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev)
951 {
952         const struct rlc_firmware_header_v2_1 *rlc_hdr;
953
954         rlc_hdr = (const struct rlc_firmware_header_v2_1 *)adev->gfx.rlc_fw->data;
955         adev->gfx.rlc_srlc_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_ucode_ver);
956         adev->gfx.rlc_srlc_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_cntl_feature_ver);
957         adev->gfx.rlc.save_restore_list_cntl_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_cntl_size_bytes);
958         adev->gfx.rlc.save_restore_list_cntl = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_cntl_offset_bytes);
959         adev->gfx.rlc_srlg_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_ucode_ver);
960         adev->gfx.rlc_srlg_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_gpm_feature_ver);
961         adev->gfx.rlc.save_restore_list_gpm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_gpm_size_bytes);
962         adev->gfx.rlc.save_restore_list_gpm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_gpm_offset_bytes);
963         adev->gfx.rlc_srls_fw_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_ucode_ver);
964         adev->gfx.rlc_srls_feature_version = le32_to_cpu(rlc_hdr->save_restore_list_srm_feature_ver);
965         adev->gfx.rlc.save_restore_list_srm_size_bytes = le32_to_cpu(rlc_hdr->save_restore_list_srm_size_bytes);
966         adev->gfx.rlc.save_restore_list_srm = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->save_restore_list_srm_offset_bytes);
967         adev->gfx.rlc.reg_list_format_direct_reg_list_length =
968                         le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length);
969 }
970
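/*
 * Set me/mec_fw_write_wait when the CP firmware is new enough to support the
 * combined register write-and-wait handling; warn once when the firmware
 * predates the GRBM one-cycle-delay fix.
 */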
971 static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev)
972 {
973         adev->gfx.me_fw_write_wait = false;
974         adev->gfx.mec_fw_write_wait = false;
975
976         if ((adev->gfx.mec_fw_version < 0x000001a5) ||
977             (adev->gfx.mec_feature_version < 46) ||
978             (adev->gfx.pfp_fw_version < 0x000000b7) ||
979             (adev->gfx.pfp_feature_version < 46))
980                 DRM_WARN_ONCE("Warning: check cp_fw_version and update it to realize "
981                               "GRBM requires 1-cycle delay in cp firmware\n");
982
983         switch (adev->asic_type) {
984         case CHIP_VEGA10:
985                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
986                     (adev->gfx.me_feature_version >= 42) &&
987                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
988                     (adev->gfx.pfp_feature_version >= 42))
989                         adev->gfx.me_fw_write_wait = true;
990
991                 if ((adev->gfx.mec_fw_version >=  0x00000193) &&
992                     (adev->gfx.mec_feature_version >= 42))
993                         adev->gfx.mec_fw_write_wait = true;
994                 break;
995         case CHIP_VEGA12:
996                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
997                     (adev->gfx.me_feature_version >= 44) &&
998                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
999                     (adev->gfx.pfp_feature_version >= 44))
1000                         adev->gfx.me_fw_write_wait = true;
1001
1002                 if ((adev->gfx.mec_fw_version >=  0x00000196) &&
1003                     (adev->gfx.mec_feature_version >= 44))
1004                         adev->gfx.mec_fw_write_wait = true;
1005                 break;
1006         case CHIP_VEGA20:
1007                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1008                     (adev->gfx.me_feature_version >= 44) &&
1009                     (adev->gfx.pfp_fw_version >=  0x000000b2) &&
1010                     (adev->gfx.pfp_feature_version >= 44))
1011                         adev->gfx.me_fw_write_wait = true;
1012
1013                 if ((adev->gfx.mec_fw_version >=  0x00000197) &&
1014                     (adev->gfx.mec_feature_version >= 44))
1015                         adev->gfx.mec_fw_write_wait = true;
1016                 break;
1017         case CHIP_RAVEN:
1018                 if ((adev->gfx.me_fw_version >= 0x0000009c) &&
1019                     (adev->gfx.me_feature_version >= 42) &&
1020                     (adev->gfx.pfp_fw_version >=  0x000000b1) &&
1021                     (adev->gfx.pfp_feature_version >= 42))
1022                         adev->gfx.me_fw_write_wait = true;
1023
1024                 if ((adev->gfx.mec_fw_version >=  0x00000192) &&
1025                     (adev->gfx.mec_feature_version >= 42))
1026                         adev->gfx.mec_fw_write_wait = true;
1027                 break;
1028         default:
1029                 break;
1030         }
1031 }
1032
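/*
 * On Raven, clear PP_GFXOFF_MASK when the RLC firmware is known not to
 * support GFXOFF; on Raven and Renoir, a still-enabled GFXOFF is translated
 * into the GFX_PG/CP/RLC_SMU_HS powergating flags.
 */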
1033 static void gfx_v9_0_check_if_need_gfxoff(struct amdgpu_device *adev)
1034 {
1035         switch (adev->asic_type) {
1036         case CHIP_VEGA10:
1037         case CHIP_VEGA12:
1038         case CHIP_VEGA20:
1039                 break;
1040         case CHIP_RAVEN:
1041                 if (!(adev->rev_id >= 0x8 || adev->pdev->device == 0x15d8)
1042                         && ((adev->gfx.rlc_fw_version != 106 &&
1043                              adev->gfx.rlc_fw_version < 531) ||
1044                             (adev->gfx.rlc_fw_version == 53815) ||
1045                             (adev->gfx.rlc_feature_version < 1) ||
1046                             !adev->gfx.rlc.is_rlc_v2_1))
1047                         adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
1048
1049                 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1050                         adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1051                                 AMD_PG_SUPPORT_CP |
1052                                 AMD_PG_SUPPORT_RLC_SMU_HS;
1053                 break;
1054         case CHIP_RENOIR:
1055                 if (adev->pm.pp_feature & PP_GFXOFF_MASK)
1056                         adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
1057                                 AMD_PG_SUPPORT_CP |
1058                                 AMD_PG_SUPPORT_RLC_SMU_HS;
1059                 break;
1060         default:
1061                 break;
1062         }
1063 }
1064
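/*
 * Request and validate the PFP, ME and CE microcode for the given chip name,
 * record the ucode/feature versions, and register the images in
 * adev->firmware.ucode[] when PSP front-door loading is used.
 */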
1065 static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev,
1066                                           const char *chip_name)
1067 {
1068         char fw_name[30];
1069         int err;
1070         struct amdgpu_firmware_info *info = NULL;
1071         const struct common_firmware_header *header = NULL;
1072         const struct gfx_firmware_header_v1_0 *cp_hdr;
1073
1074         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
1075         err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
1076         if (err)
1077                 goto out;
1078         err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
1079         if (err)
1080                 goto out;
1081         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
1082         adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1083         adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1084
1085         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
1086         err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
1087         if (err)
1088                 goto out;
1089         err = amdgpu_ucode_validate(adev->gfx.me_fw);
1090         if (err)
1091                 goto out;
1092         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
1093         adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1094         adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1095
1096         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
1097         err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
1098         if (err)
1099                 goto out;
1100         err = amdgpu_ucode_validate(adev->gfx.ce_fw);
1101         if (err)
1102                 goto out;
1103         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
1104         adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1105         adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1106
1107         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1108                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
1109                 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
1110                 info->fw = adev->gfx.pfp_fw;
1111                 header = (const struct common_firmware_header *)info->fw->data;
1112                 adev->firmware.fw_size +=
1113                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1114
1115                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
1116                 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
1117                 info->fw = adev->gfx.me_fw;
1118                 header = (const struct common_firmware_header *)info->fw->data;
1119                 adev->firmware.fw_size +=
1120                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1121
1122                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
1123                 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
1124                 info->fw = adev->gfx.ce_fw;
1125                 header = (const struct common_firmware_header *)info->fw->data;
1126                 adev->firmware.fw_size +=
1127                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1128         }
1129
1130 out:
1131         if (err) {
1132                 dev_err(adev->dev,
1133                         "gfx9: Failed to load firmware \"%s\"\n",
1134                         fw_name);
1135                 release_firmware(adev->gfx.pfp_fw);
1136                 adev->gfx.pfp_fw = NULL;
1137                 release_firmware(adev->gfx.me_fw);
1138                 adev->gfx.me_fw = NULL;
1139                 release_firmware(adev->gfx.ce_fw);
1140                 adev->gfx.ce_fw = NULL;
1141         }
1142         return err;
1143 }
1144
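/*
 * Request the RLC microcode: pick the Picasso AM4, Raven "kicker" or default
 * <chip>_rlc.bin variant, parse the v2.0 header (and the v2.1 save/restore
 * lists when present), and register the images for PSP loading.
 */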
1145 static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev,
1146                                           const char *chip_name)
1147 {
1148         char fw_name[30];
1149         int err;
1150         struct amdgpu_firmware_info *info = NULL;
1151         const struct common_firmware_header *header = NULL;
1152         const struct rlc_firmware_header_v2_0 *rlc_hdr;
1153         unsigned int *tmp = NULL;
1154         unsigned int i = 0;
1155         uint16_t version_major;
1156         uint16_t version_minor;
1157         uint32_t smu_version;
1158
1159         /*
1160          * For Picasso on an AM4-socket board we use picasso_rlc_am4.bin
1161          * instead of picasso_rlc.bin.
1162          * Detection:
1163          * PCO AM4: revision >= 0xC8 && revision <= 0xCF
1164          *          or revision >= 0xD8 && revision <= 0xDF
1165          * otherwise it is PCO FP5.
1166          */
1167         if (!strcmp(chip_name, "picasso") &&
1168                 (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
1169                 ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
1170                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
1171         else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) &&
1172                 (smu_version >= 0x41e2b))
1173                 /*
1174                  * SMC is loaded by SBIOS on APU, so the SMU version can be read directly.
1175                  */
1176                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_kicker_rlc.bin", chip_name);
1177         else
1178                 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
1179         err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
1180         if (err)
1181                 goto out;
1182         err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
             if (err)
                     goto out;
1183         rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
1184
1185         version_major = le16_to_cpu(rlc_hdr->header.header_version_major);
1186         version_minor = le16_to_cpu(rlc_hdr->header.header_version_minor);
1187         if (version_major == 2 && version_minor == 1)
1188                 adev->gfx.rlc.is_rlc_v2_1 = true;
1189
1190         adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
1191         adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);
1192         adev->gfx.rlc.save_and_restore_offset =
1193                         le32_to_cpu(rlc_hdr->save_and_restore_offset);
1194         adev->gfx.rlc.clear_state_descriptor_offset =
1195                         le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
1196         adev->gfx.rlc.avail_scratch_ram_locations =
1197                         le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
1198         adev->gfx.rlc.reg_restore_list_size =
1199                         le32_to_cpu(rlc_hdr->reg_restore_list_size);
1200         adev->gfx.rlc.reg_list_format_start =
1201                         le32_to_cpu(rlc_hdr->reg_list_format_start);
1202         adev->gfx.rlc.reg_list_format_separate_start =
1203                         le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
1204         adev->gfx.rlc.starting_offsets_start =
1205                         le32_to_cpu(rlc_hdr->starting_offsets_start);
1206         adev->gfx.rlc.reg_list_format_size_bytes =
1207                         le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
1208         adev->gfx.rlc.reg_list_size_bytes =
1209                         le32_to_cpu(rlc_hdr->reg_list_size_bytes);
1210         adev->gfx.rlc.register_list_format =
1211                         kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
1212                                 adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);
1213         if (!adev->gfx.rlc.register_list_format) {
1214                 err = -ENOMEM;
1215                 goto out;
1216         }
1217
1218         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1219                         le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
1220         for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++)
1221                 adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);
1222
1223         adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;
1224
1225         tmp = (unsigned int *)((uintptr_t)rlc_hdr +
1226                         le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
1227         for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++)
1228                 adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);
1229
1230         if (adev->gfx.rlc.is_rlc_v2_1)
1231                 gfx_v9_0_init_rlc_ext_microcode(adev);
1232
1233         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1234                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
1235                 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
1236                 info->fw = adev->gfx.rlc_fw;
1237                 header = (const struct common_firmware_header *)info->fw->data;
1238                 adev->firmware.fw_size +=
1239                         ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
1240
1241                 if (adev->gfx.rlc.is_rlc_v2_1 &&
1242                     adev->gfx.rlc.save_restore_list_cntl_size_bytes &&
1243                     adev->gfx.rlc.save_restore_list_gpm_size_bytes &&
1244                     adev->gfx.rlc.save_restore_list_srm_size_bytes) {
1245                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL];
1246                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL;
1247                         info->fw = adev->gfx.rlc_fw;
1248                         adev->firmware.fw_size +=
1249                                 ALIGN(adev->gfx.rlc.save_restore_list_cntl_size_bytes, PAGE_SIZE);
1250
1251                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM];
1252                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM;
1253                         info->fw = adev->gfx.rlc_fw;
1254                         adev->firmware.fw_size +=
1255                                 ALIGN(adev->gfx.rlc.save_restore_list_gpm_size_bytes, PAGE_SIZE);
1256
1257                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM];
1258                         info->ucode_id = AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM;
1259                         info->fw = adev->gfx.rlc_fw;
1260                         adev->firmware.fw_size +=
1261                                 ALIGN(adev->gfx.rlc.save_restore_list_srm_size_bytes, PAGE_SIZE);
1262                 }
1263         }
1264
1265 out:
1266         if (err) {
1267                 dev_err(adev->dev,
1268                         "gfx9: Failed to load firmware \"%s\"\n",
1269                         fw_name);
1270                 release_firmware(adev->gfx.rlc_fw);
1271                 adev->gfx.rlc_fw = NULL;
1272         }
1273         return err;
1274 }
1275
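/*
 * Request the MEC (and optional MEC2) microcode; a missing MEC2 image is not
 * treated as an error. The MEC jump tables are accounted for as separate
 * ucode entries (CP_MEC1_JT/CP_MEC2_JT) for PSP loading.
 */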
1276 static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev,
1277                                           const char *chip_name)
1278 {
1279         char fw_name[30];
1280         int err;
1281         struct amdgpu_firmware_info *info = NULL;
1282         const struct common_firmware_header *header = NULL;
1283         const struct gfx_firmware_header_v1_0 *cp_hdr;
1284
1285         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
1286         err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
1287         if (err)
1288                 goto out;
1289         err = amdgpu_ucode_validate(adev->gfx.mec_fw);
1290         if (err)
1291                 goto out;
1292         cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1293         adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
1294         adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
1295
1296
1297         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
1298         err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
1299         if (!err) {
1300                 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
1301                 if (err)
1302                         goto out;
1303                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
1304                                 adev->gfx.mec2_fw->data;
1305                 adev->gfx.mec2_fw_version =
1306                                 le32_to_cpu(cp_hdr->header.ucode_version);
1307                 adev->gfx.mec2_feature_version =
1308                                 le32_to_cpu(cp_hdr->ucode_feature_version);
1309         } else {
1310                 err = 0;
1311                 adev->gfx.mec2_fw = NULL;
1312         }
1313
1314         if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1315                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
1316                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
1317                 info->fw = adev->gfx.mec_fw;
1318                 header = (const struct common_firmware_header *)info->fw->data;
1319                 cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1320                 adev->firmware.fw_size +=
1321                         ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1322
1323                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1_JT];
1324                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1_JT;
1325                 info->fw = adev->gfx.mec_fw;
1326                 adev->firmware.fw_size +=
1327                         ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1328
1329                 if (adev->gfx.mec2_fw) {
1330                         info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
1331                         info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
1332                         info->fw = adev->gfx.mec2_fw;
1333                         header = (const struct common_firmware_header *)info->fw->data;
1334                         cp_hdr = (const struct gfx_firmware_header_v1_0 *)info->fw->data;
1335                         adev->firmware.fw_size +=
1336                                 ALIGN(le32_to_cpu(header->ucode_size_bytes) - le32_to_cpu(cp_hdr->jt_size) * 4, PAGE_SIZE);
1337
1338                         /* TODO: determine if MEC2 JT FW loading can be
1339                          * removed for all GFX v9 ASICs and above */
1340                         if (adev->asic_type != CHIP_ARCTURUS) {
1341                                 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2_JT];
1342                                 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2_JT;
1343                                 info->fw = adev->gfx.mec2_fw;
1344                                 adev->firmware.fw_size +=
1345                                         ALIGN(le32_to_cpu(cp_hdr->jt_size) * 4,
1346                                         PAGE_SIZE);
1347                         }
1348                 }
1349         }
1350
1351 out:
1352         gfx_v9_0_check_if_need_gfxoff(adev);
1353         gfx_v9_0_check_fw_write_wait(adev);
1354         if (err) {
1355                 dev_err(adev->dev,
1356                         "gfx9: Failed to load firmware \"%s\"\n",
1357                         fw_name);
1358                 release_firmware(adev->gfx.mec_fw);
1359                 adev->gfx.mec_fw = NULL;
1360                 release_firmware(adev->gfx.mec2_fw);
1361                 adev->gfx.mec2_fw = NULL;
1362         }
1363         return err;
1364 }
1365
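/*
 * Top-level microcode init: map asic_type to a firmware chip name, then load
 * the CP gfx images (skipped on Arcturus, which has no CPG), the RLC image
 * and the CP compute images.
 */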
1366 static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
1367 {
1368         const char *chip_name;
1369         int r;
1370
1371         DRM_DEBUG("\n");
1372
1373         switch (adev->asic_type) {
1374         case CHIP_VEGA10:
1375                 chip_name = "vega10";
1376                 break;
1377         case CHIP_VEGA12:
1378                 chip_name = "vega12";
1379                 break;
1380         case CHIP_VEGA20:
1381                 chip_name = "vega20";
1382                 break;
1383         case CHIP_RAVEN:
1384                 if (adev->rev_id >= 8)
1385                         chip_name = "raven2";
1386                 else if (adev->pdev->device == 0x15d8)
1387                         chip_name = "picasso";
1388                 else
1389                         chip_name = "raven";
1390                 break;
1391         case CHIP_ARCTURUS:
1392                 chip_name = "arcturus";
1393                 break;
1394         case CHIP_RENOIR:
1395                 chip_name = "renoir";
1396                 break;
1397         default:
1398                 BUG();
1399         }
1400
1401         /* No CPG in Arcturus */
1402         if (adev->asic_type != CHIP_ARCTURUS) {
1403                 r = gfx_v9_0_init_cp_gfx_microcode(adev, chip_name);
1404                 if (r)
1405                         return r;
1406         }
1407
1408         r = gfx_v9_0_init_rlc_microcode(adev, chip_name);
1409         if (r)
1410                 return r;
1411
1412         r = gfx_v9_0_init_cp_compute_microcode(adev, chip_name);
1413         if (r)
1414                 return r;
1415
1416         return r;
1417 }
1418
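/* Size, in dwords, of the clear-state buffer built by gfx_v9_0_get_csb_buffer() below. */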
1419 static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev)
1420 {
1421         u32 count = 0;
1422         const struct cs_section_def *sect = NULL;
1423         const struct cs_extent_def *ext = NULL;
1424
1425         /* begin clear state */
1426         count += 2;
1427         /* context control state */
1428         count += 3;
1429
1430         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
1431                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1432                         if (sect->id == SECT_CONTEXT)
1433                                 count += 2 + ext->reg_count;
1434                         else
1435                                 return 0;
1436                 }
1437         }
1438
1439         /* end clear state */
1440         count += 2;
1441         /* clear state */
1442         count += 2;
1443
1444         return count;
1445 }
1446
1447 static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev,
1448                                     volatile u32 *buffer)
1449 {
1450         u32 count = 0, i;
1451         const struct cs_section_def *sect = NULL;
1452         const struct cs_extent_def *ext = NULL;
1453
1454         if (adev->gfx.rlc.cs_data == NULL)
1455                 return;
1456         if (buffer == NULL)
1457                 return;
1458
1459         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1460         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
1461
1462         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
1463         buffer[count++] = cpu_to_le32(0x80000000);
1464         buffer[count++] = cpu_to_le32(0x80000000);
1465
1466         for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
1467                 for (ext = sect->section; ext->extent != NULL; ++ext) {
1468                         if (sect->id == SECT_CONTEXT) {
1469                                 buffer[count++] =
1470                                         cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
1471                                 buffer[count++] = cpu_to_le32(ext->reg_index -
1472                                                 PACKET3_SET_CONTEXT_REG_START);
1473                                 for (i = 0; i < ext->reg_count; i++)
1474                                         buffer[count++] = cpu_to_le32(ext->extent[i]);
1475                         } else {
1476                                 return;
1477                         }
1478                 }
1479         }
1480
1481         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
1482         buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
1483
1484         buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
1485         buffer[count++] = cpu_to_le32(0);
1486 }
1487
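/*
 * Build the per-SE/SH always-on CU bitmaps (4 CUs on APUs, 8 on Vega12,
 * 12 otherwise) and program RLC_PG_ALWAYS_ON_CU_MASK and
 * RLC_LB_ALWAYS_ACTIVE_CU_MASK from them.
 */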
1488 static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
1489 {
1490         struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
1491         uint32_t pg_always_on_cu_num = 2;
1492         uint32_t always_on_cu_num;
1493         uint32_t i, j, k;
1494         uint32_t mask, cu_bitmap, counter;
1495
1496         if (adev->flags & AMD_IS_APU)
1497                 always_on_cu_num = 4;
1498         else if (adev->asic_type == CHIP_VEGA12)
1499                 always_on_cu_num = 8;
1500         else
1501                 always_on_cu_num = 12;
1502
1503         mutex_lock(&adev->grbm_idx_mutex);
1504         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
1505                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
1506                         mask = 1;
1507                         cu_bitmap = 0;
1508                         counter = 0;
1509                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
1510
1511                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
1512                                 if (cu_info->bitmap[i][j] & mask) {
1513                                         if (counter == pg_always_on_cu_num)
1514                                                 WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap);
1515                                         if (counter < always_on_cu_num)
1516                                                 cu_bitmap |= mask;
1517                                         else
1518                                                 break;
1519                                         counter++;
1520                                 }
1521                                 mask <<= 1;
1522                         }
1523
1524                         WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap);
1525                         cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
1526                 }
1527         }
1528         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1529         mutex_unlock(&adev->grbm_idx_mutex);
1530 }
1531
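/*
 * Load-balancing-per-watt (LBPW) programming used on Raven: RLC thresholds,
 * counters and the always-on CU masks. gfx_v9_4_init_lbpw() below is the
 * Vega20 variant of the same sequence with different threshold values.
 */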
1532 static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
1533 {
1534         uint32_t data;
1535
1536         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1537         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1538         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x0333A5A7);
1539         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1540         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x30 | 0x40 << 8 | 0x02FA << 16));
1541
1542         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1543         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1544
1545         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1546         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000500);
1547
1548         mutex_lock(&adev->grbm_idx_mutex);
1549         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1550         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1551         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1552
1553         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1554         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1555         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1556         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1557         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1558
1559         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1560         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1561         data &= 0x0000FFFF;
1562         data |= 0x00C00000;
1563         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1564
1565         /*
1566          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven),
1567          * programmed in gfx_v9_0_init_always_on_cu_mask()
1568          */
1569
1570         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1571          * but used here for RLC_LB_CNTL configuration */
1572         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1573         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1574         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1575         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1576         mutex_unlock(&adev->grbm_idx_mutex);
1577
1578         gfx_v9_0_init_always_on_cu_mask(adev);
1579 }
1580
1581 static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
1582 {
1583         uint32_t data;
1584
1585         /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */
1586         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F);
1587         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8);
1588         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077);
1589         WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16));
1590
1591         /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */
1592         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000);
1593
1594         /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */
1595         WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800);
1596
1597         mutex_lock(&adev->grbm_idx_mutex);
1598         /* set mmRLC_LB_INIT_CU_MASK through broadcast mode to enable all SE/SH */
1599         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
1600         WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
1601
1602         /* set mmRLC_LB_PARAMS = 0x003F_1006 */
1603         data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003);
1604         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010);
1605         data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F);
1606         WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data);
1607
1608         /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */
1609         data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7);
1610         data &= 0x0000FFFF;
1611         data |= 0x00C00000;
1612         WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data);
1613
1614         /*
1615          * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON),
1616          * programmed in gfx_v9_0_init_always_on_cu_mask()
1617          */
1618
1619         /* set RLC_LB_CNTL = 0x8000_0095; bit 31 is reserved
1620          * but used here for RLC_LB_CNTL configuration */
1621         data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK;
1622         data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09);
1623         data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000);
1624         WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data);
1625         mutex_unlock(&adev->grbm_idx_mutex);
1626
1627         gfx_v9_0_init_always_on_cu_mask(adev);
1628 }
1629
1630 static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable)
1631 {
1632         WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
1633 }
1634
1635 static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev)
1636 {
1637         return 5;
1638 }
1639
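/*
 * RLC software init: create the clear-state BO, allocate the CP table for
 * Raven/Renoir, and run the per-ASIC LBPW programming.
 */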
1640 static int gfx_v9_0_rlc_init(struct amdgpu_device *adev)
1641 {
1642         const struct cs_section_def *cs_data;
1643         int r;
1644
1645         adev->gfx.rlc.cs_data = gfx9_cs_data;
1646
1647         cs_data = adev->gfx.rlc.cs_data;
1648
1649         if (cs_data) {
1650                 /* init clear state block */
1651                 r = amdgpu_gfx_rlc_init_csb(adev);
1652                 if (r)
1653                         return r;
1654         }
1655
1656         if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
1657                 /* TODO: double check the cp_table_size for RV */
1658                 adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
1659                 r = amdgpu_gfx_rlc_init_cpt(adev);
1660                 if (r)
1661                         return r;
1662         }
1663
1664         switch (adev->asic_type) {
1665         case CHIP_RAVEN:
1666                 gfx_v9_0_init_lbpw(adev);
1667                 break;
1668         case CHIP_VEGA20:
1669                 gfx_v9_4_init_lbpw(adev);
1670                 break;
1671         default:
1672                 break;
1673         }
1674
1675         return 0;
1676 }
1677
1678 static int gfx_v9_0_csb_vram_pin(struct amdgpu_device *adev)
1679 {
1680         int r;
1681
1682         r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
1683         if (unlikely(r != 0))
1684                 return r;
1685
1686         r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj,
1687                         AMDGPU_GEM_DOMAIN_VRAM);
1688         if (!r)
1689                 adev->gfx.rlc.clear_state_gpu_addr =
1690                         amdgpu_bo_gpu_offset(adev->gfx.rlc.clear_state_obj);
1691
1692         amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1693
1694         return r;
1695 }
1696
1697 static void gfx_v9_0_csb_vram_unpin(struct amdgpu_device *adev)
1698 {
1699         int r;
1700
1701         if (!adev->gfx.rlc.clear_state_obj)
1702                 return;
1703
1704         r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, true);
1705         if (likely(r == 0)) {
1706                 amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
1707                 amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
1708         }
1709 }
1710
1711 static void gfx_v9_0_mec_fini(struct amdgpu_device *adev)
1712 {
1713         amdgpu_bo_free_kernel(&adev->gfx.mec.hpd_eop_obj, NULL, NULL);
1714         amdgpu_bo_free_kernel(&adev->gfx.mec.mec_fw_obj, NULL, NULL);
1715 }
1716
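/*
 * Allocate the HPD EOP buffer in VRAM for the compute queues this driver
 * owns, and copy the MEC ucode into a GTT buffer object.
 */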
1717 static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
1718 {
1719         int r;
1720         u32 *hpd;
1721         const __le32 *fw_data;
1722         unsigned fw_size;
1723         u32 *fw;
1724         size_t mec_hpd_size;
1725
1726         const struct gfx_firmware_header_v1_0 *mec_hdr;
1727
1728         bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
1729
1730         /* take ownership of the relevant compute queues */
1731         amdgpu_gfx_compute_queue_acquire(adev);
1732         mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
1733
1734         r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
1735                                       AMDGPU_GEM_DOMAIN_VRAM,
1736                                       &adev->gfx.mec.hpd_eop_obj,
1737                                       &adev->gfx.mec.hpd_eop_gpu_addr,
1738                                       (void **)&hpd);
1739         if (r) {
1740                 dev_warn(adev->dev, "(%d) create HPD EOP bo failed\n", r);
1741                 gfx_v9_0_mec_fini(adev);
1742                 return r;
1743         }
1744
1745         memset(hpd, 0, adev->gfx.mec.hpd_eop_obj->tbo.mem.size);
1746
1747         amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
1748         amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
1749
1750         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
1751
1752         fw_data = (const __le32 *)
1753                 (adev->gfx.mec_fw->data +
1754                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
1755         fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
1756
1757         r = amdgpu_bo_create_reserved(adev, le32_to_cpu(mec_hdr->header.ucode_size_bytes),
1758                                       PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1759                                       &adev->gfx.mec.mec_fw_obj,
1760                                       &adev->gfx.mec.mec_fw_gpu_addr,
1761                                       (void **)&fw);
1762         if (r) {
1763                 dev_warn(adev->dev, "(%d) create mec firmware bo failed\n", r);
1764                 gfx_v9_0_mec_fini(adev);
1765                 return r;
1766         }
1767
1768         memcpy(fw, fw_data, fw_size);
1769
1770         amdgpu_bo_kunmap(adev->gfx.mec.mec_fw_obj);
1771         amdgpu_bo_unreserve(adev->gfx.mec.mec_fw_obj);
1772
1773         return 0;
1774 }
1775
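/*
 * Helpers for the read_wave_* callbacks below: read per-wave state through
 * the SQ_IND_INDEX/SQ_IND_DATA indirect register pair.
 */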
1776 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
1777 {
1778         WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1779                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1780                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1781                 (address << SQ_IND_INDEX__INDEX__SHIFT) |
1782                 (SQ_IND_INDEX__FORCE_READ_MASK));
1783         return RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1784 }
1785
1786 static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd,
1787                            uint32_t wave, uint32_t thread,
1788                            uint32_t regno, uint32_t num, uint32_t *out)
1789 {
1790         WREG32_SOC15(GC, 0, mmSQ_IND_INDEX,
1791                 (wave << SQ_IND_INDEX__WAVE_ID__SHIFT) |
1792                 (simd << SQ_IND_INDEX__SIMD_ID__SHIFT) |
1793                 (regno << SQ_IND_INDEX__INDEX__SHIFT) |
1794                 (thread << SQ_IND_INDEX__THREAD_ID__SHIFT) |
1795                 (SQ_IND_INDEX__FORCE_READ_MASK) |
1796                 (SQ_IND_INDEX__AUTO_INCR_MASK));
1797         while (num--)
1798                 *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA);
1799 }
1800
1801 static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields)
1802 {
1803         /* type 1 wave data */
1804         dst[(*no_fields)++] = 1;
1805         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_STATUS);
1806         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_LO);
1807         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_PC_HI);
1808         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_LO);
1809         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_EXEC_HI);
1810         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_HW_ID);
1811         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW0);
1812         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_INST_DW1);
1813         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_GPR_ALLOC);
1814         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_LDS_ALLOC);
1815         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_TRAPSTS);
1816         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_STS);
1817         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_IB_DBG0);
1818         dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_M0);
1819 }
1820
1821 static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
1822                                      uint32_t wave, uint32_t start,
1823                                      uint32_t size, uint32_t *dst)
1824 {
1825         wave_read_regs(
1826                 adev, simd, wave, 0,
1827                 start + SQIND_WAVE_SGPRS_OFFSET, size, dst);
1828 }
1829
1830 static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd,
1831                                      uint32_t wave, uint32_t thread,
1832                                      uint32_t start, uint32_t size,
1833                                      uint32_t *dst)
1834 {
1835         wave_read_regs(
1836                 adev, simd, wave, thread,
1837                 start + SQIND_WAVE_VGPRS_OFFSET, size, dst);
1838 }
1839
1840 static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev,
1841                                   u32 me, u32 pipe, u32 q, u32 vm)
1842 {
1843         soc15_grbm_select(adev, me, pipe, q, vm);
1844 }
1845
1846 static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = {
1847         .get_gpu_clock_counter = &gfx_v9_0_get_gpu_clock_counter,
1848         .select_se_sh = &gfx_v9_0_select_se_sh,
1849         .read_wave_data = &gfx_v9_0_read_wave_data,
1850         .read_wave_sgprs = &gfx_v9_0_read_wave_sgprs,
1851         .read_wave_vgprs = &gfx_v9_0_read_wave_vgprs,
1852         .select_me_pipe_q = &gfx_v9_0_select_me_pipe_q,
1853         .ras_error_inject = &gfx_v9_0_ras_error_inject,
1854         .query_ras_error_count = &gfx_v9_0_query_ras_error_count
1855 };
1856
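/*
 * Early per-ASIC gfx configuration: hw context count, SC FIFO sizes and
 * GB_ADDR_CONFIG (golden value, or read back and patched from the register),
 * followed by decoding gb_addr_config into its individual fields.
 */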
1857 static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
1858 {
1859         u32 gb_addr_config;
1860         int err;
1861
1862         adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
1863
1864         switch (adev->asic_type) {
1865         case CHIP_VEGA10:
1866                 adev->gfx.config.max_hw_contexts = 8;
1867                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1868                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1869                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1870                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1871                 gb_addr_config = VEGA10_GB_ADDR_CONFIG_GOLDEN;
1872                 break;
1873         case CHIP_VEGA12:
1874                 adev->gfx.config.max_hw_contexts = 8;
1875                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1876                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1877                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1878                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1879                 gb_addr_config = VEGA12_GB_ADDR_CONFIG_GOLDEN;
1880                 DRM_INFO("fix gfx.config for vega12\n");
1881                 break;
1882         case CHIP_VEGA20:
1883                 adev->gfx.config.max_hw_contexts = 8;
1884                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1885                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1886                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1887                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1888                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1889                 gb_addr_config &= ~0xf3e777ff;
1890                 gb_addr_config |= 0x22014042;
1891                 /* check vbios table if gpu info is not available */
1892                 err = amdgpu_atomfirmware_get_gfx_info(adev);
1893                 if (err)
1894                         return err;
1895                 break;
1896         case CHIP_RAVEN:
1897                 adev->gfx.config.max_hw_contexts = 8;
1898                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1899                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1900                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1901                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1902                 if (adev->rev_id >= 8)
1903                         gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN;
1904                 else
1905                         gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN;
1906                 break;
1907         case CHIP_ARCTURUS:
1908                 adev->gfx.config.max_hw_contexts = 8;
1909                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1910                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1911                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1912                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1913                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1914                 gb_addr_config &= ~0xf3e777ff;
1915                 gb_addr_config |= 0x22014042;
1916                 break;
1917         case CHIP_RENOIR:
1918                 adev->gfx.config.max_hw_contexts = 8;
1919                 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1920                 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1921                 adev->gfx.config.sc_hiz_tile_fifo_size = 0x80;
1922                 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
1923                 gb_addr_config = RREG32_SOC15(GC, 0, mmGB_ADDR_CONFIG);
1924                 gb_addr_config &= ~0xf3e777ff;
1925                 gb_addr_config |= 0x22010042;
1926                 break;
1927         default:
1928                 BUG();
1929                 break;
1930         }
1931
1932         adev->gfx.config.gb_addr_config = gb_addr_config;
1933
1934         adev->gfx.config.gb_addr_config_fields.num_pipes = 1 <<
1935                         REG_GET_FIELD(
1936                                         adev->gfx.config.gb_addr_config,
1937                                         GB_ADDR_CONFIG,
1938                                         NUM_PIPES);
1939
1940         adev->gfx.config.max_tile_pipes =
1941                 adev->gfx.config.gb_addr_config_fields.num_pipes;
1942
1943         adev->gfx.config.gb_addr_config_fields.num_banks = 1 <<
1944                         REG_GET_FIELD(
1945                                         adev->gfx.config.gb_addr_config,
1946                                         GB_ADDR_CONFIG,
1947                                         NUM_BANKS);
1948         adev->gfx.config.gb_addr_config_fields.max_compress_frags = 1 <<
1949                         REG_GET_FIELD(
1950                                         adev->gfx.config.gb_addr_config,
1951                                         GB_ADDR_CONFIG,
1952                                         MAX_COMPRESSED_FRAGS);
1953         adev->gfx.config.gb_addr_config_fields.num_rb_per_se = 1 <<
1954                         REG_GET_FIELD(
1955                                         adev->gfx.config.gb_addr_config,
1956                                         GB_ADDR_CONFIG,
1957                                         NUM_RB_PER_SE);
1958         adev->gfx.config.gb_addr_config_fields.num_se = 1 <<
1959                         REG_GET_FIELD(
1960                                         adev->gfx.config.gb_addr_config,
1961                                         GB_ADDR_CONFIG,
1962                                         NUM_SHADER_ENGINES);
1963         adev->gfx.config.gb_addr_config_fields.pipe_interleave_size = 1 << (8 +
1964                         REG_GET_FIELD(
1965                                         adev->gfx.config.gb_addr_config,
1966                                         GB_ADDR_CONFIG,
1967                                         PIPE_INTERLEAVE_SIZE));
1968
1969         return 0;
1970 }
1971
1972 static int gfx_v9_0_ngg_create_buf(struct amdgpu_device *adev,
1973                                    struct amdgpu_ngg_buf *ngg_buf,
1974                                    int size_se,
1975                                    int default_size_se)
1976 {
1977         int r;
1978
1979         if (size_se < 0) {
1980                 dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
1981                 return -EINVAL;
1982         }
1983         size_se = size_se ? size_se : default_size_se;
1984
1985         ngg_buf->size = size_se * adev->gfx.config.max_shader_engines;
1986         r = amdgpu_bo_create_kernel(adev, ngg_buf->size,
1987                                     PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM,
1988                                     &ngg_buf->bo,
1989                                     &ngg_buf->gpu_addr,
1990                                     NULL);
1991         if (r) {
1992                 dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
1993                 return r;
1994         }
1995         ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);
1996
1997         return r;
1998 }
1999
2000 static int gfx_v9_0_ngg_fini(struct amdgpu_device *adev)
2001 {
2002         int i;
2003
2004         for (i = 0; i < NGG_BUF_MAX; i++)
2005                 amdgpu_bo_free_kernel(&adev->gfx.ngg.buf[i].bo,
2006                                       &adev->gfx.ngg.buf[i].gpu_addr,
2007                                       NULL);
2008
2009         memset(&adev->gfx.ngg.buf[0], 0,
2010                         sizeof(struct amdgpu_ngg_buf) * NGG_BUF_MAX);
2011
2012         adev->gfx.ngg.init = false;
2013
2014         return 0;
2015 }
2016
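/*
 * NGG init: carve a small reserve out of GDS and allocate the primitive,
 * position, control-sideband and (optional) parameter-cache buffers.
 */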
2017 static int gfx_v9_0_ngg_init(struct amdgpu_device *adev)
2018 {
2019         int r;
2020
2021         if (!amdgpu_ngg || adev->gfx.ngg.init)
2022                 return 0;
2023
2024         /* GDS reserve memory: 64 bytes alignment */
2025         adev->gfx.ngg.gds_reserve_size = ALIGN(5 * 4, 0x40);
2026         adev->gds.gds_size -= adev->gfx.ngg.gds_reserve_size;
2027         adev->gfx.ngg.gds_reserve_addr = RREG32_SOC15(GC, 0, mmGDS_VMID0_BASE);
2028         adev->gfx.ngg.gds_reserve_addr += RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE);
2029
2030         /* Primitive Buffer */
2031         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PRIM],
2032                                     amdgpu_prim_buf_per_se,
2033                                     64 * 1024);
2034         if (r) {
2035                 dev_err(adev->dev, "Failed to create Primitive Buffer\n");
2036                 goto err;
2037         }
2038
2039         /* Position Buffer */
2040         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_POS],
2041                                     amdgpu_pos_buf_per_se,
2042                                     256 * 1024);
2043         if (r) {
2044                 dev_err(adev->dev, "Failed to create Position Buffer\n");
2045                 goto err;
2046         }
2047
2048         /* Control Sideband */
2049         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_CNTL],
2050                                     amdgpu_cntl_sb_buf_per_se,
2051                                     256);
2052         if (r) {
2053                 dev_err(adev->dev, "Failed to create Control Sideband Buffer\n");
2054                 goto err;
2055         }
2056
2057         /* Parameter Cache, not created by default */
2058         if (amdgpu_param_buf_per_se <= 0)
2059                 goto out;
2060
2061         r = gfx_v9_0_ngg_create_buf(adev, &adev->gfx.ngg.buf[NGG_PARAM],
2062                                     amdgpu_param_buf_per_se,
2063                                     512 * 1024);
2064         if (r) {
2065                 dev_err(adev->dev, "Failed to create Parameter Cache\n");
2066                 goto err;
2067         }
2068
2069 out:
2070         adev->gfx.ngg.init = true;
2071         return 0;
2072 err:
2073         gfx_v9_0_ngg_fini(adev);
2074         return r;
2075 }
2076
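/*
 * Enable NGG: program the WD buffer size/base registers from the buffers
 * allocated in gfx_v9_0_ngg_init(), then clear the reserved GDS region with
 * a DMA_DATA packet on the gfx ring.
 */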
2077 static int gfx_v9_0_ngg_en(struct amdgpu_device *adev)
2078 {
2079         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2080         int r;
2081         u32 data, base;
2082
2083         if (!amdgpu_ngg)
2084                 return 0;
2085
2086         /* Program buffer size */
2087         data = REG_SET_FIELD(0, WD_BUF_RESOURCE_1, INDEX_BUF_SIZE,
2088                              adev->gfx.ngg.buf[NGG_PRIM].size >> 8);
2089         data = REG_SET_FIELD(data, WD_BUF_RESOURCE_1, POS_BUF_SIZE,
2090                              adev->gfx.ngg.buf[NGG_POS].size >> 8);
2091         WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_1, data);
2092
2093         data = REG_SET_FIELD(0, WD_BUF_RESOURCE_2, CNTL_SB_BUF_SIZE,
2094                              adev->gfx.ngg.buf[NGG_CNTL].size >> 8);
2095         data = REG_SET_FIELD(data, WD_BUF_RESOURCE_2, PARAM_BUF_SIZE,
2096                              adev->gfx.ngg.buf[NGG_PARAM].size >> 10);
2097         WREG32_SOC15(GC, 0, mmWD_BUF_RESOURCE_2, data);
2098
2099         /* Program buffer base address */
2100         base = lower_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
2101         data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE, BASE, base);
2102         WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE, data);
2103
2104         base = upper_32_bits(adev->gfx.ngg.buf[NGG_PRIM].gpu_addr);
2105         data = REG_SET_FIELD(0, WD_INDEX_BUF_BASE_HI, BASE_HI, base);
2106         WREG32_SOC15(GC, 0, mmWD_INDEX_BUF_BASE_HI, data);
2107
2108         base = lower_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
2109         data = REG_SET_FIELD(0, WD_POS_BUF_BASE, BASE, base);
2110         WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE, data);
2111
2112         base = upper_32_bits(adev->gfx.ngg.buf[NGG_POS].gpu_addr);
2113         data = REG_SET_FIELD(0, WD_POS_BUF_BASE_HI, BASE_HI, base);
2114         WREG32_SOC15(GC, 0, mmWD_POS_BUF_BASE_HI, data);
2115
2116         base = lower_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
2117         data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE, BASE, base);
2118         WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE, data);
2119
2120         base = upper_32_bits(adev->gfx.ngg.buf[NGG_CNTL].gpu_addr);
2121         data = REG_SET_FIELD(0, WD_CNTL_SB_BUF_BASE_HI, BASE_HI, base);
2122         WREG32_SOC15(GC, 0, mmWD_CNTL_SB_BUF_BASE_HI, data);
2123
2124         /* Clear GDS reserved memory */
2125         r = amdgpu_ring_alloc(ring, 17);
2126         if (r) {
2127                 DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n",
2128                           ring->name, r);
2129                 return r;
2130         }
2131
2132         gfx_v9_0_write_data_to_reg(ring, 0, false,
2133                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE),
2134                                    (adev->gds.gds_size +
2135                                     adev->gfx.ngg.gds_reserve_size));
2136
2137         amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
2138         amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
2139                                 PACKET3_DMA_DATA_DST_SEL(1) |
2140                                 PACKET3_DMA_DATA_SRC_SEL(2)));
2141         amdgpu_ring_write(ring, 0);
2142         amdgpu_ring_write(ring, 0);
2143         amdgpu_ring_write(ring, adev->gfx.ngg.gds_reserve_addr);
2144         amdgpu_ring_write(ring, 0);
2145         amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
2146                                 adev->gfx.ngg.gds_reserve_size);
2147
2148         gfx_v9_0_write_data_to_reg(ring, 0, false,
2149                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), 0);
2150
2151         amdgpu_ring_commit(ring);
2152
2153         return 0;
2154 }
2155
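/*
 * Set up one compute ring: map it onto the given MEC/pipe/queue, assign its
 * doorbell and HPD EOP slice, and attach it to the matching EOP interrupt
 * source.
 */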
2156 static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id,
2157                                       int mec, int pipe, int queue)
2158 {
2159         int r;
2160         unsigned irq_type;
2161         struct amdgpu_ring *ring = &adev->gfx.compute_ring[ring_id];
2164
2165         /* mec0 is me1 */
2166         ring->me = mec + 1;
2167         ring->pipe = pipe;
2168         ring->queue = queue;
2169
2170         ring->ring_obj = NULL;
2171         ring->use_doorbell = true;
2172         ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1;
2173         ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr
2174                                 + (ring_id * GFX9_MEC_HPD_SIZE);
2175         sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2176
2177         irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP
2178                 + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec)
2179                 + ring->pipe;
2180
2181         /* type-2 packets are deprecated on MEC, use type-3 instead */
2182         r = amdgpu_ring_init(adev, ring, 1024,
2183                              &adev->gfx.eop_irq, irq_type);
2184         if (r)
2185                 return r;
2186
2187
2188         return 0;
2189 }
2190
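/*
 * sw_init: register the CP interrupt sources, load the microcode, create the
 * RLC, MEC and KIQ objects, and initialize the gfx and compute rings and
 * their MQDs.
 */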
2191 static int gfx_v9_0_sw_init(void *handle)
2192 {
2193         int i, j, k, r, ring_id;
2194         struct amdgpu_ring *ring;
2195         struct amdgpu_kiq *kiq;
2196         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2197
2198         switch (adev->asic_type) {
2199         case CHIP_VEGA10:
2200         case CHIP_VEGA12:
2201         case CHIP_VEGA20:
2202         case CHIP_RAVEN:
2203         case CHIP_ARCTURUS:
2204         case CHIP_RENOIR:
2205                 adev->gfx.mec.num_mec = 2;
2206                 break;
2207         default:
2208                 adev->gfx.mec.num_mec = 1;
2209                 break;
2210         }
2211
2212         adev->gfx.mec.num_pipe_per_mec = 4;
2213         adev->gfx.mec.num_queue_per_pipe = 8;
2214
2215         /* EOP Event */
2216         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq);
2217         if (r)
2218                 return r;
2219
2220         /* Privileged reg */
2221         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_REG_FAULT,
2222                               &adev->gfx.priv_reg_irq);
2223         if (r)
2224                 return r;
2225
2226         /* Privileged inst */
2227         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_PRIV_INSTR_FAULT,
2228                               &adev->gfx.priv_inst_irq);
2229         if (r)
2230                 return r;
2231
2232         /* ECC error */
2233         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_ECC_ERROR,
2234                               &adev->gfx.cp_ecc_error_irq);
2235         if (r)
2236                 return r;
2237
2238         /* FUE error */
2239         r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_FUE_ERROR,
2240                               &adev->gfx.cp_ecc_error_irq);
2241         if (r)
2242                 return r;
2243
2244         adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2245
2246         gfx_v9_0_scratch_init(adev);
2247
2248         r = gfx_v9_0_init_microcode(adev);
2249         if (r) {
2250                 DRM_ERROR("Failed to load gfx firmware!\n");
2251                 return r;
2252         }
2253
2254         r = adev->gfx.rlc.funcs->init(adev);
2255         if (r) {
2256                 DRM_ERROR("Failed to init rlc BOs!\n");
2257                 return r;
2258         }
2259
2260         r = gfx_v9_0_mec_init(adev);
2261         if (r) {
2262                 DRM_ERROR("Failed to init MEC BOs!\n");
2263                 return r;
2264         }
2265
2266         /* set up the gfx ring */
2267         for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2268                 ring = &adev->gfx.gfx_ring[i];
2269                 ring->ring_obj = NULL;
2270                 if (!i)
2271                         sprintf(ring->name, "gfx");
2272                 else
2273                         sprintf(ring->name, "gfx_%d", i);
2274                 ring->use_doorbell = true;
2275                 ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
2276                 r = amdgpu_ring_init(adev, ring, 1024,
2277                                      &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP);
2278                 if (r)
2279                         return r;
2280         }
2281
2282         /* set up the compute queues - allocate horizontally across pipes */
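             /*
              * With the loop nesting below (mec outermost, then queue, then
              * pipe), ring 0 maps to MEC0 pipe0 queue0, ring 1 to MEC0 pipe1
              * queue0, and so on across all pipes before queue1 is used;
              * disabled queues are skipped and do not consume a ring id.
              */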
2283         ring_id = 0;
2284         for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
2285                 for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
2286                         for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
2287                                 if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j))
2288                                         continue;
2289
2290                                 r = gfx_v9_0_compute_ring_init(adev,
2291                                                                ring_id,
2292                                                                i, k, j);
2293                                 if (r)
2294                                         return r;
2295
2296                                 ring_id++;
2297                         }
2298                 }
2299         }
2300
2301         r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE);
2302         if (r) {
2303                 DRM_ERROR("Failed to init KIQ BOs!\n");
2304                 return r;
2305         }
2306
2307         kiq = &adev->gfx.kiq;
2308         r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2309         if (r)
2310                 return r;
2311
2312         /* create MQD for all compute queues as well as KIQ for SRIOV case */
2313         r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation));
2314         if (r)
2315                 return r;
2316
2317         adev->gfx.ce_ram_size = 0x8000;
2318
2319         r = gfx_v9_0_gpu_early_init(adev);
2320         if (r)
2321                 return r;
2322
2323         r = gfx_v9_0_ngg_init(adev);
2324         if (r)
2325                 return r;
2326
2327         return 0;
2328 }
2329
2330
2331 static int gfx_v9_0_sw_fini(void *handle)
2332 {
2333         int i;
2334         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2335
2336         if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) &&
2337                         adev->gfx.ras_if) {
2338                 struct ras_common_if *ras_if = adev->gfx.ras_if;
2339                 struct ras_ih_if ih_info = {
2340                         .head = *ras_if,
2341                 };
2342
2343                 amdgpu_ras_debugfs_remove(adev, ras_if);
2344                 amdgpu_ras_sysfs_remove(adev, ras_if);
2345                 amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
2346                 amdgpu_ras_feature_enable(adev, ras_if, 0);
2347                 kfree(ras_if);
2348         }
2349
2350         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2351                 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2352         for (i = 0; i < adev->gfx.num_compute_rings; i++)
2353                 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2354
2355         amdgpu_gfx_mqd_sw_fini(adev);
2356         amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2357         amdgpu_gfx_kiq_fini(adev);
2358
2359         gfx_v9_0_mec_fini(adev);
2360         gfx_v9_0_ngg_fini(adev);
2361         amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
2362         if (adev->asic_type == CHIP_RAVEN || adev->asic_type == CHIP_RENOIR) {
2363                 amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj,
2364                                 &adev->gfx.rlc.cp_table_gpu_addr,
2365                                 (void **)&adev->gfx.rlc.cp_table_ptr);
2366         }
2367         gfx_v9_0_free_microcode(adev);
2368
2369         return 0;
2370 }
2371
2372
2373 static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev)
2374 {
2375         /* TODO */
2376 }
2377
2378 static void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance)
2379 {
2380         u32 data;
2381
2382         if (instance == 0xffffffff)
2383                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2384         else
2385                 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
2386
2387         if (se_num == 0xffffffff)
2388                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2389         else
2390                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2391
2392         if (sh_num == 0xffffffff)
2393                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2394         else
2395                 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2396
2397         WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
2398 }
2399
2400 static u32 gfx_v9_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2401 {
2402         u32 data, mask;
2403
2404         data = RREG32_SOC15(GC, 0, mmCC_RB_BACKEND_DISABLE);
2405         data |= RREG32_SOC15(GC, 0, mmGC_USER_RB_BACKEND_DISABLE);
2406
2407         data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2408         data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2409
2410         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_backends_per_se /
2411                                          adev->gfx.config.max_sh_per_se);
2412
2413         return (~data) & mask;
2414 }
2415
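     /*
      * Collect the per-SE/per-SH render backend masks into one global bitmap.
      * Each SH contributes rb_bitmap_width_per_sh bits; as an illustration
      * only, 4 SEs with 1 SH each and a per-SH width of 4 would place SE0 in
      * bits 0-3, SE1 in bits 4-7, and so on (the real values come from
      * adev->gfx.config).
      */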
2416 static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
2417 {
2418         int i, j;
2419         u32 data;
2420         u32 active_rbs = 0;
2421         u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2422                                         adev->gfx.config.max_sh_per_se;
2423
2424         mutex_lock(&adev->grbm_idx_mutex);
2425         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2426                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2427                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2428                         data = gfx_v9_0_get_rb_active_bitmap(adev);
2429                         active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2430                                                rb_bitmap_width_per_sh);
2431                 }
2432         }
2433         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2434         mutex_unlock(&adev->grbm_idx_mutex);
2435
2436         adev->gfx.config.backend_enable_mask = active_rbs;
2437         adev->gfx.config.num_rbs = hweight32(active_rbs);
2438 }
2439
2440 #define DEFAULT_SH_MEM_BASES    (0x6000)
2441 #define FIRST_COMPUTE_VMID      (8)
2442 #define LAST_COMPUTE_VMID       (16)
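     /* VMIDs FIRST_COMPUTE_VMID..LAST_COMPUTE_VMID-1 (8..15) are set aside for compute; the loops below only touch that range. */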
2443 static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev)
2444 {
2445         int i;
2446         uint32_t sh_mem_config;
2447         uint32_t sh_mem_bases;
2448
2449         /*
2450          * Configure apertures:
2451          * LDS:         0x60000000'00000000 - 0x60000001'00000000 (4GB)
2452          * Scratch:     0x60000001'00000000 - 0x60000002'00000000 (4GB)
2453          * GPUVM:       0x60010000'00000000 - 0x60020000'00000000 (1TB)
2454          */
2455         sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
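             /*
              * DEFAULT_SH_MEM_BASES = 0x6000 in both 16-bit base fields gives
              * 0x60006000; each field holds bits 63:48 of the corresponding
              * aperture address (cf. the >> 48 writes in
              * gfx_v9_0_constants_init()), matching the apertures listed above.
              */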
2456
2457         sh_mem_config = SH_MEM_ADDRESS_MODE_64 |
2458                         SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2459                         SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
2460
2461         mutex_lock(&adev->srbm_mutex);
2462         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2463                 soc15_grbm_select(adev, 0, 0, 0, i);
2464                 /* CP and shaders */
2465                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config);
2466                 WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases);
2467         }
2468         soc15_grbm_select(adev, 0, 0, 0, 0);
2469         mutex_unlock(&adev->srbm_mutex);
2470
2471         /* Initialize all compute VMIDs to have no GDS, GWS, or OA
2472            access. These should be enabled by FW for target VMIDs. */
2473         for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2474                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
2475                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
2476                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
2477                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
2478         }
2479 }
2480
2481 static void gfx_v9_0_init_gds_vmid(struct amdgpu_device *adev)
2482 {
2483         int vmid;
2484
2485         /*
2486          * Initialize all compute and user-gfx VMIDs to have no GDS, GWS, or OA
2487          * access. Compute VMIDs should be enabled by FW for target VMIDs,
2488          * the driver can enable them for graphics. VMID0 should maintain
2489          * access so that HWS firmware can save/restore entries.
2490          */
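             /*
              * The per-VMID GDS BASE/SIZE registers appear in pairs, hence the
              * 2 * vmid offset from the VMID0 registers; GWS and OA have one
              * register per VMID.
              */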
2491         for (vmid = 1; vmid < 16; vmid++) {
2492                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * vmid, 0);
2493                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * vmid, 0);
2494                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, vmid, 0);
2495                 WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, vmid, 0);
2496         }
2497 }
2498
2499 static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
2500 {
2501         u32 tmp;
2502         int i;
2503
2504         WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
2505
2506         gfx_v9_0_tiling_mode_table_init(adev);
2507
2508         gfx_v9_0_setup_rb(adev);
2509         gfx_v9_0_get_cu_info(adev, &adev->gfx.cu_info);
2510         adev->gfx.config.db_debug2 = RREG32_SOC15(GC, 0, mmDB_DEBUG2);
2511
2512         /* XXX SH_MEM regs */
2513         /* where to put LDS, scratch, GPUVM in FSA64 space */
2514         mutex_lock(&adev->srbm_mutex);
2515         for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) {
2516                 soc15_grbm_select(adev, 0, 0, 0, i);
2517                 /* CP and shaders */
2518                 if (i == 0) {
2519                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2520                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2521                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2522                                             !!amdgpu_noretry);
2523                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2524                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, 0);
2525                 } else {
2526                         tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
2527                                             SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2528                         tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, RETRY_DISABLE,
2529                                             !!amdgpu_noretry);
2530                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, tmp);
2531                         tmp = REG_SET_FIELD(0, SH_MEM_BASES, PRIVATE_BASE,
2532                                 (adev->gmc.private_aperture_start >> 48));
2533                         tmp = REG_SET_FIELD(tmp, SH_MEM_BASES, SHARED_BASE,
2534                                 (adev->gmc.shared_aperture_start >> 48));
2535                         WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp);
2536                 }
2537         }
2538         soc15_grbm_select(adev, 0, 0, 0, 0);
2539
2540         mutex_unlock(&adev->srbm_mutex);
2541
2542         gfx_v9_0_init_compute_vmid(adev);
2543         gfx_v9_0_init_gds_vmid(adev);
2544 }
2545
2546 static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2547 {
2548         u32 i, j, k;
2549         u32 mask;
2550
2551         mutex_lock(&adev->grbm_idx_mutex);
2552         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2553                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2554                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
2555                         for (k = 0; k < adev->usec_timeout; k++) {
2556                                 if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2557                                         break;
2558                                 udelay(1);
2559                         }
2560                         if (k == adev->usec_timeout) {
2561                                 gfx_v9_0_select_se_sh(adev, 0xffffffff,
2562                                                       0xffffffff, 0xffffffff);
2563                                 mutex_unlock(&adev->grbm_idx_mutex);
2564                                 DRM_INFO("Timed out waiting for RLC serdes %u,%u\n",
2565                                          i, j);
2566                                 return;
2567                         }
2568                 }
2569         }
2570         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
2571         mutex_unlock(&adev->grbm_idx_mutex);
2572
2573         mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2574                 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2575                 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2576                 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2577         for (k = 0; k < adev->usec_timeout; k++) {
2578                 if ((RREG32_SOC15(GC, 0, mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2579                         break;
2580                 udelay(1);
2581         }
2582 }
2583
2584 static void gfx_v9_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2585                                                bool enable)
2586 {
2587         u32 tmp = RREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0);
2588
2589         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2590         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2591         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2592         tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2593
2594         WREG32_SOC15(GC, 0, mmCP_INT_CNTL_RING0, tmp);
2595 }
2596
2597 static void gfx_v9_0_init_csb(struct amdgpu_device *adev)
2598 {
2599         /* csib */
2600         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_HI),
2601                         adev->gfx.rlc.clear_state_gpu_addr >> 32);
2602         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_ADDR_LO),
2603                         adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
2604         WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CSIB_LENGTH),
2605                         adev->gfx.rlc.clear_state_size);
2606 }
2607
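     /*
      * Walk the indirect portion of the RLC register list. As the loop below
      * treats it, each block is a series of three-dword entries whose third
      * word is an indirect register offset, terminated by a 0xFFFFFFFF marker;
      * every block start offset and every unique indirect register seen are
      * recorded for the save/restore programming done by the caller.
      */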
2608 static void gfx_v9_1_parse_ind_reg_list(int *register_list_format,
2609                                 int indirect_offset,
2610                                 int list_size,
2611                                 int *unique_indirect_regs,
2612                                 int unique_indirect_reg_count,
2613                                 int *indirect_start_offsets,
2614                                 int *indirect_start_offsets_count,
2615                                 int max_start_offsets_count)
2616 {
2617         int idx;
2618
2619         for (; indirect_offset < list_size; indirect_offset++) {
2620                 WARN_ON(*indirect_start_offsets_count >= max_start_offsets_count);
2621                 indirect_start_offsets[*indirect_start_offsets_count] = indirect_offset;
2622                 *indirect_start_offsets_count = *indirect_start_offsets_count + 1;
2623
2624                 while (register_list_format[indirect_offset] != 0xFFFFFFFF) {
2625                         indirect_offset += 2;
2626
2627                         /* look for the matching index */
2628                         for (idx = 0; idx < unique_indirect_reg_count; idx++) {
2629                                 if (unique_indirect_regs[idx] ==
2630                                         register_list_format[indirect_offset] ||
2631                                         !unique_indirect_regs[idx])
2632                                         break;
2633                         }
2634
2635                         BUG_ON(idx >= unique_indirect_reg_count);
2636
2637                         if (!unique_indirect_regs[idx])
2638                                 unique_indirect_regs[idx] = register_list_format[indirect_offset];
2639
2640                         indirect_offset++;
2641                 }
2642         }
2643 }
2644
2645 static int gfx_v9_1_init_rlc_save_restore_list(struct amdgpu_device *adev)
2646 {
2647         int unique_indirect_regs[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2648         int unique_indirect_reg_count = 0;
2649
2650         int indirect_start_offsets[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
2651         int indirect_start_offsets_count = 0;
2652
2653         int list_size = 0;
2654         int i = 0, j = 0;
2655         u32 tmp = 0;
2656
2657         u32 *register_list_format =
2658                 kmemdup(adev->gfx.rlc.register_list_format,
2659                         adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
2660         if (!register_list_format)
2661                 return -ENOMEM;
2662
2663         /* setup unique_indirect_regs array and indirect_start_offsets array */
2664         unique_indirect_reg_count = ARRAY_SIZE(unique_indirect_regs);
2665         gfx_v9_1_parse_ind_reg_list(register_list_format,
2666                                     adev->gfx.rlc.reg_list_format_direct_reg_list_length,
2667                                     adev->gfx.rlc.reg_list_format_size_bytes >> 2,
2668                                     unique_indirect_regs,
2669                                     unique_indirect_reg_count,
2670                                     indirect_start_offsets,
2671                                     &indirect_start_offsets_count,
2672                                     ARRAY_SIZE(indirect_start_offsets));
2673
2674         /* enable auto inc in case it is disabled */
2675         tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL));
2676         tmp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
2677         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL), tmp);
2678
2679         /* write register_restore table to offset 0x0 using RLC_SRM_ARAM_ADDR/DATA */
2680         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_ADDR),
2681                 RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET);
2682         for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
2683                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_ARAM_DATA),
2684                         adev->gfx.rlc.register_restore[i]);
2685
2686         /* load indirect register */
2687         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2688                 adev->gfx.rlc.reg_list_format_start);
2689
2690         /* direct register portion */
2691         for (i = 0; i < adev->gfx.rlc.reg_list_format_direct_reg_list_length; i++)
2692                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2693                         register_list_format[i]);
2694
2695         /* indirect register portion */
2696         while (i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2)) {
2697                 if (register_list_format[i] == 0xFFFFFFFF) {
2698                         WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2699                         continue;
2700                 }
2701
2702                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2703                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, register_list_format[i++]);
2704
2705                 for (j = 0; j < unique_indirect_reg_count; j++) {
2706                         if (register_list_format[i] == unique_indirect_regs[j]) {
2707                                 WREG32_SOC15(GC, 0, mmRLC_GPM_SCRATCH_DATA, j);
2708                                 break;
2709                         }
2710                 }
2711
2712                 BUG_ON(j >= unique_indirect_reg_count);
2713
2714                 i++;
2715         }
2716
2717         /* set save/restore list size */
2718         list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
2719         list_size = list_size >> 1;
2720         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2721                 adev->gfx.rlc.reg_restore_list_size);
2722         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), list_size);
2723
2724         /* write the starting offsets to RLC scratch ram */
2725         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR),
2726                 adev->gfx.rlc.starting_offsets_start);
2727         for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++)
2728                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA),
2729                        indirect_start_offsets[i]);
2730
2731         /* load unique indirect regs*/
2732         for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) {
2733                 if (unique_indirect_regs[i] != 0) {
2734                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0)
2735                                + GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[i],
2736                                unique_indirect_regs[i] & 0x3FFFF);
2737
2738                         WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0)
2739                                + GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[i],
2740                                unique_indirect_regs[i] >> 20);
2741                 }
2742         }
2743
2744         kfree(register_list_format);
2745         return 0;
2746 }
2747
2748 static void gfx_v9_0_enable_save_restore_machine(struct amdgpu_device *adev)
2749 {
2750         WREG32_FIELD15(GC, 0, RLC_SRM_CNTL, SRM_ENABLE, 1);
2751 }
2752
2753 static void pwr_10_0_gfxip_control_over_cgpg(struct amdgpu_device *adev,
2754                                              bool enable)
2755 {
2756         uint32_t data = 0;
2757         uint32_t default_data = 0;
2758
2759         default_data = data = RREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS));
2760         if (enable) {
2761                 /* enable GFXIP control over CGPG */
2762                 data |= PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2763                 if (default_data != data)
2764                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2765
2766                 /* update status */
2767                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS_MASK;
2768                 data |= (2 << PWR_MISC_CNTL_STATUS__PWR_GFXOFF_STATUS__SHIFT);
2769                 if (default_data != data)
2770                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2771         } else {
2772                 /* restore GFXIP control over CGPG */
2773                 data &= ~PWR_MISC_CNTL_STATUS__PWR_GFX_RLC_CGPG_EN_MASK;
2774                 if (default_data != data)
2775                         WREG32(SOC15_REG_OFFSET(PWR, 0, mmPWR_MISC_CNTL_STATUS), data);
2776         }
2777 }
2778
2779 static void gfx_v9_0_init_gfx_power_gating(struct amdgpu_device *adev)
2780 {
2781         uint32_t data = 0;
2782
2783         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2784                               AMD_PG_SUPPORT_GFX_SMG |
2785                               AMD_PG_SUPPORT_GFX_DMG)) {
2786                 /* init IDLE_POLL_COUNT to 0x60 */
2787                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL));
2788                 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
2789                 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
2790                 WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_POLL_CNTL), data);
2791
2792                 /* init RLC PG Delay */
2793                 data = 0;
2794                 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
2795                 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
2796                 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
2797                 data |= (0x40 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
2798                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY), data);
2799
2800                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2));
2801                 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
2802                 data |= (0x4 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
2803                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_2), data);
2804
2805                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3));
2806                 data &= ~RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK;
2807                 data |= (0xff << RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT);
2808                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_DELAY_3), data);
2809
2810                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL));
2811                 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
2812
2813                 /* program GRBM_REG_SAVE_GFX_IDLE_THRESHOLD to 0x55f0 */
2814                 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
2815                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_AUTO_PG_CTRL), data);
2816
2817                 pwr_10_0_gfxip_control_over_cgpg(adev, true);
2818         }
2819 }
2820
2821 static void gfx_v9_0_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
2822                                                 bool enable)
2823 {
2824         uint32_t data = 0;
2825         uint32_t default_data = 0;
2826
2827         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2828         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2829                              SMU_CLK_SLOWDOWN_ON_PU_ENABLE,
2830                              enable ? 1 : 0);
2831         if (default_data != data)
2832                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2833 }
2834
2835 static void gfx_v9_0_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
2836                                                 bool enable)
2837 {
2838         uint32_t data = 0;
2839         uint32_t default_data = 0;
2840
2841         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2842         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2843                              SMU_CLK_SLOWDOWN_ON_PD_ENABLE,
2844                              enable ? 1 : 0);
2845         if (default_data != data)
2846                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2847 }
2848
2849 static void gfx_v9_0_enable_cp_power_gating(struct amdgpu_device *adev,
2850                                         bool enable)
2851 {
2852         uint32_t data = 0;
2853         uint32_t default_data = 0;
2854
2855         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2856         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2857                              CP_PG_DISABLE,
2858                              enable ? 0 : 1);
2859         if (default_data != data)
2860                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2861 }
2862
2863 static void gfx_v9_0_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
2864                                                 bool enable)
2865 {
2866         uint32_t data, default_data;
2867
2868         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2869         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2870                              GFX_POWER_GATING_ENABLE,
2871                              enable ? 1 : 0);
2872         if (default_data != data)
2873                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2874 }
2875
2876 static void gfx_v9_0_enable_gfx_pipeline_powergating(struct amdgpu_device *adev,
2877                                                 bool enable)
2878 {
2879         uint32_t data, default_data;
2880
2881         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2882         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2883                              GFX_PIPELINE_PG_ENABLE,
2884                              enable ? 1 : 0);
2885         if (default_data != data)
2886                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2887
2888         if (!enable)
2889                 /* read any GFX register to wake up GFX */
2890                 data = RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_RENDER_CONTROL));
2891 }
2892
2893 static void gfx_v9_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
2894                                                        bool enable)
2895 {
2896         uint32_t data, default_data;
2897
2898         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2899         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2900                              STATIC_PER_CU_PG_ENABLE,
2901                              enable ? 1 : 0);
2902         if (default_data != data)
2903                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2904 }
2905
2906 static void gfx_v9_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
2907                                                 bool enable)
2908 {
2909         uint32_t data, default_data;
2910
2911         default_data = data = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL));
2912         data = REG_SET_FIELD(data, RLC_PG_CNTL,
2913                              DYN_PER_CU_PG_ENABLE,
2914                              enable ? 1 : 0);
2915         if (default_data != data)
2916                 WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_CNTL), data);
2917 }
2918
2919 static void gfx_v9_0_init_pg(struct amdgpu_device *adev)
2920 {
2921         gfx_v9_0_init_csb(adev);
2922
2923         /*
2924          * The RLC save/restore list has been available since RLC v2_1
2925          * and is required by the gfxoff feature.
2926          */
2927         if (adev->gfx.rlc.is_rlc_v2_1) {
2928                 gfx_v9_1_init_rlc_save_restore_list(adev);
2929                 gfx_v9_0_enable_save_restore_machine(adev);
2930         }
2931
2932         if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
2933                               AMD_PG_SUPPORT_GFX_SMG |
2934                               AMD_PG_SUPPORT_GFX_DMG |
2935                               AMD_PG_SUPPORT_CP |
2936                               AMD_PG_SUPPORT_GDS |
2937                               AMD_PG_SUPPORT_RLC_SMU_HS)) {
2938                 WREG32(mmRLC_JUMP_TABLE_RESTORE,
2939                        adev->gfx.rlc.cp_table_gpu_addr >> 8);
2940                 gfx_v9_0_init_gfx_power_gating(adev);
2941         }
2942 }
2943
2944 void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
2945 {
2946         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 0);
2947         gfx_v9_0_enable_gui_idle_interrupt(adev, false);
2948         gfx_v9_0_wait_for_rlc_serdes(adev);
2949 }
2950
2951 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
2952 {
2953         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2954         udelay(50);
2955         WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2956         udelay(50);
2957 }
2958
2959 static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
2960 {
2961 #ifdef AMDGPU_RLC_DEBUG_RETRY
2962         u32 rlc_ucode_ver;
2963 #endif
2964
2965         WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
2966         udelay(50);
2967
2968         /* on APUs the CP interrupt is enabled only after CP init, so skip it here */
2969         if (!(adev->flags & AMD_IS_APU)) {
2970                 gfx_v9_0_enable_gui_idle_interrupt(adev, true);
2971                 udelay(50);
2972         }
2973
2974 #ifdef AMDGPU_RLC_DEBUG_RETRY
2975         /* RLC_GPM_GENERAL_6 : RLC Ucode version */
2976         rlc_ucode_ver = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_6);
2977         if (rlc_ucode_ver == 0x108) {
2978                 DRM_INFO("Using rlc debug ucode. mmRLC_GPM_GENERAL_6 == 0x%08x / fw_ver == %i\n",
2979                                 rlc_ucode_ver, adev->gfx.rlc_fw_version);
2980                 /* RLC_GPM_TIMER_INT_3 : Timer interval in RefCLK cycles,
2981                  * default is 0x9C4 to create a 100us interval */
2982                 WREG32_SOC15(GC, 0, mmRLC_GPM_TIMER_INT_3, 0x9C4);
2983                 /* RLC_GPM_GENERAL_12 : Minimum gap between wptr and rptr
2984                  * to disable the page fault retry interrupts, default is
2985                  * 0x100 (256) */
2986                 WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_12, 0x100);
2987         }
2988 #endif
2989 }
2990
2991 static int gfx_v9_0_rlc_load_microcode(struct amdgpu_device *adev)
2992 {
2993         const struct rlc_firmware_header_v2_0 *hdr;
2994         const __le32 *fw_data;
2995         unsigned i, fw_size;
2996
2997         if (!adev->gfx.rlc_fw)
2998                 return -EINVAL;
2999
3000         hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3001         amdgpu_ucode_print_rlc_hdr(&hdr->header);
3002
3003         fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3004                            le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3005         fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3006
3007         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR,
3008                         RLCG_UCODE_LOADING_START_ADDRESS);
3009         for (i = 0; i < fw_size; i++)
3010                 WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3011         WREG32_SOC15(GC, 0, mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3012
3013         return 0;
3014 }
3015
3016 static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev)
3017 {
3018         int r;
3019
3020         if (amdgpu_sriov_vf(adev)) {
3021                 gfx_v9_0_init_csb(adev);
3022                 return 0;
3023         }
3024
3025         adev->gfx.rlc.funcs->stop(adev);
3026
3027         /* disable CG */
3028         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0);
3029
3030         gfx_v9_0_init_pg(adev);
3031
3032         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3033                 /* legacy rlc firmware loading */
3034                 r = gfx_v9_0_rlc_load_microcode(adev);
3035                 if (r)
3036                         return r;
3037         }
3038
3039         switch (adev->asic_type) {
3040         case CHIP_RAVEN:
3041                 if (amdgpu_lbpw == 0)
3042                         gfx_v9_0_enable_lbpw(adev, false);
3043                 else
3044                         gfx_v9_0_enable_lbpw(adev, true);
3045                 break;
3046         case CHIP_VEGA20:
3047                 if (amdgpu_lbpw > 0)
3048                         gfx_v9_0_enable_lbpw(adev, true);
3049                 else
3050                         gfx_v9_0_enable_lbpw(adev, false);
3051                 break;
3052         default:
3053                 break;
3054         }
3055
3056         adev->gfx.rlc.funcs->start(adev);
3057
3058         return 0;
3059 }
3060
3061 static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
3062 {
3063         int i;
3064         u32 tmp = RREG32_SOC15(GC, 0, mmCP_ME_CNTL);
3065
3066         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
3067         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
3068         tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
3069         if (!enable) {
3070                 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
3071                         adev->gfx.gfx_ring[i].sched.ready = false;
3072         }
3073         WREG32_SOC15_RLC(GC, 0, mmCP_ME_CNTL, tmp);
3074         udelay(50);
3075 }
3076
3077 static int gfx_v9_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
3078 {
3079         const struct gfx_firmware_header_v1_0 *pfp_hdr;
3080         const struct gfx_firmware_header_v1_0 *ce_hdr;
3081         const struct gfx_firmware_header_v1_0 *me_hdr;
3082         const __le32 *fw_data;
3083         unsigned i, fw_size;
3084
3085         if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
3086                 return -EINVAL;
3087
3088         pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
3089                 adev->gfx.pfp_fw->data;
3090         ce_hdr = (const struct gfx_firmware_header_v1_0 *)
3091                 adev->gfx.ce_fw->data;
3092         me_hdr = (const struct gfx_firmware_header_v1_0 *)
3093                 adev->gfx.me_fw->data;
3094
3095         amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
3096         amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
3097         amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
3098
3099         gfx_v9_0_cp_gfx_enable(adev, false);
3100
3101         /* PFP */
3102         fw_data = (const __le32 *)
3103                 (adev->gfx.pfp_fw->data +
3104                  le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3105         fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3106         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, 0);
3107         for (i = 0; i < fw_size; i++)
3108                 WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3109         WREG32_SOC15(GC, 0, mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
3110
3111         /* CE */
3112         fw_data = (const __le32 *)
3113                 (adev->gfx.ce_fw->data +
3114                  le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3115         fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3116         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, 0);
3117         for (i = 0; i < fw_size; i++)
3118                 WREG32_SOC15(GC, 0, mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3119         WREG32_SOC15(GC, 0, mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
3120
3121         /* ME */
3122         fw_data = (const __le32 *)
3123                 (adev->gfx.me_fw->data +
3124                  le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3125         fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3126         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, 0);
3127         for (i = 0; i < fw_size; i++)
3128                 WREG32_SOC15(GC, 0, mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3129         WREG32_SOC15(GC, 0, mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
3130
3131         return 0;
3132 }
3133
3134 static int gfx_v9_0_cp_gfx_start(struct amdgpu_device *adev)
3135 {
3136         struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
3137         const struct cs_section_def *sect = NULL;
3138         const struct cs_extent_def *ext = NULL;
3139         int r, i, tmp;
3140
3141         /* init the CP */
3142         WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
3143         WREG32_SOC15(GC, 0, mmCP_DEVICE_ID, 1);
3144
3145         gfx_v9_0_cp_gfx_enable(adev, true);
3146
3147         r = amdgpu_ring_alloc(ring, gfx_v9_0_get_csb_size(adev) + 4 + 3);
3148         if (r) {
3149                 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
3150                 return r;
3151         }
3152
3153         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3154         amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3155
3156         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3157         amdgpu_ring_write(ring, 0x80000000);
3158         amdgpu_ring_write(ring, 0x80000000);
3159
3160         for (sect = gfx9_cs_data; sect->section != NULL; ++sect) {
3161                 for (ext = sect->section; ext->extent != NULL; ++ext) {
3162                         if (sect->id == SECT_CONTEXT) {
3163                                 amdgpu_ring_write(ring,
3164                                        PACKET3(PACKET3_SET_CONTEXT_REG,
3165                                                ext->reg_count));
3166                                 amdgpu_ring_write(ring,
3167                                        ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
3168                                 for (i = 0; i < ext->reg_count; i++)
3169                                         amdgpu_ring_write(ring, ext->extent[i]);
3170                         }
3171                 }
3172         }
3173
3174         amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3175         amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3176
3177         amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3178         amdgpu_ring_write(ring, 0);
3179
3180         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3181         amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3182         amdgpu_ring_write(ring, 0x8000);
3183         amdgpu_ring_write(ring, 0x8000);
3184
3185         amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3186         tmp = (PACKET3_SET_UCONFIG_REG_INDEX_TYPE |
3187                 (SOC15_REG_OFFSET(GC, 0, mmVGT_INDEX_TYPE) - PACKET3_SET_UCONFIG_REG_START));
3188         amdgpu_ring_write(ring, tmp);
3189         amdgpu_ring_write(ring, 0);
3190
3191         amdgpu_ring_commit(ring);
3192
3193         return 0;
3194 }
3195
3196 static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev)
3197 {
3198         struct amdgpu_ring *ring;
3199         u32 tmp;
3200         u32 rb_bufsz;
3201         u64 rb_addr, rptr_addr, wptr_gpu_addr;
3202
3203         /* Set the write pointer delay */
3204         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_DELAY, 0);
3205
3206         /* set the RB to use vmid 0 */
3207         WREG32_SOC15(GC, 0, mmCP_RB_VMID, 0);
3208
3209         /* Set ring buffer size */
3210         ring = &adev->gfx.gfx_ring[0];
3211         rb_bufsz = order_base_2(ring->ring_size / 8);
3212         tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
3213         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
3214 #ifdef __BIG_ENDIAN
3215         tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
3216 #endif
3217         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3218
3219         /* Initialize the ring buffer's write pointers */
3220         ring->wptr = 0;
3221         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
3222         WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
3223
3224         /* set the wb address whether it's enabled or not */
3225         rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3226         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
3227         WREG32_SOC15(GC, 0, mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & CP_RB_RPTR_ADDR_HI__RB_RPTR_ADDR_HI_MASK);
3228
3229         wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3230         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_LO, lower_32_bits(wptr_gpu_addr));
3231         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_ADDR_HI, upper_32_bits(wptr_gpu_addr));
3232
3233         mdelay(1);
3234         WREG32_SOC15(GC, 0, mmCP_RB0_CNTL, tmp);
3235
3236         rb_addr = ring->gpu_addr >> 8;
3237         WREG32_SOC15(GC, 0, mmCP_RB0_BASE, rb_addr);
3238         WREG32_SOC15(GC, 0, mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
3239
3240         tmp = RREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL);
3241         if (ring->use_doorbell) {
3242                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3243                                     DOORBELL_OFFSET, ring->doorbell_index);
3244                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
3245                                     DOORBELL_EN, 1);
3246         } else {
3247                 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_EN, 0);
3248         }
3249         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_CONTROL, tmp);
3250
3251         tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
3252                         DOORBELL_RANGE_LOWER, ring->doorbell_index);
3253         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
3254
3255         WREG32_SOC15(GC, 0, mmCP_RB_DOORBELL_RANGE_UPPER,
3256                        CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
3257
3259         /* start the ring */
3260         gfx_v9_0_cp_gfx_start(adev);
3261         ring->sched.ready = true;
3262
3263         return 0;
3264 }
3265
3266 static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3267 {
3268         int i;
3269
3270         if (enable) {
3271                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, 0);
3272         } else {
3273                 WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL,
3274                         (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3275                 for (i = 0; i < adev->gfx.num_compute_rings; i++)
3276                         adev->gfx.compute_ring[i].sched.ready = false;
3277                 adev->gfx.kiq.ring.sched.ready = false;
3278         }
3279         udelay(50);
3280 }
3281
3282 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3283 {
3284         const struct gfx_firmware_header_v1_0 *mec_hdr;
3285         const __le32 *fw_data;
3286         unsigned i;
3287         u32 tmp;
3288
3289         if (!adev->gfx.mec_fw)
3290                 return -EINVAL;
3291
3292         gfx_v9_0_cp_compute_enable(adev, false);
3293
3294         mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3295         amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3296
3297         fw_data = (const __le32 *)
3298                 (adev->gfx.mec_fw->data +
3299                  le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3300         tmp = 0;
3301         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, VMID, 0);
3302         tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0);
3303         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL, tmp);
3304
3305         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_LO,
3306                 adev->gfx.mec.mec_fw_gpu_addr & 0xFFFFF000);
3307         WREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_HI,
3308                 upper_32_bits(adev->gfx.mec.mec_fw_gpu_addr));
3309
3310         /* MEC1 */
3311         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3312                          mec_hdr->jt_offset);
3313         for (i = 0; i < mec_hdr->jt_size; i++)
3314                 WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_DATA,
3315                         le32_to_cpup(fw_data + mec_hdr->jt_offset + i));
3316
3317         WREG32_SOC15(GC, 0, mmCP_MEC_ME1_UCODE_ADDR,
3318                         adev->gfx.mec_fw_version);
3319         /* TODO: loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1 */
3320
3321         return 0;
3322 }
3323
3324 /* KIQ functions */
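     /*
      * Tell the RLC scheduler which hardware queue hosts the KIQ: the low
      * byte of mmRLC_CP_SCHEDULERS is loaded with the ring's queue, pipe and
      * me fields (queue in the low bits, pipe from bit 3, me from bit 5) and
      * the register is then rewritten with bit 7 set, which appears to mark
      * the entry as valid.
      */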
3325 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
3326 {
3327         uint32_t tmp;
3328         struct amdgpu_device *adev = ring->adev;
3329
3330         /* tell RLC which is KIQ queue */
3331         tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS);
3332         tmp &= 0xffffff00;
3333         tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
3334         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3335         tmp |= 0x80;
3336         WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
3337 }
3338
3339 static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
3340 {
3341         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3342         uint64_t queue_mask = 0;
3343         int r, i;
3344
3345         for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
3346                 if (!test_bit(i, adev->gfx.mec.queue_bitmap))
3347                         continue;
3348
3349                 /* This situation may be hit in the future if a new HW
3350                  * generation exposes more than 64 queues. If so, the
3351                  * definition of queue_mask needs updating */
3352                 if (WARN_ON(i >= (sizeof(queue_mask)*8))) {
3353                         DRM_ERROR("Invalid KCQ enabled: %d\n", i);
3354                         break;
3355                 }
3356
3357                 queue_mask |= (1ull << i);
3358         }
3359
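             /*
              * Space for one 8-dword SET_RESOURCES packet plus one 7-dword
              * MAP_QUEUES packet per user compute ring, matching the packet
              * writes below.
              */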
3360         r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8);
3361         if (r) {
3362                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3363                 return r;
3364         }
3365
3366         /* set resources */
3367         amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
3368         amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
3369                           PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
3370         amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
3371         amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
3372         amdgpu_ring_write(kiq_ring, 0); /* gws mask lo */
3373         amdgpu_ring_write(kiq_ring, 0); /* gws mask hi */
3374         amdgpu_ring_write(kiq_ring, 0); /* oac mask */
3375         amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
3376         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3377                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3378                 uint64_t mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
3379                 uint64_t wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3380
3381                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
3382                 /* Q_sel: 0, vmid: 0, vidmem: 1, engine: 0, num_Q: 1 */
3383                 amdgpu_ring_write(kiq_ring,
3384                                   PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
3385                                   PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
3386                                   PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
3387                                   PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
3388                                   PACKET3_MAP_QUEUES_ME((ring->me == 1 ? 0 : 1)) |
3389                                   PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
3390                                   PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
3391                                   PACKET3_MAP_QUEUES_ENGINE_SEL(0) | /* engine_sel: compute */
3392                                   PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
3393                 amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
3394                 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
3395                 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
3396                 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
3397                 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
3398         }
3399
3400         r = amdgpu_ring_test_helper(kiq_ring);
3401         if (r)
3402                 DRM_ERROR("KCQ enable failed\n");
3403
3404         return r;
3405 }
3406
3407 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
3408 {
3409         struct amdgpu_device *adev = ring->adev;
3410         struct v9_mqd *mqd = ring->mqd_ptr;
3411         uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
3412         uint32_t tmp;
3413
3414         mqd->header = 0xC0310800;
3415         mqd->compute_pipelinestat_enable = 0x00000001;
3416         mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
3417         mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
3418         mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
3419         mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
3420         mqd->compute_static_thread_mgmt_se4 = 0xffffffff;
3421         mqd->compute_static_thread_mgmt_se5 = 0xffffffff;
3422         mqd->compute_static_thread_mgmt_se6 = 0xffffffff;
3423         mqd->compute_static_thread_mgmt_se7 = 0xffffffff;
3424         mqd->compute_misc_reserved = 0x00000003;
3425
3426         mqd->dynamic_cu_mask_addr_lo =
3427                 lower_32_bits(ring->mqd_gpu_addr
3428                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3429         mqd->dynamic_cu_mask_addr_hi =
3430                 upper_32_bits(ring->mqd_gpu_addr
3431                               + offsetof(struct v9_mqd_allocation, dynamic_cu_mask));
3432
3433         eop_base_addr = ring->eop_gpu_addr >> 8;
3434         mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
3435         mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
3436
3437         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3438         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
3439         tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
3440                         (order_base_2(GFX9_MEC_HPD_SIZE / 4) - 1));
3441
3442         mqd->cp_hqd_eop_control = tmp;
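             /*
              * Worked example: GFX9_MEC_HPD_SIZE is 4096 bytes = 1024 dwords,
              * order_base_2(1024) - 1 = 9, and the hardware decodes
              * 2^(9 + 1) = 1024 dwords, i.e. the full per-queue EOP buffer.
              */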
3443
3444         /* enable doorbell? */
3445         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3446
3447         if (ring->use_doorbell) {
3448                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3449                                     DOORBELL_OFFSET, ring->doorbell_index);
3450                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3451                                     DOORBELL_EN, 1);
3452                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3453                                     DOORBELL_SOURCE, 0);
3454                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3455                                     DOORBELL_HIT, 0);
3456         } else {
3457                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3458                                          DOORBELL_EN, 0);
3459         }
3460
3461         mqd->cp_hqd_pq_doorbell_control = tmp;
3462
3463         /* disable the queue if it's active */
3464         ring->wptr = 0;
3465         mqd->cp_hqd_dequeue_request = 0;
3466         mqd->cp_hqd_pq_rptr = 0;
3467         mqd->cp_hqd_pq_wptr_lo = 0;
3468         mqd->cp_hqd_pq_wptr_hi = 0;
3469
3470         /* set the pointer to the MQD */
3471         mqd->cp_mqd_base_addr_lo = ring->mqd_gpu_addr & 0xfffffffc;
3472         mqd->cp_mqd_base_addr_hi = upper_32_bits(ring->mqd_gpu_addr);
3473
3474         /* set MQD vmid to 0 */
3475         tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
3476         tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
3477         mqd->cp_mqd_control = tmp;
3478
3479         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3480         hqd_gpu_addr = ring->gpu_addr >> 8;
3481         mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
3482         mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3483
3484         /* set up the HQD, this is similar to CP_RB0_CNTL */
3485         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
3486         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
3487                             (order_base_2(ring->ring_size / 4) - 1));
3488         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
3489                         ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
3490 #ifdef __BIG_ENDIAN
3491         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3492 #endif
3493         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3494         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3495         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3496         tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3497         mqd->cp_hqd_pq_control = tmp;
3498
3499         /* set the wb address whether it's enabled or not */
3500         wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3501         mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3502         mqd->cp_hqd_pq_rptr_report_addr_hi =
3503                 upper_32_bits(wb_gpu_addr) & 0xffff;
3504
3505         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3506         wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3507         mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
3508         mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3509
3510         tmp = 0;
3511         /* enable the doorbell if requested */
3512         if (ring->use_doorbell) {
3513                 tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
3514                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3515                                 DOORBELL_OFFSET, ring->doorbell_index);
3516
3517                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3518                                          DOORBELL_EN, 1);
3519                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3520                                          DOORBELL_SOURCE, 0);
3521                 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3522                                          DOORBELL_HIT, 0);
3523         }
3524
3525         mqd->cp_hqd_pq_doorbell_control = tmp;
3526
3527         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3528         ring->wptr = 0;
3529         mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR);
3530
3531         /* set the vmid for the queue */
3532         mqd->cp_hqd_vmid = 0;
3533
3534         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
3535         tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3536         mqd->cp_hqd_persistent_state = tmp;
3537
3538         /* set MIN_IB_AVAIL_SIZE */
3539         tmp = RREG32_SOC15(GC, 0, mmCP_HQD_IB_CONTROL);
3540         tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
3541         mqd->cp_hqd_ib_control = tmp;
3542
3543         /* activate the queue */
3544         mqd->cp_hqd_active = 1;
3545
3546         return 0;
3547 }
3548
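/* Program the KIQ ring's MQD image into the CP HQD registers.  The caller
 * must have selected the target me/pipe/queue via soc15_grbm_select()
 * while holding srbm_mutex.
 */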
3549 static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
3550 {
3551         struct amdgpu_device *adev = ring->adev;
3552         struct v9_mqd *mqd = ring->mqd_ptr;
3553         int j;
3554
3555         /* disable wptr polling */
3556         WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3557
3558         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR,
3559                mqd->cp_hqd_eop_base_addr_lo);
3560         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI,
3561                mqd->cp_hqd_eop_base_addr_hi);
3562
3563         /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3564         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_EOP_CONTROL,
3565                mqd->cp_hqd_eop_control);
3566
3567         /* enable doorbell? */
3568         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3569                mqd->cp_hqd_pq_doorbell_control);
3570
3571         /* disable the queue if it's active */
3572         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3573                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3574                 for (j = 0; j < adev->usec_timeout; j++) {
3575                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3576                                 break;
3577                         udelay(1);
3578                 }
3579                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3580                        mqd->cp_hqd_dequeue_request);
3581                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR,
3582                        mqd->cp_hqd_pq_rptr);
3583                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3584                        mqd->cp_hqd_pq_wptr_lo);
3585                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3586                        mqd->cp_hqd_pq_wptr_hi);
3587         }
3588
3589         /* set the pointer to the MQD */
3590         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR,
3591                mqd->cp_mqd_base_addr_lo);
3592         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_BASE_ADDR_HI,
3593                mqd->cp_mqd_base_addr_hi);
3594
3595         /* set MQD vmid to 0 */
3596         WREG32_SOC15_RLC(GC, 0, mmCP_MQD_CONTROL,
3597                mqd->cp_mqd_control);
3598
3599         /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3600         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE,
3601                mqd->cp_hqd_pq_base_lo);
3602         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_BASE_HI,
3603                mqd->cp_hqd_pq_base_hi);
3604
3605         /* set up the HQD, this is similar to CP_RB0_CNTL */
3606         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_CONTROL,
3607                mqd->cp_hqd_pq_control);
3608
3609         /* set the wb address whether it's enabled or not */
3610         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3611                                 mqd->cp_hqd_pq_rptr_report_addr_lo);
3612         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3613                                 mqd->cp_hqd_pq_rptr_report_addr_hi);
3614
3615         /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3616         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
3617                mqd->cp_hqd_pq_wptr_poll_addr_lo);
3618         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3619                mqd->cp_hqd_pq_wptr_poll_addr_hi);
3620
3621         /* enable the doorbell if requested */
3622         if (ring->use_doorbell) {
3623                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
3624                                         (adev->doorbell_index.kiq * 2) << 2);
3625                 WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
3626                                         (adev->doorbell_index.userqueue_end * 2) << 2);
3627         }
3628
3629         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
3630                mqd->cp_hqd_pq_doorbell_control);
3631
3632         /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3633         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO,
3634                mqd->cp_hqd_pq_wptr_lo);
3635         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI,
3636                mqd->cp_hqd_pq_wptr_hi);
3637
3638         /* set the vmid for the queue */
3639         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3640
3641         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE,
3642                mqd->cp_hqd_persistent_state);
3643
3644         /* activate the queue */
3645         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE,
3646                mqd->cp_hqd_active);
3647
3648         if (ring->use_doorbell)
3649                 WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3650
3651         return 0;
3652 }
3653
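/* Deactivate the KIQ: request a dequeue, wait for CP_HQD_ACTIVE to clear
 * (forcing it to 0 if the request times out), then clear the doorbell,
 * rptr and wptr HQD registers.
 */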
3654 static int gfx_v9_0_kiq_fini_register(struct amdgpu_ring *ring)
3655 {
3656         struct amdgpu_device *adev = ring->adev;
3657         int j;
3658
3659         /* disable the queue if it's active */
3660         if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
3661
3662                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
3663
3664                 for (j = 0; j < adev->usec_timeout; j++) {
3665                         if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
3666                                 break;
3667                         udelay(1);
3668                 }
3669
3670                 if (j == adev->usec_timeout) {
3671                         DRM_DEBUG("KIQ dequeue request failed.\n");
3672
3673                         /* Manual disable if dequeue request times out */
3674                         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_ACTIVE, 0);
3675                 }
3676
3677                 WREG32_SOC15_RLC(GC, 0, mmCP_HQD_DEQUEUE_REQUEST,
3678                       0);
3679         }
3680
3681         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IQ_TIMER, 0);
3682         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_IB_CONTROL, 0);
3683         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PERSISTENT_STATE, 0);
3684         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
3685         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
3686         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_RPTR, 0);
3687         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_HI, 0);
3688         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PQ_WPTR_LO, 0);
3689
3690         return 0;
3691 }
3692
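/* Initialize the KIQ's MQD.  On GPU reset the saved MQD backup is restored
 * and only the HQD registers are re-programmed; otherwise a fresh MQD is
 * built, programmed, and a backup copy is saved for later resets.
 */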
3693 static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
3694 {
3695         struct amdgpu_device *adev = ring->adev;
3696         struct v9_mqd *mqd = ring->mqd_ptr;
3697         int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS;
3698
3699         gfx_v9_0_kiq_setting(ring);
3700
3701         if (adev->in_gpu_reset) { /* for GPU_RESET case */
3702                 /* reset MQD to a clean status */
3703                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3704                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3705
3706                 /* reset ring buffer */
3707                 ring->wptr = 0;
3708                 amdgpu_ring_clear_ring(ring);
3709
3710                 mutex_lock(&adev->srbm_mutex);
3711                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3712                 gfx_v9_0_kiq_init_register(ring);
3713                 soc15_grbm_select(adev, 0, 0, 0, 0);
3714                 mutex_unlock(&adev->srbm_mutex);
3715         } else {
3716                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3717                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3718                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3719                 mutex_lock(&adev->srbm_mutex);
3720                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3721                 gfx_v9_0_mqd_init(ring);
3722                 gfx_v9_0_kiq_init_register(ring);
3723                 soc15_grbm_select(adev, 0, 0, 0, 0);
3724                 mutex_unlock(&adev->srbm_mutex);
3725
3726                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3727                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3728         }
3729
3730         return 0;
3731 }
3732
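/* Initialize a compute queue's MQD.  A fresh MQD is built and backed up on
 * first init, restored from the backup on GPU reset, or left as-is (only
 * the ring buffer is cleared) when resuming from suspend.
 */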
3733 static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
3734 {
3735         struct amdgpu_device *adev = ring->adev;
3736         struct v9_mqd *mqd = ring->mqd_ptr;
3737         int mqd_idx = ring - &adev->gfx.compute_ring[0];
3738
3739         if (!adev->in_gpu_reset && !adev->in_suspend) {
3740                 memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
3741                 ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
3742                 ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
3743                 mutex_lock(&adev->srbm_mutex);
3744                 soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
3745                 gfx_v9_0_mqd_init(ring);
3746                 soc15_grbm_select(adev, 0, 0, 0, 0);
3747                 mutex_unlock(&adev->srbm_mutex);
3748
3749                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3750                         memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation));
3751         } else if (adev->in_gpu_reset) { /* for GPU_RESET case */
3752                 /* reset MQD to a clean status */
3753                 if (adev->gfx.mec.mqd_backup[mqd_idx])
3754                         memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation));
3755
3756                 /* reset ring buffer */
3757                 ring->wptr = 0;
3758                 amdgpu_ring_clear_ring(ring);
3759         } else {
3760                 amdgpu_ring_clear_ring(ring);
3761         }
3762
3763         return 0;
3764 }
3765
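/* Map the KIQ's MQD buffer, initialize the queue and mark the ring as
 * ready for the scheduler.
 */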
3766 static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev)
3767 {
3768         struct amdgpu_ring *ring;
3769         int r;
3770
3771         ring = &adev->gfx.kiq.ring;
3772
3773         r = amdgpu_bo_reserve(ring->mqd_obj, false);
3774         if (unlikely(r != 0))
3775                 return r;
3776
3777         r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3778         if (unlikely(r != 0))
3779                 return r;
3780
3781         gfx_v9_0_kiq_init_queue(ring);
3782         amdgpu_bo_kunmap(ring->mqd_obj);
3783         ring->mqd_ptr = NULL;
3784         amdgpu_bo_unreserve(ring->mqd_obj);
3785         ring->sched.ready = true;
3786         return 0;
3787 }
3788
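/* Enable the compute engine, initialize the MQD of every compute ring and
 * then enable the KCQs through the KIQ.
 */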
3789 static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev)
3790 {
3791         struct amdgpu_ring *ring = NULL;
3792         int r = 0, i;
3793
3794         gfx_v9_0_cp_compute_enable(adev, true);
3795
3796         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3797                 ring = &adev->gfx.compute_ring[i];
3798
3799                 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3800                 if (unlikely(r != 0))
3801                         goto done;
3802                 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr);
3803                 if (!r) {
3804                         r = gfx_v9_0_kcq_init_queue(ring);
3805                         amdgpu_bo_kunmap(ring->mqd_obj);
3806                         ring->mqd_ptr = NULL;
3807                 }
3808                 amdgpu_bo_unreserve(ring->mqd_obj);
3809                 if (r)
3810                         goto done;
3811         }
3812
3813         r = gfx_v9_0_kiq_kcq_enable(adev);
3814 done:
3815         return r;
3816 }
3817
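/* Bring up the CP: load the CP microcode when front-door (PSP) loading is
 * not used, then resume the KIQ, the GFX ring (except on Arcturus) and the
 * compute rings, and ring-test them.
 */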
3818 static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
3819 {
3820         int r, i;
3821         struct amdgpu_ring *ring;
3822
3823         if (!(adev->flags & AMD_IS_APU))
3824                 gfx_v9_0_enable_gui_idle_interrupt(adev, false);
3825
3826         if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
3827                 if (adev->asic_type != CHIP_ARCTURUS) {
3828                         /* legacy firmware loading */
3829                         r = gfx_v9_0_cp_gfx_load_microcode(adev);
3830                         if (r)
3831                                 return r;
3832                 }
3833
3834                 r = gfx_v9_0_cp_compute_load_microcode(adev);
3835                 if (r)
3836                         return r;
3837         }
3838
3839         r = gfx_v9_0_kiq_resume(adev);
3840         if (r)
3841                 return r;
3842
3843         if (adev->asic_type != CHIP_ARCTURUS) {
3844                 r = gfx_v9_0_cp_gfx_resume(adev);
3845                 if (r)
3846                         return r;
3847         }
3848
3849         r = gfx_v9_0_kcq_resume(adev);
3850         if (r)
3851                 return r;
3852
3853         if (adev->asic_type != CHIP_ARCTURUS) {
3854                 ring = &adev->gfx.gfx_ring[0];
3855                 r = amdgpu_ring_test_helper(ring);
3856                 if (r)
3857                         return r;
3858         }
3859
3860         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3861                 ring = &adev->gfx.compute_ring[i];
3862                 amdgpu_ring_test_helper(ring);
3863         }
3864
3865         gfx_v9_0_enable_gui_idle_interrupt(adev, true);
3866
3867         return 0;
3868 }
3869
3870 static void gfx_v9_0_cp_enable(struct amdgpu_device *adev, bool enable)
3871 {
3872         if (adev->asic_type != CHIP_ARCTURUS)
3873                 gfx_v9_0_cp_gfx_enable(adev, enable);
3874         gfx_v9_0_cp_compute_enable(adev, enable);
3875 }
3876
3877 static int gfx_v9_0_hw_init(void *handle)
3878 {
3879         int r;
3880         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3881
3882         if (!amdgpu_sriov_vf(adev))
3883                 gfx_v9_0_init_golden_registers(adev);
3884
3885         gfx_v9_0_constants_init(adev);
3886
3887         r = gfx_v9_0_csb_vram_pin(adev);
3888         if (r)
3889                 return r;
3890
3891         r = adev->gfx.rlc.funcs->resume(adev);
3892         if (r)
3893                 return r;
3894
3895         r = gfx_v9_0_cp_resume(adev);
3896         if (r)
3897                 return r;
3898
3899         if (adev->asic_type != CHIP_ARCTURUS) {
3900                 r = gfx_v9_0_ngg_en(adev);
3901                 if (r)
3902                         return r;
3903         }
3904
3905         return r;
3906 }
3907
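/* Unmap all compute queues by submitting UNMAP_QUEUES packets with the
 * RESET_QUEUES action on the KIQ ring.
 */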
3908 static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev)
3909 {
3910         int r, i;
3911         struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
3912
3913         r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings);
3914         if (r)
3915                 DRM_ERROR("Failed to lock KIQ (%d).\n", r);
3916
3917         for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3918                 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3919
3920                 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
3921                 amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
3922                                                 PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */
3923                                                 PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
3924                                                 PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) |
3925                                                 PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
3926                 amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
3927                 amdgpu_ring_write(kiq_ring, 0);
3928                 amdgpu_ring_write(kiq_ring, 0);
3929                 amdgpu_ring_write(kiq_ring, 0);
3930         }
3931         r = amdgpu_ring_test_helper(kiq_ring);
3932         if (r)
3933                 DRM_ERROR("KCQ disable failed\n");
3934
3935         return r;
3936 }
3937
3938 static int gfx_v9_0_hw_fini(void *handle)
3939 {
3940         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3941
3942         amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0);
3943         amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
3944         amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
3945
3946         /* disable the KCQs so the CPC does not keep touching memory that is no longer valid */
3947         gfx_v9_0_kcq_disable(adev);
3948
3949         if (amdgpu_sriov_vf(adev)) {
3950                 gfx_v9_0_cp_gfx_enable(adev, false);
3951                 /* Polling must be disabled for SRIOV once hw_fini completes;
3952                  * otherwise the CPC engine may keep fetching a WB address that
3953                  * is no longer valid after sw_fini and trigger DMAR read errors
3954                  * on the hypervisor side.
3955                  */
3956                 WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
3957                 return 0;
3958         }
3959
3960         /* Use the deinitialization sequence from CAIL when unbinding the device
3961          * from the driver, otherwise the KIQ hangs when it is bound back.
3962          */
3963         if (!adev->in_gpu_reset && !adev->in_suspend) {
3964                 mutex_lock(&adev->srbm_mutex);
3965                 soc15_grbm_select(adev, adev->gfx.kiq.ring.me,
3966                                 adev->gfx.kiq.ring.pipe,
3967                                 adev->gfx.kiq.ring.queue, 0);
3968                 gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring);
3969                 soc15_grbm_select(adev, 0, 0, 0, 0);
3970                 mutex_unlock(&adev->srbm_mutex);
3971         }
3972
3973         gfx_v9_0_cp_enable(adev, false);
3974         adev->gfx.rlc.funcs->stop(adev);
3975
3976         gfx_v9_0_csb_vram_unpin(adev);
3977
3978         return 0;
3979 }
3980
3981 static int gfx_v9_0_suspend(void *handle)
3982 {
3983         return gfx_v9_0_hw_fini(handle);
3984 }
3985
3986 static int gfx_v9_0_resume(void *handle)
3987 {
3988         return gfx_v9_0_hw_init(handle);
3989 }
3990
3991 static bool gfx_v9_0_is_idle(void *handle)
3992 {
3993         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3994
3995         if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS),
3996                                 GRBM_STATUS, GUI_ACTIVE))
3997                 return false;
3998         else
3999                 return true;
4000 }
4001
4002 static int gfx_v9_0_wait_for_idle(void *handle)
4003 {
4004         unsigned i;
4005         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4006
4007         for (i = 0; i < adev->usec_timeout; i++) {
4008                 if (gfx_v9_0_is_idle(handle))
4009                         return 0;
4010                 udelay(1);
4011         }
4012         return -ETIMEDOUT;
4013 }
4014
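/* Check the GRBM status registers and, if any GFX/CP/RLC block is busy,
 * stop the RLC, disable CP parsing/prefetching and pulse the corresponding
 * GRBM_SOFT_RESET bits.
 */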
4015 static int gfx_v9_0_soft_reset(void *handle)
4016 {
4017         u32 grbm_soft_reset = 0;
4018         u32 tmp;
4019         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4020
4021         /* GRBM_STATUS */
4022         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS);
4023         if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
4024                    GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
4025                    GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
4026                    GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
4027                    GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
4028                    GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
4029                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4030                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4031                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4032                                                 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
4033         }
4034
4035         if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
4036                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4037                                                 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
4038         }
4039
4040         /* GRBM_STATUS2 */
4041         tmp = RREG32_SOC15(GC, 0, mmGRBM_STATUS2);
4042         if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
4043                 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
4044                                                 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
4045
4046
4047         if (grbm_soft_reset) {
4048                 /* stop the rlc */
4049                 adev->gfx.rlc.funcs->stop(adev);
4050
4051                 if (adev->asic_type != CHIP_ARCTURUS)
4052                         /* Disable GFX parsing/prefetching */
4053                         gfx_v9_0_cp_gfx_enable(adev, false);
4054
4055                 /* Disable MEC parsing/prefetching */
4056                 gfx_v9_0_cp_compute_enable(adev, false);
4057
4058                 if (grbm_soft_reset) {
4059                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4060                         tmp |= grbm_soft_reset;
4061                         dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4062                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4063                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4064
4065                         udelay(50);
4066
4067                         tmp &= ~grbm_soft_reset;
4068                         WREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET, tmp);
4069                         tmp = RREG32_SOC15(GC, 0, mmGRBM_SOFT_RESET);
4070                 }
4071
4072                 /* Wait a little for things to settle down */
4073                 udelay(50);
4074         }
4075         return 0;
4076 }
4077
4078 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4079 {
4080         uint64_t clock;
4081
4082         mutex_lock(&adev->gfx.gpu_clock_mutex);
4083         WREG32_SOC15(GC, 0, mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4084         clock = (uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_LSB) |
4085                 ((uint64_t)RREG32_SOC15(GC, 0, mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4086         mutex_unlock(&adev->gfx.gpu_clock_mutex);
4087         return clock;
4088 }
4089
4090 static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
4091                                           uint32_t vmid,
4092                                           uint32_t gds_base, uint32_t gds_size,
4093                                           uint32_t gws_base, uint32_t gws_size,
4094                                           uint32_t oa_base, uint32_t oa_size)
4095 {
4096         struct amdgpu_device *adev = ring->adev;
4097
4098         /* GDS Base */
4099         gfx_v9_0_write_data_to_reg(ring, 0, false,
4100                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid,
4101                                    gds_base);
4102
4103         /* GDS Size */
4104         gfx_v9_0_write_data_to_reg(ring, 0, false,
4105                                    SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE) + 2 * vmid,
4106                                    gds_size);
4107
4108         /* GWS */
4109         gfx_v9_0_write_data_to_reg(ring, 0, false,
4110                                    SOC15_REG_OFFSET(GC, 0, mmGDS_GWS_VMID0) + vmid,
4111                                    gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
4112
4113         /* OA */
4114         gfx_v9_0_write_data_to_reg(ring, 0, false,
4115                                    SOC15_REG_OFFSET(GC, 0, mmGDS_OA_VMID0) + vmid,
4116                                    (1 << (oa_size + oa_base)) - (1 << oa_base));
4117 }
4118
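/* Pre-assembled GFX9 compute shaders dispatched by
 * gfx_v9_0_do_edc_gpr_workarounds() below to initialize the VGPRs and
 * SGPRs as part of the ECC/EDC setup.
 */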
4119 static const u32 vgpr_init_compute_shader[] =
4120 {
4121         0xb07c0000, 0xbe8000ff,
4122         0x000000f8, 0xbf110800,
4123         0x7e000280, 0x7e020280,
4124         0x7e040280, 0x7e060280,
4125         0x7e080280, 0x7e0a0280,
4126         0x7e0c0280, 0x7e0e0280,
4127         0x80808800, 0xbe803200,
4128         0xbf84fff5, 0xbf9c0000,
4129         0xd28c0001, 0x0001007f,
4130         0xd28d0001, 0x0002027e,
4131         0x10020288, 0xb8810904,
4132         0xb7814000, 0xd1196a01,
4133         0x00000301, 0xbe800087,
4134         0xbefc00c1, 0xd89c4000,
4135         0x00020201, 0xd89cc080,
4136         0x00040401, 0x320202ff,
4137         0x00000800, 0x80808100,
4138         0xbf84fff8, 0x7e020280,
4139         0xbf810000, 0x00000000,
4140 };
4141
4142 static const u32 sgpr_init_compute_shader[] =
4143 {
4144         0xb07c0000, 0xbe8000ff,
4145         0x0000005f, 0xbee50080,
4146         0xbe812c65, 0xbe822c65,
4147         0xbe832c65, 0xbe842c65,
4148         0xbe852c65, 0xb77c0005,
4149         0x80808500, 0xbf84fff8,
4150         0xbe800080, 0xbf810000,
4151 };
4152
4153 static const struct soc15_reg_entry vgpr_init_regs[] = {
4154    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4155    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4156    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4157    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4158    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
4159    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
4160    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
4161    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4162    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x100007f }, /* VGPRS=15 (256 logical VGPRs), SGPRS=1 (16 SGPRs), BULKY=1 */
4163    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x400000 },  /* 64KB LDS */
4164 };
4165
4166 static const struct soc15_reg_entry sgpr_init_regs[] = {
4167    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
4168    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
4169    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
4170    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
4171    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x1000000 }, /* CU_GROUP_COUNT=1 */
4172    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
4173    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
4174    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
4175    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 SGPRs) */
4176    { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
4177 };
4178
4179 static const struct soc15_reg_entry sec_ded_counter_registers[] = {
4180    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1, 1},
4181    { SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1, 1},
4182    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1, 1},
4183    { SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1, 1},
4184    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1, 1},
4185    { SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1, 1},
4186    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1, 1},
4187    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1, 1},
4188    { SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1, 1},
4189    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1, 1},
4190    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_GRBM_CNT), 0, 1, 1},
4191    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_DED), 0, 1, 1},
4192    { SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 0, 4, 1},
4193    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 0, 4, 6},
4194    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_DED_CNT), 0, 4, 16},
4195    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_INFO), 0, 4, 16},
4196    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_SEC_CNT), 0, 4, 16},
4197    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 1, 16},
4198    { SOC15_REG_ENTRY(GC, 0, mmTCP_ATC_EDC_GATCL1_CNT), 0, 4, 16},
4199    { SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT), 0, 4, 16},
4200    { SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 0, 4, 16},
4201    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 0, 4, 6},
4202    { SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 0, 4, 16},
4203    { SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 0, 4, 16},
4204    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1, 1},
4205    { SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1, 1},
4206    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 1, 32},
4207    { SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 1, 32},
4208    { SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 1, 72},
4209    { SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 1, 16},
4210    { SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 1, 2},
4211    { SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 0, 4, 6},
4212 };
4213
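/* EDC/ECC workaround: use a CP DMA_DATA packet to write zeros over the
 * whole GDS VMID0 window so the GDS contents start from a known state,
 * then wait for the ring to drain.
 */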
4214 static int gfx_v9_0_do_edc_gds_workarounds(struct amdgpu_device *adev)
4215 {
4216         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4217         int i, r;
4218
4219         r = amdgpu_ring_alloc(ring, 7);
4220         if (r) {
4221                 DRM_ERROR("amdgpu: GDS workarounds failed to lock ring %s (%d).\n",
4222                         ring->name, r);
4223                 return r;
4224         }
4225
4226         WREG32_SOC15(GC, 0, mmGDS_VMID0_BASE, 0x00000000);
4227         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, adev->gds.gds_size);
4228
4229         amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4230         amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC |
4231                                 PACKET3_DMA_DATA_DST_SEL(1) |
4232                                 PACKET3_DMA_DATA_SRC_SEL(2) |
4233                                 PACKET3_DMA_DATA_ENGINE(0)));
4234         amdgpu_ring_write(ring, 0);
4235         amdgpu_ring_write(ring, 0);
4236         amdgpu_ring_write(ring, 0);
4237         amdgpu_ring_write(ring, 0);
4238         amdgpu_ring_write(ring, PACKET3_DMA_DATA_CMD_RAW_WAIT |
4239                                 adev->gds.gds_size);
4240
4241         amdgpu_ring_commit(ring);
4242
4243         for (i = 0; i < adev->usec_timeout; i++) {
4244                 if (ring->wptr == gfx_v9_0_ring_get_rptr_compute(ring))
4245                         break;
4246                 udelay(1);
4247         }
4248
4249         if (i >= adev->usec_timeout)
4250                 r = -ETIMEDOUT;
4251
4252         WREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE, 0x00000000);
4253
4254         return r;
4255 }
4256
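/* EDC/ECC workaround: build an IB that dispatches the VGPR and SGPR init
 * shaders, wait for it to complete, then read back every SEC/DED counter
 * register to clear the counts.
 */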
4257 static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
4258 {
4259         struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
4260         struct amdgpu_ib ib;
4261         struct dma_fence *f = NULL;
4262         int r, i, j, k;
4263         unsigned total_size, vgpr_offset, sgpr_offset;
4264         u64 gpu_addr;
4265
4266         /* only supported when RAS is enabled */
4267         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
4268                 return 0;
4269
4270         /* bail if the compute ring is not ready */
4271         if (!ring->sched.ready)
4272                 return 0;
4273
4274         total_size =
4275                 ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
4276         total_size +=
4277                 ((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
4278         total_size = ALIGN(total_size, 256);
4279         vgpr_offset = total_size;
4280         total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
4281         sgpr_offset = total_size;
4282         total_size += sizeof(sgpr_init_compute_shader);
4283
4284         /* allocate an indirect buffer to put the commands in */
4285         memset(&ib, 0, sizeof(ib));
4286         r = amdgpu_ib_get(adev, NULL, total_size, &ib);
4287         if (r) {
4288                 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
4289                 return r;
4290         }
4291
4292         /* load the compute shaders */
4293         for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
4294                 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
4295
4296         for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
4297                 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
4298
4299         /* init the ib length to 0 */
4300         ib.length_dw = 0;
4301
4302         /* VGPR */
4303         /* write the register state for the compute dispatch */
4304         for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i++) {
4305                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4306                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(vgpr_init_regs[i])
4307                                                                 - PACKET3_SET_SH_REG_START;
4308                 ib.ptr[ib.length_dw++] = vgpr_init_regs[i].reg_value;
4309         }
4310         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4311         gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
4312         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4313         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4314                                                         - PACKET3_SET_SH_REG_START;
4315         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4316         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4317
4318         /* write dispatch packet */
4319         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4320         ib.ptr[ib.length_dw++] = 128; /* x */
4321         ib.ptr[ib.length_dw++] = 1; /* y */
4322         ib.ptr[ib.length_dw++] = 1; /* z */
4323         ib.ptr[ib.length_dw++] =
4324                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4325
4326         /* write CS partial flush packet */
4327         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4328         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4329
4330         /* SGPR */
4331         /* write the register state for the compute dispatch */
4332         for (i = 0; i < ARRAY_SIZE(sgpr_init_regs); i++) {
4333                 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
4334                 ib.ptr[ib.length_dw++] = SOC15_REG_ENTRY_OFFSET(sgpr_init_regs[i])
4335                                                                 - PACKET3_SET_SH_REG_START;
4336                 ib.ptr[ib.length_dw++] = sgpr_init_regs[i].reg_value;
4337         }
4338         /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
4339         gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
4340         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
4341         ib.ptr[ib.length_dw++] = SOC15_REG_OFFSET(GC, 0, mmCOMPUTE_PGM_LO)
4342                                                         - PACKET3_SET_SH_REG_START;
4343         ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
4344         ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
4345
4346         /* write dispatch packet */
4347         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
4348         ib.ptr[ib.length_dw++] = 128; /* x */
4349         ib.ptr[ib.length_dw++] = 1; /* y */
4350         ib.ptr[ib.length_dw++] = 1; /* z */
4351         ib.ptr[ib.length_dw++] =
4352                 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
4353
4354         /* write CS partial flush packet */
4355         ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
4356         ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
4357
4358         /* schedule the IB on the ring */
4359         r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
4360         if (r) {
4361                 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
4362                 goto fail;
4363         }
4364
4365         /* wait for the GPU to finish processing the IB */
4366         r = dma_fence_wait(f, false);
4367         if (r) {
4368                 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
4369                 goto fail;
4370         }
4371
4372         /* read back registers to clear the counters */
4373         mutex_lock(&adev->grbm_idx_mutex);
4374         for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++) {
4375                 for (j = 0; j < sec_ded_counter_registers[i].se_num; j++) {
4376                         for (k = 0; k < sec_ded_counter_registers[i].instance; k++) {
4377                                 gfx_v9_0_select_se_sh(adev, j, 0x0, k);
4378                                 RREG32(SOC15_REG_ENTRY_OFFSET(sec_ded_counter_registers[i]));
4379                         }
4380                 }
4381         }
4382         WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, 0xe0000000);
4383         mutex_unlock(&adev->grbm_idx_mutex);
4384
4385 fail:
4386         amdgpu_ib_free(adev, &ib, NULL);
4387         dma_fence_put(f);
4388
4389         return r;
4390 }
4391
4392 static int gfx_v9_0_early_init(void *handle)
4393 {
4394         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4395
4396         if (adev->asic_type == CHIP_ARCTURUS)
4397                 adev->gfx.num_gfx_rings = 0;
4398         else
4399                 adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
4400         adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
4401         gfx_v9_0_set_ring_funcs(adev);
4402         gfx_v9_0_set_irq_funcs(adev);
4403         gfx_v9_0_set_gds_init(adev);
4404         gfx_v9_0_set_rlc_funcs(adev);
4405
4406         return 0;
4407 }
4408
4409 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
4410                 struct ras_err_data *err_data,
4411                 struct amdgpu_iv_entry *entry);
4412
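/* Late ECC init: run the GDS and GPR init workarounds, then enable the RAS
 * feature for the GFX block and register its interrupt handler, debugfs and
 * sysfs entries (or just re-enable the feature on the resume path).
 */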
4413 static int gfx_v9_0_ecc_late_init(void *handle)
4414 {
4415         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4416         struct ras_common_if **ras_if = &adev->gfx.ras_if;
4417         struct ras_ih_if ih_info = {
4418                 .cb = gfx_v9_0_process_ras_data_cb,
4419         };
4420         struct ras_fs_if fs_info = {
4421                 .sysfs_name = "gfx_err_count",
4422                 .debugfs_name = "gfx_err_inject",
4423         };
4424         struct ras_common_if ras_block = {
4425                 .block = AMDGPU_RAS_BLOCK__GFX,
4426                 .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
4427                 .sub_block_index = 0,
4428                 .name = "gfx",
4429         };
4430         int r;
4431
4432         if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
4433                 amdgpu_ras_feature_enable_on_boot(adev, &ras_block, 0);
4434                 return 0;
4435         }
4436
4437         r = gfx_v9_0_do_edc_gds_workarounds(adev);
4438         if (r)
4439                 return r;
4440
4441         /* requires IBs so do in late init after IB pool is initialized */
4442         r = gfx_v9_0_do_edc_gpr_workarounds(adev);
4443         if (r)
4444                 return r;
4445
4446         /* handle resume path. */
4447         if (*ras_if) {
4448                 /* Re-send the RAS TA enable command during resume and
4449                  * be prepared to handle failure.
4450                  */
4451                 ih_info.head = **ras_if;
4452                 r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
4453                 if (r) {
4454                         if (r == -EAGAIN) {
4455                                 /* request a gpu reset. will run again. */
4456                                 amdgpu_ras_request_reset_on_boot(adev,
4457                                                 AMDGPU_RAS_BLOCK__GFX);
4458                                 return 0;
4459                         }
4460                         /* fail to enable ras, cleanup all. */
4461                         goto irq;
4462                 }
4463                 /* enable successfully. continue. */
4464                 goto resume;
4465         }
4466
4467         *ras_if = kmalloc(sizeof(**ras_if), GFP_KERNEL);
4468         if (!*ras_if)
4469                 return -ENOMEM;
4470
4471         **ras_if = ras_block;
4472
4473         r = amdgpu_ras_feature_enable_on_boot(adev, *ras_if, 1);
4474         if (r) {
4475                 if (r == -EAGAIN) {
4476                         amdgpu_ras_request_reset_on_boot(adev,
4477                                         AMDGPU_RAS_BLOCK__GFX);
4478                         r = 0;
4479                 }
4480                 goto feature;
4481         }
4482
4483         ih_info.head = **ras_if;
4484         fs_info.head = **ras_if;
4485
4486         r = amdgpu_ras_interrupt_add_handler(adev, &ih_info);
4487         if (r)
4488                 goto interrupt;
4489
4490         amdgpu_ras_debugfs_create(adev, &fs_info);
4491
4492         r = amdgpu_ras_sysfs_create(adev, &fs_info);
4493         if (r)
4494                 goto sysfs;
4495 resume:
4496         r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
4497         if (r)
4498                 goto irq;
4499
4500         return 0;
4501 irq:
4502         amdgpu_ras_sysfs_remove(adev, *ras_if);
4503 sysfs:
4504         amdgpu_ras_debugfs_remove(adev, *ras_if);
4505         amdgpu_ras_interrupt_remove_handler(adev, &ih_info);
4506 interrupt:
4507         amdgpu_ras_feature_enable(adev, *ras_if, 0);
4508 feature:
4509         kfree(*ras_if);
4510         *ras_if = NULL;
4511         return r;
4512 }
4513
4514 static int gfx_v9_0_late_init(void *handle)
4515 {
4516         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4517         int r;
4518
4519         r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4520         if (r)
4521                 return r;
4522
4523         r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4524         if (r)
4525                 return r;
4526
4527         r = gfx_v9_0_ecc_late_init(handle);
4528         if (r)
4529                 return r;
4530
4531         return 0;
4532 }
4533
4534 static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev)
4535 {
4536         uint32_t rlc_setting;
4537
4538         /* report whether the RLC is currently enabled */
4539         rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL);
4540         if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK))
4541                 return false;
4542
4543         return true;
4544 }
4545
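/* Ask the RLC to enter safe mode: write the CMD/MESSAGE handshake to
 * RLC_SAFE_MODE and poll until the RLC acknowledges by clearing CMD.
 */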
4546 static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev)
4547 {
4548         uint32_t data;
4549         unsigned i;
4550
4551         data = RLC_SAFE_MODE__CMD_MASK;
4552         data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
4553         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4554
4555         /* wait for RLC_SAFE_MODE */
4556         for (i = 0; i < adev->usec_timeout; i++) {
4557                 if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD))
4558                         break;
4559                 udelay(1);
4560         }
4561 }
4562
4563 static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev)
4564 {
4565         uint32_t data;
4566
4567         data = RLC_SAFE_MODE__CMD_MASK;
4568         WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data);
4569 }
4570
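/* Toggle GFX CG power gating (and pipeline power gating when supported)
 * while the RLC is held in safe mode.
 */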
4571 static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev,
4572                                                 bool enable)
4573 {
4574         amdgpu_gfx_rlc_enter_safe_mode(adev);
4575
4576         if (is_support_sw_smu(adev) && !enable)
4577                 smu_set_gfx_cgpg(&adev->smu, enable);
4578
4579         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
4580                 gfx_v9_0_enable_gfx_cg_power_gating(adev, true);
4581                 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
4582                         gfx_v9_0_enable_gfx_pipeline_powergating(adev, true);
4583         } else {
4584                 gfx_v9_0_enable_gfx_cg_power_gating(adev, false);
4585                 gfx_v9_0_enable_gfx_pipeline_powergating(adev, false);
4586         }
4587
4588         amdgpu_gfx_rlc_exit_safe_mode(adev);
4589 }
4590
4591 static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev,
4592                                                 bool enable)
4593 {
4594         /* TODO: double-check whether this needs to be done under safe mode */
4595         /* gfx_v9_0_enter_rlc_safe_mode(adev); */
4596
4597         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
4598                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, true);
4599         else
4600                 gfx_v9_0_enable_gfx_static_mg_power_gating(adev, false);
4601
4602         if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
4603                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, true);
4604         else
4605                 gfx_v9_0_enable_gfx_dynamic_mg_power_gating(adev, false);
4606
4607         /* gfx_v9_0_exit_rlc_safe_mode(adev); */
4608 }
4609
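/* Enable or disable medium-grain clock gating and memory light sleep by
 * programming RLC_CGTT_MGCG_OVERRIDE and the RLC/CP MEM_SLP controls under
 * RLC safe mode.
 */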
4610 static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
4611                                                       bool enable)
4612 {
4613         uint32_t data, def;
4614
4615         amdgpu_gfx_rlc_enter_safe_mode(adev);
4616
4617         /* It is disabled by HW by default */
4618         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
4619                 /* 1 - RLC_CGTT_MGCG_OVERRIDE */
4620                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4621
4622                 if (adev->asic_type != CHIP_VEGA12)
4623                         data &= ~RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4624
4625                 data &= ~(RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4626                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4627                           RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4628
4629                 /* only for Vega10 & Raven1 */
4630                 data |= RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK;
4631
4632                 if (def != data)
4633                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4634
4635                 /* MGLS is a global flag to control all MGLS in GFX */
4636                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
4637                         /* 2 - RLC memory Light sleep */
4638                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
4639                                 def = data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4640                                 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4641                                 if (def != data)
4642                                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4643                         }
4644                         /* 3 - CP memory Light sleep */
4645                         if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
4646                                 def = data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4647                                 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4648                                 if (def != data)
4649                                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4650                         }
4651                 }
4652         } else {
4653                 /* 1 - MGCG_OVERRIDE */
4654                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4655
4656                 if (adev->asic_type != CHIP_VEGA12)
4657                         data |= RLC_CGTT_MGCG_OVERRIDE__CPF_CGTT_SCLK_OVERRIDE_MASK;
4658
4659                 data |= (RLC_CGTT_MGCG_OVERRIDE__RLC_CGTT_SCLK_OVERRIDE_MASK |
4660                          RLC_CGTT_MGCG_OVERRIDE__GRBM_CGTT_SCLK_OVERRIDE_MASK |
4661                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK |
4662                          RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGLS_OVERRIDE_MASK);
4663
4664                 if (def != data)
4665                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4666
4667                 /* 2 - disable MGLS in RLC */
4668                 data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4669                 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
4670                         data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
4671                         WREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL, data);
4672                 }
4673
4674                 /* 3 - disable MGLS in CP */
4675                 data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4676                 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
4677                         data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
4678                         WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
4679                 }
4680         }
4681
4682         amdgpu_gfx_rlc_exit_safe_mode(adev);
4683 }
4684
4685 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
4686                                            bool enable)
4687 {
4688         uint32_t data, def;
4689
4690         if (adev->asic_type == CHIP_ARCTURUS)
4691                 return;
4692
4693         amdgpu_gfx_rlc_enter_safe_mode(adev);
4694
4695         /* Enable 3D CGCG/CGLS */
4696         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) {
4697                 /* write cmd to clear cgcg/cgls ov */
4698                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4699                 /* unset CGCG override */
4700                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_GFX3D_CG_OVERRIDE_MASK;
4701                 /* update CGCG and CGLS override bits */
4702                 if (def != data)
4703                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4704
4705                 /* enable 3Dcgcg FSM(0x0000363f) */
4706                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4707
4708                 data = (0x36 << RLC_CGCG_CGLS_CTRL_3D__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4709                         RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK;
4710                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGLS)
4711                         data |= (0x000F << RLC_CGCG_CGLS_CTRL_3D__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4712                                 RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK;
4713                 if (def != data)
4714                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4715
4716                 /* set IDLE_POLL_COUNT(0x00900100) */
4717                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4718                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4719                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4720                 if (def != data)
4721                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4722         } else {
4723                 /* Disable CGCG/CGLS */
4724                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4725                 /* disable cgcg, cgls should be disabled */
4726                 data &= ~(RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK |
4727                           RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK);
4728                 /* disable cgcg and cgls in FSM */
4729                 if (def != data)
4730                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data);
4731         }
4732
4733         amdgpu_gfx_rlc_exit_safe_mode(adev);
4734 }
4735
4736 static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
4737                                                       bool enable)
4738 {
4739         uint32_t def, data;
4740
4741         amdgpu_gfx_rlc_enter_safe_mode(adev);
4742
4743         if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
4744                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4745                 /* unset CGCG override */
4746                 data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGCG_OVERRIDE_MASK;
4747                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4748                         data &= ~RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4749                 else
4750                         data |= RLC_CGTT_MGCG_OVERRIDE__GFXIP_CGLS_OVERRIDE_MASK;
4751                 /* update CGCG and CGLS override bits */
4752                 if (def != data)
4753                         WREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE, data);
4754
4755                 /* enable cgcg FSM(0x0000363F) */
4756                 def = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4757
4758                 if (adev->asic_type == CHIP_ARCTURUS)
4759                         data = (0x2000 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4760                                 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4761                 else
4762                         data = (0x36 << RLC_CGCG_CGLS_CTRL__CGCG_GFX_IDLE_THRESHOLD__SHIFT) |
4763                                 RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
4764                 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS)
4765                         data |= (0x000F << RLC_CGCG_CGLS_CTRL__CGLS_REP_COMPANSAT_DELAY__SHIFT) |
4766                                 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
4767                 if (def != data)
4768                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4769
4770                 /* set IDLE_POLL_COUNT(0x00900100) */
4771                 def = RREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL);
4772                 data = (0x0100 << CP_RB_WPTR_POLL_CNTL__POLL_FREQUENCY__SHIFT) |
4773                         (0x0090 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
4774                 if (def != data)
4775                         WREG32_SOC15(GC, 0, mmCP_RB_WPTR_POLL_CNTL, data);
4776         } else {
4777                 def = data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4778                 /* reset CGCG/CGLS bits */
4779                 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
4780                 /* disable cgcg and cgls in FSM */
4781                 if (def != data)
4782                         WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data);
4783         }
4784
4785         amdgpu_gfx_rlc_exit_safe_mode(adev);
4786 }
4787
4788 static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4789                                             bool enable)
4790 {
4791         if (enable) {
4792                 /* CGCG/CGLS should be enabled after MGCG/MGLS
4793                  * ===  MGCG + MGLS ===
4794                  */
4795                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4796                 /* ===  CGCG /CGLS for GFX 3D Only === */
4797                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4798                 /* ===  CGCG + CGLS === */
4799                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4800         } else {
4801                 /* CGCG/CGLS should be disabled before MGCG/MGLS
4802                  * ===  CGCG + CGLS ===
4803                  */
4804                 gfx_v9_0_update_coarse_grain_clock_gating(adev, enable);
4805                 /* ===  CGCG /CGLS for GFX 3D Only === */
4806                 gfx_v9_0_update_3d_clock_gating(adev, enable);
4807                 /* ===  MGCG + MGLS === */
4808                 gfx_v9_0_update_medium_grain_clock_gating(adev, enable);
4809         }
4810         return 0;
4811 }
4812
4813 static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = {
4814         .is_rlc_enabled = gfx_v9_0_is_rlc_enabled,
4815         .set_safe_mode = gfx_v9_0_set_safe_mode,
4816         .unset_safe_mode = gfx_v9_0_unset_safe_mode,
4817         .init = gfx_v9_0_rlc_init,
4818         .get_csb_size = gfx_v9_0_get_csb_size,
4819         .get_csb_buffer = gfx_v9_0_get_csb_buffer,
4820         .get_cp_table_num = gfx_v9_0_cp_jump_table_num,
4821         .resume = gfx_v9_0_rlc_resume,
4822         .stop = gfx_v9_0_rlc_stop,
4823         .reset = gfx_v9_0_rlc_reset,
4824         .start = gfx_v9_0_rlc_start
4825 };
4826
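/* Powergating entry point. On RAVEN/RENOIR, GFXOFF is disabled while the SCK
 * slow-down, CP power gating and CGPG state are reprogrammed and re-enabled
 * only once the new state is in place; VEGA12 simply toggles GFXOFF.
 */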
4827 static int gfx_v9_0_set_powergating_state(void *handle,
4828                                           enum amd_powergating_state state)
4829 {
4830         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4831         bool enable = (state == AMD_PG_STATE_GATE);
4832
4833         switch (adev->asic_type) {
4834         case CHIP_RAVEN:
4835         case CHIP_RENOIR:
4836                 if (!enable) {
4837                         amdgpu_gfx_off_ctrl(adev, false);
4838                         cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4839                 }
4840                 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
4841                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true);
4842                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true);
4843                 } else {
4844                         gfx_v9_0_enable_sck_slow_down_on_power_up(adev, false);
4845                         gfx_v9_0_enable_sck_slow_down_on_power_down(adev, false);
4846                 }
4847
4848                 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
4849                         gfx_v9_0_enable_cp_power_gating(adev, true);
4850                 else
4851                         gfx_v9_0_enable_cp_power_gating(adev, false);
4852
4853                 /* update gfx cgpg state */
4854                 if (is_support_sw_smu(adev) && enable)
4855                         smu_set_gfx_cgpg(&adev->smu, enable);
4856                 gfx_v9_0_update_gfx_cg_power_gating(adev, enable);
4857
4858                 /* update mgcg state */
4859                 gfx_v9_0_update_gfx_mg_power_gating(adev, enable);
4860
4861                 if (enable)
4862                         amdgpu_gfx_off_ctrl(adev, true);
4863                 break;
4864         case CHIP_VEGA12:
4865                 if (!enable) {
4866                         amdgpu_gfx_off_ctrl(adev, false);
4867                         cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
4868                 } else {
4869                         amdgpu_gfx_off_ctrl(adev, true);
4870                 }
4871                 break;
4872         default:
4873                 break;
4874         }
4875
4876         return 0;
4877 }
4878
4879 static int gfx_v9_0_set_clockgating_state(void *handle,
4880                                           enum amd_clockgating_state state)
4881 {
4882         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4883
4884         if (amdgpu_sriov_vf(adev))
4885                 return 0;
4886
4887         switch (adev->asic_type) {
4888         case CHIP_VEGA10:
4889         case CHIP_VEGA12:
4890         case CHIP_VEGA20:
4891         case CHIP_RAVEN:
4892         case CHIP_ARCTURUS:
4893         case CHIP_RENOIR:
4894                 gfx_v9_0_update_gfx_clock_gating(adev,
4895                                                  state == AMD_CG_STATE_GATE);
4896                 break;
4897         default:
4898                 break;
4899         }
4900         return 0;
4901 }
4902
4903 static void gfx_v9_0_get_clockgating_state(void *handle, u32 *flags)
4904 {
4905         struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4906         int data;
4907
4908         if (amdgpu_sriov_vf(adev))
4909                 *flags = 0;
4910
4911         /* AMD_CG_SUPPORT_GFX_MGCG */
4912         data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE);
4913         if (!(data & RLC_CGTT_MGCG_OVERRIDE__GFXIP_MGCG_OVERRIDE_MASK))
4914                 *flags |= AMD_CG_SUPPORT_GFX_MGCG;
4915
4916         /* AMD_CG_SUPPORT_GFX_CGCG */
4917         data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL);
4918         if (data & RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK)
4919                 *flags |= AMD_CG_SUPPORT_GFX_CGCG;
4920
4921         /* AMD_CG_SUPPORT_GFX_CGLS */
4922         if (data & RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK)
4923                 *flags |= AMD_CG_SUPPORT_GFX_CGLS;
4924
4925         /* AMD_CG_SUPPORT_GFX_RLC_LS */
4926         data = RREG32_SOC15(GC, 0, mmRLC_MEM_SLP_CNTL);
4927         if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK)
4928                 *flags |= AMD_CG_SUPPORT_GFX_RLC_LS | AMD_CG_SUPPORT_GFX_MGLS;
4929
4930         /* AMD_CG_SUPPORT_GFX_CP_LS */
4931         data = RREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL);
4932         if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK)
4933                 *flags |= AMD_CG_SUPPORT_GFX_CP_LS | AMD_CG_SUPPORT_GFX_MGLS;
4934
4935         if (adev->asic_type != CHIP_ARCTURUS) {
4936                 /* AMD_CG_SUPPORT_GFX_3D_CGCG */
4937                 data = RREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D);
4938                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGCG_EN_MASK)
4939                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGCG;
4940
4941                 /* AMD_CG_SUPPORT_GFX_3D_CGLS */
4942                 if (data & RLC_CGCG_CGLS_CTRL_3D__CGLS_EN_MASK)
4943                         *flags |= AMD_CG_SUPPORT_GFX_3D_CGLS;
4944         }
4945 }
4946
4947 static u64 gfx_v9_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4948 {
4949         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 is 32bit rptr */
4950 }
4951
4952 static u64 gfx_v9_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4953 {
4954         struct amdgpu_device *adev = ring->adev;
4955         u64 wptr;
4956
4957         /* XXX check if swapping is necessary on BE */
4958         if (ring->use_doorbell) {
4959                 wptr = atomic64_read((atomic64_t *)&adev->wb.wb[ring->wptr_offs]);
4960         } else {
4961                 wptr = RREG32_SOC15(GC, 0, mmCP_RB0_WPTR);
4962                 wptr += (u64)RREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI) << 32;
4963         }
4964
4965         return wptr;
4966 }
4967
4968 static void gfx_v9_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
4969 {
4970         struct amdgpu_device *adev = ring->adev;
4971
4972         if (ring->use_doorbell) {
4973                 /* XXX check if swapping is necessary on BE */
4974                 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
4975                 WDOORBELL64(ring->doorbell_index, ring->wptr);
4976         } else {
4977                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
4978                 WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
4979         }
4980 }
4981
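/* Emit an HDP flush by writing the NBIO HDP flush request register and waiting
 * on the matching done bit for this ring's engine (cp0 for gfx, cp2/cp6
 * shifted by pipe for compute on ME1/ME2).
 */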
4982 static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
4983 {
4984         struct amdgpu_device *adev = ring->adev;
4985         u32 ref_and_mask, reg_mem_engine;
4986         const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio_funcs->hdp_flush_reg;
4987
4988         if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
4989                 switch (ring->me) {
4990                 case 1:
4991                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp2 << ring->pipe;
4992                         break;
4993                 case 2:
4994                         ref_and_mask = nbio_hf_reg->ref_and_mask_cp6 << ring->pipe;
4995                         break;
4996                 default:
4997                         return;
4998                 }
4999                 reg_mem_engine = 0;
5000         } else {
5001                 ref_and_mask = nbio_hf_reg->ref_and_mask_cp0;
5002                 reg_mem_engine = 1; /* pfp */
5003         }
5004
5005         gfx_v9_0_wait_reg_mem(ring, reg_mem_engine, 0, 1,
5006                               adev->nbio_funcs->get_hdp_flush_req_offset(adev),
5007                               adev->nbio_funcs->get_hdp_flush_done_offset(adev),
5008                               ref_and_mask, ref_and_mask, 0x20);
5009 }
5010
5011 static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
5012                                         struct amdgpu_job *job,
5013                                         struct amdgpu_ib *ib,
5014                                         uint32_t flags)
5015 {
5016         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5017         u32 header, control = 0;
5018
5019         if (ib->flags & AMDGPU_IB_FLAG_CE)
5020                 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5021         else
5022                 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5023
5024         control |= ib->length_dw | (vmid << 24);
5025
5026         if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
5027                 control |= INDIRECT_BUFFER_PRE_ENB(1);
5028
5029                 if (!(ib->flags & AMDGPU_IB_FLAG_CE))
5030                         gfx_v9_0_ring_emit_de_meta(ring);
5031         }
5032
5033         amdgpu_ring_write(ring, header);
5034         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5035         amdgpu_ring_write(ring,
5036 #ifdef __BIG_ENDIAN
5037                 (2 << 0) |
5038 #endif
5039                 lower_32_bits(ib->gpu_addr));
5040         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5041         amdgpu_ring_write(ring, control);
5042 }
5043
5044 static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
5045                                           struct amdgpu_job *job,
5046                                           struct amdgpu_ib *ib,
5047                                           uint32_t flags)
5048 {
5049         unsigned vmid = AMDGPU_JOB_GET_VMID(job);
5050         u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
5051
5052         /* Currently, there is a high possibility to get wave ID mismatch
5053          * between ME and GDS, leading to a hw deadlock, because ME generates
5054          * different wave IDs than the GDS expects. This situation happens
5055          * randomly when at least 5 compute pipes use GDS ordered append.
5056          * The wave IDs generated by ME are also wrong after suspend/resume.
5057          * Those are probably bugs somewhere else in the kernel driver.
5058          *
5059          * Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
5060          * GDS to 0 for this ring (me/pipe).
5061          */
5062         if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
5063                 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
5064                 amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
5065                 amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
5066         }
5067
5068         amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
5069         BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
5070         amdgpu_ring_write(ring,
5071 #ifdef __BIG_ENDIAN
5072                                 (2 << 0) |
5073 #endif
5074                                 lower_32_bits(ib->gpu_addr));
5075         amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
5076         amdgpu_ring_write(ring, control);
5077 }
5078
5079 static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
5080                                      u64 seq, unsigned flags)
5081 {
5082         bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5083         bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5084         bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
5085
5086         /* RELEASE_MEM - flush caches, send int */
5087         amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
5088         amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
5089                                                EOP_TC_NC_ACTION_EN) :
5090                                               (EOP_TCL1_ACTION_EN |
5091                                                EOP_TC_ACTION_EN |
5092                                                EOP_TC_WB_ACTION_EN |
5093                                                EOP_TC_MD_ACTION_EN)) |
5094                                  EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5095                                  EVENT_INDEX(5)));
5096         amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
5097
5098         /*
5099          * The address should be Qword aligned for a 64bit write and Dword
5100          * aligned if we only send the low 32bit data (the high data is discarded).
5101          */
5102         if (write64bit)
5103                 BUG_ON(addr & 0x7);
5104         else
5105                 BUG_ON(addr & 0x3);
5106         amdgpu_ring_write(ring, lower_32_bits(addr));
5107         amdgpu_ring_write(ring, upper_32_bits(addr));
5108         amdgpu_ring_write(ring, lower_32_bits(seq));
5109         amdgpu_ring_write(ring, upper_32_bits(seq));
5110         amdgpu_ring_write(ring, 0);
5111 }
5112
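/* Emit a WAIT_REG_MEM on this ring's own fence address so that everything
 * previously submitted on the ring has signalled before later packets run.
 */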
5113 static void gfx_v9_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
5114 {
5115         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5116         uint32_t seq = ring->fence_drv.sync_seq;
5117         uint64_t addr = ring->fence_drv.gpu_addr;
5118
5119         gfx_v9_0_wait_reg_mem(ring, usepfp, 1, 0,
5120                               lower_32_bits(addr), upper_32_bits(addr),
5121                               seq, 0xffffffff, 4);
5122 }
5123
5124 static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
5125                                         unsigned vmid, uint64_t pd_addr)
5126 {
5127         amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
5128
5129         /* compute doesn't have PFP */
5130         if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
5131                 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5132                 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5133                 amdgpu_ring_write(ring, 0x0);
5134         }
5135 }
5136
5137 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
5138 {
5139         return ring->adev->wb.wb[ring->rptr_offs]; /* gfx9 hardware is 32bit rptr */
5140 }
5141
5142 static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
5143 {
5144         u64 wptr;
5145
5146         /* XXX check if swapping is necessary on BE */
5147         if (ring->use_doorbell)
5148                 wptr = atomic64_read((atomic64_t *)&ring->adev->wb.wb[ring->wptr_offs]);
5149         else
5150                 BUG();
5151         return wptr;
5152 }
5153
5154 static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
5155                                            bool acquire)
5156 {
5157         struct amdgpu_device *adev = ring->adev;
5158         int pipe_num, tmp, reg;
5159         int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
5160
5161         pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
5162
5163         /* first me only has 2 entries, GFX and HP3D */
5164         if (ring->me > 0)
5165                 pipe_num -= 2;
5166
5167         reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
5168         tmp = RREG32(reg);
5169         tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
5170         WREG32(reg, tmp);
5171 }
5172
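/* Track per-pipe reservations in gfx.pipe_reserve_bitmap: while any pipe holds
 * a reservation, unreserved gfx/compute pipes get their SPI_WCL_PIPE_PERCENT
 * throttled; once the bitmap is empty, all pipes are restored to full budget.
 */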
5173 static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
5174                                             struct amdgpu_ring *ring,
5175                                             bool acquire)
5176 {
5177         int i, pipe;
5178         bool reserve;
5179         struct amdgpu_ring *iring;
5180
5181         mutex_lock(&adev->gfx.pipe_reserve_mutex);
5182         pipe = amdgpu_gfx_mec_queue_to_bit(adev, ring->me, ring->pipe, 0);
5183         if (acquire)
5184                 set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5185         else
5186                 clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5187
5188         if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
5189                 /* Clear all reservations - everyone reacquires all resources */
5190                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
5191                         gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
5192                                                        true);
5193
5194                 for (i = 0; i < adev->gfx.num_compute_rings; ++i)
5195                         gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
5196                                                        true);
5197         } else {
5198                 /* Lower all pipes without a current reservation */
5199                 for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
5200                         iring = &adev->gfx.gfx_ring[i];
5201                         pipe = amdgpu_gfx_mec_queue_to_bit(adev,
5202                                                            iring->me,
5203                                                            iring->pipe,
5204                                                            0);
5205                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5206                         gfx_v9_0_ring_set_pipe_percent(iring, reserve);
5207                 }
5208
5209                 for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
5210                         iring = &adev->gfx.compute_ring[i];
5211                         pipe = amdgpu_gfx_mec_queue_to_bit(adev,
5212                                                            iring->me,
5213                                                            iring->pipe,
5214                                                            0);
5215                         reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
5216                         gfx_v9_0_ring_set_pipe_percent(iring, reserve);
5217                 }
5218         }
5219
5220         mutex_unlock(&adev->gfx.pipe_reserve_mutex);
5221 }
5222
5223 static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
5224                                       struct amdgpu_ring *ring,
5225                                       bool acquire)
5226 {
5227         uint32_t pipe_priority = acquire ? 0x2 : 0x0;
5228         uint32_t queue_priority = acquire ? 0xf : 0x0;
5229
5230         mutex_lock(&adev->srbm_mutex);
5231         soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
5232
5233         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
5234         WREG32_SOC15_RLC(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
5235
5236         soc15_grbm_select(adev, 0, 0, 0, 0);
5237         mutex_unlock(&adev->srbm_mutex);
5238 }
5239
5240 static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
5241                                                enum drm_sched_priority priority)
5242 {
5243         struct amdgpu_device *adev = ring->adev;
5244         bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
5245
5246         if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
5247                 return;
5248
5249         gfx_v9_0_hqd_set_priority(adev, ring, acquire);
5250         gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
5251 }
5252
5253 static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
5254 {
5255         struct amdgpu_device *adev = ring->adev;
5256
5257         /* XXX check if swapping is necessary on BE */
5258         if (ring->use_doorbell) {
5259                 atomic64_set((atomic64_t*)&adev->wb.wb[ring->wptr_offs], ring->wptr);
5260                 WDOORBELL64(ring->doorbell_index, ring->wptr);
5261         } else {
5262                 BUG(); /* only DOORBELL method supported on gfx9 now */
5263         }
5264 }
5265
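/* KIQ fence: write the 32bit sequence number to the given writeback address
 * and, when requested, poke CPC_INT_STATUS to raise an interrupt (src_id 178).
 */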
5266 static void gfx_v9_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
5267                                          u64 seq, unsigned int flags)
5268 {
5269         struct amdgpu_device *adev = ring->adev;
5270
5271         /* we only allocate 32bit for each seq wb address */
5272         BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
5273
5274         /* write fence seq to the "addr" */
5275         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5276         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5277                                  WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
5278         amdgpu_ring_write(ring, lower_32_bits(addr));
5279         amdgpu_ring_write(ring, upper_32_bits(addr));
5280         amdgpu_ring_write(ring, lower_32_bits(seq));
5281
5282         if (flags & AMDGPU_FENCE_FLAG_INT) {
5283                 /* set register to trigger INT */
5284                 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5285                 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5286                                          WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
5287                 amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
5288                 amdgpu_ring_write(ring, 0);
5289                 amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
5290         }
5291 }
5292
5293 static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
5294 {
5295         amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5296         amdgpu_ring_write(ring, 0);
5297 }
5298
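/* Write a zero-initialized CE metadata payload into the gfx CSA via
 * WRITE_DATA; emitted for SR-IOV from gfx_v9_ring_emit_cntxcntl().
 */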
5299 static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
5300 {
5301         struct v9_ce_ib_state ce_payload = {0};
5302         uint64_t csa_addr;
5303         int cnt;
5304
5305         cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
5306         csa_addr = amdgpu_csa_vaddr(ring->adev);
5307
5308         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5309         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
5310                                  WRITE_DATA_DST_SEL(8) |
5311                                  WR_CONFIRM) |
5312                                  WRITE_DATA_CACHE_POLICY(0));
5313         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5314         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
5315         amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
5316 }
5317
5318 static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
5319 {
5320         struct v9_de_ib_state de_payload = {0};
5321         uint64_t csa_addr, gds_addr;
5322         int cnt;
5323
5324         csa_addr = amdgpu_csa_vaddr(ring->adev);
5325         gds_addr = csa_addr + 4096;
5326         de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
5327         de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
5328
5329         cnt = (sizeof(de_payload) >> 2) + 4 - 2;
5330         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
5331         amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5332                                  WRITE_DATA_DST_SEL(8) |
5333                                  WR_CONFIRM) |
5334                                  WRITE_DATA_CACHE_POLICY(0));
5335         amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5336         amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
5337         amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
5338 }
5339
5340 static void gfx_v9_0_ring_emit_tmz(struct amdgpu_ring *ring, bool start)
5341 {
5342         amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
5343         amdgpu_ring_write(ring, FRAME_CMD(start ? 0 : 1)); /* 0 = frame_begin, 1 = frame_end */
5344 }
5345
5346 static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
5347 {
5348         uint32_t dw2 = 0;
5349
5350         if (amdgpu_sriov_vf(ring->adev))
5351                 gfx_v9_0_ring_emit_ce_meta(ring);
5352
5353         gfx_v9_0_ring_emit_tmz(ring, true);
5354
5355         dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
5356         if (flags & AMDGPU_HAVE_CTX_SWITCH) {
5357                 /* set load_global_config & load_global_uconfig */
5358                 dw2 |= 0x8001;
5359                 /* set load_cs_sh_regs */
5360                 dw2 |= 0x01000000;
5361                 /* set load_per_context_state & load_gfx_sh_regs for GFX */
5362                 dw2 |= 0x10002;
5363
5364                 /* set load_ce_ram if preamble presented */
5365                 if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
5366                         dw2 |= 0x10000000;
5367         } else {
5368                 /* still load_ce_ram if this is the first time the preamble is
5369                  * presented, even though no context switch happens.
5370                  */
5371                 if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
5372                         dw2 |= 0x10000000;
5373         }
5374
5375         amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5376         amdgpu_ring_write(ring, dw2);
5377         amdgpu_ring_write(ring, 0);
5378 }
5379
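/* Emit a COND_EXEC packet with a placeholder DW count and return its offset in
 * the ring so gfx_v9_0_ring_emit_patch_cond_exec() can patch in the real
 * number of DWs to skip when *cond_exe_gpu_addr is 0.
 */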
5380 static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
5381 {
5382         unsigned ret;
5383         amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
5384         amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
5385         amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
5386         amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exe_gpu_addr==0 */
5387         ret = ring->wptr & ring->buf_mask;
5388         amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
5389         return ret;
5390 }
5391
5392 static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
5393 {
5394         unsigned cur;
5395         BUG_ON(offset > ring->buf_mask);
5396         BUG_ON(ring->ring[offset] != 0x55aa55aa);
5397
5398         cur = (ring->wptr & ring->buf_mask) - 1;
5399         if (likely(cur > offset))
5400                 ring->ring[offset] = cur - offset;
5401         else
5402                 ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
5403 }
5404
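/* Emit a COPY_DATA packet that copies the register value into the writeback
 * slot at adev->virt.reg_val_offs, with write confirm, so the caller can read
 * it back from system memory.
 */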
5405 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg)
5406 {
5407         struct amdgpu_device *adev = ring->adev;
5408
5409         amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
5410         amdgpu_ring_write(ring, 0 |     /* src: register*/
5411                                 (5 << 8) |      /* dst: memory */
5412                                 (1 << 20));     /* write confirm */
5413         amdgpu_ring_write(ring, reg);
5414         amdgpu_ring_write(ring, 0);
5415         amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
5416                                 adev->virt.reg_val_offs * 4));
5417         amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
5418                                 adev->virt.reg_val_offs * 4));
5419 }
5420
5421 static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
5422                                     uint32_t val)
5423 {
5424         uint32_t cmd = 0;
5425
5426         switch (ring->funcs->type) {
5427         case AMDGPU_RING_TYPE_GFX:
5428                 cmd = WRITE_DATA_ENGINE_SEL(1) | WR_CONFIRM;
5429                 break;
5430         case AMDGPU_RING_TYPE_KIQ:
5431                 cmd = (1 << 16); /* no inc addr */
5432                 break;
5433         default:
5434                 cmd = WR_CONFIRM;
5435                 break;
5436         }
5437         amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5438         amdgpu_ring_write(ring, cmd);
5439         amdgpu_ring_write(ring, reg);
5440         amdgpu_ring_write(ring, 0);
5441         amdgpu_ring_write(ring, val);
5442 }
5443
5444 static void gfx_v9_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
5445                                         uint32_t val, uint32_t mask)
5446 {
5447         gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, reg, 0, val, mask, 0x20);
5448 }
5449
5450 static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring,
5451                                                   uint32_t reg0, uint32_t reg1,
5452                                                   uint32_t ref, uint32_t mask)
5453 {
5454         int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
5455         struct amdgpu_device *adev = ring->adev;
5456         bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ?
5457                 adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait;
5458
5459         if (fw_version_ok)
5460                 gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1,
5461                                       ref, mask, 0x20);
5462         else
5463                 amdgpu_ring_emit_reg_write_reg_wait_helper(ring, reg0, reg1,
5464                                                            ref, mask);
5465 }
5466
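/* Soft recovery: issue an SQ_CMD (CMD 0x3 = kill) targeting the given VMID so
 * the SQ kills the hung waves of that VM, as a lighter-weight alternative to a
 * full GPU reset.
 */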
5467 static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid)
5468 {
5469         struct amdgpu_device *adev = ring->adev;
5470         uint32_t value = 0;
5471
5472         value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03);
5473         value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01);
5474         value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1);
5475         value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid);
5476         WREG32_SOC15(GC, 0, mmSQ_CMD, value);
5477 }
5478
5479 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5480                                                  enum amdgpu_interrupt_state state)
5481 {
5482         switch (state) {
5483         case AMDGPU_IRQ_STATE_DISABLE:
5484         case AMDGPU_IRQ_STATE_ENABLE:
5485                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5486                                TIME_STAMP_INT_ENABLE,
5487                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5488                 break;
5489         default:
5490                 break;
5491         }
5492 }
5493
5494 static void gfx_v9_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
5495                                                      int me, int pipe,
5496                                                      enum amdgpu_interrupt_state state)
5497 {
5498         u32 mec_int_cntl, mec_int_cntl_reg;
5499
5500         /*
5501          * amdgpu controls only the first MEC. That's why this function only
5502          * handles the setting of interrupts for this specific MEC. All other
5503          * pipes' interrupts are set by amdkfd.
5504          */
5505
5506         if (me == 1) {
5507                 switch (pipe) {
5508                 case 0:
5509                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL);
5510                         break;
5511                 case 1:
5512                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE1_INT_CNTL);
5513                         break;
5514                 case 2:
5515                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE2_INT_CNTL);
5516                         break;
5517                 case 3:
5518                         mec_int_cntl_reg = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE3_INT_CNTL);
5519                         break;
5520                 default:
5521                         DRM_DEBUG("invalid pipe %d\n", pipe);
5522                         return;
5523                 }
5524         } else {
5525                 DRM_DEBUG("invalid me %d\n", me);
5526                 return;
5527         }
5528
5529         switch (state) {
5530         case AMDGPU_IRQ_STATE_DISABLE:
5531                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5532                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5533                                              TIME_STAMP_INT_ENABLE, 0);
5534                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5535                 break;
5536         case AMDGPU_IRQ_STATE_ENABLE:
5537                 mec_int_cntl = RREG32(mec_int_cntl_reg);
5538                 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
5539                                              TIME_STAMP_INT_ENABLE, 1);
5540                 WREG32(mec_int_cntl_reg, mec_int_cntl);
5541                 break;
5542         default:
5543                 break;
5544         }
5545 }
5546
5547 static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5548                                              struct amdgpu_irq_src *source,
5549                                              unsigned type,
5550                                              enum amdgpu_interrupt_state state)
5551 {
5552         switch (state) {
5553         case AMDGPU_IRQ_STATE_DISABLE:
5554         case AMDGPU_IRQ_STATE_ENABLE:
5555                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5556                                PRIV_REG_INT_ENABLE,
5557                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
5558                 break;
5559         default:
5560                 break;
5561         }
5562
5563         return 0;
5564 }
5565
5566 static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5567                                               struct amdgpu_irq_src *source,
5568                                               unsigned type,
5569                                               enum amdgpu_interrupt_state state)
5570 {
5571         switch (state) {
5572         case AMDGPU_IRQ_STATE_DISABLE:
5573         case AMDGPU_IRQ_STATE_ENABLE:
5574                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5575                                PRIV_INSTR_INT_ENABLE,
5576                                state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
                break;
5577         default:
5578                 break;
5579         }
5580
5581         return 0;
5582 }
5583
5584 #define ENABLE_ECC_ON_ME_PIPE(me, pipe)                         \
5585         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5586                         CP_ECC_ERROR_INT_ENABLE, 1)
5587
5588 #define DISABLE_ECC_ON_ME_PIPE(me, pipe)                        \
5589         WREG32_FIELD15(GC, 0, CP_ME##me##_PIPE##pipe##_INT_CNTL,\
5590                         CP_ECC_ERROR_INT_ENABLE, 0)
5591
5592 static int gfx_v9_0_set_cp_ecc_error_state(struct amdgpu_device *adev,
5593                                               struct amdgpu_irq_src *source,
5594                                               unsigned type,
5595                                               enum amdgpu_interrupt_state state)
5596 {
5597         switch (state) {
5598         case AMDGPU_IRQ_STATE_DISABLE:
5599                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5600                                 CP_ECC_ERROR_INT_ENABLE, 0);
5601                 DISABLE_ECC_ON_ME_PIPE(1, 0);
5602                 DISABLE_ECC_ON_ME_PIPE(1, 1);
5603                 DISABLE_ECC_ON_ME_PIPE(1, 2);
5604                 DISABLE_ECC_ON_ME_PIPE(1, 3);
5605                 break;
5606
5607         case AMDGPU_IRQ_STATE_ENABLE:
5608                 WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
5609                                 CP_ECC_ERROR_INT_ENABLE, 1);
5610                 ENABLE_ECC_ON_ME_PIPE(1, 0);
5611                 ENABLE_ECC_ON_ME_PIPE(1, 1);
5612                 ENABLE_ECC_ON_ME_PIPE(1, 2);
5613                 ENABLE_ECC_ON_ME_PIPE(1, 3);
5614                 break;
5615         default:
5616                 break;
5617         }
5618
5619         return 0;
5620 }
5621
5622
5623 static int gfx_v9_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5624                                             struct amdgpu_irq_src *src,
5625                                             unsigned type,
5626                                             enum amdgpu_interrupt_state state)
5627 {
5628         switch (type) {
5629         case AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP:
5630                 gfx_v9_0_set_gfx_eop_interrupt_state(adev, state);
5631                 break;
5632         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5633                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5634                 break;
5635         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5636                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5637                 break;
5638         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5639                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5640                 break;
5641         case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5642                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5643                 break;
5644         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5645                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5646                 break;
5647         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5648                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5649                 break;
5650         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5651                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5652                 break;
5653         case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5654                 gfx_v9_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5655                 break;
5656         default:
5657                 break;
5658         }
5659         return 0;
5660 }
5661
5662 static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
5663                             struct amdgpu_irq_src *source,
5664                             struct amdgpu_iv_entry *entry)
5665 {
5666         int i;
5667         u8 me_id, pipe_id, queue_id;
5668         struct amdgpu_ring *ring;
5669
5670         DRM_DEBUG("IH: CP EOP\n");
5671         me_id = (entry->ring_id & 0x0c) >> 2;
5672         pipe_id = (entry->ring_id & 0x03) >> 0;
5673         queue_id = (entry->ring_id & 0x70) >> 4;
5674
5675         switch (me_id) {
5676         case 0:
5677                 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5678                 break;
5679         case 1:
5680         case 2:
5681                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5682                         ring = &adev->gfx.compute_ring[i];
5683                         /* Per-queue interrupt is supported for MEC starting from VI.
5684                          * The interrupt can only be enabled/disabled per pipe instead of per queue.
5685                          */
5686                         if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5687                                 amdgpu_fence_process(ring);
5688                 }
5689                 break;
5690         }
5691         return 0;
5692 }
5693
5694 static void gfx_v9_0_fault(struct amdgpu_device *adev,
5695                            struct amdgpu_iv_entry *entry)
5696 {
5697         u8 me_id, pipe_id, queue_id;
5698         struct amdgpu_ring *ring;
5699         int i;
5700
5701         me_id = (entry->ring_id & 0x0c) >> 2;
5702         pipe_id = (entry->ring_id & 0x03) >> 0;
5703         queue_id = (entry->ring_id & 0x70) >> 4;
5704
5705         switch (me_id) {
5706         case 0:
5707                 drm_sched_fault(&adev->gfx.gfx_ring[0].sched);
5708                 break;
5709         case 1:
5710         case 2:
5711                 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5712                         ring = &adev->gfx.compute_ring[i];
5713                         if (ring->me == me_id && ring->pipe == pipe_id &&
5714                             ring->queue == queue_id)
5715                                 drm_sched_fault(&ring->sched);
5716                 }
5717                 break;
5718         }
5719 }
5720
5721 static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev,
5722                                  struct amdgpu_irq_src *source,
5723                                  struct amdgpu_iv_entry *entry)
5724 {
5725         DRM_ERROR("Illegal register access in command stream\n");
5726         gfx_v9_0_fault(adev, entry);
5727         return 0;
5728 }
5729
5730 static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev,
5731                                   struct amdgpu_irq_src *source,
5732                                   struct amdgpu_iv_entry *entry)
5733 {
5734         DRM_ERROR("Illegal instruction in command stream\n");
5735         gfx_v9_0_fault(adev, entry);
5736         return 0;
5737 }
5738
5739 static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
5740                 struct ras_err_data *err_data,
5741                 struct amdgpu_iv_entry *entry)
5742 {
5743         /* TODO: UE (uncorrectable error) will trigger an interrupt. */
5744         kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
5745         if (adev->gfx.funcs->query_ras_error_count)
5746                 adev->gfx.funcs->query_ras_error_count(adev, err_data);
5747         amdgpu_ras_reset_gpu(adev, 0);
5748         return AMDGPU_RAS_SUCCESS;
5749 }
5750
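/* Table of EDC (error detection and correction) counter registers, one entry
 * per RAS-capable gfx block, with the SEC/DED count field masks consulted when
 * querying error counts.
 */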
5751 static const struct {
5752         const char *name;
5753         uint32_t ip;
5754         uint32_t inst;
5755         uint32_t seg;
5756         uint32_t reg_offset;
5757         uint32_t per_se_instance;
5758         int32_t num_instance;
5759         uint32_t sec_count_mask;
5760         uint32_t ded_count_mask;
5761 } gfx_ras_edc_regs[] = {
5762         { "CPC_SCRATCH", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_SCRATCH_CNT), 0, 1,
5763           REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, SEC_COUNT),
5764           REG_FIELD_MASK(CPC_EDC_SCRATCH_CNT, DED_COUNT) },
5765         { "CPC_UCODE", SOC15_REG_ENTRY(GC, 0, mmCPC_EDC_UCODE_CNT), 0, 1,
5766           REG_FIELD_MASK(CPC_EDC_UCODE_CNT, SEC_COUNT),
5767           REG_FIELD_MASK(CPC_EDC_UCODE_CNT, DED_COUNT) },
5768         { "CPF_ROQ_ME1", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1,
5769           REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME1), 0 },
5770         { "CPF_ROQ_ME2", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_ROQ_CNT), 0, 1,
5771           REG_FIELD_MASK(CPF_EDC_ROQ_CNT, COUNT_ME2), 0 },
5772         { "CPF_TAG", SOC15_REG_ENTRY(GC, 0, mmCPF_EDC_TAG_CNT), 0, 1,
5773           REG_FIELD_MASK(CPF_EDC_TAG_CNT, SEC_COUNT),
5774           REG_FIELD_MASK(CPF_EDC_TAG_CNT, DED_COUNT) },
5775         { "CPG_DMA_ROQ", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1,
5776           REG_FIELD_MASK(CPG_EDC_DMA_CNT, ROQ_COUNT), 0 },
5777         { "CPG_DMA_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_DMA_CNT), 0, 1,
5778           REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_SEC_COUNT),
5779           REG_FIELD_MASK(CPG_EDC_DMA_CNT, TAG_DED_COUNT) },
5780         { "CPG_TAG", SOC15_REG_ENTRY(GC, 0, mmCPG_EDC_TAG_CNT), 0, 1,
5781           REG_FIELD_MASK(CPG_EDC_TAG_CNT, SEC_COUNT),
5782           REG_FIELD_MASK(CPG_EDC_TAG_CNT, DED_COUNT) },
5783         { "DC_CSINVOC", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_CSINVOC_CNT), 0, 1,
5784           REG_FIELD_MASK(DC_EDC_CSINVOC_CNT, COUNT_ME1), 0 },
5785         { "DC_RESTORE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_RESTORE_CNT), 0, 1,
5786           REG_FIELD_MASK(DC_EDC_RESTORE_CNT, COUNT_ME1), 0 },
5787         { "DC_STATE", SOC15_REG_ENTRY(GC, 0, mmDC_EDC_STATE_CNT), 0, 1,
5788           REG_FIELD_MASK(DC_EDC_STATE_CNT, COUNT_ME1), 0 },
5789         { "GDS_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1,
5790           REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_SEC),
5791           REG_FIELD_MASK(GDS_EDC_CNT, GDS_MEM_DED) },
5792         { "GDS_INPUT_QUEUE", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_CNT), 0, 1,
5793           REG_FIELD_MASK(GDS_EDC_CNT, GDS_INPUT_QUEUE_SED), 0 },
5794         { "GDS_ME0_CS_PIPE_MEM", SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT),
5795           0, 1, REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_SEC),
5796           REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, ME0_CS_PIPE_MEM_DED) },
5797         { "GDS_OA_PHY_PHY_CMD_RAM_MEM",
5798           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1,
5799           REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_SEC),
5800           REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_CMD_RAM_MEM_DED) },
5801         { "GDS_OA_PHY_PHY_DATA_RAM_MEM",
5802           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PHY_CNT), 0, 1,
5803           REG_FIELD_MASK(GDS_EDC_OA_PHY_CNT, PHY_DATA_RAM_MEM_SED), 0 },
5804         { "GDS_OA_PIPE_ME1_PIPE0_PIPE_MEM",
5805           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5806           REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_SEC),
5807           REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE0_PIPE_MEM_DED) },
5808         { "GDS_OA_PIPE_ME1_PIPE1_PIPE_MEM",
5809           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5810           REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_SEC),
5811           REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE1_PIPE_MEM_DED) },
5812         { "GDS_OA_PIPE_ME1_PIPE2_PIPE_MEM",
5813           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5814           REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_SEC),
5815           REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE2_PIPE_MEM_DED) },
5816         { "GDS_OA_PIPE_ME1_PIPE3_PIPE_MEM",
5817           SOC15_REG_ENTRY(GC, 0, mmGDS_EDC_OA_PIPE_CNT), 0, 1,
5818           REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_SEC),
5819           REG_FIELD_MASK(GDS_EDC_OA_PIPE_CNT, ME1_PIPE3_PIPE_MEM_DED) },
5820         { "SPI_SR_MEM", SOC15_REG_ENTRY(GC, 0, mmSPI_EDC_CNT), 1, 1,
5821           REG_FIELD_MASK(SPI_EDC_CNT, SPI_SR_MEM_SED_COUNT), 0 },
5822         { "TA_FS_DFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5823           REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_SEC_COUNT),
5824           REG_FIELD_MASK(TA_EDC_CNT, TA_FS_DFIFO_DED_COUNT) },
5825         { "TA_FS_AFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5826           REG_FIELD_MASK(TA_EDC_CNT, TA_FS_AFIFO_SED_COUNT), 0 },
5827         { "TA_FL_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5828           REG_FIELD_MASK(TA_EDC_CNT, TA_FL_LFIFO_SED_COUNT), 0 },
5829         { "TA_FX_LFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5830           REG_FIELD_MASK(TA_EDC_CNT, TA_FX_LFIFO_SED_COUNT), 0 },
5831         { "TA_FS_CFIFO", SOC15_REG_ENTRY(GC, 0, mmTA_EDC_CNT), 1, 16,
5832           REG_FIELD_MASK(TA_EDC_CNT, TA_FS_CFIFO_SED_COUNT), 0 },
5833         { "TCA_HOLE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2,
5834           REG_FIELD_MASK(TCA_EDC_CNT, HOLE_FIFO_SED_COUNT), 0 },
5835         { "TCA_REQ_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCA_EDC_CNT), 0, 2,
5836           REG_FIELD_MASK(TCA_EDC_CNT, REQ_FIFO_SED_COUNT), 0 },
5837         { "TCC_CACHE_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5838           REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_SEC_COUNT),
5839           REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DATA_DED_COUNT) },
5840         { "TCC_CACHE_DIRTY", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5841           REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_SEC_COUNT),
5842           REG_FIELD_MASK(TCC_EDC_CNT, CACHE_DIRTY_DED_COUNT) },
5843         { "TCC_HIGH_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5844           REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_SEC_COUNT),
5845           REG_FIELD_MASK(TCC_EDC_CNT, HIGH_RATE_TAG_DED_COUNT) },
5846         { "TCC_LOW_RATE_TAG", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5847           REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_SEC_COUNT),
5848           REG_FIELD_MASK(TCC_EDC_CNT, LOW_RATE_TAG_DED_COUNT) },
5849         { "TCC_SRC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5850           REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_SEC_COUNT),
5851           REG_FIELD_MASK(TCC_EDC_CNT, SRC_FIFO_DED_COUNT) },
5852         { "TCC_IN_USE_DEC", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5853           REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_DEC_SED_COUNT), 0 },
5854         { "TCC_IN_USE_TRANSFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5855           REG_FIELD_MASK(TCC_EDC_CNT, IN_USE_TRANSFER_SED_COUNT), 0 },
5856         { "TCC_LATENCY_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5857           REG_FIELD_MASK(TCC_EDC_CNT, LATENCY_FIFO_SED_COUNT), 0 },
5858         { "TCC_RETURN_DATA", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5859           REG_FIELD_MASK(TCC_EDC_CNT, RETURN_DATA_SED_COUNT), 0 },
5860         { "TCC_RETURN_CONTROL", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5861           REG_FIELD_MASK(TCC_EDC_CNT, RETURN_CONTROL_SED_COUNT), 0 },
5862         { "TCC_UC_ATOMIC_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT), 0, 16,
5863           REG_FIELD_MASK(TCC_EDC_CNT, UC_ATOMIC_FIFO_SED_COUNT), 0 },
5864         { "TCC_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16,
5865           REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_RETURN_SED_COUNT), 0 },
5866         { "TCC_WRITE_CACHE_READ", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0, 16,
5867           REG_FIELD_MASK(TCC_EDC_CNT2, WRITE_CACHE_READ_SED_COUNT), 0 },
5868         { "TCC_SRC_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
5869           16, REG_FIELD_MASK(TCC_EDC_CNT2, SRC_FIFO_NEXT_RAM_SED_COUNT), 0 },
5870         { "TCC_LATENCY_FIFO_NEXT_RAM", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5871           0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, LATENCY_FIFO_NEXT_RAM_SED_COUNT),
5872           0 },
5873         { "TCC_CACHE_TAG_PROBE_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
5874           16, REG_FIELD_MASK(TCC_EDC_CNT2, CACHE_TAG_PROBE_FIFO_SED_COUNT), 0 },
5875         { "TCC_WRRET_TAG_WRITE_RETURN", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2),
5876           0, 16, REG_FIELD_MASK(TCC_EDC_CNT2, WRRET_TAG_WRITE_RETURN_SED_COUNT),
5877           0 },
5878         { "TCC_ATOMIC_RETURN_BUFFER", SOC15_REG_ENTRY(GC, 0, mmTCC_EDC_CNT2), 0,
5879           16, REG_FIELD_MASK(TCC_EDC_CNT2, ATOMIC_RETURN_BUFFER_SED_COUNT), 0 },
5880         { "TCI_WRITE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCI_EDC_CNT), 0, 72,
5881           REG_FIELD_MASK(TCI_EDC_CNT, WRITE_RAM_SED_COUNT), 0 },
5882         { "TCP_CACHE_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5883           REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_SEC_COUNT),
5884           REG_FIELD_MASK(TCP_EDC_CNT_NEW, CACHE_RAM_DED_COUNT) },
5885         { "TCP_LFIFO_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5886           REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_SEC_COUNT),
5887           REG_FIELD_MASK(TCP_EDC_CNT_NEW, LFIFO_RAM_DED_COUNT) },
5888         { "TCP_CMD_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5889           REG_FIELD_MASK(TCP_EDC_CNT_NEW, CMD_FIFO_SED_COUNT), 0 },
5890         { "TCP_VM_FIFO", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5891           REG_FIELD_MASK(TCP_EDC_CNT_NEW, VM_FIFO_SEC_COUNT), 0 },
5892         { "TCP_DB_RAM", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5893           REG_FIELD_MASK(TCP_EDC_CNT_NEW, DB_RAM_SED_COUNT), 0 },
5894         { "TCP_UTCL1_LFIFO0", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5895           REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_SEC_COUNT),
5896           REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO0_DED_COUNT) },
5897         { "TCP_UTCL1_LFIFO1", SOC15_REG_ENTRY(GC, 0, mmTCP_EDC_CNT_NEW), 1, 16,
5898           REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_SEC_COUNT),
5899           REG_FIELD_MASK(TCP_EDC_CNT_NEW, UTCL1_LFIFO1_DED_COUNT) },
5900         { "TD_SS_FIFO_LO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
5901           REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_SEC_COUNT),
5902           REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_LO_DED_COUNT) },
5903         { "TD_SS_FIFO_HI", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
5904           REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_SEC_COUNT),
5905           REG_FIELD_MASK(TD_EDC_CNT, SS_FIFO_HI_DED_COUNT) },
5906         { "TD_CS_FIFO", SOC15_REG_ENTRY(GC, 0, mmTD_EDC_CNT), 1, 16,
5907           REG_FIELD_MASK(TD_EDC_CNT, CS_FIFO_SED_COUNT), 0 },
5908         { "SQ_LDS_D", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5909           REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_SEC_COUNT),
5910           REG_FIELD_MASK(SQ_EDC_CNT, LDS_D_DED_COUNT) },
5911         { "SQ_LDS_I", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5912           REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_SEC_COUNT),
5913           REG_FIELD_MASK(SQ_EDC_CNT, LDS_I_DED_COUNT) },
5914         { "SQ_SGPR", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5915           REG_FIELD_MASK(SQ_EDC_CNT, SGPR_SEC_COUNT),
5916           REG_FIELD_MASK(SQ_EDC_CNT, SGPR_DED_COUNT) },
5917         { "SQ_VGPR0", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5918           REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_SEC_COUNT),
5919           REG_FIELD_MASK(SQ_EDC_CNT, VGPR0_DED_COUNT) },
5920         { "SQ_VGPR1", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5921           REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_SEC_COUNT),
5922           REG_FIELD_MASK(SQ_EDC_CNT, VGPR1_DED_COUNT) },
5923         { "SQ_VGPR2", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5924           REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_SEC_COUNT),
5925           REG_FIELD_MASK(SQ_EDC_CNT, VGPR2_DED_COUNT) },
5926         { "SQ_VGPR3", SOC15_REG_ENTRY(GC, 0, mmSQ_EDC_CNT), 1, 16,
5927           REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_SEC_COUNT),
5928           REG_FIELD_MASK(SQ_EDC_CNT, VGPR3_DED_COUNT) },
5929         { "SQC_DATA_CU0_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5930           1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_SEC_COUNT),
5931           REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_WRITE_DATA_BUF_DED_COUNT) },
5932         { "SQC_DATA_CU0_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
5933           6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_SEC_COUNT),
5934           REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU0_UTCL1_LFIFO_DED_COUNT) },
5935         { "SQC_DATA_CU1_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5936           1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_SEC_COUNT),
5937           REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_WRITE_DATA_BUF_DED_COUNT) },
5938         { "SQC_DATA_CU1_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
5939           6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_SEC_COUNT),
5940           REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU1_UTCL1_LFIFO_DED_COUNT) },
5941         { "SQC_DATA_CU2_WRITE_DATA_BUF", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT),
5942           1, 6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_SEC_COUNT),
5943           REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_WRITE_DATA_BUF_DED_COUNT) },
5944         { "SQC_DATA_CU2_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT), 1,
5945           6, REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_SEC_COUNT),
5946           REG_FIELD_MASK(SQC_EDC_CNT, DATA_CU2_UTCL1_LFIFO_DED_COUNT) },
5947         { "SQC_INST_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5948           6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_SEC_COUNT),
5949           REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_TAG_RAM_DED_COUNT) },
5950         { "SQC_INST_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5951           6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_SEC_COUNT),
5952           REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_BANK_RAM_DED_COUNT) },
5953         { "SQC_DATA_BANKA_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5954           6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_SEC_COUNT),
5955           REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_TAG_RAM_DED_COUNT) },
5956         { "SQC_DATA_BANKA_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5957           6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_SEC_COUNT),
5958           REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_BANK_RAM_DED_COUNT) },
5959         { "SQC_INST_BANKA_UTCL1_MISS_FIFO",
5960           SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
5961           REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_UTCL1_MISS_FIFO_SED_COUNT),
5962           0 },
5963         { "SQC_INST_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5964           6, REG_FIELD_MASK(SQC_EDC_CNT2, INST_BANKA_MISS_FIFO_SED_COUNT), 0 },
5965         { "SQC_DATA_BANKA_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5966           6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_HIT_FIFO_SED_COUNT), 0 },
5967         { "SQC_DATA_BANKA_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1,
5968           6, REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_MISS_FIFO_SED_COUNT), 0 },
5969         { "SQC_DATA_BANKA_DIRTY_BIT_RAM",
5970           SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
5971           REG_FIELD_MASK(SQC_EDC_CNT2, DATA_BANKA_DIRTY_BIT_RAM_SED_COUNT), 0 },
5972         { "SQC_INST_UTCL1_LFIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT2), 1, 6,
5973           REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_SEC_COUNT),
5974           REG_FIELD_MASK(SQC_EDC_CNT2, INST_UTCL1_LFIFO_DED_COUNT) },
5975         { "SQC_INST_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5976           6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_SEC_COUNT),
5977           REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_TAG_RAM_DED_COUNT) },
5978         { "SQC_INST_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5979           6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_SEC_COUNT),
5980           REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_BANK_RAM_DED_COUNT) },
5981         { "SQC_DATA_BANKB_TAG_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5982           6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_SEC_COUNT),
5983           REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_TAG_RAM_DED_COUNT) },
5984         { "SQC_DATA_BANKB_BANK_RAM", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5985           6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_SEC_COUNT),
5986           REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_BANK_RAM_DED_COUNT) },
5987         { "SQC_INST_BANKB_UTCL1_MISS_FIFO",
5988           SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6,
5989           REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_UTCL1_MISS_FIFO_SED_COUNT),
5990           0 },
5991         { "SQC_INST_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5992           6, REG_FIELD_MASK(SQC_EDC_CNT3, INST_BANKB_MISS_FIFO_SED_COUNT), 0 },
5993         { "SQC_DATA_BANKB_HIT_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5994           6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_HIT_FIFO_SED_COUNT), 0 },
5995         { "SQC_DATA_BANKB_MISS_FIFO", SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1,
5996           6, REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_MISS_FIFO_SED_COUNT), 0 },
5997         { "SQC_DATA_BANKB_DIRTY_BIT_RAM",
5998           SOC15_REG_ENTRY(GC, 0, mmSQC_EDC_CNT3), 1, 6,
5999           REG_FIELD_MASK(SQC_EDC_CNT3, DATA_BANKB_DIRTY_BIT_RAM_SED_COUNT), 0 },
6000         { "EA_DRAMRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6001           REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_SEC_COUNT),
6002           REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_CMDMEM_DED_COUNT) },
6003         { "EA_DRAMWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6004           REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_SEC_COUNT),
6005           REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_CMDMEM_DED_COUNT) },
6006         { "EA_DRAMWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6007           REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_SEC_COUNT),
6008           REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_DATAMEM_DED_COUNT) },
6009         { "EA_RRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6010           REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_SEC_COUNT),
6011           REG_FIELD_MASK(GCEA_EDC_CNT, RRET_TAGMEM_DED_COUNT) },
6012         { "EA_WRET_TAGMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6013           REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_SEC_COUNT),
6014           REG_FIELD_MASK(GCEA_EDC_CNT, WRET_TAGMEM_DED_COUNT) },
6015         { "EA_DRAMRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6016           REG_FIELD_MASK(GCEA_EDC_CNT, DRAMRD_PAGEMEM_SED_COUNT), 0 },
6017         { "EA_DRAMWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6018           REG_FIELD_MASK(GCEA_EDC_CNT, DRAMWR_PAGEMEM_SED_COUNT), 0 },
6019         { "EA_IORD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6020           REG_FIELD_MASK(GCEA_EDC_CNT, IORD_CMDMEM_SED_COUNT), 0 },
6021         { "EA_IOWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6022           REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_CMDMEM_SED_COUNT), 0 },
6023         { "EA_IOWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT), 0, 32,
6024           REG_FIELD_MASK(GCEA_EDC_CNT, IOWR_DATAMEM_SED_COUNT), 0 },
6025         { "GMIRD_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6026           REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_SEC_COUNT),
6027           REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_CMDMEM_DED_COUNT) },
6028         { "GMIWR_CMDMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6029           REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_SEC_COUNT),
6030           REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_CMDMEM_DED_COUNT) },
6031         { "GMIWR_DATAMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6032           REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_SEC_COUNT),
6033           REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_DATAMEM_DED_COUNT) },
6034         { "GMIRD_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6035           REG_FIELD_MASK(GCEA_EDC_CNT2, GMIRD_PAGEMEM_SED_COUNT), 0 },
6036         { "GMIWR_PAGEMEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6037           REG_FIELD_MASK(GCEA_EDC_CNT2, GMIWR_PAGEMEM_SED_COUNT), 0 },
6038         { "MAM_D0MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6039           REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D0MEM_SED_COUNT), 0 },
6040         { "MAM_D1MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6041           REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D1MEM_SED_COUNT), 0 },
6042         { "MAM_D2MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6043           REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D2MEM_SED_COUNT), 0 },
6044         { "MAM_D3MEM", SOC15_REG_ENTRY(GC, 0, mmGCEA_EDC_CNT2), 0, 32,
6045           REG_FIELD_MASK(GCEA_EDC_CNT2, MAM_D3MEM_SED_COUNT), 0 },
6046 };
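/*
 * Each entry above describes one on-chip EDC error counter: a human-readable
 * name, the register holding the counter (via SOC15_REG_ENTRY), whether the
 * counter is instanced per shader engine, how many instances to probe, and
 * the SEC (single-error-corrected) and DED (double-error-detected) field
 * masks.  SED-only structures report just a "detected" count and leave the
 * DED mask zero.  gfx_v9_0_query_ras_error_count() below walks this table
 * and, per instance, extracts the counts roughly as:
 *
 *	reg_value = RREG32(<register named by the entry>);
 *	sec_count = reg_value & entry.sec_count_mask;
 *	ded_count = reg_value & entry.ded_count_mask;
 */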
6047
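/*
 * Error injection entry point for the GFX block, typically reached through
 * the RAS core (e.g. its debugfs control interface).  The caller is expected
 * to hand in something along these lines (illustrative values only):
 *
 *	struct ras_inject_if info = {
 *		.head.block           = AMDGPU_RAS_BLOCK__GFX,
 *		.head.type            = <an error type the subblock supports>,
 *		.head.sub_block_index = <index into ras_gfx_subblocks[]>,
 *		.address              = <injection address>,
 *		.value                = <injection value>,
 *	};
 *
 * The request is translated into a ta_ras_trigger_error_input and handed to
 * the PSP RAS TA; only VEGA20 is accepted here.
 */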
6048 static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev,
6049                                      void *inject_if)
6050 {
6051         struct ras_inject_if *info = (struct ras_inject_if *)inject_if;
6052         int ret;
6053         struct ta_ras_trigger_error_input block_info = { 0 };
6054
6055         if (adev->asic_type != CHIP_VEGA20)
6056                 return -EINVAL;
6057
6058         if (info->head.sub_block_index >= ARRAY_SIZE(ras_gfx_subblocks))
6059                 return -EINVAL;
6060
6061         if (!ras_gfx_subblocks[info->head.sub_block_index].name)
6062                 return -EPERM;
6063
6064         if (!(ras_gfx_subblocks[info->head.sub_block_index].hw_supported_error_type &
6065               info->head.type)) {
6066                 DRM_ERROR("GFX Subblock %s, hardware does not support type 0x%x\n",
6067                         ras_gfx_subblocks[info->head.sub_block_index].name,
6068                         info->head.type);
6069                 return -EPERM;
6070         }
6071
6072         if (!(ras_gfx_subblocks[info->head.sub_block_index].sw_supported_error_type &
6073               info->head.type)) {
6074                 DRM_ERROR("GFX Subblock %s, driver does not support type 0x%x\n",
6075                         ras_gfx_subblocks[info->head.sub_block_index].name,
6076                         info->head.type);
6077                 return -EPERM;
6078         }
6079
6080         block_info.block_id = amdgpu_ras_block_to_ta(info->head.block);
6081         block_info.sub_block_index =
6082                 ras_gfx_subblocks[info->head.sub_block_index].ta_subblock;
6083         block_info.inject_error_type = amdgpu_ras_error_to_ta(info->head.type);
6084         block_info.address = info->address;
6085         block_info.value = info->value;
6086
6087         mutex_lock(&adev->grbm_idx_mutex);
6088         ret = psp_ras_trigger_error(&adev->psp, &block_info);
6089         mutex_unlock(&adev->grbm_idx_mutex);
6090
6091         return ret;
6092 }
6093
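/*
 * Walk gfx_ras_edc_regs[] across every shader engine and instance, fold the
 * per-subblock SEC counts into err_data->ce_count and the DED counts into
 * err_data->ue_count, and log every non-zero counter, e.g.:
 *
 *	Instance[0][0]: SubBlock EA_DRAMRD_CMDMEM, SEC 1
 */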
6094 static int gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
6095                                           void *ras_error_status)
6096 {
6097         struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
6098         uint32_t sec_count, ded_count;
6099         uint32_t i;
6100         uint32_t reg_value;
6101         uint32_t se_id, instance_id;
6102
6103         if (adev->asic_type != CHIP_VEGA20)
6104                 return -EINVAL;
6105
6106         err_data->ue_count = 0;
6107         err_data->ce_count = 0;
6108
6109         mutex_lock(&adev->grbm_idx_mutex);
6110         for (se_id = 0; se_id < adev->gfx.config.max_shader_engines; se_id++) {
6111                 for (instance_id = 0; instance_id < 256; instance_id++) {
6112                         for (i = 0;
6113                              i < ARRAY_SIZE(gfx_ras_edc_regs);
6114                              i++) {
6115                                 if (se_id != 0 &&
6116                                     !gfx_ras_edc_regs[i].per_se_instance)
6117                                         continue;
6118                                 if (instance_id >= gfx_ras_edc_regs[i].num_instance)
6119                                         continue;
6120
6121                                 gfx_v9_0_select_se_sh(adev, se_id, 0,
6122                                                       instance_id);
6123
6124                                 reg_value = RREG32(
6125                                         adev->reg_offset[gfx_ras_edc_regs[i].ip]
6126                                                         [gfx_ras_edc_regs[i].inst]
6127                                                         [gfx_ras_edc_regs[i].seg] +
6128                                         gfx_ras_edc_regs[i].reg_offset);
6129                                 sec_count = reg_value &
6130                                             gfx_ras_edc_regs[i].sec_count_mask;
6131                                 ded_count = reg_value &
6132                                             gfx_ras_edc_regs[i].ded_count_mask;
6133                                 if (sec_count) {
6134                                         DRM_INFO(
6135                                                 "Instance[%d][%d]: SubBlock %s, SEC %d\n",
6136                                                 se_id, instance_id,
6137                                                 gfx_ras_edc_regs[i].name,
6138                                                 sec_count);
6139                                         err_data->ce_count++;
6140                                 }
6141
6142                                 if (ded_count) {
6143                                         DRM_INFO(
6144                                                 "Instance[%d][%d]: SubBlock %s, DED %d\n",
6145                                                 se_id, instance_id,
6146                                                 gfx_ras_edc_regs[i].name,
6147                                                 ded_count);
6148                                         err_data->ue_count++;
6149                                 }
6150                         }
6151                 }
6152         }
6153         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6154         mutex_unlock(&adev->grbm_idx_mutex);
6155
6156         return 0;
6157 }
6158
6159 static int gfx_v9_0_cp_ecc_error_irq(struct amdgpu_device *adev,
6160                                   struct amdgpu_irq_src *source,
6161                                   struct amdgpu_iv_entry *entry)
6162 {
6163         struct ras_common_if *ras_if = adev->gfx.ras_if;
6164         struct ras_dispatch_if ih_data = {
6165                 .entry = entry,
6166         };
6167
6168         if (!ras_if)
6169                 return 0;
6170
6171         ih_data.head = *ras_if;
6172
6173         DRM_ERROR("CP ECC ERROR IRQ\n");
6174         amdgpu_ras_interrupt_dispatch(adev, &ih_data);
6175         return 0;
6176 }
6177
6178 static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
6179         .name = "gfx_v9_0",
6180         .early_init = gfx_v9_0_early_init,
6181         .late_init = gfx_v9_0_late_init,
6182         .sw_init = gfx_v9_0_sw_init,
6183         .sw_fini = gfx_v9_0_sw_fini,
6184         .hw_init = gfx_v9_0_hw_init,
6185         .hw_fini = gfx_v9_0_hw_fini,
6186         .suspend = gfx_v9_0_suspend,
6187         .resume = gfx_v9_0_resume,
6188         .is_idle = gfx_v9_0_is_idle,
6189         .wait_for_idle = gfx_v9_0_wait_for_idle,
6190         .soft_reset = gfx_v9_0_soft_reset,
6191         .set_clockgating_state = gfx_v9_0_set_clockgating_state,
6192         .set_powergating_state = gfx_v9_0_set_powergating_state,
6193         .get_clockgating_state = gfx_v9_0_get_clockgating_state,
6194 };
6195
6196 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
6197         .type = AMDGPU_RING_TYPE_GFX,
6198         .align_mask = 0xff,
6199         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6200         .support_64bit_ptrs = true,
6201         .vmhub = AMDGPU_GFXHUB_0,
6202         .get_rptr = gfx_v9_0_ring_get_rptr_gfx,
6203         .get_wptr = gfx_v9_0_ring_get_wptr_gfx,
6204         .set_wptr = gfx_v9_0_ring_set_wptr_gfx,
6205         .emit_frame_size = /* 242 dwords maximum in total if 16 IBs */
6206                 5 +  /* COND_EXEC */
6207                 7 +  /* PIPELINE_SYNC */
6208                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6209                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6210                 2 + /* VM_FLUSH */
6211                 8 +  /* FENCE for VM_FLUSH */
6212                 20 + /* GDS switch */
6213                 4 + /* double SWITCH_BUFFER,
6214                        the first COND_EXEC jumps to the place just
6215                        prior to this double SWITCH_BUFFER */
6216                 5 + /* COND_EXEC */
6217                 7 + /* HDP_flush */
6218                 4 + /* VGT_flush */
6219                 14 + /* CE_META */
6220                 31 + /* DE_META */
6221                 3 + /* CNTX_CTRL */
6222                 5 + /* HDP_INVL */
6223                 8 + 8 + /* FENCE x2 */
6224                 2, /* SWITCH_BUFFER */
6225         .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */
6226         .emit_ib = gfx_v9_0_ring_emit_ib_gfx,
6227         .emit_fence = gfx_v9_0_ring_emit_fence,
6228         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6229         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6230         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6231         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6232         .test_ring = gfx_v9_0_ring_test_ring,
6233         .test_ib = gfx_v9_0_ring_test_ib,
6234         .insert_nop = amdgpu_ring_insert_nop,
6235         .pad_ib = amdgpu_ring_generic_pad_ib,
6236         .emit_switch_buffer = gfx_v9_ring_emit_sb,
6237         .emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
6238         .init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
6239         .patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
6240         .emit_tmz = gfx_v9_0_ring_emit_tmz,
6241         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6242         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6243         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6244         .soft_recovery = gfx_v9_0_ring_soft_recovery,
6245 };
6246
6247 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
6248         .type = AMDGPU_RING_TYPE_COMPUTE,
6249         .align_mask = 0xff,
6250         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6251         .support_64bit_ptrs = true,
6252         .vmhub = AMDGPU_GFXHUB_0,
6253         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
6254         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
6255         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
6256         .emit_frame_size =
6257                 20 + /* gfx_v9_0_ring_emit_gds_switch */
6258                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
6259                 5 + /* hdp invalidate */
6260                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6261                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6262                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6263                 2 + /* gfx_v9_0_ring_emit_vm_flush */
6264                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
6265         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
6266         .emit_ib = gfx_v9_0_ring_emit_ib_compute,
6267         .emit_fence = gfx_v9_0_ring_emit_fence,
6268         .emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
6269         .emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
6270         .emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
6271         .emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
6272         .test_ring = gfx_v9_0_ring_test_ring,
6273         .test_ib = gfx_v9_0_ring_test_ib,
6274         .insert_nop = amdgpu_ring_insert_nop,
6275         .pad_ib = amdgpu_ring_generic_pad_ib,
6276         .set_priority = gfx_v9_0_ring_set_priority_compute,
6277         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6278         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6279         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6280 };
6281
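/*
 * The KIQ (kernel interface queue) is not used to submit user IBs; it only
 * needs fence emission, a ring test and privileged register read/write/wait
 * packets, which is why there are no .emit_ib/.test_ib callbacks below.
 */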
6282 static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
6283         .type = AMDGPU_RING_TYPE_KIQ,
6284         .align_mask = 0xff,
6285         .nop = PACKET3(PACKET3_NOP, 0x3FFF),
6286         .support_64bit_ptrs = true,
6287         .vmhub = AMDGPU_GFXHUB_0,
6288         .get_rptr = gfx_v9_0_ring_get_rptr_compute,
6289         .get_wptr = gfx_v9_0_ring_get_wptr_compute,
6290         .set_wptr = gfx_v9_0_ring_set_wptr_compute,
6291         .emit_frame_size =
6292                 20 + /* gfx_v9_0_ring_emit_gds_switch */
6293                 7 + /* gfx_v9_0_ring_emit_hdp_flush */
6294                 5 + /* hdp invalidate */
6295                 7 + /* gfx_v9_0_ring_emit_pipeline_sync */
6296                 SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
6297                 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
6298                 2 + /* gfx_v9_0_ring_emit_vm_flush */
6299                 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
6300         .emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
6301         .emit_fence = gfx_v9_0_ring_emit_fence_kiq,
6302         .test_ring = gfx_v9_0_ring_test_ring,
6303         .insert_nop = amdgpu_ring_insert_nop,
6304         .pad_ib = amdgpu_ring_generic_pad_ib,
6305         .emit_rreg = gfx_v9_0_ring_emit_rreg,
6306         .emit_wreg = gfx_v9_0_ring_emit_wreg,
6307         .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
6308         .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
6309 };
6310
6311 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
6312 {
6313         int i;
6314
6315         adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq;
6316
6317         for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6318                 adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
6319
6320         for (i = 0; i < adev->gfx.num_compute_rings; i++)
6321                 adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
6322 }
6323
6324 static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = {
6325         .set = gfx_v9_0_set_eop_interrupt_state,
6326         .process = gfx_v9_0_eop_irq,
6327 };
6328
6329 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_reg_irq_funcs = {
6330         .set = gfx_v9_0_set_priv_reg_fault_state,
6331         .process = gfx_v9_0_priv_reg_irq,
6332 };
6333
6334 static const struct amdgpu_irq_src_funcs gfx_v9_0_priv_inst_irq_funcs = {
6335         .set = gfx_v9_0_set_priv_inst_fault_state,
6336         .process = gfx_v9_0_priv_inst_irq,
6337 };
6338
6339 static const struct amdgpu_irq_src_funcs gfx_v9_0_cp_ecc_error_irq_funcs = {
6340         .set = gfx_v9_0_set_cp_ecc_error_state,
6341         .process = gfx_v9_0_cp_ecc_error_irq,
6342 };
6343
6345 static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev)
6346 {
6347         adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6348         adev->gfx.eop_irq.funcs = &gfx_v9_0_eop_irq_funcs;
6349
6350         adev->gfx.priv_reg_irq.num_types = 1;
6351         adev->gfx.priv_reg_irq.funcs = &gfx_v9_0_priv_reg_irq_funcs;
6352
6353         adev->gfx.priv_inst_irq.num_types = 1;
6354         adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs;
6355
6356         adev->gfx.cp_ecc_error_irq.num_types = 2; /* C5 ECC error and C9 FUE error */
6357         adev->gfx.cp_ecc_error_irq.funcs = &gfx_v9_0_cp_ecc_error_irq_funcs;
6358 }
6359
6360 static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev)
6361 {
6362         switch (adev->asic_type) {
6363         case CHIP_VEGA10:
6364         case CHIP_VEGA12:
6365         case CHIP_VEGA20:
6366         case CHIP_RAVEN:
6367         case CHIP_ARCTURUS:
6368         case CHIP_RENOIR:
6369                 adev->gfx.rlc.funcs = &gfx_v9_0_rlc_funcs;
6370                 break;
6371         default:
6372                 break;
6373         }
6374 }
6375
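/*
 * GDS (global data share) defaults: gds_size is the total GDS memory in
 * bytes (64 KiB on the Vega parts, 4 KiB on Raven/Arcturus), gws_size the
 * number of global wave sync resources and oa_size the number of
 * ordered-append units.
 */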
6376 static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
6377 {
6378         /* init asic gds info */
6379         switch (adev->asic_type) {
6380         case CHIP_VEGA10:
6381         case CHIP_VEGA12:
6382         case CHIP_VEGA20:
6383                 adev->gds.gds_size = 0x10000;
6384                 break;
6385         case CHIP_RAVEN:
6386         case CHIP_ARCTURUS:
6387                 adev->gds.gds_size = 0x1000;
6388                 break;
6389         default:
6390                 adev->gds.gds_size = 0x10000;
6391                 break;
6392         }
6393
6394         switch (adev->asic_type) {
6395         case CHIP_VEGA10:
6396         case CHIP_VEGA20:
6397                 adev->gds.gds_compute_max_wave_id = 0x7ff;
6398                 break;
6399         case CHIP_VEGA12:
6400                 adev->gds.gds_compute_max_wave_id = 0x27f;
6401                 break;
6402         case CHIP_RAVEN:
6403                 if (adev->rev_id >= 0x8)
6404                         adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
6405                 else
6406                         adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
6407                 break;
6408         case CHIP_ARCTURUS:
6409                 adev->gds.gds_compute_max_wave_id = 0xfff;
6410                 break;
6411         default:
6412                 /* this really depends on the chip */
6413                 adev->gds.gds_compute_max_wave_id = 0x7ff;
6414                 break;
6415         }
6416
6417         adev->gds.gws_size = 64;
6418         adev->gds.oa_size = 16;
6419 }
6420
6421 static void gfx_v9_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6422                                                  u32 bitmap)
6423 {
6424         u32 data;
6425
6426         if (!bitmap)
6427                 return;
6428
6429         data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6430         data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6431
6432         WREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG, data);
6433 }
6434
6435 static u32 gfx_v9_0_get_cu_active_bitmap(struct amdgpu_device *adev)
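/*
 * Return a bitmask of the CUs that are actually usable in the currently
 * selected SE/SH: the inactive-CU fields from CC_GC_SHADER_ARRAY_CONFIG and
 * GC_USER_SHADER_ARRAY_CONFIG are OR'd together, inverted, and masked down
 * to max_cu_per_sh.  For example, with max_cu_per_sh = 10 and CUs 0 and 3
 * marked inactive (data = 0x9), the result is ~0x9 & 0x3ff = 0x3f6.
 */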
6436 {
6437         u32 data, mask;
6438
6439         data = RREG32_SOC15(GC, 0, mmCC_GC_SHADER_ARRAY_CONFIG);
6440         data |= RREG32_SOC15(GC, 0, mmGC_USER_SHADER_ARRAY_CONFIG);
6441
6442         data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6443         data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6444
6445         mask = amdgpu_gfx_create_bitmask(adev->gfx.config.max_cu_per_sh);
6446
6447         return (~data) & mask;
6448 }
6449
6450 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
6451                                  struct amdgpu_cu_info *cu_info)
6452 {
6453         int i, j, k, counter, active_cu_number = 0;
6454         u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
6455         unsigned disable_masks[4 * 4];
6456
6457         if (!adev || !cu_info)
6458                 return -EINVAL;
6459
6460         /*
6461          * 16 comes from the 4*4 bitmap array size, which covers all gfx9 ASICs
6462          */
6463         if (adev->gfx.config.max_shader_engines *
6464                 adev->gfx.config.max_sh_per_se > 16)
6465                 return -EINVAL;
6466
6467         amdgpu_gfx_parse_disable_cu(disable_masks,
6468                                     adev->gfx.config.max_shader_engines,
6469                                     adev->gfx.config.max_sh_per_se);
6470
6471         mutex_lock(&adev->grbm_idx_mutex);
6472         for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6473                 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6474                         mask = 1;
6475                         ao_bitmap = 0;
6476                         counter = 0;
6477                         gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
6478                         gfx_v9_0_set_user_cu_inactive_bitmap(
6479                                 adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
6480                         bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
6481
6482                         /*
6483                          * The bitmap (and ao_cu_bitmap) in the cu_info structure
6484                          * is a 4x4 array, which suits the Vega ASICs and their
6485                          * 4*2 SE/SH layout.
6486                          * For Arcturus, however, the SE/SH layout changes to 8*1.
6487                          * To minimize the impact, we fold it into the existing
6488                          * bitmap array as bitmap[se % 4][sh + se / 4]:
6489                          *    SE4,SH0 --> bitmap[0][1]
6490                          *    SE5,SH0 --> bitmap[1][1]
6491                          *    SE6,SH0 --> bitmap[2][1]
6492                          *    SE7,SH0 --> bitmap[3][1]
6493                          */
6494                         cu_info->bitmap[i % 4][j + i / 4] = bitmap;
6495
6496                         for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
6497                                 if (bitmap & mask) {
6498                                         if (counter < adev->gfx.config.max_cu_per_sh)
6499                                                 ao_bitmap |= mask;
6500                                         counter++;
6501                                 }
6502                                 mask <<= 1;
6503                         }
6504                         active_cu_number += counter;
6505                         if (i < 2 && j < 2)
6506                                 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6507                         cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
6508                 }
6509         }
6510         gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
6511         mutex_unlock(&adev->grbm_idx_mutex);
6512
6513         cu_info->number = active_cu_number;
6514         cu_info->ao_cu_mask = ao_cu_mask;
6515         cu_info->simd_per_cu = NUM_SIMD_PER_CU;
6516
6517         return 0;
6518 }
6519
6520 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
6521 {
6522         .type = AMD_IP_BLOCK_TYPE_GFX,
6523         .major = 9,
6524         .minor = 0,
6525         .rev = 0,
6526         .funcs = &gfx_v9_0_ip_funcs,
6527 };
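/*
 * IP block version handed to the common IP framework; the SoC setup code
 * (e.g. soc15.c) registers it with something like:
 *
 *	amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block);
 */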