OSDN Git Service

amdgpu: remove json package dependence
[android-x86/external-libdrm.git] / tests / amdgpu / ras_tests.c
1 /*
2  * Copyright 2017 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22 */
23
24 #include "CUnit/Basic.h"
25
26 #include "amdgpu_test.h"
27 #include "amdgpu_drm.h"
28 #include "amdgpu_internal.h"
29 #include <unistd.h>
30 #include <fcntl.h>
31 #include <stdio.h>
32 #include "xf86drm.h"
33 #include "stdlib.h"
34
/*
 * Printable name of each RAS block, indexed by enum amdgpu_ras_block value.
 * The names are used further down to build the per-block
 * "<name>_err_count" and "<name>_err_inject" file names.
 */
const char *ras_block_string[] = {
        "umc",
        "sdma",
        "gfx",
        "mmhub",
        "athub",
        "pcie_bif",
        "hdp",
        "xgmi_wafl",
        "df",
        "smn",
        "sem",
        "mp0",
        "mp1",
        "fuse",
};

/* Name of RAS block i; the index is not range-checked. */
#define ras_block_str(i) (ras_block_string[i])
53
/*
 * RAS block identifiers.  The numeric value doubles as the bit position
 * tested in the feature/capability masks ("1 << block") and as the index
 * into ras_block_string[], so the order here must match that table.
 */
enum amdgpu_ras_block {
        AMDGPU_RAS_BLOCK__UMC = 0,
        AMDGPU_RAS_BLOCK__SDMA,
        AMDGPU_RAS_BLOCK__GFX,
        AMDGPU_RAS_BLOCK__MMHUB,
        AMDGPU_RAS_BLOCK__ATHUB,
        AMDGPU_RAS_BLOCK__PCIE_BIF,
        AMDGPU_RAS_BLOCK__HDP,
        AMDGPU_RAS_BLOCK__XGMI_WAFL,
        AMDGPU_RAS_BLOCK__DF,
        AMDGPU_RAS_BLOCK__SMN,
        AMDGPU_RAS_BLOCK__SEM,
        AMDGPU_RAS_BLOCK__MP0,
        AMDGPU_RAS_BLOCK__MP1,
        AMDGPU_RAS_BLOCK__FUSE,

        /* number of blocks; not a block itself */
        AMDGPU_RAS_BLOCK__LAST
};

/* Number of RAS blocks, and a mask with one bit set for each of them. */
#define AMDGPU_RAS_BLOCK_COUNT  AMDGPU_RAS_BLOCK__LAST
#define AMDGPU_RAS_BLOCK_MASK   ((1ULL << AMDGPU_RAS_BLOCK_COUNT) - 1)
75
/*
 * Sub-block indices inside the GFX block.
 *
 * Most groups are bracketed by *_INDEX_START / *_INDEX_END enumerators that
 * alias the first and last member of the group, so the numbering stays
 * contiguous across groups.  SQC, TCC and EA are further divided into
 * numbered sub-ranges (INDEX0, INDEX1, ...) that follow the same scheme.
 * NOTE(review): presumably these are the values passed as sub_block to the
 * injection interface and must match the kernel's definition -- confirm.
 */
enum amdgpu_ras_gfx_subblock {
        /* CPC */
        AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
        AMDGPU_RAS_BLOCK__GFX_CPC_SCRATCH =
                AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START,
        AMDGPU_RAS_BLOCK__GFX_CPC_UCODE,
        AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME1,
        AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
        AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME1,
        AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME2,
        AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
        AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2,
        AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_END =
                AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2,
        /* CPF */
        AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START,
        AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME2 =
                AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START,
        AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME1,
        AMDGPU_RAS_BLOCK__GFX_CPF_TAG,
        AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPF_TAG,
        /* CPG */
        AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START,
        AMDGPU_RAS_BLOCK__GFX_CPG_DMA_ROQ =
                AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START,
        AMDGPU_RAS_BLOCK__GFX_CPG_DMA_TAG,
        AMDGPU_RAS_BLOCK__GFX_CPG_TAG,
        AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPG_TAG,
        /* GDS */
        AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START,
        AMDGPU_RAS_BLOCK__GFX_GDS_MEM = AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START,
        AMDGPU_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
        AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
        AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
        AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
        AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_END =
                AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
        /* SPI */
        AMDGPU_RAS_BLOCK__GFX_SPI_SR_MEM,
        /* SQ */
        AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START,
        AMDGPU_RAS_BLOCK__GFX_SQ_SGPR = AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START,
        AMDGPU_RAS_BLOCK__GFX_SQ_LDS_D,
        AMDGPU_RAS_BLOCK__GFX_SQ_LDS_I,
        AMDGPU_RAS_BLOCK__GFX_SQ_VGPR,
        AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_END = AMDGPU_RAS_BLOCK__GFX_SQ_VGPR,
        /* SQC (3 ranges) */
        AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START,
        /* SQC range 0 */
        AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START =
                AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START,
        AMDGPU_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
                AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START,
        AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
        AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
        AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
        AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
        AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
        AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
        AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_END =
                AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
        /* SQC range 1 */
        AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START,
        AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
                AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START,
        AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
        AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
        AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
        AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
        AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
        AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
        AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
        AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
        AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_END =
                AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
        /* SQC range 2 */
        AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START,
        AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
                AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START,
        AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
        AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
        AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
        AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
        AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
        AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
        AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
        AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
        AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END =
                AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
        AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_END =
                AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END,
        /* TA */
        AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START,
        AMDGPU_RAS_BLOCK__GFX_TA_FS_DFIFO =
                AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START,
        AMDGPU_RAS_BLOCK__GFX_TA_FS_AFIFO,
        AMDGPU_RAS_BLOCK__GFX_TA_FL_LFIFO,
        AMDGPU_RAS_BLOCK__GFX_TA_FX_LFIFO,
        AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO,
        AMDGPU_RAS_BLOCK__GFX_TA_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO,
        /* TCA */
        AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START,
        AMDGPU_RAS_BLOCK__GFX_TCA_HOLE_FIFO =
                AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START,
        AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO,
        AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_END =
                AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO,
        /* TCC (5 sub-ranges) */
        AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START,
        /* TCC range 0 */
        AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START =
                AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START,
        AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA =
                AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START,
        AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
        AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
        AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
        AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
        AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
        AMDGPU_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
        AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
        AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_END =
                AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
        /* TCC range 1 */
        AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START,
        AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_DEC =
                AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START,
        AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
        AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_END =
                AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
        /* TCC range 2 */
        AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START,
        AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_DATA =
                AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START,
        AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
        AMDGPU_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
        AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
        AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
        AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO,
        AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
        AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
        AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_END =
                AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
        /* TCC range 3 */
        AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START,
        AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO =
                AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START,
        AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
        AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_END =
                AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
        /* TCC range 4 */
        AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START,
        AMDGPU_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
                AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START,
        AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
        AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END =
                AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
        AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_END =
                AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END,
        /* TCI */
        AMDGPU_RAS_BLOCK__GFX_TCI_WRITE_RAM,
        /* TCP */
        AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START,
        AMDGPU_RAS_BLOCK__GFX_TCP_CACHE_RAM =
                AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START,
        AMDGPU_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
        AMDGPU_RAS_BLOCK__GFX_TCP_CMD_FIFO,
        AMDGPU_RAS_BLOCK__GFX_TCP_VM_FIFO,
        AMDGPU_RAS_BLOCK__GFX_TCP_DB_RAM,
        AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
        AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
        AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_END =
                AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
        /* TD */
        AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START,
        AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_LO =
                AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START,
        AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
        AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO,
        AMDGPU_RAS_BLOCK__GFX_TD_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO,
        /* EA (3 sub-ranges) */
        AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START,
        /* EA range 0 */
        AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START =
                AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START,
        AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM =
                AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START,
        AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
        AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
        AMDGPU_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
        AMDGPU_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
        AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
        AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
        AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
        AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_END =
                AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
        /* EA range 1 */
        AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START,
        AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM =
                AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START,
        AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
        AMDGPU_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
        AMDGPU_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
        AMDGPU_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
        AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
        AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
        AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_END =
                AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
        /* EA range 2 */
        AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START,
        AMDGPU_RAS_BLOCK__GFX_EA_MAM_D0MEM =
                AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START,
        AMDGPU_RAS_BLOCK__GFX_EA_MAM_D1MEM,
        AMDGPU_RAS_BLOCK__GFX_EA_MAM_D2MEM,
        AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM,
        AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END =
                AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM,
        AMDGPU_RAS_BLOCK__GFX_EA_INDEX_END =
                AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END,
        /* UTC VM L2 bank */
        AMDGPU_RAS_BLOCK__UTC_VML2_BANK_CACHE,
        /* UTC VM walker */
        AMDGPU_RAS_BLOCK__UTC_VML2_WALKER,
        /* UTC ATC L2 2MB cache */
        AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
        /* UTC ATC L2 4KB cache */
        AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
        /* one past the last sub-block index */
        AMDGPU_RAS_BLOCK__GFX_MAX
};
305
/*
 * RAS error kinds.  Every value other than NONE is a distinct single bit.
 * The feature enable/disable test in this file always uses
 * MULTI_UNCORRECTABLE.
 */
enum amdgpu_ras_error_type {
        AMDGPU_RAS_ERROR__NONE                                  = 0,
        AMDGPU_RAS_ERROR__PARITY                                = 1,
        AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE                    = 2,
        AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE                   = 4,
        AMDGPU_RAS_ERROR__POISON                                = 8,
};
313
/*
 * One error-injection test case, as consumed by __amdgpu_ras_inject_test():
 * block, sub_block, type, address and value are handed to
 * amdgpu_ras_inject(); name and error_type_str only appear in the printed
 * result line.
 */
struct ras_test_item {
        char name[64];
        int block;
        int sub_block;
        char error_type_str[64];
        enum amdgpu_ras_error_type type;
        uint64_t address;
        uint64_t value;
};
323
/*
 * Header shared by every RAS control request built in this file.
 * NOTE(review): the structure is written as raw bytes to the "ras_ctrl"
 * debugfs file, so its layout presumably has to match the kernel side --
 * confirm before changing any field.
 */
struct ras_common_if {
        enum amdgpu_ras_block block;
        enum amdgpu_ras_error_type type;
        uint32_t sub_block_index;
        /* block name; amdgpu_ras_inject() copies the ras_block_str() name here */
        char name[32];
};
330
/* Injection request: the common header plus the address/value to inject. */
struct ras_inject_if {
        struct ras_common_if head;
        uint64_t address;
        uint64_t value;
};
336
/*
 * Complete request written to the "ras_ctrl" debugfs file by
 * amdgpu_ras_invoke().  head and inject overlay each other.
 */
struct ras_debug_if {
        union {
                struct ras_common_if head;
                struct ras_inject_if inject;
        };
        /*
         * Operation selector: amdgpu_ras_features_test() passes 0 (disable)
         * or 1 (enable); amdgpu_ras_inject() uses 2.
         */
        int op;
};
344 /* for now, only umc, gfx, sdma has implemented. */
345 #define DEFAULT_RAS_BLOCK_MASK_INJECT ((1 << AMDGPU_RAS_BLOCK__UMC) |\
346                 (1 << AMDGPU_RAS_BLOCK__GFX))
347 #define DEFAULT_RAS_BLOCK_MASK_QUERY ((1 << AMDGPU_RAS_BLOCK__UMC) |\
348                 (1 << AMDGPU_RAS_BLOCK__GFX))
349 #define DEFAULT_RAS_BLOCK_MASK_BASIC (1 << AMDGPU_RAS_BLOCK__UMC |\
350                 (1 << AMDGPU_RAS_BLOCK__SDMA) |\
351                 (1 << AMDGPU_RAS_BLOCK__GFX))
352
353 static uint32_t ras_block_mask_inject = DEFAULT_RAS_BLOCK_MASK_INJECT;
354 static uint32_t ras_block_mask_query = DEFAULT_RAS_BLOCK_MASK_INJECT;
355 static uint32_t ras_block_mask_basic = DEFAULT_RAS_BLOCK_MASK_BASIC;
356
/* Per-device block masks, one for each of the inject, query and basic tests. */
struct ras_test_mask {
        uint32_t inject_mask;
        uint32_t query_mask;
        uint32_t basic_mask;
};
362
/* Everything the tests need to know about one RAS-capable device. */
struct amdgpu_ras_data {
        amdgpu_device_handle device_handle;
        /* index used in the /sys/class/drm/card%d and debugfs dri/%d paths */
        uint32_t  id;
        /* supported-feature mask returned by amdgpu_ras_lookup_capability() */
        uint32_t  capability;
        struct ras_test_mask test_mask;
};
369
/* all devices that support RAS; filled in by suite_ras_tests_init() */
static struct amdgpu_ras_data devices[MAX_CARDS_SUPPORTED];
/* number of valid entries at the start of devices[] */
static int devices_count;
373
/* Associates one PCI device/revision ID pair with the test masks to use. */
struct ras_DID_test_mask{
        uint16_t device_id;
        uint16_t revision_id;
        struct ras_test_mask test_mask;
};
379
/*
 * Initializers for struct ras_test_mask, in field order
 * (inject, query, basic).
 *
 * RAS_BLOCK_MASK_ALL is for devices white-listed for the inject test.
 */
#define RAS_BLOCK_MASK_ALL {\
        DEFAULT_RAS_BLOCK_MASK_INJECT,\
        DEFAULT_RAS_BLOCK_MASK_QUERY,\
        DEFAULT_RAS_BLOCK_MASK_BASIC\
}

/* Query and basic tests only: the inject mask is empty. */
#define RAS_BLOCK_MASK_QUERY_BASIC {\
        0,\
        DEFAULT_RAS_BLOCK_MASK_QUERY,\
        DEFAULT_RAS_BLOCK_MASK_BASIC\
}
392
/*
 * Devices (PCI device ID, revision ID) that run with the full mask set,
 * injection included.  Anything not listed here falls back to
 * query + basic only in amdgpu_ras_get_test_mask().
 */
static const struct ras_DID_test_mask ras_DID_array[] = {
        {0x66a1, 0x00, RAS_BLOCK_MASK_ALL},
        {0x66a1, 0x01, RAS_BLOCK_MASK_ALL},
        {0x66a1, 0x04, RAS_BLOCK_MASK_ALL},
};
398
399 static struct ras_test_mask amdgpu_ras_get_test_mask(drmDevicePtr device)
400 {
401         int i;
402         static struct ras_test_mask default_test_mask = RAS_BLOCK_MASK_QUERY_BASIC;
403
404         for (i = 0; i < sizeof(ras_DID_array) / sizeof(ras_DID_array[0]); i++) {
405                 if (ras_DID_array[i].device_id == device->deviceinfo.pci->device_id &&
406                                 ras_DID_array[i].revision_id == device->deviceinfo.pci->revision_id)
407                         return ras_DID_array[i].test_mask;
408         }
409         return default_test_mask;
410 }
411
412 static uint32_t amdgpu_ras_lookup_capability(amdgpu_device_handle device_handle)
413 {
414         union {
415                 uint64_t feature_mask;
416                 struct {
417                         uint32_t enabled_features;
418                         uint32_t supported_features;
419                 };
420         } features = { 0 };
421         int ret;
422
423         ret = amdgpu_query_info(device_handle, AMDGPU_INFO_RAS_ENABLED_FEATURES,
424                         sizeof(features), &features);
425         if (ret)
426                 return 0;
427
428         return features.supported_features;
429 }
430
431 static int get_file_contents(char *file, char *buf, int size);
432
433 static int amdgpu_ras_lookup_id(drmDevicePtr device)
434 {
435         char path[1024];
436         char str[128];
437         drmPciBusInfo info;
438         int i;
439         int ret;
440
441         for (i = 0; i < MAX_CARDS_SUPPORTED; i++) {
442                 memset(str, 0, sizeof(str));
443                 memset(&info, 0, sizeof(info));
444                 sprintf(path, "/sys/kernel/debug/dri/%d/name", i);
445                 if (get_file_contents(path, str, sizeof(str)) <= 0)
446                         continue;
447
448                 ret = sscanf(str, "amdgpu dev=%04hx:%02hhx:%02hhx.%01hhx",
449                                 &info.domain, &info.bus, &info.dev, &info.func);
450                 if (ret != 4)
451                         continue;
452
453                 if (memcmp(&info, device->businfo.pci, sizeof(info)) == 0)
454                                 return i;
455         }
456         return -1;
457 }
458
459 CU_BOOL suite_ras_tests_enable(void)
460 {
461         amdgpu_device_handle device_handle;
462         uint32_t  major_version;
463         uint32_t  minor_version;
464         int i;
465         drmDevicePtr device;
466
467         for (i = 0; i < MAX_CARDS_SUPPORTED && drm_amdgpu[i] >= 0; i++) {
468                 if (amdgpu_device_initialize(drm_amdgpu[i], &major_version,
469                                         &minor_version, &device_handle))
470                         continue;
471
472                 if (drmGetDevice2(drm_amdgpu[i],
473                                         DRM_DEVICE_GET_PCI_REVISION,
474                                         &device))
475                         continue;
476
477                 if (device->bustype == DRM_BUS_PCI &&
478                                 amdgpu_ras_lookup_capability(device_handle)) {
479                         amdgpu_device_deinitialize(device_handle);
480                         return CU_TRUE;
481                 }
482
483                 if (amdgpu_device_deinitialize(device_handle))
484                         continue;
485         }
486
487         return CU_FALSE;
488 }
489
490 int suite_ras_tests_init(void)
491 {
492         drmDevicePtr device;
493         amdgpu_device_handle device_handle;
494         uint32_t  major_version;
495         uint32_t  minor_version;
496         uint32_t  capability;
497         struct ras_test_mask test_mask;
498         int id;
499         int i;
500         int r;
501
502         for (i = 0; i < MAX_CARDS_SUPPORTED && drm_amdgpu[i] >= 0; i++) {
503                 r = amdgpu_device_initialize(drm_amdgpu[i], &major_version,
504                                 &minor_version, &device_handle);
505                 if (r)
506                         continue;
507
508                 if (drmGetDevice2(drm_amdgpu[i],
509                                         DRM_DEVICE_GET_PCI_REVISION,
510                                         &device)) {
511                         amdgpu_device_deinitialize(device_handle);
512                         continue;
513                 }
514
515                 if (device->bustype != DRM_BUS_PCI) {
516                         amdgpu_device_deinitialize(device_handle);
517                         continue;
518                 }
519
520                 capability = amdgpu_ras_lookup_capability(device_handle);
521                 if (capability == 0) {
522                         amdgpu_device_deinitialize(device_handle);
523                         continue;
524
525                 }
526
527                 id = amdgpu_ras_lookup_id(device);
528                 if (id == -1) {
529                         amdgpu_device_deinitialize(device_handle);
530                         continue;
531                 }
532
533                 test_mask = amdgpu_ras_get_test_mask(device);
534
535                 devices[devices_count++] = (struct amdgpu_ras_data) {
536                         device_handle, id, capability, test_mask,
537                 };
538         }
539
540         if (devices_count == 0)
541                 return CUE_SINIT_FAILED;
542
543         return CUE_SUCCESS;
544 }
545
546 int suite_ras_tests_clean(void)
547 {
548         int r;
549         int i;
550         int ret = CUE_SUCCESS;
551
552         for (i = 0; i < devices_count; i++) {
553                 r = amdgpu_device_deinitialize(devices[i].device_handle);
554                 if (r)
555                         ret = CUE_SCLEAN_FAILED;
556         }
557         return ret;
558 }
559
/* Forward declarations of the test cases referenced by ras_tests[]. */
static void amdgpu_ras_disable_test(void);
static void amdgpu_ras_enable_test(void);
static void amdgpu_ras_inject_test(void);
static void amdgpu_ras_query_test(void);
static void amdgpu_ras_basic_test(void);

/*
 * Test table of the RAS suite, terminated by CU_TEST_INFO_NULL.
 * The enable test is compiled out of the table by the #if 0 below.
 */
CU_TestInfo ras_tests[] = {
        { "ras basic test",     amdgpu_ras_basic_test },
        { "ras query test",     amdgpu_ras_query_test },
        { "ras inject test",    amdgpu_ras_inject_test },
        { "ras disable test",   amdgpu_ras_disable_test },
#if 0
        { "ras enable test",    amdgpu_ras_enable_test },
#endif
        CU_TEST_INFO_NULL,
};
576
//helpers

/*
 * State of the card currently under test.  All of these are (re)assigned
 * by set_test_card() and read by the helpers below.
 */
/* index into devices[] */
static int test_card;
/* "/sys/class/drm/card<id>/device/ras/" for the current card */
static char sysfs_path[1024];
/* "/sys/kernel/debug/dri/<id>/ras/" for the current card */
static char debugfs_path[1024];
/* capability (supported-feature) mask of the current card */
static uint32_t ras_mask;
static amdgpu_device_handle device_handle;
584
585 static int set_test_card(int card)
586 {
587         int i;
588
589         test_card = card;
590         sprintf(sysfs_path, "/sys/class/drm/card%d/device/ras/", devices[card].id);
591         sprintf(debugfs_path, "/sys/kernel/debug/dri/%d/ras/", devices[card].id);
592         ras_mask = devices[card].capability;
593         device_handle = devices[card].device_handle;
594         ras_block_mask_inject = devices[card].test_mask.inject_mask;
595         ras_block_mask_query = devices[card].test_mask.query_mask;
596         ras_block_mask_basic = devices[card].test_mask.basic_mask;
597
598         return 0;
599 }
600
601 static const char *get_ras_sysfs_root(void)
602 {
603         return sysfs_path;
604 }
605
606 static const char *get_ras_debugfs_root(void)
607 {
608         return debugfs_path;
609 }
610
/*
 * Write size bytes from buf to an existing file with a single write().
 *
 * Returns the value returned by write() (bytes written, or -1 on a write
 * error), or -1 when the file cannot be opened for writing.
 */
static int set_file_contents(char *file, char *buf, int size)
{
        int written;
        int handle = open(file, O_WRONLY);

        if (handle != -1) {
                written = write(handle, buf, size);
                close(handle);
                return written;
        }

        return -1;
}
621
/*
 * Read up to size bytes of a file into buf with a single read().
 *
 * The buffer is NOT NUL-terminated by this function.  Returns the value
 * returned by read() (bytes read, 0 at end of file, -1 on a read error),
 * or -1 when the file cannot be opened for reading.
 */
static int get_file_contents(char *file, char *buf, int size)
{
        int count;
        int handle = open(file, O_RDONLY);

        if (handle != -1) {
                count = read(handle, buf, size);
                close(handle);
                return count;
        }

        return -1;
}
632
/*
 * Check that a file can be opened with the given open() flags.
 *
 * Returns 0 when open() succeeds (the descriptor is closed again
 * immediately) and -1 when it fails.
 */
static int is_file_ok(char *file, int flags)
{
        int handle = open(file, flags);

        if (handle != -1) {
                close(handle);
                return 0;
        }

        return -1;
}
643
644 static int amdgpu_ras_is_feature_enabled(enum amdgpu_ras_block block)
645 {
646         uint32_t feature_mask;
647         int ret;
648
649         ret = amdgpu_query_info(device_handle, AMDGPU_INFO_RAS_ENABLED_FEATURES,
650                         sizeof(feature_mask), &feature_mask);
651         if (ret)
652                 return -1;
653
654         return (1 << block) & feature_mask;
655 }
656
657 static int amdgpu_ras_is_feature_supported(enum amdgpu_ras_block block)
658 {
659         return (1 << block) & ras_mask;
660 }
661
662 static int amdgpu_ras_invoke(struct ras_debug_if *data)
663 {
664         char path[1024];
665         int ret;
666
667         sprintf(path, "%s%s", get_ras_debugfs_root(), "ras_ctrl");
668
669         ret = set_file_contents(path, (char *)data, sizeof(*data))
670                 - sizeof(*data);
671         return ret;
672 }
673
674 static int amdgpu_ras_query_err_count(enum amdgpu_ras_block block,
675                 unsigned long *ue, unsigned long *ce)
676 {
677         char buf[64];
678         char name[1024];
679         int ret;
680
681         *ue = *ce = 0;
682
683         if (amdgpu_ras_is_feature_supported(block) <= 0)
684                 return -1;
685
686         sprintf(name, "%s%s%s", get_ras_sysfs_root(), ras_block_str(block), "_err_count");
687
688         if (is_file_ok(name, O_RDONLY))
689                 return 0;
690
691         if (get_file_contents(name, buf, sizeof(buf)) <= 0)
692                 return -1;
693
694         if (sscanf(buf, "ue: %lu\nce: %lu", ue, ce) != 2)
695                 return -1;
696
697         return 0;
698 }
699
700 static int amdgpu_ras_inject(enum amdgpu_ras_block block,
701                 uint32_t sub_block, enum amdgpu_ras_error_type type,
702                 uint64_t address, uint64_t value)
703 {
704         struct ras_debug_if data = { .op = 2, };
705         struct ras_inject_if *inject = &data.inject;
706         int ret;
707
708         if (amdgpu_ras_is_feature_enabled(block) <= 0) {
709                 fprintf(stderr, "block id(%d) is not valid\n", block);
710                 return -1;
711         }
712
713         inject->head.block = block;
714         inject->head.type = type;
715         inject->head.sub_block_index = sub_block;
716         strncpy(inject->head.name, ras_block_str(block), 32);
717         inject->address = address;
718         inject->value = value;
719
720         ret = amdgpu_ras_invoke(&data);
721         CU_ASSERT_EQUAL(ret, 0);
722         if (ret)
723                 return -1;
724
725         return 0;
726 }
727
728 //tests
729 static void amdgpu_ras_features_test(int enable)
730 {
731         struct ras_debug_if data;
732         int ret;
733         int i;
734
735         data.op = enable;
736         for (i = 0; i < AMDGPU_RAS_BLOCK__LAST; i++) {
737                 struct ras_common_if head = {
738                         .block = i,
739                         .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
740                         .sub_block_index = 0,
741                         .name = "",
742                 };
743
744                 if (amdgpu_ras_is_feature_supported(i) <= 0)
745                         continue;
746
747                 data.head = head;
748
749                 ret = amdgpu_ras_invoke(&data);
750                 CU_ASSERT_EQUAL(ret, 0);
751
752                 if (ret)
753                         continue;
754
755                 ret = enable ^ amdgpu_ras_is_feature_enabled(i);
756                 CU_ASSERT_EQUAL(ret, 0);
757         }
758 }
759
760 static void amdgpu_ras_disable_test(void)
761 {
762         int i;
763         for (i = 0; i < devices_count; i++) {
764                 set_test_card(i);
765                 amdgpu_ras_features_test(0);
766         }
767 }
768
769 static void amdgpu_ras_enable_test(void)
770 {
771         int i;
772         for (i = 0; i < devices_count; i++) {
773                 set_test_card(i);
774                 amdgpu_ras_features_test(1);
775         }
776 }
777
/*
 * Provide the list of injection test items.
 *
 * This implementation supplies none: *pitems is set to NULL and *size to 0.
 * Always returns 0.  The caller frees *pitems.
 */
static int amdgpu_ras_get_test_items(struct ras_test_item **pitems, int *size)
{
        *size = 0;
        *pitems = NULL;

        return 0;
}
785
786 static void __amdgpu_ras_inject_test(void)
787 {
788         struct ras_test_item *items = NULL;
789         int i, size;
790         int ret;
791         unsigned long old_ue, old_ce;
792         unsigned long ue, ce;
793         int timeout;
794         bool pass;
795
796         ret = amdgpu_ras_get_test_items(&items, &size);
797         CU_ASSERT_EQUAL(ret, 0);
798         if (ret)
799                 goto mem_free;
800
801         printf("...\n");
802         for (i = 0; i < size; i++) {
803                 timeout = 3;
804                 pass = false;
805
806                 ret = amdgpu_ras_query_err_count(items[i].block, &old_ue,
807                                                  &old_ce);
808                 CU_ASSERT_EQUAL(ret, 0);
809                 if (ret)
810                         break;
811
812                 ret = amdgpu_ras_inject(items[i].block, items[i].sub_block,
813                                         items[i].type, items[i].address,
814                                         items[i].value);
815                 CU_ASSERT_EQUAL(ret, 0);
816                 if (ret)
817                         break;
818
819                 while (timeout > 0) {
820                         sleep(5);
821
822                         ret = amdgpu_ras_query_err_count(items[i].block, &ue,
823                                                          &ce);
824                         CU_ASSERT_EQUAL(ret, 0);
825                         if (ret)
826                                 break;
827
828                         if (old_ue != ue || old_ce != ce) {
829                                 pass = true;
830                                 sleep(20);
831                                 break;
832                         }
833                         timeout -= 1;
834                 }
835                 printf("\t Test %s@%s, address %ld, value %ld: %s\n",
836                         items[i].name, items[i].error_type_str, items[i].address,
837                         items[i].value, pass ? "Pass" : "Fail");
838         }
839
840 mem_free:
841         if (items) {
842                 free(items);
843                 items = NULL;
844         }
845 }
846
847 static void amdgpu_ras_inject_test(void)
848 {
849         int i;
850         for (i = 0; i < devices_count; i++) {
851                 set_test_card(i);
852                 __amdgpu_ras_inject_test();
853         }
854 }
855
856 static void __amdgpu_ras_query_test(void)
857 {
858         unsigned long ue, ce;
859         int ret;
860         int i;
861
862         for (i = 0; i < AMDGPU_RAS_BLOCK__LAST; i++) {
863                 if (amdgpu_ras_is_feature_supported(i) <= 0)
864                         continue;
865
866                 if (!((1 << i) & ras_block_mask_query))
867                         continue;
868
869                 ret = amdgpu_ras_query_err_count(i, &ue, &ce);
870                 CU_ASSERT_EQUAL(ret, 0);
871         }
872 }
873
874 static void amdgpu_ras_query_test(void)
875 {
876         int i;
877         for (i = 0; i < devices_count; i++) {
878                 set_test_card(i);
879                 __amdgpu_ras_query_test();
880         }
881 }
882
883 static void amdgpu_ras_basic_test(void)
884 {
885         unsigned long ue, ce;
886         char name[1024];
887         int ret;
888         int i;
889         int j;
890         uint32_t features;
891         char path[1024];
892
893         ret = is_file_ok("/sys/module/amdgpu/parameters/ras_mask", O_RDONLY);
894         CU_ASSERT_EQUAL(ret, 0);
895
896         for (i = 0; i < devices_count; i++) {
897                 set_test_card(i);
898
899                 ret = amdgpu_query_info(device_handle, AMDGPU_INFO_RAS_ENABLED_FEATURES,
900                                 sizeof(features), &features);
901                 CU_ASSERT_EQUAL(ret, 0);
902
903                 sprintf(path, "%s%s", get_ras_debugfs_root(), "ras_ctrl");
904                 ret = is_file_ok(path, O_WRONLY);
905                 CU_ASSERT_EQUAL(ret, 0);
906
907                 sprintf(path, "%s%s", get_ras_sysfs_root(), "features");
908                 ret = is_file_ok(path, O_RDONLY);
909                 CU_ASSERT_EQUAL(ret, 0);
910
911                 for (j = 0; j < AMDGPU_RAS_BLOCK__LAST; j++) {
912                         ret = amdgpu_ras_is_feature_supported(j);
913                         if (ret <= 0)
914                                 continue;
915
916                         if (!((1 << j) & ras_block_mask_basic))
917                                 continue;
918
919                         sprintf(path, "%s%s%s", get_ras_sysfs_root(), ras_block_str(j), "_err_count");
920                         ret = is_file_ok(path, O_RDONLY);
921                         CU_ASSERT_EQUAL(ret, 0);
922
923                         sprintf(path, "%s%s%s", get_ras_debugfs_root(), ras_block_str(j), "_err_inject");
924                         ret = is_file_ok(path, O_WRONLY);
925                         CU_ASSERT_EQUAL(ret, 0);
926                 }
927         }
928 }