2 * Copyright 2017 Advanced Micro Devices, Inc.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
24 #include "CUnit/Basic.h"
26 #include "amdgpu_test.h"
27 #include "amdgpu_drm.h"
28 #include "amdgpu_internal.h"
/*
 * Table of RAS block name strings; ras_block_str() indexes it by block id.
 * NOTE(review): the initializer entries are not visible in this listing.
 */
35 const char *ras_block_string[] = {
/* Look up the name string for RAS block id `i` in ras_block_string. */
#define ras_block_str(i) (ras_block_string[(i)])
/*
 * RAS hardware block identifiers. The numeric value of an enumerator is used
 * as a bit position in the feature/capability masks ((1 << block) & mask)
 * and as an index into ras_block_string via ras_block_str().
 * NOTE(review): the embedded line numbering has gaps inside this enum, so
 * enumerators may be missing from this listing - confirm against the
 * complete file before relying on the numeric values.
 */
54 enum amdgpu_ras_block {
55 AMDGPU_RAS_BLOCK__UMC = 0,
56 AMDGPU_RAS_BLOCK__SDMA,
57 AMDGPU_RAS_BLOCK__GFX,
58 AMDGPU_RAS_BLOCK__MMHUB,
59 AMDGPU_RAS_BLOCK__ATHUB,
60 AMDGPU_RAS_BLOCK__PCIE_BIF,
61 AMDGPU_RAS_BLOCK__HDP,
62 AMDGPU_RAS_BLOCK__XGMI_WAFL,
64 AMDGPU_RAS_BLOCK__SMN,
65 AMDGPU_RAS_BLOCK__SEM,
66 AMDGPU_RAS_BLOCK__MP0,
67 AMDGPU_RAS_BLOCK__MP1,
68 AMDGPU_RAS_BLOCK__FUSE,
/* Sentinel: one past the last block; used as the block count and loop bound. */
70 AMDGPU_RAS_BLOCK__LAST
/* Number of RAS blocks (the enum sentinel). */
#define AMDGPU_RAS_BLOCK_COUNT AMDGPU_RAS_BLOCK__LAST
/* 64-bit mask with the low AMDGPU_RAS_BLOCK_COUNT bits set, one per block. */
#define AMDGPU_RAS_BLOCK_MASK (~(~0ULL << (AMDGPU_RAS_BLOCK_COUNT)))
/*
 * GFX sub-block identifiers. Entries are grouped by the unit named in the
 * identifier; each group is bracketed by *_INDEX_START / *_INDEX_END aliases
 * that equal the group's first and last member. An *_INDEX_START that has no
 * explicit value takes the previous enumerator's value plus one, so the groups
 * are numbered back to back.
 * NOTE(review): lines are missing from this listing (see the gaps in the
 * embedded numbering); confirm no enumerator was dropped.
 */
76 enum amdgpu_ras_gfx_subblock {
/* CPC */
78 AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START = 0,
79 AMDGPU_RAS_BLOCK__GFX_CPC_SCRATCH =
80 AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START,
81 AMDGPU_RAS_BLOCK__GFX_CPC_UCODE,
82 AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME1,
83 AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME1,
84 AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME1,
85 AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME2,
86 AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME2,
87 AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2,
88 AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_END =
89 AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2,
/* CPF */
91 AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START,
92 AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME2 =
93 AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START,
94 AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME1,
95 AMDGPU_RAS_BLOCK__GFX_CPF_TAG,
96 AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPF_TAG,
/* CPG */
98 AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START,
99 AMDGPU_RAS_BLOCK__GFX_CPG_DMA_ROQ =
100 AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START,
101 AMDGPU_RAS_BLOCK__GFX_CPG_DMA_TAG,
102 AMDGPU_RAS_BLOCK__GFX_CPG_TAG,
103 AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPG_TAG,
/* GDS */
105 AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START,
106 AMDGPU_RAS_BLOCK__GFX_GDS_MEM = AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START,
107 AMDGPU_RAS_BLOCK__GFX_GDS_INPUT_QUEUE,
108 AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM,
109 AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM,
110 AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
111 AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_END =
112 AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM,
/* SPI (single entry, no START/END aliases) */
114 AMDGPU_RAS_BLOCK__GFX_SPI_SR_MEM,
/* SQ */
116 AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START,
117 AMDGPU_RAS_BLOCK__GFX_SQ_SGPR = AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START,
118 AMDGPU_RAS_BLOCK__GFX_SQ_LDS_D,
119 AMDGPU_RAS_BLOCK__GFX_SQ_LDS_I,
120 AMDGPU_RAS_BLOCK__GFX_SQ_VGPR,
121 AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_END = AMDGPU_RAS_BLOCK__GFX_SQ_VGPR,
/* SQC (3 sub-ranges: INDEX0, INDEX1, INDEX2) */
123 AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START,
125 AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START =
126 AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START,
127 AMDGPU_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO =
128 AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START,
129 AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF,
130 AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO,
131 AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF,
132 AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
133 AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF,
134 AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
135 AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_END =
136 AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO,
138 AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START,
139 AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM =
140 AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START,
141 AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO,
142 AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO,
143 AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM,
144 AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM,
145 AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO,
146 AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO,
147 AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM,
148 AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
149 AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_END =
150 AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM,
152 AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START,
153 AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM =
154 AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START,
155 AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO,
156 AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO,
157 AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM,
158 AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM,
159 AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO,
160 AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO,
161 AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM,
162 AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
163 AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END =
164 AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM,
165 AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_END =
166 AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END,
/* TA */
168 AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START,
169 AMDGPU_RAS_BLOCK__GFX_TA_FS_DFIFO =
170 AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START,
171 AMDGPU_RAS_BLOCK__GFX_TA_FS_AFIFO,
172 AMDGPU_RAS_BLOCK__GFX_TA_FL_LFIFO,
173 AMDGPU_RAS_BLOCK__GFX_TA_FX_LFIFO,
174 AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO,
175 AMDGPU_RAS_BLOCK__GFX_TA_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO,
/* TCA */
177 AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START,
178 AMDGPU_RAS_BLOCK__GFX_TCA_HOLE_FIFO =
179 AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START,
180 AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO,
181 AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_END =
182 AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO,
183 /* TCC (5 sub-ranges) */
184 AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START,
186 AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START =
187 AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START,
188 AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA =
189 AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START,
190 AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
191 AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
192 AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
193 AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0,
194 AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1,
195 AMDGPU_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG,
196 AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
197 AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_END =
198 AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG,
200 AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START,
201 AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_DEC =
202 AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START,
203 AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
204 AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_END =
205 AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER,
207 AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START,
208 AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_DATA =
209 AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START,
210 AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_CONTROL,
211 AMDGPU_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO,
212 AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_RETURN,
213 AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ,
214 AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO,
215 AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM,
216 AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
217 AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_END =
218 AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO,
220 AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START,
221 AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO =
222 AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START,
223 AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
224 AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_END =
225 AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM,
227 AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START,
228 AMDGPU_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN =
229 AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START,
230 AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
231 AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END =
232 AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER,
233 AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_END =
234 AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END,
/* TCI (single entry, no START/END aliases) */
236 AMDGPU_RAS_BLOCK__GFX_TCI_WRITE_RAM,
/* TCP */
238 AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START,
239 AMDGPU_RAS_BLOCK__GFX_TCP_CACHE_RAM =
240 AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START,
241 AMDGPU_RAS_BLOCK__GFX_TCP_LFIFO_RAM,
242 AMDGPU_RAS_BLOCK__GFX_TCP_CMD_FIFO,
243 AMDGPU_RAS_BLOCK__GFX_TCP_VM_FIFO,
244 AMDGPU_RAS_BLOCK__GFX_TCP_DB_RAM,
245 AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0,
246 AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
247 AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_END =
248 AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1,
/* TD */
250 AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START,
251 AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_LO =
252 AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START,
253 AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_HI,
254 AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO,
255 AMDGPU_RAS_BLOCK__GFX_TD_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO,
256 /* EA (3 sub-ranges) */
257 AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START,
259 AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START =
260 AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START,
261 AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM =
262 AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START,
263 AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM,
264 AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM,
265 AMDGPU_RAS_BLOCK__GFX_EA_RRET_TAGMEM,
266 AMDGPU_RAS_BLOCK__GFX_EA_WRET_TAGMEM,
267 AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM,
268 AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM,
269 AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
270 AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_END =
271 AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM,
273 AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START,
274 AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM =
275 AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START,
276 AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM,
277 AMDGPU_RAS_BLOCK__GFX_EA_IORD_CMDMEM,
278 AMDGPU_RAS_BLOCK__GFX_EA_IOWR_CMDMEM,
279 AMDGPU_RAS_BLOCK__GFX_EA_IOWR_DATAMEM,
280 AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM,
281 AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
282 AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_END =
283 AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM,
285 AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START,
286 AMDGPU_RAS_BLOCK__GFX_EA_MAM_D0MEM =
287 AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START,
288 AMDGPU_RAS_BLOCK__GFX_EA_MAM_D1MEM,
289 AMDGPU_RAS_BLOCK__GFX_EA_MAM_D2MEM,
290 AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM,
291 AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END =
292 AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM,
293 AMDGPU_RAS_BLOCK__GFX_EA_INDEX_END =
294 AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END,
/* UTC entries (single enumerators, no START/END aliases) */
296 AMDGPU_RAS_BLOCK__UTC_VML2_BANK_CACHE,
298 AMDGPU_RAS_BLOCK__UTC_VML2_WALKER,
299 /* UTC ATC L2 2MB cache */
300 AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK,
301 /* UTC ATC L2 4KB cache */
302 AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK,
/* Sentinel: one past the last GFX sub-block. */
303 AMDGPU_RAS_BLOCK__GFX_MAX
/*
 * RAS error kinds. Apart from NONE (0) the visible values are distinct
 * single bits (1, 2, 4, 8).
 */
306 enum amdgpu_ras_error_type {
307 AMDGPU_RAS_ERROR__NONE = 0,
308 AMDGPU_RAS_ERROR__PARITY = 1,
309 AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE = 2,
310 AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE = 4,
311 AMDGPU_RAS_ERROR__POISON = 8,
/*
 * One injection test case, filled in by amdgpu_ras_get_test_items() from
 * amdgpu_ras.json.
 * NOTE(review): members name, block, sub_block, address and value are
 * accessed elsewhere in this file but their declarations are not visible in
 * this listing.
 */
314 struct ras_test_item {
/* Copy of the test's "type" string from the JSON file. */
318 char error_type_str[64];
/* Numeric error type looked up from the JSON "type" table. */
319 enum amdgpu_ras_error_type type;
/*
 * Header common to RAS control requests: target block, error type and
 * sub-block index.
 * NOTE(review): a `name` member is written through inject->head.name in
 * amdgpu_ras_inject(), but its declaration is not visible in this listing.
 */
324 struct ras_common_if {
325 enum amdgpu_ras_block block;
326 enum amdgpu_ras_error_type type;
327 uint32_t sub_block_index;
/*
 * Error-injection request: the common header plus injection parameters.
 * NOTE(review): amdgpu_ras_inject() assigns `address` and `value` members
 * whose declarations are not visible in this listing.
 */
331 struct ras_inject_if {
332 struct ras_common_if head;
/*
 * Request object that amdgpu_ras_invoke() writes, as raw bytes, to the
 * "ras_ctrl" debugfs node.
 * NOTE(review): an `op` member is initialized in amdgpu_ras_inject()
 * (.op = 2) but is not visible here, and the exact layout of head/inject
 * cannot be confirmed from this listing.
 */
337 struct ras_debug_if {
339 struct ras_common_if head;
340 struct ras_inject_if inject;
/*
 * Default block masks, one bit per enum amdgpu_ras_block value.
 * For now only UMC, GFX and SDMA are implemented: the inject and query
 * masks cover UMC and GFX, the basic mask additionally covers SDMA.
 */
#define DEFAULT_RAS_BLOCK_MASK_INJECT \
	((1 << AMDGPU_RAS_BLOCK__UMC) | (1 << AMDGPU_RAS_BLOCK__GFX))
#define DEFAULT_RAS_BLOCK_MASK_QUERY \
	((1 << AMDGPU_RAS_BLOCK__UMC) | (1 << AMDGPU_RAS_BLOCK__GFX))
#define DEFAULT_RAS_BLOCK_MASK_BASIC \
	((1 << AMDGPU_RAS_BLOCK__UMC) | \
	 (1 << AMDGPU_RAS_BLOCK__SDMA) | \
	 (1 << AMDGPU_RAS_BLOCK__GFX))
353 static uint32_t ras_block_mask_inject = DEFAULT_RAS_BLOCK_MASK_INJECT;
354 static uint32_t ras_block_mask_query = DEFAULT_RAS_BLOCK_MASK_INJECT;
355 static uint32_t ras_block_mask_basic = DEFAULT_RAS_BLOCK_MASK_BASIC;
/*
 * Per-device block masks for the tests.
 * NOTE(review): set_test_card() also reads query_mask and basic_mask
 * members; their declarations are not visible in this listing.
 */
357 struct ras_test_mask {
358 uint32_t inject_mask;
/*
 * State kept for each RAS-capable device found by suite_ras_tests_init().
 * NOTE(review): `id` and `capability` members are read in set_test_card()
 * but are not visible in this listing.
 */
363 struct amdgpu_ras_data {
364 amdgpu_device_handle device_handle;
367 struct ras_test_mask test_mask;
370 /* All devices that support RAS; filled in by suite_ras_tests_init(). */
371 static struct amdgpu_ras_data devices[MAX_CARDS_SUPPORTED];
/* Number of valid entries in devices[]. */
372 static int devices_count;
/*
 * Associates a PCI device/revision pair with the test masks to use for it.
 * NOTE(review): a `device_id` member is compared in
 * amdgpu_ras_get_test_mask() but is not visible in this listing.
 */
374 struct ras_DID_test_mask{
376 uint16_t revision_id;
377 struct ras_test_mask test_mask;
380 /* white list for inject test. */
/*
 * Brace initializers for struct ras_test_mask.
 * RAS_BLOCK_MASK_ALL lists the default inject, query and basic masks, in
 * that order. RAS_BLOCK_MASK_QUERY_BASIC is the fallback returned by
 * amdgpu_ras_get_test_mask() for devices not in ras_DID_array.
 * NOTE(review): lines of both macro bodies (including the closing braces)
 * are missing from this listing.
 */
381 #define RAS_BLOCK_MASK_ALL {\
382 DEFAULT_RAS_BLOCK_MASK_INJECT,\
383 DEFAULT_RAS_BLOCK_MASK_QUERY,\
384 DEFAULT_RAS_BLOCK_MASK_BASIC\
387 #define RAS_BLOCK_MASK_QUERY_BASIC {\
389 DEFAULT_RAS_BLOCK_MASK_QUERY,\
390 DEFAULT_RAS_BLOCK_MASK_BASIC\
/*
 * Device/revision pairs that get RAS_BLOCK_MASK_ALL. Devices not listed
 * here fall back to the default mask in amdgpu_ras_get_test_mask().
 */
393 static const struct ras_DID_test_mask ras_DID_array[] = {
394 {0x66a1, 0x00, RAS_BLOCK_MASK_ALL},
395 {0x66a1, 0x01, RAS_BLOCK_MASK_ALL},
396 {0x66a1, 0x04, RAS_BLOCK_MASK_ALL},
399 static struct ras_test_mask amdgpu_ras_get_test_mask(drmDevicePtr device)
402 static struct ras_test_mask default_test_mask = RAS_BLOCK_MASK_QUERY_BASIC;
404 for (i = 0; i < sizeof(ras_DID_array) / sizeof(ras_DID_array[0]); i++) {
405 if (ras_DID_array[i].device_id == device->deviceinfo.pci->device_id &&
406 ras_DID_array[i].revision_id == device->deviceinfo.pci->revision_id)
407 return ras_DID_array[i].test_mask;
409 return default_test_mask;
/*
 * Query AMDGPU_INFO_RAS_ENABLED_FEATURES for a device and return the
 * supported_features field of the result.
 * NOTE(review): the declaration of `features` and the handling of a failing
 * amdgpu_query_info() call are not visible in this listing.
 */
412 static uint32_t amdgpu_ras_lookup_capability(amdgpu_device_handle device_handle)
415 uint64_t feature_mask;
417 uint32_t enabled_features;
418 uint32_t supported_features;
423 ret = amdgpu_query_info(device_handle, AMDGPU_INFO_RAS_ENABLED_FEATURES,
424 sizeof(features), &features);
428 return features.supported_features;
/* Forward declaration: amdgpu_ras_lookup_id() calls this before its definition. */
431 static int get_file_contents(char *file, char *buf, int size);
/*
 * Find the debugfs DRI index that belongs to `device`.
 * For each index below MAX_CARDS_SUPPORTED it reads
 * /sys/kernel/debug/dri/<i>/name, parses the PCI address from the
 * "amdgpu dev=dddd:bb:dd.f" text and compares it with device->businfo.pci.
 * NOTE(review): the return statements are not visible in this listing;
 * presumably the matching index is returned - confirm.
 */
433 static int amdgpu_ras_lookup_id(drmDevicePtr device)
441 for (i = 0; i < MAX_CARDS_SUPPORTED; i++) {
/* Clear both buffers so stale data from the previous index cannot match. */
442 memset(str, 0, sizeof(str));
443 memset(&info, 0, sizeof(info));
444 sprintf(path, "/sys/kernel/debug/dri/%d/name", i);
445 if (get_file_contents(path, str, sizeof(str)) <= 0)
448 ret = sscanf(str, "amdgpu dev=%04hx:%02hhx:%02hhx.%01hhx",
449 &info.domain, &info.bus, &info.dev, &info.func);
/* Whole-struct comparison; relies on info having been zeroed above. */
453 if (memcmp(&info, device->businfo.pci, sizeof(info)) == 0)
/*
 * Decide whether the RAS suite should run: walks the opened amdgpu file
 * descriptors in drm_amdgpu[] and looks for a PCI device that reports a
 * non-zero RAS capability.
 * NOTE(review): the return statements are not visible in this listing, and
 * no drmFreeDevice() call for `device` is visible either - confirm the
 * device info is released.
 */
459 CU_BOOL suite_ras_tests_enable(void)
461 amdgpu_device_handle device_handle;
462 uint32_t major_version;
463 uint32_t minor_version;
/* Stop at the first unused slot (negative fd). */
467 for (i = 0; i < MAX_CARDS_SUPPORTED && drm_amdgpu[i] >= 0; i++) {
468 if (amdgpu_device_initialize(drm_amdgpu[i], &major_version,
469 &minor_version, &device_handle))
472 if (drmGetDevice2(drm_amdgpu[i],
473 DRM_DEVICE_GET_PCI_REVISION,
477 if (device->bustype == DRM_BUS_PCI &&
478 amdgpu_ras_lookup_capability(device_handle)) {
/* Capable device found: release the handle before leaving. */
479 amdgpu_device_deinitialize(device_handle);
483 if (amdgpu_device_deinitialize(device_handle))
/*
 * Suite setup: collects every RAS-capable PCI amdgpu device into devices[].
 * A device is dropped (and its handle deinitialized) when drmGetDevice2()
 * fails, when it is not on the PCI bus, when its RAS capability is 0, or in
 * the branch that follows the debugfs id lookup.
 * Returns CUE_SINIT_FAILED when no device was collected.
 * NOTE(review): the success return and several conditions are not visible in
 * this listing.
 */
490 int suite_ras_tests_init(void)
493 amdgpu_device_handle device_handle;
494 uint32_t major_version;
495 uint32_t minor_version;
497 struct ras_test_mask test_mask;
502 for (i = 0; i < MAX_CARDS_SUPPORTED && drm_amdgpu[i] >= 0; i++) {
503 r = amdgpu_device_initialize(drm_amdgpu[i], &major_version,
504 &minor_version, &device_handle);
508 if (drmGetDevice2(drm_amdgpu[i],
509 DRM_DEVICE_GET_PCI_REVISION,
511 amdgpu_device_deinitialize(device_handle);
515 if (device->bustype != DRM_BUS_PCI) {
516 amdgpu_device_deinitialize(device_handle);
520 capability = amdgpu_ras_lookup_capability(device_handle);
521 if (capability == 0) {
522 amdgpu_device_deinitialize(device_handle);
/* debugfs DRI index used later to build the sysfs/debugfs paths. */
527 id = amdgpu_ras_lookup_id(device);
529 amdgpu_device_deinitialize(device_handle);
533 test_mask = amdgpu_ras_get_test_mask(device);
/* The handle stays open; suite_ras_tests_clean() deinitializes it. */
535 devices[devices_count++] = (struct amdgpu_ras_data) {
536 device_handle, id, capability, test_mask,
540 if (devices_count == 0)
541 return CUE_SINIT_FAILED;
/*
 * Suite teardown: deinitializes the handle of every collected device.
 * `ret` starts as CUE_SUCCESS and is set to CUE_SCLEAN_FAILED in the loop.
 * NOTE(review): the condition guarding that assignment and the final return
 * are not visible in this listing.
 */
546 int suite_ras_tests_clean(void)
550 int ret = CUE_SUCCESS;
552 for (i = 0; i < devices_count; i++) {
553 r = amdgpu_device_deinitialize(devices[i].device_handle);
555 ret = CUE_SCLEAN_FAILED;
/* Forward declarations of the test entry points referenced by ras_tests[]. */
560 static void amdgpu_ras_disable_test(void);
561 static void amdgpu_ras_enable_test(void);
562 static void amdgpu_ras_inject_test(void);
563 static void amdgpu_ras_query_test(void);
564 static void amdgpu_ras_basic_test(void);
/*
 * CUnit test table for the RAS suite, listed in the order basic, query,
 * inject, disable, enable.
 * NOTE(review): the table terminator is not visible in this listing.
 */
566 CU_TestInfo ras_tests[] = {
567 { "ras basic test", amdgpu_ras_basic_test },
568 { "ras query test", amdgpu_ras_query_test },
569 { "ras inject test", amdgpu_ras_inject_test },
570 { "ras disable test", amdgpu_ras_disable_test },
572 { "ras enable test", amdgpu_ras_enable_test },
579 static int test_card;
580 static char sysfs_path[1024];
581 static char debugfs_path[1024];
582 static uint32_t ras_mask;
583 static amdgpu_device_handle device_handle;
/*
 * Make devices[card] the current test target: rebuilds the sysfs and debugfs
 * RAS root paths from the device's DRI id and copies its capability, handle
 * and per-test block masks into the file-level state.
 * NOTE(review): any validation of `card` and the return value are not
 * visible in this listing.
 */
585 static int set_test_card(int card)
590 sprintf(sysfs_path, "/sys/class/drm/card%d/device/ras/", devices[card].id);
591 sprintf(debugfs_path, "/sys/kernel/debug/dri/%d/ras/", devices[card].id);
592 ras_mask = devices[card].capability;
593 device_handle = devices[card].device_handle;
594 ras_block_mask_inject = devices[card].test_mask.inject_mask;
595 ras_block_mask_query = devices[card].test_mask.query_mask;
596 ras_block_mask_basic = devices[card].test_mask.basic_mask;
/*
 * Returns the string callers prepend to RAS sysfs node names.
 * NOTE(review): body not visible; presumably returns sysfs_path - confirm.
 */
601 static const char *get_ras_sysfs_root(void)
/*
 * Returns the string callers prepend to RAS debugfs node names.
 * NOTE(review): body not visible; presumably returns debugfs_path - confirm.
 */
606 static const char *get_ras_debugfs_root(void)
/*
 * Open `file` write-only and write `size` bytes from `buf` with a single
 * write() call.
 * NOTE(review): error handling, close() and the return value are not visible
 * in this listing; amdgpu_ras_invoke() treats the result as a byte count.
 */
611 static int set_file_contents(char *file, char *buf, int size)
614 fd = open(file, O_WRONLY);
617 n = write(fd, buf, size);
/*
 * Open `file` read-only and read up to `size` bytes into `buf` with a single
 * read() call. Callers treat a result <= 0 as failure.
 * NOTE(review): error handling, close() and the return value are not visible
 * in this listing; read() does not NUL-terminate `buf`.
 */
622 static int get_file_contents(char *file, char *buf, int size)
625 fd = open(file, O_RDONLY);
628 n = read(fd, buf, size);
/*
 * Check that `file` can be opened with the given open(2) `flags`.
 * Callers assert that the result equals 0 for an accessible file.
 * NOTE(review): the result handling is not visible in this listing.
 */
633 static int is_file_ok(char *file, int flags)
637 fd = open(file, flags);
644 static int amdgpu_ras_is_feature_enabled(enum amdgpu_ras_block block)
646 uint32_t feature_mask;
649 ret = amdgpu_query_info(device_handle, AMDGPU_INFO_RAS_ENABLED_FEATURES,
650 sizeof(feature_mask), &feature_mask);
654 return (1 << block) & feature_mask;
657 static int amdgpu_ras_is_feature_supported(enum amdgpu_ras_block block)
659 return (1 << block) & ras_mask;
/*
 * Send a RAS control request: writes the raw bytes of *data to the
 * "ras_ctrl" node under the debugfs RAS root.
 * NOTE(review): the statement below continues on a line that is not visible
 * in this listing, as does the return; callers assert a result of 0.
 */
662 static int amdgpu_ras_invoke(struct ras_debug_if *data)
667 sprintf(path, "%s%s", get_ras_debugfs_root(), "ras_ctrl");
669 ret = set_file_contents(path, (char *)data, sizeof(*data))
/*
 * Read the error counters of `block` from its sysfs "<block>_err_count" node.
 *
 * @block: RAS block to query; must be supported by the selected card.
 * @ue:    receives the count parsed after "ue: ".
 * @ce:    receives the count parsed after "ce: ".
 *
 * NOTE(review): the return values of the individual branches are not visible
 * in this listing; callers assert a result of 0 on success.
 */
674 static int amdgpu_ras_query_err_count(enum amdgpu_ras_block block,
675 unsigned long *ue, unsigned long *ce)
683 if (amdgpu_ras_is_feature_supported(block) <= 0)
686 sprintf(name, "%s%s%s", get_ras_sysfs_root(), ras_block_str(block), "_err_count");
688 if (is_file_ok(name, O_RDONLY))
691 if (get_file_contents(name, buf, sizeof(buf)) <= 0)
/* Both counters must parse; the node format is "ue: N\nce: M". */
694 if (sscanf(buf, "ue: %lu\nce: %lu", ue, ce) != 2)
/*
 * Inject an error into a RAS block through the debugfs control node.
 *
 * @block:     target RAS block; must currently be enabled.
 * @sub_block: sub-block index stored in the request header.
 * @type:      error type to inject.
 * @address:   address stored in the injection request.
 * @value:     value stored in the injection request.
 *
 * The request is built in a ras_debug_if initialized with .op = 2 and sent
 * with amdgpu_ras_invoke(); the result is asserted to be 0.
 * NOTE(review): the return statements are not visible in this listing.
 */
700 static int amdgpu_ras_inject(enum amdgpu_ras_block block,
701 uint32_t sub_block, enum amdgpu_ras_error_type type,
702 uint64_t address, uint64_t value)
704 struct ras_debug_if data = { .op = 2, };
705 struct ras_inject_if *inject = &data.inject;
708 if (amdgpu_ras_is_feature_enabled(block) <= 0) {
709 fprintf(stderr, "block id(%d) is not valid\n", block);
713 inject->head.block = block;
714 inject->head.type = type;
715 inject->head.sub_block_index = sub_block;
/*
 * NOTE(review): strncpy() does not NUL-terminate when the source is 32
 * characters or longer; this assumes head.name holds at least 32 bytes and
 * that block names are shorter - confirm against the struct definition.
 */
716 strncpy(inject->head.name, ras_block_str(block), 32);
717 inject->address = address;
718 inject->value = value;
720 ret = amdgpu_ras_invoke(&data);
721 CU_ASSERT_EQUAL(ret, 0);
/*
 * Enable (enable != 0) or disable RAS on every supported block of the
 * selected card, then verify the resulting feature state.
 * NOTE(review): how `data` is filled from `head` and `enable` is not visible
 * in this listing.
 */
729 static void amdgpu_ras_features_test(int enable)
731 struct ras_debug_if data;
736 for (i = 0; i < AMDGPU_RAS_BLOCK__LAST; i++) {
737 struct ras_common_if head = {
739 .type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE,
740 .sub_block_index = 0,
744 if (amdgpu_ras_is_feature_supported(i) <= 0)
749 ret = amdgpu_ras_invoke(&data);
750 CU_ASSERT_EQUAL(ret, 0);
/*
 * NOTE(review): this XOR is 0 only when `enable` and the value returned by
 * amdgpu_ras_is_feature_enabled() are numerically equal, i.e. both 0 or
 * both 1 - verify that callee returns 0/1 and not a raw bit value.
 */
755 ret = enable ^ amdgpu_ras_is_feature_enabled(i);
756 CU_ASSERT_EQUAL(ret, 0);
/*
 * "ras disable test": runs amdgpu_ras_features_test(0) once per collected
 * device.
 * NOTE(review): the per-device card selection is not visible in this listing.
 */
760 static void amdgpu_ras_disable_test(void)
763 for (i = 0; i < devices_count; i++) {
765 amdgpu_ras_features_test(0);
/*
 * "ras enable test": runs amdgpu_ras_features_test(1) once per collected
 * device.
 * NOTE(review): the per-device card selection is not visible in this listing.
 */
769 static void amdgpu_ras_enable_test(void)
772 for (i = 0; i < devices_count; i++) {
774 amdgpu_ras_features_test(1);
/*
 * Resolve a block name to its numeric id: looks up `name` in the JSON
 * "block" object and returns the integer stored under its "index" key.
 * NOTE(review): the values returned when a lookup fails are not visible in
 * this listing; the caller rejects ids outside [UMC, LAST).
 */
778 static int _json_get_block_id(json_object *block_obj, const char *name)
780 json_object *item_obj, *index_obj;
782 if (!json_object_object_get_ex(block_obj, name, &item_obj))
785 if (!json_object_object_get_ex(item_obj, "index", &index_obj))
788 return json_object_get_int(index_obj);
/*
 * Resolve a sub-block name to its numeric id: looks up `block_name` in the
 * JSON "block" object, then its "subblock" object, and returns the integer
 * stored under `subblock_name`.
 * NOTE(review): the values returned when a lookup fails are not visible in
 * this listing; the caller treats a negative result as invalid.
 */
791 static int _json_get_subblock_id(json_object *block_obj, const char *block_name,
792 const char *subblock_name)
794 json_object *item_obj, *subblock_obj, *name_obj;
796 if (!json_object_object_get_ex(block_obj, block_name, &item_obj))
799 if (!json_object_object_get_ex(item_obj, "subblock", &subblock_obj))
802 if (!json_object_object_get_ex(subblock_obj, subblock_name, &name_obj))
805 return json_object_get_int(name_obj);
/*
 * Load the injection test list from amdgpu_ras.json.
 *
 * The file is looked up as ./amdgpu_ras.json and as
 * /usr/share/libdrm/amdgpu_ras.json. It must contain the top-level keys
 * "version", "block", "type" and "tests". Each element of "tests" becomes one
 * struct ras_test_item in a malloc()ed array.
 *
 * @pitems: out parameter for the array.
 * @size:   out parameter for the element count.
 *
 * NOTE(review): the error paths, the assignments to *pitems / *size and the
 * return values are not visible in this listing; presumably the caller owns
 * and frees the array - confirm.
 */
808 static int amdgpu_ras_get_test_items(struct ras_test_item **pitems, int *size)
810 json_object *root_obj = NULL;
811 json_object *block_obj = NULL;
812 json_object *type_obj = NULL;
813 json_object *tests_obj = NULL;
814 json_object *test_obj = NULL;
815 json_object *tmp_obj = NULL;
816 json_object *tmp_type_obj = NULL;
817 json_object *subblock_obj = NULL;
819 struct ras_test_item *items = NULL;
822 root_obj = json_object_from_file("./amdgpu_ras.json");
824 root_obj = json_object_from_file(
825 "/usr/share/libdrm/amdgpu_ras.json");
828 CU_FAIL_FATAL("Couldn't find amdgpu_ras.json");
/* The "version" key is only checked for presence here. */
833 if (!json_object_object_get_ex(root_obj, "version", &tmp_obj)) {
834 CU_FAIL_FATAL("Wrong format of amdgpu_ras.json");
838 /* Block Definition */
839 if (!json_object_object_get_ex(root_obj, "block", &block_obj)) {
840 fprintf(stderr, "block isn't defined\n");
844 /* Type Definition */
845 if (!json_object_object_get_ex(root_obj, "type", &type_obj)) {
846 fprintf(stderr, "type isn't defined\n");
850 /* Enumerate test items */
851 if (!json_object_object_get_ex(root_obj, "tests", &tests_obj)) {
852 fprintf(stderr, "tests are empty\n");
856 length = json_object_array_length(tests_obj);
/* malloc() does not zero the array; every member read later must be set below. */
858 items = malloc(sizeof(struct ras_test_item) * length);
860 fprintf(stderr, "malloc failed\n");
864 for (i = 0; i < length; i++) {
865 test_obj = json_object_array_get_idx(tests_obj, i);
868 if (!json_object_object_get_ex(test_obj, "name", &tmp_obj)) {
869 fprintf(stderr, "Test %d has no name\n", i);
/*
 * NOTE(review): strncpy() leaves the destination unterminated when the JSON
 * string is 64 characters or longer, and the array is not zeroed - assumes
 * names are shorter than the buffer; confirm.
 */
872 strncpy(items[i].name, json_object_get_string(tmp_obj), 64);
875 if (!json_object_object_get_ex(test_obj, "block", &tmp_obj)) {
876 fprintf(stderr, "Test:%s: block isn't defined\n",
/* Translate the block name through the "block" table into a numeric id. */
880 items[i].block = _json_get_block_id(
881 block_obj, json_object_get_string(tmp_obj));
884 if (items[i].block < AMDGPU_RAS_BLOCK__UMC ||
885 items[i].block >= AMDGPU_RAS_BLOCK__LAST) {
886 fprintf(stderr, "Test:%s: block id %d is invalid\n",
887 items[i].name, items[i].block);
/* "subblock" is optional; the sub-block index is set to 0 on the other branch. */
892 if (json_object_object_get_ex(test_obj, "subblock", &tmp_obj)) {
893 json_object_object_get_ex(test_obj, "block",
896 items[i].sub_block = _json_get_subblock_id(
898 json_object_get_string(subblock_obj),
899 json_object_get_string(tmp_obj));
900 if (items[i].sub_block < 0) {
901 fprintf(stderr, "Test:%s: subblock in block id %d is invalid\n",
902 items[i].name, items[i].block);
906 items[i].sub_block = 0;
/* "type" is a name resolved through the "type" table. */
909 if (json_object_object_get_ex(test_obj, "type", &tmp_obj)) {
/* NOTE(review): same strncpy() termination caveat as for the name above. */
910 strncpy(items[i].error_type_str,
911 json_object_get_string(tmp_obj), 64);
913 if (json_object_object_get_ex(type_obj,
914 json_object_get_string(tmp_obj), &tmp_type_obj))
915 items[i].type = json_object_get_int(tmp_type_obj);
917 items[i].type = (enum amdgpu_ras_error_type)0;
/*
 * NOTE(review): json_object_get_int() yields a 32-bit int; if address/value
 * are meant to carry 64-bit quantities, json_object_get_int64() may be
 * needed - confirm the member types.
 */
921 if (json_object_object_get_ex(test_obj, "address", &tmp_obj))
922 items[i].address = json_object_get_int(tmp_obj);
924 items[i].address = 0; /* default address 0 */
927 if (json_object_object_get_ex(test_obj, "value", &tmp_obj))
928 items[i].value = json_object_get_int(tmp_obj);
930 items[i].value = 0; /* default value 0 */
/* Drop the reference on the parsed document. */
938 json_object_put(root_obj);
/*
 * Run every test item from amdgpu_ras.json against the selected card:
 * record the block's error counters, inject the error, then poll the
 * counters while `timeout` is positive and compare them with the recorded
 * ones. One result line is printed per item.
 * NOTE(review): the mask/skip checks, the timeout handling and the release
 * of `items` are not visible in this listing.
 */
943 static void __amdgpu_ras_inject_test(void)
945 struct ras_test_item *items = NULL;
948 unsigned long old_ue, old_ce;
949 unsigned long ue, ce;
953 ret = amdgpu_ras_get_test_items(&items, &size);
954 CU_ASSERT_EQUAL(ret, 0);
959 for (i = 0; i < size; i++) {
/* Baseline counters taken before the injection. */
963 ret = amdgpu_ras_query_err_count(items[i].block, &old_ue,
965 CU_ASSERT_EQUAL(ret, 0);
969 ret = amdgpu_ras_inject(items[i].block, items[i].sub_block,
970 items[i].type, items[i].address,
972 CU_ASSERT_EQUAL(ret, 0);
976 while (timeout > 0) {
979 ret = amdgpu_ras_query_err_count(items[i].block, &ue,
981 CU_ASSERT_EQUAL(ret, 0);
/* Any change in either counter counts as the injection having landed. */
985 if (old_ue != ue || old_ce != ce) {
/*
 * NOTE(review): %ld expects a long; if address/value are uint64_t this is a
 * format mismatch on targets where long is 32 bits - confirm the member
 * types.
 */
992 printf("\t Test %s@%s, address %ld, value %ld: %s\n",
993 items[i].name, items[i].error_type_str, items[i].address,
994 items[i].value, pass ? "Pass" : "Fail");
/*
 * "ras inject test": runs __amdgpu_ras_inject_test() once per collected
 * device.
 * NOTE(review): the per-device card selection is not visible in this listing.
 */
1004 static void amdgpu_ras_inject_test(void)
1007 for (i = 0; i < devices_count; i++) {
1009 __amdgpu_ras_inject_test();
/*
 * Query the error counters of every block that the selected card supports
 * and that is set in ras_block_mask_query; each query must return 0.
 */
1013 static void __amdgpu_ras_query_test(void)
1015 unsigned long ue, ce;
1019 for (i = 0; i < AMDGPU_RAS_BLOCK__LAST; i++) {
1020 if (amdgpu_ras_is_feature_supported(i) <= 0)
/* Honor the per-device query mask. */
1023 if (!((1 << i) & ras_block_mask_query))
1026 ret = amdgpu_ras_query_err_count(i, &ue, &ce);
1027 CU_ASSERT_EQUAL(ret, 0);
/*
 * "ras query test": runs __amdgpu_ras_query_test() once per collected device.
 * NOTE(review): the per-device card selection is not visible in this listing.
 */
1031 static void amdgpu_ras_query_test(void)
1034 for (i = 0; i < devices_count; i++) {
1036 __amdgpu_ras_query_test();
/*
 * "ras basic test": checks that the RAS interfaces exist. It verifies the
 * amdgpu ras_mask module parameter once, then for each collected device the
 * AMDGPU_INFO_RAS_ENABLED_FEATURES query, the debugfs "ras_ctrl" node
 * (writable), the sysfs "features" node (readable) and, for each block set
 * in ras_block_mask_basic, the "<block>_err_count" and "<block>_err_inject"
 * nodes.
 * NOTE(review): the per-device card selection and the handling of
 * unsupported blocks are not visible in this listing, and the function
 * continues past the last visible line.
 */
1040 static void amdgpu_ras_basic_test(void)
1042 unsigned long ue, ce;
1050 ret = is_file_ok("/sys/module/amdgpu/parameters/ras_mask", O_RDONLY);
1051 CU_ASSERT_EQUAL(ret, 0);
1053 for (i = 0; i < devices_count; i++) {
1056 ret = amdgpu_query_info(device_handle, AMDGPU_INFO_RAS_ENABLED_FEATURES,
1057 sizeof(features), &features);
1058 CU_ASSERT_EQUAL(ret, 0);
1060 sprintf(path, "%s%s", get_ras_debugfs_root(), "ras_ctrl");
1061 ret = is_file_ok(path, O_WRONLY);
1062 CU_ASSERT_EQUAL(ret, 0);
1064 sprintf(path, "%s%s", get_ras_sysfs_root(), "features");
1065 ret = is_file_ok(path, O_RDONLY);
1066 CU_ASSERT_EQUAL(ret, 0);
1068 for (j = 0; j < AMDGPU_RAS_BLOCK__LAST; j++) {
1069 ret = amdgpu_ras_is_feature_supported(j);
/* Honor the per-device basic mask. */
1073 if (!((1 << j) & ras_block_mask_basic))
1076 sprintf(path, "%s%s%s", get_ras_sysfs_root(), ras_block_str(j), "_err_count");
1077 ret = is_file_ok(path, O_RDONLY);
1078 CU_ASSERT_EQUAL(ret, 0);
1080 sprintf(path, "%s%s%s", get_ras_debugfs_root(), ras_block_str(j), "_err_inject");
1081 ret = is_file_ok(path, O_WRONLY);
1082 CU_ASSERT_EQUAL(ret, 0);