#include <fcntl.h>
#include <stdio.h>
#include "xf86drm.h"
-#include "json.h"
+
+#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
const char *ras_block_string[] = {
"umc",
AMDGPU_RAS_ERROR__POISON = 8,
};
-struct ras_test_item {
+struct ras_inject_test_config {
char name[64];
- int block;
+ char block[32];
int sub_block;
- char error_type_str[64];
enum amdgpu_ras_error_type type;
uint64_t address;
uint64_t value;
DEFAULT_RAS_BLOCK_MASK_BASIC\
}
+/* UMC error-injection cases; executed by __amdgpu_ras_inject_test(). */
+static const struct ras_inject_test_config umc_ras_inject_test[] = {
+	{
+		.name = "ras_umc.1.0",
+		.block = "umc",
+		.sub_block = 0,
+		.type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE,
+		.address = 0,
+		.value = 0,
+	},
+};
+
+/*
+ * GFX error-injection cases; executed by __amdgpu_ras_inject_test().
+ * Every entry injects a single-correctable error at address 0 with value 0
+ * and differs only in its name and targeted GFX sub-block.
+ */
+static const struct ras_inject_test_config gfx_ras_inject_test[] = {
+	{ .name = "ras_gfx.2.0", .block = "gfx",
+	  .sub_block = AMDGPU_RAS_BLOCK__GFX_CPC_UCODE,
+	  .type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE,
+	  .address = 0, .value = 0 },
+	{ .name = "ras_gfx.2.1", .block = "gfx",
+	  .sub_block = AMDGPU_RAS_BLOCK__GFX_CPF_TAG,
+	  .type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE,
+	  .address = 0, .value = 0 },
+	{ .name = "ras_gfx.2.2", .block = "gfx",
+	  .sub_block = AMDGPU_RAS_BLOCK__GFX_CPG_TAG,
+	  .type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE,
+	  .address = 0, .value = 0 },
+	{ .name = "ras_gfx.2.3", .block = "gfx",
+	  .sub_block = AMDGPU_RAS_BLOCK__GFX_SQ_LDS_D,
+	  .type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE,
+	  .address = 0, .value = 0 },
+	{ .name = "ras_gfx.2.4", .block = "gfx",
+	  .sub_block = AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO,
+	  .type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE,
+	  .address = 0, .value = 0 },
+	{ .name = "ras_gfx.2.5", .block = "gfx",
+	  .sub_block = AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM,
+	  .type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE,
+	  .address = 0, .value = 0 },
+	{ .name = "ras_gfx.2.6", .block = "gfx",
+	  .sub_block = AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM,
+	  .type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE,
+	  .address = 0, .value = 0 },
+	{ .name = "ras_gfx.2.7", .block = "gfx",
+	  .sub_block = AMDGPU_RAS_BLOCK__GFX_TA_FS_DFIFO,
+	  .type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE,
+	  .address = 0, .value = 0 },
+	{ .name = "ras_gfx.2.8", .block = "gfx",
+	  .sub_block = AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA,
+	  .type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE,
+	  .address = 0, .value = 0 },
+	{ .name = "ras_gfx.2.9", .block = "gfx",
+	  .sub_block = AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1,
+	  .type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE,
+	  .address = 0, .value = 0 },
+	{ .name = "ras_gfx.2.10", .block = "gfx",
+	  .sub_block = AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0,
+	  .type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE,
+	  .address = 0, .value = 0 },
+	{ .name = "ras_gfx.2.11", .block = "gfx",
+	  .sub_block = AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1,
+	  .type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE,
+	  .address = 0, .value = 0 },
+	{ .name = "ras_gfx.2.12", .block = "gfx",
+	  .sub_block = AMDGPU_RAS_BLOCK__GFX_TCP_CACHE_RAM,
+	  .type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE,
+	  .address = 0, .value = 0 },
+	{ .name = "ras_gfx.2.13", .block = "gfx",
+	  .sub_block = AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_LO,
+	  .type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE,
+	  .address = 0, .value = 0 },
+	{ .name = "ras_gfx.2.14", .block = "gfx",
+	  .sub_block = AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM,
+	  .type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE,
+	  .address = 0, .value = 0 },
+};
+
static const struct ras_DID_test_mask ras_DID_array[] = {
{0x66a1, 0x00, RAS_BLOCK_MASK_ALL},
{0x66a1, 0x01, RAS_BLOCK_MASK_ALL},
{0x66a1, 0x04, RAS_BLOCK_MASK_ALL},
};
+/*
+ * Look up a RAS block by name in ras_block_string[].
+ *
+ * name: NUL-terminated block name to search for (compared with strcmp(),
+ *       so the match is exact and case-sensitive).
+ *
+ * Returns the index of the first matching entry, or
+ * ARRAY_SIZE(ras_block_string) when no entry matches; callers compare
+ * against that value to detect "not found".
+ */
+static uint32_t amdgpu_ras_find_block_id_by_name(const char *name)
+{
+	/*
+	 * Unsigned index: ARRAY_SIZE() yields a size_t and the result is
+	 * returned as uint32_t, so a signed counter would only introduce a
+	 * signed/unsigned comparison.
+	 */
+	uint32_t i;
+
+	for (i = 0; i < ARRAY_SIZE(ras_block_string); i++) {
+		if (strcmp(name, ras_block_string[i]) == 0)
+			return i;
+	}
+
+	return ARRAY_SIZE(ras_block_string);
+}
+
+/*
+ * Translate a RAS error type into the label used in the test report.
+ *
+ * Returns a string literal for the parity, single-correctable,
+ * multi-uncorrectable and poison types. Returns NULL for
+ * AMDGPU_RAS_ERROR__NONE and for any other value, so callers that print
+ * the result must be prepared for NULL.
+ */
+static char *amdgpu_ras_get_error_type_id(enum amdgpu_ras_error_type type)
+{
+	char *label = NULL;	/* stays NULL for NONE / unrecognised types */
+
+	if (type == AMDGPU_RAS_ERROR__PARITY)
+		label = "parity";
+	else if (type == AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE)
+		label = "single_correctable";
+	else if (type == AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE)
+		label = "multi_uncorrectable";
+	else if (type == AMDGPU_RAS_ERROR__POISON)
+		label = "poison";
+
+	return label;
+}
+
static struct ras_test_mask amdgpu_ras_get_test_mask(drmDevicePtr device)
{
int i;
}
}
-static int _json_get_block_id(json_object *block_obj, const char *name)
+static void __amdgpu_ras_ip_inject_test(const struct ras_inject_test_config *ip_test,
+ uint32_t size)
{
- json_object *item_obj, *index_obj;
-
- if (!json_object_object_get_ex(block_obj, name, &item_obj))
- return -1;
-
- if (!json_object_object_get_ex(item_obj, "index", &index_obj))
- return -1;
-
- return json_object_get_int(index_obj);
-}
-
-static int _json_get_subblock_id(json_object *block_obj, const char *block_name,
- const char *subblock_name)
-{
- json_object *item_obj, *subblock_obj, *name_obj;
-
- if (!json_object_object_get_ex(block_obj, block_name, &item_obj))
- return -1;
-
- if (!json_object_object_get_ex(item_obj, "subblock", &subblock_obj))
- return -1;
-
- if (!json_object_object_get_ex(subblock_obj, subblock_name, &name_obj))
- return -1;
-
- return json_object_get_int(name_obj);
-}
-
-static int amdgpu_ras_get_test_items(struct ras_test_item **pitems, int *size)
-{
- json_object *root_obj = NULL;
- json_object *block_obj = NULL;
- json_object *type_obj = NULL;
- json_object *tests_obj = NULL;
- json_object *test_obj = NULL;
- json_object *tmp_obj = NULL;
- json_object *tmp_type_obj = NULL;
- json_object *subblock_obj = NULL;
- int i, length;
- struct ras_test_item *items = NULL;
- int ret = -1;
-
- root_obj = json_object_from_file("./amdgpu_ras.json");
- if (!root_obj)
- root_obj = json_object_from_file(
- "/usr/share/libdrm/amdgpu_ras.json");
-
- if (!root_obj) {
- CU_FAIL_FATAL("Couldn't find amdgpu_ras.json");
- goto pro_end;
- }
-
- /* Check Version */
- if (!json_object_object_get_ex(root_obj, "version", &tmp_obj)) {
- CU_FAIL_FATAL("Wrong format of amdgpu_ras.json");
- goto pro_end;
- }
-
- /* Block Definition */
- if (!json_object_object_get_ex(root_obj, "block", &block_obj)) {
- fprintf(stderr, "block isn't defined\n");
- goto pro_end;
- }
-
- /* Type Definition */
- if (!json_object_object_get_ex(root_obj, "type", &type_obj)) {
- fprintf(stderr, "type isn't defined\n");
- goto pro_end;
- }
-
- /* Enumulate test items */
- if (!json_object_object_get_ex(root_obj, "tests", &tests_obj)) {
- fprintf(stderr, "tests are empty\n");
- goto pro_end;
- }
-
- length = json_object_array_length(tests_obj);
-
- items = malloc(sizeof(struct ras_test_item) * length);
- if (!items) {
- fprintf(stderr, "malloc failed\n");
- goto pro_end;
- }
-
- for (i = 0; i < length; i++) {
- test_obj = json_object_array_get_idx(tests_obj, i);
-
- /* Name */
- if (!json_object_object_get_ex(test_obj, "name", &tmp_obj)) {
- fprintf(stderr, "Test %d has no name\n", i);
- goto pro_end;
- }
- strncpy(items[i].name, json_object_get_string(tmp_obj), 64);
-
- /* block */
- if (!json_object_object_get_ex(test_obj, "block", &tmp_obj)) {
- fprintf(stderr, "Test:%s: block isn't defined\n",
- items[i].name);
- goto pro_end;
- }
- items[i].block = _json_get_block_id(
- block_obj, json_object_get_string(tmp_obj));
-
- /* check block id */
- if (items[i].block < AMDGPU_RAS_BLOCK__UMC ||
- items[i].block >= AMDGPU_RAS_BLOCK__LAST) {
- fprintf(stderr, "Test:%s: block id %d is invalid\n",
- items[i].name, items[i].block);
- goto pro_end;
- }
-
- /* subblock */
- if (json_object_object_get_ex(test_obj, "subblock", &tmp_obj)) {
- json_object_object_get_ex(test_obj, "block",
- &subblock_obj);
-
- items[i].sub_block = _json_get_subblock_id(
- block_obj,
- json_object_get_string(subblock_obj),
- json_object_get_string(tmp_obj));
- if (items[i].sub_block < 0) {
- fprintf(stderr, "Test:%s: subblock in block id %d is invalid\n",
- items[i].name, items[i].block);
- goto pro_end;
- }
- } else
- items[i].sub_block = 0;
-
- /* type */
- if (json_object_object_get_ex(test_obj, "type", &tmp_obj)) {
- strncpy(items[i].error_type_str,
- json_object_get_string(tmp_obj), 64);
-
- if (json_object_object_get_ex(type_obj,
- json_object_get_string(tmp_obj), &tmp_type_obj))
- items[i].type = json_object_get_int(tmp_type_obj);
- else
- items[i].type = (enum amdgpu_ras_error_type)0;
- }
-
- /* address */
- if (json_object_object_get_ex(test_obj, "address", &tmp_obj))
- items[i].address = json_object_get_int(tmp_obj);
- else
- items[i].address = 0; /* default address 0 */
-
- /* value */
- if (json_object_object_get_ex(test_obj, "value", &tmp_obj))
- items[i].value = json_object_get_int(tmp_obj);
- else
- items[i].value = 0; /* default value 0 */
- }
-
- *pitems = items;
- *size = length;
- ret = 0;
-pro_end:
- if (root_obj)
- json_object_put(root_obj);
-
- return ret;
-}
-
-static void __amdgpu_ras_inject_test(void)
-{
- struct ras_test_item *items = NULL;
- int i, size;
- int ret;
+ int i, ret;
unsigned long old_ue, old_ce;
unsigned long ue, ce;
+ uint32_t block;
int timeout;
bool pass;
- ret = amdgpu_ras_get_test_items(&items, &size);
- CU_ASSERT_EQUAL(ret, 0);
- if (ret)
- goto mem_free;
-
- printf("...\n");
for (i = 0; i < size; i++) {
timeout = 3;
pass = false;
- ret = amdgpu_ras_query_err_count(items[i].block, &old_ue,
- &old_ce);
+ block = amdgpu_ras_find_block_id_by_name(ip_test[i].block);
+
+ /* Ensure one valid ip block */
+ if (block == ARRAY_SIZE(ras_block_string))
+ break;
+
+ /* Ensure RAS feature for the IP block is enabled by kernel */
+ if (amdgpu_ras_is_feature_supported(block) <= 0)
+ break;
+
+ ret = amdgpu_ras_query_err_count(block, &old_ue, &old_ce);
CU_ASSERT_EQUAL(ret, 0);
if (ret)
break;
- ret = amdgpu_ras_inject(items[i].block, items[i].sub_block,
- items[i].type, items[i].address,
- items[i].value);
+ ret = amdgpu_ras_inject(block,
+ ip_test[i].sub_block,
+ ip_test[i].type,
+ ip_test[i].address,
+ ip_test[i].value);
CU_ASSERT_EQUAL(ret, 0);
if (ret)
break;
while (timeout > 0) {
sleep(5);
- ret = amdgpu_ras_query_err_count(items[i].block, &ue,
- &ce);
+ ret = amdgpu_ras_query_err_count(block, &ue, &ce);
CU_ASSERT_EQUAL(ret, 0);
if (ret)
break;
}
timeout -= 1;
}
- printf("\t Test %s@%s, address %ld, value %ld: %s\n",
- items[i].name, items[i].error_type_str, items[i].address,
- items[i].value, pass ? "Pass" : "Fail");
+ printf("\t Test %s@block %s, subblock %d, error_type %s, address %ld, value %ld: %s\n",
+ ip_test[i].name,
+ ip_test[i].block,
+ ip_test[i].sub_block,
+ amdgpu_ras_get_error_type_id(ip_test[i].type),
+ ip_test[i].address,
+ ip_test[i].value,
+ pass ? "Pass" : "Fail");
}
+}
-mem_free:
- if (items) {
- free(items);
- items = NULL;
- }
+/*
+ * Print the progress marker, then run every built-in injection table:
+ * the UMC cases first, followed by the GFX cases.
+ */
+static void __amdgpu_ras_inject_test(void)
+{
+	const uint32_t umc_count = ARRAY_SIZE(umc_ras_inject_test);
+	const uint32_t gfx_count = ARRAY_SIZE(gfx_ras_inject_test);
+
+	printf("...\n");
+
+	/* UMC table first, GFX table second */
+	__amdgpu_ras_ip_inject_test(umc_ras_inject_test, umc_count);
+	__amdgpu_ras_ip_inject_test(gfx_ras_inject_test, gfx_count);
}
static void amdgpu_ras_inject_test(void)