amdgpu/amdgpu_cs.c

   1 /*
   2  * Copyright 2014 Advanced Micro Devices, Inc.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice shall be included in
  12  * all copies or substantial portions of the Software.
  13  *
  14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
  18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  20  * OTHER DEALINGS IN THE SOFTWARE.
  21  *
  22  */
  23
  24 #ifdef HAVE_CONFIG_H
  25 #include "config.h"
  26 #endif
  27
  28 #include <stdlib.h>
  29 #include <stdio.h>
  30 #include <string.h>
  31 #include <errno.h>
  32 #include <pthread.h>
  33 #include <sched.h>
  34 #include <sys/ioctl.h>
  35 #ifdef HAVE_ALLOCA_H
  36 # include <alloca.h>
  37 #endif
  38
  39 #include "xf86drm.h"
  40 #include "amdgpu_drm.h"
  41 #include "amdgpu_internal.h"
  42
  43 /**
  44  * Create command submission context
  45  *
  46  * \param   dev - \c [in] amdgpu device handle
  47  * \param   context - \c [out] amdgpu context handle
  48  *
  49  * \return  0 on success otherwise POSIX Error code
  50 */
  51 int amdgpu_cs_ctx_create(amdgpu_device_handle dev,
  52                          amdgpu_context_handle *context)
  53 {
  54         struct amdgpu_context *gpu_context;
  55         union drm_amdgpu_ctx args;
  56         int r;
  57
  58         if (NULL == dev)
  59                 return -EINVAL;
  60         if (NULL == context)
  61                 return -EINVAL;
  62
  63         gpu_context = calloc(1, sizeof(struct amdgpu_context));
  64         if (NULL == gpu_context)
  65                 return -ENOMEM;
  66
  67         gpu_context->dev = dev;
  68
  69         /* Create the context */
  70         memset(&args, 0, sizeof(args));
  71         args.in.op = AMDGPU_CTX_OP_ALLOC_CTX;
  72         r = drmCommandWriteRead(dev->fd, DRM_AMDGPU_CTX, &args, sizeof(args));
  73         if (r)
  74                 goto error;
  75
  76         gpu_context->id = args.out.alloc.ctx_id;
  77         *context = (amdgpu_context_handle)gpu_context;
  78
  79         return 0;
  80
  81 error:
  82         free(gpu_context);
  83         return r;
  84 }
  85
  86 /**
  87  * Release command submission context
  88  *
  89  * \param   dev - \c [in] amdgpu device handle
  90  * \param   context - \c [in] amdgpu context handle
  91  *
  92  * \return  0 on success otherwise POSIX Error code
  93 */
  94 int amdgpu_cs_ctx_free(amdgpu_context_handle context)
  95 {
  96         union drm_amdgpu_ctx args;
  97         int r;
  98
  99         if (NULL == context)
 100                 return -EINVAL;
 101
 102         /* now deal with kernel side */
 103         memset(&args, 0, sizeof(args));
 104         args.in.op = AMDGPU_CTX_OP_FREE_CTX;
 105         args.in.ctx_id = context->id;
 106         r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CTX,
 107                                 &args, sizeof(args));
 108
 109         free(context);
 110
 111         return r;
 112 }
 113
 114 int amdgpu_cs_query_reset_state(amdgpu_context_handle context,
 115                                 uint32_t *state, uint32_t *hangs)
 116 {
 117         union drm_amdgpu_ctx args;
 118         int r;
 119
 120         if (!context)
 121                 return -EINVAL;
 122
 123         memset(&args, 0, sizeof(args));
 124         args.in.op = AMDGPU_CTX_OP_QUERY_STATE;
 125         args.in.ctx_id = context->id;
 126         r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CTX,
 127                                 &args, sizeof(args));
 128         if (!r) {
 129                 *state = args.out.state.reset_status;
 130                 *hangs = args.out.state.hangs;
 131         }
 132         return r;
 133 }
 134
 135 /**
 136  * Submit command to kernel DRM
 137  * \param   dev - \c [in]  Device handle
 138  * \param   context - \c [in]  GPU Context
 139  * \param   ibs_request - \c [in]  Pointer to submission requests
 140  * \param   fence - \c [out] return fence for this submission
 141  *
 142  * \return  0 on success otherwise POSIX Error code
 143  * \sa amdgpu_cs_submit()
 144 */
 145 static int amdgpu_cs_submit_one(amdgpu_context_handle context,
 146                                 struct amdgpu_cs_request *ibs_request)
 147 {
 148         union drm_amdgpu_cs cs;
 149         uint64_t *chunk_array;
 150         struct drm_amdgpu_cs_chunk *chunks;
 151         struct drm_amdgpu_cs_chunk_data *chunk_data;
 152         struct drm_amdgpu_cs_chunk_dep *dependencies = NULL;
 153         uint32_t i, size;
 154         bool user_fence;
 155         int r = 0;
 156
 157         if (ibs_request->ip_type >= AMDGPU_HW_IP_NUM)
 158                 return -EINVAL;
 159         if (ibs_request->ring >= AMDGPU_CS_MAX_RINGS)
 160                 return -EINVAL;
 161         if (ibs_request->number_of_ibs > AMDGPU_CS_MAX_IBS_PER_SUBMIT)
 162                 return -EINVAL;
 163         user_fence = (ibs_request->fence_info.handle != NULL);
 164
 165         size = ibs_request->number_of_ibs + (user_fence ? 2 : 1);
 166
 167         chunk_array = alloca(sizeof(uint64_t) * size);
 168         chunks = alloca(sizeof(struct drm_amdgpu_cs_chunk) * size);
 169
 170         size = ibs_request->number_of_ibs + (user_fence ? 1 : 0);
 171
 172         chunk_data = alloca(sizeof(struct drm_amdgpu_cs_chunk_data) * size);
 173
 174         memset(&cs, 0, sizeof(cs));
 175         cs.in.chunks = (uint64_t)(uintptr_t)chunk_array;
 176         cs.in.ctx_id = context->id;
 177         if (ibs_request->resources)
 178                 cs.in.bo_list_handle = ibs_request->resources->handle;
 179         cs.in.num_chunks = ibs_request->number_of_ibs;
 180         /* IB chunks */
 181         for (i = 0; i < ibs_request->number_of_ibs; i++) {
 182                 struct amdgpu_cs_ib_info *ib;
 183                 chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
 184                 chunks[i].chunk_id = AMDGPU_CHUNK_ID_IB;
 185                 chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4;
 186                 chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i];
 187
 188                 ib = &ibs_request->ibs[i];
 189
 190                 chunk_data[i].ib_data._pad = 0;
 191                 chunk_data[i].ib_data.va_start = ib->ib_mc_address;
 192                 chunk_data[i].ib_data.ib_bytes = ib->size * 4;
 193                 chunk_data[i].ib_data.ip_type = ibs_request->ip_type;
 194                 chunk_data[i].ib_data.ip_instance = ibs_request->ip_instance;
 195                 chunk_data[i].ib_data.ring = ibs_request->ring;
 196                 chunk_data[i].ib_data.flags = ib->flags;
 197         }
 198
 199         if (user_fence) {
 200                 i = cs.in.num_chunks++;
 201
 202                 /* fence chunk */
 203                 chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
 204                 chunks[i].chunk_id = AMDGPU_CHUNK_ID_FENCE;
 205                 chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_fence) / 4;
 206                 chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i];
 207
 208                 /* fence bo handle */
 209                 chunk_data[i].fence_data.handle = ibs_request->fence_info.handle->handle;
 210                 /* offset */
 211                 chunk_data[i].fence_data.offset =
 212                         ibs_request->fence_info.offset * sizeof(uint64_t);
 213         }
 214
 215         if (ibs_request->number_of_dependencies) {
 216                 dependencies = malloc(sizeof(struct drm_amdgpu_cs_chunk_dep) *
 217                         ibs_request->number_of_dependencies);
 218                 if (!dependencies) {
 219                         r = -ENOMEM;
 220                         goto error_unlock;
 221                 }
 222
 223                 for (i = 0; i < ibs_request->number_of_dependencies; ++i) {
 224                         struct amdgpu_cs_fence *info = &ibs_request->dependencies[i];
 225                         struct drm_amdgpu_cs_chunk_dep *dep = &dependencies[i];
 226                         dep->ip_type = info->ip_type;
 227                         dep->ip_instance = info->ip_instance;
 228                         dep->ring = info->ring;
 229                         dep->ctx_id = info->context->id;
 230                         dep->handle = info->fence;
 231                 }
 232
 233                 i = cs.in.num_chunks++;
 234
 235                 /* dependencies chunk */
 236                 chunk_array[i] = (uint64_t)(uintptr_t)&chunks[i];
 237                 chunks[i].chunk_id = AMDGPU_CHUNK_ID_DEPENDENCIES;
 238                 chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_dep) / 4
 239                         * ibs_request->number_of_dependencies;
 240                 chunks[i].chunk_data = (uint64_t)(uintptr_t)dependencies;
 241         }
 242
 243         r = drmCommandWriteRead(context->dev->fd, DRM_AMDGPU_CS,
 244                                 &cs, sizeof(cs));
 245         if (r)
 246                 goto error_unlock;
 247
 248         ibs_request->seq_no = cs.out.handle;
 249
 250 error_unlock:
 251         free(dependencies);
 252         return r;
 253 }
 254
 255 int amdgpu_cs_submit(amdgpu_context_handle context,
 256                      uint64_t flags,
 257                      struct amdgpu_cs_request *ibs_request,
 258                      uint32_t number_of_requests)
 259 {
 260         uint32_t i;
 261         int r;
 262
 263         if (NULL == context)
 264                 return -EINVAL;
 265         if (NULL == ibs_request)
 266                 return -EINVAL;
 267
 268         r = 0;
 269         for (i = 0; i < number_of_requests; i++) {
 270                 r = amdgpu_cs_submit_one(context, ibs_request);
 271                 if (r)
 272                         break;
 273                 ibs_request++;
 274         }
 275
 276         return r;
 277 }
 278
 279 /**
 280  * Calculate absolute timeout.
 281  *
 282  * \param   timeout - \c [in] timeout in nanoseconds.
 283  *
 284  * \return  absolute timeout in nanoseconds
 285 */
 286 drm_private uint64_t amdgpu_cs_calculate_timeout(uint64_t timeout)
 287 {
 288         int r;
 289
 290         if (timeout != AMDGPU_TIMEOUT_INFINITE) {
 291                 struct timespec current;
 292                 uint64_t current_ns;
 293                 r = clock_gettime(CLOCK_MONOTONIC, &current);
 294                 if (r) {
 295                         fprintf(stderr, "clock_gettime() returned error (%d)!", errno);
 296                         return AMDGPU_TIMEOUT_INFINITE;
 297                 }
 298
 299                 current_ns = ((uint64_t)current.tv_sec) * 1000000000ull;
 300                 current_ns += current.tv_nsec;
 301                 timeout += current_ns;
 302                 if (timeout < current_ns)
 303                         timeout = AMDGPU_TIMEOUT_INFINITE;
 304         }
 305         return timeout;
 306 }
 307
 308 static int amdgpu_ioctl_wait_cs(amdgpu_context_handle context,
 309                                 unsigned ip,
 310                                 unsigned ip_instance,
 311                                 uint32_t ring,
 312                                 uint64_t handle,
 313                                 uint64_t timeout_ns,
 314                                 uint64_t flags,
 315                                 bool *busy)
 316 {
 317         amdgpu_device_handle dev = context->dev;
 318         union drm_amdgpu_wait_cs args;
 319         int r;
 320
 321         memset(&args, 0, sizeof(args));
 322         args.in.handle = handle;
 323         args.in.ip_type = ip;
 324         args.in.ip_instance = ip_instance;
 325         args.in.ring = ring;
 326         args.in.ctx_id = context->id;
 327
 328         if (flags & AMDGPU_QUERY_FENCE_TIMEOUT_IS_ABSOLUTE)
 329                 args.in.timeout = timeout_ns;
 330         else
 331                 args.in.timeout = amdgpu_cs_calculate_timeout(timeout_ns);
 332
 333         r = drmIoctl(dev->fd, DRM_IOCTL_AMDGPU_WAIT_CS, &args);
 334         if (r)
 335                 return -errno;
 336
 337         *busy = args.out.status;
 338         return 0;
 339 }
 340
 341 int amdgpu_cs_query_fence_status(struct amdgpu_cs_fence *fence,
 342                                  uint64_t timeout_ns,
 343                                  uint64_t flags,
 344                                  uint32_t *expired)
 345 {
 346         bool busy = true;
 347         int r;
 348
 349         if (NULL == fence)
 350                 return -EINVAL;
 351         if (NULL == expired)
 352                 return -EINVAL;
 353         if (NULL == fence->context)
 354                 return -EINVAL;
 355         if (fence->ip_type >= AMDGPU_HW_IP_NUM)
 356                 return -EINVAL;
 357         if (fence->ring >= AMDGPU_CS_MAX_RINGS)
 358                 return -EINVAL;
 359
 360         *expired = false;
 361
 362         r = amdgpu_ioctl_wait_cs(fence->context, fence->ip_type,
 363                                 fence->ip_instance, fence->ring,
 364                                 fence->fence, timeout_ns, flags, &busy);
 365
 366         if (!r && !busy)
 367                 *expired = true;
 368
 369         return r;
 370 }
 371