/*
 * Copyright 2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include <linux/delay.h>
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>

#include "amdgpu.h"
#include "amdgpu_xcp.h"
#include "amdgpu_ucode.h"
#include "amdgpu_trace.h"

#include "sdma/sdma_4_4_2_offset.h"
#include "sdma/sdma_4_4_2_sh_mask.h"

#include "soc15_common.h"
#include "soc15.h"
#include "vega10_sdma_pkt_open.h"

#include "ivsrcid/sdma0/irqsrcs_sdma0_4_0.h"
#include "ivsrcid/sdma1/irqsrcs_sdma1_4_0.h"

#include "amdgpu_ras.h"

MODULE_FIRMWARE("amdgpu/sdma_4_4_2.bin");
#define WREG32_SDMA(instance, offset, value) \
	WREG32(sdma_v4_4_2_get_reg_offset(adev, (instance), (offset)), value)
#define RREG32_SDMA(instance, offset) \
	RREG32(sdma_v4_4_2_get_reg_offset(adev, (instance), (offset)))
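/*
 * Note: these helpers expand to plain RREG32()/WREG32() and rely on an
 * `adev` variable being in scope at each call site; the logical SDMA
 * instance is translated to a physical register offset by
 * sdma_v4_4_2_get_reg_offset() below.
 */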
static void sdma_v4_4_2_set_ring_funcs(struct amdgpu_device *adev);
static void sdma_v4_4_2_set_buffer_funcs(struct amdgpu_device *adev);
static void sdma_v4_4_2_set_vm_pte_funcs(struct amdgpu_device *adev);
static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev);

static u32 sdma_v4_4_2_get_reg_offset(struct amdgpu_device *adev,
		u32 instance, u32 offset)
{
	u32 dev_inst = GET_INST(SDMA0, instance);

	return (adev->reg_offset[SDMA0_HWIP][dev_inst][0] + offset);
}
static unsigned sdma_v4_4_2_seq_to_irq_id(int seq_num)
{
	switch (seq_num) {
	case 0:
		return SOC15_IH_CLIENTID_SDMA0;
	case 1:
		return SOC15_IH_CLIENTID_SDMA1;
	case 2:
		return SOC15_IH_CLIENTID_SDMA2;
	case 3:
		return SOC15_IH_CLIENTID_SDMA3;
	default:
		break;
	}
	return -EINVAL;
}

static int sdma_v4_4_2_irq_id_to_seq(unsigned client_id)
{
	switch (client_id) {
	case SOC15_IH_CLIENTID_SDMA0:
		return 0;
	case SOC15_IH_CLIENTID_SDMA1:
		return 1;
	case SOC15_IH_CLIENTID_SDMA2:
		return 2;
	case SOC15_IH_CLIENTID_SDMA3:
		return 3;
	default:
		break;
	}
	return -EINVAL;
}
static void sdma_v4_4_2_inst_init_golden_registers(struct amdgpu_device *adev,
						   uint32_t inst_mask)
{
	u32 val;
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		val = RREG32_SDMA(i, regSDMA_GB_ADDR_CONFIG);
		val = REG_SET_FIELD(val, SDMA_GB_ADDR_CONFIG, NUM_BANKS, 4);
		val = REG_SET_FIELD(val, SDMA_GB_ADDR_CONFIG,
				    PIPE_INTERLEAVE_SIZE, 0);
		WREG32_SDMA(i, regSDMA_GB_ADDR_CONFIG, val);

		val = RREG32_SDMA(i, regSDMA_GB_ADDR_CONFIG_READ);
		val = REG_SET_FIELD(val, SDMA_GB_ADDR_CONFIG_READ, NUM_BANKS,
				    4);
		val = REG_SET_FIELD(val, SDMA_GB_ADDR_CONFIG_READ,
				    PIPE_INTERLEAVE_SIZE, 0);
		WREG32_SDMA(i, regSDMA_GB_ADDR_CONFIG_READ, val);
	}
}
/**
 * sdma_v4_4_2_init_microcode - load ucode images from disk
 *
 * @adev: amdgpu_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 * Returns 0 on success, error on failure.
 */
static int sdma_v4_4_2_init_microcode(struct amdgpu_device *adev)
{
	int ret, i;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 4, 2)) {
			ret = amdgpu_sdma_init_microcode(adev, 0, true);
			break;
		} else {
			ret = amdgpu_sdma_init_microcode(adev, i, false);
			if (ret)
				return ret;
		}
	}

	return ret;
}
/**
 * sdma_v4_4_2_ring_get_rptr - get the current read pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Get the current rptr from the hardware.
 */
static uint64_t sdma_v4_4_2_ring_get_rptr(struct amdgpu_ring *ring)
{
	u64 *rptr;

	/* XXX check if swapping is necessary on BE */
	rptr = ((u64 *)&ring->adev->wb.wb[ring->rptr_offs]);

	DRM_DEBUG("rptr before shift == 0x%016llx\n", *rptr);
	return ((*rptr) >> 2);
}
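/*
 * The hardware ring pointers count bytes while the driver-side
 * ring->rptr/ring->wptr values count dwords, hence the >> 2 above and
 * the matching << 2 wherever a pointer is handed back to the hardware.
 */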
/**
 * sdma_v4_4_2_ring_get_wptr - get the current write pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Get the current wptr from the hardware.
 */
static uint64_t sdma_v4_4_2_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	u64 wptr;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs]));
		DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr);
	} else {
		wptr = RREG32_SDMA(ring->me, regSDMA_GFX_RB_WPTR_HI);
		wptr = wptr << 32;
		wptr |= RREG32_SDMA(ring->me, regSDMA_GFX_RB_WPTR);
		DRM_DEBUG("wptr before shift [%i] wptr == 0x%016llx\n",
			  ring->me, wptr);
	}

	return wptr >> 2;
}
/**
 * sdma_v4_4_2_ring_set_wptr - commit the write pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Write the wptr back to the hardware.
 */
static void sdma_v4_4_2_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	DRM_DEBUG("Setting write pointer\n");
	if (ring->use_doorbell) {
		u64 *wb = (u64 *)&adev->wb.wb[ring->wptr_offs];

		DRM_DEBUG("Using doorbell -- "
			  "wptr_offs == 0x%08x "
			  "lower_32_bits(ring->wptr) << 2 == 0x%08x "
			  "upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
			  ring->wptr_offs,
			  lower_32_bits(ring->wptr << 2),
			  upper_32_bits(ring->wptr << 2));
		/* XXX check if swapping is necessary on BE */
		WRITE_ONCE(*wb, (ring->wptr << 2));
		DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
			  ring->doorbell_index, ring->wptr << 2);
		WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
	} else {
		DRM_DEBUG("Not using doorbell -- "
			  "regSDMA%i_GFX_RB_WPTR == 0x%08x "
			  "regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
			  ring->me,
			  lower_32_bits(ring->wptr << 2),
			  ring->me,
			  upper_32_bits(ring->wptr << 2));
		WREG32_SDMA(ring->me, regSDMA_GFX_RB_WPTR,
			    lower_32_bits(ring->wptr << 2));
		WREG32_SDMA(ring->me, regSDMA_GFX_RB_WPTR_HI,
			    upper_32_bits(ring->wptr << 2));
	}
}
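/*
 * In the doorbell path above the shadow write-back slot is updated
 * before the doorbell is rung, so the engine always reads a current
 * wptr when the doorbell wakes it.
 */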
/**
 * sdma_v4_4_2_page_ring_get_wptr - get the current write pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Get the current wptr from the hardware.
 */
static uint64_t sdma_v4_4_2_page_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	u64 wptr;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs]));
	} else {
		wptr = RREG32_SDMA(ring->me, regSDMA_PAGE_RB_WPTR_HI);
		wptr = wptr << 32;
		wptr |= RREG32_SDMA(ring->me, regSDMA_PAGE_RB_WPTR);
	}

	return wptr >> 2;
}

/**
 * sdma_v4_4_2_page_ring_set_wptr - commit the write pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Write the wptr back to the hardware.
 */
static void sdma_v4_4_2_page_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		u64 *wb = (u64 *)&adev->wb.wb[ring->wptr_offs];

		/* XXX check if swapping is necessary on BE */
		WRITE_ONCE(*wb, (ring->wptr << 2));
		WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
	} else {
		uint64_t wptr = ring->wptr << 2;

		WREG32_SDMA(ring->me, regSDMA_PAGE_RB_WPTR,
			    lower_32_bits(wptr));
		WREG32_SDMA(ring->me, regSDMA_PAGE_RB_WPTR_HI,
			    upper_32_bits(wptr));
	}
}
static void sdma_v4_4_2_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
{
	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
	int i;

	for (i = 0; i < count; i++)
		if (sdma && sdma->burst_nop && (i == 0))
			amdgpu_ring_write(ring, ring->funcs->nop |
				SDMA_PKT_NOP_HEADER_COUNT(count - 1));
		else
			amdgpu_ring_write(ring, ring->funcs->nop);
}
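/*
 * With burst_nop capable firmware, a single NOP header whose COUNT
 * field is (count - 1) makes the engine skip the following dwords in
 * one fetch instead of decoding `count` individual NOP packets.
 */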
/**
 * sdma_v4_4_2_ring_emit_ib - Schedule an IB on the DMA engine
 *
 * @ring: amdgpu ring pointer
 * @job: job to retrieve vmid from
 * @ib: IB object to schedule
 * @flags: unused
 *
 * Schedule an IB in the DMA ring.
 */
static void sdma_v4_4_2_ring_emit_ib(struct amdgpu_ring *ring,
				   struct amdgpu_job *job,
				   struct amdgpu_ib *ib,
				   uint32_t flags)
{
	unsigned vmid = AMDGPU_JOB_GET_VMID(job);

	/* IB packet must end on a 8 DW boundary */
	sdma_v4_4_2_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);

	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
	/* base must be 32 byte aligned */
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0);
}
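/*
 * The (2 - wptr) & 7 padding above positions the 6-dword INDIRECT
 * packet so that it ends exactly on an 8-dword boundary, satisfying
 * the alignment rule noted in the comment.
 */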
static void sdma_v4_4_2_wait_reg_mem(struct amdgpu_ring *ring,
				   int mem_space, int hdp,
				   uint32_t addr0, uint32_t addr1,
				   uint32_t ref, uint32_t mask,
				   uint32_t inv)
{
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(hdp) |
			  SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(mem_space) |
			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */
	if (mem_space) {
		/* memory */
		amdgpu_ring_write(ring, addr0);
		amdgpu_ring_write(ring, addr1);
	} else {
		/* registers */
		amdgpu_ring_write(ring, addr0 << 2);
		amdgpu_ring_write(ring, addr1 << 2);
	}
	amdgpu_ring_write(ring, ref); /* reference */
	amdgpu_ring_write(ring, mask); /* mask */
	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(inv)); /* retry count, poll interval */
}
/**
 * sdma_v4_4_2_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
 *
 * @ring: amdgpu ring pointer
 *
 * Emit an hdp flush packet on the requested DMA ring.
 */
static void sdma_v4_4_2_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	u32 ref_and_mask = 0;
	const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;

	ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me;

	sdma_v4_4_2_wait_reg_mem(ring, 0, 1,
			       adev->nbio.funcs->get_hdp_flush_done_offset(adev),
			       adev->nbio.funcs->get_hdp_flush_req_offset(adev),
			       ref_and_mask, ref_and_mask, 10);
}
/**
 * sdma_v4_4_2_ring_emit_fence - emit a fence on the DMA ring
 *
 * @ring: amdgpu ring pointer
 * @addr: address
 * @seq: sequence number
 * @flags: fence related flags
 *
 * Add a DMA fence packet to the ring to write
 * the fence seq number and DMA trap packet to generate
 * an interrupt if needed.
 */
static void sdma_v4_4_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
				      unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	/* write the fence */
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE));
	/* zero in first two bits */
	BUG_ON(addr & 0x3);
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	/* optionally write high bits as well */
	if (write64bit) {
		addr += 4;
		amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE));
		/* zero in first two bits */
		BUG_ON(addr & 0x3);
		amdgpu_ring_write(ring, lower_32_bits(addr));
		amdgpu_ring_write(ring, upper_32_bits(addr));
		amdgpu_ring_write(ring, upper_32_bits(seq));
	}

	/* generate an interrupt */
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP));
	amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
}
/**
 * sdma_v4_4_2_inst_gfx_stop - stop the gfx async dma engines
 *
 * @adev: amdgpu_device pointer
 * @inst_mask: mask of dma engine instances to be disabled
 *
 * Stop the gfx async dma ring buffers.
 */
static void sdma_v4_4_2_inst_gfx_stop(struct amdgpu_device *adev,
				      uint32_t inst_mask)
{
	struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
	u32 rb_cntl, ib_cntl;
	int i, unset = 0;

	for_each_inst(i, inst_mask) {
		sdma[i] = &adev->sdma.instance[i].ring;

		if ((adev->mman.buffer_funcs_ring == sdma[i]) && unset != 1) {
			amdgpu_ttm_set_buffer_funcs_status(adev, false);
			unset = 1;
		}

		rb_cntl = RREG32_SDMA(i, regSDMA_GFX_RB_CNTL);
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL, RB_ENABLE, 0);
		WREG32_SDMA(i, regSDMA_GFX_RB_CNTL, rb_cntl);
		ib_cntl = RREG32_SDMA(i, regSDMA_GFX_IB_CNTL);
		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_GFX_IB_CNTL, IB_ENABLE, 0);
		WREG32_SDMA(i, regSDMA_GFX_IB_CNTL, ib_cntl);
	}
}

/**
 * sdma_v4_4_2_inst_rlc_stop - stop the compute async dma engines
 *
 * @adev: amdgpu_device pointer
 * @inst_mask: mask of dma engine instances to be disabled
 *
 * Stop the compute async dma queues.
 */
static void sdma_v4_4_2_inst_rlc_stop(struct amdgpu_device *adev,
				      uint32_t inst_mask)
{
	/* XXX todo */
}
/**
 * sdma_v4_4_2_inst_page_stop - stop the page async dma engines
 *
 * @adev: amdgpu_device pointer
 * @inst_mask: mask of dma engine instances to be disabled
 *
 * Stop the page async dma ring buffers.
 */
static void sdma_v4_4_2_inst_page_stop(struct amdgpu_device *adev,
				       uint32_t inst_mask)
{
	struct amdgpu_ring *sdma[AMDGPU_MAX_SDMA_INSTANCES];
	u32 rb_cntl, ib_cntl;
	int i;
	bool unset = false;

	for_each_inst(i, inst_mask) {
		sdma[i] = &adev->sdma.instance[i].page;

		if ((adev->mman.buffer_funcs_ring == sdma[i]) &&
			(!unset)) {
			amdgpu_ttm_set_buffer_funcs_status(adev, false);
			unset = true;
		}

		rb_cntl = RREG32_SDMA(i, regSDMA_PAGE_RB_CNTL);
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_PAGE_RB_CNTL,
					RB_ENABLE, 0);
		WREG32_SDMA(i, regSDMA_PAGE_RB_CNTL, rb_cntl);
		ib_cntl = RREG32_SDMA(i, regSDMA_PAGE_IB_CNTL);
		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_PAGE_IB_CNTL,
					IB_ENABLE, 0);
		WREG32_SDMA(i, regSDMA_PAGE_IB_CNTL, ib_cntl);
	}
}
/**
 * sdma_v4_4_2_inst_ctx_switch_enable - stop the async dma engines context switch
 *
 * @adev: amdgpu_device pointer
 * @enable: enable/disable the DMA MEs context switch.
 * @inst_mask: mask of dma engine instances to be enabled
 *
 * Halt or unhalt the async dma engines context switch.
 */
static void sdma_v4_4_2_inst_ctx_switch_enable(struct amdgpu_device *adev,
					       bool enable, uint32_t inst_mask)
{
	u32 f32_cntl, phase_quantum = 0;
	int i;

	if (amdgpu_sdma_phase_quantum) {
		unsigned value = amdgpu_sdma_phase_quantum;
		unsigned unit = 0;

		while (value > (SDMA_PHASE0_QUANTUM__VALUE_MASK >>
				SDMA_PHASE0_QUANTUM__VALUE__SHIFT)) {
			value = (value + 1) >> 1;
			unit++;
		}
		if (unit > (SDMA_PHASE0_QUANTUM__UNIT_MASK >>
			    SDMA_PHASE0_QUANTUM__UNIT__SHIFT)) {
			value = (SDMA_PHASE0_QUANTUM__VALUE_MASK >>
				 SDMA_PHASE0_QUANTUM__VALUE__SHIFT);
			unit = (SDMA_PHASE0_QUANTUM__UNIT_MASK >>
				SDMA_PHASE0_QUANTUM__UNIT__SHIFT);
			WARN_ONCE(1,
			"clamping sdma_phase_quantum to %uK clock cycles\n",
				  value << unit);
		}
		phase_quantum =
			value << SDMA_PHASE0_QUANTUM__VALUE__SHIFT |
			unit  << SDMA_PHASE0_QUANTUM__UNIT__SHIFT;
	}

	for_each_inst(i, inst_mask) {
		f32_cntl = RREG32_SDMA(i, regSDMA_CNTL);
		f32_cntl = REG_SET_FIELD(f32_cntl, SDMA_CNTL,
				AUTO_CTXSW_ENABLE, enable ? 1 : 0);
		if (enable && amdgpu_sdma_phase_quantum) {
			WREG32_SDMA(i, regSDMA_PHASE0_QUANTUM, phase_quantum);
			WREG32_SDMA(i, regSDMA_PHASE1_QUANTUM, phase_quantum);
			WREG32_SDMA(i, regSDMA_PHASE2_QUANTUM, phase_quantum);
		}
		WREG32_SDMA(i, regSDMA_CNTL, f32_cntl);

		/* Extend page fault timeout to avoid interrupt storm */
		WREG32_SDMA(i, regSDMA_UTCL1_TIMEOUT, 0x00800080);
	}
}
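/*
 * The quantum programmed above encodes value * (2^unit) clock cycles:
 * the loop halves the requested count (rounding up) until it fits the
 * VALUE field, bumping the UNIT exponent each time, and clamps both
 * fields at their maxima with a one-time warning.
 */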
/**
 * sdma_v4_4_2_inst_enable - stop the async dma engines
 *
 * @adev: amdgpu_device pointer
 * @enable: enable/disable the DMA MEs.
 * @inst_mask: mask of dma engine instances to be enabled
 *
 * Halt or unhalt the async dma engines.
 */
static void sdma_v4_4_2_inst_enable(struct amdgpu_device *adev, bool enable,
				    uint32_t inst_mask)
{
	u32 f32_cntl;
	int i;

	if (!enable) {
		sdma_v4_4_2_inst_gfx_stop(adev, inst_mask);
		sdma_v4_4_2_inst_rlc_stop(adev, inst_mask);
		if (adev->sdma.has_page_queue)
			sdma_v4_4_2_inst_page_stop(adev, inst_mask);
	}

	for_each_inst(i, inst_mask) {
		f32_cntl = RREG32_SDMA(i, regSDMA_F32_CNTL);
		f32_cntl = REG_SET_FIELD(f32_cntl, SDMA_F32_CNTL, HALT, enable ? 0 : 1);
		WREG32_SDMA(i, regSDMA_F32_CNTL, f32_cntl);
	}
}
/**
 * sdma_v4_4_2_rb_cntl - get parameters for rb_cntl
 */
static uint32_t sdma_v4_4_2_rb_cntl(struct amdgpu_ring *ring, uint32_t rb_cntl)
{
	/* Set ring buffer size in dwords */
	uint32_t rb_bufsz = order_base_2(ring->ring_size / 4);

	barrier(); /* work around https://bugs.llvm.org/show_bug.cgi?id=42576 */
	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
#ifdef __BIG_ENDIAN
	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL,
				RPTR_WRITEBACK_SWAP_ENABLE, 1);
#endif
	return rb_cntl;
}
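/*
 * RB_SIZE is programmed as log2 of the ring size in dwords, which is
 * why order_base_2() is applied to ring_size / 4 above.
 */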
/**
 * sdma_v4_4_2_gfx_resume - setup and start the async dma engines
 *
 * @adev: amdgpu_device pointer
 * @i: instance to resume
 *
 * Set up the gfx DMA ring buffers and enable them.
 */
static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i)
{
	struct amdgpu_ring *ring = &adev->sdma.instance[i].ring;
	u32 rb_cntl, ib_cntl, wptr_poll_cntl;
	u32 wb_offset;
	u32 doorbell;
	u32 doorbell_offset;
	u64 wptr_gpu_addr;

	wb_offset = (ring->rptr_offs * 4);

	rb_cntl = RREG32_SDMA(i, regSDMA_GFX_RB_CNTL);
	rb_cntl = sdma_v4_4_2_rb_cntl(ring, rb_cntl);
	WREG32_SDMA(i, regSDMA_GFX_RB_CNTL, rb_cntl);

	/* Initialize the ring buffer's read and write pointers */
	WREG32_SDMA(i, regSDMA_GFX_RB_RPTR, 0);
	WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_HI, 0);
	WREG32_SDMA(i, regSDMA_GFX_RB_WPTR, 0);
	WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_HI, 0);

	/* set the wb address whether it's enabled or not */
	WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_ADDR_HI,
	       upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
	WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_ADDR_LO,
	       lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);

	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL,
				RPTR_WRITEBACK_ENABLE, 1);

	WREG32_SDMA(i, regSDMA_GFX_RB_BASE, ring->gpu_addr >> 8);
	WREG32_SDMA(i, regSDMA_GFX_RB_BASE_HI, ring->gpu_addr >> 40);

	ring->wptr = 0;

	/* before programming wptr to a smaller value, need to set minor_ptr_update first */
	WREG32_SDMA(i, regSDMA_GFX_MINOR_PTR_UPDATE, 1);

	doorbell = RREG32_SDMA(i, regSDMA_GFX_DOORBELL);
	doorbell_offset = RREG32_SDMA(i, regSDMA_GFX_DOORBELL_OFFSET);

	doorbell = REG_SET_FIELD(doorbell, SDMA_GFX_DOORBELL, ENABLE,
				 ring->use_doorbell);
	doorbell_offset = REG_SET_FIELD(doorbell_offset,
					SDMA_GFX_DOORBELL_OFFSET,
					OFFSET, ring->doorbell_index);
	WREG32_SDMA(i, regSDMA_GFX_DOORBELL, doorbell);
	WREG32_SDMA(i, regSDMA_GFX_DOORBELL_OFFSET, doorbell_offset);

	sdma_v4_4_2_ring_set_wptr(ring);

	/* set minor_ptr_update to 0 after wptr is programmed */
	WREG32_SDMA(i, regSDMA_GFX_MINOR_PTR_UPDATE, 0);

	/* setup the wptr shadow polling */
	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_POLL_ADDR_LO,
		    lower_32_bits(wptr_gpu_addr));
	WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_POLL_ADDR_HI,
		    upper_32_bits(wptr_gpu_addr));
	wptr_poll_cntl = RREG32_SDMA(i, regSDMA_GFX_RB_WPTR_POLL_CNTL);
	wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
				       SDMA_GFX_RB_WPTR_POLL_CNTL,
				       F32_POLL_ENABLE, amdgpu_sriov_vf(adev) ? 1 : 0);
	WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_POLL_CNTL, wptr_poll_cntl);

	/* enable DMA RB */
	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_GFX_RB_CNTL, RB_ENABLE, 1);
	WREG32_SDMA(i, regSDMA_GFX_RB_CNTL, rb_cntl);

	ib_cntl = RREG32_SDMA(i, regSDMA_GFX_IB_CNTL);
	ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_GFX_IB_CNTL, IB_ENABLE, 1);
#ifdef __BIG_ENDIAN
	ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
#endif
	/* enable DMA IBs */
	WREG32_SDMA(i, regSDMA_GFX_IB_CNTL, ib_cntl);

	ring->sched.ready = true;
}
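/*
 * MINOR_PTR_UPDATE is held at 1 across the doorbell/wptr programming
 * above so the engine does not fetch while the pointers move backwards,
 * and is cleared only once the new wptr has been committed.
 */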
/**
 * sdma_v4_4_2_page_resume - setup and start the async dma engines
 *
 * @adev: amdgpu_device pointer
 * @i: instance to resume
 *
 * Set up the page DMA ring buffers and enable them.
 */
static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i)
{
	struct amdgpu_ring *ring = &adev->sdma.instance[i].page;
	u32 rb_cntl, ib_cntl, wptr_poll_cntl;
	u32 wb_offset;
	u32 doorbell;
	u32 doorbell_offset;
	u64 wptr_gpu_addr;

	wb_offset = (ring->rptr_offs * 4);

	rb_cntl = RREG32_SDMA(i, regSDMA_PAGE_RB_CNTL);
	rb_cntl = sdma_v4_4_2_rb_cntl(ring, rb_cntl);
	WREG32_SDMA(i, regSDMA_PAGE_RB_CNTL, rb_cntl);

	/* Initialize the ring buffer's read and write pointers */
	WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR, 0);
	WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_HI, 0);
	WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR, 0);
	WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_HI, 0);

	/* set the wb address whether it's enabled or not */
	WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_ADDR_HI,
	       upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
	WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_ADDR_LO,
	       lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);

	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_PAGE_RB_CNTL,
				RPTR_WRITEBACK_ENABLE, 1);

	WREG32_SDMA(i, regSDMA_PAGE_RB_BASE, ring->gpu_addr >> 8);
	WREG32_SDMA(i, regSDMA_PAGE_RB_BASE_HI, ring->gpu_addr >> 40);

	ring->wptr = 0;

	/* before programming wptr to a smaller value, need to set minor_ptr_update first */
	WREG32_SDMA(i, regSDMA_PAGE_MINOR_PTR_UPDATE, 1);

	doorbell = RREG32_SDMA(i, regSDMA_PAGE_DOORBELL);
	doorbell_offset = RREG32_SDMA(i, regSDMA_PAGE_DOORBELL_OFFSET);

	doorbell = REG_SET_FIELD(doorbell, SDMA_PAGE_DOORBELL, ENABLE,
				 ring->use_doorbell);
	doorbell_offset = REG_SET_FIELD(doorbell_offset,
					SDMA_PAGE_DOORBELL_OFFSET,
					OFFSET, ring->doorbell_index);
	WREG32_SDMA(i, regSDMA_PAGE_DOORBELL, doorbell);
	WREG32_SDMA(i, regSDMA_PAGE_DOORBELL_OFFSET, doorbell_offset);

	/* paging queue doorbell range is setup at sdma_v4_4_2_gfx_resume */
	sdma_v4_4_2_page_ring_set_wptr(ring);

	/* set minor_ptr_update to 0 after wptr is programmed */
	WREG32_SDMA(i, regSDMA_PAGE_MINOR_PTR_UPDATE, 0);

	/* setup the wptr shadow polling */
	wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_POLL_ADDR_LO,
		    lower_32_bits(wptr_gpu_addr));
	WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_POLL_ADDR_HI,
		    upper_32_bits(wptr_gpu_addr));
	wptr_poll_cntl = RREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_POLL_CNTL);
	wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl,
				       SDMA_PAGE_RB_WPTR_POLL_CNTL,
				       F32_POLL_ENABLE, amdgpu_sriov_vf(adev) ? 1 : 0);
	WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_POLL_CNTL, wptr_poll_cntl);

	/* enable DMA RB */
	rb_cntl = REG_SET_FIELD(rb_cntl, SDMA_PAGE_RB_CNTL, RB_ENABLE, 1);
	WREG32_SDMA(i, regSDMA_PAGE_RB_CNTL, rb_cntl);

	ib_cntl = RREG32_SDMA(i, regSDMA_PAGE_IB_CNTL);
	ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_PAGE_IB_CNTL, IB_ENABLE, 1);
#ifdef __BIG_ENDIAN
	ib_cntl = REG_SET_FIELD(ib_cntl, SDMA_PAGE_IB_CNTL, IB_SWAP_ENABLE, 1);
#endif
	/* enable DMA IBs */
	WREG32_SDMA(i, regSDMA_PAGE_IB_CNTL, ib_cntl);

	ring->sched.ready = true;
}

static void sdma_v4_4_2_init_pg(struct amdgpu_device *adev)
{
}
/**
 * sdma_v4_4_2_inst_rlc_resume - setup and start the async dma engines
 *
 * @adev: amdgpu_device pointer
 * @inst_mask: mask of dma engine instances to be enabled
 *
 * Set up the compute DMA queues and enable them.
 * Returns 0 for success, error for failure.
 */
static int sdma_v4_4_2_inst_rlc_resume(struct amdgpu_device *adev,
				       uint32_t inst_mask)
{
	sdma_v4_4_2_init_pg(adev);

	return 0;
}
/**
 * sdma_v4_4_2_inst_load_microcode - load the sDMA ME ucode
 *
 * @adev: amdgpu_device pointer
 * @inst_mask: mask of dma engine instances to be enabled
 *
 * Loads the sDMA0/1 ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int sdma_v4_4_2_inst_load_microcode(struct amdgpu_device *adev,
					   uint32_t inst_mask)
{
	const struct sdma_firmware_header_v1_0 *hdr;
	const __le32 *fw_data;
	u32 fw_size;
	int i, j;

	/* halt the MEs */
	sdma_v4_4_2_inst_enable(adev, false, inst_mask);

	for_each_inst(i, inst_mask) {
		if (!adev->sdma.instance[i].fw)
			return -EINVAL;

		hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data;
		amdgpu_ucode_print_sdma_hdr(&hdr->header);
		fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;

		fw_data = (const __le32 *)
			(adev->sdma.instance[i].fw->data +
				le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		WREG32_SDMA(i, regSDMA_UCODE_ADDR, 0);

		for (j = 0; j < fw_size; j++)
			WREG32_SDMA(i, regSDMA_UCODE_DATA,
				    le32_to_cpup(fw_data++));

		WREG32_SDMA(i, regSDMA_UCODE_ADDR,
			    adev->sdma.instance[i].fw_version);
	}

	return 0;
}
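/*
 * Writing 0 to UCODE_ADDR resets the instruction write pointer; each
 * UCODE_DATA write then auto-increments it. The final UCODE_ADDR write
 * of fw_version follows the convention used by earlier SDMA generations
 * to tag the loaded firmware.
 */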
/**
 * sdma_v4_4_2_inst_start - setup and start the async dma engines
 *
 * @adev: amdgpu_device pointer
 * @inst_mask: mask of dma engine instances to be enabled
 *
 * Set up the DMA engines and enable them.
 * Returns 0 for success, error for failure.
 */
static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev,
				  uint32_t inst_mask)
{
	struct amdgpu_ring *ring;
	uint32_t tmp_mask;
	int i, r = 0;

	if (amdgpu_sriov_vf(adev)) {
		sdma_v4_4_2_inst_ctx_switch_enable(adev, false, inst_mask);
		sdma_v4_4_2_inst_enable(adev, false, inst_mask);
	} else {
		/* bypass sdma microcode loading on Gopher */
		if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP &&
		    adev->sdma.instance[0].fw) {
			r = sdma_v4_4_2_inst_load_microcode(adev, inst_mask);
			if (r)
				return r;
		}

		/* unhalt the MEs */
		sdma_v4_4_2_inst_enable(adev, true, inst_mask);
		/* enable sdma ring preemption */
		sdma_v4_4_2_inst_ctx_switch_enable(adev, true, inst_mask);
	}

	/* start the gfx rings and rlc compute queues */
	tmp_mask = inst_mask;
	for_each_inst(i, tmp_mask) {
		uint32_t temp;

		WREG32_SDMA(i, regSDMA_SEM_WAIT_FAIL_TIMER_CNTL, 0);
		sdma_v4_4_2_gfx_resume(adev, i);
		if (adev->sdma.has_page_queue)
			sdma_v4_4_2_page_resume(adev, i);

		/* set utc l1 enable flag always to 1 */
		temp = RREG32_SDMA(i, regSDMA_CNTL);
		temp = REG_SET_FIELD(temp, SDMA_CNTL, UTC_L1_ENABLE, 1);
		WREG32_SDMA(i, regSDMA_CNTL, temp);

		if (!amdgpu_sriov_vf(adev)) {
			ring = &adev->sdma.instance[i].ring;
			adev->nbio.funcs->sdma_doorbell_range(adev, i,
				ring->use_doorbell, ring->doorbell_index,
				adev->doorbell_index.sdma_doorbell_range);

			/* unhalt engine */
			temp = RREG32_SDMA(i, regSDMA_F32_CNTL);
			temp = REG_SET_FIELD(temp, SDMA_F32_CNTL, HALT, 0);
			WREG32_SDMA(i, regSDMA_F32_CNTL, temp);
		}
	}

	if (amdgpu_sriov_vf(adev)) {
		sdma_v4_4_2_inst_ctx_switch_enable(adev, true, inst_mask);
		sdma_v4_4_2_inst_enable(adev, true, inst_mask);
	} else {
		r = sdma_v4_4_2_inst_rlc_resume(adev, inst_mask);
		if (r)
			return r;
	}

	tmp_mask = inst_mask;
	for_each_inst(i, tmp_mask) {
		ring = &adev->sdma.instance[i].ring;

		r = amdgpu_ring_test_helper(ring);
		if (r)
			return r;

		if (adev->sdma.has_page_queue) {
			struct amdgpu_ring *page = &adev->sdma.instance[i].page;

			r = amdgpu_ring_test_helper(page);
			if (r)
				return r;

			if (adev->mman.buffer_funcs_ring == page)
				amdgpu_ttm_set_buffer_funcs_status(adev, true);
		}

		if (adev->mman.buffer_funcs_ring == ring)
			amdgpu_ttm_set_buffer_funcs_status(adev, true);
	}

	return r;
}
/**
 * sdma_v4_4_2_ring_test_ring - simple async dma engine test
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Test the DMA engine by using it to write a value to memory.
 * Returns 0 for success, error for failure.
 */
static int sdma_v4_4_2_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned i;
	unsigned index;
	int r;
	u32 tmp;
	u64 gpu_addr;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	tmp = 0xCAFEDEAD;
	adev->wb.wb[index] = cpu_to_le32(tmp);

	r = amdgpu_ring_alloc(ring, 5);
	if (r)
		goto error_free_wb;

	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
			  SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
	amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
	amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = le32_to_cpu(adev->wb.wb[index]);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

error_free_wb:
	amdgpu_device_wb_free(adev, index);
	return r;
}
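/*
 * The test seeds the write-back slot with 0xCAFEDEAD, asks the engine
 * to overwrite it with 0xDEADBEEF via a WRITE_LINEAR packet, then polls
 * until the new value lands or usec_timeout expires.
 */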
/**
 * sdma_v4_4_2_ring_test_ib - test an IB on the DMA engine
 *
 * @ring: amdgpu_ring structure holding ring information
 * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
 *
 * Test a simple IB in the DMA ring.
 * Returns 0 on success, error on failure.
 */
static int sdma_v4_4_2_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct dma_fence *f = NULL;
	unsigned index;
	long r;
	u32 tmp = 0;
	u64 gpu_addr;

	r = amdgpu_device_wb_get(adev, &index);
	if (r)
		return r;

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	tmp = 0xCAFEDEAD;
	adev->wb.wb[index] = cpu_to_le32(tmp);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256,
					AMDGPU_IB_POOL_DIRECT, &ib);
	if (r)
		goto err0;

	ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
	ib.ptr[1] = lower_32_bits(gpu_addr);
	ib.ptr[2] = upper_32_bits(gpu_addr);
	ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(0);
	ib.ptr[4] = 0xDEADBEEF;
	ib.ptr[5] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
	ib.ptr[6] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
	ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
	ib.length_dw = 8;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
	if (r)
		goto err1;

	r = dma_fence_wait_timeout(f, false, timeout);
	if (r == 0) {
		r = -ETIMEDOUT;
		goto err1;
	} else if (r < 0) {
		goto err1;
	}
	tmp = le32_to_cpu(adev->wb.wb[index]);
	if (tmp == 0xDEADBEEF)
		r = 0;
	else
		r = -EINVAL;

err1:
	amdgpu_ib_free(adev, &ib, NULL);
	dma_fence_put(f);
err0:
	amdgpu_device_wb_free(adev, index);
	return r;
}
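/*
 * The three trailing NOPs pad the IB to 8 dwords, matching the same
 * 8-DW granularity the ring-level emit path maintains.
 */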
/**
 * sdma_v4_4_2_vm_copy_pte - update PTEs by copying them from the GART
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @src: src addr to copy from
 * @count: number of page entries to update
 *
 * Update PTEs by copying them from the GART using sDMA.
 */
static void sdma_v4_4_2_vm_copy_pte(struct amdgpu_ib *ib,
				  uint64_t pe, uint64_t src,
				  unsigned count)
{
	unsigned bytes = count * 8;

	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
	ib->ptr[ib->length_dw++] = bytes - 1;
	ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
	ib->ptr[ib->length_dw++] = lower_32_bits(src);
	ib->ptr[ib->length_dw++] = upper_32_bits(src);
	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
}
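/* COPY_LINEAR encodes the transfer size as (bytes - 1); each PTE is 8 bytes. */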
/**
 * sdma_v4_4_2_vm_write_pte - update PTEs by writing them manually
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @value: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 *
 * Update PTEs by writing them manually using sDMA.
 */
static void sdma_v4_4_2_vm_write_pte(struct amdgpu_ib *ib, uint64_t pe,
				   uint64_t value, unsigned count,
				   uint32_t incr)
{
	unsigned ndw = count * 2;

	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
	ib->ptr[ib->length_dw++] = lower_32_bits(pe);
	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
	ib->ptr[ib->length_dw++] = ndw - 1;
	for (; ndw > 0; ndw -= 2) {
		ib->ptr[ib->length_dw++] = lower_32_bits(value);
		ib->ptr[ib->length_dw++] = upper_32_bits(value);
		value += incr;
	}
}
/**
 * sdma_v4_4_2_vm_set_pte_pde - update the page tables using sDMA
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
 * Update the page tables using sDMA.
 */
static void sdma_v4_4_2_vm_set_pte_pde(struct amdgpu_ib *ib,
				     uint64_t pe,
				     uint64_t addr, unsigned count,
				     uint32_t incr, uint64_t flags)
{
	/* for physically contiguous pages (vram) */
	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_PTEPDE);
	ib->ptr[ib->length_dw++] = lower_32_bits(pe); /* dst addr */
	ib->ptr[ib->length_dw++] = upper_32_bits(pe);
	ib->ptr[ib->length_dw++] = lower_32_bits(flags); /* mask */
	ib->ptr[ib->length_dw++] = upper_32_bits(flags);
	ib->ptr[ib->length_dw++] = lower_32_bits(addr); /* value */
	ib->ptr[ib->length_dw++] = upper_32_bits(addr);
	ib->ptr[ib->length_dw++] = incr; /* increment size */
	ib->ptr[ib->length_dw++] = 0;
	ib->ptr[ib->length_dw++] = count - 1; /* number of entries */
}
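/*
 * Unlike the manual write path above, PTEPDE lets the engine generate
 * the entries itself: it writes `count` entries starting at pe, adding
 * incr to the value each step and applying the flags mask.
 */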
/**
 * sdma_v4_4_2_ring_pad_ib - pad the IB to the required number of dw
 *
 * @ring: amdgpu_ring structure holding ring information
 * @ib: indirect buffer to fill with padding
 */
static void sdma_v4_4_2_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
{
	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
	u32 pad_count;
	int i;

	pad_count = (-ib->length_dw) & 7;
	for (i = 0; i < pad_count; i++)
		if (sdma && sdma->burst_nop && (i == 0))
			ib->ptr[ib->length_dw++] =
				SDMA_PKT_HEADER_OP(SDMA_OP_NOP) |
				SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1);
		else
			ib->ptr[ib->length_dw++] =
				SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
}
/**
 * sdma_v4_4_2_ring_emit_pipeline_sync - sync the pipeline
 *
 * @ring: amdgpu_ring pointer
 *
 * Make sure all previous operations are completed (CIK).
 */
static void sdma_v4_4_2_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	/* wait for idle */
	sdma_v4_4_2_wait_reg_mem(ring, 1, 0,
			       addr & 0xfffffffc,
			       upper_32_bits(addr) & 0xffffffff,
			       seq, 0xffffffff, 4);
}
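/*
 * The pipeline sync is a POLL_REGMEM on the ring's own fence address:
 * the engine stalls until the last synced fence value is visible in
 * memory, i.e. all prior work has signalled.
 */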
/**
 * sdma_v4_4_2_ring_emit_vm_flush - vm flush using sDMA
 *
 * @ring: amdgpu_ring pointer
 * @vmid: vmid number to use
 * @pd_addr: address
 *
 * Update the page table base and flush the VM TLB
 * using sDMA.
 */
static void sdma_v4_4_2_ring_emit_vm_flush(struct amdgpu_ring *ring,
					 unsigned vmid, uint64_t pd_addr)
{
	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
}

static void sdma_v4_4_2_ring_emit_wreg(struct amdgpu_ring *ring,
				     uint32_t reg, uint32_t val)
{
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
	amdgpu_ring_write(ring, reg);
	amdgpu_ring_write(ring, val);
}

static void sdma_v4_4_2_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
					 uint32_t val, uint32_t mask)
{
	sdma_v4_4_2_wait_reg_mem(ring, 0, 0, reg, 0, val, mask, 10);
}
static bool sdma_v4_4_2_fw_support_paging_queue(struct amdgpu_device *adev)
{
	switch (adev->ip_versions[SDMA0_HWIP][0]) {
	case IP_VERSION(4, 4, 2):
		return false;
	default:
		return false;
	}
}
static int sdma_v4_4_2_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	r = sdma_v4_4_2_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load sdma firmware!\n");
		return r;
	}

	/* TODO: Page queue breaks driver reload under SRIOV */
	if (sdma_v4_4_2_fw_support_paging_queue(adev))
		adev->sdma.has_page_queue = true;

	sdma_v4_4_2_set_ring_funcs(adev);
	sdma_v4_4_2_set_buffer_funcs(adev);
	sdma_v4_4_2_set_vm_pte_funcs(adev);
	sdma_v4_4_2_set_irq_funcs(adev);

	return 0;
}

static int sdma_v4_4_2_process_ras_data_cb(struct amdgpu_device *adev,
		void *err_data,
		struct amdgpu_iv_entry *entry);
static int sdma_v4_4_2_late_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
#if 0
	struct ras_ih_if ih_info = {
		.cb = sdma_v4_4_2_process_ras_data_cb,
	};
#endif
	if (!amdgpu_persistent_edc_harvesting_supported(adev)) {
		if (adev->sdma.ras && adev->sdma.ras->ras_block.hw_ops &&
		    adev->sdma.ras->ras_block.hw_ops->reset_ras_error_count)
			adev->sdma.ras->ras_block.hw_ops->reset_ras_error_count(adev);
	}

	return 0;
}
static int sdma_v4_4_2_sw_init(void *handle)
{
	struct amdgpu_ring *ring;
	int r, i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 aid_id;

	/* SDMA trap event */
	for (i = 0; i < adev->sdma.num_inst_per_aid; i++) {
		r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
				      SDMA0_4_0__SRCID__SDMA_TRAP,
				      &adev->sdma.trap_irq);
		if (r)
			return r;
	}

	/* SDMA SRAM ECC event */
	for (i = 0; i < adev->sdma.num_inst_per_aid; i++) {
		r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
				      SDMA0_4_0__SRCID__SDMA_SRAM_ECC,
				      &adev->sdma.ecc_irq);
		if (r)
			return r;
	}

	/* SDMA VM_HOLE/DOORBELL_INV/POLL_TIMEOUT/SRBM_WRITE_PROTECTION event */
	for (i = 0; i < adev->sdma.num_inst_per_aid; i++) {
		r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
				      SDMA0_4_0__SRCID__SDMA_VM_HOLE,
				      &adev->sdma.vm_hole_irq);
		if (r)
			return r;

		r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
				      SDMA0_4_0__SRCID__SDMA_DOORBELL_INVALID,
				      &adev->sdma.doorbell_invalid_irq);
		if (r)
			return r;

		r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
				      SDMA0_4_0__SRCID__SDMA_POLL_TIMEOUT,
				      &adev->sdma.pool_timeout_irq);
		if (r)
			return r;

		r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i),
				      SDMA0_4_0__SRCID__SDMA_SRBMWRITE,
				      &adev->sdma.srbm_write_irq);
		if (r)
			return r;
	}

	for (i = 0; i < adev->sdma.num_instances; i++) {
		ring = &adev->sdma.instance[i].ring;
		ring->ring_obj = NULL;
		ring->use_doorbell = true;
		aid_id = adev->sdma.instance[i].aid_id;

		DRM_DEBUG("SDMA %d use_doorbell being set to: [%s]\n", i,
				ring->use_doorbell ? "true" : "false");

		/* doorbell size is 2 dwords, get DWORD offset */
		ring->doorbell_index = adev->doorbell_index.sdma_engine[i] << 1;
		ring->vm_hub = AMDGPU_MMHUB0(aid_id);

		sprintf(ring->name, "sdma%d.%d", aid_id,
				i % adev->sdma.num_inst_per_aid);
		r = amdgpu_ring_init(adev, ring, 1024, &adev->sdma.trap_irq,
				     AMDGPU_SDMA_IRQ_INSTANCE0 + i,
				     AMDGPU_RING_PRIO_DEFAULT, NULL);
		if (r)
			return r;

		if (adev->sdma.has_page_queue) {
			ring = &adev->sdma.instance[i].page;
			ring->ring_obj = NULL;
			ring->use_doorbell = true;

			/* doorbell index of page queue is assigned right after
			 * gfx queue on the same instance
			 */
			ring->doorbell_index =
				(adev->doorbell_index.sdma_engine[i] + 1) << 1;
			ring->vm_hub = AMDGPU_MMHUB0(aid_id);

			sprintf(ring->name, "page%d.%d", aid_id,
					i % adev->sdma.num_inst_per_aid);
			r = amdgpu_ring_init(adev, ring, 1024,
					     &adev->sdma.trap_irq,
					     AMDGPU_SDMA_IRQ_INSTANCE0 + i,
					     AMDGPU_RING_PRIO_DEFAULT, NULL);
			if (r)
				return r;
		}
	}

	return r;
}
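/*
 * Note on the doorbell indices above: each SDMA doorbell is 64 bits
 * (two dwords), so the dword index is the doorbell slot shifted left by
 * one, and the page queue takes the slot immediately after its gfx
 * queue.
 */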
static int sdma_v4_4_2_sw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		amdgpu_ring_fini(&adev->sdma.instance[i].ring);
		if (adev->sdma.has_page_queue)
			amdgpu_ring_fini(&adev->sdma.instance[i].page);
	}

	if (adev->ip_versions[SDMA0_HWIP][0] == IP_VERSION(4, 4, 2))
		amdgpu_sdma_destroy_inst_ctx(adev, true);
	else
		amdgpu_sdma_destroy_inst_ctx(adev, false);

	return 0;
}
static int sdma_v4_4_2_hw_init(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	uint32_t inst_mask;

	inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
	if (!amdgpu_sriov_vf(adev))
		sdma_v4_4_2_inst_init_golden_registers(adev, inst_mask);

	r = sdma_v4_4_2_inst_start(adev, inst_mask);

	return r;
}

static int sdma_v4_4_2_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	uint32_t inst_mask;
	int i;

	if (amdgpu_sriov_vf(adev))
		return 0;

	inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);
	for (i = 0; i < adev->sdma.num_instances; i++) {
		amdgpu_irq_put(adev, &adev->sdma.ecc_irq,
			       AMDGPU_SDMA_IRQ_INSTANCE0 + i);
	}

	sdma_v4_4_2_inst_ctx_switch_enable(adev, false, inst_mask);
	sdma_v4_4_2_inst_enable(adev, false, inst_mask);

	return 0;
}
static int sdma_v4_4_2_set_clockgating_state(void *handle,
					     enum amd_clockgating_state state);

static int sdma_v4_4_2_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return sdma_v4_4_2_hw_fini(adev);
}

static int sdma_v4_4_2_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return sdma_v4_4_2_hw_init(adev);
}
static bool sdma_v4_4_2_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	u32 i;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		u32 tmp = RREG32_SDMA(i, regSDMA_STATUS_REG);

		if (!(tmp & SDMA_STATUS_REG__IDLE_MASK))
			return false;
	}

	return true;
}

static int sdma_v4_4_2_wait_for_idle(void *handle)
{
	unsigned i, j;
	u32 sdma[AMDGPU_MAX_SDMA_INSTANCES];
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (i = 0; i < adev->usec_timeout; i++) {
		for (j = 0; j < adev->sdma.num_instances; j++) {
			sdma[j] = RREG32_SDMA(j, regSDMA_STATUS_REG);
			if (!(sdma[j] & SDMA_STATUS_REG__IDLE_MASK))
				break;
		}
		if (j == adev->sdma.num_instances)
			return 0;
		udelay(1);
	}
	return -ETIMEDOUT;
}
static int sdma_v4_4_2_soft_reset(void *handle)
{
	/* todo */

	return 0;
}
static int sdma_v4_4_2_set_trap_irq_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	u32 sdma_cntl;

	sdma_cntl = RREG32_SDMA(type, regSDMA_CNTL);
	sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA_CNTL, TRAP_ENABLE,
		       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
	WREG32_SDMA(type, regSDMA_CNTL, sdma_cntl);

	return 0;
}
static int sdma_v4_4_2_process_trap_irq(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	uint32_t instance, i;

	DRM_DEBUG("IH: SDMA trap\n");
	instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id);

	/* Client id gives the SDMA instance in AID. To know the exact SDMA
	 * instance, interrupt entry gives the node id which corresponds to the AID instance.
	 * Match node id with the AID id associated with the SDMA instance. */
	for (i = instance; i < adev->sdma.num_instances;
	     i += adev->sdma.num_inst_per_aid) {
		if (adev->sdma.instance[i].aid_id ==
		    node_id_to_phys_map[entry->node_id])
			break;
	}

	if (i >= adev->sdma.num_instances) {
		dev_WARN_ONCE(
			adev->dev, 1,
			"Couldn't find the right sdma instance in trap handler");
		return 0;
	}

	switch (entry->ring_id) {
	case 0:
		amdgpu_fence_process(&adev->sdma.instance[i].ring);
		break;
	default:
		break;
	}
	return 0;
}
#if 0
static int sdma_v4_4_2_process_ras_data_cb(struct amdgpu_device *adev,
		void *err_data,
		struct amdgpu_iv_entry *entry)
{
	int instance;

	/* When "Full RAS" is enabled, the per-IP interrupt sources should
	 * be disabled and the driver should only look for the aggregated
	 * interrupt via sync flood
	 */
	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
		goto out;

	instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id);
	if (instance < 0)
		goto out;

	amdgpu_sdma_process_ras_data_cb(adev, err_data, entry);

out:
	return AMDGPU_RAS_SUCCESS;
}
#endif
static int sdma_v4_4_2_process_illegal_inst_irq(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      struct amdgpu_iv_entry *entry)
{
	int instance;

	DRM_ERROR("Illegal instruction in SDMA command stream\n");

	instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id);
	if (instance < 0)
		return 0;

	switch (entry->ring_id) {
	case 0:
		drm_sched_fault(&adev->sdma.instance[instance].ring.sched);
		break;
	}
	return 0;
}

static int sdma_v4_4_2_set_ecc_irq_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	u32 sdma_edc_config;

	sdma_edc_config = RREG32_SDMA(type, regCC_SDMA_EDC_CONFIG);
	/*
	 * FIXME: This was inherited from Aldebaran, but this field is not
	 * defined in the regspec of either Aldebaran or SDMA 4.4.2
	 */
	sdma_edc_config |= (state == AMDGPU_IRQ_STATE_ENABLE) ? (1 << 2) : 0;
	WREG32_SDMA(type, regCC_SDMA_EDC_CONFIG, sdma_edc_config);

	return 0;
}
static int sdma_v4_4_2_print_iv_entry(struct amdgpu_device *adev,
					      struct amdgpu_iv_entry *entry)
{
	int instance;
	struct amdgpu_task_info task_info;
	u64 addr;

	instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id);
	if (instance < 0 || instance >= adev->sdma.num_instances) {
		dev_err(adev->dev, "sdma instance invalid %d\n", instance);
		return -EINVAL;
	}

	addr = (u64)entry->src_data[0] << 12;
	addr |= ((u64)entry->src_data[1] & 0xf) << 44;

	memset(&task_info, 0, sizeof(struct amdgpu_task_info));
	amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);

	dev_dbg_ratelimited(adev->dev,
		   "[sdma%d] address:0x%016llx src_id:%u ring:%u vmid:%u "
		   "pasid:%u, for process %s pid %d thread %s pid %d\n",
		   instance, addr, entry->src_id, entry->ring_id, entry->vmid,
		   entry->pasid, task_info.process_name, task_info.tgid,
		   task_info.task_name, task_info.pid);
	return 0;
}
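/*
 * The faulting address arrives split across the IV entry: src_data[0]
 * carries bits 12..43 of the page address and the low nibble of
 * src_data[1] carries bits 44..47, reassembled by the shifts above.
 */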
static int sdma_v4_4_2_process_vm_hole_irq(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      struct amdgpu_iv_entry *entry)
{
	dev_dbg_ratelimited(adev->dev, "MC or SEM address in VM hole\n");
	sdma_v4_4_2_print_iv_entry(adev, entry);
	return 0;
}

static int sdma_v4_4_2_process_doorbell_invalid_irq(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      struct amdgpu_iv_entry *entry)
{
	dev_dbg_ratelimited(adev->dev, "SDMA received a doorbell from BIF with byte_enable != 0xff\n");
	sdma_v4_4_2_print_iv_entry(adev, entry);
	return 0;
}

static int sdma_v4_4_2_process_pool_timeout_irq(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      struct amdgpu_iv_entry *entry)
{
	dev_dbg_ratelimited(adev->dev,
		"Polling register/memory timeout executing POLL_REG/MEM with finite timer\n");
	sdma_v4_4_2_print_iv_entry(adev, entry);
	return 0;
}

static int sdma_v4_4_2_process_srbm_write_irq(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      struct amdgpu_iv_entry *entry)
{
	dev_dbg_ratelimited(adev->dev,
		"SDMA received an SRBM_WRITE register write command in a non-privileged command buffer\n");
	sdma_v4_4_2_print_iv_entry(adev, entry);
	return 0;
}
static void sdma_v4_4_2_inst_update_medium_grain_light_sleep(
	struct amdgpu_device *adev, bool enable, uint32_t inst_mask)
{
	uint32_t data, def;
	int i;

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) {
		for_each_inst(i, inst_mask) {
			/* 1-not override: enable sdma mem light sleep */
			def = data = RREG32_SDMA(i, regSDMA_POWER_CNTL);
			data |= SDMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
			if (def != data)
				WREG32_SDMA(i, regSDMA_POWER_CNTL, data);
		}
	} else {
		for_each_inst(i, inst_mask) {
			/* 0-override: disable sdma mem light sleep */
			def = data = RREG32_SDMA(i, regSDMA_POWER_CNTL);
			data &= ~SDMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK;
			if (def != data)
				WREG32_SDMA(i, regSDMA_POWER_CNTL, data);
		}
	}
}
static void sdma_v4_4_2_inst_update_medium_grain_clock_gating(
	struct amdgpu_device *adev, bool enable, uint32_t inst_mask)
{
	uint32_t data, def;
	int i;

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) {
		for_each_inst(i, inst_mask) {
			def = data = RREG32_SDMA(i, regSDMA_CLK_CTRL);
			data &= ~(SDMA_CLK_CTRL__SOFT_OVERRIDE7_MASK |
				  SDMA_CLK_CTRL__SOFT_OVERRIDE6_MASK |
				  SDMA_CLK_CTRL__SOFT_OVERRIDE5_MASK |
				  SDMA_CLK_CTRL__SOFT_OVERRIDE4_MASK |
				  SDMA_CLK_CTRL__SOFT_OVERRIDE3_MASK |
				  SDMA_CLK_CTRL__SOFT_OVERRIDE2_MASK |
				  SDMA_CLK_CTRL__SOFT_OVERRIDE1_MASK |
				  SDMA_CLK_CTRL__SOFT_OVERRIDE0_MASK);
			if (def != data)
				WREG32_SDMA(i, regSDMA_CLK_CTRL, data);
		}
	} else {
		for_each_inst(i, inst_mask) {
			def = data = RREG32_SDMA(i, regSDMA_CLK_CTRL);
			data |= (SDMA_CLK_CTRL__SOFT_OVERRIDE7_MASK |
				 SDMA_CLK_CTRL__SOFT_OVERRIDE6_MASK |
				 SDMA_CLK_CTRL__SOFT_OVERRIDE5_MASK |
				 SDMA_CLK_CTRL__SOFT_OVERRIDE4_MASK |
				 SDMA_CLK_CTRL__SOFT_OVERRIDE3_MASK |
				 SDMA_CLK_CTRL__SOFT_OVERRIDE2_MASK |
				 SDMA_CLK_CTRL__SOFT_OVERRIDE1_MASK |
				 SDMA_CLK_CTRL__SOFT_OVERRIDE0_MASK);
			if (def != data)
				WREG32_SDMA(i, regSDMA_CLK_CTRL, data);
		}
	}
}
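/*
 * Both clock-gating helpers use the usual def/data read-modify-write
 * pattern: the register is written back only when the computed value
 * actually differs, avoiding redundant MMIO writes.
 */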
static int sdma_v4_4_2_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	uint32_t inst_mask;

	if (amdgpu_sriov_vf(adev))
		return 0;

	inst_mask = GENMASK(adev->sdma.num_instances - 1, 0);

	sdma_v4_4_2_inst_update_medium_grain_clock_gating(
		adev, state == AMD_CG_STATE_GATE, inst_mask);
	sdma_v4_4_2_inst_update_medium_grain_light_sleep(
		adev, state == AMD_CG_STATE_GATE, inst_mask);
	return 0;
}

static int sdma_v4_4_2_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	return 0;
}

static void sdma_v4_4_2_get_clockgating_state(void *handle, u64 *flags)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int data;

	if (amdgpu_sriov_vf(adev))
		*flags = 0;

	/* AMD_CG_SUPPORT_SDMA_MGCG */
	data = RREG32(SOC15_REG_OFFSET(SDMA0, GET_INST(SDMA0, 0), regSDMA_CLK_CTRL));
	if (!(data & SDMA_CLK_CTRL__SOFT_OVERRIDE7_MASK))
		*flags |= AMD_CG_SUPPORT_SDMA_MGCG;

	/* AMD_CG_SUPPORT_SDMA_LS */
	data = RREG32(SOC15_REG_OFFSET(SDMA0, GET_INST(SDMA0, 0), regSDMA_POWER_CNTL));
	if (data & SDMA_POWER_CNTL__MEM_POWER_OVERRIDE_MASK)
		*flags |= AMD_CG_SUPPORT_SDMA_LS;
}
const struct amd_ip_funcs sdma_v4_4_2_ip_funcs = {
	.name = "sdma_v4_4_2",
	.early_init = sdma_v4_4_2_early_init,
	.late_init = sdma_v4_4_2_late_init,
	.sw_init = sdma_v4_4_2_sw_init,
	.sw_fini = sdma_v4_4_2_sw_fini,
	.hw_init = sdma_v4_4_2_hw_init,
	.hw_fini = sdma_v4_4_2_hw_fini,
	.suspend = sdma_v4_4_2_suspend,
	.resume = sdma_v4_4_2_resume,
	.is_idle = sdma_v4_4_2_is_idle,
	.wait_for_idle = sdma_v4_4_2_wait_for_idle,
	.soft_reset = sdma_v4_4_2_soft_reset,
	.set_clockgating_state = sdma_v4_4_2_set_clockgating_state,
	.set_powergating_state = sdma_v4_4_2_set_powergating_state,
	.get_clockgating_state = sdma_v4_4_2_get_clockgating_state,
};
static const struct amdgpu_ring_funcs sdma_v4_4_2_ring_funcs = {
	.type = AMDGPU_RING_TYPE_SDMA,
	.align_mask = 0xf,
	.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
	.support_64bit_ptrs = true,
	.get_rptr = sdma_v4_4_2_ring_get_rptr,
	.get_wptr = sdma_v4_4_2_ring_get_wptr,
	.set_wptr = sdma_v4_4_2_ring_set_wptr,
	.emit_frame_size =
		6 + /* sdma_v4_4_2_ring_emit_hdp_flush */
		3 + /* hdp invalidate */
		6 + /* sdma_v4_4_2_ring_emit_pipeline_sync */
		/* sdma_v4_4_2_ring_emit_vm_flush */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
		10 + 10 + 10, /* sdma_v4_4_2_ring_emit_fence x3 for user fence, vm fence */
	.emit_ib_size = 7 + 6, /* sdma_v4_4_2_ring_emit_ib */
	.emit_ib = sdma_v4_4_2_ring_emit_ib,
	.emit_fence = sdma_v4_4_2_ring_emit_fence,
	.emit_pipeline_sync = sdma_v4_4_2_ring_emit_pipeline_sync,
	.emit_vm_flush = sdma_v4_4_2_ring_emit_vm_flush,
	.emit_hdp_flush = sdma_v4_4_2_ring_emit_hdp_flush,
	.test_ring = sdma_v4_4_2_ring_test_ring,
	.test_ib = sdma_v4_4_2_ring_test_ib,
	.insert_nop = sdma_v4_4_2_ring_insert_nop,
	.pad_ib = sdma_v4_4_2_ring_pad_ib,
	.emit_wreg = sdma_v4_4_2_ring_emit_wreg,
	.emit_reg_wait = sdma_v4_4_2_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};

static const struct amdgpu_ring_funcs sdma_v4_4_2_page_ring_funcs = {
	.type = AMDGPU_RING_TYPE_SDMA,
	.align_mask = 0xf,
	.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
	.support_64bit_ptrs = true,
	.get_rptr = sdma_v4_4_2_ring_get_rptr,
	.get_wptr = sdma_v4_4_2_page_ring_get_wptr,
	.set_wptr = sdma_v4_4_2_page_ring_set_wptr,
	.emit_frame_size =
		6 + /* sdma_v4_4_2_ring_emit_hdp_flush */
		3 + /* hdp invalidate */
		6 + /* sdma_v4_4_2_ring_emit_pipeline_sync */
		/* sdma_v4_4_2_ring_emit_vm_flush */
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +
		10 + 10 + 10, /* sdma_v4_4_2_ring_emit_fence x3 for user fence, vm fence */
	.emit_ib_size = 7 + 6, /* sdma_v4_4_2_ring_emit_ib */
	.emit_ib = sdma_v4_4_2_ring_emit_ib,
	.emit_fence = sdma_v4_4_2_ring_emit_fence,
	.emit_pipeline_sync = sdma_v4_4_2_ring_emit_pipeline_sync,
	.emit_vm_flush = sdma_v4_4_2_ring_emit_vm_flush,
	.emit_hdp_flush = sdma_v4_4_2_ring_emit_hdp_flush,
	.test_ring = sdma_v4_4_2_ring_test_ring,
	.test_ib = sdma_v4_4_2_ring_test_ib,
	.insert_nop = sdma_v4_4_2_ring_insert_nop,
	.pad_ib = sdma_v4_4_2_ring_pad_ib,
	.emit_wreg = sdma_v4_4_2_ring_emit_wreg,
	.emit_reg_wait = sdma_v4_4_2_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};
static void sdma_v4_4_2_set_ring_funcs(struct amdgpu_device *adev)
{
	int i, dev_inst;

	for (i = 0; i < adev->sdma.num_instances; i++) {
		adev->sdma.instance[i].ring.funcs = &sdma_v4_4_2_ring_funcs;
		adev->sdma.instance[i].ring.me = i;
		if (adev->sdma.has_page_queue) {
			adev->sdma.instance[i].page.funcs =
				&sdma_v4_4_2_page_ring_funcs;
			adev->sdma.instance[i].page.me = i;
		}

		dev_inst = GET_INST(SDMA0, i);
		/* AID to which SDMA belongs depends on physical instance */
		adev->sdma.instance[i].aid_id =
			dev_inst / adev->sdma.num_inst_per_aid;
	}
}
static const struct amdgpu_irq_src_funcs sdma_v4_4_2_trap_irq_funcs = {
	.set = sdma_v4_4_2_set_trap_irq_state,
	.process = sdma_v4_4_2_process_trap_irq,
};

static const struct amdgpu_irq_src_funcs sdma_v4_4_2_illegal_inst_irq_funcs = {
	.process = sdma_v4_4_2_process_illegal_inst_irq,
};

static const struct amdgpu_irq_src_funcs sdma_v4_4_2_ecc_irq_funcs = {
	.set = sdma_v4_4_2_set_ecc_irq_state,
	.process = amdgpu_sdma_process_ecc_irq,
};

static const struct amdgpu_irq_src_funcs sdma_v4_4_2_vm_hole_irq_funcs = {
	.process = sdma_v4_4_2_process_vm_hole_irq,
};

static const struct amdgpu_irq_src_funcs sdma_v4_4_2_doorbell_invalid_irq_funcs = {
	.process = sdma_v4_4_2_process_doorbell_invalid_irq,
};

static const struct amdgpu_irq_src_funcs sdma_v4_4_2_pool_timeout_irq_funcs = {
	.process = sdma_v4_4_2_process_pool_timeout_irq,
};

static const struct amdgpu_irq_src_funcs sdma_v4_4_2_srbm_write_irq_funcs = {
	.process = sdma_v4_4_2_process_srbm_write_irq,
};

static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev)
{
	adev->sdma.trap_irq.num_types = adev->sdma.num_instances;
	adev->sdma.ecc_irq.num_types = adev->sdma.num_instances;
	adev->sdma.vm_hole_irq.num_types = adev->sdma.num_instances;
	adev->sdma.doorbell_invalid_irq.num_types = adev->sdma.num_instances;
	adev->sdma.pool_timeout_irq.num_types = adev->sdma.num_instances;
	adev->sdma.srbm_write_irq.num_types = adev->sdma.num_instances;

	adev->sdma.trap_irq.funcs = &sdma_v4_4_2_trap_irq_funcs;
	adev->sdma.illegal_inst_irq.funcs = &sdma_v4_4_2_illegal_inst_irq_funcs;
	adev->sdma.ecc_irq.funcs = &sdma_v4_4_2_ecc_irq_funcs;
	adev->sdma.vm_hole_irq.funcs = &sdma_v4_4_2_vm_hole_irq_funcs;
	adev->sdma.doorbell_invalid_irq.funcs = &sdma_v4_4_2_doorbell_invalid_irq_funcs;
	adev->sdma.pool_timeout_irq.funcs = &sdma_v4_4_2_pool_timeout_irq_funcs;
	adev->sdma.srbm_write_irq.funcs = &sdma_v4_4_2_srbm_write_irq_funcs;
}
/**
 * sdma_v4_4_2_emit_copy_buffer - copy buffer using the sDMA engine
 *
 * @ib: indirect buffer to copy to
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @byte_count: number of bytes to xfer
 * @tmz: if a secure copy should be used
 *
 * Copy GPU buffers using the DMA engine.
 * Used by the amdgpu ttm implementation to move pages if
 * registered as the asic copy callback.
 */
static void sdma_v4_4_2_emit_copy_buffer(struct amdgpu_ib *ib,
				       uint64_t src_offset,
				       uint64_t dst_offset,
				       uint32_t byte_count,
				       bool tmz)
{
	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
		SDMA_PKT_COPY_LINEAR_HEADER_TMZ(tmz ? 1 : 0);
	ib->ptr[ib->length_dw++] = byte_count - 1;
	ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
	ib->ptr[ib->length_dw++] = lower_32_bits(src_offset);
	ib->ptr[ib->length_dw++] = upper_32_bits(src_offset);
	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
}
/**
 * sdma_v4_4_2_emit_fill_buffer - fill buffer using the sDMA engine
 *
 * @ib: indirect buffer to copy to
 * @src_data: value to write to buffer
 * @dst_offset: dst GPU address
 * @byte_count: number of bytes to xfer
 *
 * Fill GPU buffers using the DMA engine.
 */
static void sdma_v4_4_2_emit_fill_buffer(struct amdgpu_ib *ib,
				       uint32_t src_data,
				       uint64_t dst_offset,
				       uint32_t byte_count)
{
	ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL);
	ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset);
	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
	ib->ptr[ib->length_dw++] = src_data;
	ib->ptr[ib->length_dw++] = byte_count - 1;
}
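/* Like COPY_LINEAR, CONST_FILL encodes its size as (byte_count - 1). */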
static const struct amdgpu_buffer_funcs sdma_v4_4_2_buffer_funcs = {
	.copy_max_bytes = 0x400000,
	.copy_num_dw = 7,
	.emit_copy_buffer = sdma_v4_4_2_emit_copy_buffer,

	.fill_max_bytes = 0x400000,
	.fill_num_dw = 5,
	.emit_fill_buffer = sdma_v4_4_2_emit_fill_buffer,
};

static void sdma_v4_4_2_set_buffer_funcs(struct amdgpu_device *adev)
{
	adev->mman.buffer_funcs = &sdma_v4_4_2_buffer_funcs;
	if (adev->sdma.has_page_queue)
		adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].page;
	else
		adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring;
}
static const struct amdgpu_vm_pte_funcs sdma_v4_4_2_vm_pte_funcs = {
	.copy_pte_num_dw = 7,
	.copy_pte = sdma_v4_4_2_vm_copy_pte,

	.write_pte = sdma_v4_4_2_vm_write_pte,
	.set_pte_pde = sdma_v4_4_2_vm_set_pte_pde,
};

static void sdma_v4_4_2_set_vm_pte_funcs(struct amdgpu_device *adev)
{
	struct drm_gpu_scheduler *sched;
	unsigned i;

	adev->vm_manager.vm_pte_funcs = &sdma_v4_4_2_vm_pte_funcs;
	for (i = 0; i < adev->sdma.num_instances; i++) {
		if (adev->sdma.has_page_queue)
			sched = &adev->sdma.instance[i].page.sched;
		else
			sched = &adev->sdma.instance[i].ring.sched;
		adev->vm_manager.vm_pte_scheds[i] = sched;
	}
	adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances;
}
const struct amdgpu_ip_block_version sdma_v4_4_2_ip_block = {
	.type = AMD_IP_BLOCK_TYPE_SDMA,
	.major = 4,
	.minor = 4,
	.rev = 2,
	.funcs = &sdma_v4_4_2_ip_funcs,
};
static int sdma_v4_4_2_xcp_resume(void *handle, uint32_t inst_mask)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	if (!amdgpu_sriov_vf(adev))
		sdma_v4_4_2_inst_init_golden_registers(adev, inst_mask);

	r = sdma_v4_4_2_inst_start(adev, inst_mask);

	return r;
}

static int sdma_v4_4_2_xcp_suspend(void *handle, uint32_t inst_mask)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	uint32_t tmp_mask = inst_mask;
	int i;

	for_each_inst(i, tmp_mask) {
		amdgpu_irq_put(adev, &adev->sdma.ecc_irq,
			       AMDGPU_SDMA_IRQ_INSTANCE0 + i);
	}

	sdma_v4_4_2_inst_ctx_switch_enable(adev, false, inst_mask);
	sdma_v4_4_2_inst_enable(adev, false, inst_mask);

	return 0;
}

struct amdgpu_xcp_ip_funcs sdma_v4_4_2_xcp_funcs = {
	.suspend = &sdma_v4_4_2_xcp_suspend,
	.resume = &sdma_v4_4_2_xcp_resume
};