drivers/gpu/drm/amd/amdgpu/amdgpu_job.c

Source file repositories/reference/linux-study-clean/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c

File Facts

System
Linux kernel
Corpus path
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
Extension
.c
Size
14194 bytes
Lines
518
Domain
Driver Families
Bucket
drivers/gpu
Inferred role
Driver Families: implementation source
Status
source implementation candidate

Why This File Exists

This file is part of the repeatable hardware-adapter layer. Deep compatibility for every driver is out of scope; instead, this atlas records patterns, probe lifecycles, bus glue, and IRQ/DMA usage, and links back to the core abstractions.

Dependency Surface

Detected Declarations

Annotated Snippet

amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) {
		dev_err(adev->dev, "ring %s timeout, but soft recovered\n",
			s_job->sched->name);
		goto exit;
	}

	dev_err(adev->dev, "ring %s timeout, signaled seq=%u, emitted seq=%u\n",
		job->base.sched->name, atomic_read(&ring->fence_drv.last_seq),
		ring->fence_drv.sync_seq);

	ti = amdgpu_vm_get_task_info_pasid(ring->adev, job->pasid);
	if (ti) {
		amdgpu_vm_print_task_info(adev, ti);
		info = &ti->task;
	}

	/* attempt a per ring reset */
	if (amdgpu_gpu_recovery &&
	    amdgpu_ring_is_reset_type_supported(ring, AMDGPU_RESET_TYPE_PER_QUEUE) &&
	    ring->funcs->reset) {
		dev_err(adev->dev, "Starting %s ring reset\n",
			s_job->sched->name);
		/* Stop the scheduler to prevent anybody else from touching the ring buffer. */
		drm_sched_wqueue_stop(&ring->sched);
		r = amdgpu_ring_reset(ring, job->vmid, job->hw_fence);
		if (!r) {
			/* Start the scheduler again */
			drm_sched_wqueue_start(&ring->sched);
			atomic_inc(&ring->adev->gpu_reset_counter);
			dev_err(adev->dev, "Ring %s reset succeeded\n",
				ring->sched.name);
			drm_dev_wedged_event(adev_to_drm(adev),
					     DRM_WEDGE_RECOVERY_NONE, info);
			goto exit;
		}
		dev_err(adev->dev, "Ring %s reset failed\n", ring->sched.name);
	}

	if (dma_fence_get_status(&s_job->s_fence->finished) == 0)
		dma_fence_set_error(&s_job->s_fence->finished, -ETIME);

	if (amdgpu_device_should_recover_gpu(ring->adev)) {
		struct amdgpu_reset_context reset_context;
		memset(&reset_context, 0, sizeof(reset_context));

		reset_context.method = AMD_RESET_METHOD_NONE;
		reset_context.reset_req_dev = adev;
		reset_context.src = AMDGPU_RESET_SRC_JOB;
		clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);

		/*
		 * Skip the coredump to avoid an unnecessary extra one: we
		 * already have a very close representation of the GPU's
		 * error status.
		 */
		set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);

		r = amdgpu_device_gpu_recover(ring->adev, job, &reset_context);
		if (r)
			dev_err(adev->dev, "GPU Recovery Failed: %d\n", r);
	} else {
		drm_sched_suspend_timeout(&ring->sched);
		if (amdgpu_sriov_vf(adev))
			adev->virt.tdr_debug = true;
	}

exit:
	amdgpu_vm_put_task_info(ti);
	drm_dev_exit(idx);
	/* This is needed to add the job back to the pending list */
	return DRM_GPU_SCHED_STAT_NO_HANG;
}

int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
		     struct drm_sched_entity *entity, void *owner,
		     unsigned int num_ibs, struct amdgpu_job **job,
		     u64 drm_client_id)
{
	struct amdgpu_fence *af;
	int r;

	if (num_ibs == 0)
		return -EINVAL;

	*job = kzalloc_flex(**job, ibs, num_ibs);
	if (!*job)
		return -ENOMEM;

	af = kzalloc_obj(struct amdgpu_fence);
	if (!af) {
		r = -ENOMEM;

Annotation

Implementation Notes