drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c

Source file repositories/reference/linux-study-clean/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c

File Facts

System
Linux kernel
Corpus path
drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
Extension
.c
Size
17880 bytes
Lines
641
Domain
Driver Families
Bucket
drivers/gpu
Inferred role
Driver Families: implementation source
Status
source implementation candidate

Why This File Exists

Repeatable hardware-adapter layer. Deep compatibility for every driver is out of scope; this atlas records patterns, probe lifecycles, bus glue, IRQ/DMA usage, and links back to core abstractions.

Dependency Surface

Detected Declarations

Annotated Snippet

if (!amdgpu_ras_eeprom_update_record_num(control)) {
			err_data->err_addr_cnt = err_data->de_count =
				control->ras_num_recs -	control->ras_num_recs_old;
			amdgpu_ras_eeprom_read_idx(control, err_data->err_addr,
				control->ras_num_recs_old, err_data->de_count);
		}
	}

	/* only uncorrectable error needs gpu reset */
	if (err_data->ue_count || err_data->de_count) {
		err_count = err_data->ue_count + err_data->de_count;
		if ((amdgpu_bad_page_threshold != 0) &&
			err_data->err_addr_cnt) {
			amdgpu_ras_add_bad_pages(adev, err_data->err_addr,
				err_data->err_addr_cnt, amdgpu_ras_smu_eeprom_supported(adev));
			amdgpu_ras_save_bad_pages(adev, &err_count);

			amdgpu_dpm_send_hbm_bad_pages_num(adev,
					con->eeprom_control.ras_num_bad_pages);

			if (con->update_channel_flag == true) {
				amdgpu_dpm_send_hbm_bad_channel_flag(adev, con->eeprom_control.bad_channel_bitmap);
				con->update_channel_flag = false;
			}
		}
	}

	kfree(err_data->err_addr);
	err_data->err_addr = NULL;

	mutex_unlock(&con->page_retirement_lock);
}

static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev,
		void *ras_error_status,
		struct amdgpu_iv_entry *entry,
		uint32_t reset)
{
	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);

	kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
	amdgpu_umc_handle_bad_pages(adev, ras_error_status);

	if ((err_data->ue_count || err_data->de_count) &&
	    (reset || amdgpu_ras_is_rma(adev))) {
		con->gpu_reset_flags |= reset;
		amdgpu_ras_reset_gpu(adev);
	}

	return AMDGPU_RAS_SUCCESS;
}

int amdgpu_umc_pasid_poison_handler(struct amdgpu_device *adev,
			enum amdgpu_ras_block block, uint16_t pasid,
			pasid_notify pasid_fn, void *data, uint32_t reset)
{
	int ret = AMDGPU_RAS_SUCCESS;

	if (adev->gmc.xgmi.connected_to_cpu ||
		adev->gmc.is_app_apu) {
		if (reset) {
			/* MCA poison handler is only responsible for GPU reset,
			 * let MCA notifier do page retirement.
			 */
			kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
			amdgpu_ras_reset_gpu(adev);
		}
		return ret;
	}

	if (!amdgpu_sriov_vf(adev)) {
		if (amdgpu_ip_version(adev, UMC_HWIP, 0) < IP_VERSION(12, 0, 0)) {
			struct ras_err_data err_data;
			struct ras_common_if head = {
				.block = AMDGPU_RAS_BLOCK__UMC,
			};
			struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head);

			ret = amdgpu_ras_error_data_init(&err_data);
			if (ret)
				return ret;

			ret = amdgpu_umc_do_page_retirement(adev, &err_data, NULL, reset);

			if (ret == AMDGPU_RAS_SUCCESS && obj) {
				obj->err_data.ue_count += err_data.ue_count;
				obj->err_data.ce_count += err_data.ce_count;
				obj->err_data.de_count += err_data.de_count;
			}

Annotation

Implementation Notes