drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
Source file repositories/reference/linux-study-clean/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
File Facts
- System
- Linux kernel
- Corpus path
- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
- Extension
- .c
- Size
- 17880 bytes
- Lines
- 641
- Domain
- Driver Families
- Bucket
- drivers/gpu
- Inferred role
- Driver Families: implementation source
- Status
- source implementation candidate
Why This File Exists
Repeatable hardware-adapter layer. Deep compatibility for every driver is out of scope; this atlas records patterns, probe lifecycles, bus glue, IRQ/DMA usage, and links back to core abstractions.
- Repeatable hardware-adapter layer. Deep compatibility for every driver is out of scope; this atlas records patterns, probe lifecycles, bus glue, IRQ/DMA usage, and links back to core abstractions.
- Uses kernel synchronization; read lock ordering, sleepability, and interrupt context assumptions before translating.
- Allocates kernel memory; connect allocation flags and lifetime to context constraints.
- Defines or uses C structs; map object ownership, embedded links, reference counts, and lock ownership.
Dependency Surface
`linux/sort.h`, `amdgpu.h`, `umc_v6_7.h`, `amdgpu_ras_mgr.h`
Detected Declarations
`function files`, `function amdgpu_umc_page_retirement_mca`, `function amdgpu_umc_handle_bad_pages`, `function amdgpu_umc_do_page_retirement`, `function amdgpu_umc_pasid_poison_handler`, `function amdgpu_umc_poison_handler`, `function amdgpu_umc_process_ras_data_cb`, `function amdgpu_umc_ras_sw_init`, `function amdgpu_umc_ras_late_init`, `function amdgpu_umc_process_ecc_irq`, `function amdgpu_umc_uniras_process_ecc_irq`, `function amdgpu_umc_fill_error_record`, `function amdgpu_umc_loop_all_aid`, `function LOOP_UMC_CH_INST`, `function amdgpu_umc_loop_channels`, `function LOOP_UMC_INST_AND_CH`, `function amdgpu_umc_update_ecc_status`, `function amdgpu_umc_logs_ecc_err`, `function amdgpu_umc_pages_in_a_row`, `function amdgpu_umc_lookup_bad_pages_in_a_row`, `function amdgpu_umc_mca_to_addr`, `function amdgpu_umc_pa2mca`
Annotated Snippet
if (!amdgpu_ras_eeprom_update_record_num(control)) {
err_data->err_addr_cnt = err_data->de_count =
control->ras_num_recs - control->ras_num_recs_old;
amdgpu_ras_eeprom_read_idx(control, err_data->err_addr,
control->ras_num_recs_old, err_data->de_count);
}
}
/* only uncorrectable error needs gpu reset */
if (err_data->ue_count || err_data->de_count) {
err_count = err_data->ue_count + err_data->de_count;
if ((amdgpu_bad_page_threshold != 0) &&
err_data->err_addr_cnt) {
amdgpu_ras_add_bad_pages(adev, err_data->err_addr,
err_data->err_addr_cnt, amdgpu_ras_smu_eeprom_supported(adev));
amdgpu_ras_save_bad_pages(adev, &err_count);
amdgpu_dpm_send_hbm_bad_pages_num(adev,
con->eeprom_control.ras_num_bad_pages);
if (con->update_channel_flag == true) {
amdgpu_dpm_send_hbm_bad_channel_flag(adev, con->eeprom_control.bad_channel_bitmap);
con->update_channel_flag = false;
}
}
}
kfree(err_data->err_addr);
err_data->err_addr = NULL;
mutex_unlock(&con->page_retirement_lock);
}
/*
 * amdgpu_umc_do_page_retirement - handle bad pages, then reset the GPU if needed
 * @adev: amdgpu device
 * @ras_error_status: opaque pointer, interpreted here as struct ras_err_data
 * @entry: interrupt vector entry; not referenced by this function
 * @reset: reset flags; OR-ed into the RAS context's gpu_reset_flags when a
 *         reset is issued
 *
 * Sets the KFD SRAM ECC flag and hands the error data to
 * amdgpu_umc_handle_bad_pages(). Afterwards a GPU reset is requested only
 * when the error data reports at least one UE or DE *and* either @reset is
 * non-zero or amdgpu_ras_is_rma() reports true.
 *
 * Return: always AMDGPU_RAS_SUCCESS.
 */
static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev,
		void *ras_error_status,
		struct amdgpu_iv_entry *entry,
		uint32_t reset)
{
	struct ras_err_data *errs = ras_error_status;
	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);

	kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
	amdgpu_umc_handle_bad_pages(adev, ras_error_status);

	/* No UE/DE recorded after bad page handling: nothing to reset for. */
	if (!errs->ue_count && !errs->de_count)
		return AMDGPU_RAS_SUCCESS;

	/* Errors exist, but neither a reset was requested nor RMA is reported. */
	if (!reset && !amdgpu_ras_is_rma(adev))
		return AMDGPU_RAS_SUCCESS;

	ras->gpu_reset_flags |= reset;
	amdgpu_ras_reset_gpu(adev);

	return AMDGPU_RAS_SUCCESS;
}
int amdgpu_umc_pasid_poison_handler(struct amdgpu_device *adev,
enum amdgpu_ras_block block, uint16_t pasid,
pasid_notify pasid_fn, void *data, uint32_t reset)
{
int ret = AMDGPU_RAS_SUCCESS;
if (adev->gmc.xgmi.connected_to_cpu ||
adev->gmc.is_app_apu) {
if (reset) {
/* MCA poison handler is only responsible for GPU reset,
* let MCA notifier do page retirement.
*/
kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
amdgpu_ras_reset_gpu(adev);
}
return ret;
}
if (!amdgpu_sriov_vf(adev)) {
if (amdgpu_ip_version(adev, UMC_HWIP, 0) < IP_VERSION(12, 0, 0)) {
struct ras_err_data err_data;
struct ras_common_if head = {
.block = AMDGPU_RAS_BLOCK__UMC,
};
struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head);
ret = amdgpu_ras_error_data_init(&err_data);
if (ret)
return ret;
ret = amdgpu_umc_do_page_retirement(adev, &err_data, NULL, reset);
if (ret == AMDGPU_RAS_SUCCESS && obj) {
obj->err_data.ue_count += err_data.ue_count;
obj->err_data.ce_count += err_data.ce_count;
obj->err_data.de_count += err_data.de_count;
}
Annotation
- Immediate include surface: `linux/sort.h`, `amdgpu.h`, `umc_v6_7.h`, `amdgpu_ras_mgr.h`.
- Detected declarations: `function files`, `function amdgpu_umc_page_retirement_mca`, `function amdgpu_umc_handle_bad_pages`, `function amdgpu_umc_do_page_retirement`, `function amdgpu_umc_pasid_poison_handler`, `function amdgpu_umc_poison_handler`, `function amdgpu_umc_process_ras_data_cb`, `function amdgpu_umc_ras_sw_init`, `function amdgpu_umc_ras_late_init`, `function amdgpu_umc_process_ecc_irq`.
- Atlas domain: Driver Families / drivers/gpu.
- Implementation status: source implementation candidate.
- Synchronization appears in or near this file; preserve lock ordering, sleepability, and interrupt-context constraints.
Implementation Notes
- This generated page is the file-by-file coverage layer; curated subsystem chapters should link here when they synthesize a multi-file control flow.
- Core OS pages should be promoted from atlas-only to deep-reviewed when they explain data structures, invariants, locking, lifecycle, and C implementation snippets.
- Driver-family pages are intentionally pattern-oriented unless they are part of the selected PCIe/NVMe representative device path.