Skip to content

Commit

Permalink
Merge tag 'amd-drm-next-6.8-2024-01-05' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
Browse files Browse the repository at this point in the history

amd-drm-next-6.8-2024-01-05:

amdgpu:
- VRR fixes
- PSR-SU fixes
- SubVP fixes
- DCN 3.5 fixes
- Documentation updates
- DMCUB fixes
- DML2 fixes
- UMC 12.0 updates
- GPUVM fix
- Misc code cleanups and whitespace cleanups
- DP MST fix
- Let KFD sync with GPUVM fences
- GFX11 reset fix
- SMU 13.0.6 fixes
- VSC fix for DP/eDP
- Navi12 display fix
- RN/CZN system aperture fix
- DCN 2.1 bandwidth validation fix
- DCN INIT cleanup

amdkfd:
- SVM fixes
- Revert TBA/TMA location change

Signed-off-by: Dave Airlie <[email protected]>

From: Alex Deucher <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
  • Loading branch information
airlied committed Jan 8, 2024
2 parents 3c064ae + 754d349 commit e54478f
Show file tree
Hide file tree
Showing 198 changed files with 4,093 additions and 2,111 deletions.
5 changes: 3 additions & 2 deletions Documentation/gpu/amdgpu/apu-asic-info-table.csv
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ SteamDeck, VANGOGH, DCN 3.0.1, 10.3.1, VCN 3.1.0, 5.2.1, 11.5.0
Ryzen 5000 series / Ryzen 7x30 series, GREEN SARDINE / Cezanne / Barcelo / Barcelo-R, DCN 2.1, 9.3, VCN 2.2, 4.1.1, 12.0.1
Ryzen 6000 series / Ryzen 7x35 series / Ryzen 7x36 series, YELLOW CARP / Rembrandt / Rembrandt-R, 3.1.2, 10.3.3, VCN 3.1.1, 5.2.3, 13.0.3
Ryzen 7000 series (AM5), Raphael, 3.1.5, 10.3.6, 3.1.2, 5.2.6, 13.0.5
Ryzen 7x45 series (FL1), / Dragon Range, 3.1.5, 10.3.6, 3.1.2, 5.2.6, 13.0.5
Ryzen 7x45 series (FL1), Dragon Range, 3.1.5, 10.3.6, 3.1.2, 5.2.6, 13.0.5
Ryzen 7x20 series, Mendocino, 3.1.6, 10.3.7, 3.1.1, 5.2.7, 13.0.8
Ryzen 7x40 series, Phoenix, 3.1.4, 11.0.1 / 11.0.4, 4.0.2, 6.0.1, 13.0.4 / 13.0.11
Ryzen 7x40 series, Phoenix, 3.1.4, 11.0.1 / 11.0.4, 4.0.2, 6.0.1, 13.0.4 / 13.0.11
Ryzen 8x40 series, Hawk Point, 3.1.4, 11.0.1 / 11.0.4, 4.0.2, 6.0.1, 13.0.4 / 13.0.11
26 changes: 25 additions & 1 deletion drivers/gpu/drm/amd/amdgpu/aldebaran.c
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,7 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
{
struct list_head *reset_device_list = reset_context->reset_device_list;
struct amdgpu_device *tmp_adev = NULL;
struct amdgpu_ras *con;
int r;

if (reset_device_list == NULL)
Expand All @@ -355,7 +356,30 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl,
*/
amdgpu_register_gpu_instance(tmp_adev);

/* Resume RAS */
/* Resume RAS, ecc_irq */
con = amdgpu_ras_get_context(tmp_adev);
if (!amdgpu_sriov_vf(tmp_adev) && con) {
if (tmp_adev->sdma.ras &&
tmp_adev->sdma.ras->ras_block.ras_late_init) {
r = tmp_adev->sdma.ras->ras_block.ras_late_init(tmp_adev,
&tmp_adev->sdma.ras->ras_block.ras_comm);
if (r) {
dev_err(tmp_adev->dev, "SDMA failed to execute ras_late_init! ret:%d\n", r);
goto end;
}
}

if (tmp_adev->gfx.ras &&
tmp_adev->gfx.ras->ras_block.ras_late_init) {
r = tmp_adev->gfx.ras->ras_block.ras_late_init(tmp_adev,
&tmp_adev->gfx.ras->ras_block.ras_comm);
if (r) {
dev_err(tmp_adev->dev, "GFX failed to execute ras_late_init! ret:%d\n", r);
goto end;
}
}
}

amdgpu_ras_resume(tmp_adev);

/* Update PSP FW topology after reset */
Expand Down
2 changes: 2 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,8 @@ extern int amdgpu_agp;

extern int amdgpu_wbrf;

extern int fw_bo_location;

#define AMDGPU_VM_MAX_NUM_CTX 4096
#define AMDGPU_SG_THRESHOLD (256*1024*1024)
#define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000
Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f)
return NULL;

fence = container_of(f, struct amdgpu_amdkfd_fence, base);
if (fence && f->ops == &amdkfd_fence_ops)
if (f->ops == &amdkfd_fence_ops)
return fence;

return NULL;
Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
Original file line number Diff line number Diff line change
Expand Up @@ -1103,7 +1103,7 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force)
* DDC line. The latter is more complex because with DVI<->HDMI adapters
* you don't really know what's connected to which port as both are digital.
*/
amdgpu_connector_shared_ddc(&ret, connector, amdgpu_connector);
amdgpu_connector_shared_ddc(&ret, connector, amdgpu_connector);
}
}

Expand Down
6 changes: 3 additions & 3 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
Original file line number Diff line number Diff line change
Expand Up @@ -870,9 +870,9 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
struct amdgpu_bo *bo = e->bo;
int i;

e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
sizeof(struct page *),
GFP_KERNEL | __GFP_ZERO);
e->user_pages = kvcalloc(bo->tbo.ttm->num_pages,
sizeof(struct page *),
GFP_KERNEL);
if (!e->user_pages) {
DRM_ERROR("kvmalloc_array failure\n");
r = -ENOMEM;
Expand Down
11 changes: 9 additions & 2 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -540,7 +540,11 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
while (size) {
uint32_t value;

value = RREG32_PCIE(*pos);
if (upper_32_bits(*pos))
value = RREG32_PCIE_EXT(*pos);
else
value = RREG32_PCIE(*pos);

r = put_user(value, (uint32_t *)buf);
if (r)
goto out;
Expand Down Expand Up @@ -600,7 +604,10 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
if (r)
goto out;

WREG32_PCIE(*pos, value);
if (upper_32_bits(*pos))
WREG32_PCIE_EXT(*pos, value);
else
WREG32_PCIE(*pos, value);

result += 4;
buf += 4;
Expand Down
11 changes: 2 additions & 9 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
Original file line number Diff line number Diff line change
Expand Up @@ -2251,15 +2251,8 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)

adev->firmware.gpu_info_fw = NULL;

if (adev->mman.discovery_bin) {
/*
* FIXME: The bounding box is still needed by Navi12, so
* temporarily read it from gpu_info firmware. Should be dropped
* when DAL no longer needs it.
*/
if (adev->asic_type != CHIP_NAVI12)
return 0;
}
if (adev->mman.discovery_bin)
return 0;

switch (adev->asic_type) {
default:
Expand Down
5 changes: 5 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ int amdgpu_seamless = -1; /* auto */
uint amdgpu_debug_mask;
int amdgpu_agp = -1; /* auto */
int amdgpu_wbrf = -1;
int fw_bo_location = -1;

static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work);

Expand Down Expand Up @@ -989,6 +990,10 @@ MODULE_PARM_DESC(wbrf,
"Enable Wifi RFI interference mitigation (0 = disabled, 1 = enabled, -1 = auto(default)");
module_param_named(wbrf, amdgpu_wbrf, int, 0444);

MODULE_PARM_DESC(fw_bo_location,
"location to put firmware bo for frontdoor loading (-1 = auto (default), 0 = on ram, 1 = on vram");
module_param(fw_bo_location, int, 0644);

/* These devices are not supported by amdgpu.
* They are supported by the mach64, r128, radeon drivers
*/
Expand Down
21 changes: 15 additions & 6 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@ static void amdgpu_mca_smu_mca_bank_dump(struct amdgpu_device *adev, int idx, st
int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, struct ras_err_data *err_data)
{
struct amdgpu_smuio_mcm_config_info mcm_info;
struct ras_err_addr err_addr = {0};
struct mca_bank_set mca_set;
struct mca_bank_node *node;
struct mca_bank_entry *entry;
Expand Down Expand Up @@ -246,10 +247,18 @@ int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_blo
mcm_info.socket_id = entry->info.socket_id;
mcm_info.die_id = entry->info.aid;

if (blk == AMDGPU_RAS_BLOCK__UMC) {
err_addr.err_status = entry->regs[MCA_REG_IDX_STATUS];
err_addr.err_ipid = entry->regs[MCA_REG_IDX_IPID];
err_addr.err_addr = entry->regs[MCA_REG_IDX_ADDR];
}

if (type == AMDGPU_MCA_ERROR_TYPE_UE)
amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, (uint64_t)count);
amdgpu_ras_error_statistic_ue_count(err_data,
&mcm_info, &err_addr, (uint64_t)count);
else
amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, (uint64_t)count);
amdgpu_ras_error_statistic_ce_count(err_data,
&mcm_info, &err_addr, (uint64_t)count);
}

out_mca_release:
Expand Down Expand Up @@ -351,6 +360,9 @@ int amdgpu_mca_smu_get_mca_entry(struct amdgpu_device *adev, enum amdgpu_mca_err
const struct amdgpu_mca_smu_funcs *mca_funcs = adev->mca.mca_funcs;
int count;

if (!mca_funcs || !mca_funcs->mca_get_mca_entry)
return -EOPNOTSUPP;

switch (type) {
case AMDGPU_MCA_ERROR_TYPE_UE:
count = mca_funcs->max_ue_count;
Expand All @@ -365,10 +377,7 @@ int amdgpu_mca_smu_get_mca_entry(struct amdgpu_device *adev, enum amdgpu_mca_err
if (idx >= count)
return -EINVAL;

if (mca_funcs && mca_funcs->mca_get_mca_entry)
return mca_funcs->mca_get_mca_entry(adev, type, idx, entry);

return -EOPNOTSUPP;
return mca_funcs->mca_get_mca_entry(adev, type, idx, entry);
}

#if defined(CONFIG_DEBUG_FS)
Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
Original file line number Diff line number Diff line change
Expand Up @@ -466,7 +466,7 @@ static int psp_sw_init(void *handle)
}

ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG,
amdgpu_sriov_vf(adev) ?
(amdgpu_sriov_vf(adev) || fw_bo_location == 1) ?
AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
&psp->fw_pri_bo,
&psp->fw_pri_mc_addr,
Expand Down
34 changes: 22 additions & 12 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
Original file line number Diff line number Diff line change
Expand Up @@ -1156,8 +1156,10 @@ static void amdgpu_rasmgr_error_data_statistic_update(struct ras_manager *obj, s
for_each_ras_error(err_node, err_data) {
err_info = &err_node->err_info;

amdgpu_ras_error_statistic_ce_count(&obj->err_data, &err_info->mcm_info, err_info->ce_count);
amdgpu_ras_error_statistic_ue_count(&obj->err_data, &err_info->mcm_info, err_info->ue_count);
amdgpu_ras_error_statistic_ce_count(&obj->err_data,
&err_info->mcm_info, NULL, err_info->ce_count);
amdgpu_ras_error_statistic_ue_count(&obj->err_data,
&err_info->mcm_info, NULL, err_info->ue_count);
}
} else {
/* for legacy asic path which doesn't have error source info */
Expand All @@ -1174,6 +1176,9 @@ static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev,
enum amdgpu_ras_block blk = info ? info->head.block : AMDGPU_RAS_BLOCK_COUNT;
struct amdgpu_ras_block_object *block_obj = NULL;

if (blk == AMDGPU_RAS_BLOCK_COUNT)
return -EINVAL;

if (error_query_mode == AMDGPU_RAS_INVALID_ERROR_QUERY)
return -EINVAL;

Expand Down Expand Up @@ -2538,7 +2543,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
return 0;

data = &con->eh_data;
*data = kmalloc(sizeof(**data), GFP_KERNEL | __GFP_ZERO);
*data = kzalloc(sizeof(**data), GFP_KERNEL);
if (!*data) {
ret = -ENOMEM;
goto out;
Expand Down Expand Up @@ -2825,10 +2830,10 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
if (con)
return 0;

con = kmalloc(sizeof(struct amdgpu_ras) +
con = kzalloc(sizeof(*con) +
sizeof(struct ras_manager) * AMDGPU_RAS_BLOCK_COUNT +
sizeof(struct ras_manager) * AMDGPU_RAS_MCA_BLOCK_COUNT,
GFP_KERNEL|__GFP_ZERO);
GFP_KERNEL);
if (!con)
return -ENOMEM;

Expand Down Expand Up @@ -3133,8 +3138,7 @@ int amdgpu_ras_late_init(struct amdgpu_device *adev)
if (amdgpu_sriov_vf(adev))
return 0;

/* enable MCA debug on APU device */
amdgpu_ras_set_mca_debug_mode(adev, !!(adev->flags & AMD_IS_APU));
amdgpu_ras_set_mca_debug_mode(adev, false);

list_for_each_entry_safe(node, tmp, &adev->ras_list, node) {
if (!node->ras_obj) {
Expand Down Expand Up @@ -3691,7 +3695,8 @@ static int ras_err_info_cmp(void *priv, const struct list_head *a, const struct
}

static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_data,
struct amdgpu_smuio_mcm_config_info *mcm_info)
struct amdgpu_smuio_mcm_config_info *mcm_info,
struct ras_err_addr *err_addr)
{
struct ras_err_node *err_node;

Expand All @@ -3705,6 +3710,9 @@ static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_d

memcpy(&err_node->err_info.mcm_info, mcm_info, sizeof(*mcm_info));

if (err_addr)
memcpy(&err_node->err_info.err_addr, err_addr, sizeof(*err_addr));

err_data->err_list_count++;
list_add_tail(&err_node->node, &err_data->err_node_list);
list_sort(NULL, &err_data->err_node_list, ras_err_info_cmp);
Expand All @@ -3713,7 +3721,8 @@ static struct ras_err_info *amdgpu_ras_error_get_info(struct ras_err_data *err_d
}

int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data,
struct amdgpu_smuio_mcm_config_info *mcm_info, u64 count)
struct amdgpu_smuio_mcm_config_info *mcm_info,
struct ras_err_addr *err_addr, u64 count)
{
struct ras_err_info *err_info;

Expand All @@ -3723,7 +3732,7 @@ int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data,
if (!count)
return 0;

err_info = amdgpu_ras_error_get_info(err_data, mcm_info);
err_info = amdgpu_ras_error_get_info(err_data, mcm_info, err_addr);
if (!err_info)
return -EINVAL;

Expand All @@ -3734,7 +3743,8 @@ int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data,
}

int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data,
struct amdgpu_smuio_mcm_config_info *mcm_info, u64 count)
struct amdgpu_smuio_mcm_config_info *mcm_info,
struct ras_err_addr *err_addr, u64 count)
{
struct ras_err_info *err_info;

Expand All @@ -3744,7 +3754,7 @@ int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data,
if (!count)
return 0;

err_info = amdgpu_ras_error_get_info(err_data, mcm_info);
err_info = amdgpu_ras_error_get_info(err_data, mcm_info, err_addr);
if (!err_info)
return -EINVAL;

Expand Down
13 changes: 11 additions & 2 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
Original file line number Diff line number Diff line change
Expand Up @@ -452,10 +452,17 @@ struct ras_fs_data {
char debugfs_name[32];
};

/*
 * Raw MCA register values attached to a RAS error record.
 *
 * amdgpu_mca_smu_log_ras_error() starts from a zero-initialized instance and
 * fills it from the MCA bank entry only when the block being logged is
 * AMDGPU_RAS_BLOCK__UMC; it is then handed to
 * amdgpu_ras_error_statistic_{ce,ue}_count(). Other callers may pass NULL
 * instead of an instance.
 */
struct ras_err_addr {
/* copied from entry->regs[MCA_REG_IDX_STATUS] */
uint64_t err_status;
/* copied from entry->regs[MCA_REG_IDX_IPID] */
uint64_t err_ipid;
/* copied from entry->regs[MCA_REG_IDX_ADDR] */
uint64_t err_addr;
};

/*
 * Error record embedded in a ras_err_node (accessed as err_node->err_info).
 *
 * Records are handed out by amdgpu_ras_error_get_info(), which copies the
 * caller's mcm_info and, when supplied, err_addr into the node it adds to
 * the error list.
 */
struct ras_err_info {
/* source location info (socket_id / die_id are set by the MCA logging path) */
struct amdgpu_smuio_mcm_config_info mcm_info;
/*
 * Per-record counters; read back and folded into a ras_manager's err_data
 * by amdgpu_rasmgr_error_data_statistic_update().
 * NOTE(review): ce/ue presumably mean correctable/uncorrectable — confirm.
 */
u64 ce_count;
u64 ue_count;
/* only written when the caller passes a non-NULL err_addr */
struct ras_err_addr err_addr;
};

struct ras_err_node {
Expand Down Expand Up @@ -806,8 +813,10 @@ void amdgpu_ras_inst_reset_ras_error_count(struct amdgpu_device *adev,
int amdgpu_ras_error_data_init(struct ras_err_data *err_data);
void amdgpu_ras_error_data_fini(struct ras_err_data *err_data);
int amdgpu_ras_error_statistic_ce_count(struct ras_err_data *err_data,
struct amdgpu_smuio_mcm_config_info *mcm_info, u64 count);
struct amdgpu_smuio_mcm_config_info *mcm_info,
struct ras_err_addr *err_addr, u64 count);
int amdgpu_ras_error_statistic_ue_count(struct ras_err_data *err_data,
struct amdgpu_smuio_mcm_config_info *mcm_info, u64 count);
struct amdgpu_smuio_mcm_config_info *mcm_info,
struct ras_err_addr *err_addr, u64 count);

#endif
11 changes: 5 additions & 6 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c
Original file line number Diff line number Diff line change
Expand Up @@ -531,13 +531,12 @@ int amdgpu_gfx_rlc_init_microcode(struct amdgpu_device *adev,
if (version_major == 2 && version_minor == 1)
adev->gfx.rlc.is_rlc_v2_1 = true;

if (version_minor >= 0) {
err = amdgpu_gfx_rlc_init_microcode_v2_0(adev);
if (err) {
dev_err(adev->dev, "fail to init rlc v2_0 microcode\n");
return err;
}
err = amdgpu_gfx_rlc_init_microcode_v2_0(adev);
if (err) {
dev_err(adev->dev, "fail to init rlc v2_0 microcode\n");
return err;
}

if (version_minor >= 1)
amdgpu_gfx_rlc_init_microcode_v2_1(adev);
if (version_minor >= 2)
Expand Down
3 changes: 2 additions & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,8 @@ static bool amdgpu_sync_test_fence(struct amdgpu_device *adev,

/* Never sync to VM updates either. */
if (fence_owner == AMDGPU_FENCE_OWNER_VM &&
owner != AMDGPU_FENCE_OWNER_UNDEFINED)
owner != AMDGPU_FENCE_OWNER_UNDEFINED &&
owner != AMDGPU_FENCE_OWNER_KFD)
return false;

/* Ignore fences depending on the sync mode */
Expand Down
Loading

0 comments on commit e54478f

Please sign in to comment.