From 88fca61ba5e2ecd0552b9dea2500a16da12d0106 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Roberto=20de=20Souza?= Date: Fri, 27 Dec 2024 10:32:19 -0800 Subject: [PATCH] Revert "drm/xe: Force write completion of MI_STORE_DATA_IMM" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 1460bb1fef9ccf7390af0d74a15252442fd6effd. In all places the MI_STORE_DATA_IMM are not followed by a read of the same memory address in the same batch buffer and the posted writes are flushed with PIPE_CONTROL or MI_FLUSH_DW in xe_ring_ops.c functions so there is no need to set this register. Reviewed-by: Ashutosh Dixit Cc: Thomas Hellström Cc: Ashutosh Dixit Fixes: 1460bb1fef9c ("drm/xe: Force write completion of MI_STORE_DATA_IMM") Signed-off-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20241227183230.101334-1-jose.souza@intel.com --- drivers/gpu/drm/xe/instructions/xe_mi_commands.h | 13 ++++++------- drivers/gpu/drm/xe/xe_migrate.c | 11 +++-------- drivers/gpu/drm/xe/xe_ring_ops.c | 6 ++---- 3 files changed, 11 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h index f4ee910f09432c..10ec2920d31b34 100644 --- a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h +++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h @@ -33,13 +33,12 @@ #define MI_TOPOLOGY_FILTER __MI_INSTR(0xD) #define MI_FORCE_WAKEUP __MI_INSTR(0x1D) -#define MI_STORE_DATA_IMM __MI_INSTR(0x20) -#define MI_SDI_GGTT REG_BIT(22) -#define MI_FORCE_WRITE_COMPLETION_CHECK REG_BIT(10) -#define MI_SDI_LEN_DW GENMASK(9, 0) -#define MI_SDI_NUM_DW(x) REG_FIELD_PREP(MI_SDI_LEN_DW, (x) + 3 - 2) -#define MI_SDI_NUM_QW(x) (REG_FIELD_PREP(MI_SDI_LEN_DW, 2 * (x) + 3 - 2) | \ - REG_BIT(21)) +#define MI_STORE_DATA_IMM __MI_INSTR(0x20) +#define MI_SDI_GGTT REG_BIT(22) +#define MI_SDI_LEN_DW GENMASK(9, 0) +#define MI_SDI_NUM_DW(x) REG_FIELD_PREP(MI_SDI_LEN_DW, (x) + 3 - 2) +#define MI_SDI_NUM_QW(x) (REG_FIELD_PREP(MI_SDI_LEN_DW, 2 * (x) + 3 - 2) | \ + REG_BIT(21)) #define MI_LOAD_REGISTER_IMM __MI_INSTR(0x22) #define MI_LRI_LRM_CS_MMIO REG_BIT(19) diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c index 8b32fad678782b..1b97d90aaddaf4 100644 --- a/drivers/gpu/drm/xe/xe_migrate.c +++ b/drivers/gpu/drm/xe/xe_migrate.c @@ -581,9 +581,7 @@ static void emit_pte(struct xe_migrate *m, while (ptes) { u32 chunk = min(MAX_PTE_PER_SDI, ptes); - bb->cs[bb->len++] = MI_STORE_DATA_IMM | - MI_FORCE_WRITE_COMPLETION_CHECK | - MI_SDI_NUM_QW(chunk); + bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk); bb->cs[bb->len++] = ofs; bb->cs[bb->len++] = 0; @@ -1225,9 +1223,7 @@ static void write_pgtable(struct xe_tile *tile, struct xe_bb *bb, u64 ppgtt_ofs, if (!(bb->len & 1)) bb->cs[bb->len++] = MI_NOOP; - bb->cs[bb->len++] = MI_STORE_DATA_IMM | - MI_FORCE_WRITE_COMPLETION_CHECK | - MI_SDI_NUM_QW(chunk); + bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk); bb->cs[bb->len++] = lower_32_bits(addr); bb->cs[bb->len++] = upper_32_bits(addr); if (pt_op->bind) @@ -1392,8 +1388,7 @@ __xe_migrate_update_pgtables(struct xe_migrate *m, u32 idx = 0; bb->cs[bb->len++] = MI_STORE_DATA_IMM | - MI_FORCE_WRITE_COMPLETION_CHECK | - MI_SDI_NUM_QW(chunk); + MI_SDI_NUM_QW(chunk); bb->cs[bb->len++] = ofs; bb->cs[bb->len++] = 0; /* upper_32_bits */ diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c index c8ab37fa0d1980..9f327f27c0726e 100644 --- a/drivers/gpu/drm/xe/xe_ring_ops.c +++ b/drivers/gpu/drm/xe/xe_ring_ops.c @@ -72,8 +72,7 @@ static int emit_user_interrupt(u32 *dw, int i) static int emit_store_imm_ggtt(u32 addr, u32 value, u32 *dw, int i) { - dw[i++] = MI_STORE_DATA_IMM | MI_SDI_GGTT | - MI_FORCE_WRITE_COMPLETION_CHECK | MI_SDI_NUM_DW(1); + dw[i++] = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1); dw[i++] = addr; dw[i++] = 0; dw[i++] = value; @@ -163,8 +162,7 @@ static int emit_pipe_invalidate(u32 mask_flags, bool invalidate_tlb, u32 *dw, static int emit_store_imm_ppgtt_posted(u64 addr, u64 value, u32 *dw, int i) { - dw[i++] = MI_STORE_DATA_IMM | MI_FORCE_WRITE_COMPLETION_CHECK | - MI_SDI_NUM_QW(1); + dw[i++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(1); dw[i++] = lower_32_bits(addr); dw[i++] = upper_32_bits(addr); dw[i++] = lower_32_bits(value);