Skip to content

Commit

Permalink
drm/i915/execlists: Preempt-to-busy
Browse files Browse the repository at this point in the history
When using a global seqno, we required a precise stop-the-world event to
handle preemption and unwind the global seqno counter. To accomplish
this, we would preempt to a special out-of-band context and wait for the
machine to report that it was idle. Given an idle machine, we could very
precisely see which requests had completed and which we needed to feed
back into the run queue.

However, now that we have scrapped the global seqno, we no longer need
to precisely unwind the global counter and only track requests by their
per-context seqno. This allows us to loosely unwind inflight requests
while scheduling a preemption, with the enormous caveat that the
requests we put back on the run queue are still _inflight_ (until the
preemption request is complete). This makes request tracking much
messier, as at any point we may see a completed request that we
believe is not currently scheduled for execution. We also have to be
careful not to rewind RING_TAIL past RING_HEAD on preempting to the
running context, and for this we use a semaphore to prevent completion
of the request before continuing.

To accomplish this feat, we change how we track requests scheduled to
the HW. Instead of appending our requests onto a single list as we
submit, we track each submission to ELSP as its own block. Then upon
receiving the CS preemption event, we promote the pending block to the
inflight block (discarding what was previously being tracked). As normal
CS completion events arrive, we then remove stale entries from the
inflight tracker.

v2: Be a tinge paranoid and ensure we flush the write into the HWS page
for the GPU semaphore to pick up in a timely fashion.

Signed-off-by: Chris Wilson <[email protected]>
Reviewed-by: Mika Kuoppala <[email protected]>
Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
  • Loading branch information
ickle committed Jun 20, 2019
1 parent 9e138ea commit 22b7a42
Show file tree
Hide file tree
Showing 13 changed files with 508 additions and 610 deletions.
2 changes: 1 addition & 1 deletion drivers/gpu/drm/i915/gem/i915_gem_context.c
Original file line number Diff line number Diff line change
Expand Up @@ -646,7 +646,7 @@ static void init_contexts(struct drm_i915_private *i915)

static bool needs_preempt_context(struct drm_i915_private *i915)
{
return HAS_EXECLISTS(i915);
return USES_GUC_SUBMISSION(i915);
}

int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
Expand Down
5 changes: 5 additions & 0 deletions drivers/gpu/drm/i915/gt/intel_context_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <linux/types.h>

#include "i915_active_types.h"
#include "i915_utils.h"
#include "intel_engine_types.h"
#include "intel_sseu.h"

Expand All @@ -38,6 +39,10 @@ struct intel_context {
struct i915_gem_context *gem_context;
struct intel_engine_cs *engine;
struct intel_engine_cs *inflight;
#define intel_context_inflight(ce) ptr_mask_bits((ce)->inflight, 2)
#define intel_context_inflight_count(ce) ptr_unmask_bits((ce)->inflight, 2)
#define intel_context_inflight_inc(ce) ptr_count_inc(&(ce)->inflight)
#define intel_context_inflight_dec(ce) ptr_count_dec(&(ce)->inflight)

struct list_head signal_link;
struct list_head signals;
Expand Down
61 changes: 8 additions & 53 deletions drivers/gpu/drm/i915/gt/intel_engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,71 +125,26 @@ hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)

void intel_engines_set_scheduler_caps(struct drm_i915_private *i915);

static inline void
execlists_set_active(struct intel_engine_execlists *execlists,
unsigned int bit)
{
__set_bit(bit, (unsigned long *)&execlists->active);
}

static inline bool
execlists_set_active_once(struct intel_engine_execlists *execlists,
unsigned int bit)
{
return !__test_and_set_bit(bit, (unsigned long *)&execlists->active);
}

static inline void
execlists_clear_active(struct intel_engine_execlists *execlists,
unsigned int bit)
{
__clear_bit(bit, (unsigned long *)&execlists->active);
}

static inline void
execlists_clear_all_active(struct intel_engine_execlists *execlists)
static inline unsigned int
execlists_num_ports(const struct intel_engine_execlists * const execlists)
{
execlists->active = 0;
return execlists->port_mask + 1;
}

static inline bool
execlists_is_active(const struct intel_engine_execlists *execlists,
unsigned int bit)
static inline struct i915_request *
execlists_active(const struct intel_engine_execlists *execlists)
{
return test_bit(bit, (unsigned long *)&execlists->active);
GEM_BUG_ON(execlists->active - execlists->inflight >
execlists_num_ports(execlists));
return READ_ONCE(*execlists->active);
}

void execlists_user_begin(struct intel_engine_execlists *execlists,
const struct execlist_port *port);
void execlists_user_end(struct intel_engine_execlists *execlists);

void
execlists_cancel_port_requests(struct intel_engine_execlists * const execlists);

struct i915_request *
execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists);

static inline unsigned int
execlists_num_ports(const struct intel_engine_execlists * const execlists)
{
return execlists->port_mask + 1;
}

static inline struct execlist_port *
execlists_port_complete(struct intel_engine_execlists * const execlists,
struct execlist_port * const port)
{
const unsigned int m = execlists->port_mask;

GEM_BUG_ON(port_index(port, execlists) != 0);
GEM_BUG_ON(!execlists_is_active(execlists, EXECLISTS_ACTIVE_USER));

memmove(port, port + 1, m * sizeof(struct execlist_port));
memset(port + m, 0, sizeof(struct execlist_port));

return port;
}

static inline u32
intel_read_status_page(const struct intel_engine_cs *engine, int reg)
{
Expand Down
63 changes: 39 additions & 24 deletions drivers/gpu/drm/i915/gt/intel_engine_cs.c
Original file line number Diff line number Diff line change
Expand Up @@ -508,6 +508,10 @@ void intel_engine_init_execlists(struct intel_engine_cs *engine)
GEM_BUG_ON(!is_power_of_2(execlists_num_ports(execlists)));
GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);

memset(execlists->pending, 0, sizeof(execlists->pending));
execlists->active =
memset(execlists->inflight, 0, sizeof(execlists->inflight));

execlists->queue_priority_hint = INT_MIN;
execlists->queue = RB_ROOT_CACHED;
}
Expand Down Expand Up @@ -1152,7 +1156,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
return true;

/* Waiting to drain ELSP? */
if (READ_ONCE(engine->execlists.active)) {
if (execlists_active(&engine->execlists)) {
struct tasklet_struct *t = &engine->execlists.tasklet;

synchronize_hardirq(engine->i915->drm.irq);
Expand All @@ -1169,7 +1173,7 @@ bool intel_engine_is_idle(struct intel_engine_cs *engine)
/* Otherwise flush the tasklet if it was on another cpu */
tasklet_unlock_wait(t);

if (READ_ONCE(engine->execlists.active))
if (execlists_active(&engine->execlists))
return false;
}

Expand Down Expand Up @@ -1367,6 +1371,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
}

if (HAS_EXECLISTS(dev_priv)) {
struct i915_request * const *port, *rq;
const u32 *hws =
&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
const u8 num_entries = execlists->csb_size;
Expand Down Expand Up @@ -1399,27 +1404,33 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine,
}

spin_lock_irqsave(&engine->active.lock, flags);
for (idx = 0; idx < execlists_num_ports(execlists); idx++) {
struct i915_request *rq;
unsigned int count;
for (port = execlists->active; (rq = *port); port++) {
char hdr[80];
int len;

len = snprintf(hdr, sizeof(hdr),
"\t\tActive[%d: ",
(int)(port - execlists->active));
if (!i915_request_signaled(rq))
len += snprintf(hdr + len, sizeof(hdr) - len,
"ring:{start:%08x, hwsp:%08x, seqno:%08x}, ",
i915_ggtt_offset(rq->ring->vma),
rq->timeline->hwsp_offset,
hwsp_seqno(rq));
snprintf(hdr + len, sizeof(hdr) - len, "rq: ");
print_request(m, rq, hdr);
}
for (port = execlists->pending; (rq = *port); port++) {
char hdr[80];

rq = port_unpack(&execlists->port[idx], &count);
if (!rq) {
drm_printf(m, "\t\tELSP[%d] idle\n", idx);
} else if (!i915_request_signaled(rq)) {
snprintf(hdr, sizeof(hdr),
"\t\tELSP[%d] count=%d, ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ",
idx, count,
i915_ggtt_offset(rq->ring->vma),
rq->timeline->hwsp_offset,
hwsp_seqno(rq));
print_request(m, rq, hdr);
} else {
print_request(m, rq, "\t\tELSP[%d] rq: ");
}
snprintf(hdr, sizeof(hdr),
"\t\tPending[%d] ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ",
(int)(port - execlists->pending),
i915_ggtt_offset(rq->ring->vma),
rq->timeline->hwsp_offset,
hwsp_seqno(rq));
print_request(m, rq, hdr);
}
drm_printf(m, "\t\tHW active? 0x%x\n", execlists->active);
spin_unlock_irqrestore(&engine->active.lock, flags);
} else if (INTEL_GEN(dev_priv) > 6) {
drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n",
Expand Down Expand Up @@ -1583,15 +1594,19 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
}

if (engine->stats.enabled++ == 0) {
const struct execlist_port *port = execlists->port;
unsigned int num_ports = execlists_num_ports(execlists);
struct i915_request * const *port;
struct i915_request *rq;

engine->stats.enabled_at = ktime_get();

/* XXX submission method oblivious? */
while (num_ports-- && port_isset(port)) {
for (port = execlists->active; (rq = *port); port++)
engine->stats.active++;
port++;

for (port = execlists->pending; (rq = *port); port++) {
/* Exclude any contexts already counted in active */
if (intel_context_inflight_count(rq->hw_context) == 1)
engine->stats.active++;
}

if (engine->stats.active)
Expand Down
60 changes: 16 additions & 44 deletions drivers/gpu/drm/i915/gt/intel_engine_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -172,51 +172,28 @@ struct intel_engine_execlists {
*/
u32 __iomem *ctrl_reg;

#define EXECLIST_MAX_PORTS 2
/**
* @active: the currently known context executing on HW
*/
struct i915_request * const *active;
/**
* @port: execlist port states
* @inflight: the set of contexts submitted and acknowledged by HW
*
* For each hardware ELSP (ExecList Submission Port) we keep
* track of the last request and the number of times we submitted
* that port to hw. We then count the number of times the hw reports
* a context completion or preemption. As only one context can
* be active on hw, we limit resubmission of context to port[0]. This
* is called Lite Restore, of the context.
* The set of inflight contexts is managed by reading CS events
* from the HW. On a context-switch event (not preemption), we
* know the HW has transitioned from port0 to port1, and we
* advance our inflight/active tracking accordingly.
*/
struct execlist_port {
/**
* @request_count: combined request and submission count
*/
struct i915_request *request_count;
#define EXECLIST_COUNT_BITS 2
#define port_request(p) ptr_mask_bits((p)->request_count, EXECLIST_COUNT_BITS)
#define port_count(p) ptr_unmask_bits((p)->request_count, EXECLIST_COUNT_BITS)
#define port_pack(rq, count) ptr_pack_bits(rq, count, EXECLIST_COUNT_BITS)
#define port_unpack(p, count) ptr_unpack_bits((p)->request_count, count, EXECLIST_COUNT_BITS)
#define port_set(p, packed) ((p)->request_count = (packed))
#define port_isset(p) ((p)->request_count)
#define port_index(p, execlists) ((p) - (execlists)->port)

/**
* @context_id: context ID for port
*/
GEM_DEBUG_DECL(u32 context_id);

#define EXECLIST_MAX_PORTS 2
} port[EXECLIST_MAX_PORTS];

struct i915_request *inflight[EXECLIST_MAX_PORTS + 1 /* sentinel */];
/**
* @active: is the HW active? We consider the HW as active after
* submitting any context for execution and until we have seen the
* last context completion event. After that, we do not expect any
* more events until we submit, and so can park the HW.
* @pending: the next set of contexts submitted to ELSP
*
* As we have a small number of different sources from which we feed
* the HW, we track the state of each inside a single bitfield.
* We store the array of contexts that we submit to HW (via ELSP) and
* promote them to the inflight array once HW has signaled the
* preemption or idle-to-active event.
*/
unsigned int active;
#define EXECLISTS_ACTIVE_USER 0
#define EXECLISTS_ACTIVE_PREEMPT 1
#define EXECLISTS_ACTIVE_HWACK 2
struct i915_request *pending[EXECLIST_MAX_PORTS + 1];

/**
* @port_mask: number of execlist ports - 1
Expand Down Expand Up @@ -257,11 +234,6 @@ struct intel_engine_execlists {
*/
u32 *csb_status;

/**
* @preempt_complete_status: expected CSB upon completing preemption
*/
u32 preempt_complete_status;

/**
* @csb_size: context status buffer FIFO size
*/
Expand Down
Loading

0 comments on commit 22b7a42

Please sign in to comment.