intel: Throttle harder

Filling the rings is a very unpleasant user experience, so cap the
number of batches we allow to be inflight at any one time.

Interestingly, as also found with SNA, throttling can improve
performance by reducing RSS. However, typically throughput is improved
(at the expense of latency) by oversubscribing work to the GPU and a
10-20% slowdown is commonplace for cairo-traces. Notably, x11perf is
less affected and in particular application level benchmarks show no
change.

Note that this exposes another bug in libdrm-intel 2.4.40 on gen2/3.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
This commit is contained in:
Chris Wilson 2013-01-10 19:14:21 +00:00
parent a37d56f338
commit 441ef916ae
2 changed files with 32 additions and 11 deletions

View File

@ -182,7 +182,7 @@ typedef struct intel_screen_private {
unsigned int batch_emit_start;
/** Number of bytes to be emitted in the current BEGIN_BATCH. */
uint32_t batch_emitting;
dri_bo *batch_bo;
dri_bo *batch_bo, *last_batch_bo[2];
/** Whether we're in a section of code that can't tolerate flushing */
Bool in_batch_atomic;
/** Ending batch_used that was verified by intel_start_batch_atomic() */

View File

@ -67,17 +67,26 @@ void intel_next_vertex(intel_screen_private *intel)
dri_bo_alloc(intel->bufmgr, "vertex", sizeof (intel->vertex_ptr), 4096);
}
static void intel_next_batch(ScrnInfoPtr scrn)
/*
 * Allocate a fresh batch buffer object.
 *
 * Returns a new 16 KiB (4 page) buffer, except on the i865 where it is
 * capped to a single 4 KiB page. The caller owns the returned reference
 * and must drop it with dri_bo_unreference().
 */
static dri_bo *bo_alloc(ScrnInfoPtr scrn)
{
	intel_screen_private *intel = intel_get_screen_private(scrn);
	int size = 4 * 4096;

	/* The 865 has issues with larger-than-page-sized batch buffers. */
	if (IS_I865G(intel))
		size = 4096;

	return dri_bo_alloc(intel->bufmgr, "batch", size, 4096);
}
static void intel_next_batch(ScrnInfoPtr scrn, int mode)
{
intel_screen_private *intel = intel_get_screen_private(scrn);
dri_bo *tmp;
drm_intel_gem_bo_clear_relocs(intel->batch_bo, 0);
tmp = intel->last_batch_bo[mode];
intel->last_batch_bo[mode] = intel->batch_bo;
intel->batch_bo = tmp;
intel->batch_used = 0;
@ -95,12 +104,25 @@ void intel_batch_init(ScrnInfoPtr scrn)
intel->batch_emitting = 0;
intel->vertex_id = 0;
intel_next_batch(scrn);
intel->last_batch_bo[0] = bo_alloc(scrn);
intel->last_batch_bo[1] = bo_alloc(scrn);
intel->batch_bo = bo_alloc(scrn);
intel->batch_used = 0;
intel->last_3d = LAST_3D_OTHER;
}
void intel_batch_teardown(ScrnInfoPtr scrn)
{
intel_screen_private *intel = intel_get_screen_private(scrn);
int i;
for (i = 0; i < ARRAY_SIZE(intel->last_batch_bo); i++) {
if (intel->last_batch_bo[i] != NULL) {
dri_bo_unreference(intel->last_batch_bo[i]);
intel->last_batch_bo[i] = NULL;
}
}
if (intel->batch_bo != NULL) {
dri_bo_unreference(intel->batch_bo);
@ -273,8 +295,7 @@ void intel_batch_submit(ScrnInfoPtr scrn)
if (intel->debug_flush & DEBUG_FLUSH_WAIT)
drm_intel_bo_wait_rendering(intel->batch_bo);
dri_bo_unreference(intel->batch_bo);
intel_next_batch(scrn);
intel_next_batch(scrn, intel->current_batch == I915_EXEC_BLT);
if (intel->batch_commit_notify)
intel->batch_commit_notify(intel);