render: tell the kernel explicitly when fences are needed

This slightly improves XRender performance on fence-register-starved
i8xx hardware.

I've also changed a few function calls to the new names from the
compat ones while looking at the code.

The i915 textured video path is not converted because, at the moment,
the Xv code does not use tiled surfaces.

Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
This commit is contained in:
Daniel Vetter 2010-04-11 12:56:24 +02:00
parent a619a78312
commit 804263c10d
3 changed files with 30 additions and 18 deletions

View File

@ -98,13 +98,21 @@ static inline void
intel_batch_emit_reloc(intel_screen_private *intel,
dri_bo * bo,
uint32_t read_domains,
uint32_t write_domains, uint32_t delta)
uint32_t write_domains, uint32_t delta, int needs_fence)
{
assert(intel_batch_space(intel) >= 4);
*(uint32_t *) (intel->batch_ptr + intel->batch_used) =
bo->offset + delta;
dri_bo_emit_reloc(intel->batch_bo, read_domains, write_domains, delta,
intel->batch_used, bo);
if (needs_fence)
drm_intel_bo_emit_reloc_fence(intel->batch_bo,
intel->batch_used,
bo, delta,
read_domains, write_domains);
else
drm_intel_bo_emit_reloc(intel->batch_bo, intel->batch_used,
bo, delta,
read_domains, write_domains);
intel->batch_used += 4;
}
@ -132,7 +140,7 @@ intel_batch_mark_pixmap_domains(intel_screen_private *intel,
static inline void
intel_batch_emit_reloc_pixmap(intel_screen_private *intel, PixmapPtr pixmap,
uint32_t read_domains, uint32_t write_domain,
uint32_t delta)
uint32_t delta, int needs_fence)
{
struct intel_pixmap *priv = i830_get_pixmap_intel(pixmap);
@ -143,17 +151,20 @@ intel_batch_emit_reloc_pixmap(intel_screen_private *intel, PixmapPtr pixmap,
intel_batch_emit_reloc(intel, priv->bo,
read_domains, write_domain,
delta);
delta, needs_fence);
}
#define ALIGN_BATCH(align) intel_batch_align(intel, align);
#define OUT_BATCH(dword) intel_batch_emit_dword(intel, dword)
#define OUT_RELOC(bo, read_domains, write_domains, delta) \
intel_batch_emit_reloc (intel, bo, read_domains, write_domains, delta)
intel_batch_emit_reloc(intel, bo, read_domains, write_domains, delta, 0)
#define OUT_RELOC_PIXMAP(pixmap, reads, write, delta) \
intel_batch_emit_reloc_pixmap(intel, pixmap, reads, write, delta)
intel_batch_emit_reloc_pixmap(intel, pixmap, reads, write, delta, 0)
#define OUT_RELOC_PIXMAP_FENCED(pixmap, reads, write, delta) \
intel_batch_emit_reloc_pixmap(intel, pixmap, reads, write, delta, 1)
union intfloat {
float f;

View File

@ -1048,8 +1048,9 @@ void i830_init_bufmgr(ScrnInfoPtr scrn)
if (IS_I865G(intel))
batch_size = 4096;
intel->bufmgr = intel_bufmgr_gem_init(intel->drmSubFD, batch_size);
intel_bufmgr_gem_enable_reuse(intel->bufmgr);
intel->bufmgr = drm_intel_bufmgr_gem_init(intel->drmSubFD, batch_size);
drm_intel_bufmgr_gem_enable_reuse(intel->bufmgr);
drm_intel_bufmgr_gem_enable_fenced_relocs(intel->bufmgr);
list_init(&intel->batch_pixmaps);
list_init(&intel->flush_pixmaps);

View File

@ -295,8 +295,8 @@ static void i830_uxa_solid(PixmapPtr pixmap, int x1, int y1, int x2, int y2)
OUT_BATCH(intel->BR[13] | pitch);
OUT_BATCH((y1 << 16) | (x1 & 0xffff));
OUT_BATCH((y2 << 16) | (x2 & 0xffff));
OUT_RELOC_PIXMAP(pixmap, I915_GEM_DOMAIN_RENDER,
I915_GEM_DOMAIN_RENDER, 0);
OUT_RELOC_PIXMAP_FENCED(pixmap, I915_GEM_DOMAIN_RENDER,
I915_GEM_DOMAIN_RENDER, 0);
OUT_BATCH(intel->BR[16]);
ADVANCE_BATCH();
}
@ -411,15 +411,15 @@ i830_uxa_copy(PixmapPtr dest, int src_x1, int src_y1, int dst_x1,
OUT_BATCH(intel->BR[13] | dst_pitch);
OUT_BATCH((dst_y1 << 16) | (dst_x1 & 0xffff));
OUT_BATCH((dst_y2 << 16) | (dst_x2 & 0xffff));
OUT_RELOC_PIXMAP(dest,
I915_GEM_DOMAIN_RENDER,
I915_GEM_DOMAIN_RENDER,
0);
OUT_RELOC_PIXMAP_FENCED(dest,
I915_GEM_DOMAIN_RENDER,
I915_GEM_DOMAIN_RENDER,
0);
OUT_BATCH((src_y1 << 16) | (src_x1 & 0xffff));
OUT_BATCH(src_pitch);
OUT_RELOC_PIXMAP(intel->render_source,
I915_GEM_DOMAIN_RENDER, 0,
0);
OUT_RELOC_PIXMAP_FENCED(intel->render_source,
I915_GEM_DOMAIN_RENDER, 0,
0);
ADVANCE_BATCH();
}