From 85ba7e96268dbb8da4bb34078333695a451c6570 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 8 Nov 2012 15:56:13 +0000 Subject: [PATCH] sna: Experiment with using reloc.handle as an index into the execbuffer Signed-off-by: Chris Wilson --- configure.ac | 9 +++++++++ src/sna/kgem.c | 40 +++++++++++++++++++++++++++++++++------- src/sna/kgem.h | 2 ++ 3 files changed, 44 insertions(+), 7 deletions(-) diff --git a/configure.ac b/configure.ac index 9ea1e3c6..ce3b0073 100644 --- a/configure.ac +++ b/configure.ac @@ -301,6 +301,15 @@ if test "x$FASTRELOC" = xyes; then AC_DEFINE(USE_FASTRELOC,1,[Assume "fast reloc" support]) fi +AC_ARG_ENABLE(handle-lut, + AS_HELP_STRING([--enable-handle-lut], + [Enable use of "handle LUT" (experimental) [default=no]]), + [HANDLE_LUT="$enableval"], + [HANDLE_LUT=no]) +if test "x$HANDLE_LUT" = xyes; then + AC_DEFINE(USE_HANDLE_LUT,1,[Assume "handle LUT" support]) +fi + AC_ARG_ENABLE(async-swap, AS_HELP_STRING([--enable-async-swap], [Enable use of asynchronous swaps (experimental) [default=no]]), diff --git a/src/sna/kgem.c b/src/sna/kgem.c index e643b857..e2c5da83 100644 --- a/src/sna/kgem.c +++ b/src/sna/kgem.c @@ -71,6 +71,7 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags); #define DBG_NO_RELAXED_FENCING 0 #define DBG_NO_SECURE_BATCHES 0 #define DBG_NO_FAST_RELOC 0 +#define DBG_NO_HANDLE_LUT 0 #define DBG_DUMP 0 #define SHOW_BATCH 0 @@ -80,6 +81,11 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags); #define DBG_NO_FAST_RELOC 1 #endif +#ifndef USE_HANDLE_LUT +#undef DBG_NO_HANDLE_LUT +#define DBG_NO_HANDLE_LUT 1 +#endif + /* Worst case seems to be 965gm where we cannot write within a cacheline that * is being simultaneously being read by the GPU, or within the sampler * prefetch. In general, the chipsets seem to have a requirement that sampler @@ -103,8 +109,10 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags); #define LOCAL_I915_PARAM_HAS_SEMAPHORES 20 #define LOCAL_I915_PARAM_HAS_SECURE_BATCHES 23 #define LOCAL_I915_PARAM_HAS_NO_RELOC 24 +#define LOCAL_I915_PARAM_HAS_HANDLE_LUT 25 #define LOCAL_I915_EXEC_NO_RELOC (1<<10) +#define LOCAL_I915_EXEC_HANDLE_LUT (1<<11) #define LOCAL_I915_GEM_USERPTR 0x32 #define LOCAL_IOCTL_I915_GEM_USERPTR DRM_IOWR (DRM_COMMAND_BASE + LOCAL_I915_GEM_USERPTR, struct local_i915_gem_userptr) @@ -668,6 +676,14 @@ static bool test_has_no_reloc(struct kgem *kgem) return gem_param(kgem, LOCAL_I915_PARAM_HAS_NO_RELOC) > 0; } +static bool test_has_handle_lut(struct kgem *kgem) +{ + if (DBG_NO_HANDLE_LUT) + return false; + + return gem_param(kgem, LOCAL_I915_PARAM_HAS_HANDLE_LUT) > 0; +} + static bool test_has_semaphores_enabled(struct kgem *kgem) { FILE *file; @@ -859,6 +875,10 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen) DBG(("%s: has no-reloc? %d\n", __FUNCTION__, kgem->has_no_reloc)); + kgem->has_handle_lut = test_has_handle_lut(kgem); + DBG(("%s: has handle-lut? %d\n", __FUNCTION__, + kgem->has_handle_lut)); + kgem->has_semaphores = false; if (kgem->has_blt && test_has_semaphores_enabled(kgem)) kgem->has_semaphores = true; @@ -1212,6 +1232,7 @@ kgem_add_handle(struct kgem *kgem, struct kgem_bo *bo) __FUNCTION__, bo->handle, kgem->nexec)); assert(kgem->nexec < ARRAY_SIZE(kgem->exec)); + bo->target_handle = kgem->has_handle_lut ? kgem->nexec : bo->handle; exec = memset(&kgem->exec[kgem->nexec++], 0, sizeof(*exec)); exec->handle = bo->handle; exec->offset = bo->presumed_offset; @@ -1246,8 +1267,8 @@ static void kgem_fixup_self_relocs(struct kgem *kgem, struct kgem_bo *bo) int n; for (n = 0; n < kgem->nreloc; n++) { - if (kgem->reloc[n].target_handle == 0) { - kgem->reloc[n].target_handle = bo->handle; + if (kgem->reloc[n].target_handle == ~0U) { + kgem->reloc[n].target_handle = bo->target_handle; kgem->reloc[n].presumed_offset = bo->presumed_offset; kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] = kgem->reloc[n].delta + bo->presumed_offset; @@ -2047,9 +2068,11 @@ static void kgem_finish_buffers(struct kgem *kgem) gem_write(kgem->fd, shrink->handle, 0, bo->used, bo->mem); + shrink->target_handle = + kgem->has_handle_lut ? bo->base.target_handle : shrink->handle; for (n = 0; n < kgem->nreloc; n++) { - if (kgem->reloc[n].target_handle == bo->base.handle) { - kgem->reloc[n].target_handle = shrink->handle; + if (kgem->reloc[n].target_handle == bo->base.target_handle) { + kgem->reloc[n].target_handle = shrink->target_handle; kgem->reloc[n].presumed_offset = shrink->presumed_offset; kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] = kgem->reloc[n].delta + shrink->presumed_offset; @@ -2202,6 +2225,8 @@ void kgem_reset(struct kgem *kgem) kgem->batch_flags = 0; if (kgem->has_no_reloc) kgem->batch_flags |= LOCAL_I915_EXEC_NO_RELOC; + if (kgem->has_handle_lut) + kgem->batch_flags |= LOCAL_I915_EXEC_HANDLE_LUT; kgem->next_request = __kgem_request_alloc(); @@ -2227,7 +2252,7 @@ static int compact_batch_surface(struct kgem *kgem) shrink *= sizeof(uint32_t); for (n = 0; n < kgem->nreloc; n++) { if (kgem->reloc[n].read_domains == I915_GEM_DOMAIN_INSTRUCTION && - kgem->reloc[n].target_handle == 0) + kgem->reloc[n].target_handle == ~0U) kgem->reloc[n].delta -= shrink; if (kgem->reloc[n].offset >= sizeof(uint32_t)*kgem->nbatch) @@ -2292,6 +2317,7 @@ void _kgem_submit(struct kgem *kgem) kgem->exec[i].rsvd1 = 0; kgem->exec[i].rsvd2 = 0; + rq->bo->target_handle = kgem->has_handle_lut ? i : handle; rq->bo->exec = &kgem->exec[i]; rq->bo->rq = rq; /* useful sanity check */ list_add(&rq->bo->request, &rq->buffers); @@ -3895,7 +3921,7 @@ uint32_t kgem_add_reloc(struct kgem *kgem, } kgem->reloc[index].delta = delta; - kgem->reloc[index].target_handle = bo->handle; + kgem->reloc[index].target_handle = bo->target_handle; kgem->reloc[index].presumed_offset = bo->presumed_offset; if (read_write_domain & 0x7ff) { @@ -3906,7 +3932,7 @@ uint32_t kgem_add_reloc(struct kgem *kgem, delta += bo->presumed_offset; } else { kgem->reloc[index].delta = delta; - kgem->reloc[index].target_handle = 0; + kgem->reloc[index].target_handle = ~0U; kgem->reloc[index].presumed_offset = 0; } kgem->reloc[index].read_domains = read_write_domain >> 16; diff --git a/src/sna/kgem.h b/src/sna/kgem.h index 8789b55d..b42a8e0a 100644 --- a/src/sna/kgem.h +++ b/src/sna/kgem.h @@ -64,6 +64,7 @@ struct kgem_bo { uint32_t unique_id; uint32_t refcnt; uint32_t handle; + uint32_t target_handle; uint32_t presumed_offset; uint32_t delta; union { @@ -165,6 +166,7 @@ struct kgem { uint32_t has_cacheing :1; uint32_t has_llc :1; uint32_t has_no_reloc :1; + uint32_t has_handle_lut :1; uint32_t can_blt_cpu :1;