sna: Experiment with using reloc.handle as an index into the execbuffer

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
This commit is contained in:
Chris Wilson 2012-11-08 15:56:13 +00:00
parent 93d8dddbb9
commit 85ba7e9626
3 changed files with 44 additions and 7 deletions

View File

@ -301,6 +301,15 @@ if test "x$FASTRELOC" = xyes; then
AC_DEFINE(USE_FASTRELOC,1,[Assume "fast reloc" support])
fi
dnl Opt-in switch for the experimental "handle LUT" mode: USE_HANDLE_LUT is
dnl defined only when --enable-handle-lut is passed (the default is "no").
AC_ARG_ENABLE(handle-lut,
AS_HELP_STRING([--enable-handle-lut],
[Enable use of "handle LUT" (experimental) [default=no]]),
[HANDLE_LUT="$enableval"],
[HANDLE_LUT=no])
if test "x$HANDLE_LUT" = xyes; then
AC_DEFINE(USE_HANDLE_LUT,1,[Assume "handle LUT" support])
fi
AC_ARG_ENABLE(async-swap,
AS_HELP_STRING([--enable-async-swap],
[Enable use of asynchronous swaps (experimental) [default=no]]),

View File

@ -71,6 +71,7 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
#define DBG_NO_RELAXED_FENCING 0
#define DBG_NO_SECURE_BATCHES 0
#define DBG_NO_FAST_RELOC 0
#define DBG_NO_HANDLE_LUT 0
#define DBG_DUMP 0
#define SHOW_BATCH 0
@ -80,6 +81,11 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
#define DBG_NO_FAST_RELOC 1
#endif
/* When the build did not define USE_HANDLE_LUT, force the debug override on
 * so that the handle-LUT probe (which checks DBG_NO_HANDLE_LUT first) always
 * reports the feature as unavailable.
 */
#ifndef USE_HANDLE_LUT
#undef DBG_NO_HANDLE_LUT
#define DBG_NO_HANDLE_LUT 1
#endif
/* Worst case seems to be 965gm where we cannot write within a cacheline that
* is simultaneously being read by the GPU, or within the sampler
* prefetch. In general, the chipsets seem to have a requirement that sampler
@ -103,8 +109,10 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
#define LOCAL_I915_PARAM_HAS_SEMAPHORES 20
#define LOCAL_I915_PARAM_HAS_SECURE_BATCHES 23
#define LOCAL_I915_PARAM_HAS_NO_RELOC 24
#define LOCAL_I915_PARAM_HAS_HANDLE_LUT 25
#define LOCAL_I915_EXEC_NO_RELOC (1<<10)
#define LOCAL_I915_EXEC_HANDLE_LUT (1<<11)
#define LOCAL_I915_GEM_USERPTR 0x32
#define LOCAL_IOCTL_I915_GEM_USERPTR DRM_IOWR (DRM_COMMAND_BASE + LOCAL_I915_GEM_USERPTR, struct local_i915_gem_userptr)
@ -668,6 +676,14 @@ static bool test_has_no_reloc(struct kgem *kgem)
return gem_param(kgem, LOCAL_I915_PARAM_HAS_NO_RELOC) > 0;
}
/* Probe whether the "handle LUT" execbuffer feature may be used.
 *
 * Reports false unconditionally when DBG_NO_HANDLE_LUT is non-zero;
 * otherwise reports true only if gem_param() yields a positive value
 * for LOCAL_I915_PARAM_HAS_HANDLE_LUT.
 */
static bool test_has_handle_lut(struct kgem *kgem)
{
	bool supported = false;

	/* Only query the parameter when the feature has not been compiled out. */
	if (!DBG_NO_HANDLE_LUT)
		supported = gem_param(kgem, LOCAL_I915_PARAM_HAS_HANDLE_LUT) > 0;

	return supported;
}
static bool test_has_semaphores_enabled(struct kgem *kgem)
{
FILE *file;
@ -859,6 +875,10 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
DBG(("%s: has no-reloc? %d\n", __FUNCTION__,
kgem->has_no_reloc));
kgem->has_handle_lut = test_has_handle_lut(kgem);
DBG(("%s: has handle-lut? %d\n", __FUNCTION__,
kgem->has_handle_lut));
kgem->has_semaphores = false;
if (kgem->has_blt && test_has_semaphores_enabled(kgem))
kgem->has_semaphores = true;
@ -1212,6 +1232,7 @@ kgem_add_handle(struct kgem *kgem, struct kgem_bo *bo)
__FUNCTION__, bo->handle, kgem->nexec));
assert(kgem->nexec < ARRAY_SIZE(kgem->exec));
bo->target_handle = kgem->has_handle_lut ? kgem->nexec : bo->handle;
exec = memset(&kgem->exec[kgem->nexec++], 0, sizeof(*exec));
exec->handle = bo->handle;
exec->offset = bo->presumed_offset;
@ -1246,8 +1267,8 @@ static void kgem_fixup_self_relocs(struct kgem *kgem, struct kgem_bo *bo)
int n;
for (n = 0; n < kgem->nreloc; n++) {
if (kgem->reloc[n].target_handle == 0) {
kgem->reloc[n].target_handle = bo->handle;
if (kgem->reloc[n].target_handle == ~0U) {
kgem->reloc[n].target_handle = bo->target_handle;
kgem->reloc[n].presumed_offset = bo->presumed_offset;
kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] =
kgem->reloc[n].delta + bo->presumed_offset;
@ -2047,9 +2068,11 @@ static void kgem_finish_buffers(struct kgem *kgem)
gem_write(kgem->fd, shrink->handle,
0, bo->used, bo->mem);
shrink->target_handle =
kgem->has_handle_lut ? bo->base.target_handle : shrink->handle;
for (n = 0; n < kgem->nreloc; n++) {
if (kgem->reloc[n].target_handle == bo->base.handle) {
kgem->reloc[n].target_handle = shrink->handle;
if (kgem->reloc[n].target_handle == bo->base.target_handle) {
kgem->reloc[n].target_handle = shrink->target_handle;
kgem->reloc[n].presumed_offset = shrink->presumed_offset;
kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] =
kgem->reloc[n].delta + shrink->presumed_offset;
@ -2202,6 +2225,8 @@ void kgem_reset(struct kgem *kgem)
kgem->batch_flags = 0;
if (kgem->has_no_reloc)
kgem->batch_flags |= LOCAL_I915_EXEC_NO_RELOC;
if (kgem->has_handle_lut)
kgem->batch_flags |= LOCAL_I915_EXEC_HANDLE_LUT;
kgem->next_request = __kgem_request_alloc();
@ -2227,7 +2252,7 @@ static int compact_batch_surface(struct kgem *kgem)
shrink *= sizeof(uint32_t);
for (n = 0; n < kgem->nreloc; n++) {
if (kgem->reloc[n].read_domains == I915_GEM_DOMAIN_INSTRUCTION &&
kgem->reloc[n].target_handle == 0)
kgem->reloc[n].target_handle == ~0U)
kgem->reloc[n].delta -= shrink;
if (kgem->reloc[n].offset >= sizeof(uint32_t)*kgem->nbatch)
@ -2292,6 +2317,7 @@ void _kgem_submit(struct kgem *kgem)
kgem->exec[i].rsvd1 = 0;
kgem->exec[i].rsvd2 = 0;
rq->bo->target_handle = kgem->has_handle_lut ? i : handle;
rq->bo->exec = &kgem->exec[i];
rq->bo->rq = rq; /* useful sanity check */
list_add(&rq->bo->request, &rq->buffers);
@ -3895,7 +3921,7 @@ uint32_t kgem_add_reloc(struct kgem *kgem,
}
kgem->reloc[index].delta = delta;
kgem->reloc[index].target_handle = bo->handle;
kgem->reloc[index].target_handle = bo->target_handle;
kgem->reloc[index].presumed_offset = bo->presumed_offset;
if (read_write_domain & 0x7ff) {
@ -3906,7 +3932,7 @@ uint32_t kgem_add_reloc(struct kgem *kgem,
delta += bo->presumed_offset;
} else {
kgem->reloc[index].delta = delta;
kgem->reloc[index].target_handle = 0;
kgem->reloc[index].target_handle = ~0U;
kgem->reloc[index].presumed_offset = 0;
}
kgem->reloc[index].read_domains = read_write_domain >> 16;

View File

@ -64,6 +64,7 @@ struct kgem_bo {
uint32_t unique_id;
uint32_t refcnt;
uint32_t handle;
uint32_t target_handle;
uint32_t presumed_offset;
uint32_t delta;
union {
@ -165,6 +166,7 @@ struct kgem {
uint32_t has_cacheing :1;
uint32_t has_llc :1;
uint32_t has_no_reloc :1;
uint32_t has_handle_lut :1;
uint32_t can_blt_cpu :1;