sna: Support a fast no relocation changed path

x11perf -copywinwin10 on gm45 with c2d L9400:
  before: 553,000 op/s
  after:  565,000 op/s

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
This commit is contained in:
Chris Wilson 2012-11-07 17:41:20 +00:00
parent b7d2fcf47a
commit 120fa0ef8d
3 changed files with 34 additions and 1 deletions

View File

@ -292,6 +292,15 @@ if test "x$USERPTR" = xyes; then
AC_DEFINE(USE_USERPTR,1,[Assume USERPTR support])
fi
dnl --enable-fast-reloc: opt-in (default "no") switch for the experimental
dnl "fast reloc" path. When enabled, USE_FASTRELOC is defined to 1 in the
dnl generated configuration header.
AC_ARG_ENABLE(fast-reloc,
AS_HELP_STRING([--enable-fast-reloc],
[Enable use of "fast reloc" (experimental) [default=no]]),
[FASTRELOC="$enableval"],
[FASTRELOC=no])
if test "x$FASTRELOC" = xyes; then
AC_DEFINE(USE_FASTRELOC,1,[Assume "fast reloc" support])
fi
AC_ARG_ENABLE(async-swap,
AS_HELP_STRING([--enable-async-swap],
[Enable use of asynchronous swaps (experimental) [default=no]]),

View File

@ -70,10 +70,16 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
#define DBG_NO_MAP_UPLOAD 0
#define DBG_NO_RELAXED_FENCING 0
#define DBG_NO_SECURE_BATCHES 0
#define DBG_NO_FAST_RELOC 0
#define DBG_DUMP 0
#define SHOW_BATCH 0
/* When the build was not configured with USE_FASTRELOC, force
 * DBG_NO_FAST_RELOC to 1 regardless of the value chosen above, so the
 * no-relocation probe is compiled out to "unsupported".
 */
#ifndef USE_FASTRELOC
#undef DBG_NO_FAST_RELOC
#define DBG_NO_FAST_RELOC 1
#endif
/* Worst case seems to be 965gm where we cannot write within a cacheline that
* is simultaneously being read by the GPU, or within the sampler
* prefetch. In general, the chipsets seem to have a requirement that sampler
@ -96,6 +102,9 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
#define LOCAL_I915_PARAM_HAS_SEMAPHORES 20
#define LOCAL_I915_PARAM_HAS_SECURE_BATCHES 23
/* Local definitions for the no-relocation feature: a parameter id and an
 * execbuffer flag bit. NOTE(review): presumably duplicated here so the
 * driver builds against kernel headers that predate them - confirm.
 */
#define LOCAL_I915_PARAM_HAS_NO_RELOC 24
#define LOCAL_I915_EXEC_NO_RELOC (1<<10)
#define LOCAL_I915_GEM_USERPTR 0x32
#define LOCAL_IOCTL_I915_GEM_USERPTR DRM_IOWR (DRM_COMMAND_BASE + LOCAL_I915_GEM_USERPTR, struct local_i915_gem_userptr)
@ -651,6 +660,14 @@ static bool test_has_execbuffer2(struct kgem *kgem)
errno == EFAULT);
}
/* Report whether the no-relocation path may be used.
 *
 * Yields false outright when DBG_NO_FAST_RELOC is non-zero (gem_param() is
 * then never called); otherwise true only if gem_param() returns a positive
 * value for LOCAL_I915_PARAM_HAS_NO_RELOC.
 */
static bool test_has_no_reloc(struct kgem *kgem)
{
	return !DBG_NO_FAST_RELOC &&
	       gem_param(kgem, LOCAL_I915_PARAM_HAS_NO_RELOC) > 0;
}
static bool test_has_semaphores_enabled(struct kgem *kgem)
{
FILE *file;
@ -838,6 +855,10 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
DBG(("%s: has userptr? %d\n", __FUNCTION__,
kgem->has_userptr));
kgem->has_no_reloc = test_has_no_reloc(kgem);
DBG(("%s: has no-reloc? %d\n", __FUNCTION__,
kgem->has_no_reloc));
kgem->has_semaphores = false;
if (kgem->has_blt && test_has_semaphores_enabled(kgem))
kgem->has_semaphores = true;
@ -2177,8 +2198,10 @@ void kgem_reset(struct kgem *kgem)
kgem->nbatch = 0;
kgem->surface = kgem->batch_size;
kgem->mode = KGEM_NONE;
kgem->batch_flags = 0;
kgem->flush = 0;
kgem->batch_flags = 0;
if (kgem->has_no_reloc)
kgem->batch_flags |= LOCAL_I915_EXEC_NO_RELOC;
kgem->next_request = __kgem_request_alloc();

View File

@ -164,6 +164,7 @@ struct kgem {
uint32_t has_secure_batches :1;
uint32_t has_cacheing :1;
uint32_t has_llc :1;
uint32_t has_no_reloc :1;
uint32_t can_blt_cpu :1;