sna: Experimental support for write-combining mmaps

If we have a linear buffer, we can request the kernel mmap it directly
with write-combining without having to pin it into the GTT. This allows
us to efficiently upload very large buffers, and can avoid the dreaded
aperture thrashing.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   2014-10-23 10:34:44 +01:00
Commit: 87802b3402 (parent: 33764685cb)
7 changed files with 266 additions and 126 deletions
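For reference, the mechanism this patch relies on is a second version of the GEM mmap ioctl that can hand back a write-combining CPU mapping of a linear buffer object, bypassing the GTT entirely. Below is a minimal sketch of requesting such a mapping, built from the local structure and flag definitions introduced in the kgem changes further down; the helper name gem_mmap_wc and the stripped-down error handling are illustrative only (the real code retries via do_ioctl, throttling and cache cleanup):

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm.h>
#include <i915_drm.h>

/* Local copies of the (then experimental) interface, as defined in this patch. */
struct local_i915_gem_mmap2 {
	uint32_t handle;
	uint32_t pad;
	uint64_t offset;	/* offset into the object */
	uint64_t size;		/* length of the mapping in bytes */
	uint64_t addr_ptr;	/* out: userspace address of the mapping */
	uint64_t flags;
#define I915_MMAP_WC 0x1
};
#define LOCAL_IOCTL_I915_GEM_MMAP_v2 \
	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP, struct local_i915_gem_mmap2)

/* Illustrative helper: map a linear (untiled) bo write-combined, without
 * binding it into the mappable aperture. Returns NULL if the kernel does
 * not understand the new ioctl or the mapping fails. */
static void *gem_mmap_wc(int fd, uint32_t handle, uint64_t size)
{
	struct local_i915_gem_mmap2 arg;

	memset(&arg, 0, sizeof(arg));
	arg.handle = handle;
	arg.offset = 0;
	arg.size = size;
	arg.flags = I915_MMAP_WC;

	if (ioctl(fd, LOCAL_IOCTL_I915_GEM_MMAP_v2, &arg))
		return NULL;

	return (void *)(uintptr_t)arg.addr_ptr;
}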

@ -760,6 +760,16 @@ if test "x$RENDERNODE" = "xyes"; then
xp_msg="$xp_msg rendernode"
fi
AC_ARG_ENABLE(wc-mmap,
AS_HELP_STRING([--enable-wc-mmap],
[Enable use of WriteCombining mmaps [default=no]]),
[WC_MMAP="$enableval"],
[WC_MMAP="no"])
if test "x$WC_MMAP" = "xyes"; then
AC_DEFINE(USE_WC_MMAP,1,[Enable use of WriteCombining mmaps])
xp_msg="$xp_msg mmap(wc)"
fi
AC_ARG_ENABLE(create2,
AS_HELP_STRING([--enable-create2],
[Enable use of create2 ioctl (experimental) [default=no]]),

@ -83,6 +83,7 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
#define DBG_NO_FAST_RELOC 0
#define DBG_NO_HANDLE_LUT 0
#define DBG_NO_WT 0
#define DBG_NO_WC_MMAP 0
#define DBG_DUMP 0
#define DBG_NO_MALLOC_CACHE 0
@ -95,6 +96,11 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
#define SHOW_BATCH_BEFORE 0
#define SHOW_BATCH_AFTER 0
#if !USE_WC_MMAP
#undef DBG_NO_WC_MMAP
#define DBG_NO_WC_MMAP 1
#endif
#if 0
#define ASSERT_IDLE(kgem__, handle__) assert(!__kgem_busy(kgem__, handle__))
#define ASSERT_MAYBE_IDLE(kgem__, handle__, expect__) assert(!(expect__) || !__kgem_busy(kgem__, handle__))
@ -127,12 +133,14 @@ search_snoop_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags);
#define LOCAL_I915_PARAM_HAS_BLT 11
#define LOCAL_I915_PARAM_HAS_RELAXED_FENCING 12
#define LOCAL_I915_PARAM_HAS_RELAXED_DELTA 15
#define LOCAL_I915_PARAM_HAS_LLC 17
#define LOCAL_I915_PARAM_HAS_SEMAPHORES 20
#define LOCAL_I915_PARAM_HAS_SECURE_BATCHES 23
#define LOCAL_I915_PARAM_HAS_PINNED_BATCHES 24
#define LOCAL_I915_PARAM_HAS_NO_RELOC 25
#define LOCAL_I915_PARAM_HAS_HANDLE_LUT 26
#define LOCAL_I915_PARAM_HAS_WT 27
#define LOCAL_I915_PARAM_MMAP_VERSION 29
#define LOCAL_I915_EXEC_IS_PINNED (1<<10)
#define LOCAL_I915_EXEC_NO_RELOC (1<<11)
@ -179,6 +187,17 @@ struct local_i915_gem_caching {
#define LOCAL_IOCTL_I915_GEM_SET_CACHING DRM_IOW(DRM_COMMAND_BASE + LOCAL_I915_GEM_SET_CACHING, struct local_i915_gem_caching)
#define LOCAL_IOCTL_I915_GEM_GET_CACHING DRM_IOW(DRM_COMMAND_BASE + LOCAL_I915_GEM_GET_CACHING, struct local_i915_gem_caching)
struct local_i915_gem_mmap2 {
uint32_t handle;
uint32_t pad;
uint64_t offset;
uint64_t size;
uint64_t addr_ptr;
uint64_t flags;
#define I915_MMAP_WC 0x1
};
#define LOCAL_IOCTL_I915_GEM_MMAP_v2 DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP, struct local_i915_gem_mmap2)
struct kgem_buffer {
struct kgem_bo base;
void *mem;
@ -411,7 +430,7 @@ static bool __kgem_throttle_retire(struct kgem *kgem, unsigned flags)
static void *__kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo)
{
struct drm_i915_gem_mmap_gtt mmap_arg;
struct drm_i915_gem_mmap_gtt gtt;
void *ptr;
int err;
@ -419,12 +438,13 @@ static void *__kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo)
bo->handle, bytes(bo)));
assert(bo->proxy == NULL);
assert(!bo->snoop);
assert(num_pages(bo) <= kgem->aperture_mappable / 4);
assert(num_pages(bo) <= kgem->aperture_mappable / 2);
assert(kgem->gen != 021 || bo->tiling != I915_TILING_Y);
VG_CLEAR(gtt);
retry_gtt:
VG_CLEAR(mmap_arg);
mmap_arg.handle = bo->handle;
if ((err = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_arg))) {
gtt.handle = bo->handle;
if ((err = do_ioctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &gtt))) {
assert(err != EINVAL);
(void)__kgem_throttle_retire(kgem, 0);
@ -441,7 +461,7 @@ retry_gtt:
retry_mmap:
ptr = mmap(0, bytes(bo), PROT_READ | PROT_WRITE, MAP_SHARED,
kgem->fd, mmap_arg.offset);
kgem->fd, gtt.offset);
if (ptr == MAP_FAILED) {
err = errno;
assert(err != EINVAL);
@ -457,7 +477,50 @@ retry_mmap:
ptr = NULL;
}
return ptr;
/* Cache this mapping to avoid the overhead of an
* excruciatingly slow GTT pagefault. This is more an
* issue with compositing managers which need to
* frequently flush CPU damage to their GPU bo.
*/
return bo->map__gtt = ptr;
}
static void *__kgem_bo_map__wc(struct kgem *kgem, struct kgem_bo *bo)
{
struct local_i915_gem_mmap2 wc;
int err;
DBG(("%s(handle=%d, size=%d)\n", __FUNCTION__,
bo->handle, bytes(bo)));
assert(bo->proxy == NULL);
assert(!bo->snoop);
assert(kgem->has_wc_mmap);
VG_CLEAR(wc);
retry_wc:
wc.handle = bo->handle;
wc.offset = 0;
wc.size = bytes(bo);
wc.flags = I915_MMAP_WC;
if ((err = do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_MMAP_v2, &wc))) {
assert(err != EINVAL);
if (__kgem_throttle_retire(kgem, 0))
goto retry_wc;
if (kgem_cleanup_cache(kgem))
goto retry_wc;
ERR(("%s: failed to mmap handle=%d, %d bytes, into CPU(wc) domain: %d\n",
__FUNCTION__, bo->handle, bytes(bo), -err));
return NULL;
}
VG(VALGRIND_MAKE_MEM_DEFINED(wc.addr_ptr, bytes(bo)));
DBG(("%s: caching CPU(wc) vma for %d\n", __FUNCTION__, bo->handle));
return bo->map__wc = (void *)(uintptr_t)wc.addr_ptr;
}
static int gem_write(int fd, uint32_t handle,
@ -1029,9 +1092,7 @@ static bool test_has_llc(struct kgem *kgem)
if (DBG_NO_LLC)
return false;
#if defined(I915_PARAM_HAS_LLC) /* Expected in libdrm-2.4.31 */
has_llc = gem_param(kgem, I915_PARAM_HAS_LLC);
#endif
has_llc = gem_param(kgem, LOCAL_I915_PARAM_HAS_LLC);
if (has_llc == -1) {
DBG(("%s: no kernel/drm support for HAS_LLC, assuming support for LLC based on GPU generation\n", __FUNCTION__));
has_llc = kgem->gen >= 060;
@ -1040,6 +1101,28 @@ static bool test_has_llc(struct kgem *kgem)
return has_llc;
}
static bool test_has_wc_mmap(struct kgem *kgem)
{
struct local_i915_gem_mmap2 wc;
bool ret;
if (DBG_NO_WC_MMAP)
return false;
if (gem_param(kgem, LOCAL_I915_PARAM_MMAP_VERSION) < 1)
return false;
VG_CLEAR(wc);
wc.handle = gem_create(kgem->fd, 1);
wc.offset = 0;
wc.size = 4096;
wc.flags = I915_MMAP_WC;
ret = do_ioctl(kgem->fd, LOCAL_IOCTL_I915_GEM_MMAP_v2, &wc) == 0;
gem_close(kgem->fd, wc.handle);
return ret;
}
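
test_has_wc_mmap() above first checks gem_param(kgem, LOCAL_I915_PARAM_MMAP_VERSION), a small wrapper that is not part of this diff. A sketch of what such a parameter query typically looks like, assuming the standard DRM_IOCTL_I915_GETPARAM interface (the kgem version takes a struct kgem * and differs in detail):

#include <string.h>
#include <sys/ioctl.h>
#include <drm.h>
#include <i915_drm.h>

#define LOCAL_I915_PARAM_MMAP_VERSION 29	/* as defined in this patch */

/* Ask the kernel for an i915 parameter; returns -1 if the parameter
 * (or the ioctl itself) is unknown to this kernel. */
static int gem_param(int fd, int param)
{
	drm_i915_getparam_t gp;
	int value = -1;

	memset(&gp, 0, sizeof(gp));
	gp.param = param;
	gp.value = &value;
	if (ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
		return -1;

	return value;
}

/* WC mmaps need MMAP_VERSION >= 1, and are then confirmed with a trial
 * mapping, as test_has_wc_mmap() does above. */
static int kernel_has_mmap_wc(int fd)
{
	return gem_param(fd, LOCAL_I915_PARAM_MMAP_VERSION) >= 1;
}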
static bool test_has_caching(struct kgem *kgem)
{
uint32_t handle;
@ -1428,6 +1511,10 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen)
DBG(("%s: has write-through caching for scanouts? %d\n", __FUNCTION__,
kgem->has_wt));
kgem->has_wc_mmap = test_has_wc_mmap(kgem);
DBG(("%s: has wc-mmapping? %d\n", __FUNCTION__,
kgem->has_wc_mmap));
kgem->has_caching = test_has_caching(kgem);
DBG(("%s: has set-cache-level? %d\n", __FUNCTION__,
kgem->has_caching));
@ -1975,17 +2062,23 @@ static void kgem_bo_free(struct kgem *kgem, struct kgem_bo *bo)
DBG(("%s: releasing %p:%p vma for handle=%d, count=%d\n",
__FUNCTION__, bo->map__gtt, bo->map__cpu,
bo->handle, list_is_empty(&bo->vma) ? 0 : kgem->vma[bo->map__gtt == NULL].count));
bo->handle, list_is_empty(&bo->vma) ? 0 : kgem->vma[bo->map__gtt == NULL && bo->map__wc == NULL].count));
if (!list_is_empty(&bo->vma)) {
_list_del(&bo->vma);
kgem->vma[bo->map__gtt == NULL].count--;
kgem->vma[bo->map__gtt == NULL && bo->map__wc == NULL].count--;
}
if (bo->map__gtt)
munmap(MAP(bo->map__gtt), bytes(bo));
if (bo->map__cpu)
munmap(bo->map__gtt, bytes(bo));
if (bo->map__wc) {
VG(VALGRIND_MAKE_MEM_NOACCESS(bo->map__wc, bytes(bo)));
munmap(bo->map__wc, bytes(bo));
}
if (bo->map__cpu) {
VG(VALGRIND_MAKE_MEM_NOACCESS(MAP(bo->map__cpu), bytes(bo)));
munmap(MAP(bo->map__cpu), bytes(bo));
}
_list_del(&bo->list);
_list_del(&bo->request);
@ -2021,25 +2114,24 @@ inline static void kgem_bo_move_to_inactive(struct kgem *kgem,
if (bucket(bo) >= NUM_CACHE_BUCKETS) {
if (bo->map__gtt) {
munmap(MAP(bo->map__gtt), bytes(bo));
munmap(bo->map__gtt, bytes(bo));
bo->map__gtt = NULL;
}
list_move(&bo->list, &kgem->large_inactive);
} else {
assert(bo->flush == false);
assert(list_is_empty(&bo->vma));
list_move(&bo->list, &kgem->inactive[bucket(bo)]);
if (bo->map__gtt) {
if (!kgem_bo_can_map(kgem, bo)) {
munmap(MAP(bo->map__gtt), bytes(bo));
bo->map__gtt = NULL;
}
if (bo->map__gtt) {
list_add(&bo->vma, &kgem->vma[0].inactive[bucket(bo)]);
kgem->vma[0].count++;
}
if (bo->map__gtt && !kgem_bo_can_map(kgem, bo)) {
munmap(bo->map__gtt, bytes(bo));
bo->map__gtt = NULL;
}
if (bo->map__cpu && !bo->map__gtt) {
if (bo->map__gtt || (bo->map__wc && !bo->tiling)) {
list_add(&bo->vma, &kgem->vma[0].inactive[bucket(bo)]);
kgem->vma[0].count++;
}
if (bo->map__cpu && list_is_empty(&bo->vma)) {
list_add(&bo->vma, &kgem->vma[1].inactive[bucket(bo)]);
kgem->vma[1].count++;
}
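
The index into kgem->vma[] above selects between two caches of inactive mappings: bucket 0 holds bos whose GTT (and now WC) mapping is retained, bucket 1 holds bos with only a CPU mapping. With the new map__wc pointer the expression becomes bo->map__gtt == NULL && bo->map__wc == NULL; restated as an illustrative helper (the patch itself open-codes this at each site, and the sketch assumes struct kgem_bo from kgem.h):

/* Illustrative only: which of the two inactive-vma caches a bo's
 * retained mapping belongs to. */
static inline int vma_cache_index(const struct kgem_bo *bo)
{
	/* 0: a GTT or WC mapping is cached for this bo,
	 * 1: only a cacheable CPU mapping is cached. */
	return bo->map__gtt == NULL && bo->map__wc == NULL;
}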
@ -2087,9 +2179,9 @@ inline static void kgem_bo_remove_from_inactive(struct kgem *kgem,
assert(bo->rq == NULL);
assert(bo->exec == NULL);
if (!list_is_empty(&bo->vma)) {
assert(bo->map__gtt || bo->map__cpu);
assert(bo->map__gtt || bo->map__wc || bo->map__cpu);
list_del(&bo->vma);
kgem->vma[bo->map__gtt == NULL].count--;
kgem->vma[bo->map__gtt == NULL && bo->map__wc == NULL].count--;
}
}
@ -2824,6 +2916,7 @@ static void kgem_commit(struct kgem *kgem)
assert(list_is_empty(&rq->buffers));
assert(rq->bo->map__gtt == NULL);
assert(rq->bo->map__wc == NULL);
assert(rq->bo->map__cpu == NULL);
gem_close(kgem->fd, rq->bo->handle);
kgem_cleanup_cache(kgem);
@ -3985,7 +4078,7 @@ discard:
__FUNCTION__, for_cpu ? "cpu" : "gtt"));
cache = &kgem->vma[for_cpu].inactive[cache_bucket(num_pages)];
list_for_each_entry(bo, cache, vma) {
assert(for_cpu ? bo->map__cpu : bo->map__gtt);
assert(for_cpu ? !!bo->map__cpu : (bo->map__gtt || bo->map__wc));
assert(bucket(bo) == cache_bucket(num_pages));
assert(bo->proxy == NULL);
assert(bo->rq == NULL);
@ -4067,10 +4160,10 @@ discard:
bo->pitch = 0;
}
if (bo->map__gtt || bo->map__cpu) {
if (bo->map__gtt || bo->map__wc || bo->map__cpu) {
if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
int for_cpu = !!(flags & CREATE_CPU_MAP);
if (for_cpu ? bo->map__cpu : bo->map__gtt){
if (for_cpu ? !!bo->map__cpu : (bo->map__gtt || bo->map__wc)){
if (first != NULL)
break;
@ -4477,7 +4570,7 @@ unsigned kgem_can_create_2d(struct kgem *kgem,
flags |= KGEM_CAN_CREATE_CPU;
if (size > 4096 && size <= kgem->max_gpu_size)
flags |= KGEM_CAN_CREATE_GPU;
if (size <= PAGE_SIZE*kgem->aperture_mappable/4)
if (size <= PAGE_SIZE*kgem->aperture_mappable/4 || kgem->has_wc_mmap)
flags |= KGEM_CAN_CREATE_GTT;
if (size > kgem->large_object_size)
flags |= KGEM_CAN_CREATE_LARGE;
@ -4903,7 +4996,7 @@ large_inactive:
assert(bucket(bo) == bucket);
assert(bo->refcnt == 0);
assert(!bo->scanout);
assert(for_cpu ? bo->map__cpu : bo->map__gtt);
assert(for_cpu ? !!bo->map__cpu : (bo->map__gtt || bo->map__wc));
assert(bo->rq == NULL);
assert(bo->exec == NULL);
assert(list_is_empty(&bo->request));
@ -5996,7 +6089,6 @@ static void kgem_trim_vma_cache(struct kgem *kgem, int type, int bucket)
i = 0;
while (kgem->vma[type].count > 0) {
struct kgem_bo *bo = NULL;
void **ptr;
for (j = 0;
bo == NULL && j < ARRAY_SIZE(kgem->vma[type].inactive);
@ -6011,12 +6103,23 @@ static void kgem_trim_vma_cache(struct kgem *kgem, int type, int bucket)
DBG(("%s: discarding inactive %s vma cache for %d\n",
__FUNCTION__, type ? "CPU" : "GTT", bo->handle));
ptr = type ? &bo->map__cpu : &bo->map__gtt;
assert(bo->rq == NULL);
if (type) {
VG(VALGRIND_MAKE_MEM_NOACCESS(MAP(bo->map__cpu), bytes(bo)));
munmap(MAP(bo->map__cpu), bytes(bo));
bo->map__cpu = NULL;
} else {
if (bo->map__wc) {
VG(VALGRIND_MAKE_MEM_NOACCESS(bo->map__wc, bytes(bo)));
munmap(bo->map__wc, bytes(bo));
bo->map__wc = NULL;
}
if (bo->map__gtt) {
munmap(bo->map__gtt, bytes(bo));
bo->map__gtt = NULL;
}
}
VG(if (type) VALGRIND_MAKE_MEM_NOACCESS(MAP(*ptr), bytes(bo)));
munmap(MAP(*ptr), bytes(bo));
*ptr = NULL;
list_del(&bo->vma);
kgem->vma[type].count--;
@ -6028,10 +6131,28 @@ static void kgem_trim_vma_cache(struct kgem *kgem, int type, int bucket)
}
}
void *kgem_bo_map__async(struct kgem *kgem, struct kgem_bo *bo)
static void *__kgem_bo_map__gtt_or_wc(struct kgem *kgem, struct kgem_bo *bo)
{
void *ptr;
DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo));
if (bo->tiling || !kgem->has_wc_mmap) {
ptr = bo->map__gtt;
if (ptr == NULL)
ptr = __kgem_bo_map__gtt(kgem, bo);
} else {
ptr = bo->map__wc;
if (ptr == NULL)
ptr = __kgem_bo_map__wc(kgem, bo);
}
return ptr;
}
void *kgem_bo_map__async(struct kgem *kgem, struct kgem_bo *bo)
{
DBG(("%s: handle=%d, offset=%ld, tiling=%d, map=%p:%p, domain=%d\n", __FUNCTION__,
bo->handle, (long)bo->presumed_offset, bo->tiling, bo->map__gtt, bo->map__cpu, bo->domain));
@ -6046,26 +6167,7 @@ void *kgem_bo_map__async(struct kgem *kgem, struct kgem_bo *bo)
return kgem_bo_map__cpu(kgem, bo);
}
ptr = MAP(bo->map__gtt);
if (ptr == NULL) {
assert(num_pages(bo) <= kgem->aperture_mappable / 2);
kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo));
ptr = __kgem_bo_map__gtt(kgem, bo);
if (ptr == NULL)
return NULL;
/* Cache this mapping to avoid the overhead of an
* excruciatingly slow GTT pagefault. This is more an
* issue with compositing managers which need to frequently
* flush CPU damage to their GPU bo.
*/
bo->map__gtt = ptr;
DBG(("%s: caching GTT vma for %d\n", __FUNCTION__, bo->handle));
}
return ptr;
return __kgem_bo_map__gtt_or_wc(kgem, bo);
}
void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo)
@ -6091,25 +6193,7 @@ void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo)
return ptr;
}
ptr = MAP(bo->map__gtt);
if (ptr == NULL) {
assert(num_pages(bo) <= kgem->aperture_mappable / 2);
assert(kgem->gen != 021 || bo->tiling != I915_TILING_Y);
kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo));
ptr = __kgem_bo_map__gtt(kgem, bo);
if (ptr == NULL)
return NULL;
/* Cache this mapping to avoid the overhead of an
* excruciatingly slow GTT pagefault. This is more an
* issue with compositing managers which need to frequently
* flush CPU damage to their GPU bo.
*/
bo->map__gtt = ptr;
DBG(("%s: caching GTT vma for %d\n", __FUNCTION__, bo->handle));
}
ptr = __kgem_bo_map__gtt_or_wc(kgem, bo);
if (bo->domain != DOMAIN_GTT || FORCE_MMAP_SYNC & (1 << DOMAIN_GTT)) {
struct drm_i915_gem_set_domain set_domain;
@ -6137,8 +6221,6 @@ void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo)
void *kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo)
{
void *ptr;
DBG(("%s: handle=%d, offset=%ld, tiling=%d, map=%p:%p, domain=%d\n", __FUNCTION__,
bo->handle, (long)bo->presumed_offset, bo->tiling, bo->map__gtt, bo->map__cpu, bo->domain));
@ -6148,26 +6230,24 @@ void *kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo)
assert_tiling(kgem, bo);
assert(!bo->purged || bo->reusable);
ptr = MAP(bo->map__gtt);
if (ptr == NULL) {
assert(num_pages(bo) <= kgem->aperture_mappable / 4);
return __kgem_bo_map__gtt_or_wc(kgem, bo);
}
kgem_trim_vma_cache(kgem, MAP_GTT, bucket(bo));
void *kgem_bo_map__wc(struct kgem *kgem, struct kgem_bo *bo)
{
DBG(("%s: handle=%d, offset=%ld, tiling=%d, map=%p:%p, domain=%d\n", __FUNCTION__,
bo->handle, (long)bo->presumed_offset, bo->tiling, bo->map__gtt, bo->map__cpu, bo->domain));
ptr = __kgem_bo_map__gtt(kgem, bo);
if (ptr == NULL)
return NULL;
assert(bo->proxy == NULL);
assert(bo->exec == NULL);
assert(list_is_empty(&bo->list));
assert_tiling(kgem, bo);
assert(!bo->purged || bo->reusable);
/* Cache this mapping to avoid the overhead of an
* excruciatingly slow GTT pagefault. This is more an
* issue with compositing managers which need to frequently
* flush CPU damage to their GPU bo.
*/
bo->map__gtt = ptr;
DBG(("%s: caching GTT vma for %d\n", __FUNCTION__, bo->handle));
}
if (bo->map__wc)
return bo->map__wc;
return ptr;
return __kgem_bo_map__wc(kgem, bo);
}
void *kgem_bo_map__debug(struct kgem *kgem, struct kgem_bo *bo)
@ -6512,6 +6592,7 @@ init_buffer_from_bo(struct kgem_buffer *bo, struct kgem_bo *old)
__FUNCTION__, old->handle));
assert(old->proxy == NULL);
assert(list_is_empty(&old->list));
memcpy(&bo->base, old, sizeof(*old));
if (old->rq)
@ -6817,7 +6898,7 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
assert(alloc);
alloc /= PAGE_SIZE;
if (alloc > kgem->aperture_mappable / 4)
if (alloc > kgem->aperture_mappable / 4 && !kgem->has_wc_mmap)
flags &= ~KGEM_BUFFER_INPLACE;
if (kgem->has_llc &&
@ -7049,7 +7130,7 @@ init:
assert(!bo->need_io || !bo->base.needs_flush);
assert(!bo->need_io || bo->base.domain != DOMAIN_GPU);
assert(bo->mem);
assert(bo->mmapped != MMAPPED_GTT || MAP(bo->base.map__gtt) == bo->mem);
assert(bo->mmapped != MMAPPED_GTT || bo->base.map__gtt == bo->mem || bo->base.map__wc == bo->mem);
assert(bo->mmapped != MMAPPED_CPU || MAP(bo->base.map__cpu) == bo->mem);
bo->used = size;

@ -62,6 +62,7 @@ struct kgem_bo {
void *map__cpu;
void *map__gtt;
void *map__wc;
#define MAP(ptr) ((void*)((uintptr_t)(ptr) & ~3))
struct kgem_bo_binding {
@ -192,6 +193,7 @@ struct kgem {
uint32_t has_wt :1;
uint32_t has_no_reloc :1;
uint32_t has_handle_lut :1;
uint32_t has_wc_mmap :1;
uint32_t can_blt_cpu :1;
uint32_t can_render_y :1;
@ -504,6 +506,7 @@ uint64_t kgem_add_reloc64(struct kgem *kgem,
void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo);
void *kgem_bo_map__async(struct kgem *kgem, struct kgem_bo *bo);
void *kgem_bo_map__gtt(struct kgem *kgem, struct kgem_bo *bo);
void *kgem_bo_map__wc(struct kgem *kgem, struct kgem_bo *bo);
void kgem_bo_sync__gtt(struct kgem *kgem, struct kgem_bo *bo);
void *kgem_bo_map__debug(struct kgem *kgem, struct kgem_bo *bo);
void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo);
@ -715,13 +718,16 @@ static inline bool kgem_bo_mapped(struct kgem *kgem, struct kgem_bo *bo)
if (bo->tiling == I915_TILING_NONE && (bo->domain == DOMAIN_CPU || kgem->has_llc))
return bo->map__cpu != NULL;
if (bo->tiling == I915_TILING_NONE && bo->map__wc)
return true;
return bo->map__gtt != NULL;
}
static inline bool kgem_bo_can_map(struct kgem *kgem, struct kgem_bo *bo)
{
DBG(("%s: handle=%d, map=%p:%p, tiling=%d, domain=%d, offset=%ld\n",
__FUNCTION__, bo->handle, bo->map__gtt, bo->map__cpu, bo->tiling, bo->domain, (long)bo->presumed_offset));
DBG(("%s: handle=%d, map=%p:%p:%p, tiling=%d, domain=%d, offset=%ld\n",
__FUNCTION__, bo->handle, bo->map__gtt, bo->map__wc, bo->map__cpu, bo->tiling, bo->domain, (long)bo->presumed_offset));
if (!bo->tiling && (kgem->has_llc || bo->domain == DOMAIN_CPU))
return true;
@ -734,6 +740,9 @@ static inline bool kgem_bo_can_map(struct kgem *kgem, struct kgem_bo *bo)
if (kgem->gen == 021 && bo->tiling == I915_TILING_Y)
return false;
if (!bo->tiling && kgem->has_wc_mmap)
return true;
return __kgem_bo_num_pages(bo) <= kgem->aperture_mappable / 4;
}
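
A note on the header above: cached map__cpu pointers may carry a small tag in their low bits (mmap addresses are page aligned, so the low two bits are free), which is why call sites strip them with MAP() before munmap() or dereference, whereas the new map__wc pointer, like map__gtt, is stored and used untagged. A small illustration of the mechanics only; what the tag bits encode is internal to kgem and not part of this diff, and the helper names below are hypothetical:

#include <stdint.h>

/*   #define MAP(ptr) ((void*)((uintptr_t)(ptr) & ~3))   -- from kgem.h */

static inline void *tag_ptr(void *ptr, unsigned tag)
{
	return (void *)((uintptr_t)ptr | (tag & 3));	/* pack a 2-bit tag */
}

static inline void *untag_ptr(void *ptr)		/* equivalent to MAP(ptr) */
{
	return (void *)((uintptr_t)ptr & ~(uintptr_t)3);
}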

@ -675,7 +675,8 @@ void sna_pixmap_destroy(PixmapPtr pixmap);
#define assert_pixmap_map(pixmap, priv) do { \
assert(priv->mapped != MAPPED_NONE || pixmap->devPrivate.ptr == PTR(priv->ptr)); \
assert(priv->mapped == MAPPED_NONE || pixmap->devPrivate.ptr == (priv->mapped == MAPPED_CPU ? MAP(priv->gpu_bo->map__cpu) : MAP(priv->gpu_bo->map__gtt))); \
assert(priv->mapped != MAPPED_CPU || pixmap->devPrivate.ptr == MAP(priv->gpu_bo->map__cpu)); \
assert(priv->mapped != MAPPED_GTT || pixmap->devPrivate.ptr == priv->gpu_bo->map__gtt || pixmap->devPrivate.ptr == priv->gpu_bo->map__wc); \
} while (0)
static inline void sna_pixmap_unmap(PixmapPtr pixmap, struct sna_pixmap *priv)

@ -4588,7 +4588,7 @@ can_create_upload_tiled_x(struct sna *sna,
if (sna->kgem.has_llc)
return true;
if (sna_pixmap_default_tiling(sna, pixmap))
if (!sna->kgem.has_wc_mmap && sna_pixmap_default_tiling(sna, pixmap))
return false;
return true;
@ -4608,13 +4608,21 @@ create_upload_tiled_x(struct sna *sna,
assert(priv->gpu_bo == NULL);
assert(priv->gpu_damage == NULL);
create = CREATE_CPU_MAP | CREATE_INACTIVE;
if (!sna->kgem.has_llc)
create |= CREATE_CACHED;
if (sna->kgem.has_llc)
create = CREATE_CPU_MAP | CREATE_INACTIVE;
else if (sna->kgem.has_wc_mmap)
create = CREATE_GTT_MAP | CREATE_INACTIVE;
else
create = CREATE_CPU_MAP | CREATE_INACTIVE | CREATE_CACHED;
return sna_pixmap_alloc_gpu(sna, pixmap, priv, create);
}
static bool can_upload__tiled_x(struct kgem *kgem, struct kgem_bo *bo)
{
return kgem_bo_can_map__cpu(kgem, bo, true) || kgem->has_wc_mmap;
}
static bool
try_upload__tiled_x(PixmapPtr pixmap, RegionRec *region,
int x, int y, int w, int h, char *bits, int stride)
@ -4625,7 +4633,7 @@ try_upload__tiled_x(PixmapPtr pixmap, RegionRec *region,
uint8_t *dst;
int n;
if (!kgem_bo_can_map__cpu(&sna->kgem, priv->gpu_bo, true)) {
if (!can_upload__tiled_x(&sna->kgem, priv->gpu_bo)) {
DBG(("%s: no, cannot map through the CPU\n", __FUNCTION__));
return false;
}
@ -4638,11 +4646,19 @@ try_upload__tiled_x(PixmapPtr pixmap, RegionRec *region,
__kgem_bo_is_busy(&sna->kgem, priv->gpu_bo))
return false;
dst = kgem_bo_map__cpu(&sna->kgem, priv->gpu_bo);
if (dst == NULL)
return false;
if (kgem_bo_can_map__cpu(&sna->kgem, priv->gpu_bo, true)) {
dst = kgem_bo_map__cpu(&sna->kgem, priv->gpu_bo);
if (dst == NULL)
return false;
kgem_bo_sync__cpu(&sna->kgem, priv->gpu_bo);
kgem_bo_sync__cpu(&sna->kgem, priv->gpu_bo);
} else {
dst = kgem_bo_map__wc(&sna->kgem, priv->gpu_bo);
if (dst == NULL)
return false;
kgem_bo_sync__gtt(&sna->kgem, priv->gpu_bo);
}
box = region_rects(region);
n = region_num_rects(region);
@ -4712,12 +4728,14 @@ try_upload__tiled_x(PixmapPtr pixmap, RegionRec *region,
} while (--n);
if (!priv->shm) {
assert(dst == MAP(priv->gpu_bo->map__cpu));
pixmap->devPrivate.ptr = dst;
pixmap->devKind = priv->gpu_bo->pitch;
priv->mapped = MAPPED_CPU;
if (dst == MAP(priv->gpu_bo->map__cpu)) {
priv->mapped = MAPPED_CPU;
priv->cpu = true;
} else
priv->mapped = MAPPED_GTT;
assert_pixmap_map(pixmap, priv);
priv->cpu = true;
}
}
@ -6129,7 +6147,7 @@ upload_inplace:
return false;
}
if (!kgem_bo_can_map__cpu(&sna->kgem, dst_priv->gpu_bo, true) ||
if (!can_upload__tiled_x(&sna->kgem, dst_priv->gpu_bo) ||
__kgem_bo_is_busy(&sna->kgem, dst_priv->gpu_bo)) {
if (replaces && !dst_priv->pinned) {
unsigned create;
@ -6156,7 +6174,7 @@ upload_inplace:
return false;
}
if (!kgem_bo_can_map__cpu(&sna->kgem, dst_priv->gpu_bo, true)) {
if (!can_upload__tiled_x(&sna->kgem, dst_priv->gpu_bo)) {
DBG(("%s - no, cannot map dst for reads into the CPU\n", __FUNCTION__));
return false;
}
@ -6169,13 +6187,23 @@ upload_inplace:
return false;
}
ptr = kgem_bo_map__cpu(&sna->kgem, dst_priv->gpu_bo);
if (ptr == NULL) {
DBG(("%s - no, map failed\n", __FUNCTION__));
return false;
}
if (kgem_bo_can_map__cpu(&sna->kgem, dst_priv->gpu_bo, true)) {
ptr = kgem_bo_map__cpu(&sna->kgem, dst_priv->gpu_bo);
if (ptr == NULL) {
DBG(("%s - no, map failed\n", __FUNCTION__));
return false;
}
kgem_bo_sync__cpu(&sna->kgem, dst_priv->gpu_bo);
kgem_bo_sync__cpu(&sna->kgem, dst_priv->gpu_bo);
} else {
ptr = kgem_bo_map__wc(&sna->kgem, dst_priv->gpu_bo);
if (ptr == NULL) {
DBG(("%s - no, map failed\n", __FUNCTION__));
return false;
}
kgem_bo_sync__gtt(&sna->kgem, dst_priv->gpu_bo);
}
if (!DAMAGE_IS_ALL(dst_priv->gpu_damage)) {
assert(!dst_priv->clear);
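
The same pattern now appears in several upload paths (try_upload__tiled_x above, the copy fallback here, and write_boxes_inplace__tiled in the I/O code below): prefer a cacheable CPU map when the bo allows one, otherwise fall back to the new WC map and a GTT-domain sync. A hypothetical helper capturing that choice, for illustration only — the patch deliberately open-codes it at each site so it can also record how the pixmap ended up mapped:

/* Hypothetical helper, not part of the patch: pick a mapping suitable
 * for in-place CPU writes to a tiled-x bo and move the bo into the
 * matching domain. */
static void *map_for_inplace_write(struct sna *sna, struct kgem_bo *bo)
{
	void *ptr;

	if (kgem_bo_can_map__cpu(&sna->kgem, bo, true)) {
		ptr = kgem_bo_map__cpu(&sna->kgem, bo);
		if (ptr == NULL)
			return NULL;
		kgem_bo_sync__cpu(&sna->kgem, bo);	/* CPU domain, cacheable */
	} else {
		ptr = kgem_bo_map__wc(&sna->kgem, bo);	/* new in this patch */
		if (ptr == NULL)
			return NULL;
		kgem_bo_sync__gtt(&sna->kgem, bo);	/* GTT domain, write-combined */
	}

	return ptr;
}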

@ -5032,7 +5032,7 @@ sna_cursor_pre_init(struct sna *sna)
#define DRM_CAP_CURSOR_WIDTH 8
#define DRM_CAP_CURSOR_HEIGHT 9
#define I915_PARAM_HAS_COHERENT_PHYS_GTT 29
#define I915_PARAM_HAS_COHERENT_PHYS_GTT 30
sna->cursor.max_size = 64;

@ -652,6 +652,9 @@ static bool upload_inplace__tiled(struct kgem *kgem, struct kgem_bo *bo)
break;
}
if (kgem->has_wc_mmap)
return true;
return kgem_bo_can_map__cpu(kgem, bo, true);
}
@ -663,14 +666,22 @@ write_boxes_inplace__tiled(struct kgem *kgem,
{
uint8_t *dst;
assert(kgem_bo_can_map__cpu(kgem, bo, true));
assert(kgem->has_wc_mmap || kgem_bo_can_map__cpu(kgem, bo, true));
assert(bo->tiling != I915_TILING_Y);
dst = kgem_bo_map__cpu(kgem, bo);
if (dst == NULL)
return false;
if (kgem_bo_can_map__cpu(kgem, bo, true)) {
dst = kgem_bo_map__cpu(kgem, bo);
if (dst == NULL)
return false;
kgem_bo_sync__cpu(kgem, bo);
kgem_bo_sync__cpu(kgem, bo);
} else {
dst = kgem_bo_map__wc(kgem, bo);
if (dst == NULL)
return false;
kgem_bo_sync__gtt(kgem, bo);
}
if (sigtrap_get())
return false;
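
kgem_bo_sync__gtt() itself is not part of this diff; before the CPU writes through a WC (or GTT) mapping it moves the bo into the GTT write domain so the kernel can wait for outstanding GPU access and flush stale caches. The core of such a sync is the standard set-domain ioctl, sketched here under that assumption (the real helper additionally updates kgem's domain and busyness bookkeeping):

#include <string.h>
#include <sys/ioctl.h>
#include <drm.h>
#include <i915_drm.h>

/* Minimal sketch: make a bo coherent for CPU writes through a GTT/WC
 * mapping by moving it into the GTT domain. */
static int bo_sync_for_gtt_write(int fd, uint32_t handle)
{
	struct drm_i915_gem_set_domain arg;

	memset(&arg, 0, sizeof(arg));
	arg.handle = handle;
	arg.read_domains = I915_GEM_DOMAIN_GTT;
	arg.write_domain = I915_GEM_DOMAIN_GTT;

	return ioctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &arg);
}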