From e583af9cca4ad2e5643317447c6b065d3ee7d11e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 25 Jan 2012 23:04:50 +0000 Subject: [PATCH] sna: Experiment with creating large objects as CPU bo Even on non-LLC systems if we can prevent the migration of such objects, we can still benefit immensely from being able to map them into the GTT as required. Signed-off-by: Chris Wilson --- src/sna/kgem.c | 103 ++++++++++++++++++++++++++++---------------- src/sna/kgem.h | 10 ++--- src/sna/sna_accel.c | 58 ++++++++++++------------- 3 files changed, 101 insertions(+), 70 deletions(-) diff --git a/src/sna/kgem.c b/src/sna/kgem.c index 0955a5df..1bcda22a 100644 --- a/src/sna/kgem.c +++ b/src/sna/kgem.c @@ -632,6 +632,7 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen) aperture.aper_size = 64*1024*1024; (void)drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture); + kgem->aperture_total = aperture.aper_size; kgem->aperture_high = aperture.aper_size * 3/4; kgem->aperture_low = aperture.aper_size * 1/4; DBG(("%s: aperture low=%d [%d], high=%d [%d]\n", __FUNCTION__, @@ -657,12 +658,17 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen) * disable dual-stream mode */ kgem->min_alignment = 64; - kgem->max_object_size = kgem->aperture_mappable / 2; - if (kgem->max_object_size > kgem->aperture_low) - kgem->max_object_size = kgem->aperture_low; - if (kgem->max_object_size > MAX_OBJECT_SIZE) - kgem->max_object_size = MAX_OBJECT_SIZE; - DBG(("%s: max object size %d\n", __FUNCTION__, kgem->max_object_size)); + kgem->max_gpu_size = kgem->aperture_mappable / 2; + if (kgem->max_gpu_size > kgem->aperture_low) + kgem->max_gpu_size = kgem->aperture_low; + if (kgem->max_gpu_size > MAX_OBJECT_SIZE) + kgem->max_gpu_size = MAX_OBJECT_SIZE; + + kgem->max_cpu_size = kgem->aperture_total / 2; + if (kgem->max_cpu_size > MAX_OBJECT_SIZE) + kgem->max_cpu_size = MAX_OBJECT_SIZE; + DBG(("%s: max object size (tiled=%d, linear=%d)\n", + __FUNCTION__, 
kgem->max_gpu_size, kgem->max_cpu_size)); kgem->fence_max = gem_param(kgem, I915_PARAM_NUM_FENCES_AVAIL) - 2; if ((int)kgem->fence_max < 0) @@ -979,6 +985,9 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo) goto destroy; } + if (!kgem->has_llc && IS_CPU_MAP(bo->map) && bo->domain != DOMAIN_CPU) + kgem_bo_release_map(kgem, bo); + assert(list_is_empty(&bo->vma)); assert(list_is_empty(&bo->list)); assert(bo->vmap == false && bo->sync == false); @@ -1010,6 +1019,10 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo) if (!IS_CPU_MAP(bo->map)) { if (!kgem_bo_set_purgeable(kgem, bo)) goto destroy; + + if (!kgem->has_llc && bo->domain == DOMAIN_CPU) + goto destroy; + DBG(("%s: handle=%d, purged\n", __FUNCTION__, bo->handle)); } @@ -1121,8 +1134,11 @@ bool kgem_retire(struct kgem *kgem) if (kgem_bo_set_purgeable(kgem, rq->bo)) { kgem_bo_move_to_inactive(kgem, rq->bo); retired = true; - } else + } else { + DBG(("%s: closing %d\n", + __FUNCTION__, rq->bo->handle)); kgem_bo_free(kgem, rq->bo); + } _list_del(&rq->list); free(rq); @@ -1679,9 +1695,13 @@ void kgem_purge_cache(struct kgem *kgem) int i; for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) { - list_for_each_entry_safe(bo, next, &kgem->inactive[i], list) - if (!kgem_bo_is_retained(kgem, bo)) + list_for_each_entry_safe(bo, next, &kgem->inactive[i], list) { + if (!kgem_bo_is_retained(kgem, bo)) { + DBG(("%s: purging %d\n", + __FUNCTION__, bo->handle)); kgem_bo_free(kgem, bo); + } + } } kgem->need_purge = false; @@ -1748,6 +1768,8 @@ bool kgem_expire_cache(struct kgem *kgem) count++; size += bo->size; kgem_bo_free(kgem, bo); + DBG(("%s: expiring %d\n", + __FUNCTION__, bo->handle)); } } if (!list_is_empty(&preserve)) { @@ -2033,7 +2055,7 @@ int kgem_choose_tiling(struct kgem *kgem, int tiling, int width, int height, int if (tiling && kgem_surface_size(kgem, false, false, width, height, bpp, tiling, - &pitch) > kgem->max_object_size) { + &pitch) > kgem->max_gpu_size) { DBG(("%s: too 
large (%dx%d) to be fenced, discarding tiling\n", __FUNCTION__, width, height)); tiling = I915_TILING_NONE; @@ -2096,43 +2118,46 @@ done: return tiling; } -static bool _kgem_can_create_2d(struct kgem *kgem, - int width, int height, int bpp, int tiling) +bool kgem_can_create_cpu(struct kgem *kgem, + int width, int height, int depth) { uint32_t pitch, size; - if (bpp < 8) + if (depth < 8 || kgem->wedged) return false; - if (tiling >= 0 && kgem->wedged) - return false; - - if (tiling < 0) - tiling = -tiling; - size = kgem_surface_size(kgem, false, false, - width, height, bpp, tiling, &pitch); - if (size == 0 || size >= kgem->max_object_size) - size = kgem_surface_size(kgem, false, false, - width, height, bpp, - I915_TILING_NONE, &pitch); - return size > 0 && size < kgem->max_object_size; + width, height, BitsPerPixel(depth), + I915_TILING_NONE, &pitch); + return size > 0 && size < kgem->max_cpu_size; +} + +static bool _kgem_can_create_gpu(struct kgem *kgem, + int width, int height, int bpp) +{ + uint32_t pitch, size; + + if (bpp < 8 || kgem->wedged) + return false; + + size = kgem_surface_size(kgem, false, false, + width, height, bpp, I915_TILING_NONE, + &pitch); + return size > 0 && size < kgem->max_gpu_size; } #if DEBUG_KGEM -bool kgem_can_create_2d(struct kgem *kgem, - int width, int height, int bpp, int tiling) +bool kgem_can_create_gpu(struct kgem *kgem, int width, int height, int bpp) { - bool ret = _kgem_can_create_2d(kgem, width, height, bpp, tiling); - DBG(("%s(%dx%d, bpp=%d, tiling=%d) = %d\n", __FUNCTION__, - width, height, bpp, tiling, ret)); + bool ret = _kgem_can_create_gpu(kgem, width, height, bpp); + DBG(("%s(%dx%d, bpp=%d) = %d\n", __FUNCTION__, + width, height, bpp, ret)); return ret; } #else -bool kgem_can_create_2d(struct kgem *kgem, - int width, int height, int bpp, int tiling) +bool kgem_can_create_gpu(struct kgem *kgem, int width, int height, int bpp) { - return _kgem_can_create_2d(kgem, width, height, bpp, tiling); + return 
_kgem_can_create_gpu(kgem, width, height, bpp); } #endif @@ -2177,12 +2202,12 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem, !!(flags & CREATE_GTT_MAP), !!(flags & CREATE_SCANOUT))); - assert(_kgem_can_create_2d(kgem, width, height, bpp, flags & CREATE_EXACT ? -tiling : tiling)); size = kgem_surface_size(kgem, kgem->has_relaxed_fencing, flags & CREATE_SCANOUT, width, height, bpp, tiling, &pitch); - assert(size && size <= kgem->max_object_size); + assert(size && size < kgem->max_cpu_size); + assert(tiling == I915_TILING_NONE || size < kgem->max_gpu_size); if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) { int for_cpu = !!(flags & CREATE_CPU_MAP); @@ -2342,6 +2367,9 @@ skip_active_search: continue; } + if ((flags & CREATE_CPU_MAP) == 0 && IS_CPU_MAP(bo->map)) + continue; + if (bo->tiling != tiling || (tiling != I915_TILING_NONE && bo->pitch != pitch)) { if (tiling != gem_set_tiling(kgem->fd, @@ -2643,8 +2671,11 @@ static void kgem_trim_vma_cache(struct kgem *kgem, int type, int bucket) list_del(&bo->vma); kgem->vma[type].count--; - if (!bo->purged && !kgem_bo_set_purgeable(kgem, bo)) + if (!bo->purged && !kgem_bo_set_purgeable(kgem, bo)) { + DBG(("%s: freeing unpurgeable old mapping\n", + __FUNCTION__)); kgem_bo_free(kgem, bo); + } } } diff --git a/src/sna/kgem.h b/src/sna/kgem.h index 652c2d74..fd3aa9de 100644 --- a/src/sna/kgem.h +++ b/src/sna/kgem.h @@ -151,10 +151,10 @@ struct kgem { uint16_t fence_max; uint16_t half_cpu_cache_pages; - uint32_t aperture_high, aperture_low, aperture; - uint32_t aperture_fenced, aperture_mappable; + uint32_t aperture_total, aperture_high, aperture_low, aperture_mappable; + uint32_t aperture, aperture_fenced; uint32_t min_alignment; - uint32_t max_object_size; + uint32_t max_gpu_size, max_cpu_size; uint32_t partial_buffer_size; void (*context_switch)(struct kgem *kgem, int new_mode); @@ -200,8 +200,8 @@ struct kgem_bo *kgem_upload_source_image_halved(struct kgem *kgem, int kgem_choose_tiling(struct kgem *kgem, int tiling, int 
width, int height, int bpp); -bool kgem_can_create_2d(struct kgem *kgem, - int width, int height, int bpp, int tiling); +bool kgem_can_create_gpu(struct kgem *kgem, int width, int height, int bpp); +bool kgem_can_create_cpu(struct kgem *kgem, int width, int height, int depth); struct kgem_bo * kgem_replace_bo(struct kgem *kgem, diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c index b28134c2..759e0fe1 100644 --- a/src/sna/sna_accel.c +++ b/src/sna/sna_accel.c @@ -245,7 +245,7 @@ sna_pixmap_alloc_cpu(struct sna *sna, assert(priv->stride); - if (sna->kgem.has_cpu_bo) { + if (sna->kgem.has_cpu_bo || !priv->gpu) { DBG(("%s: allocating CPU buffer (%dx%d)\n", __FUNCTION__, pixmap->drawable.width, pixmap->drawable.height)); @@ -515,11 +515,10 @@ struct sna_pixmap *_sna_pixmap_attach(PixmapPtr pixmap) break; default: - if (!kgem_can_create_2d(&sna->kgem, - pixmap->drawable.width, - pixmap->drawable.height, - pixmap->drawable.bitsPerPixel, - I915_TILING_NONE)) + if (!kgem_can_create_gpu(&sna->kgem, + pixmap->drawable.width, + pixmap->drawable.height, + pixmap->drawable.bitsPerPixel)) return NULL; break; } @@ -586,6 +585,11 @@ sna_pixmap_create_scratch(ScreenPtr screen, return create_pixmap(sna, screen, width, height, depth, CREATE_PIXMAP_USAGE_SCRATCH); + bpp = BitsPerPixel(depth); + if (!kgem_can_create_gpu(&sna->kgem, width, height, bpp)) + return create_pixmap(sna, screen, width, height, depth, + CREATE_PIXMAP_USAGE_SCRATCH); + if (tiling == I915_TILING_Y && !sna->have_render) tiling = I915_TILING_X; @@ -594,11 +598,7 @@ sna_pixmap_create_scratch(ScreenPtr screen, height > sna->render.max_3d_size)) tiling = I915_TILING_X; - bpp = BitsPerPixel(depth); tiling = kgem_choose_tiling(&sna->kgem, tiling, width, height, bpp); - if (!kgem_can_create_2d(&sna->kgem, width, height, bpp, tiling)) - return create_pixmap(sna, screen, width, height, depth, - CREATE_PIXMAP_USAGE_SCRATCH); /* you promise never to access this via the cpu... 
*/ if (sna->freed_pixmap) { @@ -669,7 +669,10 @@ static PixmapPtr sna_create_pixmap(ScreenPtr screen, DBG(("%s(%d, %d, %d, usage=%x)\n", __FUNCTION__, width, height, depth, usage)); - if (depth < 8 || wedged(sna) || !sna->have_render) + if (!kgem_can_create_cpu(&sna->kgem, width, height, depth)) + return create_pixmap(sna, screen, width, height, depth, usage); + + if (!sna->have_render) return create_pixmap(sna, screen, width, height, depth, usage); @@ -696,13 +699,11 @@ static PixmapPtr sna_create_pixmap(ScreenPtr screen, width, height, depth, I915_TILING_Y); - if (usage == CREATE_PIXMAP_USAGE_GLYPH_PICTURE || - !kgem_can_create_2d(&sna->kgem, width, height, - BitsPerPixel(depth), I915_TILING_NONE)) + if (usage == CREATE_PIXMAP_USAGE_GLYPH_PICTURE) return create_pixmap(sna, screen, width, height, depth, usage); pad = PixmapBytePad(width, depth); - if (pad*height <= 4096) { + if (pad * height <= 4096) { pixmap = create_pixmap(sna, screen, width, height, depth, usage); if (pixmap == NullPixmap) @@ -729,7 +730,9 @@ static PixmapPtr sna_create_pixmap(ScreenPtr screen, } priv->stride = pad; - priv->gpu = true; + priv->gpu = kgem_can_create_gpu(&sna->kgem, + width, height, + pixmap->drawable.bitsPerPixel); } return pixmap; @@ -1821,6 +1824,7 @@ _sna_drawable_use_cpu_bo(DrawablePtr drawable, { PixmapPtr pixmap = get_drawable_pixmap(drawable); struct sna_pixmap *priv = sna_pixmap(pixmap); + struct sna *sna = to_sna_from_pixmap(pixmap); BoxRec extents; int16_t dx, dy; @@ -1829,6 +1833,9 @@ _sna_drawable_use_cpu_bo(DrawablePtr drawable, if (priv == NULL || priv->cpu_bo == NULL) return FALSE; + if (!sna->kgem.has_llc && priv->cpu_bo->domain == DOMAIN_CPU) + return FALSE; + if (DAMAGE_IS_ALL(priv->cpu_damage)) { *damage = NULL; return TRUE; @@ -1876,9 +1883,7 @@ sna_pixmap_create_upload(ScreenPtr screen, assert(width); assert(height); if (!sna->have_render || - !kgem_can_create_2d(&sna->kgem, - width, height, bpp, - I915_TILING_NONE)) + !kgem_can_create_gpu(&sna->kgem, width, 
height, bpp)) return create_pixmap(sna, screen, width, height, depth, CREATE_PIXMAP_USAGE_SCRATCH); @@ -2024,7 +2029,7 @@ sna_pixmap_move_to_gpu(PixmapPtr pixmap, unsigned flags) sna_damage_reduce(&priv->cpu_damage); DBG(("%s: CPU damage? %d\n", __FUNCTION__, priv->cpu_damage != NULL)); if (priv->gpu_bo == NULL) { - if (!wedged(sna)) + if (!wedged(sna) && priv->gpu) priv->gpu_bo = kgem_create_2d(&sna->kgem, pixmap->drawable.width, @@ -3195,24 +3200,19 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc, } /* Try to maintain the data on the GPU */ - if (dst_priv->gpu_bo == NULL && + if (dst_priv->gpu_bo == NULL && dst_priv->gpu && ((dst_priv->cpu_damage == NULL && copy_use_gpu_bo(sna, dst_priv, &region)) || (src_priv && (src_priv->gpu_bo != NULL || (src_priv->cpu_bo && kgem_bo_is_busy(src_priv->cpu_bo)))))) { uint32_t tiling = sna_pixmap_choose_tiling(dst_pixmap); DBG(("%s: create dst GPU bo for upload\n", __FUNCTION__)); - if (kgem_can_create_2d(&sna->kgem, + dst_priv->gpu_bo = + kgem_create_2d(&sna->kgem, dst_pixmap->drawable.width, dst_pixmap->drawable.height, dst_pixmap->drawable.bitsPerPixel, - tiling)) - dst_priv->gpu_bo = - kgem_create_2d(&sna->kgem, - dst_pixmap->drawable.width, - dst_pixmap->drawable.height, - dst_pixmap->drawable.bitsPerPixel, - tiling, 0); + tiling, 0); } if (dst_priv->gpu_bo) {