sna: Experiment with creating large objects as CPU bo

Even on non-LLC systems, if we can prevent the migration of such
objects, we can still benefit immensely from being able to map them into
the GTT as required.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
This commit is contained in:
Chris Wilson 2012-01-25 23:04:50 +00:00
parent 55569272f7
commit e583af9cca
3 changed files with 101 additions and 70 deletions

View File

@ -632,6 +632,7 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
aperture.aper_size = 64*1024*1024;
(void)drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);
kgem->aperture_total = aperture.aper_size;
kgem->aperture_high = aperture.aper_size * 3/4;
kgem->aperture_low = aperture.aper_size * 1/4;
DBG(("%s: aperture low=%d [%d], high=%d [%d]\n", __FUNCTION__,
@ -657,12 +658,17 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
* disable dual-stream mode */
kgem->min_alignment = 64;
kgem->max_object_size = kgem->aperture_mappable / 2;
if (kgem->max_object_size > kgem->aperture_low)
kgem->max_object_size = kgem->aperture_low;
if (kgem->max_object_size > MAX_OBJECT_SIZE)
kgem->max_object_size = MAX_OBJECT_SIZE;
DBG(("%s: max object size %d\n", __FUNCTION__, kgem->max_object_size));
kgem->max_gpu_size = kgem->aperture_mappable / 2;
if (kgem->max_gpu_size > kgem->aperture_low)
kgem->max_gpu_size = kgem->aperture_low;
if (kgem->max_gpu_size > MAX_OBJECT_SIZE)
kgem->max_gpu_size = MAX_OBJECT_SIZE;
kgem->max_cpu_size = kgem->aperture_total / 2;
if (kgem->max_cpu_size > MAX_OBJECT_SIZE)
kgem->max_cpu_size = MAX_OBJECT_SIZE;
DBG(("%s: max object size (tiled=%d, linear=%d)\n",
__FUNCTION__, kgem->max_gpu_size, kgem->max_cpu_size));
kgem->fence_max = gem_param(kgem, I915_PARAM_NUM_FENCES_AVAIL) - 2;
if ((int)kgem->fence_max < 0)
@ -979,6 +985,9 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
goto destroy;
}
if (!kgem->has_llc && IS_CPU_MAP(bo->map) && bo->domain != DOMAIN_CPU)
kgem_bo_release_map(kgem, bo);
assert(list_is_empty(&bo->vma));
assert(list_is_empty(&bo->list));
assert(bo->vmap == false && bo->sync == false);
@ -1010,6 +1019,10 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
if (!IS_CPU_MAP(bo->map)) {
if (!kgem_bo_set_purgeable(kgem, bo))
goto destroy;
if (!kgem->has_llc && bo->domain == DOMAIN_CPU)
goto destroy;
DBG(("%s: handle=%d, purged\n",
__FUNCTION__, bo->handle));
}
@ -1121,8 +1134,11 @@ bool kgem_retire(struct kgem *kgem)
if (kgem_bo_set_purgeable(kgem, rq->bo)) {
kgem_bo_move_to_inactive(kgem, rq->bo);
retired = true;
} else
} else {
DBG(("%s: closing %d\n",
__FUNCTION__, rq->bo->handle));
kgem_bo_free(kgem, rq->bo);
}
_list_del(&rq->list);
free(rq);
@ -1679,9 +1695,13 @@ void kgem_purge_cache(struct kgem *kgem)
int i;
for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
list_for_each_entry_safe(bo, next, &kgem->inactive[i], list)
if (!kgem_bo_is_retained(kgem, bo))
list_for_each_entry_safe(bo, next, &kgem->inactive[i], list) {
if (!kgem_bo_is_retained(kgem, bo)) {
DBG(("%s: purging %d\n",
__FUNCTION__, bo->handle));
kgem_bo_free(kgem, bo);
}
}
}
kgem->need_purge = false;
@ -1748,6 +1768,8 @@ bool kgem_expire_cache(struct kgem *kgem)
count++;
size += bo->size;
/* Log the handle before freeing, like the other DBG sites in this file:
 * bo should not be read once it has been handed to kgem_bo_free()
 * (presumably that releases the object -- confirm). */
DBG(("%s: expiring %d\n",
     __FUNCTION__, bo->handle));
kgem_bo_free(kgem, bo);
}
}
if (!list_is_empty(&preserve)) {
@ -2033,7 +2055,7 @@ int kgem_choose_tiling(struct kgem *kgem, int tiling, int width, int height, int
if (tiling &&
kgem_surface_size(kgem, false, false,
width, height, bpp, tiling,
&pitch) > kgem->max_object_size) {
&pitch) > kgem->max_gpu_size) {
DBG(("%s: too large (%dx%d) to be fenced, discarding tiling\n",
__FUNCTION__, width, height));
tiling = I915_TILING_NONE;
@ -2096,43 +2118,46 @@ done:
return tiling;
}
static bool _kgem_can_create_2d(struct kgem *kgem,
int width, int height, int bpp, int tiling)
bool kgem_can_create_cpu(struct kgem *kgem,
int width, int height, int depth)
{
uint32_t pitch, size;
if (bpp < 8)
if (depth < 8 || kgem->wedged)
return false;
if (tiling >= 0 && kgem->wedged)
return false;
if (tiling < 0)
tiling = -tiling;
size = kgem_surface_size(kgem, false, false,
width, height, bpp, tiling, &pitch);
if (size == 0 || size >= kgem->max_object_size)
size = kgem_surface_size(kgem, false, false,
width, height, bpp,
I915_TILING_NONE, &pitch);
return size > 0 && size < kgem->max_object_size;
width, height, BitsPerPixel(depth),
I915_TILING_NONE, &pitch);
return size > 0 && size < kgem->max_cpu_size;
}
/*
 * Decide whether a width x height surface at the given bpp passes the
 * GPU size check.
 *
 * Returns false when bpp is below 8 or kgem->wedged is set. Otherwise the
 * size is computed by kgem_surface_size() with I915_TILING_NONE, and the
 * result is true only if that size is non-zero and strictly less than
 * kgem->max_gpu_size.
 */
static bool _kgem_can_create_gpu(struct kgem *kgem,
				 int width, int height, int bpp)
{
	uint32_t stride;
	uint32_t bytes;

	if (bpp < 8)
		return false;
	if (kgem->wedged)
		return false;

	bytes = kgem_surface_size(kgem, false, false,
				  width, height, bpp,
				  I915_TILING_NONE, &stride);
	if (bytes == 0)
		return false;

	return bytes < kgem->max_gpu_size;
}
#if DEBUG_KGEM
bool kgem_can_create_2d(struct kgem *kgem,
int width, int height, int bpp, int tiling)
bool kgem_can_create_gpu(struct kgem *kgem, int width, int height, int bpp)
{
bool ret = _kgem_can_create_2d(kgem, width, height, bpp, tiling);
DBG(("%s(%dx%d, bpp=%d, tiling=%d) = %d\n", __FUNCTION__,
width, height, bpp, tiling, ret));
bool ret = _kgem_can_create_gpu(kgem, width, height, bpp);
DBG(("%s(%dx%d, bpp=%d) = %d\n", __FUNCTION__,
width, height, bpp, ret));
return ret;
}
#else
bool kgem_can_create_2d(struct kgem *kgem,
int width, int height, int bpp, int tiling)
bool kgem_can_create_gpu(struct kgem *kgem, int width, int height, int bpp)
{
return _kgem_can_create_2d(kgem, width, height, bpp, tiling);
return _kgem_can_create_gpu(kgem, width, height, bpp);
}
#endif
@ -2177,12 +2202,12 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem,
!!(flags & CREATE_GTT_MAP),
!!(flags & CREATE_SCANOUT)));
assert(_kgem_can_create_2d(kgem, width, height, bpp, flags & CREATE_EXACT ? -tiling : tiling));
size = kgem_surface_size(kgem,
kgem->has_relaxed_fencing,
flags & CREATE_SCANOUT,
width, height, bpp, tiling, &pitch);
assert(size && size <= kgem->max_object_size);
assert(size && size < kgem->max_cpu_size);
assert(tiling == I915_TILING_NONE || size < kgem->max_gpu_size);
if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
int for_cpu = !!(flags & CREATE_CPU_MAP);
@ -2342,6 +2367,9 @@ skip_active_search:
continue;
}
if ((flags & CREATE_CPU_MAP) == 0 && IS_CPU_MAP(bo->map))
continue;
if (bo->tiling != tiling ||
(tiling != I915_TILING_NONE && bo->pitch != pitch)) {
if (tiling != gem_set_tiling(kgem->fd,
@ -2643,8 +2671,11 @@ static void kgem_trim_vma_cache(struct kgem *kgem, int type, int bucket)
list_del(&bo->vma);
kgem->vma[type].count--;
if (!bo->purged && !kgem_bo_set_purgeable(kgem, bo))
if (!bo->purged && !kgem_bo_set_purgeable(kgem, bo)) {
DBG(("%s: freeing unpurgeable old mapping\n",
__FUNCTION__));
kgem_bo_free(kgem, bo);
}
}
}

View File

@ -151,10 +151,10 @@ struct kgem {
uint16_t fence_max;
uint16_t half_cpu_cache_pages;
uint32_t aperture_high, aperture_low, aperture;
uint32_t aperture_fenced, aperture_mappable;
uint32_t aperture_total, aperture_high, aperture_low, aperture_mappable;
uint32_t aperture, aperture_fenced;
uint32_t min_alignment;
uint32_t max_object_size;
uint32_t max_gpu_size, max_cpu_size;
uint32_t partial_buffer_size;
void (*context_switch)(struct kgem *kgem, int new_mode);
@ -200,8 +200,8 @@ struct kgem_bo *kgem_upload_source_image_halved(struct kgem *kgem,
int kgem_choose_tiling(struct kgem *kgem,
int tiling, int width, int height, int bpp);
bool kgem_can_create_2d(struct kgem *kgem,
int width, int height, int bpp, int tiling);
bool kgem_can_create_gpu(struct kgem *kgem, int width, int height, int bpp);
bool kgem_can_create_cpu(struct kgem *kgem, int width, int height, int depth);
struct kgem_bo *
kgem_replace_bo(struct kgem *kgem,

View File

@ -245,7 +245,7 @@ sna_pixmap_alloc_cpu(struct sna *sna,
assert(priv->stride);
if (sna->kgem.has_cpu_bo) {
if (sna->kgem.has_cpu_bo || !priv->gpu) {
DBG(("%s: allocating CPU buffer (%dx%d)\n", __FUNCTION__,
pixmap->drawable.width, pixmap->drawable.height));
@ -515,11 +515,10 @@ struct sna_pixmap *_sna_pixmap_attach(PixmapPtr pixmap)
break;
default:
if (!kgem_can_create_2d(&sna->kgem,
pixmap->drawable.width,
pixmap->drawable.height,
pixmap->drawable.bitsPerPixel,
I915_TILING_NONE))
if (!kgem_can_create_gpu(&sna->kgem,
pixmap->drawable.width,
pixmap->drawable.height,
pixmap->drawable.bitsPerPixel))
return NULL;
break;
}
@ -586,6 +585,11 @@ sna_pixmap_create_scratch(ScreenPtr screen,
return create_pixmap(sna, screen, width, height, depth,
CREATE_PIXMAP_USAGE_SCRATCH);
bpp = BitsPerPixel(depth);
if (!kgem_can_create_gpu(&sna->kgem, width, height, bpp))
return create_pixmap(sna, screen, width, height, depth,
CREATE_PIXMAP_USAGE_SCRATCH);
if (tiling == I915_TILING_Y && !sna->have_render)
tiling = I915_TILING_X;
@ -594,11 +598,7 @@ sna_pixmap_create_scratch(ScreenPtr screen,
height > sna->render.max_3d_size))
tiling = I915_TILING_X;
bpp = BitsPerPixel(depth);
tiling = kgem_choose_tiling(&sna->kgem, tiling, width, height, bpp);
if (!kgem_can_create_2d(&sna->kgem, width, height, bpp, tiling))
return create_pixmap(sna, screen, width, height, depth,
CREATE_PIXMAP_USAGE_SCRATCH);
/* you promise never to access this via the cpu... */
if (sna->freed_pixmap) {
@ -669,7 +669,10 @@ static PixmapPtr sna_create_pixmap(ScreenPtr screen,
DBG(("%s(%d, %d, %d, usage=%x)\n", __FUNCTION__,
width, height, depth, usage));
if (depth < 8 || wedged(sna) || !sna->have_render)
if (!kgem_can_create_cpu(&sna->kgem, width, height, depth))
return create_pixmap(sna, screen, width, height, depth, usage);
if (!sna->have_render)
return create_pixmap(sna, screen,
width, height, depth,
usage);
@ -696,13 +699,11 @@ static PixmapPtr sna_create_pixmap(ScreenPtr screen,
width, height, depth,
I915_TILING_Y);
if (usage == CREATE_PIXMAP_USAGE_GLYPH_PICTURE ||
!kgem_can_create_2d(&sna->kgem, width, height,
BitsPerPixel(depth), I915_TILING_NONE))
if (usage == CREATE_PIXMAP_USAGE_GLYPH_PICTURE)
return create_pixmap(sna, screen, width, height, depth, usage);
pad = PixmapBytePad(width, depth);
if (pad*height <= 4096) {
if (pad * height <= 4096) {
pixmap = create_pixmap(sna, screen,
width, height, depth, usage);
if (pixmap == NullPixmap)
@ -729,7 +730,9 @@ static PixmapPtr sna_create_pixmap(ScreenPtr screen,
}
priv->stride = pad;
priv->gpu = true;
priv->gpu = kgem_can_create_gpu(&sna->kgem,
width, height,
pixmap->drawable.bitsPerPixel);
}
return pixmap;
@ -1821,6 +1824,7 @@ _sna_drawable_use_cpu_bo(DrawablePtr drawable,
{
PixmapPtr pixmap = get_drawable_pixmap(drawable);
struct sna_pixmap *priv = sna_pixmap(pixmap);
struct sna *sna = to_sna_from_pixmap(pixmap);
BoxRec extents;
int16_t dx, dy;
@ -1829,6 +1833,9 @@ _sna_drawable_use_cpu_bo(DrawablePtr drawable,
if (priv == NULL || priv->cpu_bo == NULL)
return FALSE;
if (!sna->kgem.has_llc && priv->cpu_bo->domain == DOMAIN_CPU)
return FALSE;
if (DAMAGE_IS_ALL(priv->cpu_damage)) {
*damage = NULL;
return TRUE;
@ -1876,9 +1883,7 @@ sna_pixmap_create_upload(ScreenPtr screen,
assert(width);
assert(height);
if (!sna->have_render ||
!kgem_can_create_2d(&sna->kgem,
width, height, bpp,
I915_TILING_NONE))
!kgem_can_create_gpu(&sna->kgem, width, height, bpp))
return create_pixmap(sna, screen, width, height, depth,
CREATE_PIXMAP_USAGE_SCRATCH);
@ -2024,7 +2029,7 @@ sna_pixmap_move_to_gpu(PixmapPtr pixmap, unsigned flags)
sna_damage_reduce(&priv->cpu_damage);
DBG(("%s: CPU damage? %d\n", __FUNCTION__, priv->cpu_damage != NULL));
if (priv->gpu_bo == NULL) {
if (!wedged(sna))
if (!wedged(sna) && priv->gpu)
priv->gpu_bo =
kgem_create_2d(&sna->kgem,
pixmap->drawable.width,
@ -3195,24 +3200,19 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
}
/* Try to maintain the data on the GPU */
if (dst_priv->gpu_bo == NULL &&
if (dst_priv->gpu_bo == NULL && dst_priv->gpu &&
((dst_priv->cpu_damage == NULL && copy_use_gpu_bo(sna, dst_priv, &region)) ||
(src_priv && (src_priv->gpu_bo != NULL || (src_priv->cpu_bo && kgem_bo_is_busy(src_priv->cpu_bo)))))) {
uint32_t tiling = sna_pixmap_choose_tiling(dst_pixmap);
DBG(("%s: create dst GPU bo for upload\n", __FUNCTION__));
if (kgem_can_create_2d(&sna->kgem,
dst_priv->gpu_bo =
kgem_create_2d(&sna->kgem,
dst_pixmap->drawable.width,
dst_pixmap->drawable.height,
dst_pixmap->drawable.bitsPerPixel,
tiling))
dst_priv->gpu_bo =
kgem_create_2d(&sna->kgem,
dst_pixmap->drawable.width,
dst_pixmap->drawable.height,
dst_pixmap->drawable.bitsPerPixel,
tiling, 0);
tiling, 0);
}
if (dst_priv->gpu_bo) {