sna: Experiment with creating large objects as CPU bo
Even on non-LLC systems, if we can prevent the migration of such objects, we can still benefit immensely from being able to map them into the GTT as required.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
This commit is contained in:
parent
55569272f7
commit
e583af9cca
103
src/sna/kgem.c
103
src/sna/kgem.c
|
|
@ -632,6 +632,7 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
|
|||
aperture.aper_size = 64*1024*1024;
|
||||
(void)drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);
|
||||
|
||||
kgem->aperture_total = aperture.aper_size;
|
||||
kgem->aperture_high = aperture.aper_size * 3/4;
|
||||
kgem->aperture_low = aperture.aper_size * 1/4;
|
||||
DBG(("%s: aperture low=%d [%d], high=%d [%d]\n", __FUNCTION__,
|
||||
|
|
@ -657,12 +658,17 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
|
|||
* disable dual-stream mode */
|
||||
kgem->min_alignment = 64;
|
||||
|
||||
kgem->max_object_size = kgem->aperture_mappable / 2;
|
||||
if (kgem->max_object_size > kgem->aperture_low)
|
||||
kgem->max_object_size = kgem->aperture_low;
|
||||
if (kgem->max_object_size > MAX_OBJECT_SIZE)
|
||||
kgem->max_object_size = MAX_OBJECT_SIZE;
|
||||
DBG(("%s: max object size %d\n", __FUNCTION__, kgem->max_object_size));
|
||||
kgem->max_gpu_size = kgem->aperture_mappable / 2;
|
||||
if (kgem->max_gpu_size > kgem->aperture_low)
|
||||
kgem->max_gpu_size = kgem->aperture_low;
|
||||
if (kgem->max_gpu_size > MAX_OBJECT_SIZE)
|
||||
kgem->max_gpu_size = MAX_OBJECT_SIZE;
|
||||
|
||||
kgem->max_cpu_size = kgem->aperture_total / 2;
|
||||
if (kgem->max_cpu_size > MAX_OBJECT_SIZE)
|
||||
kgem->max_cpu_size = MAX_OBJECT_SIZE;
|
||||
DBG(("%s: max object size (tiled=%d, linear=%d)\n",
|
||||
__FUNCTION__, kgem->max_gpu_size, kgem->max_cpu_size));
|
||||
|
||||
kgem->fence_max = gem_param(kgem, I915_PARAM_NUM_FENCES_AVAIL) - 2;
|
||||
if ((int)kgem->fence_max < 0)
|
||||
|
|
@ -979,6 +985,9 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
|
|||
goto destroy;
|
||||
}
|
||||
|
||||
if (!kgem->has_llc && IS_CPU_MAP(bo->map) && bo->domain != DOMAIN_CPU)
|
||||
kgem_bo_release_map(kgem, bo);
|
||||
|
||||
assert(list_is_empty(&bo->vma));
|
||||
assert(list_is_empty(&bo->list));
|
||||
assert(bo->vmap == false && bo->sync == false);
|
||||
|
|
@ -1010,6 +1019,10 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
|
|||
if (!IS_CPU_MAP(bo->map)) {
|
||||
if (!kgem_bo_set_purgeable(kgem, bo))
|
||||
goto destroy;
|
||||
|
||||
if (!kgem->has_llc && bo->domain == DOMAIN_CPU)
|
||||
goto destroy;
|
||||
|
||||
DBG(("%s: handle=%d, purged\n",
|
||||
__FUNCTION__, bo->handle));
|
||||
}
|
||||
|
|
@ -1121,8 +1134,11 @@ bool kgem_retire(struct kgem *kgem)
|
|||
if (kgem_bo_set_purgeable(kgem, rq->bo)) {
|
||||
kgem_bo_move_to_inactive(kgem, rq->bo);
|
||||
retired = true;
|
||||
} else
|
||||
} else {
|
||||
DBG(("%s: closing %d\n",
|
||||
__FUNCTION__, rq->bo->handle));
|
||||
kgem_bo_free(kgem, rq->bo);
|
||||
}
|
||||
|
||||
_list_del(&rq->list);
|
||||
free(rq);
|
||||
|
|
@ -1679,9 +1695,13 @@ void kgem_purge_cache(struct kgem *kgem)
|
|||
int i;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
|
||||
list_for_each_entry_safe(bo, next, &kgem->inactive[i], list)
|
||||
if (!kgem_bo_is_retained(kgem, bo))
|
||||
list_for_each_entry_safe(bo, next, &kgem->inactive[i], list) {
|
||||
if (!kgem_bo_is_retained(kgem, bo)) {
|
||||
DBG(("%s: purging %d\n",
|
||||
__FUNCTION__, bo->handle));
|
||||
kgem_bo_free(kgem, bo);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
kgem->need_purge = false;
|
||||
|
|
@ -1748,6 +1768,8 @@ bool kgem_expire_cache(struct kgem *kgem)
|
|||
count++;
|
||||
size += bo->size;
|
||||
kgem_bo_free(kgem, bo);
|
||||
DBG(("%s: expiring %d\n",
|
||||
__FUNCTION__, bo->handle));
|
||||
}
|
||||
}
|
||||
if (!list_is_empty(&preserve)) {
|
||||
|
|
@ -2033,7 +2055,7 @@ int kgem_choose_tiling(struct kgem *kgem, int tiling, int width, int height, int
|
|||
if (tiling &&
|
||||
kgem_surface_size(kgem, false, false,
|
||||
width, height, bpp, tiling,
|
||||
&pitch) > kgem->max_object_size) {
|
||||
&pitch) > kgem->max_gpu_size) {
|
||||
DBG(("%s: too large (%dx%d) to be fenced, discarding tiling\n",
|
||||
__FUNCTION__, width, height));
|
||||
tiling = I915_TILING_NONE;
|
||||
|
|
@ -2096,43 +2118,46 @@ done:
|
|||
return tiling;
|
||||
}
|
||||
|
||||
static bool _kgem_can_create_2d(struct kgem *kgem,
|
||||
int width, int height, int bpp, int tiling)
|
||||
bool kgem_can_create_cpu(struct kgem *kgem,
|
||||
int width, int height, int depth)
|
||||
{
|
||||
uint32_t pitch, size;
|
||||
|
||||
if (bpp < 8)
|
||||
if (depth < 8 || kgem->wedged)
|
||||
return false;
|
||||
|
||||
if (tiling >= 0 && kgem->wedged)
|
||||
return false;
|
||||
|
||||
if (tiling < 0)
|
||||
tiling = -tiling;
|
||||
|
||||
size = kgem_surface_size(kgem, false, false,
|
||||
width, height, bpp, tiling, &pitch);
|
||||
if (size == 0 || size >= kgem->max_object_size)
|
||||
size = kgem_surface_size(kgem, false, false,
|
||||
width, height, bpp,
|
||||
I915_TILING_NONE, &pitch);
|
||||
return size > 0 && size < kgem->max_object_size;
|
||||
width, height, BitsPerPixel(depth),
|
||||
I915_TILING_NONE, &pitch);
|
||||
return size > 0 && size < kgem->max_cpu_size;
|
||||
}
|
||||
|
||||
static bool _kgem_can_create_gpu(struct kgem *kgem,
|
||||
int width, int height, int bpp)
|
||||
{
|
||||
uint32_t pitch, size;
|
||||
|
||||
if (bpp < 8 || kgem->wedged)
|
||||
return false;
|
||||
|
||||
size = kgem_surface_size(kgem, false, false,
|
||||
width, height, bpp, I915_TILING_NONE,
|
||||
&pitch);
|
||||
return size > 0 && size < kgem->max_gpu_size;
|
||||
}
|
||||
|
||||
#if DEBUG_KGEM
|
||||
bool kgem_can_create_2d(struct kgem *kgem,
|
||||
int width, int height, int bpp, int tiling)
|
||||
bool kgem_can_create_gpu(struct kgem *kgem, int width, int height, int bpp)
|
||||
{
|
||||
bool ret = _kgem_can_create_2d(kgem, width, height, bpp, tiling);
|
||||
DBG(("%s(%dx%d, bpp=%d, tiling=%d) = %d\n", __FUNCTION__,
|
||||
width, height, bpp, tiling, ret));
|
||||
bool ret = _kgem_can_create_gpu(kgem, width, height, bpp);
|
||||
DBG(("%s(%dx%d, bpp=%d) = %d\n", __FUNCTION__,
|
||||
width, height, bpp, ret));
|
||||
return ret;
|
||||
}
|
||||
#else
|
||||
bool kgem_can_create_2d(struct kgem *kgem,
|
||||
int width, int height, int bpp, int tiling)
|
||||
bool kgem_can_create_gpu(struct kgem *kgem, int width, int height, int bpp)
|
||||
{
|
||||
return _kgem_can_create_2d(kgem, width, height, bpp, tiling);
|
||||
return _kgem_can_create_gpu(kgem, width, height, bpp);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
@ -2177,12 +2202,12 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem,
|
|||
!!(flags & CREATE_GTT_MAP),
|
||||
!!(flags & CREATE_SCANOUT)));
|
||||
|
||||
assert(_kgem_can_create_2d(kgem, width, height, bpp, flags & CREATE_EXACT ? -tiling : tiling));
|
||||
size = kgem_surface_size(kgem,
|
||||
kgem->has_relaxed_fencing,
|
||||
flags & CREATE_SCANOUT,
|
||||
width, height, bpp, tiling, &pitch);
|
||||
assert(size && size <= kgem->max_object_size);
|
||||
assert(size && size < kgem->max_cpu_size);
|
||||
assert(tiling == I915_TILING_NONE || size < kgem->max_gpu_size);
|
||||
|
||||
if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
|
||||
int for_cpu = !!(flags & CREATE_CPU_MAP);
|
||||
|
|
@ -2342,6 +2367,9 @@ skip_active_search:
|
|||
continue;
|
||||
}
|
||||
|
||||
if ((flags & CREATE_CPU_MAP) == 0 && IS_CPU_MAP(bo->map))
|
||||
continue;
|
||||
|
||||
if (bo->tiling != tiling ||
|
||||
(tiling != I915_TILING_NONE && bo->pitch != pitch)) {
|
||||
if (tiling != gem_set_tiling(kgem->fd,
|
||||
|
|
@ -2643,8 +2671,11 @@ static void kgem_trim_vma_cache(struct kgem *kgem, int type, int bucket)
|
|||
list_del(&bo->vma);
|
||||
kgem->vma[type].count--;
|
||||
|
||||
if (!bo->purged && !kgem_bo_set_purgeable(kgem, bo))
|
||||
if (!bo->purged && !kgem_bo_set_purgeable(kgem, bo)) {
|
||||
DBG(("%s: freeing unpurgeable old mapping\n",
|
||||
__FUNCTION__));
|
||||
kgem_bo_free(kgem, bo);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -151,10 +151,10 @@ struct kgem {
|
|||
|
||||
uint16_t fence_max;
|
||||
uint16_t half_cpu_cache_pages;
|
||||
uint32_t aperture_high, aperture_low, aperture;
|
||||
uint32_t aperture_fenced, aperture_mappable;
|
||||
uint32_t aperture_total, aperture_high, aperture_low, aperture_mappable;
|
||||
uint32_t aperture, aperture_fenced;
|
||||
uint32_t min_alignment;
|
||||
uint32_t max_object_size;
|
||||
uint32_t max_gpu_size, max_cpu_size;
|
||||
uint32_t partial_buffer_size;
|
||||
|
||||
void (*context_switch)(struct kgem *kgem, int new_mode);
|
||||
|
|
@ -200,8 +200,8 @@ struct kgem_bo *kgem_upload_source_image_halved(struct kgem *kgem,
|
|||
|
||||
int kgem_choose_tiling(struct kgem *kgem,
|
||||
int tiling, int width, int height, int bpp);
|
||||
bool kgem_can_create_2d(struct kgem *kgem,
|
||||
int width, int height, int bpp, int tiling);
|
||||
bool kgem_can_create_gpu(struct kgem *kgem, int width, int height, int bpp);
|
||||
bool kgem_can_create_cpu(struct kgem *kgem, int width, int height, int depth);
|
||||
|
||||
struct kgem_bo *
|
||||
kgem_replace_bo(struct kgem *kgem,
|
||||
|
|
|
|||
|
|
@ -245,7 +245,7 @@ sna_pixmap_alloc_cpu(struct sna *sna,
|
|||
|
||||
assert(priv->stride);
|
||||
|
||||
if (sna->kgem.has_cpu_bo) {
|
||||
if (sna->kgem.has_cpu_bo || !priv->gpu) {
|
||||
DBG(("%s: allocating CPU buffer (%dx%d)\n", __FUNCTION__,
|
||||
pixmap->drawable.width, pixmap->drawable.height));
|
||||
|
||||
|
|
@ -515,11 +515,10 @@ struct sna_pixmap *_sna_pixmap_attach(PixmapPtr pixmap)
|
|||
break;
|
||||
|
||||
default:
|
||||
if (!kgem_can_create_2d(&sna->kgem,
|
||||
pixmap->drawable.width,
|
||||
pixmap->drawable.height,
|
||||
pixmap->drawable.bitsPerPixel,
|
||||
I915_TILING_NONE))
|
||||
if (!kgem_can_create_gpu(&sna->kgem,
|
||||
pixmap->drawable.width,
|
||||
pixmap->drawable.height,
|
||||
pixmap->drawable.bitsPerPixel))
|
||||
return NULL;
|
||||
break;
|
||||
}
|
||||
|
|
@ -586,6 +585,11 @@ sna_pixmap_create_scratch(ScreenPtr screen,
|
|||
return create_pixmap(sna, screen, width, height, depth,
|
||||
CREATE_PIXMAP_USAGE_SCRATCH);
|
||||
|
||||
bpp = BitsPerPixel(depth);
|
||||
if (!kgem_can_create_gpu(&sna->kgem, width, height, bpp))
|
||||
return create_pixmap(sna, screen, width, height, depth,
|
||||
CREATE_PIXMAP_USAGE_SCRATCH);
|
||||
|
||||
if (tiling == I915_TILING_Y && !sna->have_render)
|
||||
tiling = I915_TILING_X;
|
||||
|
||||
|
|
@ -594,11 +598,7 @@ sna_pixmap_create_scratch(ScreenPtr screen,
|
|||
height > sna->render.max_3d_size))
|
||||
tiling = I915_TILING_X;
|
||||
|
||||
bpp = BitsPerPixel(depth);
|
||||
tiling = kgem_choose_tiling(&sna->kgem, tiling, width, height, bpp);
|
||||
if (!kgem_can_create_2d(&sna->kgem, width, height, bpp, tiling))
|
||||
return create_pixmap(sna, screen, width, height, depth,
|
||||
CREATE_PIXMAP_USAGE_SCRATCH);
|
||||
|
||||
/* you promise never to access this via the cpu... */
|
||||
if (sna->freed_pixmap) {
|
||||
|
|
@ -669,7 +669,10 @@ static PixmapPtr sna_create_pixmap(ScreenPtr screen,
|
|||
DBG(("%s(%d, %d, %d, usage=%x)\n", __FUNCTION__,
|
||||
width, height, depth, usage));
|
||||
|
||||
if (depth < 8 || wedged(sna) || !sna->have_render)
|
||||
if (!kgem_can_create_cpu(&sna->kgem, width, height, depth))
|
||||
return create_pixmap(sna, screen, width, height, depth, usage);
|
||||
|
||||
if (!sna->have_render)
|
||||
return create_pixmap(sna, screen,
|
||||
width, height, depth,
|
||||
usage);
|
||||
|
|
@ -696,13 +699,11 @@ static PixmapPtr sna_create_pixmap(ScreenPtr screen,
|
|||
width, height, depth,
|
||||
I915_TILING_Y);
|
||||
|
||||
if (usage == CREATE_PIXMAP_USAGE_GLYPH_PICTURE ||
|
||||
!kgem_can_create_2d(&sna->kgem, width, height,
|
||||
BitsPerPixel(depth), I915_TILING_NONE))
|
||||
if (usage == CREATE_PIXMAP_USAGE_GLYPH_PICTURE)
|
||||
return create_pixmap(sna, screen, width, height, depth, usage);
|
||||
|
||||
pad = PixmapBytePad(width, depth);
|
||||
if (pad*height <= 4096) {
|
||||
if (pad * height <= 4096) {
|
||||
pixmap = create_pixmap(sna, screen,
|
||||
width, height, depth, usage);
|
||||
if (pixmap == NullPixmap)
|
||||
|
|
@ -729,7 +730,9 @@ static PixmapPtr sna_create_pixmap(ScreenPtr screen,
|
|||
}
|
||||
|
||||
priv->stride = pad;
|
||||
priv->gpu = true;
|
||||
priv->gpu = kgem_can_create_gpu(&sna->kgem,
|
||||
width, height,
|
||||
pixmap->drawable.bitsPerPixel);
|
||||
}
|
||||
|
||||
return pixmap;
|
||||
|
|
@ -1821,6 +1824,7 @@ _sna_drawable_use_cpu_bo(DrawablePtr drawable,
|
|||
{
|
||||
PixmapPtr pixmap = get_drawable_pixmap(drawable);
|
||||
struct sna_pixmap *priv = sna_pixmap(pixmap);
|
||||
struct sna *sna = to_sna_from_pixmap(pixmap);
|
||||
BoxRec extents;
|
||||
int16_t dx, dy;
|
||||
|
||||
|
|
@ -1829,6 +1833,9 @@ _sna_drawable_use_cpu_bo(DrawablePtr drawable,
|
|||
if (priv == NULL || priv->cpu_bo == NULL)
|
||||
return FALSE;
|
||||
|
||||
if (!sna->kgem.has_llc && priv->cpu_bo->domain == DOMAIN_CPU)
|
||||
return FALSE;
|
||||
|
||||
if (DAMAGE_IS_ALL(priv->cpu_damage)) {
|
||||
*damage = NULL;
|
||||
return TRUE;
|
||||
|
|
@ -1876,9 +1883,7 @@ sna_pixmap_create_upload(ScreenPtr screen,
|
|||
assert(width);
|
||||
assert(height);
|
||||
if (!sna->have_render ||
|
||||
!kgem_can_create_2d(&sna->kgem,
|
||||
width, height, bpp,
|
||||
I915_TILING_NONE))
|
||||
!kgem_can_create_gpu(&sna->kgem, width, height, bpp))
|
||||
return create_pixmap(sna, screen, width, height, depth,
|
||||
CREATE_PIXMAP_USAGE_SCRATCH);
|
||||
|
||||
|
|
@ -2024,7 +2029,7 @@ sna_pixmap_move_to_gpu(PixmapPtr pixmap, unsigned flags)
|
|||
sna_damage_reduce(&priv->cpu_damage);
|
||||
DBG(("%s: CPU damage? %d\n", __FUNCTION__, priv->cpu_damage != NULL));
|
||||
if (priv->gpu_bo == NULL) {
|
||||
if (!wedged(sna))
|
||||
if (!wedged(sna) && priv->gpu)
|
||||
priv->gpu_bo =
|
||||
kgem_create_2d(&sna->kgem,
|
||||
pixmap->drawable.width,
|
||||
|
|
@ -3195,24 +3200,19 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
|
|||
}
|
||||
|
||||
/* Try to maintain the data on the GPU */
|
||||
if (dst_priv->gpu_bo == NULL &&
|
||||
if (dst_priv->gpu_bo == NULL && dst_priv->gpu &&
|
||||
((dst_priv->cpu_damage == NULL && copy_use_gpu_bo(sna, dst_priv, &region)) ||
|
||||
(src_priv && (src_priv->gpu_bo != NULL || (src_priv->cpu_bo && kgem_bo_is_busy(src_priv->cpu_bo)))))) {
|
||||
uint32_t tiling = sna_pixmap_choose_tiling(dst_pixmap);
|
||||
|
||||
DBG(("%s: create dst GPU bo for upload\n", __FUNCTION__));
|
||||
|
||||
if (kgem_can_create_2d(&sna->kgem,
|
||||
dst_priv->gpu_bo =
|
||||
kgem_create_2d(&sna->kgem,
|
||||
dst_pixmap->drawable.width,
|
||||
dst_pixmap->drawable.height,
|
||||
dst_pixmap->drawable.bitsPerPixel,
|
||||
tiling))
|
||||
dst_priv->gpu_bo =
|
||||
kgem_create_2d(&sna->kgem,
|
||||
dst_pixmap->drawable.width,
|
||||
dst_pixmap->drawable.height,
|
||||
dst_pixmap->drawable.bitsPerPixel,
|
||||
tiling, 0);
|
||||
tiling, 0);
|
||||
}
|
||||
|
||||
if (dst_priv->gpu_bo) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue