sna: Experiment with GTT mmapped upload buffers

In a few places, we can stream the source into the GTT and so upload in
place through the WC mapping. Notably, in many other places we want to
rasterise into a partial buffer in cacheable memory. So we need to notify
the backend of the intended usage for the buffer, and when we think it is
appropriate we can allocate a GTT-mapped pointer for zero-copy upload.
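
As a sketch of that interface (using the flag names this patch adds to
the kgem header; the two call sites are illustrative, not lifted
verbatim from the diff), the caller declares its access pattern and the
backend picks the mapping:

    /* One-shot streaming upload: written once, front to back, never
     * read back by the CPU, so a WC/GTT mapping is acceptable. */
    bo = kgem_create_buffer(kgem, size, KGEM_BUFFER_WRITE_INPLACE, &ptr);

    /* CPU rasterisation into the buffer (read-modify-write): keep it
     * in cacheable memory and let pwrite do the copy if need be. */
    bo = kgem_create_buffer(kgem, size, KGEM_BUFFER_WRITE, &ptr);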

The biggest improvement tends to be in PutComposite-style
microbenchmarks, yet throughput for trapezoid masks seems to suffer
(e.g. swfdec-giant-steps on i3, and on gen2 in general). As expected,
the culprit of the regression is aperture pressure causing eviction
stalls, which the pwrite path sidesteps by doing a cached copy when
there is no GTT space. This could be alleviated with an is-mappable
ioctl predicting when use of the buffer would block, falling back to
pwrite in those cases. However, I suspect that this will improve
dispatch latency in the common idle case, for which I have no good
metric.
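
A minimal sketch of that fallback, assuming a kernel that could report
mappability without blocking; the ioctl and flag mirror the speculative
#if HAVE_I915_GEM_BUFFER_INFO block in the kgem_create_buffer() hunk
below and are hypothetical, not part of any real kernel interface:

    /* Hypothetical: DRM_IOCTL_I915_GEM_BUFFER_INFO and
     * I915_GEM_MAPPABLE are placeholders from the sketch below. */
    static bool kgem_bo_is_mappable__nonblocking(struct kgem *kgem,
                                                 struct kgem_bo *bo)
    {
        struct drm_i915_gem_buffer_info info;

        VG_CLEAR(info);
        info.handle = bo->handle;
        if (drmIoctl(kgem->fd,
                     DRM_IOCTL_I915_GEM_BUFFER_INFO, &info))
            return true; /* unknown: stay optimistic */

        return info.flags & I915_GEM_MAPPABLE;
    }

    /* ...and kgem_create_buffer() would skip the GTT upload buffer,
     * taking the cached pwrite path, whenever this returns false. */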

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Chris Wilson 2012-01-13 10:41:44 +00:00
parent 252f381825
commit a4d5d72599
8 changed files with 186 additions and 92 deletions


@@ -86,6 +86,7 @@ static inline void list_replace(struct list *old,
#define DBG_NO_TILING 0
#define DBG_NO_VMAP 0
#define DBG_NO_MADV 0
#define DBG_NO_MAP_UPLOAD 0
#define DBG_NO_RELAXED_FENCING 0
#define DBG_DUMP 0
@@ -111,7 +112,7 @@ struct kgem_partial_bo {
void *mem;
uint32_t used;
uint32_t need_io : 1;
uint32_t write : 1;
uint32_t write : 2;
uint32_t mmapped : 1;
};
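
Widening write from one bit to two lets a partial buffer remember both
halves of its intended usage, which the allocator later records as (a
direct quote of the assignment further down this commit):

    /* write holds flags & KGEM_BUFFER_WRITE_INPLACE, i.e. two bits:
     * 0x1 KGEM_BUFFER_WRITE   - the CPU writes into the buffer
     * 0x2 KGEM_BUFFER_INPLACE - backed by a WC/GTT mapping
     */
    bo->write = flags & KGEM_BUFFER_WRITE_INPLACE;
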
@@ -2579,7 +2580,6 @@ void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo)
{
void *ptr;
assert(bo->refcnt);
assert(!bo->purged);
assert(bo->exec == NULL);
assert(list_is_empty(&bo->list));
@@ -2641,7 +2641,6 @@ void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo)
struct drm_i915_gem_mmap mmap_arg;
DBG(("%s(handle=%d, size=%d)\n", __FUNCTION__, bo->handle, bo->size));
assert(bo->refcnt);
assert(!bo->purged);
assert(list_is_empty(&bo->list));
@@ -2897,12 +2896,14 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
void **ret)
{
struct kgem_partial_bo *bo;
bool write = !!(flags & KGEM_BUFFER_WRITE);
unsigned offset, alloc;
uint32_t handle;
DBG(("%s: size=%d, flags=%x [write=%d, last=%d]\n",
__FUNCTION__, size, flags, write, flags & KGEM_BUFFER_LAST));
DBG(("%s: size=%d, flags=%x [write?=%d, inplace?=%d, last?=%d]\n",
__FUNCTION__, size, flags,
!!(flags & KGEM_BUFFER_WRITE),
!!(flags & KGEM_BUFFER_INPLACE),
!!(flags & KGEM_BUFFER_LAST)));
assert(size);
list_for_each_entry(bo, &kgem->partial, base.list) {
@@ -2923,9 +2924,10 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
}
}
if (bo->write != write) {
DBG(("%s: skip write %d buffer, need %d\n",
__FUNCTION__, bo->write, write));
if ((bo->write & KGEM_BUFFER_WRITE) != (flags & KGEM_BUFFER_WRITE) ||
(bo->write & ~flags) & KGEM_BUFFER_INPLACE) {
DBG(("%s: skip write %x buffer, need %x\n",
__FUNCTION__, bo->write, flags));
continue;
}
@@ -2942,9 +2944,11 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
break;
}
alloc = (flags & KGEM_BUFFER_LAST) ? 4096 : 32 * 1024;
alloc = ALIGN(size, alloc);
/* Be a little more generous and hope to hold fewer mmappings */
alloc = ALIGN(size, kgem->aperture_mappable >> 10);
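/* Assuming aperture_mappable is counted in bytes, this rounds the
 * request up to 1/1024 of the mappable aperture: a 256 MiB aperture
 * gives 256 KiB granularity, so neighbouring uploads can be served
 * from one cached buffer instead of each taking a fresh mapping. */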
bo = NULL;
#if !DBG_NO_MAP_UPLOAD
if (!DEBUG_NO_LLC && kgem->gen >= 60) {
struct kgem_bo *old;
@@ -2952,11 +2956,8 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
if (bo == NULL)
return NULL;
/* Be a little more generous and hope to hold fewer mmappings */
alloc = ALIGN(size, 128*1024);
old = NULL;
if (!write)
if ((flags & KGEM_BUFFER_WRITE) == 0)
old = search_linear_cache(kgem, alloc, CREATE_CPU_MAP);
if (old == NULL)
old = search_linear_cache(kgem, alloc, CREATE_INACTIVE | CREATE_CPU_MAP);
@@ -2985,72 +2986,145 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
}
bo->mem = kgem_bo_map__cpu(kgem, &bo->base);
if (bo->mem == NULL) {
if (bo->mem) {
if (flags & KGEM_BUFFER_WRITE)
kgem_bo_sync__cpu(kgem, &bo->base);
bo->need_io = false;
bo->base.io = true;
bo->mmapped = true;
alloc = bo->base.size;
} else {
bo->base.refcnt = 0; /* for valgrind */
kgem_bo_free(kgem, &bo->base);
return NULL;
bo = NULL;
}
if (write)
kgem_bo_sync__cpu(kgem, &bo->base);
bo->need_io = false;
bo->base.io = true;
bo->mmapped = true;
alloc = bo->base.size;
} else if (HAVE_VMAP && kgem->has_vmap) {
bo = partial_bo_alloc(alloc);
if (bo == NULL)
return NULL;
handle = gem_vmap(kgem->fd, bo->mem, alloc, write);
if (handle) {
__kgem_bo_init(&bo->base, handle, alloc);
bo->base.vmap = true;
bo->need_io = false;
} else {
free(bo);
return NULL;
}
} else {
} else if ((flags & KGEM_BUFFER_WRITE_INPLACE) == KGEM_BUFFER_WRITE_INPLACE) {
struct kgem_bo *old;
old = NULL;
if (!write)
old = search_linear_cache(kgem, alloc, 0);
if (old == NULL)
old = search_linear_cache(kgem, alloc, CREATE_INACTIVE);
/* The issue with using a GTT upload buffer is that we may
* cause eviction-stalls in order to free up some GTT space.
* An is-mappable? ioctl could help us detect when we are
* about to block, or some per-page magic in the kernel.
*
* XXX This is especially noticeable on memory constrained
* devices like gen2 or with relatively slow gpu like i3.
*/
old = search_linear_cache(kgem, alloc,
CREATE_INACTIVE | CREATE_GTT_MAP);
#if HAVE_I915_GEM_BUFFER_INFO
if (old) {
alloc = old->size;
bo = partial_bo_alloc(alloc);
struct drm_i915_gem_buffer_info info;
/* An example of how such a non-blocking ioctl might work */
VG_CLEAR(info);
info.handle = old->handle;
if (drmIoctl(kgem->fd,
DRM_IOCTL_I915_GEM_BUFFER_INFO,
&info) == 0) {
old->presumed_offset = info.addr;
if ((info.flags & I915_GEM_MAPPABLE) == 0) {
kgem_bo_move_to_inactive(kgem, old);
old = NULL;
}
}
}
#endif
if (old) {
DBG(("%s: reusing handle=%d for buffer\n",
__FUNCTION__, old->handle));
bo = malloc(sizeof(*bo));
if (bo == NULL)
return NULL;
memcpy(&bo->base, old, sizeof(*old));
if (old->rq)
list_replace(&old->request,
&bo->base.request);
list_replace(&old->request, &bo->base.request);
else
list_init(&bo->base.request);
list_replace(&old->vma, &bo->base.vma);
list_init(&bo->base.list);
free(old);
bo->base.refcnt = 1;
} else {
bo->mem = kgem_bo_map(kgem, &bo->base);
if (bo->mem) {
bo->need_io = false;
bo->base.io = true;
bo->mmapped = true;
bo->base.refcnt = 1;
alloc = bo->base.size;
} else {
kgem_bo_free(kgem, &bo->base);
bo = NULL;
}
}
}
#endif
if (bo == NULL) {
/* Be more parsimonious with pwrite/pread buffers */
if ((flags & KGEM_BUFFER_INPLACE) == 0)
alloc = PAGE_ALIGN(size);
flags &= ~KGEM_BUFFER_INPLACE;
if (HAVE_VMAP && kgem->has_vmap) {
bo = partial_bo_alloc(alloc);
if (bo == NULL)
return NULL;
if (!__kgem_bo_init(&bo->base,
gem_create(kgem->fd, alloc),
alloc)) {
handle = gem_vmap(kgem->fd, bo->mem, alloc,
(flags & KGEM_BUFFER_WRITE) == 0);
if (handle) {
__kgem_bo_init(&bo->base, handle, alloc);
bo->base.vmap = true;
bo->need_io = false;
} else {
free(bo);
return NULL;
}
} else {
struct kgem_bo *old;
old = NULL;
if ((flags & KGEM_BUFFER_WRITE) == 0)
old = search_linear_cache(kgem, alloc, 0);
if (old == NULL)
old = search_linear_cache(kgem, alloc, CREATE_INACTIVE);
if (old) {
alloc = old->size;
bo = partial_bo_alloc(alloc);
if (bo == NULL)
return NULL;
memcpy(&bo->base, old, sizeof(*old));
if (old->rq)
list_replace(&old->request,
&bo->base.request);
else
list_init(&bo->base.request);
list_replace(&old->vma, &bo->base.vma);
list_init(&bo->base.list);
free(old);
bo->base.refcnt = 1;
} else {
bo = partial_bo_alloc(alloc);
if (bo == NULL)
return NULL;
if (!__kgem_bo_init(&bo->base,
gem_create(kgem->fd, alloc),
alloc)) {
free(bo);
return NULL;
}
}
bo->need_io = flags & KGEM_BUFFER_WRITE;
bo->base.io = true;
}
bo->need_io = write;
bo->base.io = true;
}
bo->base.reusable = false;
assert(bo->base.size == alloc);
@@ -3058,7 +3132,7 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
assert(!bo->need_io || bo->base.domain != DOMAIN_GPU);
bo->used = size;
bo->write = write;
bo->write = flags & KGEM_BUFFER_WRITE_INPLACE;
offset = 0;
list_add(&bo->base.list, &kgem->partial);
@@ -3139,7 +3213,7 @@ struct kgem_bo *kgem_upload_source_image(struct kgem *kgem,
bo = kgem_create_buffer_2d(kgem,
width, height, bpp,
KGEM_BUFFER_WRITE, &dst);
KGEM_BUFFER_WRITE_INPLACE, &dst);
if (bo)
memcpy_blt(data, dst, bpp,
stride, bo->pitch,
@@ -3167,7 +3241,8 @@ struct kgem_bo *kgem_upload_source_image_halved(struct kgem *kgem,
bo = kgem_create_buffer_2d(kgem,
width, height, bpp,
KGEM_BUFFER_WRITE, &dst);
KGEM_BUFFER_WRITE_INPLACE,
&dst);
if (bo == NULL)
return NULL;
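
Summarising the kgem changes above, the buffer allocator now tries the
mapping strategies in a fixed order; a simplified restatement (the
helper below is a paraphrase of the control flow, not code from the
patch):

    static void *upload_buffer_map(struct kgem *kgem,
                                   struct kgem_partial_bo *bo,
                                   unsigned flags)
    {
        void *ptr = NULL;

        if (!DEBUG_NO_LLC && kgem->gen >= 60)
            /* LLC: a cacheable CPU mmap suits both streaming
             * and rasterisation */
            ptr = kgem_bo_map__cpu(kgem, &bo->base);
        else if ((flags & KGEM_BUFFER_WRITE_INPLACE) ==
                 KGEM_BUFFER_WRITE_INPLACE)
            /* zero-copy through the WC/GTT mapping, at the risk
             * of an eviction stall when the aperture is full */
            ptr = kgem_bo_map(kgem, &bo->base);

        /* on failure (or for plain WRITE on pre-LLC hardware) the
         * caller drops KGEM_BUFFER_INPLACE and falls back to vmap
         * or to a heap buffer flushed with pwrite */
        return ptr;
    }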


@@ -414,7 +414,11 @@ static inline void kgem_bo_mark_dirty(struct kgem_bo *bo)
void kgem_sync(struct kgem *kgem);
#define KGEM_BUFFER_WRITE 0x1
#define KGEM_BUFFER_LAST 0x2
#define KGEM_BUFFER_INPLACE 0x2
#define KGEM_BUFFER_LAST 0x4
#define KGEM_BUFFER_WRITE_INPLACE (KGEM_BUFFER_WRITE | KGEM_BUFFER_INPLACE)
struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
uint32_t size, uint32_t flags,
void **ret);
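
The INPLACE bit also participates in partial-buffer reuse: the test in
kgem_create_buffer() above rejects a cached buffer whose usage does not
match the request. Restated as a predicate (the function name is mine):

    static bool partial_bo_compatible(unsigned bo_write, unsigned flags)
    {
        /* must agree on WRITE... */
        if ((bo_write & KGEM_BUFFER_WRITE) != (flags & KGEM_BUFFER_WRITE))
            return false;
        /* ...and never hand a WC-backed (INPLACE) buffer to a caller
         * that did not ask for one: reading through WC is painful */
        if ((bo_write & ~flags) & KGEM_BUFFER_INPLACE)
            return false;
        return true;
    }

For example, a buffer created with KGEM_BUFFER_WRITE_INPLACE is skipped
for a plain KGEM_BUFFER_WRITE request, since pixman would then be
rasterising through an uncached mapping.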


@@ -434,7 +434,8 @@ inline static struct sna_pixmap *sna_pixmap_attach(PixmapPtr pixmap)
}
PixmapPtr sna_pixmap_create_upload(ScreenPtr screen,
int width, int height, int depth);
int width, int height, int depth,
unsigned flags);
struct sna_pixmap *sna_pixmap_move_to_gpu(PixmapPtr pixmap, unsigned flags);
struct sna_pixmap *sna_pixmap_force_to_gpu(PixmapPtr pixmap, unsigned flags);
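
Callers of sna_pixmap_create_upload() now state their intent; both
forms below appear verbatim in the trapezoid hunks at the end of this
commit:

    /* mask streamed sequentially by the scan converter: WC is fine */
    scratch = sna_pixmap_create_upload(screen, width, height, 8,
                                       KGEM_BUFFER_WRITE_INPLACE);

    /* mask composited by pixman (read-modify-write): keep cacheable */
    scratch = sna_pixmap_create_upload(screen, width, height, depth,
                                       KGEM_BUFFER_WRITE);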


@@ -1640,7 +1640,8 @@ sna_drawable_use_cpu_bo(DrawablePtr drawable,
PixmapPtr
sna_pixmap_create_upload(ScreenPtr screen,
int width, int height, int depth)
int width, int height, int depth,
unsigned flags)
{
struct sna *sna = to_sna_from_screen(screen);
PixmapPtr pixmap;
@@ -1690,7 +1691,7 @@ sna_pixmap_create_upload(ScreenPtr screen,
priv->gpu_bo = kgem_create_buffer_2d(&sna->kgem,
width, height, bpp,
KGEM_BUFFER_WRITE,
flags,
&ptr);
if (!priv->gpu_bo) {
free(priv);
@@ -2399,7 +2400,7 @@ sna_put_xybitmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
upload = kgem_create_buffer(&sna->kgem,
bstride*bh,
KGEM_BUFFER_WRITE,
KGEM_BUFFER_WRITE_INPLACE,
&ptr);
if (!upload)
break;
@@ -2529,7 +2530,7 @@ sna_put_xypixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
upload = kgem_create_buffer(&sna->kgem,
bstride*bh,
KGEM_BUFFER_WRITE,
KGEM_BUFFER_WRITE_INPLACE,
&ptr);
if (!upload)
break;
@@ -3018,7 +3019,8 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
tmp = sna_pixmap_create_upload(src->pScreen,
src->width,
src->height,
src->depth);
src->depth,
KGEM_BUFFER_WRITE_INPLACE);
if (tmp == NullPixmap)
return;
@@ -3909,7 +3911,7 @@ sna_copy_bitmap_blt(DrawablePtr _bitmap, DrawablePtr drawable, GCPtr gc,
upload = kgem_create_buffer(&sna->kgem,
bstride*bh,
KGEM_BUFFER_WRITE,
KGEM_BUFFER_WRITE_INPLACE,
&ptr);
if (!upload)
break;
@@ -4029,7 +4031,7 @@ sna_copy_plane_blt(DrawablePtr source, DrawablePtr drawable, GCPtr gc,
upload = kgem_create_buffer(&sna->kgem,
bstride*bh,
KGEM_BUFFER_WRITE,
KGEM_BUFFER_WRITE_INPLACE,
&ptr);
if (!upload)
break;
@@ -7594,7 +7596,7 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable,
upload = kgem_create_buffer(&sna->kgem,
bstride*bh,
KGEM_BUFFER_WRITE,
KGEM_BUFFER_WRITE_INPLACE,
&ptr);
if (!upload)
break;
@@ -7733,7 +7735,7 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable,
upload = kgem_create_buffer(&sna->kgem,
bstride*bh,
KGEM_BUFFER_WRITE,
KGEM_BUFFER_WRITE_INPLACE,
&ptr);
if (!upload)
break;
@@ -7873,7 +7875,7 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable,
upload = kgem_create_buffer(&sna->kgem,
bstride*bh,
KGEM_BUFFER_WRITE,
KGEM_BUFFER_WRITE_INPLACE,
&ptr);
if (!upload)
break;
@@ -9386,7 +9388,7 @@ sna_push_pixels_solid_blt(GCPtr gc,
upload = kgem_create_buffer(&sna->kgem,
bstride*bh,
KGEM_BUFFER_WRITE,
KGEM_BUFFER_WRITE_INPLACE,
&ptr);
if (!upload)
break;
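
All of the put/copy/stipple call sites converted above share one shape:
the upload buffer is filled exactly once, front to back, then blitted
and discarded, which is exactly the pattern a write-combining mapping
handles well. An illustrative fragment (expand_row, blt_and_destroy and
the bits pointer are hypothetical stand-ins for the per-path specifics):

    upload = kgem_create_buffer(&sna->kgem, bstride * bh,
                                KGEM_BUFFER_WRITE_INPLACE, &ptr);
    if (upload) {
        uint8_t *dst = ptr;
        int y;

        for (y = 0; y < bh; y++, dst += bstride)
            expand_row(dst, bits, y);   /* hypothetical helper */
        /* one sequential pass, never read back */
        blt_and_destroy(sna, upload);   /* hypothetical helper */
    }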


@@ -730,7 +730,8 @@ glyphs_via_mask(struct sna *sna,
upload:
pixmap = sna_pixmap_create_upload(screen,
width, height,
format->depth);
format->depth,
KGEM_BUFFER_WRITE);
if (!pixmap)
return FALSE;


@@ -398,7 +398,7 @@ fallback:
tmp.drawable.width,
tmp.drawable.height,
tmp.drawable.bitsPerPixel,
KGEM_BUFFER_WRITE,
KGEM_BUFFER_WRITE_INPLACE,
&ptr);
if (!src_bo)
goto fallback;
@@ -473,7 +473,7 @@ fallback:
}
src_bo = kgem_create_buffer(kgem, offset,
KGEM_BUFFER_WRITE | (nbox ? KGEM_BUFFER_LAST : 0),
KGEM_BUFFER_WRITE_INPLACE | (nbox ? KGEM_BUFFER_LAST : 0),
&ptr);
if (!src_bo)
break;
@@ -633,7 +633,7 @@ fallback:
tmp.drawable.width,
tmp.drawable.height,
tmp.drawable.bitsPerPixel,
KGEM_BUFFER_WRITE,
KGEM_BUFFER_WRITE_INPLACE,
&ptr);
if (!src_bo)
goto fallback;
@@ -709,7 +709,7 @@ fallback:
}
src_bo = kgem_create_buffer(kgem, offset,
KGEM_BUFFER_WRITE | (nbox ? KGEM_BUFFER_LAST : 0),
KGEM_BUFFER_WRITE_INPLACE | (nbox ? KGEM_BUFFER_LAST : 0),
&ptr);
if (!src_bo)
break;
@@ -803,7 +803,7 @@ indirect_replace(struct sna *sna,
pixmap->drawable.width,
pixmap->drawable.height,
pixmap->drawable.bitsPerPixel,
KGEM_BUFFER_WRITE,
KGEM_BUFFER_WRITE_INPLACE,
&ptr);
if (!src_bo)
return false;
@@ -832,7 +832,7 @@ indirect_replace(struct sna *sna,
src_bo = kgem_create_buffer(kgem,
pitch * pixmap->drawable.height,
KGEM_BUFFER_WRITE,
KGEM_BUFFER_WRITE_INPLACE,
&ptr);
if (!src_bo)
return false;
@@ -907,7 +907,8 @@ struct kgem_bo *sna_replace(struct sna *sna,
pixmap->drawable.bitsPerPixel,
bo->tiling));
if (indirect_replace(sna, pixmap, bo, src, stride))
if ((!bo->map || bo->rq) &&
indirect_replace(sna, pixmap, bo, src, stride))
return bo;
if (kgem_bo_is_busy(bo)) {
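
The new gate in sna_replace() deserves a note: if the destination bo
already has a mapping and the GPU is done with it, writing straight
through that mapping beats staging through an indirect upload buffer,
so indirect_replace() is only attempted otherwise. As a predicate
(the name is mine):

    static bool try_indirect_replace(struct kgem_bo *bo)
    {
        /* no CPU/GTT mapping to write through yet, or the GPU still
         * owns the buffer so direct writes would stall */
        return bo->map == NULL || bo->rq != NULL;
    }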


@@ -1185,7 +1185,7 @@ do_fixup:
channel->bo = kgem_create_buffer_2d(&sna->kgem,
w, h, PIXMAN_FORMAT_BPP(channel->pict_format),
KGEM_BUFFER_WRITE,
KGEM_BUFFER_WRITE_INPLACE,
&ptr);
if (!channel->bo) {
DBG(("%s: failed to create upload buffer, using clear\n",
@@ -1347,7 +1347,7 @@ sna_render_picture_convert(struct sna *sna,
channel->bo = kgem_create_buffer_2d(&sna->kgem,
w, h, PIXMAN_FORMAT_BPP(channel->pict_format),
KGEM_BUFFER_WRITE,
KGEM_BUFFER_WRITE_INPLACE,
&ptr);
if (!channel->bo) {
pixman_image_unref(src);


@@ -2003,7 +2003,8 @@ trapezoids_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
DBG(("%s: mask (%dx%d) depth=%d, format=%08x\n",
__FUNCTION__, width, height, depth, format));
scratch = sna_pixmap_create_upload(screen,
width, height, depth);
width, height, depth,
KGEM_BUFFER_WRITE);
if (!scratch)
return;
@@ -2438,7 +2439,7 @@ composite_unaligned_boxes_fallback(CARD8 op,
scratch = sna_pixmap_create_upload(screen,
extents.x2 - extents.x1,
extents.y2 - extents.y1,
8);
8, KGEM_BUFFER_WRITE);
if (!scratch)
continue;
@@ -3018,7 +3019,9 @@ trapezoid_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
DBG(("%s: mask (%dx%d), dx=(%d, %d)\n",
__FUNCTION__, extents.x2, extents.y2, dx, dy));
scratch = sna_pixmap_create_upload(screen, extents.x2, extents.y2, 8);
scratch = sna_pixmap_create_upload(screen,
extents.x2, extents.y2, 8,
KGEM_BUFFER_WRITE_INPLACE);
if (!scratch)
return true;
@@ -3998,7 +4001,7 @@ trap_mask_converter(PicturePtr picture,
scratch = sna_pixmap_create_upload(screen,
extents.x2-extents.x1,
extents.y2-extents.y1,
8);
8, KGEM_BUFFER_WRITE_INPLACE);
if (!scratch)
return true;
@@ -4109,7 +4112,9 @@ trap_upload(PicturePtr picture,
DBG(("%s: tmp (%dx%d) depth=%d\n",
__FUNCTION__, width, height, depth));
scratch = sna_pixmap_create_upload(screen, width, height, depth);
scratch = sna_pixmap_create_upload(screen,
width, height, depth,
KGEM_BUFFER_WRITE);
if (!scratch)
return true;
@@ -4510,7 +4515,9 @@ triangles_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
DBG(("%s: mask (%dx%d)\n",
__FUNCTION__, extents.x2, extents.y2));
scratch = sna_pixmap_create_upload(screen, extents.x2, extents.y2, 8);
scratch = sna_pixmap_create_upload(screen,
extents.x2, extents.y2, 8,
KGEM_BUFFER_WRITE_INPLACE);
if (!scratch)
return true;
@@ -4615,7 +4622,8 @@ triangles_fallback(CARD8 op,
DBG(("%s: mask (%dx%d) depth=%d, format=%08x\n",
__FUNCTION__, width, height, depth, format));
scratch = sna_pixmap_create_upload(screen,
width, height, depth);
width, height, depth,
KGEM_BUFFER_WRITE);
if (!scratch)
return;
@@ -4857,7 +4865,8 @@ tristrip_fallback(CARD8 op,
DBG(("%s: mask (%dx%d) depth=%d, format=%08x\n",
__FUNCTION__, width, height, depth, format));
scratch = sna_pixmap_create_upload(screen,
width, height, depth);
width, height, depth,
KGEM_BUFFER_WRITE);
if (!scratch)
return;
@@ -4991,7 +5000,8 @@ trifan_fallback(CARD8 op,
DBG(("%s: mask (%dx%d) depth=%d, format=%08x\n",
__FUNCTION__, width, height, depth, format));
scratch = sna_pixmap_create_upload(screen,
width, height, depth);
width, height, depth,
KGEM_BUFFER_WRITE);
if (!scratch)
return;
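
The split in these trapezoid hunks is deliberate: the mask-converter
paths stream their output once, in address order, so they get
KGEM_BUFFER_WRITE_INPLACE, while the pixman fallbacks blend into the
mask and therefore stay on plain KGEM_BUFFER_WRITE. A toy illustration
of why the access pattern matters on a WC mapping:

    #include <stdint.h>
    #include <string.h>

    /* WC-friendly: every cache line written once, sequentially */
    static void fill_row(uint8_t *mask, int w, uint8_t coverage)
    {
        memset(mask, coverage, w);
    }

    /* WC-hostile: the load from mask[x] is uncached and stalls */
    static void blend_pixel(uint8_t *mask, int x, uint8_t coverage)
    {
        unsigned v = mask[x] + coverage;
        mask[x] = v > 255 ? 255 : v;
    }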