sna: Use GPU for readback onto CPU bo

Time to blt from GTT to LLC 16384 bytes:	 125.000µs (snb)
Time to blt from GTT to LLC 16384 bytes:	  71.000µs (ivb)
Time to blt from GTT to LLC 1048576 bytes:	1400.000µs (snb)
Time to blt from GTT to LLC 1048576 bytes:	 938.000µs (ivb)

Time to copy from GTT to LLC 16384 bytes:	 118.000µs (snb)
Time to copy from GTT to LLC 16384 bytes:	 134.000µs (ivb)
Time to copy from GTT to LLC 1048576 bytes:	6723.000µs (snb)
Time to copy from GTT to LLC 1048576 bytes:	7424.000µs (ivb)

And conversely,

Time to blt from LLC to GTT 16384 bytes:	 10.000µs (snb)
Time to blt from LLC to GTT 16384 bytes:	  8.000µs (ivb)
Time to blt from LLC to GTT 1048576 bytes:	217.000µs (snb)
Time to blt from LLC to GTT 1048576 bytes:	135.000µs (ivb)

Time to copy from LLC to GTT 16384 bytes:	  4.000µs (snb)
Time to copy from LLC to GTT 16384 bytes:	  4.000µs (ivb)
Time to copy from LLC to GTT 1048576 bytes:	270.000µs (snb)
Time to copy from LLC to GTT 1048576 bytes:	179.500µs (ivb)

It seems clear, then, that even with the extra synchronisation cost,
copying from the GTT with the GPU is much preferable to using uncached
reads by the CPU. Streaming write-combined writes from the CPU into
the GTT seem about as efficient as we can manage, so continue to use the
mapping unless busy.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
This commit is contained in:
Chris Wilson 2012-06-06 00:08:17 +01:00
parent f2513cb0fd
commit 57d7d5de78
1 changed file with 21 additions and 10 deletions

View File

@ -930,8 +930,19 @@ sna_pixmap_create_mappable_gpu(PixmapPtr pixmap)
return priv->gpu_bo && kgem_bo_is_mappable(&sna->kgem, priv->gpu_bo);
}
static bool use_cpu_bo_for_xfer(struct sna_pixmap *priv)
/*
 * Decide whether the pixmap's CPU bo may be used as the target of a
 * GPU copy. The callers visible alongside this helper use the result to
 * gate render.copy_boxes() with gpu_bo as the source and cpu_bo as the
 * destination.
 *
 * Returns true only when the pixmap has a CPU bo (priv->cpu_bo is not
 * NULL) and sna->kgem.gen is at least 30.
 */
static inline bool use_cpu_bo_for_write(struct sna *sna,
struct sna_pixmap *priv)
{
return priv->cpu_bo != NULL && sna->kgem.gen >= 30;
}
static inline bool use_cpu_bo_for_read(struct sna_pixmap *priv)
{
#if 0
if (pixmap->devPrivate.ptr == NULL)
return TRUE;
#endif
if (priv->cpu_bo == NULL)
return FALSE;
@ -1112,7 +1123,7 @@ skip_inplace_map:
if (n) {
Bool ok = FALSE;
if (sna->kgem.gen >= 30 && use_cpu_bo_for_xfer(priv))
if (use_cpu_bo_for_write(sna, priv))
ok = sna->render.copy_boxes(sna, GXcopy,
pixmap, priv->gpu_bo, 0, 0,
pixmap, priv->cpu_bo, 0, 0,
@ -1503,7 +1514,7 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
assert(pixmap_contains_damage(pixmap, priv->gpu_damage));
ok = FALSE;
if (sna->kgem.gen >= 30 && use_cpu_bo_for_xfer(priv))
if (use_cpu_bo_for_write(sna, priv))
ok = sna->render.copy_boxes(sna, GXcopy,
pixmap, priv->gpu_bo, 0, 0,
pixmap, priv->cpu_bo, 0, 0,
@ -1604,7 +1615,7 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
if (n) {
Bool ok = FALSE;
if (sna->kgem.gen >= 30 && use_cpu_bo_for_xfer(priv))
if (use_cpu_bo_for_write(sna, priv))
ok = sna->render.copy_boxes(sna, GXcopy,
pixmap, priv->gpu_bo, 0, 0,
pixmap, priv->cpu_bo, 0, 0,
@ -1626,7 +1637,7 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
int n = REGION_NUM_RECTS(r);
Bool ok = FALSE;
if (sna->kgem.gen >= 30 && use_cpu_bo_for_xfer(priv))
if (use_cpu_bo_for_write(sna, priv))
ok = sna->render.copy_boxes(sna, GXcopy,
pixmap, priv->gpu_bo, 0, 0,
pixmap, priv->cpu_bo, 0, 0,
@ -1648,7 +1659,7 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
int n = REGION_NUM_RECTS(&need);
Bool ok = FALSE;
if (sna->kgem.gen >= 30 && use_cpu_bo_for_xfer(priv))
if (use_cpu_bo_for_write(sna, priv))
ok = sna->render.copy_boxes(sna, GXcopy,
pixmap, priv->gpu_bo, 0, 0,
pixmap, priv->cpu_bo, 0, 0,
@ -1878,7 +1889,7 @@ sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, BoxPtr box, unsigned int flags)
if (n) {
Bool ok = FALSE;
if (pixmap->devPrivate.ptr == NULL || use_cpu_bo_for_xfer(priv))
if (use_cpu_bo_for_read(priv))
ok = sna->render.copy_boxes(sna, GXcopy,
pixmap, priv->cpu_bo, 0, 0,
pixmap, priv->gpu_bo, 0, 0,
@ -1916,7 +1927,7 @@ sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, BoxPtr box, unsigned int flags)
} else if (DAMAGE_IS_ALL(priv->cpu_damage) ||
sna_damage_contains_box__no_reduce(priv->cpu_damage, box)) {
Bool ok = FALSE;
if (pixmap->devPrivate.ptr == NULL || use_cpu_bo_for_xfer(priv))
if (use_cpu_bo_for_read(priv))
ok = sna->render.copy_boxes(sna, GXcopy,
pixmap, priv->cpu_bo, 0, 0,
pixmap, priv->gpu_bo, 0, 0,
@ -1945,7 +1956,7 @@ sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, BoxPtr box, unsigned int flags)
box = REGION_RECTS(&i);
ok = FALSE;
if (pixmap->devPrivate.ptr == NULL || use_cpu_bo_for_xfer(priv))
if (use_cpu_bo_for_read(priv))
ok = sna->render.copy_boxes(sna, GXcopy,
pixmap, priv->cpu_bo, 0, 0,
pixmap, priv->gpu_bo, 0, 0,
@ -2441,7 +2452,7 @@ sna_pixmap_move_to_gpu(PixmapPtr pixmap, unsigned flags)
DBG(("%s: uploading %d damage boxes\n", __FUNCTION__, n));
ok = FALSE;
if (pixmap->devPrivate.ptr == NULL || use_cpu_bo_for_xfer(priv))
if (use_cpu_bo_for_read(priv))
ok = sna->render.copy_boxes(sna, GXcopy,
pixmap, priv->cpu_bo, 0, 0,
pixmap, priv->gpu_bo, 0, 0,