From b615ce97ec43ea8fe02e995244c757138abcb2de Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 27 Jun 2013 10:45:22 +0100 Subject: [PATCH] sna: Add a fast path for reading back from tiled X bo This is lower latency than the double copy incurred for first moving the bo to the CPU and then copying it back - but due to the less efficient tiled memcpy, it has lower throughput. So x11perf -shmget500 suffers (by about 30%) but real world applications improve by about 2x. Signed-off-by: Chris Wilson --- src/sna/sna_accel.c | 62 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 60 insertions(+), 2 deletions(-) diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c index 3783933c..46e383d1 100644 --- a/src/sna/sna_accel.c +++ b/src/sna/sna_accel.c @@ -4028,7 +4028,7 @@ try_upload_tiled_x(PixmapPtr pixmap, RegionRec *region, if (__kgem_bo_is_busy(&sna->kgem, priv->gpu_bo)) return false; - dst = __kgem_bo_map__cpu(&sna->kgem, priv->gpu_bo); + dst = kgem_bo_map__cpu(&sna->kgem, priv->gpu_bo); if (dst == NULL) return false; @@ -4048,7 +4048,6 @@ try_upload_tiled_x(PixmapPtr pixmap, RegionRec *region, box->x2 - box->x1, box->y2 - box->y1); box++; } while (--n); - __kgem_bo_unmap__cpu(&sna->kgem, priv->gpu_bo, dst); if (!DAMAGE_IS_ALL(priv->gpu_damage)) { if (replaces) { @@ -14345,6 +14344,62 @@ sna_get_image_blt(DrawablePtr drawable, return ok; } +static bool +sna_get_image_tiled(DrawablePtr drawable, + RegionPtr region, + char *dst, + unsigned flags) +{ + PixmapPtr pixmap = get_drawable_pixmap(drawable); + struct sna_pixmap *priv = sna_pixmap(pixmap); + struct sna *sna = to_sna_from_pixmap(pixmap); + char *src; + + if (!sna->kgem.memcpy_from_tiled_x) + return false; + + if (flags & MOVE_INPLACE_HINT) + return false; + + if (priv == NULL || priv->gpu_bo == NULL) + return false; + + if (priv->gpu_bo->tiling != I915_TILING_X) + return false; + + if (priv->gpu_bo->scanout) + return false; + + if (!sna->kgem.has_llc && priv->gpu_bo->domain != DOMAIN_CPU) + return 
false; + + if (priv->gpu_damage == NULL || + !(DAMAGE_IS_ALL(priv->gpu_damage) || + sna_damage_contains_box__no_reduce(priv->gpu_damage, + &region->extents))) + return false; + + src = kgem_bo_map__cpu(&sna->kgem, priv->gpu_bo); + if (src == NULL) + return false; + + DBG(("%s: download through a tiled CPU map\n", __FUNCTION__)); + + kgem_bo_sync__cpu_full(&sna->kgem, priv->gpu_bo, FORCE_FULL_SYNC); + + memcpy_from_tiled_x(&sna->kgem, src, dst, + pixmap->drawable.bitsPerPixel, + priv->gpu_bo->pitch, + PixmapBytePad(region->extents.x2 - region->extents.x1, + drawable->depth), + region->extents.x1, region->extents.y1, + 0, 0, + region->extents.x2 - region->extents.x1, + region->extents.y2 - region->extents.y1); + + return true; +} + static void sna_get_image(DrawablePtr drawable, int x, int y, int w, int h, @@ -14379,6 +14434,9 @@ sna_get_image(DrawablePtr drawable, if (can_blt && sna_get_image_blt(drawable, &region, dst, flags)) return; + if (can_blt && sna_get_image_tiled(drawable, &region, dst, flags)) + return; + if (!sna_drawable_move_region_to_cpu(drawable, &region, flags)) return;