uxa: Reuse in-flight bo

When we need to allocate a new bo for use as a gpu target, first check
if we can reuse a pixmap that has already been relocated into the
aperture as a temporary target, for instance a glyph mask or a clip mask.

Before:
backend                      test   min(s) median(s) stddev.
xlib         firefox-planet-gnome   50.568   50.873   0.30%
 xcb         firefox-planet-gnome   49.686   53.003   3.92%
xlib                    evolution   40.115   40.131   0.86%
 xcb                    evolution   28.241   28.285   0.18%

After:
backend                      test   min(s) median(s) stddev.
xlib         firefox-planet-gnome   47.759   48.233   0.80%
 xcb         firefox-planet-gnome   48.611   48.657   0.87%
xlib                    evolution   38.954   38.991   0.05%
 xcb                    evolution   26.561   26.654   0.19%

And even more dramatic improvements when using a font size larger than
the maximum size of the glyph cache:
 xcb firefox-36-20090611:  1.79x speedup
xlib firefox-36-20090611:  1.74x speedup
 xcb firefox-36-20090609:  1.62x speedup
xlib firefox-36-20090609:  1.59x speedup

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
This commit is contained in:
Chris Wilson 2010-03-31 11:50:27 +01:00
parent 96aa7a236a
commit c374c94e41
4 changed files with 81 additions and 31 deletions

View File

@ -136,14 +136,22 @@ list_is_empty(struct list *head)
#define list_first_entry(ptr, type, member) \ #define list_first_entry(ptr, type, member) \
list_entry((ptr)->next, type, member) list_entry((ptr)->next, type, member)
#define list_foreach(pos, head) \
for (pos = (head)->next; pos != (head); pos = pos->next)
#define list_foreach_entry(pos, type, head, member) \
for (pos = list_entry((head)->next, type, member);\
&pos->member != (head); \
pos = list_entry(pos->member.next, type, member))
struct intel_pixmap { struct intel_pixmap {
dri_bo *bo; dri_bo *bo;
uint32_t tiling; uint32_t tiling, stride;
uint32_t flush_write_domain; uint32_t flush_write_domain;
uint32_t flush_read_domains; uint32_t flush_read_domains;
uint32_t batch_write_domain; uint32_t batch_write_domain;
uint32_t batch_read_domains; uint32_t batch_read_domains;
struct list flush, batch; struct list flush, batch, in_flight;
}; };
struct intel_pixmap *i830_get_pixmap_intel(PixmapPtr pixmap); struct intel_pixmap *i830_get_pixmap_intel(PixmapPtr pixmap);
@ -252,6 +260,7 @@ typedef struct intel_screen_private {
int batch_atomic_limit; int batch_atomic_limit;
struct list batch_pixmaps; struct list batch_pixmaps;
struct list flush_pixmaps; struct list flush_pixmaps;
struct list in_flight;
/* For Xvideo */ /* For Xvideo */
Bool use_drmmode_overlay; Bool use_drmmode_overlay;

View File

@ -195,6 +195,18 @@ void intel_batch_submit(ScrnInfoPtr scrn)
list_del(&entry->flush); list_del(&entry->flush);
} }
while (!list_is_empty(&intel->in_flight)) {
struct intel_pixmap *entry;
entry = list_first_entry(&intel->in_flight,
struct intel_pixmap,
in_flight);
dri_bo_unreference(entry->bo);
list_del(&entry->in_flight);
xfree(entry);
}
/* Save a ref to the last batch emitted, which we use for syncing /* Save a ref to the last batch emitted, which we use for syncing
* in debug code. * in debug code.
*/ */

View File

@ -1054,6 +1054,7 @@ void i830_init_bufmgr(ScrnInfoPtr scrn)
list_init(&intel->batch_pixmaps); list_init(&intel->batch_pixmaps);
list_init(&intel->flush_pixmaps); list_init(&intel->flush_pixmaps);
list_init(&intel->in_flight);
} }
Bool i830_crtc_on(xf86CrtcPtr crtc) Bool i830_crtc_on(xf86CrtcPtr crtc)

View File

@ -127,7 +127,7 @@ i830_uxa_pixmap_compute_size(PixmapPtr pixmap,
{ {
ScrnInfoPtr scrn = xf86Screens[pixmap->drawable.pScreen->myNum]; ScrnInfoPtr scrn = xf86Screens[pixmap->drawable.pScreen->myNum];
intel_screen_private *intel = intel_get_screen_private(scrn); intel_screen_private *intel = intel_get_screen_private(scrn);
int pitch_align; int pitch, pitch_align;
int size; int size;
if (*tiling != I915_TILING_NONE) { if (*tiling != I915_TILING_NONE) {
@ -151,6 +151,9 @@ i830_uxa_pixmap_compute_size(PixmapPtr pixmap,
} }
} }
pitch = (w * pixmap->drawable.bitsPerPixel + 7) / 8;
if (pitch <= 256)
*tiling = I915_TILING_NONE;
repeat: repeat:
if (*tiling == I915_TILING_NONE) { if (*tiling == I915_TILING_NONE) {
pitch_align = intel->accel_pixmap_pitch_alignment; pitch_align = intel->accel_pixmap_pitch_alignment;
@ -158,8 +161,7 @@ i830_uxa_pixmap_compute_size(PixmapPtr pixmap,
pitch_align = 512; pitch_align = 512;
} }
*stride = ROUND_TO((w * pixmap->drawable.bitsPerPixel + 7) / 8, *stride = ROUND_TO(pitch, pitch_align);
pitch_align);
if (*tiling == I915_TILING_NONE) { if (*tiling == I915_TILING_NONE) {
/* Round the height up so that the GPU's access to a 2x2 aligned /* Round the height up so that the GPU's access to a 2x2 aligned
@ -548,17 +550,19 @@ dri_bo *i830_get_pixmap_bo(PixmapPtr pixmap)
void i830_set_pixmap_bo(PixmapPtr pixmap, dri_bo * bo) void i830_set_pixmap_bo(PixmapPtr pixmap, dri_bo * bo)
{ {
ScrnInfoPtr scrn = xf86Screens[pixmap->drawable.pScreen->myNum];
intel_screen_private *intel = intel_get_screen_private(scrn);
struct intel_pixmap *priv; struct intel_pixmap *priv;
priv = i830_get_pixmap_intel(pixmap); priv = i830_get_pixmap_intel(pixmap);
if (priv != NULL) { if (priv != NULL) {
dri_bo_unreference(priv->bo); if (list_is_empty(&priv->batch)) {
dri_bo_unreference(priv->bo);
priv->flush_read_domains = priv->flush_write_domain = 0; } else {
priv->batch_read_domains = priv->batch_write_domain = 0; list_add(&priv->in_flight, &intel->in_flight);
list_del(&priv->batch); priv = NULL;
list_del(&priv->flush); }
} }
if (bo != NULL) { if (bo != NULL) {
@ -576,6 +580,7 @@ void i830_set_pixmap_bo(PixmapPtr pixmap, dri_bo * bo)
dri_bo_reference(bo); dri_bo_reference(bo);
priv->bo = bo; priv->bo = bo;
priv->stride = i830_pixmap_pitch(pixmap);
ret = drm_intel_bo_get_tiling(bo, ret = drm_intel_bo_get_tiling(bo,
&priv->tiling, &priv->tiling,
@ -883,35 +888,26 @@ i830_uxa_create_pixmap(ScreenPtr screen, int w, int h, int depth,
if (w && h) { if (w && h) {
struct intel_pixmap *priv; struct intel_pixmap *priv;
unsigned int size; unsigned int size, tiling;
int stride; int stride;
priv = xcalloc(1, sizeof (struct intel_pixmap));
if (priv == NULL) {
fbDestroyPixmap(pixmap);
return NullPixmap;
}
/* Always attempt to tile, compute_size() will remove the /* Always attempt to tile, compute_size() will remove the
* tiling for pixmaps that are either too large or too small * tiling for pixmaps that are either too large or too small
* to be effectively tiled. * to be effectively tiled.
*/ */
priv->tiling = I915_TILING_X; tiling = I915_TILING_X;
if (usage == INTEL_CREATE_PIXMAP_TILING_Y) if (usage == INTEL_CREATE_PIXMAP_TILING_Y)
priv->tiling = I915_TILING_Y; tiling = I915_TILING_Y;
if (usage == UXA_CREATE_PIXMAP_FOR_MAP) if (usage == UXA_CREATE_PIXMAP_FOR_MAP)
priv->tiling = I915_TILING_NONE; tiling = I915_TILING_NONE;
if (priv->tiling != I915_TILING_NONE) { if (tiling != I915_TILING_NONE) {
if (w < 256) if (h <= 4)
priv->tiling = I915_TILING_NONE; tiling = I915_TILING_NONE;
if (h < 8) if (h <= 16 && tiling == I915_TILING_Y)
priv->tiling = I915_TILING_NONE; tiling = I915_TILING_X;
if (h < 32 && priv->tiling == I915_TILING_Y)
priv->tiling = I915_TILING_X;
} }
size = i830_uxa_pixmap_compute_size(pixmap, w, h, size = i830_uxa_pixmap_compute_size(pixmap, w, h, &tiling, &stride);
&priv->tiling, &stride);
/* Fail very large allocations on 32-bit systems. Large BOs will /* Fail very large allocations on 32-bit systems. Large BOs will
* tend to hit SW fallbacks frequently, and also will tend to fail * tend to hit SW fallbacks frequently, and also will tend to fail
@ -923,7 +919,37 @@ i830_uxa_create_pixmap(ScreenPtr screen, int w, int h, int depth,
*/ */
if (sizeof(unsigned long) == 4 && if (sizeof(unsigned long) == 4 &&
size > (unsigned int)(1024 * 1024 * 1024)) { size > (unsigned int)(1024 * 1024 * 1024)) {
xfree(priv); fbDestroyPixmap(pixmap);
return NullPixmap;
}
/* Perform a preliminary search for an in-flight bo */
if (usage != UXA_CREATE_PIXMAP_FOR_MAP) {
int aligned_h;
if (tiling == I915_TILING_X)
aligned_h = ALIGN(h, 8);
else if (tiling == I915_TILING_Y)
aligned_h = ALIGN(h, 32);
else
aligned_h = ALIGN(h, 2);
list_foreach_entry(priv, struct intel_pixmap,
&intel->in_flight,
in_flight) {
if (priv->tiling == tiling &&
priv->stride >= stride &&
priv->bo->size >= priv->stride * aligned_h) {
list_del(&priv->in_flight);
screen->ModifyPixmapHeader(pixmap, w, h, 0, 0, priv->stride, NULL);
i830_uxa_set_pixmap_intel(pixmap, priv);
return pixmap;
}
}
}
priv = xcalloc(1, sizeof (struct intel_pixmap));
if (priv == NULL) {
fbDestroyPixmap(pixmap); fbDestroyPixmap(pixmap);
return NullPixmap; return NullPixmap;
} }
@ -941,6 +967,8 @@ i830_uxa_create_pixmap(ScreenPtr screen, int w, int h, int depth,
return NullPixmap; return NullPixmap;
} }
priv->stride = stride;
priv->tiling = tiling;
if (priv->tiling != I915_TILING_NONE) if (priv->tiling != I915_TILING_NONE)
drm_intel_bo_set_tiling(priv->bo, drm_intel_bo_set_tiling(priv->bo,
&priv->tiling, &priv->tiling,