From 293a867ea55d3004f5be21b1d0ad765a89c28a5a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 14 Jun 2012 23:02:10 +0100 Subject: [PATCH] sna: Specialise the self-copy blitter to handle vertically overlapping copies Signed-off-by: Chris Wilson --- src/sna/blt.c | 243 ++++++++++++++++++++------------------------ src/sna/sna.h | 13 +-- src/sna/sna_accel.c | 7 +- 3 files changed, 118 insertions(+), 145 deletions(-) diff --git a/src/sna/blt.c b/src/sna/blt.c index 99bdece2..c0922b5b 100644 --- a/src/sna/blt.c +++ b/src/sna/blt.c @@ -217,98 +217,17 @@ memcpy_blt(const void *src, void *dst, int bpp, } void -memmove_blt(const void *src, void *dst, int bpp, - int32_t src_stride, int32_t dst_stride, - int16_t src_x, int16_t src_y, - int16_t dst_x, int16_t dst_y, - uint16_t width, uint16_t height) -{ - const uint8_t *src_bytes; - uint8_t *dst_bytes; - int byte_width; - - assert(src); - assert(dst); - assert(width && height); - assert(bpp >= 8); - - DBG(("%s: src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n", - __FUNCTION__, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride)); - - bpp /= 8; - - src_bytes = (const uint8_t *)src + src_stride * src_y + src_x * bpp; - dst_bytes = (uint8_t *)dst + dst_stride * dst_y + dst_x * bpp; - - byte_width = width * bpp; - if (byte_width == src_stride && byte_width == dst_stride) { - byte_width *= height; - height = 1; - } - - switch (byte_width) { - case 1: - do { - *dst_bytes = *src_bytes; - src_bytes += src_stride; - dst_bytes += dst_stride; - } while (--height); - break; - - case 2: - do { - *(uint16_t *)dst_bytes = *(const uint16_t *)src_bytes; - src_bytes += src_stride; - dst_bytes += dst_stride; - } while (--height); - break; - - case 4: - do { - *(uint32_t *)dst_bytes = *(const uint32_t *)src_bytes; - src_bytes += src_stride; - dst_bytes += dst_stride; - } while (--height); - break; - - case 8: - do { - *(uint64_t *)dst_bytes = *(const uint64_t *)src_bytes; - src_bytes += src_stride; - dst_bytes += dst_stride; - } while (--height); - break; - - default: - if (src_stride == dst_stride) { - if (dst_bytes < src_bytes + byte_width && - src_bytes < dst_bytes + byte_width) { - do { - memmove(dst_bytes, src_bytes, byte_width); - src_bytes += src_stride; - dst_bytes += src_stride; - } while (--height); - } else { - do { - memcpy(dst_bytes, src_bytes, byte_width); - src_bytes += src_stride; - dst_bytes += src_stride; - } while (--height); - } - } else do { - memmove(dst_bytes, src_bytes, byte_width); - src_bytes += src_stride; - dst_bytes += dst_stride; - } while (--height); - break; - } -} - -void -memmove_blt__box(const void *src, void *dst, - int bpp, int32_t stride, - const BoxRec *box) +memmove_box(const void *src, void *dst, + int bpp, int32_t stride, + const BoxRec *box, + int dx, int dy) { + union { + uint8_t u8; + uint16_t u16; + uint32_t u32; + uint64_t u64; + } tmp; const uint8_t *src_bytes; uint8_t *dst_bytes; int width, height; @@ -319,8 +238,10 @@ memmove_blt__box(const void *src, void *dst, assert(box->x2 > box->x1); assert(box->y2 > box->y1); - DBG(("%s: box=(%d, %d), (%d, %d), pitch=%d, bpp=%d\n", - __FUNCTION__, box->x1, box->y1, box->x2, box->y2, stride, bpp)); + DBG(("%s: box=(%d, %d), (%d, %d), pitch=%d, bpp=%d, dx=%d, dy=%d\n", + __FUNCTION__, + box->x1, box->y1, box->x2, box->y2, + stride, bpp, dx, dy)); bpp /= 8; width = box->y1 * stride + box->x1 * bpp; @@ -334,55 +255,111 @@ memmove_blt__box(const void *src, void *dst, height = 1; } - switch (width) { - case 1: - do { - *dst_bytes = *src_bytes; - src_bytes += stride; - dst_bytes += stride; - } while (--height); - break; - - case 2: - do { - *(uint16_t *)dst_bytes = *(const uint16_t *)src_bytes; - src_bytes += stride; - dst_bytes += stride; - } while (--height); - break; - - case 4: - do { - *(uint32_t *)dst_bytes = *(const uint32_t *)src_bytes; - src_bytes += stride; - dst_bytes += stride; - } while (--height); - break; - - case 8: - do { - *(uint64_t *)dst_bytes = *(const uint64_t *)src_bytes; - src_bytes += stride; - dst_bytes += stride; - } while (--height); - break; - - default: - if (dst_bytes < src_bytes + width && - src_bytes < dst_bytes + width) { + if (dy >= 0) { + switch (width) { + case 1: do { - memmove(dst_bytes, src_bytes, width); + *dst_bytes = tmp.u8 = *src_bytes; src_bytes += stride; dst_bytes += stride; } while (--height); - } else { + break; + + case 2: do { - memcpy(dst_bytes, src_bytes, width); + *(uint16_t *)dst_bytes = tmp.u16 = *(const uint16_t *)src_bytes; src_bytes += stride; dst_bytes += stride; } while (--height); + break; + + case 4: + do { + *(uint32_t *)dst_bytes = tmp.u32 = *(const uint32_t *)src_bytes; + src_bytes += stride; + dst_bytes += stride; + } while (--height); + break; + + case 8: + do { + *(uint64_t *)dst_bytes = tmp.u64 = *(const uint64_t *)src_bytes; + src_bytes += stride; + dst_bytes += stride; + } while (--height); + break; + + default: + if (dst_bytes < src_bytes + width && + src_bytes < dst_bytes + width) { + do { + memmove(dst_bytes, src_bytes, width); + src_bytes += stride; + dst_bytes += stride; + } while (--height); + } else { + do { + memcpy(dst_bytes, src_bytes, width); + src_bytes += stride; + dst_bytes += stride; + } while (--height); + } + break; + } + } else { + src_bytes += (height-1) * stride; + dst_bytes += (height-1) * stride; + + switch (width) { + case 1: + do { + *dst_bytes = tmp.u8 = *src_bytes; + src_bytes -= stride; + dst_bytes -= stride; + } while (--height); + break; + + case 2: + do { + *(uint16_t *)dst_bytes = tmp.u16 = *(const uint16_t *)src_bytes; + src_bytes -= stride; + dst_bytes -= stride; + } while (--height); + break; + + case 4: + do { + *(uint32_t *)dst_bytes = tmp.u32 = *(const uint32_t *)src_bytes; + src_bytes -= stride; + dst_bytes -= stride; + } while (--height); + break; + + case 8: + do { + *(uint64_t *)dst_bytes = tmp.u64 = *(const uint64_t *)src_bytes; + src_bytes -= stride; + dst_bytes -= stride; + } while (--height); + break; + + default: + if (dst_bytes < src_bytes + width && + src_bytes < dst_bytes + width) { + do { + memmove(dst_bytes, src_bytes, width); + src_bytes -= stride; + dst_bytes -= stride; + } while (--height); + } else { + do { + memcpy(dst_bytes, src_bytes, width); + src_bytes -= stride; + dst_bytes -= stride; + } while (--height); + } + break; } - break; } } diff --git a/src/sna/sna.h b/src/sna/sna.h index a426e695..ee8273c8 100644 --- a/src/sna/sna.h +++ b/src/sna/sna.h @@ -673,15 +673,10 @@ memcpy_blt(const void *src, void *dst, int bpp, int16_t dst_x, int16_t dst_y, uint16_t width, uint16_t height); void -memmove_blt(const void *src, void *dst, int bpp, - int32_t src_stride, int32_t dst_stride, - int16_t src_x, int16_t src_y, - int16_t dst_x, int16_t dst_y, - uint16_t width, uint16_t height); -void -memmove_blt__box(const void *src, void *dst, - int bpp, int32_t stride, - const BoxRec *box); +memmove_box(const void *src, void *dst, + int bpp, int32_t stride, + const BoxRec *box, + int dx, int dy); void memcpy_xor(const void *src, void *dst, int bpp, diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c index a44cdb15..982f5623 100644 --- a/src/sna/sna_accel.c +++ b/src/sna/sna_accel.c @@ -3489,7 +3489,7 @@ fallback: stride = pixmap->devKind; bpp = pixmap->drawable.bitsPerPixel; - if (alu == GXcopy && !reverse && !upsidedown && bpp >= 8) { + if (alu == GXcopy && bpp >= 8) { dst_bits = (FbBits *) ((char *)pixmap->devPrivate.ptr + ty * stride + tx * bpp / 8); @@ -3498,8 +3498,9 @@ fallback: dy * stride + dx * bpp / 8); do { - memmove_blt__box(src_bits, dst_bits, - bpp, stride, box); + memmove_box(src_bits, dst_bits, + bpp, stride, box, + dx, dy); box++; } while (--n); } else {