sna: Don't unroll BLT points

The compiler is smarter than I am; unrolling hurts here.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
This commit is contained in:
Chris Wilson 2015-04-01 23:00:57 +01:00
parent 7df58456b8
commit e47eb0c5e5
2 changed files with 19 additions and 89 deletions

View File

@ -4593,7 +4593,7 @@ static inline bool box32_trim_and_translate(Box32Rec *box, DrawablePtr d, GCPtr
return box32_clip(box, gc);
}
static inline void box_add_pt(BoxPtr box, int16_t x, int16_t y)
static inline void box_add_xy(BoxPtr box, int16_t x, int16_t y)
{
if (box->x1 > x)
box->x1 = x;
@ -4606,6 +4606,11 @@ static inline void box_add_pt(BoxPtr box, int16_t x, int16_t y)
box->y2 = y;
}
/*
 * Point-taking convenience wrapper around box_add_xy(): passes the x and y
 * members of *pt, together with box, straight through to box_add_xy().
 * pt is only read (it is const-qualified); it is dereferenced
 * unconditionally, so callers must pass a valid pointer.
 */
static inline void box_add_pt(BoxPtr box, const DDXPointRec *pt)
{
box_add_xy(box, pt->x, pt->y);
}
static inline bool box32_to_box16(const Box32Rec *b32, BoxRec *b16)
{
b16->x1 = b32->x1;
@ -8968,36 +8973,11 @@ sna_poly_point_extents(DrawablePtr drawable, GCPtr gc,
last.x += pt->x;
last.y += pt->y;
pt++;
box_add_pt(&box, last.x, last.y);
box_add_xy(&box, last.x, last.y);
}
} else {
--n; ++pt;
while (n >= 8) {
box_add_pt(&box, pt[0].x, pt[0].y);
box_add_pt(&box, pt[1].x, pt[1].y);
box_add_pt(&box, pt[2].x, pt[2].y);
box_add_pt(&box, pt[3].x, pt[3].y);
box_add_pt(&box, pt[4].x, pt[4].y);
box_add_pt(&box, pt[5].x, pt[5].y);
box_add_pt(&box, pt[6].x, pt[6].y);
box_add_pt(&box, pt[7].x, pt[7].y);
pt += 8;
n -= 8;
}
if (n & 4) {
box_add_pt(&box, pt[0].x, pt[0].y);
box_add_pt(&box, pt[1].x, pt[1].y);
box_add_pt(&box, pt[2].x, pt[2].y);
box_add_pt(&box, pt[3].x, pt[3].y);
pt += 4;
}
if (n & 2) {
box_add_pt(&box, pt[0].x, pt[0].y);
box_add_pt(&box, pt[1].x, pt[1].y);
pt += 2;
}
if (n & 1)
box_add_pt(&box, pt[0].x, pt[0].y);
while (--n)
box_add_pt(&box, ++pt);
}
box.x2++;
box.y2++;
@ -9709,7 +9689,7 @@ sna_poly_line_extents(DrawablePtr drawable, GCPtr gc,
y += pt->y;
if (blt)
blt &= pt->x == 0 || pt->y == 0;
box_add_pt(&box, x, y);
box_add_xy(&box, x, y);
}
} else {
int x = box.x1;
@ -9721,7 +9701,7 @@ sna_poly_line_extents(DrawablePtr drawable, GCPtr gc,
x = pt->x;
y = pt->y;
}
box_add_pt(&box, pt->x, pt->y);
box_add_pt(&box, pt);
}
}
box.x2++;

View File

@ -3185,65 +3185,15 @@ fastcall static void sna_blt_fill_op_points(struct sna *sna,
assert(kgem->nbatch < kgem->surface);
if ((dx|dy) == 0) {
while (n_this_time >= 8) {
*((uint64_t *)b + 0) = pt_add(cmd, p+0, 0, 0);
*((uint64_t *)b + 1) = pt_add(cmd, p+1, 0, 0);
*((uint64_t *)b + 2) = pt_add(cmd, p+2, 0, 0);
*((uint64_t *)b + 3) = pt_add(cmd, p+3, 0, 0);
*((uint64_t *)b + 4) = pt_add(cmd, p+4, 0, 0);
*((uint64_t *)b + 5) = pt_add(cmd, p+5, 0, 0);
*((uint64_t *)b + 6) = pt_add(cmd, p+6, 0, 0);
*((uint64_t *)b + 7) = pt_add(cmd, p+7, 0, 0);
b += 16;
n_this_time -= 8;
p += 8;
}
if (n_this_time & 4) {
*((uint64_t *)b + 0) = pt_add(cmd, p+0, 0, 0);
*((uint64_t *)b + 1) = pt_add(cmd, p+1, 0, 0);
*((uint64_t *)b + 2) = pt_add(cmd, p+2, 0, 0);
*((uint64_t *)b + 3) = pt_add(cmd, p+3, 0, 0);
b += 8;
p += 4;
}
if (n_this_time & 2) {
*((uint64_t *)b + 0) = pt_add(cmd, p+0, 0, 0);
*((uint64_t *)b + 1) = pt_add(cmd, p+1, 0, 0);
b += 4;
p += 2;
}
if (n_this_time & 1)
*((uint64_t *)b + 0) = pt_add(cmd, p++, 0, 0);
do {
*(uint64_t *)b = pt_add(cmd, p++, 0, 0);
b += 2;
} while (--n_this_time);
} else {
while (n_this_time >= 8) {
*((uint64_t *)b + 0) = pt_add(cmd, p+0, dx, dy);
*((uint64_t *)b + 1) = pt_add(cmd, p+1, dx, dy);
*((uint64_t *)b + 2) = pt_add(cmd, p+2, dx, dy);
*((uint64_t *)b + 3) = pt_add(cmd, p+3, dx, dy);
*((uint64_t *)b + 4) = pt_add(cmd, p+4, dx, dy);
*((uint64_t *)b + 5) = pt_add(cmd, p+5, dx, dy);
*((uint64_t *)b + 6) = pt_add(cmd, p+6, dx, dy);
*((uint64_t *)b + 7) = pt_add(cmd, p+7, dx, dy);
b += 16;
n_this_time -= 8;
p += 8;
}
if (n_this_time & 4) {
*((uint64_t *)b + 0) = pt_add(cmd, p+0, dx, dy);
*((uint64_t *)b + 1) = pt_add(cmd, p+1, dx, dy);
*((uint64_t *)b + 2) = pt_add(cmd, p+2, dx, dy);
*((uint64_t *)b + 3) = pt_add(cmd, p+3, dx, dy);
b += 8;
p += 8;
}
if (n_this_time & 2) {
*((uint64_t *)b + 0) = pt_add(cmd, p+0, dx, dy);
*((uint64_t *)b + 1) = pt_add(cmd, p+1, dx, dy);
b += 4;
p += 2;
}
if (n_this_time & 1)
*((uint64_t *)b + 0) = pt_add(cmd, p++, dx, dy);
do {
*(uint64_t *)b = pt_add(cmd, p++, dx, dy);
b += 2;
} while (--n_this_time);
}
if (!n)