diff --git a/src/sna/compiler.h b/src/sna/compiler.h index 407d3b54..3dd8a6d9 100644 --- a/src/sna/compiler.h +++ b/src/sna/compiler.h @@ -54,8 +54,8 @@ #if defined(__GNUC__) && (__GNUC__ >= 4) /* 4.4 */ #define sse2 __attribute__((target("sse2,fpmath=sse+387"))) -#define sse4_2 __attribute__((target("sse4.2,sse2"))) -#define avx2 __attribute__((target("avx2,sse4.2,sse2"))) +#define sse4_2 __attribute__((target("sse4.2,sse2,fpmath=sse+387"))) +#define avx2 __attribute__((target("avx2,sse4.2,sse2,fpmath=sse+387"))) #else #define sse2 #define sse4_2 diff --git a/src/sna/gen4_vertex.c b/src/sna/gen4_vertex.c index 65624b79..885b87eb 100644 --- a/src/sna/gen4_vertex.c +++ b/src/sna/gen4_vertex.c @@ -414,66 +414,6 @@ emit_primitive_linear(struct sna *sna, v[5] = compute_linear(&op->src, r->src.x, r->src.y); } -sse4_2 fastcall static void -emit_primitive_linear__sse4_2(struct sna *sna, - const struct sna_composite_op *op, - const struct sna_composite_rectangles *r) -{ - float *v; - union { - struct sna_coordinate p; - float f; - } dst; - - assert(op->floats_per_rect == 6); - assert((sna->render.vertex_used % 2) == 0); - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 6; - assert(sna->render.vertex_used <= sna->render.vertex_size); - - dst.p.x = r->dst.x + r->width; - dst.p.y = r->dst.y + r->height; - v[0] = dst.f; - dst.p.x = r->dst.x; - v[2] = dst.f; - dst.p.y = r->dst.y; - v[4] = dst.f; - - v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height); - v[3] = compute_linear(&op->src, r->src.x, r->src.y+r->height); - v[5] = compute_linear(&op->src, r->src.x, r->src.y); -} - -avx2 fastcall static void -emit_primitive_linear__avx2(struct sna *sna, - const struct sna_composite_op *op, - const struct sna_composite_rectangles *r) -{ - float *v; - union { - struct sna_coordinate p; - float f; - } dst; - - assert(op->floats_per_rect == 6); - assert((sna->render.vertex_used % 2) == 0); - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 6; - assert(sna->render.vertex_used <= sna->render.vertex_size); - - dst.p.x = r->dst.x + r->width; - dst.p.y = r->dst.y + r->height; - v[0] = dst.f; - dst.p.x = r->dst.x; - v[2] = dst.f; - dst.p.y = r->dst.y; - v[4] = dst.f; - - v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height); - v[3] = compute_linear(&op->src, r->src.x, r->src.y+r->height); - v[5] = compute_linear(&op->src, r->src.x, r->src.y); -} - sse2 fastcall static void emit_boxes_linear(const struct sna_composite_op *op, const BoxRec *box, int nbox, @@ -502,62 +442,6 @@ emit_boxes_linear(const struct sna_composite_op *op, } while (--nbox); } -sse4_2 fastcall static void -emit_boxes_linear__sse4_2(const struct sna_composite_op *op, - const BoxRec *box, int nbox, - float *v) -{ - union { - struct sna_coordinate p; - float f; - } dst; - - do { - dst.p.x = box->x2; - dst.p.y = box->y2; - v[0] = dst.f; - dst.p.x = box->x1; - v[2] = dst.f; - dst.p.y = box->y1; - v[4] = dst.f; - - v[1] = compute_linear(&op->src, box->x2, box->y2); - v[3] = compute_linear(&op->src, box->x1, box->y2); - v[5] = compute_linear(&op->src, box->x1, box->y1); - - v += 6; - box++; - } while (--nbox); -} - -avx2 fastcall static void -emit_boxes_linear__avx2(const struct sna_composite_op *op, - const BoxRec *box, int nbox, - float *v) -{ - union { - struct sna_coordinate p; - float f; - } dst; - - do { - dst.p.x = box->x2; - dst.p.y = box->y2; - v[0] = dst.f; - dst.p.x = box->x1; - v[2] = dst.f; - dst.p.y = box->y1; - v[4] = dst.f; - - v[1] = compute_linear(&op->src, box->x2, box->y2); - v[3] = compute_linear(&op->src, box->x1, box->y2); - v[5] = compute_linear(&op->src, box->x1, box->y1); - - v += 6; - box++; - } while (--nbox); -} - sse2 fastcall static void emit_primitive_identity_source(struct sna *sna, const struct sna_composite_op *op, @@ -589,68 +473,6 @@ emit_primitive_identity_source(struct sna *sna, v[5] = v[2] = v[8] + r->height * op->src.scale[1]; } -sse4_2 fastcall static void -emit_primitive_identity_source__sse4_2(struct sna *sna, - const struct sna_composite_op *op, - const struct sna_composite_rectangles *r) -{ - union { - struct sna_coordinate p; - float f; - } dst; - float *v; - - assert(op->floats_per_rect == 9); - assert((sna->render.vertex_used % 3) == 0); - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 9; - - dst.p.x = r->dst.x + r->width; - dst.p.y = r->dst.y + r->height; - v[0] = dst.f; - dst.p.x = r->dst.x; - v[3] = dst.f; - dst.p.y = r->dst.y; - v[6] = dst.f; - - v[7] = v[4] = (r->src.x + op->src.offset[0]) * op->src.scale[0]; - v[1] = v[4] + r->width * op->src.scale[0]; - - v[8] = (r->src.y + op->src.offset[1]) * op->src.scale[1]; - v[5] = v[2] = v[8] + r->height * op->src.scale[1]; -} - -avx2 fastcall static void -emit_primitive_identity_source__avx2(struct sna *sna, - const struct sna_composite_op *op, - const struct sna_composite_rectangles *r) -{ - union { - struct sna_coordinate p; - float f; - } dst; - float *v; - - assert(op->floats_per_rect == 9); - assert((sna->render.vertex_used % 3) == 0); - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 9; - - dst.p.x = r->dst.x + r->width; - dst.p.y = r->dst.y + r->height; - v[0] = dst.f; - dst.p.x = r->dst.x; - v[3] = dst.f; - dst.p.y = r->dst.y; - v[6] = dst.f; - - v[7] = v[4] = (r->src.x + op->src.offset[0]) * op->src.scale[0]; - v[1] = v[4] + r->width * op->src.scale[0]; - - v[8] = (r->src.y + op->src.offset[1]) * op->src.scale[1]; - v[5] = v[2] = v[8] + r->height * op->src.scale[1]; -} - sse2 fastcall static void emit_boxes_identity_source(const struct sna_composite_op *op, const BoxRec *box, int nbox, @@ -681,66 +503,6 @@ emit_boxes_identity_source(const struct sna_composite_op *op, } while (--nbox); } -sse4_2 fastcall static void -emit_boxes_identity_source__sse4_2(const struct sna_composite_op *op, - const BoxRec *box, int nbox, - float *v) -{ - do { - union { - struct sna_coordinate p; - float f; - } dst; - - dst.p.x = box->x2; - dst.p.y = box->y2; - v[0] = dst.f; - dst.p.x = box->x1; - v[3] = dst.f; - dst.p.y = box->y1; - v[6] = dst.f; - - v[7] = v[4] = (box->x1 + op->src.offset[0]) * op->src.scale[0]; - v[1] = (box->x2 + op->src.offset[0]) * op->src.scale[0]; - - v[8] = (box->y1 + op->src.offset[1]) * op->src.scale[1]; - v[2] = v[5] = (box->y2 + op->src.offset[1]) * op->src.scale[1]; - - v += 9; - box++; - } while (--nbox); -} - -avx2 fastcall static void -emit_boxes_identity_source__avx2(const struct sna_composite_op *op, - const BoxRec *box, int nbox, - float *v) -{ - do { - union { - struct sna_coordinate p; - float f; - } dst; - - dst.p.x = box->x2; - dst.p.y = box->y2; - v[0] = dst.f; - dst.p.x = box->x1; - v[3] = dst.f; - dst.p.y = box->y1; - v[6] = dst.f; - - v[7] = v[4] = (box->x1 + op->src.offset[0]) * op->src.scale[0]; - v[1] = (box->x2 + op->src.offset[0]) * op->src.scale[0]; - - v[8] = (box->y1 + op->src.offset[1]) * op->src.scale[1]; - v[2] = v[5] = (box->y2 + op->src.offset[1]) * op->src.scale[1]; - - v += 9; - box++; - } while (--nbox); -} - sse2 fastcall static void emit_primitive_simple_source(struct sna *sna, const struct sna_composite_op *op, @@ -781,86 +543,6 @@ emit_primitive_simple_source(struct sna *sna, v[8] = ((r->src.y + ty) * yy + y0) * sy; } -sse4_2 fastcall static void -emit_primitive_simple_source__sse4_2(struct sna *sna, - const struct sna_composite_op *op, - const struct sna_composite_rectangles *r) -{ - float *v; - union { - struct sna_coordinate p; - float f; - } dst; - - float xx = op->src.transform->matrix[0][0]; - float x0 = op->src.transform->matrix[0][2]; - float yy = op->src.transform->matrix[1][1]; - float y0 = op->src.transform->matrix[1][2]; - float sx = op->src.scale[0]; - float sy = op->src.scale[1]; - int16_t tx = op->src.offset[0]; - int16_t ty = op->src.offset[1]; - - assert(op->floats_per_rect == 9); - assert((sna->render.vertex_used % 3) == 0); - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 3*3; - - dst.p.x = r->dst.x + r->width; - dst.p.y = r->dst.y + r->height; - v[0] = dst.f; - v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx; - v[5] = v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy; - - dst.p.x = r->dst.x; - v[3] = dst.f; - v[7] = v[4] = ((r->src.x + tx) * xx + x0) * sx; - - dst.p.y = r->dst.y; - v[6] = dst.f; - v[8] = ((r->src.y + ty) * yy + y0) * sy; -} - -avx2 fastcall static void -emit_primitive_simple_source__avx2(struct sna *sna, - const struct sna_composite_op *op, - const struct sna_composite_rectangles *r) -{ - float *v; - union { - struct sna_coordinate p; - float f; - } dst; - - float xx = op->src.transform->matrix[0][0]; - float x0 = op->src.transform->matrix[0][2]; - float yy = op->src.transform->matrix[1][1]; - float y0 = op->src.transform->matrix[1][2]; - float sx = op->src.scale[0]; - float sy = op->src.scale[1]; - int16_t tx = op->src.offset[0]; - int16_t ty = op->src.offset[1]; - - assert(op->floats_per_rect == 9); - assert((sna->render.vertex_used % 3) == 0); - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 3*3; - - dst.p.x = r->dst.x + r->width; - dst.p.y = r->dst.y + r->height; - v[0] = dst.f; - v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx; - v[5] = v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy; - - dst.p.x = r->dst.x; - v[3] = dst.f; - v[7] = v[4] = ((r->src.x + tx) * xx + x0) * sx; - - dst.p.y = r->dst.y; - v[6] = dst.f; - v[8] = ((r->src.y + ty) * yy + y0) * sy; -} - sse2 fastcall static void emit_boxes_simple_source(const struct sna_composite_op *op, const BoxRec *box, int nbox, @@ -900,84 +582,6 @@ emit_boxes_simple_source(const struct sna_composite_op *op, } while (--nbox); } -sse4_2 fastcall static void -emit_boxes_simple_source__sse4_2(const struct sna_composite_op *op, - const BoxRec *box, int nbox, - float *v) -{ - float xx = op->src.transform->matrix[0][0]; - float x0 = op->src.transform->matrix[0][2]; - float yy = op->src.transform->matrix[1][1]; - float y0 = op->src.transform->matrix[1][2]; - float sx = op->src.scale[0]; - float sy = op->src.scale[1]; - int16_t tx = op->src.offset[0]; - int16_t ty = op->src.offset[1]; - - do { - union { - struct sna_coordinate p; - float f; - } dst; - - dst.p.x = box->x2; - dst.p.y = box->y2; - v[0] = dst.f; - v[1] = ((box->x2 + tx) * xx + x0) * sx; - v[5] = v[2] = ((box->y2 + ty) * yy + y0) * sy; - - dst.p.x = box->x1; - v[3] = dst.f; - v[7] = v[4] = ((box->x1 + tx) * xx + x0) * sx; - - dst.p.y = box->y1; - v[6] = dst.f; - v[8] = ((box->y1 + ty) * yy + y0) * sy; - - v += 9; - box++; - } while (--nbox); -} - -avx2 fastcall static void -emit_boxes_simple_source__avx2(const struct sna_composite_op *op, - const BoxRec *box, int nbox, - float *v) -{ - float xx = op->src.transform->matrix[0][0]; - float x0 = op->src.transform->matrix[0][2]; - float yy = op->src.transform->matrix[1][1]; - float y0 = op->src.transform->matrix[1][2]; - float sx = op->src.scale[0]; - float sy = op->src.scale[1]; - int16_t tx = op->src.offset[0]; - int16_t ty = op->src.offset[1]; - - do { - union { - struct sna_coordinate p; - float f; - } dst; - - dst.p.x = box->x2; - dst.p.y = box->y2; - v[0] = dst.f; - v[1] = ((box->x2 + tx) * xx + x0) * sx; - v[5] = v[2] = ((box->y2 + ty) * yy + y0) * sy; - - dst.p.x = box->x1; - v[3] = dst.f; - v[7] = v[4] = ((box->x1 + tx) * xx + x0) * sx; - - dst.p.y = box->y1; - v[6] = dst.f; - v[8] = ((box->y1 + ty) * yy + y0) * sy; - - v += 9; - box++; - } while (--nbox); -} - sse2 fastcall static void emit_primitive_affine_source(struct sna *sna, const struct sna_composite_op *op, @@ -1376,6 +980,405 @@ emit_composite_texcoord_affine(struct sna *sna, OUT_VERTEX_F(t[1] * channel->scale[1]); } +/* SSE4_2 */ + +sse4_2 fastcall static void +emit_primitive_linear__sse4_2(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + float *v; + union { + struct sna_coordinate p; + float f; + } dst; + + assert(op->floats_per_rect == 6); + assert((sna->render.vertex_used % 2) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 6; + assert(sna->render.vertex_used <= sna->render.vertex_size); + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + dst.p.x = r->dst.x; + v[2] = dst.f; + dst.p.y = r->dst.y; + v[4] = dst.f; + + v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height); + v[3] = compute_linear(&op->src, r->src.x, r->src.y+r->height); + v[5] = compute_linear(&op->src, r->src.x, r->src.y); +} + +sse4_2 fastcall static void +emit_boxes_linear__sse4_2(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + union { + struct sna_coordinate p; + float f; + } dst; + + do { + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + dst.p.x = box->x1; + v[2] = dst.f; + dst.p.y = box->y1; + v[4] = dst.f; + + v[1] = compute_linear(&op->src, box->x2, box->y2); + v[3] = compute_linear(&op->src, box->x1, box->y2); + v[5] = compute_linear(&op->src, box->x1, box->y1); + + v += 6; + box++; + } while (--nbox); +} + +sse4_2 fastcall static void +emit_primitive_identity_source__sse4_2(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + union { + struct sna_coordinate p; + float f; + } dst; + float *v; + + assert(op->floats_per_rect == 9); + assert((sna->render.vertex_used % 3) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 9; + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + dst.p.x = r->dst.x; + v[3] = dst.f; + dst.p.y = r->dst.y; + v[6] = dst.f; + + v[7] = v[4] = (r->src.x + op->src.offset[0]) * op->src.scale[0]; + v[1] = v[4] + r->width * op->src.scale[0]; + + v[8] = (r->src.y + op->src.offset[1]) * op->src.scale[1]; + v[5] = v[2] = v[8] + r->height * op->src.scale[1]; +} + +sse4_2 fastcall static void +emit_boxes_identity_source__sse4_2(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + dst.p.x = box->x1; + v[3] = dst.f; + dst.p.y = box->y1; + v[6] = dst.f; + + v[7] = v[4] = (box->x1 + op->src.offset[0]) * op->src.scale[0]; + v[1] = (box->x2 + op->src.offset[0]) * op->src.scale[0]; + + v[8] = (box->y1 + op->src.offset[1]) * op->src.scale[1]; + v[2] = v[5] = (box->y2 + op->src.offset[1]) * op->src.scale[1]; + + v += 9; + box++; + } while (--nbox); +} + +sse4_2 fastcall static void +emit_primitive_simple_source__sse4_2(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + float *v; + union { + struct sna_coordinate p; + float f; + } dst; + + float xx = op->src.transform->matrix[0][0]; + float x0 = op->src.transform->matrix[0][2]; + float yy = op->src.transform->matrix[1][1]; + float y0 = op->src.transform->matrix[1][2]; + float sx = op->src.scale[0]; + float sy = op->src.scale[1]; + int16_t tx = op->src.offset[0]; + int16_t ty = op->src.offset[1]; + + assert(op->floats_per_rect == 9); + assert((sna->render.vertex_used % 3) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 3*3; + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx; + v[5] = v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy; + + dst.p.x = r->dst.x; + v[3] = dst.f; + v[7] = v[4] = ((r->src.x + tx) * xx + x0) * sx; + + dst.p.y = r->dst.y; + v[6] = dst.f; + v[8] = ((r->src.y + ty) * yy + y0) * sy; +} + +sse4_2 fastcall static void +emit_boxes_simple_source__sse4_2(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + float xx = op->src.transform->matrix[0][0]; + float x0 = op->src.transform->matrix[0][2]; + float yy = op->src.transform->matrix[1][1]; + float y0 = op->src.transform->matrix[1][2]; + float sx = op->src.scale[0]; + float sy = op->src.scale[1]; + int16_t tx = op->src.offset[0]; + int16_t ty = op->src.offset[1]; + + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + v[1] = ((box->x2 + tx) * xx + x0) * sx; + v[5] = v[2] = ((box->y2 + ty) * yy + y0) * sy; + + dst.p.x = box->x1; + v[3] = dst.f; + v[7] = v[4] = ((box->x1 + tx) * xx + x0) * sx; + + dst.p.y = box->y1; + v[6] = dst.f; + v[8] = ((box->y1 + ty) * yy + y0) * sy; + + v += 9; + box++; + } while (--nbox); +} + +/* AVX2 */ + +avx2 fastcall static void +emit_primitive_linear__avx2(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + float *v; + union { + struct sna_coordinate p; + float f; + } dst; + + assert(op->floats_per_rect == 6); + assert((sna->render.vertex_used % 2) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 6; + assert(sna->render.vertex_used <= sna->render.vertex_size); + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + dst.p.x = r->dst.x; + v[2] = dst.f; + dst.p.y = r->dst.y; + v[4] = dst.f; + + v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height); + v[3] = compute_linear(&op->src, r->src.x, r->src.y+r->height); + v[5] = compute_linear(&op->src, r->src.x, r->src.y); +} + +avx2 fastcall static void +emit_boxes_linear__avx2(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + union { + struct sna_coordinate p; + float f; + } dst; + + do { + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + dst.p.x = box->x1; + v[2] = dst.f; + dst.p.y = box->y1; + v[4] = dst.f; + + v[1] = compute_linear(&op->src, box->x2, box->y2); + v[3] = compute_linear(&op->src, box->x1, box->y2); + v[5] = compute_linear(&op->src, box->x1, box->y1); + + v += 6; + box++; + } while (--nbox); +} + +avx2 fastcall static void +emit_primitive_identity_source__avx2(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + union { + struct sna_coordinate p; + float f; + } dst; + float *v; + + assert(op->floats_per_rect == 9); + assert((sna->render.vertex_used % 3) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 9; + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + dst.p.x = r->dst.x; + v[3] = dst.f; + dst.p.y = r->dst.y; + v[6] = dst.f; + + v[7] = v[4] = (r->src.x + op->src.offset[0]) * op->src.scale[0]; + v[1] = v[4] + r->width * op->src.scale[0]; + + v[8] = (r->src.y + op->src.offset[1]) * op->src.scale[1]; + v[5] = v[2] = v[8] + r->height * op->src.scale[1]; +} + +avx2 fastcall static void +emit_boxes_identity_source__avx2(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + dst.p.x = box->x1; + v[3] = dst.f; + dst.p.y = box->y1; + v[6] = dst.f; + + v[7] = v[4] = (box->x1 + op->src.offset[0]) * op->src.scale[0]; + v[1] = (box->x2 + op->src.offset[0]) * op->src.scale[0]; + + v[8] = (box->y1 + op->src.offset[1]) * op->src.scale[1]; + v[2] = v[5] = (box->y2 + op->src.offset[1]) * op->src.scale[1]; + + v += 9; + box++; + } while (--nbox); +} + +avx2 fastcall static void +emit_primitive_simple_source__avx2(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + float *v; + union { + struct sna_coordinate p; + float f; + } dst; + + float xx = op->src.transform->matrix[0][0]; + float x0 = op->src.transform->matrix[0][2]; + float yy = op->src.transform->matrix[1][1]; + float y0 = op->src.transform->matrix[1][2]; + float sx = op->src.scale[0]; + float sy = op->src.scale[1]; + int16_t tx = op->src.offset[0]; + int16_t ty = op->src.offset[1]; + + assert(op->floats_per_rect == 9); + assert((sna->render.vertex_used % 3) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 3*3; + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx; + v[5] = v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy; + + dst.p.x = r->dst.x; + v[3] = dst.f; + v[7] = v[4] = ((r->src.x + tx) * xx + x0) * sx; + + dst.p.y = r->dst.y; + v[6] = dst.f; + v[8] = ((r->src.y + ty) * yy + y0) * sy; +} + +avx2 fastcall static void +emit_boxes_simple_source__avx2(const struct sna_composite_op *op, + const BoxRec *box, int nbox, + float *v) +{ + float xx = op->src.transform->matrix[0][0]; + float x0 = op->src.transform->matrix[0][2]; + float yy = op->src.transform->matrix[1][1]; + float y0 = op->src.transform->matrix[1][2]; + float sx = op->src.scale[0]; + float sy = op->src.scale[1]; + int16_t tx = op->src.offset[0]; + int16_t ty = op->src.offset[1]; + + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + v[1] = ((box->x2 + tx) * xx + x0) * sx; + v[5] = v[2] = ((box->y2 + ty) * yy + y0) * sy; + + dst.p.x = box->x1; + v[3] = dst.f; + v[7] = v[4] = ((box->x1 + tx) * xx + x0) * sx; + + dst.p.y = box->y1; + v[6] = dst.f; + v[8] = ((box->y1 + ty) * yy + y0) * sy; + + v += 9; + box++; + } while (--nbox); +} unsigned gen4_choose_composite_emitter(struct sna *sna, struct sna_composite_op *tmp) { @@ -1642,86 +1645,6 @@ emit_span_identity(struct sna *sna, v[11] = v[7] = v[3] = opacity; } -sse4_2 fastcall static void -emit_span_identity__sse4_2(struct sna *sna, - const struct sna_composite_spans_op *op, - const BoxRec *box, - float opacity) -{ - float *v; - union { - struct sna_coordinate p; - float f; - } dst; - - float sx = op->base.src.scale[0]; - float sy = op->base.src.scale[1]; - int16_t tx = op->base.src.offset[0]; - int16_t ty = op->base.src.offset[1]; - - assert(op->base.floats_per_rect == 12); - assert((sna->render.vertex_used % 4) == 0); - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 3*4; - assert(sna->render.vertex_used <= sna->render.vertex_size); - - dst.p.x = box->x2; - dst.p.y = box->y2; - v[0] = dst.f; - v[1] = (box->x2 + tx) * sx; - v[6] = v[2] = (box->y2 + ty) * sy; - - dst.p.x = box->x1; - v[4] = dst.f; - v[9] = v[5] = (box->x1 + tx) * sx; - - dst.p.y = box->y1; - v[8] = dst.f; - v[10] = (box->y1 + ty) * sy; - - v[11] = v[7] = v[3] = opacity; -} - -avx2 fastcall static void -emit_span_identity__avx2(struct sna *sna, - const struct sna_composite_spans_op *op, - const BoxRec *box, - float opacity) -{ - float *v; - union { - struct sna_coordinate p; - float f; - } dst; - - float sx = op->base.src.scale[0]; - float sy = op->base.src.scale[1]; - int16_t tx = op->base.src.offset[0]; - int16_t ty = op->base.src.offset[1]; - - assert(op->base.floats_per_rect == 12); - assert((sna->render.vertex_used % 4) == 0); - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 3*4; - assert(sna->render.vertex_used <= sna->render.vertex_size); - - dst.p.x = box->x2; - dst.p.y = box->y2; - v[0] = dst.f; - v[1] = (box->x2 + tx) * sx; - v[6] = v[2] = (box->y2 + ty) * sy; - - dst.p.x = box->x1; - v[4] = dst.f; - v[9] = v[5] = (box->x1 + tx) * sx; - - dst.p.y = box->y1; - v[8] = dst.f; - v[10] = (box->y1 + ty) * sy; - - v[11] = v[7] = v[3] = opacity; -} - sse2 fastcall static void emit_span_boxes_identity(const struct sna_composite_spans_op *op, const struct sna_opacity_box *b, int nbox, @@ -1759,80 +1682,6 @@ emit_span_boxes_identity(const struct sna_composite_spans_op *op, } while (--nbox); } -sse4_2 fastcall static void -emit_span_boxes_identity__sse4_2(const struct sna_composite_spans_op *op, - const struct sna_opacity_box *b, int nbox, - float *v) -{ - do { - union { - struct sna_coordinate p; - float f; - } dst; - - float sx = op->base.src.scale[0]; - float sy = op->base.src.scale[1]; - int16_t tx = op->base.src.offset[0]; - int16_t ty = op->base.src.offset[1]; - - dst.p.x = b->box.x2; - dst.p.y = b->box.y2; - v[0] = dst.f; - v[1] = (b->box.x2 + tx) * sx; - v[6] = v[2] = (b->box.y2 + ty) * sy; - - dst.p.x = b->box.x1; - v[4] = dst.f; - v[9] = v[5] = (b->box.x1 + tx) * sx; - - dst.p.y = b->box.y1; - v[8] = dst.f; - v[10] = (b->box.y1 + ty) * sy; - - v[11] = v[7] = v[3] = b->alpha; - - v += 12; - b++; - } while (--nbox); -} - -avx2 fastcall static void -emit_span_boxes_identity__avx2(const struct sna_composite_spans_op *op, - const struct sna_opacity_box *b, int nbox, - float *v) -{ - do { - union { - struct sna_coordinate p; - float f; - } dst; - - float sx = op->base.src.scale[0]; - float sy = op->base.src.scale[1]; - int16_t tx = op->base.src.offset[0]; - int16_t ty = op->base.src.offset[1]; - - dst.p.x = b->box.x2; - dst.p.y = b->box.y2; - v[0] = dst.f; - v[1] = (b->box.x2 + tx) * sx; - v[6] = v[2] = (b->box.y2 + ty) * sy; - - dst.p.x = b->box.x1; - v[4] = dst.f; - v[9] = v[5] = (b->box.x1 + tx) * sx; - - dst.p.y = b->box.y1; - v[8] = dst.f; - v[10] = (b->box.y1 + ty) * sy; - - v[11] = v[7] = v[3] = b->alpha; - - v += 12; - b++; - } while (--nbox); -} - sse2 fastcall static void emit_span_simple(struct sna *sna, const struct sna_composite_spans_op *op, @@ -1877,94 +1726,6 @@ emit_span_simple(struct sna *sna, v[11] = v[7] = v[3] = opacity; } -sse4_2 fastcall static void -emit_span_simple__sse4_2(struct sna *sna, - const struct sna_composite_spans_op *op, - const BoxRec *box, - float opacity) -{ - float *v; - union { - struct sna_coordinate p; - float f; - } dst; - - float xx = op->base.src.transform->matrix[0][0]; - float x0 = op->base.src.transform->matrix[0][2]; - float yy = op->base.src.transform->matrix[1][1]; - float y0 = op->base.src.transform->matrix[1][2]; - float sx = op->base.src.scale[0]; - float sy = op->base.src.scale[1]; - int16_t tx = op->base.src.offset[0]; - int16_t ty = op->base.src.offset[1]; - - assert(op->base.floats_per_rect == 12); - assert((sna->render.vertex_used % 4) == 0); - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 3*4; - assert(sna->render.vertex_used <= sna->render.vertex_size); - - dst.p.x = box->x2; - dst.p.y = box->y2; - v[0] = dst.f; - v[1] = ((box->x2 + tx) * xx + x0) * sx; - v[6] = v[2] = ((box->y2 + ty) * yy + y0) * sy; - - dst.p.x = box->x1; - v[4] = dst.f; - v[9] = v[5] = ((box->x1 + tx) * xx + x0) * sx; - - dst.p.y = box->y1; - v[8] = dst.f; - v[10] = ((box->y1 + ty) * yy + y0) * sy; - - v[11] = v[7] = v[3] = opacity; -} - -avx2 fastcall static void -emit_span_simple__avx2(struct sna *sna, - const struct sna_composite_spans_op *op, - const BoxRec *box, - float opacity) -{ - float *v; - union { - struct sna_coordinate p; - float f; - } dst; - - float xx = op->base.src.transform->matrix[0][0]; - float x0 = op->base.src.transform->matrix[0][2]; - float yy = op->base.src.transform->matrix[1][1]; - float y0 = op->base.src.transform->matrix[1][2]; - float sx = op->base.src.scale[0]; - float sy = op->base.src.scale[1]; - int16_t tx = op->base.src.offset[0]; - int16_t ty = op->base.src.offset[1]; - - assert(op->base.floats_per_rect == 12); - assert((sna->render.vertex_used % 4) == 0); - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 3*4; - assert(sna->render.vertex_used <= sna->render.vertex_size); - - dst.p.x = box->x2; - dst.p.y = box->y2; - v[0] = dst.f; - v[1] = ((box->x2 + tx) * xx + x0) * sx; - v[6] = v[2] = ((box->y2 + ty) * yy + y0) * sy; - - dst.p.x = box->x1; - v[4] = dst.f; - v[9] = v[5] = ((box->x1 + tx) * xx + x0) * sx; - - dst.p.y = box->y1; - v[8] = dst.f; - v[10] = ((box->y1 + ty) * yy + y0) * sy; - - v[11] = v[7] = v[3] = opacity; -} - sse2 fastcall static void emit_span_boxes_simple(const struct sna_composite_spans_op *op, const struct sna_opacity_box *b, int nbox, @@ -2006,88 +1767,6 @@ emit_span_boxes_simple(const struct sna_composite_spans_op *op, } while (--nbox); } -sse4_2 fastcall static void -emit_span_boxes_simple__sse4_2(const struct sna_composite_spans_op *op, - const struct sna_opacity_box *b, int nbox, - float *v) -{ - float xx = op->base.src.transform->matrix[0][0]; - float x0 = op->base.src.transform->matrix[0][2]; - float yy = op->base.src.transform->matrix[1][1]; - float y0 = op->base.src.transform->matrix[1][2]; - float sx = op->base.src.scale[0]; - float sy = op->base.src.scale[1]; - int16_t tx = op->base.src.offset[0]; - int16_t ty = op->base.src.offset[1]; - - do { - union { - struct sna_coordinate p; - float f; - } dst; - - dst.p.x = b->box.x2; - dst.p.y = b->box.y2; - v[0] = dst.f; - v[1] = ((b->box.x2 + tx) * xx + x0) * sx; - v[6] = v[2] = ((b->box.y2 + ty) * yy + y0) * sy; - - dst.p.x = b->box.x1; - v[4] = dst.f; - v[9] = v[5] = ((b->box.x1 + tx) * xx + x0) * sx; - - dst.p.y = b->box.y1; - v[8] = dst.f; - v[10] = ((b->box.y1 + ty) * yy + y0) * sy; - - v[11] = v[7] = v[3] = b->alpha; - - v += 12; - b++; - } while (--nbox); -} - -avx2 fastcall static void -emit_span_boxes_simple__avx2(const struct sna_composite_spans_op *op, - const struct sna_opacity_box *b, int nbox, - float *v) -{ - float xx = op->base.src.transform->matrix[0][0]; - float x0 = op->base.src.transform->matrix[0][2]; - float yy = op->base.src.transform->matrix[1][1]; - float y0 = op->base.src.transform->matrix[1][2]; - float sx = op->base.src.scale[0]; - float sy = op->base.src.scale[1]; - int16_t tx = op->base.src.offset[0]; - int16_t ty = op->base.src.offset[1]; - - do { - union { - struct sna_coordinate p; - float f; - } dst; - - dst.p.x = b->box.x2; - dst.p.y = b->box.y2; - v[0] = dst.f; - v[1] = ((b->box.x2 + tx) * xx + x0) * sx; - v[6] = v[2] = ((b->box.y2 + ty) * yy + y0) * sy; - - dst.p.x = b->box.x1; - v[4] = dst.f; - v[9] = v[5] = ((b->box.x1 + tx) * xx + x0) * sx; - - dst.p.y = b->box.y1; - v[8] = dst.f; - v[10] = ((b->box.y1 + ty) * yy + y0) * sy; - - v[11] = v[7] = v[3] = b->alpha; - - v += 12; - b++; - } while (--nbox); -} - sse2 fastcall static void emit_span_affine(struct sna *sna, const struct sna_composite_spans_op *op, @@ -2133,96 +1812,6 @@ emit_span_affine(struct sna *sna, v[11] = v[7] = v[3] = opacity; } -sse4_2 fastcall static void -emit_span_affine__sse4_2(struct sna *sna, - const struct sna_composite_spans_op *op, - const BoxRec *box, - float opacity) -{ - union { - struct sna_coordinate p; - float f; - } dst; - float *v; - - assert(op->base.floats_per_rect == 12); - assert((sna->render.vertex_used % 4) == 0); - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 12; - - dst.p.x = box->x2; - dst.p.y = box->y2; - v[0] = dst.f; - _sna_get_transformed_scaled(op->base.src.offset[0] + box->x2, - op->base.src.offset[1] + box->y2, - op->base.src.transform, - op->base.src.scale, - &v[1], &v[2]); - - dst.p.x = box->x1; - v[4] = dst.f; - _sna_get_transformed_scaled(op->base.src.offset[0] + box->x1, - op->base.src.offset[1] + box->y2, - op->base.src.transform, - op->base.src.scale, - &v[5], &v[6]); - - dst.p.y = box->y1; - v[8] = dst.f; - _sna_get_transformed_scaled(op->base.src.offset[0] + box->x1, - op->base.src.offset[1] + box->y1, - op->base.src.transform, - op->base.src.scale, - &v[9], &v[10]); - - v[11] = v[7] = v[3] = opacity; -} - -avx2 fastcall static void -emit_span_affine__avx2(struct sna *sna, - const struct sna_composite_spans_op *op, - const BoxRec *box, - float opacity) -{ - union { - struct sna_coordinate p; - float f; - } dst; - float *v; - - assert(op->base.floats_per_rect == 12); - assert((sna->render.vertex_used % 4) == 0); - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 12; - - dst.p.x = box->x2; - dst.p.y = box->y2; - v[0] = dst.f; - _sna_get_transformed_scaled(op->base.src.offset[0] + box->x2, - op->base.src.offset[1] + box->y2, - op->base.src.transform, - op->base.src.scale, - &v[1], &v[2]); - - dst.p.x = box->x1; - v[4] = dst.f; - _sna_get_transformed_scaled(op->base.src.offset[0] + box->x1, - op->base.src.offset[1] + box->y2, - op->base.src.transform, - op->base.src.scale, - &v[5], &v[6]); - - dst.p.y = box->y1; - v[8] = dst.f; - _sna_get_transformed_scaled(op->base.src.offset[0] + box->x1, - op->base.src.offset[1] + box->y1, - op->base.src.transform, - op->base.src.scale, - &v[9], &v[10]); - - v[11] = v[7] = v[3] = opacity; -} - sse2 fastcall static void emit_span_boxes_affine(const struct sna_composite_spans_op *op, const struct sna_opacity_box *b, int nbox, @@ -2266,92 +1855,6 @@ emit_span_boxes_affine(const struct sna_composite_spans_op *op, } while (--nbox); } -sse4_2 fastcall static void -emit_span_boxes_affine__sse4_2(const struct sna_composite_spans_op *op, - const struct sna_opacity_box *b, int nbox, - float *v) -{ - do { - union { - struct sna_coordinate p; - float f; - } dst; - - dst.p.x = b->box.x2; - dst.p.y = b->box.y2; - v[0] = dst.f; - _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x2, - op->base.src.offset[1] + b->box.y2, - op->base.src.transform, - op->base.src.scale, - &v[1], &v[2]); - - dst.p.x = b->box.x1; - v[4] = dst.f; - _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1, - op->base.src.offset[1] + b->box.y2, - op->base.src.transform, - op->base.src.scale, - &v[5], &v[6]); - - dst.p.y = b->box.y1; - v[8] = dst.f; - _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1, - op->base.src.offset[1] + b->box.y1, - op->base.src.transform, - op->base.src.scale, - &v[9], &v[10]); - - v[11] = v[7] = v[3] = b->alpha; - - v += 12; - b++; - } while (--nbox); -} - -avx2 fastcall static void -emit_span_boxes_affine__avx2(const struct sna_composite_spans_op *op, - const struct sna_opacity_box *b, int nbox, - float *v) -{ - do { - union { - struct sna_coordinate p; - float f; - } dst; - - dst.p.x = b->box.x2; - dst.p.y = b->box.y2; - v[0] = dst.f; - _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x2, - op->base.src.offset[1] + b->box.y2, - op->base.src.transform, - op->base.src.scale, - &v[1], &v[2]); - - dst.p.x = b->box.x1; - v[4] = dst.f; - _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1, - op->base.src.offset[1] + b->box.y2, - op->base.src.transform, - op->base.src.scale, - &v[5], &v[6]); - - dst.p.y = b->box.y1; - v[8] = dst.f; - _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1, - op->base.src.offset[1] + b->box.y1, - op->base.src.transform, - op->base.src.scale, - &v[9], &v[10]); - - v[11] = v[7] = v[3] = b->alpha; - - v += 12; - b++; - } while (--nbox); -} - sse2 fastcall static void emit_span_linear(struct sna *sna, const struct sna_composite_spans_op *op, @@ -2384,70 +1887,6 @@ emit_span_linear(struct sna *sna, v[8] = v[5] = v[2] = opacity; } -sse4_2 fastcall static void -emit_span_linear__sse4_2(struct sna *sna, - const struct sna_composite_spans_op *op, - const BoxRec *box, - float opacity) -{ - union { - struct sna_coordinate p; - float f; - } dst; - float *v; - - assert(op->base.floats_per_rect == 9); - assert((sna->render.vertex_used % 3) == 0); - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 9; - - dst.p.x = box->x2; - dst.p.y = box->y2; - v[0] = dst.f; - dst.p.x = box->x1; - v[3] = dst.f; - dst.p.y = box->y1; - v[6] = dst.f; - - v[1] = compute_linear(&op->base.src, box->x2, box->y2); - v[4] = compute_linear(&op->base.src, box->x1, box->y2); - v[7] = compute_linear(&op->base.src, box->x1, box->y1); - - v[8] = v[5] = v[2] = opacity; -} - -avx2 fastcall static void -emit_span_linear__avx2(struct sna *sna, - const struct sna_composite_spans_op *op, - const BoxRec *box, - float opacity) -{ - union { - struct sna_coordinate p; - float f; - } dst; - float *v; - - assert(op->base.floats_per_rect == 9); - assert((sna->render.vertex_used % 3) == 0); - v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 9; - - dst.p.x = box->x2; - dst.p.y = box->y2; - v[0] = dst.f; - dst.p.x = box->x1; - v[3] = dst.f; - dst.p.y = box->y1; - v[6] = dst.f; - - v[1] = compute_linear(&op->base.src, box->x2, box->y2); - v[4] = compute_linear(&op->base.src, box->x1, box->y2); - v[7] = compute_linear(&op->base.src, box->x1, box->y1); - - v[8] = v[5] = v[2] = opacity; -} - sse2 fastcall static void emit_span_boxes_linear(const struct sna_composite_spans_op *op, const struct sna_opacity_box *b, int nbox, @@ -2478,6 +1917,330 @@ emit_span_boxes_linear(const struct sna_composite_spans_op *op, } while (--nbox); } +avx2 fastcall static void +emit_span_identity__avx2(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, + float opacity) +{ + float *v; + union { + struct sna_coordinate p; + float f; + } dst; + + float sx = op->base.src.scale[0]; + float sy = op->base.src.scale[1]; + int16_t tx = op->base.src.offset[0]; + int16_t ty = op->base.src.offset[1]; + + assert(op->base.floats_per_rect == 12); + assert((sna->render.vertex_used % 4) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 3*4; + assert(sna->render.vertex_used <= sna->render.vertex_size); + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + v[1] = (box->x2 + tx) * sx; + v[6] = v[2] = (box->y2 + ty) * sy; + + dst.p.x = box->x1; + v[4] = dst.f; + v[9] = v[5] = (box->x1 + tx) * sx; + + dst.p.y = box->y1; + v[8] = dst.f; + v[10] = (box->y1 + ty) * sy; + + v[11] = v[7] = v[3] = opacity; +} + +/* SSE4_2 */ + +sse4_2 fastcall static void +emit_span_identity__sse4_2(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, + float opacity) +{ + float *v; + union { + struct sna_coordinate p; + float f; + } dst; + + float sx = op->base.src.scale[0]; + float sy = op->base.src.scale[1]; + int16_t tx = op->base.src.offset[0]; + int16_t ty = op->base.src.offset[1]; + + assert(op->base.floats_per_rect == 12); + assert((sna->render.vertex_used % 4) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 3*4; + assert(sna->render.vertex_used <= sna->render.vertex_size); + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + v[1] = (box->x2 + tx) * sx; + v[6] = v[2] = (box->y2 + ty) * sy; + + dst.p.x = box->x1; + v[4] = dst.f; + v[9] = v[5] = (box->x1 + tx) * sx; + + dst.p.y = box->y1; + v[8] = dst.f; + v[10] = (box->y1 + ty) * sy; + + v[11] = v[7] = v[3] = opacity; +} + +sse4_2 fastcall static void +emit_span_boxes_identity__sse4_2(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, int nbox, + float *v) +{ + do { + union { + struct sna_coordinate p; + float f; + } dst; + + float sx = op->base.src.scale[0]; + float sy = op->base.src.scale[1]; + int16_t tx = op->base.src.offset[0]; + int16_t ty = op->base.src.offset[1]; + + dst.p.x = b->box.x2; + dst.p.y = b->box.y2; + v[0] = dst.f; + v[1] = (b->box.x2 + tx) * sx; + v[6] = v[2] = (b->box.y2 + ty) * sy; + + dst.p.x = b->box.x1; + v[4] = dst.f; + v[9] = v[5] = (b->box.x1 + tx) * sx; + + dst.p.y = b->box.y1; + v[8] = dst.f; + v[10] = (b->box.y1 + ty) * sy; + + v[11] = v[7] = v[3] = b->alpha; + + v += 12; + b++; + } while (--nbox); +} + +sse4_2 fastcall static void +emit_span_simple__sse4_2(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, + float opacity) +{ + float *v; + union { + struct sna_coordinate p; + float f; + } dst; + + float xx = op->base.src.transform->matrix[0][0]; + float x0 = op->base.src.transform->matrix[0][2]; + float yy = op->base.src.transform->matrix[1][1]; + float y0 = op->base.src.transform->matrix[1][2]; + float sx = op->base.src.scale[0]; + float sy = op->base.src.scale[1]; + int16_t tx = op->base.src.offset[0]; + int16_t ty = op->base.src.offset[1]; + + assert(op->base.floats_per_rect == 12); + assert((sna->render.vertex_used % 4) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 3*4; + assert(sna->render.vertex_used <= sna->render.vertex_size); + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + v[1] = ((box->x2 + tx) * xx + x0) * sx; + v[6] = v[2] = ((box->y2 + ty) * yy + y0) * sy; + + dst.p.x = box->x1; + v[4] = dst.f; + v[9] = v[5] = ((box->x1 + tx) * xx + x0) * sx; + + dst.p.y = box->y1; + v[8] = dst.f; + v[10] = ((box->y1 + ty) * yy + y0) * sy; + + v[11] = v[7] = v[3] = opacity; +} + +sse4_2 fastcall static void +emit_span_boxes_simple__sse4_2(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, int nbox, + float *v) +{ + float xx = op->base.src.transform->matrix[0][0]; + float x0 = op->base.src.transform->matrix[0][2]; + float yy = op->base.src.transform->matrix[1][1]; + float y0 = op->base.src.transform->matrix[1][2]; + float sx = op->base.src.scale[0]; + float sy = op->base.src.scale[1]; + int16_t tx = op->base.src.offset[0]; + int16_t ty = op->base.src.offset[1]; + + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = b->box.x2; + dst.p.y = b->box.y2; + v[0] = dst.f; + v[1] = ((b->box.x2 + tx) * xx + x0) * sx; + v[6] = v[2] = ((b->box.y2 + ty) * yy + y0) * sy; + + dst.p.x = b->box.x1; + v[4] = dst.f; + v[9] = v[5] = ((b->box.x1 + tx) * xx + x0) * sx; + + dst.p.y = b->box.y1; + v[8] = dst.f; + v[10] = ((b->box.y1 + ty) * yy + y0) * sy; + + v[11] = v[7] = v[3] = b->alpha; + + v += 12; + b++; + } while (--nbox); +} + +sse4_2 fastcall static void +emit_span_affine__sse4_2(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, + float opacity) +{ + union { + struct sna_coordinate p; + float f; + } dst; + float *v; + + assert(op->base.floats_per_rect == 12); + assert((sna->render.vertex_used % 4) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 12; + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + _sna_get_transformed_scaled(op->base.src.offset[0] + box->x2, + op->base.src.offset[1] + box->y2, + op->base.src.transform, + op->base.src.scale, + &v[1], &v[2]); + + dst.p.x = box->x1; + v[4] = dst.f; + _sna_get_transformed_scaled(op->base.src.offset[0] + box->x1, + op->base.src.offset[1] + box->y2, + op->base.src.transform, + op->base.src.scale, + &v[5], &v[6]); + + dst.p.y = box->y1; + v[8] = dst.f; + _sna_get_transformed_scaled(op->base.src.offset[0] + box->x1, + op->base.src.offset[1] + box->y1, + op->base.src.transform, + op->base.src.scale, + &v[9], &v[10]); + + v[11] = v[7] = v[3] = opacity; +} + +sse4_2 fastcall static void +emit_span_boxes_affine__sse4_2(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, int nbox, + float *v) +{ + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = b->box.x2; + dst.p.y = b->box.y2; + v[0] = dst.f; + _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x2, + op->base.src.offset[1] + b->box.y2, + op->base.src.transform, + op->base.src.scale, + &v[1], &v[2]); + + dst.p.x = b->box.x1; + v[4] = dst.f; + _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1, + op->base.src.offset[1] + b->box.y2, + op->base.src.transform, + op->base.src.scale, + &v[5], &v[6]); + + dst.p.y = b->box.y1; + v[8] = dst.f; + _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1, + op->base.src.offset[1] + b->box.y1, + op->base.src.transform, + op->base.src.scale, + &v[9], &v[10]); + + v[11] = v[7] = v[3] = b->alpha; + + v += 12; + b++; + } while (--nbox); +} + +sse4_2 fastcall static void +emit_span_linear__sse4_2(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, + float opacity) +{ + union { + struct sna_coordinate p; + float f; + } dst; + float *v; + + assert(op->base.floats_per_rect == 9); + assert((sna->render.vertex_used % 3) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 9; + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + dst.p.x = box->x1; + v[3] = dst.f; + dst.p.y = box->y1; + v[6] = dst.f; + + v[1] = compute_linear(&op->base.src, box->x2, box->y2); + v[4] = compute_linear(&op->base.src, box->x1, box->y2); + v[7] = compute_linear(&op->base.src, box->x1, box->y1); + + v[8] = v[5] = v[2] = opacity; +} + sse4_2 fastcall static void emit_span_boxes_linear__sse4_2(const struct sna_composite_spans_op *op, const struct sna_opacity_box *b, int nbox, @@ -2508,6 +2271,250 @@ emit_span_boxes_linear__sse4_2(const struct sna_composite_spans_op *op, } while (--nbox); } +/* AVX2 */ + +avx2 fastcall static void +emit_span_boxes_identity__avx2(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, int nbox, + float *v) +{ + do { + union { + struct sna_coordinate p; + float f; + } dst; + + float sx = op->base.src.scale[0]; + float sy = op->base.src.scale[1]; + int16_t tx = op->base.src.offset[0]; + int16_t ty = op->base.src.offset[1]; + + dst.p.x = b->box.x2; + dst.p.y = b->box.y2; + v[0] = dst.f; + v[1] = (b->box.x2 + tx) * sx; + v[6] = v[2] = (b->box.y2 + ty) * sy; + + dst.p.x = b->box.x1; + v[4] = dst.f; + v[9] = v[5] = (b->box.x1 + tx) * sx; + + dst.p.y = b->box.y1; + v[8] = dst.f; + v[10] = (b->box.y1 + ty) * sy; + + v[11] = v[7] = v[3] = b->alpha; + + v += 12; + b++; + } while (--nbox); +} + +avx2 fastcall static void +emit_span_simple__avx2(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, + float opacity) +{ + float *v; + union { + struct sna_coordinate p; + float f; + } dst; + + float xx = op->base.src.transform->matrix[0][0]; + float x0 = op->base.src.transform->matrix[0][2]; + float yy = op->base.src.transform->matrix[1][1]; + float y0 = op->base.src.transform->matrix[1][2]; + float sx = op->base.src.scale[0]; + float sy = op->base.src.scale[1]; + int16_t tx = op->base.src.offset[0]; + int16_t ty = op->base.src.offset[1]; + + assert(op->base.floats_per_rect == 12); + assert((sna->render.vertex_used % 4) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 3*4; + assert(sna->render.vertex_used <= sna->render.vertex_size); + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + v[1] = ((box->x2 + tx) * xx + x0) * sx; + v[6] = v[2] = ((box->y2 + ty) * yy + y0) * sy; + + dst.p.x = box->x1; + v[4] = dst.f; + v[9] = v[5] = ((box->x1 + tx) * xx + x0) * sx; + + dst.p.y = box->y1; + v[8] = dst.f; + v[10] = ((box->y1 + ty) * yy + y0) * sy; + + v[11] = v[7] = v[3] = opacity; +} + +avx2 fastcall static void +emit_span_boxes_simple__avx2(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, int nbox, + float *v) +{ + float xx = op->base.src.transform->matrix[0][0]; + float x0 = op->base.src.transform->matrix[0][2]; + float yy = op->base.src.transform->matrix[1][1]; + float y0 = op->base.src.transform->matrix[1][2]; + float sx = op->base.src.scale[0]; + float sy = op->base.src.scale[1]; + int16_t tx = op->base.src.offset[0]; + int16_t ty = op->base.src.offset[1]; + + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = b->box.x2; + dst.p.y = b->box.y2; + v[0] = dst.f; + v[1] = ((b->box.x2 + tx) * xx + x0) * sx; + v[6] = v[2] = ((b->box.y2 + ty) * yy + y0) * sy; + + dst.p.x = b->box.x1; + v[4] = dst.f; + v[9] = v[5] = ((b->box.x1 + tx) * xx + x0) * sx; + + dst.p.y = b->box.y1; + v[8] = dst.f; + v[10] = ((b->box.y1 + ty) * yy + y0) * sy; + + v[11] = v[7] = v[3] = b->alpha; + + v += 12; + b++; + } while (--nbox); +} + +avx2 fastcall static void +emit_span_affine__avx2(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, + float opacity) +{ + union { + struct sna_coordinate p; + float f; + } dst; + float *v; + + assert(op->base.floats_per_rect == 12); + assert((sna->render.vertex_used % 4) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 12; + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + _sna_get_transformed_scaled(op->base.src.offset[0] + box->x2, + op->base.src.offset[1] + box->y2, + op->base.src.transform, + op->base.src.scale, + &v[1], &v[2]); + + dst.p.x = box->x1; + v[4] = dst.f; + _sna_get_transformed_scaled(op->base.src.offset[0] + box->x1, + op->base.src.offset[1] + box->y2, + op->base.src.transform, + op->base.src.scale, + &v[5], &v[6]); + + dst.p.y = box->y1; + v[8] = dst.f; + _sna_get_transformed_scaled(op->base.src.offset[0] + box->x1, + op->base.src.offset[1] + box->y1, + op->base.src.transform, + op->base.src.scale, + &v[9], &v[10]); + + v[11] = v[7] = v[3] = opacity; +} + +avx2 fastcall static void +emit_span_boxes_affine__avx2(const struct sna_composite_spans_op *op, + const struct sna_opacity_box *b, int nbox, + float *v) +{ + do { + union { + struct sna_coordinate p; + float f; + } dst; + + dst.p.x = b->box.x2; + dst.p.y = b->box.y2; + v[0] = dst.f; + _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x2, + op->base.src.offset[1] + b->box.y2, + op->base.src.transform, + op->base.src.scale, + &v[1], &v[2]); + + dst.p.x = b->box.x1; + v[4] = dst.f; + _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1, + op->base.src.offset[1] + b->box.y2, + op->base.src.transform, + op->base.src.scale, + &v[5], &v[6]); + + dst.p.y = b->box.y1; + v[8] = dst.f; + _sna_get_transformed_scaled(op->base.src.offset[0] + b->box.x1, + op->base.src.offset[1] + b->box.y1, + op->base.src.transform, + op->base.src.scale, + &v[9], &v[10]); + + v[11] = v[7] = v[3] = b->alpha; + + v += 12; + b++; + } while (--nbox); +} + +avx2 fastcall static void +emit_span_linear__avx2(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, + float opacity) +{ + union { + struct sna_coordinate p; + float f; + } dst; + float *v; + + assert(op->base.floats_per_rect == 9); + assert((sna->render.vertex_used % 3) == 0); + v = sna->render.vertices + sna->render.vertex_used; + sna->render.vertex_used += 9; + + dst.p.x = box->x2; + dst.p.y = box->y2; + v[0] = dst.f; + dst.p.x = box->x1; + v[3] = dst.f; + dst.p.y = box->y1; + v[6] = dst.f; + + v[1] = compute_linear(&op->base.src, box->x2, box->y2); + v[4] = compute_linear(&op->base.src, box->x1, box->y2); + v[7] = compute_linear(&op->base.src, box->x1, box->y1); + + v[8] = v[5] = v[2] = opacity; +} + avx2 fastcall static void emit_span_boxes_linear__avx2(const struct sna_composite_spans_op *op, const struct sna_opacity_box *b, int nbox, @@ -2538,7 +2545,7 @@ emit_span_boxes_linear__avx2(const struct sna_composite_spans_op *op, } while (--nbox); } -inline inline static uint32_t +inline static uint32_t gen4_choose_spans_vertex_buffer(const struct sna_composite_op *op) { int id = op->src.is_solid ? 1 : 2 + !op->src.is_affine;