From 492ff1494f782240e6ca68919b2d0b9aa400fc53 Mon Sep 17 00:00:00 2001
From: Keith Packard <keithp@keithp.com>
Date: Sun, 30 Mar 2008 19:14:18 -0700
Subject: [PATCH] Fix the sf_mask program to compute and pass corrected uvw
 coefficients

sf_mask is the same as sf except that it must compute both src and mask uvw
coefficients, which are conveniently adjacent in the same registers, and so
need only an extended execution width.
---
 src/exa_sf_mask.g4a    | 104 ++++++++++++++++++++++++++---------------
 src/exa_sf_mask_prog.h |  20 ++------
 2 files changed, 72 insertions(+), 52 deletions(-)

diff --git a/src/exa_sf_mask.g4a b/src/exa_sf_mask.g4a
index a0d6efc4..c830fd86 100644
--- a/src/exa_sf_mask.g4a
+++ b/src/exa_sf_mask.g4a
@@ -21,52 +21,82 @@
  * IN THE SOFTWARE.
  *
  * Authors:
+ *    Keith Packard <keithp@keithp.com>
+ *    Eric Anholt <eric@anholt.net>
  *    Wang Zhenyu <zhenyu.z.wang@intel.com>
  */
 
-/* FIXME how to setup second coeffient for mask tex coord */
 
-/* 
-   g3 (v0) { u0, v0, 1.0, 1.0 }  ==> {u0, v0, 1.0, 1.0, mu0, mv0, 1.0, 1.0}  Co[0](u0) Co[1](v0) Co[2](mu0) Co[3](mv0)
-   g4 (v1) { u1, v1, 1.0, 1.0 }  ==> {u1, v1, 1.0, 1.0, mu1, mv1, 1.0, 1.0}
-   g5 (v2) { u2, v2 }  ==> (u2, v2, mu2, mv2}
-   g6      { 1/(x1-x0), 1/(y1-y0) }
-   g7      { u1-u0, v1-v0, 0, 0}  ==>{u1-u0, v1-v0,0, 0, mu1-mu0, mv1-mv0, 0, 0}
-	   -> { (u1-u0)/(x1-x0), (v1-v0)/(y1-y0) }  ==>{(u1-u0)/(x1-x0), (v1-v0)/(y1-y0),(mu1-mu0)/(x1-x0), (mv1-mv0)/(y1-y0)
-		Cx,		 Cy 			Cx[0],		 Cy[0],		 Cx[1], 	    Cy[1]
+/*
+ * Inputs (note all sub-register addresses are bytes, not float indices)
+ *
+ * Note that the vertices will have been reordered:
+ *
+ * V0 is topmost (leftmost among topmost) (upper left)
+ * V1 is next clockwise (lower right)
+ * V2 is remaining (lower left)
+ *
+ *  V0 ...................... XX
+ *  |                          .
+ *  |                          .
+ *  |                          .
+ *  V2------------------------V1
+ *
+ *  G0	    thread state -- just pass along
+ *
+ *  G1 and G2 are fixed by SF spec
+ *
+ *  G1.0    reserved
+ *  G1.4    Provoking vertex
+ *  G1.8    Determinant
+ *  G1.12   X1 - X0
+ *  G1.16   X2 - X0
+ *  G1.20   Y1 - Y0
+ *  G1.24   Y2 - Y0
+ *  G1.28   reserved
+ *
+ *  G2.0    Z0
+ *  G2.4    1/W0
+ *  G2.8    Z1
+ *  G2.12   1/W1
+ *  G2.16   Z2
+ *  G2.20   1/W2
+ *  G2.24   reserved
+ *  G2.28   reserved
+ *
+ *  G3 is V0 Vertex Attribute Data from URB (upper left)
+ *
+ *  G3.0    u0
+ *  G3.4    v0
+ *
+ *  G4 is V1 Vertex Attribute Data from URB (lower right)
+ *
+ *  G4.0    u1
+ *  G4.4    v1
+ *
+ *  G5 is V2 Vertex Attribute Data from URB (lower left)
+ *
  */
 
-/* assign Cx[0], Cx[1] to src, same to Cy, Co 
-          Cx[2], Cx[3] to mask, same to Cy, Co */
+/* Compute inverses of the four input deltas at g1.12..g1.24 into g6.0..g6.12 */
+send (4) 0 g6<1>F g1.12<4,4,1>F math inv mlen 1 rlen 1 { align1 };
 
-send (1) 0 g6<1>F g1.12<0,1,0>F math inv scalar mlen 1 rlen 1 { align1 };
-send (1) 0 g6.4<1>F g1.20<0,1,0>F math inv scalar mlen 1 rlen 1 { align1 };
-add (8) g7<1>F g4<8,8,1>F -g3<8,8,1>F { align1 };
-/* Cx[0] */
-mul (1) g7<1>F g7<0,1,0>F g6<0,1,0>F { align1 };
-/* Cy[0] */
-mul (1) g7.4<1>F g7.4<0,1,0>F g6.4<0,1,0>F { align1 };
-/* Cx[2] */
-mul (1) g7.8<1>F g7.8<0,1,0>F g6<0,1,0>F { align1 };
-/* Cy[2] */
-mul (1) g7.12<1>F g7.12<0,1,0>F g6.4<0,1,0>F { align1 };
-
-/* src Cx[0], Cx[1] */
-mov (8) m1<1>F g7<0,1,0>F { align1 };
-/* mask Cx[2], Cx[3] */
-mov (1) m1.8<1>F g7.8<0,1,0>F { align1 };
-mov (1) m1.12<1>F g7.8<0,1,0>F { align1 };
-/* src Cy[0], Cy[1] */
-mov (8) m2<1>F g7.4<0,1,0>F { align1 };
-/* mask Cy[2], Cy[3] */
-mov (1) m2.8<1>F g7.12<0,1,0>F { align1 };
-mov (1) m2.12<1>F g7.12<0,1,0>F { align1 };
-/* src Co[0], Co[1] */
+/* texture location at V0 */
 mov (8) m3<1>F g3<8,8,1>F { align1 };
-/* mask Co[2], Co[3] */
-mov (1) m3.8<1>F g3.8<0,1,0>F { align1 };
-mov (1) m3.12<1>F g3.12<0,1,0>F { align1 };
 
+/* compute V1 - V2 (motion in X) for texture coordinates */
+add (8) g7<1>F g4<8,8,1>F -g5<8,8,1>F { align1 };
+
+/* multiply by 1/dx (g6.0 holds 1/(X1 - X0)) */
+mul (8) m1<1>F g7<8,8,1>F g6.0<0,1,0>F { align1 };
+
+/* Compute V2 - V0 (motion in Y) for texture coordinates */
+add (8) g7<1>F g5<8,8,1>F -g3<8,8,1>F { align1 };
+
+/* multiply by 1/dy (g6.8 holds 1/(Y1 - Y0)) */
+mul (8) m2<1>F g7<8,8,1>F g6.8<0,1,0>F {align1 };
+
+/* and we're done */
 send (8) 0 null g0<8,8,1>F urb 0 transpose used complete mlen 4 rlen 0 { align1 EOT };
 nop;
 nop;
diff --git a/src/exa_sf_mask_prog.h b/src/exa_sf_mask_prog.h
index 4e9114d6..be0a77b0 100644
--- a/src/exa_sf_mask_prog.h
+++ b/src/exa_sf_mask_prog.h
@@ -1,19 +1,9 @@
-   { 0x00000031, 0x20c01fbd, 0x0000002c, 0x01110081 },
-   { 0x00000031, 0x20c41fbd, 0x00000034, 0x01110081 },
-   { 0x00600040, 0x20e077bd, 0x008d0080, 0x008d4060 },
-   { 0x00000041, 0x20e077bd, 0x000000e0, 0x000000c0 },
-   { 0x00000041, 0x20e477bd, 0x000000e4, 0x000000c4 },
-   { 0x00000041, 0x20e877bd, 0x000000e8, 0x000000c0 },
-   { 0x00000041, 0x20ec77bd, 0x000000ec, 0x000000c4 },
-   { 0x00600001, 0x202003be, 0x000000e0, 0x00000000 },
-   { 0x00000001, 0x202803be, 0x000000e8, 0x00000000 },
-   { 0x00000001, 0x202c03be, 0x000000e8, 0x00000000 },
-   { 0x00600001, 0x204003be, 0x000000e4, 0x00000000 },
-   { 0x00000001, 0x204803be, 0x000000ec, 0x00000000 },
-   { 0x00000001, 0x204c03be, 0x000000ec, 0x00000000 },
+   { 0x00400031, 0x20c01fbd, 0x0069002c, 0x01110001 },
    { 0x00600001, 0x206003be, 0x008d0060, 0x00000000 },
-   { 0x00000001, 0x206803be, 0x00000068, 0x00000000 },
-   { 0x00000001, 0x206c03be, 0x0000006c, 0x00000000 },
+   { 0x00600040, 0x20e077bd, 0x008d0080, 0x008d40a0 },
+   { 0x00600041, 0x202077be, 0x008d00e0, 0x000000c0 },
+   { 0x00600040, 0x20e077bd, 0x008d00a0, 0x008d4060 },
+   { 0x00600041, 0x204077be, 0x008d00e0, 0x000000c8 },
    { 0x00600031, 0x20001fbc, 0x008d0000, 0x8640c800 },
    { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
    { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },