From b68d9f4245d0ebe3371c179401ff145f1a4d101b Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Tue, 8 Apr 2008 16:09:00 -0500 Subject: [PATCH] Use symbolic names for channels in YUV code --- src/packed_yuv_wm.g4a | 110 +++++++++++++++++++++++++++--------------- src/packed_yuv_wm.g4b | 23 ++++----- 2 files changed, 81 insertions(+), 52 deletions(-) diff --git a/src/packed_yuv_wm.g4a b/src/packed_yuv_wm.g4a index 9e635ba8..2be52b5f 100644 --- a/src/packed_yuv_wm.g4a +++ b/src/packed_yuv_wm.g4a @@ -26,6 +26,19 @@ * */ +include(`exa_wm.g4i') + +define(`YCbCr_base', `g12') +define(`Cr', `g12') +define(`Cr_01', `g12') +define(`Cr_23', `g13') +define(`Y', `g14') +define(`Y_01', `g14') +define(`Y_23', `g15') +define(`Cb', `g16') +define(`Cb_01', `g16') +define(`Cb_23', `g17') + /* The initial payload of the thread is always g0. * WM_URB (incoming URB entries) is g3 * X0_R is g4 @@ -117,8 +130,12 @@ mov (8) m4<1>F g7<8,8,1>F { align1 }; * g0 holds the PS thread payload, which (oddly) contains * precisely what the sampler wants to see in m0 */ -send (16) 0 g12<1>UW g0<8,8,1>UW sampler (1,0,F) mlen 5 rlen 8 { align1 }; -mov (8) g19<1>UW g19<8,8,1>UW { align1 }; +send (16) + 0 /* load g0 to m0 */ + YCbCr_base<1>UW + g0<8,8,1>UW + sampler (1,0,F) + mlen 5 rlen 8 { align1 }; /* color space conversion function: * R = Clamp ( 1.164(Y-16/255) + 1.596(Cr-128/255), 0, 1) @@ -133,45 +150,60 @@ mov (8) g19<1>UW g19<8,8,1>UW { align1 }; * G is g3, g7. * B is g4, g8. */ - /* Y = Y - 16/255 */ -add (8) g14<1>F g14<8,8,1>F -0.0627451F { align1 }; - /* Cr = Cr - 128/255 */ -add (8) g12<1>F g12<8,8,1>F -0.501961F { align1 }; - /* Cb = Cb - 128 / 255 */ -add (8) g16<1>F g16<8,8,1>F -0.501961F { align1 }; - /* Y = Y * 1.164 */ -mul (8) g14<1>F g14<8,8,1>F 1.164F { align1 }; - /* acc = 1.596 * Cr */ -mul (8) null g12<8,8,1>F 1.596F { align1 }; - /* R = acc + Y */ -mac.sat (8) m2<1>F g14<8,8,1>F 1F { align1 }; - /* acc = Cr * -0.813 */ -mul (8) null g12<8,8,1>F -0.813F { align1 }; - /* acc += Cb * -0.392 */ -mac (8) null g16<8,8,1>F -0.392F { align1 }; - /* G = acc + Y */ -mac.sat (8) m3<1>F g14<8,8,1>F 1F { align1 }; - /* acc = Cb * 2.017 */ -mul (8) null g16<8,8,1>F 2.017F { align1 }; - /* B = acc + Y */ -mac.sat (8) m4<1>F g14<8,8,1>F 1F { align1 }; - /* and do it again */ -add (8) g15<1>F g15<8,8,1>F -0.0627451F { align1 }; -add (8) g13<1>F g13<8,8,1>F -0.501961F { align1 }; -add (8) g17<1>F g17<8,8,1>F -0.501961F { align1 }; -mul (8) g15<1>F g15<8,8,1>F 1.164F { align1 }; -mul (8) null g13<8,8,1>F 1.596F { align1 }; -mac.sat (8) m6<1>F g15<8,8,1>F 1F { align1 }; -mul (8) null g13<8,8,1>F -0.813F { align1 }; -mac (8) null g17<8,8,1>F -0.392F { align1 }; -mac.sat (8) m7<1>F g15<8,8,1>F 1F { align1 }; -mul (8) null g17<8,8,1>F 2.017F { align1 }; -mac.sat (8) m8<1>F g15<8,8,1>F 1F { align1 }; - /* Pass through control information: + /* Normalize Y, Cb and Cr: + * + * Y = (Y - 16/255) * 1.164 + * Cr = Cr - 128 / 255 + * Cb = Cb - 128 / 255 + */ +add (16) Y<1>F Y<8,8,1>F -0.0627451F { compr align1 }; +mul (16) Y<1>F Y<8,8,1>F 1.164F { compr align1 }; + +add (16) Cr<1>F Cr<8,8,1>F -0.501961F { compr align1 }; + +add (16) Cb<1>F Cb<8,8,1>F -0.501961F { compr align1 }; + + /* + * R = Y + Cr * 1.596 + */ +mul (8) null Cr_01<8,8,1>F 1.596F { align1 }; +mac.sat (8) data_port_r_01<1>F Y_01<8,8,1>F 1F { align1 }; +mul (8) null Cr_23<8,8,1>F 1.596F { align1 }; +mac.sat (8) data_port_r_23<1>F Y_23<8,8,1>F 1F { align1 }; + + /* + * G = Cr * -0.813 + Cb * -0.392 + Y + */ +mul (8) null Cr_01<8,8,1>F -0.813F { align1 }; +mac (8) null Cb_01<8,8,1>F -0.392F { align1 }; +mac.sat (8) data_port_g_01<1>F Y_01<8,8,1>F 1F { align1 }; +mul (8) null Cr_23<8,8,1>F -0.813F { align1 }; +mac (8) null Cb_23<8,8,1>F -0.392F { align1 }; +mac.sat (8) data_port_g_23<1>F Y_23<8,8,1>F 1F { align1 }; + + /* + * B = Cb * 2.017 + Y + */ +mul (8) null Cb_01<8,8,1>F 2.017F { align1 }; +mac.sat (8) data_port_b_01<1>F Y_01<8,8,1>F 1F { align1 }; +mul (8) null Cb_23<8,8,1>F 2.017F { align1 }; +mac.sat (8) data_port_b_23<1>F Y_23<8,8,1>F 1F { align1 }; + + /* + * A = 1.0 + */ +mov (8) data_port_a_01<1>F 1.0F { align1 }; +mov (8) data_port_a_23<1>F 1.0F { align1 }; + + /* + * Pass through control information: + */ +mov (8) m1<1>UD g1<8,8,1>UD { align1 mask_disable }; + + /* + * Send framebuffer write message: XXX: acc0? */ -mov (8) m1<1>UD g1<8,8,1>UD { align1 mask_disable }; - /* Send framebuffer write message: XXX: acc0? */ send (16) 0 acc0<1>UW g0<8,8,1>UW write ( 0, /* binding table index 0 */ 8, /* pixel scoreboard clear */ diff --git a/src/packed_yuv_wm.g4b b/src/packed_yuv_wm.g4b index d72c6510..f2e650a3 100644 --- a/src/packed_yuv_wm.g4b +++ b/src/packed_yuv_wm.g4b @@ -47,29 +47,26 @@ { 0x00600001, 0x206003be, 0x008d00c0, 0x00000000 }, { 0x00600001, 0x208003be, 0x008d00e0, 0x00000000 }, { 0x00800031, 0x21801d29, 0x008d0000, 0x02580001 }, - { 0x00600001, 0x22600129, 0x008d0260, 0x00000000 }, - { 0x00600040, 0x21c07fbd, 0x008d01c0, 0xbd808081 }, - { 0x00600040, 0x21807fbd, 0x008d0180, 0xbf008084 }, - { 0x00600040, 0x22007fbd, 0x008d0200, 0xbf008084 }, - { 0x00600041, 0x21c07fbd, 0x008d01c0, 0x3f94fdf4 }, + { 0x00802040, 0x21c07fbd, 0x008d01c0, 0xbd808081 }, + { 0x00802041, 0x21c07fbd, 0x008d01c0, 0x3f94fdf4 }, + { 0x00802040, 0x21807fbd, 0x008d0180, 0xbf008084 }, + { 0x00802040, 0x22007fbd, 0x008d0200, 0xbf008084 }, { 0x00600041, 0x20007fbc, 0x008d0180, 0x3fcc49ba }, { 0x80600048, 0x20407fbe, 0x008d01c0, 0x3f800000 }, + { 0x00600041, 0x20007fbc, 0x008d01a0, 0x3fcc49ba }, + { 0x80600048, 0x20c07fbe, 0x008d01e0, 0x3f800000 }, { 0x00600041, 0x20007fbc, 0x008d0180, 0xbf5020c5 }, { 0x00600048, 0x20007fbc, 0x008d0200, 0xbec8b439 }, { 0x80600048, 0x20607fbe, 0x008d01c0, 0x3f800000 }, - { 0x00600041, 0x20007fbc, 0x008d0200, 0x40011687 }, - { 0x80600048, 0x20807fbe, 0x008d01c0, 0x3f800000 }, - { 0x00600040, 0x21e07fbd, 0x008d01e0, 0xbd808081 }, - { 0x00600040, 0x21a07fbd, 0x008d01a0, 0xbf008084 }, - { 0x00600040, 0x22207fbd, 0x008d0220, 0xbf008084 }, - { 0x00600041, 0x21e07fbd, 0x008d01e0, 0x3f94fdf4 }, - { 0x00600041, 0x20007fbc, 0x008d01a0, 0x3fcc49ba }, - { 0x80600048, 0x20c07fbe, 0x008d01e0, 0x3f800000 }, { 0x00600041, 0x20007fbc, 0x008d01a0, 0xbf5020c5 }, { 0x00600048, 0x20007fbc, 0x008d0220, 0xbec8b439 }, { 0x80600048, 0x20e07fbe, 0x008d01e0, 0x3f800000 }, + { 0x00600041, 0x20007fbc, 0x008d0200, 0x40011687 }, + { 0x80600048, 0x20807fbe, 0x008d01c0, 0x3f800000 }, { 0x00600041, 0x20007fbc, 0x008d0220, 0x40011687 }, { 0x80600048, 0x21007fbe, 0x008d01e0, 0x3f800000 }, + { 0x00600001, 0x20a003fe, 0x00000000, 0x3f800000 }, + { 0x00600001, 0x212003fe, 0x00000000, 0x3f800000 }, { 0x00600201, 0x20200022, 0x008d0020, 0x00000000 }, { 0x00800031, 0x24001d28, 0x008d0000, 0x85a04800 }, { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },