EXA: try to enable rotation for G965

The new sf/wm programs are meant to handle texture sampling only in
the rotated case. Also fix a possible hole in the VUE slot.
This commit is contained in:
Wang Zhenyu 2007-03-21 14:50:45 +08:00
parent 223944878c
commit 3025fa0fb2
6 changed files with 333 additions and 6 deletions

View File

@ -120,20 +120,24 @@ INTEL_G4A = \
packed_yuv_wm.g4a \
exa_sf.g4a \
exa_sf_mask.g4a \
exa_sf_rotation.g4a \
exa_wm_maskca.g4a \
exa_wm_maskca_srcalpha.g4a \
exa_wm_masknoca.g4a \
exa_wm_nomask.g4a
exa_wm_nomask.g4a \
exa_wm_rotation.g4a
INTEL_G4H = \
sf_prog.h \
wm_prog.h \
exa_sf_mask_prog.h \
exa_sf_prog.h \
exa_sf_rotation_prog.h \
exa_wm_maskca_prog.h \
exa_wm_maskca_srcalpha_prog.h \
exa_wm_masknoca_prog.h \
exa_wm_nomask_prog.h
exa_wm_nomask_prog.h \
exa_wm_rotation_prog.h
EXTRA_DIST = \
$(XMODE_SRCS) \
@ -154,6 +158,9 @@ exa_sf_mask_prog.h: exa_sf_mask.g4a
exa_sf_prog.h: exa_sf.g4a
intel-gen4asm -o exa_sf_prog.h exa_sf.g4a
exa_sf_rotation_prog.h: exa_sf_rotation.g4a
intel-gen4asm -o exa_sf_rotation_prog.h exa_sf_rotation.g4a
exa_wm_maskca_prog.h: exa_wm_maskca.g4a
intel-gen4asm -o exa_wm_maskca_prog.h exa_wm_maskca.g4a
@ -166,6 +173,9 @@ exa_wm_masknoca_prog.h: exa_wm_masknoca.g4a
exa_wm_nomask_prog.h: exa_wm_nomask.g4a
intel-gen4asm -o exa_wm_nomask_prog.h exa_wm_nomask.g4a
exa_wm_rotation_prog.h: exa_wm_rotation.g4a
intel-gen4asm -o exa_wm_rotation_prog.h exa_wm_rotation.g4a
endif

29
src/exa_sf_rotation.g4a Normal file
View File

@ -0,0 +1,29 @@
/*
 * SF (setup) kernel for the rotated-source EXA composite path.
 *
 * It builds the two cross gradients du/dy and dv/dx and writes them, together
 * with the contents of g3, to the URB as the Cx / Cy / Co setup values.
 *
 * NOTE(review): g3 and g4 are presumably the (u, v) attributes of two
 * vertices and g1.12 / g1.20 the screen-space deltas supplied in the SF
 * thread payload -- confirm against the SF payload layout.
 */
/* 1/dx: g6.0 = reciprocal of g1.12, computed by the math unit */
send (1) 0 g6<1>F g1.12<0,1,0>F math inv scalar mlen 1 rlen 1 { align1 };
/* 1/dy: g6.4 = reciprocal of g1.20, computed by the math unit */
send (1) 0 g6.4<1>F g1.20<0,1,0>F math inv scalar mlen 1 rlen 1 { align1 };
/* du, dv: negate g3.0 / g3.4 first, then add g4.0 / g4.4, i.e. g7 = g4 - g3 */
mul (1) g7<1>F g3<0,1,0>F -1.0F { align1 };
mul (1) g7.4<1>F g3.4<0,1,0>F -1.0F { align1 };
add (1) g7<1>F g4<0,1,0>F g7<0,1,0>F { align1 };
add (1) g7.4<1>F g4.4<0,1,0>F g7.4<0,1,0>F { align1 };
/* du/dy: note that du is scaled by 1/dy (g6.4), not 1/dx */
mul (1) g7<1>F g7<0,1,0>F g6.4<0,1,0>F { align1 };
/* dv/dx: likewise dv is scaled by 1/dx (g6.0) */
mul (1) g7.4<1>F g7.4<0,1,0>F g6<0,1,0>F { align1 };
/* Cx: replicate du/dy into all 8 channels of m1 */
mov (8) m1<1>F g7<0,1,0>F { align1 };
/* Cy: replicate dv/dx into all 8 channels of m2 */
mov (8) m2<1>F g7.4<0,1,0>F { align1 };
/* Co: copy g3 unchanged into m3 */
mov (8) m3<1>F g3<8,8,1>F { align1 };
/* write the 4-register message (m0..m3) to the URB, transposed, and end the thread */
send (8) 0 null g0<8,8,1>F urb 0 transpose used complete mlen 4 rlen 0 { align1 EOT };
/* padding after the EOT send; presumably keeps instruction prefetch on valid code -- confirm */
nop;
nop;
nop;
nop;
nop;
nop;
nop;
nop;

View File

@ -0,0 +1,20 @@
{ 0x00000031, 0x20c01fbd, 0x0000002c, 0x01110081 },
{ 0x00000031, 0x20c41fbd, 0x00000034, 0x01110081 },
{ 0x00000041, 0x20e07fbd, 0x00000060, 0xbf800000 },
{ 0x00000041, 0x20e47fbd, 0x00000064, 0xbf800000 },
{ 0x00000040, 0x20e077bd, 0x00000080, 0x000000e0 },
{ 0x00000040, 0x20e477bd, 0x00000084, 0x000000e4 },
{ 0x00000041, 0x20e077bd, 0x000000e0, 0x000000c4 },
{ 0x00000041, 0x20e477bd, 0x000000e4, 0x000000c0 },
{ 0x00600001, 0x202003be, 0x000000e0, 0x00000000 },
{ 0x00600001, 0x204003be, 0x000000e4, 0x00000000 },
{ 0x00600001, 0x206003be, 0x008d0060, 0x00000000 },
{ 0x00600031, 0x20001fbc, 0x008d0000, 0x8640c800 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },

158
src/exa_wm_rotation.g4a Normal file
View File

@ -0,0 +1,158 @@
/*
 * This is for the exa composite operation in the no-mask picture case with a
 * rotated source.
 * The simplest case is just sending what the src picture has to the dst picture;
 * here the texture coordinates are additionally swapped: u is derived from
 * screen y and v from screen x.
 */
/* I think this should be the same as in the g4a program for texture video,
   as we also use 16-pixel dispatch, and the SF scale in g3 is useful for us. */
/* The initial payload of the thread is always g0.
 * WM_URB (incoming URB entries) is g3
 * X0_R is g4
 * X1_R is g5
 * Y0_R is g6
 * Y1_R is g7
 */
/* Set up the X/Y screen coordinates of the pixels in our 4 subspans. Each
 * subspan is a 2x2 rectangle, and the screen x/y of the upper left of each
 * subspan are given in GRF register 1.2 through 1.5 (dword numbering); they
 * are read below as UW values at g1.8 through g1.22.
 *
 * The result is WM_X*_R and WM_Y*_R being:
 *
 * X0: {ss0.x, ss0.x+1, ss0.x, ss0.x+1, ss1.x, ss1.x+1, ss1.x, ss1.x+1}
 * Y0: {ss0.y, ss0.y, ss0.y+1, ss0.y+1, ss1.y, ss1.y, ss1.y+1, ss1.y+1}
 * X1: {ss2.x, ss2.x+1, ss2.x, ss2.x+1, ss3.x, ss3.x+1, ss3.x, ss3.x+1}
 * Y1: {ss2.y, ss2.y, ss2.y+1, ss2.y+1, ss3.y, ss3.y, ss3.y+1, ss3.y+1}
 */
/* Set up ss0.x coordinates */
mov (1) g4<1>F g1.8<0,1,0>UW { align1 };
add (1) g4.4<1>F g1.8<0,1,0>UW 1UB { align1 };
mov (1) g4.8<1>F g1.8<0,1,0>UW { align1 };
add (1) g4.12<1>F g1.8<0,1,0>UW 1UB { align1 };
/* Set up ss0.y coordinates */
mov (1) g6<1>F g1.10<0,1,0>UW { align1 };
mov (1) g6.4<1>F g1.10<0,1,0>UW { align1 };
add (1) g6.8<1>F g1.10<0,1,0>UW 1UB { align1 };
add (1) g6.12<1>F g1.10<0,1,0>UW 1UB { align1 };
/* Set up ss1.x coordinates */
mov (1) g4.16<1>F g1.12<0,1,0>UW { align1 };
add (1) g4.20<1>F g1.12<0,1,0>UW 1UB { align1 };
mov (1) g4.24<1>F g1.12<0,1,0>UW { align1 };
add (1) g4.28<1>F g1.12<0,1,0>UW 1UB { align1 };
/* Set up ss1.y coordinates */
mov (1) g6.16<1>F g1.14<0,1,0>UW { align1 };
mov (1) g6.20<1>F g1.14<0,1,0>UW { align1 };
add (1) g6.24<1>F g1.14<0,1,0>UW 1UB { align1 };
add (1) g6.28<1>F g1.14<0,1,0>UW 1UB { align1 };
/* Set up ss2.x coordinates */
mov (1) g5<1>F g1.16<0,1,0>UW { align1 };
add (1) g5.4<1>F g1.16<0,1,0>UW 1UB { align1 };
mov (1) g5.8<1>F g1.16<0,1,0>UW { align1 };
add (1) g5.12<1>F g1.16<0,1,0>UW 1UB { align1 };
/* Set up ss2.y coordinates */
mov (1) g7<1>F g1.18<0,1,0>UW { align1 };
mov (1) g7.4<1>F g1.18<0,1,0>UW { align1 };
add (1) g7.8<1>F g1.18<0,1,0>UW 1UB { align1 };
add (1) g7.12<1>F g1.18<0,1,0>UW 1UB { align1 };
/* Set up ss3.x coordinates */
mov (1) g5.16<1>F g1.20<0,1,0>UW { align1 };
add (1) g5.20<1>F g1.20<0,1,0>UW 1UB { align1 };
mov (1) g5.24<1>F g1.20<0,1,0>UW { align1 };
add (1) g5.28<1>F g1.20<0,1,0>UW 1UB { align1 };
/* Set up ss3.y coordinates */
mov (1) g7.16<1>F g1.22<0,1,0>UW { align1 };
mov (1) g7.20<1>F g1.22<0,1,0>UW { align1 };
add (1) g7.24<1>F g1.22<0,1,0>UW 1UB { align1 };
add (1) g7.28<1>F g1.22<0,1,0>UW 1UB { align1 };
/* Now, map these screen space coordinates into texture coordinates. */
/* subtract screen-space X origin of vertex 0. */
/* for rotation, texture Y comes from ssX.x, so g4,g5 will hold texture Y */
add (8) g4<1>F g4<8,8,1>F -g1<0,1,0>F { align1 };
add (8) g5<1>F g5<8,8,1>F -g1<0,1,0>F { align1 };
/* scale by the increment in g3.20 (presumably dv/dx from the rotation SF kernel -- confirm) */
mul (8) g4<1>F g4<8,8,1>F g3.20<0,1,0>F { align1 };
mul (8) g5<1>F g5<8,8,1>F g3.20<0,1,0>F { align1 };
/* add in the offset from g3.28; g4,g5 are sent as texture Y (v) below */
add (8) g4<1>F g4<8,8,1>F g3.28<0,1,0>F { align1 };
add (8) g5<1>F g5<8,8,1>F g3.28<0,1,0>F { align1 };
/* texture X comes from ssX.y, so g6,g7 will hold texture X */
/* subtract screen-space Y origin of vertex 0. */
add (8) g6<1>F g6<8,8,1>F -g1.4<0,1,0>F { align1 };
add (8) g7<1>F g7<8,8,1>F -g1.4<0,1,0>F { align1 };
/* scale by the increment in g3.16 (presumably du/dy from the rotation SF kernel -- confirm) */
mul (8) g6<1>F g6<8,8,1>F g3.16<0,1,0>F { align1 };
mul (8) g7<1>F g7<8,8,1>F g3.16<0,1,0>F { align1 };
/* add in the offset from g3.12; g6,g7 are sent as texture X (u) below */
add (8) g6<1>F g6<8,8,1>F g3.12<0,1,0>F { align1 };
add (8) g7<1>F g7<8,8,1>F g3.12<0,1,0>F { align1 };
/* prepare sampler read back gX register, which would be written back to output */
/* use simd16 sampler, param 0 is u, param 1 is v. */
/* 'payload' loading: u comes from g6,g7 (m1,m2) and v from g4,g5 (m3,m4) */
mov (8) m1<1>F g6<8,8,1>F { align1 };
mov (8) m2<1>F g7<8,8,1>F { align1 };
mov (8) m3<1>F g4<8,8,1>F { align1 };
mov (8) m4<1>F g5<8,8,1>F { align1 };
/* m0 will be copied with g0, as it contains send desc */
/* emit sampler 'send' cmd */
send (16) 0 /* msg reg index */
g12<1>UW /* readback */
g0<8,8,1>UW /* copy to msg start reg*/
sampler (1,0,F) /* sampler message description: (binding_table, sampler_index, datatype).
 * here (src->dst) we should use src_sampler and src_surface */
mlen 5 rlen 8 { align1 }; /* required message len 5, readback len 8 */
mov (8) g19<1>UD g19<8,8,1>UD { align1 }; /* touch g19, the last readback register, to wait for the sampler return */
/* if we set up read-back reg correctly, emit dataport write 'send' cmd with EOT */
/* m0, m1 are all direct passed by PS thread payload */
/* NOTE(review): m1 is loaded from g1 again just before the write (with
 * mask_disable), so this first copy looks redundant -- confirm before removing. */
mov (8) m1<1>F g1<8,8,1>F { align1 };
/* prepare data in m2-m5 for subspan(1,0), m6-m9 for subspan(3,2), then it's ready to write */
/* g12 -> m2
g13 -> m6
g14 -> m3
g15 -> m7
g16 -> m4
g17 -> m8
g18 -> m5
g19 -> m9
*/
mov (8) m2<1>F g12<8,8,1>F { align1 };
mov (8) m3<1>F g14<8,8,1>F { align1 };
mov (8) m4<1>F g16<8,8,1>F { align1 };
mov (8) m5<1>F g18<8,8,1>F { align1 };
mov (8) m6<1>F g13<8,8,1>F { align1 };
mov (8) m7<1>F g15<8,8,1>F { align1 };
mov (8) m8<1>F g17<8,8,1>F { align1 };
mov (8) m9<1>F g19<8,8,1>F { align1 };
/* m0, m1 are all direct passed by PS thread payload; copy g1 with the
 * execution mask disabled so every channel of m1 is written */
mov (8) m1<1>UD g1<8,8,1>UD { align1 mask_disable };
/* write the 10-register message (m0..m9) to the render target and end the thread */
send (16) 0 acc0<1>UW g0<8,8,1>UW write (
0, /* binding_table */
8, /* pixel scoreboard clear, msg type simd16 single source */
4, /* render target write */
0 /* no write commit message */
)
mlen 10
rlen 0
{ align1 EOT };
/* padding after the EOT send; presumably keeps instruction prefetch on valid code -- confirm */
nop;
nop;
nop;
nop;
nop;
nop;
nop;
nop;
nop;

View File

@ -0,0 +1,70 @@
{ 0x00000001, 0x2080013d, 0x00000028, 0x00000000 },
{ 0x00000040, 0x20840d3d, 0x00000028, 0x00000001 },
{ 0x00000001, 0x2088013d, 0x00000028, 0x00000000 },
{ 0x00000040, 0x208c0d3d, 0x00000028, 0x00000001 },
{ 0x00000001, 0x20c0013d, 0x0000002a, 0x00000000 },
{ 0x00000001, 0x20c4013d, 0x0000002a, 0x00000000 },
{ 0x00000040, 0x20c80d3d, 0x0000002a, 0x00000001 },
{ 0x00000040, 0x20cc0d3d, 0x0000002a, 0x00000001 },
{ 0x00000001, 0x2090013d, 0x0000002c, 0x00000000 },
{ 0x00000040, 0x20940d3d, 0x0000002c, 0x00000001 },
{ 0x00000001, 0x2098013d, 0x0000002c, 0x00000000 },
{ 0x00000040, 0x209c0d3d, 0x0000002c, 0x00000001 },
{ 0x00000001, 0x20d0013d, 0x0000002e, 0x00000000 },
{ 0x00000001, 0x20d4013d, 0x0000002e, 0x00000000 },
{ 0x00000040, 0x20d80d3d, 0x0000002e, 0x00000001 },
{ 0x00000040, 0x20dc0d3d, 0x0000002e, 0x00000001 },
{ 0x00000001, 0x20a0013d, 0x00000030, 0x00000000 },
{ 0x00000040, 0x20a40d3d, 0x00000030, 0x00000001 },
{ 0x00000001, 0x20a8013d, 0x00000030, 0x00000000 },
{ 0x00000040, 0x20ac0d3d, 0x00000030, 0x00000001 },
{ 0x00000001, 0x20e0013d, 0x00000032, 0x00000000 },
{ 0x00000001, 0x20e4013d, 0x00000032, 0x00000000 },
{ 0x00000040, 0x20e80d3d, 0x00000032, 0x00000001 },
{ 0x00000040, 0x20ec0d3d, 0x00000032, 0x00000001 },
{ 0x00000001, 0x20b0013d, 0x00000034, 0x00000000 },
{ 0x00000040, 0x20b40d3d, 0x00000034, 0x00000001 },
{ 0x00000001, 0x20b8013d, 0x00000034, 0x00000000 },
{ 0x00000040, 0x20bc0d3d, 0x00000034, 0x00000001 },
{ 0x00000001, 0x20f0013d, 0x00000036, 0x00000000 },
{ 0x00000001, 0x20f4013d, 0x00000036, 0x00000000 },
{ 0x00000040, 0x20f80d3d, 0x00000036, 0x00000001 },
{ 0x00000040, 0x20fc0d3d, 0x00000036, 0x00000001 },
{ 0x00600040, 0x208077bd, 0x008d0080, 0x00004020 },
{ 0x00600040, 0x20a077bd, 0x008d00a0, 0x00004020 },
{ 0x00600041, 0x208077bd, 0x008d0080, 0x00000074 },
{ 0x00600041, 0x20a077bd, 0x008d00a0, 0x00000074 },
{ 0x00600040, 0x208077bd, 0x008d0080, 0x0000007c },
{ 0x00600040, 0x20a077bd, 0x008d00a0, 0x0000007c },
{ 0x00600040, 0x20c077bd, 0x008d00c0, 0x00004024 },
{ 0x00600040, 0x20e077bd, 0x008d00e0, 0x00004024 },
{ 0x00600041, 0x20c077bd, 0x008d00c0, 0x00000070 },
{ 0x00600041, 0x20e077bd, 0x008d00e0, 0x00000070 },
{ 0x00600040, 0x20c077bd, 0x008d00c0, 0x0000006c },
{ 0x00600040, 0x20e077bd, 0x008d00e0, 0x0000006c },
{ 0x00600001, 0x202003be, 0x008d00c0, 0x00000000 },
{ 0x00600001, 0x204003be, 0x008d00e0, 0x00000000 },
{ 0x00600001, 0x206003be, 0x008d0080, 0x00000000 },
{ 0x00600001, 0x208003be, 0x008d00a0, 0x00000000 },
{ 0x00800031, 0x21801d29, 0x008d0000, 0x02580001 },
{ 0x00600001, 0x22600021, 0x008d0260, 0x00000000 },
{ 0x00600001, 0x202003be, 0x008d0020, 0x00000000 },
{ 0x00600001, 0x204003be, 0x008d0180, 0x00000000 },
{ 0x00600001, 0x206003be, 0x008d01c0, 0x00000000 },
{ 0x00600001, 0x208003be, 0x008d0200, 0x00000000 },
{ 0x00600001, 0x20a003be, 0x008d0240, 0x00000000 },
{ 0x00600001, 0x20c003be, 0x008d01a0, 0x00000000 },
{ 0x00600001, 0x20e003be, 0x008d01e0, 0x00000000 },
{ 0x00600001, 0x210003be, 0x008d0220, 0x00000000 },
{ 0x00600001, 0x212003be, 0x008d0260, 0x00000000 },
{ 0x00600201, 0x20200022, 0x008d0020, 0x00000000 },
{ 0x00800031, 0x24001d28, 0x008d0000, 0x85a04800 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },

View File

@ -335,6 +335,10 @@ static const CARD32 sf_kernel_static_mask[][4] = {
#include "exa_sf_mask_prog.h"
};
static const CARD32 sf_kernel_static_rotation[][4] = {
#include "exa_sf_rotation_prog.h"
};
/* ps kernels */
#define PS_KERNEL_NUM_GRF 32
#define PS_MAX_THREADS 32
@ -355,7 +359,12 @@ static const CARD32 ps_kernel_static_masknoca [][4] = {
#include "exa_wm_masknoca_prog.h"
};
static CARD32 i965_get_card_format(PicturePtr pPict)
static const CARD32 ps_kernel_static_rotation [][4] = {
#include "exa_wm_rotation_prog.h"
};
static CARD32
i965_get_card_format(PicturePtr pPict)
{
int i;
@ -368,6 +377,21 @@ static CARD32 i965_get_card_format(PicturePtr pPict)
return i965_tex_formats[i].card_fmt;
}
/**
 * Report whether a picture transform is one of the two exact right-angle
 * rotations recognised by this driver.
 *
 * The comparison is made on the raw fixed-point matrix entries.  Converting
 * with xFixedToInt() first would shift the fraction away, so values such as
 * -0.5 (which shifts to -1) or 1.5 (which shifts to 1) would wrongly be
 * accepted as rotations.  The diagonal terms must also be zero, otherwise
 * the matrix carries a non-rotational x->x or y->y contribution.
 *
 * @param t  transform to inspect; must not be NULL.
 * @return   TRUE when matrix[0][1] / matrix[1][0] are exactly (-1, 1) or
 *           (1, -1) and matrix[0][0] / matrix[1][1] are zero, FALSE otherwise.
 */
static Bool
i965_check_rotation_transform(PictTransformPtr t)
{
    /* XXX the set of accepted matrices is still arbitrary */
    xFixed xy = t->matrix[0][1];
    xFixed yx = t->matrix[1][0];

    /* An exact right-angle rotation has an all-zero diagonal. */
    if (t->matrix[0][0] != 0 || t->matrix[1][1] != 0)
        return FALSE;

    if (xy == -xFixed1 && yx == xFixed1)
        return TRUE;
    if (xy == xFixed1 && yx == -xFixed1)
        return TRUE;

    return FALSE;
}
Bool
i965_prepare_composite(int op, PicturePtr pSrcPicture,
PicturePtr pMaskPicture, PicturePtr pDstPicture,
@ -378,6 +402,7 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
CARD32 src_offset, src_pitch;
CARD32 mask_offset = 0, mask_pitch = 0;
CARD32 dst_format, dst_offset, dst_pitch;
Bool rotation_program = FALSE;
#ifdef XF86DRI
if (pI830->directRenderingEnabled) {
@ -406,6 +431,9 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
pI830->transform[1] = NULL;
pI830->scale_units[1][0] = -1;
pI830->scale_units[1][1] = -1;
if (pI830->transform[0] &&
i965_check_rotation_transform(pI830->transform[0]))
rotation_program = TRUE;
} else {
pI830->transform[1] = pMaskPicture->transform;
pI830->scale_units[1][0] = pMask->drawable.width;
@ -442,7 +470,9 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
sf_kernel_offset = ALIGN(next_offset, 64);
if (pMask)
next_offset = sf_kernel_offset + sizeof (sf_kernel_static_mask);
else
else if (rotation_program)
next_offset = sf_kernel_offset + sizeof (sf_kernel_static_rotation);
else
next_offset = sf_kernel_offset + sizeof (sf_kernel_static);
ps_kernel_offset = ALIGN(next_offset, 64);
@ -459,6 +489,8 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
} else
next_offset = ps_kernel_offset +
sizeof(ps_kernel_static_masknoca);
} else if (rotation_program) {
next_offset = ps_kernel_offset + sizeof (ps_kernel_static_rotation);
} else {
next_offset = ps_kernel_offset + sizeof (ps_kernel_static_nomask);
}
@ -762,6 +794,9 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
*/
/*
 * Upload the SF kernel that matches the chosen program.  Each branch copies
 * exactly the size of the array it reads from, which is also the size that
 * was reserved for the kernel when the offsets were laid out.
 */
if (pMask)
    memcpy(sf_kernel, sf_kernel_static_mask,
           sizeof (sf_kernel_static_mask));
else if (rotation_program)
    memcpy(sf_kernel, sf_kernel_static_rotation,
           sizeof (sf_kernel_static_rotation));
else
    memcpy(sf_kernel, sf_kernel_static, sizeof (sf_kernel_static));
@ -808,6 +843,9 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
} else
memcpy(ps_kernel, ps_kernel_static_masknoca,
sizeof (ps_kernel_static_masknoca));
} else if (rotation_program) {
memcpy(ps_kernel, ps_kernel_static_rotation,
sizeof (ps_kernel_static_rotation));
} else {
memcpy(ps_kernel, ps_kernel_static_nomask,
sizeof (ps_kernel_static_nomask));
@ -973,8 +1011,10 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture,
(0 << VE0_OFFSET_SHIFT));
OUT_RING((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
(BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
(BRW_VFCOMPONENT_NOSTORE << VE1_VFCOMPONENT_2_SHIFT) |
(BRW_VFCOMPONENT_NOSTORE << VE1_VFCOMPONENT_3_SHIFT) |
((pMask ? BRW_VFCOMPONENT_NOSTORE: BRW_VFCOMPONENT_STORE_1_FLT)
<< VE1_VFCOMPONENT_2_SHIFT) |
((pMask ? BRW_VFCOMPONENT_NOSTORE: BRW_VFCOMPONENT_STORE_1_FLT)
<< VE1_VFCOMPONENT_3_SHIFT) |
(0 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
if (pMask) {
OUT_RING((0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |