Shrink WM thread to 32 registers and 1024 scratch space.

Saving registers means we can run more threads in parallel.
This commit is contained in:
Keith Packard 2008-04-01 00:06:08 -07:00
parent a6492661ae
commit bfd803e085
15 changed files with 101 additions and 101 deletions

View File

@ -57,7 +57,7 @@ define(`mask_dw_dy', `g6.4<0,1,0>F')
define(`mask_wo', `g6.12<0,1,0>F')
/*
* Local variables
* Local variables. Pairs must be aligned on an even register boundary
*/
/* this holds the X dest coordinates */
@ -71,14 +71,14 @@ define(`dst_y_0', `dst_y')
define(`dst_y_1', `g11')
/* When computing x * dn/dx, use this */
define(`temp_x', `g34')
define(`temp_x', `g30')
define(`temp_x_0', `temp_x')
define(`temp_x_1', `g35')
define(`temp_x_1', `g31')
/* When computing y * dn/dy, use this */
define(`temp_y', `g32')
define(`temp_y', `g28')
define(`temp_y_0', temp_y)
define(`temp_y_1', `g33')
define(`temp_y_1', `g29')
/* when loading x/y, use these to hold them in UW format */
define(`temp_x_uw', temp_x)
@ -90,33 +90,33 @@ define(`src_msg_ind',`1')
define(`src_u', `m2')
define(`src_v', `m4')
define(`src_w', `g12')
define(`src_w_0', `g12')
define(`src_w_0', `src_w')
define(`src_w_1', `g13')
define(`mask_msg', `m7')
define(`mask_msg_ind',`7')
define(`mask_u', `m8')
define(`mask_v', `m10')
define(`mask_w', `g14')
define(`mask_w_0', `g14')
define(`mask_w_1', `g15')
define(`mask_w', `src_w')
define(`mask_w_0', `src_w_0')
define(`mask_w_1', `src_w_1')
/* sample src to these registers */
define(`src_sample0', `g16')
define(`src_sample1', `g17')
define(`src_sample2', `g18')
define(`src_sample3', `g19')
define(`src_sample4', `g20')
define(`src_sample5', `g21')
define(`src_sample6', `g22')
define(`src_sample7', `g23')
define(`src_sample0', `g14')
define(`src_sample1', `g15')
define(`src_sample2', `g16')
define(`src_sample3', `g17')
define(`src_sample4', `g18')
define(`src_sample5', `g19')
define(`src_sample6', `g20')
define(`src_sample7', `g21')
/* sample mask to these registers */
define(`mask_sample0', `g24')
define(`mask_sample1', `g25')
define(`mask_sample2', `g26')
define(`mask_sample3', `g27')
define(`mask_sample4', `g28')
define(`mask_sample5', `g29')
define(`mask_sample6', `g30')
define(`mask_sample7', `g31')
define(`mask_sample0', `g22')
define(`mask_sample1', `g23')
define(`mask_sample2', `g24')
define(`mask_sample3', `g25')
define(`mask_sample4', `g26')
define(`mask_sample5', `g27')
define(`mask_sample6', `g28')
define(`mask_sample7', `g29')

View File

@ -1,4 +1,4 @@
{ 0x00802041, 0x21c077bd, 0x008d01c0, 0x008d02c0 },
{ 0x00802041, 0x220077bd, 0x008d0200, 0x008d0300 },
{ 0x00802041, 0x224077bd, 0x008d0240, 0x008d0340 },
{ 0x00802041, 0x228077bd, 0x008d0280, 0x008d0380 },
{ 0x00802041, 0x22c077bd, 0x008d02c0, 0x008d03c0 },

View File

@ -1,4 +1,4 @@
{ 0x00802041, 0x220077bd, 0x008d0300, 0x008d02c0 },
{ 0x00802041, 0x224077bd, 0x008d0340, 0x008d02c0 },
{ 0x00802041, 0x228077bd, 0x008d0380, 0x008d02c0 },
{ 0x00802041, 0x22c077bd, 0x008d03c0, 0x008d02c0 },
{ 0x00802041, 0x21c077bd, 0x008d02c0, 0x008d0280 },
{ 0x00802041, 0x220077bd, 0x008d0300, 0x008d0280 },
{ 0x00802041, 0x224077bd, 0x008d0340, 0x008d0280 },
{ 0x00802041, 0x228077bd, 0x008d0380, 0x008d0280 },

View File

@ -1,8 +1,8 @@
{ 0x00802041, 0x244077bd, 0x008d0100, 0x000000a0 },
{ 0x00802041, 0x240077bd, 0x008d0140, 0x000000a4 },
{ 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
{ 0x00802040, 0x210077be, 0x008d0440, 0x000000ac },
{ 0x00802041, 0x244077bd, 0x008d0100, 0x000000b0 },
{ 0x00802041, 0x240077bd, 0x008d0140, 0x000000b4 },
{ 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
{ 0x00802040, 0x214077be, 0x008d0440, 0x000000bc },
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x000000a0 },
{ 0x00802041, 0x238077bd, 0x008d0140, 0x000000a4 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
{ 0x00802040, 0x210077be, 0x008d03c0, 0x000000ac },
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x000000b0 },
{ 0x00802041, 0x238077bd, 0x008d0140, 0x000000b4 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
{ 0x00802040, 0x214077be, 0x008d03c0, 0x000000bc },

View File

@ -1,16 +1,16 @@
{ 0x00802041, 0x244077bd, 0x008d0100, 0x000000c0 },
{ 0x00802041, 0x240077bd, 0x008d0140, 0x000000c4 },
{ 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
{ 0x00802040, 0x244077bd, 0x008d0440, 0x000000cc },
{ 0x00600031, 0x21c01fbd, 0x008d0440, 0x01110001 },
{ 0x00600031, 0x21e01fbd, 0x008d0460, 0x01110001 },
{ 0x00802041, 0x244077bd, 0x008d0100, 0x000000a0 },
{ 0x00802041, 0x240077bd, 0x008d0140, 0x000000a4 },
{ 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
{ 0x00802040, 0x244077bd, 0x008d0440, 0x000000ac },
{ 0x00802041, 0x210077be, 0x008d0440, 0x008d01c0 },
{ 0x00802041, 0x244077bd, 0x008d0100, 0x000000b0 },
{ 0x00802041, 0x240077bd, 0x008d0140, 0x000000b4 },
{ 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
{ 0x00802040, 0x244077bd, 0x008d0440, 0x000000bc },
{ 0x00802041, 0x214077be, 0x008d0440, 0x008d01c0 },
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x000000c0 },
{ 0x00802041, 0x238077bd, 0x008d0140, 0x000000c4 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x000000cc },
{ 0x00600031, 0x21801fbd, 0x008d03c0, 0x01110001 },
{ 0x00600031, 0x21a01fbd, 0x008d03e0, 0x01110001 },
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x000000a0 },
{ 0x00802041, 0x238077bd, 0x008d0140, 0x000000a4 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x000000ac },
{ 0x00802041, 0x210077be, 0x008d03c0, 0x008d0180 },
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x000000b0 },
{ 0x00802041, 0x238077bd, 0x008d0140, 0x000000b4 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x000000bc },
{ 0x00802041, 0x214077be, 0x008d03c0, 0x008d0180 },

View File

@ -1,2 +1,2 @@
{ 0x00000201, 0x20080061, 0x00000000, 0x00007000 },
{ 0x07800031, 0x23c01d29, 0x008d0000, 0x02520102 },
{ 0x07800031, 0x23801d29, 0x008d0000, 0x02520102 },

View File

@ -1,2 +1,2 @@
{ 0x00000201, 0x20080061, 0x00000000, 0x00000000 },
{ 0x07800031, 0x23001d29, 0x008d0000, 0x02580102 },
{ 0x07800031, 0x22c01d29, 0x008d0000, 0x02580102 },

View File

@ -1,4 +1,4 @@
{ 0x00802041, 0x220077bd, 0x008d0200, 0x008d03c0 },
{ 0x00802041, 0x224077bd, 0x008d0240, 0x008d03c0 },
{ 0x00802041, 0x228077bd, 0x008d0280, 0x008d03c0 },
{ 0x00802041, 0x22c077bd, 0x008d02c0, 0x008d03c0 },
{ 0x00802041, 0x21c077bd, 0x008d01c0, 0x008d0380 },
{ 0x00802041, 0x220077bd, 0x008d0200, 0x008d0380 },
{ 0x00802041, 0x224077bd, 0x008d0240, 0x008d0380 },
{ 0x00802041, 0x228077bd, 0x008d0280, 0x008d0380 },

View File

@ -1,8 +1,8 @@
{ 0x00802041, 0x244077bd, 0x008d0100, 0x00000060 },
{ 0x00802041, 0x240077bd, 0x008d0140, 0x00000064 },
{ 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
{ 0x00802040, 0x204077be, 0x008d0440, 0x0000006c },
{ 0x00802041, 0x244077bd, 0x008d0100, 0x00000070 },
{ 0x00802041, 0x240077bd, 0x008d0140, 0x00000074 },
{ 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
{ 0x00802040, 0x208077be, 0x008d0440, 0x0000007c },
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x00000060 },
{ 0x00802041, 0x238077bd, 0x008d0140, 0x00000064 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
{ 0x00802040, 0x204077be, 0x008d03c0, 0x0000006c },
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x00000070 },
{ 0x00802041, 0x238077bd, 0x008d0140, 0x00000074 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
{ 0x00802040, 0x208077be, 0x008d03c0, 0x0000007c },

View File

@ -1,16 +1,16 @@
{ 0x00802041, 0x244077bd, 0x008d0100, 0x00000080 },
{ 0x00802041, 0x240077bd, 0x008d0140, 0x00000084 },
{ 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
{ 0x00802040, 0x244077bd, 0x008d0440, 0x0000008c },
{ 0x00600031, 0x21801fbd, 0x008d0440, 0x01110001 },
{ 0x00600031, 0x21a01fbd, 0x008d0460, 0x01110001 },
{ 0x00802041, 0x244077bd, 0x008d0100, 0x00000060 },
{ 0x00802041, 0x240077bd, 0x008d0140, 0x00000064 },
{ 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
{ 0x00802040, 0x244077bd, 0x008d0440, 0x0000006c },
{ 0x00802041, 0x204077be, 0x008d0440, 0x008d0180 },
{ 0x00802041, 0x244077bd, 0x008d0100, 0x00000070 },
{ 0x00802041, 0x240077bd, 0x008d0140, 0x00000074 },
{ 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
{ 0x00802040, 0x244077bd, 0x008d0440, 0x0000007c },
{ 0x00802041, 0x208077be, 0x008d0440, 0x008d0180 },
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x00000080 },
{ 0x00802041, 0x238077bd, 0x008d0140, 0x00000084 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x0000008c },
{ 0x00600031, 0x21801fbd, 0x008d03c0, 0x01110001 },
{ 0x00600031, 0x21a01fbd, 0x008d03e0, 0x01110001 },
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x00000060 },
{ 0x00802041, 0x238077bd, 0x008d0140, 0x00000064 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x0000006c },
{ 0x00802041, 0x204077be, 0x008d03c0, 0x008d0180 },
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x00000070 },
{ 0x00802041, 0x238077bd, 0x008d0140, 0x00000074 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x0000007c },
{ 0x00802041, 0x208077be, 0x008d03c0, 0x008d0180 },

View File

@ -1,2 +1,2 @@
{ 0x00000201, 0x20080061, 0x00000000, 0x00007000 },
{ 0x01800031, 0x22c01d29, 0x008d0000, 0x02520001 },
{ 0x01800031, 0x22801d29, 0x008d0000, 0x02520001 },

View File

@ -1,2 +1,2 @@
{ 0x00000201, 0x20080061, 0x00000000, 0x00000000 },
{ 0x01800031, 0x22001d29, 0x008d0000, 0x02580001 },
{ 0x01800031, 0x21c01d29, 0x008d0000, 0x02580001 },

View File

@ -1,11 +1,11 @@
{ 0x00600001, 0x204003be, 0x008d0200, 0x00000000 },
{ 0x00600001, 0x206003be, 0x008d0240, 0x00000000 },
{ 0x00600001, 0x208003be, 0x008d0280, 0x00000000 },
{ 0x00600001, 0x20a003be, 0x008d02c0, 0x00000000 },
{ 0x00600001, 0x20c003be, 0x008d0220, 0x00000000 },
{ 0x00600001, 0x20e003be, 0x008d0260, 0x00000000 },
{ 0x00600001, 0x210003be, 0x008d02a0, 0x00000000 },
{ 0x00600001, 0x212003be, 0x008d02e0, 0x00000000 },
{ 0x00600001, 0x204003be, 0x008d01c0, 0x00000000 },
{ 0x00600001, 0x206003be, 0x008d0200, 0x00000000 },
{ 0x00600001, 0x208003be, 0x008d0240, 0x00000000 },
{ 0x00600001, 0x20a003be, 0x008d0280, 0x00000000 },
{ 0x00600001, 0x20c003be, 0x008d01e0, 0x00000000 },
{ 0x00600001, 0x20e003be, 0x008d0220, 0x00000000 },
{ 0x00600001, 0x210003be, 0x008d0260, 0x00000000 },
{ 0x00600001, 0x212003be, 0x008d02a0, 0x00000000 },
{ 0x00600001, 0x20200022, 0x008d0020, 0x00000000 },
{ 0x00800031, 0x24001d28, 0x008d0000, 0x85a04800 },
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },

View File

@ -1,4 +1,4 @@
{ 0x00800040, 0x24406d29, 0x00480028, 0x10101010 },
{ 0x00800040, 0x24006d29, 0x0048002a, 0x11001100 },
{ 0x00802040, 0x2100753d, 0x008d0440, 0x00004020 },
{ 0x00802040, 0x2140753d, 0x008d0400, 0x00004024 },
{ 0x00800040, 0x23c06d29, 0x00480028, 0x10101010 },
{ 0x00800040, 0x23806d29, 0x0048002a, 0x11001100 },
{ 0x00802040, 0x2100753d, 0x008d03c0, 0x00004020 },
{ 0x00802040, 0x2140753d, 0x008d0380, 0x00004024 },

View File

@ -329,10 +329,10 @@ static const uint32_t sf_kernel_static_mask[][4] = {
};
/* ps kernels */
#define PS_KERNEL_NUM_GRF 48
#define PS_KERNEL_NUM_GRF 32
#define PS_MAX_THREADS 32
#define PS_SCRATCH_SPACE 2048
#define PS_SCRATCH_SPACE_LOG 1 /* log2 (PS_SCRATCH_SPACE) - 10 (1024 is 0, 2048 is 1) */
#define PS_SCRATCH_SPACE 1024
#define PS_SCRATCH_SPACE_LOG 0 /* log2 (PS_SCRATCH_SPACE) - 10 (1024 is 0, 2048 is 1) */
static const uint32_t ps_kernel_static_nomask_affine [][4] = {
#include "exa_wm_xy.g4b"