Shrink WM thread to 32 registers and 1024 scratch space.
Saving registers means we can run more in parallel.
This commit is contained in:
parent
a6492661ae
commit
bfd803e085
|
|
@ -57,7 +57,7 @@ define(`mask_dw_dy', `g6.4<0,1,0>F')
|
|||
define(`mask_wo', `g6.12<0,1,0>F')
|
||||
|
||||
/*
|
||||
* Local variables
|
||||
* Local variables. Pairs must be aligned on an even register boundary
|
||||
*/
|
||||
|
||||
/* this holds the X dest coordinates */
|
||||
|
|
@ -71,14 +71,14 @@ define(`dst_y_0', `dst_y')
|
|||
define(`dst_y_1', `g11')
|
||||
|
||||
/* When computing x * dn/dx, use this */
|
||||
define(`temp_x', `g34')
|
||||
define(`temp_x', `g30')
|
||||
define(`temp_x_0', `temp_x')
|
||||
define(`temp_x_1', `g35')
|
||||
define(`temp_x_1', `g31')
|
||||
|
||||
/* When computing y * dn/dy, use this */
|
||||
define(`temp_y', `g32')
|
||||
define(`temp_y', `g28')
|
||||
define(`temp_y_0', temp_y)
|
||||
define(`temp_y_1', `g33')
|
||||
define(`temp_y_1', `g29')
|
||||
|
||||
/* when loading x/y, use these to hold them in UW format */
|
||||
define(`temp_x_uw', temp_x)
|
||||
|
|
@ -90,33 +90,33 @@ define(`src_msg_ind',`1')
|
|||
define(`src_u', `m2')
|
||||
define(`src_v', `m4')
|
||||
define(`src_w', `g12')
|
||||
define(`src_w_0', `g12')
|
||||
define(`src_w_0', `src_w')
|
||||
define(`src_w_1', `g13')
|
||||
|
||||
define(`mask_msg', `m7')
|
||||
define(`mask_msg_ind',`7')
|
||||
define(`mask_u', `m8')
|
||||
define(`mask_v', `m10')
|
||||
define(`mask_w', `g14')
|
||||
define(`mask_w_0', `g14')
|
||||
define(`mask_w_1', `g15')
|
||||
define(`mask_w', `src_w')
|
||||
define(`mask_w_0', `src_w_0')
|
||||
define(`mask_w_1', `src_w_1')
|
||||
|
||||
/* sample src to these registers */
|
||||
define(`src_sample0', `g16')
|
||||
define(`src_sample1', `g17')
|
||||
define(`src_sample2', `g18')
|
||||
define(`src_sample3', `g19')
|
||||
define(`src_sample4', `g20')
|
||||
define(`src_sample5', `g21')
|
||||
define(`src_sample6', `g22')
|
||||
define(`src_sample7', `g23')
|
||||
define(`src_sample0', `g14')
|
||||
define(`src_sample1', `g15')
|
||||
define(`src_sample2', `g16')
|
||||
define(`src_sample3', `g17')
|
||||
define(`src_sample4', `g18')
|
||||
define(`src_sample5', `g19')
|
||||
define(`src_sample6', `g20')
|
||||
define(`src_sample7', `g21')
|
||||
|
||||
/* sample mask to these registers */
|
||||
define(`mask_sample0', `g24')
|
||||
define(`mask_sample1', `g25')
|
||||
define(`mask_sample2', `g26')
|
||||
define(`mask_sample3', `g27')
|
||||
define(`mask_sample4', `g28')
|
||||
define(`mask_sample5', `g29')
|
||||
define(`mask_sample6', `g30')
|
||||
define(`mask_sample7', `g31')
|
||||
define(`mask_sample0', `g22')
|
||||
define(`mask_sample1', `g23')
|
||||
define(`mask_sample2', `g24')
|
||||
define(`mask_sample3', `g25')
|
||||
define(`mask_sample4', `g26')
|
||||
define(`mask_sample5', `g27')
|
||||
define(`mask_sample6', `g28')
|
||||
define(`mask_sample7', `g29')
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
{ 0x00802041, 0x21c077bd, 0x008d01c0, 0x008d02c0 },
|
||||
{ 0x00802041, 0x220077bd, 0x008d0200, 0x008d0300 },
|
||||
{ 0x00802041, 0x224077bd, 0x008d0240, 0x008d0340 },
|
||||
{ 0x00802041, 0x228077bd, 0x008d0280, 0x008d0380 },
|
||||
{ 0x00802041, 0x22c077bd, 0x008d02c0, 0x008d03c0 },
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
{ 0x00802041, 0x220077bd, 0x008d0300, 0x008d02c0 },
|
||||
{ 0x00802041, 0x224077bd, 0x008d0340, 0x008d02c0 },
|
||||
{ 0x00802041, 0x228077bd, 0x008d0380, 0x008d02c0 },
|
||||
{ 0x00802041, 0x22c077bd, 0x008d03c0, 0x008d02c0 },
|
||||
{ 0x00802041, 0x21c077bd, 0x008d02c0, 0x008d0280 },
|
||||
{ 0x00802041, 0x220077bd, 0x008d0300, 0x008d0280 },
|
||||
{ 0x00802041, 0x224077bd, 0x008d0340, 0x008d0280 },
|
||||
{ 0x00802041, 0x228077bd, 0x008d0380, 0x008d0280 },
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
{ 0x00802041, 0x244077bd, 0x008d0100, 0x000000a0 },
|
||||
{ 0x00802041, 0x240077bd, 0x008d0140, 0x000000a4 },
|
||||
{ 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
|
||||
{ 0x00802040, 0x210077be, 0x008d0440, 0x000000ac },
|
||||
{ 0x00802041, 0x244077bd, 0x008d0100, 0x000000b0 },
|
||||
{ 0x00802041, 0x240077bd, 0x008d0140, 0x000000b4 },
|
||||
{ 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
|
||||
{ 0x00802040, 0x214077be, 0x008d0440, 0x000000bc },
|
||||
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x000000a0 },
|
||||
{ 0x00802041, 0x238077bd, 0x008d0140, 0x000000a4 },
|
||||
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
|
||||
{ 0x00802040, 0x210077be, 0x008d03c0, 0x000000ac },
|
||||
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x000000b0 },
|
||||
{ 0x00802041, 0x238077bd, 0x008d0140, 0x000000b4 },
|
||||
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
|
||||
{ 0x00802040, 0x214077be, 0x008d03c0, 0x000000bc },
|
||||
|
|
|
|||
|
|
@ -1,16 +1,16 @@
|
|||
{ 0x00802041, 0x244077bd, 0x008d0100, 0x000000c0 },
|
||||
{ 0x00802041, 0x240077bd, 0x008d0140, 0x000000c4 },
|
||||
{ 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
|
||||
{ 0x00802040, 0x244077bd, 0x008d0440, 0x000000cc },
|
||||
{ 0x00600031, 0x21c01fbd, 0x008d0440, 0x01110001 },
|
||||
{ 0x00600031, 0x21e01fbd, 0x008d0460, 0x01110001 },
|
||||
{ 0x00802041, 0x244077bd, 0x008d0100, 0x000000a0 },
|
||||
{ 0x00802041, 0x240077bd, 0x008d0140, 0x000000a4 },
|
||||
{ 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
|
||||
{ 0x00802040, 0x244077bd, 0x008d0440, 0x000000ac },
|
||||
{ 0x00802041, 0x210077be, 0x008d0440, 0x008d01c0 },
|
||||
{ 0x00802041, 0x244077bd, 0x008d0100, 0x000000b0 },
|
||||
{ 0x00802041, 0x240077bd, 0x008d0140, 0x000000b4 },
|
||||
{ 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
|
||||
{ 0x00802040, 0x244077bd, 0x008d0440, 0x000000bc },
|
||||
{ 0x00802041, 0x214077be, 0x008d0440, 0x008d01c0 },
|
||||
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x000000c0 },
|
||||
{ 0x00802041, 0x238077bd, 0x008d0140, 0x000000c4 },
|
||||
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
|
||||
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x000000cc },
|
||||
{ 0x00600031, 0x21801fbd, 0x008d03c0, 0x01110001 },
|
||||
{ 0x00600031, 0x21a01fbd, 0x008d03e0, 0x01110001 },
|
||||
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x000000a0 },
|
||||
{ 0x00802041, 0x238077bd, 0x008d0140, 0x000000a4 },
|
||||
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
|
||||
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x000000ac },
|
||||
{ 0x00802041, 0x210077be, 0x008d03c0, 0x008d0180 },
|
||||
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x000000b0 },
|
||||
{ 0x00802041, 0x238077bd, 0x008d0140, 0x000000b4 },
|
||||
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
|
||||
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x000000bc },
|
||||
{ 0x00802041, 0x214077be, 0x008d03c0, 0x008d0180 },
|
||||
|
|
|
|||
|
|
@ -1,2 +1,2 @@
|
|||
{ 0x00000201, 0x20080061, 0x00000000, 0x00007000 },
|
||||
{ 0x07800031, 0x23c01d29, 0x008d0000, 0x02520102 },
|
||||
{ 0x07800031, 0x23801d29, 0x008d0000, 0x02520102 },
|
||||
|
|
|
|||
|
|
@ -1,2 +1,2 @@
|
|||
{ 0x00000201, 0x20080061, 0x00000000, 0x00000000 },
|
||||
{ 0x07800031, 0x23001d29, 0x008d0000, 0x02580102 },
|
||||
{ 0x07800031, 0x22c01d29, 0x008d0000, 0x02580102 },
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
{ 0x00802041, 0x220077bd, 0x008d0200, 0x008d03c0 },
|
||||
{ 0x00802041, 0x224077bd, 0x008d0240, 0x008d03c0 },
|
||||
{ 0x00802041, 0x228077bd, 0x008d0280, 0x008d03c0 },
|
||||
{ 0x00802041, 0x22c077bd, 0x008d02c0, 0x008d03c0 },
|
||||
{ 0x00802041, 0x21c077bd, 0x008d01c0, 0x008d0380 },
|
||||
{ 0x00802041, 0x220077bd, 0x008d0200, 0x008d0380 },
|
||||
{ 0x00802041, 0x224077bd, 0x008d0240, 0x008d0380 },
|
||||
{ 0x00802041, 0x228077bd, 0x008d0280, 0x008d0380 },
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
{ 0x00802041, 0x244077bd, 0x008d0100, 0x00000060 },
|
||||
{ 0x00802041, 0x240077bd, 0x008d0140, 0x00000064 },
|
||||
{ 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
|
||||
{ 0x00802040, 0x204077be, 0x008d0440, 0x0000006c },
|
||||
{ 0x00802041, 0x244077bd, 0x008d0100, 0x00000070 },
|
||||
{ 0x00802041, 0x240077bd, 0x008d0140, 0x00000074 },
|
||||
{ 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
|
||||
{ 0x00802040, 0x208077be, 0x008d0440, 0x0000007c },
|
||||
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x00000060 },
|
||||
{ 0x00802041, 0x238077bd, 0x008d0140, 0x00000064 },
|
||||
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
|
||||
{ 0x00802040, 0x204077be, 0x008d03c0, 0x0000006c },
|
||||
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x00000070 },
|
||||
{ 0x00802041, 0x238077bd, 0x008d0140, 0x00000074 },
|
||||
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
|
||||
{ 0x00802040, 0x208077be, 0x008d03c0, 0x0000007c },
|
||||
|
|
|
|||
|
|
@ -1,16 +1,16 @@
|
|||
{ 0x00802041, 0x244077bd, 0x008d0100, 0x00000080 },
|
||||
{ 0x00802041, 0x240077bd, 0x008d0140, 0x00000084 },
|
||||
{ 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
|
||||
{ 0x00802040, 0x244077bd, 0x008d0440, 0x0000008c },
|
||||
{ 0x00600031, 0x21801fbd, 0x008d0440, 0x01110001 },
|
||||
{ 0x00600031, 0x21a01fbd, 0x008d0460, 0x01110001 },
|
||||
{ 0x00802041, 0x244077bd, 0x008d0100, 0x00000060 },
|
||||
{ 0x00802041, 0x240077bd, 0x008d0140, 0x00000064 },
|
||||
{ 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
|
||||
{ 0x00802040, 0x244077bd, 0x008d0440, 0x0000006c },
|
||||
{ 0x00802041, 0x204077be, 0x008d0440, 0x008d0180 },
|
||||
{ 0x00802041, 0x244077bd, 0x008d0100, 0x00000070 },
|
||||
{ 0x00802041, 0x240077bd, 0x008d0140, 0x00000074 },
|
||||
{ 0x00802040, 0x244077bd, 0x008d0440, 0x008d0400 },
|
||||
{ 0x00802040, 0x244077bd, 0x008d0440, 0x0000007c },
|
||||
{ 0x00802041, 0x208077be, 0x008d0440, 0x008d0180 },
|
||||
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x00000080 },
|
||||
{ 0x00802041, 0x238077bd, 0x008d0140, 0x00000084 },
|
||||
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
|
||||
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x0000008c },
|
||||
{ 0x00600031, 0x21801fbd, 0x008d03c0, 0x01110001 },
|
||||
{ 0x00600031, 0x21a01fbd, 0x008d03e0, 0x01110001 },
|
||||
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x00000060 },
|
||||
{ 0x00802041, 0x238077bd, 0x008d0140, 0x00000064 },
|
||||
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
|
||||
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x0000006c },
|
||||
{ 0x00802041, 0x204077be, 0x008d03c0, 0x008d0180 },
|
||||
{ 0x00802041, 0x23c077bd, 0x008d0100, 0x00000070 },
|
||||
{ 0x00802041, 0x238077bd, 0x008d0140, 0x00000074 },
|
||||
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x008d0380 },
|
||||
{ 0x00802040, 0x23c077bd, 0x008d03c0, 0x0000007c },
|
||||
{ 0x00802041, 0x208077be, 0x008d03c0, 0x008d0180 },
|
||||
|
|
|
|||
|
|
@ -1,2 +1,2 @@
|
|||
{ 0x00000201, 0x20080061, 0x00000000, 0x00007000 },
|
||||
{ 0x01800031, 0x22c01d29, 0x008d0000, 0x02520001 },
|
||||
{ 0x01800031, 0x22801d29, 0x008d0000, 0x02520001 },
|
||||
|
|
|
|||
|
|
@ -1,2 +1,2 @@
|
|||
{ 0x00000201, 0x20080061, 0x00000000, 0x00000000 },
|
||||
{ 0x01800031, 0x22001d29, 0x008d0000, 0x02580001 },
|
||||
{ 0x01800031, 0x21c01d29, 0x008d0000, 0x02580001 },
|
||||
|
|
|
|||
|
|
@ -1,11 +1,11 @@
|
|||
{ 0x00600001, 0x204003be, 0x008d0200, 0x00000000 },
|
||||
{ 0x00600001, 0x206003be, 0x008d0240, 0x00000000 },
|
||||
{ 0x00600001, 0x208003be, 0x008d0280, 0x00000000 },
|
||||
{ 0x00600001, 0x20a003be, 0x008d02c0, 0x00000000 },
|
||||
{ 0x00600001, 0x20c003be, 0x008d0220, 0x00000000 },
|
||||
{ 0x00600001, 0x20e003be, 0x008d0260, 0x00000000 },
|
||||
{ 0x00600001, 0x210003be, 0x008d02a0, 0x00000000 },
|
||||
{ 0x00600001, 0x212003be, 0x008d02e0, 0x00000000 },
|
||||
{ 0x00600001, 0x204003be, 0x008d01c0, 0x00000000 },
|
||||
{ 0x00600001, 0x206003be, 0x008d0200, 0x00000000 },
|
||||
{ 0x00600001, 0x208003be, 0x008d0240, 0x00000000 },
|
||||
{ 0x00600001, 0x20a003be, 0x008d0280, 0x00000000 },
|
||||
{ 0x00600001, 0x20c003be, 0x008d01e0, 0x00000000 },
|
||||
{ 0x00600001, 0x20e003be, 0x008d0220, 0x00000000 },
|
||||
{ 0x00600001, 0x210003be, 0x008d0260, 0x00000000 },
|
||||
{ 0x00600001, 0x212003be, 0x008d02a0, 0x00000000 },
|
||||
{ 0x00600001, 0x20200022, 0x008d0020, 0x00000000 },
|
||||
{ 0x00800031, 0x24001d28, 0x008d0000, 0x85a04800 },
|
||||
{ 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
{ 0x00800040, 0x24406d29, 0x00480028, 0x10101010 },
|
||||
{ 0x00800040, 0x24006d29, 0x0048002a, 0x11001100 },
|
||||
{ 0x00802040, 0x2100753d, 0x008d0440, 0x00004020 },
|
||||
{ 0x00802040, 0x2140753d, 0x008d0400, 0x00004024 },
|
||||
{ 0x00800040, 0x23c06d29, 0x00480028, 0x10101010 },
|
||||
{ 0x00800040, 0x23806d29, 0x0048002a, 0x11001100 },
|
||||
{ 0x00802040, 0x2100753d, 0x008d03c0, 0x00004020 },
|
||||
{ 0x00802040, 0x2140753d, 0x008d0380, 0x00004024 },
|
||||
|
|
|
|||
|
|
@ -329,10 +329,10 @@ static const uint32_t sf_kernel_static_mask[][4] = {
|
|||
};
|
||||
|
||||
/* ps kernels */
|
||||
#define PS_KERNEL_NUM_GRF 48
|
||||
#define PS_KERNEL_NUM_GRF 32
|
||||
#define PS_MAX_THREADS 32
|
||||
#define PS_SCRATCH_SPACE 2048
|
||||
#define PS_SCRATCH_SPACE_LOG 1 /* log2 (PS_SCRATCH_SPACE) - 10 (1024 is 0, 2048 is 1) */
|
||||
#define PS_SCRATCH_SPACE 1024
|
||||
#define PS_SCRATCH_SPACE_LOG 0 /* log2 (PS_SCRATCH_SPACE) - 10 (1024 is 0, 2048 is 1) */
|
||||
|
||||
static const uint32_t ps_kernel_static_nomask_affine [][4] = {
|
||||
#include "exa_wm_xy.g4b"
|
||||
|
|
|
|||
Loading…
Reference in New Issue