sna/gen3: Pack video frames

If you're going to benchmark, you may as well do so favourably.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
This commit is contained in:
Chris Wilson 2011-11-09 15:16:04 +00:00
parent 8e92675952
commit 3252b46e11
1 changed files with 123 additions and 79 deletions

View File

@ -2959,36 +2959,36 @@ gen3_emit_video_state(struct sna *sna,
struct kgem_bo *dst_bo,
int width, int height)
{
uint32_t shader_offset;
uint32_t ms3;
struct gen3_render_state *state = &sna->render_state.gen3;
uint32_t id, ms3, rewind;
gen3_emit_target(sna, dst_bo, width, height,
sna_format_for_depth(pixmap->drawable.depth));
/* XXX share with composite? Is it worth the effort? */
OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
I1_LOAD_S(1) | I1_LOAD_S(2) | I1_LOAD_S(6) |
2);
OUT_BATCH((4 << S1_VERTEX_WIDTH_SHIFT) | (4 << S1_VERTEX_PITCH_SHIFT));
OUT_BATCH(S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D) |
S2_TEXCOORD_FMT(1, TEXCOORDFMT_NOT_PRESENT) |
S2_TEXCOORD_FMT(2, TEXCOORDFMT_NOT_PRESENT) |
S2_TEXCOORD_FMT(3, TEXCOORDFMT_NOT_PRESENT) |
S2_TEXCOORD_FMT(4, TEXCOORDFMT_NOT_PRESENT) |
S2_TEXCOORD_FMT(5, TEXCOORDFMT_NOT_PRESENT) |
S2_TEXCOORD_FMT(6, TEXCOORDFMT_NOT_PRESENT) |
S2_TEXCOORD_FMT(7, TEXCOORDFMT_NOT_PRESENT));
OUT_BATCH((2 << S6_CBUF_SRC_BLEND_FACT_SHIFT) |
(1 << S6_CBUF_DST_BLEND_FACT_SHIFT) |
S6_COLOR_WRITE_ENABLE);
if ((state->last_shader & (1<<31)) == 0) {
OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
I1_LOAD_S(1) | I1_LOAD_S(2) | I1_LOAD_S(6) |
2);
OUT_BATCH((4 << S1_VERTEX_WIDTH_SHIFT) | (4 << S1_VERTEX_PITCH_SHIFT));
OUT_BATCH(S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D) |
S2_TEXCOORD_FMT(1, TEXCOORDFMT_NOT_PRESENT) |
S2_TEXCOORD_FMT(2, TEXCOORDFMT_NOT_PRESENT) |
S2_TEXCOORD_FMT(3, TEXCOORDFMT_NOT_PRESENT) |
S2_TEXCOORD_FMT(4, TEXCOORDFMT_NOT_PRESENT) |
S2_TEXCOORD_FMT(5, TEXCOORDFMT_NOT_PRESENT) |
S2_TEXCOORD_FMT(6, TEXCOORDFMT_NOT_PRESENT) |
S2_TEXCOORD_FMT(7, TEXCOORDFMT_NOT_PRESENT));
OUT_BATCH((2 << S6_CBUF_SRC_BLEND_FACT_SHIFT) |
(1 << S6_CBUF_DST_BLEND_FACT_SHIFT) |
S6_COLOR_WRITE_ENABLE);
sna->render_state.gen3.last_blend = 0;
sna->render_state.gen3.last_sampler = 0;
sna->render_state.gen3.floats_per_vertex = 4;
sna->render_state.gen3.last_shader = -1;
sna->render_state.gen3.last_constants = 0;
state->last_blend = 0;
state->floats_per_vertex = 4;
}
if (!is_planar_fourcc(frame->id)) {
rewind = sna->kgem.nbatch;
OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | 4);
OUT_BATCH(0x0000001); /* constant 0 */
/* constant 0: brightness/contrast */
@ -2996,7 +2996,15 @@ gen3_emit_video_state(struct sna *sna,
OUT_BATCH_F(video->contrast / 255.0);
OUT_BATCH_F(0.0);
OUT_BATCH_F(0.0);
if (state->last_constants &&
memcmp(&sna->kgem.batch[state->last_constants],
&sna->kgem.batch[rewind],
6*sizeof(uint32_t)) == 0)
sna->kgem.nbatch = rewind;
else
state->last_constants = rewind;
rewind = sna->kgem.nbatch;
OUT_BATCH(_3DSTATE_SAMPLER_STATE | 3);
OUT_BATCH(0x00000001);
OUT_BATCH(SS2_COLORSPACE_CONVERSION |
@ -3009,6 +3017,13 @@ gen3_emit_video_state(struct sna *sna,
(0 << SS3_TEXTUREMAP_INDEX_SHIFT) |
SS3_NORMALIZED_COORDS);
OUT_BATCH(0x00000000);
if (state->last_sampler &&
memcmp(&sna->kgem.batch[state->last_sampler],
&sna->kgem.batch[rewind],
5*sizeof(uint32_t)) == 0)
sna->kgem.nbatch = rewind;
else
state->last_sampler = rewind;
OUT_BATCH(_3DSTATE_MAP_STATE | 3);
OUT_BATCH(0x00000001); /* texture map #1 */
@ -3031,15 +3046,23 @@ gen3_emit_video_state(struct sna *sna,
OUT_BATCH(ms3);
OUT_BATCH(((frame->pitch[0] / 4) - 1) << MS4_PITCH_SHIFT);
shader_offset = sna->kgem.nbatch++;
id = 1<<31 | 1<<1 | !!video->brightness;
if (state->last_shader != id) {
state->last_shader = id;
id = sna->kgem.nbatch++;
gen3_fs_dcl(FS_S0);
gen3_fs_dcl(FS_T0);
gen3_fs_texld(FS_OC, FS_S0, FS_T0);
if (video->brightness != 0) {
gen3_fs_add(FS_OC,
gen3_fs_operand_reg(FS_OC),
gen3_fs_operand(FS_C0, X, X, X, ZERO));
gen3_fs_dcl(FS_S0);
gen3_fs_dcl(FS_T0);
gen3_fs_texld(FS_OC, FS_S0, FS_T0);
if (video->brightness != 0) {
gen3_fs_add(FS_OC,
gen3_fs_operand_reg(FS_OC),
gen3_fs_operand(FS_C0, X, X, X, ZERO));
}
sna->kgem.batch[id] =
_3DSTATE_PIXEL_SHADER_PROGRAM |
(sna->kgem.nbatch - id - 2);
}
} else {
/* For the planar formats, we set up three samplers --
@ -3062,6 +3085,7 @@ gen3_emit_video_state(struct sna *sna,
* r3 = (v,v,v,v)
* OC = (r,g,b,1)
*/
rewind = sna->kgem.nbatch;
OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | (22 - 2));
OUT_BATCH(0x000001f); /* constants 0-4 */
/* constant 0: normalization offsets */
@ -3089,7 +3113,15 @@ gen3_emit_video_state(struct sna *sna,
OUT_BATCH_F(video->contrast / 255.0);
OUT_BATCH_F(0.0);
OUT_BATCH_F(0.0);
if (state->last_constants &&
memcmp(&sna->kgem.batch[state->last_constants],
&sna->kgem.batch[rewind],
22*sizeof(uint32_t)) == 0)
sna->kgem.nbatch = rewind;
else
state->last_constants = rewind;
rewind = sna->kgem.nbatch;
OUT_BATCH(_3DSTATE_SAMPLER_STATE | 9);
OUT_BATCH(0x00000007);
/* sampler 0 */
@ -3122,6 +3154,13 @@ gen3_emit_video_state(struct sna *sna,
(2 << SS3_TEXTUREMAP_INDEX_SHIFT) |
SS3_NORMALIZED_COORDS);
OUT_BATCH(0x00000000);
if (state->last_sampler &&
memcmp(&sna->kgem.batch[state->last_sampler],
&sna->kgem.batch[rewind],
11*sizeof(uint32_t)) == 0)
sna->kgem.nbatch = rewind;
else
state->last_sampler = rewind;
OUT_BATCH(_3DSTATE_MAP_STATE | 9);
OUT_BATCH(0x00000007);
@ -3166,63 +3205,68 @@ gen3_emit_video_state(struct sna *sna,
OUT_BATCH(ms3);
OUT_BATCH(((frame->pitch[0] / 4) - 1) << MS4_PITCH_SHIFT);
shader_offset = sna->kgem.nbatch++;
id = 1<<31 | 2<<1 | !!video->brightness;
if (state->last_shader != id) {
state->last_shader = id;
id = sna->kgem.nbatch++;
/* Declare samplers */
gen3_fs_dcl(FS_S0); /* Y */
gen3_fs_dcl(FS_S1); /* U */
gen3_fs_dcl(FS_S2); /* V */
gen3_fs_dcl(FS_T0); /* normalized coords */
/* Declare samplers */
gen3_fs_dcl(FS_S0); /* Y */
gen3_fs_dcl(FS_S1); /* U */
gen3_fs_dcl(FS_S2); /* V */
gen3_fs_dcl(FS_T0); /* normalized coords */
/* Load samplers to temporaries. */
gen3_fs_texld(FS_R1, FS_S0, FS_T0);
gen3_fs_texld(FS_R2, FS_S1, FS_T0);
gen3_fs_texld(FS_R3, FS_S2, FS_T0);
/* Load samplers to temporaries. */
gen3_fs_texld(FS_R1, FS_S0, FS_T0);
gen3_fs_texld(FS_R2, FS_S1, FS_T0);
gen3_fs_texld(FS_R3, FS_S2, FS_T0);
/* Move the sampled YUV data in R[123] to the first
* 3 channels of R0.
*/
gen3_fs_mov_masked(FS_R0, MASK_X,
gen3_fs_operand_reg(FS_R1));
gen3_fs_mov_masked(FS_R0, MASK_Y,
gen3_fs_operand_reg(FS_R2));
gen3_fs_mov_masked(FS_R0, MASK_Z,
gen3_fs_operand_reg(FS_R3));
/* Move the sampled YUV data in R[123] to the first
* 3 channels of R0.
*/
gen3_fs_mov_masked(FS_R0, MASK_X,
gen3_fs_operand_reg(FS_R1));
gen3_fs_mov_masked(FS_R0, MASK_Y,
gen3_fs_operand_reg(FS_R2));
gen3_fs_mov_masked(FS_R0, MASK_Z,
gen3_fs_operand_reg(FS_R3));
/* Normalize the YUV data */
gen3_fs_add(FS_R0, gen3_fs_operand_reg(FS_R0),
gen3_fs_operand_reg(FS_C0));
/* dot-product the YUV data in R0 by the vectors of
* coefficients for calculating R, G, and B, storing
* the results in the R, G, or B channels of the output
* color. The OC results are implicitly clamped
* at the end of the program.
*/
gen3_fs_dp3(FS_OC, MASK_X,
gen3_fs_operand_reg(FS_R0),
gen3_fs_operand_reg(FS_C1));
gen3_fs_dp3(FS_OC, MASK_Y,
gen3_fs_operand_reg(FS_R0),
gen3_fs_operand_reg(FS_C2));
gen3_fs_dp3(FS_OC, MASK_Z,
gen3_fs_operand_reg(FS_R0),
gen3_fs_operand_reg(FS_C3));
/* Set alpha of the output to 1.0, by wiring W to 1
* and not actually using the source.
*/
gen3_fs_mov_masked(FS_OC, MASK_W,
gen3_fs_operand_one());
/* Normalize the YUV data */
gen3_fs_add(FS_R0, gen3_fs_operand_reg(FS_R0),
gen3_fs_operand_reg(FS_C0));
/* dot-product the YUV data in R0 by the vectors of
* coefficients for calculating R, G, and B, storing
* the results in the R, G, or B channels of the output
* color. The OC results are implicitly clamped
* at the end of the program.
*/
gen3_fs_dp3(FS_OC, MASK_X,
gen3_fs_operand_reg(FS_R0),
gen3_fs_operand_reg(FS_C1));
gen3_fs_dp3(FS_OC, MASK_Y,
gen3_fs_operand_reg(FS_R0),
gen3_fs_operand_reg(FS_C2));
gen3_fs_dp3(FS_OC, MASK_Z,
gen3_fs_operand_reg(FS_R0),
gen3_fs_operand_reg(FS_C3));
/* Set alpha of the output to 1.0, by wiring W to 1
* and not actually using the source.
*/
gen3_fs_mov_masked(FS_OC, MASK_W,
gen3_fs_operand_one());
if (video->brightness != 0) {
gen3_fs_add(FS_OC,
gen3_fs_operand_reg(FS_OC),
gen3_fs_operand(FS_C4, X, X, X, ZERO));
if (video->brightness != 0) {
gen3_fs_add(FS_OC,
gen3_fs_operand_reg(FS_OC),
gen3_fs_operand(FS_C4, X, X, X, ZERO));
}
sna->kgem.batch[id] =
_3DSTATE_PIXEL_SHADER_PROGRAM |
(sna->kgem.nbatch - id - 2);
}
}
sna->kgem.batch[shader_offset] =
_3DSTATE_PIXEL_SHADER_PROGRAM |
(sna->kgem.nbatch - shader_offset - 2);
}
static void