i915: Emit component-alpha (CA) Over using OutReverse + Add passes

On PineView:
  578/621 -> 610/617 kglyphs/sec [rgb/aa]
This commit is contained in:
Chris Wilson 2010-05-21 14:33:18 +01:00
parent 80a9e64f50
commit ea07535240
4 changed files with 183 additions and 132 deletions

View File

@ -344,6 +344,7 @@ typedef struct intel_screen_private {
Bool render_mask_is_solid;
Bool needs_render_state_emit;
Bool needs_render_vertex_emit;
Bool needs_render_ca_pass;
/* i830 render accel state */
uint32_t render_dest_format;

View File

@ -1055,7 +1055,6 @@ Bool i830_uxa_init(ScreenPtr screen)
intel->uxa_driver->uxa_major = 1;
intel->uxa_driver->uxa_minor = 0;
intel->needs_render_vertex_emit = TRUE;
intel->prim_offset = 0;
intel->vertex_count = 0;
intel->floats_per_vertex = 0;

View File

@ -85,8 +85,13 @@ void I915EmitInvarientState(ScrnInfoPtr scrn)
ENABLE_STENCIL_WRITE_MASK | STENCIL_WRITE_MASK(0xff) |
ENABLE_STENCIL_TEST_MASK | STENCIL_TEST_MASK(0xff));
OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | 0);
OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | I1_LOAD_S(4) | I1_LOAD_S(5) | 2);
OUT_BATCH(0x00000000); /* Disable texture coordinate wrap-shortest */
OUT_BATCH((1 << S4_POINT_WIDTH_SHIFT) |
S4_LINE_WIDTH_ONE |
S4_CULLMODE_NONE |
S4_VFMT_XY);
OUT_BATCH(0x00000000); /* Stencil. */
OUT_BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
OUT_BATCH(_3DSTATE_SCISSOR_RECT_0_CMD);

View File

@ -133,8 +133,10 @@ static uint32_t i915_get_blend_cntl(int op, PicturePtr mask,
}
}
return (sblend << S6_CBUF_SRC_BLEND_FACT_SHIFT) |
(dblend << S6_CBUF_DST_BLEND_FACT_SHIFT);
return S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE |
(BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT) |
(sblend << S6_CBUF_SRC_BLEND_FACT_SHIFT) |
(dblend << S6_CBUF_DST_BLEND_FACT_SHIFT);
}
#define DSTORG_HORT_BIAS(x) ((x)<<20)
@ -204,11 +206,13 @@ i915_check_composite(int op,
*/
if (i915_blend_op[op].src_alpha &&
(i915_blend_op[op].src_blend != BLENDFACT_ZERO)) {
intel_debug_fallback(scrn,
"Component alpha not supported "
"with source alpha and source "
"value blending.\n");
return FALSE;
if (op != PictOpOver) {
intel_debug_fallback(scrn,
"Component alpha not supported "
"with source alpha and source "
"value blending.\n");
return FALSE;
}
}
}
@ -814,6 +818,23 @@ i915_prepare_composite(int op, PicturePtr source_picture,
if (!i830_get_aperture_space(scrn, bo_table, ARRAY_SIZE(bo_table)))
return FALSE;
intel->needs_render_ca_pass = FALSE;
if (mask_picture != NULL && mask_picture->componentAlpha &&
PICT_FORMAT_RGB(mask_picture->format)) {
/* Check if it's component alpha that relies on a source alpha
* and on the source value. We can only get one of those
* into the single source value that we get to blend with.
*/
if (i915_blend_op[op].src_alpha &&
(i915_blend_op[op].src_blend != BLENDFACT_ZERO)) {
if (op != PictOpOver)
return FALSE;
intel->needs_render_ca_pass = TRUE;
}
}
intel->dst_coord_adjust = 0;
intel->src_coord_adjust = 0;
intel->mask_coord_adjust = 0;
@ -902,6 +923,120 @@ i915_prepare_composite(int op, PicturePtr source_picture,
return TRUE;
}
/**
 * Emit the fragment shader for a composite operation.
 *
 * Between FS_BEGIN() and FS_END() this builds a program that writes to
 * FS_OC either the source value alone (no mask pixmap) or the product of
 * the source and mask values.  Which channels take part in the product
 * depends on the destination format, on whether the mask picture is
 * component-alpha with an RGB format, and on i915_blend_op[op].src_alpha.
 *
 * @param intel  screen private; the source/mask solidity flags, the mask
 *               pixmap and picture, the destination format and the
 *               transform[] array are read from it.
 * @param op     used only as an index into i915_blend_op[] to look up
 *               whether the blend needs the source alpha.
 */
static void
i915_composite_emit_shader(intel_screen_private *intel, CARD8 op)
{
PicturePtr mask_picture = intel->render_mask_picture;
PixmapPtr mask = intel->render_mask;
int src_reg, mask_reg;
Bool is_solid_src, is_solid_mask;
uint32_t dst_format = intel->i915_render_state.dst_format;
int tex_unit, t;
FS_LOCALS();
is_solid_src = intel->render_source_is_solid;
is_solid_mask = intel->render_mask_is_solid;
FS_BEGIN();
/* Declare the registers necessary for our program. */
/* t counts the coordinate/sampler pairs taken by the source: it stays 0
 * for a solid source and becomes 1 otherwise, so a textured mask uses
 * FS_T0 + t / FS_S0 + t.
 */
t = 0;
if (is_solid_src) {
/* Solid source: no sampler is declared and FS_T8 is used as the source
 * register directly.
 * NOTE(review): presumably the solid colour is delivered through this
 * register; that setup is not visible here -- confirm.
 */
i915_fs_dcl(FS_T8);
src_reg = FS_T8;
} else {
i915_fs_dcl(FS_T0);
i915_fs_dcl(FS_S0);
t++;
}
if (!mask) {
/* No mask, so load directly to output color */
if (! is_solid_src) {
/* An 8-bit destination needs the swizzled move below, so the texel is
 * loaded into the temporary FS_R0 instead of straight into FS_OC.
 */
if (dst_format == COLR_BUF_8BIT)
src_reg = FS_R0;
else
src_reg = FS_OC;
if (i830_transform_is_affine(intel->transform[0]))
i915_fs_texld(src_reg, FS_S0, FS_T0);
else
i915_fs_texldp(src_reg, FS_S0, FS_T0);
}
/* Reached with src_reg == FS_T8 (solid source) or FS_R0 (8-bit
 * destination): copy the value into the output colour.
 */
if (src_reg != FS_OC) {
/* For an 8-bit destination the W component is replicated into every
 * output channel.
 */
if (dst_format == COLR_BUF_8BIT)
i915_fs_mov(FS_OC, i915_fs_operand(src_reg, W, W, W, W));
else
i915_fs_mov(FS_OC, i915_fs_operand_reg(src_reg));
}
} else {
if (is_solid_mask) {
/* Solid mask: FS_T9 is used as the mask register directly, mirroring
 * the FS_T8 handling of a solid source above.
 */
i915_fs_dcl(FS_T9);
mask_reg = FS_T9;
} else {
i915_fs_dcl(FS_T0 + t);
i915_fs_dcl(FS_S0 + t);
}
/* tex_unit indexes intel->transform[] and only advances past a textured
 * source.
 */
tex_unit = 0;
if (! is_solid_src) {
/* Load the source_picture texel */
if (i830_transform_is_affine(intel->transform[tex_unit]))
i915_fs_texld(FS_R0, FS_S0, FS_T0);
else
i915_fs_texldp(FS_R0, FS_S0, FS_T0);
src_reg = FS_R0;
tex_unit++;
}
if (! is_solid_mask) {
/* Load the mask_picture texel */
/* NOTE(review): with a solid source tex_unit is still 0 here, so the
 * mask's affinity is read from transform[0].  Confirm that transform[]
 * is packed per texture unit rather than fixed as [0] = source,
 * [1] = mask.
 */
if (i830_transform_is_affine(intel->transform[tex_unit]))
i915_fs_texld(FS_R1, FS_S0 + t, FS_T0 + t);
else
i915_fs_texldp(FS_R1, FS_S0 + t, FS_T0 + t);
mask_reg = FS_R1;
}
if (dst_format == COLR_BUF_8BIT) {
/* 8-bit destination: multiply the W components of source and mask,
 * replicated across all channels.
 */
i915_fs_mul(FS_OC,
i915_fs_operand(src_reg, W, W, W, W),
i915_fs_operand(mask_reg, W, W, W, W));
} else {
/* If component alpha is active in the mask and the blend
 * operation uses the source alpha, then we know we don't
 * need the source value (otherwise we would have hit a
 * fallback earlier), so we provide the source alpha (src.A *
 * mask.X) as output color.
 * Conversely, if CA is set and we don't need the source alpha,
 * then we produce the source value (src.X * mask.X) and the
 * source alpha is unused. Otherwise, we provide the non-CA
 * source value (src.X * mask.A).
 */
if (mask_picture->componentAlpha &&
PICT_FORMAT_RGB(mask_picture->format)) {
if (i915_blend_op[op].src_alpha) {
i915_fs_mul(FS_OC,
i915_fs_operand(src_reg, W, W, W, W),
i915_fs_operand_reg(mask_reg));
} else {
i915_fs_mul(FS_OC,
i915_fs_operand_reg(src_reg),
i915_fs_operand_reg(mask_reg));
}
} else {
i915_fs_mul(FS_OC,
i915_fs_operand_reg(src_reg),
i915_fs_operand(mask_reg, W, W, W, W));
}
}
}
FS_END();
}
static void i915_emit_composite_setup(ScrnInfoPtr scrn)
{
intel_screen_private *intel = intel_get_screen_private(scrn);
@ -911,8 +1046,7 @@ static void i915_emit_composite_setup(ScrnInfoPtr scrn)
PixmapPtr mask = intel->render_mask;
PixmapPtr dest = intel->render_dest;
uint32_t dst_format = intel->i915_render_state.dst_format, dst_pitch;
uint32_t blendctl, tiling_bits;
Bool is_affine_src, is_affine_mask;
uint32_t tiling_bits;
Bool is_solid_src, is_solid_mask;
int tex_count, t;
@ -923,9 +1057,6 @@ static void i915_emit_composite_setup(ScrnInfoPtr scrn)
dst_pitch = intel_get_pixmap_pitch(dest);
is_affine_src = i830_transform_is_affine(intel->transform[0]);
is_affine_mask = i830_transform_is_affine(intel->transform[1]);
is_solid_src = intel->render_source_is_solid;
is_solid_mask = intel->render_mask_is_solid;
@ -982,33 +1113,31 @@ static void i915_emit_composite_setup(ScrnInfoPtr scrn)
{
uint32_t ss2;
OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) |
I1_LOAD_S(4) | I1_LOAD_S(5) | I1_LOAD_S(6) | 3);
ss2 = ~0;
t = 0;
if (! is_solid_src) {
ss2 &= ~S2_TEXCOORD_FMT(t, TEXCOORDFMT_NOT_PRESENT);
ss2 |= S2_TEXCOORD_FMT(t,
is_affine_src ? TEXCOORDFMT_2D :
TEXCOORDFMT_4D);
i830_transform_is_affine(intel->transform[t]) ?
TEXCOORDFMT_2D : TEXCOORDFMT_4D);
t++;
}
if (mask && ! is_solid_mask) {
ss2 &= ~S2_TEXCOORD_FMT(t, TEXCOORDFMT_NOT_PRESENT);
ss2 |= S2_TEXCOORD_FMT(t,
is_affine_mask ? TEXCOORDFMT_2D :
TEXCOORDFMT_4D);
i830_transform_is_affine(intel->transform[t]) ?
TEXCOORDFMT_2D : TEXCOORDFMT_4D);
t++;
}
OUT_BATCH(ss2);
OUT_BATCH((1 << S4_POINT_WIDTH_SHIFT) | S4_LINE_WIDTH_ONE |
S4_CULLMODE_NONE | S4_VFMT_XY);
blendctl =
i915_get_blend_cntl(op, mask_picture, dest_picture->format);
OUT_BATCH(0x00000000); /* Disable stencil buffer */
OUT_BATCH(S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE |
(BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT) |
blendctl);
if (intel->needs_render_ca_pass) {
OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) | 0);
OUT_BATCH(ss2);
} else {
OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) | I1_LOAD_S(6) | 1);
OUT_BATCH(ss2);
OUT_BATCH(i915_get_blend_cntl(op, mask_picture, dest_picture->format));
}
/* draw rect is unconditional */
OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
@ -1020,109 +1149,8 @@ static void i915_emit_composite_setup(ScrnInfoPtr scrn)
OUT_BATCH(0x00000000);
}
{
FS_LOCALS();
int src_reg, mask_reg;
FS_BEGIN();
/* Declare the registers necessary for our program. */
t = 0;
if (is_solid_src) {
i915_fs_dcl(FS_T8);
src_reg = FS_T8;
} else {
i915_fs_dcl(FS_T0);
i915_fs_dcl(FS_S0);
t++;
}
if (!mask) {
/* No mask, so load directly to output color */
if (! is_solid_src) {
if (dst_format == COLR_BUF_8BIT)
src_reg = FS_R0;
else
src_reg = FS_OC;
if (is_affine_src)
i915_fs_texld(src_reg, FS_S0, FS_T0);
else
i915_fs_texldp(src_reg, FS_S0, FS_T0);
}
if (src_reg != FS_OC) {
if (dst_format == COLR_BUF_8BIT)
i915_fs_mov(FS_OC, i915_fs_operand(src_reg, W, W, W, W));
else
i915_fs_mov(FS_OC, i915_fs_operand_reg(src_reg));
}
} else {
if (is_solid_mask) {
i915_fs_dcl(FS_T9);
mask_reg = FS_T9;
} else {
i915_fs_dcl(FS_T0 + t);
i915_fs_dcl(FS_S0 + t);
}
if (! is_solid_src) {
/* Load the source_picture texel */
if (is_affine_src) {
i915_fs_texld(FS_R0, FS_S0, FS_T0);
} else {
i915_fs_texldp(FS_R0, FS_S0, FS_T0);
}
src_reg = FS_R0;
}
if (! is_solid_mask) {
/* Load the mask_picture texel */
if (is_affine_mask) {
i915_fs_texld(FS_R1, FS_S0 + t, FS_T0 + t);
} else {
i915_fs_texldp(FS_R1, FS_S0 + t, FS_T0 + t);
}
mask_reg = FS_R1;
}
if (dst_format == COLR_BUF_8BIT) {
i915_fs_mul(FS_OC,
i915_fs_operand(src_reg, W, W, W, W),
i915_fs_operand(mask_reg, W, W, W, W));
} else {
/* If component alpha is active in the mask and the blend
* operation uses the source alpha, then we know we don't
* need the source value (otherwise we would have hit a
* fallback earlier), so we provide the source alpha (src.A *
* mask.X) as output color.
* Conversely, if CA is set and we don't need the source alpha,
* then we produce the source value (src.X * mask.X) and the
* source alpha is unused. Otherwise, we provide the non-CA
* source value (src.X * mask.A).
*/
if (mask_picture->componentAlpha &&
PICT_FORMAT_RGB(mask_picture->format)) {
if (i915_blend_op[op].src_alpha) {
i915_fs_mul(FS_OC,
i915_fs_operand(src_reg, W, W, W, W),
i915_fs_operand_reg(mask_reg));
} else {
i915_fs_mul(FS_OC,
i915_fs_operand_reg(src_reg),
i915_fs_operand_reg(mask_reg));
}
} else {
i915_fs_mul(FS_OC,
i915_fs_operand_reg(src_reg),
i915_fs_operand(mask_reg, W, W, W, W));
}
}
}
FS_END();
}
if (! intel->needs_render_ca_pass)
i915_composite_emit_shader(intel, op);
}
void
@ -1168,6 +1196,14 @@ i915_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY,
}
if (intel->prim_offset == 0) {
if (intel->needs_render_ca_pass) {
OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(6) | 0);
OUT_BATCH(i915_get_blend_cntl(PictOpOutReverse,
intel->render_mask_picture,
intel->render_dest_picture->format));
i915_composite_emit_shader(intel, PictOpOutReverse);
}
intel->prim_offset = intel->batch_used;
OUT_BATCH(PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL);
OUT_BATCH(intel->vertex_index);
@ -1192,6 +1228,16 @@ i915_vertex_flush(intel_screen_private *intel)
intel->batch_ptr[intel->prim_offset] |= intel->vertex_count;
intel->prim_offset = 0;
if (intel->needs_render_ca_pass) {
OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(6) | 0);
OUT_BATCH(i915_get_blend_cntl(PictOpAdd,
intel->render_mask_picture,
intel->render_dest_picture->format));
i915_composite_emit_shader(intel, PictOpAdd);
OUT_BATCH(PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL | intel->vertex_count);
OUT_BATCH(intel->vertex_index);
}
intel->vertex_index += intel->vertex_count;
intel->vertex_count = 0;
}