i915: Emit CA over using OutReverse + Add passes

On PineView: 578/621 -> 610/617 kglyphs/sec [rgb/aa]
2010-05-21 14:33:18 +01:00 · 2010-05-21 14:33:18 +01:00 · ea07535240
parent 80a9e64f50
commit ea07535240
4 changed files with 183 additions and 132 deletions
--- a/src/i830.h
+++ b/src/i830.h
@ -344,6 +344,7 @@ typedef struct intel_screen_private {
 	Bool render_mask_is_solid;
 	Bool needs_render_state_emit;
 	Bool needs_render_vertex_emit;
+	Bool needs_render_ca_pass;

 	/* i830 render accel state */
 	uint32_t render_dest_format;
--- a/src/i830_uxa.c
+++ b/src/i830_uxa.c
@ -1055,7 +1055,6 @@ Bool i830_uxa_init(ScreenPtr screen)
 	intel->uxa_driver->uxa_major = 1;
 	intel->uxa_driver->uxa_minor = 0;

-	intel->needs_render_vertex_emit = TRUE;
 	intel->prim_offset = 0;
 	intel->vertex_count = 0;
 	intel->floats_per_vertex = 0;
--- a/src/i915_3d.c
+++ b/src/i915_3d.c
@ -85,8 +85,13 @@ void I915EmitInvarientState(ScrnInfoPtr scrn)
 		  ENABLE_STENCIL_WRITE_MASK | STENCIL_WRITE_MASK(0xff) |
 		  ENABLE_STENCIL_TEST_MASK | STENCIL_TEST_MASK(0xff));

-	OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | 0);
+	OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | I1_LOAD_S(4) | I1_LOAD_S(5) | 2);
 	OUT_BATCH(0x00000000);	/* Disable texture coordinate wrap-shortest */
+	OUT_BATCH((1 << S4_POINT_WIDTH_SHIFT) |
+		  S4_LINE_WIDTH_ONE |
+		  S4_CULLMODE_NONE |
+		  S4_VFMT_XY);
+	OUT_BATCH(0x00000000);	/* Stencil. */

 	OUT_BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
 	OUT_BATCH(_3DSTATE_SCISSOR_RECT_0_CMD);
--- a/src/i915_render.c
+++ b/src/i915_render.c
@ -133,8 +133,10 @@ static uint32_t i915_get_blend_cntl(int op, PicturePtr mask,
 		}
 	}

-	return (sblend << S6_CBUF_SRC_BLEND_FACT_SHIFT) |
-	    (dblend << S6_CBUF_DST_BLEND_FACT_SHIFT);
+	return S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE |
+		(BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT) |
+		(sblend << S6_CBUF_SRC_BLEND_FACT_SHIFT) |
+		(dblend << S6_CBUF_DST_BLEND_FACT_SHIFT);
 }

 #define DSTORG_HORT_BIAS(x)             ((x)<<20)
@ -204,11 +206,13 @@ i915_check_composite(int op,
 		 */
 		if (i915_blend_op[op].src_alpha &&
 		    (i915_blend_op[op].src_blend != BLENDFACT_ZERO)) {
-			intel_debug_fallback(scrn,
-					     "Component alpha not supported "
-					     "with source alpha and source "
-					     "value blending.\n");
-			return FALSE;
+			if (op != PictOpOver) {
+				intel_debug_fallback(scrn,
+						     "Component alpha not supported "
+						     "with source alpha and source "
+						     "value blending.\n");
+				return FALSE;
+			}
 		}
 	}

@ -814,6 +818,23 @@ i915_prepare_composite(int op, PicturePtr source_picture,

 	if (!i830_get_aperture_space(scrn, bo_table, ARRAY_SIZE(bo_table)))
 		return FALSE;
+
+	intel->needs_render_ca_pass = FALSE;
+	if (mask_picture != NULL && mask_picture->componentAlpha &&
+	    PICT_FORMAT_RGB(mask_picture->format)) {
+		/* Check if it's component alpha that relies on a source alpha
+		 * and on the source value.  We can only get one of those
+		 * into the single source value that we get to blend with.
+		 */
+		if (i915_blend_op[op].src_alpha &&
+		    (i915_blend_op[op].src_blend != BLENDFACT_ZERO)) {
+			if (op != PictOpOver)
+				return FALSE;
+
+			intel->needs_render_ca_pass = TRUE;
+		}
+	}
+
 	intel->dst_coord_adjust = 0;
 	intel->src_coord_adjust = 0;
 	intel->mask_coord_adjust = 0;
@ -902,6 +923,120 @@ i915_prepare_composite(int op, PicturePtr source_picture,
 	return TRUE;
 }

+static void
+i915_composite_emit_shader(intel_screen_private *intel, CARD8 op)
+{	/* Emit the fragment program writing source (times mask, if any) to FS_OC; `op` only matters for component-alpha masks. */
+	PicturePtr mask_picture = intel->render_mask_picture;	/* only dereferenced when a mask pixmap is set */
+	PixmapPtr mask = intel->render_mask;
+	int src_reg, mask_reg;	/* registers that end up holding the source and mask colours */
+	Bool is_solid_src, is_solid_mask;
+	uint32_t dst_format = intel->i915_render_state.dst_format;
+	int tex_unit, t;	/* tex_unit indexes intel->transform[]; t is the T/S register pair used by the mask */
+	FS_LOCALS();
+
+	is_solid_src = intel->render_source_is_solid;
+	is_solid_mask = intel->render_mask_is_solid;
+
+	FS_BEGIN();
+
+	/* Declare the registers necessary for our program.  */
+	t = 0;
+	if (is_solid_src) {
+		i915_fs_dcl(FS_T8);	/* solid source is read straight from T8; no texture load is emitted */
+		src_reg = FS_T8;
+	} else {
+		i915_fs_dcl(FS_T0);
+		i915_fs_dcl(FS_S0);
+		t++;	/* a sampled mask then uses the next T/S pair */
+	}
+	if (!mask) {
+		/* No mask, so load directly to output color */
+		if (! is_solid_src) {
+			if (dst_format == COLR_BUF_8BIT)	/* sample into R0 so W can be broadcast below */
+				src_reg = FS_R0;
+			else
+				src_reg = FS_OC;
+
+			if (i830_transform_is_affine(intel->transform[0]))
+				i915_fs_texld(src_reg, FS_S0, FS_T0);
+			else
+				i915_fs_texldp(src_reg, FS_S0, FS_T0);
+		}
+
+		if (src_reg != FS_OC) {	/* value was not already written to FS_OC by the texture load */
+			if (dst_format == COLR_BUF_8BIT)
+				i915_fs_mov(FS_OC, i915_fs_operand(src_reg, W, W, W, W));
+			else
+				i915_fs_mov(FS_OC, i915_fs_operand_reg(src_reg));
+		}
+	} else {
+		if (is_solid_mask) {
+			i915_fs_dcl(FS_T9);	/* solid mask is read straight from T9; no texture load is emitted */
+			mask_reg = FS_T9;
+		} else {
+			i915_fs_dcl(FS_T0 + t);
+			i915_fs_dcl(FS_S0 + t);
+		}
+
+		tex_unit = 0;
+		if (! is_solid_src) {
+			/* Load the source_picture texel */
+			if (i830_transform_is_affine(intel->transform[tex_unit]))
+				i915_fs_texld(FS_R0, FS_S0, FS_T0);
+			else
+				i915_fs_texldp(FS_R0, FS_S0, FS_T0);
+
+			src_reg = FS_R0;
+			tex_unit++;	/* the mask's transform follows the source's only when the source is sampled */
+		}
+
+		if (! is_solid_mask) {
+			/* Load the mask_picture texel */
+			if (i830_transform_is_affine(intel->transform[tex_unit]))
+				i915_fs_texld(FS_R1, FS_S0 + t, FS_T0 + t);
+			else
+				i915_fs_texldp(FS_R1, FS_S0 + t, FS_T0 + t);
+
+			mask_reg = FS_R1;
+		}
+
+		if (dst_format == COLR_BUF_8BIT) {	/* 8-bit dest: output src.W * mask.W in every channel */
+			i915_fs_mul(FS_OC,
+				    i915_fs_operand(src_reg, W, W, W, W),
+				    i915_fs_operand(mask_reg, W, W, W, W));
+		} else {
+			/* With a component-alpha mask, `op` needs either the source
+			 * alpha or the source value, never both: ops needing both were
+			 * rejected earlier or, for PictOpOver, are split by the callers
+			 * into separate OutReverse and Add passes.
+			 * So if the blend uses the source alpha we output it (src.A *
+			 * mask.X); otherwise we output the source value (src.X * mask.X)
+			 * and the source alpha is unused.
+			 * Without component alpha we output the plain masked source
+			 * (src.X * mask.A).
+			 */
+			if (mask_picture->componentAlpha &&
+			    PICT_FORMAT_RGB(mask_picture->format)) {
+				if (i915_blend_op[op].src_alpha) {
+					i915_fs_mul(FS_OC,
+						    i915_fs_operand(src_reg, W, W, W, W),
+						    i915_fs_operand_reg(mask_reg));
+				} else {
+					i915_fs_mul(FS_OC,
+						    i915_fs_operand_reg(src_reg),
+						    i915_fs_operand_reg(mask_reg));
+				}
+			} else {
+				i915_fs_mul(FS_OC,
+					    i915_fs_operand_reg(src_reg),
+					    i915_fs_operand(mask_reg, W, W, W, W));
+			}
+		}
+	}
+
+	FS_END();
+}
+
 static void i915_emit_composite_setup(ScrnInfoPtr scrn)
 {
 	intel_screen_private *intel = intel_get_screen_private(scrn);
@ -911,8 +1046,7 @@ static void i915_emit_composite_setup(ScrnInfoPtr scrn)
 	PixmapPtr mask = intel->render_mask;
 	PixmapPtr dest = intel->render_dest;
 	uint32_t dst_format = intel->i915_render_state.dst_format, dst_pitch;
-	uint32_t blendctl, tiling_bits;
-	Bool is_affine_src, is_affine_mask;
+	uint32_t tiling_bits;
 	Bool is_solid_src, is_solid_mask;
 	int tex_count, t;

@ -923,9 +1057,6 @@ static void i915_emit_composite_setup(ScrnInfoPtr scrn)

 	dst_pitch = intel_get_pixmap_pitch(dest);

-	is_affine_src = i830_transform_is_affine(intel->transform[0]);
-	is_affine_mask = i830_transform_is_affine(intel->transform[1]);
-
 	is_solid_src = intel->render_source_is_solid;
 	is_solid_mask = intel->render_mask_is_solid;

@ -982,33 +1113,31 @@ static void i915_emit_composite_setup(ScrnInfoPtr scrn)
 	{
 		uint32_t ss2;

-		OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) |
-			  I1_LOAD_S(4) | I1_LOAD_S(5) | I1_LOAD_S(6) | 3);
 		ss2 = ~0;
 		t = 0;
 		if (! is_solid_src) {
 		    ss2 &= ~S2_TEXCOORD_FMT(t, TEXCOORDFMT_NOT_PRESENT);
 		    ss2 |= S2_TEXCOORD_FMT(t,
-					   is_affine_src ? TEXCOORDFMT_2D :
-					   TEXCOORDFMT_4D);
+					   i830_transform_is_affine(intel->transform[t]) ?
+					   TEXCOORDFMT_2D : TEXCOORDFMT_4D);
 		    t++;
 		}
 		if (mask && ! is_solid_mask) {
 		    ss2 &= ~S2_TEXCOORD_FMT(t, TEXCOORDFMT_NOT_PRESENT);
 		    ss2 |= S2_TEXCOORD_FMT(t,
-			    is_affine_mask ? TEXCOORDFMT_2D :
-			    TEXCOORDFMT_4D);
+					   i830_transform_is_affine(intel->transform[t]) ?
+					   TEXCOORDFMT_2D : TEXCOORDFMT_4D);
 		    t++;
 		}
-		OUT_BATCH(ss2);
-		OUT_BATCH((1 << S4_POINT_WIDTH_SHIFT) | S4_LINE_WIDTH_ONE |
-			  S4_CULLMODE_NONE | S4_VFMT_XY);
-		blendctl =
-		    i915_get_blend_cntl(op, mask_picture, dest_picture->format);
-		OUT_BATCH(0x00000000);	/* Disable stencil buffer */
-		OUT_BATCH(S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE |
-			  (BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT) |
-			  blendctl);
+
+		if (intel->needs_render_ca_pass) {
+			OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) | 0);
+			OUT_BATCH(ss2);
+		} else {
+			OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) | I1_LOAD_S(6) | 1);
+			OUT_BATCH(ss2);
+			OUT_BATCH(i915_get_blend_cntl(op, mask_picture, dest_picture->format));
+		}

 		/* draw rect is unconditional */
 		OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
@ -1020,109 +1149,8 @@ static void i915_emit_composite_setup(ScrnInfoPtr scrn)
 		OUT_BATCH(0x00000000);
 	}

-	{
-	    FS_LOCALS();
-	    int src_reg, mask_reg;
-
-	    FS_BEGIN();
-
-	    /* Declare the registers necessary for our program.  */
-	    t = 0;
-	    if (is_solid_src) {
-		i915_fs_dcl(FS_T8);
-		src_reg = FS_T8;
-	    } else {
-		i915_fs_dcl(FS_T0);
-		i915_fs_dcl(FS_S0);
-		t++;
-	    }
-	    if (!mask) {
-		    /* No mask, so load directly to output color */
-		    if (! is_solid_src) {
-			    if (dst_format == COLR_BUF_8BIT)
-				    src_reg = FS_R0;
-			    else
-				    src_reg = FS_OC;
-
-			    if (is_affine_src)
-				    i915_fs_texld(src_reg, FS_S0, FS_T0);
-			    else
-				    i915_fs_texldp(src_reg, FS_S0, FS_T0);
-		    }
-
-		    if (src_reg != FS_OC) {
-			    if (dst_format == COLR_BUF_8BIT)
-				    i915_fs_mov(FS_OC, i915_fs_operand(src_reg, W, W, W, W));
-			    else
-				    i915_fs_mov(FS_OC, i915_fs_operand_reg(src_reg));
-		    }
-	    } else {
-		    if (is_solid_mask) {
-			    i915_fs_dcl(FS_T9);
-			    mask_reg = FS_T9;
-		    } else {
-			    i915_fs_dcl(FS_T0 + t);
-			    i915_fs_dcl(FS_S0 + t);
-		    }
-
-		    if (! is_solid_src) {
-			    /* Load the source_picture texel */
-			    if (is_affine_src) {
-				    i915_fs_texld(FS_R0, FS_S0, FS_T0);
-			    } else {
-				    i915_fs_texldp(FS_R0, FS_S0, FS_T0);
-			    }
-
-			    src_reg = FS_R0;
-		    }
-
-		    if (! is_solid_mask) {
-			    /* Load the mask_picture texel */
-			    if (is_affine_mask) {
-				    i915_fs_texld(FS_R1, FS_S0 + t, FS_T0 + t);
-			    } else {
-				    i915_fs_texldp(FS_R1, FS_S0 + t, FS_T0 + t);
-			    }
-
-			    mask_reg = FS_R1;
-		    }
-
-		    if (dst_format == COLR_BUF_8BIT) {
-			    i915_fs_mul(FS_OC,
-					i915_fs_operand(src_reg, W, W, W, W),
-					i915_fs_operand(mask_reg, W, W, W, W));
-		    } else {
-			    /* If component alpha is active in the mask and the blend
-			     * operation uses the source alpha, then we know we don't
-			     * need the source value (otherwise we would have hit a
-			     * fallback earlier), so we provide the source alpha (src.A *
-			     * mask.X) as output color.
-			     * Conversely, if CA is set and we don't need the source alpha,
-			     * then we produce the source value (src.X * mask.X) and the
-			     * source alpha is unused.  Otherwise, we provide the non-CA
-			     * source value (src.X * mask.A).
-			     */
-			    if (mask_picture->componentAlpha &&
-				PICT_FORMAT_RGB(mask_picture->format)) {
-				    if (i915_blend_op[op].src_alpha) {
-					    i915_fs_mul(FS_OC,
-							i915_fs_operand(src_reg, W, W, W, W),
-							i915_fs_operand_reg(mask_reg));
-				    } else {
-					    i915_fs_mul(FS_OC,
-							i915_fs_operand_reg(src_reg),
-							i915_fs_operand_reg(mask_reg));
-				    }
-			    } else {
-				    i915_fs_mul(FS_OC,
-						i915_fs_operand_reg(src_reg),
-						i915_fs_operand(mask_reg, W, W, W, W));
-			    }
-		    }
-	    }
-
-	    FS_END();
-	}
+	if (! intel->needs_render_ca_pass)
+		i915_composite_emit_shader(intel, op);
 }

 void
@ -1168,6 +1196,14 @@ i915_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY,
 	}

 	if (intel->prim_offset == 0) {
+		if (intel->needs_render_ca_pass) {
+			OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(6) | 0);
+			OUT_BATCH(i915_get_blend_cntl(PictOpOutReverse,
+						      intel->render_mask_picture,
+						      intel->render_dest_picture->format));
+			i915_composite_emit_shader(intel, PictOpOutReverse);
+		}
+
 		intel->prim_offset = intel->batch_used;
 		OUT_BATCH(PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL);
 		OUT_BATCH(intel->vertex_index);
@ -1192,6 +1228,16 @@ i915_vertex_flush(intel_screen_private *intel)
 	intel->batch_ptr[intel->prim_offset] |= intel->vertex_count;
 	intel->prim_offset = 0;

+	if (intel->needs_render_ca_pass) {
+		OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(6) | 0);
+		OUT_BATCH(i915_get_blend_cntl(PictOpAdd,
+					      intel->render_mask_picture,
+					      intel->render_dest_picture->format));
+		i915_composite_emit_shader(intel, PictOpAdd);
+		OUT_BATCH(PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL | intel->vertex_count);
+		OUT_BATCH(intel->vertex_index);
+	}
+
 	intel->vertex_index += intel->vertex_count;
 	intel->vertex_count = 0;
 }