Kill paranoid assertions on every write into the batchbuffer.

On my PineView box these represent ~5% overhead on x11perf text:

Before:
16000000 trep @   0.0020 msec (495000.0/sec): Char in 80-char aa line (Charter 10)
12000000 trep @   0.0022 msec (461000.0/sec): Char in 80-char rgb line (Charter 10)

After:
16000000 trep @   0.0020 msec (511000.0/sec): Char in 80-char aa line (Charter 10)
16000000 trep @   0.0021 msec (480000.0/sec): Char in 80-char rgb line (Charter 10)

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
This commit is contained in:
Chris Wilson 2010-05-19 09:30:02 +01:00
parent bc41f84e01
commit dcef703a7c
9 changed files with 23 additions and 148 deletions

View File

@ -38,7 +38,7 @@ void I830EmitInvarientState(ScrnInfoPtr scrn)
{
intel_screen_private *intel = intel_get_screen_private(scrn);
ATOMIC_BATCH(58);
assert(intel->in_batch_atomic);
OUT_BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(0));
OUT_BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(1));
@ -222,6 +222,4 @@ void I830EmitInvarientState(ScrnInfoPtr scrn)
AA_LINE_ECAAR_WIDTH_1_0 |
AA_LINE_REGION_WIDTH_ENABLE |
AA_LINE_REGION_WIDTH_1_0 | AA_LINE_DISABLE);
ADVANCE_BATCH();
}

View File

@ -74,8 +74,6 @@ static inline void intel_batch_end_atomic(ScrnInfoPtr scrn)
static inline void intel_batch_emit_dword(intel_screen_private *intel, uint32_t dword)
{
assert(intel->batch_ptr != NULL);
assert(intel->batch_emitting);
*(uint32_t *) (intel->batch_ptr + intel->batch_used) = dword;
intel->batch_used += 4;
}
@ -84,7 +82,6 @@ static inline void intel_batch_align(intel_screen_private *intel, uint32_t align
{
uint32_t delta;
assert(intel->batch_ptr != NULL);
assert(align);
if ((delta = intel->batch_used & (align - 1))) {
@ -100,9 +97,6 @@ intel_batch_emit_reloc(intel_screen_private *intel,
uint32_t read_domains,
uint32_t write_domains, uint32_t delta, int needs_fence)
{
assert(intel_batch_space(intel) >= 4);
*(uint32_t *) (intel->batch_ptr + intel->batch_used) =
bo->offset + delta;
if (needs_fence)
drm_intel_bo_emit_reloc_fence(intel->batch_bo,
intel->batch_used,
@ -113,7 +107,7 @@ intel_batch_emit_reloc(intel_screen_private *intel,
bo, delta,
read_domains, write_domains);
intel->batch_used += 4;
intel_batch_emit_dword(intel, bo->offset + delta);
}
static inline void
@ -144,9 +138,6 @@ intel_batch_emit_reloc_pixmap(intel_screen_private *intel, PixmapPtr pixmap,
{
struct intel_pixmap *priv = i830_get_pixmap_intel(pixmap);
assert(intel->batch_ptr != NULL);
assert(intel_batch_space(intel) >= 4);
intel_batch_mark_pixmap_domains(intel, priv, read_domains, write_domain);
intel_batch_emit_reloc(intel, priv->bo,
@ -188,18 +179,6 @@ do { \
intel->batch_emit_start = intel->batch_used; \
} while (0)
/* special-case variant for when we have preallocated space */
#define ATOMIC_BATCH(n) \
do { \
if (intel->batch_emitting != 0) \
FatalError("%s: ATOMIC_BATCH called without closing " \
"ADVANCE_BATCH\n", __FUNCTION__); \
assert(intel->in_batch_atomic); \
assert(intel->batch_used + (n) * 4 <= intel->batch_atomic_limit); \
intel->batch_emitting = (n) * 4; \
intel->batch_emit_start = intel->batch_used; \
} while (0)
#define ADVANCE_BATCH() do { \
if (intel->batch_emitting == 0) \
FatalError("%s: ADVANCE_BATCH called with no matching " \

View File

@ -302,7 +302,8 @@ static void i830_texture_setup(PicturePtr picture, PixmapPtr pixmap, int unit)
format = i8xx_get_card_format(intel, picture);
ATOMIC_BATCH(10);
assert(intel->in_batch_atomic);
OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
LOAD_TEXTURE_MAP(unit) | 4);
OUT_RELOC_PIXMAP(pixmap, I915_GEM_DOMAIN_SAMPLER, 0, 0);
@ -336,7 +337,6 @@ static void i830_texture_setup(PicturePtr picture, PixmapPtr pixmap, int unit)
ENABLE_TEX_STREAM_COORD_SET |
TEX_STREAM_COORD_SET(unit) |
ENABLE_TEX_STREAM_MAP_IDX | TEX_STREAM_MAP_IDX(unit));
ADVANCE_BATCH();
}
Bool
@ -549,7 +549,7 @@ static void i830_emit_composite_state(ScrnInfoPtr scrn)
IntelEmitInvarientState(scrn);
intel->last_3d = LAST_3D_RENDER;
ATOMIC_BATCH(21);
assert(intel->in_batch_atomic);
if (i830_pixmap_tiled(intel->render_dest)) {
tiling_bits = BUF_3D_TILED_SURFACE;
@ -615,8 +615,6 @@ static void i830_emit_composite_state(ScrnInfoPtr scrn)
}
OUT_BATCH(_3DSTATE_VERTEX_FORMAT_2_CMD | texcoordfmt);
ADVANCE_BATCH();
i830_texture_setup(intel->render_source_picture, intel->render_source, 0);
if (intel->render_mask) {
i830_texture_setup(intel->render_mask_picture,
@ -756,8 +754,6 @@ i830_emit_composite_primitive(PixmapPtr dest,
num_floats = 3 * per_vertex;
ATOMIC_BATCH(1 + num_floats);
OUT_BATCH(PRIM3D_INLINE | PRIM3D_RECTLIST | (num_floats - 1));
OUT_BATCH_F(dstX + w);
OUT_BATCH_F(dstY + h);
@ -803,8 +799,6 @@ i830_emit_composite_primitive(PixmapPtr dest,
OUT_BATCH_F(mask_w[0]);
}
}
ADVANCE_BATCH();
}
/**

View File

@ -38,7 +38,7 @@ void I915EmitInvarientState(ScrnInfoPtr scrn)
{
intel_screen_private *intel = intel_get_screen_private(scrn);
ATOMIC_BATCH(24);
assert(intel->in_batch_atomic);
OUT_BATCH(_3DSTATE_AA_CMD |
AA_LINE_ECAAR_WIDTH_ENABLE |
@ -104,6 +104,4 @@ void I915EmitInvarientState(ScrnInfoPtr scrn)
OUT_BATCH(_3DSTATE_BACKFACE_STENCIL_OPS | BFO_ENABLE_STENCIL_TWO_SIDE |
0);
OUT_BATCH(MI_NOOP);
ADVANCE_BATCH();
}

View File

@ -418,36 +418,25 @@ do { \
* \param x maximum number of shader commands that may be used between
* a FS_START and FS_END
*/
#define FS_LOCALS(x) \
uint32_t _shader_buf[(x) * 3]; \
unsigned int _max_shader_commands = x; \
unsigned int _cur_shader_commands
#define FS_LOCALS() \
uint32_t _shader_offset
#define FS_BEGIN() \
do { \
_cur_shader_commands = 0; \
_shader_offset = intel->batch_used; \
intel->batch_used += 4; \
} while (0)
#define FS_OUT(_shaderop) \
do { \
if (_cur_shader_commands >= _max_shader_commands) \
FatalError("fragment shader command buffer exceeded (%d)\n", \
_cur_shader_commands); \
_shader_buf[_cur_shader_commands * 3 + 0] = _shaderop.ui[0]; \
_shader_buf[_cur_shader_commands * 3 + 1] = _shaderop.ui[1]; \
_shader_buf[_cur_shader_commands * 3 + 2] = _shaderop.ui[2]; \
++_cur_shader_commands; \
OUT_BATCH(_shaderop.ui[0]); \
OUT_BATCH(_shaderop.ui[1]); \
OUT_BATCH(_shaderop.ui[2]); \
} while (0)
#define FS_END() \
do { \
int _i, _pad = (_cur_shader_commands & 0x1) ? 0 : 1; \
ATOMIC_BATCH(_cur_shader_commands * 3 + 1 + _pad); \
OUT_BATCH(_3DSTATE_PIXEL_SHADER_PROGRAM | \
(_cur_shader_commands * 3 - 1)); \
for (_i = 0; _i < _cur_shader_commands * 3; _i++) \
OUT_BATCH(_shader_buf[_i]); \
if (_pad != 0) \
OUT_BATCH(MI_NOOP); \
ADVANCE_BATCH(); \
*(uint32_t *)(intel->batch_ptr + _shader_offset) = \
(_3DSTATE_PIXEL_SHADER_PROGRAM | \
((intel->batch_used - _shader_offset) / 4 - 2)); \
} while (0);

View File

@ -489,8 +489,6 @@ i915_emit_composite_primitive_constant(PixmapPtr dest,
intel_screen_private *intel = intel_get_screen_private(scrn);
float x, y;
ATOMIC_BATCH((intel->prim_offset == 0) + 6);
if (intel->prim_offset == 0) {
intel->prim_offset = intel->batch_used;
OUT_BATCH(PRIM3D_INLINE | PRIM3D_RECTLIST);
@ -508,8 +506,6 @@ i915_emit_composite_primitive_constant(PixmapPtr dest,
OUT_BATCH_F(x);
OUT_BATCH_F(y);
ADVANCE_BATCH();
}
static void
@ -523,8 +519,6 @@ i915_emit_composite_primitive_identity_source(PixmapPtr dest,
intel_screen_private *intel = intel_get_screen_private(scrn);
float dst_x, dst_y, src_x, src_y;
ATOMIC_BATCH((intel->prim_offset == 0) + 12);
if (intel->prim_offset == 0) {
intel->prim_offset = intel->batch_used;
OUT_BATCH(PRIM3D_INLINE | PRIM3D_RECTLIST);
@ -550,8 +544,6 @@ i915_emit_composite_primitive_identity_source(PixmapPtr dest,
OUT_BATCH_F(dst_y);
OUT_BATCH_F(src_x / intel->scale_units[0][0]);
OUT_BATCH_F(src_y / intel->scale_units[0][1]);
ADVANCE_BATCH();
}
static void
@ -586,8 +578,6 @@ i915_emit_composite_primitive_affine_source(PixmapPtr dest,
&src_y[2]))
return;
ATOMIC_BATCH((intel->prim_offset == 0) + 12);
if (intel->prim_offset == 0) {
intel->prim_offset = intel->batch_used;
OUT_BATCH(PRIM3D_INLINE | PRIM3D_RECTLIST);
@ -611,8 +601,6 @@ i915_emit_composite_primitive_affine_source(PixmapPtr dest,
OUT_BATCH_F(y);
OUT_BATCH_F(src_x[0] / intel->scale_units[0][0]);
OUT_BATCH_F(src_y[0] / intel->scale_units[0][1]);
ADVANCE_BATCH();
}
static void
@ -751,8 +739,6 @@ i915_emit_composite_primitive(PixmapPtr dest,
num_floats = 3 * per_vertex;
ATOMIC_BATCH(num_floats);
intel->prim_count += num_floats;
OUT_BATCH_F(intel->dst_coord_adjust + dstX + w);
@ -811,8 +797,6 @@ i915_emit_composite_primitive(PixmapPtr dest,
OUT_BATCH_F(mask_w[0]);
}
}
ADVANCE_BATCH();
}
static void i915_emit_composite_setup(ScrnInfoPtr scrn)
@ -846,14 +830,7 @@ static void i915_emit_composite_setup(ScrnInfoPtr scrn)
tex_count += ! is_solid_src;
tex_count += mask && ! is_solid_mask;
t = 15;
if (tex_count)
t += 6 * tex_count + 4;
if (is_solid_src)
t += 2;
if (mask && is_solid_mask)
t += 2;
ATOMIC_BATCH (t);
assert(intel->in_batch_atomic);
if (tex_count != 0) {
OUT_BATCH(_3DSTATE_MAP_STATE | (3 * tex_count));
@ -940,10 +917,8 @@ static void i915_emit_composite_setup(ScrnInfoPtr scrn)
OUT_BATCH(0x00000000);
}
ADVANCE_BATCH();
{
FS_LOCALS(20);
FS_LOCALS();
int src_reg, mask_reg;
FS_BEGIN();
@ -1076,9 +1051,7 @@ i915_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY,
if (intel->prim_offset == 0) {
intel->prim_offset = intel->batch_used;
ATOMIC_BATCH(1);
OUT_BATCH(PRIM3D_INLINE | PRIM3D_RECTLIST);
ADVANCE_BATCH();
}
intel->prim_emit(dest,

View File

@ -75,8 +75,6 @@ I915DisplayVideoTextured(ScrnInfoPtr scrn,
IntelEmitInvarientState(scrn);
intel->last_3d = LAST_3D_VIDEO;
ATOMIC_BATCH(20);
/* flush map & render cache */
OUT_BATCH(MI_FLUSH | MI_WRITE_DIRTY_STATE |
MI_INVALIDATE_MAP_CACHE);
@ -134,12 +132,10 @@ I915DisplayVideoTextured(ScrnInfoPtr scrn,
BUF_3D_PITCH(intel_get_pixmap_pitch(pixmap)));
OUT_RELOC_PIXMAP(pixmap, I915_GEM_DOMAIN_RENDER,
I915_GEM_DOMAIN_RENDER, 0);
ADVANCE_BATCH();
if (!is_planar_fourcc(id)) {
FS_LOCALS(10);
FS_LOCALS();
ATOMIC_BATCH(16);
OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | 4);
OUT_BATCH(0x0000001); /* constant 0 */
/* constant 0: brightness/contrast */
@ -184,8 +180,6 @@ I915DisplayVideoTextured(ScrnInfoPtr scrn,
OUT_BATCH(ms3);
OUT_BATCH(((video_pitch / 4) - 1) << MS4_PITCH_SHIFT);
ADVANCE_BATCH();
FS_BEGIN();
i915_fs_dcl(FS_S0);
i915_fs_dcl(FS_T0);
@ -198,9 +192,8 @@ I915DisplayVideoTextured(ScrnInfoPtr scrn,
}
FS_END();
} else {
FS_LOCALS(16);
FS_LOCALS();
ATOMIC_BATCH(22 + 11 + 11);
/* For the planar formats, we set up three samplers --
* one for each plane, in a Y8 format. Because I
* couldn't get the special PLANAR_TO_PACKED
@ -332,7 +325,6 @@ I915DisplayVideoTextured(ScrnInfoPtr scrn,
ms3 |= (width / 2 - 1) << MS3_WIDTH_SHIFT;
OUT_BATCH(ms3);
OUT_BATCH(((video_pitch / 4) - 1) << MS4_PITCH_SHIFT);
ADVANCE_BATCH();
FS_BEGIN();
/* Declare samplers */
@ -389,13 +381,7 @@ I915DisplayVideoTextured(ScrnInfoPtr scrn,
FS_END();
}
{
ATOMIC_BATCH(2);
OUT_BATCH(MI_FLUSH | MI_WRITE_DIRTY_STATE |
MI_INVALIDATE_MAP_CACHE);
OUT_BATCH(0x00000000);
ADVANCE_BATCH();
}
OUT_BATCH(MI_FLUSH | MI_WRITE_DIRTY_STATE | MI_INVALIDATE_MAP_CACHE);
/* Set up the offset for translating from the given region
* (in screen coordinates) to the backing pixmap.
@ -411,6 +397,7 @@ I915DisplayVideoTextured(ScrnInfoPtr scrn,
dxo = dstRegion->extents.x1;
dyo = dstRegion->extents.y1;
OUT_BATCH(PRIM3D_INLINE | PRIM3D_RECTLIST | (12 * nbox_this_time - 1));
while (nbox_this_time--) {
int box_x1 = pbox->x1;
int box_y1 = pbox->y1;
@ -423,19 +410,9 @@ I915DisplayVideoTextured(ScrnInfoPtr scrn,
src_scale_x = ((float)src_w / width) / drw_w;
src_scale_y = ((float)src_h / height) / drw_h;
ATOMIC_BATCH(8 + 12);
OUT_BATCH(MI_NOOP);
OUT_BATCH(MI_NOOP);
OUT_BATCH(MI_NOOP);
OUT_BATCH(MI_NOOP);
OUT_BATCH(MI_NOOP);
OUT_BATCH(MI_NOOP);
OUT_BATCH(MI_NOOP);
/* vertex data - rect list consists of bottom right,
* bottom left, and top left vertices.
*/
OUT_BATCH(PRIM3D_INLINE | PRIM3D_RECTLIST | (12 - 1));
/* bottom right */
OUT_BATCH_F(box_x2 + pix_xoff);
@ -454,8 +431,6 @@ I915DisplayVideoTextured(ScrnInfoPtr scrn,
OUT_BATCH_F(box_y1 + pix_yoff);
OUT_BATCH_F((box_x1 - dxo) * src_scale_x);
OUT_BATCH_F((box_y1 - dyo) * src_scale_y);
ADVANCE_BATCH();
}
intel_batch_end_atomic(scrn);

View File

@ -1181,12 +1181,8 @@ static void i965_emit_composite_state(ScrnInfoPtr scrn)
*/
ALIGN_BATCH(64);
assert(intel->in_batch_atomic);
{
if (IS_IGDNG(intel))
ATOMIC_BATCH(14);
else
ATOMIC_BATCH(12);
/* Match Mesa driver setup */
OUT_BATCH(MI_FLUSH |
MI_STATE_INSTRUCTION_CACHE_FLUSH |
@ -1229,12 +1225,10 @@ static void i965_emit_composite_state(ScrnInfoPtr scrn)
OUT_BATCH(BRW_STATE_SIP | 0);
OUT_RELOC(render_state->sip_kernel_bo,
I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
ADVANCE_BATCH();
}
{
int pipe_ctrl;
ATOMIC_BATCH(26);
/* Pipe control */
if (IS_IGDNG(intel))
@ -1329,7 +1323,6 @@ static void i965_emit_composite_state(ScrnInfoPtr scrn)
OUT_BATCH(BRW_CS_URB_STATE | 0);
OUT_BATCH(((URB_CS_ENTRY_SIZE - 1) << 4) |
(URB_CS_ENTRIES << 0));
ADVANCE_BATCH();
}
{
/*
@ -1356,7 +1349,6 @@ static void i965_emit_composite_state(ScrnInfoPtr scrn)
}
if (IS_IGDNG(intel)) {
ATOMIC_BATCH(mask ? 9 : 7);
/*
* The reason to add this extra vertex element in the header is that
* IGDNG has different vertex header definition and origin method to
@ -1386,7 +1378,6 @@ static void i965_emit_composite_state(ScrnInfoPtr scrn)
(BRW_VFCOMPONENT_STORE_0 <<
VE1_VFCOMPONENT_3_SHIFT));
} else {
ATOMIC_BATCH(mask ? 7 : 5);
/* Set up our vertex elements, sourced from the single vertex buffer
* that will be set up later.
*/
@ -1448,8 +1439,6 @@ static void i965_emit_composite_state(ScrnInfoPtr scrn)
else
OUT_BATCH((BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | (BRW_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | (w_component << VE1_VFCOMPONENT_2_SHIFT) | (BRW_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) | ((4 + 4 + 4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT)); /* VUE offset in dwords */
}
ADVANCE_BATCH();
}
}
@ -1835,7 +1824,6 @@ i965_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY,
if (intel->needs_render_state_emit)
i965_emit_composite_state(scrn);
ATOMIC_BATCH(12);
OUT_BATCH(MI_FLUSH);
/* Set up the pointer to our (single) vertex buffer */
OUT_BATCH(BRW_3DSTATE_VERTEX_BUFFERS | 3);
@ -1860,7 +1848,6 @@ i965_composite(PixmapPtr dest, int srcX, int srcY, int maskX, int maskY,
OUT_BATCH(1); /* single instance */
OUT_BATCH(0); /* start instance location */
OUT_BATCH(0); /* index buffer offset, ignored */
ADVANCE_BATCH();
render_state->vb_offset += i;
drm_intel_bo_unreference(vb_bo);

View File

@ -778,18 +778,12 @@ i965_emit_video_setup(ScrnInfoPtr scrn, drm_intel_bo * bind_bo, int n_src_surf)
urb_cs_start = urb_sf_start + urb_sf_size;
urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;
ATOMIC_BATCH(2);
OUT_BATCH(MI_FLUSH |
MI_STATE_INSTRUCTION_CACHE_FLUSH |
BRW_MI_GLOBAL_SNAPSHOT_RESET);
OUT_BATCH(MI_NOOP);
ADVANCE_BATCH();
/* brw_debug (scrn, "before base address modify"); */
if (IS_IGDNG(intel))
ATOMIC_BATCH(14);
else
ATOMIC_BATCH(12);
/* Match Mesa driver setup */
if (IS_G4X(intel) || IS_IGDNG(intel))
OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D);
@ -833,9 +827,6 @@ i965_emit_video_setup(ScrnInfoPtr scrn, drm_intel_bo * bind_bo, int n_src_surf)
OUT_RELOC(intel->video.gen4_sip_kernel_bo,
I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
OUT_BATCH(MI_NOOP);
ADVANCE_BATCH();
/* brw_debug (scrn, "after base address modify"); */
if (IS_IGDNG(intel))
@ -843,10 +834,6 @@ i965_emit_video_setup(ScrnInfoPtr scrn, drm_intel_bo * bind_bo, int n_src_surf)
else
pipe_ctl = BRW_PIPE_CONTROL_NOWRITE | BRW_PIPE_CONTROL_IS_FLUSH;
ATOMIC_BATCH(38);
OUT_BATCH(MI_NOOP);
/* Pipe control */
OUT_BATCH(BRW_PIPE_CONTROL | pipe_ctl | 2);
OUT_BATCH(0); /* Destination address */
@ -970,9 +957,6 @@ i965_emit_video_setup(ScrnInfoPtr scrn, drm_intel_bo * bind_bo, int n_src_surf)
VE1_VFCOMPONENT_3_SHIFT) | (4 <<
VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
}
OUT_BATCH(MI_NOOP); /* pad to quadword */
ADVANCE_BATCH();
}
void
@ -1219,7 +1203,6 @@ I965DisplayVideoTextured(ScrnInfoPtr scrn,
i965_emit_video_setup(scrn, bind_bo, n_src_surf);
ATOMIC_BATCH(12);
/* Set up the pointer to our vertex buffer */
OUT_BATCH(BRW_3DSTATE_VERTEX_BUFFERS | 3);
/* four 32-bit floats per vertex */
@ -1241,7 +1224,6 @@ I965DisplayVideoTextured(ScrnInfoPtr scrn,
OUT_BATCH(0); /* start instance location */
OUT_BATCH(0); /* index buffer offset, ignored */
OUT_BATCH(MI_NOOP);
ADVANCE_BATCH();
intel_batch_end_atomic(scrn);