From 33c028f8be829caa4fdb9416ff177dc71f24b68e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 1 Aug 2012 01:17:50 +0100 Subject: [PATCH] sna/gen6+: Reduce floats-per-vertex for spans Signed-off-by: Chris Wilson --- src/sna/gen4_render.c | 51 ++++--- src/sna/gen5_render.c | 73 +++++----- src/sna/gen6_render.c | 303 ++++++++++++++++++++++-------------------- src/sna/gen7_render.c | 264 +++++++++++++++++++----------------- src/sna/sna_render.h | 3 +- 5 files changed, 359 insertions(+), 335 deletions(-) diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c index d8beed97..35dacd0c 100644 --- a/src/sna/gen4_render.c +++ b/src/sna/gen4_render.c @@ -279,6 +279,7 @@ static int gen4_vertex_finish(struct sna *sna) unsigned int i; assert(sna->render.vertex_used); + assert(sna->render.nvertex_reloc); /* Note: we only need dword alignment (currently) */ @@ -286,21 +287,18 @@ static int gen4_vertex_finish(struct sna *sna) if (bo) { gen4_vertex_flush(sna); - for (i = 0; i < ARRAY_SIZE(sna->render.vertex_reloc); i++) { - if (sna->render.vertex_reloc[i]) { - DBG(("%s: reloc[%d] = %d\n", __FUNCTION__, - i, sna->render.vertex_reloc[i])); + for (i = 0; i < sna->render.nvertex_reloc; i++) { + DBG(("%s: reloc[%d] = %d\n", __FUNCTION__, + i, sna->render.vertex_reloc[i])); - sna->kgem.batch[sna->render.vertex_reloc[i]] = - kgem_add_reloc(&sna->kgem, - sna->render.vertex_reloc[i], - bo, - I915_GEM_DOMAIN_VERTEX << 16, - 0); - sna->render.vertex_reloc[i] = 0; - } + sna->kgem.batch[sna->render.vertex_reloc[i]] = + kgem_add_reloc(&sna->kgem, + sna->render.vertex_reloc[i], bo, + I915_GEM_DOMAIN_VERTEX << 16, + 0); } + sna->render.nvertex_reloc = 0; sna->render.vertex_used = 0; sna->render.vertex_index = 0; sna->render_state.gen4.vb_id = 0; @@ -335,13 +333,12 @@ static void gen4_vertex_close(struct sna *sna) unsigned int i, delta = 0; assert(sna->render_state.gen4.vertex_offset == 0); + if (!sna->render_state.gen4.vb_id) + return; DBG(("%s: used=%d, vbo active? %d\n", __FUNCTION__, sna->render.vertex_used, sna->render.vbo != NULL)); - if (!sna->render.vertex_used) - return; - bo = sna->render.vbo; if (bo) { if (sna->render.vertex_size - sna->render.vertex_used < 64) { @@ -386,20 +383,18 @@ static void gen4_vertex_close(struct sna *sna) } } - for (i = 0; i < ARRAY_SIZE(sna->render.vertex_reloc); i++) { - if (sna->render.vertex_reloc[i]) { - DBG(("%s: reloc[%d] = %d\n", __FUNCTION__, - i, sna->render.vertex_reloc[i])); + assert(sna->render.nvertex_reloc); + for (i = 0; i < sna->render.nvertex_reloc; i++) { + DBG(("%s: reloc[%d] = %d\n", __FUNCTION__, + i, sna->render.vertex_reloc[i])); - sna->kgem.batch[sna->render.vertex_reloc[i]] = - kgem_add_reloc(&sna->kgem, - sna->render.vertex_reloc[i], - bo, - I915_GEM_DOMAIN_VERTEX << 16, - delta); - sna->render.vertex_reloc[i] = 0; - } + sna->kgem.batch[sna->render.vertex_reloc[i]] = + kgem_add_reloc(&sna->kgem, + sna->render.vertex_reloc[i], bo, + I915_GEM_DOMAIN_VERTEX << 16, + delta); } + sna->render.nvertex_reloc = 0; if (sna->render.vbo == NULL) { sna->render.vertex_used = 0; @@ -990,7 +985,7 @@ static void gen4_emit_vertex_buffer(struct sna *sna, OUT_BATCH(GEN4_3DSTATE_VERTEX_BUFFERS | 3); OUT_BATCH((id << VB0_BUFFER_INDEX_SHIFT) | VB0_VERTEXDATA | (4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT)); - sna->render.vertex_reloc[id] = sna->kgem.nbatch; + sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch; OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c index 1e0ee108..90b0bdd8 100644 --- a/src/sna/gen5_render.c +++ b/src/sna/gen5_render.c @@ -246,6 +246,7 @@ static int gen5_vertex_finish(struct sna *sna) unsigned int i; assert(sna->render.vertex_used); + assert(sna->render.nvertex_reloc); /* Note: we only need dword alignment (currently) */ @@ -254,27 +255,23 @@ static int gen5_vertex_finish(struct sna *sna) if (sna->render_state.gen5.vertex_offset) gen5_vertex_flush(sna); - for (i = 0; i < ARRAY_SIZE(sna->render.vertex_reloc); i++) { - if (sna->render.vertex_reloc[i]) { - DBG(("%s: reloc[%d] = %d\n", __FUNCTION__, - i, sna->render.vertex_reloc[i])); + for (i = 0; i < sna->render.nvertex_reloc; i++) { + DBG(("%s: reloc[%d] = %d\n", __FUNCTION__, + i, sna->render.vertex_reloc[i])); - sna->kgem.batch[sna->render.vertex_reloc[i]] = - kgem_add_reloc(&sna->kgem, - sna->render.vertex_reloc[i], - bo, - I915_GEM_DOMAIN_VERTEX << 16, - 0); - sna->kgem.batch[sna->render.vertex_reloc[i]+1] = - kgem_add_reloc(&sna->kgem, - sna->render.vertex_reloc[i]+1, - bo, - I915_GEM_DOMAIN_VERTEX << 16, - sna->render.vertex_used * 4 - 1); - sna->render.vertex_reloc[i] = 0; - } + sna->kgem.batch[sna->render.vertex_reloc[i]] = + kgem_add_reloc(&sna->kgem, + sna->render.vertex_reloc[i], bo, + I915_GEM_DOMAIN_VERTEX << 16, + 0); + sna->kgem.batch[sna->render.vertex_reloc[i]+1] = + kgem_add_reloc(&sna->kgem, + sna->render.vertex_reloc[i]+1, bo, + I915_GEM_DOMAIN_VERTEX << 16, + sna->render.vertex_used * 4 - 1); } + sna->render.nvertex_reloc = 0; sna->render.vertex_used = 0; sna->render.vertex_index = 0; sna->render_state.gen5.vb_id = 0; @@ -309,13 +306,12 @@ static void gen5_vertex_close(struct sna *sna) unsigned int i, delta = 0; assert(sna->render_state.gen5.vertex_offset == 0); + if (!sna->render_state.gen5.vb_id) + return; DBG(("%s: used=%d, vbo active? %d\n", __FUNCTION__, sna->render.vertex_used, sna->render.vbo != NULL)); - if (!sna->render.vertex_used) - return; - bo = sna->render.vbo; if (bo) { if (sna->render.vertex_size - sna->render.vertex_used < 64) { @@ -360,26 +356,23 @@ static void gen5_vertex_close(struct sna *sna) } } - for (i = 0; i < ARRAY_SIZE(sna->render.vertex_reloc); i++) { - if (sna->render.vertex_reloc[i]) { - DBG(("%s: reloc[%d] = %d\n", __FUNCTION__, - i, sna->render.vertex_reloc[i])); + assert(sna->render.nvertex_reloc); + for (i = 0; i < sna->render.nvertex_reloc; i++) { + DBG(("%s: reloc[%d] = %d\n", __FUNCTION__, + i, sna->render.vertex_reloc[i])); - sna->kgem.batch[sna->render.vertex_reloc[i]] = - kgem_add_reloc(&sna->kgem, - sna->render.vertex_reloc[i], - bo, - I915_GEM_DOMAIN_VERTEX << 16, - delta); - sna->kgem.batch[sna->render.vertex_reloc[i]+1] = - kgem_add_reloc(&sna->kgem, - sna->render.vertex_reloc[i]+1, - bo, - I915_GEM_DOMAIN_VERTEX << 16, - delta + sna->render.vertex_used * 4 - 1); - sna->render.vertex_reloc[i] = 0; - } + sna->kgem.batch[sna->render.vertex_reloc[i]] = + kgem_add_reloc(&sna->kgem, + sna->render.vertex_reloc[i], bo, + I915_GEM_DOMAIN_VERTEX << 16, + delta); + sna->kgem.batch[sna->render.vertex_reloc[i]+1] = + kgem_add_reloc(&sna->kgem, + sna->render.vertex_reloc[i]+1, bo, + I915_GEM_DOMAIN_VERTEX << 16, + delta + sna->render.vertex_used * 4 - 1); } + sna->render.nvertex_reloc = 0; if (sna->render.vbo == NULL) { sna->render.vertex_used = 0; @@ -977,7 +970,7 @@ static void gen5_emit_vertex_buffer(struct sna *sna, OUT_BATCH(GEN5_3DSTATE_VERTEX_BUFFERS | 3); OUT_BATCH((id << VB0_BUFFER_INDEX_SHIFT) | VB0_VERTEXDATA | (4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT)); - sna->render.vertex_reloc[id] = sna->kgem.nbatch; + sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch; OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c index 3dc0729c..db7599d3 100644 --- a/src/sna/gen6_render.c +++ b/src/sna/gen6_render.c @@ -104,35 +104,34 @@ static const uint32_t ps_kernel_planar[][4] = { #include "exa_wm_write.g6b" }; -#define NOKERNEL(kernel_enum, func, ns, ni) \ - [GEN6_WM_KERNEL_##kernel_enum] = {#kernel_enum, func, 0, ns, ni} -#define KERNEL(kernel_enum, kernel, ns, ni) \ - [GEN6_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), ns, ni} +#define NOKERNEL(kernel_enum, func, ns) \ + [GEN6_WM_KERNEL_##kernel_enum] = {#kernel_enum, func, 0, ns} +#define KERNEL(kernel_enum, kernel, ns) \ + [GEN6_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), ns} static const struct wm_kernel_info { const char *name; const void *data; unsigned int size; unsigned int num_surfaces; - unsigned int num_inputs; } wm_kernels[] = { - NOKERNEL(NOMASK, brw_wm_kernel__affine, 2, 1), - NOKERNEL(NOMASK_P, brw_wm_kernel__projective, 2, 1), + NOKERNEL(NOMASK, brw_wm_kernel__affine, 2), + NOKERNEL(NOMASK_P, brw_wm_kernel__projective, 2), - NOKERNEL(MASK, brw_wm_kernel__affine_mask, 3, 2), - NOKERNEL(MASK_P, brw_wm_kernel__projective_mask, 3, 2), + NOKERNEL(MASK, brw_wm_kernel__affine_mask, 3), + NOKERNEL(MASK_P, brw_wm_kernel__projective_mask, 3), - NOKERNEL(MASKCA, brw_wm_kernel__affine_mask_ca, 3, 2), - NOKERNEL(MASKCA_P, brw_wm_kernel__projective_mask_ca, 3, 2), + NOKERNEL(MASKCA, brw_wm_kernel__affine_mask_ca, 3), + NOKERNEL(MASKCA_P, brw_wm_kernel__projective_mask_ca, 3), - NOKERNEL(MASKSA, brw_wm_kernel__affine_mask_sa, 3, 2), - NOKERNEL(MASKSA_P, brw_wm_kernel__projective_mask_sa, 3, 2), + NOKERNEL(MASKSA, brw_wm_kernel__affine_mask_sa, 3), + NOKERNEL(MASKSA_P, brw_wm_kernel__projective_mask_sa, 3), - NOKERNEL(OPACITY, brw_wm_kernel__affine_opacity, 2, 2), - NOKERNEL(OPACITY_P, brw_wm_kernel__projective_opacity, 2, 2), + NOKERNEL(OPACITY, brw_wm_kernel__affine_opacity, 2), + NOKERNEL(OPACITY_P, brw_wm_kernel__projective_opacity, 2), - KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7, 1), - KERNEL(VIDEO_PACKED, ps_kernel_packed, 2, 1), + KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7), + KERNEL(VIDEO_PACKED, ps_kernel_packed, 2), }; #undef KERNEL @@ -176,7 +175,7 @@ static const struct blendinfo { #define SAMPLER_OFFSET(sf, se, mf, me) \ (((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me) + 2) * 2 * sizeof(struct gen6_sampler_state)) -#define VERTEX_2s2s 4 +#define VERTEX_2s2s 0 #define COPY_SAMPLER 0 #define COPY_VERTEX VERTEX_2s2s @@ -621,7 +620,7 @@ gen6_emit_sf(struct sna *sna, bool has_mask) } static void -gen6_emit_wm(struct sna *sna, unsigned int kernel) +gen6_emit_wm(struct sna *sna, unsigned int kernel, bool has_mask) { const uint32_t *kernels; @@ -649,7 +648,7 @@ gen6_emit_wm(struct sna *sna, unsigned int kernel) (kernels[1] ? GEN6_3DSTATE_WM_16_DISPATCH_ENABLE : 0) | (kernels[2] ? GEN6_3DSTATE_WM_32_DISPATCH_ENABLE : 0) | GEN6_3DSTATE_WM_DISPATCH_ENABLE); - OUT_BATCH(wm_kernels[kernel].num_inputs << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT | + OUT_BATCH((1 + has_mask) << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT | GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC); OUT_BATCH(kernels[2]); OUT_BATCH(kernels[1]); @@ -735,17 +734,17 @@ gen6_emit_vertex_elements(struct sna *sna, * texture coordinate 1 if (has_mask is true): same as above */ struct gen6_render_state *render = &sna->render_state.gen6; - int nelem, selem; - uint32_t w_component; - uint32_t src_format; + uint32_t src_format, dw, offset; int id = GEN6_VERTEX(op->u.gen6.flags); + bool has_mask; + + DBG(("%s: setup id=%d\n", __FUNCTION__, id)); if (render->ve_id == id) return; render->ve_id = id; - switch (id) { - case VERTEX_2s2s: + if (id == VERTEX_2s2s) { DBG(("%s: setup COPY\n", __FUNCTION__)); OUT_BATCH(GEN6_3DSTATE_VERTEX_ELEMENTS | @@ -762,7 +761,7 @@ gen6_emit_vertex_elements(struct sna *sna, /* x,y */ OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | GEN6_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT | - 0 << VE0_OFFSET_SHIFT); /* offsets vb in bytes */ + 0 << VE0_OFFSET_SHIFT); OUT_BATCH(GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT | GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT | GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT | @@ -771,7 +770,7 @@ gen6_emit_vertex_elements(struct sna *sna, /* u0, v0, w0 */ OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | GEN6_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT | - 4 << VE0_OFFSET_SHIFT); /* offset vb in bytes */ + 4 << VE0_OFFSET_SHIFT); OUT_BATCH(GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT | GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT | GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT | @@ -779,17 +778,6 @@ gen6_emit_vertex_elements(struct sna *sna, return; } - nelem = op->mask.bo ? 2 : 1; - if (op->is_affine) { - src_format = GEN6_SURFACEFORMAT_R32G32_FLOAT; - w_component = GEN6_VFCOMPONENT_STORE_1_FLT; - selem = 2; - } else { - src_format = GEN6_SURFACEFORMAT_R32G32B32_FLOAT; - w_component = GEN6_VFCOMPONENT_STORE_SRC; - selem = 3; - } - /* The VUE layout * dword 0-3: pad (0.0, 0.0, 0.0. 0.0) * dword 4-7: position (x, y, 1.0, 1.0), @@ -798,8 +786,9 @@ gen6_emit_vertex_elements(struct sna *sna, * * dword 4-15 are fetched from vertex buffer */ + has_mask = (id >> 2) != 0; OUT_BATCH(GEN6_3DSTATE_VERTEX_ELEMENTS | - ((2 * (2 + nelem)) + 1 - 2)); + ((2 * (3 + has_mask)) + 1 - 2)); OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | GEN6_SURFACEFORMAT_R32G32B32A32_FLOAT << VE0_FORMAT_SHIFT | @@ -812,30 +801,74 @@ gen6_emit_vertex_elements(struct sna *sna, /* x,y */ OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | GEN6_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT | - 0 << VE0_OFFSET_SHIFT); /* offsets vb in bytes */ + 0 << VE0_OFFSET_SHIFT); OUT_BATCH(GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT | GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT | - GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT | + GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT | GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT); + offset = 4; /* u0, v0, w0 */ + DBG(("%s: first channel %d floats, offset=%d\n", __FUNCTION__, id & 3, offset)); + dw = GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT; + switch (id & 3) { + case 1: + src_format = GEN6_SURFACEFORMAT_R32_FLOAT; + dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; + dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT; + dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT; + break; + default: + assert(0); + case 2: + src_format = GEN6_SURFACEFORMAT_R32G32_FLOAT; + dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; + dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; + dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT; + break; + case 3: + src_format = GEN6_SURFACEFORMAT_R32G32B32_FLOAT; + dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; + dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; + dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT; + break; + } OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | src_format << VE0_FORMAT_SHIFT | - 4 << VE0_OFFSET_SHIFT); /* offset vb in bytes */ - OUT_BATCH(GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT | - GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT | - w_component << VE1_VFCOMPONENT_2_SHIFT | - GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT); + offset << VE0_OFFSET_SHIFT); + OUT_BATCH(dw); + offset += (id & 3) * sizeof(float); /* u1, v1, w1 */ - if (op->mask.bo) { + if (has_mask) { + DBG(("%s: second channel %d floats, offset=%d\n", __FUNCTION__, (id >> 2) & 3, offset)); + dw = GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT; + switch ((id >> 2) & 3) { + case 1: + src_format = GEN6_SURFACEFORMAT_R32_FLOAT; + dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; + dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT; + dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT; + break; + default: + assert(0); + case 2: + src_format = GEN6_SURFACEFORMAT_R32G32_FLOAT; + dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; + dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; + dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT; + break; + case 3: + src_format = GEN6_SURFACEFORMAT_R32G32B32_FLOAT; + dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT; + dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT; + dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT; + break; + } OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | src_format << VE0_FORMAT_SHIFT | - ((1 + selem) * 4) << VE0_OFFSET_SHIFT); /* vb offset in bytes */ - OUT_BATCH(GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT | - GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT | - w_component << VE1_VFCOMPONENT_2_SHIFT | - GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT); + offset << VE0_OFFSET_SHIFT); + OUT_BATCH(dw); } } @@ -860,8 +893,8 @@ gen6_emit_state(struct sna *sna, if (gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags))) need_stall = false; gen6_emit_sampler(sna, GEN6_SAMPLER(op->u.gen6.flags)); - gen6_emit_sf(sna, op->mask.bo != NULL); - gen6_emit_wm(sna, GEN6_KERNEL(op->u.gen6.flags)); + gen6_emit_sf(sna, GEN6_VERTEX(op->u.gen6.flags) >> 2); + gen6_emit_wm(sna, GEN6_KERNEL(op->u.gen6.flags), GEN6_VERTEX(op->u.gen6.flags) >> 2); gen6_emit_vertex_elements(sna, op); need_stall |= gen6_emit_binding_table(sna, wm_binding_table & ~1); @@ -900,7 +933,8 @@ static void gen6_magic_ca_pass(struct sna *sna, gen6_emit_wm(sna, gen6_choose_composite_kernel(PictOpAdd, true, true, - op->is_affine)); + op->is_affine), + true); OUT_BATCH(GEN6_3DPRIMITIVE | GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL | @@ -936,6 +970,7 @@ static int gen6_vertex_finish(struct sna *sna) DBG(("%s: used=%d / %d\n", __FUNCTION__, sna->render.vertex_used, sna->render.vertex_size)); assert(sna->render.vertex_used); + assert(sna->render.nvertex_reloc); /* Note: we only need dword alignment (currently) */ @@ -944,27 +979,23 @@ static int gen6_vertex_finish(struct sna *sna) if (sna->render_state.gen6.vertex_offset) gen6_vertex_flush(sna); - for (i = 0; i < ARRAY_SIZE(sna->render.vertex_reloc); i++) { - if (sna->render.vertex_reloc[i]) { - DBG(("%s: reloc[%d] = %d\n", __FUNCTION__, - i, sna->render.vertex_reloc[i])); + for (i = 0; i < sna->render.nvertex_reloc; i++) { + DBG(("%s: reloc[%d] = %d\n", __FUNCTION__, + i, sna->render.vertex_reloc[i])); - sna->kgem.batch[sna->render.vertex_reloc[i]] = - kgem_add_reloc(&sna->kgem, - sna->render.vertex_reloc[i], - bo, - I915_GEM_DOMAIN_VERTEX << 16, - 0); - sna->kgem.batch[sna->render.vertex_reloc[i]+1] = - kgem_add_reloc(&sna->kgem, - sna->render.vertex_reloc[i]+1, - bo, - I915_GEM_DOMAIN_VERTEX << 16, - sna->render.vertex_used * 4 - 1); - sna->render.vertex_reloc[i] = 0; - } + sna->kgem.batch[sna->render.vertex_reloc[i]] = + kgem_add_reloc(&sna->kgem, + sna->render.vertex_reloc[i], bo, + I915_GEM_DOMAIN_VERTEX << 16, + 0); + sna->kgem.batch[sna->render.vertex_reloc[i]+1] = + kgem_add_reloc(&sna->kgem, + sna->render.vertex_reloc[i]+1, bo, + I915_GEM_DOMAIN_VERTEX << 16, + sna->render.vertex_used * 4 - 1); } + sna->render.nvertex_reloc = 0; sna->render.vertex_used = 0; sna->render.vertex_index = 0; sna->render_state.gen6.vb_id = 0; @@ -984,6 +1015,8 @@ static int gen6_vertex_finish(struct sna *sna) return 0; } + DBG(("%s: create vbo handle=%d\n", __FUNCTION__, sna->render.vbo->handle)); + kgem_bo_sync__cpu(&sna->kgem, sna->render.vbo); if (sna->render.vertex_used) { DBG(("%s: copying initial buffer x %d to handle=%d\n", @@ -1005,16 +1038,16 @@ static void gen6_vertex_close(struct sna *sna) assert(sna->render_state.gen6.vertex_offset == 0); - DBG(("%s: used=%d, vbo active? %d\n", - __FUNCTION__, sna->render.vertex_used, sna->render.vbo != NULL)); - - if (!sna->render.vertex_used) + if (!sna->render_state.gen6.vb_id) return; + DBG(("%s: used=%d, vbo active? %d\n", + __FUNCTION__, sna->render.vertex_used, sna->render.vbo ? sna->render.vbo->handle : 0)); + bo = sna->render.vbo; if (bo) { if (sna->render.vertex_size - sna->render.vertex_used < 64) { - DBG(("%s: discarding vbo (full)\n", __FUNCTION__)); + DBG(("%s: discarding vbo (full), handle=%d\n", __FUNCTION__, sna->render.vbo->handle)); sna->render.vbo = NULL; sna->render.vertices = sna->render.vertex_data; sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data); @@ -1045,26 +1078,23 @@ static void gen6_vertex_close(struct sna *sna) } } - for (i = 0; i < ARRAY_SIZE(sna->render.vertex_reloc); i++) { - if (sna->render.vertex_reloc[i]) { - DBG(("%s: reloc[%d] = %d\n", __FUNCTION__, - i, sna->render.vertex_reloc[i])); + assert(sna->render.nvertex_reloc); + for (i = 0; i < sna->render.nvertex_reloc; i++) { + DBG(("%s: reloc[%d] = %d\n", __FUNCTION__, + i, sna->render.vertex_reloc[i])); - sna->kgem.batch[sna->render.vertex_reloc[i]] = - kgem_add_reloc(&sna->kgem, - sna->render.vertex_reloc[i], - bo, - I915_GEM_DOMAIN_VERTEX << 16, - delta); - sna->kgem.batch[sna->render.vertex_reloc[i]+1] = - kgem_add_reloc(&sna->kgem, - sna->render.vertex_reloc[i]+1, - bo, - I915_GEM_DOMAIN_VERTEX << 16, - delta + sna->render.vertex_used * 4 - 1); - sna->render.vertex_reloc[i] = 0; - } + sna->kgem.batch[sna->render.vertex_reloc[i]] = + kgem_add_reloc(&sna->kgem, + sna->render.vertex_reloc[i], bo, + I915_GEM_DOMAIN_VERTEX << 16, + delta); + sna->kgem.batch[sna->render.vertex_reloc[i]+1] = + kgem_add_reloc(&sna->kgem, + sna->render.vertex_reloc[i]+1, bo, + I915_GEM_DOMAIN_VERTEX << 16, + delta + sna->render.vertex_used * 4 - 1); } + sna->render.nvertex_reloc = 0; if (sna->render.vbo == NULL) { sna->render.vertex_used = 0; @@ -1494,7 +1524,7 @@ static void gen6_emit_vertex_buffer(struct sna *sna, OUT_BATCH(GEN6_3DSTATE_VERTEX_BUFFERS | 3); OUT_BATCH(id << VB0_BUFFER_INDEX_SHIFT | VB0_VERTEXDATA | 4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT); - sna->render.vertex_reloc[id] = sna->kgem.nbatch; + sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch; OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); @@ -1624,9 +1654,11 @@ inline static uint32_t *gen6_composite_get_binding_table(struct sna *sna, static uint32_t gen6_choose_composite_vertex_buffer(const struct sna_composite_op *op) { - int has_mask = op->mask.bo != NULL; - int is_affine = op->is_affine; - return has_mask << 1 | is_affine; + int id = 2 + !op->is_affine; + if (op->mask.bo) + id |= id << 2; + assert(id > 0 && id < 16); + return id; } static void @@ -1954,7 +1986,7 @@ gen6_render_video(struct sna *sna, is_planar_fourcc(frame->id) ? GEN6_WM_KERNEL_VIDEO_PLANAR : GEN6_WM_KERNEL_VIDEO_PACKED, - 1); + 2); tmp.priv = frame; kgem_set_mode(&sna->kgem, KGEM_RENDER); @@ -2824,21 +2856,12 @@ gen6_emit_composite_spans_primitive(struct sna *sna, { gen6_emit_composite_spans_vertex(sna, op, box->x2, box->y2); OUT_VERTEX_F(opacity); - OUT_VERTEX_F(1); - if (!op->base.is_affine) - OUT_VERTEX_F(1); gen6_emit_composite_spans_vertex(sna, op, box->x1, box->y2); OUT_VERTEX_F(opacity); - OUT_VERTEX_F(1); - if (!op->base.is_affine) - OUT_VERTEX_F(1); gen6_emit_composite_spans_vertex(sna, op, box->x1, box->y1); OUT_VERTEX_F(opacity); - OUT_VERTEX_F(0); - if (!op->base.is_affine) - OUT_VERTEX_F(1); } fastcall static void @@ -2849,15 +2872,15 @@ gen6_emit_composite_spans_solid(struct sna *sna, { OUT_VERTEX(box->x2, box->y2); OUT_VERTEX_F(1); OUT_VERTEX_F(1); - OUT_VERTEX_F(opacity); OUT_VERTEX_F(1); + OUT_VERTEX_F(opacity); OUT_VERTEX(box->x1, box->y2); OUT_VERTEX_F(0); OUT_VERTEX_F(1); - OUT_VERTEX_F(opacity); OUT_VERTEX_F(1); + OUT_VERTEX_F(opacity); OUT_VERTEX(box->x1, box->y1); OUT_VERTEX_F(0); OUT_VERTEX_F(0); - OUT_VERTEX_F(opacity); OUT_VERTEX_F(0); + OUT_VERTEX_F(opacity); } fastcall static void @@ -2878,24 +2901,24 @@ gen6_emit_composite_spans_identity(struct sna *sna, int16_t ty = op->base.src.offset[1]; v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 3*5; + sna->render.vertex_used += 3*4; + assert(sna->render.vertex_used <= sna->render.vertex_size); dst.p.x = box->x2; dst.p.y = box->y2; v[0] = dst.f; v[1] = (box->x2 + tx) * sx; - v[7] = v[2] = (box->y2 + ty) * sy; - v[13] = v[8] = v[3] = opacity; - v[9] = v[4] = 1; + v[6] = v[2] = (box->y2 + ty) * sy; dst.p.x = box->x1; - v[5] = dst.f; - v[11] = v[6] = (box->x1 + tx) * sx; + v[4] = dst.f; + v[9] = v[5] = (box->x1 + tx) * sx; dst.p.y = box->y1; - v[10] = dst.f; - v[12] = (box->y1 + ty) * sy; - v[14] = 0; + v[8] = dst.f; + v[10] = (box->y1 + ty) * sy; + + v[11] = v[7] = v[3] = opacity; } fastcall static void @@ -2920,24 +2943,24 @@ gen6_emit_composite_spans_simple(struct sna *sna, int16_t ty = op->base.src.offset[1]; v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 3*5; + sna->render.vertex_used += 3*4; + assert(sna->render.vertex_used <= sna->render.vertex_size); dst.p.x = box->x2; dst.p.y = box->y2; v[0] = dst.f; v[1] = ((box->x2 + tx) * xx + x0) * sx; - v[7] = v[2] = ((box->y2 + ty) * yy + y0) * sy; - v[13] = v[8] = v[3] = opacity; - v[9] = v[4] = 1; + v[6] = v[2] = ((box->y2 + ty) * yy + y0) * sy; dst.p.x = box->x1; - v[5] = dst.f; - v[11] = v[6] = ((box->x1 + tx) * xx + x0) * sx; + v[4] = dst.f; + v[9] = v[5] = ((box->x1 + tx) * xx + x0) * sx; dst.p.y = box->y1; - v[10] = dst.f; - v[12] = ((box->y1 + ty) * yy + y0) * sy; - v[14] = 0; + v[8] = dst.f; + v[10] = ((box->y1 + ty) * yy + y0) * sy; + + v[11] = v[7] = v[3] = opacity; } fastcall static void @@ -2950,19 +2973,16 @@ gen6_emit_composite_spans_affine(struct sna *sna, gen6_emit_composite_texcoord_affine(sna, &op->base.src, box->x2, box->y2); OUT_VERTEX_F(opacity); - OUT_VERTEX_F(1); OUT_VERTEX(box->x1, box->y2); gen6_emit_composite_texcoord_affine(sna, &op->base.src, box->x1, box->y2); OUT_VERTEX_F(opacity); - OUT_VERTEX_F(1); OUT_VERTEX(box->x1, box->y1); gen6_emit_composite_texcoord_affine(sna, &op->base.src, box->x1, box->y1); OUT_VERTEX_F(opacity); - OUT_VERTEX_F(0); } fastcall static void @@ -3022,7 +3042,6 @@ gen6_render_composite_spans_done(struct sna *sna, if (sna->render_state.gen6.vertex_offset) gen6_vertex_flush(sna); - kgem_bo_destroy(&sna->kgem, op->base.mask.bo); if (op->base.src.bo) kgem_bo_destroy(&sna->kgem, op->base.src.bo); @@ -3103,9 +3122,7 @@ gen6_render_composite_spans(struct sna *sna, gen6_composite_channel_convert(&tmp->base.src); break; } - tmp->base.mask.bo = sna_render_get_solid(sna, 0); - if (tmp->base.mask.bo == NULL) - goto cleanup_src; + tmp->base.mask.bo = NULL; tmp->base.is_affine = tmp->base.src.is_affine; tmp->base.need_magic_ca_pass = false; @@ -3124,7 +3141,7 @@ gen6_render_composite_spans(struct sna *sna, } else tmp->prim_emit = gen6_emit_composite_spans_affine; } - tmp->base.floats_per_vertex = 5 + 2*!tmp->base.is_affine; + tmp->base.floats_per_vertex = 4 + !tmp->base.is_affine; tmp->base.floats_per_rect = 3 * tmp->base.floats_per_vertex; tmp->base.u.gen6.flags = @@ -3134,7 +3151,7 @@ gen6_render_composite_spans(struct sna *sna, SAMPLER_EXTEND_PAD), gen6_get_blend(tmp->base.op, false, tmp->base.dst.format), GEN6_WM_KERNEL_OPACITY | !tmp->base.is_affine, - 1 << 1 | tmp->base.is_affine); + 1 << 2 | (2+!tmp->base.is_affine)); tmp->box = gen6_render_composite_spans_box; tmp->boxes = gen6_render_composite_spans_boxes; @@ -4086,7 +4103,7 @@ gen6_render_retire(struct kgem *kgem) sna = container_of(kgem, struct sna, kgem); if (kgem->nbatch == 0 && sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) { - DBG(("%s: resetting idle vbo\n", __FUNCTION__)); + DBG(("%s: resetting idle vbo handle=%d\n", __FUNCTION__, sna->render.vbo->handle)); sna->render.vertex_used = 0; sna->render.vertex_index = 0; } @@ -4099,7 +4116,7 @@ gen6_render_expire(struct kgem *kgem) sna = container_of(kgem, struct sna, kgem); if (sna->render.vbo && !sna->render.vertex_used) { - DBG(("%s: discarding vbo\n", __FUNCTION__)); + DBG(("%s: discarding vbo handle=%d\n", __FUNCTION__, sna->render.vbo->handle)); kgem_bo_destroy(kgem, sna->render.vbo); sna->render.vbo = NULL; sna->render.vertices = sna->render.vertex_data; @@ -4114,7 +4131,7 @@ static void gen6_render_reset(struct sna *sna) sna->render_state.gen6.needs_invariant = true; sna->render_state.gen6.first_state_packet = true; sna->render_state.gen6.vb_id = 0; - sna->render_state.gen6.ve_id = -1; + sna->render_state.gen6.ve_id = 3 << 2; sna->render_state.gen6.last_primitive = -1; sna->render_state.gen6.num_sf_outputs = 0; diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c index a199307f..c1993df3 100644 --- a/src/sna/gen7_render.c +++ b/src/sna/gen7_render.c @@ -181,7 +181,7 @@ static const struct blendinfo { #define SAMPLER_OFFSET(sf, se, mf, me) \ ((((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) + 2) * 2 * sizeof(struct gen7_sampler_state)) -#define VERTEX_2s2s 4 +#define VERTEX_2s2s 0 #define COPY_SAMPLER 0 #define COPY_VERTEX VERTEX_2s2s @@ -847,23 +847,23 @@ gen7_emit_vertex_elements(struct sna *sna, * texture coordinate 1 if (has_mask is true): same as above */ struct gen7_render_state *render = &sna->render_state.gen7; - int nelem, selem; - uint32_t w_component; - uint32_t src_format; + uint32_t src_format, dw, offset; int id = GEN7_VERTEX(op->u.gen7.flags); + bool has_mask; + + DBG(("%s: setup id=%d\n", __FUNCTION__, id)); if (render->ve_id == id) return; render->ve_id = id; - switch (id) { - case VERTEX_2s2s: + if (id == VERTEX_2s2s) { DBG(("%s: setup COPY\n", __FUNCTION__)); OUT_BATCH(GEN7_3DSTATE_VERTEX_ELEMENTS | ((2 * (1 + 2)) + 1 - 2)); - OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID | + OUT_BATCH(VERTEX_2s2s << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID | GEN7_SURFACEFORMAT_R32G32B32A32_FLOAT << GEN7_VE0_FORMAT_SHIFT | 0 << GEN7_VE0_OFFSET_SHIFT); OUT_BATCH(GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_0_SHIFT | @@ -872,7 +872,7 @@ gen7_emit_vertex_elements(struct sna *sna, GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_3_SHIFT); /* x,y */ - OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID | + OUT_BATCH(VERTEX_2s2s << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID | GEN7_SURFACEFORMAT_R16G16_SSCALED << GEN7_VE0_FORMAT_SHIFT | 0 << GEN7_VE0_OFFSET_SHIFT); /* offsets vb in bytes */ OUT_BATCH(GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT | @@ -880,7 +880,7 @@ gen7_emit_vertex_elements(struct sna *sna, GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT | GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT); - OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID | + OUT_BATCH(VERTEX_2s2s << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID | GEN7_SURFACEFORMAT_R16G16_SSCALED << GEN7_VE0_FORMAT_SHIFT | 4 << GEN7_VE0_OFFSET_SHIFT); /* offset vb in bytes */ OUT_BATCH(GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT | @@ -890,17 +890,6 @@ gen7_emit_vertex_elements(struct sna *sna, return; } - nelem = op->mask.bo ? 2 : 1; - if (op->is_affine) { - src_format = GEN7_SURFACEFORMAT_R32G32_FLOAT; - w_component = GEN7_VFCOMPONENT_STORE_0; - selem = 2; - } else { - src_format = GEN7_SURFACEFORMAT_R32G32B32_FLOAT; - w_component = GEN7_VFCOMPONENT_STORE_SRC; - selem = 3; - } - /* The VUE layout * dword 0-3: pad (0.0, 0.0, 0.0. 0.0) * dword 4-7: position (x, y, 1.0, 1.0), @@ -909,11 +898,11 @@ gen7_emit_vertex_elements(struct sna *sna, * * dword 4-15 are fetched from vertex buffer */ + has_mask = (id >> 2) != 0; OUT_BATCH(GEN7_3DSTATE_VERTEX_ELEMENTS | - ((2 * (2 + nelem)) + 1 - 2)); + ((2 * (3 + has_mask)) + 1 - 2)); - OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | - GEN7_VE0_VALID | + OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID | GEN7_SURFACEFORMAT_R32G32B32A32_FLOAT << GEN7_VE0_FORMAT_SHIFT | 0 << GEN7_VE0_OFFSET_SHIFT); OUT_BATCH(GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_0_SHIFT | @@ -924,31 +913,74 @@ gen7_emit_vertex_elements(struct sna *sna, /* x,y */ OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID | GEN7_SURFACEFORMAT_R16G16_SSCALED << GEN7_VE0_FORMAT_SHIFT | - 0 << GEN7_VE0_OFFSET_SHIFT); /* offsets vb in bytes */ + 0 << GEN7_VE0_OFFSET_SHIFT); OUT_BATCH(GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT | GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT | GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT | GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT); + offset = 4; /* u0, v0, w0 */ + DBG(("%s: first channel %d floats, offset=%d\n", __FUNCTION__, id & 3, offset)); + dw = GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT; + switch (id & 3) { + case 1: + src_format = GEN7_SURFACEFORMAT_R32_FLOAT; + dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT; + dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_1_SHIFT; + dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT; + break; + default: + assert(0); + case 2: + src_format = GEN7_SURFACEFORMAT_R32G32_FLOAT; + dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT; + dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT; + dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT; + break; + case 3: + src_format = GEN7_SURFACEFORMAT_R32G32B32_FLOAT; + dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT; + dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT; + dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_2_SHIFT; + break; + } OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID | src_format << GEN7_VE0_FORMAT_SHIFT | - 4 << GEN7_VE0_OFFSET_SHIFT); /* offset vb in bytes */ - OUT_BATCH(GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT | - GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT | - w_component << GEN7_VE1_VFCOMPONENT_2_SHIFT | - GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT); + offset << GEN7_VE0_OFFSET_SHIFT); + OUT_BATCH(dw); + offset += (id & 3) * sizeof(float); /* u1, v1, w1 */ - if (op->mask.bo) { - OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | - GEN7_VE0_VALID | + if (has_mask) { + DBG(("%s: second channel %d floats, offset=%d\n", __FUNCTION__, (id >> 2) & 3, offset)); + dw = GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT; + switch ((id >> 2) & 3) { + case 1: + src_format = GEN7_SURFACEFORMAT_R32_FLOAT; + dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT; + dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_1_SHIFT; + dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT; + break; + default: + assert(0); + case 2: + src_format = GEN7_SURFACEFORMAT_R32G32_FLOAT; + dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT; + dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT; + dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT; + break; + case 3: + src_format = GEN7_SURFACEFORMAT_R32G32B32_FLOAT; + dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT; + dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT; + dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_2_SHIFT; + break; + } + OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID | src_format << GEN7_VE0_FORMAT_SHIFT | - ((1 + selem) * 4) << GEN7_VE0_OFFSET_SHIFT); /* vb offset in bytes */ - OUT_BATCH(GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT | - GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT | - w_component << GEN7_VE1_VFCOMPONENT_2_SHIFT | - GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT); + offset << GEN7_VE0_OFFSET_SHIFT); + OUT_BATCH(dw); } } @@ -994,7 +1026,7 @@ gen7_emit_state(struct sna *sna, gen7_emit_cc(sna, GEN7_BLEND(op->u.gen7.flags)); gen7_emit_sampler(sna, GEN7_SAMPLER(op->u.gen7.flags)); - gen7_emit_sf(sna, op->mask.bo != NULL); + gen7_emit_sf(sna, GEN7_VERTEX(op->u.gen7.flags) >> 2); gen7_emit_wm(sna, GEN7_KERNEL(op->u.gen7.flags)); gen7_emit_vertex_elements(sna, op); @@ -1065,6 +1097,7 @@ static int gen7_vertex_finish(struct sna *sna) unsigned int i; assert(sna->render.vertex_used); + assert(sna->render.nvertex_reloc); /* Note: we only need dword alignment (currently) */ @@ -1073,27 +1106,23 @@ static int gen7_vertex_finish(struct sna *sna) if (sna->render_state.gen7.vertex_offset) gen7_vertex_flush(sna); - for (i = 0; i < ARRAY_SIZE(sna->render.vertex_reloc); i++) { - if (sna->render.vertex_reloc[i]) { - DBG(("%s: reloc[%d] = %d\n", __FUNCTION__, - i, sna->render.vertex_reloc[i])); + for (i = 0; i < sna->render.nvertex_reloc; i++) { + DBG(("%s: reloc[%d] = %d\n", __FUNCTION__, + i, sna->render.vertex_reloc[i])); - sna->kgem.batch[sna->render.vertex_reloc[i]] = - kgem_add_reloc(&sna->kgem, - sna->render.vertex_reloc[i], - bo, - I915_GEM_DOMAIN_VERTEX << 16, - 0); - sna->kgem.batch[sna->render.vertex_reloc[i]+1] = - kgem_add_reloc(&sna->kgem, - sna->render.vertex_reloc[i]+1, - bo, - I915_GEM_DOMAIN_VERTEX << 16, - sna->render.vertex_used * 4 - 1); - sna->render.vertex_reloc[i] = 0; - } + sna->kgem.batch[sna->render.vertex_reloc[i]] = + kgem_add_reloc(&sna->kgem, + sna->render.vertex_reloc[i], bo, + I915_GEM_DOMAIN_VERTEX << 16, + 0); + sna->kgem.batch[sna->render.vertex_reloc[i]+1] = + kgem_add_reloc(&sna->kgem, + sna->render.vertex_reloc[i]+1, bo, + I915_GEM_DOMAIN_VERTEX << 16, + sna->render.vertex_used * 4 - 1); } + sna->render.nvertex_reloc = 0; sna->render.vertex_used = 0; sna->render.vertex_index = 0; sna->render_state.gen7.vb_id = 0; @@ -1130,16 +1159,16 @@ static void gen7_vertex_close(struct sna *sna) assert(sna->render_state.gen7.vertex_offset == 0); - DBG(("%s: used=%d, vbo active? %d\n", - __FUNCTION__, sna->render.vertex_used, sna->render.vbo != NULL)); - - if (!sna->render.vertex_used) + if (!sna->render_state.gen7.vb_id) return; + DBG(("%s: used=%d, vbo active? %d\n", + __FUNCTION__, sna->render.vertex_used, sna->render.vbo ? sna->render.vbo->handle : 0)); + bo = sna->render.vbo; if (bo) { if (sna->render.vertex_size - sna->render.vertex_used < 64) { - DBG(("%s: discarding vbo (full)\n", __FUNCTION__)); + DBG(("%s: discarding vbo (full), handle=%d\n", __FUNCTION__, sna->render.vbo->handle)); sna->render.vbo = NULL; sna->render.vertices = sna->render.vertex_data; sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data); @@ -1170,30 +1199,29 @@ static void gen7_vertex_close(struct sna *sna) } } - for (i = 0; i < ARRAY_SIZE(sna->render.vertex_reloc); i++) { - if (sna->render.vertex_reloc[i]) { - DBG(("%s: reloc[%d] = %d\n", __FUNCTION__, - i, sna->render.vertex_reloc[i])); + assert(sna->render.nvertex_reloc); + for (i = 0; i < sna->render.nvertex_reloc; i++) { + DBG(("%s: reloc[%d] = %d\n", __FUNCTION__, + i, sna->render.vertex_reloc[i])); - sna->kgem.batch[sna->render.vertex_reloc[i]] = - kgem_add_reloc(&sna->kgem, - sna->render.vertex_reloc[i], - bo, - I915_GEM_DOMAIN_VERTEX << 16, - delta); - sna->kgem.batch[sna->render.vertex_reloc[i]+1] = - kgem_add_reloc(&sna->kgem, - sna->render.vertex_reloc[i]+1, - bo, - I915_GEM_DOMAIN_VERTEX << 16, - delta + sna->render.vertex_used * 4 - 1); - sna->render.vertex_reloc[i] = 0; - } + sna->kgem.batch[sna->render.vertex_reloc[i]] = + kgem_add_reloc(&sna->kgem, + sna->render.vertex_reloc[i], bo, + I915_GEM_DOMAIN_VERTEX << 16, + delta); + sna->kgem.batch[sna->render.vertex_reloc[i]+1] = + kgem_add_reloc(&sna->kgem, + sna->render.vertex_reloc[i]+1, bo, + I915_GEM_DOMAIN_VERTEX << 16, + delta + sna->render.vertex_used * 4 - 1); } + sna->render.nvertex_reloc = 0; if (sna->render.vbo == NULL) { sna->render.vertex_used = 0; sna->render.vertex_index = 0; + assert(sna->render.vertices == sna->render.vertex_data); + assert(sna->render.vertex_size == ARRAY_SIZE(sna->render.vertex_data)); } if (free_bo) @@ -1360,6 +1388,8 @@ gen7_emit_composite_primitive_solid(struct sna *sna, v = sna->render.vertices + sna->render.vertex_used; sna->render.vertex_used += 9; + assert(sna->render.vertex_used <= sna->render.vertex_size); + assert(!too_large(r->dst.x + r->width, r->dst.y + r->height)); dst.p.x = r->dst.x + r->width; dst.p.y = r->dst.y + r->height; @@ -1599,7 +1629,7 @@ static void gen7_emit_vertex_buffer(struct sna *sna, GEN7_VB0_VERTEXDATA | GEN7_VB0_ADDRESS_MODIFY_ENABLE | 4*op->floats_per_vertex << GEN7_VB0_BUFFER_PITCH_SHIFT); - sna->render.vertex_reloc[id] = sna->kgem.nbatch; + sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch; OUT_BATCH(0); OUT_BATCH(0); OUT_BATCH(0); @@ -1686,6 +1716,7 @@ start: if (want > 1 && want * op->floats_per_rect > rem) want = rem / op->floats_per_rect; + assert(want > 0); sna->render.vertex_index += 3*want; return want; @@ -1719,9 +1750,11 @@ inline static uint32_t *gen7_composite_get_binding_table(struct sna *sna, static uint32_t gen7_choose_composite_vertex_buffer(const struct sna_composite_op *op) { - int has_mask = op->mask.bo != NULL; - int is_affine = op->is_affine; - return has_mask << 1 | is_affine; + int id = 2 + !op->is_affine; + if (op->mask.bo) + id |= id << 2; + assert(id > 0 && id < 16); + return id; } static void @@ -2908,21 +2941,12 @@ gen7_emit_composite_spans_primitive(struct sna *sna, { gen7_emit_composite_spans_vertex(sna, op, box->x2, box->y2); OUT_VERTEX_F(opacity); - OUT_VERTEX_F(1); - if (!op->base.is_affine) - OUT_VERTEX_F(1); gen7_emit_composite_spans_vertex(sna, op, box->x1, box->y2); OUT_VERTEX_F(opacity); - OUT_VERTEX_F(1); - if (!op->base.is_affine) - OUT_VERTEX_F(1); gen7_emit_composite_spans_vertex(sna, op, box->x1, box->y1); OUT_VERTEX_F(opacity); - OUT_VERTEX_F(0); - if (!op->base.is_affine) - OUT_VERTEX_F(1); } fastcall static void @@ -2933,15 +2957,15 @@ gen7_emit_composite_spans_solid(struct sna *sna, { OUT_VERTEX(box->x2, box->y2); OUT_VERTEX_F(1); OUT_VERTEX_F(1); - OUT_VERTEX_F(opacity); OUT_VERTEX_F(1); + OUT_VERTEX_F(opacity); OUT_VERTEX(box->x1, box->y2); OUT_VERTEX_F(0); OUT_VERTEX_F(1); - OUT_VERTEX_F(opacity); OUT_VERTEX_F(1); + OUT_VERTEX_F(opacity); OUT_VERTEX(box->x1, box->y1); OUT_VERTEX_F(0); OUT_VERTEX_F(0); - OUT_VERTEX_F(opacity); OUT_VERTEX_F(0); + OUT_VERTEX_F(opacity); } fastcall static void @@ -2962,24 +2986,24 @@ gen7_emit_composite_spans_identity(struct sna *sna, int16_t ty = op->base.src.offset[1]; v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 3*5; + sna->render.vertex_used += 3*4; + assert(sna->render.vertex_used <= sna->render.vertex_size); dst.p.x = box->x2; dst.p.y = box->y2; v[0] = dst.f; v[1] = (box->x2 + tx) * sx; - v[7] = v[2] = (box->y2 + ty) * sy; - v[13] = v[8] = v[3] = opacity; - v[9] = v[4] = 1; + v[6] = v[2] = (box->y2 + ty) * sy; dst.p.x = box->x1; - v[5] = dst.f; - v[11] = v[6] = (box->x1 + tx) * sx; + v[4] = dst.f; + v[9] = v[5] = (box->x1 + tx) * sx; dst.p.y = box->y1; - v[10] = dst.f; - v[12] = (box->y1 + ty) * sy; - v[14] = 0; + v[8] = dst.f; + v[10] = (box->y1 + ty) * sy; + + v[11] = v[7] = v[3] = opacity; } fastcall static void @@ -3004,24 +3028,24 @@ gen7_emit_composite_spans_simple(struct sna *sna, int16_t ty = op->base.src.offset[1]; v = sna->render.vertices + sna->render.vertex_used; - sna->render.vertex_used += 3*5; + sna->render.vertex_used += 3*4; + assert(sna->render.vertex_used <= sna->render.vertex_size); dst.p.x = box->x2; dst.p.y = box->y2; v[0] = dst.f; v[1] = ((box->x2 + tx) * xx + x0) * sx; - v[7] = v[2] = ((box->y2 + ty) * yy + y0) * sy; - v[13] = v[8] = v[3] = opacity; - v[9] = v[4] = 1; + v[6] = v[2] = ((box->y2 + ty) * yy + y0) * sy; dst.p.x = box->x1; - v[5] = dst.f; - v[11] = v[6] = ((box->x1 + tx) * xx + x0) * sx; + v[4] = dst.f; + v[9] = v[5] = ((box->x1 + tx) * xx + x0) * sx; dst.p.y = box->y1; - v[10] = dst.f; - v[12] = ((box->y1 + ty) * yy + y0) * sy; - v[14] = 0; + v[8] = dst.f; + v[10] = ((box->y1 + ty) * yy + y0) * sy; + + v[11] = v[7] = v[3] = opacity; } fastcall static void @@ -3034,19 +3058,16 @@ gen7_emit_composite_spans_affine(struct sna *sna, gen7_emit_composite_texcoord_affine(sna, &op->base.src, box->x2, box->y2); OUT_VERTEX_F(opacity); - OUT_VERTEX_F(1); OUT_VERTEX(box->x1, box->y2); gen7_emit_composite_texcoord_affine(sna, &op->base.src, box->x1, box->y2); OUT_VERTEX_F(opacity); - OUT_VERTEX_F(1); OUT_VERTEX(box->x1, box->y1); gen7_emit_composite_texcoord_affine(sna, &op->base.src, box->x1, box->y1); OUT_VERTEX_F(opacity); - OUT_VERTEX_F(0); } fastcall static void @@ -3106,7 +3127,6 @@ gen7_render_composite_spans_done(struct sna *sna, DBG(("%s()\n", __FUNCTION__)); - kgem_bo_destroy(&sna->kgem, op->base.mask.bo); if (op->base.src.bo) kgem_bo_destroy(&sna->kgem, op->base.src.bo); @@ -3184,9 +3204,7 @@ gen7_render_composite_spans(struct sna *sna, gen7_composite_channel_convert(&tmp->base.src); break; } - tmp->base.mask.bo = sna_render_get_solid(sna, 0); - if (tmp->base.mask.bo == NULL) - goto cleanup_src; + tmp->base.mask.bo = NULL; tmp->base.is_affine = tmp->base.src.is_affine; tmp->base.need_magic_ca_pass = false; @@ -3205,7 +3223,7 @@ gen7_render_composite_spans(struct sna *sna, } else tmp->prim_emit = gen7_emit_composite_spans_affine; } - tmp->base.floats_per_vertex = 5 + 2*!tmp->base.is_affine; + tmp->base.floats_per_vertex = 4 + !tmp->base.is_affine; tmp->base.floats_per_rect = 3 * tmp->base.floats_per_vertex; tmp->base.u.gen7.flags = @@ -3215,7 +3233,7 @@ gen7_render_composite_spans(struct sna *sna, SAMPLER_EXTEND_PAD), gen7_get_blend(tmp->base.op, false, tmp->base.dst.format), GEN7_WM_KERNEL_OPACITY | !tmp->base.is_affine, - 1 << 1 | tmp->base.is_affine); + 1 << 2 | (2+!tmp->base.is_affine)); tmp->box = gen7_render_composite_spans_box; tmp->boxes = gen7_render_composite_spans_boxes; @@ -4197,7 +4215,7 @@ static void gen7_render_reset(struct sna *sna) sna->render_state.gen7.emit_flush = false; sna->render_state.gen7.needs_invariant = true; sna->render_state.gen7.vb_id = 0; - sna->render_state.gen7.ve_id = -1; + sna->render_state.gen7.ve_id = 3 << 2; sna->render_state.gen7.last_primitive = -1; sna->render_state.gen7.num_sf_outputs = 0; diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h index ddcafdbb..68bb9018 100644 --- a/src/sna/sna_render.h +++ b/src/sna/sna_render.h @@ -286,7 +286,8 @@ struct sna_render { uint16_t vertex_index; uint16_t vertex_used; uint16_t vertex_size; - uint16_t vertex_reloc[8]; + uint16_t vertex_reloc[16]; + int nvertex_reloc; struct kgem_bo *vbo; float *vertices;