sna/gen6+: Reduce floats-per-vertex for spans

commit 33c028f8be (parent 9b2873d3d9)
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   2012-08-01 01:17:50 +01:00

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

5 files changed, 359 insertions(+), 335 deletions(-)
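The core of the change is a new vertex-element id encoding. The old id was `has_mask << 1 | is_affine`; the new one packs the float count of each texture-coordinate channel (bits 0-1 for the first channel, bits 2-3 for the second, 0 meaning the channel is absent), with id 0 reserved for the `VERTEX_2s2s` short-coordinate copy layout. Opacity spans now declare a one-float second channel instead of carrying a dummy solid mask, which is what trims a float from every span vertex. A minimal standalone sketch of the arithmetic (the helper name is mine, not the driver's):

	#include <assert.h>
	#include <stdio.h>

	/* Decode the ve_id packing used by this commit:
	 * bits 0-1: floats in the first texcoord channel (1, 2 or 3);
	 * bits 2-3: floats in the second channel, 0 if unused.
	 * Each vertex also spends one float slot on the packed int16 x,y pair. */
	static int floats_per_vertex_for(int id)
	{
		assert(id > 0 && id < 16);
		return 1 + (id & 3) + ((id >> 2) & 3);
	}

	int main(void)
	{
		printf("%d\n", floats_per_vertex_for(2));  /* affine composite, no mask: 3 */
		printf("%d\n", floats_per_vertex_for(10)); /* affine composite with mask: 5 */
		printf("%d\n", floats_per_vertex_for(6));  /* affine span (u,v + opacity): 4, down from 5 */
		return 0;
	}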

diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c

@@ -279,6 +279,7 @@ static int gen4_vertex_finish(struct sna *sna)
 	unsigned int i;

 	assert(sna->render.vertex_used);
+	assert(sna->render.nvertex_reloc);

 	/* Note: we only need dword alignment (currently) */
@@ -286,21 +287,18 @@ static int gen4_vertex_finish(struct sna *sna)
 	if (bo) {
 		gen4_vertex_flush(sna);

-		for (i = 0; i < ARRAY_SIZE(sna->render.vertex_reloc); i++) {
-			if (sna->render.vertex_reloc[i]) {
-				DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
-				     i, sna->render.vertex_reloc[i]));
+		for (i = 0; i < sna->render.nvertex_reloc; i++) {
+			DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
+			     i, sna->render.vertex_reloc[i]));

-				sna->kgem.batch[sna->render.vertex_reloc[i]] =
-					kgem_add_reloc(&sna->kgem,
-						       sna->render.vertex_reloc[i],
-						       bo,
-						       I915_GEM_DOMAIN_VERTEX << 16,
-						       0);
-				sna->render.vertex_reloc[i] = 0;
-			}
+			sna->kgem.batch[sna->render.vertex_reloc[i]] =
+				kgem_add_reloc(&sna->kgem,
+					       sna->render.vertex_reloc[i], bo,
+					       I915_GEM_DOMAIN_VERTEX << 16,
+					       0);
 		}
+		sna->render.nvertex_reloc = 0;

 		sna->render.vertex_used = 0;
 		sna->render.vertex_index = 0;
 		sna->render_state.gen4.vb_id = 0;
@@ -335,13 +333,12 @@ static void gen4_vertex_close(struct sna *sna)
 	unsigned int i, delta = 0;

 	assert(sna->render_state.gen4.vertex_offset == 0);
+	if (!sna->render_state.gen4.vb_id)
+		return;

 	DBG(("%s: used=%d, vbo active? %d\n",
 	     __FUNCTION__, sna->render.vertex_used, sna->render.vbo != NULL));

-	if (!sna->render.vertex_used)
-		return;
-
 	bo = sna->render.vbo;
 	if (bo) {
 		if (sna->render.vertex_size - sna->render.vertex_used < 64) {
@@ -386,20 +383,18 @@ static void gen4_vertex_close(struct sna *sna)
 		}
 	}

-	for (i = 0; i < ARRAY_SIZE(sna->render.vertex_reloc); i++) {
-		if (sna->render.vertex_reloc[i]) {
-			DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
-			     i, sna->render.vertex_reloc[i]));
+	assert(sna->render.nvertex_reloc);
+	for (i = 0; i < sna->render.nvertex_reloc; i++) {
+		DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
+		     i, sna->render.vertex_reloc[i]));

-			sna->kgem.batch[sna->render.vertex_reloc[i]] =
-				kgem_add_reloc(&sna->kgem,
-					       sna->render.vertex_reloc[i],
-					       bo,
-					       I915_GEM_DOMAIN_VERTEX << 16,
-					       delta);
-			sna->render.vertex_reloc[i] = 0;
-		}
+		sna->kgem.batch[sna->render.vertex_reloc[i]] =
+			kgem_add_reloc(&sna->kgem,
+				       sna->render.vertex_reloc[i], bo,
+				       I915_GEM_DOMAIN_VERTEX << 16,
+				       delta);
 	}
+	sna->render.nvertex_reloc = 0;

 	if (sna->render.vbo == NULL) {
 		sna->render.vertex_used = 0;
@@ -990,7 +985,7 @@ static void gen4_emit_vertex_buffer(struct sna *sna,
 	OUT_BATCH(GEN4_3DSTATE_VERTEX_BUFFERS | 3);
 	OUT_BATCH((id << VB0_BUFFER_INDEX_SHIFT) | VB0_VERTEXDATA |
 		  (4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT));
-	sna->render.vertex_reloc[id] = sna->kgem.nbatch;
+	sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
 	OUT_BATCH(0);
 	OUT_BATCH(0);
 	OUT_BATCH(0);
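The relocation bookkeeping changes the same way on every generation: `vertex_reloc[]` is no longer indexed by vertex-buffer id and scanned in full for non-zero slots. Emitters append the batch offset of each vertex-buffer packet and bump `nvertex_reloc`, and the finish/close paths patch exactly that many entries. A simplified sketch of the pattern (illustrative names, not driver code):

	#include <stdint.h>

	#define MAX_VERTEX_RELOC 16

	struct reloc_list {
		uint16_t offset[MAX_VERTEX_RELOC]; /* batch offsets, in emission order */
		int count;
	};

	static void record_vertex_buffer(struct reloc_list *r, uint16_t batch_offset)
	{
		r->offset[r->count++] = batch_offset;
	}

	static void patch_all(struct reloc_list *r, void (*patch)(uint16_t))
	{
		for (int i = 0; i < r->count; i++)
			patch(r->offset[i]);
		r->count = 0; /* consumed; no per-slot zeroing required */
	}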

diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c

@@ -246,6 +246,7 @@ static int gen5_vertex_finish(struct sna *sna)
 	unsigned int i;

 	assert(sna->render.vertex_used);
+	assert(sna->render.nvertex_reloc);

 	/* Note: we only need dword alignment (currently) */
@@ -254,27 +255,23 @@ static int gen5_vertex_finish(struct sna *sna)
 		if (sna->render_state.gen5.vertex_offset)
 			gen5_vertex_flush(sna);

-		for (i = 0; i < ARRAY_SIZE(sna->render.vertex_reloc); i++) {
-			if (sna->render.vertex_reloc[i]) {
-				DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
-				     i, sna->render.vertex_reloc[i]));
+		for (i = 0; i < sna->render.nvertex_reloc; i++) {
+			DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
+			     i, sna->render.vertex_reloc[i]));

-				sna->kgem.batch[sna->render.vertex_reloc[i]] =
-					kgem_add_reloc(&sna->kgem,
-						       sna->render.vertex_reloc[i],
-						       bo,
-						       I915_GEM_DOMAIN_VERTEX << 16,
-						       0);
-				sna->kgem.batch[sna->render.vertex_reloc[i]+1] =
-					kgem_add_reloc(&sna->kgem,
-						       sna->render.vertex_reloc[i]+1,
-						       bo,
-						       I915_GEM_DOMAIN_VERTEX << 16,
-						       sna->render.vertex_used * 4 - 1);
-				sna->render.vertex_reloc[i] = 0;
-			}
+			sna->kgem.batch[sna->render.vertex_reloc[i]] =
+				kgem_add_reloc(&sna->kgem,
+					       sna->render.vertex_reloc[i], bo,
+					       I915_GEM_DOMAIN_VERTEX << 16,
+					       0);
+			sna->kgem.batch[sna->render.vertex_reloc[i]+1] =
+				kgem_add_reloc(&sna->kgem,
+					       sna->render.vertex_reloc[i]+1, bo,
+					       I915_GEM_DOMAIN_VERTEX << 16,
+					       sna->render.vertex_used * 4 - 1);
 		}
+		sna->render.nvertex_reloc = 0;

 		sna->render.vertex_used = 0;
 		sna->render.vertex_index = 0;
 		sna->render_state.gen5.vb_id = 0;
@@ -309,13 +306,12 @@ static void gen5_vertex_close(struct sna *sna)
 	unsigned int i, delta = 0;

 	assert(sna->render_state.gen5.vertex_offset == 0);
+	if (!sna->render_state.gen5.vb_id)
+		return;

 	DBG(("%s: used=%d, vbo active? %d\n",
 	     __FUNCTION__, sna->render.vertex_used, sna->render.vbo != NULL));

-	if (!sna->render.vertex_used)
-		return;
-
 	bo = sna->render.vbo;
 	if (bo) {
 		if (sna->render.vertex_size - sna->render.vertex_used < 64) {
@@ -360,26 +356,23 @@ static void gen5_vertex_close(struct sna *sna)
 		}
 	}

-	for (i = 0; i < ARRAY_SIZE(sna->render.vertex_reloc); i++) {
-		if (sna->render.vertex_reloc[i]) {
-			DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
-			     i, sna->render.vertex_reloc[i]));
+	assert(sna->render.nvertex_reloc);
+	for (i = 0; i < sna->render.nvertex_reloc; i++) {
+		DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
+		     i, sna->render.vertex_reloc[i]));

-			sna->kgem.batch[sna->render.vertex_reloc[i]] =
-				kgem_add_reloc(&sna->kgem,
-					       sna->render.vertex_reloc[i],
-					       bo,
-					       I915_GEM_DOMAIN_VERTEX << 16,
-					       delta);
-			sna->kgem.batch[sna->render.vertex_reloc[i]+1] =
-				kgem_add_reloc(&sna->kgem,
-					       sna->render.vertex_reloc[i]+1,
-					       bo,
-					       I915_GEM_DOMAIN_VERTEX << 16,
-					       delta + sna->render.vertex_used * 4 - 1);
-			sna->render.vertex_reloc[i] = 0;
-		}
+		sna->kgem.batch[sna->render.vertex_reloc[i]] =
+			kgem_add_reloc(&sna->kgem,
+				       sna->render.vertex_reloc[i], bo,
+				       I915_GEM_DOMAIN_VERTEX << 16,
+				       delta);
+		sna->kgem.batch[sna->render.vertex_reloc[i]+1] =
+			kgem_add_reloc(&sna->kgem,
+				       sna->render.vertex_reloc[i]+1, bo,
+				       I915_GEM_DOMAIN_VERTEX << 16,
+				       delta + sna->render.vertex_used * 4 - 1);
 	}
+	sna->render.nvertex_reloc = 0;

 	if (sna->render.vbo == NULL) {
 		sna->render.vertex_used = 0;
@@ -977,7 +970,7 @@ static void gen5_emit_vertex_buffer(struct sna *sna,
 	OUT_BATCH(GEN5_3DSTATE_VERTEX_BUFFERS | 3);
 	OUT_BATCH((id << VB0_BUFFER_INDEX_SHIFT) | VB0_VERTEXDATA |
 		  (4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT));
-	sna->render.vertex_reloc[id] = sna->kgem.nbatch;
+	sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
 	OUT_BATCH(0);
 	OUT_BATCH(0);
 	OUT_BATCH(0);

diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c

@@ -104,35 +104,34 @@ static const uint32_t ps_kernel_planar[][4] = {
 #include "exa_wm_write.g6b"
 };

-#define NOKERNEL(kernel_enum, func, ns, ni) \
-    [GEN6_WM_KERNEL_##kernel_enum] = {#kernel_enum, func, 0, ns, ni}
-#define KERNEL(kernel_enum, kernel, ns, ni) \
-    [GEN6_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), ns, ni}
+#define NOKERNEL(kernel_enum, func, ns) \
+    [GEN6_WM_KERNEL_##kernel_enum] = {#kernel_enum, func, 0, ns}
+#define KERNEL(kernel_enum, kernel, ns) \
+    [GEN6_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), ns}

 static const struct wm_kernel_info {
 	const char *name;
 	const void *data;
 	unsigned int size;
 	unsigned int num_surfaces;
-	unsigned int num_inputs;
 } wm_kernels[] = {
-	NOKERNEL(NOMASK, brw_wm_kernel__affine, 2, 1),
-	NOKERNEL(NOMASK_P, brw_wm_kernel__projective, 2, 1),
+	NOKERNEL(NOMASK, brw_wm_kernel__affine, 2),
+	NOKERNEL(NOMASK_P, brw_wm_kernel__projective, 2),

-	NOKERNEL(MASK, brw_wm_kernel__affine_mask, 3, 2),
-	NOKERNEL(MASK_P, brw_wm_kernel__projective_mask, 3, 2),
+	NOKERNEL(MASK, brw_wm_kernel__affine_mask, 3),
+	NOKERNEL(MASK_P, brw_wm_kernel__projective_mask, 3),

-	NOKERNEL(MASKCA, brw_wm_kernel__affine_mask_ca, 3, 2),
-	NOKERNEL(MASKCA_P, brw_wm_kernel__projective_mask_ca, 3, 2),
+	NOKERNEL(MASKCA, brw_wm_kernel__affine_mask_ca, 3),
+	NOKERNEL(MASKCA_P, brw_wm_kernel__projective_mask_ca, 3),

-	NOKERNEL(MASKSA, brw_wm_kernel__affine_mask_sa, 3, 2),
-	NOKERNEL(MASKSA_P, brw_wm_kernel__projective_mask_sa, 3, 2),
+	NOKERNEL(MASKSA, brw_wm_kernel__affine_mask_sa, 3),
+	NOKERNEL(MASKSA_P, brw_wm_kernel__projective_mask_sa, 3),

-	NOKERNEL(OPACITY, brw_wm_kernel__affine_opacity, 2, 2),
-	NOKERNEL(OPACITY_P, brw_wm_kernel__projective_opacity, 2, 2),
+	NOKERNEL(OPACITY, brw_wm_kernel__affine_opacity, 2),
+	NOKERNEL(OPACITY_P, brw_wm_kernel__projective_opacity, 2),

-	KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7, 1),
-	KERNEL(VIDEO_PACKED, ps_kernel_packed, 2, 1),
+	KERNEL(VIDEO_PLANAR, ps_kernel_planar, 7),
+	KERNEL(VIDEO_PACKED, ps_kernel_packed, 2),
 };
 #undef KERNEL
@@ -176,7 +175,7 @@ static const struct blendinfo {
 #define SAMPLER_OFFSET(sf, se, mf, me) \
 	(((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me) + 2) * 2 * sizeof(struct gen6_sampler_state))

-#define VERTEX_2s2s 4
+#define VERTEX_2s2s 0

 #define COPY_SAMPLER 0
 #define COPY_VERTEX VERTEX_2s2s
@@ -621,7 +620,7 @@ gen6_emit_sf(struct sna *sna, bool has_mask)
 }

 static void
-gen6_emit_wm(struct sna *sna, unsigned int kernel)
+gen6_emit_wm(struct sna *sna, unsigned int kernel, bool has_mask)
 {
 	const uint32_t *kernels;
@@ -649,7 +648,7 @@ gen6_emit_wm(struct sna *sna, unsigned int kernel)
 		  (kernels[1] ? GEN6_3DSTATE_WM_16_DISPATCH_ENABLE : 0) |
 		  (kernels[2] ? GEN6_3DSTATE_WM_32_DISPATCH_ENABLE : 0) |
 		  GEN6_3DSTATE_WM_DISPATCH_ENABLE);
-	OUT_BATCH(wm_kernels[kernel].num_inputs << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT |
+	OUT_BATCH((1 + has_mask) << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT |
 		  GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
 	OUT_BATCH(kernels[2]);
 	OUT_BATCH(kernels[1]);
@@ -735,17 +734,17 @@ gen6_emit_vertex_elements(struct sna *sna,
 	 * texture coordinate 1 if (has_mask is true): same as above
 	 */
 	struct gen6_render_state *render = &sna->render_state.gen6;
-	int nelem, selem;
-	uint32_t w_component;
-	uint32_t src_format;
+	uint32_t src_format, dw, offset;
 	int id = GEN6_VERTEX(op->u.gen6.flags);
+	bool has_mask;

 	DBG(("%s: setup id=%d\n", __FUNCTION__, id));

 	if (render->ve_id == id)
 		return;
 	render->ve_id = id;

-	switch (id) {
-	case VERTEX_2s2s:
+	if (id == VERTEX_2s2s) {
 		DBG(("%s: setup COPY\n", __FUNCTION__));

 		OUT_BATCH(GEN6_3DSTATE_VERTEX_ELEMENTS |
@@ -762,7 +761,7 @@ gen6_emit_vertex_elements(struct sna *sna,
 		/* x,y */
 		OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
 			  GEN6_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
-			  0 << VE0_OFFSET_SHIFT); /* offsets vb in bytes */
+			  0 << VE0_OFFSET_SHIFT);
 		OUT_BATCH(GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
 			  GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
 			  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
@@ -771,7 +770,7 @@ gen6_emit_vertex_elements(struct sna *sna,
 		/* u0, v0, w0 */
 		OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
 			  GEN6_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
-			  4 << VE0_OFFSET_SHIFT); /* offset vb in bytes */
+			  4 << VE0_OFFSET_SHIFT);
 		OUT_BATCH(GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
 			  GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
 			  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
@@ -779,17 +778,6 @@ gen6_emit_vertex_elements(struct sna *sna,
 		return;
 	}

-	nelem = op->mask.bo ? 2 : 1;
-
-	if (op->is_affine) {
-		src_format = GEN6_SURFACEFORMAT_R32G32_FLOAT;
-		w_component = GEN6_VFCOMPONENT_STORE_1_FLT;
-		selem = 2;
-	} else {
-		src_format = GEN6_SURFACEFORMAT_R32G32B32_FLOAT;
-		w_component = GEN6_VFCOMPONENT_STORE_SRC;
-		selem = 3;
-	}

 	/* The VUE layout
 	 * dword 0-3: pad (0.0, 0.0, 0.0. 0.0)
 	 * dword 4-7: position (x, y, 1.0, 1.0),
@@ -798,8 +786,9 @@ gen6_emit_vertex_elements(struct sna *sna,
 	 *
 	 * dword 4-15 are fetched from vertex buffer
 	 */
+	has_mask = (id >> 2) != 0;
 	OUT_BATCH(GEN6_3DSTATE_VERTEX_ELEMENTS |
-		  ((2 * (2 + nelem)) + 1 - 2));
+		  ((2 * (3 + has_mask)) + 1 - 2));

 	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
 		  GEN6_SURFACEFORMAT_R32G32B32A32_FLOAT << VE0_FORMAT_SHIFT |
@@ -812,30 +801,74 @@ gen6_emit_vertex_elements(struct sna *sna,
 	/* x,y */
 	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
 		  GEN6_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT |
-		  0 << VE0_OFFSET_SHIFT); /* offsets vb in bytes */
+		  0 << VE0_OFFSET_SHIFT);
 	OUT_BATCH(GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
 		  GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
-		  GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT |
+		  GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT |
 		  GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT);
+	offset = 4;

 	/* u0, v0, w0 */
+	DBG(("%s: first channel %d floats, offset=%d\n", __FUNCTION__, id & 3, offset));
+	dw = GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
+	switch (id & 3) {
+	case 1:
+		src_format = GEN6_SURFACEFORMAT_R32_FLOAT;
+		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
+		dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
+		dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
+		break;
+	default:
+		assert(0);
+	case 2:
+		src_format = GEN6_SURFACEFORMAT_R32G32_FLOAT;
+		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
+		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
+		dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
+		break;
+	case 3:
+		src_format = GEN6_SURFACEFORMAT_R32G32B32_FLOAT;
+		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
+		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
+		dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
+		break;
+	}
 	OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
 		  src_format << VE0_FORMAT_SHIFT |
-		  4 << VE0_OFFSET_SHIFT); /* offset vb in bytes */
-	OUT_BATCH(GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
-		  GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
-		  w_component << VE1_VFCOMPONENT_2_SHIFT |
-		  GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT);
+		  offset << VE0_OFFSET_SHIFT);
+	OUT_BATCH(dw);
+	offset += (id & 3) * sizeof(float);

 	/* u1, v1, w1 */
-	if (op->mask.bo) {
+	if (has_mask) {
+		DBG(("%s: second channel %d floats, offset=%d\n", __FUNCTION__, (id >> 2) & 3, offset));
+		dw = GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT;
+		switch ((id >> 2) & 3) {
+		case 1:
+			src_format = GEN6_SURFACEFORMAT_R32_FLOAT;
+			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
+			dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT;
+			dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
+			break;
+		default:
+			assert(0);
+		case 2:
+			src_format = GEN6_SURFACEFORMAT_R32G32_FLOAT;
+			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
+			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
+			dw |= GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT;
+			break;
+		case 3:
+			src_format = GEN6_SURFACEFORMAT_R32G32B32_FLOAT;
+			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT;
+			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT;
+			dw |= GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_2_SHIFT;
+			break;
+		}
 		OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID |
 			  src_format << VE0_FORMAT_SHIFT |
-			  ((1 + selem) * 4) << VE0_OFFSET_SHIFT); /* vb offset in bytes */
-		OUT_BATCH(GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT |
-			  GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT |
-			  w_component << VE1_VFCOMPONENT_2_SHIFT |
-			  GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT);
+			  offset << VE0_OFFSET_SHIFT);
+		OUT_BATCH(dw);
 	}
 }
@@ -860,8 +893,8 @@ gen6_emit_state(struct sna *sna,
 	if (gen6_emit_cc(sna, GEN6_BLEND(op->u.gen6.flags)))
 		need_stall = false;
 	gen6_emit_sampler(sna, GEN6_SAMPLER(op->u.gen6.flags));
-	gen6_emit_sf(sna, op->mask.bo != NULL);
-	gen6_emit_wm(sna, GEN6_KERNEL(op->u.gen6.flags));
+	gen6_emit_sf(sna, GEN6_VERTEX(op->u.gen6.flags) >> 2);
+	gen6_emit_wm(sna, GEN6_KERNEL(op->u.gen6.flags), GEN6_VERTEX(op->u.gen6.flags) >> 2);
 	gen6_emit_vertex_elements(sna, op);

 	need_stall |= gen6_emit_binding_table(sna, wm_binding_table & ~1);
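Note the knock-on change above: gen6_emit_sf() and gen6_emit_wm() now derive "has mask" from the vertex id (a non-zero second channel, i.e. GEN6_VERTEX(flags) >> 2) rather than from op->mask.bo. Since spans no longer attach a dummy solid mask bo, the old bo test would fail to count the opacity channel as a second SF output.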
@@ -900,7 +933,8 @@ static void gen6_magic_ca_pass(struct sna *sna,
 	gen6_emit_wm(sna,
 		     gen6_choose_composite_kernel(PictOpAdd,
						  true, true,
-						  op->is_affine));
+						  op->is_affine),
+		     true);

 	OUT_BATCH(GEN6_3DPRIMITIVE |
 		  GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL |
@@ -936,6 +970,7 @@ static int gen6_vertex_finish(struct sna *sna)
 	DBG(("%s: used=%d / %d\n", __FUNCTION__,
 	     sna->render.vertex_used, sna->render.vertex_size));
 	assert(sna->render.vertex_used);
+	assert(sna->render.nvertex_reloc);

 	/* Note: we only need dword alignment (currently) */
@@ -944,27 +979,23 @@ static int gen6_vertex_finish(struct sna *sna)
 		if (sna->render_state.gen6.vertex_offset)
 			gen6_vertex_flush(sna);

-		for (i = 0; i < ARRAY_SIZE(sna->render.vertex_reloc); i++) {
-			if (sna->render.vertex_reloc[i]) {
-				DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
-				     i, sna->render.vertex_reloc[i]));
+		for (i = 0; i < sna->render.nvertex_reloc; i++) {
+			DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
+			     i, sna->render.vertex_reloc[i]));

-				sna->kgem.batch[sna->render.vertex_reloc[i]] =
-					kgem_add_reloc(&sna->kgem,
-						       sna->render.vertex_reloc[i],
-						       bo,
-						       I915_GEM_DOMAIN_VERTEX << 16,
-						       0);
-				sna->kgem.batch[sna->render.vertex_reloc[i]+1] =
-					kgem_add_reloc(&sna->kgem,
-						       sna->render.vertex_reloc[i]+1,
-						       bo,
-						       I915_GEM_DOMAIN_VERTEX << 16,
-						       sna->render.vertex_used * 4 - 1);
-				sna->render.vertex_reloc[i] = 0;
-			}
+			sna->kgem.batch[sna->render.vertex_reloc[i]] =
+				kgem_add_reloc(&sna->kgem,
+					       sna->render.vertex_reloc[i], bo,
+					       I915_GEM_DOMAIN_VERTEX << 16,
+					       0);
+			sna->kgem.batch[sna->render.vertex_reloc[i]+1] =
+				kgem_add_reloc(&sna->kgem,
+					       sna->render.vertex_reloc[i]+1, bo,
+					       I915_GEM_DOMAIN_VERTEX << 16,
+					       sna->render.vertex_used * 4 - 1);
 		}
+		sna->render.nvertex_reloc = 0;

 		sna->render.vertex_used = 0;
 		sna->render.vertex_index = 0;
 		sna->render_state.gen6.vb_id = 0;
@@ -984,6 +1015,8 @@ static int gen6_vertex_finish(struct sna *sna)
 		return 0;
 	}

+	DBG(("%s: create vbo handle=%d\n", __FUNCTION__, sna->render.vbo->handle));
+
 	kgem_bo_sync__cpu(&sna->kgem, sna->render.vbo);
 	if (sna->render.vertex_used) {
 		DBG(("%s: copying initial buffer x %d to handle=%d\n",
@@ -1005,16 +1038,16 @@ static void gen6_vertex_close(struct sna *sna)
 	assert(sna->render_state.gen6.vertex_offset == 0);

-	DBG(("%s: used=%d, vbo active? %d\n",
-	     __FUNCTION__, sna->render.vertex_used, sna->render.vbo != NULL));
-
-	if (!sna->render.vertex_used)
+	if (!sna->render_state.gen6.vb_id)
 		return;

+	DBG(("%s: used=%d, vbo active? %d\n",
+	     __FUNCTION__, sna->render.vertex_used, sna->render.vbo ? sna->render.vbo->handle : 0));
+
 	bo = sna->render.vbo;
 	if (bo) {
 		if (sna->render.vertex_size - sna->render.vertex_used < 64) {
-			DBG(("%s: discarding vbo (full)\n", __FUNCTION__));
+			DBG(("%s: discarding vbo (full), handle=%d\n", __FUNCTION__, sna->render.vbo->handle));
 			sna->render.vbo = NULL;
 			sna->render.vertices = sna->render.vertex_data;
 			sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
@@ -1045,26 +1078,23 @@ static void gen6_vertex_close(struct sna *sna)
 		}
 	}

-	for (i = 0; i < ARRAY_SIZE(sna->render.vertex_reloc); i++) {
-		if (sna->render.vertex_reloc[i]) {
-			DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
-			     i, sna->render.vertex_reloc[i]));
+	assert(sna->render.nvertex_reloc);
+	for (i = 0; i < sna->render.nvertex_reloc; i++) {
+		DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
+		     i, sna->render.vertex_reloc[i]));

-			sna->kgem.batch[sna->render.vertex_reloc[i]] =
-				kgem_add_reloc(&sna->kgem,
-					       sna->render.vertex_reloc[i],
-					       bo,
-					       I915_GEM_DOMAIN_VERTEX << 16,
-					       delta);
-			sna->kgem.batch[sna->render.vertex_reloc[i]+1] =
-				kgem_add_reloc(&sna->kgem,
-					       sna->render.vertex_reloc[i]+1,
-					       bo,
-					       I915_GEM_DOMAIN_VERTEX << 16,
-					       delta + sna->render.vertex_used * 4 - 1);
-			sna->render.vertex_reloc[i] = 0;
-		}
+		sna->kgem.batch[sna->render.vertex_reloc[i]] =
+			kgem_add_reloc(&sna->kgem,
+				       sna->render.vertex_reloc[i], bo,
+				       I915_GEM_DOMAIN_VERTEX << 16,
+				       delta);
+		sna->kgem.batch[sna->render.vertex_reloc[i]+1] =
+			kgem_add_reloc(&sna->kgem,
+				       sna->render.vertex_reloc[i]+1, bo,
+				       I915_GEM_DOMAIN_VERTEX << 16,
+				       delta + sna->render.vertex_used * 4 - 1);
 	}
+	sna->render.nvertex_reloc = 0;

 	if (sna->render.vbo == NULL) {
 		sna->render.vertex_used = 0;
@@ -1494,7 +1524,7 @@ static void gen6_emit_vertex_buffer(struct sna *sna,
 	OUT_BATCH(GEN6_3DSTATE_VERTEX_BUFFERS | 3);
 	OUT_BATCH(id << VB0_BUFFER_INDEX_SHIFT | VB0_VERTEXDATA |
 		  4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT);
-	sna->render.vertex_reloc[id] = sna->kgem.nbatch;
+	sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
 	OUT_BATCH(0);
 	OUT_BATCH(0);
 	OUT_BATCH(0);
@@ -1624,9 +1654,11 @@ inline static uint32_t *gen6_composite_get_binding_table(struct sna *sna,
 static uint32_t
 gen6_choose_composite_vertex_buffer(const struct sna_composite_op *op)
 {
-	int has_mask = op->mask.bo != NULL;
-	int is_affine = op->is_affine;
-	return has_mask << 1 | is_affine;
+	int id = 2 + !op->is_affine;
+	if (op->mask.bo)
+		id |= id << 2;
+	assert(id > 0 && id < 16);
+	return id;
 }

 static void
@@ -1954,7 +1986,7 @@ gen6_render_video(struct sna *sna,
			       is_planar_fourcc(frame->id) ?
			       GEN6_WM_KERNEL_VIDEO_PLANAR :
			       GEN6_WM_KERNEL_VIDEO_PACKED,
-			       1);
+			       2);
 	tmp.priv = frame;

 	kgem_set_mode(&sna->kgem, KGEM_RENDER);
@@ -2824,21 +2856,12 @@ gen6_emit_composite_spans_primitive(struct sna *sna,
 {
 	gen6_emit_composite_spans_vertex(sna, op, box->x2, box->y2);
 	OUT_VERTEX_F(opacity);
-	OUT_VERTEX_F(1);
-	if (!op->base.is_affine)
-		OUT_VERTEX_F(1);

 	gen6_emit_composite_spans_vertex(sna, op, box->x1, box->y2);
 	OUT_VERTEX_F(opacity);
-	OUT_VERTEX_F(1);
-	if (!op->base.is_affine)
-		OUT_VERTEX_F(1);

 	gen6_emit_composite_spans_vertex(sna, op, box->x1, box->y1);
 	OUT_VERTEX_F(opacity);
-	OUT_VERTEX_F(0);
-	if (!op->base.is_affine)
-		OUT_VERTEX_F(1);
 }

 fastcall static void
@@ -2849,15 +2872,15 @@ gen6_emit_composite_spans_solid(struct sna *sna,
 {
 	OUT_VERTEX(box->x2, box->y2);
 	OUT_VERTEX_F(1); OUT_VERTEX_F(1);
-	OUT_VERTEX_F(opacity); OUT_VERTEX_F(1);
+	OUT_VERTEX_F(opacity);

 	OUT_VERTEX(box->x1, box->y2);
 	OUT_VERTEX_F(0); OUT_VERTEX_F(1);
-	OUT_VERTEX_F(opacity); OUT_VERTEX_F(1);
+	OUT_VERTEX_F(opacity);

 	OUT_VERTEX(box->x1, box->y1);
 	OUT_VERTEX_F(0); OUT_VERTEX_F(0);
-	OUT_VERTEX_F(opacity); OUT_VERTEX_F(0);
+	OUT_VERTEX_F(opacity);
 }

 fastcall static void
@@ -2878,24 +2901,24 @@ gen6_emit_composite_spans_identity(struct sna *sna,
 	int16_t ty = op->base.src.offset[1];

 	v = sna->render.vertices + sna->render.vertex_used;
-	sna->render.vertex_used += 3*5;
+	sna->render.vertex_used += 3*4;
 	assert(sna->render.vertex_used <= sna->render.vertex_size);

 	dst.p.x = box->x2;
 	dst.p.y = box->y2;
 	v[0] = dst.f;
 	v[1] = (box->x2 + tx) * sx;
-	v[7] = v[2] = (box->y2 + ty) * sy;
-	v[13] = v[8] = v[3] = opacity;
-	v[9] = v[4] = 1;
+	v[6] = v[2] = (box->y2 + ty) * sy;

 	dst.p.x = box->x1;
-	v[5] = dst.f;
-	v[11] = v[6] = (box->x1 + tx) * sx;
+	v[4] = dst.f;
+	v[9] = v[5] = (box->x1 + tx) * sx;

 	dst.p.y = box->y1;
-	v[10] = dst.f;
-	v[12] = (box->y1 + ty) * sy;
-	v[14] = 0;
+	v[8] = dst.f;
+	v[10] = (box->y1 + ty) * sy;
+	v[11] = v[7] = v[3] = opacity;
 }

 fastcall static void
@@ -2920,24 +2943,24 @@ gen6_emit_composite_spans_simple(struct sna *sna,
 	int16_t ty = op->base.src.offset[1];

 	v = sna->render.vertices + sna->render.vertex_used;
-	sna->render.vertex_used += 3*5;
+	sna->render.vertex_used += 3*4;
 	assert(sna->render.vertex_used <= sna->render.vertex_size);

 	dst.p.x = box->x2;
 	dst.p.y = box->y2;
 	v[0] = dst.f;
 	v[1] = ((box->x2 + tx) * xx + x0) * sx;
-	v[7] = v[2] = ((box->y2 + ty) * yy + y0) * sy;
-	v[13] = v[8] = v[3] = opacity;
-	v[9] = v[4] = 1;
+	v[6] = v[2] = ((box->y2 + ty) * yy + y0) * sy;

 	dst.p.x = box->x1;
-	v[5] = dst.f;
-	v[11] = v[6] = ((box->x1 + tx) * xx + x0) * sx;
+	v[4] = dst.f;
+	v[9] = v[5] = ((box->x1 + tx) * xx + x0) * sx;

 	dst.p.y = box->y1;
-	v[10] = dst.f;
-	v[12] = ((box->y1 + ty) * yy + y0) * sy;
-	v[14] = 0;
+	v[8] = dst.f;
+	v[10] = ((box->y1 + ty) * yy + y0) * sy;
+	v[11] = v[7] = v[3] = opacity;
 }

 fastcall static void
@@ -2950,19 +2973,16 @@ gen6_emit_composite_spans_affine(struct sna *sna,
 	gen6_emit_composite_texcoord_affine(sna, &op->base.src,
					    box->x2, box->y2);
 	OUT_VERTEX_F(opacity);
-	OUT_VERTEX_F(1);

 	OUT_VERTEX(box->x1, box->y2);
 	gen6_emit_composite_texcoord_affine(sna, &op->base.src,
					    box->x1, box->y2);
 	OUT_VERTEX_F(opacity);
-	OUT_VERTEX_F(1);

 	OUT_VERTEX(box->x1, box->y1);
 	gen6_emit_composite_texcoord_affine(sna, &op->base.src,
					    box->x1, box->y1);
 	OUT_VERTEX_F(opacity);
-	OUT_VERTEX_F(0);
 }

 fastcall static void
@@ -3022,7 +3042,6 @@ gen6_render_composite_spans_done(struct sna *sna,
 	if (sna->render_state.gen6.vertex_offset)
 		gen6_vertex_flush(sna);

-	kgem_bo_destroy(&sna->kgem, op->base.mask.bo);
 	if (op->base.src.bo)
 		kgem_bo_destroy(&sna->kgem, op->base.src.bo);
@@ -3103,9 +3122,7 @@ gen6_render_composite_spans(struct sna *sna,
 		gen6_composite_channel_convert(&tmp->base.src);
 		break;
 	}
-	tmp->base.mask.bo = sna_render_get_solid(sna, 0);
-	if (tmp->base.mask.bo == NULL)
-		goto cleanup_src;
+	tmp->base.mask.bo = NULL;

 	tmp->base.is_affine = tmp->base.src.is_affine;
 	tmp->base.need_magic_ca_pass = false;
@@ -3124,7 +3141,7 @@ gen6_render_composite_spans(struct sna *sna,
 		} else
 			tmp->prim_emit = gen6_emit_composite_spans_affine;
 	}
-	tmp->base.floats_per_vertex = 5 + 2*!tmp->base.is_affine;
+	tmp->base.floats_per_vertex = 4 + !tmp->base.is_affine;
 	tmp->base.floats_per_rect = 3 * tmp->base.floats_per_vertex;

 	tmp->base.u.gen6.flags =
@@ -3134,7 +3151,7 @@ gen6_render_composite_spans(struct sna *sna,
				      SAMPLER_EXTEND_PAD),
		gen6_get_blend(tmp->base.op, false, tmp->base.dst.format),
		GEN6_WM_KERNEL_OPACITY | !tmp->base.is_affine,
-		1 << 1 | tmp->base.is_affine);
+		1 << 2 | (2+!tmp->base.is_affine));

 	tmp->box = gen6_render_composite_spans_box;
 	tmp->boxes = gen6_render_composite_spans_boxes;
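With the opacity value declared as a one-float second channel (`1 << 2` in the vertex id, fetched as `R32_FLOAT`), an affine span vertex drops from five floats to four and a projective one from seven to five: the filler float that used to pad the opacity out to the old two-float mask channel is gone. Hypothetical structs (mine, not the driver's) illustrating the affine layouts:

	struct old_affine_span_vertex { /* floats_per_vertex == 5 */
		float xy;      /* packed int16 x, int16 y */
		float u, v;    /* source texcoords */
		float opacity;
		float pad;     /* 1.0/0.0 filler for the old two-float mask channel */
	};

	struct new_affine_span_vertex { /* floats_per_vertex == 4 */
		float xy;
		float u, v;
		float opacity; /* second channel: a single R32_FLOAT */
	};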
@@ -4086,7 +4103,7 @@ gen6_render_retire(struct kgem *kgem)
 	sna = container_of(kgem, struct sna, kgem);
 	if (kgem->nbatch == 0 && sna->render.vbo && !kgem_bo_is_busy(sna->render.vbo)) {
-		DBG(("%s: resetting idle vbo\n", __FUNCTION__));
+		DBG(("%s: resetting idle vbo handle=%d\n", __FUNCTION__, sna->render.vbo->handle));
 		sna->render.vertex_used = 0;
 		sna->render.vertex_index = 0;
 	}
@@ -4099,7 +4116,7 @@ gen6_render_expire(struct kgem *kgem)
 	sna = container_of(kgem, struct sna, kgem);
 	if (sna->render.vbo && !sna->render.vertex_used) {
-		DBG(("%s: discarding vbo\n", __FUNCTION__));
+		DBG(("%s: discarding vbo handle=%d\n", __FUNCTION__, sna->render.vbo->handle));
 		kgem_bo_destroy(kgem, sna->render.vbo);
 		sna->render.vbo = NULL;
 		sna->render.vertices = sna->render.vertex_data;
@@ -4114,7 +4131,7 @@ static void gen6_render_reset(struct sna *sna)
 	sna->render_state.gen6.needs_invariant = true;
 	sna->render_state.gen6.first_state_packet = true;
 	sna->render_state.gen6.vb_id = 0;
-	sna->render_state.gen6.ve_id = -1;
+	sna->render_state.gen6.ve_id = 3 << 2;
 	sna->render_state.gen6.last_primitive = -1;
 	sna->render_state.gen6.num_sf_outputs = 0;

diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c

@@ -181,7 +181,7 @@ static const struct blendinfo {
 #define SAMPLER_OFFSET(sf, se, mf, me) \
 	((((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) + 2) * 2 * sizeof(struct gen7_sampler_state))

-#define VERTEX_2s2s 4
+#define VERTEX_2s2s 0

 #define COPY_SAMPLER 0
 #define COPY_VERTEX VERTEX_2s2s
@@ -847,23 +847,23 @@ gen7_emit_vertex_elements(struct sna *sna,
 	 * texture coordinate 1 if (has_mask is true): same as above
 	 */
 	struct gen7_render_state *render = &sna->render_state.gen7;
-	int nelem, selem;
-	uint32_t w_component;
-	uint32_t src_format;
+	uint32_t src_format, dw, offset;
 	int id = GEN7_VERTEX(op->u.gen7.flags);
+	bool has_mask;

 	DBG(("%s: setup id=%d\n", __FUNCTION__, id));

 	if (render->ve_id == id)
 		return;
 	render->ve_id = id;

-	switch (id) {
-	case VERTEX_2s2s:
+	if (id == VERTEX_2s2s) {
 		DBG(("%s: setup COPY\n", __FUNCTION__));

 		OUT_BATCH(GEN7_3DSTATE_VERTEX_ELEMENTS |
			  ((2 * (1 + 2)) + 1 - 2));

-		OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
+		OUT_BATCH(VERTEX_2s2s << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
			  GEN7_SURFACEFORMAT_R32G32B32A32_FLOAT << GEN7_VE0_FORMAT_SHIFT |
			  0 << GEN7_VE0_OFFSET_SHIFT);
 		OUT_BATCH(GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_0_SHIFT |
@@ -872,7 +872,7 @@ gen7_emit_vertex_elements(struct sna *sna,
			  GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_3_SHIFT);

 		/* x,y */
-		OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
+		OUT_BATCH(VERTEX_2s2s << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
			  GEN7_SURFACEFORMAT_R16G16_SSCALED << GEN7_VE0_FORMAT_SHIFT |
			  0 << GEN7_VE0_OFFSET_SHIFT); /* offsets vb in bytes */
 		OUT_BATCH(GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT |
@@ -880,7 +880,7 @@ gen7_emit_vertex_elements(struct sna *sna,
			  GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT |
			  GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT);

-		OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
+		OUT_BATCH(VERTEX_2s2s << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
			  GEN7_SURFACEFORMAT_R16G16_SSCALED << GEN7_VE0_FORMAT_SHIFT |
			  4 << GEN7_VE0_OFFSET_SHIFT); /* offset vb in bytes */
 		OUT_BATCH(GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT |
@@ -890,17 +890,6 @@ gen7_emit_vertex_elements(struct sna *sna,
 		return;
 	}

-	nelem = op->mask.bo ? 2 : 1;
-
-	if (op->is_affine) {
-		src_format = GEN7_SURFACEFORMAT_R32G32_FLOAT;
-		w_component = GEN7_VFCOMPONENT_STORE_0;
-		selem = 2;
-	} else {
-		src_format = GEN7_SURFACEFORMAT_R32G32B32_FLOAT;
-		w_component = GEN7_VFCOMPONENT_STORE_SRC;
-		selem = 3;
-	}

 	/* The VUE layout
 	 * dword 0-3: pad (0.0, 0.0, 0.0. 0.0)
 	 * dword 4-7: position (x, y, 1.0, 1.0),
@@ -909,11 +898,11 @@ gen7_emit_vertex_elements(struct sna *sna,
 	 *
 	 * dword 4-15 are fetched from vertex buffer
 	 */
+	has_mask = (id >> 2) != 0;
 	OUT_BATCH(GEN7_3DSTATE_VERTEX_ELEMENTS |
-		  ((2 * (2 + nelem)) + 1 - 2));
+		  ((2 * (3 + has_mask)) + 1 - 2));

-	OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT |
-		  GEN7_VE0_VALID |
+	OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
		  GEN7_SURFACEFORMAT_R32G32B32A32_FLOAT << GEN7_VE0_FORMAT_SHIFT |
		  0 << GEN7_VE0_OFFSET_SHIFT);
 	OUT_BATCH(GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_0_SHIFT |
@@ -924,31 +913,74 @@ gen7_emit_vertex_elements(struct sna *sna,
 	/* x,y */
 	OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
 		  GEN7_SURFACEFORMAT_R16G16_SSCALED << GEN7_VE0_FORMAT_SHIFT |
-		  0 << GEN7_VE0_OFFSET_SHIFT); /* offsets vb in bytes */
+		  0 << GEN7_VE0_OFFSET_SHIFT);
 	OUT_BATCH(GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT |
 		  GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT |
 		  GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT |
 		  GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT);
+	offset = 4;

 	/* u0, v0, w0 */
+	DBG(("%s: first channel %d floats, offset=%d\n", __FUNCTION__, id & 3, offset));
+	dw = GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT;
+	switch (id & 3) {
+	case 1:
+		src_format = GEN7_SURFACEFORMAT_R32_FLOAT;
+		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
+		dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_1_SHIFT;
+		dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
+		break;
+	default:
+		assert(0);
+	case 2:
+		src_format = GEN7_SURFACEFORMAT_R32G32_FLOAT;
+		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
+		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
+		dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
+		break;
+	case 3:
+		src_format = GEN7_SURFACEFORMAT_R32G32B32_FLOAT;
+		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
+		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
+		dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_2_SHIFT;
+		break;
+	}
 	OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
 		  src_format << GEN7_VE0_FORMAT_SHIFT |
-		  4 << GEN7_VE0_OFFSET_SHIFT); /* offset vb in bytes */
-	OUT_BATCH(GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT |
-		  GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT |
-		  w_component << GEN7_VE1_VFCOMPONENT_2_SHIFT |
-		  GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT);
+		  offset << GEN7_VE0_OFFSET_SHIFT);
+	OUT_BATCH(dw);
+	offset += (id & 3) * sizeof(float);

 	/* u1, v1, w1 */
-	if (op->mask.bo) {
-		OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT |
-			  GEN7_VE0_VALID |
+	if (has_mask) {
+		DBG(("%s: second channel %d floats, offset=%d\n", __FUNCTION__, (id >> 2) & 3, offset));
+		dw = GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT;
+		switch ((id >> 2) & 3) {
+		case 1:
+			src_format = GEN7_SURFACEFORMAT_R32_FLOAT;
+			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
+			dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_1_SHIFT;
+			dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
+			break;
+		default:
+			assert(0);
+		case 2:
+			src_format = GEN7_SURFACEFORMAT_R32G32_FLOAT;
+			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
+			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
+			dw |= GEN7_VFCOMPONENT_STORE_0 << GEN7_VE1_VFCOMPONENT_2_SHIFT;
+			break;
+		case 3:
+			src_format = GEN7_SURFACEFORMAT_R32G32B32_FLOAT;
+			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT;
+			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT;
+			dw |= GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_2_SHIFT;
+			break;
+		}
+		OUT_BATCH(id << GEN7_VE0_VERTEX_BUFFER_INDEX_SHIFT | GEN7_VE0_VALID |
			  src_format << GEN7_VE0_FORMAT_SHIFT |
-			  ((1 + selem) * 4) << GEN7_VE0_OFFSET_SHIFT); /* vb offset in bytes */
-		OUT_BATCH(GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_0_SHIFT |
-			  GEN7_VFCOMPONENT_STORE_SRC << GEN7_VE1_VFCOMPONENT_1_SHIFT |
-			  w_component << GEN7_VE1_VFCOMPONENT_2_SHIFT |
-			  GEN7_VFCOMPONENT_STORE_1_FLT << GEN7_VE1_VFCOMPONENT_3_SHIFT);
+			  offset << GEN7_VE0_OFFSET_SHIFT);
+		OUT_BATCH(dw);
 	}
 }
@@ -994,7 +1026,7 @@ gen7_emit_state(struct sna *sna,
 	gen7_emit_cc(sna, GEN7_BLEND(op->u.gen7.flags));
 	gen7_emit_sampler(sna, GEN7_SAMPLER(op->u.gen7.flags));
-	gen7_emit_sf(sna, op->mask.bo != NULL);
+	gen7_emit_sf(sna, GEN7_VERTEX(op->u.gen7.flags) >> 2);
 	gen7_emit_wm(sna, GEN7_KERNEL(op->u.gen7.flags));
 	gen7_emit_vertex_elements(sna, op);
@@ -1065,6 +1097,7 @@ static int gen7_vertex_finish(struct sna *sna)
 	unsigned int i;

 	assert(sna->render.vertex_used);
+	assert(sna->render.nvertex_reloc);

 	/* Note: we only need dword alignment (currently) */
@@ -1073,27 +1106,23 @@ static int gen7_vertex_finish(struct sna *sna)
 		if (sna->render_state.gen7.vertex_offset)
 			gen7_vertex_flush(sna);

-		for (i = 0; i < ARRAY_SIZE(sna->render.vertex_reloc); i++) {
-			if (sna->render.vertex_reloc[i]) {
-				DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
-				     i, sna->render.vertex_reloc[i]));
+		for (i = 0; i < sna->render.nvertex_reloc; i++) {
+			DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
+			     i, sna->render.vertex_reloc[i]));

-				sna->kgem.batch[sna->render.vertex_reloc[i]] =
-					kgem_add_reloc(&sna->kgem,
-						       sna->render.vertex_reloc[i],
-						       bo,
-						       I915_GEM_DOMAIN_VERTEX << 16,
-						       0);
-				sna->kgem.batch[sna->render.vertex_reloc[i]+1] =
-					kgem_add_reloc(&sna->kgem,
-						       sna->render.vertex_reloc[i]+1,
-						       bo,
-						       I915_GEM_DOMAIN_VERTEX << 16,
-						       sna->render.vertex_used * 4 - 1);
-				sna->render.vertex_reloc[i] = 0;
-			}
+			sna->kgem.batch[sna->render.vertex_reloc[i]] =
+				kgem_add_reloc(&sna->kgem,
+					       sna->render.vertex_reloc[i], bo,
+					       I915_GEM_DOMAIN_VERTEX << 16,
+					       0);
+			sna->kgem.batch[sna->render.vertex_reloc[i]+1] =
+				kgem_add_reloc(&sna->kgem,
+					       sna->render.vertex_reloc[i]+1, bo,
+					       I915_GEM_DOMAIN_VERTEX << 16,
+					       sna->render.vertex_used * 4 - 1);
 		}
+		sna->render.nvertex_reloc = 0;

 		sna->render.vertex_used = 0;
 		sna->render.vertex_index = 0;
 		sna->render_state.gen7.vb_id = 0;
@@ -1130,16 +1159,16 @@ static void gen7_vertex_close(struct sna *sna)
 	assert(sna->render_state.gen7.vertex_offset == 0);

-	DBG(("%s: used=%d, vbo active? %d\n",
-	     __FUNCTION__, sna->render.vertex_used, sna->render.vbo != NULL));
-
-	if (!sna->render.vertex_used)
+	if (!sna->render_state.gen7.vb_id)
 		return;

+	DBG(("%s: used=%d, vbo active? %d\n",
+	     __FUNCTION__, sna->render.vertex_used, sna->render.vbo ? sna->render.vbo->handle : 0));
+
 	bo = sna->render.vbo;
 	if (bo) {
 		if (sna->render.vertex_size - sna->render.vertex_used < 64) {
-			DBG(("%s: discarding vbo (full)\n", __FUNCTION__));
+			DBG(("%s: discarding vbo (full), handle=%d\n", __FUNCTION__, sna->render.vbo->handle));
 			sna->render.vbo = NULL;
 			sna->render.vertices = sna->render.vertex_data;
 			sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
@@ -1170,30 +1199,29 @@ static void gen7_vertex_close(struct sna *sna)
 		}
 	}

-	for (i = 0; i < ARRAY_SIZE(sna->render.vertex_reloc); i++) {
-		if (sna->render.vertex_reloc[i]) {
-			DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
-			     i, sna->render.vertex_reloc[i]));
+	assert(sna->render.nvertex_reloc);
+	for (i = 0; i < sna->render.nvertex_reloc; i++) {
+		DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
+		     i, sna->render.vertex_reloc[i]));

-			sna->kgem.batch[sna->render.vertex_reloc[i]] =
-				kgem_add_reloc(&sna->kgem,
-					       sna->render.vertex_reloc[i],
-					       bo,
-					       I915_GEM_DOMAIN_VERTEX << 16,
-					       delta);
-			sna->kgem.batch[sna->render.vertex_reloc[i]+1] =
-				kgem_add_reloc(&sna->kgem,
-					       sna->render.vertex_reloc[i]+1,
-					       bo,
-					       I915_GEM_DOMAIN_VERTEX << 16,
-					       delta + sna->render.vertex_used * 4 - 1);
-			sna->render.vertex_reloc[i] = 0;
-		}
+		sna->kgem.batch[sna->render.vertex_reloc[i]] =
+			kgem_add_reloc(&sna->kgem,
+				       sna->render.vertex_reloc[i], bo,
+				       I915_GEM_DOMAIN_VERTEX << 16,
+				       delta);
+		sna->kgem.batch[sna->render.vertex_reloc[i]+1] =
+			kgem_add_reloc(&sna->kgem,
+				       sna->render.vertex_reloc[i]+1, bo,
+				       I915_GEM_DOMAIN_VERTEX << 16,
+				       delta + sna->render.vertex_used * 4 - 1);
 	}
+	sna->render.nvertex_reloc = 0;

 	if (sna->render.vbo == NULL) {
 		sna->render.vertex_used = 0;
 		sna->render.vertex_index = 0;
+		assert(sna->render.vertices == sna->render.vertex_data);
+		assert(sna->render.vertex_size == ARRAY_SIZE(sna->render.vertex_data));
 	}

 	if (free_bo)
@@ -1360,6 +1388,8 @@ gen7_emit_composite_primitive_solid(struct sna *sna,
 	v = sna->render.vertices + sna->render.vertex_used;
 	sna->render.vertex_used += 9;
 	assert(sna->render.vertex_used <= sna->render.vertex_size);
+	assert(!too_large(r->dst.x + r->width, r->dst.y + r->height));

 	dst.p.x = r->dst.x + r->width;
 	dst.p.y = r->dst.y + r->height;
@@ -1599,7 +1629,7 @@ static void gen7_emit_vertex_buffer(struct sna *sna,
 		  GEN7_VB0_VERTEXDATA |
 		  GEN7_VB0_ADDRESS_MODIFY_ENABLE |
 		  4*op->floats_per_vertex << GEN7_VB0_BUFFER_PITCH_SHIFT);
-	sna->render.vertex_reloc[id] = sna->kgem.nbatch;
+	sna->render.vertex_reloc[sna->render.nvertex_reloc++] = sna->kgem.nbatch;
 	OUT_BATCH(0);
 	OUT_BATCH(0);
 	OUT_BATCH(0);
@@ -1686,6 +1716,7 @@ start:
 	if (want > 1 && want * op->floats_per_rect > rem)
 		want = rem / op->floats_per_rect;
+	assert(want > 0);

 	sna->render.vertex_index += 3*want;
 	return want;
@@ -1719,9 +1750,11 @@ inline static uint32_t *gen7_composite_get_binding_table(struct sna *sna,
 static uint32_t
 gen7_choose_composite_vertex_buffer(const struct sna_composite_op *op)
 {
-	int has_mask = op->mask.bo != NULL;
-	int is_affine = op->is_affine;
-	return has_mask << 1 | is_affine;
+	int id = 2 + !op->is_affine;
+	if (op->mask.bo)
+		id |= id << 2;
+	assert(id > 0 && id < 16);
+	return id;
 }

 static void
@@ -2908,21 +2941,12 @@ gen7_emit_composite_spans_primitive(struct sna *sna,
 {
 	gen7_emit_composite_spans_vertex(sna, op, box->x2, box->y2);
 	OUT_VERTEX_F(opacity);
-	OUT_VERTEX_F(1);
-	if (!op->base.is_affine)
-		OUT_VERTEX_F(1);

 	gen7_emit_composite_spans_vertex(sna, op, box->x1, box->y2);
 	OUT_VERTEX_F(opacity);
-	OUT_VERTEX_F(1);
-	if (!op->base.is_affine)
-		OUT_VERTEX_F(1);

 	gen7_emit_composite_spans_vertex(sna, op, box->x1, box->y1);
 	OUT_VERTEX_F(opacity);
-	OUT_VERTEX_F(0);
-	if (!op->base.is_affine)
-		OUT_VERTEX_F(1);
 }

 fastcall static void
@@ -2933,15 +2957,15 @@ gen7_emit_composite_spans_solid(struct sna *sna,
 {
 	OUT_VERTEX(box->x2, box->y2);
 	OUT_VERTEX_F(1); OUT_VERTEX_F(1);
-	OUT_VERTEX_F(opacity); OUT_VERTEX_F(1);
+	OUT_VERTEX_F(opacity);

 	OUT_VERTEX(box->x1, box->y2);
 	OUT_VERTEX_F(0); OUT_VERTEX_F(1);
-	OUT_VERTEX_F(opacity); OUT_VERTEX_F(1);
+	OUT_VERTEX_F(opacity);

 	OUT_VERTEX(box->x1, box->y1);
 	OUT_VERTEX_F(0); OUT_VERTEX_F(0);
-	OUT_VERTEX_F(opacity); OUT_VERTEX_F(0);
+	OUT_VERTEX_F(opacity);
 }

 fastcall static void
@@ -2962,24 +2986,24 @@ gen7_emit_composite_spans_identity(struct sna *sna,
 	int16_t ty = op->base.src.offset[1];

 	v = sna->render.vertices + sna->render.vertex_used;
-	sna->render.vertex_used += 3*5;
+	sna->render.vertex_used += 3*4;
 	assert(sna->render.vertex_used <= sna->render.vertex_size);

 	dst.p.x = box->x2;
 	dst.p.y = box->y2;
 	v[0] = dst.f;
 	v[1] = (box->x2 + tx) * sx;
-	v[7] = v[2] = (box->y2 + ty) * sy;
-	v[13] = v[8] = v[3] = opacity;
-	v[9] = v[4] = 1;
+	v[6] = v[2] = (box->y2 + ty) * sy;

 	dst.p.x = box->x1;
-	v[5] = dst.f;
-	v[11] = v[6] = (box->x1 + tx) * sx;
+	v[4] = dst.f;
+	v[9] = v[5] = (box->x1 + tx) * sx;

 	dst.p.y = box->y1;
-	v[10] = dst.f;
-	v[12] = (box->y1 + ty) * sy;
-	v[14] = 0;
+	v[8] = dst.f;
+	v[10] = (box->y1 + ty) * sy;
+	v[11] = v[7] = v[3] = opacity;
 }

 fastcall static void
@@ -3004,24 +3028,24 @@ gen7_emit_composite_spans_simple(struct sna *sna,
 	int16_t ty = op->base.src.offset[1];

 	v = sna->render.vertices + sna->render.vertex_used;
-	sna->render.vertex_used += 3*5;
+	sna->render.vertex_used += 3*4;
 	assert(sna->render.vertex_used <= sna->render.vertex_size);

 	dst.p.x = box->x2;
 	dst.p.y = box->y2;
 	v[0] = dst.f;
 	v[1] = ((box->x2 + tx) * xx + x0) * sx;
-	v[7] = v[2] = ((box->y2 + ty) * yy + y0) * sy;
-	v[13] = v[8] = v[3] = opacity;
-	v[9] = v[4] = 1;
+	v[6] = v[2] = ((box->y2 + ty) * yy + y0) * sy;

 	dst.p.x = box->x1;
-	v[5] = dst.f;
-	v[11] = v[6] = ((box->x1 + tx) * xx + x0) * sx;
+	v[4] = dst.f;
+	v[9] = v[5] = ((box->x1 + tx) * xx + x0) * sx;

 	dst.p.y = box->y1;
-	v[10] = dst.f;
-	v[12] = ((box->y1 + ty) * yy + y0) * sy;
-	v[14] = 0;
+	v[8] = dst.f;
+	v[10] = ((box->y1 + ty) * yy + y0) * sy;
+	v[11] = v[7] = v[3] = opacity;
 }

 fastcall static void
@@ -3034,19 +3058,16 @@ gen7_emit_composite_spans_affine(struct sna *sna,
 	gen7_emit_composite_texcoord_affine(sna, &op->base.src,
					    box->x2, box->y2);
 	OUT_VERTEX_F(opacity);
-	OUT_VERTEX_F(1);

 	OUT_VERTEX(box->x1, box->y2);
 	gen7_emit_composite_texcoord_affine(sna, &op->base.src,
					    box->x1, box->y2);
 	OUT_VERTEX_F(opacity);
-	OUT_VERTEX_F(1);

 	OUT_VERTEX(box->x1, box->y1);
 	gen7_emit_composite_texcoord_affine(sna, &op->base.src,
					    box->x1, box->y1);
 	OUT_VERTEX_F(opacity);
-	OUT_VERTEX_F(0);
 }

 fastcall static void
@@ -3106,7 +3127,6 @@ gen7_render_composite_spans_done(struct sna *sna,
 	DBG(("%s()\n", __FUNCTION__));

-	kgem_bo_destroy(&sna->kgem, op->base.mask.bo);
 	if (op->base.src.bo)
 		kgem_bo_destroy(&sna->kgem, op->base.src.bo);
@@ -3184,9 +3204,7 @@ gen7_render_composite_spans(struct sna *sna,
 		gen7_composite_channel_convert(&tmp->base.src);
 		break;
 	}
-	tmp->base.mask.bo = sna_render_get_solid(sna, 0);
-	if (tmp->base.mask.bo == NULL)
-		goto cleanup_src;
+	tmp->base.mask.bo = NULL;

 	tmp->base.is_affine = tmp->base.src.is_affine;
 	tmp->base.need_magic_ca_pass = false;
@@ -3205,7 +3223,7 @@ gen7_render_composite_spans(struct sna *sna,
 		} else
 			tmp->prim_emit = gen7_emit_composite_spans_affine;
 	}
-	tmp->base.floats_per_vertex = 5 + 2*!tmp->base.is_affine;
+	tmp->base.floats_per_vertex = 4 + !tmp->base.is_affine;
 	tmp->base.floats_per_rect = 3 * tmp->base.floats_per_vertex;

 	tmp->base.u.gen7.flags =
@@ -3215,7 +3233,7 @@ gen7_render_composite_spans(struct sna *sna,
				      SAMPLER_EXTEND_PAD),
		gen7_get_blend(tmp->base.op, false, tmp->base.dst.format),
		GEN7_WM_KERNEL_OPACITY | !tmp->base.is_affine,
-		1 << 1 | tmp->base.is_affine);
+		1 << 2 | (2+!tmp->base.is_affine));

 	tmp->box = gen7_render_composite_spans_box;
 	tmp->boxes = gen7_render_composite_spans_boxes;
@@ -4197,7 +4215,7 @@ static void gen7_render_reset(struct sna *sna)
 	sna->render_state.gen7.emit_flush = false;
 	sna->render_state.gen7.needs_invariant = true;
 	sna->render_state.gen7.vb_id = 0;
-	sna->render_state.gen7.ve_id = -1;
+	sna->render_state.gen7.ve_id = 3 << 2;
 	sna->render_state.gen7.last_primitive = -1;
 	sna->render_state.gen7.num_sf_outputs = 0;

diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h

@@ -286,7 +286,8 @@ struct sna_render {
 	uint16_t vertex_index;
 	uint16_t vertex_used;
 	uint16_t vertex_size;
-	uint16_t vertex_reloc[8];
+	uint16_t vertex_reloc[16];
+	int nvertex_reloc;

 	struct kgem_bo *vbo;
 	float *vertices;
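The array appears to grow from 8 to 16 slots because entries are now consumed in emission order, one per vertex-buffer packet written to the batch, rather than being indexed by vertex-buffer id; nvertex_reloc counts the pending entries and is reset to zero once the finish/close paths have patched them.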