diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
index 3c3de12e..03b80c5c 100644
--- a/src/sna/gen3_render.c
+++ b/src/sna/gen3_render.c
@@ -1574,6 +1574,9 @@ static int gen3_vertex_finish(struct sna *sna)
 
 	bo = sna->render.vbo;
 	if (bo) {
+		if (sna->render_state.gen3.vertex_offset)
+			gen3_vertex_flush(sna);
+
 		DBG(("%s: reloc = %d\n", __FUNCTION__,
 		     sna->render.vertex_reloc[0]));
 
@@ -1736,20 +1739,13 @@ inline static int gen3_get_rectangles(struct sna *sna,
 		DBG(("flushing vbo for %s: %d < %d\n",
 		     __FUNCTION__, rem, op->floats_per_rect));
 		rem = gen3_get_rectangles__flush(sna, op);
-		if (rem == 0) {
-			if (sna->render_state.gen3.vertex_offset) {
-				gen3_vertex_flush(sna);
-				gen3_magic_ca_pass(sna, op);
-			}
-			return 0;
-		}
+		if (rem == 0)
+			goto flush;
 	}
 
 	if (sna->render_state.gen3.vertex_offset == 0 &&
-	    !gen3_rectangle_begin(sna, op)) {
-		DBG(("%s: flushing batch\n", __FUNCTION__));
-		return 0;
-	}
+	    !gen3_rectangle_begin(sna, op))
+		goto flush;
 
 	if (want > 1 && want * op->floats_per_rect > rem)
 		want = rem / op->floats_per_rect;
@@ -1758,6 +1754,14 @@ inline static int gen3_get_rectangles(struct sna *sna,
 	assert(want);
 	assert(sna->render.vertex_index * op->floats_per_vertex <= sna->render.vertex_size);
 	return want;
+
+flush:
+	DBG(("%s: flushing batch\n", __FUNCTION__));
+	if (sna->render_state.gen3.vertex_offset) {
+		gen3_vertex_flush(sna);
+		gen3_magic_ca_pass(sna, op);
+	}
+	return 0;
 }
 
 fastcall static void
diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c
index c9f10a8a..f6a47a05 100644
--- a/src/sna/gen4_render.c
+++ b/src/sna/gen4_render.c
@@ -64,7 +64,8 @@
 
 #if FLUSH_EVERY_VERTEX
 #define FLUSH(OP) do { \
-	gen4_vertex_flush(sna, OP); \
+	gen4_vertex_flush(sna); \
+	gen4_magic_ca_pass(sna, OP); \
 	OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH); \
 } while (0)
 #else
@@ -348,8 +349,7 @@ static void gen4_magic_ca_pass(struct sna *sna,
 	state->last_primitive = sna->kgem.nbatch;
 }
 
-static void gen4_vertex_flush(struct sna *sna,
-			      const struct sna_composite_op *op)
+static void gen4_vertex_flush(struct sna *sna)
 {
 	if (sna->render_state.gen4.vertex_offset == 0)
 		return;
@@ -360,8 +360,6 @@ static void gen4_vertex_flush(struct sna *sna,
 	sna->kgem.batch[sna->render_state.gen4.vertex_offset] =
 		sna->render.vertex_index - sna->render.vertex_start;
 	sna->render_state.gen4.vertex_offset = 0;
-
-	gen4_magic_ca_pass(sna, op);
 }
 
 static int gen4_vertex_finish(struct sna *sna)
@@ -375,6 +373,9 @@ static int gen4_vertex_finish(struct sna *sna)
 
 	bo = sna->render.vbo;
 	if (bo) {
+		if (sna->render_state.gen4.vertex_offset)
+			gen4_vertex_flush(sna);
+
 		for (i = 0; i < ARRAY_SIZE(sna->render.vertex_reloc); i++) {
 			if (sna->render.vertex_reloc[i]) {
 				DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
@@ -1167,20 +1168,23 @@ inline static int gen4_get_rectangles(struct sna *sna,
 		DBG(("flushing vbo for %s: %d < %d\n",
 		     __FUNCTION__, rem, 3*op->floats_per_vertex));
 		rem = gen4_get_rectangles__flush(sna, op);
-		if (rem == 0) {
-			gen4_vertex_flush(sna, op);
-			return 0;
-		}
+		if (rem == 0)
+			goto flush;
 	}
 
 	if (!gen4_rectangle_begin(sna, op))
-		return 0;
+		goto flush;
 
 	if (want > 1 && want * op->floats_per_vertex*3 > rem)
 		want = rem / (3*op->floats_per_vertex);
 
 	sna->render.vertex_index += 3*want;
 	return want;
+
+flush:
+	gen4_vertex_flush(sna);
+	gen4_magic_ca_pass(sna, op);
+	return 0;
 }
 
 static uint32_t *gen4_composite_get_binding_table(struct sna *sna,
@@ -1799,7 +1803,7 @@ gen4_render_video(struct sna *sna,
 	}
 	priv->clear = false;
 
-	gen4_vertex_flush(sna, &tmp);
+	gen4_vertex_flush(sna);
 	return TRUE;
 }
 
@@ -1916,7 +1920,8 @@ gen4_render_composite_done(struct sna *sna,
 {
 	DBG(("%s()\n", __FUNCTION__));
 
-	gen4_vertex_flush(sna, op);
+	gen4_vertex_flush(sna);
+	gen4_magic_ca_pass(sna, op);
 
 	if (op->mask.bo)
 		kgem_bo_destroy(&sna->kgem, op->mask.bo);
@@ -2595,7 +2600,7 @@ gen4_render_copy_blt(struct sna *sna,
 static void
 gen4_render_copy_done(struct sna *sna, const struct sna_copy_op *op)
 {
-	gen4_vertex_flush(sna, &op->base);
+	gen4_vertex_flush(sna);
 }
 
 static Bool
@@ -2883,7 +2888,7 @@ gen4_render_fill_op_boxes(struct sna *sna,
 static void
 gen4_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op)
 {
-	gen4_vertex_flush(sna, &op->base);
+	gen4_vertex_flush(sna);
 	kgem_bo_destroy(&sna->kgem, op->base.src.bo);
 }
 
@@ -3043,7 +3048,7 @@ gen4_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
 
 	gen4_render_fill_rectangle(sna, &tmp, x1, y1, x2 - x1, y2 - y1);
 
-	gen4_vertex_flush(sna, &tmp);
+	gen4_vertex_flush(sna);
 	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
 
 	return TRUE;
diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c
index 251eb392..ff9e4628 100644
--- a/src/sna/gen5_render.c
+++ b/src/sna/gen5_render.c
@@ -66,8 +66,8 @@
 #define URB_CS_ENTRY_SIZE     1
 #define URB_CS_ENTRIES        0
 
-#define URB_VS_ENTRY_SIZE     1 // each 512-bit row
-#define URB_VS_ENTRIES        8 // we needs at least 8 entries
+#define URB_VS_ENTRY_SIZE     1
+#define URB_VS_ENTRIES        128 /* minimum of 8 */
 
 #define URB_GS_ENTRY_SIZE     0
 #define URB_GS_ENTRIES        0
@@ -76,7 +76,7 @@
 #define URB_CLIP_ENTRIES      0
 
 #define URB_SF_ENTRY_SIZE     2
-#define URB_SF_ENTRIES        1
+#define URB_SF_ENTRIES        32
 
 /*
  * this program computes dA/dx and dA/dy for the texture coordinates along
@@ -358,6 +358,9 @@ static int gen5_vertex_finish(struct sna *sna)
 
 	bo = sna->render.vbo;
 	if (bo) {
+		if (sna->render_state.gen5.vertex_offset)
+			gen5_vertex_flush(sna);
+
 		for (i = 0; i < ARRAY_SIZE(sna->render.vertex_reloc); i++) {
 			if (sna->render.vertex_reloc[i]) {
 				DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
@@ -410,6 +413,8 @@ static void gen5_vertex_close(struct sna *sna)
 	struct kgem_bo *bo;
 	unsigned int i, delta = 0;
 
+	assert(sna->render_state.gen5.vertex_offset == 0);
+
 	if (!sna->render.vertex_used) {
 		assert(sna->render.vbo == NULL);
 		assert(sna->render.vertices == sna->render.vertex_data);
@@ -421,7 +426,6 @@ static void gen5_vertex_close(struct sna *sna)
 
 	bo = sna->render.vbo;
 	if (bo == NULL) {
-
 		if (sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface) {
 			DBG(("%s: copy to batch: %d @ %d\n", __FUNCTION__,
 			     sna->render.vertex_used, sna->kgem.nbatch));
@@ -1082,6 +1086,8 @@ static void gen5_emit_vertex_buffer(struct sna *sna,
 {
 	int id = op->u.gen5.ve_id;
 
+	assert((unsigned)id <= 3);
+
 	OUT_BATCH(GEN5_3DSTATE_VERTEX_BUFFERS | 3);
 	OUT_BATCH((id << VB0_BUFFER_INDEX_SHIFT) | VB0_VERTEXDATA |
 		  (4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT));
@@ -1122,6 +1128,8 @@ static bool gen5_rectangle_begin(struct sna *sna,
 	int id = op->u.gen5.ve_id;
 	int ndwords;
 
+	assert((unsigned)id <= 3);
+
 	ndwords = 0;
 	if ((sna->render_state.gen5.vb_id & (1 << id)) == 0)
 		ndwords += 5;
@@ -1167,23 +1175,25 @@ inline static int gen5_get_rectangles(struct sna *sna,
 		DBG(("flushing vbo for %s: %d < %d\n",
 		     __FUNCTION__, rem, op->floats_per_rect));
 		rem = gen5_get_rectangles__flush(sna, op);
-		if (rem == 0) {
-			if (sna->render_state.gen5.vertex_offset) {
-				gen5_vertex_flush(sna);
-				gen5_magic_ca_pass(sna, op);
-			}
-			return 0;
-		}
+		if (rem == 0)
+			goto flush;
 	}
 
 	if (!gen5_rectangle_begin(sna, op))
-		return 0;
+		goto flush;
 
 	if (want * op->floats_per_rect > rem)
 		want = rem / op->floats_per_rect;
 
 	sna->render.vertex_index += 3*want;
 	return want;
+
+flush:
+	if (sna->render_state.gen5.vertex_offset) {
+		gen5_vertex_flush(sna);
+		gen5_magic_ca_pass(sna, op);
+	}
+	return 0;
 }
 
 static uint32_t *
@@ -1414,8 +1424,9 @@ gen5_emit_vertex_elements(struct sna *sna,
 	int selem = is_affine ? 2 : 3;
 	uint32_t w_component;
 	uint32_t src_format;
-	int id = op->u.gen5.ve_id;;
+	int id = op->u.gen5.ve_id;
 
+	assert((unsigned)id <= 3);
 	if (!DBG_NO_STATE_CACHE && render->ve_id == id)
 		return;
 
@@ -3554,7 +3565,6 @@ static uint32_t gen5_create_sf_state(struct sna_static_stream *stream,
 	sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
 	sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
 	sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
-	sf_state->thread4.stats_enable = 1;
 	sf_state->sf5.viewport_transform = FALSE; /* skip viewport */
 	sf_state->sf6.cull_mode = GEN5_CULLMODE_NONE;
 	sf_state->sf6.scissor = 0;
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index e2c78e1d..035da78e 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -932,6 +932,9 @@ static int gen6_vertex_finish(struct sna *sna)
 
 	bo = sna->render.vbo;
 	if (bo) {
+		if (sna->render_state.gen6.vertex_offset)
+			gen6_vertex_flush(sna);
+
 		for (i = 0; i < ARRAY_SIZE(sna->render.vertex_reloc); i++) {
 			if (sna->render.vertex_reloc[i]) {
 				DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
@@ -1635,18 +1638,13 @@ inline static int gen6_get_rectangles(struct sna *sna,
 		DBG(("flushing vbo for %s: %d < %d\n",
 		     __FUNCTION__, rem, op->floats_per_rect));
 		rem = gen6_get_rectangles__flush(sna, op);
-		if (rem == 0) {
-			if (sna->render_state.gen6.vertex_offset) {
-				gen6_vertex_flush(sna);
-				gen6_magic_ca_pass(sna, op);
-			}
-			return 0;
-		}
+		if (rem == 0)
+			goto flush;
 	}
 
 	if (sna->render_state.gen6.vertex_offset == 0 &&
 	    !gen6_rectangle_begin(sna, op))
-		return 0;
+		goto flush;
 
 	if (want > 1 && want * op->floats_per_rect > rem)
 		want = rem / op->floats_per_rect;
@@ -1654,6 +1652,13 @@ inline static int gen6_get_rectangles(struct sna *sna,
 	assert(want > 0);
 	sna->render.vertex_index += 3*want;
 	return want;
+
+flush:
+	if (sna->render_state.gen6.vertex_offset) {
+		gen6_vertex_flush(sna);
+		gen6_magic_ca_pass(sna, op);
+	}
+	return 0;
 }
 
 inline static uint32_t *gen6_composite_get_binding_table(struct sna *sna,
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index faeedf06..c872c633 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -1033,6 +1033,9 @@ static int gen7_vertex_finish(struct sna *sna)
 
 	bo = sna->render.vbo;
 	if (bo) {
+		if (sna->render_state.gen7.vertex_offset)
+			gen7_vertex_flush(sna);
+
 		for (i = 0; i < ARRAY_SIZE(sna->render.vertex_reloc); i++) {
 			if (sna->render.vertex_reloc[i]) {
 				DBG(("%s: reloc[%d] = %d\n", __FUNCTION__,
@@ -1669,24 +1672,26 @@ inline static int gen7_get_rectangles(struct sna *sna,
 		DBG(("flushing vbo for %s: %d < %d\n",
 		     __FUNCTION__, rem, op->floats_per_rect));
 		rem = gen7_get_rectangles__flush(sna, op);
-		if (rem == 0) {
-			if (sna->render_state.gen7.vertex_offset) {
-				gen7_vertex_flush(sna);
-				gen7_magic_ca_pass(sna, op);
-			}
-			return 0;
-		}
+		if (rem == 0)
+			goto flush;
 	}
 
 	if (sna->render_state.gen7.vertex_offset == 0 &&
 	    !gen7_rectangle_begin(sna, op))
-		return 0;
+		goto flush;
 
 	if (want > 1 && want * op->floats_per_rect > rem)
 		want = rem / op->floats_per_rect;
 
 	sna->render.vertex_index += 3*want;
 	return want;
+
+flush:
+	if (sna->render_state.gen7.vertex_offset) {
+		gen7_vertex_flush(sna);
+		gen7_magic_ca_pass(sna, op);
+	}
+	return 0;
 }
 
 inline static uint32_t *gen7_composite_get_binding_table(struct sna *sna,
diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c
index f8df2a42..2d8b3b97 100644
--- a/src/sna/sna_trapezoids.c
+++ b/src/sna/sna_trapezoids.c
@@ -2216,18 +2216,23 @@ composite_unaligned_box(struct sna *sna,
 			float opacity,
 			pixman_region16_t *clip)
 {
-	pixman_region16_t region;
+	if (clip) {
+		pixman_region16_t region;
 
-	pixman_region_init_rects(&region, box, 1);
-	RegionIntersect(&region, &region, clip);
-	if (REGION_NUM_RECTS(&region)) {
-		tmp->boxes(sna, tmp,
-			   REGION_RECTS(&region),
-			   REGION_NUM_RECTS(&region),
-			   opacity);
-		apply_damage(&tmp->base, &region);
+		pixman_region_init_rects(&region, box, 1);
+		RegionIntersect(&region, &region, clip);
+		if (REGION_NUM_RECTS(&region)) {
+			tmp->boxes(sna, tmp,
+				   REGION_RECTS(&region),
+				   REGION_NUM_RECTS(&region),
+				   opacity);
+			apply_damage(&tmp->base, &region);
+		}
+		pixman_region_fini(&region);
+	} else {
+		tmp->box(sna, tmp, box, opacity);
+		apply_damage_box(&tmp->base, box);
 	}
-	pixman_region_fini(&region);
 }
 
 static void
@@ -2244,17 +2249,19 @@ composite_unaligned_trap_row(struct sna *sna,
 	if (covered == 0)
 		return;
 
-	if (y2 > clip->extents.y2)
-		y2 = clip->extents.y2;
-	if (y1 < clip->extents.y1)
-		y1 = clip->extents.y1;
-	if (y1 >= y2)
-		return;
-
 	x1 = dx + pixman_fixed_to_int(trap->left.p1.x);
 	x2 = dx + pixman_fixed_to_int(trap->right.p1.x);
-	if (x2 < clip->extents.x1 || x1 > clip->extents.x2)
-		return;
+	if (clip) {
+		if (y2 > clip->extents.y2)
+			y2 = clip->extents.y2;
+		if (y1 < clip->extents.y1)
+			y1 = clip->extents.y1;
+		if (y1 >= y2)
+			return;
+
+		if (x2 < clip->extents.x1 || x1 > clip->extents.x2)
+			return;
+	}
 
 	box.y1 = y1;
 	box.y2 = y2;
@@ -2528,7 +2535,7 @@ composite_unaligned_boxes(struct sna *sna,
 {
 	BoxRec extents;
 	struct sna_composite_spans_op tmp;
-	pixman_region16_t clip;
+	pixman_region16_t clip, *c;
 	int dst_x, dst_y;
 	int dx, dy, n;
 
@@ -2584,6 +2591,11 @@ composite_unaligned_boxes(struct sna *sna,
 		return true;
 	}
 
+	c = NULL;
+	if (extents.x2 - extents.x1 > clip.extents.x2 - clip.extents.x1 ||
+	    extents.y2 - extents.y1 > clip.extents.y2 - clip.extents.y1)
+		c = &clip;
+
 	extents = *RegionExtents(&clip);
 	dx = dst->pDrawable->x;
 	dy = dst->pDrawable->y;
@@ -2611,7 +2623,7 @@ composite_unaligned_boxes(struct sna *sna,
 	}
 
 	for (n = 0; n < ntrap; n++)
-		composite_unaligned_trap(sna, &tmp, &traps[n], dx, dy, &clip);
+		composite_unaligned_trap(sna, &tmp, &traps[n], dx, dy, c);
 	tmp.done(sna, &tmp);
 
 	REGION_UNINIT(NULL, &clip);