From 93a0b10f163ee79b6a6a7ea46b0a33b622b1f86e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 3 Feb 2012 19:30:24 +0000 Subject: [PATCH] sna: Apply redirection for the render copy into large pixmaps If the pixmap is larger than the pipeline, but the operation extents fit within the pipeline, we may be able to create a proxy target to transform the operation into one that fits within the constraints of the render pipeline. This fixes the infinite recursion hit with partially displayed extremely large images. Signed-off-by: Chris Wilson --- src/sna/gen2_render.c | 40 +++++++++++- src/sna/gen3_render.c | 44 +++++++++++-- src/sna/gen4_render.c | 130 ++++++++++++++++++++++++++++--------- src/sna/gen5_render.c | 136 ++++++++++++++++++++++++-------------- src/sna/gen6_render.c | 147 +++++++++++++++++++++++++++++++----------- src/sna/gen7_render.c | 140 +++++++++++++++++++++++++++++++--------- src/sna/kgem.c | 6 +- src/sna/kgem.h | 26 ++++---- src/sna/sna_blt.c | 17 +++-- src/sna/sna_io.c | 2 +- src/sna/sna_render.c | 107 ++++++++++++++++++++++++++---- src/sna/sna_render.h | 27 +++++++- src/sna/sna_tiling.c | 130 +++++++++++++++++++++++++++++++++++-- 13 files changed, 756 insertions(+), 196 deletions(-) diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.c index 7250d665..97b558db 100644 --- a/src/sna/gen2_render.c +++ b/src/sna/gen2_render.c @@ -2852,9 +2852,7 @@ gen2_render_copy_boxes(struct sna *sna, uint8_t alu, if (src_bo == dst_bo || /* XXX handle overlap using 3D ? */ too_large(src->drawable.width, src->drawable.height) || - src_bo->pitch > MAX_3D_PITCH || - too_large(dst->drawable.width, dst->drawable.height) || - dst_bo->pitch < 8 || dst_bo->pitch > MAX_3D_PITCH) { + src_bo->pitch > MAX_3D_PITCH || dst_bo->pitch < 8) { fallback: return sna_blt_copy_boxes_fallback(sna, alu, src, src_bo, src_dx, src_dy, @@ -2876,10 +2874,39 @@ fallback: tmp.dst.height = dst->drawable.height; tmp.dst.format = sna_format_for_depth(dst->drawable.depth); tmp.dst.bo = dst_bo; + tmp.dst.x = tmp.dst.y = 0; + + sna_render_composite_redirect_init(&tmp); + if (too_large(tmp.dst.width, tmp.dst.height) || + dst_bo->pitch > MAX_3D_PITCH) { + BoxRec extents = box[0]; + int i; + + for (i = 1; i < n; i++) { + if (extents.x1 < box[i].x1) + extents.x1 = box[i].x1; + if (extents.y1 < box[i].y1) + extents.y1 = box[i].y1; + + if (extents.x2 > box[i].x2) + extents.x2 = box[i].x2; + if (extents.y2 > box[i].y2) + extents.y2 = box[i].y2; + } + if (!sna_render_composite_redirect(sna, &tmp, + extents.x1, extents.y1, + extents.x2 - extents.x1, + extents.y2 - extents.y1)) + goto fallback_tiled; + } tmp.floats_per_vertex = 4; tmp.floats_per_rect = 12; + dst_dx += tmp.dst.x; + dst_dy += tmp.dst.y; + tmp.dst.x = tmp.dst.y = 0; + gen2_render_copy_setup_source(&tmp.src, src, src_bo); gen2_emit_copy_state(sna, &tmp); do { @@ -2917,7 +2944,14 @@ fallback: } while (n); gen2_vertex_flush(sna, &tmp); + sna_render_composite_redirect_done(sna, &tmp); return TRUE; + +fallback_tiled: + return sna_tiling_copy_boxes(sna, alu, + src, src_bo, src_dx, src_dy, + dst, dst_bo, dst_dx, dst_dy, + box, n); } static void diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c index 784d3998..d5f56176 100644 --- a/src/sna/gen3_render.c +++ b/src/sna/gen3_render.c @@ -3841,10 +3841,8 @@ gen3_render_copy_boxes(struct sna *sna, uint8_t alu, if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo || /* XXX handle overlap using 3D ? */ src_bo->pitch > MAX_3D_PITCH || - too_large(src->drawable.width, src->drawable.height) || - dst_bo->pitch > MAX_3D_PITCH || - too_large(dst->drawable.width, dst->drawable.height)) { -fallback: + too_large(src->drawable.width, src->drawable.height)) { +fallback_blt: return sna_blt_copy_boxes_fallback(sna, alu, src, src_bo, src_dx, src_dy, dst, dst_bo, dst_dx, dst_dy, @@ -3854,7 +3852,7 @@ fallback: if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) { kgem_submit(&sna->kgem); if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) - goto fallback; + goto fallback_blt; } memset(&tmp, 0, sizeof(tmp)); @@ -3865,6 +3863,31 @@ fallback: tmp.dst.height = dst->drawable.height; tmp.dst.format = sna_format_for_depth(dst->drawable.depth); tmp.dst.bo = dst_bo; + tmp.dst.x = tmp.dst.y = 0; + + sna_render_composite_redirect_init(&tmp); + if (too_large(tmp.dst.width, tmp.dst.height) || + dst_bo->pitch > MAX_3D_PITCH) { + BoxRec extents = box[0]; + int i; + + for (i = 1; i < n; i++) { + if (extents.x1 < box[i].x1) + extents.x1 = box[i].x1; + if (extents.y1 < box[i].y1) + extents.y1 = box[i].y1; + + if (extents.x2 > box[i].x2) + extents.x2 = box[i].x2; + if (extents.y2 > box[i].y2) + extents.y2 = box[i].y2; + } + if (!sna_render_composite_redirect(sna, &tmp, + extents.x1, extents.y1, + extents.x2 - extents.x1, + extents.y2 - extents.y1)) + goto fallback_tiled; + } gen3_render_copy_setup_source(&tmp.src, src, src_bo); @@ -3873,6 +3896,10 @@ fallback: tmp.mask.bo = NULL; tmp.mask.u.gen3.type = SHADER_NONE; + dst_dx += tmp.dst.x; + dst_dy += tmp.dst.y; + tmp.dst.x = tmp.dst.y = 0; + gen3_emit_composite_state(sna, &tmp); gen3_align_vertex(sna, &tmp); @@ -3911,7 +3938,14 @@ fallback: } while (n); gen3_vertex_flush(sna); + sna_render_composite_redirect_done(sna, &tmp); return TRUE; + +fallback_tiled: + return sna_tiling_copy_boxes(sna, alu, + src, src_bo, src_dx, src_dy, + dst, dst_bo, dst_dx, dst_dy, + box, n); } static void diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c index ffdcbb71..b3a64d9f 100644 --- a/src/sna/gen4_render.c +++ b/src/sna/gen4_render.c @@ -289,6 +289,13 @@ gen4_emit_pipelined_pointers(struct sna *sna, #define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y) #define OUT_VERTEX_F(v) vertex_emit(sna, v) +#define GEN4_MAX_3D_SIZE 8192 + +static inline bool too_large(int width, int height) +{ + return width > GEN4_MAX_3D_SIZE || height > GEN4_MAX_3D_SIZE; +} + static int gen4_choose_composite_kernel(int op, Bool has_mask, Bool is_ca, Bool is_affine) { @@ -1884,7 +1891,7 @@ gen4_composite_picture(struct sna *sna, return sna_render_picture_convert(sna, picture, channel, pixmap, x, y, w, h, dst_x, dst_y); - if (pixmap->drawable.width > 8192 || pixmap->drawable.height > 8192) + if (too_large(pixmap->drawable.width, pixmap->drawable.height)) return sna_render_picture_extract(sna, picture, channel, x, y, w, h, dst_x, dst_y); @@ -1983,7 +1990,7 @@ try_blt(struct sna *sna, return TRUE; } - if (width > 8192 || height > 8192) { + if (too_large(width, height)) { DBG(("%s: operation too large for 3D pipe (%d, %d)\n", __FUNCTION__, width, height)); return TRUE; @@ -2221,11 +2228,10 @@ gen4_render_composite(struct sna *sna, return FALSE; sna_render_reduce_damage(tmp, dst_x, dst_y, width, height); - if (tmp->dst.width > 8192 || tmp->dst.height > 8192) { - if (!sna_render_composite_redirect(sna, tmp, - dst_x, dst_y, width, height)) + if (too_large(tmp->dst.width, tmp->dst.height) && + !sna_render_composite_redirect(sna, tmp, + dst_x, dst_y, width, height)) return FALSE; - } switch (gen4_composite_picture(sna, src, &tmp->src, src_x, src_y, @@ -2432,10 +2438,8 @@ gen4_render_copy_boxes(struct sna *sna, uint8_t alu, box, n)) return TRUE; - if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo || - src->drawable.width > 8192 || src->drawable.height > 8192 || - dst->drawable.width > 8192 || dst->drawable.height > 8192) { -fallback: + if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo) { +fallback_blt: if (!sna_blt_compare_depth(&src->drawable, &dst->drawable)) return FALSE; @@ -2458,24 +2462,73 @@ fallback: tmp.src.pict_format = sna_format_for_depth(src->drawable.depth); } if (!gen4_check_format(tmp.src.pict_format)) - goto fallback; + goto fallback_blt; tmp.op = alu == GXcopy ? PictOpSrc : PictOpClear; tmp.dst.pixmap = dst; tmp.dst.width = dst->drawable.width; tmp.dst.height = dst->drawable.height; + tmp.dst.x = tmp.dst.y = 0; tmp.dst.bo = dst_bo; - tmp.dst.x = dst_dx; - tmp.dst.y = dst_dy; - tmp.src.bo = src_bo; + sna_render_composite_redirect_init(&tmp); + if (too_large(tmp.dst.width, tmp.dst.height)) { + BoxRec extents = box[0]; + int i; + + for (i = 1; i < n; i++) { + if (extents.x1 < box[i].x1) + extents.x1 = box[i].x1; + if (extents.y1 < box[i].y1) + extents.y1 = box[i].y1; + + if (extents.x2 > box[i].x2) + extents.x2 = box[i].x2; + if (extents.y2 > box[i].y2) + extents.y2 = box[i].y2; + } + if (!sna_render_composite_redirect(sna, &tmp, + extents.x1, extents.y1, + extents.x2 - extents.x1, + extents.y2 - extents.y1)) + goto fallback_tiled; + } + tmp.src.filter = SAMPLER_FILTER_NEAREST; tmp.src.repeat = SAMPLER_EXTEND_NONE; - tmp.src.card_format = - gen4_get_card_format(tmp.src.pict_format), - tmp.src.width = src->drawable.width; - tmp.src.height = src->drawable.height; + tmp.src.card_format = gen4_get_card_format(tmp.src.pict_format); + if (too_large(src->drawable.width, src->drawable.height)) { + BoxRec extents = box[0]; + int i; + + for (i = 1; i < n; i++) { + if (extents.x1 < box[i].x1) + extents.x1 = box[i].x1; + if (extents.y1 < box[i].y1) + extents.y1 = box[i].y1; + + if (extents.x2 > box[i].x2) + extents.x2 = box[i].x2; + if (extents.y2 > box[i].y2) + extents.y2 = box[i].y2; + } + + if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src, + extents.x1 + src_dx, + extents.y1 + src_dy, + extents.x2 - extents.x1, + extents.y2 - extents.y1)) { + goto fallback_tiled_dst; + } + } else { + tmp.src.bo = kgem_bo_reference(src_bo); + tmp.src.width = src->drawable.width; + tmp.src.height = src->drawable.height; + tmp.src.offset[0] = tmp.src.offset[1] = 0; + tmp.src.scale[0] = 1.f/src->drawable.width; + tmp.src.scale[1] = 1.f/src->drawable.height; + } tmp.mask.bo = NULL; @@ -2487,9 +2540,16 @@ fallback: if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) { kgem_submit(&sna->kgem); if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) - goto fallback; + goto fallback_tiled_src; } + dst_dx += tmp.dst.x; + dst_dy += tmp.dst.y; + tmp.dst.x = tmp.dst.y = 0; + + src_dx += tmp.src.offset[0]; + src_dy += tmp.src.offset[1]; + gen4_copy_bind_surfaces(sna, &tmp); gen4_align_vertex(sna, &tmp); @@ -2499,10 +2559,23 @@ fallback: gen4_render_copy_one(sna, &tmp, box->x1 + src_dx, box->y1 + src_dy, box->x2 - box->x1, box->y2 - box->y1, - box->x1, box->y1); + box->x1 + dst_dx, box->y1 + dst_dy); box++; } while (--n); + sna_render_composite_redirect_done(sna, &tmp); + kgem_bo_destroy(&sna->kgem, tmp.src.bo); return TRUE; + +fallback_tiled_src: + kgem_bo_destroy(&sna->kgem, tmp.src.bo); +fallback_tiled_dst: + if (tmp.redirect.real_bo) + kgem_bo_destroy(&sna->kgem, tmp.dst.bo); +fallback_tiled: + return sna_tiling_copy_boxes(sna, alu, + src, src_bo, src_dx, src_dy, + dst, dst_bo, dst_dx, dst_dy, + box, n); } static void @@ -2552,8 +2625,8 @@ gen4_render_copy(struct sna *sna, uint8_t alu, return TRUE; if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo || - src->drawable.width > 8192 || src->drawable.height > 8192 || - dst->drawable.width > 8192 || dst->drawable.height > 8192) { + too_large(src->drawable.width, src->drawable.height) || + too_large(dst->drawable.width, dst->drawable.height)) { fallback: if (!sna_blt_compare_depth(&src->drawable, &dst->drawable)) return FALSE; @@ -2683,10 +2756,7 @@ gen4_render_fill_boxes(struct sna *sna, return FALSE; } - if (prefer_blt(sna) || - dst->drawable.width > 8192 || - dst->drawable.height > 8192 || - !gen4_check_dst_format(format)) { + if (prefer_blt(sna) || too_large(dst->drawable.width, dst->drawable.height)) { uint8_t alu = -1; if (op == PictOpClear || (op == PictOpOutReverse && color->alpha >= 0xff00)) @@ -2715,7 +2785,7 @@ gen4_render_fill_boxes(struct sna *sna, if (!gen4_check_dst_format(format)) return FALSE; - if (dst->drawable.width > 8192 || dst->drawable.height > 8192) + if (too_large(dst->drawable.width, dst->drawable.height)) return sna_tiling_fill_boxes(sna, op, format, color, dst, dst_bo, box, n); } @@ -2834,7 +2904,7 @@ gen4_render_fill(struct sna *sna, uint8_t alu, return TRUE; if (!(alu == GXcopy || alu == GXclear) || - dst->drawable.width > 8192 || dst->drawable.height > 8192) + too_large(dst->drawable.width, dst->drawable.height)) return sna_blt_fill(sna, alu, dst_bo, dst->drawable.bitsPerPixel, color, @@ -2925,7 +2995,7 @@ gen4_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo, /* Must use the BLT if we can't RENDER... */ if (!(alu == GXcopy || alu == GXclear) || - dst->drawable.width > 8192 || dst->drawable.height > 8192) + too_large(dst->drawable.width, dst->drawable.height)) return FALSE; if (alu == GXclear) @@ -3251,7 +3321,7 @@ Bool gen4_render_init(struct sna *sna) sna->render.reset = gen4_render_reset; sna->render.fini = gen4_render_fini; - sna->render.max_3d_size = 8192; + sna->render.max_3d_size = GEN4_MAX_3D_SIZE; sna->render.max_3d_pitch = 1 << 18; return TRUE; } diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c index 03dc8c98..933c51f6 100644 --- a/src/sna/gen5_render.c +++ b/src/sna/gen5_render.c @@ -1378,6 +1378,9 @@ gen5_emit_drawing_rectangle(struct sna *sna, const struct sna_composite_op *op) uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1); uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x; + assert(!too_large(op->dst.x, op->dst.y)); + assert(!too_large(op->dst.width, op->dst.height)); + if (!DBG_NO_STATE_CACHE && sna->render_state.gen5.drawrect_limit == limit && sna->render_state.gen5.drawrect_offset == offset) @@ -2731,20 +2734,6 @@ gen5_copy_bind_surfaces(struct sna *sna, gen5_emit_state(sna, op, offset); } -static inline bool untiled_tlb_miss(struct kgem_bo *bo) -{ - return bo->tiling == I915_TILING_NONE && bo->pitch >= 4096; -} - -static inline bool prefer_blt_copy(struct sna *sna, - struct kgem_bo *src_bo, - struct kgem_bo *dst_bo) -{ - return (sna->kgem.ring != KGEM_RENDER || - untiled_tlb_miss(src_bo) || - untiled_tlb_miss(dst_bo)); -} - static Bool gen5_render_copy_boxes(struct sna *sna, uint8_t alu, PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, @@ -2753,8 +2742,7 @@ gen5_render_copy_boxes(struct sna *sna, uint8_t alu, { struct sna_composite_op tmp; - if (prefer_blt_copy(sna, src_bo, dst_bo) && - sna_blt_compare_depth(&src->drawable, &dst->drawable) && + if (sna_blt_compare_depth(&src->drawable, &dst->drawable) && sna_blt_copy_boxes(sna, alu, src_bo, src_dx, src_dy, dst_bo, dst_dx, dst_dy, @@ -2762,12 +2750,10 @@ gen5_render_copy_boxes(struct sna *sna, uint8_t alu, box, n)) return TRUE; - if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo || - too_large(src->drawable.width, src->drawable.height) || - too_large(dst->drawable.width, dst->drawable.height)) { -fallback: - if (!sna_blt_compare_depth(&src->drawable, &dst->drawable)) - return FALSE; + if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo) { +fallback_blt: + if (!sna_blt_compare_depth(&src->drawable, &dst->drawable)) + return FALSE; return sna_blt_copy_boxes_fallback(sna, alu, src, src_bo, src_dx, src_dy, @@ -2787,7 +2773,7 @@ fallback: if (!gen5_check_format(tmp.src.pict_format)) { DBG(("%s: unsupported source format, %x, use BLT\n", __FUNCTION__, tmp.src.pict_format)); - goto fallback; + goto fallback_blt; } DBG(("%s (%d, %d)->(%d, %d) x %d\n", @@ -2798,17 +2784,66 @@ fallback: tmp.dst.pixmap = dst; tmp.dst.width = dst->drawable.width; tmp.dst.height = dst->drawable.height; + tmp.dst.x = tmp.dst.y = 0; tmp.dst.bo = dst_bo; - tmp.dst.x = dst_dx; - tmp.dst.y = dst_dy; - tmp.src.bo = src_bo; + sna_render_composite_redirect_init(&tmp); + if (too_large(tmp.dst.width, tmp.dst.height)) { + BoxRec extents = box[0]; + int i; + + for (i = 1; i < n; i++) { + if (extents.x1 < box[i].x1) + extents.x1 = box[i].x1; + if (extents.y1 < box[i].y1) + extents.y1 = box[i].y1; + + if (extents.x2 > box[i].x2) + extents.x2 = box[i].x2; + if (extents.y2 > box[i].y2) + extents.y2 = box[i].y2; + } + + if (!sna_render_composite_redirect(sna, &tmp, + extents.x1, extents.y1, + extents.x2 - extents.x1, + extents.y2 - extents.y1)) + goto fallback_tiled; + } + tmp.src.filter = SAMPLER_FILTER_NEAREST; tmp.src.repeat = SAMPLER_EXTEND_NONE; - tmp.src.card_format = - gen5_get_card_format(tmp.src.pict_format); - tmp.src.width = src->drawable.width; - tmp.src.height = src->drawable.height; + tmp.src.card_format = gen5_get_card_format(tmp.src.pict_format); + if (too_large(src->drawable.width, src->drawable.height)) { + BoxRec extents = box[0]; + int i; + + for (i = 1; i < n; i++) { + if (extents.x1 < box[i].x1) + extents.x1 = box[i].x1; + if (extents.y1 < box[i].y1) + extents.y1 = box[i].y1; + + if (extents.x2 > box[i].x2) + extents.x2 = box[i].x2; + if (extents.y2 > box[i].y2) + extents.y2 = box[i].y2; + } + + if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src, + extents.x1 + src_dx, + extents.y1 + src_dy, + extents.x2 - extents.x1, + extents.y2 - extents.y1)) + goto fallback_tiled_dst; + } else { + tmp.src.bo = kgem_bo_reference(src_bo); + tmp.src.width = src->drawable.width; + tmp.src.height = src->drawable.height; + tmp.src.offset[0] = tmp.src.offset[1] = 0; + tmp.src.scale[0] = 1.f/src->drawable.width; + tmp.src.scale[1] = 1.f/src->drawable.height; + } tmp.is_affine = TRUE; tmp.floats_per_vertex = 3; @@ -2819,24 +2854,19 @@ fallback: if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) { kgem_submit(&sna->kgem); if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) - goto fallback; + goto fallback_tiled_src; } - if (kgem_bo_is_dirty(src_bo)) { - if (sna_blt_compare_depth(&src->drawable, &dst->drawable) && - sna_blt_copy_boxes(sna, alu, - src_bo, src_dx, src_dy, - dst_bo, dst_dx, dst_dy, - dst->drawable.bitsPerPixel, - box, n)) - return TRUE; - } + dst_dx += tmp.dst.x; + dst_dy += tmp.dst.y; + tmp.dst.x = tmp.dst.y = 0; + + src_dx += tmp.src.offset[0]; + src_dy += tmp.src.offset[1]; gen5_copy_bind_surfaces(sna, &tmp); gen5_align_vertex(sna, &tmp); - tmp.src.scale[0] = 1.f/src->drawable.width; - tmp.src.scale[1] = 1.f/src->drawable.height; do { int n_this_time = gen5_get_rectangles(sna, &tmp, n); if (n_this_time == 0) { @@ -2850,15 +2880,15 @@ fallback: box->x1 + src_dx, box->y1 + src_dy, box->x1 + dst_dx, box->y1 + dst_dy, box->x2 - box->x1, box->y2 - box->y1)); - OUT_VERTEX(box->x2, box->y2); + OUT_VERTEX(box->x2 + dst_dx, box->y2 + dst_dy); OUT_VERTEX_F((box->x2 + src_dx) * tmp.src.scale[0]); OUT_VERTEX_F((box->y2 + src_dy) * tmp.src.scale[1]); - OUT_VERTEX(box->x1, box->y2); + OUT_VERTEX(box->x1 + dst_dx, box->y2 + dst_dy); OUT_VERTEX_F((box->x1 + src_dx) * tmp.src.scale[0]); OUT_VERTEX_F((box->y2 + src_dy) * tmp.src.scale[1]); - OUT_VERTEX(box->x1, box->y1); + OUT_VERTEX(box->x1 + dst_dx, box->y1 + dst_dy); OUT_VERTEX_F((box->x1 + src_dx) * tmp.src.scale[0]); OUT_VERTEX_F((box->y1 + src_dy) * tmp.src.scale[1]); @@ -2867,7 +2897,20 @@ fallback: } while (n); gen5_vertex_flush(sna); + sna_render_composite_redirect_done(sna, &tmp); + kgem_bo_destroy(&sna->kgem, tmp.src.bo); return TRUE; + +fallback_tiled_src: + kgem_bo_destroy(&sna->kgem, tmp.src.bo); +fallback_tiled_dst: + if (tmp.redirect.real_bo) + kgem_bo_destroy(&sna->kgem, tmp.dst.bo); +fallback_tiled: + return sna_tiling_copy_boxes(sna, alu, + src, src_bo, src_dx, src_dy, + dst, dst_bo, dst_dx, dst_dy, + box, n); } static void @@ -2916,8 +2959,7 @@ gen5_render_copy(struct sna *sna, uint8_t alu, { DBG(("%s (alu=%d)\n", __FUNCTION__, alu)); - if (prefer_blt_copy(sna, src_bo, dst_bo) && - sna_blt_compare_depth(&src->drawable, &dst->drawable) && + if (sna_blt_compare_depth(&src->drawable, &dst->drawable) && sna_blt_copy(sna, alu, src_bo, dst_bo, dst->drawable.bitsPerPixel, diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c index 9f799ef8..08f96687 100644 --- a/src/sna/gen6_render.c +++ b/src/sna/gen6_render.c @@ -229,6 +229,11 @@ static const struct formatinfo { #define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y) #define OUT_VERTEX_F(v) vertex_emit(sna, v) +static inline bool too_large(int width, int height) +{ + return width > GEN6_MAX_SIZE || height > GEN6_MAX_SIZE; +} + static uint32_t gen6_get_blend(int op, bool has_component_alpha, uint32_t dst_format) @@ -708,6 +713,9 @@ gen6_emit_drawing_rectangle(struct sna *sna, uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1); uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x; + assert(!too_large(op->dst.x, op->dst.y)); + assert(!too_large(op->dst.width, op->dst.height)); + if (sna->render_state.gen6.drawrect_limit == limit && sna->render_state.gen6.drawrect_offset == offset) return false; @@ -2061,11 +2069,6 @@ gen6_composite_solid_init(struct sna *sna, return channel->bo != NULL; } -static inline bool too_large(int width, int height) -{ - return width > GEN6_MAX_SIZE || height > GEN6_MAX_SIZE; -} - static int gen6_composite_picture(struct sna *sna, PicturePtr picture, @@ -3082,13 +3085,22 @@ static inline bool untiled_tlb_miss(struct kgem_bo *bo) return bo->tiling == I915_TILING_NONE && bo->pitch >= 4096; } -static inline bool prefer_blt_copy(struct sna *sna, - struct kgem_bo *src_bo, - struct kgem_bo *dst_bo) +static bool prefer_blt_bo(struct sna *sna, + PixmapPtr pixmap, + struct kgem_bo *bo) { - return (prefer_blt_ring(sna) || - untiled_tlb_miss(src_bo) || - untiled_tlb_miss(dst_bo)); + return (too_large(pixmap->drawable.width, pixmap->drawable.height) || + untiled_tlb_miss(bo)) && + kgem_bo_can_blt(&sna->kgem, bo); +} + +static inline bool prefer_blt_copy(struct sna *sna, + PixmapPtr src, struct kgem_bo *src_bo, + PixmapPtr dst, struct kgem_bo *dst_bo) +{ + return (sna->kgem.ring != KGEM_RENDER || + prefer_blt_bo(sna, src, src_bo) || + prefer_blt_bo(sna, dst, dst_bo)); } static inline bool @@ -3148,7 +3160,7 @@ gen6_render_copy_boxes(struct sna *sna, uint8_t alu, dst_bo, dst_dx, dst_dy, box, n))); - if (prefer_blt_copy(sna, src_bo, dst_bo) && + if (prefer_blt_copy(sna, src, src_bo, dst, dst_bo) && sna_blt_compare_depth(&src->drawable, &dst->drawable) && sna_blt_copy_boxes(sna, alu, src_bo, src_dx, src_dy, @@ -3160,26 +3172,15 @@ gen6_render_copy_boxes(struct sna *sna, uint8_t alu, if (!(alu == GXcopy || alu == GXclear) || overlaps(src_bo, src_dx, src_dy, dst_bo, dst_dx, dst_dy, - box, n) || - too_large(src->drawable.width, src->drawable.height) || - too_large(dst->drawable.width, dst->drawable.height)) { -fallback: + box, n)) { +fallback_blt: if (!sna_blt_compare_depth(&src->drawable, &dst->drawable)) return false; - if (sna_blt_copy_boxes_fallback(sna, alu, + return sna_blt_copy_boxes_fallback(sna, alu, src, src_bo, src_dx, src_dy, dst, dst_bo, dst_dx, dst_dy, - box, n)) - return true; - - return false; -#if 0 - return sna_tiling_copy_boxes(sna, - src, src_bo, src_dx, src_dy, - dst, dst_bo, dst_dx, dst_dy, - box, n); -#endif + box, n); } if (dst->drawable.depth == src->drawable.depth) { @@ -3190,25 +3191,73 @@ fallback: tmp.src.pict_format = sna_format_for_depth(src->drawable.depth); } if (!gen6_check_format(tmp.src.pict_format)) - goto fallback; + goto fallback_blt; tmp.op = alu == GXcopy ? PictOpSrc : PictOpClear; tmp.dst.pixmap = dst; - tmp.dst.x = tmp.dst.y = 0; tmp.dst.width = dst->drawable.width; tmp.dst.height = dst->drawable.height; tmp.dst.bo = dst_bo; - tmp.dst.x = dst_dx; - tmp.dst.y = dst_dy; + tmp.dst.x = tmp.dst.y = 0; + + sna_render_composite_redirect_init(&tmp); + if (too_large(tmp.dst.width, tmp.dst.height)) { + BoxRec extents = box[0]; + int i; + + for (i = 1; i < n; i++) { + if (extents.x1 < box[i].x1) + extents.x1 = box[i].x1; + if (extents.y1 < box[i].y1) + extents.y1 = box[i].y1; + + if (extents.x2 > box[i].x2) + extents.x2 = box[i].x2; + if (extents.y2 > box[i].y2) + extents.y2 = box[i].y2; + } + if (!sna_render_composite_redirect(sna, &tmp, + extents.x1, extents.y1, + extents.x2 - extents.x1, + extents.y2 - extents.y1)) + goto fallback_tiled; + } - tmp.src.bo = src_bo; tmp.src.filter = SAMPLER_FILTER_NEAREST; tmp.src.repeat = SAMPLER_EXTEND_NONE; tmp.src.card_format = gen6_get_card_format(tmp.src.pict_format); - tmp.src.width = src->drawable.width; - tmp.src.height = src->drawable.height; + if (too_large(src->drawable.width, src->drawable.height)) { + BoxRec extents = box[0]; + int i; + + for (i = 1; i < n; i++) { + if (extents.x1 < box[i].x1) + extents.x1 = box[i].x1; + if (extents.y1 < box[i].y1) + extents.y1 = box[i].y1; + + if (extents.x2 > box[i].x2) + extents.x2 = box[i].x2; + if (extents.y2 > box[i].y2) + extents.y2 = box[i].y2; + } + + if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src, + extents.x1 + src_dx, + extents.y1 + src_dy, + extents.x2 - extents.x1, + extents.y2 - extents.y1)) + goto fallback_tiled_dst; + } else { + tmp.src.bo = kgem_bo_reference(src_bo); + tmp.src.width = src->drawable.width; + tmp.src.height = src->drawable.height; + tmp.src.offset[0] = tmp.src.offset[1] = 0; + tmp.src.scale[0] = 1.f/src->drawable.width; + tmp.src.scale[1] = 1.f/src->drawable.height; + } tmp.mask.bo = NULL; tmp.mask.filter = SAMPLER_FILTER_NEAREST; @@ -3229,10 +3278,17 @@ fallback: if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) { kgem_submit(&sna->kgem); if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) - goto fallback; + goto fallback_tiled_src; _kgem_set_mode(&sna->kgem, KGEM_RENDER); } + dst_dx += tmp.dst.x; + dst_dy += tmp.dst.y; + tmp.dst.x = tmp.dst.y = 0; + + src_dx += tmp.src.offset[0]; + src_dy += tmp.src.offset[1]; + gen6_emit_copy_state(sna, &tmp); gen6_align_vertex(sna, &tmp); @@ -3256,9 +3312,9 @@ fallback: box->x1 + src_dx, box->y1 + src_dy, box->x1 + dst_dx, box->y1 + dst_dy, box->x2 - box->x1, box->y2 - box->y1)); - v[0] = pack_2s(box->x2, box->y2); - v[3] = pack_2s(box->x1, box->y2); - v[6] = pack_2s(box->x1, box->y1); + v[0] = pack_2s(box->x2 + dst_dx, box->y2 + dst_dy); + v[3] = pack_2s(box->x1 + dst_dx, box->y2 + dst_dy); + v[6] = pack_2s(box->x1 + dst_dx, box->y1 + dst_dy); v[1] = (box->x2 + src_dx) * tmp.src.scale[0]; v[7] = v[4] = (box->x1 + src_dx) * tmp.src.scale[0]; @@ -3272,7 +3328,20 @@ fallback: } while (n); gen6_vertex_flush(sna); + sna_render_composite_redirect_done(sna, &tmp); + kgem_bo_destroy(&sna->kgem, tmp.src.bo); return TRUE; + +fallback_tiled_src: + kgem_bo_destroy(&sna->kgem, tmp.src.bo); +fallback_tiled_dst: + if (tmp.redirect.real_bo) + kgem_bo_destroy(&sna->kgem, tmp.dst.bo); +fallback_tiled: + return sna_tiling_copy_boxes(sna, alu, + src, src_bo, src_dx, src_dy, + dst, dst_bo, dst_dx, dst_dy, + box, n); } static void @@ -3329,7 +3398,7 @@ gen6_render_copy(struct sna *sna, uint8_t alu, src->drawable.width, src->drawable.height, dst->drawable.width, dst->drawable.height)); - if (prefer_blt_copy(sna, src_bo, dst_bo) && + if (prefer_blt_copy(sna, src, src_bo, dst, dst_bo) && sna_blt_compare_depth(&src->drawable, &dst->drawable) && sna_blt_copy(sna, alu, src_bo, dst_bo, diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c index e2486c65..5385a473 100644 --- a/src/sna/gen7_render.c +++ b/src/sna/gen7_render.c @@ -235,6 +235,11 @@ static const struct formatinfo { #define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y) #define OUT_VERTEX_F(v) vertex_emit(sna, v) +static inline bool too_large(int width, int height) +{ + return width > GEN7_MAX_SIZE || height > GEN7_MAX_SIZE; +} + static uint32_t gen7_get_blend(int op, Bool has_component_alpha, uint32_t dst_format) @@ -817,6 +822,9 @@ gen7_emit_drawing_rectangle(struct sna *sna, uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1); uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x; + assert(!too_large(op->dst.x, op->dst.y)); + assert(!too_large(op->dst.width, op->dst.height)); + if (sna->render_state.gen7.drawrect_limit == limit && sna->render_state.gen7.drawrect_offset == offset) return; @@ -2124,11 +2132,6 @@ gen7_composite_solid_init(struct sna *sna, return channel->bo != NULL; } -static inline bool too_large(int width, int height) -{ - return width > GEN7_MAX_SIZE || height > GEN7_MAX_SIZE; -} - static int gen7_composite_picture(struct sna *sna, PicturePtr picture, @@ -3130,13 +3133,22 @@ static inline bool untiled_tlb_miss(struct kgem_bo *bo) return bo->tiling == I915_TILING_NONE && bo->pitch >= 4096; } +static bool prefer_blt_bo(struct sna *sna, + PixmapPtr pixmap, + struct kgem_bo *bo) +{ + return (too_large(pixmap->drawable.width, pixmap->drawable.height) || + untiled_tlb_miss(bo)) && + kgem_bo_can_blt(&sna->kgem, bo); +} + static inline bool prefer_blt_copy(struct sna *sna, - struct kgem_bo *src_bo, - struct kgem_bo *dst_bo) + PixmapPtr src, struct kgem_bo *src_bo, + PixmapPtr dst, struct kgem_bo *dst_bo) { return (sna->kgem.ring != KGEM_RENDER || - untiled_tlb_miss(src_bo) || - untiled_tlb_miss(dst_bo)); + prefer_blt_bo(sna, src, src_bo) || + prefer_blt_bo(sna, dst, dst_bo)); } static inline bool @@ -3196,7 +3208,7 @@ gen7_render_copy_boxes(struct sna *sna, uint8_t alu, dst_bo, dst_dx, dst_dy, box, n))); - if (prefer_blt_copy(sna, src_bo, dst_bo) && + if (prefer_blt_copy(sna, src, src_bo, dst, dst_bo) && sna_blt_compare_depth(&src->drawable, &dst->drawable) && sna_blt_copy_boxes(sna, alu, src_bo, src_dx, src_dy, @@ -3208,17 +3220,15 @@ gen7_render_copy_boxes(struct sna *sna, uint8_t alu, if (!(alu == GXcopy || alu == GXclear) || overlaps(src_bo, src_dx, src_dy, dst_bo, dst_dx, dst_dy, - box, n) || - too_large(src->drawable.width, src->drawable.height) || - too_large(dst->drawable.width, dst->drawable.height)) { -fallback: + box, n)) { +fallback_blt: if (!sna_blt_compare_depth(&src->drawable, &dst->drawable)) - return FALSE; + return false; return sna_blt_copy_boxes_fallback(sna, alu, - src, src_bo, src_dx, src_dy, - dst, dst_bo, dst_dx, dst_dy, - box, n); + src, src_bo, src_dx, src_dy, + dst, dst_bo, dst_dx, dst_dy, + box, n); } if (dst->drawable.depth == src->drawable.depth) { @@ -3229,25 +3239,73 @@ fallback: tmp.src.pict_format = sna_format_for_depth(src->drawable.depth); } if (!gen7_check_format(tmp.src.pict_format)) - goto fallback; + goto fallback_blt; tmp.op = alu == GXcopy ? PictOpSrc : PictOpClear; tmp.dst.pixmap = dst; - tmp.dst.x = tmp.dst.y = 0; tmp.dst.width = dst->drawable.width; tmp.dst.height = dst->drawable.height; tmp.dst.bo = dst_bo; - tmp.dst.x = dst_dx; - tmp.dst.y = dst_dy; + tmp.dst.x = tmp.dst.y = 0; + + sna_render_composite_redirect_init(&tmp); + if (too_large(tmp.dst.width, tmp.dst.height)) { + BoxRec extents = box[0]; + int i; + + for (i = 1; i < n; i++) { + if (extents.x1 < box[i].x1) + extents.x1 = box[i].x1; + if (extents.y1 < box[i].y1) + extents.y1 = box[i].y1; + + if (extents.x2 > box[i].x2) + extents.x2 = box[i].x2; + if (extents.y2 > box[i].y2) + extents.y2 = box[i].y2; + } + if (!sna_render_composite_redirect(sna, &tmp, + extents.x1, extents.y1, + extents.x2 - extents.x1, + extents.y2 - extents.y1)) + goto fallback_tiled; + } - tmp.src.bo = src_bo; tmp.src.filter = SAMPLER_FILTER_NEAREST; tmp.src.repeat = SAMPLER_EXTEND_NONE; tmp.src.card_format = gen7_get_card_format(tmp.src.pict_format); - tmp.src.width = src->drawable.width; - tmp.src.height = src->drawable.height; + if (too_large(src->drawable.width, src->drawable.height)) { + BoxRec extents = box[0]; + int i; + + for (i = 1; i < n; i++) { + if (extents.x1 < box[i].x1) + extents.x1 = box[i].x1; + if (extents.y1 < box[i].y1) + extents.y1 = box[i].y1; + + if (extents.x2 > box[i].x2) + extents.x2 = box[i].x2; + if (extents.y2 > box[i].y2) + extents.y2 = box[i].y2; + } + + if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src, + extents.x1 + src_dx, + extents.y1 + src_dy, + extents.x2 - extents.x1, + extents.y2 - extents.y1)) + goto fallback_tiled_dst; + } else { + tmp.src.bo = kgem_bo_reference(src_bo); + tmp.src.width = src->drawable.width; + tmp.src.height = src->drawable.height; + tmp.src.offset[0] = tmp.src.offset[1] = 0; + tmp.src.scale[0] = 1.f/src->drawable.width; + tmp.src.scale[1] = 1.f/src->drawable.height; + } tmp.mask.bo = NULL; tmp.mask.filter = SAMPLER_FILTER_NEAREST; @@ -3259,7 +3317,7 @@ fallback: tmp.has_component_alpha = 0; tmp.need_magic_ca_pass = 0; - tmp.u.gen7.wm_kernel = GEN7_WM_KERNEL_NOMASK; + tmp.u.gen7.wm_kernel = GEN6_WM_KERNEL_NOMASK; tmp.u.gen7.nr_surfaces = 2; tmp.u.gen7.nr_inputs = 1; tmp.u.gen7.ve_id = 1; @@ -3268,10 +3326,17 @@ fallback: if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) { kgem_submit(&sna->kgem); if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) - goto fallback; + goto fallback_tiled_src; _kgem_set_mode(&sna->kgem, KGEM_RENDER); } + dst_dx += tmp.dst.x; + dst_dy += tmp.dst.y; + tmp.dst.x = tmp.dst.y = 0; + + src_dx += tmp.src.offset[0]; + src_dy += tmp.src.offset[1]; + gen7_emit_copy_state(sna, &tmp); gen7_align_vertex(sna, &tmp); @@ -3295,9 +3360,9 @@ fallback: box->x1 + src_dx, box->y1 + src_dy, box->x1 + dst_dx, box->y1 + dst_dy, box->x2 - box->x1, box->y2 - box->y1)); - v[0] = pack_2s(box->x2, box->y2); - v[3] = pack_2s(box->x1, box->y2); - v[6] = pack_2s(box->x1, box->y1); + v[0] = pack_2s(box->x2 + dst_dx, box->y2 + dst_dy); + v[3] = pack_2s(box->x1 + dst_dx, box->y2 + dst_dy); + v[6] = pack_2s(box->x1 + dst_dx, box->y1 + dst_dy); v[1] = (box->x2 + src_dx) * tmp.src.scale[0]; v[7] = v[4] = (box->x1 + src_dx) * tmp.src.scale[0]; @@ -3311,7 +3376,20 @@ fallback: } while (n); gen7_vertex_flush(sna); + sna_render_composite_redirect_done(sna, &tmp); + kgem_bo_destroy(&sna->kgem, tmp.src.bo); return TRUE; + +fallback_tiled_src: + kgem_bo_destroy(&sna->kgem, tmp.src.bo); +fallback_tiled_dst: + if (tmp.redirect.real_bo) + kgem_bo_destroy(&sna->kgem, tmp.dst.bo); +fallback_tiled: + return sna_tiling_copy_boxes(sna, alu, + src, src_bo, src_dx, src_dy, + dst, dst_bo, dst_dx, dst_dy, + box, n); } static void @@ -3368,7 +3446,7 @@ gen7_render_copy(struct sna *sna, uint8_t alu, src->drawable.width, src->drawable.height, dst->drawable.width, dst->drawable.height)); - if (prefer_blt_copy(sna, src_bo, dst_bo) && + if (prefer_blt_copy(sna, src, src_bo, dst, dst_bo) && sna_blt_compare_depth(&src->drawable, &dst->drawable) && sna_blt_copy(sna, alu, src_bo, dst_bo, diff --git a/src/sna/kgem.c b/src/sna/kgem.c index d2580e6f..757bad19 100644 --- a/src/sna/kgem.c +++ b/src/sna/kgem.c @@ -678,9 +678,9 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen) * disable dual-stream mode */ kgem->min_alignment = 64; - kgem->max_object_size = kgem->aperture_total / 2; - kgem->max_cpu_size = kgem->aperture_total / 2; - kgem->max_gpu_size = kgem->aperture_total / 2; + kgem->max_object_size = 2 * kgem->aperture_total / 3; + kgem->max_cpu_size = kgem->max_object_size; + kgem->max_gpu_size = kgem->max_object_size; if (!kgem->has_llc) kgem->max_gpu_size = MAX_CACHE_SIZE; if (gen < 40) { diff --git a/src/sna/kgem.h b/src/sna/kgem.h index 974a7167..b6930e0c 100644 --- a/src/sna/kgem.h +++ b/src/sna/kgem.h @@ -379,18 +379,10 @@ static inline int kgem_buffer_size(struct kgem_bo *bo) return bo->size.bytes; } -static inline bool kgem_bo_can_blt(struct kgem *kgem, - struct kgem_bo *bo) +static inline bool kgem_bo_blt_pitch_is_ok(struct kgem *kgem, + struct kgem_bo *bo) { - int pitch; - - if (bo->tiling == I915_TILING_Y) { - DBG(("%s: can not blt to handle=%d, tiling=Y\n", - __FUNCTION__, bo->handle)); - return false; - } - - pitch = bo->pitch; + int pitch = bo->pitch; if (kgem->gen >= 40 && bo->tiling) pitch /= 4; if (pitch > MAXSHORT) { @@ -402,6 +394,18 @@ static inline bool kgem_bo_can_blt(struct kgem *kgem, return true; } +static inline bool kgem_bo_can_blt(struct kgem *kgem, + struct kgem_bo *bo) +{ + if (bo->tiling == I915_TILING_Y) { + DBG(("%s: can not blt to handle=%d, tiling=Y\n", + __FUNCTION__, bo->handle)); + return false; + } + + return kgem_bo_blt_pitch_is_ok(kgem, bo); +} + static inline bool kgem_bo_is_mappable(struct kgem *kgem, struct kgem_bo *bo) { diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c index 9f510288..a7ea95c0 100644 --- a/src/sna/sna_blt.c +++ b/src/sna/sna_blt.c @@ -2140,10 +2140,10 @@ Bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu, !kgem_check_bo_fenced(kgem, dst_bo, src_bo, NULL)) { _kgem_submit(kgem); if (!kgem_check_bo_fenced(kgem, dst_bo, src_bo, NULL)) - return sna_tiling_copy_boxes(sna, alu, - src_bo, src_dx, src_dy, - dst_bo, dst_dx, dst_dy, - bpp, box, nbox); + return sna_tiling_blt_copy_boxes(sna, alu, + src_bo, src_dx, src_dy, + dst_bo, dst_dx, dst_dy, + bpp, box, nbox); _kgem_set_mode(kgem, KGEM_BLT); } @@ -2244,7 +2244,8 @@ Bool sna_blt_copy_boxes_fallback(struct sna *sna, uint8_t alu, if (src_bo == dst_bo) { DBG(("%s: dst == src\n", __FUNCTION__)); - if (src_bo->tiling == I915_TILING_Y) { + if (src_bo->tiling == I915_TILING_Y && + kgem_bo_blt_pitch_is_ok(&sna->kgem, src_bo)) { struct kgem_bo *bo; DBG(("%s: src is Y-tiled\n", __FUNCTION__)); @@ -2287,7 +2288,8 @@ Bool sna_blt_copy_boxes_fallback(struct sna *sna, uint8_t alu, dst_bo = src_bo = bo; } } else { - if (src_bo->tiling == I915_TILING_Y) { + if (src_bo->tiling == I915_TILING_Y && + kgem_bo_blt_pitch_is_ok(&sna->kgem, src_bo)) { DBG(("%s: src is y-tiled\n", __FUNCTION__)); assert(src_bo == sna_pixmap(src)->gpu_bo); src_bo = sna_pixmap_change_tiling(src, I915_TILING_X); @@ -2298,7 +2300,8 @@ Bool sna_blt_copy_boxes_fallback(struct sna *sna, uint8_t alu, } } - if (dst_bo->tiling == I915_TILING_Y) { + if (dst_bo->tiling == I915_TILING_Y && + kgem_bo_blt_pitch_is_ok(&sna->kgem, dst_bo)) { DBG(("%s: dst is y-tiled\n", __FUNCTION__)); assert(dst_bo == sna_pixmap(dst)->gpu_bo); dst_bo = sna_pixmap_change_tiling(dst, I915_TILING_X); diff --git a/src/sna/sna_io.c b/src/sna/sna_io.c index eb5df9d4..62a89622 100644 --- a/src/sna/sna_io.c +++ b/src/sna/sna_io.c @@ -593,7 +593,7 @@ fallback: int step; tile: - step = MIN(sna->render.max_3d_size, + step = MIN(sna->render.max_3d_size - 4096 / dst->drawable.bitsPerPixel, 8*(MAXSHORT&~63) / dst->drawable.bitsPerPixel); while (step * step * 4 > sna->kgem.max_upload_tile_size) step /= 2; diff --git a/src/sna/sna_render.c b/src/sna/sna_render.c index fc6e6df4..bc8b2de7 100644 --- a/src/sna/sna_render.c +++ b/src/sna/sna_render.c @@ -805,6 +805,80 @@ cleanup_tmp: return ret; } +bool +sna_render_pixmap_partial(struct sna *sna, + PixmapPtr pixmap, + struct kgem_bo *bo, + struct sna_composite_channel *channel, + int16_t x, int16_t y, + int16_t w, int16_t h) +{ + BoxRec box; + int tile_width, tile_height, tile_size; + int offset; + + DBG(("%s (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h)); + + if (bo->pitch > sna->render.max_3d_pitch) + return false; + + box.x1 = x; + box.y1 = y; + box.x2 = x + w; + box.y2 = y + h; + + if (box.x1 < 0) + box.x1 = 0; + if (box.y1 < 0) + box.y1 = 0; + if (box.x2 > pixmap->drawable.width) + box.x2 = pixmap->drawable.width; + if (box.y2 > pixmap->drawable.height) + box.y2 = pixmap->drawable.height; + + kgem_get_tile_size(&sna->kgem, bo->tiling, + &tile_width, &tile_height, &tile_size); + + /* Ensure we align to an even tile row */ + box.y1 = box.y1 & ~(2*tile_height - 1); + box.y2 = ALIGN(box.y2, 2*tile_height); + if (box.y2 > pixmap->drawable.height) + box.y2 = pixmap->drawable.height; + + box.x1 = box.x1 & ~(tile_width * 8 / pixmap->drawable.bitsPerPixel - 1); + box.x2 = ALIGN(box.x2, tile_width * 8 / pixmap->drawable.bitsPerPixel); + if (box.x2 > pixmap->drawable.width) + box.x2 = pixmap->drawable.width; + + w = box.x2 - box.x1; + h = box.y2 - box.y1; + DBG(("%s box=(%d, %d), (%d, %d): (%d, %d)/(%d, %d)\n", __FUNCTION__, + box.x1, box.y1, box.x2, box.y2, w, h, + pixmap->drawable.width, pixmap->drawable.height)); + if (w <= 0 || h <= 0 || + w > sna->render.max_3d_size || + h > sna->render.max_3d_size) + return false; + + /* How many tiles across are we? */ + offset = box.x1 * pixmap->drawable.bitsPerPixel / 8 / tile_width * tile_size; + channel->bo = kgem_create_proxy(bo, + box.y1 * bo->pitch + offset, + h * bo->pitch); + if (channel->bo == NULL) + return false; + + channel->bo->pitch = bo->pitch; + + channel->offset[0] = x - box.x1; + channel->offset[1] = y - box.y1; + channel->scale[0] = 1.f/w; + channel->scale[1] = 1.f/h; + channel->width = w; + channel->height = h; + return true; +} + static int sna_render_picture_partial(struct sna *sna, PicturePtr picture, @@ -1068,13 +1142,25 @@ sna_render_picture_extract(struct sna *sna, I915_TILING_X, w, h, pixmap->drawable.bitsPerPixel), 0); - if (bo && !sna_blt_copy_boxes(sna, GXcopy, - src_bo, 0, 0, - bo, -box.x1, -box.y1, - pixmap->drawable.bitsPerPixel, - &box, 1)) { - kgem_bo_destroy(&sna->kgem, bo); - bo = NULL; + if (bo) { + PixmapRec tmp; + + tmp.drawable.width = w; + tmp.drawable.height = h; + tmp.drawable.depth = pixmap->drawable.depth; + tmp.drawable.bitsPerPixel = pixmap->drawable.bitsPerPixel; + tmp.devPrivate.ptr = NULL; + + assert(tmp.drawable.width); + assert(tmp.drawable.height); + + if (!sna->render.copy_boxes(sna, GXcopy, + pixmap, src_bo, 0, 0, + &tmp, bo, -box.x1, -box.y1, + &box, 1)) { + kgem_bo_destroy(&sna->kgem, bo); + bo = NULL; + } } } @@ -1541,7 +1627,6 @@ sna_render_composite_redirect(struct sna *sna, { struct sna_composite_redirect *t = &op->redirect; int bpp = op->dst.pixmap->drawable.bitsPerPixel; - struct sna_pixmap *priv; struct kgem_bo *bo; #if NO_REDIRECT @@ -1554,11 +1639,9 @@ sna_render_composite_redirect(struct sna *sna, if (!width || !height) return FALSE; - priv = sna_pixmap_force_to_gpu(op->dst.pixmap, MOVE_READ | MOVE_WRITE); - if (priv == NULL) { - DBG(("%s: fallback -- no GPU bo attached\n", __FUNCTION__)); + if (width > sna->render.max_3d_pitch || + height > sna->render.max_3d_pitch) return FALSE; - } if (op->dst.bo->pitch <= sna->render.max_3d_pitch) { int tile_width, tile_height, tile_size; diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h index 94c2744e..a689315e 100644 --- a/src/sna/sna_render.h +++ b/src/sna/sna_render.h @@ -507,10 +507,16 @@ Bool sna_tiling_fill_boxes(struct sna *sna, const xRenderColor *color, PixmapPtr dst, struct kgem_bo *dst_bo, const BoxRec *box, int n); + Bool sna_tiling_copy_boxes(struct sna *sna, uint8_t alu, - struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, - struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, - int bpp, const BoxRec *box, int nbox); + PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, + PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, + const BoxRec *box, int n); + +Bool sna_tiling_blt_copy_boxes(struct sna *sna, uint8_t alu, + struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, + struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, + int bpp, const BoxRec *box, int nbox); Bool sna_blt_composite(struct sna *sna, uint32_t op, @@ -589,6 +595,14 @@ sna_render_pixmap_bo(struct sna *sna, int16_t w, int16_t h, int16_t dst_x, int16_t dst_y); +bool +sna_render_pixmap_partial(struct sna *sna, + PixmapPtr pixmap, + struct kgem_bo *bo, + struct sna_composite_channel *channel, + int16_t x, int16_t y, + int16_t w, int16_t h); + int sna_render_picture_extract(struct sna *sna, PicturePtr picture, @@ -614,6 +628,13 @@ sna_render_picture_convert(struct sna *sna, int16_t w, int16_t h, int16_t dst_x, int16_t dst_y); +inline static void sna_render_composite_redirect_init(struct sna_composite_op *op) +{ + struct sna_composite_redirect *t = &op->redirect; + t->real_bo = NULL; + t->damage = NULL; +} + Bool sna_render_composite_redirect(struct sna *sna, struct sna_composite_op *op, diff --git a/src/sna/sna_tiling.c b/src/sna/sna_tiling.c index 702192a2..00e111ce 100644 --- a/src/sna/sna_tiling.c +++ b/src/sna/sna_tiling.c @@ -421,10 +421,10 @@ done: return ret; } -Bool sna_tiling_copy_boxes(struct sna *sna, uint8_t alu, - struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, - struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, - int bpp, const BoxRec *box, int nbox) +Bool sna_tiling_blt_copy_boxes(struct sna *sna, uint8_t alu, + struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, + struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, + int bpp, const BoxRec *box, int nbox) { RegionRec region, tile, this; struct kgem_bo *bo; @@ -516,3 +516,125 @@ done: pixman_region_fini(®ion); return ret; } + +static Bool +box_intersect(BoxPtr a, const BoxRec *b) +{ + if (a->x1 < b->x1) + a->x1 = b->x1; + if (a->x2 > b->x2) + a->x2 = b->x2; + if (a->y1 < b->y1) + a->y1 = b->y1; + if (a->y2 > b->y2) + a->y2 = b->y2; + + return a->x1 < a->x2 && a->y1 < a->y2; +} + +Bool +sna_tiling_copy_boxes(struct sna *sna, uint8_t alu, + PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, + PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, + const BoxRec *box, int n) +{ + BoxRec extents, tile, stack[64], *clipped, *c; + PixmapRec p; + int i, step; + Bool ret = FALSE; + + extents = box[0]; + for (i = 1; i < n; i++) { + if (extents.x1 < box[i].x1) + extents.x1 = box[i].x1; + if (extents.y1 < box[i].y1) + extents.y1 = box[i].y1; + + if (extents.x2 > box[i].x2) + extents.x2 = box[i].x2; + if (extents.y2 > box[i].y2) + extents.y2 = box[i].y2; + } + + step = sna->render.max_3d_size - 4096 / dst->drawable.bitsPerPixel; + while (step * step * 4 > sna->kgem.max_upload_tile_size) + step /= 2; + + DBG(("%s: tiling copy, using %dx%d tiles\n", + __FUNCTION__, step, step)); + + if (n > ARRAY_SIZE(stack)) { + clipped = malloc(sizeof(BoxRec) * n); + if (clipped == NULL) + goto tiled_error; + } else + clipped = stack; + + p.drawable.depth = src->drawable.depth; + p.drawable.bitsPerPixel = src->drawable.bitsPerPixel; + p.devPrivate.ptr = NULL; + + for (tile.y1 = extents.y1; tile.y1 < extents.y2; tile.y1 = tile.y2) { + tile.y2 = tile.y1 + step; + if (tile.y2 > extents.y2) + tile.y2 = extents.y2; + + for (tile.x1 = extents.x1; tile.x1 < extents.x2; tile.x1 = tile.x2) { + struct kgem_bo *tmp_bo; + + tile.x2 = tile.x1 + step; + if (tile.x2 > extents.x2) + tile.x2 = extents.x2; + + p.drawable.width = tile.x2 - tile.x1; + p.drawable.height = tile.y2 - tile.y1; + + tmp_bo = kgem_create_2d(&sna->kgem, + p.drawable.width, + p.drawable.height, + p.drawable.bitsPerPixel, + I915_TILING_X, 0); + if (!tmp_bo) + goto tiled_error; + + c = clipped; + for (i = 0; i < n; i++) { + *c = box[i]; + if (!box_intersect(c, &tile)) + continue; + + DBG(("%s: box(%d, %d), (%d, %d), src=(%d, %d), dst=(%d, %d)\n", + __FUNCTION__, + c->x1, c->y1, + c->x2, c->y2, + src_dx, src_dy, + c->x1 - tile.x1, + c->y1 - tile.y1)); + c++; + } + + if (c == clipped || + (sna->render.copy_boxes(sna, GXcopy, + src, src_bo, src_dx, src_dy, + &p, tmp_bo, -tile.x1, -tile.y1, + clipped, c - clipped) && + sna->render.copy_boxes(sna, alu, + &p, tmp_bo, -tile.x1, -tile.y1, + dst, dst_bo, dst_dx, dst_dy, + clipped, c - clipped))) + i = 1; + + kgem_bo_destroy(&sna->kgem, tmp_bo); + + if (!i) + goto tiled_error; + } + } + + ret = TRUE; +tiled_error: + if (clipped != stack) + free(clipped); + + return ret; +}