diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.c
index 33b986e6..c6eceda5 100644
--- a/src/sna/gen2_render.c
+++ b/src/sna/gen2_render.c
@@ -35,6 +35,7 @@
 #include "sna_reg.h"
 #include "sna_render.h"
 #include "sna_render_inline.h"
+#include "sna_video.h"
 
 #include "gen2_render.h"
 
@@ -3123,6 +3124,273 @@ gen2_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
 	return true;
 }
 
+static void
+gen2_emit_video_state(struct sna *sna,
+		      struct sna_video *video,
+		      struct sna_video_frame *frame,
+		      PixmapPtr pixmap,
+		      struct kgem_bo *dst_bo,
+		      int width, int height,
+		      bool bilinear)
+{
+	uint32_t ms1, v, unwind;
+
+	gen2_emit_target(sna, dst_bo, width, height,
+			 sna_format_for_depth(pixmap->drawable.depth));
+
+	unwind = sna->kgem.nbatch;
+	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
+	      I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2);
+	BATCH(1 << 12);
+	BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY);
+	BATCH(S8_ENABLE_COLOR_BUFFER_WRITE);
+	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls1 + 1,
+		   sna->kgem.batch + unwind + 1,
+		   3 * sizeof(uint32_t)) == 0)
+		sna->kgem.nbatch = unwind;
+	else
+		sna->render_state.gen2.ls1 = unwind;
+
+	gen2_disable_logic_op(sna);
+
+	unwind = sna->kgem.nbatch;
+	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 |
+	      LOAD_TEXTURE_BLEND_STAGE(0) | 1);
+	BATCH(TB0C_LAST_STAGE | TB0C_RESULT_SCALE_1X | TB0C_OUTPUT_WRITE_CURRENT |
+	      TB0C_OP_ARG1 | TB0C_ARG1_SEL_TEXEL0);
+	BATCH(TB0A_RESULT_SCALE_1X | TB0A_OUTPUT_WRITE_CURRENT |
+	      TB0A_OP_ARG1 | TB0A_ARG1_SEL_ONE);
+	if (memcmp(sna->kgem.batch + sna->render_state.gen2.ls2 + 1,
+		   sna->kgem.batch + unwind + 1,
+		   2 * sizeof(uint32_t)) == 0)
+		sna->kgem.nbatch = unwind;
+	else
+		sna->render_state.gen2.ls2 = unwind;
+
+	BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 | LOAD_TEXTURE_MAP(0) | 4);
+	BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
+			     frame->bo,
+			     I915_GEM_DOMAIN_SAMPLER << 16,
+			     0));
+	ms1 = MAPSURF_422 | TM0S1_COLORSPACE_CONVERSION;
+	switch (frame->id) {
+	case FOURCC_YUY2:
+		ms1 |= MT_422_YCRCB_NORMAL;
+		break;
+	case FOURCC_UYVY:
+		ms1 |= MT_422_YCRCB_SWAPY;
+		break;
+	}
+	BATCH(((frame->height - 1) << TM0S1_HEIGHT_SHIFT) |
+	      ((frame->width - 1)  << TM0S1_WIDTH_SHIFT) |
+	      ms1 |
+	      gen2_sampler_tiling_bits(frame->bo->tiling));
+	BATCH((frame->pitch[0] / 4 - 1) << TM0S2_PITCH_SHIFT | TM0S2_MAP_2D);
+	if (bilinear)
+		BATCH(FILTER_LINEAR << TM0S3_MAG_FILTER_SHIFT |
+		      FILTER_LINEAR << TM0S3_MIN_FILTER_SHIFT |
+		      MIPFILTER_NONE << TM0S3_MIP_FILTER_SHIFT);
+	else
+		BATCH(FILTER_NEAREST << TM0S3_MAG_FILTER_SHIFT |
+		      FILTER_NEAREST << TM0S3_MIN_FILTER_SHIFT |
+		      MIPFILTER_NONE << TM0S3_MIP_FILTER_SHIFT);
+	BATCH(0);	/* default color */
+
+	BATCH(_3DSTATE_MAP_COORD_SET_CMD | TEXCOORD_SET(0) |
+	      ENABLE_TEXCOORD_PARAMS | TEXCOORDS_ARE_NORMAL | TEXCOORDTYPE_CARTESIAN |
+	      ENABLE_ADDR_V_CNTL | TEXCOORD_ADDR_V_MODE(TEXCOORDMODE_CLAMP) |
+	      ENABLE_ADDR_U_CNTL | TEXCOORD_ADDR_U_MODE(TEXCOORDMODE_CLAMP));
+
+	v = _3DSTATE_VERTEX_FORMAT_2_CMD | TEXCOORDFMT_2D;
+	if (sna->render_state.gen2.vft != v) {
+		BATCH(v);
+		sna->render_state.gen2.vft = v;
+	}
+}
+
+static void
+gen2_video_get_batch(struct sna *sna, struct kgem_bo *bo)
+{
+	kgem_set_mode(&sna->kgem, KGEM_RENDER, bo);
+
+	if (!kgem_check_batch(&sna->kgem, 120) ||
+	    !kgem_check_reloc(&sna->kgem, 4) ||
+	    !kgem_check_exec(&sna->kgem, 2)) {
+		_kgem_submit(&sna->kgem);
+		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
+	}
+
+	if (sna->render_state.gen2.need_invariant)
+		gen2_emit_invariant(sna);
+}
+
+static int
+gen2_get_inline_rectangles(struct sna *sna, int want, int floats_per_vertex)
+{
+	int size = floats_per_vertex * 3;
+	int rem = batch_space(sna) - 1;
+
+	if (size * want > rem)
+		want = rem / size;
+
+	return want;
+}
+
+static bool
+gen2_render_video(struct sna *sna,
+		  struct sna_video *video,
+		  struct sna_video_frame *frame,
+		  RegionPtr dstRegion,
+		  PixmapPtr pixmap)
+{
+	struct sna_pixmap *priv = sna_pixmap(pixmap);
+	const BoxRec *pbox = region_rects(dstRegion);
+	int nbox = region_num_rects(dstRegion);
+	int dst_width = dstRegion->extents.x2 - dstRegion->extents.x1;
+	int dst_height = dstRegion->extents.y2 - dstRegion->extents.y1;
+	int src_width = frame->src.x2 - frame->src.x1;
+	int src_height = frame->src.y2 - frame->src.y1;
+	float src_offset_x, src_offset_y;
+	float src_scale_x, src_scale_y;
+	int pix_xoff, pix_yoff;
+	struct kgem_bo *dst_bo;
+	bool bilinear;
+	int copy = 0;
+
+	DBG(("%s: src:%dx%d (frame:%dx%d) -> dst:%dx%d\n", __FUNCTION__,
+	     src_width, src_height, frame->width, frame->height, dst_width, dst_height));
+
+	assert(priv->gpu_bo);
+	dst_bo = priv->gpu_bo;
+
+	bilinear = src_width != dst_width || src_height != dst_height;
+
+	src_scale_x = (float)src_width / dst_width / frame->width;
+	src_offset_x = (float)frame->src.x1 / frame->width - dstRegion->extents.x1 * src_scale_x;
+
+	src_scale_y = (float)src_height / dst_height / frame->height;
+	src_offset_y = (float)frame->src.y1 / frame->height - dstRegion->extents.y1 * src_scale_y;
+	DBG(("%s: src offset (%f, %f), scale (%f, %f)\n",
+	     __FUNCTION__, src_offset_x, src_offset_y, src_scale_x, src_scale_y));
+
+	if (too_large(pixmap->drawable.width, pixmap->drawable.height) ||
+	    dst_bo->pitch > MAX_3D_PITCH) {
+		int bpp = pixmap->drawable.bitsPerPixel;
+
+		if (too_large(dst_width, dst_height))
+			return false;
+
+		dst_bo = kgem_create_2d(&sna->kgem,
+					dst_width, dst_height, bpp,
+					kgem_choose_tiling(&sna->kgem,
+							   I915_TILING_X,
+							   dst_width, dst_height, bpp),
+					0);
+		if (!dst_bo)
+			return false;
+
+		pix_xoff = -dstRegion->extents.x1;
+		pix_yoff = -dstRegion->extents.y1;
+		copy = 1;
+	} else {
+		/* Set up the offset for translating from the given region
+		 * (in screen coordinates) to the backing pixmap.
+		 */
+#ifdef COMPOSITE
+		pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
+		pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
+#else
+		pix_xoff = 0;
+		pix_yoff = 0;
+#endif
+
+		dst_width  = pixmap->drawable.width;
+		dst_height = pixmap->drawable.height;
+	}
+
+	gen2_video_get_batch(sna, dst_bo);
+	gen2_emit_video_state(sna, video, frame, pixmap,
+			      dst_bo, dst_width, dst_height, bilinear);
+	do {
+		int nbox_this_time = gen2_get_inline_rectangles(sna, nbox, 4);
+		if (nbox_this_time == 0) {
+			gen2_video_get_batch(sna, dst_bo);
+			gen2_emit_video_state(sna, video, frame, pixmap,
+					      dst_bo, dst_width, dst_height, bilinear);
+			nbox_this_time = gen2_get_inline_rectangles(sna, nbox, 4);
+			assert(nbox_this_time);
+		}
+		nbox -= nbox_this_time;
+
+		BATCH(PRIM3D_INLINE | PRIM3D_RECTLIST |
+		      ((12 * nbox_this_time) - 1));
+		do {
+			int box_x1 = pbox->x1;
+			int box_y1 = pbox->y1;
+			int box_x2 = pbox->x2;
+			int box_y2 = pbox->y2;
+
+			pbox++;
+
+			DBG(("%s: dst (%d, %d), (%d, %d) + (%d, %d); src (%f, %f), (%f, %f)\n",
+			     __FUNCTION__, box_x1, box_y1, box_x2, box_y2, pix_xoff, pix_yoff,
+			     box_x1 * src_scale_x + src_offset_x,
+			     box_y1 * src_scale_y + src_offset_y,
+			     box_x2 * src_scale_x + src_offset_x,
+			     box_y2 * src_scale_y + src_offset_y));
+
+			/* bottom right */
+			BATCH_F(box_x2 + pix_xoff);
+			BATCH_F(box_y2 + pix_yoff);
+			BATCH_F(box_x2 * src_scale_x + src_offset_x);
+			BATCH_F(box_y2 * src_scale_y + src_offset_y);
+
+			/* bottom left */
+			BATCH_F(box_x1 + pix_xoff);
+			BATCH_F(box_y2 + pix_yoff);
+			BATCH_F(box_x1 * src_scale_x + src_offset_x);
+			BATCH_F(box_y2 * src_scale_y + src_offset_y);
+
+			/* top left */
+			BATCH_F(box_x1 + pix_xoff);
+			BATCH_F(box_y1 + pix_yoff);
+			BATCH_F(box_x1 * src_scale_x + src_offset_x);
+			BATCH_F(box_y1 * src_scale_y + src_offset_y);
+		} while (--nbox_this_time);
+	} while (nbox);
+
+	if (copy) {
+#ifdef COMPOSITE
+		pix_xoff = -pixmap->screen_x + pixmap->drawable.x;
+		pix_yoff = -pixmap->screen_y + pixmap->drawable.y;
+#else
+		pix_xoff = 0;
+		pix_yoff = 0;
+#endif
+		sna_blt_copy_boxes(sna, GXcopy,
+				   dst_bo, -dstRegion->extents.x1, -dstRegion->extents.y1,
+				   priv->gpu_bo, pix_xoff, pix_yoff,
+				   pixmap->drawable.bitsPerPixel,
+				   region_rects(dstRegion),
+				   region_num_rects(dstRegion));
+
+		kgem_bo_destroy(&sna->kgem, dst_bo);
+	}
+
+	if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
+		if ((pix_xoff | pix_yoff) == 0) {
+			sna_damage_add(&priv->gpu_damage, dstRegion);
+		} else {
+			sna_damage_add_boxes(&priv->gpu_damage,
+					     region_rects(dstRegion),
+					     region_num_rects(dstRegion),
+					     pix_xoff, pix_yoff);
+		}
+	}
+
+	return true;
+}
+
 static void
 gen2_render_copy_setup_source(struct sna_composite_channel *channel,
 			      const DrawableRec *draw,
@@ -3537,7 +3805,7 @@ const char *gen2_render_init(struct sna *sna, const char *backend)
 	render->copy = gen2_render_copy;
 	render->copy_boxes = gen2_render_copy_boxes;
 
-	/* XXX YUV color space conversion for video? */
+	render->video = gen2_render_video;
 
 	render->reset = gen2_render_reset;
 	render->flush = gen2_render_flush;
diff --git a/src/sna/sna_video_textured.c b/src/sna/sna_video_textured.c
index f5b7e98d..3cce5cf1 100644
--- a/src/sna/sna_video_textured.c
+++ b/src/sna/sna_video_textured.c
@@ -48,7 +48,12 @@ static const XvAttributeRec Attributes[] = {
 	//{XvSettable | XvGettable, 0, 255, (char *)"XV_CONTRAST"},
 };
 
-static const XvImageRec Images[] = {
+static const XvImageRec gen2_Images[] = {
+	XVIMAGE_YUY2,
+	XVIMAGE_UYVY,
+};
+
+static const XvImageRec gen3_Images[] = {
 	XVIMAGE_YUY2,
 	XVIMAGE_YV12,
 	XVIMAGE_I420,
@@ -366,8 +371,13 @@ void sna_video_textured_setup(struct sna *sna, ScreenPtr screen)
 						 ARRAY_SIZE(Formats));
 	adaptor->nAttributes = ARRAY_SIZE(Attributes);
 	adaptor->pAttributes = (XvAttributeRec *)Attributes;
-	adaptor->nImages = ARRAY_SIZE(Images);
-	adaptor->pImages = (XvImageRec *)Images;
+	if (sna->kgem.gen < 030) {
+		adaptor->nImages = ARRAY_SIZE(gen2_Images);
+		adaptor->pImages = (XvImageRec *)gen2_Images;
+	} else {
+		adaptor->nImages = ARRAY_SIZE(gen3_Images);
+		adaptor->pImages = (XvImageRec *)gen3_Images;
+	}
 #if XORG_XV_VERSION < 2
 	adaptor->ddAllocatePort = sna_xv_alloc_port;
 	adaptor->ddFreePort = sna_xv_free_port;