From 5c9cde37e769287fb7bf4e08c3600a33c2e92dce Mon Sep 17 00:00:00 2001 From: Hong Liu Date: Fri, 11 Apr 2008 09:54:34 +0800 Subject: [PATCH 01/29] Bug #14935: Fix i9xx reference clock for spread spectrum. --- src/i830_display.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/i830_display.c b/src/i830_display.c index 6ac9f9e7..1122721a 100644 --- a/src/i830_display.c +++ b/src/i830_display.c @@ -1718,8 +1718,10 @@ i830_crtc_clock_get(ScrnInfoPtr pScrn, xf86CrtcPtr crtc) return 0; } - /* XXX: Handle the 100Mhz refclk */ - i9xx_clock(96000, &clock); + if ((dpll & PLL_REF_INPUT_MASK) == PLLB_REF_INPUT_SPREADSPECTRUMIN) + i9xx_clock(100000, &clock); + else + i9xx_clock(96000, &clock); } else { Bool is_lvds = (pipe == 1) && (INREG(LVDS) & LVDS_PORT_EN); From f47486fab3dffcbb03e7ad89f777abba1e887299 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Fri, 11 Apr 2008 10:12:40 +0800 Subject: [PATCH 02/29] remove '#line NUM ...' in macro process intel-gen4asm doesn't allow '#' line --- src/Makefile.am | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Makefile.am b/src/Makefile.am index 91f59954..dbf99796 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -186,7 +186,7 @@ if HAVE_GEN4ASM SUFFIXES = .g4a .g4b .g4a.g4b: - m4 -s $*.g4a > $*.g4m && intel-gen4asm -o $@ $*.g4m && rm $*.g4m + m4 $*.g4a > $*.g4m && intel-gen4asm -o $@ $*.g4m && rm $*.g4m $(INTEL_G4B): $(INTEL_G4I) From da85d1b822dcb31698e9c5ab85a7bb27ad745eee Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Fri, 11 Apr 2008 11:22:29 -0700 Subject: [PATCH 03/29] Revert "remove '#line NUM ...' in macro process" This reverts commit f47486fab3dffcbb03e7ad89f777abba1e887299. Sorry for failing to push the #line processing changes to intel-gen4asm; those are now pushed, so this change should no longer be necessary. Having correct file name and line numbers in the error messages is awfully nice though. 
--- src/Makefile.am | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Makefile.am b/src/Makefile.am index dbf99796..91f59954 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -186,7 +186,7 @@ if HAVE_GEN4ASM SUFFIXES = .g4a .g4b .g4a.g4b: - m4 $*.g4a > $*.g4m && intel-gen4asm -o $@ $*.g4m && rm $*.g4m + m4 -s $*.g4a > $*.g4m && intel-gen4asm -o $@ $*.g4m && rm $*.g4m $(INTEL_G4B): $(INTEL_G4I) From 2293a3677d1dcf294de6a8712bf0d2f65b50dcc3 Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Mon, 12 Nov 2007 16:09:00 -0800 Subject: [PATCH 04/29] Enumerate all possible src,mask sampler state pairs This will eventually allow for the elimination of sampler state updates while compositing---and initializing everything in the initialization function. (cherry picked from commit d0874697be8086cd64740c24698df8cd4d31c76f) --- src/i965_render.c | 214 ++++++++++++++++++++++++++++------------------ 1 file changed, 131 insertions(+), 83 deletions(-) diff --git a/src/i965_render.c b/src/i965_render.c index 1a3237bb..672c4338 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -274,8 +274,6 @@ static int urb_cs_start, urb_cs_size; static struct brw_surface_state *dest_surf_state, dest_surf_state_local; static struct brw_surface_state *src_surf_state, src_surf_state_local; static struct brw_surface_state *mask_surf_state, mask_surf_state_local; -static struct brw_sampler_state *src_sampler_state, src_sampler_state_local; -static struct brw_sampler_state *mask_sampler_state, mask_sampler_state_local; static struct brw_vs_unit_state *vs_state, vs_state_local; static struct brw_sf_unit_state *sf_state, sf_state_local; @@ -285,7 +283,7 @@ static uint32_t *binding_table; static int binding_table_entries; static int dest_surf_offset, src_surf_offset, mask_surf_offset; -static int src_sampler_offset, mask_sampler_offset,vs_offset; +static int vs_offset; static int sf_offset, wm_offset, cc_offset, vb_offset; static int wm_scratch_offset; static int 
binding_table_offset; @@ -429,6 +427,18 @@ static const uint32_t ps_kernel_masknoca_projective_static [][4] = { #define PAD64_MULTI(previous, idx, factor) char previous ## _pad ## idx [(64 - (sizeof(struct previous) * (factor)) % 64) % 64] #define PAD64(previous, idx) PAD64_MULTI(previous, idx, 1) +typedef enum { + SAMPLER_STATE_FILTER_NEAREST, + SAMPLER_STATE_FILTER_BILINEAR, + SAMPLER_STATE_FILTER_COUNT +} sampler_state_filter_t; + +typedef enum { + SAMPLER_STATE_EXTEND_NONE, + SAMPLER_STATE_EXTEND_REPEAT, + SAMPLER_STATE_EXTEND_COUNT +} sampler_state_extend_t; + typedef struct _brw_cc_unit_state_padded { struct brw_cc_unit_state state; char pad[64 - sizeof (struct brw_cc_unit_state)]; @@ -457,6 +467,12 @@ typedef struct _gen4_state { KERNEL_DECL (ps_kernel_masknoca_affine); KERNEL_DECL (ps_kernel_masknoca_projective); + /* Index by [src_filter][src_extend][mask_filter][mask_extend] */ + struct brw_sampler_state sampler_state[SAMPLER_STATE_FILTER_COUNT] + [SAMPLER_STATE_EXTEND_COUNT] + [SAMPLER_STATE_FILTER_COUNT] + [SAMPLER_STATE_EXTEND_COUNT][2]; + struct brw_sampler_default_color sampler_default_color; PAD64 (brw_sampler_default_color, 0); @@ -469,6 +485,50 @@ typedef struct _gen4_state { uint8_t other_state[65536]; } gen4_state_t; +static void +sampler_state_init (struct brw_sampler_state *sampler_state, + sampler_state_filter_t filter, + sampler_state_extend_t extend, + int default_color_offset) +{ + /* PS kernel use this sampler */ + memset(sampler_state, 0, sizeof(*sampler_state)); + + sampler_state->ss0.lod_preclamp = 1; /* GL mode */ + sampler_state->ss0.default_color_mode = 0; /* GL mode */ + + switch(filter) { + default: + case SAMPLER_STATE_FILTER_NEAREST: + sampler_state->ss0.min_filter = BRW_MAPFILTER_NEAREST; + sampler_state->ss0.mag_filter = BRW_MAPFILTER_NEAREST; + break; + case SAMPLER_STATE_FILTER_BILINEAR: + sampler_state->ss0.min_filter = BRW_MAPFILTER_LINEAR; + sampler_state->ss0.mag_filter = BRW_MAPFILTER_LINEAR; + break; + } + + switch 
(extend) { + default: + case SAMPLER_STATE_EXTEND_NONE: + sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER; + sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER; + sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER; + break; + case SAMPLER_STATE_EXTEND_REPEAT: + sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_WRAP; + sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_WRAP; + sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP; + break; + } + + assert((default_color_offset & 31) == 0); + sampler_state->ss2.default_color_pointer = default_color_offset >> 5; + + sampler_state->ss3.chroma_key_enable = 0; /* disable chromakey */ +} + static void cc_state_init (struct brw_cc_unit_state *cc_state, int src_blend, @@ -510,7 +570,7 @@ cc_state_init (struct brw_cc_unit_state *cc_state, static void gen4_state_init (gen4_state_t *state, uint32_t state_base_offset) { - int i, j; + int i, j, k, l; #define KERNEL_COPY(kernel) \ memcpy(state->kernel, kernel ## _static, sizeof(kernel ## _static)) @@ -537,6 +597,24 @@ gen4_state_init (gen4_state_t *state, uint32_t state_base_offset) state->cc_viewport.min_depth = -1.e35; state->cc_viewport.max_depth = 1.e35; + for (i = 0; i < SAMPLER_STATE_FILTER_COUNT; i++) { + for (j = 0; j < SAMPLER_STATE_EXTEND_COUNT; j++) { + for (k = 0; k < SAMPLER_STATE_FILTER_COUNT; k++) { + for (l = 0; l < SAMPLER_STATE_EXTEND_COUNT; l++) { + sampler_state_init (&state->sampler_state[i][j][k][l][0], + i, j, + offsetof (gen4_state_t, + sampler_default_color)); + sampler_state_init (&state->sampler_state[i][j][k][l][1], + k, l, + offsetof (gen4_state_t, + sampler_default_color)); + } + } + } + } + + for (i = 0; i < BRW_BLENDFACTOR_COUNT; i++) { for (j = 0; j < BRW_BLENDFACTOR_COUNT; j++) { cc_state_init (&state->cc_state[i][j].state, i, j, @@ -562,6 +640,32 @@ i965_get_card_format(PicturePtr pPict) return i965_tex_formats[i].card_fmt; } +static sampler_state_filter_t +sampler_state_filter_from_picture (int filter) +{ + 
switch (filter) { + case PictFilterNearest: + return SAMPLER_STATE_FILTER_NEAREST; + case PictFilterBilinear: + return SAMPLER_STATE_FILTER_BILINEAR; + default: + return -1; + } +} + +static sampler_state_extend_t +sampler_state_extend_from_picture (int repeat) +{ + switch (repeat) { + case RepeatNone: + return SAMPLER_STATE_EXTEND_NONE; + case RepeatNormal: + return SAMPLER_STATE_EXTEND_REPEAT; + default: + return -1; + } +} + Bool i965_prepare_composite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture, PicturePtr pDstPicture, @@ -574,6 +678,8 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, mask_tiled = 0; uint32_t dst_format, dst_offset, dst_pitch, dst_tile_format = 0, dst_tiled = 0; + sampler_state_filter_t src_filter, mask_filter; + sampler_state_extend_t src_extend, mask_extend; Bool is_affine_src, is_affine_mask, is_affine; IntelEmitInvarientState(pScrn); @@ -637,14 +743,6 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, wm_scratch_offset = ALIGN(next_offset, 1024); next_offset = wm_scratch_offset + PS_SCRATCH_SPACE * PS_MAX_THREADS; - /* for texture sampler */ - src_sampler_offset = ALIGN(next_offset, 32); - next_offset = src_sampler_offset + sizeof(*src_sampler_state); - - if (pMask) { - mask_sampler_offset = ALIGN(next_offset, 32); - next_offset = mask_sampler_offset + sizeof(*mask_sampler_state); - } /* Align VB to native size of elements, for safety */ vb_offset = ALIGN(next_offset, 32); next_offset = vb_offset + vb_size; @@ -809,79 +907,24 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, if (pMask) binding_table[2] = state_base_offset + mask_surf_offset; - /* PS kernel use this sampler */ - src_sampler_state = &src_sampler_state_local; - memset(src_sampler_state, 0, sizeof(*src_sampler_state)); - src_sampler_state->ss0.lod_preclamp = 1; /* GL mode */ - switch(pSrcPicture->filter) { - case PictFilterNearest: - src_sampler_state->ss0.min_filter = BRW_MAPFILTER_NEAREST; - src_sampler_state->ss0.mag_filter = 
BRW_MAPFILTER_NEAREST; - break; - case PictFilterBilinear: - src_sampler_state->ss0.min_filter = BRW_MAPFILTER_LINEAR; - src_sampler_state->ss0.mag_filter = BRW_MAPFILTER_LINEAR; - break; - default: - I830FALLBACK("Bad filter 0x%x\n", pSrcPicture->filter); - } - src_sampler_state->ss0.default_color_mode = 0; /* GL mode */ + src_filter = sampler_state_filter_from_picture (pSrcPicture->filter); + if (src_filter < 0) + I830FALLBACK ("Bad src filter 0x%x\n", pSrcPicture->filter); + src_extend = sampler_state_extend_from_picture (pSrcPicture->repeat); + if (src_extend < 0) + I830FALLBACK ("Bad src repeat 0x%x\n", pSrcPicture->repeat); - if (!pSrcPicture->repeat) { - src_sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER; - src_sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER; - src_sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP_BORDER; - src_sampler_state->ss2.default_color_pointer = - (state_base_offset + - offsetof(gen4_state_t, sampler_default_color)) >> 5; + if (pMaskPicture) { + mask_filter = sampler_state_filter_from_picture (pMaskPicture->filter); + if (mask_filter < 0) + I830FALLBACK ("Bad mask filter 0x%x\n", pMaskPicture->filter); + mask_extend = sampler_state_extend_from_picture (pMaskPicture->repeat); + if (mask_extend < 0) + I830FALLBACK ("Bad mask repeat 0x%x\n", pMaskPicture->repeat); } else { - src_sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_WRAP; - src_sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_WRAP; - src_sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP; - } - src_sampler_state->ss3.chroma_key_enable = 0; /* disable chromakey */ - - src_sampler_state = (void *)(state_base + src_sampler_offset); - memcpy (src_sampler_state, &src_sampler_state_local, sizeof (src_sampler_state_local)); - - if (pMask) { - mask_sampler_state = &mask_sampler_state_local; - memset(mask_sampler_state, 0, sizeof(*mask_sampler_state)); - mask_sampler_state->ss0.lod_preclamp = 1; /* GL mode */ - switch(pMaskPicture->filter) { 
- case PictFilterNearest: - mask_sampler_state->ss0.min_filter = BRW_MAPFILTER_NEAREST; - mask_sampler_state->ss0.mag_filter = BRW_MAPFILTER_NEAREST; - break; - case PictFilterBilinear: - mask_sampler_state->ss0.min_filter = BRW_MAPFILTER_LINEAR; - mask_sampler_state->ss0.mag_filter = BRW_MAPFILTER_LINEAR; - break; - default: - I830FALLBACK("Bad filter 0x%x\n", pMaskPicture->filter); - } - - mask_sampler_state->ss0.default_color_mode = 0; /* GL mode */ - if (!pMaskPicture->repeat) { - mask_sampler_state->ss1.r_wrap_mode = - BRW_TEXCOORDMODE_CLAMP_BORDER; - mask_sampler_state->ss1.s_wrap_mode = - BRW_TEXCOORDMODE_CLAMP_BORDER; - mask_sampler_state->ss1.t_wrap_mode = - BRW_TEXCOORDMODE_CLAMP_BORDER; - mask_sampler_state->ss2.default_color_pointer = - (state_base_offset + - offsetof(gen4_state_t, sampler_default_color)) >> 5; - } else { - mask_sampler_state->ss1.r_wrap_mode = BRW_TEXCOORDMODE_WRAP; - mask_sampler_state->ss1.s_wrap_mode = BRW_TEXCOORDMODE_WRAP; - mask_sampler_state->ss1.t_wrap_mode = BRW_TEXCOORDMODE_WRAP; - } - mask_sampler_state->ss3.chroma_key_enable = 0; /* disable chromakey */ - - mask_sampler_state = (void *)(state_base + mask_sampler_offset); - memcpy (mask_sampler_state, &mask_sampler_state_local, sizeof (mask_sampler_state_local)); + mask_filter = SAMPLER_STATE_FILTER_NEAREST; + mask_extend = SAMPLER_STATE_EXTEND_NONE; } /* Set up the vertex shader to be disabled (passthrough) */ @@ -1021,7 +1064,12 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, wm_state->wm4.stats_enable = 1; /* statistic */ wm_state->wm4.sampler_state_pointer = (state_base_offset + - src_sampler_offset) >> 5; + offsetof(gen4_state_t, + sampler_state + [src_filter] + [src_extend] + [mask_filter] + [mask_extend][0])) >> 5; wm_state->wm4.sampler_count = 1; /* 1-4 samplers used */ wm_state->wm5.max_threads = PS_MAX_THREADS - 1; wm_state->wm5.transposed_urb_read = 0; From d3138d9ff0f821fb3adbd27684b0c22a8a910c3e Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Tue, 13 
Nov 2007 12:37:13 -0800 Subject: [PATCH 05/29] Enumerate all possible wm_state objects We have a collection of wm_state objects for each ps kernel, (one for each combination of src and mask extend and repeat values). Thanks to Dave Airlie for noticing an errant write through a wild wm_state pointer in an early version of this commit. (cherry picked from 7763706a93d3021907273f9b330750ba110e2fc3 commit) This cherry-pick required more reformatting than most, due to the projective texturing merge. --- src/i965_render.c | 252 +++++++++++++++++++++++++--------------------- 1 file changed, 139 insertions(+), 113 deletions(-) diff --git a/src/i965_render.c b/src/i965_render.c index 672c4338..59af52fb 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -277,15 +277,13 @@ static struct brw_surface_state *mask_surf_state, mask_surf_state_local; static struct brw_vs_unit_state *vs_state, vs_state_local; static struct brw_sf_unit_state *sf_state, sf_state_local; -static struct brw_wm_unit_state *wm_state, wm_state_local; static uint32_t *binding_table; static int binding_table_entries; static int dest_surf_offset, src_surf_offset, mask_surf_offset; static int vs_offset; -static int sf_offset, wm_offset, cc_offset, vb_offset; -static int wm_scratch_offset; +static int sf_offset, cc_offset, vb_offset; static int binding_table_offset; static int next_offset, total_state_size; static char *state_base; @@ -420,6 +418,12 @@ static const uint32_t ps_kernel_masknoca_projective_static [][4] = { #define KERNEL_DECL(template) \ uint32_t template [((sizeof (template ## _static) + 63) & ~63) / 16][4]; +#define WM_STATE_DECL(kernel) \ + struct brw_wm_unit_state wm_state_ ## kernel[SAMPLER_STATE_FILTER_COUNT] \ + [SAMPLER_STATE_EXTEND_COUNT] \ + [SAMPLER_STATE_FILTER_COUNT] \ + [SAMPLER_STATE_EXTEND_COUNT] + /* Many of the fields in the state structure must be aligned to a * 64-byte boundary, (or a 32-byte boundary, but 64 is good enough for * those too). 
@@ -467,6 +471,17 @@ typedef struct _gen4_state { KERNEL_DECL (ps_kernel_masknoca_affine); KERNEL_DECL (ps_kernel_masknoca_projective); + uint8_t wm_scratch[128 * PS_MAX_THREADS]; + + WM_STATE_DECL (nomask_affine); + WM_STATE_DECL (nomask_projective); + WM_STATE_DECL (maskca_affine); + WM_STATE_DECL (maskca_projective); + WM_STATE_DECL (maskca_srcalpha_affine); + WM_STATE_DECL (maskca_srcalpha_projective); + WM_STATE_DECL (masknoca_affine); + WM_STATE_DECL (masknoca_projective); + /* Index by [src_filter][src_extend][mask_filter][mask_extend] */ struct brw_sampler_state sampler_state[SAMPLER_STATE_FILTER_COUNT] [SAMPLER_STATE_EXTEND_COUNT] @@ -564,6 +579,55 @@ cc_state_init (struct brw_cc_unit_state *cc_state, cc_state->cc6.dest_blend_factor = dst_blend; } +static void +wm_state_init (struct brw_wm_unit_state *wm_state, + Bool has_mask, + int scratch_offset, + int kernel_offset, + int sampler_state_offset) +{ + memset(wm_state, 0, sizeof (*wm_state)); + wm_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(PS_KERNEL_NUM_GRF); + wm_state->thread1.single_program_flow = 0; + + assert((scratch_offset & 1023) == 0); + wm_state->thread2.scratch_space_base_pointer = scratch_offset >> 10; + + wm_state->thread2.per_thread_scratch_space = PS_SCRATCH_SPACE_LOG; + wm_state->thread3.const_urb_entry_read_length = 0; + wm_state->thread3.const_urb_entry_read_offset = 0; + + wm_state->thread3.urb_entry_read_offset = 0; + /* wm kernel use urb from 3, see wm_program in compiler module */ + wm_state->thread3.dispatch_grf_start_reg = 3; /* must match kernel */ + + wm_state->wm4.stats_enable = 1; /* statistic */ + assert((sampler_state_offset & 31) == 0); + wm_state->wm4.sampler_state_pointer = sampler_state_offset >> 5; + wm_state->wm4.sampler_count = 1; /* 1-4 samplers used */ + wm_state->wm5.max_threads = PS_MAX_THREADS - 1; + wm_state->wm5.transposed_urb_read = 0; + wm_state->wm5.thread_dispatch_enable = 1; + /* just use 16-pixel dispatch (4 subspans), don't need to change kernel + * 
start point + */ + wm_state->wm5.enable_16_pix = 1; + wm_state->wm5.enable_8_pix = 0; + wm_state->wm5.early_depth_test = 1; + + assert((kernel_offset & 63) == 0); + wm_state->thread0.kernel_start_pointer = kernel_offset >> 6; + + /* Each pair of attributes (src/mask coords) is two URB entries */ + if (has_mask) { + wm_state->thread1.binding_table_entry_count = 3; /* 2 tex and fb */ + wm_state->thread3.urb_entry_read_length = 4; + } else { + wm_state->thread1.binding_table_entry_count = 2; /* 1 tex and fb */ + wm_state->thread3.urb_entry_read_length = 2; + } +} + /** * Called at EnterVT to fill in our state buffer with any static information. */ @@ -586,6 +650,7 @@ gen4_state_init (gen4_state_t *state, uint32_t state_base_offset) KERNEL_COPY (ps_kernel_maskca_srcalpha_projective); KERNEL_COPY (ps_kernel_masknoca_affine); KERNEL_COPY (ps_kernel_masknoca_projective); +#undef KERNEL_COPY memset(&state->sampler_default_color, 0, sizeof(state->sampler_default_color)); @@ -623,7 +688,34 @@ gen4_state_init (gen4_state_t *state, uint32_t state_base_offset) } } -#undef KERNEL_COPY +#define SETUP_WM_STATE(kernel, has_mask) \ + wm_state_init(&state->wm_state_ ## kernel [i][j][k][l], \ + has_mask, \ + state_base_offset + offsetof(gen4_state_t, \ + wm_scratch), \ + state_base_offset + offsetof(gen4_state_t, \ + ps_kernel_ ## kernel), \ + state_base_offset + offsetof(gen4_state_t, \ + sampler_state[i][j][k][l])); + + + for (i = 0; i < SAMPLER_STATE_FILTER_COUNT; i++) { + for (j = 0; j < SAMPLER_STATE_EXTEND_COUNT; j++) { + for (k = 0; k < SAMPLER_STATE_FILTER_COUNT; k++) { + for (l = 0; l < SAMPLER_STATE_EXTEND_COUNT; l++) { + SETUP_WM_STATE (nomask_affine, FALSE); + SETUP_WM_STATE (nomask_projective, FALSE); + SETUP_WM_STATE (maskca_affine, TRUE); + SETUP_WM_STATE (maskca_projective, TRUE); + SETUP_WM_STATE (maskca_srcalpha_affine, TRUE); + SETUP_WM_STATE (maskca_srcalpha_projective, TRUE); + SETUP_WM_STATE (masknoca_affine, TRUE); + SETUP_WM_STATE (masknoca_projective, TRUE); + 
} + } + } + } +#undef SETUP_WM_STATE } static uint32_t @@ -737,12 +829,6 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, sf_offset = ALIGN(next_offset, 32); next_offset = sf_offset + sizeof(*sf_state); - wm_offset = ALIGN(next_offset, 32); - next_offset = wm_offset + sizeof(*wm_state); - - wm_scratch_offset = ALIGN(next_offset, 1024); - next_offset = wm_scratch_offset + PS_SCRATCH_SPACE * PS_MAX_THREADS; - /* Align VB to native size of elements, for safety */ vb_offset = ALIGN(next_offset, 32); next_offset = vb_offset + vb_size; @@ -982,108 +1068,6 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, sf_state = (void *)(state_base + sf_offset); memcpy (sf_state, &sf_state_local, sizeof (sf_state_local)); - /* Set up the PS kernel (dispatched by WM) */ - wm_state = &wm_state_local; - memset(wm_state, 0, sizeof (*wm_state)); - if (pMask) { - if (pMaskPicture->componentAlpha && - PICT_FORMAT_RGB(pMaskPicture->format)) - { - if (i965_blend_op[op].src_alpha) { - if (is_affine) { - wm_state->thread0.kernel_start_pointer = - (state_base_offset + - offsetof(gen4_state_t, - ps_kernel_maskca_srcalpha_affine)) >> 6; - } else { - wm_state->thread0.kernel_start_pointer = - (state_base_offset + - offsetof(gen4_state_t, - ps_kernel_maskca_srcalpha_projective)) >> 6; - } - } else { - if (is_affine) { - wm_state->thread0.kernel_start_pointer = - (state_base_offset + - offsetof(gen4_state_t, - ps_kernel_maskca_affine)) >> 6; - } else { - wm_state->thread0.kernel_start_pointer = - (state_base_offset + - offsetof(gen4_state_t, - ps_kernel_maskca_projective)) >> 6; - } - } - } else { - if (is_affine) { - wm_state->thread0.kernel_start_pointer = - (state_base_offset + - offsetof(gen4_state_t, - ps_kernel_masknoca_affine)) >> 6; - } else { - wm_state->thread0.kernel_start_pointer = - (state_base_offset + - offsetof(gen4_state_t, - ps_kernel_masknoca_projective)) >> 6; - } - } - } else { - if (is_affine) { - wm_state->thread0.kernel_start_pointer = - (state_base_offset + - 
offsetof(gen4_state_t, - ps_kernel_nomask_affine)) >> 6; - } else { - wm_state->thread0.kernel_start_pointer = - (state_base_offset + - offsetof(gen4_state_t, - ps_kernel_nomask_projective)) >> 6; - } - } - - wm_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(PS_KERNEL_NUM_GRF); - wm_state->thread1.single_program_flow = 0; - if (!pMask) - wm_state->thread1.binding_table_entry_count = 2; /* 1 tex and fb */ - else - wm_state->thread1.binding_table_entry_count = 3; /* 2 tex and fb */ - - wm_state->thread2.scratch_space_base_pointer = (state_base_offset + - wm_scratch_offset)>>10; - wm_state->thread2.per_thread_scratch_space = PS_SCRATCH_SPACE_LOG; - wm_state->thread3.const_urb_entry_read_length = 0; - wm_state->thread3.const_urb_entry_read_offset = 0; - /* Each pair of attributes (src/mask coords) is one URB entry */ - if (pMask) - wm_state->thread3.urb_entry_read_length = 4; - else - wm_state->thread3.urb_entry_read_length = 2; - wm_state->thread3.urb_entry_read_offset = 0; - /* wm kernel use urb from 3, see wm_program in compiler module */ - wm_state->thread3.dispatch_grf_start_reg = 3; /* must match kernel */ - - wm_state->wm4.stats_enable = 1; /* statistic */ - wm_state->wm4.sampler_state_pointer = (state_base_offset + - offsetof(gen4_state_t, - sampler_state - [src_filter] - [src_extend] - [mask_filter] - [mask_extend][0])) >> 5; - wm_state->wm4.sampler_count = 1; /* 1-4 samplers used */ - wm_state->wm5.max_threads = PS_MAX_THREADS - 1; - wm_state->wm5.transposed_urb_read = 0; - wm_state->wm5.thread_dispatch_enable = 1; - /* just use 16-pixel dispatch (4 subspans), don't need to change kernel - * start point - */ - wm_state->wm5.enable_16_pix = 1; - wm_state->wm5.enable_8_pix = 0; - wm_state->wm5.early_depth_test = 1; - - wm_state = (void *)(state_base + wm_offset); - memcpy (wm_state, &wm_state_local, sizeof (wm_state_local)); - /* Begin the long sequence of commands needed to set up the 3D * rendering pipe */ @@ -1166,7 +1150,49 @@ i965_prepare_composite(int op, 
PicturePtr pSrcPicture, OUT_BATCH(BRW_GS_DISABLE); /* disable GS, resulting in passthrough */ OUT_BATCH(BRW_CLIP_DISABLE); /* disable CLIP, resulting in passthrough */ OUT_BATCH(state_base_offset + sf_offset); /* 32 byte aligned */ - OUT_BATCH(state_base_offset + wm_offset); /* 32 byte aligned */ + + /* Shorthand for long array lookup */ +#define OUT_WM_KERNEL(kernel) do { \ + uint32_t offset = state_base_offset + \ + offsetof(gen4_state_t, \ + wm_state_ ## kernel \ + [src_filter] \ + [src_extend] \ + [mask_filter] \ + [mask_extend]); \ + assert((offset & 31) == 0); \ + OUT_BATCH(offset); \ +} while (0) + + if (pMask) { + if (pMaskPicture->componentAlpha && + PICT_FORMAT_RGB(pMaskPicture->format)) + { + if (i965_blend_op[op].src_alpha) { + if (is_affine) + OUT_WM_KERNEL(maskca_srcalpha_affine); + else + OUT_WM_KERNEL(maskca_srcalpha_projective); + } else { + if (is_affine) + OUT_WM_KERNEL(maskca_affine); + else + OUT_WM_KERNEL(maskca_projective); + } + } else { + if (is_affine) + OUT_WM_KERNEL(masknoca_affine); + else + OUT_WM_KERNEL(masknoca_projective); + } + } else { + if (is_affine) + OUT_WM_KERNEL(nomask_affine); + else + OUT_WM_KERNEL(nomask_projective); + } +#undef OUT_WM_KERNEL + /* 64 byte aligned */ OUT_BATCH(state_base_offset + offsetof(gen4_state_t, cc_state[src_blend][dst_blend])); From 03836067b77606c134c71b30c7078d09d77c95fa Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 11 Apr 2008 12:17:28 -0700 Subject: [PATCH 06/29] Fix the offset to sampler default color, and a compiler warning. 
--- src/i965_render.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/i965_render.c b/src/i965_render.c index 59af52fb..efbfc49b 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -283,7 +283,7 @@ static int binding_table_entries; static int dest_surf_offset, src_surf_offset, mask_surf_offset; static int vs_offset; -static int sf_offset, cc_offset, vb_offset; +static int sf_offset, vb_offset; static int binding_table_offset; static int next_offset, total_state_size; static char *state_base; @@ -668,10 +668,12 @@ gen4_state_init (gen4_state_t *state, uint32_t state_base_offset) for (l = 0; l < SAMPLER_STATE_EXTEND_COUNT; l++) { sampler_state_init (&state->sampler_state[i][j][k][l][0], i, j, + state_base_offset + offsetof (gen4_state_t, sampler_default_color)); sampler_state_init (&state->sampler_state[i][j][k][l][1], k, l, + state_base_offset + offsetof (gen4_state_t, sampler_default_color)); } From 91d3e19786e22061f4b617cf39a3cd713139a2d9 Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Sat, 12 Apr 2008 11:00:51 -0700 Subject: [PATCH 07/29] Use compressed instructions for yuv conversion --- src/exa_wm_yuv_rgb.g4a | 44 ++++++++++++++---------------------------- src/exa_wm_yuv_rgb.g4b | 24 ++++++++--------------- 2 files changed, 22 insertions(+), 46 deletions(-) diff --git a/src/exa_wm_yuv_rgb.g4a b/src/exa_wm_yuv_rgb.g4a index 327a8083..4fb2576a 100644 --- a/src/exa_wm_yuv_rgb.g4a +++ b/src/exa_wm_yuv_rgb.g4a @@ -30,27 +30,27 @@ include(`exa_wm.g4i') define(`YCbCr_base', `src_sample_base') -define(`Cr', `src_sample_r_01') +define(`Cr', `src_sample_r') define(`Cr_01', `src_sample_r_01') define(`Cr_23', `src_sample_r_23') -define(`Y', `src_sample_g_01') +define(`Y', `src_sample_g') define(`Y_01', `src_sample_g_01') define(`Y_23', `src_sample_g_23') -define(`Cb', `src_sample_b_01') +define(`Cb', `src_sample_b') define(`Cb_01', `src_sample_b_01') define(`Cb_23', `src_sample_b_23') -define(`Crn', `mask_sample_r_01') +define(`Crn', 
`mask_sample_r') define(`Crn_01', `mask_sample_r_01') define(`Crn_23', `mask_sample_r_23') -define(`Yn', `mask_sample_g_01') +define(`Yn', `mask_sample_g') define(`Yn_01', `mask_sample_g_01') define(`Yn_23', `mask_sample_g_23') -define(`Cbn', `mask_sample_b_01') +define(`Cbn', `mask_sample_b') define(`Cbn_01', `mask_sample_b_01') define(`Cbn_23', `mask_sample_b_23') @@ -58,14 +58,6 @@ define(`Cbn_23', `mask_sample_b_23') * R = Clamp ( 1.164(Y-16/255) + 1.596(Cr-128/255), 0, 1) * G = Clamp ( 1.164(Y-16/255) - 0.813(Cr-128/255) - 0.392(Cb-128/255), 0, 1) * B = Clamp ( 1.164(Y-16/255) + 2.017(Cb-128/255), 0, 1) - * - * Y is g14, g15. - * Cr is g12, g13. - * Cb is g16, g17. - * - * R is g2, g6. - * G is g3, g7. - * B is g4, g8. */ /* Normalize Y, Cb and Cr: @@ -84,31 +76,23 @@ add (16) Cbn<1>F Cb<8,8,1>F -0.501961F { compr align1 }; /* * R = Y + Cr * 1.596 */ -mul (8) null Crn_01<8,8,1>F 1.596F { align1 }; -mac.sat (8) src_sample_r_01<1>F Yn_01<8,8,1>F 1F { align1 }; -mul (8) null Crn_23<8,8,1>F 1.596F { align1 }; -mac.sat (8) src_sample_r_23<1>F Yn_23<8,8,1>F 1F { align1 }; +mov (16) acc0<1>F Yn<8,8,1>F { compr align1 }; +mac.sat(16) src_sample_r<1>F Crn<8,8,1>F 1.596F { compr align1 }; /* * G = Crn * -0.813 + Cbn * -0.392 + Y */ -mul (8) null Crn_01<8,8,1>F -0.813F { align1 }; -mac (8) null Cbn_01<8,8,1>F -0.392F { align1 }; -mac.sat (8) src_sample_g_01<1>F Yn_01<8,8,1>F 1F { align1 }; -mul (8) null Crn_23<8,8,1>F -0.813F { align1 }; -mac (8) null Cbn_23<8,8,1>F -0.392F { align1 }; -mac.sat (8) src_sample_g_23<1>F Yn_23<8,8,1>F 1F { align1 }; +mov (16) acc0<1>F Yn<8,8,1>F { compr align1 }; +mac (16) acc0<1>F Crn<8,8,1>F -0.813F { compr align1 }; +mac.sat(16) src_sample_g<1>F Cbn<8,8,1>F -0.392F { compr align1 }; /* * B = Cbn * 2.017 + Y */ -mul (8) null Cbn_01<8,8,1>F 2.017F { align1 }; -mac.sat (8) src_sample_b_01<1>F Yn_01<8,8,1>F 1F { align1 }; -mul (8) null Cbn_23<8,8,1>F 2.017F { align1 }; -mac.sat (8) src_sample_b_23<1>F Yn_23<8,8,1>F 1F { align1 }; +mov (16) 
acc0<1>F Yn<8,8,1>F { compr align1 }; +mac.sat(16) src_sample_b<1>F Cbn<8,8,1>F 2.017F { compr align1 }; /* * A = 1.0 */ -mov (8) src_sample_a_01<1>F 1.0F { align1 }; -mov (8) src_sample_a_23<1>F 1.0F { align1 }; +mov (16) src_sample_a<1>F 1.0F { compr align1 }; diff --git a/src/exa_wm_yuv_rgb.g4b b/src/exa_wm_yuv_rgb.g4b index be72e549..01f6e2b2 100644 --- a/src/exa_wm_yuv_rgb.g4b +++ b/src/exa_wm_yuv_rgb.g4b @@ -2,19 +2,11 @@ { 0x00802041, 0x23007fbd, 0x008d0300, 0x3f94fdf4 }, { 0x00802040, 0x22c07fbd, 0x008d01c0, 0xbf008084 }, { 0x00802040, 0x23407fbd, 0x008d0240, 0xbf008084 }, - { 0x00600041, 0x20007fbc, 0x008d02c0, 0x3fcc49ba }, - { 0x80600048, 0x21c07fbd, 0x008d0300, 0x3f800000 }, - { 0x00600041, 0x20007fbc, 0x008d02e0, 0x3fcc49ba }, - { 0x80600048, 0x21e07fbd, 0x008d0320, 0x3f800000 }, - { 0x00600041, 0x20007fbc, 0x008d02c0, 0xbf5020c5 }, - { 0x00600048, 0x20007fbc, 0x008d0340, 0xbec8b439 }, - { 0x80600048, 0x22007fbd, 0x008d0300, 0x3f800000 }, - { 0x00600041, 0x20007fbc, 0x008d02e0, 0xbf5020c5 }, - { 0x00600048, 0x20007fbc, 0x008d0360, 0xbec8b439 }, - { 0x80600048, 0x22207fbd, 0x008d0320, 0x3f800000 }, - { 0x00600041, 0x20007fbc, 0x008d0340, 0x40011687 }, - { 0x80600048, 0x22407fbd, 0x008d0300, 0x3f800000 }, - { 0x00600041, 0x20007fbc, 0x008d0360, 0x40011687 }, - { 0x80600048, 0x22607fbd, 0x008d0320, 0x3f800000 }, - { 0x00600001, 0x228003fd, 0x00000000, 0x3f800000 }, - { 0x00600001, 0x22a003fd, 0x00000000, 0x3f800000 }, + { 0x00802001, 0x240003bc, 0x008d0300, 0x00000000 }, + { 0x80802048, 0x21c07fbd, 0x008d02c0, 0x3fcc49ba }, + { 0x00802001, 0x240003bc, 0x008d0300, 0x00000000 }, + { 0x00802048, 0x24007fbc, 0x008d02c0, 0xbf5020c5 }, + { 0x80802048, 0x22007fbd, 0x008d0340, 0xbec8b439 }, + { 0x00802001, 0x240003bc, 0x008d0300, 0x00000000 }, + { 0x80802048, 0x22407fbd, 0x008d0340, 0x40011687 }, + { 0x00802001, 0x228003fd, 0x00000000, 0x3f800000 }, From 0fec42b64ee529eb7ec15febdaa5e4986ec1ab17 Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Sat, 12 Apr 2008 
11:01:14 -0700 Subject: [PATCH 08/29] Set some mask bits to try and get yuv conversion working --- src/exa_wm.g4i | 16 ++++++++++++++++ src/exa_wm_projective.g4i | 2 +- src/exa_wm_write.g4a | 10 +++++----- src/exa_wm_write.g4b | 10 +++++----- 4 files changed, 27 insertions(+), 11 deletions(-) diff --git a/src/exa_wm.g4i b/src/exa_wm.g4i index ee8e3ad0..5d3d45b1 100644 --- a/src/exa_wm.g4i +++ b/src/exa_wm.g4i @@ -103,23 +103,39 @@ define(`mask_w_1', `src_w_1') /* sample src to these registers */ define(`src_sample_base', `g14') + +define(`src_sample_r', `g14') define(`src_sample_r_01', `g14') define(`src_sample_r_23', `g15') + +define(`src_sample_g', `g16') define(`src_sample_g_01', `g16') define(`src_sample_g_23', `g17') + +define(`src_sample_b', `g18') define(`src_sample_b_01', `g18') define(`src_sample_b_23', `g19') + +define(`src_sample_a', `g20') define(`src_sample_a_01', `g20') define(`src_sample_a_23', `g21') /* sample mask to these registers */ define(`mask_sample_base', `g22') + +define(`mask_sample_r', `g22') define(`mask_sample_r_01', `g22') define(`mask_sample_r_23', `g23') + +define(`mask_sample_g', `g24') define(`mask_sample_g_01', `g24') define(`mask_sample_g_23', `g25') + +define(`mask_sample_b', `g26') define(`mask_sample_b_01', `g26') define(`mask_sample_b_23', `g27') + +define(`mask_sample_a', `g28') define(`mask_sample_a_01', `g28') define(`mask_sample_a_23', `g29') diff --git a/src/exa_wm_projective.g4i b/src/exa_wm_projective.g4i index 3c3bbf0c..7e2e0a82 100644 --- a/src/exa_wm_projective.g4i +++ b/src/exa_wm_projective.g4i @@ -32,7 +32,7 @@ mul (16) temp_y<1>F dst_y<8,8,1>F dw_dy { compr align1 }; add (16) temp_x<1>F temp_x<8,8,1>F temp_y<8,8,1>F { compr align1 }; add (16) temp_x<1>F temp_x<8,8,1>F wo { compr align1 }; send (8) 0 w_0<1>F temp_x_0<8,8,1>F math inv mlen 1 rlen 1 { align1 }; -send (8) 0 w_1<1>F temp_x_1<8,8,1>F math inv mlen 1 rlen 1 { align1 }; +send (8) 0 w_1<1>F temp_x_1<8,8,1>F math inv mlen 1 rlen 1 { sechalf align1 }; 
/********** Compute u *************/ diff --git a/src/exa_wm_write.g4a b/src/exa_wm_write.g4a index b16e6497..faee80b3 100644 --- a/src/exa_wm_write.g4a +++ b/src/exa_wm_write.g4a @@ -40,13 +40,13 @@ mov (8) data_port_g_01<1>F src_sample_g_01<8,8,1>F { align1 }; mov (8) data_port_b_01<1>F src_sample_b_01<8,8,1>F { align1 }; mov (8) data_port_a_01<1>F src_sample_a_01<8,8,1>F { align1 }; -mov (8) data_port_r_23<1>F src_sample_r_23<8,8,1>F { align1 }; -mov (8) data_port_g_23<1>F src_sample_g_23<8,8,1>F { align1 }; -mov (8) data_port_b_23<1>F src_sample_b_23<8,8,1>F { align1 }; -mov (8) data_port_a_23<1>F src_sample_a_23<8,8,1>F { align1 }; +mov (8) data_port_r_23<1>F src_sample_r_23<8,8,1>F { sechalf align1 }; +mov (8) data_port_g_23<1>F src_sample_g_23<8,8,1>F { sechalf align1 }; +mov (8) data_port_b_23<1>F src_sample_b_23<8,8,1>F { sechalf align1 }; +mov (8) data_port_a_23<1>F src_sample_a_23<8,8,1>F { sechalf align1 }; /* m0, m1 are all direct passed by PS thread payload */ -mov (8) data_port_msg_1<1>UD g1<8,8,1>UD { align1 }; +mov (8) data_port_msg_1<1>UD g1<8,8,1>UD { mask_disable align1 }; /* write */ send (16) diff --git a/src/exa_wm_write.g4b b/src/exa_wm_write.g4b index 785fe321..92e7b248 100644 --- a/src/exa_wm_write.g4b +++ b/src/exa_wm_write.g4b @@ -2,11 +2,11 @@ { 0x00600001, 0x206003be, 0x008d0200, 0x00000000 }, { 0x00600001, 0x208003be, 0x008d0240, 0x00000000 }, { 0x00600001, 0x20a003be, 0x008d0280, 0x00000000 }, - { 0x00600001, 0x20c003be, 0x008d01e0, 0x00000000 }, - { 0x00600001, 0x20e003be, 0x008d0220, 0x00000000 }, - { 0x00600001, 0x210003be, 0x008d0260, 0x00000000 }, - { 0x00600001, 0x212003be, 0x008d02a0, 0x00000000 }, - { 0x00600001, 0x20200022, 0x008d0020, 0x00000000 }, + { 0x00601001, 0x20c003be, 0x008d01e0, 0x00000000 }, + { 0x00601001, 0x20e003be, 0x008d0220, 0x00000000 }, + { 0x00601001, 0x210003be, 0x008d0260, 0x00000000 }, + { 0x00601001, 0x212003be, 0x008d02a0, 0x00000000 }, + { 0x00600201, 0x20200022, 0x008d0020, 0x00000000 }, { 
0x00800031, 0x24001d28, 0x008d0000, 0x85a04800 }, { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 }, From 6366e4de540c4713cbc8402c89516464bbdc35b9 Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Sat, 12 Apr 2008 16:39:00 -0700 Subject: [PATCH 09/29] Skip copying on FOURCC_XVMC surfaces --- src/i830_video.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/i830_video.c b/src/i830_video.c index 24372886..73099497 100644 --- a/src/i830_video.c +++ b/src/i830_video.c @@ -2060,6 +2060,7 @@ i830_display_video(ScrnInfoPtr pScrn, xf86CrtcPtr crtc, switch (id) { case FOURCC_YV12: case FOURCC_I420: + case FOURCC_XVMC: OVERLAY_DEBUG("YUV420\n"); #if 0 /* set UV vertical phase to -0.25 */ @@ -2074,7 +2075,6 @@ i830_display_video(ScrnInfoPtr pScrn, xf86CrtcPtr crtc, break; case FOURCC_UYVY: case FOURCC_YUY2: - default: OVERLAY_DEBUG("YUV422\n"); overlay->OSTRIDE = dstPitch; OCMD &= ~SOURCE_FORMAT; @@ -2315,6 +2315,7 @@ I830PutImage(ScrnInfoPtr pScrn, switch (destId) { case FOURCC_YV12: case FOURCC_I420: + case FOURCC_XVMC: if (pPriv->rotation & (RR_Rotate_90 | RR_Rotate_270)) { dstPitch = ((height / 2) + pitchAlignMask) & ~pitchAlignMask; size = dstPitch * width * 3; @@ -2325,7 +2326,7 @@ I830PutImage(ScrnInfoPtr pScrn, break; case FOURCC_UYVY: case FOURCC_YUY2: - default: + if (pPriv->rotation & (RR_Rotate_90 | RR_Rotate_270)) { dstPitch = ((height << 1) + pitchAlignMask) & ~pitchAlignMask; size = dstPitch * width; @@ -2334,6 +2335,10 @@ I830PutImage(ScrnInfoPtr pScrn, size = dstPitch * height; } break; + default: + dstPitch = 0; + size = 0; + break; } #if 0 ErrorF("srcPitch: %d, dstPitch: %d, size: %d\n", srcPitch, dstPitch, size); @@ -2413,11 +2418,14 @@ I830PutImage(ScrnInfoPtr pScrn, break; case FOURCC_UYVY: case FOURCC_YUY2: - default: nlines = ((y2 + 0xffff) >> 16) - top; I830CopyPackedData(pScrn, pPriv, buf, srcPitch, dstPitch, top, left, nlines, npixels); break; + case 
FOURCC_XVMC: + break; + default: + break; } if (pDraw->type == DRAWABLE_WINDOW) { From 08d3206f3b891e611e3e16c9eb79cfca21ab8a51 Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Sun, 13 Apr 2008 12:35:14 -0700 Subject: [PATCH 10/29] Kludge to make planar video work - duplicate sampler state This patch duplicates all of the surface sampler state needed to read from the separate YUV surfaces and then has the planar sampler read from samplers 0, 2, 4 instead of 0, 1, 2. This appears to make things work, instead of having the samplers break at random. I do not understand why this works. --- src/exa_wm_src_sample_planar.g4a | 11 +++++----- src/exa_wm_src_sample_planar.g4b | 4 ++-- src/i965_video.c | 35 +++++++++++++++++--------------- 3 files changed, 26 insertions(+), 24 deletions(-) diff --git a/src/exa_wm_src_sample_planar.g4a b/src/exa_wm_src_sample_planar.g4a index 10b15ebc..ca77b484 100644 --- a/src/exa_wm_src_sample_planar.g4a +++ b/src/exa_wm_src_sample_planar.g4a @@ -42,7 +42,7 @@ mov (1) g0.8<1>UD 0x0000e000UD { align1 mask_disable }; /* sample Y */ send (16) src_msg_ind /* msg reg index */ - src_sample_g_01<1>UW /* readback */ + src_sample_g<1>UW /* readback */ g0<8,8,1>UW /* copy to msg start reg*/ sampler (1,0,F) /* sampler message description, (binding_table,sampler_index,datatype) /* here(src->dst) we should use src_sampler and src_surface */ @@ -50,17 +50,16 @@ send (16) src_msg_ind /* msg reg index */ /* sample U (Cr) */ send (16) src_msg_ind /* msg reg index */ - src_sample_r_01<1>UW /* readback */ + src_sample_r<1>UW /* readback */ g0<8,8,1>UW /* copy to msg start reg*/ - sampler (2,1,F) /* sampler message description, (binding_table,sampler_index,datatype) + sampler (3,2,F) /* sampler message description, (binding_table,sampler_index,datatype) /* here(src->dst) we should use src_sampler and src_surface */ mlen 5 rlen 2 { align1 }; /* required message len 5, readback len 8 */ /* sample V (Cb) */ send (16) src_msg_ind /* msg reg index */ - 
src_sample_b_01<1>UW /* readback */ + src_sample_b<1>UW /* readback */ g0<8,8,1>UW /* copy to msg start reg*/ - sampler (3,2,F) /* sampler message description, (binding_table,sampler_index,datatype) + sampler (5,4,F) /* sampler message description, (binding_table,sampler_index,datatype) /* here(src->dst) we should use src_sampler and src_surface */ mlen 5 rlen 2 { align1 }; /* required message len 5, readback len 8 */ - diff --git a/src/exa_wm_src_sample_planar.g4b b/src/exa_wm_src_sample_planar.g4b index d2b9cfe5..77a5c234 100644 --- a/src/exa_wm_src_sample_planar.g4b +++ b/src/exa_wm_src_sample_planar.g4b @@ -1,4 +1,4 @@ { 0x00000201, 0x20080061, 0x00000000, 0x0000e000 }, { 0x01800031, 0x22001d29, 0x008d0000, 0x02520001 }, - { 0x01800031, 0x21c01d29, 0x008d0000, 0x02520102 }, - { 0x01800031, 0x22401d29, 0x008d0000, 0x02520203 }, + { 0x01800031, 0x21c01d29, 0x008d0000, 0x02520203 }, + { 0x01800031, 0x22401d29, 0x008d0000, 0x02520405 }, diff --git a/src/i965_video.c b/src/i965_video.c index 464f2e31..485c89a0 100644 --- a/src/i965_video.c +++ b/src/i965_video.c @@ -171,8 +171,8 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id, int urb_sf_start, urb_sf_size; int urb_cs_start, urb_cs_size; struct brw_surface_state *dest_surf_state; - struct brw_surface_state *src_surf_state[3]; - struct brw_sampler_state *src_sampler_state[3]; + struct brw_surface_state *src_surf_state[6]; + struct brw_sampler_state *src_sampler_state[6]; struct brw_vs_unit_state *vs_state; struct brw_sf_unit_state *sf_state; struct brw_wm_unit_state *wm_state; @@ -185,7 +185,7 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id, float src_scale_x, src_scale_y; uint32_t *binding_table; Bool first_output = TRUE; - int dest_surf_offset, src_surf_offset[3], src_sampler_offset[3], vs_offset; + int dest_surf_offset, src_surf_offset[6], src_sampler_offset[6], vs_offset; int sf_offset, wm_offset, cc_offset, vb_offset, cc_viewport_offset; int 
wm_scratch_offset; int sf_kernel_offset, ps_kernel_offset, sip_kernel_offset; @@ -197,10 +197,10 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id, int src_surf; int n_src_surf; uint32_t src_surf_format; - uint32_t src_surf_base[3]; - int src_width[3]; - int src_height[3]; - int src_pitch[3]; + uint32_t src_surf_base[6]; + int src_width[6]; + int src_height[6]; + int src_pitch[6]; int wm_binding_table_entries; const uint32_t *ps_kernel_static; int ps_kernel_static_size; @@ -219,8 +219,11 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id, #endif src_surf_base[0] = pPriv->YBuf0offset; - src_surf_base[1] = pPriv->VBuf0offset; - src_surf_base[2] = pPriv->UBuf0offset; + src_surf_base[1] = pPriv->YBuf0offset; + src_surf_base[2] = pPriv->VBuf0offset; + src_surf_base[3] = pPriv->VBuf0offset; + src_surf_base[4] = pPriv->UBuf0offset; + src_surf_base[5] = pPriv->UBuf0offset; #if 0 ErrorF ("base 0 0x%x base 1 0x%x base 2 0x%x\n", src_surf_base[0], src_surf_base[1], src_surf_base[2]); @@ -250,13 +253,13 @@ I965DisplayVideoTextured(ScrnInfoPtr pScrn, I830PortPrivPtr pPriv, int id, src_surf_format = BRW_SURFACEFORMAT_R8_UNORM; ps_kernel_static = &ps_kernel_planar_static[0][0]; ps_kernel_static_size = sizeof (ps_kernel_planar_static); - src_width[0] = width; - src_height[0] = height; - src_pitch[0] = video_pitch * 2; - src_width[1] = src_width[2] = width / 2; - src_height[1] = src_height[2] = height / 2; - src_pitch[1] = src_pitch[2] = video_pitch; - n_src_surf = 3; + src_width[1] = src_width[0] = width; + src_height[1] = src_height[0] = height; + src_pitch[1] = src_pitch[0] = video_pitch * 2; + src_width[4] = src_width[5] = src_width[2] = src_width[3] = width / 2; + src_height[4] = src_height[5] = src_height[2] = src_height[3] = height / 2; + src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] = video_pitch; + n_src_surf = 6; break; default: return; From 269809030ee9d08b56ad0178a7a9c0194e25785e Mon Sep 17 00:00:00 2001
From: Keith Packard Date: Sun, 13 Apr 2008 12:47:02 -0700 Subject: [PATCH 11/29] Move wm_scratch first to meet alignment requirements --- src/i965_render.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/i965_render.c b/src/i965_render.c index efbfc49b..5a112d10 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -459,6 +459,8 @@ typedef struct _brw_cc_unit_state_padded { * the rest. */ typedef struct _gen4_state { + uint8_t wm_scratch[128 * PS_MAX_THREADS]; + KERNEL_DECL (sip_kernel); KERNEL_DECL (sf_kernel); KERNEL_DECL (sf_kernel_mask); @@ -471,8 +473,6 @@ typedef struct _gen4_state { KERNEL_DECL (ps_kernel_masknoca_affine); KERNEL_DECL (ps_kernel_masknoca_projective); - uint8_t wm_scratch[128 * PS_MAX_THREADS]; - WM_STATE_DECL (nomask_affine); WM_STATE_DECL (nomask_projective); WM_STATE_DECL (maskca_affine); From 18ef4158e5574bfc6621b268821532f13e261d13 Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Sun, 13 Apr 2008 21:37:49 -0700 Subject: [PATCH 12/29] Only use FOURCC_XVMC when INTEL_XVMC is defined The XVMC code uses a magic FOURCC code to signal frame updates, but that code is only defined when the XVMC code is used. 
--- src/i830_video.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/i830_video.c b/src/i830_video.c index 73099497..1fa9d75f 100644 --- a/src/i830_video.c +++ b/src/i830_video.c @@ -2060,7 +2060,9 @@ i830_display_video(ScrnInfoPtr pScrn, xf86CrtcPtr crtc, switch (id) { case FOURCC_YV12: case FOURCC_I420: +#ifdef INTEL_XVMC case FOURCC_XVMC: +#endif OVERLAY_DEBUG("YUV420\n"); #if 0 /* set UV vertical phase to -0.25 */ @@ -2315,7 +2317,9 @@ I830PutImage(ScrnInfoPtr pScrn, switch (destId) { case FOURCC_YV12: case FOURCC_I420: +#ifdef INTEL_XVMC case FOURCC_XVMC: +#endif if (pPriv->rotation & (RR_Rotate_90 | RR_Rotate_270)) { dstPitch = ((height / 2) + pitchAlignMask) & ~pitchAlignMask; size = dstPitch * width * 3; @@ -2422,8 +2426,10 @@ I830PutImage(ScrnInfoPtr pScrn, I830CopyPackedData(pScrn, pPriv, buf, srcPitch, dstPitch, top, left, nlines, npixels); break; +#ifdef INTEL_XVMC case FOURCC_XVMC: break; +#endif default: break; } From c1ad0a1a344a87a537aa5f93f6c774997a3189e4 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Mon, 14 Apr 2008 10:02:00 +0800 Subject: [PATCH 13/29] Remove old sf/wm program from Makefile Fix distcheck (cherry picked from commit 1d8383923bebf38ab8ada6a262cb4fc8c30e074a) --- src/Makefile.am | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Makefile.am b/src/Makefile.am index 91f59954..0784c064 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -71,8 +71,6 @@ INTEL_XVMC_SRCS = \ intel_drv_la_SOURCES = \ brw_defines.h \ brw_structs.h \ - sf_prog.h \ - wm_prog.h \ common.h \ i2c_vid.h \ i810_accel.c \ From 90d6b178473ba32cf66e6e654e608cb4374e4a19 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Mon, 14 Apr 2008 16:31:03 +0800 Subject: [PATCH 14/29] Change default panel fitting mode to origin i8xx currently only works in FULL mode. 
(cherry picked from commit 33ffd781bbca3d0dee8c1b47e7b90be5824b9a4f) --- src/i830_lvds.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/i830_lvds.c b/src/i830_lvds.c index a5004b4b..1562c217 100644 --- a/src/i830_lvds.c +++ b/src/i830_lvds.c @@ -1366,9 +1366,10 @@ i830_lvds_init(ScrnInfoPtr pScrn) /* * Default to filling the whole screen if the mode is less than the - * native size, without breaking aspect ratio. + * native size. (Change default to origin FULL mode, i8xx can only work + * in that mode for now.) */ - dev_priv->fitting_mode = FULL_ASPECT; + dev_priv->fitting_mode = FULL; return; From 456bb529335c28cf60f3b6e5900b804efa5c185a Mon Sep 17 00:00:00 2001 From: Carl Worth Date: Fri, 9 Nov 2007 17:24:21 -0800 Subject: [PATCH 15/29] Associate one sf_state object with each sf_kernel (cherry picked from a2b5c23184d19b386fdfd04f578a55566df60132 commit) --- src/i965_render.c | 152 ++++++++++++++++++++++++++-------------------- 1 file changed, 85 insertions(+), 67 deletions(-) diff --git a/src/i965_render.c b/src/i965_render.c index 5a112d10..bed49df0 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -265,6 +265,24 @@ i965_check_composite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture, #define MIN(a,b) ((a) < (b) ? (a) : (b)) #define BRW_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1) +/* Set up a default static partitioning of the URB, which is supposed to + * allow anything we would want to do, at potentially lower performance. 
+ */ +#define URB_CS_ENTRY_SIZE 0 +#define URB_CS_ENTRIES 0 + +#define URB_VS_ENTRY_SIZE 1 // each 512-bit row +#define URB_VS_ENTRIES 8 // we needs at least 8 entries + +#define URB_GS_ENTRY_SIZE 0 +#define URB_GS_ENTRIES 0 + +#define URB_CLIP_ENTRY_SIZE 0 +#define URB_CLIP_ENTRIES 0 + +#define URB_SF_ENTRY_SIZE 2 +#define URB_SF_ENTRIES 1 + static int urb_vs_start, urb_vs_size; static int urb_gs_start, urb_gs_size; static int urb_clip_start, urb_clip_size; @@ -276,14 +294,13 @@ static struct brw_surface_state *src_surf_state, src_surf_state_local; static struct brw_surface_state *mask_surf_state, mask_surf_state_local; static struct brw_vs_unit_state *vs_state, vs_state_local; -static struct brw_sf_unit_state *sf_state, sf_state_local; static uint32_t *binding_table; static int binding_table_entries; static int dest_surf_offset, src_surf_offset, mask_surf_offset; static int vs_offset; -static int sf_offset, vb_offset; +static int vb_offset; static int binding_table_offset; static int next_offset, total_state_size; static char *state_base; @@ -473,6 +490,11 @@ typedef struct _gen4_state { KERNEL_DECL (ps_kernel_masknoca_affine); KERNEL_DECL (ps_kernel_masknoca_projective); + struct brw_sf_unit_state sf_state; + PAD64 (brw_sf_unit_state, 0); + struct brw_sf_unit_state sf_state_mask; + PAD64 (brw_sf_unit_state, 1); + WM_STATE_DECL (nomask_affine); WM_STATE_DECL (nomask_projective); WM_STATE_DECL (maskca_affine); @@ -500,6 +522,49 @@ typedef struct _gen4_state { uint8_t other_state[65536]; } gen4_state_t; +/** + * Sets up the SF state pointing at an SF kernel. + * + * The SF kernel does coord interp: for each attribute, + * calculate dA/dx and dA/dy. Hand these interpolation coefficients + * back to SF which then hands pixels off to WM. 
+ */ +static void +sf_state_init (struct brw_sf_unit_state *sf_state, int kernel_offset) +{ + memset(sf_state, 0, sizeof(*sf_state)); + sf_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(SF_KERNEL_NUM_GRF); + sf_state->sf1.single_program_flow = 1; + sf_state->sf1.binding_table_entry_count = 0; + sf_state->sf1.thread_priority = 0; + sf_state->sf1.floating_point_mode = 0; /* Mesa does this */ + sf_state->sf1.illegal_op_exception_enable = 1; + sf_state->sf1.mask_stack_exception_enable = 1; + sf_state->sf1.sw_exception_enable = 1; + sf_state->thread2.per_thread_scratch_space = 0; + /* scratch space is not used in our kernel */ + sf_state->thread2.scratch_space_base_pointer = 0; + sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */ + sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */ + sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */ + /* don't smash vertex header, read start from dw8 */ + sf_state->thread3.urb_entry_read_offset = 1; + sf_state->thread3.dispatch_grf_start_reg = 3; + sf_state->thread4.max_threads = SF_MAX_THREADS - 1; + sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1; + sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES; + sf_state->thread4.stats_enable = 1; + sf_state->sf5.viewport_transform = FALSE; /* skip viewport */ + sf_state->sf6.cull_mode = BRW_CULLMODE_NONE; + sf_state->sf6.scissor = 0; + sf_state->sf7.trifan_pv = 2; + sf_state->sf6.dest_org_vbias = 0x8; + sf_state->sf6.dest_org_hbias = 0x8; + + assert((kernel_offset & 63) == 0); + sf_state->thread0.kernel_start_pointer = kernel_offset >> 6; +} + static void sampler_state_init (struct brw_sampler_state *sampler_state, sampler_state_filter_t filter, @@ -662,6 +727,13 @@ gen4_state_init (gen4_state_t *state, uint32_t state_base_offset) state->cc_viewport.min_depth = -1.e35; state->cc_viewport.max_depth = 1.e35; + sf_state_init (&state->sf_state, + state_base_offset + + offsetof (gen4_state_t, sf_kernel)); + sf_state_init 
(&state->sf_state_mask, + state_base_offset + + offsetof (gen4_state_t, sf_kernel_mask)); + for (i = 0; i < SAMPLER_STATE_FILTER_COUNT; i++) { for (j = 0; j < SAMPLER_STATE_EXTEND_COUNT; j++) { for (k = 0; k < SAMPLER_STATE_FILTER_COUNT; k++) { @@ -772,6 +844,7 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, mask_tiled = 0; uint32_t dst_format, dst_offset, dst_pitch, dst_tile_format = 0, dst_tiled = 0; + uint32_t sf_state_offset; sampler_state_filter_t src_filter, mask_filter; sampler_state_extend_t src_extend, mask_extend; Bool is_affine_src, is_affine_mask, is_affine; @@ -828,9 +901,6 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, vs_offset = ALIGN(next_offset, 64); next_offset = vs_offset + sizeof(*vs_state); - sf_offset = ALIGN(next_offset, 32); - next_offset = sf_offset + sizeof(*sf_state); - /* Align VB to native size of elements, for safety */ vb_offset = ALIGN(next_offset, 32); next_offset = vb_offset + vb_size; @@ -862,24 +932,6 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, vb = (void *)(state_base + vb_offset); - /* Set up a default static partitioning of the URB, which is supposed to - * allow anything we would want to do, at potentially lower performance. - */ -#define URB_CS_ENTRY_SIZE 0 -#define URB_CS_ENTRIES 0 - -#define URB_VS_ENTRY_SIZE 1 // each 512-bit row -#define URB_VS_ENTRIES 8 // we needs at least 8 entries - -#define URB_GS_ENTRY_SIZE 0 -#define URB_GS_ENTRIES 0 - -#define URB_CLIP_ENTRY_SIZE 0 -#define URB_CLIP_ENTRIES 0 - -#define URB_SF_ENTRY_SIZE 2 -#define URB_SF_ENTRIES 1 - urb_vs_start = 0; urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE; urb_gs_start = urb_vs_start + urb_vs_size; @@ -1026,49 +1078,6 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, vs_state = (void *)(state_base + vs_offset); memcpy (vs_state, &vs_state_local, sizeof (vs_state_local)); - /* Set up the SF kernel to do coord interp: for each attribute, - * calculate dA/dx and dA/dy. 
Hand these interpolation coefficients - * back to SF which then hands pixels off to WM. - */ - sf_state = &sf_state_local; - memset(sf_state, 0, sizeof(*sf_state)); - if (pMask) { - sf_state->thread0.kernel_start_pointer = (state_base_offset + - offsetof(gen4_state_t, sf_kernel_mask)) >> 6; - } else { - sf_state->thread0.kernel_start_pointer = (state_base_offset + - offsetof(gen4_state_t, sf_kernel)) >> 6; - } - sf_state->thread0.grf_reg_count = BRW_GRF_BLOCKS(SF_KERNEL_NUM_GRF); - sf_state->sf1.single_program_flow = 1; - sf_state->sf1.binding_table_entry_count = 0; - sf_state->sf1.thread_priority = 0; - sf_state->sf1.floating_point_mode = 0; /* Mesa does this */ - sf_state->sf1.illegal_op_exception_enable = 1; - sf_state->sf1.mask_stack_exception_enable = 1; - sf_state->sf1.sw_exception_enable = 1; - sf_state->thread2.per_thread_scratch_space = 0; - /* scratch space is not used in our kernel */ - sf_state->thread2.scratch_space_base_pointer = 0; - sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */ - sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */ - sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */ - /* don't smash vertex header, read start from dw8 */ - sf_state->thread3.urb_entry_read_offset = 1; - sf_state->thread3.dispatch_grf_start_reg = 3; - sf_state->thread4.max_threads = SF_MAX_THREADS - 1; - sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1; - sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES; - sf_state->thread4.stats_enable = 1; - sf_state->sf5.viewport_transform = FALSE; /* skip viewport */ - sf_state->sf6.cull_mode = BRW_CULLMODE_NONE; - sf_state->sf6.scissor = 0; - sf_state->sf7.trifan_pv = 2; - sf_state->sf6.dest_org_vbias = 0x8; - sf_state->sf6.dest_org_hbias = 0x8; - - sf_state = (void *)(state_base + sf_offset); - memcpy (sf_state, &sf_state_local, sizeof (sf_state_local)); /* Begin the long sequence of commands needed to set up the 3D * rendering pipe @@ -1151,7 
+1160,16 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, OUT_BATCH(state_base_offset + vs_offset); /* 32 byte aligned */ OUT_BATCH(BRW_GS_DISABLE); /* disable GS, resulting in passthrough */ OUT_BATCH(BRW_CLIP_DISABLE); /* disable CLIP, resulting in passthrough */ - OUT_BATCH(state_base_offset + sf_offset); /* 32 byte aligned */ + + if (pMask) { + sf_state_offset = state_base_offset + + offsetof(gen4_state_t, sf_state_mask); + } else { + sf_state_offset = state_base_offset + + offsetof(gen4_state_t, sf_state); + } + assert((sf_state_offset & 31) == 0); + OUT_BATCH(sf_state_offset); /* Shorthand for long array lookup */ #define OUT_WM_KERNEL(kernel) do { \ From 4bbdd7096d4927fe48673006fff7df922972f116 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 14 Apr 2008 13:53:36 -0700 Subject: [PATCH 16/29] Rename some variables in i965_render.c for clarity. The gen4_render_state is now always called "render_state" (i965_render.c bookkeeping) and gen4_state_t is now always called "card_state" (the buffer for state used by the chip). --- src/i965_render.c | 61 +++++++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 29 deletions(-) diff --git a/src/i965_render.c b/src/i965_render.c index bed49df0..c26b904a 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -124,12 +124,6 @@ static struct formatinfo i965_tex_formats[] = { {PICT_a8, BRW_SURFACEFORMAT_A8_UNORM }, }; -/** Private data for gen4 render accel implementation. */ -struct gen4_render_state { - unsigned char *state_addr; - unsigned int state_offset; -}; - static void i965_get_blend_cntl(int op, PicturePtr pMask, uint32_t dst_format, uint32_t *sblend, uint32_t *dblend) { @@ -522,6 +516,12 @@ typedef struct _gen4_state { uint8_t other_state[65536]; } gen4_state_t; +/** Private data for gen4 render accel implementation. */ +struct gen4_render_state { + gen4_state_t *card_state; + uint32_t card_state_offset; +}; + /** * Sets up the SF state pointing at an SF kernel. 
* @@ -697,12 +697,14 @@ wm_state_init (struct brw_wm_unit_state *wm_state, * Called at EnterVT to fill in our state buffer with any static information. */ static void -gen4_state_init (gen4_state_t *state, uint32_t state_base_offset) +gen4_state_init (struct gen4_render_state *render_state) { int i, j, k, l; + gen4_state_t *card_state = render_state->card_state; + uint32_t state_base_offset = render_state->card_state_offset; #define KERNEL_COPY(kernel) \ - memcpy(state->kernel, kernel ## _static, sizeof(kernel ## _static)) + memcpy(card_state->kernel, kernel ## _static, sizeof(kernel ## _static)) KERNEL_COPY (sip_kernel); KERNEL_COPY (sf_kernel); @@ -717,20 +719,20 @@ gen4_state_init (gen4_state_t *state, uint32_t state_base_offset) KERNEL_COPY (ps_kernel_masknoca_projective); #undef KERNEL_COPY - memset(&state->sampler_default_color, 0, - sizeof(state->sampler_default_color)); - state->sampler_default_color.color[0] = 0.0; /* R */ - state->sampler_default_color.color[1] = 0.0; /* G */ - state->sampler_default_color.color[2] = 0.0; /* B */ - state->sampler_default_color.color[3] = 0.0; /* A */ + memset(&card_state->sampler_default_color, 0, + sizeof(card_state->sampler_default_color)); + card_state->sampler_default_color.color[0] = 0.0; /* R */ + card_state->sampler_default_color.color[1] = 0.0; /* G */ + card_state->sampler_default_color.color[2] = 0.0; /* B */ + card_state->sampler_default_color.color[3] = 0.0; /* A */ - state->cc_viewport.min_depth = -1.e35; - state->cc_viewport.max_depth = 1.e35; + card_state->cc_viewport.min_depth = -1.e35; + card_state->cc_viewport.max_depth = 1.e35; - sf_state_init (&state->sf_state, + sf_state_init (&card_state->sf_state, state_base_offset + offsetof (gen4_state_t, sf_kernel)); - sf_state_init (&state->sf_state_mask, + sf_state_init (&card_state->sf_state_mask, state_base_offset + offsetof (gen4_state_t, sf_kernel_mask)); @@ -738,12 +740,12 @@ gen4_state_init (gen4_state_t *state, uint32_t state_base_offset) for (j = 0; j < 
SAMPLER_STATE_EXTEND_COUNT; j++) { for (k = 0; k < SAMPLER_STATE_FILTER_COUNT; k++) { for (l = 0; l < SAMPLER_STATE_EXTEND_COUNT; l++) { - sampler_state_init (&state->sampler_state[i][j][k][l][0], + sampler_state_init (&card_state->sampler_state[i][j][k][l][0], i, j, state_base_offset + offsetof (gen4_state_t, sampler_default_color)); - sampler_state_init (&state->sampler_state[i][j][k][l][1], + sampler_state_init (&card_state->sampler_state[i][j][k][l][1], k, l, state_base_offset + offsetof (gen4_state_t, @@ -756,14 +758,14 @@ gen4_state_init (gen4_state_t *state, uint32_t state_base_offset) for (i = 0; i < BRW_BLENDFACTOR_COUNT; i++) { for (j = 0; j < BRW_BLENDFACTOR_COUNT; j++) { - cc_state_init (&state->cc_state[i][j].state, i, j, + cc_state_init (&card_state->cc_state[i][j].state, i, j, state_base_offset + offsetof (gen4_state_t, cc_viewport)); } } #define SETUP_WM_STATE(kernel, has_mask) \ - wm_state_init(&state->wm_state_ ## kernel [i][j][k][l], \ + wm_state_init(&card_state->wm_state_ ## kernel [i][j][k][l], \ has_mask, \ state_base_offset + offsetof(gen4_state_t, \ wm_scratch), \ @@ -1500,17 +1502,18 @@ void gen4_render_state_init(ScrnInfoPtr pScrn) { I830Ptr pI830 = I830PTR(pScrn); - struct gen4_render_state *state; + struct gen4_render_state *render_state; if (pI830->gen4_render_state == NULL) - pI830->gen4_render_state = calloc(sizeof(*state), 1); + pI830->gen4_render_state = calloc(sizeof(*render_state), 1); - state = pI830->gen4_render_state; + render_state = pI830->gen4_render_state; - state->state_offset = pI830->gen4_render_state_mem->offset; - state->state_addr = pI830->FbBase + pI830->gen4_render_state_mem->offset; + render_state->card_state_offset = pI830->gen4_render_state_mem->offset; + render_state->card_state = (gen4_state_t *) + (pI830->FbBase + render_state->card_state_offset); - gen4_state_init((gen4_state_t *)state->state_addr, state->state_offset); + gen4_state_init(render_state); } /** @@ -1521,7 +1524,7 @@ 
gen4_render_state_cleanup(ScrnInfoPtr pScrn) { I830Ptr pI830 = I830PTR(pScrn); - pI830->gen4_render_state->state_addr = NULL; + pI830->gen4_render_state->card_state = NULL; } /** From 092962c962fdf3e33a8d387221448fe7c8c36376 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 14 Apr 2008 14:02:58 -0700 Subject: [PATCH 17/29] Make the VS unit state static for 965 render. --- src/i965_render.c | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/src/i965_render.c b/src/i965_render.c index c26b904a..cb2debb2 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -287,13 +287,10 @@ static struct brw_surface_state *dest_surf_state, dest_surf_state_local; static struct brw_surface_state *src_surf_state, src_surf_state_local; static struct brw_surface_state *mask_surf_state, mask_surf_state_local; -static struct brw_vs_unit_state *vs_state, vs_state_local; - static uint32_t *binding_table; static int binding_table_entries; static int dest_surf_offset, src_surf_offset, mask_surf_offset; -static int vs_offset; static int vb_offset; static int binding_table_offset; static int next_offset, total_state_size; @@ -484,6 +481,9 @@ typedef struct _gen4_state { KERNEL_DECL (ps_kernel_masknoca_affine); KERNEL_DECL (ps_kernel_masknoca_projective); + struct brw_vs_unit_state vs_state; + PAD64 (brw_vs_unit_state, 0); + struct brw_sf_unit_state sf_state; PAD64 (brw_sf_unit_state, 0); struct brw_sf_unit_state sf_state_mask; @@ -719,6 +719,15 @@ gen4_state_init (struct gen4_render_state *render_state) KERNEL_COPY (ps_kernel_masknoca_projective); #undef KERNEL_COPY + /* Set up the vertex shader to be disabled (passthrough) */ + memset(&card_state->vs_state, 0, sizeof(card_state->vs_state)); + card_state->vs_state.thread4.nr_urb_entries = URB_VS_ENTRIES; + card_state->vs_state.thread4.urb_entry_allocation_size = + URB_VS_ENTRY_SIZE - 1; + card_state->vs_state.vs6.vs_enable = 0; + card_state->vs_state.vs6.vert_cache_disable = 1; + + /* Set 
up the sampler default color (always transparent black) */ memset(&card_state->sampler_default_color, 0, sizeof(card_state->sampler_default_color)); card_state->sampler_default_color.color[0] = 0.0; /* R */ @@ -900,8 +909,6 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, /* Set up our layout of state in framebuffer. First the general state: */ next_offset = offsetof(gen4_state_t, other_state); - vs_offset = ALIGN(next_offset, 64); - next_offset = vs_offset + sizeof(*vs_state); /* Align VB to native size of elements, for safety */ vb_offset = ALIGN(next_offset, 32); @@ -1069,18 +1076,6 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, mask_extend = SAMPLER_STATE_EXTEND_NONE; } - /* Set up the vertex shader to be disabled (passthrough) */ - vs_state = &vs_state_local; - memset(vs_state, 0, sizeof(*vs_state)); - vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES; - vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1; - vs_state->vs6.vs_enable = 0; - vs_state->vs6.vert_cache_disable = 1; - - vs_state = (void *)(state_base + vs_offset); - memcpy (vs_state, &vs_state_local, sizeof (vs_state_local)); - - /* Begin the long sequence of commands needed to set up the 3D * rendering pipe */ @@ -1159,7 +1154,8 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, /* Set the pointers to the 3d pipeline state */ OUT_BATCH(BRW_3DSTATE_PIPELINED_POINTERS | 5); - OUT_BATCH(state_base_offset + vs_offset); /* 32 byte aligned */ + assert((offsetof(gen4_state_t, vs_state) & 31) == 0); + OUT_BATCH(state_base_offset + offsetof(gen4_state_t, vs_state)); OUT_BATCH(BRW_GS_DISABLE); /* disable GS, resulting in passthrough */ OUT_BATCH(BRW_CLIP_DISABLE); /* disable CLIP, resulting in passthrough */ From 049d057dfd73b6c191a8ba5d73eddb77cf7748e7 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 14 Apr 2008 17:09:22 -0700 Subject: [PATCH 18/29] Move the 965 static variables local to the function they're used in. 
--- src/i965_render.c | 38 +++++++++++++++++--------------------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/src/i965_render.c b/src/i965_render.c index cb2debb2..11ef803f 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -277,30 +277,9 @@ i965_check_composite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture, #define URB_SF_ENTRY_SIZE 2 #define URB_SF_ENTRIES 1 -static int urb_vs_start, urb_vs_size; -static int urb_gs_start, urb_gs_size; -static int urb_clip_start, urb_clip_size; -static int urb_sf_start, urb_sf_size; -static int urb_cs_start, urb_cs_size; - -static struct brw_surface_state *dest_surf_state, dest_surf_state_local; -static struct brw_surface_state *src_surf_state, src_surf_state_local; -static struct brw_surface_state *mask_surf_state, mask_surf_state_local; - -static uint32_t *binding_table; -static int binding_table_entries; - -static int dest_surf_offset, src_surf_offset, mask_surf_offset; -static int vb_offset; -static int binding_table_offset; -static int next_offset, total_state_size; -static char *state_base; -static int state_base_offset; static float *vb; static int vb_size = (2 + 3 + 3) * 3 * 4; /* (dst, src, mask) 3 vertices, 4 bytes */ -static uint32_t src_blend, dst_blend; - static const uint32_t sip_kernel_static[][4] = { /* wait (1) a0<1>UW a145<0,1,0>UW { align1 + } */ { 0x00000030, 0x20000108, 0x00001220, 0x00000000 }, @@ -859,6 +838,23 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, sampler_state_filter_t src_filter, mask_filter; sampler_state_extend_t src_extend, mask_extend; Bool is_affine_src, is_affine_mask, is_affine; + int urb_vs_start, urb_vs_size; + int urb_gs_start, urb_gs_size; + int urb_clip_start, urb_clip_size; + int urb_sf_start, urb_sf_size; + int urb_cs_start, urb_cs_size; + struct brw_surface_state *dest_surf_state, dest_surf_state_local; + struct brw_surface_state *src_surf_state, src_surf_state_local; + struct brw_surface_state *mask_surf_state, mask_surf_state_local; 
+ uint32_t *binding_table; + int binding_table_entries; + int dest_surf_offset, src_surf_offset, mask_surf_offset = 0; + int vb_offset; + int binding_table_offset; + int next_offset, total_state_size; + char *state_base; + int state_base_offset; + uint32_t src_blend, dst_blend; IntelEmitInvarientState(pScrn); *pI830->last_3d = LAST_3D_RENDER; From 69709fe36cfdb59c140f5cba233dcb606f8dbc85 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 14 Apr 2008 18:34:13 -0700 Subject: [PATCH 19/29] Move the binding table for 965 to the state structure. --- src/i965_render.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/src/i965_render.c b/src/i965_render.c index 11ef803f..fef30ba7 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -477,7 +477,11 @@ typedef struct _gen4_state { WM_STATE_DECL (masknoca_affine); WM_STATE_DECL (masknoca_projective); - /* Index by [src_filter][src_extend][mask_filter][mask_extend] */ + uint32_t binding_table[16]; /* Only use 3, but pad to 64 bytes */ + + /* Index by [src_filter][src_extend][mask_filter][mask_extend]. Two of + * the structs happen to add to 32 bytes. 
+ */ struct brw_sampler_state sampler_state[SAMPLER_STATE_FILTER_COUNT] [SAMPLER_STATE_EXTEND_COUNT] [SAMPLER_STATE_FILTER_COUNT] @@ -829,6 +833,7 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, { ScrnInfoPtr pScrn = xf86Screens[pSrcPicture->pDrawable->pScreen->myNum]; I830Ptr pI830 = I830PTR(pScrn); + gen4_state_t *card_state = pI830->gen4_render_state->card_state; uint32_t src_offset, src_pitch, src_tile_format = 0, src_tiled = 0; uint32_t mask_offset = 0, mask_pitch = 0, mask_tile_format = 0, mask_tiled = 0; @@ -846,15 +851,13 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, struct brw_surface_state *dest_surf_state, dest_surf_state_local; struct brw_surface_state *src_surf_state, src_surf_state_local; struct brw_surface_state *mask_surf_state, mask_surf_state_local; - uint32_t *binding_table; - int binding_table_entries; int dest_surf_offset, src_surf_offset, mask_surf_offset = 0; int vb_offset; - int binding_table_offset; int next_offset, total_state_size; char *state_base; int state_base_offset; uint32_t src_blend, dst_blend; + uint32_t *binding_table; IntelEmitInvarientState(pScrn); *pI830->last_3d = LAST_3D_RENDER; @@ -901,8 +904,6 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, /* setup 3d pipeline state */ - binding_table_entries = 2; /* default no mask */ - /* Set up our layout of state in framebuffer. 
First the general state: */ next_offset = offsetof(gen4_state_t, other_state); @@ -920,12 +921,8 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, if (pMask) { mask_surf_offset = ALIGN(next_offset, 32); next_offset = mask_surf_offset + sizeof(*mask_surf_state); - binding_table_entries = 3; } - binding_table_offset = ALIGN(next_offset, 32); - next_offset = binding_table_offset + (binding_table_entries * 4); - total_state_size = next_offset; assert(total_state_size < sizeof(gen4_state_t)); @@ -933,8 +930,6 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, assert((state_base_offset & 63) == 0); state_base = (char *)(pI830->FbBase + state_base_offset); - binding_table = (void *)(state_base + binding_table_offset); - vb = (void *)(state_base + vb_offset); urb_vs_start = 0; @@ -1047,6 +1042,7 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, } /* Set up a binding table for our surfaces. Only the PS will use it */ + binding_table = &card_state->binding_table[0]; binding_table[0] = state_base_offset + dest_surf_offset; binding_table[1] = state_base_offset + src_surf_offset; if (pMask) @@ -1132,7 +1128,7 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, OUT_BATCH(0); /* clip */ OUT_BATCH(0); /* sf */ /* Only the PS uses the binding table */ - OUT_BATCH(state_base_offset + binding_table_offset); /* ps */ + OUT_BATCH(state_base_offset + offsetof(gen4_state_t, binding_table)); /* The drawing rectangle clipping is always on. Set it to values that * shouldn't do any clipping. From e27ce4238f748d024635835824c120eb298b84b2 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 14 Apr 2008 23:30:21 -0700 Subject: [PATCH 20/29] Move the vertex buffer to the state structure as well. 
--- src/i965_render.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/src/i965_render.c b/src/i965_render.c index fef30ba7..ef10a88b 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -277,9 +277,6 @@ i965_check_composite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture, #define URB_SF_ENTRY_SIZE 2 #define URB_SF_ENTRIES 1 -static float *vb; -static int vb_size = (2 + 3 + 3) * 3 * 4; /* (dst, src, mask) 3 vertices, 4 bytes */ - static const uint32_t sip_kernel_static[][4] = { /* wait (1) a0<1>UW a145<0,1,0>UW { align1 + } */ { 0x00000030, 0x20000108, 0x00001220, 0x00000000 }, @@ -497,6 +494,8 @@ typedef struct _gen4_state { PAD64 (brw_cc_viewport, 0); uint8_t other_state[65536]; + + float vb[(2 + 3 + 3) * 3]; /* (dst, src, mask) 3 vertices, 4 bytes */ } gen4_state_t; /** Private data for gen4 render accel implementation. */ @@ -852,7 +851,6 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, struct brw_surface_state *src_surf_state, src_surf_state_local; struct brw_surface_state *mask_surf_state, mask_surf_state_local; int dest_surf_offset, src_surf_offset, mask_surf_offset = 0; - int vb_offset; int next_offset, total_state_size; char *state_base; int state_base_offset; @@ -907,10 +905,6 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, /* Set up our layout of state in framebuffer. 
First the general state: */ next_offset = offsetof(gen4_state_t, other_state); - /* Align VB to native size of elements, for safety */ - vb_offset = ALIGN(next_offset, 32); - next_offset = vb_offset + vb_size; - /* And then the general state: */ dest_surf_offset = ALIGN(next_offset, 32); next_offset = dest_surf_offset + sizeof(*dest_surf_state); @@ -930,8 +924,6 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, assert((state_base_offset & 63) == 0); state_base = (char *)(pI830->FbBase + state_base_offset); - vb = (void *)(state_base + vb_offset); - urb_vs_start = 0; urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE; urb_gs_start = urb_vs_start + urb_vs_size; @@ -1257,7 +1249,7 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, OUT_BATCH((0 << VB0_BUFFER_INDEX_SHIFT) | VB0_VERTEXDATA | ((4 * (2 + nelem * selem)) << VB0_BUFFER_PITCH_SHIFT)); - OUT_BATCH(state_base_offset + vb_offset); + OUT_BATCH(state_base_offset + offsetof(gen4_state_t, vb)); OUT_BATCH(3); OUT_BATCH(0); // ignore for VERTEXDATA, but still there @@ -1315,9 +1307,11 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY, { ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; I830Ptr pI830 = I830PTR(pScrn); + gen4_state_t *card_state = pI830->gen4_render_state->card_state; Bool has_mask; Bool is_affine_src, is_affine_mask, is_affine; float src_x[3], src_y[3], src_w[3], mask_x[3], mask_y[3], mask_w[3]; + float *vb = card_state->vb; int i; is_affine_src = i830_transform_is_affine (pI830->transform[0]); @@ -1441,7 +1435,7 @@ i965_composite(PixmapPtr pDst, int srcX, int srcY, int maskX, int maskY, if (!is_affine) vb[i++] = mask_w[0]; } - assert (i * 4 <= vb_size); + assert (i * 4 <= sizeof(card_state->vb)); { BEGIN_BATCH(6); From 23d1df22d177e54bfc46304053d8115047ff85d4 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Mon, 14 Apr 2008 16:46:17 -0700 Subject: [PATCH 21/29] Abstract surface setup into a separate function. 
--- src/i965_render.c | 241 ++++++++++++++++++---------------------------- 1 file changed, 93 insertions(+), 148 deletions(-) diff --git a/src/i965_render.c b/src/i965_render.c index ef10a88b..95ac0631 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -432,6 +432,11 @@ typedef struct _brw_cc_unit_state_padded { char pad[64 - sizeof (struct brw_cc_unit_state)]; } brw_cc_unit_state_padded; +typedef struct brw_surface_state_padded { + struct brw_surface_state state; + char pad[32 - sizeof (struct brw_surface_state)]; +} brw_surface_state_padded; + /** * Gen4 rendering state buffer structure. * @@ -476,6 +481,11 @@ typedef struct _gen4_state { uint32_t binding_table[16]; /* Only use 3, but pad to 64 bytes */ + struct brw_surface_state_padded dst_surface; + struct brw_surface_state_padded src_surface; + struct brw_surface_state_padded mask_surface; + uint8_t surface_pad[32]; + /* Index by [src_filter][src_extend][mask_filter][mask_extend]. Two of * the structs happen to add to 32 bytes. */ @@ -493,8 +503,6 @@ typedef struct _gen4_state { struct brw_cc_viewport cc_viewport; PAD64 (brw_cc_viewport, 0); - uint8_t other_state[65536]; - float vb[(2 + 3 + 3) * 3]; /* (dst, src, mask) 3 vertices, 4 bytes */ } gen4_state_t; @@ -796,6 +804,8 @@ i965_get_card_format(PicturePtr pPict) if (i965_tex_formats[i].fmt == pPict->format) break; } + assert(i != sizeof(i965_tex_formats) / sizeof(i965_tex_formats[0])); + return i965_tex_formats[i].card_fmt; } @@ -825,6 +835,64 @@ sampler_state_extend_from_picture (int repeat) } } +/** + * Sets up the common fields for a surface state buffer for the given picture + * in the surface state buffer at index, and returns the offset within the + * state buffer for this entry. 
+ */ +static unsigned int +i965_set_picture_surface_state(ScrnInfoPtr pScrn, struct brw_surface_state *ss, + PicturePtr pPicture, PixmapPtr pPixmap, + Bool is_dst) +{ + I830Ptr pI830 = I830PTR(pScrn); + struct gen4_render_state *render_state= pI830->gen4_render_state; + gen4_state_t *card_state = render_state->card_state; + struct brw_surface_state local_ss; + uint32_t offset; + + /* Since ss is a pointer to WC memory, do all of our bit operations + * into a local temporary first. + */ + memset(&local_ss, 0, sizeof(local_ss)); + local_ss.ss0.surface_type = BRW_SURFACE_2D; + if (is_dst) { + uint32_t dst_format; + + assert(i965_get_dest_format(pPicture, &dst_format) == TRUE); + local_ss.ss0.surface_format = dst_format; + } else { + local_ss.ss0.surface_format = i965_get_card_format(pPicture); + } + + local_ss.ss0.data_return_format = BRW_SURFACERETURNFORMAT_FLOAT32; + local_ss.ss0.writedisable_alpha = 0; + local_ss.ss0.writedisable_red = 0; + local_ss.ss0.writedisable_green = 0; + local_ss.ss0.writedisable_blue = 0; + local_ss.ss0.color_blend = 1; + local_ss.ss0.vert_line_stride = 0; + local_ss.ss0.vert_line_stride_ofs = 0; + local_ss.ss0.mipmap_layout_mode = 0; + local_ss.ss0.render_cache_read_mode = 0; + local_ss.ss1.base_addr = intel_get_pixmap_offset(pPixmap); + + local_ss.ss2.mip_count = 0; + local_ss.ss2.render_target_rotation = 0; + local_ss.ss2.height = pPixmap->drawable.height - 1; + local_ss.ss2.width = pPixmap->drawable.width - 1; + local_ss.ss3.pitch = intel_get_pixmap_pitch(pPixmap) - 1; + local_ss.ss3.tile_walk = 0; /* Tiled X */ + local_ss.ss3.tiled_surface = i830_pixmap_tiled(pPixmap); + + memcpy(ss, &local_ss, sizeof(local_ss)); + + offset = (char *)ss - (char *)card_state; + assert((offset & 31) == 0); + + return offset; +} + Bool i965_prepare_composite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture, PicturePtr pDstPicture, @@ -832,12 +900,8 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, { ScrnInfoPtr pScrn = 
xf86Screens[pSrcPicture->pDrawable->pScreen->myNum]; I830Ptr pI830 = I830PTR(pScrn); - gen4_state_t *card_state = pI830->gen4_render_state->card_state; - uint32_t src_offset, src_pitch, src_tile_format = 0, src_tiled = 0; - uint32_t mask_offset = 0, mask_pitch = 0, mask_tile_format = 0, - mask_tiled = 0; - uint32_t dst_format, dst_offset, dst_pitch, dst_tile_format = 0, - dst_tiled = 0; + struct gen4_render_state *render_state= pI830->gen4_render_state; + gen4_state_t *card_state = render_state->card_state; uint32_t sf_state_offset; sampler_state_filter_t src_filter, mask_filter; sampler_state_extend_t src_extend, mask_extend; @@ -847,39 +911,13 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, int urb_clip_start, urb_clip_size; int urb_sf_start, urb_sf_size; int urb_cs_start, urb_cs_size; - struct brw_surface_state *dest_surf_state, dest_surf_state_local; - struct brw_surface_state *src_surf_state, src_surf_state_local; - struct brw_surface_state *mask_surf_state, mask_surf_state_local; - int dest_surf_offset, src_surf_offset, mask_surf_offset = 0; - int next_offset, total_state_size; char *state_base; int state_base_offset; uint32_t src_blend, dst_blend; - uint32_t *binding_table; IntelEmitInvarientState(pScrn); *pI830->last_3d = LAST_3D_RENDER; - src_offset = intel_get_pixmap_offset(pSrc); - src_pitch = intel_get_pixmap_pitch(pSrc); - if (i830_pixmap_tiled(pSrc)) { - src_tiled = 1; - src_tile_format = 0; /* Tiled X */ - } - dst_offset = intel_get_pixmap_offset(pDst); - dst_pitch = intel_get_pixmap_pitch(pDst); - if (i830_pixmap_tiled(pDst)) { - dst_tiled = 1; - dst_tile_format = 0; /* Tiled X */ - } - if (pMask) { - mask_offset = intel_get_pixmap_offset(pMask); - mask_pitch = intel_get_pixmap_pitch(pMask); - if (i830_pixmap_tiled(pMask)) { - mask_tiled = 1; - mask_tile_format = 0; /* Tiled X */ - } - } pI830->scale_units[0][0] = pSrc->drawable.width; pI830->scale_units[0][1] = pSrc->drawable.height; @@ -900,26 +938,6 @@ i965_prepare_composite(int op, 
PicturePtr pSrcPicture, is_affine = is_affine_src && is_affine_mask; - /* setup 3d pipeline state */ - - /* Set up our layout of state in framebuffer. First the general state: */ - next_offset = offsetof(gen4_state_t, other_state); - - /* And then the general state: */ - dest_surf_offset = ALIGN(next_offset, 32); - next_offset = dest_surf_offset + sizeof(*dest_surf_state); - - src_surf_offset = ALIGN(next_offset, 32); - next_offset = src_surf_offset + sizeof(*src_surf_state); - - if (pMask) { - mask_surf_offset = ALIGN(next_offset, 32); - next_offset = mask_surf_offset + sizeof(*mask_surf_state); - } - - total_state_size = next_offset; - assert(total_state_size < sizeof(gen4_state_t)); - state_base_offset = pI830->gen4_render_state_mem->offset; assert((state_base_offset & 63) == 0); state_base = (char *)(pI830->FbBase + state_base_offset); @@ -943,104 +961,28 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, i965_get_blend_cntl(op, pMaskPicture, pDstPicture->format, &src_blend, &dst_blend); - /* Set up the state buffer for the destination surface */ - dest_surf_state = &dest_surf_state_local; - memset(dest_surf_state, 0, sizeof(*dest_surf_state)); - dest_surf_state->ss0.surface_type = BRW_SURFACE_2D; - dest_surf_state->ss0.data_return_format = BRW_SURFACERETURNFORMAT_FLOAT32; - if (!i965_get_dest_format(pDstPicture, &dst_format)) - return FALSE; - dest_surf_state->ss0.surface_format = dst_format; + /* Set up and bind the state buffer for the destination surface */ + card_state->binding_table[0] = state_base_offset + + i965_set_picture_surface_state(pScrn, + &card_state->dst_surface.state, + pDstPicture, pDst, TRUE); - dest_surf_state->ss0.writedisable_alpha = 0; - dest_surf_state->ss0.writedisable_red = 0; - dest_surf_state->ss0.writedisable_green = 0; - dest_surf_state->ss0.writedisable_blue = 0; - dest_surf_state->ss0.color_blend = 1; - dest_surf_state->ss0.vert_line_stride = 0; - dest_surf_state->ss0.vert_line_stride_ofs = 0; - 
dest_surf_state->ss0.mipmap_layout_mode = 0; - dest_surf_state->ss0.render_cache_read_mode = 0; - - dest_surf_state->ss1.base_addr = dst_offset; - dest_surf_state->ss2.height = pDst->drawable.height - 1; - dest_surf_state->ss2.width = pDst->drawable.width - 1; - dest_surf_state->ss2.mip_count = 0; - dest_surf_state->ss2.render_target_rotation = 0; - dest_surf_state->ss3.pitch = dst_pitch - 1; - dest_surf_state->ss3.tile_walk = dst_tile_format; - dest_surf_state->ss3.tiled_surface = dst_tiled; - - dest_surf_state = (void *)(state_base + dest_surf_offset); - memcpy (dest_surf_state, &dest_surf_state_local, sizeof (dest_surf_state_local)); - - /* Set up the source surface state buffer */ - src_surf_state = &src_surf_state_local; - memset(src_surf_state, 0, sizeof(*src_surf_state)); - src_surf_state->ss0.surface_type = BRW_SURFACE_2D; - src_surf_state->ss0.surface_format = i965_get_card_format(pSrcPicture); - - src_surf_state->ss0.writedisable_alpha = 0; - src_surf_state->ss0.writedisable_red = 0; - src_surf_state->ss0.writedisable_green = 0; - src_surf_state->ss0.writedisable_blue = 0; - src_surf_state->ss0.color_blend = 1; - src_surf_state->ss0.vert_line_stride = 0; - src_surf_state->ss0.vert_line_stride_ofs = 0; - src_surf_state->ss0.mipmap_layout_mode = 0; - src_surf_state->ss0.render_cache_read_mode = 0; - - src_surf_state->ss1.base_addr = src_offset; - src_surf_state->ss2.width = pSrc->drawable.width - 1; - src_surf_state->ss2.height = pSrc->drawable.height - 1; - src_surf_state->ss2.mip_count = 0; - src_surf_state->ss2.render_target_rotation = 0; - src_surf_state->ss3.pitch = src_pitch - 1; - src_surf_state->ss3.tile_walk = src_tile_format; - src_surf_state->ss3.tiled_surface = src_tiled; - - src_surf_state = (void *)(state_base + src_surf_offset); - memcpy (src_surf_state, &src_surf_state_local, sizeof (src_surf_state_local)); - - /* setup mask surface */ + /* Set up and bind the source surface state buffer */ + card_state->binding_table[1] = state_base_offset 
+ + i965_set_picture_surface_state(pScrn, + &card_state->src_surface.state, + pSrcPicture, pSrc, FALSE); if (pMask) { - mask_surf_state = &mask_surf_state_local; - memset(mask_surf_state, 0, sizeof(*mask_surf_state)); - mask_surf_state->ss0.surface_type = BRW_SURFACE_2D; - mask_surf_state->ss0.surface_format = - i965_get_card_format(pMaskPicture); - - mask_surf_state->ss0.writedisable_alpha = 0; - mask_surf_state->ss0.writedisable_red = 0; - mask_surf_state->ss0.writedisable_green = 0; - mask_surf_state->ss0.writedisable_blue = 0; - mask_surf_state->ss0.color_blend = 1; - mask_surf_state->ss0.vert_line_stride = 0; - mask_surf_state->ss0.vert_line_stride_ofs = 0; - mask_surf_state->ss0.mipmap_layout_mode = 0; - mask_surf_state->ss0.render_cache_read_mode = 0; - - mask_surf_state->ss1.base_addr = mask_offset; - mask_surf_state->ss2.width = pMask->drawable.width - 1; - mask_surf_state->ss2.height = pMask->drawable.height - 1; - mask_surf_state->ss2.mip_count = 0; - mask_surf_state->ss2.render_target_rotation = 0; - mask_surf_state->ss3.pitch = mask_pitch - 1; - mask_surf_state->ss3.tile_walk = mask_tile_format; - mask_surf_state->ss3.tiled_surface = mask_tiled; - - mask_surf_state = (void *)(state_base + mask_surf_offset); - memcpy (mask_surf_state, &mask_surf_state_local, sizeof (mask_surf_state_local)); + /* Set up and bind the mask surface state buffer */ + card_state->binding_table[2] = state_base_offset + + i965_set_picture_surface_state(pScrn, + &card_state->mask_surface.state, + pMaskPicture, pMask, + FALSE); + } else { + card_state->binding_table[2] = 0; } - /* Set up a binding table for our surfaces. 
Only the PS will use it */ - binding_table = &card_state->binding_table[0]; - binding_table[0] = state_base_offset + dest_surf_offset; - binding_table[1] = state_base_offset + src_surf_offset; - if (pMask) - binding_table[2] = state_base_offset + mask_surf_offset; - - src_filter = sampler_state_filter_from_picture (pSrcPicture->filter); if (src_filter < 0) I830FALLBACK ("Bad src filter 0x%x\n", pSrcPicture->filter); @@ -1120,6 +1062,7 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, OUT_BATCH(0); /* clip */ OUT_BATCH(0); /* sf */ /* Only the PS uses the binding table */ + assert((offsetof(gen4_state_t, binding_table) & 31) == 0); OUT_BATCH(state_base_offset + offsetof(gen4_state_t, binding_table)); /* The drawing rectangle clipping is always on. Set it to values that @@ -1196,6 +1139,8 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, #undef OUT_WM_KERNEL /* 64 byte aligned */ + assert((offsetof(gen4_state_t, + cc_state[src_blend][dst_blend]) & 63) == 0); OUT_BATCH(state_base_offset + offsetof(gen4_state_t, cc_state[src_blend][dst_blend])); From 7008eac855302fd467e001495483c2dccfddf68f Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 16 Apr 2008 12:09:41 -0700 Subject: [PATCH 22/29] Update dolt from git, fixing fallback to libtool. --- acinclude.m4 | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/acinclude.m4 b/acinclude.m4 index 17b36d8a..254b3529 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -8,8 +8,7 @@ dnl To use dolt, invoke the DOLT macro immediately after the libtool macros. dnl Optionally, copy this file into acinclude.m4, to avoid the need to have it dnl installed when running autoconf on your project. 
dnl -dnl git snapshot: 198a3026b347b9220a2f2e2ae23a3049c35af262 - +dnl git snapshot: d91f2b4e9041538400e2703a2a6fbeecdb8ee27d AC_DEFUN([DOLT], [ AC_REQUIRE([AC_CANONICAL_HOST]) # dolt, a replacement for libtool @@ -27,11 +26,13 @@ if test x$GCC != xyes; then fi case $host in i?86-*-linux*|x86_64-*-linux*|powerpc-*-linux*) ;; -amd64-*-freebsd*|i386-*-freebsd*|ia64-*-freebsd*) ;; +amd64-*-freebsd*|i?86-*-freebsd*|ia64-*-freebsd*) ;; *) dolt_supported=no ;; esac if test x$dolt_supported = xno ; then AC_MSG_RESULT([no, falling back to libtool]) + LTCOMPILE='$(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(COMPILE)' + LTCXXCOMPILE='$(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXXCOMPILE)' else AC_MSG_RESULT([yes, replacing libtool]) @@ -65,9 +66,10 @@ dnl Write out shared compilation code. cat <<'__DOLTCOMPILE__EOF__' >>doltcompile libobjdir="${obj%$objbase}.libs" if test ! -d "$libobjdir" ; then - mkdir -p "$libobjdir" + mkdir_out="$(mkdir "$libobjdir" 2>&1)" mkdir_ret=$? if test "$mkdir_ret" -ne 0 && test ! -d "$libobjdir" ; then + echo "$mkdir_out" 1>&2 exit $mkdir_ret fi fi @@ -130,9 +132,9 @@ __DOLTCOMPILE__EOF__ dnl Done writing out doltcompile; substitute it for libtool compilation. 
chmod +x doltcompile LTCOMPILE='$(top_builddir)/doltcompile $(COMPILE)' - AC_SUBST(LTCOMPILE) LTCXXCOMPILE='$(top_builddir)/doltcompile $(CXXCOMPILE)' - AC_SUBST(LTCXXCOMPILE) fi +AC_SUBST(LTCOMPILE) +AC_SUBST(LTCXXCOMPILE) # end dolt ]) From 0ae283582d21776d3317d5fc1c25751d50d562c7 Mon Sep 17 00:00:00 2001 From: Hong Liu Date: Thu, 17 Apr 2008 10:57:34 +0800 Subject: [PATCH 23/29] fix possible segfault in I830FreeScreen pI830 may point to NULL if I830PreInit fails --- src/i830_driver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/i830_driver.c b/src/i830_driver.c index ea37e6d6..dd3e2356 100644 --- a/src/i830_driver.c +++ b/src/i830_driver.c @@ -3112,7 +3112,7 @@ I830FreeScreen(int scrnIndex, int flags) #ifdef INTEL_XVMC ScrnInfoPtr pScrn = xf86Screens[scrnIndex]; I830Ptr pI830 = I830PTR(pScrn); - if (pI830->XvMCEnabled) + if (pI830 && pI830->XvMCEnabled) intel_xvmc_finish(xf86Screens[scrnIndex]); #endif I830FreeRec(xf86Screens[scrnIndex]); From c3fb62df4e60b63295f94c99b3c5de70dbf94e1c Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Thu, 17 Apr 2008 10:04:55 -0700 Subject: [PATCH 24/29] Add a kludge-around to fix cd/wt bits in fb ptes on linux. Mmap from /sys/devices/pci* on linux forces the cache-disable and write-through bits, which turns our write-combining map into an uncached-map, seriously impacting performance. It turns out that a bug in mprotect allows us to fix this by disabling access to those pages and then immediately re-enabling them. 
--- configure.ac | 3 +++ src/i830_driver.c | 10 ++++++++++ 2 files changed, 13 insertions(+) diff --git a/configure.ac b/configure.ac index f203d658..c0a1e0bc 100644 --- a/configure.ac +++ b/configure.ac @@ -44,6 +44,9 @@ AM_PROG_CC_C_O AC_CHECK_PROG(gen4asm, [intel-gen4asm], yes, no) AM_CONDITIONAL(HAVE_GEN4ASM, test x$gen4asm = xyes) +AC_CHECK_HEADERS(sys/mman.h) +AC_CHECK_FUNCS(mprotect) + AH_TOP([#include "xorg-server.h"]) AC_ARG_WITH(xorg-module-dir, diff --git a/src/i830_driver.c b/src/i830_driver.c index dd3e2356..6bf35662 100644 --- a/src/i830_driver.c +++ b/src/i830_driver.c @@ -197,6 +197,9 @@ USE OR OTHER DEALINGS IN THE SOFTWARE. #include "i830_debug.h" #include "i830_bios.h" #include "i830_video.h" +#if HAVE_SYS_MMAN_H && HAVE_MPROTECT +#include <sys/mman.h> +#endif #ifdef INTEL_XVMC #define _INTEL_XVMC_SERVER_ @@ -685,6 +688,13 @@ I830MapMem(ScrnInfoPtr pScrn) err = pci_device_map_range (device, pI830->LinearAddr, pI830->FbMapSize, PCI_DEV_MAP_FLAG_WRITABLE | PCI_DEV_MAP_FLAG_WRITE_COMBINE, (void **) &pI830->FbBase); + if (err) + return FALSE; + /* KLUDGE ALERT -- rewrite the PTEs to turn off the CD and WT bits */ +#if HAVE_MPROTECT + mprotect (pI830->FbBase, pI830->FbMapSize, PROT_NONE); + mprotect (pI830->FbBase, pI830->FbMapSize, PROT_READ|PROT_WRITE); +#endif #else pI830->FbBase = xf86MapPciMem(pScrn->scrnIndex, VIDMEM_FRAMEBUFFER, pI830->PciTag, From 2c135ef8ac40f8e7cd071de7414adfae019f9198 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 16 Apr 2008 21:39:58 -0700 Subject: [PATCH 25/29] Make the binding table and surface state be arrays to reduce syncing. 
--- src/i830.h | 1 - src/i830_accel.c | 3 --- src/i965_render.c | 63 ++++++++++++++++++++++++++--------------------- 3 files changed, 35 insertions(+), 32 deletions(-) diff --git a/src/i830.h b/src/i830.h index 4e82036b..2c6d2b45 100644 --- a/src/i830.h +++ b/src/i830.h @@ -831,7 +831,6 @@ Bool i915_prepare_composite(int op, PicturePtr pSrc, PicturePtr pMask, unsigned int gen4_render_state_size(ScrnInfoPtr pScrn); void gen4_render_state_init(ScrnInfoPtr pScrn); void gen4_render_state_cleanup(ScrnInfoPtr pScrn); -void gen4_render_state_reset(ScrnInfoPtr pScrn); Bool i965_check_composite(int op, PicturePtr pSrc, PicturePtr pMask, PicturePtr pDst); Bool i965_prepare_composite(int op, PicturePtr pSrc, PicturePtr pMask, diff --git a/src/i830_accel.c b/src/i830_accel.c index 0194f00d..953a73bc 100644 --- a/src/i830_accel.c +++ b/src/i830_accel.c @@ -205,9 +205,6 @@ I830Sync(ScrnInfoPtr pScrn) pI830->LpRing->space = pI830->LpRing->mem->size - 8; pI830->nextColorExpandBuf = 0; - - if (IS_I965G(pI830)) - gen4_render_state_reset(pScrn); } void diff --git a/src/i965_render.c b/src/i965_render.c index 95ac0631..3c553de1 100644 --- a/src/i965_render.c +++ b/src/i965_render.c @@ -479,12 +479,9 @@ typedef struct _gen4_state { WM_STATE_DECL (masknoca_affine); WM_STATE_DECL (masknoca_projective); - uint32_t binding_table[16]; /* Only use 3, but pad to 64 bytes */ + uint32_t binding_table[128]; - struct brw_surface_state_padded dst_surface; - struct brw_surface_state_padded src_surface; - struct brw_surface_state_padded mask_surface; - uint8_t surface_pad[32]; + struct brw_surface_state_padded surface_state[32]; /* Index by [src_filter][src_extend][mask_filter][mask_extend]. Two of * the structs happen to add to 32 bytes. 
@@ -510,6 +507,9 @@ typedef struct _gen4_state { struct gen4_render_state { gen4_state_t *card_state; uint32_t card_state_offset; + + int binding_table_index; + int surface_state_index; }; /** @@ -902,6 +902,7 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, I830Ptr pI830 = I830PTR(pScrn); struct gen4_render_state *render_state= pI830->gen4_render_state; gen4_state_t *card_state = render_state->card_state; + struct brw_surface_state_padded *ss; uint32_t sf_state_offset; sampler_state_filter_t src_filter, mask_filter; sampler_state_extend_t src_extend, mask_extend; @@ -914,6 +915,7 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, char *state_base; int state_base_offset; uint32_t src_blend, dst_blend; + uint32_t *binding_table; IntelEmitInvarientState(pScrn); *pI830->last_3d = LAST_3D_RENDER; @@ -953,34 +955,48 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, urb_cs_start = urb_sf_start + urb_sf_size; urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE; - /* Because we only have a single static buffer for our state currently, - * we have to sync before updating it every time. - */ - i830WaitSync(pScrn); - i965_get_blend_cntl(op, pMaskPicture, pDstPicture->format, &src_blend, &dst_blend); + if ((render_state->binding_table_index + 3 >= + ARRAY_SIZE(card_state->binding_table)) || + (render_state->surface_state_index + 3 >= + ARRAY_SIZE(card_state->surface_state))) + { + i830WaitSync(pScrn); + render_state->binding_table_index = 0; + render_state->surface_state_index = 0; + } + + binding_table = card_state->binding_table + + render_state->binding_table_index; + ss = card_state->surface_state + render_state->surface_state_index; + /* We only use 2 or 3 entries, but the table has to be 32-byte + * aligned. + */ + render_state->binding_table_index += 8; + render_state->surface_state_index += (pMask != NULL) ? 
3 : 2; + /* Set up and bind the state buffer for the destination surface */ - card_state->binding_table[0] = state_base_offset + + binding_table[0] = state_base_offset + i965_set_picture_surface_state(pScrn, - &card_state->dst_surface.state, + &ss[0].state, pDstPicture, pDst, TRUE); /* Set up and bind the source surface state buffer */ - card_state->binding_table[1] = state_base_offset + + binding_table[1] = state_base_offset + i965_set_picture_surface_state(pScrn, - &card_state->src_surface.state, + &ss[1].state, pSrcPicture, pSrc, FALSE); if (pMask) { /* Set up and bind the mask surface state buffer */ - card_state->binding_table[2] = state_base_offset + + binding_table[2] = state_base_offset + i965_set_picture_surface_state(pScrn, - &card_state->mask_surface.state, + &ss[2].state, pMaskPicture, pMask, FALSE); } else { - card_state->binding_table[2] = 0; + binding_table[2] = 0; } src_filter = sampler_state_filter_from_picture (pSrcPicture->filter); @@ -1062,8 +1078,8 @@ i965_prepare_composite(int op, PicturePtr pSrcPicture, OUT_BATCH(0); /* clip */ OUT_BATCH(0); /* sf */ /* Only the PS uses the binding table */ - assert((offsetof(gen4_state_t, binding_table) & 31) == 0); - OUT_BATCH(state_base_offset + offsetof(gen4_state_t, binding_table)); + assert((((unsigned char *)binding_table - pI830->FbBase) & 31) == 0); + OUT_BATCH((unsigned char *)binding_table - pI830->FbBase); /* The drawing rectangle clipping is always on. Set it to values that * shouldn't do any clipping. @@ -1454,15 +1470,6 @@ gen4_render_state_cleanup(ScrnInfoPtr pScrn) pI830->gen4_render_state->card_state = NULL; } -/** - * Called when the hardware is idled and flushed, so we know we can - * reuse the buffer contents. 
- */ -void -gen4_render_state_reset(ScrnInfoPtr pScrn) -{ -} - unsigned int gen4_render_state_size(ScrnInfoPtr pScrn) { From c8ae3b781f0d8e325876a74c91cd0a685d34454b Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Sun, 20 Apr 2008 02:11:15 -0700 Subject: [PATCH 26/29] Add a bunch of 965 ring stuff to the debug dump --- src/i830_debug.c | 459 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 456 insertions(+), 3 deletions(-) diff --git a/src/i830_debug.c b/src/i830_debug.c index 15b02ceb..7e1a93f3 100644 --- a/src/i830_debug.c +++ b/src/i830_debug.c @@ -927,14 +927,438 @@ void i830DumpRegs (ScrnInfoPtr pScrn) #ifndef REG_DUMPER -#define NUM_RING_DUMP 64 +static char *mi_cmds[0x40] = { + "MI_NOOP", /* 00 */ + "Reserved 01", + "MI_USER_INTERRUPT", + "MI_WAIT_FOR_EVENT", + + "MI_FLUSH", /* 04 */ + "MI_ARB_CHECK", + NULL, + "MI_REPORT_HEAD", + + NULL, /* 08 */ + NULL, + "MI_BATCH_BUFFER_END", + NULL, + + NULL, /* 0c */ + NULL, + NULL, + NULL, + + NULL, /* 10 */ + "MI_OVERLAY_FLIP", + "MI_LOAD_SCAN_LINES_INCL", + "MI_LOAD_SCAN_LINES_EXCL", + + "MI_DISPLAY_BUFFER_INFO", /* 14 */ + NULL, + NULL, + NULL, + + "MI_SET_CONTEXT", /* 18 */ + NULL, + NULL, + NULL, + + NULL, /* 1c */ + NULL, + NULL, + NULL, + + "MI_STORE_DATA_IMM", /* 20 */ + "MI_STORE_DATA_INDEX", + "MI_LOAD_REGISTER_IMM", + NULL, + + "MI_STORE_REGISTER_MEM", /* 24 */ + NULL, + NULL, + NULL, + + NULL, /* 28 */ + NULL, + NULL, + NULL, + + NULL, /* 2c */ + NULL, + NULL, + NULL, + + NULL, /* 30 */ + "MI_BATCH_BUFFER_START", + NULL, + NULL, + + NULL, /* 34 */ + NULL, + NULL, + NULL, + + NULL, /* 38 */ + NULL, + NULL, + NULL, + + NULL, /* 3c */ + NULL, + NULL, + NULL, +}; + +static char *_2d_cmds[0x80] = { + NULL, /* 00 */ + "XY_SETUP_BLT", + NULL, + "XY_SETUP_CLIP_BLT", + + NULL, /* 04 */ + NULL, + NULL, + NULL, + + NULL, /* 08 */ + NULL, + NULL, + NULL, + + NULL, /* 0c */ + NULL, + NULL, + NULL, + + NULL, /* 10 */ + "XY_SETUP_MONO_PATTERN_SL_BLT", + NULL, + NULL, + + NULL, /* 14 */ + NULL, + NULL, + 
NULL, + + NULL, /* 18 */ + NULL, + NULL, + NULL, + + NULL, /* 1c */ + NULL, + NULL, + NULL, + + NULL, /* 20 */ + NULL, + NULL, + NULL, + + "XY_PIXEL_BLT", /* 24 */ + "XY_SCANLINE_BLT", + "XY_TEXT_BLT", + NULL, + + NULL, /* 28 */ + NULL, + NULL, + NULL, + + NULL, /* 2c */ + NULL, + NULL, + NULL, + + NULL, /* 30 */ + "XY_TEXT_IMMEDIATE_BLT", + NULL, + NULL, + + NULL, /* 34 */ + NULL, + NULL, + NULL, + + NULL, /* 38 */ + NULL, + NULL, + NULL, + + NULL, /* 3c */ + NULL, + NULL, + NULL, + + "COLOR_BLT", /* 40 */ + NULL, + NULL, + "SRC_COPY_BLT", + + NULL, /* 44 */ + NULL, + NULL, + NULL, + + NULL, /* 48 */ + NULL, + NULL, + NULL, + + NULL, /* 4c */ + NULL, + NULL, + NULL, + + "XY_COLOR_BLT", /* 50 */ + "XY_PAT_BLT", + "XY_MONO_PAT_BLT", + "XY_SRC_COPY_BLT", + + "XY_MONO_SRC_COPY_BLT", /* 54 */ + "XY_FULL_BLT", + "XY_FULL_MONO_SRC_BLT", + "XY_FULL_MONO_PATTERN_BLT", + + "XY_FULL_MONO_PATTERN_MONO_SRC_BLT", /* 58 */ + "XY_MONO_PAT_FIXED_BLT", + NULL, + NULL, + + NULL, /* 5c */ + NULL, + NULL, + NULL, + + NULL, /* 60 */ + NULL, + NULL, + NULL, + + NULL, /* 64 */ + NULL, + NULL, + NULL, + + NULL, /* 68 */ + NULL, + NULL, + NULL, + + NULL, /* 6c */ + NULL, + NULL, + NULL, + + NULL, /* 70 */ + "XY_MONO_SRC_COPY_IMMEDIATE_BLT", + "XY_PAT_BLT_IMMEDIATE", + "XY_SRC_COPY_CHROMA_BLT", + + "XY_FULL_IMMEDIATE_PATTERN_BLT", /* 74 */ + "XY_FULL_MONO_SRC_IMMEDIATE_PATTERN_BLT", + "XY_PAT_CHROMA_BLT", + "XY_PAT_CHROMA_BLT_IMMEDIATE", + + NULL, /* 78 */ + NULL, + NULL, + NULL, + + NULL, /* 7c */ + NULL, + NULL, + NULL, + +}; + +static char *_3d_cmds[0x4][0x8][0x100] = { + { /* Pipeline Type 00 (Common) */ + { /* Opcode 0 */ + "URB_FENCE", /* 00 */ + "CS_URB_STATE", + "CONSTANT_BUFFER", + "STATE_PREFETCH", + }, + { /* Opcode 1 */ + NULL, /* 00 */ + "STATE_BASE_ADDRESS", + "STATE_SIP", + NULL, + }, + }, + { /* Pipeline Type 01 (Single DW) */ + { /* Opcode 0 */ + }, + { /* Opcode 1 */ + NULL, /* 00 */ + NULL, + NULL, + NULL, + + "PIPELINE_SELECT", /* 04 */ + NULL, + NULL, + NULL, + }, + }, 
+ { /* Pipeline Type 02 (Media) */ + { /* Opcode 0 */ + "MEDIA_STATE_POINTERS", /* 00 */ + }, + { /* Opcode 1 */ + "MEDIA_OBJECT", /* 00 */ + "MEDIA_OBJECT_EX", + "MEDIA_OBJECT_PTR", + }, + }, + { /* Pipeline Type 03 (3D) */ + { /* Opcode 0 */ + "3DSTATE_PIPELINED_POINTERS", /* 00 */ + "3DSTATE_BINDING_TABLE_POINTERS", + NULL, + NULL, + + NULL, /* 04 */ + "3DSTATE_URB", + NULL, + NULL, + + "3DSTATE_VERTEX_BUFFERS", /* 08 */ + "3DSTATE_VERTEX_ELEMENTS", + "3DSTATE_INDEX_BUFFER", + NULL, + + NULL, /* 0c */ + "3DSTATE_VIEWPORT_STATE_POINTERS", + }, + { /* Opcode 1 */ + "3DSTATE_DRAWING_RECTANGLE", /* 00 */ + "3DSTATE_CONSTANT_COLOR", + "3DSTATE_SAMPLER_PALETTE_LOAD0", + NULL, + + "3DSTATE_CHROMA_KEY", /* 04 */ + "3DSTATE_DEPTH_BUFFER", + "3DSTATE_POLY_STIPPLE_OFFSET", + "3DSTATE_POLY_STIPPLE_PATTERN", + + "3DSTATE_LINE_STIPPLE", /* 08 */ + "3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP", + }, + { /* Opcode 2 */ + "PIPE_CONTROL", /* 00 */ + }, + { /* Opcode 3 */ + "3DPRIMITIVE", /* 00 */ + }, + }, +}; + +static int +i830_valid_command (uint32_t cmd) +{ + uint32_t type = (cmd >> 29) & 0x7; + uint32_t pipeline_type; + uint32_t opcode; + uint32_t subopcode; + uint32_t count; + + switch (type) { + case 0: /* Memory Interface */ + opcode = (cmd >> 23) & 0x3f; + if (cmd == 0x00000000) + count = 1; + else + count = (cmd & 0x3f) + 2; + if (!mi_cmds[opcode]) + return -1; + break; + case 1: + break; + case 2: /* 2D */ + count = (cmd & 0x1f) + 2; + opcode = (cmd >> 22) & 0x7f; + if (!_2d_cmds[opcode]) + return -1; + break; + case 3: /* 3D */ + pipeline_type = (cmd >> 27) & 0x3; + opcode = (cmd >> 24) & 0x7; + subopcode = (cmd >> 16) & 0xff; + count = (cmd & 0xff) + 2; + if (pipeline_type <= 3) + return count; + if (!_3d_cmds[pipeline_type][opcode][subopcode]) + return -1; + break; + default: + return -1; + } + return count; +} + +static void +i830_dump_cmd (uint32_t cmd) +{ + uint32_t type = (cmd >> 29) & 0x7; + uint32_t pipeline_type; + uint32_t opcode; + uint32_t subopcode; + uint32_t 
count; + + ErrorF ("\t\t"); + switch (type) { + case 0: /* Memory Interface */ + opcode = (cmd >> 23) & 0x3f; + if (cmd == 0x00000000) + count = 1; + else + count = (cmd & 0x3f) + 2; + if (mi_cmds[opcode]) + ErrorF ("%08x %20.20s %d\n", cmd, mi_cmds[opcode], count); + else + ErrorF ("%08x Memory Interface Reserved\n", cmd); + break; + case 1: + break; + case 2: /* 2D */ + opcode = (cmd >> 22) & 0x7f; + count = (cmd & 0x1f) + 2; + if (_2d_cmds[opcode]) + ErrorF ("%08x %20.20s %d\n", cmd, _2d_cmds[opcode], count); + else + ErrorF ("%08x 2D Reserved\n", cmd); + break; + case 3: /* 3D */ + pipeline_type = (cmd >> 27) & 0x3; + opcode = (cmd >> 24) & 0x7; + subopcode = (cmd >> 16) & 0xff; + count = (cmd & 0xff) + 2; + if (_3d_cmds[pipeline_type][opcode][subopcode]) { + ErrorF ("%08x %20.20s %d\n", + cmd, + _3d_cmds[pipeline_type][opcode][subopcode], + count); + } else { + ErrorF ("%08x 3D/Media Reserved\n", cmd); + } + break; + default: + ErrorF ("%08x Reserved\n", cmd); + break; + } +} static void i830_dump_ring(ScrnInfoPtr pScrn) { I830Ptr pI830 = I830PTR(pScrn); - unsigned int head, tail, ring, mask; + unsigned int head, tail, ring, mask, cmd; volatile unsigned char *virt; + uint32_t data; + int count; + volatile uint32_t *ptr; head = (INREG (LP_RING + RING_HEAD)) & I830_HEAD_MASK; tail = INREG (LP_RING + RING_TAIL) & I830_TAIL_MASK; @@ -943,10 +1367,38 @@ i830_dump_ring(ScrnInfoPtr pScrn) virt = pI830->LpRing->virtual_start; ErrorF ("Ring at virtual %p head 0x%x tail 0x%x count %d\n", virt, head, tail, (((tail + mask + 1) - head) & mask) >> 2); - for (ring = (head - 128) & mask; ring != ((head + 4) & mask); + + /* walk back by instructions */ + cmd = (head + 4) & mask; + for (ring = head & mask; ring != ((head - 256) & mask); + ring = (ring - 4) & mask) + { + ptr = (volatile uint32_t *) (virt + ring); + data = *ptr; + ErrorF ("\t%08x: %08x\n", ring, *(volatile unsigned int *) (virt + ring)); + count = i830_valid_command (data); + if (count < 0) + continue; + if 
(((ring + count * 4) & mask) == cmd) + { + i830_dump_cmd (data); + cmd = ring; + } + } + + for (ring = cmd; ring != ((head + 4) & mask); ring = (ring + 4) & mask) { ErrorF ("\t%08x: %08x\n", ring, *(volatile unsigned int *) (virt + ring)); + if (ring == cmd) + { + ptr = (volatile uint32_t *) (virt + ring); + data = *ptr; + i830_dump_cmd (data); + count = i830_valid_command (data); + + cmd = (cmd + count * 4) & mask; + } } ErrorF ("Ring end\n"); } @@ -1062,6 +1514,7 @@ i965_dump_error_state(ScrnInfoPtr pScrn) INREG(TS_DEBUG_DATA)); ErrorF("TD_CTL 0x%08x / 0x%08x\n", INREG(TD_CTL), INREG(TD_CTL2)); + i830_dump_ring (pScrn); } /** From 40e0a03af57d7b3180d7066bcb15e03dcc9ca295 Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Mon, 21 Apr 2008 01:03:22 -0700 Subject: [PATCH 27/29] Dump batch buffers found from main ring --- src/i830_debug.c | 270 +++++++++++++++++++++++++++++++---------------- 1 file changed, 179 insertions(+), 91 deletions(-) diff --git a/src/i830_debug.c b/src/i830_debug.c index 7e1a93f3..074e8b9c 100644 --- a/src/i830_debug.c +++ b/src/i830_debug.c @@ -1172,85 +1172,92 @@ static char *_2d_cmds[0x80] = { }; -static char *_3d_cmds[0x4][0x8][0x100] = { +#define _3D_ONE_WORD 1 + +static struct { + char *name; + int flags; +} _3d_cmds[0x4][0x8][0x100] = { { /* Pipeline Type 00 (Common) */ { /* Opcode 0 */ - "URB_FENCE", /* 00 */ - "CS_URB_STATE", - "CONSTANT_BUFFER", - "STATE_PREFETCH", + { "URB_FENCE", 0 }, /* 00 */ + { "CS_URB_STATE", 0 }, + { "CONSTANT_BUFFER", 0 }, + { "STATE_PREFETCH", 0 }, }, { /* Opcode 1 */ - NULL, /* 00 */ - "STATE_BASE_ADDRESS", - "STATE_SIP", - NULL, + { NULL, 0 }, /* 00 */ + { "STATE_BASE_ADDRESS", 0 }, + { "STATE_SIP", 0 }, + { NULL, 0 }, + + { "PIPELINE_SELECT", _3D_ONE_WORD }, /* 04 */ }, }, { /* Pipeline Type 01 (Single DW) */ { /* Opcode 0 */ }, { /* Opcode 1 */ - NULL, /* 00 */ - NULL, - NULL, - NULL, + { NULL, 0 }, /* 00 */ + { NULL, 0 }, + { NULL, 0 }, + { NULL, 0 }, - "PIPELINE_SELECT", /* 04 */ - NULL, - NULL, - 
NULL, + { "PIPELINE_SELECT", 0 }, /* 04 */ + { NULL, 0 }, + { NULL, 0 }, + { NULL, 0 }, }, }, { /* Pipeline Type 02 (Media) */ { /* Opcode 0 */ - "MEDIA_STATE_POINTERS", /* 00 */ + { "MEDIA_STATE_POINTERS", 0 }, /* 00 */ }, { /* Opcode 1 */ - "MEDIA_OBJECT", /* 00 */ - "MEDIA_OBJECT_EX", - "MEDIA_OBJECT_PTR", + { "MEDIA_OBJECT", 0 }, /* 00 */ + { "MEDIA_OBJECT_EX", 0 }, + { "MEDIA_OBJECT_PTR", 0 }, }, }, { /* Pipeline Type 03 (3D) */ { /* Opcode 0 */ - "3DSTATE_PIPELINED_POINTERS", /* 00 */ - "3DSTATE_BINDING_TABLE_POINTERS", - NULL, - NULL, + { "3DSTATE_PIPELINED_POINTERS", 0 }, /* 00 */ + { "3DSTATE_BINDING_TABLE_POINTERS", 0 }, + { NULL, 0 }, + { NULL, 0 }, - NULL, /* 04 */ - "3DSTATE_URB", - NULL, - NULL, + { NULL, 0 }, /* 04 */ + { "3DSTATE_URB", 0 }, + { NULL, 0 }, + { NULL, 0 }, - "3DSTATE_VERTEX_BUFFERS", /* 08 */ - "3DSTATE_VERTEX_ELEMENTS", - "3DSTATE_INDEX_BUFFER", - NULL, + { "3DSTATE_VERTEX_BUFFERS", 0 }, /* 08 */ + { "3DSTATE_VERTEX_ELEMENTS", 0 }, + { "3DSTATE_INDEX_BUFFER", 0 }, + { "3DSTATE_VF_STATISTICS", _3D_ONE_WORD }, - NULL, /* 0c */ - "3DSTATE_VIEWPORT_STATE_POINTERS", + { NULL, 0 }, /* 0c */ + { "3DSTATE_VIEWPORT_STATE_POINTERS", 0 }, }, { /* Opcode 1 */ - "3DSTATE_DRAWING_RECTANGLE", /* 00 */ - "3DSTATE_CONSTANT_COLOR", - "3DSTATE_SAMPLER_PALETTE_LOAD0", - NULL, + { "3DSTATE_DRAWING_RECTANGLE", 0 }, /* 00 */ + { "3DSTATE_CONSTANT_COLOR", 0 }, + { "3DSTATE_SAMPLER_PALETTE_LOAD0", 0 }, + { NULL, 0 }, - "3DSTATE_CHROMA_KEY", /* 04 */ - "3DSTATE_DEPTH_BUFFER", - "3DSTATE_POLY_STIPPLE_OFFSET", - "3DSTATE_POLY_STIPPLE_PATTERN", + { "3DSTATE_CHROMA_KEY", 0 }, /* 04 */ + { "3DSTATE_DEPTH_BUFFER", 0 }, + { "3DSTATE_POLY_STIPPLE_OFFSET", 0 }, + { "3DSTATE_POLY_STIPPLE_PATTERN", 0 }, - "3DSTATE_LINE_STIPPLE", /* 08 */ - "3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP", + { "3DSTATE_LINE_STIPPLE", 0 }, /* 08 */ + { "3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP", 0 }, }, { /* Opcode 2 */ - "PIPE_CONTROL", /* 00 */ + { "PIPE_CONTROL", 0 }, /* 00 */ }, { /* Opcode 3 */ - 
"3DPRIMITIVE", /* 00 */ + { "3DPRIMITIVE", 0 }, /* 00 */ }, }, }; @@ -1267,10 +1274,12 @@ i830_valid_command (uint32_t cmd) switch (type) { case 0: /* Memory Interface */ opcode = (cmd >> 23) & 0x3f; - if (cmd == 0x00000000) + if (opcode < 0x10) count = 1; else count = (cmd & 0x3f) + 2; + if (opcode == 0x00 && cmd != 0x00000000) + return -1; if (!mi_cmds[opcode]) return -1; break; @@ -1286,10 +1295,13 @@ i830_valid_command (uint32_t cmd) pipeline_type = (cmd >> 27) & 0x3; opcode = (cmd >> 24) & 0x7; subopcode = (cmd >> 16) & 0xff; - count = (cmd & 0xff) + 2; + if (_3d_cmds[pipeline_type][opcode][subopcode].flags & _3D_ONE_WORD) + count = 1; + else + count = (cmd & 0xff) + 2; if (pipeline_type <= 3) return count; - if (!_3d_cmds[pipeline_type][opcode][subopcode]) + if (!_3d_cmds[pipeline_type][opcode][subopcode].name) return -1; break; default: @@ -1298,63 +1310,57 @@ i830_valid_command (uint32_t cmd) return count; } -static void -i830_dump_cmd (uint32_t cmd) +static int +i830_dump_cmd (uint32_t cmd, int count) { uint32_t type = (cmd >> 29) & 0x7; uint32_t pipeline_type; uint32_t opcode; uint32_t subopcode; - uint32_t count; + int ret = 1; - ErrorF ("\t\t"); + ErrorF ("\t"); switch (type) { case 0: /* Memory Interface */ opcode = (cmd >> 23) & 0x3f; - if (cmd == 0x00000000) - count = 1; - else - count = (cmd & 0x3f) + 2; if (mi_cmds[opcode]) - ErrorF ("%08x %20.20s %d\n", cmd, mi_cmds[opcode], count); + ErrorF ("%-40.40s %d\n", mi_cmds[opcode], count); else - ErrorF ("%08x Memory Interface Reserved\n", cmd); + ErrorF ("Memory Interface Reserved\n"); break; case 1: break; case 2: /* 2D */ opcode = (cmd >> 22) & 0x7f; - count = (cmd & 0x1f) + 2; if (_2d_cmds[opcode]) - ErrorF ("%08x %20.20s %d\n", cmd, _2d_cmds[opcode], count); + ErrorF ("%-40.40s %d\n", _2d_cmds[opcode], count); else - ErrorF ("%08x 2D Reserved\n", cmd); + ErrorF ("2D Reserved\n"); break; case 3: /* 3D */ pipeline_type = (cmd >> 27) & 0x3; opcode = (cmd >> 24) & 0x7; subopcode = (cmd >> 16) & 0xff; - 
count = (cmd & 0xff) + 2; - if (_3d_cmds[pipeline_type][opcode][subopcode]) { - ErrorF ("%08x %20.20s %d\n", - cmd, - _3d_cmds[pipeline_type][opcode][subopcode], + if (_3d_cmds[pipeline_type][opcode][subopcode].name) { + ErrorF ("%-40.40s %d\n", + _3d_cmds[pipeline_type][opcode][subopcode].name, count); } else { - ErrorF ("%08x 3D/Media Reserved\n", cmd); + ErrorF ("3D/Media Reserved (pipe %d op %d sub %d)\n", pipeline_type, opcode, subopcode); } break; default: - ErrorF ("%08x Reserved\n", cmd); + ErrorF ("Reserved\n"); break; } + return ret; } -static void -i830_dump_ring(ScrnInfoPtr pScrn) +static int +i830_valid_chain (ScrnInfoPtr pScrn, unsigned int ring, unsigned int end) { I830Ptr pI830 = I830PTR(pScrn); - unsigned int head, tail, ring, mask, cmd; + unsigned int head, tail, mask; volatile unsigned char *virt; uint32_t data; int count; @@ -1368,38 +1374,118 @@ i830_dump_ring(ScrnInfoPtr pScrn) ErrorF ("Ring at virtual %p head 0x%x tail 0x%x count %d\n", virt, head, tail, (((tail + mask + 1) - head) & mask) >> 2); - /* walk back by instructions */ - cmd = (head + 4) & mask; - for (ring = head & mask; ring != ((head - 256) & mask); - ring = (ring - 4) & mask) + for (;;) { ptr = (volatile uint32_t *) (virt + ring); data = *ptr; - ErrorF ("\t%08x: %08x\n", ring, *(volatile unsigned int *) (virt + ring)); count = i830_valid_command (data); if (count < 0) - continue; - if (((ring + count * 4) & mask) == cmd) + return 0; + while (count > 0 && ring != end) { - i830_dump_cmd (data); - cmd = ring; + ring = (ring + 4) & mask; + count--; + } + if (ring == end) { + if (count == 0) + return 1; + else + return 0; } } +} - for (ring = cmd; ring != ((head + 4) & mask); - ring = (ring + 4) & mask) +static void +i830_dump_cmds (ScrnInfoPtr pScrn, + volatile unsigned char *virt, + uint32_t start, + uint32_t stop, + uint32_t mask, + uint32_t acthd) +{ + I830Ptr pI830 = I830PTR(pScrn); + uint32_t ring = start; + uint32_t cmd = start; + uint32_t data; + uint32_t batch_start_mask = 
((0x7 << 29) | + (0x3f << 23) | + (0x7ff << 12) | + (1 << 11) | + (1 << 7) | + (1 << 6) | + (0x3f << 0)); + uint32_t batch_start_cmd = ((0x0 << 29) | + (0x31 << 23) | + (0x00 << 12) | + (0 << 11) | + (1 << 7) | + (0 << 6) | + (0 << 0)); + int count; + volatile uint32_t *ptr; + + while (ring != stop) { - ErrorF ("\t%08x: %08x\n", ring, *(volatile unsigned int *) (virt + ring)); + if (ring == acthd) + ErrorF ("****"); + ErrorF ("\t%08x: %08x", ring, *(volatile unsigned int *) (virt + ring)); if (ring == cmd) { ptr = (volatile uint32_t *) (virt + ring); data = *ptr; - i830_dump_cmd (data); count = i830_valid_command (data); - + i830_dump_cmd (data, count); + + /* check for MI_BATCH_BUFFER_END */ + if (data == (0x0a << 23)) + stop = (ring + 4) & mask; + /* check for MI_BATCH_BUFFER_START */ + if ((data & batch_start_mask) == batch_start_cmd) + { + uint32_t batch = ptr[1]; + if (batch < pI830->FbMapSize) { + ErrorF ("\t%08x: %08x\n", (ring + 4) & mask, batch); + ErrorF ("Batch buffer at 0x%08x {\n", batch); + i830_dump_cmds (pScrn, pI830->FbBase, batch, + pI830->FbMapSize - batch, + 0xffffffff, acthd); + ErrorF ("}\n"); + ring = (ring + (count - 1) * 4) & mask; + } + } cmd = (cmd + count * 4) & mask; - } + } else + ErrorF ("\n"); + ring = (ring + 4) & mask; } +} + +static void +i830_dump_ring(ScrnInfoPtr pScrn, uint32_t acthd) +{ + I830Ptr pI830 = I830PTR(pScrn); + unsigned int head, tail, mask, cmd; + volatile unsigned char *virt; + + head = (INREG (LP_RING + RING_HEAD)) & I830_HEAD_MASK; + tail = INREG (LP_RING + RING_TAIL) & I830_TAIL_MASK; + mask = pI830->LpRing->tail_mask; + + virt = pI830->LpRing->virtual_start; + ErrorF ("Ring at virtual %p head 0x%x tail 0x%x count %d\n", + virt, head, tail, (((tail + mask + 1) - head) & mask) >> 2); + + /* walk back by instructions */ + for (cmd = (head - 256) & mask; + cmd != (head & mask); + cmd = (cmd + 4) & mask) + { + if (i830_valid_chain (pScrn, cmd, (head & mask))) + break; + } + + i830_dump_cmds (pScrn, virt, cmd, head, 
mask, acthd); + ErrorF ("Ring end\n"); } @@ -1432,13 +1518,14 @@ i830_dump_error_state(ScrnInfoPtr pScrn) ErrorF("hwstam: 0x%04x ier: 0x%04x imr: 0x%04x iir: 0x%04x\n", INREG16(HWSTAM), INREG16(IER), INREG16(IMR), INREG16(IIR)); - i830_dump_ring (pScrn); + i830_dump_ring (pScrn, 0); } void i965_dump_error_state(ScrnInfoPtr pScrn) { I830Ptr pI830 = I830PTR(pScrn); + uint32_t acthd; ErrorF("pgetbl_ctl: 0x%08x pgetbl_err: 0x%08x\n", INREG(PGETBL_CTL), INREG(PGE_ERR)); @@ -1468,8 +1555,9 @@ i965_dump_error_state(ScrnInfoPtr pScrn) "imr: 0x%08x iir: 0x%08x\n", INREG(HWSTAM), INREG(IER), INREG(IMR), INREG(IIR)); + acthd = INREG(ACTHD); ErrorF("acthd: 0x%08x dma_fadd_p: 0x%08x\n", - INREG(ACTHD), INREG(DMA_FADD_P)); + acthd, INREG(DMA_FADD_P)); ErrorF("ecoskpd: 0x%08x excc: 0x%08x\n", INREG(ECOSKPD), INREG(EXCC)); @@ -1514,7 +1602,7 @@ i965_dump_error_state(ScrnInfoPtr pScrn) INREG(TS_DEBUG_DATA)); ErrorF("TD_CTL 0x%08x / 0x%08x\n", INREG(TD_CTL), INREG(TD_CTL2)); - i830_dump_ring (pScrn); + i830_dump_ring (pScrn, acthd); } /** From 1d467a8038946a37844795e8860be113d43219ac Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Wed, 23 Apr 2008 11:08:38 -0700 Subject: [PATCH 28/29] Overlay video doesn't require that the target pixmap be in video memory. I830PutImage was checking to make sure the target pixmap resided in video memory, but this isn't necessary when using the overlay. Test --- src/i830_video.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/i830_video.c b/src/i830_video.c index 1fa9d75f..e1095781 100644 --- a/src/i830_video.c +++ b/src/i830_video.c @@ -2441,13 +2441,13 @@ I830PutImage(ScrnInfoPtr pScrn, } #ifdef I830_USE_EXA - if (pI830->useEXA) { + if (pPriv->textured && pI830->useEXA) { /* Force the pixmap into framebuffer so we can draw to it. 
*/ exaMoveInPixmap(pPixmap); } #endif - if (!pI830->useEXA && + if (pPriv->textured && !pI830->useEXA && (((char *)pPixmap->devPrivate.ptr < (char *)pI830->FbBase) || ((char *)pPixmap->devPrivate.ptr >= (char *)pI830->FbBase + pI830->FbMapSize))) { From fff17b9d1b58cb53032d153094826dd306836d59 Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Tue, 29 Apr 2008 10:32:14 -0700 Subject: [PATCH 29/29] Use new xf86RotateFreeShadow function to clean up shadow buffers. This simply moves code from the driver up into the X server; use it where available. --- configure.ac | 12 ++++++++++++ src/i830_driver.c | 6 ++++++ 2 files changed, 18 insertions(+) diff --git a/configure.ac b/configure.ac index c0a1e0bc..00f075e8 100644 --- a/configure.ac +++ b/configure.ac @@ -174,6 +174,18 @@ fi AC_SUBST([XMODES_CFLAGS]) +SAVE_CPPFLAGS="$CPPFLAGS" +CPPFLAGS="$CPPFLAGS $XORG_CFLAGS" + +AC_CHECK_DECL(xf86RotateFreeShadow, + [AC_DEFINE(HAVE_FREE_SHADOW, 1, [have new FreeShadow API])], + [], + [#include + #include + #include ]) + +CPPFLAGS="$SAVE_CPPFLAGS" + dnl Use lots of warning flags with GCC WARN_CFLAGS="" diff --git a/src/i830_driver.c b/src/i830_driver.c index 6bf35662..9077c583 100644 --- a/src/i830_driver.c +++ b/src/i830_driver.c @@ -3136,7 +3136,9 @@ I830LeaveVT(int scrnIndex, int flags) ScrnInfoPtr pScrn = xf86Screens[scrnIndex]; I830Ptr pI830 = I830PTR(pScrn); xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(pScrn); +#ifndef HAVE_FREE_SHADOW int o; +#endif DPRINTF(PFX, "Leave VT\n"); @@ -3164,6 +3166,7 @@ I830LeaveVT(int scrnIndex, int flags) } #endif +#ifndef HAVE_FREE_SHADOW for (o = 0; o < config->num_crtc; o++) { xf86CrtcPtr crtc = config->crtc[o]; @@ -3174,6 +3177,9 @@ I830LeaveVT(int scrnIndex, int flags) crtc->rotatedData = NULL; } } +#else + xf86RotateFreeShadow(pScrn); +#endif xf86_hide_cursors (pScrn);