sna: Expand batch buffers

As batch buffers are compacted to fit into the smallest bo, the only
cost is the larger static array allocation (and presumably cache
misses).

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
This commit is contained in:
Chris Wilson 2012-05-07 10:23:19 +01:00
parent 9281b80644
commit ca4d2296e6
6 changed files with 64 additions and 80 deletions

View File

@@ -745,7 +745,7 @@ static bool gen4_check_repeat(PicturePtr picture)
* Sets up the common fields for a surface state buffer for the given
* picture in the given surface state buffer.
*/
static int
static uint32_t
gen4_bind_bo(struct sna *sna,
struct kgem_bo *bo,
uint32_t width,
@@ -766,14 +766,11 @@ gen4_bind_bo(struct sna *sna,
offset = kgem_bo_get_binding(bo, format);
if (offset)
return offset;
return offset * sizeof(uint32_t);
offset = sna->kgem.surface - sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
offset *= sizeof(uint32_t);
sna->kgem.surface -=
offset = sna->kgem.surface -=
sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t);
ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss));
ss = memset(sna->kgem.batch + offset, 0, sizeof(*ss));
ss->ss0.surface_type = GEN4_SURFACE_2D;
ss->ss0.surface_format = format;
@@ -781,9 +778,7 @@ gen4_bind_bo(struct sna *sna,
ss->ss0.data_return_format = GEN4_SURFACERETURNFORMAT_FLOAT32;
ss->ss0.color_blend = 1;
ss->ss1.base_addr =
kgem_add_reloc(&sna->kgem,
sna->kgem.surface + 1,
bo, domains, 0);
kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
ss->ss2.height = height - 1;
ss->ss2.width = width - 1;
@@ -798,7 +793,7 @@ gen4_bind_bo(struct sna *sna,
ss->ss0.surface_format, width, height, bo->pitch, bo->tiling,
domains & 0xffff ? "render" : "sampler"));
return offset;
return offset * sizeof(uint32_t);
}
fastcall static void

View File

@@ -749,7 +749,7 @@ gen5_tiling_bits(uint32_t tiling)
* Sets up the common fields for a surface state buffer for the given
* picture in the given surface state buffer.
*/
static int
static uint32_t
gen5_bind_bo(struct sna *sna,
struct kgem_bo *bo,
uint32_t width,
@@ -771,23 +771,18 @@ gen5_bind_bo(struct sna *sna,
if (!DBG_NO_SURFACE_CACHE) {
offset = kgem_bo_get_binding(bo, format);
if (offset)
return offset;
return offset * sizeof(uint32_t);
}
offset = sna->kgem.surface - sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t);
offset *= sizeof(uint32_t);
sna->kgem.surface -=
offset = sna->kgem.surface -=
sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t);
ss = sna->kgem.batch + sna->kgem.surface;
ss = sna->kgem.batch + offset;
ss[0] = (GEN5_SURFACE_2D << GEN5_SURFACE_TYPE_SHIFT |
GEN5_SURFACE_BLEND_ENABLED |
format << GEN5_SURFACE_FORMAT_SHIFT);
ss[1] = kgem_add_reloc(&sna->kgem,
sna->kgem.surface + 1,
bo, domains, 0);
ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
ss[2] = ((width - 1) << GEN5_SURFACE_WIDTH_SHIFT |
(height - 1) << GEN5_SURFACE_HEIGHT_SHIFT);
@@ -803,7 +798,7 @@ gen5_bind_bo(struct sna *sna,
format, width, height, bo->pitch, bo->tiling,
domains & 0xffff ? "render" : "sampler"));
return offset;
return offset * sizeof(uint32_t);
}
fastcall static void

View File

@@ -1198,21 +1198,16 @@ gen6_bind_bo(struct sna *sna,
DBG(("[%x] bo(handle=%d), format=%d, reuse %s binding\n",
offset, bo->handle, format,
domains & 0xffff ? "render" : "sampler"));
return offset;
return offset * sizeof(uint32_t);
}
offset = sna->kgem.surface - sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
offset *= sizeof(uint32_t);
sna->kgem.surface -=
offset = sna->kgem.surface -=
sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t);
ss = sna->kgem.batch + sna->kgem.surface;
ss = sna->kgem.batch + offset;
ss[0] = (GEN6_SURFACE_2D << GEN6_SURFACE_TYPE_SHIFT |
GEN6_SURFACE_BLEND_ENABLED |
format << GEN6_SURFACE_FORMAT_SHIFT);
ss[1] = kgem_add_reloc(&sna->kgem,
sna->kgem.surface + 1,
bo, domains, 0);
ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
ss[2] = ((width - 1) << GEN6_SURFACE_WIDTH_SHIFT |
(height - 1) << GEN6_SURFACE_HEIGHT_SHIFT);
assert(bo->pitch <= (1 << 18));
@@ -1228,7 +1223,7 @@ gen6_bind_bo(struct sna *sna,
format, width, height, bo->pitch, bo->tiling,
domains & 0xffff ? "render" : "sampler"));
return offset;
return offset * sizeof(uint32_t);
}
fastcall static void

View File

@@ -1293,7 +1293,7 @@ gen7_tiling_bits(uint32_t tiling)
* Sets up the common fields for a surface state buffer for the given
* picture in the given surface state buffer.
*/
static int
static uint32_t
gen7_bind_bo(struct sna *sna,
struct kgem_bo *bo,
uint32_t width,
@@ -1303,7 +1303,7 @@ gen7_bind_bo(struct sna *sna,
{
uint32_t *ss;
uint32_t domains;
uint16_t offset;
int offset;
COMPILE_TIME_ASSERT(sizeof(struct gen7_surface_state) == 32);
@@ -1316,20 +1316,15 @@ gen7_bind_bo(struct sna *sna,
offset = kgem_bo_get_binding(bo, format);
if (offset)
return offset;
return offset * sizeof(uint32_t);
offset = sna->kgem.surface - sizeof(struct gen7_surface_state) / sizeof(uint32_t);
offset *= sizeof(uint32_t);
sna->kgem.surface -=
offset = sna->kgem.surface -=
sizeof(struct gen7_surface_state) / sizeof(uint32_t);
ss = sna->kgem.batch + sna->kgem.surface;
ss = sna->kgem.batch + offset;
ss[0] = (GEN7_SURFACE_2D << GEN7_SURFACE_TYPE_SHIFT |
gen7_tiling_bits(bo->tiling) |
format << GEN7_SURFACE_FORMAT_SHIFT);
ss[1] = kgem_add_reloc(&sna->kgem,
sna->kgem.surface + 1,
bo, domains, 0);
ss[1] = kgem_add_reloc(&sna->kgem, offset + 1, bo, domains, 0);
ss[2] = ((width - 1) << GEN7_SURFACE_WIDTH_SHIFT |
(height - 1) << GEN7_SURFACE_HEIGHT_SHIFT);
ss[3] = (bo->pitch - 1) << GEN7_SURFACE_PITCH_SHIFT;
@@ -1345,7 +1340,7 @@ gen7_bind_bo(struct sna *sna,
format, width, height, bo->pitch, bo->tiling,
domains & 0xffff ? "render" : "sampler"));
return offset;
return offset * sizeof(uint32_t);
}
fastcall static void

View File

@@ -595,10 +595,12 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
kgem->wedged = drmCommandNone(kgem->fd, DRM_I915_GEM_THROTTLE) == -EIO;
kgem->wedged |= DBG_NO_HW;
kgem->max_batch_size = ARRAY_SIZE(kgem->batch);
kgem->batch_size = ARRAY_SIZE(kgem->batch);
if (gen == 22)
/* 865g cannot handle a batch spanning multiple pages */
kgem->max_batch_size = PAGE_SIZE / sizeof(uint32_t);
kgem->batch_size = PAGE_SIZE / sizeof(uint32_t);
if (gen == 70)
kgem->batch_size = 16*1024;
kgem->min_alignment = 4;
if (gen < 40)
@@ -1656,16 +1658,16 @@ static int kgem_batch_write(struct kgem *kgem, uint32_t handle, uint32_t size)
assert(!kgem_busy(kgem, handle));
/* If there is no surface data, just upload the batch */
if (kgem->surface == kgem->max_batch_size)
if (kgem->surface == kgem->batch_size)
return gem_write(kgem->fd, handle,
0, sizeof(uint32_t)*kgem->nbatch,
kgem->batch);
/* Are the batch pages conjoint with the surface pages? */
if (kgem->surface < kgem->nbatch + PAGE_SIZE/4) {
assert(size == sizeof(kgem->batch));
if (kgem->surface < kgem->nbatch + PAGE_SIZE/sizeof(uint32_t)) {
assert(size == PAGE_ALIGN(kgem->batch_size*sizeof(uint32_t)));
return gem_write(kgem->fd, handle,
0, sizeof(kgem->batch),
0, kgem->batch_size*sizeof(uint32_t),
kgem->batch);
}
@@ -1676,11 +1678,11 @@ static int kgem_batch_write(struct kgem *kgem, uint32_t handle, uint32_t size)
if (ret)
return ret;
assert(kgem->nbatch*sizeof(uint32_t) <=
sizeof(uint32_t)*kgem->surface - (sizeof(kgem->batch)-size));
ret = PAGE_ALIGN(sizeof(uint32_t) * kgem->batch_size);
ret -= sizeof(uint32_t) * kgem->surface;
assert(size-ret >= kgem->nbatch*sizeof(uint32_t));
return __gem_write(kgem->fd, handle,
sizeof(uint32_t)*kgem->surface - (sizeof(kgem->batch)-size),
sizeof(kgem->batch) - sizeof(uint32_t)*kgem->surface,
size - ret, (kgem->batch_size - kgem->surface)*sizeof(uint32_t),
kgem->batch + kgem->surface);
}
@@ -1719,7 +1721,7 @@ void kgem_reset(struct kgem *kgem)
kgem->aperture = 0;
kgem->aperture_fenced = 0;
kgem->nbatch = 0;
kgem->surface = kgem->max_batch_size;
kgem->surface = kgem->batch_size;
kgem->mode = KGEM_NONE;
kgem->flush = 0;
kgem->scanout = 0;
@@ -1734,24 +1736,26 @@ static int compact_batch_surface(struct kgem *kgem)
int size, shrink, n;
/* See if we can pack the contents into one or two pages */
size = kgem->max_batch_size - kgem->surface + kgem->nbatch;
if (size > 2048)
return sizeof(kgem->batch);
else if (size > 1024)
size = 8192, shrink = 2*4096;
else
size = 4096, shrink = 3*4096;
n = ALIGN(kgem->batch_size, 1024);
size = n - kgem->surface + kgem->nbatch;
size = ALIGN(size, 1024);
for (n = 0; n < kgem->nreloc; n++) {
if (kgem->reloc[n].read_domains == I915_GEM_DOMAIN_INSTRUCTION &&
kgem->reloc[n].target_handle == 0)
kgem->reloc[n].delta -= shrink;
shrink = n - size;
if (shrink) {
DBG(("shrinking from %d to %d\n", kgem->batch_size, size));
if (kgem->reloc[n].offset >= size)
kgem->reloc[n].offset -= shrink;
shrink *= sizeof(uint32_t);
for (n = 0; n < kgem->nreloc; n++) {
if (kgem->reloc[n].read_domains == I915_GEM_DOMAIN_INSTRUCTION &&
kgem->reloc[n].target_handle == 0)
kgem->reloc[n].delta -= shrink;
if (kgem->reloc[n].offset >= sizeof(uint32_t)*kgem->nbatch)
kgem->reloc[n].offset -= shrink;
}
}
return size;
return size * sizeof(uint32_t);
}
void _kgem_submit(struct kgem *kgem)
@@ -1769,11 +1773,11 @@ void _kgem_submit(struct kgem *kgem)
batch_end = kgem_end_batch(kgem);
kgem_sna_flush(kgem);
DBG(("batch[%d/%d]: %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d\n",
kgem->mode, kgem->ring, batch_end, kgem->nbatch, kgem->surface,
DBG(("batch[%d/%d]: %d %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d\n",
kgem->mode, kgem->ring, batch_end, kgem->nbatch, kgem->surface, kgem->batch_size,
kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture));
assert(kgem->nbatch <= kgem->max_batch_size);
assert(kgem->nbatch <= kgem->batch_size);
assert(kgem->nbatch <= kgem->surface);
assert(kgem->nreloc <= ARRAY_SIZE(kgem->reloc));
assert(kgem->nexec < ARRAY_SIZE(kgem->exec));
@@ -1786,7 +1790,7 @@ void _kgem_submit(struct kgem *kgem)
#endif
rq = kgem->next_request;
if (kgem->surface != kgem->max_batch_size)
if (kgem->surface != kgem->batch_size)
size = compact_batch_surface(kgem);
else
size = kgem->nbatch * sizeof(kgem->batch[0]);
@@ -1821,7 +1825,7 @@ void _kgem_submit(struct kgem *kgem)
execbuf.buffers_ptr = (uintptr_t)kgem->exec;
execbuf.buffer_count = kgem->nexec;
execbuf.batch_start_offset = 0;
execbuf.batch_len = batch_end*4;
execbuf.batch_len = batch_end*sizeof(uint32_t);
execbuf.cliprects_ptr = 0;
execbuf.num_cliprects = 0;
execbuf.DR1 = 0;
@@ -1835,7 +1839,7 @@ void _kgem_submit(struct kgem *kgem)
O_WRONLY | O_CREAT | O_APPEND,
0666);
if (fd != -1) {
ret = write(fd, kgem->batch, batch_end*4);
ret = write(fd, kgem->batch, batch_end*sizeof(uint32_t));
fd = close(fd);
}
}
@@ -1864,7 +1868,7 @@ void _kgem_submit(struct kgem *kgem)
i = open("/tmp/batchbuffer", O_WRONLY | O_CREAT | O_APPEND, 0666);
if (i != -1) {
ret = write(i, kgem->batch, batch_end*4);
ret = write(i, kgem->batch, batch_end*sizeof(uint32_t));
close(i);
}

View File

@@ -142,7 +142,7 @@ struct kgem {
uint16_t nreloc;
uint16_t nfence;
uint16_t wait;
uint16_t max_batch_size;
uint16_t batch_size;
uint16_t min_alignment;
uint32_t flush:1;
@@ -170,9 +170,9 @@ struct kgem {
void (*context_switch)(struct kgem *kgem, int new_mode);
void (*retire)(struct kgem *kgem);
uint32_t batch[4*1024];
uint32_t batch[64*1024-8];
struct drm_i915_gem_exec_object2 exec[256];
struct drm_i915_gem_relocation_entry reloc[612];
struct drm_i915_gem_relocation_entry reloc[4096];
};
#define KGEM_BATCH_RESERVED 1
@@ -180,7 +180,7 @@ struct kgem {
#define KGEM_EXEC_RESERVED 1
#define ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0]))
#define KGEM_BATCH_SIZE(K) ((K)->max_batch_size-KGEM_BATCH_RESERVED)
#define KGEM_BATCH_SIZE(K) ((K)->batch_size-KGEM_BATCH_RESERVED)
#define KGEM_EXEC_SIZE(K) (int)(ARRAY_SIZE((K)->exec)-KGEM_EXEC_RESERVED)
#define KGEM_RELOC_SIZE(K) (int)(ARRAY_SIZE((K)->reloc)-KGEM_RELOC_RESERVED)