sna: Invert the function wrapping for sse64xN/sse64
We should be consistent in making the code simpler for the compiler, and so not rely on it eliminating the dead code for a single loop of sse64xN!

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
commit 65c72d9871
parent 59d371a9b2
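In miniature, the inversion looks like the sketch below. This is plain C, not the driver code: copy64/copy64xN are illustrative names, and memcpy stands in for the four 16-byte SSE2 load/store pairs. The point is that the single-block copy is now the straight-line primitive and the xN variant wraps it, instead of the single-block copy being the xN variant called with bytes == 64 and the compiler being trusted to fold the one-iteration loop away.

#include <stdint.h>
#include <string.h>

/* After the change: the 64-byte copy is straight-line code... */
static inline void copy64(uint8_t *dst, const uint8_t *src)
{
        /* stands in for the four 16-byte SSE2 loads and stores */
        memcpy(dst, src, 64);
}

/* ...and the N-block variant loops over it.  Before the change the
 * wrapping ran the other way: copy64() was copy64xN(dst, src, 64),
 * which relied on the compiler proving the loop runs exactly once. */
static inline void copy64xN(uint8_t *dst, const uint8_t *src, int bytes)
{
        int i;

        for (i = 0; i < bytes / 64; i++) {
                copy64(dst, src);
                dst += 64;
                src += 64;
        }
}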
@@ -353,35 +353,34 @@ memcpy_from_tiled_x__swizzle_0(const void *src, void *dst, int bpp,
 
 #if defined(sse2) && defined(__x86_64__)
 
 sse2 static force_inline void
+to_sse64(uint8_t *dst, const uint8_t *src)
+{
+	__m128i xmm1, xmm2, xmm3, xmm4;
+
+	xmm1 = xmm_load_128u((const __m128i*)src + 0);
+	xmm2 = xmm_load_128u((const __m128i*)src + 1);
+	xmm3 = xmm_load_128u((const __m128i*)src + 2);
+	xmm4 = xmm_load_128u((const __m128i*)src + 3);
+
+	xmm_save_128((__m128i*)dst + 0, xmm1);
+	xmm_save_128((__m128i*)dst + 1, xmm2);
+	xmm_save_128((__m128i*)dst + 2, xmm3);
+	xmm_save_128((__m128i*)dst + 3, xmm4);
+}
+
+sse2 static force_inline void
 to_sse64xN(uint8_t *dst, const uint8_t *src, int bytes)
 {
 	int i;
 
 	for (i = 0; i < bytes / 64; i++) {
-		__m128i xmm1, xmm2, xmm3, xmm4;
-
-		xmm1 = xmm_load_128u((const __m128i*)src + 0);
-		xmm2 = xmm_load_128u((const __m128i*)src + 1);
-		xmm3 = xmm_load_128u((const __m128i*)src + 2);
-		xmm4 = xmm_load_128u((const __m128i*)src + 3);
-
-		xmm_save_128((__m128i*)dst + 0, xmm1);
-		xmm_save_128((__m128i*)dst + 1, xmm2);
-		xmm_save_128((__m128i*)dst + 2, xmm3);
-		xmm_save_128((__m128i*)dst + 3, xmm4);
-
+		to_sse64(dst, src);
 		dst += 64;
 		src += 64;
 	}
 }
 
 sse2 static force_inline void
-to_sse64(uint8_t *dst, const uint8_t *src)
-{
-	to_sse64xN(dst, src, 64);
-}
-
-sse2 static force_inline void
 to_sse32(uint8_t *dst, const uint8_t *src)
 {
@@ -475,35 +474,34 @@ memcpy_to_tiled_x__swizzle_0__sse2(const void *src, void *dst, int bpp,
 	}
 }
 
 sse2 static force_inline void
+from_sse64(uint8_t *dst, const uint8_t *src)
+{
+	__m128i xmm1, xmm2, xmm3, xmm4;
+
+	xmm1 = xmm_load_128((const __m128i*)src + 0);
+	xmm2 = xmm_load_128((const __m128i*)src + 1);
+	xmm3 = xmm_load_128((const __m128i*)src + 2);
+	xmm4 = xmm_load_128((const __m128i*)src + 3);
+
+	xmm_save_128u((__m128i*)dst + 0, xmm1);
+	xmm_save_128u((__m128i*)dst + 1, xmm2);
+	xmm_save_128u((__m128i*)dst + 2, xmm3);
+	xmm_save_128u((__m128i*)dst + 3, xmm4);
+}
+
+sse2 static force_inline void
 from_sse64xN(uint8_t *dst, const uint8_t *src, int bytes)
 {
 	int i;
 
 	for (i = 0; i < bytes / 64; i++) {
-		__m128i xmm1, xmm2, xmm3, xmm4;
-
-		xmm1 = xmm_load_128((const __m128i*)src + 0);
-		xmm2 = xmm_load_128((const __m128i*)src + 1);
-		xmm3 = xmm_load_128((const __m128i*)src + 2);
-		xmm4 = xmm_load_128((const __m128i*)src + 3);
-
-		xmm_save_128u((__m128i*)dst + 0, xmm1);
-		xmm_save_128u((__m128i*)dst + 1, xmm2);
-		xmm_save_128u((__m128i*)dst + 2, xmm3);
-		xmm_save_128u((__m128i*)dst + 3, xmm4);
-
+		from_sse64(dst, src);
 		dst += 64;
 		src += 64;
 	}
 }
 
 sse2 static force_inline void
-from_sse64(uint8_t *dst, const uint8_t *src)
-{
-	from_sse64xN(dst, src, 64);
-}
-
-sse2 static force_inline void
 from_sse32(uint8_t *dst, const uint8_t *src)
 {