[SCM] libav/experimental: slightly faster rgb32tobgr32; avoid one add and one cmp
siretart at users.alioth.debian.org
siretart at users.alioth.debian.org
Sun Jun 30 15:58:58 UTC 2013
The following commit has been merged in the experimental branch:
commit 935f50c82cb80e40fa9e6f587af04b428b4aae0d
Author: Ivo van Poorten <ivop at euronet.nl>
Date: Tue Apr 17 20:38:17 2007 +0000
slightly faster rgb32tobgr32; avoid one add and one cmp
Originally committed as revision 23012 to svn://svn.mplayerhq.hu/mplayer/trunk/libswscale
diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c
index 7147855..2053a6e 100644
--- a/libswscale/rgb2rgb_template.c
+++ b/libswscale/rgb2rgb_template.c
@@ -1364,21 +1364,22 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, long src_
static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size)
{
- uint8_t *d = dst, *s = (uint8_t *) src;
- const uint8_t *end = s + src_size;
+ long idx = 15 - src_size;
+ uint8_t *s = (uint8_t *) src-idx, *d = dst-idx;
#ifdef HAVE_MMX
__asm __volatile(
- " "PREFETCH" (%1) \n"
+ " test %0, %0 \n"
+ " jns 2f \n"
+ " "PREFETCH" (%1, %0) \n"
" movq %3, %%mm7 \n"
" pxor %4, %%mm7 \n"
" movq %%mm7, %%mm6 \n"
" pxor %5, %%mm7 \n"
- " jmp 2f \n"
ASMALIGN(4)
"1: \n"
- " "PREFETCH" 32(%1) \n"
- " movq (%1), %%mm0 \n"
- " movq 8(%1), %%mm1 \n"
+ " "PREFETCH" 32(%1, %0) \n"
+ " movq (%1, %0), %%mm0 \n"
+ " movq 8(%1, %0), %%mm1 \n"
# ifdef HAVE_MMX2
" pshufw $177, %%mm0, %%mm3 \n"
" pshufw $177, %%mm1, %%mm5 \n"
@@ -1406,23 +1407,21 @@ static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long s
" por %%mm3, %%mm0 \n"
" por %%mm5, %%mm1 \n"
# endif
- " "MOVNTQ" %%mm0, (%0) \n"
- " "MOVNTQ" %%mm1, 8(%0) \n"
+ " "MOVNTQ" %%mm0, (%2, %0) \n"
+ " "MOVNTQ" %%mm1, 8(%2, %0) \n"
" add $16, %0 \n"
- " add $16, %1 \n"
- "2: \n"
- " cmp %1, %2 \n"
- " ja 1b \n"
+ " js 1b \n"
" "SFENCE" \n"
" "EMMS" \n"
- : "+r"(d), "+r"(s)
- : "r" (end-15), "m" (mask32b), "m" (mask32r), "m" (mmx_one)
+ "2: \n"
+ : "+&r"(idx)
+ : "r" (s), "r" (d), "m" (mask32b), "m" (mask32r), "m" (mmx_one)
: "memory");
#endif
- for (; s<end; s+=4, d+=4) {
- int v = *(uint32_t *)s, g = v & 0xff00;
+ for (; idx<15; idx+=4) {
+ register int v = *(uint32_t *)&s[idx], g = v & 0xff00;
v &= 0xff00ff;
- *(uint32_t *)d = (v>>16) + g + (v<<16);
+ *(uint32_t *)&d[idx] = (v>>16) + g + (v<<16);
}
}
--
Libav/FFmpeg packaging
More information about the pkg-multimedia-commits mailing list