[SCM] libav/experimental: replace a few mov + psrlq with pshufw, there are more cases which could benefit from this but they would require us to duplicate some functions ... the trick is from various places (my own code in libpostproc, a patch on the x264 list, ...)
siretart at users.alioth.debian.org
siretart at users.alioth.debian.org
Sun Jun 30 15:45:10 UTC 2013
The following commit has been merged in the experimental branch:
commit 12e9668119df1539b1f967aff2e351923ddd043c
Author: Michael Niedermayer <michaelni at gmx.at>
Date: Wed Sep 21 21:17:09 2005 +0000
replace a few mov + psrlq with pshufw, there are more cases which could benefit from this but they would require us to duplicate some functions ...
the trick is from various places (my own code in libpostproc, a patch on the x264 list, ...)
Originally committed as revision 4608 to svn://svn.ffmpeg.org/ffmpeg/trunk
diff --git a/libavcodec/i386/dsputil_mmx.c b/libavcodec/i386/dsputil_mmx.c
index 5d9d499..51ed07c 100644
--- a/libavcodec/i386/dsputil_mmx.c
+++ b/libavcodec/i386/dsputil_mmx.c
@@ -1621,11 +1621,9 @@ static int hadamard8_diff_mmx2(void *s, uint8_t *src1, uint8_t *src2, int stride
"movq 64(%1), %%mm1 \n\t"
MMABS_SUM_MMX2(%%mm1, %%mm7, %%mm0)
- "movq %%mm0, %%mm1 \n\t"
- "psrlq $32, %%mm0 \n\t"
+ "pshufw $0x0E, %%mm0, %%mm1 \n\t"
"paddusw %%mm1, %%mm0 \n\t"
- "movq %%mm0, %%mm1 \n\t"
- "psrlq $16, %%mm0 \n\t"
+ "pshufw $0x01, %%mm0, %%mm1 \n\t"
"paddusw %%mm1, %%mm0 \n\t"
"movd %%mm0, %0 \n\t"
diff --git a/libavcodec/i386/mpegvideo_mmx_template.c b/libavcodec/i386/mpegvideo_mmx_template.c
index c9354dc..93f156e 100644
--- a/libavcodec/i386/mpegvideo_mmx_template.c
+++ b/libavcodec/i386/mpegvideo_mmx_template.c
@@ -22,7 +22,11 @@
#ifdef HAVE_MMX2
#define SPREADW(a) "pshufw $0, " #a ", " #a " \n\t"
#define PMAXW(a,b) "pmaxsw " #a ", " #b " \n\t"
-
+#define PMAX(a,b) \
+ "pshufw $0x0E," #a ", " #b " \n\t"\
+ PMAXW(b, a)\
+ "pshufw $0x01," #a ", " #b " \n\t"\
+ PMAXW(b, a)
#else
#define SPREADW(a) \
"punpcklwd " #a ", " #a " \n\t"\
@@ -30,6 +34,14 @@
#define PMAXW(a,b) \
"psubusw " #a ", " #b " \n\t"\
"paddw " #a ", " #b " \n\t"
+#define PMAX(a,b) \
+ "movq " #a ", " #b " \n\t"\
+ "psrlq $32, " #a " \n\t"\
+ PMAXW(b, a)\
+ "movq " #a ", " #b " \n\t"\
+ "psrlq $16, " #a " \n\t"\
+ PMAXW(b, a)
+
#endif
static int RENAME(dct_quantize)(MpegEncContext *s,
@@ -119,12 +131,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
PMAXW(%%mm0, %%mm3)
"add $8, %%"REG_a" \n\t"
" js 1b \n\t"
- "movq %%mm3, %%mm0 \n\t"
- "psrlq $32, %%mm3 \n\t"
- PMAXW(%%mm0, %%mm3)
- "movq %%mm3, %%mm0 \n\t"
- "psrlq $16, %%mm3 \n\t"
- PMAXW(%%mm0, %%mm3)
+ PMAX(%%mm3, %%mm0)
"movd %%mm3, %%"REG_a" \n\t"
"movzb %%al, %%"REG_a" \n\t" // last_non_zero_p1
: "+a" (last_non_zero_p1)
@@ -170,12 +177,7 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
PMAXW(%%mm0, %%mm3)
"add $8, %%"REG_a" \n\t"
" js 1b \n\t"
- "movq %%mm3, %%mm0 \n\t"
- "psrlq $32, %%mm3 \n\t"
- PMAXW(%%mm0, %%mm3)
- "movq %%mm3, %%mm0 \n\t"
- "psrlq $16, %%mm3 \n\t"
- PMAXW(%%mm0, %%mm3)
+ PMAX(%%mm3, %%mm0)
"movd %%mm3, %%"REG_a" \n\t"
"movzb %%al, %%"REG_a" \n\t" // last_non_zero_p1
: "+a" (last_non_zero_p1)
--
Libav/FFmpeg packaging
More information about the pkg-multimedia-commits
mailing list