[SCM] libav/experimental: Unroll inner bidir loop in h264_loop_filter_strength_mmx2(), which gets rid of the d_idx variable and therefore allows for future optimizations. No speed difference by this commit itself.
siretart at users.alioth.debian.org
siretart at users.alioth.debian.org
Sun Jun 30 17:16:33 UTC 2013
The following commit has been merged in the experimental branch:
commit 2c3135f6d3faf764f5df364db00da1b2d4dcb097
Author: Ronald S. Bultje <rsbultje at gmail.com>
Date: Wed Sep 29 13:35:24 2010 +0000
Unroll the inner bidir loop in h264_loop_filter_strength_mmx2(). This gets rid
of the d_idx variable and therefore allows for future optimizations. This commit
by itself makes no speed difference.
Originally committed as revision 25253 to svn://svn.ffmpeg.org/ffmpeg/trunk
diff --git a/libavcodec/x86/h264dsp_mmx.c b/libavcodec/x86/h264dsp_mmx.c
index 4df3f12..ed0dbc6 100644
--- a/libavcodec/x86/h264dsp_mmx.c
+++ b/libavcodec/x86/h264dsp_mmx.c
@@ -86,7 +86,7 @@ static av_always_inline void h264_loop_filter_strength_iteration_mmx2(int16_t bS
"pshufw $0x4E, %%mm2, %%mm3 \n"
"psubb %%mm2, %%mm0 \n" // { ref0[b]!=ref0[bn], ref0[b]!=ref1[bn] }
"psubb %%mm3, %%mm1 \n" // { ref1[b]!=ref1[bn], ref1[b]!=ref0[bn] }
- "1: \n"
+
"por %%mm1, %%mm0 \n"
"movq (%2,%0,4), %%mm1 \n"
"movq 8(%2,%0,4), %%mm2 \n"
@@ -103,10 +103,24 @@ static av_always_inline void h264_loop_filter_strength_iteration_mmx2(int16_t bS
"psubusb %%mm5, %%mm1 \n" // abs(mv[b] - mv[bn]) >= limit
"psubusb %%mm5, %%mm3 \n"
"packsswb %%mm3, %%mm1 \n"
- "add $40, %0 \n"
- "cmp $40, %0 \n"
- "jl 1b \n"
- "sub $80, %0 \n"
+
+ "por %%mm1, %%mm0 \n"
+ "movq 160(%2,%0,4), %%mm1 \n"
+ "movq 168(%2,%0,4), %%mm2 \n"
+ "movq %%mm1, %%mm3 \n"
+ "movq %%mm2, %%mm4 \n"
+ "psubw (%2), %%mm1 \n"
+ "psubw 8(%2), %%mm2 \n"
+ "psubw 160(%2), %%mm3 \n"
+ "psubw 168(%2), %%mm4 \n"
+ "packsswb %%mm2, %%mm1 \n"
+ "packsswb %%mm4, %%mm3 \n"
+ "paddb %%mm6, %%mm1 \n"
+ "paddb %%mm6, %%mm3 \n"
+ "psubusb %%mm5, %%mm1 \n" // abs(mv[b] - mv[bn]) >= limit
+ "psubusb %%mm5, %%mm3 \n"
+ "packsswb %%mm3, %%mm1 \n"
+
"pshufw $0x4E, %%mm1, %%mm1 \n"
"por %%mm1, %%mm0 \n"
"pshufw $0x4E, %%mm0, %%mm1 \n"
--
Libav/FFmpeg packaging
More information about the pkg-multimedia-commits mailing list