[SCM] libav/experimental: don't use C-asm loops, and unroll float_to_int16_3dnow() once; 30% faster

Sun Jun 30 16:24:51 UTC 2013

The following commit has been merged in the experimental branch:
commit 63b737d4f9c118853a4f8d9af641335629bdf3ab
Author: Michael Niedermayer <michaelni at gmx.at>
Date:   Mon Jul 7 20:46:03 2008 +0000

    Don't use C-asm loops, and unroll float_to_int16_3dnow() once.
    30% faster.
    
    Originally committed as revision 14102 to svn://svn.ffmpeg.org/ffmpeg/trunk

diff --git a/libavcodec/i386/dsputil_mmx.c b/libavcodec/i386/dsputil_mmx.c
index 60511a3..8925ffa 100644
--- a/libavcodec/i386/dsputil_mmx.c
+++ b/libavcodec/i386/dsputil_mmx.c
@@ -2024,18 +2024,25 @@ static void vector_fmul_add_add_sse(float *dst, const float *src0, const float *
 
 static void float_to_int16_3dnow(int16_t *dst, const float *src, int len){
     // not bit-exact: pf2id uses different rounding than C and SSE
-    int i;
-    for(i=0; i<len; i+=4) {
-        asm volatile(
-            "pf2id       %1, %%mm0 \n\t"
-            "pf2id       %2, %%mm1 \n\t"
-            "packssdw %%mm1, %%mm0 \n\t"
-            "movq     %%mm0, %0    \n\t"
-            :"=m"(dst[i])
-            :"m"(src[i]), "m"(src[i+2])
-        );
-    }
-    asm volatile("femms");
+    asm volatile(
+        "add        %0          , %0        \n\t"
+        "lea         (%2,%0,2)  , %2        \n\t"
+        "add        %0          , %1        \n\t"
+        "neg        %0                      \n\t"
+        "1:                                 \n\t"
+        "pf2id       (%2,%0,2)  , %%mm0     \n\t"
+        "pf2id      8(%2,%0,2)  , %%mm1     \n\t"
+        "pf2id     16(%2,%0,2)  , %%mm2     \n\t"
+        "pf2id     24(%2,%0,2)  , %%mm3     \n\t"
+        "packssdw   %%mm1       , %%mm0     \n\t"
+        "packssdw   %%mm3       , %%mm2     \n\t"
+        "movq       %%mm0       ,  (%1,%0)  \n\t"
+        "movq       %%mm2       , 8(%1,%0)  \n\t"
+        "add        $16         , %0        \n\t"
+        " js 1b                             \n\t"
+        "femms                              \n\t"
+        :"+r"(len), "+r"(dst), "+r"(src)
+    );
 }
 static void float_to_int16_sse(int16_t *dst, const float *src, int len){
     int i;

-- 
Libav/FFmpeg packaging