[SCM] libav/experimental: Optimize ff_snow_horizontal_compose97i. this makes the 9/7 C wavelet at the decoder side 22% faster. The old code is changed to match the new in terms of the order of operations (which also makes it slightly faster)

siretart at users.alioth.debian.org siretart at users.alioth.debian.org
Sun Jun 30 16:57:17 UTC 2013


The following commit has been merged in the experimental branch:
commit 2c8077621b6466da205ba26fd20a9c906bb71893
Author: Michael Niedermayer <michaelni at gmx.at>
Date:   Mon Jan 11 02:52:50 2010 +0000

    Optimize ff_snow_horizontal_compose97i.
    this makes the 9/7 C wavelet at the decoder side 22% faster.
    The old code is changed to match the new in terms of the order of operations
    (which also makes it slightly faster)
    
    Originally committed as revision 21132 to svn://svn.ffmpeg.org/ffmpeg/trunk

diff --git a/libavcodec/snow.c b/libavcodec/snow.c
index a9aa2eb..53c7deb 100644
--- a/libavcodec/snow.c
+++ b/libavcodec/snow.c
@@ -1120,10 +1120,36 @@ void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){
     IDWTELEM temp[width];
     const int w2= (width+1)>>1;
 
-    inv_lift (temp   , b      , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 1);
-    inv_lift (temp+w2, b   +w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 1);
-    inv_liftS(b      , temp   , temp+w2, 2, 1, 1, width,  W_BM, W_BO, W_BS, 0, 1);
-    inv_lift (b+1    , temp+w2, b      , 2, 1, 2, width,  W_AM, W_AO, W_AS, 1, 0);
+#if 0 //maybe more understadable but slower
+    inv_lift (temp   , b      , b   +w2, 2, 1, 1, width,  W_DM, W_DO, W_DS, 0, 1);
+    inv_lift (temp+1 , b   +w2, temp   , 2, 1, 2, width,  W_CM, W_CO, W_CS, 1, 1);
+
+    inv_liftS(b      , temp   , temp+1 , 2, 2, 2, width,  W_BM, W_BO, W_BS, 0, 1);
+    inv_lift (b+1    , temp+1 , b      , 2, 2, 2, width,  W_AM, W_AO, W_AS, 1, 0);
+#else
+    int x;
+    temp[0] = b[0] - ((3*b[w2]+2)>>2);
+    for(x=1; x<(width>>1); x++){
+        temp[2*x  ] = b[x     ] - ((3*(b   [x+w2-1] + b[x+w2])+4)>>3);
+        temp[2*x-1] = b[x+w2-1] - temp[2*x-2] - temp[2*x];
+    }
+    if(width&1){
+        temp[2*x  ] = b[x     ] - ((3*b   [x+w2-1]+2)>>2);
+        temp[2*x-1] = b[x+w2-1] - temp[2*x-2] - temp[2*x];
+    }else
+        temp[2*x-1] = b[x+w2-1] - 2*temp[2*x-2];
+
+    b[0] = temp[0] + ((2*temp[0] + temp[1]+4)>>3);
+    for(x=2; x<width-1; x+=2){
+        b[x  ] = temp[x  ] + ((4*temp[x  ] + temp[x-1] + temp[x+1]+8)>>4);
+        b[x-1] = temp[x-1] + ((3*(b  [x-2] + b   [x  ] ))>>1);
+    }
+    if(width&1){
+        b[x  ] = temp[x  ] + ((2*temp[x  ] + temp[x-1]+4)>>3);
+        b[x-1] = temp[x-1] + ((3*(b  [x-2] + b   [x  ] ))>>1);
+    }else
+        b[x-1] = temp[x-1] + 3*b [x-2];
+#endif
 }
 
 static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){

-- 
Libav/FFmpeg packaging



More information about the pkg-multimedia-commits mailing list