[SCM] libav/experimental: Optimize ff_snow_horizontal_compose97i. This makes the 9/7 C wavelet at the decoder side 22% faster. The old code is changed to match the new in terms of the order of operations (which also makes it slightly faster)
siretart at users.alioth.debian.org
siretart at users.alioth.debian.org
Sun Jun 30 16:57:17 UTC 2013
The following commit has been merged in the experimental branch:
commit 2c8077621b6466da205ba26fd20a9c906bb71893
Author: Michael Niedermayer <michaelni at gmx.at>
Date: Mon Jan 11 02:52:50 2010 +0000
Optimize ff_snow_horizontal_compose97i.
this makes the 9/7 C wavelet at the decoder side 22% faster.
The old code is changed to match the new in terms of the order of operations
(which also makes it slightly faster)
Originally committed as revision 21132 to svn://svn.ffmpeg.org/ffmpeg/trunk
diff --git a/libavcodec/snow.c b/libavcodec/snow.c
index a9aa2eb..53c7deb 100644
--- a/libavcodec/snow.c
+++ b/libavcodec/snow.c
@@ -1120,10 +1120,36 @@ void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){
IDWTELEM temp[width];
const int w2= (width+1)>>1;
- inv_lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
- inv_lift (temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
- inv_liftS(b , temp , temp+w2, 2, 1, 1, width, W_BM, W_BO, W_BS, 0, 1);
- inv_lift (b+1 , temp+w2, b , 2, 1, 2, width, W_AM, W_AO, W_AS, 1, 0);
+#if 0 //maybe more understadable but slower
+ inv_lift (temp , b , b +w2, 2, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
+ inv_lift (temp+1 , b +w2, temp , 2, 1, 2, width, W_CM, W_CO, W_CS, 1, 1);
+
+ inv_liftS(b , temp , temp+1 , 2, 2, 2, width, W_BM, W_BO, W_BS, 0, 1);
+ inv_lift (b+1 , temp+1 , b , 2, 2, 2, width, W_AM, W_AO, W_AS, 1, 0);
+#else
+ int x;
+ temp[0] = b[0] - ((3*b[w2]+2)>>2);
+ for(x=1; x<(width>>1); x++){
+ temp[2*x ] = b[x ] - ((3*(b [x+w2-1] + b[x+w2])+4)>>3);
+ temp[2*x-1] = b[x+w2-1] - temp[2*x-2] - temp[2*x];
+ }
+ if(width&1){
+ temp[2*x ] = b[x ] - ((3*b [x+w2-1]+2)>>2);
+ temp[2*x-1] = b[x+w2-1] - temp[2*x-2] - temp[2*x];
+ }else
+ temp[2*x-1] = b[x+w2-1] - 2*temp[2*x-2];
+
+ b[0] = temp[0] + ((2*temp[0] + temp[1]+4)>>3);
+ for(x=2; x<width-1; x+=2){
+ b[x ] = temp[x ] + ((4*temp[x ] + temp[x-1] + temp[x+1]+8)>>4);
+ b[x-1] = temp[x-1] + ((3*(b [x-2] + b [x ] ))>>1);
+ }
+ if(width&1){
+ b[x ] = temp[x ] + ((2*temp[x ] + temp[x-1]+4)>>3);
+ b[x-1] = temp[x-1] + ((3*(b [x-2] + b [x ] ))>>1);
+ }else
+ b[x-1] = temp[x-1] + 3*b [x-2];
+#endif
}
static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
--
Libav/FFmpeg packaging
More information about the pkg-multimedia-commits
mailing list