[SCM] libav/experimental: DCA: break out lfe_interpolation_fir() inner loops to a function

Sun Jun 30 17:06:01 UTC 2013

The following commit has been merged into the experimental branch:
commit 309d16a4a0485554645bfb3e5f9d476e793ce731
Author: Måns Rullgård <mans at mansr.com>
Date:   Mon Apr 12 20:45:25 2010 +0000

    DCA: break out lfe_interpolation_fir() inner loops to a function
    
    This enables SIMD optimisations of this function.
    
    Originally committed as revision 22861 to svn://svn.ffmpeg.org/ffmpeg/trunk

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 4fce1bb..68b42b4 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -87,7 +87,7 @@ OBJS-$(CONFIG_CLJR_ENCODER)            += cljr.o
 OBJS-$(CONFIG_COOK_DECODER)            += cook.o
 OBJS-$(CONFIG_CSCD_DECODER)            += cscd.o
 OBJS-$(CONFIG_CYUV_DECODER)            += cyuv.o
-OBJS-$(CONFIG_DCA_DECODER)             += dca.o synth_filter.o
+OBJS-$(CONFIG_DCA_DECODER)             += dca.o synth_filter.o dcadsp.o
 OBJS-$(CONFIG_DNXHD_DECODER)           += dnxhddec.o dnxhddata.o
 OBJS-$(CONFIG_DNXHD_ENCODER)           += dnxhdenc.o dnxhddata.o       \
                                           mpegvideo_enc.o motion_est.o \
diff --git a/libavcodec/dca.c b/libavcodec/dca.c
index 8db25fd..6ba9f78 100644
--- a/libavcodec/dca.c
+++ b/libavcodec/dca.c
@@ -41,6 +41,7 @@
 #include "dcahuff.h"
 #include "dca.h"
 #include "synth_filter.h"
+#include "dcadsp.h"
 
 //#define TRACE
 
@@ -256,6 +257,7 @@ typedef struct {
     DSPContext dsp;
     FFTContext imdct;
     SynthFilterContext synth;
+    DCADSPContext dcadsp;
 } DCAContext;
 
 static const uint16_t dca_vlc_offs[] = {
@@ -788,7 +790,7 @@ static void qmf_32_subbands(DCAContext * s, int chans,
     }
 }
 
-static void lfe_interpolation_fir(int decimation_select,
+static void lfe_interpolation_fir(DCAContext *s, int decimation_select,
                                   int num_deci_sample, float *samples_in,
                                   float *samples_out, float scale,
                                   float bias)
@@ -801,7 +803,7 @@ static void lfe_interpolation_fir(int decimation_select,
      * samples_out: An array holding interpolated samples
      */
 
-    int decifactor, k, j;
+    int decifactor;
     const float *prCoeff;
     int deciindex;
 
@@ -815,25 +817,10 @@ static void lfe_interpolation_fir(int decimation_select,
     }
     /* Interpolation */
     for (deciindex = 0; deciindex < num_deci_sample; deciindex++) {
-        float *samples_out2 = samples_out + decifactor;
-        const float *cf0 = prCoeff;
-        const float *cf1 = prCoeff + 256;
-
-        /* One decimated sample generates 2*decifactor interpolated ones */
-        for (k = 0; k < decifactor; k++) {
-            float v0 = 0.0;
-            float v1 = 0.0;
-            for (j = 0; j < 256 / decifactor; j++) {
-                float s = samples_in[-j];
-                v0 += s * *cf0++;
-                v1 += s * *--cf1;
-            }
-            *samples_out++  = (v0 * scale) + bias;
-            *samples_out2++ = (v1 * scale) + bias;
-        }
-
+        s->dcadsp.lfe_fir(samples_out, samples_in, prCoeff, decifactor,
+                          scale, bias);
         samples_in++;
-        samples_out += decifactor;
+        samples_out += 2 * decifactor;
     }
 }
 
@@ -1083,7 +1070,7 @@ static int dca_subsubframe(DCAContext * s)
     if (s->output & DCA_LFE) {
         int lfe_samples = 2 * s->lfe * s->subsubframes;
 
-        lfe_interpolation_fir(s->lfe, 2 * s->lfe,
+        lfe_interpolation_fir(s, s->lfe, 2 * s->lfe,
                               s->lfe_data + lfe_samples +
                               2 * s->lfe * subsubframe,
                               &s->samples[256 * dca_lfe_index[s->amode]],
@@ -1313,6 +1300,7 @@ static av_cold int dca_decode_init(AVCodecContext * avctx)
     dsputil_init(&s->dsp, avctx);
     ff_mdct_init(&s->imdct, 6, 1, 1.0);
     ff_synth_filter_init(&s->synth);
+    ff_dcadsp_init(&s->dcadsp);
 
     for(i = 0; i < 6; i++)
         s->samples_chanptr[i] = s->samples + i * 256;
diff --git a/libavutil/x86/intmath.h b/libavcodec/dcadsp.c
similarity index 51%
copy from libavutil/x86/intmath.h
copy to libavcodec/dcadsp.c
index f3acddc..fcd3b85 100644
--- a/libavutil/x86/intmath.h
+++ b/libavcodec/dcadsp.c
@@ -1,4 +1,5 @@
 /*
+ * Copyright (c) 2004 Gildas Bazin
  * Copyright (c) 2010 Mans Rullgard <mans at mansr.com>
  *
  * This file is part of FFmpeg.
@@ -18,18 +19,31 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-#ifndef AVUTIL_X86_INTMATH_H
-#define AVUTIL_X86_INTMATH_H
+#include "dcadsp.h"
 
-#define FASTDIV(a,b) \
-    ({\
-        int ret, dmy;\
-        __asm__ volatile(\
-            "mull %3"\
-            :"=d"(ret), "=a"(dmy)\
-            :"1"(a), "g"(ff_inverse[b])\
-            );\
-        ret;\
-    })
+static void dca_lfe_fir_c(float *out, const float *in, const float *coefs,
+                          int decifactor, float scale, float bias)
+{
+    float *out2 = out + decifactor;
+    const float *cf0 = coefs;
+    const float *cf1 = coefs + 256;
+    int j, k;
 
-#endif /* AVUTIL_X86_INTMATH_H */
+    /* One decimated sample generates 2*decifactor interpolated ones */
+    for (k = 0; k < decifactor; k++) {
+        float v0 = 0.0;
+        float v1 = 0.0;
+        for (j = 0; j < 256 / decifactor; j++) {
+            float s = in[-j];
+            v0 += s * *cf0++;
+            v1 += s * *--cf1;
+        }
+        *out++  = (v0 * scale) + bias;
+        *out2++ = (v1 * scale) + bias;
+    }
+}
+
+void ff_dcadsp_init(DCADSPContext *s)
+{
+    s->lfe_fir = dca_lfe_fir_c;
+}
diff --git a/libavcodec/aandcttab.h b/libavcodec/dcadsp.h
similarity index 72%
copy from libavcodec/aandcttab.h
copy to libavcodec/dcadsp.h
index ed1c3c3..807fe1c 100644
--- a/libavcodec/aandcttab.h
+++ b/libavcodec/dcadsp.h
@@ -16,17 +16,14 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
-/**
- * @file libavcodec/aandcttab.h
- * AAN (Arai Agui Nakajima) (I)DCT tables
- */
-
-#ifndef AVCODEC_AANDCTTAB_H
-#define AVCODEC_AANDCTTAB_H
+#ifndef AVCODEC_DCADSP_H
+#define AVCODEC_DCADSP_H
 
-#include <stdint.h>
+typedef struct DCADSPContext {
+    void (*lfe_fir)(float *out, const float *in, const float *coefs,
+                    int decifactor, float scale, float bias);
+} DCADSPContext;
 
-extern const uint16_t ff_aanscales[64];
-extern const uint16_t ff_inv_aanscales[64];
+void ff_dcadsp_init(DCADSPContext *s);
 
-#endif /* AVCODEC_AANDCTTAB_H */
+#endif /* AVCODEC_DCADSP_H */

-- 
Libav/FFmpeg packaging