[SCM] libav/experimental: Convert deinterlacing MMX code to YASM

siretart at users.alioth.debian.org siretart at users.alioth.debian.org
Sun Jun 30 17:13:08 UTC 2013


The following commit has been merged in the experimental branch:
commit de4bc44abb31d79bf2576dbcdb76606d5f7e971d
Author: Vitor Sessak <vitor1001 at gmail.com>
Date:   Sat Jul 31 14:50:51 2010 +0000

    Convert deinterlacing MMX code to YASM
    
    Originally committed as revision 24615 to svn://svn.ffmpeg.org/ffmpeg/trunk

diff --git a/libavcodec/imgconvert.c b/libavcodec/imgconvert.c
index d0fc1ce..1fb8f7e 100644
--- a/libavcodec/imgconvert.c
+++ b/libavcodec/imgconvert.c
@@ -39,7 +39,6 @@
 #include "libavcore/imgutils.h"
 
 #if HAVE_MMX
-#include "x86/mmx.h"
 #include "x86/dsputil_mmx.h"
 #endif
 
@@ -55,6 +54,14 @@
 #define FF_PIXEL_PACKED   1 /**< only one components containing all the channels */
 #define FF_PIXEL_PALETTE  2  /**< one components containing indexes for a palette */
 
+#if HAVE_MMX /* NOTE(review): x86/deinterlace.o is built only under HAVE_YASM - confirm this guard is sufficient */
+#define deinterlace_line_inplace ff_deinterlace_line_inplace_mmx /* asm version, declared in x86/dsputil_mmx.h */
+#define deinterlace_line         ff_deinterlace_line_mmx
+#else /* no MMX: route both names to the portable C implementations */
+#define deinterlace_line_inplace deinterlace_line_inplace_c
+#define deinterlace_line         deinterlace_line_c
+#endif /* HAVE_MMX */
+
 typedef struct PixFmtInfo {
     uint8_t nb_channels;     /**< number of channels (including alpha) */
     uint8_t color_type;      /**< color type (see FF_COLOR_xxx constants) */
@@ -1119,61 +1126,14 @@ int img_get_alpha_info(const AVPicture *src,
     return ret;
 }
 
-#if HAVE_MMX
-#define DEINT_INPLACE_LINE_LUM \
-                    movd_m2r(lum_m4[0],mm0);\
-                    movd_m2r(lum_m3[0],mm1);\
-                    movd_m2r(lum_m2[0],mm2);\
-                    movd_m2r(lum_m1[0],mm3);\
-                    movd_m2r(lum[0],mm4);\
-                    punpcklbw_r2r(mm7,mm0);\
-                    movd_r2m(mm2,lum_m4[0]);\
-                    punpcklbw_r2r(mm7,mm1);\
-                    punpcklbw_r2r(mm7,mm2);\
-                    punpcklbw_r2r(mm7,mm3);\
-                    punpcklbw_r2r(mm7,mm4);\
-                    paddw_r2r(mm3,mm1);\
-                    psllw_i2r(1,mm2);\
-                    paddw_r2r(mm4,mm0);\
-                    psllw_i2r(2,mm1);\
-                    paddw_r2r(mm6,mm2);\
-                    paddw_r2r(mm2,mm1);\
-                    psubusw_r2r(mm0,mm1);\
-                    psrlw_i2r(3,mm1);\
-                    packuswb_r2r(mm7,mm1);\
-                    movd_r2m(mm1,lum_m2[0]);
-
-#define DEINT_LINE_LUM \
-                    movd_m2r(lum_m4[0],mm0);\
-                    movd_m2r(lum_m3[0],mm1);\
-                    movd_m2r(lum_m2[0],mm2);\
-                    movd_m2r(lum_m1[0],mm3);\
-                    movd_m2r(lum[0],mm4);\
-                    punpcklbw_r2r(mm7,mm0);\
-                    punpcklbw_r2r(mm7,mm1);\
-                    punpcklbw_r2r(mm7,mm2);\
-                    punpcklbw_r2r(mm7,mm3);\
-                    punpcklbw_r2r(mm7,mm4);\
-                    paddw_r2r(mm3,mm1);\
-                    psllw_i2r(1,mm2);\
-                    paddw_r2r(mm4,mm0);\
-                    psllw_i2r(2,mm1);\
-                    paddw_r2r(mm6,mm2);\
-                    paddw_r2r(mm2,mm1);\
-                    psubusw_r2r(mm0,mm1);\
-                    psrlw_i2r(3,mm1);\
-                    packuswb_r2r(mm7,mm1);\
-                    movd_r2m(mm1,dst[0]);
-#endif
-
+#if !HAVE_MMX
 /* filter parameters: [-1 4 2 4 -1] // 8 */
-static void deinterlace_line(uint8_t *dst,
+static void deinterlace_line_c(uint8_t *dst,
                              const uint8_t *lum_m4, const uint8_t *lum_m3,
                              const uint8_t *lum_m2, const uint8_t *lum_m1,
                              const uint8_t *lum,
                              int size)
 {
-#if !HAVE_MMX
     uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
     int sum;
 
@@ -1191,27 +1151,12 @@ static void deinterlace_line(uint8_t *dst,
         lum++;
         dst++;
     }
-#else
-
-    {
-        pxor_r2r(mm7,mm7);
-        movq_m2r(ff_pw_4,mm6);
-    }
-    for (;size > 3; size-=4) {
-        DEINT_LINE_LUM
-        lum_m4+=4;
-        lum_m3+=4;
-        lum_m2+=4;
-        lum_m1+=4;
-        lum+=4;
-        dst+=4;
-    }
-#endif
 }
-static void deinterlace_line_inplace(uint8_t *lum_m4, uint8_t *lum_m3, uint8_t *lum_m2, uint8_t *lum_m1, uint8_t *lum,
-                             int size)
+
+static void deinterlace_line_inplace_c(uint8_t *lum_m4, uint8_t *lum_m3,
+                                       uint8_t *lum_m2, uint8_t *lum_m1,
+                                       uint8_t *lum, int size)
 {
-#if !HAVE_MMX
     uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
     int sum;
 
@@ -1229,22 +1174,8 @@ static void deinterlace_line_inplace(uint8_t *lum_m4, uint8_t *lum_m3, uint8_t *
         lum_m1++;
         lum++;
     }
-#else
-
-    {
-        pxor_r2r(mm7,mm7);
-        movq_m2r(ff_pw_4,mm6);
-    }
-    for (;size > 3; size-=4) {
-        DEINT_INPLACE_LINE_LUM
-        lum_m4+=4;
-        lum_m3+=4;
-        lum_m2+=4;
-        lum_m1+=4;
-        lum+=4;
-    }
-#endif
 }
+#endif
 
 /* deinterlacing : 2 temporal taps, 3 spatial taps linear filter. The
    top field is copied as is, but the bottom field is deinterlaced
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index 3f19f06..ea53e33 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -35,6 +35,7 @@ MMX-OBJS-$(CONFIG_VP6_DECODER)         += x86/vp3dsp_mmx.o              \
 YASM-OBJS-$(CONFIG_VP8_DECODER)        += x86/vp8dsp.o
 MMX-OBJS-$(CONFIG_VP8_DECODER)         += x86/vp8dsp-init.o
 MMX-OBJS-$(HAVE_YASM)                  += x86/dsputil_yasm.o            \
+                                          x86/deinterlace.o             \
                                           $(YASM-OBJS-yes)
 
 MMX-OBJS-$(CONFIG_FFT)                 += x86/fft.o
diff --git a/libavcodec/x86/deinterlace.asm b/libavcodec/x86/deinterlace.asm
new file mode 100644
index 0000000..5db9464
--- /dev/null
+++ b/libavcodec/x86/deinterlace.asm
@@ -0,0 +1,81 @@
+;******************************************************************************
+;* MMX optimized deinterlacing functions
+;* Copyright (c) 2010 Vitor Sessak
+;* Copyright (c) 2002 Michael Niedermayer
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "x86inc.asm"
+%include "x86util.asm"
+
+SECTION_RODATA
+cextern pw_4 ; word constant defined elsewhere; loaded into mm6 as the rounding term
+
+SECTION .text ; the functions emitted below must not be assembled into read-only data
+%macro DEINTERLACE 1 ; %1 == inplace emits the in-place function, anything else the dst-writing one
+%ifidn %1, inplace
+;void ff_deinterlace_line_inplace_mmx(const uint8_t *lum_m4, const uint8_t *lum_m3, const uint8_t *lum_m2, const uint8_t *lum_m1, const uint8_t *lum,  int size)
+cglobal deinterlace_line_inplace_mmx, 6,6,7,      lum_m4, lum_m3, lum_m2, lum_m1, lum, size
+%else
+;void ff_deinterlace_line_mmx(uint8_t *dst, const uint8_t *lum_m4, const uint8_t *lum_m3, const uint8_t *lum_m2, const uint8_t *lum_m1, const uint8_t *lum,  int size)
+cglobal deinterlace_line_mmx,         7,7,7, dst, lum_m4, lum_m3, lum_m2, lum_m1, lum, size
+%endif
+    pxor  mm7, mm7 ; mm7 = 0, used below to widen bytes to words and as the upper half when packing
+    movq  mm6, [pw_4] ; rounding term added before the >>3 (presumably four words of 4 - confirm at its definition)
+.nextrow ; one iteration filters 4 pixels from each of the five input lines
+    movd  mm0, [lum_m4q] ; mm0 = 4 bytes of lum_m4
+    movd  mm1, [lum_m3q] ; mm1 = 4 bytes of lum_m3
+    movd  mm2, [lum_m2q] ; mm2 = 4 bytes of lum_m2
+%ifidn %1, inplace
+    movd [lum_m4q], mm2 ; in-place only: copy the unfiltered lum_m2 pixels into lum_m4 (writes through lum_m4)
+%endif
+    movd  mm3, [lum_m1q] ; mm3 = 4 bytes of lum_m1
+    movd  mm4, [lumq] ; mm4 = 4 bytes of lum
+    punpcklbw mm0, mm7 ; zero-extend each byte to a 16-bit word (same for mm1..mm4)
+    punpcklbw mm1, mm7
+    punpcklbw mm2, mm7
+    punpcklbw mm3, mm7
+    punpcklbw mm4, mm7
+    paddw     mm1, mm3 ; mm1 = lum_m3 + lum_m1
+    psllw     mm2, 1 ; mm2 = 2 * lum_m2
+    paddw     mm0, mm4 ; mm0 = lum_m4 + lum
+    psllw     mm1, 2 ; mm1 = 4 * (lum_m3 + lum_m1)
+    paddw     mm2, mm6 ; mm2 = 2 * lum_m2 + rounding term
+    paddw     mm1, mm2 ; mm1 = sum of the positive taps plus rounding
+    psubusw   mm1, mm0 ; subtract the two outer taps; unsigned saturation clamps negatives to 0
+    psrlw     mm1, 3 ; divide by 8
+    packuswb  mm1, mm7 ; narrow words back to bytes, saturating at 255
+%ifidn %1, inplace
+    movd [lum_m2q], mm1 ; in-place: the filtered pixels overwrite lum_m2
+%else
+    movd   [dstq], mm1 ; otherwise the filtered pixels go to dst
+    add       dstq, 4
+%endif
+    add    lum_m4q, 4 ; step every line pointer to the next group of 4 pixels
+    add    lum_m3q, 4
+    add    lum_m2q, 4
+    add    lum_m1q, 4
+    add       lumq, 4
+    sub      sized, 4 ; NOTE(review): test comes after the body, so the loop runs at least once and
+    jg .nextrow ; a size that is not a multiple of 4 still processes a full final group of 4
+    REP_RET
+%endmacro
+
+DEINTERLACE ""
+
+DEINTERLACE inplace
diff --git a/libavcodec/x86/dsputil_mmx.h b/libavcodec/x86/dsputil_mmx.h
index 13067df..5de1c90 100644
--- a/libavcodec/x86/dsputil_mmx.h
+++ b/libavcodec/x86/dsputil_mmx.h
@@ -179,4 +179,17 @@ void ff_lpc_compute_autocorr_sse2(const int32_t *data, int len, int lag,
 void ff_mmx_idct(DCTELEM *block);
 void ff_mmxext_idct(DCTELEM *block);
 
+
+/* MMX deinterlacing line filters implemented in x86/deinterlace.asm. */
+void ff_deinterlace_line_mmx(uint8_t *dst,
+                             const uint8_t *lum_m4, const uint8_t *lum_m3,
+                             const uint8_t *lum_m2, const uint8_t *lum_m1,
+                             const uint8_t *lum, int size);
+
+/* In-place variant: it stores through lum_m4 and lum_m2, hence non-const. */
+void ff_deinterlace_line_inplace_mmx(uint8_t *lum_m4,
+                                     const uint8_t *lum_m3,
+                                     uint8_t *lum_m2, const uint8_t *lum_m1,
+                                     const uint8_t *lum, int size);
+
 #endif /* AVCODEC_X86_DSPUTIL_MMX_H */

-- 
Libav/FFmpeg packaging



More information about the pkg-multimedia-commits mailing list