[SCM] libav/experimental: Port pred8x8l_dc_mmx/ssse3 (H.264 intra prediction) from x264 to FFmpeg. Original authors: Holger Lubitz <holger lubitz org>, Jason Garrett-Glaser <darkshikari gmail com> (approves LGPL relicensing for this code) and Loren Merritt <lorenm at u dot washington dot edu> (approves LGPL relicensing for this code). Patch by Daniel Kang <daniel dot d dot kang at gmail com>, as part of Google's GCI 2010.

siretart at users.alioth.debian.org siretart at users.alioth.debian.org
Sun Jun 30 17:21:03 UTC 2013


The following commit has been merged in the experimental branch:
commit abab14eac052edbde798ecd58d98e0d91eabb698
Author: Daniel Kang <daniel.d.kang at gmail.com>
Date:   Wed Dec 29 18:33:10 2010 +0000

    Port pred8x8l_dc_mmx/ssse3 (H.264 intra prediction) from x264 to FFmpeg.
    Original authors: Holger Lubitz <holger lubitz org>, Jason Garrett-Glaser
    <darkshikari gmail com> (approves LGPL relicensing for this code) and Loren
    Merritt <lorenm at u dot washington dot edu> (approves LGPL relicensing for
    this code). Patch by Daniel Kang <daniel dot d dot kang at gmail com>, as
    part of Google's GCI 2010.
    
    Originally committed as revision 26138 to svn://svn.ffmpeg.org/ffmpeg/trunk

diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
index 5b8b4b4..bb7cf73 100644
--- a/libavcodec/x86/h264_intrapred.asm
+++ b/libavcodec/x86/h264_intrapred.asm
@@ -43,6 +43,7 @@ cextern pb_1
 cextern pb_3
 cextern pw_4
 cextern pw_5
+cextern pw_8
 cextern pw_16
 cextern pw_17
 cextern pw_32
@@ -1139,6 +1140,110 @@ PRED8x8L_TOP_DC ssse3
 %endif
 
 ;-----------------------------------------------------------------------------
+;void pred8x8l_dc(uint8_t *src, int has_topleft, int has_topright, int stride)
+;-----------------------------------------------------------------------------
+%ifdef CONFIG_GPL
+%macro PRED8x8L_DC 1
+cglobal pred8x8l_dc_%1, 4,5
+    sub          r0, r3
+    lea          r4, [r0+r3*2]
+    movq        mm0, [r0+r3*1-8]
+    punpckhbw   mm0, [r0+r3*0-8]
+    movq        mm1, [r4+r3*1-8]
+    punpckhbw   mm1, [r0+r3*2-8]
+    mov          r4, r0
+    punpckhwd   mm1, mm0
+    lea          r0, [r0+r3*4]
+    movq        mm2, [r0+r3*1-8]
+    punpckhbw   mm2, [r0+r3*0-8]
+    lea          r0, [r0+r3*2]
+    movq        mm3, [r0+r3*1-8]
+    punpckhbw   mm3, [r0+r3*0-8]
+    punpckhwd   mm3, mm2
+    punpckhdq   mm3, mm1
+    lea          r0, [r0+r3*2]
+    movq        mm0, [r0+r3*0-8]
+    movq        mm1, [r4]
+    mov          r0, r4
+    movq        mm4, mm3
+    movq        mm2, mm3
+    PALIGNR     mm4, mm0, 7, mm0
+    PALIGNR     mm1, mm2, 1, mm2
+    test        r1, r1
+    jnz .do_left
+.fix_lt_1:
+    movq        mm5, mm3
+    pxor        mm5, mm4
+    psrlq       mm5, 56
+    psllq       mm5, 48
+    pxor        mm1, mm5
+    jmp .do_left
+.fix_lt_2:
+    movq        mm5, mm3
+    pxor        mm5, mm2
+    psllq       mm5, 56
+    psrlq       mm5, 56
+    pxor        mm2, mm5
+    test         r2, r2
+    jnz .body
+.fix_tr_1:
+    movq        mm5, mm3
+    pxor        mm5, mm1
+    psrlq       mm5, 56
+    psllq       mm5, 56
+    pxor        mm1, mm5
+    jmp .body
+.do_left:
+    movq        mm0, mm4
+    PRED4x4_LOWPASS mm2, mm1, mm4, mm3, mm5
+    movq        mm4, mm0
+    movq        mm7, mm2
+    PRED4x4_LOWPASS mm1, mm3, mm0, mm4, mm5
+    psllq       mm1, 56
+    PALIGNR     mm7, mm1, 7, mm3
+    movq        mm0, [r0-8]
+    movq        mm3, [r0]
+    movq        mm1, [r0+8]
+    movq        mm2, mm3
+    movq        mm4, mm3
+    PALIGNR     mm2, mm0, 7, mm0
+    PALIGNR     mm1, mm4, 1, mm4
+    test         r1, r1
+    jz .fix_lt_2
+    test         r2, r2
+    jz .fix_tr_1
+.body
+    lea          r1, [r0+r3*2]
+    PRED4x4_LOWPASS mm6, mm2, mm1, mm3, mm5
+    pxor        mm0, mm0
+    pxor        mm1, mm1
+    lea          r2, [r1+r3*2]
+    psadbw      mm0, mm7
+    psadbw      mm1, mm6
+    paddw       mm0, [pw_8]
+    paddw       mm0, mm1
+    lea          r4, [r2+r3*2]
+    psrlw       mm0, 4
+    pshufw      mm0, mm0, 0
+    packuswb    mm0, mm0
+    movq [r0+r3*1], mm0
+    movq [r0+r3*2], mm0
+    movq [r1+r3*1], mm0
+    movq [r1+r3*2], mm0
+    movq [r2+r3*1], mm0
+    movq [r2+r3*2], mm0
+    movq [r4+r3*1], mm0
+    movq [r4+r3*2], mm0
+    RET
+%endmacro
+INIT_MMX
+%define PALIGNR PALIGNR_MMX
+PRED8x8L_DC mmxext
+%define PALIGNR PALIGNR_SSSE3
+PRED8x8L_DC ssse3
+%endif
+
+;-----------------------------------------------------------------------------
 ; void pred4x4_dc_mmxext(uint8_t *src, const uint8_t *topright, int stride)
 ;-----------------------------------------------------------------------------
 
diff --git a/libavcodec/x86/h264_intrapred_init.c b/libavcodec/x86/h264_intrapred_init.c
index 2a0e3b1..7f4834a 100644
--- a/libavcodec/x86/h264_intrapred_init.c
+++ b/libavcodec/x86/h264_intrapred_init.c
@@ -61,6 +61,8 @@ void ff_pred8x8_tm_vp8_sse2        (uint8_t *src, int stride);
 void ff_pred8x8_tm_vp8_ssse3       (uint8_t *src, int stride);
 void ff_pred8x8l_top_dc_mmxext     (uint8_t *src, int has_topleft, int has_topright, int stride);
 void ff_pred8x8l_top_dc_ssse3      (uint8_t *src, int has_topleft, int has_topright, int stride);
+void ff_pred8x8l_dc_mmxext         (uint8_t *src, int has_topleft, int has_topright, int stride);
+void ff_pred8x8l_dc_ssse3          (uint8_t *src, int has_topleft, int has_topright, int stride);
 void ff_pred4x4_dc_mmxext          (uint8_t *src, const uint8_t *topright, int stride);
 void ff_pred4x4_down_left_mmxext   (uint8_t *src, const uint8_t *topright, int stride);
 void ff_pred4x4_tm_vp8_mmx         (uint8_t *src, const uint8_t *topright, int stride);
@@ -100,6 +102,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
         h->pred8x8  [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmxext;
 #if CONFIG_GPL
         h->pred8x8l [TOP_DC_PRED ] = ff_pred8x8l_top_dc_mmxext;
+        h->pred8x8l [DC_PRED     ] = ff_pred8x8l_dc_mmxext;
 #endif
         h->pred4x4  [DC_PRED     ] = ff_pred4x4_dc_mmxext;
 #if CONFIG_GPL
@@ -155,6 +158,7 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
         h->pred8x8  [HOR_PRED8x8 ] = ff_pred8x8_horizontal_ssse3;
 #if CONFIG_GPL
         h->pred8x8l [TOP_DC_PRED ] = ff_pred8x8l_top_dc_ssse3;
+        h->pred8x8l [DC_PRED     ] = ff_pred8x8l_dc_ssse3;
 #endif
         if (codec_id == CODEC_ID_VP8) {
             h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_ssse3;

-- 
Libav/FFmpeg packaging



More information about the pkg-multimedia-commits mailing list