[SCM] libav/experimental: ARM: NEON 2xN chroma MC
siretart at users.alioth.debian.org
siretart at users.alioth.debian.org
Sun Jun 30 16:55:20 UTC 2013
The following commit has been merged in the experimental branch:
commit 1025d19dd7b53631c77a66c9057fbf1f417fc769
Author: Måns Rullgård <mans at mansr.com>
Date: Wed Dec 2 00:37:36 2009 +0000
ARM: NEON 2xN chroma MC
Originally committed as revision 20696 to svn://svn.ffmpeg.org/ffmpeg/trunk
diff --git a/libavcodec/arm/dsputil_init_neon.c b/libavcodec/arm/dsputil_init_neon.c
index 177497e..920b2e8 100644
--- a/libavcodec/arm/dsputil_init_neon.c
+++ b/libavcodec/arm/dsputil_init_neon.c
@@ -125,9 +125,11 @@ void ff_avg_h264_qpel8_mc33_neon(uint8_t *, uint8_t *, int);
void ff_put_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
void ff_put_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
+void ff_put_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int);
void ff_avg_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
void ff_avg_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
+void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int);
void ff_h264_v_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha,
int beta, int8_t *tc0);
@@ -272,9 +274,11 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
if (CONFIG_H264_DECODER) {
c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_neon;
c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_neon;
+ c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_neon;
c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_neon;
c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_neon;
+ c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_neon;
c->put_h264_qpel_pixels_tab[0][ 0] = ff_put_h264_qpel16_mc00_neon;
c->put_h264_qpel_pixels_tab[0][ 1] = ff_put_h264_qpel16_mc10_neon;
diff --git a/libavcodec/arm/h264dsp_neon.S b/libavcodec/arm/h264dsp_neon.S
index 5c54fa3..08ff207 100644
--- a/libavcodec/arm/h264dsp_neon.S
+++ b/libavcodec/arm/h264dsp_neon.S
@@ -320,6 +320,74 @@ function ff_\type\()_h264_chroma_mc4_neon, export=1
.endfunc
.endm
+ .macro h264_chroma_mc2 type
+function ff_\type\()_h264_chroma_mc2_neon, export=1 @ 2-byte-wide bilinear block filter: r0 is written, r1 is read, r2 steps both pointers per row, r3 counts rows; \type selects plain store (put) or average with existing r0 contents (avg)
+ push {r4-r6, lr} @ save scratch registers and return address (16 bytes)
+ ldr r4, [sp, #16] @ r4 = first stack word above the saved registers, i.e. the 5th argument (called x below; presumably the horizontal eighth-pel fraction - confirm against caller)
+ ldr lr, [sp, #20] @ lr = next stack word, the 6th argument (called y below; presumably the vertical fraction - confirm against caller)
+ pld [r1] @ preload hint for the first source row
+ pld [r1, r2] @ preload hint for the row one step (r2) further on
+ orrs r5, r4, lr @ Z flag set only when x and y are both zero
+ beq 2f @ x == y == 0: skip filtering, use the copy/average path at 2:
+
+ mul r5, r4, lr @ r5 = x*y (weight D)
+ rsb r6, r5, lr, lsl #3 @ r6 = 8*y - x*y = (8-x)*y (weight C)
+ rsb r12, r5, r4, lsl #3 @ r12 = 8*x - x*y = x*(8-y) (weight B)
+ sub r4, r5, r4, lsl #3 @ r4 = x*y - 8*x
+ sub r4, r4, lr, lsl #3 @ r4 = x*y - 8*x - 8*y
+ add r4, r4, #64 @ r4 = (8-x)*(8-y) (weight A); A+B+C+D = 64
+ vdup.8 d0, r4 @ d0 = A in every byte
+ vdup.8 d2, r12 @ d2 = B in every byte
+ vdup.8 d1, r6 @ d1 = C in every byte
+ vdup.8 d3, r5 @ d3 = D in every byte
+ vtrn.16 q0, q1 @ interleave 16-bit lanes: d0 = A,A,B,B,A,A,B,B and d1 = C,C,D,D,C,C,D,D
+1: @ filtering loop: each pass reads three source rows and writes two output rows
+ vld1.32 {d4[0]}, [r1], r2 @ d4[0] = 4 bytes of source row n; r1 += r2
+ vld1.32 {d4[1]}, [r1], r2 @ d4[1] = 4 bytes of source row n+1; r1 += r2
+ vrev64.32 d5, d4 @ d5 = {row n+1, row n}
+ vld1.32 {d5[1]}, [r1] @ d5[1] = row n+2, so d5 = {row n+1, row n+2}; r1 is not advanced, so this row is read again as row n of the next pass
+ vext.8 q3, q2, q2, #1 @ q3 = q2 rotated by one byte, lining each byte up with its right-hand neighbour
+ vtrn.16 q2, q3 @ per row the bytes become p0,p1,p1,p2: d4 holds rows n and n+1 (upper taps), d5 holds rows n+1 and n+2 (lower taps)
+ vmull.u8 q8, d4, d0 @ q8 = upper taps * {A,A,B,B}, widened to 16 bits
+ vmlal.u8 q8, d5, d1 @ q8 += lower taps * {C,C,D,D}
+.ifc \type,avg
+ vld1.16 {d18[0]}, [r0,:16], r2 @ d18[0] = the 2 bytes currently at the first output row; r0 += r2
+ vld1.16 {d18[1]}, [r0,:16] @ d18[1] = the 2 bytes currently at the second output row
+ sub r0, r0, r2 @ rewind r0 to the first output row for the stores below
+.endif
+ vtrn.32 d16, d17 @ regroup: d16 = A/C partial sums of both rows, d17 = B/D partial sums of both rows
+ vadd.i16 d16, d16, d17 @ d16 = full 4-tap sums: row n pixels 0,1 then row n+1 pixels 0,1
+ vrshrn.u16 d16, q8, #6 @ rounding shift right by 6 (divide by the weight total 64) and narrow to bytes; the four results are the low 4 bytes of d16
+.ifc \type,avg
+ vrhadd.u8 d16, d16, d18 @ rounding average of the filtered bytes with the bytes loaded from r0
+.endif
+ vst1.16 {d16[0]}, [r0,:16], r2 @ store 2 bytes to the first output row; r0 += r2
+ vst1.16 {d16[1]}, [r0,:16], r2 @ store 2 bytes to the second output row; r0 += r2
+ subs r3, r3, #2 @ two rows done
+ bgt 1b @ repeat while r3 > 0 (rows are always handled in pairs)
+ pop {r4-r6, pc} @ restore registers and return
+2: @ x == y == 0 path: no filtering, two rows per pass
+.ifc \type,put
+ ldrh r5, [r1], r2 @ r5 = 2 source bytes of the first row; r1 += r2
+ strh r5, [r0], r2 @ copy them out unchanged; r0 += r2
+ ldrh r6, [r1], r2 @ r6 = 2 source bytes of the second row; r1 += r2
+ strh r6, [r0], r2 @ copy them out unchanged; r0 += r2
+.else
+ vld1.16 {d16[0]}, [r1], r2 @ d16[0] = 2 source bytes of the first row; r1 += r2
+ vld1.16 {d16[1]}, [r1], r2 @ d16[1] = 2 source bytes of the second row; r1 += r2
+ vld1.16 {d18[0]}, [r0,:16], r2 @ d18[0] = the 2 bytes currently at the first output row; r0 += r2
+ vld1.16 {d18[1]}, [r0,:16] @ d18[1] = the 2 bytes currently at the second output row
+ sub r0, r0, r2 @ rewind r0 to the first output row
+ vrhadd.u8 d16, d16, d18 @ rounding average of the source bytes with the bytes loaded from r0
+ vst1.16 {d16[0]}, [r0,:16], r2 @ store the first row; r0 += r2
+ vst1.16 {d16[1]}, [r0,:16], r2 @ store the second row; r0 += r2
+.endif
+ subs r3, r3, #2 @ two rows done
+ bgt 2b @ repeat while r3 > 0
+ pop {r4-r6, pc} @ restore registers and return
+ .endfunc
+.endm
+
.text
.align
@@ -327,6 +395,8 @@ function ff_\type\()_h264_chroma_mc4_neon, export=1
h264_chroma_mc8 avg
h264_chroma_mc4 put
h264_chroma_mc4 avg
+ h264_chroma_mc2 put
+ h264_chroma_mc2 avg
/* H.264 loop filter */
--
Libav/FFmpeg packaging
More information about the pkg-multimedia-commits
mailing list