[SCM] libav/experimental: dsputil: Move SVQ1 encoding specific bits into svq1enc
siretart at users.alioth.debian.org
Sun Aug 10 16:03:19 UTC 2014
The following commit has been merged in the experimental branch:
commit 65d5d5865845f057cc6530a8d0f34db952d9009c
Author: Diego Biurrun <diego at biurrun.de>
Date: Mon Dec 23 19:48:43 2013 +0100
dsputil: Move SVQ1 encoding specific bits into svq1enc
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index ab0206b..e26d27a 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -2099,16 +2099,6 @@ static int vsse16_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2,
return score;
}
-static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
- int size)
-{
- int score = 0, i;
-
- for (i = 0; i < size; i++)
- score += (pix1[i] - pix2[i]) * (pix1[i] - pix2[i]);
- return score;
-}
-
#define WRAPPER8_16_SQ(name8, name16) \
static int name16(MpegEncContext *s, uint8_t *dst, uint8_t *src, \
int stride, int h) \
@@ -2430,8 +2420,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx)
c->nsse[0] = nsse16_c;
c->nsse[1] = nsse8_c;
- c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
-
c->bswap_buf = bswap_buf;
c->bswap16_buf = bswap16_buf;
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index 2cfbd55..a4a9f87 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -175,9 +175,6 @@ typedef struct DSPContext {
me_cmp_func ildct_cmp[6]; // only width 16 used
me_cmp_func frame_skip_cmp[6]; // only width 8 used
- int (*ssd_int8_vs_int16)(const int8_t *pix1, const int16_t *pix2,
- int size);
-
qpel_mc_func put_qpel_pixels_tab[2][16];
qpel_mc_func avg_qpel_pixels_tab[2][16];
qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
diff --git a/libavcodec/ppc/Makefile b/libavcodec/ppc/Makefile
index 0780666..ec0674c 100644
--- a/libavcodec/ppc/Makefile
+++ b/libavcodec/ppc/Makefile
@@ -12,6 +12,7 @@ OBJS-$(CONFIG_MPEGVIDEO) += ppc/mpegvideo_altivec.o
OBJS-$(CONFIG_VIDEODSP) += ppc/videodsp_ppc.o
OBJS-$(CONFIG_VP3DSP) += ppc/vp3dsp_altivec.o
+OBJS-$(CONFIG_SVQ1_ENCODER) += ppc/svq1enc_altivec.o
OBJS-$(CONFIG_VC1_DECODER) += ppc/vc1dsp_altivec.o
OBJS-$(CONFIG_VORBIS_DECODER) += ppc/vorbisdsp_altivec.o
OBJS-$(CONFIG_VP7_DECODER) += ppc/vp8dsp_altivec.o
diff --git a/libavcodec/ppc/int_altivec.c b/libavcodec/ppc/int_altivec.c
index cd1984a..fa3cb66 100644
--- a/libavcodec/ppc/int_altivec.c
+++ b/libavcodec/ppc/int_altivec.c
@@ -34,48 +34,6 @@
#include "libavcodec/dsputil.h"
#include "dsputil_altivec.h"
-static int ssd_int8_vs_int16_altivec(const int8_t *pix1, const int16_t *pix2,
- int size)
-{
- int i, size16 = size >> 4;
- vector signed char vpix1;
- vector signed short vpix2, vdiff, vpix1l, vpix1h;
- union {
- vector signed int vscore;
- int32_t score[4];
- } u = { .vscore = vec_splat_s32(0) };
-
-// XXX lazy way, fix it later
-
- while (size16) {
- // score += (pix1[i] - pix2[i]) * (pix1[i] - pix2[i]);
- // load pix1 and the first batch of pix2
-
- vpix1 = vec_unaligned_load(pix1);
- vpix2 = vec_unaligned_load(pix2);
- pix2 += 8;
- // unpack
- vpix1h = vec_unpackh(vpix1);
- vdiff = vec_sub(vpix1h, vpix2);
- vpix1l = vec_unpackl(vpix1);
- // load another batch from pix2
- vpix2 = vec_unaligned_load(pix2);
- u.vscore = vec_msum(vdiff, vdiff, u.vscore);
- vdiff = vec_sub(vpix1l, vpix2);
- u.vscore = vec_msum(vdiff, vdiff, u.vscore);
- pix1 += 16;
- pix2 += 8;
- size16--;
- }
- u.vscore = vec_sums(u.vscore, vec_splat_s32(0));
-
- size %= 16;
- for (i = 0; i < size; i++)
- u.score[3] += (pix1[i] - pix2[i]) * (pix1[i] - pix2[i]);
-
- return u.score[3];
-}
-
static int32_t scalarproduct_int16_altivec(const int16_t *v1, const int16_t *v2,
int order)
{
@@ -140,8 +98,6 @@ static int32_t scalarproduct_and_madd_int16_altivec(int16_t *v1,
av_cold void ff_int_init_altivec(DSPContext *c, AVCodecContext *avctx)
{
- c->ssd_int8_vs_int16 = ssd_int8_vs_int16_altivec;
-
c->scalarproduct_int16 = scalarproduct_int16_altivec;
c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_altivec;
diff --git a/libavcodec/ppc/int_altivec.c b/libavcodec/ppc/svq1enc_altivec.c
similarity index 50%
copy from libavcodec/ppc/int_altivec.c
copy to libavcodec/ppc/svq1enc_altivec.c
index cd1984a..005239f 100644
--- a/libavcodec/ppc/int_altivec.c
+++ b/libavcodec/ppc/svq1enc_altivec.c
@@ -18,10 +18,7 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-/**
- * @file
- * miscellaneous integer operations
- */
+#include <stdint.h>
#include "config.h"
#if HAVE_ALTIVEC_H
@@ -31,9 +28,9 @@
#include "libavutil/attributes.h"
#include "libavutil/ppc/types_altivec.h"
#include "libavutil/ppc/util_altivec.h"
-#include "libavcodec/dsputil.h"
-#include "dsputil_altivec.h"
+#include "libavcodec/svq1enc.h"
+#if HAVE_ALTIVEC
static int ssd_int8_vs_int16_altivec(const int8_t *pix1, const int16_t *pix2,
int size)
{
@@ -45,8 +42,6 @@ static int ssd_int8_vs_int16_altivec(const int8_t *pix1, const int16_t *pix2,
int32_t score[4];
} u = { .vscore = vec_splat_s32(0) };
-// XXX lazy way, fix it later
-
while (size16) {
// score += (pix1[i] - pix2[i]) * (pix1[i] - pix2[i]);
// load pix1 and the first batch of pix2
@@ -75,74 +70,11 @@ static int ssd_int8_vs_int16_altivec(const int8_t *pix1, const int16_t *pix2,
return u.score[3];
}
+#endif /* HAVE_ALTIVEC */
-static int32_t scalarproduct_int16_altivec(const int16_t *v1, const int16_t *v2,
- int order)
-{
- int i;
- LOAD_ZERO;
- register vec_s16 vec1;
- register vec_s32 res = vec_splat_s32(0), t;
- int32_t ires;
-
- for (i = 0; i < order; i += 8) {
- vec1 = vec_unaligned_load(v1);
- t = vec_msum(vec1, vec_ld(0, v2), zero_s32v);
- res = vec_sums(t, res);
- v1 += 8;
- v2 += 8;
- }
- res = vec_splat(res, 3);
- vec_ste(res, 0, &ires);
-
- return ires;
-}
-
-static int32_t scalarproduct_and_madd_int16_altivec(int16_t *v1,
- const int16_t *v2,
- const int16_t *v3,
- int order, int mul)
-{
- LOAD_ZERO;
- vec_s16 *pv1 = (vec_s16 *) v1;
- register vec_s16 muls = { mul, mul, mul, mul, mul, mul, mul, mul };
- register vec_s16 t0, t1, i0, i1, i4;
- register vec_s16 i2 = vec_ld(0, v2), i3 = vec_ld(0, v3);
- register vec_s32 res = zero_s32v;
- register vec_u8 align = vec_lvsl(0, v2);
- int32_t ires;
-
- order >>= 4;
- do {
- i1 = vec_ld(16, v2);
- t0 = vec_perm(i2, i1, align);
- i2 = vec_ld(32, v2);
- t1 = vec_perm(i1, i2, align);
- i0 = pv1[0];
- i1 = pv1[1];
- res = vec_msum(t0, i0, res);
- res = vec_msum(t1, i1, res);
- i4 = vec_ld(16, v3);
- t0 = vec_perm(i3, i4, align);
- i3 = vec_ld(32, v3);
- t1 = vec_perm(i4, i3, align);
- pv1[0] = vec_mladd(t0, muls, i0);
- pv1[1] = vec_mladd(t1, muls, i1);
- pv1 += 2;
- v2 += 16;
- v3 += 16;
- } while (--order);
- res = vec_splat(vec_sums(res, zero_s32v), 3);
- vec_ste(res, 0, &ires);
-
- return ires;
-}
-
-av_cold void ff_int_init_altivec(DSPContext *c, AVCodecContext *avctx)
+av_cold void ff_svq1enc_init_ppc(SVQ1EncContext *c)
{
+#if HAVE_ALTIVEC
c->ssd_int8_vs_int16 = ssd_int8_vs_int16_altivec;
-
- c->scalarproduct_int16 = scalarproduct_int16_altivec;
-
- c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_altivec;
+#endif /* HAVE_ALTIVEC */
}
diff --git a/libavcodec/svq1enc.c b/libavcodec/svq1enc.c
index 76c3e6e..bdb6f0f 100644
--- a/libavcodec/svq1enc.c
+++ b/libavcodec/svq1enc.c
@@ -34,49 +34,12 @@
#include "internal.h"
#include "mpegutils.h"
#include "svq1.h"
+#include "svq1enc.h"
#include "svq1enc_cb.h"
#undef NDEBUG
#include <assert.h>
-typedef struct SVQ1EncContext {
- /* FIXME: Needed for motion estimation, should not be used for anything
- * else, the idea is to make the motion estimation eventually independent
- * of MpegEncContext, so this will be removed then. */
- MpegEncContext m;
- AVCodecContext *avctx;
- DSPContext dsp;
- HpelDSPContext hdsp;
- AVFrame *current_picture;
- AVFrame *last_picture;
- PutBitContext pb;
- GetBitContext gb;
-
- /* why ooh why this sick breadth first order,
- * everything is slower and more complex */
- PutBitContext reorder_pb[6];
-
- int frame_width;
- int frame_height;
-
- /* Y plane block dimensions */
- int y_block_width;
- int y_block_height;
-
- /* U & V plane (C planes) block dimensions */
- int c_block_width;
- int c_block_height;
-
- uint16_t *mb_type;
- uint32_t *dummy;
- int16_t (*motion_val8[3])[2];
- int16_t (*motion_val16[3])[2];
-
- int64_t rd_total;
-
- uint8_t *scratchbuf;
-} SVQ1EncContext;
-
static void svq1_write_header(SVQ1EncContext *s, int frame_type)
{
int i;
@@ -114,6 +77,16 @@ static void svq1_write_header(SVQ1EncContext *s, int frame_type)
#define QUALITY_THRESHOLD 100
#define THRESHOLD_MULTIPLIER 0.6
+static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
+ int size)
+{
+ int score = 0, i;
+
+ for (i = 0; i < size; i++)
+ score += (pix1[i] - pix2[i]) * (pix1[i] - pix2[i]);
+ return score;
+}
+
static int encode_block(SVQ1EncContext *s, uint8_t *src, uint8_t *ref,
uint8_t *decoded, int stride, int level,
int threshold, int lambda, int intra)
@@ -175,7 +148,7 @@ static int encode_block(SVQ1EncContext *s, uint8_t *src, uint8_t *ref,
int sqr, diff, score;
vector = codebook + stage * size * 16 + i * size;
- sqr = s->dsp.ssd_int8_vs_int16(vector, block[stage], size);
+ sqr = s->ssd_int8_vs_int16(vector, block[stage], size);
diff = block_sum[stage] - sum;
score = sqr - (diff * (int64_t)diff >> (level + 3)); // FIXME: 64bit slooow
if (score < best_vector_score) {
@@ -574,6 +547,13 @@ static av_cold int svq1_encode_init(AVCodecContext *avctx)
s->y_block_height * sizeof(int16_t));
s->dummy = av_mallocz((s->y_block_width + 1) *
s->y_block_height * sizeof(int32_t));
+ s->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
+
+ if (ARCH_PPC)
+ ff_svq1enc_init_ppc(s);
+ if (ARCH_X86)
+ ff_svq1enc_init_x86(s);
+
ff_h263_encode_init(&s->m); // mv_penalty
return 0;
diff --git a/libavcodec/svq1enc.h b/libavcodec/svq1enc.h
new file mode 100644
index 0000000..1fe2815
--- /dev/null
+++ b/libavcodec/svq1enc.h
@@ -0,0 +1,78 @@
+/*
+ * SVQ1 encoder
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_SVQ1ENC_H
+#define AVCODEC_SVQ1ENC_H
+
+#include <stdint.h>
+
+#include "libavutil/frame.h"
+#include "avcodec.h"
+#include "dsputil.h"
+#include "get_bits.h"
+#include "hpeldsp.h"
+#include "mpegvideo.h"
+#include "put_bits.h"
+
+typedef struct SVQ1EncContext {
+ /* FIXME: Needed for motion estimation, should not be used for anything
+ * else, the idea is to make the motion estimation eventually independent
+ * of MpegEncContext, so this will be removed then. */
+ MpegEncContext m;
+ AVCodecContext *avctx;
+ DSPContext dsp;
+ HpelDSPContext hdsp;
+ AVFrame *current_picture;
+ AVFrame *last_picture;
+ PutBitContext pb;
+ GetBitContext gb;
+
+ /* why ooh why this sick breadth first order,
+ * everything is slower and more complex */
+ PutBitContext reorder_pb[6];
+
+ int frame_width;
+ int frame_height;
+
+ /* Y plane block dimensions */
+ int y_block_width;
+ int y_block_height;
+
+ /* U & V plane (C planes) block dimensions */
+ int c_block_width;
+ int c_block_height;
+
+ uint16_t *mb_type;
+ uint32_t *dummy;
+ int16_t (*motion_val8[3])[2];
+ int16_t (*motion_val16[3])[2];
+
+ int64_t rd_total;
+
+ uint8_t *scratchbuf;
+
+ int (*ssd_int8_vs_int16)(const int8_t *pix1, const int16_t *pix2,
+ int size);
+} SVQ1EncContext;
+
+void ff_svq1enc_init_ppc(SVQ1EncContext *c);
+void ff_svq1enc_init_x86(SVQ1EncContext *c);
+
+#endif /* AVCODEC_SVQ1ENC_H */
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index fef98a5..8830a22 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -51,6 +51,7 @@ MMX-OBJS-$(CONFIG_HPELDSP) += x86/fpel_mmx.o \
x86/hpeldsp_mmx.o
MMX-OBJS-$(CONFIG_HUFFYUVDSP) += x86/huffyuvdsp_mmx.o
+MMX-OBJS-$(CONFIG_SVQ1_ENCODER) += x86/svq1enc_mmx.o
MMX-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_mmx.o
YASM-OBJS += x86/deinterlace.o \
diff --git a/libavcodec/x86/dsputilenc_mmx.c b/libavcodec/x86/dsputilenc_mmx.c
index 79066a7..81c9d13 100644
--- a/libavcodec/x86/dsputilenc_mmx.c
+++ b/libavcodec/x86/dsputilenc_mmx.c
@@ -805,40 +805,6 @@ DCT_SAD_FUNC(ssse3)
#undef HSUM
#undef DCT_SAD
-static int ssd_int8_vs_int16_mmx(const int8_t *pix1, const int16_t *pix2,
- int size)
-{
- int sum;
- x86_reg i = size;
-
- __asm__ volatile (
- "pxor %%mm4, %%mm4 \n"
- "1: \n"
- "sub $8, %0 \n"
- "movq (%2, %0), %%mm2 \n"
- "movq (%3, %0, 2), %%mm0 \n"
- "movq 8(%3, %0, 2), %%mm1 \n"
- "punpckhbw %%mm2, %%mm3 \n"
- "punpcklbw %%mm2, %%mm2 \n"
- "psraw $8, %%mm3 \n"
- "psraw $8, %%mm2 \n"
- "psubw %%mm3, %%mm1 \n"
- "psubw %%mm2, %%mm0 \n"
- "pmaddwd %%mm1, %%mm1 \n"
- "pmaddwd %%mm0, %%mm0 \n"
- "paddd %%mm1, %%mm4 \n"
- "paddd %%mm0, %%mm4 \n"
- "jg 1b \n"
- "movq %%mm4, %%mm3 \n"
- "psrlq $32, %%mm3 \n"
- "paddd %%mm3, %%mm4 \n"
- "movd %%mm4, %1 \n"
- : "+r" (i), "=r" (sum)
- : "r" (pix1), "r" (pix2));
-
- return sum;
-}
-
#define PHADDD(a, t) \
"movq " #a ", " #t " \n\t" \
"psrlq $32, " #a " \n\t" \
@@ -958,8 +924,6 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx,
c->try_8x8basis = try_8x8basis_mmx;
}
c->add_8x8basis = add_8x8basis_mmx;
-
- c->ssd_int8_vs_int16 = ssd_int8_vs_int16_mmx;
}
if (INLINE_AMD3DNOW(cpu_flags)) {
diff --git a/libavcodec/x86/svq1enc_mmx.c b/libavcodec/x86/svq1enc_mmx.c
new file mode 100644
index 0000000..02b0a84
--- /dev/null
+++ b/libavcodec/x86/svq1enc_mmx.c
@@ -0,0 +1,73 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/x86/asm.h"
+#include "libavutil/x86/cpu.h"
+#include "libavcodec/svq1enc.h"
+
+#if HAVE_INLINE_ASM
+
+static int ssd_int8_vs_int16_mmx(const int8_t *pix1, const int16_t *pix2,
+ int size)
+{
+ int sum;
+ x86_reg i = size;
+
+ __asm__ volatile (
+ "pxor %%mm4, %%mm4 \n"
+ "1: \n"
+ "sub $8, %0 \n"
+ "movq (%2, %0), %%mm2 \n"
+ "movq (%3, %0, 2), %%mm0 \n"
+ "movq 8(%3, %0, 2), %%mm1 \n"
+ "punpckhbw %%mm2, %%mm3 \n"
+ "punpcklbw %%mm2, %%mm2 \n"
+ "psraw $8, %%mm3 \n"
+ "psraw $8, %%mm2 \n"
+ "psubw %%mm3, %%mm1 \n"
+ "psubw %%mm2, %%mm0 \n"
+ "pmaddwd %%mm1, %%mm1 \n"
+ "pmaddwd %%mm0, %%mm0 \n"
+ "paddd %%mm1, %%mm4 \n"
+ "paddd %%mm0, %%mm4 \n"
+ "jg 1b \n"
+ "movq %%mm4, %%mm3 \n"
+ "psrlq $32, %%mm3 \n"
+ "paddd %%mm3, %%mm4 \n"
+ "movd %%mm4, %1 \n"
+ : "+r" (i), "=r" (sum)
+ : "r" (pix1), "r" (pix2));
+
+ return sum;
+}
+
+#endif /* HAVE_INLINE_ASM */
+
+av_cold void ff_svq1enc_init_x86(SVQ1EncContext *c)
+{
+#if HAVE_INLINE_ASM
+ int cpu_flags = av_get_cpu_flags();
+
+ if (INLINE_MMX(cpu_flags)) {
+ c->ssd_int8_vs_int16 = ssd_int8_vs_int16_mmx;
+ }
+#endif /* HAVE_INLINE_ASM */
+}
--
Libav/FFmpeg packaging
More information about the pkg-multimedia-commits mailing list