[SCM] FFmpeg packaging branch, upstream, updated. 0e69dce4f29f262d94a4e46014aea5a35fb5e854
Reinhard Tartler
siretart at tauware.de
Sat Jan 10 15:45:09 UTC 2009
The following commit has been merged in the upstream branch:
commit 0e69dce4f29f262d94a4e46014aea5a35fb5e854
Author: Reinhard Tartler <siretart at tauware.de>
Date: Sat Jan 10 16:44:07 2009 +0100
Imported Upstream version 0.svn20090110
diff --git a/Changelog b/Changelog
index 74c9a17..d785492 100644
--- a/Changelog
+++ b/Changelog
@@ -140,6 +140,8 @@ version <next>
- liba52 wrapper removed
- Speex decoding via libspeex
- Electronic Arts TGQ decoder
+- RV30 and RV40 decoder
+- QCELP / PureVoice decoder
version 0.4.9-pre1:
diff --git a/MAINTAINERS b/MAINTAINERS
index 5e421d6..b0e9630 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -161,6 +161,7 @@ Codecs:
nuv.c Reimar Doeffinger
pcx.c Ivo van Poorten
ptx.c Ivo van Poorten
+ qcelp* Reynaldo H. Verdejo Pinochet
qdm2.c, qdm2data.h Roberto Togni
qdrw.c Kostya Shishkov
qpeg.c Kostya Shishkov
@@ -288,6 +289,9 @@ Muxers/Demuxers:
westwood.c Mike Melanson
wv.c Kostya Shishkov
+Protocols:
+ udp.c Luca Abeni
+
Operating systems / CPU architectures
=====================================
@@ -295,7 +299,6 @@ Operating systems / CPU architectures
Alpha Mans Rullgard, Falk Hueffner
ARM Mans Rullgard
BeOS Francois Revol
-i386 Michael Niedermayer
Mac OS X / PowerPC Romain Dolbeau, Guillaume Poirier
Amiga / PowerPC Colin Ward
Linux / PowerPC Luca Barbato
@@ -303,3 +306,14 @@ Windows MinGW Alex Beregszaszi, Ramiro Polla
Windows Cygwin Victor Paesa
ADI/Blackfin DSP Marc Hoffman
Sparc Roman Shaposhnik
+x86 Michael Niedermayer
+
+
+GnuPG Fingerprints of maintainers and others who have svn write access
+======================================================================
+
+Benoit Fouet B22A 4F4F 43EF 636B BB66 FCDC 0023 AE1E 2985 49C8
+Michael Niedermayer 9FF2 128B 147E F673 0BAD F133 611E C787 040B 0FAB
+Reimar Döffinger C61D 16E5 9E2C D10C 8958 38A4 0899 A2B9 06D4 D9C7
+Reynaldo H. Verdejo Pinochet 6E27 CD34 170C C78E 4D4F 5F40 C18E 077F 3114 452A
+Sascha Sommer 38A0 F88B 868E 9D3A 97D4 D6A0 E823 706F 1E07 0D3C
diff --git a/Makefile b/Makefile
index ad3daf0..00b275d 100644
--- a/Makefile
+++ b/Makefile
@@ -299,14 +299,14 @@ $(CODEC_TESTS) $(LAVF_TESTS): regtest-ref
regtest-ref: ffmpeg$(EXESUF) tests/vsynth1/00.pgm tests/vsynth2/00.pgm tests/asynth1.sw
$(CODEC_TESTS) regtest-ref: tests/tiny_psnr$(EXESUF)
- $(SRC_PATH)/tests/regression.sh $@ vsynth tests/vsynth1 a
- $(SRC_PATH)/tests/regression.sh $@ rotozoom tests/vsynth2 a
+ $(SRC_PATH)/tests/regression.sh $@ vsynth tests/vsynth1 a "$(TARGET_EXEC)" "$(TARGET_PATH)"
+ $(SRC_PATH)/tests/regression.sh $@ rotozoom tests/vsynth2 a "$(TARGET_EXEC)" "$(TARGET_PATH)"
$(LAVF_TESTS):
- $(SRC_PATH)/tests/regression.sh $@ lavf tests/vsynth1 b
+ $(SRC_PATH)/tests/regression.sh $@ lavf tests/vsynth1 b "$(TARGET_EXEC)" "$(TARGET_PATH)"
seektest: codectest libavtest tests/seek_test$(EXESUF)
- $(SRC_PATH)/tests/seek_test.sh $(SEEK_REFFILE)
+ $(SRC_PATH)/tests/seek_test.sh $(SEEK_REFFILE) "$(TARGET_EXEC)" "$(TARGET_PATH)"
servertest: ffserver$(EXESUF) tests/vsynth1/00.pgm tests/asynth1.sw
@echo
@@ -326,8 +326,8 @@ tests/vsynth2/00.pgm: tests/rotozoom$(EXESUF)
tests/asynth1.sw: tests/audiogen$(EXESUF)
$(BUILD_ROOT)/$< $@
-%$(EXESUF): %.c
- $(CC) $(FF_LDFLAGS) $(CFLAGS) -o $@ $<
+tests/%$(EXESUF): tests/%.c
+ $(HOSTCC) $(HOSTCFLAGS) $(HOSTLDFLAGS) -o $@ $< $(HOSTLIBS)
tests/seek_test$(EXESUF): tests/seek_test.c $(FF_DEP_LIBS)
$(CC) $(FF_LDFLAGS) $(CFLAGS) -o $@ $< $(FF_EXTRALIBS)
diff --git a/README b/README
index 6274ea2..404c33b 100644
--- a/README
+++ b/README
@@ -17,7 +17,7 @@ FFmpeg README
License, see the file COPYING.GPL for details. Their compilation and use
in FFmpeg is optional.
-* The file libavcodec/i386/idct_mmx.c is distributed under the GNU General
+* The file libavcodec/x86/idct_mmx.c is distributed under the GNU General
Public License. It is strictly an optimization and its use is optional.
* The file libavcodec/ac3dec.c is distributed under the GNU General Public
diff --git a/cmdutils.c b/cmdutils.c
index f52f56e..32ffefa 100644
--- a/cmdutils.c
+++ b/cmdutils.c
@@ -127,10 +127,10 @@ void parse_options(int argc, char **argv, const OptionDef *options,
opt = argv[optindex++];
if (handleoptions && opt[0] == '-' && opt[1] != '\0') {
- if (opt[1] == '-' && opt[2] == '\0') {
- handleoptions = 0;
- continue;
- }
+ if (opt[1] == '-' && opt[2] == '\0') {
+ handleoptions = 0;
+ continue;
+ }
po= find_option(options, opt + 1);
if (!po->name)
po= find_option(options, "default");
@@ -176,25 +176,30 @@ unknown_opt:
int opt_default(const char *opt, const char *arg){
int type;
+ int ret= 0;
const AVOption *o= NULL;
int opt_types[]={AV_OPT_FLAG_VIDEO_PARAM, AV_OPT_FLAG_AUDIO_PARAM, 0, AV_OPT_FLAG_SUBTITLE_PARAM, 0};
- for(type=0; type<CODEC_TYPE_NB; type++){
+ for(type=0; type<CODEC_TYPE_NB && ret>= 0; type++){
const AVOption *o2 = av_find_opt(avctx_opts[0], opt, NULL, opt_types[type], opt_types[type]);
if(o2)
- o = av_set_string2(avctx_opts[type], opt, arg, 1);
+ ret = av_set_string3(avctx_opts[type], opt, arg, 1, &o);
}
if(!o)
- o = av_set_string2(avformat_opts, opt, arg, 1);
+ ret = av_set_string3(avformat_opts, opt, arg, 1, &o);
if(!o)
- o = av_set_string2(sws_opts, opt, arg, 1);
+ ret = av_set_string3(sws_opts, opt, arg, 1, &o);
if(!o){
if(opt[0] == 'a')
- o = av_set_string2(avctx_opts[CODEC_TYPE_AUDIO], opt+1, arg, 1);
+ ret = av_set_string3(avctx_opts[CODEC_TYPE_AUDIO], opt+1, arg, 1, &o);
else if(opt[0] == 'v')
- o = av_set_string2(avctx_opts[CODEC_TYPE_VIDEO], opt+1, arg, 1);
+ ret = av_set_string3(avctx_opts[CODEC_TYPE_VIDEO], opt+1, arg, 1, &o);
else if(opt[0] == 's')
- o = av_set_string2(avctx_opts[CODEC_TYPE_SUBTITLE], opt+1, arg, 1);
+ ret = av_set_string3(avctx_opts[CODEC_TYPE_SUBTITLE], opt+1, arg, 1, &o);
+ }
+ if (o && ret < 0) {
+ fprintf(stderr, "Invalid value '%s' for option '%s'\n", arg, opt);
+ exit(1);
}
if(!o)
return -1;
@@ -219,7 +224,7 @@ void set_context_opts(void *ctx, void *opts_ctx, int flags)
const char *str= av_get_string(opts_ctx, opt_names[i], &opt, buf, sizeof(buf));
/* if an option with name opt_names[i] is present in opts_ctx then str is non-NULL */
if(str && ((opt->flags & flags) == flags))
- av_set_string2(ctx, opt_names[i], str, 1);
+ av_set_string3(ctx, opt_names[i], str, 1, NULL);
}
}
@@ -287,7 +292,7 @@ static void print_all_lib_versions(FILE* outstream, int indent)
void show_banner(void)
{
- fprintf(stderr, "%s version " FFMPEG_VERSION ", Copyright (c) %d-2008 Fabrice Bellard, et al.\n",
+ fprintf(stderr, "%s version " FFMPEG_VERSION ", Copyright (c) %d-2009 Fabrice Bellard, et al.\n",
program_name, program_birth_year);
fprintf(stderr, " configuration: " FFMPEG_CONFIGURATION "\n");
print_all_lib_versions(stderr, 1);
diff --git a/common.mak b/common.mak
index d4c1f14..8ac6ee0 100644
--- a/common.mak
+++ b/common.mak
@@ -91,10 +91,10 @@ $(SUBDIR)%-test.o: $(SUBDIR)%.c
$(SUBDIR)%-test.o: $(SUBDIR)%-test.c
$(CC) $(CFLAGS) -DTEST -c -o $$@ $$^
-$(SUBDIR)i386/%.o: $(SUBDIR)i386/%.asm
+$(SUBDIR)x86/%.o: $(SUBDIR)x86/%.asm
$(YASM) $(YASMFLAGS) -I $$(<D)/ -o $$@ $$<
-$(SUBDIR)i386/%.d: $(SUBDIR)i386/%.asm
+$(SUBDIR)x86/%.d: $(SUBDIR)x86/%.asm
$(YASM) $(YASMFLAGS) -I $$(<D)/ -M -o $$(@:%.d=%.o) $$< > $$@
clean::
diff --git a/configure b/configure
index d633444..56606aa 100755
--- a/configure
+++ b/configure
@@ -68,111 +68,131 @@ show_help(){
echo " --shlibdir=DIR install shared libs in DIR [PREFIX/lib]"
echo " --incdir=DIR install includes in DIR [PREFIX/include]"
echo " --mandir=DIR install man page in DIR [PREFIX/share/man]"
- echo " --enable-static build static libraries [default=yes]"
- echo " --disable-static do not build static libraries [default=no]"
- echo " --enable-shared build shared libraries [default=no]"
- echo " --disable-shared do not build shared libraries [default=yes]"
- echo " --enable-gpl allow use of GPL code, the resulting libav*"
- echo " and ffmpeg will be under GPL [default=no]"
- echo " --enable-nonfree allow use of nonfree code, the resulting libav*"
- echo " and ffmpeg will be unredistributable [default=no]"
- echo " --enable-postproc enable GPLed postprocessing support [default=no]"
- echo " --enable-swscale software scaler support [default=no]"
- echo " --enable-avfilter video filter support (replaces vhook) [default=no]"
- echo " --enable-avfilter-lavf video filters dependant on avformat [default=no]"
- echo " --enable-beosthreads use BeOS threads [default=no]"
- echo " --enable-os2threads use OS/2 threads [default=no]"
- echo " --enable-pthreads use pthreads [default=no]"
- echo " --enable-w32threads use Win32 threads [default=no]"
- echo " --enable-x11grab enable X11 grabbing [default=no]"
+ echo " --enable-static build static libraries [yes]"
+ echo " --disable-static do not build static libraries [no]"
+ echo " --enable-shared build shared libraries [no]"
+ echo " --disable-shared do not build shared libraries [yes]"
+ echo " --enable-gpl allow use of GPL code, the resulting libs"
+ echo " and binaries will be under GPL [no]"
+ echo " --enable-nonfree allow use of nonfree code, the resulting libs"
+ echo " and binaries will be unredistributable [no]"
+ echo " --enable-postproc enable GPLed postprocessing support [no]"
+ echo " --enable-swscale enable GPLed software scaler support [no]"
+ echo " --enable-avfilter video filter support (replaces vhook) [no]"
+ echo " --enable-avfilter-lavf video filters dependent on avformat [no]"
+ echo " --disable-vhook disable video hooking support"
+ echo " --enable-beosthreads use BeOS threads [no]"
+ echo " --enable-os2threads use OS/2 threads [no]"
+ echo " --enable-pthreads use pthreads [no]"
+ echo " --enable-vdpau enable VDPAU support [no]"
+ echo " --enable-w32threads use Win32 threads [no]"
+ echo " --enable-x11grab enable X11 grabbing [no]"
+ echo " --enable-xvmc enable XvMC support [no]"
echo
echo "External library support:"
- echo " --enable-mlib use Sun medialib [default=no]"
- echo " --enable-avisynth allow reading AVISynth script files [default=no]"
- echo " --enable-libamr-nb enable libamr-nb floating point audio codec"
- echo " --enable-libamr-wb enable libamr-wb floating point audio codec"
+ echo " --enable-mlib enable Sun medialib [no]"
+ echo " --enable-avisynth enable reading of AVISynth script files [no]"
+ echo " --enable-bzlib enable bzlib [autodetect]"
+ echo " --enable-libamr-nb enable libamr-nb floating point audio codec [no]"
+ echo " --enable-libamr-wb enable libamr-wb floating point audio codec [no]"
echo " --enable-libdc1394 enable IIDC-1394 grabbing using libdc1394"
- echo " and libraw1394 [default=no]"
- echo " --enable-libdirac enable Dirac support via libdirac [default=no]"
- echo " --enable-libfaac enable FAAC support via libfaac [default=no]"
- echo " --enable-libfaad enable FAAD support via libfaad [default=no]"
- echo " --enable-libfaadbin open libfaad.so.0 at runtime [default=no]"
- echo " --enable-libgsm enable GSM support via libgsm [default=no]"
- echo " --enable-libmp3lame enable MP3 encoding via libmp3lame [default=no]"
+ echo " and libraw1394 [no]"
+ echo " --enable-libdirac enable Dirac support via libdirac [no]"
+ echo " --enable-libfaac enable FAAC support via libfaac [no]"
+ echo " --enable-libfaad enable FAAD support via libfaad [no]"
+ echo " --enable-libfaadbin open libfaad.so.0 at runtime [no]"
+ echo " --enable-libgsm enable GSM support via libgsm [no]"
+ echo " --enable-libmp3lame enable MP3 encoding via libmp3lame [no]"
echo " --enable-libnut enable NUT (de)muxing via libnut,"
- echo " native demuxer exists [default=no]"
- echo " --enable-libschroedinger enable Dirac support via libschroedinger [default=no]"
- echo " --enable-libspeex enable Speex decoding via libspeex [default=no]"
- echo " --enable-libtheora enable Theora encoding via libtheora [default=no]"
+ echo " native (de)muxer exists [no]"
+ echo " --enable-libschroedinger enable Dirac support via libschroedinger [no]"
+ echo " --enable-libspeex enable Speex decoding via libspeex [no]"
+ echo " --enable-libtheora enable Theora encoding via libtheora [no]"
echo " --enable-libvorbis enable Vorbis encoding via libvorbis,"
- echo " native implementation exists [default=no]"
- echo " --enable-libx264 enable H.264 encoding via x264 [default=no]"
+ echo " native implementation exists [no]"
+ echo " --enable-libx264 enable H.264 encoding via x264 [no]"
echo " --enable-libxvid enable Xvid encoding via xvidcore,"
- echo " native MPEG-4/Xvid encoder exists [default=no]"
+ echo " native MPEG-4/Xvid encoder exists [no]"
+ echo " --enable-zlib enable zlib [autodetect]"
echo ""
echo "Advanced options (experts only):"
echo " --source-path=PATH path to source code [$source_path]"
echo " --cross-prefix=PREFIX use PREFIX for compilation tools [$cross_prefix]"
echo " --enable-cross-compile assume a cross-compiler is used"
echo " --target-os=OS compiler targets OS [$target_os]"
+ echo " --target-exec=CMD command to run executables on target"
+ echo " --target-path=DIR path to view of build directory on target"
+ echo " --nm=NM use nm tool"
echo " --cc=CC use C compiler CC [$cc]"
+ echo " --host-cc=HOSTCC use host C compiler HOSTCC"
+ echo " --host-cflags=HCFLAGS use HCFLAGS when compiling for host"
+ echo " --host-ldflags=HLDFLAGS use HLDFLAGS when linking for host"
+ echo " --host-libs=HLIBS use libs HLIBS when linking for host"
echo " --extra-cflags=ECFLAGS add ECFLAGS to CFLAGS [$CFLAGS]"
echo " --extra-ldflags=ELDFLAGS add ELDFLAGS to LDFLAGS [$LDFLAGS]"
echo " --extra-libs=ELIBS add ELIBS [$ELIBS]"
echo " --extra-version=STRING version string suffix []"
- echo " --build-suffix=SUFFIX suffix for application specific build []"
- echo " --arch=ARCH select architecture [$arch]"
- echo " --cpu=CPU selects the minimum cpu required (affects"
+ echo " --build-suffix=SUFFIX library name suffix []"
+ echo " --arch=ARCH select architecture [$arch]"
+ echo " --cpu=CPU select the minimum required CPU (affects"
echo " instruction selection, may crash on older CPUs)"
echo " --enable-powerpc-perf enable performance report on PPC"
echo " (requires enabling PMC)"
- echo " --disable-mmx disable MMX usage"
- echo " --disable-mmx2 disable MMX2 usage"
- echo " --disable-ssse3 disable SSSE3 usage"
- echo " --disable-armv5te disable armv5te usage"
- echo " --disable-armv6 disable armv6 usage"
- echo " --disable-armvfp disable ARM VFP usage"
- echo " --disable-iwmmxt disable iwmmxt usage"
- echo " --disable-altivec disable AltiVec usage"
- echo " --disable-network disable network support [default=no]"
- echo " --disable-ipv6 disable ipv6 support [default=no]"
- echo " --disable-zlib disable zlib [default=no]"
- echo " --disable-bzlib disable bzlib [default=no]"
- echo " --disable-vhook disable video hooking support"
+ echo " --disable-altivec disable AltiVec optimizations"
+ echo " --disable-mmx disable MMX optimizations"
+ echo " --disable-mmx2 disable MMX2 optimizations"
+ echo " --disable-sse disable SSE optimizations"
+ echo " --disable-ssse3 disable SSSE3 optimizations"
+ echo " --disable-armv5te disable armv5te optimizations"
+ echo " --disable-armv6 disable armv6 optimizations"
+ echo " --disable-armvfp disable ARM VFP optimizations"
+ echo " --disable-iwmmxt disable iwmmxt optimizations"
+ echo " --disable-mmi disable MMI optimizations"
+ echo " --disable-neon disable neon optimizations"
+ echo " --disable-vis disable VIS optimizations"
+ echo " --disable-network disable network support [no]"
+ echo " --disable-ipv6 disable IPv6 support [no]"
echo " --disable-mpegaudio-hp faster (but less accurate)"
- echo " MPEG audio decoding [default=no]"
+ echo " MPEG audio decoding [no]"
+ echo " --disable-aandct disable AAN DCT code"
+ echo " --disable-fft disable FFT code"
+ echo " --disable-golomb disable Golomb code"
+ echo " --disable-mdct disable MDCT code"
echo " --enable-gray enable full grayscale support (slower color)"
echo " --disable-ffmpeg disable ffmpeg build"
- echo " --disable-ffserver disable ffserver build"
echo " --disable-ffplay disable ffplay build"
+ echo " --disable-ffserver disable ffserver build"
echo " --enable-small optimize for size instead of speed"
echo " --enable-hardcoded-tables use hardcoded tables instead of runtime generation"
echo " --enable-memalign-hack emulate memalign, interferes with memory debuggers"
- echo " --disable-encoder=NAME disables encoder NAME"
- echo " --enable-encoder=NAME enables encoder NAME"
- echo " --disable-decoder=NAME disables decoder NAME"
- echo " --enable-decoder=NAME enables decoder NAME"
- echo " --disable-encoders disables all encoders"
- echo " --disable-decoders disables all decoders"
- echo " --disable-muxer=NAME disables muxer NAME"
- echo " --enable-muxer=NAME enables muxer NAME"
- echo " --disable-muxers disables all muxers"
- echo " --disable-demuxer=NAME disables demuxer NAME"
- echo " --enable-demuxer=NAME enables demuxer NAME"
- echo " --disable-demuxers disables all demuxers"
- echo " --enable-parser=NAME enables parser NAME"
- echo " --disable-parser=NAME disables parser NAME"
- echo " --disable-parsers disables all parsers"
- echo " --enable-bsf=NAME enables bitstream filter NAME"
- echo " --disable-bsf=NAME disables bitstream filter NAME"
- echo " --disable-bsfs disables all bitstream filters"
- echo " --enable-protocol=NAME enables protocol NAME"
- echo " --disable-protocol=NAME disables protocol NAME"
- echo " --disable-protocols disables all protocols"
- echo " --disable-devices disables all devices"
- echo " --enable-filter=NAME enables filter NAME"
- echo " --disable-filter=NAME disables filter NAME"
- echo " --disable-filters disables all filters"
+ echo " --enable-beos-netserver enable BeOS netserver"
+ echo " --disable-encoder=NAME disable encoder NAME"
+ echo " --enable-encoder=NAME enable encoder NAME"
+ echo " --disable-encoders disable all encoders"
+ echo " --disable-decoder=NAME disable decoder NAME"
+ echo " --enable-decoder=NAME enable decoder NAME"
+ echo " --disable-decoders disable all decoders"
+ echo " --disable-muxer=NAME disable muxer NAME"
+ echo " --enable-muxer=NAME enable muxer NAME"
+ echo " --disable-muxers disable all muxers"
+ echo " --disable-demuxer=NAME disable demuxer NAME"
+ echo " --enable-demuxer=NAME enable demuxer NAME"
+ echo " --disable-demuxers disable all demuxers"
+ echo " --enable-parser=NAME enable parser NAME"
+ echo " --disable-parser=NAME disable parser NAME"
+ echo " --disable-parsers disable all parsers"
+ echo " --enable-bsf=NAME enable bitstream filter NAME"
+ echo " --disable-bsf=NAME disable bitstream filter NAME"
+ echo " --disable-bsfs disable all bitstream filters"
+ echo " --enable-protocol=NAME enable protocol NAME"
+ echo " --disable-protocol=NAME disable protocol NAME"
+ echo " --disable-protocols disable all protocols"
+ echo " --disable-indevs disable input devices"
+ echo " --disable-outdevs disable output devices"
+ echo " --disable-devices disable all devices"
+ echo " --enable-filter=NAME enable filter NAME"
+ echo " --disable-filter=NAME disable filter NAME"
+ echo " --disable-filters disable all filters"
echo " --list-decoders show all available decoders"
echo " --list-encoders show all available encoders"
echo " --list-muxers show all available muxers"
@@ -353,7 +373,7 @@ disabled_any(){
set_default(){
for opt; do
- eval test -z "\$$opt" && eval $opt=\$${opt}_default
+ eval : \${$opt:=\$${opt}_default}
done
}
@@ -494,7 +514,7 @@ check_asm(){
asm="$2"
shift 2
check_cc "$@" <<EOF && enable $name || disable $name
-int foo(void){ __asm__ volatile($asm); }
+void foo(void){ __asm__ volatile($asm); }
EOF
}
@@ -714,6 +734,7 @@ COMPONENT_LIST="
CONFIG_LIST="
$COMPONENT_LIST
+ aandct
avfilter
avfilter_lavf
avisynth
@@ -753,10 +774,14 @@ CONFIG_LIST="
nonfree
postproc
powerpc_perf
+ shared
small
+ static
swscale
+ vdpau
vhook
x11grab
+ xvmc
zlib
"
@@ -769,13 +794,13 @@ THREADS_LIST='
ARCH_LIST='
alpha
- armv4l
+ arm
bfin
ia64
m68k
mips
parisc
- powerpc
+ ppc
s390
sh4
sparc
@@ -789,12 +814,14 @@ ARCH_EXT_LIST='
altivec
armv5te
armv6
+ armv6t2
armvfp
iwmmxt
mmi
mmx
mmx2
neon
+ sse
ssse3
vis
'
@@ -841,6 +868,7 @@ HAVE_LIST="
memalign
mkstemp
pld
+ posix_memalign
ppc64
round
roundf
@@ -863,6 +891,7 @@ HAVE_LIST="
# options emitted with CONFIG_ prefix but not available on command line
CONFIG_EXTRA="
fft_mmx
+ oldscaler
"
CMDLINE_SELECT="
@@ -874,8 +903,6 @@ CMDLINE_SELECT="
extra_warnings
logging
optimizations
- shared
- static
stripping
"
@@ -896,29 +923,39 @@ CMDLINE_SET="
cpu
cross_prefix
extra_version
+ host_cc
+ host_cflags
+ host_ldflags
+ host_libs
logfile
+ nm
source_path
+ target_exec
target_os
+ target_path
"
# code dependency declarations
# architecture extensions
-altivec_deps="powerpc"
-armv5te_deps="armv4l"
-armv6_deps="armv4l"
-armvfp_deps="armv4l"
-iwmmxt_deps="armv4l"
+altivec_deps="ppc"
+armv5te_deps="arm"
+armv6_deps="arm"
+armv6t2_deps="arm"
+armvfp_deps="arm"
+iwmmxt_deps="arm"
mmi_deps="mips"
mmx_deps="x86"
mmx2_deps="x86 mmx"
-neon_deps="armv4l"
-ssse3_deps="x86"
+neon_deps="arm"
+sse_deps="mmx"
+ssse3_deps="x86 sse"
vis_deps="sparc"
# common features
fft_suggest="fft_mmx"
fft_mmx_deps="mmx yasm"
+oldscaler_deps="!swscale"
# decoders / encoders
aac_decoder_select="fft mdct"
@@ -929,25 +966,42 @@ cavs_decoder_select="golomb"
cook_decoder_select="fft mdct"
cscd_decoder_suggest="zlib"
dca_decoder_select="fft mdct"
+dnxhd_encoder_select="aandct"
dxa_decoder_select="zlib"
eac3_decoder_deps="gpl"
eac3_decoder_select="fft mdct"
+eatgq_decoder_select="aandct"
ffv1_decoder_select="golomb"
flac_decoder_select="golomb"
flac_encoder_select="golomb"
flashsv_decoder_select="zlib"
flashsv_encoder_select="zlib"
+flv_encoder_select="aandct"
+h261_encoder_select="aandct"
+h263_encoder_select="aandct"
+h263p_encoder_select="aandct"
h264_decoder_select="golomb"
+h264_vdpau_decoder_deps="vdpau"
imc_decoder_select="fft mdct"
jpegls_decoder_select="golomb"
jpegls_encoder_select="golomb"
+ljpeg_encoder_select="aandct"
loco_decoder_select="golomb"
-mpeg_xvmc_decoder_deps="xvmc"
+mjpeg_encoder_select="aandct"
+mpeg1video_encoder_select="aandct"
+mpeg2video_encoder_select="aandct"
+mpeg4_encoder_select="aandct"
+mpeg_xvmc_decoder_deps="xvmc X11_extensions_XvMClib_h"
+msmpeg4v1_encoder_select="aandct"
+msmpeg4v2_encoder_select="aandct"
+msmpeg4v3_encoder_select="aandct"
nellymoser_decoder_select="fft mdct"
nellymoser_encoder_select="fft mdct"
png_decoder_select="zlib"
png_encoder_select="zlib"
qdm2_decoder_select="fft mdct"
+rv10_encoder_select="aandct"
+rv20_encoder_select="aandct"
shorten_decoder_select="golomb"
sonic_decoder_select="golomb"
sonic_encoder_select="golomb"
@@ -963,6 +1017,8 @@ wmav1_decoder_select="fft mdct"
wmav1_encoder_select="fft mdct"
wmav2_decoder_select="fft mdct"
wmav2_encoder_select="fft mdct"
+wmv1_encoder_select="aandct"
+wmv2_encoder_select="aandct"
zlib_decoder_select="zlib"
zlib_encoder_select="zlib"
zmbv_decoder_select="zlib"
@@ -991,6 +1047,7 @@ libvorbis_encoder_deps="libvorbis"
libx264_encoder_deps="libx264"
libxvid_encoder_deps="libxvid"
mpeg4aac_decoder_deps="libfaad"
+vdpau_deps="vdpau_vdpau_h vdpau_vdpau_x11_h"
# demuxers / muxers
ac3_demuxer_deps="ac3_parser"
@@ -1050,15 +1107,15 @@ mandir_default='${prefix}/share/man'
shlibdir_default="$libdir_default"
# toolchain
-cc_default="gcc"
-yasmexe="yasm"
ar="ar"
-nm="nm"
-ranlib="ranlib"
-strip="strip"
-asmalign_pot="unknown"
+cc_default="gcc"
+host_cc_default="gcc"
ln_s="ln -sf"
+nm_default="nm"
objformat="elf"
+ranlib="ranlib"
+strip="strip"
+yasmexe="yasm"
# machine
arch=`uname -m`
@@ -1075,6 +1132,7 @@ enable ffserver
enable ipv6
enable mpegaudio_hp
enable network
+enable oldscaler
enable optimizations
enable protocols
enable static
@@ -1097,6 +1155,11 @@ SLIBNAME_WITH_VERSION='$(SLIBNAME).$(LIBVERSION)'
SLIBNAME_WITH_MAJOR='$(SLIBNAME).$(LIBMAJOR)'
LIB_INSTALL_EXTRA_CMD='$(RANLIB) "$(LIBDIR)/$(LIBNAME)"'
+host_cflags='-O3 -g -Wall'
+host_libs='-lm'
+
+target_path='.'
+
# gcc stupidly only outputs the basename of targets with -MM, but we need the
# full relative path for objects in subdirectories for non-recursive Make.
DEPEND_CMD='$(CC) $(CFLAGS) -MM -MG $< | sed -e "/^\#.*/d" -e "s,^[[:space:]]*$(*F)\\.o,$(@D)/$(*F).o," -e "s,\\([[:space:]]\\)\\(version\\.h\\),\\1\$$(BUILD_ROOT_REL)/\\2,"'
@@ -1211,30 +1274,29 @@ disabled logging && logfile=/dev/null
echo "# $0 $@" > $logfile
set >> $logfile
-cc_default="${cross_prefix}${cc_default}"
-yasmexe="${cross_prefix}${yasmexe}"
+test -n "$cross_prefix" && enable cross_compile
+
ar="${cross_prefix}${ar}"
-nm="${cross_prefix}${nm}"
+cc_default="${cross_prefix}${cc_default}"
+nm_default="${cross_prefix}${nm_default}"
ranlib="${cross_prefix}${ranlib}"
strip="${cross_prefix}${strip}"
-set_default cc
+set_default cc nm
+enabled cross_compile || host_cc_default=$cc
+set_default host_cc
# set temporary file name
-if test ! -z "$TMPDIR" ; then
- TMPDIR1="${TMPDIR}"
-elif test ! -z "$TEMPDIR" ; then
- TMPDIR1="${TEMPDIR}"
-else
- TMPDIR1="/tmp"
-fi
+: ${TMPDIR:=$TEMPDIR}
+: ${TMPDIR:=$TMP}
+: ${TMPDIR:=/tmp}
-TMPC="${TMPDIR1}/ffmpeg-conf-${RANDOM}-$$-${RANDOM}.c"
-TMPE="${TMPDIR1}/ffmpeg-conf-${RANDOM}-$$-${RANDOM}${EXESUF}"
-TMPH="${TMPDIR1}/ffmpeg-conf-${RANDOM}-$$-${RANDOM}.h"
-TMPO="${TMPDIR1}/ffmpeg-conf-${RANDOM}-$$-${RANDOM}.o"
-TMPS="${TMPDIR1}/ffmpeg-conf-${RANDOM}-$$-${RANDOM}.S"
-TMPSH="${TMPDIR1}/ffmpeg-conf-${RANDOM}-$$-${RANDOM}.sh"
+TMPC="${TMPDIR}/ffmpeg-conf-${RANDOM}-$$-${RANDOM}.c"
+TMPE="${TMPDIR}/ffmpeg-conf-${RANDOM}-$$-${RANDOM}${EXESUF}"
+TMPH="${TMPDIR}/ffmpeg-conf-${RANDOM}-$$-${RANDOM}.h"
+TMPO="${TMPDIR}/ffmpeg-conf-${RANDOM}-$$-${RANDOM}.o"
+TMPS="${TMPDIR}/ffmpeg-conf-${RANDOM}-$$-${RANDOM}.S"
+TMPSH="${TMPDIR}/ffmpeg-conf-${RANDOM}-$$-${RANDOM}.sh"
check_cflags -std=c99
@@ -1252,26 +1314,29 @@ case "$arch" in
int test[sizeof(char*) - 7];
EOF
;;
- # armv4l is a subset of armv[567]*l
arm|armv[4567]*l)
- arch="armv4l"
+ arch="arm"
;;
alpha)
arch="alpha"
enable fast_64bit
;;
"Power Macintosh"|ppc|powerpc)
- arch="powerpc"
+ arch="ppc"
enable fast_unaligned
;;
ppc64)
- arch="powerpc"
+ arch="ppc"
enable fast_64bit
enable fast_unaligned
;;
mips|mipsel|IP*)
arch="mips"
;;
+ mips64)
+ arch="mips"
+ enable fast_64bit
+ ;;
sun4u|sparc64)
arch="sparc64"
enable fast_64bit
@@ -1279,13 +1344,13 @@ EOF
sparc)
arch="sparc"
;;
- sh4)
+ sh4|sh)
arch="sh4"
;;
- parisc)
+ parisc|hppa)
arch="parisc"
;;
- parisc64)
+ parisc64|hppa64)
arch="parisc"
enable fast_64bit
;;
@@ -1343,6 +1408,7 @@ case $target_os in
FFSERVERLDFLAGS=""
SHFLAGS='-shared -Wl,-h,$$(@F)'
network_extralibs="-lsocket -lnsl"
+ add_cflags -D__EXTENSIONS__
;;
netbsd)
oss_demuxer_extralibs="-lossaudio"
@@ -1352,9 +1418,6 @@ case $target_os in
disable need_memalign
LIBOBJFLAGS='$(PIC)'
SHFLAGS='-shared'
- SLIBNAME='$(SLIBPREF)$(FULLNAME)$(SLIBSUF).$(LIBVERSION)'
- SLIBNAME_WITH_VERSION='$(SLIBNAME)'
- SLIBNAME_WITH_MAJOR='$(SLIBNAME)'
oss_demuxer_extralibs="-lossaudio"
oss_muxer_extralibs="-lossaudio"
;;
@@ -1408,8 +1471,8 @@ case $target_os in
check_cpp_condition _mingw.h "(__MINGW32_MAJOR_VERSION > 3) || (__MINGW32_MAJOR_VERSION == 3 && __MINGW32_MINOR_VERSION >= 15)" ||
die "ERROR: MinGW runtime version must be >= 3.15."
enabled_any avisynth vfwcap_demuxer &&
- { check_cpp_condition w32api.h "(__W32API_MAJOR_VERSION > 3) || (__W32API_MAJOR_VERSION == 3 && __W32API_MINOR_VERSION >= 12)" ||
- die "ERROR: avisynth and vfwcap_demuxer require w32api version 3.12 or later."; }
+ { check_cpp_condition w32api.h "(__W32API_MAJOR_VERSION > 3) || (__W32API_MAJOR_VERSION == 3 && __W32API_MINOR_VERSION >= 13)" ||
+ die "ERROR: avisynth and vfwcap_demuxer require w32api version 3.13 or later."; }
;;
cygwin*)
target_os=cygwin
@@ -1485,8 +1548,6 @@ add_extralibs $osextralibs
# Combine FFLDFLAGS and the LDFLAGS environment variable.
LDFLAGS="$FFLDFLAGS $LDFLAGS"
-test -n "$cross_prefix" && enable cross_compile
-
# we need to build at least one lib type
if ! enabled_any static shared; then
cat <<EOF
@@ -1635,9 +1696,8 @@ EOF
chmod +x $TMPSH >> $logfile 2>&1
if ! $TMPSH >> $logfile 2>&1; then
cat <<EOF
-Unable to create and execute files in $TMPDIR1. Set the TMPDIR environment
-variable to another directory and make sure that $TMPDIR1 is not mounted
-noexec.
+Unable to create and execute files in $TMPDIR. Set the TMPDIR environment
+variable to another directory and make sure that it is not mounted noexec.
EOF
die "Sanity test failed."
fi
@@ -1699,7 +1759,7 @@ fi
# check for assembler specific support
-if test $arch = "powerpc"; then
+if test $arch = "ppc"; then
check_asm dcbzl '"dcbzl 0, 1"'
fi
@@ -1729,9 +1789,10 @@ EOF
fi
# We have to check if pld is a nop and disable it.
-enabled armv4l && check_asm pld '"pld [r0]"'
+enabled arm && check_asm pld '"pld [r0]"'
enabled armv5te && check_asm armv5te '"qadd r0, r0, r0"'
enabled armv6 && check_asm armv6 '"sadd16 r0, r0, r0"'
+enabled armv6t2 && check_asm armv6t2 '"movt r0, #0"'
enabled armvfp && check_asm armvfp '"fadds s0, s0, s0"'
enabled iwmmxt && check_asm iwmmxt '"wunpckelub wr6, wr4"'
enabled mmi && check_asm mmi '"lq $2, 0($2)"'
@@ -1762,6 +1823,7 @@ check_func getrusage
check_func inet_aton $network_extralibs
check_func memalign
check_func mkstemp
+check_func posix_memalign
check_func_headers windows.h GetProcessTimes
check_header conio.h
@@ -1772,13 +1834,16 @@ check_header sys/mman.h
check_header sys/resource.h
check_header sys/select.h
check_header termios.h
+check_header vdpau/vdpau.h
+check_header vdpau/vdpau_x11.h
+check_header X11/extensions/XvMClib.h
-if ! enabled_any memalign memalign_hack && enabled need_memalign ; then
- die "Error, no memalign() but SSE enabled, disable it or use --enable-memalign-hack."
+if ! enabled_any memalign memalign_hack posix_memalign && enabled need_memalign ; then
+ die "Error, no aligned memory allocator but SSE enabled, disable it or use --enable-memalign-hack."
fi
-disabled zlib || check_lib zlib.h zlibVersion -lz || disable zlib
-disabled bzlib || check_lib bzlib.h BZ2_bzlibVersion -lbz2 || disable bzlib
+disabled zlib || check_lib zlib.h zlibVersion -lz || disable zlib
+disabled bzlib || check_lib2 bzlib.h BZ2_bzlibVersion -lbz2 || disable bzlib
# check for some common methods of building with pthread support
# do this before the optional library checks as some of them require pthreads
@@ -1826,17 +1891,17 @@ enabled libdirac && add_cflags $(pkg-config --cflags dirac) &&
enabled libfaac && require2 libfaac "stdint.h faac.h" faacEncGetVersion -lfaac
enabled libfaad && require2 libfaad faad.h faacDecOpen -lfaad
enabled libgsm && require libgsm gsm.h gsm_create -lgsm
-enabled libmp3lame && require LAME lame/lame.h lame_init -lmp3lame -lm
+enabled libmp3lame && require libmp3lame lame/lame.h lame_init -lmp3lame -lm
enabled libnut && require libnut libnut.h nut_demuxer_init -lnut
enabled libschroedinger && add_cflags $(pkg-config --cflags schroedinger-1.0) &&
require libschroedinger schroedinger/schro.h schro_init $(pkg-config --libs schroedinger-1.0)
enabled libspeex && require libspeex speex/speex.h speex_decoder_init -lspeex
enabled libtheora && require libtheora theora/theora.h theora_info_init -ltheora -logg
enabled libvorbis && require libvorbis vorbis/vorbisenc.h vorbis_info_init -lvorbisenc -lvorbis -logg
-enabled libx264 && require x264 x264.h x264_encoder_open -lx264 -lm &&
+enabled libx264 && require libx264 x264.h x264_encoder_open -lx264 -lm &&
{ check_cpp_condition x264.h "X264_BUILD >= 65" ||
die "ERROR: libx264 version must be >= 0.65."; }
-enabled libxvid && require Xvid xvid.h xvid_global -lxvidcore
+enabled libxvid && require libxvid xvid.h xvid_global -lxvidcore
enabled mlib && require mediaLib mlib_types.h mlib_VectorSub_S16_U8_Mod -lmlib
# libdc1394 check
@@ -1909,7 +1974,7 @@ texi2html -version > /dev/null 2>&1 && enable texi2html || disable texi2html
# Network check
if enabled network; then
- check_type sys/socket.h socklen_t
+ check_type "sys/types.h sys/socket.h" socklen_t
# Prefer arpa/inet.h over winsock2
if check_header arpa/inet.h ; then
check_func closesocket
@@ -2031,10 +2096,7 @@ fi
VHOOKCFLAGS="-fPIC"
# Find out if the .align argument is a power of two or not.
-if test $asmalign_pot = "unknown"; then
- disable asmalign_pot
- echo '__asm__ (".align 3");' | check_cc && enable asmalign_pot
-fi
+check_asm asmalign_pot '".align 3"'
enabled_any $DECODER_LIST && enable decoders
enabled_any $ENCODER_LIST && enable encoders
@@ -2062,11 +2124,6 @@ check_deps $CONFIG_LIST \
$OUTDEV_LIST \
$PROTOCOL_LIST \
-enabled libdc1394 && append pkg_requires "libraw1394"
-enabled libdirac && append pkg_requires "dirac"
-enabled libtheora && append pkg_requires "theora"
-enabled libvorbis && append pkg_requires "vorbisenc"
-
echo "install prefix $prefix"
echo "source path $source_path"
echo "C compiler $cc"
@@ -2082,14 +2139,16 @@ echo "big-endian ${bigendian-no}"
if test $arch = "x86_32" -o $arch = "x86_64"; then
echo "yasm ${yasm-no}"
echo "MMX enabled ${mmx-no}"
+ echo "SSE enabled ${sse-no}"
echo "CMOV enabled ${cmov-no}"
echo "CMOV is fast ${fast_cmov-no}"
echo "EBX available ${ebx_available-no}"
echo "EBP available ${ebp_available-no}"
fi
-if test $arch = "armv4l"; then
+if test $arch = "arm"; then
echo "ARMv5TE enabled ${armv5te-no}"
echo "ARMv6 enabled ${armv6-no}"
+ echo "ARMv6T2 enabled ${armv6t2-no}"
echo "ARM VFP enabled ${armvfp-no}"
echo "IWMMXT enabled ${iwmmxt-no}"
echo "NEON enabled ${neon-no}"
@@ -2097,7 +2156,7 @@ fi
if test $arch = "mips"; then
echo "MMI enabled ${mmi-no}"
fi
-if test $arch = "powerpc"; then
+if test $arch = "ppc"; then
echo "AltiVec enabled ${altivec-no}"
echo "dcbzl available ${dcbzl-no}"
fi
@@ -2143,6 +2202,7 @@ echo "libtheora enabled ${libtheora-no}"
echo "libvorbis enabled ${libvorbis-no}"
echo "libx264 enabled ${libx264-no}"
echo "libxvid enabled ${libxvid-no}"
+echo "vdpau enabled ${vdpau-no}"
echo "zlib enabled ${zlib-no}"
echo "bzlib enabled ${bzlib-no}"
echo
@@ -2209,6 +2269,12 @@ echo "SLIBSUF=$SLIBSUF" >> config.mak
echo "EXESUF=$EXESUF" >> config.mak
echo "EXTRA_VERSION=$extra_version" >> config.mak
echo "DEPEND_CMD=$DEPEND_CMD" >> config.mak
+echo "HOSTCC=$host_cc" >> config.mak
+echo "HOSTCFLAGS=$host_cflags" >> config.mak
+echo "HOSTLDFLAGS=$host_ldflags" >> config.mak
+echo "HOSTLIBS=$host_libs" >> config.mak
+echo "TARGET_EXEC=$target_exec" >> config.mak
+echo "TARGET_PATH=$target_path" >> config.mak
if enabled bigendian; then
echo "WORDS_BIGENDIAN=yes" >> config.mak
@@ -2303,13 +2369,13 @@ if enabled source_path_used; then
doc \
libavcodec \
libavcodec/alpha \
- libavcodec/armv4l \
+ libavcodec/arm \
libavcodec/bfin \
- libavcodec/i386 \
libavcodec/mlib \
libavcodec/ppc \
libavcodec/sh4 \
libavcodec/sparc \
+ libavcodec/x86 \
libavdevice \
libavfilter \
libavformat \
@@ -2346,7 +2412,7 @@ fi
pkgconfig_generate(){
name=$1
-shortname=${name#lib}
+shortname=${name#lib}${build_suffix}
comment=$2
version=$3
libs=$4
@@ -2384,16 +2450,16 @@ EOF
}
pkgconfig_generate libavutil "FFmpeg utility library" "$LIBAVUTIL_VERSION"
-pkgconfig_generate libavcodec "FFmpeg codec library" "$LIBAVCODEC_VERSION" "$extralibs" "$pkg_requires libavutil = $LIBAVUTIL_VERSION"
-pkgconfig_generate libavformat "FFmpeg container format library" "$LIBAVFORMAT_VERSION" "$extralibs" "$pkg_requires libavcodec = $LIBAVCODEC_VERSION"
-pkgconfig_generate libavdevice "FFmpeg device handling library" "$LIBAVDEVICE_VERSION" "$extralibs" "$pkg_requires libavformat = $LIBAVFORMAT_VERSION"
+pkgconfig_generate libavcodec "FFmpeg codec library" "$LIBAVCODEC_VERSION" "$extralibs" "libavutil = $LIBAVUTIL_VERSION"
+pkgconfig_generate libavformat "FFmpeg container format library" "$LIBAVFORMAT_VERSION" "$extralibs" "libavcodec = $LIBAVCODEC_VERSION"
+pkgconfig_generate libavdevice "FFmpeg device handling library" "$LIBAVDEVICE_VERSION" "$extralibs" "libavformat = $LIBAVFORMAT_VERSION"
enabled avfilter &&
- pkgconfig_generate libavfilter "FFmpeg video filtering library" "$LIBAVFILTER_VERSION" "$extralibs" "$pkg_requires libavutil = $LIBAVUTIL_VERSION"
+ pkgconfig_generate libavfilter "FFmpeg video filtering library" "$LIBAVFILTER_VERSION" "$extralibs" "libavutil = $LIBAVUTIL_VERSION"
enabled postproc &&
pkgconfig_generate libpostproc "FFmpeg post processing library" "$LIBPOSTPROC_VERSION"
if enabled swscale; then
pkgconfig_generate libswscale "FFmpeg image rescaling library" "$LIBSWSCALE_VERSION" "" "libavutil = $LIBAVUTIL_VERSION"
else
- pkgconfig_generate libswscale "FFmpeg image rescaling library" "$LIBSWSCALE_VERSION" "" "$pkg_requires libavcodec = $LIBAVCODEC_VERSION"
+ pkgconfig_generate libswscale "FFmpeg image rescaling library" "$LIBSWSCALE_VERSION" "" "libavcodec = $LIBAVCODEC_VERSION"
apply libswscale/libswscale.pc sed s/^Libs:.*$/Libs:/
fi
diff --git a/doc/faq.texi b/doc/faq.texi
index b76f4e7..e070fd2 100644
--- a/doc/faq.texi
+++ b/doc/faq.texi
@@ -166,11 +166,6 @@ Applying that to the previous example:
Beware that there is no "jpeg" codec. Use "mjpeg" instead.
-@section I get "Unsupported codec (id=86043) for input stream #0.1". What is the problem?
-
-This is the Qcelp codec, FFmpeg has no support for that codec currently.
-Try MEncoder/MPlayer, it might work.
-
@section Why do I see a slight quality degradation with multithreaded MPEG* encoding?
For multithreaded MPEG* encoding, the encoded slices must be independent,
@@ -192,14 +187,14 @@ LGPL to GPL.
@section Why does the chrominance data seem to be sampled at a different time from the luminance data on bt8x8 captures on Linux?
This is a well-known bug in the bt8x8 driver. For 2.4.26 there is a patch at
-(@url{http://svn.mplayerhq.hu/michael/trunk/patches/bttv-420-2.4.26.patch?view=co}). This may also
+(@url{http://svn.ffmpeg.org/michael/trunk/patches/bttv-420-2.4.26.patch?view=co}). This may also
apply cleanly to other 2.4-series kernels.
@section How do I avoid the ugly aliasing artifacts in bt8x8 captures on Linux?
Pass 'combfilter=1 lumafilter=1' to the bttv driver. Note though that 'combfilter=1'
-will cause somewhat too strong filtering. A fix is to apply (@url{http://svn.mplayerhq.hu/michael/trunk/patches/bttv-comb-2.4.26.patch?view=co})
-or (@url{http://svn.mplayerhq.hu/michael/trunk/patches/bttv-comb-2.6.6.patch?view=co})
+will cause somewhat too strong filtering. A fix is to apply (@url{http://svn.ffmpeg.org/michael/trunk/patches/bttv-comb-2.4.26.patch?view=co})
+or (@url{http://svn.ffmpeg.org/michael/trunk/patches/bttv-comb-2.6.6.patch?view=co})
and pass 'combfilter=2'.
@section -f jpeg doesn't work.
@@ -231,7 +226,7 @@ default.
@item non-working stuff
B-frames
@item example command line
-ffmpeg -i input -acodec libfaac -ab 128kb -vcodec mpeg4 -b 1200kb -mbd 2 -flags +4mv+trell -aic 2 -cmp 2 -subcmp 2 -s 320x180 -title X output.mp4
+ffmpeg -i input -acodec libfaac -ab 128kb -vcodec mpeg4 -b 1200kb -mbd 2 -flags +4mv -trellis 2 -aic 2 -cmp 2 -subcmp 2 -s 320x180 -title X output.mp4
@end table
@section How do I encode videos which play on the PSP?
@@ -244,7 +239,7 @@ ffmpeg -i input -acodec libfaac -ab 128kb -vcodec mpeg4 -b 1200kb -mbd 2 -flags
@item non-working stuff
B-frames
@item example command line
-ffmpeg -i input -acodec libfaac -ab 128kb -vcodec mpeg4 -b 1200kb -ar 24000 -mbd 2 -flags +4mv+trell -aic 2 -cmp 2 -subcmp 2 -s 368x192 -r 30000/1001 -title X -f psp output.mp4
+ffmpeg -i input -acodec libfaac -ab 128kb -vcodec mpeg4 -b 1200kb -ar 24000 -mbd 2 -flags +4mv -trellis 2 -aic 2 -cmp 2 -subcmp 2 -s 368x192 -r 30000/1001 -title X -f psp output.mp4
@item needed stuff for H.264
-acodec libfaac -vcodec libx264 width*height<=76800 width%16=0? height%16=0? -ar 48000 -coder 1 -r 30000/1001 or 15000/1001 -f psp
@item working stuff for H.264
@@ -261,12 +256,12 @@ ffmpeg -i input -acodec libfaac -ab 128kb -ac 2 -ar 48000 -vcodec libx264 -level
@section Which are good parameters for encoding high quality MPEG-4?
-'-mbd rd -flags +4mv+trell+aic -cmp 2 -subcmp 2 -g 300 -pass 1/2',
+'-mbd rd -flags +4mv+aic -trellis 2 -cmp 2 -subcmp 2 -g 300 -pass 1/2',
things to try: '-bf 2', '-flags qprd', '-flags mv0', '-flags skiprd'.
@section Which are good parameters for encoding high quality MPEG-1/MPEG-2?
-'-mbd rd -flags +trell -cmp 2 -subcmp 2 -g 100 -pass 1/2'
+'-mbd rd -trellis 2 -cmp 2 -subcmp 2 -g 100 -pass 1/2'
but beware the '-g 100' might cause problems with some decoders.
Things to try: '-bf 2', '-flags qprd', '-flags mv0', '-flags skiprd.
@@ -474,7 +469,7 @@ see @url{http://www.iversenit.dk/dev/ffmpeg-headers/}
@section Where is the documentation about ffv1, msmpeg4, asv1, 4xm?
-see @url{http://svn.mplayerhq.hu/michael/trunk/docs/}
+see @url{http://svn.ffmpeg.org/michael/trunk/docs/}
@section How do I feed H.263-RTP (and other codecs in RTP) to libavcodec?
diff --git a/doc/ffmpeg-doc.texi b/doc/ffmpeg-doc.texi
index 6f354c5..840ed89 100644
--- a/doc/ffmpeg-doc.texi
+++ b/doc/ffmpeg-doc.texi
@@ -439,10 +439,11 @@ tell that the raw codec data must be copied as is.
Use same video quality as source (implies VBR).
@item -pass @var{n}
-Select the pass number (1 or 2). It is useful to do two pass
-encoding. The statistics of the video are recorded in the first
-pass and the video is generated at the exact requested bitrate
-in the second pass.
+Select the pass number (1 or 2). It is used to do two-pass
+video encoding. The statistics of the video are recorded in the first
+pass into a log file (see also the option -passlogfile),
+and in the second pass that log file is used to generate the video
+at the exact requested bitrate.
On pass 1, you may just deactivate audio and set output to null,
examples for Windows and Unix:
@example
@@ -450,8 +451,11 @@ ffmpeg -i foo.mov -vcodec libxvid -pass 1 -an -f rawvideo -y NUL
ffmpeg -i foo.mov -vcodec libxvid -pass 1 -an -f rawvideo -y /dev/null
@end example
-@item -passlogfile @var{file}
-Set two pass logfile name to @var{file}.
+@item -passlogfile @var{prefix}
+Set two-pass log file name prefix to @var{prefix}, the default file name
+prefix is ``ffmpeg2pass''. The complete file name will be
+@file{PREFIX-N.log}, where N is a number specific to the output
+stream.
@item -newvideo
Add a new video stream to the current output stream.
diff --git a/doc/general.texi b/doc/general.texi
index 1db8e4a..b594fe1 100644
--- a/doc/general.texi
+++ b/doc/general.texi
@@ -288,6 +288,9 @@ following image formats are supported:
@tab fourccs: QPEG, Q1.0, Q1.1
@item RealVideo 1.0 @tab X @tab X
@item RealVideo 2.0 @tab X @tab X
+@item RealVideo 3.0 @tab @tab X
+ @tab still far from ideal
+@item RealVideo 4.0 @tab @tab X
@item Renderware TXD @tab @tab X
@tab Texture dictionaries used by the Renderware Engine.
@item RTjpeg @tab @tab X
@@ -396,6 +399,7 @@ following image formats are supported:
@item Musepack @tab @tab X
@tab SV7 and SV8 are supported.
@item Nellymoser ASAO @tab X @tab X
+@item QCELP / PureVoice @tab @tab X
@item Qdesign QDM2 @tab @tab X
@tab There are still some distortions.
@item QT IMA ADPCM @tab X @tab X
@@ -477,7 +481,7 @@ are listed below:
@itemize
@item bash 3.1
@item msys-make 3.81-2 (note: not mingw32-make)
-@item w32api 3.12
+@item w32api 3.13
@item mingw-runtime 3.15
@end itemize
@@ -658,18 +662,21 @@ Then you can easily test FFmpeg with Wine
@subsection Compilation under Cygwin
-The main issue with Cygwin is that newlib, its C library, does not
-contain llrint(). However, it is possible to leverage the
-implementation in MinGW.
+The main issue with the 1.5.x Cygwin versions is that newlib, its C library,
+does not contain llrint(). You need to upgrade to the unstable 1.7.x versions,
+or leverage the implementation in MinGW (as explained below).
Just install your Cygwin with all the "Base" packages, plus the
following "Devel" ones:
@example
-binutils, gcc-core, make, subversion, mingw-runtime
+binutils, gcc-core, make, subversion, mingw-runtime, diffutils
@end example
-Do not install binutils-20060709-1 (they are buggy on shared builds);
-use binutils-20050610-1 instead.
+The experimental gcc4 package is still buggy, hence please
+use the official gcc 3.4.4 or a 4.2.x compiled from source by yourself.
+
+Install the current binutils-20080624-2 as they work fine (the old
+binutils-20060709-1 proved buggy on shared builds).
Then create a small library that just contains llrint():
@@ -693,9 +700,22 @@ to make a static build or
to build shared libraries.
If you want to build FFmpeg with additional libraries, download Cygwin
-"Devel" packages for Ogg and Vorbis from any Cygwin packages repository
-and/or SDL, xvid, faac, faad2 packages from Cygwin Ports,
-(@url{http://cygwinports.dotsrc.org/}).
+"Devel" packages for Ogg and Vorbis from any Cygwin packages repository:
+@example
+libogg-devel, libvorbis-devel
+@end example
+
+These library packages are only available from Cygwin Ports
+(@url{http://sourceware.org/cygwinports/}) :
+
+@example
+yasm, libSDL-devel, libdirac-devel, libfaac-devel, libfaad-devel, libgsm-devel,
+libmp3lame-devel, libschroedinger1.0-devel, speex-devel, libtheora-devel,
+libxvidcore-devel
+@end example
+
+The recommendation for libnut and x264 is to build them from source by
+yourself, as they evolve too quickly for Cygwin Ports to be up to date.
@subsection Crosscompilation for Windows under Cygwin
@@ -1067,7 +1087,7 @@ do not attach several unrelated patches to the same mail.
If the patch fixes a bug, did you provide enough information, including
a sample, so the bug can be reproduced and the fix can be verified?
Note please do not attach samples >100k to mails but rather provide a
- URL, you can upload to ftp://upload.mplayerhq.hu
+ URL, you can upload to ftp://upload.ffmpeg.org
@item
Did you provide a verbose summary about what the patch does change?
@item
diff --git a/doc/issue_tracker.txt b/doc/issue_tracker.txt
index cab56cf..e5a74db 100644
--- a/doc/issue_tracker.txt
+++ b/doc/issue_tracker.txt
@@ -15,9 +15,10 @@ be properly added to the respective issue.
The subscription URL for the ffmpeg-issues list is:
http://live.polito/mailman/listinfo/ffmpeg-issues
The URL of the webinterface of the tracker is:
-http(s)://roundup.mplayerhq/roundup/ffmpeg/
+http(s)://roundup.ffmpeg/roundup/ffmpeg/
Note the URLs in this document are obfuscated, you must append the top level
-domain of Hungary to the tracker, and of Italy to the mailing list.
+domain for non-profit organizations to the tracker, and of Italy to the
+mailing list.
Email Interface:
----------------
diff --git a/doc/optimization.txt b/doc/optimization.txt
index 50630e7..5469adc 100644
--- a/doc/optimization.txt
+++ b/doc/optimization.txt
@@ -4,11 +4,11 @@ optimization Tips (for libavcodec):
What to optimize:
-----------------
If you plan to do non-x86 architecture specific optimizations (SIMD normally),
-then take a look in the i386/ directory, as most important functions are
+then take a look in the x86/ directory, as most important functions are
already optimized for MMX.
If you want to do x86 optimizations then you can either try to finetune the
-stuff in the i386 directory or find some other functions in the C source to
+stuff in the x86 directory or find some other functions in the C source to
optimize, but there aren't many left.
@@ -18,9 +18,9 @@ As many functions tend to be a bit difficult to understand because
of optimizations, it can be hard to optimize them further, or write
architecture-specific versions. It is recommended to look at older
revisions of the interesting files (for a web frontend try ViewVC at
-http://svn.mplayerhq.hu/ffmpeg/trunk/).
+http://svn.ffmpeg.org/ffmpeg/trunk/).
Alternatively, look into the other architecture-specific versions in
-the i386/, ppc/, alpha/ subdirectories. Even if you don't exactly
+the x86/, ppc/, alpha/ subdirectories. Even if you don't exactly
comprehend the instructions, it could help understanding the functions
and how they can be optimized.
diff --git a/ffmpeg.c b/ffmpeg.c
index 9d74491..aac071e 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -151,6 +151,7 @@ static int qp_hist = 0;
static int intra_only = 0;
static int audio_sample_rate = 44100;
+static int64_t channel_layout = 0;
#define QSCALE_NONE -99999
static float audio_qscale = QSCALE_NONE;
static int audio_disable = 0;
@@ -182,7 +183,7 @@ static int do_hex_dump = 0;
static int do_pkt_dump = 0;
static int do_psnr = 0;
static int do_pass = 0;
-static char *pass_logfilename = NULL;
+static char *pass_logfilename_prefix = NULL;
static int audio_stream_copy = 0;
static int video_stream_copy = 0;
static int subtitle_stream_copy = 0;
@@ -190,11 +191,12 @@ static int video_sync_method= -1;
static int audio_sync_method= 0;
static float audio_drift_threshold= 0.1;
static int copy_ts= 0;
-static int opt_shortest = 0; //
+static int opt_shortest = 0;
static int video_global_header = 0;
static char *vstats_filename;
static FILE *vstats_file;
static int opt_programid = 0;
+static int copy_initial_nonkeyframes = 0;
static int rate_emu = 0;
@@ -215,7 +217,7 @@ static int64_t extra_size = 0;
static int nb_frames_dup = 0;
static int nb_frames_drop = 0;
static int input_sync;
-static uint64_t limit_filesize = 0; //
+static uint64_t limit_filesize = 0;
static int force_fps = 0;
static int pgmyuv_compatibility_hack=0;
@@ -230,7 +232,7 @@ static AVBitStreamFilterContext *audio_bitstream_filters=NULL;
static AVBitStreamFilterContext *subtitle_bitstream_filters=NULL;
static AVBitStreamFilterContext *bitstream_filters[MAX_FILES][MAX_STREAMS];
-#define DEFAULT_PASS_LOGFILENAME "ffmpeg2pass"
+#define DEFAULT_PASS_LOGFILENAME_PREFIX "ffmpeg2pass"
struct AVInputStream;
@@ -280,7 +282,6 @@ typedef struct AVInputStream {
int64_t sample_index; /* current sample */
int64_t start; /* time when read started */
- unsigned long frame; /* current frame */
int64_t next_pts; /* synthetic pts for cases where pkt.pts
is not defined */
int64_t pts; /* current pts */
@@ -395,9 +396,17 @@ static int av_exit(int ret)
if (!(s->oformat->flags & AVFMT_NOFILE) && s->pb)
url_fclose(s->pb);
for(j=0;j<s->nb_streams;j++) {
+ av_metadata_free(&s->streams[j]->metadata);
av_free(s->streams[j]->codec);
av_free(s->streams[j]);
}
+ for(j=0;j<s->nb_programs;j++) {
+ av_metadata_free(&s->programs[j]->metadata);
+ }
+ for(j=0;j<s->nb_chapters;j++) {
+ av_metadata_free(&s->chapters[j]->metadata);
+ }
+ av_metadata_free(&s->metadata);
av_free(s);
}
for(i=0;i<nb_input_files;i++)
@@ -419,7 +428,7 @@ static int av_exit(int ret)
av_free(video_standard);
#ifdef CONFIG_POWERPC_PERF
- extern void powerpc_display_perf_report(void);
+ void powerpc_display_perf_report(void);
powerpc_display_perf_report();
#endif /* CONFIG_POWERPC_PERF */
@@ -1305,13 +1314,11 @@ static int output_packet(AVInputStream *ist, int ist_index,
}
/* frame rate emulation */
- if (ist->st->codec->rate_emu) {
- int64_t pts = av_rescale((int64_t) ist->frame * ist->st->codec->time_base.num, 1000000, ist->st->codec->time_base.den);
+ if (rate_emu) {
+ int64_t pts = av_rescale(ist->pts, 1000000, AV_TIME_BASE);
int64_t now = av_gettime() - ist->start;
if (pts > now)
usleep(pts - now);
-
- ist->frame++;
}
/* if output time reached then transcode raw format,
@@ -1355,7 +1362,7 @@ static int output_packet(AVInputStream *ist, int ist_index,
AVPacket opkt;
av_init_packet(&opkt);
- if (!ost->frame_number && !(pkt->flags & PKT_FLAG_KEY))
+ if ((!ost->frame_number && !(pkt->flags & PKT_FLAG_KEY)) && !copy_initial_nonkeyframes)
continue;
/* no reencoding needed : output the packet directly */
@@ -1402,8 +1409,9 @@ static int output_packet(AVInputStream *ist, int ist_index,
if (subtitle_to_free) {
if (subtitle_to_free->rects != NULL) {
for (i = 0; i < subtitle_to_free->num_rects; i++) {
- av_free(subtitle_to_free->rects[i].bitmap);
- av_free(subtitle_to_free->rects[i].rgba_palette);
+ av_freep(&subtitle_to_free->rects[i]->pict.data[0]);
+ av_freep(&subtitle_to_free->rects[i]->pict.data[1]);
+ av_freep(&subtitle_to_free->rects[i]);
}
av_freep(&subtitle_to_free->rects);
}
@@ -1488,6 +1496,7 @@ static void print_sdp(AVFormatContext **avc, int n)
avf_sdp_create(avc, n, sdp, sizeof(sdp));
printf("SDP:\n%s\n", sdp);
+ fflush(stdout);
}
static int stream_index_from_inputs(AVFormatContext **input_files,
@@ -1569,9 +1578,8 @@ static int av_encode(AVFormatContext **output_files,
ist->discard = 1; /* the stream is discarded by default
(changed later) */
- if (ist->st->codec->rate_emu) {
+ if (rate_emu) {
ist->start = av_gettime();
- ist->frame = 0;
}
}
}
@@ -1581,7 +1589,8 @@ static int av_encode(AVFormatContext **output_files,
for(i=0;i<nb_output_files;i++) {
os = output_files[i];
if (!os->nb_streams) {
- fprintf(stderr, "Output file does not contain any stream\n");
+ dump_format(output_files[i], i, output_files[i]->filename, 1);
+ fprintf(stderr, "Output file #%d does not contain any stream\n", i);
av_exit(1);
}
nb_ostreams += os->nb_streams;
@@ -1733,6 +1742,7 @@ static int av_encode(AVFormatContext **output_files,
fprintf(stderr,"-acodec copy and -vol are incompatible (frames are not decoded)\n");
av_exit(1);
}
+ codec->channel_layout = icodec->channel_layout;
codec->sample_rate = icodec->sample_rate;
codec->channels = icodec->channels;
codec->frame_size = icodec->frame_size;
@@ -1836,12 +1846,12 @@ static int av_encode(AVFormatContext **output_files,
char *logbuffer;
snprintf(logfilename, sizeof(logfilename), "%s-%d.log",
- pass_logfilename ?
- pass_logfilename : DEFAULT_PASS_LOGFILENAME, i);
+ pass_logfilename_prefix ? pass_logfilename_prefix : DEFAULT_PASS_LOGFILENAME_PREFIX,
+ i);
if (codec->flags & CODEC_FLAG_PASS1) {
f = fopen(logfilename, "w");
if (!f) {
- perror(logfilename);
+ fprintf(stderr, "Cannot write log file '%s' for pass-1 encoding: %s\n", logfilename, strerror(errno));
av_exit(1);
}
ost->logfile = f;
@@ -1849,7 +1859,7 @@ static int av_encode(AVFormatContext **output_files,
/* read the log file */
f = fopen(logfilename, "r");
if (!f) {
- perror(logfilename);
+ fprintf(stderr, "Cannot read log file '%s' for pass-2 encoding: %s\n", logfilename, strerror(errno));
av_exit(1);
}
fseek(f, 0, SEEK_END);
@@ -2821,6 +2831,7 @@ static void opt_input_file(const char *filename)
case CODEC_TYPE_AUDIO:
set_context_opts(enc, avctx_opts[CODEC_TYPE_AUDIO], AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_DECODING_PARAM);
//fprintf(stderr, "\nInput Audio channels: %d", enc->channels);
+ channel_layout = enc->channel_layout;
audio_channels = enc->channels;
audio_sample_rate = enc->sample_rate;
audio_sample_fmt = enc->sample_fmt;
@@ -2856,7 +2867,6 @@ static void opt_input_file(const char *filename)
frame_rate.num = rfps;
frame_rate.den = rfps_base;
- enc->rate_emu = rate_emu;
input_codecs[nb_icodecs++] = avcodec_find_decoder_by_name(video_codec_name);
if(video_disable)
ic->streams[i]->discard= AVDISCARD_ALL;
@@ -2891,7 +2901,6 @@ static void opt_input_file(const char *filename)
video_channel = 0;
- rate_emu = 0;
av_freep(&video_codec_name);
av_freep(&audio_codec_name);
av_freep(&subtitle_codec_name);
@@ -3130,6 +3139,7 @@ static void new_audio_stream(AVFormatContext *oc)
audio_enc->thread_count = thread_count;
audio_enc->channels = audio_channels;
audio_enc->sample_fmt = audio_sample_fmt;
+ audio_enc->channel_layout = channel_layout;
if(codec && codec->sample_fmts){
const enum SampleFormat *p= codec->sample_fmts;
@@ -3702,7 +3712,7 @@ static int opt_preset(const char *opt, const char *arg)
continue;
e|= sscanf(line, "%999[^=]=%999[^\n]\n", tmp, tmp2) - 2;
if(e){
- fprintf(stderr, "%s: Preset file invalid\n", filename);
+ fprintf(stderr, "%s: Invalid syntax: '%s'\n", filename, line);
av_exit(1);
}
if(!strcmp(tmp, "acodec")){
@@ -3712,7 +3722,7 @@ static int opt_preset(const char *opt, const char *arg)
}else if(!strcmp(tmp, "scodec")){
opt_subtitle_codec(tmp2);
}else if(opt_default(tmp, tmp2) < 0){
- fprintf(stderr, "%s: Invalid option or argument: %s=%s\n", filename, tmp, tmp2);
+ fprintf(stderr, "%s: Invalid option or argument: '%s', parsed as '%s' = '%s'\n", filename, line, tmp, tmp2);
av_exit(1);
}
}
@@ -3767,6 +3777,7 @@ static const OptionDef options[] = {
{ "dts_delta_threshold", HAS_ARG | OPT_FLOAT | OPT_EXPERT, {(void*)&dts_delta_threshold}, "timestamp discontinuity delta threshold", "threshold" },
{ "programid", HAS_ARG | OPT_INT | OPT_EXPERT, {(void*)&opt_programid}, "desired program number", "" },
{ "xerror", OPT_BOOL, {(void*)&exit_on_error}, "exit on error", "error" },
+ { "copyinkf", OPT_BOOL | OPT_EXPERT, {(void*)&copy_initial_nonkeyframes}, "copy initial non-keyframes" },
/* video options */
{ "b", OPT_FUNC2 | HAS_ARG | OPT_VIDEO, {(void*)opt_bitrate}, "set bitrate (in bits/s)", "bitrate" },
@@ -3795,7 +3806,7 @@ static const OptionDef options[] = {
{ "sameq", OPT_BOOL | OPT_VIDEO, {(void*)&same_quality},
"use same video quality as source (implies VBR)" },
{ "pass", HAS_ARG | OPT_VIDEO, {(void*)&opt_pass}, "select the pass number (1 or 2)", "n" },
- { "passlogfile", HAS_ARG | OPT_STRING | OPT_VIDEO, {(void*)&pass_logfilename}, "select two pass log file name", "file" },
+ { "passlogfile", HAS_ARG | OPT_STRING | OPT_VIDEO, {(void*)&pass_logfilename_prefix}, "select two pass log file name prefix", "prefix" },
{ "deinterlace", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&do_deinterlace},
"deinterlace pictures" },
{ "psnr", OPT_BOOL | OPT_EXPERT | OPT_VIDEO, {(void*)&do_psnr}, "calculate PSNR of compressed frames" },
@@ -3873,22 +3884,18 @@ int main(int argc, char **argv)
sws_opts = sws_getContext(16,16,0, 16,16,0, sws_flags, NULL,NULL,NULL);
show_banner();
- if (argc <= 1) {
- show_help();
- av_exit(1);
- }
/* parse options */
parse_options(argc, argv, options, opt_output_file);
/* file converter / grab */
if (nb_output_files <= 0) {
- fprintf(stderr, "Must supply at least one output file\n");
+ fprintf(stderr, "At least one output file must be specified\n");
av_exit(1);
}
if (nb_input_files == 0) {
- fprintf(stderr, "Must supply at least one input file\n");
+ fprintf(stderr, "At least one input file must be specified\n");
av_exit(1);
}
diff --git a/ffplay.c b/ffplay.c
index 67fbff2..12190da 100644
--- a/ffplay.c
+++ b/ffplay.c
@@ -441,10 +441,10 @@ static void blend_subrect(AVPicture *dst, const AVSubtitleRect *rect, int imgw,
const uint32_t *pal;
int dstx, dsty, dstw, dsth;
- dstx = FFMIN(FFMAX(rect->x, 0), imgw);
- dstw = FFMIN(FFMAX(rect->w, 0), imgw - dstx);
- dsty = FFMIN(FFMAX(rect->y, 0), imgh);
- dsth = FFMIN(FFMAX(rect->h, 0), imgh - dsty);
+ dstw = av_clip(rect->w, 0, imgw);
+ dsth = av_clip(rect->h, 0, imgh);
+ dstx = av_clip(rect->x, 0, imgw - dstw);
+ dsty = av_clip(rect->y, 0, imgh - dsth);
lum = dst->data[0] + dsty * dst->linesize[0];
cb = dst->data[1] + (dsty >> 1) * dst->linesize[1];
cr = dst->data[2] + (dsty >> 1) * dst->linesize[2];
@@ -452,9 +452,9 @@ static void blend_subrect(AVPicture *dst, const AVSubtitleRect *rect, int imgw,
width2 = (dstw + 1) >> 1;
skip2 = dstx >> 1;
wrap = dst->linesize[0];
- wrap3 = rect->linesize;
- p = rect->bitmap;
- pal = rect->rgba_palette; /* Now in YCrCb! */
+ wrap3 = rect->pict.linesize[0];
+ p = rect->pict.data[0];
+ pal = (const uint32_t *)rect->pict.data[1]; /* Now in YCrCb! */
if (dsty & 1) {
lum += dstx;
@@ -636,8 +636,9 @@ static void free_subpicture(SubPicture *sp)
for (i = 0; i < sp->sub.num_rects; i++)
{
- av_free(sp->sub.rects[i].bitmap);
- av_free(sp->sub.rects[i].rgba_palette);
+ av_freep(&sp->sub.rects[i]->pict.data[0]);
+ av_freep(&sp->sub.rects[i]->pict.data[1]);
+ av_freep(&sp->sub.rects[i]);
}
av_free(sp->sub.rects);
@@ -721,7 +722,7 @@ static void video_image_display(VideoState *is)
pict.linesize[2] = vp->bmp->pitches[1];
for (i = 0; i < sp->sub.num_rects; i++)
- blend_subrect(&pict, &sp->sub.rects[i],
+ blend_subrect(&pict, sp->sub.rects[i],
vp->bmp->w, vp->bmp->h);
SDL_UnlockYUVOverlay (vp->bmp);
@@ -1024,7 +1025,7 @@ static void video_refresh_timer(void *opaque)
/* compute nominal delay */
delay = vp->pts - is->frame_last_pts;
- if (delay <= 0 || delay >= 2.0) {
+ if (delay <= 0 || delay >= 10.0) {
/* if incorrect delay, use previous one */
delay = is->frame_last_delay;
}
@@ -1435,13 +1436,13 @@ static int subtitle_thread(void *arg)
for (i = 0; i < sp->sub.num_rects; i++)
{
- for (j = 0; j < sp->sub.rects[i].nb_colors; j++)
+ for (j = 0; j < sp->sub.rects[i]->nb_colors; j++)
{
- RGBA_IN(r, g, b, a, sp->sub.rects[i].rgba_palette + j);
+ RGBA_IN(r, g, b, a, (uint32_t*)sp->sub.rects[i]->pict.data[1] + j);
y = RGB_TO_Y_CCIR(r, g, b);
u = RGB_TO_U_CCIR(r, g, b, 0);
v = RGB_TO_V_CCIR(r, g, b, 0);
- YUVA_OUT(sp->sub.rects[i].rgba_palette + j, y, u, v, a);
+ YUVA_OUT((uint32_t*)sp->sub.rects[i]->pict.data[1] + j, y, u, v, a);
}
}
@@ -2062,12 +2063,19 @@ static int decode_thread(void *arg)
/* if the queue are full, no need to read more */
if (is->audioq.size > MAX_AUDIOQ_SIZE ||
is->videoq.size > MAX_VIDEOQ_SIZE ||
- is->subtitleq.size > MAX_SUBTITLEQ_SIZE ||
- url_feof(ic->pb)) {
+ is->subtitleq.size > MAX_SUBTITLEQ_SIZE) {
/* wait 10 ms */
SDL_Delay(10);
continue;
}
+ if(url_feof(ic->pb)) {
+ av_init_packet(pkt);
+ pkt->data=
+ pkt->size=0;
+ pkt->stream_index= is->video_stream;
+ packet_queue_put(&is->videoq, pkt);
+ continue;
+ }
ret = av_read_frame(ic, pkt);
if (ret < 0) {
if (url_ferror(ic->pb) == 0) {
@@ -2553,7 +2561,7 @@ int main(int argc, char **argv)
parse_options(argc, argv, options, opt_input_file);
if (!input_filename) {
- show_help();
+ fprintf(stderr, "An input file must be specified\n");
exit(1);
}
diff --git a/ffpresets/libx264-fastfirstpass.ffpreset b/ffpresets/libx264-fastfirstpass.ffpreset
index f2ce57b..aaad461 100644
--- a/ffpresets/libx264-fastfirstpass.ffpreset
+++ b/ffpresets/libx264-fastfirstpass.ffpreset
@@ -14,7 +14,8 @@ qcomp=0.6
qmin=10
qmax=51
qdiff=4
+bf=4
refs=1
-directpred=1
+directpred=3
trellis=0
flags2=-bpyramid-wpred-mixed_refs-dct8x8+fastpskip
diff --git a/ffpresets/libx264-hq.ffpreset b/ffpresets/libx264-hq.ffpreset
index 3f6cda6..cdf67eb 100644
--- a/ffpresets/libx264-hq.ffpreset
+++ b/ffpresets/libx264-hq.ffpreset
@@ -9,12 +9,12 @@ g=250
keyint_min=25
sc_threshold=40
i_qfactor=0.71
-b_strategy=1
+b_strategy=2
qcomp=0.6
qmin=10
qmax=51
qdiff=4
-bf=16
+bf=4
refs=4
directpred=3
trellis=1
diff --git a/ffpresets/libx264-lossless_fast.ffpreset b/ffpresets/libx264-lossless_fast.ffpreset
new file mode 100644
index 0000000..dcf418a
--- /dev/null
+++ b/ffpresets/libx264-lossless_fast.ffpreset
@@ -0,0 +1,19 @@
+coder=0
+flags=+loop
+cmp=+chroma
+partitions=-parti8x8+parti4x4+partp8x8-partp4x4-partb8x8
+me_method=hex
+subq=3
+me_range=16
+g=250
+keyint_min=25
+sc_threshold=40
+i_qfactor=0.71
+b_strategy=1
+qcomp=0.6
+qmin=10
+qmax=51
+qdiff=4
+directpred=1
+flags2=+fastpskip
+cqp=0
diff --git a/ffpresets/libx264-lossless_max.ffpreset b/ffpresets/libx264-lossless_max.ffpreset
new file mode 100644
index 0000000..b8506c2
--- /dev/null
+++ b/ffpresets/libx264-lossless_max.ffpreset
@@ -0,0 +1,20 @@
+coder=1
+flags=+loop
+cmp=+chroma
+partitions=+parti8x8+parti4x4+partp8x8+partp4x4-partb8x8
+me_method=esa
+subq=8
+me_range=16
+g=250
+keyint_min=25
+sc_threshold=40
+i_qfactor=0.71
+b_strategy=1
+qcomp=0.6
+qmin=10
+qmax=51
+qdiff=4
+refs=16
+directpred=1
+flags2=+mixed_refs+dct8x8+fastpskip
+cqp=0
diff --git a/ffpresets/libx264-lossless_medium.ffpreset b/ffpresets/libx264-lossless_medium.ffpreset
new file mode 100644
index 0000000..99fb6b9
--- /dev/null
+++ b/ffpresets/libx264-lossless_medium.ffpreset
@@ -0,0 +1,19 @@
+coder=1
+flags=+loop
+cmp=+chroma
+partitions=-parti8x8+parti4x4+partp8x8+partp4x4-partb8x8
+me_method=hex
+subq=5
+me_range=16
+g=250
+keyint_min=25
+sc_threshold=40
+i_qfactor=0.71
+b_strategy=1
+qcomp=0.6
+qmin=10
+qmax=51
+qdiff=4
+directpred=1
+flags2=+fastpskip
+cqp=0
diff --git a/ffpresets/libx264-lossless_slow.ffpreset b/ffpresets/libx264-lossless_slow.ffpreset
new file mode 100644
index 0000000..2ecb55b
--- /dev/null
+++ b/ffpresets/libx264-lossless_slow.ffpreset
@@ -0,0 +1,20 @@
+coder=1
+flags=+loop
+cmp=+chroma
+partitions=+parti8x8+parti4x4+partp8x8+partp4x4-partb8x8
+me_method=umh
+subq=6
+me_range=16
+g=250
+keyint_min=25
+sc_threshold=40
+i_qfactor=0.71
+b_strategy=1
+qcomp=0.6
+qmin=10
+qmax=51
+qdiff=4
+refs=2
+directpred=1
+flags2=+dct8x8+fastpskip
+cqp=0
diff --git a/ffpresets/libx264-lossless_slower.ffpreset b/ffpresets/libx264-lossless_slower.ffpreset
new file mode 100644
index 0000000..dd499c7
--- /dev/null
+++ b/ffpresets/libx264-lossless_slower.ffpreset
@@ -0,0 +1,20 @@
+coder=1
+flags=+loop
+cmp=+chroma
+partitions=+parti8x8+parti4x4+partp8x8+partp4x4-partb8x8
+me_method=umh
+subq=8
+me_range=16
+g=250
+keyint_min=25
+sc_threshold=40
+i_qfactor=0.71
+b_strategy=1
+qcomp=0.6
+qmin=10
+qmax=51
+qdiff=4
+refs=4
+directpred=1
+flags2=+mixed_refs+dct8x8+fastpskip
+cqp=0
diff --git a/ffpresets/libx264-lossless_ultrafast.ffpreset b/ffpresets/libx264-lossless_ultrafast.ffpreset
new file mode 100644
index 0000000..1c429f2
--- /dev/null
+++ b/ffpresets/libx264-lossless_ultrafast.ffpreset
@@ -0,0 +1,19 @@
+coder=0
+flags=+loop
+cmp=+chroma
+partitions=-parti8x8-parti4x4-partp8x8-partp4x4-partb8x8
+me_method=dia
+subq=0
+me_range=16
+g=250
+keyint_min=25
+sc_threshold=40
+i_qfactor=0.71
+b_strategy=1
+qcomp=0.6
+qmin=10
+qmax=51
+qdiff=4
+directpred=1
+flags2=+fastpskip
+cqp=0
diff --git a/ffpresets/libx264-max.ffpreset b/ffpresets/libx264-max.ffpreset
index 3638480..afdde53 100644
--- a/ffpresets/libx264-max.ffpreset
+++ b/ffpresets/libx264-max.ffpreset
@@ -9,12 +9,12 @@ g=250
keyint_min=25
sc_threshold=40
i_qfactor=0.71
-b_strategy=1
+b_strategy=2
qcomp=0.6
qmin=10
qmax=51
qdiff=4
-bf=16
+bf=4
refs=16
directpred=3
trellis=2
diff --git a/ffpresets/libx264-normal.ffpreset b/ffpresets/libx264-normal.ffpreset
index 180b9d2..99ac2e3 100644
--- a/ffpresets/libx264-normal.ffpreset
+++ b/ffpresets/libx264-normal.ffpreset
@@ -14,7 +14,7 @@ qcomp=0.6
qmin=10
qmax=51
qdiff=4
-bf=16
+bf=4
refs=2
directpred=3
trellis=0
diff --git a/ffpresets/libx264-slowfirstpass.ffpreset b/ffpresets/libx264-slowfirstpass.ffpreset
new file mode 100644
index 0000000..7358d44
--- /dev/null
+++ b/ffpresets/libx264-slowfirstpass.ffpreset
@@ -0,0 +1,21 @@
+coder=1
+flags=+loop
+cmp=+chroma
+partitions=+parti8x8+parti4x4+partp8x8+partb8x8
+me_method=hex
+subq=6
+me_range=16
+g=250
+keyint_min=25
+sc_threshold=40
+i_qfactor=0.71
+b_strategy=2
+qcomp=0.6
+qmin=10
+qmax=51
+qdiff=4
+bf=4
+refs=1
+directpred=3
+trellis=0
+flags2=+bpyramid+wpred+dct8x8+fastpskip
diff --git a/ffserver.c b/ffserver.c
index 168f996..23bb64c 100644
--- a/ffserver.c
+++ b/ffserver.c
@@ -3507,7 +3507,7 @@ static void build_feed_streams(void)
}
}
if (!url_exist(feed->feed_filename)) {
- AVFormatContext s1, *s = &s1;
+ AVFormatContext s1 = {0}, *s = &s1;
if (feed->readonly) {
http_log("Unable to create feed file '%s' as it is marked readonly\n",
@@ -3686,7 +3686,7 @@ static void add_codec(FFStream *stream, AVCodecContext *av)
memcpy(st->codec, av, sizeof(AVCodecContext));
}
-static int opt_audio_codec(const char *arg)
+static enum CodecID opt_audio_codec(const char *arg)
{
AVCodec *p= avcodec_find_encoder_by_name(arg);
@@ -3696,7 +3696,7 @@ static int opt_audio_codec(const char *arg)
return p->id;
}
-static int opt_video_codec(const char *arg)
+static enum CodecID opt_video_codec(const char *arg)
{
AVCodec *p= avcodec_find_encoder_by_name(arg);
@@ -3735,13 +3735,11 @@ static void load_module(const char *filename)
static int ffserver_opt_default(const char *opt, const char *arg,
AVCodecContext *avctx, int type)
{
- const AVOption *o = NULL;
- const AVOption *o2 = av_find_opt(avctx, opt, NULL, type, type);
- if(o2)
- o = av_set_string2(avctx, opt, arg, 1);
- if(!o)
- return -1;
- return 0;
+ int ret = 0;
+ const AVOption *o = av_find_opt(avctx, opt, NULL, type, type);
+ if(o)
+ ret = av_set_string3(avctx, opt, arg, 1, NULL);
+ return ret;
}
static int parse_ffconfig(const char *filename)
@@ -3755,7 +3753,7 @@ static int parse_ffconfig(const char *filename)
FFStream **last_stream, *stream, *redirect;
FFStream **last_feed, *feed;
AVCodecContext audio_enc, video_enc;
- int audio_id, video_id;
+ enum CodecID audio_id, video_id;
f = fopen(filename, "r");
if (!f) {
@@ -4450,7 +4448,7 @@ static void opt_debug()
static void opt_show_help(void)
{
- printf("usage: FFserver [options]\n"
+ printf("usage: ffserver [options]\n"
"Hyper fast multi format Audio/Video streaming server\n");
printf("\n");
show_help_options(options, "Main options:\n", 0, 0);
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 0540c20..3b179d7 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -3,6 +3,8 @@ include $(SUBDIR)../config.mak
NAME = avcodec
FFLIBS = avutil
+HEADERS = avcodec.h opt.h vdpau.h
+
OBJS = allcodecs.o \
audioconvert.o \
bitstream.o \
@@ -20,14 +22,15 @@ OBJS = allcodecs.o \
simple_idct.o \
utils.o \
-
-HEADERS = avcodec.h opt.h
-
+# parts needed for many different codecs
+OBJS-$(CONFIG_AANDCT) += aandcttab.o
OBJS-$(CONFIG_ENCODERS) += faandct.o jfdctfst.o jfdctint.o
OBJS-$(CONFIG_FFT) += fft.o
OBJS-$(CONFIG_GOLOMB) += golomb.o
OBJS-$(CONFIG_MDCT) += mdct.o
+OBJS-$(CONFIG_OLDSCALER) += imgresample.o
+# decoders/encoders
OBJS-$(CONFIG_AAC_DECODER) += aac.o aactab.o
OBJS-$(CONFIG_AASC_DECODER) += aasc.o msrledec.o
OBJS-$(CONFIG_AC3_DECODER) += eac3dec.o ac3dec.o ac3tab.o ac3dec_data.o ac3.o
@@ -97,6 +100,7 @@ OBJS-$(CONFIG_H263_ENCODER) += mpegvideo_enc.o motion_est.o ratecontr
OBJS-$(CONFIG_H263P_ENCODER) += mpegvideo_enc.o motion_est.o ratecontrol.o h263.o mpeg12data.o mpegvideo.o error_resilience.o
OBJS-$(CONFIG_H264_DECODER) += h264.o h264idct.o h264pred.o h264_parser.o cabac.o mpegvideo.o error_resilience.o
OBJS-$(CONFIG_H264_ENCODER) += h264enc.o h264dspenc.o
+OBJS-$(CONFIG_H264_VDPAU_DECODER) += vdpauvideo.o
OBJS-$(CONFIG_HUFFYUV_DECODER) += huffyuv.o
OBJS-$(CONFIG_HUFFYUV_ENCODER) += huffyuv.o
OBJS-$(CONFIG_IDCIN_DECODER) += idcinvideo.o
@@ -155,6 +159,7 @@ OBJS-$(CONFIG_PNG_DECODER) += png.o pngdec.o
OBJS-$(CONFIG_PNG_ENCODER) += png.o pngenc.o
OBJS-$(CONFIG_PPM_ENCODER) += pnmenc.o pnm.o
OBJS-$(CONFIG_PTX_DECODER) += ptx.o
+OBJS-$(CONFIG_QCELP_DECODER) += qcelpdec.o qcelp_lsp.o celp_math.o celp_filters.o
OBJS-$(CONFIG_QDM2_DECODER) += qdm2.o mpegaudiodec.o mpegaudiodecheader.o mpegaudio.o mpegaudiodata.o
OBJS-$(CONFIG_QDRAW_DECODER) += qdrw.o
OBJS-$(CONFIG_QPEG_DECODER) += qpeg.o
@@ -174,6 +179,8 @@ OBJS-$(CONFIG_RV10_DECODER) += rv10.o h263.o mpeg12data.o mpegvideo.o
OBJS-$(CONFIG_RV10_ENCODER) += rv10.o mpegvideo_enc.o motion_est.o ratecontrol.o h263.o mpeg12data.o mpegvideo.o error_resilience.o
OBJS-$(CONFIG_RV20_DECODER) += rv10.o h263.o mpeg12data.o mpegvideo.o error_resilience.o
OBJS-$(CONFIG_RV20_ENCODER) += rv10.o mpegvideo_enc.o motion_est.o ratecontrol.o h263.o mpeg12data.o mpegvideo.o error_resilience.o
+OBJS-$(CONFIG_RV30_DECODER) += rv30.o rv34.o h264pred.o rv30dsp.o
+OBJS-$(CONFIG_RV40_DECODER) += rv40.o rv34.o h264pred.o rv40dsp.o
OBJS-$(CONFIG_SGI_DECODER) += sgidec.o
OBJS-$(CONFIG_SGI_ENCODER) += sgienc.o rle.o
OBJS-$(CONFIG_SHORTEN_DECODER) += shorten.o
@@ -190,13 +197,13 @@ OBJS-$(CONFIG_SP5X_DECODER) += sp5xdec.o mjpegdec.o mjpeg.o
OBJS-$(CONFIG_SUNRAST_DECODER) += sunrast.o
OBJS-$(CONFIG_SVQ1_DECODER) += svq1dec.o svq1.o h263.o mpeg12data.o mpegvideo.o error_resilience.o
OBJS-$(CONFIG_SVQ1_ENCODER) += svq1enc.o svq1.o motion_est.o h263.o mpeg12data.o mpegvideo.o error_resilience.o
-OBJS-$(CONFIG_SVQ3_DECODER) += h264.o h264idct.o h264pred.o h264_parser.o cabac.o mpegvideo.o error_resilience.o
+OBJS-$(CONFIG_SVQ3_DECODER) += h264.o h264idct.o h264pred.o h264_parser.o cabac.o mpegvideo.o error_resilience.o svq1dec.o
OBJS-$(CONFIG_TARGA_DECODER) += targa.o
OBJS-$(CONFIG_TARGA_ENCODER) += targaenc.o rle.o
OBJS-$(CONFIG_THEORA_DECODER) += vp3.o xiph.o vp3dsp.o
OBJS-$(CONFIG_THP_DECODER) += mjpegdec.o mjpeg.o
OBJS-$(CONFIG_TIERTEXSEQVIDEO_DECODER) += tiertexseqv.o
-OBJS-$(CONFIG_TIFF_DECODER) += tiff.o lzw.o
+OBJS-$(CONFIG_TIFF_DECODER) += tiff.o lzw.o faxcompr.o
OBJS-$(CONFIG_TIFF_ENCODER) += tiffenc.o rle.o lzwenc.o
OBJS-$(CONFIG_TRUEMOTION1_DECODER) += truemotion1.o
OBJS-$(CONFIG_TRUEMOTION2_DECODER) += truemotion2.o
@@ -237,11 +244,13 @@ OBJS-$(CONFIG_XAN_WC3_DECODER) += xan.o
OBJS-$(CONFIG_XAN_WC4_DECODER) += xan.o
OBJS-$(CONFIG_XL_DECODER) += xl.o
OBJS-$(CONFIG_XSUB_DECODER) += xsubdec.o
+OBJS-$(CONFIG_XVMC) += xvmcvideo.o
OBJS-$(CONFIG_ZLIB_DECODER) += lcldec.o
OBJS-$(CONFIG_ZLIB_ENCODER) += lclenc.o
OBJS-$(CONFIG_ZMBV_DECODER) += zmbv.o
OBJS-$(CONFIG_ZMBV_ENCODER) += zmbvenc.o
+# (AD)PCM decoders/encoders
OBJS-$(CONFIG_PCM_ALAW_DECODER) += pcm.o
OBJS-$(CONFIG_PCM_ALAW_ENCODER) += pcm.o
OBJS-$(CONFIG_PCM_DVD_DECODER) += pcm.o
@@ -344,7 +353,7 @@ OBJS-$(CONFIG_LIBVORBIS) += libvorbis.o
OBJS-$(CONFIG_LIBX264) += libx264.o
OBJS-$(CONFIG_LIBXVID) += libxvidff.o libxvid_rc.o
-
+# parsers
OBJS-$(CONFIG_AAC_PARSER) += aac_parser.o aac_ac3_parser.o mpeg4audio.o
OBJS-$(CONFIG_AC3_PARSER) += ac3_parser.o ac3tab.o aac_ac3_parser.o
OBJS-$(CONFIG_CAVSVIDEO_PARSER) += cavs_parser.o
@@ -365,6 +374,7 @@ OBJS-$(CONFIG_PNM_PARSER) += pnm_parser.o pnm.o
OBJS-$(CONFIG_VC1_PARSER) += vc1_parser.o
OBJS-$(CONFIG_VP3_PARSER) += vp3_parser.o
+# bitstream filters
OBJS-$(CONFIG_DUMP_EXTRADATA_BSF) += dump_extradata_bsf.o
OBJS-$(CONFIG_H264_MP4TOANNEXB_BSF) += h264_mp4toannexb_bsf.o
OBJS-$(CONFIG_IMX_DUMP_HEADER_BSF) += imx_dump_header_bsf.o
@@ -376,91 +386,97 @@ OBJS-$(CONFIG_NOISE_BSF) += noise_bsf.o
OBJS-$(CONFIG_REMOVE_EXTRADATA_BSF) += remove_extradata_bsf.o
OBJS-$(CONFIG_TEXT2MOVSUB_BSF) += movsub_bsf.o
+# thread libraries
OBJS-$(HAVE_BEOSTHREADS) += beosthread.o
OBJS-$(HAVE_OS2THREADS) += os2thread.o
OBJS-$(HAVE_PTHREADS) += pthread.o
OBJS-$(HAVE_W32THREADS) += w32thread.o
-OBJS-$(HAVE_XVMC) += xvmcvideo.o
-
-ifndef CONFIG_SWSCALE
-OBJS += imgresample.o
-endif
-
# processor-specific code
-ifdef HAVE_MMX
-OBJS += i386/fdct_mmx.o \
- i386/cpuid.o \
- i386/dsputil_mmx.o \
- i386/mpegvideo_mmx.o \
- i386/motion_est_mmx.o \
- i386/simple_idct_mmx.o \
- i386/idct_mmx_xvid.o \
- i386/idct_sse2_xvid.o \
-
-OBJS-$(CONFIG_FFT_MMX) += i386/fft_mmx.o \
- i386/fft_sse.o \
- i386/fft_3dn.o \
- i386/fft_3dn2.o \
-
-OBJS-$(HAVE_YASM) += i386/dsputil_yasm.o
-
-OBJS-$(CONFIG_GPL) += i386/idct_mmx.o
-
-OBJS-$(CONFIG_ENCODERS) += i386/dsputilenc_mmx.o
-
-OBJS-$(CONFIG_CAVS_DECODER) += i386/cavsdsp_mmx.o
-OBJS-$(CONFIG_FLAC_ENCODER) += i386/flacdsp_mmx.o
-OBJS-$(CONFIG_SNOW_DECODER) += i386/snowdsp_mmx.o
-OBJS-$(CONFIG_VC1_DECODER) += i386/vc1dsp_mmx.o
-OBJS-$(CONFIG_VP3_DECODER) += i386/vp3dsp_mmx.o i386/vp3dsp_sse2.o
-OBJS-$(CONFIG_VP5_DECODER) += i386/vp3dsp_mmx.o i386/vp3dsp_sse2.o
-OBJS-$(CONFIG_VP6_DECODER) += i386/vp3dsp_mmx.o i386/vp3dsp_sse2.o
-OBJS-$(CONFIG_VP6A_DECODER) += i386/vp3dsp_mmx.o i386/vp3dsp_sse2.o
-OBJS-$(CONFIG_VP6F_DECODER) += i386/vp3dsp_mmx.o i386/vp3dsp_sse2.o
-OBJS-$(CONFIG_WMV3_DECODER) += i386/vc1dsp_mmx.o
-endif
-
-OBJS-$(ARCH_ARMV4L) += armv4l/jrevdct_arm.o \
- armv4l/simple_idct_arm.o \
- armv4l/dsputil_arm_s.o \
- armv4l/dsputil_arm.o \
- armv4l/mpegvideo_arm.o \
-
-OBJS-$(HAVE_IWMMXT) += armv4l/dsputil_iwmmxt.o \
- armv4l/mpegvideo_iwmmxt.o \
-
-OBJS-$(HAVE_ARMV5TE) += armv4l/mpegvideo_armv5te.o \
- armv4l/simple_idct_armv5te.o \
-
-OBJS-$(HAVE_ARMVFP) += armv4l/float_arm_vfp.o \
- armv4l/dsputil_vfp.o \
-
-OBJS-$(HAVE_ARMV6) += armv4l/simple_idct_armv6.o \
-
-OBJS-$(HAVE_VIS) += sparc/dsputil_vis.o \
- sparc/simple_idct_vis.o \
-
-OBJS-$(CONFIG_MLIB) += mlib/dsputil_mlib.o \
+YASM-OBJS-$(CONFIG_GPL) += x86/h264_deblock_sse2.o \
+ x86/h264_idct_sse2.o \
+
+MMX-OBJS-$(CONFIG_CAVS_DECODER) += x86/cavsdsp_mmx.o
+MMX-OBJS-$(CONFIG_ENCODERS) += x86/dsputilenc_mmx.o
+MMX-OBJS-$(CONFIG_FLAC_ENCODER) += x86/flacdsp_mmx.o
+MMX-OBJS-$(CONFIG_GPL) += x86/idct_mmx.o
+MMX-OBJS-$(CONFIG_SNOW_DECODER) += x86/snowdsp_mmx.o
+MMX-OBJS-$(CONFIG_THEORA_DECODER) += x86/vp3dsp_mmx.o x86/vp3dsp_sse2.o
+MMX-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_mmx.o
+MMX-OBJS-$(CONFIG_VP3_DECODER) += x86/vp3dsp_mmx.o x86/vp3dsp_sse2.o
+MMX-OBJS-$(CONFIG_VP5_DECODER) += x86/vp3dsp_mmx.o x86/vp3dsp_sse2.o
+MMX-OBJS-$(CONFIG_VP6_DECODER) += x86/vp3dsp_mmx.o x86/vp3dsp_sse2.o
+MMX-OBJS-$(CONFIG_VP6A_DECODER) += x86/vp3dsp_mmx.o x86/vp3dsp_sse2.o
+MMX-OBJS-$(CONFIG_VP6F_DECODER) += x86/vp3dsp_mmx.o x86/vp3dsp_sse2.o
+MMX-OBJS-$(CONFIG_WMV3_DECODER) += x86/vc1dsp_mmx.o
+MMX-OBJS-$(HAVE_YASM) += x86/dsputil_yasm.o \
+ $(YASM-OBJS-yes)
+
+OBJS-$(HAVE_MMX) += x86/cpuid.o \
+ x86/dnxhd_mmx.o \
+ x86/dsputil_mmx.o \
+ x86/fdct_mmx.o \
+ x86/idct_mmx_xvid.o \
+ x86/idct_sse2_xvid.o \
+ x86/motion_est_mmx.o \
+ x86/mpegvideo_mmx.o \
+ x86/simple_idct_mmx.o \
+ $(MMX-OBJS-yes)
+
+OBJS-$(CONFIG_FFT_MMX) += x86/fft_3dn.o \
+ x86/fft_3dn2.o \
+ x86/fft_mmx.o \
+ x86/fft_sse.o \
OBJS-$(ARCH_ALPHA) += alpha/dsputil_alpha.o \
+ alpha/dsputil_alpha_asm.o \
alpha/motion_est_alpha.o \
+ alpha/motion_est_mvi_asm.o \
alpha/mpegvideo_alpha.o \
alpha/simple_idct_alpha.o \
- alpha/dsputil_alpha_asm.o \
- alpha/motion_est_mvi_asm.o \
-OBJS-$(ARCH_POWERPC) += ppc/dsputil_ppc.o \
+OBJS-$(ARCH_ARM) += arm/dsputil_arm.o \
+ arm/dsputil_arm_s.o \
+ arm/jrevdct_arm.o \
+ arm/mpegvideo_arm.o \
+ arm/simple_idct_arm.o \
-OBJS-$(HAVE_MMI) += ps2/dsputil_mmi.o \
- ps2/idct_mmi.o \
- ps2/mpegvideo_mmi.o \
+OBJS-$(HAVE_ARMV5TE) += arm/mpegvideo_armv5te.o \
+ arm/mpegvideo_armv5te_s.o \
+ arm/simple_idct_armv5te.o \
-OBJS-$(ARCH_SH4) += sh4/idct_sh4.o \
- sh4/dsputil_align.o \
- sh4/dsputil_sh4.o \
+OBJS-$(HAVE_ARMV6) += arm/simple_idct_armv6.o \
+
+OBJS-$(HAVE_ARMVFP) += arm/dsputil_vfp.o \
+ arm/float_arm_vfp.o \
+
+OBJS-$(HAVE_IWMMXT) += arm/dsputil_iwmmxt.o \
+ arm/mpegvideo_iwmmxt.o \
-ALTIVEC-OBJS-yes += ppc/dsputil_altivec.o \
+OBJS-$(HAVE_NEON) += arm/dsputil_neon.o \
+ arm/dsputil_neon_s.o \
+ arm/h264dsp_neon.o \
+ arm/h264idct_neon.o \
+ arm/simple_idct_neon.o \
+
+OBJS-$(ARCH_BFIN) += bfin/dsputil_bfin.o \
+ bfin/fdct_bfin.o \
+ bfin/idct_bfin.o \
+ bfin/mpegvideo_bfin.o \
+ bfin/pixels_bfin.o \
+ bfin/vp3_bfin.o \
+ bfin/vp3_idct_bfin.o \
+
+OBJS-$(ARCH_PPC) += ppc/dsputil_ppc.o \
+
+ALTIVEC-OBJS-$(CONFIG_H264_DECODER) += ppc/h264_altivec.o
+ALTIVEC-OBJS-$(CONFIG_OLDSCALER) += ppc/imgresample_altivec.o
+ALTIVEC-OBJS-$(CONFIG_SNOW_DECODER) += ppc/snow_altivec.o
+ALTIVEC-OBJS-$(CONFIG_VC1_DECODER) += ppc/vc1dsp_altivec.o
+ALTIVEC-OBJS-$(CONFIG_WMV3_DECODER) += ppc/vc1dsp_altivec.o
+
+OBJS-$(HAVE_ALTIVEC) += ppc/check_altivec.o \
+ ppc/dsputil_altivec.o \
ppc/fdct_altivec.o \
ppc/fft_altivec.o \
ppc/float_altivec.o \
@@ -468,34 +484,30 @@ ALTIVEC-OBJS-yes += ppc/dsputil_altivec.o \
ppc/idct_altivec.o \
ppc/int_altivec.o \
ppc/mpegvideo_altivec.o \
+ $(ALTIVEC-OBJS-yes)
-ALTIVEC-OBJS-$(CONFIG_H264_DECODER) += ppc/h264_altivec.o
-ALTIVEC-OBJS-$(CONFIG_SNOW_DECODER) += ppc/snow_altivec.o
-ALTIVEC-OBJS-$(CONFIG_VC1_DECODER) += ppc/vc1dsp_altivec.o
-ALTIVEC-OBJS-$(CONFIG_WMV3_DECODER) += ppc/vc1dsp_altivec.o
+OBJS-$(ARCH_SH4) += sh4/dsputil_align.o \
+ sh4/dsputil_sh4.o \
+ sh4/idct_sh4.o \
-# -maltivec is needed in order to build AltiVec code.
-$(addprefix $(SUBDIR),$(ALTIVEC-OBJS-yes)): CFLAGS += -maltivec -mabi=altivec
+OBJS-$(CONFIG_MLIB) += mlib/dsputil_mlib.o \
-# check_altivec must be built without -maltivec
-OBJS-$(HAVE_ALTIVEC) += $(ALTIVEC-OBJS-yes) \
- ppc/check_altivec.o
+OBJS-$(HAVE_MMI) += ps2/dsputil_mmi.o \
+ ps2/idct_mmi.o \
+ ps2/mpegvideo_mmi.o \
+
+OBJS-$(HAVE_VIS) += sparc/dsputil_vis.o \
+ sparc/simple_idct_vis.o \
-OBJS-$(ARCH_BFIN) += bfin/dsputil_bfin.o \
- bfin/mpegvideo_bfin.o \
- bfin/vp3_bfin.o \
- bfin/pixels_bfin.o \
- bfin/fdct_bfin.o \
- bfin/idct_bfin.o \
- bfin/vp3_idct_bfin.o \
-TESTS = $(addsuffix -test$(EXESUF), cabac dct eval fft h264 imgresample rangecoder snow)
-TESTS-$(ARCH_X86) += i386/cpuid-test$(EXESUF) motion-test$(EXESUF)
+TESTS = $(addsuffix -test$(EXESUF), cabac dct eval fft h264 rangecoder snow)
+TESTS-$(CONFIG_OLDSCALER) += imgresample-test$(EXESUF)
+TESTS-$(ARCH_X86) += x86/cpuid-test$(EXESUF) motion-test$(EXESUF)
CLEANFILES = apiexample$(EXESUF)
-DIRS = alpha armv4l bfin i386 mlib ppc ps2 sh4 sparc
+DIRS = alpha arm bfin mlib ppc ps2 sh4 sparc x86
include $(SUBDIR)../subdir.mak
-$(SUBDIR)dct-test$(EXESUF): $(SUBDIR)fdctref.o
+$(SUBDIR)dct-test$(EXESUF): $(SUBDIR)fdctref.o $(SUBDIR)aandcttab.o
$(SUBDIR)fft-test$(EXESUF): $(SUBDIR)fdctref.o
diff --git a/libavcodec/aac.c b/libavcodec/aac.c
index 36742ba..545f125 100644
--- a/libavcodec/aac.c
+++ b/libavcodec/aac.c
@@ -41,7 +41,7 @@
* N (code in SoC repo) Long Term Prediction
* Y intensity stereo
* Y channel coupling
- * N frequency domain prediction
+ * Y frequency domain prediction
* Y Perceptual Noise Substitution
* Y Mid/Side stereo
* N Scalable Inverse AAC Quantization
@@ -77,6 +77,7 @@
#include "avcodec.h"
+#include "internal.h"
#include "bitstream.h"
#include "dsputil.h"
#include "lpc.h"
@@ -85,6 +86,7 @@
#include "aactab.h"
#include "aacdectab.h"
#include "mpeg4audio.h"
+#include "aac_parser.h"
#include <assert.h>
#include <errno.h>
@@ -166,15 +168,16 @@ static void decode_channel_map(enum ChannelPosition *cpe_map,
*/
static int decode_pce(AACContext * ac, enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
GetBitContext * gb) {
- int num_front, num_side, num_back, num_lfe, num_assoc_data, num_cc;
+ int num_front, num_side, num_back, num_lfe, num_assoc_data, num_cc, sampling_index;
skip_bits(gb, 2); // object_type
- ac->m4ac.sampling_index = get_bits(gb, 4);
- if(ac->m4ac.sampling_index > 11) {
+ sampling_index = get_bits(gb, 4);
+ if(sampling_index > 11) {
av_log(ac->avccontext, AV_LOG_ERROR, "invalid sampling rate index %d\n", ac->m4ac.sampling_index);
return -1;
}
+ ac->m4ac.sampling_index = sampling_index;
ac->m4ac.sample_rate = ff_mpeg4audio_sample_rates[ac->m4ac.sampling_index];
num_front = get_bits(gb, 4);
num_side = get_bits(gb, 4);
@@ -262,7 +265,7 @@ static int decode_ga_specific_config(AACContext * ac, GetBitContext * gb, int ch
int extension_flag, ret;
if(get_bits1(gb)) { // frameLengthFlag
- av_log_missing_feature(ac->avccontext, "960/120 MDCT window is", 1);
+ ff_log_missing_feature(ac->avccontext, "960/120 MDCT window is", 1);
return -1;
}
@@ -331,6 +334,7 @@ static int decode_audio_specific_config(AACContext * ac, void *data, int data_si
skip_bits_long(&gb, i);
switch (ac->m4ac.object_type) {
+ case AOT_AAC_MAIN:
case AOT_AAC_LC:
if (decode_ga_specific_config(ac, &gb, ac->m4ac.chan_config))
return -1;
@@ -354,18 +358,51 @@ static av_always_inline int lcg_random(int previous_val) {
return previous_val * 1664525 + 1013904223;
}
+static void reset_predict_state(PredictorState * ps) {
+ ps->r0 = 0.0f;
+ ps->r1 = 0.0f;
+ ps->cor0 = 0.0f;
+ ps->cor1 = 0.0f;
+ ps->var0 = 1.0f;
+ ps->var1 = 1.0f;
+}
+
+static void reset_all_predictors(PredictorState * ps) {
+ int i;
+ for (i = 0; i < MAX_PREDICTORS; i++)
+ reset_predict_state(&ps[i]);
+}
+
+static void reset_predictor_group(PredictorState * ps, int group_num) {
+ int i;
+ for (i = group_num-1; i < MAX_PREDICTORS; i+=30)
+ reset_predict_state(&ps[i]);
+}
+
static av_cold int aac_decode_init(AVCodecContext * avccontext) {
AACContext * ac = avccontext->priv_data;
int i;
ac->avccontext = avccontext;
- if (avccontext->extradata_size <= 0 ||
- decode_audio_specific_config(ac, avccontext->extradata, avccontext->extradata_size))
+ if (avccontext->extradata_size > 0) {
+ if(decode_audio_specific_config(ac, avccontext->extradata, avccontext->extradata_size))
+ return -1;
+ avccontext->sample_rate = ac->m4ac.sample_rate;
+ } else if (avccontext->channels > 0) {
+ enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
+ memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0]));
+ if(set_default_channel_config(ac, new_che_pos, avccontext->channels - (avccontext->channels == 8)))
+ return -1;
+ if(output_configure(ac, ac->che_pos, new_che_pos))
+ return -1;
+ ac->m4ac.sample_rate = avccontext->sample_rate;
+ } else {
+ ff_log_missing_feature(ac->avccontext, "Implicit channel configuration is", 0);
return -1;
+ }
avccontext->sample_fmt = SAMPLE_FMT_S16;
- avccontext->sample_rate = ac->m4ac.sample_rate;
avccontext->frame_size = 1024;
AAC_INIT_VLC_STATIC( 0, 144);
@@ -432,6 +469,21 @@ static void skip_data_stream_element(GetBitContext * gb) {
skip_bits_long(gb, 8 * count);
}
+static int decode_prediction(AACContext * ac, IndividualChannelStream * ics, GetBitContext * gb) {
+ int sfb;
+ if (get_bits1(gb)) {
+ ics->predictor_reset_group = get_bits(gb, 5);
+ if (ics->predictor_reset_group == 0 || ics->predictor_reset_group > 30) {
+ av_log(ac->avccontext, AV_LOG_ERROR, "Invalid Predictor Reset Group.\n");
+ return -1;
+ }
+ }
+ for (sfb = 0; sfb < FFMIN(ics->max_sfb, ff_aac_pred_sfb_max[ac->m4ac.sampling_index]); sfb++) {
+ ics->prediction_used[sfb] = get_bits1(gb);
+ }
+ return 0;
+}
+
/**
* Decode Individual Channel Stream info; reference: table 4.6.
*
@@ -464,16 +516,30 @@ static int decode_ics_info(AACContext * ac, IndividualChannelStream * ics, GetBi
ics->swb_offset = swb_offset_128[ac->m4ac.sampling_index];
ics->num_swb = ff_aac_num_swb_128[ac->m4ac.sampling_index];
ics->tns_max_bands = tns_max_bands_128[ac->m4ac.sampling_index];
+ ics->predictor_present = 0;
} else {
ics->max_sfb = get_bits(gb, 6);
ics->num_windows = 1;
ics->swb_offset = swb_offset_1024[ac->m4ac.sampling_index];
ics->num_swb = ff_aac_num_swb_1024[ac->m4ac.sampling_index];
ics->tns_max_bands = tns_max_bands_1024[ac->m4ac.sampling_index];
- if (get_bits1(gb)) {
- av_log_missing_feature(ac->avccontext, "Predictor bit set but LTP is", 1);
- memset(ics, 0, sizeof(IndividualChannelStream));
- return -1;
+ ics->predictor_present = get_bits1(gb);
+ ics->predictor_reset_group = 0;
+ if (ics->predictor_present) {
+ if (ac->m4ac.object_type == AOT_AAC_MAIN) {
+ if (decode_prediction(ac, ics, gb)) {
+ memset(ics, 0, sizeof(IndividualChannelStream));
+ return -1;
+ }
+ } else if (ac->m4ac.object_type == AOT_AAC_LC) {
+ av_log(ac->avccontext, AV_LOG_ERROR, "Prediction is not allowed in AAC-LC.\n");
+ memset(ics, 0, sizeof(IndividualChannelStream));
+ return -1;
+ } else {
+ ff_log_missing_feature(ac->avccontext, "Predictor bit set but LTP is", 1);
+ memset(ics, 0, sizeof(IndividualChannelStream));
+ return -1;
+ }
}
}
@@ -689,6 +755,7 @@ static int decode_spectrum_and_dequant(AACContext * ac, float coef[1024], GetBit
const int c = 1024/ics->num_windows;
const uint16_t * offsets = ics->swb_offset;
float *coef_base = coef;
+ static const float sign_lookup[] = { 1.0f, -1.0f };
for (g = 0; g < ics->num_windows; g++)
memset(coef + g * 128 + offsets[ics->max_sfb], 0, sizeof(float)*(c - offsets[ics->max_sfb]));
@@ -699,7 +766,7 @@ static int decode_spectrum_and_dequant(AACContext * ac, float coef[1024], GetBit
const int dim = cur_band_type >= FIRST_PAIR_BT ? 2 : 4;
const int is_cb_unsigned = IS_CODEBOOK_UNSIGNED(cur_band_type);
int group;
- if (cur_band_type == ZERO_BT) {
+ if (cur_band_type == ZERO_BT || cur_band_type == INTENSITY_BT2 || cur_band_type == INTENSITY_BT) {
for (group = 0; group < ics->group_len[g]; group++) {
memset(coef + group * 128 + offsets[i], 0, (offsets[i+1] - offsets[i])*sizeof(float));
}
@@ -717,7 +784,7 @@ static int decode_spectrum_and_dequant(AACContext * ac, float coef[1024], GetBit
coef[group*128+k] *= scale;
}
}
- }else if (cur_band_type != INTENSITY_BT2 && cur_band_type != INTENSITY_BT) {
+ }else {
for (group = 0; group < ics->group_len[g]; group++) {
for (k = offsets[i]; k < offsets[i+1]; k += dim) {
const int index = get_vlc2(gb, vlc_spectral[cur_band_type - 1].table, 6, 3);
@@ -732,12 +799,19 @@ static int decode_spectrum_and_dequant(AACContext * ac, float coef[1024], GetBit
}
vq_ptr = &ff_aac_codebook_vectors[cur_band_type - 1][index * dim];
if (is_cb_unsigned) {
- for (j = 0; j < dim; j++)
- if (vq_ptr[j])
- coef[coef_tmp_idx + j] = 1 - 2*(int)get_bits1(gb);
+ if (vq_ptr[0]) coef[coef_tmp_idx ] = sign_lookup[get_bits1(gb)];
+ if (vq_ptr[1]) coef[coef_tmp_idx + 1] = sign_lookup[get_bits1(gb)];
+ if (dim == 4) {
+ if (vq_ptr[2]) coef[coef_tmp_idx + 2] = sign_lookup[get_bits1(gb)];
+ if (vq_ptr[3]) coef[coef_tmp_idx + 3] = sign_lookup[get_bits1(gb)];
+ }
}else {
- for (j = 0; j < dim; j++)
- coef[coef_tmp_idx + j] = 1.0f;
+ coef[coef_tmp_idx ] = 1.0f;
+ coef[coef_tmp_idx + 1] = 1.0f;
+ if (dim == 4) {
+ coef[coef_tmp_idx + 2] = 1.0f;
+ coef[coef_tmp_idx + 3] = 1.0f;
+ }
}
if (cur_band_type == ESC_BT) {
for (j = 0; j < 2; j++) {
@@ -751,15 +825,25 @@ static int decode_spectrum_and_dequant(AACContext * ac, float coef[1024], GetBit
return -1;
}
n = (1<<n) + get_bits(gb, n);
- coef[coef_tmp_idx + j] *= cbrtf(fabsf(n)) * n;
+ coef[coef_tmp_idx + j] *= cbrtf(n) * n;
}else
coef[coef_tmp_idx + j] *= vq_ptr[j];
}
}else
- for (j = 0; j < dim; j++)
- coef[coef_tmp_idx + j] *= vq_ptr[j];
- for (j = 0; j < dim; j++)
- coef[coef_tmp_idx + j] *= sf[idx];
+ {
+ coef[coef_tmp_idx ] *= vq_ptr[0];
+ coef[coef_tmp_idx + 1] *= vq_ptr[1];
+ if (dim == 4) {
+ coef[coef_tmp_idx + 2] *= vq_ptr[2];
+ coef[coef_tmp_idx + 3] *= vq_ptr[3];
+ }
+ }
+ coef[coef_tmp_idx ] *= sf[idx];
+ coef[coef_tmp_idx + 1] *= sf[idx];
+ if (dim == 4) {
+ coef[coef_tmp_idx + 2] *= sf[idx];
+ coef[coef_tmp_idx + 3] *= sf[idx];
+ }
}
}
}
@@ -786,6 +870,77 @@ static int decode_spectrum_and_dequant(AACContext * ac, float coef[1024], GetBit
return 0;
}
+static av_always_inline float flt16_round(float pf) {
+ int exp;
+ pf = frexpf(pf, &exp);
+ pf = ldexpf(roundf(ldexpf(pf, 8)), exp-8);
+ return pf;
+}
+
+static av_always_inline float flt16_even(float pf) {
+ int exp;
+ pf = frexpf(pf, &exp);
+ pf = ldexpf(rintf(ldexpf(pf, 8)), exp-8);
+ return pf;
+}
+
+static av_always_inline float flt16_trunc(float pf) {
+ int exp;
+ pf = frexpf(pf, &exp);
+ pf = ldexpf(truncf(ldexpf(pf, 8)), exp-8);
+ return pf;
+}
+
+static void predict(AACContext * ac, PredictorState * ps, float* coef, int output_enable) {
+ const float a = 0.953125; // 61.0/64
+ const float alpha = 0.90625; // 29.0/32
+ float e0, e1;
+ float pv;
+ float k1, k2;
+
+ k1 = ps->var0 > 1 ? ps->cor0 * flt16_even(a / ps->var0) : 0;
+ k2 = ps->var1 > 1 ? ps->cor1 * flt16_even(a / ps->var1) : 0;
+
+ pv = flt16_round(k1 * ps->r0 + k2 * ps->r1);
+ if (output_enable)
+ *coef += pv * ac->sf_scale;
+
+ e0 = *coef / ac->sf_scale;
+ e1 = e0 - k1 * ps->r0;
+
+ ps->cor1 = flt16_trunc(alpha * ps->cor1 + ps->r1 * e1);
+ ps->var1 = flt16_trunc(alpha * ps->var1 + 0.5 * (ps->r1 * ps->r1 + e1 * e1));
+ ps->cor0 = flt16_trunc(alpha * ps->cor0 + ps->r0 * e0);
+ ps->var0 = flt16_trunc(alpha * ps->var0 + 0.5 * (ps->r0 * ps->r0 + e0 * e0));
+
+ ps->r1 = flt16_trunc(a * (ps->r0 - k1 * e0));
+ ps->r0 = flt16_trunc(a * e0);
+}
+
+/**
+ * Apply AAC-Main style frequency domain prediction.
+ */
+static void apply_prediction(AACContext * ac, SingleChannelElement * sce) {
+ int sfb, k;
+
+ if (!sce->ics.predictor_initialized) {
+ reset_all_predictors(sce->predictor_state);
+ sce->ics.predictor_initialized = 1;
+ }
+
+ if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
+ for (sfb = 0; sfb < ff_aac_pred_sfb_max[ac->m4ac.sampling_index]; sfb++) {
+ for (k = sce->ics.swb_offset[sfb]; k < sce->ics.swb_offset[sfb + 1]; k++) {
+ predict(ac, &sce->predictor_state[k], &sce->coeffs[k],
+ sce->ics.predictor_present && sce->ics.prediction_used[sfb]);
+ }
+ }
+ if (sce->ics.predictor_reset_group)
+ reset_predictor_group(sce->predictor_state, sce->ics.predictor_reset_group);
+ } else
+ reset_all_predictors(sce->predictor_state);
+}
+
/**
* Decode an individual_channel_stream payload; reference: table 4.44.
*
@@ -833,13 +988,17 @@ static int decode_ics(AACContext * ac, SingleChannelElement * sce, GetBitContext
if ((tns->present = get_bits1(gb)) && decode_tns(ac, tns, gb, ics))
return -1;
if (get_bits1(gb)) {
- av_log_missing_feature(ac->avccontext, "SSR", 1);
+ ff_log_missing_feature(ac->avccontext, "SSR", 1);
return -1;
}
}
if (decode_spectrum_and_dequant(ac, out, gb, sce->sf, pulse_present, &pulse, ics, sce->band_type) < 0)
return -1;
+
+ if(ac->m4ac.object_type == AOT_AAC_MAIN && !common_window)
+ apply_prediction(ac, sce);
+
return 0;
}
@@ -940,8 +1099,14 @@ static int decode_cpe(AACContext * ac, GetBitContext * gb, int elem_id) {
if ((ret = decode_ics(ac, &cpe->ch[1], gb, common_window, 0)))
return ret;
- if (common_window && ms_present)
- apply_mid_side_stereo(cpe);
+ if (common_window) {
+ if (ms_present)
+ apply_mid_side_stereo(cpe);
+ if (ac->m4ac.object_type == AOT_AAC_MAIN) {
+ apply_prediction(ac, &cpe->ch[0]);
+ apply_prediction(ac, &cpe->ch[1]);
+ }
+ }
apply_intensity_stereo(cpe, ms_present);
return 0;
@@ -1033,7 +1198,7 @@ static int decode_cce(AACContext * ac, GetBitContext * gb, ChannelElement * che)
*/
static int decode_sbr_extension(AACContext * ac, GetBitContext * gb, int crc, int cnt) {
// TODO : sbr_extension implementation
- av_log_missing_feature(ac->avccontext, "SBR", 0);
+ ff_log_missing_feature(ac->avccontext, "SBR", 0);
skip_bits_long(gb, 8*cnt - 4); // -4 due to reading extension type
return cnt;
}
@@ -1191,7 +1356,7 @@ static void imdct_and_windowing(AACContext * ac, SingleChannelElement * sce) {
const float * lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
const float * swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
float * buf = ac->buf_mdct;
- DECLARE_ALIGNED(16, float, temp[128]);
+ float * temp = ac->temp;
int i;
// imdct
@@ -1354,6 +1519,29 @@ static void spectral_to_sample(AACContext * ac) {
}
}
+static int parse_adts_frame_header(AACContext * ac, GetBitContext * gb) {
+
+ int size;
+ AACADTSHeaderInfo hdr_info;
+
+ size = ff_aac_parse_header(gb, &hdr_info);
+ if (size > 0) {
+ if (hdr_info.chan_config)
+ ac->m4ac.chan_config = hdr_info.chan_config;
+ ac->m4ac.sample_rate = hdr_info.sample_rate;
+ ac->m4ac.sampling_index = hdr_info.sampling_index;
+ ac->m4ac.object_type = hdr_info.object_type;
+ }
+ if (hdr_info.num_aac_frames == 1) {
+ if (!hdr_info.crc_absent)
+ skip_bits(gb, 16);
+ } else {
+ ff_log_missing_feature(ac->avccontext, "More than one AAC RDB per ADTS frame is", 0);
+ return -1;
+ }
+ return size;
+}
+
static int aac_decode_frame(AVCodecContext * avccontext, void * data, int * data_size, const uint8_t * buf, int buf_size) {
AACContext * ac = avccontext->priv_data;
GetBitContext gb;
@@ -1362,6 +1550,13 @@ static int aac_decode_frame(AVCodecContext * avccontext, void * data, int * data
init_get_bits(&gb, buf, buf_size*8);
+ if (show_bits(&gb, 12) == 0xfff) {
+ if ((err = parse_adts_frame_header(ac, &gb)) < 0) {
+ av_log(avccontext, AV_LOG_ERROR, "Error decoding AAC frame header.\n");
+ return -1;
+ }
+ }
+
// parse
while ((elem_type = get_bits(&gb, 3)) != TYPE_END) {
elem_id = get_bits(&gb, 4);
diff --git a/libavcodec/aac.h b/libavcodec/aac.h
index eec0828..d2f81f2 100644
--- a/libavcodec/aac.h
+++ b/libavcodec/aac.h
@@ -133,6 +133,20 @@ enum CouplingPoint {
};
/**
+ * Predictor State
+ */
+typedef struct {
+ float cor0;
+ float cor1;
+ float var0;
+ float var1;
+ float r0;
+ float r1;
+} PredictorState;
+
+#define MAX_PREDICTORS 672
+
+/**
* Individual Channel Stream
*/
typedef struct {
@@ -145,6 +159,10 @@ typedef struct {
int num_swb; ///< number of scalefactor window bands
int num_windows;
int tns_max_bands;
+ int predictor_present;
+ int predictor_initialized;
+ int predictor_reset_group;
+ uint8_t prediction_used[41];
} IndividualChannelStream;
/**
@@ -207,6 +225,7 @@ typedef struct {
DECLARE_ALIGNED_16(float, coeffs[1024]); ///< coefficients for IMDCT
DECLARE_ALIGNED_16(float, saved[512]); ///< overlap
DECLARE_ALIGNED_16(float, ret[1024]); ///< PCM output
+ PredictorState predictor_state[MAX_PREDICTORS];
} SingleChannelElement;
/**
@@ -269,6 +288,7 @@ typedef struct {
int sf_offset; ///< offset into pow2sf_tab as appropriate for dsp.float_to_int16
/** @} */
+ DECLARE_ALIGNED(16, float, temp[128]);
} AACContext;
#endif /* AVCODEC_AAC_H */
diff --git a/libavcodec/aac_ac3_parser.h b/libavcodec/aac_ac3_parser.h
index b2fc0af..b8613d8 100644
--- a/libavcodec/aac_ac3_parser.h
+++ b/libavcodec/aac_ac3_parser.h
@@ -27,6 +27,16 @@
#include "avcodec.h"
#include "parser.h"
+typedef enum {
+ AAC_AC3_PARSE_ERROR_SYNC = -1,
+ AAC_AC3_PARSE_ERROR_BSID = -2,
+ AAC_AC3_PARSE_ERROR_SAMPLE_RATE = -3,
+ AAC_AC3_PARSE_ERROR_FRAME_SIZE = -4,
+ AAC_AC3_PARSE_ERROR_FRAME_TYPE = -5,
+ AAC_AC3_PARSE_ERROR_CRC = -6,
+ AAC_AC3_PARSE_ERROR_CHANNEL_CFG = -7,
+} AACAC3ParseError;
+
typedef struct AACAC3ParseContext {
ParseContext pc;
int frame_size;
diff --git a/libavcodec/aac_parser.c b/libavcodec/aac_parser.c
index 1d75e1e..e38b5ec 100644
--- a/libavcodec/aac_parser.c
+++ b/libavcodec/aac_parser.c
@@ -22,55 +22,80 @@
#include "parser.h"
#include "aac_ac3_parser.h"
+#include "aac_parser.h"
#include "bitstream.h"
#include "mpeg4audio.h"
#define AAC_HEADER_SIZE 7
-static int aac_sync(uint64_t state, AACAC3ParseContext *hdr_info,
- int *need_next_header, int *new_frame_start)
+int ff_aac_parse_header(GetBitContext *gbc, AACADTSHeaderInfo *hdr)
{
- GetBitContext bits;
int size, rdb, ch, sr;
- uint8_t tmp[8];
+ int aot, crc_abs;
- AV_WB64(tmp, state);
- init_get_bits(&bits, tmp+8-AAC_HEADER_SIZE, AAC_HEADER_SIZE * 8);
-
- if(get_bits(&bits, 12) != 0xfff)
- return 0;
+ if(get_bits(gbc, 12) != 0xfff)
+ return AAC_AC3_PARSE_ERROR_SYNC;
- skip_bits1(&bits); /* id */
- skip_bits(&bits, 2); /* layer */
- skip_bits1(&bits); /* protection_absent */
- skip_bits(&bits, 2); /* profile_objecttype */
- sr = get_bits(&bits, 4); /* sample_frequency_index */
+ skip_bits1(gbc); /* id */
+ skip_bits(gbc, 2); /* layer */
+ crc_abs = get_bits1(gbc); /* protection_absent */
+ aot = get_bits(gbc, 2); /* profile_objecttype */
+ sr = get_bits(gbc, 4); /* sample_frequency_index */
if(!ff_mpeg4audio_sample_rates[sr])
- return 0;
- skip_bits1(&bits); /* private_bit */
- ch = get_bits(&bits, 3); /* channel_configuration */
+ return AAC_AC3_PARSE_ERROR_SAMPLE_RATE;
+ skip_bits1(gbc); /* private_bit */
+ ch = get_bits(gbc, 3); /* channel_configuration */
+
if(!ff_mpeg4audio_channels[ch])
- return 0;
- skip_bits1(&bits); /* original/copy */
- skip_bits1(&bits); /* home */
+ return AAC_AC3_PARSE_ERROR_CHANNEL_CFG;
+
+ skip_bits1(gbc); /* original/copy */
+ skip_bits1(gbc); /* home */
/* adts_variable_header */
- skip_bits1(&bits); /* copyright_identification_bit */
- skip_bits1(&bits); /* copyright_identification_start */
- size = get_bits(&bits, 13); /* aac_frame_length */
+ skip_bits1(gbc); /* copyright_identification_bit */
+ skip_bits1(gbc); /* copyright_identification_start */
+ size = get_bits(gbc, 13); /* aac_frame_length */
if(size < AAC_HEADER_SIZE)
- return 0;
+ return AAC_AC3_PARSE_ERROR_FRAME_SIZE;
- skip_bits(&bits, 11); /* adts_buffer_fullness */
- rdb = get_bits(&bits, 2); /* number_of_raw_data_blocks_in_frame */
+ skip_bits(gbc, 11); /* adts_buffer_fullness */
+ rdb = get_bits(gbc, 2); /* number_of_raw_data_blocks_in_frame */
- hdr_info->channels = ff_mpeg4audio_channels[ch];
- hdr_info->sample_rate = ff_mpeg4audio_sample_rates[sr];
- hdr_info->samples = (rdb + 1) * 1024;
- hdr_info->bit_rate = size * 8 * hdr_info->sample_rate / hdr_info->samples;
+ hdr->object_type = aot;
+ hdr->chan_config = ch;
+ hdr->crc_absent = crc_abs;
+ hdr->num_aac_frames = rdb + 1;
+ hdr->sampling_index = sr;
+ hdr->sample_rate = ff_mpeg4audio_sample_rates[sr];
+ hdr->samples = (rdb + 1) * 1024;
+ hdr->bit_rate = size * 8 * hdr->sample_rate / hdr->samples;
+ return size;
+}
+
+static int aac_sync(uint64_t state, AACAC3ParseContext *hdr_info,
+ int *need_next_header, int *new_frame_start)
+{
+ GetBitContext bits;
+ AACADTSHeaderInfo hdr;
+ int size;
+ union {
+ uint64_t u64;
+ uint8_t u8[8];
+ } tmp;
+
+ tmp.u64 = be2me_64(state);
+ init_get_bits(&bits, tmp.u8+8-AAC_HEADER_SIZE, AAC_HEADER_SIZE * 8);
+
+ if ((size = ff_aac_parse_header(&bits, &hdr)) < 0)
+ return 0;
*need_next_header = 0;
*new_frame_start = 1;
+ hdr_info->sample_rate = hdr.sample_rate;
+ hdr_info->channels = ff_mpeg4audio_channels[hdr.chan_config];
+ hdr_info->samples = hdr.samples;
+ hdr_info->bit_rate = hdr.bit_rate;
return size;
}
diff --git a/libavcodec/aac_parser.h b/libavcodec/aac_parser.h
new file mode 100644
index 0000000..efc4678
--- /dev/null
+++ b/libavcodec/aac_parser.h
@@ -0,0 +1,53 @@
+/*
+ * AAC parser prototypes
+ * Copyright (c) 2003 Fabrice Bellard
+ * Copyright (c) 2003 Michael Niedermayer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AAC_PARSER_H
+#define AVCODEC_AAC_PARSER_H
+
+#include <stdint.h>
+#include "aac_ac3_parser.h"
+#include "bitstream.h"
+
+typedef struct {
+ uint32_t sample_rate;
+ uint32_t samples;
+ uint32_t bit_rate;
+ uint8_t crc_absent;
+ uint8_t object_type;
+ uint8_t sampling_index;
+ uint8_t chan_config;
+ uint8_t num_aac_frames;
+} AACADTSHeaderInfo;
+
+/**
+ * Parses AAC frame header.
+ * Parses the ADTS frame header to the end of the variable header, which is
+ * the first 54 bits.
+ * @param gbc[in] BitContext containing the first 54 bits of the frame.
+ * @param hdr[out] Pointer to struct where header info is written.
+ * @return Returns the frame size in bytes on success, or a negative
+ *         AAC_AC3_PARSE_ERROR_* value: -1 on sync word mismatch, -3 on an
+ *         invalid sample rate, -4 on an invalid frame size, -7 on an invalid channel configuration.
+ */
+int ff_aac_parse_header(GetBitContext *gbc, AACADTSHeaderInfo *hdr);
+
+#endif /* AVCODEC_AAC_PARSER_H */
diff --git a/libavcodec/aactab.c b/libavcodec/aactab.c
index 0a8b032..3eab636 100644
--- a/libavcodec/aactab.c
+++ b/libavcodec/aactab.c
@@ -43,6 +43,10 @@ const uint8_t ff_aac_num_swb_128[] = {
12, 12, 12, 14, 14, 14, 15, 15, 15, 15, 15, 15
};
+const uint8_t ff_aac_pred_sfb_max[] = {
+ 33, 33, 38, 40, 40, 40, 41, 41, 37, 37, 37, 34
+};
+
const uint32_t ff_aac_scalefactor_code[121] = {
0x3ffe8, 0x3ffe6, 0x3ffe7, 0x3ffe5, 0x7fff5, 0x7fff1, 0x7ffed, 0x7fff6,
0x7ffee, 0x7ffef, 0x7fff0, 0x7fffc, 0x7fffd, 0x7ffff, 0x7fffe, 0x7fff7,
diff --git a/libavcodec/aactab.h b/libavcodec/aactab.h
index f019f7e..07574d8 100644
--- a/libavcodec/aactab.h
+++ b/libavcodec/aactab.h
@@ -54,6 +54,8 @@ extern const uint8_t ff_aac_num_swb_1024[];
extern const uint8_t ff_aac_num_swb_128 [];
// @}
+extern const uint8_t ff_aac_pred_sfb_max [];
+
extern const uint32_t ff_aac_scalefactor_code[121];
extern const uint8_t ff_aac_scalefactor_bits[121];
diff --git a/libavcodec/aandcttab.c b/libavcodec/aandcttab.c
new file mode 100644
index 0000000..4097067
--- /dev/null
+++ b/libavcodec/aandcttab.c
@@ -0,0 +1,47 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file aandcttab.c
+ * AAN (Arai Agui Nakajima) (I)DCT tables
+ */
+
+#include <stdint.h>
+
+const uint16_t ff_aanscales[64] = {
+ /* precomputed values scaled up by 14 bits */
+ 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
+ 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,
+ 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906,
+ 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
+ 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
+ 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
+ 8867 , 12299, 11585, 10426, 8867, 6967, 4799, 2446,
+ 4520 , 6270, 5906, 5315, 4520, 3552, 2446, 1247
+};
+
+const uint16_t ff_inv_aanscales[64] = {
+ 4096, 2953, 3135, 3483, 4096, 5213, 7568, 14846,
+ 2953, 2129, 2260, 2511, 2953, 3759, 5457, 10703,
+ 3135, 2260, 2399, 2666, 3135, 3990, 5793, 11363,
+ 3483, 2511, 2666, 2962, 3483, 4433, 6436, 12625,
+ 4096, 2953, 3135, 3483, 4096, 5213, 7568, 14846,
+ 5213, 3759, 3990, 4433, 5213, 6635, 9633, 18895,
+ 7568, 5457, 5793, 6436, 7568, 9633, 13985, 27432,
+ 14846, 10703, 11363, 12625, 14846, 18895, 27432, 53809,
+};
diff --git a/libavcodec/aandcttab.h b/libavcodec/aandcttab.h
new file mode 100644
index 0000000..03bb8b5
--- /dev/null
+++ b/libavcodec/aandcttab.h
@@ -0,0 +1,32 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file aandcttab.h
+ * AAN (Arai Agui Nakajima) (I)DCT tables
+ */
+
+#ifndef AVCODEC_AANDCTTAB_H
+#define AVCODEC_AANDCTTAB_H
+
+#include <stdint.h>
+
+extern const uint16_t ff_aanscales[64];
+extern const uint16_t ff_inv_aanscales[64];
+
+#endif /* AVCODEC_AANDCTTAB_H */
diff --git a/libavcodec/aasc.c b/libavcodec/aasc.c
index 77162f9..fa32231 100644
--- a/libavcodec/aasc.c
+++ b/libavcodec/aasc.c
@@ -62,6 +62,7 @@ static int aasc_decode_frame(AVCodecContext *avctx,
const uint8_t *buf, int buf_size)
{
AascContext *s = avctx->priv_data;
+ int compr, i, stride;
s->frame.reference = 1;
s->frame.buffer_hints = FF_BUFFER_HINTS_VALID | FF_BUFFER_HINTS_PRESERVE | FF_BUFFER_HINTS_REUSABLE;
@@ -70,7 +71,24 @@ static int aasc_decode_frame(AVCodecContext *avctx,
return -1;
}
- ff_msrle_decode(avctx, &s->frame, 8, buf, buf_size);
+ compr = AV_RL32(buf);
+ buf += 4;
+ buf_size -= 4;
+ switch(compr){
+ case 0:
+ stride = (avctx->width * 3 + 3) & ~3;
+ for(i = avctx->height - 1; i >= 0; i--){
+ memcpy(s->frame.data[0] + i*s->frame.linesize[0], buf, avctx->width*3);
+ buf += stride;
+ }
+ break;
+ case 1:
+ ff_msrle_decode(avctx, &s->frame, 8, buf - 4, buf_size + 4);
+ break;
+ default:
+ av_log(avctx, AV_LOG_ERROR, "Unknown compression type %d\n", compr);
+ return -1;
+ }
*data_size = sizeof(AVFrame);
*(AVFrame*)data = s->frame;
diff --git a/libavcodec/ac3.c b/libavcodec/ac3.c
index e4117f1..cb1a147 100644
--- a/libavcodec/ac3.c
+++ b/libavcodec/ac3.c
@@ -80,11 +80,11 @@ void ff_ac3_bit_alloc_calc_psd(int8_t *exp, int start, int end, int16_t *psd,
} while (end > band_start_tab[k]);
}
-void ff_ac3_bit_alloc_calc_mask(AC3BitAllocParameters *s, int16_t *band_psd,
- int start, int end, int fast_gain, int is_lfe,
- int dba_mode, int dba_nsegs, uint8_t *dba_offsets,
- uint8_t *dba_lengths, uint8_t *dba_values,
- int16_t *mask)
+int ff_ac3_bit_alloc_calc_mask(AC3BitAllocParameters *s, int16_t *band_psd,
+ int start, int end, int fast_gain, int is_lfe,
+ int dba_mode, int dba_nsegs, uint8_t *dba_offsets,
+ uint8_t *dba_lengths, uint8_t *dba_values,
+ int16_t *mask)
{
int16_t excite[50]; /* excitation */
int bin, k;
@@ -156,9 +156,13 @@ void ff_ac3_bit_alloc_calc_mask(AC3BitAllocParameters *s, int16_t *band_psd,
if (dba_mode == DBA_REUSE || dba_mode == DBA_NEW) {
int band, seg, delta;
+ if (dba_nsegs >= 8)
+ return -1;
band = 0;
- for (seg = 0; seg < FFMIN(8, dba_nsegs); seg++) {
- band = FFMIN(49, band + dba_offsets[seg]);
+ for (seg = 0; seg < dba_nsegs; seg++) {
+ band += dba_offsets[seg];
+ if (band >= 50 || dba_lengths[seg] > 50-band)
+ return -1;
if (dba_values[seg] >= 4) {
delta = (dba_values[seg] - 3) << 7;
} else {
@@ -170,6 +174,7 @@ void ff_ac3_bit_alloc_calc_mask(AC3BitAllocParameters *s, int16_t *band_psd,
}
}
}
+ return 0;
}
void ff_ac3_bit_alloc_calc_bap(int16_t *mask, int16_t *psd, int start, int end,
diff --git a/libavcodec/ac3.h b/libavcodec/ac3.h
index 1f5a711..cbbb718 100644
--- a/libavcodec/ac3.h
+++ b/libavcodec/ac3.h
@@ -149,12 +149,13 @@ void ff_ac3_bit_alloc_calc_psd(int8_t *exp, int start, int end, int16_t *psd,
* @param[in] dba_lengths length of each segment
* @param[in] dba_values delta bit allocation for each segment
* @param[out] mask calculated masking curve
+ * @return returns 0 for success, non-zero for error
*/
-void ff_ac3_bit_alloc_calc_mask(AC3BitAllocParameters *s, int16_t *band_psd,
- int start, int end, int fast_gain, int is_lfe,
- int dba_mode, int dba_nsegs, uint8_t *dba_offsets,
- uint8_t *dba_lengths, uint8_t *dba_values,
- int16_t *mask);
+int ff_ac3_bit_alloc_calc_mask(AC3BitAllocParameters *s, int16_t *band_psd,
+ int start, int end, int fast_gain, int is_lfe,
+ int dba_mode, int dba_nsegs, uint8_t *dba_offsets,
+ uint8_t *dba_lengths, uint8_t *dba_values,
+ int16_t *mask);
/**
* Calculates bit allocation pointers.
diff --git a/libavcodec/ac3_parser.c b/libavcodec/ac3_parser.c
index aedcbcd..f47f97d 100644
--- a/libavcodec/ac3_parser.c
+++ b/libavcodec/ac3_parser.c
@@ -42,12 +42,12 @@ int ff_ac3_parse_header(GetBitContext *gbc, AC3HeaderInfo *hdr)
hdr->sync_word = get_bits(gbc, 16);
if(hdr->sync_word != 0x0B77)
- return AC3_PARSE_ERROR_SYNC;
+ return AAC_AC3_PARSE_ERROR_SYNC;
/* read ahead to bsid to distinguish between AC-3 and E-AC-3 */
hdr->bitstream_id = show_bits_long(gbc, 29) & 0x1F;
if(hdr->bitstream_id > 16)
- return AC3_PARSE_ERROR_BSID;
+ return AAC_AC3_PARSE_ERROR_BSID;
hdr->num_blocks = 6;
@@ -60,11 +60,11 @@ int ff_ac3_parse_header(GetBitContext *gbc, AC3HeaderInfo *hdr)
hdr->crc1 = get_bits(gbc, 16);
hdr->sr_code = get_bits(gbc, 2);
if(hdr->sr_code == 3)
- return AC3_PARSE_ERROR_SAMPLE_RATE;
+ return AAC_AC3_PARSE_ERROR_SAMPLE_RATE;
frame_size_code = get_bits(gbc, 6);
if(frame_size_code > 37)
- return AC3_PARSE_ERROR_FRAME_SIZE;
+ return AAC_AC3_PARSE_ERROR_FRAME_SIZE;
skip_bits(gbc, 5); // skip bsid, already got it
@@ -93,19 +93,19 @@ int ff_ac3_parse_header(GetBitContext *gbc, AC3HeaderInfo *hdr)
hdr->crc1 = 0;
hdr->frame_type = get_bits(gbc, 2);
if(hdr->frame_type == EAC3_FRAME_TYPE_RESERVED)
- return AC3_PARSE_ERROR_FRAME_TYPE;
+ return AAC_AC3_PARSE_ERROR_FRAME_TYPE;
hdr->substreamid = get_bits(gbc, 3);
hdr->frame_size = (get_bits(gbc, 11) + 1) << 1;
if(hdr->frame_size < AC3_HEADER_SIZE)
- return AC3_PARSE_ERROR_FRAME_SIZE;
+ return AAC_AC3_PARSE_ERROR_FRAME_SIZE;
hdr->sr_code = get_bits(gbc, 2);
if (hdr->sr_code == 3) {
int sr_code2 = get_bits(gbc, 2);
if(sr_code2 == 3)
- return AC3_PARSE_ERROR_SAMPLE_RATE;
+ return AAC_AC3_PARSE_ERROR_SAMPLE_RATE;
hdr->sample_rate = ff_ac3_sample_rate_tab[sr_code2] / 2;
hdr->sr_shift = 1;
} else {
@@ -158,11 +158,14 @@ static int ac3_sync(uint64_t state, AACAC3ParseContext *hdr_info,
int *need_next_header, int *new_frame_start)
{
int err;
- uint64_t tmp = be2me_64(state);
+ union {
+ uint64_t u64;
+ uint8_t u8[8];
+ } tmp = { be2me_64(state) };
AC3HeaderInfo hdr;
GetBitContext gbc;
- init_get_bits(&gbc, ((uint8_t *)&tmp)+8-AC3_HEADER_SIZE, 54);
+ init_get_bits(&gbc, tmp.u8+8-AC3_HEADER_SIZE, 54);
err = ff_ac3_parse_header(&gbc, &hdr);
if(err < 0)
diff --git a/libavcodec/ac3_parser.h b/libavcodec/ac3_parser.h
index fc17e87..0f8fc6d 100644
--- a/libavcodec/ac3_parser.h
+++ b/libavcodec/ac3_parser.h
@@ -26,15 +26,6 @@
#include "ac3.h"
#include "bitstream.h"
-typedef enum {
- AC3_PARSE_ERROR_SYNC = -1,
- AC3_PARSE_ERROR_BSID = -2,
- AC3_PARSE_ERROR_SAMPLE_RATE = -3,
- AC3_PARSE_ERROR_FRAME_SIZE = -4,
- AC3_PARSE_ERROR_FRAME_TYPE = -5,
- AC3_PARSE_ERROR_CRC = -6,
-} AC3ParseError;
-
/**
* Parses AC-3 frame header.
* Parses the header up to the lfeon element, which is the first 52 or 54 bits
diff --git a/libavcodec/ac3dec.c b/libavcodec/ac3dec.c
index 3e8b0b5..74c8748 100644
--- a/libavcodec/ac3dec.c
+++ b/libavcodec/ac3dec.c
@@ -35,6 +35,8 @@
#include <string.h>
#include "libavutil/crc.h"
+#include "internal.h"
+#include "aac_ac3_parser.h"
#include "ac3_parser.h"
#include "ac3dec.h"
#include "ac3dec_data.h"
@@ -372,8 +374,8 @@ static void set_downmix_coeffs(AC3DecodeContext *s)
* Decode the grouped exponents according to exponent strategy.
* reference: Section 7.1.3 Exponent Decoding
*/
-static void decode_exponents(GetBitContext *gbc, int exp_strategy, int ngrps,
- uint8_t absexp, int8_t *dexps)
+static int decode_exponents(GetBitContext *gbc, int exp_strategy, int ngrps,
+ uint8_t absexp, int8_t *dexps)
{
int i, j, grp, group_size;
int dexp[256];
@@ -390,12 +392,18 @@ static void decode_exponents(GetBitContext *gbc, int exp_strategy, int ngrps,
/* convert to absolute exps and expand groups */
prevexp = absexp;
- for(i=0; i<ngrps*3; i++) {
- prevexp = av_clip(prevexp + dexp[i]-2, 0, 24);
- for(j=0; j<group_size; j++) {
- dexps[(i*group_size)+j] = prevexp;
+ for(i=0,j=0; i<ngrps*3; i++) {
+ prevexp += dexp[i] - 2;
+ if (prevexp > 24U)
+ return -1;
+ switch (group_size) {
+ case 4: dexps[j++] = prevexp;
+ dexps[j++] = prevexp;
+ case 2: dexps[j++] = prevexp;
+ case 1: dexps[j++] = prevexp;
}
}
+ return 0;
}
/**
@@ -728,9 +736,10 @@ static void decode_band_structure(GetBitContext *gbc, int blk, int eac3,
int ecpl, int start_subband, int end_subband,
const uint8_t *default_band_struct,
uint8_t *band_struct, int *num_subbands,
- int *num_bands, int *band_sizes)
+ int *num_bands, uint8_t *band_sizes)
{
- int subbnd, bnd, n_subbands, n_bands, bnd_sz[22];
+ int subbnd, bnd, n_subbands, n_bands=0;
+ uint8_t bnd_sz[22];
n_subbands = end_subband - start_subband;
@@ -769,7 +778,7 @@ static void decode_band_structure(GetBitContext *gbc, int blk, int eac3,
if (num_bands)
*num_bands = n_bands;
if (band_sizes)
- memcpy(band_sizes, bnd_sz, sizeof(int)*n_bands);
+ memcpy(band_sizes, bnd_sz, n_bands);
}
/**
@@ -819,7 +828,7 @@ static int decode_audio_block(AC3DecodeContext *s, int blk)
/* spectral extension strategy */
if (s->eac3 && (!blk || get_bits1(gbc))) {
if (get_bits1(gbc)) {
- av_log_missing_feature(s->avctx, "Spectral extension", 1);
+ ff_log_missing_feature(s->avctx, "Spectral extension", 1);
return -1;
}
/* TODO: parse spectral extension strategy info */
@@ -844,7 +853,7 @@ static int decode_audio_block(AC3DecodeContext *s, int blk)
/* check for enhanced coupling */
if (s->eac3 && get_bits1(gbc)) {
/* TODO: parse enhanced coupling strategy info */
- av_log_missing_feature(s->avctx, "Enhanced coupling", 1);
+ ff_log_missing_feature(s->avctx, "Enhanced coupling", 1);
return -1;
}
@@ -988,9 +997,12 @@ static int decode_audio_block(AC3DecodeContext *s, int blk)
for (ch = !cpl_in_use; ch <= s->channels; ch++) {
if (s->exp_strategy[blk][ch] != EXP_REUSE) {
s->dexps[ch][0] = get_bits(gbc, 4) << !ch;
- decode_exponents(gbc, s->exp_strategy[blk][ch],
- s->num_exp_groups[ch], s->dexps[ch][0],
- &s->dexps[ch][s->start_freq[ch]+!!ch]);
+ if (decode_exponents(gbc, s->exp_strategy[blk][ch],
+ s->num_exp_groups[ch], s->dexps[ch][0],
+ &s->dexps[ch][s->start_freq[ch]+!!ch])) {
+ av_log(s->avctx, AV_LOG_ERROR, "exponent out-of-range\n");
+ return -1;
+ }
if(ch != CPL_CH && ch != s->lfe_ch)
skip_bits(gbc, 2); /* skip gainrng */
}
@@ -1123,12 +1135,15 @@ static int decode_audio_block(AC3DecodeContext *s, int blk)
if(bit_alloc_stages[ch] > 1) {
/* Compute excitation function, Compute masking curve, and
Apply delta bit allocation */
- ff_ac3_bit_alloc_calc_mask(&s->bit_alloc_params, s->band_psd[ch],
- s->start_freq[ch], s->end_freq[ch],
- s->fast_gain[ch], (ch == s->lfe_ch),
- s->dba_mode[ch], s->dba_nsegs[ch],
- s->dba_offsets[ch], s->dba_lengths[ch],
- s->dba_values[ch], s->mask[ch]);
+ if (ff_ac3_bit_alloc_calc_mask(&s->bit_alloc_params, s->band_psd[ch],
+ s->start_freq[ch], s->end_freq[ch],
+ s->fast_gain[ch], (ch == s->lfe_ch),
+ s->dba_mode[ch], s->dba_nsegs[ch],
+ s->dba_offsets[ch], s->dba_lengths[ch],
+ s->dba_values[ch], s->mask[ch])) {
+ av_log(s->avctx, AV_LOG_ERROR, "error in bit allocation\n");
+ return -1;
+ }
}
if(bit_alloc_stages[ch] > 0) {
/* Compute bit allocation */
@@ -1234,32 +1249,32 @@ static int ac3_decode_frame(AVCodecContext * avctx, void *data, int *data_size,
/* check that reported frame size fits in input buffer */
if(s->frame_size > buf_size) {
av_log(avctx, AV_LOG_ERROR, "incomplete frame\n");
- err = AC3_PARSE_ERROR_FRAME_SIZE;
+ err = AAC_AC3_PARSE_ERROR_FRAME_SIZE;
}
/* check for crc mismatch */
- if(err != AC3_PARSE_ERROR_FRAME_SIZE && avctx->error_recognition >= FF_ER_CAREFUL) {
+ if(err != AAC_AC3_PARSE_ERROR_FRAME_SIZE && avctx->error_recognition >= FF_ER_CAREFUL) {
if(av_crc(av_crc_get_table(AV_CRC_16_ANSI), 0, &buf[2], s->frame_size-2)) {
av_log(avctx, AV_LOG_ERROR, "frame CRC mismatch\n");
- err = AC3_PARSE_ERROR_CRC;
+ err = AAC_AC3_PARSE_ERROR_CRC;
}
}
- if(err && err != AC3_PARSE_ERROR_CRC) {
+ if(err && err != AAC_AC3_PARSE_ERROR_CRC) {
switch(err) {
- case AC3_PARSE_ERROR_SYNC:
+ case AAC_AC3_PARSE_ERROR_SYNC:
av_log(avctx, AV_LOG_ERROR, "frame sync error\n");
return -1;
- case AC3_PARSE_ERROR_BSID:
+ case AAC_AC3_PARSE_ERROR_BSID:
av_log(avctx, AV_LOG_ERROR, "invalid bitstream id\n");
break;
- case AC3_PARSE_ERROR_SAMPLE_RATE:
+ case AAC_AC3_PARSE_ERROR_SAMPLE_RATE:
av_log(avctx, AV_LOG_ERROR, "invalid sample rate\n");
break;
- case AC3_PARSE_ERROR_FRAME_SIZE:
+ case AAC_AC3_PARSE_ERROR_FRAME_SIZE:
av_log(avctx, AV_LOG_ERROR, "invalid frame size\n");
break;
- case AC3_PARSE_ERROR_FRAME_TYPE:
+ case AAC_AC3_PARSE_ERROR_FRAME_TYPE:
/* skip frame if CRC is ok. otherwise use error concealment. */
/* TODO: add support for substreams and dependent frames */
if(s->frame_type == EAC3_FRAME_TYPE_DEPENDENT || s->substreamid) {
@@ -1308,6 +1323,7 @@ static int ac3_decode_frame(AVCodecContext * avctx, void *data, int *data_size,
const float *output[s->out_channels];
if (!err && decode_audio_block(s, blk)) {
av_log(avctx, AV_LOG_ERROR, "error decoding the audio block\n");
+ err = 1;
}
for (ch = 0; ch < s->out_channels; ch++)
output[ch] = s->output[ch];
diff --git a/libavcodec/acelp_pitch_delay.c b/libavcodec/acelp_pitch_delay.c
index 3db5ea3..ac929c4 100644
--- a/libavcodec/acelp_pitch_delay.c
+++ b/libavcodec/acelp_pitch_delay.c
@@ -21,6 +21,7 @@
*/
#include "avcodec.h"
+#include "dsputil.h"
#include "acelp_pitch_delay.h"
#include "celp_math.h"
@@ -87,6 +88,7 @@ void ff_acelp_update_past_gain(
}
int16_t ff_acelp_decode_gain_code(
+ DSPContext *dsp,
int gain_corr_factor,
const int16_t* fc_v,
int mr_energy,
@@ -103,7 +105,7 @@ int16_t ff_acelp_decode_gain_code(
mr_energy += quant_energy[i] * ma_prediction_coeff[i];
#ifdef G729_BITEXACT
- mr_energy += (((-6165LL * ff_log2(dot_product(fc_v, fc_v, subframe_size, 0))) >> 3) & ~0x3ff);
+ mr_energy += (((-6165LL * ff_log2(dsp->scalarproduct_int16(fc_v, fc_v, subframe_size, 0))) >> 3) & ~0x3ff);
mr_energy = (5439 * (mr_energy >> 15)) >> 8; // (0.15) = (0.15) * (7.23)
@@ -113,7 +115,7 @@ int16_t ff_acelp_decode_gain_code(
);
#else
mr_energy = gain_corr_factor * exp(M_LN10 / (20 << 23) * mr_energy) /
- sqrt(dot_product(fc_v, fc_v, subframe_size, 0));
+ sqrt(dsp->scalarproduct_int16(fc_v, fc_v, subframe_size, 0));
return mr_energy >> 12;
#endif
}
diff --git a/libavcodec/acelp_pitch_delay.h b/libavcodec/acelp_pitch_delay.h
index be5ac09..2504a9e 100644
--- a/libavcodec/acelp_pitch_delay.h
+++ b/libavcodec/acelp_pitch_delay.h
@@ -24,6 +24,7 @@
#define AVCODEC_ACELP_PITCH_DELAY_H
#include <stdint.h>
+#include "dsputil.h"
#define PITCH_DELAY_MIN 20
#define PITCH_DELAY_MAX 143
@@ -140,6 +141,7 @@ void ff_acelp_update_past_gain(
/**
* \brief Decode the adaptive codebook gain and add
* correction (4.1.5 and 3.9.1 of G.729).
+ * \param dsp initialized dsputil context
* \param gain_corr_factor gain correction factor (2.13)
* \param fc_v fixed-codebook vector (2.13)
* \param mr_energy mean innovation energy and fixed-point correction (7.13)
@@ -209,6 +211,7 @@ void ff_acelp_update_past_gain(
* \remark The routine is used in G.729 and AMR (all modes).
*/
int16_t ff_acelp_decode_gain_code(
+ DSPContext *dsp,
int gain_corr_factor,
const int16_t* fc_v,
int mr_energy,
diff --git a/libavcodec/alacenc.c b/libavcodec/alacenc.c
index df4136f..9fd5064 100644
--- a/libavcodec/alacenc.c
+++ b/libavcodec/alacenc.c
@@ -253,7 +253,8 @@ static void alac_linear_predictor(AlacEncodeContext *s, int ch)
sum >>= lpc.lpc_quant;
sum += samples[0];
- residual[i] = samples[lpc.lpc_order+1] - sum;
+ residual[i] = (samples[lpc.lpc_order+1] - sum) << (32 - s->write_sample_size) >>
+ (32 - s->write_sample_size);
res_val = residual[i];
if(res_val) {
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index 7d958ee..2d6ce40 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -41,16 +41,6 @@
extern AVBitStreamFilter x##_bsf; \
if(ENABLE_##X##_BSF) av_register_bitstream_filter(&x##_bsf); }
-/**
- * Register all the codecs, parsers and bitstream filters which were enabled at
- * configuration time. If you do not call this function you can select exactly
- * which formats you want to support, by using the individual registration
- * functions.
- *
- * @see register_avcodec
- * @see av_register_codec_parser
- * @see av_register_bitstream_filter
- */
void avcodec_register_all(void)
{
static int initialized;
@@ -97,6 +87,7 @@ void avcodec_register_all(void)
REGISTER_DECODER (H263, h263);
REGISTER_DECODER (H263I, h263i);
REGISTER_DECODER (H264, h264);
+ REGISTER_DECODER (H264_VDPAU, h264_vdpau);
REGISTER_ENCDEC (HUFFYUV, huffyuv);
REGISTER_DECODER (IDCIN, idcin);
REGISTER_DECODER (INDEO2, indeo2);
@@ -141,6 +132,8 @@ void avcodec_register_all(void)
REGISTER_DECODER (RPZA, rpza);
REGISTER_ENCDEC (RV10, rv10);
REGISTER_ENCDEC (RV20, rv20);
+ REGISTER_DECODER (RV30, rv30);
+ REGISTER_DECODER (RV40, rv40);
REGISTER_ENCDEC (SGI, sgi);
REGISTER_DECODER (SMACKER, smacker);
REGISTER_DECODER (SMC, smc);
@@ -202,6 +195,7 @@ void avcodec_register_all(void)
REGISTER_DECODER (MPC7, mpc7);
REGISTER_DECODER (MPC8, mpc8);
REGISTER_ENCDEC (NELLYMOSER, nellymoser);
+ REGISTER_DECODER (QCELP, qcelp);
REGISTER_DECODER (QDM2, qdm2);
REGISTER_DECODER (RA_144, ra_144);
REGISTER_DECODER (RA_288, ra_288);
diff --git a/libavcodec/alpha/dsputil_alpha.c b/libavcodec/alpha/dsputil_alpha.c
index 6b80731..c1500b1 100644
--- a/libavcodec/alpha/dsputil_alpha.c
+++ b/libavcodec/alpha/dsputil_alpha.c
@@ -22,9 +22,9 @@
#include "libavcodec/dsputil.h"
#include "asm.h"
-extern void simple_idct_axp(DCTELEM *block);
-extern void simple_idct_put_axp(uint8_t *dest, int line_size, DCTELEM *block);
-extern void simple_idct_add_axp(uint8_t *dest, int line_size, DCTELEM *block);
+void simple_idct_axp(DCTELEM *block);
+void simple_idct_put_axp(uint8_t *dest, int line_size, DCTELEM *block);
+void simple_idct_add_axp(uint8_t *dest, int line_size, DCTELEM *block);
void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels,
int line_size, int h);
diff --git a/libavcodec/apedec.c b/libavcodec/apedec.c
index 82d567f..8746724 100644
--- a/libavcodec/apedec.c
+++ b/libavcodec/apedec.c
@@ -199,6 +199,7 @@ static av_cold int ape_decode_init(AVCodecContext * avctx)
dsputil_init(&s->dsp, avctx);
avctx->sample_fmt = SAMPLE_FMT_S16;
+ avctx->channel_layout = (avctx->channels==2) ? CH_LAYOUT_STEREO : CH_LAYOUT_MONO;
return 0;
}
diff --git a/libavcodec/arm/asm.S b/libavcodec/arm/asm.S
new file mode 100644
index 0000000..0aa1639
--- /dev/null
+++ b/libavcodec/arm/asm.S
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2008 Mans Rullgard <mans at mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+
+ .macro require8, val=1
+ .eabi_attribute 24, \val
+ .endm
+
+ .macro preserve8, val=1
+ .eabi_attribute 25, \val
+ .endm
+
+ .macro function name, export=0
+.if \export
+ .global \name
+.endif
+ .type \name, %function
+ .func \name
+\name:
+ .endm
+
+ .macro movrel rd, val
+#if defined(HAVE_ARMV6T2) && !defined(CONFIG_SHARED)
+ movw \rd, #:lower16:\val
+ movt \rd, #:upper16:\val
+#else
+ ldr \rd, =\val
+#endif
+ .endm
diff --git a/libavcodec/arm/dsputil_arm.c b/libavcodec/arm/dsputil_arm.c
new file mode 100644
index 0000000..eaa6b9e
--- /dev/null
+++ b/libavcodec/arm/dsputil_arm.c
@@ -0,0 +1,217 @@
+/*
+ * ARM optimized DSP utils
+ * Copyright (c) 2001 Lionel Ulmer.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavcodec/dsputil.h"
+#ifdef HAVE_IPP
+#include <ipp.h>
+#endif
+
+void dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx);
+void ff_float_init_arm_vfp(DSPContext* c, AVCodecContext *avctx);
+void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx);
+
+void j_rev_dct_ARM(DCTELEM *data);
+void simple_idct_ARM(DCTELEM *data);
+
+void simple_idct_armv5te(DCTELEM *data);
+void simple_idct_put_armv5te(uint8_t *dest, int line_size, DCTELEM *data);
+void simple_idct_add_armv5te(uint8_t *dest, int line_size, DCTELEM *data);
+
+void ff_simple_idct_armv6(DCTELEM *data);
+void ff_simple_idct_put_armv6(uint8_t *dest, int line_size, DCTELEM *data);
+void ff_simple_idct_add_armv6(uint8_t *dest, int line_size, DCTELEM *data);
+
+void ff_simple_idct_neon(DCTELEM *data);
+void ff_simple_idct_put_neon(uint8_t *dest, int line_size, DCTELEM *data);
+void ff_simple_idct_add_neon(uint8_t *dest, int line_size, DCTELEM *data);
+
+/* XXX: local hack */
+static void (*ff_put_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size);
+static void (*ff_add_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size);
+
+void put_pixels8_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
+void put_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
+void put_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
+void put_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
+
+void put_no_rnd_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
+void put_no_rnd_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
+void put_no_rnd_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
+
+void put_pixels16_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
+
+void ff_prefetch_arm(void *mem, int stride, int h);
+
+CALL_2X_PIXELS(put_pixels16_x2_arm , put_pixels8_x2_arm , 8)
+CALL_2X_PIXELS(put_pixels16_y2_arm , put_pixels8_y2_arm , 8)
+CALL_2X_PIXELS(put_pixels16_xy2_arm, put_pixels8_xy2_arm, 8)
+CALL_2X_PIXELS(put_no_rnd_pixels16_x2_arm , put_no_rnd_pixels8_x2_arm , 8)
+CALL_2X_PIXELS(put_no_rnd_pixels16_y2_arm , put_no_rnd_pixels8_y2_arm , 8)
+CALL_2X_PIXELS(put_no_rnd_pixels16_xy2_arm, put_no_rnd_pixels8_xy2_arm, 8)
+
+void ff_add_pixels_clamped_ARM(short *block, unsigned char *dest,
+ int line_size);
+
+/* XXX: those functions should be suppressed ASAP when all IDCTs are
+ converted */
+static void j_rev_dct_ARM_put(uint8_t *dest, int line_size, DCTELEM *block)
+{
+ j_rev_dct_ARM (block);
+ ff_put_pixels_clamped(block, dest, line_size);
+}
+static void j_rev_dct_ARM_add(uint8_t *dest, int line_size, DCTELEM *block)
+{
+ j_rev_dct_ARM (block);
+ ff_add_pixels_clamped(block, dest, line_size);
+}
+static void simple_idct_ARM_put(uint8_t *dest, int line_size, DCTELEM *block)
+{
+ simple_idct_ARM (block);
+ ff_put_pixels_clamped(block, dest, line_size);
+}
+static void simple_idct_ARM_add(uint8_t *dest, int line_size, DCTELEM *block)
+{
+ simple_idct_ARM (block);
+ ff_add_pixels_clamped(block, dest, line_size);
+}
+
+#ifdef HAVE_IPP
+static void simple_idct_ipp(DCTELEM *block)
+{
+ ippiDCT8x8Inv_Video_16s_C1I(block);
+}
+static void simple_idct_ipp_put(uint8_t *dest, int line_size, DCTELEM *block)
+{
+ ippiDCT8x8Inv_Video_16s8u_C1R(block, dest, line_size);
+}
+
+void add_pixels_clamped_iwmmxt(const DCTELEM *block, uint8_t *pixels, int line_size);
+
+static void simple_idct_ipp_add(uint8_t *dest, int line_size, DCTELEM *block)
+{
+ ippiDCT8x8Inv_Video_16s_C1I(block);
+#ifdef HAVE_IWMMXT
+ add_pixels_clamped_iwmmxt(block, dest, line_size);
+#else
+ ff_add_pixels_clamped_ARM(block, dest, line_size);
+#endif
+}
+#endif
+
+int mm_support(void)
+{
+ return ENABLE_IWMMXT * FF_MM_IWMMXT;
+}
+
+void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx)
+{
+ int idct_algo= avctx->idct_algo;
+
+ ff_put_pixels_clamped = c->put_pixels_clamped;
+ ff_add_pixels_clamped = c->add_pixels_clamped;
+
+ if (avctx->lowres == 0) {
+ if(idct_algo == FF_IDCT_AUTO){
+#if defined(HAVE_IPP)
+ idct_algo = FF_IDCT_IPP;
+#elif defined(HAVE_NEON)
+ idct_algo = FF_IDCT_SIMPLENEON;
+#elif defined(HAVE_ARMV6)
+ idct_algo = FF_IDCT_SIMPLEARMV6;
+#elif defined(HAVE_ARMV5TE)
+ idct_algo = FF_IDCT_SIMPLEARMV5TE;
+#else
+ idct_algo = FF_IDCT_ARM;
+#endif
+ }
+
+ if(idct_algo==FF_IDCT_ARM){
+ c->idct_put= j_rev_dct_ARM_put;
+ c->idct_add= j_rev_dct_ARM_add;
+ c->idct = j_rev_dct_ARM;
+ c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
+ } else if (idct_algo==FF_IDCT_SIMPLEARM){
+ c->idct_put= simple_idct_ARM_put;
+ c->idct_add= simple_idct_ARM_add;
+ c->idct = simple_idct_ARM;
+ c->idct_permutation_type= FF_NO_IDCT_PERM;
+#ifdef HAVE_ARMV6
+ } else if (idct_algo==FF_IDCT_SIMPLEARMV6){
+ c->idct_put= ff_simple_idct_put_armv6;
+ c->idct_add= ff_simple_idct_add_armv6;
+ c->idct = ff_simple_idct_armv6;
+ c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
+#endif
+#ifdef HAVE_ARMV5TE
+ } else if (idct_algo==FF_IDCT_SIMPLEARMV5TE){
+ c->idct_put= simple_idct_put_armv5te;
+ c->idct_add= simple_idct_add_armv5te;
+ c->idct = simple_idct_armv5te;
+ c->idct_permutation_type = FF_NO_IDCT_PERM;
+#endif
+#ifdef HAVE_IPP
+ } else if (idct_algo==FF_IDCT_IPP){
+ c->idct_put= simple_idct_ipp_put;
+ c->idct_add= simple_idct_ipp_add;
+ c->idct = simple_idct_ipp;
+ c->idct_permutation_type= FF_NO_IDCT_PERM;
+#endif
+#ifdef HAVE_NEON
+ } else if (idct_algo==FF_IDCT_SIMPLENEON){
+ c->idct_put= ff_simple_idct_put_neon;
+ c->idct_add= ff_simple_idct_add_neon;
+ c->idct = ff_simple_idct_neon;
+ c->idct_permutation_type = FF_PARTTRANS_IDCT_PERM;
+#endif
+ }
+ }
+
+ c->put_pixels_tab[0][0] = put_pixels16_arm;
+ c->put_pixels_tab[0][1] = put_pixels16_x2_arm;
+ c->put_pixels_tab[0][2] = put_pixels16_y2_arm;
+ c->put_pixels_tab[0][3] = put_pixels16_xy2_arm;
+ c->put_no_rnd_pixels_tab[0][0] = put_pixels16_arm;
+ c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_arm;
+ c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_arm;
+ c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_arm;
+ c->put_pixels_tab[1][0] = put_pixels8_arm;
+ c->put_pixels_tab[1][1] = put_pixels8_x2_arm;
+ c->put_pixels_tab[1][2] = put_pixels8_y2_arm;
+ c->put_pixels_tab[1][3] = put_pixels8_xy2_arm;
+ c->put_no_rnd_pixels_tab[1][0] = put_pixels8_arm;
+ c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_arm;
+ c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_arm;
+ c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_arm;
+
+#ifdef HAVE_ARMV5TE
+ c->prefetch = ff_prefetch_arm;
+#endif
+
+#ifdef HAVE_IWMMXT
+ dsputil_init_iwmmxt(c, avctx);
+#endif
+#ifdef HAVE_ARMVFP
+ ff_float_init_arm_vfp(c, avctx);
+#endif
+#ifdef HAVE_NEON
+ ff_dsputil_init_neon(c, avctx);
+#endif
+}
diff --git a/libavcodec/arm/dsputil_arm_s.S b/libavcodec/arm/dsputil_arm_s.S
new file mode 100644
index 0000000..639b7b8
--- /dev/null
+++ b/libavcodec/arm/dsputil_arm_s.S
@@ -0,0 +1,799 @@
+@
+@ ARMv4 optimized DSP utils
+@ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
+@
+@ This file is part of FFmpeg.
+@
+@ FFmpeg is free software; you can redistribute it and/or
+@ modify it under the terms of the GNU Lesser General Public
+@ License as published by the Free Software Foundation; either
+@ version 2.1 of the License, or (at your option) any later version.
+@
+@ FFmpeg is distributed in the hope that it will be useful,
+@ but WITHOUT ANY WARRANTY; without even the implied warranty of
+@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+@ Lesser General Public License for more details.
+@
+@ You should have received a copy of the GNU Lesser General Public
+@ License along with FFmpeg; if not, write to the Free Software
+@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+@
+
+#include "config.h"
+#include "asm.S"
+
+ preserve8
+
+#ifndef HAVE_PLD
+.macro pld reg
+.endm
+#endif
+
+#ifdef HAVE_ARMV5TE
+function ff_prefetch_arm, export=1
+ subs r2, r2, #1
+ pld [r0]
+ add r0, r0, r1
+ bne ff_prefetch_arm
+ bx lr
+ .endfunc
+#endif
+
+.macro ADJ_ALIGN_QUADWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
+ mov \Rd0, \Rn0, lsr #(\shift * 8)
+ mov \Rd1, \Rn1, lsr #(\shift * 8)
+ mov \Rd2, \Rn2, lsr #(\shift * 8)
+ mov \Rd3, \Rn3, lsr #(\shift * 8)
+ orr \Rd0, \Rd0, \Rn1, lsl #(32 - \shift * 8)
+ orr \Rd1, \Rd1, \Rn2, lsl #(32 - \shift * 8)
+ orr \Rd2, \Rd2, \Rn3, lsl #(32 - \shift * 8)
+ orr \Rd3, \Rd3, \Rn4, lsl #(32 - \shift * 8)
+.endm
+.macro ADJ_ALIGN_DOUBLEWORD shift, R0, R1, R2
+ mov \R0, \R0, lsr #(\shift * 8)
+ orr \R0, \R0, \R1, lsl #(32 - \shift * 8)
+ mov \R1, \R1, lsr #(\shift * 8)
+ orr \R1, \R1, \R2, lsl #(32 - \shift * 8)
+.endm
+.macro ADJ_ALIGN_DOUBLEWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
+ mov \Rdst0, \Rsrc0, lsr #(\shift * 8)
+ mov \Rdst1, \Rsrc1, lsr #(\shift * 8)
+ orr \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - (\shift * 8))
+ orr \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - (\shift * 8))
+.endm
+
+.macro RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
+ @ Rd = (Rn | Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1)
+ @ Rmask = 0xFEFEFEFE
+ @ Rn = destroy
+ eor \Rd0, \Rn0, \Rm0
+ eor \Rd1, \Rn1, \Rm1
+ orr \Rn0, \Rn0, \Rm0
+ orr \Rn1, \Rn1, \Rm1
+ and \Rd0, \Rd0, \Rmask
+ and \Rd1, \Rd1, \Rmask
+ sub \Rd0, \Rn0, \Rd0, lsr #1
+ sub \Rd1, \Rn1, \Rd1, lsr #1
+.endm
+
+.macro NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
+ @ Rd = (Rn & Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1)
+ @ Rmask = 0xFEFEFEFE
+ @ Rn = destroy
+ eor \Rd0, \Rn0, \Rm0
+ eor \Rd1, \Rn1, \Rm1
+ and \Rn0, \Rn0, \Rm0
+ and \Rn1, \Rn1, \Rm1
+ and \Rd0, \Rd0, \Rmask
+ and \Rd1, \Rd1, \Rmask
+ add \Rd0, \Rn0, \Rd0, lsr #1
+ add \Rd1, \Rn1, \Rd1, lsr #1
+.endm
+
+@ ----------------------------------------------------------------
+ .align 8
+function put_pixels16_arm, export=1
+ @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+ @ block = word aligned, pixles = unaligned
+ pld [r1]
+ stmfd sp!, {r4-r11, lr} @ R14 is also called LR
+ adr r5, 5f
+ ands r4, r1, #3
+ bic r1, r1, #3
+ add r5, r5, r4, lsl #2
+ ldrne pc, [r5]
+1:
+ ldmia r1, {r4-r7}
+ add r1, r1, r2
+ stmia r0, {r4-r7}
+ pld [r1]
+ subs r3, r3, #1
+ add r0, r0, r2
+ bne 1b
+ ldmfd sp!, {r4-r11, pc}
+ .align 8
+2:
+ ldmia r1, {r4-r8}
+ add r1, r1, r2
+ ADJ_ALIGN_QUADWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8
+ pld [r1]
+ subs r3, r3, #1
+ stmia r0, {r9-r12}
+ add r0, r0, r2
+ bne 2b
+ ldmfd sp!, {r4-r11, pc}
+ .align 8
+3:
+ ldmia r1, {r4-r8}
+ add r1, r1, r2
+ ADJ_ALIGN_QUADWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8
+ pld [r1]
+ subs r3, r3, #1
+ stmia r0, {r9-r12}
+ add r0, r0, r2
+ bne 3b
+ ldmfd sp!, {r4-r11, pc}
+ .align 8
+4:
+ ldmia r1, {r4-r8}
+ add r1, r1, r2
+ ADJ_ALIGN_QUADWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8
+ pld [r1]
+ subs r3, r3, #1
+ stmia r0, {r9-r12}
+ add r0, r0, r2
+ bne 4b
+ ldmfd sp!, {r4-r11,pc}
+ .align 8
+5:
+ .word 1b
+ .word 2b
+ .word 3b
+ .word 4b
+ .endfunc
+
+@ ----------------------------------------------------------------
+ .align 8
+function put_pixels8_arm, export=1
+ @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+ @ block = word aligned, pixles = unaligned
+ pld [r1]
+ stmfd sp!, {r4-r5,lr} @ R14 is also called LR
+ adr r5, 5f
+ ands r4, r1, #3
+ bic r1, r1, #3
+ add r5, r5, r4, lsl #2
+ ldrne pc, [r5]
+1:
+ ldmia r1, {r4-r5}
+ add r1, r1, r2
+ subs r3, r3, #1
+ pld [r1]
+ stmia r0, {r4-r5}
+ add r0, r0, r2
+ bne 1b
+ ldmfd sp!, {r4-r5,pc}
+ .align 8
+2:
+ ldmia r1, {r4-r5, r12}
+ add r1, r1, r2
+ ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r12
+ pld [r1]
+ subs r3, r3, #1
+ stmia r0, {r4-r5}
+ add r0, r0, r2
+ bne 2b
+ ldmfd sp!, {r4-r5,pc}
+ .align 8
+3:
+ ldmia r1, {r4-r5, r12}
+ add r1, r1, r2
+ ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r12
+ pld [r1]
+ subs r3, r3, #1
+ stmia r0, {r4-r5}
+ add r0, r0, r2
+ bne 3b
+ ldmfd sp!, {r4-r5,pc}
+ .align 8
+4:
+ ldmia r1, {r4-r5, r12}
+ add r1, r1, r2
+ ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r12
+ pld [r1]
+ subs r3, r3, #1
+ stmia r0, {r4-r5}
+ add r0, r0, r2
+ bne 4b
+ ldmfd sp!, {r4-r5,pc}
+ .align 8
+5:
+ .word 1b
+ .word 2b
+ .word 3b
+ .word 4b
+ .endfunc
+
+@ ----------------------------------------------------------------
+ .align 8
+function put_pixels8_x2_arm, export=1
+ @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+ @ block = word aligned, pixles = unaligned
+ pld [r1]
+ stmfd sp!, {r4-r10,lr} @ R14 is also called LR
+ adr r5, 5f
+ ands r4, r1, #3
+ ldr r12, [r5]
+ add r5, r5, r4, lsl #2
+ bic r1, r1, #3
+ ldrne pc, [r5]
+1:
+ ldmia r1, {r4-r5, r10}
+ add r1, r1, r2
+ ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
+ pld [r1]
+ RND_AVG32 r8, r9, r4, r5, r6, r7, r12
+ subs r3, r3, #1
+ stmia r0, {r8-r9}
+ add r0, r0, r2
+ bne 1b
+ ldmfd sp!, {r4-r10,pc}
+ .align 8
+2:
+ ldmia r1, {r4-r5, r10}
+ add r1, r1, r2
+ ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
+ ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10
+ pld [r1]
+ RND_AVG32 r4, r5, r6, r7, r8, r9, r12
+ subs r3, r3, #1
+ stmia r0, {r4-r5}
+ add r0, r0, r2
+ bne 2b
+ ldmfd sp!, {r4-r10,pc}
+ .align 8
+3:
+ ldmia r1, {r4-r5, r10}
+ add r1, r1, r2
+ ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10
+ ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10
+ pld [r1]
+ RND_AVG32 r4, r5, r6, r7, r8, r9, r12
+ subs r3, r3, #1
+ stmia r0, {r4-r5}
+ add r0, r0, r2
+ bne 3b
+ ldmfd sp!, {r4-r10,pc}
+ .align 8
+4:
+ ldmia r1, {r4-r5, r10}
+ add r1, r1, r2
+ ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10
+ pld [r1]
+ RND_AVG32 r8, r9, r6, r7, r5, r10, r12
+ subs r3, r3, #1
+ stmia r0, {r8-r9}
+ add r0, r0, r2
+ bne 4b
+ ldmfd sp!, {r4-r10,pc} @@ update PC with LR content.
+ .align 8
+5:
+ .word 0xFEFEFEFE
+ .word 2b
+ .word 3b
+ .word 4b
+ .endfunc
+
+ .align 8
+function put_no_rnd_pixels8_x2_arm, export=1
+ @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+ @ block = word aligned, pixles = unaligned
+ pld [r1]
+ stmfd sp!, {r4-r10,lr} @ R14 is also called LR
+ adr r5, 5f
+ ands r4, r1, #3
+ ldr r12, [r5]
+ add r5, r5, r4, lsl #2
+ bic r1, r1, #3
+ ldrne pc, [r5]
+1:
+ ldmia r1, {r4-r5, r10}
+ add r1, r1, r2
+ ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
+ pld [r1]
+ NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
+ subs r3, r3, #1
+ stmia r0, {r8-r9}
+ add r0, r0, r2
+ bne 1b
+ ldmfd sp!, {r4-r10,pc}
+ .align 8
+2:
+ ldmia r1, {r4-r5, r10}
+ add r1, r1, r2
+ ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
+ ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10
+ pld [r1]
+ NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
+ subs r3, r3, #1
+ stmia r0, {r4-r5}
+ add r0, r0, r2
+ bne 2b
+ ldmfd sp!, {r4-r10,pc}
+ .align 8
+3:
+ ldmia r1, {r4-r5, r10}
+ add r1, r1, r2
+ ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10
+ ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10
+ pld [r1]
+ NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
+ subs r3, r3, #1
+ stmia r0, {r4-r5}
+ add r0, r0, r2
+ bne 3b
+ ldmfd sp!, {r4-r10,pc}
+ .align 8
+4:
+ ldmia r1, {r4-r5, r10}
+ add r1, r1, r2
+ ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10
+ pld [r1]
+ NO_RND_AVG32 r8, r9, r6, r7, r5, r10, r12
+ subs r3, r3, #1
+ stmia r0, {r8-r9}
+ add r0, r0, r2
+ bne 4b
+ ldmfd sp!, {r4-r10,pc} @@ update PC with LR content.
+ .align 8
+5:
+ .word 0xFEFEFEFE
+ .word 2b
+ .word 3b
+ .word 4b
+ .endfunc
+
+
+@ ----------------------------------------------------------------
+ .align 8
+function put_pixels8_y2_arm, export=1
+ @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+ @ block = word aligned, pixles = unaligned
+ pld [r1]
+ stmfd sp!, {r4-r11,lr} @ R14 is also called LR
+ adr r5, 5f
+ ands r4, r1, #3
+ mov r3, r3, lsr #1
+ ldr r12, [r5]
+ add r5, r5, r4, lsl #2
+ bic r1, r1, #3
+ ldrne pc, [r5]
+1:
+ ldmia r1, {r4-r5}
+ add r1, r1, r2
+6: ldmia r1, {r6-r7}
+ add r1, r1, r2
+ pld [r1]
+ RND_AVG32 r8, r9, r4, r5, r6, r7, r12
+ ldmia r1, {r4-r5}
+ add r1, r1, r2
+ stmia r0, {r8-r9}
+ add r0, r0, r2
+ pld [r1]
+ RND_AVG32 r8, r9, r6, r7, r4, r5, r12
+ subs r3, r3, #1
+ stmia r0, {r8-r9}
+ add r0, r0, r2
+ bne 6b
+ ldmfd sp!, {r4-r11,pc}
+ .align 8
+2:
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
+6: ldmia r1, {r7-r9}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9
+ RND_AVG32 r10, r11, r4, r5, r7, r8, r12
+ stmia r0, {r10-r11}
+ add r0, r0, r2
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
+ subs r3, r3, #1
+ RND_AVG32 r10, r11, r7, r8, r4, r5, r12
+ stmia r0, {r10-r11}
+ add r0, r0, r2
+ bne 6b
+ ldmfd sp!, {r4-r11,pc}
+ .align 8
+3:
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
+6: ldmia r1, {r7-r9}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9
+ RND_AVG32 r10, r11, r4, r5, r7, r8, r12
+ stmia r0, {r10-r11}
+ add r0, r0, r2
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
+ subs r3, r3, #1
+ RND_AVG32 r10, r11, r7, r8, r4, r5, r12
+ stmia r0, {r10-r11}
+ add r0, r0, r2
+ bne 6b
+ ldmfd sp!, {r4-r11,pc}
+ .align 8
+4:
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
+6: ldmia r1, {r7-r9}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9
+ RND_AVG32 r10, r11, r4, r5, r7, r8, r12
+ stmia r0, {r10-r11}
+ add r0, r0, r2
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
+ subs r3, r3, #1
+ RND_AVG32 r10, r11, r7, r8, r4, r5, r12
+ stmia r0, {r10-r11}
+ add r0, r0, r2
+ bne 6b
+ ldmfd sp!, {r4-r11,pc}
+
+ .align 8
+5:
+ .word 0xFEFEFEFE
+ .word 2b
+ .word 3b
+ .word 4b
+ .endfunc
+
+ .align 8
+function put_no_rnd_pixels8_y2_arm, export=1
+ @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+ @ block = word aligned, pixles = unaligned
+ pld [r1]
+ stmfd sp!, {r4-r11,lr} @ R14 is also called LR
+ adr r5, 5f
+ ands r4, r1, #3
+ mov r3, r3, lsr #1
+ ldr r12, [r5]
+ add r5, r5, r4, lsl #2
+ bic r1, r1, #3
+ ldrne pc, [r5]
+1:
+ ldmia r1, {r4-r5}
+ add r1, r1, r2
+6: ldmia r1, {r6-r7}
+ add r1, r1, r2
+ pld [r1]
+ NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
+ ldmia r1, {r4-r5}
+ add r1, r1, r2
+ stmia r0, {r8-r9}
+ add r0, r0, r2
+ pld [r1]
+ NO_RND_AVG32 r8, r9, r6, r7, r4, r5, r12
+ subs r3, r3, #1
+ stmia r0, {r8-r9}
+ add r0, r0, r2
+ bne 6b
+ ldmfd sp!, {r4-r11,pc}
+ .align 8
+2:
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
+6: ldmia r1, {r7-r9}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9
+ NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
+ stmia r0, {r10-r11}
+ add r0, r0, r2
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
+ subs r3, r3, #1
+ NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
+ stmia r0, {r10-r11}
+ add r0, r0, r2
+ bne 6b
+ ldmfd sp!, {r4-r11,pc}
+ .align 8
+3:
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
+6: ldmia r1, {r7-r9}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9
+ NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
+ stmia r0, {r10-r11}
+ add r0, r0, r2
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
+ subs r3, r3, #1
+ NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
+ stmia r0, {r10-r11}
+ add r0, r0, r2
+ bne 6b
+ ldmfd sp!, {r4-r11,pc}
+ .align 8
+4:
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
+6: ldmia r1, {r7-r9}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9
+ NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
+ stmia r0, {r10-r11}
+ add r0, r0, r2
+ ldmia r1, {r4-r6}
+ add r1, r1, r2
+ pld [r1]
+ ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
+ subs r3, r3, #1
+ NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
+ stmia r0, {r10-r11}
+ add r0, r0, r2
+ bne 6b
+ ldmfd sp!, {r4-r11,pc}
+ .align 8
+5:
+ .word 0xFEFEFEFE
+ .word 2b
+ .word 3b
+ .word 4b
+ .endfunc
+
+@ ----------------------------------------------------------------
+.macro RND_XY2_IT align
+ @ l1= (a & 0x03030303) + (b & 0x03030303) ?(+ 0x02020202)
+ @ h1= ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2)
+.if \align == 0
+ ldmia r1, {r6-r8}
+.elseif \align == 3
+ ldmia r1, {r5-r7}
+.else
+ ldmia r1, {r8-r10}
+.endif
+ add r1, r1, r2
+ pld [r1]
+.if \align == 0
+ ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r6, r7, r8
+.elseif \align == 1
+ ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r8, r9, r10
+ ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r8, r9, r10
+.elseif \align == 2
+ ADJ_ALIGN_DOUBLEWORD_D 2, r4, r5, r8, r9, r10
+ ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r8, r9, r10
+.elseif \align == 3
+ ADJ_ALIGN_DOUBLEWORD_D 3, r4, r5, r5, r6, r7
+.endif
+ ldr r14, [r12, #0] @ 0x03030303
+ tst r3, #1
+ and r8, r4, r14
+ and r9, r5, r14
+ and r10, r6, r14
+ and r11, r7, r14
+ ldreq r14, [r12, #16] @ 0x02020202/0x01010101
+ add r8, r8, r10
+ add r9, r9, r11
+ addeq r8, r8, r14
+ addeq r9, r9, r14
+ ldr r14, [r12, #20] @ 0xFCFCFCFC >> 2
+ and r4, r14, r4, lsr #2
+ and r5, r14, r5, lsr #2
+ and r6, r14, r6, lsr #2
+ and r7, r14, r7, lsr #2
+ add r10, r4, r6
+ add r11, r5, r7
+ subs r3, r3, #1
+.endm
+
+.macro RND_XY2_EXPAND align
+ RND_XY2_IT \align
+6: stmfd sp!, {r8-r11}
+ RND_XY2_IT \align
+ ldmfd sp!, {r4-r7}
+ add r4, r4, r8
+ add r5, r5, r9
+ add r6, r6, r10
+ add r7, r7, r11
+ ldr r14, [r12, #24] @ 0x0F0F0F0F
+ and r4, r14, r4, lsr #2
+ and r5, r14, r5, lsr #2
+ add r4, r4, r6
+ add r5, r5, r7
+ stmia r0, {r4-r5}
+ add r0, r0, r2
+ bge 6b
+ ldmfd sp!, {r4-r11,pc}
+.endm
+
+ .align 8
+function put_pixels8_xy2_arm, export=1
+ @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+ @ block = word aligned, pixles = unaligned
+ pld [r1]
+ stmfd sp!, {r4-r11,lr} @ R14 is also called LR
+ adrl r12, 5f
+ ands r4, r1, #3
+ add r5, r12, r4, lsl #2
+ bic r1, r1, #3
+ ldrne pc, [r5]
+1:
+ RND_XY2_EXPAND 0
+
+ .align 8
+2:
+ RND_XY2_EXPAND 1
+
+ .align 8
+3:
+ RND_XY2_EXPAND 2
+
+ .align 8
+4:
+ RND_XY2_EXPAND 3
+
+5:
+ .word 0x03030303
+ .word 2b
+ .word 3b
+ .word 4b
+ .word 0x02020202
+ .word 0xFCFCFCFC >> 2
+ .word 0x0F0F0F0F
+ .endfunc
+
+ .align 8
+function put_no_rnd_pixels8_xy2_arm, export=1
+ @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+ @ block = word aligned, pixles = unaligned
+ pld [r1]
+ stmfd sp!, {r4-r11,lr} @ R14 is also called LR
+ adrl r12, 5f
+ ands r4, r1, #3
+ add r5, r12, r4, lsl #2
+ bic r1, r1, #3
+ ldrne pc, [r5]
+1:
+ RND_XY2_EXPAND 0
+
+ .align 8
+2:
+ RND_XY2_EXPAND 1
+
+ .align 8
+3:
+ RND_XY2_EXPAND 2
+
+ .align 8
+4:
+ RND_XY2_EXPAND 3
+
+5:
+ .word 0x03030303
+ .word 2b
+ .word 3b
+ .word 4b
+ .word 0x01010101
+ .word 0xFCFCFCFC >> 2
+ .word 0x0F0F0F0F
+ .endfunc
+
+@ void ff_add_pixels_clamped_ARM(int16_t *block, uint8_t *dest, int stride)
+function ff_add_pixels_clamped_ARM, export=1
+ push {r4-r10}
+ mov r10, #8
+1:
+ ldr r4, [r1] /* load dest */
+ /* block[0] and block[1]*/
+ ldrsh r5, [r0]
+ ldrsh r7, [r0, #2]
+ and r6, r4, #0xFF
+ and r8, r4, #0xFF00
+ add r6, r5, r6
+ add r8, r7, r8, lsr #8
+ mvn r5, r5
+ mvn r7, r7
+ tst r6, #0x100
+ movne r6, r5, lsr #24
+ tst r8, #0x100
+ movne r8, r7, lsr #24
+ mov r9, r6
+ ldrsh r5, [r0, #4] /* moved form [A] */
+ orr r9, r9, r8, lsl #8
+ /* block[2] and block[3] */
+ /* [A] */
+ ldrsh r7, [r0, #6]
+ and r6, r4, #0xFF0000
+ and r8, r4, #0xFF000000
+ add r6, r5, r6, lsr #16
+ add r8, r7, r8, lsr #24
+ mvn r5, r5
+ mvn r7, r7
+ tst r6, #0x100
+ movne r6, r5, lsr #24
+ tst r8, #0x100
+ movne r8, r7, lsr #24
+ orr r9, r9, r6, lsl #16
+ ldr r4, [r1, #4] /* moved form [B] */
+ orr r9, r9, r8, lsl #24
+ /* store dest */
+ ldrsh r5, [r0, #8] /* moved form [C] */
+ str r9, [r1]
+
+ /* load dest */
+ /* [B] */
+ /* block[4] and block[5] */
+ /* [C] */
+ ldrsh r7, [r0, #10]
+ and r6, r4, #0xFF
+ and r8, r4, #0xFF00
+ add r6, r5, r6
+ add r8, r7, r8, lsr #8
+ mvn r5, r5
+ mvn r7, r7
+ tst r6, #0x100
+ movne r6, r5, lsr #24
+ tst r8, #0x100
+ movne r8, r7, lsr #24
+ mov r9, r6
+ ldrsh r5, [r0, #12] /* moved from [D] */
+ orr r9, r9, r8, lsl #8
+ /* block[6] and block[7] */
+ /* [D] */
+ ldrsh r7, [r0, #14]
+ and r6, r4, #0xFF0000
+ and r8, r4, #0xFF000000
+ add r6, r5, r6, lsr #16
+ add r8, r7, r8, lsr #24
+ mvn r5, r5
+ mvn r7, r7
+ tst r6, #0x100
+ movne r6, r5, lsr #24
+ tst r8, #0x100
+ movne r8, r7, lsr #24
+ orr r9, r9, r6, lsl #16
+ add r0, r0, #16 /* moved from [E] */
+ orr r9, r9, r8, lsl #24
+ subs r10, r10, #1 /* moved from [F] */
+ /* store dest */
+ str r9, [r1, #4]
+
+ /* [E] */
+ /* [F] */
+ add r1, r1, r2
+ bne 1b
+
+ pop {r4-r10}
+ bx lr
+ .endfunc
diff --git a/libavcodec/armv4l/dsputil_iwmmxt.c b/libavcodec/arm/dsputil_iwmmxt.c
similarity index 100%
rename from libavcodec/armv4l/dsputil_iwmmxt.c
rename to libavcodec/arm/dsputil_iwmmxt.c
diff --git a/libavcodec/arm/dsputil_iwmmxt_rnd_template.c b/libavcodec/arm/dsputil_iwmmxt_rnd_template.c
new file mode 100644
index 0000000..35a5a9b
--- /dev/null
+++ b/libavcodec/arm/dsputil_iwmmxt_rnd_template.c
@@ -0,0 +1,1114 @@
+/*
+ * iWMMXt optimized DSP utils
+ * copyright (c) 2004 AGAWA Koji
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+void DEF(put, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+ int stride = line_size;
+ __asm__ volatile (
+ "and r12, %[pixels], #7 \n\t"
+ "bic %[pixels], %[pixels], #7 \n\t"
+ "tmcr wcgr1, r12 \n\t"
+ "add r4, %[pixels], %[line_size] \n\t"
+ "add r5, %[block], %[line_size] \n\t"
+ "mov %[line_size], %[line_size], lsl #1 \n\t"
+ "1: \n\t"
+ "wldrd wr0, [%[pixels]] \n\t"
+ "subs %[h], %[h], #2 \n\t"
+ "wldrd wr1, [%[pixels], #8] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "wldrd wr3, [r4] \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "wldrd wr4, [r4, #8] \n\t"
+ "add r4, r4, %[line_size] \n\t"
+ "walignr1 wr8, wr0, wr1 \n\t"
+ "pld [r4] \n\t"
+ "pld [r4, #32] \n\t"
+ "walignr1 wr10, wr3, wr4 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+ "wstrd wr10, [r5] \n\t"
+ "add r5, r5, %[line_size] \n\t"
+ "bne 1b \n\t"
+ : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h)
+ :
+ : "memory", "r4", "r5", "r12");
+}
+
+void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+ int stride = line_size;
+ __asm__ volatile (
+ "and r12, %[pixels], #7 \n\t"
+ "bic %[pixels], %[pixels], #7 \n\t"
+ "tmcr wcgr1, r12 \n\t"
+ "add r4, %[pixels], %[line_size] \n\t"
+ "add r5, %[block], %[line_size] \n\t"
+ "mov %[line_size], %[line_size], lsl #1 \n\t"
+ "1: \n\t"
+ "wldrd wr0, [%[pixels]] \n\t"
+ "subs %[h], %[h], #2 \n\t"
+ "wldrd wr1, [%[pixels], #8] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "wldrd wr3, [r4] \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "wldrd wr4, [r4, #8] \n\t"
+ "add r4, r4, %[line_size] \n\t"
+ "walignr1 wr8, wr0, wr1 \n\t"
+ "wldrd wr0, [%[block]] \n\t"
+ "wldrd wr2, [r5] \n\t"
+ "pld [r4] \n\t"
+ "pld [r4, #32] \n\t"
+ "walignr1 wr10, wr3, wr4 \n\t"
+ WAVG2B" wr8, wr8, wr0 \n\t"
+ WAVG2B" wr10, wr10, wr2 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+ "wstrd wr10, [r5] \n\t"
+ "pld [%[block]] \n\t"
+ "pld [%[block], #32] \n\t"
+ "add r5, r5, %[line_size] \n\t"
+ "pld [r5] \n\t"
+ "pld [r5, #32] \n\t"
+ "bne 1b \n\t"
+ : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h)
+ :
+ : "memory", "r4", "r5", "r12");
+}
+
+void DEF(put, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+ int stride = line_size;
+ __asm__ volatile (
+ "and r12, %[pixels], #7 \n\t"
+ "bic %[pixels], %[pixels], #7 \n\t"
+ "tmcr wcgr1, r12 \n\t"
+ "add r4, %[pixels], %[line_size] \n\t"
+ "add r5, %[block], %[line_size] \n\t"
+ "mov %[line_size], %[line_size], lsl #1 \n\t"
+ "1: \n\t"
+ "wldrd wr0, [%[pixels]] \n\t"
+ "wldrd wr1, [%[pixels], #8] \n\t"
+ "subs %[h], %[h], #2 \n\t"
+ "wldrd wr2, [%[pixels], #16] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "wldrd wr3, [r4] \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr8, wr0, wr1 \n\t"
+ "wldrd wr4, [r4, #8] \n\t"
+ "walignr1 wr9, wr1, wr2 \n\t"
+ "wldrd wr5, [r4, #16] \n\t"
+ "add r4, r4, %[line_size] \n\t"
+ "pld [r4] \n\t"
+ "pld [r4, #32] \n\t"
+ "walignr1 wr10, wr3, wr4 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "walignr1 wr11, wr4, wr5 \n\t"
+ "wstrd wr9, [%[block], #8] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+ "wstrd wr10, [r5] \n\t"
+ "wstrd wr11, [r5, #8] \n\t"
+ "add r5, r5, %[line_size] \n\t"
+ "bne 1b \n\t"
+ : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h)
+ :
+ : "memory", "r4", "r5", "r12");
+}
+
+void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+ int stride = line_size;
+ __asm__ volatile (
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "pld [%[block]] \n\t"
+ "pld [%[block], #32] \n\t"
+ "and r12, %[pixels], #7 \n\t"
+ "bic %[pixels], %[pixels], #7 \n\t"
+ "tmcr wcgr1, r12 \n\t"
+ "add r4, %[pixels], %[line_size]\n\t"
+ "add r5, %[block], %[line_size] \n\t"
+ "mov %[line_size], %[line_size], lsl #1 \n\t"
+ "1: \n\t"
+ "wldrd wr0, [%[pixels]] \n\t"
+ "wldrd wr1, [%[pixels], #8] \n\t"
+ "subs %[h], %[h], #2 \n\t"
+ "wldrd wr2, [%[pixels], #16] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "wldrd wr3, [r4] \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr8, wr0, wr1 \n\t"
+ "wldrd wr4, [r4, #8] \n\t"
+ "walignr1 wr9, wr1, wr2 \n\t"
+ "wldrd wr5, [r4, #16] \n\t"
+ "add r4, r4, %[line_size] \n\t"
+ "wldrd wr0, [%[block]] \n\t"
+ "pld [r4] \n\t"
+ "wldrd wr1, [%[block], #8] \n\t"
+ "pld [r4, #32] \n\t"
+ "wldrd wr2, [r5] \n\t"
+ "walignr1 wr10, wr3, wr4 \n\t"
+ "wldrd wr3, [r5, #8] \n\t"
+ WAVG2B" wr8, wr8, wr0 \n\t"
+ WAVG2B" wr9, wr9, wr1 \n\t"
+ WAVG2B" wr10, wr10, wr2 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "walignr1 wr11, wr4, wr5 \n\t"
+ WAVG2B" wr11, wr11, wr3 \n\t"
+ "wstrd wr9, [%[block], #8] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+ "wstrd wr10, [r5] \n\t"
+ "pld [%[block]] \n\t"
+ "pld [%[block], #32] \n\t"
+ "wstrd wr11, [r5, #8] \n\t"
+ "add r5, r5, %[line_size] \n\t"
+ "pld [r5] \n\t"
+ "pld [r5, #32] \n\t"
+ "bne 1b \n\t"
+ : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h)
+ :
+ : "memory", "r4", "r5", "r12");
+}
+
+void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+ int stride = line_size;
+ // [wr0 wr1 wr2 wr3] for previous line
+ // [wr4 wr5 wr6 wr7] for current line
+ SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
+ __asm__ volatile(
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "and r12, %[pixels], #7 \n\t"
+ "bic %[pixels], %[pixels], #7 \n\t"
+ "tmcr wcgr1, r12 \n\t"
+ "add r12, r12, #1 \n\t"
+ "add r4, %[pixels], %[line_size]\n\t"
+ "tmcr wcgr2, r12 \n\t"
+ "add r5, %[block], %[line_size] \n\t"
+ "mov %[line_size], %[line_size], lsl #1 \n\t"
+
+ "1: \n\t"
+ "wldrd wr10, [%[pixels]] \n\t"
+ "cmp r12, #8 \n\t"
+ "wldrd wr11, [%[pixels], #8] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "wldrd wr13, [r4] \n\t"
+ "pld [%[pixels]] \n\t"
+ "wldrd wr14, [r4, #8] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "add r4, r4, %[line_size] \n\t"
+ "walignr1 wr0, wr10, wr11 \n\t"
+ "pld [r4] \n\t"
+ "pld [r4, #32] \n\t"
+ "walignr1 wr2, wr13, wr14 \n\t"
+ "wmoveq wr4, wr11 \n\t"
+ "wmoveq wr6, wr14 \n\t"
+ "walignr2ne wr4, wr10, wr11 \n\t"
+ "walignr2ne wr6, wr13, wr14 \n\t"
+ WAVG2B" wr0, wr0, wr4 \n\t"
+ WAVG2B" wr2, wr2, wr6 \n\t"
+ "wstrd wr0, [%[block]] \n\t"
+ "subs %[h], %[h], #2 \n\t"
+ "wstrd wr2, [r5] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+ "add r5, r5, %[line_size] \n\t"
+ "bne 1b \n\t"
+ : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
+ :
+ : "r4", "r5", "r12", "memory");
+}
+
+void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+ int stride = line_size;
+ // [wr0 wr1 wr2 wr3] for previous line
+ // [wr4 wr5 wr6 wr7] for current line
+ SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
+ __asm__ volatile(
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "and r12, %[pixels], #7 \n\t"
+ "bic %[pixels], %[pixels], #7 \n\t"
+ "tmcr wcgr1, r12 \n\t"
+ "add r12, r12, #1 \n\t"
+ "add r4, %[pixels], %[line_size]\n\t"
+ "tmcr wcgr2, r12 \n\t"
+ "add r5, %[block], %[line_size] \n\t"
+ "mov %[line_size], %[line_size], lsl #1 \n\t"
+
+ "1: \n\t"
+ "wldrd wr10, [%[pixels]] \n\t"
+ "cmp r12, #8 \n\t"
+ "wldrd wr11, [%[pixels], #8] \n\t"
+ "wldrd wr12, [%[pixels], #16] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "wldrd wr13, [r4] \n\t"
+ "pld [%[pixels]] \n\t"
+ "wldrd wr14, [r4, #8] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "wldrd wr15, [r4, #16] \n\t"
+ "add r4, r4, %[line_size] \n\t"
+ "walignr1 wr0, wr10, wr11 \n\t"
+ "pld [r4] \n\t"
+ "pld [r4, #32] \n\t"
+ "walignr1 wr1, wr11, wr12 \n\t"
+ "walignr1 wr2, wr13, wr14 \n\t"
+ "walignr1 wr3, wr14, wr15 \n\t"
+ "wmoveq wr4, wr11 \n\t"
+ "wmoveq wr5, wr12 \n\t"
+ "wmoveq wr6, wr14 \n\t"
+ "wmoveq wr7, wr15 \n\t"
+ "walignr2ne wr4, wr10, wr11 \n\t"
+ "walignr2ne wr5, wr11, wr12 \n\t"
+ "walignr2ne wr6, wr13, wr14 \n\t"
+ "walignr2ne wr7, wr14, wr15 \n\t"
+ WAVG2B" wr0, wr0, wr4 \n\t"
+ WAVG2B" wr1, wr1, wr5 \n\t"
+ "wstrd wr0, [%[block]] \n\t"
+ WAVG2B" wr2, wr2, wr6 \n\t"
+ "wstrd wr1, [%[block], #8] \n\t"
+ WAVG2B" wr3, wr3, wr7 \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+ "wstrd wr2, [r5] \n\t"
+ "subs %[h], %[h], #2 \n\t"
+ "wstrd wr3, [r5, #8] \n\t"
+ "add r5, r5, %[line_size] \n\t"
+ "bne 1b \n\t"
+ : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
+ :
+ : "r4", "r5", "r12", "memory");
+}
+
+void DEF(avg, pixels8_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+ int stride = line_size;
+ // [wr0 wr1 wr2 wr3] for previous line
+ // [wr4 wr5 wr6 wr7] for current line
+ SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
+ __asm__ volatile(
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "pld [%[block]] \n\t"
+ "pld [%[block], #32] \n\t"
+ "and r12, %[pixels], #7 \n\t"
+ "bic %[pixels], %[pixels], #7 \n\t"
+ "tmcr wcgr1, r12 \n\t"
+ "add r12, r12, #1 \n\t"
+ "add r4, %[pixels], %[line_size]\n\t"
+ "tmcr wcgr2, r12 \n\t"
+ "add r5, %[block], %[line_size] \n\t"
+ "mov %[line_size], %[line_size], lsl #1 \n\t"
+ "pld [r5] \n\t"
+ "pld [r5, #32] \n\t"
+
+ "1: \n\t"
+ "wldrd wr10, [%[pixels]] \n\t"
+ "cmp r12, #8 \n\t"
+ "wldrd wr11, [%[pixels], #8] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "wldrd wr13, [r4] \n\t"
+ "pld [%[pixels]] \n\t"
+ "wldrd wr14, [r4, #8] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "add r4, r4, %[line_size] \n\t"
+ "walignr1 wr0, wr10, wr11 \n\t"
+ "pld [r4] \n\t"
+ "pld [r4, #32] \n\t"
+ "walignr1 wr2, wr13, wr14 \n\t"
+ "wmoveq wr4, wr11 \n\t"
+ "wmoveq wr6, wr14 \n\t"
+ "walignr2ne wr4, wr10, wr11 \n\t"
+ "wldrd wr10, [%[block]] \n\t"
+ "walignr2ne wr6, wr13, wr14 \n\t"
+ "wldrd wr12, [r5] \n\t"
+ WAVG2B" wr0, wr0, wr4 \n\t"
+ WAVG2B" wr2, wr2, wr6 \n\t"
+ WAVG2B" wr0, wr0, wr10 \n\t"
+ WAVG2B" wr2, wr2, wr12 \n\t"
+ "wstrd wr0, [%[block]] \n\t"
+ "subs %[h], %[h], #2 \n\t"
+ "wstrd wr2, [r5] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+ "add r5, r5, %[line_size] \n\t"
+ "pld [%[block]] \n\t"
+ "pld [%[block], #32] \n\t"
+ "pld [r5] \n\t"
+ "pld [r5, #32] \n\t"
+ "bne 1b \n\t"
+ : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
+ :
+ : "r4", "r5", "r12", "memory");
+}
+
+void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+ int stride = line_size;
+ // [wr0 wr1 wr2 wr3] for previous line
+ // [wr4 wr5 wr6 wr7] for current line
+ SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
+ __asm__ volatile(
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "pld [%[block]] \n\t"
+ "pld [%[block], #32] \n\t"
+ "and r12, %[pixels], #7 \n\t"
+ "bic %[pixels], %[pixels], #7 \n\t"
+ "tmcr wcgr1, r12 \n\t"
+ "add r12, r12, #1 \n\t"
+ "add r4, %[pixels], %[line_size]\n\t"
+ "tmcr wcgr2, r12 \n\t"
+ "add r5, %[block], %[line_size] \n\t"
+ "mov %[line_size], %[line_size], lsl #1 \n\t"
+ "pld [r5] \n\t"
+ "pld [r5, #32] \n\t"
+
+ "1: \n\t"
+ "wldrd wr10, [%[pixels]] \n\t"
+ "cmp r12, #8 \n\t"
+ "wldrd wr11, [%[pixels], #8] \n\t"
+ "wldrd wr12, [%[pixels], #16] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "wldrd wr13, [r4] \n\t"
+ "pld [%[pixels]] \n\t"
+ "wldrd wr14, [r4, #8] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "wldrd wr15, [r4, #16] \n\t"
+ "add r4, r4, %[line_size] \n\t"
+ "walignr1 wr0, wr10, wr11 \n\t"
+ "pld [r4] \n\t"
+ "pld [r4, #32] \n\t"
+ "walignr1 wr1, wr11, wr12 \n\t"
+ "walignr1 wr2, wr13, wr14 \n\t"
+ "walignr1 wr3, wr14, wr15 \n\t"
+ "wmoveq wr4, wr11 \n\t"
+ "wmoveq wr5, wr12 \n\t"
+ "wmoveq wr6, wr14 \n\t"
+ "wmoveq wr7, wr15 \n\t"
+ "walignr2ne wr4, wr10, wr11 \n\t"
+ "walignr2ne wr5, wr11, wr12 \n\t"
+ "walignr2ne wr6, wr13, wr14 \n\t"
+ "walignr2ne wr7, wr14, wr15 \n\t"
+ "wldrd wr10, [%[block]] \n\t"
+ WAVG2B" wr0, wr0, wr4 \n\t"
+ "wldrd wr11, [%[block], #8] \n\t"
+ WAVG2B" wr1, wr1, wr5 \n\t"
+ "wldrd wr12, [r5] \n\t"
+ WAVG2B" wr2, wr2, wr6 \n\t"
+ "wldrd wr13, [r5, #8] \n\t"
+ WAVG2B" wr3, wr3, wr7 \n\t"
+ WAVG2B" wr0, wr0, wr10 \n\t"
+ WAVG2B" wr1, wr1, wr11 \n\t"
+ WAVG2B" wr2, wr2, wr12 \n\t"
+ WAVG2B" wr3, wr3, wr13 \n\t"
+ "wstrd wr0, [%[block]] \n\t"
+ "subs %[h], %[h], #2 \n\t"
+ "wstrd wr1, [%[block], #8] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+ "wstrd wr2, [r5] \n\t"
+ "pld [%[block]] \n\t"
+ "wstrd wr3, [r5, #8] \n\t"
+ "add r5, r5, %[line_size] \n\t"
+ "pld [%[block], #32] \n\t"
+ "pld [r5] \n\t"
+ "pld [r5, #32] \n\t"
+ "bne 1b \n\t"
+ : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
+ :
+ :"r4", "r5", "r12", "memory");
+}
+
+void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+ int stride = line_size;
+ // [wr0 wr1 wr2 wr3] for previous line
+ // [wr4 wr5 wr6 wr7] for current line
+ __asm__ volatile(
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "and r12, %[pixels], #7 \n\t"
+ "tmcr wcgr1, r12 \n\t"
+ "bic %[pixels], %[pixels], #7 \n\t"
+
+ "wldrd wr10, [%[pixels]] \n\t"
+ "wldrd wr11, [%[pixels], #8] \n\t"
+ "pld [%[block]] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "walignr1 wr0, wr10, wr11 \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+
+ "1: \n\t"
+ "wldrd wr10, [%[pixels]] \n\t"
+ "wldrd wr11, [%[pixels], #8] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr4, wr10, wr11 \n\t"
+ "wldrd wr10, [%[block]] \n\t"
+ WAVG2B" wr8, wr0, wr4 \n\t"
+ WAVG2B" wr8, wr8, wr10 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+
+ "wldrd wr10, [%[pixels]] \n\t"
+ "wldrd wr11, [%[pixels], #8] \n\t"
+ "pld [%[block]] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr0, wr10, wr11 \n\t"
+ "wldrd wr10, [%[block]] \n\t"
+ WAVG2B" wr8, wr0, wr4 \n\t"
+ WAVG2B" wr8, wr8, wr10 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+
+ "subs %[h], %[h], #2 \n\t"
+ "pld [%[block]] \n\t"
+ "bne 1b \n\t"
+ : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
+ :
+ : "cc", "memory", "r12");
+}
+
+void DEF(put, pixels16_y2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+ int stride = line_size;
+ // [wr0 wr1 wr2 wr3] for previous line
+ // [wr4 wr5 wr6 wr7] for current line
+ __asm__ volatile(
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "and r12, %[pixels], #7 \n\t"
+ "tmcr wcgr1, r12 \n\t"
+ "bic %[pixels], %[pixels], #7 \n\t"
+
+ "wldrd wr10, [%[pixels]] \n\t"
+ "wldrd wr11, [%[pixels], #8] \n\t"
+ "wldrd wr12, [%[pixels], #16] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr0, wr10, wr11 \n\t"
+ "walignr1 wr1, wr11, wr12 \n\t"
+
+ "1: \n\t"
+ "wldrd wr10, [%[pixels]] \n\t"
+ "wldrd wr11, [%[pixels], #8] \n\t"
+ "wldrd wr12, [%[pixels], #16] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr4, wr10, wr11 \n\t"
+ "walignr1 wr5, wr11, wr12 \n\t"
+ WAVG2B" wr8, wr0, wr4 \n\t"
+ WAVG2B" wr9, wr1, wr5 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "wstrd wr9, [%[block], #8] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+
+ "wldrd wr10, [%[pixels]] \n\t"
+ "wldrd wr11, [%[pixels], #8] \n\t"
+ "wldrd wr12, [%[pixels], #16] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr0, wr10, wr11 \n\t"
+ "walignr1 wr1, wr11, wr12 \n\t"
+ WAVG2B" wr8, wr0, wr4 \n\t"
+ WAVG2B" wr9, wr1, wr5 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "wstrd wr9, [%[block], #8] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+
+ "subs %[h], %[h], #2 \n\t"
+ "bne 1b \n\t"
+ : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
+ :
+ : "r4", "r5", "r12", "memory");
+}
+
+void DEF(avg, pixels16_y2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+ int stride = line_size;
+ // [wr0 wr1 wr2 wr3] for previous line
+ // [wr4 wr5 wr6 wr7] for current line
+ __asm__ volatile(
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "and r12, %[pixels], #7 \n\t"
+ "tmcr wcgr1, r12 \n\t"
+ "bic %[pixels], %[pixels], #7 \n\t"
+
+ "wldrd wr10, [%[pixels]] \n\t"
+ "wldrd wr11, [%[pixels], #8] \n\t"
+ "pld [%[block]] \n\t"
+ "wldrd wr12, [%[pixels], #16] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr0, wr10, wr11 \n\t"
+ "walignr1 wr1, wr11, wr12 \n\t"
+
+ "1: \n\t"
+ "wldrd wr10, [%[pixels]] \n\t"
+ "wldrd wr11, [%[pixels], #8] \n\t"
+ "wldrd wr12, [%[pixels], #16] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr4, wr10, wr11 \n\t"
+ "walignr1 wr5, wr11, wr12 \n\t"
+ "wldrd wr10, [%[block]] \n\t"
+ "wldrd wr11, [%[block], #8] \n\t"
+ WAVG2B" wr8, wr0, wr4 \n\t"
+ WAVG2B" wr9, wr1, wr5 \n\t"
+ WAVG2B" wr8, wr8, wr10 \n\t"
+ WAVG2B" wr9, wr9, wr11 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "wstrd wr9, [%[block], #8] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+
+ "wldrd wr10, [%[pixels]] \n\t"
+ "wldrd wr11, [%[pixels], #8] \n\t"
+ "pld [%[block]] \n\t"
+ "wldrd wr12, [%[pixels], #16] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr0, wr10, wr11 \n\t"
+ "walignr1 wr1, wr11, wr12 \n\t"
+ "wldrd wr10, [%[block]] \n\t"
+ "wldrd wr11, [%[block], #8] \n\t"
+ WAVG2B" wr8, wr0, wr4 \n\t"
+ WAVG2B" wr9, wr1, wr5 \n\t"
+ WAVG2B" wr8, wr8, wr10 \n\t"
+ WAVG2B" wr9, wr9, wr11 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "wstrd wr9, [%[block], #8] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+
+ "subs %[h], %[h], #2 \n\t"
+ "pld [%[block]] \n\t"
+ "bne 1b \n\t"
+ : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
+ :
+ : "r4", "r5", "r12", "memory");
+}
+
+void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+ // [wr0 wr1 wr2 wr3] for previous line
+ // [wr4 wr5 wr6 wr7] for current line
+ SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
+ __asm__ volatile(
+ "pld [%[pixels]] \n\t"
+ "mov r12, #2 \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "tmcr wcgr0, r12 \n\t" /* for shift value */
+ "and r12, %[pixels], #7 \n\t"
+ "bic %[pixels], %[pixels], #7 \n\t"
+ "tmcr wcgr1, r12 \n\t"
+
+ // [wr0 wr1 wr2 wr3] <= *
+ // [wr4 wr5 wr6 wr7]
+ "wldrd wr12, [%[pixels]] \n\t"
+ "add r12, r12, #1 \n\t"
+ "wldrd wr13, [%[pixels], #8] \n\t"
+ "tmcr wcgr2, r12 \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "cmp r12, #8 \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr2, wr12, wr13 \n\t"
+ "wmoveq wr10, wr13 \n\t"
+ "walignr2ne wr10, wr12, wr13 \n\t"
+ "wunpckelub wr0, wr2 \n\t"
+ "wunpckehub wr1, wr2 \n\t"
+ "wunpckelub wr8, wr10 \n\t"
+ "wunpckehub wr9, wr10 \n\t"
+ "waddhus wr0, wr0, wr8 \n\t"
+ "waddhus wr1, wr1, wr9 \n\t"
+
+ "1: \n\t"
+ // [wr0 wr1 wr2 wr3]
+ // [wr4 wr5 wr6 wr7] <= *
+ "wldrd wr12, [%[pixels]] \n\t"
+ "cmp r12, #8 \n\t"
+ "wldrd wr13, [%[pixels], #8] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "walignr1 wr6, wr12, wr13 \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "wmoveq wr10, wr13 \n\t"
+ "walignr2ne wr10, wr12, wr13 \n\t"
+ "wunpckelub wr4, wr6 \n\t"
+ "wunpckehub wr5, wr6 \n\t"
+ "wunpckelub wr8, wr10 \n\t"
+ "wunpckehub wr9, wr10 \n\t"
+ "waddhus wr4, wr4, wr8 \n\t"
+ "waddhus wr5, wr5, wr9 \n\t"
+ "waddhus wr8, wr0, wr4 \n\t"
+ "waddhus wr9, wr1, wr5 \n\t"
+ "waddhus wr8, wr8, wr15 \n\t"
+ "waddhus wr9, wr9, wr15 \n\t"
+ "wsrlhg wr8, wr8, wcgr0 \n\t"
+ "wsrlhg wr9, wr9, wcgr0 \n\t"
+ "wpackhus wr8, wr8, wr9 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+
+ // [wr0 wr1 wr2 wr3] <= *
+ // [wr4 wr5 wr6 wr7]
+ "wldrd wr12, [%[pixels]] \n\t"
+ "wldrd wr13, [%[pixels], #8] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "walignr1 wr2, wr12, wr13 \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "wmoveq wr10, wr13 \n\t"
+ "walignr2ne wr10, wr12, wr13 \n\t"
+ "wunpckelub wr0, wr2 \n\t"
+ "wunpckehub wr1, wr2 \n\t"
+ "wunpckelub wr8, wr10 \n\t"
+ "wunpckehub wr9, wr10 \n\t"
+ "waddhus wr0, wr0, wr8 \n\t"
+ "waddhus wr1, wr1, wr9 \n\t"
+ "waddhus wr8, wr0, wr4 \n\t"
+ "waddhus wr9, wr1, wr5 \n\t"
+ "waddhus wr8, wr8, wr15 \n\t"
+ "waddhus wr9, wr9, wr15 \n\t"
+ "wsrlhg wr8, wr8, wcgr0 \n\t"
+ "wsrlhg wr9, wr9, wcgr0 \n\t"
+ "wpackhus wr8, wr8, wr9 \n\t"
+ "subs %[h], %[h], #2 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+ "bne 1b \n\t"
+ : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block)
+ : [line_size]"r"(line_size)
+ : "r12", "memory");
+}
+
+void DEF(put, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+ // [wr0 wr1 wr2 wr3] for previous line
+ // [wr4 wr5 wr6 wr7] for current line
+ SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
+ __asm__ volatile(
+ "pld [%[pixels]] \n\t"
+ "mov r12, #2 \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "tmcr wcgr0, r12 \n\t" /* for shift value */
+ /* alignment */
+ "and r12, %[pixels], #7 \n\t"
+ "bic %[pixels], %[pixels], #7 \n\t"
+ "tmcr wcgr1, r12 \n\t"
+ "add r12, r12, #1 \n\t"
+ "tmcr wcgr2, r12 \n\t"
+
+ // [wr0 wr1 wr2 wr3] <= *
+ // [wr4 wr5 wr6 wr7]
+ "wldrd wr12, [%[pixels]] \n\t"
+ "cmp r12, #8 \n\t"
+ "wldrd wr13, [%[pixels], #8] \n\t"
+ "wldrd wr14, [%[pixels], #16] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "pld [%[pixels]] \n\t"
+ "walignr1 wr2, wr12, wr13 \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr3, wr13, wr14 \n\t"
+ "wmoveq wr10, wr13 \n\t"
+ "wmoveq wr11, wr14 \n\t"
+ "walignr2ne wr10, wr12, wr13 \n\t"
+ "walignr2ne wr11, wr13, wr14 \n\t"
+ "wunpckelub wr0, wr2 \n\t"
+ "wunpckehub wr1, wr2 \n\t"
+ "wunpckelub wr2, wr3 \n\t"
+ "wunpckehub wr3, wr3 \n\t"
+ "wunpckelub wr8, wr10 \n\t"
+ "wunpckehub wr9, wr10 \n\t"
+ "wunpckelub wr10, wr11 \n\t"
+ "wunpckehub wr11, wr11 \n\t"
+ "waddhus wr0, wr0, wr8 \n\t"
+ "waddhus wr1, wr1, wr9 \n\t"
+ "waddhus wr2, wr2, wr10 \n\t"
+ "waddhus wr3, wr3, wr11 \n\t"
+
+ "1: \n\t"
+ // [wr0 wr1 wr2 wr3]
+ // [wr4 wr5 wr6 wr7] <= *
+ "wldrd wr12, [%[pixels]] \n\t"
+ "cmp r12, #8 \n\t"
+ "wldrd wr13, [%[pixels], #8] \n\t"
+ "wldrd wr14, [%[pixels], #16] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "walignr1 wr6, wr12, wr13 \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr7, wr13, wr14 \n\t"
+ "wmoveq wr10, wr13 \n\t"
+ "wmoveq wr11, wr14 \n\t"
+ "walignr2ne wr10, wr12, wr13 \n\t"
+ "walignr2ne wr11, wr13, wr14 \n\t"
+ "wunpckelub wr4, wr6 \n\t"
+ "wunpckehub wr5, wr6 \n\t"
+ "wunpckelub wr6, wr7 \n\t"
+ "wunpckehub wr7, wr7 \n\t"
+ "wunpckelub wr8, wr10 \n\t"
+ "wunpckehub wr9, wr10 \n\t"
+ "wunpckelub wr10, wr11 \n\t"
+ "wunpckehub wr11, wr11 \n\t"
+ "waddhus wr4, wr4, wr8 \n\t"
+ "waddhus wr5, wr5, wr9 \n\t"
+ "waddhus wr6, wr6, wr10 \n\t"
+ "waddhus wr7, wr7, wr11 \n\t"
+ "waddhus wr8, wr0, wr4 \n\t"
+ "waddhus wr9, wr1, wr5 \n\t"
+ "waddhus wr10, wr2, wr6 \n\t"
+ "waddhus wr11, wr3, wr7 \n\t"
+ "waddhus wr8, wr8, wr15 \n\t"
+ "waddhus wr9, wr9, wr15 \n\t"
+ "waddhus wr10, wr10, wr15 \n\t"
+ "waddhus wr11, wr11, wr15 \n\t"
+ "wsrlhg wr8, wr8, wcgr0 \n\t"
+ "wsrlhg wr9, wr9, wcgr0 \n\t"
+ "wsrlhg wr10, wr10, wcgr0 \n\t"
+ "wsrlhg wr11, wr11, wcgr0 \n\t"
+ "wpackhus wr8, wr8, wr9 \n\t"
+ "wpackhus wr9, wr10, wr11 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "wstrd wr9, [%[block], #8] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+
+ // [wr0 wr1 wr2 wr3] <= *
+ // [wr4 wr5 wr6 wr7]
+ "wldrd wr12, [%[pixels]] \n\t"
+ "wldrd wr13, [%[pixels], #8] \n\t"
+ "wldrd wr14, [%[pixels], #16] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "walignr1 wr2, wr12, wr13 \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr3, wr13, wr14 \n\t"
+ "wmoveq wr10, wr13 \n\t"
+ "wmoveq wr11, wr14 \n\t"
+ "walignr2ne wr10, wr12, wr13 \n\t"
+ "walignr2ne wr11, wr13, wr14 \n\t"
+ "wunpckelub wr0, wr2 \n\t"
+ "wunpckehub wr1, wr2 \n\t"
+ "wunpckelub wr2, wr3 \n\t"
+ "wunpckehub wr3, wr3 \n\t"
+ "wunpckelub wr8, wr10 \n\t"
+ "wunpckehub wr9, wr10 \n\t"
+ "wunpckelub wr10, wr11 \n\t"
+ "wunpckehub wr11, wr11 \n\t"
+ "waddhus wr0, wr0, wr8 \n\t"
+ "waddhus wr1, wr1, wr9 \n\t"
+ "waddhus wr2, wr2, wr10 \n\t"
+ "waddhus wr3, wr3, wr11 \n\t"
+ "waddhus wr8, wr0, wr4 \n\t"
+ "waddhus wr9, wr1, wr5 \n\t"
+ "waddhus wr10, wr2, wr6 \n\t"
+ "waddhus wr11, wr3, wr7 \n\t"
+ "waddhus wr8, wr8, wr15 \n\t"
+ "waddhus wr9, wr9, wr15 \n\t"
+ "waddhus wr10, wr10, wr15 \n\t"
+ "waddhus wr11, wr11, wr15 \n\t"
+ "wsrlhg wr8, wr8, wcgr0 \n\t"
+ "wsrlhg wr9, wr9, wcgr0 \n\t"
+ "wsrlhg wr10, wr10, wcgr0 \n\t"
+ "wsrlhg wr11, wr11, wcgr0 \n\t"
+ "wpackhus wr8, wr8, wr9 \n\t"
+ "wpackhus wr9, wr10, wr11 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "wstrd wr9, [%[block], #8] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+
+ "subs %[h], %[h], #2 \n\t"
+ "bne 1b \n\t"
+ : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block)
+ : [line_size]"r"(line_size)
+ : "r12", "memory");
+}
+
+void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+ // [wr0 wr1 wr2 wr3] for previous line
+ // [wr4 wr5 wr6 wr7] for current line
+ SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
+ __asm__ volatile(
+ "pld [%[block]] \n\t"
+ "pld [%[block], #32] \n\t"
+ "pld [%[pixels]] \n\t"
+ "mov r12, #2 \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "tmcr wcgr0, r12 \n\t" /* for shift value */
+ "and r12, %[pixels], #7 \n\t"
+ "bic %[pixels], %[pixels], #7 \n\t"
+ "tmcr wcgr1, r12 \n\t"
+
+ // [wr0 wr1 wr2 wr3] <= *
+ // [wr4 wr5 wr6 wr7]
+ "wldrd wr12, [%[pixels]] \n\t"
+ "add r12, r12, #1 \n\t"
+ "wldrd wr13, [%[pixels], #8] \n\t"
+ "tmcr wcgr2, r12 \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "cmp r12, #8 \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr2, wr12, wr13 \n\t"
+ "wmoveq wr10, wr13 \n\t"
+ "walignr2ne wr10, wr12, wr13 \n\t"
+ "wunpckelub wr0, wr2 \n\t"
+ "wunpckehub wr1, wr2 \n\t"
+ "wunpckelub wr8, wr10 \n\t"
+ "wunpckehub wr9, wr10 \n\t"
+ "waddhus wr0, wr0, wr8 \n\t"
+ "waddhus wr1, wr1, wr9 \n\t"
+
+ "1: \n\t"
+ // [wr0 wr1 wr2 wr3]
+ // [wr4 wr5 wr6 wr7] <= *
+ "wldrd wr12, [%[pixels]] \n\t"
+ "cmp r12, #8 \n\t"
+ "wldrd wr13, [%[pixels], #8] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "walignr1 wr6, wr12, wr13 \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "wmoveq wr10, wr13 \n\t"
+ "walignr2ne wr10, wr12, wr13 \n\t"
+ "wunpckelub wr4, wr6 \n\t"
+ "wunpckehub wr5, wr6 \n\t"
+ "wunpckelub wr8, wr10 \n\t"
+ "wunpckehub wr9, wr10 \n\t"
+ "waddhus wr4, wr4, wr8 \n\t"
+ "waddhus wr5, wr5, wr9 \n\t"
+ "waddhus wr8, wr0, wr4 \n\t"
+ "waddhus wr9, wr1, wr5 \n\t"
+ "waddhus wr8, wr8, wr15 \n\t"
+ "waddhus wr9, wr9, wr15 \n\t"
+ "wldrd wr12, [%[block]] \n\t"
+ "wsrlhg wr8, wr8, wcgr0 \n\t"
+ "wsrlhg wr9, wr9, wcgr0 \n\t"
+ "wpackhus wr8, wr8, wr9 \n\t"
+ WAVG2B" wr8, wr8, wr12 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+ "wldrd wr12, [%[pixels]] \n\t"
+ "pld [%[block]] \n\t"
+ "pld [%[block], #32] \n\t"
+
+ // [wr0 wr1 wr2 wr3] <= *
+ // [wr4 wr5 wr6 wr7]
+ "wldrd wr13, [%[pixels], #8] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "walignr1 wr2, wr12, wr13 \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "wmoveq wr10, wr13 \n\t"
+ "walignr2ne wr10, wr12, wr13 \n\t"
+ "wunpckelub wr0, wr2 \n\t"
+ "wunpckehub wr1, wr2 \n\t"
+ "wunpckelub wr8, wr10 \n\t"
+ "wunpckehub wr9, wr10 \n\t"
+ "waddhus wr0, wr0, wr8 \n\t"
+ "waddhus wr1, wr1, wr9 \n\t"
+ "waddhus wr8, wr0, wr4 \n\t"
+ "waddhus wr9, wr1, wr5 \n\t"
+ "waddhus wr8, wr8, wr15 \n\t"
+ "waddhus wr9, wr9, wr15 \n\t"
+ "wldrd wr12, [%[block]] \n\t"
+ "wsrlhg wr8, wr8, wcgr0 \n\t"
+ "wsrlhg wr9, wr9, wcgr0 \n\t"
+ "wpackhus wr8, wr8, wr9 \n\t"
+ "subs %[h], %[h], #2 \n\t"
+ WAVG2B" wr8, wr8, wr12 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+ "pld [%[block]] \n\t"
+ "pld [%[block], #32] \n\t"
+ "bne 1b \n\t"
+ : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block)
+ : [line_size]"r"(line_size)
+ : "r12", "memory");
+}
+
+void DEF(avg, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
+{
+ // [wr0 wr1 wr2 wr3] for previous line
+ // [wr4 wr5 wr6 wr7] for current line
+ SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
+ __asm__ volatile(
+ "pld [%[block]] \n\t"
+ "pld [%[block], #32] \n\t"
+ "pld [%[pixels]] \n\t"
+ "mov r12, #2 \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "tmcr wcgr0, r12 \n\t" /* for shift value */
+ /* alignment */
+ "and r12, %[pixels], #7 \n\t"
+ "bic %[pixels], %[pixels], #7 \n\t"
+ "tmcr wcgr1, r12 \n\t"
+ "add r12, r12, #1 \n\t"
+ "tmcr wcgr2, r12 \n\t"
+
+ // [wr0 wr1 wr2 wr3] <= *
+ // [wr4 wr5 wr6 wr7]
+ "wldrd wr12, [%[pixels]] \n\t"
+ "cmp r12, #8 \n\t"
+ "wldrd wr13, [%[pixels], #8] \n\t"
+ "wldrd wr14, [%[pixels], #16] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "pld [%[pixels]] \n\t"
+ "walignr1 wr2, wr12, wr13 \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr3, wr13, wr14 \n\t"
+ "wmoveq wr10, wr13 \n\t"
+ "wmoveq wr11, wr14 \n\t"
+ "walignr2ne wr10, wr12, wr13 \n\t"
+ "walignr2ne wr11, wr13, wr14 \n\t"
+ "wunpckelub wr0, wr2 \n\t"
+ "wunpckehub wr1, wr2 \n\t"
+ "wunpckelub wr2, wr3 \n\t"
+ "wunpckehub wr3, wr3 \n\t"
+ "wunpckelub wr8, wr10 \n\t"
+ "wunpckehub wr9, wr10 \n\t"
+ "wunpckelub wr10, wr11 \n\t"
+ "wunpckehub wr11, wr11 \n\t"
+ "waddhus wr0, wr0, wr8 \n\t"
+ "waddhus wr1, wr1, wr9 \n\t"
+ "waddhus wr2, wr2, wr10 \n\t"
+ "waddhus wr3, wr3, wr11 \n\t"
+
+ "1: \n\t"
+ // [wr0 wr1 wr2 wr3]
+ // [wr4 wr5 wr6 wr7] <= *
+ "wldrd wr12, [%[pixels]] \n\t"
+ "cmp r12, #8 \n\t"
+ "wldrd wr13, [%[pixels], #8] \n\t"
+ "wldrd wr14, [%[pixels], #16] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "walignr1 wr6, wr12, wr13 \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr7, wr13, wr14 \n\t"
+ "wmoveq wr10, wr13 \n\t"
+ "wmoveq wr11, wr14 \n\t"
+ "walignr2ne wr10, wr12, wr13 \n\t"
+ "walignr2ne wr11, wr13, wr14 \n\t"
+ "wunpckelub wr4, wr6 \n\t"
+ "wunpckehub wr5, wr6 \n\t"
+ "wunpckelub wr6, wr7 \n\t"
+ "wunpckehub wr7, wr7 \n\t"
+ "wunpckelub wr8, wr10 \n\t"
+ "wunpckehub wr9, wr10 \n\t"
+ "wunpckelub wr10, wr11 \n\t"
+ "wunpckehub wr11, wr11 \n\t"
+ "waddhus wr4, wr4, wr8 \n\t"
+ "waddhus wr5, wr5, wr9 \n\t"
+ "waddhus wr6, wr6, wr10 \n\t"
+ "waddhus wr7, wr7, wr11 \n\t"
+ "waddhus wr8, wr0, wr4 \n\t"
+ "waddhus wr9, wr1, wr5 \n\t"
+ "waddhus wr10, wr2, wr6 \n\t"
+ "waddhus wr11, wr3, wr7 \n\t"
+ "waddhus wr8, wr8, wr15 \n\t"
+ "waddhus wr9, wr9, wr15 \n\t"
+ "waddhus wr10, wr10, wr15 \n\t"
+ "waddhus wr11, wr11, wr15 \n\t"
+ "wsrlhg wr8, wr8, wcgr0 \n\t"
+ "wsrlhg wr9, wr9, wcgr0 \n\t"
+ "wldrd wr12, [%[block]] \n\t"
+ "wldrd wr13, [%[block], #8] \n\t"
+ "wsrlhg wr10, wr10, wcgr0 \n\t"
+ "wsrlhg wr11, wr11, wcgr0 \n\t"
+ "wpackhus wr8, wr8, wr9 \n\t"
+ "wpackhus wr9, wr10, wr11 \n\t"
+ WAVG2B" wr8, wr8, wr12 \n\t"
+ WAVG2B" wr9, wr9, wr13 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "wstrd wr9, [%[block], #8] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+
+ // [wr0 wr1 wr2 wr3] <= *
+ // [wr4 wr5 wr6 wr7]
+ "wldrd wr12, [%[pixels]] \n\t"
+ "pld [%[block]] \n\t"
+ "wldrd wr13, [%[pixels], #8] \n\t"
+ "pld [%[block], #32] \n\t"
+ "wldrd wr14, [%[pixels], #16] \n\t"
+ "add %[pixels], %[pixels], %[line_size] \n\t"
+ "walignr1 wr2, wr12, wr13 \n\t"
+ "pld [%[pixels]] \n\t"
+ "pld [%[pixels], #32] \n\t"
+ "walignr1 wr3, wr13, wr14 \n\t"
+ "wmoveq wr10, wr13 \n\t"
+ "wmoveq wr11, wr14 \n\t"
+ "walignr2ne wr10, wr12, wr13 \n\t"
+ "walignr2ne wr11, wr13, wr14 \n\t"
+ "wunpckelub wr0, wr2 \n\t"
+ "wunpckehub wr1, wr2 \n\t"
+ "wunpckelub wr2, wr3 \n\t"
+ "wunpckehub wr3, wr3 \n\t"
+ "wunpckelub wr8, wr10 \n\t"
+ "wunpckehub wr9, wr10 \n\t"
+ "wunpckelub wr10, wr11 \n\t"
+ "wunpckehub wr11, wr11 \n\t"
+ "waddhus wr0, wr0, wr8 \n\t"
+ "waddhus wr1, wr1, wr9 \n\t"
+ "waddhus wr2, wr2, wr10 \n\t"
+ "waddhus wr3, wr3, wr11 \n\t"
+ "waddhus wr8, wr0, wr4 \n\t"
+ "waddhus wr9, wr1, wr5 \n\t"
+ "waddhus wr10, wr2, wr6 \n\t"
+ "waddhus wr11, wr3, wr7 \n\t"
+ "waddhus wr8, wr8, wr15 \n\t"
+ "waddhus wr9, wr9, wr15 \n\t"
+ "waddhus wr10, wr10, wr15 \n\t"
+ "waddhus wr11, wr11, wr15 \n\t"
+ "wsrlhg wr8, wr8, wcgr0 \n\t"
+ "wsrlhg wr9, wr9, wcgr0 \n\t"
+ "wldrd wr12, [%[block]] \n\t"
+ "wldrd wr13, [%[block], #8] \n\t"
+ "wsrlhg wr10, wr10, wcgr0 \n\t"
+ "wsrlhg wr11, wr11, wcgr0 \n\t"
+ "wpackhus wr8, wr8, wr9 \n\t"
+ "wpackhus wr9, wr10, wr11 \n\t"
+ WAVG2B" wr8, wr8, wr12 \n\t"
+ WAVG2B" wr9, wr9, wr13 \n\t"
+ "wstrd wr8, [%[block]] \n\t"
+ "wstrd wr9, [%[block], #8] \n\t"
+ "add %[block], %[block], %[line_size] \n\t"
+ "subs %[h], %[h], #2 \n\t"
+ "pld [%[block]] \n\t"
+ "pld [%[block], #32] \n\t"
+ "bne 1b \n\t"
+ : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block)
+ : [line_size]"r"(line_size)
+ : "r12", "memory");
+}
diff --git a/libavcodec/arm/dsputil_neon.c b/libavcodec/arm/dsputil_neon.c
new file mode 100644
index 0000000..e50e160
--- /dev/null
+++ b/libavcodec/arm/dsputil_neon.c
@@ -0,0 +1,189 @@
+/*
+ * ARM NEON optimised DSP functions
+ * Copyright (c) 2008 Mans Rullgard <mans at mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavcodec/avcodec.h"
+#include "libavcodec/dsputil.h"
+
+void ff_put_pixels16_neon(uint8_t *, const uint8_t *, int, int);
+void ff_put_pixels16_x2_neon(uint8_t *, const uint8_t *, int, int);
+void ff_put_pixels16_y2_neon(uint8_t *, const uint8_t *, int, int);
+void ff_put_pixels16_xy2_neon(uint8_t *, const uint8_t *, int, int);
+void ff_put_pixels8_neon(uint8_t *, const uint8_t *, int, int);
+void ff_put_pixels8_x2_neon(uint8_t *, const uint8_t *, int, int);
+void ff_put_pixels8_y2_neon(uint8_t *, const uint8_t *, int, int);
+void ff_put_pixels8_xy2_neon(uint8_t *, const uint8_t *, int, int);
+void ff_put_pixels16_x2_no_rnd_neon(uint8_t *, const uint8_t *, int, int);
+void ff_put_pixels16_y2_no_rnd_neon(uint8_t *, const uint8_t *, int, int);
+void ff_put_pixels16_xy2_no_rnd_neon(uint8_t *, const uint8_t *, int, int);
+void ff_put_pixels8_x2_no_rnd_neon(uint8_t *, const uint8_t *, int, int);
+void ff_put_pixels8_y2_no_rnd_neon(uint8_t *, const uint8_t *, int, int);
+void ff_put_pixels8_xy2_no_rnd_neon(uint8_t *, const uint8_t *, int, int);
+
+void ff_avg_pixels16_neon(uint8_t *, const uint8_t *, int, int);
+
+void ff_put_h264_qpel16_mc00_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel16_mc10_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel16_mc20_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel16_mc30_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel16_mc01_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel16_mc11_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel16_mc21_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel16_mc31_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel16_mc02_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel16_mc12_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel16_mc22_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel16_mc32_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel16_mc03_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel16_mc13_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel16_mc23_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel16_mc33_neon(uint8_t *, uint8_t *, int);
+
+void ff_put_h264_qpel8_mc00_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel8_mc10_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel8_mc20_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel8_mc30_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel8_mc01_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel8_mc11_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel8_mc21_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel8_mc31_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel8_mc02_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel8_mc12_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel8_mc22_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel8_mc32_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel8_mc03_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel8_mc13_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel8_mc23_neon(uint8_t *, uint8_t *, int);
+void ff_put_h264_qpel8_mc33_neon(uint8_t *, uint8_t *, int);
+
+void ff_avg_h264_qpel16_mc00_neon(uint8_t *, uint8_t *, int);
+
+void ff_put_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
+void ff_put_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
+
+void ff_avg_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int);
+void ff_avg_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int);
+
+void ff_h264_v_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha,
+ int beta, int8_t *tc0);
+void ff_h264_h_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha,
+ int beta, int8_t *tc0);
+void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha,
+ int beta, int8_t *tc0);
+void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha,
+ int beta, int8_t *tc0);
+
+void ff_h264_idct_add_neon(uint8_t *dst, DCTELEM *block, int stride);
+void ff_h264_idct_dc_add_neon(uint8_t *dst, DCTELEM *block, int stride);
+void ff_h264_idct_add16_neon(uint8_t *dst, const int *block_offset,
+ DCTELEM *block, int stride,
+ const uint8_t nnzc[6*8]);
+void ff_h264_idct_add16intra_neon(uint8_t *dst, const int *block_offset,
+ DCTELEM *block, int stride,
+ const uint8_t nnzc[6*8]);
+void ff_h264_idct_add8_neon(uint8_t **dest, const int *block_offset,
+ DCTELEM *block, int stride,
+ const uint8_t nnzc[6*8]);
+
+void ff_float_to_int16_neon(int16_t *, const float *, long);
+void ff_float_to_int16_interleave_neon(int16_t *, const float **, long, int);
+
+void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx)
+{
+ c->put_pixels_tab[0][0] = ff_put_pixels16_neon;
+ c->put_pixels_tab[0][1] = ff_put_pixels16_x2_neon;
+ c->put_pixels_tab[0][2] = ff_put_pixels16_y2_neon;
+ c->put_pixels_tab[0][3] = ff_put_pixels16_xy2_neon;
+ c->put_pixels_tab[1][0] = ff_put_pixels8_neon;
+ c->put_pixels_tab[1][1] = ff_put_pixels8_x2_neon;
+ c->put_pixels_tab[1][2] = ff_put_pixels8_y2_neon;
+ c->put_pixels_tab[1][3] = ff_put_pixels8_xy2_neon;
+
+ c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_neon;
+ c->put_no_rnd_pixels_tab[0][1] = ff_put_pixels16_x2_no_rnd_neon;
+ c->put_no_rnd_pixels_tab[0][2] = ff_put_pixels16_y2_no_rnd_neon;
+ c->put_no_rnd_pixels_tab[0][3] = ff_put_pixels16_xy2_no_rnd_neon;
+ c->put_no_rnd_pixels_tab[1][0] = ff_put_pixels8_neon;
+ c->put_no_rnd_pixels_tab[1][1] = ff_put_pixels8_x2_no_rnd_neon;
+ c->put_no_rnd_pixels_tab[1][2] = ff_put_pixels8_y2_no_rnd_neon;
+ c->put_no_rnd_pixels_tab[1][3] = ff_put_pixels8_xy2_no_rnd_neon;
+
+ c->avg_pixels_tab[0][0] = ff_avg_pixels16_neon;
+
+ c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_neon;
+ c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_neon;
+
+ c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_neon;
+ c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_neon;
+
+ c->put_h264_qpel_pixels_tab[0][ 0] = ff_put_h264_qpel16_mc00_neon;
+ c->put_h264_qpel_pixels_tab[0][ 1] = ff_put_h264_qpel16_mc10_neon;
+ c->put_h264_qpel_pixels_tab[0][ 2] = ff_put_h264_qpel16_mc20_neon;
+ c->put_h264_qpel_pixels_tab[0][ 3] = ff_put_h264_qpel16_mc30_neon;
+ c->put_h264_qpel_pixels_tab[0][ 4] = ff_put_h264_qpel16_mc01_neon;
+ c->put_h264_qpel_pixels_tab[0][ 5] = ff_put_h264_qpel16_mc11_neon;
+ c->put_h264_qpel_pixels_tab[0][ 6] = ff_put_h264_qpel16_mc21_neon;
+ c->put_h264_qpel_pixels_tab[0][ 7] = ff_put_h264_qpel16_mc31_neon;
+ c->put_h264_qpel_pixels_tab[0][ 8] = ff_put_h264_qpel16_mc02_neon;
+ c->put_h264_qpel_pixels_tab[0][ 9] = ff_put_h264_qpel16_mc12_neon;
+ c->put_h264_qpel_pixels_tab[0][10] = ff_put_h264_qpel16_mc22_neon;
+ c->put_h264_qpel_pixels_tab[0][11] = ff_put_h264_qpel16_mc32_neon;
+ c->put_h264_qpel_pixels_tab[0][12] = ff_put_h264_qpel16_mc03_neon;
+ c->put_h264_qpel_pixels_tab[0][13] = ff_put_h264_qpel16_mc13_neon;
+ c->put_h264_qpel_pixels_tab[0][14] = ff_put_h264_qpel16_mc23_neon;
+ c->put_h264_qpel_pixels_tab[0][15] = ff_put_h264_qpel16_mc33_neon;
+
+ c->put_h264_qpel_pixels_tab[1][ 0] = ff_put_h264_qpel8_mc00_neon;
+ c->put_h264_qpel_pixels_tab[1][ 1] = ff_put_h264_qpel8_mc10_neon;
+ c->put_h264_qpel_pixels_tab[1][ 2] = ff_put_h264_qpel8_mc20_neon;
+ c->put_h264_qpel_pixels_tab[1][ 3] = ff_put_h264_qpel8_mc30_neon;
+ c->put_h264_qpel_pixels_tab[1][ 4] = ff_put_h264_qpel8_mc01_neon;
+ c->put_h264_qpel_pixels_tab[1][ 5] = ff_put_h264_qpel8_mc11_neon;
+ c->put_h264_qpel_pixels_tab[1][ 6] = ff_put_h264_qpel8_mc21_neon;
+ c->put_h264_qpel_pixels_tab[1][ 7] = ff_put_h264_qpel8_mc31_neon;
+ c->put_h264_qpel_pixels_tab[1][ 8] = ff_put_h264_qpel8_mc02_neon;
+ c->put_h264_qpel_pixels_tab[1][ 9] = ff_put_h264_qpel8_mc12_neon;
+ c->put_h264_qpel_pixels_tab[1][10] = ff_put_h264_qpel8_mc22_neon;
+ c->put_h264_qpel_pixels_tab[1][11] = ff_put_h264_qpel8_mc32_neon;
+ c->put_h264_qpel_pixels_tab[1][12] = ff_put_h264_qpel8_mc03_neon;
+ c->put_h264_qpel_pixels_tab[1][13] = ff_put_h264_qpel8_mc13_neon;
+ c->put_h264_qpel_pixels_tab[1][14] = ff_put_h264_qpel8_mc23_neon;
+ c->put_h264_qpel_pixels_tab[1][15] = ff_put_h264_qpel8_mc33_neon;
+
+ c->avg_h264_qpel_pixels_tab[0][ 0] = ff_avg_h264_qpel16_mc00_neon;
+
+ c->h264_v_loop_filter_luma = ff_h264_v_loop_filter_luma_neon;
+ c->h264_h_loop_filter_luma = ff_h264_h_loop_filter_luma_neon;
+ c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon;
+ c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon;
+
+ c->h264_idct_add = ff_h264_idct_add_neon;
+ c->h264_idct_dc_add = ff_h264_idct_dc_add_neon;
+ c->h264_idct_add16 = ff_h264_idct_add16_neon;
+ c->h264_idct_add16intra = ff_h264_idct_add16intra_neon;
+ c->h264_idct_add8 = ff_h264_idct_add8_neon;
+
+ if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
+ c->float_to_int16 = ff_float_to_int16_neon;
+ c->float_to_int16_interleave = ff_float_to_int16_interleave_neon;
+ }
+}
diff --git a/libavcodec/arm/dsputil_neon_s.S b/libavcodec/arm/dsputil_neon_s.S
new file mode 100644
index 0000000..4f86714
--- /dev/null
+++ b/libavcodec/arm/dsputil_neon_s.S
@@ -0,0 +1,611 @@
+/*
+ * ARM NEON optimised DSP functions
+ * Copyright (c) 2008 Mans Rullgard <mans at mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "asm.S"
+
+ preserve8
+ .fpu neon
+ .text
+
+ .macro pixels16 avg=0
+.if \avg
+ mov ip, r0
+.endif
+1: vld1.64 {d0, d1}, [r1], r2
+ vld1.64 {d2, d3}, [r1], r2
+ vld1.64 {d4, d5}, [r1], r2
+ pld [r1, r2, lsl #2]
+ vld1.64 {d6, d7}, [r1], r2
+ pld [r1]
+ pld [r1, r2]
+ pld [r1, r2, lsl #1]
+.if \avg
+ vld1.64 {d16,d17}, [ip], r2
+ vrhadd.u8 q0, q0, q8
+ vld1.64 {d18,d19}, [ip], r2
+ vrhadd.u8 q1, q1, q9
+ vld1.64 {d20,d21}, [ip], r2
+ vrhadd.u8 q2, q2, q10
+ vld1.64 {d22,d23}, [ip], r2
+ vrhadd.u8 q3, q3, q11
+.endif
+ subs r3, r3, #4
+ vst1.64 {d0, d1}, [r0,:128], r2
+ vst1.64 {d2, d3}, [r0,:128], r2
+ vst1.64 {d4, d5}, [r0,:128], r2
+ vst1.64 {d6, d7}, [r0,:128], r2
+ bne 1b
+ bx lr
+ .endm
+
+ .macro pixels16_x2 vhadd=vrhadd.u8
+1: vld1.64 {d0-d2}, [r1], r2
+ vld1.64 {d4-d6}, [r1], r2
+ pld [r1]
+ pld [r1, r2]
+ subs r3, r3, #2
+ vext.8 q1, q0, q1, #1
+ \vhadd q0, q0, q1
+ vext.8 q3, q2, q3, #1
+ \vhadd q2, q2, q3
+ vst1.64 {d0, d1}, [r0,:128], r2
+ vst1.64 {d4, d5}, [r0,:128], r2
+ bne 1b
+ bx lr
+ .endm
+
+ .macro pixels16_y2 vhadd=vrhadd.u8
+ push {lr}
+ add ip, r1, r2
+ lsl lr, r2, #1
+ vld1.64 {d0, d1}, [r1], lr
+ vld1.64 {d2, d3}, [ip], lr
+1: subs r3, r3, #2
+ \vhadd q2, q0, q1
+ vld1.64 {d0, d1}, [r1], lr
+ \vhadd q3, q0, q1
+ vld1.64 {d2, d3}, [ip], lr
+ pld [r1]
+ pld [ip]
+ vst1.64 {d4, d5}, [r0,:128], r2
+ vst1.64 {d6, d7}, [r0,:128], r2
+ bne 1b
+ pop {pc}
+ .endm
+
+ .macro pixels16_xy2 vshrn=vrshrn.u16 no_rnd=0
+ push {lr}
+ lsl lr, r2, #1
+ add ip, r1, r2
+ vld1.64 {d0-d2}, [r1], lr
+ vld1.64 {d4-d6}, [ip], lr
+.if \no_rnd
+ vmov.i16 q13, #1
+.endif
+ pld [r1]
+ pld [ip]
+ vext.8 q1, q0, q1, #1
+ vext.8 q3, q2, q3, #1
+ vaddl.u8 q8, d0, d2
+ vaddl.u8 q10, d1, d3
+ vaddl.u8 q9, d4, d6
+ vaddl.u8 q11, d5, d7
+1: subs r3, r3, #2
+ vld1.64 {d0-d2}, [r1], lr
+ vadd.u16 q12, q8, q9
+ pld [r1]
+.if \no_rnd
+ vadd.u16 q12, q12, q13
+.endif
+ vext.8 q15, q0, q1, #1
+ vadd.u16 q1 , q10, q11
+ \vshrn d28, q12, #2
+.if \no_rnd
+ vadd.u16 q1, q1, q13
+.endif
+ \vshrn d29, q1, #2
+ vaddl.u8 q8, d0, d30
+ vld1.64 {d2-d4}, [ip], lr
+ vaddl.u8 q10, d1, d31
+ vst1.64 {d28,d29}, [r0,:128], r2
+ vadd.u16 q12, q8, q9
+ pld [ip]
+.if \no_rnd
+ vadd.u16 q12, q12, q13
+.endif
+ vext.8 q2, q1, q2, #1
+ vadd.u16 q0, q10, q11
+ \vshrn d30, q12, #2
+.if \no_rnd
+ vadd.u16 q0, q0, q13
+.endif
+ \vshrn d31, q0, #2
+ vaddl.u8 q9, d2, d4
+ vaddl.u8 q11, d3, d5
+ vst1.64 {d30,d31}, [r0,:128], r2
+ bgt 1b
+ pop {pc}
+ .endm
+
+ .macro pixels8
+1: vld1.64 {d0}, [r1], r2
+ vld1.64 {d1}, [r1], r2
+ vld1.64 {d2}, [r1], r2
+ pld [r1, r2, lsl #2]
+ vld1.64 {d3}, [r1], r2
+ pld [r1]
+ pld [r1, r2]
+ pld [r1, r2, lsl #1]
+ subs r3, r3, #4
+ vst1.64 {d0}, [r0,:64], r2
+ vst1.64 {d1}, [r0,:64], r2
+ vst1.64 {d2}, [r0,:64], r2
+ vst1.64 {d3}, [r0,:64], r2
+ bne 1b
+ bx lr
+ .endm
+
+ .macro pixels8_x2 vhadd=vrhadd.u8
+1: vld1.64 {d0, d1}, [r1], r2
+ vext.8 d1, d0, d1, #1
+ vld1.64 {d2, d3}, [r1], r2
+ vext.8 d3, d2, d3, #1
+ pld [r1]
+ pld [r1, r2]
+ subs r3, r3, #2
+ vswp d1, d2
+ \vhadd q0, q0, q1
+ vst1.64 {d0}, [r0,:64], r2
+ vst1.64 {d1}, [r0,:64], r2
+ bne 1b
+ bx lr
+ .endm
+
+ .macro pixels8_y2 vhadd=vrhadd.u8
+ push {lr}
+ add ip, r1, r2
+ lsl lr, r2, #1
+ vld1.64 {d0}, [r1], lr
+ vld1.64 {d1}, [ip], lr
+1: subs r3, r3, #2
+ \vhadd d4, d0, d1
+ vld1.64 {d0}, [r1], lr
+ \vhadd d5, d0, d1
+ vld1.64 {d1}, [ip], lr
+ pld [r1]
+ pld [ip]
+ vst1.64 {d4}, [r0,:64], r2
+ vst1.64 {d5}, [r0,:64], r2
+ bne 1b
+ pop {pc}
+ .endm
+
+ .macro pixels8_xy2 vshrn=vrshrn.u16 no_rnd=0
+ push {lr}
+ lsl lr, r2, #1
+ add ip, r1, r2
+ vld1.64 {d0, d1}, [r1], lr
+ vld1.64 {d2, d3}, [ip], lr
+.if \no_rnd
+ vmov.i16 q11, #1
+.endif
+ pld [r1]
+ pld [ip]
+ vext.8 d4, d0, d1, #1
+ vext.8 d6, d2, d3, #1
+ vaddl.u8 q8, d0, d4
+ vaddl.u8 q9, d2, d6
+1: subs r3, r3, #2
+ vld1.64 {d0, d1}, [r1], lr
+ pld [r1]
+ vadd.u16 q10, q8, q9
+ vext.8 d4, d0, d1, #1
+.if \no_rnd
+ vadd.u16 q10, q10, q11
+.endif
+ vaddl.u8 q8, d0, d4
+ \vshrn d5, q10, #2
+ vld1.64 {d2, d3}, [ip], lr
+ vadd.u16 q10, q8, q9
+ pld [ip]
+.if \no_rnd
+ vadd.u16 q10, q10, q11
+.endif
+ vst1.64 {d5}, [r0,:64], r2
+ \vshrn d7, q10, #2
+ vext.8 d6, d2, d3, #1
+ vaddl.u8 q9, d2, d6
+ vst1.64 {d7}, [r0,:64], r2
+ bgt 1b
+ pop {pc}
+ .endm
+
+ .macro pixfunc pfx name suf rnd_op args:vararg
+function ff_\pfx\name\suf\()_neon, export=1
+ \name \rnd_op \args
+ .endfunc
+ .endm
+
+ .macro pixfunc2 pfx name args:vararg
+ pixfunc \pfx \name
+ pixfunc \pfx \name \args
+ .endm
+
+function ff_put_h264_qpel16_mc00_neon, export=1
+ mov r3, #16
+ .endfunc
+
+ pixfunc put_ pixels16
+ pixfunc2 put_ pixels16_x2, _no_rnd, vhadd.u8
+ pixfunc2 put_ pixels16_y2, _no_rnd, vhadd.u8
+ pixfunc2 put_ pixels16_xy2, _no_rnd, vshrn.u16, 1
+
+function ff_avg_h264_qpel16_mc00_neon, export=1
+ mov r3, #16
+ .endfunc
+
+ pixfunc avg_ pixels16,, 1
+
+function ff_put_h264_qpel8_mc00_neon, export=1
+ mov r3, #8
+ .endfunc
+
+ pixfunc put_ pixels8
+ pixfunc2 put_ pixels8_x2, _no_rnd, vhadd.u8
+ pixfunc2 put_ pixels8_y2, _no_rnd, vhadd.u8
+ pixfunc2 put_ pixels8_xy2, _no_rnd, vshrn.u16, 1
+
+function ff_float_to_int16_neon, export=1
+ subs r2, r2, #8
+ vld1.64 {d0-d1}, [r1,:128]!
+ vcvt.s32.f32 q8, q0, #16
+ vld1.64 {d2-d3}, [r1,:128]!
+ vcvt.s32.f32 q9, q1, #16
+ beq 3f
+ bics ip, r2, #15
+ beq 2f
+1: subs ip, ip, #16
+ vshrn.s32 d4, q8, #16
+ vld1.64 {d0-d1}, [r1,:128]!
+ vcvt.s32.f32 q0, q0, #16
+ vshrn.s32 d5, q9, #16
+ vld1.64 {d2-d3}, [r1,:128]!
+ vcvt.s32.f32 q1, q1, #16
+ vshrn.s32 d6, q0, #16
+ vst1.64 {d4-d5}, [r0,:128]!
+ vshrn.s32 d7, q1, #16
+ vld1.64 {d16-d17},[r1,:128]!
+ vcvt.s32.f32 q8, q8, #16
+ vld1.64 {d18-d19},[r1,:128]!
+ vcvt.s32.f32 q9, q9, #16
+ vst1.64 {d6-d7}, [r0,:128]!
+ bne 1b
+ ands r2, r2, #15
+ beq 3f
+2: vld1.64 {d0-d1}, [r1,:128]!
+ vshrn.s32 d4, q8, #16
+ vcvt.s32.f32 q0, q0, #16
+ vld1.64 {d2-d3}, [r1,:128]!
+ vshrn.s32 d5, q9, #16
+ vcvt.s32.f32 q1, q1, #16
+ vshrn.s32 d6, q0, #16
+ vst1.64 {d4-d5}, [r0,:128]!
+ vshrn.s32 d7, q1, #16
+ vst1.64 {d6-d7}, [r0,:128]!
+ bx lr
+3: vshrn.s32 d4, q8, #16
+ vshrn.s32 d5, q9, #16
+ vst1.64 {d4-d5}, [r0,:128]!
+ bx lr
+ .endfunc
+
+function ff_float_to_int16_interleave_neon, export=1
+ cmp r3, #2
+ ldrlt r1, [r1]
+ blt ff_float_to_int16_neon
+ bne 4f
+
+ ldr r3, [r1]
+ ldr r1, [r1, #4]
+
+ subs r2, r2, #8
+ vld1.64 {d0-d1}, [r3,:128]!
+ vcvt.s32.f32 q8, q0, #16
+ vld1.64 {d2-d3}, [r3,:128]!
+ vcvt.s32.f32 q9, q1, #16
+ vld1.64 {d20-d21},[r1,:128]!
+ vcvt.s32.f32 q10, q10, #16
+ vld1.64 {d22-d23},[r1,:128]!
+ vcvt.s32.f32 q11, q11, #16
+ beq 3f
+ bics ip, r2, #15
+ beq 2f
+1: subs ip, ip, #16
+ vld1.64 {d0-d1}, [r3,:128]!
+ vcvt.s32.f32 q0, q0, #16
+ vsri.32 q10, q8, #16
+ vld1.64 {d2-d3}, [r3,:128]!
+ vcvt.s32.f32 q1, q1, #16
+ vld1.64 {d24-d25},[r1,:128]!
+ vcvt.s32.f32 q12, q12, #16
+ vld1.64 {d26-d27},[r1,:128]!
+ vsri.32 q11, q9, #16
+ vst1.64 {d20-d21},[r0,:128]!
+ vcvt.s32.f32 q13, q13, #16
+ vst1.64 {d22-d23},[r0,:128]!
+ vsri.32 q12, q0, #16
+ vld1.64 {d16-d17},[r3,:128]!
+ vsri.32 q13, q1, #16
+ vst1.64 {d24-d25},[r0,:128]!
+ vcvt.s32.f32 q8, q8, #16
+ vld1.64 {d18-d19},[r3,:128]!
+ vcvt.s32.f32 q9, q9, #16
+ vld1.64 {d20-d21},[r1,:128]!
+ vcvt.s32.f32 q10, q10, #16
+ vld1.64 {d22-d23},[r1,:128]!
+ vcvt.s32.f32 q11, q11, #16
+ vst1.64 {d26-d27},[r0,:128]!
+ bne 1b
+ ands r2, r2, #15
+ beq 3f
+2: vsri.32 q10, q8, #16
+ vld1.64 {d0-d1}, [r3,:128]!
+ vcvt.s32.f32 q0, q0, #16
+ vld1.64 {d2-d3}, [r3,:128]!
+ vcvt.s32.f32 q1, q1, #16
+ vld1.64 {d24-d25},[r1,:128]!
+ vcvt.s32.f32 q12, q12, #16
+ vsri.32 q11, q9, #16
+ vld1.64 {d26-d27},[r1,:128]!
+ vcvt.s32.f32 q13, q13, #16
+ vst1.64 {d20-d21},[r0,:128]!
+ vsri.32 q12, q0, #16
+ vst1.64 {d22-d23},[r0,:128]!
+ vsri.32 q13, q1, #16
+ vst1.64 {d24-d27},[r0,:128]!
+ bx lr
+3: vsri.32 q10, q8, #16
+ vsri.32 q11, q9, #16
+ vst1.64 {d20-d23},[r0,:128]!
+ bx lr
+
+4: push {r4-r8,lr}
+ cmp r3, #4
+ lsl ip, r3, #1
+ blt 4f
+
+ @ 4 channels
+5: ldmia r1!, {r4-r7}
+ mov lr, r2
+ mov r8, r0
+ vld1.64 {d16-d17},[r4,:128]!
+ vcvt.s32.f32 q8, q8, #16
+ vld1.64 {d18-d19},[r5,:128]!
+ vcvt.s32.f32 q9, q9, #16
+ vld1.64 {d20-d21},[r6,:128]!
+ vcvt.s32.f32 q10, q10, #16
+ vld1.64 {d22-d23},[r7,:128]!
+ vcvt.s32.f32 q11, q11, #16
+6: subs lr, lr, #8
+ vld1.64 {d0-d1}, [r4,:128]!
+ vcvt.s32.f32 q0, q0, #16
+ vsri.32 q9, q8, #16
+ vld1.64 {d2-d3}, [r5,:128]!
+ vcvt.s32.f32 q1, q1, #16
+ vsri.32 q11, q10, #16
+ vld1.64 {d4-d5}, [r6,:128]!
+ vcvt.s32.f32 q2, q2, #16
+ vzip.32 d18, d22
+ vld1.64 {d6-d7}, [r7,:128]!
+ vcvt.s32.f32 q3, q3, #16
+ vzip.32 d19, d23
+ vst1.64 {d18}, [r8], ip
+ vsri.32 q1, q0, #16
+ vst1.64 {d22}, [r8], ip
+ vsri.32 q3, q2, #16
+ vst1.64 {d19}, [r8], ip
+ vzip.32 d2, d6
+ vst1.64 {d23}, [r8], ip
+ vzip.32 d3, d7
+ beq 7f
+ vld1.64 {d16-d17},[r4,:128]!
+ vcvt.s32.f32 q8, q8, #16
+ vst1.64 {d2}, [r8], ip
+ vld1.64 {d18-d19},[r5,:128]!
+ vcvt.s32.f32 q9, q9, #16
+ vst1.64 {d6}, [r8], ip
+ vld1.64 {d20-d21},[r6,:128]!
+ vcvt.s32.f32 q10, q10, #16
+ vst1.64 {d3}, [r8], ip
+ vld1.64 {d22-d23},[r7,:128]!
+ vcvt.s32.f32 q11, q11, #16
+ vst1.64 {d7}, [r8], ip
+ b 6b
+7: vst1.64 {d2}, [r8], ip
+ vst1.64 {d6}, [r8], ip
+ vst1.64 {d3}, [r8], ip
+ vst1.64 {d7}, [r8], ip
+ subs r3, r3, #4
+ popeq {r4-r8,pc}
+ cmp r3, #4
+ add r0, r0, #8
+ bge 5b
+
+ @ 2 channels
+4: cmp r3, #2
+ blt 4f
+ ldmia r1!, {r4-r5}
+ mov lr, r2
+ mov r8, r0
+ tst lr, #8
+ vld1.64 {d16-d17},[r4,:128]!
+ vcvt.s32.f32 q8, q8, #16
+ vld1.64 {d18-d19},[r5,:128]!
+ vcvt.s32.f32 q9, q9, #16
+ vld1.64 {d20-d21},[r4,:128]!
+ vcvt.s32.f32 q10, q10, #16
+ vld1.64 {d22-d23},[r5,:128]!
+ vcvt.s32.f32 q11, q11, #16
+ beq 6f
+ subs lr, lr, #8
+ beq 7f
+ vsri.32 d18, d16, #16
+ vsri.32 d19, d17, #16
+ vld1.64 {d16-d17},[r4,:128]!
+ vcvt.s32.f32 q8, q8, #16
+ vst1.32 {d18[0]}, [r8], ip
+ vsri.32 d22, d20, #16
+ vst1.32 {d18[1]}, [r8], ip
+ vsri.32 d23, d21, #16
+ vst1.32 {d19[0]}, [r8], ip
+ vst1.32 {d19[1]}, [r8], ip
+ vld1.64 {d18-d19},[r5,:128]!
+ vcvt.s32.f32 q9, q9, #16
+ vst1.32 {d22[0]}, [r8], ip
+ vst1.32 {d22[1]}, [r8], ip
+ vld1.64 {d20-d21},[r4,:128]!
+ vcvt.s32.f32 q10, q10, #16
+ vst1.32 {d23[0]}, [r8], ip
+ vst1.32 {d23[1]}, [r8], ip
+ vld1.64 {d22-d23},[r5,:128]!
+ vcvt.s32.f32 q11, q11, #16
+6: subs lr, lr, #16
+ vld1.64 {d0-d1}, [r4,:128]!
+ vcvt.s32.f32 q0, q0, #16
+ vsri.32 d18, d16, #16
+ vld1.64 {d2-d3}, [r5,:128]!
+ vcvt.s32.f32 q1, q1, #16
+ vsri.32 d19, d17, #16
+ vld1.64 {d4-d5}, [r4,:128]!
+ vcvt.s32.f32 q2, q2, #16
+ vld1.64 {d6-d7}, [r5,:128]!
+ vcvt.s32.f32 q3, q3, #16
+ vst1.32 {d18[0]}, [r8], ip
+ vsri.32 d22, d20, #16
+ vst1.32 {d18[1]}, [r8], ip
+ vsri.32 d23, d21, #16
+ vst1.32 {d19[0]}, [r8], ip
+ vsri.32 d2, d0, #16
+ vst1.32 {d19[1]}, [r8], ip
+ vsri.32 d3, d1, #16
+ vst1.32 {d22[0]}, [r8], ip
+ vsri.32 d6, d4, #16
+ vst1.32 {d22[1]}, [r8], ip
+ vsri.32 d7, d5, #16
+ vst1.32 {d23[0]}, [r8], ip
+ vst1.32 {d23[1]}, [r8], ip
+ beq 6f
+ vld1.64 {d16-d17},[r4,:128]!
+ vcvt.s32.f32 q8, q8, #16
+ vst1.32 {d2[0]}, [r8], ip
+ vst1.32 {d2[1]}, [r8], ip
+ vld1.64 {d18-d19},[r5,:128]!
+ vcvt.s32.f32 q9, q9, #16
+ vst1.32 {d3[0]}, [r8], ip
+ vst1.32 {d3[1]}, [r8], ip
+ vld1.64 {d20-d21},[r4,:128]!
+ vcvt.s32.f32 q10, q10, #16
+ vst1.32 {d6[0]}, [r8], ip
+ vst1.32 {d6[1]}, [r8], ip
+ vld1.64 {d22-d23},[r5,:128]!
+ vcvt.s32.f32 q11, q11, #16
+ vst1.32 {d7[0]}, [r8], ip
+ vst1.32 {d7[1]}, [r8], ip
+ bgt 6b
+6: vst1.32 {d2[0]}, [r8], ip
+ vst1.32 {d2[1]}, [r8], ip
+ vst1.32 {d3[0]}, [r8], ip
+ vst1.32 {d3[1]}, [r8], ip
+ vst1.32 {d6[0]}, [r8], ip
+ vst1.32 {d6[1]}, [r8], ip
+ vst1.32 {d7[0]}, [r8], ip
+ vst1.32 {d7[1]}, [r8], ip
+ b 8f
+7: vsri.32 d18, d16, #16
+ vsri.32 d19, d17, #16
+ vst1.32 {d18[0]}, [r8], ip
+ vsri.32 d22, d20, #16
+ vst1.32 {d18[1]}, [r8], ip
+ vsri.32 d23, d21, #16
+ vst1.32 {d19[0]}, [r8], ip
+ vst1.32 {d19[1]}, [r8], ip
+ vst1.32 {d22[0]}, [r8], ip
+ vst1.32 {d22[1]}, [r8], ip
+ vst1.32 {d23[0]}, [r8], ip
+ vst1.32 {d23[1]}, [r8], ip
+8: subs r3, r3, #2
+ add r0, r0, #4
+ popeq {r4-r8,pc}
+
+ @ 1 channel
+4: ldr r4, [r1],#4
+ tst r2, #8
+ mov lr, r2
+ mov r5, r0
+ vld1.64 {d0-d1}, [r4,:128]!
+ vcvt.s32.f32 q0, q0, #16
+ vld1.64 {d2-d3}, [r4,:128]!
+ vcvt.s32.f32 q1, q1, #16
+ bne 8f
+6: subs lr, lr, #16
+ vld1.64 {d4-d5}, [r4,:128]!
+ vcvt.s32.f32 q2, q2, #16
+ vld1.64 {d6-d7}, [r4,:128]!
+ vcvt.s32.f32 q3, q3, #16
+ vst1.16 {d0[1]}, [r5,:16], ip
+ vst1.16 {d0[3]}, [r5,:16], ip
+ vst1.16 {d1[1]}, [r5,:16], ip
+ vst1.16 {d1[3]}, [r5,:16], ip
+ vst1.16 {d2[1]}, [r5,:16], ip
+ vst1.16 {d2[3]}, [r5,:16], ip
+ vst1.16 {d3[1]}, [r5,:16], ip
+ vst1.16 {d3[3]}, [r5,:16], ip
+ beq 7f
+ vld1.64 {d0-d1}, [r4,:128]!
+ vcvt.s32.f32 q0, q0, #16
+ vld1.64 {d2-d3}, [r4,:128]!
+ vcvt.s32.f32 q1, q1, #16
+7: vst1.16 {d4[1]}, [r5,:16], ip
+ vst1.16 {d4[3]}, [r5,:16], ip
+ vst1.16 {d5[1]}, [r5,:16], ip
+ vst1.16 {d5[3]}, [r5,:16], ip
+ vst1.16 {d6[1]}, [r5,:16], ip
+ vst1.16 {d6[3]}, [r5,:16], ip
+ vst1.16 {d7[1]}, [r5,:16], ip
+ vst1.16 {d7[3]}, [r5,:16], ip
+ bgt 6b
+ pop {r4-r8,pc}
+8: subs lr, lr, #8
+ vst1.16 {d0[1]}, [r5,:16], ip
+ vst1.16 {d0[3]}, [r5,:16], ip
+ vst1.16 {d1[1]}, [r5,:16], ip
+ vst1.16 {d1[3]}, [r5,:16], ip
+ vst1.16 {d2[1]}, [r5,:16], ip
+ vst1.16 {d2[3]}, [r5,:16], ip
+ vst1.16 {d3[1]}, [r5,:16], ip
+ vst1.16 {d3[3]}, [r5,:16], ip
+ popeq {r4-r8,pc}
+ vld1.64 {d0-d1}, [r4,:128]!
+ vcvt.s32.f32 q0, q0, #16
+ vld1.64 {d2-d3}, [r4,:128]!
+ vcvt.s32.f32 q1, q1, #16
+ b 6b
+ .endfunc
diff --git a/libavcodec/arm/dsputil_vfp.S b/libavcodec/arm/dsputil_vfp.S
new file mode 100644
index 0000000..04c8014
--- /dev/null
+++ b/libavcodec/arm/dsputil_vfp.S
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2008 Siarhei Siamashka <ssvb at users.sourceforge.net>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "asm.S"
+
+ .fpu neon @ required for gas to accept UAL syntax
+/*
+ * VFP is a floating point coprocessor used in some ARM cores. VFP11 has 1 cycle
+ * throughput for almost all the instructions (except for double precision
+ * arithmetics), but rather high latency. Latency is 4 cycles for loads and 8 cycles
+ * for arithmetic operations. Scheduling code to avoid pipeline stalls is very
+ * important for performance. One more interesting feature is that VFP has
+ * independent load/store and arithmetics pipelines, so it is possible to make
+ * them work simultaneously and get more than 1 operation per cycle. Load/store
+ * pipeline can process 2 single precision floating point values per cycle and
+ * supports bulk loads and stores for large sets of registers. Arithmetic operations
+ * can be done on vectors, which allows to keep the arithmetics pipeline busy,
+ * while the processor may issue and execute other instructions. Detailed
+ * optimization manuals can be found at http://www.arm.com
+ */
+
+/**
+ * ARM VFP optimized implementation of 'vector_fmul_c' function.
+ * Assume that len is a positive number and is multiple of 8
+ */
+@ void ff_vector_fmul_vfp(float *dst, const float *src, int len)
+function ff_vector_fmul_vfp, export=1
+ vpush {d8-d15}
+ mov r3, r0
+ fmrx r12, fpscr
+ orr r12, r12, #(3 << 16) /* set vector size to 4 */
+ fmxr fpscr, r12
+
+ vldmia r3!, {s0-s3}
+ vldmia r1!, {s8-s11}
+ vldmia r3!, {s4-s7}
+ vldmia r1!, {s12-s15}
+ vmul.f32 s8, s0, s8
+1:
+ subs r2, r2, #16
+ vmul.f32 s12, s4, s12
+ vldmiage r3!, {s16-s19}
+ vldmiage r1!, {s24-s27}
+ vldmiage r3!, {s20-s23}
+ vldmiage r1!, {s28-s31}
+ vmulge.f32 s24, s16, s24
+ vstmia r0!, {s8-s11}
+ vstmia r0!, {s12-s15}
+ vmulge.f32 s28, s20, s28
+ vldmiagt r3!, {s0-s3}
+ vldmiagt r1!, {s8-s11}
+ vldmiagt r3!, {s4-s7}
+ vldmiagt r1!, {s12-s15}
+ vmulge.f32 s8, s0, s8
+ vstmiage r0!, {s24-s27}
+ vstmiage r0!, {s28-s31}
+ bgt 1b
+
+ bic r12, r12, #(7 << 16) /* set vector size back to 1 */
+ fmxr fpscr, r12
+ vpop {d8-d15}
+ bx lr
+ .endfunc
+
+/**
+ * ARM VFP optimized implementation of 'vector_fmul_reverse_c' function.
+ * Assume that len is a positive number and is multiple of 8
+ */
+@ void ff_vector_fmul_reverse_vfp(float *dst, const float *src0,
+@ const float *src1, int len)
+function ff_vector_fmul_reverse_vfp, export=1
+ vpush {d8-d15}
+ add r2, r2, r3, lsl #2
+ vldmdb r2!, {s0-s3}
+ vldmia r1!, {s8-s11}
+ vldmdb r2!, {s4-s7}
+ vldmia r1!, {s12-s15}
+ vmul.f32 s8, s3, s8
+ vmul.f32 s9, s2, s9
+ vmul.f32 s10, s1, s10
+ vmul.f32 s11, s0, s11
+1:
+ subs r3, r3, #16
+ vldmdbge r2!, {s16-s19}
+ vmul.f32 s12, s7, s12
+ vldmiage r1!, {s24-s27}
+ vmul.f32 s13, s6, s13
+ vldmdbge r2!, {s20-s23}
+ vmul.f32 s14, s5, s14
+ vldmiage r1!, {s28-s31}
+ vmul.f32 s15, s4, s15
+ vmulge.f32 s24, s19, s24
+ vldmdbgt r2!, {s0-s3}
+ vmulge.f32 s25, s18, s25
+ vstmia r0!, {s8-s13}
+ vmulge.f32 s26, s17, s26
+ vldmiagt r1!, {s8-s11}
+ vmulge.f32 s27, s16, s27
+ vmulge.f32 s28, s23, s28
+ vldmdbgt r2!, {s4-s7}
+ vmulge.f32 s29, s22, s29
+ vstmia r0!, {s14-s15}
+ vmulge.f32 s30, s21, s30
+ vmulge.f32 s31, s20, s31
+ vmulge.f32 s8, s3, s8
+ vldmiagt r1!, {s12-s15}
+ vmulge.f32 s9, s2, s9
+ vmulge.f32 s10, s1, s10
+ vstmiage r0!, {s24-s27}
+ vmulge.f32 s11, s0, s11
+ vstmiage r0!, {s28-s31}
+ bgt 1b
+
+ vpop {d8-d15}
+ bx lr
+ .endfunc
+
+#ifdef HAVE_ARMV6
+/**
+ * ARM VFP optimized float to int16 conversion.
+ * Assume that len is a positive number and is multiple of 8, destination
+ * buffer is at least 4 bytes aligned (8 bytes alignment is better for
+ * performance), little endian byte sex
+ */
+@ void ff_float_to_int16_vfp(int16_t *dst, const float *src, int len)
+function ff_float_to_int16_vfp, export=1
+ push {r4-r8,lr}
+ vpush {d8-d11}
+ vldmia r1!, {s16-s23}
+ vcvt.s32.f32 s0, s16
+ vcvt.s32.f32 s1, s17
+ vcvt.s32.f32 s2, s18
+ vcvt.s32.f32 s3, s19
+ vcvt.s32.f32 s4, s20
+ vcvt.s32.f32 s5, s21
+ vcvt.s32.f32 s6, s22
+ vcvt.s32.f32 s7, s23
+1:
+ subs r2, r2, #8
+ vmov r3, r4, s0, s1
+ vmov r5, r6, s2, s3
+ vmov r7, r8, s4, s5
+ vmov ip, lr, s6, s7
+ vldmiagt r1!, {s16-s23}
+ ssat r4, #16, r4
+ ssat r3, #16, r3
+ ssat r6, #16, r6
+ ssat r5, #16, r5
+ pkhbt r3, r3, r4, lsl #16
+ pkhbt r4, r5, r6, lsl #16
+ vcvtgt.s32.f32 s0, s16
+ vcvtgt.s32.f32 s1, s17
+ vcvtgt.s32.f32 s2, s18
+ vcvtgt.s32.f32 s3, s19
+ vcvtgt.s32.f32 s4, s20
+ vcvtgt.s32.f32 s5, s21
+ vcvtgt.s32.f32 s6, s22
+ vcvtgt.s32.f32 s7, s23
+ ssat r8, #16, r8
+ ssat r7, #16, r7
+ ssat lr, #16, lr
+ ssat ip, #16, ip
+ pkhbt r5, r7, r8, lsl #16
+ pkhbt r6, ip, lr, lsl #16
+ stmia r0!, {r3-r6}
+ bgt 1b
+
+ vpop {d8-d11}
+ pop {r4-r8,pc}
+ .endfunc
+#endif
diff --git a/libavcodec/arm/float_arm_vfp.c b/libavcodec/arm/float_arm_vfp.c
new file mode 100644
index 0000000..5598aa9
--- /dev/null
+++ b/libavcodec/arm/float_arm_vfp.c
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2008 Siarhei Siamashka <ssvb at users.sourceforge.net>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavcodec/dsputil.h"
+
+void ff_vector_fmul_vfp(float *dst, const float *src, int len);
+void ff_vector_fmul_reverse_vfp(float *dst, const float *src0,
+ const float *src1, int len);
+void ff_float_to_int16_vfp(int16_t *dst, const float *src, long len);
+
+void ff_float_init_arm_vfp(DSPContext* c, AVCodecContext *avctx)
+{
+ c->vector_fmul = ff_vector_fmul_vfp;
+ c->vector_fmul_reverse = ff_vector_fmul_reverse_vfp;
+#ifdef HAVE_ARMV6
+ c->float_to_int16 = ff_float_to_int16_vfp;
+#endif
+}
diff --git a/libavcodec/arm/h264dsp_neon.S b/libavcodec/arm/h264dsp_neon.S
new file mode 100644
index 0000000..39a8daf
--- /dev/null
+++ b/libavcodec/arm/h264dsp_neon.S
@@ -0,0 +1,1377 @@
+/*
+ * Copyright (c) 2008 Mans Rullgard <mans at mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "asm.S"
+
+ .fpu neon
+
+ .macro transpose_8x8 r0 r1 r2 r3 r4 r5 r6 r7
+ vtrn.32 \r0, \r4
+ vtrn.32 \r1, \r5
+ vtrn.32 \r2, \r6
+ vtrn.32 \r3, \r7
+ vtrn.16 \r0, \r2
+ vtrn.16 \r1, \r3
+ vtrn.16 \r4, \r6
+ vtrn.16 \r5, \r7
+ vtrn.8 \r0, \r1
+ vtrn.8 \r2, \r3
+ vtrn.8 \r4, \r5
+ vtrn.8 \r6, \r7
+ .endm
+
+ .macro swap4 r0 r1 r2 r3 r4 r5 r6 r7
+ vswp \r0, \r4
+ vswp \r1, \r5
+ vswp \r2, \r6
+ vswp \r3, \r7
+ .endm
+
+ .macro transpose16_4x4 r0 r1 r2 r3 r4 r5 r6 r7
+ vtrn.32 \r0, \r2
+ vtrn.32 \r1, \r3
+ vtrn.32 \r4, \r6
+ vtrn.32 \r5, \r7
+ vtrn.16 \r0, \r1
+ vtrn.16 \r2, \r3
+ vtrn.16 \r4, \r5
+ vtrn.16 \r6, \r7
+ .endm
+
+/* chroma_mc8(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */
+ .macro h264_chroma_mc8 avg=0
+ push {r4-r7, lr}
+ ldrd r4, [sp, #20]
+.if \avg
+ mov lr, r0
+.endif
+ pld [r1]
+ pld [r1, r2]
+
+ muls r7, r4, r5
+ rsb r6, r7, r5, lsl #3
+ rsb ip, r7, r4, lsl #3
+ sub r4, r7, r4, lsl #3
+ sub r4, r4, r5, lsl #3
+ add r4, r4, #64
+
+ beq 2f
+
+ add r5, r1, r2
+
+ vdup.8 d0, r4
+ lsl r4, r2, #1
+ vdup.8 d1, ip
+ vld1.64 {d4, d5}, [r1], r4
+ vdup.8 d2, r6
+ vld1.64 {d6, d7}, [r5], r4
+ vdup.8 d3, r7
+
+ vext.8 d5, d4, d5, #1
+ vext.8 d7, d6, d7, #1
+
+1: pld [r5]
+ vmull.u8 q8, d4, d0
+ vmlal.u8 q8, d5, d1
+ vld1.64 {d4, d5}, [r1], r4
+ vmlal.u8 q8, d6, d2
+ vext.8 d5, d4, d5, #1
+ vmlal.u8 q8, d7, d3
+ vmull.u8 q9, d6, d0
+ subs r3, r3, #2
+ vmlal.u8 q9, d7, d1
+ vmlal.u8 q9, d4, d2
+ vmlal.u8 q9, d5, d3
+ vrshrn.u16 d16, q8, #6
+ vld1.64 {d6, d7}, [r5], r4
+ pld [r1]
+ vrshrn.u16 d17, q9, #6
+.if \avg
+ vld1.64 {d20}, [lr,:64], r2
+ vld1.64 {d21}, [lr,:64], r2
+ vrhadd.u8 q8, q8, q10
+.endif
+ vext.8 d7, d6, d7, #1
+ vst1.64 {d16}, [r0,:64], r2
+ vst1.64 {d17}, [r0,:64], r2
+ bgt 1b
+
+ pop {r4-r7, pc}
+
+2: tst r6, r6
+ add ip, ip, r6
+ vdup.8 d0, r4
+ vdup.8 d1, ip
+
+ beq 4f
+
+ add r5, r1, r2
+ lsl r4, r2, #1
+ vld1.64 {d4}, [r1], r4
+ vld1.64 {d6}, [r5], r4
+
+3: pld [r5]
+ vmull.u8 q8, d4, d0
+ vmlal.u8 q8, d6, d1
+ vld1.64 {d4}, [r1], r4
+ vmull.u8 q9, d6, d0
+ vmlal.u8 q9, d4, d1
+ vld1.64 {d6}, [r5], r4
+ vrshrn.u16 d16, q8, #6
+ vrshrn.u16 d17, q9, #6
+.if \avg
+ vld1.64 {d20}, [lr,:64], r2
+ vld1.64 {d21}, [lr,:64], r2
+ vrhadd.u8 q8, q8, q10
+.endif
+ subs r3, r3, #2
+ pld [r1]
+ vst1.64 {d16}, [r0,:64], r2
+ vst1.64 {d17}, [r0,:64], r2
+ bgt 3b
+
+ pop {r4-r7, pc}
+
+4: vld1.64 {d4, d5}, [r1], r2
+ vld1.64 {d6, d7}, [r1], r2
+ vext.8 d5, d4, d5, #1
+ vext.8 d7, d6, d7, #1
+
+5: pld [r1]
+ subs r3, r3, #2
+ vmull.u8 q8, d4, d0
+ vmlal.u8 q8, d5, d1
+ vld1.64 {d4, d5}, [r1], r2
+ vmull.u8 q9, d6, d0
+ vmlal.u8 q9, d7, d1
+ pld [r1]
+ vext.8 d5, d4, d5, #1
+ vrshrn.u16 d16, q8, #6
+ vrshrn.u16 d17, q9, #6
+.if \avg
+ vld1.64 {d20}, [lr,:64], r2
+ vld1.64 {d21}, [lr,:64], r2
+ vrhadd.u8 q8, q8, q10
+.endif
+ vld1.64 {d6, d7}, [r1], r2
+ vext.8 d7, d6, d7, #1
+ vst1.64 {d16}, [r0,:64], r2
+ vst1.64 {d17}, [r0,:64], r2
+ bgt 5b
+
+ pop {r4-r7, pc}
+ .endm
+
+/* chroma_mc4(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) */
+ .macro h264_chroma_mc4 avg=0
+ push {r4-r7, lr}
+ ldrd r4, [sp, #20]
+.if \avg
+ mov lr, r0
+.endif
+ pld [r1]
+ pld [r1, r2]
+
+ muls r7, r4, r5
+ rsb r6, r7, r5, lsl #3
+ rsb ip, r7, r4, lsl #3
+ sub r4, r7, r4, lsl #3
+ sub r4, r4, r5, lsl #3
+ add r4, r4, #64
+
+ beq 2f
+
+ add r5, r1, r2
+
+ vdup.8 d0, r4
+ lsl r4, r2, #1
+ vdup.8 d1, ip
+ vld1.64 {d4}, [r1], r4
+ vdup.8 d2, r6
+ vld1.64 {d6}, [r5], r4
+ vdup.8 d3, r7
+
+ vext.8 d5, d4, d5, #1
+ vext.8 d7, d6, d7, #1
+ vtrn.32 d4, d5
+ vtrn.32 d6, d7
+
+ vtrn.32 d0, d1
+ vtrn.32 d2, d3
+
+1: pld [r5]
+ vmull.u8 q8, d4, d0
+ vmlal.u8 q8, d6, d2
+ vld1.64 {d4}, [r1], r4
+ vext.8 d5, d4, d5, #1
+ vtrn.32 d4, d5
+ vmull.u8 q9, d6, d0
+ vmlal.u8 q9, d4, d2
+ vld1.64 {d6}, [r5], r4
+ vadd.i16 d16, d16, d17
+ vadd.i16 d17, d18, d19
+ vrshrn.u16 d16, q8, #6
+ subs r3, r3, #2
+ pld [r1]
+.if \avg
+ vld1.32 {d20[0]}, [lr,:32], r2
+ vld1.32 {d20[1]}, [lr,:32], r2
+ vrhadd.u8 d16, d16, d20
+.endif
+ vext.8 d7, d6, d7, #1
+ vtrn.32 d6, d7
+ vst1.32 {d16[0]}, [r0,:32], r2
+ vst1.32 {d16[1]}, [r0,:32], r2
+ bgt 1b
+
+ pop {r4-r7, pc}
+
+2: tst r6, r6
+ add ip, ip, r6
+ vdup.8 d0, r4
+ vdup.8 d1, ip
+ vtrn.32 d0, d1
+
+ beq 4f
+
+ vext.32 d1, d0, d1, #1
+ add r5, r1, r2
+ lsl r4, r2, #1
+ vld1.32 {d4[0]}, [r1], r4
+ vld1.32 {d4[1]}, [r5], r4
+
+3: pld [r5]
+ vmull.u8 q8, d4, d0
+ vld1.32 {d4[0]}, [r1], r4
+ vmull.u8 q9, d4, d1
+ vld1.32 {d4[1]}, [r5], r4
+ vadd.i16 d16, d16, d17
+ vadd.i16 d17, d18, d19
+ vrshrn.u16 d16, q8, #6
+.if \avg
+ vld1.32 {d20[0]}, [lr,:32], r2
+ vld1.32 {d20[1]}, [lr,:32], r2
+ vrhadd.u8 d16, d16, d20
+.endif
+ subs r3, r3, #2
+ pld [r1]
+ vst1.32 {d16[0]}, [r0,:32], r2
+ vst1.32 {d16[1]}, [r0,:32], r2
+ bgt 3b
+
+ pop {r4-r7, pc}
+
+4: vld1.64 {d4}, [r1], r2
+ vld1.64 {d6}, [r1], r2
+ vext.8 d5, d4, d5, #1
+ vext.8 d7, d6, d7, #1
+ vtrn.32 d4, d5
+ vtrn.32 d6, d7
+
+5: vmull.u8 q8, d4, d0
+ vmull.u8 q9, d6, d0
+ subs r3, r3, #2
+ vld1.64 {d4}, [r1], r2
+ vext.8 d5, d4, d5, #1
+ vtrn.32 d4, d5
+ vadd.i16 d16, d16, d17
+ vadd.i16 d17, d18, d19
+ pld [r1]
+ vrshrn.u16 d16, q8, #6
+.if \avg
+ vld1.32 {d20[0]}, [lr,:32], r2
+ vld1.32 {d20[1]}, [lr,:32], r2
+ vrhadd.u8 d16, d16, d20
+.endif
+ vld1.64 {d6}, [r1], r2
+ vext.8 d7, d6, d7, #1
+ vtrn.32 d6, d7
+ pld [r1]
+ vst1.32 {d16[0]}, [r0,:32], r2
+ vst1.32 {d16[1]}, [r0,:32], r2
+ bgt 5b
+
+ pop {r4-r7, pc}
+ .endm
+
+ .text
+ .align
+
+function ff_put_h264_chroma_mc8_neon, export=1
+ h264_chroma_mc8
+ .endfunc
+
+function ff_avg_h264_chroma_mc8_neon, export=1
+ h264_chroma_mc8 avg=1
+ .endfunc
+
+function ff_put_h264_chroma_mc4_neon, export=1
+ h264_chroma_mc4
+ .endfunc
+
+function ff_avg_h264_chroma_mc4_neon, export=1
+ h264_chroma_mc4 avg=1
+ .endfunc
+
+ /* H.264 loop filter */
+
+ .macro h264_loop_filter_start
+ ldr ip, [sp]
+ tst r2, r2
+ ldr ip, [ip]
+ tstne r3, r3
+ vmov.32 d24[0], ip
+ and ip, ip, ip, lsl #16
+ bxeq lr
+ ands ip, ip, ip, lsl #8
+ bxlt lr
+ .endm
+
+ .macro align_push_regs
+ and ip, sp, #15
+ add ip, ip, #32
+ sub sp, sp, ip
+ vst1.64 {d12-d15}, [sp,:128]
+ sub sp, sp, #32
+ vst1.64 {d8-d11}, [sp,:128]
+ .endm
+
+ .macro align_pop_regs
+ vld1.64 {d8-d11}, [sp,:128]!
+ vld1.64 {d12-d15}, [sp,:128], ip
+ .endm
+
+ .macro h264_loop_filter_luma
+ vdup.8 q11, r2 @ alpha
+ vmovl.u8 q12, d24
+ vabd.u8 q6, q8, q0 @ abs(p0 - q0)
+ vmovl.u16 q12, d24
+ vabd.u8 q14, q9, q8 @ abs(p1 - p0)
+ vsli.16 q12, q12, #8
+ vabd.u8 q15, q1, q0 @ abs(q1 - q0)
+ vsli.32 q12, q12, #16
+ vclt.u8 q6, q6, q11 @ < alpha
+ vdup.8 q11, r3 @ beta
+ vclt.s8 q7, q12, #0
+ vclt.u8 q14, q14, q11 @ < beta
+ vclt.u8 q15, q15, q11 @ < beta
+ vbic q6, q6, q7
+ vabd.u8 q4, q10, q8 @ abs(p2 - p0)
+ vand q6, q6, q14
+ vabd.u8 q5, q2, q0 @ abs(q2 - q0)
+ vclt.u8 q4, q4, q11 @ < beta
+ vand q6, q6, q15
+ vclt.u8 q5, q5, q11 @ < beta
+ vand q4, q4, q6
+ vand q5, q5, q6
+ vand q12, q12, q6
+ vrhadd.u8 q14, q8, q0
+ vsub.i8 q6, q12, q4
+ vqadd.u8 q7, q9, q12
+ vhadd.u8 q10, q10, q14
+ vsub.i8 q6, q6, q5
+ vhadd.u8 q14, q2, q14
+ vmin.u8 q7, q7, q10
+ vqsub.u8 q11, q9, q12
+ vqadd.u8 q2, q1, q12
+ vmax.u8 q7, q7, q11
+ vqsub.u8 q11, q1, q12
+ vmin.u8 q14, q2, q14
+ vmovl.u8 q2, d0
+ vmax.u8 q14, q14, q11
+ vmovl.u8 q10, d1
+ vsubw.u8 q2, q2, d16
+ vsubw.u8 q10, q10, d17
+ vshl.i16 q2, q2, #2
+ vshl.i16 q10, q10, #2
+ vaddw.u8 q2, q2, d18
+ vaddw.u8 q10, q10, d19
+ vsubw.u8 q2, q2, d2
+ vsubw.u8 q10, q10, d3
+ vrshrn.i16 d4, q2, #3
+ vrshrn.i16 d5, q10, #3
+ vbsl q4, q7, q9
+ vbsl q5, q14, q1
+ vneg.s8 q7, q6
+ vmovl.u8 q14, d16
+ vmin.s8 q2, q2, q6
+ vmovl.u8 q6, d17
+ vmax.s8 q2, q2, q7
+ vmovl.u8 q11, d0
+ vmovl.u8 q12, d1
+ vaddw.s8 q14, q14, d4
+ vaddw.s8 q6, q6, d5
+ vsubw.s8 q11, q11, d4
+ vsubw.s8 q12, q12, d5
+ vqmovun.s16 d16, q14
+ vqmovun.s16 d17, q6
+ vqmovun.s16 d0, q11
+ vqmovun.s16 d1, q12
+ .endm
+
+function ff_h264_v_loop_filter_luma_neon, export=1
+ h264_loop_filter_start
+
+ vld1.64 {d0, d1}, [r0,:128], r1
+ vld1.64 {d2, d3}, [r0,:128], r1
+ vld1.64 {d4, d5}, [r0,:128], r1
+ sub r0, r0, r1, lsl #2
+ sub r0, r0, r1, lsl #1
+ vld1.64 {d20,d21}, [r0,:128], r1
+ vld1.64 {d18,d19}, [r0,:128], r1
+ vld1.64 {d16,d17}, [r0,:128], r1
+
+ align_push_regs
+
+ h264_loop_filter_luma
+
+ sub r0, r0, r1, lsl #1
+ vst1.64 {d8, d9}, [r0,:128], r1
+ vst1.64 {d16,d17}, [r0,:128], r1
+ vst1.64 {d0, d1}, [r0,:128], r1
+ vst1.64 {d10,d11}, [r0,:128]
+
+ align_pop_regs
+ bx lr
+ .endfunc
+
+function ff_h264_h_loop_filter_luma_neon, export=1
+ h264_loop_filter_start
+
+ sub r0, r0, #4
+ vld1.64 {d6}, [r0], r1
+ vld1.64 {d20}, [r0], r1
+ vld1.64 {d18}, [r0], r1
+ vld1.64 {d16}, [r0], r1
+ vld1.64 {d0}, [r0], r1
+ vld1.64 {d2}, [r0], r1
+ vld1.64 {d4}, [r0], r1
+ vld1.64 {d26}, [r0], r1
+ vld1.64 {d7}, [r0], r1
+ vld1.64 {d21}, [r0], r1
+ vld1.64 {d19}, [r0], r1
+ vld1.64 {d17}, [r0], r1
+ vld1.64 {d1}, [r0], r1
+ vld1.64 {d3}, [r0], r1
+ vld1.64 {d5}, [r0], r1
+ vld1.64 {d27}, [r0], r1
+
+ transpose_8x8 q3, q10, q9, q8, q0, q1, q2, q13
+
+ align_push_regs
+ sub sp, sp, #16
+ vst1.64 {d4, d5}, [sp,:128]
+ sub sp, sp, #16
+ vst1.64 {d20,d21}, [sp,:128]
+
+ h264_loop_filter_luma
+
+ vld1.64 {d20,d21}, [sp,:128]!
+ vld1.64 {d4, d5}, [sp,:128]!
+
+ transpose_8x8 q3, q10, q4, q8, q0, q5, q2, q13
+
+ sub r0, r0, r1, lsl #4
+ vst1.64 {d6}, [r0], r1
+ vst1.64 {d20}, [r0], r1
+ vst1.64 {d8}, [r0], r1
+ vst1.64 {d16}, [r0], r1
+ vst1.64 {d0}, [r0], r1
+ vst1.64 {d10}, [r0], r1
+ vst1.64 {d4}, [r0], r1
+ vst1.64 {d26}, [r0], r1
+ vst1.64 {d7}, [r0], r1
+ vst1.64 {d21}, [r0], r1
+ vst1.64 {d9}, [r0], r1
+ vst1.64 {d17}, [r0], r1
+ vst1.64 {d1}, [r0], r1
+ vst1.64 {d11}, [r0], r1
+ vst1.64 {d5}, [r0], r1
+ vst1.64 {d27}, [r0], r1
+
+ align_pop_regs
+ bx lr
+ .endfunc
+
+ .macro h264_loop_filter_chroma
+ vdup.8 d22, r2 @ alpha
+ vmovl.u8 q12, d24
+ vabd.u8 d26, d16, d0 @ abs(p0 - q0)
+ vmovl.u8 q2, d0
+ vabd.u8 d28, d18, d16 @ abs(p1 - p0)
+ vsubw.u8 q2, q2, d16
+ vsli.16 d24, d24, #8
+ vshl.i16 q2, q2, #2
+ vabd.u8 d30, d2, d0 @ abs(q1 - q0)
+ vaddw.u8 q2, q2, d18
+ vclt.u8 d26, d26, d22 @ < alpha
+ vsubw.u8 q2, q2, d2
+ vdup.8 d22, r3 @ beta
+ vclt.s8 d25, d24, #0
+ vrshrn.i16 d4, q2, #3
+ vclt.u8 d28, d28, d22 @ < beta
+ vbic d26, d26, d25
+ vclt.u8 d30, d30, d22 @ < beta
+ vand d26, d26, d28
+ vneg.s8 d25, d24
+ vand d26, d26, d30
+ vmin.s8 d4, d4, d24
+ vmovl.u8 q14, d16
+ vand d4, d4, d26
+ vmax.s8 d4, d4, d25
+ vmovl.u8 q11, d0
+ vaddw.s8 q14, q14, d4
+ vsubw.s8 q11, q11, d4
+ vqmovun.s16 d16, q14
+ vqmovun.s16 d0, q11
+ .endm
+
+function ff_h264_v_loop_filter_chroma_neon, export=1
+ h264_loop_filter_start
+
+ sub r0, r0, r1, lsl #1
+ vld1.64 {d18}, [r0,:64], r1
+ vld1.64 {d16}, [r0,:64], r1
+ vld1.64 {d0}, [r0,:64], r1
+ vld1.64 {d2}, [r0,:64]
+
+ h264_loop_filter_chroma
+
+ sub r0, r0, r1, lsl #1
+ vst1.64 {d16}, [r0,:64], r1
+ vst1.64 {d0}, [r0,:64], r1
+
+ bx lr
+ .endfunc
+
+function ff_h264_h_loop_filter_chroma_neon, export=1
+ h264_loop_filter_start
+
+ sub r0, r0, #2
+ vld1.32 {d18[0]}, [r0], r1
+ vld1.32 {d16[0]}, [r0], r1
+ vld1.32 {d0[0]}, [r0], r1
+ vld1.32 {d2[0]}, [r0], r1
+ vld1.32 {d18[1]}, [r0], r1
+ vld1.32 {d16[1]}, [r0], r1
+ vld1.32 {d0[1]}, [r0], r1
+ vld1.32 {d2[1]}, [r0], r1
+
+ vtrn.16 d18, d0
+ vtrn.16 d16, d2
+ vtrn.8 d18, d16
+ vtrn.8 d0, d2
+
+ h264_loop_filter_chroma
+
+ vtrn.16 d18, d0
+ vtrn.16 d16, d2
+ vtrn.8 d18, d16
+ vtrn.8 d0, d2
+
+ sub r0, r0, r1, lsl #3
+ vst1.32 {d18[0]}, [r0], r1
+ vst1.32 {d16[0]}, [r0], r1
+ vst1.32 {d0[0]}, [r0], r1
+ vst1.32 {d2[0]}, [r0], r1
+ vst1.32 {d18[1]}, [r0], r1
+ vst1.32 {d16[1]}, [r0], r1
+ vst1.32 {d0[1]}, [r0], r1
+ vst1.32 {d2[1]}, [r0], r1
+
+ bx lr
+ .endfunc
+
+ /* H.264 qpel MC */
+
+ .macro lowpass_const r
+ movw \r, #5
+ movt \r, #20
+ vmov.32 d6[0], \r
+ .endm
+
+ .macro lowpass_8 r0, r1, r2, r3, d0, d1, narrow=1
+.if \narrow
+ t0 .req q0
+ t1 .req q8
+.else
+ t0 .req \d0
+ t1 .req \d1
+.endif
+ vext.8 d2, \r0, \r1, #2
+ vext.8 d3, \r0, \r1, #3
+ vaddl.u8 q1, d2, d3
+ vext.8 d4, \r0, \r1, #1
+ vext.8 d5, \r0, \r1, #4
+ vaddl.u8 q2, d4, d5
+ vext.8 d30, \r0, \r1, #5
+ vaddl.u8 t0, \r0, d30
+ vext.8 d18, \r2, \r3, #2
+ vmla.i16 t0, q1, d6[1]
+ vext.8 d19, \r2, \r3, #3
+ vaddl.u8 q9, d18, d19
+ vext.8 d20, \r2, \r3, #1
+ vmls.i16 t0, q2, d6[0]
+ vext.8 d21, \r2, \r3, #4
+ vaddl.u8 q10, d20, d21
+ vext.8 d31, \r2, \r3, #5
+ vaddl.u8 t1, \r2, d31
+ vmla.i16 t1, q9, d6[1]
+ vmls.i16 t1, q10, d6[0]
+.if \narrow
+ vqrshrun.s16 \d0, t0, #5
+ vqrshrun.s16 \d1, t1, #5
+.endif
+ .unreq t0
+ .unreq t1
+ .endm
+
+ .macro lowpass_8_1 r0, r1, d0, narrow=1
+.if \narrow
+ t0 .req q0
+.else
+ t0 .req \d0
+.endif
+ vext.8 d2, \r0, \r1, #2
+ vext.8 d3, \r0, \r1, #3
+ vaddl.u8 q1, d2, d3
+ vext.8 d4, \r0, \r1, #1
+ vext.8 d5, \r0, \r1, #4
+ vaddl.u8 q2, d4, d5
+ vext.8 d30, \r0, \r1, #5
+ vaddl.u8 t0, \r0, d30
+ vmla.i16 t0, q1, d6[1]
+ vmls.i16 t0, q2, d6[0]
+.if \narrow
+ vqrshrun.s16 \d0, t0, #5
+.endif
+ .unreq t0
+ .endm
+
+ .macro lowpass_8.16 r0, r1, l0, h0, l1, h1, d
+ vext.16 q1, \r0, \r1, #2
+ vext.16 q0, \r0, \r1, #3
+ vaddl.s16 q9, d2, d0
+ vext.16 q2, \r0, \r1, #1
+ vaddl.s16 q1, d3, d1
+ vext.16 q3, \r0, \r1, #4
+ vaddl.s16 q10, d4, d6
+ vext.16 \r1, \r0, \r1, #5
+ vaddl.s16 q2, d5, d7
+ vaddl.s16 q0, \h0, \h1
+ vaddl.s16 q8, \l0, \l1
+
+ vshl.i32 q3, q9, #4
+ vshl.i32 q9, q9, #2
+ vshl.i32 q15, q10, #2
+ vadd.i32 q9, q9, q3
+ vadd.i32 q10, q10, q15
+
+ vshl.i32 q3, q1, #4
+ vshl.i32 q1, q1, #2
+ vshl.i32 q15, q2, #2
+ vadd.i32 q1, q1, q3
+ vadd.i32 q2, q2, q15
+
+ vadd.i32 q9, q9, q8
+ vsub.i32 q9, q9, q10
+
+ vadd.i32 q1, q1, q0
+ vsub.i32 q1, q1, q2
+
+ vrshrn.s32 d18, q9, #10
+ vrshrn.s32 d19, q1, #10
+
+ vqmovun.s16 \d, q9
+ .endm
+
+function put_h264_qpel16_h_lowpass_neon_packed
+ mov r4, lr
+ mov ip, #16
+ mov r3, #8
+ bl put_h264_qpel8_h_lowpass_neon
+ sub r1, r1, r2, lsl #4
+ add r1, r1, #8
+ mov ip, #16
+ mov lr, r4
+ b put_h264_qpel8_h_lowpass_neon
+ .endfunc
+
+function put_h264_qpel16_h_lowpass_neon
+ push {lr}
+ mov ip, #16
+ bl put_h264_qpel8_h_lowpass_neon
+ sub r0, r0, r3, lsl #4
+ sub r1, r1, r2, lsl #4
+ add r0, r0, #8
+ add r1, r1, #8
+ mov ip, #16
+ pop {lr}
+ .endfunc
+
+function put_h264_qpel8_h_lowpass_neon
+1: vld1.64 {d0, d1}, [r1], r2
+ vld1.64 {d16,d17}, [r1], r2
+ subs ip, ip, #2
+ lowpass_8 d0, d1, d16, d17, d0, d16
+ vst1.64 {d0}, [r0,:64], r3
+ vst1.64 {d16}, [r0,:64], r3
+ bne 1b
+ bx lr
+ .endfunc
+
+function put_h264_qpel16_h_lowpass_l2_neon
+ push {lr}
+ mov ip, #16
+ bl put_h264_qpel8_h_lowpass_l2_neon
+ sub r0, r0, r2, lsl #4
+ sub r1, r1, r2, lsl #4
+ sub r3, r3, r2, lsl #4
+ add r0, r0, #8
+ add r1, r1, #8
+ add r3, r3, #8
+ mov ip, #16
+ pop {lr}
+ .endfunc
+
+function put_h264_qpel8_h_lowpass_l2_neon
+1: vld1.64 {d0, d1}, [r1], r2
+ vld1.64 {d16,d17}, [r1], r2
+ vld1.64 {d28}, [r3], r2
+ vld1.64 {d29}, [r3], r2
+ subs ip, ip, #2
+ lowpass_8 d0, d1, d16, d17, d0, d1
+ vrhadd.u8 q0, q0, q14
+ vst1.64 {d0}, [r0,:64], r2
+ vst1.64 {d1}, [r0,:64], r2
+ bne 1b
+ bx lr
+ .endfunc
+
+function put_h264_qpel16_v_lowpass_neon_packed
+ mov r4, lr
+ mov r2, #8
+ bl put_h264_qpel8_v_lowpass_neon
+ sub r1, r1, r3, lsl #2
+ bl put_h264_qpel8_v_lowpass_neon
+ sub r1, r1, r3, lsl #4
+ sub r1, r1, r3, lsl #2
+ add r1, r1, #8
+ bl put_h264_qpel8_v_lowpass_neon
+ sub r1, r1, r3, lsl #2
+ mov lr, r4
+ b put_h264_qpel8_v_lowpass_neon
+ .endfunc
+
+function put_h264_qpel16_v_lowpass_neon
+ mov r4, lr
+ bl put_h264_qpel8_v_lowpass_neon
+ sub r1, r1, r3, lsl #2
+ bl put_h264_qpel8_v_lowpass_neon
+ sub r0, r0, r2, lsl #4
+ add r0, r0, #8
+ sub r1, r1, r3, lsl #4
+ sub r1, r1, r3, lsl #2
+ add r1, r1, #8
+ bl put_h264_qpel8_v_lowpass_neon
+ sub r1, r1, r3, lsl #2
+ mov lr, r4
+ .endfunc
+
+function put_h264_qpel8_v_lowpass_neon
+ vld1.64 {d8}, [r1], r3
+ vld1.64 {d10}, [r1], r3
+ vld1.64 {d12}, [r1], r3
+ vld1.64 {d14}, [r1], r3
+ vld1.64 {d22}, [r1], r3
+ vld1.64 {d24}, [r1], r3
+ vld1.64 {d26}, [r1], r3
+ vld1.64 {d28}, [r1], r3
+ vld1.64 {d9}, [r1], r3
+ vld1.64 {d11}, [r1], r3
+ vld1.64 {d13}, [r1], r3
+ vld1.64 {d15}, [r1], r3
+ vld1.64 {d23}, [r1]
+
+ transpose_8x8 q4, q5, q6, q7, q11, q12, q13, q14
+ lowpass_8 d8, d9, d10, d11, d8, d10
+ lowpass_8 d12, d13, d14, d15, d12, d14
+ lowpass_8 d22, d23, d24, d25, d22, d24
+ lowpass_8 d26, d27, d28, d29, d26, d28
+ transpose_8x8 d8, d10, d12, d14, d22, d24, d26, d28
+
+ vst1.64 {d8}, [r0,:64], r2
+ vst1.64 {d10}, [r0,:64], r2
+ vst1.64 {d12}, [r0,:64], r2
+ vst1.64 {d14}, [r0,:64], r2
+ vst1.64 {d22}, [r0,:64], r2
+ vst1.64 {d24}, [r0,:64], r2
+ vst1.64 {d26}, [r0,:64], r2
+ vst1.64 {d28}, [r0,:64], r2
+
+ bx lr
+ .endfunc
+
+function put_h264_qpel16_v_lowpass_l2_neon
+ mov r4, lr
+ bl put_h264_qpel8_v_lowpass_l2_neon
+ sub r1, r1, r3, lsl #2
+ bl put_h264_qpel8_v_lowpass_l2_neon
+ sub r0, r0, r3, lsl #4
+ sub ip, ip, r2, lsl #4
+ add r0, r0, #8
+ add ip, ip, #8
+ sub r1, r1, r3, lsl #4
+ sub r1, r1, r3, lsl #2
+ add r1, r1, #8
+ bl put_h264_qpel8_v_lowpass_l2_neon
+ sub r1, r1, r3, lsl #2
+ mov lr, r4
+ .endfunc
+
+function put_h264_qpel8_v_lowpass_l2_neon
+ vld1.64 {d8}, [r1], r3
+ vld1.64 {d10}, [r1], r3
+ vld1.64 {d12}, [r1], r3
+ vld1.64 {d14}, [r1], r3
+ vld1.64 {d22}, [r1], r3
+ vld1.64 {d24}, [r1], r3
+ vld1.64 {d26}, [r1], r3
+ vld1.64 {d28}, [r1], r3
+ vld1.64 {d9}, [r1], r3
+ vld1.64 {d11}, [r1], r3
+ vld1.64 {d13}, [r1], r3
+ vld1.64 {d15}, [r1], r3
+ vld1.64 {d23}, [r1]
+
+ transpose_8x8 q4, q5, q6, q7, q11, q12, q13, q14
+ lowpass_8 d8, d9, d10, d11, d8, d9
+ lowpass_8 d12, d13, d14, d15, d12, d13
+ lowpass_8 d22, d23, d24, d25, d22, d23
+ lowpass_8 d26, d27, d28, d29, d26, d27
+ transpose_8x8 d8, d9, d12, d13, d22, d23, d26, d27
+
+ vld1.64 {d0}, [ip], r2
+ vld1.64 {d1}, [ip], r2
+ vld1.64 {d2}, [ip], r2
+ vld1.64 {d3}, [ip], r2
+ vld1.64 {d4}, [ip], r2
+ vrhadd.u8 q0, q0, q4
+ vld1.64 {d5}, [ip], r2
+ vrhadd.u8 q1, q1, q6
+ vld1.64 {d10}, [ip], r2
+ vrhadd.u8 q2, q2, q11
+ vld1.64 {d11}, [ip], r2
+
+ vst1.64 {d0}, [r0,:64], r3
+ vst1.64 {d1}, [r0,:64], r3
+ vrhadd.u8 q5, q5, q13
+ vst1.64 {d2}, [r0,:64], r3
+ vst1.64 {d3}, [r0,:64], r3
+ vst1.64 {d4}, [r0,:64], r3
+ vst1.64 {d5}, [r0,:64], r3
+ vst1.64 {d10}, [r0,:64], r3
+ vst1.64 {d11}, [r0,:64], r3
+
+ bx lr
+ .endfunc
+
+function put_h264_qpel8_hv_lowpass_neon_top
+ lowpass_const ip
+ mov ip, #12
+1: vld1.64 {d0, d1}, [r1], r3
+ vld1.64 {d16,d17}, [r1], r3
+ subs ip, ip, #2
+ lowpass_8 d0, d1, d16, d17, q11, q12, narrow=0
+ vst1.64 {d22-d25}, [r4,:128]!
+ bne 1b
+
+ vld1.64 {d0, d1}, [r1]
+ lowpass_8_1 d0, d1, q12, narrow=0
+
+ mov ip, #-16
+ add r4, r4, ip
+ vld1.64 {d30,d31}, [r4,:128], ip
+ vld1.64 {d20,d21}, [r4,:128], ip
+ vld1.64 {d18,d19}, [r4,:128], ip
+ vld1.64 {d16,d17}, [r4,:128], ip
+ vld1.64 {d14,d15}, [r4,:128], ip
+ vld1.64 {d12,d13}, [r4,:128], ip
+ vld1.64 {d10,d11}, [r4,:128], ip
+ vld1.64 {d8, d9}, [r4,:128], ip
+ vld1.64 {d6, d7}, [r4,:128], ip
+ vld1.64 {d4, d5}, [r4,:128], ip
+ vld1.64 {d2, d3}, [r4,:128], ip
+ vld1.64 {d0, d1}, [r4,:128]
+
+ swap4 d1, d3, d5, d7, d8, d10, d12, d14
+ transpose16_4x4 q0, q1, q2, q3, q4, q5, q6, q7
+
+ swap4 d17, d19, d21, d31, d24, d26, d28, d22
+ transpose16_4x4 q8, q9, q10, q15, q12, q13, q14, q11
+
+ vst1.64 {d30,d31}, [r4,:128]!
+ vst1.64 {d6, d7}, [r4,:128]!
+ vst1.64 {d20,d21}, [r4,:128]!
+ vst1.64 {d4, d5}, [r4,:128]!
+ vst1.64 {d18,d19}, [r4,:128]!
+ vst1.64 {d2, d3}, [r4,:128]!
+ vst1.64 {d16,d17}, [r4,:128]!
+ vst1.64 {d0, d1}, [r4,:128]
+
+ lowpass_8.16 q4, q12, d8, d9, d24, d25, d8
+ lowpass_8.16 q5, q13, d10, d11, d26, d27, d9
+ lowpass_8.16 q6, q14, d12, d13, d28, d29, d10
+ lowpass_8.16 q7, q11, d14, d15, d22, d23, d11
+
+ vld1.64 {d16,d17}, [r4,:128], ip
+ vld1.64 {d30,d31}, [r4,:128], ip
+ lowpass_8.16 q8, q15, d16, d17, d30, d31, d12
+ vld1.64 {d16,d17}, [r4,:128], ip
+ vld1.64 {d30,d31}, [r4,:128], ip
+ lowpass_8.16 q8, q15, d16, d17, d30, d31, d13
+ vld1.64 {d16,d17}, [r4,:128], ip
+ vld1.64 {d30,d31}, [r4,:128], ip
+ lowpass_8.16 q8, q15, d16, d17, d30, d31, d14
+ vld1.64 {d16,d17}, [r4,:128], ip
+ vld1.64 {d30,d31}, [r4,:128]
+ lowpass_8.16 q8, q15, d16, d17, d30, d31, d15
+
+ transpose_8x8 d12, d13, d14, d15, d8, d9, d10, d11
+
+ bx lr
+ .endfunc
+
+function put_h264_qpel8_hv_lowpass_neon
+ mov r10, lr
+ bl put_h264_qpel8_hv_lowpass_neon_top
+ vst1.64 {d12}, [r0,:64], r2
+ vst1.64 {d13}, [r0,:64], r2
+ vst1.64 {d14}, [r0,:64], r2
+ vst1.64 {d15}, [r0,:64], r2
+ vst1.64 {d8}, [r0,:64], r2
+ vst1.64 {d9}, [r0,:64], r2
+ vst1.64 {d10}, [r0,:64], r2
+ vst1.64 {d11}, [r0,:64], r2
+
+ mov lr, r10
+ bx lr
+ .endfunc
+
+function put_h264_qpel8_hv_lowpass_l2_neon
+ mov r10, lr
+ bl put_h264_qpel8_hv_lowpass_neon_top
+
+ vld1.64 {d0, d1}, [r2,:128]!
+ vld1.64 {d2, d3}, [r2,:128]!
+ vrhadd.u8 q0, q0, q6
+ vld1.64 {d4, d5}, [r2,:128]!
+ vrhadd.u8 q1, q1, q7
+ vld1.64 {d6, d7}, [r2,:128]!
+ vrhadd.u8 q2, q2, q4
+
+ vst1.64 {d0}, [r0,:64], r3
+ vrhadd.u8 q3, q3, q5
+ vst1.64 {d1}, [r0,:64], r3
+ vst1.64 {d2}, [r0,:64], r3
+ vst1.64 {d3}, [r0,:64], r3
+ vst1.64 {d4}, [r0,:64], r3
+ vst1.64 {d5}, [r0,:64], r3
+ vst1.64 {d6}, [r0,:64], r3
+ vst1.64 {d7}, [r0,:64], r3
+
+ mov lr, r10
+ bx lr
+ .endfunc
+
+function put_h264_qpel16_hv_lowpass_neon
+ mov r9, lr
+ bl put_h264_qpel8_hv_lowpass_neon
+ sub r1, r1, r3, lsl #2
+ bl put_h264_qpel8_hv_lowpass_neon
+ sub r1, r1, r3, lsl #4
+ sub r1, r1, r3, lsl #2
+ add r1, r1, #8
+ sub r0, r0, r2, lsl #4
+ add r0, r0, #8
+ bl put_h264_qpel8_hv_lowpass_neon
+ sub r1, r1, r3, lsl #2
+ mov lr, r9
+ b put_h264_qpel8_hv_lowpass_neon
+ .endfunc
+
+function put_h264_qpel16_hv_lowpass_l2_neon
+ mov r9, lr
+ sub r2, r4, #256
+ bl put_h264_qpel8_hv_lowpass_l2_neon
+ sub r1, r1, r3, lsl #2
+ bl put_h264_qpel8_hv_lowpass_l2_neon
+ sub r1, r1, r3, lsl #4
+ sub r1, r1, r3, lsl #2
+ add r1, r1, #8
+ sub r0, r0, r3, lsl #4
+ add r0, r0, #8
+ bl put_h264_qpel8_hv_lowpass_l2_neon
+ sub r1, r1, r3, lsl #2
+ mov lr, r9
+ b put_h264_qpel8_hv_lowpass_l2_neon
+ .endfunc
+
+function ff_put_h264_qpel8_mc10_neon, export=1
+ lowpass_const r3
+ mov r3, r1
+ sub r1, r1, #2
+ mov ip, #8
+ b put_h264_qpel8_h_lowpass_l2_neon
+ .endfunc
+
+function ff_put_h264_qpel8_mc20_neon, export=1
+ lowpass_const r3
+ sub r1, r1, #2
+ mov r3, r2
+ mov ip, #8
+ b put_h264_qpel8_h_lowpass_neon
+ .endfunc
+
+function ff_put_h264_qpel8_mc30_neon, export=1
+ lowpass_const r3
+ add r3, r1, #1
+ sub r1, r1, #2
+ mov ip, #8
+ b put_h264_qpel8_h_lowpass_l2_neon
+ .endfunc
+
+function ff_put_h264_qpel8_mc01_neon, export=1
+ push {lr}
+ mov ip, r1
+put_h264_qpel8_mc01:
+ lowpass_const r3
+ mov r3, r2
+ sub r1, r1, r2, lsl #1
+ vpush {d8-d15}
+ bl put_h264_qpel8_v_lowpass_l2_neon
+ vpop {d8-d15}
+ pop {pc}
+ .endfunc
+
+function ff_put_h264_qpel8_mc11_neon, export=1
+ push {r0, r1, r2, lr}
+put_h264_qpel8_mc11:
+ lowpass_const r3
+ sub sp, sp, #64
+ mov r0, sp
+ sub r1, r1, #2
+ mov r3, #8
+ mov ip, #8
+ vpush {d8-d15}
+ bl put_h264_qpel8_h_lowpass_neon
+ ldrd r0, [sp, #128]
+ mov r3, r2
+ add ip, sp, #64
+ sub r1, r1, r2, lsl #1
+ mov r2, #8
+ bl put_h264_qpel8_v_lowpass_l2_neon
+ vpop {d8-d15}
+ add sp, sp, #76
+ pop {pc}
+ .endfunc
+
+function ff_put_h264_qpel8_mc21_neon, export=1
+ push {r0, r1, r4, r10, r11, lr}
+put_h264_qpel8_mc21:
+ lowpass_const r3
+ mov r11, sp
+ bic sp, sp, #15
+ sub sp, sp, #(8*8+16*12)
+ sub r1, r1, #2
+ mov r3, #8
+ mov r0, sp
+ mov ip, #8
+ vpush {d8-d15}
+ bl put_h264_qpel8_h_lowpass_neon
+ mov r4, r0
+ ldrd r0, [r11]
+ sub r1, r1, r2, lsl #1
+ sub r1, r1, #2
+ mov r3, r2
+ sub r2, r4, #64
+ bl put_h264_qpel8_hv_lowpass_l2_neon
+ vpop {d8-d15}
+ add sp, r11, #8
+ pop {r4, r10, r11, pc}
+ .endfunc
+
+function ff_put_h264_qpel8_mc31_neon, export=1
+ add r1, r1, #1
+ push {r0, r1, r2, lr}
+ sub r1, r1, #1
+ b put_h264_qpel8_mc11
+ .endfunc
+
+function ff_put_h264_qpel8_mc02_neon, export=1
+ push {lr}
+ lowpass_const r3
+ sub r1, r1, r2, lsl #1
+ mov r3, r2
+ vpush {d8-d15}
+ bl put_h264_qpel8_v_lowpass_neon
+ vpop {d8-d15}
+ pop {pc}
+ .endfunc
+
+function ff_put_h264_qpel8_mc12_neon, export=1
+ push {r0, r1, r4, r10, r11, lr}
+put_h264_qpel8_mc12:
+ lowpass_const r3
+ mov r11, sp
+ bic sp, sp, #15
+ sub sp, sp, #(8*8+16*12)
+ sub r1, r1, r2, lsl #1
+ mov r3, r2
+ mov r2, #8
+ mov r0, sp
+ vpush {d8-d15}
+ bl put_h264_qpel8_v_lowpass_neon
+ mov r4, r0
+ ldrd r0, [r11]
+ sub r1, r1, r3, lsl #1
+ sub r1, r1, #2
+ sub r2, r4, #64
+ bl put_h264_qpel8_hv_lowpass_l2_neon
+ vpop {d8-d15}
+ add sp, r11, #8
+ pop {r4, r10, r11, pc}
+ .endfunc
+
+function ff_put_h264_qpel8_mc22_neon, export=1
+ push {r4, r10, r11, lr}
+ mov r11, sp
+ bic sp, sp, #15
+ sub r1, r1, r2, lsl #1
+ sub r1, r1, #2
+ mov r3, r2
+ sub sp, sp, #(16*12)
+ mov r4, sp
+ vpush {d8-d15}
+ bl put_h264_qpel8_hv_lowpass_neon
+ vpop {d8-d15}
+ mov sp, r11
+ pop {r4, r10, r11, pc}
+ .endfunc
+
+function ff_put_h264_qpel8_mc32_neon, export=1
+ push {r0, r1, r4, r10, r11, lr}
+ add r1, r1, #1
+ b put_h264_qpel8_mc12
+ .endfunc
+
+function ff_put_h264_qpel8_mc03_neon, export=1
+ push {lr}
+ add ip, r1, r2
+ b put_h264_qpel8_mc01
+ .endfunc
+
+function ff_put_h264_qpel8_mc13_neon, export=1
+ push {r0, r1, r2, lr}
+ add r1, r1, r2
+ b put_h264_qpel8_mc11
+ .endfunc
+
+function ff_put_h264_qpel8_mc23_neon, export=1
+ push {r0, r1, r4, r10, r11, lr}
+ add r1, r1, r2
+ b put_h264_qpel8_mc21
+ .endfunc
+
+function ff_put_h264_qpel8_mc33_neon, export=1
+ add r1, r1, #1
+ push {r0, r1, r2, lr}
+ add r1, r1, r2
+ sub r1, r1, #1
+ b put_h264_qpel8_mc11
+ .endfunc
+
+function ff_put_h264_qpel16_mc10_neon, export=1
+ lowpass_const r3
+ mov r3, r1
+ sub r1, r1, #2
+ b put_h264_qpel16_h_lowpass_l2_neon
+ .endfunc
+
+function ff_put_h264_qpel16_mc20_neon, export=1
+ lowpass_const r3
+ sub r1, r1, #2
+ mov r3, r2
+ b put_h264_qpel16_h_lowpass_neon
+ .endfunc
+
+function ff_put_h264_qpel16_mc30_neon, export=1
+ lowpass_const r3
+ add r3, r1, #1
+ sub r1, r1, #2
+ b put_h264_qpel16_h_lowpass_l2_neon
+ .endfunc
+
+function ff_put_h264_qpel16_mc01_neon, export=1
+ push {r4, lr}
+ mov ip, r1
+put_h264_qpel16_mc01:
+ lowpass_const r3
+ mov r3, r2
+ sub r1, r1, r2, lsl #1
+ vpush {d8-d15}
+ bl put_h264_qpel16_v_lowpass_l2_neon
+ vpop {d8-d15}
+ pop {r4, pc}
+ .endfunc
+
+function ff_put_h264_qpel16_mc11_neon, export=1
+ push {r0, r1, r4, lr}
+put_h264_qpel16_mc11:
+ lowpass_const r3
+ sub sp, sp, #256
+ mov r0, sp
+ sub r1, r1, #2
+ mov r3, #16
+ vpush {d8-d15}
+ bl put_h264_qpel16_h_lowpass_neon
+ add r0, sp, #256
+ ldrd r0, [r0, #64]
+ mov r3, r2
+ add ip, sp, #64
+ sub r1, r1, r2, lsl #1
+ mov r2, #16
+ bl put_h264_qpel16_v_lowpass_l2_neon
+ vpop {d8-d15}
+ add sp, sp, #(256+8)
+ pop {r4, pc}
+ .endfunc
+
+function ff_put_h264_qpel16_mc21_neon, export=1
+ push {r0, r1, r4-r5, r9-r11, lr}
+put_h264_qpel16_mc21:
+ lowpass_const r3
+ mov r11, sp
+ bic sp, sp, #15
+ sub sp, sp, #(16*16+16*12)
+ sub r1, r1, #2
+ mov r0, sp
+ vpush {d8-d15}
+ bl put_h264_qpel16_h_lowpass_neon_packed
+ mov r4, r0
+ ldrd r0, [r11]
+ sub r1, r1, r2, lsl #1
+ sub r1, r1, #2
+ mov r3, r2
+ bl put_h264_qpel16_hv_lowpass_l2_neon
+ vpop {d8-d15}
+ add sp, r11, #8
+ pop {r4-r5, r9-r11, pc}
+ .endfunc
+
+function ff_put_h264_qpel16_mc31_neon, export=1
+ add r1, r1, #1
+ push {r0, r1, r4, lr}
+ sub r1, r1, #1
+ b put_h264_qpel16_mc11
+ .endfunc
+
+function ff_put_h264_qpel16_mc02_neon, export=1
+ push {r4, lr}
+ lowpass_const r3
+ sub r1, r1, r2, lsl #1
+ mov r3, r2
+ vpush {d8-d15}
+ bl put_h264_qpel16_v_lowpass_neon
+ vpop {d8-d15}
+ pop {r4, pc}
+ .endfunc
+
+function ff_put_h264_qpel16_mc12_neon, export=1
+ push {r0, r1, r4-r5, r9-r11, lr}
+put_h264_qpel16_mc12:
+ lowpass_const r3
+ mov r11, sp
+ bic sp, sp, #15
+ sub sp, sp, #(16*16+16*12)
+ sub r1, r1, r2, lsl #1
+ mov r0, sp
+ mov r3, r2
+ vpush {d8-d15}
+ bl put_h264_qpel16_v_lowpass_neon_packed
+ mov r4, r0
+ ldrd r0, [r11]
+ sub r1, r1, r3, lsl #1
+ sub r1, r1, #2
+ mov r2, r3
+ bl put_h264_qpel16_hv_lowpass_l2_neon
+ vpop {d8-d15}
+ add sp, r11, #8
+ pop {r4-r5, r9-r11, pc}
+ .endfunc
+
+function ff_put_h264_qpel16_mc22_neon, export=1
+ push {r4, r9-r11, lr}
+ lowpass_const r3
+ mov r11, sp
+ bic sp, sp, #15
+ sub r1, r1, r2, lsl #1
+ sub r1, r1, #2
+ mov r3, r2
+ sub sp, sp, #(16*12)
+ mov r4, sp
+ vpush {d8-d15}
+ bl put_h264_qpel16_hv_lowpass_neon
+ vpop {d8-d15}
+ mov sp, r11
+ pop {r4, r9-r11, pc}
+ .endfunc
+
+function ff_put_h264_qpel16_mc32_neon, export=1
+ push {r0, r1, r4-r5, r9-r11, lr}
+ add r1, r1, #1
+ b put_h264_qpel16_mc12
+ .endfunc
+
+function ff_put_h264_qpel16_mc03_neon, export=1
+ push {r4, lr}
+ add ip, r1, r2
+ b put_h264_qpel16_mc01
+ .endfunc
+
+function ff_put_h264_qpel16_mc13_neon, export=1
+ push {r0, r1, r4, lr}
+ add r1, r1, r2
+ b put_h264_qpel16_mc11
+ .endfunc
+
+function ff_put_h264_qpel16_mc23_neon, export=1
+ push {r0, r1, r4-r5, r9-r11, lr}
+ add r1, r1, r2
+ b put_h264_qpel16_mc21
+ .endfunc
+
+function ff_put_h264_qpel16_mc33_neon, export=1
+ add r1, r1, #1
+ push {r0, r1, r4, lr}
+ add r1, r1, r2
+ sub r1, r1, #1
+ b put_h264_qpel16_mc11
+ .endfunc
diff --git a/libavcodec/arm/h264idct_neon.S b/libavcodec/arm/h264idct_neon.S
new file mode 100644
index 0000000..6527390
--- /dev/null
+++ b/libavcodec/arm/h264idct_neon.S
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2008 Mans Rullgard <mans at mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "asm.S"
+
+ preserve8
+ .fpu neon
+
+ .text
+
+function ff_h264_idct_add_neon, export=1
+ mov r3, #(1<<5)
+ vmov.i16 d16, #0
+ vmov.16 d16[0], r3
+ vld1.64 {d0-d3}, [r1,:128]
+ vadd.i16 d0, d0, d16
+
+ vswp d1, d2
+ vadd.i16 d4, d0, d1
+ vshr.s16 q8, q1, #1
+ vsub.i16 d5, d0, d1
+ vadd.i16 d6, d2, d17
+ vsub.i16 d7, d16, d3
+ vadd.i16 q0, q2, q3
+ vsub.i16 q1, q2, q3
+
+ vtrn.16 d0, d1
+ vtrn.16 d3, d2
+ vtrn.32 d0, d3
+ vtrn.32 d1, d2
+
+ vadd.i16 d4, d0, d3
+ vld1.32 {d18[0]}, [r0,:32], r2
+ vswp d1, d3
+ vshr.s16 q8, q1, #1
+ vld1.32 {d19[1]}, [r0,:32], r2
+ vsub.i16 d5, d0, d1
+ vld1.32 {d18[1]}, [r0,:32], r2
+ vadd.i16 d6, d16, d3
+ vld1.32 {d19[0]}, [r0,:32], r2
+ vsub.i16 d7, d2, d17
+ sub r0, r0, r2, lsl #2
+ vadd.i16 q0, q2, q3
+ vsub.i16 q1, q2, q3
+
+ vshr.s16 q0, q0, #6
+ vshr.s16 q1, q1, #6
+
+ vaddw.u8 q0, q0, d18
+ vaddw.u8 q1, q1, d19
+
+ vqmovun.s16 d0, q0
+ vqmovun.s16 d1, q1
+
+ vst1.32 {d0[0]}, [r0,:32], r2
+ vst1.32 {d1[1]}, [r0,:32], r2
+ vst1.32 {d0[1]}, [r0,:32], r2
+ vst1.32 {d1[0]}, [r0,:32], r2
+
+ bx lr
+ .endfunc
+
+function ff_h264_idct_dc_add_neon, export=1
+ vld1.16 {d2[],d3[]}, [r1,:16]
+ vrshr.s16 q1, q1, #6
+ vld1.32 {d0[0]}, [r0,:32], r2
+ vld1.32 {d0[1]}, [r0,:32], r2
+ vaddw.u8 q2, q1, d0
+ vld1.32 {d1[0]}, [r0,:32], r2
+ vld1.32 {d1[1]}, [r0,:32], r2
+ vaddw.u8 q1, q1, d1
+ vqmovun.s16 d0, q2
+ vqmovun.s16 d1, q1
+ sub r0, r0, r2, lsl #2
+ vst1.32 {d0[0]}, [r0,:32], r2
+ vst1.32 {d0[1]}, [r0,:32], r2
+ vst1.32 {d1[0]}, [r0,:32], r2
+ vst1.32 {d1[1]}, [r0,:32], r2
+ bx lr
+ .endfunc
+
+function ff_h264_idct_add16_neon, export=1
+ push {r4-r8,lr}
+ mov r4, r0
+ mov r5, r1
+ mov r1, r2
+ mov r2, r3
+ ldr r6, [sp, #24]
+ movrel r7, scan8
+ mov ip, #16
+1: ldrb r8, [r7], #1
+ ldr r0, [r5], #4
+ ldrb r8, [r6, r8]
+ subs r8, r8, #1
+ blt 2f
+ ldrsh lr, [r1]
+ add r0, r0, r4
+ movne lr, #0
+ cmp lr, #0
+ adrne lr, ff_h264_idct_dc_add_neon
+ adreq lr, ff_h264_idct_add_neon
+ blx lr
+2: subs ip, ip, #1
+ add r1, r1, #32
+ bne 1b
+ pop {r4-r8,pc}
+ .endfunc
+
+function ff_h264_idct_add16intra_neon, export=1
+ push {r4-r8,lr}
+ mov r4, r0
+ mov r5, r1
+ mov r1, r2
+ mov r2, r3
+ ldr r6, [sp, #24]
+ movrel r7, scan8
+ mov ip, #16
+1: ldrb r8, [r7], #1
+ ldr r0, [r5], #4
+ ldrb r8, [r6, r8]
+ add r0, r0, r4
+ cmp r8, #0
+ ldrsh r8, [r1]
+ adrne lr, ff_h264_idct_add_neon
+ adreq lr, ff_h264_idct_dc_add_neon
+ cmpeq r8, #0
+ blxne lr
+ subs ip, ip, #1
+ add r1, r1, #32
+ bne 1b
+ pop {r4-r8,pc}
+ .endfunc
+
+function ff_h264_idct_add8_neon, export=1
+ push {r4-r10,lr}
+ ldm r0, {r4,r9}
+ add r5, r1, #16*4
+ add r1, r2, #16*32
+ mov r2, r3
+ ldr r6, [sp, #32]
+ movrel r7, scan8+16
+ mov ip, #8
+1: ldrb r8, [r7], #1
+ ldr r0, [r5], #4
+ ldrb r8, [r6, r8]
+ tst ip, #4
+ addeq r0, r0, r4
+ addne r0, r0, r9
+ cmp r8, #0
+ ldrsh r8, [r1]
+ adrne lr, ff_h264_idct_add_neon
+ adreq lr, ff_h264_idct_dc_add_neon
+ cmpeq r8, #0
+ blxne lr
+ subs ip, ip, #1
+ add r1, r1, #32
+ bne 1b
+ pop {r4-r10,pc}
+ .endfunc
+
+ .section .rodata
+scan8: .byte 4+1*8, 5+1*8, 4+2*8, 5+2*8
+ .byte 6+1*8, 7+1*8, 6+2*8, 7+2*8
+ .byte 4+3*8, 5+3*8, 4+4*8, 5+4*8
+ .byte 6+3*8, 7+3*8, 6+4*8, 7+4*8
+ .byte 1+1*8, 2+1*8
+ .byte 1+2*8, 2+2*8
+ .byte 1+4*8, 2+4*8
+ .byte 1+5*8, 2+5*8
diff --git a/libavcodec/armv4l/jrevdct_arm.S b/libavcodec/arm/jrevdct_arm.S
similarity index 100%
rename from libavcodec/armv4l/jrevdct_arm.S
rename to libavcodec/arm/jrevdct_arm.S
diff --git a/libavcodec/arm/mathops.h b/libavcodec/arm/mathops.h
new file mode 100644
index 0000000..e36316c
--- /dev/null
+++ b/libavcodec/arm/mathops.h
@@ -0,0 +1,93 @@
+/*
+ * simple math operations
+ * Copyright (c) 2006 Michael Niedermayer <michaelni at gmx.at> et al
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_ARM_MATHOPS_H
+#define AVCODEC_ARM_MATHOPS_H
+
+#include <stdint.h>
+#include "libavutil/common.h"
+
+# define MULL MULL
+static inline av_const int MULL(int a, int b, unsigned shift)
+{
+ int lo, hi;
+ __asm__("smull %0, %1, %2, %3 \n\t"
+ "mov %0, %0, lsr %4 \n\t"
+ "add %1, %0, %1, lsl %5 \n\t"
+ : "=&r"(lo), "=&r"(hi)
+ : "r"(b), "r"(a), "i"(shift), "i"(32-shift));
+ return hi;
+}
+
+#define MULH MULH
+#ifdef HAVE_ARMV6
+static inline av_const int MULH(int a, int b)
+{
+ int r;
+ __asm__ ("smmul %0, %1, %2" : "=r"(r) : "r"(a), "r"(b));
+ return r;
+}
+#else
+static inline av_const int MULH(int a, int b)
+{
+ int lo, hi;
+ __asm__ ("smull %0, %1, %2, %3" : "=&r"(lo), "=&r"(hi) : "r"(b), "r"(a));
+ return hi;
+}
+#endif
+
+static inline av_const int64_t MUL64(int a, int b)
+{
+ union { uint64_t x; unsigned hl[2]; } x;
+ __asm__ ("smull %0, %1, %2, %3"
+ : "=r"(x.hl[0]), "=r"(x.hl[1]) : "r"(a), "r"(b));
+ return x.x;
+}
+#define MUL64 MUL64
+
+static inline av_const int64_t MAC64(int64_t d, int a, int b)
+{
+ union { uint64_t x; unsigned hl[2]; } x = { d };
+ __asm__ ("smlal %0, %1, %2, %3"
+ : "+r"(x.hl[0]), "+r"(x.hl[1]) : "r"(a), "r"(b));
+ return x.x;
+}
+#define MAC64(d, a, b) ((d) = MAC64(d, a, b))
+#define MLS64(d, a, b) MAC64(d, -(a), b)
+
+#if defined(HAVE_ARMV5TE)
+
+/* signed 16x16 -> 32 multiply add accumulate */
+# define MAC16(rt, ra, rb) \
+ __asm__ ("smlabb %0, %1, %2, %0" : "+r"(rt) : "r"(ra), "r"(rb));
+
+/* signed 16x16 -> 32 multiply */
+# define MUL16 MUL16
+static inline av_const MUL16(int ra, int rb)
+{
+ int rt;
+ __asm__ ("smulbb %0, %1, %2" : "=r"(rt) : "r"(ra), "r"(rb));
+ return rt;
+}
+
+#endif
+
+#endif /* AVCODEC_ARM_MATHOPS_H */
diff --git a/libavcodec/arm/mpegvideo_arm.c b/libavcodec/arm/mpegvideo_arm.c
new file mode 100644
index 0000000..18faed2
--- /dev/null
+++ b/libavcodec/arm/mpegvideo_arm.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2002 Michael Niedermayer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavcodec/avcodec.h"
+#include "libavcodec/dsputil.h"
+#include "libavcodec/mpegvideo.h"
+
+void MPV_common_init_iwmmxt(MpegEncContext *s);
+void MPV_common_init_armv5te(MpegEncContext *s);
+
+void MPV_common_init_arm(MpegEncContext *s)
+{
+ /* IWMMXT support is a superset of armv5te, so
+ * allow optimized functions for armv5te unless
+ * a better iwmmxt function exists
+ */
+#ifdef HAVE_ARMV5TE
+ MPV_common_init_armv5te(s);
+#endif
+#ifdef HAVE_IWMMXT
+ MPV_common_init_iwmmxt(s);
+#endif
+}
diff --git a/libavcodec/arm/mpegvideo_armv5te.c b/libavcodec/arm/mpegvideo_armv5te.c
new file mode 100644
index 0000000..b213cf1
--- /dev/null
+++ b/libavcodec/arm/mpegvideo_armv5te.c
@@ -0,0 +1,100 @@
+/*
+ * Optimization of some functions from mpegvideo.c for armv5te
+ * Copyright (c) 2007 Siarhei Siamashka <ssvb at users.sourceforge.net>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavcodec/avcodec.h"
+#include "libavcodec/dsputil.h"
+#include "libavcodec/mpegvideo.h"
+
+void ff_dct_unquantize_h263_armv5te(DCTELEM *block, int qmul, int qadd, int count);
+
+#ifdef ENABLE_ARM_TESTS
+/**
+ * h263 dequantizer supplementary function, it is performance critical and needs to
+ * have optimized implementations for each architecture. Is also used as a reference
+ * implementation in regression tests
+ */
+static inline void dct_unquantize_h263_helper_c(DCTELEM *block, int qmul, int qadd, int count)
+{
+ int i, level;
+ for (i = 0; i < count; i++) {
+ level = block[i];
+ if (level) {
+ if (level < 0) {
+ level = level * qmul - qadd;
+ } else {
+ level = level * qmul + qadd;
+ }
+ block[i] = level;
+ }
+ }
+}
+#endif
+
+static void dct_unquantize_h263_intra_armv5te(MpegEncContext *s,
+ DCTELEM *block, int n, int qscale)
+{
+ int level, qmul, qadd;
+ int nCoeffs;
+
+ assert(s->block_last_index[n]>=0);
+
+ qmul = qscale << 1;
+
+ if (!s->h263_aic) {
+ if (n < 4)
+ level = block[0] * s->y_dc_scale;
+ else
+ level = block[0] * s->c_dc_scale;
+ qadd = (qscale - 1) | 1;
+ }else{
+ qadd = 0;
+ level = block[0];
+ }
+ if(s->ac_pred)
+ nCoeffs=63;
+ else
+ nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
+
+ ff_dct_unquantize_h263_armv5te(block, qmul, qadd, nCoeffs + 1);
+ block[0] = level;
+}
+
+static void dct_unquantize_h263_inter_armv5te(MpegEncContext *s,
+ DCTELEM *block, int n, int qscale)
+{
+ int qmul, qadd;
+ int nCoeffs;
+
+ assert(s->block_last_index[n]>=0);
+
+ qadd = (qscale - 1) | 1;
+ qmul = qscale << 1;
+
+ nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
+
+ ff_dct_unquantize_h263_armv5te(block, qmul, qadd, nCoeffs + 1);
+}
+
+void MPV_common_init_armv5te(MpegEncContext *s)
+{
+ s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_armv5te;
+ s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_armv5te;
+}
diff --git a/libavcodec/arm/mpegvideo_armv5te_s.S b/libavcodec/arm/mpegvideo_armv5te_s.S
new file mode 100644
index 0000000..aaa252d
--- /dev/null
+++ b/libavcodec/arm/mpegvideo_armv5te_s.S
@@ -0,0 +1,117 @@
+/*
+ * Optimization of some functions from mpegvideo.c for armv5te
+ * Copyright (c) 2007 Siarhei Siamashka <ssvb at users.sourceforge.net>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "asm.S"
+
+/*
+ * Special optimized version of dct_unquantize_h263_helper_c, it
+ * requires the block to be at least 8 bytes aligned, and may process
+ * more elements than requested. But it is guaranteed to never
+ * process more than 64 elements provided that count argument is <= 64,
+ * so it is safe. This function is optimized for a common distribution
+ * of values for nCoeffs (they are mostly multiple of 8 plus one or
+ * two extra elements). So this function processes data as 8 elements
+ * per loop iteration and contains optional 2 elements processing in
+ * the end.
+ *
+ * Inner loop should take 6 cycles per element on arm926ej-s (Nokia 770)
+ */
+function ff_dct_unquantize_h263_armv5te, export=1
+ push {r4-r9,lr}
+ mov ip, #0
+ subs r3, r3, #2
+ ble 2f
+ ldrd r4, [r0, #0]
+1:
+ ldrd r6, [r0, #8]
+
+ rsbs r9, ip, r4, asr #16
+ addgt r9, r2, #0
+ rsblt r9, r2, #0
+ smlatbne r9, r4, r1, r9
+
+ rsbs lr, ip, r5, asr #16
+ addgt lr, r2, #0
+ rsblt lr, r2, #0
+ smlatbne lr, r5, r1, lr
+
+ rsbs r8, ip, r4, asl #16
+ addgt r8, r2, #0
+ rsblt r8, r2, #0
+ smlabbne r4, r4, r1, r8
+
+ rsbs r8, ip, r5, asl #16
+ addgt r8, r2, #0
+ rsblt r8, r2, #0
+ smlabbne r5, r5, r1, r8
+
+ strh r4, [r0], #2
+ strh r9, [r0], #2
+ strh r5, [r0], #2
+ strh lr, [r0], #2
+
+ rsbs r9, ip, r6, asr #16
+ addgt r9, r2, #0
+ rsblt r9, r2, #0
+ smlatbne r9, r6, r1, r9
+
+ rsbs lr, ip, r7, asr #16
+ addgt lr, r2, #0
+ rsblt lr, r2, #0
+ smlatbne lr, r7, r1, lr
+
+ rsbs r8, ip, r6, asl #16
+ addgt r8, r2, #0
+ rsblt r8, r2, #0
+ smlabbne r6, r6, r1, r8
+
+ rsbs r8, ip, r7, asl #16
+ addgt r8, r2, #0
+ rsblt r8, r2, #0
+ smlabbne r7, r7, r1, r8
+
+ strh r6, [r0], #2
+ strh r9, [r0], #2
+ strh r7, [r0], #2
+ strh lr, [r0], #2
+
+ subs r3, r3, #8
+ ldrgtd r4, [r0, #0] /* load data early to avoid load/use pipeline stall */
+ bgt 1b
+
+ adds r3, r3, #2
+ pople {r4-r9,pc}
+2:
+ ldrsh r9, [r0, #0]
+ ldrsh lr, [r0, #2]
+ mov r8, r2
+ cmp r9, #0
+ rsblt r8, r2, #0
+ smlabbne r9, r9, r1, r8
+ mov r8, r2
+ cmp lr, #0
+ rsblt r8, r2, #0
+ smlabbne lr, lr, r1, r8
+ strh r9, [r0], #2
+ strh lr, [r0], #2
+ pop {r4-r9,pc}
+ .endfunc
diff --git a/libavcodec/armv4l/mpegvideo_iwmmxt.c b/libavcodec/arm/mpegvideo_iwmmxt.c
similarity index 100%
rename from libavcodec/armv4l/mpegvideo_iwmmxt.c
rename to libavcodec/arm/mpegvideo_iwmmxt.c
diff --git a/libavcodec/armv4l/simple_idct_arm.S b/libavcodec/arm/simple_idct_arm.S
similarity index 100%
rename from libavcodec/armv4l/simple_idct_arm.S
rename to libavcodec/arm/simple_idct_arm.S
diff --git a/libavcodec/armv4l/simple_idct_armv5te.S b/libavcodec/arm/simple_idct_armv5te.S
similarity index 100%
rename from libavcodec/armv4l/simple_idct_armv5te.S
rename to libavcodec/arm/simple_idct_armv5te.S
diff --git a/libavcodec/armv4l/simple_idct_armv6.S b/libavcodec/arm/simple_idct_armv6.S
similarity index 100%
rename from libavcodec/armv4l/simple_idct_armv6.S
rename to libavcodec/arm/simple_idct_armv6.S
diff --git a/libavcodec/arm/simple_idct_neon.S b/libavcodec/arm/simple_idct_neon.S
new file mode 100644
index 0000000..e7099a2
--- /dev/null
+++ b/libavcodec/arm/simple_idct_neon.S
@@ -0,0 +1,362 @@
+/*
+ * ARM NEON IDCT
+ *
+ * Copyright (c) 2008 Mans Rullgard <mans at mansr.com>
+ *
+ * Based on Simple IDCT
+ * Copyright (c) 2001 Michael Niedermayer <michaelni at gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "asm.S"
+
+#define W1 22725 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W2 21407 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W3 19266 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W4 16383 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W5 12873 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W6 8867 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W7 4520 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define W4c ((1<<(COL_SHIFT-1))/W4)
+#define ROW_SHIFT 11
+#define COL_SHIFT 20
+
+#define w1 d0[0]
+#define w2 d0[1]
+#define w3 d0[2]
+#define w4 d0[3]
+#define w5 d1[0]
+#define w6 d1[1]
+#define w7 d1[2]
+#define w4c d1[3]
+
+ .fpu neon
+
+ .macro idct_col4_top
+ vmull.s16 q7, d6, w2 /* q9 = W2 * col[2] */
+ vmull.s16 q8, d6, w6 /* q10 = W6 * col[2] */
+ vmull.s16 q9, d4, w1 /* q9 = W1 * col[1] */
+ vadd.i32 q11, q15, q7
+ vmull.s16 q10, d4, w3 /* q10 = W3 * col[1] */
+ vadd.i32 q12, q15, q8
+ vmull.s16 q5, d4, w5 /* q5 = W5 * col[1] */
+ vsub.i32 q13, q15, q8
+ vmull.s16 q6, d4, w7 /* q6 = W7 * col[1] */
+ vsub.i32 q14, q15, q7
+
+ vmlal.s16 q9, d8, w3 /* q9 += W3 * col[3] */
+ vmlsl.s16 q10, d8, w7 /* q10 -= W7 * col[3] */
+ vmlsl.s16 q5, d8, w1 /* q5 -= W1 * col[3] */
+ vmlsl.s16 q6, d8, w5 /* q6 -= W5 * col[3] */
+ .endm
+
+ .text
+ .align 6
+
+function idct_row4_neon
+ vmov.i32 q15, #(1<<(ROW_SHIFT-1))
+ vld1.64 {d2-d5}, [r2,:128]!
+ vmlal.s16 q15, d2, w4 /* q15 += W4 * col[0] */
+ vld1.64 {d6,d7}, [r2,:128]!
+ vorr d10, d3, d5
+ vld1.64 {d8,d9}, [r2,:128]!
+ add r2, r2, #-64
+
+ vorr d11, d7, d9
+ vorr d10, d10, d11
+ vmov r3, r4, d10
+
+ idct_col4_top
+
+ orrs r3, r3, r4
+ beq 1f
+
+ vmull.s16 q7, d3, w4 /* q7 = W4 * col[4] */
+ vmlal.s16 q9, d5, w5 /* q9 += W5 * col[5] */
+ vmlsl.s16 q10, d5, w1 /* q10 -= W1 * col[5] */
+ vmull.s16 q8, d7, w2 /* q8 = W2 * col[6] */
+ vmlal.s16 q5, d5, w7 /* q5 += W7 * col[5] */
+ vadd.i32 q11, q11, q7
+ vsub.i32 q12, q12, q7
+ vsub.i32 q13, q13, q7
+ vadd.i32 q14, q14, q7
+ vmlal.s16 q6, d5, w3 /* q6 += W3 * col[5] */
+ vmull.s16 q7, d7, w6 /* q7 = W6 * col[6] */
+ vmlal.s16 q9, d9, w7
+ vmlsl.s16 q10, d9, w5
+ vmlal.s16 q5, d9, w3
+ vmlsl.s16 q6, d9, w1
+ vadd.i32 q11, q11, q7
+ vsub.i32 q12, q12, q8
+ vadd.i32 q13, q13, q8
+ vsub.i32 q14, q14, q7
+
+1: vadd.i32 q3, q11, q9
+ vadd.i32 q4, q12, q10
+ vshrn.i32 d2, q3, #ROW_SHIFT
+ vshrn.i32 d4, q4, #ROW_SHIFT
+ vadd.i32 q7, q13, q5
+ vadd.i32 q8, q14, q6
+ vtrn.16 d2, d4
+ vshrn.i32 d6, q7, #ROW_SHIFT
+ vshrn.i32 d8, q8, #ROW_SHIFT
+ vsub.i32 q14, q14, q6
+ vsub.i32 q11, q11, q9
+ vtrn.16 d6, d8
+ vsub.i32 q13, q13, q5
+ vshrn.i32 d3, q14, #ROW_SHIFT
+ vtrn.32 d2, d6
+ vsub.i32 q12, q12, q10
+ vtrn.32 d4, d8
+ vshrn.i32 d5, q13, #ROW_SHIFT
+ vshrn.i32 d7, q12, #ROW_SHIFT
+ vshrn.i32 d9, q11, #ROW_SHIFT
+
+ vtrn.16 d3, d5
+ vtrn.16 d7, d9
+ vtrn.32 d3, d7
+ vtrn.32 d5, d9
+
+ vst1.64 {d2-d5}, [r2,:128]!
+ vst1.64 {d6-d9}, [r2,:128]!
+
+ bx lr
+ .endfunc
+
+function idct_col4_neon
+ mov ip, #16
+ vld1.64 {d2}, [r2,:64], ip /* d2 = col[0] */
+ vdup.16 d30, w4c
+ vld1.64 {d4}, [r2,:64], ip /* d3 = col[1] */
+ vadd.i16 d30, d30, d2
+ vld1.64 {d6}, [r2,:64], ip /* d4 = col[2] */
+ vmull.s16 q15, d30, w4 /* q15 = W4*(col[0]+(1<<COL_SHIFT-1)/W4)*/
+ vld1.64 {d8}, [r2,:64], ip /* d5 = col[3] */
+
+ ldrd r4, [r2]
+ ldrd r6, [r2, #16]
+ orrs r4, r4, r5
+
+ idct_col4_top
+ addeq r2, r2, #16
+ beq 1f
+
+ vld1.64 {d3}, [r2,:64], ip /* d6 = col[4] */
+ vmull.s16 q7, d3, w4 /* q7 = W4 * col[4] */
+ vadd.i32 q11, q11, q7
+ vsub.i32 q12, q12, q7
+ vsub.i32 q13, q13, q7
+ vadd.i32 q14, q14, q7
+
+1: orrs r6, r6, r7
+ ldrd r4, [r2, #16]
+ addeq r2, r2, #16
+ beq 2f
+
+ vld1.64 {d5}, [r2,:64], ip /* d7 = col[5] */
+ vmlal.s16 q9, d5, w5 /* q9 += W5 * col[5] */
+ vmlsl.s16 q10, d5, w1 /* q10 -= W1 * col[5] */
+ vmlal.s16 q5, d5, w7 /* q5 += W7 * col[5] */
+ vmlal.s16 q6, d5, w3 /* q6 += W3 * col[5] */
+
+2: orrs r4, r4, r5
+ ldrd r4, [r2, #16]
+ addeq r2, r2, #16
+ beq 3f
+
+ vld1.64 {d7}, [r2,:64], ip /* d8 = col[6] */
+ vmull.s16 q7, d7, w6 /* q7 = W6 * col[6] */
+ vmull.s16 q8, d7, w2 /* q8 = W2 * col[6] */
+ vadd.i32 q11, q11, q7
+ vsub.i32 q14, q14, q7
+ vsub.i32 q12, q12, q8
+ vadd.i32 q13, q13, q8
+
+3: orrs r4, r4, r5
+ addeq r2, r2, #16
+ beq 4f
+
+ vld1.64 {d9}, [r2,:64], ip /* d9 = col[7] */
+ vmlal.s16 q9, d9, w7
+ vmlsl.s16 q10, d9, w5
+ vmlal.s16 q5, d9, w3
+ vmlsl.s16 q6, d9, w1
+
+4: vaddhn.i32 d2, q11, q9
+ vaddhn.i32 d3, q12, q10
+ vaddhn.i32 d4, q13, q5
+ vaddhn.i32 d5, q14, q6
+ vsubhn.i32 d9, q11, q9
+ vsubhn.i32 d8, q12, q10
+ vsubhn.i32 d7, q13, q5
+ vsubhn.i32 d6, q14, q6
+
+ bx lr
+ .endfunc
+
+ .align 6
+
+function idct_col4_st8_neon
+ vqshrun.s16 d2, q1, #COL_SHIFT-16
+ vqshrun.s16 d3, q2, #COL_SHIFT-16
+ vqshrun.s16 d4, q3, #COL_SHIFT-16
+ vqshrun.s16 d5, q4, #COL_SHIFT-16
+ vst1.32 {d2[0]}, [r0,:32], r1
+ vst1.32 {d2[1]}, [r0,:32], r1
+ vst1.32 {d3[0]}, [r0,:32], r1
+ vst1.32 {d3[1]}, [r0,:32], r1
+ vst1.32 {d4[0]}, [r0,:32], r1
+ vst1.32 {d4[1]}, [r0,:32], r1
+ vst1.32 {d5[0]}, [r0,:32], r1
+ vst1.32 {d5[1]}, [r0,:32], r1
+
+ bx lr
+ .endfunc
+
+ .section .rodata
+ .align 4
+idct_coeff_neon:
+ .short W1, W2, W3, W4, W5, W6, W7, W4c
+ .previous
+
+ .macro idct_start data
+ push {r4-r7, lr}
+ pld [\data]
+ pld [\data, #64]
+ vpush {d8-d15}
+ movrel r3, idct_coeff_neon
+ vld1.64 {d0,d1}, [r3,:128]
+ .endm
+
+ .macro idct_end
+ vpop {d8-d15}
+ pop {r4-r7, pc}
+ .endm
+
+/* void ff_simple_idct_put_neon(uint8_t *dst, int line_size, DCTELEM *data); */
+function ff_simple_idct_put_neon, export=1
+ idct_start r2
+
+ bl idct_row4_neon
+ bl idct_row4_neon
+ add r2, r2, #-128
+ bl idct_col4_neon
+ bl idct_col4_st8_neon
+ sub r0, r0, r1, lsl #3
+ add r0, r0, #4
+ add r2, r2, #-120
+ bl idct_col4_neon
+ bl idct_col4_st8_neon
+
+ idct_end
+ .endfunc
+
+ .align 6
+
+function idct_col4_add8_neon
+ mov ip, r0
+
+ vld1.32 {d10[0]}, [r0,:32], r1
+ vshr.s16 q1, q1, #COL_SHIFT-16
+ vld1.32 {d10[1]}, [r0,:32], r1
+ vshr.s16 q2, q2, #COL_SHIFT-16
+ vld1.32 {d11[0]}, [r0,:32], r1
+ vshr.s16 q3, q3, #COL_SHIFT-16
+ vld1.32 {d11[1]}, [r0,:32], r1
+ vshr.s16 q4, q4, #COL_SHIFT-16
+ vld1.32 {d12[0]}, [r0,:32], r1
+ vaddw.u8 q1, q1, d10
+ vld1.32 {d12[1]}, [r0,:32], r1
+ vaddw.u8 q2, q2, d11
+ vld1.32 {d13[0]}, [r0,:32], r1
+ vqmovun.s16 d2, q1
+ vld1.32 {d13[1]}, [r0,:32], r1
+ vaddw.u8 q3, q3, d12
+ vst1.32 {d2[0]}, [ip,:32], r1
+ vqmovun.s16 d3, q2
+ vst1.32 {d2[1]}, [ip,:32], r1
+ vaddw.u8 q4, q4, d13
+ vst1.32 {d3[0]}, [ip,:32], r1
+ vqmovun.s16 d4, q3
+ vst1.32 {d3[1]}, [ip,:32], r1
+ vqmovun.s16 d5, q4
+ vst1.32 {d4[0]}, [ip,:32], r1
+ vst1.32 {d4[1]}, [ip,:32], r1
+ vst1.32 {d5[0]}, [ip,:32], r1
+ vst1.32 {d5[1]}, [ip,:32], r1
+
+ bx lr
+ .endfunc
+
+/* void ff_simple_idct_add_neon(uint8_t *dst, int line_size, DCTELEM *data); */
+function ff_simple_idct_add_neon, export=1
+ idct_start r2
+
+ bl idct_row4_neon
+ bl idct_row4_neon
+ add r2, r2, #-128
+ bl idct_col4_neon
+ bl idct_col4_add8_neon
+ sub r0, r0, r1, lsl #3
+ add r0, r0, #4
+ add r2, r2, #-120
+ bl idct_col4_neon
+ bl idct_col4_add8_neon
+
+ idct_end
+ .endfunc
+
+ .align 6
+
+function idct_col4_st16_neon
+ mov ip, #16
+
+ vshr.s16 q1, q1, #COL_SHIFT-16
+ vshr.s16 q2, q2, #COL_SHIFT-16
+ vst1.64 {d2}, [r2,:64], ip
+ vshr.s16 q3, q3, #COL_SHIFT-16
+ vst1.64 {d3}, [r2,:64], ip
+ vshr.s16 q4, q4, #COL_SHIFT-16
+ vst1.64 {d4}, [r2,:64], ip
+ vst1.64 {d5}, [r2,:64], ip
+ vst1.64 {d6}, [r2,:64], ip
+ vst1.64 {d7}, [r2,:64], ip
+ vst1.64 {d8}, [r2,:64], ip
+ vst1.64 {d9}, [r2,:64], ip
+
+ bx lr
+ .endfunc
+
+/* void ff_simple_idct_neon(DCTELEM *data); */
+function ff_simple_idct_neon, export=1
+ idct_start r0
+
+ mov r2, r0
+ bl idct_row4_neon
+ bl idct_row4_neon
+ add r2, r2, #-128
+ bl idct_col4_neon
+ add r2, r2, #-128
+ bl idct_col4_st16_neon
+ add r2, r2, #-120
+ bl idct_col4_neon
+ add r2, r2, #-128
+ bl idct_col4_st16_neon
+
+ idct_end
+ .endfunc
diff --git a/libavcodec/armv4l/asm.S b/libavcodec/armv4l/asm.S
deleted file mode 100644
index e2595f4..0000000
--- a/libavcodec/armv4l/asm.S
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (c) 2008 Mans Rullgard <mans at mansr.com>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
- .macro require8, val=1
- .eabi_attribute 24, \val
- .endm
-
- .macro preserve8, val=1
- .eabi_attribute 25, \val
- .endm
-
- .macro function name, export=0
-.if \export
- .global \name
-.endif
- .type \name, %function
- .func \name
-\name:
- .endm
diff --git a/libavcodec/armv4l/dsputil_arm.c b/libavcodec/armv4l/dsputil_arm.c
deleted file mode 100644
index 695bf1c..0000000
--- a/libavcodec/armv4l/dsputil_arm.c
+++ /dev/null
@@ -1,204 +0,0 @@
-/*
- * ARMv4L optimized DSP utils
- * Copyright (c) 2001 Lionel Ulmer.
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "libavcodec/dsputil.h"
-#ifdef HAVE_IPP
-#include <ipp.h>
-#endif
-
-extern void dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx);
-extern void ff_float_init_arm_vfp(DSPContext* c, AVCodecContext *avctx);
-
-extern void j_rev_dct_ARM(DCTELEM *data);
-extern void simple_idct_ARM(DCTELEM *data);
-
-extern void simple_idct_armv5te(DCTELEM *data);
-extern void simple_idct_put_armv5te(uint8_t *dest, int line_size,
- DCTELEM *data);
-extern void simple_idct_add_armv5te(uint8_t *dest, int line_size,
- DCTELEM *data);
-
-extern void ff_simple_idct_armv6(DCTELEM *data);
-extern void ff_simple_idct_put_armv6(uint8_t *dest, int line_size,
- DCTELEM *data);
-extern void ff_simple_idct_add_armv6(uint8_t *dest, int line_size,
- DCTELEM *data);
-
-/* XXX: local hack */
-static void (*ff_put_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size);
-static void (*ff_add_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size);
-
-void put_pixels8_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
-void put_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
-void put_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
-void put_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
-
-void put_no_rnd_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
-void put_no_rnd_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
-void put_no_rnd_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
-
-void put_pixels16_arm(uint8_t *block, const uint8_t *pixels, int line_size, int h);
-
-extern void ff_prefetch_arm(void *mem, int stride, int h);
-
-CALL_2X_PIXELS(put_pixels16_x2_arm , put_pixels8_x2_arm , 8)
-CALL_2X_PIXELS(put_pixels16_y2_arm , put_pixels8_y2_arm , 8)
-CALL_2X_PIXELS(put_pixels16_xy2_arm, put_pixels8_xy2_arm, 8)
-CALL_2X_PIXELS(put_no_rnd_pixels16_x2_arm , put_no_rnd_pixels8_x2_arm , 8)
-CALL_2X_PIXELS(put_no_rnd_pixels16_y2_arm , put_no_rnd_pixels8_y2_arm , 8)
-CALL_2X_PIXELS(put_no_rnd_pixels16_xy2_arm, put_no_rnd_pixels8_xy2_arm, 8)
-
-extern void ff_add_pixels_clamped_ARM(short *block, unsigned char *dest,
- int line_size);
-
-/* XXX: those functions should be suppressed ASAP when all IDCTs are
- converted */
-static void j_rev_dct_ARM_put(uint8_t *dest, int line_size, DCTELEM *block)
-{
- j_rev_dct_ARM (block);
- ff_put_pixels_clamped(block, dest, line_size);
-}
-static void j_rev_dct_ARM_add(uint8_t *dest, int line_size, DCTELEM *block)
-{
- j_rev_dct_ARM (block);
- ff_add_pixels_clamped(block, dest, line_size);
-}
-static void simple_idct_ARM_put(uint8_t *dest, int line_size, DCTELEM *block)
-{
- simple_idct_ARM (block);
- ff_put_pixels_clamped(block, dest, line_size);
-}
-static void simple_idct_ARM_add(uint8_t *dest, int line_size, DCTELEM *block)
-{
- simple_idct_ARM (block);
- ff_add_pixels_clamped(block, dest, line_size);
-}
-
-#ifdef HAVE_IPP
-static void simple_idct_ipp(DCTELEM *block)
-{
- ippiDCT8x8Inv_Video_16s_C1I(block);
-}
-static void simple_idct_ipp_put(uint8_t *dest, int line_size, DCTELEM *block)
-{
- ippiDCT8x8Inv_Video_16s8u_C1R(block, dest, line_size);
-}
-
-void add_pixels_clamped_iwmmxt(const DCTELEM *block, uint8_t *pixels, int line_size);
-
-static void simple_idct_ipp_add(uint8_t *dest, int line_size, DCTELEM *block)
-{
- ippiDCT8x8Inv_Video_16s_C1I(block);
-#ifdef HAVE_IWMMXT
- add_pixels_clamped_iwmmxt(block, dest, line_size);
-#else
- ff_add_pixels_clamped_ARM(block, dest, line_size);
-#endif
-}
-#endif
-
-int mm_support(void)
-{
- return ENABLE_IWMMXT * FF_MM_IWMMXT;
-}
-
-void dsputil_init_armv4l(DSPContext* c, AVCodecContext *avctx)
-{
- int idct_algo= avctx->idct_algo;
-
- ff_put_pixels_clamped = c->put_pixels_clamped;
- ff_add_pixels_clamped = c->add_pixels_clamped;
-
- if (avctx->lowres == 0) {
- if(idct_algo == FF_IDCT_AUTO){
-#if defined(HAVE_IPP)
- idct_algo = FF_IDCT_IPP;
-#elif defined(HAVE_ARMV6)
- idct_algo = FF_IDCT_SIMPLEARMV6;
-#elif defined(HAVE_ARMV5TE)
- idct_algo = FF_IDCT_SIMPLEARMV5TE;
-#else
- idct_algo = FF_IDCT_ARM;
-#endif
- }
-
- if(idct_algo==FF_IDCT_ARM){
- c->idct_put= j_rev_dct_ARM_put;
- c->idct_add= j_rev_dct_ARM_add;
- c->idct = j_rev_dct_ARM;
- c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;/* FF_NO_IDCT_PERM */
- } else if (idct_algo==FF_IDCT_SIMPLEARM){
- c->idct_put= simple_idct_ARM_put;
- c->idct_add= simple_idct_ARM_add;
- c->idct = simple_idct_ARM;
- c->idct_permutation_type= FF_NO_IDCT_PERM;
-#ifdef HAVE_ARMV6
- } else if (idct_algo==FF_IDCT_SIMPLEARMV6){
- c->idct_put= ff_simple_idct_put_armv6;
- c->idct_add= ff_simple_idct_add_armv6;
- c->idct = ff_simple_idct_armv6;
- c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
-#endif
-#ifdef HAVE_ARMV5TE
- } else if (idct_algo==FF_IDCT_SIMPLEARMV5TE){
- c->idct_put= simple_idct_put_armv5te;
- c->idct_add= simple_idct_add_armv5te;
- c->idct = simple_idct_armv5te;
- c->idct_permutation_type = FF_NO_IDCT_PERM;
-#endif
-#ifdef HAVE_IPP
- } else if (idct_algo==FF_IDCT_IPP){
- c->idct_put= simple_idct_ipp_put;
- c->idct_add= simple_idct_ipp_add;
- c->idct = simple_idct_ipp;
- c->idct_permutation_type= FF_NO_IDCT_PERM;
-#endif
- }
- }
-
- c->put_pixels_tab[0][0] = put_pixels16_arm;
- c->put_pixels_tab[0][1] = put_pixels16_x2_arm; //OK!
- c->put_pixels_tab[0][2] = put_pixels16_y2_arm; //OK!
- c->put_pixels_tab[0][3] = put_pixels16_xy2_arm;
- c->put_no_rnd_pixels_tab[0][0] = put_pixels16_arm;
- c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_arm; // OK
- c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_arm; //OK
- c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_arm;
- c->put_pixels_tab[1][0] = put_pixels8_arm; //OK
- c->put_pixels_tab[1][1] = put_pixels8_x2_arm; //OK
- c->put_pixels_tab[1][2] = put_pixels8_y2_arm;
- c->put_pixels_tab[1][3] = put_pixels8_xy2_arm;
- c->put_no_rnd_pixels_tab[1][0] = put_pixels8_arm;//OK
- c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_arm; //OK
- c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_arm; //OK
- c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_arm;
-
-#ifdef HAVE_ARMV5TE
- c->prefetch = ff_prefetch_arm;
-#endif
-
-#ifdef HAVE_IWMMXT
- dsputil_init_iwmmxt(c, avctx);
-#endif
-#ifdef HAVE_ARMVFP
- ff_float_init_arm_vfp(c, avctx);
-#endif
-}
diff --git a/libavcodec/armv4l/dsputil_arm_s.S b/libavcodec/armv4l/dsputil_arm_s.S
deleted file mode 100644
index ba06f37..0000000
--- a/libavcodec/armv4l/dsputil_arm_s.S
+++ /dev/null
@@ -1,799 +0,0 @@
-@
-@ ARMv4L optimized DSP utils
-@ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
-@
-@ This file is part of FFmpeg.
-@
-@ FFmpeg is free software; you can redistribute it and/or
-@ modify it under the terms of the GNU Lesser General Public
-@ License as published by the Free Software Foundation; either
-@ version 2.1 of the License, or (at your option) any later version.
-@
-@ FFmpeg is distributed in the hope that it will be useful,
-@ but WITHOUT ANY WARRANTY; without even the implied warranty of
-@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-@ Lesser General Public License for more details.
-@
-@ You should have received a copy of the GNU Lesser General Public
-@ License along with FFmpeg; if not, write to the Free Software
-@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-@
-
-#include "config.h"
-#include "asm.S"
-
- preserve8
-
-#ifndef HAVE_PLD
-.macro pld reg
-.endm
-#endif
-
-#ifdef HAVE_ARMV5TE
-function ff_prefetch_arm, export=1
- subs r2, r2, #1
- pld [r0]
- add r0, r0, r1
- bne ff_prefetch_arm
- bx lr
- .endfunc
-#endif
-
-.macro ADJ_ALIGN_QUADWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
- mov \Rd0, \Rn0, lsr #(\shift * 8)
- mov \Rd1, \Rn1, lsr #(\shift * 8)
- mov \Rd2, \Rn2, lsr #(\shift * 8)
- mov \Rd3, \Rn3, lsr #(\shift * 8)
- orr \Rd0, \Rd0, \Rn1, lsl #(32 - \shift * 8)
- orr \Rd1, \Rd1, \Rn2, lsl #(32 - \shift * 8)
- orr \Rd2, \Rd2, \Rn3, lsl #(32 - \shift * 8)
- orr \Rd3, \Rd3, \Rn4, lsl #(32 - \shift * 8)
-.endm
-.macro ADJ_ALIGN_DOUBLEWORD shift, R0, R1, R2
- mov \R0, \R0, lsr #(\shift * 8)
- orr \R0, \R0, \R1, lsl #(32 - \shift * 8)
- mov \R1, \R1, lsr #(\shift * 8)
- orr \R1, \R1, \R2, lsl #(32 - \shift * 8)
-.endm
-.macro ADJ_ALIGN_DOUBLEWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
- mov \Rdst0, \Rsrc0, lsr #(\shift * 8)
- mov \Rdst1, \Rsrc1, lsr #(\shift * 8)
- orr \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - (\shift * 8))
- orr \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - (\shift * 8))
-.endm
-
-.macro RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
- @ Rd = (Rn | Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1)
- @ Rmask = 0xFEFEFEFE
- @ Rn = destroy
- eor \Rd0, \Rn0, \Rm0
- eor \Rd1, \Rn1, \Rm1
- orr \Rn0, \Rn0, \Rm0
- orr \Rn1, \Rn1, \Rm1
- and \Rd0, \Rd0, \Rmask
- and \Rd1, \Rd1, \Rmask
- sub \Rd0, \Rn0, \Rd0, lsr #1
- sub \Rd1, \Rn1, \Rd1, lsr #1
-.endm
-
-.macro NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
- @ Rd = (Rn & Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1)
- @ Rmask = 0xFEFEFEFE
- @ Rn = destroy
- eor \Rd0, \Rn0, \Rm0
- eor \Rd1, \Rn1, \Rm1
- and \Rn0, \Rn0, \Rm0
- and \Rn1, \Rn1, \Rm1
- and \Rd0, \Rd0, \Rmask
- and \Rd1, \Rd1, \Rmask
- add \Rd0, \Rn0, \Rd0, lsr #1
- add \Rd1, \Rn1, \Rd1, lsr #1
-.endm
-
-@ ----------------------------------------------------------------
- .align 8
-function put_pixels16_arm, export=1
- @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
- @ block = word aligned, pixles = unaligned
- pld [r1]
- stmfd sp!, {r4-r11, lr} @ R14 is also called LR
- adr r5, 5f
- ands r4, r1, #3
- bic r1, r1, #3
- add r5, r5, r4, lsl #2
- ldrne pc, [r5]
-1:
- ldmia r1, {r4-r7}
- add r1, r1, r2
- stmia r0, {r4-r7}
- pld [r1]
- subs r3, r3, #1
- add r0, r0, r2
- bne 1b
- ldmfd sp!, {r4-r11, pc}
- .align 8
-2:
- ldmia r1, {r4-r8}
- add r1, r1, r2
- ADJ_ALIGN_QUADWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8
- pld [r1]
- subs r3, r3, #1
- stmia r0, {r9-r12}
- add r0, r0, r2
- bne 2b
- ldmfd sp!, {r4-r11, pc}
- .align 8
-3:
- ldmia r1, {r4-r8}
- add r1, r1, r2
- ADJ_ALIGN_QUADWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8
- pld [r1]
- subs r3, r3, #1
- stmia r0, {r9-r12}
- add r0, r0, r2
- bne 3b
- ldmfd sp!, {r4-r11, pc}
- .align 8
-4:
- ldmia r1, {r4-r8}
- add r1, r1, r2
- ADJ_ALIGN_QUADWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8
- pld [r1]
- subs r3, r3, #1
- stmia r0, {r9-r12}
- add r0, r0, r2
- bne 4b
- ldmfd sp!, {r4-r11,pc}
- .align 8
-5:
- .word 1b
- .word 2b
- .word 3b
- .word 4b
- .endfunc
-
-@ ----------------------------------------------------------------
- .align 8
-function put_pixels8_arm, export=1
- @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
- @ block = word aligned, pixles = unaligned
- pld [r1]
- stmfd sp!, {r4-r5,lr} @ R14 is also called LR
- adr r5, 5f
- ands r4, r1, #3
- bic r1, r1, #3
- add r5, r5, r4, lsl #2
- ldrne pc, [r5]
-1:
- ldmia r1, {r4-r5}
- add r1, r1, r2
- subs r3, r3, #1
- pld [r1]
- stmia r0, {r4-r5}
- add r0, r0, r2
- bne 1b
- ldmfd sp!, {r4-r5,pc}
- .align 8
-2:
- ldmia r1, {r4-r5, r12}
- add r1, r1, r2
- ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r12
- pld [r1]
- subs r3, r3, #1
- stmia r0, {r4-r5}
- add r0, r0, r2
- bne 2b
- ldmfd sp!, {r4-r5,pc}
- .align 8
-3:
- ldmia r1, {r4-r5, r12}
- add r1, r1, r2
- ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r12
- pld [r1]
- subs r3, r3, #1
- stmia r0, {r4-r5}
- add r0, r0, r2
- bne 3b
- ldmfd sp!, {r4-r5,pc}
- .align 8
-4:
- ldmia r1, {r4-r5, r12}
- add r1, r1, r2
- ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r12
- pld [r1]
- subs r3, r3, #1
- stmia r0, {r4-r5}
- add r0, r0, r2
- bne 4b
- ldmfd sp!, {r4-r5,pc}
- .align 8
-5:
- .word 1b
- .word 2b
- .word 3b
- .word 4b
- .endfunc
-
-@ ----------------------------------------------------------------
- .align 8
-function put_pixels8_x2_arm, export=1
- @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
- @ block = word aligned, pixles = unaligned
- pld [r1]
- stmfd sp!, {r4-r10,lr} @ R14 is also called LR
- adr r5, 5f
- ands r4, r1, #3
- ldr r12, [r5]
- add r5, r5, r4, lsl #2
- bic r1, r1, #3
- ldrne pc, [r5]
-1:
- ldmia r1, {r4-r5, r10}
- add r1, r1, r2
- ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
- pld [r1]
- RND_AVG32 r8, r9, r4, r5, r6, r7, r12
- subs r3, r3, #1
- stmia r0, {r8-r9}
- add r0, r0, r2
- bne 1b
- ldmfd sp!, {r4-r10,pc}
- .align 8
-2:
- ldmia r1, {r4-r5, r10}
- add r1, r1, r2
- ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
- ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10
- pld [r1]
- RND_AVG32 r4, r5, r6, r7, r8, r9, r12
- subs r3, r3, #1
- stmia r0, {r4-r5}
- add r0, r0, r2
- bne 2b
- ldmfd sp!, {r4-r10,pc}
- .align 8
-3:
- ldmia r1, {r4-r5, r10}
- add r1, r1, r2
- ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10
- ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10
- pld [r1]
- RND_AVG32 r4, r5, r6, r7, r8, r9, r12
- subs r3, r3, #1
- stmia r0, {r4-r5}
- add r0, r0, r2
- bne 3b
- ldmfd sp!, {r4-r10,pc}
- .align 8
-4:
- ldmia r1, {r4-r5, r10}
- add r1, r1, r2
- ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10
- pld [r1]
- RND_AVG32 r8, r9, r6, r7, r5, r10, r12
- subs r3, r3, #1
- stmia r0, {r8-r9}
- add r0, r0, r2
- bne 4b
- ldmfd sp!, {r4-r10,pc} @@ update PC with LR content.
- .align 8
-5:
- .word 0xFEFEFEFE
- .word 2b
- .word 3b
- .word 4b
- .endfunc
-
- .align 8
-function put_no_rnd_pixels8_x2_arm, export=1
- @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
- @ block = word aligned, pixles = unaligned
- pld [r1]
- stmfd sp!, {r4-r10,lr} @ R14 is also called LR
- adr r5, 5f
- ands r4, r1, #3
- ldr r12, [r5]
- add r5, r5, r4, lsl #2
- bic r1, r1, #3
- ldrne pc, [r5]
-1:
- ldmia r1, {r4-r5, r10}
- add r1, r1, r2
- ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
- pld [r1]
- NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
- subs r3, r3, #1
- stmia r0, {r8-r9}
- add r0, r0, r2
- bne 1b
- ldmfd sp!, {r4-r10,pc}
- .align 8
-2:
- ldmia r1, {r4-r5, r10}
- add r1, r1, r2
- ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
- ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10
- pld [r1]
- NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
- subs r3, r3, #1
- stmia r0, {r4-r5}
- add r0, r0, r2
- bne 2b
- ldmfd sp!, {r4-r10,pc}
- .align 8
-3:
- ldmia r1, {r4-r5, r10}
- add r1, r1, r2
- ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10
- ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10
- pld [r1]
- NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
- subs r3, r3, #1
- stmia r0, {r4-r5}
- add r0, r0, r2
- bne 3b
- ldmfd sp!, {r4-r10,pc}
- .align 8
-4:
- ldmia r1, {r4-r5, r10}
- add r1, r1, r2
- ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10
- pld [r1]
- NO_RND_AVG32 r8, r9, r6, r7, r5, r10, r12
- subs r3, r3, #1
- stmia r0, {r8-r9}
- add r0, r0, r2
- bne 4b
- ldmfd sp!, {r4-r10,pc} @@ update PC with LR content.
- .align 8
-5:
- .word 0xFEFEFEFE
- .word 2b
- .word 3b
- .word 4b
- .endfunc
-
-
-@ ----------------------------------------------------------------
- .align 8
-function put_pixels8_y2_arm, export=1
- @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
- @ block = word aligned, pixles = unaligned
- pld [r1]
- stmfd sp!, {r4-r11,lr} @ R14 is also called LR
- adr r5, 5f
- ands r4, r1, #3
- mov r3, r3, lsr #1
- ldr r12, [r5]
- add r5, r5, r4, lsl #2
- bic r1, r1, #3
- ldrne pc, [r5]
-1:
- ldmia r1, {r4-r5}
- add r1, r1, r2
-6: ldmia r1, {r6-r7}
- add r1, r1, r2
- pld [r1]
- RND_AVG32 r8, r9, r4, r5, r6, r7, r12
- ldmia r1, {r4-r5}
- add r1, r1, r2
- stmia r0, {r8-r9}
- add r0, r0, r2
- pld [r1]
- RND_AVG32 r8, r9, r6, r7, r4, r5, r12
- subs r3, r3, #1
- stmia r0, {r8-r9}
- add r0, r0, r2
- bne 6b
- ldmfd sp!, {r4-r11,pc}
- .align 8
-2:
- ldmia r1, {r4-r6}
- add r1, r1, r2
- pld [r1]
- ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
-6: ldmia r1, {r7-r9}
- add r1, r1, r2
- pld [r1]
- ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9
- RND_AVG32 r10, r11, r4, r5, r7, r8, r12
- stmia r0, {r10-r11}
- add r0, r0, r2
- ldmia r1, {r4-r6}
- add r1, r1, r2
- pld [r1]
- ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
- subs r3, r3, #1
- RND_AVG32 r10, r11, r7, r8, r4, r5, r12
- stmia r0, {r10-r11}
- add r0, r0, r2
- bne 6b
- ldmfd sp!, {r4-r11,pc}
- .align 8
-3:
- ldmia r1, {r4-r6}
- add r1, r1, r2
- pld [r1]
- ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
-6: ldmia r1, {r7-r9}
- add r1, r1, r2
- pld [r1]
- ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9
- RND_AVG32 r10, r11, r4, r5, r7, r8, r12
- stmia r0, {r10-r11}
- add r0, r0, r2
- ldmia r1, {r4-r6}
- add r1, r1, r2
- pld [r1]
- ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
- subs r3, r3, #1
- RND_AVG32 r10, r11, r7, r8, r4, r5, r12
- stmia r0, {r10-r11}
- add r0, r0, r2
- bne 6b
- ldmfd sp!, {r4-r11,pc}
- .align 8
-4:
- ldmia r1, {r4-r6}
- add r1, r1, r2
- pld [r1]
- ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
-6: ldmia r1, {r7-r9}
- add r1, r1, r2
- pld [r1]
- ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9
- RND_AVG32 r10, r11, r4, r5, r7, r8, r12
- stmia r0, {r10-r11}
- add r0, r0, r2
- ldmia r1, {r4-r6}
- add r1, r1, r2
- pld [r1]
- ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
- subs r3, r3, #1
- RND_AVG32 r10, r11, r7, r8, r4, r5, r12
- stmia r0, {r10-r11}
- add r0, r0, r2
- bne 6b
- ldmfd sp!, {r4-r11,pc}
-
- .align 8
-5:
- .word 0xFEFEFEFE
- .word 2b
- .word 3b
- .word 4b
- .endfunc
-
- .align 8
-function put_no_rnd_pixels8_y2_arm, export=1
- @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
- @ block = word aligned, pixles = unaligned
- pld [r1]
- stmfd sp!, {r4-r11,lr} @ R14 is also called LR
- adr r5, 5f
- ands r4, r1, #3
- mov r3, r3, lsr #1
- ldr r12, [r5]
- add r5, r5, r4, lsl #2
- bic r1, r1, #3
- ldrne pc, [r5]
-1:
- ldmia r1, {r4-r5}
- add r1, r1, r2
-6: ldmia r1, {r6-r7}
- add r1, r1, r2
- pld [r1]
- NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
- ldmia r1, {r4-r5}
- add r1, r1, r2
- stmia r0, {r8-r9}
- add r0, r0, r2
- pld [r1]
- NO_RND_AVG32 r8, r9, r6, r7, r4, r5, r12
- subs r3, r3, #1
- stmia r0, {r8-r9}
- add r0, r0, r2
- bne 6b
- ldmfd sp!, {r4-r11,pc}
- .align 8
-2:
- ldmia r1, {r4-r6}
- add r1, r1, r2
- pld [r1]
- ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
-6: ldmia r1, {r7-r9}
- add r1, r1, r2
- pld [r1]
- ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9
- NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
- stmia r0, {r10-r11}
- add r0, r0, r2
- ldmia r1, {r4-r6}
- add r1, r1, r2
- pld [r1]
- ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
- subs r3, r3, #1
- NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
- stmia r0, {r10-r11}
- add r0, r0, r2
- bne 6b
- ldmfd sp!, {r4-r11,pc}
- .align 8
-3:
- ldmia r1, {r4-r6}
- add r1, r1, r2
- pld [r1]
- ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
-6: ldmia r1, {r7-r9}
- add r1, r1, r2
- pld [r1]
- ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9
- NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
- stmia r0, {r10-r11}
- add r0, r0, r2
- ldmia r1, {r4-r6}
- add r1, r1, r2
- pld [r1]
- ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
- subs r3, r3, #1
- NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
- stmia r0, {r10-r11}
- add r0, r0, r2
- bne 6b
- ldmfd sp!, {r4-r11,pc}
- .align 8
-4:
- ldmia r1, {r4-r6}
- add r1, r1, r2
- pld [r1]
- ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
-6: ldmia r1, {r7-r9}
- add r1, r1, r2
- pld [r1]
- ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9
- NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
- stmia r0, {r10-r11}
- add r0, r0, r2
- ldmia r1, {r4-r6}
- add r1, r1, r2
- pld [r1]
- ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
- subs r3, r3, #1
- NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
- stmia r0, {r10-r11}
- add r0, r0, r2
- bne 6b
- ldmfd sp!, {r4-r11,pc}
- .align 8
-5:
- .word 0xFEFEFEFE
- .word 2b
- .word 3b
- .word 4b
- .endfunc
-
-@ ----------------------------------------------------------------
-.macro RND_XY2_IT align
- @ l1= (a & 0x03030303) + (b & 0x03030303) ?(+ 0x02020202)
- @ h1= ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2)
-.if \align == 0
- ldmia r1, {r6-r8}
-.elseif \align == 3
- ldmia r1, {r5-r7}
-.else
- ldmia r1, {r8-r10}
-.endif
- add r1, r1, r2
- pld [r1]
-.if \align == 0
- ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r6, r7, r8
-.elseif \align == 1
- ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r8, r9, r10
- ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r8, r9, r10
-.elseif \align == 2
- ADJ_ALIGN_DOUBLEWORD_D 2, r4, r5, r8, r9, r10
- ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r8, r9, r10
-.elseif \align == 3
- ADJ_ALIGN_DOUBLEWORD_D 3, r4, r5, r5, r6, r7
-.endif
- ldr r14, [r12, #0] @ 0x03030303
- tst r3, #1
- and r8, r4, r14
- and r9, r5, r14
- and r10, r6, r14
- and r11, r7, r14
- ldreq r14, [r12, #16] @ 0x02020202/0x01010101
- add r8, r8, r10
- add r9, r9, r11
- addeq r8, r8, r14
- addeq r9, r9, r14
- ldr r14, [r12, #20] @ 0xFCFCFCFC >> 2
- and r4, r14, r4, lsr #2
- and r5, r14, r5, lsr #2
- and r6, r14, r6, lsr #2
- and r7, r14, r7, lsr #2
- add r10, r4, r6
- add r11, r5, r7
- subs r3, r3, #1
-.endm
-
-.macro RND_XY2_EXPAND align
- RND_XY2_IT \align
-6: stmfd sp!, {r8-r11}
- RND_XY2_IT \align
- ldmfd sp!, {r4-r7}
- add r4, r4, r8
- add r5, r5, r9
- add r6, r6, r10
- add r7, r7, r11
- ldr r14, [r12, #24] @ 0x0F0F0F0F
- and r4, r14, r4, lsr #2
- and r5, r14, r5, lsr #2
- add r4, r4, r6
- add r5, r5, r7
- stmia r0, {r4-r5}
- add r0, r0, r2
- bge 6b
- ldmfd sp!, {r4-r11,pc}
-.endm
-
- .align 8
-function put_pixels8_xy2_arm, export=1
- @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
- @ block = word aligned, pixles = unaligned
- pld [r1]
- stmfd sp!, {r4-r11,lr} @ R14 is also called LR
- adrl r12, 5f
- ands r4, r1, #3
- add r5, r12, r4, lsl #2
- bic r1, r1, #3
- ldrne pc, [r5]
-1:
- RND_XY2_EXPAND 0
-
- .align 8
-2:
- RND_XY2_EXPAND 1
-
- .align 8
-3:
- RND_XY2_EXPAND 2
-
- .align 8
-4:
- RND_XY2_EXPAND 3
-
-5:
- .word 0x03030303
- .word 2b
- .word 3b
- .word 4b
- .word 0x02020202
- .word 0xFCFCFCFC >> 2
- .word 0x0F0F0F0F
- .endfunc
-
- .align 8
-function put_no_rnd_pixels8_xy2_arm, export=1
- @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
- @ block = word aligned, pixles = unaligned
- pld [r1]
- stmfd sp!, {r4-r11,lr} @ R14 is also called LR
- adrl r12, 5f
- ands r4, r1, #3
- add r5, r12, r4, lsl #2
- bic r1, r1, #3
- ldrne pc, [r5]
-1:
- RND_XY2_EXPAND 0
-
- .align 8
-2:
- RND_XY2_EXPAND 1
-
- .align 8
-3:
- RND_XY2_EXPAND 2
-
- .align 8
-4:
- RND_XY2_EXPAND 3
-
-5:
- .word 0x03030303
- .word 2b
- .word 3b
- .word 4b
- .word 0x01010101
- .word 0xFCFCFCFC >> 2
- .word 0x0F0F0F0F
- .endfunc
-
-@ void ff_add_pixels_clamped_ARM(int16_t *block, uint8_t *dest, int stride)
-function ff_add_pixels_clamped_ARM, export=1
- push {r4-r10}
- mov r10, #8
-1:
- ldr r4, [r1] /* load dest */
- /* block[0] and block[1]*/
- ldrsh r5, [r0]
- ldrsh r7, [r0, #2]
- and r6, r4, #0xFF
- and r8, r4, #0xFF00
- add r6, r5, r6
- add r8, r7, r8, lsr #8
- mvn r5, r5
- mvn r7, r7
- tst r6, #0x100
- movne r6, r5, lsr #24
- tst r8, #0x100
- movne r8, r7, lsr #24
- mov r9, r6
- ldrsh r5, [r0, #4] /* moved form [A] */
- orr r9, r9, r8, lsl #8
- /* block[2] and block[3] */
- /* [A] */
- ldrsh r7, [r0, #6]
- and r6, r4, #0xFF0000
- and r8, r4, #0xFF000000
- add r6, r5, r6, lsr #16
- add r8, r7, r8, lsr #24
- mvn r5, r5
- mvn r7, r7
- tst r6, #0x100
- movne r6, r5, lsr #24
- tst r8, #0x100
- movne r8, r7, lsr #24
- orr r9, r9, r6, lsl #16
- ldr r4, [r1, #4] /* moved form [B] */
- orr r9, r9, r8, lsl #24
- /* store dest */
- ldrsh r5, [r0, #8] /* moved form [C] */
- str r9, [r1]
-
- /* load dest */
- /* [B] */
- /* block[4] and block[5] */
- /* [C] */
- ldrsh r7, [r0, #10]
- and r6, r4, #0xFF
- and r8, r4, #0xFF00
- add r6, r5, r6
- add r8, r7, r8, lsr #8
- mvn r5, r5
- mvn r7, r7
- tst r6, #0x100
- movne r6, r5, lsr #24
- tst r8, #0x100
- movne r8, r7, lsr #24
- mov r9, r6
- ldrsh r5, [r0, #12] /* moved from [D] */
- orr r9, r9, r8, lsl #8
- /* block[6] and block[7] */
- /* [D] */
- ldrsh r7, [r0, #14]
- and r6, r4, #0xFF0000
- and r8, r4, #0xFF000000
- add r6, r5, r6, lsr #16
- add r8, r7, r8, lsr #24
- mvn r5, r5
- mvn r7, r7
- tst r6, #0x100
- movne r6, r5, lsr #24
- tst r8, #0x100
- movne r8, r7, lsr #24
- orr r9, r9, r6, lsl #16
- add r0, r0, #16 /* moved from [E] */
- orr r9, r9, r8, lsl #24
- subs r10, r10, #1 /* moved from [F] */
- /* store dest */
- str r9, [r1, #4]
-
- /* [E] */
- /* [F] */
- add r1, r1, r2
- bne 1b
-
- pop {r4-r10}
- bx lr
- .endfunc
diff --git a/libavcodec/armv4l/dsputil_iwmmxt_rnd_template.c b/libavcodec/armv4l/dsputil_iwmmxt_rnd_template.c
deleted file mode 100644
index fddbdae..0000000
--- a/libavcodec/armv4l/dsputil_iwmmxt_rnd_template.c
+++ /dev/null
@@ -1,1118 +0,0 @@
-/*
- * iWMMXt optimized DSP utils
- * copyright (c) 2004 AGAWA Koji
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-/* This header intentionally has no multiple inclusion guards. It is meant to
- * be included multiple times and generates different code depending on the
- * value of certain #defines. */
-
-void DEF(put, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
-{
- int stride = line_size;
- __asm__ volatile (
- "and r12, %[pixels], #7 \n\t"
- "bic %[pixels], %[pixels], #7 \n\t"
- "tmcr wcgr1, r12 \n\t"
- "add r4, %[pixels], %[line_size] \n\t"
- "add r5, %[block], %[line_size] \n\t"
- "mov %[line_size], %[line_size], lsl #1 \n\t"
- "1: \n\t"
- "wldrd wr0, [%[pixels]] \n\t"
- "subs %[h], %[h], #2 \n\t"
- "wldrd wr1, [%[pixels], #8] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "wldrd wr3, [r4] \n\t"
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "wldrd wr4, [r4, #8] \n\t"
- "add r4, r4, %[line_size] \n\t"
- "walignr1 wr8, wr0, wr1 \n\t"
- "pld [r4] \n\t"
- "pld [r4, #32] \n\t"
- "walignr1 wr10, wr3, wr4 \n\t"
- "wstrd wr8, [%[block]] \n\t"
- "add %[block], %[block], %[line_size] \n\t"
- "wstrd wr10, [r5] \n\t"
- "add r5, r5, %[line_size] \n\t"
- "bne 1b \n\t"
- : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h)
- :
- : "memory", "r4", "r5", "r12");
-}
-
-void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
-{
- int stride = line_size;
- __asm__ volatile (
- "and r12, %[pixels], #7 \n\t"
- "bic %[pixels], %[pixels], #7 \n\t"
- "tmcr wcgr1, r12 \n\t"
- "add r4, %[pixels], %[line_size] \n\t"
- "add r5, %[block], %[line_size] \n\t"
- "mov %[line_size], %[line_size], lsl #1 \n\t"
- "1: \n\t"
- "wldrd wr0, [%[pixels]] \n\t"
- "subs %[h], %[h], #2 \n\t"
- "wldrd wr1, [%[pixels], #8] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "wldrd wr3, [r4] \n\t"
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "wldrd wr4, [r4, #8] \n\t"
- "add r4, r4, %[line_size] \n\t"
- "walignr1 wr8, wr0, wr1 \n\t"
- "wldrd wr0, [%[block]] \n\t"
- "wldrd wr2, [r5] \n\t"
- "pld [r4] \n\t"
- "pld [r4, #32] \n\t"
- "walignr1 wr10, wr3, wr4 \n\t"
- WAVG2B" wr8, wr8, wr0 \n\t"
- WAVG2B" wr10, wr10, wr2 \n\t"
- "wstrd wr8, [%[block]] \n\t"
- "add %[block], %[block], %[line_size] \n\t"
- "wstrd wr10, [r5] \n\t"
- "pld [%[block]] \n\t"
- "pld [%[block], #32] \n\t"
- "add r5, r5, %[line_size] \n\t"
- "pld [r5] \n\t"
- "pld [r5, #32] \n\t"
- "bne 1b \n\t"
- : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h)
- :
- : "memory", "r4", "r5", "r12");
-}
-
-void DEF(put, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
-{
- int stride = line_size;
- __asm__ volatile (
- "and r12, %[pixels], #7 \n\t"
- "bic %[pixels], %[pixels], #7 \n\t"
- "tmcr wcgr1, r12 \n\t"
- "add r4, %[pixels], %[line_size] \n\t"
- "add r5, %[block], %[line_size] \n\t"
- "mov %[line_size], %[line_size], lsl #1 \n\t"
- "1: \n\t"
- "wldrd wr0, [%[pixels]] \n\t"
- "wldrd wr1, [%[pixels], #8] \n\t"
- "subs %[h], %[h], #2 \n\t"
- "wldrd wr2, [%[pixels], #16] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "wldrd wr3, [r4] \n\t"
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "walignr1 wr8, wr0, wr1 \n\t"
- "wldrd wr4, [r4, #8] \n\t"
- "walignr1 wr9, wr1, wr2 \n\t"
- "wldrd wr5, [r4, #16] \n\t"
- "add r4, r4, %[line_size] \n\t"
- "pld [r4] \n\t"
- "pld [r4, #32] \n\t"
- "walignr1 wr10, wr3, wr4 \n\t"
- "wstrd wr8, [%[block]] \n\t"
- "walignr1 wr11, wr4, wr5 \n\t"
- "wstrd wr9, [%[block], #8] \n\t"
- "add %[block], %[block], %[line_size] \n\t"
- "wstrd wr10, [r5] \n\t"
- "wstrd wr11, [r5, #8] \n\t"
- "add r5, r5, %[line_size] \n\t"
- "bne 1b \n\t"
- : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h)
- :
- : "memory", "r4", "r5", "r12");
-}
-
-void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
-{
- int stride = line_size;
- __asm__ volatile (
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "pld [%[block]] \n\t"
- "pld [%[block], #32] \n\t"
- "and r12, %[pixels], #7 \n\t"
- "bic %[pixels], %[pixels], #7 \n\t"
- "tmcr wcgr1, r12 \n\t"
- "add r4, %[pixels], %[line_size]\n\t"
- "add r5, %[block], %[line_size] \n\t"
- "mov %[line_size], %[line_size], lsl #1 \n\t"
- "1: \n\t"
- "wldrd wr0, [%[pixels]] \n\t"
- "wldrd wr1, [%[pixels], #8] \n\t"
- "subs %[h], %[h], #2 \n\t"
- "wldrd wr2, [%[pixels], #16] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "wldrd wr3, [r4] \n\t"
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "walignr1 wr8, wr0, wr1 \n\t"
- "wldrd wr4, [r4, #8] \n\t"
- "walignr1 wr9, wr1, wr2 \n\t"
- "wldrd wr5, [r4, #16] \n\t"
- "add r4, r4, %[line_size] \n\t"
- "wldrd wr0, [%[block]] \n\t"
- "pld [r4] \n\t"
- "wldrd wr1, [%[block], #8] \n\t"
- "pld [r4, #32] \n\t"
- "wldrd wr2, [r5] \n\t"
- "walignr1 wr10, wr3, wr4 \n\t"
- "wldrd wr3, [r5, #8] \n\t"
- WAVG2B" wr8, wr8, wr0 \n\t"
- WAVG2B" wr9, wr9, wr1 \n\t"
- WAVG2B" wr10, wr10, wr2 \n\t"
- "wstrd wr8, [%[block]] \n\t"
- "walignr1 wr11, wr4, wr5 \n\t"
- WAVG2B" wr11, wr11, wr3 \n\t"
- "wstrd wr9, [%[block], #8] \n\t"
- "add %[block], %[block], %[line_size] \n\t"
- "wstrd wr10, [r5] \n\t"
- "pld [%[block]] \n\t"
- "pld [%[block], #32] \n\t"
- "wstrd wr11, [r5, #8] \n\t"
- "add r5, r5, %[line_size] \n\t"
- "pld [r5] \n\t"
- "pld [r5, #32] \n\t"
- "bne 1b \n\t"
- : [block]"+r"(block), [pixels]"+r"(pixels), [line_size]"+r"(stride), [h]"+r"(h)
- :
- : "memory", "r4", "r5", "r12");
-}
-
-void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
-{
- int stride = line_size;
- // [wr0 wr1 wr2 wr3] for previous line
- // [wr4 wr5 wr6 wr7] for current line
- SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
- __asm__ volatile(
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "and r12, %[pixels], #7 \n\t"
- "bic %[pixels], %[pixels], #7 \n\t"
- "tmcr wcgr1, r12 \n\t"
- "add r12, r12, #1 \n\t"
- "add r4, %[pixels], %[line_size]\n\t"
- "tmcr wcgr2, r12 \n\t"
- "add r5, %[block], %[line_size] \n\t"
- "mov %[line_size], %[line_size], lsl #1 \n\t"
-
- "1: \n\t"
- "wldrd wr10, [%[pixels]] \n\t"
- "cmp r12, #8 \n\t"
- "wldrd wr11, [%[pixels], #8] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "wldrd wr13, [r4] \n\t"
- "pld [%[pixels]] \n\t"
- "wldrd wr14, [r4, #8] \n\t"
- "pld [%[pixels], #32] \n\t"
- "add r4, r4, %[line_size] \n\t"
- "walignr1 wr0, wr10, wr11 \n\t"
- "pld [r4] \n\t"
- "pld [r4, #32] \n\t"
- "walignr1 wr2, wr13, wr14 \n\t"
- "wmoveq wr4, wr11 \n\t"
- "wmoveq wr6, wr14 \n\t"
- "walignr2ne wr4, wr10, wr11 \n\t"
- "walignr2ne wr6, wr13, wr14 \n\t"
- WAVG2B" wr0, wr0, wr4 \n\t"
- WAVG2B" wr2, wr2, wr6 \n\t"
- "wstrd wr0, [%[block]] \n\t"
- "subs %[h], %[h], #2 \n\t"
- "wstrd wr2, [r5] \n\t"
- "add %[block], %[block], %[line_size] \n\t"
- "add r5, r5, %[line_size] \n\t"
- "bne 1b \n\t"
- : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
- :
- : "r4", "r5", "r12", "memory");
-}
-
-void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
-{
- int stride = line_size;
- // [wr0 wr1 wr2 wr3] for previous line
- // [wr4 wr5 wr6 wr7] for current line
- SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
- __asm__ volatile(
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "and r12, %[pixels], #7 \n\t"
- "bic %[pixels], %[pixels], #7 \n\t"
- "tmcr wcgr1, r12 \n\t"
- "add r12, r12, #1 \n\t"
- "add r4, %[pixels], %[line_size]\n\t"
- "tmcr wcgr2, r12 \n\t"
- "add r5, %[block], %[line_size] \n\t"
- "mov %[line_size], %[line_size], lsl #1 \n\t"
-
- "1: \n\t"
- "wldrd wr10, [%[pixels]] \n\t"
- "cmp r12, #8 \n\t"
- "wldrd wr11, [%[pixels], #8] \n\t"
- "wldrd wr12, [%[pixels], #16] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "wldrd wr13, [r4] \n\t"
- "pld [%[pixels]] \n\t"
- "wldrd wr14, [r4, #8] \n\t"
- "pld [%[pixels], #32] \n\t"
- "wldrd wr15, [r4, #16] \n\t"
- "add r4, r4, %[line_size] \n\t"
- "walignr1 wr0, wr10, wr11 \n\t"
- "pld [r4] \n\t"
- "pld [r4, #32] \n\t"
- "walignr1 wr1, wr11, wr12 \n\t"
- "walignr1 wr2, wr13, wr14 \n\t"
- "walignr1 wr3, wr14, wr15 \n\t"
- "wmoveq wr4, wr11 \n\t"
- "wmoveq wr5, wr12 \n\t"
- "wmoveq wr6, wr14 \n\t"
- "wmoveq wr7, wr15 \n\t"
- "walignr2ne wr4, wr10, wr11 \n\t"
- "walignr2ne wr5, wr11, wr12 \n\t"
- "walignr2ne wr6, wr13, wr14 \n\t"
- "walignr2ne wr7, wr14, wr15 \n\t"
- WAVG2B" wr0, wr0, wr4 \n\t"
- WAVG2B" wr1, wr1, wr5 \n\t"
- "wstrd wr0, [%[block]] \n\t"
- WAVG2B" wr2, wr2, wr6 \n\t"
- "wstrd wr1, [%[block], #8] \n\t"
- WAVG2B" wr3, wr3, wr7 \n\t"
- "add %[block], %[block], %[line_size] \n\t"
- "wstrd wr2, [r5] \n\t"
- "subs %[h], %[h], #2 \n\t"
- "wstrd wr3, [r5, #8] \n\t"
- "add r5, r5, %[line_size] \n\t"
- "bne 1b \n\t"
- : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
- :
- : "r4", "r5", "r12", "memory");
-}
-
-void DEF(avg, pixels8_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
-{
- int stride = line_size;
- // [wr0 wr1 wr2 wr3] for previous line
- // [wr4 wr5 wr6 wr7] for current line
- SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
- __asm__ volatile(
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "pld [%[block]] \n\t"
- "pld [%[block], #32] \n\t"
- "and r12, %[pixels], #7 \n\t"
- "bic %[pixels], %[pixels], #7 \n\t"
- "tmcr wcgr1, r12 \n\t"
- "add r12, r12, #1 \n\t"
- "add r4, %[pixels], %[line_size]\n\t"
- "tmcr wcgr2, r12 \n\t"
- "add r5, %[block], %[line_size] \n\t"
- "mov %[line_size], %[line_size], lsl #1 \n\t"
- "pld [r5] \n\t"
- "pld [r5, #32] \n\t"
-
- "1: \n\t"
- "wldrd wr10, [%[pixels]] \n\t"
- "cmp r12, #8 \n\t"
- "wldrd wr11, [%[pixels], #8] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "wldrd wr13, [r4] \n\t"
- "pld [%[pixels]] \n\t"
- "wldrd wr14, [r4, #8] \n\t"
- "pld [%[pixels], #32] \n\t"
- "add r4, r4, %[line_size] \n\t"
- "walignr1 wr0, wr10, wr11 \n\t"
- "pld [r4] \n\t"
- "pld [r4, #32] \n\t"
- "walignr1 wr2, wr13, wr14 \n\t"
- "wmoveq wr4, wr11 \n\t"
- "wmoveq wr6, wr14 \n\t"
- "walignr2ne wr4, wr10, wr11 \n\t"
- "wldrd wr10, [%[block]] \n\t"
- "walignr2ne wr6, wr13, wr14 \n\t"
- "wldrd wr12, [r5] \n\t"
- WAVG2B" wr0, wr0, wr4 \n\t"
- WAVG2B" wr2, wr2, wr6 \n\t"
- WAVG2B" wr0, wr0, wr10 \n\t"
- WAVG2B" wr2, wr2, wr12 \n\t"
- "wstrd wr0, [%[block]] \n\t"
- "subs %[h], %[h], #2 \n\t"
- "wstrd wr2, [r5] \n\t"
- "add %[block], %[block], %[line_size] \n\t"
- "add r5, r5, %[line_size] \n\t"
- "pld [%[block]] \n\t"
- "pld [%[block], #32] \n\t"
- "pld [r5] \n\t"
- "pld [r5, #32] \n\t"
- "bne 1b \n\t"
- : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
- :
- : "r4", "r5", "r12", "memory");
-}
-
-void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
-{
- int stride = line_size;
- // [wr0 wr1 wr2 wr3] for previous line
- // [wr4 wr5 wr6 wr7] for current line
- SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
- __asm__ volatile(
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "pld [%[block]] \n\t"
- "pld [%[block], #32] \n\t"
- "and r12, %[pixels], #7 \n\t"
- "bic %[pixels], %[pixels], #7 \n\t"
- "tmcr wcgr1, r12 \n\t"
- "add r12, r12, #1 \n\t"
- "add r4, %[pixels], %[line_size]\n\t"
- "tmcr wcgr2, r12 \n\t"
- "add r5, %[block], %[line_size] \n\t"
- "mov %[line_size], %[line_size], lsl #1 \n\t"
- "pld [r5] \n\t"
- "pld [r5, #32] \n\t"
-
- "1: \n\t"
- "wldrd wr10, [%[pixels]] \n\t"
- "cmp r12, #8 \n\t"
- "wldrd wr11, [%[pixels], #8] \n\t"
- "wldrd wr12, [%[pixels], #16] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "wldrd wr13, [r4] \n\t"
- "pld [%[pixels]] \n\t"
- "wldrd wr14, [r4, #8] \n\t"
- "pld [%[pixels], #32] \n\t"
- "wldrd wr15, [r4, #16] \n\t"
- "add r4, r4, %[line_size] \n\t"
- "walignr1 wr0, wr10, wr11 \n\t"
- "pld [r4] \n\t"
- "pld [r4, #32] \n\t"
- "walignr1 wr1, wr11, wr12 \n\t"
- "walignr1 wr2, wr13, wr14 \n\t"
- "walignr1 wr3, wr14, wr15 \n\t"
- "wmoveq wr4, wr11 \n\t"
- "wmoveq wr5, wr12 \n\t"
- "wmoveq wr6, wr14 \n\t"
- "wmoveq wr7, wr15 \n\t"
- "walignr2ne wr4, wr10, wr11 \n\t"
- "walignr2ne wr5, wr11, wr12 \n\t"
- "walignr2ne wr6, wr13, wr14 \n\t"
- "walignr2ne wr7, wr14, wr15 \n\t"
- "wldrd wr10, [%[block]] \n\t"
- WAVG2B" wr0, wr0, wr4 \n\t"
- "wldrd wr11, [%[block], #8] \n\t"
- WAVG2B" wr1, wr1, wr5 \n\t"
- "wldrd wr12, [r5] \n\t"
- WAVG2B" wr2, wr2, wr6 \n\t"
- "wldrd wr13, [r5, #8] \n\t"
- WAVG2B" wr3, wr3, wr7 \n\t"
- WAVG2B" wr0, wr0, wr10 \n\t"
- WAVG2B" wr1, wr1, wr11 \n\t"
- WAVG2B" wr2, wr2, wr12 \n\t"
- WAVG2B" wr3, wr3, wr13 \n\t"
- "wstrd wr0, [%[block]] \n\t"
- "subs %[h], %[h], #2 \n\t"
- "wstrd wr1, [%[block], #8] \n\t"
- "add %[block], %[block], %[line_size] \n\t"
- "wstrd wr2, [r5] \n\t"
- "pld [%[block]] \n\t"
- "wstrd wr3, [r5, #8] \n\t"
- "add r5, r5, %[line_size] \n\t"
- "pld [%[block], #32] \n\t"
- "pld [r5] \n\t"
- "pld [r5, #32] \n\t"
- "bne 1b \n\t"
- : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
- :
- :"r4", "r5", "r12", "memory");
-}
-
-void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
-{
- int stride = line_size;
- // [wr0 wr1 wr2 wr3] for previous line
- // [wr4 wr5 wr6 wr7] for current line
- __asm__ volatile(
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "and r12, %[pixels], #7 \n\t"
- "tmcr wcgr1, r12 \n\t"
- "bic %[pixels], %[pixels], #7 \n\t"
-
- "wldrd wr10, [%[pixels]] \n\t"
- "wldrd wr11, [%[pixels], #8] \n\t"
- "pld [%[block]] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "walignr1 wr0, wr10, wr11 \n\t"
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
-
- "1: \n\t"
- "wldrd wr10, [%[pixels]] \n\t"
- "wldrd wr11, [%[pixels], #8] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "walignr1 wr4, wr10, wr11 \n\t"
- "wldrd wr10, [%[block]] \n\t"
- WAVG2B" wr8, wr0, wr4 \n\t"
- WAVG2B" wr8, wr8, wr10 \n\t"
- "wstrd wr8, [%[block]] \n\t"
- "add %[block], %[block], %[line_size] \n\t"
-
- "wldrd wr10, [%[pixels]] \n\t"
- "wldrd wr11, [%[pixels], #8] \n\t"
- "pld [%[block]] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "walignr1 wr0, wr10, wr11 \n\t"
- "wldrd wr10, [%[block]] \n\t"
- WAVG2B" wr8, wr0, wr4 \n\t"
- WAVG2B" wr8, wr8, wr10 \n\t"
- "wstrd wr8, [%[block]] \n\t"
- "add %[block], %[block], %[line_size] \n\t"
-
- "subs %[h], %[h], #2 \n\t"
- "pld [%[block]] \n\t"
- "bne 1b \n\t"
- : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
- :
- : "cc", "memory", "r12");
-}
-
-void DEF(put, pixels16_y2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
-{
- int stride = line_size;
- // [wr0 wr1 wr2 wr3] for previous line
- // [wr4 wr5 wr6 wr7] for current line
- __asm__ volatile(
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "and r12, %[pixels], #7 \n\t"
- "tmcr wcgr1, r12 \n\t"
- "bic %[pixels], %[pixels], #7 \n\t"
-
- "wldrd wr10, [%[pixels]] \n\t"
- "wldrd wr11, [%[pixels], #8] \n\t"
- "wldrd wr12, [%[pixels], #16] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "walignr1 wr0, wr10, wr11 \n\t"
- "walignr1 wr1, wr11, wr12 \n\t"
-
- "1: \n\t"
- "wldrd wr10, [%[pixels]] \n\t"
- "wldrd wr11, [%[pixels], #8] \n\t"
- "wldrd wr12, [%[pixels], #16] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "walignr1 wr4, wr10, wr11 \n\t"
- "walignr1 wr5, wr11, wr12 \n\t"
- WAVG2B" wr8, wr0, wr4 \n\t"
- WAVG2B" wr9, wr1, wr5 \n\t"
- "wstrd wr8, [%[block]] \n\t"
- "wstrd wr9, [%[block], #8] \n\t"
- "add %[block], %[block], %[line_size] \n\t"
-
- "wldrd wr10, [%[pixels]] \n\t"
- "wldrd wr11, [%[pixels], #8] \n\t"
- "wldrd wr12, [%[pixels], #16] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "walignr1 wr0, wr10, wr11 \n\t"
- "walignr1 wr1, wr11, wr12 \n\t"
- WAVG2B" wr8, wr0, wr4 \n\t"
- WAVG2B" wr9, wr1, wr5 \n\t"
- "wstrd wr8, [%[block]] \n\t"
- "wstrd wr9, [%[block], #8] \n\t"
- "add %[block], %[block], %[line_size] \n\t"
-
- "subs %[h], %[h], #2 \n\t"
- "bne 1b \n\t"
- : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
- :
- : "r4", "r5", "r12", "memory");
-}
-
-void DEF(avg, pixels16_y2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
-{
- int stride = line_size;
- // [wr0 wr1 wr2 wr3] for previous line
- // [wr4 wr5 wr6 wr7] for current line
- __asm__ volatile(
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "and r12, %[pixels], #7 \n\t"
- "tmcr wcgr1, r12 \n\t"
- "bic %[pixels], %[pixels], #7 \n\t"
-
- "wldrd wr10, [%[pixels]] \n\t"
- "wldrd wr11, [%[pixels], #8] \n\t"
- "pld [%[block]] \n\t"
- "wldrd wr12, [%[pixels], #16] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "walignr1 wr0, wr10, wr11 \n\t"
- "walignr1 wr1, wr11, wr12 \n\t"
-
- "1: \n\t"
- "wldrd wr10, [%[pixels]] \n\t"
- "wldrd wr11, [%[pixels], #8] \n\t"
- "wldrd wr12, [%[pixels], #16] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "walignr1 wr4, wr10, wr11 \n\t"
- "walignr1 wr5, wr11, wr12 \n\t"
- "wldrd wr10, [%[block]] \n\t"
- "wldrd wr11, [%[block], #8] \n\t"
- WAVG2B" wr8, wr0, wr4 \n\t"
- WAVG2B" wr9, wr1, wr5 \n\t"
- WAVG2B" wr8, wr8, wr10 \n\t"
- WAVG2B" wr9, wr9, wr11 \n\t"
- "wstrd wr8, [%[block]] \n\t"
- "wstrd wr9, [%[block], #8] \n\t"
- "add %[block], %[block], %[line_size] \n\t"
-
- "wldrd wr10, [%[pixels]] \n\t"
- "wldrd wr11, [%[pixels], #8] \n\t"
- "pld [%[block]] \n\t"
- "wldrd wr12, [%[pixels], #16] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "walignr1 wr0, wr10, wr11 \n\t"
- "walignr1 wr1, wr11, wr12 \n\t"
- "wldrd wr10, [%[block]] \n\t"
- "wldrd wr11, [%[block], #8] \n\t"
- WAVG2B" wr8, wr0, wr4 \n\t"
- WAVG2B" wr9, wr1, wr5 \n\t"
- WAVG2B" wr8, wr8, wr10 \n\t"
- WAVG2B" wr9, wr9, wr11 \n\t"
- "wstrd wr8, [%[block]] \n\t"
- "wstrd wr9, [%[block], #8] \n\t"
- "add %[block], %[block], %[line_size] \n\t"
-
- "subs %[h], %[h], #2 \n\t"
- "pld [%[block]] \n\t"
- "bne 1b \n\t"
- : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block), [line_size]"+r"(stride)
- :
- : "r4", "r5", "r12", "memory");
-}
-
-void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
-{
- // [wr0 wr1 wr2 wr3] for previous line
- // [wr4 wr5 wr6 wr7] for current line
- SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
- __asm__ volatile(
- "pld [%[pixels]] \n\t"
- "mov r12, #2 \n\t"
- "pld [%[pixels], #32] \n\t"
- "tmcr wcgr0, r12 \n\t" /* for shift value */
- "and r12, %[pixels], #7 \n\t"
- "bic %[pixels], %[pixels], #7 \n\t"
- "tmcr wcgr1, r12 \n\t"
-
- // [wr0 wr1 wr2 wr3] <= *
- // [wr4 wr5 wr6 wr7]
- "wldrd wr12, [%[pixels]] \n\t"
- "add r12, r12, #1 \n\t"
- "wldrd wr13, [%[pixels], #8] \n\t"
- "tmcr wcgr2, r12 \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "cmp r12, #8 \n\t"
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "walignr1 wr2, wr12, wr13 \n\t"
- "wmoveq wr10, wr13 \n\t"
- "walignr2ne wr10, wr12, wr13 \n\t"
- "wunpckelub wr0, wr2 \n\t"
- "wunpckehub wr1, wr2 \n\t"
- "wunpckelub wr8, wr10 \n\t"
- "wunpckehub wr9, wr10 \n\t"
- "waddhus wr0, wr0, wr8 \n\t"
- "waddhus wr1, wr1, wr9 \n\t"
-
- "1: \n\t"
- // [wr0 wr1 wr2 wr3]
- // [wr4 wr5 wr6 wr7] <= *
- "wldrd wr12, [%[pixels]] \n\t"
- "cmp r12, #8 \n\t"
- "wldrd wr13, [%[pixels], #8] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "walignr1 wr6, wr12, wr13 \n\t"
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "wmoveq wr10, wr13 \n\t"
- "walignr2ne wr10, wr12, wr13 \n\t"
- "wunpckelub wr4, wr6 \n\t"
- "wunpckehub wr5, wr6 \n\t"
- "wunpckelub wr8, wr10 \n\t"
- "wunpckehub wr9, wr10 \n\t"
- "waddhus wr4, wr4, wr8 \n\t"
- "waddhus wr5, wr5, wr9 \n\t"
- "waddhus wr8, wr0, wr4 \n\t"
- "waddhus wr9, wr1, wr5 \n\t"
- "waddhus wr8, wr8, wr15 \n\t"
- "waddhus wr9, wr9, wr15 \n\t"
- "wsrlhg wr8, wr8, wcgr0 \n\t"
- "wsrlhg wr9, wr9, wcgr0 \n\t"
- "wpackhus wr8, wr8, wr9 \n\t"
- "wstrd wr8, [%[block]] \n\t"
- "add %[block], %[block], %[line_size] \n\t"
-
- // [wr0 wr1 wr2 wr3] <= *
- // [wr4 wr5 wr6 wr7]
- "wldrd wr12, [%[pixels]] \n\t"
- "wldrd wr13, [%[pixels], #8] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "walignr1 wr2, wr12, wr13 \n\t"
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "wmoveq wr10, wr13 \n\t"
- "walignr2ne wr10, wr12, wr13 \n\t"
- "wunpckelub wr0, wr2 \n\t"
- "wunpckehub wr1, wr2 \n\t"
- "wunpckelub wr8, wr10 \n\t"
- "wunpckehub wr9, wr10 \n\t"
- "waddhus wr0, wr0, wr8 \n\t"
- "waddhus wr1, wr1, wr9 \n\t"
- "waddhus wr8, wr0, wr4 \n\t"
- "waddhus wr9, wr1, wr5 \n\t"
- "waddhus wr8, wr8, wr15 \n\t"
- "waddhus wr9, wr9, wr15 \n\t"
- "wsrlhg wr8, wr8, wcgr0 \n\t"
- "wsrlhg wr9, wr9, wcgr0 \n\t"
- "wpackhus wr8, wr8, wr9 \n\t"
- "subs %[h], %[h], #2 \n\t"
- "wstrd wr8, [%[block]] \n\t"
- "add %[block], %[block], %[line_size] \n\t"
- "bne 1b \n\t"
- : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block)
- : [line_size]"r"(line_size)
- : "r12", "memory");
-}
-
-void DEF(put, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
-{
- // [wr0 wr1 wr2 wr3] for previous line
- // [wr4 wr5 wr6 wr7] for current line
- SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
- __asm__ volatile(
- "pld [%[pixels]] \n\t"
- "mov r12, #2 \n\t"
- "pld [%[pixels], #32] \n\t"
- "tmcr wcgr0, r12 \n\t" /* for shift value */
- /* alignment */
- "and r12, %[pixels], #7 \n\t"
- "bic %[pixels], %[pixels], #7 \n\t"
- "tmcr wcgr1, r12 \n\t"
- "add r12, r12, #1 \n\t"
- "tmcr wcgr2, r12 \n\t"
-
- // [wr0 wr1 wr2 wr3] <= *
- // [wr4 wr5 wr6 wr7]
- "wldrd wr12, [%[pixels]] \n\t"
- "cmp r12, #8 \n\t"
- "wldrd wr13, [%[pixels], #8] \n\t"
- "wldrd wr14, [%[pixels], #16] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "pld [%[pixels]] \n\t"
- "walignr1 wr2, wr12, wr13 \n\t"
- "pld [%[pixels], #32] \n\t"
- "walignr1 wr3, wr13, wr14 \n\t"
- "wmoveq wr10, wr13 \n\t"
- "wmoveq wr11, wr14 \n\t"
- "walignr2ne wr10, wr12, wr13 \n\t"
- "walignr2ne wr11, wr13, wr14 \n\t"
- "wunpckelub wr0, wr2 \n\t"
- "wunpckehub wr1, wr2 \n\t"
- "wunpckelub wr2, wr3 \n\t"
- "wunpckehub wr3, wr3 \n\t"
- "wunpckelub wr8, wr10 \n\t"
- "wunpckehub wr9, wr10 \n\t"
- "wunpckelub wr10, wr11 \n\t"
- "wunpckehub wr11, wr11 \n\t"
- "waddhus wr0, wr0, wr8 \n\t"
- "waddhus wr1, wr1, wr9 \n\t"
- "waddhus wr2, wr2, wr10 \n\t"
- "waddhus wr3, wr3, wr11 \n\t"
-
- "1: \n\t"
- // [wr0 wr1 wr2 wr3]
- // [wr4 wr5 wr6 wr7] <= *
- "wldrd wr12, [%[pixels]] \n\t"
- "cmp r12, #8 \n\t"
- "wldrd wr13, [%[pixels], #8] \n\t"
- "wldrd wr14, [%[pixels], #16] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "walignr1 wr6, wr12, wr13 \n\t"
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "walignr1 wr7, wr13, wr14 \n\t"
- "wmoveq wr10, wr13 \n\t"
- "wmoveq wr11, wr14 \n\t"
- "walignr2ne wr10, wr12, wr13 \n\t"
- "walignr2ne wr11, wr13, wr14 \n\t"
- "wunpckelub wr4, wr6 \n\t"
- "wunpckehub wr5, wr6 \n\t"
- "wunpckelub wr6, wr7 \n\t"
- "wunpckehub wr7, wr7 \n\t"
- "wunpckelub wr8, wr10 \n\t"
- "wunpckehub wr9, wr10 \n\t"
- "wunpckelub wr10, wr11 \n\t"
- "wunpckehub wr11, wr11 \n\t"
- "waddhus wr4, wr4, wr8 \n\t"
- "waddhus wr5, wr5, wr9 \n\t"
- "waddhus wr6, wr6, wr10 \n\t"
- "waddhus wr7, wr7, wr11 \n\t"
- "waddhus wr8, wr0, wr4 \n\t"
- "waddhus wr9, wr1, wr5 \n\t"
- "waddhus wr10, wr2, wr6 \n\t"
- "waddhus wr11, wr3, wr7 \n\t"
- "waddhus wr8, wr8, wr15 \n\t"
- "waddhus wr9, wr9, wr15 \n\t"
- "waddhus wr10, wr10, wr15 \n\t"
- "waddhus wr11, wr11, wr15 \n\t"
- "wsrlhg wr8, wr8, wcgr0 \n\t"
- "wsrlhg wr9, wr9, wcgr0 \n\t"
- "wsrlhg wr10, wr10, wcgr0 \n\t"
- "wsrlhg wr11, wr11, wcgr0 \n\t"
- "wpackhus wr8, wr8, wr9 \n\t"
- "wpackhus wr9, wr10, wr11 \n\t"
- "wstrd wr8, [%[block]] \n\t"
- "wstrd wr9, [%[block], #8] \n\t"
- "add %[block], %[block], %[line_size] \n\t"
-
- // [wr0 wr1 wr2 wr3] <= *
- // [wr4 wr5 wr6 wr7]
- "wldrd wr12, [%[pixels]] \n\t"
- "wldrd wr13, [%[pixels], #8] \n\t"
- "wldrd wr14, [%[pixels], #16] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "walignr1 wr2, wr12, wr13 \n\t"
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "walignr1 wr3, wr13, wr14 \n\t"
- "wmoveq wr10, wr13 \n\t"
- "wmoveq wr11, wr14 \n\t"
- "walignr2ne wr10, wr12, wr13 \n\t"
- "walignr2ne wr11, wr13, wr14 \n\t"
- "wunpckelub wr0, wr2 \n\t"
- "wunpckehub wr1, wr2 \n\t"
- "wunpckelub wr2, wr3 \n\t"
- "wunpckehub wr3, wr3 \n\t"
- "wunpckelub wr8, wr10 \n\t"
- "wunpckehub wr9, wr10 \n\t"
- "wunpckelub wr10, wr11 \n\t"
- "wunpckehub wr11, wr11 \n\t"
- "waddhus wr0, wr0, wr8 \n\t"
- "waddhus wr1, wr1, wr9 \n\t"
- "waddhus wr2, wr2, wr10 \n\t"
- "waddhus wr3, wr3, wr11 \n\t"
- "waddhus wr8, wr0, wr4 \n\t"
- "waddhus wr9, wr1, wr5 \n\t"
- "waddhus wr10, wr2, wr6 \n\t"
- "waddhus wr11, wr3, wr7 \n\t"
- "waddhus wr8, wr8, wr15 \n\t"
- "waddhus wr9, wr9, wr15 \n\t"
- "waddhus wr10, wr10, wr15 \n\t"
- "waddhus wr11, wr11, wr15 \n\t"
- "wsrlhg wr8, wr8, wcgr0 \n\t"
- "wsrlhg wr9, wr9, wcgr0 \n\t"
- "wsrlhg wr10, wr10, wcgr0 \n\t"
- "wsrlhg wr11, wr11, wcgr0 \n\t"
- "wpackhus wr8, wr8, wr9 \n\t"
- "wpackhus wr9, wr10, wr11 \n\t"
- "wstrd wr8, [%[block]] \n\t"
- "wstrd wr9, [%[block], #8] \n\t"
- "add %[block], %[block], %[line_size] \n\t"
-
- "subs %[h], %[h], #2 \n\t"
- "bne 1b \n\t"
- : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block)
- : [line_size]"r"(line_size)
- : "r12", "memory");
-}
-
-void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
-{
- // [wr0 wr1 wr2 wr3] for previous line
- // [wr4 wr5 wr6 wr7] for current line
- SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
- __asm__ volatile(
- "pld [%[block]] \n\t"
- "pld [%[block], #32] \n\t"
- "pld [%[pixels]] \n\t"
- "mov r12, #2 \n\t"
- "pld [%[pixels], #32] \n\t"
- "tmcr wcgr0, r12 \n\t" /* for shift value */
- "and r12, %[pixels], #7 \n\t"
- "bic %[pixels], %[pixels], #7 \n\t"
- "tmcr wcgr1, r12 \n\t"
-
- // [wr0 wr1 wr2 wr3] <= *
- // [wr4 wr5 wr6 wr7]
- "wldrd wr12, [%[pixels]] \n\t"
- "add r12, r12, #1 \n\t"
- "wldrd wr13, [%[pixels], #8] \n\t"
- "tmcr wcgr2, r12 \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "cmp r12, #8 \n\t"
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "walignr1 wr2, wr12, wr13 \n\t"
- "wmoveq wr10, wr13 \n\t"
- "walignr2ne wr10, wr12, wr13 \n\t"
- "wunpckelub wr0, wr2 \n\t"
- "wunpckehub wr1, wr2 \n\t"
- "wunpckelub wr8, wr10 \n\t"
- "wunpckehub wr9, wr10 \n\t"
- "waddhus wr0, wr0, wr8 \n\t"
- "waddhus wr1, wr1, wr9 \n\t"
-
- "1: \n\t"
- // [wr0 wr1 wr2 wr3]
- // [wr4 wr5 wr6 wr7] <= *
- "wldrd wr12, [%[pixels]] \n\t"
- "cmp r12, #8 \n\t"
- "wldrd wr13, [%[pixels], #8] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "walignr1 wr6, wr12, wr13 \n\t"
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "wmoveq wr10, wr13 \n\t"
- "walignr2ne wr10, wr12, wr13 \n\t"
- "wunpckelub wr4, wr6 \n\t"
- "wunpckehub wr5, wr6 \n\t"
- "wunpckelub wr8, wr10 \n\t"
- "wunpckehub wr9, wr10 \n\t"
- "waddhus wr4, wr4, wr8 \n\t"
- "waddhus wr5, wr5, wr9 \n\t"
- "waddhus wr8, wr0, wr4 \n\t"
- "waddhus wr9, wr1, wr5 \n\t"
- "waddhus wr8, wr8, wr15 \n\t"
- "waddhus wr9, wr9, wr15 \n\t"
- "wldrd wr12, [%[block]] \n\t"
- "wsrlhg wr8, wr8, wcgr0 \n\t"
- "wsrlhg wr9, wr9, wcgr0 \n\t"
- "wpackhus wr8, wr8, wr9 \n\t"
- WAVG2B" wr8, wr8, wr12 \n\t"
- "wstrd wr8, [%[block]] \n\t"
- "add %[block], %[block], %[line_size] \n\t"
- "wldrd wr12, [%[pixels]] \n\t"
- "pld [%[block]] \n\t"
- "pld [%[block], #32] \n\t"
-
- // [wr0 wr1 wr2 wr3] <= *
- // [wr4 wr5 wr6 wr7]
- "wldrd wr13, [%[pixels], #8] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "walignr1 wr2, wr12, wr13 \n\t"
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "wmoveq wr10, wr13 \n\t"
- "walignr2ne wr10, wr12, wr13 \n\t"
- "wunpckelub wr0, wr2 \n\t"
- "wunpckehub wr1, wr2 \n\t"
- "wunpckelub wr8, wr10 \n\t"
- "wunpckehub wr9, wr10 \n\t"
- "waddhus wr0, wr0, wr8 \n\t"
- "waddhus wr1, wr1, wr9 \n\t"
- "waddhus wr8, wr0, wr4 \n\t"
- "waddhus wr9, wr1, wr5 \n\t"
- "waddhus wr8, wr8, wr15 \n\t"
- "waddhus wr9, wr9, wr15 \n\t"
- "wldrd wr12, [%[block]] \n\t"
- "wsrlhg wr8, wr8, wcgr0 \n\t"
- "wsrlhg wr9, wr9, wcgr0 \n\t"
- "wpackhus wr8, wr8, wr9 \n\t"
- "subs %[h], %[h], #2 \n\t"
- WAVG2B" wr8, wr8, wr12 \n\t"
- "wstrd wr8, [%[block]] \n\t"
- "add %[block], %[block], %[line_size] \n\t"
- "pld [%[block]] \n\t"
- "pld [%[block], #32] \n\t"
- "bne 1b \n\t"
- : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block)
- : [line_size]"r"(line_size)
- : "r12", "memory");
-}
-
-void DEF(avg, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
-{
- // [wr0 wr1 wr2 wr3] for previous line
- // [wr4 wr5 wr6 wr7] for current line
- SET_RND(wr15); // =2 for rnd and =1 for no_rnd version
- __asm__ volatile(
- "pld [%[block]] \n\t"
- "pld [%[block], #32] \n\t"
- "pld [%[pixels]] \n\t"
- "mov r12, #2 \n\t"
- "pld [%[pixels], #32] \n\t"
- "tmcr wcgr0, r12 \n\t" /* for shift value */
- /* alignment */
- "and r12, %[pixels], #7 \n\t"
- "bic %[pixels], %[pixels], #7 \n\t"
- "tmcr wcgr1, r12 \n\t"
- "add r12, r12, #1 \n\t"
- "tmcr wcgr2, r12 \n\t"
-
- // [wr0 wr1 wr2 wr3] <= *
- // [wr4 wr5 wr6 wr7]
- "wldrd wr12, [%[pixels]] \n\t"
- "cmp r12, #8 \n\t"
- "wldrd wr13, [%[pixels], #8] \n\t"
- "wldrd wr14, [%[pixels], #16] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "pld [%[pixels]] \n\t"
- "walignr1 wr2, wr12, wr13 \n\t"
- "pld [%[pixels], #32] \n\t"
- "walignr1 wr3, wr13, wr14 \n\t"
- "wmoveq wr10, wr13 \n\t"
- "wmoveq wr11, wr14 \n\t"
- "walignr2ne wr10, wr12, wr13 \n\t"
- "walignr2ne wr11, wr13, wr14 \n\t"
- "wunpckelub wr0, wr2 \n\t"
- "wunpckehub wr1, wr2 \n\t"
- "wunpckelub wr2, wr3 \n\t"
- "wunpckehub wr3, wr3 \n\t"
- "wunpckelub wr8, wr10 \n\t"
- "wunpckehub wr9, wr10 \n\t"
- "wunpckelub wr10, wr11 \n\t"
- "wunpckehub wr11, wr11 \n\t"
- "waddhus wr0, wr0, wr8 \n\t"
- "waddhus wr1, wr1, wr9 \n\t"
- "waddhus wr2, wr2, wr10 \n\t"
- "waddhus wr3, wr3, wr11 \n\t"
-
- "1: \n\t"
- // [wr0 wr1 wr2 wr3]
- // [wr4 wr5 wr6 wr7] <= *
- "wldrd wr12, [%[pixels]] \n\t"
- "cmp r12, #8 \n\t"
- "wldrd wr13, [%[pixels], #8] \n\t"
- "wldrd wr14, [%[pixels], #16] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "walignr1 wr6, wr12, wr13 \n\t"
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "walignr1 wr7, wr13, wr14 \n\t"
- "wmoveq wr10, wr13 \n\t"
- "wmoveq wr11, wr14 \n\t"
- "walignr2ne wr10, wr12, wr13 \n\t"
- "walignr2ne wr11, wr13, wr14 \n\t"
- "wunpckelub wr4, wr6 \n\t"
- "wunpckehub wr5, wr6 \n\t"
- "wunpckelub wr6, wr7 \n\t"
- "wunpckehub wr7, wr7 \n\t"
- "wunpckelub wr8, wr10 \n\t"
- "wunpckehub wr9, wr10 \n\t"
- "wunpckelub wr10, wr11 \n\t"
- "wunpckehub wr11, wr11 \n\t"
- "waddhus wr4, wr4, wr8 \n\t"
- "waddhus wr5, wr5, wr9 \n\t"
- "waddhus wr6, wr6, wr10 \n\t"
- "waddhus wr7, wr7, wr11 \n\t"
- "waddhus wr8, wr0, wr4 \n\t"
- "waddhus wr9, wr1, wr5 \n\t"
- "waddhus wr10, wr2, wr6 \n\t"
- "waddhus wr11, wr3, wr7 \n\t"
- "waddhus wr8, wr8, wr15 \n\t"
- "waddhus wr9, wr9, wr15 \n\t"
- "waddhus wr10, wr10, wr15 \n\t"
- "waddhus wr11, wr11, wr15 \n\t"
- "wsrlhg wr8, wr8, wcgr0 \n\t"
- "wsrlhg wr9, wr9, wcgr0 \n\t"
- "wldrd wr12, [%[block]] \n\t"
- "wldrd wr13, [%[block], #8] \n\t"
- "wsrlhg wr10, wr10, wcgr0 \n\t"
- "wsrlhg wr11, wr11, wcgr0 \n\t"
- "wpackhus wr8, wr8, wr9 \n\t"
- "wpackhus wr9, wr10, wr11 \n\t"
- WAVG2B" wr8, wr8, wr12 \n\t"
- WAVG2B" wr9, wr9, wr13 \n\t"
- "wstrd wr8, [%[block]] \n\t"
- "wstrd wr9, [%[block], #8] \n\t"
- "add %[block], %[block], %[line_size] \n\t"
-
- // [wr0 wr1 wr2 wr3] <= *
- // [wr4 wr5 wr6 wr7]
- "wldrd wr12, [%[pixels]] \n\t"
- "pld [%[block]] \n\t"
- "wldrd wr13, [%[pixels], #8] \n\t"
- "pld [%[block], #32] \n\t"
- "wldrd wr14, [%[pixels], #16] \n\t"
- "add %[pixels], %[pixels], %[line_size] \n\t"
- "walignr1 wr2, wr12, wr13 \n\t"
- "pld [%[pixels]] \n\t"
- "pld [%[pixels], #32] \n\t"
- "walignr1 wr3, wr13, wr14 \n\t"
- "wmoveq wr10, wr13 \n\t"
- "wmoveq wr11, wr14 \n\t"
- "walignr2ne wr10, wr12, wr13 \n\t"
- "walignr2ne wr11, wr13, wr14 \n\t"
- "wunpckelub wr0, wr2 \n\t"
- "wunpckehub wr1, wr2 \n\t"
- "wunpckelub wr2, wr3 \n\t"
- "wunpckehub wr3, wr3 \n\t"
- "wunpckelub wr8, wr10 \n\t"
- "wunpckehub wr9, wr10 \n\t"
- "wunpckelub wr10, wr11 \n\t"
- "wunpckehub wr11, wr11 \n\t"
- "waddhus wr0, wr0, wr8 \n\t"
- "waddhus wr1, wr1, wr9 \n\t"
- "waddhus wr2, wr2, wr10 \n\t"
- "waddhus wr3, wr3, wr11 \n\t"
- "waddhus wr8, wr0, wr4 \n\t"
- "waddhus wr9, wr1, wr5 \n\t"
- "waddhus wr10, wr2, wr6 \n\t"
- "waddhus wr11, wr3, wr7 \n\t"
- "waddhus wr8, wr8, wr15 \n\t"
- "waddhus wr9, wr9, wr15 \n\t"
- "waddhus wr10, wr10, wr15 \n\t"
- "waddhus wr11, wr11, wr15 \n\t"
- "wsrlhg wr8, wr8, wcgr0 \n\t"
- "wsrlhg wr9, wr9, wcgr0 \n\t"
- "wldrd wr12, [%[block]] \n\t"
- "wldrd wr13, [%[block], #8] \n\t"
- "wsrlhg wr10, wr10, wcgr0 \n\t"
- "wsrlhg wr11, wr11, wcgr0 \n\t"
- "wpackhus wr8, wr8, wr9 \n\t"
- "wpackhus wr9, wr10, wr11 \n\t"
- WAVG2B" wr8, wr8, wr12 \n\t"
- WAVG2B" wr9, wr9, wr13 \n\t"
- "wstrd wr8, [%[block]] \n\t"
- "wstrd wr9, [%[block], #8] \n\t"
- "add %[block], %[block], %[line_size] \n\t"
- "subs %[h], %[h], #2 \n\t"
- "pld [%[block]] \n\t"
- "pld [%[block], #32] \n\t"
- "bne 1b \n\t"
- : [h]"+r"(h), [pixels]"+r"(pixels), [block]"+r"(block)
- : [line_size]"r"(line_size)
- : "r12", "memory");
-}
diff --git a/libavcodec/armv4l/dsputil_vfp.S b/libavcodec/armv4l/dsputil_vfp.S
deleted file mode 100644
index 291f2b5..0000000
--- a/libavcodec/armv4l/dsputil_vfp.S
+++ /dev/null
@@ -1,188 +0,0 @@
-/*
- * Copyright (c) 2008 Siarhei Siamashka <ssvb at users.sourceforge.net>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "config.h"
-#include "asm.S"
-
-/*
- * VFP is a floating point coprocessor used in some ARM cores. VFP11 has 1 cycle
- * throughput for almost all the instructions (except for double precision
- * arithmetics), but rather high latency. Latency is 4 cycles for loads and 8 cycles
- * for arithmetic operations. Scheduling code to avoid pipeline stalls is very
- * important for performance. One more interesting feature is that VFP has
- * independent load/store and arithmetics pipelines, so it is possible to make
- * them work simultaneously and get more than 1 operation per cycle. Load/store
- * pipeline can process 2 single precision floating point values per cycle and
- * supports bulk loads and stores for large sets of registers. Arithmetic operations
- * can be done on vectors, which allows to keep the arithmetics pipeline busy,
- * while the processor may issue and execute other instructions. Detailed
- * optimization manuals can be found at http://www.arm.com
- */
-
-/**
- * ARM VFP optimized implementation of 'vector_fmul_c' function.
- * Assume that len is a positive number and is multiple of 8
- */
-@ void ff_vector_fmul_vfp(float *dst, const float *src, int len)
-function ff_vector_fmul_vfp, export=1
- vpush {d8-d15}
- mov r3, r0
- fmrx r12, fpscr
- orr r12, r12, #(3 << 16) /* set vector size to 4 */
- fmxr fpscr, r12
-
- fldmias r3!, {s0-s3}
- fldmias r1!, {s8-s11}
- fldmias r3!, {s4-s7}
- fldmias r1!, {s12-s15}
- fmuls s8, s0, s8
-1:
- subs r2, r2, #16
- fmuls s12, s4, s12
- fldmiasge r3!, {s16-s19}
- fldmiasge r1!, {s24-s27}
- fldmiasge r3!, {s20-s23}
- fldmiasge r1!, {s28-s31}
- fmulsge s24, s16, s24
- fstmias r0!, {s8-s11}
- fstmias r0!, {s12-s15}
- fmulsge s28, s20, s28
- fldmiasgt r3!, {s0-s3}
- fldmiasgt r1!, {s8-s11}
- fldmiasgt r3!, {s4-s7}
- fldmiasgt r1!, {s12-s15}
- fmulsge s8, s0, s8
- fstmiasge r0!, {s24-s27}
- fstmiasge r0!, {s28-s31}
- bgt 1b
-
- bic r12, r12, #(7 << 16) /* set vector size back to 1 */
- fmxr fpscr, r12
- vpop {d8-d15}
- bx lr
- .endfunc
-
-/**
- * ARM VFP optimized implementation of 'vector_fmul_reverse_c' function.
- * Assume that len is a positive number and is multiple of 8
- */
-@ void ff_vector_fmul_reverse_vfp(float *dst, const float *src0,
-@ const float *src1, int len)
-function ff_vector_fmul_reverse_vfp, export=1
- vpush {d8-d15}
- add r2, r2, r3, lsl #2
- fldmdbs r2!, {s0-s3}
- fldmias r1!, {s8-s11}
- fldmdbs r2!, {s4-s7}
- fldmias r1!, {s12-s15}
- fmuls s8, s3, s8
- fmuls s9, s2, s9
- fmuls s10, s1, s10
- fmuls s11, s0, s11
-1:
- subs r3, r3, #16
- fldmdbsge r2!, {s16-s19}
- fmuls s12, s7, s12
- fldmiasge r1!, {s24-s27}
- fmuls s13, s6, s13
- fldmdbsge r2!, {s20-s23}
- fmuls s14, s5, s14
- fldmiasge r1!, {s28-s31}
- fmuls s15, s4, s15
- fmulsge s24, s19, s24
- fldmdbsgt r2!, {s0-s3}
- fmulsge s25, s18, s25
- fstmias r0!, {s8-s13}
- fmulsge s26, s17, s26
- fldmiasgt r1!, {s8-s11}
- fmulsge s27, s16, s27
- fmulsge s28, s23, s28
- fldmdbsgt r2!, {s4-s7}
- fmulsge s29, s22, s29
- fstmias r0!, {s14-s15}
- fmulsge s30, s21, s30
- fmulsge s31, s20, s31
- fmulsge s8, s3, s8
- fldmiasgt r1!, {s12-s15}
- fmulsge s9, s2, s9
- fmulsge s10, s1, s10
- fstmiasge r0!, {s24-s27}
- fmulsge s11, s0, s11
- fstmiasge r0!, {s28-s31}
- bgt 1b
-
- vpop {d8-d15}
- bx lr
- .endfunc
-
-#ifdef HAVE_ARMV6
-/**
- * ARM VFP optimized float to int16 conversion.
- * Assume that len is a positive number and is multiple of 8, destination
- * buffer is at least 4 bytes aligned (8 bytes alignment is better for
- * performance), little endian byte sex
- */
-@ void ff_float_to_int16_vfp(int16_t *dst, const float *src, int len)
-function ff_float_to_int16_vfp, export=1
- push {r4-r8,lr}
- vpush {d8-d11}
- fldmias r1!, {s16-s23}
- ftosis s0, s16
- ftosis s1, s17
- ftosis s2, s18
- ftosis s3, s19
- ftosis s4, s20
- ftosis s5, s21
- ftosis s6, s22
- ftosis s7, s23
-1:
- subs r2, r2, #8
- fmrrs r3, r4, {s0, s1}
- fmrrs r5, r6, {s2, s3}
- fmrrs r7, r8, {s4, s5}
- fmrrs ip, lr, {s6, s7}
- fldmiasgt r1!, {s16-s23}
- ssat r4, #16, r4
- ssat r3, #16, r3
- ssat r6, #16, r6
- ssat r5, #16, r5
- pkhbt r3, r3, r4, lsl #16
- pkhbt r4, r5, r6, lsl #16
- ftosisgt s0, s16
- ftosisgt s1, s17
- ftosisgt s2, s18
- ftosisgt s3, s19
- ftosisgt s4, s20
- ftosisgt s5, s21
- ftosisgt s6, s22
- ftosisgt s7, s23
- ssat r8, #16, r8
- ssat r7, #16, r7
- ssat lr, #16, lr
- ssat ip, #16, ip
- pkhbt r5, r7, r8, lsl #16
- pkhbt r6, ip, lr, lsl #16
- stmia r0!, {r3-r6}
- bgt 1b
-
- vpop {d8-d11}
- pop {r4-r8,pc}
- .endfunc
-#endif
diff --git a/libavcodec/armv4l/float_arm_vfp.c b/libavcodec/armv4l/float_arm_vfp.c
deleted file mode 100644
index fb827b3..0000000
--- a/libavcodec/armv4l/float_arm_vfp.c
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright (c) 2008 Siarhei Siamashka <ssvb at users.sourceforge.net>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "libavcodec/dsputil.h"
-
-extern void ff_vector_fmul_vfp(float *dst, const float *src, int len);
-extern void ff_vector_fmul_reverse_vfp(float *dst, const float *src0,
- const float *src1, int len);
-extern void ff_float_to_int16_vfp(int16_t *dst, const float *src, long len);
-
-void ff_float_init_arm_vfp(DSPContext* c, AVCodecContext *avctx)
-{
- c->vector_fmul = ff_vector_fmul_vfp;
- c->vector_fmul_reverse = ff_vector_fmul_reverse_vfp;
-#ifdef HAVE_ARMV6
- c->float_to_int16 = ff_float_to_int16_vfp;
-#endif
-}
diff --git a/libavcodec/armv4l/mathops.h b/libavcodec/armv4l/mathops.h
deleted file mode 100644
index 37a0d3f..0000000
--- a/libavcodec/armv4l/mathops.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * simple math operations
- * Copyright (c) 2006 Michael Niedermayer <michaelni at gmx.at> et al
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef AVCODEC_ARMV4L_MATHOPS_H
-#define AVCODEC_ARMV4L_MATHOPS_H
-
-#include <stdint.h>
-#include "libavutil/common.h"
-
-#ifdef FRAC_BITS
-# define MULL MULL
-static inline av_const int MULL(int a, int b)
-{
- int lo, hi;
- __asm__("smull %0, %1, %2, %3 \n\t"
- "mov %0, %0, lsr %4 \n\t"
- "add %1, %0, %1, lsl %5 \n\t"
- : "=&r"(lo), "=&r"(hi)
- : "r"(b), "r"(a), "i"(FRAC_BITS), "i"(32-FRAC_BITS));
- return hi;
-}
-#endif
-
-#define MULH MULH
-#ifdef HAVE_ARMV6
-static inline av_const int MULH(int a, int b)
-{
- int r;
- __asm__ ("smmul %0, %1, %2" : "=r"(r) : "r"(a), "r"(b));
- return r;
-}
-#else
-static inline av_const int MULH(int a, int b)
-{
- int lo, hi;
- __asm__ ("smull %0, %1, %2, %3" : "=&r"(lo), "=&r"(hi) : "r"(b), "r"(a));
- return hi;
-}
-#endif
-
-static inline av_const int64_t MUL64(int a, int b)
-{
- union { uint64_t x; unsigned hl[2]; } x;
- __asm__ ("smull %0, %1, %2, %3"
- : "=r"(x.hl[0]), "=r"(x.hl[1]) : "r"(a), "r"(b));
- return x.x;
-}
-#define MUL64 MUL64
-
-static inline av_const int64_t MAC64(int64_t d, int a, int b)
-{
- union { uint64_t x; unsigned hl[2]; } x = { d };
- __asm__ ("smlal %0, %1, %2, %3"
- : "+r"(x.hl[0]), "+r"(x.hl[1]) : "r"(a), "r"(b));
- return x.x;
-}
-#define MAC64(d, a, b) ((d) = MAC64(d, a, b))
-#define MLS64(d, a, b) MAC64(d, -(a), b)
-
-#if defined(HAVE_ARMV5TE)
-
-/* signed 16x16 -> 32 multiply add accumulate */
-# define MAC16(rt, ra, rb) \
- __asm__ ("smlabb %0, %1, %2, %0" : "+r"(rt) : "r"(ra), "r"(rb));
-
-/* signed 16x16 -> 32 multiply */
-# define MUL16 MUL16
-static inline av_const MUL16(int ra, int rb)
-{
- int rt;
- __asm__ ("smulbb %0, %1, %2" : "=r"(rt) : "r"(ra), "r"(rb));
- return rt;
-}
-
-#endif
-
-#endif /* AVCODEC_ARMV4L_MATHOPS_H */
diff --git a/libavcodec/armv4l/mpegvideo_arm.c b/libavcodec/armv4l/mpegvideo_arm.c
deleted file mode 100644
index 1a11d7a..0000000
--- a/libavcodec/armv4l/mpegvideo_arm.c
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * Copyright (c) 2002 Michael Niedermayer
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "libavcodec/avcodec.h"
-#include "libavcodec/dsputil.h"
-#include "libavcodec/mpegvideo.h"
-
-extern void MPV_common_init_iwmmxt(MpegEncContext *s);
-extern void MPV_common_init_armv5te(MpegEncContext *s);
-
-void MPV_common_init_armv4l(MpegEncContext *s)
-{
- /* IWMMXT support is a superset of armv5te, so
- * allow optimized functions for armv5te unless
- * a better iwmmxt function exists
- */
-#ifdef HAVE_ARMV5TE
- MPV_common_init_armv5te(s);
-#endif
-#ifdef HAVE_IWMMXT
- MPV_common_init_iwmmxt(s);
-#endif
-}
diff --git a/libavcodec/armv4l/mpegvideo_armv5te.c b/libavcodec/armv4l/mpegvideo_armv5te.c
deleted file mode 100644
index 0ecadb4..0000000
--- a/libavcodec/armv4l/mpegvideo_armv5te.c
+++ /dev/null
@@ -1,207 +0,0 @@
-/*
- * Optimization of some functions from mpegvideo.c for armv5te
- * Copyright (c) 2007 Siarhei Siamashka <ssvb at users.sourceforge.net>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "libavcodec/avcodec.h"
-#include "libavcodec/dsputil.h"
-#include "libavcodec/mpegvideo.h"
-
-
-#ifdef ENABLE_ARM_TESTS
-/**
- * h263 dequantizer supplementary function, it is performance critical and needs to
- * have optimized implementations for each architecture. Is also used as a reference
- * implementation in regression tests
- */
-static inline void dct_unquantize_h263_helper_c(DCTELEM *block, int qmul, int qadd, int count)
-{
- int i, level;
- for (i = 0; i < count; i++) {
- level = block[i];
- if (level) {
- if (level < 0) {
- level = level * qmul - qadd;
- } else {
- level = level * qmul + qadd;
- }
- block[i] = level;
- }
- }
-}
-#endif
-
-/* GCC 3.1 or higher is required to support symbolic names in assembly code */
-#if (__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 1))
-
-/**
- * Special optimized version of dct_unquantize_h263_helper_c, it requires the block
- * to be at least 8 bytes aligned, and may process more elements than requested.
- * But it is guaranteed to never process more than 64 elements provided that
- * xxcount argument is <= 64, so it is safe. This macro is optimized for a common
- * distribution of values for nCoeffs (they are mostly multiple of 8 plus one or
- * two extra elements). So this macro processes data as 8 elements per loop iteration
- * and contains optional 2 elements processing in the end.
- *
- * Inner loop should take 6 cycles per element on arm926ej-s (Nokia 770)
- */
-#define dct_unquantize_h263_special_helper_armv5te(xxblock, xxqmul, xxqadd, xxcount) \
-({ DCTELEM *xblock = xxblock; \
- int xqmul = xxqmul, xqadd = xxqadd, xcount = xxcount, xtmp; \
- int xdata1, xdata2; \
-__asm__ volatile( \
- "subs %[count], %[count], #2 \n\t" \
- "ble 2f \n\t" \
- "ldrd r4, [%[block], #0] \n\t" \
- "1: \n\t" \
- "ldrd r6, [%[block], #8] \n\t" \
-\
- "rsbs %[data1], %[zero], r4, asr #16 \n\t" \
- "addgt %[data1], %[qadd], #0 \n\t" \
- "rsblt %[data1], %[qadd], #0 \n\t" \
- "smlatbne %[data1], r4, %[qmul], %[data1] \n\t" \
-\
- "rsbs %[data2], %[zero], r5, asr #16 \n\t" \
- "addgt %[data2], %[qadd], #0 \n\t" \
- "rsblt %[data2], %[qadd], #0 \n\t" \
- "smlatbne %[data2], r5, %[qmul], %[data2] \n\t" \
-\
- "rsbs %[tmp], %[zero], r4, asl #16 \n\t" \
- "addgt %[tmp], %[qadd], #0 \n\t" \
- "rsblt %[tmp], %[qadd], #0 \n\t" \
- "smlabbne r4, r4, %[qmul], %[tmp] \n\t" \
-\
- "rsbs %[tmp], %[zero], r5, asl #16 \n\t" \
- "addgt %[tmp], %[qadd], #0 \n\t" \
- "rsblt %[tmp], %[qadd], #0 \n\t" \
- "smlabbne r5, r5, %[qmul], %[tmp] \n\t" \
-\
- "strh r4, [%[block]], #2 \n\t" \
- "strh %[data1], [%[block]], #2 \n\t" \
- "strh r5, [%[block]], #2 \n\t" \
- "strh %[data2], [%[block]], #2 \n\t" \
-\
- "rsbs %[data1], %[zero], r6, asr #16 \n\t" \
- "addgt %[data1], %[qadd], #0 \n\t" \
- "rsblt %[data1], %[qadd], #0 \n\t" \
- "smlatbne %[data1], r6, %[qmul], %[data1] \n\t" \
-\
- "rsbs %[data2], %[zero], r7, asr #16 \n\t" \
- "addgt %[data2], %[qadd], #0 \n\t" \
- "rsblt %[data2], %[qadd], #0 \n\t" \
- "smlatbne %[data2], r7, %[qmul], %[data2] \n\t" \
-\
- "rsbs %[tmp], %[zero], r6, asl #16 \n\t" \
- "addgt %[tmp], %[qadd], #0 \n\t" \
- "rsblt %[tmp], %[qadd], #0 \n\t" \
- "smlabbne r6, r6, %[qmul], %[tmp] \n\t" \
-\
- "rsbs %[tmp], %[zero], r7, asl #16 \n\t" \
- "addgt %[tmp], %[qadd], #0 \n\t" \
- "rsblt %[tmp], %[qadd], #0 \n\t" \
- "smlabbne r7, r7, %[qmul], %[tmp] \n\t" \
-\
- "strh r6, [%[block]], #2 \n\t" \
- "strh %[data1], [%[block]], #2 \n\t" \
- "strh r7, [%[block]], #2 \n\t" \
- "strh %[data2], [%[block]], #2 \n\t" \
-\
- "subs %[count], %[count], #8 \n\t" \
- "ldrgtd r4, [%[block], #0] \n\t" /* load data early to avoid load/use pipeline stall */ \
- "bgt 1b \n\t" \
-\
- "adds %[count], %[count], #2 \n\t" \
- "ble 3f \n\t" \
- "2: \n\t" \
- "ldrsh %[data1], [%[block], #0] \n\t" \
- "ldrsh %[data2], [%[block], #2] \n\t" \
- "mov %[tmp], %[qadd] \n\t" \
- "cmp %[data1], #0 \n\t" \
- "rsblt %[tmp], %[qadd], #0 \n\t" \
- "smlabbne %[data1], %[data1], %[qmul], %[tmp] \n\t" \
- "mov %[tmp], %[qadd] \n\t" \
- "cmp %[data2], #0 \n\t" \
- "rsblt %[tmp], %[qadd], #0 \n\t" \
- "smlabbne %[data2], %[data2], %[qmul], %[tmp] \n\t" \
- "strh %[data1], [%[block]], #2 \n\t" \
- "strh %[data2], [%[block]], #2 \n\t" \
- "3: \n\t" \
- : [block] "+&r" (xblock), [count] "+&r" (xcount), [tmp] "=&r" (xtmp), \
- [data1] "=&r" (xdata1), [data2] "=&r" (xdata2) \
- : [qmul] "r" (xqmul), [qadd] "r" (xqadd), [zero] "r" (0) \
- : "r4", "r5", "r6", "r7", "cc", "memory" \
-); \
-})
-
-static void dct_unquantize_h263_intra_armv5te(MpegEncContext *s,
- DCTELEM *block, int n, int qscale)
-{
- int level, qmul, qadd;
- int nCoeffs;
-
- assert(s->block_last_index[n]>=0);
-
- qmul = qscale << 1;
-
- if (!s->h263_aic) {
- if (n < 4)
- level = block[0] * s->y_dc_scale;
- else
- level = block[0] * s->c_dc_scale;
- qadd = (qscale - 1) | 1;
- }else{
- qadd = 0;
- level = block[0];
- }
- if(s->ac_pred)
- nCoeffs=63;
- else
- nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
-
- dct_unquantize_h263_special_helper_armv5te(block, qmul, qadd, nCoeffs + 1);
- block[0] = level;
-}
-
-static void dct_unquantize_h263_inter_armv5te(MpegEncContext *s,
- DCTELEM *block, int n, int qscale)
-{
- int qmul, qadd;
- int nCoeffs;
-
- assert(s->block_last_index[n]>=0);
-
- qadd = (qscale - 1) | 1;
- qmul = qscale << 1;
-
- nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
-
- dct_unquantize_h263_special_helper_armv5te(block, qmul, qadd, nCoeffs + 1);
-}
-
-#define HAVE_DCT_UNQUANTIZE_H263_ARMV5TE_OPTIMIZED
-
-#endif
-
-void MPV_common_init_armv5te(MpegEncContext *s)
-{
-#ifdef HAVE_DCT_UNQUANTIZE_H263_ARMV5TE_OPTIMIZED
- s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_armv5te;
- s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_armv5te;
-#endif
-}
diff --git a/libavcodec/audioconvert.c b/libavcodec/audioconvert.c
index 8e09e4c..8c6a6b8 100644
--- a/libavcodec/audioconvert.c
+++ b/libavcodec/audioconvert.c
@@ -104,7 +104,7 @@ static const struct {
const char *name;
int nb_channels;
int64_t layout;
-} const channel_layout_map[] = {
+} channel_layout_map[] = {
{ "mono", 1, CH_LAYOUT_MONO },
{ "stereo", 2, CH_LAYOUT_STEREO },
{ "surround", 3, CH_LAYOUT_SURROUND },
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 7d0cf0e..3d3e244 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -30,7 +30,7 @@
#include "libavutil/avutil.h"
#define LIBAVCODEC_VERSION_MAJOR 52
-#define LIBAVCODEC_VERSION_MINOR 3
+#define LIBAVCODEC_VERSION_MINOR 10
#define LIBAVCODEC_VERSION_MICRO 0
#define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
@@ -191,6 +191,9 @@ enum CodecID {
CODEC_ID_TGV,
CODEC_ID_TGQ,
+ /* "codecs" for HW decoding with VDPAU */
+ CODEC_ID_H264_VDPAU= 0x9000,
+
/* various PCM "codecs" */
CODEC_ID_PCM_S16LE= 0x10000,
CODEC_ID_PCM_S16BE,
@@ -400,6 +403,7 @@ enum SampleFormat {
*/
#define FF_MIN_BUFFER_SIZE 16384
+
/**
* motion estimation type.
*/
@@ -526,6 +530,10 @@ typedef struct RcOverride{
* This can be used to prevent truncation of the last audio samples.
*/
#define CODEC_CAP_SMALL_LAST_FRAME 0x0040
+/**
+ * Codec can export data for HW decoding (VDPAU).
+ */
+#define CODEC_CAP_HWACCEL_VDPAU 0x0080
//The following defines may change, don't expect compatibility if you use them.
#define MB_TYPE_INTRA4x4 0x0001
@@ -1390,6 +1398,7 @@ typedef struct AVCodecContext {
#define FF_IDCT_WMV2 19
#define FF_IDCT_FAAN 20
#define FF_IDCT_EA 21
+#define FF_IDCT_SIMPLENEON 22
/**
* slice count
@@ -2081,7 +2090,7 @@ typedef struct AVCodecContext {
/**
* number of reference frames
* - encoding: Set by user.
- * - decoding: unused
+ * - decoding: Set by lavc.
*/
int refs;
@@ -2283,6 +2292,20 @@ typedef struct AVCodecContext {
* - decoding: Set by user.
*/
int64_t request_channel_layout;
+
+ /**
+ * Ratecontrol attempt to use, at maximum, <value> of what can be used without an underflow.
+ * - encoding: Set by user.
+ * - decoding: unused.
+ */
+ float rc_max_available_vbv_use;
+
+ /**
+ * Ratecontrol attempt to use, at least, <value> times the amount needed to prevent a vbv overflow.
+ * - encoding: Set by user.
+ * - decoding: unused.
+ */
+ float rc_min_vbv_overflow_use;
} AVCodecContext;
/**
@@ -2360,23 +2383,54 @@ typedef struct AVPaletteControl {
} AVPaletteControl attribute_deprecated;
+enum AVSubtitleType {
+ SUBTITLE_NONE,
+
+ SUBTITLE_BITMAP, ///< A bitmap, pict will be set
+
+ /**
+ * Plain text, the text field must be set by the decoder and is
+ * authoritative. ass and pict fields may contain approximations.
+ */
+ SUBTITLE_TEXT,
+
+ /**
+ * Formatted text, the ass field must be set by the decoder and is
+ * authoritative. pict and text fields may contain approximations.
+ */
+ SUBTITLE_ASS,
+};
+
typedef struct AVSubtitleRect {
- uint16_t x;
- uint16_t y;
- uint16_t w;
- uint16_t h;
- uint16_t nb_colors;
- int linesize;
- uint32_t *rgba_palette;
- uint8_t *bitmap;
+ int x; ///< top left corner of pict, undefined when pict is not set
+ int y; ///< top left corner of pict, undefined when pict is not set
+ int w; ///< width of pict, undefined when pict is not set
+ int h; ///< height of pict, undefined when pict is not set
+ int nb_colors; ///< number of colors in pict, undefined when pict is not set
+
+ /**
+ * data+linesize for the bitmap of this subtitle.
+ * can be set for text/ass as well once they where rendered
+ */
+ AVPicture pict;
+ enum AVSubtitleType type;
+
+ char *text; ///< 0 terminated plain UTF-8 text
+
+ /**
+ * 0 terminated ASS/SSA compatible event line.
+ * The pressentation of this is unaffected by the other values in this
+ * struct.
+ */
+ char *ass;
} AVSubtitleRect;
typedef struct AVSubtitle {
uint16_t format; /* 0 = graphics */
uint32_t start_display_time; /* relative to packet pts, in ms */
uint32_t end_display_time; /* relative to packet pts, in ms */
- uint32_t num_rects;
- AVSubtitleRect *rects;
+ unsigned num_rects;
+ AVSubtitleRect **rects;
} AVSubtitle;
@@ -2549,7 +2603,12 @@ unsigned avcodec_version(void);
*/
void avcodec_init(void);
-void register_avcodec(AVCodec *format);
+/**
+ * Register the codec \p codec and initialize libavcodec.
+ *
+ * @see avcodec_init()
+ */
+void register_avcodec(AVCodec *codec);
/**
* Finds a registered encoder with a matching codec ID.
@@ -2738,6 +2797,9 @@ int avcodec_decode_audio2(AVCodecContext *avctx, int16_t *samples,
* the linesize is not a multiple of 16 then there's no sense in aligning the
* start of the buffer to 16.
*
+ * @note Some codecs have a delay between input and output, these need to be
+ * feeded with buf=NULL, buf_size=0 at the end to return the remaining frames.
+ *
* @param avctx the codec context
* @param[out] picture The AVFrame in which the decoded video frame will be stored.
* @param[in] buf the input buffer
@@ -2805,6 +2867,16 @@ int avcodec_encode_subtitle(AVCodecContext *avctx, uint8_t *buf, int buf_size,
int avcodec_close(AVCodecContext *avctx);
+/**
+ * Register all the codecs, parsers and bitstream filters which were enabled at
+ * configuration time. If you do not call this function you can select exactly
+ * which formats you want to support, by using the individual registration
+ * functions.
+ *
+ * @see register_avcodec
+ * @see av_register_codec_parser
+ * @see av_register_bitstream_filter
+ */
void avcodec_register_all(void);
/**
@@ -2958,7 +3030,7 @@ int av_picture_crop(AVPicture *dst, const AVPicture *src,
int av_picture_pad(AVPicture *dst, const AVPicture *src, int height, int width, int pix_fmt,
int padtop, int padbottom, int padleft, int padright, int *color);
-extern unsigned int av_xiphlacing(unsigned char *s, unsigned int v);
+unsigned int av_xiphlacing(unsigned char *s, unsigned int v);
/**
* Parses \p str and put in \p width_ptr and \p height_ptr the detected values.
@@ -2984,18 +3056,6 @@ int av_parse_video_frame_size(int *width_ptr, int *height_ptr, const char *str);
*/
int av_parse_video_frame_rate(AVRational *frame_rate, const char *str);
-/**
- * Logs a generic warning message about a missing feature.
- * @param[in] avc a pointer to an arbitrary struct of which the first field is
- * a pointer to an AVClass struct
- * @param[in] feature string containing the name of the missing feature
- * @param[in] want_sample indicates if samples are wanted which exhibit this feature.
- * If \p want_sample is non-zero, additional verbage will be added to the log
- * message which tells the user how to report samples to the development
- * mailing list.
- */
-void av_log_missing_feature(void *avc, const char *feature, int want_sample);
-
/* error handling */
#if EINVAL > 0
#define AVERROR(e) (-(e)) /**< Returns a negative error code from a POSIX error code, to return from library functions. */
diff --git a/libavcodec/avs.c b/libavcodec/avs.c
index c60fe63..3b29c85 100644
--- a/libavcodec/avs.c
+++ b/libavcodec/avs.c
@@ -25,35 +25,35 @@
typedef struct {
AVFrame picture;
-} avs_context_t;
+} AvsContext;
typedef enum {
AVS_VIDEO = 0x01,
AVS_AUDIO = 0x02,
AVS_PALETTE = 0x03,
AVS_GAME_DATA = 0x04,
-} avs_block_type_t;
+} AvsBlockType;
typedef enum {
AVS_I_FRAME = 0x00,
AVS_P_FRAME_3X3 = 0x01,
AVS_P_FRAME_2X2 = 0x02,
AVS_P_FRAME_2X3 = 0x03,
-} avs_video_sub_type_t;
+} AvsVideoSubType;
static int
avs_decode_frame(AVCodecContext * avctx,
void *data, int *data_size, const uint8_t * buf, int buf_size)
{
- avs_context_t *const avs = avctx->priv_data;
+ AvsContext *const avs = avctx->priv_data;
AVFrame *picture = data;
AVFrame *const p = (AVFrame *) & avs->picture;
const uint8_t *table, *vect;
uint8_t *out;
int i, j, x, y, stride, vect_w = 3, vect_h = 3;
- int sub_type;
- avs_block_type_t type;
+ AvsVideoSubType sub_type;
+ AvsBlockType type;
GetBitContext change_map;
if (avctx->reget_buffer(avctx, p)) {
@@ -152,7 +152,7 @@ AVCodec avs_decoder = {
"avs",
CODEC_TYPE_VIDEO,
CODEC_ID_AVS,
- sizeof(avs_context_t),
+ sizeof(AvsContext),
avs_decode_init,
NULL,
NULL,
diff --git a/libavcodec/bfin/dsputil_bfin.c b/libavcodec/bfin/dsputil_bfin.c
index ccdb19c..1a8ae3b 100644
--- a/libavcodec/bfin/dsputil_bfin.c
+++ b/libavcodec/bfin/dsputil_bfin.c
@@ -29,35 +29,35 @@
int off;
-extern void ff_bfin_idct (DCTELEM *block) attribute_l1_text;
-extern void ff_bfin_fdct (DCTELEM *block) attribute_l1_text;
-extern void ff_bfin_vp3_idct (DCTELEM *block);
-extern void ff_bfin_vp3_idct_put (uint8_t *dest, int line_size, DCTELEM *block);
-extern void ff_bfin_vp3_idct_add (uint8_t *dest, int line_size, DCTELEM *block);
-extern void ff_bfin_add_pixels_clamped (DCTELEM *block, uint8_t *dest, int line_size) attribute_l1_text;
-extern void ff_bfin_put_pixels_clamped (DCTELEM *block, uint8_t *dest, int line_size) attribute_l1_text;
-extern void ff_bfin_diff_pixels (DCTELEM *block, uint8_t *s1, uint8_t *s2, int stride) attribute_l1_text;
-extern void ff_bfin_get_pixels (DCTELEM *restrict block, const uint8_t *pixels, int line_size) attribute_l1_text;
-extern int ff_bfin_pix_norm1 (uint8_t * pix, int line_size) attribute_l1_text;
-extern int ff_bfin_z_sad8x8 (uint8_t *blk1, uint8_t *blk2, int dsz, int line_size, int h) attribute_l1_text;
-extern int ff_bfin_z_sad16x16 (uint8_t *blk1, uint8_t *blk2, int dsz, int line_size, int h) attribute_l1_text;
-
-extern void ff_bfin_z_put_pixels16_xy2 (uint8_t *block, const uint8_t *s0, int dest_size, int line_size, int h) attribute_l1_text;
-extern void ff_bfin_z_put_pixels8_xy2 (uint8_t *block, const uint8_t *s0, int dest_size, int line_size, int h) attribute_l1_text;
-extern void ff_bfin_put_pixels16_xy2_nornd (uint8_t *block, const uint8_t *s0, int line_size, int h) attribute_l1_text;
-extern void ff_bfin_put_pixels8_xy2_nornd (uint8_t *block, const uint8_t *s0, int line_size, int h) attribute_l1_text;
-
-
-extern int ff_bfin_pix_sum (uint8_t *p, int stride) attribute_l1_text;
-
-extern void ff_bfin_put_pixels8uc (uint8_t *block, const uint8_t *s0, const uint8_t *s1, int dest_size, int line_size, int h) attribute_l1_text;
-extern void ff_bfin_put_pixels16uc (uint8_t *block, const uint8_t *s0, const uint8_t *s1, int dest_size, int line_size, int h) attribute_l1_text;
-extern void ff_bfin_put_pixels8uc_nornd (uint8_t *block, const uint8_t *s0, const uint8_t *s1, int line_size, int h) attribute_l1_text;
-extern void ff_bfin_put_pixels16uc_nornd (uint8_t *block, const uint8_t *s0, const uint8_t *s1, int line_size, int h) attribute_l1_text;
-
-extern int ff_bfin_sse4 (void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) attribute_l1_text;
-extern int ff_bfin_sse8 (void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) attribute_l1_text;
-extern int ff_bfin_sse16 (void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) attribute_l1_text;
+void ff_bfin_idct (DCTELEM *block) attribute_l1_text;
+void ff_bfin_fdct (DCTELEM *block) attribute_l1_text;
+void ff_bfin_vp3_idct (DCTELEM *block);
+void ff_bfin_vp3_idct_put (uint8_t *dest, int line_size, DCTELEM *block);
+void ff_bfin_vp3_idct_add (uint8_t *dest, int line_size, DCTELEM *block);
+void ff_bfin_add_pixels_clamped (DCTELEM *block, uint8_t *dest, int line_size) attribute_l1_text;
+void ff_bfin_put_pixels_clamped (DCTELEM *block, uint8_t *dest, int line_size) attribute_l1_text;
+void ff_bfin_diff_pixels (DCTELEM *block, uint8_t *s1, uint8_t *s2, int stride) attribute_l1_text;
+void ff_bfin_get_pixels (DCTELEM *restrict block, const uint8_t *pixels, int line_size) attribute_l1_text;
+int ff_bfin_pix_norm1 (uint8_t * pix, int line_size) attribute_l1_text;
+int ff_bfin_z_sad8x8 (uint8_t *blk1, uint8_t *blk2, int dsz, int line_size, int h) attribute_l1_text;
+int ff_bfin_z_sad16x16 (uint8_t *blk1, uint8_t *blk2, int dsz, int line_size, int h) attribute_l1_text;
+
+void ff_bfin_z_put_pixels16_xy2 (uint8_t *block, const uint8_t *s0, int dest_size, int line_size, int h) attribute_l1_text;
+void ff_bfin_z_put_pixels8_xy2 (uint8_t *block, const uint8_t *s0, int dest_size, int line_size, int h) attribute_l1_text;
+void ff_bfin_put_pixels16_xy2_nornd (uint8_t *block, const uint8_t *s0, int line_size, int h) attribute_l1_text;
+void ff_bfin_put_pixels8_xy2_nornd (uint8_t *block, const uint8_t *s0, int line_size, int h) attribute_l1_text;
+
+
+int ff_bfin_pix_sum (uint8_t *p, int stride) attribute_l1_text;
+
+void ff_bfin_put_pixels8uc (uint8_t *block, const uint8_t *s0, const uint8_t *s1, int dest_size, int line_size, int h) attribute_l1_text;
+void ff_bfin_put_pixels16uc (uint8_t *block, const uint8_t *s0, const uint8_t *s1, int dest_size, int line_size, int h) attribute_l1_text;
+void ff_bfin_put_pixels8uc_nornd (uint8_t *block, const uint8_t *s0, const uint8_t *s1, int line_size, int h) attribute_l1_text;
+void ff_bfin_put_pixels16uc_nornd (uint8_t *block, const uint8_t *s0, const uint8_t *s1, int line_size, int h) attribute_l1_text;
+
+int ff_bfin_sse4 (void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) attribute_l1_text;
+int ff_bfin_sse8 (void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) attribute_l1_text;
+int ff_bfin_sse16 (void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) attribute_l1_text;
static void bfin_idct_add (uint8_t *dest, int line_size, DCTELEM *block)
diff --git a/libavcodec/bfin/mpegvideo_bfin.c b/libavcodec/bfin/mpegvideo_bfin.c
index 05743e7..d1c33a1 100644
--- a/libavcodec/bfin/mpegvideo_bfin.c
+++ b/libavcodec/bfin/mpegvideo_bfin.c
@@ -26,7 +26,7 @@
#include "dsputil_bfin.h"
-extern void ff_bfin_fdct (DCTELEM *block) attribute_l1_text;
+void ff_bfin_fdct (DCTELEM *block) attribute_l1_text;
static int dct_quantize_bfin (MpegEncContext *s,
diff --git a/libavcodec/bfin/vp3_bfin.c b/libavcodec/bfin/vp3_bfin.c
index 62990f6..1906453 100644
--- a/libavcodec/bfin/vp3_bfin.c
+++ b/libavcodec/bfin/vp3_bfin.c
@@ -22,10 +22,10 @@
#include "libavcodec/dsputil.h"
#include "dsputil_bfin.h"
-extern void ff_bfin_vp3_idct (DCTELEM *block) attribute_l1_text;
-extern void ff_bfin_idct (DCTELEM *block) attribute_l1_text;
-extern void ff_bfin_add_pixels_clamped (DCTELEM *block, uint8_t *dest, int line_size) attribute_l1_text;
-extern void ff_bfin_put_pixels_clamped (DCTELEM *block, uint8_t *dest, int line_size) attribute_l1_text;
+void ff_bfin_vp3_idct (DCTELEM *block) attribute_l1_text;
+void ff_bfin_idct (DCTELEM *block) attribute_l1_text;
+void ff_bfin_add_pixels_clamped (DCTELEM *block, uint8_t *dest, int line_size) attribute_l1_text;
+void ff_bfin_put_pixels_clamped (DCTELEM *block, uint8_t *dest, int line_size) attribute_l1_text;
/* Intra iDCT offset 128 */
void ff_bfin_vp3_idct_put (uint8_t *dest, int line_size, DCTELEM *block)
diff --git a/libavcodec/bitstream.h b/libavcodec/bitstream.h
index 5a5db5c..23bc34d 100644
--- a/libavcodec/bitstream.h
+++ b/libavcodec/bitstream.h
@@ -41,7 +41,7 @@
//#define ALT_BITSTREAM_WRITER
//#define ALIGNED_BITSTREAM_WRITER
#if !defined(LIBMPEG2_BITSTREAM_READER) && !defined(A32_BITSTREAM_READER) && !defined(ALT_BITSTREAM_READER)
-# ifdef ARCH_ARMV4L
+# ifdef ARCH_ARM
# define A32_BITSTREAM_READER
# else
# define ALT_BITSTREAM_READER
@@ -179,10 +179,6 @@ typedef struct RL_VLC_ELEM {
uint8_t run;
} RL_VLC_ELEM;
-#if defined(ARCH_SPARC) || defined(ARCH_ARMV4L) || defined(ARCH_MIPS) || defined(ARCH_BFIN)
-#define UNALIGNED_STORES_ARE_BAD
-#endif
-
#ifndef ALT_BITSTREAM_WRITER
static inline void put_bits(PutBitContext *s, int n, unsigned int value)
{
@@ -200,7 +196,7 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value)
#ifdef BITSTREAM_WRITER_LE
bit_buf |= value << (32 - bit_left);
if (n >= bit_left) {
-#ifdef UNALIGNED_STORES_ARE_BAD
+#ifndef HAVE_FAST_UNALIGNED
if (3 & (intptr_t) s->buf_ptr) {
s->buf_ptr[0] = bit_buf ;
s->buf_ptr[1] = bit_buf >> 8;
@@ -221,7 +217,7 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value)
} else {
bit_buf<<=bit_left;
bit_buf |= value >> (n - bit_left);
-#ifdef UNALIGNED_STORES_ARE_BAD
+#ifndef HAVE_FAST_UNALIGNED
if (3 & (intptr_t) s->buf_ptr) {
s->buf_ptr[0] = bit_buf >> 24;
s->buf_ptr[1] = bit_buf >> 16;
diff --git a/libavcodec/bmp.c b/libavcodec/bmp.c
index ca2e201..9a20cd6 100644
--- a/libavcodec/bmp.c
+++ b/libavcodec/bmp.c
@@ -73,25 +73,35 @@ static int bmp_decode_frame(AVCodecContext *avctx,
buf += 2; /* reserved2 */
hsize = bytestream_get_le32(&buf); /* header size */
- if(fsize <= hsize){
- av_log(avctx, AV_LOG_ERROR, "not enough data (%d < %d)\n",
- fsize, hsize);
- return -1;
- }
-
ihsize = bytestream_get_le32(&buf); /* more header size */
if(ihsize + 14 > hsize){
av_log(avctx, AV_LOG_ERROR, "invalid header size %d\n", hsize);
return -1;
}
- if (ihsize == 40) {
+ /* sometimes file size is set to some headers size, set a real size in that case */
+ if(fsize == 14 || fsize == ihsize + 14)
+ fsize = buf_size - 2;
+
+ if(fsize <= hsize){
+ av_log(avctx, AV_LOG_ERROR, "declared file size is less than header size (%d < %d)\n",
+ fsize, hsize);
+ return -1;
+ }
+
+ switch(ihsize){
+ case 40: // windib v3
+ case 64: // OS/2 v2
+ case 108: // windib v4
+ case 124: // windib v5
width = bytestream_get_le32(&buf);
height = bytestream_get_le32(&buf);
- } else if (ihsize == 12) {
+ break;
+ case 12: // OS/2 v1
width = bytestream_get_le16(&buf);
height = bytestream_get_le16(&buf);
- } else {
+ break;
+ default:
av_log(avctx, AV_LOG_ERROR, "unsupported BMP file, patch welcome\n");
return -1;
}
diff --git a/libavcodec/cavs.h b/libavcodec/cavs.h
index c684acf..2a1a90e 100644
--- a/libavcodec/cavs.h
+++ b/libavcodec/cavs.h
@@ -144,13 +144,13 @@ DECLARE_ALIGNED_8(typedef, struct) {
int16_t ref;
} vector_t;
-typedef struct dec_2dvlc_t {
+struct dec_2dvlc {
int8_t rltab[59][3];
int8_t level_add[27];
int8_t golomb_order;
int inc_limit;
int8_t max_run;
-} dec_2dvlc_t;
+};
typedef struct {
MpegEncContext s;
@@ -226,9 +226,9 @@ typedef struct {
extern const uint8_t ff_cavs_dequant_shift[64];
extern const uint16_t ff_cavs_dequant_mul[64];
-extern const dec_2dvlc_t ff_cavs_intra_dec[7];
-extern const dec_2dvlc_t ff_cavs_inter_dec[7];
-extern const dec_2dvlc_t ff_cavs_chroma_dec[5];
+extern const struct dec_2dvlc ff_cavs_intra_dec[7];
+extern const struct dec_2dvlc ff_cavs_inter_dec[7];
+extern const struct dec_2dvlc ff_cavs_chroma_dec[5];
extern const uint8_t ff_cavs_chroma_qp[64];
extern const uint8_t ff_cavs_scan3x3[4];
extern const uint8_t ff_cavs_partition_flags[30];
diff --git a/libavcodec/cavsdata.h b/libavcodec/cavsdata.h
index 94b620b..6d80ac1 100644
--- a/libavcodec/cavsdata.h
+++ b/libavcodec/cavsdata.h
@@ -101,7 +101,7 @@ const vector_t ff_cavs_intra_mv = {0,0,1,REF_INTRA};
#define EOB 0,0,0
-const dec_2dvlc_t ff_cavs_intra_dec[7] = {
+const struct dec_2dvlc ff_cavs_intra_dec[7] = {
{
{ //level / run / table_inc
{ 1, 1, 1},{ -1, 1, 1},{ 1, 2, 1},{ -1, 2, 1},{ 1, 3, 1},{ -1, 3, 1},
@@ -238,7 +238,7 @@ const dec_2dvlc_t ff_cavs_intra_dec[7] = {
}
};
-const dec_2dvlc_t ff_cavs_inter_dec[7] = {
+const struct dec_2dvlc ff_cavs_inter_dec[7] = {
{
{ //level / run
{ 1, 1, 1},{ -1, 1, 1},{ 1, 2, 1},{ -1, 2, 1},{ 1, 3, 1},{ -1, 3, 1},
@@ -375,7 +375,7 @@ const dec_2dvlc_t ff_cavs_inter_dec[7] = {
}
};
-const dec_2dvlc_t ff_cavs_chroma_dec[5] = {
+const struct dec_2dvlc ff_cavs_chroma_dec[5] = {
{
{ //level / run
{ 1, 1, 1},{ -1, 1, 1},{ 1, 2, 1},{ -1, 2, 1},{ 1, 3, 1},{ -1, 3, 1},
diff --git a/libavcodec/cavsdec.c b/libavcodec/cavsdec.c
index a978fe1..18dcb57 100644
--- a/libavcodec/cavsdec.c
+++ b/libavcodec/cavsdec.c
@@ -113,7 +113,7 @@ static inline int get_ue_code(GetBitContext *gb, int order) {
* @param stride line stride in frame buffer
*/
static int decode_residual_block(AVSContext *h, GetBitContext *gb,
- const dec_2dvlc_t *r, int esc_golomb_order,
+ const struct dec_2dvlc *r, int esc_golomb_order,
int qp, uint8_t *dst, int stride) {
int i, level_code, esc_code, level, run, mask;
DCTELEM level_buf[65];
diff --git a/libavcodec/celp_math.h b/libavcodec/celp_math.h
index ce0726f..7cf7861 100644
--- a/libavcodec/celp_math.h
+++ b/libavcodec/celp_math.h
@@ -51,26 +51,6 @@ int ff_exp2(uint16_t power);
int ff_log2(uint32_t value);
/**
- * returns the dot product.
- * @param a input data array
- * @param b input data array
- * @param length number of elements
- * @param shift right shift by this value will be done after multiplication
- *
- * @return dot product = sum of elementwise products
- */
-static int dot_product(const int16_t* a, const int16_t* b, int length, int shift)
-{
- int sum = 0;
- int i;
-
- for(i=0; i<length; i++)
- sum += (a[i] * b[i]) >> shift;
-
- return sum;
-}
-
-/**
* Shift value left or right depending on sign of offset parameter.
* @param value value to shift
* @param offset shift offset
@@ -91,6 +71,6 @@ static inline int bidir_sal(int value, int offset)
*
* @return dot product = sum of elementwise products
*/
-extern float ff_dot_productf(const float* a, const float* b, int length);
+float ff_dot_productf(const float* a, const float* b, int length);
#endif /* AVCODEC_CELP_MATH_H */
diff --git a/libavcodec/cinepak.c b/libavcodec/cinepak.c
index 77e5ff1..8c2efba 100644
--- a/libavcodec/cinepak.c
+++ b/libavcodec/cinepak.c
@@ -40,7 +40,7 @@
typedef struct {
uint8_t y0, y1, y2, y3;
uint8_t u, v;
-} cvid_codebook_t;
+} cvid_codebook;
#define MAX_STRIPS 32
@@ -48,9 +48,9 @@ typedef struct {
uint16_t id;
uint16_t x1, y1;
uint16_t x2, y2;
- cvid_codebook_t v4_codebook[256];
- cvid_codebook_t v1_codebook[256];
-} cvid_strip_t;
+ cvid_codebook v4_codebook[256];
+ cvid_codebook v1_codebook[256];
+} cvid_strip;
typedef struct CinepakContext {
@@ -63,13 +63,13 @@ typedef struct CinepakContext {
int width, height;
int palette_video;
- cvid_strip_t strips[MAX_STRIPS];
+ cvid_strip strips[MAX_STRIPS];
int sega_film_skip_bytes;
} CinepakContext;
-static void cinepak_decode_codebook (cvid_codebook_t *codebook,
+static void cinepak_decode_codebook (cvid_codebook *codebook,
int chunk_id, int size, const uint8_t *data)
{
const uint8_t *eod = (data + size);
@@ -118,12 +118,12 @@ static void cinepak_decode_codebook (cvid_codebook_t *codebook,
}
}
-static int cinepak_decode_vectors (CinepakContext *s, cvid_strip_t *strip,
+static int cinepak_decode_vectors (CinepakContext *s, cvid_strip *strip,
int chunk_id, int size, const uint8_t *data)
{
const uint8_t *eod = (data + size);
uint32_t flag, mask;
- cvid_codebook_t *codebook;
+ cvid_codebook *codebook;
unsigned int x, y;
uint32_t iy[4];
uint32_t iu[2];
@@ -262,7 +262,7 @@ static int cinepak_decode_vectors (CinepakContext *s, cvid_strip_t *strip,
}
static int cinepak_decode_strip (CinepakContext *s,
- cvid_strip_t *strip, const uint8_t *data, int size)
+ cvid_strip *strip, const uint8_t *data, int size)
{
const uint8_t *eod = (data + size);
int chunk_id, chunk_size;
diff --git a/libavcodec/cook.c b/libavcodec/cook.c
index ba9f30f..1affe03 100644
--- a/libavcodec/cook.c
+++ b/libavcodec/cook.c
@@ -1177,6 +1177,7 @@ static int cook_decode_init(AVCodecContext *avctx)
}
avctx->sample_fmt = SAMPLE_FMT_S16;
+ avctx->channel_layout = (avctx->channels==2) ? CH_LAYOUT_STEREO : CH_LAYOUT_MONO;
#ifdef COOKDEBUG
dump_cook_context(q);
diff --git a/libavcodec/dca.c b/libavcodec/dca.c
index b4f5897..dbd1152 100644
--- a/libavcodec/dca.c
+++ b/libavcodec/dca.c
@@ -88,6 +88,48 @@ static const int64_t dca_core_channel_layout[] = {
CH_FRONT_LEFT_OF_CENTER|CH_FRONT_CENTER|CH_FRONT_RIGHT_OF_CENTER|CH_LAYOUT_STEREO|CH_SIDE_LEFT|CH_BACK_CENTER|CH_SIDE_RIGHT, ///< 8, CL + C+ CR + L + R + SL + S+ SR
};
+static const int8_t dca_lfe_index[] = {
+ 1,2,2,2,2,3,2,3,2,3,2,3,1,3,2,3
+};
+
+static const int8_t dca_channel_reorder_lfe[][8] = {
+ { 0, -1, -1, -1, -1, -1, -1, -1},
+ { 0, 1, -1, -1, -1, -1, -1, -1},
+ { 0, 1, -1, -1, -1, -1, -1, -1},
+ { 0, 1, -1, -1, -1, -1, -1, -1},
+ { 0, 1, -1, -1, -1, -1, -1, -1},
+ { 2, 0, 1, -1, -1, -1, -1, -1},
+ { 0, 1, 3, -1, -1, -1, -1, -1},
+ { 2, 0, 1, 4, -1, -1, -1, -1},
+ { 0, 1, 3, 4, -1, -1, -1, -1},
+ { 2, 0, 1, 4, 5, -1, -1, -1},
+ { 3, 4, 0, 1, 5, 6, -1, -1},
+ { 2, 0, 1, 4, 5, 6, -1, -1},
+ { 0, 6, 4, 5, 2, 3, -1, -1},
+ { 4, 2, 5, 0, 1, 6, 7, -1},
+ { 5, 6, 0, 1, 7, 3, 8, 4},
+ { 4, 2, 5, 0, 1, 6, 8, 7},
+};
+
+static const int8_t dca_channel_reorder_nolfe[][8] = {
+ { 0, -1, -1, -1, -1, -1, -1, -1},
+ { 0, 1, -1, -1, -1, -1, -1, -1},
+ { 0, 1, -1, -1, -1, -1, -1, -1},
+ { 0, 1, -1, -1, -1, -1, -1, -1},
+ { 0, 1, -1, -1, -1, -1, -1, -1},
+ { 2, 0, 1, -1, -1, -1, -1, -1},
+ { 0, 1, 2, -1, -1, -1, -1, -1},
+ { 2, 0, 1, 3, -1, -1, -1, -1},
+ { 0, 1, 2, 3, -1, -1, -1, -1},
+ { 2, 0, 1, 3, 4, -1, -1, -1},
+ { 2, 3, 0, 1, 4, 5, -1, -1},
+ { 2, 0, 1, 3, 4, 5, -1, -1},
+ { 0, 5, 3, 4, 1, 2, -1, -1},
+ { 3, 2, 4, 0, 1, 5, 6, -1},
+ { 4, 5, 0, 1, 6, 2, 7, 3},
+ { 3, 2, 4, 0, 1, 5, 7, 6},
+};
+
#define DCA_DOLBY 101 /* FIXME */
@@ -198,6 +240,7 @@ typedef struct {
uint8_t dca_buffer[DCA_MAX_FRAME_SIZE];
int dca_buffer_size; ///< how much data is in the dca_buffer
+ const int8_t* channel_order_tab; ///< channel reordering table, lfe and non lfe
GetBitContext gb;
/* Current position in DCA frame */
int current_subframe;
@@ -1013,7 +1056,7 @@ static int dca_subsubframe(DCAContext * s)
for (k = 0; k < s->prim_channels; k++) {
/* static float pcm_to_double[8] =
{32768.0, 32768.0, 524288.0, 524288.0, 0, 8388608.0, 8388608.0};*/
- qmf_32_subbands(s, k, subband_samples[k], &s->samples[256 * k],
+ qmf_32_subbands(s, k, subband_samples[k], &s->samples[256 * s->channel_order_tab[k]],
M_SQRT1_2*s->scale_bias /*pcm_to_double[s->source_pcm_res] */ ,
s->add_bias );
}
@@ -1027,12 +1070,11 @@ static int dca_subsubframe(DCAContext * s)
/* Generate LFE samples for this subsubframe FIXME!!! */
if (s->output & DCA_LFE) {
int lfe_samples = 2 * s->lfe * s->subsubframes;
- int i_channels = dca_channels[s->output & DCA_CHANNEL_MASK];
lfe_interpolation_fir(s->lfe, 2 * s->lfe,
s->lfe_data + lfe_samples +
2 * s->lfe * subsubframe,
- &s->samples[256 * i_channels],
+ &s->samples[256 * dca_lfe_index[s->amode]],
(1.0/256.0)*s->scale_bias, s->add_bias);
/* Outputs 20bits pcm samples */
}
@@ -1133,8 +1175,9 @@ static int dca_convert_bitstream(const uint8_t * src, int src_size, uint8_t * ds
PutBitContext pb;
if((unsigned)src_size > (unsigned)max_size) {
- av_log(NULL, AV_LOG_ERROR, "Input frame size larger then DCA_MAX_FRAME_SIZE!\n");
- return -1;
+// av_log(NULL, AV_LOG_ERROR, "Input frame size larger then DCA_MAX_FRAME_SIZE!\n");
+// return -1;
+ src_size = max_size;
}
mrk = AV_RB32(src);
@@ -1192,15 +1235,26 @@ static int dca_decode_frame(AVCodecContext * avctx,
avctx->bit_rate = s->bit_rate;
channels = s->prim_channels + !!s->lfe;
- if(avctx->request_channels == 2 && s->prim_channels > 2) {
- channels = 2;
- s->output = DCA_STEREO;
- avctx->channel_layout = CH_LAYOUT_STEREO;
- }
- if (s->amode<16)
+
+ if (s->amode<16) {
avctx->channel_layout = dca_core_channel_layout[s->amode];
- if (s->lfe) avctx->channel_layout |= CH_LOW_FREQUENCY;
+ if (s->lfe) {
+ avctx->channel_layout |= CH_LOW_FREQUENCY;
+ s->channel_order_tab = dca_channel_reorder_lfe[s->amode];
+ } else
+ s->channel_order_tab = dca_channel_reorder_nolfe[s->amode];
+
+ if(avctx->request_channels == 2 && s->prim_channels > 2) {
+ channels = 2;
+ s->output = DCA_STEREO;
+ avctx->channel_layout = CH_LAYOUT_STEREO;
+ }
+ } else {
+ av_log(avctx, AV_LOG_ERROR, "Non standard configuration %d !\n",s->amode);
+ return -1;
+ }
+
/* There is nothing that prevents a dts frame to change channel configuration
but FFmpeg doesn't support that so only set the channels if it is previously
diff --git a/libavcodec/dca.h b/libavcodec/dca.h
index 3895719..02c0a51 100644
--- a/libavcodec/dca.h
+++ b/libavcodec/dca.h
@@ -31,4 +31,7 @@
#define DCA_MARKER_14B_BE 0x1FFFE800
#define DCA_MARKER_14B_LE 0xFF1F00E8
+/** DCA-HD specific block starts with this marker. */
+#define DCA_HD_MARKER 0x64582025
+
#endif /* AVCODEC_DCA_H */
diff --git a/libavcodec/dca_parser.c b/libavcodec/dca_parser.c
index f182506..b1c06e4 100644
--- a/libavcodec/dca_parser.c
+++ b/libavcodec/dca_parser.c
@@ -34,6 +34,7 @@ typedef struct DCAParseContext {
uint32_t lastmarker;
int size;
int framesize;
+ int hd_pos;
} DCAParseContext;
#define IS_MARKER(state, i, buf, buf_size) \
@@ -75,10 +76,16 @@ static int dca_find_frame_end(DCAParseContext * pc1, const uint8_t * buf,
for (; i < buf_size; i++) {
pc1->size++;
state = (state << 8) | buf[i];
- if (state == pc1->lastmarker && IS_MARKER(state, i, buf, buf_size) && (!pc1->framesize || pc1->framesize == pc1->size)) {
+ if (state == DCA_HD_MARKER && !pc1->hd_pos)
+ pc1->hd_pos = pc1->size;
+ if (state == pc1->lastmarker && IS_MARKER(state, i, buf, buf_size)) {
+ if(pc1->framesize > pc1->size)
+ continue;
+ if(!pc1->framesize){
+ pc1->framesize = pc1->hd_pos ? pc1->hd_pos : pc1->size;
+ }
pc->frame_start_found = 0;
pc->state = -1;
- pc1->framesize = pc1->size;
pc1->size = 0;
return i - 3;
}
diff --git a/libavcodec/dct-test.c b/libavcodec/dct-test.c
index 7a80505..04f75b0 100644
--- a/libavcodec/dct-test.c
+++ b/libavcodec/dct-test.c
@@ -35,9 +35,10 @@
#include "libavutil/common.h"
#include "simple_idct.h"
+#include "aandcttab.h"
#include "faandct.h"
#include "faanidct.h"
-#include "i386/idct_xvid.h"
+#include "x86/idct_xvid.h"
#undef printf
#undef random
@@ -45,30 +46,36 @@
void *fast_memcpy(void *a, const void *b, size_t c){return memcpy(a,b,c);};
/* reference fdct/idct */
-extern void fdct(DCTELEM *block);
-extern void idct(DCTELEM *block);
-extern void init_fdct();
+void fdct(DCTELEM *block);
+void idct(DCTELEM *block);
+void init_fdct();
-extern void ff_mmx_idct(DCTELEM *data);
-extern void ff_mmxext_idct(DCTELEM *data);
+void ff_mmx_idct(DCTELEM *data);
+void ff_mmxext_idct(DCTELEM *data);
-extern void odivx_idct_c (short *block);
+void odivx_idct_c(short *block);
// BFIN
-extern void ff_bfin_idct (DCTELEM *block) ;
-extern void ff_bfin_fdct (DCTELEM *block) ;
+void ff_bfin_idct(DCTELEM *block);
+void ff_bfin_fdct(DCTELEM *block);
// ALTIVEC
-extern void fdct_altivec (DCTELEM *block);
-//extern void idct_altivec (DCTELEM *block);?? no routine
+void fdct_altivec(DCTELEM *block);
+//void idct_altivec(DCTELEM *block);?? no routine
+// ARM
+void j_rev_dct_ARM(DCTELEM *data);
+void simple_idct_ARM(DCTELEM *data);
+void simple_idct_armv5te(DCTELEM *data);
+void ff_simple_idct_armv6(DCTELEM *data);
+void ff_simple_idct_neon(DCTELEM *data);
struct algo {
const char *name;
enum { FDCT, IDCT } is_idct;
void (* func) (DCTELEM *block);
void (* ref) (DCTELEM *block);
- enum formattag { NO_PERM,MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM, SSE2_PERM } format;
+ enum formattag { NO_PERM,MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM, SSE2_PERM, PARTTRANS_PERM } format;
int mm_support;
};
@@ -116,21 +123,24 @@ struct algo algos[] = {
{"BFINidct", 1, ff_bfin_idct, idct, NO_PERM},
#endif
+#ifdef ARCH_ARM
+ {"SIMPLE-ARM", 1, simple_idct_ARM, idct, NO_PERM },
+ {"INT-ARM", 1, j_rev_dct_ARM, idct, MMX_PERM },
+#ifdef HAVE_ARMV5TE
+ {"SIMPLE-ARMV5TE", 1, simple_idct_armv5te, idct, NO_PERM },
+#endif
+#ifdef HAVE_ARMV6
+ {"SIMPLE-ARMV6", 1, ff_simple_idct_armv6, idct, MMX_PERM },
+#endif
+#ifdef HAVE_NEON
+ {"SIMPLE-NEON", 1, ff_simple_idct_neon, idct, PARTTRANS_PERM },
+#endif
+#endif /* ARCH_ARM */
+
{ 0 }
};
#define AANSCALE_BITS 12
-static const unsigned short aanscales[64] = {
- /* precomputed values scaled up by 14 bits */
- 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
- 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,
- 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906,
- 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
- 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
- 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
- 8867, 12299, 11585, 10426, 8867, 6967, 4799, 2446,
- 4520, 6270, 5906, 5315, 4520, 3552, 2446, 1247
-};
uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
@@ -245,6 +255,9 @@ void dct_error(const char *name, int is_idct,
} else if (form == SSE2_PERM) {
for(i=0; i<64; i++)
block[(i&0x38) | idct_sse2_row_perm[i&7]] = block1[i];
+ } else if (form == PARTTRANS_PERM) {
+ for(i=0; i<64; i++)
+ block[(i&0x24) | ((i&3)<<3) | ((i>>3)&3)] = block1[i];
} else {
for(i=0; i<64; i++)
block[i]= block1[i];
@@ -263,7 +276,7 @@ void dct_error(const char *name, int is_idct,
if (form == SCALE_PERM) {
for(i=0; i<64; i++) {
- scale = 8*(1 << (AANSCALE_BITS + 11)) / aanscales[i];
+ scale = 8*(1 << (AANSCALE_BITS + 11)) / ff_aanscales[i];
block[i] = (block[i] * scale /*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS;
}
}
diff --git a/libavcodec/dirac_parser.c b/libavcodec/dirac_parser.c
index 199354a..ac82dca 100644
--- a/libavcodec/dirac_parser.c
+++ b/libavcodec/dirac_parser.c
@@ -1,7 +1,8 @@
/*
* Dirac parser
*
- * Copyright (c) 2007 Marco Gerards <marco at gnu.org>
+ * Copyright (c) 2007-2008 Marco Gerards <marco at gnu.org>
+ * Copyright (c) 2008 BBC, Anuradha Suraparaju <asuraparaju at gmail.com>
*
* This file is part of FFmpeg.
*
@@ -34,42 +35,200 @@
* Finds the end of the current frame in the bitstream.
* @return the position of the first byte of the next frame or -1
*/
-static int find_frame_end(ParseContext *pc, const uint8_t *buf, int buf_size)
+typedef struct DiracParseContext {
+ int state;
+ int is_synced;
+ int sync_offset;
+ int header_bytes_needed;
+ int overread_index;
+ int buffer_size;
+ int index;
+ uint8_t *buffer;
+ int dirac_unit_size;
+ uint8_t *dirac_unit;
+} DiracParseContext;
+
+static int find_frame_end(DiracParseContext *pc,
+ const uint8_t *buf, int buf_size)
{
uint32_t state = pc->state;
- int i;
-
- for (i = 0; i < buf_size; i++) {
- state = (state << 8) | buf[i];
- if (state == DIRAC_PARSE_INFO_PREFIX) {
- pc->frame_start_found ^= 1;
- if (!pc->frame_start_found) {
- pc->state = -1;
- return i - 3;
+ int i = 0;
+
+ if (!pc->is_synced) {
+ for (i = 0; i < buf_size; i++) {
+ state = (state << 8) | buf[i];
+ if (state == DIRAC_PARSE_INFO_PREFIX) {
+ state = -1;
+ pc->is_synced = 1;
+ pc->header_bytes_needed = 9;
+ pc->sync_offset = i;
+ break;
}
}
}
+ if (pc->is_synced) {
+ pc->sync_offset = 0;
+ for (; i < buf_size; i++) {
+ if (state == DIRAC_PARSE_INFO_PREFIX) {
+ if ((buf_size-i) >= pc->header_bytes_needed) {
+ pc->state = -1;
+ return i + pc->header_bytes_needed;
+ } else {
+ pc->header_bytes_needed = 9-(buf_size-i);
+ break;
+ }
+ } else
+ state = (state << 8) | buf[i];
+ }
+ }
pc->state = state;
+ return -1;
+}
+
+typedef struct DiracParseUnit
+{
+ int next_pu_offset;
+ int prev_pu_offset;
+ uint8_t pu_type;
+} DiracParseUnit;
+
+static int unpack_parse_unit(DiracParseUnit *pu, DiracParseContext *pc,
+ int offset)
+{
+ uint8_t *start = pc->buffer + offset;
+ uint8_t *end = pc->buffer + pc->index;
+ if (start < pc->buffer || (start+13 > end))
+ return 0;
+ pu->pu_type = start[4];
+
+ pu->next_pu_offset = AV_RB32(start+5);
+ pu->prev_pu_offset = AV_RB32(start+9);
+
+ if (pu->pu_type == 0x10 && pu->next_pu_offset == 0)
+ pu->next_pu_offset = 13;
+
+ return 1;
+}
+
+static int dirac_combine_frame(AVCodecParserContext *s, AVCodecContext *avctx,
+ int next, const uint8_t **buf, int *buf_size)
+{
+ int parse_timing_info = (s->pts == AV_NOPTS_VALUE &&
+ s->dts == AV_NOPTS_VALUE);
+ DiracParseContext *pc = s->priv_data;
+
+ if (pc->overread_index) {
+ memcpy(pc->buffer, pc->buffer + pc->overread_index,
+ pc->index - pc->overread_index);
+ pc->index -= pc->overread_index;
+ pc->overread_index = 0;
+ if (*buf_size == 0 && pc->buffer[4] == 0x10) {
+ *buf = pc->buffer;
+ *buf_size = pc->index;
+ return 0;
+ }
+ }
+
+ if ( next == -1) {
+ /* Found a possible frame start but not a frame end */
+ void *new_buffer = av_fast_realloc(pc->buffer, &pc->buffer_size,
+ pc->index + (*buf_size -
+ pc->sync_offset));
+ pc->buffer = new_buffer;
+ memcpy(pc->buffer+pc->index, (*buf + pc->sync_offset),
+ *buf_size - pc->sync_offset);
+ pc->index += *buf_size - pc->sync_offset;
+ return -1;
+ } else {
+ /* Found a possible frame start and a possible frame end */
+ DiracParseUnit pu1, pu;
+ void *new_buffer = av_fast_realloc(pc->buffer, &pc->buffer_size,
+ pc->index + next);
+ pc->buffer = new_buffer;
+ memcpy(pc->buffer + pc->index, *buf, next);
+ pc->index += next;
- return END_NOT_FOUND;
+ /* Need to check if we have a valid Parse Unit. We can't go by the
+ * sync pattern 'BBCD' alone because arithmetic coding of the residual
+ * and motion data can cause the pattern triggering a false start of
+ * frame. So check if the previous parse offset of the next parse unit
+ * is equal to the next parse offset of the current parse unit then
+ * we can be pretty sure that we have a valid parse unit */
+ if (!unpack_parse_unit(&pu1, pc, pc->index - 13) ||
+ !unpack_parse_unit(&pu, pc, pc->index - 13 - pu1.prev_pu_offset) ||
+ pu.next_pu_offset != pu1.prev_pu_offset) {
+ pc->index -= 9;
+ *buf_size = next-9;
+ pc->header_bytes_needed = 9;
+ return -1;
+ }
+
+ /* All non-frame data must be accompanied by frame data. This is to
+ * ensure that pts is set correctly. So if the current parse unit is
+ * not frame data, wait for frame data to come along */
+
+ pc->dirac_unit = pc->buffer + pc->index - 13 -
+ pu1.prev_pu_offset - pc->dirac_unit_size;
+
+ pc->dirac_unit_size += pu.next_pu_offset;
+
+ if ((pu.pu_type&0x08) != 0x08) {
+ pc->header_bytes_needed = 9;
+ *buf_size = next;
+ return -1;
+ }
+
+ /* Get the picture number to set the pts and dts*/
+ if (parse_timing_info) {
+ uint8_t *cur_pu = pc->buffer +
+ pc->index - 13 - pu1.prev_pu_offset;
+ int pts = AV_RB32(cur_pu + 13);
+ if (s->last_pts == 0 && s->last_dts == 0)
+ s->dts = pts - 1;
+ else
+ s->dts = s->last_dts+1;
+ s->pts = pts;
+ if (!avctx->has_b_frames && (cur_pu[4] & 0x03))
+ avctx->has_b_frames = 1;
+ }
+ if (avctx->has_b_frames && s->pts == s->dts)
+ s->pict_type = FF_B_TYPE;
+
+ /* Finally have a complete Dirac data unit */
+ *buf = pc->dirac_unit;
+ *buf_size = pc->dirac_unit_size;
+
+ pc->dirac_unit_size = 0;
+ pc->overread_index = pc->index-13;
+ pc->header_bytes_needed = 9;
+ }
+ return next;
}
static int dirac_parse(AVCodecParserContext *s, AVCodecContext *avctx,
const uint8_t **poutbuf, int *poutbuf_size,
const uint8_t *buf, int buf_size)
{
- ParseContext *pc = s->priv_data;
+ DiracParseContext *pc = s->priv_data;
int next;
+ *poutbuf = NULL;
+ *poutbuf_size = 0;
+
if (s->flags & PARSER_FLAG_COMPLETE_FRAMES) {
next = buf_size;
- }else{
+ *poutbuf = buf;
+ *poutbuf_size = buf_size;
+ /* Assume that data has been packetized into an encapsulation unit. */
+ } else {
next = find_frame_end(pc, buf, buf_size);
+ if (!pc->is_synced && next == -1) {
+ /* No frame start found yet. So throw away the entire buffer. */
+ return buf_size;
+ }
- if (ff_combine_frame(pc, next, &buf, &buf_size) < 0) {
- *poutbuf = NULL;
- *poutbuf_size = 0;
+ if (dirac_combine_frame(s, avctx, next, &buf, &buf_size) < 0) {
return buf_size;
}
}
@@ -79,10 +238,18 @@ static int dirac_parse(AVCodecParserContext *s, AVCodecContext *avctx,
return next;
}
+static void dirac_parse_close(AVCodecParserContext *s)
+{
+ DiracParseContext *pc = s->priv_data;
+
+ if (pc->buffer_size > 0)
+ av_free(pc->buffer);
+}
+
AVCodecParser dirac_parser = {
{ CODEC_ID_DIRAC },
- sizeof(ParseContext),
+ sizeof(DiracParseContext),
NULL,
dirac_parse,
- ff_parse_close,
+ dirac_parse_close,
};
diff --git a/libavcodec/dnxhddec.c b/libavcodec/dnxhddec.c
index 1f4f1c8..4bf98de 100644
--- a/libavcodec/dnxhddec.c
+++ b/libavcodec/dnxhddec.c
@@ -219,14 +219,12 @@ static int dnxhd_decode_macroblock(DNXHDContext *ctx, int x, int y)
int dct_offset;
int qscale, i;
- ctx->dsp.clear_blocks(ctx->blocks[0]);
- ctx->dsp.clear_blocks(ctx->blocks[2]); // FIXME change clear blocks to take block amount
-
qscale = get_bits(&ctx->gb, 11);
skip_bits1(&ctx->gb);
//av_log(ctx->avctx, AV_LOG_DEBUG, "qscale %d\n", qscale);
for (i = 0; i < 8; i++) {
+ ctx->dsp.clear_block(ctx->blocks[i]);
dnxhd_decode_dct_block(ctx, ctx->blocks[i], i, qscale);
}
diff --git a/libavcodec/dnxhdenc.c b/libavcodec/dnxhdenc.c
index 534b850..a79a161 100644
--- a/libavcodec/dnxhdenc.c
+++ b/libavcodec/dnxhdenc.c
@@ -27,67 +27,29 @@
#include "avcodec.h"
#include "dsputil.h"
#include "mpegvideo.h"
-#include "dnxhddata.h"
-
-typedef struct {
- uint16_t mb;
- int value;
-} RCCMPEntry;
-
-typedef struct {
- int ssd;
- int bits;
-} RCEntry;
+#include "dnxhdenc.h"
int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
-typedef struct DNXHDEncContext {
- MpegEncContext m; ///< Used for quantization dsp functions
-
- AVFrame frame;
- int cid;
- const CIDEntry *cid_table;
- uint8_t *msip; ///< Macroblock Scan Indexes Payload
- uint32_t *slice_size;
-
- struct DNXHDEncContext *thread[MAX_THREADS];
-
- unsigned dct_y_offset;
- unsigned dct_uv_offset;
- int interlaced;
- int cur_field;
-
- DECLARE_ALIGNED_16(DCTELEM, blocks[8][64]);
-
- int (*qmatrix_c) [64];
- int (*qmatrix_l) [64];
- uint16_t (*qmatrix_l16)[2][64];
- uint16_t (*qmatrix_c16)[2][64];
-
- unsigned frame_bits;
- uint8_t *src[3];
-
- uint32_t *vlc_codes;
- uint8_t *vlc_bits;
- uint16_t *run_codes;
- uint8_t *run_bits;
-
- /** Rate control */
- unsigned slice_bits;
- unsigned qscale;
- unsigned lambda;
-
- unsigned thread_size;
-
- uint16_t *mb_bits;
- uint8_t *mb_qscale;
-
- RCCMPEntry *mb_cmp;
- RCEntry (*mb_rc)[8160];
-} DNXHDEncContext;
-
#define LAMBDA_FRAC_BITS 10
+static av_always_inline void dnxhd_get_pixels_8x4(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
+{
+ int i;
+ for (i = 0; i < 4; i++) {
+ block[0] = pixels[0]; block[1] = pixels[1];
+ block[2] = pixels[2]; block[3] = pixels[3];
+ block[4] = pixels[4]; block[5] = pixels[5];
+ block[6] = pixels[6]; block[7] = pixels[7];
+ pixels += line_size;
+ block += 8;
+ }
+ memcpy(block , block- 8, sizeof(*block)*8);
+ memcpy(block+ 8, block-16, sizeof(*block)*8);
+ memcpy(block+16, block-24, sizeof(*block)*8);
+ memcpy(block+24, block-32, sizeof(*block)*8);
+}
+
static int dnxhd_init_vlc(DNXHDEncContext *ctx)
{
int i, j, level, run;
@@ -211,8 +173,13 @@ static int dnxhd_encode_init(AVCodecContext *avctx)
ctx->m.mb_intra = 1;
ctx->m.h263_aic = 1;
+ ctx->get_pixels_8x4_sym = dnxhd_get_pixels_8x4;
+
dsputil_init(&ctx->m.dsp, avctx);
ff_dct_common_init(&ctx->m);
+#ifdef HAVE_MMX
+ ff_dnxhd_init_mmx(ctx);
+#endif
if (!ctx->m.dct_quantize)
ctx->m.dct_quantize = dct_quantize_c;
@@ -385,27 +352,6 @@ static av_always_inline int dnxhd_calc_ac_bits(DNXHDEncContext *ctx, DCTELEM *bl
return bits;
}
-static av_always_inline void dnxhd_get_pixels_4x8(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
-{
- int i;
- for (i = 0; i < 4; i++) {
- block[0] = pixels[0];
- block[1] = pixels[1];
- block[2] = pixels[2];
- block[3] = pixels[3];
- block[4] = pixels[4];
- block[5] = pixels[5];
- block[6] = pixels[6];
- block[7] = pixels[7];
- pixels += line_size;
- block += 8;
- }
- memcpy(block , block- 8, sizeof(*block)*8);
- memcpy(block+ 8, block-16, sizeof(*block)*8);
- memcpy(block+16, block-24, sizeof(*block)*8);
- memcpy(block+24, block-32, sizeof(*block)*8);
-}
-
static av_always_inline void dnxhd_get_blocks(DNXHDEncContext *ctx, int mb_x, int mb_y)
{
const uint8_t *ptr_y = ctx->thread[0]->src[0] + ((mb_y << 4) * ctx->m.linesize) + (mb_x << 4);
@@ -420,12 +366,14 @@ static av_always_inline void dnxhd_get_blocks(DNXHDEncContext *ctx, int mb_x, in
if (mb_y+1 == ctx->m.mb_height && ctx->m.avctx->height == 1080) {
if (ctx->interlaced) {
- dnxhd_get_pixels_4x8(ctx->blocks[4], ptr_y + ctx->dct_y_offset , ctx->m.linesize);
- dnxhd_get_pixels_4x8(ctx->blocks[5], ptr_y + ctx->dct_y_offset + 8, ctx->m.linesize);
- dnxhd_get_pixels_4x8(ctx->blocks[6], ptr_u + ctx->dct_uv_offset , ctx->m.uvlinesize);
- dnxhd_get_pixels_4x8(ctx->blocks[7], ptr_v + ctx->dct_uv_offset , ctx->m.uvlinesize);
- } else
- memset(ctx->blocks[4], 0, 4*64*sizeof(DCTELEM));
+ ctx->get_pixels_8x4_sym(ctx->blocks[4], ptr_y + ctx->dct_y_offset , ctx->m.linesize);
+ ctx->get_pixels_8x4_sym(ctx->blocks[5], ptr_y + ctx->dct_y_offset + 8, ctx->m.linesize);
+ ctx->get_pixels_8x4_sym(ctx->blocks[6], ptr_u + ctx->dct_uv_offset , ctx->m.uvlinesize);
+ ctx->get_pixels_8x4_sym(ctx->blocks[7], ptr_v + ctx->dct_uv_offset , ctx->m.uvlinesize);
+ } else {
+ dsp->clear_block(ctx->blocks[4]); dsp->clear_block(ctx->blocks[5]);
+ dsp->clear_block(ctx->blocks[6]); dsp->clear_block(ctx->blocks[7]);
+ }
} else {
dsp->get_pixels(ctx->blocks[4], ptr_y + ctx->dct_y_offset , ctx->m.linesize);
dsp->get_pixels(ctx->blocks[5], ptr_y + ctx->dct_y_offset + 8, ctx->m.linesize);
diff --git a/libavcodec/dnxhdenc.h b/libavcodec/dnxhdenc.h
new file mode 100644
index 0000000..6f9f647
--- /dev/null
+++ b/libavcodec/dnxhdenc.h
@@ -0,0 +1,90 @@
+/*
+ * VC3/DNxHD encoder structure definitions and prototypes
+ * Copyright (c) 2007 Baptiste Coudurier <baptiste dot coudurier at smartjog dot com>
+ *
+ * VC-3 encoder funded by the British Broadcasting Corporation
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_DNXHDENC_H
+#define AVCODEC_DNXHDENC_H
+
+#include <stdint.h>
+#include "libavcodec/mpegvideo.h"
+#include "libavcodec/dnxhddata.h"
+
+typedef struct {
+ uint16_t mb;
+ int value;
+} RCCMPEntry;
+
+typedef struct {
+ int ssd;
+ int bits;
+} RCEntry;
+
+typedef struct DNXHDEncContext {
+ MpegEncContext m; ///< Used for quantization dsp functions
+
+ AVFrame frame;
+ int cid;
+ const CIDEntry *cid_table;
+ uint8_t *msip; ///< Macroblock Scan Indexes Payload
+ uint32_t *slice_size;
+
+ struct DNXHDEncContext *thread[MAX_THREADS];
+
+ unsigned dct_y_offset;
+ unsigned dct_uv_offset;
+ int interlaced;
+ int cur_field;
+
+ DECLARE_ALIGNED_16(DCTELEM, blocks[8][64]);
+
+ int (*qmatrix_c) [64];
+ int (*qmatrix_l) [64];
+ uint16_t (*qmatrix_l16)[2][64];
+ uint16_t (*qmatrix_c16)[2][64];
+
+ unsigned frame_bits;
+ uint8_t *src[3];
+
+ uint32_t *vlc_codes;
+ uint8_t *vlc_bits;
+ uint16_t *run_codes;
+ uint8_t *run_bits;
+
+ /** Rate control */
+ unsigned slice_bits;
+ unsigned qscale;
+ unsigned lambda;
+
+ unsigned thread_size;
+
+ uint16_t *mb_bits;
+ uint8_t *mb_qscale;
+
+ RCCMPEntry *mb_cmp;
+ RCEntry (*mb_rc)[8160];
+
+ void (*get_pixels_8x4_sym)(DCTELEM */*align 16*/, const uint8_t *, int);
+} DNXHDEncContext;
+
+void ff_dnxhd_init_mmx(DNXHDEncContext *ctx);
+
+#endif /* AVCODEC_DNXHDENC_H */
diff --git a/libavcodec/dpcm.c b/libavcodec/dpcm.c
index ff684ae..74ca9ec 100644
--- a/libavcodec/dpcm.c
+++ b/libavcodec/dpcm.c
@@ -268,7 +268,7 @@ static int dpcm_decode_frame(AVCodecContext *avctx,
n1 = (buf[in] >> 4) & 0xF;
n2 = buf[in++] & 0xF;
s->sample[0] += s->sol_table[n1];
- if (s->sample[0] < 0) s->sample[0] = 0;
+ if (s->sample[0] < 0) s->sample[0] = 0;
if (s->sample[0] > 255) s->sample[0] = 255;
output_samples[out++] = (s->sample[0] - 128) << 8;
s->sample[s->channels - 1] += s->sol_table[n2];
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index 9a73e74..76f5dbb 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -169,7 +169,7 @@ void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_s
int j;
j = src_scantable[i];
st->permutated[i] = permutation[j];
-#ifdef ARCH_POWERPC
+#ifdef ARCH_PPC
st->inverse[j] = i;
#endif
}
@@ -2743,6 +2743,27 @@ void ff_intrax8dsp_init(DSPContext* c, AVCodecContext *avctx);
/* H264 specific */
void ff_h264dspenc_init(DSPContext* c, AVCodecContext *avctx);
+#if defined(CONFIG_RV30_DECODER)
+void ff_rv30dsp_init(DSPContext* c, AVCodecContext *avctx);
+#endif /* CONFIG_RV30_DECODER */
+
+#if defined(CONFIG_RV40_DECODER)
+static void put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
+ put_pixels16_xy2_c(dst, src, stride, 16);
+}
+static void avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
+ avg_pixels16_xy2_c(dst, src, stride, 16);
+}
+static void put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
+ put_pixels8_xy2_c(dst, src, stride, 8);
+}
+static void avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
+ avg_pixels8_xy2_c(dst, src, stride, 8);
+}
+
+void ff_rv40dsp_init(DSPContext* c, AVCodecContext *avctx);
+#endif /* CONFIG_RV40_DECODER */
+
static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
int i;
@@ -2970,6 +2991,63 @@ static void h264_h_loop_filter_luma_c(uint8_t *pix, int stride, int alpha, int b
h264_loop_filter_luma_c(pix, 1, stride, alpha, beta, tc0);
}
+static inline void h264_loop_filter_luma_intra_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta)
+{
+ int d;
+ for( d = 0; d < 16; d++ ) {
+ const int p2 = pix[-3*xstride];
+ const int p1 = pix[-2*xstride];
+ const int p0 = pix[-1*xstride];
+
+ const int q0 = pix[ 0*xstride];
+ const int q1 = pix[ 1*xstride];
+ const int q2 = pix[ 2*xstride];
+
+ if( FFABS( p0 - q0 ) < alpha &&
+ FFABS( p1 - p0 ) < beta &&
+ FFABS( q1 - q0 ) < beta ) {
+
+ if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
+ if( FFABS( p2 - p0 ) < beta)
+ {
+ const int p3 = pix[-4*xstride];
+ /* p0', p1', p2' */
+ pix[-1*xstride] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
+ pix[-2*xstride] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
+ pix[-3*xstride] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
+ } else {
+ /* p0' */
+ pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
+ }
+ if( FFABS( q2 - q0 ) < beta)
+ {
+ const int q3 = pix[3*xstride];
+ /* q0', q1', q2' */
+ pix[0*xstride] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
+ pix[1*xstride] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
+ pix[2*xstride] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
+ } else {
+ /* q0' */
+ pix[0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
+ }
+ }else{
+ /* p0', q0' */
+ pix[-1*xstride] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
+ pix[ 0*xstride] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
+ }
+ }
+ pix += ystride;
+ }
+}
+static void h264_v_loop_filter_luma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
+{
+ h264_loop_filter_luma_intra_c(pix, stride, 1, alpha, beta);
+}
+static void h264_h_loop_filter_luma_intra_c(uint8_t *pix, int stride, int alpha, int beta)
+{
+ h264_loop_filter_luma_intra_c(pix, 1, stride, alpha, beta);
+}
+
static inline void h264_loop_filter_chroma_c(uint8_t *pix, int xstride, int ystride, int alpha, int beta, int8_t *tc0)
{
int i, d;
@@ -3403,6 +3481,11 @@ void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
}
}
+static void clear_block_c(DCTELEM *block)
+{
+ memset(block, 0, sizeof(DCTELEM)*64);
+}
+
/**
* memset(blocks, 0, sizeof(DCTELEM)*6*64)
*/
@@ -4259,6 +4342,10 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
c->h264_idct8_add= ff_h264_idct8_add_c;
c->h264_idct_dc_add= ff_h264_idct_dc_add_c;
c->h264_idct8_dc_add= ff_h264_idct8_dc_add_c;
+ c->h264_idct_add16 = ff_h264_idct_add16_c;
+ c->h264_idct8_add4 = ff_h264_idct8_add4_c;
+ c->h264_idct_add8 = ff_h264_idct_add8_c;
+ c->h264_idct_add16intra= ff_h264_idct_add16intra_c;
}
c->get_pixels = get_pixels_c;
@@ -4271,6 +4358,7 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
c->sum_abs_dctelem = sum_abs_dctelem_c;
c->gmc1 = gmc1_c;
c->gmc = ff_gmc_c;
+ c->clear_block = clear_block_c;
c->clear_blocks = clear_blocks_c;
c->pix_sum = pix_sum_c;
c->pix_norm1 = pix_norm1_c;
@@ -4411,6 +4499,16 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
#if defined(CONFIG_H264_ENCODER)
ff_h264dspenc_init(c,avctx);
#endif
+#if defined(CONFIG_RV30_DECODER)
+ ff_rv30dsp_init(c,avctx);
+#endif
+#if defined(CONFIG_RV40_DECODER)
+ ff_rv40dsp_init(c,avctx);
+ c->put_rv40_qpel_pixels_tab[0][15] = put_rv40_qpel16_mc33_c;
+ c->avg_rv40_qpel_pixels_tab[0][15] = avg_rv40_qpel16_mc33_c;
+ c->put_rv40_qpel_pixels_tab[1][15] = put_rv40_qpel8_mc33_c;
+ c->avg_rv40_qpel_pixels_tab[1][15] = avg_rv40_qpel8_mc33_c;
+#endif
c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c;
c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
@@ -4466,6 +4564,8 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_c;
c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_c;
+ c->h264_v_loop_filter_luma_intra= h264_v_loop_filter_luma_intra_c;
+ c->h264_h_loop_filter_luma_intra= h264_h_loop_filter_luma_intra_c;
c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_c;
c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_c;
c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_c;
@@ -4524,11 +4624,11 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));
if (ENABLE_MMX) dsputil_init_mmx (c, avctx);
- if (ENABLE_ARMV4L) dsputil_init_armv4l(c, avctx);
+ if (ENABLE_ARM) dsputil_init_arm (c, avctx);
if (ENABLE_MLIB) dsputil_init_mlib (c, avctx);
if (ENABLE_VIS) dsputil_init_vis (c, avctx);
if (ENABLE_ALPHA) dsputil_init_alpha (c, avctx);
- if (ENABLE_POWERPC) dsputil_init_ppc (c, avctx);
+ if (ENABLE_PPC) dsputil_init_ppc (c, avctx);
if (ENABLE_MMI) dsputil_init_mmi (c, avctx);
if (ENABLE_SH4) dsputil_init_sh4 (c, avctx);
if (ENABLE_BFIN) dsputil_init_bfin (c, avctx);
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index 9a3acde..88ed315 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -60,6 +60,10 @@ void ff_h264_idct8_dc_add_c(uint8_t *dst, DCTELEM *block, int stride);
void ff_h264_idct_dc_add_c(uint8_t *dst, DCTELEM *block, int stride);
void ff_h264_lowres_idct_add_c(uint8_t *dst, int stride, DCTELEM *block);
void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block);
+void ff_h264_idct_add16_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
+void ff_h264_idct_add16intra_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
+void ff_h264_idct8_add4_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
+void ff_h264_idct_add8_c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
void ff_vector_fmul_add_add_c(float *dst, const float *src0, const float *src1,
const float *src2, int src3, int blocksize, int step);
@@ -169,7 +173,7 @@ typedef struct ScanTable{
const uint8_t *scantable;
uint8_t permutated[64];
uint8_t raster_end[64];
-#ifdef ARCH_POWERPC
+#ifdef ARCH_PPC
/** Used by dct_quantize_altivec to find last-non-zero */
DECLARE_ALIGNED(16, uint8_t, inverse[64]);
#endif
@@ -203,6 +207,7 @@ typedef struct DSPContext {
*/
void (*gmc )(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int ox, int oy,
int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
+ void (*clear_block)(DCTELEM *block/*align 16*/);
void (*clear_blocks)(DCTELEM *blocks/*align 16*/);
int (*pix_sum)(uint8_t * pix, int line_size);
int (*pix_norm1)(uint8_t * pix, int line_size);
@@ -346,6 +351,8 @@ typedef struct DSPContext {
void (*h264_v_loop_filter_luma)(uint8_t *pix/*align 16*/, int stride, int alpha, int beta, int8_t *tc0);
void (*h264_h_loop_filter_luma)(uint8_t *pix/*align 4 */, int stride, int alpha, int beta, int8_t *tc0);
/* v/h_loop_filter_luma_intra: align 16 */
+ void (*h264_v_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta);
+ void (*h264_h_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta);
void (*h264_v_loop_filter_chroma)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta, int8_t *tc0);
void (*h264_h_loop_filter_chroma)(uint8_t *pix/*align 4*/, int stride, int alpha, int beta, int8_t *tc0);
void (*h264_v_loop_filter_chroma_intra)(uint8_t *pix/*align 8*/, int stride, int alpha, int beta);
@@ -435,11 +442,19 @@ typedef struct DSPContext {
#define EDGE_WIDTH 16
/* h264 functions */
+ /* NOTE!!! if you implement any of h264_idct8_add, h264_idct8_add4 then you must implement all of them
+ NOTE!!! if you implement any of h264_idct_add, h264_idct_add16, h264_idct_add16intra, h264_idct_add8 then you must implement all of them
+ The reason for above, is that no 2 out of one list may use a different permutation.
+ */
void (*h264_idct_add)(uint8_t *dst/*align 4*/, DCTELEM *block/*align 16*/, int stride);
void (*h264_idct8_add)(uint8_t *dst/*align 8*/, DCTELEM *block/*align 16*/, int stride);
void (*h264_idct_dc_add)(uint8_t *dst/*align 4*/, DCTELEM *block/*align 16*/, int stride);
void (*h264_idct8_dc_add)(uint8_t *dst/*align 8*/, DCTELEM *block/*align 16*/, int stride);
void (*h264_dct)(DCTELEM block[4][4]);
+ void (*h264_idct_add16)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
+ void (*h264_idct8_add4)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
+ void (*h264_idct_add8)(uint8_t **dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
+ void (*h264_idct_add16intra)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
/* snow wavelet */
void (*vertical_compose97i)(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width);
@@ -484,6 +499,16 @@ typedef struct DSPContext {
* @param shift number of bits to discard from product
*/
int32_t (*scalarproduct_int16)(int16_t *v1, int16_t *v2/*align 16*/, int len, int shift);
+
+ /* rv30 functions */
+ qpel_mc_func put_rv30_tpel_pixels_tab[4][16];
+ qpel_mc_func avg_rv30_tpel_pixels_tab[4][16];
+
+ /* rv40 functions */
+ qpel_mc_func put_rv40_qpel_pixels_tab[4][16];
+ qpel_mc_func avg_rv40_qpel_pixels_tab[4][16];
+ h264_chroma_mc_func put_rv40_chroma_pixels_tab[3];
+ h264_chroma_mc_func avg_rv40_chroma_pixels_tab[3];
} DSPContext;
void dsputil_static_init(void);
@@ -547,7 +572,7 @@ static inline int get_penalty_factor(int lambda, int lambda2, int type){
int mm_support(void);
void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx);
-void dsputil_init_armv4l(DSPContext* c, AVCodecContext *avctx);
+void dsputil_init_arm(DSPContext* c, AVCodecContext *avctx);
void dsputil_init_bfin(DSPContext* c, AVCodecContext *avctx);
void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx);
void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx);
@@ -582,7 +607,7 @@ static inline void emms(void)
void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx);
-#elif defined(ARCH_ARMV4L)
+#elif defined(ARCH_ARM)
extern int mm_flags;
@@ -591,7 +616,7 @@ extern int mm_flags;
# define STRIDE_ALIGN 16
#endif
-#elif defined(ARCH_POWERPC)
+#elif defined(ARCH_PPC)
extern int mm_flags;
diff --git a/libavcodec/dv.c b/libavcodec/dv.c
index a3f0511..e3f54b8 100644
--- a/libavcodec/dv.c
+++ b/libavcodec/dv.c
@@ -54,8 +54,6 @@ typedef struct DVVideoContext {
uint8_t *buf;
uint8_t dv_zigzag[2][64];
- uint32_t dv_idct_factor[2][2][22][64];
- uint32_t dv100_idct_factor[4][4][16][64];
void (*get_pixels)(DCTELEM *block, const uint8_t *pixels, int line_size);
void (*fdct[2])(DCTELEM *block);
@@ -90,65 +88,182 @@ static inline int dv_work_pool_size(const DVprofile *d)
return size;
}
-static int dv_init_dynamic_tables(const DVprofile *d)
+static inline void dv_calc_mb_coordinates(const DVprofile *d, int chan, int seq, int slot,
+ uint16_t *tbl)
{
- int j,i,c,s,p,k;
-
- if (d->work_chunks[dv_work_pool_size(d)-1].buf_offset)
- return 0;
-
- p = i = 0;
- for (c=0; c<d->n_difchan; c++) {
- for (s=0; s<d->difseg_size; s++) {
- p += 6;
- for (j=0; j<27; j++) {
- p += !(j%3);
- if (!(DV_PROFILE_IS_1080i50(d) && c != 0 && s == 11) &&
- !(DV_PROFILE_IS_720p50(d) && s > 9)) {
- for (k=0; k<5; k++)
- d->work_chunks[i].mb_coordinates[k] = d->video_place[(c*d->difseg_size+s)*27*5 + j*5 + k];
- d->work_chunks[i++].buf_offset = p;
- }
- p += 5;
- }
+ const static uint8_t off[] = { 2, 6, 8, 0, 4 };
+ const static uint8_t shuf1[] = { 36, 18, 54, 0, 72 };
+ const static uint8_t shuf2[] = { 24, 12, 36, 0, 48 };
+ const static uint8_t shuf3[] = { 18, 9, 27, 0, 36 };
+
+ const static uint8_t l_start[] = {0, 4, 9, 13, 18, 22, 27, 31, 36, 40};
+ const static uint8_t l_start_shuffled[] = { 9, 4, 13, 0, 18 };
+
+ const static uint8_t serpent1[] = {0, 1, 2, 2, 1, 0,
+ 0, 1, 2, 2, 1, 0,
+ 0, 1, 2, 2, 1, 0,
+ 0, 1, 2, 2, 1, 0,
+ 0, 1, 2};
+ const static uint8_t serpent2[] = {0, 1, 2, 3, 4, 5, 5, 4, 3, 2, 1, 0,
+ 0, 1, 2, 3, 4, 5, 5, 4, 3, 2, 1, 0,
+ 0, 1, 2, 3, 4, 5};
+
+ const static uint8_t remap[][2] = {{ 0, 0}, { 0, 0}, { 0, 0}, { 0, 0}, /* dummy */
+ { 0, 0}, { 0, 1}, { 0, 2}, { 0, 3}, {10, 0},
+ {10, 1}, {10, 2}, {10, 3}, {20, 0}, {20, 1},
+ {20, 2}, {20, 3}, {30, 0}, {30, 1}, {30, 2},
+ {30, 3}, {40, 0}, {40, 1}, {40, 2}, {40, 3},
+ {50, 0}, {50, 1}, {50, 2}, {50, 3}, {60, 0},
+ {60, 1}, {60, 2}, {60, 3}, {70, 0}, {70, 1},
+ {70, 2}, {70, 3}, { 0,64}, { 0,65}, { 0,66},
+ {10,64}, {10,65}, {10,66}, {20,64}, {20,65},
+ {20,66}, {30,64}, {30,65}, {30,66}, {40,64},
+ {40,65}, {40,66}, {50,64}, {50,65}, {50,66},
+ {60,64}, {60,65}, {60,66}, {70,64}, {70,65},
+ {70,66}, { 0,67}, {20,67}, {40,67}, {60,67}};
+
+ int i, k, m;
+ int x, y, blk;
+
+ for (m=0; m<5; m++) {
+ switch (d->width) {
+ case 1440:
+ blk = (chan*11+seq)*27+slot;
+
+ if (chan == 0 && seq == 11) {
+ x = m*27+slot;
+ if (x<90) {
+ y = 0;
+ } else {
+ x = (x - 90)*2;
+ y = 67;
+ }
+ } else {
+ i = (4*chan + blk + off[m])%11;
+ k = (blk/11)%27;
+
+ x = shuf1[m] + (chan&1)*9 + k%9;
+ y = (i*3+k/9)*2 + (chan>>1) + 1;
+ }
+ tbl[m] = (x<<1)|(y<<9);
+ break;
+ case 1280:
+ blk = (chan*10+seq)*27+slot;
+
+ i = (4*chan + (seq/5) + 2*blk + off[m])%10;
+ k = (blk/5)%27;
+
+ x = shuf1[m]+(chan&1)*9 + k%9;
+ y = (i*3+k/9)*2 + (chan>>1) + 4;
+
+ if (x >= 80) {
+ x = remap[y][0]+((x-80)<<(y>59));
+ y = remap[y][1];
+ }
+ tbl[m] = (x<<1)|(y<<9);
+ break;
+ case 960:
+ blk = (chan*10+seq)*27+slot;
+
+ i = (4*chan + (seq/5) + 2*blk + off[m])%10;
+ k = (blk/5)%27 + (i&1)*3;
+
+ x = shuf2[m] + k%6 + 6*(chan&1);
+ y = l_start[i] + k/6 + 45*(chan>>1);
+ tbl[m] = (x<<1)|(y<<9);
+ break;
+ case 720:
+ switch (d->pix_fmt) {
+ case PIX_FMT_YUV422P:
+ x = shuf3[m] + slot/3;
+ y = serpent1[slot] +
+ ((((seq + off[m]) % d->difseg_size)<<1) + chan)*3;
+ tbl[m] = (x<<1)|(y<<8);
+ break;
+ case PIX_FMT_YUV420P:
+ x = shuf3[m] + slot/3;
+ y = serpent1[slot] +
+ ((seq + off[m]) % d->difseg_size)*3;
+ tbl[m] = (x<<1)|(y<<9);
+ break;
+ case PIX_FMT_YUV411P:
+ i = (seq + off[m]) % d->difseg_size;
+ k = slot + ((m==1||m==2)?3:0);
+
+ x = l_start_shuffled[m] + k/6;
+ y = serpent2[k] + i*6;
+ if (x>21)
+ y = y*2 - i*6;
+ tbl[m] = (x<<2)|(y<<8);
+ break;
+ }
+ default:
+ break;
}
}
- return 0;
}
-static void dv_build_unquantize_tables(DVVideoContext *s, uint8_t* perm)
+static int dv_init_dynamic_tables(const DVprofile *d)
{
- int i, q, a;
-
- /* NOTE: max left shift is 6 */
- for (q = 0; q < 22; q++) {
- /* 88DCT */
- i = 1;
- for (a = 0; a < 4; a++) {
- for (; i < dv_quant_areas[a]; i++) {
- /* 88 table */
- s->dv_idct_factor[0][0][q][i] = dv_iweight_88[i] << (dv_quant_shifts[q][a] + 1);
- s->dv_idct_factor[1][0][q][i] = s->dv_idct_factor[0][0][q][i] << 1;
-
- /* 248 table */
- s->dv_idct_factor[0][1][q][i] = dv_iweight_248[i] << (dv_quant_shifts[q][a] + 1);
- s->dv_idct_factor[1][1][q][i] = s->dv_idct_factor[0][1][q][i] << 1;
+ int j,i,c,s,p;
+ uint32_t *factor1, *factor2;
+ const int *iweight1, *iweight2;
+
+ if (!d->work_chunks[dv_work_pool_size(d)-1].buf_offset) {
+ p = i = 0;
+ for (c=0; c<d->n_difchan; c++) {
+ for (s=0; s<d->difseg_size; s++) {
+ p += 6;
+ for (j=0; j<27; j++) {
+ p += !(j%3);
+ if (!(DV_PROFILE_IS_1080i50(d) && c != 0 && s == 11) &&
+ !(DV_PROFILE_IS_720p50(d) && s > 9)) {
+ dv_calc_mb_coordinates(d, c, s, j, &d->work_chunks[i].mb_coordinates[0]);
+ d->work_chunks[i++].buf_offset = p;
+ }
+ p += 5;
+ }
}
}
}
- for (a = 0; a < 4; a++) {
- for (q = 0; q < 16; q++) {
- for (i = 1; i < 64; i++) {
- s->dv100_idct_factor[0][a][q][i] = (dv100_qstep[q] << (a + 9)) * dv_iweight_1080_y[i];
- s->dv100_idct_factor[1][a][q][i] = (dv100_qstep[q] << (a + 9)) * dv_iweight_1080_c[i];
- s->dv100_idct_factor[2][a][q][i] = (dv100_qstep[q] << (a + 9)) * dv_iweight_720_y[i];
- s->dv100_idct_factor[3][a][q][i] = (dv100_qstep[q] << (a + 9)) * dv_iweight_720_c[i];
+ if (!d->idct_factor[DV_PROFILE_IS_HD(d)?8191:5631]) {
+ factor1 = &d->idct_factor[0];
+ factor2 = &d->idct_factor[DV_PROFILE_IS_HD(d)?4096:2816];
+ if (d->height == 720) {
+ iweight1 = &dv_iweight_720_y[0];
+ iweight2 = &dv_iweight_720_c[0];
+ } else {
+ iweight1 = &dv_iweight_1080_y[0];
+ iweight2 = &dv_iweight_1080_c[0];
+ }
+ if (DV_PROFILE_IS_HD(d)) {
+ for (c = 0; c < 4; c++) {
+ for (s = 0; s < 16; s++) {
+ for (i = 0; i < 64; i++) {
+ *factor1++ = (dv100_qstep[s] << (c + 9)) * iweight1[i];
+ *factor2++ = (dv100_qstep[s] << (c + 9)) * iweight2[i];
+ }
+ }
+ }
+ } else {
+ iweight1 = &dv_iweight_88[0];
+ for (j = 0; j < 2; j++, iweight1 = &dv_iweight_248[0]) {
+ for (s = 0; s < 22; s++) {
+ for (i = c = 0; c < 4; c++) {
+ for (; i < dv_quant_areas[c]; i++) {
+ *factor1 = iweight1[i] << (dv_quant_shifts[s][c] + 1);
+ *factor2++ = (*factor1++) << 1;
+ }
+ }
}
}
}
}
+ return 0;
+}
+
static av_cold int dvvideo_init(AVCodecContext *avctx)
{
DVVideoContext *s = avctx->priv_data;
@@ -272,9 +387,6 @@ static av_cold int dvvideo_init(AVCodecContext *avctx)
}else
memcpy(s->dv_zigzag[1], ff_zigzag248_direct, 64);
- /* XXX: do it only for constant case */
- dv_build_unquantize_tables(s, dsp.idct_permutation);
-
avctx->coded_frame = &s->picture;
s->avctx = avctx;
@@ -395,8 +507,9 @@ static inline void dv_calculate_mb_xy(DVVideoContext *s, DVwork_chunk *work_chun
}
/* mb_x and mb_y are in units of 8 pixels */
-static inline void dv_decode_video_segment(DVVideoContext *s, DVwork_chunk *work_chunk)
+static int dv_decode_video_segment(AVCodecContext *avctx, DVwork_chunk *work_chunk)
{
+ DVVideoContext *s = avctx->priv_data;
int quant, dc, dct_mode, class1, j;
int mb_index, mb_x, mb_y, last_index;
int y_stride, linesize;
@@ -442,13 +555,13 @@ static inline void dv_decode_video_segment(DVVideoContext *s, DVwork_chunk *work
if (DV_PROFILE_IS_HD(s->sys)) {
mb->idct_put = s->idct_put[0];
mb->scan_table = s->dv_zigzag[0];
- mb->factor_table = s->dv100_idct_factor[((s->sys->height == 720) << 1) | (j >= 4)][class1][quant];
+ mb->factor_table = &s->sys->idct_factor[(j >= 4)*4*16*64 + class1*16*64 + quant*64];
is_field_mode[mb_index] |= !j && dct_mode;
} else {
mb->idct_put = s->idct_put[dct_mode && log2_blocksize == 3];
mb->scan_table = s->dv_zigzag[dct_mode];
- mb->factor_table = s->dv_idct_factor[class1 == 3][dct_mode]
- [quant + dv_quant_offset[class1]];
+ mb->factor_table = &s->sys->idct_factor[(class1 == 3)*2*22*64 + dct_mode*22*64 +
+ (quant + dv_quant_offset[class1])*64];
}
dc = dc << 2;
/* convert to unsigned because 128 is not added in the
@@ -576,6 +689,7 @@ static inline void dv_decode_video_segment(DVVideoContext *s, DVwork_chunk *work
}
}
}
+ return 0;
}
#if ENABLE_SMALL
@@ -857,8 +971,9 @@ static inline void dv_guess_qnos(EncBlockInfo* blks, int* qnos)
}
}
-static inline void dv_encode_video_segment(DVVideoContext *s, DVwork_chunk *work_chunk)
+static int dv_encode_video_segment(AVCodecContext *avctx, DVwork_chunk *work_chunk)
{
+ DVVideoContext *s = avctx->priv_data;
int mb_index, i, j;
int mb_x, mb_y, c_offset, linesize;
uint8_t* y_ptr;
@@ -1004,22 +1119,10 @@ static inline void dv_encode_video_segment(DVVideoContext *s, DVwork_chunk *work
for (j = 0; j < 5 * 6; j++)
flush_put_bits(&pbs[j]);
-}
-static int dv_decode_mt(AVCodecContext *avctx, void* sl)
-{
- dv_decode_video_segment((DVVideoContext *)avctx->priv_data, (DVwork_chunk*)sl);
return 0;
}
-#ifdef CONFIG_DVVIDEO_ENCODER
-static int dv_encode_mt(AVCodecContext *avctx, void* sl)
-{
- dv_encode_video_segment((DVVideoContext *)avctx->priv_data, (DVwork_chunk*)sl);
- return 0;
-}
-#endif
-
#ifdef CONFIG_DVVIDEO_DECODER
/* NOTE: exactly one frame must be given (120000 bytes for NTSC,
144000 bytes for PAL - or twice those for 50Mbps) */
@@ -1050,7 +1153,7 @@ static int dvvideo_decode_frame(AVCodecContext *avctx,
s->picture.top_field_first = 0;
s->buf = buf;
- avctx->execute(avctx, dv_decode_mt, s->sys->work_chunks, NULL,
+ avctx->execute(avctx, dv_decode_video_segment, s->sys->work_chunks, NULL,
dv_work_pool_size(s->sys), sizeof(DVwork_chunk));
emms_c();
@@ -1203,7 +1306,7 @@ static int dvvideo_encode_frame(AVCodecContext *c, uint8_t *buf, int buf_size,
s->picture.pict_type = FF_I_TYPE;
s->buf = buf;
- c->execute(c, dv_encode_mt, s->sys->work_chunks, NULL,
+ c->execute(c, dv_encode_video_segment, s->sys->work_chunks, NULL,
dv_work_pool_size(s->sys), sizeof(DVwork_chunk));
emms_c();
diff --git a/libavcodec/dvbsub.c b/libavcodec/dvbsub.c
index d7cb2c4..ed548e2 100644
--- a/libavcodec/dvbsub.c
+++ b/libavcodec/dvbsub.c
@@ -228,8 +228,8 @@ static int encode_dvb_subtitles(DVBSubtitleContext *s,
for (region_id = 0; region_id < h->num_rects; region_id++) {
*q++ = region_id;
*q++ = 0xff; /* reserved */
- bytestream_put_be16(&q, h->rects[region_id].x); /* left pos */
- bytestream_put_be16(&q, h->rects[region_id].y); /* top pos */
+ bytestream_put_be16(&q, h->rects[region_id]->x); /* left pos */
+ bytestream_put_be16(&q, h->rects[region_id]->y); /* top pos */
}
bytestream_put_be16(&pseg_len, q - pseg_len - 2);
@@ -239,10 +239,10 @@ static int encode_dvb_subtitles(DVBSubtitleContext *s,
/* CLUT segment */
- if (h->rects[clut_id].nb_colors <= 4) {
+ if (h->rects[clut_id]->nb_colors <= 4) {
/* 2 bpp, some decoders do not support it correctly */
bpp_index = 0;
- } else if (h->rects[clut_id].nb_colors <= 16) {
+ } else if (h->rects[clut_id]->nb_colors <= 16) {
/* 4 bpp, standard encoding */
bpp_index = 1;
} else {
@@ -257,15 +257,16 @@ static int encode_dvb_subtitles(DVBSubtitleContext *s,
*q++ = clut_id;
*q++ = (0 << 4) | 0xf; /* version = 0 */
- for(i = 0; i < h->rects[clut_id].nb_colors; i++) {
+ for(i = 0; i < h->rects[clut_id]->nb_colors; i++) {
*q++ = i; /* clut_entry_id */
*q++ = (1 << (7 - bpp_index)) | (0xf << 1) | 1; /* 2 bits/pixel full range */
{
int a, r, g, b;
- a = (h->rects[clut_id].rgba_palette[i] >> 24) & 0xff;
- r = (h->rects[clut_id].rgba_palette[i] >> 16) & 0xff;
- g = (h->rects[clut_id].rgba_palette[i] >> 8) & 0xff;
- b = (h->rects[clut_id].rgba_palette[i] >> 0) & 0xff;
+ uint32_t x= ((uint32_t*)h->rects[clut_id]->pict.data[1])[i];
+ a = (x >> 24) & 0xff;
+ r = (x >> 16) & 0xff;
+ g = (x >> 8) & 0xff;
+ b = (x >> 0) & 0xff;
*q++ = RGB_TO_Y_CCIR(r, g, b);
*q++ = RGB_TO_V_CCIR(r, g, b, 0);
@@ -282,10 +283,10 @@ static int encode_dvb_subtitles(DVBSubtitleContext *s,
/* region composition segment */
- if (h->rects[region_id].nb_colors <= 4) {
+ if (h->rects[region_id]->nb_colors <= 4) {
/* 2 bpp, some decoders do not support it correctly */
bpp_index = 0;
- } else if (h->rects[region_id].nb_colors <= 16) {
+ } else if (h->rects[region_id]->nb_colors <= 16) {
/* 4 bpp, standard encoding */
bpp_index = 1;
} else {
@@ -299,8 +300,8 @@ static int encode_dvb_subtitles(DVBSubtitleContext *s,
q += 2; /* segment length */
*q++ = region_id;
*q++ = (s->object_version << 4) | (0 << 3) | 0x07; /* version , no fill */
- bytestream_put_be16(&q, h->rects[region_id].w); /* region width */
- bytestream_put_be16(&q, h->rects[region_id].h); /* region height */
+ bytestream_put_be16(&q, h->rects[region_id]->w); /* region width */
+ bytestream_put_be16(&q, h->rects[region_id]->h); /* region height */
*q++ = ((1 + bpp_index) << 5) | ((1 + bpp_index) << 2) | 0x03;
*q++ = region_id; /* clut_id == region_id */
*q++ = 0; /* 8 bit fill colors */
@@ -322,10 +323,10 @@ static int encode_dvb_subtitles(DVBSubtitleContext *s,
for (object_id = 0; object_id < h->num_rects; object_id++) {
/* Object Data segment */
- if (h->rects[object_id].nb_colors <= 4) {
+ if (h->rects[object_id]->nb_colors <= 4) {
/* 2 bpp, some decoders do not support it correctly */
bpp_index = 0;
- } else if (h->rects[object_id].nb_colors <= 16) {
+ } else if (h->rects[object_id]->nb_colors <= 16) {
/* 4 bpp, standard encoding */
bpp_index = 1;
} else {
@@ -358,12 +359,12 @@ static int encode_dvb_subtitles(DVBSubtitleContext *s,
dvb_encode_rle = dvb_encode_rle4;
top_ptr = q;
- dvb_encode_rle(&q, h->rects[object_id].bitmap, h->rects[object_id].w * 2,
- h->rects[object_id].w, h->rects[object_id].h >> 1);
+ dvb_encode_rle(&q, h->rects[object_id]->pict.data[0], h->rects[object_id]->w * 2,
+ h->rects[object_id]->w, h->rects[object_id]->h >> 1);
bottom_ptr = q;
- dvb_encode_rle(&q, h->rects[object_id].bitmap + h->rects[object_id].w,
- h->rects[object_id].w * 2, h->rects[object_id].w,
- h->rects[object_id].h >> 1);
+ dvb_encode_rle(&q, h->rects[object_id]->pict.data[0] + h->rects[object_id]->w,
+ h->rects[object_id]->w * 2, h->rects[object_id]->w,
+ h->rects[object_id]->h >> 1);
bytestream_put_be16(&ptop_field_len, bottom_ptr - top_ptr);
bytestream_put_be16(&pbottom_field_len, q - bottom_ptr);
diff --git a/libavcodec/dvbsubdec.c b/libavcodec/dvbsubdec.c
index 3f47c1b..689c068 100644
--- a/libavcodec/dvbsubdec.c
+++ b/libavcodec/dvbsubdec.c
@@ -1285,14 +1285,17 @@ static int dvbsub_display_end_segment(AVCodecContext *avctx, const uint8_t *buf,
sub->num_rects = ctx->display_list_size;
- if (sub->num_rects > 0)
- sub->rects = av_mallocz(sizeof(AVSubtitleRect) * sub->num_rects);
+ if (sub->num_rects > 0){
+ sub->rects = av_mallocz(sizeof(*sub->rects) * sub->num_rects);
+ for(i=0; i<sub->num_rects; i++)
+ sub->rects[i] = av_mallocz(sizeof(*sub->rects[i]));
+ }
i = 0;
for (display = ctx->display_list; display; display = display->next) {
region = get_region(ctx, display->region_id);
- rect = &sub->rects[i];
+ rect = sub->rects[i];
if (!region)
continue;
@@ -1302,7 +1305,7 @@ static int dvbsub_display_end_segment(AVCodecContext *avctx, const uint8_t *buf,
rect->w = region->width;
rect->h = region->height;
rect->nb_colors = 16;
- rect->linesize = region->width;
+ rect->pict.linesize[0] = region->width;
clut = get_clut(ctx, region->clut);
@@ -1322,11 +1325,11 @@ static int dvbsub_display_end_segment(AVCodecContext *avctx, const uint8_t *buf,
break;
}
- rect->rgba_palette = av_malloc((1 << region->depth) * sizeof(uint32_t));
- memcpy(rect->rgba_palette, clut_table, (1 << region->depth) * sizeof(uint32_t));
+ rect->pict.data[1] = av_malloc((1 << region->depth) * sizeof(uint32_t));
+ memcpy(rect->pict.data[1], clut_table, (1 << region->depth) * sizeof(uint32_t));
- rect->bitmap = av_malloc(region->buf_size);
- memcpy(rect->bitmap, region->pbuf, region->buf_size);
+ rect->pict.data[0] = av_malloc(region->buf_size);
+ memcpy(rect->pict.data[0], region->pbuf, region->buf_size);
i++;
}
diff --git a/libavcodec/dvdata.h b/libavcodec/dvdata.h
index 60feda4..f53fbea 100644
--- a/libavcodec/dvdata.h
+++ b/libavcodec/dvdata.h
@@ -53,7 +53,7 @@ typedef struct DVprofile {
int width; /* picture width in pixels */
AVRational sar[2]; /* sample aspect ratios for 4:3 and 16:9 */
DVwork_chunk *work_chunks; /* each thread gets its own chunk of frame to work on */
- const uint16_t *video_place; /* positions of all DV macroblocks */
+ uint32_t *idct_factor; /* set of iDCT factor tables */
enum PixelFormat pix_fmt; /* picture pixel format */
int bpm; /* blocks per macroblock */
const uint8_t *block_sizes; /* AC block sizes, in bits */
@@ -328,5711 +328,6 @@ static const uint8_t dv100_qstep[16] = {
2, 3, 4, 5, 6, 7, 8, 16, 18, 20, 22, 24, 28, 52
};
-/* NOTE: I prefer hardcoding the positioning of DV blocks, it is
- simpler :-) */
-
-static const uint16_t dv_place_420[1620] = {
- 0x0c24, 0x2412, 0x3036, 0x0000, 0x1848,
- 0x0e24, 0x2612, 0x3236, 0x0200, 0x1a48,
- 0x1024, 0x2812, 0x3436, 0x0400, 0x1c48,
- 0x1026, 0x2814, 0x3438, 0x0402, 0x1c4a,
- 0x0e26, 0x2614, 0x3238, 0x0202, 0x1a4a,
- 0x0c26, 0x2414, 0x3038, 0x0002, 0x184a,
- 0x0c28, 0x2416, 0x303a, 0x0004, 0x184c,
- 0x0e28, 0x2616, 0x323a, 0x0204, 0x1a4c,
- 0x1028, 0x2816, 0x343a, 0x0404, 0x1c4c,
- 0x102a, 0x2818, 0x343c, 0x0406, 0x1c4e,
- 0x0e2a, 0x2618, 0x323c, 0x0206, 0x1a4e,
- 0x0c2a, 0x2418, 0x303c, 0x0006, 0x184e,
- 0x0c2c, 0x241a, 0x303e, 0x0008, 0x1850,
- 0x0e2c, 0x261a, 0x323e, 0x0208, 0x1a50,
- 0x102c, 0x281a, 0x343e, 0x0408, 0x1c50,
- 0x102e, 0x281c, 0x3440, 0x040a, 0x1c52,
- 0x0e2e, 0x261c, 0x3240, 0x020a, 0x1a52,
- 0x0c2e, 0x241c, 0x3040, 0x000a, 0x1852,
- 0x0c30, 0x241e, 0x3042, 0x000c, 0x1854,
- 0x0e30, 0x261e, 0x3242, 0x020c, 0x1a54,
- 0x1030, 0x281e, 0x3442, 0x040c, 0x1c54,
- 0x1032, 0x2820, 0x3444, 0x040e, 0x1c56,
- 0x0e32, 0x2620, 0x3244, 0x020e, 0x1a56,
- 0x0c32, 0x2420, 0x3044, 0x000e, 0x1856,
- 0x0c34, 0x2422, 0x3046, 0x0010, 0x1858,
- 0x0e34, 0x2622, 0x3246, 0x0210, 0x1a58,
- 0x1034, 0x2822, 0x3446, 0x0410, 0x1c58,
- 0x1224, 0x2a12, 0x3636, 0x0600, 0x1e48,
- 0x1424, 0x2c12, 0x3836, 0x0800, 0x2048,
- 0x1624, 0x2e12, 0x3a36, 0x0a00, 0x2248,
- 0x1626, 0x2e14, 0x3a38, 0x0a02, 0x224a,
- 0x1426, 0x2c14, 0x3838, 0x0802, 0x204a,
- 0x1226, 0x2a14, 0x3638, 0x0602, 0x1e4a,
- 0x1228, 0x2a16, 0x363a, 0x0604, 0x1e4c,
- 0x1428, 0x2c16, 0x383a, 0x0804, 0x204c,
- 0x1628, 0x2e16, 0x3a3a, 0x0a04, 0x224c,
- 0x162a, 0x2e18, 0x3a3c, 0x0a06, 0x224e,
- 0x142a, 0x2c18, 0x383c, 0x0806, 0x204e,
- 0x122a, 0x2a18, 0x363c, 0x0606, 0x1e4e,
- 0x122c, 0x2a1a, 0x363e, 0x0608, 0x1e50,
- 0x142c, 0x2c1a, 0x383e, 0x0808, 0x2050,
- 0x162c, 0x2e1a, 0x3a3e, 0x0a08, 0x2250,
- 0x162e, 0x2e1c, 0x3a40, 0x0a0a, 0x2252,
- 0x142e, 0x2c1c, 0x3840, 0x080a, 0x2052,
- 0x122e, 0x2a1c, 0x3640, 0x060a, 0x1e52,
- 0x1230, 0x2a1e, 0x3642, 0x060c, 0x1e54,
- 0x1430, 0x2c1e, 0x3842, 0x080c, 0x2054,
- 0x1630, 0x2e1e, 0x3a42, 0x0a0c, 0x2254,
- 0x1632, 0x2e20, 0x3a44, 0x0a0e, 0x2256,
- 0x1432, 0x2c20, 0x3844, 0x080e, 0x2056,
- 0x1232, 0x2a20, 0x3644, 0x060e, 0x1e56,
- 0x1234, 0x2a22, 0x3646, 0x0610, 0x1e58,
- 0x1434, 0x2c22, 0x3846, 0x0810, 0x2058,
- 0x1634, 0x2e22, 0x3a46, 0x0a10, 0x2258,
- 0x1824, 0x3012, 0x3c36, 0x0c00, 0x2448,
- 0x1a24, 0x3212, 0x3e36, 0x0e00, 0x2648,
- 0x1c24, 0x3412, 0x4036, 0x1000, 0x2848,
- 0x1c26, 0x3414, 0x4038, 0x1002, 0x284a,
- 0x1a26, 0x3214, 0x3e38, 0x0e02, 0x264a,
- 0x1826, 0x3014, 0x3c38, 0x0c02, 0x244a,
- 0x1828, 0x3016, 0x3c3a, 0x0c04, 0x244c,
- 0x1a28, 0x3216, 0x3e3a, 0x0e04, 0x264c,
- 0x1c28, 0x3416, 0x403a, 0x1004, 0x284c,
- 0x1c2a, 0x3418, 0x403c, 0x1006, 0x284e,
- 0x1a2a, 0x3218, 0x3e3c, 0x0e06, 0x264e,
- 0x182a, 0x3018, 0x3c3c, 0x0c06, 0x244e,
- 0x182c, 0x301a, 0x3c3e, 0x0c08, 0x2450,
- 0x1a2c, 0x321a, 0x3e3e, 0x0e08, 0x2650,
- 0x1c2c, 0x341a, 0x403e, 0x1008, 0x2850,
- 0x1c2e, 0x341c, 0x4040, 0x100a, 0x2852,
- 0x1a2e, 0x321c, 0x3e40, 0x0e0a, 0x2652,
- 0x182e, 0x301c, 0x3c40, 0x0c0a, 0x2452,
- 0x1830, 0x301e, 0x3c42, 0x0c0c, 0x2454,
- 0x1a30, 0x321e, 0x3e42, 0x0e0c, 0x2654,
- 0x1c30, 0x341e, 0x4042, 0x100c, 0x2854,
- 0x1c32, 0x3420, 0x4044, 0x100e, 0x2856,
- 0x1a32, 0x3220, 0x3e44, 0x0e0e, 0x2656,
- 0x1832, 0x3020, 0x3c44, 0x0c0e, 0x2456,
- 0x1834, 0x3022, 0x3c46, 0x0c10, 0x2458,
- 0x1a34, 0x3222, 0x3e46, 0x0e10, 0x2658,
- 0x1c34, 0x3422, 0x4046, 0x1010, 0x2858,
- 0x1e24, 0x3612, 0x4236, 0x1200, 0x2a48,
- 0x2024, 0x3812, 0x4436, 0x1400, 0x2c48,
- 0x2224, 0x3a12, 0x4636, 0x1600, 0x2e48,
- 0x2226, 0x3a14, 0x4638, 0x1602, 0x2e4a,
- 0x2026, 0x3814, 0x4438, 0x1402, 0x2c4a,
- 0x1e26, 0x3614, 0x4238, 0x1202, 0x2a4a,
- 0x1e28, 0x3616, 0x423a, 0x1204, 0x2a4c,
- 0x2028, 0x3816, 0x443a, 0x1404, 0x2c4c,
- 0x2228, 0x3a16, 0x463a, 0x1604, 0x2e4c,
- 0x222a, 0x3a18, 0x463c, 0x1606, 0x2e4e,
- 0x202a, 0x3818, 0x443c, 0x1406, 0x2c4e,
- 0x1e2a, 0x3618, 0x423c, 0x1206, 0x2a4e,
- 0x1e2c, 0x361a, 0x423e, 0x1208, 0x2a50,
- 0x202c, 0x381a, 0x443e, 0x1408, 0x2c50,
- 0x222c, 0x3a1a, 0x463e, 0x1608, 0x2e50,
- 0x222e, 0x3a1c, 0x4640, 0x160a, 0x2e52,
- 0x202e, 0x381c, 0x4440, 0x140a, 0x2c52,
- 0x1e2e, 0x361c, 0x4240, 0x120a, 0x2a52,
- 0x1e30, 0x361e, 0x4242, 0x120c, 0x2a54,
- 0x2030, 0x381e, 0x4442, 0x140c, 0x2c54,
- 0x2230, 0x3a1e, 0x4642, 0x160c, 0x2e54,
- 0x2232, 0x3a20, 0x4644, 0x160e, 0x2e56,
- 0x2032, 0x3820, 0x4444, 0x140e, 0x2c56,
- 0x1e32, 0x3620, 0x4244, 0x120e, 0x2a56,
- 0x1e34, 0x3622, 0x4246, 0x1210, 0x2a58,
- 0x2034, 0x3822, 0x4446, 0x1410, 0x2c58,
- 0x2234, 0x3a22, 0x4646, 0x1610, 0x2e58,
- 0x2424, 0x3c12, 0x0036, 0x1800, 0x3048,
- 0x2624, 0x3e12, 0x0236, 0x1a00, 0x3248,
- 0x2824, 0x4012, 0x0436, 0x1c00, 0x3448,
- 0x2826, 0x4014, 0x0438, 0x1c02, 0x344a,
- 0x2626, 0x3e14, 0x0238, 0x1a02, 0x324a,
- 0x2426, 0x3c14, 0x0038, 0x1802, 0x304a,
- 0x2428, 0x3c16, 0x003a, 0x1804, 0x304c,
- 0x2628, 0x3e16, 0x023a, 0x1a04, 0x324c,
- 0x2828, 0x4016, 0x043a, 0x1c04, 0x344c,
- 0x282a, 0x4018, 0x043c, 0x1c06, 0x344e,
- 0x262a, 0x3e18, 0x023c, 0x1a06, 0x324e,
- 0x242a, 0x3c18, 0x003c, 0x1806, 0x304e,
- 0x242c, 0x3c1a, 0x003e, 0x1808, 0x3050,
- 0x262c, 0x3e1a, 0x023e, 0x1a08, 0x3250,
- 0x282c, 0x401a, 0x043e, 0x1c08, 0x3450,
- 0x282e, 0x401c, 0x0440, 0x1c0a, 0x3452,
- 0x262e, 0x3e1c, 0x0240, 0x1a0a, 0x3252,
- 0x242e, 0x3c1c, 0x0040, 0x180a, 0x3052,
- 0x2430, 0x3c1e, 0x0042, 0x180c, 0x3054,
- 0x2630, 0x3e1e, 0x0242, 0x1a0c, 0x3254,
- 0x2830, 0x401e, 0x0442, 0x1c0c, 0x3454,
- 0x2832, 0x4020, 0x0444, 0x1c0e, 0x3456,
- 0x2632, 0x3e20, 0x0244, 0x1a0e, 0x3256,
- 0x2432, 0x3c20, 0x0044, 0x180e, 0x3056,
- 0x2434, 0x3c22, 0x0046, 0x1810, 0x3058,
- 0x2634, 0x3e22, 0x0246, 0x1a10, 0x3258,
- 0x2834, 0x4022, 0x0446, 0x1c10, 0x3458,
- 0x2a24, 0x4212, 0x0636, 0x1e00, 0x3648,
- 0x2c24, 0x4412, 0x0836, 0x2000, 0x3848,
- 0x2e24, 0x4612, 0x0a36, 0x2200, 0x3a48,
- 0x2e26, 0x4614, 0x0a38, 0x2202, 0x3a4a,
- 0x2c26, 0x4414, 0x0838, 0x2002, 0x384a,
- 0x2a26, 0x4214, 0x0638, 0x1e02, 0x364a,
- 0x2a28, 0x4216, 0x063a, 0x1e04, 0x364c,
- 0x2c28, 0x4416, 0x083a, 0x2004, 0x384c,
- 0x2e28, 0x4616, 0x0a3a, 0x2204, 0x3a4c,
- 0x2e2a, 0x4618, 0x0a3c, 0x2206, 0x3a4e,
- 0x2c2a, 0x4418, 0x083c, 0x2006, 0x384e,
- 0x2a2a, 0x4218, 0x063c, 0x1e06, 0x364e,
- 0x2a2c, 0x421a, 0x063e, 0x1e08, 0x3650,
- 0x2c2c, 0x441a, 0x083e, 0x2008, 0x3850,
- 0x2e2c, 0x461a, 0x0a3e, 0x2208, 0x3a50,
- 0x2e2e, 0x461c, 0x0a40, 0x220a, 0x3a52,
- 0x2c2e, 0x441c, 0x0840, 0x200a, 0x3852,
- 0x2a2e, 0x421c, 0x0640, 0x1e0a, 0x3652,
- 0x2a30, 0x421e, 0x0642, 0x1e0c, 0x3654,
- 0x2c30, 0x441e, 0x0842, 0x200c, 0x3854,
- 0x2e30, 0x461e, 0x0a42, 0x220c, 0x3a54,
- 0x2e32, 0x4620, 0x0a44, 0x220e, 0x3a56,
- 0x2c32, 0x4420, 0x0844, 0x200e, 0x3856,
- 0x2a32, 0x4220, 0x0644, 0x1e0e, 0x3656,
- 0x2a34, 0x4222, 0x0646, 0x1e10, 0x3658,
- 0x2c34, 0x4422, 0x0846, 0x2010, 0x3858,
- 0x2e34, 0x4622, 0x0a46, 0x2210, 0x3a58,
- 0x3024, 0x0012, 0x0c36, 0x2400, 0x3c48,
- 0x3224, 0x0212, 0x0e36, 0x2600, 0x3e48,
- 0x3424, 0x0412, 0x1036, 0x2800, 0x4048,
- 0x3426, 0x0414, 0x1038, 0x2802, 0x404a,
- 0x3226, 0x0214, 0x0e38, 0x2602, 0x3e4a,
- 0x3026, 0x0014, 0x0c38, 0x2402, 0x3c4a,
- 0x3028, 0x0016, 0x0c3a, 0x2404, 0x3c4c,
- 0x3228, 0x0216, 0x0e3a, 0x2604, 0x3e4c,
- 0x3428, 0x0416, 0x103a, 0x2804, 0x404c,
- 0x342a, 0x0418, 0x103c, 0x2806, 0x404e,
- 0x322a, 0x0218, 0x0e3c, 0x2606, 0x3e4e,
- 0x302a, 0x0018, 0x0c3c, 0x2406, 0x3c4e,
- 0x302c, 0x001a, 0x0c3e, 0x2408, 0x3c50,
- 0x322c, 0x021a, 0x0e3e, 0x2608, 0x3e50,
- 0x342c, 0x041a, 0x103e, 0x2808, 0x4050,
- 0x342e, 0x041c, 0x1040, 0x280a, 0x4052,
- 0x322e, 0x021c, 0x0e40, 0x260a, 0x3e52,
- 0x302e, 0x001c, 0x0c40, 0x240a, 0x3c52,
- 0x3030, 0x001e, 0x0c42, 0x240c, 0x3c54,
- 0x3230, 0x021e, 0x0e42, 0x260c, 0x3e54,
- 0x3430, 0x041e, 0x1042, 0x280c, 0x4054,
- 0x3432, 0x0420, 0x1044, 0x280e, 0x4056,
- 0x3232, 0x0220, 0x0e44, 0x260e, 0x3e56,
- 0x3032, 0x0020, 0x0c44, 0x240e, 0x3c56,
- 0x3034, 0x0022, 0x0c46, 0x2410, 0x3c58,
- 0x3234, 0x0222, 0x0e46, 0x2610, 0x3e58,
- 0x3434, 0x0422, 0x1046, 0x2810, 0x4058,
- 0x3624, 0x0612, 0x1236, 0x2a00, 0x4248,
- 0x3824, 0x0812, 0x1436, 0x2c00, 0x4448,
- 0x3a24, 0x0a12, 0x1636, 0x2e00, 0x4648,
- 0x3a26, 0x0a14, 0x1638, 0x2e02, 0x464a,
- 0x3826, 0x0814, 0x1438, 0x2c02, 0x444a,
- 0x3626, 0x0614, 0x1238, 0x2a02, 0x424a,
- 0x3628, 0x0616, 0x123a, 0x2a04, 0x424c,
- 0x3828, 0x0816, 0x143a, 0x2c04, 0x444c,
- 0x3a28, 0x0a16, 0x163a, 0x2e04, 0x464c,
- 0x3a2a, 0x0a18, 0x163c, 0x2e06, 0x464e,
- 0x382a, 0x0818, 0x143c, 0x2c06, 0x444e,
- 0x362a, 0x0618, 0x123c, 0x2a06, 0x424e,
- 0x362c, 0x061a, 0x123e, 0x2a08, 0x4250,
- 0x382c, 0x081a, 0x143e, 0x2c08, 0x4450,
- 0x3a2c, 0x0a1a, 0x163e, 0x2e08, 0x4650,
- 0x3a2e, 0x0a1c, 0x1640, 0x2e0a, 0x4652,
- 0x382e, 0x081c, 0x1440, 0x2c0a, 0x4452,
- 0x362e, 0x061c, 0x1240, 0x2a0a, 0x4252,
- 0x3630, 0x061e, 0x1242, 0x2a0c, 0x4254,
- 0x3830, 0x081e, 0x1442, 0x2c0c, 0x4454,
- 0x3a30, 0x0a1e, 0x1642, 0x2e0c, 0x4654,
- 0x3a32, 0x0a20, 0x1644, 0x2e0e, 0x4656,
- 0x3832, 0x0820, 0x1444, 0x2c0e, 0x4456,
- 0x3632, 0x0620, 0x1244, 0x2a0e, 0x4256,
- 0x3634, 0x0622, 0x1246, 0x2a10, 0x4258,
- 0x3834, 0x0822, 0x1446, 0x2c10, 0x4458,
- 0x3a34, 0x0a22, 0x1646, 0x2e10, 0x4658,
- 0x3c24, 0x0c12, 0x1836, 0x3000, 0x0048,
- 0x3e24, 0x0e12, 0x1a36, 0x3200, 0x0248,
- 0x4024, 0x1012, 0x1c36, 0x3400, 0x0448,
- 0x4026, 0x1014, 0x1c38, 0x3402, 0x044a,
- 0x3e26, 0x0e14, 0x1a38, 0x3202, 0x024a,
- 0x3c26, 0x0c14, 0x1838, 0x3002, 0x004a,
- 0x3c28, 0x0c16, 0x183a, 0x3004, 0x004c,
- 0x3e28, 0x0e16, 0x1a3a, 0x3204, 0x024c,
- 0x4028, 0x1016, 0x1c3a, 0x3404, 0x044c,
- 0x402a, 0x1018, 0x1c3c, 0x3406, 0x044e,
- 0x3e2a, 0x0e18, 0x1a3c, 0x3206, 0x024e,
- 0x3c2a, 0x0c18, 0x183c, 0x3006, 0x004e,
- 0x3c2c, 0x0c1a, 0x183e, 0x3008, 0x0050,
- 0x3e2c, 0x0e1a, 0x1a3e, 0x3208, 0x0250,
- 0x402c, 0x101a, 0x1c3e, 0x3408, 0x0450,
- 0x402e, 0x101c, 0x1c40, 0x340a, 0x0452,
- 0x3e2e, 0x0e1c, 0x1a40, 0x320a, 0x0252,
- 0x3c2e, 0x0c1c, 0x1840, 0x300a, 0x0052,
- 0x3c30, 0x0c1e, 0x1842, 0x300c, 0x0054,
- 0x3e30, 0x0e1e, 0x1a42, 0x320c, 0x0254,
- 0x4030, 0x101e, 0x1c42, 0x340c, 0x0454,
- 0x4032, 0x1020, 0x1c44, 0x340e, 0x0456,
- 0x3e32, 0x0e20, 0x1a44, 0x320e, 0x0256,
- 0x3c32, 0x0c20, 0x1844, 0x300e, 0x0056,
- 0x3c34, 0x0c22, 0x1846, 0x3010, 0x0058,
- 0x3e34, 0x0e22, 0x1a46, 0x3210, 0x0258,
- 0x4034, 0x1022, 0x1c46, 0x3410, 0x0458,
- 0x4224, 0x1212, 0x1e36, 0x3600, 0x0648,
- 0x4424, 0x1412, 0x2036, 0x3800, 0x0848,
- 0x4624, 0x1612, 0x2236, 0x3a00, 0x0a48,
- 0x4626, 0x1614, 0x2238, 0x3a02, 0x0a4a,
- 0x4426, 0x1414, 0x2038, 0x3802, 0x084a,
- 0x4226, 0x1214, 0x1e38, 0x3602, 0x064a,
- 0x4228, 0x1216, 0x1e3a, 0x3604, 0x064c,
- 0x4428, 0x1416, 0x203a, 0x3804, 0x084c,
- 0x4628, 0x1616, 0x223a, 0x3a04, 0x0a4c,
- 0x462a, 0x1618, 0x223c, 0x3a06, 0x0a4e,
- 0x442a, 0x1418, 0x203c, 0x3806, 0x084e,
- 0x422a, 0x1218, 0x1e3c, 0x3606, 0x064e,
- 0x422c, 0x121a, 0x1e3e, 0x3608, 0x0650,
- 0x442c, 0x141a, 0x203e, 0x3808, 0x0850,
- 0x462c, 0x161a, 0x223e, 0x3a08, 0x0a50,
- 0x462e, 0x161c, 0x2240, 0x3a0a, 0x0a52,
- 0x442e, 0x141c, 0x2040, 0x380a, 0x0852,
- 0x422e, 0x121c, 0x1e40, 0x360a, 0x0652,
- 0x4230, 0x121e, 0x1e42, 0x360c, 0x0654,
- 0x4430, 0x141e, 0x2042, 0x380c, 0x0854,
- 0x4630, 0x161e, 0x2242, 0x3a0c, 0x0a54,
- 0x4632, 0x1620, 0x2244, 0x3a0e, 0x0a56,
- 0x4432, 0x1420, 0x2044, 0x380e, 0x0856,
- 0x4232, 0x1220, 0x1e44, 0x360e, 0x0656,
- 0x4234, 0x1222, 0x1e46, 0x3610, 0x0658,
- 0x4434, 0x1422, 0x2046, 0x3810, 0x0858,
- 0x4634, 0x1622, 0x2246, 0x3a10, 0x0a58,
- 0x0024, 0x1812, 0x2436, 0x3c00, 0x0c48,
- 0x0224, 0x1a12, 0x2636, 0x3e00, 0x0e48,
- 0x0424, 0x1c12, 0x2836, 0x4000, 0x1048,
- 0x0426, 0x1c14, 0x2838, 0x4002, 0x104a,
- 0x0226, 0x1a14, 0x2638, 0x3e02, 0x0e4a,
- 0x0026, 0x1814, 0x2438, 0x3c02, 0x0c4a,
- 0x0028, 0x1816, 0x243a, 0x3c04, 0x0c4c,
- 0x0228, 0x1a16, 0x263a, 0x3e04, 0x0e4c,
- 0x0428, 0x1c16, 0x283a, 0x4004, 0x104c,
- 0x042a, 0x1c18, 0x283c, 0x4006, 0x104e,
- 0x022a, 0x1a18, 0x263c, 0x3e06, 0x0e4e,
- 0x002a, 0x1818, 0x243c, 0x3c06, 0x0c4e,
- 0x002c, 0x181a, 0x243e, 0x3c08, 0x0c50,
- 0x022c, 0x1a1a, 0x263e, 0x3e08, 0x0e50,
- 0x042c, 0x1c1a, 0x283e, 0x4008, 0x1050,
- 0x042e, 0x1c1c, 0x2840, 0x400a, 0x1052,
- 0x022e, 0x1a1c, 0x2640, 0x3e0a, 0x0e52,
- 0x002e, 0x181c, 0x2440, 0x3c0a, 0x0c52,
- 0x0030, 0x181e, 0x2442, 0x3c0c, 0x0c54,
- 0x0230, 0x1a1e, 0x2642, 0x3e0c, 0x0e54,
- 0x0430, 0x1c1e, 0x2842, 0x400c, 0x1054,
- 0x0432, 0x1c20, 0x2844, 0x400e, 0x1056,
- 0x0232, 0x1a20, 0x2644, 0x3e0e, 0x0e56,
- 0x0032, 0x1820, 0x2444, 0x3c0e, 0x0c56,
- 0x0034, 0x1822, 0x2446, 0x3c10, 0x0c58,
- 0x0234, 0x1a22, 0x2646, 0x3e10, 0x0e58,
- 0x0434, 0x1c22, 0x2846, 0x4010, 0x1058,
- 0x0624, 0x1e12, 0x2a36, 0x4200, 0x1248,
- 0x0824, 0x2012, 0x2c36, 0x4400, 0x1448,
- 0x0a24, 0x2212, 0x2e36, 0x4600, 0x1648,
- 0x0a26, 0x2214, 0x2e38, 0x4602, 0x164a,
- 0x0826, 0x2014, 0x2c38, 0x4402, 0x144a,
- 0x0626, 0x1e14, 0x2a38, 0x4202, 0x124a,
- 0x0628, 0x1e16, 0x2a3a, 0x4204, 0x124c,
- 0x0828, 0x2016, 0x2c3a, 0x4404, 0x144c,
- 0x0a28, 0x2216, 0x2e3a, 0x4604, 0x164c,
- 0x0a2a, 0x2218, 0x2e3c, 0x4606, 0x164e,
- 0x082a, 0x2018, 0x2c3c, 0x4406, 0x144e,
- 0x062a, 0x1e18, 0x2a3c, 0x4206, 0x124e,
- 0x062c, 0x1e1a, 0x2a3e, 0x4208, 0x1250,
- 0x082c, 0x201a, 0x2c3e, 0x4408, 0x1450,
- 0x0a2c, 0x221a, 0x2e3e, 0x4608, 0x1650,
- 0x0a2e, 0x221c, 0x2e40, 0x460a, 0x1652,
- 0x082e, 0x201c, 0x2c40, 0x440a, 0x1452,
- 0x062e, 0x1e1c, 0x2a40, 0x420a, 0x1252,
- 0x0630, 0x1e1e, 0x2a42, 0x420c, 0x1254,
- 0x0830, 0x201e, 0x2c42, 0x440c, 0x1454,
- 0x0a30, 0x221e, 0x2e42, 0x460c, 0x1654,
- 0x0a32, 0x2220, 0x2e44, 0x460e, 0x1656,
- 0x0832, 0x2020, 0x2c44, 0x440e, 0x1456,
- 0x0632, 0x1e20, 0x2a44, 0x420e, 0x1256,
- 0x0634, 0x1e22, 0x2a46, 0x4210, 0x1258,
- 0x0834, 0x2022, 0x2c46, 0x4410, 0x1458,
- 0x0a34, 0x2222, 0x2e46, 0x4610, 0x1658,
-};
-
-static const uint16_t dv_place_411P[1620] = {
- 0x0c24, 0x2710, 0x3334, 0x0000, 0x1848,
- 0x0d24, 0x2810, 0x3434, 0x0100, 0x1948,
- 0x0e24, 0x2910, 0x3534, 0x0200, 0x1a48,
- 0x0f24, 0x2914, 0x3538, 0x0300, 0x1b48,
- 0x1024, 0x2814, 0x3438, 0x0400, 0x1c48,
- 0x1124, 0x2714, 0x3338, 0x0500, 0x1d48,
- 0x1128, 0x2614, 0x3238, 0x0504, 0x1d4c,
- 0x1028, 0x2514, 0x3138, 0x0404, 0x1c4c,
- 0x0f28, 0x2414, 0x3038, 0x0304, 0x1b4c,
- 0x0e28, 0x2418, 0x303c, 0x0204, 0x1a4c,
- 0x0d28, 0x2518, 0x313c, 0x0104, 0x194c,
- 0x0c28, 0x2618, 0x323c, 0x0004, 0x184c,
- 0x0c2c, 0x2718, 0x333c, 0x0008, 0x1850,
- 0x0d2c, 0x2818, 0x343c, 0x0108, 0x1950,
- 0x0e2c, 0x2918, 0x353c, 0x0208, 0x1a50,
- 0x0f2c, 0x291c, 0x3540, 0x0308, 0x1b50,
- 0x102c, 0x281c, 0x3440, 0x0408, 0x1c50,
- 0x112c, 0x271c, 0x3340, 0x0508, 0x1d50,
- 0x1130, 0x261c, 0x3240, 0x050c, 0x1d54,
- 0x1030, 0x251c, 0x3140, 0x040c, 0x1c54,
- 0x0f30, 0x241c, 0x3040, 0x030c, 0x1b54,
- 0x0e30, 0x2420, 0x3044, 0x020c, 0x1a54,
- 0x0d30, 0x2520, 0x3144, 0x010c, 0x1954,
- 0x0c30, 0x2620, 0x3244, 0x000c, 0x1854,
- 0x0c34, 0x2720, 0x3344, 0x0010, 0x1858,
- 0x0d34, 0x2820, 0x3444, 0x0110, 0x1a58,
- 0x0e34, 0x2920, 0x3544, 0x0210, 0x1c58,
- 0x1224, 0x2d10, 0x3934, 0x0600, 0x1e48,
- 0x1324, 0x2e10, 0x3a34, 0x0700, 0x1f48,
- 0x1424, 0x2f10, 0x3b34, 0x0800, 0x2048,
- 0x1524, 0x2f14, 0x3b38, 0x0900, 0x2148,
- 0x1624, 0x2e14, 0x3a38, 0x0a00, 0x2248,
- 0x1724, 0x2d14, 0x3938, 0x0b00, 0x2348,
- 0x1728, 0x2c14, 0x3838, 0x0b04, 0x234c,
- 0x1628, 0x2b14, 0x3738, 0x0a04, 0x224c,
- 0x1528, 0x2a14, 0x3638, 0x0904, 0x214c,
- 0x1428, 0x2a18, 0x363c, 0x0804, 0x204c,
- 0x1328, 0x2b18, 0x373c, 0x0704, 0x1f4c,
- 0x1228, 0x2c18, 0x383c, 0x0604, 0x1e4c,
- 0x122c, 0x2d18, 0x393c, 0x0608, 0x1e50,
- 0x132c, 0x2e18, 0x3a3c, 0x0708, 0x1f50,
- 0x142c, 0x2f18, 0x3b3c, 0x0808, 0x2050,
- 0x152c, 0x2f1c, 0x3b40, 0x0908, 0x2150,
- 0x162c, 0x2e1c, 0x3a40, 0x0a08, 0x2250,
- 0x172c, 0x2d1c, 0x3940, 0x0b08, 0x2350,
- 0x1730, 0x2c1c, 0x3840, 0x0b0c, 0x2354,
- 0x1630, 0x2b1c, 0x3740, 0x0a0c, 0x2254,
- 0x1530, 0x2a1c, 0x3640, 0x090c, 0x2154,
- 0x1430, 0x2a20, 0x3644, 0x080c, 0x2054,
- 0x1330, 0x2b20, 0x3744, 0x070c, 0x1f54,
- 0x1230, 0x2c20, 0x3844, 0x060c, 0x1e54,
- 0x1234, 0x2d20, 0x3944, 0x0610, 0x1e58,
- 0x1334, 0x2e20, 0x3a44, 0x0710, 0x2058,
- 0x1434, 0x2f20, 0x3b44, 0x0810, 0x2258,
- 0x1824, 0x3310, 0x3f34, 0x0c00, 0x2448,
- 0x1924, 0x3410, 0x4034, 0x0d00, 0x2548,
- 0x1a24, 0x3510, 0x4134, 0x0e00, 0x2648,
- 0x1b24, 0x3514, 0x4138, 0x0f00, 0x2748,
- 0x1c24, 0x3414, 0x4038, 0x1000, 0x2848,
- 0x1d24, 0x3314, 0x3f38, 0x1100, 0x2948,
- 0x1d28, 0x3214, 0x3e38, 0x1104, 0x294c,
- 0x1c28, 0x3114, 0x3d38, 0x1004, 0x284c,
- 0x1b28, 0x3014, 0x3c38, 0x0f04, 0x274c,
- 0x1a28, 0x3018, 0x3c3c, 0x0e04, 0x264c,
- 0x1928, 0x3118, 0x3d3c, 0x0d04, 0x254c,
- 0x1828, 0x3218, 0x3e3c, 0x0c04, 0x244c,
- 0x182c, 0x3318, 0x3f3c, 0x0c08, 0x2450,
- 0x192c, 0x3418, 0x403c, 0x0d08, 0x2550,
- 0x1a2c, 0x3518, 0x413c, 0x0e08, 0x2650,
- 0x1b2c, 0x351c, 0x4140, 0x0f08, 0x2750,
- 0x1c2c, 0x341c, 0x4040, 0x1008, 0x2850,
- 0x1d2c, 0x331c, 0x3f40, 0x1108, 0x2950,
- 0x1d30, 0x321c, 0x3e40, 0x110c, 0x2954,
- 0x1c30, 0x311c, 0x3d40, 0x100c, 0x2854,
- 0x1b30, 0x301c, 0x3c40, 0x0f0c, 0x2754,
- 0x1a30, 0x3020, 0x3c44, 0x0e0c, 0x2654,
- 0x1930, 0x3120, 0x3d44, 0x0d0c, 0x2554,
- 0x1830, 0x3220, 0x3e44, 0x0c0c, 0x2454,
- 0x1834, 0x3320, 0x3f44, 0x0c10, 0x2458,
- 0x1934, 0x3420, 0x4044, 0x0d10, 0x2658,
- 0x1a34, 0x3520, 0x4144, 0x0e10, 0x2858,
- 0x1e24, 0x3910, 0x4534, 0x1200, 0x2a48,
- 0x1f24, 0x3a10, 0x4634, 0x1300, 0x2b48,
- 0x2024, 0x3b10, 0x4734, 0x1400, 0x2c48,
- 0x2124, 0x3b14, 0x4738, 0x1500, 0x2d48,
- 0x2224, 0x3a14, 0x4638, 0x1600, 0x2e48,
- 0x2324, 0x3914, 0x4538, 0x1700, 0x2f48,
- 0x2328, 0x3814, 0x4438, 0x1704, 0x2f4c,
- 0x2228, 0x3714, 0x4338, 0x1604, 0x2e4c,
- 0x2128, 0x3614, 0x4238, 0x1504, 0x2d4c,
- 0x2028, 0x3618, 0x423c, 0x1404, 0x2c4c,
- 0x1f28, 0x3718, 0x433c, 0x1304, 0x2b4c,
- 0x1e28, 0x3818, 0x443c, 0x1204, 0x2a4c,
- 0x1e2c, 0x3918, 0x453c, 0x1208, 0x2a50,
- 0x1f2c, 0x3a18, 0x463c, 0x1308, 0x2b50,
- 0x202c, 0x3b18, 0x473c, 0x1408, 0x2c50,
- 0x212c, 0x3b1c, 0x4740, 0x1508, 0x2d50,
- 0x222c, 0x3a1c, 0x4640, 0x1608, 0x2e50,
- 0x232c, 0x391c, 0x4540, 0x1708, 0x2f50,
- 0x2330, 0x381c, 0x4440, 0x170c, 0x2f54,
- 0x2230, 0x371c, 0x4340, 0x160c, 0x2e54,
- 0x2130, 0x361c, 0x4240, 0x150c, 0x2d54,
- 0x2030, 0x3620, 0x4244, 0x140c, 0x2c54,
- 0x1f30, 0x3720, 0x4344, 0x130c, 0x2b54,
- 0x1e30, 0x3820, 0x4444, 0x120c, 0x2a54,
- 0x1e34, 0x3920, 0x4544, 0x1210, 0x2a58,
- 0x1f34, 0x3a20, 0x4644, 0x1310, 0x2c58,
- 0x2034, 0x3b20, 0x4744, 0x1410, 0x2e58,
- 0x2424, 0x3f10, 0x0334, 0x1800, 0x3048,
- 0x2524, 0x4010, 0x0434, 0x1900, 0x3148,
- 0x2624, 0x4110, 0x0534, 0x1a00, 0x3248,
- 0x2724, 0x4114, 0x0538, 0x1b00, 0x3348,
- 0x2824, 0x4014, 0x0438, 0x1c00, 0x3448,
- 0x2924, 0x3f14, 0x0338, 0x1d00, 0x3548,
- 0x2928, 0x3e14, 0x0238, 0x1d04, 0x354c,
- 0x2828, 0x3d14, 0x0138, 0x1c04, 0x344c,
- 0x2728, 0x3c14, 0x0038, 0x1b04, 0x334c,
- 0x2628, 0x3c18, 0x003c, 0x1a04, 0x324c,
- 0x2528, 0x3d18, 0x013c, 0x1904, 0x314c,
- 0x2428, 0x3e18, 0x023c, 0x1804, 0x304c,
- 0x242c, 0x3f18, 0x033c, 0x1808, 0x3050,
- 0x252c, 0x4018, 0x043c, 0x1908, 0x3150,
- 0x262c, 0x4118, 0x053c, 0x1a08, 0x3250,
- 0x272c, 0x411c, 0x0540, 0x1b08, 0x3350,
- 0x282c, 0x401c, 0x0440, 0x1c08, 0x3450,
- 0x292c, 0x3f1c, 0x0340, 0x1d08, 0x3550,
- 0x2930, 0x3e1c, 0x0240, 0x1d0c, 0x3554,
- 0x2830, 0x3d1c, 0x0140, 0x1c0c, 0x3454,
- 0x2730, 0x3c1c, 0x0040, 0x1b0c, 0x3354,
- 0x2630, 0x3c20, 0x0044, 0x1a0c, 0x3254,
- 0x2530, 0x3d20, 0x0144, 0x190c, 0x3154,
- 0x2430, 0x3e20, 0x0244, 0x180c, 0x3054,
- 0x2434, 0x3f20, 0x0344, 0x1810, 0x3058,
- 0x2534, 0x4020, 0x0444, 0x1910, 0x3258,
- 0x2634, 0x4120, 0x0544, 0x1a10, 0x3458,
- 0x2a24, 0x4510, 0x0934, 0x1e00, 0x3648,
- 0x2b24, 0x4610, 0x0a34, 0x1f00, 0x3748,
- 0x2c24, 0x4710, 0x0b34, 0x2000, 0x3848,
- 0x2d24, 0x4714, 0x0b38, 0x2100, 0x3948,
- 0x2e24, 0x4614, 0x0a38, 0x2200, 0x3a48,
- 0x2f24, 0x4514, 0x0938, 0x2300, 0x3b48,
- 0x2f28, 0x4414, 0x0838, 0x2304, 0x3b4c,
- 0x2e28, 0x4314, 0x0738, 0x2204, 0x3a4c,
- 0x2d28, 0x4214, 0x0638, 0x2104, 0x394c,
- 0x2c28, 0x4218, 0x063c, 0x2004, 0x384c,
- 0x2b28, 0x4318, 0x073c, 0x1f04, 0x374c,
- 0x2a28, 0x4418, 0x083c, 0x1e04, 0x364c,
- 0x2a2c, 0x4518, 0x093c, 0x1e08, 0x3650,
- 0x2b2c, 0x4618, 0x0a3c, 0x1f08, 0x3750,
- 0x2c2c, 0x4718, 0x0b3c, 0x2008, 0x3850,
- 0x2d2c, 0x471c, 0x0b40, 0x2108, 0x3950,
- 0x2e2c, 0x461c, 0x0a40, 0x2208, 0x3a50,
- 0x2f2c, 0x451c, 0x0940, 0x2308, 0x3b50,
- 0x2f30, 0x441c, 0x0840, 0x230c, 0x3b54,
- 0x2e30, 0x431c, 0x0740, 0x220c, 0x3a54,
- 0x2d30, 0x421c, 0x0640, 0x210c, 0x3954,
- 0x2c30, 0x4220, 0x0644, 0x200c, 0x3854,
- 0x2b30, 0x4320, 0x0744, 0x1f0c, 0x3754,
- 0x2a30, 0x4420, 0x0844, 0x1e0c, 0x3654,
- 0x2a34, 0x4520, 0x0944, 0x1e10, 0x3658,
- 0x2b34, 0x4620, 0x0a44, 0x1f10, 0x3858,
- 0x2c34, 0x4720, 0x0b44, 0x2010, 0x3a58,
- 0x3024, 0x0310, 0x0f34, 0x2400, 0x3c48,
- 0x3124, 0x0410, 0x1034, 0x2500, 0x3d48,
- 0x3224, 0x0510, 0x1134, 0x2600, 0x3e48,
- 0x3324, 0x0514, 0x1138, 0x2700, 0x3f48,
- 0x3424, 0x0414, 0x1038, 0x2800, 0x4048,
- 0x3524, 0x0314, 0x0f38, 0x2900, 0x4148,
- 0x3528, 0x0214, 0x0e38, 0x2904, 0x414c,
- 0x3428, 0x0114, 0x0d38, 0x2804, 0x404c,
- 0x3328, 0x0014, 0x0c38, 0x2704, 0x3f4c,
- 0x3228, 0x0018, 0x0c3c, 0x2604, 0x3e4c,
- 0x3128, 0x0118, 0x0d3c, 0x2504, 0x3d4c,
- 0x3028, 0x0218, 0x0e3c, 0x2404, 0x3c4c,
- 0x302c, 0x0318, 0x0f3c, 0x2408, 0x3c50,
- 0x312c, 0x0418, 0x103c, 0x2508, 0x3d50,
- 0x322c, 0x0518, 0x113c, 0x2608, 0x3e50,
- 0x332c, 0x051c, 0x1140, 0x2708, 0x3f50,
- 0x342c, 0x041c, 0x1040, 0x2808, 0x4050,
- 0x352c, 0x031c, 0x0f40, 0x2908, 0x4150,
- 0x3530, 0x021c, 0x0e40, 0x290c, 0x4154,
- 0x3430, 0x011c, 0x0d40, 0x280c, 0x4054,
- 0x3330, 0x001c, 0x0c40, 0x270c, 0x3f54,
- 0x3230, 0x0020, 0x0c44, 0x260c, 0x3e54,
- 0x3130, 0x0120, 0x0d44, 0x250c, 0x3d54,
- 0x3030, 0x0220, 0x0e44, 0x240c, 0x3c54,
- 0x3034, 0x0320, 0x0f44, 0x2410, 0x3c58,
- 0x3134, 0x0420, 0x1044, 0x2510, 0x3e58,
- 0x3234, 0x0520, 0x1144, 0x2610, 0x4058,
- 0x3624, 0x0910, 0x1534, 0x2a00, 0x4248,
- 0x3724, 0x0a10, 0x1634, 0x2b00, 0x4348,
- 0x3824, 0x0b10, 0x1734, 0x2c00, 0x4448,
- 0x3924, 0x0b14, 0x1738, 0x2d00, 0x4548,
- 0x3a24, 0x0a14, 0x1638, 0x2e00, 0x4648,
- 0x3b24, 0x0914, 0x1538, 0x2f00, 0x4748,
- 0x3b28, 0x0814, 0x1438, 0x2f04, 0x474c,
- 0x3a28, 0x0714, 0x1338, 0x2e04, 0x464c,
- 0x3928, 0x0614, 0x1238, 0x2d04, 0x454c,
- 0x3828, 0x0618, 0x123c, 0x2c04, 0x444c,
- 0x3728, 0x0718, 0x133c, 0x2b04, 0x434c,
- 0x3628, 0x0818, 0x143c, 0x2a04, 0x424c,
- 0x362c, 0x0918, 0x153c, 0x2a08, 0x4250,
- 0x372c, 0x0a18, 0x163c, 0x2b08, 0x4350,
- 0x382c, 0x0b18, 0x173c, 0x2c08, 0x4450,
- 0x392c, 0x0b1c, 0x1740, 0x2d08, 0x4550,
- 0x3a2c, 0x0a1c, 0x1640, 0x2e08, 0x4650,
- 0x3b2c, 0x091c, 0x1540, 0x2f08, 0x4750,
- 0x3b30, 0x081c, 0x1440, 0x2f0c, 0x4754,
- 0x3a30, 0x071c, 0x1340, 0x2e0c, 0x4654,
- 0x3930, 0x061c, 0x1240, 0x2d0c, 0x4554,
- 0x3830, 0x0620, 0x1244, 0x2c0c, 0x4454,
- 0x3730, 0x0720, 0x1344, 0x2b0c, 0x4354,
- 0x3630, 0x0820, 0x1444, 0x2a0c, 0x4254,
- 0x3634, 0x0920, 0x1544, 0x2a10, 0x4258,
- 0x3734, 0x0a20, 0x1644, 0x2b10, 0x4458,
- 0x3834, 0x0b20, 0x1744, 0x2c10, 0x4658,
- 0x3c24, 0x0f10, 0x1b34, 0x3000, 0x0048,
- 0x3d24, 0x1010, 0x1c34, 0x3100, 0x0148,
- 0x3e24, 0x1110, 0x1d34, 0x3200, 0x0248,
- 0x3f24, 0x1114, 0x1d38, 0x3300, 0x0348,
- 0x4024, 0x1014, 0x1c38, 0x3400, 0x0448,
- 0x4124, 0x0f14, 0x1b38, 0x3500, 0x0548,
- 0x4128, 0x0e14, 0x1a38, 0x3504, 0x054c,
- 0x4028, 0x0d14, 0x1938, 0x3404, 0x044c,
- 0x3f28, 0x0c14, 0x1838, 0x3304, 0x034c,
- 0x3e28, 0x0c18, 0x183c, 0x3204, 0x024c,
- 0x3d28, 0x0d18, 0x193c, 0x3104, 0x014c,
- 0x3c28, 0x0e18, 0x1a3c, 0x3004, 0x004c,
- 0x3c2c, 0x0f18, 0x1b3c, 0x3008, 0x0050,
- 0x3d2c, 0x1018, 0x1c3c, 0x3108, 0x0150,
- 0x3e2c, 0x1118, 0x1d3c, 0x3208, 0x0250,
- 0x3f2c, 0x111c, 0x1d40, 0x3308, 0x0350,
- 0x402c, 0x101c, 0x1c40, 0x3408, 0x0450,
- 0x412c, 0x0f1c, 0x1b40, 0x3508, 0x0550,
- 0x4130, 0x0e1c, 0x1a40, 0x350c, 0x0554,
- 0x4030, 0x0d1c, 0x1940, 0x340c, 0x0454,
- 0x3f30, 0x0c1c, 0x1840, 0x330c, 0x0354,
- 0x3e30, 0x0c20, 0x1844, 0x320c, 0x0254,
- 0x3d30, 0x0d20, 0x1944, 0x310c, 0x0154,
- 0x3c30, 0x0e20, 0x1a44, 0x300c, 0x0054,
- 0x3c34, 0x0f20, 0x1b44, 0x3010, 0x0058,
- 0x3d34, 0x1020, 0x1c44, 0x3110, 0x0258,
- 0x3e34, 0x1120, 0x1d44, 0x3210, 0x0458,
- 0x4224, 0x1510, 0x2134, 0x3600, 0x0648,
- 0x4324, 0x1610, 0x2234, 0x3700, 0x0748,
- 0x4424, 0x1710, 0x2334, 0x3800, 0x0848,
- 0x4524, 0x1714, 0x2338, 0x3900, 0x0948,
- 0x4624, 0x1614, 0x2238, 0x3a00, 0x0a48,
- 0x4724, 0x1514, 0x2138, 0x3b00, 0x0b48,
- 0x4728, 0x1414, 0x2038, 0x3b04, 0x0b4c,
- 0x4628, 0x1314, 0x1f38, 0x3a04, 0x0a4c,
- 0x4528, 0x1214, 0x1e38, 0x3904, 0x094c,
- 0x4428, 0x1218, 0x1e3c, 0x3804, 0x084c,
- 0x4328, 0x1318, 0x1f3c, 0x3704, 0x074c,
- 0x4228, 0x1418, 0x203c, 0x3604, 0x064c,
- 0x422c, 0x1518, 0x213c, 0x3608, 0x0650,
- 0x432c, 0x1618, 0x223c, 0x3708, 0x0750,
- 0x442c, 0x1718, 0x233c, 0x3808, 0x0850,
- 0x452c, 0x171c, 0x2340, 0x3908, 0x0950,
- 0x462c, 0x161c, 0x2240, 0x3a08, 0x0a50,
- 0x472c, 0x151c, 0x2140, 0x3b08, 0x0b50,
- 0x4730, 0x141c, 0x2040, 0x3b0c, 0x0b54,
- 0x4630, 0x131c, 0x1f40, 0x3a0c, 0x0a54,
- 0x4530, 0x121c, 0x1e40, 0x390c, 0x0954,
- 0x4430, 0x1220, 0x1e44, 0x380c, 0x0854,
- 0x4330, 0x1320, 0x1f44, 0x370c, 0x0754,
- 0x4230, 0x1420, 0x2044, 0x360c, 0x0654,
- 0x4234, 0x1520, 0x2144, 0x3610, 0x0658,
- 0x4334, 0x1620, 0x2244, 0x3710, 0x0858,
- 0x4434, 0x1720, 0x2344, 0x3810, 0x0a58,
- 0x0024, 0x1b10, 0x2734, 0x3c00, 0x0c48,
- 0x0124, 0x1c10, 0x2834, 0x3d00, 0x0d48,
- 0x0224, 0x1d10, 0x2934, 0x3e00, 0x0e48,
- 0x0324, 0x1d14, 0x2938, 0x3f00, 0x0f48,
- 0x0424, 0x1c14, 0x2838, 0x4000, 0x1048,
- 0x0524, 0x1b14, 0x2738, 0x4100, 0x1148,
- 0x0528, 0x1a14, 0x2638, 0x4104, 0x114c,
- 0x0428, 0x1914, 0x2538, 0x4004, 0x104c,
- 0x0328, 0x1814, 0x2438, 0x3f04, 0x0f4c,
- 0x0228, 0x1818, 0x243c, 0x3e04, 0x0e4c,
- 0x0128, 0x1918, 0x253c, 0x3d04, 0x0d4c,
- 0x0028, 0x1a18, 0x263c, 0x3c04, 0x0c4c,
- 0x002c, 0x1b18, 0x273c, 0x3c08, 0x0c50,
- 0x012c, 0x1c18, 0x283c, 0x3d08, 0x0d50,
- 0x022c, 0x1d18, 0x293c, 0x3e08, 0x0e50,
- 0x032c, 0x1d1c, 0x2940, 0x3f08, 0x0f50,
- 0x042c, 0x1c1c, 0x2840, 0x4008, 0x1050,
- 0x052c, 0x1b1c, 0x2740, 0x4108, 0x1150,
- 0x0530, 0x1a1c, 0x2640, 0x410c, 0x1154,
- 0x0430, 0x191c, 0x2540, 0x400c, 0x1054,
- 0x0330, 0x181c, 0x2440, 0x3f0c, 0x0f54,
- 0x0230, 0x1820, 0x2444, 0x3e0c, 0x0e54,
- 0x0130, 0x1920, 0x2544, 0x3d0c, 0x0d54,
- 0x0030, 0x1a20, 0x2644, 0x3c0c, 0x0c54,
- 0x0034, 0x1b20, 0x2744, 0x3c10, 0x0c58,
- 0x0134, 0x1c20, 0x2844, 0x3d10, 0x0e58,
- 0x0234, 0x1d20, 0x2944, 0x3e10, 0x1058,
- 0x0624, 0x2110, 0x2d34, 0x4200, 0x1248,
- 0x0724, 0x2210, 0x2e34, 0x4300, 0x1348,
- 0x0824, 0x2310, 0x2f34, 0x4400, 0x1448,
- 0x0924, 0x2314, 0x2f38, 0x4500, 0x1548,
- 0x0a24, 0x2214, 0x2e38, 0x4600, 0x1648,
- 0x0b24, 0x2114, 0x2d38, 0x4700, 0x1748,
- 0x0b28, 0x2014, 0x2c38, 0x4704, 0x174c,
- 0x0a28, 0x1f14, 0x2b38, 0x4604, 0x164c,
- 0x0928, 0x1e14, 0x2a38, 0x4504, 0x154c,
- 0x0828, 0x1e18, 0x2a3c, 0x4404, 0x144c,
- 0x0728, 0x1f18, 0x2b3c, 0x4304, 0x134c,
- 0x0628, 0x2018, 0x2c3c, 0x4204, 0x124c,
- 0x062c, 0x2118, 0x2d3c, 0x4208, 0x1250,
- 0x072c, 0x2218, 0x2e3c, 0x4308, 0x1350,
- 0x082c, 0x2318, 0x2f3c, 0x4408, 0x1450,
- 0x092c, 0x231c, 0x2f40, 0x4508, 0x1550,
- 0x0a2c, 0x221c, 0x2e40, 0x4608, 0x1650,
- 0x0b2c, 0x211c, 0x2d40, 0x4708, 0x1750,
- 0x0b30, 0x201c, 0x2c40, 0x470c, 0x1754,
- 0x0a30, 0x1f1c, 0x2b40, 0x460c, 0x1654,
- 0x0930, 0x1e1c, 0x2a40, 0x450c, 0x1554,
- 0x0830, 0x1e20, 0x2a44, 0x440c, 0x1454,
- 0x0730, 0x1f20, 0x2b44, 0x430c, 0x1354,
- 0x0630, 0x2020, 0x2c44, 0x420c, 0x1254,
- 0x0634, 0x2120, 0x2d44, 0x4210, 0x1258,
- 0x0734, 0x2220, 0x2e44, 0x4310, 0x1458,
- 0x0834, 0x2320, 0x2f44, 0x4410, 0x1658,
-};
-
-static const uint16_t dv_place_411[1350] = {
- 0x0c24, 0x2710, 0x3334, 0x0000, 0x1848,
- 0x0d24, 0x2810, 0x3434, 0x0100, 0x1948,
- 0x0e24, 0x2910, 0x3534, 0x0200, 0x1a48,
- 0x0f24, 0x2914, 0x3538, 0x0300, 0x1b48,
- 0x1024, 0x2814, 0x3438, 0x0400, 0x1c48,
- 0x1124, 0x2714, 0x3338, 0x0500, 0x1d48,
- 0x1128, 0x2614, 0x3238, 0x0504, 0x1d4c,
- 0x1028, 0x2514, 0x3138, 0x0404, 0x1c4c,
- 0x0f28, 0x2414, 0x3038, 0x0304, 0x1b4c,
- 0x0e28, 0x2418, 0x303c, 0x0204, 0x1a4c,
- 0x0d28, 0x2518, 0x313c, 0x0104, 0x194c,
- 0x0c28, 0x2618, 0x323c, 0x0004, 0x184c,
- 0x0c2c, 0x2718, 0x333c, 0x0008, 0x1850,
- 0x0d2c, 0x2818, 0x343c, 0x0108, 0x1950,
- 0x0e2c, 0x2918, 0x353c, 0x0208, 0x1a50,
- 0x0f2c, 0x291c, 0x3540, 0x0308, 0x1b50,
- 0x102c, 0x281c, 0x3440, 0x0408, 0x1c50,
- 0x112c, 0x271c, 0x3340, 0x0508, 0x1d50,
- 0x1130, 0x261c, 0x3240, 0x050c, 0x1d54,
- 0x1030, 0x251c, 0x3140, 0x040c, 0x1c54,
- 0x0f30, 0x241c, 0x3040, 0x030c, 0x1b54,
- 0x0e30, 0x2420, 0x3044, 0x020c, 0x1a54,
- 0x0d30, 0x2520, 0x3144, 0x010c, 0x1954,
- 0x0c30, 0x2620, 0x3244, 0x000c, 0x1854,
- 0x0c34, 0x2720, 0x3344, 0x0010, 0x1858,
- 0x0d34, 0x2820, 0x3444, 0x0110, 0x1a58,
- 0x0e34, 0x2920, 0x3544, 0x0210, 0x1c58,
- 0x1224, 0x2d10, 0x3934, 0x0600, 0x1e48,
- 0x1324, 0x2e10, 0x3a34, 0x0700, 0x1f48,
- 0x1424, 0x2f10, 0x3b34, 0x0800, 0x2048,
- 0x1524, 0x2f14, 0x3b38, 0x0900, 0x2148,
- 0x1624, 0x2e14, 0x3a38, 0x0a00, 0x2248,
- 0x1724, 0x2d14, 0x3938, 0x0b00, 0x2348,
- 0x1728, 0x2c14, 0x3838, 0x0b04, 0x234c,
- 0x1628, 0x2b14, 0x3738, 0x0a04, 0x224c,
- 0x1528, 0x2a14, 0x3638, 0x0904, 0x214c,
- 0x1428, 0x2a18, 0x363c, 0x0804, 0x204c,
- 0x1328, 0x2b18, 0x373c, 0x0704, 0x1f4c,
- 0x1228, 0x2c18, 0x383c, 0x0604, 0x1e4c,
- 0x122c, 0x2d18, 0x393c, 0x0608, 0x1e50,
- 0x132c, 0x2e18, 0x3a3c, 0x0708, 0x1f50,
- 0x142c, 0x2f18, 0x3b3c, 0x0808, 0x2050,
- 0x152c, 0x2f1c, 0x3b40, 0x0908, 0x2150,
- 0x162c, 0x2e1c, 0x3a40, 0x0a08, 0x2250,
- 0x172c, 0x2d1c, 0x3940, 0x0b08, 0x2350,
- 0x1730, 0x2c1c, 0x3840, 0x0b0c, 0x2354,
- 0x1630, 0x2b1c, 0x3740, 0x0a0c, 0x2254,
- 0x1530, 0x2a1c, 0x3640, 0x090c, 0x2154,
- 0x1430, 0x2a20, 0x3644, 0x080c, 0x2054,
- 0x1330, 0x2b20, 0x3744, 0x070c, 0x1f54,
- 0x1230, 0x2c20, 0x3844, 0x060c, 0x1e54,
- 0x1234, 0x2d20, 0x3944, 0x0610, 0x1e58,
- 0x1334, 0x2e20, 0x3a44, 0x0710, 0x2058,
- 0x1434, 0x2f20, 0x3b44, 0x0810, 0x2258,
- 0x1824, 0x3310, 0x0334, 0x0c00, 0x2448,
- 0x1924, 0x3410, 0x0434, 0x0d00, 0x2548,
- 0x1a24, 0x3510, 0x0534, 0x0e00, 0x2648,
- 0x1b24, 0x3514, 0x0538, 0x0f00, 0x2748,
- 0x1c24, 0x3414, 0x0438, 0x1000, 0x2848,
- 0x1d24, 0x3314, 0x0338, 0x1100, 0x2948,
- 0x1d28, 0x3214, 0x0238, 0x1104, 0x294c,
- 0x1c28, 0x3114, 0x0138, 0x1004, 0x284c,
- 0x1b28, 0x3014, 0x0038, 0x0f04, 0x274c,
- 0x1a28, 0x3018, 0x003c, 0x0e04, 0x264c,
- 0x1928, 0x3118, 0x013c, 0x0d04, 0x254c,
- 0x1828, 0x3218, 0x023c, 0x0c04, 0x244c,
- 0x182c, 0x3318, 0x033c, 0x0c08, 0x2450,
- 0x192c, 0x3418, 0x043c, 0x0d08, 0x2550,
- 0x1a2c, 0x3518, 0x053c, 0x0e08, 0x2650,
- 0x1b2c, 0x351c, 0x0540, 0x0f08, 0x2750,
- 0x1c2c, 0x341c, 0x0440, 0x1008, 0x2850,
- 0x1d2c, 0x331c, 0x0340, 0x1108, 0x2950,
- 0x1d30, 0x321c, 0x0240, 0x110c, 0x2954,
- 0x1c30, 0x311c, 0x0140, 0x100c, 0x2854,
- 0x1b30, 0x301c, 0x0040, 0x0f0c, 0x2754,
- 0x1a30, 0x3020, 0x0044, 0x0e0c, 0x2654,
- 0x1930, 0x3120, 0x0144, 0x0d0c, 0x2554,
- 0x1830, 0x3220, 0x0244, 0x0c0c, 0x2454,
- 0x1834, 0x3320, 0x0344, 0x0c10, 0x2458,
- 0x1934, 0x3420, 0x0444, 0x0d10, 0x2658,
- 0x1a34, 0x3520, 0x0544, 0x0e10, 0x2858,
- 0x1e24, 0x3910, 0x0934, 0x1200, 0x2a48,
- 0x1f24, 0x3a10, 0x0a34, 0x1300, 0x2b48,
- 0x2024, 0x3b10, 0x0b34, 0x1400, 0x2c48,
- 0x2124, 0x3b14, 0x0b38, 0x1500, 0x2d48,
- 0x2224, 0x3a14, 0x0a38, 0x1600, 0x2e48,
- 0x2324, 0x3914, 0x0938, 0x1700, 0x2f48,
- 0x2328, 0x3814, 0x0838, 0x1704, 0x2f4c,
- 0x2228, 0x3714, 0x0738, 0x1604, 0x2e4c,
- 0x2128, 0x3614, 0x0638, 0x1504, 0x2d4c,
- 0x2028, 0x3618, 0x063c, 0x1404, 0x2c4c,
- 0x1f28, 0x3718, 0x073c, 0x1304, 0x2b4c,
- 0x1e28, 0x3818, 0x083c, 0x1204, 0x2a4c,
- 0x1e2c, 0x3918, 0x093c, 0x1208, 0x2a50,
- 0x1f2c, 0x3a18, 0x0a3c, 0x1308, 0x2b50,
- 0x202c, 0x3b18, 0x0b3c, 0x1408, 0x2c50,
- 0x212c, 0x3b1c, 0x0b40, 0x1508, 0x2d50,
- 0x222c, 0x3a1c, 0x0a40, 0x1608, 0x2e50,
- 0x232c, 0x391c, 0x0940, 0x1708, 0x2f50,
- 0x2330, 0x381c, 0x0840, 0x170c, 0x2f54,
- 0x2230, 0x371c, 0x0740, 0x160c, 0x2e54,
- 0x2130, 0x361c, 0x0640, 0x150c, 0x2d54,
- 0x2030, 0x3620, 0x0644, 0x140c, 0x2c54,
- 0x1f30, 0x3720, 0x0744, 0x130c, 0x2b54,
- 0x1e30, 0x3820, 0x0844, 0x120c, 0x2a54,
- 0x1e34, 0x3920, 0x0944, 0x1210, 0x2a58,
- 0x1f34, 0x3a20, 0x0a44, 0x1310, 0x2c58,
- 0x2034, 0x3b20, 0x0b44, 0x1410, 0x2e58,
- 0x2424, 0x0310, 0x0f34, 0x1800, 0x3048,
- 0x2524, 0x0410, 0x1034, 0x1900, 0x3148,
- 0x2624, 0x0510, 0x1134, 0x1a00, 0x3248,
- 0x2724, 0x0514, 0x1138, 0x1b00, 0x3348,
- 0x2824, 0x0414, 0x1038, 0x1c00, 0x3448,
- 0x2924, 0x0314, 0x0f38, 0x1d00, 0x3548,
- 0x2928, 0x0214, 0x0e38, 0x1d04, 0x354c,
- 0x2828, 0x0114, 0x0d38, 0x1c04, 0x344c,
- 0x2728, 0x0014, 0x0c38, 0x1b04, 0x334c,
- 0x2628, 0x0018, 0x0c3c, 0x1a04, 0x324c,
- 0x2528, 0x0118, 0x0d3c, 0x1904, 0x314c,
- 0x2428, 0x0218, 0x0e3c, 0x1804, 0x304c,
- 0x242c, 0x0318, 0x0f3c, 0x1808, 0x3050,
- 0x252c, 0x0418, 0x103c, 0x1908, 0x3150,
- 0x262c, 0x0518, 0x113c, 0x1a08, 0x3250,
- 0x272c, 0x051c, 0x1140, 0x1b08, 0x3350,
- 0x282c, 0x041c, 0x1040, 0x1c08, 0x3450,
- 0x292c, 0x031c, 0x0f40, 0x1d08, 0x3550,
- 0x2930, 0x021c, 0x0e40, 0x1d0c, 0x3554,
- 0x2830, 0x011c, 0x0d40, 0x1c0c, 0x3454,
- 0x2730, 0x001c, 0x0c40, 0x1b0c, 0x3354,
- 0x2630, 0x0020, 0x0c44, 0x1a0c, 0x3254,
- 0x2530, 0x0120, 0x0d44, 0x190c, 0x3154,
- 0x2430, 0x0220, 0x0e44, 0x180c, 0x3054,
- 0x2434, 0x0320, 0x0f44, 0x1810, 0x3058,
- 0x2534, 0x0420, 0x1044, 0x1910, 0x3258,
- 0x2634, 0x0520, 0x1144, 0x1a10, 0x3458,
- 0x2a24, 0x0910, 0x1534, 0x1e00, 0x3648,
- 0x2b24, 0x0a10, 0x1634, 0x1f00, 0x3748,
- 0x2c24, 0x0b10, 0x1734, 0x2000, 0x3848,
- 0x2d24, 0x0b14, 0x1738, 0x2100, 0x3948,
- 0x2e24, 0x0a14, 0x1638, 0x2200, 0x3a48,
- 0x2f24, 0x0914, 0x1538, 0x2300, 0x3b48,
- 0x2f28, 0x0814, 0x1438, 0x2304, 0x3b4c,
- 0x2e28, 0x0714, 0x1338, 0x2204, 0x3a4c,
- 0x2d28, 0x0614, 0x1238, 0x2104, 0x394c,
- 0x2c28, 0x0618, 0x123c, 0x2004, 0x384c,
- 0x2b28, 0x0718, 0x133c, 0x1f04, 0x374c,
- 0x2a28, 0x0818, 0x143c, 0x1e04, 0x364c,
- 0x2a2c, 0x0918, 0x153c, 0x1e08, 0x3650,
- 0x2b2c, 0x0a18, 0x163c, 0x1f08, 0x3750,
- 0x2c2c, 0x0b18, 0x173c, 0x2008, 0x3850,
- 0x2d2c, 0x0b1c, 0x1740, 0x2108, 0x3950,
- 0x2e2c, 0x0a1c, 0x1640, 0x2208, 0x3a50,
- 0x2f2c, 0x091c, 0x1540, 0x2308, 0x3b50,
- 0x2f30, 0x081c, 0x1440, 0x230c, 0x3b54,
- 0x2e30, 0x071c, 0x1340, 0x220c, 0x3a54,
- 0x2d30, 0x061c, 0x1240, 0x210c, 0x3954,
- 0x2c30, 0x0620, 0x1244, 0x200c, 0x3854,
- 0x2b30, 0x0720, 0x1344, 0x1f0c, 0x3754,
- 0x2a30, 0x0820, 0x1444, 0x1e0c, 0x3654,
- 0x2a34, 0x0920, 0x1544, 0x1e10, 0x3658,
- 0x2b34, 0x0a20, 0x1644, 0x1f10, 0x3858,
- 0x2c34, 0x0b20, 0x1744, 0x2010, 0x3a58,
- 0x3024, 0x0f10, 0x1b34, 0x2400, 0x0048,
- 0x3124, 0x1010, 0x1c34, 0x2500, 0x0148,
- 0x3224, 0x1110, 0x1d34, 0x2600, 0x0248,
- 0x3324, 0x1114, 0x1d38, 0x2700, 0x0348,
- 0x3424, 0x1014, 0x1c38, 0x2800, 0x0448,
- 0x3524, 0x0f14, 0x1b38, 0x2900, 0x0548,
- 0x3528, 0x0e14, 0x1a38, 0x2904, 0x054c,
- 0x3428, 0x0d14, 0x1938, 0x2804, 0x044c,
- 0x3328, 0x0c14, 0x1838, 0x2704, 0x034c,
- 0x3228, 0x0c18, 0x183c, 0x2604, 0x024c,
- 0x3128, 0x0d18, 0x193c, 0x2504, 0x014c,
- 0x3028, 0x0e18, 0x1a3c, 0x2404, 0x004c,
- 0x302c, 0x0f18, 0x1b3c, 0x2408, 0x0050,
- 0x312c, 0x1018, 0x1c3c, 0x2508, 0x0150,
- 0x322c, 0x1118, 0x1d3c, 0x2608, 0x0250,
- 0x332c, 0x111c, 0x1d40, 0x2708, 0x0350,
- 0x342c, 0x101c, 0x1c40, 0x2808, 0x0450,
- 0x352c, 0x0f1c, 0x1b40, 0x2908, 0x0550,
- 0x3530, 0x0e1c, 0x1a40, 0x290c, 0x0554,
- 0x3430, 0x0d1c, 0x1940, 0x280c, 0x0454,
- 0x3330, 0x0c1c, 0x1840, 0x270c, 0x0354,
- 0x3230, 0x0c20, 0x1844, 0x260c, 0x0254,
- 0x3130, 0x0d20, 0x1944, 0x250c, 0x0154,
- 0x3030, 0x0e20, 0x1a44, 0x240c, 0x0054,
- 0x3034, 0x0f20, 0x1b44, 0x2410, 0x0058,
- 0x3134, 0x1020, 0x1c44, 0x2510, 0x0258,
- 0x3234, 0x1120, 0x1d44, 0x2610, 0x0458,
- 0x3624, 0x1510, 0x2134, 0x2a00, 0x0648,
- 0x3724, 0x1610, 0x2234, 0x2b00, 0x0748,
- 0x3824, 0x1710, 0x2334, 0x2c00, 0x0848,
- 0x3924, 0x1714, 0x2338, 0x2d00, 0x0948,
- 0x3a24, 0x1614, 0x2238, 0x2e00, 0x0a48,
- 0x3b24, 0x1514, 0x2138, 0x2f00, 0x0b48,
- 0x3b28, 0x1414, 0x2038, 0x2f04, 0x0b4c,
- 0x3a28, 0x1314, 0x1f38, 0x2e04, 0x0a4c,
- 0x3928, 0x1214, 0x1e38, 0x2d04, 0x094c,
- 0x3828, 0x1218, 0x1e3c, 0x2c04, 0x084c,
- 0x3728, 0x1318, 0x1f3c, 0x2b04, 0x074c,
- 0x3628, 0x1418, 0x203c, 0x2a04, 0x064c,
- 0x362c, 0x1518, 0x213c, 0x2a08, 0x0650,
- 0x372c, 0x1618, 0x223c, 0x2b08, 0x0750,
- 0x382c, 0x1718, 0x233c, 0x2c08, 0x0850,
- 0x392c, 0x171c, 0x2340, 0x2d08, 0x0950,
- 0x3a2c, 0x161c, 0x2240, 0x2e08, 0x0a50,
- 0x3b2c, 0x151c, 0x2140, 0x2f08, 0x0b50,
- 0x3b30, 0x141c, 0x2040, 0x2f0c, 0x0b54,
- 0x3a30, 0x131c, 0x1f40, 0x2e0c, 0x0a54,
- 0x3930, 0x121c, 0x1e40, 0x2d0c, 0x0954,
- 0x3830, 0x1220, 0x1e44, 0x2c0c, 0x0854,
- 0x3730, 0x1320, 0x1f44, 0x2b0c, 0x0754,
- 0x3630, 0x1420, 0x2044, 0x2a0c, 0x0654,
- 0x3634, 0x1520, 0x2144, 0x2a10, 0x0658,
- 0x3734, 0x1620, 0x2244, 0x2b10, 0x0858,
- 0x3834, 0x1720, 0x2344, 0x2c10, 0x0a58,
- 0x0024, 0x1b10, 0x2734, 0x3000, 0x0c48,
- 0x0124, 0x1c10, 0x2834, 0x3100, 0x0d48,
- 0x0224, 0x1d10, 0x2934, 0x3200, 0x0e48,
- 0x0324, 0x1d14, 0x2938, 0x3300, 0x0f48,
- 0x0424, 0x1c14, 0x2838, 0x3400, 0x1048,
- 0x0524, 0x1b14, 0x2738, 0x3500, 0x1148,
- 0x0528, 0x1a14, 0x2638, 0x3504, 0x114c,
- 0x0428, 0x1914, 0x2538, 0x3404, 0x104c,
- 0x0328, 0x1814, 0x2438, 0x3304, 0x0f4c,
- 0x0228, 0x1818, 0x243c, 0x3204, 0x0e4c,
- 0x0128, 0x1918, 0x253c, 0x3104, 0x0d4c,
- 0x0028, 0x1a18, 0x263c, 0x3004, 0x0c4c,
- 0x002c, 0x1b18, 0x273c, 0x3008, 0x0c50,
- 0x012c, 0x1c18, 0x283c, 0x3108, 0x0d50,
- 0x022c, 0x1d18, 0x293c, 0x3208, 0x0e50,
- 0x032c, 0x1d1c, 0x2940, 0x3308, 0x0f50,
- 0x042c, 0x1c1c, 0x2840, 0x3408, 0x1050,
- 0x052c, 0x1b1c, 0x2740, 0x3508, 0x1150,
- 0x0530, 0x1a1c, 0x2640, 0x350c, 0x1154,
- 0x0430, 0x191c, 0x2540, 0x340c, 0x1054,
- 0x0330, 0x181c, 0x2440, 0x330c, 0x0f54,
- 0x0230, 0x1820, 0x2444, 0x320c, 0x0e54,
- 0x0130, 0x1920, 0x2544, 0x310c, 0x0d54,
- 0x0030, 0x1a20, 0x2644, 0x300c, 0x0c54,
- 0x0034, 0x1b20, 0x2744, 0x3010, 0x0c58,
- 0x0134, 0x1c20, 0x2844, 0x3110, 0x0e58,
- 0x0234, 0x1d20, 0x2944, 0x3210, 0x1058,
- 0x0624, 0x2110, 0x2d34, 0x3600, 0x1248,
- 0x0724, 0x2210, 0x2e34, 0x3700, 0x1348,
- 0x0824, 0x2310, 0x2f34, 0x3800, 0x1448,
- 0x0924, 0x2314, 0x2f38, 0x3900, 0x1548,
- 0x0a24, 0x2214, 0x2e38, 0x3a00, 0x1648,
- 0x0b24, 0x2114, 0x2d38, 0x3b00, 0x1748,
- 0x0b28, 0x2014, 0x2c38, 0x3b04, 0x174c,
- 0x0a28, 0x1f14, 0x2b38, 0x3a04, 0x164c,
- 0x0928, 0x1e14, 0x2a38, 0x3904, 0x154c,
- 0x0828, 0x1e18, 0x2a3c, 0x3804, 0x144c,
- 0x0728, 0x1f18, 0x2b3c, 0x3704, 0x134c,
- 0x0628, 0x2018, 0x2c3c, 0x3604, 0x124c,
- 0x062c, 0x2118, 0x2d3c, 0x3608, 0x1250,
- 0x072c, 0x2218, 0x2e3c, 0x3708, 0x1350,
- 0x082c, 0x2318, 0x2f3c, 0x3808, 0x1450,
- 0x092c, 0x231c, 0x2f40, 0x3908, 0x1550,
- 0x0a2c, 0x221c, 0x2e40, 0x3a08, 0x1650,
- 0x0b2c, 0x211c, 0x2d40, 0x3b08, 0x1750,
- 0x0b30, 0x201c, 0x2c40, 0x3b0c, 0x1754,
- 0x0a30, 0x1f1c, 0x2b40, 0x3a0c, 0x1654,
- 0x0930, 0x1e1c, 0x2a40, 0x390c, 0x1554,
- 0x0830, 0x1e20, 0x2a44, 0x380c, 0x1454,
- 0x0730, 0x1f20, 0x2b44, 0x370c, 0x1354,
- 0x0630, 0x2020, 0x2c44, 0x360c, 0x1254,
- 0x0634, 0x2120, 0x2d44, 0x3610, 0x1258,
- 0x0734, 0x2220, 0x2e44, 0x3710, 0x1458,
- 0x0834, 0x2320, 0x2f44, 0x3810, 0x1658,
-};
-
-/* 2 channels per frame, 10 DIF sequences per channel,
- 27 video segments per DIF sequence, 5 macroblocks per video segment */
-static const uint16_t dv_place_422_525[2*10*27*5] = {
- 0x0c24, 0x2412, 0x3036, 0x0000, 0x1848,
- 0x0d24, 0x2512, 0x3136, 0x0100, 0x1948,
- 0x0e24, 0x2612, 0x3236, 0x0200, 0x1a48,
- 0x0e26, 0x2614, 0x3238, 0x0202, 0x1a4a,
- 0x0d26, 0x2514, 0x3138, 0x0102, 0x194a,
- 0x0c26, 0x2414, 0x3038, 0x0002, 0x184a,
- 0x0c28, 0x2416, 0x303a, 0x0004, 0x184c,
- 0x0d28, 0x2516, 0x313a, 0x0104, 0x194c,
- 0x0e28, 0x2616, 0x323a, 0x0204, 0x1a4c,
- 0x0e2a, 0x2618, 0x323c, 0x0206, 0x1a4e,
- 0x0d2a, 0x2518, 0x313c, 0x0106, 0x194e,
- 0x0c2a, 0x2418, 0x303c, 0x0006, 0x184e,
- 0x0c2c, 0x241a, 0x303e, 0x0008, 0x1850,
- 0x0d2c, 0x251a, 0x313e, 0x0108, 0x1950,
- 0x0e2c, 0x261a, 0x323e, 0x0208, 0x1a50,
- 0x0e2e, 0x261c, 0x3240, 0x020a, 0x1a52,
- 0x0d2e, 0x251c, 0x3140, 0x010a, 0x1952,
- 0x0c2e, 0x241c, 0x3040, 0x000a, 0x1852,
- 0x0c30, 0x241e, 0x3042, 0x000c, 0x1854,
- 0x0d30, 0x251e, 0x3142, 0x010c, 0x1954,
- 0x0e30, 0x261e, 0x3242, 0x020c, 0x1a54,
- 0x0e32, 0x2620, 0x3244, 0x020e, 0x1a56,
- 0x0d32, 0x2520, 0x3144, 0x010e, 0x1956,
- 0x0c32, 0x2420, 0x3044, 0x000e, 0x1856,
- 0x0c34, 0x2422, 0x3046, 0x0010, 0x1858,
- 0x0d34, 0x2522, 0x3146, 0x0110, 0x1958,
- 0x0e34, 0x2622, 0x3246, 0x0210, 0x1a58,
- 0x1224, 0x2a12, 0x3636, 0x0600, 0x1e48,
- 0x1324, 0x2b12, 0x3736, 0x0700, 0x1f48,
- 0x1424, 0x2c12, 0x3836, 0x0800, 0x2048,
- 0x1426, 0x2c14, 0x3838, 0x0802, 0x204a,
- 0x1326, 0x2b14, 0x3738, 0x0702, 0x1f4a,
- 0x1226, 0x2a14, 0x3638, 0x0602, 0x1e4a,
- 0x1228, 0x2a16, 0x363a, 0x0604, 0x1e4c,
- 0x1328, 0x2b16, 0x373a, 0x0704, 0x1f4c,
- 0x1428, 0x2c16, 0x383a, 0x0804, 0x204c,
- 0x142a, 0x2c18, 0x383c, 0x0806, 0x204e,
- 0x132a, 0x2b18, 0x373c, 0x0706, 0x1f4e,
- 0x122a, 0x2a18, 0x363c, 0x0606, 0x1e4e,
- 0x122c, 0x2a1a, 0x363e, 0x0608, 0x1e50,
- 0x132c, 0x2b1a, 0x373e, 0x0708, 0x1f50,
- 0x142c, 0x2c1a, 0x383e, 0x0808, 0x2050,
- 0x142e, 0x2c1c, 0x3840, 0x080a, 0x2052,
- 0x132e, 0x2b1c, 0x3740, 0x070a, 0x1f52,
- 0x122e, 0x2a1c, 0x3640, 0x060a, 0x1e52,
- 0x1230, 0x2a1e, 0x3642, 0x060c, 0x1e54,
- 0x1330, 0x2b1e, 0x3742, 0x070c, 0x1f54,
- 0x1430, 0x2c1e, 0x3842, 0x080c, 0x2054,
- 0x1432, 0x2c20, 0x3844, 0x080e, 0x2056,
- 0x1332, 0x2b20, 0x3744, 0x070e, 0x1f56,
- 0x1232, 0x2a20, 0x3644, 0x060e, 0x1e56,
- 0x1234, 0x2a22, 0x3646, 0x0610, 0x1e58,
- 0x1334, 0x2b22, 0x3746, 0x0710, 0x1f58,
- 0x1434, 0x2c22, 0x3846, 0x0810, 0x2058,
- 0x1824, 0x3012, 0x0036, 0x0c00, 0x2448,
- 0x1924, 0x3112, 0x0136, 0x0d00, 0x2548,
- 0x1a24, 0x3212, 0x0236, 0x0e00, 0x2648,
- 0x1a26, 0x3214, 0x0238, 0x0e02, 0x264a,
- 0x1926, 0x3114, 0x0138, 0x0d02, 0x254a,
- 0x1826, 0x3014, 0x0038, 0x0c02, 0x244a,
- 0x1828, 0x3016, 0x003a, 0x0c04, 0x244c,
- 0x1928, 0x3116, 0x013a, 0x0d04, 0x254c,
- 0x1a28, 0x3216, 0x023a, 0x0e04, 0x264c,
- 0x1a2a, 0x3218, 0x023c, 0x0e06, 0x264e,
- 0x192a, 0x3118, 0x013c, 0x0d06, 0x254e,
- 0x182a, 0x3018, 0x003c, 0x0c06, 0x244e,
- 0x182c, 0x301a, 0x003e, 0x0c08, 0x2450,
- 0x192c, 0x311a, 0x013e, 0x0d08, 0x2550,
- 0x1a2c, 0x321a, 0x023e, 0x0e08, 0x2650,
- 0x1a2e, 0x321c, 0x0240, 0x0e0a, 0x2652,
- 0x192e, 0x311c, 0x0140, 0x0d0a, 0x2552,
- 0x182e, 0x301c, 0x0040, 0x0c0a, 0x2452,
- 0x1830, 0x301e, 0x0042, 0x0c0c, 0x2454,
- 0x1930, 0x311e, 0x0142, 0x0d0c, 0x2554,
- 0x1a30, 0x321e, 0x0242, 0x0e0c, 0x2654,
- 0x1a32, 0x3220, 0x0244, 0x0e0e, 0x2656,
- 0x1932, 0x3120, 0x0144, 0x0d0e, 0x2556,
- 0x1832, 0x3020, 0x0044, 0x0c0e, 0x2456,
- 0x1834, 0x3022, 0x0046, 0x0c10, 0x2458,
- 0x1934, 0x3122, 0x0146, 0x0d10, 0x2558,
- 0x1a34, 0x3222, 0x0246, 0x0e10, 0x2658,
- 0x1e24, 0x3612, 0x0636, 0x1200, 0x2a48,
- 0x1f24, 0x3712, 0x0736, 0x1300, 0x2b48,
- 0x2024, 0x3812, 0x0836, 0x1400, 0x2c48,
- 0x2026, 0x3814, 0x0838, 0x1402, 0x2c4a,
- 0x1f26, 0x3714, 0x0738, 0x1302, 0x2b4a,
- 0x1e26, 0x3614, 0x0638, 0x1202, 0x2a4a,
- 0x1e28, 0x3616, 0x063a, 0x1204, 0x2a4c,
- 0x1f28, 0x3716, 0x073a, 0x1304, 0x2b4c,
- 0x2028, 0x3816, 0x083a, 0x1404, 0x2c4c,
- 0x202a, 0x3818, 0x083c, 0x1406, 0x2c4e,
- 0x1f2a, 0x3718, 0x073c, 0x1306, 0x2b4e,
- 0x1e2a, 0x3618, 0x063c, 0x1206, 0x2a4e,
- 0x1e2c, 0x361a, 0x063e, 0x1208, 0x2a50,
- 0x1f2c, 0x371a, 0x073e, 0x1308, 0x2b50,
- 0x202c, 0x381a, 0x083e, 0x1408, 0x2c50,
- 0x202e, 0x381c, 0x0840, 0x140a, 0x2c52,
- 0x1f2e, 0x371c, 0x0740, 0x130a, 0x2b52,
- 0x1e2e, 0x361c, 0x0640, 0x120a, 0x2a52,
- 0x1e30, 0x361e, 0x0642, 0x120c, 0x2a54,
- 0x1f30, 0x371e, 0x0742, 0x130c, 0x2b54,
- 0x2030, 0x381e, 0x0842, 0x140c, 0x2c54,
- 0x2032, 0x3820, 0x0844, 0x140e, 0x2c56,
- 0x1f32, 0x3720, 0x0744, 0x130e, 0x2b56,
- 0x1e32, 0x3620, 0x0644, 0x120e, 0x2a56,
- 0x1e34, 0x3622, 0x0646, 0x1210, 0x2a58,
- 0x1f34, 0x3722, 0x0746, 0x1310, 0x2b58,
- 0x2034, 0x3822, 0x0846, 0x1410, 0x2c58,
- 0x2424, 0x0012, 0x0c36, 0x1800, 0x3048,
- 0x2524, 0x0112, 0x0d36, 0x1900, 0x3148,
- 0x2624, 0x0212, 0x0e36, 0x1a00, 0x3248,
- 0x2626, 0x0214, 0x0e38, 0x1a02, 0x324a,
- 0x2526, 0x0114, 0x0d38, 0x1902, 0x314a,
- 0x2426, 0x0014, 0x0c38, 0x1802, 0x304a,
- 0x2428, 0x0016, 0x0c3a, 0x1804, 0x304c,
- 0x2528, 0x0116, 0x0d3a, 0x1904, 0x314c,
- 0x2628, 0x0216, 0x0e3a, 0x1a04, 0x324c,
- 0x262a, 0x0218, 0x0e3c, 0x1a06, 0x324e,
- 0x252a, 0x0118, 0x0d3c, 0x1906, 0x314e,
- 0x242a, 0x0018, 0x0c3c, 0x1806, 0x304e,
- 0x242c, 0x001a, 0x0c3e, 0x1808, 0x3050,
- 0x252c, 0x011a, 0x0d3e, 0x1908, 0x3150,
- 0x262c, 0x021a, 0x0e3e, 0x1a08, 0x3250,
- 0x262e, 0x021c, 0x0e40, 0x1a0a, 0x3252,
- 0x252e, 0x011c, 0x0d40, 0x190a, 0x3152,
- 0x242e, 0x001c, 0x0c40, 0x180a, 0x3052,
- 0x2430, 0x001e, 0x0c42, 0x180c, 0x3054,
- 0x2530, 0x011e, 0x0d42, 0x190c, 0x3154,
- 0x2630, 0x021e, 0x0e42, 0x1a0c, 0x3254,
- 0x2632, 0x0220, 0x0e44, 0x1a0e, 0x3256,
- 0x2532, 0x0120, 0x0d44, 0x190e, 0x3156,
- 0x2432, 0x0020, 0x0c44, 0x180e, 0x3056,
- 0x2434, 0x0022, 0x0c46, 0x1810, 0x3058,
- 0x2534, 0x0122, 0x0d46, 0x1910, 0x3158,
- 0x2634, 0x0222, 0x0e46, 0x1a10, 0x3258,
- 0x2a24, 0x0612, 0x1236, 0x1e00, 0x3648,
- 0x2b24, 0x0712, 0x1336, 0x1f00, 0x3748,
- 0x2c24, 0x0812, 0x1436, 0x2000, 0x3848,
- 0x2c26, 0x0814, 0x1438, 0x2002, 0x384a,
- 0x2b26, 0x0714, 0x1338, 0x1f02, 0x374a,
- 0x2a26, 0x0614, 0x1238, 0x1e02, 0x364a,
- 0x2a28, 0x0616, 0x123a, 0x1e04, 0x364c,
- 0x2b28, 0x0716, 0x133a, 0x1f04, 0x374c,
- 0x2c28, 0x0816, 0x143a, 0x2004, 0x384c,
- 0x2c2a, 0x0818, 0x143c, 0x2006, 0x384e,
- 0x2b2a, 0x0718, 0x133c, 0x1f06, 0x374e,
- 0x2a2a, 0x0618, 0x123c, 0x1e06, 0x364e,
- 0x2a2c, 0x061a, 0x123e, 0x1e08, 0x3650,
- 0x2b2c, 0x071a, 0x133e, 0x1f08, 0x3750,
- 0x2c2c, 0x081a, 0x143e, 0x2008, 0x3850,
- 0x2c2e, 0x081c, 0x1440, 0x200a, 0x3852,
- 0x2b2e, 0x071c, 0x1340, 0x1f0a, 0x3752,
- 0x2a2e, 0x061c, 0x1240, 0x1e0a, 0x3652,
- 0x2a30, 0x061e, 0x1242, 0x1e0c, 0x3654,
- 0x2b30, 0x071e, 0x1342, 0x1f0c, 0x3754,
- 0x2c30, 0x081e, 0x1442, 0x200c, 0x3854,
- 0x2c32, 0x0820, 0x1444, 0x200e, 0x3856,
- 0x2b32, 0x0720, 0x1344, 0x1f0e, 0x3756,
- 0x2a32, 0x0620, 0x1244, 0x1e0e, 0x3656,
- 0x2a34, 0x0622, 0x1246, 0x1e10, 0x3658,
- 0x2b34, 0x0722, 0x1346, 0x1f10, 0x3758,
- 0x2c34, 0x0822, 0x1446, 0x2010, 0x3858,
- 0x3024, 0x0c12, 0x1836, 0x2400, 0x0048,
- 0x3124, 0x0d12, 0x1936, 0x2500, 0x0148,
- 0x3224, 0x0e12, 0x1a36, 0x2600, 0x0248,
- 0x3226, 0x0e14, 0x1a38, 0x2602, 0x024a,
- 0x3126, 0x0d14, 0x1938, 0x2502, 0x014a,
- 0x3026, 0x0c14, 0x1838, 0x2402, 0x004a,
- 0x3028, 0x0c16, 0x183a, 0x2404, 0x004c,
- 0x3128, 0x0d16, 0x193a, 0x2504, 0x014c,
- 0x3228, 0x0e16, 0x1a3a, 0x2604, 0x024c,
- 0x322a, 0x0e18, 0x1a3c, 0x2606, 0x024e,
- 0x312a, 0x0d18, 0x193c, 0x2506, 0x014e,
- 0x302a, 0x0c18, 0x183c, 0x2406, 0x004e,
- 0x302c, 0x0c1a, 0x183e, 0x2408, 0x0050,
- 0x312c, 0x0d1a, 0x193e, 0x2508, 0x0150,
- 0x322c, 0x0e1a, 0x1a3e, 0x2608, 0x0250,
- 0x322e, 0x0e1c, 0x1a40, 0x260a, 0x0252,
- 0x312e, 0x0d1c, 0x1940, 0x250a, 0x0152,
- 0x302e, 0x0c1c, 0x1840, 0x240a, 0x0052,
- 0x3030, 0x0c1e, 0x1842, 0x240c, 0x0054,
- 0x3130, 0x0d1e, 0x1942, 0x250c, 0x0154,
- 0x3230, 0x0e1e, 0x1a42, 0x260c, 0x0254,
- 0x3232, 0x0e20, 0x1a44, 0x260e, 0x0256,
- 0x3132, 0x0d20, 0x1944, 0x250e, 0x0156,
- 0x3032, 0x0c20, 0x1844, 0x240e, 0x0056,
- 0x3034, 0x0c22, 0x1846, 0x2410, 0x0058,
- 0x3134, 0x0d22, 0x1946, 0x2510, 0x0158,
- 0x3234, 0x0e22, 0x1a46, 0x2610, 0x0258,
- 0x3624, 0x1212, 0x1e36, 0x2a00, 0x0648,
- 0x3724, 0x1312, 0x1f36, 0x2b00, 0x0748,
- 0x3824, 0x1412, 0x2036, 0x2c00, 0x0848,
- 0x3826, 0x1414, 0x2038, 0x2c02, 0x084a,
- 0x3726, 0x1314, 0x1f38, 0x2b02, 0x074a,
- 0x3626, 0x1214, 0x1e38, 0x2a02, 0x064a,
- 0x3628, 0x1216, 0x1e3a, 0x2a04, 0x064c,
- 0x3728, 0x1316, 0x1f3a, 0x2b04, 0x074c,
- 0x3828, 0x1416, 0x203a, 0x2c04, 0x084c,
- 0x382a, 0x1418, 0x203c, 0x2c06, 0x084e,
- 0x372a, 0x1318, 0x1f3c, 0x2b06, 0x074e,
- 0x362a, 0x1218, 0x1e3c, 0x2a06, 0x064e,
- 0x362c, 0x121a, 0x1e3e, 0x2a08, 0x0650,
- 0x372c, 0x131a, 0x1f3e, 0x2b08, 0x0750,
- 0x382c, 0x141a, 0x203e, 0x2c08, 0x0850,
- 0x382e, 0x141c, 0x2040, 0x2c0a, 0x0852,
- 0x372e, 0x131c, 0x1f40, 0x2b0a, 0x0752,
- 0x362e, 0x121c, 0x1e40, 0x2a0a, 0x0652,
- 0x3630, 0x121e, 0x1e42, 0x2a0c, 0x0654,
- 0x3730, 0x131e, 0x1f42, 0x2b0c, 0x0754,
- 0x3830, 0x141e, 0x2042, 0x2c0c, 0x0854,
- 0x3832, 0x1420, 0x2044, 0x2c0e, 0x0856,
- 0x3732, 0x1320, 0x1f44, 0x2b0e, 0x0756,
- 0x3632, 0x1220, 0x1e44, 0x2a0e, 0x0656,
- 0x3634, 0x1222, 0x1e46, 0x2a10, 0x0658,
- 0x3734, 0x1322, 0x1f46, 0x2b10, 0x0758,
- 0x3834, 0x1422, 0x2046, 0x2c10, 0x0858,
- 0x0024, 0x1812, 0x2436, 0x3000, 0x0c48,
- 0x0124, 0x1912, 0x2536, 0x3100, 0x0d48,
- 0x0224, 0x1a12, 0x2636, 0x3200, 0x0e48,
- 0x0226, 0x1a14, 0x2638, 0x3202, 0x0e4a,
- 0x0126, 0x1914, 0x2538, 0x3102, 0x0d4a,
- 0x0026, 0x1814, 0x2438, 0x3002, 0x0c4a,
- 0x0028, 0x1816, 0x243a, 0x3004, 0x0c4c,
- 0x0128, 0x1916, 0x253a, 0x3104, 0x0d4c,
- 0x0228, 0x1a16, 0x263a, 0x3204, 0x0e4c,
- 0x022a, 0x1a18, 0x263c, 0x3206, 0x0e4e,
- 0x012a, 0x1918, 0x253c, 0x3106, 0x0d4e,
- 0x002a, 0x1818, 0x243c, 0x3006, 0x0c4e,
- 0x002c, 0x181a, 0x243e, 0x3008, 0x0c50,
- 0x012c, 0x191a, 0x253e, 0x3108, 0x0d50,
- 0x022c, 0x1a1a, 0x263e, 0x3208, 0x0e50,
- 0x022e, 0x1a1c, 0x2640, 0x320a, 0x0e52,
- 0x012e, 0x191c, 0x2540, 0x310a, 0x0d52,
- 0x002e, 0x181c, 0x2440, 0x300a, 0x0c52,
- 0x0030, 0x181e, 0x2442, 0x300c, 0x0c54,
- 0x0130, 0x191e, 0x2542, 0x310c, 0x0d54,
- 0x0230, 0x1a1e, 0x2642, 0x320c, 0x0e54,
- 0x0232, 0x1a20, 0x2644, 0x320e, 0x0e56,
- 0x0132, 0x1920, 0x2544, 0x310e, 0x0d56,
- 0x0032, 0x1820, 0x2444, 0x300e, 0x0c56,
- 0x0034, 0x1822, 0x2446, 0x3010, 0x0c58,
- 0x0134, 0x1922, 0x2546, 0x3110, 0x0d58,
- 0x0234, 0x1a22, 0x2646, 0x3210, 0x0e58,
- 0x0624, 0x1e12, 0x2a36, 0x3600, 0x1248,
- 0x0724, 0x1f12, 0x2b36, 0x3700, 0x1348,
- 0x0824, 0x2012, 0x2c36, 0x3800, 0x1448,
- 0x0826, 0x2014, 0x2c38, 0x3802, 0x144a,
- 0x0726, 0x1f14, 0x2b38, 0x3702, 0x134a,
- 0x0626, 0x1e14, 0x2a38, 0x3602, 0x124a,
- 0x0628, 0x1e16, 0x2a3a, 0x3604, 0x124c,
- 0x0728, 0x1f16, 0x2b3a, 0x3704, 0x134c,
- 0x0828, 0x2016, 0x2c3a, 0x3804, 0x144c,
- 0x082a, 0x2018, 0x2c3c, 0x3806, 0x144e,
- 0x072a, 0x1f18, 0x2b3c, 0x3706, 0x134e,
- 0x062a, 0x1e18, 0x2a3c, 0x3606, 0x124e,
- 0x062c, 0x1e1a, 0x2a3e, 0x3608, 0x1250,
- 0x072c, 0x1f1a, 0x2b3e, 0x3708, 0x1350,
- 0x082c, 0x201a, 0x2c3e, 0x3808, 0x1450,
- 0x082e, 0x201c, 0x2c40, 0x380a, 0x1452,
- 0x072e, 0x1f1c, 0x2b40, 0x370a, 0x1352,
- 0x062e, 0x1e1c, 0x2a40, 0x360a, 0x1252,
- 0x0630, 0x1e1e, 0x2a42, 0x360c, 0x1254,
- 0x0730, 0x1f1e, 0x2b42, 0x370c, 0x1354,
- 0x0830, 0x201e, 0x2c42, 0x380c, 0x1454,
- 0x0832, 0x2020, 0x2c44, 0x380e, 0x1456,
- 0x0732, 0x1f20, 0x2b44, 0x370e, 0x1356,
- 0x0632, 0x1e20, 0x2a44, 0x360e, 0x1256,
- 0x0634, 0x1e22, 0x2a46, 0x3610, 0x1258,
- 0x0734, 0x1f22, 0x2b46, 0x3710, 0x1358,
- 0x0834, 0x2022, 0x2c46, 0x3810, 0x1458,
- 0x0f24, 0x2712, 0x3336, 0x0300, 0x1b48,
- 0x1024, 0x2812, 0x3436, 0x0400, 0x1c48,
- 0x1124, 0x2912, 0x3536, 0x0500, 0x1d48,
- 0x1126, 0x2914, 0x3538, 0x0502, 0x1d4a,
- 0x1026, 0x2814, 0x3438, 0x0402, 0x1c4a,
- 0x0f26, 0x2714, 0x3338, 0x0302, 0x1b4a,
- 0x0f28, 0x2716, 0x333a, 0x0304, 0x1b4c,
- 0x1028, 0x2816, 0x343a, 0x0404, 0x1c4c,
- 0x1128, 0x2916, 0x353a, 0x0504, 0x1d4c,
- 0x112a, 0x2918, 0x353c, 0x0506, 0x1d4e,
- 0x102a, 0x2818, 0x343c, 0x0406, 0x1c4e,
- 0x0f2a, 0x2718, 0x333c, 0x0306, 0x1b4e,
- 0x0f2c, 0x271a, 0x333e, 0x0308, 0x1b50,
- 0x102c, 0x281a, 0x343e, 0x0408, 0x1c50,
- 0x112c, 0x291a, 0x353e, 0x0508, 0x1d50,
- 0x112e, 0x291c, 0x3540, 0x050a, 0x1d52,
- 0x102e, 0x281c, 0x3440, 0x040a, 0x1c52,
- 0x0f2e, 0x271c, 0x3340, 0x030a, 0x1b52,
- 0x0f30, 0x271e, 0x3342, 0x030c, 0x1b54,
- 0x1030, 0x281e, 0x3442, 0x040c, 0x1c54,
- 0x1130, 0x291e, 0x3542, 0x050c, 0x1d54,
- 0x1132, 0x2920, 0x3544, 0x050e, 0x1d56,
- 0x1032, 0x2820, 0x3444, 0x040e, 0x1c56,
- 0x0f32, 0x2720, 0x3344, 0x030e, 0x1b56,
- 0x0f34, 0x2722, 0x3346, 0x0310, 0x1b58,
- 0x1034, 0x2822, 0x3446, 0x0410, 0x1c58,
- 0x1134, 0x2922, 0x3546, 0x0510, 0x1d58,
- 0x1524, 0x2d12, 0x3936, 0x0900, 0x2148,
- 0x1624, 0x2e12, 0x3a36, 0x0a00, 0x2248,
- 0x1724, 0x2f12, 0x3b36, 0x0b00, 0x2348,
- 0x1726, 0x2f14, 0x3b38, 0x0b02, 0x234a,
- 0x1626, 0x2e14, 0x3a38, 0x0a02, 0x224a,
- 0x1526, 0x2d14, 0x3938, 0x0902, 0x214a,
- 0x1528, 0x2d16, 0x393a, 0x0904, 0x214c,
- 0x1628, 0x2e16, 0x3a3a, 0x0a04, 0x224c,
- 0x1728, 0x2f16, 0x3b3a, 0x0b04, 0x234c,
- 0x172a, 0x2f18, 0x3b3c, 0x0b06, 0x234e,
- 0x162a, 0x2e18, 0x3a3c, 0x0a06, 0x224e,
- 0x152a, 0x2d18, 0x393c, 0x0906, 0x214e,
- 0x152c, 0x2d1a, 0x393e, 0x0908, 0x2150,
- 0x162c, 0x2e1a, 0x3a3e, 0x0a08, 0x2250,
- 0x172c, 0x2f1a, 0x3b3e, 0x0b08, 0x2350,
- 0x172e, 0x2f1c, 0x3b40, 0x0b0a, 0x2352,
- 0x162e, 0x2e1c, 0x3a40, 0x0a0a, 0x2252,
- 0x152e, 0x2d1c, 0x3940, 0x090a, 0x2152,
- 0x1530, 0x2d1e, 0x3942, 0x090c, 0x2154,
- 0x1630, 0x2e1e, 0x3a42, 0x0a0c, 0x2254,
- 0x1730, 0x2f1e, 0x3b42, 0x0b0c, 0x2354,
- 0x1732, 0x2f20, 0x3b44, 0x0b0e, 0x2356,
- 0x1632, 0x2e20, 0x3a44, 0x0a0e, 0x2256,
- 0x1532, 0x2d20, 0x3944, 0x090e, 0x2156,
- 0x1534, 0x2d22, 0x3946, 0x0910, 0x2158,
- 0x1634, 0x2e22, 0x3a46, 0x0a10, 0x2258,
- 0x1734, 0x2f22, 0x3b46, 0x0b10, 0x2358,
- 0x1b24, 0x3312, 0x0336, 0x0f00, 0x2748,
- 0x1c24, 0x3412, 0x0436, 0x1000, 0x2848,
- 0x1d24, 0x3512, 0x0536, 0x1100, 0x2948,
- 0x1d26, 0x3514, 0x0538, 0x1102, 0x294a,
- 0x1c26, 0x3414, 0x0438, 0x1002, 0x284a,
- 0x1b26, 0x3314, 0x0338, 0x0f02, 0x274a,
- 0x1b28, 0x3316, 0x033a, 0x0f04, 0x274c,
- 0x1c28, 0x3416, 0x043a, 0x1004, 0x284c,
- 0x1d28, 0x3516, 0x053a, 0x1104, 0x294c,
- 0x1d2a, 0x3518, 0x053c, 0x1106, 0x294e,
- 0x1c2a, 0x3418, 0x043c, 0x1006, 0x284e,
- 0x1b2a, 0x3318, 0x033c, 0x0f06, 0x274e,
- 0x1b2c, 0x331a, 0x033e, 0x0f08, 0x2750,
- 0x1c2c, 0x341a, 0x043e, 0x1008, 0x2850,
- 0x1d2c, 0x351a, 0x053e, 0x1108, 0x2950,
- 0x1d2e, 0x351c, 0x0540, 0x110a, 0x2952,
- 0x1c2e, 0x341c, 0x0440, 0x100a, 0x2852,
- 0x1b2e, 0x331c, 0x0340, 0x0f0a, 0x2752,
- 0x1b30, 0x331e, 0x0342, 0x0f0c, 0x2754,
- 0x1c30, 0x341e, 0x0442, 0x100c, 0x2854,
- 0x1d30, 0x351e, 0x0542, 0x110c, 0x2954,
- 0x1d32, 0x3520, 0x0544, 0x110e, 0x2956,
- 0x1c32, 0x3420, 0x0444, 0x100e, 0x2856,
- 0x1b32, 0x3320, 0x0344, 0x0f0e, 0x2756,
- 0x1b34, 0x3322, 0x0346, 0x0f10, 0x2758,
- 0x1c34, 0x3422, 0x0446, 0x1010, 0x2858,
- 0x1d34, 0x3522, 0x0546, 0x1110, 0x2958,
- 0x2124, 0x3912, 0x0936, 0x1500, 0x2d48,
- 0x2224, 0x3a12, 0x0a36, 0x1600, 0x2e48,
- 0x2324, 0x3b12, 0x0b36, 0x1700, 0x2f48,
- 0x2326, 0x3b14, 0x0b38, 0x1702, 0x2f4a,
- 0x2226, 0x3a14, 0x0a38, 0x1602, 0x2e4a,
- 0x2126, 0x3914, 0x0938, 0x1502, 0x2d4a,
- 0x2128, 0x3916, 0x093a, 0x1504, 0x2d4c,
- 0x2228, 0x3a16, 0x0a3a, 0x1604, 0x2e4c,
- 0x2328, 0x3b16, 0x0b3a, 0x1704, 0x2f4c,
- 0x232a, 0x3b18, 0x0b3c, 0x1706, 0x2f4e,
- 0x222a, 0x3a18, 0x0a3c, 0x1606, 0x2e4e,
- 0x212a, 0x3918, 0x093c, 0x1506, 0x2d4e,
- 0x212c, 0x391a, 0x093e, 0x1508, 0x2d50,
- 0x222c, 0x3a1a, 0x0a3e, 0x1608, 0x2e50,
- 0x232c, 0x3b1a, 0x0b3e, 0x1708, 0x2f50,
- 0x232e, 0x3b1c, 0x0b40, 0x170a, 0x2f52,
- 0x222e, 0x3a1c, 0x0a40, 0x160a, 0x2e52,
- 0x212e, 0x391c, 0x0940, 0x150a, 0x2d52,
- 0x2130, 0x391e, 0x0942, 0x150c, 0x2d54,
- 0x2230, 0x3a1e, 0x0a42, 0x160c, 0x2e54,
- 0x2330, 0x3b1e, 0x0b42, 0x170c, 0x2f54,
- 0x2332, 0x3b20, 0x0b44, 0x170e, 0x2f56,
- 0x2232, 0x3a20, 0x0a44, 0x160e, 0x2e56,
- 0x2132, 0x3920, 0x0944, 0x150e, 0x2d56,
- 0x2134, 0x3922, 0x0946, 0x1510, 0x2d58,
- 0x2234, 0x3a22, 0x0a46, 0x1610, 0x2e58,
- 0x2334, 0x3b22, 0x0b46, 0x1710, 0x2f58,
- 0x2724, 0x0312, 0x0f36, 0x1b00, 0x3348,
- 0x2824, 0x0412, 0x1036, 0x1c00, 0x3448,
- 0x2924, 0x0512, 0x1136, 0x1d00, 0x3548,
- 0x2926, 0x0514, 0x1138, 0x1d02, 0x354a,
- 0x2826, 0x0414, 0x1038, 0x1c02, 0x344a,
- 0x2726, 0x0314, 0x0f38, 0x1b02, 0x334a,
- 0x2728, 0x0316, 0x0f3a, 0x1b04, 0x334c,
- 0x2828, 0x0416, 0x103a, 0x1c04, 0x344c,
- 0x2928, 0x0516, 0x113a, 0x1d04, 0x354c,
- 0x292a, 0x0518, 0x113c, 0x1d06, 0x354e,
- 0x282a, 0x0418, 0x103c, 0x1c06, 0x344e,
- 0x272a, 0x0318, 0x0f3c, 0x1b06, 0x334e,
- 0x272c, 0x031a, 0x0f3e, 0x1b08, 0x3350,
- 0x282c, 0x041a, 0x103e, 0x1c08, 0x3450,
- 0x292c, 0x051a, 0x113e, 0x1d08, 0x3550,
- 0x292e, 0x051c, 0x1140, 0x1d0a, 0x3552,
- 0x282e, 0x041c, 0x1040, 0x1c0a, 0x3452,
- 0x272e, 0x031c, 0x0f40, 0x1b0a, 0x3352,
- 0x2730, 0x031e, 0x0f42, 0x1b0c, 0x3354,
- 0x2830, 0x041e, 0x1042, 0x1c0c, 0x3454,
- 0x2930, 0x051e, 0x1142, 0x1d0c, 0x3554,
- 0x2932, 0x0520, 0x1144, 0x1d0e, 0x3556,
- 0x2832, 0x0420, 0x1044, 0x1c0e, 0x3456,
- 0x2732, 0x0320, 0x0f44, 0x1b0e, 0x3356,
- 0x2734, 0x0322, 0x0f46, 0x1b10, 0x3358,
- 0x2834, 0x0422, 0x1046, 0x1c10, 0x3458,
- 0x2934, 0x0522, 0x1146, 0x1d10, 0x3558,
- 0x2d24, 0x0912, 0x1536, 0x2100, 0x3948,
- 0x2e24, 0x0a12, 0x1636, 0x2200, 0x3a48,
- 0x2f24, 0x0b12, 0x1736, 0x2300, 0x3b48,
- 0x2f26, 0x0b14, 0x1738, 0x2302, 0x3b4a,
- 0x2e26, 0x0a14, 0x1638, 0x2202, 0x3a4a,
- 0x2d26, 0x0914, 0x1538, 0x2102, 0x394a,
- 0x2d28, 0x0916, 0x153a, 0x2104, 0x394c,
- 0x2e28, 0x0a16, 0x163a, 0x2204, 0x3a4c,
- 0x2f28, 0x0b16, 0x173a, 0x2304, 0x3b4c,
- 0x2f2a, 0x0b18, 0x173c, 0x2306, 0x3b4e,
- 0x2e2a, 0x0a18, 0x163c, 0x2206, 0x3a4e,
- 0x2d2a, 0x0918, 0x153c, 0x2106, 0x394e,
- 0x2d2c, 0x091a, 0x153e, 0x2108, 0x3950,
- 0x2e2c, 0x0a1a, 0x163e, 0x2208, 0x3a50,
- 0x2f2c, 0x0b1a, 0x173e, 0x2308, 0x3b50,
- 0x2f2e, 0x0b1c, 0x1740, 0x230a, 0x3b52,
- 0x2e2e, 0x0a1c, 0x1640, 0x220a, 0x3a52,
- 0x2d2e, 0x091c, 0x1540, 0x210a, 0x3952,
- 0x2d30, 0x091e, 0x1542, 0x210c, 0x3954,
- 0x2e30, 0x0a1e, 0x1642, 0x220c, 0x3a54,
- 0x2f30, 0x0b1e, 0x1742, 0x230c, 0x3b54,
- 0x2f32, 0x0b20, 0x1744, 0x230e, 0x3b56,
- 0x2e32, 0x0a20, 0x1644, 0x220e, 0x3a56,
- 0x2d32, 0x0920, 0x1544, 0x210e, 0x3956,
- 0x2d34, 0x0922, 0x1546, 0x2110, 0x3958,
- 0x2e34, 0x0a22, 0x1646, 0x2210, 0x3a58,
- 0x2f34, 0x0b22, 0x1746, 0x2310, 0x3b58,
- 0x3324, 0x0f12, 0x1b36, 0x2700, 0x0348,
- 0x3424, 0x1012, 0x1c36, 0x2800, 0x0448,
- 0x3524, 0x1112, 0x1d36, 0x2900, 0x0548,
- 0x3526, 0x1114, 0x1d38, 0x2902, 0x054a,
- 0x3426, 0x1014, 0x1c38, 0x2802, 0x044a,
- 0x3326, 0x0f14, 0x1b38, 0x2702, 0x034a,
- 0x3328, 0x0f16, 0x1b3a, 0x2704, 0x034c,
- 0x3428, 0x1016, 0x1c3a, 0x2804, 0x044c,
- 0x3528, 0x1116, 0x1d3a, 0x2904, 0x054c,
- 0x352a, 0x1118, 0x1d3c, 0x2906, 0x054e,
- 0x342a, 0x1018, 0x1c3c, 0x2806, 0x044e,
- 0x332a, 0x0f18, 0x1b3c, 0x2706, 0x034e,
- 0x332c, 0x0f1a, 0x1b3e, 0x2708, 0x0350,
- 0x342c, 0x101a, 0x1c3e, 0x2808, 0x0450,
- 0x352c, 0x111a, 0x1d3e, 0x2908, 0x0550,
- 0x352e, 0x111c, 0x1d40, 0x290a, 0x0552,
- 0x342e, 0x101c, 0x1c40, 0x280a, 0x0452,
- 0x332e, 0x0f1c, 0x1b40, 0x270a, 0x0352,
- 0x3330, 0x0f1e, 0x1b42, 0x270c, 0x0354,
- 0x3430, 0x101e, 0x1c42, 0x280c, 0x0454,
- 0x3530, 0x111e, 0x1d42, 0x290c, 0x0554,
- 0x3532, 0x1120, 0x1d44, 0x290e, 0x0556,
- 0x3432, 0x1020, 0x1c44, 0x280e, 0x0456,
- 0x3332, 0x0f20, 0x1b44, 0x270e, 0x0356,
- 0x3334, 0x0f22, 0x1b46, 0x2710, 0x0358,
- 0x3434, 0x1022, 0x1c46, 0x2810, 0x0458,
- 0x3534, 0x1122, 0x1d46, 0x2910, 0x0558,
- 0x3924, 0x1512, 0x2136, 0x2d00, 0x0948,
- 0x3a24, 0x1612, 0x2236, 0x2e00, 0x0a48,
- 0x3b24, 0x1712, 0x2336, 0x2f00, 0x0b48,
- 0x3b26, 0x1714, 0x2338, 0x2f02, 0x0b4a,
- 0x3a26, 0x1614, 0x2238, 0x2e02, 0x0a4a,
- 0x3926, 0x1514, 0x2138, 0x2d02, 0x094a,
- 0x3928, 0x1516, 0x213a, 0x2d04, 0x094c,
- 0x3a28, 0x1616, 0x223a, 0x2e04, 0x0a4c,
- 0x3b28, 0x1716, 0x233a, 0x2f04, 0x0b4c,
- 0x3b2a, 0x1718, 0x233c, 0x2f06, 0x0b4e,
- 0x3a2a, 0x1618, 0x223c, 0x2e06, 0x0a4e,
- 0x392a, 0x1518, 0x213c, 0x2d06, 0x094e,
- 0x392c, 0x151a, 0x213e, 0x2d08, 0x0950,
- 0x3a2c, 0x161a, 0x223e, 0x2e08, 0x0a50,
- 0x3b2c, 0x171a, 0x233e, 0x2f08, 0x0b50,
- 0x3b2e, 0x171c, 0x2340, 0x2f0a, 0x0b52,
- 0x3a2e, 0x161c, 0x2240, 0x2e0a, 0x0a52,
- 0x392e, 0x151c, 0x2140, 0x2d0a, 0x0952,
- 0x3930, 0x151e, 0x2142, 0x2d0c, 0x0954,
- 0x3a30, 0x161e, 0x2242, 0x2e0c, 0x0a54,
- 0x3b30, 0x171e, 0x2342, 0x2f0c, 0x0b54,
- 0x3b32, 0x1720, 0x2344, 0x2f0e, 0x0b56,
- 0x3a32, 0x1620, 0x2244, 0x2e0e, 0x0a56,
- 0x3932, 0x1520, 0x2144, 0x2d0e, 0x0956,
- 0x3934, 0x1522, 0x2146, 0x2d10, 0x0958,
- 0x3a34, 0x1622, 0x2246, 0x2e10, 0x0a58,
- 0x3b34, 0x1722, 0x2346, 0x2f10, 0x0b58,
- 0x0324, 0x1b12, 0x2736, 0x3300, 0x0f48,
- 0x0424, 0x1c12, 0x2836, 0x3400, 0x1048,
- 0x0524, 0x1d12, 0x2936, 0x3500, 0x1148,
- 0x0526, 0x1d14, 0x2938, 0x3502, 0x114a,
- 0x0426, 0x1c14, 0x2838, 0x3402, 0x104a,
- 0x0326, 0x1b14, 0x2738, 0x3302, 0x0f4a,
- 0x0328, 0x1b16, 0x273a, 0x3304, 0x0f4c,
- 0x0428, 0x1c16, 0x283a, 0x3404, 0x104c,
- 0x0528, 0x1d16, 0x293a, 0x3504, 0x114c,
- 0x052a, 0x1d18, 0x293c, 0x3506, 0x114e,
- 0x042a, 0x1c18, 0x283c, 0x3406, 0x104e,
- 0x032a, 0x1b18, 0x273c, 0x3306, 0x0f4e,
- 0x032c, 0x1b1a, 0x273e, 0x3308, 0x0f50,
- 0x042c, 0x1c1a, 0x283e, 0x3408, 0x1050,
- 0x052c, 0x1d1a, 0x293e, 0x3508, 0x1150,
- 0x052e, 0x1d1c, 0x2940, 0x350a, 0x1152,
- 0x042e, 0x1c1c, 0x2840, 0x340a, 0x1052,
- 0x032e, 0x1b1c, 0x2740, 0x330a, 0x0f52,
- 0x0330, 0x1b1e, 0x2742, 0x330c, 0x0f54,
- 0x0430, 0x1c1e, 0x2842, 0x340c, 0x1054,
- 0x0530, 0x1d1e, 0x2942, 0x350c, 0x1154,
- 0x0532, 0x1d20, 0x2944, 0x350e, 0x1156,
- 0x0432, 0x1c20, 0x2844, 0x340e, 0x1056,
- 0x0332, 0x1b20, 0x2744, 0x330e, 0x0f56,
- 0x0334, 0x1b22, 0x2746, 0x3310, 0x0f58,
- 0x0434, 0x1c22, 0x2846, 0x3410, 0x1058,
- 0x0534, 0x1d22, 0x2946, 0x3510, 0x1158,
- 0x0924, 0x2112, 0x2d36, 0x3900, 0x1548,
- 0x0a24, 0x2212, 0x2e36, 0x3a00, 0x1648,
- 0x0b24, 0x2312, 0x2f36, 0x3b00, 0x1748,
- 0x0b26, 0x2314, 0x2f38, 0x3b02, 0x174a,
- 0x0a26, 0x2214, 0x2e38, 0x3a02, 0x164a,
- 0x0926, 0x2114, 0x2d38, 0x3902, 0x154a,
- 0x0928, 0x2116, 0x2d3a, 0x3904, 0x154c,
- 0x0a28, 0x2216, 0x2e3a, 0x3a04, 0x164c,
- 0x0b28, 0x2316, 0x2f3a, 0x3b04, 0x174c,
- 0x0b2a, 0x2318, 0x2f3c, 0x3b06, 0x174e,
- 0x0a2a, 0x2218, 0x2e3c, 0x3a06, 0x164e,
- 0x092a, 0x2118, 0x2d3c, 0x3906, 0x154e,
- 0x092c, 0x211a, 0x2d3e, 0x3908, 0x1550,
- 0x0a2c, 0x221a, 0x2e3e, 0x3a08, 0x1650,
- 0x0b2c, 0x231a, 0x2f3e, 0x3b08, 0x1750,
- 0x0b2e, 0x231c, 0x2f40, 0x3b0a, 0x1752,
- 0x0a2e, 0x221c, 0x2e40, 0x3a0a, 0x1652,
- 0x092e, 0x211c, 0x2d40, 0x390a, 0x1552,
- 0x0930, 0x211e, 0x2d42, 0x390c, 0x1554,
- 0x0a30, 0x221e, 0x2e42, 0x3a0c, 0x1654,
- 0x0b30, 0x231e, 0x2f42, 0x3b0c, 0x1754,
- 0x0b32, 0x2320, 0x2f44, 0x3b0e, 0x1756,
- 0x0a32, 0x2220, 0x2e44, 0x3a0e, 0x1656,
- 0x0932, 0x2120, 0x2d44, 0x390e, 0x1556,
- 0x0934, 0x2122, 0x2d46, 0x3910, 0x1558,
- 0x0a34, 0x2222, 0x2e46, 0x3a10, 0x1658,
- 0x0b34, 0x2322, 0x2f46, 0x3b10, 0x1758,
-};
-
-/* 2 channels per frame, 12 DIF sequences per channel,
- 27 video segments per DIF sequence, 5 macroblocks per video segment */
-static const uint16_t dv_place_422_625[2*12*27*5] = {
- 0x0c24, 0x2412, 0x3036, 0x0000, 0x1848,
- 0x0d24, 0x2512, 0x3136, 0x0100, 0x1948,
- 0x0e24, 0x2612, 0x3236, 0x0200, 0x1a48,
- 0x0e26, 0x2614, 0x3238, 0x0202, 0x1a4a,
- 0x0d26, 0x2514, 0x3138, 0x0102, 0x194a,
- 0x0c26, 0x2414, 0x3038, 0x0002, 0x184a,
- 0x0c28, 0x2416, 0x303a, 0x0004, 0x184c,
- 0x0d28, 0x2516, 0x313a, 0x0104, 0x194c,
- 0x0e28, 0x2616, 0x323a, 0x0204, 0x1a4c,
- 0x0e2a, 0x2618, 0x323c, 0x0206, 0x1a4e,
- 0x0d2a, 0x2518, 0x313c, 0x0106, 0x194e,
- 0x0c2a, 0x2418, 0x303c, 0x0006, 0x184e,
- 0x0c2c, 0x241a, 0x303e, 0x0008, 0x1850,
- 0x0d2c, 0x251a, 0x313e, 0x0108, 0x1950,
- 0x0e2c, 0x261a, 0x323e, 0x0208, 0x1a50,
- 0x0e2e, 0x261c, 0x3240, 0x020a, 0x1a52,
- 0x0d2e, 0x251c, 0x3140, 0x010a, 0x1952,
- 0x0c2e, 0x241c, 0x3040, 0x000a, 0x1852,
- 0x0c30, 0x241e, 0x3042, 0x000c, 0x1854,
- 0x0d30, 0x251e, 0x3142, 0x010c, 0x1954,
- 0x0e30, 0x261e, 0x3242, 0x020c, 0x1a54,
- 0x0e32, 0x2620, 0x3244, 0x020e, 0x1a56,
- 0x0d32, 0x2520, 0x3144, 0x010e, 0x1956,
- 0x0c32, 0x2420, 0x3044, 0x000e, 0x1856,
- 0x0c34, 0x2422, 0x3046, 0x0010, 0x1858,
- 0x0d34, 0x2522, 0x3146, 0x0110, 0x1958,
- 0x0e34, 0x2622, 0x3246, 0x0210, 0x1a58,
- 0x1224, 0x2a12, 0x3636, 0x0600, 0x1e48,
- 0x1324, 0x2b12, 0x3736, 0x0700, 0x1f48,
- 0x1424, 0x2c12, 0x3836, 0x0800, 0x2048,
- 0x1426, 0x2c14, 0x3838, 0x0802, 0x204a,
- 0x1326, 0x2b14, 0x3738, 0x0702, 0x1f4a,
- 0x1226, 0x2a14, 0x3638, 0x0602, 0x1e4a,
- 0x1228, 0x2a16, 0x363a, 0x0604, 0x1e4c,
- 0x1328, 0x2b16, 0x373a, 0x0704, 0x1f4c,
- 0x1428, 0x2c16, 0x383a, 0x0804, 0x204c,
- 0x142a, 0x2c18, 0x383c, 0x0806, 0x204e,
- 0x132a, 0x2b18, 0x373c, 0x0706, 0x1f4e,
- 0x122a, 0x2a18, 0x363c, 0x0606, 0x1e4e,
- 0x122c, 0x2a1a, 0x363e, 0x0608, 0x1e50,
- 0x132c, 0x2b1a, 0x373e, 0x0708, 0x1f50,
- 0x142c, 0x2c1a, 0x383e, 0x0808, 0x2050,
- 0x142e, 0x2c1c, 0x3840, 0x080a, 0x2052,
- 0x132e, 0x2b1c, 0x3740, 0x070a, 0x1f52,
- 0x122e, 0x2a1c, 0x3640, 0x060a, 0x1e52,
- 0x1230, 0x2a1e, 0x3642, 0x060c, 0x1e54,
- 0x1330, 0x2b1e, 0x3742, 0x070c, 0x1f54,
- 0x1430, 0x2c1e, 0x3842, 0x080c, 0x2054,
- 0x1432, 0x2c20, 0x3844, 0x080e, 0x2056,
- 0x1332, 0x2b20, 0x3744, 0x070e, 0x1f56,
- 0x1232, 0x2a20, 0x3644, 0x060e, 0x1e56,
- 0x1234, 0x2a22, 0x3646, 0x0610, 0x1e58,
- 0x1334, 0x2b22, 0x3746, 0x0710, 0x1f58,
- 0x1434, 0x2c22, 0x3846, 0x0810, 0x2058,
- 0x1824, 0x3012, 0x3c36, 0x0c00, 0x2448,
- 0x1924, 0x3112, 0x3d36, 0x0d00, 0x2548,
- 0x1a24, 0x3212, 0x3e36, 0x0e00, 0x2648,
- 0x1a26, 0x3214, 0x3e38, 0x0e02, 0x264a,
- 0x1926, 0x3114, 0x3d38, 0x0d02, 0x254a,
- 0x1826, 0x3014, 0x3c38, 0x0c02, 0x244a,
- 0x1828, 0x3016, 0x3c3a, 0x0c04, 0x244c,
- 0x1928, 0x3116, 0x3d3a, 0x0d04, 0x254c,
- 0x1a28, 0x3216, 0x3e3a, 0x0e04, 0x264c,
- 0x1a2a, 0x3218, 0x3e3c, 0x0e06, 0x264e,
- 0x192a, 0x3118, 0x3d3c, 0x0d06, 0x254e,
- 0x182a, 0x3018, 0x3c3c, 0x0c06, 0x244e,
- 0x182c, 0x301a, 0x3c3e, 0x0c08, 0x2450,
- 0x192c, 0x311a, 0x3d3e, 0x0d08, 0x2550,
- 0x1a2c, 0x321a, 0x3e3e, 0x0e08, 0x2650,
- 0x1a2e, 0x321c, 0x3e40, 0x0e0a, 0x2652,
- 0x192e, 0x311c, 0x3d40, 0x0d0a, 0x2552,
- 0x182e, 0x301c, 0x3c40, 0x0c0a, 0x2452,
- 0x1830, 0x301e, 0x3c42, 0x0c0c, 0x2454,
- 0x1930, 0x311e, 0x3d42, 0x0d0c, 0x2554,
- 0x1a30, 0x321e, 0x3e42, 0x0e0c, 0x2654,
- 0x1a32, 0x3220, 0x3e44, 0x0e0e, 0x2656,
- 0x1932, 0x3120, 0x3d44, 0x0d0e, 0x2556,
- 0x1832, 0x3020, 0x3c44, 0x0c0e, 0x2456,
- 0x1834, 0x3022, 0x3c46, 0x0c10, 0x2458,
- 0x1934, 0x3122, 0x3d46, 0x0d10, 0x2558,
- 0x1a34, 0x3222, 0x3e46, 0x0e10, 0x2658,
- 0x1e24, 0x3612, 0x4236, 0x1200, 0x2a48,
- 0x1f24, 0x3712, 0x4336, 0x1300, 0x2b48,
- 0x2024, 0x3812, 0x4436, 0x1400, 0x2c48,
- 0x2026, 0x3814, 0x4438, 0x1402, 0x2c4a,
- 0x1f26, 0x3714, 0x4338, 0x1302, 0x2b4a,
- 0x1e26, 0x3614, 0x4238, 0x1202, 0x2a4a,
- 0x1e28, 0x3616, 0x423a, 0x1204, 0x2a4c,
- 0x1f28, 0x3716, 0x433a, 0x1304, 0x2b4c,
- 0x2028, 0x3816, 0x443a, 0x1404, 0x2c4c,
- 0x202a, 0x3818, 0x443c, 0x1406, 0x2c4e,
- 0x1f2a, 0x3718, 0x433c, 0x1306, 0x2b4e,
- 0x1e2a, 0x3618, 0x423c, 0x1206, 0x2a4e,
- 0x1e2c, 0x361a, 0x423e, 0x1208, 0x2a50,
- 0x1f2c, 0x371a, 0x433e, 0x1308, 0x2b50,
- 0x202c, 0x381a, 0x443e, 0x1408, 0x2c50,
- 0x202e, 0x381c, 0x4440, 0x140a, 0x2c52,
- 0x1f2e, 0x371c, 0x4340, 0x130a, 0x2b52,
- 0x1e2e, 0x361c, 0x4240, 0x120a, 0x2a52,
- 0x1e30, 0x361e, 0x4242, 0x120c, 0x2a54,
- 0x1f30, 0x371e, 0x4342, 0x130c, 0x2b54,
- 0x2030, 0x381e, 0x4442, 0x140c, 0x2c54,
- 0x2032, 0x3820, 0x4444, 0x140e, 0x2c56,
- 0x1f32, 0x3720, 0x4344, 0x130e, 0x2b56,
- 0x1e32, 0x3620, 0x4244, 0x120e, 0x2a56,
- 0x1e34, 0x3622, 0x4246, 0x1210, 0x2a58,
- 0x1f34, 0x3722, 0x4346, 0x1310, 0x2b58,
- 0x2034, 0x3822, 0x4446, 0x1410, 0x2c58,
- 0x2424, 0x3c12, 0x0036, 0x1800, 0x3048,
- 0x2524, 0x3d12, 0x0136, 0x1900, 0x3148,
- 0x2624, 0x3e12, 0x0236, 0x1a00, 0x3248,
- 0x2626, 0x3e14, 0x0238, 0x1a02, 0x324a,
- 0x2526, 0x3d14, 0x0138, 0x1902, 0x314a,
- 0x2426, 0x3c14, 0x0038, 0x1802, 0x304a,
- 0x2428, 0x3c16, 0x003a, 0x1804, 0x304c,
- 0x2528, 0x3d16, 0x013a, 0x1904, 0x314c,
- 0x2628, 0x3e16, 0x023a, 0x1a04, 0x324c,
- 0x262a, 0x3e18, 0x023c, 0x1a06, 0x324e,
- 0x252a, 0x3d18, 0x013c, 0x1906, 0x314e,
- 0x242a, 0x3c18, 0x003c, 0x1806, 0x304e,
- 0x242c, 0x3c1a, 0x003e, 0x1808, 0x3050,
- 0x252c, 0x3d1a, 0x013e, 0x1908, 0x3150,
- 0x262c, 0x3e1a, 0x023e, 0x1a08, 0x3250,
- 0x262e, 0x3e1c, 0x0240, 0x1a0a, 0x3252,
- 0x252e, 0x3d1c, 0x0140, 0x190a, 0x3152,
- 0x242e, 0x3c1c, 0x0040, 0x180a, 0x3052,
- 0x2430, 0x3c1e, 0x0042, 0x180c, 0x3054,
- 0x2530, 0x3d1e, 0x0142, 0x190c, 0x3154,
- 0x2630, 0x3e1e, 0x0242, 0x1a0c, 0x3254,
- 0x2632, 0x3e20, 0x0244, 0x1a0e, 0x3256,
- 0x2532, 0x3d20, 0x0144, 0x190e, 0x3156,
- 0x2432, 0x3c20, 0x0044, 0x180e, 0x3056,
- 0x2434, 0x3c22, 0x0046, 0x1810, 0x3058,
- 0x2534, 0x3d22, 0x0146, 0x1910, 0x3158,
- 0x2634, 0x3e22, 0x0246, 0x1a10, 0x3258,
- 0x2a24, 0x4212, 0x0636, 0x1e00, 0x3648,
- 0x2b24, 0x4312, 0x0736, 0x1f00, 0x3748,
- 0x2c24, 0x4412, 0x0836, 0x2000, 0x3848,
- 0x2c26, 0x4414, 0x0838, 0x2002, 0x384a,
- 0x2b26, 0x4314, 0x0738, 0x1f02, 0x374a,
- 0x2a26, 0x4214, 0x0638, 0x1e02, 0x364a,
- 0x2a28, 0x4216, 0x063a, 0x1e04, 0x364c,
- 0x2b28, 0x4316, 0x073a, 0x1f04, 0x374c,
- 0x2c28, 0x4416, 0x083a, 0x2004, 0x384c,
- 0x2c2a, 0x4418, 0x083c, 0x2006, 0x384e,
- 0x2b2a, 0x4318, 0x073c, 0x1f06, 0x374e,
- 0x2a2a, 0x4218, 0x063c, 0x1e06, 0x364e,
- 0x2a2c, 0x421a, 0x063e, 0x1e08, 0x3650,
- 0x2b2c, 0x431a, 0x073e, 0x1f08, 0x3750,
- 0x2c2c, 0x441a, 0x083e, 0x2008, 0x3850,
- 0x2c2e, 0x441c, 0x0840, 0x200a, 0x3852,
- 0x2b2e, 0x431c, 0x0740, 0x1f0a, 0x3752,
- 0x2a2e, 0x421c, 0x0640, 0x1e0a, 0x3652,
- 0x2a30, 0x421e, 0x0642, 0x1e0c, 0x3654,
- 0x2b30, 0x431e, 0x0742, 0x1f0c, 0x3754,
- 0x2c30, 0x441e, 0x0842, 0x200c, 0x3854,
- 0x2c32, 0x4420, 0x0844, 0x200e, 0x3856,
- 0x2b32, 0x4320, 0x0744, 0x1f0e, 0x3756,
- 0x2a32, 0x4220, 0x0644, 0x1e0e, 0x3656,
- 0x2a34, 0x4222, 0x0646, 0x1e10, 0x3658,
- 0x2b34, 0x4322, 0x0746, 0x1f10, 0x3758,
- 0x2c34, 0x4422, 0x0846, 0x2010, 0x3858,
- 0x3024, 0x0012, 0x0c36, 0x2400, 0x3c48,
- 0x3124, 0x0112, 0x0d36, 0x2500, 0x3d48,
- 0x3224, 0x0212, 0x0e36, 0x2600, 0x3e48,
- 0x3226, 0x0214, 0x0e38, 0x2602, 0x3e4a,
- 0x3126, 0x0114, 0x0d38, 0x2502, 0x3d4a,
- 0x3026, 0x0014, 0x0c38, 0x2402, 0x3c4a,
- 0x3028, 0x0016, 0x0c3a, 0x2404, 0x3c4c,
- 0x3128, 0x0116, 0x0d3a, 0x2504, 0x3d4c,
- 0x3228, 0x0216, 0x0e3a, 0x2604, 0x3e4c,
- 0x322a, 0x0218, 0x0e3c, 0x2606, 0x3e4e,
- 0x312a, 0x0118, 0x0d3c, 0x2506, 0x3d4e,
- 0x302a, 0x0018, 0x0c3c, 0x2406, 0x3c4e,
- 0x302c, 0x001a, 0x0c3e, 0x2408, 0x3c50,
- 0x312c, 0x011a, 0x0d3e, 0x2508, 0x3d50,
- 0x322c, 0x021a, 0x0e3e, 0x2608, 0x3e50,
- 0x322e, 0x021c, 0x0e40, 0x260a, 0x3e52,
- 0x312e, 0x011c, 0x0d40, 0x250a, 0x3d52,
- 0x302e, 0x001c, 0x0c40, 0x240a, 0x3c52,
- 0x3030, 0x001e, 0x0c42, 0x240c, 0x3c54,
- 0x3130, 0x011e, 0x0d42, 0x250c, 0x3d54,
- 0x3230, 0x021e, 0x0e42, 0x260c, 0x3e54,
- 0x3232, 0x0220, 0x0e44, 0x260e, 0x3e56,
- 0x3132, 0x0120, 0x0d44, 0x250e, 0x3d56,
- 0x3032, 0x0020, 0x0c44, 0x240e, 0x3c56,
- 0x3034, 0x0022, 0x0c46, 0x2410, 0x3c58,
- 0x3134, 0x0122, 0x0d46, 0x2510, 0x3d58,
- 0x3234, 0x0222, 0x0e46, 0x2610, 0x3e58,
- 0x3624, 0x0612, 0x1236, 0x2a00, 0x4248,
- 0x3724, 0x0712, 0x1336, 0x2b00, 0x4348,
- 0x3824, 0x0812, 0x1436, 0x2c00, 0x4448,
- 0x3826, 0x0814, 0x1438, 0x2c02, 0x444a,
- 0x3726, 0x0714, 0x1338, 0x2b02, 0x434a,
- 0x3626, 0x0614, 0x1238, 0x2a02, 0x424a,
- 0x3628, 0x0616, 0x123a, 0x2a04, 0x424c,
- 0x3728, 0x0716, 0x133a, 0x2b04, 0x434c,
- 0x3828, 0x0816, 0x143a, 0x2c04, 0x444c,
- 0x382a, 0x0818, 0x143c, 0x2c06, 0x444e,
- 0x372a, 0x0718, 0x133c, 0x2b06, 0x434e,
- 0x362a, 0x0618, 0x123c, 0x2a06, 0x424e,
- 0x362c, 0x061a, 0x123e, 0x2a08, 0x4250,
- 0x372c, 0x071a, 0x133e, 0x2b08, 0x4350,
- 0x382c, 0x081a, 0x143e, 0x2c08, 0x4450,
- 0x382e, 0x081c, 0x1440, 0x2c0a, 0x4452,
- 0x372e, 0x071c, 0x1340, 0x2b0a, 0x4352,
- 0x362e, 0x061c, 0x1240, 0x2a0a, 0x4252,
- 0x3630, 0x061e, 0x1242, 0x2a0c, 0x4254,
- 0x3730, 0x071e, 0x1342, 0x2b0c, 0x4354,
- 0x3830, 0x081e, 0x1442, 0x2c0c, 0x4454,
- 0x3832, 0x0820, 0x1444, 0x2c0e, 0x4456,
- 0x3732, 0x0720, 0x1344, 0x2b0e, 0x4356,
- 0x3632, 0x0620, 0x1244, 0x2a0e, 0x4256,
- 0x3634, 0x0622, 0x1246, 0x2a10, 0x4258,
- 0x3734, 0x0722, 0x1346, 0x2b10, 0x4358,
- 0x3834, 0x0822, 0x1446, 0x2c10, 0x4458,
- 0x3c24, 0x0c12, 0x1836, 0x3000, 0x0048,
- 0x3d24, 0x0d12, 0x1936, 0x3100, 0x0148,
- 0x3e24, 0x0e12, 0x1a36, 0x3200, 0x0248,
- 0x3e26, 0x0e14, 0x1a38, 0x3202, 0x024a,
- 0x3d26, 0x0d14, 0x1938, 0x3102, 0x014a,
- 0x3c26, 0x0c14, 0x1838, 0x3002, 0x004a,
- 0x3c28, 0x0c16, 0x183a, 0x3004, 0x004c,
- 0x3d28, 0x0d16, 0x193a, 0x3104, 0x014c,
- 0x3e28, 0x0e16, 0x1a3a, 0x3204, 0x024c,
- 0x3e2a, 0x0e18, 0x1a3c, 0x3206, 0x024e,
- 0x3d2a, 0x0d18, 0x193c, 0x3106, 0x014e,
- 0x3c2a, 0x0c18, 0x183c, 0x3006, 0x004e,
- 0x3c2c, 0x0c1a, 0x183e, 0x3008, 0x0050,
- 0x3d2c, 0x0d1a, 0x193e, 0x3108, 0x0150,
- 0x3e2c, 0x0e1a, 0x1a3e, 0x3208, 0x0250,
- 0x3e2e, 0x0e1c, 0x1a40, 0x320a, 0x0252,
- 0x3d2e, 0x0d1c, 0x1940, 0x310a, 0x0152,
- 0x3c2e, 0x0c1c, 0x1840, 0x300a, 0x0052,
- 0x3c30, 0x0c1e, 0x1842, 0x300c, 0x0054,
- 0x3d30, 0x0d1e, 0x1942, 0x310c, 0x0154,
- 0x3e30, 0x0e1e, 0x1a42, 0x320c, 0x0254,
- 0x3e32, 0x0e20, 0x1a44, 0x320e, 0x0256,
- 0x3d32, 0x0d20, 0x1944, 0x310e, 0x0156,
- 0x3c32, 0x0c20, 0x1844, 0x300e, 0x0056,
- 0x3c34, 0x0c22, 0x1846, 0x3010, 0x0058,
- 0x3d34, 0x0d22, 0x1946, 0x3110, 0x0158,
- 0x3e34, 0x0e22, 0x1a46, 0x3210, 0x0258,
- 0x4224, 0x1212, 0x1e36, 0x3600, 0x0648,
- 0x4324, 0x1312, 0x1f36, 0x3700, 0x0748,
- 0x4424, 0x1412, 0x2036, 0x3800, 0x0848,
- 0x4426, 0x1414, 0x2038, 0x3802, 0x084a,
- 0x4326, 0x1314, 0x1f38, 0x3702, 0x074a,
- 0x4226, 0x1214, 0x1e38, 0x3602, 0x064a,
- 0x4228, 0x1216, 0x1e3a, 0x3604, 0x064c,
- 0x4328, 0x1316, 0x1f3a, 0x3704, 0x074c,
- 0x4428, 0x1416, 0x203a, 0x3804, 0x084c,
- 0x442a, 0x1418, 0x203c, 0x3806, 0x084e,
- 0x432a, 0x1318, 0x1f3c, 0x3706, 0x074e,
- 0x422a, 0x1218, 0x1e3c, 0x3606, 0x064e,
- 0x422c, 0x121a, 0x1e3e, 0x3608, 0x0650,
- 0x432c, 0x131a, 0x1f3e, 0x3708, 0x0750,
- 0x442c, 0x141a, 0x203e, 0x3808, 0x0850,
- 0x442e, 0x141c, 0x2040, 0x380a, 0x0852,
- 0x432e, 0x131c, 0x1f40, 0x370a, 0x0752,
- 0x422e, 0x121c, 0x1e40, 0x360a, 0x0652,
- 0x4230, 0x121e, 0x1e42, 0x360c, 0x0654,
- 0x4330, 0x131e, 0x1f42, 0x370c, 0x0754,
- 0x4430, 0x141e, 0x2042, 0x380c, 0x0854,
- 0x4432, 0x1420, 0x2044, 0x380e, 0x0856,
- 0x4332, 0x1320, 0x1f44, 0x370e, 0x0756,
- 0x4232, 0x1220, 0x1e44, 0x360e, 0x0656,
- 0x4234, 0x1222, 0x1e46, 0x3610, 0x0658,
- 0x4334, 0x1322, 0x1f46, 0x3710, 0x0758,
- 0x4434, 0x1422, 0x2046, 0x3810, 0x0858,
- 0x0024, 0x1812, 0x2436, 0x3c00, 0x0c48,
- 0x0124, 0x1912, 0x2536, 0x3d00, 0x0d48,
- 0x0224, 0x1a12, 0x2636, 0x3e00, 0x0e48,
- 0x0226, 0x1a14, 0x2638, 0x3e02, 0x0e4a,
- 0x0126, 0x1914, 0x2538, 0x3d02, 0x0d4a,
- 0x0026, 0x1814, 0x2438, 0x3c02, 0x0c4a,
- 0x0028, 0x1816, 0x243a, 0x3c04, 0x0c4c,
- 0x0128, 0x1916, 0x253a, 0x3d04, 0x0d4c,
- 0x0228, 0x1a16, 0x263a, 0x3e04, 0x0e4c,
- 0x022a, 0x1a18, 0x263c, 0x3e06, 0x0e4e,
- 0x012a, 0x1918, 0x253c, 0x3d06, 0x0d4e,
- 0x002a, 0x1818, 0x243c, 0x3c06, 0x0c4e,
- 0x002c, 0x181a, 0x243e, 0x3c08, 0x0c50,
- 0x012c, 0x191a, 0x253e, 0x3d08, 0x0d50,
- 0x022c, 0x1a1a, 0x263e, 0x3e08, 0x0e50,
- 0x022e, 0x1a1c, 0x2640, 0x3e0a, 0x0e52,
- 0x012e, 0x191c, 0x2540, 0x3d0a, 0x0d52,
- 0x002e, 0x181c, 0x2440, 0x3c0a, 0x0c52,
- 0x0030, 0x181e, 0x2442, 0x3c0c, 0x0c54,
- 0x0130, 0x191e, 0x2542, 0x3d0c, 0x0d54,
- 0x0230, 0x1a1e, 0x2642, 0x3e0c, 0x0e54,
- 0x0232, 0x1a20, 0x2644, 0x3e0e, 0x0e56,
- 0x0132, 0x1920, 0x2544, 0x3d0e, 0x0d56,
- 0x0032, 0x1820, 0x2444, 0x3c0e, 0x0c56,
- 0x0034, 0x1822, 0x2446, 0x3c10, 0x0c58,
- 0x0134, 0x1922, 0x2546, 0x3d10, 0x0d58,
- 0x0234, 0x1a22, 0x2646, 0x3e10, 0x0e58,
- 0x0624, 0x1e12, 0x2a36, 0x4200, 0x1248,
- 0x0724, 0x1f12, 0x2b36, 0x4300, 0x1348,
- 0x0824, 0x2012, 0x2c36, 0x4400, 0x1448,
- 0x0826, 0x2014, 0x2c38, 0x4402, 0x144a,
- 0x0726, 0x1f14, 0x2b38, 0x4302, 0x134a,
- 0x0626, 0x1e14, 0x2a38, 0x4202, 0x124a,
- 0x0628, 0x1e16, 0x2a3a, 0x4204, 0x124c,
- 0x0728, 0x1f16, 0x2b3a, 0x4304, 0x134c,
- 0x0828, 0x2016, 0x2c3a, 0x4404, 0x144c,
- 0x082a, 0x2018, 0x2c3c, 0x4406, 0x144e,
- 0x072a, 0x1f18, 0x2b3c, 0x4306, 0x134e,
- 0x062a, 0x1e18, 0x2a3c, 0x4206, 0x124e,
- 0x062c, 0x1e1a, 0x2a3e, 0x4208, 0x1250,
- 0x072c, 0x1f1a, 0x2b3e, 0x4308, 0x1350,
- 0x082c, 0x201a, 0x2c3e, 0x4408, 0x1450,
- 0x082e, 0x201c, 0x2c40, 0x440a, 0x1452,
- 0x072e, 0x1f1c, 0x2b40, 0x430a, 0x1352,
- 0x062e, 0x1e1c, 0x2a40, 0x420a, 0x1252,
- 0x0630, 0x1e1e, 0x2a42, 0x420c, 0x1254,
- 0x0730, 0x1f1e, 0x2b42, 0x430c, 0x1354,
- 0x0830, 0x201e, 0x2c42, 0x440c, 0x1454,
- 0x0832, 0x2020, 0x2c44, 0x440e, 0x1456,
- 0x0732, 0x1f20, 0x2b44, 0x430e, 0x1356,
- 0x0632, 0x1e20, 0x2a44, 0x420e, 0x1256,
- 0x0634, 0x1e22, 0x2a46, 0x4210, 0x1258,
- 0x0734, 0x1f22, 0x2b46, 0x4310, 0x1358,
- 0x0834, 0x2022, 0x2c46, 0x4410, 0x1458,
- 0x0f24, 0x2712, 0x3336, 0x0300, 0x1b48,
- 0x1024, 0x2812, 0x3436, 0x0400, 0x1c48,
- 0x1124, 0x2912, 0x3536, 0x0500, 0x1d48,
- 0x1126, 0x2914, 0x3538, 0x0502, 0x1d4a,
- 0x1026, 0x2814, 0x3438, 0x0402, 0x1c4a,
- 0x0f26, 0x2714, 0x3338, 0x0302, 0x1b4a,
- 0x0f28, 0x2716, 0x333a, 0x0304, 0x1b4c,
- 0x1028, 0x2816, 0x343a, 0x0404, 0x1c4c,
- 0x1128, 0x2916, 0x353a, 0x0504, 0x1d4c,
- 0x112a, 0x2918, 0x353c, 0x0506, 0x1d4e,
- 0x102a, 0x2818, 0x343c, 0x0406, 0x1c4e,
- 0x0f2a, 0x2718, 0x333c, 0x0306, 0x1b4e,
- 0x0f2c, 0x271a, 0x333e, 0x0308, 0x1b50,
- 0x102c, 0x281a, 0x343e, 0x0408, 0x1c50,
- 0x112c, 0x291a, 0x353e, 0x0508, 0x1d50,
- 0x112e, 0x291c, 0x3540, 0x050a, 0x1d52,
- 0x102e, 0x281c, 0x3440, 0x040a, 0x1c52,
- 0x0f2e, 0x271c, 0x3340, 0x030a, 0x1b52,
- 0x0f30, 0x271e, 0x3342, 0x030c, 0x1b54,
- 0x1030, 0x281e, 0x3442, 0x040c, 0x1c54,
- 0x1130, 0x291e, 0x3542, 0x050c, 0x1d54,
- 0x1132, 0x2920, 0x3544, 0x050e, 0x1d56,
- 0x1032, 0x2820, 0x3444, 0x040e, 0x1c56,
- 0x0f32, 0x2720, 0x3344, 0x030e, 0x1b56,
- 0x0f34, 0x2722, 0x3346, 0x0310, 0x1b58,
- 0x1034, 0x2822, 0x3446, 0x0410, 0x1c58,
- 0x1134, 0x2922, 0x3546, 0x0510, 0x1d58,
- 0x1524, 0x2d12, 0x3936, 0x0900, 0x2148,
- 0x1624, 0x2e12, 0x3a36, 0x0a00, 0x2248,
- 0x1724, 0x2f12, 0x3b36, 0x0b00, 0x2348,
- 0x1726, 0x2f14, 0x3b38, 0x0b02, 0x234a,
- 0x1626, 0x2e14, 0x3a38, 0x0a02, 0x224a,
- 0x1526, 0x2d14, 0x3938, 0x0902, 0x214a,
- 0x1528, 0x2d16, 0x393a, 0x0904, 0x214c,
- 0x1628, 0x2e16, 0x3a3a, 0x0a04, 0x224c,
- 0x1728, 0x2f16, 0x3b3a, 0x0b04, 0x234c,
- 0x172a, 0x2f18, 0x3b3c, 0x0b06, 0x234e,
- 0x162a, 0x2e18, 0x3a3c, 0x0a06, 0x224e,
- 0x152a, 0x2d18, 0x393c, 0x0906, 0x214e,
- 0x152c, 0x2d1a, 0x393e, 0x0908, 0x2150,
- 0x162c, 0x2e1a, 0x3a3e, 0x0a08, 0x2250,
- 0x172c, 0x2f1a, 0x3b3e, 0x0b08, 0x2350,
- 0x172e, 0x2f1c, 0x3b40, 0x0b0a, 0x2352,
- 0x162e, 0x2e1c, 0x3a40, 0x0a0a, 0x2252,
- 0x152e, 0x2d1c, 0x3940, 0x090a, 0x2152,
- 0x1530, 0x2d1e, 0x3942, 0x090c, 0x2154,
- 0x1630, 0x2e1e, 0x3a42, 0x0a0c, 0x2254,
- 0x1730, 0x2f1e, 0x3b42, 0x0b0c, 0x2354,
- 0x1732, 0x2f20, 0x3b44, 0x0b0e, 0x2356,
- 0x1632, 0x2e20, 0x3a44, 0x0a0e, 0x2256,
- 0x1532, 0x2d20, 0x3944, 0x090e, 0x2156,
- 0x1534, 0x2d22, 0x3946, 0x0910, 0x2158,
- 0x1634, 0x2e22, 0x3a46, 0x0a10, 0x2258,
- 0x1734, 0x2f22, 0x3b46, 0x0b10, 0x2358,
- 0x1b24, 0x3312, 0x3f36, 0x0f00, 0x2748,
- 0x1c24, 0x3412, 0x4036, 0x1000, 0x2848,
- 0x1d24, 0x3512, 0x4136, 0x1100, 0x2948,
- 0x1d26, 0x3514, 0x4138, 0x1102, 0x294a,
- 0x1c26, 0x3414, 0x4038, 0x1002, 0x284a,
- 0x1b26, 0x3314, 0x3f38, 0x0f02, 0x274a,
- 0x1b28, 0x3316, 0x3f3a, 0x0f04, 0x274c,
- 0x1c28, 0x3416, 0x403a, 0x1004, 0x284c,
- 0x1d28, 0x3516, 0x413a, 0x1104, 0x294c,
- 0x1d2a, 0x3518, 0x413c, 0x1106, 0x294e,
- 0x1c2a, 0x3418, 0x403c, 0x1006, 0x284e,
- 0x1b2a, 0x3318, 0x3f3c, 0x0f06, 0x274e,
- 0x1b2c, 0x331a, 0x3f3e, 0x0f08, 0x2750,
- 0x1c2c, 0x341a, 0x403e, 0x1008, 0x2850,
- 0x1d2c, 0x351a, 0x413e, 0x1108, 0x2950,
- 0x1d2e, 0x351c, 0x4140, 0x110a, 0x2952,
- 0x1c2e, 0x341c, 0x4040, 0x100a, 0x2852,
- 0x1b2e, 0x331c, 0x3f40, 0x0f0a, 0x2752,
- 0x1b30, 0x331e, 0x3f42, 0x0f0c, 0x2754,
- 0x1c30, 0x341e, 0x4042, 0x100c, 0x2854,
- 0x1d30, 0x351e, 0x4142, 0x110c, 0x2954,
- 0x1d32, 0x3520, 0x4144, 0x110e, 0x2956,
- 0x1c32, 0x3420, 0x4044, 0x100e, 0x2856,
- 0x1b32, 0x3320, 0x3f44, 0x0f0e, 0x2756,
- 0x1b34, 0x3322, 0x3f46, 0x0f10, 0x2758,
- 0x1c34, 0x3422, 0x4046, 0x1010, 0x2858,
- 0x1d34, 0x3522, 0x4146, 0x1110, 0x2958,
- 0x2124, 0x3912, 0x4536, 0x1500, 0x2d48,
- 0x2224, 0x3a12, 0x4636, 0x1600, 0x2e48,
- 0x2324, 0x3b12, 0x4736, 0x1700, 0x2f48,
- 0x2326, 0x3b14, 0x4738, 0x1702, 0x2f4a,
- 0x2226, 0x3a14, 0x4638, 0x1602, 0x2e4a,
- 0x2126, 0x3914, 0x4538, 0x1502, 0x2d4a,
- 0x2128, 0x3916, 0x453a, 0x1504, 0x2d4c,
- 0x2228, 0x3a16, 0x463a, 0x1604, 0x2e4c,
- 0x2328, 0x3b16, 0x473a, 0x1704, 0x2f4c,
- 0x232a, 0x3b18, 0x473c, 0x1706, 0x2f4e,
- 0x222a, 0x3a18, 0x463c, 0x1606, 0x2e4e,
- 0x212a, 0x3918, 0x453c, 0x1506, 0x2d4e,
- 0x212c, 0x391a, 0x453e, 0x1508, 0x2d50,
- 0x222c, 0x3a1a, 0x463e, 0x1608, 0x2e50,
- 0x232c, 0x3b1a, 0x473e, 0x1708, 0x2f50,
- 0x232e, 0x3b1c, 0x4740, 0x170a, 0x2f52,
- 0x222e, 0x3a1c, 0x4640, 0x160a, 0x2e52,
- 0x212e, 0x391c, 0x4540, 0x150a, 0x2d52,
- 0x2130, 0x391e, 0x4542, 0x150c, 0x2d54,
- 0x2230, 0x3a1e, 0x4642, 0x160c, 0x2e54,
- 0x2330, 0x3b1e, 0x4742, 0x170c, 0x2f54,
- 0x2332, 0x3b20, 0x4744, 0x170e, 0x2f56,
- 0x2232, 0x3a20, 0x4644, 0x160e, 0x2e56,
- 0x2132, 0x3920, 0x4544, 0x150e, 0x2d56,
- 0x2134, 0x3922, 0x4546, 0x1510, 0x2d58,
- 0x2234, 0x3a22, 0x4646, 0x1610, 0x2e58,
- 0x2334, 0x3b22, 0x4746, 0x1710, 0x2f58,
- 0x2724, 0x3f12, 0x0336, 0x1b00, 0x3348,
- 0x2824, 0x4012, 0x0436, 0x1c00, 0x3448,
- 0x2924, 0x4112, 0x0536, 0x1d00, 0x3548,
- 0x2926, 0x4114, 0x0538, 0x1d02, 0x354a,
- 0x2826, 0x4014, 0x0438, 0x1c02, 0x344a,
- 0x2726, 0x3f14, 0x0338, 0x1b02, 0x334a,
- 0x2728, 0x3f16, 0x033a, 0x1b04, 0x334c,
- 0x2828, 0x4016, 0x043a, 0x1c04, 0x344c,
- 0x2928, 0x4116, 0x053a, 0x1d04, 0x354c,
- 0x292a, 0x4118, 0x053c, 0x1d06, 0x354e,
- 0x282a, 0x4018, 0x043c, 0x1c06, 0x344e,
- 0x272a, 0x3f18, 0x033c, 0x1b06, 0x334e,
- 0x272c, 0x3f1a, 0x033e, 0x1b08, 0x3350,
- 0x282c, 0x401a, 0x043e, 0x1c08, 0x3450,
- 0x292c, 0x411a, 0x053e, 0x1d08, 0x3550,
- 0x292e, 0x411c, 0x0540, 0x1d0a, 0x3552,
- 0x282e, 0x401c, 0x0440, 0x1c0a, 0x3452,
- 0x272e, 0x3f1c, 0x0340, 0x1b0a, 0x3352,
- 0x2730, 0x3f1e, 0x0342, 0x1b0c, 0x3354,
- 0x2830, 0x401e, 0x0442, 0x1c0c, 0x3454,
- 0x2930, 0x411e, 0x0542, 0x1d0c, 0x3554,
- 0x2932, 0x4120, 0x0544, 0x1d0e, 0x3556,
- 0x2832, 0x4020, 0x0444, 0x1c0e, 0x3456,
- 0x2732, 0x3f20, 0x0344, 0x1b0e, 0x3356,
- 0x2734, 0x3f22, 0x0346, 0x1b10, 0x3358,
- 0x2834, 0x4022, 0x0446, 0x1c10, 0x3458,
- 0x2934, 0x4122, 0x0546, 0x1d10, 0x3558,
- 0x2d24, 0x4512, 0x0936, 0x2100, 0x3948,
- 0x2e24, 0x4612, 0x0a36, 0x2200, 0x3a48,
- 0x2f24, 0x4712, 0x0b36, 0x2300, 0x3b48,
- 0x2f26, 0x4714, 0x0b38, 0x2302, 0x3b4a,
- 0x2e26, 0x4614, 0x0a38, 0x2202, 0x3a4a,
- 0x2d26, 0x4514, 0x0938, 0x2102, 0x394a,
- 0x2d28, 0x4516, 0x093a, 0x2104, 0x394c,
- 0x2e28, 0x4616, 0x0a3a, 0x2204, 0x3a4c,
- 0x2f28, 0x4716, 0x0b3a, 0x2304, 0x3b4c,
- 0x2f2a, 0x4718, 0x0b3c, 0x2306, 0x3b4e,
- 0x2e2a, 0x4618, 0x0a3c, 0x2206, 0x3a4e,
- 0x2d2a, 0x4518, 0x093c, 0x2106, 0x394e,
- 0x2d2c, 0x451a, 0x093e, 0x2108, 0x3950,
- 0x2e2c, 0x461a, 0x0a3e, 0x2208, 0x3a50,
- 0x2f2c, 0x471a, 0x0b3e, 0x2308, 0x3b50,
- 0x2f2e, 0x471c, 0x0b40, 0x230a, 0x3b52,
- 0x2e2e, 0x461c, 0x0a40, 0x220a, 0x3a52,
- 0x2d2e, 0x451c, 0x0940, 0x210a, 0x3952,
- 0x2d30, 0x451e, 0x0942, 0x210c, 0x3954,
- 0x2e30, 0x461e, 0x0a42, 0x220c, 0x3a54,
- 0x2f30, 0x471e, 0x0b42, 0x230c, 0x3b54,
- 0x2f32, 0x4720, 0x0b44, 0x230e, 0x3b56,
- 0x2e32, 0x4620, 0x0a44, 0x220e, 0x3a56,
- 0x2d32, 0x4520, 0x0944, 0x210e, 0x3956,
- 0x2d34, 0x4522, 0x0946, 0x2110, 0x3958,
- 0x2e34, 0x4622, 0x0a46, 0x2210, 0x3a58,
- 0x2f34, 0x4722, 0x0b46, 0x2310, 0x3b58,
- 0x3324, 0x0312, 0x0f36, 0x2700, 0x3f48,
- 0x3424, 0x0412, 0x1036, 0x2800, 0x4048,
- 0x3524, 0x0512, 0x1136, 0x2900, 0x4148,
- 0x3526, 0x0514, 0x1138, 0x2902, 0x414a,
- 0x3426, 0x0414, 0x1038, 0x2802, 0x404a,
- 0x3326, 0x0314, 0x0f38, 0x2702, 0x3f4a,
- 0x3328, 0x0316, 0x0f3a, 0x2704, 0x3f4c,
- 0x3428, 0x0416, 0x103a, 0x2804, 0x404c,
- 0x3528, 0x0516, 0x113a, 0x2904, 0x414c,
- 0x352a, 0x0518, 0x113c, 0x2906, 0x414e,
- 0x342a, 0x0418, 0x103c, 0x2806, 0x404e,
- 0x332a, 0x0318, 0x0f3c, 0x2706, 0x3f4e,
- 0x332c, 0x031a, 0x0f3e, 0x2708, 0x3f50,
- 0x342c, 0x041a, 0x103e, 0x2808, 0x4050,
- 0x352c, 0x051a, 0x113e, 0x2908, 0x4150,
- 0x352e, 0x051c, 0x1140, 0x290a, 0x4152,
- 0x342e, 0x041c, 0x1040, 0x280a, 0x4052,
- 0x332e, 0x031c, 0x0f40, 0x270a, 0x3f52,
- 0x3330, 0x031e, 0x0f42, 0x270c, 0x3f54,
- 0x3430, 0x041e, 0x1042, 0x280c, 0x4054,
- 0x3530, 0x051e, 0x1142, 0x290c, 0x4154,
- 0x3532, 0x0520, 0x1144, 0x290e, 0x4156,
- 0x3432, 0x0420, 0x1044, 0x280e, 0x4056,
- 0x3332, 0x0320, 0x0f44, 0x270e, 0x3f56,
- 0x3334, 0x0322, 0x0f46, 0x2710, 0x3f58,
- 0x3434, 0x0422, 0x1046, 0x2810, 0x4058,
- 0x3534, 0x0522, 0x1146, 0x2910, 0x4158,
- 0x3924, 0x0912, 0x1536, 0x2d00, 0x4548,
- 0x3a24, 0x0a12, 0x1636, 0x2e00, 0x4648,
- 0x3b24, 0x0b12, 0x1736, 0x2f00, 0x4748,
- 0x3b26, 0x0b14, 0x1738, 0x2f02, 0x474a,
- 0x3a26, 0x0a14, 0x1638, 0x2e02, 0x464a,
- 0x3926, 0x0914, 0x1538, 0x2d02, 0x454a,
- 0x3928, 0x0916, 0x153a, 0x2d04, 0x454c,
- 0x3a28, 0x0a16, 0x163a, 0x2e04, 0x464c,
- 0x3b28, 0x0b16, 0x173a, 0x2f04, 0x474c,
- 0x3b2a, 0x0b18, 0x173c, 0x2f06, 0x474e,
- 0x3a2a, 0x0a18, 0x163c, 0x2e06, 0x464e,
- 0x392a, 0x0918, 0x153c, 0x2d06, 0x454e,
- 0x392c, 0x091a, 0x153e, 0x2d08, 0x4550,
- 0x3a2c, 0x0a1a, 0x163e, 0x2e08, 0x4650,
- 0x3b2c, 0x0b1a, 0x173e, 0x2f08, 0x4750,
- 0x3b2e, 0x0b1c, 0x1740, 0x2f0a, 0x4752,
- 0x3a2e, 0x0a1c, 0x1640, 0x2e0a, 0x4652,
- 0x392e, 0x091c, 0x1540, 0x2d0a, 0x4552,
- 0x3930, 0x091e, 0x1542, 0x2d0c, 0x4554,
- 0x3a30, 0x0a1e, 0x1642, 0x2e0c, 0x4654,
- 0x3b30, 0x0b1e, 0x1742, 0x2f0c, 0x4754,
- 0x3b32, 0x0b20, 0x1744, 0x2f0e, 0x4756,
- 0x3a32, 0x0a20, 0x1644, 0x2e0e, 0x4656,
- 0x3932, 0x0920, 0x1544, 0x2d0e, 0x4556,
- 0x3934, 0x0922, 0x1546, 0x2d10, 0x4558,
- 0x3a34, 0x0a22, 0x1646, 0x2e10, 0x4658,
- 0x3b34, 0x0b22, 0x1746, 0x2f10, 0x4758,
- 0x3f24, 0x0f12, 0x1b36, 0x3300, 0x0348,
- 0x4024, 0x1012, 0x1c36, 0x3400, 0x0448,
- 0x4124, 0x1112, 0x1d36, 0x3500, 0x0548,
- 0x4126, 0x1114, 0x1d38, 0x3502, 0x054a,
- 0x4026, 0x1014, 0x1c38, 0x3402, 0x044a,
- 0x3f26, 0x0f14, 0x1b38, 0x3302, 0x034a,
- 0x3f28, 0x0f16, 0x1b3a, 0x3304, 0x034c,
- 0x4028, 0x1016, 0x1c3a, 0x3404, 0x044c,
- 0x4128, 0x1116, 0x1d3a, 0x3504, 0x054c,
- 0x412a, 0x1118, 0x1d3c, 0x3506, 0x054e,
- 0x402a, 0x1018, 0x1c3c, 0x3406, 0x044e,
- 0x3f2a, 0x0f18, 0x1b3c, 0x3306, 0x034e,
- 0x3f2c, 0x0f1a, 0x1b3e, 0x3308, 0x0350,
- 0x402c, 0x101a, 0x1c3e, 0x3408, 0x0450,
- 0x412c, 0x111a, 0x1d3e, 0x3508, 0x0550,
- 0x412e, 0x111c, 0x1d40, 0x350a, 0x0552,
- 0x402e, 0x101c, 0x1c40, 0x340a, 0x0452,
- 0x3f2e, 0x0f1c, 0x1b40, 0x330a, 0x0352,
- 0x3f30, 0x0f1e, 0x1b42, 0x330c, 0x0354,
- 0x4030, 0x101e, 0x1c42, 0x340c, 0x0454,
- 0x4130, 0x111e, 0x1d42, 0x350c, 0x0554,
- 0x4132, 0x1120, 0x1d44, 0x350e, 0x0556,
- 0x4032, 0x1020, 0x1c44, 0x340e, 0x0456,
- 0x3f32, 0x0f20, 0x1b44, 0x330e, 0x0356,
- 0x3f34, 0x0f22, 0x1b46, 0x3310, 0x0358,
- 0x4034, 0x1022, 0x1c46, 0x3410, 0x0458,
- 0x4134, 0x1122, 0x1d46, 0x3510, 0x0558,
- 0x4524, 0x1512, 0x2136, 0x3900, 0x0948,
- 0x4624, 0x1612, 0x2236, 0x3a00, 0x0a48,
- 0x4724, 0x1712, 0x2336, 0x3b00, 0x0b48,
- 0x4726, 0x1714, 0x2338, 0x3b02, 0x0b4a,
- 0x4626, 0x1614, 0x2238, 0x3a02, 0x0a4a,
- 0x4526, 0x1514, 0x2138, 0x3902, 0x094a,
- 0x4528, 0x1516, 0x213a, 0x3904, 0x094c,
- 0x4628, 0x1616, 0x223a, 0x3a04, 0x0a4c,
- 0x4728, 0x1716, 0x233a, 0x3b04, 0x0b4c,
- 0x472a, 0x1718, 0x233c, 0x3b06, 0x0b4e,
- 0x462a, 0x1618, 0x223c, 0x3a06, 0x0a4e,
- 0x452a, 0x1518, 0x213c, 0x3906, 0x094e,
- 0x452c, 0x151a, 0x213e, 0x3908, 0x0950,
- 0x462c, 0x161a, 0x223e, 0x3a08, 0x0a50,
- 0x472c, 0x171a, 0x233e, 0x3b08, 0x0b50,
- 0x472e, 0x171c, 0x2340, 0x3b0a, 0x0b52,
- 0x462e, 0x161c, 0x2240, 0x3a0a, 0x0a52,
- 0x452e, 0x151c, 0x2140, 0x390a, 0x0952,
- 0x4530, 0x151e, 0x2142, 0x390c, 0x0954,
- 0x4630, 0x161e, 0x2242, 0x3a0c, 0x0a54,
- 0x4730, 0x171e, 0x2342, 0x3b0c, 0x0b54,
- 0x4732, 0x1720, 0x2344, 0x3b0e, 0x0b56,
- 0x4632, 0x1620, 0x2244, 0x3a0e, 0x0a56,
- 0x4532, 0x1520, 0x2144, 0x390e, 0x0956,
- 0x4534, 0x1522, 0x2146, 0x3910, 0x0958,
- 0x4634, 0x1622, 0x2246, 0x3a10, 0x0a58,
- 0x4734, 0x1722, 0x2346, 0x3b10, 0x0b58,
- 0x0324, 0x1b12, 0x2736, 0x3f00, 0x0f48,
- 0x0424, 0x1c12, 0x2836, 0x4000, 0x1048,
- 0x0524, 0x1d12, 0x2936, 0x4100, 0x1148,
- 0x0526, 0x1d14, 0x2938, 0x4102, 0x114a,
- 0x0426, 0x1c14, 0x2838, 0x4002, 0x104a,
- 0x0326, 0x1b14, 0x2738, 0x3f02, 0x0f4a,
- 0x0328, 0x1b16, 0x273a, 0x3f04, 0x0f4c,
- 0x0428, 0x1c16, 0x283a, 0x4004, 0x104c,
- 0x0528, 0x1d16, 0x293a, 0x4104, 0x114c,
- 0x052a, 0x1d18, 0x293c, 0x4106, 0x114e,
- 0x042a, 0x1c18, 0x283c, 0x4006, 0x104e,
- 0x032a, 0x1b18, 0x273c, 0x3f06, 0x0f4e,
- 0x032c, 0x1b1a, 0x273e, 0x3f08, 0x0f50,
- 0x042c, 0x1c1a, 0x283e, 0x4008, 0x1050,
- 0x052c, 0x1d1a, 0x293e, 0x4108, 0x1150,
- 0x052e, 0x1d1c, 0x2940, 0x410a, 0x1152,
- 0x042e, 0x1c1c, 0x2840, 0x400a, 0x1052,
- 0x032e, 0x1b1c, 0x2740, 0x3f0a, 0x0f52,
- 0x0330, 0x1b1e, 0x2742, 0x3f0c, 0x0f54,
- 0x0430, 0x1c1e, 0x2842, 0x400c, 0x1054,
- 0x0530, 0x1d1e, 0x2942, 0x410c, 0x1154,
- 0x0532, 0x1d20, 0x2944, 0x410e, 0x1156,
- 0x0432, 0x1c20, 0x2844, 0x400e, 0x1056,
- 0x0332, 0x1b20, 0x2744, 0x3f0e, 0x0f56,
- 0x0334, 0x1b22, 0x2746, 0x3f10, 0x0f58,
- 0x0434, 0x1c22, 0x2846, 0x4010, 0x1058,
- 0x0534, 0x1d22, 0x2946, 0x4110, 0x1158,
- 0x0924, 0x2112, 0x2d36, 0x4500, 0x1548,
- 0x0a24, 0x2212, 0x2e36, 0x4600, 0x1648,
- 0x0b24, 0x2312, 0x2f36, 0x4700, 0x1748,
- 0x0b26, 0x2314, 0x2f38, 0x4702, 0x174a,
- 0x0a26, 0x2214, 0x2e38, 0x4602, 0x164a,
- 0x0926, 0x2114, 0x2d38, 0x4502, 0x154a,
- 0x0928, 0x2116, 0x2d3a, 0x4504, 0x154c,
- 0x0a28, 0x2216, 0x2e3a, 0x4604, 0x164c,
- 0x0b28, 0x2316, 0x2f3a, 0x4704, 0x174c,
- 0x0b2a, 0x2318, 0x2f3c, 0x4706, 0x174e,
- 0x0a2a, 0x2218, 0x2e3c, 0x4606, 0x164e,
- 0x092a, 0x2118, 0x2d3c, 0x4506, 0x154e,
- 0x092c, 0x211a, 0x2d3e, 0x4508, 0x1550,
- 0x0a2c, 0x221a, 0x2e3e, 0x4608, 0x1650,
- 0x0b2c, 0x231a, 0x2f3e, 0x4708, 0x1750,
- 0x0b2e, 0x231c, 0x2f40, 0x470a, 0x1752,
- 0x0a2e, 0x221c, 0x2e40, 0x460a, 0x1652,
- 0x092e, 0x211c, 0x2d40, 0x450a, 0x1552,
- 0x0930, 0x211e, 0x2d42, 0x450c, 0x1554,
- 0x0a30, 0x221e, 0x2e42, 0x460c, 0x1654,
- 0x0b30, 0x231e, 0x2f42, 0x470c, 0x1754,
- 0x0b32, 0x2320, 0x2f44, 0x470e, 0x1756,
- 0x0a32, 0x2220, 0x2e44, 0x460e, 0x1656,
- 0x0932, 0x2120, 0x2d44, 0x450e, 0x1556,
- 0x0934, 0x2122, 0x2d46, 0x4510, 0x1558,
- 0x0a34, 0x2222, 0x2e46, 0x4610, 0x1658,
- 0x0b34, 0x2322, 0x2f46, 0x4710, 0x1758,
-};
-
-static const uint16_t dv_place_1080i60[4*10*27*5] = {
- 0x2048, 0x5024, 0x686c, 0x0800, 0x3890,
- 0x3848, 0x6824, 0x086c, 0x2000, 0x5090,
- 0x5048, 0x0824, 0x206c, 0x3800, 0x6890,
- 0x6848, 0x2024, 0x386c, 0x5000, 0x0890,
- 0x0848, 0x3824, 0x506c, 0x6800, 0x2090,
- 0x204a, 0x5026, 0x686e, 0x0802, 0x3892,
- 0x384a, 0x6826, 0x086e, 0x2002, 0x5092,
- 0x504a, 0x0826, 0x206e, 0x3802, 0x6892,
- 0x684a, 0x2026, 0x386e, 0x5002, 0x0892,
- 0x084a, 0x3826, 0x506e, 0x6802, 0x2092,
- 0x204c, 0x5028, 0x6870, 0x0804, 0x3894,
- 0x384c, 0x6828, 0x0870, 0x2004, 0x5094,
- 0x504c, 0x0828, 0x2070, 0x3804, 0x6894,
- 0x684c, 0x2028, 0x3870, 0x5004, 0x0894,
- 0x084c, 0x3828, 0x5070, 0x6804, 0x2094,
- 0x204e, 0x502a, 0x6872, 0x0806, 0x3896,
- 0x384e, 0x682a, 0x0872, 0x2006, 0x5096,
- 0x504e, 0x082a, 0x2072, 0x3806, 0x6896,
- 0x684e, 0x202a, 0x3872, 0x5006, 0x0896,
- 0x084e, 0x382a, 0x5072, 0x6806, 0x2096,
- 0x2050, 0x502c, 0x6874, 0x0808, 0x3898,
- 0x3850, 0x682c, 0x0874, 0x2008, 0x5098,
- 0x5050, 0x082c, 0x2074, 0x3808, 0x6898,
- 0x6850, 0x202c, 0x3874, 0x5008, 0x0898,
- 0x0850, 0x382c, 0x5074, 0x6808, 0x2098,
- 0x2052, 0x502e, 0x6876, 0x080a, 0x389a,
- 0x3852, 0x682e, 0x0876, 0x200a, 0x509a,
- 0x5052, 0x082e, 0x2076, 0x380a, 0x689a,
- 0x6852, 0x202e, 0x3876, 0x500a, 0x089a,
- 0x0852, 0x382e, 0x5076, 0x680a, 0x209a,
- 0x2054, 0x5030, 0x6878, 0x080c, 0x389c,
- 0x3854, 0x6830, 0x0878, 0x200c, 0x509c,
- 0x5054, 0x0830, 0x2078, 0x380c, 0x689c,
- 0x6854, 0x2030, 0x3878, 0x500c, 0x089c,
- 0x0854, 0x3830, 0x5078, 0x680c, 0x209c,
- 0x2056, 0x5032, 0x687a, 0x080e, 0x389e,
- 0x3856, 0x6832, 0x087a, 0x200e, 0x509e,
- 0x5056, 0x0832, 0x207a, 0x380e, 0x689e,
- 0x6856, 0x2032, 0x387a, 0x500e, 0x089e,
- 0x0856, 0x3832, 0x507a, 0x680e, 0x209e,
- 0x2058, 0x5034, 0x687c, 0x0810, 0x0078,
- 0x3858, 0x6834, 0x087c, 0x2010, 0x8214,
- 0x5058, 0x0834, 0x207c, 0x3810, 0x8264,
- 0x6858, 0x2034, 0x387c, 0x5010, 0x0000,
- 0x0858, 0x3834, 0x507c, 0x6810, 0x003c,
- 0x2448, 0x5424, 0x6c6c, 0x0c00, 0x3c90,
- 0x3c48, 0x6c24, 0x0c6c, 0x2400, 0x5490,
- 0x5448, 0x0c24, 0x246c, 0x3c00, 0x6c90,
- 0x6c48, 0x2424, 0x3c6c, 0x5400, 0x0c90,
- 0x0c48, 0x3c24, 0x546c, 0x6c00, 0x2490,
- 0x244a, 0x5426, 0x6c6e, 0x0c02, 0x3c92,
- 0x3c4a, 0x6c26, 0x0c6e, 0x2402, 0x5492,
- 0x544a, 0x0c26, 0x246e, 0x3c02, 0x6c92,
- 0x6c4a, 0x2426, 0x3c6e, 0x5402, 0x0c92,
- 0x0c4a, 0x3c26, 0x546e, 0x6c02, 0x2492,
- 0x244c, 0x5428, 0x6c70, 0x0c04, 0x3c94,
- 0x3c4c, 0x6c28, 0x0c70, 0x2404, 0x5494,
- 0x544c, 0x0c28, 0x2470, 0x3c04, 0x6c94,
- 0x6c4c, 0x2428, 0x3c70, 0x5404, 0x0c94,
- 0x0c4c, 0x3c28, 0x5470, 0x6c04, 0x2494,
- 0x244e, 0x542a, 0x6c72, 0x0c06, 0x3c96,
- 0x3c4e, 0x6c2a, 0x0c72, 0x2406, 0x5496,
- 0x544e, 0x0c2a, 0x2472, 0x3c06, 0x6c96,
- 0x6c4e, 0x242a, 0x3c72, 0x5406, 0x0c96,
- 0x0c4e, 0x3c2a, 0x5472, 0x6c06, 0x2496,
- 0x2450, 0x542c, 0x6c74, 0x0c08, 0x3c98,
- 0x3c50, 0x6c2c, 0x0c74, 0x2408, 0x5498,
- 0x5450, 0x0c2c, 0x2474, 0x3c08, 0x6c98,
- 0x6c50, 0x242c, 0x3c74, 0x5408, 0x0c98,
- 0x0c50, 0x3c2c, 0x5474, 0x6c08, 0x2498,
- 0x2452, 0x542e, 0x6c76, 0x0c0a, 0x3c9a,
- 0x3c52, 0x6c2e, 0x0c76, 0x240a, 0x549a,
- 0x5452, 0x0c2e, 0x2476, 0x3c0a, 0x6c9a,
- 0x6c52, 0x242e, 0x3c76, 0x540a, 0x0c9a,
- 0x0c52, 0x3c2e, 0x5476, 0x6c0a, 0x249a,
- 0x2454, 0x5430, 0x6c78, 0x0c0c, 0x3c9c,
- 0x3c54, 0x6c30, 0x0c78, 0x240c, 0x549c,
- 0x5454, 0x0c30, 0x2478, 0x3c0c, 0x6c9c,
- 0x6c54, 0x2430, 0x3c78, 0x540c, 0x0c9c,
- 0x0c54, 0x3c30, 0x5478, 0x6c0c, 0x249c,
- 0x2456, 0x5432, 0x6c7a, 0x0c0e, 0x3c9e,
- 0x3c56, 0x6c32, 0x0c7a, 0x240e, 0x549e,
- 0x5456, 0x0c32, 0x247a, 0x3c0e, 0x6c9e,
- 0x6c56, 0x2432, 0x3c7a, 0x540e, 0x0c9e,
- 0x0c56, 0x3c32, 0x547a, 0x6c0e, 0x249e,
- 0x2458, 0x5434, 0x6c7c, 0x0c10, 0x0478,
- 0x3c58, 0x6c34, 0x0c7c, 0x2410, 0x8028,
- 0x5458, 0x0c34, 0x247c, 0x3c10, 0x8078,
- 0x6c58, 0x2434, 0x3c7c, 0x5410, 0x0400,
- 0x0c58, 0x3c34, 0x547c, 0x6c10, 0x043c,
- 0x2848, 0x5824, 0x706c, 0x1000, 0x4090,
- 0x4048, 0x7024, 0x106c, 0x2800, 0x5890,
- 0x5848, 0x1024, 0x286c, 0x4000, 0x7090,
- 0x7048, 0x2824, 0x406c, 0x5800, 0x1090,
- 0x1048, 0x4024, 0x586c, 0x7000, 0x2890,
- 0x284a, 0x5826, 0x706e, 0x1002, 0x4092,
- 0x404a, 0x7026, 0x106e, 0x2802, 0x5892,
- 0x584a, 0x1026, 0x286e, 0x4002, 0x7092,
- 0x704a, 0x2826, 0x406e, 0x5802, 0x1092,
- 0x104a, 0x4026, 0x586e, 0x7002, 0x2892,
- 0x284c, 0x5828, 0x7070, 0x1004, 0x4094,
- 0x404c, 0x7028, 0x1070, 0x2804, 0x5894,
- 0x584c, 0x1028, 0x2870, 0x4004, 0x7094,
- 0x704c, 0x2828, 0x4070, 0x5804, 0x1094,
- 0x104c, 0x4028, 0x5870, 0x7004, 0x2894,
- 0x284e, 0x582a, 0x7072, 0x1006, 0x4096,
- 0x404e, 0x702a, 0x1072, 0x2806, 0x5896,
- 0x584e, 0x102a, 0x2872, 0x4006, 0x7096,
- 0x704e, 0x282a, 0x4072, 0x5806, 0x1096,
- 0x104e, 0x402a, 0x5872, 0x7006, 0x2896,
- 0x2850, 0x582c, 0x7074, 0x1008, 0x4098,
- 0x4050, 0x702c, 0x1074, 0x2808, 0x5898,
- 0x5850, 0x102c, 0x2874, 0x4008, 0x7098,
- 0x7050, 0x282c, 0x4074, 0x5808, 0x1098,
- 0x1050, 0x402c, 0x5874, 0x7008, 0x2898,
- 0x2852, 0x582e, 0x7076, 0x100a, 0x409a,
- 0x4052, 0x702e, 0x1076, 0x280a, 0x589a,
- 0x5852, 0x102e, 0x2876, 0x400a, 0x709a,
- 0x7052, 0x282e, 0x4076, 0x580a, 0x109a,
- 0x1052, 0x402e, 0x5876, 0x700a, 0x289a,
- 0x2854, 0x5830, 0x7078, 0x100c, 0x409c,
- 0x4054, 0x7030, 0x1078, 0x280c, 0x589c,
- 0x5854, 0x1030, 0x2878, 0x400c, 0x709c,
- 0x7054, 0x2830, 0x4078, 0x580c, 0x109c,
- 0x1054, 0x4030, 0x5878, 0x700c, 0x289c,
- 0x2856, 0x5832, 0x707a, 0x100e, 0x409e,
- 0x4056, 0x7032, 0x107a, 0x280e, 0x589e,
- 0x5856, 0x1032, 0x287a, 0x400e, 0x709e,
- 0x7056, 0x2832, 0x407a, 0x580e, 0x109e,
- 0x1056, 0x4032, 0x587a, 0x700e, 0x289e,
- 0x2858, 0x5834, 0x707c, 0x1010, 0x008c,
- 0x4058, 0x7034, 0x107c, 0x2810, 0x8428,
- 0x5858, 0x1034, 0x287c, 0x4010, 0x8478,
- 0x7058, 0x2834, 0x407c, 0x5810, 0x0014,
- 0x1058, 0x4034, 0x587c, 0x7010, 0x0050,
- 0x2c48, 0x5c24, 0x746c, 0x1400, 0x4490,
- 0x4448, 0x7424, 0x146c, 0x2c00, 0x5c90,
- 0x5c48, 0x1424, 0x2c6c, 0x4400, 0x7490,
- 0x7448, 0x2c24, 0x446c, 0x5c00, 0x1490,
- 0x1448, 0x4424, 0x5c6c, 0x7400, 0x2c90,
- 0x2c4a, 0x5c26, 0x746e, 0x1402, 0x4492,
- 0x444a, 0x7426, 0x146e, 0x2c02, 0x5c92,
- 0x5c4a, 0x1426, 0x2c6e, 0x4402, 0x7492,
- 0x744a, 0x2c26, 0x446e, 0x5c02, 0x1492,
- 0x144a, 0x4426, 0x5c6e, 0x7402, 0x2c92,
- 0x2c4c, 0x5c28, 0x7470, 0x1404, 0x4494,
- 0x444c, 0x7428, 0x1470, 0x2c04, 0x5c94,
- 0x5c4c, 0x1428, 0x2c70, 0x4404, 0x7494,
- 0x744c, 0x2c28, 0x4470, 0x5c04, 0x1494,
- 0x144c, 0x4428, 0x5c70, 0x7404, 0x2c94,
- 0x2c4e, 0x5c2a, 0x7472, 0x1406, 0x4496,
- 0x444e, 0x742a, 0x1472, 0x2c06, 0x5c96,
- 0x5c4e, 0x142a, 0x2c72, 0x4406, 0x7496,
- 0x744e, 0x2c2a, 0x4472, 0x5c06, 0x1496,
- 0x144e, 0x442a, 0x5c72, 0x7406, 0x2c96,
- 0x2c50, 0x5c2c, 0x7474, 0x1408, 0x4498,
- 0x4450, 0x742c, 0x1474, 0x2c08, 0x5c98,
- 0x5c50, 0x142c, 0x2c74, 0x4408, 0x7498,
- 0x7450, 0x2c2c, 0x4474, 0x5c08, 0x1498,
- 0x1450, 0x442c, 0x5c74, 0x7408, 0x2c98,
- 0x2c52, 0x5c2e, 0x7476, 0x140a, 0x449a,
- 0x4452, 0x742e, 0x1476, 0x2c0a, 0x5c9a,
- 0x5c52, 0x142e, 0x2c76, 0x440a, 0x749a,
- 0x7452, 0x2c2e, 0x4476, 0x5c0a, 0x149a,
- 0x1452, 0x442e, 0x5c76, 0x740a, 0x2c9a,
- 0x2c54, 0x5c30, 0x7478, 0x140c, 0x449c,
- 0x4454, 0x7430, 0x1478, 0x2c0c, 0x5c9c,
- 0x5c54, 0x1430, 0x2c78, 0x440c, 0x749c,
- 0x7454, 0x2c30, 0x4478, 0x5c0c, 0x149c,
- 0x1454, 0x4430, 0x5c78, 0x740c, 0x2c9c,
- 0x2c56, 0x5c32, 0x747a, 0x140e, 0x449e,
- 0x4456, 0x7432, 0x147a, 0x2c0e, 0x5c9e,
- 0x5c56, 0x1432, 0x2c7a, 0x440e, 0x749e,
- 0x7456, 0x2c32, 0x447a, 0x5c0e, 0x149e,
- 0x1456, 0x4432, 0x5c7a, 0x740e, 0x2c9e,
- 0x2c58, 0x5c34, 0x747c, 0x1410, 0x048c,
- 0x4458, 0x7434, 0x147c, 0x2c10, 0x823c,
- 0x5c58, 0x1434, 0x2c7c, 0x4410, 0x828c,
- 0x7458, 0x2c34, 0x447c, 0x5c10, 0x0414,
- 0x1458, 0x4434, 0x5c7c, 0x7410, 0x0450,
- 0x3048, 0x6024, 0x786c, 0x1800, 0x4890,
- 0x4848, 0x7824, 0x186c, 0x3000, 0x6090,
- 0x6048, 0x1824, 0x306c, 0x4800, 0x7890,
- 0x7848, 0x3024, 0x486c, 0x6000, 0x1890,
- 0x1848, 0x4824, 0x606c, 0x7800, 0x3090,
- 0x304a, 0x6026, 0x786e, 0x1802, 0x4892,
- 0x484a, 0x7826, 0x186e, 0x3002, 0x6092,
- 0x604a, 0x1826, 0x306e, 0x4802, 0x7892,
- 0x784a, 0x3026, 0x486e, 0x6002, 0x1892,
- 0x184a, 0x4826, 0x606e, 0x7802, 0x3092,
- 0x304c, 0x6028, 0x7870, 0x1804, 0x4894,
- 0x484c, 0x7828, 0x1870, 0x3004, 0x6094,
- 0x604c, 0x1828, 0x3070, 0x4804, 0x7894,
- 0x784c, 0x3028, 0x4870, 0x6004, 0x1894,
- 0x184c, 0x4828, 0x6070, 0x7804, 0x3094,
- 0x304e, 0x602a, 0x7872, 0x1806, 0x4896,
- 0x484e, 0x782a, 0x1872, 0x3006, 0x6096,
- 0x604e, 0x182a, 0x3072, 0x4806, 0x7896,
- 0x784e, 0x302a, 0x4872, 0x6006, 0x1896,
- 0x184e, 0x482a, 0x6072, 0x7806, 0x3096,
- 0x3050, 0x602c, 0x7874, 0x1808, 0x4898,
- 0x4850, 0x782c, 0x1874, 0x3008, 0x6098,
- 0x6050, 0x182c, 0x3074, 0x4808, 0x7898,
- 0x7850, 0x302c, 0x4874, 0x6008, 0x1898,
- 0x1850, 0x482c, 0x6074, 0x7808, 0x3098,
- 0x3052, 0x602e, 0x7876, 0x180a, 0x489a,
- 0x4852, 0x782e, 0x1876, 0x300a, 0x609a,
- 0x6052, 0x182e, 0x3076, 0x480a, 0x789a,
- 0x7852, 0x302e, 0x4876, 0x600a, 0x189a,
- 0x1852, 0x482e, 0x6076, 0x780a, 0x309a,
- 0x3054, 0x6030, 0x7878, 0x180c, 0x489c,
- 0x4854, 0x7830, 0x1878, 0x300c, 0x609c,
- 0x6054, 0x1830, 0x3078, 0x480c, 0x789c,
- 0x7854, 0x3030, 0x4878, 0x600c, 0x189c,
- 0x1854, 0x4830, 0x6078, 0x780c, 0x309c,
- 0x3056, 0x6032, 0x787a, 0x180e, 0x489e,
- 0x4856, 0x7832, 0x187a, 0x300e, 0x609e,
- 0x6056, 0x1832, 0x307a, 0x480e, 0x789e,
- 0x7856, 0x3032, 0x487a, 0x600e, 0x189e,
- 0x1856, 0x4832, 0x607a, 0x780e, 0x309e,
- 0x3058, 0x6034, 0x787c, 0x1810, 0x8000,
- 0x4858, 0x7834, 0x187c, 0x3010, 0x8050,
- 0x6058, 0x1834, 0x307c, 0x4810, 0x8600,
- 0x7858, 0x3034, 0x487c, 0x6010, 0x0028,
- 0x1858, 0x4834, 0x607c, 0x7810, 0x0064,
- 0x3448, 0x6424, 0x7c6c, 0x1c00, 0x4c90,
- 0x4c48, 0x7c24, 0x1c6c, 0x3400, 0x6490,
- 0x6448, 0x1c24, 0x346c, 0x4c00, 0x7c90,
- 0x7c48, 0x3424, 0x4c6c, 0x6400, 0x1c90,
- 0x1c48, 0x4c24, 0x646c, 0x7c00, 0x3490,
- 0x344a, 0x6426, 0x7c6e, 0x1c02, 0x4c92,
- 0x4c4a, 0x7c26, 0x1c6e, 0x3402, 0x6492,
- 0x644a, 0x1c26, 0x346e, 0x4c02, 0x7c92,
- 0x7c4a, 0x3426, 0x4c6e, 0x6402, 0x1c92,
- 0x1c4a, 0x4c26, 0x646e, 0x7c02, 0x3492,
- 0x344c, 0x6428, 0x7c70, 0x1c04, 0x4c94,
- 0x4c4c, 0x7c28, 0x1c70, 0x3404, 0x6494,
- 0x644c, 0x1c28, 0x3470, 0x4c04, 0x7c94,
- 0x7c4c, 0x3428, 0x4c70, 0x6404, 0x1c94,
- 0x1c4c, 0x4c28, 0x6470, 0x7c04, 0x3494,
- 0x344e, 0x642a, 0x7c72, 0x1c06, 0x4c96,
- 0x4c4e, 0x7c2a, 0x1c72, 0x3406, 0x6496,
- 0x644e, 0x1c2a, 0x3472, 0x4c06, 0x7c96,
- 0x7c4e, 0x342a, 0x4c72, 0x6406, 0x1c96,
- 0x1c4e, 0x4c2a, 0x6472, 0x7c06, 0x3496,
- 0x3450, 0x642c, 0x7c74, 0x1c08, 0x4c98,
- 0x4c50, 0x7c2c, 0x1c74, 0x3408, 0x6498,
- 0x6450, 0x1c2c, 0x3474, 0x4c08, 0x7c98,
- 0x7c50, 0x342c, 0x4c74, 0x6408, 0x1c98,
- 0x1c50, 0x4c2c, 0x6474, 0x7c08, 0x3498,
- 0x3452, 0x642e, 0x7c76, 0x1c0a, 0x4c9a,
- 0x4c52, 0x7c2e, 0x1c76, 0x340a, 0x649a,
- 0x6452, 0x1c2e, 0x3476, 0x4c0a, 0x7c9a,
- 0x7c52, 0x342e, 0x4c76, 0x640a, 0x1c9a,
- 0x1c52, 0x4c2e, 0x6476, 0x7c0a, 0x349a,
- 0x3454, 0x6430, 0x7c78, 0x1c0c, 0x4c9c,
- 0x4c54, 0x7c30, 0x1c78, 0x340c, 0x649c,
- 0x6454, 0x1c30, 0x3478, 0x4c0c, 0x7c9c,
- 0x7c54, 0x3430, 0x4c78, 0x640c, 0x1c9c,
- 0x1c54, 0x4c30, 0x6478, 0x7c0c, 0x349c,
- 0x3456, 0x6432, 0x7c7a, 0x1c0e, 0x4c9e,
- 0x4c56, 0x7c32, 0x1c7a, 0x340e, 0x649e,
- 0x6456, 0x1c32, 0x347a, 0x4c0e, 0x7c9e,
- 0x7c56, 0x3432, 0x4c7a, 0x640e, 0x1c9e,
- 0x1c56, 0x4c32, 0x647a, 0x7c0e, 0x349e,
- 0x3458, 0x6434, 0x7c7c, 0x1c10, 0x8400,
- 0x4c58, 0x7c34, 0x1c7c, 0x3410, 0x8450,
- 0x6458, 0x1c34, 0x347c, 0x4c10, 0x8650,
- 0x7c58, 0x3434, 0x4c7c, 0x6410, 0x0428,
- 0x1c58, 0x4c34, 0x647c, 0x7c10, 0x0464,
- 0x505a, 0x0836, 0x207e, 0x3812, 0x8266,
- 0x685a, 0x2036, 0x387e, 0x5012, 0x0002,
- 0x085a, 0x3836, 0x507e, 0x6812, 0x003e,
- 0x205a, 0x5036, 0x687e, 0x0812, 0x007a,
- 0x385a, 0x6836, 0x087e, 0x2012, 0x8216,
- 0x505c, 0x0838, 0x2080, 0x3814, 0x8268,
- 0x685c, 0x2038, 0x3880, 0x5014, 0x0004,
- 0x085c, 0x3838, 0x5080, 0x6814, 0x0040,
- 0x205c, 0x5038, 0x6880, 0x0814, 0x007c,
- 0x385c, 0x6838, 0x0880, 0x2014, 0x8218,
- 0x505e, 0x083a, 0x2082, 0x3816, 0x826a,
- 0x685e, 0x203a, 0x3882, 0x5016, 0x0006,
- 0x085e, 0x383a, 0x5082, 0x6816, 0x0042,
- 0x205e, 0x503a, 0x6882, 0x0816, 0x007e,
- 0x385e, 0x683a, 0x0882, 0x2016, 0x821a,
- 0x5060, 0x083c, 0x2084, 0x3818, 0x826c,
- 0x6860, 0x203c, 0x3884, 0x5018, 0x0008,
- 0x0860, 0x383c, 0x5084, 0x6818, 0x0044,
- 0x2060, 0x503c, 0x6884, 0x0818, 0x0080,
- 0x3860, 0x683c, 0x0884, 0x2018, 0x821c,
- 0x5062, 0x083e, 0x2086, 0x381a, 0x826e,
- 0x6862, 0x203e, 0x3886, 0x501a, 0x000a,
- 0x0862, 0x383e, 0x5086, 0x681a, 0x0046,
- 0x2062, 0x503e, 0x6886, 0x081a, 0x0082,
- 0x3862, 0x683e, 0x0886, 0x201a, 0x821e,
- 0x5064, 0x0840, 0x2088, 0x381c, 0x8270,
- 0x6864, 0x2040, 0x3888, 0x501c, 0x000c,
- 0x0864, 0x3840, 0x5088, 0x681c, 0x0048,
- 0x2064, 0x5040, 0x6888, 0x081c, 0x0084,
- 0x3864, 0x6840, 0x0888, 0x201c, 0x8220,
- 0x5066, 0x0842, 0x208a, 0x381e, 0x8272,
- 0x6866, 0x2042, 0x388a, 0x501e, 0x000e,
- 0x0866, 0x3842, 0x508a, 0x681e, 0x004a,
- 0x2066, 0x5042, 0x688a, 0x081e, 0x0086,
- 0x3866, 0x6842, 0x088a, 0x201e, 0x8222,
- 0x5068, 0x0844, 0x208c, 0x3820, 0x8274,
- 0x6868, 0x2044, 0x388c, 0x5020, 0x0010,
- 0x0868, 0x3844, 0x508c, 0x6820, 0x004c,
- 0x2068, 0x5044, 0x688c, 0x0820, 0x0088,
- 0x3868, 0x6844, 0x088c, 0x2020, 0x8224,
- 0x506a, 0x0846, 0x208e, 0x3822, 0x8276,
- 0x686a, 0x2046, 0x388e, 0x5022, 0x0012,
- 0x086a, 0x3846, 0x508e, 0x6822, 0x004e,
- 0x206a, 0x5046, 0x688e, 0x0822, 0x008a,
- 0x386a, 0x6846, 0x088e, 0x2022, 0x8226,
- 0x545a, 0x0c36, 0x247e, 0x3c12, 0x807a,
- 0x6c5a, 0x2436, 0x3c7e, 0x5412, 0x0402,
- 0x0c5a, 0x3c36, 0x547e, 0x6c12, 0x043e,
- 0x245a, 0x5436, 0x6c7e, 0x0c12, 0x047a,
- 0x3c5a, 0x6c36, 0x0c7e, 0x2412, 0x802a,
- 0x545c, 0x0c38, 0x2480, 0x3c14, 0x807c,
- 0x6c5c, 0x2438, 0x3c80, 0x5414, 0x0404,
- 0x0c5c, 0x3c38, 0x5480, 0x6c14, 0x0440,
- 0x245c, 0x5438, 0x6c80, 0x0c14, 0x047c,
- 0x3c5c, 0x6c38, 0x0c80, 0x2414, 0x802c,
- 0x545e, 0x0c3a, 0x2482, 0x3c16, 0x807e,
- 0x6c5e, 0x243a, 0x3c82, 0x5416, 0x0406,
- 0x0c5e, 0x3c3a, 0x5482, 0x6c16, 0x0442,
- 0x245e, 0x543a, 0x6c82, 0x0c16, 0x047e,
- 0x3c5e, 0x6c3a, 0x0c82, 0x2416, 0x802e,
- 0x5460, 0x0c3c, 0x2484, 0x3c18, 0x8080,
- 0x6c60, 0x243c, 0x3c84, 0x5418, 0x0408,
- 0x0c60, 0x3c3c, 0x5484, 0x6c18, 0x0444,
- 0x2460, 0x543c, 0x6c84, 0x0c18, 0x0480,
- 0x3c60, 0x6c3c, 0x0c84, 0x2418, 0x8030,
- 0x5462, 0x0c3e, 0x2486, 0x3c1a, 0x8082,
- 0x6c62, 0x243e, 0x3c86, 0x541a, 0x040a,
- 0x0c62, 0x3c3e, 0x5486, 0x6c1a, 0x0446,
- 0x2462, 0x543e, 0x6c86, 0x0c1a, 0x0482,
- 0x3c62, 0x6c3e, 0x0c86, 0x241a, 0x8032,
- 0x5464, 0x0c40, 0x2488, 0x3c1c, 0x8084,
- 0x6c64, 0x2440, 0x3c88, 0x541c, 0x040c,
- 0x0c64, 0x3c40, 0x5488, 0x6c1c, 0x0448,
- 0x2464, 0x5440, 0x6c88, 0x0c1c, 0x0484,
- 0x3c64, 0x6c40, 0x0c88, 0x241c, 0x8034,
- 0x5466, 0x0c42, 0x248a, 0x3c1e, 0x8086,
- 0x6c66, 0x2442, 0x3c8a, 0x541e, 0x040e,
- 0x0c66, 0x3c42, 0x548a, 0x6c1e, 0x044a,
- 0x2466, 0x5442, 0x6c8a, 0x0c1e, 0x0486,
- 0x3c66, 0x6c42, 0x0c8a, 0x241e, 0x8036,
- 0x5468, 0x0c44, 0x248c, 0x3c20, 0x8088,
- 0x6c68, 0x2444, 0x3c8c, 0x5420, 0x0410,
- 0x0c68, 0x3c44, 0x548c, 0x6c20, 0x044c,
- 0x2468, 0x5444, 0x6c8c, 0x0c20, 0x0488,
- 0x3c68, 0x6c44, 0x0c8c, 0x2420, 0x8038,
- 0x546a, 0x0c46, 0x248e, 0x3c22, 0x808a,
- 0x6c6a, 0x2446, 0x3c8e, 0x5422, 0x0412,
- 0x0c6a, 0x3c46, 0x548e, 0x6c22, 0x044e,
- 0x246a, 0x5446, 0x6c8e, 0x0c22, 0x048a,
- 0x3c6a, 0x6c46, 0x0c8e, 0x2422, 0x803a,
- 0x585a, 0x1036, 0x287e, 0x4012, 0x847a,
- 0x705a, 0x2836, 0x407e, 0x5812, 0x0016,
- 0x105a, 0x4036, 0x587e, 0x7012, 0x0052,
- 0x285a, 0x5836, 0x707e, 0x1012, 0x008e,
- 0x405a, 0x7036, 0x107e, 0x2812, 0x842a,
- 0x585c, 0x1038, 0x2880, 0x4014, 0x847c,
- 0x705c, 0x2838, 0x4080, 0x5814, 0x0018,
- 0x105c, 0x4038, 0x5880, 0x7014, 0x0054,
- 0x285c, 0x5838, 0x7080, 0x1014, 0x0090,
- 0x405c, 0x7038, 0x1080, 0x2814, 0x842c,
- 0x585e, 0x103a, 0x2882, 0x4016, 0x847e,
- 0x705e, 0x283a, 0x4082, 0x5816, 0x001a,
- 0x105e, 0x403a, 0x5882, 0x7016, 0x0056,
- 0x285e, 0x583a, 0x7082, 0x1016, 0x0092,
- 0x405e, 0x703a, 0x1082, 0x2816, 0x842e,
- 0x5860, 0x103c, 0x2884, 0x4018, 0x8480,
- 0x7060, 0x283c, 0x4084, 0x5818, 0x001c,
- 0x1060, 0x403c, 0x5884, 0x7018, 0x0058,
- 0x2860, 0x583c, 0x7084, 0x1018, 0x0094,
- 0x4060, 0x703c, 0x1084, 0x2818, 0x8430,
- 0x5862, 0x103e, 0x2886, 0x401a, 0x8482,
- 0x7062, 0x283e, 0x4086, 0x581a, 0x001e,
- 0x1062, 0x403e, 0x5886, 0x701a, 0x005a,
- 0x2862, 0x583e, 0x7086, 0x101a, 0x0096,
- 0x4062, 0x703e, 0x1086, 0x281a, 0x8432,
- 0x5864, 0x1040, 0x2888, 0x401c, 0x8484,
- 0x7064, 0x2840, 0x4088, 0x581c, 0x0020,
- 0x1064, 0x4040, 0x5888, 0x701c, 0x005c,
- 0x2864, 0x5840, 0x7088, 0x101c, 0x0098,
- 0x4064, 0x7040, 0x1088, 0x281c, 0x8434,
- 0x5866, 0x1042, 0x288a, 0x401e, 0x8486,
- 0x7066, 0x2842, 0x408a, 0x581e, 0x0022,
- 0x1066, 0x4042, 0x588a, 0x701e, 0x005e,
- 0x2866, 0x5842, 0x708a, 0x101e, 0x009a,
- 0x4066, 0x7042, 0x108a, 0x281e, 0x8436,
- 0x5868, 0x1044, 0x288c, 0x4020, 0x8488,
- 0x7068, 0x2844, 0x408c, 0x5820, 0x0024,
- 0x1068, 0x4044, 0x588c, 0x7020, 0x0060,
- 0x2868, 0x5844, 0x708c, 0x1020, 0x009c,
- 0x4068, 0x7044, 0x108c, 0x2820, 0x8438,
- 0x586a, 0x1046, 0x288e, 0x4022, 0x848a,
- 0x706a, 0x2846, 0x408e, 0x5822, 0x0026,
- 0x106a, 0x4046, 0x588e, 0x7022, 0x0062,
- 0x286a, 0x5846, 0x708e, 0x1022, 0x009e,
- 0x406a, 0x7046, 0x108e, 0x2822, 0x843a,
- 0x5c5a, 0x1436, 0x2c7e, 0x4412, 0x828e,
- 0x745a, 0x2c36, 0x447e, 0x5c12, 0x0416,
- 0x145a, 0x4436, 0x5c7e, 0x7412, 0x0452,
- 0x2c5a, 0x5c36, 0x747e, 0x1412, 0x048e,
- 0x445a, 0x7436, 0x147e, 0x2c12, 0x823e,
- 0x5c5c, 0x1438, 0x2c80, 0x4414, 0x8290,
- 0x745c, 0x2c38, 0x4480, 0x5c14, 0x0418,
- 0x145c, 0x4438, 0x5c80, 0x7414, 0x0454,
- 0x2c5c, 0x5c38, 0x7480, 0x1414, 0x0490,
- 0x445c, 0x7438, 0x1480, 0x2c14, 0x8240,
- 0x5c5e, 0x143a, 0x2c82, 0x4416, 0x8292,
- 0x745e, 0x2c3a, 0x4482, 0x5c16, 0x041a,
- 0x145e, 0x443a, 0x5c82, 0x7416, 0x0456,
- 0x2c5e, 0x5c3a, 0x7482, 0x1416, 0x0492,
- 0x445e, 0x743a, 0x1482, 0x2c16, 0x8242,
- 0x5c60, 0x143c, 0x2c84, 0x4418, 0x8294,
- 0x7460, 0x2c3c, 0x4484, 0x5c18, 0x041c,
- 0x1460, 0x443c, 0x5c84, 0x7418, 0x0458,
- 0x2c60, 0x5c3c, 0x7484, 0x1418, 0x0494,
- 0x4460, 0x743c, 0x1484, 0x2c18, 0x8244,
- 0x5c62, 0x143e, 0x2c86, 0x441a, 0x8296,
- 0x7462, 0x2c3e, 0x4486, 0x5c1a, 0x041e,
- 0x1462, 0x443e, 0x5c86, 0x741a, 0x045a,
- 0x2c62, 0x5c3e, 0x7486, 0x141a, 0x0496,
- 0x4462, 0x743e, 0x1486, 0x2c1a, 0x8246,
- 0x5c64, 0x1440, 0x2c88, 0x441c, 0x8298,
- 0x7464, 0x2c40, 0x4488, 0x5c1c, 0x0420,
- 0x1464, 0x4440, 0x5c88, 0x741c, 0x045c,
- 0x2c64, 0x5c40, 0x7488, 0x141c, 0x0498,
- 0x4464, 0x7440, 0x1488, 0x2c1c, 0x8248,
- 0x5c66, 0x1442, 0x2c8a, 0x441e, 0x829a,
- 0x7466, 0x2c42, 0x448a, 0x5c1e, 0x0422,
- 0x1466, 0x4442, 0x5c8a, 0x741e, 0x045e,
- 0x2c66, 0x5c42, 0x748a, 0x141e, 0x049a,
- 0x4466, 0x7442, 0x148a, 0x2c1e, 0x824a,
- 0x5c68, 0x1444, 0x2c8c, 0x4420, 0x829c,
- 0x7468, 0x2c44, 0x448c, 0x5c20, 0x0424,
- 0x1468, 0x4444, 0x5c8c, 0x7420, 0x0460,
- 0x2c68, 0x5c44, 0x748c, 0x1420, 0x049c,
- 0x4468, 0x7444, 0x148c, 0x2c20, 0x824c,
- 0x5c6a, 0x1446, 0x2c8e, 0x4422, 0x829e,
- 0x746a, 0x2c46, 0x448e, 0x5c22, 0x0426,
- 0x146a, 0x4446, 0x5c8e, 0x7422, 0x0462,
- 0x2c6a, 0x5c46, 0x748e, 0x1422, 0x049e,
- 0x446a, 0x7446, 0x148e, 0x2c22, 0x824e,
- 0x605a, 0x1836, 0x307e, 0x4812, 0x8604,
- 0x785a, 0x3036, 0x487e, 0x6012, 0x002a,
- 0x185a, 0x4836, 0x607e, 0x7812, 0x0066,
- 0x305a, 0x6036, 0x787e, 0x1812, 0x8002,
- 0x485a, 0x7836, 0x187e, 0x3012, 0x8052,
- 0x605c, 0x1838, 0x3080, 0x4814, 0x8608,
- 0x785c, 0x3038, 0x4880, 0x6014, 0x002c,
- 0x185c, 0x4838, 0x6080, 0x7814, 0x0068,
- 0x305c, 0x6038, 0x7880, 0x1814, 0x8004,
- 0x485c, 0x7838, 0x1880, 0x3014, 0x8054,
- 0x605e, 0x183a, 0x3082, 0x4816, 0x860c,
- 0x785e, 0x303a, 0x4882, 0x6016, 0x002e,
- 0x185e, 0x483a, 0x6082, 0x7816, 0x006a,
- 0x305e, 0x603a, 0x7882, 0x1816, 0x8006,
- 0x485e, 0x783a, 0x1882, 0x3016, 0x8056,
- 0x6060, 0x183c, 0x3084, 0x4818, 0x8610,
- 0x7860, 0x303c, 0x4884, 0x6018, 0x0030,
- 0x1860, 0x483c, 0x6084, 0x7818, 0x006c,
- 0x3060, 0x603c, 0x7884, 0x1818, 0x8008,
- 0x4860, 0x783c, 0x1884, 0x3018, 0x8058,
- 0x6062, 0x183e, 0x3086, 0x481a, 0x8614,
- 0x7862, 0x303e, 0x4886, 0x601a, 0x0032,
- 0x1862, 0x483e, 0x6086, 0x781a, 0x006e,
- 0x3062, 0x603e, 0x7886, 0x181a, 0x800a,
- 0x4862, 0x783e, 0x1886, 0x301a, 0x805a,
- 0x6064, 0x1840, 0x3088, 0x481c, 0x8618,
- 0x7864, 0x3040, 0x4888, 0x601c, 0x0034,
- 0x1864, 0x4840, 0x6088, 0x781c, 0x0070,
- 0x3064, 0x6040, 0x7888, 0x181c, 0x800c,
- 0x4864, 0x7840, 0x1888, 0x301c, 0x805c,
- 0x6066, 0x1842, 0x308a, 0x481e, 0x861c,
- 0x7866, 0x3042, 0x488a, 0x601e, 0x0036,
- 0x1866, 0x4842, 0x608a, 0x781e, 0x0072,
- 0x3066, 0x6042, 0x788a, 0x181e, 0x800e,
- 0x4866, 0x7842, 0x188a, 0x301e, 0x805e,
- 0x6068, 0x1844, 0x308c, 0x4820, 0x8620,
- 0x7868, 0x3044, 0x488c, 0x6020, 0x0038,
- 0x1868, 0x4844, 0x608c, 0x7820, 0x0074,
- 0x3068, 0x6044, 0x788c, 0x1820, 0x8010,
- 0x4868, 0x7844, 0x188c, 0x3020, 0x8060,
- 0x606a, 0x1846, 0x308e, 0x4822, 0x8624,
- 0x786a, 0x3046, 0x488e, 0x6022, 0x003a,
- 0x186a, 0x4846, 0x608e, 0x7822, 0x0076,
- 0x306a, 0x6046, 0x788e, 0x1822, 0x8012,
- 0x486a, 0x7846, 0x188e, 0x3022, 0x8062,
- 0x645a, 0x1c36, 0x347e, 0x4c12, 0x8654,
- 0x7c5a, 0x3436, 0x4c7e, 0x6412, 0x042a,
- 0x1c5a, 0x4c36, 0x647e, 0x7c12, 0x0466,
- 0x345a, 0x6436, 0x7c7e, 0x1c12, 0x8402,
- 0x4c5a, 0x7c36, 0x1c7e, 0x3412, 0x8452,
- 0x645c, 0x1c38, 0x3480, 0x4c14, 0x8658,
- 0x7c5c, 0x3438, 0x4c80, 0x6414, 0x042c,
- 0x1c5c, 0x4c38, 0x6480, 0x7c14, 0x0468,
- 0x345c, 0x6438, 0x7c80, 0x1c14, 0x8404,
- 0x4c5c, 0x7c38, 0x1c80, 0x3414, 0x8454,
- 0x645e, 0x1c3a, 0x3482, 0x4c16, 0x865c,
- 0x7c5e, 0x343a, 0x4c82, 0x6416, 0x042e,
- 0x1c5e, 0x4c3a, 0x6482, 0x7c16, 0x046a,
- 0x345e, 0x643a, 0x7c82, 0x1c16, 0x8406,
- 0x4c5e, 0x7c3a, 0x1c82, 0x3416, 0x8456,
- 0x6460, 0x1c3c, 0x3484, 0x4c18, 0x8660,
- 0x7c60, 0x343c, 0x4c84, 0x6418, 0x0430,
- 0x1c60, 0x4c3c, 0x6484, 0x7c18, 0x046c,
- 0x3460, 0x643c, 0x7c84, 0x1c18, 0x8408,
- 0x4c60, 0x7c3c, 0x1c84, 0x3418, 0x8458,
- 0x6462, 0x1c3e, 0x3486, 0x4c1a, 0x8664,
- 0x7c62, 0x343e, 0x4c86, 0x641a, 0x0432,
- 0x1c62, 0x4c3e, 0x6486, 0x7c1a, 0x046e,
- 0x3462, 0x643e, 0x7c86, 0x1c1a, 0x840a,
- 0x4c62, 0x7c3e, 0x1c86, 0x341a, 0x845a,
- 0x6464, 0x1c40, 0x3488, 0x4c1c, 0x8668,
- 0x7c64, 0x3440, 0x4c88, 0x641c, 0x0434,
- 0x1c64, 0x4c40, 0x6488, 0x7c1c, 0x0470,
- 0x3464, 0x6440, 0x7c88, 0x1c1c, 0x840c,
- 0x4c64, 0x7c40, 0x1c88, 0x341c, 0x845c,
- 0x6466, 0x1c42, 0x348a, 0x4c1e, 0x866c,
- 0x7c66, 0x3442, 0x4c8a, 0x641e, 0x0436,
- 0x1c66, 0x4c42, 0x648a, 0x7c1e, 0x0472,
- 0x3466, 0x6442, 0x7c8a, 0x1c1e, 0x840e,
- 0x4c66, 0x7c42, 0x1c8a, 0x341e, 0x845e,
- 0x6468, 0x1c44, 0x348c, 0x4c20, 0x8670,
- 0x7c68, 0x3444, 0x4c8c, 0x6420, 0x0438,
- 0x1c68, 0x4c44, 0x648c, 0x7c20, 0x0474,
- 0x3468, 0x6444, 0x7c8c, 0x1c20, 0x8410,
- 0x4c68, 0x7c44, 0x1c8c, 0x3420, 0x8460,
- 0x646a, 0x1c46, 0x348e, 0x4c22, 0x8674,
- 0x7c6a, 0x3446, 0x4c8e, 0x6422, 0x043a,
- 0x1c6a, 0x4c46, 0x648e, 0x7c22, 0x0476,
- 0x346a, 0x6446, 0x7c8e, 0x1c22, 0x8412,
- 0x4c6a, 0x7c46, 0x1c8e, 0x3422, 0x8462,
- 0x0a48, 0x3a24, 0x526c, 0x6a00, 0x2290,
- 0x2248, 0x5224, 0x6a6c, 0x0a00, 0x3a90,
- 0x3a48, 0x6a24, 0x0a6c, 0x2200, 0x5290,
- 0x5248, 0x0a24, 0x226c, 0x3a00, 0x6a90,
- 0x6a48, 0x2224, 0x3a6c, 0x5200, 0x0a90,
- 0x0a4a, 0x3a26, 0x526e, 0x6a02, 0x2292,
- 0x224a, 0x5226, 0x6a6e, 0x0a02, 0x3a92,
- 0x3a4a, 0x6a26, 0x0a6e, 0x2202, 0x5292,
- 0x524a, 0x0a26, 0x226e, 0x3a02, 0x6a92,
- 0x6a4a, 0x2226, 0x3a6e, 0x5202, 0x0a92,
- 0x0a4c, 0x3a28, 0x5270, 0x6a04, 0x2294,
- 0x224c, 0x5228, 0x6a70, 0x0a04, 0x3a94,
- 0x3a4c, 0x6a28, 0x0a70, 0x2204, 0x5294,
- 0x524c, 0x0a28, 0x2270, 0x3a04, 0x6a94,
- 0x6a4c, 0x2228, 0x3a70, 0x5204, 0x0a94,
- 0x0a4e, 0x3a2a, 0x5272, 0x6a06, 0x2296,
- 0x224e, 0x522a, 0x6a72, 0x0a06, 0x3a96,
- 0x3a4e, 0x6a2a, 0x0a72, 0x2206, 0x5296,
- 0x524e, 0x0a2a, 0x2272, 0x3a06, 0x6a96,
- 0x6a4e, 0x222a, 0x3a72, 0x5206, 0x0a96,
- 0x0a50, 0x3a2c, 0x5274, 0x6a08, 0x2298,
- 0x2250, 0x522c, 0x6a74, 0x0a08, 0x3a98,
- 0x3a50, 0x6a2c, 0x0a74, 0x2208, 0x5298,
- 0x5250, 0x0a2c, 0x2274, 0x3a08, 0x6a98,
- 0x6a50, 0x222c, 0x3a74, 0x5208, 0x0a98,
- 0x0a52, 0x3a2e, 0x5276, 0x6a0a, 0x229a,
- 0x2252, 0x522e, 0x6a76, 0x0a0a, 0x3a9a,
- 0x3a52, 0x6a2e, 0x0a76, 0x220a, 0x529a,
- 0x5252, 0x0a2e, 0x2276, 0x3a0a, 0x6a9a,
- 0x6a52, 0x222e, 0x3a76, 0x520a, 0x0a9a,
- 0x0a54, 0x3a30, 0x5278, 0x6a0c, 0x229c,
- 0x2254, 0x5230, 0x6a78, 0x0a0c, 0x3a9c,
- 0x3a54, 0x6a30, 0x0a78, 0x220c, 0x529c,
- 0x5254, 0x0a30, 0x2278, 0x3a0c, 0x6a9c,
- 0x6a54, 0x2230, 0x3a78, 0x520c, 0x0a9c,
- 0x0a56, 0x3a32, 0x527a, 0x6a0e, 0x229e,
- 0x2256, 0x5232, 0x6a7a, 0x0a0e, 0x3a9e,
- 0x3a56, 0x6a32, 0x0a7a, 0x220e, 0x529e,
- 0x5256, 0x0a32, 0x227a, 0x3a0e, 0x6a9e,
- 0x6a56, 0x2232, 0x3a7a, 0x520e, 0x0a9e,
- 0x0a58, 0x3a34, 0x527c, 0x6a10, 0x023c,
- 0x2258, 0x5234, 0x6a7c, 0x0a10, 0x0278,
- 0x3a58, 0x6a34, 0x0a7c, 0x2210, 0x8414,
- 0x5258, 0x0a34, 0x227c, 0x3a10, 0x8464,
- 0x6a58, 0x2234, 0x3a7c, 0x5210, 0x0200,
- 0x0e48, 0x3e24, 0x566c, 0x6e00, 0x2690,
- 0x2648, 0x5624, 0x6e6c, 0x0e00, 0x3e90,
- 0x3e48, 0x6e24, 0x0e6c, 0x2600, 0x5690,
- 0x5648, 0x0e24, 0x266c, 0x3e00, 0x6e90,
- 0x6e48, 0x2624, 0x3e6c, 0x5600, 0x0e90,
- 0x0e4a, 0x3e26, 0x566e, 0x6e02, 0x2692,
- 0x264a, 0x5626, 0x6e6e, 0x0e02, 0x3e92,
- 0x3e4a, 0x6e26, 0x0e6e, 0x2602, 0x5692,
- 0x564a, 0x0e26, 0x266e, 0x3e02, 0x6e92,
- 0x6e4a, 0x2626, 0x3e6e, 0x5602, 0x0e92,
- 0x0e4c, 0x3e28, 0x5670, 0x6e04, 0x2694,
- 0x264c, 0x5628, 0x6e70, 0x0e04, 0x3e94,
- 0x3e4c, 0x6e28, 0x0e70, 0x2604, 0x5694,
- 0x564c, 0x0e28, 0x2670, 0x3e04, 0x6e94,
- 0x6e4c, 0x2628, 0x3e70, 0x5604, 0x0e94,
- 0x0e4e, 0x3e2a, 0x5672, 0x6e06, 0x2696,
- 0x264e, 0x562a, 0x6e72, 0x0e06, 0x3e96,
- 0x3e4e, 0x6e2a, 0x0e72, 0x2606, 0x5696,
- 0x564e, 0x0e2a, 0x2672, 0x3e06, 0x6e96,
- 0x6e4e, 0x262a, 0x3e72, 0x5606, 0x0e96,
- 0x0e50, 0x3e2c, 0x5674, 0x6e08, 0x2698,
- 0x2650, 0x562c, 0x6e74, 0x0e08, 0x3e98,
- 0x3e50, 0x6e2c, 0x0e74, 0x2608, 0x5698,
- 0x5650, 0x0e2c, 0x2674, 0x3e08, 0x6e98,
- 0x6e50, 0x262c, 0x3e74, 0x5608, 0x0e98,
- 0x0e52, 0x3e2e, 0x5676, 0x6e0a, 0x269a,
- 0x2652, 0x562e, 0x6e76, 0x0e0a, 0x3e9a,
- 0x3e52, 0x6e2e, 0x0e76, 0x260a, 0x569a,
- 0x5652, 0x0e2e, 0x2676, 0x3e0a, 0x6e9a,
- 0x6e52, 0x262e, 0x3e76, 0x560a, 0x0e9a,
- 0x0e54, 0x3e30, 0x5678, 0x6e0c, 0x269c,
- 0x2654, 0x5630, 0x6e78, 0x0e0c, 0x3e9c,
- 0x3e54, 0x6e30, 0x0e78, 0x260c, 0x569c,
- 0x5654, 0x0e30, 0x2678, 0x3e0c, 0x6e9c,
- 0x6e54, 0x2630, 0x3e78, 0x560c, 0x0e9c,
- 0x0e56, 0x3e32, 0x567a, 0x6e0e, 0x269e,
- 0x2656, 0x5632, 0x6e7a, 0x0e0e, 0x3e9e,
- 0x3e56, 0x6e32, 0x0e7a, 0x260e, 0x569e,
- 0x5656, 0x0e32, 0x267a, 0x3e0e, 0x6e9e,
- 0x6e56, 0x2632, 0x3e7a, 0x560e, 0x0e9e,
- 0x0e58, 0x3e34, 0x567c, 0x6e10, 0x063c,
- 0x2658, 0x5634, 0x6e7c, 0x0e10, 0x0678,
- 0x3e58, 0x6e34, 0x0e7c, 0x2610, 0x8228,
- 0x5658, 0x0e34, 0x267c, 0x3e10, 0x8278,
- 0x6e58, 0x2634, 0x3e7c, 0x5610, 0x0600,
- 0x1248, 0x4224, 0x5a6c, 0x7200, 0x2a90,
- 0x2a48, 0x5a24, 0x726c, 0x1200, 0x4290,
- 0x4248, 0x7224, 0x126c, 0x2a00, 0x5a90,
- 0x5a48, 0x1224, 0x2a6c, 0x4200, 0x7290,
- 0x7248, 0x2a24, 0x426c, 0x5a00, 0x1290,
- 0x124a, 0x4226, 0x5a6e, 0x7202, 0x2a92,
- 0x2a4a, 0x5a26, 0x726e, 0x1202, 0x4292,
- 0x424a, 0x7226, 0x126e, 0x2a02, 0x5a92,
- 0x5a4a, 0x1226, 0x2a6e, 0x4202, 0x7292,
- 0x724a, 0x2a26, 0x426e, 0x5a02, 0x1292,
- 0x124c, 0x4228, 0x5a70, 0x7204, 0x2a94,
- 0x2a4c, 0x5a28, 0x7270, 0x1204, 0x4294,
- 0x424c, 0x7228, 0x1270, 0x2a04, 0x5a94,
- 0x5a4c, 0x1228, 0x2a70, 0x4204, 0x7294,
- 0x724c, 0x2a28, 0x4270, 0x5a04, 0x1294,
- 0x124e, 0x422a, 0x5a72, 0x7206, 0x2a96,
- 0x2a4e, 0x5a2a, 0x7272, 0x1206, 0x4296,
- 0x424e, 0x722a, 0x1272, 0x2a06, 0x5a96,
- 0x5a4e, 0x122a, 0x2a72, 0x4206, 0x7296,
- 0x724e, 0x2a2a, 0x4272, 0x5a06, 0x1296,
- 0x1250, 0x422c, 0x5a74, 0x7208, 0x2a98,
- 0x2a50, 0x5a2c, 0x7274, 0x1208, 0x4298,
- 0x4250, 0x722c, 0x1274, 0x2a08, 0x5a98,
- 0x5a50, 0x122c, 0x2a74, 0x4208, 0x7298,
- 0x7250, 0x2a2c, 0x4274, 0x5a08, 0x1298,
- 0x1252, 0x422e, 0x5a76, 0x720a, 0x2a9a,
- 0x2a52, 0x5a2e, 0x7276, 0x120a, 0x429a,
- 0x4252, 0x722e, 0x1276, 0x2a0a, 0x5a9a,
- 0x5a52, 0x122e, 0x2a76, 0x420a, 0x729a,
- 0x7252, 0x2a2e, 0x4276, 0x5a0a, 0x129a,
- 0x1254, 0x4230, 0x5a78, 0x720c, 0x2a9c,
- 0x2a54, 0x5a30, 0x7278, 0x120c, 0x429c,
- 0x4254, 0x7230, 0x1278, 0x2a0c, 0x5a9c,
- 0x5a54, 0x1230, 0x2a78, 0x420c, 0x729c,
- 0x7254, 0x2a30, 0x4278, 0x5a0c, 0x129c,
- 0x1256, 0x4232, 0x5a7a, 0x720e, 0x2a9e,
- 0x2a56, 0x5a32, 0x727a, 0x120e, 0x429e,
- 0x4256, 0x7232, 0x127a, 0x2a0e, 0x5a9e,
- 0x5a56, 0x1232, 0x2a7a, 0x420e, 0x729e,
- 0x7256, 0x2a32, 0x427a, 0x5a0e, 0x129e,
- 0x1258, 0x4234, 0x5a7c, 0x7210, 0x0250,
- 0x2a58, 0x5a34, 0x727c, 0x1210, 0x028c,
- 0x4258, 0x7234, 0x127c, 0x2a10, 0x803c,
- 0x5a58, 0x1234, 0x2a7c, 0x4210, 0x808c,
- 0x7258, 0x2a34, 0x427c, 0x5a10, 0x0214,
- 0x1648, 0x4624, 0x5e6c, 0x7600, 0x2e90,
- 0x2e48, 0x5e24, 0x766c, 0x1600, 0x4690,
- 0x4648, 0x7624, 0x166c, 0x2e00, 0x5e90,
- 0x5e48, 0x1624, 0x2e6c, 0x4600, 0x7690,
- 0x7648, 0x2e24, 0x466c, 0x5e00, 0x1690,
- 0x164a, 0x4626, 0x5e6e, 0x7602, 0x2e92,
- 0x2e4a, 0x5e26, 0x766e, 0x1602, 0x4692,
- 0x464a, 0x7626, 0x166e, 0x2e02, 0x5e92,
- 0x5e4a, 0x1626, 0x2e6e, 0x4602, 0x7692,
- 0x764a, 0x2e26, 0x466e, 0x5e02, 0x1692,
- 0x164c, 0x4628, 0x5e70, 0x7604, 0x2e94,
- 0x2e4c, 0x5e28, 0x7670, 0x1604, 0x4694,
- 0x464c, 0x7628, 0x1670, 0x2e04, 0x5e94,
- 0x5e4c, 0x1628, 0x2e70, 0x4604, 0x7694,
- 0x764c, 0x2e28, 0x4670, 0x5e04, 0x1694,
- 0x164e, 0x462a, 0x5e72, 0x7606, 0x2e96,
- 0x2e4e, 0x5e2a, 0x7672, 0x1606, 0x4696,
- 0x464e, 0x762a, 0x1672, 0x2e06, 0x5e96,
- 0x5e4e, 0x162a, 0x2e72, 0x4606, 0x7696,
- 0x764e, 0x2e2a, 0x4672, 0x5e06, 0x1696,
- 0x1650, 0x462c, 0x5e74, 0x7608, 0x2e98,
- 0x2e50, 0x5e2c, 0x7674, 0x1608, 0x4698,
- 0x4650, 0x762c, 0x1674, 0x2e08, 0x5e98,
- 0x5e50, 0x162c, 0x2e74, 0x4608, 0x7698,
- 0x7650, 0x2e2c, 0x4674, 0x5e08, 0x1698,
- 0x1652, 0x462e, 0x5e76, 0x760a, 0x2e9a,
- 0x2e52, 0x5e2e, 0x7676, 0x160a, 0x469a,
- 0x4652, 0x762e, 0x1676, 0x2e0a, 0x5e9a,
- 0x5e52, 0x162e, 0x2e76, 0x460a, 0x769a,
- 0x7652, 0x2e2e, 0x4676, 0x5e0a, 0x169a,
- 0x1654, 0x4630, 0x5e78, 0x760c, 0x2e9c,
- 0x2e54, 0x5e30, 0x7678, 0x160c, 0x469c,
- 0x4654, 0x7630, 0x1678, 0x2e0c, 0x5e9c,
- 0x5e54, 0x1630, 0x2e78, 0x460c, 0x769c,
- 0x7654, 0x2e30, 0x4678, 0x5e0c, 0x169c,
- 0x1656, 0x4632, 0x5e7a, 0x760e, 0x2e9e,
- 0x2e56, 0x5e32, 0x767a, 0x160e, 0x469e,
- 0x4656, 0x7632, 0x167a, 0x2e0e, 0x5e9e,
- 0x5e56, 0x1632, 0x2e7a, 0x460e, 0x769e,
- 0x7656, 0x2e32, 0x467a, 0x5e0e, 0x169e,
- 0x1658, 0x4634, 0x5e7c, 0x7610, 0x0650,
- 0x2e58, 0x5e34, 0x767c, 0x1610, 0x068c,
- 0x4658, 0x7634, 0x167c, 0x2e10, 0x843c,
- 0x5e58, 0x1634, 0x2e7c, 0x4610, 0x848c,
- 0x7658, 0x2e34, 0x467c, 0x5e10, 0x0614,
- 0x1a48, 0x4a24, 0x626c, 0x7a00, 0x3290,
- 0x3248, 0x6224, 0x7a6c, 0x1a00, 0x4a90,
- 0x4a48, 0x7a24, 0x1a6c, 0x3200, 0x6290,
- 0x6248, 0x1a24, 0x326c, 0x4a00, 0x7a90,
- 0x7a48, 0x3224, 0x4a6c, 0x6200, 0x1a90,
- 0x1a4a, 0x4a26, 0x626e, 0x7a02, 0x3292,
- 0x324a, 0x6226, 0x7a6e, 0x1a02, 0x4a92,
- 0x4a4a, 0x7a26, 0x1a6e, 0x3202, 0x6292,
- 0x624a, 0x1a26, 0x326e, 0x4a02, 0x7a92,
- 0x7a4a, 0x3226, 0x4a6e, 0x6202, 0x1a92,
- 0x1a4c, 0x4a28, 0x6270, 0x7a04, 0x3294,
- 0x324c, 0x6228, 0x7a70, 0x1a04, 0x4a94,
- 0x4a4c, 0x7a28, 0x1a70, 0x3204, 0x6294,
- 0x624c, 0x1a28, 0x3270, 0x4a04, 0x7a94,
- 0x7a4c, 0x3228, 0x4a70, 0x6204, 0x1a94,
- 0x1a4e, 0x4a2a, 0x6272, 0x7a06, 0x3296,
- 0x324e, 0x622a, 0x7a72, 0x1a06, 0x4a96,
- 0x4a4e, 0x7a2a, 0x1a72, 0x3206, 0x6296,
- 0x624e, 0x1a2a, 0x3272, 0x4a06, 0x7a96,
- 0x7a4e, 0x322a, 0x4a72, 0x6206, 0x1a96,
- 0x1a50, 0x4a2c, 0x6274, 0x7a08, 0x3298,
- 0x3250, 0x622c, 0x7a74, 0x1a08, 0x4a98,
- 0x4a50, 0x7a2c, 0x1a74, 0x3208, 0x6298,
- 0x6250, 0x1a2c, 0x3274, 0x4a08, 0x7a98,
- 0x7a50, 0x322c, 0x4a74, 0x6208, 0x1a98,
- 0x1a52, 0x4a2e, 0x6276, 0x7a0a, 0x329a,
- 0x3252, 0x622e, 0x7a76, 0x1a0a, 0x4a9a,
- 0x4a52, 0x7a2e, 0x1a76, 0x320a, 0x629a,
- 0x6252, 0x1a2e, 0x3276, 0x4a0a, 0x7a9a,
- 0x7a52, 0x322e, 0x4a76, 0x620a, 0x1a9a,
- 0x1a54, 0x4a30, 0x6278, 0x7a0c, 0x329c,
- 0x3254, 0x6230, 0x7a78, 0x1a0c, 0x4a9c,
- 0x4a54, 0x7a30, 0x1a78, 0x320c, 0x629c,
- 0x6254, 0x1a30, 0x3278, 0x4a0c, 0x7a9c,
- 0x7a54, 0x3230, 0x4a78, 0x620c, 0x1a9c,
- 0x1a56, 0x4a32, 0x627a, 0x7a0e, 0x329e,
- 0x3256, 0x6232, 0x7a7a, 0x1a0e, 0x4a9e,
- 0x4a56, 0x7a32, 0x1a7a, 0x320e, 0x629e,
- 0x6256, 0x1a32, 0x327a, 0x4a0e, 0x7a9e,
- 0x7a56, 0x3232, 0x4a7a, 0x620e, 0x1a9e,
- 0x1a58, 0x4a34, 0x627c, 0x7a10, 0x0264,
- 0x3258, 0x6234, 0x7a7c, 0x1a10, 0x8200,
- 0x4a58, 0x7a34, 0x1a7c, 0x3210, 0x8250,
- 0x6258, 0x1a34, 0x327c, 0x4a10, 0x8628,
- 0x7a58, 0x3234, 0x4a7c, 0x6210, 0x0228,
- 0x1e48, 0x4e24, 0x666c, 0x7e00, 0x3690,
- 0x3648, 0x6624, 0x7e6c, 0x1e00, 0x4e90,
- 0x4e48, 0x7e24, 0x1e6c, 0x3600, 0x6690,
- 0x6648, 0x1e24, 0x366c, 0x4e00, 0x7e90,
- 0x7e48, 0x3624, 0x4e6c, 0x6600, 0x1e90,
- 0x1e4a, 0x4e26, 0x666e, 0x7e02, 0x3692,
- 0x364a, 0x6626, 0x7e6e, 0x1e02, 0x4e92,
- 0x4e4a, 0x7e26, 0x1e6e, 0x3602, 0x6692,
- 0x664a, 0x1e26, 0x366e, 0x4e02, 0x7e92,
- 0x7e4a, 0x3626, 0x4e6e, 0x6602, 0x1e92,
- 0x1e4c, 0x4e28, 0x6670, 0x7e04, 0x3694,
- 0x364c, 0x6628, 0x7e70, 0x1e04, 0x4e94,
- 0x4e4c, 0x7e28, 0x1e70, 0x3604, 0x6694,
- 0x664c, 0x1e28, 0x3670, 0x4e04, 0x7e94,
- 0x7e4c, 0x3628, 0x4e70, 0x6604, 0x1e94,
- 0x1e4e, 0x4e2a, 0x6672, 0x7e06, 0x3696,
- 0x364e, 0x662a, 0x7e72, 0x1e06, 0x4e96,
- 0x4e4e, 0x7e2a, 0x1e72, 0x3606, 0x6696,
- 0x664e, 0x1e2a, 0x3672, 0x4e06, 0x7e96,
- 0x7e4e, 0x362a, 0x4e72, 0x6606, 0x1e96,
- 0x1e50, 0x4e2c, 0x6674, 0x7e08, 0x3698,
- 0x3650, 0x662c, 0x7e74, 0x1e08, 0x4e98,
- 0x4e50, 0x7e2c, 0x1e74, 0x3608, 0x6698,
- 0x6650, 0x1e2c, 0x3674, 0x4e08, 0x7e98,
- 0x7e50, 0x362c, 0x4e74, 0x6608, 0x1e98,
- 0x1e52, 0x4e2e, 0x6676, 0x7e0a, 0x369a,
- 0x3652, 0x662e, 0x7e76, 0x1e0a, 0x4e9a,
- 0x4e52, 0x7e2e, 0x1e76, 0x360a, 0x669a,
- 0x6652, 0x1e2e, 0x3676, 0x4e0a, 0x7e9a,
- 0x7e52, 0x362e, 0x4e76, 0x660a, 0x1e9a,
- 0x1e54, 0x4e30, 0x6678, 0x7e0c, 0x369c,
- 0x3654, 0x6630, 0x7e78, 0x1e0c, 0x4e9c,
- 0x4e54, 0x7e30, 0x1e78, 0x360c, 0x669c,
- 0x6654, 0x1e30, 0x3678, 0x4e0c, 0x7e9c,
- 0x7e54, 0x3630, 0x4e78, 0x660c, 0x1e9c,
- 0x1e56, 0x4e32, 0x667a, 0x7e0e, 0x369e,
- 0x3656, 0x6632, 0x7e7a, 0x1e0e, 0x4e9e,
- 0x4e56, 0x7e32, 0x1e7a, 0x360e, 0x669e,
- 0x6656, 0x1e32, 0x367a, 0x4e0e, 0x7e9e,
- 0x7e56, 0x3632, 0x4e7a, 0x660e, 0x1e9e,
- 0x1e58, 0x4e34, 0x667c, 0x7e10, 0x0664,
- 0x3658, 0x6634, 0x7e7c, 0x1e10, 0x8014,
- 0x4e58, 0x7e34, 0x1e7c, 0x3610, 0x8064,
- 0x6658, 0x1e34, 0x367c, 0x4e10, 0x8678,
- 0x7e58, 0x3634, 0x4e7c, 0x6610, 0x0628,
- 0x3a5a, 0x6a36, 0x0a7e, 0x2212, 0x8416,
- 0x525a, 0x0a36, 0x227e, 0x3a12, 0x8466,
- 0x6a5a, 0x2236, 0x3a7e, 0x5212, 0x0202,
- 0x0a5a, 0x3a36, 0x527e, 0x6a12, 0x023e,
- 0x225a, 0x5236, 0x6a7e, 0x0a12, 0x027a,
- 0x3a5c, 0x6a38, 0x0a80, 0x2214, 0x8418,
- 0x525c, 0x0a38, 0x2280, 0x3a14, 0x8468,
- 0x6a5c, 0x2238, 0x3a80, 0x5214, 0x0204,
- 0x0a5c, 0x3a38, 0x5280, 0x6a14, 0x0240,
- 0x225c, 0x5238, 0x6a80, 0x0a14, 0x027c,
- 0x3a5e, 0x6a3a, 0x0a82, 0x2216, 0x841a,
- 0x525e, 0x0a3a, 0x2282, 0x3a16, 0x846a,
- 0x6a5e, 0x223a, 0x3a82, 0x5216, 0x0206,
- 0x0a5e, 0x3a3a, 0x5282, 0x6a16, 0x0242,
- 0x225e, 0x523a, 0x6a82, 0x0a16, 0x027e,
- 0x3a60, 0x6a3c, 0x0a84, 0x2218, 0x841c,
- 0x5260, 0x0a3c, 0x2284, 0x3a18, 0x846c,
- 0x6a60, 0x223c, 0x3a84, 0x5218, 0x0208,
- 0x0a60, 0x3a3c, 0x5284, 0x6a18, 0x0244,
- 0x2260, 0x523c, 0x6a84, 0x0a18, 0x0280,
- 0x3a62, 0x6a3e, 0x0a86, 0x221a, 0x841e,
- 0x5262, 0x0a3e, 0x2286, 0x3a1a, 0x846e,
- 0x6a62, 0x223e, 0x3a86, 0x521a, 0x020a,
- 0x0a62, 0x3a3e, 0x5286, 0x6a1a, 0x0246,
- 0x2262, 0x523e, 0x6a86, 0x0a1a, 0x0282,
- 0x3a64, 0x6a40, 0x0a88, 0x221c, 0x8420,
- 0x5264, 0x0a40, 0x2288, 0x3a1c, 0x8470,
- 0x6a64, 0x2240, 0x3a88, 0x521c, 0x020c,
- 0x0a64, 0x3a40, 0x5288, 0x6a1c, 0x0248,
- 0x2264, 0x5240, 0x6a88, 0x0a1c, 0x0284,
- 0x3a66, 0x6a42, 0x0a8a, 0x221e, 0x8422,
- 0x5266, 0x0a42, 0x228a, 0x3a1e, 0x8472,
- 0x6a66, 0x2242, 0x3a8a, 0x521e, 0x020e,
- 0x0a66, 0x3a42, 0x528a, 0x6a1e, 0x024a,
- 0x2266, 0x5242, 0x6a8a, 0x0a1e, 0x0286,
- 0x3a68, 0x6a44, 0x0a8c, 0x2220, 0x8424,
- 0x5268, 0x0a44, 0x228c, 0x3a20, 0x8474,
- 0x6a68, 0x2244, 0x3a8c, 0x5220, 0x0210,
- 0x0a68, 0x3a44, 0x528c, 0x6a20, 0x024c,
- 0x2268, 0x5244, 0x6a8c, 0x0a20, 0x0288,
- 0x3a6a, 0x6a46, 0x0a8e, 0x2222, 0x8426,
- 0x526a, 0x0a46, 0x228e, 0x3a22, 0x8476,
- 0x6a6a, 0x2246, 0x3a8e, 0x5222, 0x0212,
- 0x0a6a, 0x3a46, 0x528e, 0x6a22, 0x024e,
- 0x226a, 0x5246, 0x6a8e, 0x0a22, 0x028a,
- 0x3e5a, 0x6e36, 0x0e7e, 0x2612, 0x822a,
- 0x565a, 0x0e36, 0x267e, 0x3e12, 0x827a,
- 0x6e5a, 0x2636, 0x3e7e, 0x5612, 0x0602,
- 0x0e5a, 0x3e36, 0x567e, 0x6e12, 0x063e,
- 0x265a, 0x5636, 0x6e7e, 0x0e12, 0x067a,
- 0x3e5c, 0x6e38, 0x0e80, 0x2614, 0x822c,
- 0x565c, 0x0e38, 0x2680, 0x3e14, 0x827c,
- 0x6e5c, 0x2638, 0x3e80, 0x5614, 0x0604,
- 0x0e5c, 0x3e38, 0x5680, 0x6e14, 0x0640,
- 0x265c, 0x5638, 0x6e80, 0x0e14, 0x067c,
- 0x3e5e, 0x6e3a, 0x0e82, 0x2616, 0x822e,
- 0x565e, 0x0e3a, 0x2682, 0x3e16, 0x827e,
- 0x6e5e, 0x263a, 0x3e82, 0x5616, 0x0606,
- 0x0e5e, 0x3e3a, 0x5682, 0x6e16, 0x0642,
- 0x265e, 0x563a, 0x6e82, 0x0e16, 0x067e,
- 0x3e60, 0x6e3c, 0x0e84, 0x2618, 0x8230,
- 0x5660, 0x0e3c, 0x2684, 0x3e18, 0x8280,
- 0x6e60, 0x263c, 0x3e84, 0x5618, 0x0608,
- 0x0e60, 0x3e3c, 0x5684, 0x6e18, 0x0644,
- 0x2660, 0x563c, 0x6e84, 0x0e18, 0x0680,
- 0x3e62, 0x6e3e, 0x0e86, 0x261a, 0x8232,
- 0x5662, 0x0e3e, 0x2686, 0x3e1a, 0x8282,
- 0x6e62, 0x263e, 0x3e86, 0x561a, 0x060a,
- 0x0e62, 0x3e3e, 0x5686, 0x6e1a, 0x0646,
- 0x2662, 0x563e, 0x6e86, 0x0e1a, 0x0682,
- 0x3e64, 0x6e40, 0x0e88, 0x261c, 0x8234,
- 0x5664, 0x0e40, 0x2688, 0x3e1c, 0x8284,
- 0x6e64, 0x2640, 0x3e88, 0x561c, 0x060c,
- 0x0e64, 0x3e40, 0x5688, 0x6e1c, 0x0648,
- 0x2664, 0x5640, 0x6e88, 0x0e1c, 0x0684,
- 0x3e66, 0x6e42, 0x0e8a, 0x261e, 0x8236,
- 0x5666, 0x0e42, 0x268a, 0x3e1e, 0x8286,
- 0x6e66, 0x2642, 0x3e8a, 0x561e, 0x060e,
- 0x0e66, 0x3e42, 0x568a, 0x6e1e, 0x064a,
- 0x2666, 0x5642, 0x6e8a, 0x0e1e, 0x0686,
- 0x3e68, 0x6e44, 0x0e8c, 0x2620, 0x8238,
- 0x5668, 0x0e44, 0x268c, 0x3e20, 0x8288,
- 0x6e68, 0x2644, 0x3e8c, 0x5620, 0x0610,
- 0x0e68, 0x3e44, 0x568c, 0x6e20, 0x064c,
- 0x2668, 0x5644, 0x6e8c, 0x0e20, 0x0688,
- 0x3e6a, 0x6e46, 0x0e8e, 0x2622, 0x823a,
- 0x566a, 0x0e46, 0x268e, 0x3e22, 0x828a,
- 0x6e6a, 0x2646, 0x3e8e, 0x5622, 0x0612,
- 0x0e6a, 0x3e46, 0x568e, 0x6e22, 0x064e,
- 0x266a, 0x5646, 0x6e8e, 0x0e22, 0x068a,
- 0x425a, 0x7236, 0x127e, 0x2a12, 0x803e,
- 0x5a5a, 0x1236, 0x2a7e, 0x4212, 0x808e,
- 0x725a, 0x2a36, 0x427e, 0x5a12, 0x0216,
- 0x125a, 0x4236, 0x5a7e, 0x7212, 0x0252,
- 0x2a5a, 0x5a36, 0x727e, 0x1212, 0x028e,
- 0x425c, 0x7238, 0x1280, 0x2a14, 0x8040,
- 0x5a5c, 0x1238, 0x2a80, 0x4214, 0x8090,
- 0x725c, 0x2a38, 0x4280, 0x5a14, 0x0218,
- 0x125c, 0x4238, 0x5a80, 0x7214, 0x0254,
- 0x2a5c, 0x5a38, 0x7280, 0x1214, 0x0290,
- 0x425e, 0x723a, 0x1282, 0x2a16, 0x8042,
- 0x5a5e, 0x123a, 0x2a82, 0x4216, 0x8092,
- 0x725e, 0x2a3a, 0x4282, 0x5a16, 0x021a,
- 0x125e, 0x423a, 0x5a82, 0x7216, 0x0256,
- 0x2a5e, 0x5a3a, 0x7282, 0x1216, 0x0292,
- 0x4260, 0x723c, 0x1284, 0x2a18, 0x8044,
- 0x5a60, 0x123c, 0x2a84, 0x4218, 0x8094,
- 0x7260, 0x2a3c, 0x4284, 0x5a18, 0x021c,
- 0x1260, 0x423c, 0x5a84, 0x7218, 0x0258,
- 0x2a60, 0x5a3c, 0x7284, 0x1218, 0x0294,
- 0x4262, 0x723e, 0x1286, 0x2a1a, 0x8046,
- 0x5a62, 0x123e, 0x2a86, 0x421a, 0x8096,
- 0x7262, 0x2a3e, 0x4286, 0x5a1a, 0x021e,
- 0x1262, 0x423e, 0x5a86, 0x721a, 0x025a,
- 0x2a62, 0x5a3e, 0x7286, 0x121a, 0x0296,
- 0x4264, 0x7240, 0x1288, 0x2a1c, 0x8048,
- 0x5a64, 0x1240, 0x2a88, 0x421c, 0x8098,
- 0x7264, 0x2a40, 0x4288, 0x5a1c, 0x0220,
- 0x1264, 0x4240, 0x5a88, 0x721c, 0x025c,
- 0x2a64, 0x5a40, 0x7288, 0x121c, 0x0298,
- 0x4266, 0x7242, 0x128a, 0x2a1e, 0x804a,
- 0x5a66, 0x1242, 0x2a8a, 0x421e, 0x809a,
- 0x7266, 0x2a42, 0x428a, 0x5a1e, 0x0222,
- 0x1266, 0x4242, 0x5a8a, 0x721e, 0x025e,
- 0x2a66, 0x5a42, 0x728a, 0x121e, 0x029a,
- 0x4268, 0x7244, 0x128c, 0x2a20, 0x804c,
- 0x5a68, 0x1244, 0x2a8c, 0x4220, 0x809c,
- 0x7268, 0x2a44, 0x428c, 0x5a20, 0x0224,
- 0x1268, 0x4244, 0x5a8c, 0x7220, 0x0260,
- 0x2a68, 0x5a44, 0x728c, 0x1220, 0x029c,
- 0x426a, 0x7246, 0x128e, 0x2a22, 0x804e,
- 0x5a6a, 0x1246, 0x2a8e, 0x4222, 0x809e,
- 0x726a, 0x2a46, 0x428e, 0x5a22, 0x0226,
- 0x126a, 0x4246, 0x5a8e, 0x7222, 0x0262,
- 0x2a6a, 0x5a46, 0x728e, 0x1222, 0x029e,
- 0x465a, 0x7636, 0x167e, 0x2e12, 0x843e,
- 0x5e5a, 0x1636, 0x2e7e, 0x4612, 0x848e,
- 0x765a, 0x2e36, 0x467e, 0x5e12, 0x0616,
- 0x165a, 0x4636, 0x5e7e, 0x7612, 0x0652,
- 0x2e5a, 0x5e36, 0x767e, 0x1612, 0x068e,
- 0x465c, 0x7638, 0x1680, 0x2e14, 0x8440,
- 0x5e5c, 0x1638, 0x2e80, 0x4614, 0x8490,
- 0x765c, 0x2e38, 0x4680, 0x5e14, 0x0618,
- 0x165c, 0x4638, 0x5e80, 0x7614, 0x0654,
- 0x2e5c, 0x5e38, 0x7680, 0x1614, 0x0690,
- 0x465e, 0x763a, 0x1682, 0x2e16, 0x8442,
- 0x5e5e, 0x163a, 0x2e82, 0x4616, 0x8492,
- 0x765e, 0x2e3a, 0x4682, 0x5e16, 0x061a,
- 0x165e, 0x463a, 0x5e82, 0x7616, 0x0656,
- 0x2e5e, 0x5e3a, 0x7682, 0x1616, 0x0692,
- 0x4660, 0x763c, 0x1684, 0x2e18, 0x8444,
- 0x5e60, 0x163c, 0x2e84, 0x4618, 0x8494,
- 0x7660, 0x2e3c, 0x4684, 0x5e18, 0x061c,
- 0x1660, 0x463c, 0x5e84, 0x7618, 0x0658,
- 0x2e60, 0x5e3c, 0x7684, 0x1618, 0x0694,
- 0x4662, 0x763e, 0x1686, 0x2e1a, 0x8446,
- 0x5e62, 0x163e, 0x2e86, 0x461a, 0x8496,
- 0x7662, 0x2e3e, 0x4686, 0x5e1a, 0x061e,
- 0x1662, 0x463e, 0x5e86, 0x761a, 0x065a,
- 0x2e62, 0x5e3e, 0x7686, 0x161a, 0x0696,
- 0x4664, 0x7640, 0x1688, 0x2e1c, 0x8448,
- 0x5e64, 0x1640, 0x2e88, 0x461c, 0x8498,
- 0x7664, 0x2e40, 0x4688, 0x5e1c, 0x0620,
- 0x1664, 0x4640, 0x5e88, 0x761c, 0x065c,
- 0x2e64, 0x5e40, 0x7688, 0x161c, 0x0698,
- 0x4666, 0x7642, 0x168a, 0x2e1e, 0x844a,
- 0x5e66, 0x1642, 0x2e8a, 0x461e, 0x849a,
- 0x7666, 0x2e42, 0x468a, 0x5e1e, 0x0622,
- 0x1666, 0x4642, 0x5e8a, 0x761e, 0x065e,
- 0x2e66, 0x5e42, 0x768a, 0x161e, 0x069a,
- 0x4668, 0x7644, 0x168c, 0x2e20, 0x844c,
- 0x5e68, 0x1644, 0x2e8c, 0x4620, 0x849c,
- 0x7668, 0x2e44, 0x468c, 0x5e20, 0x0624,
- 0x1668, 0x4644, 0x5e8c, 0x7620, 0x0660,
- 0x2e68, 0x5e44, 0x768c, 0x1620, 0x069c,
- 0x466a, 0x7646, 0x168e, 0x2e22, 0x844e,
- 0x5e6a, 0x1646, 0x2e8e, 0x4622, 0x849e,
- 0x766a, 0x2e46, 0x468e, 0x5e22, 0x0626,
- 0x166a, 0x4646, 0x5e8e, 0x7622, 0x0662,
- 0x2e6a, 0x5e46, 0x768e, 0x1622, 0x069e,
- 0x4a5a, 0x7a36, 0x1a7e, 0x3212, 0x8252,
- 0x625a, 0x1a36, 0x327e, 0x4a12, 0x862c,
- 0x7a5a, 0x3236, 0x4a7e, 0x6212, 0x022a,
- 0x1a5a, 0x4a36, 0x627e, 0x7a12, 0x0266,
- 0x325a, 0x6236, 0x7a7e, 0x1a12, 0x8202,
- 0x4a5c, 0x7a38, 0x1a80, 0x3214, 0x8254,
- 0x625c, 0x1a38, 0x3280, 0x4a14, 0x8630,
- 0x7a5c, 0x3238, 0x4a80, 0x6214, 0x022c,
- 0x1a5c, 0x4a38, 0x6280, 0x7a14, 0x0268,
- 0x325c, 0x6238, 0x7a80, 0x1a14, 0x8204,
- 0x4a5e, 0x7a3a, 0x1a82, 0x3216, 0x8256,
- 0x625e, 0x1a3a, 0x3282, 0x4a16, 0x8634,
- 0x7a5e, 0x323a, 0x4a82, 0x6216, 0x022e,
- 0x1a5e, 0x4a3a, 0x6282, 0x7a16, 0x026a,
- 0x325e, 0x623a, 0x7a82, 0x1a16, 0x8206,
- 0x4a60, 0x7a3c, 0x1a84, 0x3218, 0x8258,
- 0x6260, 0x1a3c, 0x3284, 0x4a18, 0x8638,
- 0x7a60, 0x323c, 0x4a84, 0x6218, 0x0230,
- 0x1a60, 0x4a3c, 0x6284, 0x7a18, 0x026c,
- 0x3260, 0x623c, 0x7a84, 0x1a18, 0x8208,
- 0x4a62, 0x7a3e, 0x1a86, 0x321a, 0x825a,
- 0x6262, 0x1a3e, 0x3286, 0x4a1a, 0x863c,
- 0x7a62, 0x323e, 0x4a86, 0x621a, 0x0232,
- 0x1a62, 0x4a3e, 0x6286, 0x7a1a, 0x026e,
- 0x3262, 0x623e, 0x7a86, 0x1a1a, 0x820a,
- 0x4a64, 0x7a40, 0x1a88, 0x321c, 0x825c,
- 0x6264, 0x1a40, 0x3288, 0x4a1c, 0x8640,
- 0x7a64, 0x3240, 0x4a88, 0x621c, 0x0234,
- 0x1a64, 0x4a40, 0x6288, 0x7a1c, 0x0270,
- 0x3264, 0x6240, 0x7a88, 0x1a1c, 0x820c,
- 0x4a66, 0x7a42, 0x1a8a, 0x321e, 0x825e,
- 0x6266, 0x1a42, 0x328a, 0x4a1e, 0x8644,
- 0x7a66, 0x3242, 0x4a8a, 0x621e, 0x0236,
- 0x1a66, 0x4a42, 0x628a, 0x7a1e, 0x0272,
- 0x3266, 0x6242, 0x7a8a, 0x1a1e, 0x820e,
- 0x4a68, 0x7a44, 0x1a8c, 0x3220, 0x8260,
- 0x6268, 0x1a44, 0x328c, 0x4a20, 0x8648,
- 0x7a68, 0x3244, 0x4a8c, 0x6220, 0x0238,
- 0x1a68, 0x4a44, 0x628c, 0x7a20, 0x0274,
- 0x3268, 0x6244, 0x7a8c, 0x1a20, 0x8210,
- 0x4a6a, 0x7a46, 0x1a8e, 0x3222, 0x8262,
- 0x626a, 0x1a46, 0x328e, 0x4a22, 0x864c,
- 0x7a6a, 0x3246, 0x4a8e, 0x6222, 0x023a,
- 0x1a6a, 0x4a46, 0x628e, 0x7a22, 0x0276,
- 0x326a, 0x6246, 0x7a8e, 0x1a22, 0x8212,
- 0x4e5a, 0x7e36, 0x1e7e, 0x3612, 0x8066,
- 0x665a, 0x1e36, 0x367e, 0x4e12, 0x867c,
- 0x7e5a, 0x3636, 0x4e7e, 0x6612, 0x062a,
- 0x1e5a, 0x4e36, 0x667e, 0x7e12, 0x0666,
- 0x365a, 0x6636, 0x7e7e, 0x1e12, 0x8016,
- 0x4e5c, 0x7e38, 0x1e80, 0x3614, 0x8068,
- 0x665c, 0x1e38, 0x3680, 0x4e14, 0x8680,
- 0x7e5c, 0x3638, 0x4e80, 0x6614, 0x062c,
- 0x1e5c, 0x4e38, 0x6680, 0x7e14, 0x0668,
- 0x365c, 0x6638, 0x7e80, 0x1e14, 0x8018,
- 0x4e5e, 0x7e3a, 0x1e82, 0x3616, 0x806a,
- 0x665e, 0x1e3a, 0x3682, 0x4e16, 0x8684,
- 0x7e5e, 0x363a, 0x4e82, 0x6616, 0x062e,
- 0x1e5e, 0x4e3a, 0x6682, 0x7e16, 0x066a,
- 0x365e, 0x663a, 0x7e82, 0x1e16, 0x801a,
- 0x4e60, 0x7e3c, 0x1e84, 0x3618, 0x806c,
- 0x6660, 0x1e3c, 0x3684, 0x4e18, 0x8688,
- 0x7e60, 0x363c, 0x4e84, 0x6618, 0x0630,
- 0x1e60, 0x4e3c, 0x6684, 0x7e18, 0x066c,
- 0x3660, 0x663c, 0x7e84, 0x1e18, 0x801c,
- 0x4e62, 0x7e3e, 0x1e86, 0x361a, 0x806e,
- 0x6662, 0x1e3e, 0x3686, 0x4e1a, 0x868c,
- 0x7e62, 0x363e, 0x4e86, 0x661a, 0x0632,
- 0x1e62, 0x4e3e, 0x6686, 0x7e1a, 0x066e,
- 0x3662, 0x663e, 0x7e86, 0x1e1a, 0x801e,
- 0x4e64, 0x7e40, 0x1e88, 0x361c, 0x8070,
- 0x6664, 0x1e40, 0x3688, 0x4e1c, 0x8690,
- 0x7e64, 0x3640, 0x4e88, 0x661c, 0x0634,
- 0x1e64, 0x4e40, 0x6688, 0x7e1c, 0x0670,
- 0x3664, 0x6640, 0x7e88, 0x1e1c, 0x8020,
- 0x4e66, 0x7e42, 0x1e8a, 0x361e, 0x8072,
- 0x6666, 0x1e42, 0x368a, 0x4e1e, 0x8694,
- 0x7e66, 0x3642, 0x4e8a, 0x661e, 0x0636,
- 0x1e66, 0x4e42, 0x668a, 0x7e1e, 0x0672,
- 0x3666, 0x6642, 0x7e8a, 0x1e1e, 0x8022,
- 0x4e68, 0x7e44, 0x1e8c, 0x3620, 0x8074,
- 0x6668, 0x1e44, 0x368c, 0x4e20, 0x8698,
- 0x7e68, 0x3644, 0x4e8c, 0x6620, 0x0638,
- 0x1e68, 0x4e44, 0x668c, 0x7e20, 0x0674,
- 0x3668, 0x6644, 0x7e8c, 0x1e20, 0x8024,
- 0x4e6a, 0x7e46, 0x1e8e, 0x3622, 0x8076,
- 0x666a, 0x1e46, 0x368e, 0x4e22, 0x869c,
- 0x7e6a, 0x3646, 0x4e8e, 0x6622, 0x063a,
- 0x1e6a, 0x4e46, 0x668e, 0x7e22, 0x0676,
- 0x366a, 0x6646, 0x7e8e, 0x1e22, 0x8026,
-};
-
-static const uint16_t dv_place_1080i50[4*12*27*5] = {
- 0x1a48, 0x4a24, 0x626c, 0x0200, 0x3290,
- 0x2648, 0x5624, 0x6e6c, 0x0e00, 0x3e90,
- 0x3248, 0x6224, 0x7a6c, 0x1a00, 0x4a90,
- 0x3e48, 0x6e24, 0x026c, 0x2600, 0x5690,
- 0x4a48, 0x7a24, 0x0e6c, 0x3200, 0x6290,
- 0x5648, 0x0224, 0x1a6c, 0x3e00, 0x6e90,
- 0x6248, 0x0e24, 0x266c, 0x4a00, 0x7a90,
- 0x6e48, 0x1a24, 0x326c, 0x5600, 0x0290,
- 0x7a48, 0x2624, 0x3e6c, 0x6200, 0x0e90,
- 0x0248, 0x3224, 0x4a6c, 0x6e00, 0x1a90,
- 0x0e48, 0x3e24, 0x566c, 0x7a00, 0x2690,
- 0x1a4a, 0x4a26, 0x626e, 0x0202, 0x3292,
- 0x264a, 0x5626, 0x6e6e, 0x0e02, 0x3e92,
- 0x324a, 0x6226, 0x7a6e, 0x1a02, 0x4a92,
- 0x3e4a, 0x6e26, 0x026e, 0x2602, 0x5692,
- 0x4a4a, 0x7a26, 0x0e6e, 0x3202, 0x6292,
- 0x564a, 0x0226, 0x1a6e, 0x3e02, 0x6e92,
- 0x624a, 0x0e26, 0x266e, 0x4a02, 0x7a92,
- 0x6e4a, 0x1a26, 0x326e, 0x5602, 0x0292,
- 0x7a4a, 0x2626, 0x3e6e, 0x6202, 0x0e92,
- 0x024a, 0x3226, 0x4a6e, 0x6e02, 0x1a92,
- 0x0e4a, 0x3e26, 0x566e, 0x7a02, 0x2692,
- 0x1a4c, 0x4a28, 0x6270, 0x0204, 0x3294,
- 0x264c, 0x5628, 0x6e70, 0x0e04, 0x3e94,
- 0x324c, 0x6228, 0x7a70, 0x1a04, 0x4a94,
- 0x3e4c, 0x6e28, 0x0270, 0x2604, 0x5694,
- 0x4a4c, 0x7a28, 0x0e70, 0x3204, 0x6294,
- 0x564c, 0x0228, 0x1a70, 0x3e04, 0x6e94,
- 0x624c, 0x0e28, 0x2670, 0x4a04, 0x7a94,
- 0x6e4c, 0x1a28, 0x3270, 0x5604, 0x0294,
- 0x7a4c, 0x2628, 0x3e70, 0x6204, 0x0e94,
- 0x024c, 0x3228, 0x4a70, 0x6e04, 0x1a94,
- 0x0e4c, 0x3e28, 0x5670, 0x7a04, 0x2694,
- 0x1a4e, 0x4a2a, 0x6272, 0x0206, 0x3296,
- 0x264e, 0x562a, 0x6e72, 0x0e06, 0x3e96,
- 0x324e, 0x622a, 0x7a72, 0x1a06, 0x4a96,
- 0x3e4e, 0x6e2a, 0x0272, 0x2606, 0x5696,
- 0x4a4e, 0x7a2a, 0x0e72, 0x3206, 0x6296,
- 0x564e, 0x022a, 0x1a72, 0x3e06, 0x6e96,
- 0x624e, 0x0e2a, 0x2672, 0x4a06, 0x7a96,
- 0x6e4e, 0x1a2a, 0x3272, 0x5606, 0x0296,
- 0x7a4e, 0x262a, 0x3e72, 0x6206, 0x0e96,
- 0x024e, 0x322a, 0x4a72, 0x6e06, 0x1a96,
- 0x0e4e, 0x3e2a, 0x5672, 0x7a06, 0x2696,
- 0x1a50, 0x4a2c, 0x6274, 0x0208, 0x3298,
- 0x2650, 0x562c, 0x6e74, 0x0e08, 0x3e98,
- 0x3250, 0x622c, 0x7a74, 0x1a08, 0x4a98,
- 0x3e50, 0x6e2c, 0x0274, 0x2608, 0x5698,
- 0x4a50, 0x7a2c, 0x0e74, 0x3208, 0x6298,
- 0x5650, 0x022c, 0x1a74, 0x3e08, 0x6e98,
- 0x6250, 0x0e2c, 0x2674, 0x4a08, 0x7a98,
- 0x6e50, 0x1a2c, 0x3274, 0x5608, 0x0298,
- 0x7a50, 0x262c, 0x3e74, 0x6208, 0x0e98,
- 0x0250, 0x322c, 0x4a74, 0x6e08, 0x1a98,
- 0x0e50, 0x3e2c, 0x5674, 0x7a08, 0x2698,
- 0x1a52, 0x4a2e, 0x6276, 0x020a, 0x329a,
- 0x2652, 0x562e, 0x6e76, 0x0e0a, 0x3e9a,
- 0x3252, 0x622e, 0x7a76, 0x1a0a, 0x4a9a,
- 0x3e52, 0x6e2e, 0x0276, 0x260a, 0x569a,
- 0x4a52, 0x7a2e, 0x0e76, 0x320a, 0x629a,
- 0x5652, 0x022e, 0x1a76, 0x3e0a, 0x6e9a,
- 0x6252, 0x0e2e, 0x2676, 0x4a0a, 0x7a9a,
- 0x6e52, 0x1a2e, 0x3276, 0x560a, 0x029a,
- 0x7a52, 0x262e, 0x3e76, 0x620a, 0x0e9a,
- 0x0252, 0x322e, 0x4a76, 0x6e0a, 0x1a9a,
- 0x0e52, 0x3e2e, 0x5676, 0x7a0a, 0x269a,
- 0x1a54, 0x4a30, 0x6278, 0x020c, 0x329c,
- 0x2654, 0x5630, 0x6e78, 0x0e0c, 0x3e9c,
- 0x3254, 0x6230, 0x7a78, 0x1a0c, 0x4a9c,
- 0x3e54, 0x6e30, 0x0278, 0x260c, 0x569c,
- 0x4a54, 0x7a30, 0x0e78, 0x320c, 0x629c,
- 0x5654, 0x0230, 0x1a78, 0x3e0c, 0x6e9c,
- 0x6254, 0x0e30, 0x2678, 0x4a0c, 0x7a9c,
- 0x6e54, 0x1a30, 0x3278, 0x560c, 0x029c,
- 0x7a54, 0x2630, 0x3e78, 0x620c, 0x0e9c,
- 0x0254, 0x3230, 0x4a78, 0x6e0c, 0x1a9c,
- 0x0e54, 0x3e30, 0x5678, 0x7a0c, 0x269c,
- 0x1a56, 0x4a32, 0x627a, 0x020e, 0x329e,
- 0x2656, 0x5632, 0x6e7a, 0x0e0e, 0x3e9e,
- 0x3256, 0x6232, 0x7a7a, 0x1a0e, 0x4a9e,
- 0x3e56, 0x6e32, 0x027a, 0x260e, 0x569e,
- 0x4a56, 0x7a32, 0x0e7a, 0x320e, 0x629e,
- 0x5656, 0x0232, 0x1a7a, 0x3e0e, 0x6e9e,
- 0x6256, 0x0e32, 0x267a, 0x4a0e, 0x7a9e,
- 0x6e56, 0x1a32, 0x327a, 0x560e, 0x029e,
- 0x7a56, 0x2632, 0x3e7a, 0x620e, 0x0e9e,
- 0x0256, 0x3232, 0x4a7a, 0x6e0e, 0x1a9e,
- 0x0e56, 0x3e32, 0x567a, 0x7a0e, 0x269e,
- 0x1a58, 0x4a34, 0x627c, 0x0210, 0x32a0,
- 0x2658, 0x5634, 0x6e7c, 0x0e10, 0x3ea0,
- 0x3258, 0x6234, 0x7a7c, 0x1a10, 0x4aa0,
- 0x3e58, 0x6e34, 0x027c, 0x2610, 0x56a0,
- 0x4a58, 0x7a34, 0x0e7c, 0x3210, 0x62a0,
- 0x5658, 0x0234, 0x1a7c, 0x3e10, 0x6ea0,
- 0x6258, 0x0e34, 0x267c, 0x4a10, 0x7aa0,
- 0x6e58, 0x1a34, 0x327c, 0x5610, 0x02a0,
- 0x7a58, 0x2634, 0x3e7c, 0x6210, 0x0ea0,
- 0x0258, 0x3234, 0x4a7c, 0x6e10, 0x1aa0,
- 0x0e58, 0x3e34, 0x567c, 0x7a10, 0x26a0,
- 0x1e48, 0x4e24, 0x666c, 0x0600, 0x3690,
- 0x2a48, 0x5a24, 0x726c, 0x1200, 0x4290,
- 0x3648, 0x6624, 0x7e6c, 0x1e00, 0x4e90,
- 0x4248, 0x7224, 0x066c, 0x2a00, 0x5a90,
- 0x4e48, 0x7e24, 0x126c, 0x3600, 0x6690,
- 0x5a48, 0x0624, 0x1e6c, 0x4200, 0x7290,
- 0x6648, 0x1224, 0x2a6c, 0x4e00, 0x7e90,
- 0x7248, 0x1e24, 0x366c, 0x5a00, 0x0690,
- 0x7e48, 0x2a24, 0x426c, 0x6600, 0x1290,
- 0x0648, 0x3624, 0x4e6c, 0x7200, 0x1e90,
- 0x1248, 0x4224, 0x5a6c, 0x7e00, 0x2a90,
- 0x1e4a, 0x4e26, 0x666e, 0x0602, 0x3692,
- 0x2a4a, 0x5a26, 0x726e, 0x1202, 0x4292,
- 0x364a, 0x6626, 0x7e6e, 0x1e02, 0x4e92,
- 0x424a, 0x7226, 0x066e, 0x2a02, 0x5a92,
- 0x4e4a, 0x7e26, 0x126e, 0x3602, 0x6692,
- 0x5a4a, 0x0626, 0x1e6e, 0x4202, 0x7292,
- 0x664a, 0x1226, 0x2a6e, 0x4e02, 0x7e92,
- 0x724a, 0x1e26, 0x366e, 0x5a02, 0x0692,
- 0x7e4a, 0x2a26, 0x426e, 0x6602, 0x1292,
- 0x064a, 0x3626, 0x4e6e, 0x7202, 0x1e92,
- 0x124a, 0x4226, 0x5a6e, 0x7e02, 0x2a92,
- 0x1e4c, 0x4e28, 0x6670, 0x0604, 0x3694,
- 0x2a4c, 0x5a28, 0x7270, 0x1204, 0x4294,
- 0x364c, 0x6628, 0x7e70, 0x1e04, 0x4e94,
- 0x424c, 0x7228, 0x0670, 0x2a04, 0x5a94,
- 0x4e4c, 0x7e28, 0x1270, 0x3604, 0x6694,
- 0x5a4c, 0x0628, 0x1e70, 0x4204, 0x7294,
- 0x664c, 0x1228, 0x2a70, 0x4e04, 0x7e94,
- 0x724c, 0x1e28, 0x3670, 0x5a04, 0x0694,
- 0x7e4c, 0x2a28, 0x4270, 0x6604, 0x1294,
- 0x064c, 0x3628, 0x4e70, 0x7204, 0x1e94,
- 0x124c, 0x4228, 0x5a70, 0x7e04, 0x2a94,
- 0x1e4e, 0x4e2a, 0x6672, 0x0606, 0x3696,
- 0x2a4e, 0x5a2a, 0x7272, 0x1206, 0x4296,
- 0x364e, 0x662a, 0x7e72, 0x1e06, 0x4e96,
- 0x424e, 0x722a, 0x0672, 0x2a06, 0x5a96,
- 0x4e4e, 0x7e2a, 0x1272, 0x3606, 0x6696,
- 0x5a4e, 0x062a, 0x1e72, 0x4206, 0x7296,
- 0x664e, 0x122a, 0x2a72, 0x4e06, 0x7e96,
- 0x724e, 0x1e2a, 0x3672, 0x5a06, 0x0696,
- 0x7e4e, 0x2a2a, 0x4272, 0x6606, 0x1296,
- 0x064e, 0x362a, 0x4e72, 0x7206, 0x1e96,
- 0x124e, 0x422a, 0x5a72, 0x7e06, 0x2a96,
- 0x1e50, 0x4e2c, 0x6674, 0x0608, 0x3698,
- 0x2a50, 0x5a2c, 0x7274, 0x1208, 0x4298,
- 0x3650, 0x662c, 0x7e74, 0x1e08, 0x4e98,
- 0x4250, 0x722c, 0x0674, 0x2a08, 0x5a98,
- 0x4e50, 0x7e2c, 0x1274, 0x3608, 0x6698,
- 0x5a50, 0x062c, 0x1e74, 0x4208, 0x7298,
- 0x6650, 0x122c, 0x2a74, 0x4e08, 0x7e98,
- 0x7250, 0x1e2c, 0x3674, 0x5a08, 0x0698,
- 0x7e50, 0x2a2c, 0x4274, 0x6608, 0x1298,
- 0x0650, 0x362c, 0x4e74, 0x7208, 0x1e98,
- 0x1250, 0x422c, 0x5a74, 0x7e08, 0x2a98,
- 0x1e52, 0x4e2e, 0x6676, 0x060a, 0x369a,
- 0x2a52, 0x5a2e, 0x7276, 0x120a, 0x429a,
- 0x3652, 0x662e, 0x7e76, 0x1e0a, 0x4e9a,
- 0x4252, 0x722e, 0x0676, 0x2a0a, 0x5a9a,
- 0x4e52, 0x7e2e, 0x1276, 0x360a, 0x669a,
- 0x5a52, 0x062e, 0x1e76, 0x420a, 0x729a,
- 0x6652, 0x122e, 0x2a76, 0x4e0a, 0x7e9a,
- 0x7252, 0x1e2e, 0x3676, 0x5a0a, 0x069a,
- 0x7e52, 0x2a2e, 0x4276, 0x660a, 0x129a,
- 0x0652, 0x362e, 0x4e76, 0x720a, 0x1e9a,
- 0x1252, 0x422e, 0x5a76, 0x7e0a, 0x2a9a,
- 0x1e54, 0x4e30, 0x6678, 0x060c, 0x369c,
- 0x2a54, 0x5a30, 0x7278, 0x120c, 0x429c,
- 0x3654, 0x6630, 0x7e78, 0x1e0c, 0x4e9c,
- 0x4254, 0x7230, 0x0678, 0x2a0c, 0x5a9c,
- 0x4e54, 0x7e30, 0x1278, 0x360c, 0x669c,
- 0x5a54, 0x0630, 0x1e78, 0x420c, 0x729c,
- 0x6654, 0x1230, 0x2a78, 0x4e0c, 0x7e9c,
- 0x7254, 0x1e30, 0x3678, 0x5a0c, 0x069c,
- 0x7e54, 0x2a30, 0x4278, 0x660c, 0x129c,
- 0x0654, 0x3630, 0x4e78, 0x720c, 0x1e9c,
- 0x1254, 0x4230, 0x5a78, 0x7e0c, 0x2a9c,
- 0x1e56, 0x4e32, 0x667a, 0x060e, 0x369e,
- 0x2a56, 0x5a32, 0x727a, 0x120e, 0x429e,
- 0x3656, 0x6632, 0x7e7a, 0x1e0e, 0x4e9e,
- 0x4256, 0x7232, 0x067a, 0x2a0e, 0x5a9e,
- 0x4e56, 0x7e32, 0x127a, 0x360e, 0x669e,
- 0x5a56, 0x0632, 0x1e7a, 0x420e, 0x729e,
- 0x6656, 0x1232, 0x2a7a, 0x4e0e, 0x7e9e,
- 0x7256, 0x1e32, 0x367a, 0x5a0e, 0x069e,
- 0x7e56, 0x2a32, 0x427a, 0x660e, 0x129e,
- 0x0656, 0x3632, 0x4e7a, 0x720e, 0x1e9e,
- 0x1256, 0x4232, 0x5a7a, 0x7e0e, 0x2a9e,
- 0x1e58, 0x4e34, 0x667c, 0x0610, 0x36a0,
- 0x2a58, 0x5a34, 0x727c, 0x1210, 0x42a0,
- 0x3658, 0x6634, 0x7e7c, 0x1e10, 0x4ea0,
- 0x4258, 0x7234, 0x067c, 0x2a10, 0x5aa0,
- 0x4e58, 0x7e34, 0x127c, 0x3610, 0x66a0,
- 0x5a58, 0x0634, 0x1e7c, 0x4210, 0x72a0,
- 0x6658, 0x1234, 0x2a7c, 0x4e10, 0x7ea0,
- 0x7258, 0x1e34, 0x367c, 0x5a10, 0x06a0,
- 0x7e58, 0x2a34, 0x427c, 0x6610, 0x12a0,
- 0x0658, 0x3634, 0x4e7c, 0x7210, 0x1ea0,
- 0x1258, 0x4234, 0x5a7c, 0x7e10, 0x2aa0,
- 0x2248, 0x5224, 0x6a6c, 0x0a00, 0x3a90,
- 0x2e48, 0x5e24, 0x766c, 0x1600, 0x4690,
- 0x3a48, 0x6a24, 0x826c, 0x2200, 0x5290,
- 0x4648, 0x7624, 0x0a6c, 0x2e00, 0x5e90,
- 0x5248, 0x8224, 0x166c, 0x3a00, 0x6a90,
- 0x5e48, 0x0a24, 0x226c, 0x4600, 0x7690,
- 0x6a48, 0x1624, 0x2e6c, 0x5200, 0x8290,
- 0x7648, 0x2224, 0x3a6c, 0x5e00, 0x0a90,
- 0x8248, 0x2e24, 0x466c, 0x6a00, 0x1690,
- 0x0a48, 0x3a24, 0x526c, 0x7600, 0x2290,
- 0x1648, 0x4624, 0x5e6c, 0x8200, 0x2e90,
- 0x224a, 0x5226, 0x6a6e, 0x0a02, 0x3a92,
- 0x2e4a, 0x5e26, 0x766e, 0x1602, 0x4692,
- 0x3a4a, 0x6a26, 0x826e, 0x2202, 0x5292,
- 0x464a, 0x7626, 0x0a6e, 0x2e02, 0x5e92,
- 0x524a, 0x8226, 0x166e, 0x3a02, 0x6a92,
- 0x5e4a, 0x0a26, 0x226e, 0x4602, 0x7692,
- 0x6a4a, 0x1626, 0x2e6e, 0x5202, 0x8292,
- 0x764a, 0x2226, 0x3a6e, 0x5e02, 0x0a92,
- 0x824a, 0x2e26, 0x466e, 0x6a02, 0x1692,
- 0x0a4a, 0x3a26, 0x526e, 0x7602, 0x2292,
- 0x164a, 0x4626, 0x5e6e, 0x8202, 0x2e92,
- 0x224c, 0x5228, 0x6a70, 0x0a04, 0x3a94,
- 0x2e4c, 0x5e28, 0x7670, 0x1604, 0x4694,
- 0x3a4c, 0x6a28, 0x8270, 0x2204, 0x5294,
- 0x464c, 0x7628, 0x0a70, 0x2e04, 0x5e94,
- 0x524c, 0x8228, 0x1670, 0x3a04, 0x6a94,
- 0x5e4c, 0x0a28, 0x2270, 0x4604, 0x7694,
- 0x6a4c, 0x1628, 0x2e70, 0x5204, 0x8294,
- 0x764c, 0x2228, 0x3a70, 0x5e04, 0x0a94,
- 0x824c, 0x2e28, 0x4670, 0x6a04, 0x1694,
- 0x0a4c, 0x3a28, 0x5270, 0x7604, 0x2294,
- 0x164c, 0x4628, 0x5e70, 0x8204, 0x2e94,
- 0x224e, 0x522a, 0x6a72, 0x0a06, 0x3a96,
- 0x2e4e, 0x5e2a, 0x7672, 0x1606, 0x4696,
- 0x3a4e, 0x6a2a, 0x8272, 0x2206, 0x5296,
- 0x464e, 0x762a, 0x0a72, 0x2e06, 0x5e96,
- 0x524e, 0x822a, 0x1672, 0x3a06, 0x6a96,
- 0x5e4e, 0x0a2a, 0x2272, 0x4606, 0x7696,
- 0x6a4e, 0x162a, 0x2e72, 0x5206, 0x8296,
- 0x764e, 0x222a, 0x3a72, 0x5e06, 0x0a96,
- 0x824e, 0x2e2a, 0x4672, 0x6a06, 0x1696,
- 0x0a4e, 0x3a2a, 0x5272, 0x7606, 0x2296,
- 0x164e, 0x462a, 0x5e72, 0x8206, 0x2e96,
- 0x2250, 0x522c, 0x6a74, 0x0a08, 0x3a98,
- 0x2e50, 0x5e2c, 0x7674, 0x1608, 0x4698,
- 0x3a50, 0x6a2c, 0x8274, 0x2208, 0x5298,
- 0x4650, 0x762c, 0x0a74, 0x2e08, 0x5e98,
- 0x5250, 0x822c, 0x1674, 0x3a08, 0x6a98,
- 0x5e50, 0x0a2c, 0x2274, 0x4608, 0x7698,
- 0x6a50, 0x162c, 0x2e74, 0x5208, 0x8298,
- 0x7650, 0x222c, 0x3a74, 0x5e08, 0x0a98,
- 0x8250, 0x2e2c, 0x4674, 0x6a08, 0x1698,
- 0x0a50, 0x3a2c, 0x5274, 0x7608, 0x2298,
- 0x1650, 0x462c, 0x5e74, 0x8208, 0x2e98,
- 0x2252, 0x522e, 0x6a76, 0x0a0a, 0x3a9a,
- 0x2e52, 0x5e2e, 0x7676, 0x160a, 0x469a,
- 0x3a52, 0x6a2e, 0x8276, 0x220a, 0x529a,
- 0x4652, 0x762e, 0x0a76, 0x2e0a, 0x5e9a,
- 0x5252, 0x822e, 0x1676, 0x3a0a, 0x6a9a,
- 0x5e52, 0x0a2e, 0x2276, 0x460a, 0x769a,
- 0x6a52, 0x162e, 0x2e76, 0x520a, 0x829a,
- 0x7652, 0x222e, 0x3a76, 0x5e0a, 0x0a9a,
- 0x8252, 0x2e2e, 0x4676, 0x6a0a, 0x169a,
- 0x0a52, 0x3a2e, 0x5276, 0x760a, 0x229a,
- 0x1652, 0x462e, 0x5e76, 0x820a, 0x2e9a,
- 0x2254, 0x5230, 0x6a78, 0x0a0c, 0x3a9c,
- 0x2e54, 0x5e30, 0x7678, 0x160c, 0x469c,
- 0x3a54, 0x6a30, 0x8278, 0x220c, 0x529c,
- 0x4654, 0x7630, 0x0a78, 0x2e0c, 0x5e9c,
- 0x5254, 0x8230, 0x1678, 0x3a0c, 0x6a9c,
- 0x5e54, 0x0a30, 0x2278, 0x460c, 0x769c,
- 0x6a54, 0x1630, 0x2e78, 0x520c, 0x829c,
- 0x7654, 0x2230, 0x3a78, 0x5e0c, 0x0a9c,
- 0x8254, 0x2e30, 0x4678, 0x6a0c, 0x169c,
- 0x0a54, 0x3a30, 0x5278, 0x760c, 0x229c,
- 0x1654, 0x4630, 0x5e78, 0x820c, 0x2e9c,
- 0x2256, 0x5232, 0x6a7a, 0x0a0e, 0x3a9e,
- 0x2e56, 0x5e32, 0x767a, 0x160e, 0x469e,
- 0x3a56, 0x6a32, 0x827a, 0x220e, 0x529e,
- 0x4656, 0x7632, 0x0a7a, 0x2e0e, 0x5e9e,
- 0x5256, 0x8232, 0x167a, 0x3a0e, 0x6a9e,
- 0x5e56, 0x0a32, 0x227a, 0x460e, 0x769e,
- 0x6a56, 0x1632, 0x2e7a, 0x520e, 0x829e,
- 0x7656, 0x2232, 0x3a7a, 0x5e0e, 0x0a9e,
- 0x8256, 0x2e32, 0x467a, 0x6a0e, 0x169e,
- 0x0a56, 0x3a32, 0x527a, 0x760e, 0x229e,
- 0x1656, 0x4632, 0x5e7a, 0x820e, 0x2e9e,
- 0x2258, 0x5234, 0x6a7c, 0x0a10, 0x3aa0,
- 0x2e58, 0x5e34, 0x767c, 0x1610, 0x46a0,
- 0x3a58, 0x6a34, 0x827c, 0x2210, 0x52a0,
- 0x4658, 0x7634, 0x0a7c, 0x2e10, 0x5ea0,
- 0x5258, 0x8234, 0x167c, 0x3a10, 0x6aa0,
- 0x5e58, 0x0a34, 0x227c, 0x4610, 0x76a0,
- 0x6a58, 0x1634, 0x2e7c, 0x5210, 0x82a0,
- 0x7658, 0x2234, 0x3a7c, 0x5e10, 0x0aa0,
- 0x8258, 0x2e34, 0x467c, 0x6a10, 0x16a0,
- 0x0a58, 0x3a34, 0x527c, 0x7610, 0x22a0,
- 0x1658, 0x4634, 0x5e7c, 0x8210, 0x2ea0,
- 0x0000, 0x0036, 0x006c, 0x00a2, 0x8648,
- 0x0002, 0x0038, 0x006e, 0x00a4, 0x864c,
- 0x0004, 0x003a, 0x0070, 0x00a6, 0x8650,
- 0x0006, 0x003c, 0x0072, 0x00a8, 0x8654,
- 0x0008, 0x003e, 0x0074, 0x00aa, 0x8658,
- 0x000a, 0x0040, 0x0076, 0x00ac, 0x865c,
- 0x000c, 0x0042, 0x0078, 0x00ae, 0x8660,
- 0x000e, 0x0044, 0x007a, 0x00b0, 0x8664,
- 0x0010, 0x0046, 0x007c, 0x00b2, 0x8668,
- 0x0012, 0x0048, 0x007e, 0x8600, 0x866c,
- 0x0014, 0x004a, 0x0080, 0x8604, 0x8670,
- 0x0016, 0x004c, 0x0082, 0x8608, 0x8674,
- 0x0018, 0x004e, 0x0084, 0x860c, 0x8678,
- 0x001a, 0x0050, 0x0086, 0x8610, 0x867c,
- 0x001c, 0x0052, 0x0088, 0x8614, 0x8680,
- 0x001e, 0x0054, 0x008a, 0x8618, 0x8684,
- 0x0020, 0x0056, 0x008c, 0x861c, 0x8688,
- 0x0022, 0x0058, 0x008e, 0x8620, 0x868c,
- 0x0024, 0x005a, 0x0090, 0x8624, 0x8690,
- 0x0026, 0x005c, 0x0092, 0x8628, 0x8694,
- 0x0028, 0x005e, 0x0094, 0x862c, 0x8698,
- 0x002a, 0x0060, 0x0096, 0x8630, 0x869c,
- 0x002c, 0x0062, 0x0098, 0x8634, 0x86a0,
- 0x002e, 0x0064, 0x009a, 0x8638, 0x86a4,
- 0x0030, 0x0066, 0x009c, 0x863c, 0x86a8,
- 0x0032, 0x0068, 0x009e, 0x8640, 0x86ac,
- 0x0034, 0x006a, 0x00a0, 0x8644, 0x86b0,
- 0x4a5a, 0x7a36, 0x0e7e, 0x3212, 0x62a2,
- 0x565a, 0x0236, 0x1a7e, 0x3e12, 0x6ea2,
- 0x625a, 0x0e36, 0x267e, 0x4a12, 0x7aa2,
- 0x6e5a, 0x1a36, 0x327e, 0x5612, 0x02a2,
- 0x7a5a, 0x2636, 0x3e7e, 0x6212, 0x0ea2,
- 0x025a, 0x3236, 0x4a7e, 0x6e12, 0x1aa2,
- 0x0e5a, 0x3e36, 0x567e, 0x7a12, 0x26a2,
- 0x1a5a, 0x4a36, 0x627e, 0x0212, 0x32a2,
- 0x265a, 0x5636, 0x6e7e, 0x0e12, 0x3ea2,
- 0x325a, 0x6236, 0x7a7e, 0x1a12, 0x4aa2,
- 0x3e5a, 0x6e36, 0x027e, 0x2612, 0x56a2,
- 0x4a5c, 0x7a38, 0x0e80, 0x3214, 0x62a4,
- 0x565c, 0x0238, 0x1a80, 0x3e14, 0x6ea4,
- 0x625c, 0x0e38, 0x2680, 0x4a14, 0x7aa4,
- 0x6e5c, 0x1a38, 0x3280, 0x5614, 0x02a4,
- 0x7a5c, 0x2638, 0x3e80, 0x6214, 0x0ea4,
- 0x025c, 0x3238, 0x4a80, 0x6e14, 0x1aa4,
- 0x0e5c, 0x3e38, 0x5680, 0x7a14, 0x26a4,
- 0x1a5c, 0x4a38, 0x6280, 0x0214, 0x32a4,
- 0x265c, 0x5638, 0x6e80, 0x0e14, 0x3ea4,
- 0x325c, 0x6238, 0x7a80, 0x1a14, 0x4aa4,
- 0x3e5c, 0x6e38, 0x0280, 0x2614, 0x56a4,
- 0x4a5e, 0x7a3a, 0x0e82, 0x3216, 0x62a6,
- 0x565e, 0x023a, 0x1a82, 0x3e16, 0x6ea6,
- 0x625e, 0x0e3a, 0x2682, 0x4a16, 0x7aa6,
- 0x6e5e, 0x1a3a, 0x3282, 0x5616, 0x02a6,
- 0x7a5e, 0x263a, 0x3e82, 0x6216, 0x0ea6,
- 0x025e, 0x323a, 0x4a82, 0x6e16, 0x1aa6,
- 0x0e5e, 0x3e3a, 0x5682, 0x7a16, 0x26a6,
- 0x1a5e, 0x4a3a, 0x6282, 0x0216, 0x32a6,
- 0x265e, 0x563a, 0x6e82, 0x0e16, 0x3ea6,
- 0x325e, 0x623a, 0x7a82, 0x1a16, 0x4aa6,
- 0x3e5e, 0x6e3a, 0x0282, 0x2616, 0x56a6,
- 0x4a60, 0x7a3c, 0x0e84, 0x3218, 0x62a8,
- 0x5660, 0x023c, 0x1a84, 0x3e18, 0x6ea8,
- 0x6260, 0x0e3c, 0x2684, 0x4a18, 0x7aa8,
- 0x6e60, 0x1a3c, 0x3284, 0x5618, 0x02a8,
- 0x7a60, 0x263c, 0x3e84, 0x6218, 0x0ea8,
- 0x0260, 0x323c, 0x4a84, 0x6e18, 0x1aa8,
- 0x0e60, 0x3e3c, 0x5684, 0x7a18, 0x26a8,
- 0x1a60, 0x4a3c, 0x6284, 0x0218, 0x32a8,
- 0x2660, 0x563c, 0x6e84, 0x0e18, 0x3ea8,
- 0x3260, 0x623c, 0x7a84, 0x1a18, 0x4aa8,
- 0x3e60, 0x6e3c, 0x0284, 0x2618, 0x56a8,
- 0x4a62, 0x7a3e, 0x0e86, 0x321a, 0x62aa,
- 0x5662, 0x023e, 0x1a86, 0x3e1a, 0x6eaa,
- 0x6262, 0x0e3e, 0x2686, 0x4a1a, 0x7aaa,
- 0x6e62, 0x1a3e, 0x3286, 0x561a, 0x02aa,
- 0x7a62, 0x263e, 0x3e86, 0x621a, 0x0eaa,
- 0x0262, 0x323e, 0x4a86, 0x6e1a, 0x1aaa,
- 0x0e62, 0x3e3e, 0x5686, 0x7a1a, 0x26aa,
- 0x1a62, 0x4a3e, 0x6286, 0x021a, 0x32aa,
- 0x2662, 0x563e, 0x6e86, 0x0e1a, 0x3eaa,
- 0x3262, 0x623e, 0x7a86, 0x1a1a, 0x4aaa,
- 0x3e62, 0x6e3e, 0x0286, 0x261a, 0x56aa,
- 0x4a64, 0x7a40, 0x0e88, 0x321c, 0x62ac,
- 0x5664, 0x0240, 0x1a88, 0x3e1c, 0x6eac,
- 0x6264, 0x0e40, 0x2688, 0x4a1c, 0x7aac,
- 0x6e64, 0x1a40, 0x3288, 0x561c, 0x02ac,
- 0x7a64, 0x2640, 0x3e88, 0x621c, 0x0eac,
- 0x0264, 0x3240, 0x4a88, 0x6e1c, 0x1aac,
- 0x0e64, 0x3e40, 0x5688, 0x7a1c, 0x26ac,
- 0x1a64, 0x4a40, 0x6288, 0x021c, 0x32ac,
- 0x2664, 0x5640, 0x6e88, 0x0e1c, 0x3eac,
- 0x3264, 0x6240, 0x7a88, 0x1a1c, 0x4aac,
- 0x3e64, 0x6e40, 0x0288, 0x261c, 0x56ac,
- 0x4a66, 0x7a42, 0x0e8a, 0x321e, 0x62ae,
- 0x5666, 0x0242, 0x1a8a, 0x3e1e, 0x6eae,
- 0x6266, 0x0e42, 0x268a, 0x4a1e, 0x7aae,
- 0x6e66, 0x1a42, 0x328a, 0x561e, 0x02ae,
- 0x7a66, 0x2642, 0x3e8a, 0x621e, 0x0eae,
- 0x0266, 0x3242, 0x4a8a, 0x6e1e, 0x1aae,
- 0x0e66, 0x3e42, 0x568a, 0x7a1e, 0x26ae,
- 0x1a66, 0x4a42, 0x628a, 0x021e, 0x32ae,
- 0x2666, 0x5642, 0x6e8a, 0x0e1e, 0x3eae,
- 0x3266, 0x6242, 0x7a8a, 0x1a1e, 0x4aae,
- 0x3e66, 0x6e42, 0x028a, 0x261e, 0x56ae,
- 0x4a68, 0x7a44, 0x0e8c, 0x3220, 0x62b0,
- 0x5668, 0x0244, 0x1a8c, 0x3e20, 0x6eb0,
- 0x6268, 0x0e44, 0x268c, 0x4a20, 0x7ab0,
- 0x6e68, 0x1a44, 0x328c, 0x5620, 0x02b0,
- 0x7a68, 0x2644, 0x3e8c, 0x6220, 0x0eb0,
- 0x0268, 0x3244, 0x4a8c, 0x6e20, 0x1ab0,
- 0x0e68, 0x3e44, 0x568c, 0x7a20, 0x26b0,
- 0x1a68, 0x4a44, 0x628c, 0x0220, 0x32b0,
- 0x2668, 0x5644, 0x6e8c, 0x0e20, 0x3eb0,
- 0x3268, 0x6244, 0x7a8c, 0x1a20, 0x4ab0,
- 0x3e68, 0x6e44, 0x028c, 0x2620, 0x56b0,
- 0x4a6a, 0x7a46, 0x0e8e, 0x3222, 0x62b2,
- 0x566a, 0x0246, 0x1a8e, 0x3e22, 0x6eb2,
- 0x626a, 0x0e46, 0x268e, 0x4a22, 0x7ab2,
- 0x6e6a, 0x1a46, 0x328e, 0x5622, 0x02b2,
- 0x7a6a, 0x2646, 0x3e8e, 0x6222, 0x0eb2,
- 0x026a, 0x3246, 0x4a8e, 0x6e22, 0x1ab2,
- 0x0e6a, 0x3e46, 0x568e, 0x7a22, 0x26b2,
- 0x1a6a, 0x4a46, 0x628e, 0x0222, 0x32b2,
- 0x266a, 0x5646, 0x6e8e, 0x0e22, 0x3eb2,
- 0x326a, 0x6246, 0x7a8e, 0x1a22, 0x4ab2,
- 0x3e6a, 0x6e46, 0x028e, 0x2622, 0x56b2,
- 0x4e5a, 0x7e36, 0x127e, 0x3612, 0x66a2,
- 0x5a5a, 0x0636, 0x1e7e, 0x4212, 0x72a2,
- 0x665a, 0x1236, 0x2a7e, 0x4e12, 0x7ea2,
- 0x725a, 0x1e36, 0x367e, 0x5a12, 0x06a2,
- 0x7e5a, 0x2a36, 0x427e, 0x6612, 0x12a2,
- 0x065a, 0x3636, 0x4e7e, 0x7212, 0x1ea2,
- 0x125a, 0x4236, 0x5a7e, 0x7e12, 0x2aa2,
- 0x1e5a, 0x4e36, 0x667e, 0x0612, 0x36a2,
- 0x2a5a, 0x5a36, 0x727e, 0x1212, 0x42a2,
- 0x365a, 0x6636, 0x7e7e, 0x1e12, 0x4ea2,
- 0x425a, 0x7236, 0x067e, 0x2a12, 0x5aa2,
- 0x4e5c, 0x7e38, 0x1280, 0x3614, 0x66a4,
- 0x5a5c, 0x0638, 0x1e80, 0x4214, 0x72a4,
- 0x665c, 0x1238, 0x2a80, 0x4e14, 0x7ea4,
- 0x725c, 0x1e38, 0x3680, 0x5a14, 0x06a4,
- 0x7e5c, 0x2a38, 0x4280, 0x6614, 0x12a4,
- 0x065c, 0x3638, 0x4e80, 0x7214, 0x1ea4,
- 0x125c, 0x4238, 0x5a80, 0x7e14, 0x2aa4,
- 0x1e5c, 0x4e38, 0x6680, 0x0614, 0x36a4,
- 0x2a5c, 0x5a38, 0x7280, 0x1214, 0x42a4,
- 0x365c, 0x6638, 0x7e80, 0x1e14, 0x4ea4,
- 0x425c, 0x7238, 0x0680, 0x2a14, 0x5aa4,
- 0x4e5e, 0x7e3a, 0x1282, 0x3616, 0x66a6,
- 0x5a5e, 0x063a, 0x1e82, 0x4216, 0x72a6,
- 0x665e, 0x123a, 0x2a82, 0x4e16, 0x7ea6,
- 0x725e, 0x1e3a, 0x3682, 0x5a16, 0x06a6,
- 0x7e5e, 0x2a3a, 0x4282, 0x6616, 0x12a6,
- 0x065e, 0x363a, 0x4e82, 0x7216, 0x1ea6,
- 0x125e, 0x423a, 0x5a82, 0x7e16, 0x2aa6,
- 0x1e5e, 0x4e3a, 0x6682, 0x0616, 0x36a6,
- 0x2a5e, 0x5a3a, 0x7282, 0x1216, 0x42a6,
- 0x365e, 0x663a, 0x7e82, 0x1e16, 0x4ea6,
- 0x425e, 0x723a, 0x0682, 0x2a16, 0x5aa6,
- 0x4e60, 0x7e3c, 0x1284, 0x3618, 0x66a8,
- 0x5a60, 0x063c, 0x1e84, 0x4218, 0x72a8,
- 0x6660, 0x123c, 0x2a84, 0x4e18, 0x7ea8,
- 0x7260, 0x1e3c, 0x3684, 0x5a18, 0x06a8,
- 0x7e60, 0x2a3c, 0x4284, 0x6618, 0x12a8,
- 0x0660, 0x363c, 0x4e84, 0x7218, 0x1ea8,
- 0x1260, 0x423c, 0x5a84, 0x7e18, 0x2aa8,
- 0x1e60, 0x4e3c, 0x6684, 0x0618, 0x36a8,
- 0x2a60, 0x5a3c, 0x7284, 0x1218, 0x42a8,
- 0x3660, 0x663c, 0x7e84, 0x1e18, 0x4ea8,
- 0x4260, 0x723c, 0x0684, 0x2a18, 0x5aa8,
- 0x4e62, 0x7e3e, 0x1286, 0x361a, 0x66aa,
- 0x5a62, 0x063e, 0x1e86, 0x421a, 0x72aa,
- 0x6662, 0x123e, 0x2a86, 0x4e1a, 0x7eaa,
- 0x7262, 0x1e3e, 0x3686, 0x5a1a, 0x06aa,
- 0x7e62, 0x2a3e, 0x4286, 0x661a, 0x12aa,
- 0x0662, 0x363e, 0x4e86, 0x721a, 0x1eaa,
- 0x1262, 0x423e, 0x5a86, 0x7e1a, 0x2aaa,
- 0x1e62, 0x4e3e, 0x6686, 0x061a, 0x36aa,
- 0x2a62, 0x5a3e, 0x7286, 0x121a, 0x42aa,
- 0x3662, 0x663e, 0x7e86, 0x1e1a, 0x4eaa,
- 0x4262, 0x723e, 0x0686, 0x2a1a, 0x5aaa,
- 0x4e64, 0x7e40, 0x1288, 0x361c, 0x66ac,
- 0x5a64, 0x0640, 0x1e88, 0x421c, 0x72ac,
- 0x6664, 0x1240, 0x2a88, 0x4e1c, 0x7eac,
- 0x7264, 0x1e40, 0x3688, 0x5a1c, 0x06ac,
- 0x7e64, 0x2a40, 0x4288, 0x661c, 0x12ac,
- 0x0664, 0x3640, 0x4e88, 0x721c, 0x1eac,
- 0x1264, 0x4240, 0x5a88, 0x7e1c, 0x2aac,
- 0x1e64, 0x4e40, 0x6688, 0x061c, 0x36ac,
- 0x2a64, 0x5a40, 0x7288, 0x121c, 0x42ac,
- 0x3664, 0x6640, 0x7e88, 0x1e1c, 0x4eac,
- 0x4264, 0x7240, 0x0688, 0x2a1c, 0x5aac,
- 0x4e66, 0x7e42, 0x128a, 0x361e, 0x66ae,
- 0x5a66, 0x0642, 0x1e8a, 0x421e, 0x72ae,
- 0x6666, 0x1242, 0x2a8a, 0x4e1e, 0x7eae,
- 0x7266, 0x1e42, 0x368a, 0x5a1e, 0x06ae,
- 0x7e66, 0x2a42, 0x428a, 0x661e, 0x12ae,
- 0x0666, 0x3642, 0x4e8a, 0x721e, 0x1eae,
- 0x1266, 0x4242, 0x5a8a, 0x7e1e, 0x2aae,
- 0x1e66, 0x4e42, 0x668a, 0x061e, 0x36ae,
- 0x2a66, 0x5a42, 0x728a, 0x121e, 0x42ae,
- 0x3666, 0x6642, 0x7e8a, 0x1e1e, 0x4eae,
- 0x4266, 0x7242, 0x068a, 0x2a1e, 0x5aae,
- 0x4e68, 0x7e44, 0x128c, 0x3620, 0x66b0,
- 0x5a68, 0x0644, 0x1e8c, 0x4220, 0x72b0,
- 0x6668, 0x1244, 0x2a8c, 0x4e20, 0x7eb0,
- 0x7268, 0x1e44, 0x368c, 0x5a20, 0x06b0,
- 0x7e68, 0x2a44, 0x428c, 0x6620, 0x12b0,
- 0x0668, 0x3644, 0x4e8c, 0x7220, 0x1eb0,
- 0x1268, 0x4244, 0x5a8c, 0x7e20, 0x2ab0,
- 0x1e68, 0x4e44, 0x668c, 0x0620, 0x36b0,
- 0x2a68, 0x5a44, 0x728c, 0x1220, 0x42b0,
- 0x3668, 0x6644, 0x7e8c, 0x1e20, 0x4eb0,
- 0x4268, 0x7244, 0x068c, 0x2a20, 0x5ab0,
- 0x4e6a, 0x7e46, 0x128e, 0x3622, 0x66b2,
- 0x5a6a, 0x0646, 0x1e8e, 0x4222, 0x72b2,
- 0x666a, 0x1246, 0x2a8e, 0x4e22, 0x7eb2,
- 0x726a, 0x1e46, 0x368e, 0x5a22, 0x06b2,
- 0x7e6a, 0x2a46, 0x428e, 0x6622, 0x12b2,
- 0x066a, 0x3646, 0x4e8e, 0x7222, 0x1eb2,
- 0x126a, 0x4246, 0x5a8e, 0x7e22, 0x2ab2,
- 0x1e6a, 0x4e46, 0x668e, 0x0622, 0x36b2,
- 0x2a6a, 0x5a46, 0x728e, 0x1222, 0x42b2,
- 0x366a, 0x6646, 0x7e8e, 0x1e22, 0x4eb2,
- 0x426a, 0x7246, 0x068e, 0x2a22, 0x5ab2,
- 0x525a, 0x8236, 0x167e, 0x3a12, 0x6aa2,
- 0x5e5a, 0x0a36, 0x227e, 0x4612, 0x76a2,
- 0x6a5a, 0x1636, 0x2e7e, 0x5212, 0x82a2,
- 0x765a, 0x2236, 0x3a7e, 0x5e12, 0x0aa2,
- 0x825a, 0x2e36, 0x467e, 0x6a12, 0x16a2,
- 0x0a5a, 0x3a36, 0x527e, 0x7612, 0x22a2,
- 0x165a, 0x4636, 0x5e7e, 0x8212, 0x2ea2,
- 0x225a, 0x5236, 0x6a7e, 0x0a12, 0x3aa2,
- 0x2e5a, 0x5e36, 0x767e, 0x1612, 0x46a2,
- 0x3a5a, 0x6a36, 0x827e, 0x2212, 0x52a2,
- 0x465a, 0x7636, 0x0a7e, 0x2e12, 0x5ea2,
- 0x525c, 0x8238, 0x1680, 0x3a14, 0x6aa4,
- 0x5e5c, 0x0a38, 0x2280, 0x4614, 0x76a4,
- 0x6a5c, 0x1638, 0x2e80, 0x5214, 0x82a4,
- 0x765c, 0x2238, 0x3a80, 0x5e14, 0x0aa4,
- 0x825c, 0x2e38, 0x4680, 0x6a14, 0x16a4,
- 0x0a5c, 0x3a38, 0x5280, 0x7614, 0x22a4,
- 0x165c, 0x4638, 0x5e80, 0x8214, 0x2ea4,
- 0x225c, 0x5238, 0x6a80, 0x0a14, 0x3aa4,
- 0x2e5c, 0x5e38, 0x7680, 0x1614, 0x46a4,
- 0x3a5c, 0x6a38, 0x8280, 0x2214, 0x52a4,
- 0x465c, 0x7638, 0x0a80, 0x2e14, 0x5ea4,
- 0x525e, 0x823a, 0x1682, 0x3a16, 0x6aa6,
- 0x5e5e, 0x0a3a, 0x2282, 0x4616, 0x76a6,
- 0x6a5e, 0x163a, 0x2e82, 0x5216, 0x82a6,
- 0x765e, 0x223a, 0x3a82, 0x5e16, 0x0aa6,
- 0x825e, 0x2e3a, 0x4682, 0x6a16, 0x16a6,
- 0x0a5e, 0x3a3a, 0x5282, 0x7616, 0x22a6,
- 0x165e, 0x463a, 0x5e82, 0x8216, 0x2ea6,
- 0x225e, 0x523a, 0x6a82, 0x0a16, 0x3aa6,
- 0x2e5e, 0x5e3a, 0x7682, 0x1616, 0x46a6,
- 0x3a5e, 0x6a3a, 0x8282, 0x2216, 0x52a6,
- 0x465e, 0x763a, 0x0a82, 0x2e16, 0x5ea6,
- 0x5260, 0x823c, 0x1684, 0x3a18, 0x6aa8,
- 0x5e60, 0x0a3c, 0x2284, 0x4618, 0x76a8,
- 0x6a60, 0x163c, 0x2e84, 0x5218, 0x82a8,
- 0x7660, 0x223c, 0x3a84, 0x5e18, 0x0aa8,
- 0x8260, 0x2e3c, 0x4684, 0x6a18, 0x16a8,
- 0x0a60, 0x3a3c, 0x5284, 0x7618, 0x22a8,
- 0x1660, 0x463c, 0x5e84, 0x8218, 0x2ea8,
- 0x2260, 0x523c, 0x6a84, 0x0a18, 0x3aa8,
- 0x2e60, 0x5e3c, 0x7684, 0x1618, 0x46a8,
- 0x3a60, 0x6a3c, 0x8284, 0x2218, 0x52a8,
- 0x4660, 0x763c, 0x0a84, 0x2e18, 0x5ea8,
- 0x5262, 0x823e, 0x1686, 0x3a1a, 0x6aaa,
- 0x5e62, 0x0a3e, 0x2286, 0x461a, 0x76aa,
- 0x6a62, 0x163e, 0x2e86, 0x521a, 0x82aa,
- 0x7662, 0x223e, 0x3a86, 0x5e1a, 0x0aaa,
- 0x8262, 0x2e3e, 0x4686, 0x6a1a, 0x16aa,
- 0x0a62, 0x3a3e, 0x5286, 0x761a, 0x22aa,
- 0x1662, 0x463e, 0x5e86, 0x821a, 0x2eaa,
- 0x2262, 0x523e, 0x6a86, 0x0a1a, 0x3aaa,
- 0x2e62, 0x5e3e, 0x7686, 0x161a, 0x46aa,
- 0x3a62, 0x6a3e, 0x8286, 0x221a, 0x52aa,
- 0x4662, 0x763e, 0x0a86, 0x2e1a, 0x5eaa,
- 0x5264, 0x8240, 0x1688, 0x3a1c, 0x6aac,
- 0x5e64, 0x0a40, 0x2288, 0x461c, 0x76ac,
- 0x6a64, 0x1640, 0x2e88, 0x521c, 0x82ac,
- 0x7664, 0x2240, 0x3a88, 0x5e1c, 0x0aac,
- 0x8264, 0x2e40, 0x4688, 0x6a1c, 0x16ac,
- 0x0a64, 0x3a40, 0x5288, 0x761c, 0x22ac,
- 0x1664, 0x4640, 0x5e88, 0x821c, 0x2eac,
- 0x2264, 0x5240, 0x6a88, 0x0a1c, 0x3aac,
- 0x2e64, 0x5e40, 0x7688, 0x161c, 0x46ac,
- 0x3a64, 0x6a40, 0x8288, 0x221c, 0x52ac,
- 0x4664, 0x7640, 0x0a88, 0x2e1c, 0x5eac,
- 0x5266, 0x8242, 0x168a, 0x3a1e, 0x6aae,
- 0x5e66, 0x0a42, 0x228a, 0x461e, 0x76ae,
- 0x6a66, 0x1642, 0x2e8a, 0x521e, 0x82ae,
- 0x7666, 0x2242, 0x3a8a, 0x5e1e, 0x0aae,
- 0x8266, 0x2e42, 0x468a, 0x6a1e, 0x16ae,
- 0x0a66, 0x3a42, 0x528a, 0x761e, 0x22ae,
- 0x1666, 0x4642, 0x5e8a, 0x821e, 0x2eae,
- 0x2266, 0x5242, 0x6a8a, 0x0a1e, 0x3aae,
- 0x2e66, 0x5e42, 0x768a, 0x161e, 0x46ae,
- 0x3a66, 0x6a42, 0x828a, 0x221e, 0x52ae,
- 0x4666, 0x7642, 0x0a8a, 0x2e1e, 0x5eae,
- 0x5268, 0x8244, 0x168c, 0x3a20, 0x6ab0,
- 0x5e68, 0x0a44, 0x228c, 0x4620, 0x76b0,
- 0x6a68, 0x1644, 0x2e8c, 0x5220, 0x82b0,
- 0x7668, 0x2244, 0x3a8c, 0x5e20, 0x0ab0,
- 0x8268, 0x2e44, 0x468c, 0x6a20, 0x16b0,
- 0x0a68, 0x3a44, 0x528c, 0x7620, 0x22b0,
- 0x1668, 0x4644, 0x5e8c, 0x8220, 0x2eb0,
- 0x2268, 0x5244, 0x6a8c, 0x0a20, 0x3ab0,
- 0x2e68, 0x5e44, 0x768c, 0x1620, 0x46b0,
- 0x3a68, 0x6a44, 0x828c, 0x2220, 0x52b0,
- 0x4668, 0x7644, 0x0a8c, 0x2e20, 0x5eb0,
- 0x526a, 0x8246, 0x168e, 0x3a22, 0x6ab2,
- 0x5e6a, 0x0a46, 0x228e, 0x4622, 0x76b2,
- 0x6a6a, 0x1646, 0x2e8e, 0x5222, 0x82b2,
- 0x766a, 0x2246, 0x3a8e, 0x5e22, 0x0ab2,
- 0x826a, 0x2e46, 0x468e, 0x6a22, 0x16b2,
- 0x0a6a, 0x3a46, 0x528e, 0x7622, 0x22b2,
- 0x166a, 0x4646, 0x5e8e, 0x8222, 0x2eb2,
- 0x226a, 0x5246, 0x6a8e, 0x0a22, 0x3ab2,
- 0x2e6a, 0x5e46, 0x768e, 0x1622, 0x46b2,
- 0x3a6a, 0x6a46, 0x828e, 0x2222, 0x52b2,
- 0x466a, 0x7646, 0x0a8e, 0x2e22, 0x5eb2,
- 0x4a5a, 0x7a36, 0x0e7e, 0x3212, 0x62a2,
- 0x565a, 0x0236, 0x1a7e, 0x3e12, 0x6ea2,
- 0x625a, 0x0e36, 0x267e, 0x4a12, 0x7aa2,
- 0x6e5a, 0x1a36, 0x327e, 0x5612, 0x02a2,
- 0x7a5a, 0x2636, 0x3e7e, 0x6212, 0x0ea2,
- 0x025a, 0x3236, 0x4a7e, 0x6e12, 0x1aa2,
- 0x0e5a, 0x3e36, 0x567e, 0x7a12, 0x26a2,
- 0x1a5a, 0x4a36, 0x627e, 0x0212, 0x32a2,
- 0x265a, 0x5636, 0x6e7e, 0x0e12, 0x3ea2,
- 0x325a, 0x6236, 0x7a7e, 0x1a12, 0x4aa2,
- 0x3e5a, 0x6e36, 0x027e, 0x2612, 0x56a2,
- 0x4a5c, 0x7a38, 0x0e80, 0x3214, 0x62a4,
- 0x565c, 0x0238, 0x1a80, 0x3e14, 0x6ea4,
- 0x625c, 0x0e38, 0x2680, 0x4a14, 0x7aa4,
- 0x6e5c, 0x1a38, 0x3280, 0x5614, 0x02a4,
- 0x7a5c, 0x2638, 0x3e80, 0x6214, 0x0ea4,
- 0x025c, 0x3238, 0x4a80, 0x6e14, 0x1aa4,
- 0x0e5c, 0x3e38, 0x5680, 0x7a14, 0x26a4,
- 0x1a5c, 0x4a38, 0x6280, 0x0214, 0x32a4,
- 0x265c, 0x5638, 0x6e80, 0x0e14, 0x3ea4,
- 0x325c, 0x6238, 0x7a80, 0x1a14, 0x4aa4,
- 0x3e5c, 0x6e38, 0x0280, 0x2614, 0x56a4,
- 0x4a5e, 0x7a3a, 0x0e82, 0x3216, 0x62a6,
- 0x565e, 0x023a, 0x1a82, 0x3e16, 0x6ea6,
- 0x625e, 0x0e3a, 0x2682, 0x4a16, 0x7aa6,
- 0x6e5e, 0x1a3a, 0x3282, 0x5616, 0x02a6,
- 0x7a5e, 0x263a, 0x3e82, 0x6216, 0x0ea6,
- 0x7c48, 0x2824, 0x406c, 0x6400, 0x1090,
- 0x0448, 0x3424, 0x4c6c, 0x7000, 0x1c90,
- 0x1048, 0x4024, 0x586c, 0x7c00, 0x2890,
- 0x1c48, 0x4c24, 0x646c, 0x0400, 0x3490,
- 0x2848, 0x5824, 0x706c, 0x1000, 0x4090,
- 0x3448, 0x6424, 0x7c6c, 0x1c00, 0x4c90,
- 0x4048, 0x7024, 0x046c, 0x2800, 0x5890,
- 0x4c48, 0x7c24, 0x106c, 0x3400, 0x6490,
- 0x5848, 0x0424, 0x1c6c, 0x4000, 0x7090,
- 0x6448, 0x1024, 0x286c, 0x4c00, 0x7c90,
- 0x7048, 0x1c24, 0x346c, 0x5800, 0x0490,
- 0x7c4a, 0x2826, 0x406e, 0x6402, 0x1092,
- 0x044a, 0x3426, 0x4c6e, 0x7002, 0x1c92,
- 0x104a, 0x4026, 0x586e, 0x7c02, 0x2892,
- 0x1c4a, 0x4c26, 0x646e, 0x0402, 0x3492,
- 0x284a, 0x5826, 0x706e, 0x1002, 0x4092,
- 0x344a, 0x6426, 0x7c6e, 0x1c02, 0x4c92,
- 0x404a, 0x7026, 0x046e, 0x2802, 0x5892,
- 0x4c4a, 0x7c26, 0x106e, 0x3402, 0x6492,
- 0x584a, 0x0426, 0x1c6e, 0x4002, 0x7092,
- 0x644a, 0x1026, 0x286e, 0x4c02, 0x7c92,
- 0x704a, 0x1c26, 0x346e, 0x5802, 0x0492,
- 0x7c4c, 0x2828, 0x4070, 0x6404, 0x1094,
- 0x044c, 0x3428, 0x4c70, 0x7004, 0x1c94,
- 0x104c, 0x4028, 0x5870, 0x7c04, 0x2894,
- 0x1c4c, 0x4c28, 0x6470, 0x0404, 0x3494,
- 0x284c, 0x5828, 0x7070, 0x1004, 0x4094,
- 0x344c, 0x6428, 0x7c70, 0x1c04, 0x4c94,
- 0x404c, 0x7028, 0x0470, 0x2804, 0x5894,
- 0x4c4c, 0x7c28, 0x1070, 0x3404, 0x6494,
- 0x584c, 0x0428, 0x1c70, 0x4004, 0x7094,
- 0x644c, 0x1028, 0x2870, 0x4c04, 0x7c94,
- 0x704c, 0x1c28, 0x3470, 0x5804, 0x0494,
- 0x7c4e, 0x282a, 0x4072, 0x6406, 0x1096,
- 0x044e, 0x342a, 0x4c72, 0x7006, 0x1c96,
- 0x104e, 0x402a, 0x5872, 0x7c06, 0x2896,
- 0x1c4e, 0x4c2a, 0x6472, 0x0406, 0x3496,
- 0x284e, 0x582a, 0x7072, 0x1006, 0x4096,
- 0x344e, 0x642a, 0x7c72, 0x1c06, 0x4c96,
- 0x404e, 0x702a, 0x0472, 0x2806, 0x5896,
- 0x4c4e, 0x7c2a, 0x1072, 0x3406, 0x6496,
- 0x584e, 0x042a, 0x1c72, 0x4006, 0x7096,
- 0x644e, 0x102a, 0x2872, 0x4c06, 0x7c96,
- 0x704e, 0x1c2a, 0x3472, 0x5806, 0x0496,
- 0x7c50, 0x282c, 0x4074, 0x6408, 0x1098,
- 0x0450, 0x342c, 0x4c74, 0x7008, 0x1c98,
- 0x1050, 0x402c, 0x5874, 0x7c08, 0x2898,
- 0x1c50, 0x4c2c, 0x6474, 0x0408, 0x3498,
- 0x2850, 0x582c, 0x7074, 0x1008, 0x4098,
- 0x3450, 0x642c, 0x7c74, 0x1c08, 0x4c98,
- 0x4050, 0x702c, 0x0474, 0x2808, 0x5898,
- 0x4c50, 0x7c2c, 0x1074, 0x3408, 0x6498,
- 0x5850, 0x042c, 0x1c74, 0x4008, 0x7098,
- 0x6450, 0x102c, 0x2874, 0x4c08, 0x7c98,
- 0x7050, 0x1c2c, 0x3474, 0x5808, 0x0498,
- 0x7c52, 0x282e, 0x4076, 0x640a, 0x109a,
- 0x0452, 0x342e, 0x4c76, 0x700a, 0x1c9a,
- 0x1052, 0x402e, 0x5876, 0x7c0a, 0x289a,
- 0x1c52, 0x4c2e, 0x6476, 0x040a, 0x349a,
- 0x2852, 0x582e, 0x7076, 0x100a, 0x409a,
- 0x3452, 0x642e, 0x7c76, 0x1c0a, 0x4c9a,
- 0x4052, 0x702e, 0x0476, 0x280a, 0x589a,
- 0x4c52, 0x7c2e, 0x1076, 0x340a, 0x649a,
- 0x5852, 0x042e, 0x1c76, 0x400a, 0x709a,
- 0x6452, 0x102e, 0x2876, 0x4c0a, 0x7c9a,
- 0x7052, 0x1c2e, 0x3476, 0x580a, 0x049a,
- 0x7c54, 0x2830, 0x4078, 0x640c, 0x109c,
- 0x0454, 0x3430, 0x4c78, 0x700c, 0x1c9c,
- 0x1054, 0x4030, 0x5878, 0x7c0c, 0x289c,
- 0x1c54, 0x4c30, 0x6478, 0x040c, 0x349c,
- 0x2854, 0x5830, 0x7078, 0x100c, 0x409c,
- 0x3454, 0x6430, 0x7c78, 0x1c0c, 0x4c9c,
- 0x4054, 0x7030, 0x0478, 0x280c, 0x589c,
- 0x4c54, 0x7c30, 0x1078, 0x340c, 0x649c,
- 0x5854, 0x0430, 0x1c78, 0x400c, 0x709c,
- 0x6454, 0x1030, 0x2878, 0x4c0c, 0x7c9c,
- 0x7054, 0x1c30, 0x3478, 0x580c, 0x049c,
- 0x7c56, 0x2832, 0x407a, 0x640e, 0x109e,
- 0x0456, 0x3432, 0x4c7a, 0x700e, 0x1c9e,
- 0x1056, 0x4032, 0x587a, 0x7c0e, 0x289e,
- 0x1c56, 0x4c32, 0x647a, 0x040e, 0x349e,
- 0x2856, 0x5832, 0x707a, 0x100e, 0x409e,
- 0x3456, 0x6432, 0x7c7a, 0x1c0e, 0x4c9e,
- 0x4056, 0x7032, 0x047a, 0x280e, 0x589e,
- 0x4c56, 0x7c32, 0x107a, 0x340e, 0x649e,
- 0x5856, 0x0432, 0x1c7a, 0x400e, 0x709e,
- 0x6456, 0x1032, 0x287a, 0x4c0e, 0x7c9e,
- 0x7056, 0x1c32, 0x347a, 0x580e, 0x049e,
- 0x7c58, 0x2834, 0x407c, 0x6410, 0x10a0,
- 0x0458, 0x3434, 0x4c7c, 0x7010, 0x1ca0,
- 0x1058, 0x4034, 0x587c, 0x7c10, 0x28a0,
- 0x1c58, 0x4c34, 0x647c, 0x0410, 0x34a0,
- 0x2858, 0x5834, 0x707c, 0x1010, 0x40a0,
- 0x3458, 0x6434, 0x7c7c, 0x1c10, 0x4ca0,
- 0x4058, 0x7034, 0x047c, 0x2810, 0x58a0,
- 0x4c58, 0x7c34, 0x107c, 0x3410, 0x64a0,
- 0x5858, 0x0434, 0x1c7c, 0x4010, 0x70a0,
- 0x6458, 0x1034, 0x287c, 0x4c10, 0x7ca0,
- 0x7058, 0x1c34, 0x347c, 0x5810, 0x04a0,
- 0x8048, 0x2c24, 0x446c, 0x6800, 0x1490,
- 0x0848, 0x3824, 0x506c, 0x7400, 0x2090,
- 0x1448, 0x4424, 0x5c6c, 0x8000, 0x2c90,
- 0x2048, 0x5024, 0x686c, 0x0800, 0x3890,
- 0x2c48, 0x5c24, 0x746c, 0x1400, 0x4490,
- 0x3848, 0x6824, 0x806c, 0x2000, 0x5090,
- 0x4448, 0x7424, 0x086c, 0x2c00, 0x5c90,
- 0x5048, 0x8024, 0x146c, 0x3800, 0x6890,
- 0x5c48, 0x0824, 0x206c, 0x4400, 0x7490,
- 0x6848, 0x1424, 0x2c6c, 0x5000, 0x8090,
- 0x7448, 0x2024, 0x386c, 0x5c00, 0x0890,
- 0x804a, 0x2c26, 0x446e, 0x6802, 0x1492,
- 0x084a, 0x3826, 0x506e, 0x7402, 0x2092,
- 0x144a, 0x4426, 0x5c6e, 0x8002, 0x2c92,
- 0x204a, 0x5026, 0x686e, 0x0802, 0x3892,
- 0x2c4a, 0x5c26, 0x746e, 0x1402, 0x4492,
- 0x384a, 0x6826, 0x806e, 0x2002, 0x5092,
- 0x444a, 0x7426, 0x086e, 0x2c02, 0x5c92,
- 0x504a, 0x8026, 0x146e, 0x3802, 0x6892,
- 0x5c4a, 0x0826, 0x206e, 0x4402, 0x7492,
- 0x684a, 0x1426, 0x2c6e, 0x5002, 0x8092,
- 0x744a, 0x2026, 0x386e, 0x5c02, 0x0892,
- 0x804c, 0x2c28, 0x4470, 0x6804, 0x1494,
- 0x084c, 0x3828, 0x5070, 0x7404, 0x2094,
- 0x144c, 0x4428, 0x5c70, 0x8004, 0x2c94,
- 0x204c, 0x5028, 0x6870, 0x0804, 0x3894,
- 0x2c4c, 0x5c28, 0x7470, 0x1404, 0x4494,
- 0x384c, 0x6828, 0x8070, 0x2004, 0x5094,
- 0x444c, 0x7428, 0x0870, 0x2c04, 0x5c94,
- 0x504c, 0x8028, 0x1470, 0x3804, 0x6894,
- 0x5c4c, 0x0828, 0x2070, 0x4404, 0x7494,
- 0x684c, 0x1428, 0x2c70, 0x5004, 0x8094,
- 0x744c, 0x2028, 0x3870, 0x5c04, 0x0894,
- 0x804e, 0x2c2a, 0x4472, 0x6806, 0x1496,
- 0x084e, 0x382a, 0x5072, 0x7406, 0x2096,
- 0x144e, 0x442a, 0x5c72, 0x8006, 0x2c96,
- 0x204e, 0x502a, 0x6872, 0x0806, 0x3896,
- 0x2c4e, 0x5c2a, 0x7472, 0x1406, 0x4496,
- 0x384e, 0x682a, 0x8072, 0x2006, 0x5096,
- 0x444e, 0x742a, 0x0872, 0x2c06, 0x5c96,
- 0x504e, 0x802a, 0x1472, 0x3806, 0x6896,
- 0x5c4e, 0x082a, 0x2072, 0x4406, 0x7496,
- 0x684e, 0x142a, 0x2c72, 0x5006, 0x8096,
- 0x744e, 0x202a, 0x3872, 0x5c06, 0x0896,
- 0x8050, 0x2c2c, 0x4474, 0x6808, 0x1498,
- 0x0850, 0x382c, 0x5074, 0x7408, 0x2098,
- 0x1450, 0x442c, 0x5c74, 0x8008, 0x2c98,
- 0x2050, 0x502c, 0x6874, 0x0808, 0x3898,
- 0x2c50, 0x5c2c, 0x7474, 0x1408, 0x4498,
- 0x3850, 0x682c, 0x8074, 0x2008, 0x5098,
- 0x4450, 0x742c, 0x0874, 0x2c08, 0x5c98,
- 0x5050, 0x802c, 0x1474, 0x3808, 0x6898,
- 0x5c50, 0x082c, 0x2074, 0x4408, 0x7498,
- 0x6850, 0x142c, 0x2c74, 0x5008, 0x8098,
- 0x7450, 0x202c, 0x3874, 0x5c08, 0x0898,
- 0x8052, 0x2c2e, 0x4476, 0x680a, 0x149a,
- 0x0852, 0x382e, 0x5076, 0x740a, 0x209a,
- 0x1452, 0x442e, 0x5c76, 0x800a, 0x2c9a,
- 0x2052, 0x502e, 0x6876, 0x080a, 0x389a,
- 0x2c52, 0x5c2e, 0x7476, 0x140a, 0x449a,
- 0x3852, 0x682e, 0x8076, 0x200a, 0x509a,
- 0x4452, 0x742e, 0x0876, 0x2c0a, 0x5c9a,
- 0x5052, 0x802e, 0x1476, 0x380a, 0x689a,
- 0x5c52, 0x082e, 0x2076, 0x440a, 0x749a,
- 0x6852, 0x142e, 0x2c76, 0x500a, 0x809a,
- 0x7452, 0x202e, 0x3876, 0x5c0a, 0x089a,
- 0x8054, 0x2c30, 0x4478, 0x680c, 0x149c,
- 0x0854, 0x3830, 0x5078, 0x740c, 0x209c,
- 0x1454, 0x4430, 0x5c78, 0x800c, 0x2c9c,
- 0x2054, 0x5030, 0x6878, 0x080c, 0x389c,
- 0x2c54, 0x5c30, 0x7478, 0x140c, 0x449c,
- 0x3854, 0x6830, 0x8078, 0x200c, 0x509c,
- 0x4454, 0x7430, 0x0878, 0x2c0c, 0x5c9c,
- 0x5054, 0x8030, 0x1478, 0x380c, 0x689c,
- 0x5c54, 0x0830, 0x2078, 0x440c, 0x749c,
- 0x6854, 0x1430, 0x2c78, 0x500c, 0x809c,
- 0x7454, 0x2030, 0x3878, 0x5c0c, 0x089c,
- 0x8056, 0x2c32, 0x447a, 0x680e, 0x149e,
- 0x0856, 0x3832, 0x507a, 0x740e, 0x209e,
- 0x1456, 0x4432, 0x5c7a, 0x800e, 0x2c9e,
- 0x2056, 0x5032, 0x687a, 0x080e, 0x389e,
- 0x2c56, 0x5c32, 0x747a, 0x140e, 0x449e,
- 0x3856, 0x6832, 0x807a, 0x200e, 0x509e,
- 0x4456, 0x7432, 0x087a, 0x2c0e, 0x5c9e,
- 0x5056, 0x8032, 0x147a, 0x380e, 0x689e,
- 0x5c56, 0x0832, 0x207a, 0x440e, 0x749e,
- 0x6856, 0x1432, 0x2c7a, 0x500e, 0x809e,
- 0x7456, 0x2032, 0x387a, 0x5c0e, 0x089e,
- 0x8058, 0x2c34, 0x447c, 0x6810, 0x14a0,
- 0x0858, 0x3834, 0x507c, 0x7410, 0x20a0,
- 0x1458, 0x4434, 0x5c7c, 0x8010, 0x2ca0,
- 0x2058, 0x5034, 0x687c, 0x0810, 0x38a0,
- 0x2c58, 0x5c34, 0x747c, 0x1410, 0x44a0,
- 0x3858, 0x6834, 0x807c, 0x2010, 0x50a0,
- 0x4458, 0x7434, 0x087c, 0x2c10, 0x5ca0,
- 0x5058, 0x8034, 0x147c, 0x3810, 0x68a0,
- 0x5c58, 0x0834, 0x207c, 0x4410, 0x74a0,
- 0x6858, 0x1434, 0x2c7c, 0x5010, 0x80a0,
- 0x7458, 0x2034, 0x387c, 0x5c10, 0x08a0,
- 0x8448, 0x3024, 0x486c, 0x6c00, 0x1890,
- 0x0c48, 0x3c24, 0x546c, 0x7800, 0x2490,
- 0x1848, 0x4824, 0x606c, 0x8400, 0x3090,
- 0x2448, 0x5424, 0x6c6c, 0x0c00, 0x3c90,
- 0x3048, 0x6024, 0x786c, 0x1800, 0x4890,
- 0x3c48, 0x6c24, 0x846c, 0x2400, 0x5490,
- 0x4848, 0x7824, 0x0c6c, 0x3000, 0x6090,
- 0x5448, 0x8424, 0x186c, 0x3c00, 0x6c90,
- 0x6048, 0x0c24, 0x246c, 0x4800, 0x7890,
- 0x6c48, 0x1824, 0x306c, 0x5400, 0x8490,
- 0x7848, 0x2424, 0x3c6c, 0x6000, 0x0c90,
- 0x844a, 0x3026, 0x486e, 0x6c02, 0x1892,
- 0x0c4a, 0x3c26, 0x546e, 0x7802, 0x2492,
- 0x184a, 0x4826, 0x606e, 0x8402, 0x3092,
- 0x244a, 0x5426, 0x6c6e, 0x0c02, 0x3c92,
- 0x304a, 0x6026, 0x786e, 0x1802, 0x4892,
- 0x3c4a, 0x6c26, 0x846e, 0x2402, 0x5492,
- 0x484a, 0x7826, 0x0c6e, 0x3002, 0x6092,
- 0x544a, 0x8426, 0x186e, 0x3c02, 0x6c92,
- 0x604a, 0x0c26, 0x246e, 0x4802, 0x7892,
- 0x6c4a, 0x1826, 0x306e, 0x5402, 0x8492,
- 0x784a, 0x2426, 0x3c6e, 0x6002, 0x0c92,
- 0x844c, 0x3028, 0x4870, 0x6c04, 0x1894,
- 0x0c4c, 0x3c28, 0x5470, 0x7804, 0x2494,
- 0x184c, 0x4828, 0x6070, 0x8404, 0x3094,
- 0x244c, 0x5428, 0x6c70, 0x0c04, 0x3c94,
- 0x304c, 0x6028, 0x7870, 0x1804, 0x4894,
- 0x3c4c, 0x6c28, 0x8470, 0x2404, 0x5494,
- 0x484c, 0x7828, 0x0c70, 0x3004, 0x6094,
- 0x544c, 0x8428, 0x1870, 0x3c04, 0x6c94,
- 0x604c, 0x0c28, 0x2470, 0x4804, 0x7894,
- 0x6c4c, 0x1828, 0x3070, 0x5404, 0x8494,
- 0x784c, 0x2428, 0x3c70, 0x6004, 0x0c94,
- 0x844e, 0x302a, 0x4872, 0x6c06, 0x1896,
- 0x0c4e, 0x3c2a, 0x5472, 0x7806, 0x2496,
- 0x184e, 0x482a, 0x6072, 0x8406, 0x3096,
- 0x244e, 0x542a, 0x6c72, 0x0c06, 0x3c96,
- 0x304e, 0x602a, 0x7872, 0x1806, 0x4896,
- 0x3c4e, 0x6c2a, 0x8472, 0x2406, 0x5496,
- 0x484e, 0x782a, 0x0c72, 0x3006, 0x6096,
- 0x544e, 0x842a, 0x1872, 0x3c06, 0x6c96,
- 0x604e, 0x0c2a, 0x2472, 0x4806, 0x7896,
- 0x6c4e, 0x182a, 0x3072, 0x5406, 0x8496,
- 0x784e, 0x242a, 0x3c72, 0x6006, 0x0c96,
- 0x8450, 0x302c, 0x4874, 0x6c08, 0x1898,
- 0x0c50, 0x3c2c, 0x5474, 0x7808, 0x2498,
- 0x1850, 0x482c, 0x6074, 0x8408, 0x3098,
- 0x2450, 0x542c, 0x6c74, 0x0c08, 0x3c98,
- 0x3050, 0x602c, 0x7874, 0x1808, 0x4898,
- 0x3c50, 0x6c2c, 0x8474, 0x2408, 0x5498,
- 0x4850, 0x782c, 0x0c74, 0x3008, 0x6098,
- 0x5450, 0x842c, 0x1874, 0x3c08, 0x6c98,
- 0x6050, 0x0c2c, 0x2474, 0x4808, 0x7898,
- 0x6c50, 0x182c, 0x3074, 0x5408, 0x8498,
- 0x7850, 0x242c, 0x3c74, 0x6008, 0x0c98,
- 0x8452, 0x302e, 0x4876, 0x6c0a, 0x189a,
- 0x0c52, 0x3c2e, 0x5476, 0x780a, 0x249a,
- 0x1852, 0x482e, 0x6076, 0x840a, 0x309a,
- 0x2452, 0x542e, 0x6c76, 0x0c0a, 0x3c9a,
- 0x3052, 0x602e, 0x7876, 0x180a, 0x489a,
- 0x3c52, 0x6c2e, 0x8476, 0x240a, 0x549a,
- 0x4852, 0x782e, 0x0c76, 0x300a, 0x609a,
- 0x5452, 0x842e, 0x1876, 0x3c0a, 0x6c9a,
- 0x6052, 0x0c2e, 0x2476, 0x480a, 0x789a,
- 0x6c52, 0x182e, 0x3076, 0x540a, 0x849a,
- 0x7852, 0x242e, 0x3c76, 0x600a, 0x0c9a,
- 0x8454, 0x3030, 0x4878, 0x6c0c, 0x189c,
- 0x0c54, 0x3c30, 0x5478, 0x780c, 0x249c,
- 0x1854, 0x4830, 0x6078, 0x840c, 0x309c,
- 0x2454, 0x5430, 0x6c78, 0x0c0c, 0x3c9c,
- 0x3054, 0x6030, 0x7878, 0x180c, 0x489c,
- 0x3c54, 0x6c30, 0x8478, 0x240c, 0x549c,
- 0x4854, 0x7830, 0x0c78, 0x300c, 0x609c,
- 0x5454, 0x8430, 0x1878, 0x3c0c, 0x6c9c,
- 0x6054, 0x0c30, 0x2478, 0x480c, 0x789c,
- 0x6c54, 0x1830, 0x3078, 0x540c, 0x849c,
- 0x7854, 0x2430, 0x3c78, 0x600c, 0x0c9c,
- 0x8456, 0x3032, 0x487a, 0x6c0e, 0x189e,
- 0x0c56, 0x3c32, 0x547a, 0x780e, 0x249e,
- 0x1856, 0x4832, 0x607a, 0x840e, 0x309e,
- 0x2456, 0x5432, 0x6c7a, 0x0c0e, 0x3c9e,
- 0x3056, 0x6032, 0x787a, 0x180e, 0x489e,
- 0x3c56, 0x6c32, 0x847a, 0x240e, 0x549e,
- 0x4856, 0x7832, 0x0c7a, 0x300e, 0x609e,
- 0x5456, 0x8432, 0x187a, 0x3c0e, 0x6c9e,
- 0x6056, 0x0c32, 0x247a, 0x480e, 0x789e,
- 0x6c56, 0x1832, 0x307a, 0x540e, 0x849e,
- 0x7856, 0x2432, 0x3c7a, 0x600e, 0x0c9e,
- 0x8458, 0x3034, 0x487c, 0x6c10, 0x18a0,
- 0x0c58, 0x3c34, 0x547c, 0x7810, 0x24a0,
- 0x1858, 0x4834, 0x607c, 0x8410, 0x30a0,
- 0x2458, 0x5434, 0x6c7c, 0x0c10, 0x3ca0,
- 0x3058, 0x6034, 0x787c, 0x1810, 0x48a0,
- 0x3c58, 0x6c34, 0x847c, 0x2410, 0x54a0,
- 0x4858, 0x7834, 0x0c7c, 0x3010, 0x60a0,
- 0x5458, 0x8434, 0x187c, 0x3c10, 0x6ca0,
- 0x6058, 0x0c34, 0x247c, 0x4810, 0x78a0,
- 0x6c58, 0x1834, 0x307c, 0x5410, 0x84a0,
- 0x7858, 0x2434, 0x3c7c, 0x6010, 0x0ca0,
- 0x7c48, 0x2824, 0x406c, 0x6400, 0x1090,
- 0x0448, 0x3424, 0x4c6c, 0x7000, 0x1c90,
- 0x1048, 0x4024, 0x586c, 0x7c00, 0x2890,
- 0x1c48, 0x4c24, 0x646c, 0x0400, 0x3490,
- 0x2848, 0x5824, 0x706c, 0x1000, 0x4090,
- 0x3448, 0x6424, 0x7c6c, 0x1c00, 0x4c90,
- 0x4048, 0x7024, 0x046c, 0x2800, 0x5890,
- 0x4c48, 0x7c24, 0x106c, 0x3400, 0x6490,
- 0x5848, 0x0424, 0x1c6c, 0x4000, 0x7090,
- 0x6448, 0x1024, 0x286c, 0x4c00, 0x7c90,
- 0x7048, 0x1c24, 0x346c, 0x5800, 0x0490,
- 0x7c4a, 0x2826, 0x406e, 0x6402, 0x1092,
- 0x044a, 0x3426, 0x4c6e, 0x7002, 0x1c92,
- 0x104a, 0x4026, 0x586e, 0x7c02, 0x2892,
- 0x1c4a, 0x4c26, 0x646e, 0x0402, 0x3492,
- 0x284a, 0x5826, 0x706e, 0x1002, 0x4092,
- 0x344a, 0x6426, 0x7c6e, 0x1c02, 0x4c92,
- 0x404a, 0x7026, 0x046e, 0x2802, 0x5892,
- 0x4c4a, 0x7c26, 0x106e, 0x3402, 0x6492,
- 0x584a, 0x0426, 0x1c6e, 0x4002, 0x7092,
- 0x644a, 0x1026, 0x286e, 0x4c02, 0x7c92,
- 0x704a, 0x1c26, 0x346e, 0x5802, 0x0492,
- 0x7c4c, 0x2828, 0x4070, 0x6404, 0x1094,
- 0x044c, 0x3428, 0x4c70, 0x7004, 0x1c94,
- 0x104c, 0x4028, 0x5870, 0x7c04, 0x2894,
- 0x1c4c, 0x4c28, 0x6470, 0x0404, 0x3494,
- 0x284c, 0x5828, 0x7070, 0x1004, 0x4094,
- 0x285a, 0x5836, 0x707e, 0x1012, 0x40a2,
- 0x345a, 0x6436, 0x7c7e, 0x1c12, 0x4ca2,
- 0x405a, 0x7036, 0x047e, 0x2812, 0x58a2,
- 0x4c5a, 0x7c36, 0x107e, 0x3412, 0x64a2,
- 0x585a, 0x0436, 0x1c7e, 0x4012, 0x70a2,
- 0x645a, 0x1036, 0x287e, 0x4c12, 0x7ca2,
- 0x705a, 0x1c36, 0x347e, 0x5812, 0x04a2,
- 0x7c5a, 0x2836, 0x407e, 0x6412, 0x10a2,
- 0x045a, 0x3436, 0x4c7e, 0x7012, 0x1ca2,
- 0x105a, 0x4036, 0x587e, 0x7c12, 0x28a2,
- 0x1c5a, 0x4c36, 0x647e, 0x0412, 0x34a2,
- 0x285c, 0x5838, 0x7080, 0x1014, 0x40a4,
- 0x345c, 0x6438, 0x7c80, 0x1c14, 0x4ca4,
- 0x405c, 0x7038, 0x0480, 0x2814, 0x58a4,
- 0x4c5c, 0x7c38, 0x1080, 0x3414, 0x64a4,
- 0x585c, 0x0438, 0x1c80, 0x4014, 0x70a4,
- 0x645c, 0x1038, 0x2880, 0x4c14, 0x7ca4,
- 0x705c, 0x1c38, 0x3480, 0x5814, 0x04a4,
- 0x7c5c, 0x2838, 0x4080, 0x6414, 0x10a4,
- 0x045c, 0x3438, 0x4c80, 0x7014, 0x1ca4,
- 0x105c, 0x4038, 0x5880, 0x7c14, 0x28a4,
- 0x1c5c, 0x4c38, 0x6480, 0x0414, 0x34a4,
- 0x285e, 0x583a, 0x7082, 0x1016, 0x40a6,
- 0x345e, 0x643a, 0x7c82, 0x1c16, 0x4ca6,
- 0x405e, 0x703a, 0x0482, 0x2816, 0x58a6,
- 0x4c5e, 0x7c3a, 0x1082, 0x3416, 0x64a6,
- 0x585e, 0x043a, 0x1c82, 0x4016, 0x70a6,
- 0x645e, 0x103a, 0x2882, 0x4c16, 0x7ca6,
- 0x705e, 0x1c3a, 0x3482, 0x5816, 0x04a6,
- 0x7c5e, 0x283a, 0x4082, 0x6416, 0x10a6,
- 0x045e, 0x343a, 0x4c82, 0x7016, 0x1ca6,
- 0x105e, 0x403a, 0x5882, 0x7c16, 0x28a6,
- 0x1c5e, 0x4c3a, 0x6482, 0x0416, 0x34a6,
- 0x2860, 0x583c, 0x7084, 0x1018, 0x40a8,
- 0x3460, 0x643c, 0x7c84, 0x1c18, 0x4ca8,
- 0x4060, 0x703c, 0x0484, 0x2818, 0x58a8,
- 0x4c60, 0x7c3c, 0x1084, 0x3418, 0x64a8,
- 0x5860, 0x043c, 0x1c84, 0x4018, 0x70a8,
- 0x6460, 0x103c, 0x2884, 0x4c18, 0x7ca8,
- 0x7060, 0x1c3c, 0x3484, 0x5818, 0x04a8,
- 0x7c60, 0x283c, 0x4084, 0x6418, 0x10a8,
- 0x0460, 0x343c, 0x4c84, 0x7018, 0x1ca8,
- 0x1060, 0x403c, 0x5884, 0x7c18, 0x28a8,
- 0x1c60, 0x4c3c, 0x6484, 0x0418, 0x34a8,
- 0x2862, 0x583e, 0x7086, 0x101a, 0x40aa,
- 0x3462, 0x643e, 0x7c86, 0x1c1a, 0x4caa,
- 0x4062, 0x703e, 0x0486, 0x281a, 0x58aa,
- 0x4c62, 0x7c3e, 0x1086, 0x341a, 0x64aa,
- 0x5862, 0x043e, 0x1c86, 0x401a, 0x70aa,
- 0x6462, 0x103e, 0x2886, 0x4c1a, 0x7caa,
- 0x7062, 0x1c3e, 0x3486, 0x581a, 0x04aa,
- 0x7c62, 0x283e, 0x4086, 0x641a, 0x10aa,
- 0x0462, 0x343e, 0x4c86, 0x701a, 0x1caa,
- 0x1062, 0x403e, 0x5886, 0x7c1a, 0x28aa,
- 0x1c62, 0x4c3e, 0x6486, 0x041a, 0x34aa,
- 0x2864, 0x5840, 0x7088, 0x101c, 0x40ac,
- 0x3464, 0x6440, 0x7c88, 0x1c1c, 0x4cac,
- 0x4064, 0x7040, 0x0488, 0x281c, 0x58ac,
- 0x4c64, 0x7c40, 0x1088, 0x341c, 0x64ac,
- 0x5864, 0x0440, 0x1c88, 0x401c, 0x70ac,
- 0x6464, 0x1040, 0x2888, 0x4c1c, 0x7cac,
- 0x7064, 0x1c40, 0x3488, 0x581c, 0x04ac,
- 0x7c64, 0x2840, 0x4088, 0x641c, 0x10ac,
- 0x0464, 0x3440, 0x4c88, 0x701c, 0x1cac,
- 0x1064, 0x4040, 0x5888, 0x7c1c, 0x28ac,
- 0x1c64, 0x4c40, 0x6488, 0x041c, 0x34ac,
- 0x2866, 0x5842, 0x708a, 0x101e, 0x40ae,
- 0x3466, 0x6442, 0x7c8a, 0x1c1e, 0x4cae,
- 0x4066, 0x7042, 0x048a, 0x281e, 0x58ae,
- 0x4c66, 0x7c42, 0x108a, 0x341e, 0x64ae,
- 0x5866, 0x0442, 0x1c8a, 0x401e, 0x70ae,
- 0x6466, 0x1042, 0x288a, 0x4c1e, 0x7cae,
- 0x7066, 0x1c42, 0x348a, 0x581e, 0x04ae,
- 0x7c66, 0x2842, 0x408a, 0x641e, 0x10ae,
- 0x0466, 0x3442, 0x4c8a, 0x701e, 0x1cae,
- 0x1066, 0x4042, 0x588a, 0x7c1e, 0x28ae,
- 0x1c66, 0x4c42, 0x648a, 0x041e, 0x34ae,
- 0x2868, 0x5844, 0x708c, 0x1020, 0x40b0,
- 0x3468, 0x6444, 0x7c8c, 0x1c20, 0x4cb0,
- 0x4068, 0x7044, 0x048c, 0x2820, 0x58b0,
- 0x4c68, 0x7c44, 0x108c, 0x3420, 0x64b0,
- 0x5868, 0x0444, 0x1c8c, 0x4020, 0x70b0,
- 0x6468, 0x1044, 0x288c, 0x4c20, 0x7cb0,
- 0x7068, 0x1c44, 0x348c, 0x5820, 0x04b0,
- 0x7c68, 0x2844, 0x408c, 0x6420, 0x10b0,
- 0x0468, 0x3444, 0x4c8c, 0x7020, 0x1cb0,
- 0x1068, 0x4044, 0x588c, 0x7c20, 0x28b0,
- 0x1c68, 0x4c44, 0x648c, 0x0420, 0x34b0,
- 0x286a, 0x5846, 0x708e, 0x1022, 0x40b2,
- 0x346a, 0x6446, 0x7c8e, 0x1c22, 0x4cb2,
- 0x406a, 0x7046, 0x048e, 0x2822, 0x58b2,
- 0x4c6a, 0x7c46, 0x108e, 0x3422, 0x64b2,
- 0x586a, 0x0446, 0x1c8e, 0x4022, 0x70b2,
- 0x646a, 0x1046, 0x288e, 0x4c22, 0x7cb2,
- 0x706a, 0x1c46, 0x348e, 0x5822, 0x04b2,
- 0x7c6a, 0x2846, 0x408e, 0x6422, 0x10b2,
- 0x046a, 0x3446, 0x4c8e, 0x7022, 0x1cb2,
- 0x106a, 0x4046, 0x588e, 0x7c22, 0x28b2,
- 0x1c6a, 0x4c46, 0x648e, 0x0422, 0x34b2,
- 0x2c5a, 0x5c36, 0x747e, 0x1412, 0x44a2,
- 0x385a, 0x6836, 0x807e, 0x2012, 0x50a2,
- 0x445a, 0x7436, 0x087e, 0x2c12, 0x5ca2,
- 0x505a, 0x8036, 0x147e, 0x3812, 0x68a2,
- 0x5c5a, 0x0836, 0x207e, 0x4412, 0x74a2,
- 0x685a, 0x1436, 0x2c7e, 0x5012, 0x80a2,
- 0x745a, 0x2036, 0x387e, 0x5c12, 0x08a2,
- 0x805a, 0x2c36, 0x447e, 0x6812, 0x14a2,
- 0x085a, 0x3836, 0x507e, 0x7412, 0x20a2,
- 0x145a, 0x4436, 0x5c7e, 0x8012, 0x2ca2,
- 0x205a, 0x5036, 0x687e, 0x0812, 0x38a2,
- 0x2c5c, 0x5c38, 0x7480, 0x1414, 0x44a4,
- 0x385c, 0x6838, 0x8080, 0x2014, 0x50a4,
- 0x445c, 0x7438, 0x0880, 0x2c14, 0x5ca4,
- 0x505c, 0x8038, 0x1480, 0x3814, 0x68a4,
- 0x5c5c, 0x0838, 0x2080, 0x4414, 0x74a4,
- 0x685c, 0x1438, 0x2c80, 0x5014, 0x80a4,
- 0x745c, 0x2038, 0x3880, 0x5c14, 0x08a4,
- 0x805c, 0x2c38, 0x4480, 0x6814, 0x14a4,
- 0x085c, 0x3838, 0x5080, 0x7414, 0x20a4,
- 0x145c, 0x4438, 0x5c80, 0x8014, 0x2ca4,
- 0x205c, 0x5038, 0x6880, 0x0814, 0x38a4,
- 0x2c5e, 0x5c3a, 0x7482, 0x1416, 0x44a6,
- 0x385e, 0x683a, 0x8082, 0x2016, 0x50a6,
- 0x445e, 0x743a, 0x0882, 0x2c16, 0x5ca6,
- 0x505e, 0x803a, 0x1482, 0x3816, 0x68a6,
- 0x5c5e, 0x083a, 0x2082, 0x4416, 0x74a6,
- 0x685e, 0x143a, 0x2c82, 0x5016, 0x80a6,
- 0x745e, 0x203a, 0x3882, 0x5c16, 0x08a6,
- 0x805e, 0x2c3a, 0x4482, 0x6816, 0x14a6,
- 0x085e, 0x383a, 0x5082, 0x7416, 0x20a6,
- 0x145e, 0x443a, 0x5c82, 0x8016, 0x2ca6,
- 0x205e, 0x503a, 0x6882, 0x0816, 0x38a6,
- 0x2c60, 0x5c3c, 0x7484, 0x1418, 0x44a8,
- 0x3860, 0x683c, 0x8084, 0x2018, 0x50a8,
- 0x4460, 0x743c, 0x0884, 0x2c18, 0x5ca8,
- 0x5060, 0x803c, 0x1484, 0x3818, 0x68a8,
- 0x5c60, 0x083c, 0x2084, 0x4418, 0x74a8,
- 0x6860, 0x143c, 0x2c84, 0x5018, 0x80a8,
- 0x7460, 0x203c, 0x3884, 0x5c18, 0x08a8,
- 0x8060, 0x2c3c, 0x4484, 0x6818, 0x14a8,
- 0x0860, 0x383c, 0x5084, 0x7418, 0x20a8,
- 0x1460, 0x443c, 0x5c84, 0x8018, 0x2ca8,
- 0x2060, 0x503c, 0x6884, 0x0818, 0x38a8,
- 0x2c62, 0x5c3e, 0x7486, 0x141a, 0x44aa,
- 0x3862, 0x683e, 0x8086, 0x201a, 0x50aa,
- 0x4462, 0x743e, 0x0886, 0x2c1a, 0x5caa,
- 0x5062, 0x803e, 0x1486, 0x381a, 0x68aa,
- 0x5c62, 0x083e, 0x2086, 0x441a, 0x74aa,
- 0x6862, 0x143e, 0x2c86, 0x501a, 0x80aa,
- 0x7462, 0x203e, 0x3886, 0x5c1a, 0x08aa,
- 0x8062, 0x2c3e, 0x4486, 0x681a, 0x14aa,
- 0x0862, 0x383e, 0x5086, 0x741a, 0x20aa,
- 0x1462, 0x443e, 0x5c86, 0x801a, 0x2caa,
- 0x2062, 0x503e, 0x6886, 0x081a, 0x38aa,
- 0x2c64, 0x5c40, 0x7488, 0x141c, 0x44ac,
- 0x3864, 0x6840, 0x8088, 0x201c, 0x50ac,
- 0x4464, 0x7440, 0x0888, 0x2c1c, 0x5cac,
- 0x5064, 0x8040, 0x1488, 0x381c, 0x68ac,
- 0x5c64, 0x0840, 0x2088, 0x441c, 0x74ac,
- 0x6864, 0x1440, 0x2c88, 0x501c, 0x80ac,
- 0x7464, 0x2040, 0x3888, 0x5c1c, 0x08ac,
- 0x8064, 0x2c40, 0x4488, 0x681c, 0x14ac,
- 0x0864, 0x3840, 0x5088, 0x741c, 0x20ac,
- 0x1464, 0x4440, 0x5c88, 0x801c, 0x2cac,
- 0x2064, 0x5040, 0x6888, 0x081c, 0x38ac,
- 0x2c66, 0x5c42, 0x748a, 0x141e, 0x44ae,
- 0x3866, 0x6842, 0x808a, 0x201e, 0x50ae,
- 0x4466, 0x7442, 0x088a, 0x2c1e, 0x5cae,
- 0x5066, 0x8042, 0x148a, 0x381e, 0x68ae,
- 0x5c66, 0x0842, 0x208a, 0x441e, 0x74ae,
- 0x6866, 0x1442, 0x2c8a, 0x501e, 0x80ae,
- 0x7466, 0x2042, 0x388a, 0x5c1e, 0x08ae,
- 0x8066, 0x2c42, 0x448a, 0x681e, 0x14ae,
- 0x0866, 0x3842, 0x508a, 0x741e, 0x20ae,
- 0x1466, 0x4442, 0x5c8a, 0x801e, 0x2cae,
- 0x2066, 0x5042, 0x688a, 0x081e, 0x38ae,
- 0x2c68, 0x5c44, 0x748c, 0x1420, 0x44b0,
- 0x3868, 0x6844, 0x808c, 0x2020, 0x50b0,
- 0x4468, 0x7444, 0x088c, 0x2c20, 0x5cb0,
- 0x5068, 0x8044, 0x148c, 0x3820, 0x68b0,
- 0x5c68, 0x0844, 0x208c, 0x4420, 0x74b0,
- 0x6868, 0x1444, 0x2c8c, 0x5020, 0x80b0,
- 0x7468, 0x2044, 0x388c, 0x5c20, 0x08b0,
- 0x8068, 0x2c44, 0x448c, 0x6820, 0x14b0,
- 0x0868, 0x3844, 0x508c, 0x7420, 0x20b0,
- 0x1468, 0x4444, 0x5c8c, 0x8020, 0x2cb0,
- 0x2068, 0x5044, 0x688c, 0x0820, 0x38b0,
- 0x2c6a, 0x5c46, 0x748e, 0x1422, 0x44b2,
- 0x386a, 0x6846, 0x808e, 0x2022, 0x50b2,
- 0x446a, 0x7446, 0x088e, 0x2c22, 0x5cb2,
- 0x506a, 0x8046, 0x148e, 0x3822, 0x68b2,
- 0x5c6a, 0x0846, 0x208e, 0x4422, 0x74b2,
- 0x686a, 0x1446, 0x2c8e, 0x5022, 0x80b2,
- 0x746a, 0x2046, 0x388e, 0x5c22, 0x08b2,
- 0x806a, 0x2c46, 0x448e, 0x6822, 0x14b2,
- 0x086a, 0x3846, 0x508e, 0x7422, 0x20b2,
- 0x146a, 0x4446, 0x5c8e, 0x8022, 0x2cb2,
- 0x206a, 0x5046, 0x688e, 0x0822, 0x38b2,
- 0x305a, 0x6036, 0x787e, 0x1812, 0x48a2,
- 0x3c5a, 0x6c36, 0x847e, 0x2412, 0x54a2,
- 0x485a, 0x7836, 0x0c7e, 0x3012, 0x60a2,
- 0x545a, 0x8436, 0x187e, 0x3c12, 0x6ca2,
- 0x605a, 0x0c36, 0x247e, 0x4812, 0x78a2,
- 0x6c5a, 0x1836, 0x307e, 0x5412, 0x84a2,
- 0x785a, 0x2436, 0x3c7e, 0x6012, 0x0ca2,
- 0x845a, 0x3036, 0x487e, 0x6c12, 0x18a2,
- 0x0c5a, 0x3c36, 0x547e, 0x7812, 0x24a2,
- 0x185a, 0x4836, 0x607e, 0x8412, 0x30a2,
- 0x245a, 0x5436, 0x6c7e, 0x0c12, 0x3ca2,
- 0x305c, 0x6038, 0x7880, 0x1814, 0x48a4,
- 0x3c5c, 0x6c38, 0x8480, 0x2414, 0x54a4,
- 0x485c, 0x7838, 0x0c80, 0x3014, 0x60a4,
- 0x545c, 0x8438, 0x1880, 0x3c14, 0x6ca4,
- 0x605c, 0x0c38, 0x2480, 0x4814, 0x78a4,
- 0x6c5c, 0x1838, 0x3080, 0x5414, 0x84a4,
- 0x785c, 0x2438, 0x3c80, 0x6014, 0x0ca4,
- 0x845c, 0x3038, 0x4880, 0x6c14, 0x18a4,
- 0x0c5c, 0x3c38, 0x5480, 0x7814, 0x24a4,
- 0x185c, 0x4838, 0x6080, 0x8414, 0x30a4,
- 0x245c, 0x5438, 0x6c80, 0x0c14, 0x3ca4,
- 0x305e, 0x603a, 0x7882, 0x1816, 0x48a6,
- 0x3c5e, 0x6c3a, 0x8482, 0x2416, 0x54a6,
- 0x485e, 0x783a, 0x0c82, 0x3016, 0x60a6,
- 0x545e, 0x843a, 0x1882, 0x3c16, 0x6ca6,
- 0x605e, 0x0c3a, 0x2482, 0x4816, 0x78a6,
- 0x6c5e, 0x183a, 0x3082, 0x5416, 0x84a6,
- 0x785e, 0x243a, 0x3c82, 0x6016, 0x0ca6,
- 0x845e, 0x303a, 0x4882, 0x6c16, 0x18a6,
- 0x0c5e, 0x3c3a, 0x5482, 0x7816, 0x24a6,
- 0x185e, 0x483a, 0x6082, 0x8416, 0x30a6,
- 0x245e, 0x543a, 0x6c82, 0x0c16, 0x3ca6,
- 0x3060, 0x603c, 0x7884, 0x1818, 0x48a8,
- 0x3c60, 0x6c3c, 0x8484, 0x2418, 0x54a8,
- 0x4860, 0x783c, 0x0c84, 0x3018, 0x60a8,
- 0x5460, 0x843c, 0x1884, 0x3c18, 0x6ca8,
- 0x6060, 0x0c3c, 0x2484, 0x4818, 0x78a8,
- 0x6c60, 0x183c, 0x3084, 0x5418, 0x84a8,
- 0x7860, 0x243c, 0x3c84, 0x6018, 0x0ca8,
- 0x8460, 0x303c, 0x4884, 0x6c18, 0x18a8,
- 0x0c60, 0x3c3c, 0x5484, 0x7818, 0x24a8,
- 0x1860, 0x483c, 0x6084, 0x8418, 0x30a8,
- 0x2460, 0x543c, 0x6c84, 0x0c18, 0x3ca8,
- 0x3062, 0x603e, 0x7886, 0x181a, 0x48aa,
- 0x3c62, 0x6c3e, 0x8486, 0x241a, 0x54aa,
- 0x4862, 0x783e, 0x0c86, 0x301a, 0x60aa,
- 0x5462, 0x843e, 0x1886, 0x3c1a, 0x6caa,
- 0x6062, 0x0c3e, 0x2486, 0x481a, 0x78aa,
- 0x6c62, 0x183e, 0x3086, 0x541a, 0x84aa,
- 0x7862, 0x243e, 0x3c86, 0x601a, 0x0caa,
- 0x8462, 0x303e, 0x4886, 0x6c1a, 0x18aa,
- 0x0c62, 0x3c3e, 0x5486, 0x781a, 0x24aa,
- 0x1862, 0x483e, 0x6086, 0x841a, 0x30aa,
- 0x2462, 0x543e, 0x6c86, 0x0c1a, 0x3caa,
- 0x3064, 0x6040, 0x7888, 0x181c, 0x48ac,
- 0x3c64, 0x6c40, 0x8488, 0x241c, 0x54ac,
- 0x4864, 0x7840, 0x0c88, 0x301c, 0x60ac,
- 0x5464, 0x8440, 0x1888, 0x3c1c, 0x6cac,
- 0x6064, 0x0c40, 0x2488, 0x481c, 0x78ac,
- 0x6c64, 0x1840, 0x3088, 0x541c, 0x84ac,
- 0x7864, 0x2440, 0x3c88, 0x601c, 0x0cac,
- 0x8464, 0x3040, 0x4888, 0x6c1c, 0x18ac,
- 0x0c64, 0x3c40, 0x5488, 0x781c, 0x24ac,
- 0x1864, 0x4840, 0x6088, 0x841c, 0x30ac,
- 0x2464, 0x5440, 0x6c88, 0x0c1c, 0x3cac,
- 0x3066, 0x6042, 0x788a, 0x181e, 0x48ae,
- 0x3c66, 0x6c42, 0x848a, 0x241e, 0x54ae,
- 0x4866, 0x7842, 0x0c8a, 0x301e, 0x60ae,
- 0x5466, 0x8442, 0x188a, 0x3c1e, 0x6cae,
- 0x6066, 0x0c42, 0x248a, 0x481e, 0x78ae,
- 0x6c66, 0x1842, 0x308a, 0x541e, 0x84ae,
- 0x7866, 0x2442, 0x3c8a, 0x601e, 0x0cae,
- 0x8466, 0x3042, 0x488a, 0x6c1e, 0x18ae,
- 0x0c66, 0x3c42, 0x548a, 0x781e, 0x24ae,
- 0x1866, 0x4842, 0x608a, 0x841e, 0x30ae,
- 0x2466, 0x5442, 0x6c8a, 0x0c1e, 0x3cae,
- 0x3068, 0x6044, 0x788c, 0x1820, 0x48b0,
- 0x3c68, 0x6c44, 0x848c, 0x2420, 0x54b0,
- 0x4868, 0x7844, 0x0c8c, 0x3020, 0x60b0,
- 0x5468, 0x8444, 0x188c, 0x3c20, 0x6cb0,
- 0x6068, 0x0c44, 0x248c, 0x4820, 0x78b0,
- 0x6c68, 0x1844, 0x308c, 0x5420, 0x84b0,
- 0x7868, 0x2444, 0x3c8c, 0x6020, 0x0cb0,
- 0x8468, 0x3044, 0x488c, 0x6c20, 0x18b0,
- 0x0c68, 0x3c44, 0x548c, 0x7820, 0x24b0,
- 0x1868, 0x4844, 0x608c, 0x8420, 0x30b0,
- 0x2468, 0x5444, 0x6c8c, 0x0c20, 0x3cb0,
- 0x306a, 0x6046, 0x788e, 0x1822, 0x48b2,
- 0x3c6a, 0x6c46, 0x848e, 0x2422, 0x54b2,
- 0x486a, 0x7846, 0x0c8e, 0x3022, 0x60b2,
- 0x546a, 0x8446, 0x188e, 0x3c22, 0x6cb2,
- 0x606a, 0x0c46, 0x248e, 0x4822, 0x78b2,
- 0x6c6a, 0x1846, 0x308e, 0x5422, 0x84b2,
- 0x786a, 0x2446, 0x3c8e, 0x6022, 0x0cb2,
- 0x846a, 0x3046, 0x488e, 0x6c22, 0x18b2,
- 0x0c6a, 0x3c46, 0x548e, 0x7822, 0x24b2,
- 0x186a, 0x4846, 0x608e, 0x8422, 0x30b2,
- 0x246a, 0x5446, 0x6c8e, 0x0c22, 0x3cb2,
- 0x285a, 0x5836, 0x707e, 0x1012, 0x40a2,
- 0x345a, 0x6436, 0x7c7e, 0x1c12, 0x4ca2,
- 0x405a, 0x7036, 0x047e, 0x2812, 0x58a2,
- 0x4c5a, 0x7c36, 0x107e, 0x3412, 0x64a2,
- 0x585a, 0x0436, 0x1c7e, 0x4012, 0x70a2,
- 0x645a, 0x1036, 0x287e, 0x4c12, 0x7ca2,
- 0x705a, 0x1c36, 0x347e, 0x5812, 0x04a2,
- 0x7c5a, 0x2836, 0x407e, 0x6412, 0x10a2,
- 0x045a, 0x3436, 0x4c7e, 0x7012, 0x1ca2,
- 0x105a, 0x4036, 0x587e, 0x7c12, 0x28a2,
- 0x1c5a, 0x4c36, 0x647e, 0x0412, 0x34a2,
- 0x285c, 0x5838, 0x7080, 0x1014, 0x40a4,
- 0x345c, 0x6438, 0x7c80, 0x1c14, 0x4ca4,
- 0x405c, 0x7038, 0x0480, 0x2814, 0x58a4,
- 0x4c5c, 0x7c38, 0x1080, 0x3414, 0x64a4,
- 0x585c, 0x0438, 0x1c80, 0x4014, 0x70a4,
- 0x645c, 0x1038, 0x2880, 0x4c14, 0x7ca4,
- 0x705c, 0x1c38, 0x3480, 0x5814, 0x04a4,
- 0x7c5c, 0x2838, 0x4080, 0x6414, 0x10a4,
- 0x045c, 0x3438, 0x4c80, 0x7014, 0x1ca4,
- 0x105c, 0x4038, 0x5880, 0x7c14, 0x28a4,
- 0x1c5c, 0x4c38, 0x6480, 0x0414, 0x34a4,
- 0x285e, 0x583a, 0x7082, 0x1016, 0x40a6,
- 0x345e, 0x643a, 0x7c82, 0x1c16, 0x4ca6,
- 0x405e, 0x703a, 0x0482, 0x2816, 0x58a6,
- 0x4c5e, 0x7c3a, 0x1082, 0x3416, 0x64a6,
- 0x585e, 0x043a, 0x1c82, 0x4016, 0x70a6,
-};
-
-static const uint16_t dv_place_720p60[2*10*27*5] = {
- 0x1230, 0x3618, 0x4848, 0x0000, 0x2460,
- 0x2430, 0x4818, 0x0048, 0x1200, 0x3660,
- 0x3630, 0x0018, 0x1248, 0x2400, 0x4860,
- 0x4830, 0x1218, 0x2448, 0x3600, 0x0060,
- 0x0030, 0x2418, 0x3648, 0x4800, 0x1260,
- 0x1232, 0x361a, 0x484a, 0x0002, 0x2462,
- 0x2432, 0x481a, 0x004a, 0x1202, 0x3662,
- 0x3632, 0x001a, 0x124a, 0x2402, 0x4862,
- 0x4832, 0x121a, 0x244a, 0x3602, 0x0062,
- 0x0032, 0x241a, 0x364a, 0x4802, 0x1262,
- 0x1234, 0x361c, 0x484c, 0x0004, 0x2464,
- 0x2434, 0x481c, 0x004c, 0x1204, 0x3664,
- 0x3634, 0x001c, 0x124c, 0x2404, 0x4864,
- 0x4834, 0x121c, 0x244c, 0x3604, 0x0064,
- 0x0034, 0x241c, 0x364c, 0x4804, 0x1264,
- 0x1236, 0x361e, 0x484e, 0x0006, 0x2466,
- 0x2436, 0x481e, 0x004e, 0x1206, 0x3666,
- 0x3636, 0x001e, 0x124e, 0x2406, 0x4866,
- 0x4836, 0x121e, 0x244e, 0x3606, 0x0066,
- 0x0036, 0x241e, 0x364e, 0x4806, 0x1266,
- 0x1238, 0x3620, 0x4850, 0x0008, 0x2468,
- 0x2438, 0x4820, 0x0050, 0x1208, 0x3668,
- 0x3638, 0x0020, 0x1250, 0x2408, 0x4868,
- 0x4838, 0x1220, 0x2450, 0x3608, 0x0068,
- 0x0038, 0x2420, 0x3650, 0x4808, 0x1268,
- 0x123a, 0x3622, 0x4852, 0x000a, 0x246a,
- 0x243a, 0x4822, 0x0052, 0x120a, 0x366a,
- 0x363a, 0x0022, 0x1252, 0x240a, 0x486a,
- 0x483a, 0x1222, 0x2452, 0x360a, 0x006a,
- 0x003a, 0x2422, 0x3652, 0x480a, 0x126a,
- 0x1430, 0x3818, 0x4a48, 0x0200, 0x2660,
- 0x2630, 0x4a18, 0x0248, 0x1400, 0x3860,
- 0x3830, 0x0218, 0x1448, 0x2600, 0x4a60,
- 0x4a30, 0x1418, 0x2648, 0x3800, 0x0260,
- 0x0230, 0x2618, 0x3848, 0x4a00, 0x1460,
- 0x1432, 0x381a, 0x4a4a, 0x0202, 0x2662,
- 0x2632, 0x4a1a, 0x024a, 0x1402, 0x3862,
- 0x3832, 0x021a, 0x144a, 0x2602, 0x4a62,
- 0x4a32, 0x141a, 0x264a, 0x3802, 0x0262,
- 0x0232, 0x261a, 0x384a, 0x4a02, 0x1462,
- 0x1434, 0x381c, 0x4a4c, 0x0204, 0x2664,
- 0x2634, 0x4a1c, 0x024c, 0x1404, 0x3864,
- 0x3834, 0x021c, 0x144c, 0x2604, 0x4a64,
- 0x4a34, 0x141c, 0x264c, 0x3804, 0x0264,
- 0x0234, 0x261c, 0x384c, 0x4a04, 0x1464,
- 0x1436, 0x381e, 0x4a4e, 0x0206, 0x2666,
- 0x2636, 0x4a1e, 0x024e, 0x1406, 0x3866,
- 0x3836, 0x021e, 0x144e, 0x2606, 0x4a66,
- 0x4a36, 0x141e, 0x264e, 0x3806, 0x0266,
- 0x0236, 0x261e, 0x384e, 0x4a06, 0x1466,
- 0x1438, 0x3820, 0x4a50, 0x0208, 0x2668,
- 0x2638, 0x4a20, 0x0250, 0x1408, 0x3868,
- 0x3838, 0x0220, 0x1450, 0x2608, 0x4a68,
- 0x4a38, 0x1420, 0x2650, 0x3808, 0x0268,
- 0x0238, 0x2620, 0x3850, 0x4a08, 0x1468,
- 0x143a, 0x3822, 0x4a52, 0x020a, 0x266a,
- 0x263a, 0x4a22, 0x0252, 0x140a, 0x386a,
- 0x383a, 0x0222, 0x1452, 0x260a, 0x4a6a,
- 0x4a3a, 0x1422, 0x2652, 0x380a, 0x026a,
- 0x023a, 0x2622, 0x3852, 0x4a0a, 0x146a,
- 0x1630, 0x3a18, 0x4c48, 0x0400, 0x2860,
- 0x2830, 0x4c18, 0x0448, 0x1600, 0x3a60,
- 0x3a30, 0x0418, 0x1648, 0x2800, 0x4c60,
- 0x4c30, 0x1618, 0x2848, 0x3a00, 0x0460,
- 0x0430, 0x2818, 0x3a48, 0x4c00, 0x1660,
- 0x1632, 0x3a1a, 0x4c4a, 0x0402, 0x2862,
- 0x2832, 0x4c1a, 0x044a, 0x1602, 0x3a62,
- 0x3a32, 0x041a, 0x164a, 0x2802, 0x4c62,
- 0x4c32, 0x161a, 0x284a, 0x3a02, 0x0462,
- 0x0432, 0x281a, 0x3a4a, 0x4c02, 0x1662,
- 0x1634, 0x3a1c, 0x4c4c, 0x0404, 0x2864,
- 0x2834, 0x4c1c, 0x044c, 0x1604, 0x3a64,
- 0x3a34, 0x041c, 0x164c, 0x2804, 0x4c64,
- 0x4c34, 0x161c, 0x284c, 0x3a04, 0x0464,
- 0x0434, 0x281c, 0x3a4c, 0x4c04, 0x1664,
- 0x1636, 0x3a1e, 0x4c4e, 0x0406, 0x2866,
- 0x2836, 0x4c1e, 0x044e, 0x1606, 0x3a66,
- 0x3a36, 0x041e, 0x164e, 0x2806, 0x4c66,
- 0x4c36, 0x161e, 0x284e, 0x3a06, 0x0466,
- 0x0436, 0x281e, 0x3a4e, 0x4c06, 0x1666,
- 0x1638, 0x3a20, 0x4c50, 0x0408, 0x2868,
- 0x2838, 0x4c20, 0x0450, 0x1608, 0x3a68,
- 0x3a38, 0x0420, 0x1650, 0x2808, 0x4c68,
- 0x4c38, 0x1620, 0x2850, 0x3a08, 0x0468,
- 0x0438, 0x2820, 0x3a50, 0x4c08, 0x1668,
- 0x163a, 0x3a22, 0x4c52, 0x040a, 0x286a,
- 0x283a, 0x4c22, 0x0452, 0x160a, 0x3a6a,
- 0x3a3a, 0x0422, 0x1652, 0x280a, 0x4c6a,
- 0x4c3a, 0x1622, 0x2852, 0x3a0a, 0x046a,
- 0x043a, 0x2822, 0x3a52, 0x4c0a, 0x166a,
- 0x1830, 0x3c18, 0x4e48, 0x0600, 0x2a60,
- 0x2a30, 0x4e18, 0x0648, 0x1800, 0x3c60,
- 0x3c30, 0x0618, 0x1848, 0x2a00, 0x4e60,
- 0x4e30, 0x1818, 0x2a48, 0x3c00, 0x0660,
- 0x0630, 0x2a18, 0x3c48, 0x4e00, 0x1860,
- 0x1832, 0x3c1a, 0x4e4a, 0x0602, 0x2a62,
- 0x2a32, 0x4e1a, 0x064a, 0x1802, 0x3c62,
- 0x3c32, 0x061a, 0x184a, 0x2a02, 0x4e62,
- 0x4e32, 0x181a, 0x2a4a, 0x3c02, 0x0662,
- 0x0632, 0x2a1a, 0x3c4a, 0x4e02, 0x1862,
- 0x1834, 0x3c1c, 0x4e4c, 0x0604, 0x2a64,
- 0x2a34, 0x4e1c, 0x064c, 0x1804, 0x3c64,
- 0x3c34, 0x061c, 0x184c, 0x2a04, 0x4e64,
- 0x4e34, 0x181c, 0x2a4c, 0x3c04, 0x0664,
- 0x0634, 0x2a1c, 0x3c4c, 0x4e04, 0x1864,
- 0x1836, 0x3c1e, 0x4e4e, 0x0606, 0x2a66,
- 0x2a36, 0x4e1e, 0x064e, 0x1806, 0x3c66,
- 0x3c36, 0x061e, 0x184e, 0x2a06, 0x4e66,
- 0x4e36, 0x181e, 0x2a4e, 0x3c06, 0x0666,
- 0x0636, 0x2a1e, 0x3c4e, 0x4e06, 0x1866,
- 0x1838, 0x3c20, 0x4e50, 0x0608, 0x2a68,
- 0x2a38, 0x4e20, 0x0650, 0x1808, 0x3c68,
- 0x3c38, 0x0620, 0x1850, 0x2a08, 0x4e68,
- 0x4e38, 0x1820, 0x2a50, 0x3c08, 0x0668,
- 0x0638, 0x2a20, 0x3c50, 0x4e08, 0x1868,
- 0x183a, 0x3c22, 0x4e52, 0x060a, 0x2a6a,
- 0x2a3a, 0x4e22, 0x0652, 0x180a, 0x3c6a,
- 0x3c3a, 0x0622, 0x1852, 0x2a0a, 0x4e6a,
- 0x4e3a, 0x1822, 0x2a52, 0x3c0a, 0x066a,
- 0x063a, 0x2a22, 0x3c52, 0x4e0a, 0x186a,
- 0x1a30, 0x3e18, 0x5048, 0x0800, 0x2c60,
- 0x2c30, 0x5018, 0x0848, 0x1a00, 0x3e60,
- 0x3e30, 0x0818, 0x1a48, 0x2c00, 0x5060,
- 0x5030, 0x1a18, 0x2c48, 0x3e00, 0x0860,
- 0x0830, 0x2c18, 0x3e48, 0x5000, 0x1a60,
- 0x1a32, 0x3e1a, 0x504a, 0x0802, 0x2c62,
- 0x2c32, 0x501a, 0x084a, 0x1a02, 0x3e62,
- 0x3e32, 0x081a, 0x1a4a, 0x2c02, 0x5062,
- 0x5032, 0x1a1a, 0x2c4a, 0x3e02, 0x0862,
- 0x0832, 0x2c1a, 0x3e4a, 0x5002, 0x1a62,
- 0x1a34, 0x3e1c, 0x504c, 0x0804, 0x2c64,
- 0x2c34, 0x501c, 0x084c, 0x1a04, 0x3e64,
- 0x3e34, 0x081c, 0x1a4c, 0x2c04, 0x5064,
- 0x5034, 0x1a1c, 0x2c4c, 0x3e04, 0x0864,
- 0x0834, 0x2c1c, 0x3e4c, 0x5004, 0x1a64,
- 0x1a36, 0x3e1e, 0x504e, 0x0806, 0x2c66,
- 0x2c36, 0x501e, 0x084e, 0x1a06, 0x3e66,
- 0x3e36, 0x081e, 0x1a4e, 0x2c06, 0x5066,
- 0x5036, 0x1a1e, 0x2c4e, 0x3e06, 0x0866,
- 0x0836, 0x2c1e, 0x3e4e, 0x5006, 0x1a66,
- 0x1a38, 0x3e20, 0x5050, 0x0808, 0x2c68,
- 0x2c38, 0x5020, 0x0850, 0x1a08, 0x3e68,
- 0x3e38, 0x0820, 0x1a50, 0x2c08, 0x5068,
- 0x5038, 0x1a20, 0x2c50, 0x3e08, 0x0868,
- 0x0838, 0x2c20, 0x3e50, 0x5008, 0x1a68,
- 0x1a3a, 0x3e22, 0x5052, 0x080a, 0x2c6a,
- 0x2c3a, 0x5022, 0x0852, 0x1a0a, 0x3e6a,
- 0x3e3a, 0x0822, 0x1a52, 0x2c0a, 0x506a,
- 0x503a, 0x1a22, 0x2c52, 0x3e0a, 0x086a,
- 0x083a, 0x2c22, 0x3e52, 0x500a, 0x1a6a,
- 0x1c30, 0x4018, 0x5248, 0x0a00, 0x2e60,
- 0x2e30, 0x5218, 0x0a48, 0x1c00, 0x4060,
- 0x4030, 0x0a18, 0x1c48, 0x2e00, 0x5260,
- 0x5230, 0x1c18, 0x2e48, 0x4000, 0x0a60,
- 0x0a30, 0x2e18, 0x4048, 0x5200, 0x1c60,
- 0x1c32, 0x401a, 0x524a, 0x0a02, 0x2e62,
- 0x2e32, 0x521a, 0x0a4a, 0x1c02, 0x4062,
- 0x4032, 0x0a1a, 0x1c4a, 0x2e02, 0x5262,
- 0x5232, 0x1c1a, 0x2e4a, 0x4002, 0x0a62,
- 0x0a32, 0x2e1a, 0x404a, 0x5202, 0x1c62,
- 0x1c34, 0x401c, 0x524c, 0x0a04, 0x2e64,
- 0x2e34, 0x521c, 0x0a4c, 0x1c04, 0x4064,
- 0x4034, 0x0a1c, 0x1c4c, 0x2e04, 0x5264,
- 0x5234, 0x1c1c, 0x2e4c, 0x4004, 0x0a64,
- 0x0a34, 0x2e1c, 0x404c, 0x5204, 0x1c64,
- 0x1c36, 0x401e, 0x524e, 0x0a06, 0x2e66,
- 0x2e36, 0x521e, 0x0a4e, 0x1c06, 0x4066,
- 0x4036, 0x0a1e, 0x1c4e, 0x2e06, 0x5266,
- 0x5236, 0x1c1e, 0x2e4e, 0x4006, 0x0a66,
- 0x0a36, 0x2e1e, 0x404e, 0x5206, 0x1c66,
- 0x1c38, 0x4020, 0x5250, 0x0a08, 0x2e68,
- 0x2e38, 0x5220, 0x0a50, 0x1c08, 0x4068,
- 0x4038, 0x0a20, 0x1c50, 0x2e08, 0x5268,
- 0x5238, 0x1c20, 0x2e50, 0x4008, 0x0a68,
- 0x0a38, 0x2e20, 0x4050, 0x5208, 0x1c68,
- 0x1c3a, 0x4022, 0x5252, 0x0a0a, 0x2e6a,
- 0x2e3a, 0x5222, 0x0a52, 0x1c0a, 0x406a,
- 0x403a, 0x0a22, 0x1c52, 0x2e0a, 0x526a,
- 0x523a, 0x1c22, 0x2e52, 0x400a, 0x0a6a,
- 0x0a3a, 0x2e22, 0x4052, 0x520a, 0x1c6a,
- 0x1e30, 0x4218, 0x5448, 0x0c00, 0x3060,
- 0x3030, 0x5418, 0x0c48, 0x1e00, 0x4260,
- 0x4230, 0x0c18, 0x1e48, 0x3000, 0x5460,
- 0x5430, 0x1e18, 0x3048, 0x4200, 0x0c60,
- 0x0c30, 0x3018, 0x4248, 0x5400, 0x1e60,
- 0x1e32, 0x421a, 0x544a, 0x0c02, 0x3062,
- 0x3032, 0x541a, 0x0c4a, 0x1e02, 0x4262,
- 0x4232, 0x0c1a, 0x1e4a, 0x3002, 0x5462,
- 0x5432, 0x1e1a, 0x304a, 0x4202, 0x0c62,
- 0x0c32, 0x301a, 0x424a, 0x5402, 0x1e62,
- 0x1e34, 0x421c, 0x544c, 0x0c04, 0x3064,
- 0x3034, 0x541c, 0x0c4c, 0x1e04, 0x4264,
- 0x4234, 0x0c1c, 0x1e4c, 0x3004, 0x5464,
- 0x5434, 0x1e1c, 0x304c, 0x4204, 0x0c64,
- 0x0c34, 0x301c, 0x424c, 0x5404, 0x1e64,
- 0x1e36, 0x421e, 0x544e, 0x0c06, 0x3066,
- 0x3036, 0x541e, 0x0c4e, 0x1e06, 0x4266,
- 0x4236, 0x0c1e, 0x1e4e, 0x3006, 0x5466,
- 0x5436, 0x1e1e, 0x304e, 0x4206, 0x0c66,
- 0x0c36, 0x301e, 0x424e, 0x5406, 0x1e66,
- 0x1e38, 0x4220, 0x5450, 0x0c08, 0x3068,
- 0x3038, 0x5420, 0x0c50, 0x1e08, 0x4268,
- 0x4238, 0x0c20, 0x1e50, 0x3008, 0x5468,
- 0x5438, 0x1e20, 0x3050, 0x4208, 0x0c68,
- 0x0c38, 0x3020, 0x4250, 0x5408, 0x1e68,
- 0x1e3a, 0x4222, 0x5452, 0x0c0a, 0x306a,
- 0x303a, 0x5422, 0x0c52, 0x1e0a, 0x426a,
- 0x423a, 0x0c22, 0x1e52, 0x300a, 0x546a,
- 0x543a, 0x1e22, 0x3052, 0x420a, 0x0c6a,
- 0x0c3a, 0x3022, 0x4252, 0x540a, 0x1e6a,
- 0x2030, 0x4418, 0x5648, 0x0e00, 0x3260,
- 0x3230, 0x5618, 0x0e48, 0x2000, 0x4460,
- 0x4430, 0x0e18, 0x2048, 0x3200, 0x5660,
- 0x5630, 0x2018, 0x3248, 0x4400, 0x0e60,
- 0x0e30, 0x3218, 0x4448, 0x5600, 0x2060,
- 0x2032, 0x441a, 0x564a, 0x0e02, 0x3262,
- 0x3232, 0x561a, 0x0e4a, 0x2002, 0x4462,
- 0x4432, 0x0e1a, 0x204a, 0x3202, 0x5662,
- 0x5632, 0x201a, 0x324a, 0x4402, 0x0e62,
- 0x0e32, 0x321a, 0x444a, 0x5602, 0x2062,
- 0x2034, 0x441c, 0x564c, 0x0e04, 0x3264,
- 0x3234, 0x561c, 0x0e4c, 0x2004, 0x4464,
- 0x4434, 0x0e1c, 0x204c, 0x3204, 0x5664,
- 0x5634, 0x201c, 0x324c, 0x4404, 0x0e64,
- 0x0e34, 0x321c, 0x444c, 0x5604, 0x2064,
- 0x2036, 0x441e, 0x564e, 0x0e06, 0x3266,
- 0x3236, 0x561e, 0x0e4e, 0x2006, 0x4466,
- 0x4436, 0x0e1e, 0x204e, 0x3206, 0x5666,
- 0x5636, 0x201e, 0x324e, 0x4406, 0x0e66,
- 0x0e36, 0x321e, 0x444e, 0x5606, 0x2066,
- 0x2038, 0x4420, 0x5650, 0x0e08, 0x3268,
- 0x3238, 0x5620, 0x0e50, 0x2008, 0x4468,
- 0x4438, 0x0e20, 0x2050, 0x3208, 0x5668,
- 0x5638, 0x2020, 0x3250, 0x4408, 0x0e68,
- 0x0e38, 0x3220, 0x4450, 0x5608, 0x2068,
- 0x203a, 0x4422, 0x5652, 0x0e0a, 0x326a,
- 0x323a, 0x5622, 0x0e52, 0x200a, 0x446a,
- 0x443a, 0x0e22, 0x2052, 0x320a, 0x566a,
- 0x563a, 0x2022, 0x3252, 0x440a, 0x0e6a,
- 0x0e3a, 0x3222, 0x4452, 0x560a, 0x206a,
- 0x2230, 0x4618, 0x5848, 0x1000, 0x3460,
- 0x3430, 0x5818, 0x1048, 0x2200, 0x4660,
- 0x4630, 0x1018, 0x2248, 0x3400, 0x5860,
- 0x5830, 0x2218, 0x3448, 0x4600, 0x1060,
- 0x1030, 0x3418, 0x4648, 0x5800, 0x2260,
- 0x2232, 0x461a, 0x584a, 0x1002, 0x3462,
- 0x3432, 0x581a, 0x104a, 0x2202, 0x4662,
- 0x4632, 0x101a, 0x224a, 0x3402, 0x5862,
- 0x5832, 0x221a, 0x344a, 0x4602, 0x1062,
- 0x1032, 0x341a, 0x464a, 0x5802, 0x2262,
- 0x2234, 0x461c, 0x584c, 0x1004, 0x3464,
- 0x3434, 0x581c, 0x104c, 0x2204, 0x4664,
- 0x4634, 0x101c, 0x224c, 0x3404, 0x5864,
- 0x5834, 0x221c, 0x344c, 0x4604, 0x1064,
- 0x1034, 0x341c, 0x464c, 0x5804, 0x2264,
- 0x2236, 0x461e, 0x584e, 0x1006, 0x3466,
- 0x3436, 0x581e, 0x104e, 0x2206, 0x4666,
- 0x4636, 0x101e, 0x224e, 0x3406, 0x5866,
- 0x5836, 0x221e, 0x344e, 0x4606, 0x1066,
- 0x1036, 0x341e, 0x464e, 0x5806, 0x2266,
- 0x2238, 0x4620, 0x5850, 0x1008, 0x3468,
- 0x3438, 0x5820, 0x1050, 0x2208, 0x4668,
- 0x4638, 0x1020, 0x2250, 0x3408, 0x5868,
- 0x5838, 0x2220, 0x3450, 0x4608, 0x1068,
- 0x1038, 0x3420, 0x4650, 0x5808, 0x2268,
- 0x223a, 0x4622, 0x5852, 0x100a, 0x346a,
- 0x343a, 0x5822, 0x1052, 0x220a, 0x466a,
- 0x463a, 0x1022, 0x2252, 0x340a, 0x586a,
- 0x583a, 0x2222, 0x3452, 0x460a, 0x106a,
- 0x103a, 0x3422, 0x4652, 0x580a, 0x226a,
- 0x363c, 0x0024, 0x1254, 0x240c, 0x486c,
- 0x483c, 0x1224, 0x2454, 0x360c, 0x006c,
- 0x003c, 0x2424, 0x3654, 0x480c, 0x126c,
- 0x123c, 0x3624, 0x4854, 0x000c, 0x246c,
- 0x243c, 0x4824, 0x0054, 0x120c, 0x366c,
- 0x363e, 0x0026, 0x1256, 0x240e, 0x486e,
- 0x483e, 0x1226, 0x2456, 0x360e, 0x006e,
- 0x003e, 0x2426, 0x3656, 0x480e, 0x126e,
- 0x123e, 0x3626, 0x4856, 0x000e, 0x246e,
- 0x243e, 0x4826, 0x0056, 0x120e, 0x366e,
- 0x3640, 0x0028, 0x1258, 0x2410, 0x4870,
- 0x4840, 0x1228, 0x2458, 0x3610, 0x0070,
- 0x0040, 0x2428, 0x3658, 0x4810, 0x1270,
- 0x1240, 0x3628, 0x4858, 0x0010, 0x2470,
- 0x2440, 0x4828, 0x0058, 0x1210, 0x3670,
- 0x3642, 0x002a, 0x125a, 0x2412, 0x4872,
- 0x4842, 0x122a, 0x245a, 0x3612, 0x0072,
- 0x0042, 0x242a, 0x365a, 0x4812, 0x1272,
- 0x1242, 0x362a, 0x485a, 0x0012, 0x2472,
- 0x2442, 0x482a, 0x005a, 0x1212, 0x3672,
- 0x3644, 0x002c, 0x125c, 0x2414, 0x4874,
- 0x4844, 0x122c, 0x245c, 0x3614, 0x0074,
- 0x0044, 0x242c, 0x365c, 0x4814, 0x1274,
- 0x1244, 0x362c, 0x485c, 0x0014, 0x2474,
- 0x2444, 0x482c, 0x005c, 0x1214, 0x3674,
- 0x3646, 0x002e, 0x125e, 0x2416, 0x4876,
- 0x4846, 0x122e, 0x245e, 0x3616, 0x0076,
- 0x0046, 0x242e, 0x365e, 0x4816, 0x1276,
- 0x1246, 0x362e, 0x485e, 0x0016, 0x2476,
- 0x2446, 0x482e, 0x005e, 0x1216, 0x3676,
- 0x383c, 0x0224, 0x1454, 0x260c, 0x4a6c,
- 0x4a3c, 0x1424, 0x2654, 0x380c, 0x026c,
- 0x023c, 0x2624, 0x3854, 0x4a0c, 0x146c,
- 0x143c, 0x3824, 0x4a54, 0x020c, 0x266c,
- 0x263c, 0x4a24, 0x0254, 0x140c, 0x386c,
- 0x383e, 0x0226, 0x1456, 0x260e, 0x4a6e,
- 0x4a3e, 0x1426, 0x2656, 0x380e, 0x026e,
- 0x023e, 0x2626, 0x3856, 0x4a0e, 0x146e,
- 0x143e, 0x3826, 0x4a56, 0x020e, 0x266e,
- 0x263e, 0x4a26, 0x0256, 0x140e, 0x386e,
- 0x3840, 0x0228, 0x1458, 0x2610, 0x4a70,
- 0x4a40, 0x1428, 0x2658, 0x3810, 0x0270,
- 0x0240, 0x2628, 0x3858, 0x4a10, 0x1470,
- 0x1440, 0x3828, 0x4a58, 0x0210, 0x2670,
- 0x2640, 0x4a28, 0x0258, 0x1410, 0x3870,
- 0x3842, 0x022a, 0x145a, 0x2612, 0x4a72,
- 0x4a42, 0x142a, 0x265a, 0x3812, 0x0272,
- 0x0242, 0x262a, 0x385a, 0x4a12, 0x1472,
- 0x1442, 0x382a, 0x4a5a, 0x0212, 0x2672,
- 0x2642, 0x4a2a, 0x025a, 0x1412, 0x3872,
- 0x3844, 0x022c, 0x145c, 0x2614, 0x4a74,
- 0x4a44, 0x142c, 0x265c, 0x3814, 0x0274,
- 0x0244, 0x262c, 0x385c, 0x4a14, 0x1474,
- 0x1444, 0x382c, 0x4a5c, 0x0214, 0x2674,
- 0x2644, 0x4a2c, 0x025c, 0x1414, 0x3874,
- 0x3846, 0x022e, 0x145e, 0x2616, 0x4a76,
- 0x4a46, 0x142e, 0x265e, 0x3816, 0x0276,
- 0x0246, 0x262e, 0x385e, 0x4a16, 0x1476,
- 0x1446, 0x382e, 0x4a5e, 0x0216, 0x2676,
- 0x2646, 0x4a2e, 0x025e, 0x1416, 0x3876,
- 0x3a3c, 0x0424, 0x1654, 0x280c, 0x4c6c,
- 0x4c3c, 0x1624, 0x2854, 0x3a0c, 0x046c,
- 0x043c, 0x2824, 0x3a54, 0x4c0c, 0x166c,
- 0x163c, 0x3a24, 0x4c54, 0x040c, 0x286c,
- 0x283c, 0x4c24, 0x0454, 0x160c, 0x3a6c,
- 0x3a3e, 0x0426, 0x1656, 0x280e, 0x4c6e,
- 0x4c3e, 0x1626, 0x2856, 0x3a0e, 0x046e,
- 0x043e, 0x2826, 0x3a56, 0x4c0e, 0x166e,
- 0x163e, 0x3a26, 0x4c56, 0x040e, 0x286e,
- 0x283e, 0x4c26, 0x0456, 0x160e, 0x3a6e,
- 0x3a40, 0x0428, 0x1658, 0x2810, 0x4c70,
- 0x4c40, 0x1628, 0x2858, 0x3a10, 0x0470,
- 0x0440, 0x2828, 0x3a58, 0x4c10, 0x1670,
- 0x1640, 0x3a28, 0x4c58, 0x0410, 0x2870,
- 0x2840, 0x4c28, 0x0458, 0x1610, 0x3a70,
- 0x3a42, 0x042a, 0x165a, 0x2812, 0x4c72,
- 0x4c42, 0x162a, 0x285a, 0x3a12, 0x0472,
- 0x0442, 0x282a, 0x3a5a, 0x4c12, 0x1672,
- 0x1642, 0x3a2a, 0x4c5a, 0x0412, 0x2872,
- 0x2842, 0x4c2a, 0x045a, 0x1612, 0x3a72,
- 0x3a44, 0x042c, 0x165c, 0x2814, 0x4c74,
- 0x4c44, 0x162c, 0x285c, 0x3a14, 0x0474,
- 0x0444, 0x282c, 0x3a5c, 0x4c14, 0x1674,
- 0x1644, 0x3a2c, 0x4c5c, 0x0414, 0x2874,
- 0x2844, 0x4c2c, 0x045c, 0x1614, 0x3a74,
- 0x3a46, 0x042e, 0x165e, 0x2816, 0x4c76,
- 0x4c46, 0x162e, 0x285e, 0x3a16, 0x0476,
- 0x0446, 0x282e, 0x3a5e, 0x4c16, 0x1676,
- 0x1646, 0x3a2e, 0x4c5e, 0x0416, 0x2876,
- 0x2846, 0x4c2e, 0x045e, 0x1616, 0x3a76,
- 0x3c3c, 0x0624, 0x1854, 0x2a0c, 0x4e6c,
- 0x4e3c, 0x1824, 0x2a54, 0x3c0c, 0x066c,
- 0x063c, 0x2a24, 0x3c54, 0x4e0c, 0x186c,
- 0x183c, 0x3c24, 0x4e54, 0x060c, 0x2a6c,
- 0x2a3c, 0x4e24, 0x0654, 0x180c, 0x3c6c,
- 0x3c3e, 0x0626, 0x1856, 0x2a0e, 0x4e6e,
- 0x4e3e, 0x1826, 0x2a56, 0x3c0e, 0x066e,
- 0x063e, 0x2a26, 0x3c56, 0x4e0e, 0x186e,
- 0x183e, 0x3c26, 0x4e56, 0x060e, 0x2a6e,
- 0x2a3e, 0x4e26, 0x0656, 0x180e, 0x3c6e,
- 0x3c40, 0x0628, 0x1858, 0x2a10, 0x4e70,
- 0x4e40, 0x1828, 0x2a58, 0x3c10, 0x0670,
- 0x0640, 0x2a28, 0x3c58, 0x4e10, 0x1870,
- 0x1840, 0x3c28, 0x4e58, 0x0610, 0x2a70,
- 0x2a40, 0x4e28, 0x0658, 0x1810, 0x3c70,
- 0x3c42, 0x062a, 0x185a, 0x2a12, 0x4e72,
- 0x4e42, 0x182a, 0x2a5a, 0x3c12, 0x0672,
- 0x0642, 0x2a2a, 0x3c5a, 0x4e12, 0x1872,
- 0x1842, 0x3c2a, 0x4e5a, 0x0612, 0x2a72,
- 0x2a42, 0x4e2a, 0x065a, 0x1812, 0x3c72,
- 0x3c44, 0x062c, 0x185c, 0x2a14, 0x4e74,
- 0x4e44, 0x182c, 0x2a5c, 0x3c14, 0x0674,
- 0x0644, 0x2a2c, 0x3c5c, 0x4e14, 0x1874,
- 0x1844, 0x3c2c, 0x4e5c, 0x0614, 0x2a74,
- 0x2a44, 0x4e2c, 0x065c, 0x1814, 0x3c74,
- 0x3c46, 0x062e, 0x185e, 0x2a16, 0x4e76,
- 0x4e46, 0x182e, 0x2a5e, 0x3c16, 0x0676,
- 0x0646, 0x2a2e, 0x3c5e, 0x4e16, 0x1876,
- 0x1846, 0x3c2e, 0x4e5e, 0x0616, 0x2a76,
- 0x2a46, 0x4e2e, 0x065e, 0x1816, 0x3c76,
- 0x3e3c, 0x0824, 0x1a54, 0x2c0c, 0x506c,
- 0x503c, 0x1a24, 0x2c54, 0x3e0c, 0x086c,
- 0x083c, 0x2c24, 0x3e54, 0x500c, 0x1a6c,
- 0x1a3c, 0x3e24, 0x5054, 0x080c, 0x2c6c,
- 0x2c3c, 0x5024, 0x0854, 0x1a0c, 0x3e6c,
- 0x3e3e, 0x0826, 0x1a56, 0x2c0e, 0x506e,
- 0x503e, 0x1a26, 0x2c56, 0x3e0e, 0x086e,
- 0x083e, 0x2c26, 0x3e56, 0x500e, 0x1a6e,
- 0x1a3e, 0x3e26, 0x5056, 0x080e, 0x2c6e,
- 0x2c3e, 0x5026, 0x0856, 0x1a0e, 0x3e6e,
- 0x3e40, 0x0828, 0x1a58, 0x2c10, 0x5070,
- 0x5040, 0x1a28, 0x2c58, 0x3e10, 0x0870,
- 0x0840, 0x2c28, 0x3e58, 0x5010, 0x1a70,
- 0x1a40, 0x3e28, 0x5058, 0x0810, 0x2c70,
- 0x2c40, 0x5028, 0x0858, 0x1a10, 0x3e70,
- 0x3e42, 0x082a, 0x1a5a, 0x2c12, 0x5072,
- 0x5042, 0x1a2a, 0x2c5a, 0x3e12, 0x0872,
- 0x0842, 0x2c2a, 0x3e5a, 0x5012, 0x1a72,
- 0x1a42, 0x3e2a, 0x505a, 0x0812, 0x2c72,
- 0x2c42, 0x502a, 0x085a, 0x1a12, 0x3e72,
- 0x3e44, 0x082c, 0x1a5c, 0x2c14, 0x5074,
- 0x5044, 0x1a2c, 0x2c5c, 0x3e14, 0x0874,
- 0x0844, 0x2c2c, 0x3e5c, 0x5014, 0x1a74,
- 0x1a44, 0x3e2c, 0x505c, 0x0814, 0x2c74,
- 0x2c44, 0x502c, 0x085c, 0x1a14, 0x3e74,
- 0x3e46, 0x082e, 0x1a5e, 0x2c16, 0x5076,
- 0x5046, 0x1a2e, 0x2c5e, 0x3e16, 0x0876,
- 0x0846, 0x2c2e, 0x3e5e, 0x5016, 0x1a76,
- 0x1a46, 0x3e2e, 0x505e, 0x0816, 0x2c76,
- 0x2c46, 0x502e, 0x085e, 0x1a16, 0x3e76,
- 0x403c, 0x0a24, 0x1c54, 0x2e0c, 0x526c,
- 0x523c, 0x1c24, 0x2e54, 0x400c, 0x0a6c,
- 0x0a3c, 0x2e24, 0x4054, 0x520c, 0x1c6c,
- 0x1c3c, 0x4024, 0x5254, 0x0a0c, 0x2e6c,
- 0x2e3c, 0x5224, 0x0a54, 0x1c0c, 0x406c,
- 0x403e, 0x0a26, 0x1c56, 0x2e0e, 0x526e,
- 0x523e, 0x1c26, 0x2e56, 0x400e, 0x0a6e,
- 0x0a3e, 0x2e26, 0x4056, 0x520e, 0x1c6e,
- 0x1c3e, 0x4026, 0x5256, 0x0a0e, 0x2e6e,
- 0x2e3e, 0x5226, 0x0a56, 0x1c0e, 0x406e,
- 0x4040, 0x0a28, 0x1c58, 0x2e10, 0x5270,
- 0x5240, 0x1c28, 0x2e58, 0x4010, 0x0a70,
- 0x0a40, 0x2e28, 0x4058, 0x5210, 0x1c70,
- 0x1c40, 0x4028, 0x5258, 0x0a10, 0x2e70,
- 0x2e40, 0x5228, 0x0a58, 0x1c10, 0x4070,
- 0x4042, 0x0a2a, 0x1c5a, 0x2e12, 0x5272,
- 0x5242, 0x1c2a, 0x2e5a, 0x4012, 0x0a72,
- 0x0a42, 0x2e2a, 0x405a, 0x5212, 0x1c72,
- 0x1c42, 0x402a, 0x525a, 0x0a12, 0x2e72,
- 0x2e42, 0x522a, 0x0a5a, 0x1c12, 0x4072,
- 0x4044, 0x0a2c, 0x1c5c, 0x2e14, 0x5274,
- 0x5244, 0x1c2c, 0x2e5c, 0x4014, 0x0a74,
- 0x0a44, 0x2e2c, 0x405c, 0x5214, 0x1c74,
- 0x1c44, 0x402c, 0x525c, 0x0a14, 0x2e74,
- 0x2e44, 0x522c, 0x0a5c, 0x1c14, 0x4074,
- 0x4046, 0x0a2e, 0x1c5e, 0x2e16, 0x5276,
- 0x5246, 0x1c2e, 0x2e5e, 0x4016, 0x0a76,
- 0x0a46, 0x2e2e, 0x405e, 0x5216, 0x1c76,
- 0x1c46, 0x402e, 0x525e, 0x0a16, 0x2e76,
- 0x2e46, 0x522e, 0x0a5e, 0x1c16, 0x4076,
- 0x423c, 0x0c24, 0x1e54, 0x300c, 0x546c,
- 0x543c, 0x1e24, 0x3054, 0x420c, 0x0c6c,
- 0x0c3c, 0x3024, 0x4254, 0x540c, 0x1e6c,
- 0x1e3c, 0x4224, 0x5454, 0x0c0c, 0x306c,
- 0x303c, 0x5424, 0x0c54, 0x1e0c, 0x426c,
- 0x423e, 0x0c26, 0x1e56, 0x300e, 0x546e,
- 0x543e, 0x1e26, 0x3056, 0x420e, 0x0c6e,
- 0x0c3e, 0x3026, 0x4256, 0x540e, 0x1e6e,
- 0x1e3e, 0x4226, 0x5456, 0x0c0e, 0x306e,
- 0x303e, 0x5426, 0x0c56, 0x1e0e, 0x426e,
- 0x4240, 0x0c28, 0x1e58, 0x3010, 0x5470,
- 0x5440, 0x1e28, 0x3058, 0x4210, 0x0c70,
- 0x0c40, 0x3028, 0x4258, 0x5410, 0x1e70,
- 0x1e40, 0x4228, 0x5458, 0x0c10, 0x3070,
- 0x3040, 0x5428, 0x0c58, 0x1e10, 0x4270,
- 0x4242, 0x0c2a, 0x1e5a, 0x3012, 0x5472,
- 0x5442, 0x1e2a, 0x305a, 0x4212, 0x0c72,
- 0x0c42, 0x302a, 0x425a, 0x5412, 0x1e72,
- 0x1e42, 0x422a, 0x545a, 0x0c12, 0x3072,
- 0x3042, 0x542a, 0x0c5a, 0x1e12, 0x4272,
- 0x4244, 0x0c2c, 0x1e5c, 0x3014, 0x5474,
- 0x5444, 0x1e2c, 0x305c, 0x4214, 0x0c74,
- 0x0c44, 0x302c, 0x425c, 0x5414, 0x1e74,
- 0x1e44, 0x422c, 0x545c, 0x0c14, 0x3074,
- 0x3044, 0x542c, 0x0c5c, 0x1e14, 0x4274,
- 0x4246, 0x0c2e, 0x1e5e, 0x3016, 0x5476,
- 0x5446, 0x1e2e, 0x305e, 0x4216, 0x0c76,
- 0x0c46, 0x302e, 0x425e, 0x5416, 0x1e76,
- 0x1e46, 0x422e, 0x545e, 0x0c16, 0x3076,
- 0x3046, 0x542e, 0x0c5e, 0x1e16, 0x4276,
- 0x443c, 0x0e24, 0x2054, 0x320c, 0x566c,
- 0x563c, 0x2024, 0x3254, 0x440c, 0x0e6c,
- 0x0e3c, 0x3224, 0x4454, 0x560c, 0x206c,
- 0x203c, 0x4424, 0x5654, 0x0e0c, 0x326c,
- 0x323c, 0x5624, 0x0e54, 0x200c, 0x446c,
- 0x443e, 0x0e26, 0x2056, 0x320e, 0x566e,
- 0x563e, 0x2026, 0x3256, 0x440e, 0x0e6e,
- 0x0e3e, 0x3226, 0x4456, 0x560e, 0x206e,
- 0x203e, 0x4426, 0x5656, 0x0e0e, 0x326e,
- 0x323e, 0x5626, 0x0e56, 0x200e, 0x446e,
- 0x4440, 0x0e28, 0x2058, 0x3210, 0x5670,
- 0x5640, 0x2028, 0x3258, 0x4410, 0x0e70,
- 0x0e40, 0x3228, 0x4458, 0x5610, 0x2070,
- 0x2040, 0x4428, 0x5658, 0x0e10, 0x3270,
- 0x3240, 0x5628, 0x0e58, 0x2010, 0x4470,
- 0x4442, 0x0e2a, 0x205a, 0x3212, 0x5672,
- 0x5642, 0x202a, 0x325a, 0x4412, 0x0e72,
- 0x0e42, 0x322a, 0x445a, 0x5612, 0x2072,
- 0x2042, 0x442a, 0x565a, 0x0e12, 0x3272,
- 0x3242, 0x562a, 0x0e5a, 0x2012, 0x4472,
- 0x4444, 0x0e2c, 0x205c, 0x3214, 0x5674,
- 0x5644, 0x202c, 0x325c, 0x4414, 0x0e74,
- 0x0e44, 0x322c, 0x445c, 0x5614, 0x2074,
- 0x2044, 0x442c, 0x565c, 0x0e14, 0x3274,
- 0x3244, 0x562c, 0x0e5c, 0x2014, 0x4474,
- 0x4446, 0x0e2e, 0x205e, 0x3216, 0x5676,
- 0x5646, 0x202e, 0x325e, 0x4416, 0x0e76,
- 0x0e46, 0x322e, 0x445e, 0x5616, 0x2076,
- 0x2046, 0x442e, 0x565e, 0x0e16, 0x3276,
- 0x3246, 0x562e, 0x0e5e, 0x2016, 0x4476,
- 0x463c, 0x1024, 0x2254, 0x340c, 0x586c,
- 0x583c, 0x2224, 0x3454, 0x460c, 0x106c,
- 0x103c, 0x3424, 0x4654, 0x580c, 0x226c,
- 0x223c, 0x4624, 0x5854, 0x100c, 0x346c,
- 0x343c, 0x5824, 0x1054, 0x220c, 0x466c,
- 0x463e, 0x1026, 0x2256, 0x340e, 0x586e,
- 0x583e, 0x2226, 0x3456, 0x460e, 0x106e,
- 0x103e, 0x3426, 0x4656, 0x580e, 0x226e,
- 0x223e, 0x4626, 0x5856, 0x100e, 0x346e,
- 0x343e, 0x5826, 0x1056, 0x220e, 0x466e,
- 0x4640, 0x1028, 0x2258, 0x3410, 0x5870,
- 0x5840, 0x2228, 0x3458, 0x4610, 0x1070,
- 0x1040, 0x3428, 0x4658, 0x5810, 0x2270,
- 0x2240, 0x4628, 0x5858, 0x1010, 0x3470,
- 0x3440, 0x5828, 0x1058, 0x2210, 0x4670,
- 0x4642, 0x102a, 0x225a, 0x3412, 0x5872,
- 0x5842, 0x222a, 0x345a, 0x4612, 0x1072,
- 0x1042, 0x342a, 0x465a, 0x5812, 0x2272,
- 0x2242, 0x462a, 0x585a, 0x1012, 0x3472,
- 0x3442, 0x582a, 0x105a, 0x2212, 0x4672,
- 0x4644, 0x102c, 0x225c, 0x3414, 0x5874,
- 0x5844, 0x222c, 0x345c, 0x4614, 0x1074,
- 0x1044, 0x342c, 0x465c, 0x5814, 0x2274,
- 0x2244, 0x462c, 0x585c, 0x1014, 0x3474,
- 0x3444, 0x582c, 0x105c, 0x2214, 0x4674,
- 0x4646, 0x102e, 0x225e, 0x3416, 0x5876,
- 0x5846, 0x222e, 0x345e, 0x4616, 0x1076,
- 0x1046, 0x342e, 0x465e, 0x5816, 0x2276,
- 0x2246, 0x462e, 0x585e, 0x1016, 0x3476,
- 0x3446, 0x582e, 0x105e, 0x2216, 0x4676,
-};
-
-static const uint16_t dv_place_720p50[2*12*27*5] = {
- 0x1230, 0x3618, 0x4848, 0x0000, 0x2460,
- 0x2430, 0x4818, 0x0048, 0x1200, 0x3660,
- 0x3630, 0x0018, 0x1248, 0x2400, 0x4860,
- 0x4830, 0x1218, 0x2448, 0x3600, 0x0060,
- 0x0030, 0x2418, 0x3648, 0x4800, 0x1260,
- 0x1232, 0x361a, 0x484a, 0x0002, 0x2462,
- 0x2432, 0x481a, 0x004a, 0x1202, 0x3662,
- 0x3632, 0x001a, 0x124a, 0x2402, 0x4862,
- 0x4832, 0x121a, 0x244a, 0x3602, 0x0062,
- 0x0032, 0x241a, 0x364a, 0x4802, 0x1262,
- 0x1234, 0x361c, 0x484c, 0x0004, 0x2464,
- 0x2434, 0x481c, 0x004c, 0x1204, 0x3664,
- 0x3634, 0x001c, 0x124c, 0x2404, 0x4864,
- 0x4834, 0x121c, 0x244c, 0x3604, 0x0064,
- 0x0034, 0x241c, 0x364c, 0x4804, 0x1264,
- 0x1236, 0x361e, 0x484e, 0x0006, 0x2466,
- 0x2436, 0x481e, 0x004e, 0x1206, 0x3666,
- 0x3636, 0x001e, 0x124e, 0x2406, 0x4866,
- 0x4836, 0x121e, 0x244e, 0x3606, 0x0066,
- 0x0036, 0x241e, 0x364e, 0x4806, 0x1266,
- 0x1238, 0x3620, 0x4850, 0x0008, 0x2468,
- 0x2438, 0x4820, 0x0050, 0x1208, 0x3668,
- 0x3638, 0x0020, 0x1250, 0x2408, 0x4868,
- 0x4838, 0x1220, 0x2450, 0x3608, 0x0068,
- 0x0038, 0x2420, 0x3650, 0x4808, 0x1268,
- 0x123a, 0x3622, 0x4852, 0x000a, 0x246a,
- 0x243a, 0x4822, 0x0052, 0x120a, 0x366a,
- 0x363a, 0x0022, 0x1252, 0x240a, 0x486a,
- 0x483a, 0x1222, 0x2452, 0x360a, 0x006a,
- 0x003a, 0x2422, 0x3652, 0x480a, 0x126a,
- 0x1430, 0x3818, 0x4a48, 0x0200, 0x2660,
- 0x2630, 0x4a18, 0x0248, 0x1400, 0x3860,
- 0x3830, 0x0218, 0x1448, 0x2600, 0x4a60,
- 0x4a30, 0x1418, 0x2648, 0x3800, 0x0260,
- 0x0230, 0x2618, 0x3848, 0x4a00, 0x1460,
- 0x1432, 0x381a, 0x4a4a, 0x0202, 0x2662,
- 0x2632, 0x4a1a, 0x024a, 0x1402, 0x3862,
- 0x3832, 0x021a, 0x144a, 0x2602, 0x4a62,
- 0x4a32, 0x141a, 0x264a, 0x3802, 0x0262,
- 0x0232, 0x261a, 0x384a, 0x4a02, 0x1462,
- 0x1434, 0x381c, 0x4a4c, 0x0204, 0x2664,
- 0x2634, 0x4a1c, 0x024c, 0x1404, 0x3864,
- 0x3834, 0x021c, 0x144c, 0x2604, 0x4a64,
- 0x4a34, 0x141c, 0x264c, 0x3804, 0x0264,
- 0x0234, 0x261c, 0x384c, 0x4a04, 0x1464,
- 0x1436, 0x381e, 0x4a4e, 0x0206, 0x2666,
- 0x2636, 0x4a1e, 0x024e, 0x1406, 0x3866,
- 0x3836, 0x021e, 0x144e, 0x2606, 0x4a66,
- 0x4a36, 0x141e, 0x264e, 0x3806, 0x0266,
- 0x0236, 0x261e, 0x384e, 0x4a06, 0x1466,
- 0x1438, 0x3820, 0x4a50, 0x0208, 0x2668,
- 0x2638, 0x4a20, 0x0250, 0x1408, 0x3868,
- 0x3838, 0x0220, 0x1450, 0x2608, 0x4a68,
- 0x4a38, 0x1420, 0x2650, 0x3808, 0x0268,
- 0x0238, 0x2620, 0x3850, 0x4a08, 0x1468,
- 0x143a, 0x3822, 0x4a52, 0x020a, 0x266a,
- 0x263a, 0x4a22, 0x0252, 0x140a, 0x386a,
- 0x383a, 0x0222, 0x1452, 0x260a, 0x4a6a,
- 0x4a3a, 0x1422, 0x2652, 0x380a, 0x026a,
- 0x023a, 0x2622, 0x3852, 0x4a0a, 0x146a,
- 0x1630, 0x3a18, 0x4c48, 0x0400, 0x2860,
- 0x2830, 0x4c18, 0x0448, 0x1600, 0x3a60,
- 0x3a30, 0x0418, 0x1648, 0x2800, 0x4c60,
- 0x4c30, 0x1618, 0x2848, 0x3a00, 0x0460,
- 0x0430, 0x2818, 0x3a48, 0x4c00, 0x1660,
- 0x1632, 0x3a1a, 0x4c4a, 0x0402, 0x2862,
- 0x2832, 0x4c1a, 0x044a, 0x1602, 0x3a62,
- 0x3a32, 0x041a, 0x164a, 0x2802, 0x4c62,
- 0x4c32, 0x161a, 0x284a, 0x3a02, 0x0462,
- 0x0432, 0x281a, 0x3a4a, 0x4c02, 0x1662,
- 0x1634, 0x3a1c, 0x4c4c, 0x0404, 0x2864,
- 0x2834, 0x4c1c, 0x044c, 0x1604, 0x3a64,
- 0x3a34, 0x041c, 0x164c, 0x2804, 0x4c64,
- 0x4c34, 0x161c, 0x284c, 0x3a04, 0x0464,
- 0x0434, 0x281c, 0x3a4c, 0x4c04, 0x1664,
- 0x1636, 0x3a1e, 0x4c4e, 0x0406, 0x2866,
- 0x2836, 0x4c1e, 0x044e, 0x1606, 0x3a66,
- 0x3a36, 0x041e, 0x164e, 0x2806, 0x4c66,
- 0x4c36, 0x161e, 0x284e, 0x3a06, 0x0466,
- 0x0436, 0x281e, 0x3a4e, 0x4c06, 0x1666,
- 0x1638, 0x3a20, 0x4c50, 0x0408, 0x2868,
- 0x2838, 0x4c20, 0x0450, 0x1608, 0x3a68,
- 0x3a38, 0x0420, 0x1650, 0x2808, 0x4c68,
- 0x4c38, 0x1620, 0x2850, 0x3a08, 0x0468,
- 0x0438, 0x2820, 0x3a50, 0x4c08, 0x1668,
- 0x163a, 0x3a22, 0x4c52, 0x040a, 0x286a,
- 0x283a, 0x4c22, 0x0452, 0x160a, 0x3a6a,
- 0x3a3a, 0x0422, 0x1652, 0x280a, 0x4c6a,
- 0x4c3a, 0x1622, 0x2852, 0x3a0a, 0x046a,
- 0x043a, 0x2822, 0x3a52, 0x4c0a, 0x166a,
- 0x1830, 0x3c18, 0x4e48, 0x0600, 0x2a60,
- 0x2a30, 0x4e18, 0x0648, 0x1800, 0x3c60,
- 0x3c30, 0x0618, 0x1848, 0x2a00, 0x4e60,
- 0x4e30, 0x1818, 0x2a48, 0x3c00, 0x0660,
- 0x0630, 0x2a18, 0x3c48, 0x4e00, 0x1860,
- 0x1832, 0x3c1a, 0x4e4a, 0x0602, 0x2a62,
- 0x2a32, 0x4e1a, 0x064a, 0x1802, 0x3c62,
- 0x3c32, 0x061a, 0x184a, 0x2a02, 0x4e62,
- 0x4e32, 0x181a, 0x2a4a, 0x3c02, 0x0662,
- 0x0632, 0x2a1a, 0x3c4a, 0x4e02, 0x1862,
- 0x1834, 0x3c1c, 0x4e4c, 0x0604, 0x2a64,
- 0x2a34, 0x4e1c, 0x064c, 0x1804, 0x3c64,
- 0x3c34, 0x061c, 0x184c, 0x2a04, 0x4e64,
- 0x4e34, 0x181c, 0x2a4c, 0x3c04, 0x0664,
- 0x0634, 0x2a1c, 0x3c4c, 0x4e04, 0x1864,
- 0x1836, 0x3c1e, 0x4e4e, 0x0606, 0x2a66,
- 0x2a36, 0x4e1e, 0x064e, 0x1806, 0x3c66,
- 0x3c36, 0x061e, 0x184e, 0x2a06, 0x4e66,
- 0x4e36, 0x181e, 0x2a4e, 0x3c06, 0x0666,
- 0x0636, 0x2a1e, 0x3c4e, 0x4e06, 0x1866,
- 0x1838, 0x3c20, 0x4e50, 0x0608, 0x2a68,
- 0x2a38, 0x4e20, 0x0650, 0x1808, 0x3c68,
- 0x3c38, 0x0620, 0x1850, 0x2a08, 0x4e68,
- 0x4e38, 0x1820, 0x2a50, 0x3c08, 0x0668,
- 0x0638, 0x2a20, 0x3c50, 0x4e08, 0x1868,
- 0x183a, 0x3c22, 0x4e52, 0x060a, 0x2a6a,
- 0x2a3a, 0x4e22, 0x0652, 0x180a, 0x3c6a,
- 0x3c3a, 0x0622, 0x1852, 0x2a0a, 0x4e6a,
- 0x4e3a, 0x1822, 0x2a52, 0x3c0a, 0x066a,
- 0x063a, 0x2a22, 0x3c52, 0x4e0a, 0x186a,
- 0x1a30, 0x3e18, 0x5048, 0x0800, 0x2c60,
- 0x2c30, 0x5018, 0x0848, 0x1a00, 0x3e60,
- 0x3e30, 0x0818, 0x1a48, 0x2c00, 0x5060,
- 0x5030, 0x1a18, 0x2c48, 0x3e00, 0x0860,
- 0x0830, 0x2c18, 0x3e48, 0x5000, 0x1a60,
- 0x1a32, 0x3e1a, 0x504a, 0x0802, 0x2c62,
- 0x2c32, 0x501a, 0x084a, 0x1a02, 0x3e62,
- 0x3e32, 0x081a, 0x1a4a, 0x2c02, 0x5062,
- 0x5032, 0x1a1a, 0x2c4a, 0x3e02, 0x0862,
- 0x0832, 0x2c1a, 0x3e4a, 0x5002, 0x1a62,
- 0x1a34, 0x3e1c, 0x504c, 0x0804, 0x2c64,
- 0x2c34, 0x501c, 0x084c, 0x1a04, 0x3e64,
- 0x3e34, 0x081c, 0x1a4c, 0x2c04, 0x5064,
- 0x5034, 0x1a1c, 0x2c4c, 0x3e04, 0x0864,
- 0x0834, 0x2c1c, 0x3e4c, 0x5004, 0x1a64,
- 0x1a36, 0x3e1e, 0x504e, 0x0806, 0x2c66,
- 0x2c36, 0x501e, 0x084e, 0x1a06, 0x3e66,
- 0x3e36, 0x081e, 0x1a4e, 0x2c06, 0x5066,
- 0x5036, 0x1a1e, 0x2c4e, 0x3e06, 0x0866,
- 0x0836, 0x2c1e, 0x3e4e, 0x5006, 0x1a66,
- 0x1a38, 0x3e20, 0x5050, 0x0808, 0x2c68,
- 0x2c38, 0x5020, 0x0850, 0x1a08, 0x3e68,
- 0x3e38, 0x0820, 0x1a50, 0x2c08, 0x5068,
- 0x5038, 0x1a20, 0x2c50, 0x3e08, 0x0868,
- 0x0838, 0x2c20, 0x3e50, 0x5008, 0x1a68,
- 0x1a3a, 0x3e22, 0x5052, 0x080a, 0x2c6a,
- 0x2c3a, 0x5022, 0x0852, 0x1a0a, 0x3e6a,
- 0x3e3a, 0x0822, 0x1a52, 0x2c0a, 0x506a,
- 0x503a, 0x1a22, 0x2c52, 0x3e0a, 0x086a,
- 0x083a, 0x2c22, 0x3e52, 0x500a, 0x1a6a,
- 0x1c30, 0x4018, 0x5248, 0x0a00, 0x2e60,
- 0x2e30, 0x5218, 0x0a48, 0x1c00, 0x4060,
- 0x4030, 0x0a18, 0x1c48, 0x2e00, 0x5260,
- 0x5230, 0x1c18, 0x2e48, 0x4000, 0x0a60,
- 0x0a30, 0x2e18, 0x4048, 0x5200, 0x1c60,
- 0x1c32, 0x401a, 0x524a, 0x0a02, 0x2e62,
- 0x2e32, 0x521a, 0x0a4a, 0x1c02, 0x4062,
- 0x4032, 0x0a1a, 0x1c4a, 0x2e02, 0x5262,
- 0x5232, 0x1c1a, 0x2e4a, 0x4002, 0x0a62,
- 0x0a32, 0x2e1a, 0x404a, 0x5202, 0x1c62,
- 0x1c34, 0x401c, 0x524c, 0x0a04, 0x2e64,
- 0x2e34, 0x521c, 0x0a4c, 0x1c04, 0x4064,
- 0x4034, 0x0a1c, 0x1c4c, 0x2e04, 0x5264,
- 0x5234, 0x1c1c, 0x2e4c, 0x4004, 0x0a64,
- 0x0a34, 0x2e1c, 0x404c, 0x5204, 0x1c64,
- 0x1c36, 0x401e, 0x524e, 0x0a06, 0x2e66,
- 0x2e36, 0x521e, 0x0a4e, 0x1c06, 0x4066,
- 0x4036, 0x0a1e, 0x1c4e, 0x2e06, 0x5266,
- 0x5236, 0x1c1e, 0x2e4e, 0x4006, 0x0a66,
- 0x0a36, 0x2e1e, 0x404e, 0x5206, 0x1c66,
- 0x1c38, 0x4020, 0x5250, 0x0a08, 0x2e68,
- 0x2e38, 0x5220, 0x0a50, 0x1c08, 0x4068,
- 0x4038, 0x0a20, 0x1c50, 0x2e08, 0x5268,
- 0x5238, 0x1c20, 0x2e50, 0x4008, 0x0a68,
- 0x0a38, 0x2e20, 0x4050, 0x5208, 0x1c68,
- 0x1c3a, 0x4022, 0x5252, 0x0a0a, 0x2e6a,
- 0x2e3a, 0x5222, 0x0a52, 0x1c0a, 0x406a,
- 0x403a, 0x0a22, 0x1c52, 0x2e0a, 0x526a,
- 0x523a, 0x1c22, 0x2e52, 0x400a, 0x0a6a,
- 0x0a3a, 0x2e22, 0x4052, 0x520a, 0x1c6a,
- 0x1e30, 0x4218, 0x5448, 0x0c00, 0x3060,
- 0x3030, 0x5418, 0x0c48, 0x1e00, 0x4260,
- 0x4230, 0x0c18, 0x1e48, 0x3000, 0x5460,
- 0x5430, 0x1e18, 0x3048, 0x4200, 0x0c60,
- 0x0c30, 0x3018, 0x4248, 0x5400, 0x1e60,
- 0x1e32, 0x421a, 0x544a, 0x0c02, 0x3062,
- 0x3032, 0x541a, 0x0c4a, 0x1e02, 0x4262,
- 0x4232, 0x0c1a, 0x1e4a, 0x3002, 0x5462,
- 0x5432, 0x1e1a, 0x304a, 0x4202, 0x0c62,
- 0x0c32, 0x301a, 0x424a, 0x5402, 0x1e62,
- 0x1e34, 0x421c, 0x544c, 0x0c04, 0x3064,
- 0x3034, 0x541c, 0x0c4c, 0x1e04, 0x4264,
- 0x4234, 0x0c1c, 0x1e4c, 0x3004, 0x5464,
- 0x5434, 0x1e1c, 0x304c, 0x4204, 0x0c64,
- 0x0c34, 0x301c, 0x424c, 0x5404, 0x1e64,
- 0x1e36, 0x421e, 0x544e, 0x0c06, 0x3066,
- 0x3036, 0x541e, 0x0c4e, 0x1e06, 0x4266,
- 0x4236, 0x0c1e, 0x1e4e, 0x3006, 0x5466,
- 0x5436, 0x1e1e, 0x304e, 0x4206, 0x0c66,
- 0x0c36, 0x301e, 0x424e, 0x5406, 0x1e66,
- 0x1e38, 0x4220, 0x5450, 0x0c08, 0x3068,
- 0x3038, 0x5420, 0x0c50, 0x1e08, 0x4268,
- 0x4238, 0x0c20, 0x1e50, 0x3008, 0x5468,
- 0x5438, 0x1e20, 0x3050, 0x4208, 0x0c68,
- 0x0c38, 0x3020, 0x4250, 0x5408, 0x1e68,
- 0x1e3a, 0x4222, 0x5452, 0x0c0a, 0x306a,
- 0x303a, 0x5422, 0x0c52, 0x1e0a, 0x426a,
- 0x423a, 0x0c22, 0x1e52, 0x300a, 0x546a,
- 0x543a, 0x1e22, 0x3052, 0x420a, 0x0c6a,
- 0x0c3a, 0x3022, 0x4252, 0x540a, 0x1e6a,
- 0x2030, 0x4418, 0x5648, 0x0e00, 0x3260,
- 0x3230, 0x5618, 0x0e48, 0x2000, 0x4460,
- 0x4430, 0x0e18, 0x2048, 0x3200, 0x5660,
- 0x5630, 0x2018, 0x3248, 0x4400, 0x0e60,
- 0x0e30, 0x3218, 0x4448, 0x5600, 0x2060,
- 0x2032, 0x441a, 0x564a, 0x0e02, 0x3262,
- 0x3232, 0x561a, 0x0e4a, 0x2002, 0x4462,
- 0x4432, 0x0e1a, 0x204a, 0x3202, 0x5662,
- 0x5632, 0x201a, 0x324a, 0x4402, 0x0e62,
- 0x0e32, 0x321a, 0x444a, 0x5602, 0x2062,
- 0x2034, 0x441c, 0x564c, 0x0e04, 0x3264,
- 0x3234, 0x561c, 0x0e4c, 0x2004, 0x4464,
- 0x4434, 0x0e1c, 0x204c, 0x3204, 0x5664,
- 0x5634, 0x201c, 0x324c, 0x4404, 0x0e64,
- 0x0e34, 0x321c, 0x444c, 0x5604, 0x2064,
- 0x2036, 0x441e, 0x564e, 0x0e06, 0x3266,
- 0x3236, 0x561e, 0x0e4e, 0x2006, 0x4466,
- 0x4436, 0x0e1e, 0x204e, 0x3206, 0x5666,
- 0x5636, 0x201e, 0x324e, 0x4406, 0x0e66,
- 0x0e36, 0x321e, 0x444e, 0x5606, 0x2066,
- 0x2038, 0x4420, 0x5650, 0x0e08, 0x3268,
- 0x3238, 0x5620, 0x0e50, 0x2008, 0x4468,
- 0x4438, 0x0e20, 0x2050, 0x3208, 0x5668,
- 0x5638, 0x2020, 0x3250, 0x4408, 0x0e68,
- 0x0e38, 0x3220, 0x4450, 0x5608, 0x2068,
- 0x203a, 0x4422, 0x5652, 0x0e0a, 0x326a,
- 0x323a, 0x5622, 0x0e52, 0x200a, 0x446a,
- 0x443a, 0x0e22, 0x2052, 0x320a, 0x566a,
- 0x563a, 0x2022, 0x3252, 0x440a, 0x0e6a,
- 0x0e3a, 0x3222, 0x4452, 0x560a, 0x206a,
- 0x2230, 0x4618, 0x5848, 0x1000, 0x3460,
- 0x3430, 0x5818, 0x1048, 0x2200, 0x4660,
- 0x4630, 0x1018, 0x2248, 0x3400, 0x5860,
- 0x5830, 0x2218, 0x3448, 0x4600, 0x1060,
- 0x1030, 0x3418, 0x4648, 0x5800, 0x2260,
- 0x2232, 0x461a, 0x584a, 0x1002, 0x3462,
- 0x3432, 0x581a, 0x104a, 0x2202, 0x4662,
- 0x4632, 0x101a, 0x224a, 0x3402, 0x5862,
- 0x5832, 0x221a, 0x344a, 0x4602, 0x1062,
- 0x1032, 0x341a, 0x464a, 0x5802, 0x2262,
- 0x2234, 0x461c, 0x584c, 0x1004, 0x3464,
- 0x3434, 0x581c, 0x104c, 0x2204, 0x4664,
- 0x4634, 0x101c, 0x224c, 0x3404, 0x5864,
- 0x5834, 0x221c, 0x344c, 0x4604, 0x1064,
- 0x1034, 0x341c, 0x464c, 0x5804, 0x2264,
- 0x2236, 0x461e, 0x584e, 0x1006, 0x3466,
- 0x3436, 0x581e, 0x104e, 0x2206, 0x4666,
- 0x4636, 0x101e, 0x224e, 0x3406, 0x5866,
- 0x5836, 0x221e, 0x344e, 0x4606, 0x1066,
- 0x1036, 0x341e, 0x464e, 0x5806, 0x2266,
- 0x2238, 0x4620, 0x5850, 0x1008, 0x3468,
- 0x3438, 0x5820, 0x1050, 0x2208, 0x4668,
- 0x4638, 0x1020, 0x2250, 0x3408, 0x5868,
- 0x5838, 0x2220, 0x3450, 0x4608, 0x1068,
- 0x1038, 0x3420, 0x4650, 0x5808, 0x2268,
- 0x223a, 0x4622, 0x5852, 0x100a, 0x346a,
- 0x343a, 0x5822, 0x1052, 0x220a, 0x466a,
- 0x463a, 0x1022, 0x2252, 0x340a, 0x586a,
- 0x583a, 0x2222, 0x3452, 0x460a, 0x106a,
- 0x103a, 0x3422, 0x4652, 0x580a, 0x226a,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x363c, 0x0024, 0x1254, 0x240c, 0x486c,
- 0x483c, 0x1224, 0x2454, 0x360c, 0x006c,
- 0x003c, 0x2424, 0x3654, 0x480c, 0x126c,
- 0x123c, 0x3624, 0x4854, 0x000c, 0x246c,
- 0x243c, 0x4824, 0x0054, 0x120c, 0x366c,
- 0x363e, 0x0026, 0x1256, 0x240e, 0x486e,
- 0x483e, 0x1226, 0x2456, 0x360e, 0x006e,
- 0x003e, 0x2426, 0x3656, 0x480e, 0x126e,
- 0x123e, 0x3626, 0x4856, 0x000e, 0x246e,
- 0x243e, 0x4826, 0x0056, 0x120e, 0x366e,
- 0x3640, 0x0028, 0x1258, 0x2410, 0x4870,
- 0x4840, 0x1228, 0x2458, 0x3610, 0x0070,
- 0x0040, 0x2428, 0x3658, 0x4810, 0x1270,
- 0x1240, 0x3628, 0x4858, 0x0010, 0x2470,
- 0x2440, 0x4828, 0x0058, 0x1210, 0x3670,
- 0x3642, 0x002a, 0x125a, 0x2412, 0x4872,
- 0x4842, 0x122a, 0x245a, 0x3612, 0x0072,
- 0x0042, 0x242a, 0x365a, 0x4812, 0x1272,
- 0x1242, 0x362a, 0x485a, 0x0012, 0x2472,
- 0x2442, 0x482a, 0x005a, 0x1212, 0x3672,
- 0x3644, 0x002c, 0x125c, 0x2414, 0x4874,
- 0x4844, 0x122c, 0x245c, 0x3614, 0x0074,
- 0x0044, 0x242c, 0x365c, 0x4814, 0x1274,
- 0x1244, 0x362c, 0x485c, 0x0014, 0x2474,
- 0x2444, 0x482c, 0x005c, 0x1214, 0x3674,
- 0x3646, 0x002e, 0x125e, 0x2416, 0x4876,
- 0x4846, 0x122e, 0x245e, 0x3616, 0x0076,
- 0x0046, 0x242e, 0x365e, 0x4816, 0x1276,
- 0x1246, 0x362e, 0x485e, 0x0016, 0x2476,
- 0x2446, 0x482e, 0x005e, 0x1216, 0x3676,
- 0x383c, 0x0224, 0x1454, 0x260c, 0x4a6c,
- 0x4a3c, 0x1424, 0x2654, 0x380c, 0x026c,
- 0x023c, 0x2624, 0x3854, 0x4a0c, 0x146c,
- 0x143c, 0x3824, 0x4a54, 0x020c, 0x266c,
- 0x263c, 0x4a24, 0x0254, 0x140c, 0x386c,
- 0x383e, 0x0226, 0x1456, 0x260e, 0x4a6e,
- 0x4a3e, 0x1426, 0x2656, 0x380e, 0x026e,
- 0x023e, 0x2626, 0x3856, 0x4a0e, 0x146e,
- 0x143e, 0x3826, 0x4a56, 0x020e, 0x266e,
- 0x263e, 0x4a26, 0x0256, 0x140e, 0x386e,
- 0x3840, 0x0228, 0x1458, 0x2610, 0x4a70,
- 0x4a40, 0x1428, 0x2658, 0x3810, 0x0270,
- 0x0240, 0x2628, 0x3858, 0x4a10, 0x1470,
- 0x1440, 0x3828, 0x4a58, 0x0210, 0x2670,
- 0x2640, 0x4a28, 0x0258, 0x1410, 0x3870,
- 0x3842, 0x022a, 0x145a, 0x2612, 0x4a72,
- 0x4a42, 0x142a, 0x265a, 0x3812, 0x0272,
- 0x0242, 0x262a, 0x385a, 0x4a12, 0x1472,
- 0x1442, 0x382a, 0x4a5a, 0x0212, 0x2672,
- 0x2642, 0x4a2a, 0x025a, 0x1412, 0x3872,
- 0x3844, 0x022c, 0x145c, 0x2614, 0x4a74,
- 0x4a44, 0x142c, 0x265c, 0x3814, 0x0274,
- 0x0244, 0x262c, 0x385c, 0x4a14, 0x1474,
- 0x1444, 0x382c, 0x4a5c, 0x0214, 0x2674,
- 0x2644, 0x4a2c, 0x025c, 0x1414, 0x3874,
- 0x3846, 0x022e, 0x145e, 0x2616, 0x4a76,
- 0x4a46, 0x142e, 0x265e, 0x3816, 0x0276,
- 0x0246, 0x262e, 0x385e, 0x4a16, 0x1476,
- 0x1446, 0x382e, 0x4a5e, 0x0216, 0x2676,
- 0x2646, 0x4a2e, 0x025e, 0x1416, 0x3876,
- 0x3a3c, 0x0424, 0x1654, 0x280c, 0x4c6c,
- 0x4c3c, 0x1624, 0x2854, 0x3a0c, 0x046c,
- 0x043c, 0x2824, 0x3a54, 0x4c0c, 0x166c,
- 0x163c, 0x3a24, 0x4c54, 0x040c, 0x286c,
- 0x283c, 0x4c24, 0x0454, 0x160c, 0x3a6c,
- 0x3a3e, 0x0426, 0x1656, 0x280e, 0x4c6e,
- 0x4c3e, 0x1626, 0x2856, 0x3a0e, 0x046e,
- 0x043e, 0x2826, 0x3a56, 0x4c0e, 0x166e,
- 0x163e, 0x3a26, 0x4c56, 0x040e, 0x286e,
- 0x283e, 0x4c26, 0x0456, 0x160e, 0x3a6e,
- 0x3a40, 0x0428, 0x1658, 0x2810, 0x4c70,
- 0x4c40, 0x1628, 0x2858, 0x3a10, 0x0470,
- 0x0440, 0x2828, 0x3a58, 0x4c10, 0x1670,
- 0x1640, 0x3a28, 0x4c58, 0x0410, 0x2870,
- 0x2840, 0x4c28, 0x0458, 0x1610, 0x3a70,
- 0x3a42, 0x042a, 0x165a, 0x2812, 0x4c72,
- 0x4c42, 0x162a, 0x285a, 0x3a12, 0x0472,
- 0x0442, 0x282a, 0x3a5a, 0x4c12, 0x1672,
- 0x1642, 0x3a2a, 0x4c5a, 0x0412, 0x2872,
- 0x2842, 0x4c2a, 0x045a, 0x1612, 0x3a72,
- 0x3a44, 0x042c, 0x165c, 0x2814, 0x4c74,
- 0x4c44, 0x162c, 0x285c, 0x3a14, 0x0474,
- 0x0444, 0x282c, 0x3a5c, 0x4c14, 0x1674,
- 0x1644, 0x3a2c, 0x4c5c, 0x0414, 0x2874,
- 0x2844, 0x4c2c, 0x045c, 0x1614, 0x3a74,
- 0x3a46, 0x042e, 0x165e, 0x2816, 0x4c76,
- 0x4c46, 0x162e, 0x285e, 0x3a16, 0x0476,
- 0x0446, 0x282e, 0x3a5e, 0x4c16, 0x1676,
- 0x1646, 0x3a2e, 0x4c5e, 0x0416, 0x2876,
- 0x2846, 0x4c2e, 0x045e, 0x1616, 0x3a76,
- 0x3c3c, 0x0624, 0x1854, 0x2a0c, 0x4e6c,
- 0x4e3c, 0x1824, 0x2a54, 0x3c0c, 0x066c,
- 0x063c, 0x2a24, 0x3c54, 0x4e0c, 0x186c,
- 0x183c, 0x3c24, 0x4e54, 0x060c, 0x2a6c,
- 0x2a3c, 0x4e24, 0x0654, 0x180c, 0x3c6c,
- 0x3c3e, 0x0626, 0x1856, 0x2a0e, 0x4e6e,
- 0x4e3e, 0x1826, 0x2a56, 0x3c0e, 0x066e,
- 0x063e, 0x2a26, 0x3c56, 0x4e0e, 0x186e,
- 0x183e, 0x3c26, 0x4e56, 0x060e, 0x2a6e,
- 0x2a3e, 0x4e26, 0x0656, 0x180e, 0x3c6e,
- 0x3c40, 0x0628, 0x1858, 0x2a10, 0x4e70,
- 0x4e40, 0x1828, 0x2a58, 0x3c10, 0x0670,
- 0x0640, 0x2a28, 0x3c58, 0x4e10, 0x1870,
- 0x1840, 0x3c28, 0x4e58, 0x0610, 0x2a70,
- 0x2a40, 0x4e28, 0x0658, 0x1810, 0x3c70,
- 0x3c42, 0x062a, 0x185a, 0x2a12, 0x4e72,
- 0x4e42, 0x182a, 0x2a5a, 0x3c12, 0x0672,
- 0x0642, 0x2a2a, 0x3c5a, 0x4e12, 0x1872,
- 0x1842, 0x3c2a, 0x4e5a, 0x0612, 0x2a72,
- 0x2a42, 0x4e2a, 0x065a, 0x1812, 0x3c72,
- 0x3c44, 0x062c, 0x185c, 0x2a14, 0x4e74,
- 0x4e44, 0x182c, 0x2a5c, 0x3c14, 0x0674,
- 0x0644, 0x2a2c, 0x3c5c, 0x4e14, 0x1874,
- 0x1844, 0x3c2c, 0x4e5c, 0x0614, 0x2a74,
- 0x2a44, 0x4e2c, 0x065c, 0x1814, 0x3c74,
- 0x3c46, 0x062e, 0x185e, 0x2a16, 0x4e76,
- 0x4e46, 0x182e, 0x2a5e, 0x3c16, 0x0676,
- 0x0646, 0x2a2e, 0x3c5e, 0x4e16, 0x1876,
- 0x1846, 0x3c2e, 0x4e5e, 0x0616, 0x2a76,
- 0x2a46, 0x4e2e, 0x065e, 0x1816, 0x3c76,
- 0x3e3c, 0x0824, 0x1a54, 0x2c0c, 0x506c,
- 0x503c, 0x1a24, 0x2c54, 0x3e0c, 0x086c,
- 0x083c, 0x2c24, 0x3e54, 0x500c, 0x1a6c,
- 0x1a3c, 0x3e24, 0x5054, 0x080c, 0x2c6c,
- 0x2c3c, 0x5024, 0x0854, 0x1a0c, 0x3e6c,
- 0x3e3e, 0x0826, 0x1a56, 0x2c0e, 0x506e,
- 0x503e, 0x1a26, 0x2c56, 0x3e0e, 0x086e,
- 0x083e, 0x2c26, 0x3e56, 0x500e, 0x1a6e,
- 0x1a3e, 0x3e26, 0x5056, 0x080e, 0x2c6e,
- 0x2c3e, 0x5026, 0x0856, 0x1a0e, 0x3e6e,
- 0x3e40, 0x0828, 0x1a58, 0x2c10, 0x5070,
- 0x5040, 0x1a28, 0x2c58, 0x3e10, 0x0870,
- 0x0840, 0x2c28, 0x3e58, 0x5010, 0x1a70,
- 0x1a40, 0x3e28, 0x5058, 0x0810, 0x2c70,
- 0x2c40, 0x5028, 0x0858, 0x1a10, 0x3e70,
- 0x3e42, 0x082a, 0x1a5a, 0x2c12, 0x5072,
- 0x5042, 0x1a2a, 0x2c5a, 0x3e12, 0x0872,
- 0x0842, 0x2c2a, 0x3e5a, 0x5012, 0x1a72,
- 0x1a42, 0x3e2a, 0x505a, 0x0812, 0x2c72,
- 0x2c42, 0x502a, 0x085a, 0x1a12, 0x3e72,
- 0x3e44, 0x082c, 0x1a5c, 0x2c14, 0x5074,
- 0x5044, 0x1a2c, 0x2c5c, 0x3e14, 0x0874,
- 0x0844, 0x2c2c, 0x3e5c, 0x5014, 0x1a74,
- 0x1a44, 0x3e2c, 0x505c, 0x0814, 0x2c74,
- 0x2c44, 0x502c, 0x085c, 0x1a14, 0x3e74,
- 0x3e46, 0x082e, 0x1a5e, 0x2c16, 0x5076,
- 0x5046, 0x1a2e, 0x2c5e, 0x3e16, 0x0876,
- 0x0846, 0x2c2e, 0x3e5e, 0x5016, 0x1a76,
- 0x1a46, 0x3e2e, 0x505e, 0x0816, 0x2c76,
- 0x2c46, 0x502e, 0x085e, 0x1a16, 0x3e76,
- 0x403c, 0x0a24, 0x1c54, 0x2e0c, 0x526c,
- 0x523c, 0x1c24, 0x2e54, 0x400c, 0x0a6c,
- 0x0a3c, 0x2e24, 0x4054, 0x520c, 0x1c6c,
- 0x1c3c, 0x4024, 0x5254, 0x0a0c, 0x2e6c,
- 0x2e3c, 0x5224, 0x0a54, 0x1c0c, 0x406c,
- 0x403e, 0x0a26, 0x1c56, 0x2e0e, 0x526e,
- 0x523e, 0x1c26, 0x2e56, 0x400e, 0x0a6e,
- 0x0a3e, 0x2e26, 0x4056, 0x520e, 0x1c6e,
- 0x1c3e, 0x4026, 0x5256, 0x0a0e, 0x2e6e,
- 0x2e3e, 0x5226, 0x0a56, 0x1c0e, 0x406e,
- 0x4040, 0x0a28, 0x1c58, 0x2e10, 0x5270,
- 0x5240, 0x1c28, 0x2e58, 0x4010, 0x0a70,
- 0x0a40, 0x2e28, 0x4058, 0x5210, 0x1c70,
- 0x1c40, 0x4028, 0x5258, 0x0a10, 0x2e70,
- 0x2e40, 0x5228, 0x0a58, 0x1c10, 0x4070,
- 0x4042, 0x0a2a, 0x1c5a, 0x2e12, 0x5272,
- 0x5242, 0x1c2a, 0x2e5a, 0x4012, 0x0a72,
- 0x0a42, 0x2e2a, 0x405a, 0x5212, 0x1c72,
- 0x1c42, 0x402a, 0x525a, 0x0a12, 0x2e72,
- 0x2e42, 0x522a, 0x0a5a, 0x1c12, 0x4072,
- 0x4044, 0x0a2c, 0x1c5c, 0x2e14, 0x5274,
- 0x5244, 0x1c2c, 0x2e5c, 0x4014, 0x0a74,
- 0x0a44, 0x2e2c, 0x405c, 0x5214, 0x1c74,
- 0x1c44, 0x402c, 0x525c, 0x0a14, 0x2e74,
- 0x2e44, 0x522c, 0x0a5c, 0x1c14, 0x4074,
- 0x4046, 0x0a2e, 0x1c5e, 0x2e16, 0x5276,
- 0x5246, 0x1c2e, 0x2e5e, 0x4016, 0x0a76,
- 0x0a46, 0x2e2e, 0x405e, 0x5216, 0x1c76,
- 0x1c46, 0x402e, 0x525e, 0x0a16, 0x2e76,
- 0x2e46, 0x522e, 0x0a5e, 0x1c16, 0x4076,
- 0x423c, 0x0c24, 0x1e54, 0x300c, 0x546c,
- 0x543c, 0x1e24, 0x3054, 0x420c, 0x0c6c,
- 0x0c3c, 0x3024, 0x4254, 0x540c, 0x1e6c,
- 0x1e3c, 0x4224, 0x5454, 0x0c0c, 0x306c,
- 0x303c, 0x5424, 0x0c54, 0x1e0c, 0x426c,
- 0x423e, 0x0c26, 0x1e56, 0x300e, 0x546e,
- 0x543e, 0x1e26, 0x3056, 0x420e, 0x0c6e,
- 0x0c3e, 0x3026, 0x4256, 0x540e, 0x1e6e,
- 0x1e3e, 0x4226, 0x5456, 0x0c0e, 0x306e,
- 0x303e, 0x5426, 0x0c56, 0x1e0e, 0x426e,
- 0x4240, 0x0c28, 0x1e58, 0x3010, 0x5470,
- 0x5440, 0x1e28, 0x3058, 0x4210, 0x0c70,
- 0x0c40, 0x3028, 0x4258, 0x5410, 0x1e70,
- 0x1e40, 0x4228, 0x5458, 0x0c10, 0x3070,
- 0x3040, 0x5428, 0x0c58, 0x1e10, 0x4270,
- 0x4242, 0x0c2a, 0x1e5a, 0x3012, 0x5472,
- 0x5442, 0x1e2a, 0x305a, 0x4212, 0x0c72,
- 0x0c42, 0x302a, 0x425a, 0x5412, 0x1e72,
- 0x1e42, 0x422a, 0x545a, 0x0c12, 0x3072,
- 0x3042, 0x542a, 0x0c5a, 0x1e12, 0x4272,
- 0x4244, 0x0c2c, 0x1e5c, 0x3014, 0x5474,
- 0x5444, 0x1e2c, 0x305c, 0x4214, 0x0c74,
- 0x0c44, 0x302c, 0x425c, 0x5414, 0x1e74,
- 0x1e44, 0x422c, 0x545c, 0x0c14, 0x3074,
- 0x3044, 0x542c, 0x0c5c, 0x1e14, 0x4274,
- 0x4246, 0x0c2e, 0x1e5e, 0x3016, 0x5476,
- 0x5446, 0x1e2e, 0x305e, 0x4216, 0x0c76,
- 0x0c46, 0x302e, 0x425e, 0x5416, 0x1e76,
- 0x1e46, 0x422e, 0x545e, 0x0c16, 0x3076,
- 0x3046, 0x542e, 0x0c5e, 0x1e16, 0x4276,
- 0x443c, 0x0e24, 0x2054, 0x320c, 0x566c,
- 0x563c, 0x2024, 0x3254, 0x440c, 0x0e6c,
- 0x0e3c, 0x3224, 0x4454, 0x560c, 0x206c,
- 0x203c, 0x4424, 0x5654, 0x0e0c, 0x326c,
- 0x323c, 0x5624, 0x0e54, 0x200c, 0x446c,
- 0x443e, 0x0e26, 0x2056, 0x320e, 0x566e,
- 0x563e, 0x2026, 0x3256, 0x440e, 0x0e6e,
- 0x0e3e, 0x3226, 0x4456, 0x560e, 0x206e,
- 0x203e, 0x4426, 0x5656, 0x0e0e, 0x326e,
- 0x323e, 0x5626, 0x0e56, 0x200e, 0x446e,
- 0x4440, 0x0e28, 0x2058, 0x3210, 0x5670,
- 0x5640, 0x2028, 0x3258, 0x4410, 0x0e70,
- 0x0e40, 0x3228, 0x4458, 0x5610, 0x2070,
- 0x2040, 0x4428, 0x5658, 0x0e10, 0x3270,
- 0x3240, 0x5628, 0x0e58, 0x2010, 0x4470,
- 0x4442, 0x0e2a, 0x205a, 0x3212, 0x5672,
- 0x5642, 0x202a, 0x325a, 0x4412, 0x0e72,
- 0x0e42, 0x322a, 0x445a, 0x5612, 0x2072,
- 0x2042, 0x442a, 0x565a, 0x0e12, 0x3272,
- 0x3242, 0x562a, 0x0e5a, 0x2012, 0x4472,
- 0x4444, 0x0e2c, 0x205c, 0x3214, 0x5674,
- 0x5644, 0x202c, 0x325c, 0x4414, 0x0e74,
- 0x0e44, 0x322c, 0x445c, 0x5614, 0x2074,
- 0x2044, 0x442c, 0x565c, 0x0e14, 0x3274,
- 0x3244, 0x562c, 0x0e5c, 0x2014, 0x4474,
- 0x4446, 0x0e2e, 0x205e, 0x3216, 0x5676,
- 0x5646, 0x202e, 0x325e, 0x4416, 0x0e76,
- 0x0e46, 0x322e, 0x445e, 0x5616, 0x2076,
- 0x2046, 0x442e, 0x565e, 0x0e16, 0x3276,
- 0x3246, 0x562e, 0x0e5e, 0x2016, 0x4476,
- 0x463c, 0x1024, 0x2254, 0x340c, 0x586c,
- 0x583c, 0x2224, 0x3454, 0x460c, 0x106c,
- 0x103c, 0x3424, 0x4654, 0x580c, 0x226c,
- 0x223c, 0x4624, 0x5854, 0x100c, 0x346c,
- 0x343c, 0x5824, 0x1054, 0x220c, 0x466c,
- 0x463e, 0x1026, 0x2256, 0x340e, 0x586e,
- 0x583e, 0x2226, 0x3456, 0x460e, 0x106e,
- 0x103e, 0x3426, 0x4656, 0x580e, 0x226e,
- 0x223e, 0x4626, 0x5856, 0x100e, 0x346e,
- 0x343e, 0x5826, 0x1056, 0x220e, 0x466e,
- 0x4640, 0x1028, 0x2258, 0x3410, 0x5870,
- 0x5840, 0x2228, 0x3458, 0x4610, 0x1070,
- 0x1040, 0x3428, 0x4658, 0x5810, 0x2270,
- 0x2240, 0x4628, 0x5858, 0x1010, 0x3470,
- 0x3440, 0x5828, 0x1058, 0x2210, 0x4670,
- 0x4642, 0x102a, 0x225a, 0x3412, 0x5872,
- 0x5842, 0x222a, 0x345a, 0x4612, 0x1072,
- 0x1042, 0x342a, 0x465a, 0x5812, 0x2272,
- 0x2242, 0x462a, 0x585a, 0x1012, 0x3472,
- 0x3442, 0x582a, 0x105a, 0x2212, 0x4672,
- 0x4644, 0x102c, 0x225c, 0x3414, 0x5874,
- 0x5844, 0x222c, 0x345c, 0x4614, 0x1074,
- 0x1044, 0x342c, 0x465c, 0x5814, 0x2274,
- 0x2244, 0x462c, 0x585c, 0x1014, 0x3474,
- 0x3444, 0x582c, 0x105c, 0x2214, 0x4674,
- 0x4646, 0x102e, 0x225e, 0x3416, 0x5876,
- 0x5846, 0x222e, 0x345e, 0x4616, 0x1076,
- 0x1046, 0x342e, 0x465e, 0x5816, 0x2276,
- 0x2246, 0x462e, 0x585e, 0x1016, 0x3476,
- 0x3446, 0x582e, 0x105e, 0x2216, 0x4676,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
- 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
-};
-
-
/* DV25/50 DCT coefficient weights and inverse weights */
/* created by dvtables.py */
static const int dv_weight_bits = 18;
@@ -6175,6 +470,10 @@ static DVwork_chunk work_chunks_dv100ntscp[2*10*27];
static DVwork_chunk work_chunks_dv100pali [4*12*27];
static DVwork_chunk work_chunks_dv100ntsci[4*10*27];
+static uint32_t dv_idct_factor_sd [2*2*22*64];
+static uint32_t dv_idct_factor_hd1080[2*4*16*64];
+static uint32_t dv_idct_factor_hd720 [2*4*16*64];
+
static const DVprofile dv_profiles[] = {
{ .dsf = 0,
.video_stype = 0x0,
@@ -6186,8 +485,8 @@ static const DVprofile dv_profiles[] = {
.height = 480,
.width = 720,
.sar = {{10, 11}, {40, 33}},
- .video_place = dv_place_411,
.work_chunks = &work_chunks_dv25ntsc[0],
+ .idct_factor = &dv_idct_factor_sd[0],
.pix_fmt = PIX_FMT_YUV411P,
.bpm = 6,
.block_sizes = block_sizes_dv2550,
@@ -6206,8 +505,8 @@ static const DVprofile dv_profiles[] = {
.height = 576,
.width = 720,
.sar = {{59, 54}, {118, 81}},
- .video_place = dv_place_420,
.work_chunks = &work_chunks_dv25pal[0],
+ .idct_factor = &dv_idct_factor_sd[0],
.pix_fmt = PIX_FMT_YUV420P,
.bpm = 6,
.block_sizes = block_sizes_dv2550,
@@ -6226,8 +525,8 @@ static const DVprofile dv_profiles[] = {
.height = 576,
.width = 720,
.sar = {{59, 54}, {118, 81}},
- .video_place = dv_place_411P,
.work_chunks = &work_chunks_dv25pal411[0],
+ .idct_factor = &dv_idct_factor_sd[0],
.pix_fmt = PIX_FMT_YUV411P,
.bpm = 6,
.block_sizes = block_sizes_dv2550,
@@ -6246,8 +545,8 @@ static const DVprofile dv_profiles[] = {
.height = 480,
.width = 720,
.sar = {{10, 11}, {40, 33}},
- .video_place = dv_place_422_525,
.work_chunks = &work_chunks_dv50ntsc[0],
+ .idct_factor = &dv_idct_factor_sd[0],
.pix_fmt = PIX_FMT_YUV422P,
.bpm = 6,
.block_sizes = block_sizes_dv2550,
@@ -6266,8 +565,8 @@ static const DVprofile dv_profiles[] = {
.height = 576,
.width = 720,
.sar = {{59, 54}, {118, 81}},
- .video_place = dv_place_422_625,
.work_chunks = &work_chunks_dv50pal[0],
+ .idct_factor = &dv_idct_factor_sd[0],
.pix_fmt = PIX_FMT_YUV422P,
.bpm = 6,
.block_sizes = block_sizes_dv2550,
@@ -6286,8 +585,8 @@ static const DVprofile dv_profiles[] = {
.height = 1080,
.width = 1280,
.sar = {{1, 1}, {1, 1}},
- .video_place = dv_place_1080i60,
.work_chunks = &work_chunks_dv100ntsci[0],
+ .idct_factor = &dv_idct_factor_hd1080[0],
.pix_fmt = PIX_FMT_YUV422P,
.bpm = 8,
.block_sizes = block_sizes_dv100,
@@ -6306,8 +605,8 @@ static const DVprofile dv_profiles[] = {
.height = 1080,
.width = 1440,
.sar = {{1, 1}, {1, 1}},
- .video_place = dv_place_1080i50,
.work_chunks = &work_chunks_dv100pali[0],
+ .idct_factor = &dv_idct_factor_hd1080[0],
.pix_fmt = PIX_FMT_YUV422P,
.bpm = 8,
.block_sizes = block_sizes_dv100,
@@ -6326,8 +625,8 @@ static const DVprofile dv_profiles[] = {
.height = 720,
.width = 960,
.sar = {{1, 1}, {1, 1}},
- .video_place = dv_place_720p60,
.work_chunks = &work_chunks_dv100ntscp[0],
+ .idct_factor = &dv_idct_factor_hd720[0],
.pix_fmt = PIX_FMT_YUV422P,
.bpm = 8,
.block_sizes = block_sizes_dv100,
@@ -6346,8 +645,8 @@ static const DVprofile dv_profiles[] = {
.height = 720,
.width = 960,
.sar = {{1, 1}, {1, 1}},
- .video_place = dv_place_720p50,
.work_chunks = &work_chunks_dv100palp[0],
+ .idct_factor = &dv_idct_factor_hd720[0],
.pix_fmt = PIX_FMT_YUV422P,
.bpm = 8,
.block_sizes = block_sizes_dv100,
diff --git a/libavcodec/dvdsubdec.c b/libavcodec/dvdsubdec.c
index f95c329..dac152b 100644
--- a/libavcodec/dvdsubdec.c
+++ b/libavcodec/dvdsubdec.c
@@ -319,17 +319,19 @@ static int decode_dvd_subtitles(AVSubtitle *sub_header,
if (w > 0 && h > 0) {
if (sub_header->rects != NULL) {
for (i = 0; i < sub_header->num_rects; i++) {
- av_free(sub_header->rects[i].bitmap);
- av_free(sub_header->rects[i].rgba_palette);
+ av_freep(&sub_header->rects[i]->pict.data[0]);
+ av_freep(&sub_header->rects[i]->pict.data[1]);
+ av_freep(&sub_header->rects[i]);
}
av_freep(&sub_header->rects);
sub_header->num_rects = 0;
}
bitmap = av_malloc(w * h);
- sub_header->rects = av_mallocz(sizeof(AVSubtitleRect));
+ sub_header->rects = av_mallocz(sizeof(*sub_header->rects));
+ sub_header->rects[0] = av_mallocz(sizeof(AVSubtitleRect));
sub_header->num_rects = 1;
- sub_header->rects[0].bitmap = bitmap;
+ sub_header->rects[0]->pict.data[0] = bitmap;
decode_rle(bitmap, w * 2, w, (h + 1) / 2,
buf, offset1, buf_size, is_8bit);
decode_rle(bitmap + w, w * 2, w, h / 2,
@@ -337,20 +339,20 @@ static int decode_dvd_subtitles(AVSubtitle *sub_header,
if (is_8bit) {
if (yuv_palette == 0)
goto fail;
- sub_header->rects[0].rgba_palette = av_malloc(256 * 4);
- sub_header->rects[0].nb_colors = 256;
- yuv_a_to_rgba(yuv_palette, alpha, sub_header->rects[0].rgba_palette, 256);
+ sub_header->rects[0]->pict.data[1] = av_malloc(256 * 4);
+ sub_header->rects[0]->nb_colors = 256;
+ yuv_a_to_rgba(yuv_palette, alpha, (uint32_t*)sub_header->rects[0]->pict.data[1], 256);
} else {
- sub_header->rects[0].rgba_palette = av_malloc(4 * 4);
- sub_header->rects[0].nb_colors = 4;
- guess_palette(sub_header->rects[0].rgba_palette,
+ sub_header->rects[0]->pict.data[1] = av_malloc(4 * 4);
+ sub_header->rects[0]->nb_colors = 4;
+ guess_palette((uint32_t*)sub_header->rects[0]->pict.data[1],
colormap, alpha, 0xffff00);
}
- sub_header->rects[0].x = x1;
- sub_header->rects[0].y = y1;
- sub_header->rects[0].w = w;
- sub_header->rects[0].h = h;
- sub_header->rects[0].linesize = w;
+ sub_header->rects[0]->x = x1;
+ sub_header->rects[0]->y = y1;
+ sub_header->rects[0]->w = w;
+ sub_header->rects[0]->h = h;
+ sub_header->rects[0]->pict.linesize[0] = w;
}
}
if (next_cmd_pos == cmd_pos)
@@ -362,8 +364,9 @@ static int decode_dvd_subtitles(AVSubtitle *sub_header,
fail:
if (sub_header->rects != NULL) {
for (i = 0; i < sub_header->num_rects; i++) {
- av_free(sub_header->rects[i].bitmap);
- av_free(sub_header->rects[i].rgba_palette);
+ av_freep(&sub_header->rects[i]->pict.data[0]);
+ av_freep(&sub_header->rects[i]->pict.data[1]);
+ av_freep(&sub_header->rects[i]);
}
av_freep(&sub_header->rects);
sub_header->num_rects = 0;
@@ -390,34 +393,34 @@ static int find_smallest_bounding_rectangle(AVSubtitle *s)
int y1, y2, x1, x2, y, w, h, i;
uint8_t *bitmap;
- if (s->num_rects == 0 || s->rects == NULL || s->rects[0].w <= 0 || s->rects[0].h <= 0)
+ if (s->num_rects == 0 || s->rects == NULL || s->rects[0]->w <= 0 || s->rects[0]->h <= 0)
return 0;
memset(transp_color, 0, 256);
- for(i = 0; i < s->rects[0].nb_colors; i++) {
- if ((s->rects[0].rgba_palette[i] >> 24) == 0)
+ for(i = 0; i < s->rects[0]->nb_colors; i++) {
+ if ((((uint32_t*)s->rects[0]->pict.data[1])[i] >> 24) == 0)
transp_color[i] = 1;
}
y1 = 0;
- while (y1 < s->rects[0].h && is_transp(s->rects[0].bitmap + y1 * s->rects[0].linesize,
- 1, s->rects[0].w, transp_color))
+ while (y1 < s->rects[0]->h && is_transp(s->rects[0]->pict.data[0] + y1 * s->rects[0]->pict.linesize[0],
+ 1, s->rects[0]->w, transp_color))
y1++;
- if (y1 == s->rects[0].h) {
- av_freep(&s->rects[0].bitmap);
- s->rects[0].w = s->rects[0].h = 0;
+ if (y1 == s->rects[0]->h) {
+ av_freep(&s->rects[0]->pict.data[0]);
+ s->rects[0]->w = s->rects[0]->h = 0;
return 0;
}
- y2 = s->rects[0].h - 1;
- while (y2 > 0 && is_transp(s->rects[0].bitmap + y2 * s->rects[0].linesize, 1,
- s->rects[0].w, transp_color))
+ y2 = s->rects[0]->h - 1;
+ while (y2 > 0 && is_transp(s->rects[0]->pict.data[0] + y2 * s->rects[0]->pict.linesize[0], 1,
+ s->rects[0]->w, transp_color))
y2--;
x1 = 0;
- while (x1 < (s->rects[0].w - 1) && is_transp(s->rects[0].bitmap + x1, s->rects[0].linesize,
- s->rects[0].h, transp_color))
+ while (x1 < (s->rects[0]->w - 1) && is_transp(s->rects[0]->pict.data[0] + x1, s->rects[0]->pict.linesize[0],
+ s->rects[0]->h, transp_color))
x1++;
- x2 = s->rects[0].w - 1;
- while (x2 > 0 && is_transp(s->rects[0].bitmap + x2, s->rects[0].linesize, s->rects[0].h,
+ x2 = s->rects[0]->w - 1;
+ while (x2 > 0 && is_transp(s->rects[0]->pict.data[0] + x2, s->rects[0]->pict.linesize[0], s->rects[0]->h,
transp_color))
x2--;
w = x2 - x1 + 1;
@@ -426,15 +429,15 @@ static int find_smallest_bounding_rectangle(AVSubtitle *s)
if (!bitmap)
return 1;
for(y = 0; y < h; y++) {
- memcpy(bitmap + w * y, s->rects[0].bitmap + x1 + (y1 + y) * s->rects[0].linesize, w);
+ memcpy(bitmap + w * y, s->rects[0]->pict.data[0] + x1 + (y1 + y) * s->rects[0]->pict.linesize[0], w);
}
- av_freep(&s->rects[0].bitmap);
- s->rects[0].bitmap = bitmap;
- s->rects[0].linesize = w;
- s->rects[0].w = w;
- s->rects[0].h = h;
- s->rects[0].x += x1;
- s->rects[0].y += y1;
+ av_freep(&s->rects[0]->pict.data[0]);
+ s->rects[0]->pict.data[0] = bitmap;
+ s->rects[0]->pict.linesize[0] = w;
+ s->rects[0]->w = w;
+ s->rects[0]->h = h;
+ s->rects[0]->x += x1;
+ s->rects[0]->y += y1;
return 1;
}
@@ -491,8 +494,8 @@ static int dvdsub_decode(AVCodecContext *avctx,
av_log(NULL, AV_LOG_INFO, "start=%d ms end =%d ms\n",
sub->start_display_time,
sub->end_display_time);
- ppm_save("/tmp/a.ppm", sub->rects[0].bitmap,
- sub->rects[0].w, sub->rects[0].h, sub->rects[0].rgba_palette);
+ ppm_save("/tmp/a.ppm", sub->rects[0]->pict.data[0],
+ sub->rects[0]->w, sub->rects[0]->h, sub->rects[0]->pict.data[1]);
#endif
*data_size = 1;
diff --git a/libavcodec/dvdsubenc.c b/libavcodec/dvdsubenc.c
index a5380ce..5f6bc21 100644
--- a/libavcodec/dvdsubenc.c
+++ b/libavcodec/dvdsubenc.c
@@ -108,10 +108,10 @@ static int encode_dvd_subtitles(uint8_t *outbuf, int outbuf_size,
cmap[i] = 0;
}
for (object_id = 0; object_id < rects; object_id++)
- for (i=0; i<h->rects[object_id].w*h->rects[object_id].h; ++i) {
- color = h->rects[object_id].bitmap[i];
+ for (i=0; i<h->rects[object_id]->w*h->rects[object_id]->h; ++i) {
+ color = h->rects[object_id]->pict.data[0][i];
// only count non-transparent pixels
- alpha = h->rects[object_id].rgba_palette[color] >> 24;
+ alpha = ((uint32_t*)h->rects[object_id]->pict.data[1])[color] >> 24;
hist[color] += alpha;
}
for (color=3;; --color) {
@@ -138,19 +138,19 @@ static int encode_dvd_subtitles(uint8_t *outbuf, int outbuf_size,
for (object_id = 0; object_id < rects; object_id++) {
offset1[object_id] = q - outbuf;
// worst case memory requirement: 1 nibble per pixel..
- if ((q - outbuf) + h->rects[object_id].w*h->rects[object_id].h/2
+ if ((q - outbuf) + h->rects[object_id]->w*h->rects[object_id]->h/2
+ 17*rects + 21 > outbuf_size) {
av_log(NULL, AV_LOG_ERROR, "dvd_subtitle too big\n");
return -1;
}
- dvd_encode_rle(&q, h->rects[object_id].bitmap,
- h->rects[object_id].w*2,
- h->rects[object_id].w, h->rects[object_id].h >> 1,
+ dvd_encode_rle(&q, h->rects[object_id]->pict.data[0],
+ h->rects[object_id]->w*2,
+ h->rects[object_id]->w, h->rects[object_id]->h >> 1,
cmap);
offset2[object_id] = q - outbuf;
- dvd_encode_rle(&q, h->rects[object_id].bitmap + h->rects[object_id].w,
- h->rects[object_id].w*2,
- h->rects[object_id].w, h->rects[object_id].h >> 1,
+ dvd_encode_rle(&q, h->rects[object_id]->pict.data[0] + h->rects[object_id]->w,
+ h->rects[object_id]->w*2,
+ h->rects[object_id]->w, h->rects[object_id]->h >> 1,
cmap);
}
@@ -170,17 +170,17 @@ static int encode_dvd_subtitles(uint8_t *outbuf, int outbuf_size,
// XXX not sure if more than one rect can really be encoded..
// 12 bytes per rect
for (object_id = 0; object_id < rects; object_id++) {
- int x2 = h->rects[object_id].x + h->rects[object_id].w - 1;
- int y2 = h->rects[object_id].y + h->rects[object_id].h - 1;
+ int x2 = h->rects[object_id]->x + h->rects[object_id]->w - 1;
+ int y2 = h->rects[object_id]->y + h->rects[object_id]->h - 1;
*q++ = 0x05;
// x1 x2 -> 6 nibbles
- *q++ = h->rects[object_id].x >> 4;
- *q++ = (h->rects[object_id].x << 4) | ((x2 >> 8) & 0xf);
+ *q++ = h->rects[object_id]->x >> 4;
+ *q++ = (h->rects[object_id]->x << 4) | ((x2 >> 8) & 0xf);
*q++ = x2;
// y1 y2 -> 6 nibbles
- *q++ = h->rects[object_id].y >> 4;
- *q++ = (h->rects[object_id].y << 4) | ((y2 >> 8) & 0xf);
+ *q++ = h->rects[object_id]->y >> 4;
+ *q++ = (h->rects[object_id]->y << 4) | ((y2 >> 8) & 0xf);
*q++ = y2;
*q++ = 0x06;
diff --git a/libavcodec/eac3dec.c b/libavcodec/eac3dec.c
index 0b10b41..f57c1cc 100644
--- a/libavcodec/eac3dec.c
+++ b/libavcodec/eac3dec.c
@@ -21,6 +21,8 @@
*/
#include "avcodec.h"
+#include "internal.h"
+#include "aac_ac3_parser.h"
#include "ac3.h"
#include "ac3_parser.h"
#include "ac3dec.h"
@@ -182,11 +184,11 @@ int ff_eac3_parse_header(AC3DecodeContext *s)
application can select from. each independent stream can also contain
dependent streams which are used to add or replace channels. */
if (s->frame_type == EAC3_FRAME_TYPE_DEPENDENT) {
- av_log_missing_feature(s->avctx, "Dependent substream decoding", 1);
- return AC3_PARSE_ERROR_FRAME_TYPE;
+ ff_log_missing_feature(s->avctx, "Dependent substream decoding", 1);
+ return AAC_AC3_PARSE_ERROR_FRAME_TYPE;
} else if (s->frame_type == EAC3_FRAME_TYPE_RESERVED) {
av_log(s->avctx, AV_LOG_ERROR, "Reserved frame type\n");
- return AC3_PARSE_ERROR_FRAME_TYPE;
+ return AAC_AC3_PARSE_ERROR_FRAME_TYPE;
}
/* The substream id indicates which substream this frame belongs to. each
@@ -194,8 +196,8 @@ int ff_eac3_parse_header(AC3DecodeContext *s)
associated to an independent stream have matching substream id's. */
if (s->substreamid) {
/* only decode substream with id=0. skip any additional substreams. */
- av_log_missing_feature(s->avctx, "Additional substreams", 1);
- return AC3_PARSE_ERROR_FRAME_TYPE;
+ ff_log_missing_feature(s->avctx, "Additional substreams", 1);
+ return AAC_AC3_PARSE_ERROR_FRAME_TYPE;
}
if (s->bit_alloc_params.sr_code == EAC3_SR_CODE_REDUCED) {
@@ -203,7 +205,7 @@ int ff_eac3_parse_header(AC3DecodeContext *s)
rates in bit allocation. The best assumption would be that it is
handled like AC-3 DolbyNet, but we cannot be sure until we have a
sample which utilizes this feature. */
- av_log_missing_feature(s->avctx, "Reduced sampling rates", 1);
+ ff_log_missing_feature(s->avctx, "Reduced sampling rates", 1);
return -1;
}
skip_bits(gbc, 5); // skip bitstream id
@@ -460,7 +462,7 @@ int ff_eac3_parse_header(AC3DecodeContext *s)
/* spectral extension attenuation data */
if (parse_spx_atten_data) {
- av_log_missing_feature(s->avctx, "Spectral extension attenuation", 1);
+ ff_log_missing_feature(s->avctx, "Spectral extension attenuation", 1);
for (ch = 1; ch <= s->fbw_channels; ch++) {
if (get_bits1(gbc)) { // channel has spx attenuation
skip_bits(gbc, 5); // skip spx attenuation code
@@ -475,7 +477,8 @@ int ff_eac3_parse_header(AC3DecodeContext *s)
The spec does not say what this data is or what it's used for.
It is likely the offset of each block within the frame. */
int block_start_bits = (s->num_blocks-1) * (4 + av_log2(s->frame_size-2));
- skip_bits(gbc, block_start_bits);
+ skip_bits_long(gbc, block_start_bits);
+ ff_log_missing_feature(s->avctx, "Block start info", 1);
}
/* syntax state initialization */
diff --git a/libavcodec/eatgq.c b/libavcodec/eatgq.c
index b65c8a5..fa8faf4 100644
--- a/libavcodec/eatgq.c
+++ b/libavcodec/eatgq.c
@@ -33,7 +33,7 @@
#include "bitstream.h"
#include "bytestream.h"
#include "dsputil.h"
-extern const uint16_t ff_inv_aanscales[64]; //mpegvideo_enc.c
+#include "aandcttab.h"
typedef struct TgqContext {
AVCodecContext *avctx;
diff --git a/libavcodec/error_resilience.c b/libavcodec/error_resilience.c
index f15b0b6..bdd3a66 100644
--- a/libavcodec/error_resilience.c
+++ b/libavcodec/error_resilience.c
@@ -563,7 +563,7 @@ static int is_intra_more_likely(MpegEncContext *s){
if(undamaged_count < 5) return 0; //almost all MBs damaged -> use temporal prediction
-#ifdef HAVE_XVMC
+#ifdef CONFIG_XVMC
//prevent dsp.sad() check, that requires access to the image
if(s->avctx->xvmc_acceleration && s->pict_type==FF_I_TYPE) return 1;
#endif
@@ -681,6 +681,7 @@ void ff_er_frame_end(MpegEncContext *s){
Picture *pic= s->current_picture_ptr;
if(!s->error_recognition || s->error_count==0 || s->avctx->lowres ||
+ s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU ||
s->error_count==3*s->mb_width*(s->avctx->skip_top + s->avctx->skip_bottom)) return;
if(s->current_picture.motion_val[0] == NULL){
@@ -934,7 +935,7 @@ void ff_er_frame_end(MpegEncContext *s){
}else
guess_mv(s);
-#ifdef HAVE_XVMC
+#ifdef CONFIG_XVMC
/* the filters below are not XvMC compatible, skip them */
if(s->avctx->xvmc_acceleration) goto ec_clean;
#endif
@@ -1023,7 +1024,7 @@ void ff_er_frame_end(MpegEncContext *s){
v_block_filter(s, s->current_picture.data[2], s->mb_width , s->mb_height , s->uvlinesize, 0);
}
-#ifdef HAVE_XVMC
+#ifdef CONFIG_XVMC
ec_clean:
#endif
/* clean a few tables */
diff --git a/libavcodec/eval.c b/libavcodec/eval.c
index be393a6..fce6da5 100644
--- a/libavcodec/eval.c
+++ b/libavcodec/eval.c
@@ -47,12 +47,12 @@
typedef struct Parser{
int stack_index;
char *s;
- double *const_value;
- const char **const_name; // NULL terminated
+ const double *const_value;
+ const char * const *const_name; // NULL terminated
double (**func1)(void *, double a); // NULL terminated
const char **func1_name; // NULL terminated
double (**func2)(void *, double a, double b); // NULL terminated
- char **func2_name; // NULL terminated
+ const char **func2_name; // NULL terminated
void *opaque;
const char **error;
#define VARS 10
@@ -375,9 +375,9 @@ static int verify_expr(AVEvalExpr * e) {
}
}
-AVEvalExpr * ff_parse(const char *s, const char **const_name,
+AVEvalExpr * ff_parse(const char *s, const char * const *const_name,
double (**func1)(void *, double), const char **func1_name,
- double (**func2)(void *, double, double), char **func2_name,
+ double (**func2)(void *, double, double), const char **func2_name,
const char **error){
Parser p;
AVEvalExpr * e;
@@ -404,7 +404,7 @@ AVEvalExpr * ff_parse(const char *s, const char **const_name,
return e;
}
-double ff_parse_eval(AVEvalExpr * e, double *const_value, void *opaque) {
+double ff_parse_eval(AVEvalExpr * e, const double *const_value, void *opaque) {
Parser p;
p.const_value= const_value;
@@ -412,9 +412,9 @@ double ff_parse_eval(AVEvalExpr * e, double *const_value, void *opaque) {
return eval_expr(&p, e);
}
-double ff_eval2(const char *s, double *const_value, const char **const_name,
+double ff_eval2(const char *s, const double *const_value, const char * const *const_name,
double (**func1)(void *, double), const char **func1_name,
- double (**func2)(void *, double, double), char **func2_name,
+ double (**func2)(void *, double, double), const char **func2_name,
void *opaque, const char **error){
AVEvalExpr * e = ff_parse(s, const_name, func1, func1_name, func2, func2_name, error);
double d;
diff --git a/libavcodec/eval.h b/libavcodec/eval.h
index 0918fc9..c450332 100644
--- a/libavcodec/eval.h
+++ b/libavcodec/eval.h
@@ -42,9 +42,9 @@
* @param opaque a pointer which will be passed to all functions from func1 and func2
* @return the value of the expression
*/
-double ff_eval2(const char *s, double *const_value, const char **const_name,
+double ff_eval2(const char *s, const double *const_value, const char * const *const_name,
double (**func1)(void *, double), const char **func1_name,
- double (**func2)(void *, double, double), char **func2_name,
+ double (**func2)(void *, double, double), const char **func2_name,
void *opaque, const char **error);
typedef struct ff_expr_s AVEvalExpr;
@@ -61,9 +61,9 @@ typedef struct ff_expr_s AVEvalExpr;
* @return AVEvalExpr which must be freed with ff_eval_free by the user when it is not needed anymore
* NULL if anything went wrong
*/
-AVEvalExpr * ff_parse(const char *s, const char **const_name,
+AVEvalExpr * ff_parse(const char *s, const char * const *const_name,
double (**func1)(void *, double), const char **func1_name,
- double (**func2)(void *, double, double), char **func2_name,
+ double (**func2)(void *, double, double), const char **func2_name,
const char **error);
/**
* Evaluates a previously parsed expression.
@@ -71,7 +71,7 @@ AVEvalExpr * ff_parse(const char *s, const char **const_name,
* @param opaque a pointer which will be passed to all functions from func1 and func2
* @return the value of the expression
*/
-double ff_parse_eval(AVEvalExpr * e, double *const_value, void *opaque);
+double ff_parse_eval(AVEvalExpr * e, const double *const_value, void *opaque);
void ff_eval_free(AVEvalExpr * e);
#endif /* AVCODEC_EVAL_H */
diff --git a/libavcodec/faxcompr.c b/libavcodec/faxcompr.c
new file mode 100644
index 0000000..e7f7706
--- /dev/null
+++ b/libavcodec/faxcompr.c
@@ -0,0 +1,313 @@
+/*
+ * CCITT Fax Group 3 and 4 decompression
+ * Copyright (c) 2008 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * CCITT Fax Group 3 and 4 decompression
+ * @file faxcompr.c
+ * @author Konstantin Shishkov
+ */
+#include "avcodec.h"
+#include "bitstream.h"
+#include "faxcompr.h"
+
+#define CCITT_SYMS 104
+
+static const uint16_t ccitt_syms[CCITT_SYMS] = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
+ 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
+ 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
+ 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
+ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
+ 128, 192, 256, 320, 384, 448, 512, 576, 640, 704, 768, 832, 896,
+ 960, 1024, 1088, 1152, 1216, 1280, 1344, 1408, 1472, 1536, 1600, 1664, 1728,
+ 1792, 1856, 1920, 1984, 2048, 2112, 2176, 2240, 2304, 2368, 2432, 2496, 2560
+};
+
+static const uint8_t ccitt_codes_bits[2][CCITT_SYMS] =
+{
+ {
+ 0x35, 0x07, 0x07, 0x08, 0x0B, 0x0C, 0x0E, 0x0F, 0x13, 0x14, 0x07, 0x08, 0x08,
+ 0x03, 0x34, 0x35, 0x2A, 0x2B, 0x27, 0x0C, 0x08, 0x17, 0x03, 0x04, 0x28, 0x2B,
+ 0x13, 0x24, 0x18, 0x02, 0x03, 0x1A, 0x1B, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+ 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x04, 0x05, 0x0A, 0x0B, 0x52, 0x53, 0x54,
+ 0x55, 0x24, 0x25, 0x58, 0x59, 0x5A, 0x5B, 0x4A, 0x4B, 0x32, 0x33, 0x34, 0x1B,
+ 0x12, 0x17, 0x37, 0x36, 0x37, 0x64, 0x65, 0x68, 0x67, 0xCC, 0xCD, 0xD2, 0xD3,
+ 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDB, 0x98, 0x99, 0x9A, 0x18, 0x9B,
+ 0x08, 0x0C, 0x0D, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x1C, 0x1D, 0x1E, 0x1F
+ },
+ {
+ 0x37, 0x02, 0x03, 0x02, 0x03, 0x03, 0x02, 0x03, 0x05, 0x04, 0x04, 0x05, 0x07,
+ 0x04, 0x07, 0x18, 0x17, 0x18, 0x08, 0x67, 0x68, 0x6C, 0x37, 0x28, 0x17, 0x18,
+ 0xCA, 0xCB, 0xCC, 0xCD, 0x68, 0x69, 0x6A, 0x6B, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
+ 0xD7, 0x6C, 0x6D, 0xDA, 0xDB, 0x54, 0x55, 0x56, 0x57, 0x64, 0x65, 0x52, 0x53,
+ 0x24, 0x37, 0x38, 0x27, 0x28, 0x58, 0x59, 0x2B, 0x2C, 0x5A, 0x66, 0x67, 0x0F,
+ 0xC8, 0xC9, 0x5B, 0x33, 0x34, 0x35, 0x6C, 0x6D, 0x4A, 0x4B, 0x4C, 0x4D, 0x72,
+ 0x73, 0x74, 0x75, 0x76, 0x77, 0x52, 0x53, 0x54, 0x55, 0x5A, 0x5B, 0x64, 0x65,
+ 0x08, 0x0C, 0x0D, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x1C, 0x1D, 0x1E, 0x1F
+ }
+};
+
+static const uint8_t ccitt_codes_lens[2][CCITT_SYMS] =
+{
+ {
+ 8, 6, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7,
+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+ 8, 8, 8, 8, 5, 5, 6, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9,
+ 9, 9, 9, 9, 9, 9, 9, 9, 9, 6, 9, 11, 11, 11, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12
+ },
+ {
+ 10, 3, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 7, 8, 8, 9, 10, 10, 10, 11,
+ 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12, 10, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+ 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 11, 11, 11, 12, 12, 12, 12, 12, 12,
+ 12, 12, 12, 12
+ }
+};
+
+static const uint8_t ccitt_group3_2d_bits[11] = {
+ 1, 1, 2, 2, 2, 1, 3, 3, 3, 1, 1
+};
+
+static const uint8_t ccitt_group3_2d_lens[11] = {
+ 4, 3, 7, 6, 3, 1, 3, 6, 7, 7, 9
+};
+
+static VLC ccitt_vlc[2], ccitt_group3_2d_vlc;
+
+av_cold void ff_ccitt_unpack_init()
+{
+ static VLC_TYPE code_table1[528][2];
+ static VLC_TYPE code_table2[648][2];
+ int i;
+ static int initialized = 0;
+
+ if(initialized)
+ return;
+ ccitt_vlc[0].table = code_table1;
+ ccitt_vlc[0].table_allocated = 528;
+ ccitt_vlc[1].table = code_table2;
+ ccitt_vlc[1].table_allocated = 648;
+ for(i = 0; i < 2; i++){
+ init_vlc_sparse(&ccitt_vlc[i], 9, CCITT_SYMS,
+ ccitt_codes_lens[i], 1, 1,
+ ccitt_codes_bits[i], 1, 1,
+ ccitt_syms, 2, 2,
+ INIT_VLC_USE_NEW_STATIC);
+ }
+ INIT_VLC_STATIC(&ccitt_group3_2d_vlc, 9, 11,
+ ccitt_group3_2d_lens, 1, 1,
+ ccitt_group3_2d_bits, 1, 1, 512);
+ initialized = 1;
+}
+
+
+static int decode_group3_1d_line(AVCodecContext *avctx, GetBitContext *gb,
+ unsigned int pix_left, int *runs, const int *runend)
+{
+ int mode = 0;
+ unsigned int run=0;
+ unsigned int t;
+ for(;;){
+ t = get_vlc2(gb, ccitt_vlc[mode].table, 9, 2);
+ run += t;
+ if(t < 64){
+ *runs++ = run;
+ if(runs >= runend){
+ av_log(avctx, AV_LOG_ERROR, "Run overrun\n");
+ return -1;
+ }
+ if(pix_left <= run){
+ if(pix_left == run)
+ break;
+ av_log(avctx, AV_LOG_ERROR, "Run went out of bounds\n");
+ return -1;
+ }
+ pix_left -= run;
+ run = 0;
+ mode = !mode;
+ }else if((int)t == -1){
+ av_log(avctx, AV_LOG_ERROR, "Incorrect code\n");
+ return -1;
+ }
+ }
+ *runs++ = 0;
+ return 0;
+}
+
+static int decode_group3_2d_line(AVCodecContext *avctx, GetBitContext *gb,
+ unsigned int width, int *runs, const int *runend, const int *ref)
+{
+ int mode = 0, saved_run = 0, t;
+ int run_off = *ref++;
+ unsigned int offs=0, run= 0;
+
+ runend--; // for the last written 0
+
+ while(offs < width){
+ int cmode = get_vlc2(gb, ccitt_group3_2d_vlc.table, 9, 1);
+ if(cmode == -1){
+ av_log(avctx, AV_LOG_ERROR, "Incorrect mode VLC\n");
+ return -1;
+ }
+ if(!cmode){//pass mode
+ run_off += *ref++;
+ run = run_off - offs;
+ offs= run_off;
+ run_off += *ref++;
+ if(offs > width){
+ av_log(avctx, AV_LOG_ERROR, "Run went out of bounds\n");
+ return -1;
+ }
+ saved_run += run;
+ }else if(cmode == 1){//horizontal mode
+ int k;
+ for(k = 0; k < 2; k++){
+ run = 0;
+ for(;;){
+ t = get_vlc2(gb, ccitt_vlc[mode].table, 9, 2);
+ if(t == -1){
+ av_log(avctx, AV_LOG_ERROR, "Incorrect code\n");
+ return -1;
+ }
+ run += t;
+ if(t < 64)
+ break;
+ }
+ *runs++ = run + saved_run;
+ if(runs >= runend){
+ av_log(avctx, AV_LOG_ERROR, "Run overrun\n");
+ return -1;
+ }
+ saved_run = 0;
+ offs += run;
+ if(offs > width || run > width){
+ av_log(avctx, AV_LOG_ERROR, "Run went out of bounds\n");
+ return -1;
+ }
+ mode = !mode;
+ }
+ }else if(cmode == 9 || cmode == 10){
+ av_log(avctx, AV_LOG_ERROR, "Special modes are not supported (yet)\n");
+ return -1;
+ }else{//vertical mode
+ run = run_off - offs + (cmode - 5);
+ run_off -= *--ref;
+ offs += run;
+ if(offs > width || run > width){
+ av_log(avctx, AV_LOG_ERROR, "Run went out of bounds\n");
+ return -1;
+ }
+ *runs++ = run + saved_run;
+ if(runs >= runend){
+ av_log(avctx, AV_LOG_ERROR, "Run overrun\n");
+ return -1;
+ }
+ saved_run = 0;
+ mode = !mode;
+ }
+ //sync line pointers
+ while(run_off <= offs){
+ run_off += *ref++;
+ run_off += *ref++;
+ }
+ }
+ *runs++ = saved_run;
+ *runs++ = 0;
+ return 0;
+}
+
+static void put_line(uint8_t *dst, int size, int width, const int *runs)
+{
+ PutBitContext pb;
+ int run, mode = ~0, pix_left = width, run_idx = 0;
+
+ init_put_bits(&pb, dst, size*8);
+ while(pix_left > 0){
+ run = runs[run_idx++];
+ mode = ~mode;
+ pix_left -= run;
+ for(; run > 16; run -= 16)
+ put_sbits(&pb, 16, mode);
+ if(run)
+ put_sbits(&pb, run, mode);
+ }
+}
+
+static int find_group3_syncmarker(GetBitContext *gb, int srcsize)
+{
+ unsigned int state = -1;
+ srcsize -= get_bits_count(gb);
+ while(srcsize-- > 0){
+ state+= state + get_bits1(gb);
+ if((state & 0xFFF) == 1)
+ return 0;
+ }
+ return -1;
+}
+
+int ff_ccitt_unpack(AVCodecContext *avctx,
+ const uint8_t *src, int srcsize,
+ uint8_t *dst, int height, int stride, enum TiffCompr compr)
+{
+ int j;
+ GetBitContext gb;
+ int *runs, *ref, *runend;
+ int ret;
+ int runsize= avctx->width + 2;
+
+ runs = av_malloc(runsize * sizeof(runs[0]));
+ ref = av_malloc(runsize * sizeof(ref[0]));
+ ref[0] = avctx->width;
+ ref[1] = 0;
+ ref[2] = 0;
+ init_get_bits(&gb, src, srcsize*8);
+ for(j = 0; j < height; j++){
+ runend = runs + runsize;
+ if(compr == TIFF_G4){
+ ret = decode_group3_2d_line(avctx, &gb, avctx->width, runs, runend, ref);
+ if(ret < 0){
+ av_free(runs);
+ av_free(ref);
+ return -1;
+ }
+ }else{
+ if(find_group3_syncmarker(&gb, srcsize*8) < 0)
+ break;
+ if(compr==TIFF_CCITT_RLE || get_bits1(&gb))
+ ret = decode_group3_1d_line(avctx, &gb, avctx->width, runs, runend);
+ else
+ ret = decode_group3_2d_line(avctx, &gb, avctx->width, runs, runend, ref);
+ }
+ if(ret < 0){
+ put_line(dst, stride, avctx->width, ref);
+ }else{
+ put_line(dst, stride, avctx->width, runs);
+ FFSWAP(int*, runs, ref);
+ }
+ dst += stride;
+ }
+ av_free(runs);
+ av_free(ref);
+ return 0;
+}
diff --git a/libavcodec/faxcompr.h b/libavcodec/faxcompr.h
new file mode 100644
index 0000000..632744b
--- /dev/null
+++ b/libavcodec/faxcompr.h
@@ -0,0 +1,45 @@
+/*
+ * CCITT Fax Group 3 and 4 decompression
+ * Copyright (c) 2008 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * CCITT Fax Group 3 and 4 decompression
+ * @file faxcompr.h
+ * @author Konstantin Shishkov
+ */
+#ifndef AVCODEC_FAXCOMPR_H
+#define AVCODEC_FAXCOMPR_H
+
+#include "avcodec.h"
+#include "tiff.h"
+
+/**
+ * initialize upacker code
+ */
+void ff_ccitt_unpack_init();
+
+/**
+ * unpack data compressed with CCITT Group 3 1/2-D or Group 4 method
+ */
+int ff_ccitt_unpack(AVCodecContext *avctx,
+ const uint8_t *src, int srcsize,
+ uint8_t *dst, int height, int stride, enum TiffCompr compr);
+
+#endif /* AVCODEC_FAXCOMPR_H */
diff --git a/libavcodec/flashsv.c b/libavcodec/flashsv.c
index 3842594..abe1d87 100644
--- a/libavcodec/flashsv.c
+++ b/libavcodec/flashsv.c
@@ -211,7 +211,7 @@ static int flashsv_decode_frame(AVCodecContext *avctx,
/* return -1; */
}
copy_region(s->tmpblock, s->frame.data[0], s->image_height-(hp+hs+1), wp, hs, ws, s->frame.linesize[0]);
- skip_bits(&gb, 8*size); /* skip the consumed bits */
+ skip_bits_long(&gb, 8*size); /* skip the consumed bits */
}
}
}
diff --git a/libavcodec/fraps.c b/libavcodec/fraps.c
index e03c1fa..0c31e03 100644
--- a/libavcodec/fraps.c
+++ b/libavcodec/fraps.c
@@ -148,10 +148,10 @@ static int decode_frame(AVCodecContext *avctx,
version = header & 0xff;
header_size = (header & (1<<30))? 8 : 4; /* bit 30 means pad to 8 bytes */
- if (version > 2 && version != 4 && version != 5) {
+ if (version > 5) {
av_log(avctx, AV_LOG_ERROR,
"This file is encoded with Fraps version %d. " \
- "This codec can only decode version 0, 1, 2 and 4.\n", version);
+ "This codec can only decode versions <= 5.\n", version);
return -1;
}
@@ -288,6 +288,7 @@ static int decode_frame(AVCodecContext *avctx,
}
}
break;
+ case 3:
case 5:
/* Virtually the same as version 4, but is for RGB24 */
avctx->pix_fmt = PIX_FMT_BGR24;
diff --git a/libavcodec/golomb.c b/libavcodec/golomb.c
index 79dc0a7..51e0f9d 100644
--- a/libavcodec/golomb.c
+++ b/libavcodec/golomb.c
@@ -29,7 +29,7 @@
#include "libavutil/common.h"
const uint8_t ff_golomb_vlc_len[512]={
-0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+14,13,12,12,11,11,11,11,10,10,10,10,10,10,10,10,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
@@ -48,7 +48,7 @@ const uint8_t ff_golomb_vlc_len[512]={
};
const uint8_t ff_ue_golomb_vlc_code[512]={
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,
+31,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,
7, 7, 7, 7, 8, 8, 8, 8, 9, 9, 9, 9,10,10,10,10,11,11,11,11,12,12,12,12,13,13,13,13,14,14,14,14,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
@@ -67,7 +67,7 @@ const uint8_t ff_ue_golomb_vlc_code[512]={
};
const int8_t ff_se_golomb_vlc_code[512]={
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, -8, 9, -9, 10,-10, 11,-11, 12,-12, 13,-13, 14,-14, 15,-15,
+ 16, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 8, -8, 9, -9, 10,-10, 11,-11, 12,-12, 13,-13, 14,-14, 15,-15,
4, 4, 4, 4, -4, -4, -4, -4, 5, 5, 5, 5, -5, -5, -5, -5, 6, 6, 6, 6, -6, -6, -6, -6, 7, 7, 7, 7, -7, -7, -7, -7,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3, -3,
diff --git a/libavcodec/golomb.h b/libavcodec/golomb.h
index 627fcdc..3c3f759 100644
--- a/libavcodec/golomb.h
+++ b/libavcodec/golomb.h
@@ -74,6 +74,23 @@ static inline int get_ue_golomb(GetBitContext *gb){
}
}
+ /**
+ * read unsigned exp golomb code, constraint to a max of 31
+ */
+static inline int get_ue_golomb_31(GetBitContext *gb){
+ unsigned int buf;
+
+ OPEN_READER(re, gb);
+ UPDATE_CACHE(re, gb);
+ buf=GET_CACHE(re, gb);
+
+ buf >>= 32 - 9;
+ LAST_SKIP_BITS(re, gb, ff_golomb_vlc_len[buf]);
+ CLOSE_READER(re, gb);
+
+ return ff_ue_golomb_vlc_code[buf];
+}
+
static inline int svq3_get_ue_golomb(GetBitContext *gb){
uint32_t buf;
diff --git a/libavcodec/h263.c b/libavcodec/h263.c
index f72f440..781c007 100644
--- a/libavcodec/h263.c
+++ b/libavcodec/h263.c
@@ -810,7 +810,7 @@ static inline int get_p_cbp(MpegEncContext * s,
for (i = 0; i < 6; i++) {
if (s->block_last_index[i] >= 0 && ((cbp >> (5 - i))&1)==0 ){
s->block_last_index[i]= -1;
- memset(s->block[i], 0, sizeof(DCTELEM)*64);
+ s->dsp.clear_block(s->block[i]);
}
}
}else{
@@ -853,7 +853,7 @@ static inline int get_b_cbp(MpegEncContext * s, DCTELEM block[6][64],
for (i = 0; i < 6; i++) {
if (s->block_last_index[i] >= 0 && ((cbp >> (5 - i))&1)==0 ){
s->block_last_index[i]= -1;
- memset(s->block[i], 0, sizeof(DCTELEM)*64);
+ s->dsp.clear_block(s->block[i]);
}
}
}else{
@@ -1485,17 +1485,17 @@ void ff_h263_loop_filter(MpegEncContext * s){
qp_c= 0;
if(s->mb_y){
- int qp_dt, qp_t, qp_tc;
+ int qp_dt, qp_tt, qp_tc;
if(IS_SKIP(s->current_picture.mb_type[xy-s->mb_stride]))
- qp_t=0;
+ qp_tt=0;
else
- qp_t= s->current_picture.qscale_table[xy-s->mb_stride];
+ qp_tt= s->current_picture.qscale_table[xy-s->mb_stride];
if(qp_c)
qp_tc= qp_c;
else
- qp_tc= qp_t;
+ qp_tc= qp_tt;
if(qp_tc){
const int chroma_qp= s->chroma_qscale_table[qp_tc];
@@ -1506,12 +1506,12 @@ void ff_h263_loop_filter(MpegEncContext * s){
s->dsp.h263_v_loop_filter(dest_cr , uvlinesize, chroma_qp);
}
- if(qp_t)
- s->dsp.h263_h_loop_filter(dest_y-8*linesize+8 , linesize, qp_t);
+ if(qp_tt)
+ s->dsp.h263_h_loop_filter(dest_y-8*linesize+8 , linesize, qp_tt);
if(s->mb_x){
- if(qp_t || IS_SKIP(s->current_picture.mb_type[xy-1-s->mb_stride]))
- qp_dt= qp_t;
+ if(qp_tt || IS_SKIP(s->current_picture.mb_type[xy-1-s->mb_stride]))
+ qp_dt= qp_tt;
else
qp_dt= s->current_picture.qscale_table[xy-1-s->mb_stride];
@@ -4651,7 +4651,7 @@ retry:
rl = &rl_intra_aic;
i = 0;
s->gb= gb;
- memset(block, 0, sizeof(DCTELEM)*64);
+ s->dsp.clear_block(block);
goto retry;
}
av_log(s->avctx, AV_LOG_ERROR, "run overflow at %dx%d i:%d\n", s->mb_x, s->mb_y, s->mb_intra);
@@ -5666,7 +5666,58 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){
s->quarter_sample= get_bits1(gb);
else s->quarter_sample=0;
- if(!get_bits1(gb)) av_log(s->avctx, AV_LOG_ERROR, "Complexity estimation not supported\n");
+ if(!get_bits1(gb)){
+ int pos= get_bits_count(gb);
+ int estimation_method= get_bits(gb, 2);
+ if(estimation_method<2){
+ if(!get_bits1(gb)){
+ s->cplx_estimation_trash_i += 8*get_bits1(gb); //opaque
+ s->cplx_estimation_trash_i += 8*get_bits1(gb); //transparent
+ s->cplx_estimation_trash_i += 8*get_bits1(gb); //intra_cae
+ s->cplx_estimation_trash_i += 8*get_bits1(gb); //inter_cae
+ s->cplx_estimation_trash_i += 8*get_bits1(gb); //no_update
+ s->cplx_estimation_trash_i += 8*get_bits1(gb); //upampling
+ }
+ if(!get_bits1(gb)){
+ s->cplx_estimation_trash_i += 8*get_bits1(gb); //intra_blocks
+ s->cplx_estimation_trash_p += 8*get_bits1(gb); //inter_blocks
+ s->cplx_estimation_trash_p += 8*get_bits1(gb); //inter4v_blocks
+ s->cplx_estimation_trash_i += 8*get_bits1(gb); //not coded blocks
+ }
+ if(!check_marker(gb, "in complexity estimation part 1")){
+ skip_bits_long(gb, pos - get_bits_count(gb));
+ goto no_cplx_est;
+ }
+ if(!get_bits1(gb)){
+ s->cplx_estimation_trash_i += 8*get_bits1(gb); //dct_coeffs
+ s->cplx_estimation_trash_i += 8*get_bits1(gb); //dct_lines
+ s->cplx_estimation_trash_i += 8*get_bits1(gb); //vlc_syms
+ s->cplx_estimation_trash_i += 4*get_bits1(gb); //vlc_bits
+ }
+ if(!get_bits1(gb)){
+ s->cplx_estimation_trash_p += 8*get_bits1(gb); //apm
+ s->cplx_estimation_trash_p += 8*get_bits1(gb); //npm
+ s->cplx_estimation_trash_b += 8*get_bits1(gb); //interpolate_mc_q
+ s->cplx_estimation_trash_p += 8*get_bits1(gb); //forwback_mc_q
+ s->cplx_estimation_trash_p += 8*get_bits1(gb); //halfpel2
+ s->cplx_estimation_trash_p += 8*get_bits1(gb); //halfpel4
+ }
+ if(!check_marker(gb, "in complexity estimation part 2")){
+ skip_bits_long(gb, pos - get_bits_count(gb));
+ goto no_cplx_est;
+ }
+ if(estimation_method==1){
+ s->cplx_estimation_trash_i += 8*get_bits1(gb); //sadct
+ s->cplx_estimation_trash_p += 8*get_bits1(gb); //qpel
+ }
+ }else
+ av_log(s->avctx, AV_LOG_ERROR, "Invalid Complexity estimation method %d\n", estimation_method);
+ }else{
+no_cplx_est:
+ s->cplx_estimation_trash_i=
+ s->cplx_estimation_trash_p=
+ s->cplx_estimation_trash_b= 0;
+ }
s->resync_marker= !get_bits1(gb); /* resync_marker_disabled */
@@ -5903,6 +5954,12 @@ static int decode_vop_header(MpegEncContext *s, GetBitContext *gb){
//FIXME complexity estimation stuff
if (s->shape != BIN_ONLY_SHAPE) {
+ skip_bits_long(gb, s->cplx_estimation_trash_i);
+ if(s->pict_type != FF_I_TYPE)
+ skip_bits_long(gb, s->cplx_estimation_trash_p);
+ if(s->pict_type == FF_B_TYPE)
+ skip_bits_long(gb, s->cplx_estimation_trash_b);
+
s->intra_dc_threshold= mpeg4_dc_threshold[ get_bits(gb, 3) ];
if(!s->progressive_sequence){
s->top_field_first= get_bits1(gb);
@@ -5951,12 +6008,12 @@ static int decode_vop_header(MpegEncContext *s, GetBitContext *gb){
s->b_code=1;
if(s->avctx->debug&FF_DEBUG_PICT_INFO){
- av_log(s->avctx, AV_LOG_DEBUG, "qp:%d fc:%d,%d %s size:%d pro:%d alt:%d top:%d %spel part:%d resync:%d w:%d a:%d rnd:%d vot:%d%s dc:%d\n",
+ av_log(s->avctx, AV_LOG_DEBUG, "qp:%d fc:%d,%d %s size:%d pro:%d alt:%d top:%d %spel part:%d resync:%d w:%d a:%d rnd:%d vot:%d%s dc:%d ce:%d/%d/%d\n",
s->qscale, s->f_code, s->b_code,
s->pict_type == FF_I_TYPE ? "I" : (s->pict_type == FF_P_TYPE ? "P" : (s->pict_type == FF_B_TYPE ? "B" : "S")),
gb->size_in_bits,s->progressive_sequence, s->alternate_scan, s->top_field_first,
s->quarter_sample ? "q" : "h", s->data_partitioning, s->resync_marker, s->num_sprite_warping_points,
- s->sprite_warping_accuracy, 1-s->no_rounding, s->vo_type, s->vol_control_parameters ? " VOLC" : " ", s->intra_dc_threshold);
+ s->sprite_warping_accuracy, 1-s->no_rounding, s->vo_type, s->vol_control_parameters ? " VOLC" : " ", s->intra_dc_threshold, s->cplx_estimation_trash_i, s->cplx_estimation_trash_p, s->cplx_estimation_trash_b);
}
if(!s->scalability){
@@ -6008,7 +6065,7 @@ int ff_mpeg4_decode_picture_header(MpegEncContext * s, GetBitContext *gb)
if(s->codec_tag == ff_get_fourcc("WV1F") && show_bits(gb, 24) == 0x575630){
skip_bits(gb, 24);
if(get_bits(gb, 8) == 0xF0)
- return decode_vop_header(s, gb);
+ goto end;
}
startcode = 0xff;
@@ -6071,12 +6128,17 @@ int ff_mpeg4_decode_picture_header(MpegEncContext * s, GetBitContext *gb)
mpeg4_decode_gop_header(s, gb);
}
else if(startcode == VOP_STARTCODE){
- return decode_vop_header(s, gb);
+ break;
}
align_get_bits(gb);
startcode = 0xff;
}
+end:
+ if(s->flags& CODEC_FLAG_LOW_DELAY)
+ s->low_delay=1;
+ s->avctx->has_b_frames= !s->low_delay;
+ return decode_vop_header(s, gb);
}
/* don't understand why they choose a different header ! */
diff --git a/libavcodec/h263dec.c b/libavcodec/h263dec.c
index 141d153..ae318b5 100644
--- a/libavcodec/h263dec.c
+++ b/libavcodec/h263dec.c
@@ -402,9 +402,6 @@ retry:
ret = ff_mpeg4_decode_picture_header(s, &gb);
}
ret = ff_mpeg4_decode_picture_header(s, &s->gb);
-
- if(s->flags& CODEC_FLAG_LOW_DELAY)
- s->low_delay=1;
} else if (s->codec_id == CODEC_ID_H263I) {
ret = intel_h263_decode_picture_header(s);
} else if (s->h263_flv) {
diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 915c078..a8bd062 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -33,10 +33,11 @@
#include "h264_parser.h"
#include "golomb.h"
#include "rectangle.h"
+#include "vdpau_internal.h"
#include "cabac.h"
#ifdef ARCH_X86
-#include "i386/h264_i386.h"
+#include "x86/h264_i386.h"
#endif
//#undef NDEBUG
@@ -101,12 +102,15 @@ static const int left_block_options[4][8]={
{0,2,0,2,7,10,7,10}
};
+#define LEVEL_TAB_BITS 8
+static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
+
static void fill_caches(H264Context *h, int mb_type, int for_deblock){
MpegEncContext * const s = &h->s;
const int mb_xy= h->mb_xy;
int topleft_xy, top_xy, topright_xy, left_xy[2];
int topleft_type, top_type, topright_type, left_type[2];
- int * left_block;
+ const int * left_block;
int topleft_partition= -1;
int i;
@@ -128,46 +132,34 @@ static void fill_caches(H264Context *h, int mb_type, int for_deblock){
const int top_pair_xy = pair_xy - s->mb_stride;
const int topleft_pair_xy = top_pair_xy - 1;
const int topright_pair_xy = top_pair_xy + 1;
- const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
- const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
- const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
- const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
- const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
+ const int topleft_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
+ const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
+ const int topright_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
+ const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
+ const int curr_mb_field_flag = IS_INTERLACED(mb_type);
const int bottom = (s->mb_y & 1);
- tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
- if (bottom
- ? !curr_mb_frame_flag // bottom macroblock
- : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
- ) {
+ tprintf(s->avctx, "fill_caches: curr_mb_field_flag:%d, left_mb_field_flag:%d, topleft_mb_field_flag:%d, top_mb_field_flag:%d, topright_mb_field_flag:%d\n", curr_mb_field_flag, left_mb_field_flag, topleft_mb_field_flag, top_mb_field_flag, topright_mb_field_flag);
+
+ if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
top_xy -= s->mb_stride;
}
- if (bottom
- ? !curr_mb_frame_flag // bottom macroblock
- : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
- ) {
+ if (curr_mb_field_flag && (bottom || topleft_mb_field_flag)){
topleft_xy -= s->mb_stride;
- } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
+ } else if(bottom && !curr_mb_field_flag && left_mb_field_flag) {
topleft_xy += s->mb_stride;
// take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
topleft_partition = 0;
}
- if (bottom
- ? !curr_mb_frame_flag // bottom macroblock
- : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
- ) {
+ if (curr_mb_field_flag && (bottom || topright_mb_field_flag)){
topright_xy -= s->mb_stride;
}
- if (left_mb_frame_flag != curr_mb_frame_flag) {
+ if (left_mb_field_flag != curr_mb_field_flag) {
left_xy[1] = left_xy[0] = pair_xy - 1;
- if (curr_mb_frame_flag) {
- if (bottom) {
- left_block = left_block_options[1];
- } else {
- left_block= left_block_options[2];
- }
- } else {
+ if (curr_mb_field_flag) {
left_xy[1] += s->mb_stride;
left_block = left_block_options[3];
+ } else {
+ left_block= left_block_options[2 - bottom];
}
}
}
@@ -589,13 +581,13 @@ static inline int check_intra4x4_pred_mode(H264Context *h){
static const int mask[4]={0x8000,0x2000,0x80,0x20};
for(i=0; i<4; i++){
if(!(h->left_samples_available&mask[i])){
- int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
- if(status<0){
- av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
- return -1;
- } else if(status){
- h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
- }
+ int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
+ if(status<0){
+ av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
+ return -1;
+ } else if(status){
+ h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
+ }
}
}
}
@@ -882,8 +874,8 @@ static inline void pred_pskip_motion(H264Context * const h, int * const mx, int
tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
- || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
- || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
+ || !( top_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ])
+ || !(left_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ])){
*mx = *my = 0;
return;
@@ -1387,9 +1379,26 @@ static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_le
for(i=0; i<length; i++)
printf("%2X ", src[i]);
#endif
+
+#ifdef HAVE_FAST_UNALIGNED
+# ifdef HAVE_FAST_64BIT
+# define RS 7
+ for(i=0; i+1<length; i+=9){
+ if(!((~*(uint64_t*)(src+i) & (*(uint64_t*)(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
+# else
+# define RS 3
+ for(i=0; i+1<length; i+=5){
+ if(!((~*(uint32_t*)(src+i) & (*(uint32_t*)(src+i) - 0x01000101U)) & 0x80008080U))
+# endif
+ continue;
+ if(i>0 && !src[i]) i--;
+ while(src[i]) i++;
+#else
+# define RS 0
for(i=0; i+1<length; i+=2){
if(src[i]) continue;
if(i>0 && src[i-1]==0) i--;
+#endif
if(i+2<length && src[i+1]==0 && src[i+2]<=3){
if(src[i+2]!=3){
/* startcode, so we must be past the end */
@@ -1397,6 +1406,7 @@ static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_le
}
break;
}
+ i-= RS;
}
if(i>=length-1){ //no escaped 0
@@ -1406,7 +1416,7 @@ static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_le
}
bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
- h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
+ h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
dst= h->rbsp_buffer[bufidx];
if (dst == NULL){
@@ -1414,21 +1424,30 @@ static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_le
}
//printf("decoding esc\n");
- si=di=0;
- while(si<length){
+ memcpy(dst, src, i);
+ si=di=i;
+ while(si+2<length){
//remove escapes (very rare 1:2^22)
- if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
+ if(src[si+2]>3){
+ dst[di++]= src[si++];
+ dst[di++]= src[si++];
+ }else if(src[si]==0 && src[si+1]==0){
if(src[si+2]==3){ //escape
dst[di++]= 0;
dst[di++]= 0;
si+=3;
continue;
}else //next start code
- break;
+ goto nsc;
}
dst[di++]= src[si++];
}
+ while(si<length)
+ dst[di++]= src[si++];
+nsc:
+
+ memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
*dst_length= di;
*consumed= si + 1;//+1 for the header
@@ -1587,85 +1606,6 @@ static inline int get_chroma_qp(H264Context *h, int t, int qscale){
return h->pps.chroma_qp_table[t][qscale];
}
-//FIXME need to check that this does not overflow signed 32 bit for low qp, I am not sure, it's very close
-//FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
-static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int separate_dc){
- int i;
- const int * const quant_table= quant_coeff[qscale];
- const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
- const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
- const unsigned int threshold2= (threshold1<<1);
- int last_non_zero;
-
- if(separate_dc){
- if(qscale<=18){
- //avoid overflows
- const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
- const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
- const unsigned int dc_threshold2= (dc_threshold1<<1);
-
- int level= block[0]*quant_coeff[qscale+18][0];
- if(((unsigned)(level+dc_threshold1))>dc_threshold2){
- if(level>0){
- level= (dc_bias + level)>>(QUANT_SHIFT-2);
- block[0]= level;
- }else{
- level= (dc_bias - level)>>(QUANT_SHIFT-2);
- block[0]= -level;
- }
-// last_non_zero = i;
- }else{
- block[0]=0;
- }
- }else{
- const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
- const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
- const unsigned int dc_threshold2= (dc_threshold1<<1);
-
- int level= block[0]*quant_table[0];
- if(((unsigned)(level+dc_threshold1))>dc_threshold2){
- if(level>0){
- level= (dc_bias + level)>>(QUANT_SHIFT+1);
- block[0]= level;
- }else{
- level= (dc_bias - level)>>(QUANT_SHIFT+1);
- block[0]= -level;
- }
-// last_non_zero = i;
- }else{
- block[0]=0;
- }
- }
- last_non_zero= 0;
- i=1;
- }else{
- last_non_zero= -1;
- i=0;
- }
-
- for(; i<16; i++){
- const int j= scantable[i];
- int level= block[j]*quant_table[j];
-
-// if( bias+level >= (1<<(QMAT_SHIFT - 3))
-// || bias-level >= (1<<(QMAT_SHIFT - 3))){
- if(((unsigned)(level+threshold1))>threshold2){
- if(level>0){
- level= (bias + level)>>QUANT_SHIFT;
- block[j]= level;
- }else{
- level= (bias - level)>>QUANT_SHIFT;
- block[j]= -level;
- }
- last_non_zero = i;
- }else{
- block[j]=0;
- }
- }
-
- return last_non_zero;
-}
-
static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
int src_x_offset, int src_y_offset,
@@ -1684,9 +1624,6 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square,
const int pic_width = 16*s->mb_width;
const int pic_height = 16*s->mb_height >> MB_FIELD;
- if(!pic->data[0]) //FIXME this is unacceptable, some sensible error concealment must be done for missing reference frames
- return;
-
if(mx&7) extra_width -= 3;
if(my&7) extra_height -= 3;
@@ -1949,6 +1886,31 @@ static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t
prefetch_motion(h, 1);
}
+static av_cold void init_cavlc_level_tab(void){
+ int suffix_length, mask;
+ unsigned int i;
+
+ for(suffix_length=0; suffix_length<7; suffix_length++){
+ for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
+ int prefix= LEVEL_TAB_BITS - av_log2(2*i);
+ int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
+
+ mask= -(level_code&1);
+ level_code= (((2+level_code)>>1) ^ mask) - mask;
+ if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
+ cavlc_level_tab[suffix_length][i][0]= level_code;
+ cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
+ }else if(prefix + 1 <= LEVEL_TAB_BITS){
+ cavlc_level_tab[suffix_length][i][0]= prefix+100;
+ cavlc_level_tab[suffix_length][i][1]= prefix + 1;
+ }else{
+ cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
+ cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
+ }
+ }
+ }
+}
+
static av_cold void decode_init_vlc(void){
static int done = 0;
@@ -2015,6 +1977,8 @@ static av_cold void decode_init_vlc(void){
&run_len [6][0], 1, 1,
&run_bits[6][0], 1, 1,
INIT_VLC_USE_NEW_STATIC);
+
+ init_cavlc_level_tab();
}
}
@@ -2200,6 +2164,8 @@ static av_cold void common_init(H264Context *h){
s->unrestricted_mv=1;
s->decode=1; //FIXME
+ dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early
+
memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
}
@@ -2223,6 +2189,8 @@ static av_cold int decode_init(AVCodecContext *avctx){
if(avctx->codec_id == CODEC_ID_SVQ3)
avctx->pix_fmt= PIX_FMT_YUVJ420P;
+ else if(avctx->codec_id == CODEC_ID_H264_VDPAU)
+ avctx->pix_fmt= PIX_FMT_VDPAU_H264;
else
avctx->pix_fmt= PIX_FMT_YUV420P;
@@ -2445,13 +2413,15 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
int linesize, uvlinesize /*dct_offset*/;
int i;
int *block_offset = &h->block_offset[0];
- const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);
+ const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
+ /* is_h264 should always be true if SVQ3 is disabled. */
+ const int is_h264 = !ENABLE_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
- dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
- dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
- dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
+ dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
+ dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
+ dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
@@ -2488,17 +2458,6 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
// dct_offset = s->linesize * 16;
}
- if(transform_bypass){
- idct_dc_add =
- idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
- }else if(IS_8x8DCT(mb_type)){
- idct_dc_add = s->dsp.h264_idct8_dc_add;
- idct_add = s->dsp.h264_idct8_add;
- }else{
- idct_dc_add = s->dsp.h264_idct_dc_add;
- idct_add = s->dsp.h264_idct_add;
- }
-
if (!simple && IS_INTRA_PCM(mb_type)) {
for (i=0; i<16; i++) {
memcpy(dest_y + i* linesize, h->mb + i*8, 16);
@@ -2520,47 +2479,70 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
if(IS_INTRA4x4(mb_type)){
if(simple || !s->encoding){
if(IS_8x8DCT(mb_type)){
+ if(transform_bypass){
+ idct_dc_add =
+ idct_add = s->dsp.add_pixels8;
+ }else{
+ idct_dc_add = s->dsp.h264_idct8_dc_add;
+ idct_add = s->dsp.h264_idct8_add;
+ }
for(i=0; i<16; i+=4){
uint8_t * const ptr= dest_y + block_offset[i];
const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
- const int nnz = h->non_zero_count_cache[ scan8[i] ];
- h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
- (h->topright_samples_available<<i)&0x4000, linesize);
- if(nnz){
- if(nnz == 1 && h->mb[i*16])
- idct_dc_add(ptr, h->mb + i*16, linesize);
- else
- idct_add(ptr, h->mb + i*16, linesize);
+ if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
+ h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
+ }else{
+ const int nnz = h->non_zero_count_cache[ scan8[i] ];
+ h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
+ (h->topright_samples_available<<i)&0x4000, linesize);
+ if(nnz){
+ if(nnz == 1 && h->mb[i*16])
+ idct_dc_add(ptr, h->mb + i*16, linesize);
+ else
+ idct_add (ptr, h->mb + i*16, linesize);
+ }
}
}
- }else
- for(i=0; i<16; i++){
- uint8_t * const ptr= dest_y + block_offset[i];
- uint8_t *topright;
- const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
- int nnz, tr;
-
- if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
- const int topright_avail= (h->topright_samples_available<<i)&0x8000;
- assert(mb_y || linesize <= block_offset[i]);
- if(!topright_avail){
- tr= ptr[3 - linesize]*0x01010101;
- topright= (uint8_t*) &tr;
- }else
- topright= ptr + 4 - linesize;
- }else
- topright= NULL;
-
- h->hpc.pred4x4[ dir ](ptr, topright, linesize);
- nnz = h->non_zero_count_cache[ scan8[i] ];
- if(nnz){
- if(is_h264){
- if(nnz == 1 && h->mb[i*16])
- idct_dc_add(ptr, h->mb + i*16, linesize);
- else
- idct_add(ptr, h->mb + i*16, linesize);
- }else
- svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
+ }else{
+ if(transform_bypass){
+ idct_dc_add =
+ idct_add = s->dsp.add_pixels4;
+ }else{
+ idct_dc_add = s->dsp.h264_idct_dc_add;
+ idct_add = s->dsp.h264_idct_add;
+ }
+ for(i=0; i<16; i++){
+ uint8_t * const ptr= dest_y + block_offset[i];
+ const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
+
+ if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
+ h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
+ }else{
+ uint8_t *topright;
+ int nnz, tr;
+ if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
+ const int topright_avail= (h->topright_samples_available<<i)&0x8000;
+ assert(mb_y || linesize <= block_offset[i]);
+ if(!topright_avail){
+ tr= ptr[3 - linesize]*0x01010101;
+ topright= (uint8_t*) &tr;
+ }else
+ topright= ptr + 4 - linesize;
+ }else
+ topright= NULL;
+
+ h->hpc.pred4x4[ dir ](ptr, topright, linesize);
+ nnz = h->non_zero_count_cache[ scan8[i] ];
+ if(nnz){
+ if(is_h264){
+ if(nnz == 1 && h->mb[i*16])
+ idct_dc_add(ptr, h->mb + i*16, linesize);
+ else
+ idct_add (ptr, h->mb + i*16, linesize);
+ }else
+ svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
+ }
+ }
}
}
}
@@ -2585,21 +2567,32 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
if(!IS_INTRA4x4(mb_type)){
if(is_h264){
if(IS_INTRA16x16(mb_type)){
- for(i=0; i<16; i++){
- if(h->non_zero_count_cache[ scan8[i] ])
- idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
- else if(h->mb[i*16])
- idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
+ if(transform_bypass){
+ if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
+ h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
+ }else{
+ for(i=0; i<16; i++){
+ if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
+ s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
+ }
+ }
+ }else{
+ s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
}
- }else{
- const int di = IS_8x8DCT(mb_type) ? 4 : 1;
- for(i=0; i<16; i+=di){
- int nnz = h->non_zero_count_cache[ scan8[i] ];
- if(nnz){
- if(nnz==1 && h->mb[i*16])
- idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
- else
+ }else if(h->cbp&15){
+ if(transform_bypass){
+ const int di = IS_8x8DCT(mb_type) ? 4 : 1;
+ idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
+ for(i=0; i<16; i+=di){
+ if(h->non_zero_count_cache[ scan8[i] ]){
idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
+ }
+ }
+ }else{
+ if(IS_8x8DCT(mb_type)){
+ s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
+ }else{
+ s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
}
}
}
@@ -2613,33 +2606,45 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
}
}
- if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
+ if((simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
uint8_t *dest[2] = {dest_cb, dest_cr};
if(transform_bypass){
- idct_add = idct_dc_add = s->dsp.add_pixels4;
+ if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
+ h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
+ h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
+ }else{
+ idct_add = s->dsp.add_pixels4;
+ for(i=16; i<16+8; i++){
+ if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
+ idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
+ }
+ }
}else{
- idct_add = s->dsp.h264_idct_add;
- idct_dc_add = s->dsp.h264_idct_dc_add;
chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
- }
- if(is_h264){
- for(i=16; i<16+8; i++){
- if(h->non_zero_count_cache[ scan8[i] ])
- idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
- else if(h->mb[i*16])
- idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
- }
- }else{
- for(i=16; i<16+8; i++){
- if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
- uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
- svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
+ if(is_h264){
+ idct_add = s->dsp.h264_idct_add;
+ idct_dc_add = s->dsp.h264_idct_dc_add;
+ for(i=16; i<16+8; i++){
+ if(h->non_zero_count_cache[ scan8[i] ])
+ idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
+ else if(h->mb[i*16])
+ idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
+ }
+ }else{
+ for(i=16; i<16+8; i++){
+ if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
+ uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
+ svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
+ }
}
}
}
}
}
+ if(h->cbp || IS_INTRA(mb_type))
+ s->dsp.clear_blocks(h->mb);
+
if(h->deblocking_filter) {
backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
@@ -2671,8 +2676,7 @@ static void hl_decode_mb(H264Context *h){
MpegEncContext * const s = &h->s;
const int mb_xy= h->mb_xy;
const int mb_type= s->current_picture.mb_type[mb_xy];
- int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 ||
- (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding) || ENABLE_SMALL;
+ int is_complex = ENABLE_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
if(ENABLE_H264_ENCODER && !s->decode)
return;
@@ -2848,7 +2852,7 @@ static int decode_ref_pic_list_reordering(H264Context *h){
int pred= h->curr_pic_num;
for(index=0; ; index++){
- unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
+ unsigned int reordering_of_pic_nums_idc= get_ue_golomb_31(&s->gb);
unsigned int pic_id;
int i;
Picture *ref = NULL;
@@ -3414,7 +3418,7 @@ static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
}else{
if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
for(i= 0; i<MAX_MMCO_COUNT; i++) {
- MMCOOpcode opcode= get_ue_golomb(gb);
+ MMCOOpcode opcode= get_ue_golomb_31(gb);
h->mmco[i].opcode= opcode;
if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
@@ -3425,7 +3429,7 @@ static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
}*/
}
if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
- unsigned int long_arg= get_ue_golomb(gb);
+ unsigned int long_arg= get_ue_golomb_31(gb);
if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
return -1;
@@ -3631,7 +3635,6 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
unsigned int first_mb_in_slice;
unsigned int pps_id;
int num_ref_idx_active_override_flag;
- static const uint8_t slice_type_map[5]= {FF_P_TYPE, FF_B_TYPE, FF_I_TYPE, FF_SP_TYPE, FF_SI_TYPE};
unsigned int slice_type, tmp, i, j;
int default_ref_list_done = 0;
int last_pic_structure;
@@ -3654,7 +3657,7 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
s->current_picture_ptr= NULL;
}
- slice_type= get_ue_golomb(&s->gb);
+ slice_type= get_ue_golomb_31(&s->gb);
if(slice_type > 9){
av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
return -1;
@@ -3665,7 +3668,7 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
}else
h->slice_type_fixed=0;
- slice_type= slice_type_map[ slice_type ];
+ slice_type= golomb_to_pict_type[ slice_type ];
if (slice_type == FF_I_TYPE
|| (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
default_ref_list_done = 1;
@@ -3719,6 +3722,7 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
if(h != h0)
return -1; // width / height changed during parallelized decoding
free_tables(h);
+ flush_dpb(s->avctx);
MPV_common_end(s);
}
if (!s->context_initialized) {
@@ -3947,7 +3951,7 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
direct_ref_list_init(h);
if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
- tmp = get_ue_golomb(&s->gb);
+ tmp = get_ue_golomb_31(&s->gb);
if(tmp > 2){
av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
return -1;
@@ -3976,7 +3980,7 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
h->slice_alpha_c0_offset = 0;
h->slice_beta_offset = 0;
if( h->pps.deblocking_filter_parameters_present ) {
- tmp= get_ue_golomb(&s->gb);
+ tmp= get_ue_golomb_31(&s->gb);
if(tmp > 2){
av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
return -1;
@@ -4041,6 +4045,8 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
+ s->avctx->refs= h->sps.ref_frame_count;
+
if(s->avctx->debug&FF_DEBUG_PICT_INFO){
av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
h->slice_num,
@@ -4085,13 +4091,10 @@ static inline int get_level_prefix(GetBitContext *gb){
}
static inline int get_dct8x8_allowed(H264Context *h){
- int i;
- for(i=0; i<4; i++){
- if(!IS_SUB_8X8(h->sub_mb_type[i])
- || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
- return 0;
- }
- return 1;
+ if(h->sps.direct_8x8_inference_flag)
+ return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL));
+ else
+ return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL));
}
/**
@@ -4138,56 +4141,81 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
assert(total_coeff<=16);
- for(i=0; i<trailing_ones; i++){
- level[i]= 1 - 2*get_bits1(gb);
- }
+ i = show_bits(gb, 3);
+ skip_bits(gb, trailing_ones);
+ level[0] = 1-((i&4)>>1);
+ level[1] = 1-((i&2) );
+ level[2] = 1-((i&1)<<1);
- if(i<total_coeff) {
- int level_code, mask;
+ if(trailing_ones<total_coeff) {
+ int mask, prefix;
int suffix_length = total_coeff > 10 && trailing_ones < 3;
- int prefix= get_level_prefix(gb);
+ int bitsi= show_bits(gb, LEVEL_TAB_BITS);
+ int level_code= cavlc_level_tab[suffix_length][bitsi][0];
+
+ skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
+ if(level_code >= 100){
+ prefix= level_code - 100;
+ if(prefix == LEVEL_TAB_BITS)
+ prefix += get_level_prefix(gb);
+
+ //first coefficient has suffix_length equal to 0 or 1
+ if(prefix<14){ //FIXME try to build a large unified VLC table for all this
+ if(suffix_length)
+ level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
+ else
+ level_code= (prefix<<suffix_length); //part
+ }else if(prefix==14){
+ if(suffix_length)
+ level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
+ else
+ level_code= prefix + get_bits(gb, 4); //part
+ }else{
+ level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
+ if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
+ if(prefix>=16)
+ level_code += (1<<(prefix-3))-4096;
+ }
- //first coefficient has suffix_length equal to 0 or 1
- if(prefix<14){ //FIXME try to build a large unified VLC table for all this
- if(suffix_length)
- level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
- else
- level_code= (prefix<<suffix_length); //part
- }else if(prefix==14){
- if(suffix_length)
- level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
- else
- level_code= prefix + get_bits(gb, 4); //part
- }else{
- level_code= (15<<suffix_length) + get_bits(gb, prefix-3); //part
- if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
- if(prefix>=16)
- level_code += (1<<(prefix-3))-4096;
- }
+ if(trailing_ones < 3) level_code += 2;
- if(trailing_ones < 3) level_code += 2;
+ suffix_length = 2;
+ mask= -(level_code&1);
+ level[trailing_ones]= (((2+level_code)>>1) ^ mask) - mask;
+ }else{
+ if(trailing_ones < 3) level_code += (level_code>>31)|1;
- suffix_length = 1;
- if(level_code > 5)
- suffix_length++;
- mask= -(level_code&1);
- level[i]= (((2+level_code)>>1) ^ mask) - mask;
- i++;
+ suffix_length = 1;
+ if(level_code + 3U > 6U)
+ suffix_length++;
+ level[trailing_ones]= level_code;
+ }
//remaining coefficients have suffix_length > 0
- for(;i<total_coeff;i++) {
- static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
- prefix = get_level_prefix(gb);
- if(prefix<15){
- level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
- }else{
- level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
- if(prefix>=16)
- level_code += (1<<(prefix-3))-4096;
+ for(i=trailing_ones+1;i<total_coeff;i++) {
+ static const unsigned int suffix_limit[7] = {0,3,6,12,24,48,INT_MAX };
+ int bitsi= show_bits(gb, LEVEL_TAB_BITS);
+ level_code= cavlc_level_tab[suffix_length][bitsi][0];
+
+ skip_bits(gb, cavlc_level_tab[suffix_length][bitsi][1]);
+ if(level_code >= 100){
+ prefix= level_code - 100;
+ if(prefix == LEVEL_TAB_BITS){
+ prefix += get_level_prefix(gb);
+ }
+ if(prefix<15){
+ level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
+ }else{
+ level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
+ if(prefix>=16)
+ level_code += (1<<(prefix-3))-4096;
+ }
+ mask= -(level_code&1);
+ level_code= (((2+level_code)>>1) ^ mask) - mask;
}
- mask= -(level_code&1);
- level[i]= (((2+level_code)>>1) ^ mask) - mask;
- if(level_code > suffix_limit[suffix_length])
+ level[i]= level_code;
+
+ if(suffix_limit[suffix_length] + level_code > 2U*suffix_limit[suffix_length])
suffix_length++;
}
}
@@ -4310,8 +4338,6 @@ static int decode_mb_cavlc(H264Context *h){
mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
- s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
-
tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
cbp = 0; /* avoid warning. FIXME: find a solution without slowing
down the code */
@@ -4436,7 +4462,7 @@ decode_intra_mb:
return -1;
}
if(CHROMA){
- pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
+ pred_mode= check_intra_pred_mode(h, get_ue_golomb_31(&s->gb));
if(pred_mode < 0)
return -1;
h->chroma_pred_mode= pred_mode;
@@ -4446,7 +4472,7 @@ decode_intra_mb:
if(h->slice_type_nos == FF_B_TYPE){
for(i=0; i<4; i++){
- h->sub_mb_type[i]= get_ue_golomb(&s->gb);
+ h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
if(h->sub_mb_type[i] >=13){
av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
return -1;
@@ -4465,7 +4491,7 @@ decode_intra_mb:
}else{
assert(h->slice_type_nos == FF_P_TYPE); //FIXME SP correct ?
for(i=0; i<4; i++){
- h->sub_mb_type[i]= get_ue_golomb(&s->gb);
+ h->sub_mb_type[i]= get_ue_golomb_31(&s->gb);
if(h->sub_mb_type[i] >=4){
av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
return -1;
@@ -4480,10 +4506,17 @@ decode_intra_mb:
for(i=0; i<4; i++){
if(IS_DIRECT(h->sub_mb_type[i])) continue;
if(IS_DIR(h->sub_mb_type[i], 0, list)){
- unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
- if(tmp>=ref_count){
- av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
- return -1;
+ unsigned int tmp;
+ if(ref_count == 1){
+ tmp= 0;
+ }else if(ref_count == 2){
+ tmp= get_bits1(&s->gb)^1;
+ }else{
+ tmp= get_ue_golomb_31(&s->gb);
+ if(tmp>=ref_count){
+ av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
+ return -1;
+ }
}
ref[list][i]= tmp;
}else{
@@ -4549,10 +4582,16 @@ decode_intra_mb:
for(list=0; list<h->list_count; list++){
unsigned int val;
if(IS_DIR(mb_type, 0, list)){
- val= get_te0_golomb(&s->gb, h->ref_count[list]);
- if(val >= h->ref_count[list]){
- av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
- return -1;
+ if(h->ref_count[list]==1){
+ val= 0;
+ }else if(h->ref_count[list]==2){
+ val= get_bits1(&s->gb)^1;
+ }else{
+ val= get_ue_golomb_31(&s->gb);
+ if(val >= h->ref_count[list]){
+ av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
+ return -1;
+ }
}
}else
val= LIST_NOT_USED&0xFF;
@@ -4577,10 +4616,16 @@ decode_intra_mb:
for(i=0; i<2; i++){
unsigned int val;
if(IS_DIR(mb_type, i, list)){
- val= get_te0_golomb(&s->gb, h->ref_count[list]);
- if(val >= h->ref_count[list]){
- av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
- return -1;
+ if(h->ref_count[list] == 1){
+ val= 0;
+ }else if(h->ref_count[list] == 2){
+ val= get_bits1(&s->gb)^1;
+ }else{
+ val= get_ue_golomb_31(&s->gb);
+ if(val >= h->ref_count[list]){
+ av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
+ return -1;
+ }
}
}else
val= LIST_NOT_USED&0xFF;
@@ -4608,10 +4653,16 @@ decode_intra_mb:
for(i=0; i<2; i++){
unsigned int val;
if(IS_DIR(mb_type, i, list)){ //FIXME optimize
- val= get_te0_golomb(&s->gb, h->ref_count[list]);
- if(val >= h->ref_count[list]){
- av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
- return -1;
+ if(h->ref_count[list]==1){
+ val= 0;
+ }else if(h->ref_count[list]==2){
+ val= get_bits1(&s->gb)^1;
+ }else{
+ val= get_ue_golomb_31(&s->gb);
+ if(val >= h->ref_count[list]){
+ av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
+ return -1;
+ }
}
}else
val= LIST_NOT_USED&0xFF;
@@ -4836,29 +4887,14 @@ static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_sl
return mb_type;
}
-static int decode_cabac_mb_type( H264Context *h ) {
+static int decode_cabac_mb_type_b( H264Context *h ) {
MpegEncContext * const s = &h->s;
- if( h->slice_type_nos == FF_I_TYPE ) {
- return decode_cabac_intra_mb_type(h, 3, 1);
- } else if( h->slice_type_nos == FF_P_TYPE ) {
- if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
- /* P-type */
- if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
- /* P_L0_D16x16, P_8x8 */
- return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
- } else {
- /* P_L0_D8x16, P_L0_D16x8 */
- return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
- }
- } else {
- return decode_cabac_intra_mb_type(h, 17, 0) + 5;
- }
- } else if( h->slice_type_nos == FF_B_TYPE ) {
const int mba_xy = h->left_mb_xy[0];
const int mbb_xy = h->top_mb_xy;
int ctx = 0;
int bits;
+ assert(h->slice_type_nos == FF_B_TYPE);
if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
ctx++;
@@ -4887,10 +4923,6 @@ static int decode_cabac_mb_type( H264Context *h ) {
bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
- } else {
- /* TODO SI/SP frames? */
- return -1;
- }
}
static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
@@ -5004,26 +5036,20 @@ static int decode_cabac_mb_cbp_chroma( H264Context *h) {
return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
}
static int decode_cabac_mb_dqp( H264Context *h) {
- int ctx = 0;
+ int ctx= h->last_qscale_diff != 0;
int val = 0;
- if( h->last_qscale_diff != 0 )
- ctx++;
-
while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
- if( ctx < 2 )
- ctx = 2;
- else
- ctx = 3;
+ ctx= 2+(ctx>>1);
val++;
if(val > 102) //prevent infinite loop
return INT_MIN;
}
if( val&0x01 )
- return (val + 1)/2;
+ return (val + 1)>>1 ;
else
- return -(val + 1)/2;
+ return -((val + 1)>>1);
}
static int decode_cabac_p_mb_sub_type( H264Context *h ) {
if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
@@ -5075,13 +5101,9 @@ static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
ref++;
- if( ctx < 4 )
- ctx = 4;
- else
- ctx = 5;
+ ctx = (ctx>>2)+4;
if(ref >= 32 /*h->ref_list[list]*/){
- av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_ref\n");
- return 0; //FIXME we should return -1 and check the return everywhere
+ return -1;
}
}
return ref;
@@ -5091,14 +5113,8 @@ static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
abs( h->mvd_cache[list][scan8[n] - 8][l] );
int ctxbase = (l == 0) ? 40 : 47;
- int ctx, mvd;
-
- if( amvd < 3 )
- ctx = 0;
- else if( amvd > 32 )
- ctx = 2;
- else
- ctx = 1;
+ int mvd;
+ int ctx = (amvd>2) + (amvd>32);
if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
return 0;
@@ -5142,14 +5158,9 @@ static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx,
nzb = (h-> top_cbp>>(6+idx))&0x01;
}
} else {
- if( cat == 4 ) {
- nza = h->non_zero_count_cache[scan8[16+idx] - 1];
- nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
- } else {
- assert(cat == 1 || cat == 2);
- nza = h->non_zero_count_cache[scan8[idx] - 1];
- nzb = h->non_zero_count_cache[scan8[idx] - 8];
- }
+ assert(cat == 1 || cat == 2 || cat == 4);
+ nza = h->non_zero_count_cache[scan8[idx] - 1];
+ nzb = h->non_zero_count_cache[scan8[idx] - 8];
}
if( nza > 0 )
@@ -5231,19 +5242,15 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
* 1-> AC 16x16 n = luma4x4idx
* 2-> Luma4x4 n = luma4x4idx
* 3-> DC Chroma n = iCbCr
- * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
+ * 4-> AC Chroma n = 16 + 4 * iCbCr + chroma4x4idx
* 5-> Luma8x8 n = 4 * luma8x8idx
*/
/* read coded block flag */
if( is_dc || cat != 5 ) {
if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
- if( !is_dc ) {
- if( cat == 4 )
- h->non_zero_count_cache[scan8[16+n]] = 0;
- else
- h->non_zero_count_cache[scan8[n]] = 0;
- }
+ if( !is_dc )
+ h->non_zero_count_cache[scan8[n]] = 0;
#ifdef CABAC_ON_STACK
h->cabac.range = cc.range ;
@@ -5298,10 +5305,8 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT
} else {
if( cat == 5 )
fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
- else if( cat == 4 )
- h->non_zero_count_cache[scan8[16+n]] = coeff_count;
else {
- assert( cat == 1 || cat == 2 );
+ assert( cat == 1 || cat == 2 || cat == 4 );
h->non_zero_count_cache[scan8[n]] = coeff_count;
}
}
@@ -5383,17 +5388,15 @@ static inline void compute_mb_neighbors(H264Context *h)
if(FRAME_MBAFF){
const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
const int top_pair_xy = pair_xy - s->mb_stride;
- const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
- const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
- const int curr_mb_frame_flag = !MB_FIELD;
+ const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
+ const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
+ const int curr_mb_field_flag = MB_FIELD;
const int bottom = (s->mb_y & 1);
- if (bottom
- ? !curr_mb_frame_flag // bottom macroblock
- : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
- ) {
+
+ if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
h->top_mb_xy -= s->mb_stride;
}
- if (left_mb_frame_flag != curr_mb_frame_flag) {
+ if (!left_mb_field_flag == curr_mb_field_flag) {
h->left_mb_xy[0] = pair_xy - 1;
}
} else if (FIELD_PICTURE) {
@@ -5414,8 +5417,6 @@ static int decode_mb_cabac(H264Context *h) {
mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
- s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
-
tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
if( h->slice_type_nos != FF_I_TYPE ) {
int skip;
@@ -5431,9 +5432,7 @@ static int decode_mb_cabac(H264Context *h) {
if( FRAME_MBAFF && (s->mb_y&1)==0 ){
s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
- if(h->next_mb_skipped)
- predict_field_decoding_flag(h);
- else
+ if(!h->next_mb_skipped)
h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
}
@@ -5456,12 +5455,9 @@ static int decode_mb_cabac(H264Context *h) {
h->prev_mb_skipped = 0;
compute_mb_neighbors(h);
- if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
- av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
- return -1;
- }
if( h->slice_type_nos == FF_B_TYPE ) {
+ mb_type = decode_cabac_mb_type_b( h );
if( mb_type < 23 ){
partition_count= b_mb_type_info[mb_type].partition_count;
mb_type= b_mb_type_info[mb_type].type;
@@ -5470,14 +5466,23 @@ static int decode_mb_cabac(H264Context *h) {
goto decode_intra_mb;
}
} else if( h->slice_type_nos == FF_P_TYPE ) {
- if( mb_type < 5) {
+ if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
+ /* P-type */
+ if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
+ /* P_L0_D16x16, P_8x8 */
+ mb_type= 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
+ } else {
+ /* P_L0_D8x16, P_L0_D16x8 */
+ mb_type= 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
+ }
partition_count= p_mb_type_info[mb_type].partition_count;
mb_type= p_mb_type_info[mb_type].type;
} else {
- mb_type -= 5;
+ mb_type= decode_cabac_intra_mb_type(h, 17, 0);
goto decode_intra_mb;
}
} else {
+ mb_type= decode_cabac_intra_mb_type(h, 3, 1);
if(h->slice_type == FF_SI_TYPE && mb_type)
mb_type--;
assert(h->slice_type_nos == FF_I_TYPE);
@@ -5597,9 +5602,13 @@ decode_intra_mb:
for( i = 0; i < 4; i++ ) {
if(IS_DIRECT(h->sub_mb_type[i])) continue;
if(IS_DIR(h->sub_mb_type[i], 0, list)){
- if( h->ref_count[list] > 1 )
+ if( h->ref_count[list] > 1 ){
ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
- else
+ if(ref[list][i] >= (unsigned)h->ref_count[list]){
+ av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref[list][i], h->ref_count[list]);
+ return -1;
+ }
+ }else
ref[list][i] = 0;
} else {
ref[list][i] = -1;
@@ -5682,7 +5691,15 @@ decode_intra_mb:
if(IS_16X16(mb_type)){
for(list=0; list<h->list_count; list++){
if(IS_DIR(mb_type, 0, list)){
- const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
+ int ref;
+ if(h->ref_count[list] > 1){
+ ref= decode_cabac_mb_ref(h, list, 0);
+ if(ref >= (unsigned)h->ref_count[list]){
+ av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
+ return -1;
+ }
+ }else
+ ref=0;
fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
}else
fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
@@ -5705,7 +5722,15 @@ decode_intra_mb:
for(list=0; list<h->list_count; list++){
for(i=0; i<2; i++){
if(IS_DIR(mb_type, i, list)){
- const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
+ int ref;
+ if(h->ref_count[list] > 1){
+ ref= decode_cabac_mb_ref( h, list, 8*i );
+ if(ref >= (unsigned)h->ref_count[list]){
+ av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
+ return -1;
+ }
+ }else
+ ref=0;
fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
}else
fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
@@ -5732,7 +5757,15 @@ decode_intra_mb:
for(list=0; list<h->list_count; list++){
for(i=0; i<2; i++){
if(IS_DIR(mb_type, i, list)){ //FIXME optimize
- const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
+ int ref;
+ if(h->ref_count[list] > 1){
+ ref= decode_cabac_mb_ref( h, list, 4*i );
+ if(ref >= (unsigned)h->ref_count[list]){
+ av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
+ return -1;
+ }
+ }else
+ ref=0;
fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
}else
fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
@@ -5857,7 +5890,7 @@ decode_intra_mb:
for( i = 0; i < 4; i++ ) {
const int index = 16 + 4 * c + i;
//av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
- decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, qmul, 15);
+ decode_cabac_residual(h, h->mb + 16*index, 4, index, scan + 1, qmul, 15);
}
}
} else {
@@ -5886,76 +5919,32 @@ decode_intra_mb:
static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
- int i, d;
const int index_a = qp + h->slice_alpha_c0_offset;
const int alpha = (alpha_table+52)[index_a];
const int beta = (beta_table+52)[qp + h->slice_beta_offset];
if( bS[0] < 4 ) {
int8_t tc[4];
- for(i=0; i<4; i++)
- tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
+ tc[0] = (tc0_table+52)[index_a][bS[0]];
+ tc[1] = (tc0_table+52)[index_a][bS[1]];
+ tc[2] = (tc0_table+52)[index_a][bS[2]];
+ tc[3] = (tc0_table+52)[index_a][bS[3]];
h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
} else {
- /* 16px edge length, because bS=4 is triggered by being at
- * the edge of an intra MB, so all 4 bS are the same */
- for( d = 0; d < 16; d++ ) {
- const int p0 = pix[-1];
- const int p1 = pix[-2];
- const int p2 = pix[-3];
-
- const int q0 = pix[0];
- const int q1 = pix[1];
- const int q2 = pix[2];
-
- if( FFABS( p0 - q0 ) < alpha &&
- FFABS( p1 - p0 ) < beta &&
- FFABS( q1 - q0 ) < beta ) {
-
- if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
- if( FFABS( p2 - p0 ) < beta)
- {
- const int p3 = pix[-4];
- /* p0', p1', p2' */
- pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
- pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
- pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
- } else {
- /* p0' */
- pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
- }
- if( FFABS( q2 - q0 ) < beta)
- {
- const int q3 = pix[3];
- /* q0', q1', q2' */
- pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
- pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
- pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
- } else {
- /* q0' */
- pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
- }
- }else{
- /* p0', q0' */
- pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
- pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
- }
- tprintf(h->s.avctx, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
- }
- pix += stride;
- }
+ h->s.dsp.h264_h_loop_filter_luma_intra(pix, stride, alpha, beta);
}
}
static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
- int i;
const int index_a = qp + h->slice_alpha_c0_offset;
const int alpha = (alpha_table+52)[index_a];
const int beta = (beta_table+52)[qp + h->slice_beta_offset];
if( bS[0] < 4 ) {
int8_t tc[4];
- for(i=0; i<4; i++)
- tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
+ tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
+ tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
+ tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
+ tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
} else {
h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
@@ -5986,7 +5975,7 @@ static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int
beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
if( bS[bS_index] < 4 ) {
- const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
+ const int tc0 = (tc0_table+52)[index_a][bS[bS_index]];
const int p0 = pix[-1];
const int p1 = pix[-2];
const int p2 = pix[-3];
@@ -6080,7 +6069,7 @@ static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, in
beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
if( bS[bS_index] < 4 ) {
- const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
+ const int tc = (tc0_table+52)[index_a][bS[bS_index]] + 1;
const int p0 = pix[-1];
const int p1 = pix[-2];
const int q0 = pix[0];
@@ -6114,75 +6103,33 @@ static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, in
}
static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
- int i, d;
const int index_a = qp + h->slice_alpha_c0_offset;
const int alpha = (alpha_table+52)[index_a];
const int beta = (beta_table+52)[qp + h->slice_beta_offset];
- const int pix_next = stride;
if( bS[0] < 4 ) {
int8_t tc[4];
- for(i=0; i<4; i++)
- tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
+ tc[0] = (tc0_table+52)[index_a][bS[0]];
+ tc[1] = (tc0_table+52)[index_a][bS[1]];
+ tc[2] = (tc0_table+52)[index_a][bS[2]];
+ tc[3] = (tc0_table+52)[index_a][bS[3]];
h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
} else {
- /* 16px edge length, see filter_mb_edgev */
- for( d = 0; d < 16; d++ ) {
- const int p0 = pix[-1*pix_next];
- const int p1 = pix[-2*pix_next];
- const int p2 = pix[-3*pix_next];
- const int q0 = pix[0];
- const int q1 = pix[1*pix_next];
- const int q2 = pix[2*pix_next];
-
- if( FFABS( p0 - q0 ) < alpha &&
- FFABS( p1 - p0 ) < beta &&
- FFABS( q1 - q0 ) < beta ) {
-
- const int p3 = pix[-4*pix_next];
- const int q3 = pix[ 3*pix_next];
-
- if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
- if( FFABS( p2 - p0 ) < beta) {
- /* p0', p1', p2' */
- pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
- pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
- pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
- } else {
- /* p0' */
- pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
- }
- if( FFABS( q2 - q0 ) < beta) {
- /* q0', q1', q2' */
- pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
- pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
- pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
- } else {
- /* q0' */
- pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
- }
- }else{
- /* p0', q0' */
- pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
- pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
- }
- tprintf(h->s.avctx, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
- }
- pix++;
- }
+ h->s.dsp.h264_v_loop_filter_luma_intra(pix, stride, alpha, beta);
}
}
static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
- int i;
const int index_a = qp + h->slice_alpha_c0_offset;
const int alpha = (alpha_table+52)[index_a];
const int beta = (beta_table+52)[qp + h->slice_beta_offset];
if( bS[0] < 4 ) {
int8_t tc[4];
- for(i=0; i<4; i++)
- tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
+ tc[0] = (tc0_table+52)[index_a][bS[0]]+1;
+ tc[1] = (tc0_table+52)[index_a][bS[1]]+1;
+ tc[2] = (tc0_table+52)[index_a][bS[2]]+1;
+ tc[3] = (tc0_table+52)[index_a][bS[3]]+1;
h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
} else {
h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
@@ -6198,7 +6145,7 @@ static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
mb_xy = h->mb_xy;
if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
-1 ||
+ !(s->flags2 & CODEC_FLAG2_FAST) || //FIXME filter_mb_fast is broken, thus hasto be, but should not under CODEC_FLAG2_FAST
(h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
@@ -6303,6 +6250,210 @@ static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y,
}
}
+
+static void av_always_inline filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) {
+ MpegEncContext * const s = &h->s;
+ int edge;
+ const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
+ const int mbm_type = s->current_picture.mb_type[mbm_xy];
+ int (*ref2frm) [64] = h->ref2frm[ h->slice_num &(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
+ int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
+ int start = h->slice_table[mbm_xy] == 0xFFFF ? 1 : 0;
+
+ const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
+ == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
+ // how often to recheck mv-based bS when iterating between edges
+ const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
+ (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
+ // how often to recheck mv-based bS when iterating along each edge
+ const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
+
+ if (first_vertical_edge_done) {
+ start = 1;
+ }
+
+ if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
+ start = 1;
+
+ if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
+ && !IS_INTERLACED(mb_type)
+ && IS_INTERLACED(mbm_type)
+ ) {
+ // This is a special case in the norm where the filtering must
+ // be done twice (one each of the field) even if we are in a
+ // frame macroblock.
+ //
+ static const int nnz_idx[4] = {4,5,6,3};
+ unsigned int tmp_linesize = 2 * linesize;
+ unsigned int tmp_uvlinesize = 2 * uvlinesize;
+ int mbn_xy = mb_xy - 2 * s->mb_stride;
+ int qp;
+ int i, j;
+ int16_t bS[4];
+
+ for(j=0; j<2; j++, mbn_xy += s->mb_stride){
+ if( IS_INTRA(mb_type) ||
+ IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
+ bS[0] = bS[1] = bS[2] = bS[3] = 3;
+ } else {
+ const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
+ for( i = 0; i < 4; i++ ) {
+ if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
+ mbn_nnz[nnz_idx[i]] != 0 )
+ bS[i] = 2;
+ else
+ bS[i] = 1;
+ }
+ }
+ // Do not use s->qscale as luma quantizer because it has not the same
+ // value in IPCM macroblocks.
+ qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
+ tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
+ { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
+ filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
+ filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
+ ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
+ filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
+ ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
+ }
+
+ start = 1;
+ }
+
+ /* Calculate bS */
+ for( edge = start; edge < edges; edge++ ) {
+ /* mbn_xy: neighbor macroblock */
+ const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
+ const int mbn_type = s->current_picture.mb_type[mbn_xy];
+ int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
+ int16_t bS[4];
+ int qp;
+
+ if( (edge&1) && IS_8x8DCT(mb_type) )
+ continue;
+
+ if( IS_INTRA(mb_type) ||
+ IS_INTRA(mbn_type) ) {
+ int value;
+ if (edge == 0) {
+ if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
+ || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
+ ) {
+ value = 4;
+ } else {
+ value = 3;
+ }
+ } else {
+ value = 3;
+ }
+ bS[0] = bS[1] = bS[2] = bS[3] = value;
+ } else {
+ int i, l;
+ int mv_done;
+
+ if( edge & mask_edge ) {
+ bS[0] = bS[1] = bS[2] = bS[3] = 0;
+ mv_done = 1;
+ }
+ else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
+ bS[0] = bS[1] = bS[2] = bS[3] = 1;
+ mv_done = 1;
+ }
+ else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
+ int b_idx= 8 + 4 + edge * (dir ? 8:1);
+ int bn_idx= b_idx - (dir ? 8:1);
+ int v = 0;
+
+ for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
+ v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
+ FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
+ FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
+ }
+
+ if(h->slice_type_nos == FF_B_TYPE && v){
+ v=0;
+ for( l = 0; !v && l < 2; l++ ) {
+ int ln= 1-l;
+ v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
+ FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
+ FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
+ }
+ }
+
+ bS[0] = bS[1] = bS[2] = bS[3] = v;
+ mv_done = 1;
+ }
+ else
+ mv_done = 0;
+
+ for( i = 0; i < 4; i++ ) {
+ int x = dir == 0 ? edge : i;
+ int y = dir == 0 ? i : edge;
+ int b_idx= 8 + 4 + x + 8*y;
+ int bn_idx= b_idx - (dir ? 8:1);
+
+ if( h->non_zero_count_cache[b_idx] |
+ h->non_zero_count_cache[bn_idx] ) {
+ bS[i] = 2;
+ }
+ else if(!mv_done)
+ {
+ bS[i] = 0;
+ for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
+ if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
+ FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
+ FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
+ bS[i] = 1;
+ break;
+ }
+ }
+
+ if(h->slice_type_nos == FF_B_TYPE && bS[i]){
+ bS[i] = 0;
+ for( l = 0; l < 2; l++ ) {
+ int ln= 1-l;
+ if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
+ FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
+ FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
+ bS[i] = 1;
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
+ continue;
+ }
+
+ /* Filter edge */
+ // Do not use s->qscale as luma quantizer because it has not the same
+ // value in IPCM macroblocks.
+ qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
+ //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
+ tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
+ { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
+ if( dir == 0 ) {
+ filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
+ if( (edge&1) == 0 ) {
+ filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
+ ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
+ filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
+ ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
+ }
+ } else {
+ filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
+ if( (edge&1) == 0 ) {
+ filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
+ ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
+ filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
+ ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
+ }
+ }
+ }
+}
+
static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
MpegEncContext * const s = &h->s;
const int mb_xy= mb_x + mb_y*s->mb_stride;
@@ -6347,16 +6498,16 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8
if(IS_8x8DCT(mb_type)){
h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
- h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp_table[mb_xy] & 1;
+ h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp & 1;
h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
- h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp_table[mb_xy] & 2;
+ h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;
h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
- h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp_table[mb_xy] & 4;
+ h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;
h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
- h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp_table[mb_xy] & 8;
+ h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
}
}
@@ -6420,210 +6571,14 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8
filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
}
- /* dir : 0 -> vertical edge, 1 -> horizontal edge */
- for( dir = 0; dir < 2; dir++ )
- {
- int edge;
- const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
- const int mbm_type = s->current_picture.mb_type[mbm_xy];
- int (*ref2frm) [64] = h->ref2frm[ h->slice_num &(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
- int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
- int start = h->slice_table[mbm_xy] == 0xFFFF ? 1 : 0;
-
- const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
- == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
- // how often to recheck mv-based bS when iterating between edges
- const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
- (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
- // how often to recheck mv-based bS when iterating along each edge
- const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
-
- if (first_vertical_edge_done) {
- start = 1;
- first_vertical_edge_done = 0;
- }
-
- if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
- start = 1;
-
- if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
- && !IS_INTERLACED(mb_type)
- && IS_INTERLACED(mbm_type)
- ) {
- // This is a special case in the norm where the filtering must
- // be done twice (one each of the field) even if we are in a
- // frame macroblock.
- //
- static const int nnz_idx[4] = {4,5,6,3};
- unsigned int tmp_linesize = 2 * linesize;
- unsigned int tmp_uvlinesize = 2 * uvlinesize;
- int mbn_xy = mb_xy - 2 * s->mb_stride;
- int qp;
- int i, j;
- int16_t bS[4];
-
- for(j=0; j<2; j++, mbn_xy += s->mb_stride){
- if( IS_INTRA(mb_type) ||
- IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
- bS[0] = bS[1] = bS[2] = bS[3] = 3;
- } else {
- const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
- for( i = 0; i < 4; i++ ) {
- if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
- mbn_nnz[nnz_idx[i]] != 0 )
- bS[i] = 2;
- else
- bS[i] = 1;
- }
- }
- // Do not use s->qscale as luma quantizer because it has not the same
- // value in IPCM macroblocks.
- qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
- tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
- { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
- filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
- filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
- ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
- filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
- ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
- }
-
- start = 1;
- }
-
- /* Calculate bS */
- for( edge = start; edge < edges; edge++ ) {
- /* mbn_xy: neighbor macroblock */
- const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
- const int mbn_type = s->current_picture.mb_type[mbn_xy];
- int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
- int16_t bS[4];
- int qp;
-
- if( (edge&1) && IS_8x8DCT(mb_type) )
- continue;
- if( IS_INTRA(mb_type) ||
- IS_INTRA(mbn_type) ) {
- int value;
- if (edge == 0) {
- if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
- || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
- ) {
- value = 4;
- } else {
- value = 3;
- }
- } else {
- value = 3;
- }
- bS[0] = bS[1] = bS[2] = bS[3] = value;
- } else {
- int i, l;
- int mv_done;
-
- if( edge & mask_edge ) {
- bS[0] = bS[1] = bS[2] = bS[3] = 0;
- mv_done = 1;
- }
- else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
- bS[0] = bS[1] = bS[2] = bS[3] = 1;
- mv_done = 1;
- }
- else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
- int b_idx= 8 + 4 + edge * (dir ? 8:1);
- int bn_idx= b_idx - (dir ? 8:1);
- int v = 0;
-
- for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
- v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
- FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
- FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
- }
-
- if(h->slice_type_nos == FF_B_TYPE && v){
- v=0;
- for( l = 0; !v && l < 2; l++ ) {
- int ln= 1-l;
- v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
- FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
- FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
- }
- }
-
- bS[0] = bS[1] = bS[2] = bS[3] = v;
- mv_done = 1;
- }
- else
- mv_done = 0;
-
- for( i = 0; i < 4; i++ ) {
- int x = dir == 0 ? edge : i;
- int y = dir == 0 ? i : edge;
- int b_idx= 8 + 4 + x + 8*y;
- int bn_idx= b_idx - (dir ? 8:1);
-
- if( h->non_zero_count_cache[b_idx] != 0 ||
- h->non_zero_count_cache[bn_idx] != 0 ) {
- bS[i] = 2;
- }
- else if(!mv_done)
- {
- bS[i] = 0;
- for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
- if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] ||
- FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
- FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
- bS[i] = 1;
- break;
- }
- }
-
- if(h->slice_type_nos == FF_B_TYPE && bS[i]){
- bS[i] = 0;
- for( l = 0; l < 2; l++ ) {
- int ln= 1-l;
- if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] ||
- FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] ) >= 4 ||
- FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
- bS[i] = 1;
- break;
- }
- }
- }
- }
- }
-
- if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
- continue;
- }
-
- /* Filter edge */
- // Do not use s->qscale as luma quantizer because it has not the same
- // value in IPCM macroblocks.
- qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
- //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
- tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
- { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
- if( dir == 0 ) {
- filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
- if( (edge&1) == 0 ) {
- filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
- ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
- filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
- ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
- }
- } else {
- filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
- if( (edge&1) == 0 ) {
- filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
- ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
- filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
- ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
- }
- }
- }
- }
+#ifdef CONFIG_SMALL
+ for( dir = 0; dir < 2; dir++ )
+ filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, dir);
+#else
+ filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, 0);
+ filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, 1);
+#endif
}
static int decode_slice(struct AVCodecContext *avctx, void *arg){
@@ -6633,6 +6588,9 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg){
s->mb_skip_run= -1;
+ h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
+ (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || (ENABLE_H264_ENCODER && s->encoding);
+
if( h->pps.cabac ) {
int i;
@@ -6669,7 +6627,7 @@ static int decode_slice(struct AVCodecContext *avctx, void *arg){
if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
s->mb_y++;
- if(ret>=0) ret = decode_mb_cabac(h);
+ ret = decode_mb_cabac(h);
if(ret>=0) hl_decode_mb(h);
s->mb_y--;
@@ -6913,10 +6871,16 @@ static int decode_sei(H264Context *h){
return 0;
}
-static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
+static inline int decode_hrd_parameters(H264Context *h, SPS *sps){
MpegEncContext * const s = &h->s;
int cpb_count, i;
- cpb_count = get_ue_golomb(&s->gb) + 1;
+ cpb_count = get_ue_golomb_31(&s->gb) + 1;
+
+ if(cpb_count > 32U){
+ av_log(h->s.avctx, AV_LOG_ERROR, "cpb_count %d invalid\n", cpb_count);
+ return -1;
+ }
+
get_bits(&s->gb, 4); /* bit_rate_scale */
get_bits(&s->gb, 4); /* cpb_size_scale */
for(i=0; i<cpb_count; i++){
@@ -6928,6 +6892,7 @@ static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
sps->cpb_removal_delay_length = get_bits(&s->gb, 5) + 1;
sps->dpb_output_delay_length = get_bits(&s->gb, 5) + 1;
sps->time_offset_length = get_bits(&s->gb, 5);
+ return 0;
}
static inline int decode_vui_parameters(H264Context *h, SPS *sps){
@@ -6982,31 +6947,30 @@ static inline int decode_vui_parameters(H264Context *h, SPS *sps){
sps->nal_hrd_parameters_present_flag = get_bits1(&s->gb);
if(sps->nal_hrd_parameters_present_flag)
- decode_hrd_parameters(h, sps);
+ if(decode_hrd_parameters(h, sps) < 0)
+ return -1;
sps->vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
if(sps->vcl_hrd_parameters_present_flag)
- decode_hrd_parameters(h, sps);
+ if(decode_hrd_parameters(h, sps) < 0)
+ return -1;
if(sps->nal_hrd_parameters_present_flag || sps->vcl_hrd_parameters_present_flag)
get_bits1(&s->gb); /* low_delay_hrd_flag */
sps->pic_struct_present_flag = get_bits1(&s->gb);
sps->bitstream_restriction_flag = get_bits1(&s->gb);
if(sps->bitstream_restriction_flag){
- unsigned int num_reorder_frames;
get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
- num_reorder_frames= get_ue_golomb(&s->gb);
+ sps->num_reorder_frames= get_ue_golomb(&s->gb);
get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
- if(num_reorder_frames > 16 /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
- av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", num_reorder_frames);
+ if(sps->num_reorder_frames > 16U /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
+ av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", sps->num_reorder_frames);
return -1;
}
-
- sps->num_reorder_frames= num_reorder_frames;
}
return 0;
@@ -7056,30 +7020,10 @@ static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_s
}
}
-/**
- * Returns and optionally allocates SPS / PPS structures in the supplied array 'vec'
- */
-static void *
-alloc_parameter_set(H264Context *h, void **vec, const unsigned int id, const unsigned int max,
- const size_t size, const char *name)
-{
- if(id>=max) {
- av_log(h->s.avctx, AV_LOG_ERROR, "%s_id (%d) out of range\n", name, id);
- return NULL;
- }
-
- if(!vec[id]) {
- vec[id] = av_mallocz(size);
- if(vec[id] == NULL)
- av_log(h->s.avctx, AV_LOG_ERROR, "cannot allocate memory for %s\n", name);
- }
- return vec[id];
-}
-
static inline int decode_seq_parameter_set(H264Context *h){
MpegEncContext * const s = &h->s;
int profile_idc, level_idc;
- unsigned int sps_id, tmp, mb_width, mb_height;
+ unsigned int sps_id;
int i;
SPS *sps;
@@ -7090,9 +7034,13 @@ static inline int decode_seq_parameter_set(H264Context *h){
get_bits1(&s->gb); //constraint_set3_flag
get_bits(&s->gb, 4); // reserved
level_idc= get_bits(&s->gb, 8);
- sps_id= get_ue_golomb(&s->gb);
+ sps_id= get_ue_golomb_31(&s->gb);
- sps = alloc_parameter_set(h, (void **)h->sps_buffers, sps_id, MAX_SPS_COUNT, sizeof(SPS), "sps");
+ if(sps_id >= MAX_SPS_COUNT) {
+ av_log(h->s.avctx, AV_LOG_ERROR, "sps_id (%d) out of range\n", sps_id);
+ return -1;
+ }
+ sps= av_mallocz(sizeof(SPS));
if(sps == NULL)
return -1;
@@ -7104,7 +7052,7 @@ static inline int decode_seq_parameter_set(H264Context *h){
sps->scaling_matrix_present = 0;
if(sps->profile_idc >= 100){ //high profile
- sps->chroma_format_idc= get_ue_golomb(&s->gb);
+ sps->chroma_format_idc= get_ue_golomb_31(&s->gb);
if(sps->chroma_format_idc == 3)
get_bits1(&s->gb); //residual_color_transform_flag
get_ue_golomb(&s->gb); //bit_depth_luma_minus8
@@ -7116,7 +7064,7 @@ static inline int decode_seq_parameter_set(H264Context *h){
}
sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
- sps->poc_type= get_ue_golomb(&s->gb);
+ sps->poc_type= get_ue_golomb_31(&s->gb);
if(sps->poc_type == 0){ //FIXME #define
sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
@@ -7124,37 +7072,33 @@ static inline int decode_seq_parameter_set(H264Context *h){
sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
- tmp= get_ue_golomb(&s->gb);
+ sps->poc_cycle_length = get_ue_golomb(&s->gb);
- if(tmp >= FF_ARRAY_ELEMS(sps->offset_for_ref_frame)){
- av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", tmp);
- return -1;
+ if((unsigned)sps->poc_cycle_length >= FF_ARRAY_ELEMS(sps->offset_for_ref_frame)){
+ av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", sps->poc_cycle_length);
+ goto fail;
}
- sps->poc_cycle_length= tmp;
for(i=0; i<sps->poc_cycle_length; i++)
sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
}else if(sps->poc_type != 2){
av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
- return -1;
+ goto fail;
}
- tmp= get_ue_golomb(&s->gb);
- if(tmp > MAX_PICTURE_COUNT-2 || tmp >= 32){
+ sps->ref_frame_count= get_ue_golomb_31(&s->gb);
+ if(sps->ref_frame_count > MAX_PICTURE_COUNT-2 || sps->ref_frame_count >= 32U){
av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
- return -1;
+ goto fail;
}
- sps->ref_frame_count= tmp;
sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
- mb_width= get_ue_golomb(&s->gb) + 1;
- mb_height= get_ue_golomb(&s->gb) + 1;
- if(mb_width >= INT_MAX/16 || mb_height >= INT_MAX/16 ||
- avcodec_check_dimensions(NULL, 16*mb_width, 16*mb_height)){
+ sps->mb_width = get_ue_golomb(&s->gb) + 1;
+ sps->mb_height= get_ue_golomb(&s->gb) + 1;
+ if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
+ avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height)){
av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
- return -1;
+ goto fail;
}
- sps->mb_width = mb_width;
- sps->mb_height= mb_height;
sps->frame_mbs_only_flag= get_bits1(&s->gb);
if(!sps->frame_mbs_only_flag)
@@ -7205,7 +7149,12 @@ static inline int decode_seq_parameter_set(H264Context *h){
((const char*[]){"Gray","420","422","444"})[sps->chroma_format_idc]
);
}
+ av_free(h->sps_buffers[sps_id]);
+ h->sps_buffers[sps_id]= sps;
return 0;
+fail:
+ av_free(sps);
+ return -1;
}
static void
@@ -7218,19 +7167,22 @@ build_qp_table(PPS *pps, int t, int index)
static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
MpegEncContext * const s = &h->s;
- unsigned int tmp, pps_id= get_ue_golomb(&s->gb);
+ unsigned int pps_id= get_ue_golomb(&s->gb);
PPS *pps;
- pps = alloc_parameter_set(h, (void **)h->pps_buffers, pps_id, MAX_PPS_COUNT, sizeof(PPS), "pps");
- if(pps == NULL)
+ if(pps_id >= MAX_PPS_COUNT) {
+ av_log(h->s.avctx, AV_LOG_ERROR, "pps_id (%d) out of range\n", pps_id);
return -1;
+ }
- tmp= get_ue_golomb(&s->gb);
- if(tmp>=MAX_SPS_COUNT || h->sps_buffers[tmp] == NULL){
- av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
+ pps= av_mallocz(sizeof(PPS));
+ if(pps == NULL)
return -1;
+ pps->sps_id= get_ue_golomb_31(&s->gb);
+ if((unsigned)pps->sps_id>=MAX_SPS_COUNT || h->sps_buffers[pps->sps_id] == NULL){
+ av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
+ goto fail;
}
- pps->sps_id= tmp;
pps->cabac= get_bits1(&s->gb);
pps->pic_order_present= get_bits1(&s->gb);
@@ -7276,8 +7228,7 @@ static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
- pps->ref_count[0]= pps->ref_count[1]= 1;
- return -1;
+ goto fail;
}
pps->weighted_pred= get_bits1(&s->gb);
@@ -7322,7 +7273,12 @@ static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
);
}
+ av_free(h->pps_buffers[pps_id]);
+ h->pps_buffers[pps_id]= pps;
return 0;
+fail:
+ av_free(pps);
+ return -1;
}
/**
@@ -7337,6 +7293,8 @@ static void execute_decode_slices(H264Context *h, int context_count){
H264Context *hx;
int i;
+ if(avctx->codec_id == CODEC_ID_H264_VDPAU)
+ return;
if(context_count == 1) {
decode_slice(avctx, &h);
} else {
@@ -7464,8 +7422,14 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
&& (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
&& (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
&& (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
- && avctx->skip_frame < AVDISCARD_ALL)
- context_count++;
+ && avctx->skip_frame < AVDISCARD_ALL){
+ if(ENABLE_H264_VDPAU_DECODER && avctx->codec_id == CODEC_ID_H264_VDPAU){
+ static const uint8_t start_code[] = {0x00, 0x00, 0x01};
+ ff_vdpau_h264_add_data_chunk(h, start_code, sizeof(start_code));
+ ff_vdpau_h264_add_data_chunk(h, &buf[buf_index - consumed], consumed );
+ }else
+ context_count++;
+ }
break;
case NAL_DPA:
init_get_bits(&hx->s.gb, ptr, bit_length);
@@ -7668,6 +7632,9 @@ static int decode_frame(AVCodecContext *avctx,
h->prev_frame_num_offset= h->frame_num_offset;
h->prev_frame_num= h->frame_num;
+ if (ENABLE_H264_VDPAU_DECODER && avctx->codec_id == CODEC_ID_H264_VDPAU)
+ ff_vdpau_h264_picture_complete(h);
+
/*
* FIXME: Error handling code does not seem to support interlaced
* when slices span multiple rows
@@ -8053,4 +8020,22 @@ AVCodec h264_decoder = {
.long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
};
+#ifdef CONFIG_H264_VDPAU_DECODER
+AVCodec h264_vdpau_decoder = {
+ "h264_vdpau",
+ CODEC_TYPE_VIDEO,
+ CODEC_ID_H264_VDPAU,
+ sizeof(H264Context),
+ decode_init,
+ NULL,
+ decode_end,
+ decode_frame,
+ CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
+ .flush= flush_dpb,
+ .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
+};
+#endif
+
+#ifdef CONFIG_SVQ3_DECODER
#include "svq3.c"
+#endif
diff --git a/libavcodec/h264.h b/libavcodec/h264.h
index f7ea19e..40a94a0 100644
--- a/libavcodec/h264.h
+++ b/libavcodec/h264.h
@@ -92,6 +92,11 @@
#define EXTENDED_SAR 255
+#define MB_TYPE_REF0 MB_TYPE_ACPRED //dirty but it fits in 16 bit
+#define MB_TYPE_8x8DCT 0x01000000
+#define IS_REF0(a) ((a) & MB_TYPE_REF0)
+#define IS_8x8DCT(a) ((a) & MB_TYPE_8x8DCT)
+
/* NAL unit types */
enum {
NAL_SLICE=1,
@@ -334,7 +339,7 @@ typedef struct H264Context{
int mb_field_decoding_flag;
int mb_mbaff; ///< mb_aff_frame && mb_field_decoding_flag
- unsigned int sub_mb_type[4];
+ uint16_t sub_mb_type[4];
//POC stuff
int poc_lsb;
@@ -486,6 +491,8 @@ typedef struct H264Context{
* pic_struct in picture timing SEI message
*/
SEI_PicStructType sei_pic_struct;
+
+ int is_complex;
}H264Context;
#endif /* AVCODEC_H264_H */
diff --git a/libavcodec/h264_mp4toannexb_bsf.c b/libavcodec/h264_mp4toannexb_bsf.c
index 03eb956..e94f8c6 100644
--- a/libavcodec/h264_mp4toannexb_bsf.c
+++ b/libavcodec/h264_mp4toannexb_bsf.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2007 Benoit Fouet <benoit.fouet at purplelabs.com>
+ * Copyright (c) 2007 Benoit Fouet <benoit.fouet at free.fr>
*
* This file is part of FFmpeg.
*
diff --git a/libavcodec/h264_parser.c b/libavcodec/h264_parser.c
index e0973bf..cfa04c4 100644
--- a/libavcodec/h264_parser.c
+++ b/libavcodec/h264_parser.c
@@ -44,6 +44,18 @@ int ff_h264_find_frame_end(H264Context *h, const uint8_t *buf, int buf_size)
for(i=0; i<buf_size; i++){
if(state==7){
+#ifdef HAVE_FAST_UNALIGNED
+ /* we check i<buf_size instead of i+3/7 because its simpler
+ * and there should be FF_INPUT_BUFFER_PADDING_SIZE bytes at the end
+ */
+# ifdef HAVE_FAST_64BIT
+ while(i<buf_size && !((~*(uint64_t*)(buf+i) & (*(uint64_t*)(buf+i) - 0x0101010101010101ULL)) & 0x8080808080808080ULL))
+ i+=8;
+# else
+ while(i<buf_size && !((~*(uint32_t*)(buf+i) & (*(uint32_t*)(buf+i) - 0x01010101U)) & 0x80808080U))
+ i+=4;
+# endif
+#endif
for(; i<buf_size; i++){
if(!buf[i]){
state=2;
@@ -149,7 +161,8 @@ static void close(AVCodecParserContext *s)
AVCodecParser h264_parser = {
- { CODEC_ID_H264 },
+ { CODEC_ID_H264,
+ CODEC_ID_H264_VDPAU },
sizeof(H264Context),
NULL,
h264_parse,
diff --git a/libavcodec/h264data.h b/libavcodec/h264data.h
index fa4bff8..20ea3bb 100644
--- a/libavcodec/h264data.h
+++ b/libavcodec/h264data.h
@@ -32,6 +32,7 @@
#include <stdint.h>
#include "libavutil/rational.h"
#include "mpegvideo.h"
+#include "h264.h"
static const AVRational pixel_aspect[17]={
@@ -57,9 +58,6 @@ static const AVRational pixel_aspect[17]={
static const uint8_t golomb_to_pict_type[5]=
{FF_P_TYPE, FF_B_TYPE, FF_I_TYPE, FF_SP_TYPE, FF_SI_TYPE};
-static const uint8_t pict_type_to_golomb[7]=
-{-1, 2, 0, 1, -1, 4, 3};
-
static const uint8_t chroma_qp[52]={
0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,
12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,
@@ -80,18 +78,6 @@ static const uint8_t golomb_to_inter_cbp[48]={
17, 18, 20, 24, 19, 21, 26, 28, 23, 27, 29, 30, 22, 25, 38, 41
};
-static const uint8_t intra4x4_cbp_to_golomb[48]={
- 3, 29, 30, 17, 31, 18, 37, 8, 32, 38, 19, 9, 20, 10, 11, 2,
- 16, 33, 34, 21, 35, 22, 39, 4, 36, 40, 23, 5, 24, 6, 7, 1,
- 41, 42, 43, 25, 44, 26, 46, 12, 45, 47, 27, 13, 28, 14, 15, 0
-};
-
-static const uint8_t inter_cbp_to_golomb[48]={
- 0, 2, 3, 7, 4, 8, 17, 13, 5, 18, 9, 14, 10, 15, 16, 11,
- 1, 32, 33, 36, 34, 37, 44, 40, 35, 45, 38, 41, 39, 42, 43, 19,
- 6, 24, 25, 20, 26, 21, 46, 28, 27, 47, 22, 29, 23, 30, 31, 12
-};
-
static const uint8_t golomb_to_inter_cbp_gray[16]={
0, 1, 2, 4, 8, 3, 5,10,12,15, 7,11,13,14, 6, 9,
};
@@ -377,12 +363,6 @@ static const uint8_t field_scan8x8_cavlc[64]={
6+3*8, 6+5*8, 7+3*8, 7+7*8,
};
-#define MB_TYPE_REF0 MB_TYPE_ACPRED //dirty but it fits in 16bit
-#define MB_TYPE_8x8DCT 0x01000000
-#define IS_REF0(a) ((a)&MB_TYPE_REF0)
-#define IS_8x8DCT(a) ((a)&MB_TYPE_8x8DCT)
-
-
typedef struct IMbInfo{
uint16_t type;
uint8_t pred_mode;
@@ -533,63 +513,6 @@ static const uint8_t dequant8_coeff_init[6][6]={
{36,32,58,34,46,43},
};
-#define QUANT_SHIFT 22
-
-static const int quant_coeff[52][16]={
- { 419430,258111,419430,258111,258111,167772,258111,167772,419430,258111,419430,258111,258111,167772,258111,167772,},
- { 381300,239675,381300,239675,239675,149131,239675,149131,381300,239675,381300,239675,239675,149131,239675,149131,},
- { 322639,209715,322639,209715,209715,134218,209715,134218,322639,209715,322639,209715,209715,134218,209715,134218,},
- { 299593,186414,299593,186414,186414,116711,186414,116711,299593,186414,299593,186414,186414,116711,186414,116711,},
- { 262144,167772,262144,167772,167772,107374,167772,107374,262144,167772,262144,167772,167772,107374,167772,107374,},
- { 233017,145889,233017,145889,145889, 92564,145889, 92564,233017,145889,233017,145889,145889, 92564,145889, 92564,},
- { 209715,129056,209715,129056,129056, 83886,129056, 83886,209715,129056,209715,129056,129056, 83886,129056, 83886,},
- { 190650,119837,190650,119837,119837, 74565,119837, 74565,190650,119837,190650,119837,119837, 74565,119837, 74565,},
- { 161319,104858,161319,104858,104858, 67109,104858, 67109,161319,104858,161319,104858,104858, 67109,104858, 67109,},
- { 149797, 93207,149797, 93207, 93207, 58356, 93207, 58356,149797, 93207,149797, 93207, 93207, 58356, 93207, 58356,},
- { 131072, 83886,131072, 83886, 83886, 53687, 83886, 53687,131072, 83886,131072, 83886, 83886, 53687, 83886, 53687,},
- { 116508, 72944,116508, 72944, 72944, 46282, 72944, 46282,116508, 72944,116508, 72944, 72944, 46282, 72944, 46282,},
- { 104858, 64528,104858, 64528, 64528, 41943, 64528, 41943,104858, 64528,104858, 64528, 64528, 41943, 64528, 41943,},
- { 95325, 59919, 95325, 59919, 59919, 37283, 59919, 37283, 95325, 59919, 95325, 59919, 59919, 37283, 59919, 37283,},
- { 80660, 52429, 80660, 52429, 52429, 33554, 52429, 33554, 80660, 52429, 80660, 52429, 52429, 33554, 52429, 33554,},
- { 74898, 46603, 74898, 46603, 46603, 29178, 46603, 29178, 74898, 46603, 74898, 46603, 46603, 29178, 46603, 29178,},
- { 65536, 41943, 65536, 41943, 41943, 26844, 41943, 26844, 65536, 41943, 65536, 41943, 41943, 26844, 41943, 26844,},
- { 58254, 36472, 58254, 36472, 36472, 23141, 36472, 23141, 58254, 36472, 58254, 36472, 36472, 23141, 36472, 23141,},
- { 52429, 32264, 52429, 32264, 32264, 20972, 32264, 20972, 52429, 32264, 52429, 32264, 32264, 20972, 32264, 20972,},
- { 47663, 29959, 47663, 29959, 29959, 18641, 29959, 18641, 47663, 29959, 47663, 29959, 29959, 18641, 29959, 18641,},
- { 40330, 26214, 40330, 26214, 26214, 16777, 26214, 16777, 40330, 26214, 40330, 26214, 26214, 16777, 26214, 16777,},
- { 37449, 23302, 37449, 23302, 23302, 14589, 23302, 14589, 37449, 23302, 37449, 23302, 23302, 14589, 23302, 14589,},
- { 32768, 20972, 32768, 20972, 20972, 13422, 20972, 13422, 32768, 20972, 32768, 20972, 20972, 13422, 20972, 13422,},
- { 29127, 18236, 29127, 18236, 18236, 11570, 18236, 11570, 29127, 18236, 29127, 18236, 18236, 11570, 18236, 11570,},
- { 26214, 16132, 26214, 16132, 16132, 10486, 16132, 10486, 26214, 16132, 26214, 16132, 16132, 10486, 16132, 10486,},
- { 23831, 14980, 23831, 14980, 14980, 9321, 14980, 9321, 23831, 14980, 23831, 14980, 14980, 9321, 14980, 9321,},
- { 20165, 13107, 20165, 13107, 13107, 8389, 13107, 8389, 20165, 13107, 20165, 13107, 13107, 8389, 13107, 8389,},
- { 18725, 11651, 18725, 11651, 11651, 7294, 11651, 7294, 18725, 11651, 18725, 11651, 11651, 7294, 11651, 7294,},
- { 16384, 10486, 16384, 10486, 10486, 6711, 10486, 6711, 16384, 10486, 16384, 10486, 10486, 6711, 10486, 6711,},
- { 14564, 9118, 14564, 9118, 9118, 5785, 9118, 5785, 14564, 9118, 14564, 9118, 9118, 5785, 9118, 5785,},
- { 13107, 8066, 13107, 8066, 8066, 5243, 8066, 5243, 13107, 8066, 13107, 8066, 8066, 5243, 8066, 5243,},
- { 11916, 7490, 11916, 7490, 7490, 4660, 7490, 4660, 11916, 7490, 11916, 7490, 7490, 4660, 7490, 4660,},
- { 10082, 6554, 10082, 6554, 6554, 4194, 6554, 4194, 10082, 6554, 10082, 6554, 6554, 4194, 6554, 4194,},
- { 9362, 5825, 9362, 5825, 5825, 3647, 5825, 3647, 9362, 5825, 9362, 5825, 5825, 3647, 5825, 3647,},
- { 8192, 5243, 8192, 5243, 5243, 3355, 5243, 3355, 8192, 5243, 8192, 5243, 5243, 3355, 5243, 3355,},
- { 7282, 4559, 7282, 4559, 4559, 2893, 4559, 2893, 7282, 4559, 7282, 4559, 4559, 2893, 4559, 2893,},
- { 6554, 4033, 6554, 4033, 4033, 2621, 4033, 2621, 6554, 4033, 6554, 4033, 4033, 2621, 4033, 2621,},
- { 5958, 3745, 5958, 3745, 3745, 2330, 3745, 2330, 5958, 3745, 5958, 3745, 3745, 2330, 3745, 2330,},
- { 5041, 3277, 5041, 3277, 3277, 2097, 3277, 2097, 5041, 3277, 5041, 3277, 3277, 2097, 3277, 2097,},
- { 4681, 2913, 4681, 2913, 2913, 1824, 2913, 1824, 4681, 2913, 4681, 2913, 2913, 1824, 2913, 1824,},
- { 4096, 2621, 4096, 2621, 2621, 1678, 2621, 1678, 4096, 2621, 4096, 2621, 2621, 1678, 2621, 1678,},
- { 3641, 2280, 3641, 2280, 2280, 1446, 2280, 1446, 3641, 2280, 3641, 2280, 2280, 1446, 2280, 1446,},
- { 3277, 2016, 3277, 2016, 2016, 1311, 2016, 1311, 3277, 2016, 3277, 2016, 2016, 1311, 2016, 1311,},
- { 2979, 1872, 2979, 1872, 1872, 1165, 1872, 1165, 2979, 1872, 2979, 1872, 1872, 1165, 1872, 1165,},
- { 2521, 1638, 2521, 1638, 1638, 1049, 1638, 1049, 2521, 1638, 2521, 1638, 1638, 1049, 1638, 1049,},
- { 2341, 1456, 2341, 1456, 1456, 912, 1456, 912, 2341, 1456, 2341, 1456, 1456, 912, 1456, 912,},
- { 2048, 1311, 2048, 1311, 1311, 839, 1311, 839, 2048, 1311, 2048, 1311, 1311, 839, 1311, 839,},
- { 1820, 1140, 1820, 1140, 1140, 723, 1140, 723, 1820, 1140, 1820, 1140, 1140, 723, 1140, 723,},
- { 1638, 1008, 1638, 1008, 1008, 655, 1008, 655, 1638, 1008, 1638, 1008, 1008, 655, 1008, 655,},
- { 1489, 936, 1489, 936, 936, 583, 936, 583, 1489, 936, 1489, 936, 936, 583, 936, 583,},
- { 1260, 819, 1260, 819, 819, 524, 819, 524, 1260, 819, 1260, 819, 819, 524, 819, 524,},
- { 1170, 728, 1170, 728, 728, 456, 728, 456, 1170, 728, 1170, 728, 728, 456, 728, 456,},
-};
-
/* Deblocking filter (p153) */
static const uint8_t alpha_table[52*3] = {
@@ -624,34 +547,34 @@ static const uint8_t beta_table[52*3] = {
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18,
};
-static const uint8_t tc0_table[52*3][3] = {
- { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
- { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
- { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
- { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
- { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
- { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
- { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
- { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
- { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
- { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
- { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 },
- { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 1 },
- { 0, 0, 1 }, { 0, 0, 1 }, { 0, 0, 1 }, { 0, 1, 1 }, { 0, 1, 1 }, { 1, 1, 1 },
- { 1, 1, 1 }, { 1, 1, 1 }, { 1, 1, 1 }, { 1, 1, 2 }, { 1, 1, 2 }, { 1, 1, 2 },
- { 1, 1, 2 }, { 1, 2, 3 }, { 1, 2, 3 }, { 2, 2, 3 }, { 2, 2, 4 }, { 2, 3, 4 },
- { 2, 3, 4 }, { 3, 3, 5 }, { 3, 4, 6 }, { 3, 4, 6 }, { 4, 5, 7 }, { 4, 5, 8 },
- { 4, 6, 9 }, { 5, 7,10 }, { 6, 8,11 }, { 6, 8,13 }, { 7,10,14 }, { 8,11,16 },
- { 9,12,18 }, {10,13,20 }, {11,15,23 }, {13,17,25 },
- {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 },
- {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 },
- {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 },
- {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 },
- {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 },
- {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 },
- {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 },
- {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 },
- {13,17,25 }, {13,17,25 }, {13,17,25 }, {13,17,25 },
+static const uint8_t tc0_table[52*3][4] = {
+ {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
+ {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
+ {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
+ {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
+ {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
+ {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
+ {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
+ {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
+ {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
+ {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
+ {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 },
+ {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 0 }, {-1, 0, 0, 1 },
+ {-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 0, 1 }, {-1, 0, 1, 1 }, {-1, 0, 1, 1 }, {-1, 1, 1, 1 },
+ {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 1 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 }, {-1, 1, 1, 2 },
+ {-1, 1, 1, 2 }, {-1, 1, 2, 3 }, {-1, 1, 2, 3 }, {-1, 2, 2, 3 }, {-1, 2, 2, 4 }, {-1, 2, 3, 4 },
+ {-1, 2, 3, 4 }, {-1, 3, 3, 5 }, {-1, 3, 4, 6 }, {-1, 3, 4, 6 }, {-1, 4, 5, 7 }, {-1, 4, 5, 8 },
+ {-1, 4, 6, 9 }, {-1, 5, 7,10 }, {-1, 6, 8,11 }, {-1, 6, 8,13 }, {-1, 7,10,14 }, {-1, 8,11,16 },
+ {-1, 9,12,18 }, {-1,10,13,20 }, {-1,11,15,23 }, {-1,13,17,25 },
+ {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
+ {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
+ {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
+ {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
+ {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
+ {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
+ {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
+ {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
+ {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 }, {-1,13,17,25 },
};
/* Cabac pre state table */
diff --git a/libavcodec/h264enc.c b/libavcodec/h264enc.c
index 2cff901..788e0ae 100644
--- a/libavcodec/h264enc.c
+++ b/libavcodec/h264enc.c
@@ -107,3 +107,154 @@ static uint8_t *h264_write_nal_unit(int nal_ref_idc, int nal_unit_type, uint8_t
return dest+destpos;
}
+static const uint8_t pict_type_to_golomb[7] = {-1, 2, 0, 1, -1, 4, 3};
+
+static const uint8_t intra4x4_cbp_to_golomb[48] = {
+ 3, 29, 30, 17, 31, 18, 37, 8, 32, 38, 19, 9, 20, 10, 11, 2,
+ 16, 33, 34, 21, 35, 22, 39, 4, 36, 40, 23, 5, 24, 6, 7, 1,
+ 41, 42, 43, 25, 44, 26, 46, 12, 45, 47, 27, 13, 28, 14, 15, 0
+};
+
+static const uint8_t inter_cbp_to_golomb[48] = {
+ 0, 2, 3, 7, 4, 8, 17, 13, 5, 18, 9, 14, 10, 15, 16, 11,
+ 1, 32, 33, 36, 34, 37, 44, 40, 35, 45, 38, 41, 39, 42, 43, 19,
+ 6, 24, 25, 20, 26, 21, 46, 28, 27, 47, 22, 29, 23, 30, 31, 12
+};
+
+#define QUANT_SHIFT 22
+
+static const int quant_coeff[52][16] = {
+ { 419430, 258111, 419430, 258111, 258111, 167772, 258111, 167772, 419430, 258111, 419430, 258111, 258111, 167772, 258111, 167772,},
+ { 381300, 239675, 381300, 239675, 239675, 149131, 239675, 149131, 381300, 239675, 381300, 239675, 239675, 149131, 239675, 149131,},
+ { 322639, 209715, 322639, 209715, 209715, 134218, 209715, 134218, 322639, 209715, 322639, 209715, 209715, 134218, 209715, 134218,},
+ { 299593, 186414, 299593, 186414, 186414, 116711, 186414, 116711, 299593, 186414, 299593, 186414, 186414, 116711, 186414, 116711,},
+ { 262144, 167772, 262144, 167772, 167772, 107374, 167772, 107374, 262144, 167772, 262144, 167772, 167772, 107374, 167772, 107374,},
+ { 233017, 145889, 233017, 145889, 145889, 92564, 145889, 92564, 233017, 145889, 233017, 145889, 145889, 92564, 145889, 92564,},
+ { 209715, 129056, 209715, 129056, 129056, 83886, 129056, 83886, 209715, 129056, 209715, 129056, 129056, 83886, 129056, 83886,},
+ { 190650, 119837, 190650, 119837, 119837, 74565, 119837, 74565, 190650, 119837, 190650, 119837, 119837, 74565, 119837, 74565,},
+ { 161319, 104858, 161319, 104858, 104858, 67109, 104858, 67109, 161319, 104858, 161319, 104858, 104858, 67109, 104858, 67109,},
+ { 149797, 93207, 149797, 93207, 93207, 58356, 93207, 58356, 149797, 93207, 149797, 93207, 93207, 58356, 93207, 58356,},
+ { 131072, 83886, 131072, 83886, 83886, 53687, 83886, 53687, 131072, 83886, 131072, 83886, 83886, 53687, 83886, 53687,},
+ { 116508, 72944, 116508, 72944, 72944, 46282, 72944, 46282, 116508, 72944, 116508, 72944, 72944, 46282, 72944, 46282,},
+ { 104858, 64528, 104858, 64528, 64528, 41943, 64528, 41943, 104858, 64528, 104858, 64528, 64528, 41943, 64528, 41943,},
+ { 95325, 59919, 95325, 59919, 59919, 37283, 59919, 37283, 95325, 59919, 95325, 59919, 59919, 37283, 59919, 37283,},
+ { 80660, 52429, 80660, 52429, 52429, 33554, 52429, 33554, 80660, 52429, 80660, 52429, 52429, 33554, 52429, 33554,},
+ { 74898, 46603, 74898, 46603, 46603, 29178, 46603, 29178, 74898, 46603, 74898, 46603, 46603, 29178, 46603, 29178,},
+ { 65536, 41943, 65536, 41943, 41943, 26844, 41943, 26844, 65536, 41943, 65536, 41943, 41943, 26844, 41943, 26844,},
+ { 58254, 36472, 58254, 36472, 36472, 23141, 36472, 23141, 58254, 36472, 58254, 36472, 36472, 23141, 36472, 23141,},
+ { 52429, 32264, 52429, 32264, 32264, 20972, 32264, 20972, 52429, 32264, 52429, 32264, 32264, 20972, 32264, 20972,},
+ { 47663, 29959, 47663, 29959, 29959, 18641, 29959, 18641, 47663, 29959, 47663, 29959, 29959, 18641, 29959, 18641,},
+ { 40330, 26214, 40330, 26214, 26214, 16777, 26214, 16777, 40330, 26214, 40330, 26214, 26214, 16777, 26214, 16777,},
+ { 37449, 23302, 37449, 23302, 23302, 14589, 23302, 14589, 37449, 23302, 37449, 23302, 23302, 14589, 23302, 14589,},
+ { 32768, 20972, 32768, 20972, 20972, 13422, 20972, 13422, 32768, 20972, 32768, 20972, 20972, 13422, 20972, 13422,},
+ { 29127, 18236, 29127, 18236, 18236, 11570, 18236, 11570, 29127, 18236, 29127, 18236, 18236, 11570, 18236, 11570,},
+ { 26214, 16132, 26214, 16132, 16132, 10486, 16132, 10486, 26214, 16132, 26214, 16132, 16132, 10486, 16132, 10486,},
+ { 23831, 14980, 23831, 14980, 14980, 9321, 14980, 9321, 23831, 14980, 23831, 14980, 14980, 9321, 14980, 9321,},
+ { 20165, 13107, 20165, 13107, 13107, 8389, 13107, 8389, 20165, 13107, 20165, 13107, 13107, 8389, 13107, 8389,},
+ { 18725, 11651, 18725, 11651, 11651, 7294, 11651, 7294, 18725, 11651, 18725, 11651, 11651, 7294, 11651, 7294,},
+ { 16384, 10486, 16384, 10486, 10486, 6711, 10486, 6711, 16384, 10486, 16384, 10486, 10486, 6711, 10486, 6711,},
+ { 14564, 9118, 14564, 9118, 9118, 5785, 9118, 5785, 14564, 9118, 14564, 9118, 9118, 5785, 9118, 5785,},
+ { 13107, 8066, 13107, 8066, 8066, 5243, 8066, 5243, 13107, 8066, 13107, 8066, 8066, 5243, 8066, 5243,},
+ { 11916, 7490, 11916, 7490, 7490, 4660, 7490, 4660, 11916, 7490, 11916, 7490, 7490, 4660, 7490, 4660,},
+ { 10082, 6554, 10082, 6554, 6554, 4194, 6554, 4194, 10082, 6554, 10082, 6554, 6554, 4194, 6554, 4194,},
+ { 9362, 5825, 9362, 5825, 5825, 3647, 5825, 3647, 9362, 5825, 9362, 5825, 5825, 3647, 5825, 3647,},
+ { 8192, 5243, 8192, 5243, 5243, 3355, 5243, 3355, 8192, 5243, 8192, 5243, 5243, 3355, 5243, 3355,},
+ { 7282, 4559, 7282, 4559, 4559, 2893, 4559, 2893, 7282, 4559, 7282, 4559, 4559, 2893, 4559, 2893,},
+ { 6554, 4033, 6554, 4033, 4033, 2621, 4033, 2621, 6554, 4033, 6554, 4033, 4033, 2621, 4033, 2621,},
+ { 5958, 3745, 5958, 3745, 3745, 2330, 3745, 2330, 5958, 3745, 5958, 3745, 3745, 2330, 3745, 2330,},
+ { 5041, 3277, 5041, 3277, 3277, 2097, 3277, 2097, 5041, 3277, 5041, 3277, 3277, 2097, 3277, 2097,},
+ { 4681, 2913, 4681, 2913, 2913, 1824, 2913, 1824, 4681, 2913, 4681, 2913, 2913, 1824, 2913, 1824,},
+ { 4096, 2621, 4096, 2621, 2621, 1678, 2621, 1678, 4096, 2621, 4096, 2621, 2621, 1678, 2621, 1678,},
+ { 3641, 2280, 3641, 2280, 2280, 1446, 2280, 1446, 3641, 2280, 3641, 2280, 2280, 1446, 2280, 1446,},
+ { 3277, 2016, 3277, 2016, 2016, 1311, 2016, 1311, 3277, 2016, 3277, 2016, 2016, 1311, 2016, 1311,},
+ { 2979, 1872, 2979, 1872, 1872, 1165, 1872, 1165, 2979, 1872, 2979, 1872, 1872, 1165, 1872, 1165,},
+ { 2521, 1638, 2521, 1638, 1638, 1049, 1638, 1049, 2521, 1638, 2521, 1638, 1638, 1049, 1638, 1049,},
+ { 2341, 1456, 2341, 1456, 1456, 912, 1456, 912, 2341, 1456, 2341, 1456, 1456, 912, 1456, 912,},
+ { 2048, 1311, 2048, 1311, 1311, 839, 1311, 839, 2048, 1311, 2048, 1311, 1311, 839, 1311, 839,},
+ { 1820, 1140, 1820, 1140, 1140, 723, 1140, 723, 1820, 1140, 1820, 1140, 1140, 723, 1140, 723,},
+ { 1638, 1008, 1638, 1008, 1008, 655, 1008, 655, 1638, 1008, 1638, 1008, 1008, 655, 1008, 655,},
+ { 1489, 936, 1489, 936, 936, 583, 936, 583, 1489, 936, 1489, 936, 936, 583, 936, 583,},
+ { 1260, 819, 1260, 819, 819, 524, 819, 524, 1260, 819, 1260, 819, 819, 524, 819, 524,},
+ { 1170, 728, 1170, 728, 728, 456, 728, 456, 1170, 728, 1170, 728, 728, 456, 728, 456,},
+};
+
+//FIXME need to check that this does not overflow signed 32 bit for low qp, I am not sure, it's very close
+//FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
+static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale,
+ int intra, int separate_dc)
+{
+ int i;
+ const int * const quant_3Btable = quant_coeff[qscale];
+ const int bias = intra ? (1 << QUANT_SHIFT) / 3 : (1 << QUANT_SHIFT) / 6;
+ const unsigned int threshold1 = (1 << QUANT_SHIFT) - bias - 1;
+ const unsigned int threshold2 = (threshold1 << 1);
+ int last_non_zero;
+
+ if (separate_dc) {
+ if (qscale <= 18) {
+ //avoid overflows
+ const int dc_bias = intra ? (1 << (QUANT_SHIFT - 2)) / 3 : (1 << (QUANT_SHIFT - 2)) / 6;
+ const unsigned int dc_threshold1 = (1 << (QUANT_SHIFT - 2)) - dc_bias - 1;
+ const unsigned int dc_threshold2 = (dc_threshold1 << 1);
+
+ int level = block[0]*quant_coeff[qscale+18][0];
+ if (((unsigned)(level + dc_threshold1)) > dc_threshold2) {
+ if (level > 0) {
+ level = (dc_bias + level) >> (QUANT_SHIFT - 2);
+ block[0] = level;
+ } else {
+ level = (dc_bias - level) >> (QUANT_SHIFT - 2);
+ block[0] = -level;
+ }
+// last_non_zero = i;
+ } else {
+ block[0] = 0;
+ }
+ } else {
+ const int dc_bias = intra ? (1 << (QUANT_SHIFT + 1)) / 3 : (1 << (QUANT_SHIFT + 1)) / 6;
+ const unsigned int dc_threshold1 = (1 << (QUANT_SHIFT + 1)) - dc_bias - 1;
+ const unsigned int dc_threshold2 = (dc_threshold1 << 1);
+
+ int level = block[0]*quant_table[0];
+ if (((unsigned)(level + dc_threshold1)) > dc_threshold2) {
+ if (level > 0) {
+ level = (dc_bias + level) >> (QUANT_SHIFT + 1);
+ block[0] = level;
+ } else {
+ level = (dc_bias - level) >> (QUANT_SHIFT + 1);
+ block[0] = -level;
+ }
+// last_non_zero = i;
+ } else {
+ block[0] = 0;
+ }
+ }
+ last_non_zero = 0;
+ i = 1;
+ } else {
+ last_non_zero = -1;
+ i = 0;
+ }
+
+ for (; i < 16; i++) {
+ const int j = scantable[i];
+ int level = block[j]*quant_table[j];
+
+// if ( bias+level >= (1 << (QMAT_SHIFT - 3))
+// || bias-level >= (1 << (QMAT_SHIFT - 3))) {
+ if (((unsigned)(level + threshold1)) > threshold2) {
+ if (level > 0) {
+ level = (bias + level) >> QUANT_SHIFT;
+ block[j] = level;
+ } else {
+ level = (bias - level) >> QUANT_SHIFT;
+ block[j] = -level;
+ }
+ last_non_zero = i;
+ } else {
+ block[j] = 0;
+ }
+ }
+
+ return last_non_zero;
+}
diff --git a/libavcodec/h264idct.c b/libavcodec/h264idct.c
index 571e2e9..57d1b75 100644
--- a/libavcodec/h264idct.c
+++ b/libavcodec/h264idct.c
@@ -72,58 +72,57 @@ void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block){
void ff_h264_idct8_add_c(uint8_t *dst, DCTELEM *block, int stride){
int i;
- DCTELEM (*src)[8] = (DCTELEM(*)[8])block;
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
block[0] += 32;
for( i = 0; i < 8; i++ )
{
- const int a0 = src[i][0] + src[i][4];
- const int a2 = src[i][0] - src[i][4];
- const int a4 = (src[i][2]>>1) - src[i][6];
- const int a6 = (src[i][6]>>1) + src[i][2];
+ const int a0 = block[0+i*8] + block[4+i*8];
+ const int a2 = block[0+i*8] - block[4+i*8];
+ const int a4 = (block[2+i*8]>>1) - block[6+i*8];
+ const int a6 = (block[6+i*8]>>1) + block[2+i*8];
const int b0 = a0 + a6;
const int b2 = a2 + a4;
const int b4 = a2 - a4;
const int b6 = a0 - a6;
- const int a1 = -src[i][3] + src[i][5] - src[i][7] - (src[i][7]>>1);
- const int a3 = src[i][1] + src[i][7] - src[i][3] - (src[i][3]>>1);
- const int a5 = -src[i][1] + src[i][7] + src[i][5] + (src[i][5]>>1);
- const int a7 = src[i][3] + src[i][5] + src[i][1] + (src[i][1]>>1);
+ const int a1 = -block[3+i*8] + block[5+i*8] - block[7+i*8] - (block[7+i*8]>>1);
+ const int a3 = block[1+i*8] + block[7+i*8] - block[3+i*8] - (block[3+i*8]>>1);
+ const int a5 = -block[1+i*8] + block[7+i*8] + block[5+i*8] + (block[5+i*8]>>1);
+ const int a7 = block[3+i*8] + block[5+i*8] + block[1+i*8] + (block[1+i*8]>>1);
const int b1 = (a7>>2) + a1;
const int b3 = a3 + (a5>>2);
const int b5 = (a3>>2) - a5;
const int b7 = a7 - (a1>>2);
- src[i][0] = b0 + b7;
- src[i][7] = b0 - b7;
- src[i][1] = b2 + b5;
- src[i][6] = b2 - b5;
- src[i][2] = b4 + b3;
- src[i][5] = b4 - b3;
- src[i][3] = b6 + b1;
- src[i][4] = b6 - b1;
+ block[0+i*8] = b0 + b7;
+ block[7+i*8] = b0 - b7;
+ block[1+i*8] = b2 + b5;
+ block[6+i*8] = b2 - b5;
+ block[2+i*8] = b4 + b3;
+ block[5+i*8] = b4 - b3;
+ block[3+i*8] = b6 + b1;
+ block[4+i*8] = b6 - b1;
}
for( i = 0; i < 8; i++ )
{
- const int a0 = src[0][i] + src[4][i];
- const int a2 = src[0][i] - src[4][i];
- const int a4 = (src[2][i]>>1) - src[6][i];
- const int a6 = (src[6][i]>>1) + src[2][i];
+ const int a0 = block[i+0*8] + block[i+4*8];
+ const int a2 = block[i+0*8] - block[i+4*8];
+ const int a4 = (block[i+2*8]>>1) - block[i+6*8];
+ const int a6 = (block[i+6*8]>>1) + block[i+2*8];
const int b0 = a0 + a6;
const int b2 = a2 + a4;
const int b4 = a2 - a4;
const int b6 = a0 - a6;
- const int a1 = -src[3][i] + src[5][i] - src[7][i] - (src[7][i]>>1);
- const int a3 = src[1][i] + src[7][i] - src[3][i] - (src[3][i]>>1);
- const int a5 = -src[1][i] + src[7][i] + src[5][i] + (src[5][i]>>1);
- const int a7 = src[3][i] + src[5][i] + src[1][i] + (src[1][i]>>1);
+ const int a1 = -block[i+3*8] + block[i+5*8] - block[i+7*8] - (block[i+7*8]>>1);
+ const int a3 = block[i+1*8] + block[i+7*8] - block[i+3*8] - (block[i+3*8]>>1);
+ const int a5 = -block[i+1*8] + block[i+7*8] + block[i+5*8] + (block[i+5*8]>>1);
+ const int a7 = block[i+3*8] + block[i+5*8] + block[i+1*8] + (block[i+1*8]>>1);
const int b1 = (a7>>2) + a1;
const int b3 = a3 + (a5>>2);
@@ -165,3 +164,55 @@ void ff_h264_idct8_dc_add_c(uint8_t *dst, DCTELEM *block, int stride){
dst += stride;
}
}
+
+//FIXME this table is a duplicate from h264data.h, and will be removed once the tables from, h264 have been split
+static const uint8_t scan8[16 + 2*4]={
+ 4+1*8, 5+1*8, 4+2*8, 5+2*8,
+ 6+1*8, 7+1*8, 6+2*8, 7+2*8,
+ 4+3*8, 5+3*8, 4+4*8, 5+4*8,
+ 6+3*8, 7+3*8, 6+4*8, 7+4*8,
+ 1+1*8, 2+1*8,
+ 1+2*8, 2+2*8,
+ 1+4*8, 2+4*8,
+ 1+5*8, 2+5*8,
+};
+
+void ff_h264_idct_add16_c(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+ int i;
+ for(i=0; i<16; i++){
+ int nnz = nnzc[ scan8[i] ];
+ if(nnz){
+ if(nnz==1 && block[i*16]) ff_h264_idct_dc_add_c(dst + block_offset[i], block + i*16, stride);
+ else idct_internal (dst + block_offset[i], block + i*16, stride, 4, 6, 1);
+ }
+ }
+}
+
+void ff_h264_idct_add16intra_c(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+ int i;
+ for(i=0; i<16; i++){
+ if(nnzc[ scan8[i] ]) idct_internal (dst + block_offset[i], block + i*16, stride, 4, 6, 1);
+ else if(block[i*16]) ff_h264_idct_dc_add_c(dst + block_offset[i], block + i*16, stride);
+ }
+}
+
+void ff_h264_idct8_add4_c(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+ int i;
+ for(i=0; i<16; i+=4){
+ int nnz = nnzc[ scan8[i] ];
+ if(nnz){
+ if(nnz==1 && block[i*16]) ff_h264_idct8_dc_add_c(dst + block_offset[i], block + i*16, stride);
+ else ff_h264_idct8_add_c (dst + block_offset[i], block + i*16, stride);
+ }
+ }
+}
+
+void ff_h264_idct_add8_c(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+ int i;
+ for(i=16; i<16+8; i++){
+ if(nnzc[ scan8[i] ])
+ ff_h264_idct_add_c (dest[(i&4)>>2] + block_offset[i], block + i*16, stride);
+ else if(block[i*16])
+ ff_h264_idct_dc_add_c(dest[(i&4)>>2] + block_offset[i], block + i*16, stride);
+ }
+}
diff --git a/libavcodec/h264pred.c b/libavcodec/h264pred.c
index 0b7394a..3282ff4 100644
--- a/libavcodec/h264pred.c
+++ b/libavcodec/h264pred.c
@@ -198,28 +198,6 @@ static void pred4x4_down_left_rv40_c(uint8_t *src, uint8_t *topright, int stride
src[3+3*stride]=(t6 + t7 + 1 + l6 + l7 + 1)>>2;
}
-static void pred4x4_down_left_rv40_notop_c(uint8_t *src, uint8_t *topright, int stride){
- LOAD_LEFT_EDGE
- LOAD_DOWN_LEFT_EDGE
-
- src[0+0*stride]=(l0 + l2 + 2*l1 + 2)>>2;
- src[1+0*stride]=
- src[0+1*stride]=(l1 + l3 + 2*l2 + 2)>>2;
- src[2+0*stride]=
- src[1+1*stride]=
- src[0+2*stride]=(l2 + l4 + 2*l3 + 2)>>2;
- src[3+0*stride]=
- src[2+1*stride]=
- src[1+2*stride]=
- src[0+3*stride]=(l3 + l5 + 2*l4 + 2)>>2;
- src[3+1*stride]=
- src[2+2*stride]=
- src[1+3*stride]=(l4 + l6 + 2*l5 + 2)>>2;
- src[3+2*stride]=
- src[2+3*stride]=(l5 + l7 + 2*l6 + 2)>>2;
- src[3+3*stride]=(l6 + l7 + 1)>>1;
-}
-
static void pred4x4_down_left_rv40_nodown_c(uint8_t *src, uint8_t *topright, int stride){
LOAD_TOP_EDGE
LOAD_TOP_RIGHT_EDGE
@@ -1009,6 +987,93 @@ static void pred8x8l_horizontal_up_c(uint8_t *src, int has_topleft, int has_topr
#undef PL
#undef SRC
+static void pred4x4_vertical_add_c(uint8_t *pix, const DCTELEM *block, int stride){
+ int i;
+ pix -= stride;
+ for(i=0; i<4; i++){
+ uint8_t v = pix[0];
+ pix[1*stride]= v += block[0];
+ pix[2*stride]= v += block[4];
+ pix[3*stride]= v += block[8];
+ pix[4*stride]= v += block[12];
+ pix++;
+ block++;
+ }
+}
+
+static void pred4x4_horizontal_add_c(uint8_t *pix, const DCTELEM *block, int stride){
+ int i;
+ for(i=0; i<4; i++){
+ uint8_t v = pix[-1];
+ pix[0]= v += block[0];
+ pix[1]= v += block[1];
+ pix[2]= v += block[2];
+ pix[3]= v += block[3];
+ pix+= stride;
+ block+= 4;
+ }
+}
+
+static void pred8x8l_vertical_add_c(uint8_t *pix, const DCTELEM *block, int stride){
+ int i;
+ pix -= stride;
+ for(i=0; i<8; i++){
+ uint8_t v = pix[0];
+ pix[1*stride]= v += block[0];
+ pix[2*stride]= v += block[8];
+ pix[3*stride]= v += block[16];
+ pix[4*stride]= v += block[24];
+ pix[5*stride]= v += block[32];
+ pix[6*stride]= v += block[40];
+ pix[7*stride]= v += block[48];
+ pix[8*stride]= v += block[56];
+ pix++;
+ block++;
+ }
+}
+
+static void pred8x8l_horizontal_add_c(uint8_t *pix, const DCTELEM *block, int stride){
+ int i;
+ for(i=0; i<8; i++){
+ uint8_t v = pix[-1];
+ pix[0]= v += block[0];
+ pix[1]= v += block[1];
+ pix[2]= v += block[2];
+ pix[3]= v += block[3];
+ pix[4]= v += block[4];
+ pix[5]= v += block[5];
+ pix[6]= v += block[6];
+ pix[7]= v += block[7];
+ pix+= stride;
+ block+= 8;
+ }
+}
+
+static void pred16x16_vertical_add_c(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
+ int i;
+ for(i=0; i<16; i++)
+ pred4x4_vertical_add_c(pix + block_offset[i], block + i*16, stride);
+}
+
+static void pred16x16_horizontal_add_c(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
+ int i;
+ for(i=0; i<16; i++)
+ pred4x4_horizontal_add_c(pix + block_offset[i], block + i*16, stride);
+}
+
+static void pred8x8_vertical_add_c(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
+ int i;
+ for(i=0; i<4; i++)
+ pred4x4_vertical_add_c(pix + block_offset[i], block + i*16, stride);
+}
+
+static void pred8x8_horizontal_add_c(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
+ int i;
+ for(i=0; i<4; i++)
+ pred4x4_horizontal_add_c(pix + block_offset[i], block + i*16, stride);
+}
+
+
/**
* Sets the intra prediction function pointers.
*/
@@ -1097,4 +1162,14 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id){
h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c;
h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c;
h->pred16x16[DC_128_PRED8x8 ]= pred16x16_128_dc_c;
+
+ //special lossless h/v prediction for h264
+ h->pred4x4_add [VERT_PRED ]= pred4x4_vertical_add_c;
+ h->pred4x4_add [ HOR_PRED ]= pred4x4_horizontal_add_c;
+ h->pred8x8l_add [VERT_PRED ]= pred8x8l_vertical_add_c;
+ h->pred8x8l_add [ HOR_PRED ]= pred8x8l_horizontal_add_c;
+ h->pred8x8_add [VERT_PRED8x8]= pred8x8_vertical_add_c;
+ h->pred8x8_add [ HOR_PRED8x8]= pred8x8_horizontal_add_c;
+ h->pred16x16_add[VERT_PRED8x8]= pred16x16_vertical_add_c;
+ h->pred16x16_add[ HOR_PRED8x8]= pred16x16_horizontal_add_c;
}
diff --git a/libavcodec/h264pred.h b/libavcodec/h264pred.h
index 150567d..d87df2a 100644
--- a/libavcodec/h264pred.h
+++ b/libavcodec/h264pred.h
@@ -29,6 +29,7 @@
#define AVCODEC_H264PRED_H
#include "libavutil/common.h"
+#include "dsputil.h"
/**
* Prediction types
@@ -75,6 +76,11 @@ typedef struct H264PredContext{
void (*pred8x8l [9+3])(uint8_t *src, int topleft, int topright, int stride);
void (*pred8x8 [4+3+4])(uint8_t *src, int stride);
void (*pred16x16[4+3])(uint8_t *src, int stride);
+
+ void (*pred4x4_add [2])(uint8_t *pix/*align 4*/, const DCTELEM *block/*align 16*/, int stride);
+ void (*pred8x8l_add [2])(uint8_t *pix/*align 8*/, const DCTELEM *block/*align 16*/, int stride);
+ void (*pred8x8_add [3])(uint8_t *pix/*align 8*/, const int *block_offset, const DCTELEM *block/*align 16*/, int stride);
+ void (*pred16x16_add[3])(uint8_t *pix/*align 16*/, const int *block_offset, const DCTELEM *block/*align 16*/, int stride);
}H264PredContext;
void ff_h264_pred_init(H264PredContext *h, int codec_id);
diff --git a/libavcodec/huffman.c b/libavcodec/huffman.c
index d41dabb..f21a4f8 100644
--- a/libavcodec/huffman.c
+++ b/libavcodec/huffman.c
@@ -67,7 +67,7 @@ static int build_huff_tree(VLC *vlc, Node *nodes, int head, int flags)
* first nb_codes nodes.count must be set
*/
int ff_huff_build_tree(AVCodecContext *avctx, VLC *vlc, int nb_codes,
- Node *nodes, huff_cmp_t cmp, int flags)
+ Node *nodes, HuffCmp cmp, int flags)
{
int i, j;
int cur_node;
diff --git a/libavcodec/huffman.h b/libavcodec/huffman.h
index 0b11af8..bfb7723 100644
--- a/libavcodec/huffman.h
+++ b/libavcodec/huffman.h
@@ -35,8 +35,8 @@ typedef struct {
#define FF_HUFFMAN_FLAG_HNODE_FIRST 0x01
#define FF_HUFFMAN_FLAG_ZERO_COUNT 0x02
-typedef int (*huff_cmp_t)(const void *va, const void *vb);
+typedef int (*HuffCmp)(const void *va, const void *vb);
int ff_huff_build_tree(AVCodecContext *avctx, VLC *vlc, int nb_codes,
- Node *nodes, huff_cmp_t cmp, int flags);
+ Node *nodes, HuffCmp cmp, int flags);
#endif /* AVCODEC_HUFFMAN_H */
diff --git a/libavcodec/huffyuv.c b/libavcodec/huffyuv.c
index cf90adc..bf13e44 100644
--- a/libavcodec/huffyuv.c
+++ b/libavcodec/huffyuv.c
@@ -265,16 +265,16 @@ static int generate_bits_table(uint32_t *dst, uint8_t *len_table){
typedef struct {
uint64_t val;
int name;
-} heap_elem_t;
+} HeapElem;
-static void heap_sift(heap_elem_t *h, int root, int size)
+static void heap_sift(HeapElem *h, int root, int size)
{
while(root*2+1 < size) {
int child = root*2+1;
if(child < size-1 && h[child].val > h[child+1].val)
child++;
if(h[root].val > h[child].val) {
- FFSWAP(heap_elem_t, h[root], h[child]);
+ FFSWAP(HeapElem, h[root], h[child]);
root = child;
} else
break;
@@ -282,7 +282,7 @@ static void heap_sift(heap_elem_t *h, int root, int size)
}
static void generate_len_table(uint8_t *dst, uint64_t *stats, int size){
- heap_elem_t h[size];
+ HeapElem h[size];
int up[2*size];
int len[2*size];
int offset, i, next;
diff --git a/libavcodec/i386/cavsdsp_mmx.c b/libavcodec/i386/cavsdsp_mmx.c
deleted file mode 100644
index 15cafd6..0000000
--- a/libavcodec/i386/cavsdsp_mmx.c
+++ /dev/null
@@ -1,497 +0,0 @@
-/*
- * Chinese AVS video (AVS1-P2, JiZhun profile) decoder.
- * Copyright (c) 2006 Stefan Gehrer <stefan.gehrer at gmx.de>
- *
- * MMX-optimized DSP functions, based on H.264 optimizations by
- * Michael Niedermayer and Loren Merritt
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "libavutil/common.h"
-#include "libavutil/x86_cpu.h"
-#include "libavcodec/dsputil.h"
-#include "dsputil_mmx.h"
-
-/*****************************************************************************
- *
- * inverse transform
- *
- ****************************************************************************/
-
-static inline void cavs_idct8_1d(int16_t *block, uint64_t bias)
-{
- __asm__ volatile(
- "movq 112(%0), %%mm4 \n\t" /* mm4 = src7 */
- "movq 16(%0), %%mm5 \n\t" /* mm5 = src1 */
- "movq 80(%0), %%mm2 \n\t" /* mm2 = src5 */
- "movq 48(%0), %%mm7 \n\t" /* mm7 = src3 */
- "movq %%mm4, %%mm0 \n\t"
- "movq %%mm5, %%mm3 \n\t"
- "movq %%mm2, %%mm6 \n\t"
- "movq %%mm7, %%mm1 \n\t"
-
- "paddw %%mm4, %%mm4 \n\t" /* mm4 = 2*src7 */
- "paddw %%mm3, %%mm3 \n\t" /* mm3 = 2*src1 */
- "paddw %%mm6, %%mm6 \n\t" /* mm6 = 2*src5 */
- "paddw %%mm1, %%mm1 \n\t" /* mm1 = 2*src3 */
- "paddw %%mm4, %%mm0 \n\t" /* mm0 = 3*src7 */
- "paddw %%mm3, %%mm5 \n\t" /* mm5 = 3*src1 */
- "paddw %%mm6, %%mm2 \n\t" /* mm2 = 3*src5 */
- "paddw %%mm1, %%mm7 \n\t" /* mm7 = 3*src3 */
- "psubw %%mm4, %%mm5 \n\t" /* mm5 = 3*src1 - 2*src7 = a0 */
- "paddw %%mm6, %%mm7 \n\t" /* mm7 = 3*src3 + 2*src5 = a1 */
- "psubw %%mm2, %%mm1 \n\t" /* mm1 = 2*src3 - 3*src5 = a2 */
- "paddw %%mm0, %%mm3 \n\t" /* mm3 = 2*src1 + 3*src7 = a3 */
-
- "movq %%mm5, %%mm4 \n\t"
- "movq %%mm7, %%mm6 \n\t"
- "movq %%mm3, %%mm0 \n\t"
- "movq %%mm1, %%mm2 \n\t"
- SUMSUB_BA( %%mm7, %%mm5 ) /* mm7 = a0 + a1 mm5 = a0 - a1 */
- "paddw %%mm3, %%mm7 \n\t" /* mm7 = a0 + a1 + a3 */
- "paddw %%mm1, %%mm5 \n\t" /* mm5 = a0 - a1 + a2 */
- "paddw %%mm7, %%mm7 \n\t"
- "paddw %%mm5, %%mm5 \n\t"
- "paddw %%mm6, %%mm7 \n\t" /* mm7 = b4 */
- "paddw %%mm4, %%mm5 \n\t" /* mm5 = b5 */
-
- SUMSUB_BA( %%mm1, %%mm3 ) /* mm1 = a3 + a2 mm3 = a3 - a2 */
- "psubw %%mm1, %%mm4 \n\t" /* mm4 = a0 - a2 - a3 */
- "movq %%mm4, %%mm1 \n\t" /* mm1 = a0 - a2 - a3 */
- "psubw %%mm6, %%mm3 \n\t" /* mm3 = a3 - a2 - a1 */
- "paddw %%mm1, %%mm1 \n\t"
- "paddw %%mm3, %%mm3 \n\t"
- "psubw %%mm2, %%mm1 \n\t" /* mm1 = b7 */
- "paddw %%mm0, %%mm3 \n\t" /* mm3 = b6 */
-
- "movq 32(%0), %%mm2 \n\t" /* mm2 = src2 */
- "movq 96(%0), %%mm6 \n\t" /* mm6 = src6 */
- "movq %%mm2, %%mm4 \n\t"
- "movq %%mm6, %%mm0 \n\t"
- "psllw $2, %%mm4 \n\t" /* mm4 = 4*src2 */
- "psllw $2, %%mm6 \n\t" /* mm6 = 4*src6 */
- "paddw %%mm4, %%mm2 \n\t" /* mm2 = 5*src2 */
- "paddw %%mm6, %%mm0 \n\t" /* mm0 = 5*src6 */
- "paddw %%mm2, %%mm2 \n\t"
- "paddw %%mm0, %%mm0 \n\t"
- "psubw %%mm0, %%mm4 \n\t" /* mm4 = 4*src2 - 10*src6 = a7 */
- "paddw %%mm2, %%mm6 \n\t" /* mm6 = 4*src6 + 10*src2 = a6 */
-
- "movq (%0), %%mm2 \n\t" /* mm2 = src0 */
- "movq 64(%0), %%mm0 \n\t" /* mm0 = src4 */
- SUMSUB_BA( %%mm0, %%mm2 ) /* mm0 = src0+src4 mm2 = src0-src4 */
- "psllw $3, %%mm0 \n\t"
- "psllw $3, %%mm2 \n\t"
- "paddw %1, %%mm0 \n\t" /* add rounding bias */
- "paddw %1, %%mm2 \n\t" /* add rounding bias */
-
- SUMSUB_BA( %%mm6, %%mm0 ) /* mm6 = a4 + a6 mm0 = a4 - a6 */
- SUMSUB_BA( %%mm4, %%mm2 ) /* mm4 = a5 + a7 mm2 = a5 - a7 */
- SUMSUB_BA( %%mm7, %%mm6 ) /* mm7 = dst0 mm6 = dst7 */
- SUMSUB_BA( %%mm5, %%mm4 ) /* mm5 = dst1 mm4 = dst6 */
- SUMSUB_BA( %%mm3, %%mm2 ) /* mm3 = dst2 mm2 = dst5 */
- SUMSUB_BA( %%mm1, %%mm0 ) /* mm1 = dst3 mm0 = dst4 */
- :: "r"(block), "m"(bias)
- );
-}
-
-static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
-{
- int i;
- DECLARE_ALIGNED_8(int16_t, b2[64]);
-
- for(i=0; i<2; i++){
- DECLARE_ALIGNED_8(uint64_t, tmp);
-
- cavs_idct8_1d(block+4*i, ff_pw_4);
-
- __asm__ volatile(
- "psraw $3, %%mm7 \n\t"
- "psraw $3, %%mm6 \n\t"
- "psraw $3, %%mm5 \n\t"
- "psraw $3, %%mm4 \n\t"
- "psraw $3, %%mm3 \n\t"
- "psraw $3, %%mm2 \n\t"
- "psraw $3, %%mm1 \n\t"
- "psraw $3, %%mm0 \n\t"
- "movq %%mm7, %0 \n\t"
- TRANSPOSE4( %%mm0, %%mm2, %%mm4, %%mm6, %%mm7 )
- "movq %%mm0, 8(%1) \n\t"
- "movq %%mm6, 24(%1) \n\t"
- "movq %%mm7, 40(%1) \n\t"
- "movq %%mm4, 56(%1) \n\t"
- "movq %0, %%mm7 \n\t"
- TRANSPOSE4( %%mm7, %%mm5, %%mm3, %%mm1, %%mm0 )
- "movq %%mm7, (%1) \n\t"
- "movq %%mm1, 16(%1) \n\t"
- "movq %%mm0, 32(%1) \n\t"
- "movq %%mm3, 48(%1) \n\t"
- : "=m"(tmp)
- : "r"(b2+32*i)
- : "memory"
- );
- }
-
- for(i=0; i<2; i++){
- cavs_idct8_1d(b2+4*i, ff_pw_64);
-
- __asm__ volatile(
- "psraw $7, %%mm7 \n\t"
- "psraw $7, %%mm6 \n\t"
- "psraw $7, %%mm5 \n\t"
- "psraw $7, %%mm4 \n\t"
- "psraw $7, %%mm3 \n\t"
- "psraw $7, %%mm2 \n\t"
- "psraw $7, %%mm1 \n\t"
- "psraw $7, %%mm0 \n\t"
- "movq %%mm7, (%0) \n\t"
- "movq %%mm5, 16(%0) \n\t"
- "movq %%mm3, 32(%0) \n\t"
- "movq %%mm1, 48(%0) \n\t"
- "movq %%mm0, 64(%0) \n\t"
- "movq %%mm2, 80(%0) \n\t"
- "movq %%mm4, 96(%0) \n\t"
- "movq %%mm6, 112(%0) \n\t"
- :: "r"(b2+4*i)
- : "memory"
- );
- }
-
- add_pixels_clamped_mmx(b2, dst, stride);
-
- /* clear block */
- __asm__ volatile(
- "pxor %%mm7, %%mm7 \n\t"
- "movq %%mm7, (%0) \n\t"
- "movq %%mm7, 8(%0) \n\t"
- "movq %%mm7, 16(%0) \n\t"
- "movq %%mm7, 24(%0) \n\t"
- "movq %%mm7, 32(%0) \n\t"
- "movq %%mm7, 40(%0) \n\t"
- "movq %%mm7, 48(%0) \n\t"
- "movq %%mm7, 56(%0) \n\t"
- "movq %%mm7, 64(%0) \n\t"
- "movq %%mm7, 72(%0) \n\t"
- "movq %%mm7, 80(%0) \n\t"
- "movq %%mm7, 88(%0) \n\t"
- "movq %%mm7, 96(%0) \n\t"
- "movq %%mm7, 104(%0) \n\t"
- "movq %%mm7, 112(%0) \n\t"
- "movq %%mm7, 120(%0) \n\t"
- :: "r" (block)
- );
-}
-
-/*****************************************************************************
- *
- * motion compensation
- *
- ****************************************************************************/
-
-/* vertical filter [-1 -2 96 42 -7 0] */
-#define QPEL_CAVSV1(A,B,C,D,E,F,OP) \
- "movd (%0), "#F" \n\t"\
- "movq "#C", %%mm6 \n\t"\
- "pmullw %5, %%mm6 \n\t"\
- "movq "#D", %%mm7 \n\t"\
- "pmullw %6, %%mm7 \n\t"\
- "psllw $3, "#E" \n\t"\
- "psubw "#E", %%mm6 \n\t"\
- "psraw $3, "#E" \n\t"\
- "paddw %%mm7, %%mm6 \n\t"\
- "paddw "#E", %%mm6 \n\t"\
- "paddw "#B", "#B" \n\t"\
- "pxor %%mm7, %%mm7 \n\t"\
- "add %2, %0 \n\t"\
- "punpcklbw %%mm7, "#F" \n\t"\
- "psubw "#B", %%mm6 \n\t"\
- "psraw $1, "#B" \n\t"\
- "psubw "#A", %%mm6 \n\t"\
- "paddw %4, %%mm6 \n\t"\
- "psraw $7, %%mm6 \n\t"\
- "packuswb %%mm6, %%mm6 \n\t"\
- OP(%%mm6, (%1), A, d) \
- "add %3, %1 \n\t"
-
-/* vertical filter [ 0 -1 5 5 -1 0] */
-#define QPEL_CAVSV2(A,B,C,D,E,F,OP) \
- "movd (%0), "#F" \n\t"\
- "movq "#C", %%mm6 \n\t"\
- "paddw "#D", %%mm6 \n\t"\
- "pmullw %5, %%mm6 \n\t"\
- "add %2, %0 \n\t"\
- "punpcklbw %%mm7, "#F" \n\t"\
- "psubw "#B", %%mm6 \n\t"\
- "psubw "#E", %%mm6 \n\t"\
- "paddw %4, %%mm6 \n\t"\
- "psraw $3, %%mm6 \n\t"\
- "packuswb %%mm6, %%mm6 \n\t"\
- OP(%%mm6, (%1), A, d) \
- "add %3, %1 \n\t"
-
-/* vertical filter [ 0 -7 42 96 -2 -1] */
-#define QPEL_CAVSV3(A,B,C,D,E,F,OP) \
- "movd (%0), "#F" \n\t"\
- "movq "#C", %%mm6 \n\t"\
- "pmullw %6, %%mm6 \n\t"\
- "movq "#D", %%mm7 \n\t"\
- "pmullw %5, %%mm7 \n\t"\
- "psllw $3, "#B" \n\t"\
- "psubw "#B", %%mm6 \n\t"\
- "psraw $3, "#B" \n\t"\
- "paddw %%mm7, %%mm6 \n\t"\
- "paddw "#B", %%mm6 \n\t"\
- "paddw "#E", "#E" \n\t"\
- "pxor %%mm7, %%mm7 \n\t"\
- "add %2, %0 \n\t"\
- "punpcklbw %%mm7, "#F" \n\t"\
- "psubw "#E", %%mm6 \n\t"\
- "psraw $1, "#E" \n\t"\
- "psubw "#F", %%mm6 \n\t"\
- "paddw %4, %%mm6 \n\t"\
- "psraw $7, %%mm6 \n\t"\
- "packuswb %%mm6, %%mm6 \n\t"\
- OP(%%mm6, (%1), A, d) \
- "add %3, %1 \n\t"
-
-
-#define QPEL_CAVSVNUM(VOP,OP,ADD,MUL1,MUL2)\
- int w= 2;\
- src -= 2*srcStride;\
- \
- while(w--){\
- __asm__ volatile(\
- "pxor %%mm7, %%mm7 \n\t"\
- "movd (%0), %%mm0 \n\t"\
- "add %2, %0 \n\t"\
- "movd (%0), %%mm1 \n\t"\
- "add %2, %0 \n\t"\
- "movd (%0), %%mm2 \n\t"\
- "add %2, %0 \n\t"\
- "movd (%0), %%mm3 \n\t"\
- "add %2, %0 \n\t"\
- "movd (%0), %%mm4 \n\t"\
- "add %2, %0 \n\t"\
- "punpcklbw %%mm7, %%mm0 \n\t"\
- "punpcklbw %%mm7, %%mm1 \n\t"\
- "punpcklbw %%mm7, %%mm2 \n\t"\
- "punpcklbw %%mm7, %%mm3 \n\t"\
- "punpcklbw %%mm7, %%mm4 \n\t"\
- VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
- VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
- VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
- VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
- VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\
- VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\
- VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
- VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
- \
- : "+a"(src), "+c"(dst)\
- : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ADD), "m"(MUL1), "m"(MUL2)\
- : "memory"\
- );\
- if(h==16){\
- __asm__ volatile(\
- VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
- VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
- VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\
- VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\
- VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
- VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
- VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
- VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
- \
- : "+a"(src), "+c"(dst)\
- : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ADD), "m"(MUL1), "m"(MUL2)\
- : "memory"\
- );\
- }\
- src += 4-(h+5)*srcStride;\
- dst += 4-h*dstStride;\
- }
-
-#define QPEL_CAVS(OPNAME, OP, MMX)\
-static void OPNAME ## cavs_qpel8_h_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
- int h=8;\
- __asm__ volatile(\
- "pxor %%mm7, %%mm7 \n\t"\
- "movq %5, %%mm6 \n\t"\
- "1: \n\t"\
- "movq (%0), %%mm0 \n\t"\
- "movq 1(%0), %%mm2 \n\t"\
- "movq %%mm0, %%mm1 \n\t"\
- "movq %%mm2, %%mm3 \n\t"\
- "punpcklbw %%mm7, %%mm0 \n\t"\
- "punpckhbw %%mm7, %%mm1 \n\t"\
- "punpcklbw %%mm7, %%mm2 \n\t"\
- "punpckhbw %%mm7, %%mm3 \n\t"\
- "paddw %%mm2, %%mm0 \n\t"\
- "paddw %%mm3, %%mm1 \n\t"\
- "pmullw %%mm6, %%mm0 \n\t"\
- "pmullw %%mm6, %%mm1 \n\t"\
- "movq -1(%0), %%mm2 \n\t"\
- "movq 2(%0), %%mm4 \n\t"\
- "movq %%mm2, %%mm3 \n\t"\
- "movq %%mm4, %%mm5 \n\t"\
- "punpcklbw %%mm7, %%mm2 \n\t"\
- "punpckhbw %%mm7, %%mm3 \n\t"\
- "punpcklbw %%mm7, %%mm4 \n\t"\
- "punpckhbw %%mm7, %%mm5 \n\t"\
- "paddw %%mm4, %%mm2 \n\t"\
- "paddw %%mm3, %%mm5 \n\t"\
- "psubw %%mm2, %%mm0 \n\t"\
- "psubw %%mm5, %%mm1 \n\t"\
- "movq %6, %%mm5 \n\t"\
- "paddw %%mm5, %%mm0 \n\t"\
- "paddw %%mm5, %%mm1 \n\t"\
- "psraw $3, %%mm0 \n\t"\
- "psraw $3, %%mm1 \n\t"\
- "packuswb %%mm1, %%mm0 \n\t"\
- OP(%%mm0, (%1),%%mm5, q) \
- "add %3, %0 \n\t"\
- "add %4, %1 \n\t"\
- "decl %2 \n\t"\
- " jnz 1b \n\t"\
- : "+a"(src), "+c"(dst), "+m"(h)\
- : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_4)\
- : "memory"\
- );\
-}\
-\
-static inline void OPNAME ## cavs_qpel8or16_v1_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
- QPEL_CAVSVNUM(QPEL_CAVSV1,OP,ff_pw_64,ff_pw_96,ff_pw_42) \
-}\
-\
-static inline void OPNAME ## cavs_qpel8or16_v2_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
- QPEL_CAVSVNUM(QPEL_CAVSV2,OP,ff_pw_4,ff_pw_5,ff_pw_5) \
-}\
-\
-static inline void OPNAME ## cavs_qpel8or16_v3_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
- QPEL_CAVSVNUM(QPEL_CAVSV3,OP,ff_pw_64,ff_pw_96,ff_pw_42) \
-}\
-\
-static void OPNAME ## cavs_qpel8_v1_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
- OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst , src , dstStride, srcStride, 8);\
-}\
-static void OPNAME ## cavs_qpel16_v1_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
- OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst , src , dstStride, srcStride, 16);\
- OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
-}\
-\
-static void OPNAME ## cavs_qpel8_v2_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
- OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst , src , dstStride, srcStride, 8);\
-}\
-static void OPNAME ## cavs_qpel16_v2_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
- OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst , src , dstStride, srcStride, 16);\
- OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
-}\
-\
-static void OPNAME ## cavs_qpel8_v3_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
- OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst , src , dstStride, srcStride, 8);\
-}\
-static void OPNAME ## cavs_qpel16_v3_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
- OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst , src , dstStride, srcStride, 16);\
- OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
-}\
-\
-static void OPNAME ## cavs_qpel16_h_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
- OPNAME ## cavs_qpel8_h_ ## MMX(dst , src , dstStride, srcStride);\
- OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\
- src += 8*srcStride;\
- dst += 8*dstStride;\
- OPNAME ## cavs_qpel8_h_ ## MMX(dst , src , dstStride, srcStride);\
- OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\
-}\
-
-#define CAVS_MC(OPNAME, SIZE, MMX) \
-static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- OPNAME ## cavs_qpel ## SIZE ## _h_ ## MMX(dst, src, stride, stride);\
-}\
-\
-static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- OPNAME ## cavs_qpel ## SIZE ## _v1_ ## MMX(dst, src, stride, stride);\
-}\
-\
-static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- OPNAME ## cavs_qpel ## SIZE ## _v2_ ## MMX(dst, src, stride, stride);\
-}\
-\
-static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- OPNAME ## cavs_qpel ## SIZE ## _v3_ ## MMX(dst, src, stride, stride);\
-}\
-
-#define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b " \n\t"
-#define AVG_3DNOW_OP(a,b,temp, size) \
-"mov" #size " " #b ", " #temp " \n\t"\
-"pavgusb " #temp ", " #a " \n\t"\
-"mov" #size " " #a ", " #b " \n\t"
-#define AVG_MMX2_OP(a,b,temp, size) \
-"mov" #size " " #b ", " #temp " \n\t"\
-"pavgb " #temp ", " #a " \n\t"\
-"mov" #size " " #a ", " #b " \n\t"
-
-QPEL_CAVS(put_, PUT_OP, 3dnow)
-QPEL_CAVS(avg_, AVG_3DNOW_OP, 3dnow)
-QPEL_CAVS(put_, PUT_OP, mmx2)
-QPEL_CAVS(avg_, AVG_MMX2_OP, mmx2)
-
-CAVS_MC(put_, 8, 3dnow)
-CAVS_MC(put_, 16,3dnow)
-CAVS_MC(avg_, 8, 3dnow)
-CAVS_MC(avg_, 16,3dnow)
-CAVS_MC(put_, 8, mmx2)
-CAVS_MC(put_, 16,mmx2)
-CAVS_MC(avg_, 8, mmx2)
-CAVS_MC(avg_, 16,mmx2)
-
-void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
-void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
-void ff_put_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
-void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
-
-void ff_cavsdsp_init_mmx2(DSPContext* c, AVCodecContext *avctx) {
-#define dspfunc(PFX, IDX, NUM) \
- c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmx2; \
- c->PFX ## _pixels_tab[IDX][ 2] = ff_ ## PFX ## NUM ## _mc20_mmx2; \
- c->PFX ## _pixels_tab[IDX][ 4] = ff_ ## PFX ## NUM ## _mc01_mmx2; \
- c->PFX ## _pixels_tab[IDX][ 8] = ff_ ## PFX ## NUM ## _mc02_mmx2; \
- c->PFX ## _pixels_tab[IDX][12] = ff_ ## PFX ## NUM ## _mc03_mmx2; \
-
- dspfunc(put_cavs_qpel, 0, 16);
- dspfunc(put_cavs_qpel, 1, 8);
- dspfunc(avg_cavs_qpel, 0, 16);
- dspfunc(avg_cavs_qpel, 1, 8);
-#undef dspfunc
- c->cavs_idct8_add = cavs_idct8_add_mmx;
-}
-
-void ff_cavsdsp_init_3dnow(DSPContext* c, AVCodecContext *avctx) {
-#define dspfunc(PFX, IDX, NUM) \
- c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmx2; \
- c->PFX ## _pixels_tab[IDX][ 2] = ff_ ## PFX ## NUM ## _mc20_3dnow; \
- c->PFX ## _pixels_tab[IDX][ 4] = ff_ ## PFX ## NUM ## _mc01_3dnow; \
- c->PFX ## _pixels_tab[IDX][ 8] = ff_ ## PFX ## NUM ## _mc02_3dnow; \
- c->PFX ## _pixels_tab[IDX][12] = ff_ ## PFX ## NUM ## _mc03_3dnow; \
-
- dspfunc(put_cavs_qpel, 0, 16);
- dspfunc(put_cavs_qpel, 1, 8);
- dspfunc(avg_cavs_qpel, 0, 16);
- dspfunc(avg_cavs_qpel, 1, 8);
-#undef dspfunc
- c->cavs_idct8_add = cavs_idct8_add_mmx;
-}
diff --git a/libavcodec/i386/cpuid.c b/libavcodec/i386/cpuid.c
deleted file mode 100644
index 2f2a669..0000000
--- a/libavcodec/i386/cpuid.c
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * CPU detection code, extracted from mmx.h
- * (c)1997-99 by H. Dietz and R. Fisher
- * Converted to C and improved by Fabrice Bellard.
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <stdlib.h>
-#include "libavutil/x86_cpu.h"
-#include "libavcodec/dsputil.h"
-
-#undef printf
-
-/* ebx saving is necessary for PIC. gcc seems unable to see it alone */
-#define cpuid(index,eax,ebx,ecx,edx)\
- __asm__ volatile\
- ("mov %%"REG_b", %%"REG_S"\n\t"\
- "cpuid\n\t"\
- "xchg %%"REG_b", %%"REG_S\
- : "=a" (eax), "=S" (ebx),\
- "=c" (ecx), "=d" (edx)\
- : "0" (index));
-
-/* Function to test if multimedia instructions are supported... */
-int mm_support(void)
-{
- int rval = 0;
- int eax, ebx, ecx, edx;
- int max_std_level, max_ext_level, std_caps=0, ext_caps=0;
- x86_reg a, c;
-
- __asm__ volatile (
- /* See if CPUID instruction is supported ... */
- /* ... Get copies of EFLAGS into eax and ecx */
- "pushf\n\t"
- "pop %0\n\t"
- "mov %0, %1\n\t"
-
- /* ... Toggle the ID bit in one copy and store */
- /* to the EFLAGS reg */
- "xor $0x200000, %0\n\t"
- "push %0\n\t"
- "popf\n\t"
-
- /* ... Get the (hopefully modified) EFLAGS */
- "pushf\n\t"
- "pop %0\n\t"
- : "=a" (a), "=c" (c)
- :
- : "cc"
- );
-
- if (a == c)
- return 0; /* CPUID not supported */
-
- cpuid(0, max_std_level, ebx, ecx, edx);
-
- if(max_std_level >= 1){
- cpuid(1, eax, ebx, ecx, std_caps);
- if (std_caps & (1<<23))
- rval |= FF_MM_MMX;
- if (std_caps & (1<<25))
- rval |= FF_MM_MMXEXT
-#if !defined(__GNUC__) || __GNUC__ > 2
- | FF_MM_SSE;
- if (std_caps & (1<<26))
- rval |= FF_MM_SSE2;
- if (ecx & 1)
- rval |= FF_MM_SSE3;
- if (ecx & 0x00000200 )
- rval |= FF_MM_SSSE3
-#endif
- ;
- }
-
- cpuid(0x80000000, max_ext_level, ebx, ecx, edx);
-
- if(max_ext_level >= 0x80000001){
- cpuid(0x80000001, eax, ebx, ecx, ext_caps);
- if (ext_caps & (1<<31))
- rval |= FF_MM_3DNOW;
- if (ext_caps & (1<<30))
- rval |= FF_MM_3DNOWEXT;
- if (ext_caps & (1<<23))
- rval |= FF_MM_MMX;
- if (ext_caps & (1<<22))
- rval |= FF_MM_MMXEXT;
- }
-
-#if 0
- av_log(NULL, AV_LOG_DEBUG, "%s%s%s%s%s%s%s%s\n",
- (rval&FF_MM_MMX) ? "MMX ":"",
- (rval&FF_MM_MMXEXT) ? "MMX2 ":"",
- (rval&FF_MM_SSE) ? "SSE ":"",
- (rval&FF_MM_SSE2) ? "SSE2 ":"",
- (rval&FF_MM_SSE3) ? "SSE3 ":"",
- (rval&FF_MM_SSSE3) ? "SSSE3 ":"",
- (rval&FF_MM_3DNOW) ? "3DNow ":"",
- (rval&FF_MM_3DNOWEXT) ? "3DNowExt ":"");
-#endif
- return rval;
-}
-
-#ifdef TEST
-int main ( void )
-{
- int mm_flags;
- mm_flags = mm_support();
- printf("mm_support = 0x%08X\n",mm_flags);
- return 0;
-}
-#endif
diff --git a/libavcodec/i386/dsputil_h264_template_mmx.c b/libavcodec/i386/dsputil_h264_template_mmx.c
deleted file mode 100644
index 0bf8732..0000000
--- a/libavcodec/i386/dsputil_h264_template_mmx.c
+++ /dev/null
@@ -1,308 +0,0 @@
-/*
- * Copyright (c) 2005 Zoltan Hidvegi <hzoli -a- hzoli -d- com>,
- * Loren Merritt
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-/**
- * MMX optimized version of (put|avg)_h264_chroma_mc8.
- * H264_CHROMA_MC8_TMPL must be defined to the desired function name
- * H264_CHROMA_OP must be defined to empty for put and pavgb/pavgusb for avg
- * H264_CHROMA_MC8_MV0 must be defined to a (put|avg)_pixels8 function
- */
-static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y, int rnd)
-{
- const uint64_t *rnd_reg;
- DECLARE_ALIGNED_8(uint64_t, AA);
- DECLARE_ALIGNED_8(uint64_t, DD);
- int i;
-
- if(y==0 && x==0) {
- /* no filter needed */
- H264_CHROMA_MC8_MV0(dst, src, stride, h);
- return;
- }
-
- assert(x<8 && y<8 && x>=0 && y>=0);
-
- if(y==0 || x==0)
- {
- /* 1 dimensional filter only */
- const int dxy = x ? 1 : stride;
-
- rnd_reg = rnd ? &ff_pw_4 : &ff_pw_3;
-
- __asm__ volatile(
- "movd %0, %%mm5\n\t"
- "movq %1, %%mm4\n\t"
- "movq %2, %%mm6\n\t" /* mm6 = rnd */
- "punpcklwd %%mm5, %%mm5\n\t"
- "punpckldq %%mm5, %%mm5\n\t" /* mm5 = B = x */
- "pxor %%mm7, %%mm7\n\t"
- "psubw %%mm5, %%mm4\n\t" /* mm4 = A = 8-x */
- :: "rm"(x+y), "m"(ff_pw_8), "m"(*rnd_reg));
-
- for(i=0; i<h; i++) {
- __asm__ volatile(
- /* mm0 = src[0..7], mm1 = src[1..8] */
- "movq %0, %%mm0\n\t"
- "movq %1, %%mm2\n\t"
- :: "m"(src[0]), "m"(src[dxy]));
-
- __asm__ volatile(
- /* [mm0,mm1] = A * src[0..7] */
- /* [mm2,mm3] = B * src[1..8] */
- "movq %%mm0, %%mm1\n\t"
- "movq %%mm2, %%mm3\n\t"
- "punpcklbw %%mm7, %%mm0\n\t"
- "punpckhbw %%mm7, %%mm1\n\t"
- "punpcklbw %%mm7, %%mm2\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "pmullw %%mm4, %%mm0\n\t"
- "pmullw %%mm4, %%mm1\n\t"
- "pmullw %%mm5, %%mm2\n\t"
- "pmullw %%mm5, %%mm3\n\t"
-
- /* dst[0..7] = (A * src[0..7] + B * src[1..8] + 4) >> 3 */
- "paddw %%mm6, %%mm0\n\t"
- "paddw %%mm6, %%mm1\n\t"
- "paddw %%mm2, %%mm0\n\t"
- "paddw %%mm3, %%mm1\n\t"
- "psrlw $3, %%mm0\n\t"
- "psrlw $3, %%mm1\n\t"
- "packuswb %%mm1, %%mm0\n\t"
- H264_CHROMA_OP(%0, %%mm0)
- "movq %%mm0, %0\n\t"
- : "=m" (dst[0]));
-
- src += stride;
- dst += stride;
- }
- return;
- }
-
- /* general case, bilinear */
- rnd_reg = rnd ? &ff_pw_32.a : &ff_pw_28.a;
- __asm__ volatile("movd %2, %%mm4\n\t"
- "movd %3, %%mm6\n\t"
- "punpcklwd %%mm4, %%mm4\n\t"
- "punpcklwd %%mm6, %%mm6\n\t"
- "punpckldq %%mm4, %%mm4\n\t" /* mm4 = x words */
- "punpckldq %%mm6, %%mm6\n\t" /* mm6 = y words */
- "movq %%mm4, %%mm5\n\t"
- "pmullw %%mm6, %%mm4\n\t" /* mm4 = x * y */
- "psllw $3, %%mm5\n\t"
- "psllw $3, %%mm6\n\t"
- "movq %%mm5, %%mm7\n\t"
- "paddw %%mm6, %%mm7\n\t"
- "movq %%mm4, %1\n\t" /* DD = x * y */
- "psubw %%mm4, %%mm5\n\t" /* mm5 = B = 8x - xy */
- "psubw %%mm4, %%mm6\n\t" /* mm6 = C = 8y - xy */
- "paddw %4, %%mm4\n\t"
- "psubw %%mm7, %%mm4\n\t" /* mm4 = A = xy - (8x+8y) + 64 */
- "pxor %%mm7, %%mm7\n\t"
- "movq %%mm4, %0\n\t"
- : "=m" (AA), "=m" (DD) : "rm" (x), "rm" (y), "m" (ff_pw_64));
-
- __asm__ volatile(
- /* mm0 = src[0..7], mm1 = src[1..8] */
- "movq %0, %%mm0\n\t"
- "movq %1, %%mm1\n\t"
- : : "m" (src[0]), "m" (src[1]));
-
- for(i=0; i<h; i++) {
- src += stride;
-
- __asm__ volatile(
- /* mm2 = A * src[0..3] + B * src[1..4] */
- /* mm3 = A * src[4..7] + B * src[5..8] */
- "movq %%mm0, %%mm2\n\t"
- "movq %%mm1, %%mm3\n\t"
- "punpckhbw %%mm7, %%mm0\n\t"
- "punpcklbw %%mm7, %%mm1\n\t"
- "punpcklbw %%mm7, %%mm2\n\t"
- "punpckhbw %%mm7, %%mm3\n\t"
- "pmullw %0, %%mm0\n\t"
- "pmullw %0, %%mm2\n\t"
- "pmullw %%mm5, %%mm1\n\t"
- "pmullw %%mm5, %%mm3\n\t"
- "paddw %%mm1, %%mm2\n\t"
- "paddw %%mm0, %%mm3\n\t"
- : : "m" (AA));
-
- __asm__ volatile(
- /* [mm2,mm3] += C * src[0..7] */
- "movq %0, %%mm0\n\t"
- "movq %%mm0, %%mm1\n\t"
- "punpcklbw %%mm7, %%mm0\n\t"
- "punpckhbw %%mm7, %%mm1\n\t"
- "pmullw %%mm6, %%mm0\n\t"
- "pmullw %%mm6, %%mm1\n\t"
- "paddw %%mm0, %%mm2\n\t"
- "paddw %%mm1, %%mm3\n\t"
- : : "m" (src[0]));
-
- __asm__ volatile(
- /* [mm2,mm3] += D * src[1..8] */
- "movq %1, %%mm1\n\t"
- "movq %%mm1, %%mm0\n\t"
- "movq %%mm1, %%mm4\n\t"
- "punpcklbw %%mm7, %%mm0\n\t"
- "punpckhbw %%mm7, %%mm4\n\t"
- "pmullw %2, %%mm0\n\t"
- "pmullw %2, %%mm4\n\t"
- "paddw %%mm0, %%mm2\n\t"
- "paddw %%mm4, %%mm3\n\t"
- "movq %0, %%mm0\n\t"
- : : "m" (src[0]), "m" (src[1]), "m" (DD));
-
- __asm__ volatile(
- /* dst[0..7] = ([mm2,mm3] + 32) >> 6 */
- "paddw %1, %%mm2\n\t"
- "paddw %1, %%mm3\n\t"
- "psrlw $6, %%mm2\n\t"
- "psrlw $6, %%mm3\n\t"
- "packuswb %%mm3, %%mm2\n\t"
- H264_CHROMA_OP(%0, %%mm2)
- "movq %%mm2, %0\n\t"
- : "=m" (dst[0]) : "m" (*rnd_reg));
- dst+= stride;
- }
-}
-
-static void H264_CHROMA_MC4_TMPL(uint8_t *dst/*align 4*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
-{
- __asm__ volatile(
- "pxor %%mm7, %%mm7 \n\t"
- "movd %5, %%mm2 \n\t"
- "movd %6, %%mm3 \n\t"
- "movq "MANGLE(ff_pw_8)", %%mm4\n\t"
- "movq "MANGLE(ff_pw_8)", %%mm5\n\t"
- "punpcklwd %%mm2, %%mm2 \n\t"
- "punpcklwd %%mm3, %%mm3 \n\t"
- "punpcklwd %%mm2, %%mm2 \n\t"
- "punpcklwd %%mm3, %%mm3 \n\t"
- "psubw %%mm2, %%mm4 \n\t"
- "psubw %%mm3, %%mm5 \n\t"
-
- "movd (%1), %%mm0 \n\t"
- "movd 1(%1), %%mm6 \n\t"
- "add %3, %1 \n\t"
- "punpcklbw %%mm7, %%mm0 \n\t"
- "punpcklbw %%mm7, %%mm6 \n\t"
- "pmullw %%mm4, %%mm0 \n\t"
- "pmullw %%mm2, %%mm6 \n\t"
- "paddw %%mm0, %%mm6 \n\t"
-
- "1: \n\t"
- "movd (%1), %%mm0 \n\t"
- "movd 1(%1), %%mm1 \n\t"
- "add %3, %1 \n\t"
- "punpcklbw %%mm7, %%mm0 \n\t"
- "punpcklbw %%mm7, %%mm1 \n\t"
- "pmullw %%mm4, %%mm0 \n\t"
- "pmullw %%mm2, %%mm1 \n\t"
- "paddw %%mm0, %%mm1 \n\t"
- "movq %%mm1, %%mm0 \n\t"
- "pmullw %%mm5, %%mm6 \n\t"
- "pmullw %%mm3, %%mm1 \n\t"
- "paddw %4, %%mm6 \n\t"
- "paddw %%mm6, %%mm1 \n\t"
- "psrlw $6, %%mm1 \n\t"
- "packuswb %%mm1, %%mm1 \n\t"
- H264_CHROMA_OP4((%0), %%mm1, %%mm6)
- "movd %%mm1, (%0) \n\t"
- "add %3, %0 \n\t"
- "movd (%1), %%mm6 \n\t"
- "movd 1(%1), %%mm1 \n\t"
- "add %3, %1 \n\t"
- "punpcklbw %%mm7, %%mm6 \n\t"
- "punpcklbw %%mm7, %%mm1 \n\t"
- "pmullw %%mm4, %%mm6 \n\t"
- "pmullw %%mm2, %%mm1 \n\t"
- "paddw %%mm6, %%mm1 \n\t"
- "movq %%mm1, %%mm6 \n\t"
- "pmullw %%mm5, %%mm0 \n\t"
- "pmullw %%mm3, %%mm1 \n\t"
- "paddw %4, %%mm0 \n\t"
- "paddw %%mm0, %%mm1 \n\t"
- "psrlw $6, %%mm1 \n\t"
- "packuswb %%mm1, %%mm1 \n\t"
- H264_CHROMA_OP4((%0), %%mm1, %%mm0)
- "movd %%mm1, (%0) \n\t"
- "add %3, %0 \n\t"
- "sub $2, %2 \n\t"
- "jnz 1b \n\t"
- : "+r"(dst), "+r"(src), "+r"(h)
- : "r"((x86_reg)stride), "m"(ff_pw_32), "m"(x), "m"(y)
- );
-}
-
-#ifdef H264_CHROMA_MC2_TMPL
-static void H264_CHROMA_MC2_TMPL(uint8_t *dst/*align 2*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
-{
- int tmp = ((1<<16)-1)*x + 8;
- int CD= tmp*y;
- int AB= (tmp<<3) - CD;
- __asm__ volatile(
- /* mm5 = {A,B,A,B} */
- /* mm6 = {C,D,C,D} */
- "movd %0, %%mm5\n\t"
- "movd %1, %%mm6\n\t"
- "punpckldq %%mm5, %%mm5\n\t"
- "punpckldq %%mm6, %%mm6\n\t"
- "pxor %%mm7, %%mm7\n\t"
- /* mm0 = src[0,1,1,2] */
- "movd %2, %%mm2\n\t"
- "punpcklbw %%mm7, %%mm2\n\t"
- "pshufw $0x94, %%mm2, %%mm2\n\t"
- :: "r"(AB), "r"(CD), "m"(src[0]));
-
-
- __asm__ volatile(
- "1:\n\t"
- "add %4, %1\n\t"
- /* mm1 = A * src[0,1] + B * src[1,2] */
- "movq %%mm2, %%mm1\n\t"
- "pmaddwd %%mm5, %%mm1\n\t"
- /* mm0 = src[0,1,1,2] */
- "movd (%1), %%mm0\n\t"
- "punpcklbw %%mm7, %%mm0\n\t"
- "pshufw $0x94, %%mm0, %%mm0\n\t"
- /* mm1 += C * src[0,1] + D * src[1,2] */
- "movq %%mm0, %%mm2\n\t"
- "pmaddwd %%mm6, %%mm0\n\t"
- "paddw %3, %%mm1\n\t"
- "paddw %%mm0, %%mm1\n\t"
- /* dst[0,1] = pack((mm1 + 32) >> 6) */
- "psrlw $6, %%mm1\n\t"
- "packssdw %%mm7, %%mm1\n\t"
- "packuswb %%mm7, %%mm1\n\t"
- H264_CHROMA_OP4((%0), %%mm1, %%mm3)
- "movd %%mm1, %%esi\n\t"
- "movw %%si, (%0)\n\t"
- "add %4, %0\n\t"
- "sub $1, %2\n\t"
- "jnz 1b\n\t"
- : "+r" (dst), "+r"(src), "+r"(h)
- : "m" (ff_pw_32), "r"((x86_reg)stride)
- : "%esi");
-
-}
-#endif
-
diff --git a/libavcodec/i386/dsputil_mmx.c b/libavcodec/i386/dsputil_mmx.c
deleted file mode 100644
index 161afab..0000000
--- a/libavcodec/i386/dsputil_mmx.c
+++ /dev/null
@@ -1,2916 +0,0 @@
-/*
- * MMX optimized DSP utils
- * Copyright (c) 2000, 2001 Fabrice Bellard.
- * Copyright (c) 2002-2004 Michael Niedermayer <michaelni at gmx.at>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- *
- * MMX optimization by Nick Kurshev <nickols_k at mail.ru>
- */
-
-#include "libavutil/x86_cpu.h"
-#include "libavcodec/dsputil.h"
-#include "libavcodec/h263.h"
-#include "libavcodec/mpegvideo.h"
-#include "libavcodec/simple_idct.h"
-#include "dsputil_mmx.h"
-#include "mmx.h"
-#include "vp3dsp_mmx.h"
-#include "vp3dsp_sse2.h"
-#include "idct_xvid.h"
-
-//#undef NDEBUG
-//#include <assert.h>
-
-int mm_flags; /* multimedia extension flags */
-
-/* pixel operations */
-DECLARE_ALIGNED_8 (const uint64_t, ff_bone) = 0x0101010101010101ULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_wtwo) = 0x0002000200020002ULL;
-
-DECLARE_ALIGNED_16(const uint64_t, ff_pdw_80000000[2]) =
-{0x8000000080000000ULL, 0x8000000080000000ULL};
-
-DECLARE_ALIGNED_8 (const uint64_t, ff_pw_3 ) = 0x0003000300030003ULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pw_4 ) = 0x0004000400040004ULL;
-DECLARE_ALIGNED_16(const xmm_t, ff_pw_5 ) = {0x0005000500050005ULL, 0x0005000500050005ULL};
-DECLARE_ALIGNED_16(const xmm_t, ff_pw_8 ) = {0x0008000800080008ULL, 0x0008000800080008ULL};
-DECLARE_ALIGNED_8 (const uint64_t, ff_pw_15 ) = 0x000F000F000F000FULL;
-DECLARE_ALIGNED_16(const xmm_t, ff_pw_16 ) = {0x0010001000100010ULL, 0x0010001000100010ULL};
-DECLARE_ALIGNED_8 (const uint64_t, ff_pw_20 ) = 0x0014001400140014ULL;
-DECLARE_ALIGNED_16(const xmm_t, ff_pw_28 ) = {0x001C001C001C001CULL, 0x001C001C001C001CULL};
-DECLARE_ALIGNED_16(const xmm_t, ff_pw_32 ) = {0x0020002000200020ULL, 0x0020002000200020ULL};
-DECLARE_ALIGNED_8 (const uint64_t, ff_pw_42 ) = 0x002A002A002A002AULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pw_64 ) = 0x0040004000400040ULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pw_96 ) = 0x0060006000600060ULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pw_128) = 0x0080008000800080ULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pw_255) = 0x00ff00ff00ff00ffULL;
-
-DECLARE_ALIGNED_8 (const uint64_t, ff_pb_1 ) = 0x0101010101010101ULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pb_3 ) = 0x0303030303030303ULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pb_7 ) = 0x0707070707070707ULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pb_1F ) = 0x1F1F1F1F1F1F1F1FULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pb_3F ) = 0x3F3F3F3F3F3F3F3FULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pb_81 ) = 0x8181818181818181ULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pb_A1 ) = 0xA1A1A1A1A1A1A1A1ULL;
-DECLARE_ALIGNED_8 (const uint64_t, ff_pb_FC ) = 0xFCFCFCFCFCFCFCFCULL;
-
-DECLARE_ALIGNED_16(const double, ff_pd_1[2]) = { 1.0, 1.0 };
-DECLARE_ALIGNED_16(const double, ff_pd_2[2]) = { 2.0, 2.0 };
-
-#define JUMPALIGN() __asm__ volatile (ASMALIGN(3)::)
-#define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%" #regd ", %%" #regd ::)
-
-#define MOVQ_BFE(regd) \
- __asm__ volatile ( \
- "pcmpeqd %%" #regd ", %%" #regd " \n\t"\
- "paddb %%" #regd ", %%" #regd " \n\t" ::)
-
-#ifndef PIC
-#define MOVQ_BONE(regd) __asm__ volatile ("movq %0, %%" #regd " \n\t" ::"m"(ff_bone))
-#define MOVQ_WTWO(regd) __asm__ volatile ("movq %0, %%" #regd " \n\t" ::"m"(ff_wtwo))
-#else
-// for shared library it's better to use this way for accessing constants
-// pcmpeqd -> -1
-#define MOVQ_BONE(regd) \
- __asm__ volatile ( \
- "pcmpeqd %%" #regd ", %%" #regd " \n\t" \
- "psrlw $15, %%" #regd " \n\t" \
- "packuswb %%" #regd ", %%" #regd " \n\t" ::)
-
-#define MOVQ_WTWO(regd) \
- __asm__ volatile ( \
- "pcmpeqd %%" #regd ", %%" #regd " \n\t" \
- "psrlw $15, %%" #regd " \n\t" \
- "psllw $1, %%" #regd " \n\t"::)
-
-#endif
-
-// using regr as temporary and for the output result
-// first argument is unmodifed and second is trashed
-// regfe is supposed to contain 0xfefefefefefefefe
-#define PAVGB_MMX_NO_RND(rega, regb, regr, regfe) \
- "movq " #rega ", " #regr " \n\t"\
- "pand " #regb ", " #regr " \n\t"\
- "pxor " #rega ", " #regb " \n\t"\
- "pand " #regfe "," #regb " \n\t"\
- "psrlq $1, " #regb " \n\t"\
- "paddb " #regb ", " #regr " \n\t"
-
-#define PAVGB_MMX(rega, regb, regr, regfe) \
- "movq " #rega ", " #regr " \n\t"\
- "por " #regb ", " #regr " \n\t"\
- "pxor " #rega ", " #regb " \n\t"\
- "pand " #regfe "," #regb " \n\t"\
- "psrlq $1, " #regb " \n\t"\
- "psubb " #regb ", " #regr " \n\t"
-
-// mm6 is supposed to contain 0xfefefefefefefefe
-#define PAVGBP_MMX_NO_RND(rega, regb, regr, regc, regd, regp) \
- "movq " #rega ", " #regr " \n\t"\
- "movq " #regc ", " #regp " \n\t"\
- "pand " #regb ", " #regr " \n\t"\
- "pand " #regd ", " #regp " \n\t"\
- "pxor " #rega ", " #regb " \n\t"\
- "pxor " #regc ", " #regd " \n\t"\
- "pand %%mm6, " #regb " \n\t"\
- "pand %%mm6, " #regd " \n\t"\
- "psrlq $1, " #regb " \n\t"\
- "psrlq $1, " #regd " \n\t"\
- "paddb " #regb ", " #regr " \n\t"\
- "paddb " #regd ", " #regp " \n\t"
-
-#define PAVGBP_MMX(rega, regb, regr, regc, regd, regp) \
- "movq " #rega ", " #regr " \n\t"\
- "movq " #regc ", " #regp " \n\t"\
- "por " #regb ", " #regr " \n\t"\
- "por " #regd ", " #regp " \n\t"\
- "pxor " #rega ", " #regb " \n\t"\
- "pxor " #regc ", " #regd " \n\t"\
- "pand %%mm6, " #regb " \n\t"\
- "pand %%mm6, " #regd " \n\t"\
- "psrlq $1, " #regd " \n\t"\
- "psrlq $1, " #regb " \n\t"\
- "psubb " #regb ", " #regr " \n\t"\
- "psubb " #regd ", " #regp " \n\t"
-
-/***********************************/
-/* MMX no rounding */
-#define DEF(x, y) x ## _no_rnd_ ## y ##_mmx
-#define SET_RND MOVQ_WONE
-#define PAVGBP(a, b, c, d, e, f) PAVGBP_MMX_NO_RND(a, b, c, d, e, f)
-#define PAVGB(a, b, c, e) PAVGB_MMX_NO_RND(a, b, c, e)
-
-#include "dsputil_mmx_rnd_template.c"
-
-#undef DEF
-#undef SET_RND
-#undef PAVGBP
-#undef PAVGB
-/***********************************/
-/* MMX rounding */
-
-#define DEF(x, y) x ## _ ## y ##_mmx
-#define SET_RND MOVQ_WTWO
-#define PAVGBP(a, b, c, d, e, f) PAVGBP_MMX(a, b, c, d, e, f)
-#define PAVGB(a, b, c, e) PAVGB_MMX(a, b, c, e)
-
-#include "dsputil_mmx_rnd_template.c"
-
-#undef DEF
-#undef SET_RND
-#undef PAVGBP
-#undef PAVGB
-
-/***********************************/
-/* 3Dnow specific */
-
-#define DEF(x) x ## _3dnow
-#define PAVGB "pavgusb"
-
-#include "dsputil_mmx_avg_template.c"
-
-#undef DEF
-#undef PAVGB
-
-/***********************************/
-/* MMX2 specific */
-
-#define DEF(x) x ## _mmx2
-
-/* Introduced only in MMX2 set */
-#define PAVGB "pavgb"
-
-#include "dsputil_mmx_avg_template.c"
-
-#undef DEF
-#undef PAVGB
-
-#define put_no_rnd_pixels16_mmx put_pixels16_mmx
-#define put_no_rnd_pixels8_mmx put_pixels8_mmx
-#define put_pixels16_mmx2 put_pixels16_mmx
-#define put_pixels8_mmx2 put_pixels8_mmx
-#define put_pixels4_mmx2 put_pixels4_mmx
-#define put_no_rnd_pixels16_mmx2 put_no_rnd_pixels16_mmx
-#define put_no_rnd_pixels8_mmx2 put_no_rnd_pixels8_mmx
-#define put_pixels16_3dnow put_pixels16_mmx
-#define put_pixels8_3dnow put_pixels8_mmx
-#define put_pixels4_3dnow put_pixels4_mmx
-#define put_no_rnd_pixels16_3dnow put_no_rnd_pixels16_mmx
-#define put_no_rnd_pixels8_3dnow put_no_rnd_pixels8_mmx
-
-/***********************************/
-/* standard MMX */
-
-void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
-{
- const DCTELEM *p;
- uint8_t *pix;
-
- /* read the pixels */
- p = block;
- pix = pixels;
- /* unrolled loop */
- __asm__ volatile(
- "movq %3, %%mm0 \n\t"
- "movq 8%3, %%mm1 \n\t"
- "movq 16%3, %%mm2 \n\t"
- "movq 24%3, %%mm3 \n\t"
- "movq 32%3, %%mm4 \n\t"
- "movq 40%3, %%mm5 \n\t"
- "movq 48%3, %%mm6 \n\t"
- "movq 56%3, %%mm7 \n\t"
- "packuswb %%mm1, %%mm0 \n\t"
- "packuswb %%mm3, %%mm2 \n\t"
- "packuswb %%mm5, %%mm4 \n\t"
- "packuswb %%mm7, %%mm6 \n\t"
- "movq %%mm0, (%0) \n\t"
- "movq %%mm2, (%0, %1) \n\t"
- "movq %%mm4, (%0, %1, 2) \n\t"
- "movq %%mm6, (%0, %2) \n\t"
- ::"r" (pix), "r" ((x86_reg)line_size), "r" ((x86_reg)line_size*3), "m"(*p)
- :"memory");
- pix += line_size*4;
- p += 32;
-
- // if here would be an exact copy of the code above
- // compiler would generate some very strange code
- // thus using "r"
- __asm__ volatile(
- "movq (%3), %%mm0 \n\t"
- "movq 8(%3), %%mm1 \n\t"
- "movq 16(%3), %%mm2 \n\t"
- "movq 24(%3), %%mm3 \n\t"
- "movq 32(%3), %%mm4 \n\t"
- "movq 40(%3), %%mm5 \n\t"
- "movq 48(%3), %%mm6 \n\t"
- "movq 56(%3), %%mm7 \n\t"
- "packuswb %%mm1, %%mm0 \n\t"
- "packuswb %%mm3, %%mm2 \n\t"
- "packuswb %%mm5, %%mm4 \n\t"
- "packuswb %%mm7, %%mm6 \n\t"
- "movq %%mm0, (%0) \n\t"
- "movq %%mm2, (%0, %1) \n\t"
- "movq %%mm4, (%0, %1, 2) \n\t"
- "movq %%mm6, (%0, %2) \n\t"
- ::"r" (pix), "r" ((x86_reg)line_size), "r" ((x86_reg)line_size*3), "r"(p)
- :"memory");
-}
-
-static DECLARE_ALIGNED_8(const unsigned char, vector128[8]) =
- { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 };
-
-void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
-{
- int i;
-
- movq_m2r(*vector128, mm1);
- for (i = 0; i < 8; i++) {
- movq_m2r(*(block), mm0);
- packsswb_m2r(*(block + 4), mm0);
- block += 8;
- paddb_r2r(mm1, mm0);
- movq_r2m(mm0, *pixels);
- pixels += line_size;
- }
-}
-
-void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
-{
- const DCTELEM *p;
- uint8_t *pix;
- int i;
-
- /* read the pixels */
- p = block;
- pix = pixels;
- MOVQ_ZERO(mm7);
- i = 4;
- do {
- __asm__ volatile(
- "movq (%2), %%mm0 \n\t"
- "movq 8(%2), %%mm1 \n\t"
- "movq 16(%2), %%mm2 \n\t"
- "movq 24(%2), %%mm3 \n\t"
- "movq %0, %%mm4 \n\t"
- "movq %1, %%mm6 \n\t"
- "movq %%mm4, %%mm5 \n\t"
- "punpcklbw %%mm7, %%mm4 \n\t"
- "punpckhbw %%mm7, %%mm5 \n\t"
- "paddsw %%mm4, %%mm0 \n\t"
- "paddsw %%mm5, %%mm1 \n\t"
- "movq %%mm6, %%mm5 \n\t"
- "punpcklbw %%mm7, %%mm6 \n\t"
- "punpckhbw %%mm7, %%mm5 \n\t"
- "paddsw %%mm6, %%mm2 \n\t"
- "paddsw %%mm5, %%mm3 \n\t"
- "packuswb %%mm1, %%mm0 \n\t"
- "packuswb %%mm3, %%mm2 \n\t"
- "movq %%mm0, %0 \n\t"
- "movq %%mm2, %1 \n\t"
- :"+m"(*pix), "+m"(*(pix+line_size))
- :"r"(p)
- :"memory");
- pix += line_size*2;
- p += 16;
- } while (--i);
-}
-
-static void put_pixels4_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h)
-{
- __asm__ volatile(
- "lea (%3, %3), %%"REG_a" \n\t"
- ASMALIGN(3)
- "1: \n\t"
- "movd (%1), %%mm0 \n\t"
- "movd (%1, %3), %%mm1 \n\t"
- "movd %%mm0, (%2) \n\t"
- "movd %%mm1, (%2, %3) \n\t"
- "add %%"REG_a", %1 \n\t"
- "add %%"REG_a", %2 \n\t"
- "movd (%1), %%mm0 \n\t"
- "movd (%1, %3), %%mm1 \n\t"
- "movd %%mm0, (%2) \n\t"
- "movd %%mm1, (%2, %3) \n\t"
- "add %%"REG_a", %1 \n\t"
- "add %%"REG_a", %2 \n\t"
- "subl $4, %0 \n\t"
- "jnz 1b \n\t"
- : "+g"(h), "+r" (pixels), "+r" (block)
- : "r"((x86_reg)line_size)
- : "%"REG_a, "memory"
- );
-}
-
-static void put_pixels8_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h)
-{
- __asm__ volatile(
- "lea (%3, %3), %%"REG_a" \n\t"
- ASMALIGN(3)
- "1: \n\t"
- "movq (%1), %%mm0 \n\t"
- "movq (%1, %3), %%mm1 \n\t"
- "movq %%mm0, (%2) \n\t"
- "movq %%mm1, (%2, %3) \n\t"
- "add %%"REG_a", %1 \n\t"
- "add %%"REG_a", %2 \n\t"
- "movq (%1), %%mm0 \n\t"
- "movq (%1, %3), %%mm1 \n\t"
- "movq %%mm0, (%2) \n\t"
- "movq %%mm1, (%2, %3) \n\t"
- "add %%"REG_a", %1 \n\t"
- "add %%"REG_a", %2 \n\t"
- "subl $4, %0 \n\t"
- "jnz 1b \n\t"
- : "+g"(h), "+r" (pixels), "+r" (block)
- : "r"((x86_reg)line_size)
- : "%"REG_a, "memory"
- );
-}
-
-static void put_pixels16_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h)
-{
- __asm__ volatile(
- "lea (%3, %3), %%"REG_a" \n\t"
- ASMALIGN(3)
- "1: \n\t"
- "movq (%1), %%mm0 \n\t"
- "movq 8(%1), %%mm4 \n\t"
- "movq (%1, %3), %%mm1 \n\t"
- "movq 8(%1, %3), %%mm5 \n\t"
- "movq %%mm0, (%2) \n\t"
- "movq %%mm4, 8(%2) \n\t"
- "movq %%mm1, (%2, %3) \n\t"
- "movq %%mm5, 8(%2, %3) \n\t"
- "add %%"REG_a", %1 \n\t"
- "add %%"REG_a", %2 \n\t"
- "movq (%1), %%mm0 \n\t"
- "movq 8(%1), %%mm4 \n\t"
- "movq (%1, %3), %%mm1 \n\t"
- "movq 8(%1, %3), %%mm5 \n\t"
- "movq %%mm0, (%2) \n\t"
- "movq %%mm4, 8(%2) \n\t"
- "movq %%mm1, (%2, %3) \n\t"
- "movq %%mm5, 8(%2, %3) \n\t"
- "add %%"REG_a", %1 \n\t"
- "add %%"REG_a", %2 \n\t"
- "subl $4, %0 \n\t"
- "jnz 1b \n\t"
- : "+g"(h), "+r" (pixels), "+r" (block)
- : "r"((x86_reg)line_size)
- : "%"REG_a, "memory"
- );
-}
-
-static void put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
-{
- __asm__ volatile(
- "1: \n\t"
- "movdqu (%1), %%xmm0 \n\t"
- "movdqu (%1,%3), %%xmm1 \n\t"
- "movdqu (%1,%3,2), %%xmm2 \n\t"
- "movdqu (%1,%4), %%xmm3 \n\t"
- "movdqa %%xmm0, (%2) \n\t"
- "movdqa %%xmm1, (%2,%3) \n\t"
- "movdqa %%xmm2, (%2,%3,2) \n\t"
- "movdqa %%xmm3, (%2,%4) \n\t"
- "subl $4, %0 \n\t"
- "lea (%1,%3,4), %1 \n\t"
- "lea (%2,%3,4), %2 \n\t"
- "jnz 1b \n\t"
- : "+g"(h), "+r" (pixels), "+r" (block)
- : "r"((x86_reg)line_size), "r"((x86_reg)3L*line_size)
- : "memory"
- );
-}
-
-static void avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
-{
- __asm__ volatile(
- "1: \n\t"
- "movdqu (%1), %%xmm0 \n\t"
- "movdqu (%1,%3), %%xmm1 \n\t"
- "movdqu (%1,%3,2), %%xmm2 \n\t"
- "movdqu (%1,%4), %%xmm3 \n\t"
- "pavgb (%2), %%xmm0 \n\t"
- "pavgb (%2,%3), %%xmm1 \n\t"
- "pavgb (%2,%3,2), %%xmm2 \n\t"
- "pavgb (%2,%4), %%xmm3 \n\t"
- "movdqa %%xmm0, (%2) \n\t"
- "movdqa %%xmm1, (%2,%3) \n\t"
- "movdqa %%xmm2, (%2,%3,2) \n\t"
- "movdqa %%xmm3, (%2,%4) \n\t"
- "subl $4, %0 \n\t"
- "lea (%1,%3,4), %1 \n\t"
- "lea (%2,%3,4), %2 \n\t"
- "jnz 1b \n\t"
- : "+g"(h), "+r" (pixels), "+r" (block)
- : "r"((x86_reg)line_size), "r"((x86_reg)3L*line_size)
- : "memory"
- );
-}
-
-static void clear_blocks_mmx(DCTELEM *blocks)
-{
- __asm__ volatile(
- "pxor %%mm7, %%mm7 \n\t"
- "mov $-128*6, %%"REG_a" \n\t"
- "1: \n\t"
- "movq %%mm7, (%0, %%"REG_a") \n\t"
- "movq %%mm7, 8(%0, %%"REG_a") \n\t"
- "movq %%mm7, 16(%0, %%"REG_a") \n\t"
- "movq %%mm7, 24(%0, %%"REG_a") \n\t"
- "add $32, %%"REG_a" \n\t"
- " js 1b \n\t"
- : : "r" (((uint8_t *)blocks)+128*6)
- : "%"REG_a
- );
-}
-
-static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){
- x86_reg i=0;
- __asm__ volatile(
- "jmp 2f \n\t"
- "1: \n\t"
- "movq (%1, %0), %%mm0 \n\t"
- "movq (%2, %0), %%mm1 \n\t"
- "paddb %%mm0, %%mm1 \n\t"
- "movq %%mm1, (%2, %0) \n\t"
- "movq 8(%1, %0), %%mm0 \n\t"
- "movq 8(%2, %0), %%mm1 \n\t"
- "paddb %%mm0, %%mm1 \n\t"
- "movq %%mm1, 8(%2, %0) \n\t"
- "add $16, %0 \n\t"
- "2: \n\t"
- "cmp %3, %0 \n\t"
- " js 1b \n\t"
- : "+r" (i)
- : "r"(src), "r"(dst), "r"((x86_reg)w-15)
- );
- for(; i<w; i++)
- dst[i+0] += src[i+0];
-}
-
-static void add_bytes_l2_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
- x86_reg i=0;
- __asm__ volatile(
- "jmp 2f \n\t"
- "1: \n\t"
- "movq (%2, %0), %%mm0 \n\t"
- "movq 8(%2, %0), %%mm1 \n\t"
- "paddb (%3, %0), %%mm0 \n\t"
- "paddb 8(%3, %0), %%mm1 \n\t"
- "movq %%mm0, (%1, %0) \n\t"
- "movq %%mm1, 8(%1, %0) \n\t"
- "add $16, %0 \n\t"
- "2: \n\t"
- "cmp %4, %0 \n\t"
- " js 1b \n\t"
- : "+r" (i)
- : "r"(dst), "r"(src1), "r"(src2), "r"((x86_reg)w-15)
- );
- for(; i<w; i++)
- dst[i] = src1[i] + src2[i];
-}
-
-#define H263_LOOP_FILTER \
- "pxor %%mm7, %%mm7 \n\t"\
- "movq %0, %%mm0 \n\t"\
- "movq %0, %%mm1 \n\t"\
- "movq %3, %%mm2 \n\t"\
- "movq %3, %%mm3 \n\t"\
- "punpcklbw %%mm7, %%mm0 \n\t"\
- "punpckhbw %%mm7, %%mm1 \n\t"\
- "punpcklbw %%mm7, %%mm2 \n\t"\
- "punpckhbw %%mm7, %%mm3 \n\t"\
- "psubw %%mm2, %%mm0 \n\t"\
- "psubw %%mm3, %%mm1 \n\t"\
- "movq %1, %%mm2 \n\t"\
- "movq %1, %%mm3 \n\t"\
- "movq %2, %%mm4 \n\t"\
- "movq %2, %%mm5 \n\t"\
- "punpcklbw %%mm7, %%mm2 \n\t"\
- "punpckhbw %%mm7, %%mm3 \n\t"\
- "punpcklbw %%mm7, %%mm4 \n\t"\
- "punpckhbw %%mm7, %%mm5 \n\t"\
- "psubw %%mm2, %%mm4 \n\t"\
- "psubw %%mm3, %%mm5 \n\t"\
- "psllw $2, %%mm4 \n\t"\
- "psllw $2, %%mm5 \n\t"\
- "paddw %%mm0, %%mm4 \n\t"\
- "paddw %%mm1, %%mm5 \n\t"\
- "pxor %%mm6, %%mm6 \n\t"\
- "pcmpgtw %%mm4, %%mm6 \n\t"\
- "pcmpgtw %%mm5, %%mm7 \n\t"\
- "pxor %%mm6, %%mm4 \n\t"\
- "pxor %%mm7, %%mm5 \n\t"\
- "psubw %%mm6, %%mm4 \n\t"\
- "psubw %%mm7, %%mm5 \n\t"\
- "psrlw $3, %%mm4 \n\t"\
- "psrlw $3, %%mm5 \n\t"\
- "packuswb %%mm5, %%mm4 \n\t"\
- "packsswb %%mm7, %%mm6 \n\t"\
- "pxor %%mm7, %%mm7 \n\t"\
- "movd %4, %%mm2 \n\t"\
- "punpcklbw %%mm2, %%mm2 \n\t"\
- "punpcklbw %%mm2, %%mm2 \n\t"\
- "punpcklbw %%mm2, %%mm2 \n\t"\
- "psubusb %%mm4, %%mm2 \n\t"\
- "movq %%mm2, %%mm3 \n\t"\
- "psubusb %%mm4, %%mm3 \n\t"\
- "psubb %%mm3, %%mm2 \n\t"\
- "movq %1, %%mm3 \n\t"\
- "movq %2, %%mm4 \n\t"\
- "pxor %%mm6, %%mm3 \n\t"\
- "pxor %%mm6, %%mm4 \n\t"\
- "paddusb %%mm2, %%mm3 \n\t"\
- "psubusb %%mm2, %%mm4 \n\t"\
- "pxor %%mm6, %%mm3 \n\t"\
- "pxor %%mm6, %%mm4 \n\t"\
- "paddusb %%mm2, %%mm2 \n\t"\
- "packsswb %%mm1, %%mm0 \n\t"\
- "pcmpgtb %%mm0, %%mm7 \n\t"\
- "pxor %%mm7, %%mm0 \n\t"\
- "psubb %%mm7, %%mm0 \n\t"\
- "movq %%mm0, %%mm1 \n\t"\
- "psubusb %%mm2, %%mm0 \n\t"\
- "psubb %%mm0, %%mm1 \n\t"\
- "pand %5, %%mm1 \n\t"\
- "psrlw $2, %%mm1 \n\t"\
- "pxor %%mm7, %%mm1 \n\t"\
- "psubb %%mm7, %%mm1 \n\t"\
- "movq %0, %%mm5 \n\t"\
- "movq %3, %%mm6 \n\t"\
- "psubb %%mm1, %%mm5 \n\t"\
- "paddb %%mm1, %%mm6 \n\t"
-
-static void h263_v_loop_filter_mmx(uint8_t *src, int stride, int qscale){
- if(ENABLE_ANY_H263) {
- const int strength= ff_h263_loop_filter_strength[qscale];
-
- __asm__ volatile(
-
- H263_LOOP_FILTER
-
- "movq %%mm3, %1 \n\t"
- "movq %%mm4, %2 \n\t"
- "movq %%mm5, %0 \n\t"
- "movq %%mm6, %3 \n\t"
- : "+m" (*(uint64_t*)(src - 2*stride)),
- "+m" (*(uint64_t*)(src - 1*stride)),
- "+m" (*(uint64_t*)(src + 0*stride)),
- "+m" (*(uint64_t*)(src + 1*stride))
- : "g" (2*strength), "m"(ff_pb_FC)
- );
- }
-}
-
-static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride){
- __asm__ volatile( //FIXME could save 1 instruction if done as 8x4 ...
- "movd %4, %%mm0 \n\t"
- "movd %5, %%mm1 \n\t"
- "movd %6, %%mm2 \n\t"
- "movd %7, %%mm3 \n\t"
- "punpcklbw %%mm1, %%mm0 \n\t"
- "punpcklbw %%mm3, %%mm2 \n\t"
- "movq %%mm0, %%mm1 \n\t"
- "punpcklwd %%mm2, %%mm0 \n\t"
- "punpckhwd %%mm2, %%mm1 \n\t"
- "movd %%mm0, %0 \n\t"
- "punpckhdq %%mm0, %%mm0 \n\t"
- "movd %%mm0, %1 \n\t"
- "movd %%mm1, %2 \n\t"
- "punpckhdq %%mm1, %%mm1 \n\t"
- "movd %%mm1, %3 \n\t"
-
- : "=m" (*(uint32_t*)(dst + 0*dst_stride)),
- "=m" (*(uint32_t*)(dst + 1*dst_stride)),
- "=m" (*(uint32_t*)(dst + 2*dst_stride)),
- "=m" (*(uint32_t*)(dst + 3*dst_stride))
- : "m" (*(uint32_t*)(src + 0*src_stride)),
- "m" (*(uint32_t*)(src + 1*src_stride)),
- "m" (*(uint32_t*)(src + 2*src_stride)),
- "m" (*(uint32_t*)(src + 3*src_stride))
- );
-}
-
-static void h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale){
- if(ENABLE_ANY_H263) {
- const int strength= ff_h263_loop_filter_strength[qscale];
- DECLARE_ALIGNED(8, uint64_t, temp[4]);
- uint8_t *btemp= (uint8_t*)temp;
-
- src -= 2;
-
- transpose4x4(btemp , src , 8, stride);
- transpose4x4(btemp+4, src + 4*stride, 8, stride);
- __asm__ volatile(
- H263_LOOP_FILTER // 5 3 4 6
-
- : "+m" (temp[0]),
- "+m" (temp[1]),
- "+m" (temp[2]),
- "+m" (temp[3])
- : "g" (2*strength), "m"(ff_pb_FC)
- );
-
- __asm__ volatile(
- "movq %%mm5, %%mm1 \n\t"
- "movq %%mm4, %%mm0 \n\t"
- "punpcklbw %%mm3, %%mm5 \n\t"
- "punpcklbw %%mm6, %%mm4 \n\t"
- "punpckhbw %%mm3, %%mm1 \n\t"
- "punpckhbw %%mm6, %%mm0 \n\t"
- "movq %%mm5, %%mm3 \n\t"
- "movq %%mm1, %%mm6 \n\t"
- "punpcklwd %%mm4, %%mm5 \n\t"
- "punpcklwd %%mm0, %%mm1 \n\t"
- "punpckhwd %%mm4, %%mm3 \n\t"
- "punpckhwd %%mm0, %%mm6 \n\t"
- "movd %%mm5, (%0) \n\t"
- "punpckhdq %%mm5, %%mm5 \n\t"
- "movd %%mm5, (%0,%2) \n\t"
- "movd %%mm3, (%0,%2,2) \n\t"
- "punpckhdq %%mm3, %%mm3 \n\t"
- "movd %%mm3, (%0,%3) \n\t"
- "movd %%mm1, (%1) \n\t"
- "punpckhdq %%mm1, %%mm1 \n\t"
- "movd %%mm1, (%1,%2) \n\t"
- "movd %%mm6, (%1,%2,2) \n\t"
- "punpckhdq %%mm6, %%mm6 \n\t"
- "movd %%mm6, (%1,%3) \n\t"
- :: "r" (src),
- "r" (src + 4*stride),
- "r" ((x86_reg) stride ),
- "r" ((x86_reg)(3*stride))
- );
- }
-}
-
-/* draw the edges of width 'w' of an image of size width, height
- this mmx version can only handle w==8 || w==16 */
-static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w)
-{
- uint8_t *ptr, *last_line;
- int i;
-
- last_line = buf + (height - 1) * wrap;
- /* left and right */
- ptr = buf;
- if(w==8)
- {
- __asm__ volatile(
- "1: \n\t"
- "movd (%0), %%mm0 \n\t"
- "punpcklbw %%mm0, %%mm0 \n\t"
- "punpcklwd %%mm0, %%mm0 \n\t"
- "punpckldq %%mm0, %%mm0 \n\t"
- "movq %%mm0, -8(%0) \n\t"
- "movq -8(%0, %2), %%mm1 \n\t"
- "punpckhbw %%mm1, %%mm1 \n\t"
- "punpckhwd %%mm1, %%mm1 \n\t"
- "punpckhdq %%mm1, %%mm1 \n\t"
- "movq %%mm1, (%0, %2) \n\t"
- "add %1, %0 \n\t"
- "cmp %3, %0 \n\t"
- " jb 1b \n\t"
- : "+r" (ptr)
- : "r" ((x86_reg)wrap), "r" ((x86_reg)width), "r" (ptr + wrap*height)
- );
- }
- else
- {
- __asm__ volatile(
- "1: \n\t"
- "movd (%0), %%mm0 \n\t"
- "punpcklbw %%mm0, %%mm0 \n\t"
- "punpcklwd %%mm0, %%mm0 \n\t"
- "punpckldq %%mm0, %%mm0 \n\t"
- "movq %%mm0, -8(%0) \n\t"
- "movq %%mm0, -16(%0) \n\t"
- "movq -8(%0, %2), %%mm1 \n\t"
- "punpckhbw %%mm1, %%mm1 \n\t"
- "punpckhwd %%mm1, %%mm1 \n\t"
- "punpckhdq %%mm1, %%mm1 \n\t"
- "movq %%mm1, (%0, %2) \n\t"
- "movq %%mm1, 8(%0, %2) \n\t"
- "add %1, %0 \n\t"
- "cmp %3, %0 \n\t"
- " jb 1b \n\t"
- : "+r" (ptr)
- : "r" ((x86_reg)wrap), "r" ((x86_reg)width), "r" (ptr + wrap*height)
- );
- }
-
- for(i=0;i<w;i+=4) {
- /* top and bottom (and hopefully also the corners) */
- ptr= buf - (i + 1) * wrap - w;
- __asm__ volatile(
- "1: \n\t"
- "movq (%1, %0), %%mm0 \n\t"
- "movq %%mm0, (%0) \n\t"
- "movq %%mm0, (%0, %2) \n\t"
- "movq %%mm0, (%0, %2, 2) \n\t"
- "movq %%mm0, (%0, %3) \n\t"
- "add $8, %0 \n\t"
- "cmp %4, %0 \n\t"
- " jb 1b \n\t"
- : "+r" (ptr)
- : "r" ((x86_reg)buf - (x86_reg)ptr - w), "r" ((x86_reg)-wrap), "r" ((x86_reg)-wrap*3), "r" (ptr+width+2*w)
- );
- ptr= last_line + (i + 1) * wrap - w;
- __asm__ volatile(
- "1: \n\t"
- "movq (%1, %0), %%mm0 \n\t"
- "movq %%mm0, (%0) \n\t"
- "movq %%mm0, (%0, %2) \n\t"
- "movq %%mm0, (%0, %2, 2) \n\t"
- "movq %%mm0, (%0, %3) \n\t"
- "add $8, %0 \n\t"
- "cmp %4, %0 \n\t"
- " jb 1b \n\t"
- : "+r" (ptr)
- : "r" ((x86_reg)last_line - (x86_reg)ptr - w), "r" ((x86_reg)wrap), "r" ((x86_reg)wrap*3), "r" (ptr+width+2*w)
- );
- }
-}
-
-#define PAETH(cpu, abs3)\
-static void add_png_paeth_prediction_##cpu(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp)\
-{\
- x86_reg i = -bpp;\
- x86_reg end = w-3;\
- __asm__ volatile(\
- "pxor %%mm7, %%mm7 \n"\
- "movd (%1,%0), %%mm0 \n"\
- "movd (%2,%0), %%mm1 \n"\
- "punpcklbw %%mm7, %%mm0 \n"\
- "punpcklbw %%mm7, %%mm1 \n"\
- "add %4, %0 \n"\
- "1: \n"\
- "movq %%mm1, %%mm2 \n"\
- "movd (%2,%0), %%mm1 \n"\
- "movq %%mm2, %%mm3 \n"\
- "punpcklbw %%mm7, %%mm1 \n"\
- "movq %%mm2, %%mm4 \n"\
- "psubw %%mm1, %%mm3 \n"\
- "psubw %%mm0, %%mm4 \n"\
- "movq %%mm3, %%mm5 \n"\
- "paddw %%mm4, %%mm5 \n"\
- abs3\
- "movq %%mm4, %%mm6 \n"\
- "pminsw %%mm5, %%mm6 \n"\
- "pcmpgtw %%mm6, %%mm3 \n"\
- "pcmpgtw %%mm5, %%mm4 \n"\
- "movq %%mm4, %%mm6 \n"\
- "pand %%mm3, %%mm4 \n"\
- "pandn %%mm3, %%mm6 \n"\
- "pandn %%mm0, %%mm3 \n"\
- "movd (%3,%0), %%mm0 \n"\
- "pand %%mm1, %%mm6 \n"\
- "pand %%mm4, %%mm2 \n"\
- "punpcklbw %%mm7, %%mm0 \n"\
- "movq %6, %%mm5 \n"\
- "paddw %%mm6, %%mm0 \n"\
- "paddw %%mm2, %%mm3 \n"\
- "paddw %%mm3, %%mm0 \n"\
- "pand %%mm5, %%mm0 \n"\
- "movq %%mm0, %%mm3 \n"\
- "packuswb %%mm3, %%mm3 \n"\
- "movd %%mm3, (%1,%0) \n"\
- "add %4, %0 \n"\
- "cmp %5, %0 \n"\
- "jle 1b \n"\
- :"+r"(i)\
- :"r"(dst), "r"(top), "r"(src), "r"((x86_reg)bpp), "g"(end),\
- "m"(ff_pw_255)\
- :"memory"\
- );\
-}
-
-#define ABS3_MMX2\
- "psubw %%mm5, %%mm7 \n"\
- "pmaxsw %%mm7, %%mm5 \n"\
- "pxor %%mm6, %%mm6 \n"\
- "pxor %%mm7, %%mm7 \n"\
- "psubw %%mm3, %%mm6 \n"\
- "psubw %%mm4, %%mm7 \n"\
- "pmaxsw %%mm6, %%mm3 \n"\
- "pmaxsw %%mm7, %%mm4 \n"\
- "pxor %%mm7, %%mm7 \n"
-
-#define ABS3_SSSE3\
- "pabsw %%mm3, %%mm3 \n"\
- "pabsw %%mm4, %%mm4 \n"\
- "pabsw %%mm5, %%mm5 \n"
-
-PAETH(mmx2, ABS3_MMX2)
-#ifdef HAVE_SSSE3
-PAETH(ssse3, ABS3_SSSE3)
-#endif
-
-#define QPEL_V_LOW(m3,m4,m5,m6, pw_20, pw_3, rnd, in0, in1, in2, in7, out, OP)\
- "paddw " #m4 ", " #m3 " \n\t" /* x1 */\
- "movq "MANGLE(ff_pw_20)", %%mm4 \n\t" /* 20 */\
- "pmullw " #m3 ", %%mm4 \n\t" /* 20x1 */\
- "movq "#in7", " #m3 " \n\t" /* d */\
- "movq "#in0", %%mm5 \n\t" /* D */\
- "paddw " #m3 ", %%mm5 \n\t" /* x4 */\
- "psubw %%mm5, %%mm4 \n\t" /* 20x1 - x4 */\
- "movq "#in1", %%mm5 \n\t" /* C */\
- "movq "#in2", %%mm6 \n\t" /* B */\
- "paddw " #m6 ", %%mm5 \n\t" /* x3 */\
- "paddw " #m5 ", %%mm6 \n\t" /* x2 */\
- "paddw %%mm6, %%mm6 \n\t" /* 2x2 */\
- "psubw %%mm6, %%mm5 \n\t" /* -2x2 + x3 */\
- "pmullw "MANGLE(ff_pw_3)", %%mm5 \n\t" /* -6x2 + 3x3 */\
- "paddw " #rnd ", %%mm4 \n\t" /* x2 */\
- "paddw %%mm4, %%mm5 \n\t" /* 20x1 - 6x2 + 3x3 - x4 */\
- "psraw $5, %%mm5 \n\t"\
- "packuswb %%mm5, %%mm5 \n\t"\
- OP(%%mm5, out, %%mm7, d)
-
-#define QPEL_BASE(OPNAME, ROUNDER, RND, OP_MMX2, OP_3DNOW)\
-static void OPNAME ## mpeg4_qpel16_h_lowpass_mmx2(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
- uint64_t temp;\
-\
- __asm__ volatile(\
- "pxor %%mm7, %%mm7 \n\t"\
- "1: \n\t"\
- "movq (%0), %%mm0 \n\t" /* ABCDEFGH */\
- "movq %%mm0, %%mm1 \n\t" /* ABCDEFGH */\
- "movq %%mm0, %%mm2 \n\t" /* ABCDEFGH */\
- "punpcklbw %%mm7, %%mm0 \n\t" /* 0A0B0C0D */\
- "punpckhbw %%mm7, %%mm1 \n\t" /* 0E0F0G0H */\
- "pshufw $0x90, %%mm0, %%mm5 \n\t" /* 0A0A0B0C */\
- "pshufw $0x41, %%mm0, %%mm6 \n\t" /* 0B0A0A0B */\
- "movq %%mm2, %%mm3 \n\t" /* ABCDEFGH */\
- "movq %%mm2, %%mm4 \n\t" /* ABCDEFGH */\
- "psllq $8, %%mm2 \n\t" /* 0ABCDEFG */\
- "psllq $16, %%mm3 \n\t" /* 00ABCDEF */\
- "psllq $24, %%mm4 \n\t" /* 000ABCDE */\
- "punpckhbw %%mm7, %%mm2 \n\t" /* 0D0E0F0G */\
- "punpckhbw %%mm7, %%mm3 \n\t" /* 0C0D0E0F */\
- "punpckhbw %%mm7, %%mm4 \n\t" /* 0B0C0D0E */\
- "paddw %%mm3, %%mm5 \n\t" /* b */\
- "paddw %%mm2, %%mm6 \n\t" /* c */\
- "paddw %%mm5, %%mm5 \n\t" /* 2b */\
- "psubw %%mm5, %%mm6 \n\t" /* c - 2b */\
- "pshufw $0x06, %%mm0, %%mm5 \n\t" /* 0C0B0A0A */\
- "pmullw "MANGLE(ff_pw_3)", %%mm6 \n\t" /* 3c - 6b */\
- "paddw %%mm4, %%mm0 \n\t" /* a */\
- "paddw %%mm1, %%mm5 \n\t" /* d */\
- "pmullw "MANGLE(ff_pw_20)", %%mm0 \n\t" /* 20a */\
- "psubw %%mm5, %%mm0 \n\t" /* 20a - d */\
- "paddw %6, %%mm6 \n\t"\
- "paddw %%mm6, %%mm0 \n\t" /* 20a - 6b + 3c - d */\
- "psraw $5, %%mm0 \n\t"\
- "movq %%mm0, %5 \n\t"\
- /* mm1=EFGH, mm2=DEFG, mm3=CDEF, mm4=BCDE, mm7=0 */\
- \
- "movq 5(%0), %%mm0 \n\t" /* FGHIJKLM */\
- "movq %%mm0, %%mm5 \n\t" /* FGHIJKLM */\
- "movq %%mm0, %%mm6 \n\t" /* FGHIJKLM */\
- "psrlq $8, %%mm0 \n\t" /* GHIJKLM0 */\
- "psrlq $16, %%mm5 \n\t" /* HIJKLM00 */\
- "punpcklbw %%mm7, %%mm0 \n\t" /* 0G0H0I0J */\
- "punpcklbw %%mm7, %%mm5 \n\t" /* 0H0I0J0K */\
- "paddw %%mm0, %%mm2 \n\t" /* b */\
- "paddw %%mm5, %%mm3 \n\t" /* c */\
- "paddw %%mm2, %%mm2 \n\t" /* 2b */\
- "psubw %%mm2, %%mm3 \n\t" /* c - 2b */\
- "movq %%mm6, %%mm2 \n\t" /* FGHIJKLM */\
- "psrlq $24, %%mm6 \n\t" /* IJKLM000 */\
- "punpcklbw %%mm7, %%mm2 \n\t" /* 0F0G0H0I */\
- "punpcklbw %%mm7, %%mm6 \n\t" /* 0I0J0K0L */\
- "pmullw "MANGLE(ff_pw_3)", %%mm3 \n\t" /* 3c - 6b */\
- "paddw %%mm2, %%mm1 \n\t" /* a */\
- "paddw %%mm6, %%mm4 \n\t" /* d */\
- "pmullw "MANGLE(ff_pw_20)", %%mm1 \n\t" /* 20a */\
- "psubw %%mm4, %%mm3 \n\t" /* - 6b +3c - d */\
- "paddw %6, %%mm1 \n\t"\
- "paddw %%mm1, %%mm3 \n\t" /* 20a - 6b +3c - d */\
- "psraw $5, %%mm3 \n\t"\
- "movq %5, %%mm1 \n\t"\
- "packuswb %%mm3, %%mm1 \n\t"\
- OP_MMX2(%%mm1, (%1),%%mm4, q)\
- /* mm0= GHIJ, mm2=FGHI, mm5=HIJK, mm6=IJKL, mm7=0 */\
- \
- "movq 9(%0), %%mm1 \n\t" /* JKLMNOPQ */\
- "movq %%mm1, %%mm4 \n\t" /* JKLMNOPQ */\
- "movq %%mm1, %%mm3 \n\t" /* JKLMNOPQ */\
- "psrlq $8, %%mm1 \n\t" /* KLMNOPQ0 */\
- "psrlq $16, %%mm4 \n\t" /* LMNOPQ00 */\
- "punpcklbw %%mm7, %%mm1 \n\t" /* 0K0L0M0N */\
- "punpcklbw %%mm7, %%mm4 \n\t" /* 0L0M0N0O */\
- "paddw %%mm1, %%mm5 \n\t" /* b */\
- "paddw %%mm4, %%mm0 \n\t" /* c */\
- "paddw %%mm5, %%mm5 \n\t" /* 2b */\
- "psubw %%mm5, %%mm0 \n\t" /* c - 2b */\
- "movq %%mm3, %%mm5 \n\t" /* JKLMNOPQ */\
- "psrlq $24, %%mm3 \n\t" /* MNOPQ000 */\
- "pmullw "MANGLE(ff_pw_3)", %%mm0 \n\t" /* 3c - 6b */\
- "punpcklbw %%mm7, %%mm3 \n\t" /* 0M0N0O0P */\
- "paddw %%mm3, %%mm2 \n\t" /* d */\
- "psubw %%mm2, %%mm0 \n\t" /* -6b + 3c - d */\
- "movq %%mm5, %%mm2 \n\t" /* JKLMNOPQ */\
- "punpcklbw %%mm7, %%mm2 \n\t" /* 0J0K0L0M */\
- "punpckhbw %%mm7, %%mm5 \n\t" /* 0N0O0P0Q */\
- "paddw %%mm2, %%mm6 \n\t" /* a */\
- "pmullw "MANGLE(ff_pw_20)", %%mm6 \n\t" /* 20a */\
- "paddw %6, %%mm0 \n\t"\
- "paddw %%mm6, %%mm0 \n\t" /* 20a - 6b + 3c - d */\
- "psraw $5, %%mm0 \n\t"\
- /* mm1=KLMN, mm2=JKLM, mm3=MNOP, mm4=LMNO, mm5=NOPQ mm7=0 */\
- \
- "paddw %%mm5, %%mm3 \n\t" /* a */\
- "pshufw $0xF9, %%mm5, %%mm6 \n\t" /* 0O0P0Q0Q */\
- "paddw %%mm4, %%mm6 \n\t" /* b */\
- "pshufw $0xBE, %%mm5, %%mm4 \n\t" /* 0P0Q0Q0P */\
- "pshufw $0x6F, %%mm5, %%mm5 \n\t" /* 0Q0Q0P0O */\
- "paddw %%mm1, %%mm4 \n\t" /* c */\
- "paddw %%mm2, %%mm5 \n\t" /* d */\
- "paddw %%mm6, %%mm6 \n\t" /* 2b */\
- "psubw %%mm6, %%mm4 \n\t" /* c - 2b */\
- "pmullw "MANGLE(ff_pw_20)", %%mm3 \n\t" /* 20a */\
- "pmullw "MANGLE(ff_pw_3)", %%mm4 \n\t" /* 3c - 6b */\
- "psubw %%mm5, %%mm3 \n\t" /* -6b + 3c - d */\
- "paddw %6, %%mm4 \n\t"\
- "paddw %%mm3, %%mm4 \n\t" /* 20a - 6b + 3c - d */\
- "psraw $5, %%mm4 \n\t"\
- "packuswb %%mm4, %%mm0 \n\t"\
- OP_MMX2(%%mm0, 8(%1), %%mm4, q)\
- \
- "add %3, %0 \n\t"\
- "add %4, %1 \n\t"\
- "decl %2 \n\t"\
- " jnz 1b \n\t"\
- : "+a"(src), "+c"(dst), "+D"(h)\
- : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(temp), "m"(ROUNDER)\
- : "memory"\
- );\
-}\
-\
-static void OPNAME ## mpeg4_qpel16_h_lowpass_3dnow(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
- int i;\
- int16_t temp[16];\
- /* quick HACK, XXX FIXME MUST be optimized */\
- for(i=0; i<h; i++)\
- {\
- temp[ 0]= (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]);\
- temp[ 1]= (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]);\
- temp[ 2]= (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]);\
- temp[ 3]= (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]);\
- temp[ 4]= (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]);\
- temp[ 5]= (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]);\
- temp[ 6]= (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]);\
- temp[ 7]= (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]);\
- temp[ 8]= (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]);\
- temp[ 9]= (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]);\
- temp[10]= (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]);\
- temp[11]= (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]);\
- temp[12]= (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]);\
- temp[13]= (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]);\
- temp[14]= (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]);\
- temp[15]= (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]);\
- __asm__ volatile(\
- "movq (%0), %%mm0 \n\t"\
- "movq 8(%0), %%mm1 \n\t"\
- "paddw %2, %%mm0 \n\t"\
- "paddw %2, %%mm1 \n\t"\
- "psraw $5, %%mm0 \n\t"\
- "psraw $5, %%mm1 \n\t"\
- "packuswb %%mm1, %%mm0 \n\t"\
- OP_3DNOW(%%mm0, (%1), %%mm1, q)\
- "movq 16(%0), %%mm0 \n\t"\
- "movq 24(%0), %%mm1 \n\t"\
- "paddw %2, %%mm0 \n\t"\
- "paddw %2, %%mm1 \n\t"\
- "psraw $5, %%mm0 \n\t"\
- "psraw $5, %%mm1 \n\t"\
- "packuswb %%mm1, %%mm0 \n\t"\
- OP_3DNOW(%%mm0, 8(%1), %%mm1, q)\
- :: "r"(temp), "r"(dst), "m"(ROUNDER)\
- : "memory"\
- );\
- dst+=dstStride;\
- src+=srcStride;\
- }\
-}\
-\
-static void OPNAME ## mpeg4_qpel8_h_lowpass_mmx2(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
- __asm__ volatile(\
- "pxor %%mm7, %%mm7 \n\t"\
- "1: \n\t"\
- "movq (%0), %%mm0 \n\t" /* ABCDEFGH */\
- "movq %%mm0, %%mm1 \n\t" /* ABCDEFGH */\
- "movq %%mm0, %%mm2 \n\t" /* ABCDEFGH */\
- "punpcklbw %%mm7, %%mm0 \n\t" /* 0A0B0C0D */\
- "punpckhbw %%mm7, %%mm1 \n\t" /* 0E0F0G0H */\
- "pshufw $0x90, %%mm0, %%mm5 \n\t" /* 0A0A0B0C */\
- "pshufw $0x41, %%mm0, %%mm6 \n\t" /* 0B0A0A0B */\
- "movq %%mm2, %%mm3 \n\t" /* ABCDEFGH */\
- "movq %%mm2, %%mm4 \n\t" /* ABCDEFGH */\
- "psllq $8, %%mm2 \n\t" /* 0ABCDEFG */\
- "psllq $16, %%mm3 \n\t" /* 00ABCDEF */\
- "psllq $24, %%mm4 \n\t" /* 000ABCDE */\
- "punpckhbw %%mm7, %%mm2 \n\t" /* 0D0E0F0G */\
- "punpckhbw %%mm7, %%mm3 \n\t" /* 0C0D0E0F */\
- "punpckhbw %%mm7, %%mm4 \n\t" /* 0B0C0D0E */\
- "paddw %%mm3, %%mm5 \n\t" /* b */\
- "paddw %%mm2, %%mm6 \n\t" /* c */\
- "paddw %%mm5, %%mm5 \n\t" /* 2b */\
- "psubw %%mm5, %%mm6 \n\t" /* c - 2b */\
- "pshufw $0x06, %%mm0, %%mm5 \n\t" /* 0C0B0A0A */\
- "pmullw "MANGLE(ff_pw_3)", %%mm6 \n\t" /* 3c - 6b */\
- "paddw %%mm4, %%mm0 \n\t" /* a */\
- "paddw %%mm1, %%mm5 \n\t" /* d */\
- "pmullw "MANGLE(ff_pw_20)", %%mm0 \n\t" /* 20a */\
- "psubw %%mm5, %%mm0 \n\t" /* 20a - d */\
- "paddw %5, %%mm6 \n\t"\
- "paddw %%mm6, %%mm0 \n\t" /* 20a - 6b + 3c - d */\
- "psraw $5, %%mm0 \n\t"\
- /* mm1=EFGH, mm2=DEFG, mm3=CDEF, mm4=BCDE, mm7=0 */\
- \
- "movd 5(%0), %%mm5 \n\t" /* FGHI */\
- "punpcklbw %%mm7, %%mm5 \n\t" /* 0F0G0H0I */\
- "pshufw $0xF9, %%mm5, %%mm6 \n\t" /* 0G0H0I0I */\
- "paddw %%mm5, %%mm1 \n\t" /* a */\
- "paddw %%mm6, %%mm2 \n\t" /* b */\
- "pshufw $0xBE, %%mm5, %%mm6 \n\t" /* 0H0I0I0H */\
- "pshufw $0x6F, %%mm5, %%mm5 \n\t" /* 0I0I0H0G */\
- "paddw %%mm6, %%mm3 \n\t" /* c */\
- "paddw %%mm5, %%mm4 \n\t" /* d */\
- "paddw %%mm2, %%mm2 \n\t" /* 2b */\
- "psubw %%mm2, %%mm3 \n\t" /* c - 2b */\
- "pmullw "MANGLE(ff_pw_20)", %%mm1 \n\t" /* 20a */\
- "pmullw "MANGLE(ff_pw_3)", %%mm3 \n\t" /* 3c - 6b */\
- "psubw %%mm4, %%mm3 \n\t" /* -6b + 3c - d */\
- "paddw %5, %%mm1 \n\t"\
- "paddw %%mm1, %%mm3 \n\t" /* 20a - 6b + 3c - d */\
- "psraw $5, %%mm3 \n\t"\
- "packuswb %%mm3, %%mm0 \n\t"\
- OP_MMX2(%%mm0, (%1), %%mm4, q)\
- \
- "add %3, %0 \n\t"\
- "add %4, %1 \n\t"\
- "decl %2 \n\t"\
- " jnz 1b \n\t"\
- : "+a"(src), "+c"(dst), "+d"(h)\
- : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER)\
- : "memory"\
- );\
-}\
-\
-static void OPNAME ## mpeg4_qpel8_h_lowpass_3dnow(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
- int i;\
- int16_t temp[8];\
- /* quick HACK, XXX FIXME MUST be optimized */\
- for(i=0; i<h; i++)\
- {\
- temp[ 0]= (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]);\
- temp[ 1]= (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]);\
- temp[ 2]= (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]);\
- temp[ 3]= (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]);\
- temp[ 4]= (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]);\
- temp[ 5]= (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 8]);\
- temp[ 6]= (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 8])*3 - (src[ 3]+src[ 7]);\
- temp[ 7]= (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 8])*6 + (src[ 5]+src[ 7])*3 - (src[ 4]+src[ 6]);\
- __asm__ volatile(\
- "movq (%0), %%mm0 \n\t"\
- "movq 8(%0), %%mm1 \n\t"\
- "paddw %2, %%mm0 \n\t"\
- "paddw %2, %%mm1 \n\t"\
- "psraw $5, %%mm0 \n\t"\
- "psraw $5, %%mm1 \n\t"\
- "packuswb %%mm1, %%mm0 \n\t"\
- OP_3DNOW(%%mm0, (%1), %%mm1, q)\
- :: "r"(temp), "r"(dst), "m"(ROUNDER)\
- :"memory"\
- );\
- dst+=dstStride;\
- src+=srcStride;\
- }\
-}
-
-#define QPEL_OP(OPNAME, ROUNDER, RND, OP, MMX)\
-\
-static void OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
- uint64_t temp[17*4];\
- uint64_t *temp_ptr= temp;\
- int count= 17;\
-\
- /*FIXME unroll */\
- __asm__ volatile(\
- "pxor %%mm7, %%mm7 \n\t"\
- "1: \n\t"\
- "movq (%0), %%mm0 \n\t"\
- "movq (%0), %%mm1 \n\t"\
- "movq 8(%0), %%mm2 \n\t"\
- "movq 8(%0), %%mm3 \n\t"\
- "punpcklbw %%mm7, %%mm0 \n\t"\
- "punpckhbw %%mm7, %%mm1 \n\t"\
- "punpcklbw %%mm7, %%mm2 \n\t"\
- "punpckhbw %%mm7, %%mm3 \n\t"\
- "movq %%mm0, (%1) \n\t"\
- "movq %%mm1, 17*8(%1) \n\t"\
- "movq %%mm2, 2*17*8(%1) \n\t"\
- "movq %%mm3, 3*17*8(%1) \n\t"\
- "add $8, %1 \n\t"\
- "add %3, %0 \n\t"\
- "decl %2 \n\t"\
- " jnz 1b \n\t"\
- : "+r" (src), "+r" (temp_ptr), "+r"(count)\
- : "r" ((x86_reg)srcStride)\
- : "memory"\
- );\
- \
- temp_ptr= temp;\
- count=4;\
- \
-/*FIXME reorder for speed */\
- __asm__ volatile(\
- /*"pxor %%mm7, %%mm7 \n\t"*/\
- "1: \n\t"\
- "movq (%0), %%mm0 \n\t"\
- "movq 8(%0), %%mm1 \n\t"\
- "movq 16(%0), %%mm2 \n\t"\
- "movq 24(%0), %%mm3 \n\t"\
- QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 16(%0), 8(%0), (%0), 32(%0), (%1), OP)\
- QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 8(%0), (%0), (%0), 40(%0), (%1, %3), OP)\
- "add %4, %1 \n\t"\
- QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, (%0), (%0), 8(%0), 48(%0), (%1), OP)\
- \
- QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, (%0), 8(%0), 16(%0), 56(%0), (%1, %3), OP)\
- "add %4, %1 \n\t"\
- QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\
- QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 16(%0), 24(%0), 32(%0), 72(%0), (%1, %3), OP)\
- "add %4, %1 \n\t"\
- QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 24(%0), 32(%0), 40(%0), 80(%0), (%1), OP)\
- QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 32(%0), 40(%0), 48(%0), 88(%0), (%1, %3), OP)\
- "add %4, %1 \n\t"\
- QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 40(%0), 48(%0), 56(%0), 96(%0), (%1), OP)\
- QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 48(%0), 56(%0), 64(%0),104(%0), (%1, %3), OP)\
- "add %4, %1 \n\t"\
- QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 56(%0), 64(%0), 72(%0),112(%0), (%1), OP)\
- QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 64(%0), 72(%0), 80(%0),120(%0), (%1, %3), OP)\
- "add %4, %1 \n\t"\
- QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 72(%0), 80(%0), 88(%0),128(%0), (%1), OP)\
- \
- QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 80(%0), 88(%0), 96(%0),128(%0), (%1, %3), OP)\
- "add %4, %1 \n\t" \
- QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 88(%0), 96(%0),104(%0),120(%0), (%1), OP)\
- QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 96(%0),104(%0),112(%0),112(%0), (%1, %3), OP)\
- \
- "add $136, %0 \n\t"\
- "add %6, %1 \n\t"\
- "decl %2 \n\t"\
- " jnz 1b \n\t"\
- \
- : "+r"(temp_ptr), "+r"(dst), "+g"(count)\
- : "r"((x86_reg)dstStride), "r"(2*(x86_reg)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-14*(x86_reg)dstStride)\
- :"memory"\
- );\
-}\
-\
-static void OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
- uint64_t temp[9*2];\
- uint64_t *temp_ptr= temp;\
- int count= 9;\
-\
- /*FIXME unroll */\
- __asm__ volatile(\
- "pxor %%mm7, %%mm7 \n\t"\
- "1: \n\t"\
- "movq (%0), %%mm0 \n\t"\
- "movq (%0), %%mm1 \n\t"\
- "punpcklbw %%mm7, %%mm0 \n\t"\
- "punpckhbw %%mm7, %%mm1 \n\t"\
- "movq %%mm0, (%1) \n\t"\
- "movq %%mm1, 9*8(%1) \n\t"\
- "add $8, %1 \n\t"\
- "add %3, %0 \n\t"\
- "decl %2 \n\t"\
- " jnz 1b \n\t"\
- : "+r" (src), "+r" (temp_ptr), "+r"(count)\
- : "r" ((x86_reg)srcStride)\
- : "memory"\
- );\
- \
- temp_ptr= temp;\
- count=2;\
- \
-/*FIXME reorder for speed */\
- __asm__ volatile(\
- /*"pxor %%mm7, %%mm7 \n\t"*/\
- "1: \n\t"\
- "movq (%0), %%mm0 \n\t"\
- "movq 8(%0), %%mm1 \n\t"\
- "movq 16(%0), %%mm2 \n\t"\
- "movq 24(%0), %%mm3 \n\t"\
- QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 16(%0), 8(%0), (%0), 32(%0), (%1), OP)\
- QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 8(%0), (%0), (%0), 40(%0), (%1, %3), OP)\
- "add %4, %1 \n\t"\
- QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, (%0), (%0), 8(%0), 48(%0), (%1), OP)\
- \
- QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, (%0), 8(%0), 16(%0), 56(%0), (%1, %3), OP)\
- "add %4, %1 \n\t"\
- QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\
- \
- QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 16(%0), 24(%0), 32(%0), 64(%0), (%1, %3), OP)\
- "add %4, %1 \n\t"\
- QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 24(%0), 32(%0), 40(%0), 56(%0), (%1), OP)\
- QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 32(%0), 40(%0), 48(%0), 48(%0), (%1, %3), OP)\
- \
- "add $72, %0 \n\t"\
- "add %6, %1 \n\t"\
- "decl %2 \n\t"\
- " jnz 1b \n\t"\
- \
- : "+r"(temp_ptr), "+r"(dst), "+g"(count)\
- : "r"((x86_reg)dstStride), "r"(2*(x86_reg)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-6*(x86_reg)dstStride)\
- : "memory"\
- );\
-}\
-\
-static void OPNAME ## qpel8_mc00_ ## MMX (uint8_t *dst, uint8_t *src, int stride){\
- OPNAME ## pixels8_ ## MMX(dst, src, stride, 8);\
-}\
-\
-static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- uint64_t temp[8];\
- uint8_t * const half= (uint8_t*)temp;\
- put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, stride, 8);\
- OPNAME ## pixels8_l2_ ## MMX(dst, src, half, stride, stride, 8);\
-}\
-\
-static void OPNAME ## qpel8_mc20_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- OPNAME ## mpeg4_qpel8_h_lowpass_ ## MMX(dst, src, stride, stride, 8);\
-}\
-\
-static void OPNAME ## qpel8_mc30_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- uint64_t temp[8];\
- uint8_t * const half= (uint8_t*)temp;\
- put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, stride, 8);\
- OPNAME ## pixels8_l2_ ## MMX(dst, src+1, half, stride, stride, 8);\
-}\
-\
-static void OPNAME ## qpel8_mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- uint64_t temp[8];\
- uint8_t * const half= (uint8_t*)temp;\
- put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, 8, stride);\
- OPNAME ## pixels8_l2_ ## MMX(dst, src, half, stride, stride, 8);\
-}\
-\
-static void OPNAME ## qpel8_mc02_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, src, stride, stride);\
-}\
-\
-static void OPNAME ## qpel8_mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- uint64_t temp[8];\
- uint8_t * const half= (uint8_t*)temp;\
- put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, 8, stride);\
- OPNAME ## pixels8_l2_ ## MMX(dst, src+stride, half, stride, stride, 8);\
-}\
-static void OPNAME ## qpel8_mc11_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- uint64_t half[8 + 9];\
- uint8_t * const halfH= ((uint8_t*)half) + 64;\
- uint8_t * const halfHV= ((uint8_t*)half);\
- put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
- put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, stride, 9);\
- put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
- OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, stride, 8, 8);\
-}\
-static void OPNAME ## qpel8_mc31_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- uint64_t half[8 + 9];\
- uint8_t * const halfH= ((uint8_t*)half) + 64;\
- uint8_t * const halfHV= ((uint8_t*)half);\
- put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
- put ## RND ## pixels8_l2_ ## MMX(halfH, src+1, halfH, 8, stride, 9);\
- put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
- OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, stride, 8, 8);\
-}\
-static void OPNAME ## qpel8_mc13_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- uint64_t half[8 + 9];\
- uint8_t * const halfH= ((uint8_t*)half) + 64;\
- uint8_t * const halfHV= ((uint8_t*)half);\
- put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
- put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, stride, 9);\
- put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
- OPNAME ## pixels8_l2_ ## MMX(dst, halfH+8, halfHV, stride, 8, 8);\
-}\
-static void OPNAME ## qpel8_mc33_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- uint64_t half[8 + 9];\
- uint8_t * const halfH= ((uint8_t*)half) + 64;\
- uint8_t * const halfHV= ((uint8_t*)half);\
- put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
- put ## RND ## pixels8_l2_ ## MMX(halfH, src+1, halfH, 8, stride, 9);\
- put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
- OPNAME ## pixels8_l2_ ## MMX(dst, halfH+8, halfHV, stride, 8, 8);\
-}\
-static void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- uint64_t half[8 + 9];\
- uint8_t * const halfH= ((uint8_t*)half) + 64;\
- uint8_t * const halfHV= ((uint8_t*)half);\
- put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
- put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
- OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, stride, 8, 8);\
-}\
-static void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- uint64_t half[8 + 9];\
- uint8_t * const halfH= ((uint8_t*)half) + 64;\
- uint8_t * const halfHV= ((uint8_t*)half);\
- put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
- put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
- OPNAME ## pixels8_l2_ ## MMX(dst, halfH+8, halfHV, stride, 8, 8);\
-}\
-static void OPNAME ## qpel8_mc12_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- uint64_t half[8 + 9];\
- uint8_t * const halfH= ((uint8_t*)half);\
- put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
- put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, stride, 9);\
- OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\
-}\
-static void OPNAME ## qpel8_mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- uint64_t half[8 + 9];\
- uint8_t * const halfH= ((uint8_t*)half);\
- put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
- put ## RND ## pixels8_l2_ ## MMX(halfH, src+1, halfH, 8, stride, 9);\
- OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\
-}\
-static void OPNAME ## qpel8_mc22_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- uint64_t half[9];\
- uint8_t * const halfH= ((uint8_t*)half);\
- put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
- OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\
-}\
-static void OPNAME ## qpel16_mc00_ ## MMX (uint8_t *dst, uint8_t *src, int stride){\
- OPNAME ## pixels16_ ## MMX(dst, src, stride, 16);\
-}\
-\
-static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- uint64_t temp[32];\
- uint8_t * const half= (uint8_t*)temp;\
- put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, stride, 16);\
- OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, stride, 16);\
-}\
-\
-static void OPNAME ## qpel16_mc20_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- OPNAME ## mpeg4_qpel16_h_lowpass_ ## MMX(dst, src, stride, stride, 16);\
-}\
-\
-static void OPNAME ## qpel16_mc30_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- uint64_t temp[32];\
- uint8_t * const half= (uint8_t*)temp;\
- put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, stride, 16);\
- OPNAME ## pixels16_l2_ ## MMX(dst, src+1, half, stride, stride, 16);\
-}\
-\
-static void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- uint64_t temp[32];\
- uint8_t * const half= (uint8_t*)temp;\
- put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, stride);\
- OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, stride, 16);\
-}\
-\
-static void OPNAME ## qpel16_mc02_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, src, stride, stride);\
-}\
-\
-static void OPNAME ## qpel16_mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- uint64_t temp[32];\
- uint8_t * const half= (uint8_t*)temp;\
- put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, stride);\
- OPNAME ## pixels16_l2_ ## MMX(dst, src+stride, half, stride, stride, 16);\
-}\
-static void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- uint64_t half[16*2 + 17*2];\
- uint8_t * const halfH= ((uint8_t*)half) + 256;\
- uint8_t * const halfHV= ((uint8_t*)half);\
- put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
- put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, stride, 17);\
- put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
- OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, stride, 16, 16);\
-}\
-static void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- uint64_t half[16*2 + 17*2];\
- uint8_t * const halfH= ((uint8_t*)half) + 256;\
- uint8_t * const halfHV= ((uint8_t*)half);\
- put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
- put ## RND ## pixels16_l2_ ## MMX(halfH, src+1, halfH, 16, stride, 17);\
- put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
- OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, stride, 16, 16);\
-}\
-static void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- uint64_t half[16*2 + 17*2];\
- uint8_t * const halfH= ((uint8_t*)half) + 256;\
- uint8_t * const halfHV= ((uint8_t*)half);\
- put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
- put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, stride, 17);\
- put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
- OPNAME ## pixels16_l2_ ## MMX(dst, halfH+16, halfHV, stride, 16, 16);\
-}\
-static void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- uint64_t half[16*2 + 17*2];\
- uint8_t * const halfH= ((uint8_t*)half) + 256;\
- uint8_t * const halfHV= ((uint8_t*)half);\
- put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
- put ## RND ## pixels16_l2_ ## MMX(halfH, src+1, halfH, 16, stride, 17);\
- put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
- OPNAME ## pixels16_l2_ ## MMX(dst, halfH+16, halfHV, stride, 16, 16);\
-}\
-static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- uint64_t half[16*2 + 17*2];\
- uint8_t * const halfH= ((uint8_t*)half) + 256;\
- uint8_t * const halfHV= ((uint8_t*)half);\
- put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
- put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
- OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, stride, 16, 16);\
-}\
-static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- uint64_t half[16*2 + 17*2];\
- uint8_t * const halfH= ((uint8_t*)half) + 256;\
- uint8_t * const halfHV= ((uint8_t*)half);\
- put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
- put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
- OPNAME ## pixels16_l2_ ## MMX(dst, halfH+16, halfHV, stride, 16, 16);\
-}\
-static void OPNAME ## qpel16_mc12_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- uint64_t half[17*2];\
- uint8_t * const halfH= ((uint8_t*)half);\
- put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
- put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, stride, 17);\
- OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\
-}\
-static void OPNAME ## qpel16_mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- uint64_t half[17*2];\
- uint8_t * const halfH= ((uint8_t*)half);\
- put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
- put ## RND ## pixels16_l2_ ## MMX(halfH, src+1, halfH, 16, stride, 17);\
- OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\
-}\
-static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- uint64_t half[17*2];\
- uint8_t * const halfH= ((uint8_t*)half);\
- put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
- OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\
-}
-
-#define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b " \n\t"
-#define AVG_3DNOW_OP(a,b,temp, size) \
-"mov" #size " " #b ", " #temp " \n\t"\
-"pavgusb " #temp ", " #a " \n\t"\
-"mov" #size " " #a ", " #b " \n\t"
-#define AVG_MMX2_OP(a,b,temp, size) \
-"mov" #size " " #b ", " #temp " \n\t"\
-"pavgb " #temp ", " #a " \n\t"\
-"mov" #size " " #a ", " #b " \n\t"
-
-QPEL_BASE(put_ , ff_pw_16, _ , PUT_OP, PUT_OP)
-QPEL_BASE(avg_ , ff_pw_16, _ , AVG_MMX2_OP, AVG_3DNOW_OP)
-QPEL_BASE(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, PUT_OP)
-QPEL_OP(put_ , ff_pw_16, _ , PUT_OP, 3dnow)
-QPEL_OP(avg_ , ff_pw_16, _ , AVG_3DNOW_OP, 3dnow)
-QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, 3dnow)
-QPEL_OP(put_ , ff_pw_16, _ , PUT_OP, mmx2)
-QPEL_OP(avg_ , ff_pw_16, _ , AVG_MMX2_OP, mmx2)
-QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, mmx2)
-
-/***********************************/
-/* bilinear qpel: not compliant to any spec, only for -lavdopts fast */
-
-#define QPEL_2TAP_XY(OPNAME, SIZE, MMX, XY, HPEL)\
-static void OPNAME ## 2tap_qpel ## SIZE ## _mc ## XY ## _ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- OPNAME ## pixels ## SIZE ## HPEL(dst, src, stride, SIZE);\
-}
-#define QPEL_2TAP_L3(OPNAME, SIZE, MMX, XY, S0, S1, S2)\
-static void OPNAME ## 2tap_qpel ## SIZE ## _mc ## XY ## _ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- OPNAME ## 2tap_qpel ## SIZE ## _l3_ ## MMX(dst, src+S0, stride, SIZE, S1, S2);\
-}
-
-#define QPEL_2TAP(OPNAME, SIZE, MMX)\
-QPEL_2TAP_XY(OPNAME, SIZE, MMX, 20, _x2_ ## MMX)\
-QPEL_2TAP_XY(OPNAME, SIZE, MMX, 02, _y2_ ## MMX)\
-QPEL_2TAP_XY(OPNAME, SIZE, MMX, 22, _xy2_mmx)\
-static const qpel_mc_func OPNAME ## 2tap_qpel ## SIZE ## _mc00_ ## MMX =\
- OPNAME ## qpel ## SIZE ## _mc00_ ## MMX;\
-static const qpel_mc_func OPNAME ## 2tap_qpel ## SIZE ## _mc21_ ## MMX =\
- OPNAME ## 2tap_qpel ## SIZE ## _mc20_ ## MMX;\
-static const qpel_mc_func OPNAME ## 2tap_qpel ## SIZE ## _mc12_ ## MMX =\
- OPNAME ## 2tap_qpel ## SIZE ## _mc02_ ## MMX;\
-static void OPNAME ## 2tap_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- OPNAME ## pixels ## SIZE ## _y2_ ## MMX(dst, src+1, stride, SIZE);\
-}\
-static void OPNAME ## 2tap_qpel ## SIZE ## _mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- OPNAME ## pixels ## SIZE ## _x2_ ## MMX(dst, src+stride, stride, SIZE);\
-}\
-QPEL_2TAP_L3(OPNAME, SIZE, MMX, 10, 0, 1, 0)\
-QPEL_2TAP_L3(OPNAME, SIZE, MMX, 30, 1, -1, 0)\
-QPEL_2TAP_L3(OPNAME, SIZE, MMX, 01, 0, stride, 0)\
-QPEL_2TAP_L3(OPNAME, SIZE, MMX, 03, stride, -stride, 0)\
-QPEL_2TAP_L3(OPNAME, SIZE, MMX, 11, 0, stride, 1)\
-QPEL_2TAP_L3(OPNAME, SIZE, MMX, 31, 1, stride, -1)\
-QPEL_2TAP_L3(OPNAME, SIZE, MMX, 13, stride, -stride, 1)\
-QPEL_2TAP_L3(OPNAME, SIZE, MMX, 33, stride+1, -stride, -1)\
-
-QPEL_2TAP(put_, 16, mmx2)
-QPEL_2TAP(avg_, 16, mmx2)
-QPEL_2TAP(put_, 8, mmx2)
-QPEL_2TAP(avg_, 8, mmx2)
-QPEL_2TAP(put_, 16, 3dnow)
-QPEL_2TAP(avg_, 16, 3dnow)
-QPEL_2TAP(put_, 8, 3dnow)
-QPEL_2TAP(avg_, 8, 3dnow)
-
-
-#if 0
-static void just_return() { return; }
-#endif
-
-static void gmc_mmx(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
- int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height){
- const int w = 8;
- const int ix = ox>>(16+shift);
- const int iy = oy>>(16+shift);
- const int oxs = ox>>4;
- const int oys = oy>>4;
- const int dxxs = dxx>>4;
- const int dxys = dxy>>4;
- const int dyxs = dyx>>4;
- const int dyys = dyy>>4;
- const uint16_t r4[4] = {r,r,r,r};
- const uint16_t dxy4[4] = {dxys,dxys,dxys,dxys};
- const uint16_t dyy4[4] = {dyys,dyys,dyys,dyys};
- const uint64_t shift2 = 2*shift;
- uint8_t edge_buf[(h+1)*stride];
- int x, y;
-
- const int dxw = (dxx-(1<<(16+shift)))*(w-1);
- const int dyh = (dyy-(1<<(16+shift)))*(h-1);
- const int dxh = dxy*(h-1);
- const int dyw = dyx*(w-1);
- if( // non-constant fullpel offset (3% of blocks)
- ((ox^(ox+dxw)) | (ox^(ox+dxh)) | (ox^(ox+dxw+dxh)) |
- (oy^(oy+dyw)) | (oy^(oy+dyh)) | (oy^(oy+dyw+dyh))) >> (16+shift)
- // uses more than 16 bits of subpel mv (only at huge resolution)
- || (dxx|dxy|dyx|dyy)&15 )
- {
- //FIXME could still use mmx for some of the rows
- ff_gmc_c(dst, src, stride, h, ox, oy, dxx, dxy, dyx, dyy, shift, r, width, height);
- return;
- }
-
- src += ix + iy*stride;
- if( (unsigned)ix >= width-w ||
- (unsigned)iy >= height-h )
- {
- ff_emulated_edge_mc(edge_buf, src, stride, w+1, h+1, ix, iy, width, height);
- src = edge_buf;
- }
-
- __asm__ volatile(
- "movd %0, %%mm6 \n\t"
- "pxor %%mm7, %%mm7 \n\t"
- "punpcklwd %%mm6, %%mm6 \n\t"
- "punpcklwd %%mm6, %%mm6 \n\t"
- :: "r"(1<<shift)
- );
-
- for(x=0; x<w; x+=4){
- uint16_t dx4[4] = { oxs - dxys + dxxs*(x+0),
- oxs - dxys + dxxs*(x+1),
- oxs - dxys + dxxs*(x+2),
- oxs - dxys + dxxs*(x+3) };
- uint16_t dy4[4] = { oys - dyys + dyxs*(x+0),
- oys - dyys + dyxs*(x+1),
- oys - dyys + dyxs*(x+2),
- oys - dyys + dyxs*(x+3) };
-
- for(y=0; y<h; y++){
- __asm__ volatile(
- "movq %0, %%mm4 \n\t"
- "movq %1, %%mm5 \n\t"
- "paddw %2, %%mm4 \n\t"
- "paddw %3, %%mm5 \n\t"
- "movq %%mm4, %0 \n\t"
- "movq %%mm5, %1 \n\t"
- "psrlw $12, %%mm4 \n\t"
- "psrlw $12, %%mm5 \n\t"
- : "+m"(*dx4), "+m"(*dy4)
- : "m"(*dxy4), "m"(*dyy4)
- );
-
- __asm__ volatile(
- "movq %%mm6, %%mm2 \n\t"
- "movq %%mm6, %%mm1 \n\t"
- "psubw %%mm4, %%mm2 \n\t"
- "psubw %%mm5, %%mm1 \n\t"
- "movq %%mm2, %%mm0 \n\t"
- "movq %%mm4, %%mm3 \n\t"
- "pmullw %%mm1, %%mm0 \n\t" // (s-dx)*(s-dy)
- "pmullw %%mm5, %%mm3 \n\t" // dx*dy
- "pmullw %%mm5, %%mm2 \n\t" // (s-dx)*dy
- "pmullw %%mm4, %%mm1 \n\t" // dx*(s-dy)
-
- "movd %4, %%mm5 \n\t"
- "movd %3, %%mm4 \n\t"
- "punpcklbw %%mm7, %%mm5 \n\t"
- "punpcklbw %%mm7, %%mm4 \n\t"
- "pmullw %%mm5, %%mm3 \n\t" // src[1,1] * dx*dy
- "pmullw %%mm4, %%mm2 \n\t" // src[0,1] * (s-dx)*dy
-
- "movd %2, %%mm5 \n\t"
- "movd %1, %%mm4 \n\t"
- "punpcklbw %%mm7, %%mm5 \n\t"
- "punpcklbw %%mm7, %%mm4 \n\t"
- "pmullw %%mm5, %%mm1 \n\t" // src[1,0] * dx*(s-dy)
- "pmullw %%mm4, %%mm0 \n\t" // src[0,0] * (s-dx)*(s-dy)
- "paddw %5, %%mm1 \n\t"
- "paddw %%mm3, %%mm2 \n\t"
- "paddw %%mm1, %%mm0 \n\t"
- "paddw %%mm2, %%mm0 \n\t"
-
- "psrlw %6, %%mm0 \n\t"
- "packuswb %%mm0, %%mm0 \n\t"
- "movd %%mm0, %0 \n\t"
-
- : "=m"(dst[x+y*stride])
- : "m"(src[0]), "m"(src[1]),
- "m"(src[stride]), "m"(src[stride+1]),
- "m"(*r4), "m"(shift2)
- );
- src += stride;
- }
- src += 4-h*stride;
- }
-}
-
-#define PREFETCH(name, op) \
-static void name(void *mem, int stride, int h){\
- const uint8_t *p= mem;\
- do{\
- __asm__ volatile(#op" %0" :: "m"(*p));\
- p+= stride;\
- }while(--h);\
-}
-PREFETCH(prefetch_mmx2, prefetcht0)
-PREFETCH(prefetch_3dnow, prefetch)
-#undef PREFETCH
-
-#include "h264dsp_mmx.c"
-
-/* CAVS specific */
-void ff_cavsdsp_init_mmx2(DSPContext* c, AVCodecContext *avctx);
-void ff_cavsdsp_init_3dnow(DSPContext* c, AVCodecContext *avctx);
-
-void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) {
- put_pixels8_mmx(dst, src, stride, 8);
-}
-void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) {
- avg_pixels8_mmx(dst, src, stride, 8);
-}
-void ff_put_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) {
- put_pixels16_mmx(dst, src, stride, 16);
-}
-void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) {
- avg_pixels16_mmx(dst, src, stride, 16);
-}
-
-/* VC1 specific */
-void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx);
-
-void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
- put_pixels8_mmx(dst, src, stride, 8);
-}
-
-/* external functions, from idct_mmx.c */
-void ff_mmx_idct(DCTELEM *block);
-void ff_mmxext_idct(DCTELEM *block);
-
-/* XXX: those functions should be suppressed ASAP when all IDCTs are
- converted */
-#ifdef CONFIG_GPL
-static void ff_libmpeg2mmx_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
-{
- ff_mmx_idct (block);
- put_pixels_clamped_mmx(block, dest, line_size);
-}
-static void ff_libmpeg2mmx_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
-{
- ff_mmx_idct (block);
- add_pixels_clamped_mmx(block, dest, line_size);
-}
-static void ff_libmpeg2mmx2_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
-{
- ff_mmxext_idct (block);
- put_pixels_clamped_mmx(block, dest, line_size);
-}
-static void ff_libmpeg2mmx2_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
-{
- ff_mmxext_idct (block);
- add_pixels_clamped_mmx(block, dest, line_size);
-}
-#endif
-static void ff_idct_xvid_mmx_put(uint8_t *dest, int line_size, DCTELEM *block)
-{
- ff_idct_xvid_mmx (block);
- put_pixels_clamped_mmx(block, dest, line_size);
-}
-static void ff_idct_xvid_mmx_add(uint8_t *dest, int line_size, DCTELEM *block)
-{
- ff_idct_xvid_mmx (block);
- add_pixels_clamped_mmx(block, dest, line_size);
-}
-static void ff_idct_xvid_mmx2_put(uint8_t *dest, int line_size, DCTELEM *block)
-{
- ff_idct_xvid_mmx2 (block);
- put_pixels_clamped_mmx(block, dest, line_size);
-}
-static void ff_idct_xvid_mmx2_add(uint8_t *dest, int line_size, DCTELEM *block)
-{
- ff_idct_xvid_mmx2 (block);
- add_pixels_clamped_mmx(block, dest, line_size);
-}
-
-static void vorbis_inverse_coupling_3dnow(float *mag, float *ang, int blocksize)
-{
- int i;
- __asm__ volatile("pxor %%mm7, %%mm7":);
- for(i=0; i<blocksize; i+=2) {
- __asm__ volatile(
- "movq %0, %%mm0 \n\t"
- "movq %1, %%mm1 \n\t"
- "movq %%mm0, %%mm2 \n\t"
- "movq %%mm1, %%mm3 \n\t"
- "pfcmpge %%mm7, %%mm2 \n\t" // m <= 0.0
- "pfcmpge %%mm7, %%mm3 \n\t" // a <= 0.0
- "pslld $31, %%mm2 \n\t" // keep only the sign bit
- "pxor %%mm2, %%mm1 \n\t"
- "movq %%mm3, %%mm4 \n\t"
- "pand %%mm1, %%mm3 \n\t"
- "pandn %%mm1, %%mm4 \n\t"
- "pfadd %%mm0, %%mm3 \n\t" // a = m + ((a<0) & (a ^ sign(m)))
- "pfsub %%mm4, %%mm0 \n\t" // m = m + ((a>0) & (a ^ sign(m)))
- "movq %%mm3, %1 \n\t"
- "movq %%mm0, %0 \n\t"
- :"+m"(mag[i]), "+m"(ang[i])
- ::"memory"
- );
- }
- __asm__ volatile("femms");
-}
-static void vorbis_inverse_coupling_sse(float *mag, float *ang, int blocksize)
-{
- int i;
-
- __asm__ volatile(
- "movaps %0, %%xmm5 \n\t"
- ::"m"(ff_pdw_80000000[0])
- );
- for(i=0; i<blocksize; i+=4) {
- __asm__ volatile(
- "movaps %0, %%xmm0 \n\t"
- "movaps %1, %%xmm1 \n\t"
- "xorps %%xmm2, %%xmm2 \n\t"
- "xorps %%xmm3, %%xmm3 \n\t"
- "cmpleps %%xmm0, %%xmm2 \n\t" // m <= 0.0
- "cmpleps %%xmm1, %%xmm3 \n\t" // a <= 0.0
- "andps %%xmm5, %%xmm2 \n\t" // keep only the sign bit
- "xorps %%xmm2, %%xmm1 \n\t"
- "movaps %%xmm3, %%xmm4 \n\t"
- "andps %%xmm1, %%xmm3 \n\t"
- "andnps %%xmm1, %%xmm4 \n\t"
- "addps %%xmm0, %%xmm3 \n\t" // a = m + ((a<0) & (a ^ sign(m)))
- "subps %%xmm4, %%xmm0 \n\t" // m = m + ((a>0) & (a ^ sign(m)))
- "movaps %%xmm3, %1 \n\t"
- "movaps %%xmm0, %0 \n\t"
- :"+m"(mag[i]), "+m"(ang[i])
- ::"memory"
- );
- }
-}
-
-#define IF1(x) x
-#define IF0(x)
-
-#define MIX5(mono,stereo)\
- __asm__ volatile(\
- "movss 0(%2), %%xmm5 \n"\
- "movss 8(%2), %%xmm6 \n"\
- "movss 24(%2), %%xmm7 \n"\
- "shufps $0, %%xmm5, %%xmm5 \n"\
- "shufps $0, %%xmm6, %%xmm6 \n"\
- "shufps $0, %%xmm7, %%xmm7 \n"\
- "1: \n"\
- "movaps (%0,%1), %%xmm0 \n"\
- "movaps 0x400(%0,%1), %%xmm1 \n"\
- "movaps 0x800(%0,%1), %%xmm2 \n"\
- "movaps 0xc00(%0,%1), %%xmm3 \n"\
- "movaps 0x1000(%0,%1), %%xmm4 \n"\
- "mulps %%xmm5, %%xmm0 \n"\
- "mulps %%xmm6, %%xmm1 \n"\
- "mulps %%xmm5, %%xmm2 \n"\
- "mulps %%xmm7, %%xmm3 \n"\
- "mulps %%xmm7, %%xmm4 \n"\
- stereo("addps %%xmm1, %%xmm0 \n")\
- "addps %%xmm1, %%xmm2 \n"\
- "addps %%xmm3, %%xmm0 \n"\
- "addps %%xmm4, %%xmm2 \n"\
- mono("addps %%xmm2, %%xmm0 \n")\
- "movaps %%xmm0, (%0,%1) \n"\
- stereo("movaps %%xmm2, 0x400(%0,%1) \n")\
- "add $16, %0 \n"\
- "jl 1b \n"\
- :"+&r"(i)\
- :"r"(samples[0]+len), "r"(matrix)\
- :"memory"\
- );
-
-#define MIX_MISC(stereo)\
- __asm__ volatile(\
- "1: \n"\
- "movaps (%3,%0), %%xmm0 \n"\
- stereo("movaps %%xmm0, %%xmm1 \n")\
- "mulps %%xmm6, %%xmm0 \n"\
- stereo("mulps %%xmm7, %%xmm1 \n")\
- "lea 1024(%3,%0), %1 \n"\
- "mov %5, %2 \n"\
- "2: \n"\
- "movaps (%1), %%xmm2 \n"\
- stereo("movaps %%xmm2, %%xmm3 \n")\
- "mulps (%4,%2), %%xmm2 \n"\
- stereo("mulps 16(%4,%2), %%xmm3 \n")\
- "addps %%xmm2, %%xmm0 \n"\
- stereo("addps %%xmm3, %%xmm1 \n")\
- "add $1024, %1 \n"\
- "add $32, %2 \n"\
- "jl 2b \n"\
- "movaps %%xmm0, (%3,%0) \n"\
- stereo("movaps %%xmm1, 1024(%3,%0) \n")\
- "add $16, %0 \n"\
- "jl 1b \n"\
- :"+&r"(i), "=&r"(j), "=&r"(k)\
- :"r"(samples[0]+len), "r"(matrix_simd+in_ch), "g"((intptr_t)-32*(in_ch-1))\
- :"memory"\
- );
-
-static void ac3_downmix_sse(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len)
-{
- int (*matrix_cmp)[2] = (int(*)[2])matrix;
- intptr_t i,j,k;
-
- i = -len*sizeof(float);
- if(in_ch == 5 && out_ch == 2 && !(matrix_cmp[0][1]|matrix_cmp[2][0]|matrix_cmp[3][1]|matrix_cmp[4][0]|(matrix_cmp[1][0]^matrix_cmp[1][1])|(matrix_cmp[0][0]^matrix_cmp[2][1]))) {
- MIX5(IF0,IF1);
- } else if(in_ch == 5 && out_ch == 1 && matrix_cmp[0][0]==matrix_cmp[2][0] && matrix_cmp[3][0]==matrix_cmp[4][0]) {
- MIX5(IF1,IF0);
- } else {
- DECLARE_ALIGNED_16(float, matrix_simd[in_ch][2][4]);
- j = 2*in_ch*sizeof(float);
- __asm__ volatile(
- "1: \n"
- "sub $8, %0 \n"
- "movss (%2,%0), %%xmm6 \n"
- "movss 4(%2,%0), %%xmm7 \n"
- "shufps $0, %%xmm6, %%xmm6 \n"
- "shufps $0, %%xmm7, %%xmm7 \n"
- "movaps %%xmm6, (%1,%0,4) \n"
- "movaps %%xmm7, 16(%1,%0,4) \n"
- "jg 1b \n"
- :"+&r"(j)
- :"r"(matrix_simd), "r"(matrix)
- :"memory"
- );
- if(out_ch == 2) {
- MIX_MISC(IF1);
- } else {
- MIX_MISC(IF0);
- }
- }
-}
-
-static void vector_fmul_3dnow(float *dst, const float *src, int len){
- x86_reg i = (len-4)*4;
- __asm__ volatile(
- "1: \n\t"
- "movq (%1,%0), %%mm0 \n\t"
- "movq 8(%1,%0), %%mm1 \n\t"
- "pfmul (%2,%0), %%mm0 \n\t"
- "pfmul 8(%2,%0), %%mm1 \n\t"
- "movq %%mm0, (%1,%0) \n\t"
- "movq %%mm1, 8(%1,%0) \n\t"
- "sub $16, %0 \n\t"
- "jge 1b \n\t"
- "femms \n\t"
- :"+r"(i)
- :"r"(dst), "r"(src)
- :"memory"
- );
-}
-static void vector_fmul_sse(float *dst, const float *src, int len){
- x86_reg i = (len-8)*4;
- __asm__ volatile(
- "1: \n\t"
- "movaps (%1,%0), %%xmm0 \n\t"
- "movaps 16(%1,%0), %%xmm1 \n\t"
- "mulps (%2,%0), %%xmm0 \n\t"
- "mulps 16(%2,%0), %%xmm1 \n\t"
- "movaps %%xmm0, (%1,%0) \n\t"
- "movaps %%xmm1, 16(%1,%0) \n\t"
- "sub $32, %0 \n\t"
- "jge 1b \n\t"
- :"+r"(i)
- :"r"(dst), "r"(src)
- :"memory"
- );
-}
-
-static void vector_fmul_reverse_3dnow2(float *dst, const float *src0, const float *src1, int len){
- x86_reg i = len*4-16;
- __asm__ volatile(
- "1: \n\t"
- "pswapd 8(%1), %%mm0 \n\t"
- "pswapd (%1), %%mm1 \n\t"
- "pfmul (%3,%0), %%mm0 \n\t"
- "pfmul 8(%3,%0), %%mm1 \n\t"
- "movq %%mm0, (%2,%0) \n\t"
- "movq %%mm1, 8(%2,%0) \n\t"
- "add $16, %1 \n\t"
- "sub $16, %0 \n\t"
- "jge 1b \n\t"
- :"+r"(i), "+r"(src1)
- :"r"(dst), "r"(src0)
- );
- __asm__ volatile("femms");
-}
-static void vector_fmul_reverse_sse(float *dst, const float *src0, const float *src1, int len){
- x86_reg i = len*4-32;
- __asm__ volatile(
- "1: \n\t"
- "movaps 16(%1), %%xmm0 \n\t"
- "movaps (%1), %%xmm1 \n\t"
- "shufps $0x1b, %%xmm0, %%xmm0 \n\t"
- "shufps $0x1b, %%xmm1, %%xmm1 \n\t"
- "mulps (%3,%0), %%xmm0 \n\t"
- "mulps 16(%3,%0), %%xmm1 \n\t"
- "movaps %%xmm0, (%2,%0) \n\t"
- "movaps %%xmm1, 16(%2,%0) \n\t"
- "add $32, %1 \n\t"
- "sub $32, %0 \n\t"
- "jge 1b \n\t"
- :"+r"(i), "+r"(src1)
- :"r"(dst), "r"(src0)
- );
-}
-
-static void vector_fmul_add_add_3dnow(float *dst, const float *src0, const float *src1,
- const float *src2, int src3, int len, int step){
- x86_reg i = (len-4)*4;
- if(step == 2 && src3 == 0){
- dst += (len-4)*2;
- __asm__ volatile(
- "1: \n\t"
- "movq (%2,%0), %%mm0 \n\t"
- "movq 8(%2,%0), %%mm1 \n\t"
- "pfmul (%3,%0), %%mm0 \n\t"
- "pfmul 8(%3,%0), %%mm1 \n\t"
- "pfadd (%4,%0), %%mm0 \n\t"
- "pfadd 8(%4,%0), %%mm1 \n\t"
- "movd %%mm0, (%1) \n\t"
- "movd %%mm1, 16(%1) \n\t"
- "psrlq $32, %%mm0 \n\t"
- "psrlq $32, %%mm1 \n\t"
- "movd %%mm0, 8(%1) \n\t"
- "movd %%mm1, 24(%1) \n\t"
- "sub $32, %1 \n\t"
- "sub $16, %0 \n\t"
- "jge 1b \n\t"
- :"+r"(i), "+r"(dst)
- :"r"(src0), "r"(src1), "r"(src2)
- :"memory"
- );
- }
- else if(step == 1 && src3 == 0){
- __asm__ volatile(
- "1: \n\t"
- "movq (%2,%0), %%mm0 \n\t"
- "movq 8(%2,%0), %%mm1 \n\t"
- "pfmul (%3,%0), %%mm0 \n\t"
- "pfmul 8(%3,%0), %%mm1 \n\t"
- "pfadd (%4,%0), %%mm0 \n\t"
- "pfadd 8(%4,%0), %%mm1 \n\t"
- "movq %%mm0, (%1,%0) \n\t"
- "movq %%mm1, 8(%1,%0) \n\t"
- "sub $16, %0 \n\t"
- "jge 1b \n\t"
- :"+r"(i)
- :"r"(dst), "r"(src0), "r"(src1), "r"(src2)
- :"memory"
- );
- }
- else
- ff_vector_fmul_add_add_c(dst, src0, src1, src2, src3, len, step);
- __asm__ volatile("femms");
-}
-static void vector_fmul_add_add_sse(float *dst, const float *src0, const float *src1,
- const float *src2, int src3, int len, int step){
- x86_reg i = (len-8)*4;
- if(step == 2 && src3 == 0){
- dst += (len-8)*2;
- __asm__ volatile(
- "1: \n\t"
- "movaps (%2,%0), %%xmm0 \n\t"
- "movaps 16(%2,%0), %%xmm1 \n\t"
- "mulps (%3,%0), %%xmm0 \n\t"
- "mulps 16(%3,%0), %%xmm1 \n\t"
- "addps (%4,%0), %%xmm0 \n\t"
- "addps 16(%4,%0), %%xmm1 \n\t"
- "movss %%xmm0, (%1) \n\t"
- "movss %%xmm1, 32(%1) \n\t"
- "movhlps %%xmm0, %%xmm2 \n\t"
- "movhlps %%xmm1, %%xmm3 \n\t"
- "movss %%xmm2, 16(%1) \n\t"
- "movss %%xmm3, 48(%1) \n\t"
- "shufps $0xb1, %%xmm0, %%xmm0 \n\t"
- "shufps $0xb1, %%xmm1, %%xmm1 \n\t"
- "movss %%xmm0, 8(%1) \n\t"
- "movss %%xmm1, 40(%1) \n\t"
- "movhlps %%xmm0, %%xmm2 \n\t"
- "movhlps %%xmm1, %%xmm3 \n\t"
- "movss %%xmm2, 24(%1) \n\t"
- "movss %%xmm3, 56(%1) \n\t"
- "sub $64, %1 \n\t"
- "sub $32, %0 \n\t"
- "jge 1b \n\t"
- :"+r"(i), "+r"(dst)
- :"r"(src0), "r"(src1), "r"(src2)
- :"memory"
- );
- }
- else if(step == 1 && src3 == 0){
- __asm__ volatile(
- "1: \n\t"
- "movaps (%2,%0), %%xmm0 \n\t"
- "movaps 16(%2,%0), %%xmm1 \n\t"
- "mulps (%3,%0), %%xmm0 \n\t"
- "mulps 16(%3,%0), %%xmm1 \n\t"
- "addps (%4,%0), %%xmm0 \n\t"
- "addps 16(%4,%0), %%xmm1 \n\t"
- "movaps %%xmm0, (%1,%0) \n\t"
- "movaps %%xmm1, 16(%1,%0) \n\t"
- "sub $32, %0 \n\t"
- "jge 1b \n\t"
- :"+r"(i)
- :"r"(dst), "r"(src0), "r"(src1), "r"(src2)
- :"memory"
- );
- }
- else
- ff_vector_fmul_add_add_c(dst, src0, src1, src2, src3, len, step);
-}
-
-static void vector_fmul_window_3dnow2(float *dst, const float *src0, const float *src1,
- const float *win, float add_bias, int len){
-#ifdef HAVE_6REGS
- if(add_bias == 0){
- x86_reg i = -len*4;
- x86_reg j = len*4-8;
- __asm__ volatile(
- "1: \n"
- "pswapd (%5,%1), %%mm1 \n"
- "movq (%5,%0), %%mm0 \n"
- "pswapd (%4,%1), %%mm5 \n"
- "movq (%3,%0), %%mm4 \n"
- "movq %%mm0, %%mm2 \n"
- "movq %%mm1, %%mm3 \n"
- "pfmul %%mm4, %%mm2 \n" // src0[len+i]*win[len+i]
- "pfmul %%mm5, %%mm3 \n" // src1[ j]*win[len+j]
- "pfmul %%mm4, %%mm1 \n" // src0[len+i]*win[len+j]
- "pfmul %%mm5, %%mm0 \n" // src1[ j]*win[len+i]
- "pfadd %%mm3, %%mm2 \n"
- "pfsub %%mm0, %%mm1 \n"
- "pswapd %%mm2, %%mm2 \n"
- "movq %%mm1, (%2,%0) \n"
- "movq %%mm2, (%2,%1) \n"
- "sub $8, %1 \n"
- "add $8, %0 \n"
- "jl 1b \n"
- "femms \n"
- :"+r"(i), "+r"(j)
- :"r"(dst+len), "r"(src0+len), "r"(src1), "r"(win+len)
- );
- }else
-#endif
- ff_vector_fmul_window_c(dst, src0, src1, win, add_bias, len);
-}
-
-static void vector_fmul_window_sse(float *dst, const float *src0, const float *src1,
- const float *win, float add_bias, int len){
-#ifdef HAVE_6REGS
- if(add_bias == 0){
- x86_reg i = -len*4;
- x86_reg j = len*4-16;
- __asm__ volatile(
- "1: \n"
- "movaps (%5,%1), %%xmm1 \n"
- "movaps (%5,%0), %%xmm0 \n"
- "movaps (%4,%1), %%xmm5 \n"
- "movaps (%3,%0), %%xmm4 \n"
- "shufps $0x1b, %%xmm1, %%xmm1 \n"
- "shufps $0x1b, %%xmm5, %%xmm5 \n"
- "movaps %%xmm0, %%xmm2 \n"
- "movaps %%xmm1, %%xmm3 \n"
- "mulps %%xmm4, %%xmm2 \n" // src0[len+i]*win[len+i]
- "mulps %%xmm5, %%xmm3 \n" // src1[ j]*win[len+j]
- "mulps %%xmm4, %%xmm1 \n" // src0[len+i]*win[len+j]
- "mulps %%xmm5, %%xmm0 \n" // src1[ j]*win[len+i]
- "addps %%xmm3, %%xmm2 \n"
- "subps %%xmm0, %%xmm1 \n"
- "shufps $0x1b, %%xmm2, %%xmm2 \n"
- "movaps %%xmm1, (%2,%0) \n"
- "movaps %%xmm2, (%2,%1) \n"
- "sub $16, %1 \n"
- "add $16, %0 \n"
- "jl 1b \n"
- :"+r"(i), "+r"(j)
- :"r"(dst+len), "r"(src0+len), "r"(src1), "r"(win+len)
- );
- }else
-#endif
- ff_vector_fmul_window_c(dst, src0, src1, win, add_bias, len);
-}
-
-static void int32_to_float_fmul_scalar_sse(float *dst, const int *src, float mul, int len)
-{
- x86_reg i = -4*len;
- __asm__ volatile(
- "movss %3, %%xmm4 \n"
- "shufps $0, %%xmm4, %%xmm4 \n"
- "1: \n"
- "cvtpi2ps (%2,%0), %%xmm0 \n"
- "cvtpi2ps 8(%2,%0), %%xmm1 \n"
- "cvtpi2ps 16(%2,%0), %%xmm2 \n"
- "cvtpi2ps 24(%2,%0), %%xmm3 \n"
- "movlhps %%xmm1, %%xmm0 \n"
- "movlhps %%xmm3, %%xmm2 \n"
- "mulps %%xmm4, %%xmm0 \n"
- "mulps %%xmm4, %%xmm2 \n"
- "movaps %%xmm0, (%1,%0) \n"
- "movaps %%xmm2, 16(%1,%0) \n"
- "add $32, %0 \n"
- "jl 1b \n"
- :"+r"(i)
- :"r"(dst+len), "r"(src+len), "m"(mul)
- );
-}
-
-static void int32_to_float_fmul_scalar_sse2(float *dst, const int *src, float mul, int len)
-{
- x86_reg i = -4*len;
- __asm__ volatile(
- "movss %3, %%xmm4 \n"
- "shufps $0, %%xmm4, %%xmm4 \n"
- "1: \n"
- "cvtdq2ps (%2,%0), %%xmm0 \n"
- "cvtdq2ps 16(%2,%0), %%xmm1 \n"
- "mulps %%xmm4, %%xmm0 \n"
- "mulps %%xmm4, %%xmm1 \n"
- "movaps %%xmm0, (%1,%0) \n"
- "movaps %%xmm1, 16(%1,%0) \n"
- "add $32, %0 \n"
- "jl 1b \n"
- :"+r"(i)
- :"r"(dst+len), "r"(src+len), "m"(mul)
- );
-}
-
-static void float_to_int16_3dnow(int16_t *dst, const float *src, long len){
- x86_reg reglen = len;
- // not bit-exact: pf2id uses different rounding than C and SSE
- __asm__ volatile(
- "add %0 , %0 \n\t"
- "lea (%2,%0,2) , %2 \n\t"
- "add %0 , %1 \n\t"
- "neg %0 \n\t"
- "1: \n\t"
- "pf2id (%2,%0,2) , %%mm0 \n\t"
- "pf2id 8(%2,%0,2) , %%mm1 \n\t"
- "pf2id 16(%2,%0,2) , %%mm2 \n\t"
- "pf2id 24(%2,%0,2) , %%mm3 \n\t"
- "packssdw %%mm1 , %%mm0 \n\t"
- "packssdw %%mm3 , %%mm2 \n\t"
- "movq %%mm0 , (%1,%0) \n\t"
- "movq %%mm2 , 8(%1,%0) \n\t"
- "add $16 , %0 \n\t"
- " js 1b \n\t"
- "femms \n\t"
- :"+r"(reglen), "+r"(dst), "+r"(src)
- );
-}
-static void float_to_int16_sse(int16_t *dst, const float *src, long len){
- x86_reg reglen = len;
- __asm__ volatile(
- "add %0 , %0 \n\t"
- "lea (%2,%0,2) , %2 \n\t"
- "add %0 , %1 \n\t"
- "neg %0 \n\t"
- "1: \n\t"
- "cvtps2pi (%2,%0,2) , %%mm0 \n\t"
- "cvtps2pi 8(%2,%0,2) , %%mm1 \n\t"
- "cvtps2pi 16(%2,%0,2) , %%mm2 \n\t"
- "cvtps2pi 24(%2,%0,2) , %%mm3 \n\t"
- "packssdw %%mm1 , %%mm0 \n\t"
- "packssdw %%mm3 , %%mm2 \n\t"
- "movq %%mm0 , (%1,%0) \n\t"
- "movq %%mm2 , 8(%1,%0) \n\t"
- "add $16 , %0 \n\t"
- " js 1b \n\t"
- "emms \n\t"
- :"+r"(reglen), "+r"(dst), "+r"(src)
- );
-}
-
-static void float_to_int16_sse2(int16_t *dst, const float *src, long len){
- x86_reg reglen = len;
- __asm__ volatile(
- "add %0 , %0 \n\t"
- "lea (%2,%0,2) , %2 \n\t"
- "add %0 , %1 \n\t"
- "neg %0 \n\t"
- "1: \n\t"
- "cvtps2dq (%2,%0,2) , %%xmm0 \n\t"
- "cvtps2dq 16(%2,%0,2) , %%xmm1 \n\t"
- "packssdw %%xmm1 , %%xmm0 \n\t"
- "movdqa %%xmm0 , (%1,%0) \n\t"
- "add $16 , %0 \n\t"
- " js 1b \n\t"
- :"+r"(reglen), "+r"(dst), "+r"(src)
- );
-}
-
-#ifdef HAVE_YASM
-void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len);
-void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len);
-void ff_float_to_int16_interleave6_3dn2(int16_t *dst, const float **src, int len);
-#else
-#define ff_float_to_int16_interleave6_sse(a,b,c) float_to_int16_interleave_misc_sse(a,b,c,6)
-#define ff_float_to_int16_interleave6_3dnow(a,b,c) float_to_int16_interleave_misc_3dnow(a,b,c,6)
-#define ff_float_to_int16_interleave6_3dn2(a,b,c) float_to_int16_interleave_misc_3dnow(a,b,c,6)
-#endif
-#define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse
-
-#define FLOAT_TO_INT16_INTERLEAVE(cpu, body) \
-/* gcc pessimizes register allocation if this is in the same function as float_to_int16_interleave_sse2*/\
-static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const float **src, long len, int channels){\
- DECLARE_ALIGNED_16(int16_t, tmp[len]);\
- int i,j,c;\
- for(c=0; c<channels; c++){\
- float_to_int16_##cpu(tmp, src[c], len);\
- for(i=0, j=c; i<len; i++, j+=channels)\
- dst[j] = tmp[i];\
- }\
-}\
-\
-static void float_to_int16_interleave_##cpu(int16_t *dst, const float **src, long len, int channels){\
- if(channels==1)\
- float_to_int16_##cpu(dst, src[0], len);\
- else if(channels==2){\
- x86_reg reglen = len; \
- const float *src0 = src[0];\
- const float *src1 = src[1];\
- __asm__ volatile(\
- "shl $2, %0 \n"\
- "add %0, %1 \n"\
- "add %0, %2 \n"\
- "add %0, %3 \n"\
- "neg %0 \n"\
- body\
- :"+r"(reglen), "+r"(dst), "+r"(src0), "+r"(src1)\
- );\
- }else if(channels==6){\
- ff_float_to_int16_interleave6_##cpu(dst, src, len);\
- }else\
- float_to_int16_interleave_misc_##cpu(dst, src, len, channels);\
-}
-
-FLOAT_TO_INT16_INTERLEAVE(3dnow,
- "1: \n"
- "pf2id (%2,%0), %%mm0 \n"
- "pf2id 8(%2,%0), %%mm1 \n"
- "pf2id (%3,%0), %%mm2 \n"
- "pf2id 8(%3,%0), %%mm3 \n"
- "packssdw %%mm1, %%mm0 \n"
- "packssdw %%mm3, %%mm2 \n"
- "movq %%mm0, %%mm1 \n"
- "punpcklwd %%mm2, %%mm0 \n"
- "punpckhwd %%mm2, %%mm1 \n"
- "movq %%mm0, (%1,%0)\n"
- "movq %%mm1, 8(%1,%0)\n"
- "add $16, %0 \n"
- "js 1b \n"
- "femms \n"
-)
-
-FLOAT_TO_INT16_INTERLEAVE(sse,
- "1: \n"
- "cvtps2pi (%2,%0), %%mm0 \n"
- "cvtps2pi 8(%2,%0), %%mm1 \n"
- "cvtps2pi (%3,%0), %%mm2 \n"
- "cvtps2pi 8(%3,%0), %%mm3 \n"
- "packssdw %%mm1, %%mm0 \n"
- "packssdw %%mm3, %%mm2 \n"
- "movq %%mm0, %%mm1 \n"
- "punpcklwd %%mm2, %%mm0 \n"
- "punpckhwd %%mm2, %%mm1 \n"
- "movq %%mm0, (%1,%0)\n"
- "movq %%mm1, 8(%1,%0)\n"
- "add $16, %0 \n"
- "js 1b \n"
- "emms \n"
-)
-
-FLOAT_TO_INT16_INTERLEAVE(sse2,
- "1: \n"
- "cvtps2dq (%2,%0), %%xmm0 \n"
- "cvtps2dq (%3,%0), %%xmm1 \n"
- "packssdw %%xmm1, %%xmm0 \n"
- "movhlps %%xmm0, %%xmm1 \n"
- "punpcklwd %%xmm1, %%xmm0 \n"
- "movdqa %%xmm0, (%1,%0) \n"
- "add $16, %0 \n"
- "js 1b \n"
-)
-
-static void float_to_int16_interleave_3dn2(int16_t *dst, const float **src, long len, int channels){
- if(channels==6)
- ff_float_to_int16_interleave6_3dn2(dst, src, len);
- else
- float_to_int16_interleave_3dnow(dst, src, len, channels);
-}
-
-
-extern void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width);
-extern void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width);
-extern void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width);
-extern void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width);
-extern void ff_snow_inner_add_yblock_sse2(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
- int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
-extern void ff_snow_inner_add_yblock_mmx(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
- int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
-
-
-static void add_int16_sse2(int16_t * v1, int16_t * v2, int order)
-{
- x86_reg o = -(order << 1);
- v1 += order;
- v2 += order;
- __asm__ volatile(
- "1: \n\t"
- "movdqu (%1,%2), %%xmm0 \n\t"
- "movdqu 16(%1,%2), %%xmm1 \n\t"
- "paddw (%0,%2), %%xmm0 \n\t"
- "paddw 16(%0,%2), %%xmm1 \n\t"
- "movdqa %%xmm0, (%0,%2) \n\t"
- "movdqa %%xmm1, 16(%0,%2) \n\t"
- "add $32, %2 \n\t"
- "js 1b \n\t"
- : "+r"(v1), "+r"(v2), "+r"(o)
- );
-}
-
-static void sub_int16_sse2(int16_t * v1, int16_t * v2, int order)
-{
- x86_reg o = -(order << 1);
- v1 += order;
- v2 += order;
- __asm__ volatile(
- "1: \n\t"
- "movdqa (%0,%2), %%xmm0 \n\t"
- "movdqa 16(%0,%2), %%xmm2 \n\t"
- "movdqu (%1,%2), %%xmm1 \n\t"
- "movdqu 16(%1,%2), %%xmm3 \n\t"
- "psubw %%xmm1, %%xmm0 \n\t"
- "psubw %%xmm3, %%xmm2 \n\t"
- "movdqa %%xmm0, (%0,%2) \n\t"
- "movdqa %%xmm2, 16(%0,%2) \n\t"
- "add $32, %2 \n\t"
- "js 1b \n\t"
- : "+r"(v1), "+r"(v2), "+r"(o)
- );
-}
-
-static int32_t scalarproduct_int16_sse2(int16_t * v1, int16_t * v2, int order, int shift)
-{
- int res = 0;
- DECLARE_ALIGNED_16(int64_t, sh);
- x86_reg o = -(order << 1);
-
- v1 += order;
- v2 += order;
- sh = shift;
- __asm__ volatile(
- "pxor %%xmm7, %%xmm7 \n\t"
- "1: \n\t"
- "movdqu (%0,%3), %%xmm0 \n\t"
- "movdqu 16(%0,%3), %%xmm1 \n\t"
- "pmaddwd (%1,%3), %%xmm0 \n\t"
- "pmaddwd 16(%1,%3), %%xmm1 \n\t"
- "paddd %%xmm0, %%xmm7 \n\t"
- "paddd %%xmm1, %%xmm7 \n\t"
- "add $32, %3 \n\t"
- "js 1b \n\t"
- "movhlps %%xmm7, %%xmm2 \n\t"
- "paddd %%xmm2, %%xmm7 \n\t"
- "psrad %4, %%xmm7 \n\t"
- "pshuflw $0x4E, %%xmm7,%%xmm2 \n\t"
- "paddd %%xmm2, %%xmm7 \n\t"
- "movd %%xmm7, %2 \n\t"
- : "+r"(v1), "+r"(v2), "=r"(res), "+r"(o)
- : "m"(sh)
- );
- return res;
-}
-
-void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
-{
- mm_flags = mm_support();
-
- if (avctx->dsp_mask) {
- if (avctx->dsp_mask & FF_MM_FORCE)
- mm_flags |= (avctx->dsp_mask & 0xffff);
- else
- mm_flags &= ~(avctx->dsp_mask & 0xffff);
- }
-
-#if 0
- av_log(avctx, AV_LOG_INFO, "libavcodec: CPU flags:");
- if (mm_flags & FF_MM_MMX)
- av_log(avctx, AV_LOG_INFO, " mmx");
- if (mm_flags & FF_MM_MMXEXT)
- av_log(avctx, AV_LOG_INFO, " mmxext");
- if (mm_flags & FF_MM_3DNOW)
- av_log(avctx, AV_LOG_INFO, " 3dnow");
- if (mm_flags & FF_MM_SSE)
- av_log(avctx, AV_LOG_INFO, " sse");
- if (mm_flags & FF_MM_SSE2)
- av_log(avctx, AV_LOG_INFO, " sse2");
- av_log(avctx, AV_LOG_INFO, "\n");
-#endif
-
- if (mm_flags & FF_MM_MMX) {
- const int idct_algo= avctx->idct_algo;
-
- if(avctx->lowres==0){
- if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SIMPLEMMX){
- c->idct_put= ff_simple_idct_put_mmx;
- c->idct_add= ff_simple_idct_add_mmx;
- c->idct = ff_simple_idct_mmx;
- c->idct_permutation_type= FF_SIMPLE_IDCT_PERM;
-#ifdef CONFIG_GPL
- }else if(idct_algo==FF_IDCT_LIBMPEG2MMX){
- if(mm_flags & FF_MM_MMXEXT){
- c->idct_put= ff_libmpeg2mmx2_idct_put;
- c->idct_add= ff_libmpeg2mmx2_idct_add;
- c->idct = ff_mmxext_idct;
- }else{
- c->idct_put= ff_libmpeg2mmx_idct_put;
- c->idct_add= ff_libmpeg2mmx_idct_add;
- c->idct = ff_mmx_idct;
- }
- c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
-#endif
- }else if((ENABLE_VP3_DECODER || ENABLE_VP5_DECODER || ENABLE_VP6_DECODER || ENABLE_THEORA_DECODER) &&
- idct_algo==FF_IDCT_VP3){
- if(mm_flags & FF_MM_SSE2){
- c->idct_put= ff_vp3_idct_put_sse2;
- c->idct_add= ff_vp3_idct_add_sse2;
- c->idct = ff_vp3_idct_sse2;
- c->idct_permutation_type= FF_TRANSPOSE_IDCT_PERM;
- }else{
- c->idct_put= ff_vp3_idct_put_mmx;
- c->idct_add= ff_vp3_idct_add_mmx;
- c->idct = ff_vp3_idct_mmx;
- c->idct_permutation_type= FF_PARTTRANS_IDCT_PERM;
- }
- }else if(idct_algo==FF_IDCT_CAVS){
- c->idct_permutation_type= FF_TRANSPOSE_IDCT_PERM;
- }else if(idct_algo==FF_IDCT_XVIDMMX){
- if(mm_flags & FF_MM_SSE2){
- c->idct_put= ff_idct_xvid_sse2_put;
- c->idct_add= ff_idct_xvid_sse2_add;
- c->idct = ff_idct_xvid_sse2;
- c->idct_permutation_type= FF_SSE2_IDCT_PERM;
- }else if(mm_flags & FF_MM_MMXEXT){
- c->idct_put= ff_idct_xvid_mmx2_put;
- c->idct_add= ff_idct_xvid_mmx2_add;
- c->idct = ff_idct_xvid_mmx2;
- }else{
- c->idct_put= ff_idct_xvid_mmx_put;
- c->idct_add= ff_idct_xvid_mmx_add;
- c->idct = ff_idct_xvid_mmx;
- }
- }
- }
-
- c->put_pixels_clamped = put_pixels_clamped_mmx;
- c->put_signed_pixels_clamped = put_signed_pixels_clamped_mmx;
- c->add_pixels_clamped = add_pixels_clamped_mmx;
- c->clear_blocks = clear_blocks_mmx;
-
-#define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU) \
- c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## SIZE ## _ ## CPU; \
- c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## SIZE ## _x2_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## SIZE ## _y2_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## SIZE ## _xy2_ ## CPU
-
- SET_HPEL_FUNCS(put, 0, 16, mmx);
- SET_HPEL_FUNCS(put_no_rnd, 0, 16, mmx);
- SET_HPEL_FUNCS(avg, 0, 16, mmx);
- SET_HPEL_FUNCS(avg_no_rnd, 0, 16, mmx);
- SET_HPEL_FUNCS(put, 1, 8, mmx);
- SET_HPEL_FUNCS(put_no_rnd, 1, 8, mmx);
- SET_HPEL_FUNCS(avg, 1, 8, mmx);
- SET_HPEL_FUNCS(avg_no_rnd, 1, 8, mmx);
-
- c->gmc= gmc_mmx;
-
- c->add_bytes= add_bytes_mmx;
- c->add_bytes_l2= add_bytes_l2_mmx;
-
- c->draw_edges = draw_edges_mmx;
-
- if (ENABLE_ANY_H263) {
- c->h263_v_loop_filter= h263_v_loop_filter_mmx;
- c->h263_h_loop_filter= h263_h_loop_filter_mmx;
- }
- c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_mmx_rnd;
- c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_mmx;
- c->put_no_rnd_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_mmx_nornd;
-
- c->h264_idct_dc_add=
- c->h264_idct_add= ff_h264_idct_add_mmx;
- c->h264_idct8_dc_add=
- c->h264_idct8_add= ff_h264_idct8_add_mmx;
- if (mm_flags & FF_MM_SSE2)
- c->h264_idct8_add= ff_h264_idct8_add_sse2;
-
- if (mm_flags & FF_MM_MMXEXT) {
- c->prefetch = prefetch_mmx2;
-
- c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2;
- c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2;
-
- c->avg_pixels_tab[0][0] = avg_pixels16_mmx2;
- c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx2;
- c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmx2;
-
- c->put_pixels_tab[1][1] = put_pixels8_x2_mmx2;
- c->put_pixels_tab[1][2] = put_pixels8_y2_mmx2;
-
- c->avg_pixels_tab[1][0] = avg_pixels8_mmx2;
- c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2;
- c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2;
-
- c->h264_idct_dc_add= ff_h264_idct_dc_add_mmx2;
- c->h264_idct8_dc_add= ff_h264_idct8_dc_add_mmx2;
-
- if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
- c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2;
- c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2;
- c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2;
- c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2;
- c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2;
- c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2;
-
- if (ENABLE_VP3_DECODER || ENABLE_THEORA_DECODER) {
- c->vp3_v_loop_filter= ff_vp3_v_loop_filter_mmx2;
- c->vp3_h_loop_filter= ff_vp3_h_loop_filter_mmx2;
- }
- }
-
-#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU) \
- c->PFX ## _pixels_tab[IDX][ 0] = PFX ## SIZE ## _mc00_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][ 1] = PFX ## SIZE ## _mc10_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][ 2] = PFX ## SIZE ## _mc20_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][ 3] = PFX ## SIZE ## _mc30_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][ 4] = PFX ## SIZE ## _mc01_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][ 5] = PFX ## SIZE ## _mc11_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][ 6] = PFX ## SIZE ## _mc21_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][ 7] = PFX ## SIZE ## _mc31_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][ 8] = PFX ## SIZE ## _mc02_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][ 9] = PFX ## SIZE ## _mc12_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][10] = PFX ## SIZE ## _mc22_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][11] = PFX ## SIZE ## _mc32_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][12] = PFX ## SIZE ## _mc03_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][13] = PFX ## SIZE ## _mc13_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][14] = PFX ## SIZE ## _mc23_ ## CPU; \
- c->PFX ## _pixels_tab[IDX][15] = PFX ## SIZE ## _mc33_ ## CPU
-
- SET_QPEL_FUNCS(put_qpel, 0, 16, mmx2);
- SET_QPEL_FUNCS(put_qpel, 1, 8, mmx2);
- SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmx2);
- SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmx2);
- SET_QPEL_FUNCS(avg_qpel, 0, 16, mmx2);
- SET_QPEL_FUNCS(avg_qpel, 1, 8, mmx2);
-
- SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmx2);
- SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmx2);
- SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmx2);
- SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmx2);
- SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmx2);
- SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmx2);
-
- SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmx2);
- SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmx2);
- SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmx2);
- SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, mmx2);
-
- c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_mmx2_rnd;
- c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_mmx2;
- c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_mmx2;
- c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_mmx2;
- c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_mmx2;
- c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_mmx2;
- c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_mmx2;
- c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_mmx2;
- c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_mmx2;
- c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_mmx2;
- c->h264_loop_filter_strength= h264_loop_filter_strength_mmx2;
-
- c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_mmx2;
- c->weight_h264_pixels_tab[1]= ff_h264_weight_16x8_mmx2;
- c->weight_h264_pixels_tab[2]= ff_h264_weight_8x16_mmx2;
- c->weight_h264_pixels_tab[3]= ff_h264_weight_8x8_mmx2;
- c->weight_h264_pixels_tab[4]= ff_h264_weight_8x4_mmx2;
- c->weight_h264_pixels_tab[5]= ff_h264_weight_4x8_mmx2;
- c->weight_h264_pixels_tab[6]= ff_h264_weight_4x4_mmx2;
- c->weight_h264_pixels_tab[7]= ff_h264_weight_4x2_mmx2;
-
- c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_mmx2;
- c->biweight_h264_pixels_tab[1]= ff_h264_biweight_16x8_mmx2;
- c->biweight_h264_pixels_tab[2]= ff_h264_biweight_8x16_mmx2;
- c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_mmx2;
- c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_mmx2;
- c->biweight_h264_pixels_tab[5]= ff_h264_biweight_4x8_mmx2;
- c->biweight_h264_pixels_tab[6]= ff_h264_biweight_4x4_mmx2;
- c->biweight_h264_pixels_tab[7]= ff_h264_biweight_4x2_mmx2;
-
- if (ENABLE_CAVS_DECODER)
- ff_cavsdsp_init_mmx2(c, avctx);
-
- if (ENABLE_VC1_DECODER || ENABLE_WMV3_DECODER)
- ff_vc1dsp_init_mmx(c, avctx);
-
- c->add_png_paeth_prediction= add_png_paeth_prediction_mmx2;
- } else if (mm_flags & FF_MM_3DNOW) {
- c->prefetch = prefetch_3dnow;
-
- c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow;
- c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow;
-
- c->avg_pixels_tab[0][0] = avg_pixels16_3dnow;
- c->avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow;
- c->avg_pixels_tab[0][2] = avg_pixels16_y2_3dnow;
-
- c->put_pixels_tab[1][1] = put_pixels8_x2_3dnow;
- c->put_pixels_tab[1][2] = put_pixels8_y2_3dnow;
-
- c->avg_pixels_tab[1][0] = avg_pixels8_3dnow;
- c->avg_pixels_tab[1][1] = avg_pixels8_x2_3dnow;
- c->avg_pixels_tab[1][2] = avg_pixels8_y2_3dnow;
-
- if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
- c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_3dnow;
- c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_3dnow;
- c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_3dnow;
- c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_3dnow;
- c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow;
- c->avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow;
- }
-
- SET_QPEL_FUNCS(put_qpel, 0, 16, 3dnow);
- SET_QPEL_FUNCS(put_qpel, 1, 8, 3dnow);
- SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, 3dnow);
- SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, 3dnow);
- SET_QPEL_FUNCS(avg_qpel, 0, 16, 3dnow);
- SET_QPEL_FUNCS(avg_qpel, 1, 8, 3dnow);
-
- SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 3dnow);
- SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 3dnow);
- SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 3dnow);
- SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 3dnow);
- SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 3dnow);
- SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 3dnow);
-
- SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, 3dnow);
- SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, 3dnow);
- SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, 3dnow);
- SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow);
-
- c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_3dnow_rnd;
- c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_3dnow;
-
- if (ENABLE_CAVS_DECODER)
- ff_cavsdsp_init_3dnow(c, avctx);
- }
-
-
-#define H264_QPEL_FUNCS(x, y, CPU)\
- c->put_h264_qpel_pixels_tab[0][x+y*4] = put_h264_qpel16_mc##x##y##_##CPU;\
- c->put_h264_qpel_pixels_tab[1][x+y*4] = put_h264_qpel8_mc##x##y##_##CPU;\
- c->avg_h264_qpel_pixels_tab[0][x+y*4] = avg_h264_qpel16_mc##x##y##_##CPU;\
- c->avg_h264_qpel_pixels_tab[1][x+y*4] = avg_h264_qpel8_mc##x##y##_##CPU;
- if((mm_flags & FF_MM_SSE2) && !(mm_flags & FF_MM_3DNOW)){
- // these functions are slower than mmx on AMD, but faster on Intel
-/* FIXME works in most codecs, but crashes svq1 due to unaligned chroma
- c->put_pixels_tab[0][0] = put_pixels16_sse2;
- c->avg_pixels_tab[0][0] = avg_pixels16_sse2;
-*/
- H264_QPEL_FUNCS(0, 0, sse2);
- }
- if(mm_flags & FF_MM_SSE2){
- H264_QPEL_FUNCS(0, 1, sse2);
- H264_QPEL_FUNCS(0, 2, sse2);
- H264_QPEL_FUNCS(0, 3, sse2);
- H264_QPEL_FUNCS(1, 1, sse2);
- H264_QPEL_FUNCS(1, 2, sse2);
- H264_QPEL_FUNCS(1, 3, sse2);
- H264_QPEL_FUNCS(2, 1, sse2);
- H264_QPEL_FUNCS(2, 2, sse2);
- H264_QPEL_FUNCS(2, 3, sse2);
- H264_QPEL_FUNCS(3, 1, sse2);
- H264_QPEL_FUNCS(3, 2, sse2);
- H264_QPEL_FUNCS(3, 3, sse2);
- }
-#ifdef HAVE_SSSE3
- if(mm_flags & FF_MM_SSSE3){
- H264_QPEL_FUNCS(1, 0, ssse3);
- H264_QPEL_FUNCS(1, 1, ssse3);
- H264_QPEL_FUNCS(1, 2, ssse3);
- H264_QPEL_FUNCS(1, 3, ssse3);
- H264_QPEL_FUNCS(2, 0, ssse3);
- H264_QPEL_FUNCS(2, 1, ssse3);
- H264_QPEL_FUNCS(2, 2, ssse3);
- H264_QPEL_FUNCS(2, 3, ssse3);
- H264_QPEL_FUNCS(3, 0, ssse3);
- H264_QPEL_FUNCS(3, 1, ssse3);
- H264_QPEL_FUNCS(3, 2, ssse3);
- H264_QPEL_FUNCS(3, 3, ssse3);
- c->put_no_rnd_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_ssse3_nornd;
- c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_ssse3_rnd;
- c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_ssse3_rnd;
- c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_ssse3;
- c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_ssse3;
- c->add_png_paeth_prediction= add_png_paeth_prediction_ssse3;
- }
-#endif
-
-#ifdef CONFIG_SNOW_DECODER
- if(mm_flags & FF_MM_SSE2 & 0){
- c->horizontal_compose97i = ff_snow_horizontal_compose97i_sse2;
-#ifdef HAVE_7REGS
- c->vertical_compose97i = ff_snow_vertical_compose97i_sse2;
-#endif
- c->inner_add_yblock = ff_snow_inner_add_yblock_sse2;
- }
- else{
- if(mm_flags & FF_MM_MMXEXT){
- c->horizontal_compose97i = ff_snow_horizontal_compose97i_mmx;
-#ifdef HAVE_7REGS
- c->vertical_compose97i = ff_snow_vertical_compose97i_mmx;
-#endif
- }
- c->inner_add_yblock = ff_snow_inner_add_yblock_mmx;
- }
-#endif
-
- if(mm_flags & FF_MM_3DNOW){
- c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow;
- c->vector_fmul = vector_fmul_3dnow;
- if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
- c->float_to_int16 = float_to_int16_3dnow;
- c->float_to_int16_interleave = float_to_int16_interleave_3dnow;
- }
- }
- if(mm_flags & FF_MM_3DNOWEXT){
- c->vector_fmul_reverse = vector_fmul_reverse_3dnow2;
- c->vector_fmul_window = vector_fmul_window_3dnow2;
- if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
- c->float_to_int16_interleave = float_to_int16_interleave_3dn2;
- }
- }
- if(mm_flags & FF_MM_SSE){
- c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse;
- c->ac3_downmix = ac3_downmix_sse;
- c->vector_fmul = vector_fmul_sse;
- c->vector_fmul_reverse = vector_fmul_reverse_sse;
- c->vector_fmul_add_add = vector_fmul_add_add_sse;
- c->vector_fmul_window = vector_fmul_window_sse;
- c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse;
- c->float_to_int16 = float_to_int16_sse;
- c->float_to_int16_interleave = float_to_int16_interleave_sse;
- }
- if(mm_flags & FF_MM_3DNOW)
- c->vector_fmul_add_add = vector_fmul_add_add_3dnow; // faster than sse
- if(mm_flags & FF_MM_SSE2){
- c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse2;
- c->float_to_int16 = float_to_int16_sse2;
- c->float_to_int16_interleave = float_to_int16_interleave_sse2;
- c->add_int16 = add_int16_sse2;
- c->sub_int16 = sub_int16_sse2;
- c->scalarproduct_int16 = scalarproduct_int16_sse2;
- }
- }
-
- if (ENABLE_ENCODERS)
- dsputilenc_init_mmx(c, avctx);
-
-#if 0
- // for speed testing
- get_pixels = just_return;
- put_pixels_clamped = just_return;
- add_pixels_clamped = just_return;
-
- pix_abs16x16 = just_return;
- pix_abs16x16_x2 = just_return;
- pix_abs16x16_y2 = just_return;
- pix_abs16x16_xy2 = just_return;
-
- put_pixels_tab[0] = just_return;
- put_pixels_tab[1] = just_return;
- put_pixels_tab[2] = just_return;
- put_pixels_tab[3] = just_return;
-
- put_no_rnd_pixels_tab[0] = just_return;
- put_no_rnd_pixels_tab[1] = just_return;
- put_no_rnd_pixels_tab[2] = just_return;
- put_no_rnd_pixels_tab[3] = just_return;
-
- avg_pixels_tab[0] = just_return;
- avg_pixels_tab[1] = just_return;
- avg_pixels_tab[2] = just_return;
- avg_pixels_tab[3] = just_return;
-
- avg_no_rnd_pixels_tab[0] = just_return;
- avg_no_rnd_pixels_tab[1] = just_return;
- avg_no_rnd_pixels_tab[2] = just_return;
- avg_no_rnd_pixels_tab[3] = just_return;
-
- //av_fdct = just_return;
- //ff_idct = just_return;
-#endif
-}
diff --git a/libavcodec/i386/dsputil_mmx.h b/libavcodec/i386/dsputil_mmx.h
deleted file mode 100644
index 6c056f7..0000000
--- a/libavcodec/i386/dsputil_mmx.h
+++ /dev/null
@@ -1,154 +0,0 @@
-/*
- * MMX optimized DSP utils
- * Copyright (c) 2007 Aurelien Jacobs <aurel at gnuage.org>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef AVCODEC_I386_DSPUTIL_MMX_H
-#define AVCODEC_I386_DSPUTIL_MMX_H
-
-#include <stdint.h>
-#include "libavcodec/dsputil.h"
-
-typedef struct { uint64_t a, b; } xmm_t;
-
-extern const uint64_t ff_bone;
-extern const uint64_t ff_wtwo;
-
-extern const uint64_t ff_pdw_80000000[2];
-
-extern const uint64_t ff_pw_3;
-extern const uint64_t ff_pw_4;
-extern const xmm_t ff_pw_5;
-extern const xmm_t ff_pw_8;
-extern const uint64_t ff_pw_15;
-extern const xmm_t ff_pw_16;
-extern const uint64_t ff_pw_20;
-extern const xmm_t ff_pw_28;
-extern const xmm_t ff_pw_32;
-extern const uint64_t ff_pw_42;
-extern const uint64_t ff_pw_64;
-extern const uint64_t ff_pw_96;
-extern const uint64_t ff_pw_128;
-extern const uint64_t ff_pw_255;
-
-extern const uint64_t ff_pb_1;
-extern const uint64_t ff_pb_3;
-extern const uint64_t ff_pb_7;
-extern const uint64_t ff_pb_1F;
-extern const uint64_t ff_pb_3F;
-extern const uint64_t ff_pb_81;
-extern const uint64_t ff_pb_A1;
-extern const uint64_t ff_pb_FC;
-
-extern const double ff_pd_1[2];
-extern const double ff_pd_2[2];
-
-#define LOAD4(stride,in,a,b,c,d)\
- "movq 0*"#stride"+"#in", "#a"\n\t"\
- "movq 1*"#stride"+"#in", "#b"\n\t"\
- "movq 2*"#stride"+"#in", "#c"\n\t"\
- "movq 3*"#stride"+"#in", "#d"\n\t"
-
-#define STORE4(stride,out,a,b,c,d)\
- "movq "#a", 0*"#stride"+"#out"\n\t"\
- "movq "#b", 1*"#stride"+"#out"\n\t"\
- "movq "#c", 2*"#stride"+"#out"\n\t"\
- "movq "#d", 3*"#stride"+"#out"\n\t"
-
-/* in/out: mma=mma+mmb, mmb=mmb-mma */
-#define SUMSUB_BA( a, b ) \
- "paddw "#b", "#a" \n\t"\
- "paddw "#b", "#b" \n\t"\
- "psubw "#a", "#b" \n\t"
-
-#define SBUTTERFLY(a,b,t,n,m)\
- "mov" #m " " #a ", " #t " \n\t" /* abcd */\
- "punpckl" #n " " #b ", " #a " \n\t" /* aebf */\
- "punpckh" #n " " #b ", " #t " \n\t" /* cgdh */\
-
-#define TRANSPOSE4(a,b,c,d,t)\
- SBUTTERFLY(a,b,t,wd,q) /* a=aebf t=cgdh */\
- SBUTTERFLY(c,d,b,wd,q) /* c=imjn b=kolp */\
- SBUTTERFLY(a,c,d,dq,q) /* a=aeim d=bfjn */\
- SBUTTERFLY(t,b,c,dq,q) /* t=cgko c=dhlp */
-
-// e,f,g,h can be memory
-// out: a,d,t,c
-#define TRANSPOSE8x4(a,b,c,d,e,f,g,h,t)\
- "punpcklbw " #e ", " #a " \n\t" /* a0 e0 a1 e1 a2 e2 a3 e3 */\
- "punpcklbw " #f ", " #b " \n\t" /* b0 f0 b1 f1 b2 f2 b3 f3 */\
- "punpcklbw " #g ", " #c " \n\t" /* c0 g0 c1 g1 c2 g2 d3 g3 */\
- "punpcklbw " #h ", " #d " \n\t" /* d0 h0 d1 h1 d2 h2 d3 h3 */\
- SBUTTERFLY(a, b, t, bw, q) /* a= a0 b0 e0 f0 a1 b1 e1 f1 */\
- /* t= a2 b2 e2 f2 a3 b3 e3 f3 */\
- SBUTTERFLY(c, d, b, bw, q) /* c= c0 d0 g0 h0 c1 d1 g1 h1 */\
- /* b= c2 d2 g2 h2 c3 d3 g3 h3 */\
- SBUTTERFLY(a, c, d, wd, q) /* a= a0 b0 c0 d0 e0 f0 g0 h0 */\
- /* d= a1 b1 c1 d1 e1 f1 g1 h1 */\
- SBUTTERFLY(t, b, c, wd, q) /* t= a2 b2 c2 d2 e2 f2 g2 h2 */\
- /* c= a3 b3 c3 d3 e3 f3 g3 h3 */
-
-#ifdef ARCH_X86_64
-// permutes 01234567 -> 05736421
-#define TRANSPOSE8(a,b,c,d,e,f,g,h,t)\
- SBUTTERFLY(a,b,%%xmm8,wd,dqa)\
- SBUTTERFLY(c,d,b,wd,dqa)\
- SBUTTERFLY(e,f,d,wd,dqa)\
- SBUTTERFLY(g,h,f,wd,dqa)\
- SBUTTERFLY(a,c,h,dq,dqa)\
- SBUTTERFLY(%%xmm8,b,c,dq,dqa)\
- SBUTTERFLY(e,g,b,dq,dqa)\
- SBUTTERFLY(d,f,g,dq,dqa)\
- SBUTTERFLY(a,e,f,qdq,dqa)\
- SBUTTERFLY(%%xmm8,d,e,qdq,dqa)\
- SBUTTERFLY(h,b,d,qdq,dqa)\
- SBUTTERFLY(c,g,b,qdq,dqa)\
- "movdqa %%xmm8, "#g" \n\t"
-#else
-#define TRANSPOSE8(a,b,c,d,e,f,g,h,t)\
- "movdqa "#h", "#t" \n\t"\
- SBUTTERFLY(a,b,h,wd,dqa)\
- "movdqa "#h", 16"#t" \n\t"\
- "movdqa "#t", "#h" \n\t"\
- SBUTTERFLY(c,d,b,wd,dqa)\
- SBUTTERFLY(e,f,d,wd,dqa)\
- SBUTTERFLY(g,h,f,wd,dqa)\
- SBUTTERFLY(a,c,h,dq,dqa)\
- "movdqa "#h", "#t" \n\t"\
- "movdqa 16"#t", "#h" \n\t"\
- SBUTTERFLY(h,b,c,dq,dqa)\
- SBUTTERFLY(e,g,b,dq,dqa)\
- SBUTTERFLY(d,f,g,dq,dqa)\
- SBUTTERFLY(a,e,f,qdq,dqa)\
- SBUTTERFLY(h,d,e,qdq,dqa)\
- "movdqa "#h", 16"#t" \n\t"\
- "movdqa "#t", "#h" \n\t"\
- SBUTTERFLY(h,b,d,qdq,dqa)\
- SBUTTERFLY(c,g,b,qdq,dqa)\
- "movdqa 16"#t", "#g" \n\t"
-#endif
-
-#define MOVQ_WONE(regd) \
- __asm__ volatile ( \
- "pcmpeqd %%" #regd ", %%" #regd " \n\t" \
- "psrlw $15, %%" #regd ::)
-
-void dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx);
-
-#endif /* AVCODEC_I386_DSPUTIL_MMX_H */
diff --git a/libavcodec/i386/dsputil_mmx_avg_template.c b/libavcodec/i386/dsputil_mmx_avg_template.c
deleted file mode 100644
index a3f2068..0000000
--- a/libavcodec/i386/dsputil_mmx_avg_template.c
+++ /dev/null
@@ -1,900 +0,0 @@
-/*
- * DSP utils : average functions are compiled twice for 3dnow/mmx2
- * Copyright (c) 2000, 2001 Fabrice Bellard.
- * Copyright (c) 2002-2004 Michael Niedermayer
- *
- * MMX optimization by Nick Kurshev <nickols_k at mail.ru>
- * mostly rewritten by Michael Niedermayer <michaelni at gmx.at>
- * and improved by Zdenek Kabelac <kabi at users.sf.net>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-/* This header intentionally has no multiple inclusion guards. It is meant to
- * be included multiple times and generates different code depending on the
- * value of certain #defines. */
-
-/* XXX: we use explicit registers to avoid a gcc 2.95.2 register asm
- clobber bug - now it will work with 2.95.2 and also with -fPIC
- */
-static void DEF(put_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
-{
- __asm__ volatile(
- "lea (%3, %3), %%"REG_a" \n\t"
- "1: \n\t"
- "movq (%1), %%mm0 \n\t"
- "movq (%1, %3), %%mm1 \n\t"
- PAVGB" 1(%1), %%mm0 \n\t"
- PAVGB" 1(%1, %3), %%mm1 \n\t"
- "movq %%mm0, (%2) \n\t"
- "movq %%mm1, (%2, %3) \n\t"
- "add %%"REG_a", %1 \n\t"
- "add %%"REG_a", %2 \n\t"
- "movq (%1), %%mm0 \n\t"
- "movq (%1, %3), %%mm1 \n\t"
- PAVGB" 1(%1), %%mm0 \n\t"
- PAVGB" 1(%1, %3), %%mm1 \n\t"
- "add %%"REG_a", %1 \n\t"
- "movq %%mm0, (%2) \n\t"
- "movq %%mm1, (%2, %3) \n\t"
- "add %%"REG_a", %2 \n\t"
- "subl $4, %0 \n\t"
- "jnz 1b \n\t"
- :"+g"(h), "+S"(pixels), "+D"(block)
- :"r" ((x86_reg)line_size)
- :"%"REG_a, "memory");
-}
-
-static void DEF(put_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
-{
- __asm__ volatile(
- "testl $1, %0 \n\t"
- " jz 1f \n\t"
- "movd (%1), %%mm0 \n\t"
- "movd (%2), %%mm1 \n\t"
- "add %4, %1 \n\t"
- "add $4, %2 \n\t"
- PAVGB" %%mm1, %%mm0 \n\t"
- "movd %%mm0, (%3) \n\t"
- "add %5, %3 \n\t"
- "decl %0 \n\t"
- "1: \n\t"
- "movd (%1), %%mm0 \n\t"
- "add %4, %1 \n\t"
- "movd (%1), %%mm1 \n\t"
- "movd (%2), %%mm2 \n\t"
- "movd 4(%2), %%mm3 \n\t"
- "add %4, %1 \n\t"
- PAVGB" %%mm2, %%mm0 \n\t"
- PAVGB" %%mm3, %%mm1 \n\t"
- "movd %%mm0, (%3) \n\t"
- "add %5, %3 \n\t"
- "movd %%mm1, (%3) \n\t"
- "add %5, %3 \n\t"
- "movd (%1), %%mm0 \n\t"
- "add %4, %1 \n\t"
- "movd (%1), %%mm1 \n\t"
- "movd 8(%2), %%mm2 \n\t"
- "movd 12(%2), %%mm3 \n\t"
- "add %4, %1 \n\t"
- PAVGB" %%mm2, %%mm0 \n\t"
- PAVGB" %%mm3, %%mm1 \n\t"
- "movd %%mm0, (%3) \n\t"
- "add %5, %3 \n\t"
- "movd %%mm1, (%3) \n\t"
- "add %5, %3 \n\t"
- "add $16, %2 \n\t"
- "subl $4, %0 \n\t"
- "jnz 1b \n\t"
-#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cannot be used
- :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
-#else
- :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
-#endif
- :"S"((x86_reg)src1Stride), "D"((x86_reg)dstStride)
- :"memory");
-}
-
-
-static void DEF(put_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
-{
- __asm__ volatile(
- "testl $1, %0 \n\t"
- " jz 1f \n\t"
- "movq (%1), %%mm0 \n\t"
- "movq (%2), %%mm1 \n\t"
- "add %4, %1 \n\t"
- "add $8, %2 \n\t"
- PAVGB" %%mm1, %%mm0 \n\t"
- "movq %%mm0, (%3) \n\t"
- "add %5, %3 \n\t"
- "decl %0 \n\t"
- "1: \n\t"
- "movq (%1), %%mm0 \n\t"
- "add %4, %1 \n\t"
- "movq (%1), %%mm1 \n\t"
- "add %4, %1 \n\t"
- PAVGB" (%2), %%mm0 \n\t"
- PAVGB" 8(%2), %%mm1 \n\t"
- "movq %%mm0, (%3) \n\t"
- "add %5, %3 \n\t"
- "movq %%mm1, (%3) \n\t"
- "add %5, %3 \n\t"
- "movq (%1), %%mm0 \n\t"
- "add %4, %1 \n\t"
- "movq (%1), %%mm1 \n\t"
- "add %4, %1 \n\t"
- PAVGB" 16(%2), %%mm0 \n\t"
- PAVGB" 24(%2), %%mm1 \n\t"
- "movq %%mm0, (%3) \n\t"
- "add %5, %3 \n\t"
- "movq %%mm1, (%3) \n\t"
- "add %5, %3 \n\t"
- "add $32, %2 \n\t"
- "subl $4, %0 \n\t"
- "jnz 1b \n\t"
-#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cannot be used
- :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
-#else
- :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
-#endif
- :"S"((x86_reg)src1Stride), "D"((x86_reg)dstStride)
- :"memory");
-//the following should be used, though better not with gcc ...
-/* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
- :"r"(src1Stride), "r"(dstStride)
- :"memory");*/
-}
-
-static void DEF(put_no_rnd_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
-{
- __asm__ volatile(
- "pcmpeqb %%mm6, %%mm6 \n\t"
- "testl $1, %0 \n\t"
- " jz 1f \n\t"
- "movq (%1), %%mm0 \n\t"
- "movq (%2), %%mm1 \n\t"
- "add %4, %1 \n\t"
- "add $8, %2 \n\t"
- "pxor %%mm6, %%mm0 \n\t"
- "pxor %%mm6, %%mm1 \n\t"
- PAVGB" %%mm1, %%mm0 \n\t"
- "pxor %%mm6, %%mm0 \n\t"
- "movq %%mm0, (%3) \n\t"
- "add %5, %3 \n\t"
- "decl %0 \n\t"
- "1: \n\t"
- "movq (%1), %%mm0 \n\t"
- "add %4, %1 \n\t"
- "movq (%1), %%mm1 \n\t"
- "add %4, %1 \n\t"
- "movq (%2), %%mm2 \n\t"
- "movq 8(%2), %%mm3 \n\t"
- "pxor %%mm6, %%mm0 \n\t"
- "pxor %%mm6, %%mm1 \n\t"
- "pxor %%mm6, %%mm2 \n\t"
- "pxor %%mm6, %%mm3 \n\t"
- PAVGB" %%mm2, %%mm0 \n\t"
- PAVGB" %%mm3, %%mm1 \n\t"
- "pxor %%mm6, %%mm0 \n\t"
- "pxor %%mm6, %%mm1 \n\t"
- "movq %%mm0, (%3) \n\t"
- "add %5, %3 \n\t"
- "movq %%mm1, (%3) \n\t"
- "add %5, %3 \n\t"
- "movq (%1), %%mm0 \n\t"
- "add %4, %1 \n\t"
- "movq (%1), %%mm1 \n\t"
- "add %4, %1 \n\t"
- "movq 16(%2), %%mm2 \n\t"
- "movq 24(%2), %%mm3 \n\t"
- "pxor %%mm6, %%mm0 \n\t"
- "pxor %%mm6, %%mm1 \n\t"
- "pxor %%mm6, %%mm2 \n\t"
- "pxor %%mm6, %%mm3 \n\t"
- PAVGB" %%mm2, %%mm0 \n\t"
- PAVGB" %%mm3, %%mm1 \n\t"
- "pxor %%mm6, %%mm0 \n\t"
- "pxor %%mm6, %%mm1 \n\t"
- "movq %%mm0, (%3) \n\t"
- "add %5, %3 \n\t"
- "movq %%mm1, (%3) \n\t"
- "add %5, %3 \n\t"
- "add $32, %2 \n\t"
- "subl $4, %0 \n\t"
- "jnz 1b \n\t"
-#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cannot be used
- :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
-#else
- :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
-#endif
- :"S"((x86_reg)src1Stride), "D"((x86_reg)dstStride)
- :"memory");
-//the following should be used, though better not with gcc ...
-/* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
- :"r"(src1Stride), "r"(dstStride)
- :"memory");*/
-}
-
-static void DEF(avg_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
-{
- __asm__ volatile(
- "testl $1, %0 \n\t"
- " jz 1f \n\t"
- "movd (%1), %%mm0 \n\t"
- "movd (%2), %%mm1 \n\t"
- "add %4, %1 \n\t"
- "add $4, %2 \n\t"
- PAVGB" %%mm1, %%mm0 \n\t"
- PAVGB" (%3), %%mm0 \n\t"
- "movd %%mm0, (%3) \n\t"
- "add %5, %3 \n\t"
- "decl %0 \n\t"
- "1: \n\t"
- "movd (%1), %%mm0 \n\t"
- "add %4, %1 \n\t"
- "movd (%1), %%mm1 \n\t"
- "add %4, %1 \n\t"
- PAVGB" (%2), %%mm0 \n\t"
- PAVGB" 4(%2), %%mm1 \n\t"
- PAVGB" (%3), %%mm0 \n\t"
- "movd %%mm0, (%3) \n\t"
- "add %5, %3 \n\t"
- PAVGB" (%3), %%mm1 \n\t"
- "movd %%mm1, (%3) \n\t"
- "add %5, %3 \n\t"
- "movd (%1), %%mm0 \n\t"
- "add %4, %1 \n\t"
- "movd (%1), %%mm1 \n\t"
- "add %4, %1 \n\t"
- PAVGB" 8(%2), %%mm0 \n\t"
- PAVGB" 12(%2), %%mm1 \n\t"
- PAVGB" (%3), %%mm0 \n\t"
- "movd %%mm0, (%3) \n\t"
- "add %5, %3 \n\t"
- PAVGB" (%3), %%mm1 \n\t"
- "movd %%mm1, (%3) \n\t"
- "add %5, %3 \n\t"
- "add $16, %2 \n\t"
- "subl $4, %0 \n\t"
- "jnz 1b \n\t"
-#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cannot be used
- :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
-#else
- :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
-#endif
- :"S"((x86_reg)src1Stride), "D"((x86_reg)dstStride)
- :"memory");
-}
-
-
-static void DEF(avg_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
-{
- __asm__ volatile(
- "testl $1, %0 \n\t"
- " jz 1f \n\t"
- "movq (%1), %%mm0 \n\t"
- "movq (%2), %%mm1 \n\t"
- "add %4, %1 \n\t"
- "add $8, %2 \n\t"
- PAVGB" %%mm1, %%mm0 \n\t"
- PAVGB" (%3), %%mm0 \n\t"
- "movq %%mm0, (%3) \n\t"
- "add %5, %3 \n\t"
- "decl %0 \n\t"
- "1: \n\t"
- "movq (%1), %%mm0 \n\t"
- "add %4, %1 \n\t"
- "movq (%1), %%mm1 \n\t"
- "add %4, %1 \n\t"
- PAVGB" (%2), %%mm0 \n\t"
- PAVGB" 8(%2), %%mm1 \n\t"
- PAVGB" (%3), %%mm0 \n\t"
- "movq %%mm0, (%3) \n\t"
- "add %5, %3 \n\t"
- PAVGB" (%3), %%mm1 \n\t"
- "movq %%mm1, (%3) \n\t"
- "add %5, %3 \n\t"
- "movq (%1), %%mm0 \n\t"
- "add %4, %1 \n\t"
- "movq (%1), %%mm1 \n\t"
- "add %4, %1 \n\t"
- PAVGB" 16(%2), %%mm0 \n\t"
- PAVGB" 24(%2), %%mm1 \n\t"
- PAVGB" (%3), %%mm0 \n\t"
- "movq %%mm0, (%3) \n\t"
- "add %5, %3 \n\t"
- PAVGB" (%3), %%mm1 \n\t"
- "movq %%mm1, (%3) \n\t"
- "add %5, %3 \n\t"
- "add $32, %2 \n\t"
- "subl $4, %0 \n\t"
- "jnz 1b \n\t"
-#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cannot be used
- :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
-#else
- :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
-#endif
- :"S"((x86_reg)src1Stride), "D"((x86_reg)dstStride)
- :"memory");
-//the following should be used, though better not with gcc ...
-/* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
- :"r"(src1Stride), "r"(dstStride)
- :"memory");*/
-}
-
-static void DEF(put_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
-{
- __asm__ volatile(
- "lea (%3, %3), %%"REG_a" \n\t"
- "1: \n\t"
- "movq (%1), %%mm0 \n\t"
- "movq (%1, %3), %%mm1 \n\t"
- "movq 8(%1), %%mm2 \n\t"
- "movq 8(%1, %3), %%mm3 \n\t"
- PAVGB" 1(%1), %%mm0 \n\t"
- PAVGB" 1(%1, %3), %%mm1 \n\t"
- PAVGB" 9(%1), %%mm2 \n\t"
- PAVGB" 9(%1, %3), %%mm3 \n\t"
- "movq %%mm0, (%2) \n\t"
- "movq %%mm1, (%2, %3) \n\t"
- "movq %%mm2, 8(%2) \n\t"
- "movq %%mm3, 8(%2, %3) \n\t"
- "add %%"REG_a", %1 \n\t"
- "add %%"REG_a", %2 \n\t"
- "movq (%1), %%mm0 \n\t"
- "movq (%1, %3), %%mm1 \n\t"
- "movq 8(%1), %%mm2 \n\t"
- "movq 8(%1, %3), %%mm3 \n\t"
- PAVGB" 1(%1), %%mm0 \n\t"
- PAVGB" 1(%1, %3), %%mm1 \n\t"
- PAVGB" 9(%1), %%mm2 \n\t"
- PAVGB" 9(%1, %3), %%mm3 \n\t"
- "add %%"REG_a", %1 \n\t"
- "movq %%mm0, (%2) \n\t"
- "movq %%mm1, (%2, %3) \n\t"
- "movq %%mm2, 8(%2) \n\t"
- "movq %%mm3, 8(%2, %3) \n\t"
- "add %%"REG_a", %2 \n\t"
- "subl $4, %0 \n\t"
- "jnz 1b \n\t"
- :"+g"(h), "+S"(pixels), "+D"(block)
- :"r" ((x86_reg)line_size)
- :"%"REG_a, "memory");
-}
-
-static void DEF(put_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
-{
- __asm__ volatile(
- "testl $1, %0 \n\t"
- " jz 1f \n\t"
- "movq (%1), %%mm0 \n\t"
- "movq 8(%1), %%mm1 \n\t"
- PAVGB" (%2), %%mm0 \n\t"
- PAVGB" 8(%2), %%mm1 \n\t"
- "add %4, %1 \n\t"
- "add $16, %2 \n\t"
- "movq %%mm0, (%3) \n\t"
- "movq %%mm1, 8(%3) \n\t"
- "add %5, %3 \n\t"
- "decl %0 \n\t"
- "1: \n\t"
- "movq (%1), %%mm0 \n\t"
- "movq 8(%1), %%mm1 \n\t"
- "add %4, %1 \n\t"
- PAVGB" (%2), %%mm0 \n\t"
- PAVGB" 8(%2), %%mm1 \n\t"
- "movq %%mm0, (%3) \n\t"
- "movq %%mm1, 8(%3) \n\t"
- "add %5, %3 \n\t"
- "movq (%1), %%mm0 \n\t"
- "movq 8(%1), %%mm1 \n\t"
- "add %4, %1 \n\t"
- PAVGB" 16(%2), %%mm0 \n\t"
- PAVGB" 24(%2), %%mm1 \n\t"
- "movq %%mm0, (%3) \n\t"
- "movq %%mm1, 8(%3) \n\t"
- "add %5, %3 \n\t"
- "add $32, %2 \n\t"
- "subl $2, %0 \n\t"
- "jnz 1b \n\t"
-#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cannot be used
- :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
-#else
- :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
-#endif
- :"S"((x86_reg)src1Stride), "D"((x86_reg)dstStride)
- :"memory");
-//the following should be used, though better not with gcc ...
-/* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
- :"r"(src1Stride), "r"(dstStride)
- :"memory");*/
-}
-
-static void DEF(avg_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
-{
- __asm__ volatile(
- "testl $1, %0 \n\t"
- " jz 1f \n\t"
- "movq (%1), %%mm0 \n\t"
- "movq 8(%1), %%mm1 \n\t"
- PAVGB" (%2), %%mm0 \n\t"
- PAVGB" 8(%2), %%mm1 \n\t"
- "add %4, %1 \n\t"
- "add $16, %2 \n\t"
- PAVGB" (%3), %%mm0 \n\t"
- PAVGB" 8(%3), %%mm1 \n\t"
- "movq %%mm0, (%3) \n\t"
- "movq %%mm1, 8(%3) \n\t"
- "add %5, %3 \n\t"
- "decl %0 \n\t"
- "1: \n\t"
- "movq (%1), %%mm0 \n\t"
- "movq 8(%1), %%mm1 \n\t"
- "add %4, %1 \n\t"
- PAVGB" (%2), %%mm0 \n\t"
- PAVGB" 8(%2), %%mm1 \n\t"
- PAVGB" (%3), %%mm0 \n\t"
- PAVGB" 8(%3), %%mm1 \n\t"
- "movq %%mm0, (%3) \n\t"
- "movq %%mm1, 8(%3) \n\t"
- "add %5, %3 \n\t"
- "movq (%1), %%mm0 \n\t"
- "movq 8(%1), %%mm1 \n\t"
- "add %4, %1 \n\t"
- PAVGB" 16(%2), %%mm0 \n\t"
- PAVGB" 24(%2), %%mm1 \n\t"
- PAVGB" (%3), %%mm0 \n\t"
- PAVGB" 8(%3), %%mm1 \n\t"
- "movq %%mm0, (%3) \n\t"
- "movq %%mm1, 8(%3) \n\t"
- "add %5, %3 \n\t"
- "add $32, %2 \n\t"
- "subl $2, %0 \n\t"
- "jnz 1b \n\t"
-#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cannot be used
- :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
-#else
- :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
-#endif
- :"S"((x86_reg)src1Stride), "D"((x86_reg)dstStride)
- :"memory");
-//the following should be used, though better not with gcc ...
-/* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
- :"r"(src1Stride), "r"(dstStride)
- :"memory");*/
-}
-
-static void DEF(put_no_rnd_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
-{
- __asm__ volatile(
- "pcmpeqb %%mm6, %%mm6 \n\t"
- "testl $1, %0 \n\t"
- " jz 1f \n\t"
- "movq (%1), %%mm0 \n\t"
- "movq 8(%1), %%mm1 \n\t"
- "movq (%2), %%mm2 \n\t"
- "movq 8(%2), %%mm3 \n\t"
- "pxor %%mm6, %%mm0 \n\t"
- "pxor %%mm6, %%mm1 \n\t"
- "pxor %%mm6, %%mm2 \n\t"
- "pxor %%mm6, %%mm3 \n\t"
- PAVGB" %%mm2, %%mm0 \n\t"
- PAVGB" %%mm3, %%mm1 \n\t"
- "pxor %%mm6, %%mm0 \n\t"
- "pxor %%mm6, %%mm1 \n\t"
- "add %4, %1 \n\t"
- "add $16, %2 \n\t"
- "movq %%mm0, (%3) \n\t"
- "movq %%mm1, 8(%3) \n\t"
- "add %5, %3 \n\t"
- "decl %0 \n\t"
- "1: \n\t"
- "movq (%1), %%mm0 \n\t"
- "movq 8(%1), %%mm1 \n\t"
- "add %4, %1 \n\t"
- "movq (%2), %%mm2 \n\t"
- "movq 8(%2), %%mm3 \n\t"
- "pxor %%mm6, %%mm0 \n\t"
- "pxor %%mm6, %%mm1 \n\t"
- "pxor %%mm6, %%mm2 \n\t"
- "pxor %%mm6, %%mm3 \n\t"
- PAVGB" %%mm2, %%mm0 \n\t"
- PAVGB" %%mm3, %%mm1 \n\t"
- "pxor %%mm6, %%mm0 \n\t"
- "pxor %%mm6, %%mm1 \n\t"
- "movq %%mm0, (%3) \n\t"
- "movq %%mm1, 8(%3) \n\t"
- "add %5, %3 \n\t"
- "movq (%1), %%mm0 \n\t"
- "movq 8(%1), %%mm1 \n\t"
- "add %4, %1 \n\t"
- "movq 16(%2), %%mm2 \n\t"
- "movq 24(%2), %%mm3 \n\t"
- "pxor %%mm6, %%mm0 \n\t"
- "pxor %%mm6, %%mm1 \n\t"
- "pxor %%mm6, %%mm2 \n\t"
- "pxor %%mm6, %%mm3 \n\t"
- PAVGB" %%mm2, %%mm0 \n\t"
- PAVGB" %%mm3, %%mm1 \n\t"
- "pxor %%mm6, %%mm0 \n\t"
- "pxor %%mm6, %%mm1 \n\t"
- "movq %%mm0, (%3) \n\t"
- "movq %%mm1, 8(%3) \n\t"
- "add %5, %3 \n\t"
- "add $32, %2 \n\t"
- "subl $2, %0 \n\t"
- "jnz 1b \n\t"
-#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cannot be used
- :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
-#else
- :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
-#endif
- :"S"((x86_reg)src1Stride), "D"((x86_reg)dstStride)
- :"memory");
-//the following should be used, though better not with gcc ...
-/* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
- :"r"(src1Stride), "r"(dstStride)
- :"memory");*/
-}
-
-/* GL: this function does incorrect rounding if overflow */
-static void DEF(put_no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
-{
- MOVQ_BONE(mm6);
- __asm__ volatile(
- "lea (%3, %3), %%"REG_a" \n\t"
- "1: \n\t"
- "movq (%1), %%mm0 \n\t"
- "movq (%1, %3), %%mm2 \n\t"
- "movq 1(%1), %%mm1 \n\t"
- "movq 1(%1, %3), %%mm3 \n\t"
- "add %%"REG_a", %1 \n\t"
- "psubusb %%mm6, %%mm0 \n\t"
- "psubusb %%mm6, %%mm2 \n\t"
- PAVGB" %%mm1, %%mm0 \n\t"
- PAVGB" %%mm3, %%mm2 \n\t"
- "movq %%mm0, (%2) \n\t"
- "movq %%mm2, (%2, %3) \n\t"
- "movq (%1), %%mm0 \n\t"
- "movq 1(%1), %%mm1 \n\t"
- "movq (%1, %3), %%mm2 \n\t"
- "movq 1(%1, %3), %%mm3 \n\t"
- "add %%"REG_a", %2 \n\t"
- "add %%"REG_a", %1 \n\t"
- "psubusb %%mm6, %%mm0 \n\t"
- "psubusb %%mm6, %%mm2 \n\t"
- PAVGB" %%mm1, %%mm0 \n\t"
- PAVGB" %%mm3, %%mm2 \n\t"
- "movq %%mm0, (%2) \n\t"
- "movq %%mm2, (%2, %3) \n\t"
- "add %%"REG_a", %2 \n\t"
- "subl $4, %0 \n\t"
- "jnz 1b \n\t"
- :"+g"(h), "+S"(pixels), "+D"(block)
- :"r" ((x86_reg)line_size)
- :"%"REG_a, "memory");
-}
-
-static void DEF(put_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
-{
- __asm__ volatile(
- "lea (%3, %3), %%"REG_a" \n\t"
- "movq (%1), %%mm0 \n\t"
- "sub %3, %2 \n\t"
- "1: \n\t"
- "movq (%1, %3), %%mm1 \n\t"
- "movq (%1, %%"REG_a"), %%mm2 \n\t"
- "add %%"REG_a", %1 \n\t"
- PAVGB" %%mm1, %%mm0 \n\t"
- PAVGB" %%mm2, %%mm1 \n\t"
- "movq %%mm0, (%2, %3) \n\t"
- "movq %%mm1, (%2, %%"REG_a") \n\t"
- "movq (%1, %3), %%mm1 \n\t"
- "movq (%1, %%"REG_a"), %%mm0 \n\t"
- "add %%"REG_a", %2 \n\t"
- "add %%"REG_a", %1 \n\t"
- PAVGB" %%mm1, %%mm2 \n\t"
- PAVGB" %%mm0, %%mm1 \n\t"
- "movq %%mm2, (%2, %3) \n\t"
- "movq %%mm1, (%2, %%"REG_a") \n\t"
- "add %%"REG_a", %2 \n\t"
- "subl $4, %0 \n\t"
- "jnz 1b \n\t"
- :"+g"(h), "+S"(pixels), "+D" (block)
- :"r" ((x86_reg)line_size)
- :"%"REG_a, "memory");
-}
-
-/* GL: this function does incorrect rounding if overflow */
-static void DEF(put_no_rnd_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
-{
- MOVQ_BONE(mm6);
- __asm__ volatile(
- "lea (%3, %3), %%"REG_a" \n\t"
- "movq (%1), %%mm0 \n\t"
- "sub %3, %2 \n\t"
- "1: \n\t"
- "movq (%1, %3), %%mm1 \n\t"
- "movq (%1, %%"REG_a"), %%mm2 \n\t"
- "add %%"REG_a", %1 \n\t"
- "psubusb %%mm6, %%mm1 \n\t"
- PAVGB" %%mm1, %%mm0 \n\t"
- PAVGB" %%mm2, %%mm1 \n\t"
- "movq %%mm0, (%2, %3) \n\t"
- "movq %%mm1, (%2, %%"REG_a") \n\t"
- "movq (%1, %3), %%mm1 \n\t"
- "movq (%1, %%"REG_a"), %%mm0 \n\t"
- "add %%"REG_a", %2 \n\t"
- "add %%"REG_a", %1 \n\t"
- "psubusb %%mm6, %%mm1 \n\t"
- PAVGB" %%mm1, %%mm2 \n\t"
- PAVGB" %%mm0, %%mm1 \n\t"
- "movq %%mm2, (%2, %3) \n\t"
- "movq %%mm1, (%2, %%"REG_a") \n\t"
- "add %%"REG_a", %2 \n\t"
- "subl $4, %0 \n\t"
- "jnz 1b \n\t"
- :"+g"(h), "+S"(pixels), "+D" (block)
- :"r" ((x86_reg)line_size)
- :"%"REG_a, "memory");
-}
-
-static void DEF(avg_pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
-{
- __asm__ volatile(
- "lea (%3, %3), %%"REG_a" \n\t"
- "1: \n\t"
- "movq (%2), %%mm0 \n\t"
- "movq (%2, %3), %%mm1 \n\t"
- PAVGB" (%1), %%mm0 \n\t"
- PAVGB" (%1, %3), %%mm1 \n\t"
- "movq %%mm0, (%2) \n\t"
- "movq %%mm1, (%2, %3) \n\t"
- "add %%"REG_a", %1 \n\t"
- "add %%"REG_a", %2 \n\t"
- "movq (%2), %%mm0 \n\t"
- "movq (%2, %3), %%mm1 \n\t"
- PAVGB" (%1), %%mm0 \n\t"
- PAVGB" (%1, %3), %%mm1 \n\t"
- "add %%"REG_a", %1 \n\t"
- "movq %%mm0, (%2) \n\t"
- "movq %%mm1, (%2, %3) \n\t"
- "add %%"REG_a", %2 \n\t"
- "subl $4, %0 \n\t"
- "jnz 1b \n\t"
- :"+g"(h), "+S"(pixels), "+D"(block)
- :"r" ((x86_reg)line_size)
- :"%"REG_a, "memory");
-}
-
-static void DEF(avg_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
-{
- __asm__ volatile(
- "lea (%3, %3), %%"REG_a" \n\t"
- "1: \n\t"
- "movq (%1), %%mm0 \n\t"
- "movq (%1, %3), %%mm2 \n\t"
- PAVGB" 1(%1), %%mm0 \n\t"
- PAVGB" 1(%1, %3), %%mm2 \n\t"
- PAVGB" (%2), %%mm0 \n\t"
- PAVGB" (%2, %3), %%mm2 \n\t"
- "add %%"REG_a", %1 \n\t"
- "movq %%mm0, (%2) \n\t"
- "movq %%mm2, (%2, %3) \n\t"
- "movq (%1), %%mm0 \n\t"
- "movq (%1, %3), %%mm2 \n\t"
- PAVGB" 1(%1), %%mm0 \n\t"
- PAVGB" 1(%1, %3), %%mm2 \n\t"
- "add %%"REG_a", %2 \n\t"
- "add %%"REG_a", %1 \n\t"
- PAVGB" (%2), %%mm0 \n\t"
- PAVGB" (%2, %3), %%mm2 \n\t"
- "movq %%mm0, (%2) \n\t"
- "movq %%mm2, (%2, %3) \n\t"
- "add %%"REG_a", %2 \n\t"
- "subl $4, %0 \n\t"
- "jnz 1b \n\t"
- :"+g"(h), "+S"(pixels), "+D"(block)
- :"r" ((x86_reg)line_size)
- :"%"REG_a, "memory");
-}
-
-static void DEF(avg_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
-{
- __asm__ volatile(
- "lea (%3, %3), %%"REG_a" \n\t"
- "movq (%1), %%mm0 \n\t"
- "sub %3, %2 \n\t"
- "1: \n\t"
- "movq (%1, %3), %%mm1 \n\t"
- "movq (%1, %%"REG_a"), %%mm2 \n\t"
- "add %%"REG_a", %1 \n\t"
- PAVGB" %%mm1, %%mm0 \n\t"
- PAVGB" %%mm2, %%mm1 \n\t"
- "movq (%2, %3), %%mm3 \n\t"
- "movq (%2, %%"REG_a"), %%mm4 \n\t"
- PAVGB" %%mm3, %%mm0 \n\t"
- PAVGB" %%mm4, %%mm1 \n\t"
- "movq %%mm0, (%2, %3) \n\t"
- "movq %%mm1, (%2, %%"REG_a") \n\t"
- "movq (%1, %3), %%mm1 \n\t"
- "movq (%1, %%"REG_a"), %%mm0 \n\t"
- PAVGB" %%mm1, %%mm2 \n\t"
- PAVGB" %%mm0, %%mm1 \n\t"
- "add %%"REG_a", %2 \n\t"
- "add %%"REG_a", %1 \n\t"
- "movq (%2, %3), %%mm3 \n\t"
- "movq (%2, %%"REG_a"), %%mm4 \n\t"
- PAVGB" %%mm3, %%mm2 \n\t"
- PAVGB" %%mm4, %%mm1 \n\t"
- "movq %%mm2, (%2, %3) \n\t"
- "movq %%mm1, (%2, %%"REG_a") \n\t"
- "add %%"REG_a", %2 \n\t"
- "subl $4, %0 \n\t"
- "jnz 1b \n\t"
- :"+g"(h), "+S"(pixels), "+D"(block)
- :"r" ((x86_reg)line_size)
- :"%"REG_a, "memory");
-}
-
-/* Note this is not correctly rounded, but this function is only
- * used for B-frames so it does not matter. */
-static void DEF(avg_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
-{
- MOVQ_BONE(mm6);
- __asm__ volatile(
- "lea (%3, %3), %%"REG_a" \n\t"
- "movq (%1), %%mm0 \n\t"
- PAVGB" 1(%1), %%mm0 \n\t"
- ASMALIGN(3)
- "1: \n\t"
- "movq (%1, %%"REG_a"), %%mm2 \n\t"
- "movq (%1, %3), %%mm1 \n\t"
- "psubusb %%mm6, %%mm2 \n\t"
- PAVGB" 1(%1, %3), %%mm1 \n\t"
- PAVGB" 1(%1, %%"REG_a"), %%mm2 \n\t"
- "add %%"REG_a", %1 \n\t"
- PAVGB" %%mm1, %%mm0 \n\t"
- PAVGB" %%mm2, %%mm1 \n\t"
- PAVGB" (%2), %%mm0 \n\t"
- PAVGB" (%2, %3), %%mm1 \n\t"
- "movq %%mm0, (%2) \n\t"
- "movq %%mm1, (%2, %3) \n\t"
- "movq (%1, %3), %%mm1 \n\t"
- "movq (%1, %%"REG_a"), %%mm0 \n\t"
- PAVGB" 1(%1, %3), %%mm1 \n\t"
- PAVGB" 1(%1, %%"REG_a"), %%mm0 \n\t"
- "add %%"REG_a", %2 \n\t"
- "add %%"REG_a", %1 \n\t"
- PAVGB" %%mm1, %%mm2 \n\t"
- PAVGB" %%mm0, %%mm1 \n\t"
- PAVGB" (%2), %%mm2 \n\t"
- PAVGB" (%2, %3), %%mm1 \n\t"
- "movq %%mm2, (%2) \n\t"
- "movq %%mm1, (%2, %3) \n\t"
- "add %%"REG_a", %2 \n\t"
- "subl $4, %0 \n\t"
- "jnz 1b \n\t"
- :"+g"(h), "+S"(pixels), "+D"(block)
- :"r" ((x86_reg)line_size)
- :"%"REG_a, "memory");
-}
-
-static void DEF(avg_pixels4)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
-{
- do {
- __asm__ volatile(
- "movd (%1), %%mm0 \n\t"
- "movd (%1, %2), %%mm1 \n\t"
- "movd (%1, %2, 2), %%mm2 \n\t"
- "movd (%1, %3), %%mm3 \n\t"
- PAVGB" (%0), %%mm0 \n\t"
- PAVGB" (%0, %2), %%mm1 \n\t"
- PAVGB" (%0, %2, 2), %%mm2 \n\t"
- PAVGB" (%0, %3), %%mm3 \n\t"
- "movd %%mm0, (%1) \n\t"
- "movd %%mm1, (%1, %2) \n\t"
- "movd %%mm2, (%1, %2, 2) \n\t"
- "movd %%mm3, (%1, %3) \n\t"
- ::"S"(pixels), "D"(block),
- "r" ((x86_reg)line_size), "r"((x86_reg)3L*line_size)
- :"memory");
- block += 4*line_size;
- pixels += 4*line_size;
- h -= 4;
- } while(h > 0);
-}
-
-//FIXME the following could be optimized too ...
-static void DEF(put_no_rnd_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
- DEF(put_no_rnd_pixels8_x2)(block , pixels , line_size, h);
- DEF(put_no_rnd_pixels8_x2)(block+8, pixels+8, line_size, h);
-}
-static void DEF(put_pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
- DEF(put_pixels8_y2)(block , pixels , line_size, h);
- DEF(put_pixels8_y2)(block+8, pixels+8, line_size, h);
-}
-static void DEF(put_no_rnd_pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
- DEF(put_no_rnd_pixels8_y2)(block , pixels , line_size, h);
- DEF(put_no_rnd_pixels8_y2)(block+8, pixels+8, line_size, h);
-}
-static void DEF(avg_pixels16)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
- DEF(avg_pixels8)(block , pixels , line_size, h);
- DEF(avg_pixels8)(block+8, pixels+8, line_size, h);
-}
-static void DEF(avg_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
- DEF(avg_pixels8_x2)(block , pixels , line_size, h);
- DEF(avg_pixels8_x2)(block+8, pixels+8, line_size, h);
-}
-static void DEF(avg_pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
- DEF(avg_pixels8_y2)(block , pixels , line_size, h);
- DEF(avg_pixels8_y2)(block+8, pixels+8, line_size, h);
-}
-static void DEF(avg_pixels16_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
- DEF(avg_pixels8_xy2)(block , pixels , line_size, h);
- DEF(avg_pixels8_xy2)(block+8, pixels+8, line_size, h);
-}
-
-#define QPEL_2TAP_L3(OPNAME) \
-static void DEF(OPNAME ## 2tap_qpel16_l3)(uint8_t *dst, uint8_t *src, int stride, int h, int off1, int off2){\
- __asm__ volatile(\
- "1: \n\t"\
- "movq (%1,%2), %%mm0 \n\t"\
- "movq 8(%1,%2), %%mm1 \n\t"\
- PAVGB" (%1,%3), %%mm0 \n\t"\
- PAVGB" 8(%1,%3), %%mm1 \n\t"\
- PAVGB" (%1), %%mm0 \n\t"\
- PAVGB" 8(%1), %%mm1 \n\t"\
- STORE_OP( (%1,%4),%%mm0)\
- STORE_OP(8(%1,%4),%%mm1)\
- "movq %%mm0, (%1,%4) \n\t"\
- "movq %%mm1, 8(%1,%4) \n\t"\
- "add %5, %1 \n\t"\
- "decl %0 \n\t"\
- "jnz 1b \n\t"\
- :"+g"(h), "+r"(src)\
- :"r"((x86_reg)off1), "r"((x86_reg)off2),\
- "r"((x86_reg)(dst-src)), "r"((x86_reg)stride)\
- :"memory"\
- );\
-}\
-static void DEF(OPNAME ## 2tap_qpel8_l3)(uint8_t *dst, uint8_t *src, int stride, int h, int off1, int off2){\
- __asm__ volatile(\
- "1: \n\t"\
- "movq (%1,%2), %%mm0 \n\t"\
- PAVGB" (%1,%3), %%mm0 \n\t"\
- PAVGB" (%1), %%mm0 \n\t"\
- STORE_OP((%1,%4),%%mm0)\
- "movq %%mm0, (%1,%4) \n\t"\
- "add %5, %1 \n\t"\
- "decl %0 \n\t"\
- "jnz 1b \n\t"\
- :"+g"(h), "+r"(src)\
- :"r"((x86_reg)off1), "r"((x86_reg)off2),\
- "r"((x86_reg)(dst-src)), "r"((x86_reg)stride)\
- :"memory"\
- );\
-}
-
-#define STORE_OP(a,b) PAVGB" "#a","#b" \n\t"
-QPEL_2TAP_L3(avg_)
-#undef STORE_OP
-#define STORE_OP(a,b)
-QPEL_2TAP_L3(put_)
-#undef STORE_OP
-#undef QPEL_2TAP_L3
diff --git a/libavcodec/i386/dsputil_mmx_qns_template.c b/libavcodec/i386/dsputil_mmx_qns_template.c
deleted file mode 100644
index 1f484e7..0000000
--- a/libavcodec/i386/dsputil_mmx_qns_template.c
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * DSP utils : QNS functions are compiled 3 times for mmx/3dnow/ssse3
- * Copyright (c) 2004 Michael Niedermayer
- *
- * MMX optimization by Michael Niedermayer <michaelni at gmx.at>
- * 3DNow! and SSSE3 optimization by Zuxy Meng <zuxy.meng at gmail.com>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-/* This header intentionally has no multiple inclusion guards. It is meant to
- * be included multiple times and generates different code depending on the
- * value of certain #defines. */
-
-#define MAX_ABS (512 >> (SCALE_OFFSET>0 ? SCALE_OFFSET : 0))
-
-static int DEF(try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale)
-{
- x86_reg i=0;
-
- assert(FFABS(scale) < MAX_ABS);
- scale<<= 16 + SCALE_OFFSET - BASIS_SHIFT + RECON_SHIFT;
-
- SET_RND(mm6);
- __asm__ volatile(
- "pxor %%mm7, %%mm7 \n\t"
- "movd %4, %%mm5 \n\t"
- "punpcklwd %%mm5, %%mm5 \n\t"
- "punpcklwd %%mm5, %%mm5 \n\t"
- ASMALIGN(4)
- "1: \n\t"
- "movq (%1, %0), %%mm0 \n\t"
- "movq 8(%1, %0), %%mm1 \n\t"
- PMULHRW(%%mm0, %%mm1, %%mm5, %%mm6)
- "paddw (%2, %0), %%mm0 \n\t"
- "paddw 8(%2, %0), %%mm1 \n\t"
- "psraw $6, %%mm0 \n\t"
- "psraw $6, %%mm1 \n\t"
- "pmullw (%3, %0), %%mm0 \n\t"
- "pmullw 8(%3, %0), %%mm1 \n\t"
- "pmaddwd %%mm0, %%mm0 \n\t"
- "pmaddwd %%mm1, %%mm1 \n\t"
- "paddd %%mm1, %%mm0 \n\t"
- "psrld $4, %%mm0 \n\t"
- "paddd %%mm0, %%mm7 \n\t"
- "add $16, %0 \n\t"
- "cmp $128, %0 \n\t" //FIXME optimize & bench
- " jb 1b \n\t"
- PHADDD(%%mm7, %%mm6)
- "psrld $2, %%mm7 \n\t"
- "movd %%mm7, %0 \n\t"
-
- : "+r" (i)
- : "r"(basis), "r"(rem), "r"(weight), "g"(scale)
- );
- return i;
-}
-
-static void DEF(add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale)
-{
- x86_reg i=0;
-
- if(FFABS(scale) < MAX_ABS){
- scale<<= 16 + SCALE_OFFSET - BASIS_SHIFT + RECON_SHIFT;
- SET_RND(mm6);
- __asm__ volatile(
- "movd %3, %%mm5 \n\t"
- "punpcklwd %%mm5, %%mm5 \n\t"
- "punpcklwd %%mm5, %%mm5 \n\t"
- ASMALIGN(4)
- "1: \n\t"
- "movq (%1, %0), %%mm0 \n\t"
- "movq 8(%1, %0), %%mm1 \n\t"
- PMULHRW(%%mm0, %%mm1, %%mm5, %%mm6)
- "paddw (%2, %0), %%mm0 \n\t"
- "paddw 8(%2, %0), %%mm1 \n\t"
- "movq %%mm0, (%2, %0) \n\t"
- "movq %%mm1, 8(%2, %0) \n\t"
- "add $16, %0 \n\t"
- "cmp $128, %0 \n\t" // FIXME optimize & bench
- " jb 1b \n\t"
-
- : "+r" (i)
- : "r"(basis), "r"(rem), "g"(scale)
- );
- }else{
- for(i=0; i<8*8; i++){
- rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
- }
- }
-}
diff --git a/libavcodec/i386/dsputil_mmx_rnd_template.c b/libavcodec/i386/dsputil_mmx_rnd_template.c
deleted file mode 100644
index 5ef06da..0000000
--- a/libavcodec/i386/dsputil_mmx_rnd_template.c
+++ /dev/null
@@ -1,594 +0,0 @@
-/*
- * DSP utils mmx functions are compiled twice for rnd/no_rnd
- * Copyright (c) 2000, 2001 Fabrice Bellard.
- * Copyright (c) 2003-2004 Michael Niedermayer <michaelni at gmx.at>
- *
- * MMX optimization by Nick Kurshev <nickols_k at mail.ru>
- * mostly rewritten by Michael Niedermayer <michaelni at gmx.at>
- * and improved by Zdenek Kabelac <kabi at users.sf.net>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-/* This header intentionally has no multiple inclusion guards. It is meant to
- * be included multiple times and generates different code depending on the
- * value of certain #defines. */
-
-// put_pixels
-static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
-{
- MOVQ_BFE(mm6);
- __asm__ volatile(
- "lea (%3, %3), %%"REG_a" \n\t"
- ASMALIGN(3)
- "1: \n\t"
- "movq (%1), %%mm0 \n\t"
- "movq 1(%1), %%mm1 \n\t"
- "movq (%1, %3), %%mm2 \n\t"
- "movq 1(%1, %3), %%mm3 \n\t"
- PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
- "movq %%mm4, (%2) \n\t"
- "movq %%mm5, (%2, %3) \n\t"
- "add %%"REG_a", %1 \n\t"
- "add %%"REG_a", %2 \n\t"
- "movq (%1), %%mm0 \n\t"
- "movq 1(%1), %%mm1 \n\t"
- "movq (%1, %3), %%mm2 \n\t"
- "movq 1(%1, %3), %%mm3 \n\t"
- PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
- "movq %%mm4, (%2) \n\t"
- "movq %%mm5, (%2, %3) \n\t"
- "add %%"REG_a", %1 \n\t"
- "add %%"REG_a", %2 \n\t"
- "subl $4, %0 \n\t"
- "jnz 1b \n\t"
- :"+g"(h), "+S"(pixels), "+D"(block)
- :"r"((x86_reg)line_size)
- :REG_a, "memory");
-}
-
-static void av_unused DEF(put, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
-{
- MOVQ_BFE(mm6);
- __asm__ volatile(
- "testl $1, %0 \n\t"
- " jz 1f \n\t"
- "movq (%1), %%mm0 \n\t"
- "movq (%2), %%mm1 \n\t"
- "add %4, %1 \n\t"
- "add $8, %2 \n\t"
- PAVGB(%%mm0, %%mm1, %%mm4, %%mm6)
- "movq %%mm4, (%3) \n\t"
- "add %5, %3 \n\t"
- "decl %0 \n\t"
- ASMALIGN(3)
- "1: \n\t"
- "movq (%1), %%mm0 \n\t"
- "movq (%2), %%mm1 \n\t"
- "add %4, %1 \n\t"
- "movq (%1), %%mm2 \n\t"
- "movq 8(%2), %%mm3 \n\t"
- "add %4, %1 \n\t"
- PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
- "movq %%mm4, (%3) \n\t"
- "add %5, %3 \n\t"
- "movq %%mm5, (%3) \n\t"
- "add %5, %3 \n\t"
- "movq (%1), %%mm0 \n\t"
- "movq 16(%2), %%mm1 \n\t"
- "add %4, %1 \n\t"
- "movq (%1), %%mm2 \n\t"
- "movq 24(%2), %%mm3 \n\t"
- "add %4, %1 \n\t"
- "add $32, %2 \n\t"
- PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
- "movq %%mm4, (%3) \n\t"
- "add %5, %3 \n\t"
- "movq %%mm5, (%3) \n\t"
- "add %5, %3 \n\t"
- "subl $4, %0 \n\t"
- "jnz 1b \n\t"
-#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cannot be used
- :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
-#else
- :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
-#endif
- :"S"((x86_reg)src1Stride), "D"((x86_reg)dstStride)
- :"memory");
-}
-
-static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
-{
- MOVQ_BFE(mm6);
- __asm__ volatile(
- "lea (%3, %3), %%"REG_a" \n\t"
- ASMALIGN(3)
- "1: \n\t"
- "movq (%1), %%mm0 \n\t"
- "movq 1(%1), %%mm1 \n\t"
- "movq (%1, %3), %%mm2 \n\t"
- "movq 1(%1, %3), %%mm3 \n\t"
- PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
- "movq %%mm4, (%2) \n\t"
- "movq %%mm5, (%2, %3) \n\t"
- "movq 8(%1), %%mm0 \n\t"
- "movq 9(%1), %%mm1 \n\t"
- "movq 8(%1, %3), %%mm2 \n\t"
- "movq 9(%1, %3), %%mm3 \n\t"
- PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
- "movq %%mm4, 8(%2) \n\t"
- "movq %%mm5, 8(%2, %3) \n\t"
- "add %%"REG_a", %1 \n\t"
- "add %%"REG_a", %2 \n\t"
- "movq (%1), %%mm0 \n\t"
- "movq 1(%1), %%mm1 \n\t"
- "movq (%1, %3), %%mm2 \n\t"
- "movq 1(%1, %3), %%mm3 \n\t"
- PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
- "movq %%mm4, (%2) \n\t"
- "movq %%mm5, (%2, %3) \n\t"
- "movq 8(%1), %%mm0 \n\t"
- "movq 9(%1), %%mm1 \n\t"
- "movq 8(%1, %3), %%mm2 \n\t"
- "movq 9(%1, %3), %%mm3 \n\t"
- PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
- "movq %%mm4, 8(%2) \n\t"
- "movq %%mm5, 8(%2, %3) \n\t"
- "add %%"REG_a", %1 \n\t"
- "add %%"REG_a", %2 \n\t"
- "subl $4, %0 \n\t"
- "jnz 1b \n\t"
- :"+g"(h), "+S"(pixels), "+D"(block)
- :"r"((x86_reg)line_size)
- :REG_a, "memory");
-}
-
-static void av_unused DEF(put, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
-{
- MOVQ_BFE(mm6);
- __asm__ volatile(
- "testl $1, %0 \n\t"
- " jz 1f \n\t"
- "movq (%1), %%mm0 \n\t"
- "movq (%2), %%mm1 \n\t"
- "movq 8(%1), %%mm2 \n\t"
- "movq 8(%2), %%mm3 \n\t"
- "add %4, %1 \n\t"
- "add $16, %2 \n\t"
- PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
- "movq %%mm4, (%3) \n\t"
- "movq %%mm5, 8(%3) \n\t"
- "add %5, %3 \n\t"
- "decl %0 \n\t"
- ASMALIGN(3)
- "1: \n\t"
- "movq (%1), %%mm0 \n\t"
- "movq (%2), %%mm1 \n\t"
- "movq 8(%1), %%mm2 \n\t"
- "movq 8(%2), %%mm3 \n\t"
- "add %4, %1 \n\t"
- PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
- "movq %%mm4, (%3) \n\t"
- "movq %%mm5, 8(%3) \n\t"
- "add %5, %3 \n\t"
- "movq (%1), %%mm0 \n\t"
- "movq 16(%2), %%mm1 \n\t"
- "movq 8(%1), %%mm2 \n\t"
- "movq 24(%2), %%mm3 \n\t"
- "add %4, %1 \n\t"
- PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
- "movq %%mm4, (%3) \n\t"
- "movq %%mm5, 8(%3) \n\t"
- "add %5, %3 \n\t"
- "add $32, %2 \n\t"
- "subl $2, %0 \n\t"
- "jnz 1b \n\t"
-#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cannot be used
- :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
-#else
- :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
-#endif
- :"S"((x86_reg)src1Stride), "D"((x86_reg)dstStride)
- :"memory");
-}
-
-static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
-{
- MOVQ_BFE(mm6);
- __asm__ volatile(
- "lea (%3, %3), %%"REG_a" \n\t"
- "movq (%1), %%mm0 \n\t"
- ASMALIGN(3)
- "1: \n\t"
- "movq (%1, %3), %%mm1 \n\t"
- "movq (%1, %%"REG_a"),%%mm2 \n\t"
- PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5)
- "movq %%mm4, (%2) \n\t"
- "movq %%mm5, (%2, %3) \n\t"
- "add %%"REG_a", %1 \n\t"
- "add %%"REG_a", %2 \n\t"
- "movq (%1, %3), %%mm1 \n\t"
- "movq (%1, %%"REG_a"),%%mm0 \n\t"
- PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5)
- "movq %%mm4, (%2) \n\t"
- "movq %%mm5, (%2, %3) \n\t"
- "add %%"REG_a", %1 \n\t"
- "add %%"REG_a", %2 \n\t"
- "subl $4, %0 \n\t"
- "jnz 1b \n\t"
- :"+g"(h), "+S"(pixels), "+D"(block)
- :"r"((x86_reg)line_size)
- :REG_a, "memory");
-}
-
-static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
-{
- MOVQ_ZERO(mm7);
- SET_RND(mm6); // =2 for rnd and =1 for no_rnd version
- __asm__ volatile(
- "movq (%1), %%mm0 \n\t"
- "movq 1(%1), %%mm4 \n\t"
- "movq %%mm0, %%mm1 \n\t"
- "movq %%mm4, %%mm5 \n\t"
- "punpcklbw %%mm7, %%mm0 \n\t"
- "punpcklbw %%mm7, %%mm4 \n\t"
- "punpckhbw %%mm7, %%mm1 \n\t"
- "punpckhbw %%mm7, %%mm5 \n\t"
- "paddusw %%mm0, %%mm4 \n\t"
- "paddusw %%mm1, %%mm5 \n\t"
- "xor %%"REG_a", %%"REG_a" \n\t"
- "add %3, %1 \n\t"
- ASMALIGN(3)
- "1: \n\t"
- "movq (%1, %%"REG_a"), %%mm0 \n\t"
- "movq 1(%1, %%"REG_a"), %%mm2 \n\t"
- "movq %%mm0, %%mm1 \n\t"
- "movq %%mm2, %%mm3 \n\t"
- "punpcklbw %%mm7, %%mm0 \n\t"
- "punpcklbw %%mm7, %%mm2 \n\t"
- "punpckhbw %%mm7, %%mm1 \n\t"
- "punpckhbw %%mm7, %%mm3 \n\t"
- "paddusw %%mm2, %%mm0 \n\t"
- "paddusw %%mm3, %%mm1 \n\t"
- "paddusw %%mm6, %%mm4 \n\t"
- "paddusw %%mm6, %%mm5 \n\t"
- "paddusw %%mm0, %%mm4 \n\t"
- "paddusw %%mm1, %%mm5 \n\t"
- "psrlw $2, %%mm4 \n\t"
- "psrlw $2, %%mm5 \n\t"
- "packuswb %%mm5, %%mm4 \n\t"
- "movq %%mm4, (%2, %%"REG_a") \n\t"
- "add %3, %%"REG_a" \n\t"
-
- "movq (%1, %%"REG_a"), %%mm2 \n\t" // 0 <-> 2 1 <-> 3
- "movq 1(%1, %%"REG_a"), %%mm4 \n\t"
- "movq %%mm2, %%mm3 \n\t"
- "movq %%mm4, %%mm5 \n\t"
- "punpcklbw %%mm7, %%mm2 \n\t"
- "punpcklbw %%mm7, %%mm4 \n\t"
- "punpckhbw %%mm7, %%mm3 \n\t"
- "punpckhbw %%mm7, %%mm5 \n\t"
- "paddusw %%mm2, %%mm4 \n\t"
- "paddusw %%mm3, %%mm5 \n\t"
- "paddusw %%mm6, %%mm0 \n\t"
- "paddusw %%mm6, %%mm1 \n\t"
- "paddusw %%mm4, %%mm0 \n\t"
- "paddusw %%mm5, %%mm1 \n\t"
- "psrlw $2, %%mm0 \n\t"
- "psrlw $2, %%mm1 \n\t"
- "packuswb %%mm1, %%mm0 \n\t"
- "movq %%mm0, (%2, %%"REG_a") \n\t"
- "add %3, %%"REG_a" \n\t"
-
- "subl $2, %0 \n\t"
- "jnz 1b \n\t"
- :"+g"(h), "+S"(pixels)
- :"D"(block), "r"((x86_reg)line_size)
- :REG_a, "memory");
-}
-
-// avg_pixels
-static void av_unused DEF(avg, pixels4)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
-{
- MOVQ_BFE(mm6);
- JUMPALIGN();
- do {
- __asm__ volatile(
- "movd %0, %%mm0 \n\t"
- "movd %1, %%mm1 \n\t"
- PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
- "movd %%mm2, %0 \n\t"
- :"+m"(*block)
- :"m"(*pixels)
- :"memory");
- pixels += line_size;
- block += line_size;
- }
- while (--h);
-}
-
-// in case more speed is needed - unroling would certainly help
-static void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
-{
- MOVQ_BFE(mm6);
- JUMPALIGN();
- do {
- __asm__ volatile(
- "movq %0, %%mm0 \n\t"
- "movq %1, %%mm1 \n\t"
- PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
- "movq %%mm2, %0 \n\t"
- :"+m"(*block)
- :"m"(*pixels)
- :"memory");
- pixels += line_size;
- block += line_size;
- }
- while (--h);
-}
-
-static void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
-{
- MOVQ_BFE(mm6);
- JUMPALIGN();
- do {
- __asm__ volatile(
- "movq %0, %%mm0 \n\t"
- "movq %1, %%mm1 \n\t"
- PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
- "movq %%mm2, %0 \n\t"
- "movq 8%0, %%mm0 \n\t"
- "movq 8%1, %%mm1 \n\t"
- PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
- "movq %%mm2, 8%0 \n\t"
- :"+m"(*block)
- :"m"(*pixels)
- :"memory");
- pixels += line_size;
- block += line_size;
- }
- while (--h);
-}
-
-static void DEF(avg, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
-{
- MOVQ_BFE(mm6);
- JUMPALIGN();
- do {
- __asm__ volatile(
- "movq %1, %%mm0 \n\t"
- "movq 1%1, %%mm1 \n\t"
- "movq %0, %%mm3 \n\t"
- PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
- PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
- "movq %%mm0, %0 \n\t"
- :"+m"(*block)
- :"m"(*pixels)
- :"memory");
- pixels += line_size;
- block += line_size;
- } while (--h);
-}
-
-static av_unused void DEF(avg, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
-{
- MOVQ_BFE(mm6);
- JUMPALIGN();
- do {
- __asm__ volatile(
- "movq %1, %%mm0 \n\t"
- "movq %2, %%mm1 \n\t"
- "movq %0, %%mm3 \n\t"
- PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
- PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
- "movq %%mm0, %0 \n\t"
- :"+m"(*dst)
- :"m"(*src1), "m"(*src2)
- :"memory");
- dst += dstStride;
- src1 += src1Stride;
- src2 += 8;
- } while (--h);
-}
-
-static void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
-{
- MOVQ_BFE(mm6);
- JUMPALIGN();
- do {
- __asm__ volatile(
- "movq %1, %%mm0 \n\t"
- "movq 1%1, %%mm1 \n\t"
- "movq %0, %%mm3 \n\t"
- PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
- PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
- "movq %%mm0, %0 \n\t"
- "movq 8%1, %%mm0 \n\t"
- "movq 9%1, %%mm1 \n\t"
- "movq 8%0, %%mm3 \n\t"
- PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
- PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
- "movq %%mm0, 8%0 \n\t"
- :"+m"(*block)
- :"m"(*pixels)
- :"memory");
- pixels += line_size;
- block += line_size;
- } while (--h);
-}
-
-static av_unused void DEF(avg, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
-{
- MOVQ_BFE(mm6);
- JUMPALIGN();
- do {
- __asm__ volatile(
- "movq %1, %%mm0 \n\t"
- "movq %2, %%mm1 \n\t"
- "movq %0, %%mm3 \n\t"
- PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
- PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
- "movq %%mm0, %0 \n\t"
- "movq 8%1, %%mm0 \n\t"
- "movq 8%2, %%mm1 \n\t"
- "movq 8%0, %%mm3 \n\t"
- PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
- PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
- "movq %%mm0, 8%0 \n\t"
- :"+m"(*dst)
- :"m"(*src1), "m"(*src2)
- :"memory");
- dst += dstStride;
- src1 += src1Stride;
- src2 += 16;
- } while (--h);
-}
-
-static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
-{
- MOVQ_BFE(mm6);
- __asm__ volatile(
- "lea (%3, %3), %%"REG_a" \n\t"
- "movq (%1), %%mm0 \n\t"
- ASMALIGN(3)
- "1: \n\t"
- "movq (%1, %3), %%mm1 \n\t"
- "movq (%1, %%"REG_a"), %%mm2 \n\t"
- PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5)
- "movq (%2), %%mm3 \n\t"
- PAVGB(%%mm3, %%mm4, %%mm0, %%mm6)
- "movq (%2, %3), %%mm3 \n\t"
- PAVGB(%%mm3, %%mm5, %%mm1, %%mm6)
- "movq %%mm0, (%2) \n\t"
- "movq %%mm1, (%2, %3) \n\t"
- "add %%"REG_a", %1 \n\t"
- "add %%"REG_a", %2 \n\t"
-
- "movq (%1, %3), %%mm1 \n\t"
- "movq (%1, %%"REG_a"), %%mm0 \n\t"
- PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5)
- "movq (%2), %%mm3 \n\t"
- PAVGB(%%mm3, %%mm4, %%mm2, %%mm6)
- "movq (%2, %3), %%mm3 \n\t"
- PAVGB(%%mm3, %%mm5, %%mm1, %%mm6)
- "movq %%mm2, (%2) \n\t"
- "movq %%mm1, (%2, %3) \n\t"
- "add %%"REG_a", %1 \n\t"
- "add %%"REG_a", %2 \n\t"
-
- "subl $4, %0 \n\t"
- "jnz 1b \n\t"
- :"+g"(h), "+S"(pixels), "+D"(block)
- :"r"((x86_reg)line_size)
- :REG_a, "memory");
-}
-
-// this routine is 'slightly' suboptimal but mostly unused
-static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
-{
- MOVQ_ZERO(mm7);
- SET_RND(mm6); // =2 for rnd and =1 for no_rnd version
- __asm__ volatile(
- "movq (%1), %%mm0 \n\t"
- "movq 1(%1), %%mm4 \n\t"
- "movq %%mm0, %%mm1 \n\t"
- "movq %%mm4, %%mm5 \n\t"
- "punpcklbw %%mm7, %%mm0 \n\t"
- "punpcklbw %%mm7, %%mm4 \n\t"
- "punpckhbw %%mm7, %%mm1 \n\t"
- "punpckhbw %%mm7, %%mm5 \n\t"
- "paddusw %%mm0, %%mm4 \n\t"
- "paddusw %%mm1, %%mm5 \n\t"
- "xor %%"REG_a", %%"REG_a" \n\t"
- "add %3, %1 \n\t"
- ASMALIGN(3)
- "1: \n\t"
- "movq (%1, %%"REG_a"), %%mm0 \n\t"
- "movq 1(%1, %%"REG_a"), %%mm2 \n\t"
- "movq %%mm0, %%mm1 \n\t"
- "movq %%mm2, %%mm3 \n\t"
- "punpcklbw %%mm7, %%mm0 \n\t"
- "punpcklbw %%mm7, %%mm2 \n\t"
- "punpckhbw %%mm7, %%mm1 \n\t"
- "punpckhbw %%mm7, %%mm3 \n\t"
- "paddusw %%mm2, %%mm0 \n\t"
- "paddusw %%mm3, %%mm1 \n\t"
- "paddusw %%mm6, %%mm4 \n\t"
- "paddusw %%mm6, %%mm5 \n\t"
- "paddusw %%mm0, %%mm4 \n\t"
- "paddusw %%mm1, %%mm5 \n\t"
- "psrlw $2, %%mm4 \n\t"
- "psrlw $2, %%mm5 \n\t"
- "movq (%2, %%"REG_a"), %%mm3 \n\t"
- "packuswb %%mm5, %%mm4 \n\t"
- "pcmpeqd %%mm2, %%mm2 \n\t"
- "paddb %%mm2, %%mm2 \n\t"
- PAVGB(%%mm3, %%mm4, %%mm5, %%mm2)
- "movq %%mm5, (%2, %%"REG_a") \n\t"
- "add %3, %%"REG_a" \n\t"
-
- "movq (%1, %%"REG_a"), %%mm2 \n\t" // 0 <-> 2 1 <-> 3
- "movq 1(%1, %%"REG_a"), %%mm4 \n\t"
- "movq %%mm2, %%mm3 \n\t"
- "movq %%mm4, %%mm5 \n\t"
- "punpcklbw %%mm7, %%mm2 \n\t"
- "punpcklbw %%mm7, %%mm4 \n\t"
- "punpckhbw %%mm7, %%mm3 \n\t"
- "punpckhbw %%mm7, %%mm5 \n\t"
- "paddusw %%mm2, %%mm4 \n\t"
- "paddusw %%mm3, %%mm5 \n\t"
- "paddusw %%mm6, %%mm0 \n\t"
- "paddusw %%mm6, %%mm1 \n\t"
- "paddusw %%mm4, %%mm0 \n\t"
- "paddusw %%mm5, %%mm1 \n\t"
- "psrlw $2, %%mm0 \n\t"
- "psrlw $2, %%mm1 \n\t"
- "movq (%2, %%"REG_a"), %%mm3 \n\t"
- "packuswb %%mm1, %%mm0 \n\t"
- "pcmpeqd %%mm2, %%mm2 \n\t"
- "paddb %%mm2, %%mm2 \n\t"
- PAVGB(%%mm3, %%mm0, %%mm1, %%mm2)
- "movq %%mm1, (%2, %%"REG_a") \n\t"
- "add %3, %%"REG_a" \n\t"
-
- "subl $2, %0 \n\t"
- "jnz 1b \n\t"
- :"+g"(h), "+S"(pixels)
- :"D"(block), "r"((x86_reg)line_size)
- :REG_a, "memory");
-}
-
-//FIXME optimize
-static void DEF(put, pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
- DEF(put, pixels8_y2)(block , pixels , line_size, h);
- DEF(put, pixels8_y2)(block+8, pixels+8, line_size, h);
-}
-
-static void DEF(put, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
- DEF(put, pixels8_xy2)(block , pixels , line_size, h);
- DEF(put, pixels8_xy2)(block+8, pixels+8, line_size, h);
-}
-
-static void DEF(avg, pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
- DEF(avg, pixels8_y2)(block , pixels , line_size, h);
- DEF(avg, pixels8_y2)(block+8, pixels+8, line_size, h);
-}
-
-static void DEF(avg, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
- DEF(avg, pixels8_xy2)(block , pixels , line_size, h);
- DEF(avg, pixels8_xy2)(block+8, pixels+8, line_size, h);
-}
diff --git a/libavcodec/i386/dsputil_yasm.asm b/libavcodec/i386/dsputil_yasm.asm
deleted file mode 100644
index 09beb5c..0000000
--- a/libavcodec/i386/dsputil_yasm.asm
+++ /dev/null
@@ -1,92 +0,0 @@
-;******************************************************************************
-;* MMX optimized DSP utils
-;* Copyright (c) 2008 Loren Merritt
-;*
-;* This file is part of FFmpeg.
-;*
-;* FFmpeg is free software; you can redistribute it and/or
-;* modify it under the terms of the GNU Lesser General Public
-;* License as published by the Free Software Foundation; either
-;* version 2.1 of the License, or (at your option) any later version.
-;*
-;* FFmpeg is distributed in the hope that it will be useful,
-;* but WITHOUT ANY WARRANTY; without even the implied warranty of
-;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-;* Lesser General Public License for more details.
-;*
-;* You should have received a copy of the GNU Lesser General Public
-;* License along with FFmpeg; if not, write to the Free Software
-;* 51, Inc., Foundation Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-;******************************************************************************
-
-%include "x86inc.asm"
-
-section .text align=16
-
-%macro PSWAPD_SSE 2
- pshufw %1, %2, 0x4e
-%endmacro
-%macro PSWAPD_3DN1 2
- movq %1, %2
- psrlq %1, 32
- punpckldq %1, %2
-%endmacro
-
-%macro FLOAT_TO_INT16_INTERLEAVE6 1
-; void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len)
-cglobal ff_float_to_int16_interleave6_%1, 2,7,0, dst, src, src1, src2, src3, src4, src5
-%ifdef ARCH_X86_64
- %define lend r10d
- mov lend, r2d
-%else
- %define lend dword r2m
-%endif
- mov src1q, [srcq+1*gprsize]
- mov src2q, [srcq+2*gprsize]
- mov src3q, [srcq+3*gprsize]
- mov src4q, [srcq+4*gprsize]
- mov src5q, [srcq+5*gprsize]
- mov srcq, [srcq]
- sub src1q, srcq
- sub src2q, srcq
- sub src3q, srcq
- sub src4q, srcq
- sub src5q, srcq
-.loop:
- cvtps2pi mm0, [srcq]
- cvtps2pi mm1, [srcq+src1q]
- cvtps2pi mm2, [srcq+src2q]
- cvtps2pi mm3, [srcq+src3q]
- cvtps2pi mm4, [srcq+src4q]
- cvtps2pi mm5, [srcq+src5q]
- packssdw mm0, mm3
- packssdw mm1, mm4
- packssdw mm2, mm5
- pswapd mm3, mm0
- punpcklwd mm0, mm1
- punpckhwd mm1, mm2
- punpcklwd mm2, mm3
- pswapd mm3, mm0
- punpckldq mm0, mm2
- punpckhdq mm2, mm1
- punpckldq mm1, mm3
- movq [dstq ], mm0
- movq [dstq+16], mm2
- movq [dstq+ 8], mm1
- add srcq, 8
- add dstq, 24
- sub lend, 2
- jg .loop
- emms
- RET
-%endmacro ; FLOAT_TO_INT16_INTERLEAVE6
-
-%define pswapd PSWAPD_SSE
-FLOAT_TO_INT16_INTERLEAVE6 sse
-%define cvtps2pi pf2id
-%define pswapd PSWAPD_3DN1
-FLOAT_TO_INT16_INTERLEAVE6 3dnow
-%undef pswapd
-FLOAT_TO_INT16_INTERLEAVE6 3dn2
-%undef cvtps2pi
-
diff --git a/libavcodec/i386/fft_mmx.asm b/libavcodec/i386/fft_mmx.asm
deleted file mode 100644
index c0a9bd5..0000000
--- a/libavcodec/i386/fft_mmx.asm
+++ /dev/null
@@ -1,467 +0,0 @@
-;******************************************************************************
-;* FFT transform with SSE/3DNow optimizations
-;* Copyright (c) 2008 Loren Merritt
-;*
-;* This file is part of FFmpeg.
-;*
-;* FFmpeg is free software; you can redistribute it and/or
-;* modify it under the terms of the GNU Lesser General Public
-;* License as published by the Free Software Foundation; either
-;* version 2.1 of the License, or (at your option) any later version.
-;*
-;* FFmpeg is distributed in the hope that it will be useful,
-;* but WITHOUT ANY WARRANTY; without even the implied warranty of
-;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-;* Lesser General Public License for more details.
-;*
-;* You should have received a copy of the GNU Lesser General Public
-;* License along with FFmpeg; if not, write to the Free Software
-;* 51, Inc., Foundation Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-;******************************************************************************
-
-; These functions are not individually interchangeable with the C versions.
-; While C takes arrays of FFTComplex, SSE/3DNow leave intermediate results
-; in blocks as conventient to the vector size.
-; i.e. {4x real, 4x imaginary, 4x real, ...} (or 2x respectively)
-
-%include "x86inc.asm"
-
-SECTION_RODATA
-
-%define M_SQRT1_2 0.70710678118654752440
-ps_root2: times 4 dd M_SQRT1_2
-ps_root2mppm: dd -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2
-ps_m1p1: dd 1<<31, 0
-
-%assign i 16
-%rep 13
-cextern ff_cos_ %+ i
-%assign i i<<1
-%endrep
-
-%ifdef ARCH_X86_64
- %define pointer dq
-%else
- %define pointer dd
-%endif
-
-%macro IF0 1+
-%endmacro
-%macro IF1 1+
- %1
-%endmacro
-
-section .text align=16
-
-%macro T2_3DN 4 ; z0, z1, mem0, mem1
- mova %1, %3
- mova %2, %1
- pfadd %1, %4
- pfsub %2, %4
-%endmacro
-
-%macro T4_3DN 6 ; z0, z1, z2, z3, tmp0, tmp1
- mova %5, %3
- pfsub %3, %4
- pfadd %5, %4 ; {t6,t5}
- pxor %3, [ps_m1p1 GLOBAL] ; {t8,t7}
- mova %6, %1
- pswapd %3, %3
- pfadd %1, %5 ; {r0,i0}
- pfsub %6, %5 ; {r2,i2}
- mova %4, %2
- pfadd %2, %3 ; {r1,i1}
- pfsub %4, %3 ; {r3,i3}
- SWAP %3, %6
-%endmacro
-
-; in: %1={r0,i0,r1,i1} %2={r2,i2,r3,i3}
-; out: %1={r0,r1,r2,r3} %2={i0,i1,i2,i3}
-%macro T4_SSE 3
- mova %3, %1
- shufps %1, %2, 0x64 ; {r0,i0,r3,i2}
- shufps %3, %2, 0xce ; {r1,i1,r2,i3}
- mova %2, %1
- addps %1, %3 ; {t1,t2,t6,t5}
- subps %2, %3 ; {t3,t4,t8,t7}
- mova %3, %1
- shufps %1, %2, 0x44 ; {t1,t2,t3,t4}
- shufps %3, %2, 0xbe ; {t6,t5,t7,t8}
- mova %2, %1
- addps %1, %3 ; {r0,i0,r1,i1}
- subps %2, %3 ; {r2,i2,r3,i3}
- mova %3, %1
- shufps %1, %2, 0x88 ; {r0,r1,r2,r3}
- shufps %3, %2, 0xdd ; {i0,i1,i2,i3}
- SWAP %2, %3
-%endmacro
-
-%macro T8_SSE 6 ; r0,i0,r1,i1,t0,t1
- mova %5, %3
- shufps %3, %4, 0x44 ; {r4,i4,r6,i6}
- shufps %5, %4, 0xee ; {r5,i5,r7,i7}
- mova %6, %3
- subps %3, %5 ; {r5,i5,r7,i7}
- addps %6, %5 ; {t1,t2,t3,t4}
- mova %5, %3
- shufps %5, %5, 0xb1 ; {i5,r5,i7,r7}
- mulps %3, [ps_root2mppm GLOBAL] ; {-r5,i5,r7,-i7}
- mulps %5, [ps_root2 GLOBAL]
- addps %3, %5 ; {t8,t7,ta,t9}
- mova %5, %6
- shufps %6, %3, 0x36 ; {t3,t2,t9,t8}
- shufps %5, %3, 0x9c ; {t1,t4,t7,ta}
- mova %3, %6
- addps %6, %5 ; {t1,t2,t9,ta}
- subps %3, %5 ; {t6,t5,tc,tb}
- mova %5, %6
- shufps %6, %3, 0xd8 ; {t1,t9,t5,tb}
- shufps %5, %3, 0x8d ; {t2,ta,t6,tc}
- mova %3, %1
- mova %4, %2
- addps %1, %6 ; {r0,r1,r2,r3}
- addps %2, %5 ; {i0,i1,i2,i3}
- subps %3, %6 ; {r4,r5,r6,r7}
- subps %4, %5 ; {i4,i5,i6,i7}
-%endmacro
-
-; scheduled for cpu-bound sizes
-%macro PASS_SMALL 3 ; (to load m4-m7), wre, wim
-IF%1 mova m4, Z(4)
-IF%1 mova m5, Z(5)
- mova m0, %2 ; wre
- mova m2, m4
- mova m1, %3 ; wim
- mova m3, m5
- mulps m2, m0 ; r2*wre
-IF%1 mova m6, Z(6)
- mulps m3, m1 ; i2*wim
-IF%1 mova m7, Z(7)
- mulps m4, m1 ; r2*wim
- mulps m5, m0 ; i2*wre
- addps m2, m3 ; r2*wre + i2*wim
- mova m3, m1
- mulps m1, m6 ; r3*wim
- subps m5, m4 ; i2*wre - r2*wim
- mova m4, m0
- mulps m3, m7 ; i3*wim
- mulps m4, m6 ; r3*wre
- mulps m0, m7 ; i3*wre
- subps m4, m3 ; r3*wre - i3*wim
- mova m3, Z(0)
- addps m0, m1 ; i3*wre + r3*wim
- mova m1, m4
- addps m4, m2 ; t5
- subps m1, m2 ; t3
- subps m3, m4 ; r2
- addps m4, Z(0) ; r0
- mova m6, Z(2)
- mova Z(4), m3
- mova Z(0), m4
- mova m3, m5
- subps m5, m0 ; t4
- mova m4, m6
- subps m6, m5 ; r3
- addps m5, m4 ; r1
- mova Z(6), m6
- mova Z(2), m5
- mova m2, Z(3)
- addps m3, m0 ; t6
- subps m2, m1 ; i3
- mova m7, Z(1)
- addps m1, Z(3) ; i1
- mova Z(7), m2
- mova Z(3), m1
- mova m4, m7
- subps m7, m3 ; i2
- addps m3, m4 ; i0
- mova Z(5), m7
- mova Z(1), m3
-%endmacro
-
-; scheduled to avoid store->load aliasing
-%macro PASS_BIG 1 ; (!interleave)
- mova m4, Z(4) ; r2
- mova m5, Z(5) ; i2
- mova m2, m4
- mova m0, [wq] ; wre
- mova m3, m5
- mova m1, [wq+o1q] ; wim
- mulps m2, m0 ; r2*wre
- mova m6, Z(6) ; r3
- mulps m3, m1 ; i2*wim
- mova m7, Z(7) ; i3
- mulps m4, m1 ; r2*wim
- mulps m5, m0 ; i2*wre
- addps m2, m3 ; r2*wre + i2*wim
- mova m3, m1
- mulps m1, m6 ; r3*wim
- subps m5, m4 ; i2*wre - r2*wim
- mova m4, m0
- mulps m3, m7 ; i3*wim
- mulps m4, m6 ; r3*wre
- mulps m0, m7 ; i3*wre
- subps m4, m3 ; r3*wre - i3*wim
- mova m3, Z(0)
- addps m0, m1 ; i3*wre + r3*wim
- mova m1, m4
- addps m4, m2 ; t5
- subps m1, m2 ; t3
- subps m3, m4 ; r2
- addps m4, Z(0) ; r0
- mova m6, Z(2)
- mova Z(4), m3
- mova Z(0), m4
- mova m3, m5
- subps m5, m0 ; t4
- mova m4, m6
- subps m6, m5 ; r3
- addps m5, m4 ; r1
-IF%1 mova Z(6), m6
-IF%1 mova Z(2), m5
- mova m2, Z(3)
- addps m3, m0 ; t6
- subps m2, m1 ; i3
- mova m7, Z(1)
- addps m1, Z(3) ; i1
-IF%1 mova Z(7), m2
-IF%1 mova Z(3), m1
- mova m4, m7
- subps m7, m3 ; i2
- addps m3, m4 ; i0
-IF%1 mova Z(5), m7
-IF%1 mova Z(1), m3
-%if %1==0
- mova m4, m5 ; r1
- mova m0, m6 ; r3
- unpcklps m5, m1
- unpckhps m4, m1
- unpcklps m6, m2
- unpckhps m0, m2
- mova m1, Z(0)
- mova m2, Z(4)
- mova Z(2), m5
- mova Z(3), m4
- mova Z(6), m6
- mova Z(7), m0
- mova m5, m1 ; r0
- mova m4, m2 ; r2
- unpcklps m1, m3
- unpckhps m5, m3
- unpcklps m2, m7
- unpckhps m4, m7
- mova Z(0), m1
- mova Z(1), m5
- mova Z(4), m2
- mova Z(5), m4
-%endif
-%endmacro
-
-%macro PUNPCK 3
- mova %3, %1
- punpckldq %1, %2
- punpckhdq %3, %2
-%endmacro
-
-INIT_XMM
-
-%define Z(x) [r0+mmsize*x]
-
-align 16
-fft4_sse:
- mova m0, Z(0)
- mova m1, Z(1)
- T4_SSE m0, m1, m2
- mova Z(0), m0
- mova Z(1), m1
- ret
-
-align 16
-fft8_sse:
- mova m0, Z(0)
- mova m1, Z(1)
- T4_SSE m0, m1, m2
- mova m2, Z(2)
- mova m3, Z(3)
- T8_SSE m0, m1, m2, m3, m4, m5
- mova Z(0), m0
- mova Z(1), m1
- mova Z(2), m2
- mova Z(3), m3
- ret
-
-align 16
-fft16_sse:
- mova m0, Z(0)
- mova m1, Z(1)
- T4_SSE m0, m1, m2
- mova m2, Z(2)
- mova m3, Z(3)
- T8_SSE m0, m1, m2, m3, m4, m5
- mova m4, Z(4)
- mova m5, Z(5)
- mova Z(0), m0
- mova Z(1), m1
- mova Z(2), m2
- mova Z(3), m3
- T4_SSE m4, m5, m6
- mova m6, Z(6)
- mova m7, Z(7)
- T4_SSE m6, m7, m0
- PASS_SMALL 0, [ff_cos_16 GLOBAL], [ff_cos_16+16 GLOBAL]
- ret
-
-
-INIT_MMX
-
-%macro FFT48_3DN 1
-align 16
-fft4%1:
- T2_3DN m0, m1, Z(0), Z(1)
- mova m2, Z(2)
- mova m3, Z(3)
- T4_3DN m0, m1, m2, m3, m4, m5
- PUNPCK m0, m1, m4
- PUNPCK m2, m3, m5
- mova Z(0), m0
- mova Z(1), m4
- mova Z(2), m2
- mova Z(3), m5
- ret
-
-align 16
-fft8%1:
- T2_3DN m0, m1, Z(0), Z(1)
- mova m2, Z(2)
- mova m3, Z(3)
- T4_3DN m0, m1, m2, m3, m4, m5
- mova Z(0), m0
- mova Z(2), m2
- T2_3DN m4, m5, Z(4), Z(5)
- T2_3DN m6, m7, Z(6), Z(7)
- pswapd m0, m5
- pswapd m2, m7
- pxor m0, [ps_m1p1 GLOBAL]
- pxor m2, [ps_m1p1 GLOBAL]
- pfsub m5, m0
- pfadd m7, m2
- pfmul m5, [ps_root2 GLOBAL]
- pfmul m7, [ps_root2 GLOBAL]
- T4_3DN m1, m3, m5, m7, m0, m2
- mova Z(5), m5
- mova Z(7), m7
- mova m0, Z(0)
- mova m2, Z(2)
- T4_3DN m0, m2, m4, m6, m5, m7
- PUNPCK m0, m1, m5
- PUNPCK m2, m3, m7
- mova Z(0), m0
- mova Z(1), m5
- mova Z(2), m2
- mova Z(3), m7
- PUNPCK m4, Z(5), m5
- PUNPCK m6, Z(7), m7
- mova Z(4), m4
- mova Z(5), m5
- mova Z(6), m6
- mova Z(7), m7
- ret
-%endmacro
-
-FFT48_3DN _3dn2
-
-%macro pswapd 2
-%ifidn %1, %2
- movd [r0+12], %1
- punpckhdq %1, [r0+8]
-%else
- movq %1, %2
- psrlq %1, 32
- punpckldq %1, %2
-%endif
-%endmacro
-
-FFT48_3DN _3dn
-
-
-%define Z(x) [zq + o1q*(x&6)*((x/6)^1) + o3q*(x/6) + mmsize*(x&1)]
-
-%macro DECL_PASS 2+ ; name, payload
-align 16
-%1:
-DEFINE_ARGS z, w, n, o1, o3
- lea o3q, [nq*3]
- lea o1q, [nq*8]
- shl o3q, 4
-.loop:
- %2
- add zq, mmsize*2
- add wq, mmsize
- sub nd, mmsize/8
- jg .loop
- rep ret
-%endmacro
-
-INIT_XMM
-DECL_PASS pass_sse, PASS_BIG 1
-DECL_PASS pass_interleave_sse, PASS_BIG 0
-
-INIT_MMX
-%define mulps pfmul
-%define addps pfadd
-%define subps pfsub
-%define unpcklps punpckldq
-%define unpckhps punpckhdq
-DECL_PASS pass_3dn, PASS_SMALL 1, [wq], [wq+o1q]
-DECL_PASS pass_interleave_3dn, PASS_BIG 0
-%define pass_3dn2 pass_3dn
-%define pass_interleave_3dn2 pass_interleave_3dn
-
-
-%macro DECL_FFT 2-3 ; nbits, cpu, suffix
-%xdefine list_of_fft fft4%2, fft8%2
-%if %1==5
-%xdefine list_of_fft list_of_fft, fft16%2
-%endif
-
-%assign n 1<<%1
-%rep 17-%1
-%assign n2 n/2
-%assign n4 n/4
-%xdefine list_of_fft list_of_fft, fft %+ n %+ %3%2
-
-align 16
-fft %+ n %+ %3%2:
- call fft %+ n2 %+ %2
- add r0, n*4 - (n&(-2<<%1))
- call fft %+ n4 %+ %2
- add r0, n*2 - (n2&(-2<<%1))
- call fft %+ n4 %+ %2
- sub r0, n*6 + (n2&(-2<<%1))
- lea r1, [ff_cos_ %+ n GLOBAL]
- mov r2d, n4/2
- jmp pass%3%2
-
-%assign n n*2
-%endrep
-%undef n
-
-align 8
-dispatch_tab%3%2: pointer list_of_fft
-
-; On x86_32, this function does the register saving and restoring for all of fft.
-; The others pass args in registers and don't spill anything.
-cglobal ff_fft_dispatch%3%2, 2,5,0, z, nbits
- lea r2, [dispatch_tab%3%2 GLOBAL]
- mov r2, [r2 + (nbitsq-2)*gprsize]
- call r2
- RET
-%endmacro ; DECL_FFT
-
-DECL_FFT 5, _sse
-DECL_FFT 5, _sse, _interleave
-DECL_FFT 4, _3dn
-DECL_FFT 4, _3dn, _interleave
-DECL_FFT 4, _3dn2
-DECL_FFT 4, _3dn2, _interleave
-
diff --git a/libavcodec/i386/h264_i386.h b/libavcodec/i386/h264_i386.h
deleted file mode 100644
index ed62dd6..0000000
--- a/libavcodec/i386/h264_i386.h
+++ /dev/null
@@ -1,155 +0,0 @@
-/*
- * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
- * Copyright (c) 2003 Michael Niedermayer <michaelni at gmx.at>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-/**
- * @file h264_i386.h
- * H.264 / AVC / MPEG4 part10 codec.
- * non-MMX i386-specific optimizations for H.264
- * @author Michael Niedermayer <michaelni at gmx.at>
- */
-
-#ifndef AVCODEC_I386_H264_I386_H
-#define AVCODEC_I386_H264_I386_H
-
-#include "libavcodec/cabac.h"
-
-//FIXME use some macros to avoid duplicating get_cabac (cannot be done yet
-//as that would make optimization work hard)
-#if defined(ARCH_X86) && defined(HAVE_7REGS) && \
- defined(HAVE_EBX_AVAILABLE) && \
- !defined(BROKEN_RELOCATIONS)
-static int decode_significance_x86(CABACContext *c, int max_coeff,
- uint8_t *significant_coeff_ctx_base,
- int *index){
- void *end= significant_coeff_ctx_base + max_coeff - 1;
- int minusstart= -(int)significant_coeff_ctx_base;
- int minusindex= 4-(int)index;
- int coeff_count;
- __asm__ volatile(
- "movl "RANGE "(%3), %%esi \n\t"
- "movl "LOW "(%3), %%ebx \n\t"
-
- "2: \n\t"
-
- BRANCHLESS_GET_CABAC("%%edx", "%3", "(%1)", "%%ebx",
- "%%bx", "%%esi", "%%eax", "%%al")
-
- "test $1, %%edx \n\t"
- " jz 3f \n\t"
-
- BRANCHLESS_GET_CABAC("%%edx", "%3", "61(%1)", "%%ebx",
- "%%bx", "%%esi", "%%eax", "%%al")
-
- "mov %2, %%"REG_a" \n\t"
- "movl %4, %%ecx \n\t"
- "add %1, %%"REG_c" \n\t"
- "movl %%ecx, (%%"REG_a") \n\t"
-
- "test $1, %%edx \n\t"
- " jnz 4f \n\t"
-
- "add $4, %%"REG_a" \n\t"
- "mov %%"REG_a", %2 \n\t"
-
- "3: \n\t"
- "add $1, %1 \n\t"
- "cmp %5, %1 \n\t"
- " jb 2b \n\t"
- "mov %2, %%"REG_a" \n\t"
- "movl %4, %%ecx \n\t"
- "add %1, %%"REG_c" \n\t"
- "movl %%ecx, (%%"REG_a") \n\t"
- "4: \n\t"
- "add %6, %%eax \n\t"
- "shr $2, %%eax \n\t"
-
- "movl %%esi, "RANGE "(%3) \n\t"
- "movl %%ebx, "LOW "(%3) \n\t"
- :"=&a"(coeff_count), "+r"(significant_coeff_ctx_base), "+m"(index)
- :"r"(c), "m"(minusstart), "m"(end), "m"(minusindex)
- : "%"REG_c, "%ebx", "%edx", "%esi", "memory"
- );
- return coeff_count;
-}
-
-static int decode_significance_8x8_x86(CABACContext *c,
- uint8_t *significant_coeff_ctx_base,
- int *index, const uint8_t *sig_off){
- int minusindex= 4-(int)index;
- int coeff_count;
- x86_reg last=0;
- __asm__ volatile(
- "movl "RANGE "(%3), %%esi \n\t"
- "movl "LOW "(%3), %%ebx \n\t"
-
- "mov %1, %%"REG_D" \n\t"
- "2: \n\t"
-
- "mov %6, %%"REG_a" \n\t"
- "movzbl (%%"REG_a", %%"REG_D"), %%edi \n\t"
- "add %5, %%"REG_D" \n\t"
-
- BRANCHLESS_GET_CABAC("%%edx", "%3", "(%%"REG_D")", "%%ebx",
- "%%bx", "%%esi", "%%eax", "%%al")
-
- "mov %1, %%edi \n\t"
- "test $1, %%edx \n\t"
- " jz 3f \n\t"
-
- "movzbl "MANGLE(last_coeff_flag_offset_8x8)"(%%edi), %%edi\n\t"
- "add %5, %%"REG_D" \n\t"
-
- BRANCHLESS_GET_CABAC("%%edx", "%3", "15(%%"REG_D")", "%%ebx",
- "%%bx", "%%esi", "%%eax", "%%al")
-
- "mov %2, %%"REG_a" \n\t"
- "mov %1, %%edi \n\t"
- "movl %%edi, (%%"REG_a") \n\t"
-
- "test $1, %%edx \n\t"
- " jnz 4f \n\t"
-
- "add $4, %%"REG_a" \n\t"
- "mov %%"REG_a", %2 \n\t"
-
- "3: \n\t"
- "addl $1, %%edi \n\t"
- "mov %%edi, %1 \n\t"
- "cmpl $63, %%edi \n\t"
- " jb 2b \n\t"
- "mov %2, %%"REG_a" \n\t"
- "movl %%edi, (%%"REG_a") \n\t"
- "4: \n\t"
- "addl %4, %%eax \n\t"
- "shr $2, %%eax \n\t"
-
- "movl %%esi, "RANGE "(%3) \n\t"
- "movl %%ebx, "LOW "(%3) \n\t"
- :"=&a"(coeff_count),"+m"(last), "+m"(index)
- :"r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), "m"(sig_off)
- : "%"REG_c, "%ebx", "%edx", "%esi", "%"REG_D, "memory"
- );
- return coeff_count;
-}
-#endif /* defined(ARCH_X86) && defined(HAVE_7REGS) && */
- /* defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS) */
-
-#endif /* AVCODEC_I386_H264_I386_H */
diff --git a/libavcodec/i386/h264dsp_mmx.c b/libavcodec/i386/h264dsp_mmx.c
deleted file mode 100644
index bb9c82d..0000000
--- a/libavcodec/i386/h264dsp_mmx.c
+++ /dev/null
@@ -1,2113 +0,0 @@
-/*
- * Copyright (c) 2004-2005 Michael Niedermayer, Loren Merritt
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "dsputil_mmx.h"
-
-DECLARE_ALIGNED_8 (static const uint64_t, ff_pb_3_1 ) = 0x0103010301030103ULL;
-DECLARE_ALIGNED_8 (static const uint64_t, ff_pb_7_3 ) = 0x0307030703070307ULL;
-
-/***********************************/
-/* IDCT */
-
-#define SUMSUB_BADC( a, b, c, d ) \
- "paddw "#b", "#a" \n\t"\
- "paddw "#d", "#c" \n\t"\
- "paddw "#b", "#b" \n\t"\
- "paddw "#d", "#d" \n\t"\
- "psubw "#a", "#b" \n\t"\
- "psubw "#c", "#d" \n\t"
-
-#define SUMSUBD2_AB( a, b, t ) \
- "movq "#b", "#t" \n\t"\
- "psraw $1 , "#b" \n\t"\
- "paddw "#a", "#b" \n\t"\
- "psraw $1 , "#a" \n\t"\
- "psubw "#t", "#a" \n\t"
-
-#define IDCT4_1D( s02, s13, d02, d13, t ) \
- SUMSUB_BA ( s02, d02 )\
- SUMSUBD2_AB( s13, d13, t )\
- SUMSUB_BADC( d13, s02, s13, d02 )
-
-#define STORE_DIFF_4P( p, t, z ) \
- "psraw $6, "#p" \n\t"\
- "movd (%0), "#t" \n\t"\
- "punpcklbw "#z", "#t" \n\t"\
- "paddsw "#t", "#p" \n\t"\
- "packuswb "#z", "#p" \n\t"\
- "movd "#p", (%0) \n\t"
-
-static void ff_h264_idct_add_mmx(uint8_t *dst, int16_t *block, int stride)
-{
- /* Load dct coeffs */
- __asm__ volatile(
- "movq (%0), %%mm0 \n\t"
- "movq 8(%0), %%mm1 \n\t"
- "movq 16(%0), %%mm2 \n\t"
- "movq 24(%0), %%mm3 \n\t"
- :: "r"(block) );
-
- __asm__ volatile(
- /* mm1=s02+s13 mm2=s02-s13 mm4=d02+d13 mm0=d02-d13 */
- IDCT4_1D( %%mm2, %%mm1, %%mm0, %%mm3, %%mm4 )
-
- "movq %0, %%mm6 \n\t"
- /* in: 1,4,0,2 out: 1,2,3,0 */
- TRANSPOSE4( %%mm3, %%mm1, %%mm0, %%mm2, %%mm4 )
-
- "paddw %%mm6, %%mm3 \n\t"
-
- /* mm2=s02+s13 mm3=s02-s13 mm4=d02+d13 mm1=d02-d13 */
- IDCT4_1D( %%mm4, %%mm2, %%mm3, %%mm0, %%mm1 )
-
- "pxor %%mm7, %%mm7 \n\t"
- :: "m"(ff_pw_32));
-
- __asm__ volatile(
- STORE_DIFF_4P( %%mm0, %%mm1, %%mm7)
- "add %1, %0 \n\t"
- STORE_DIFF_4P( %%mm2, %%mm1, %%mm7)
- "add %1, %0 \n\t"
- STORE_DIFF_4P( %%mm3, %%mm1, %%mm7)
- "add %1, %0 \n\t"
- STORE_DIFF_4P( %%mm4, %%mm1, %%mm7)
- : "+r"(dst)
- : "r" ((x86_reg)stride)
- );
-}
-
-static inline void h264_idct8_1d(int16_t *block)
-{
- __asm__ volatile(
- "movq 112(%0), %%mm7 \n\t"
- "movq 80(%0), %%mm0 \n\t"
- "movq 48(%0), %%mm3 \n\t"
- "movq 16(%0), %%mm5 \n\t"
-
- "movq %%mm0, %%mm4 \n\t"
- "movq %%mm5, %%mm1 \n\t"
- "psraw $1, %%mm4 \n\t"
- "psraw $1, %%mm1 \n\t"
- "paddw %%mm0, %%mm4 \n\t"
- "paddw %%mm5, %%mm1 \n\t"
- "paddw %%mm7, %%mm4 \n\t"
- "paddw %%mm0, %%mm1 \n\t"
- "psubw %%mm5, %%mm4 \n\t"
- "paddw %%mm3, %%mm1 \n\t"
-
- "psubw %%mm3, %%mm5 \n\t"
- "psubw %%mm3, %%mm0 \n\t"
- "paddw %%mm7, %%mm5 \n\t"
- "psubw %%mm7, %%mm0 \n\t"
- "psraw $1, %%mm3 \n\t"
- "psraw $1, %%mm7 \n\t"
- "psubw %%mm3, %%mm5 \n\t"
- "psubw %%mm7, %%mm0 \n\t"
-
- "movq %%mm4, %%mm3 \n\t"
- "movq %%mm1, %%mm7 \n\t"
- "psraw $2, %%mm1 \n\t"
- "psraw $2, %%mm3 \n\t"
- "paddw %%mm5, %%mm3 \n\t"
- "psraw $2, %%mm5 \n\t"
- "paddw %%mm0, %%mm1 \n\t"
- "psraw $2, %%mm0 \n\t"
- "psubw %%mm4, %%mm5 \n\t"
- "psubw %%mm0, %%mm7 \n\t"
-
- "movq 32(%0), %%mm2 \n\t"
- "movq 96(%0), %%mm6 \n\t"
- "movq %%mm2, %%mm4 \n\t"
- "movq %%mm6, %%mm0 \n\t"
- "psraw $1, %%mm4 \n\t"
- "psraw $1, %%mm6 \n\t"
- "psubw %%mm0, %%mm4 \n\t"
- "paddw %%mm2, %%mm6 \n\t"
-
- "movq (%0), %%mm2 \n\t"
- "movq 64(%0), %%mm0 \n\t"
- SUMSUB_BA( %%mm0, %%mm2 )
- SUMSUB_BA( %%mm6, %%mm0 )
- SUMSUB_BA( %%mm4, %%mm2 )
- SUMSUB_BA( %%mm7, %%mm6 )
- SUMSUB_BA( %%mm5, %%mm4 )
- SUMSUB_BA( %%mm3, %%mm2 )
- SUMSUB_BA( %%mm1, %%mm0 )
- :: "r"(block)
- );
-}
-
-static void ff_h264_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
-{
- int i;
- int16_t __attribute__ ((aligned(8))) b2[64];
-
- block[0] += 32;
-
- for(i=0; i<2; i++){
- DECLARE_ALIGNED_8(uint64_t, tmp);
-
- h264_idct8_1d(block+4*i);
-
- __asm__ volatile(
- "movq %%mm7, %0 \n\t"
- TRANSPOSE4( %%mm0, %%mm2, %%mm4, %%mm6, %%mm7 )
- "movq %%mm0, 8(%1) \n\t"
- "movq %%mm6, 24(%1) \n\t"
- "movq %%mm7, 40(%1) \n\t"
- "movq %%mm4, 56(%1) \n\t"
- "movq %0, %%mm7 \n\t"
- TRANSPOSE4( %%mm7, %%mm5, %%mm3, %%mm1, %%mm0 )
- "movq %%mm7, (%1) \n\t"
- "movq %%mm1, 16(%1) \n\t"
- "movq %%mm0, 32(%1) \n\t"
- "movq %%mm3, 48(%1) \n\t"
- : "=m"(tmp)
- : "r"(b2+32*i)
- : "memory"
- );
- }
-
- for(i=0; i<2; i++){
- h264_idct8_1d(b2+4*i);
-
- __asm__ volatile(
- "psraw $6, %%mm7 \n\t"
- "psraw $6, %%mm6 \n\t"
- "psraw $6, %%mm5 \n\t"
- "psraw $6, %%mm4 \n\t"
- "psraw $6, %%mm3 \n\t"
- "psraw $6, %%mm2 \n\t"
- "psraw $6, %%mm1 \n\t"
- "psraw $6, %%mm0 \n\t"
-
- "movq %%mm7, (%0) \n\t"
- "movq %%mm5, 16(%0) \n\t"
- "movq %%mm3, 32(%0) \n\t"
- "movq %%mm1, 48(%0) \n\t"
- "movq %%mm0, 64(%0) \n\t"
- "movq %%mm2, 80(%0) \n\t"
- "movq %%mm4, 96(%0) \n\t"
- "movq %%mm6, 112(%0) \n\t"
- :: "r"(b2+4*i)
- : "memory"
- );
- }
-
- add_pixels_clamped_mmx(b2, dst, stride);
-}
-
-#define STORE_DIFF_8P( p, d, t, z )\
- "movq "#d", "#t" \n"\
- "psraw $6, "#p" \n"\
- "punpcklbw "#z", "#t" \n"\
- "paddsw "#t", "#p" \n"\
- "packuswb "#p", "#p" \n"\
- "movq "#p", "#d" \n"
-
-#define H264_IDCT8_1D_SSE2(a,b,c,d,e,f,g,h)\
- "movdqa "#c", "#a" \n"\
- "movdqa "#g", "#e" \n"\
- "psraw $1, "#c" \n"\
- "psraw $1, "#g" \n"\
- "psubw "#e", "#c" \n"\
- "paddw "#a", "#g" \n"\
- "movdqa "#b", "#e" \n"\
- "psraw $1, "#e" \n"\
- "paddw "#b", "#e" \n"\
- "paddw "#d", "#e" \n"\
- "paddw "#f", "#e" \n"\
- "movdqa "#f", "#a" \n"\
- "psraw $1, "#a" \n"\
- "paddw "#f", "#a" \n"\
- "paddw "#h", "#a" \n"\
- "psubw "#b", "#a" \n"\
- "psubw "#d", "#b" \n"\
- "psubw "#d", "#f" \n"\
- "paddw "#h", "#b" \n"\
- "psubw "#h", "#f" \n"\
- "psraw $1, "#d" \n"\
- "psraw $1, "#h" \n"\
- "psubw "#d", "#b" \n"\
- "psubw "#h", "#f" \n"\
- "movdqa "#e", "#d" \n"\
- "movdqa "#a", "#h" \n"\
- "psraw $2, "#d" \n"\
- "psraw $2, "#h" \n"\
- "paddw "#f", "#d" \n"\
- "paddw "#b", "#h" \n"\
- "psraw $2, "#f" \n"\
- "psraw $2, "#b" \n"\
- "psubw "#f", "#e" \n"\
- "psubw "#a", "#b" \n"\
- "movdqa 0x00(%1), "#a" \n"\
- "movdqa 0x40(%1), "#f" \n"\
- SUMSUB_BA(f, a)\
- SUMSUB_BA(g, f)\
- SUMSUB_BA(c, a)\
- SUMSUB_BA(e, g)\
- SUMSUB_BA(b, c)\
- SUMSUB_BA(h, a)\
- SUMSUB_BA(d, f)
-
-static void ff_h264_idct8_add_sse2(uint8_t *dst, int16_t *block, int stride)
-{
- __asm__ volatile(
- "movdqa 0x10(%1), %%xmm1 \n"
- "movdqa 0x20(%1), %%xmm2 \n"
- "movdqa 0x30(%1), %%xmm3 \n"
- "movdqa 0x50(%1), %%xmm5 \n"
- "movdqa 0x60(%1), %%xmm6 \n"
- "movdqa 0x70(%1), %%xmm7 \n"
- H264_IDCT8_1D_SSE2(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm6, %%xmm7)
- TRANSPOSE8(%%xmm4, %%xmm1, %%xmm7, %%xmm3, %%xmm5, %%xmm0, %%xmm2, %%xmm6, (%1))
- "paddw %4, %%xmm4 \n"
- "movdqa %%xmm4, 0x00(%1) \n"
- "movdqa %%xmm2, 0x40(%1) \n"
- H264_IDCT8_1D_SSE2(%%xmm4, %%xmm0, %%xmm6, %%xmm3, %%xmm2, %%xmm5, %%xmm7, %%xmm1)
- "movdqa %%xmm6, 0x60(%1) \n"
- "movdqa %%xmm7, 0x70(%1) \n"
- "pxor %%xmm7, %%xmm7 \n"
- STORE_DIFF_8P(%%xmm2, (%0), %%xmm6, %%xmm7)
- STORE_DIFF_8P(%%xmm0, (%0,%2), %%xmm6, %%xmm7)
- STORE_DIFF_8P(%%xmm1, (%0,%2,2), %%xmm6, %%xmm7)
- STORE_DIFF_8P(%%xmm3, (%0,%3), %%xmm6, %%xmm7)
- "lea (%0,%2,4), %0 \n"
- STORE_DIFF_8P(%%xmm5, (%0), %%xmm6, %%xmm7)
- STORE_DIFF_8P(%%xmm4, (%0,%2), %%xmm6, %%xmm7)
- "movdqa 0x60(%1), %%xmm0 \n"
- "movdqa 0x70(%1), %%xmm1 \n"
- STORE_DIFF_8P(%%xmm0, (%0,%2,2), %%xmm6, %%xmm7)
- STORE_DIFF_8P(%%xmm1, (%0,%3), %%xmm6, %%xmm7)
- :"+r"(dst)
- :"r"(block), "r"((x86_reg)stride), "r"((x86_reg)3L*stride), "m"(ff_pw_32)
- );
-}
-
-static void ff_h264_idct_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride)
-{
- int dc = (block[0] + 32) >> 6;
- __asm__ volatile(
- "movd %0, %%mm0 \n\t"
- "pshufw $0, %%mm0, %%mm0 \n\t"
- "pxor %%mm1, %%mm1 \n\t"
- "psubw %%mm0, %%mm1 \n\t"
- "packuswb %%mm0, %%mm0 \n\t"
- "packuswb %%mm1, %%mm1 \n\t"
- ::"r"(dc)
- );
- __asm__ volatile(
- "movd %0, %%mm2 \n\t"
- "movd %1, %%mm3 \n\t"
- "movd %2, %%mm4 \n\t"
- "movd %3, %%mm5 \n\t"
- "paddusb %%mm0, %%mm2 \n\t"
- "paddusb %%mm0, %%mm3 \n\t"
- "paddusb %%mm0, %%mm4 \n\t"
- "paddusb %%mm0, %%mm5 \n\t"
- "psubusb %%mm1, %%mm2 \n\t"
- "psubusb %%mm1, %%mm3 \n\t"
- "psubusb %%mm1, %%mm4 \n\t"
- "psubusb %%mm1, %%mm5 \n\t"
- "movd %%mm2, %0 \n\t"
- "movd %%mm3, %1 \n\t"
- "movd %%mm4, %2 \n\t"
- "movd %%mm5, %3 \n\t"
- :"+m"(*(uint32_t*)(dst+0*stride)),
- "+m"(*(uint32_t*)(dst+1*stride)),
- "+m"(*(uint32_t*)(dst+2*stride)),
- "+m"(*(uint32_t*)(dst+3*stride))
- );
-}
-
-static void ff_h264_idct8_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride)
-{
- int dc = (block[0] + 32) >> 6;
- int y;
- __asm__ volatile(
- "movd %0, %%mm0 \n\t"
- "pshufw $0, %%mm0, %%mm0 \n\t"
- "pxor %%mm1, %%mm1 \n\t"
- "psubw %%mm0, %%mm1 \n\t"
- "packuswb %%mm0, %%mm0 \n\t"
- "packuswb %%mm1, %%mm1 \n\t"
- ::"r"(dc)
- );
- for(y=2; y--; dst += 4*stride){
- __asm__ volatile(
- "movq %0, %%mm2 \n\t"
- "movq %1, %%mm3 \n\t"
- "movq %2, %%mm4 \n\t"
- "movq %3, %%mm5 \n\t"
- "paddusb %%mm0, %%mm2 \n\t"
- "paddusb %%mm0, %%mm3 \n\t"
- "paddusb %%mm0, %%mm4 \n\t"
- "paddusb %%mm0, %%mm5 \n\t"
- "psubusb %%mm1, %%mm2 \n\t"
- "psubusb %%mm1, %%mm3 \n\t"
- "psubusb %%mm1, %%mm4 \n\t"
- "psubusb %%mm1, %%mm5 \n\t"
- "movq %%mm2, %0 \n\t"
- "movq %%mm3, %1 \n\t"
- "movq %%mm4, %2 \n\t"
- "movq %%mm5, %3 \n\t"
- :"+m"(*(uint64_t*)(dst+0*stride)),
- "+m"(*(uint64_t*)(dst+1*stride)),
- "+m"(*(uint64_t*)(dst+2*stride)),
- "+m"(*(uint64_t*)(dst+3*stride))
- );
- }
-}
-
-
-/***********************************/
-/* deblocking */
-
-// out: o = |x-y|>a
-// clobbers: t
-#define DIFF_GT_MMX(x,y,a,o,t)\
- "movq "#y", "#t" \n\t"\
- "movq "#x", "#o" \n\t"\
- "psubusb "#x", "#t" \n\t"\
- "psubusb "#y", "#o" \n\t"\
- "por "#t", "#o" \n\t"\
- "psubusb "#a", "#o" \n\t"
-
-// out: o = |x-y|>a
-// clobbers: t
-#define DIFF_GT2_MMX(x,y,a,o,t)\
- "movq "#y", "#t" \n\t"\
- "movq "#x", "#o" \n\t"\
- "psubusb "#x", "#t" \n\t"\
- "psubusb "#y", "#o" \n\t"\
- "psubusb "#a", "#t" \n\t"\
- "psubusb "#a", "#o" \n\t"\
- "pcmpeqb "#t", "#o" \n\t"\
-
-// in: mm0=p1 mm1=p0 mm2=q0 mm3=q1
-// out: mm5=beta-1, mm7=mask
-// clobbers: mm4,mm6
-#define H264_DEBLOCK_MASK(alpha1, beta1) \
- "pshufw $0, "#alpha1", %%mm4 \n\t"\
- "pshufw $0, "#beta1 ", %%mm5 \n\t"\
- "packuswb %%mm4, %%mm4 \n\t"\
- "packuswb %%mm5, %%mm5 \n\t"\
- DIFF_GT_MMX(%%mm1, %%mm2, %%mm4, %%mm7, %%mm6) /* |p0-q0| > alpha-1 */\
- DIFF_GT_MMX(%%mm0, %%mm1, %%mm5, %%mm4, %%mm6) /* |p1-p0| > beta-1 */\
- "por %%mm4, %%mm7 \n\t"\
- DIFF_GT_MMX(%%mm3, %%mm2, %%mm5, %%mm4, %%mm6) /* |q1-q0| > beta-1 */\
- "por %%mm4, %%mm7 \n\t"\
- "pxor %%mm6, %%mm6 \n\t"\
- "pcmpeqb %%mm6, %%mm7 \n\t"
-
-// in: mm0=p1 mm1=p0 mm2=q0 mm3=q1 mm7=(tc&mask)
-// out: mm1=p0' mm2=q0'
-// clobbers: mm0,3-6
-#define H264_DEBLOCK_P0_Q0(pb_01, pb_3f)\
- "movq %%mm1 , %%mm5 \n\t"\
- "pxor %%mm2 , %%mm5 \n\t" /* p0^q0*/\
- "pand "#pb_01" , %%mm5 \n\t" /* (p0^q0)&1*/\
- "pcmpeqb %%mm4 , %%mm4 \n\t"\
- "pxor %%mm4 , %%mm3 \n\t"\
- "pavgb %%mm0 , %%mm3 \n\t" /* (p1 - q1 + 256)>>1*/\
- "pavgb "MANGLE(ff_pb_3)" , %%mm3 \n\t" /*(((p1 - q1 + 256)>>1)+4)>>1 = 64+2+(p1-q1)>>2*/\
- "pxor %%mm1 , %%mm4 \n\t"\
- "pavgb %%mm2 , %%mm4 \n\t" /* (q0 - p0 + 256)>>1*/\
- "pavgb %%mm5 , %%mm3 \n\t"\
- "paddusb %%mm4 , %%mm3 \n\t" /* d+128+33*/\
- "movq "MANGLE(ff_pb_A1)" , %%mm6 \n\t"\
- "psubusb %%mm3 , %%mm6 \n\t"\
- "psubusb "MANGLE(ff_pb_A1)" , %%mm3 \n\t"\
- "pminub %%mm7 , %%mm6 \n\t"\
- "pminub %%mm7 , %%mm3 \n\t"\
- "psubusb %%mm6 , %%mm1 \n\t"\
- "psubusb %%mm3 , %%mm2 \n\t"\
- "paddusb %%mm3 , %%mm1 \n\t"\
- "paddusb %%mm6 , %%mm2 \n\t"
-
-// in: mm0=p1 mm1=p0 mm2=q0 mm3=q1 mm7=(tc&mask) %8=ff_bone
-// out: (q1addr) = av_clip( (q2+((p0+q0+1)>>1))>>1, q1-tc0, q1+tc0 )
-// clobbers: q2, tmp, tc0
-#define H264_DEBLOCK_Q1(p1, q2, q2addr, q1addr, tc0, tmp)\
- "movq %%mm1, "#tmp" \n\t"\
- "pavgb %%mm2, "#tmp" \n\t"\
- "pavgb "#tmp", "#q2" \n\t" /* avg(p2,avg(p0,q0)) */\
- "pxor "q2addr", "#tmp" \n\t"\
- "pand %8, "#tmp" \n\t" /* (p2^avg(p0,q0))&1 */\
- "psubusb "#tmp", "#q2" \n\t" /* (p2+((p0+q0+1)>>1))>>1 */\
- "movq "#p1", "#tmp" \n\t"\
- "psubusb "#tc0", "#tmp" \n\t"\
- "paddusb "#p1", "#tc0" \n\t"\
- "pmaxub "#tmp", "#q2" \n\t"\
- "pminub "#tc0", "#q2" \n\t"\
- "movq "#q2", "q1addr" \n\t"
-
-static inline void h264_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alpha1, int beta1, int8_t *tc0)
-{
- DECLARE_ALIGNED_8(uint64_t, tmp0[2]);
-
- __asm__ volatile(
- "movq (%1,%3), %%mm0 \n\t" //p1
- "movq (%1,%3,2), %%mm1 \n\t" //p0
- "movq (%2), %%mm2 \n\t" //q0
- "movq (%2,%3), %%mm3 \n\t" //q1
- H264_DEBLOCK_MASK(%6, %7)
-
- "movd %5, %%mm4 \n\t"
- "punpcklbw %%mm4, %%mm4 \n\t"
- "punpcklwd %%mm4, %%mm4 \n\t"
- "pcmpeqb %%mm3, %%mm3 \n\t"
- "movq %%mm4, %%mm6 \n\t"
- "pcmpgtb %%mm3, %%mm4 \n\t"
- "movq %%mm6, 8+%0 \n\t"
- "pand %%mm4, %%mm7 \n\t"
- "movq %%mm7, %0 \n\t"
-
- /* filter p1 */
- "movq (%1), %%mm3 \n\t" //p2
- DIFF_GT2_MMX(%%mm1, %%mm3, %%mm5, %%mm6, %%mm4) // |p2-p0|>beta-1
- "pand %%mm7, %%mm6 \n\t" // mask & |p2-p0|<beta
- "pand 8+%0, %%mm7 \n\t" // mask & tc0
- "movq %%mm7, %%mm4 \n\t"
- "psubb %%mm6, %%mm7 \n\t"
- "pand %%mm4, %%mm6 \n\t" // mask & |p2-p0|<beta & tc0
- H264_DEBLOCK_Q1(%%mm0, %%mm3, "(%1)", "(%1,%3)", %%mm6, %%mm4)
-
- /* filter q1 */
- "movq (%2,%3,2), %%mm4 \n\t" //q2
- DIFF_GT2_MMX(%%mm2, %%mm4, %%mm5, %%mm6, %%mm3) // |q2-q0|>beta-1
- "pand %0, %%mm6 \n\t"
- "movq 8+%0, %%mm5 \n\t" // can be merged with the and below but is slower then
- "pand %%mm6, %%mm5 \n\t"
- "psubb %%mm6, %%mm7 \n\t"
- "movq (%2,%3), %%mm3 \n\t"
- H264_DEBLOCK_Q1(%%mm3, %%mm4, "(%2,%3,2)", "(%2,%3)", %%mm5, %%mm6)
-
- /* filter p0, q0 */
- H264_DEBLOCK_P0_Q0(%8, unused)
- "movq %%mm1, (%1,%3,2) \n\t"
- "movq %%mm2, (%2) \n\t"
-
- : "=m"(*tmp0)
- : "r"(pix-3*stride), "r"(pix), "r"((x86_reg)stride),
- "m"(*tmp0/*unused*/), "m"(*(uint32_t*)tc0), "m"(alpha1), "m"(beta1),
- "m"(ff_bone)
- );
-}
-
-static void h264_v_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
-{
- if((tc0[0] & tc0[1]) >= 0)
- h264_loop_filter_luma_mmx2(pix, stride, alpha-1, beta-1, tc0);
- if((tc0[2] & tc0[3]) >= 0)
- h264_loop_filter_luma_mmx2(pix+8, stride, alpha-1, beta-1, tc0+2);
-}
-static void h264_h_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
-{
- //FIXME: could cut some load/stores by merging transpose with filter
- // also, it only needs to transpose 6x8
- DECLARE_ALIGNED_8(uint8_t, trans[8*8]);
- int i;
- for(i=0; i<2; i++, pix+=8*stride, tc0+=2) {
- if((tc0[0] & tc0[1]) < 0)
- continue;
- transpose4x4(trans, pix-4, 8, stride);
- transpose4x4(trans +4*8, pix, 8, stride);
- transpose4x4(trans+4, pix-4+4*stride, 8, stride);
- transpose4x4(trans+4+4*8, pix +4*stride, 8, stride);
- h264_loop_filter_luma_mmx2(trans+4*8, 8, alpha-1, beta-1, tc0);
- transpose4x4(pix-2, trans +2*8, stride, 8);
- transpose4x4(pix-2+4*stride, trans+4+2*8, stride, 8);
- }
-}
-
-static inline void h264_loop_filter_chroma_mmx2(uint8_t *pix, int stride, int alpha1, int beta1, int8_t *tc0)
-{
- __asm__ volatile(
- "movq (%0), %%mm0 \n\t" //p1
- "movq (%0,%2), %%mm1 \n\t" //p0
- "movq (%1), %%mm2 \n\t" //q0
- "movq (%1,%2), %%mm3 \n\t" //q1
- H264_DEBLOCK_MASK(%4, %5)
- "movd %3, %%mm6 \n\t"
- "punpcklbw %%mm6, %%mm6 \n\t"
- "pand %%mm6, %%mm7 \n\t" // mm7 = tc&mask
- H264_DEBLOCK_P0_Q0(%6, %7)
- "movq %%mm1, (%0,%2) \n\t"
- "movq %%mm2, (%1) \n\t"
-
- :: "r"(pix-2*stride), "r"(pix), "r"((x86_reg)stride),
- "r"(*(uint32_t*)tc0),
- "m"(alpha1), "m"(beta1), "m"(ff_bone), "m"(ff_pb_3F)
- );
-}
-
-static void h264_v_loop_filter_chroma_mmx2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
-{
- h264_loop_filter_chroma_mmx2(pix, stride, alpha-1, beta-1, tc0);
-}
-
-static void h264_h_loop_filter_chroma_mmx2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
-{
- //FIXME: could cut some load/stores by merging transpose with filter
- DECLARE_ALIGNED_8(uint8_t, trans[8*4]);
- transpose4x4(trans, pix-2, 8, stride);
- transpose4x4(trans+4, pix-2+4*stride, 8, stride);
- h264_loop_filter_chroma_mmx2(trans+2*8, 8, alpha-1, beta-1, tc0);
- transpose4x4(pix-2, trans, stride, 8);
- transpose4x4(pix-2+4*stride, trans+4, stride, 8);
-}
-
-// p0 = (p0 + q1 + 2*p1 + 2) >> 2
-#define H264_FILTER_CHROMA4(p0, p1, q1, one) \
- "movq "#p0", %%mm4 \n\t"\
- "pxor "#q1", %%mm4 \n\t"\
- "pand "#one", %%mm4 \n\t" /* mm4 = (p0^q1)&1 */\
- "pavgb "#q1", "#p0" \n\t"\
- "psubusb %%mm4, "#p0" \n\t"\
- "pavgb "#p1", "#p0" \n\t" /* dst = avg(p1, avg(p0,q1) - ((p0^q1)&1)) */\
-
-static inline void h264_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int alpha1, int beta1)
-{
- __asm__ volatile(
- "movq (%0), %%mm0 \n\t"
- "movq (%0,%2), %%mm1 \n\t"
- "movq (%1), %%mm2 \n\t"
- "movq (%1,%2), %%mm3 \n\t"
- H264_DEBLOCK_MASK(%3, %4)
- "movq %%mm1, %%mm5 \n\t"
- "movq %%mm2, %%mm6 \n\t"
- H264_FILTER_CHROMA4(%%mm1, %%mm0, %%mm3, %5) //p0'
- H264_FILTER_CHROMA4(%%mm2, %%mm3, %%mm0, %5) //q0'
- "psubb %%mm5, %%mm1 \n\t"
- "psubb %%mm6, %%mm2 \n\t"
- "pand %%mm7, %%mm1 \n\t"
- "pand %%mm7, %%mm2 \n\t"
- "paddb %%mm5, %%mm1 \n\t"
- "paddb %%mm6, %%mm2 \n\t"
- "movq %%mm1, (%0,%2) \n\t"
- "movq %%mm2, (%1) \n\t"
- :: "r"(pix-2*stride), "r"(pix), "r"((x86_reg)stride),
- "m"(alpha1), "m"(beta1), "m"(ff_bone)
- );
-}
-
-static void h264_v_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int alpha, int beta)
-{
- h264_loop_filter_chroma_intra_mmx2(pix, stride, alpha-1, beta-1);
-}
-
-static void h264_h_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int alpha, int beta)
-{
- //FIXME: could cut some load/stores by merging transpose with filter
- DECLARE_ALIGNED_8(uint8_t, trans[8*4]);
- transpose4x4(trans, pix-2, 8, stride);
- transpose4x4(trans+4, pix-2+4*stride, 8, stride);
- h264_loop_filter_chroma_intra_mmx2(trans+2*8, 8, alpha-1, beta-1);
- transpose4x4(pix-2, trans, stride, 8);
- transpose4x4(pix-2+4*stride, trans+4, stride, 8);
-}
-
-static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2],
- int bidir, int edges, int step, int mask_mv0, int mask_mv1, int field ) {
- int dir;
- __asm__ volatile(
- "pxor %%mm7, %%mm7 \n\t"
- "movq %0, %%mm6 \n\t"
- "movq %1, %%mm5 \n\t"
- "movq %2, %%mm4 \n\t"
- ::"m"(ff_pb_1), "m"(ff_pb_3), "m"(ff_pb_7)
- );
- if(field)
- __asm__ volatile(
- "movq %0, %%mm5 \n\t"
- "movq %1, %%mm4 \n\t"
- ::"m"(ff_pb_3_1), "m"(ff_pb_7_3)
- );
-
- // could do a special case for dir==0 && edges==1, but it only reduces the
- // average filter time by 1.2%
- for( dir=1; dir>=0; dir-- ) {
- const int d_idx = dir ? -8 : -1;
- const int mask_mv = dir ? mask_mv1 : mask_mv0;
- DECLARE_ALIGNED_8(const uint64_t, mask_dir) = dir ? 0 : 0xffffffffffffffffULL;
- int b_idx, edge, l;
- for( b_idx=12, edge=0; edge<edges; edge+=step, b_idx+=8*step ) {
- __asm__ volatile(
- "pand %0, %%mm0 \n\t"
- ::"m"(mask_dir)
- );
- if(!(mask_mv & edge)) {
- __asm__ volatile("pxor %%mm0, %%mm0 \n\t":);
- for( l = bidir; l >= 0; l-- ) {
- __asm__ volatile(
- "movd %0, %%mm1 \n\t"
- "punpckldq %1, %%mm1 \n\t"
- "movq %%mm1, %%mm2 \n\t"
- "psrlw $7, %%mm2 \n\t"
- "pand %%mm6, %%mm2 \n\t"
- "por %%mm2, %%mm1 \n\t" // ref_cache with -2 mapped to -1
- "punpckldq %%mm1, %%mm2 \n\t"
- "pcmpeqb %%mm2, %%mm1 \n\t"
- "paddb %%mm6, %%mm1 \n\t"
- "punpckhbw %%mm7, %%mm1 \n\t" // ref[b] != ref[bn]
- "por %%mm1, %%mm0 \n\t"
-
- "movq %2, %%mm1 \n\t"
- "movq %3, %%mm2 \n\t"
- "psubw %4, %%mm1 \n\t"
- "psubw %5, %%mm2 \n\t"
- "packsswb %%mm2, %%mm1 \n\t"
- "paddb %%mm5, %%mm1 \n\t"
- "pminub %%mm4, %%mm1 \n\t"
- "pcmpeqb %%mm4, %%mm1 \n\t" // abs(mv[b] - mv[bn]) >= limit
- "por %%mm1, %%mm0 \n\t"
- ::"m"(ref[l][b_idx]),
- "m"(ref[l][b_idx+d_idx]),
- "m"(mv[l][b_idx][0]),
- "m"(mv[l][b_idx+2][0]),
- "m"(mv[l][b_idx+d_idx][0]),
- "m"(mv[l][b_idx+d_idx+2][0])
- );
- }
- }
- __asm__ volatile(
- "movd %0, %%mm1 \n\t"
- "por %1, %%mm1 \n\t"
- "punpcklbw %%mm7, %%mm1 \n\t"
- "pcmpgtw %%mm7, %%mm1 \n\t" // nnz[b] || nnz[bn]
- ::"m"(nnz[b_idx]),
- "m"(nnz[b_idx+d_idx])
- );
- __asm__ volatile(
- "pcmpeqw %%mm7, %%mm0 \n\t"
- "pcmpeqw %%mm7, %%mm0 \n\t"
- "psrlw $15, %%mm0 \n\t" // nonzero -> 1
- "psrlw $14, %%mm1 \n\t"
- "movq %%mm0, %%mm2 \n\t"
- "por %%mm1, %%mm2 \n\t"
- "psrlw $1, %%mm1 \n\t"
- "pandn %%mm2, %%mm1 \n\t"
- "movq %%mm1, %0 \n\t"
- :"=m"(*bS[dir][edge])
- ::"memory"
- );
- }
- edges = 4;
- step = 1;
- }
- __asm__ volatile(
- "movq (%0), %%mm0 \n\t"
- "movq 8(%0), %%mm1 \n\t"
- "movq 16(%0), %%mm2 \n\t"
- "movq 24(%0), %%mm3 \n\t"
- TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4)
- "movq %%mm0, (%0) \n\t"
- "movq %%mm3, 8(%0) \n\t"
- "movq %%mm4, 16(%0) \n\t"
- "movq %%mm2, 24(%0) \n\t"
- ::"r"(bS[0])
- :"memory"
- );
-}
-
-/***********************************/
-/* motion compensation */
-
-#define QPEL_H264V_MM(A,B,C,D,E,F,OP,T,Z,d,q)\
- "mov"#q" "#C", "#T" \n\t"\
- "mov"#d" (%0), "#F" \n\t"\
- "paddw "#D", "#T" \n\t"\
- "psllw $2, "#T" \n\t"\
- "psubw "#B", "#T" \n\t"\
- "psubw "#E", "#T" \n\t"\
- "punpcklbw "#Z", "#F" \n\t"\
- "pmullw %4, "#T" \n\t"\
- "paddw %5, "#A" \n\t"\
- "add %2, %0 \n\t"\
- "paddw "#F", "#A" \n\t"\
- "paddw "#A", "#T" \n\t"\
- "psraw $5, "#T" \n\t"\
- "packuswb "#T", "#T" \n\t"\
- OP(T, (%1), A, d)\
- "add %3, %1 \n\t"
-
-#define QPEL_H264HV_MM(A,B,C,D,E,F,OF,T,Z,d,q)\
- "mov"#q" "#C", "#T" \n\t"\
- "mov"#d" (%0), "#F" \n\t"\
- "paddw "#D", "#T" \n\t"\
- "psllw $2, "#T" \n\t"\
- "paddw %4, "#A" \n\t"\
- "psubw "#B", "#T" \n\t"\
- "psubw "#E", "#T" \n\t"\
- "punpcklbw "#Z", "#F" \n\t"\
- "pmullw %3, "#T" \n\t"\
- "paddw "#F", "#A" \n\t"\
- "add %2, %0 \n\t"\
- "paddw "#A", "#T" \n\t"\
- "mov"#q" "#T", "#OF"(%1) \n\t"
-
-#define QPEL_H264V(A,B,C,D,E,F,OP) QPEL_H264V_MM(A,B,C,D,E,F,OP,%%mm6,%%mm7,d,q)
-#define QPEL_H264HV(A,B,C,D,E,F,OF) QPEL_H264HV_MM(A,B,C,D,E,F,OF,%%mm6,%%mm7,d,q)
-#define QPEL_H264V_XMM(A,B,C,D,E,F,OP) QPEL_H264V_MM(A,B,C,D,E,F,OP,%%xmm6,%%xmm7,q,dqa)
-#define QPEL_H264HV_XMM(A,B,C,D,E,F,OF) QPEL_H264HV_MM(A,B,C,D,E,F,OF,%%xmm6,%%xmm7,q,dqa)
-
-
-#define QPEL_H264(OPNAME, OP, MMX)\
-static av_noinline void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
- int h=4;\
-\
- __asm__ volatile(\
- "pxor %%mm7, %%mm7 \n\t"\
- "movq %5, %%mm4 \n\t"\
- "movq %6, %%mm5 \n\t"\
- "1: \n\t"\
- "movd -1(%0), %%mm1 \n\t"\
- "movd (%0), %%mm2 \n\t"\
- "movd 1(%0), %%mm3 \n\t"\
- "movd 2(%0), %%mm0 \n\t"\
- "punpcklbw %%mm7, %%mm1 \n\t"\
- "punpcklbw %%mm7, %%mm2 \n\t"\
- "punpcklbw %%mm7, %%mm3 \n\t"\
- "punpcklbw %%mm7, %%mm0 \n\t"\
- "paddw %%mm0, %%mm1 \n\t"\
- "paddw %%mm3, %%mm2 \n\t"\
- "movd -2(%0), %%mm0 \n\t"\
- "movd 3(%0), %%mm3 \n\t"\
- "punpcklbw %%mm7, %%mm0 \n\t"\
- "punpcklbw %%mm7, %%mm3 \n\t"\
- "paddw %%mm3, %%mm0 \n\t"\
- "psllw $2, %%mm2 \n\t"\
- "psubw %%mm1, %%mm2 \n\t"\
- "pmullw %%mm4, %%mm2 \n\t"\
- "paddw %%mm5, %%mm0 \n\t"\
- "paddw %%mm2, %%mm0 \n\t"\
- "psraw $5, %%mm0 \n\t"\
- "packuswb %%mm0, %%mm0 \n\t"\
- OP(%%mm0, (%1),%%mm6, d)\
- "add %3, %0 \n\t"\
- "add %4, %1 \n\t"\
- "decl %2 \n\t"\
- " jnz 1b \n\t"\
- : "+a"(src), "+c"(dst), "+g"(h)\
- : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
- : "memory"\
- );\
-}\
-static av_noinline void OPNAME ## h264_qpel4_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\
- int h=4;\
- __asm__ volatile(\
- "pxor %%mm7, %%mm7 \n\t"\
- "movq %0, %%mm4 \n\t"\
- "movq %1, %%mm5 \n\t"\
- :: "m"(ff_pw_5), "m"(ff_pw_16)\
- );\
- do{\
- __asm__ volatile(\
- "movd -1(%0), %%mm1 \n\t"\
- "movd (%0), %%mm2 \n\t"\
- "movd 1(%0), %%mm3 \n\t"\
- "movd 2(%0), %%mm0 \n\t"\
- "punpcklbw %%mm7, %%mm1 \n\t"\
- "punpcklbw %%mm7, %%mm2 \n\t"\
- "punpcklbw %%mm7, %%mm3 \n\t"\
- "punpcklbw %%mm7, %%mm0 \n\t"\
- "paddw %%mm0, %%mm1 \n\t"\
- "paddw %%mm3, %%mm2 \n\t"\
- "movd -2(%0), %%mm0 \n\t"\
- "movd 3(%0), %%mm3 \n\t"\
- "punpcklbw %%mm7, %%mm0 \n\t"\
- "punpcklbw %%mm7, %%mm3 \n\t"\
- "paddw %%mm3, %%mm0 \n\t"\
- "psllw $2, %%mm2 \n\t"\
- "psubw %%mm1, %%mm2 \n\t"\
- "pmullw %%mm4, %%mm2 \n\t"\
- "paddw %%mm5, %%mm0 \n\t"\
- "paddw %%mm2, %%mm0 \n\t"\
- "movd (%2), %%mm3 \n\t"\
- "psraw $5, %%mm0 \n\t"\
- "packuswb %%mm0, %%mm0 \n\t"\
- PAVGB" %%mm3, %%mm0 \n\t"\
- OP(%%mm0, (%1),%%mm6, d)\
- "add %4, %0 \n\t"\
- "add %4, %1 \n\t"\
- "add %3, %2 \n\t"\
- : "+a"(src), "+c"(dst), "+d"(src2)\
- : "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride)\
- : "memory"\
- );\
- }while(--h);\
-}\
-static av_noinline void OPNAME ## h264_qpel4_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
- src -= 2*srcStride;\
- __asm__ volatile(\
- "pxor %%mm7, %%mm7 \n\t"\
- "movd (%0), %%mm0 \n\t"\
- "add %2, %0 \n\t"\
- "movd (%0), %%mm1 \n\t"\
- "add %2, %0 \n\t"\
- "movd (%0), %%mm2 \n\t"\
- "add %2, %0 \n\t"\
- "movd (%0), %%mm3 \n\t"\
- "add %2, %0 \n\t"\
- "movd (%0), %%mm4 \n\t"\
- "add %2, %0 \n\t"\
- "punpcklbw %%mm7, %%mm0 \n\t"\
- "punpcklbw %%mm7, %%mm1 \n\t"\
- "punpcklbw %%mm7, %%mm2 \n\t"\
- "punpcklbw %%mm7, %%mm3 \n\t"\
- "punpcklbw %%mm7, %%mm4 \n\t"\
- QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
- QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
- QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
- QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
- \
- : "+a"(src), "+c"(dst)\
- : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
- : "memory"\
- );\
-}\
-static av_noinline void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
- int h=4;\
- int w=3;\
- src -= 2*srcStride+2;\
- while(w--){\
- __asm__ volatile(\
- "pxor %%mm7, %%mm7 \n\t"\
- "movd (%0), %%mm0 \n\t"\
- "add %2, %0 \n\t"\
- "movd (%0), %%mm1 \n\t"\
- "add %2, %0 \n\t"\
- "movd (%0), %%mm2 \n\t"\
- "add %2, %0 \n\t"\
- "movd (%0), %%mm3 \n\t"\
- "add %2, %0 \n\t"\
- "movd (%0), %%mm4 \n\t"\
- "add %2, %0 \n\t"\
- "punpcklbw %%mm7, %%mm0 \n\t"\
- "punpcklbw %%mm7, %%mm1 \n\t"\
- "punpcklbw %%mm7, %%mm2 \n\t"\
- "punpcklbw %%mm7, %%mm3 \n\t"\
- "punpcklbw %%mm7, %%mm4 \n\t"\
- QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 0*8*3)\
- QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 1*8*3)\
- QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 2*8*3)\
- QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 3*8*3)\
- \
- : "+a"(src)\
- : "c"(tmp), "S"((x86_reg)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)\
- : "memory"\
- );\
- tmp += 4;\
- src += 4 - 9*srcStride;\
- }\
- tmp -= 3*4;\
- __asm__ volatile(\
- "1: \n\t"\
- "movq (%0), %%mm0 \n\t"\
- "paddw 10(%0), %%mm0 \n\t"\
- "movq 2(%0), %%mm1 \n\t"\
- "paddw 8(%0), %%mm1 \n\t"\
- "movq 4(%0), %%mm2 \n\t"\
- "paddw 6(%0), %%mm2 \n\t"\
- "psubw %%mm1, %%mm0 \n\t"/*a-b (abccba)*/\
- "psraw $2, %%mm0 \n\t"/*(a-b)/4 */\
- "psubw %%mm1, %%mm0 \n\t"/*(a-b)/4-b */\
- "paddsw %%mm2, %%mm0 \n\t"\
- "psraw $2, %%mm0 \n\t"/*((a-b)/4-b+c)/4 */\
- "paddw %%mm2, %%mm0 \n\t"/*(a-5*b+20*c)/16 */\
- "psraw $6, %%mm0 \n\t"\
- "packuswb %%mm0, %%mm0 \n\t"\
- OP(%%mm0, (%1),%%mm7, d)\
- "add $24, %0 \n\t"\
- "add %3, %1 \n\t"\
- "decl %2 \n\t"\
- " jnz 1b \n\t"\
- : "+a"(tmp), "+c"(dst), "+g"(h)\
- : "S"((x86_reg)dstStride)\
- : "memory"\
- );\
-}\
-\
-static av_noinline void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
- int h=8;\
- __asm__ volatile(\
- "pxor %%mm7, %%mm7 \n\t"\
- "movq %5, %%mm6 \n\t"\
- "1: \n\t"\
- "movq (%0), %%mm0 \n\t"\
- "movq 1(%0), %%mm2 \n\t"\
- "movq %%mm0, %%mm1 \n\t"\
- "movq %%mm2, %%mm3 \n\t"\
- "punpcklbw %%mm7, %%mm0 \n\t"\
- "punpckhbw %%mm7, %%mm1 \n\t"\
- "punpcklbw %%mm7, %%mm2 \n\t"\
- "punpckhbw %%mm7, %%mm3 \n\t"\
- "paddw %%mm2, %%mm0 \n\t"\
- "paddw %%mm3, %%mm1 \n\t"\
- "psllw $2, %%mm0 \n\t"\
- "psllw $2, %%mm1 \n\t"\
- "movq -1(%0), %%mm2 \n\t"\
- "movq 2(%0), %%mm4 \n\t"\
- "movq %%mm2, %%mm3 \n\t"\
- "movq %%mm4, %%mm5 \n\t"\
- "punpcklbw %%mm7, %%mm2 \n\t"\
- "punpckhbw %%mm7, %%mm3 \n\t"\
- "punpcklbw %%mm7, %%mm4 \n\t"\
- "punpckhbw %%mm7, %%mm5 \n\t"\
- "paddw %%mm4, %%mm2 \n\t"\
- "paddw %%mm3, %%mm5 \n\t"\
- "psubw %%mm2, %%mm0 \n\t"\
- "psubw %%mm5, %%mm1 \n\t"\
- "pmullw %%mm6, %%mm0 \n\t"\
- "pmullw %%mm6, %%mm1 \n\t"\
- "movd -2(%0), %%mm2 \n\t"\
- "movd 7(%0), %%mm5 \n\t"\
- "punpcklbw %%mm7, %%mm2 \n\t"\
- "punpcklbw %%mm7, %%mm5 \n\t"\
- "paddw %%mm3, %%mm2 \n\t"\
- "paddw %%mm5, %%mm4 \n\t"\
- "movq %6, %%mm5 \n\t"\
- "paddw %%mm5, %%mm2 \n\t"\
- "paddw %%mm5, %%mm4 \n\t"\
- "paddw %%mm2, %%mm0 \n\t"\
- "paddw %%mm4, %%mm1 \n\t"\
- "psraw $5, %%mm0 \n\t"\
- "psraw $5, %%mm1 \n\t"\
- "packuswb %%mm1, %%mm0 \n\t"\
- OP(%%mm0, (%1),%%mm5, q)\
- "add %3, %0 \n\t"\
- "add %4, %1 \n\t"\
- "decl %2 \n\t"\
- " jnz 1b \n\t"\
- : "+a"(src), "+c"(dst), "+g"(h)\
- : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
- : "memory"\
- );\
-}\
-\
-static av_noinline void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\
- int h=8;\
- __asm__ volatile(\
- "pxor %%mm7, %%mm7 \n\t"\
- "movq %0, %%mm6 \n\t"\
- :: "m"(ff_pw_5)\
- );\
- do{\
- __asm__ volatile(\
- "movq (%0), %%mm0 \n\t"\
- "movq 1(%0), %%mm2 \n\t"\
- "movq %%mm0, %%mm1 \n\t"\
- "movq %%mm2, %%mm3 \n\t"\
- "punpcklbw %%mm7, %%mm0 \n\t"\
- "punpckhbw %%mm7, %%mm1 \n\t"\
- "punpcklbw %%mm7, %%mm2 \n\t"\
- "punpckhbw %%mm7, %%mm3 \n\t"\
- "paddw %%mm2, %%mm0 \n\t"\
- "paddw %%mm3, %%mm1 \n\t"\
- "psllw $2, %%mm0 \n\t"\
- "psllw $2, %%mm1 \n\t"\
- "movq -1(%0), %%mm2 \n\t"\
- "movq 2(%0), %%mm4 \n\t"\
- "movq %%mm2, %%mm3 \n\t"\
- "movq %%mm4, %%mm5 \n\t"\
- "punpcklbw %%mm7, %%mm2 \n\t"\
- "punpckhbw %%mm7, %%mm3 \n\t"\
- "punpcklbw %%mm7, %%mm4 \n\t"\
- "punpckhbw %%mm7, %%mm5 \n\t"\
- "paddw %%mm4, %%mm2 \n\t"\
- "paddw %%mm3, %%mm5 \n\t"\
- "psubw %%mm2, %%mm0 \n\t"\
- "psubw %%mm5, %%mm1 \n\t"\
- "pmullw %%mm6, %%mm0 \n\t"\
- "pmullw %%mm6, %%mm1 \n\t"\
- "movd -2(%0), %%mm2 \n\t"\
- "movd 7(%0), %%mm5 \n\t"\
- "punpcklbw %%mm7, %%mm2 \n\t"\
- "punpcklbw %%mm7, %%mm5 \n\t"\
- "paddw %%mm3, %%mm2 \n\t"\
- "paddw %%mm5, %%mm4 \n\t"\
- "movq %5, %%mm5 \n\t"\
- "paddw %%mm5, %%mm2 \n\t"\
- "paddw %%mm5, %%mm4 \n\t"\
- "paddw %%mm2, %%mm0 \n\t"\
- "paddw %%mm4, %%mm1 \n\t"\
- "psraw $5, %%mm0 \n\t"\
- "psraw $5, %%mm1 \n\t"\
- "movq (%2), %%mm4 \n\t"\
- "packuswb %%mm1, %%mm0 \n\t"\
- PAVGB" %%mm4, %%mm0 \n\t"\
- OP(%%mm0, (%1),%%mm5, q)\
- "add %4, %0 \n\t"\
- "add %4, %1 \n\t"\
- "add %3, %2 \n\t"\
- : "+a"(src), "+c"(dst), "+d"(src2)\
- : "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride),\
- "m"(ff_pw_16)\
- : "memory"\
- );\
- }while(--h);\
-}\
-\
-static av_noinline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
- int w= 2;\
- src -= 2*srcStride;\
- \
- while(w--){\
- __asm__ volatile(\
- "pxor %%mm7, %%mm7 \n\t"\
- "movd (%0), %%mm0 \n\t"\
- "add %2, %0 \n\t"\
- "movd (%0), %%mm1 \n\t"\
- "add %2, %0 \n\t"\
- "movd (%0), %%mm2 \n\t"\
- "add %2, %0 \n\t"\
- "movd (%0), %%mm3 \n\t"\
- "add %2, %0 \n\t"\
- "movd (%0), %%mm4 \n\t"\
- "add %2, %0 \n\t"\
- "punpcklbw %%mm7, %%mm0 \n\t"\
- "punpcklbw %%mm7, %%mm1 \n\t"\
- "punpcklbw %%mm7, %%mm2 \n\t"\
- "punpcklbw %%mm7, %%mm3 \n\t"\
- "punpcklbw %%mm7, %%mm4 \n\t"\
- QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
- QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
- QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
- QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
- QPEL_H264V(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\
- QPEL_H264V(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\
- QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
- QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
- \
- : "+a"(src), "+c"(dst)\
- : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
- : "memory"\
- );\
- if(h==16){\
- __asm__ volatile(\
- QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
- QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
- QPEL_H264V(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\
- QPEL_H264V(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\
- QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
- QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
- QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
- QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
- \
- : "+a"(src), "+c"(dst)\
- : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
- : "memory"\
- );\
- }\
- src += 4-(h+5)*srcStride;\
- dst += 4-h*dstStride;\
- }\
-}\
-static av_always_inline void OPNAME ## h264_qpel8or16_hv1_lowpass_ ## MMX(int16_t *tmp, uint8_t *src, int tmpStride, int srcStride, int size){\
- int w = (size+8)>>2;\
- src -= 2*srcStride+2;\
- while(w--){\
- __asm__ volatile(\
- "pxor %%mm7, %%mm7 \n\t"\
- "movd (%0), %%mm0 \n\t"\
- "add %2, %0 \n\t"\
- "movd (%0), %%mm1 \n\t"\
- "add %2, %0 \n\t"\
- "movd (%0), %%mm2 \n\t"\
- "add %2, %0 \n\t"\
- "movd (%0), %%mm3 \n\t"\
- "add %2, %0 \n\t"\
- "movd (%0), %%mm4 \n\t"\
- "add %2, %0 \n\t"\
- "punpcklbw %%mm7, %%mm0 \n\t"\
- "punpcklbw %%mm7, %%mm1 \n\t"\
- "punpcklbw %%mm7, %%mm2 \n\t"\
- "punpcklbw %%mm7, %%mm3 \n\t"\
- "punpcklbw %%mm7, %%mm4 \n\t"\
- QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 0*48)\
- QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 1*48)\
- QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 2*48)\
- QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 3*48)\
- QPEL_H264HV(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, 4*48)\
- QPEL_H264HV(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, 5*48)\
- QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 6*48)\
- QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 7*48)\
- : "+a"(src)\
- : "c"(tmp), "S"((x86_reg)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)\
- : "memory"\
- );\
- if(size==16){\
- __asm__ volatile(\
- QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 8*48)\
- QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 9*48)\
- QPEL_H264HV(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, 10*48)\
- QPEL_H264HV(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, 11*48)\
- QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 12*48)\
- QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 13*48)\
- QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 14*48)\
- QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 15*48)\
- : "+a"(src)\
- : "c"(tmp), "S"((x86_reg)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)\
- : "memory"\
- );\
- }\
- tmp += 4;\
- src += 4 - (size+5)*srcStride;\
- }\
-}\
-static av_always_inline void OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, int dstStride, int tmpStride, int size){\
- int w = size>>4;\
- do{\
- int h = size;\
- __asm__ volatile(\
- "1: \n\t"\
- "movq (%0), %%mm0 \n\t"\
- "movq 8(%0), %%mm3 \n\t"\
- "movq 2(%0), %%mm1 \n\t"\
- "movq 10(%0), %%mm4 \n\t"\
- "paddw %%mm4, %%mm0 \n\t"\
- "paddw %%mm3, %%mm1 \n\t"\
- "paddw 18(%0), %%mm3 \n\t"\
- "paddw 16(%0), %%mm4 \n\t"\
- "movq 4(%0), %%mm2 \n\t"\
- "movq 12(%0), %%mm5 \n\t"\
- "paddw 6(%0), %%mm2 \n\t"\
- "paddw 14(%0), %%mm5 \n\t"\
- "psubw %%mm1, %%mm0 \n\t"\
- "psubw %%mm4, %%mm3 \n\t"\
- "psraw $2, %%mm0 \n\t"\
- "psraw $2, %%mm3 \n\t"\
- "psubw %%mm1, %%mm0 \n\t"\
- "psubw %%mm4, %%mm3 \n\t"\
- "paddsw %%mm2, %%mm0 \n\t"\
- "paddsw %%mm5, %%mm3 \n\t"\
- "psraw $2, %%mm0 \n\t"\
- "psraw $2, %%mm3 \n\t"\
- "paddw %%mm2, %%mm0 \n\t"\
- "paddw %%mm5, %%mm3 \n\t"\
- "psraw $6, %%mm0 \n\t"\
- "psraw $6, %%mm3 \n\t"\
- "packuswb %%mm3, %%mm0 \n\t"\
- OP(%%mm0, (%1),%%mm7, q)\
- "add $48, %0 \n\t"\
- "add %3, %1 \n\t"\
- "decl %2 \n\t"\
- " jnz 1b \n\t"\
- : "+a"(tmp), "+c"(dst), "+g"(h)\
- : "S"((x86_reg)dstStride)\
- : "memory"\
- );\
- tmp += 8 - size*24;\
- dst += 8 - size*dstStride;\
- }while(w--);\
-}\
-\
-static void OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
- OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 8);\
-}\
-static av_noinline void OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
- OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 16);\
- OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
-}\
-\
-static void OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
- OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\
- OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
- src += 8*srcStride;\
- dst += 8*dstStride;\
- OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\
- OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
-}\
-\
-static av_noinline void OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\
- OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst , src , src2 , dstStride, src2Stride);\
- OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\
- src += 8*dstStride;\
- dst += 8*dstStride;\
- src2 += 8*src2Stride;\
- OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst , src , src2 , dstStride, src2Stride);\
- OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\
-}\
-\
-static av_noinline void OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride, int size){\
- put_h264_qpel8or16_hv1_lowpass_ ## MMX(tmp, src, tmpStride, srcStride, size);\
- OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(dst, tmp, dstStride, tmpStride, size);\
-}\
-static void OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
- OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst , tmp , src , dstStride, tmpStride, srcStride, 8);\
-}\
-\
-static void OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
- OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst , tmp , src , dstStride, tmpStride, srcStride, 16);\
-}\
-\
-static av_noinline void OPNAME ## pixels4_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\
-{\
- __asm__ volatile(\
- "movq (%1), %%mm0 \n\t"\
- "movq 24(%1), %%mm1 \n\t"\
- "psraw $5, %%mm0 \n\t"\
- "psraw $5, %%mm1 \n\t"\
- "packuswb %%mm0, %%mm0 \n\t"\
- "packuswb %%mm1, %%mm1 \n\t"\
- PAVGB" (%0), %%mm0 \n\t"\
- PAVGB" (%0,%3), %%mm1 \n\t"\
- OP(%%mm0, (%2), %%mm4, d)\
- OP(%%mm1, (%2,%4), %%mm5, d)\
- "lea (%0,%3,2), %0 \n\t"\
- "lea (%2,%4,2), %2 \n\t"\
- "movq 48(%1), %%mm0 \n\t"\
- "movq 72(%1), %%mm1 \n\t"\
- "psraw $5, %%mm0 \n\t"\
- "psraw $5, %%mm1 \n\t"\
- "packuswb %%mm0, %%mm0 \n\t"\
- "packuswb %%mm1, %%mm1 \n\t"\
- PAVGB" (%0), %%mm0 \n\t"\
- PAVGB" (%0,%3), %%mm1 \n\t"\
- OP(%%mm0, (%2), %%mm4, d)\
- OP(%%mm1, (%2,%4), %%mm5, d)\
- :"+a"(src8), "+c"(src16), "+d"(dst)\
- :"S"((x86_reg)src8Stride), "D"((x86_reg)dstStride)\
- :"memory");\
-}\
-static av_noinline void OPNAME ## pixels8_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\
-{\
- do{\
- __asm__ volatile(\
- "movq (%1), %%mm0 \n\t"\
- "movq 8(%1), %%mm1 \n\t"\
- "movq 48(%1), %%mm2 \n\t"\
- "movq 8+48(%1), %%mm3 \n\t"\
- "psraw $5, %%mm0 \n\t"\
- "psraw $5, %%mm1 \n\t"\
- "psraw $5, %%mm2 \n\t"\
- "psraw $5, %%mm3 \n\t"\
- "packuswb %%mm1, %%mm0 \n\t"\
- "packuswb %%mm3, %%mm2 \n\t"\
- PAVGB" (%0), %%mm0 \n\t"\
- PAVGB" (%0,%3), %%mm2 \n\t"\
- OP(%%mm0, (%2), %%mm5, q)\
- OP(%%mm2, (%2,%4), %%mm5, q)\
- ::"a"(src8), "c"(src16), "d"(dst),\
- "r"((x86_reg)src8Stride), "r"((x86_reg)dstStride)\
- :"memory");\
- src8 += 2L*src8Stride;\
- src16 += 48;\
- dst += 2L*dstStride;\
- }while(h-=2);\
-}\
-static void OPNAME ## pixels16_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\
-{\
- OPNAME ## pixels8_l2_shift5_ ## MMX(dst , src16 , src8 , dstStride, src8Stride, h);\
- OPNAME ## pixels8_l2_shift5_ ## MMX(dst+8, src16+8, src8+8, dstStride, src8Stride, h);\
-}\
-
-
-#ifdef ARCH_X86_64
-#define QPEL_H264_H16_XMM(OPNAME, OP, MMX)\
-static av_noinline void OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\
- int h=16;\
- __asm__ volatile(\
- "pxor %%xmm15, %%xmm15 \n\t"\
- "movdqa %6, %%xmm14 \n\t"\
- "movdqa %7, %%xmm13 \n\t"\
- "1: \n\t"\
- "lddqu 3(%0), %%xmm1 \n\t"\
- "lddqu -5(%0), %%xmm7 \n\t"\
- "movdqa %%xmm1, %%xmm0 \n\t"\
- "punpckhbw %%xmm15, %%xmm1 \n\t"\
- "punpcklbw %%xmm15, %%xmm0 \n\t"\
- "punpcklbw %%xmm15, %%xmm7 \n\t"\
- "movdqa %%xmm1, %%xmm2 \n\t"\
- "movdqa %%xmm0, %%xmm6 \n\t"\
- "movdqa %%xmm1, %%xmm3 \n\t"\
- "movdqa %%xmm0, %%xmm8 \n\t"\
- "movdqa %%xmm1, %%xmm4 \n\t"\
- "movdqa %%xmm0, %%xmm9 \n\t"\
- "movdqa %%xmm1, %%xmm5 \n\t"\
- "movdqa %%xmm0, %%xmm10 \n\t"\
- "palignr $6, %%xmm0, %%xmm5 \n\t"\
- "palignr $6, %%xmm7, %%xmm10\n\t"\
- "palignr $8, %%xmm0, %%xmm4 \n\t"\
- "palignr $8, %%xmm7, %%xmm9 \n\t"\
- "palignr $10,%%xmm0, %%xmm3 \n\t"\
- "palignr $10,%%xmm7, %%xmm8 \n\t"\
- "paddw %%xmm1, %%xmm5 \n\t"\
- "paddw %%xmm0, %%xmm10 \n\t"\
- "palignr $12,%%xmm0, %%xmm2 \n\t"\
- "palignr $12,%%xmm7, %%xmm6 \n\t"\
- "palignr $14,%%xmm0, %%xmm1 \n\t"\
- "palignr $14,%%xmm7, %%xmm0 \n\t"\
- "paddw %%xmm3, %%xmm2 \n\t"\
- "paddw %%xmm8, %%xmm6 \n\t"\
- "paddw %%xmm4, %%xmm1 \n\t"\
- "paddw %%xmm9, %%xmm0 \n\t"\
- "psllw $2, %%xmm2 \n\t"\
- "psllw $2, %%xmm6 \n\t"\
- "psubw %%xmm1, %%xmm2 \n\t"\
- "psubw %%xmm0, %%xmm6 \n\t"\
- "paddw %%xmm13,%%xmm5 \n\t"\
- "paddw %%xmm13,%%xmm10 \n\t"\
- "pmullw %%xmm14,%%xmm2 \n\t"\
- "pmullw %%xmm14,%%xmm6 \n\t"\
- "lddqu (%2), %%xmm3 \n\t"\
- "paddw %%xmm5, %%xmm2 \n\t"\
- "paddw %%xmm10,%%xmm6 \n\t"\
- "psraw $5, %%xmm2 \n\t"\
- "psraw $5, %%xmm6 \n\t"\
- "packuswb %%xmm2,%%xmm6 \n\t"\
- "pavgb %%xmm3, %%xmm6 \n\t"\
- OP(%%xmm6, (%1), %%xmm4, dqa)\
- "add %5, %0 \n\t"\
- "add %5, %1 \n\t"\
- "add %4, %2 \n\t"\
- "decl %3 \n\t"\
- "jg 1b \n\t"\
- : "+a"(src), "+c"(dst), "+d"(src2), "+g"(h)\
- : "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride),\
- "m"(ff_pw_5), "m"(ff_pw_16)\
- : "memory"\
- );\
-}
-#else // ARCH_X86_64
-#define QPEL_H264_H16_XMM(OPNAME, OP, MMX)\
-static av_noinline void OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\
- OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst , src , src2 , dstStride, src2Stride);\
- OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\
- src += 8*dstStride;\
- dst += 8*dstStride;\
- src2 += 8*src2Stride;\
- OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst , src , src2 , dstStride, src2Stride);\
- OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\
-}
-#endif // ARCH_X86_64
-
-#define QPEL_H264_H_XMM(OPNAME, OP, MMX)\
-static av_noinline void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\
- int h=8;\
- __asm__ volatile(\
- "pxor %%xmm7, %%xmm7 \n\t"\
- "movdqa %0, %%xmm6 \n\t"\
- :: "m"(ff_pw_5)\
- );\
- do{\
- __asm__ volatile(\
- "lddqu -5(%0), %%xmm1 \n\t"\
- "movdqa %%xmm1, %%xmm0 \n\t"\
- "punpckhbw %%xmm7, %%xmm1 \n\t"\
- "punpcklbw %%xmm7, %%xmm0 \n\t"\
- "movdqa %%xmm1, %%xmm2 \n\t"\
- "movdqa %%xmm1, %%xmm3 \n\t"\
- "movdqa %%xmm1, %%xmm4 \n\t"\
- "movdqa %%xmm1, %%xmm5 \n\t"\
- "palignr $6, %%xmm0, %%xmm5 \n\t"\
- "palignr $8, %%xmm0, %%xmm4 \n\t"\
- "palignr $10,%%xmm0, %%xmm3 \n\t"\
- "paddw %%xmm1, %%xmm5 \n\t"\
- "palignr $12,%%xmm0, %%xmm2 \n\t"\
- "palignr $14,%%xmm0, %%xmm1 \n\t"\
- "paddw %%xmm3, %%xmm2 \n\t"\
- "paddw %%xmm4, %%xmm1 \n\t"\
- "psllw $2, %%xmm2 \n\t"\
- "movq (%2), %%xmm3 \n\t"\
- "psubw %%xmm1, %%xmm2 \n\t"\
- "paddw %5, %%xmm5 \n\t"\
- "pmullw %%xmm6, %%xmm2 \n\t"\
- "paddw %%xmm5, %%xmm2 \n\t"\
- "psraw $5, %%xmm2 \n\t"\
- "packuswb %%xmm2, %%xmm2 \n\t"\
- "pavgb %%xmm3, %%xmm2 \n\t"\
- OP(%%xmm2, (%1), %%xmm4, q)\
- "add %4, %0 \n\t"\
- "add %4, %1 \n\t"\
- "add %3, %2 \n\t"\
- : "+a"(src), "+c"(dst), "+d"(src2)\
- : "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride),\
- "m"(ff_pw_16)\
- : "memory"\
- );\
- }while(--h);\
-}\
-QPEL_H264_H16_XMM(OPNAME, OP, MMX)\
-\
-static av_noinline void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
- int h=8;\
- __asm__ volatile(\
- "pxor %%xmm7, %%xmm7 \n\t"\
- "movdqa %5, %%xmm6 \n\t"\
- "1: \n\t"\
- "lddqu -5(%0), %%xmm1 \n\t"\
- "movdqa %%xmm1, %%xmm0 \n\t"\
- "punpckhbw %%xmm7, %%xmm1 \n\t"\
- "punpcklbw %%xmm7, %%xmm0 \n\t"\
- "movdqa %%xmm1, %%xmm2 \n\t"\
- "movdqa %%xmm1, %%xmm3 \n\t"\
- "movdqa %%xmm1, %%xmm4 \n\t"\
- "movdqa %%xmm1, %%xmm5 \n\t"\
- "palignr $6, %%xmm0, %%xmm5 \n\t"\
- "palignr $8, %%xmm0, %%xmm4 \n\t"\
- "palignr $10,%%xmm0, %%xmm3 \n\t"\
- "paddw %%xmm1, %%xmm5 \n\t"\
- "palignr $12,%%xmm0, %%xmm2 \n\t"\
- "palignr $14,%%xmm0, %%xmm1 \n\t"\
- "paddw %%xmm3, %%xmm2 \n\t"\
- "paddw %%xmm4, %%xmm1 \n\t"\
- "psllw $2, %%xmm2 \n\t"\
- "psubw %%xmm1, %%xmm2 \n\t"\
- "paddw %6, %%xmm5 \n\t"\
- "pmullw %%xmm6, %%xmm2 \n\t"\
- "paddw %%xmm5, %%xmm2 \n\t"\
- "psraw $5, %%xmm2 \n\t"\
- "packuswb %%xmm2, %%xmm2 \n\t"\
- OP(%%xmm2, (%1), %%xmm4, q)\
- "add %3, %0 \n\t"\
- "add %4, %1 \n\t"\
- "decl %2 \n\t"\
- " jnz 1b \n\t"\
- : "+a"(src), "+c"(dst), "+g"(h)\
- : "D"((x86_reg)srcStride), "S"((x86_reg)dstStride),\
- "m"(ff_pw_5), "m"(ff_pw_16)\
- : "memory"\
- );\
-}\
-static void OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
- OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\
- OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
- src += 8*srcStride;\
- dst += 8*dstStride;\
- OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\
- OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
-}\
-
-#define QPEL_H264_V_XMM(OPNAME, OP, MMX)\
-static av_noinline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
- src -= 2*srcStride;\
- \
- __asm__ volatile(\
- "pxor %%xmm7, %%xmm7 \n\t"\
- "movq (%0), %%xmm0 \n\t"\
- "add %2, %0 \n\t"\
- "movq (%0), %%xmm1 \n\t"\
- "add %2, %0 \n\t"\
- "movq (%0), %%xmm2 \n\t"\
- "add %2, %0 \n\t"\
- "movq (%0), %%xmm3 \n\t"\
- "add %2, %0 \n\t"\
- "movq (%0), %%xmm4 \n\t"\
- "add %2, %0 \n\t"\
- "punpcklbw %%xmm7, %%xmm0 \n\t"\
- "punpcklbw %%xmm7, %%xmm1 \n\t"\
- "punpcklbw %%xmm7, %%xmm2 \n\t"\
- "punpcklbw %%xmm7, %%xmm3 \n\t"\
- "punpcklbw %%xmm7, %%xmm4 \n\t"\
- QPEL_H264V_XMM(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, OP)\
- QPEL_H264V_XMM(%%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, OP)\
- QPEL_H264V_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, OP)\
- QPEL_H264V_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, OP)\
- QPEL_H264V_XMM(%%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, OP)\
- QPEL_H264V_XMM(%%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, OP)\
- QPEL_H264V_XMM(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, OP)\
- QPEL_H264V_XMM(%%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, OP)\
- \
- : "+a"(src), "+c"(dst)\
- : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
- : "memory"\
- );\
- if(h==16){\
- __asm__ volatile(\
- QPEL_H264V_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, OP)\
- QPEL_H264V_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, OP)\
- QPEL_H264V_XMM(%%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, OP)\
- QPEL_H264V_XMM(%%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, OP)\
- QPEL_H264V_XMM(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, OP)\
- QPEL_H264V_XMM(%%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, OP)\
- QPEL_H264V_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, OP)\
- QPEL_H264V_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, OP)\
- \
- : "+a"(src), "+c"(dst)\
- : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
- : "memory"\
- );\
- }\
-}\
-static void OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
- OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 8);\
-}\
-static av_noinline void OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
- OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 16);\
- OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
-}
-
-static av_always_inline void put_h264_qpel8or16_hv1_lowpass_sse2(int16_t *tmp, uint8_t *src, int tmpStride, int srcStride, int size){
- int w = (size+8)>>3;
- src -= 2*srcStride+2;
- while(w--){
- __asm__ volatile(
- "pxor %%xmm7, %%xmm7 \n\t"
- "movq (%0), %%xmm0 \n\t"
- "add %2, %0 \n\t"
- "movq (%0), %%xmm1 \n\t"
- "add %2, %0 \n\t"
- "movq (%0), %%xmm2 \n\t"
- "add %2, %0 \n\t"
- "movq (%0), %%xmm3 \n\t"
- "add %2, %0 \n\t"
- "movq (%0), %%xmm4 \n\t"
- "add %2, %0 \n\t"
- "punpcklbw %%xmm7, %%xmm0 \n\t"
- "punpcklbw %%xmm7, %%xmm1 \n\t"
- "punpcklbw %%xmm7, %%xmm2 \n\t"
- "punpcklbw %%xmm7, %%xmm3 \n\t"
- "punpcklbw %%xmm7, %%xmm4 \n\t"
- QPEL_H264HV_XMM(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, 0*48)
- QPEL_H264HV_XMM(%%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, 1*48)
- QPEL_H264HV_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, 2*48)
- QPEL_H264HV_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, 3*48)
- QPEL_H264HV_XMM(%%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, 4*48)
- QPEL_H264HV_XMM(%%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, 5*48)
- QPEL_H264HV_XMM(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, 6*48)
- QPEL_H264HV_XMM(%%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, 7*48)
- : "+a"(src)
- : "c"(tmp), "S"((x86_reg)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)
- : "memory"
- );
- if(size==16){
- __asm__ volatile(
- QPEL_H264HV_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, 8*48)
- QPEL_H264HV_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, 9*48)
- QPEL_H264HV_XMM(%%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, 10*48)
- QPEL_H264HV_XMM(%%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, 11*48)
- QPEL_H264HV_XMM(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, 12*48)
- QPEL_H264HV_XMM(%%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, 13*48)
- QPEL_H264HV_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, 14*48)
- QPEL_H264HV_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, 15*48)
- : "+a"(src)
- : "c"(tmp), "S"((x86_reg)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)
- : "memory"
- );
- }
- tmp += 8;
- src += 8 - (size+5)*srcStride;
- }
-}
-
-#define QPEL_H264_HV2_XMM(OPNAME, OP, MMX)\
-static av_always_inline void OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, int dstStride, int tmpStride, int size){\
- int h = size;\
- if(size == 16){\
- __asm__ volatile(\
- "1: \n\t"\
- "movdqa 32(%0), %%xmm4 \n\t"\
- "movdqa 16(%0), %%xmm5 \n\t"\
- "movdqa (%0), %%xmm7 \n\t"\
- "movdqa %%xmm4, %%xmm3 \n\t"\
- "movdqa %%xmm4, %%xmm2 \n\t"\
- "movdqa %%xmm4, %%xmm1 \n\t"\
- "movdqa %%xmm4, %%xmm0 \n\t"\
- "palignr $10, %%xmm5, %%xmm0 \n\t"\
- "palignr $8, %%xmm5, %%xmm1 \n\t"\
- "palignr $6, %%xmm5, %%xmm2 \n\t"\
- "palignr $4, %%xmm5, %%xmm3 \n\t"\
- "palignr $2, %%xmm5, %%xmm4 \n\t"\
- "paddw %%xmm5, %%xmm0 \n\t"\
- "paddw %%xmm4, %%xmm1 \n\t"\
- "paddw %%xmm3, %%xmm2 \n\t"\
- "movdqa %%xmm5, %%xmm6 \n\t"\
- "movdqa %%xmm5, %%xmm4 \n\t"\
- "movdqa %%xmm5, %%xmm3 \n\t"\
- "palignr $8, %%xmm7, %%xmm4 \n\t"\
- "palignr $2, %%xmm7, %%xmm6 \n\t"\
- "palignr $10, %%xmm7, %%xmm3 \n\t"\
- "paddw %%xmm6, %%xmm4 \n\t"\
- "movdqa %%xmm5, %%xmm6 \n\t"\
- "palignr $6, %%xmm7, %%xmm5 \n\t"\
- "palignr $4, %%xmm7, %%xmm6 \n\t"\
- "paddw %%xmm7, %%xmm3 \n\t"\
- "paddw %%xmm6, %%xmm5 \n\t"\
- \
- "psubw %%xmm1, %%xmm0 \n\t"\
- "psubw %%xmm4, %%xmm3 \n\t"\
- "psraw $2, %%xmm0 \n\t"\
- "psraw $2, %%xmm3 \n\t"\
- "psubw %%xmm1, %%xmm0 \n\t"\
- "psubw %%xmm4, %%xmm3 \n\t"\
- "paddw %%xmm2, %%xmm0 \n\t"\
- "paddw %%xmm5, %%xmm3 \n\t"\
- "psraw $2, %%xmm0 \n\t"\
- "psraw $2, %%xmm3 \n\t"\
- "paddw %%xmm2, %%xmm0 \n\t"\
- "paddw %%xmm5, %%xmm3 \n\t"\
- "psraw $6, %%xmm0 \n\t"\
- "psraw $6, %%xmm3 \n\t"\
- "packuswb %%xmm0, %%xmm3 \n\t"\
- OP(%%xmm3, (%1), %%xmm7, dqa)\
- "add $48, %0 \n\t"\
- "add %3, %1 \n\t"\
- "decl %2 \n\t"\
- " jnz 1b \n\t"\
- : "+a"(tmp), "+c"(dst), "+g"(h)\
- : "S"((x86_reg)dstStride)\
- : "memory"\
- );\
- }else{\
- __asm__ volatile(\
- "1: \n\t"\
- "movdqa 16(%0), %%xmm1 \n\t"\
- "movdqa (%0), %%xmm0 \n\t"\
- "movdqa %%xmm1, %%xmm2 \n\t"\
- "movdqa %%xmm1, %%xmm3 \n\t"\
- "movdqa %%xmm1, %%xmm4 \n\t"\
- "movdqa %%xmm1, %%xmm5 \n\t"\
- "palignr $10, %%xmm0, %%xmm5 \n\t"\
- "palignr $8, %%xmm0, %%xmm4 \n\t"\
- "palignr $6, %%xmm0, %%xmm3 \n\t"\
- "palignr $4, %%xmm0, %%xmm2 \n\t"\
- "palignr $2, %%xmm0, %%xmm1 \n\t"\
- "paddw %%xmm5, %%xmm0 \n\t"\
- "paddw %%xmm4, %%xmm1 \n\t"\
- "paddw %%xmm3, %%xmm2 \n\t"\
- "psubw %%xmm1, %%xmm0 \n\t"\
- "psraw $2, %%xmm0 \n\t"\
- "psubw %%xmm1, %%xmm0 \n\t"\
- "paddw %%xmm2, %%xmm0 \n\t"\
- "psraw $2, %%xmm0 \n\t"\
- "paddw %%xmm2, %%xmm0 \n\t"\
- "psraw $6, %%xmm0 \n\t"\
- "packuswb %%xmm0, %%xmm0 \n\t"\
- OP(%%xmm0, (%1), %%xmm7, q)\
- "add $48, %0 \n\t"\
- "add %3, %1 \n\t"\
- "decl %2 \n\t"\
- " jnz 1b \n\t"\
- : "+a"(tmp), "+c"(dst), "+g"(h)\
- : "S"((x86_reg)dstStride)\
- : "memory"\
- );\
- }\
-}
-
-#define QPEL_H264_HV_XMM(OPNAME, OP, MMX)\
-static av_noinline void OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride, int size){\
- put_h264_qpel8or16_hv1_lowpass_sse2(tmp, src, tmpStride, srcStride, size);\
- OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(dst, tmp, dstStride, tmpStride, size);\
-}\
-static void OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
- OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst, tmp, src, dstStride, tmpStride, srcStride, 8);\
-}\
-static void OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
- OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst, tmp, src, dstStride, tmpStride, srcStride, 16);\
-}\
-
-#define put_pixels8_l2_sse2 put_pixels8_l2_mmx2
-#define avg_pixels8_l2_sse2 avg_pixels8_l2_mmx2
-#define put_pixels16_l2_sse2 put_pixels16_l2_mmx2
-#define avg_pixels16_l2_sse2 avg_pixels16_l2_mmx2
-#define put_pixels8_l2_ssse3 put_pixels8_l2_mmx2
-#define avg_pixels8_l2_ssse3 avg_pixels8_l2_mmx2
-#define put_pixels16_l2_ssse3 put_pixels16_l2_mmx2
-#define avg_pixels16_l2_ssse3 avg_pixels16_l2_mmx2
-
-#define put_pixels8_l2_shift5_sse2 put_pixels8_l2_shift5_mmx2
-#define avg_pixels8_l2_shift5_sse2 avg_pixels8_l2_shift5_mmx2
-#define put_pixels16_l2_shift5_sse2 put_pixels16_l2_shift5_mmx2
-#define avg_pixels16_l2_shift5_sse2 avg_pixels16_l2_shift5_mmx2
-#define put_pixels8_l2_shift5_ssse3 put_pixels8_l2_shift5_mmx2
-#define avg_pixels8_l2_shift5_ssse3 avg_pixels8_l2_shift5_mmx2
-#define put_pixels16_l2_shift5_ssse3 put_pixels16_l2_shift5_mmx2
-#define avg_pixels16_l2_shift5_ssse3 avg_pixels16_l2_shift5_mmx2
-
-#define put_h264_qpel8_h_lowpass_l2_sse2 put_h264_qpel8_h_lowpass_l2_mmx2
-#define avg_h264_qpel8_h_lowpass_l2_sse2 avg_h264_qpel8_h_lowpass_l2_mmx2
-#define put_h264_qpel16_h_lowpass_l2_sse2 put_h264_qpel16_h_lowpass_l2_mmx2
-#define avg_h264_qpel16_h_lowpass_l2_sse2 avg_h264_qpel16_h_lowpass_l2_mmx2
-
-#define put_h264_qpel8_v_lowpass_ssse3 put_h264_qpel8_v_lowpass_sse2
-#define avg_h264_qpel8_v_lowpass_ssse3 avg_h264_qpel8_v_lowpass_sse2
-#define put_h264_qpel16_v_lowpass_ssse3 put_h264_qpel16_v_lowpass_sse2
-#define avg_h264_qpel16_v_lowpass_ssse3 avg_h264_qpel16_v_lowpass_sse2
-
-#define put_h264_qpel8or16_hv2_lowpass_sse2 put_h264_qpel8or16_hv2_lowpass_mmx2
-#define avg_h264_qpel8or16_hv2_lowpass_sse2 avg_h264_qpel8or16_hv2_lowpass_mmx2
-
-#define H264_MC(OPNAME, SIZE, MMX, ALIGN) \
-H264_MC_C(OPNAME, SIZE, MMX, ALIGN)\
-H264_MC_V(OPNAME, SIZE, MMX, ALIGN)\
-H264_MC_H(OPNAME, SIZE, MMX, ALIGN)\
-H264_MC_HV(OPNAME, SIZE, MMX, ALIGN)\
-
-static void put_h264_qpel16_mc00_sse2 (uint8_t *dst, uint8_t *src, int stride){
- put_pixels16_sse2(dst, src, stride, 16);
-}
-static void avg_h264_qpel16_mc00_sse2 (uint8_t *dst, uint8_t *src, int stride){
- avg_pixels16_sse2(dst, src, stride, 16);
-}
-#define put_h264_qpel8_mc00_sse2 put_h264_qpel8_mc00_mmx2
-#define avg_h264_qpel8_mc00_sse2 avg_h264_qpel8_mc00_mmx2
-
-#define H264_MC_C(OPNAME, SIZE, MMX, ALIGN) \
-static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## MMX (uint8_t *dst, uint8_t *src, int stride){\
- OPNAME ## pixels ## SIZE ## _ ## MMX(dst, src, stride, SIZE);\
-}\
-
-#define H264_MC_H(OPNAME, SIZE, MMX, ALIGN) \
-static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, src, stride, stride);\
-}\
-\
-static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## MMX(dst, src, stride, stride);\
-}\
-\
-static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, src+1, stride, stride);\
-}\
-
-#define H264_MC_V(OPNAME, SIZE, MMX, ALIGN) \
-static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*SIZE]);\
- put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
- OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src, temp, stride, stride, SIZE);\
-}\
-\
-static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## MMX(dst, src, stride, stride);\
-}\
-\
-static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*SIZE]);\
- put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
- OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src+stride, temp, stride, stride, SIZE);\
-}\
-
-#define H264_MC_HV(OPNAME, SIZE, MMX, ALIGN) \
-static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*SIZE]);\
- put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
- OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, temp, stride, SIZE);\
-}\
-\
-static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*SIZE]);\
- put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src+1, SIZE, stride);\
- OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, temp, stride, SIZE);\
-}\
-\
-static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*SIZE]);\
- put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
- OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, temp, stride, SIZE);\
-}\
-\
-static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*SIZE]);\
- put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src+1, SIZE, stride);\
- OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, temp, stride, SIZE);\
-}\
-\
-static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- DECLARE_ALIGNED(ALIGN, uint16_t, temp[SIZE*(SIZE<8?12:24)]);\
- OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(dst, temp, src, stride, SIZE, stride);\
-}\
-\
-static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\
- uint8_t * const halfHV= temp;\
- int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
- assert(((int)temp & 7) == 0);\
- put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
- OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, halfHV, stride, SIZE);\
-}\
-\
-static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\
- uint8_t * const halfHV= temp;\
- int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
- assert(((int)temp & 7) == 0);\
- put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
- OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, halfHV, stride, SIZE);\
-}\
-\
-static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\
- uint8_t * const halfHV= temp;\
- int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
- assert(((int)temp & 7) == 0);\
- put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
- OPNAME ## pixels ## SIZE ## _l2_shift5_ ## MMX(dst, halfV+2, halfHV, stride, SIZE, SIZE);\
-}\
-\
-static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
- DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\
- uint8_t * const halfHV= temp;\
- int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
- assert(((int)temp & 7) == 0);\
- put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
- OPNAME ## pixels ## SIZE ## _l2_shift5_ ## MMX(dst, halfV+3, halfHV, stride, SIZE, SIZE);\
-}\
-
-#define H264_MC_4816(MMX)\
-H264_MC(put_, 4, MMX, 8)\
-H264_MC(put_, 8, MMX, 8)\
-H264_MC(put_, 16,MMX, 8)\
-H264_MC(avg_, 4, MMX, 8)\
-H264_MC(avg_, 8, MMX, 8)\
-H264_MC(avg_, 16,MMX, 8)\
-
-#define H264_MC_816(QPEL, XMM)\
-QPEL(put_, 8, XMM, 16)\
-QPEL(put_, 16,XMM, 16)\
-QPEL(avg_, 8, XMM, 16)\
-QPEL(avg_, 16,XMM, 16)\
-
-
-#define AVG_3DNOW_OP(a,b,temp, size) \
-"mov" #size " " #b ", " #temp " \n\t"\
-"pavgusb " #temp ", " #a " \n\t"\
-"mov" #size " " #a ", " #b " \n\t"
-#define AVG_MMX2_OP(a,b,temp, size) \
-"mov" #size " " #b ", " #temp " \n\t"\
-"pavgb " #temp ", " #a " \n\t"\
-"mov" #size " " #a ", " #b " \n\t"
-
-#define PAVGB "pavgusb"
-QPEL_H264(put_, PUT_OP, 3dnow)
-QPEL_H264(avg_, AVG_3DNOW_OP, 3dnow)
-#undef PAVGB
-#define PAVGB "pavgb"
-QPEL_H264(put_, PUT_OP, mmx2)
-QPEL_H264(avg_, AVG_MMX2_OP, mmx2)
-QPEL_H264_V_XMM(put_, PUT_OP, sse2)
-QPEL_H264_V_XMM(avg_, AVG_MMX2_OP, sse2)
-QPEL_H264_HV_XMM(put_, PUT_OP, sse2)
-QPEL_H264_HV_XMM(avg_, AVG_MMX2_OP, sse2)
-#ifdef HAVE_SSSE3
-QPEL_H264_H_XMM(put_, PUT_OP, ssse3)
-QPEL_H264_H_XMM(avg_, AVG_MMX2_OP, ssse3)
-QPEL_H264_HV2_XMM(put_, PUT_OP, ssse3)
-QPEL_H264_HV2_XMM(avg_, AVG_MMX2_OP, ssse3)
-QPEL_H264_HV_XMM(put_, PUT_OP, ssse3)
-QPEL_H264_HV_XMM(avg_, AVG_MMX2_OP, ssse3)
-#endif
-#undef PAVGB
-
-H264_MC_4816(3dnow)
-H264_MC_4816(mmx2)
-H264_MC_816(H264_MC_V, sse2)
-H264_MC_816(H264_MC_HV, sse2)
-#ifdef HAVE_SSSE3
-H264_MC_816(H264_MC_H, ssse3)
-H264_MC_816(H264_MC_HV, ssse3)
-#endif
-
-
-#define H264_CHROMA_OP(S,D)
-#define H264_CHROMA_OP4(S,D,T)
-#define H264_CHROMA_MC8_TMPL put_h264_chroma_mc8_mmx
-#define H264_CHROMA_MC4_TMPL put_h264_chroma_mc4_mmx
-#define H264_CHROMA_MC2_TMPL put_h264_chroma_mc2_mmx2
-#define H264_CHROMA_MC8_MV0 put_pixels8_mmx
-#include "dsputil_h264_template_mmx.c"
-
-static void put_h264_chroma_mc8_mmx_rnd(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
-{
- put_h264_chroma_mc8_mmx(dst, src, stride, h, x, y, 1);
-}
-static void put_h264_chroma_mc8_mmx_nornd(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
-{
- put_h264_chroma_mc8_mmx(dst, src, stride, h, x, y, 0);
-}
-
-#undef H264_CHROMA_OP
-#undef H264_CHROMA_OP4
-#undef H264_CHROMA_MC8_TMPL
-#undef H264_CHROMA_MC4_TMPL
-#undef H264_CHROMA_MC2_TMPL
-#undef H264_CHROMA_MC8_MV0
-
-#define H264_CHROMA_OP(S,D) "pavgb " #S ", " #D " \n\t"
-#define H264_CHROMA_OP4(S,D,T) "movd " #S ", " #T " \n\t"\
- "pavgb " #T ", " #D " \n\t"
-#define H264_CHROMA_MC8_TMPL avg_h264_chroma_mc8_mmx2
-#define H264_CHROMA_MC4_TMPL avg_h264_chroma_mc4_mmx2
-#define H264_CHROMA_MC2_TMPL avg_h264_chroma_mc2_mmx2
-#define H264_CHROMA_MC8_MV0 avg_pixels8_mmx2
-#include "dsputil_h264_template_mmx.c"
-static void avg_h264_chroma_mc8_mmx2_rnd(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
-{
- avg_h264_chroma_mc8_mmx2(dst, src, stride, h, x, y, 1);
-}
-#undef H264_CHROMA_OP
-#undef H264_CHROMA_OP4
-#undef H264_CHROMA_MC8_TMPL
-#undef H264_CHROMA_MC4_TMPL
-#undef H264_CHROMA_MC2_TMPL
-#undef H264_CHROMA_MC8_MV0
-
-#define H264_CHROMA_OP(S,D) "pavgusb " #S ", " #D " \n\t"
-#define H264_CHROMA_OP4(S,D,T) "movd " #S ", " #T " \n\t"\
- "pavgusb " #T ", " #D " \n\t"
-#define H264_CHROMA_MC8_TMPL avg_h264_chroma_mc8_3dnow
-#define H264_CHROMA_MC4_TMPL avg_h264_chroma_mc4_3dnow
-#define H264_CHROMA_MC8_MV0 avg_pixels8_3dnow
-#include "dsputil_h264_template_mmx.c"
-static void avg_h264_chroma_mc8_3dnow_rnd(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
-{
- avg_h264_chroma_mc8_3dnow(dst, src, stride, h, x, y, 1);
-}
-#undef H264_CHROMA_OP
-#undef H264_CHROMA_OP4
-#undef H264_CHROMA_MC8_TMPL
-#undef H264_CHROMA_MC4_TMPL
-#undef H264_CHROMA_MC8_MV0
-
-#ifdef HAVE_SSSE3
-#define AVG_OP(X)
-#undef H264_CHROMA_MC8_TMPL
-#undef H264_CHROMA_MC4_TMPL
-#define H264_CHROMA_MC8_TMPL put_h264_chroma_mc8_ssse3
-#define H264_CHROMA_MC4_TMPL put_h264_chroma_mc4_ssse3
-#define H264_CHROMA_MC8_MV0 put_pixels8_mmx
-#include "dsputil_h264_template_ssse3.c"
-static void put_h264_chroma_mc8_ssse3_rnd(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
-{
- put_h264_chroma_mc8_ssse3(dst, src, stride, h, x, y, 1);
-}
-static void put_h264_chroma_mc8_ssse3_nornd(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
-{
- put_h264_chroma_mc8_ssse3(dst, src, stride, h, x, y, 0);
-}
-
-#undef AVG_OP
-#undef H264_CHROMA_MC8_TMPL
-#undef H264_CHROMA_MC4_TMPL
-#undef H264_CHROMA_MC8_MV0
-#define AVG_OP(X) X
-#define H264_CHROMA_MC8_TMPL avg_h264_chroma_mc8_ssse3
-#define H264_CHROMA_MC4_TMPL avg_h264_chroma_mc4_ssse3
-#define H264_CHROMA_MC8_MV0 avg_pixels8_mmx2
-#include "dsputil_h264_template_ssse3.c"
-static void avg_h264_chroma_mc8_ssse3_rnd(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
-{
- avg_h264_chroma_mc8_ssse3(dst, src, stride, h, x, y, 1);
-}
-#undef AVG_OP
-#undef H264_CHROMA_MC8_TMPL
-#undef H264_CHROMA_MC4_TMPL
-#undef H264_CHROMA_MC8_MV0
-#endif
-
-/***********************************/
-/* weighted prediction */
-
-static inline void ff_h264_weight_WxH_mmx2(uint8_t *dst, int stride, int log2_denom, int weight, int offset, int w, int h)
-{
- int x, y;
- offset <<= log2_denom;
- offset += (1 << log2_denom) >> 1;
- __asm__ volatile(
- "movd %0, %%mm4 \n\t"
- "movd %1, %%mm5 \n\t"
- "movd %2, %%mm6 \n\t"
- "pshufw $0, %%mm4, %%mm4 \n\t"
- "pshufw $0, %%mm5, %%mm5 \n\t"
- "pxor %%mm7, %%mm7 \n\t"
- :: "g"(weight), "g"(offset), "g"(log2_denom)
- );
- for(y=0; y<h; y+=2){
- for(x=0; x<w; x+=4){
- __asm__ volatile(
- "movd %0, %%mm0 \n\t"
- "movd %1, %%mm1 \n\t"
- "punpcklbw %%mm7, %%mm0 \n\t"
- "punpcklbw %%mm7, %%mm1 \n\t"
- "pmullw %%mm4, %%mm0 \n\t"
- "pmullw %%mm4, %%mm1 \n\t"
- "paddsw %%mm5, %%mm0 \n\t"
- "paddsw %%mm5, %%mm1 \n\t"
- "psraw %%mm6, %%mm0 \n\t"
- "psraw %%mm6, %%mm1 \n\t"
- "packuswb %%mm7, %%mm0 \n\t"
- "packuswb %%mm7, %%mm1 \n\t"
- "movd %%mm0, %0 \n\t"
- "movd %%mm1, %1 \n\t"
- : "+m"(*(uint32_t*)(dst+x)),
- "+m"(*(uint32_t*)(dst+x+stride))
- );
- }
- dst += 2*stride;
- }
-}
-
-static inline void ff_h264_biweight_WxH_mmx2(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset, int w, int h)
-{
- int x, y;
- offset = ((offset + 1) | 1) << log2_denom;
- __asm__ volatile(
- "movd %0, %%mm3 \n\t"
- "movd %1, %%mm4 \n\t"
- "movd %2, %%mm5 \n\t"
- "movd %3, %%mm6 \n\t"
- "pshufw $0, %%mm3, %%mm3 \n\t"
- "pshufw $0, %%mm4, %%mm4 \n\t"
- "pshufw $0, %%mm5, %%mm5 \n\t"
- "pxor %%mm7, %%mm7 \n\t"
- :: "g"(weightd), "g"(weights), "g"(offset), "g"(log2_denom+1)
- );
- for(y=0; y<h; y++){
- for(x=0; x<w; x+=4){
- __asm__ volatile(
- "movd %0, %%mm0 \n\t"
- "movd %1, %%mm1 \n\t"
- "punpcklbw %%mm7, %%mm0 \n\t"
- "punpcklbw %%mm7, %%mm1 \n\t"
- "pmullw %%mm3, %%mm0 \n\t"
- "pmullw %%mm4, %%mm1 \n\t"
- "paddsw %%mm1, %%mm0 \n\t"
- "paddsw %%mm5, %%mm0 \n\t"
- "psraw %%mm6, %%mm0 \n\t"
- "packuswb %%mm0, %%mm0 \n\t"
- "movd %%mm0, %0 \n\t"
- : "+m"(*(uint32_t*)(dst+x))
- : "m"(*(uint32_t*)(src+x))
- );
- }
- src += stride;
- dst += stride;
- }
-}
-
-#define H264_WEIGHT(W,H) \
-static void ff_h264_biweight_ ## W ## x ## H ## _mmx2(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset){ \
- ff_h264_biweight_WxH_mmx2(dst, src, stride, log2_denom, weightd, weights, offset, W, H); \
-} \
-static void ff_h264_weight_ ## W ## x ## H ## _mmx2(uint8_t *dst, int stride, int log2_denom, int weight, int offset){ \
- ff_h264_weight_WxH_mmx2(dst, stride, log2_denom, weight, offset, W, H); \
-}
-
-H264_WEIGHT(16,16)
-H264_WEIGHT(16, 8)
-H264_WEIGHT( 8,16)
-H264_WEIGHT( 8, 8)
-H264_WEIGHT( 8, 4)
-H264_WEIGHT( 4, 8)
-H264_WEIGHT( 4, 4)
-H264_WEIGHT( 4, 2)
-
diff --git a/libavcodec/i386/idct_sse2_xvid.c b/libavcodec/i386/idct_sse2_xvid.c
deleted file mode 100644
index be4f211..0000000
--- a/libavcodec/i386/idct_sse2_xvid.c
+++ /dev/null
@@ -1,394 +0,0 @@
-/*
- * XVID MPEG-4 VIDEO CODEC
- * - SSE2 inverse discrete cosine transform -
- *
- * Copyright(C) 2003 Pascal Massimino <skal at planet-d.net>
- *
- * Conversion to gcc syntax with modifications
- * by Alexander Strange <astrange at ithinksw.com>
- *
- * Originally from dct/x86_asm/fdct_sse2_skal.asm in Xvid.
- *
- * This file is part of FFmpeg.
- *
- * Vertical pass is an implementation of the scheme:
- * Loeffler C., Ligtenberg A., and Moschytz C.S.:
- * Practical Fast 1D DCT Algorithm with Eleven Multiplications,
- * Proc. ICASSP 1989, 988-991.
- *
- * Horizontal pass is a double 4x4 vector/matrix multiplication,
- * (see also Intel's Application Note 922:
- * http://developer.intel.com/vtune/cbts/strmsimd/922down.htm
- * Copyright (C) 1999 Intel Corporation)
- *
- * More details at http://skal.planet-d.net/coding/dct.html
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with FFmpeg; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "libavcodec/dsputil.h"
-#include "libavcodec/i386/idct_xvid.h"
-
-/*!
- * @file idct_sse2_xvid.c
- * @brief SSE2 idct compatible with xvidmmx
- */
-
-#define X8(x) x,x,x,x,x,x,x,x
-
-#define ROW_SHIFT 11
-#define COL_SHIFT 6
-
-DECLARE_ASM_CONST(16, int16_t, tan1[]) = {X8(13036)}; // tan( pi/16)
-DECLARE_ASM_CONST(16, int16_t, tan2[]) = {X8(27146)}; // tan(2pi/16) = sqrt(2)-1
-DECLARE_ASM_CONST(16, int16_t, tan3[]) = {X8(43790)}; // tan(3pi/16)-1
-DECLARE_ASM_CONST(16, int16_t, sqrt2[])= {X8(23170)}; // 0.5/sqrt(2)
-DECLARE_ASM_CONST(8, uint8_t, m127[]) = {X8(127)};
-
-DECLARE_ASM_CONST(16, int16_t, iTab1[]) = {
- 0x4000, 0x539f, 0xc000, 0xac61, 0x4000, 0xdd5d, 0x4000, 0xdd5d,
- 0x4000, 0x22a3, 0x4000, 0x22a3, 0xc000, 0x539f, 0x4000, 0xac61,
- 0x3249, 0x11a8, 0x4b42, 0xee58, 0x11a8, 0x4b42, 0x11a8, 0xcdb7,
- 0x58c5, 0x4b42, 0xa73b, 0xcdb7, 0x3249, 0xa73b, 0x4b42, 0xa73b
-};
-
-DECLARE_ASM_CONST(16, int16_t, iTab2[]) = {
- 0x58c5, 0x73fc, 0xa73b, 0x8c04, 0x58c5, 0xcff5, 0x58c5, 0xcff5,
- 0x58c5, 0x300b, 0x58c5, 0x300b, 0xa73b, 0x73fc, 0x58c5, 0x8c04,
- 0x45bf, 0x187e, 0x6862, 0xe782, 0x187e, 0x6862, 0x187e, 0xba41,
- 0x7b21, 0x6862, 0x84df, 0xba41, 0x45bf, 0x84df, 0x6862, 0x84df
-};
-
-DECLARE_ASM_CONST(16, int16_t, iTab3[]) = {
- 0x539f, 0x6d41, 0xac61, 0x92bf, 0x539f, 0xd2bf, 0x539f, 0xd2bf,
- 0x539f, 0x2d41, 0x539f, 0x2d41, 0xac61, 0x6d41, 0x539f, 0x92bf,
- 0x41b3, 0x1712, 0x6254, 0xe8ee, 0x1712, 0x6254, 0x1712, 0xbe4d,
- 0x73fc, 0x6254, 0x8c04, 0xbe4d, 0x41b3, 0x8c04, 0x6254, 0x8c04
-};
-
-DECLARE_ASM_CONST(16, int16_t, iTab4[]) = {
- 0x4b42, 0x6254, 0xb4be, 0x9dac, 0x4b42, 0xd746, 0x4b42, 0xd746,
- 0x4b42, 0x28ba, 0x4b42, 0x28ba, 0xb4be, 0x6254, 0x4b42, 0x9dac,
- 0x3b21, 0x14c3, 0x587e, 0xeb3d, 0x14c3, 0x587e, 0x14c3, 0xc4df,
- 0x6862, 0x587e, 0x979e, 0xc4df, 0x3b21, 0x979e, 0x587e, 0x979e
-};
-
-DECLARE_ASM_CONST(16, int32_t, walkenIdctRounders[]) = {
- 65536, 65536, 65536, 65536,
- 3597, 3597, 3597, 3597,
- 2260, 2260, 2260, 2260,
- 1203, 1203, 1203, 1203,
- 120, 120, 120, 120,
- 512, 512, 512, 512
-};
-
-// Temporary storage before the column pass
-#define ROW1 "%%xmm6"
-#define ROW3 "%%xmm4"
-#define ROW5 "%%xmm5"
-#define ROW7 "%%xmm7"
-
-#define CLEAR_ODD(r) "pxor "r","r" \n\t"
-#define PUT_ODD(dst) "pshufhw $0x1B, %%xmm2, "dst" \n\t"
-
-#ifdef ARCH_X86_64
-
-# define ROW0 "%%xmm8"
-# define REG0 ROW0
-# define ROW2 "%%xmm9"
-# define REG2 ROW2
-# define ROW4 "%%xmm10"
-# define REG4 ROW4
-# define ROW6 "%%xmm11"
-# define REG6 ROW6
-# define CLEAR_EVEN(r) CLEAR_ODD(r)
-# define PUT_EVEN(dst) PUT_ODD(dst)
-# define XMMS "%%xmm12"
-# define MOV_32_ONLY "#"
-# define SREG2 REG2
-# define TAN3 "%%xmm13"
-# define TAN1 "%%xmm14"
-
-#else
-
-# define ROW0 "(%0)"
-# define REG0 "%%xmm4"
-# define ROW2 "2*16(%0)"
-# define REG2 "%%xmm4"
-# define ROW4 "4*16(%0)"
-# define REG4 "%%xmm6"
-# define ROW6 "6*16(%0)"
-# define REG6 "%%xmm6"
-# define CLEAR_EVEN(r)
-# define PUT_EVEN(dst) \
- "pshufhw $0x1B, %%xmm2, %%xmm2 \n\t" \
- "movdqa %%xmm2, "dst" \n\t"
-# define XMMS "%%xmm2"
-# define MOV_32_ONLY "movdqa "
-# define SREG2 "%%xmm7"
-# define TAN3 "%%xmm0"
-# define TAN1 "%%xmm2"
-
-#endif
-
-#define ROUND(x) "paddd "MANGLE(x)
-
-#define JZ(reg, to) \
- "testl "reg","reg" \n\t" \
- "jz "to" \n\t"
-
-#define JNZ(reg, to) \
- "testl "reg","reg" \n\t" \
- "jnz "to" \n\t"
-
-#define TEST_ONE_ROW(src, reg, clear) \
- clear \
- "movq "src", %%mm1 \n\t" \
- "por 8+"src", %%mm1 \n\t" \
- "paddusb %%mm0, %%mm1 \n\t" \
- "pmovmskb %%mm1, "reg" \n\t"
-
-#define TEST_TWO_ROWS(row1, row2, reg1, reg2, clear1, clear2) \
- clear1 \
- clear2 \
- "movq "row1", %%mm1 \n\t" \
- "por 8+"row1", %%mm1 \n\t" \
- "movq "row2", %%mm2 \n\t" \
- "por 8+"row2", %%mm2 \n\t" \
- "paddusb %%mm0, %%mm1 \n\t" \
- "paddusb %%mm0, %%mm2 \n\t" \
- "pmovmskb %%mm1, "reg1" \n\t" \
- "pmovmskb %%mm2, "reg2" \n\t"
-
-///IDCT pass on rows.
-#define iMTX_MULT(src, table, rounder, put) \
- "movdqa "src", %%xmm3 \n\t" \
- "movdqa %%xmm3, %%xmm0 \n\t" \
- "pshufd $0x11, %%xmm3, %%xmm1 \n\t" /* 4602 */ \
- "punpcklqdq %%xmm0, %%xmm0 \n\t" /* 0246 */ \
- "pmaddwd "table", %%xmm0 \n\t" \
- "pmaddwd 16+"table", %%xmm1 \n\t" \
- "pshufd $0xBB, %%xmm3, %%xmm2 \n\t" /* 5713 */ \
- "punpckhqdq %%xmm3, %%xmm3 \n\t" /* 1357 */ \
- "pmaddwd 32+"table", %%xmm2 \n\t" \
- "pmaddwd 48+"table", %%xmm3 \n\t" \
- "paddd %%xmm1, %%xmm0 \n\t" \
- "paddd %%xmm3, %%xmm2 \n\t" \
- rounder", %%xmm0 \n\t" \
- "movdqa %%xmm2, %%xmm3 \n\t" \
- "paddd %%xmm0, %%xmm2 \n\t" \
- "psubd %%xmm3, %%xmm0 \n\t" \
- "psrad $11, %%xmm2 \n\t" \
- "psrad $11, %%xmm0 \n\t" \
- "packssdw %%xmm0, %%xmm2 \n\t" \
- put \
- "1: \n\t"
-
-#define iLLM_HEAD \
- "movdqa "MANGLE(tan3)", "TAN3" \n\t" \
- "movdqa "MANGLE(tan1)", "TAN1" \n\t" \
-
-///IDCT pass on columns.
-#define iLLM_PASS(dct) \
- "movdqa "TAN3", %%xmm1 \n\t" \
- "movdqa "TAN1", %%xmm3 \n\t" \
- "pmulhw %%xmm4, "TAN3" \n\t" \
- "pmulhw %%xmm5, %%xmm1 \n\t" \
- "paddsw %%xmm4, "TAN3" \n\t" \
- "paddsw %%xmm5, %%xmm1 \n\t" \
- "psubsw %%xmm5, "TAN3" \n\t" \
- "paddsw %%xmm4, %%xmm1 \n\t" \
- "pmulhw %%xmm7, %%xmm3 \n\t" \
- "pmulhw %%xmm6, "TAN1" \n\t" \
- "paddsw %%xmm6, %%xmm3 \n\t" \
- "psubsw %%xmm7, "TAN1" \n\t" \
- "movdqa %%xmm3, %%xmm7 \n\t" \
- "movdqa "TAN1", %%xmm6 \n\t" \
- "psubsw %%xmm1, %%xmm3 \n\t" \
- "psubsw "TAN3", "TAN1" \n\t" \
- "paddsw %%xmm7, %%xmm1 \n\t" \
- "paddsw %%xmm6, "TAN3" \n\t" \
- "movdqa %%xmm3, %%xmm6 \n\t" \
- "psubsw "TAN3", %%xmm3 \n\t" \
- "paddsw %%xmm6, "TAN3" \n\t" \
- "movdqa "MANGLE(sqrt2)", %%xmm4 \n\t" \
- "pmulhw %%xmm4, %%xmm3 \n\t" \
- "pmulhw %%xmm4, "TAN3" \n\t" \
- "paddsw "TAN3", "TAN3" \n\t" \
- "paddsw %%xmm3, %%xmm3 \n\t" \
- "movdqa "MANGLE(tan2)", %%xmm7 \n\t" \
- MOV_32_ONLY ROW2", "REG2" \n\t" \
- MOV_32_ONLY ROW6", "REG6" \n\t" \
- "movdqa %%xmm7, %%xmm5 \n\t" \
- "pmulhw "REG6", %%xmm7 \n\t" \
- "pmulhw "REG2", %%xmm5 \n\t" \
- "paddsw "REG2", %%xmm7 \n\t" \
- "psubsw "REG6", %%xmm5 \n\t" \
- MOV_32_ONLY ROW0", "REG0" \n\t" \
- MOV_32_ONLY ROW4", "REG4" \n\t" \
- MOV_32_ONLY" "TAN1", (%0) \n\t" \
- "movdqa "REG0", "XMMS" \n\t" \
- "psubsw "REG4", "REG0" \n\t" \
- "paddsw "XMMS", "REG4" \n\t" \
- "movdqa "REG4", "XMMS" \n\t" \
- "psubsw %%xmm7, "REG4" \n\t" \
- "paddsw "XMMS", %%xmm7 \n\t" \
- "movdqa "REG0", "XMMS" \n\t" \
- "psubsw %%xmm5, "REG0" \n\t" \
- "paddsw "XMMS", %%xmm5 \n\t" \
- "movdqa %%xmm5, "XMMS" \n\t" \
- "psubsw "TAN3", %%xmm5 \n\t" \
- "paddsw "XMMS", "TAN3" \n\t" \
- "movdqa "REG0", "XMMS" \n\t" \
- "psubsw %%xmm3, "REG0" \n\t" \
- "paddsw "XMMS", %%xmm3 \n\t" \
- MOV_32_ONLY" (%0), "TAN1" \n\t" \
- "psraw $6, %%xmm5 \n\t" \
- "psraw $6, "REG0" \n\t" \
- "psraw $6, "TAN3" \n\t" \
- "psraw $6, %%xmm3 \n\t" \
- "movdqa "TAN3", 1*16("dct") \n\t" \
- "movdqa %%xmm3, 2*16("dct") \n\t" \
- "movdqa "REG0", 5*16("dct") \n\t" \
- "movdqa %%xmm5, 6*16("dct") \n\t" \
- "movdqa %%xmm7, %%xmm0 \n\t" \
- "movdqa "REG4", %%xmm4 \n\t" \
- "psubsw %%xmm1, %%xmm7 \n\t" \
- "psubsw "TAN1", "REG4" \n\t" \
- "paddsw %%xmm0, %%xmm1 \n\t" \
- "paddsw %%xmm4, "TAN1" \n\t" \
- "psraw $6, %%xmm1 \n\t" \
- "psraw $6, %%xmm7 \n\t" \
- "psraw $6, "TAN1" \n\t" \
- "psraw $6, "REG4" \n\t" \
- "movdqa %%xmm1, ("dct") \n\t" \
- "movdqa "TAN1", 3*16("dct") \n\t" \
- "movdqa "REG4", 4*16("dct") \n\t" \
- "movdqa %%xmm7, 7*16("dct") \n\t"
-
-///IDCT pass on columns, assuming rows 4-7 are zero.
-#define iLLM_PASS_SPARSE(dct) \
- "pmulhw %%xmm4, "TAN3" \n\t" \
- "paddsw %%xmm4, "TAN3" \n\t" \
- "movdqa %%xmm6, %%xmm3 \n\t" \
- "pmulhw %%xmm6, "TAN1" \n\t" \
- "movdqa %%xmm4, %%xmm1 \n\t" \
- "psubsw %%xmm1, %%xmm3 \n\t" \
- "paddsw %%xmm6, %%xmm1 \n\t" \
- "movdqa "TAN1", %%xmm6 \n\t" \
- "psubsw "TAN3", "TAN1" \n\t" \
- "paddsw %%xmm6, "TAN3" \n\t" \
- "movdqa %%xmm3, %%xmm6 \n\t" \
- "psubsw "TAN3", %%xmm3 \n\t" \
- "paddsw %%xmm6, "TAN3" \n\t" \
- "movdqa "MANGLE(sqrt2)", %%xmm4 \n\t" \
- "pmulhw %%xmm4, %%xmm3 \n\t" \
- "pmulhw %%xmm4, "TAN3" \n\t" \
- "paddsw "TAN3", "TAN3" \n\t" \
- "paddsw %%xmm3, %%xmm3 \n\t" \
- "movdqa "MANGLE(tan2)", %%xmm5 \n\t" \
- MOV_32_ONLY ROW2", "SREG2" \n\t" \
- "pmulhw "SREG2", %%xmm5 \n\t" \
- MOV_32_ONLY ROW0", "REG0" \n\t" \
- "movdqa "REG0", %%xmm6 \n\t" \
- "psubsw "SREG2", %%xmm6 \n\t" \
- "paddsw "REG0", "SREG2" \n\t" \
- MOV_32_ONLY" "TAN1", (%0) \n\t" \
- "movdqa "REG0", "XMMS" \n\t" \
- "psubsw %%xmm5, "REG0" \n\t" \
- "paddsw "XMMS", %%xmm5 \n\t" \
- "movdqa %%xmm5, "XMMS" \n\t" \
- "psubsw "TAN3", %%xmm5 \n\t" \
- "paddsw "XMMS", "TAN3" \n\t" \
- "movdqa "REG0", "XMMS" \n\t" \
- "psubsw %%xmm3, "REG0" \n\t" \
- "paddsw "XMMS", %%xmm3 \n\t" \
- MOV_32_ONLY" (%0), "TAN1" \n\t" \
- "psraw $6, %%xmm5 \n\t" \
- "psraw $6, "REG0" \n\t" \
- "psraw $6, "TAN3" \n\t" \
- "psraw $6, %%xmm3 \n\t" \
- "movdqa "TAN3", 1*16("dct") \n\t" \
- "movdqa %%xmm3, 2*16("dct") \n\t" \
- "movdqa "REG0", 5*16("dct") \n\t" \
- "movdqa %%xmm5, 6*16("dct") \n\t" \
- "movdqa "SREG2", %%xmm0 \n\t" \
- "movdqa %%xmm6, %%xmm4 \n\t" \
- "psubsw %%xmm1, "SREG2" \n\t" \
- "psubsw "TAN1", %%xmm6 \n\t" \
- "paddsw %%xmm0, %%xmm1 \n\t" \
- "paddsw %%xmm4, "TAN1" \n\t" \
- "psraw $6, %%xmm1 \n\t" \
- "psraw $6, "SREG2" \n\t" \
- "psraw $6, "TAN1" \n\t" \
- "psraw $6, %%xmm6 \n\t" \
- "movdqa %%xmm1, ("dct") \n\t" \
- "movdqa "TAN1", 3*16("dct") \n\t" \
- "movdqa %%xmm6, 4*16("dct") \n\t" \
- "movdqa "SREG2", 7*16("dct") \n\t"
-
-inline void ff_idct_xvid_sse2(short *block)
-{
- __asm__ volatile(
- "movq "MANGLE(m127)", %%mm0 \n\t"
- iMTX_MULT("(%0)", MANGLE(iTab1), ROUND(walkenIdctRounders), PUT_EVEN(ROW0))
- iMTX_MULT("1*16(%0)", MANGLE(iTab2), ROUND(walkenIdctRounders+1*16), PUT_ODD(ROW1))
- iMTX_MULT("2*16(%0)", MANGLE(iTab3), ROUND(walkenIdctRounders+2*16), PUT_EVEN(ROW2))
-
- TEST_TWO_ROWS("3*16(%0)", "4*16(%0)", "%%eax", "%%ecx", CLEAR_ODD(ROW3), CLEAR_EVEN(ROW4))
- JZ("%%eax", "1f")
- iMTX_MULT("3*16(%0)", MANGLE(iTab4), ROUND(walkenIdctRounders+3*16), PUT_ODD(ROW3))
-
- TEST_TWO_ROWS("5*16(%0)", "6*16(%0)", "%%eax", "%%edx", CLEAR_ODD(ROW5), CLEAR_EVEN(ROW6))
- TEST_ONE_ROW("7*16(%0)", "%%esi", CLEAR_ODD(ROW7))
- iLLM_HEAD
- ASMALIGN(4)
- JNZ("%%ecx", "2f")
- JNZ("%%eax", "3f")
- JNZ("%%edx", "4f")
- JNZ("%%esi", "5f")
- iLLM_PASS_SPARSE("%0")
- "jmp 6f \n\t"
- "2: \n\t"
- iMTX_MULT("4*16(%0)", MANGLE(iTab1), "#", PUT_EVEN(ROW4))
- "3: \n\t"
- iMTX_MULT("5*16(%0)", MANGLE(iTab4), ROUND(walkenIdctRounders+4*16), PUT_ODD(ROW5))
- JZ("%%edx", "1f")
- "4: \n\t"
- iMTX_MULT("6*16(%0)", MANGLE(iTab3), ROUND(walkenIdctRounders+5*16), PUT_EVEN(ROW6))
- JZ("%%esi", "1f")
- "5: \n\t"
- iMTX_MULT("7*16(%0)", MANGLE(iTab2), ROUND(walkenIdctRounders+5*16), PUT_ODD(ROW7))
-#ifndef ARCH_X86_64
- iLLM_HEAD
-#endif
- iLLM_PASS("%0")
- "6: \n\t"
- : "+r"(block)
- :
- : "%eax", "%ecx", "%edx", "%esi", "memory");
-}
-
-void ff_idct_xvid_sse2_put(uint8_t *dest, int line_size, short *block)
-{
- ff_idct_xvid_sse2(block);
- put_pixels_clamped_mmx(block, dest, line_size);
-}
-
-void ff_idct_xvid_sse2_add(uint8_t *dest, int line_size, short *block)
-{
- ff_idct_xvid_sse2(block);
- add_pixels_clamped_mmx(block, dest, line_size);
-}
diff --git a/libavcodec/i386/idct_xvid.h b/libavcodec/i386/idct_xvid.h
deleted file mode 100644
index 0bf45d5..0000000
--- a/libavcodec/i386/idct_xvid.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * XVID MPEG-4 VIDEO CODEC
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-/*!
- * @file idct_xvid.h
- * header for Xvid IDCT functions
- */
-
-#ifndef AVCODEC_I386_IDCT_XVID_H
-#define AVCODEC_I386_IDCT_XVID_H
-
-#include <stdint.h>
-
-void ff_idct_xvid_mmx(short *block);
-void ff_idct_xvid_mmx2(short *block);
-void ff_idct_xvid_sse2(short *block);
-void ff_idct_xvid_sse2_put(uint8_t *dest, int line_size, short *block);
-void ff_idct_xvid_sse2_add(uint8_t *dest, int line_size, short *block);
-
-#endif /* AVCODEC_I386_IDCT_XVID_H */
diff --git a/libavcodec/i386/mathops.h b/libavcodec/i386/mathops.h
deleted file mode 100644
index 2ae24fc..0000000
--- a/libavcodec/i386/mathops.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * simple math operations
- * Copyright (c) 2006 Michael Niedermayer <michaelni at gmx.at> et al
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef AVCODEC_I386_MATHOPS_H
-#define AVCODEC_I386_MATHOPS_H
-
-#ifdef FRAC_BITS
-# define MULL(ra, rb) \
- ({ int rt, dummy; __asm__ (\
- "imull %3 \n\t"\
- "shrdl %4, %%edx, %%eax \n\t"\
- : "=a"(rt), "=d"(dummy)\
- : "a" ((int)ra), "rm" ((int)rb), "i"(FRAC_BITS));\
- rt; })
-#endif
-
-#define MULH(ra, rb) \
- ({ int rt, dummy;\
- __asm__ ("imull %3\n\t" : "=d"(rt), "=a"(dummy): "a" ((int)ra), "rm" ((int)rb));\
- rt; })
-
-#define MUL64(ra, rb) \
- ({ int64_t rt;\
- __asm__ ("imull %2\n\t" : "=A"(rt) : "a" ((int)ra), "g" ((int)rb));\
- rt; })
-
-#endif /* AVCODEC_I386_MATHOPS_H */
diff --git a/libavcodec/i386/mmx.h b/libavcodec/i386/mmx.h
deleted file mode 100644
index fb5c2d4..0000000
--- a/libavcodec/i386/mmx.h
+++ /dev/null
@@ -1,285 +0,0 @@
-/*
- * mmx.h
- * Copyright (C) 1997-2001 H. Dietz and R. Fisher
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef AVCODEC_I386_MMX_H
-#define AVCODEC_I386_MMX_H
-
-#warning Everything in this header is deprecated, use plain __asm__()! New code using this header will be rejected.
-
-/*
- * The type of an value that fits in an MMX register (note that long
- * long constant values MUST be suffixed by LL and unsigned long long
- * values by ULL, lest they be truncated by the compiler)
- */
-
-typedef union {
- long long q; /* Quadword (64-bit) value */
- unsigned long long uq; /* Unsigned Quadword */
- int d[2]; /* 2 Doubleword (32-bit) values */
- unsigned int ud[2]; /* 2 Unsigned Doubleword */
- short w[4]; /* 4 Word (16-bit) values */
- unsigned short uw[4]; /* 4 Unsigned Word */
- char b[8]; /* 8 Byte (8-bit) values */
- unsigned char ub[8]; /* 8 Unsigned Byte */
- float s[2]; /* Single-precision (32-bit) value */
-} mmx_t; /* On an 8-byte (64-bit) boundary */
-
-
-#define mmx_i2r(op,imm,reg) \
- __asm__ volatile (#op " %0, %%" #reg \
- : /* nothing */ \
- : "i" (imm) )
-
-#define mmx_m2r(op,mem,reg) \
- __asm__ volatile (#op " %0, %%" #reg \
- : /* nothing */ \
- : "m" (mem))
-
-#define mmx_r2m(op,reg,mem) \
- __asm__ volatile (#op " %%" #reg ", %0" \
- : "=m" (mem) \
- : /* nothing */ )
-
-#define mmx_r2r(op,regs,regd) \
- __asm__ volatile (#op " %" #regs ", %" #regd)
-
-
-#define emms() __asm__ volatile ("emms")
-
-#define movd_m2r(var,reg) mmx_m2r (movd, var, reg)
-#define movd_r2m(reg,var) mmx_r2m (movd, reg, var)
-#define movd_r2r(regs,regd) mmx_r2r (movd, regs, regd)
-
-#define movq_m2r(var,reg) mmx_m2r (movq, var, reg)
-#define movq_r2m(reg,var) mmx_r2m (movq, reg, var)
-#define movq_r2r(regs,regd) mmx_r2r (movq, regs, regd)
-
-#define packssdw_m2r(var,reg) mmx_m2r (packssdw, var, reg)
-#define packssdw_r2r(regs,regd) mmx_r2r (packssdw, regs, regd)
-#define packsswb_m2r(var,reg) mmx_m2r (packsswb, var, reg)
-#define packsswb_r2r(regs,regd) mmx_r2r (packsswb, regs, regd)
-
-#define packuswb_m2r(var,reg) mmx_m2r (packuswb, var, reg)
-#define packuswb_r2r(regs,regd) mmx_r2r (packuswb, regs, regd)
-
-#define paddb_m2r(var,reg) mmx_m2r (paddb, var, reg)
-#define paddb_r2r(regs,regd) mmx_r2r (paddb, regs, regd)
-#define paddd_m2r(var,reg) mmx_m2r (paddd, var, reg)
-#define paddd_r2r(regs,regd) mmx_r2r (paddd, regs, regd)
-#define paddw_m2r(var,reg) mmx_m2r (paddw, var, reg)
-#define paddw_r2r(regs,regd) mmx_r2r (paddw, regs, regd)
-
-#define paddsb_m2r(var,reg) mmx_m2r (paddsb, var, reg)
-#define paddsb_r2r(regs,regd) mmx_r2r (paddsb, regs, regd)
-#define paddsw_m2r(var,reg) mmx_m2r (paddsw, var, reg)
-#define paddsw_r2r(regs,regd) mmx_r2r (paddsw, regs, regd)
-
-#define paddusb_m2r(var,reg) mmx_m2r (paddusb, var, reg)
-#define paddusb_r2r(regs,regd) mmx_r2r (paddusb, regs, regd)
-#define paddusw_m2r(var,reg) mmx_m2r (paddusw, var, reg)
-#define paddusw_r2r(regs,regd) mmx_r2r (paddusw, regs, regd)
-
-#define pand_m2r(var,reg) mmx_m2r (pand, var, reg)
-#define pand_r2r(regs,regd) mmx_r2r (pand, regs, regd)
-
-#define pandn_m2r(var,reg) mmx_m2r (pandn, var, reg)
-#define pandn_r2r(regs,regd) mmx_r2r (pandn, regs, regd)
-
-#define pcmpeqb_m2r(var,reg) mmx_m2r (pcmpeqb, var, reg)
-#define pcmpeqb_r2r(regs,regd) mmx_r2r (pcmpeqb, regs, regd)
-#define pcmpeqd_m2r(var,reg) mmx_m2r (pcmpeqd, var, reg)
-#define pcmpeqd_r2r(regs,regd) mmx_r2r (pcmpeqd, regs, regd)
-#define pcmpeqw_m2r(var,reg) mmx_m2r (pcmpeqw, var, reg)
-#define pcmpeqw_r2r(regs,regd) mmx_r2r (pcmpeqw, regs, regd)
-
-#define pcmpgtb_m2r(var,reg) mmx_m2r (pcmpgtb, var, reg)
-#define pcmpgtb_r2r(regs,regd) mmx_r2r (pcmpgtb, regs, regd)
-#define pcmpgtd_m2r(var,reg) mmx_m2r (pcmpgtd, var, reg)
-#define pcmpgtd_r2r(regs,regd) mmx_r2r (pcmpgtd, regs, regd)
-#define pcmpgtw_m2r(var,reg) mmx_m2r (pcmpgtw, var, reg)
-#define pcmpgtw_r2r(regs,regd) mmx_r2r (pcmpgtw, regs, regd)
-
-#define pmaddwd_m2r(var,reg) mmx_m2r (pmaddwd, var, reg)
-#define pmaddwd_r2r(regs,regd) mmx_r2r (pmaddwd, regs, regd)
-
-#define pmulhw_m2r(var,reg) mmx_m2r (pmulhw, var, reg)
-#define pmulhw_r2r(regs,regd) mmx_r2r (pmulhw, regs, regd)
-
-#define pmullw_m2r(var,reg) mmx_m2r (pmullw, var, reg)
-#define pmullw_r2r(regs,regd) mmx_r2r (pmullw, regs, regd)
-
-#define por_m2r(var,reg) mmx_m2r (por, var, reg)
-#define por_r2r(regs,regd) mmx_r2r (por, regs, regd)
-
-#define pslld_i2r(imm,reg) mmx_i2r (pslld, imm, reg)
-#define pslld_m2r(var,reg) mmx_m2r (pslld, var, reg)
-#define pslld_r2r(regs,regd) mmx_r2r (pslld, regs, regd)
-#define psllq_i2r(imm,reg) mmx_i2r (psllq, imm, reg)
-#define psllq_m2r(var,reg) mmx_m2r (psllq, var, reg)
-#define psllq_r2r(regs,regd) mmx_r2r (psllq, regs, regd)
-#define psllw_i2r(imm,reg) mmx_i2r (psllw, imm, reg)
-#define psllw_m2r(var,reg) mmx_m2r (psllw, var, reg)
-#define psllw_r2r(regs,regd) mmx_r2r (psllw, regs, regd)
-
-#define psrad_i2r(imm,reg) mmx_i2r (psrad, imm, reg)
-#define psrad_m2r(var,reg) mmx_m2r (psrad, var, reg)
-#define psrad_r2r(regs,regd) mmx_r2r (psrad, regs, regd)
-#define psraw_i2r(imm,reg) mmx_i2r (psraw, imm, reg)
-#define psraw_m2r(var,reg) mmx_m2r (psraw, var, reg)
-#define psraw_r2r(regs,regd) mmx_r2r (psraw, regs, regd)
-
-#define psrld_i2r(imm,reg) mmx_i2r (psrld, imm, reg)
-#define psrld_m2r(var,reg) mmx_m2r (psrld, var, reg)
-#define psrld_r2r(regs,regd) mmx_r2r (psrld, regs, regd)
-#define psrlq_i2r(imm,reg) mmx_i2r (psrlq, imm, reg)
-#define psrlq_m2r(var,reg) mmx_m2r (psrlq, var, reg)
-#define psrlq_r2r(regs,regd) mmx_r2r (psrlq, regs, regd)
-#define psrlw_i2r(imm,reg) mmx_i2r (psrlw, imm, reg)
-#define psrlw_m2r(var,reg) mmx_m2r (psrlw, var, reg)
-#define psrlw_r2r(regs,regd) mmx_r2r (psrlw, regs, regd)
-
-#define psubb_m2r(var,reg) mmx_m2r (psubb, var, reg)
-#define psubb_r2r(regs,regd) mmx_r2r (psubb, regs, regd)
-#define psubd_m2r(var,reg) mmx_m2r (psubd, var, reg)
-#define psubd_r2r(regs,regd) mmx_r2r (psubd, regs, regd)
-#define psubw_m2r(var,reg) mmx_m2r (psubw, var, reg)
-#define psubw_r2r(regs,regd) mmx_r2r (psubw, regs, regd)
-
-#define psubsb_m2r(var,reg) mmx_m2r (psubsb, var, reg)
-#define psubsb_r2r(regs,regd) mmx_r2r (psubsb, regs, regd)
-#define psubsw_m2r(var,reg) mmx_m2r (psubsw, var, reg)
-#define psubsw_r2r(regs,regd) mmx_r2r (psubsw, regs, regd)
-
-#define psubusb_m2r(var,reg) mmx_m2r (psubusb, var, reg)
-#define psubusb_r2r(regs,regd) mmx_r2r (psubusb, regs, regd)
-#define psubusw_m2r(var,reg) mmx_m2r (psubusw, var, reg)
-#define psubusw_r2r(regs,regd) mmx_r2r (psubusw, regs, regd)
-
-#define punpckhbw_m2r(var,reg) mmx_m2r (punpckhbw, var, reg)
-#define punpckhbw_r2r(regs,regd) mmx_r2r (punpckhbw, regs, regd)
-#define punpckhdq_m2r(var,reg) mmx_m2r (punpckhdq, var, reg)
-#define punpckhdq_r2r(regs,regd) mmx_r2r (punpckhdq, regs, regd)
-#define punpckhwd_m2r(var,reg) mmx_m2r (punpckhwd, var, reg)
-#define punpckhwd_r2r(regs,regd) mmx_r2r (punpckhwd, regs, regd)
-
-#define punpcklbw_m2r(var,reg) mmx_m2r (punpcklbw, var, reg)
-#define punpcklbw_r2r(regs,regd) mmx_r2r (punpcklbw, regs, regd)
-#define punpckldq_m2r(var,reg) mmx_m2r (punpckldq, var, reg)
-#define punpckldq_r2r(regs,regd) mmx_r2r (punpckldq, regs, regd)
-#define punpcklwd_m2r(var,reg) mmx_m2r (punpcklwd, var, reg)
-#define punpcklwd_r2r(regs,regd) mmx_r2r (punpcklwd, regs, regd)
-
-#define pxor_m2r(var,reg) mmx_m2r (pxor, var, reg)
-#define pxor_r2r(regs,regd) mmx_r2r (pxor, regs, regd)
-
-
-/* 3DNOW extensions */
-
-#define pavgusb_m2r(var,reg) mmx_m2r (pavgusb, var, reg)
-#define pavgusb_r2r(regs,regd) mmx_r2r (pavgusb, regs, regd)
-
-
-/* AMD MMX extensions - also available in intel SSE */
-
-
-#define mmx_m2ri(op,mem,reg,imm) \
- __asm__ volatile (#op " %1, %0, %%" #reg \
- : /* nothing */ \
- : "m" (mem), "i" (imm))
-#define mmx_r2ri(op,regs,regd,imm) \
- __asm__ volatile (#op " %0, %%" #regs ", %%" #regd \
- : /* nothing */ \
- : "i" (imm) )
-
-#define mmx_fetch(mem,hint) \
- __asm__ volatile ("prefetch" #hint " %0" \
- : /* nothing */ \
- : "m" (mem))
-
-
-#define maskmovq(regs,maskreg) mmx_r2ri (maskmovq, regs, maskreg)
-
-#define movntq_r2m(mmreg,var) mmx_r2m (movntq, mmreg, var)
-
-#define pavgb_m2r(var,reg) mmx_m2r (pavgb, var, reg)
-#define pavgb_r2r(regs,regd) mmx_r2r (pavgb, regs, regd)
-#define pavgw_m2r(var,reg) mmx_m2r (pavgw, var, reg)
-#define pavgw_r2r(regs,regd) mmx_r2r (pavgw, regs, regd)
-
-#define pextrw_r2r(mmreg,reg,imm) mmx_r2ri (pextrw, mmreg, reg, imm)
-
-#define pinsrw_r2r(reg,mmreg,imm) mmx_r2ri (pinsrw, reg, mmreg, imm)
-
-#define pmaxsw_m2r(var,reg) mmx_m2r (pmaxsw, var, reg)
-#define pmaxsw_r2r(regs,regd) mmx_r2r (pmaxsw, regs, regd)
-
-#define pmaxub_m2r(var,reg) mmx_m2r (pmaxub, var, reg)
-#define pmaxub_r2r(regs,regd) mmx_r2r (pmaxub, regs, regd)
-
-#define pminsw_m2r(var,reg) mmx_m2r (pminsw, var, reg)
-#define pminsw_r2r(regs,regd) mmx_r2r (pminsw, regs, regd)
-
-#define pminub_m2r(var,reg) mmx_m2r (pminub, var, reg)
-#define pminub_r2r(regs,regd) mmx_r2r (pminub, regs, regd)
-
-#define pmovmskb(mmreg,reg) \
- __asm__ volatile ("movmskps %" #mmreg ", %" #reg)
-
-#define pmulhuw_m2r(var,reg) mmx_m2r (pmulhuw, var, reg)
-#define pmulhuw_r2r(regs,regd) mmx_r2r (pmulhuw, regs, regd)
-
-#define prefetcht0(mem) mmx_fetch (mem, t0)
-#define prefetcht1(mem) mmx_fetch (mem, t1)
-#define prefetcht2(mem) mmx_fetch (mem, t2)
-#define prefetchnta(mem) mmx_fetch (mem, nta)
-
-#define psadbw_m2r(var,reg) mmx_m2r (psadbw, var, reg)
-#define psadbw_r2r(regs,regd) mmx_r2r (psadbw, regs, regd)
-
-#define pshufw_m2r(var,reg,imm) mmx_m2ri(pshufw, var, reg, imm)
-#define pshufw_r2r(regs,regd,imm) mmx_r2ri(pshufw, regs, regd, imm)
-
-#define sfence() __asm__ volatile ("sfence\n\t")
-
-/* SSE2 */
-#define pshufhw_m2r(var,reg,imm) mmx_m2ri(pshufhw, var, reg, imm)
-#define pshufhw_r2r(regs,regd,imm) mmx_r2ri(pshufhw, regs, regd, imm)
-#define pshuflw_m2r(var,reg,imm) mmx_m2ri(pshuflw, var, reg, imm)
-#define pshuflw_r2r(regs,regd,imm) mmx_r2ri(pshuflw, regs, regd, imm)
-
-#define pshufd_r2r(regs,regd,imm) mmx_r2ri(pshufd, regs, regd, imm)
-
-#define movdqa_m2r(var,reg) mmx_m2r (movdqa, var, reg)
-#define movdqa_r2m(reg,var) mmx_r2m (movdqa, reg, var)
-#define movdqa_r2r(regs,regd) mmx_r2r (movdqa, regs, regd)
-#define movdqu_m2r(var,reg) mmx_m2r (movdqu, var, reg)
-#define movdqu_r2m(reg,var) mmx_r2m (movdqu, reg, var)
-#define movdqu_r2r(regs,regd) mmx_r2r (movdqu, regs, regd)
-
-#define pmullw_r2m(reg,var) mmx_r2m (pmullw, reg, var)
-
-#define pslldq_i2r(imm,reg) mmx_i2r (pslldq, imm, reg)
-#define psrldq_i2r(imm,reg) mmx_i2r (psrldq, imm, reg)
-
-#define punpcklqdq_r2r(regs,regd) mmx_r2r (punpcklqdq, regs, regd)
-#define punpckhqdq_r2r(regs,regd) mmx_r2r (punpckhqdq, regs, regd)
-
-
-#endif /* AVCODEC_I386_MMX_H */
diff --git a/libavcodec/i386/simple_idct_mmx.c b/libavcodec/i386/simple_idct_mmx.c
deleted file mode 100644
index 6306fcb..0000000
--- a/libavcodec/i386/simple_idct_mmx.c
+++ /dev/null
@@ -1,1294 +0,0 @@
-/*
- * Simple IDCT MMX
- *
- * Copyright (c) 2001, 2002 Michael Niedermayer <michaelni at gmx.at>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "libavcodec/dsputil.h"
-#include "libavcodec/simple_idct.h"
-
-/*
-23170.475006
-22725.260826
-21406.727617
-19265.545870
-16384.000000
-12872.826198
-8866.956905
-4520.335430
-*/
-#define C0 23170 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-#define C1 22725 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-#define C2 21407 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-#define C3 19266 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-#if 0
-#define C4 16384 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-#else
-#define C4 16383 //cos(i*M_PI/16)*sqrt(2)*(1<<14) - 0.5
-#endif
-#define C5 12873 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-#define C6 8867 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-#define C7 4520 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-
-#define ROW_SHIFT 11
-#define COL_SHIFT 20 // 6
-
-DECLARE_ASM_CONST(8, uint64_t, wm1010)= 0xFFFF0000FFFF0000ULL;
-DECLARE_ASM_CONST(8, uint64_t, d40000)= 0x0000000000040000ULL;
-
-DECLARE_ALIGNED(8, static const int16_t, coeffs[])= {
- 1<<(ROW_SHIFT-1), 0, 1<<(ROW_SHIFT-1), 0,
-// 1<<(COL_SHIFT-1), 0, 1<<(COL_SHIFT-1), 0,
-// 0, 1<<(COL_SHIFT-1-16), 0, 1<<(COL_SHIFT-1-16),
- 1<<(ROW_SHIFT-1), 1, 1<<(ROW_SHIFT-1), 0,
- // the 1 = ((1<<(COL_SHIFT-1))/C4)<<ROW_SHIFT :)
-// 0, 0, 0, 0,
-// 0, 0, 0, 0,
-
- C4, C4, C4, C4,
- C4, -C4, C4, -C4,
-
- C2, C6, C2, C6,
- C6, -C2, C6, -C2,
-
- C1, C3, C1, C3,
- C5, C7, C5, C7,
-
- C3, -C7, C3, -C7,
--C1, -C5, -C1, -C5,
-
- C5, -C1, C5, -C1,
- C7, C3, C7, C3,
-
- C7, -C5, C7, -C5,
- C3, -C1, C3, -C1
-};
-
-#if 0
-static void unused_var_killer(){
- int a= wm1010 + d40000;
- temp[0]=a;
-}
-
-static void inline idctCol (int16_t * col, int16_t *input)
-{
-#undef C0
-#undef C1
-#undef C2
-#undef C3
-#undef C4
-#undef C5
-#undef C6
-#undef C7
- int a0, a1, a2, a3, b0, b1, b2, b3;
- const int C0 = 23170; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
- const int C1 = 22725; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
- const int C2 = 21407; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
- const int C3 = 19266; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
- const int C4 = 16383; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
- const int C5 = 12873; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
- const int C6 = 8867; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
- const int C7 = 4520; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-/*
- if( !(col[8*1] | col[8*2] |col[8*3] |col[8*4] |col[8*5] |col[8*6] | col[8*7])) {
- col[8*0] = col[8*1] = col[8*2] = col[8*3] = col[8*4] =
- col[8*5] = col[8*6] = col[8*7] = col[8*0]<<3;
- return;
- }*/
-
-col[8*0] = input[8*0 + 0];
-col[8*1] = input[8*2 + 0];
-col[8*2] = input[8*0 + 1];
-col[8*3] = input[8*2 + 1];
-col[8*4] = input[8*4 + 0];
-col[8*5] = input[8*6 + 0];
-col[8*6] = input[8*4 + 1];
-col[8*7] = input[8*6 + 1];
-
- a0 = C4*col[8*0] + C2*col[8*2] + C4*col[8*4] + C6*col[8*6] + (1<<(COL_SHIFT-1));
- a1 = C4*col[8*0] + C6*col[8*2] - C4*col[8*4] - C2*col[8*6] + (1<<(COL_SHIFT-1));
- a2 = C4*col[8*0] - C6*col[8*2] - C4*col[8*4] + C2*col[8*6] + (1<<(COL_SHIFT-1));
- a3 = C4*col[8*0] - C2*col[8*2] + C4*col[8*4] - C6*col[8*6] + (1<<(COL_SHIFT-1));
-
- b0 = C1*col[8*1] + C3*col[8*3] + C5*col[8*5] + C7*col[8*7];
- b1 = C3*col[8*1] - C7*col[8*3] - C1*col[8*5] - C5*col[8*7];
- b2 = C5*col[8*1] - C1*col[8*3] + C7*col[8*5] + C3*col[8*7];
- b3 = C7*col[8*1] - C5*col[8*3] + C3*col[8*5] - C1*col[8*7];
-
- col[8*0] = (a0 + b0) >> COL_SHIFT;
- col[8*1] = (a1 + b1) >> COL_SHIFT;
- col[8*2] = (a2 + b2) >> COL_SHIFT;
- col[8*3] = (a3 + b3) >> COL_SHIFT;
- col[8*4] = (a3 - b3) >> COL_SHIFT;
- col[8*5] = (a2 - b2) >> COL_SHIFT;
- col[8*6] = (a1 - b1) >> COL_SHIFT;
- col[8*7] = (a0 - b0) >> COL_SHIFT;
-}
-
-static void inline idctRow (int16_t * output, int16_t * input)
-{
- int16_t row[8];
-
- int a0, a1, a2, a3, b0, b1, b2, b3;
- const int C0 = 23170; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
- const int C1 = 22725; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
- const int C2 = 21407; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
- const int C3 = 19266; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
- const int C4 = 16383; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
- const int C5 = 12873; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
- const int C6 = 8867; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
- const int C7 = 4520; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-
-row[0] = input[0];
-row[2] = input[1];
-row[4] = input[4];
-row[6] = input[5];
-row[1] = input[8];
-row[3] = input[9];
-row[5] = input[12];
-row[7] = input[13];
-
- if( !(row[1] | row[2] |row[3] |row[4] |row[5] |row[6] | row[7]) ) {
- row[0] = row[1] = row[2] = row[3] = row[4] =
- row[5] = row[6] = row[7] = row[0]<<3;
- output[0] = row[0];
- output[2] = row[1];
- output[4] = row[2];
- output[6] = row[3];
- output[8] = row[4];
- output[10] = row[5];
- output[12] = row[6];
- output[14] = row[7];
- return;
- }
-
- a0 = C4*row[0] + C2*row[2] + C4*row[4] + C6*row[6] + (1<<(ROW_SHIFT-1));
- a1 = C4*row[0] + C6*row[2] - C4*row[4] - C2*row[6] + (1<<(ROW_SHIFT-1));
- a2 = C4*row[0] - C6*row[2] - C4*row[4] + C2*row[6] + (1<<(ROW_SHIFT-1));
- a3 = C4*row[0] - C2*row[2] + C4*row[4] - C6*row[6] + (1<<(ROW_SHIFT-1));
-
- b0 = C1*row[1] + C3*row[3] + C5*row[5] + C7*row[7];
- b1 = C3*row[1] - C7*row[3] - C1*row[5] - C5*row[7];
- b2 = C5*row[1] - C1*row[3] + C7*row[5] + C3*row[7];
- b3 = C7*row[1] - C5*row[3] + C3*row[5] - C1*row[7];
-
- row[0] = (a0 + b0) >> ROW_SHIFT;
- row[1] = (a1 + b1) >> ROW_SHIFT;
- row[2] = (a2 + b2) >> ROW_SHIFT;
- row[3] = (a3 + b3) >> ROW_SHIFT;
- row[4] = (a3 - b3) >> ROW_SHIFT;
- row[5] = (a2 - b2) >> ROW_SHIFT;
- row[6] = (a1 - b1) >> ROW_SHIFT;
- row[7] = (a0 - b0) >> ROW_SHIFT;
-
- output[0] = row[0];
- output[2] = row[1];
- output[4] = row[2];
- output[6] = row[3];
- output[8] = row[4];
- output[10] = row[5];
- output[12] = row[6];
- output[14] = row[7];
-}
-#endif
-
-static inline void idct(int16_t *block)
-{
- DECLARE_ALIGNED(8, int64_t, align_tmp[16]);
- int16_t * const temp= (int16_t*)align_tmp;
-
- __asm__ volatile(
-#if 0 //Alternative, simpler variant
-
-#define ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
- "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
- "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
- "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
- "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
- "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
- "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
- "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
- "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
- "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
- "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
- "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
- "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
- "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
- "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
- #rounder ", %%mm4 \n\t"\
- "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
- "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
- "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
- "movq 56(%2), %%mm5 \n\t" /* C7 C5 C7 C5 */\
- "pmaddwd %%mm3, %%mm5 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
- #rounder ", %%mm0 \n\t"\
- "paddd %%mm0, %%mm1 \n\t" /* A1 a1 */\
- "paddd %%mm0, %%mm0 \n\t" \
- "psubd %%mm1, %%mm0 \n\t" /* A2 a2 */\
- "pmaddwd 64(%2), %%mm2 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
- "paddd %%mm5, %%mm7 \n\t" /* B0 b0 */\
- "movq 72(%2), %%mm5 \n\t" /* -C5 -C1 -C5 -C1 */\
- "pmaddwd %%mm3, %%mm5 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
- "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
- "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
- "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
- "paddd %%mm2, %%mm5 \n\t" /* B1 b1 */\
- "psrad $" #shift ", %%mm7 \n\t"\
- "psrad $" #shift ", %%mm4 \n\t"\
- "movq %%mm1, %%mm2 \n\t" /* A1 a1 */\
- "paddd %%mm5, %%mm1 \n\t" /* A1+B1 a1+b1 */\
- "psubd %%mm5, %%mm2 \n\t" /* A1-B1 a1-b1 */\
- "psrad $" #shift ", %%mm1 \n\t"\
- "psrad $" #shift ", %%mm2 \n\t"\
- "packssdw %%mm1, %%mm7 \n\t" /* A1+B1 a1+b1 A0+B0 a0+b0 */\
- "packssdw %%mm4, %%mm2 \n\t" /* A0-B0 a0-b0 A1-B1 a1-b1 */\
- "movq %%mm7, " #dst " \n\t"\
- "movq " #src1 ", %%mm1 \n\t" /* R3 R1 r3 r1 */\
- "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
- "movq %%mm2, 24+" #dst " \n\t"\
- "pmaddwd %%mm1, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
- "movq 88(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\
- "pmaddwd 96(%2), %%mm1 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
- "pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
- "movq %%mm0, %%mm2 \n\t" /* A2 a2 */\
- "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
- "paddd %%mm7, %%mm4 \n\t" /* B2 b2 */\
- "paddd %%mm4, %%mm2 \n\t" /* A2+B2 a2+b2 */\
- "psubd %%mm4, %%mm0 \n\t" /* a2-B2 a2-b2 */\
- "psrad $" #shift ", %%mm2 \n\t"\
- "psrad $" #shift ", %%mm0 \n\t"\
- "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
- "paddd %%mm1, %%mm3 \n\t" /* B3 b3 */\
- "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
- "psubd %%mm3, %%mm4 \n\t" /* a3-B3 a3-b3 */\
- "psrad $" #shift ", %%mm6 \n\t"\
- "packssdw %%mm6, %%mm2 \n\t" /* A3+B3 a3+b3 A2+B2 a2+b2 */\
- "movq %%mm2, 8+" #dst " \n\t"\
- "psrad $" #shift ", %%mm4 \n\t"\
- "packssdw %%mm0, %%mm4 \n\t" /* A2-B2 a2-b2 A3-B3 a3-b3 */\
- "movq %%mm4, 16+" #dst " \n\t"\
-
-#define COL_IDCT(src0, src4, src1, src5, dst, shift) \
- "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
- "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
- "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
- "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
- "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
- "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
- "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
- "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
- "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
- "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
- "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
- "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
- "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
- "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
- "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
- "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
- "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
- "movq %%mm0, %%mm5 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
- "paddd %%mm1, %%mm0 \n\t" /* A1 a1 */\
- "psubd %%mm1, %%mm5 \n\t" /* A2 a2 */\
- "movq 56(%2), %%mm1 \n\t" /* C7 C5 C7 C5 */\
- "pmaddwd %%mm3, %%mm1 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
- "pmaddwd 64(%2), %%mm2 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
- "paddd %%mm1, %%mm7 \n\t" /* B0 b0 */\
- "movq 72(%2), %%mm1 \n\t" /* -C5 -C1 -C5 -C1 */\
- "pmaddwd %%mm3, %%mm1 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
- "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
- "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
- "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
- "paddd %%mm2, %%mm1 \n\t" /* B1 b1 */\
- "psrad $" #shift ", %%mm7 \n\t"\
- "psrad $" #shift ", %%mm4 \n\t"\
- "movq %%mm0, %%mm2 \n\t" /* A1 a1 */\
- "paddd %%mm1, %%mm0 \n\t" /* A1+B1 a1+b1 */\
- "psubd %%mm1, %%mm2 \n\t" /* A1-B1 a1-b1 */\
- "psrad $" #shift ", %%mm0 \n\t"\
- "psrad $" #shift ", %%mm2 \n\t"\
- "packssdw %%mm7, %%mm7 \n\t" /* A0+B0 a0+b0 */\
- "movd %%mm7, " #dst " \n\t"\
- "packssdw %%mm0, %%mm0 \n\t" /* A1+B1 a1+b1 */\
- "movd %%mm0, 16+" #dst " \n\t"\
- "packssdw %%mm2, %%mm2 \n\t" /* A1-B1 a1-b1 */\
- "movd %%mm2, 96+" #dst " \n\t"\
- "packssdw %%mm4, %%mm4 \n\t" /* A0-B0 a0-b0 */\
- "movd %%mm4, 112+" #dst " \n\t"\
- "movq " #src1 ", %%mm0 \n\t" /* R3 R1 r3 r1 */\
- "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
- "pmaddwd %%mm0, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
- "movq 88(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\
- "pmaddwd 96(%2), %%mm0 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
- "pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
- "movq %%mm5, %%mm2 \n\t" /* A2 a2 */\
- "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
- "paddd %%mm7, %%mm4 \n\t" /* B2 b2 */\
- "paddd %%mm4, %%mm2 \n\t" /* A2+B2 a2+b2 */\
- "psubd %%mm4, %%mm5 \n\t" /* a2-B2 a2-b2 */\
- "psrad $" #shift ", %%mm2 \n\t"\
- "psrad $" #shift ", %%mm5 \n\t"\
- "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
- "paddd %%mm0, %%mm3 \n\t" /* B3 b3 */\
- "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
- "psubd %%mm3, %%mm4 \n\t" /* a3-B3 a3-b3 */\
- "psrad $" #shift ", %%mm6 \n\t"\
- "psrad $" #shift ", %%mm4 \n\t"\
- "packssdw %%mm2, %%mm2 \n\t" /* A2+B2 a2+b2 */\
- "packssdw %%mm6, %%mm6 \n\t" /* A3+B3 a3+b3 */\
- "movd %%mm2, 32+" #dst " \n\t"\
- "packssdw %%mm4, %%mm4 \n\t" /* A3-B3 a3-b3 */\
- "packssdw %%mm5, %%mm5 \n\t" /* A2-B2 a2-b2 */\
- "movd %%mm6, 48+" #dst " \n\t"\
- "movd %%mm4, 64+" #dst " \n\t"\
- "movd %%mm5, 80+" #dst " \n\t"\
-
-
-#define DC_COND_ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
- "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
- "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
- "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
- "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
- "movq "MANGLE(wm1010)", %%mm4 \n\t"\
- "pand %%mm0, %%mm4 \n\t"\
- "por %%mm1, %%mm4 \n\t"\
- "por %%mm2, %%mm4 \n\t"\
- "por %%mm3, %%mm4 \n\t"\
- "packssdw %%mm4,%%mm4 \n\t"\
- "movd %%mm4, %%eax \n\t"\
- "orl %%eax, %%eax \n\t"\
- "jz 1f \n\t"\
- "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
- "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
- "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
- "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
- "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
- "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
- "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
- "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
- "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
- "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
- #rounder ", %%mm4 \n\t"\
- "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
- "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
- "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
- "movq 56(%2), %%mm5 \n\t" /* C7 C5 C7 C5 */\
- "pmaddwd %%mm3, %%mm5 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
- #rounder ", %%mm0 \n\t"\
- "paddd %%mm0, %%mm1 \n\t" /* A1 a1 */\
- "paddd %%mm0, %%mm0 \n\t" \
- "psubd %%mm1, %%mm0 \n\t" /* A2 a2 */\
- "pmaddwd 64(%2), %%mm2 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
- "paddd %%mm5, %%mm7 \n\t" /* B0 b0 */\
- "movq 72(%2), %%mm5 \n\t" /* -C5 -C1 -C5 -C1 */\
- "pmaddwd %%mm3, %%mm5 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
- "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
- "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
- "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
- "paddd %%mm2, %%mm5 \n\t" /* B1 b1 */\
- "psrad $" #shift ", %%mm7 \n\t"\
- "psrad $" #shift ", %%mm4 \n\t"\
- "movq %%mm1, %%mm2 \n\t" /* A1 a1 */\
- "paddd %%mm5, %%mm1 \n\t" /* A1+B1 a1+b1 */\
- "psubd %%mm5, %%mm2 \n\t" /* A1-B1 a1-b1 */\
- "psrad $" #shift ", %%mm1 \n\t"\
- "psrad $" #shift ", %%mm2 \n\t"\
- "packssdw %%mm1, %%mm7 \n\t" /* A1+B1 a1+b1 A0+B0 a0+b0 */\
- "packssdw %%mm4, %%mm2 \n\t" /* A0-B0 a0-b0 A1-B1 a1-b1 */\
- "movq %%mm7, " #dst " \n\t"\
- "movq " #src1 ", %%mm1 \n\t" /* R3 R1 r3 r1 */\
- "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
- "movq %%mm2, 24+" #dst " \n\t"\
- "pmaddwd %%mm1, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
- "movq 88(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\
- "pmaddwd 96(%2), %%mm1 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
- "pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
- "movq %%mm0, %%mm2 \n\t" /* A2 a2 */\
- "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
- "paddd %%mm7, %%mm4 \n\t" /* B2 b2 */\
- "paddd %%mm4, %%mm2 \n\t" /* A2+B2 a2+b2 */\
- "psubd %%mm4, %%mm0 \n\t" /* a2-B2 a2-b2 */\
- "psrad $" #shift ", %%mm2 \n\t"\
- "psrad $" #shift ", %%mm0 \n\t"\
- "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
- "paddd %%mm1, %%mm3 \n\t" /* B3 b3 */\
- "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
- "psubd %%mm3, %%mm4 \n\t" /* a3-B3 a3-b3 */\
- "psrad $" #shift ", %%mm6 \n\t"\
- "packssdw %%mm6, %%mm2 \n\t" /* A3+B3 a3+b3 A2+B2 a2+b2 */\
- "movq %%mm2, 8+" #dst " \n\t"\
- "psrad $" #shift ", %%mm4 \n\t"\
- "packssdw %%mm0, %%mm4 \n\t" /* A2-B2 a2-b2 A3-B3 a3-b3 */\
- "movq %%mm4, 16+" #dst " \n\t"\
- "jmp 2f \n\t"\
- "1: \n\t"\
- "pslld $16, %%mm0 \n\t"\
- "#paddd "MANGLE(d40000)", %%mm0 \n\t"\
- "psrad $13, %%mm0 \n\t"\
- "packssdw %%mm0, %%mm0 \n\t"\
- "movq %%mm0, " #dst " \n\t"\
- "movq %%mm0, 8+" #dst " \n\t"\
- "movq %%mm0, 16+" #dst " \n\t"\
- "movq %%mm0, 24+" #dst " \n\t"\
- "2: \n\t"
-
-
-//IDCT( src0, src4, src1, src5, dst, rounder, shift)
-ROW_IDCT( (%0), 8(%0), 16(%0), 24(%0), 0(%1),paddd 8(%2), 11)
-/*ROW_IDCT( 32(%0), 40(%0), 48(%0), 56(%0), 32(%1), paddd (%2), 11)
-ROW_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1), paddd (%2), 11)
-ROW_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1), paddd (%2), 11)*/
-
-DC_COND_ROW_IDCT( 32(%0), 40(%0), 48(%0), 56(%0), 32(%1),paddd (%2), 11)
-DC_COND_ROW_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11)
-DC_COND_ROW_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11)
-
-
-//IDCT( src0, src4, src1, src5, dst, shift)
-COL_IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
-COL_IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
-COL_IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
-COL_IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
-
-#else
-
-#define DC_COND_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
- "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
- "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
- "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
- "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
- "movq "MANGLE(wm1010)", %%mm4 \n\t"\
- "pand %%mm0, %%mm4 \n\t"\
- "por %%mm1, %%mm4 \n\t"\
- "por %%mm2, %%mm4 \n\t"\
- "por %%mm3, %%mm4 \n\t"\
- "packssdw %%mm4,%%mm4 \n\t"\
- "movd %%mm4, %%eax \n\t"\
- "orl %%eax, %%eax \n\t"\
- "jz 1f \n\t"\
- "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
- "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
- "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
- "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
- "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
- "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
- "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
- "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
- "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
- "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
- #rounder ", %%mm4 \n\t"\
- "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
- "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
- "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
- "movq 56(%2), %%mm5 \n\t" /* C7 C5 C7 C5 */\
- "pmaddwd %%mm3, %%mm5 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
- #rounder ", %%mm0 \n\t"\
- "paddd %%mm0, %%mm1 \n\t" /* A1 a1 */\
- "paddd %%mm0, %%mm0 \n\t" \
- "psubd %%mm1, %%mm0 \n\t" /* A2 a2 */\
- "pmaddwd 64(%2), %%mm2 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
- "paddd %%mm5, %%mm7 \n\t" /* B0 b0 */\
- "movq 72(%2), %%mm5 \n\t" /* -C5 -C1 -C5 -C1 */\
- "pmaddwd %%mm3, %%mm5 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
- "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
- "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
- "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
- "paddd %%mm2, %%mm5 \n\t" /* B1 b1 */\
- "psrad $" #shift ", %%mm7 \n\t"\
- "psrad $" #shift ", %%mm4 \n\t"\
- "movq %%mm1, %%mm2 \n\t" /* A1 a1 */\
- "paddd %%mm5, %%mm1 \n\t" /* A1+B1 a1+b1 */\
- "psubd %%mm5, %%mm2 \n\t" /* A1-B1 a1-b1 */\
- "psrad $" #shift ", %%mm1 \n\t"\
- "psrad $" #shift ", %%mm2 \n\t"\
- "packssdw %%mm1, %%mm7 \n\t" /* A1+B1 a1+b1 A0+B0 a0+b0 */\
- "packssdw %%mm4, %%mm2 \n\t" /* A0-B0 a0-b0 A1-B1 a1-b1 */\
- "movq %%mm7, " #dst " \n\t"\
- "movq " #src1 ", %%mm1 \n\t" /* R3 R1 r3 r1 */\
- "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
- "movq %%mm2, 24+" #dst " \n\t"\
- "pmaddwd %%mm1, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
- "movq 88(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\
- "pmaddwd 96(%2), %%mm1 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
- "pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
- "movq %%mm0, %%mm2 \n\t" /* A2 a2 */\
- "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
- "paddd %%mm7, %%mm4 \n\t" /* B2 b2 */\
- "paddd %%mm4, %%mm2 \n\t" /* A2+B2 a2+b2 */\
- "psubd %%mm4, %%mm0 \n\t" /* a2-B2 a2-b2 */\
- "psrad $" #shift ", %%mm2 \n\t"\
- "psrad $" #shift ", %%mm0 \n\t"\
- "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
- "paddd %%mm1, %%mm3 \n\t" /* B3 b3 */\
- "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
- "psubd %%mm3, %%mm4 \n\t" /* a3-B3 a3-b3 */\
- "psrad $" #shift ", %%mm6 \n\t"\
- "packssdw %%mm6, %%mm2 \n\t" /* A3+B3 a3+b3 A2+B2 a2+b2 */\
- "movq %%mm2, 8+" #dst " \n\t"\
- "psrad $" #shift ", %%mm4 \n\t"\
- "packssdw %%mm0, %%mm4 \n\t" /* A2-B2 a2-b2 A3-B3 a3-b3 */\
- "movq %%mm4, 16+" #dst " \n\t"\
- "jmp 2f \n\t"\
- "1: \n\t"\
- "pslld $16, %%mm0 \n\t"\
- "paddd "MANGLE(d40000)", %%mm0 \n\t"\
- "psrad $13, %%mm0 \n\t"\
- "packssdw %%mm0, %%mm0 \n\t"\
- "movq %%mm0, " #dst " \n\t"\
- "movq %%mm0, 8+" #dst " \n\t"\
- "movq %%mm0, 16+" #dst " \n\t"\
- "movq %%mm0, 24+" #dst " \n\t"\
- "2: \n\t"
-
-#define Z_COND_IDCT(src0, src4, src1, src5, dst, rounder, shift, bt) \
- "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
- "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
- "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
- "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
- "movq %%mm0, %%mm4 \n\t"\
- "por %%mm1, %%mm4 \n\t"\
- "por %%mm2, %%mm4 \n\t"\
- "por %%mm3, %%mm4 \n\t"\
- "packssdw %%mm4,%%mm4 \n\t"\
- "movd %%mm4, %%eax \n\t"\
- "orl %%eax, %%eax \n\t"\
- "jz " #bt " \n\t"\
- "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
- "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
- "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
- "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
- "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
- "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
- "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
- "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
- "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
- "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
- #rounder ", %%mm4 \n\t"\
- "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
- "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
- "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
- "movq 56(%2), %%mm5 \n\t" /* C7 C5 C7 C5 */\
- "pmaddwd %%mm3, %%mm5 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
- #rounder ", %%mm0 \n\t"\
- "paddd %%mm0, %%mm1 \n\t" /* A1 a1 */\
- "paddd %%mm0, %%mm0 \n\t" \
- "psubd %%mm1, %%mm0 \n\t" /* A2 a2 */\
- "pmaddwd 64(%2), %%mm2 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
- "paddd %%mm5, %%mm7 \n\t" /* B0 b0 */\
- "movq 72(%2), %%mm5 \n\t" /* -C5 -C1 -C5 -C1 */\
- "pmaddwd %%mm3, %%mm5 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
- "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
- "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
- "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
- "paddd %%mm2, %%mm5 \n\t" /* B1 b1 */\
- "psrad $" #shift ", %%mm7 \n\t"\
- "psrad $" #shift ", %%mm4 \n\t"\
- "movq %%mm1, %%mm2 \n\t" /* A1 a1 */\
- "paddd %%mm5, %%mm1 \n\t" /* A1+B1 a1+b1 */\
- "psubd %%mm5, %%mm2 \n\t" /* A1-B1 a1-b1 */\
- "psrad $" #shift ", %%mm1 \n\t"\
- "psrad $" #shift ", %%mm2 \n\t"\
- "packssdw %%mm1, %%mm7 \n\t" /* A1+B1 a1+b1 A0+B0 a0+b0 */\
- "packssdw %%mm4, %%mm2 \n\t" /* A0-B0 a0-b0 A1-B1 a1-b1 */\
- "movq %%mm7, " #dst " \n\t"\
- "movq " #src1 ", %%mm1 \n\t" /* R3 R1 r3 r1 */\
- "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
- "movq %%mm2, 24+" #dst " \n\t"\
- "pmaddwd %%mm1, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
- "movq 88(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\
- "pmaddwd 96(%2), %%mm1 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
- "pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
- "movq %%mm0, %%mm2 \n\t" /* A2 a2 */\
- "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
- "paddd %%mm7, %%mm4 \n\t" /* B2 b2 */\
- "paddd %%mm4, %%mm2 \n\t" /* A2+B2 a2+b2 */\
- "psubd %%mm4, %%mm0 \n\t" /* a2-B2 a2-b2 */\
- "psrad $" #shift ", %%mm2 \n\t"\
- "psrad $" #shift ", %%mm0 \n\t"\
- "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
- "paddd %%mm1, %%mm3 \n\t" /* B3 b3 */\
- "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
- "psubd %%mm3, %%mm4 \n\t" /* a3-B3 a3-b3 */\
- "psrad $" #shift ", %%mm6 \n\t"\
- "packssdw %%mm6, %%mm2 \n\t" /* A3+B3 a3+b3 A2+B2 a2+b2 */\
- "movq %%mm2, 8+" #dst " \n\t"\
- "psrad $" #shift ", %%mm4 \n\t"\
- "packssdw %%mm0, %%mm4 \n\t" /* A2-B2 a2-b2 A3-B3 a3-b3 */\
- "movq %%mm4, 16+" #dst " \n\t"\
-
-#define ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
- "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
- "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
- "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
- "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
- "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
- "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
- "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
- "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
- "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
- "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
- "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
- "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
- "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
- "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
- #rounder ", %%mm4 \n\t"\
- "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
- "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
- "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
- "movq 56(%2), %%mm5 \n\t" /* C7 C5 C7 C5 */\
- "pmaddwd %%mm3, %%mm5 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
- #rounder ", %%mm0 \n\t"\
- "paddd %%mm0, %%mm1 \n\t" /* A1 a1 */\
- "paddd %%mm0, %%mm0 \n\t" \
- "psubd %%mm1, %%mm0 \n\t" /* A2 a2 */\
- "pmaddwd 64(%2), %%mm2 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
- "paddd %%mm5, %%mm7 \n\t" /* B0 b0 */\
- "movq 72(%2), %%mm5 \n\t" /* -C5 -C1 -C5 -C1 */\
- "pmaddwd %%mm3, %%mm5 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
- "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
- "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
- "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
- "paddd %%mm2, %%mm5 \n\t" /* B1 b1 */\
- "psrad $" #shift ", %%mm7 \n\t"\
- "psrad $" #shift ", %%mm4 \n\t"\
- "movq %%mm1, %%mm2 \n\t" /* A1 a1 */\
- "paddd %%mm5, %%mm1 \n\t" /* A1+B1 a1+b1 */\
- "psubd %%mm5, %%mm2 \n\t" /* A1-B1 a1-b1 */\
- "psrad $" #shift ", %%mm1 \n\t"\
- "psrad $" #shift ", %%mm2 \n\t"\
- "packssdw %%mm1, %%mm7 \n\t" /* A1+B1 a1+b1 A0+B0 a0+b0 */\
- "packssdw %%mm4, %%mm2 \n\t" /* A0-B0 a0-b0 A1-B1 a1-b1 */\
- "movq %%mm7, " #dst " \n\t"\
- "movq " #src1 ", %%mm1 \n\t" /* R3 R1 r3 r1 */\
- "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
- "movq %%mm2, 24+" #dst " \n\t"\
- "pmaddwd %%mm1, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
- "movq 88(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\
- "pmaddwd 96(%2), %%mm1 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
- "pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
- "movq %%mm0, %%mm2 \n\t" /* A2 a2 */\
- "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
- "paddd %%mm7, %%mm4 \n\t" /* B2 b2 */\
- "paddd %%mm4, %%mm2 \n\t" /* A2+B2 a2+b2 */\
- "psubd %%mm4, %%mm0 \n\t" /* a2-B2 a2-b2 */\
- "psrad $" #shift ", %%mm2 \n\t"\
- "psrad $" #shift ", %%mm0 \n\t"\
- "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
- "paddd %%mm1, %%mm3 \n\t" /* B3 b3 */\
- "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
- "psubd %%mm3, %%mm4 \n\t" /* a3-B3 a3-b3 */\
- "psrad $" #shift ", %%mm6 \n\t"\
- "packssdw %%mm6, %%mm2 \n\t" /* A3+B3 a3+b3 A2+B2 a2+b2 */\
- "movq %%mm2, 8+" #dst " \n\t"\
- "psrad $" #shift ", %%mm4 \n\t"\
- "packssdw %%mm0, %%mm4 \n\t" /* A2-B2 a2-b2 A3-B3 a3-b3 */\
- "movq %%mm4, 16+" #dst " \n\t"\
-
-//IDCT( src0, src4, src1, src5, dst, rounder, shift)
-DC_COND_IDCT( 0(%0), 8(%0), 16(%0), 24(%0), 0(%1),paddd 8(%2), 11)
-Z_COND_IDCT( 32(%0), 40(%0), 48(%0), 56(%0), 32(%1),paddd (%2), 11, 4f)
-Z_COND_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11, 2f)
-Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 1f)
-
-#undef IDCT
-#define IDCT(src0, src4, src1, src5, dst, shift) \
- "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
- "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
- "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
- "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
- "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
- "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
- "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
- "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
- "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
- "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
- "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
- "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
- "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
- "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
- "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
- "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
- "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
- "movq %%mm0, %%mm5 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
- "paddd %%mm1, %%mm0 \n\t" /* A1 a1 */\
- "psubd %%mm1, %%mm5 \n\t" /* A2 a2 */\
- "movq 56(%2), %%mm1 \n\t" /* C7 C5 C7 C5 */\
- "pmaddwd %%mm3, %%mm1 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
- "pmaddwd 64(%2), %%mm2 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
- "paddd %%mm1, %%mm7 \n\t" /* B0 b0 */\
- "movq 72(%2), %%mm1 \n\t" /* -C5 -C1 -C5 -C1 */\
- "pmaddwd %%mm3, %%mm1 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
- "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
- "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
- "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
- "paddd %%mm2, %%mm1 \n\t" /* B1 b1 */\
- "psrad $" #shift ", %%mm7 \n\t"\
- "psrad $" #shift ", %%mm4 \n\t"\
- "movq %%mm0, %%mm2 \n\t" /* A1 a1 */\
- "paddd %%mm1, %%mm0 \n\t" /* A1+B1 a1+b1 */\
- "psubd %%mm1, %%mm2 \n\t" /* A1-B1 a1-b1 */\
- "psrad $" #shift ", %%mm0 \n\t"\
- "psrad $" #shift ", %%mm2 \n\t"\
- "packssdw %%mm7, %%mm7 \n\t" /* A0+B0 a0+b0 */\
- "movd %%mm7, " #dst " \n\t"\
- "packssdw %%mm0, %%mm0 \n\t" /* A1+B1 a1+b1 */\
- "movd %%mm0, 16+" #dst " \n\t"\
- "packssdw %%mm2, %%mm2 \n\t" /* A1-B1 a1-b1 */\
- "movd %%mm2, 96+" #dst " \n\t"\
- "packssdw %%mm4, %%mm4 \n\t" /* A0-B0 a0-b0 */\
- "movd %%mm4, 112+" #dst " \n\t"\
- "movq " #src1 ", %%mm0 \n\t" /* R3 R1 r3 r1 */\
- "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
- "pmaddwd %%mm0, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
- "movq 88(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\
- "pmaddwd 96(%2), %%mm0 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
- "pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
- "movq %%mm5, %%mm2 \n\t" /* A2 a2 */\
- "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
- "paddd %%mm7, %%mm4 \n\t" /* B2 b2 */\
- "paddd %%mm4, %%mm2 \n\t" /* A2+B2 a2+b2 */\
- "psubd %%mm4, %%mm5 \n\t" /* a2-B2 a2-b2 */\
- "psrad $" #shift ", %%mm2 \n\t"\
- "psrad $" #shift ", %%mm5 \n\t"\
- "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
- "paddd %%mm0, %%mm3 \n\t" /* B3 b3 */\
- "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
- "psubd %%mm3, %%mm4 \n\t" /* a3-B3 a3-b3 */\
- "psrad $" #shift ", %%mm6 \n\t"\
- "psrad $" #shift ", %%mm4 \n\t"\
- "packssdw %%mm2, %%mm2 \n\t" /* A2+B2 a2+b2 */\
- "packssdw %%mm6, %%mm6 \n\t" /* A3+B3 a3+b3 */\
- "movd %%mm2, 32+" #dst " \n\t"\
- "packssdw %%mm4, %%mm4 \n\t" /* A3-B3 a3-b3 */\
- "packssdw %%mm5, %%mm5 \n\t" /* A2-B2 a2-b2 */\
- "movd %%mm6, 48+" #dst " \n\t"\
- "movd %%mm4, 64+" #dst " \n\t"\
- "movd %%mm5, 80+" #dst " \n\t"
-
-
-//IDCT( src0, src4, src1, src5, dst, shift)
-IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
-IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
-IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
-IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
- "jmp 9f \n\t"
-
- "#" ASMALIGN(4) \
- "4: \n\t"
-Z_COND_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11, 6f)
-Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 5f)
-
-#undef IDCT
-#define IDCT(src0, src4, src1, src5, dst, shift) \
- "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
- "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
- "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
- "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
- "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
- "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
- "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
- "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
- "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
- "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
- "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
- "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
- "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
- "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
- "movq %%mm0, %%mm5 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
- "paddd %%mm1, %%mm0 \n\t" /* A1 a1 */\
- "psubd %%mm1, %%mm5 \n\t" /* A2 a2 */\
- "movq 56(%2), %%mm1 \n\t" /* C7 C5 C7 C5 */\
- "pmaddwd %%mm3, %%mm1 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
- "movq 72(%2), %%mm7 \n\t" /* -C5 -C1 -C5 -C1 */\
- "pmaddwd %%mm3, %%mm7 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
- "paddd %%mm4, %%mm1 \n\t" /* A0+B0 a0+b0 */\
- "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
- "psubd %%mm1, %%mm4 \n\t" /* A0-B0 a0-b0 */\
- "psrad $" #shift ", %%mm1 \n\t"\
- "psrad $" #shift ", %%mm4 \n\t"\
- "movq %%mm0, %%mm2 \n\t" /* A1 a1 */\
- "paddd %%mm7, %%mm0 \n\t" /* A1+B1 a1+b1 */\
- "psubd %%mm7, %%mm2 \n\t" /* A1-B1 a1-b1 */\
- "psrad $" #shift ", %%mm0 \n\t"\
- "psrad $" #shift ", %%mm2 \n\t"\
- "packssdw %%mm1, %%mm1 \n\t" /* A0+B0 a0+b0 */\
- "movd %%mm1, " #dst " \n\t"\
- "packssdw %%mm0, %%mm0 \n\t" /* A1+B1 a1+b1 */\
- "movd %%mm0, 16+" #dst " \n\t"\
- "packssdw %%mm2, %%mm2 \n\t" /* A1-B1 a1-b1 */\
- "movd %%mm2, 96+" #dst " \n\t"\
- "packssdw %%mm4, %%mm4 \n\t" /* A0-B0 a0-b0 */\
- "movd %%mm4, 112+" #dst " \n\t"\
- "movq 88(%2), %%mm1 \n\t" /* C3 C7 C3 C7 */\
- "pmaddwd %%mm3, %%mm1 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
- "movq %%mm5, %%mm2 \n\t" /* A2 a2 */\
- "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
- "paddd %%mm1, %%mm2 \n\t" /* A2+B2 a2+b2 */\
- "psubd %%mm1, %%mm5 \n\t" /* a2-B2 a2-b2 */\
- "psrad $" #shift ", %%mm2 \n\t"\
- "psrad $" #shift ", %%mm5 \n\t"\
- "movq %%mm6, %%mm1 \n\t" /* A3 a3 */\
- "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
- "psubd %%mm3, %%mm1 \n\t" /* a3-B3 a3-b3 */\
- "psrad $" #shift ", %%mm6 \n\t"\
- "psrad $" #shift ", %%mm1 \n\t"\
- "packssdw %%mm2, %%mm2 \n\t" /* A2+B2 a2+b2 */\
- "packssdw %%mm6, %%mm6 \n\t" /* A3+B3 a3+b3 */\
- "movd %%mm2, 32+" #dst " \n\t"\
- "packssdw %%mm1, %%mm1 \n\t" /* A3-B3 a3-b3 */\
- "packssdw %%mm5, %%mm5 \n\t" /* A2-B2 a2-b2 */\
- "movd %%mm6, 48+" #dst " \n\t"\
- "movd %%mm1, 64+" #dst " \n\t"\
- "movd %%mm5, 80+" #dst " \n\t"
-
-//IDCT( src0, src4, src1, src5, dst, shift)
-IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
-IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
-IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
-IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
- "jmp 9f \n\t"
-
- "#" ASMALIGN(4) \
- "6: \n\t"
-Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 7f)
-
-#undef IDCT
-#define IDCT(src0, src4, src1, src5, dst, shift) \
- "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
- "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
- "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
- "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
- "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
- "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
- "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
- "movq %%mm0, %%mm5 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
- "movq 56(%2), %%mm1 \n\t" /* C7 C5 C7 C5 */\
- "pmaddwd %%mm3, %%mm1 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
- "movq 72(%2), %%mm7 \n\t" /* -C5 -C1 -C5 -C1 */\
- "pmaddwd %%mm3, %%mm7 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
- "paddd %%mm4, %%mm1 \n\t" /* A0+B0 a0+b0 */\
- "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
- "psubd %%mm1, %%mm4 \n\t" /* A0-B0 a0-b0 */\
- "psrad $" #shift ", %%mm1 \n\t"\
- "psrad $" #shift ", %%mm4 \n\t"\
- "movq %%mm0, %%mm2 \n\t" /* A1 a1 */\
- "paddd %%mm7, %%mm0 \n\t" /* A1+B1 a1+b1 */\
- "psubd %%mm7, %%mm2 \n\t" /* A1-B1 a1-b1 */\
- "psrad $" #shift ", %%mm0 \n\t"\
- "psrad $" #shift ", %%mm2 \n\t"\
- "packssdw %%mm1, %%mm1 \n\t" /* A0+B0 a0+b0 */\
- "movd %%mm1, " #dst " \n\t"\
- "packssdw %%mm0, %%mm0 \n\t" /* A1+B1 a1+b1 */\
- "movd %%mm0, 16+" #dst " \n\t"\
- "packssdw %%mm2, %%mm2 \n\t" /* A1-B1 a1-b1 */\
- "movd %%mm2, 96+" #dst " \n\t"\
- "packssdw %%mm4, %%mm4 \n\t" /* A0-B0 a0-b0 */\
- "movd %%mm4, 112+" #dst " \n\t"\
- "movq 88(%2), %%mm1 \n\t" /* C3 C7 C3 C7 */\
- "pmaddwd %%mm3, %%mm1 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
- "movq %%mm5, %%mm2 \n\t" /* A2 a2 */\
- "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
- "paddd %%mm1, %%mm2 \n\t" /* A2+B2 a2+b2 */\
- "psubd %%mm1, %%mm5 \n\t" /* a2-B2 a2-b2 */\
- "psrad $" #shift ", %%mm2 \n\t"\
- "psrad $" #shift ", %%mm5 \n\t"\
- "movq %%mm6, %%mm1 \n\t" /* A3 a3 */\
- "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
- "psubd %%mm3, %%mm1 \n\t" /* a3-B3 a3-b3 */\
- "psrad $" #shift ", %%mm6 \n\t"\
- "psrad $" #shift ", %%mm1 \n\t"\
- "packssdw %%mm2, %%mm2 \n\t" /* A2+B2 a2+b2 */\
- "packssdw %%mm6, %%mm6 \n\t" /* A3+B3 a3+b3 */\
- "movd %%mm2, 32+" #dst " \n\t"\
- "packssdw %%mm1, %%mm1 \n\t" /* A3-B3 a3-b3 */\
- "packssdw %%mm5, %%mm5 \n\t" /* A2-B2 a2-b2 */\
- "movd %%mm6, 48+" #dst " \n\t"\
- "movd %%mm1, 64+" #dst " \n\t"\
- "movd %%mm5, 80+" #dst " \n\t"
-
-
-//IDCT( src0, src4, src1, src5, dst, shift)
-IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
-IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
-IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
-IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
- "jmp 9f \n\t"
-
- "#" ASMALIGN(4) \
- "2: \n\t"
-Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 3f)
-
-#undef IDCT
-#define IDCT(src0, src4, src1, src5, dst, shift) \
- "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
- "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
- "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
- "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
- "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
- "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
- "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
- "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
- "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
- "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
- "movq %%mm0, %%mm5 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
- "movq 56(%2), %%mm1 \n\t" /* C7 C5 C7 C5 */\
- "pmaddwd %%mm3, %%mm1 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
- "pmaddwd 64(%2), %%mm2 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
- "paddd %%mm1, %%mm7 \n\t" /* B0 b0 */\
- "movq 72(%2), %%mm1 \n\t" /* -C5 -C1 -C5 -C1 */\
- "pmaddwd %%mm3, %%mm1 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
- "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
- "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
- "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
- "paddd %%mm2, %%mm1 \n\t" /* B1 b1 */\
- "psrad $" #shift ", %%mm7 \n\t"\
- "psrad $" #shift ", %%mm4 \n\t"\
- "movq %%mm0, %%mm2 \n\t" /* A1 a1 */\
- "paddd %%mm1, %%mm0 \n\t" /* A1+B1 a1+b1 */\
- "psubd %%mm1, %%mm2 \n\t" /* A1-B1 a1-b1 */\
- "psrad $" #shift ", %%mm0 \n\t"\
- "psrad $" #shift ", %%mm2 \n\t"\
- "packssdw %%mm7, %%mm7 \n\t" /* A0+B0 a0+b0 */\
- "movd %%mm7, " #dst " \n\t"\
- "packssdw %%mm0, %%mm0 \n\t" /* A1+B1 a1+b1 */\
- "movd %%mm0, 16+" #dst " \n\t"\
- "packssdw %%mm2, %%mm2 \n\t" /* A1-B1 a1-b1 */\
- "movd %%mm2, 96+" #dst " \n\t"\
- "packssdw %%mm4, %%mm4 \n\t" /* A0-B0 a0-b0 */\
- "movd %%mm4, 112+" #dst " \n\t"\
- "movq " #src1 ", %%mm0 \n\t" /* R3 R1 r3 r1 */\
- "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
- "pmaddwd %%mm0, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
- "movq 88(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\
- "pmaddwd 96(%2), %%mm0 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
- "pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
- "movq %%mm5, %%mm2 \n\t" /* A2 a2 */\
- "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
- "paddd %%mm7, %%mm4 \n\t" /* B2 b2 */\
- "paddd %%mm4, %%mm2 \n\t" /* A2+B2 a2+b2 */\
- "psubd %%mm4, %%mm5 \n\t" /* a2-B2 a2-b2 */\
- "psrad $" #shift ", %%mm2 \n\t"\
- "psrad $" #shift ", %%mm5 \n\t"\
- "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
- "paddd %%mm0, %%mm3 \n\t" /* B3 b3 */\
- "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
- "psubd %%mm3, %%mm4 \n\t" /* a3-B3 a3-b3 */\
- "psrad $" #shift ", %%mm6 \n\t"\
- "psrad $" #shift ", %%mm4 \n\t"\
- "packssdw %%mm2, %%mm2 \n\t" /* A2+B2 a2+b2 */\
- "packssdw %%mm6, %%mm6 \n\t" /* A3+B3 a3+b3 */\
- "movd %%mm2, 32+" #dst " \n\t"\
- "packssdw %%mm4, %%mm4 \n\t" /* A3-B3 a3-b3 */\
- "packssdw %%mm5, %%mm5 \n\t" /* A2-B2 a2-b2 */\
- "movd %%mm6, 48+" #dst " \n\t"\
- "movd %%mm4, 64+" #dst " \n\t"\
- "movd %%mm5, 80+" #dst " \n\t"
-
-//IDCT( src0, src4, src1, src5, dst, shift)
-IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
-IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
-IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
-IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
- "jmp 9f \n\t"
-
- "#" ASMALIGN(4) \
- "3: \n\t"
-#undef IDCT
-#define IDCT(src0, src4, src1, src5, dst, shift) \
- "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
- "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
- "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
- "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
- "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
- "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
- "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
- "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
- "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
- "movq %%mm0, %%mm5 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
- "movq 64(%2), %%mm3 \n\t"\
- "pmaddwd %%mm2, %%mm3 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
- "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
- "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
- "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
- "psrad $" #shift ", %%mm7 \n\t"\
- "psrad $" #shift ", %%mm4 \n\t"\
- "movq %%mm0, %%mm1 \n\t" /* A1 a1 */\
- "paddd %%mm3, %%mm0 \n\t" /* A1+B1 a1+b1 */\
- "psubd %%mm3, %%mm1 \n\t" /* A1-B1 a1-b1 */\
- "psrad $" #shift ", %%mm0 \n\t"\
- "psrad $" #shift ", %%mm1 \n\t"\
- "packssdw %%mm7, %%mm7 \n\t" /* A0+B0 a0+b0 */\
- "movd %%mm7, " #dst " \n\t"\
- "packssdw %%mm0, %%mm0 \n\t" /* A1+B1 a1+b1 */\
- "movd %%mm0, 16+" #dst " \n\t"\
- "packssdw %%mm1, %%mm1 \n\t" /* A1-B1 a1-b1 */\
- "movd %%mm1, 96+" #dst " \n\t"\
- "packssdw %%mm4, %%mm4 \n\t" /* A0-B0 a0-b0 */\
- "movd %%mm4, 112+" #dst " \n\t"\
- "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
- "pmaddwd %%mm2, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
- "pmaddwd 96(%2), %%mm2 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
- "movq %%mm5, %%mm1 \n\t" /* A2 a2 */\
- "paddd %%mm4, %%mm1 \n\t" /* A2+B2 a2+b2 */\
- "psubd %%mm4, %%mm5 \n\t" /* a2-B2 a2-b2 */\
- "psrad $" #shift ", %%mm1 \n\t"\
- "psrad $" #shift ", %%mm5 \n\t"\
- "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
- "paddd %%mm2, %%mm6 \n\t" /* A3+B3 a3+b3 */\
- "psubd %%mm2, %%mm4 \n\t" /* a3-B3 a3-b3 */\
- "psrad $" #shift ", %%mm6 \n\t"\
- "psrad $" #shift ", %%mm4 \n\t"\
- "packssdw %%mm1, %%mm1 \n\t" /* A2+B2 a2+b2 */\
- "packssdw %%mm6, %%mm6 \n\t" /* A3+B3 a3+b3 */\
- "movd %%mm1, 32+" #dst " \n\t"\
- "packssdw %%mm4, %%mm4 \n\t" /* A3-B3 a3-b3 */\
- "packssdw %%mm5, %%mm5 \n\t" /* A2-B2 a2-b2 */\
- "movd %%mm6, 48+" #dst " \n\t"\
- "movd %%mm4, 64+" #dst " \n\t"\
- "movd %%mm5, 80+" #dst " \n\t"
-
-
-//IDCT( src0, src4, src1, src5, dst, shift)
-IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
-IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
-IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
-IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
- "jmp 9f \n\t"
-
- "#" ASMALIGN(4) \
- "5: \n\t"
-#undef IDCT
-#define IDCT(src0, src4, src1, src5, dst, shift) \
- "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
- "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
- "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
- "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
- "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
- "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
- "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
- "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
- "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
- "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
- "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
- "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
- "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
- "movq %%mm0, %%mm5 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
- "paddd %%mm1, %%mm0 \n\t" /* A1 a1 */\
- "psubd %%mm1, %%mm5 \n\t" /* A2 a2 */\
- "movq 8+" #src0 ", %%mm2 \n\t" /* R4 R0 r4 r0 */\
- "movq 8+" #src4 ", %%mm3 \n\t" /* R6 R2 r6 r2 */\
- "movq 16(%2), %%mm1 \n\t" /* C4 C4 C4 C4 */\
- "pmaddwd %%mm2, %%mm1 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
- "movq 24(%2), %%mm7 \n\t" /* -C4 C4 -C4 C4 */\
- "pmaddwd %%mm7, %%mm2 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
- "movq 32(%2), %%mm7 \n\t" /* C6 C2 C6 C2 */\
- "pmaddwd %%mm3, %%mm7 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
- "pmaddwd 40(%2), %%mm3 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
- "paddd %%mm1, %%mm7 \n\t" /* A0 a0 */\
- "paddd %%mm1, %%mm1 \n\t" /* 2C0 2c0 */\
- "psubd %%mm7, %%mm1 \n\t" /* A3 a3 */\
- "paddd %%mm2, %%mm3 \n\t" /* A1 a1 */\
- "paddd %%mm2, %%mm2 \n\t" /* 2C1 2c1 */\
- "psubd %%mm3, %%mm2 \n\t" /* A2 a2 */\
- "psrad $" #shift ", %%mm4 \n\t"\
- "psrad $" #shift ", %%mm7 \n\t"\
- "psrad $" #shift ", %%mm3 \n\t"\
- "packssdw %%mm7, %%mm4 \n\t" /* A0 a0 */\
- "movq %%mm4, " #dst " \n\t"\
- "psrad $" #shift ", %%mm0 \n\t"\
- "packssdw %%mm3, %%mm0 \n\t" /* A1 a1 */\
- "movq %%mm0, 16+" #dst " \n\t"\
- "movq %%mm0, 96+" #dst " \n\t"\
- "movq %%mm4, 112+" #dst " \n\t"\
- "psrad $" #shift ", %%mm5 \n\t"\
- "psrad $" #shift ", %%mm6 \n\t"\
- "psrad $" #shift ", %%mm2 \n\t"\
- "packssdw %%mm2, %%mm5 \n\t" /* A2-B2 a2-b2 */\
- "movq %%mm5, 32+" #dst " \n\t"\
- "psrad $" #shift ", %%mm1 \n\t"\
- "packssdw %%mm1, %%mm6 \n\t" /* A3+B3 a3+b3 */\
- "movq %%mm6, 48+" #dst " \n\t"\
- "movq %%mm6, 64+" #dst " \n\t"\
- "movq %%mm5, 80+" #dst " \n\t"
-
-
-//IDCT( src0, src4, src1, src5, dst, shift)
-IDCT( 0(%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
-//IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
-IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
-//IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
- "jmp 9f \n\t"
-
-
- "#" ASMALIGN(4) \
- "1: \n\t"
-#undef IDCT
-#define IDCT(src0, src4, src1, src5, dst, shift) \
- "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
- "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
- "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
- "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
- "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
- "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
- "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
- "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
- "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
- "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
- "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
- "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
- "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
- "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
- "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
- "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
- "movq %%mm0, %%mm5 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
- "paddd %%mm1, %%mm0 \n\t" /* A1 a1 */\
- "psubd %%mm1, %%mm5 \n\t" /* A2 a2 */\
- "movq 64(%2), %%mm1 \n\t"\
- "pmaddwd %%mm2, %%mm1 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
- "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
- "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
- "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
- "psrad $" #shift ", %%mm7 \n\t"\
- "psrad $" #shift ", %%mm4 \n\t"\
- "movq %%mm0, %%mm3 \n\t" /* A1 a1 */\
- "paddd %%mm1, %%mm0 \n\t" /* A1+B1 a1+b1 */\
- "psubd %%mm1, %%mm3 \n\t" /* A1-B1 a1-b1 */\
- "psrad $" #shift ", %%mm0 \n\t"\
- "psrad $" #shift ", %%mm3 \n\t"\
- "packssdw %%mm7, %%mm7 \n\t" /* A0+B0 a0+b0 */\
- "movd %%mm7, " #dst " \n\t"\
- "packssdw %%mm0, %%mm0 \n\t" /* A1+B1 a1+b1 */\
- "movd %%mm0, 16+" #dst " \n\t"\
- "packssdw %%mm3, %%mm3 \n\t" /* A1-B1 a1-b1 */\
- "movd %%mm3, 96+" #dst " \n\t"\
- "packssdw %%mm4, %%mm4 \n\t" /* A0-B0 a0-b0 */\
- "movd %%mm4, 112+" #dst " \n\t"\
- "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
- "pmaddwd %%mm2, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
- "pmaddwd 96(%2), %%mm2 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
- "movq %%mm5, %%mm3 \n\t" /* A2 a2 */\
- "paddd %%mm4, %%mm3 \n\t" /* A2+B2 a2+b2 */\
- "psubd %%mm4, %%mm5 \n\t" /* a2-B2 a2-b2 */\
- "psrad $" #shift ", %%mm3 \n\t"\
- "psrad $" #shift ", %%mm5 \n\t"\
- "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
- "paddd %%mm2, %%mm6 \n\t" /* A3+B3 a3+b3 */\
- "psubd %%mm2, %%mm4 \n\t" /* a3-B3 a3-b3 */\
- "psrad $" #shift ", %%mm6 \n\t"\
- "packssdw %%mm3, %%mm3 \n\t" /* A2+B2 a2+b2 */\
- "movd %%mm3, 32+" #dst " \n\t"\
- "psrad $" #shift ", %%mm4 \n\t"\
- "packssdw %%mm6, %%mm6 \n\t" /* A3+B3 a3+b3 */\
- "movd %%mm6, 48+" #dst " \n\t"\
- "packssdw %%mm4, %%mm4 \n\t" /* A3-B3 a3-b3 */\
- "packssdw %%mm5, %%mm5 \n\t" /* A2-B2 a2-b2 */\
- "movd %%mm4, 64+" #dst " \n\t"\
- "movd %%mm5, 80+" #dst " \n\t"
-
-
-//IDCT( src0, src4, src1, src5, dst, shift)
-IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
-IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
-IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
-IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
- "jmp 9f \n\t"
-
-
- "#" ASMALIGN(4)
- "7: \n\t"
-#undef IDCT
-#define IDCT(src0, src4, src1, src5, dst, shift) \
- "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
- "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
- "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
- "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
- "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
- "psrad $" #shift ", %%mm4 \n\t"\
- "psrad $" #shift ", %%mm0 \n\t"\
- "movq 8+" #src0 ", %%mm2 \n\t" /* R4 R0 r4 r0 */\
- "movq 16(%2), %%mm1 \n\t" /* C4 C4 C4 C4 */\
- "pmaddwd %%mm2, %%mm1 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
- "movq 24(%2), %%mm7 \n\t" /* -C4 C4 -C4 C4 */\
- "pmaddwd %%mm7, %%mm2 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
- "movq 32(%2), %%mm7 \n\t" /* C6 C2 C6 C2 */\
- "psrad $" #shift ", %%mm1 \n\t"\
- "packssdw %%mm1, %%mm4 \n\t" /* A0 a0 */\
- "movq %%mm4, " #dst " \n\t"\
- "psrad $" #shift ", %%mm2 \n\t"\
- "packssdw %%mm2, %%mm0 \n\t" /* A1 a1 */\
- "movq %%mm0, 16+" #dst " \n\t"\
- "movq %%mm0, 96+" #dst " \n\t"\
- "movq %%mm4, 112+" #dst " \n\t"\
- "movq %%mm0, 32+" #dst " \n\t"\
- "movq %%mm4, 48+" #dst " \n\t"\
- "movq %%mm4, 64+" #dst " \n\t"\
- "movq %%mm0, 80+" #dst " \n\t"
-
-//IDCT( src0, src4, src1, src5, dst, shift)
-IDCT( 0(%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
-//IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
-IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
-//IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
-
-
-#endif
-
-/*
-Input
- 00 40 04 44 20 60 24 64
- 10 30 14 34 50 70 54 74
- 01 41 03 43 21 61 23 63
- 11 31 13 33 51 71 53 73
- 02 42 06 46 22 62 26 66
- 12 32 16 36 52 72 56 76
- 05 45 07 47 25 65 27 67
- 15 35 17 37 55 75 57 77
-
-Temp
- 00 04 10 14 20 24 30 34
- 40 44 50 54 60 64 70 74
- 01 03 11 13 21 23 31 33
- 41 43 51 53 61 63 71 73
- 02 06 12 16 22 26 32 36
- 42 46 52 56 62 66 72 76
- 05 07 15 17 25 27 35 37
- 45 47 55 57 65 67 75 77
-*/
-
-"9: \n\t"
- :: "r" (block), "r" (temp), "r" (coeffs)
- : "%eax"
- );
-}
-
-void ff_simple_idct_mmx(int16_t *block)
-{
- idct(block);
-}
-
-//FIXME merge add/put into the idct
-
-void ff_simple_idct_put_mmx(uint8_t *dest, int line_size, DCTELEM *block)
-{
- idct(block);
- put_pixels_clamped_mmx(block, dest, line_size);
-}
-void ff_simple_idct_add_mmx(uint8_t *dest, int line_size, DCTELEM *block)
-{
- idct(block);
- add_pixels_clamped_mmx(block, dest, line_size);
-}
diff --git a/libavcodec/i386/vp3dsp_mmx.h b/libavcodec/i386/vp3dsp_mmx.h
deleted file mode 100644
index 2e79913..0000000
--- a/libavcodec/i386/vp3dsp_mmx.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * vp3dsp MMX function declarations
- * Copyright (c) 2007 Aurelien Jacobs <aurel at gnuage.org>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef AVCODEC_I386_VP3DSP_MMX_H
-#define AVCODEC_I386_VP3DSP_MMX_H
-
-#include <stdint.h>
-#include "libavcodec/dsputil.h"
-
-void ff_vp3_idct_mmx(int16_t *data);
-void ff_vp3_idct_put_mmx(uint8_t *dest, int line_size, DCTELEM *block);
-void ff_vp3_idct_add_mmx(uint8_t *dest, int line_size, DCTELEM *block);
-
-void ff_vp3_v_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values);
-void ff_vp3_h_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values);
-
-#endif /* AVCODEC_I386_VP3DSP_MMX_H */
diff --git a/libavcodec/i386/vp3dsp_sse2.h b/libavcodec/i386/vp3dsp_sse2.h
deleted file mode 100644
index 55908c2..0000000
--- a/libavcodec/i386/vp3dsp_sse2.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * vp3dsp SSE2 function declarations
- * Copyright (c) 2007 Aurelien Jacobs <aurel at gnuage.org>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef AVCODEC_I386_VP3DSP_SSE2_H
-#define AVCODEC_I386_VP3DSP_SSE2_H
-
-#include "libavcodec/dsputil.h"
-
-void ff_vp3_idct_sse2(int16_t *input_data);
-void ff_vp3_idct_put_sse2(uint8_t *dest, int line_size, DCTELEM *block);
-void ff_vp3_idct_add_sse2(uint8_t *dest, int line_size, DCTELEM *block);
-
-#endif /* AVCODEC_I386_VP3DSP_SSE2_H */
diff --git a/libavcodec/i386/x86inc.asm b/libavcodec/i386/x86inc.asm
deleted file mode 100644
index 54c4679..0000000
--- a/libavcodec/i386/x86inc.asm
+++ /dev/null
@@ -1,546 +0,0 @@
-;*****************************************************************************
-;* x86inc.asm
-;*****************************************************************************
-;* Copyright (C) 2005-2008 Loren Merritt <lorenm at u.washington.edu>
-;*
-;* This file is part of FFmpeg.
-;*
-;* FFmpeg is free software; you can redistribute it and/or
-;* modify it under the terms of the GNU Lesser General Public
-;* License as published by the Free Software Foundation; either
-;* version 2.1 of the License, or (at your option) any later version.
-;*
-;* FFmpeg is distributed in the hope that it will be useful,
-;* but WITHOUT ANY WARRANTY; without even the implied warranty of
-;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-;* Lesser General Public License for more details.
-;*
-;* You should have received a copy of the GNU Lesser General Public
-;* License along with FFmpeg; if not, write to the Free Software
-;* 51, Inc., Foundation Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-;*****************************************************************************
-
-; FIXME: All of the 64bit asm functions that take a stride as an argument
-; via register, assume that the high dword of that register is filled with 0.
-; This is true in practice (since we never do any 64bit arithmetic on strides,
-; and x264's strides are all positive), but is not guaranteed by the ABI.
-
-; Name of the .rodata section.
-; Kludge: Something on OS X fails to align .rodata even given an align attribute,
-; so use a different read-only section.
-%macro SECTION_RODATA 0
- %ifidn __OUTPUT_FORMAT__,macho64
- SECTION .text align=16
- %elifidn __OUTPUT_FORMAT__,macho
- SECTION .text align=16
- fakegot:
- %else
- SECTION .rodata align=16
- %endif
-%endmacro
-
-; PIC support macros. All these macros are totally harmless when PIC is
-; not defined but can ruin everything if misused in PIC mode. On x86_32, shared
-; objects cannot directly access global variables by address, they need to
-; go through the GOT (global offset table). Most OSes do not care about it
-; and let you load non-shared .so objects (Linux, Win32...). However, OS X
-; requires PIC code in its .dylib objects.
-;
-; - GLOBAL should be used as a suffix for global addressing, eg.
-; picgetgot ebx
-; mov eax, [foo GLOBAL]
-; instead of
-; mov eax, [foo]
-;
-; - picgetgot computes the GOT address into the given register in PIC
-; mode, otherwise does nothing. You need to do this before using GLOBAL.
-; Before in both execution order and compiled code order (so GLOBAL knows
-; which register the GOT is in).
-
-%ifndef PIC
- %define GLOBAL
- %macro picgetgot 1
- %endmacro
-%elifdef ARCH_X86_64
- %define PIC64
- %define GLOBAL wrt rip
- %macro picgetgot 1
- %endmacro
-%else
- %define PIC32
- %ifidn __OUTPUT_FORMAT__,macho
- ; There is no real global offset table on OS X, but we still
- ; need to reference our variables by offset.
- %macro picgetgot 1
- call %%getgot
- %%getgot:
- pop %1
- add %1, $$ - %%getgot
- %undef GLOBAL
- %define GLOBAL + %1 - fakegot
- %endmacro
- %else ; elf
- extern _GLOBAL_OFFSET_TABLE_
- %macro picgetgot 1
- call %%getgot
- %%getgot:
- pop %1
- add %1, _GLOBAL_OFFSET_TABLE_ + $$ - %%getgot wrt ..gotpc
- %undef GLOBAL
- %define GLOBAL + %1 wrt ..gotoff
- %endmacro
- %endif
-%endif
-
-; Macros to eliminate most code duplication between x86_32 and x86_64:
-; Currently this works only for leaf functions which load all their arguments
-; into registers at the start, and make no other use of the stack. Luckily that
-; covers most of x264's asm.
-
-; PROLOGUE:
-; %1 = number of arguments. loads them from stack if needed.
-; %2 = number of registers used, not including PIC. pushes callee-saved regs if needed.
-; %3 = whether global constants are used in this function. inits x86_32 PIC if needed.
-; %4 = list of names to define to registers
-; PROLOGUE can also be invoked by adding the same options to cglobal
-
-; e.g.
-; cglobal foo, 2,3,0, dst, src, tmp
-; declares a function (foo), taking two args (dst and src), one local variable (tmp), and not using globals
-
-; TODO Some functions can use some args directly from the stack. If they're the
-; last args then you can just not declare them, but if they're in the middle
-; we need more flexible macro.
-
-; RET:
-; Pops anything that was pushed by PROLOGUE
-
-; REP_RET:
-; Same, but if it doesn't pop anything it becomes a 2-byte ret, for athlons
-; which are slow when a normal ret follows a branch.
-
-%macro DECLARE_REG 6
- %define r%1q %2
- %define r%1d %3
- %define r%1w %4
- %define r%1b %5
- %define r%1m %6
- %define r%1 %2
-%endmacro
-
-%macro DECLARE_REG_SIZE 2
- %define r%1q r%1
- %define e%1q r%1
- %define r%1d e%1
- %define e%1d e%1
- %define r%1w %1
- %define e%1w %1
- %define r%1b %2
- %define e%1b %2
-%ifndef ARCH_X86_64
- %define r%1 e%1
-%endif
-%endmacro
-
-DECLARE_REG_SIZE ax, al
-DECLARE_REG_SIZE bx, bl
-DECLARE_REG_SIZE cx, cl
-DECLARE_REG_SIZE dx, dl
-DECLARE_REG_SIZE si, sil
-DECLARE_REG_SIZE di, dil
-DECLARE_REG_SIZE bp, bpl
-
-%ifdef ARCH_X86_64
- %define gprsize 8
-%else
- %define gprsize 4
-%endif
-
-%macro PUSH 1
- push %1
- %assign stack_offset stack_offset+gprsize
-%endmacro
-
-%macro POP 1
- pop %1
- %assign stack_offset stack_offset-gprsize
-%endmacro
-
-%macro SUB 2
- sub %1, %2
- %ifidn %1, rsp
- %assign stack_offset stack_offset+(%2)
- %endif
-%endmacro
-
-%macro ADD 2
- add %1, %2
- %ifidn %1, rsp
- %assign stack_offset stack_offset-(%2)
- %endif
-%endmacro
-
-%macro movifnidn 2
- %ifnidn %1, %2
- mov %1, %2
- %endif
-%endmacro
-
-%macro movsxdifnidn 2
- %ifnidn %1, %2
- movsxd %1, %2
- %endif
-%endmacro
-
-%macro ASSERT 1
- %if (%1) == 0
- %error assert failed
- %endif
-%endmacro
-
-%macro DEFINE_ARGS 0-*
- %ifdef n_arg_names
- %assign %%i 0
- %rep n_arg_names
- CAT_UNDEF arg_name %+ %%i, q
- CAT_UNDEF arg_name %+ %%i, d
- CAT_UNDEF arg_name %+ %%i, w
- CAT_UNDEF arg_name %+ %%i, b
- CAT_UNDEF arg_name, %%i
- %assign %%i %%i+1
- %endrep
- %endif
-
- %assign %%i 0
- %rep %0
- %xdefine %1q r %+ %%i %+ q
- %xdefine %1d r %+ %%i %+ d
- %xdefine %1w r %+ %%i %+ w
- %xdefine %1b r %+ %%i %+ b
- CAT_XDEFINE arg_name, %%i, %1
- %assign %%i %%i+1
- %rotate 1
- %endrep
- %assign n_arg_names %%i
-%endmacro
-
-%ifdef ARCH_X86_64 ;==========================================================
-%ifidn __OUTPUT_FORMAT__,win32
-
-DECLARE_REG 0, rcx, ecx, cx, cl, ecx
-DECLARE_REG 1, rdx, edx, dx, dl, edx
-DECLARE_REG 2, r8, r8d, r8w, r8b, r8d
-DECLARE_REG 3, r9, r9d, r9w, r9b, r9d
-DECLARE_REG 4, rdi, edi, di, dil, [rsp + stack_offset + 40]
-DECLARE_REG 5, rsi, esi, si, sil, [rsp + stack_offset + 48]
-DECLARE_REG 6, rax, eax, ax, al, [rsp + stack_offset + 56]
-%define r7m [rsp + stack_offset + 64]
-%define r8m [rsp + stack_offset + 72]
-
-%macro LOAD_IF_USED 2 ; reg_id, number_of_args
- %if %1 < %2
- mov r%1, [rsp + 8 + %1*8]
- %endif
-%endmacro
-
-%else ;=======================================================================
-
-DECLARE_REG 0, rdi, edi, di, dil, edi
-DECLARE_REG 1, rsi, esi, si, sil, esi
-DECLARE_REG 2, rdx, edx, dx, dl, edx
-DECLARE_REG 3, rcx, ecx, cx, cl, ecx
-DECLARE_REG 4, r8, r8d, r8w, r8b, r8d
-DECLARE_REG 5, r9, r9d, r9w, r9b, r9d
-DECLARE_REG 6, rax, eax, ax, al, [rsp + stack_offset + 8]
-%define r7m [rsp + stack_offset + 16]
-%define r8m [rsp + stack_offset + 24]
-
-%macro LOAD_IF_USED 2 ; reg_id, number_of_args
- %if %1 < %2
- mov r%1, [rsp - 40 + %1*8]
- %endif
-%endmacro
-
-%endif ; !WIN64
-
-%macro PROLOGUE 2-4+ 0 ; #args, #regs, pic, arg_names...
- ASSERT %2 >= %1
- ASSERT %2 <= 7
- %assign stack_offset 0
-%ifidn __OUTPUT_FORMAT__,win32
- LOAD_IF_USED 4, %1
- LOAD_IF_USED 5, %1
-%endif
- LOAD_IF_USED 6, %1
- DEFINE_ARGS %4
-%endmacro
-
-%macro RET 0
- ret
-%endmacro
-
-%macro REP_RET 0
- rep ret
-%endmacro
-
-%else ; X86_32 ;==============================================================
-
-DECLARE_REG 0, eax, eax, ax, al, [esp + stack_offset + 4]
-DECLARE_REG 1, ecx, ecx, cx, cl, [esp + stack_offset + 8]
-DECLARE_REG 2, edx, edx, dx, dl, [esp + stack_offset + 12]
-DECLARE_REG 3, ebx, ebx, bx, bl, [esp + stack_offset + 16]
-DECLARE_REG 4, esi, esi, si, null, [esp + stack_offset + 20]
-DECLARE_REG 5, edi, edi, di, null, [esp + stack_offset + 24]
-DECLARE_REG 6, ebp, ebp, bp, null, [esp + stack_offset + 28]
-%define r7m [esp + stack_offset + 32]
-%define r8m [esp + stack_offset + 36]
-%define rsp esp
-
-%macro PUSH_IF_USED 1 ; reg_id
- %if %1 < regs_used
- push r%1
- %assign stack_offset stack_offset+4
- %endif
-%endmacro
-
-%macro POP_IF_USED 1 ; reg_id
- %if %1 < regs_used
- pop r%1
- %endif
-%endmacro
-
-%macro LOAD_IF_USED 2 ; reg_id, number_of_args
- %if %1 < %2
- mov r%1, [esp + stack_offset + 4 + %1*4]
- %endif
-%endmacro
-
-%macro PROLOGUE 2-4+ 0 ; #args, #regs, pic, arg_names...
- ASSERT %2 >= %1
- %assign stack_offset 0
- %assign regs_used %2
- %ifdef PIC
- %if %3
- %assign regs_used regs_used+1
- %endif
- %endif
- ASSERT regs_used <= 7
- PUSH_IF_USED 3
- PUSH_IF_USED 4
- PUSH_IF_USED 5
- PUSH_IF_USED 6
- LOAD_IF_USED 0, %1
- LOAD_IF_USED 1, %1
- LOAD_IF_USED 2, %1
- LOAD_IF_USED 3, %1
- LOAD_IF_USED 4, %1
- LOAD_IF_USED 5, %1
- LOAD_IF_USED 6, %1
- %if %3
- picgetgot r%2
- %endif
- DEFINE_ARGS %4
-%endmacro
-
-%macro RET 0
- POP_IF_USED 6
- POP_IF_USED 5
- POP_IF_USED 4
- POP_IF_USED 3
- ret
-%endmacro
-
-%macro REP_RET 0
- %if regs_used > 3
- RET
- %else
- rep ret
- %endif
-%endmacro
-
-%endif ;======================================================================
-
-
-
-;=============================================================================
-; arch-independent part
-;=============================================================================
-
-%assign function_align 16
-
-; Symbol prefix for C linkage
-%macro cglobal 1-2+
- %ifidn __OUTPUT_FORMAT__,elf
- %ifdef PREFIX
- global _%1:function hidden
- %define %1 _%1
- %else
- global %1:function hidden
- %endif
- %else
- %ifdef PREFIX
- global _%1
- %define %1 _%1
- %else
- global %1
- %endif
- %endif
- align function_align
- %1:
- RESET_MM_PERMUTATION ; not really needed, but makes disassembly somewhat nicer
- %if %0 > 1
- PROLOGUE %2
- %endif
-%endmacro
-
-%macro cextern 1
- %ifdef PREFIX
- extern _%1
- %define %1 _%1
- %else
- extern %1
- %endif
-%endmacro
-
-; This is needed for ELF, otherwise the GNU linker assumes the stack is
-; executable by default.
-%ifidn __OUTPUT_FORMAT__,elf
-SECTION .note.GNU-stack noalloc noexec nowrite progbits
-%endif
-
-%assign FENC_STRIDE 16
-%assign FDEC_STRIDE 32
-
-; merge mmx and sse*
-
-%macro CAT_XDEFINE 3
- %xdefine %1%2 %3
-%endmacro
-
-%macro CAT_UNDEF 2
- %undef %1%2
-%endmacro
-
-%macro INIT_MMX 0
- %define RESET_MM_PERMUTATION INIT_MMX
- %define mmsize 8
- %define num_mmregs 8
- %define mova movq
- %define movu movq
- %define movh movd
- %define movnt movntq
- %assign %%i 0
- %rep 8
- CAT_XDEFINE m, %%i, mm %+ %%i
- CAT_XDEFINE nmm, %%i, %%i
- %assign %%i %%i+1
- %endrep
- %rep 8
- CAT_UNDEF m, %%i
- CAT_UNDEF nmm, %%i
- %assign %%i %%i+1
- %endrep
-%endmacro
-
-%macro INIT_XMM 0
- %define RESET_MM_PERMUTATION INIT_XMM
- %define mmsize 16
- %define num_mmregs 8
- %ifdef ARCH_X86_64
- %define num_mmregs 16
- %endif
- %define mova movdqa
- %define movu movdqu
- %define movh movq
- %define movnt movntdq
- %assign %%i 0
- %rep num_mmregs
- CAT_XDEFINE m, %%i, xmm %+ %%i
- CAT_XDEFINE nxmm, %%i, %%i
- %assign %%i %%i+1
- %endrep
-%endmacro
-
-INIT_MMX
-
-; I often want to use macros that permute their arguments. e.g. there's no
-; efficient way to implement butterfly or transpose or dct without swapping some
-; arguments.
-;
-; I would like to not have to manually keep track of the permutations:
-; If I insert a permutation in the middle of a function, it should automatically
-; change everything that follows. For more complex macros I may also have multiple
-; implementations, e.g. the SSE2 and SSSE3 versions may have different permutations.
-;
-; Hence these macros. Insert a PERMUTE or some SWAPs at the end of a macro that
-; permutes its arguments. It's equivalent to exchanging the contents of the
-; registers, except that this way you exchange the register names instead, so it
-; doesn't cost any cycles.
-
-%macro PERMUTE 2-* ; takes a list of pairs to swap
-%rep %0/2
- %xdefine tmp%2 m%2
- %xdefine ntmp%2 nm%2
- %rotate 2
-%endrep
-%rep %0/2
- %xdefine m%1 tmp%2
- %xdefine nm%1 ntmp%2
- %undef tmp%2
- %undef ntmp%2
- %rotate 2
-%endrep
-%endmacro
-
-%macro SWAP 2-* ; swaps a single chain (sometimes more concise than pairs)
-%rep %0-1
-%ifdef m%1
- %xdefine tmp m%1
- %xdefine m%1 m%2
- %xdefine m%2 tmp
- CAT_XDEFINE n, m%1, %1
- CAT_XDEFINE n, m%2, %2
-%else
- ; If we were called as "SWAP m0,m1" rather than "SWAP 0,1" infer the original numbers here.
- ; Be careful using this mode in nested macros though, as in some cases there may be
- ; other copies of m# that have already been dereferenced and don't get updated correctly.
- %xdefine %%n1 n %+ %1
- %xdefine %%n2 n %+ %2
- %xdefine tmp m %+ %%n1
- CAT_XDEFINE m, %%n1, m %+ %%n2
- CAT_XDEFINE m, %%n2, tmp
- CAT_XDEFINE n, m %+ %%n1, %%n1
- CAT_XDEFINE n, m %+ %%n2, %%n2
-%endif
- %undef tmp
- %rotate 1
-%endrep
-%endmacro
-
-%macro SAVE_MM_PERMUTATION 1
- %assign %%i 0
- %rep num_mmregs
- CAT_XDEFINE %1_m, %%i, m %+ %%i
- %assign %%i %%i+1
- %endrep
-%endmacro
-
-%macro LOAD_MM_PERMUTATION 1
- %assign %%i 0
- %rep num_mmregs
- CAT_XDEFINE m, %%i, %1_m %+ %%i
- %assign %%i %%i+1
- %endrep
-%endmacro
-
-%macro call 1
- call %1
- %ifdef %1_m0
- LOAD_MM_PERMUTATION %1
- %endif
-%endmacro
-
-; substitutions which are functionally identical but reduce code size
-%define movdqa movaps
-%define movdqu movups
-
diff --git a/libavcodec/idcinvideo.c b/libavcodec/idcinvideo.c
index 43889a6..fa3bb16 100644
--- a/libavcodec/idcinvideo.c
+++ b/libavcodec/idcinvideo.c
@@ -60,7 +60,7 @@ typedef struct
int count;
unsigned char used;
int children[2];
-} hnode_t;
+} hnode;
typedef struct IdcinContext {
@@ -70,7 +70,7 @@ typedef struct IdcinContext {
const unsigned char *buf;
int size;
- hnode_t huff_nodes[256][HUF_TOKENS*2];
+ hnode huff_nodes[256][HUF_TOKENS*2];
int num_huff_nodes[256];
} IdcinContext;
@@ -81,7 +81,7 @@ typedef struct IdcinContext {
* Returns the node index of the lowest unused node, or -1 if all nodes
* are used.
*/
-static int huff_smallest_node(hnode_t *hnodes, int num_hnodes) {
+static int huff_smallest_node(hnode *hnodes, int num_hnodes) {
int i;
int best, best_node;
@@ -114,7 +114,7 @@ static int huff_smallest_node(hnode_t *hnodes, int num_hnodes) {
* That is: huff_nodes[prev][num_huff_nodes[prev]] is the root node.
*/
static av_cold void huff_build_tree(IdcinContext *s, int prev) {
- hnode_t *node, *hnodes;
+ hnode *node, *hnodes;
int num_hnodes, i;
num_hnodes = HUF_TOKENS;
@@ -173,7 +173,7 @@ static av_cold int idcin_decode_init(AVCodecContext *avctx)
static void idcin_decode_vlcs(IdcinContext *s)
{
- hnode_t *hnodes;
+ hnode *hnodes;
long x, y;
int prev;
unsigned char v = 0;
diff --git a/libavcodec/imc.c b/libavcodec/imc.c
index 436a5c9..91d9feb 100644
--- a/libavcodec/imc.c
+++ b/libavcodec/imc.c
@@ -143,7 +143,7 @@ static av_cold int imc_decode_init(AVCodecContext * avctx)
/* initialize the VLC tables */
for(i = 0; i < 4 ; i++) {
for(j = 0; j < 4; j++) {
- huffman_vlc[i][j].table = vlc_tables[vlc_offsets[i * 4 + j]];
+ huffman_vlc[i][j].table = &vlc_tables[vlc_offsets[i * 4 + j]];
huffman_vlc[i][j].table_allocated = vlc_offsets[i * 4 + j + 1] - vlc_offsets[i * 4 + j];
init_vlc(&huffman_vlc[i][j], 9, imc_huffman_sizes[i],
imc_huffman_lens[i][j], 1, 1,
@@ -155,6 +155,7 @@ static av_cold int imc_decode_init(AVCodecContext * avctx)
ff_fft_init(&q->fft, 7, 1);
dsputil_init(&q->dsp, avctx);
avctx->sample_fmt = SAMPLE_FMT_S16;
+ avctx->channel_layout = (avctx->channels==2) ? CH_LAYOUT_STEREO : CH_LAYOUT_MONO;
return 0;
}
diff --git a/libavcodec/imgconvert.c b/libavcodec/imgconvert.c
index 72a5bbe..952ddcb 100644
--- a/libavcodec/imgconvert.c
+++ b/libavcodec/imgconvert.c
@@ -35,7 +35,8 @@
#include "colorspace.h"
#ifdef HAVE_MMX
-#include "i386/mmx.h"
+#include "x86/mmx.h"
+#include "x86/dsputil_mmx.h"
#endif
#define xglue(x, y) x ## y
@@ -266,6 +267,9 @@ static const PixFmtInfo pix_fmt_info[PIX_FMT_NB] = {
[PIX_FMT_XVMC_MPEG2_IDCT] = {
.name = "xvmcidct",
},
+ [PIX_FMT_VDPAU_H264] = {
+ .name = "vdpau_h264",
+ },
[PIX_FMT_UYYVYY411] = {
.name = "uyyvyy411",
.nb_channels = 1,
@@ -783,7 +787,7 @@ static int avcodec_find_best_pix_fmt1(int64_t pix_fmt_mask,
dst_pix_fmt = -1;
min_dist = 0x7fffffff;
for(i = 0;i < PIX_FMT_NB; i++) {
- if (pix_fmt_mask & (1 << i)) {
+ if (pix_fmt_mask & (1ULL << i)) {
loss = avcodec_get_pix_fmt_loss(i, src_pix_fmt, has_alpha) & loss_mask;
if (loss == 0) {
dist = avg_bits_per_pixel(i);
@@ -2733,13 +2737,8 @@ static void deinterlace_line(uint8_t *dst,
#else
{
- mmx_t rounder;
- rounder.uw[0]=4;
- rounder.uw[1]=4;
- rounder.uw[2]=4;
- rounder.uw[3]=4;
pxor_r2r(mm7,mm7);
- movq_m2r(rounder,mm6);
+ movq_m2r(ff_pw_4,mm6);
}
for (;size > 3; size-=4) {
DEINT_LINE_LUM
@@ -2776,13 +2775,8 @@ static void deinterlace_line_inplace(uint8_t *lum_m4, uint8_t *lum_m3, uint8_t *
#else
{
- mmx_t rounder;
- rounder.uw[0]=4;
- rounder.uw[1]=4;
- rounder.uw[2]=4;
- rounder.uw[3]=4;
pxor_r2r(mm7,mm7);
- movq_m2r(rounder,mm6);
+ movq_m2r(ff_pw_4,mm6);
}
for (;size > 3; size-=4) {
DEINT_INPLACE_LINE_LUM
diff --git a/libavcodec/imgconvert.h b/libavcodec/imgconvert.h
index bfaa03e..83bce68 100644
--- a/libavcodec/imgconvert.h
+++ b/libavcodec/imgconvert.h
@@ -33,4 +33,7 @@ int ff_fill_pointer(AVPicture *picture, uint8_t *ptr, int pix_fmt, int height);
int ff_get_plane_bytewidth(enum PixelFormat pix_fmt, int width, int plane);
+int img_convert(AVPicture *dst, int dst_pix_fmt, const AVPicture *src,
+ int src_pix_fmt, int src_width, int src_height);
+
#endif /* AVCODEC_IMGCONVERT_H */
diff --git a/libavcodec/imgconvert_template.c b/libavcodec/imgconvert_template.c
index 2d23be8..7b4dbf9 100644
--- a/libavcodec/imgconvert_template.c
+++ b/libavcodec/imgconvert_template.c
@@ -19,10 +19,6 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-/* This header intentionally has no multiple inclusion guards. It is meant to
- * be included multiple times and generates different code depending on the
- * value of certain #defines. */
-
#ifndef RGB_OUT
#define RGB_OUT(d, r, g, b) RGBA_OUT(d, r, g, b, 0xff)
#endif
diff --git a/libavcodec/imgresample.c b/libavcodec/imgresample.c
index 3c70c2a..6ca810c 100644
--- a/libavcodec/imgresample.c
+++ b/libavcodec/imgresample.c
@@ -26,6 +26,7 @@
#include "avcodec.h"
#include "dsputil.h"
+#include "imgconvert.h"
#include "libswscale/swscale.h"
#ifdef HAVE_ALTIVEC
@@ -154,7 +155,7 @@ static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src,
#ifdef HAVE_MMX
-#include "i386/mmx.h"
+#include "x86/mmx.h"
#define FILTER4(reg) \
{\
@@ -182,7 +183,7 @@ static void h_resample_fast4_mmx(uint8_t *dst, int dst_width,
int src_pos, phase;
const uint8_t *s;
int16_t *filter;
- mmx_t tmp;
+ uint64_t tmp;
src_pos = src_start;
pxor_r2r(mm7, mm7);
@@ -199,13 +200,13 @@ static void h_resample_fast4_mmx(uint8_t *dst, int dst_width,
packuswb_r2r(mm7, mm3);
packuswb_r2r(mm7, mm2);
movq_r2m(mm0, tmp);
- dst[0] = tmp.ub[0];
+ dst[0] = tmp & 0xFF;
movq_r2m(mm1, tmp);
- dst[1] = tmp.ub[0];
+ dst[1] = tmp & 0xFF;
movq_r2m(mm2, tmp);
- dst[2] = tmp.ub[0];
+ dst[2] = tmp & 0xFF;
movq_r2m(mm3, tmp);
- dst[3] = tmp.ub[0];
+ dst[3] = tmp & 0xFF;
dst += 4;
dst_width -= 4;
}
@@ -213,7 +214,7 @@ static void h_resample_fast4_mmx(uint8_t *dst, int dst_width,
FILTER4(mm0);
packuswb_r2r(mm7, mm0);
movq_r2m(mm0, tmp);
- dst[0] = tmp.ub[0];
+ dst[0] = tmp & 0xFF;
dst++;
dst_width--;
}
@@ -223,17 +224,14 @@ static void h_resample_fast4_mmx(uint8_t *dst, int dst_width,
static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src,
int wrap, int16_t *filter)
{
- int sum, i, v;
+ int sum, i;
const uint8_t *s;
- mmx_t tmp;
- mmx_t coefs[4];
+ uint64_t tmp;
+ uint64_t coefs[4];
for(i=0;i<4;i++) {
- v = filter[i];
- coefs[i].uw[0] = v;
- coefs[i].uw[1] = v;
- coefs[i].uw[2] = v;
- coefs[i].uw[3] = v;
+ tmp = filter[i];
+ coefs[i] = (tmp<<48) + (tmp<<32) + (tmp<<16) + tmp;
}
pxor_r2r(mm7, mm7);
@@ -261,7 +259,7 @@ static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src,
packuswb_r2r(mm7, mm0);
movq_r2m(mm0, tmp);
- *(uint32_t *)dst = tmp.ud[0];
+ *(uint32_t *)dst = tmp & 0xFFFFFFFF;
dst += 4;
s += 4;
dst_width -= 4;
diff --git a/libavcodec/indeo3.c b/libavcodec/indeo3.c
index 533057d..eea27f9 100644
--- a/libavcodec/indeo3.c
+++ b/libavcodec/indeo3.c
@@ -147,7 +147,7 @@ static av_cold void iv_free_func(Indeo3DecodeContext *s)
av_free(s->corrector_type);
}
-typedef struct {
+struct ustr {
long xpos;
long ypos;
long width;
@@ -155,7 +155,7 @@ typedef struct {
long split_flag;
long split_direction;
long usl7;
-} ustr_t;
+};
#define LV1_CHECK(buf1,rle_v3,lv1,lp2) \
@@ -213,7 +213,7 @@ static void iv_Decode_Chunk(Indeo3DecodeContext *s,
uint32_t *cur_lp, *ref_lp;
const uint32_t *correction_lp[2], *correctionloworder_lp[2], *correctionhighorder_lp[2];
uint8_t *correction_type_sp[2];
- ustr_t strip_tbl[20], *strip;
+ struct ustr strip_tbl[20], *strip;
int i, j, k, lp1, lp2, flag1, cmd, blks_width, blks_height, region_160_width,
rle_v1, rle_v2, rle_v3;
unsigned short res;
@@ -252,14 +252,14 @@ static void iv_Decode_Chunk(Indeo3DecodeContext *s,
if(cmd == 0) {
strip++;
- memcpy(strip, strip-1, sizeof(ustr_t));
+ memcpy(strip, strip-1, sizeof(*strip));
strip->split_flag = 1;
strip->split_direction = 0;
strip->height = (strip->height > 8 ? ((strip->height+8)>>4)<<3 : 4);
continue;
} else if(cmd == 1) {
strip++;
- memcpy(strip, strip-1, sizeof(ustr_t));
+ memcpy(strip, strip-1, sizeof(*strip));
strip->split_flag = 1;
strip->split_direction = 1;
strip->width = (strip->width > 8 ? ((strip->width+8)>>4)<<3 : 4);
diff --git a/libavcodec/internal.h b/libavcodec/internal.h
new file mode 100644
index 0000000..ec954a4
--- /dev/null
+++ b/libavcodec/internal.h
@@ -0,0 +1,39 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file internal.h
+ * common internal api header.
+ */
+
+#ifndef AVCODEC_INTERNAL_H
+#define AVCODEC_INTERNAL_H
+
+/**
+ * Logs a generic warning message about a missing feature.
+ * @param[in] avc a pointer to an arbitrary struct of which the first field is
+ * a pointer to an AVClass struct
+ * @param[in] feature string containing the name of the missing feature
+ * @param[in] want_sample indicates if samples are wanted which exhibit this feature.
+ * If \p want_sample is non-zero, additional verbage will be added to the log
+ * message which tells the user how to report samples to the development
+ * mailing list.
+ */
+void ff_log_missing_feature(void *avc, const char *feature, int want_sample);
+
+#endif /* AVCODEC_INTERNAL_H */
diff --git a/libavcodec/intrax8.c b/libavcodec/intrax8.c
index 0436deb..1bca7f6 100644
--- a/libavcodec/intrax8.c
+++ b/libavcodec/intrax8.c
@@ -42,7 +42,7 @@ static VLC j_ac_vlc[2][2][8]; //[quant<13],[intra/inter],[select]
static VLC j_dc_vlc[2][8]; //[quant], [select]
static VLC j_orient_vlc[2][4]; //[quant], [select]
-static void x8_vlc_init(){
+static void x8_vlc_init(void){
int i;
#define init_ac_vlc(dst,src) \
@@ -511,7 +511,7 @@ static int x8_decode_intra_mb(IntraX8Context* const w, const int chroma){
int sign;
assert(w->orient<12);
- memset(s->block[0],0x00,64*sizeof(DCTELEM));
+ s->dsp.clear_block(s->block[0]);
if(chroma){
dc_mode=2;
diff --git a/libavcodec/libamr.c b/libavcodec/libamr.c
index 4f56e4d..79ca57f 100644
--- a/libavcodec/libamr.c
+++ b/libavcodec/libamr.c
@@ -81,9 +81,9 @@
#include <amrnb/interf_enc.h>
#endif
-static const char *nb_bitrate_unsupported =
+static const char nb_bitrate_unsupported[] =
"bitrate not supported: use one of 4.75k, 5.15k, 5.9k, 6.7k, 7.4k, 7.95k, 10.2k or 12.2k\n";
-static const char *wb_bitrate_unsupported =
+static const char wb_bitrate_unsupported[] =
"bitrate not supported: use one of 6.6k, 8.85k, 12.65k, 14.25k, 15.85k, 18.25k, 19.85k, 23.05k, or 23.85k\n";
/* Common code for fixed and float version*/
@@ -245,10 +245,10 @@ static int amr_nb_decode_close(AVCodecContext * avctx)
static int amr_nb_decode_frame(AVCodecContext * avctx,
void *data, int *data_size,
- uint8_t * buf, int buf_size)
+ const uint8_t * buf, int buf_size)
{
AMRContext *s = avctx->priv_data;
- uint8_t*amrData=buf;
+ const uint8_t*amrData=buf;
int offset=0;
UWord8 toc, q, ft;
Word16 serial[SERIAL_FRAMESIZE]; /* coded bits */
@@ -441,10 +441,10 @@ static int amr_nb_encode_close(AVCodecContext * avctx)
static int amr_nb_decode_frame(AVCodecContext * avctx,
void *data, int *data_size,
- uint8_t * buf, int buf_size)
+ const uint8_t * buf, int buf_size)
{
AMRContext *s = avctx->priv_data;
- uint8_t*amrData=buf;
+ const uint8_t*amrData=buf;
static const uint8_t block_size[16]={ 12, 13, 15, 17, 19, 20, 26, 31, 5, 0, 0, 0, 0, 0, 0, 0 };
enum Mode dec_mode;
int packet_size;
@@ -654,10 +654,10 @@ static int amr_wb_decode_init(AVCodecContext * avctx)
static int amr_wb_decode_frame(AVCodecContext * avctx,
void *data, int *data_size,
- uint8_t * buf, int buf_size)
+ const uint8_t * buf, int buf_size)
{
AMRWBContext *s = avctx->priv_data;
- uint8_t*amrData=buf;
+ const uint8_t*amrData=buf;
int mode;
int packet_size;
static const uint8_t block_size[16] = {18, 23, 33, 37, 41, 47, 51, 59, 61, 6, 6, 0, 0, 0, 1, 1};
diff --git a/libavcodec/libdiracdec.c b/libavcodec/libdiracdec.c
index 06830f7..6c6e467 100644
--- a/libavcodec/libdiracdec.c
+++ b/libavcodec/libdiracdec.c
@@ -88,10 +88,12 @@ static int libdirac_decode_frame(AVCodecContext *avccontext,
*data_size = 0;
- if (buf_size>0)
+ if (buf_size>0) {
/* set data to decode into buffer */
dirac_buffer (p_dirac_params->p_decoder, buf, buf+buf_size);
-
+ if ((buf[4] &0x08) == 0x08 && (buf[4] & 0x03))
+ avccontext->has_b_frames = 1;
+ }
while (1) {
/* parse data and process result */
DecoderState state = dirac_parse (p_dirac_params->p_decoder);
diff --git a/libavcodec/libschroedingerdec.c b/libavcodec/libschroedingerdec.c
index f9b4d21..36cba01 100644
--- a/libavcodec/libschroedingerdec.c
+++ b/libavcodec/libschroedingerdec.c
@@ -235,6 +235,9 @@ static int libschroedinger_decode_frame(AVCodecContext *avccontext,
do {
if ((enc_buf = FfmpegFindNextSchroParseUnit(&parse_ctx))) {
/* Push buffer into decoder. */
+ if (SCHRO_PARSE_CODE_IS_PICTURE(enc_buf->data[4]) &&
+ SCHRO_PARSE_CODE_NUM_REFS(enc_buf->data[4]) > 0)
+ avccontext->has_b_frames = 1;
state = schro_decoder_push (decoder, enc_buf);
if (state == SCHRO_DECODER_FIRST_ACCESS_UNIT)
libschroedinger_handle_first_access_unit(avccontext);
diff --git a/libavcodec/libxvidff.c b/libavcodec/libxvidff.c
index 36f27c7..3149459 100644
--- a/libavcodec/libxvidff.c
+++ b/libavcodec/libxvidff.c
@@ -38,13 +38,13 @@
#define BUFFER_CAT(x) (&((x)[strlen(x)]))
/* For PPC Use */
-extern int has_altivec(void);
+int has_altivec(void);
/**
* Structure for the private Xvid context.
* This stores all the private context for the codec.
*/
-typedef struct xvid_context {
+struct xvid_context {
void *encoder_handle; /** Handle for Xvid encoder */
int xsize, ysize; /** Frame size */
int vop_flags; /** VOP flags for Xvid encoder */
@@ -58,15 +58,15 @@ typedef struct xvid_context {
char *twopassfile; /** second pass temp file name */
unsigned char *intra_matrix; /** P-Frame Quant Matrix */
unsigned char *inter_matrix; /** I-Frame Quant Matrix */
-} xvid_context_t;
+};
/**
* Structure for the private first-pass plugin.
*/
-typedef struct xvid_ff_pass1 {
+struct xvid_ff_pass1 {
int version; /** Xvid version */
- xvid_context_t *context; /** Pointer to private context */
-} xvid_ff_pass1_t;
+ struct xvid_context *context; /** Pointer to private context */
+};
/* Prototypes - See function implementation for details */
int xvid_strip_vol_header(AVCodecContext *avctx, unsigned char *frame, unsigned int header_len, unsigned int frame_len);
@@ -84,12 +84,12 @@ void xvid_correct_framerate(AVCodecContext *avctx);
av_cold int ff_xvid_encode_init(AVCodecContext *avctx) {
int xerr, i;
int xvid_flags = avctx->flags;
- xvid_context_t *x = avctx->priv_data;
+ struct xvid_context *x = avctx->priv_data;
uint16_t *intra, *inter;
int fd;
xvid_plugin_single_t single;
- xvid_ff_pass1_t rc2pass1;
+ struct xvid_ff_pass1 rc2pass1;
xvid_plugin_2pass2_t rc2pass2;
xvid_gbl_init_t xvid_gbl_init;
xvid_enc_create_t xvid_enc_create;
@@ -166,7 +166,7 @@ av_cold int ff_xvid_encode_init(AVCodecContext *avctx) {
xvid_gbl_init.version = XVID_VERSION;
xvid_gbl_init.debug = 0;
-#ifdef ARCH_POWERPC
+#ifdef ARCH_PPC
/* Xvid's PPC support is borked, use libavcodec to detect */
#ifdef HAVE_ALTIVEC
if( has_altivec() ) {
@@ -208,7 +208,7 @@ av_cold int ff_xvid_encode_init(AVCodecContext *avctx) {
x->twopassfile = NULL;
if( xvid_flags & CODEC_FLAG_PASS1 ) {
- memset(&rc2pass1, 0, sizeof(xvid_ff_pass1_t));
+ memset(&rc2pass1, 0, sizeof(struct xvid_ff_pass1));
rc2pass1.version = XVID_VERSION;
rc2pass1.context = x;
x->twopassbuffer = av_malloc(BUFFER_SIZE);
@@ -370,7 +370,7 @@ int ff_xvid_encode_frame(AVCodecContext *avctx,
unsigned char *frame, int buf_size, void *data) {
int xerr, i;
char *tmp;
- xvid_context_t *x = avctx->priv_data;
+ struct xvid_context *x = avctx->priv_data;
AVFrame *picture = data;
AVFrame *p = &(x->encoded_picture);
@@ -475,7 +475,7 @@ int ff_xvid_encode_frame(AVCodecContext *avctx,
* @return Returns 0, success guaranteed
*/
av_cold int ff_xvid_encode_close(AVCodecContext *avctx) {
- xvid_context_t *x = avctx->priv_data;
+ struct xvid_context *x = avctx->priv_data;
xvid_encore(x->encoder_handle, XVID_ENC_DESTROY, NULL, NULL);
@@ -616,7 +616,7 @@ void xvid_correct_framerate(AVCodecContext *avctx) {
*/
static int xvid_ff_2pass_create(xvid_plg_create_t * param,
void ** handle) {
- xvid_ff_pass1_t *x = (xvid_ff_pass1_t *)param->param;
+ struct xvid_ff_pass1 *x = (struct xvid_ff_pass1 *)param->param;
char *log = x->context->twopassbuffer;
/* Do a quick bounds check */
@@ -645,7 +645,7 @@ static int xvid_ff_2pass_create(xvid_plg_create_t * param,
* @param param Destrooy context
* @return Returns 0, success guaranteed
*/
-static int xvid_ff_2pass_destroy(xvid_context_t *ref,
+static int xvid_ff_2pass_destroy(struct xvid_context *ref,
xvid_plg_destroy_t *param) {
/* Currently cannot think of anything to do on destruction */
/* Still, the framework should be here for reference/use */
@@ -661,7 +661,7 @@ static int xvid_ff_2pass_destroy(xvid_context_t *ref,
* @param param Frame data
* @return Returns 0, success guaranteed
*/
-static int xvid_ff_2pass_before(xvid_context_t *ref,
+static int xvid_ff_2pass_before(struct xvid_context *ref,
xvid_plg_data_t *param) {
int motion_remove;
int motion_replacements;
@@ -704,7 +704,7 @@ static int xvid_ff_2pass_before(xvid_context_t *ref,
* @param param Statistic data
* @return Returns XVID_ERR_xxxx on failure, or 0 on success
*/
-static int xvid_ff_2pass_after(xvid_context_t *ref,
+static int xvid_ff_2pass_after(struct xvid_context *ref,
xvid_plg_data_t *param) {
char *log = ref->twopassbuffer;
char *frame_types = " ipbs";
@@ -770,7 +770,7 @@ AVCodec libxvid_encoder = {
"libxvid",
CODEC_TYPE_VIDEO,
CODEC_ID_XVID,
- sizeof(xvid_context_t),
+ sizeof(struct xvid_context),
ff_xvid_encode_init,
ff_xvid_encode_frame,
ff_xvid_encode_close,
diff --git a/libavcodec/lsp.c b/libavcodec/lsp.c
index d2785f7..f57f621 100644
--- a/libavcodec/lsp.c
+++ b/libavcodec/lsp.c
@@ -72,7 +72,7 @@ static void lsp2poly(int* f, const int16_t* lsp, int lp_half_order)
{
f[i] = f[i-2];
for(j=i; j>1; j--)
- f[j] -= MULL(f[j-1], lsp[2*i-2]) - f[j-2];
+ f[j] -= MULL(f[j-1], lsp[2*i-2], FRAC_BITS) - f[j-2];
f[1] -= lsp[2*i-2] << 8;
}
diff --git a/libavcodec/mathops.h b/libavcodec/mathops.h
index 123fcb7..07265c8 100644
--- a/libavcodec/mathops.h
+++ b/libavcodec/mathops.h
@@ -26,13 +26,13 @@
#ifdef ARCH_X86_32
-#include "i386/mathops.h"
+#include "x86/mathops.h"
-#elif defined(ARCH_ARMV4L)
+#elif defined(ARCH_ARM)
-#include "armv4l/mathops.h"
+#include "arm/mathops.h"
-#elif defined(ARCH_POWERPC)
+#elif defined(ARCH_PPC)
#include "ppc/mathops.h"
@@ -45,7 +45,7 @@
/* generic implementation */
#ifndef MULL
-# define MULL(a,b) (((int64_t)(a) * (int64_t)(b)) >> FRAC_BITS)
+# define MULL(a,b,s) (((int64_t)(a) * (int64_t)(b)) >> (s))
#endif
#ifndef MULH
diff --git a/libavcodec/mimic.c b/libavcodec/mimic.c
index b740275..e30b682 100644
--- a/libavcodec/mimic.c
+++ b/libavcodec/mimic.c
@@ -163,7 +163,7 @@ static int vlc_decode_block(MimicContext *ctx, int num_coeffs, int qscale)
DCTELEM *block = ctx->dct_block;
unsigned int pos;
- memset(block, 0, 64 * sizeof(DCTELEM));
+ ctx->dsp.clear_block(block);
block[0] = get_bits(&ctx->gb, 8) << 3;
diff --git a/libavcodec/mjpega_dump_header_bsf.c b/libavcodec/mjpega_dump_header_bsf.c
index a3f0131..fb27efb 100644
--- a/libavcodec/mjpega_dump_header_bsf.c
+++ b/libavcodec/mjpega_dump_header_bsf.c
@@ -35,6 +35,7 @@ static int mjpega_dump_header(AVBitStreamFilterContext *bsfc, AVCodecContext *av
const uint8_t *buf, int buf_size, int keyframe)
{
uint8_t *poutbufp;
+ unsigned dqt = 0, dht = 0, sof0 = 0;
int i;
if (avctx->codec_id != CODEC_ID_MJPEG) {
@@ -59,12 +60,13 @@ static int mjpega_dump_header(AVBitStreamFilterContext *bsfc, AVCodecContext *av
for (i = 0; i < buf_size - 1; i++) {
if (buf[i] == 0xff) {
switch (buf[i + 1]) {
- case DQT: /* quant off */
- case DHT: /* huff off */
- case SOF0: /* image off */
- bytestream_put_be32(&poutbufp, i + 46);
- break;
+ case DQT: dqt = i + 46; break;
+ case DHT: dht = i + 46; break;
+ case SOF0: sof0 = i + 46; break;
case SOS:
+ bytestream_put_be32(&poutbufp, dqt); /* quant off */
+ bytestream_put_be32(&poutbufp, dht); /* huff off */
+ bytestream_put_be32(&poutbufp, sof0); /* image off */
bytestream_put_be32(&poutbufp, i + 46); /* scan off */
bytestream_put_be32(&poutbufp, i + 46 + AV_RB16(buf + i + 2)); /* data off */
bytestream_put_buffer(&poutbufp, buf + 2, buf_size - 2); /* skip already written SOI */
diff --git a/libavcodec/mjpegdec.c b/libavcodec/mjpegdec.c
index e8a34ef..5dcdb73 100644
--- a/libavcodec/mjpegdec.c
+++ b/libavcodec/mjpegdec.c
@@ -351,9 +351,17 @@ int ff_mjpeg_decode_sof(MJpegDecodeContext *s)
/* totally blank picture as progressive JPEG will only add details to it */
if(s->progressive){
- memset(s->picture.data[0], 0, s->picture.linesize[0] * s->height);
- memset(s->picture.data[1], 0, s->picture.linesize[1] * s->height >> (s->v_max - s->v_count[1]));
- memset(s->picture.data[2], 0, s->picture.linesize[2] * s->height >> (s->v_max - s->v_count[2]));
+ int bw = (width + s->h_max*8-1) / (s->h_max*8);
+ int bh = (height + s->v_max*8-1) / (s->v_max*8);
+ for(i=0; i<s->nb_components; i++) {
+ int size = bw * bh * s->h_count[i] * s->v_count[i];
+ av_freep(&s->blocks[i]);
+ av_freep(&s->last_nnz[i]);
+ s->blocks[i] = av_malloc(size * sizeof(**s->blocks));
+ s->last_nnz[i] = av_mallocz(size * sizeof(**s->last_nnz));
+ s->block_stride[i] = bw * s->h_count[i];
+ }
+ memset(s->coefs_finished, 0, sizeof(s->coefs_finished));
}
return 0;
}
@@ -432,27 +440,29 @@ static int decode_block(MJpegDecodeContext *s, DCTELEM *block,
return 0;
}
+static int decode_dc_progressive(MJpegDecodeContext *s, DCTELEM *block, int component,
+ int dc_index, int16_t *quant_matrix, int Al)
+{
+ int val;
+ s->dsp.clear_block(block);
+ val = mjpeg_decode_dc(s, dc_index);
+ if (val == 0xffff) {
+ av_log(s->avctx, AV_LOG_ERROR, "error dc\n");
+ return -1;
+ }
+ val = (val * quant_matrix[0] << Al) + s->last_dc[component];
+ s->last_dc[component] = val;
+ block[0] = val;
+ return 0;
+}
+
/* decode block and dequantize - progressive JPEG version */
-static int decode_block_progressive(MJpegDecodeContext *s, DCTELEM *block,
- int component, int dc_index, int ac_index, int16_t *quant_matrix,
- int ss, int se, int Ah, int Al, int *EOBRUN)
+static int decode_block_progressive(MJpegDecodeContext *s, DCTELEM *block, uint8_t *last_nnz,
+ int ac_index, int16_t *quant_matrix,
+ int ss, int se, int Al, int *EOBRUN)
{
int code, i, j, level, val, run;
- /* DC coef */
- if(!ss){
- val = mjpeg_decode_dc(s, dc_index);
- if (val == 0xffff) {
- av_log(s->avctx, AV_LOG_ERROR, "error dc\n");
- return -1;
- }
- val = (val * quant_matrix[0] << Al) + s->last_dc[component];
- }else
- val = 0;
- s->last_dc[component] = val;
- block[0] = val;
- if(!se) return 0;
- /* AC coefs */
if(*EOBRUN){
(*EOBRUN)--;
return 0;
@@ -505,9 +515,100 @@ static int decode_block_progressive(MJpegDecodeContext *s, DCTELEM *block,
}
}
CLOSE_READER(re, &s->gb)}
+ if(i > *last_nnz)
+ *last_nnz = i;
+ return 0;
+}
+
+#define REFINE_BIT(j) {\
+ UPDATE_CACHE(re, &s->gb);\
+ sign = block[j]>>15;\
+ block[j] += SHOW_UBITS(re, &s->gb, 1) * ((quant_matrix[j]^sign)-sign) << Al;\
+ LAST_SKIP_BITS(re, &s->gb, 1);\
+}
+
+#define ZERO_RUN \
+for(;;i++) {\
+ if(i > last) {\
+ i += run;\
+ if(i > se) {\
+ av_log(s->avctx, AV_LOG_ERROR, "error count: %d\n", i);\
+ return -1;\
+ }\
+ break;\
+ }\
+ j = s->scantable.permutated[i];\
+ if(block[j])\
+ REFINE_BIT(j)\
+ else if(run-- == 0)\
+ break;\
+}
+
+/* decode block and dequantize - progressive JPEG refinement pass */
+static int decode_block_refinement(MJpegDecodeContext *s, DCTELEM *block, uint8_t *last_nnz,
+ int ac_index, int16_t *quant_matrix,
+ int ss, int se, int Al, int *EOBRUN)
+{
+ int code, i=ss, j, sign, val, run;
+ int last = FFMIN(se, *last_nnz);
+
+ OPEN_READER(re, &s->gb);
+ if(*EOBRUN)
+ (*EOBRUN)--;
+ else {
+ for(;;i++) {
+ UPDATE_CACHE(re, &s->gb);
+ GET_VLC(code, re, &s->gb, s->vlcs[1][ac_index].table, 9, 2)
+ /* Progressive JPEG use AC coeffs from zero and this decoder sets offset 16 by default */
+ code -= 16;
+ if(code & 0xF) {
+ run = ((unsigned) code) >> 4;
+ UPDATE_CACHE(re, &s->gb);
+ val = SHOW_UBITS(re, &s->gb, 1);
+ LAST_SKIP_BITS(re, &s->gb, 1);
+ ZERO_RUN;
+ j = s->scantable.permutated[i];
+ val--;
+ block[j] = ((quant_matrix[j]^val)-val) << Al;
+ if(i == se) {
+ if(i > *last_nnz)
+ *last_nnz = i;
+ CLOSE_READER(re, &s->gb)
+ return 0;
+ }
+ }else{
+ run = ((unsigned) code) >> 4;
+ if(run == 0xF){
+ ZERO_RUN;
+ }else{
+ val = run;
+ run = (1 << run);
+ if(val) {
+ UPDATE_CACHE(re, &s->gb);
+ run += SHOW_UBITS(re, &s->gb, val);
+ LAST_SKIP_BITS(re, &s->gb, val);
+ }
+ *EOBRUN = run - 1;
+ break;
+ }
+ }
+ }
+
+ if(i > *last_nnz)
+ *last_nnz = i;
+ }
+
+ for(;i<=last;i++) {
+ j = s->scantable.permutated[i];
+ if(block[j])
+ REFINE_BIT(j)
+ }
+ CLOSE_READER(re, &s->gb);
return 0;
}
+#undef REFINE_BIT
+#undef ZERO_RUN
static int ljpeg_decode_rgb_scan(MJpegDecodeContext *s, int predictor, int point_transform){
int i, mb_x, mb_y;
@@ -660,18 +761,16 @@ static int ljpeg_decode_yuv_scan(MJpegDecodeContext *s, int predictor, int point
return 0;
}
-static int mjpeg_decode_scan(MJpegDecodeContext *s, int nb_components, int ss, int se, int Ah, int Al){
+static int mjpeg_decode_scan(MJpegDecodeContext *s, int nb_components, int Ah, int Al){
int i, mb_x, mb_y;
- int EOBRUN = 0;
uint8_t* data[MAX_COMPONENTS];
int linesize[MAX_COMPONENTS];
- if(Ah) return 0; /* TODO decode refinement planes too */
-
for(i=0; i < nb_components; i++) {
int c = s->comp_index[i];
data[c] = s->picture.data[c];
linesize[c]=s->linesize[c];
+ s->coefs_finished[c] |= 1;
if(s->avctx->codec->id==CODEC_ID_AMV) {
//picture should be flipped upside-down for this codec
assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE));
@@ -695,30 +794,32 @@ static int mjpeg_decode_scan(MJpegDecodeContext *s, int nb_components, int ss, i
x = 0;
y = 0;
for(j=0;j<n;j++) {
- memset(s->block, 0, sizeof(s->block));
- if (!s->progressive && decode_block(s, s->block, i,
- s->dc_index[i], s->ac_index[i],
- s->quant_matrixes[ s->quant_index[c] ]) < 0) {
- av_log(s->avctx, AV_LOG_ERROR, "error y=%d x=%d\n", mb_y, mb_x);
- return -1;
- }
- if (s->progressive && decode_block_progressive(s, s->block, i,
- s->dc_index[i], s->ac_index[i],
- s->quant_matrixes[ s->quant_index[c] ], ss, se, Ah, Al, &EOBRUN) < 0) {
- av_log(s->avctx, AV_LOG_ERROR, "error y=%d x=%d\n", mb_y, mb_x);
- return -1;
- }
-// av_log(s->avctx, AV_LOG_DEBUG, "mb: %d %d processed\n", mb_y, mb_x);
ptr = data[c] +
(((linesize[c] * (v * mb_y + y) * 8) +
(h * mb_x + x) * 8) >> s->avctx->lowres);
- if (s->interlaced && s->bottom_field)
+ if(s->interlaced && s->bottom_field)
ptr += linesize[c] >> 1;
-//av_log(NULL, AV_LOG_DEBUG, "%d %d %d %d %d %d %d %d \n", mb_x, mb_y, x, y, c, s->bottom_field, (v * mb_y + y) * 8, (h * mb_x + x) * 8);
- if(!s->progressive)
+ if(!s->progressive) {
+ s->dsp.clear_block(s->block);
+ if(decode_block(s, s->block, i,
+ s->dc_index[i], s->ac_index[i],
+ s->quant_matrixes[ s->quant_index[c] ]) < 0) {
+ av_log(s->avctx, AV_LOG_ERROR, "error y=%d x=%d\n", mb_y, mb_x);
+ return -1;
+ }
s->dsp.idct_put(ptr, linesize[c], s->block);
- else
- s->dsp.idct_add(ptr, linesize[c], s->block);
+ } else {
+ int block_idx = s->block_stride[c] * (v * mb_y + y) + (h * mb_x + x);
+ DCTELEM *block = s->blocks[c][block_idx];
+ if(Ah)
+ block[0] += get_bits1(&s->gb) * s->quant_matrixes[ s->quant_index[c] ][0] << Al;
+ else if(decode_dc_progressive(s, block, i, s->dc_index[i], s->quant_matrixes[ s->quant_index[c] ], Al) < 0) {
+ av_log(s->avctx, AV_LOG_ERROR, "error y=%d x=%d\n", mb_y, mb_x);
+ return -1;
+ }
+ }
+// av_log(s->avctx, AV_LOG_DEBUG, "mb: %d %d processed\n", mb_y, mb_x);
+//av_log(NULL, AV_LOG_DEBUG, "%d %d %d %d %d %d %d %d \n", mb_x, mb_y, x, y, c, s->bottom_field, (v * mb_y + y) * 8, (h * mb_x + x) * 8);
if (++x == h) {
x = 0;
y++;
@@ -738,6 +839,49 @@ static int mjpeg_decode_scan(MJpegDecodeContext *s, int nb_components, int ss, i
return 0;
}
+static int mjpeg_decode_scan_progressive_ac(MJpegDecodeContext *s, int ss, int se, int Ah, int Al){
+ int mb_x, mb_y;
+ int EOBRUN = 0;
+ int c = s->comp_index[0];
+ uint8_t* data = s->picture.data[c];
+ int linesize = s->linesize[c];
+ int last_scan = 0;
+ int16_t *quant_matrix = s->quant_matrixes[ s->quant_index[c] ];
+
+ if(!Al) {
+ s->coefs_finished[c] |= (1LL<<(se+1))-(1LL<<ss);
+ last_scan = !~s->coefs_finished[c];
+ }
+
+ if(s->interlaced && s->bottom_field)
+ data += linesize >> 1;
+
+ for(mb_y = 0; mb_y < s->mb_height; mb_y++) {
+ uint8_t *ptr = data + (mb_y*linesize*8 >> s->avctx->lowres);
+ int block_idx = mb_y * s->block_stride[c];
+ DCTELEM (*block)[64] = &s->blocks[c][block_idx];
+ uint8_t *last_nnz = &s->last_nnz[c][block_idx];
+ for(mb_x = 0; mb_x < s->mb_width; mb_x++, block++, last_nnz++) {
+ int ret;
+ if(Ah)
+ ret = decode_block_refinement(s, *block, last_nnz, s->ac_index[0],
+ quant_matrix, ss, se, Al, &EOBRUN);
+ else
+ ret = decode_block_progressive(s, *block, last_nnz, s->ac_index[0],
+ quant_matrix, ss, se, Al, &EOBRUN);
+ if(ret < 0) {
+ av_log(s->avctx, AV_LOG_ERROR, "error y=%d x=%d\n", mb_y, mb_x);
+ return -1;
+ }
+ if(last_scan) {
+ s->dsp.idct_put(ptr, linesize, *block);
+ ptr += 8 >> s->avctx->lowres;
+ }
+ }
+ }
+ return 0;
+}
+
int ff_mjpeg_decode_sos(MJpegDecodeContext *s)
{
int len, nb_components, i, h, v, predictor, point_transform;
@@ -849,8 +993,13 @@ int ff_mjpeg_decode_sos(MJpegDecodeContext *s)
}
}
}else{
- if(mjpeg_decode_scan(s, nb_components, predictor, ilv, prev_shift, point_transform) < 0)
- return -1;
+ if(s->progressive && predictor) {
+ if(mjpeg_decode_scan_progressive_ac(s, predictor, ilv, prev_shift, point_transform) < 0)
+ return -1;
+ } else {
+ if(mjpeg_decode_scan(s, nb_components, prev_shift, point_transform) < 0)
+ return -1;
+ }
}
emms_c();
return 0;
@@ -1354,6 +1503,10 @@ av_cold int ff_mjpeg_decode_end(AVCodecContext *avctx)
for(j=0;j<4;j++)
free_vlc(&s->vlcs[i][j]);
}
+ for(i=0; i<MAX_COMPONENTS; i++) {
+ av_freep(&s->blocks[i]);
+ av_freep(&s->last_nnz[i]);
+ }
return 0;
}
diff --git a/libavcodec/mjpegdec.h b/libavcodec/mjpegdec.h
index c973a9c..e90d864 100644
--- a/libavcodec/mjpegdec.h
+++ b/libavcodec/mjpegdec.h
@@ -67,6 +67,7 @@ typedef struct MJpegDecodeContext {
int width, height;
int mb_width, mb_height;
int nb_components;
+ int block_stride[MAX_COMPONENTS];
int component_id[MAX_COMPONENTS];
int h_count[MAX_COMPONENTS]; /* horizontal and vertical count for each component */
int v_count[MAX_COMPONENTS];
@@ -83,6 +84,9 @@ typedef struct MJpegDecodeContext {
int linesize[MAX_COMPONENTS]; ///< linesize << interlaced
int8_t *qscale_table;
DECLARE_ALIGNED_16(DCTELEM, block[64]);
+ DCTELEM (*blocks[MAX_COMPONENTS])[64]; ///< intermediate sums (progressive mode)
+ uint8_t *last_nnz[MAX_COMPONENTS];
+ uint64_t coefs_finished[MAX_COMPONENTS]; ///< bitmask of which coefs have been completely decoded (progressive mode)
ScanTable scantable;
DSPContext dsp;
diff --git a/libavcodec/mlp.c b/libavcodec/mlp.c
index 2ac2e65..7ef7f97 100644
--- a/libavcodec/mlp.c
+++ b/libavcodec/mlp.c
@@ -43,26 +43,14 @@ const uint8_t ff_mlp_huffman_tables[3][18][2] = {
static int crc_init = 0;
static AVCRC crc_63[1024];
static AVCRC crc_1D[1024];
-
-
-static int crc_init_2D = 0;
static AVCRC crc_2D[1024];
-int av_cold ff_mlp_init_crc2D(AVCodecParserContext *s)
-{
- if (!crc_init_2D) {
- av_crc_init(crc_2D, 0, 16, 0x002D, sizeof(crc_2D));
- crc_init_2D = 1;
- }
-
- return 0;
-}
-
void av_cold ff_mlp_init_crc()
{
if (!crc_init) {
av_crc_init(crc_63, 0, 8, 0x63, sizeof(crc_63));
av_crc_init(crc_1D, 0, 8, 0x1D, sizeof(crc_1D));
+ av_crc_init(crc_2D, 0, 16, 0x002D, sizeof(crc_2D));
crc_init = 1;
}
}
diff --git a/libavcodec/mlp.h b/libavcodec/mlp.h
index 338a7a3..b81c735 100644
--- a/libavcodec/mlp.h
+++ b/libavcodec/mlp.h
@@ -106,8 +106,6 @@ uint8_t ff_mlp_restart_checksum(const uint8_t *buf, unsigned int bit_size);
*/
uint8_t ff_mlp_calculate_parity(const uint8_t *buf, unsigned int buf_size);
-int ff_mlp_init_crc2D(AVCodecParserContext *s);
-
void ff_mlp_init_crc();
/** XOR four bytes into one. */
diff --git a/libavcodec/mlp_parser.c b/libavcodec/mlp_parser.c
index 4400d71..925ed86 100644
--- a/libavcodec/mlp_parser.c
+++ b/libavcodec/mlp_parser.c
@@ -150,6 +150,12 @@ typedef struct MLPParseContext
int num_substreams;
} MLPParseContext;
+static av_cold int mlp_init(AVCodecParserContext *s)
+{
+ ff_mlp_init_crc();
+ return 0;
+}
+
static int mlp_parse(AVCodecParserContext *s,
AVCodecContext *avctx,
const uint8_t **poutbuf, int *poutbuf_size,
@@ -245,11 +251,11 @@ static int mlp_parse(AVCodecParserContext *s,
if (ff_mlp_read_major_sync(avctx, &mh, &gb) < 0)
goto lost_sync;
-#ifdef CONFIG_AUDIO_NONSHORT
- avctx->bits_per_sample = mh.group1_bits;
- if (avctx->bits_per_sample > 16)
+ avctx->bits_per_raw_sample = mh.group1_bits;
+ if (avctx->bits_per_raw_sample > 16)
avctx->sample_fmt = SAMPLE_FMT_S32;
-#endif
+ else
+ avctx->sample_fmt = SAMPLE_FMT_S16;
avctx->sample_rate = mh.group1_samplerate;
avctx->frame_size = mh.access_unit_size;
@@ -283,7 +289,7 @@ lost_sync:
AVCodecParser mlp_parser = {
{ CODEC_ID_MLP },
sizeof(MLPParseContext),
- ff_mlp_init_crc2D,
+ mlp_init,
mlp_parse,
NULL,
};
diff --git a/libavcodec/mlpdec.c b/libavcodec/mlpdec.c
index 7272458..f4323da 100644
--- a/libavcodec/mlpdec.c
+++ b/libavcodec/mlpdec.c
@@ -40,7 +40,7 @@
static const char* sample_message =
"Please file a bug report following the instructions at "
- "http://ffmpeg.mplayerhq.hu/bugreports.html and include "
+ "http://ffmpeg.org/bugreports.html and include "
"a sample of this file.";
typedef struct SubStream {
@@ -222,7 +222,7 @@ static av_cold int mlp_decode_init(AVCodecContext *avctx)
m->avctx = avctx;
for (substr = 0; substr < MAX_SUBSTREAMS; substr++)
m->substream[substr].lossless_check_data = 0xffffffff;
- avctx->sample_fmt = SAMPLE_FMT_S16;
+
return 0;
}
@@ -296,12 +296,11 @@ static int read_major_sync(MLPDecodeContext *m, GetBitContext *gb)
m->avctx->sample_rate = mh.group1_samplerate;
m->avctx->frame_size = mh.access_unit_size;
-#ifdef CONFIG_AUDIO_NONSHORT
- m->avctx->bits_per_sample = mh.group1_bits;
- if (mh.group1_bits > 16) {
+ m->avctx->bits_per_raw_sample = mh.group1_bits;
+ if (mh.group1_bits > 16)
m->avctx->sample_fmt = SAMPLE_FMT_S32;
- }
-#endif
+ else
+ m->avctx->sample_fmt = SAMPLE_FMT_S16;
m->params_valid = 1;
for (substr = 0; substr < MAX_SUBSTREAMS; substr++)
diff --git a/libavcodec/motion_est.c b/libavcodec/motion_est.c
index 7c1fc67..d9d48a6 100644
--- a/libavcodec/motion_est.c
+++ b/libavcodec/motion_est.c
@@ -307,8 +307,6 @@ int ff_init_me(MpegEncContext *s){
c->sub_motion_search= no_sub_motion_search;
}
- c->temp= c->scratchpad;
-
return 0;
}
diff --git a/libavcodec/mpc.h b/libavcodec/mpc.h
index 3040135..038c34f 100644
--- a/libavcodec/mpc.h
+++ b/libavcodec/mpc.h
@@ -74,7 +74,7 @@ typedef struct {
DECLARE_ALIGNED_16(int32_t, sb_samples[MPA_MAX_CHANNELS][36][SBLIMIT]);
} MPCContext;
-extern void ff_mpc_init();
-extern void ff_mpc_dequantize_and_synth(MPCContext *c, int maxband, void *dst);
+void ff_mpc_init();
+void ff_mpc_dequantize_and_synth(MPCContext *c, int maxband, void *dst);
#endif /* AVCODEC_MPC_H */
diff --git a/libavcodec/mpc7.c b/libavcodec/mpc7.c
index 565b858..2db4172 100644
--- a/libavcodec/mpc7.c
+++ b/libavcodec/mpc7.c
@@ -109,6 +109,7 @@ static av_cold int mpc7_decode_init(AVCodecContext * avctx)
}
vlc_initialized = 1;
avctx->sample_fmt = SAMPLE_FMT_S16;
+ avctx->channel_layout = (avctx->channels==2) ? CH_LAYOUT_STEREO : CH_LAYOUT_MONO;
return 0;
}
diff --git a/libavcodec/mpc8.c b/libavcodec/mpc8.c
index 0d4f128..c47e74d 100644
--- a/libavcodec/mpc8.c
+++ b/libavcodec/mpc8.c
@@ -178,6 +178,7 @@ static av_cold int mpc8_decode_init(AVCodecContext * avctx)
}
vlc_initialized = 1;
avctx->sample_fmt = SAMPLE_FMT_S16;
+ avctx->channel_layout = (avctx->channels==2) ? CH_LAYOUT_STEREO : CH_LAYOUT_MONO;
return 0;
}
diff --git a/libavcodec/mpeg12.c b/libavcodec/mpeg12.c
index 8b6a93e..52ab50b 100644
--- a/libavcodec/mpeg12.c
+++ b/libavcodec/mpeg12.c
@@ -63,10 +63,10 @@ static inline int mpeg2_fast_decode_block_intra(MpegEncContext *s, DCTELEM *bloc
static int mpeg_decode_motion(MpegEncContext *s, int fcode, int pred);
static void exchange_uv(MpegEncContext *s);
-extern int XVMC_field_start(MpegEncContext *s, AVCodecContext *avctx);
-extern int XVMC_field_end(MpegEncContext *s);
-extern void XVMC_pack_pblocks(MpegEncContext *s,int cbp);
-extern void XVMC_init_block(MpegEncContext *s);//set s->block
+int XVMC_field_start(MpegEncContext *s, AVCodecContext *avctx);
+int XVMC_field_end(MpegEncContext *s);
+void XVMC_pack_pblocks(MpegEncContext *s,int cbp);
+void XVMC_init_block(MpegEncContext *s);//set s->block
static const enum PixelFormat pixfmt_xvmc_mpg2_420[] = {
PIX_FMT_XVMC_MPEG2_IDCT,
@@ -303,7 +303,7 @@ static int mpeg_decode_mb(MpegEncContext *s,
}else
memset(s->last_mv, 0, sizeof(s->last_mv)); /* reset mv prediction */
s->mb_intra = 1;
-#ifdef HAVE_XVMC
+#ifdef CONFIG_XVMC
//if 1, we memcpy blocks in xvmcvideo
if(s->avctx->xvmc_acceleration > 1){
XVMC_pack_pblocks(s,-1);//inter are always full blocks
@@ -516,7 +516,7 @@ static int mpeg_decode_mb(MpegEncContext *s,
return -1;
}
-#ifdef HAVE_XVMC
+#ifdef CONFIG_XVMC
//if 1, we memcpy blocks in xvmcvideo
if(s->avctx->xvmc_acceleration > 1){
XVMC_pack_pblocks(s,cbp);
@@ -1212,6 +1212,22 @@ static void quant_matrix_rebuild(uint16_t *matrix, const uint8_t *old_perm,
}
}
+static enum PixelFormat mpeg_get_pixelformat(AVCodecContext *avctx){
+ Mpeg1Context *s1 = avctx->priv_data;
+ MpegEncContext *s = &s1->mpeg_enc_ctx;
+
+ if(avctx->xvmc_acceleration)
+ return avctx->get_format(avctx,pixfmt_xvmc_mpg2_420);
+ else{
+ if(s->chroma_format < 2)
+ return PIX_FMT_YUV420P;
+ else if(s->chroma_format == 2)
+ return PIX_FMT_YUV422P;
+ else
+ return PIX_FMT_YUV444P;
+ }
+}
+
/* Call this function when we know all parameters.
* It may be called in different places for MPEG-1 and MPEG-2. */
static int mpeg_decode_postinit(AVCodecContext *avctx){
@@ -1288,19 +1304,7 @@ static int mpeg_decode_postinit(AVCodecContext *avctx){
}
}//MPEG-2
- if(avctx->xvmc_acceleration){
- avctx->pix_fmt = avctx->get_format(avctx,pixfmt_xvmc_mpg2_420);
- }else{
- if(s->chroma_format < 2){
- avctx->pix_fmt = PIX_FMT_YUV420P;
- }else
- if(s->chroma_format == 2){
- avctx->pix_fmt = PIX_FMT_YUV422P;
- }else
- if(s->chroma_format > 2){
- avctx->pix_fmt = PIX_FMT_YUV444P;
- }
- }
+ avctx->pix_fmt = mpeg_get_pixelformat(avctx);
//until then pix_fmt may be changed right after codec init
if( avctx->pix_fmt == PIX_FMT_XVMC_MPEG2_IDCT )
if( avctx->idct_algo == FF_IDCT_AUTO )
@@ -1639,7 +1643,7 @@ static int mpeg_field_start(MpegEncContext *s){
}
}
}
-#ifdef HAVE_XVMC
+#ifdef CONFIG_XVMC
// MPV_frame_start will call this function too,
// but we need to call it on every field
if(s->avctx->xvmc_acceleration)
@@ -1730,7 +1734,7 @@ static int mpeg_decode_slice(Mpeg1Context *s1, int mb_y,
}
for(;;) {
-#ifdef HAVE_XVMC
+#ifdef CONFIG_XVMC
//If 1, we memcpy blocks in xvmcvideo.
if(s->avctx->xvmc_acceleration > 1)
XVMC_init_block(s);//set s->block
@@ -1912,7 +1916,7 @@ static int slice_end(AVCodecContext *avctx, AVFrame *pict)
if (!s1->mpeg_enc_ctx_allocated || !s->current_picture_ptr)
return 0;
-#ifdef HAVE_XVMC
+#ifdef CONFIG_XVMC
if(s->avctx->xvmc_acceleration)
XVMC_field_end(s);
#endif
@@ -2069,11 +2073,7 @@ static int vcr2_init_sequence(AVCodecContext *avctx)
avctx->has_b_frames= 0; //true?
s->low_delay= 1;
- if(avctx->xvmc_acceleration){
- avctx->pix_fmt = avctx->get_format(avctx,pixfmt_xvmc_mpg2_420);
- }else{
- avctx->pix_fmt = PIX_FMT_YUV420P;
- }
+ avctx->pix_fmt = mpeg_get_pixelformat(avctx);
if( avctx->pix_fmt == PIX_FMT_XVMC_MPEG2_IDCT )
if( avctx->idct_algo == FF_IDCT_AUTO )
@@ -2472,7 +2472,7 @@ AVCodec mpegvideo_decoder = {
.long_name= NULL_IF_CONFIG_SMALL("MPEG-1 video"),
};
-#ifdef HAVE_XVMC
+#ifdef CONFIG_XVMC
static av_cold int mpeg_mc_decode_init(AVCodecContext *avctx){
Mpeg1Context *s;
diff --git a/libavcodec/mpegaudio.h b/libavcodec/mpegaudio.h
index 21de2da..33af18d 100644
--- a/libavcodec/mpegaudio.h
+++ b/libavcodec/mpegaudio.h
@@ -128,7 +128,7 @@ typedef struct HuffTable {
} HuffTable;
int ff_mpa_l2_select_table(int bitrate, int nb_channels, int freq, int lsf);
-int ff_mpa_decode_header(AVCodecContext *avctx, uint32_t head, int *sample_rate);
+int ff_mpa_decode_header(AVCodecContext *avctx, uint32_t head, int *sample_rate, int *channels, int *frame_size, int *bitrate);
void ff_mpa_synth_init(MPA_INT *window);
void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset,
MPA_INT *window, int *dither_state,
diff --git a/libavcodec/mpegaudio_parser.c b/libavcodec/mpegaudio_parser.c
index e7cb743..e4b17b0 100644
--- a/libavcodec/mpegaudio_parser.c
+++ b/libavcodec/mpegaudio_parser.c
@@ -44,7 +44,7 @@ typedef struct MpegAudioParseContext {
/* useful helper to get mpeg audio stream infos. Return -1 if error in
header, otherwise the coded frame size in bytes */
-int ff_mpa_decode_header(AVCodecContext *avctx, uint32_t head, int *sample_rate)
+int ff_mpa_decode_header(AVCodecContext *avctx, uint32_t head, int *sample_rate, int *channels, int *frame_size, int *bit_rate)
{
MPADecodeContext s1, *s = &s1;
s1.avctx = avctx;
@@ -58,23 +58,23 @@ int ff_mpa_decode_header(AVCodecContext *avctx, uint32_t head, int *sample_rate)
switch(s->layer) {
case 1:
- avctx->frame_size = 384;
+ *frame_size = 384;
break;
case 2:
- avctx->frame_size = 1152;
+ *frame_size = 1152;
break;
default:
case 3:
if (s->lsf)
- avctx->frame_size = 576;
+ *frame_size = 576;
else
- avctx->frame_size = 1152;
+ *frame_size = 1152;
break;
}
*sample_rate = s->sample_rate;
- avctx->channels = s->nb_channels;
- avctx->bit_rate = s->bit_rate;
+ *channels = s->nb_channels;
+ *bit_rate = s->bit_rate;
avctx->sub_id = s->layer;
return s->frame_size;
}
@@ -92,7 +92,7 @@ static int mpegaudio_parse(AVCodecParserContext *s1,
const uint8_t *buf, int buf_size)
{
MpegAudioParseContext *s = s1->priv_data;
- int len, ret, sr;
+ int len, ret, sr, channels, bit_rate, frame_size;
uint32_t header;
const uint8_t *buf_ptr;
@@ -123,7 +123,7 @@ static int mpegaudio_parse(AVCodecParserContext *s1,
got_header:
header = AV_RB32(s->inbuf);
- ret = ff_mpa_decode_header(avctx, header, &sr);
+ ret = ff_mpa_decode_header(avctx, header, &sr, &channels, &frame_size, &bit_rate);
if (ret < 0) {
s->header_count= -2;
/* no sync found : move by one byte (inefficient, but simple!) */
@@ -146,8 +146,12 @@ static int mpegaudio_parse(AVCodecParserContext *s1,
s->frame_size = -1;
}
#endif
- if(s->header_count > 1)
+ if(s->header_count > 1){
avctx->sample_rate= sr;
+ avctx->channels = channels;
+ avctx->frame_size = frame_size;
+ avctx->bit_rate = bit_rate;
+ }
}
}
} else
diff --git a/libavcodec/mpegaudiodec.c b/libavcodec/mpegaudiodec.c
index a277492..a853562 100644
--- a/libavcodec/mpegaudiodec.c
+++ b/libavcodec/mpegaudiodec.c
@@ -351,9 +351,9 @@ static int decode_init(AVCodecContext * avctx)
int n, norm;
n = i + 2;
norm = ((INT64_C(1) << n) * FRAC_ONE) / ((1 << n) - 1);
- scale_factor_mult[i][0] = MULL(FIXR(1.0 * 2.0), norm);
- scale_factor_mult[i][1] = MULL(FIXR(0.7937005259 * 2.0), norm);
- scale_factor_mult[i][2] = MULL(FIXR(0.6299605249 * 2.0), norm);
+ scale_factor_mult[i][0] = MULL(FIXR(1.0 * 2.0), norm, FRAC_BITS);
+ scale_factor_mult[i][1] = MULL(FIXR(0.7937005259 * 2.0), norm, FRAC_BITS);
+ scale_factor_mult[i][2] = MULL(FIXR(0.6299605249 * 2.0), norm, FRAC_BITS);
dprintf(avctx, "%d: norm=%x s=%x %x %x\n",
i, norm,
scale_factor_mult[i][0],
@@ -1097,7 +1097,7 @@ static void imdct36(int *out, int *buf, int *in, int *win)
t2 = tmp[i + 1];
t3 = tmp[i + 3];
s1 = MULH(2*(t3 + t2), icos36h[j]);
- s3 = MULL(t3 - t2, icos36[8 - j]);
+ s3 = MULL(t3 - t2, icos36[8 - j], FRAC_BITS);
t0 = s0 + s1;
t1 = s0 - s1;
@@ -1705,8 +1705,8 @@ static void compute_stereo(MPADecodeContext *s,
v2 = is_tab[1][sf];
for(j=0;j<len;j++) {
tmp0 = tab0[j];
- tab0[j] = MULL(tmp0, v1);
- tab1[j] = MULL(tmp0, v2);
+ tab0[j] = MULL(tmp0, v1, FRAC_BITS);
+ tab1[j] = MULL(tmp0, v2, FRAC_BITS);
}
} else {
found1:
@@ -1716,8 +1716,8 @@ static void compute_stereo(MPADecodeContext *s,
for(j=0;j<len;j++) {
tmp0 = tab0[j];
tmp1 = tab1[j];
- tab0[j] = MULL(tmp0 + tmp1, ISQRT2);
- tab1[j] = MULL(tmp0 - tmp1, ISQRT2);
+ tab0[j] = MULL(tmp0 + tmp1, ISQRT2, FRAC_BITS);
+ tab1[j] = MULL(tmp0 - tmp1, ISQRT2, FRAC_BITS);
}
}
}
@@ -1749,8 +1749,8 @@ static void compute_stereo(MPADecodeContext *s,
v2 = is_tab[1][sf];
for(j=0;j<len;j++) {
tmp0 = tab0[j];
- tab0[j] = MULL(tmp0, v1);
- tab1[j] = MULL(tmp0, v2);
+ tab0[j] = MULL(tmp0, v1, FRAC_BITS);
+ tab1[j] = MULL(tmp0, v2, FRAC_BITS);
}
} else {
found2:
@@ -1760,8 +1760,8 @@ static void compute_stereo(MPADecodeContext *s,
for(j=0;j<len;j++) {
tmp0 = tab0[j];
tmp1 = tab1[j];
- tab0[j] = MULL(tmp0 + tmp1, ISQRT2);
- tab1[j] = MULL(tmp0 - tmp1, ISQRT2);
+ tab0[j] = MULL(tmp0 + tmp1, ISQRT2, FRAC_BITS);
+ tab1[j] = MULL(tmp0 - tmp1, ISQRT2, FRAC_BITS);
}
}
}
diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index 4bc32a2..43a6e85 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -54,9 +54,9 @@ static void dct_unquantize_h263_intra_c(MpegEncContext *s,
static void dct_unquantize_h263_inter_c(MpegEncContext *s,
DCTELEM *block, int n, int qscale);
-extern int XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
-extern void XVMC_field_end(MpegEncContext *s);
-extern void XVMC_decode_mb(MpegEncContext *s);
+int XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
+void XVMC_field_end(MpegEncContext *s);
+void XVMC_decode_mb(MpegEncContext *s);
/* enable all paranoid tests for rounding, overflows, etc... */
@@ -129,8 +129,8 @@ int ff_dct_common_init(MpegEncContext *s)
MPV_common_init_mlib(s);
#elif defined(HAVE_MMI)
MPV_common_init_mmi(s);
-#elif defined(ARCH_ARMV4L)
- MPV_common_init_armv4l(s);
+#elif defined(ARCH_ARM)
+ MPV_common_init_arm(s);
#elif defined(HAVE_ALTIVEC)
MPV_common_init_altivec(s);
#elif defined(ARCH_BFIN)
@@ -289,6 +289,7 @@ static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){
//FIXME should be linesize instead of s->width*2 but that is not known before get_buffer()
CHECKED_ALLOCZ(s->me.scratchpad, (s->width+64)*4*16*2*sizeof(uint8_t))
+ s->me.temp= s->me.scratchpad;
s->rd_scratchpad= s->me.scratchpad;
s->b_scratchpad= s->me.scratchpad;
s->obmc_scratchpad= s->me.scratchpad + 16;
@@ -315,6 +316,7 @@ static void free_duplicate_context(MpegEncContext *s){
av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
av_freep(&s->me.scratchpad);
+ s->me.temp=
s->rd_scratchpad=
s->b_scratchpad=
s->obmc_scratchpad= NULL;
@@ -331,6 +333,7 @@ static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src){
COPY(allocated_edge_emu_buffer);
COPY(edge_emu_buffer);
COPY(me.scratchpad);
+ COPY(me.temp);
COPY(rd_scratchpad);
COPY(b_scratchpad);
COPY(obmc_scratchpad);
@@ -936,7 +939,7 @@ alloc:
update_noise_reduction(s);
}
-#ifdef HAVE_XVMC
+#ifdef CONFIG_XVMC
if(s->avctx->xvmc_acceleration)
return XVMC_field_start(s, avctx);
#endif
@@ -948,13 +951,17 @@ void MPV_frame_end(MpegEncContext *s)
{
int i;
/* draw edge for correct motion prediction if outside */
-#ifdef HAVE_XVMC
+#ifdef CONFIG_XVMC
//just to make sure that all data is rendered.
if(s->avctx->xvmc_acceleration){
XVMC_field_end(s);
}else
#endif
- if(s->unrestricted_mv && s->current_picture.reference && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
+ if(!(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
+ && s->unrestricted_mv
+ && s->current_picture.reference
+ && !s->intra_only
+ && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
s->dsp.draw_edges(s->current_picture.data[0], s->linesize , s->h_edge_pos , s->v_edge_pos , EDGE_WIDTH );
s->dsp.draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
s->dsp.draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
@@ -1729,7 +1736,7 @@ void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64],
{
int mb_x, mb_y;
const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
-#ifdef HAVE_XVMC
+#ifdef CONFIG_XVMC
if(s->avctx->xvmc_acceleration){
XVMC_decode_mb(s);//xvmc uses pblocks
return;
diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h
index 4ddd30c..da08bb6 100644
--- a/libavcodec/mpegvideo.h
+++ b/libavcodec/mpegvideo.h
@@ -546,6 +546,9 @@ typedef struct MpegEncContext {
int mpeg_quant;
int t_frame; ///< time distance of first I -> B, used for interlaced b frames
int padding_bug_score; ///< used to detect the VERY common padding bug in MPEG4
+ int cplx_estimation_trash_i;
+ int cplx_estimation_trash_p;
+ int cplx_estimation_trash_b;
/* divx specific, used to workaround (many) bugs in divx5 */
int divx_version;
@@ -681,7 +684,7 @@ void MPV_common_init_mmx(MpegEncContext *s);
void MPV_common_init_axp(MpegEncContext *s);
void MPV_common_init_mlib(MpegEncContext *s);
void MPV_common_init_mmi(MpegEncContext *s);
-void MPV_common_init_armv4l(MpegEncContext *s);
+void MPV_common_init_arm(MpegEncContext *s);
void MPV_common_init_altivec(MpegEncContext *s);
void ff_clean_intra_table_entries(MpegEncContext *s);
void ff_draw_horiz_band(MpegEncContext *s, int y, int h);
diff --git a/libavcodec/mpegvideo_enc.c b/libavcodec/mpegvideo_enc.c
index 6c4a028..72af47a 100644
--- a/libavcodec/mpegvideo_enc.c
+++ b/libavcodec/mpegvideo_enc.c
@@ -35,6 +35,7 @@
#include "msmpeg4.h"
#include "h263.h"
#include "faandct.h"
+#include "aandcttab.h"
#include <limits.h>
//#undef NDEBUG
@@ -49,29 +50,6 @@ static int sse_mb(MpegEncContext *s);
//#define DEBUG
-static const uint16_t aanscales[64] = {
- /* precomputed values scaled up by 14 bits */
- 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
- 22725, 31521, 29692, 26722, 22725, 17855, 12299, 6270,
- 21407, 29692, 27969, 25172, 21407, 16819, 11585, 5906,
- 19266, 26722, 25172, 22654, 19266, 15137, 10426, 5315,
- 16384, 22725, 21407, 19266, 16384, 12873, 8867, 4520,
- 12873, 17855, 16819, 15137, 12873, 10114, 6967, 3552,
- 8867 , 12299, 11585, 10426, 8867, 6967, 4799, 2446,
- 4520 , 6270, 5906, 5315, 4520, 3552, 2446, 1247
-};
-
-const uint16_t ff_inv_aanscales[64] = {
- 4096, 2953, 3135, 3483, 4096, 5213, 7568, 14846,
- 2953, 2129, 2260, 2511, 2953, 3759, 5457, 10703,
- 3135, 2260, 2399, 2666, 3135, 3990, 5793, 11363,
- 3483, 2511, 2666, 2962, 3483, 4433, 6436, 12625,
- 4096, 2953, 3135, 3483, 4096, 5213, 7568, 14846,
- 5213, 3759, 3990, 4433, 5213, 6635, 9633, 18895,
- 7568, 5457, 5793, 6436, 7568, 9633, 13985, 27432,
- 14846, 10703, 11363, 12625, 14846, 18895, 27432, 53809,
-};
-
static uint8_t default_mv_penalty[MAX_FCODE+1][MAX_MV*2+1];
static uint8_t default_fcode_tab[MAX_MV*2+1];
@@ -91,9 +69,9 @@ void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[2][6
for(i=0;i<64;i++) {
const int j= dsp->idct_permutation[i];
/* 16 <= qscale * quant_matrix[i] <= 7905 */
- /* 19952 <= aanscales[i] * qscale * quant_matrix[i] <= 249205026 */
- /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
- /* 3444240 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
+ /* 19952 <= ff_aanscales[i] * qscale * quant_matrix[i] <= 249205026 */
+ /* (1 << 36) / 19952 >= (1 << 36) / (ff_aanscales[i] * qscale * quant_matrix[i]) >= (1 << 36) / 249205026 */
+ /* 3444240 >= (1 << 36) / (ff_aanscales[i] * qscale * quant_matrix[i]) >= 275 */
qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
(qscale * quant_matrix[j]));
@@ -106,12 +84,12 @@ void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[2][6
for(i=0;i<64;i++) {
const int j= dsp->idct_permutation[i];
/* 16 <= qscale * quant_matrix[i] <= 7905 */
- /* 19952 <= aanscales[i] * qscale * quant_matrix[i] <= 249205026 */
- /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
- /* 3444240 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
+ /* 19952 <= ff_aanscales[i] * qscale * quant_matrix[i] <= 249205026 */
+ /* (1 << 36) / 19952 >= (1 << 36) / (ff_aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
+ /* 3444240 >= (1 << 36) / (ff_aanscales[i] * qscale * quant_matrix[i]) >= 275 */
qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
- (aanscales[i] * qscale * quant_matrix[j]));
+ (ff_aanscales[i] * qscale * quant_matrix[j]));
}
} else {
for(i=0;i<64;i++) {
@@ -137,7 +115,7 @@ void ff_convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[2][6
|| dsp->fdct == ff_faandct
#endif
) {
- max= (8191LL*aanscales[i]) >> 14;
+ max = (8191LL*ff_aanscales[i]) >> 14;
}
while(((max * qmat[qscale][i]) >> shift) > INT_MAX){
shift++;
@@ -1253,7 +1231,7 @@ vbv_retry:
if(avctx->rc_buffer_size){
RateControlContext *rcc= &s->rc_context;
- int max_size= rcc->buffer_index/3;
+ int max_size= rcc->buffer_index * avctx->rc_max_available_vbv_use;
if(put_bits_count(&s->pb) > max_size && s->lambda < s->avctx->lmax){
s->next_lambda= FFMAX(s->lambda+1, s->lambda*(s->qscale+1) / s->qscale);
diff --git a/libavcodec/msmpeg4.h b/libavcodec/msmpeg4.h
index 3226015..1442959 100644
--- a/libavcodec/msmpeg4.h
+++ b/libavcodec/msmpeg4.h
@@ -52,7 +52,9 @@ int ff_wmv2_decode_mb(MpegEncContext *s, DCTELEM block[6][64]);
#define ENABLE_MSMPEG4_DECODER (ENABLE_MSMPEG4V1_DECODER || \
ENABLE_MSMPEG4V2_DECODER || \
ENABLE_MSMPEG4V3_DECODER || \
- ENABLE_WMV2_DECODER)
+ ENABLE_WMV2_DECODER || \
+ ENABLE_VC1_DECODER || \
+ ENABLE_WMV3_DECODER)
#define ENABLE_MSMPEG4_ENCODER (ENABLE_MSMPEG4V1_ENCODER || \
ENABLE_MSMPEG4V2_ENCODER || \
ENABLE_MSMPEG4V3_ENCODER || \
diff --git a/libavcodec/msmpeg4data.c b/libavcodec/msmpeg4data.c
index da899b5..07d11f7 100644
--- a/libavcodec/msmpeg4data.c
+++ b/libavcodec/msmpeg4data.c
@@ -35,22 +35,22 @@ VLC ff_msmp4_dc_chroma_vlc[2];
/* intra picture macroblock coded block pattern */
const uint16_t ff_msmp4_mb_i_table[64][2] = {
-{ 0x1, 1 },{ 0x17, 6 },{ 0x9, 5 },{ 0x5, 5 },
-{ 0x6, 5 },{ 0x47, 9 },{ 0x20, 7 },{ 0x10, 7 },
-{ 0x2, 5 },{ 0x7c, 9 },{ 0x3a, 7 },{ 0x1d, 7 },
-{ 0x2, 6 },{ 0xec, 9 },{ 0x77, 8 },{ 0x0, 8 },
-{ 0x3, 5 },{ 0xb7, 9 },{ 0x2c, 7 },{ 0x13, 7 },
-{ 0x1, 6 },{ 0x168, 10 },{ 0x46, 8 },{ 0x3f, 8 },
-{ 0x1e, 6 },{ 0x712, 13 },{ 0xb5, 9 },{ 0x42, 8 },
-{ 0x22, 7 },{ 0x1c5, 11 },{ 0x11e, 10 },{ 0x87, 9 },
-{ 0x6, 4 },{ 0x3, 9 },{ 0x1e, 7 },{ 0x1c, 6 },
-{ 0x12, 7 },{ 0x388, 12 },{ 0x44, 9 },{ 0x70, 9 },
-{ 0x1f, 6 },{ 0x23e, 11 },{ 0x39, 8 },{ 0x8e, 9 },
-{ 0x1, 7 },{ 0x1c6, 11 },{ 0xb6, 9 },{ 0x45, 9 },
-{ 0x14, 6 },{ 0x23f, 11 },{ 0x7d, 9 },{ 0x18, 9 },
-{ 0x7, 7 },{ 0x1c7, 11 },{ 0x86, 9 },{ 0x19, 9 },
-{ 0x15, 6 },{ 0x1db, 10 },{ 0x2, 9 },{ 0x46, 9 },
-{ 0xd, 8 },{ 0x713, 13 },{ 0x1da, 10 },{ 0x169, 10 },
+{ 0x1, 1 },{ 0x17, 6 },{ 0x9, 5 },{ 0x5, 5 },
+{ 0x6, 5 },{ 0x47, 9 },{ 0x20, 7 },{ 0x10, 7 },
+{ 0x2, 5 },{ 0x7c, 9 },{ 0x3a, 7 },{ 0x1d, 7 },
+{ 0x2, 6 },{ 0xec, 9 },{ 0x77, 8 },{ 0x0, 8 },
+{ 0x3, 5 },{ 0xb7, 9 },{ 0x2c, 7 },{ 0x13, 7 },
+{ 0x1, 6 },{ 0x168, 10 },{ 0x46, 8 },{ 0x3f, 8 },
+{ 0x1e, 6 },{ 0x712, 13 },{ 0xb5, 9 },{ 0x42, 8 },
+{ 0x22, 7 },{ 0x1c5, 11 },{ 0x11e, 10 },{ 0x87, 9 },
+{ 0x6, 4 },{ 0x3, 9 },{ 0x1e, 7 },{ 0x1c, 6 },
+{ 0x12, 7 },{ 0x388, 12 },{ 0x44, 9 },{ 0x70, 9 },
+{ 0x1f, 6 },{ 0x23e, 11 },{ 0x39, 8 },{ 0x8e, 9 },
+{ 0x1, 7 },{ 0x1c6, 11 },{ 0xb6, 9 },{ 0x45, 9 },
+{ 0x14, 6 },{ 0x23f, 11 },{ 0x7d, 9 },{ 0x18, 9 },
+{ 0x7, 7 },{ 0x1c7, 11 },{ 0x86, 9 },{ 0x19, 9 },
+{ 0x15, 6 },{ 0x1db, 10 },{ 0x2, 9 },{ 0x46, 9 },
+{ 0xd, 8 },{ 0x713, 13 },{ 0x1da, 10 },{ 0x169, 10 },
};
/* non intra picture macroblock coded block pattern + mb type */
diff --git a/libavcodec/msrledec.c b/libavcodec/msrledec.c
index f44b8b6..7d09ed9 100644
--- a/libavcodec/msrledec.c
+++ b/libavcodec/msrledec.c
@@ -145,8 +145,10 @@ static int msrle_decode_8_16_24_32(AVCodecContext *avctx, AVPicture *pic, int de
p2 = *src++;
if(p2 == 0) { //End-of-line
output = pic->data[0] + (--line) * pic->linesize[0];
- if (line < 0)
+ if (line < 0){
+ av_log(avctx, AV_LOG_ERROR, "Next line is beyond picture bounds\n");
return -1;
+ }
pos = 0;
continue;
} else if(p2 == 1) { //End-of-picture
@@ -155,8 +157,10 @@ static int msrle_decode_8_16_24_32(AVCodecContext *avctx, AVPicture *pic, int de
p1 = *src++;
p2 = *src++;
line -= p2;
- if (line < 0)
+ if (line < 0){
+ av_log(avctx, AV_LOG_ERROR, "Skip beyond picture bounds\n");
return -1;
+ }
pos += p1;
output = pic->data[0] + line * pic->linesize[0] + pos * (depth >> 3);
continue;
@@ -191,13 +195,12 @@ static int msrle_decode_8_16_24_32(AVCodecContext *avctx, AVPicture *pic, int de
}
pos += p2;
} else { //Run of pixels
- int pix[4]; //original pixel
+ uint8_t pix[3]; //original pixel
switch(depth){
case 8: pix[0] = *src++;
break;
case 16: pix16 = AV_RL16(src);
src += 2;
- *(uint16_t*)pix = pix16;
break;
case 24: pix[0] = *src++;
pix[1] = *src++;
@@ -205,7 +208,6 @@ static int msrle_decode_8_16_24_32(AVCodecContext *avctx, AVPicture *pic, int de
break;
case 32: pix32 = AV_RL32(src);
src += 4;
- *(uint32_t*)pix = pix32;
break;
}
if (output + p1 * (depth >> 3) > output_end)
diff --git a/libavcodec/nellymoserdec.c b/libavcodec/nellymoserdec.c
index f2a413e..dfbb41c 100644
--- a/libavcodec/nellymoserdec.c
+++ b/libavcodec/nellymoserdec.c
@@ -148,6 +148,7 @@ static av_cold int decode_init(AVCodecContext * avctx) {
ff_sine_window_init(sine_window, 128);
avctx->sample_fmt = SAMPLE_FMT_S16;
+ avctx->channel_layout = CH_LAYOUT_MONO;
return 0;
}
diff --git a/libavcodec/nellymoserenc.c b/libavcodec/nellymoserenc.c
index 03db30c..13fe64d 100644
--- a/libavcodec/nellymoserenc.c
+++ b/libavcodec/nellymoserenc.c
@@ -28,7 +28,7 @@
*
* Generic codec information: libavcodec/nellymoserdec.c
*
- * Some information also from: http://www1.mplayerhq.hu/ASAO/ASAO.zip
+ * Some information also from: http://samples.mplayerhq.hu/A-codecs/Nelly_Moser/ASAO/ASAO.zip
* (Copyright Joseph Artsimovich and UAB "DKD")
*
* for more information about nellymoser format, visit:
diff --git a/libavcodec/opt.c b/libavcodec/opt.c
index acf94ba..78fbfae 100644
--- a/libavcodec/opt.c
+++ b/libavcodec/opt.c
@@ -47,15 +47,17 @@ const AVOption *av_next_option(void *obj, const AVOption *last){
else return (*(AVClass**)obj)->option;
}
-static const AVOption *av_set_number(void *obj, const char *name, double num, int den, int64_t intnum){
+static int av_set_number2(void *obj, const char *name, double num, int den, int64_t intnum, const AVOption **o_out){
const AVOption *o= av_find_opt(obj, name, NULL, 0, 0);
void *dst;
+ if(o_out)
+ *o_out= o;
if(!o || o->offset<=0)
- return NULL;
+ return AVERROR(ENOENT);
if(o->max*den < num*intnum || o->min*den > num*intnum) {
- av_log(NULL, AV_LOG_ERROR, "Value %lf for parameter '%s' out of range.\n", num, name);
- return NULL;
+ av_log(NULL, AV_LOG_ERROR, "Value %lf for parameter '%s' out of range\n", num, name);
+ return AVERROR(ERANGE);
}
dst= ((uint8_t*)obj) + o->offset;
@@ -71,27 +73,17 @@ static const AVOption *av_set_number(void *obj, const char *name, double num, in
else *(AVRational*)dst= av_d2q(num*intnum/den, 1<<24);
break;
default:
- return NULL;
+ return AVERROR(EINVAL);
}
- return o;
+ return 0;
}
-static const AVOption *set_all_opt(void *v, const char *unit, double d){
- AVClass *c= *(AVClass**)v; //FIXME silly way of storing AVClass
- const AVOption *o= c->option;
- const AVOption *ret=NULL;
-
- for(;o && o->name; o++){
- if(o->type != FF_OPT_TYPE_CONST && o->unit && !strcmp(o->unit, unit)){
- double tmp= d;
- if(o->type == FF_OPT_TYPE_FLAGS)
- tmp= av_get_int(v, o->name, NULL) | (int64_t)d;
-
- av_set_number(v, o->name, tmp, 1, 1);
- ret= o;
- }
- }
- return ret;
+static const AVOption *av_set_number(void *obj, const char *name, double num, int den, int64_t intnum){
+ const AVOption *o = NULL;
+ if (av_set_number2(obj, name, num, den, intnum, &o) < 0)
+ return NULL;
+ else
+ return o;
}
static const double const_values[]={
@@ -115,13 +107,16 @@ static int hexchar2int(char c) {
return -1;
}
-const AVOption *av_set_string2(void *obj, const char *name, const char *val, int alloc){
+int av_set_string3(void *obj, const char *name, const char *val, int alloc, const AVOption **o_out){
+ int ret;
const AVOption *o= av_find_opt(obj, name, NULL, 0, 0);
- if(o && o->offset==0 && o->type == FF_OPT_TYPE_CONST && o->unit){
- return set_all_opt(obj, o->unit, o->default_val);
- }
- if(!o || !val || o->offset<=0)
- return NULL;
+ if (o_out)
+ *o_out = o;
+ if(!o)
+ return AVERROR(ENOENT);
+ if(!val || o->offset<=0)
+ return AVERROR(EINVAL);
+
if(o->type == FF_OPT_TYPE_BINARY){
uint8_t **dst = (uint8_t **)(((uint8_t*)obj) + o->offset);
int *lendst = (int *)(dst + 1);
@@ -129,7 +124,7 @@ const AVOption *av_set_string2(void *obj, const char *name, const char *val, int
int len = strlen(val);
av_freep(dst);
*lendst = 0;
- if (len & 1) return NULL;
+ if (len & 1) return AVERROR(EINVAL);
len /= 2;
ptr = bin = av_malloc(len);
while (*val) {
@@ -137,13 +132,13 @@ const AVOption *av_set_string2(void *obj, const char *name, const char *val, int
int b = hexchar2int(*val++);
if (a < 0 || b < 0) {
av_free(bin);
- return NULL;
+ return AVERROR(EINVAL);
}
*ptr++ = (a << 4) | b;
}
*dst = bin;
*lendst = len;
- return o;
+ return 0;
}
if(o->type != FF_OPT_TYPE_STRING){
int notfirst=0;
@@ -174,7 +169,7 @@ const AVOption *av_set_string2(void *obj, const char *name, const char *val, int
else {
if (error)
av_log(NULL, AV_LOG_ERROR, "Unable to parse option value \"%s\": %s\n", val, error);
- return NULL;
+ return AVERROR(EINVAL);
}
}
if(o->type == FF_OPT_TYPE_FLAGS){
@@ -185,14 +180,14 @@ const AVOption *av_set_string2(void *obj, const char *name, const char *val, int
else if(cmd=='-') d= notfirst*av_get_double(obj, name, NULL) - d;
}
- if (!av_set_number(obj, name, d, 1, 1))
- return NULL;
+ if ((ret = av_set_number2(obj, name, d, 1, 1, o_out)) < 0)
+ return ret;
val+= i;
if(!*val)
- return o;
+ return 0;
notfirst=1;
}
- return NULL;
+ return AVERROR(EINVAL);
}
if(alloc){
@@ -201,12 +196,24 @@ const AVOption *av_set_string2(void *obj, const char *name, const char *val, int
}
memcpy(((uint8_t*)obj) + o->offset, &val, sizeof(val));
+ return 0;
+}
+
+#if LIBAVCODEC_VERSION_MAJOR < 53
+const AVOption *av_set_string2(void *obj, const char *name, const char *val, int alloc){
+ const AVOption *o;
+ if (av_set_string3(obj, name, val, alloc, &o) < 0)
+ return NULL;
return o;
}
const AVOption *av_set_string(void *obj, const char *name, const char *val){
- return av_set_string2(obj, name, val, 0);
+ const AVOption *o;
+ if (av_set_string3(obj, name, val, 0, &o) < 0)
+ return NULL;
+ return o;
}
+#endif
const AVOption *av_set_double(void *obj, const char *name, double n){
return av_set_number(obj, name, n, 1, 1);
@@ -415,6 +422,11 @@ void av_opt_set_defaults2(void *s, int mask, int flags)
av_set_int(s, opt->name, val);
}
break;
+ case FF_OPT_TYPE_INT64:
+ if((double)(opt->default_val+0.6) == opt->default_val)
+ av_log(s, AV_LOG_DEBUG, "loss of precission in default of %s\n", opt->name);
+ av_set_int(s, opt->name, opt->default_val);
+ break;
case FF_OPT_TYPE_FLOAT: {
double val;
val = opt->default_val;
diff --git a/libavcodec/opt.h b/libavcodec/opt.h
index 557c430..c968930 100644
--- a/libavcodec/opt.h
+++ b/libavcodec/opt.h
@@ -91,7 +91,7 @@ typedef struct AVOption {
* for which it is the case that opt->flags & mask == flags).
*
* @param[in] obj a pointer to a struct whose first element is a
- * pointer to an #AVClass
+ * pointer to an AVClass
* @param[in] name the name of the option to look for
* @param[in] unit the unit of the option to look for, or any if NULL
* @return a pointer to the option found, or NULL if no option
@@ -99,12 +99,22 @@ typedef struct AVOption {
*/
const AVOption *av_find_opt(void *obj, const char *name, const char *unit, int mask, int flags);
+#if LIBAVCODEC_VERSION_MAJOR < 53
/**
* @see av_set_string2()
*/
attribute_deprecated const AVOption *av_set_string(void *obj, const char *name, const char *val);
/**
+ * @return a pointer to the AVOption corresponding to the field set or
+ * NULL if no matching AVOption exists, or if the value \p val is not
+ * valid
+ * @see av_set_string3()
+ */
+attribute_deprecated const AVOption *av_set_string2(void *obj, const char *name, const char *val, int alloc);
+#endif
+
+/**
* Sets the field of obj with the given name to value.
*
* @param[in] obj A struct whose first element is a pointer to an
@@ -120,14 +130,15 @@ attribute_deprecated const AVOption *av_set_string(void *obj, const char *name,
* scalars or named flags separated by '+' or '-'. Prefixing a flag
* with '+' causes it to be set without affecting the other flags;
* similarly, '-' unsets a flag.
- * @return a pointer to the AVOption corresponding to the field set or
- * NULL if no matching AVOption exists, or if the value \p val is not
- * valid
+ * @param[out] o_out if non-NULL put here a pointer to the AVOption
+ * found
* @param alloc when 1 then the old value will be av_freed() and the
* new av_strduped()
* when 0 then no av_free() nor av_strdup() will be used
+ * @return 0 if the value has been set, an AVERROR* error code if no
+ * matching option exists, or if the value \p val is not valid
*/
-const AVOption *av_set_string2(void *obj, const char *name, const char *val, int alloc);
+int av_set_string3(void *obj, const char *name, const char *val, int alloc, const AVOption **o_out);
const AVOption *av_set_double(void *obj, const char *name, double n);
const AVOption *av_set_q(void *obj, const char *name, AVRational n);
diff --git a/libavcodec/png.h b/libavcodec/png.h
index 0fde3f4..17b1466 100644
--- a/libavcodec/png.h
+++ b/libavcodec/png.h
@@ -64,14 +64,13 @@ extern const uint8_t ff_png_pass_xshift[NB_PASSES];
/* Mask to determine which pixels are valid in a pass */
extern const uint8_t ff_png_pass_mask[NB_PASSES];
-extern void *ff_png_zalloc(void *opaque, unsigned int items,
- unsigned int size);
+void *ff_png_zalloc(void *opaque, unsigned int items, unsigned int size);
-extern void ff_png_zfree(void *opaque, void *ptr);
+void ff_png_zfree(void *opaque, void *ptr);
-extern int ff_png_get_nb_channels(int color_type);
+int ff_png_get_nb_channels(int color_type);
/* compute the row size of an interleaved pass */
-extern int ff_png_pass_row_size(int pass, int bits_per_pixel, int width);
+int ff_png_pass_row_size(int pass, int bits_per_pixel, int width);
#endif /* AVCODEC_PNG_H */
diff --git a/libavcodec/ppc/dsputil_altivec.c b/libavcodec/ppc/dsputil_altivec.c
index 6ff219e..44cce6a 100644
--- a/libavcodec/ppc/dsputil_altivec.c
+++ b/libavcodec/ppc/dsputil_altivec.c
@@ -26,6 +26,7 @@
#include "dsputil_ppc.h"
#include "util_altivec.h"
+#include "types_altivec.h"
int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
@@ -573,6 +574,20 @@ void diff_pixels_altivec(DCTELEM *restrict block, const uint8_t *s1,
}
}
+
+static void clear_block_altivec(DCTELEM *block) {
+ LOAD_ZERO;
+ vec_st(zero_s16v, 0, block);
+ vec_st(zero_s16v, 16, block);
+ vec_st(zero_s16v, 32, block);
+ vec_st(zero_s16v, 48, block);
+ vec_st(zero_s16v, 64, block);
+ vec_st(zero_s16v, 80, block);
+ vec_st(zero_s16v, 96, block);
+ vec_st(zero_s16v, 112, block);
+}
+
+
void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w) {
register int i;
register vector unsigned char vdst, vsrc;
@@ -1420,6 +1435,7 @@ void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx)
c->pix_sum = pix_sum_altivec;
c->diff_pixels = diff_pixels_altivec;
c->get_pixels = get_pixels_altivec;
+ c->clear_block = clear_block_altivec;
c->add_bytes= add_bytes_altivec;
c->put_pixels_tab[0][0] = put_pixels16_altivec;
/* the two functions do the same thing, so use the same code */
diff --git a/libavcodec/ppc/dsputil_altivec.h b/libavcodec/ppc/dsputil_altivec.h
index 63f817a..03ff0b9 100644
--- a/libavcodec/ppc/dsputil_altivec.h
+++ b/libavcodec/ppc/dsputil_altivec.h
@@ -25,7 +25,7 @@
#include <stdint.h>
-extern int has_altivec(void);
+int has_altivec(void);
void put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h);
diff --git a/libavcodec/ppc/dsputil_ppc.c b/libavcodec/ppc/dsputil_ppc.c
index 443e1db..124453e 100644
--- a/libavcodec/ppc/dsputil_ppc.c
+++ b/libavcodec/ppc/dsputil_ppc.c
@@ -27,11 +27,11 @@
#ifdef HAVE_ALTIVEC
#include "dsputil_altivec.h"
-extern void fdct_altivec(int16_t *block);
-extern void gmc1_altivec(uint8_t *dst, uint8_t *src, int stride, int h,
- int x16, int y16, int rounder);
-extern void idct_put_altivec(uint8_t *dest, int line_size, int16_t *block);
-extern void idct_add_altivec(uint8_t *dest, int line_size, int16_t *block);
+void fdct_altivec(int16_t *block);
+void gmc1_altivec(uint8_t *dst, uint8_t *src, int stride, int h,
+ int x16, int y16, int rounder);
+void idct_put_altivec(uint8_t *dest, int line_size, int16_t *block);
+void idct_add_altivec(uint8_t *dest, int line_size, int16_t *block);
void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx);
diff --git a/libavcodec/ppc/fft_altivec.c b/libavcodec/ppc/fft_altivec.c
index ddf142b..19123d0 100644
--- a/libavcodec/ppc/fft_altivec.c
+++ b/libavcodec/ppc/fft_altivec.c
@@ -26,31 +26,6 @@
#include "dsputil_ppc.h"
#include "util_altivec.h"
-/*
- those three macros are from libavcodec/fft.c
- and are required for the reference C code
-*/
-/* butter fly op */
-#define BF(pre, pim, qre, qim, pre1, pim1, qre1, qim1) \
-{\
- FFTSample ax, ay, bx, by;\
- bx=pre1;\
- by=pim1;\
- ax=qre1;\
- ay=qim1;\
- pre = (bx + ax);\
- pim = (by + ay);\
- qre = (bx - ax);\
- qim = (by - ay);\
-}
-#define MUL16(a,b) ((a) * (b))
-#define CMUL(pre, pim, are, aim, bre, bim) \
-{\
- pre = (MUL16(are, bre) - MUL16(aim, bim));\
- pim = (MUL16(are, bim) + MUL16(bre, aim));\
-}
-
-
/**
* Do a complex FFT with the parameters defined in ff_fft_init(). The
* input data must be permuted before with s->revtab table. No
diff --git a/libavcodec/ppc/float_altivec.c b/libavcodec/ppc/float_altivec.c
index 1f39d8f..fd4aa53 100644
--- a/libavcodec/ppc/float_altivec.c
+++ b/libavcodec/ppc/float_altivec.c
@@ -23,6 +23,7 @@
#include "gcc_fixes.h"
#include "dsputil_altivec.h"
+#include "util_altivec.h"
static void vector_fmul_altivec(float *dst, const float *src, int len)
{
@@ -149,6 +150,67 @@ static void vector_fmul_add_add_altivec(float *dst, const float *src0,
ff_vector_fmul_add_add_c(dst, src0, src1, src2, src3, len, step);
}
+static void vector_fmul_window_altivec(float *dst, const float *src0, const float *src1, const float *win, float add_bias, int len)
+{
+ union {
+ vector float v;
+ float s[4];
+ } vadd;
+ vector float vadd_bias, zero, t0, t1, s0, s1, wi, wj;
+ const vector unsigned char reverse = vcprm(3,2,1,0);
+ int i,j;
+
+ dst += len;
+ win += len;
+ src0+= len;
+
+ vadd.s[0] = add_bias;
+ vadd_bias = vec_splat(vadd.v, 0);
+ zero = (vector float)vec_splat_u32(0);
+
+ for(i=-len*4, j=len*4-16; i<0; i+=16, j-=16) {
+ s0 = vec_ld(i, src0);
+ s1 = vec_ld(j, src1);
+ wi = vec_ld(i, win);
+ wj = vec_ld(j, win);
+
+ s1 = vec_perm(s1, s1, reverse);
+ wj = vec_perm(wj, wj, reverse);
+
+ t0 = vec_madd(s0, wj, vadd_bias);
+ t0 = vec_nmsub(s1, wi, t0);
+ t1 = vec_madd(s0, wi, vadd_bias);
+ t1 = vec_madd(s1, wj, t1);
+ t1 = vec_perm(t1, t1, reverse);
+
+ vec_st(t0, i, dst);
+ vec_st(t1, j, dst);
+ }
+}
+
+static void int32_to_float_fmul_scalar_altivec(float *dst, const int *src, float mul, int len)
+{
+ union {
+ vector float v;
+ float s[4];
+ } mul_u;
+ int i;
+ vector float src1, src2, dst1, dst2, mul_v, zero;
+
+ zero = (vector float)vec_splat_u32(0);
+ mul_u.s[0] = mul;
+ mul_v = vec_splat(mul_u.v, 0);
+
+ for(i=0; i<len; i+=8) {
+ src1 = vec_ctf(vec_ld(0, src+i), 0);
+ src2 = vec_ctf(vec_ld(16, src+i), 0);
+ dst1 = vec_madd(src1, mul_v, zero);
+ dst2 = vec_madd(src2, mul_v, zero);
+ vec_st(dst1, 0, dst+i);
+ vec_st(dst2, 16, dst+i);
+ }
+}
+
static vector signed short
float_to_int16_one_altivec(const float *src)
@@ -160,7 +222,7 @@ float_to_int16_one_altivec(const float *src)
return vec_packs(t0,t1);
}
-static void float_to_int16_altivec(int16_t *dst, const float *src, int len)
+static void float_to_int16_altivec(int16_t *dst, const float *src, long len)
{
int i;
vector signed short d0, d1, d;
@@ -240,7 +302,9 @@ void float_init_altivec(DSPContext* c, AVCodecContext *avctx)
c->vector_fmul = vector_fmul_altivec;
c->vector_fmul_reverse = vector_fmul_reverse_altivec;
c->vector_fmul_add_add = vector_fmul_add_add_altivec;
+ c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_altivec;
if(!(avctx->flags & CODEC_FLAG_BITEXACT)) {
+ c->vector_fmul_window = vector_fmul_window_altivec;
c->float_to_int16 = float_to_int16_altivec;
c->float_to_int16_interleave = float_to_int16_interleave_altivec;
}
diff --git a/libavcodec/ppc/h264_altivec.c b/libavcodec/ppc/h264_altivec.c
index ab4c5c1..b589be2 100644
--- a/libavcodec/ppc/h264_altivec.c
+++ b/libavcodec/ppc/h264_altivec.c
@@ -19,6 +19,7 @@
*/
#include "libavcodec/dsputil.h"
+#include "libavcodec/h264data.h"
#include "gcc_fixes.h"
@@ -188,32 +189,32 @@ void put_no_rnd_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride
((8 - x) * (y)),
((x) * (y))};
register int i;
- vec_u8_t fperm;
- const vec_s32_t vABCD = vec_ld(0, ABCD);
- const vec_s16_t vA = vec_splat((vec_s16_t)vABCD, 1);
- const vec_s16_t vB = vec_splat((vec_s16_t)vABCD, 3);
- const vec_s16_t vC = vec_splat((vec_s16_t)vABCD, 5);
- const vec_s16_t vD = vec_splat((vec_s16_t)vABCD, 7);
+ vec_u8 fperm;
+ const vec_s32 vABCD = vec_ld(0, ABCD);
+ const vec_s16 vA = vec_splat((vec_s16)vABCD, 1);
+ const vec_s16 vB = vec_splat((vec_s16)vABCD, 3);
+ const vec_s16 vC = vec_splat((vec_s16)vABCD, 5);
+ const vec_s16 vD = vec_splat((vec_s16)vABCD, 7);
LOAD_ZERO;
- const vec_s16_t v28ss = vec_sub(vec_sl(vec_splat_s16(1),vec_splat_u16(5)),vec_splat_s16(4));
- const vec_u16_t v6us = vec_splat_u16(6);
+ const vec_s16 v28ss = vec_sub(vec_sl(vec_splat_s16(1),vec_splat_u16(5)),vec_splat_s16(4));
+ const vec_u16 v6us = vec_splat_u16(6);
register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1;
register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0;
- vec_u8_t vsrcAuc, vsrcBuc, vsrcperm0, vsrcperm1;
- vec_u8_t vsrc0uc, vsrc1uc;
- vec_s16_t vsrc0ssH, vsrc1ssH;
- vec_u8_t vsrcCuc, vsrc2uc, vsrc3uc;
- vec_s16_t vsrc2ssH, vsrc3ssH, psum;
- vec_u8_t vdst, ppsum, fsum;
+ vec_u8 vsrcAuc, vsrcBuc, vsrcperm0, vsrcperm1;
+ vec_u8 vsrc0uc, vsrc1uc;
+ vec_s16 vsrc0ssH, vsrc1ssH;
+ vec_u8 vsrcCuc, vsrc2uc, vsrc3uc;
+ vec_s16 vsrc2ssH, vsrc3ssH, psum;
+ vec_u8 vdst, ppsum, fsum;
if (((unsigned long)dst) % 16 == 0) {
- fperm = (vec_u8_t){0x10, 0x11, 0x12, 0x13,
+ fperm = (vec_u8){0x10, 0x11, 0x12, 0x13,
0x14, 0x15, 0x16, 0x17,
0x08, 0x09, 0x0A, 0x0B,
0x0C, 0x0D, 0x0E, 0x0F};
} else {
- fperm = (vec_u8_t){0x00, 0x01, 0x02, 0x03,
+ fperm = (vec_u8){0x00, 0x01, 0x02, 0x03,
0x04, 0x05, 0x06, 0x07,
0x18, 0x19, 0x1A, 0x1B,
0x1C, 0x1D, 0x1E, 0x1F};
@@ -232,8 +233,8 @@ void put_no_rnd_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride
else
vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1);
- vsrc0ssH = (vec_s16_t)vec_mergeh(zero_u8v, (vec_u8_t)vsrc0uc);
- vsrc1ssH = (vec_s16_t)vec_mergeh(zero_u8v, (vec_u8_t)vsrc1uc);
+ vsrc0ssH = (vec_s16)vec_mergeh(zero_u8v, (vec_u8)vsrc0uc);
+ vsrc1ssH = (vec_s16)vec_mergeh(zero_u8v, (vec_u8)vsrc1uc);
if (!loadSecond) {// -> !reallyBadAlign
for (i = 0 ; i < h ; i++) {
@@ -244,8 +245,8 @@ void put_no_rnd_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride
vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);
- vsrc2ssH = (vec_s16_t)vec_mergeh(zero_u8v, (vec_u8_t)vsrc2uc);
- vsrc3ssH = (vec_s16_t)vec_mergeh(zero_u8v, (vec_u8_t)vsrc3uc);
+ vsrc2ssH = (vec_s16)vec_mergeh(zero_u8v, (vec_u8)vsrc2uc);
+ vsrc3ssH = (vec_s16)vec_mergeh(zero_u8v, (vec_u8)vsrc3uc);
psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0));
psum = vec_mladd(vB, vsrc1ssH, psum);
@@ -255,7 +256,7 @@ void put_no_rnd_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride
psum = vec_sra(psum, v6us);
vdst = vec_ld(0, dst);
- ppsum = (vec_u8_t)vec_packsu(psum, psum);
+ ppsum = (vec_u8)vec_packsu(psum, psum);
fsum = vec_perm(vdst, ppsum, fperm);
vec_st(fsum, 0, dst);
@@ -267,7 +268,7 @@ void put_no_rnd_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride
src += stride;
}
} else {
- vec_u8_t vsrcDuc;
+ vec_u8 vsrcDuc;
for (i = 0 ; i < h ; i++) {
vsrcCuc = vec_ld(stride + 0, src);
vsrcDuc = vec_ld(stride + 16, src);
@@ -278,8 +279,8 @@ void put_no_rnd_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride
else
vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1);
- vsrc2ssH = (vec_s16_t)vec_mergeh(zero_u8v, (vec_u8_t)vsrc2uc);
- vsrc3ssH = (vec_s16_t)vec_mergeh(zero_u8v, (vec_u8_t)vsrc3uc);
+ vsrc2ssH = (vec_s16)vec_mergeh(zero_u8v, (vec_u8)vsrc2uc);
+ vsrc3ssH = (vec_s16)vec_mergeh(zero_u8v, (vec_u8)vsrc3uc);
psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0));
psum = vec_mladd(vB, vsrc1ssH, psum);
@@ -289,7 +290,7 @@ void put_no_rnd_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride
psum = vec_sr(psum, v6us);
vdst = vec_ld(0, dst);
- ppsum = (vec_u8_t)vec_pack(psum, psum);
+ ppsum = (vec_u8)vec_pack(psum, psum);
fsum = vec_perm(vdst, ppsum, fperm);
vec_st(fsum, 0, dst);
@@ -308,7 +309,7 @@ static inline void put_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,
int src_stride1, int h)
{
int i;
- vec_u8_t a, b, d, tmp1, tmp2, mask, mask_, edges, align;
+ vec_u8 a, b, d, tmp1, tmp2, mask, mask_, edges, align;
mask_ = vec_lvsl(0, src2);
@@ -350,7 +351,7 @@ static inline void avg_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,
int src_stride1, int h)
{
int i;
- vec_u8_t a, b, d, tmp1, tmp2, mask, mask_, edges, align;
+ vec_u8 a, b, d, tmp1, tmp2, mask, mask_, edges, align;
mask_ = vec_lvsl(0, src2);
@@ -431,23 +432,23 @@ H264_MC(avg_, 16, altivec)
#define VEC_LOAD_U8_ADD_S16_STORE_U8(va) \
vdst_orig = vec_ld(0, dst); \
vdst = vec_perm(vdst_orig, zero_u8v, vdst_mask); \
- vdst_ss = (vec_s16_t) vec_mergeh(zero_u8v, vdst); \
+ vdst_ss = (vec_s16) vec_mergeh(zero_u8v, vdst); \
va = vec_add(va, vdst_ss); \
va_u8 = vec_packsu(va, zero_s16v); \
- va_u32 = vec_splat((vec_u32_t)va_u8, 0); \
+ va_u32 = vec_splat((vec_u32)va_u8, 0); \
vec_ste(va_u32, element, (uint32_t*)dst);
static void ff_h264_idct_add_altivec(uint8_t *dst, DCTELEM *block, int stride)
{
- vec_s16_t va0, va1, va2, va3;
- vec_s16_t vz0, vz1, vz2, vz3;
- vec_s16_t vtmp0, vtmp1, vtmp2, vtmp3;
- vec_u8_t va_u8;
- vec_u32_t va_u32;
- vec_s16_t vdst_ss;
- const vec_u16_t v6us = vec_splat_u16(6);
- vec_u8_t vdst, vdst_orig;
- vec_u8_t vdst_mask = vec_lvsl(0, dst);
+ vec_s16 va0, va1, va2, va3;
+ vec_s16 vz0, vz1, vz2, vz3;
+ vec_s16 vtmp0, vtmp1, vtmp2, vtmp3;
+ vec_u8 va_u8;
+ vec_u32 va_u32;
+ vec_s16 vdst_ss;
+ const vec_u16 v6us = vec_splat_u16(6);
+ vec_u8 vdst, vdst_orig;
+ vec_u8 vdst_mask = vec_lvsl(0, dst);
int element = ((unsigned long)dst & 0xf) >> 2;
LOAD_ZERO;
@@ -478,40 +479,40 @@ static void ff_h264_idct_add_altivec(uint8_t *dst, DCTELEM *block, int stride)
#define IDCT8_1D_ALTIVEC(s0, s1, s2, s3, s4, s5, s6, s7, d0, d1, d2, d3, d4, d5, d6, d7) {\
/* a0 = SRC(0) + SRC(4); */ \
- vec_s16_t a0v = vec_add(s0, s4); \
+ vec_s16 a0v = vec_add(s0, s4); \
/* a2 = SRC(0) - SRC(4); */ \
- vec_s16_t a2v = vec_sub(s0, s4); \
+ vec_s16 a2v = vec_sub(s0, s4); \
/* a4 = (SRC(2)>>1) - SRC(6); */ \
- vec_s16_t a4v = vec_sub(vec_sra(s2, onev), s6); \
+ vec_s16 a4v = vec_sub(vec_sra(s2, onev), s6); \
/* a6 = (SRC(6)>>1) + SRC(2); */ \
- vec_s16_t a6v = vec_add(vec_sra(s6, onev), s2); \
+ vec_s16 a6v = vec_add(vec_sra(s6, onev), s2); \
/* b0 = a0 + a6; */ \
- vec_s16_t b0v = vec_add(a0v, a6v); \
+ vec_s16 b0v = vec_add(a0v, a6v); \
/* b2 = a2 + a4; */ \
- vec_s16_t b2v = vec_add(a2v, a4v); \
+ vec_s16 b2v = vec_add(a2v, a4v); \
/* b4 = a2 - a4; */ \
- vec_s16_t b4v = vec_sub(a2v, a4v); \
+ vec_s16 b4v = vec_sub(a2v, a4v); \
/* b6 = a0 - a6; */ \
- vec_s16_t b6v = vec_sub(a0v, a6v); \
+ vec_s16 b6v = vec_sub(a0v, a6v); \
/* a1 = SRC(5) - SRC(3) - SRC(7) - (SRC(7)>>1); */ \
/* a1 = (SRC(5)-SRC(3)) - (SRC(7) + (SRC(7)>>1)); */ \
- vec_s16_t a1v = vec_sub( vec_sub(s5, s3), vec_add(s7, vec_sra(s7, onev)) ); \
+ vec_s16 a1v = vec_sub( vec_sub(s5, s3), vec_add(s7, vec_sra(s7, onev)) ); \
/* a3 = SRC(7) + SRC(1) - SRC(3) - (SRC(3)>>1); */ \
/* a3 = (SRC(7)+SRC(1)) - (SRC(3) + (SRC(3)>>1)); */ \
- vec_s16_t a3v = vec_sub( vec_add(s7, s1), vec_add(s3, vec_sra(s3, onev)) );\
+ vec_s16 a3v = vec_sub( vec_add(s7, s1), vec_add(s3, vec_sra(s3, onev)) );\
/* a5 = SRC(7) - SRC(1) + SRC(5) + (SRC(5)>>1); */ \
/* a5 = (SRC(7)-SRC(1)) + SRC(5) + (SRC(5)>>1); */ \
- vec_s16_t a5v = vec_add( vec_sub(s7, s1), vec_add(s5, vec_sra(s5, onev)) );\
+ vec_s16 a5v = vec_add( vec_sub(s7, s1), vec_add(s5, vec_sra(s5, onev)) );\
/* a7 = SRC(5)+SRC(3) + SRC(1) + (SRC(1)>>1); */ \
- vec_s16_t a7v = vec_add( vec_add(s5, s3), vec_add(s1, vec_sra(s1, onev)) );\
+ vec_s16 a7v = vec_add( vec_add(s5, s3), vec_add(s1, vec_sra(s1, onev)) );\
/* b1 = (a7>>2) + a1; */ \
- vec_s16_t b1v = vec_add( vec_sra(a7v, twov), a1v); \
+ vec_s16 b1v = vec_add( vec_sra(a7v, twov), a1v); \
/* b3 = a3 + (a5>>2); */ \
- vec_s16_t b3v = vec_add(a3v, vec_sra(a5v, twov)); \
+ vec_s16 b3v = vec_add(a3v, vec_sra(a5v, twov)); \
/* b5 = (a3>>2) - a5; */ \
- vec_s16_t b5v = vec_sub( vec_sra(a3v, twov), a5v); \
+ vec_s16 b5v = vec_sub( vec_sra(a3v, twov), a5v); \
/* b7 = a7 - (a1>>2); */ \
- vec_s16_t b7v = vec_sub( a7v, vec_sra(a1v, twov)); \
+ vec_s16 b7v = vec_sub( a7v, vec_sra(a1v, twov)); \
/* DST(0, b0 + b7); */ \
d0 = vec_add(b0v, b7v); \
/* DST(1, b2 + b5); */ \
@@ -532,17 +533,17 @@ static void ff_h264_idct_add_altivec(uint8_t *dst, DCTELEM *block, int stride)
#define ALTIVEC_STORE_SUM_CLIP(dest, idctv, perm_ldv, perm_stv, sel) { \
/* unaligned load */ \
- vec_u8_t hv = vec_ld( 0, dest ); \
- vec_u8_t lv = vec_ld( 7, dest ); \
- vec_u8_t dstv = vec_perm( hv, lv, (vec_u8_t)perm_ldv ); \
- vec_s16_t idct_sh6 = vec_sra(idctv, sixv); \
- vec_u16_t dst16 = (vec_u16_t)vec_mergeh(zero_u8v, dstv); \
- vec_s16_t idstsum = vec_adds(idct_sh6, (vec_s16_t)dst16); \
- vec_u8_t idstsum8 = vec_packsu(zero_s16v, idstsum); \
- vec_u8_t edgehv; \
+ vec_u8 hv = vec_ld( 0, dest ); \
+ vec_u8 lv = vec_ld( 7, dest ); \
+ vec_u8 dstv = vec_perm( hv, lv, (vec_u8)perm_ldv ); \
+ vec_s16 idct_sh6 = vec_sra(idctv, sixv); \
+ vec_u16 dst16 = (vec_u16)vec_mergeh(zero_u8v, dstv); \
+ vec_s16 idstsum = vec_adds(idct_sh6, (vec_s16)dst16); \
+ vec_u8 idstsum8 = vec_packsu(zero_s16v, idstsum); \
+ vec_u8 edgehv; \
/* unaligned store */ \
- vec_u8_t bodyv = vec_perm( idstsum8, idstsum8, perm_stv );\
- vec_u8_t edgelv = vec_perm( sel, zero_u8v, perm_stv ); \
+ vec_u8 bodyv = vec_perm( idstsum8, idstsum8, perm_stv );\
+ vec_u8 edgelv = vec_perm( sel, zero_u8v, perm_stv ); \
lv = vec_sel( lv, bodyv, edgelv ); \
vec_st( lv, 7, dest ); \
hv = vec_ld( 0, dest ); \
@@ -552,18 +553,18 @@ static void ff_h264_idct_add_altivec(uint8_t *dst, DCTELEM *block, int stride)
}
void ff_h264_idct8_add_altivec( uint8_t *dst, DCTELEM *dct, int stride ) {
- vec_s16_t s0, s1, s2, s3, s4, s5, s6, s7;
- vec_s16_t d0, d1, d2, d3, d4, d5, d6, d7;
- vec_s16_t idct0, idct1, idct2, idct3, idct4, idct5, idct6, idct7;
+ vec_s16 s0, s1, s2, s3, s4, s5, s6, s7;
+ vec_s16 d0, d1, d2, d3, d4, d5, d6, d7;
+ vec_s16 idct0, idct1, idct2, idct3, idct4, idct5, idct6, idct7;
- vec_u8_t perm_ldv = vec_lvsl(0, dst);
- vec_u8_t perm_stv = vec_lvsr(8, dst);
+ vec_u8 perm_ldv = vec_lvsl(0, dst);
+ vec_u8 perm_stv = vec_lvsr(8, dst);
- const vec_u16_t onev = vec_splat_u16(1);
- const vec_u16_t twov = vec_splat_u16(2);
- const vec_u16_t sixv = vec_splat_u16(6);
+ const vec_u16 onev = vec_splat_u16(1);
+ const vec_u16 twov = vec_splat_u16(2);
+ const vec_u16 sixv = vec_splat_u16(6);
- const vec_u8_t sel = (vec_u8_t) {0,0,0,0,0,0,0,0,-1,-1,-1,-1,-1,-1,-1,-1};
+ const vec_u8 sel = (vec_u8) {0,0,0,0,0,0,0,0,-1,-1,-1,-1,-1,-1,-1,-1};
LOAD_ZERO;
dct[0] += 32; // rounding for the >>6 at the end
@@ -595,11 +596,106 @@ void ff_h264_idct8_add_altivec( uint8_t *dst, DCTELEM *dct, int stride ) {
ALTIVEC_STORE_SUM_CLIP(&dst[7*stride], idct7, perm_ldv, perm_stv, sel);
}
+static av_always_inline void h264_idct_dc_add_internal(uint8_t *dst, DCTELEM *block, int stride, int size)
+{
+ vec_s16 dc16;
+ vec_u8 dcplus, dcminus, v0, v1, v2, v3, aligner;
+ LOAD_ZERO;
+ DECLARE_ALIGNED_16(int, dc);
+ int i;
+
+ dc = (block[0] + 32) >> 6;
+ dc16 = vec_splat((vec_s16) vec_lde(0, &dc), 1);
+
+ if (size == 4)
+ dc16 = vec_sld(dc16, zero_s16v, 8);
+ dcplus = vec_packsu(dc16, zero_s16v);
+ dcminus = vec_packsu(vec_sub(zero_s16v, dc16), zero_s16v);
+
+ aligner = vec_lvsr(0, dst);
+ dcplus = vec_perm(dcplus, dcplus, aligner);
+ dcminus = vec_perm(dcminus, dcminus, aligner);
+
+ for (i = 0; i < size; i += 4) {
+ v0 = vec_ld(0, dst+0*stride);
+ v1 = vec_ld(0, dst+1*stride);
+ v2 = vec_ld(0, dst+2*stride);
+ v3 = vec_ld(0, dst+3*stride);
+
+ v0 = vec_adds(v0, dcplus);
+ v1 = vec_adds(v1, dcplus);
+ v2 = vec_adds(v2, dcplus);
+ v3 = vec_adds(v3, dcplus);
+
+ v0 = vec_subs(v0, dcminus);
+ v1 = vec_subs(v1, dcminus);
+ v2 = vec_subs(v2, dcminus);
+ v3 = vec_subs(v3, dcminus);
+
+ vec_st(v0, 0, dst+0*stride);
+ vec_st(v1, 0, dst+1*stride);
+ vec_st(v2, 0, dst+2*stride);
+ vec_st(v3, 0, dst+3*stride);
+
+ dst += 4*stride;
+ }
+}
+
+static void h264_idct_dc_add_altivec(uint8_t *dst, DCTELEM *block, int stride)
+{
+ h264_idct_dc_add_internal(dst, block, stride, 4);
+}
+
+static void ff_h264_idct8_dc_add_altivec(uint8_t *dst, DCTELEM *block, int stride)
+{
+ h264_idct_dc_add_internal(dst, block, stride, 8);
+}
+
+static void ff_h264_idct_add16_altivec(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+ int i;
+ for(i=0; i<16; i++){
+ int nnz = nnzc[ scan8[i] ];
+ if(nnz){
+ if(nnz==1 && block[i*16]) h264_idct_dc_add_altivec(dst + block_offset[i], block + i*16, stride);
+ else ff_h264_idct_add_altivec(dst + block_offset[i], block + i*16, stride);
+ }
+ }
+}
+
+static void ff_h264_idct_add16intra_altivec(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+ int i;
+ for(i=0; i<16; i++){
+ if(nnzc[ scan8[i] ]) ff_h264_idct_add_altivec(dst + block_offset[i], block + i*16, stride);
+ else if(block[i*16]) h264_idct_dc_add_altivec(dst + block_offset[i], block + i*16, stride);
+ }
+}
+
+static void ff_h264_idct8_add4_altivec(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+ int i;
+ for(i=0; i<16; i+=4){
+ int nnz = nnzc[ scan8[i] ];
+ if(nnz){
+ if(nnz==1 && block[i*16]) ff_h264_idct8_dc_add_altivec(dst + block_offset[i], block + i*16, stride);
+ else ff_h264_idct8_add_altivec (dst + block_offset[i], block + i*16, stride);
+ }
+ }
+}
+
+static void ff_h264_idct_add8_altivec(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+ int i;
+ for(i=16; i<16+8; i++){
+ if(nnzc[ scan8[i] ])
+ ff_h264_idct_add_altivec(dest[(i&4)>>2] + block_offset[i], block + i*16, stride);
+ else if(block[i*16])
+ h264_idct_dc_add_altivec(dest[(i&4)>>2] + block_offset[i], block + i*16, stride);
+ }
+}
+
#define transpose4x16(r0, r1, r2, r3) { \
- register vec_u8_t r4; \
- register vec_u8_t r5; \
- register vec_u8_t r6; \
- register vec_u8_t r7; \
+ register vec_u8 r4; \
+ register vec_u8 r5; \
+ register vec_u8 r6; \
+ register vec_u8 r7; \
\
r4 = vec_mergeh(r0, r2); /*0, 2 set 0*/ \
r5 = vec_mergel(r0, r2); /*0, 2 set 1*/ \
@@ -613,8 +709,8 @@ void ff_h264_idct8_add_altivec( uint8_t *dst, DCTELEM *dct, int stride ) {
}
static inline void write16x4(uint8_t *dst, int dst_stride,
- register vec_u8_t r0, register vec_u8_t r1,
- register vec_u8_t r2, register vec_u8_t r3) {
+ register vec_u8 r0, register vec_u8 r1,
+ register vec_u8 r2, register vec_u8 r3) {
DECLARE_ALIGNED_16(unsigned char, result[64]);
uint32_t *src_int = (uint32_t *)result, *dst_int = (uint32_t *)dst;
int int_dst_stride = dst_stride/4;
@@ -646,16 +742,16 @@ static inline void write16x4(uint8_t *dst, int dst_stride,
\todo FIXME: see if we can't spare some vec_lvsl() by them factorizing
out of unaligned_load() */
#define readAndTranspose16x6(src, src_stride, r8, r9, r10, r11, r12, r13) {\
- register vec_u8_t r0 = unaligned_load(0, src); \
- register vec_u8_t r1 = unaligned_load( src_stride, src); \
- register vec_u8_t r2 = unaligned_load(2* src_stride, src); \
- register vec_u8_t r3 = unaligned_load(3* src_stride, src); \
- register vec_u8_t r4 = unaligned_load(4* src_stride, src); \
- register vec_u8_t r5 = unaligned_load(5* src_stride, src); \
- register vec_u8_t r6 = unaligned_load(6* src_stride, src); \
- register vec_u8_t r7 = unaligned_load(7* src_stride, src); \
- register vec_u8_t r14 = unaligned_load(14*src_stride, src); \
- register vec_u8_t r15 = unaligned_load(15*src_stride, src); \
+ register vec_u8 r0 = unaligned_load(0, src); \
+ register vec_u8 r1 = unaligned_load( src_stride, src); \
+ register vec_u8 r2 = unaligned_load(2* src_stride, src); \
+ register vec_u8 r3 = unaligned_load(3* src_stride, src); \
+ register vec_u8 r4 = unaligned_load(4* src_stride, src); \
+ register vec_u8 r5 = unaligned_load(5* src_stride, src); \
+ register vec_u8 r6 = unaligned_load(6* src_stride, src); \
+ register vec_u8 r7 = unaligned_load(7* src_stride, src); \
+ register vec_u8 r14 = unaligned_load(14*src_stride, src); \
+ register vec_u8 r15 = unaligned_load(15*src_stride, src); \
\
r8 = unaligned_load( 8*src_stride, src); \
r9 = unaligned_load( 9*src_stride, src); \
@@ -705,26 +801,26 @@ static inline void write16x4(uint8_t *dst, int dst_stride,
}
// out: o = |x-y| < a
-static inline vec_u8_t diff_lt_altivec ( register vec_u8_t x,
- register vec_u8_t y,
- register vec_u8_t a) {
-
- register vec_u8_t diff = vec_subs(x, y);
- register vec_u8_t diffneg = vec_subs(y, x);
- register vec_u8_t o = vec_or(diff, diffneg); /* |x-y| */
- o = (vec_u8_t)vec_cmplt(o, a);
+static inline vec_u8 diff_lt_altivec ( register vec_u8 x,
+ register vec_u8 y,
+ register vec_u8 a) {
+
+ register vec_u8 diff = vec_subs(x, y);
+ register vec_u8 diffneg = vec_subs(y, x);
+ register vec_u8 o = vec_or(diff, diffneg); /* |x-y| */
+ o = (vec_u8)vec_cmplt(o, a);
return o;
}
-static inline vec_u8_t h264_deblock_mask ( register vec_u8_t p0,
- register vec_u8_t p1,
- register vec_u8_t q0,
- register vec_u8_t q1,
- register vec_u8_t alpha,
- register vec_u8_t beta) {
+static inline vec_u8 h264_deblock_mask ( register vec_u8 p0,
+ register vec_u8 p1,
+ register vec_u8 q0,
+ register vec_u8 q1,
+ register vec_u8 alpha,
+ register vec_u8 beta) {
- register vec_u8_t mask;
- register vec_u8_t tempmask;
+ register vec_u8 mask;
+ register vec_u8 tempmask;
mask = diff_lt_altivec(p0, q0, alpha);
tempmask = diff_lt_altivec(p1, p0, beta);
@@ -736,19 +832,19 @@ static inline vec_u8_t h264_deblock_mask ( register vec_u8_t p0,
}
// out: newp1 = clip((p2 + ((p0 + q0 + 1) >> 1)) >> 1, p1-tc0, p1+tc0)
-static inline vec_u8_t h264_deblock_q1(register vec_u8_t p0,
- register vec_u8_t p1,
- register vec_u8_t p2,
- register vec_u8_t q0,
- register vec_u8_t tc0) {
-
- register vec_u8_t average = vec_avg(p0, q0);
- register vec_u8_t temp;
- register vec_u8_t uncliped;
- register vec_u8_t ones;
- register vec_u8_t max;
- register vec_u8_t min;
- register vec_u8_t newp1;
+static inline vec_u8 h264_deblock_q1(register vec_u8 p0,
+ register vec_u8 p1,
+ register vec_u8 p2,
+ register vec_u8 q0,
+ register vec_u8 tc0) {
+
+ register vec_u8 average = vec_avg(p0, q0);
+ register vec_u8 temp;
+ register vec_u8 uncliped;
+ register vec_u8 ones;
+ register vec_u8 max;
+ register vec_u8 min;
+ register vec_u8 newp1;
temp = vec_xor(average, p2);
average = vec_avg(average, p2); /*avg(p2, avg(p0, q0)) */
@@ -764,16 +860,16 @@ static inline vec_u8_t h264_deblock_q1(register vec_u8_t p0,
#define h264_deblock_p0_q0(p0, p1, q0, q1, tc0masked) { \
\
- const vec_u8_t A0v = vec_sl(vec_splat_u8(10), vec_splat_u8(4)); \
+ const vec_u8 A0v = vec_sl(vec_splat_u8(10), vec_splat_u8(4)); \
\
- register vec_u8_t pq0bit = vec_xor(p0,q0); \
- register vec_u8_t q1minus; \
- register vec_u8_t p0minus; \
- register vec_u8_t stage1; \
- register vec_u8_t stage2; \
- register vec_u8_t vec160; \
- register vec_u8_t delta; \
- register vec_u8_t deltaneg; \
+ register vec_u8 pq0bit = vec_xor(p0,q0); \
+ register vec_u8 q1minus; \
+ register vec_u8 p0minus; \
+ register vec_u8 stage1; \
+ register vec_u8 stage2; \
+ register vec_u8 vec160; \
+ register vec_u8 delta; \
+ register vec_u8 deltaneg; \
\
q1minus = vec_nor(q1, q1); /* 255 - q1 */ \
stage1 = vec_avg(p1, q1minus); /* (p1 - q1 + 256)>>1 */ \
@@ -796,16 +892,16 @@ static inline vec_u8_t h264_deblock_q1(register vec_u8_t p0,
#define h264_loop_filter_luma_altivec(p2, p1, p0, q0, q1, q2, alpha, beta, tc0) { \
DECLARE_ALIGNED_16(unsigned char, temp[16]); \
- register vec_u8_t alphavec; \
- register vec_u8_t betavec; \
- register vec_u8_t mask; \
- register vec_u8_t p1mask; \
- register vec_u8_t q1mask; \
+ register vec_u8 alphavec; \
+ register vec_u8 betavec; \
+ register vec_u8 mask; \
+ register vec_u8 p1mask; \
+ register vec_u8 q1mask; \
register vector signed char tc0vec; \
- register vec_u8_t finaltc0; \
- register vec_u8_t tc0masked; \
- register vec_u8_t newp1; \
- register vec_u8_t newq1; \
+ register vec_u8 finaltc0; \
+ register vec_u8 tc0masked; \
+ register vec_u8 newp1; \
+ register vec_u8 newq1; \
\
temp[0] = alpha; \
temp[1] = beta; \
@@ -819,18 +915,18 @@ static inline vec_u8_t h264_deblock_q1(register vec_u8_t p0,
tc0vec = vec_mergeh(tc0vec, tc0vec); \
tc0vec = vec_mergeh(tc0vec, tc0vec); \
mask = vec_and(mask, vec_cmpgt(tc0vec, vec_splat_s8(-1))); /* if tc0[i] >= 0 */ \
- finaltc0 = vec_and((vec_u8_t)tc0vec, mask); /* tc = tc0 */ \
+ finaltc0 = vec_and((vec_u8)tc0vec, mask); /* tc = tc0 */ \
\
p1mask = diff_lt_altivec(p2, p0, betavec); \
p1mask = vec_and(p1mask, mask); /* if ( |p2 - p0| < beta) */ \
- tc0masked = vec_and(p1mask, (vec_u8_t)tc0vec); \
+ tc0masked = vec_and(p1mask, (vec_u8)tc0vec); \
finaltc0 = vec_sub(finaltc0, p1mask); /* tc++ */ \
newp1 = h264_deblock_q1(p0, p1, p2, q0, tc0masked); \
/*end if*/ \
\
q1mask = diff_lt_altivec(q2, q0, betavec); \
q1mask = vec_and(q1mask, mask); /* if ( |q2 - q0| < beta ) */\
- tc0masked = vec_and(q1mask, (vec_u8_t)tc0vec); \
+ tc0masked = vec_and(q1mask, (vec_u8)tc0vec); \
finaltc0 = vec_sub(finaltc0, q1mask); /* tc++ */ \
newq1 = h264_deblock_q1(p0, q1, q2, q0, tc0masked); \
/*end if*/ \
@@ -843,12 +939,12 @@ static inline vec_u8_t h264_deblock_q1(register vec_u8_t p0,
static void h264_v_loop_filter_luma_altivec(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) {
if ((tc0[0] & tc0[1] & tc0[2] & tc0[3]) >= 0) {
- register vec_u8_t p2 = vec_ld(-3*stride, pix);
- register vec_u8_t p1 = vec_ld(-2*stride, pix);
- register vec_u8_t p0 = vec_ld(-1*stride, pix);
- register vec_u8_t q0 = vec_ld(0, pix);
- register vec_u8_t q1 = vec_ld(stride, pix);
- register vec_u8_t q2 = vec_ld(2*stride, pix);
+ register vec_u8 p2 = vec_ld(-3*stride, pix);
+ register vec_u8 p1 = vec_ld(-2*stride, pix);
+ register vec_u8 p0 = vec_ld(-1*stride, pix);
+ register vec_u8 q0 = vec_ld(0, pix);
+ register vec_u8 q1 = vec_ld(stride, pix);
+ register vec_u8 q2 = vec_ld(2*stride, pix);
h264_loop_filter_luma_altivec(p2, p1, p0, q0, q1, q2, alpha, beta, tc0);
vec_st(p1, -2*stride, pix);
vec_st(p0, -1*stride, pix);
@@ -859,7 +955,7 @@ static void h264_v_loop_filter_luma_altivec(uint8_t *pix, int stride, int alpha,
static void h264_h_loop_filter_luma_altivec(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) {
- register vec_u8_t line0, line1, line2, line3, line4, line5;
+ register vec_u8 line0, line1, line2, line3, line4, line5;
if ((tc0[0] & tc0[1] & tc0[2] & tc0[3]) < 0)
return;
readAndTranspose16x6(pix-3, stride, line0, line1, line2, line3, line4, line5);
@@ -868,6 +964,130 @@ static void h264_h_loop_filter_luma_altivec(uint8_t *pix, int stride, int alpha,
write16x4(pix-2, stride, line1, line2, line3, line4);
}
+static av_always_inline
+void weight_h264_WxH_altivec(uint8_t *block, int stride, int log2_denom, int weight, int offset, int w, int h)
+{
+ int y, aligned;
+ vec_u8 vblock;
+ vec_s16 vtemp, vweight, voffset, v0, v1;
+ vec_u16 vlog2_denom;
+ DECLARE_ALIGNED_16(int32_t, temp[4]);
+ LOAD_ZERO;
+
+ offset <<= log2_denom;
+ if(log2_denom) offset += 1<<(log2_denom-1);
+ temp[0] = log2_denom;
+ temp[1] = weight;
+ temp[2] = offset;
+
+ vtemp = (vec_s16)vec_ld(0, temp);
+ vlog2_denom = (vec_u16)vec_splat(vtemp, 1);
+ vweight = vec_splat(vtemp, 3);
+ voffset = vec_splat(vtemp, 5);
+ aligned = !((unsigned long)block & 0xf);
+
+ for (y=0; y<h; y++) {
+ vblock = vec_ld(0, block);
+
+ v0 = (vec_s16)vec_mergeh(zero_u8v, vblock);
+ v1 = (vec_s16)vec_mergel(zero_u8v, vblock);
+
+ if (w == 16 || aligned) {
+ v0 = vec_mladd(v0, vweight, zero_s16v);
+ v0 = vec_adds(v0, voffset);
+ v0 = vec_sra(v0, vlog2_denom);
+ }
+ if (w == 16 || !aligned) {
+ v1 = vec_mladd(v1, vweight, zero_s16v);
+ v1 = vec_adds(v1, voffset);
+ v1 = vec_sra(v1, vlog2_denom);
+ }
+ vblock = vec_packsu(v0, v1);
+ vec_st(vblock, 0, block);
+
+ block += stride;
+ }
+}
+
+static av_always_inline
+void biweight_h264_WxH_altivec(uint8_t *dst, uint8_t *src, int stride, int log2_denom,
+ int weightd, int weights, int offset, int w, int h)
+{
+ int y, dst_aligned, src_aligned;
+ vec_u8 vsrc, vdst;
+ vec_s16 vtemp, vweights, vweightd, voffset, v0, v1, v2, v3;
+ vec_u16 vlog2_denom;
+ DECLARE_ALIGNED_16(int32_t, temp[4]);
+ LOAD_ZERO;
+
+ offset = ((offset + 1) | 1) << log2_denom;
+ temp[0] = log2_denom+1;
+ temp[1] = weights;
+ temp[2] = weightd;
+ temp[3] = offset;
+
+ vtemp = (vec_s16)vec_ld(0, temp);
+ vlog2_denom = (vec_u16)vec_splat(vtemp, 1);
+ vweights = vec_splat(vtemp, 3);
+ vweightd = vec_splat(vtemp, 5);
+ voffset = vec_splat(vtemp, 7);
+ dst_aligned = !((unsigned long)dst & 0xf);
+ src_aligned = !((unsigned long)src & 0xf);
+
+ for (y=0; y<h; y++) {
+ vdst = vec_ld(0, dst);
+ vsrc = vec_ld(0, src);
+
+ v0 = (vec_s16)vec_mergeh(zero_u8v, vdst);
+ v1 = (vec_s16)vec_mergel(zero_u8v, vdst);
+ v2 = (vec_s16)vec_mergeh(zero_u8v, vsrc);
+ v3 = (vec_s16)vec_mergel(zero_u8v, vsrc);
+
+ if (w == 8) {
+ if (src_aligned)
+ v3 = v2;
+ else
+ v2 = v3;
+ }
+
+ if (w == 16 || dst_aligned) {
+ v0 = vec_mladd(v0, vweightd, zero_s16v);
+ v2 = vec_mladd(v2, vweights, zero_s16v);
+
+ v0 = vec_adds(v0, voffset);
+ v0 = vec_adds(v0, v2);
+ v0 = vec_sra(v0, vlog2_denom);
+ }
+ if (w == 16 || !dst_aligned) {
+ v1 = vec_mladd(v1, vweightd, zero_s16v);
+ v3 = vec_mladd(v3, vweights, zero_s16v);
+
+ v1 = vec_adds(v1, voffset);
+ v1 = vec_adds(v1, v3);
+ v1 = vec_sra(v1, vlog2_denom);
+ }
+ vdst = vec_packsu(v0, v1);
+ vec_st(vdst, 0, dst);
+
+ dst += stride;
+ src += stride;
+ }
+}
+
+#define H264_WEIGHT(W,H) \
+static void ff_weight_h264_pixels ## W ## x ## H ## _altivec(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \
+ weight_h264_WxH_altivec(block, stride, log2_denom, weight, offset, W, H); \
+}\
+static void ff_biweight_h264_pixels ## W ## x ## H ## _altivec(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset){ \
+ biweight_h264_WxH_altivec(dst, src, stride, log2_denom, weightd, weights, offset, W, H); \
+}
+
+H264_WEIGHT(16,16)
+H264_WEIGHT(16, 8)
+H264_WEIGHT( 8,16)
+H264_WEIGHT( 8, 8)
+H264_WEIGHT( 8, 4)
+
void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) {
if (has_altivec()) {
@@ -875,7 +1095,13 @@ void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) {
c->put_no_rnd_h264_chroma_pixels_tab[0] = put_no_rnd_h264_chroma_mc8_altivec;
c->avg_h264_chroma_pixels_tab[0] = avg_h264_chroma_mc8_altivec;
c->h264_idct_add = ff_h264_idct_add_altivec;
+ c->h264_idct_add8 = ff_h264_idct_add8_altivec;
+ c->h264_idct_add16 = ff_h264_idct_add16_altivec;
+ c->h264_idct_add16intra = ff_h264_idct_add16intra_altivec;
+ c->h264_idct_dc_add= h264_idct_dc_add_altivec;
+ c->h264_idct8_dc_add = ff_h264_idct8_dc_add_altivec;
c->h264_idct8_add = ff_h264_idct8_add_altivec;
+ c->h264_idct8_add4 = ff_h264_idct8_add4_altivec;
c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_altivec;
c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_altivec;
@@ -900,5 +1126,16 @@ void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) {
dspfunc(put_h264_qpel, 0, 16);
dspfunc(avg_h264_qpel, 0, 16);
#undef dspfunc
+
+ c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels16x16_altivec;
+ c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels16x8_altivec;
+ c->weight_h264_pixels_tab[2] = ff_weight_h264_pixels8x16_altivec;
+ c->weight_h264_pixels_tab[3] = ff_weight_h264_pixels8x8_altivec;
+ c->weight_h264_pixels_tab[4] = ff_weight_h264_pixels8x4_altivec;
+ c->biweight_h264_pixels_tab[0] = ff_biweight_h264_pixels16x16_altivec;
+ c->biweight_h264_pixels_tab[1] = ff_biweight_h264_pixels16x8_altivec;
+ c->biweight_h264_pixels_tab[2] = ff_biweight_h264_pixels8x16_altivec;
+ c->biweight_h264_pixels_tab[3] = ff_biweight_h264_pixels8x8_altivec;
+ c->biweight_h264_pixels_tab[4] = ff_biweight_h264_pixels8x4_altivec;
}
}
diff --git a/libavcodec/ppc/h264_template_altivec.c b/libavcodec/ppc/h264_template_altivec.c
index e050fe5..5f722d0 100644
--- a/libavcodec/ppc/h264_template_altivec.c
+++ b/libavcodec/ppc/h264_template_altivec.c
@@ -28,8 +28,8 @@
/* this code assume that stride % 16 == 0 */
#define CHROMA_MC8_ALTIVEC_CORE \
- vsrc2ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc2uc);\
- vsrc3ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc3uc);\
+ vsrc2ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc2uc);\
+ vsrc3ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc3uc);\
\
psum = vec_mladd(vA, vsrc0ssH, v32ss);\
psum = vec_mladd(vB, vsrc1ssH, psum);\
@@ -38,7 +38,7 @@
psum = vec_sr(psum, v6us);\
\
vdst = vec_ld(0, dst);\
- ppsum = (vec_u8_t)vec_pack(psum, psum);\
+ ppsum = (vec_u8)vec_pack(psum, psum);\
vfdst = vec_perm(vdst, ppsum, fperm);\
\
OP_U8_ALTIVEC(fsum, vfdst, vdst);\
@@ -53,15 +53,15 @@
#define CHROMA_MC8_ALTIVEC_CORE_SIMPLE \
\
- vsrc0ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc0uc);\
- vsrc1ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc1uc);\
+ vsrc0ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc0uc);\
+ vsrc1ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc1uc);\
\
psum = vec_mladd(vA, vsrc0ssH, v32ss);\
psum = vec_mladd(vE, vsrc1ssH, psum);\
psum = vec_sr(psum, v6us);\
\
vdst = vec_ld(0, dst);\
- ppsum = (vec_u8_t)vec_pack(psum, psum);\
+ ppsum = (vec_u8)vec_pack(psum, psum);\
vfdst = vec_perm(vdst, ppsum, fperm);\
\
OP_U8_ALTIVEC(fsum, vfdst, vdst);\
@@ -80,34 +80,34 @@ void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src,
((8 - x) * ( y)),
(( x) * ( y))};
register int i;
- vec_u8_t fperm;
- const vec_s32_t vABCD = vec_ld(0, ABCD);
- const vec_s16_t vA = vec_splat((vec_s16_t)vABCD, 1);
- const vec_s16_t vB = vec_splat((vec_s16_t)vABCD, 3);
- const vec_s16_t vC = vec_splat((vec_s16_t)vABCD, 5);
- const vec_s16_t vD = vec_splat((vec_s16_t)vABCD, 7);
+ vec_u8 fperm;
+ const vec_s32 vABCD = vec_ld(0, ABCD);
+ const vec_s16 vA = vec_splat((vec_s16)vABCD, 1);
+ const vec_s16 vB = vec_splat((vec_s16)vABCD, 3);
+ const vec_s16 vC = vec_splat((vec_s16)vABCD, 5);
+ const vec_s16 vD = vec_splat((vec_s16)vABCD, 7);
LOAD_ZERO;
- const vec_s16_t v32ss = vec_sl(vec_splat_s16(1),vec_splat_u16(5));
- const vec_u16_t v6us = vec_splat_u16(6);
+ const vec_s16 v32ss = vec_sl(vec_splat_s16(1),vec_splat_u16(5));
+ const vec_u16 v6us = vec_splat_u16(6);
register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1;
register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0;
- vec_u8_t vsrcAuc, vsrcBuc, vsrcperm0, vsrcperm1;
- vec_u8_t vsrc0uc, vsrc1uc;
- vec_s16_t vsrc0ssH, vsrc1ssH;
- vec_u8_t vsrcCuc, vsrc2uc, vsrc3uc;
- vec_s16_t vsrc2ssH, vsrc3ssH, psum;
- vec_u8_t vdst, ppsum, vfdst, fsum;
+ vec_u8 vsrcAuc, vsrcBuc, vsrcperm0, vsrcperm1;
+ vec_u8 vsrc0uc, vsrc1uc;
+ vec_s16 vsrc0ssH, vsrc1ssH;
+ vec_u8 vsrcCuc, vsrc2uc, vsrc3uc;
+ vec_s16 vsrc2ssH, vsrc3ssH, psum;
+ vec_u8 vdst, ppsum, vfdst, fsum;
POWERPC_PERF_START_COUNT(PREFIX_h264_chroma_mc8_num, 1);
if (((unsigned long)dst) % 16 == 0) {
- fperm = (vec_u8_t){0x10, 0x11, 0x12, 0x13,
+ fperm = (vec_u8){0x10, 0x11, 0x12, 0x13,
0x14, 0x15, 0x16, 0x17,
0x08, 0x09, 0x0A, 0x0B,
0x0C, 0x0D, 0x0E, 0x0F};
} else {
- fperm = (vec_u8_t){0x00, 0x01, 0x02, 0x03,
+ fperm = (vec_u8){0x00, 0x01, 0x02, 0x03,
0x04, 0x05, 0x06, 0x07,
0x18, 0x19, 0x1A, 0x1B,
0x1C, 0x1D, 0x1E, 0x1F};
@@ -126,8 +126,8 @@ void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src,
else
vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1);
- vsrc0ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc0uc);
- vsrc1ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc1uc);
+ vsrc0ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc0uc);
+ vsrc1ssH = (vec_s16)vec_mergeh(zero_u8v,(vec_u8)vsrc1uc);
if (ABCD[3]) {
if (!loadSecond) {// -> !reallyBadAlign
@@ -139,7 +139,7 @@ void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src,
CHROMA_MC8_ALTIVEC_CORE
}
} else {
- vec_u8_t vsrcDuc;
+ vec_u8 vsrcDuc;
for (i = 0 ; i < h ; i++) {
vsrcCuc = vec_ld(stride + 0, src);
vsrcDuc = vec_ld(stride + 16, src);
@@ -153,7 +153,7 @@ void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src,
}
}
} else {
- const vec_s16_t vE = vec_add(vB, vC);
+ const vec_s16 vE = vec_add(vB, vC);
if (ABCD[2]) { // x == 0 B == 0
if (!loadSecond) {// -> !reallyBadAlign
for (i = 0 ; i < h ; i++) {
@@ -164,7 +164,7 @@ void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src,
vsrc0uc = vsrc1uc;
}
} else {
- vec_u8_t vsrcDuc;
+ vec_u8 vsrcDuc;
for (i = 0 ; i < h ; i++) {
vsrcCuc = vec_ld(stride + 0, src);
vsrcDuc = vec_ld(stride + 15, src);
@@ -184,7 +184,7 @@ void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src,
CHROMA_MC8_ALTIVEC_CORE_SIMPLE
}
} else {
- vec_u8_t vsrcDuc;
+ vec_u8 vsrcDuc;
for (i = 0 ; i < h ; i++) {
vsrcCuc = vec_ld(0, src);
vsrcDuc = vec_ld(15, src);
@@ -210,35 +210,35 @@ static void PREFIX_h264_qpel16_h_lowpass_altivec(uint8_t * dst, uint8_t * src, i
register int i;
LOAD_ZERO;
- const vec_u8_t permM2 = vec_lvsl(-2, src);
- const vec_u8_t permM1 = vec_lvsl(-1, src);
- const vec_u8_t permP0 = vec_lvsl(+0, src);
- const vec_u8_t permP1 = vec_lvsl(+1, src);
- const vec_u8_t permP2 = vec_lvsl(+2, src);
- const vec_u8_t permP3 = vec_lvsl(+3, src);
- const vec_s16_t v5ss = vec_splat_s16(5);
- const vec_u16_t v5us = vec_splat_u16(5);
- const vec_s16_t v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2));
- const vec_s16_t v16ss = vec_sl(vec_splat_s16(1),vec_splat_u16(4));
-
- vec_u8_t srcM2, srcM1, srcP0, srcP1, srcP2, srcP3;
+ const vec_u8 permM2 = vec_lvsl(-2, src);
+ const vec_u8 permM1 = vec_lvsl(-1, src);
+ const vec_u8 permP0 = vec_lvsl(+0, src);
+ const vec_u8 permP1 = vec_lvsl(+1, src);
+ const vec_u8 permP2 = vec_lvsl(+2, src);
+ const vec_u8 permP3 = vec_lvsl(+3, src);
+ const vec_s16 v5ss = vec_splat_s16(5);
+ const vec_u16 v5us = vec_splat_u16(5);
+ const vec_s16 v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2));
+ const vec_s16 v16ss = vec_sl(vec_splat_s16(1),vec_splat_u16(4));
+
+ vec_u8 srcM2, srcM1, srcP0, srcP1, srcP2, srcP3;
register int align = ((((unsigned long)src) - 2) % 16);
- vec_s16_t srcP0A, srcP0B, srcP1A, srcP1B,
+ vec_s16 srcP0A, srcP0B, srcP1A, srcP1B,
srcP2A, srcP2B, srcP3A, srcP3B,
srcM1A, srcM1B, srcM2A, srcM2B,
sum1A, sum1B, sum2A, sum2B, sum3A, sum3B,
pp1A, pp1B, pp2A, pp2B, pp3A, pp3B,
psumA, psumB, sumA, sumB;
- vec_u8_t sum, vdst, fsum;
+ vec_u8 sum, vdst, fsum;
POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_h_lowpass_num, 1);
for (i = 0 ; i < 16 ; i ++) {
- vec_u8_t srcR1 = vec_ld(-2, src);
- vec_u8_t srcR2 = vec_ld(14, src);
+ vec_u8 srcR1 = vec_ld(-2, src);
+ vec_u8 srcR2 = vec_ld(14, src);
switch (align) {
default: {
@@ -258,7 +258,7 @@ static void PREFIX_h264_qpel16_h_lowpass_altivec(uint8_t * dst, uint8_t * src, i
srcP3 = srcR2;
} break;
case 12: {
- vec_u8_t srcR3 = vec_ld(30, src);
+ vec_u8 srcR3 = vec_ld(30, src);
srcM2 = vec_perm(srcR1, srcR2, permM2);
srcM1 = vec_perm(srcR1, srcR2, permM1);
srcP0 = vec_perm(srcR1, srcR2, permP0);
@@ -267,7 +267,7 @@ static void PREFIX_h264_qpel16_h_lowpass_altivec(uint8_t * dst, uint8_t * src, i
srcP3 = vec_perm(srcR2, srcR3, permP3);
} break;
case 13: {
- vec_u8_t srcR3 = vec_ld(30, src);
+ vec_u8 srcR3 = vec_ld(30, src);
srcM2 = vec_perm(srcR1, srcR2, permM2);
srcM1 = vec_perm(srcR1, srcR2, permM1);
srcP0 = vec_perm(srcR1, srcR2, permP0);
@@ -276,7 +276,7 @@ static void PREFIX_h264_qpel16_h_lowpass_altivec(uint8_t * dst, uint8_t * src, i
srcP3 = vec_perm(srcR2, srcR3, permP3);
} break;
case 14: {
- vec_u8_t srcR3 = vec_ld(30, src);
+ vec_u8 srcR3 = vec_ld(30, src);
srcM2 = vec_perm(srcR1, srcR2, permM2);
srcM1 = vec_perm(srcR1, srcR2, permM1);
srcP0 = srcR2;
@@ -285,7 +285,7 @@ static void PREFIX_h264_qpel16_h_lowpass_altivec(uint8_t * dst, uint8_t * src, i
srcP3 = vec_perm(srcR2, srcR3, permP3);
} break;
case 15: {
- vec_u8_t srcR3 = vec_ld(30, src);
+ vec_u8 srcR3 = vec_ld(30, src);
srcM2 = vec_perm(srcR1, srcR2, permM2);
srcM1 = srcR2;
srcP0 = vec_perm(srcR2, srcR3, permP0);
@@ -295,20 +295,20 @@ static void PREFIX_h264_qpel16_h_lowpass_altivec(uint8_t * dst, uint8_t * src, i
} break;
}
- srcP0A = (vec_s16_t) vec_mergeh(zero_u8v, srcP0);
- srcP0B = (vec_s16_t) vec_mergel(zero_u8v, srcP0);
- srcP1A = (vec_s16_t) vec_mergeh(zero_u8v, srcP1);
- srcP1B = (vec_s16_t) vec_mergel(zero_u8v, srcP1);
+ srcP0A = (vec_s16) vec_mergeh(zero_u8v, srcP0);
+ srcP0B = (vec_s16) vec_mergel(zero_u8v, srcP0);
+ srcP1A = (vec_s16) vec_mergeh(zero_u8v, srcP1);
+ srcP1B = (vec_s16) vec_mergel(zero_u8v, srcP1);
- srcP2A = (vec_s16_t) vec_mergeh(zero_u8v, srcP2);
- srcP2B = (vec_s16_t) vec_mergel(zero_u8v, srcP2);
- srcP3A = (vec_s16_t) vec_mergeh(zero_u8v, srcP3);
- srcP3B = (vec_s16_t) vec_mergel(zero_u8v, srcP3);
+ srcP2A = (vec_s16) vec_mergeh(zero_u8v, srcP2);
+ srcP2B = (vec_s16) vec_mergel(zero_u8v, srcP2);
+ srcP3A = (vec_s16) vec_mergeh(zero_u8v, srcP3);
+ srcP3B = (vec_s16) vec_mergel(zero_u8v, srcP3);
- srcM1A = (vec_s16_t) vec_mergeh(zero_u8v, srcM1);
- srcM1B = (vec_s16_t) vec_mergel(zero_u8v, srcM1);
- srcM2A = (vec_s16_t) vec_mergeh(zero_u8v, srcM2);
- srcM2B = (vec_s16_t) vec_mergel(zero_u8v, srcM2);
+ srcM1A = (vec_s16) vec_mergeh(zero_u8v, srcM1);
+ srcM1B = (vec_s16) vec_mergel(zero_u8v, srcM1);
+ srcM2A = (vec_s16) vec_mergeh(zero_u8v, srcM2);
+ srcM2B = (vec_s16) vec_mergel(zero_u8v, srcM2);
sum1A = vec_adds(srcP0A, srcP1A);
sum1B = vec_adds(srcP0B, srcP1B);
@@ -354,52 +354,52 @@ static void PREFIX_h264_qpel16_v_lowpass_altivec(uint8_t * dst, uint8_t * src, i
register int i;
LOAD_ZERO;
- const vec_u8_t perm = vec_lvsl(0, src);
- const vec_s16_t v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2));
- const vec_u16_t v5us = vec_splat_u16(5);
- const vec_s16_t v5ss = vec_splat_s16(5);
- const vec_s16_t v16ss = vec_sl(vec_splat_s16(1),vec_splat_u16(4));
+ const vec_u8 perm = vec_lvsl(0, src);
+ const vec_s16 v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2));
+ const vec_u16 v5us = vec_splat_u16(5);
+ const vec_s16 v5ss = vec_splat_s16(5);
+ const vec_s16 v16ss = vec_sl(vec_splat_s16(1),vec_splat_u16(4));
uint8_t *srcbis = src - (srcStride * 2);
- const vec_u8_t srcM2a = vec_ld(0, srcbis);
- const vec_u8_t srcM2b = vec_ld(16, srcbis);
- const vec_u8_t srcM2 = vec_perm(srcM2a, srcM2b, perm);
+ const vec_u8 srcM2a = vec_ld(0, srcbis);
+ const vec_u8 srcM2b = vec_ld(16, srcbis);
+ const vec_u8 srcM2 = vec_perm(srcM2a, srcM2b, perm);
//srcbis += srcStride;
- const vec_u8_t srcM1a = vec_ld(0, srcbis += srcStride);
- const vec_u8_t srcM1b = vec_ld(16, srcbis);
- const vec_u8_t srcM1 = vec_perm(srcM1a, srcM1b, perm);
+ const vec_u8 srcM1a = vec_ld(0, srcbis += srcStride);
+ const vec_u8 srcM1b = vec_ld(16, srcbis);
+ const vec_u8 srcM1 = vec_perm(srcM1a, srcM1b, perm);
//srcbis += srcStride;
- const vec_u8_t srcP0a = vec_ld(0, srcbis += srcStride);
- const vec_u8_t srcP0b = vec_ld(16, srcbis);
- const vec_u8_t srcP0 = vec_perm(srcP0a, srcP0b, perm);
+ const vec_u8 srcP0a = vec_ld(0, srcbis += srcStride);
+ const vec_u8 srcP0b = vec_ld(16, srcbis);
+ const vec_u8 srcP0 = vec_perm(srcP0a, srcP0b, perm);
//srcbis += srcStride;
- const vec_u8_t srcP1a = vec_ld(0, srcbis += srcStride);
- const vec_u8_t srcP1b = vec_ld(16, srcbis);
- const vec_u8_t srcP1 = vec_perm(srcP1a, srcP1b, perm);
+ const vec_u8 srcP1a = vec_ld(0, srcbis += srcStride);
+ const vec_u8 srcP1b = vec_ld(16, srcbis);
+ const vec_u8 srcP1 = vec_perm(srcP1a, srcP1b, perm);
//srcbis += srcStride;
- const vec_u8_t srcP2a = vec_ld(0, srcbis += srcStride);
- const vec_u8_t srcP2b = vec_ld(16, srcbis);
- const vec_u8_t srcP2 = vec_perm(srcP2a, srcP2b, perm);
+ const vec_u8 srcP2a = vec_ld(0, srcbis += srcStride);
+ const vec_u8 srcP2b = vec_ld(16, srcbis);
+ const vec_u8 srcP2 = vec_perm(srcP2a, srcP2b, perm);
//srcbis += srcStride;
- vec_s16_t srcM2ssA = (vec_s16_t) vec_mergeh(zero_u8v, srcM2);
- vec_s16_t srcM2ssB = (vec_s16_t) vec_mergel(zero_u8v, srcM2);
- vec_s16_t srcM1ssA = (vec_s16_t) vec_mergeh(zero_u8v, srcM1);
- vec_s16_t srcM1ssB = (vec_s16_t) vec_mergel(zero_u8v, srcM1);
- vec_s16_t srcP0ssA = (vec_s16_t) vec_mergeh(zero_u8v, srcP0);
- vec_s16_t srcP0ssB = (vec_s16_t) vec_mergel(zero_u8v, srcP0);
- vec_s16_t srcP1ssA = (vec_s16_t) vec_mergeh(zero_u8v, srcP1);
- vec_s16_t srcP1ssB = (vec_s16_t) vec_mergel(zero_u8v, srcP1);
- vec_s16_t srcP2ssA = (vec_s16_t) vec_mergeh(zero_u8v, srcP2);
- vec_s16_t srcP2ssB = (vec_s16_t) vec_mergel(zero_u8v, srcP2);
-
- vec_s16_t pp1A, pp1B, pp2A, pp2B, pp3A, pp3B,
+ vec_s16 srcM2ssA = (vec_s16) vec_mergeh(zero_u8v, srcM2);
+ vec_s16 srcM2ssB = (vec_s16) vec_mergel(zero_u8v, srcM2);
+ vec_s16 srcM1ssA = (vec_s16) vec_mergeh(zero_u8v, srcM1);
+ vec_s16 srcM1ssB = (vec_s16) vec_mergel(zero_u8v, srcM1);
+ vec_s16 srcP0ssA = (vec_s16) vec_mergeh(zero_u8v, srcP0);
+ vec_s16 srcP0ssB = (vec_s16) vec_mergel(zero_u8v, srcP0);
+ vec_s16 srcP1ssA = (vec_s16) vec_mergeh(zero_u8v, srcP1);
+ vec_s16 srcP1ssB = (vec_s16) vec_mergel(zero_u8v, srcP1);
+ vec_s16 srcP2ssA = (vec_s16) vec_mergeh(zero_u8v, srcP2);
+ vec_s16 srcP2ssB = (vec_s16) vec_mergel(zero_u8v, srcP2);
+
+ vec_s16 pp1A, pp1B, pp2A, pp2B, pp3A, pp3B,
psumA, psumB, sumA, sumB,
srcP3ssA, srcP3ssB,
sum1A, sum1B, sum2A, sum2B, sum3A, sum3B;
- vec_u8_t sum, vdst, fsum, srcP3a, srcP3b, srcP3;
+ vec_u8 sum, vdst, fsum, srcP3a, srcP3b, srcP3;
POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_v_lowpass_num, 1);
@@ -407,8 +407,8 @@ static void PREFIX_h264_qpel16_v_lowpass_altivec(uint8_t * dst, uint8_t * src, i
srcP3a = vec_ld(0, srcbis += srcStride);
srcP3b = vec_ld(16, srcbis);
srcP3 = vec_perm(srcP3a, srcP3b, perm);
- srcP3ssA = (vec_s16_t) vec_mergeh(zero_u8v, srcP3);
- srcP3ssB = (vec_s16_t) vec_mergel(zero_u8v, srcP3);
+ srcP3ssA = (vec_s16) vec_mergeh(zero_u8v, srcP3);
+ srcP3ssB = (vec_s16) vec_mergel(zero_u8v, srcP3);
//srcbis += srcStride;
sum1A = vec_adds(srcP0ssA, srcP1ssA);
@@ -463,49 +463,49 @@ static void PREFIX_h264_qpel16_hv_lowpass_altivec(uint8_t * dst, int16_t * tmp,
POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_hv_lowpass_num, 1);
register int i;
LOAD_ZERO;
- const vec_u8_t permM2 = vec_lvsl(-2, src);
- const vec_u8_t permM1 = vec_lvsl(-1, src);
- const vec_u8_t permP0 = vec_lvsl(+0, src);
- const vec_u8_t permP1 = vec_lvsl(+1, src);
- const vec_u8_t permP2 = vec_lvsl(+2, src);
- const vec_u8_t permP3 = vec_lvsl(+3, src);
- const vec_s16_t v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2));
- const vec_u32_t v10ui = vec_splat_u32(10);
- const vec_s16_t v5ss = vec_splat_s16(5);
- const vec_s16_t v1ss = vec_splat_s16(1);
- const vec_s32_t v512si = vec_sl(vec_splat_s32(1),vec_splat_u32(9));
- const vec_u32_t v16ui = vec_sl(vec_splat_u32(1),vec_splat_u32(4));
+ const vec_u8 permM2 = vec_lvsl(-2, src);
+ const vec_u8 permM1 = vec_lvsl(-1, src);
+ const vec_u8 permP0 = vec_lvsl(+0, src);
+ const vec_u8 permP1 = vec_lvsl(+1, src);
+ const vec_u8 permP2 = vec_lvsl(+2, src);
+ const vec_u8 permP3 = vec_lvsl(+3, src);
+ const vec_s16 v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2));
+ const vec_u32 v10ui = vec_splat_u32(10);
+ const vec_s16 v5ss = vec_splat_s16(5);
+ const vec_s16 v1ss = vec_splat_s16(1);
+ const vec_s32 v512si = vec_sl(vec_splat_s32(1),vec_splat_u32(9));
+ const vec_u32 v16ui = vec_sl(vec_splat_u32(1),vec_splat_u32(4));
register int align = ((((unsigned long)src) - 2) % 16);
- vec_s16_t srcP0A, srcP0B, srcP1A, srcP1B,
+ vec_s16 srcP0A, srcP0B, srcP1A, srcP1B,
srcP2A, srcP2B, srcP3A, srcP3B,
srcM1A, srcM1B, srcM2A, srcM2B,
sum1A, sum1B, sum2A, sum2B, sum3A, sum3B,
pp1A, pp1B, pp2A, pp2B, psumA, psumB;
- const vec_u8_t mperm = (const vec_u8_t)
+ const vec_u8 mperm = (const vec_u8)
{0x00, 0x08, 0x01, 0x09, 0x02, 0x0A, 0x03, 0x0B,
0x04, 0x0C, 0x05, 0x0D, 0x06, 0x0E, 0x07, 0x0F};
int16_t *tmpbis = tmp;
- vec_s16_t tmpM1ssA, tmpM1ssB, tmpM2ssA, tmpM2ssB,
+ vec_s16 tmpM1ssA, tmpM1ssB, tmpM2ssA, tmpM2ssB,
tmpP0ssA, tmpP0ssB, tmpP1ssA, tmpP1ssB,
tmpP2ssA, tmpP2ssB;
- vec_s32_t pp1Ae, pp1Ao, pp1Be, pp1Bo, pp2Ae, pp2Ao, pp2Be, pp2Bo,
+ vec_s32 pp1Ae, pp1Ao, pp1Be, pp1Bo, pp2Ae, pp2Ao, pp2Be, pp2Bo,
pp3Ae, pp3Ao, pp3Be, pp3Bo, pp1cAe, pp1cAo, pp1cBe, pp1cBo,
pp32Ae, pp32Ao, pp32Be, pp32Bo, sumAe, sumAo, sumBe, sumBo,
ssumAe, ssumAo, ssumBe, ssumBo;
- vec_u8_t fsum, sumv, sum, vdst;
- vec_s16_t ssume, ssumo;
+ vec_u8 fsum, sumv, sum, vdst;
+ vec_s16 ssume, ssumo;
POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_hv_lowpass_num, 1);
src -= (2 * srcStride);
for (i = 0 ; i < 21 ; i ++) {
- vec_u8_t srcM2, srcM1, srcP0, srcP1, srcP2, srcP3;
- vec_u8_t srcR1 = vec_ld(-2, src);
- vec_u8_t srcR2 = vec_ld(14, src);
+ vec_u8 srcM2, srcM1, srcP0, srcP1, srcP2, srcP3;
+ vec_u8 srcR1 = vec_ld(-2, src);
+ vec_u8 srcR2 = vec_ld(14, src);
switch (align) {
default: {
@@ -525,7 +525,7 @@ static void PREFIX_h264_qpel16_hv_lowpass_altivec(uint8_t * dst, int16_t * tmp,
srcP3 = srcR2;
} break;
case 12: {
- vec_u8_t srcR3 = vec_ld(30, src);
+ vec_u8 srcR3 = vec_ld(30, src);
srcM2 = vec_perm(srcR1, srcR2, permM2);
srcM1 = vec_perm(srcR1, srcR2, permM1);
srcP0 = vec_perm(srcR1, srcR2, permP0);
@@ -534,7 +534,7 @@ static void PREFIX_h264_qpel16_hv_lowpass_altivec(uint8_t * dst, int16_t * tmp,
srcP3 = vec_perm(srcR2, srcR3, permP3);
} break;
case 13: {
- vec_u8_t srcR3 = vec_ld(30, src);
+ vec_u8 srcR3 = vec_ld(30, src);
srcM2 = vec_perm(srcR1, srcR2, permM2);
srcM1 = vec_perm(srcR1, srcR2, permM1);
srcP0 = vec_perm(srcR1, srcR2, permP0);
@@ -543,7 +543,7 @@ static void PREFIX_h264_qpel16_hv_lowpass_altivec(uint8_t * dst, int16_t * tmp,
srcP3 = vec_perm(srcR2, srcR3, permP3);
} break;
case 14: {
- vec_u8_t srcR3 = vec_ld(30, src);
+ vec_u8 srcR3 = vec_ld(30, src);
srcM2 = vec_perm(srcR1, srcR2, permM2);
srcM1 = vec_perm(srcR1, srcR2, permM1);
srcP0 = srcR2;
@@ -552,7 +552,7 @@ static void PREFIX_h264_qpel16_hv_lowpass_altivec(uint8_t * dst, int16_t * tmp,
srcP3 = vec_perm(srcR2, srcR3, permP3);
} break;
case 15: {
- vec_u8_t srcR3 = vec_ld(30, src);
+ vec_u8 srcR3 = vec_ld(30, src);
srcM2 = vec_perm(srcR1, srcR2, permM2);
srcM1 = srcR2;
srcP0 = vec_perm(srcR2, srcR3, permP0);
@@ -562,20 +562,20 @@ static void PREFIX_h264_qpel16_hv_lowpass_altivec(uint8_t * dst, int16_t * tmp,
} break;
}
- srcP0A = (vec_s16_t) vec_mergeh(zero_u8v, srcP0);
- srcP0B = (vec_s16_t) vec_mergel(zero_u8v, srcP0);
- srcP1A = (vec_s16_t) vec_mergeh(zero_u8v, srcP1);
- srcP1B = (vec_s16_t) vec_mergel(zero_u8v, srcP1);
+ srcP0A = (vec_s16) vec_mergeh(zero_u8v, srcP0);
+ srcP0B = (vec_s16) vec_mergel(zero_u8v, srcP0);
+ srcP1A = (vec_s16) vec_mergeh(zero_u8v, srcP1);
+ srcP1B = (vec_s16) vec_mergel(zero_u8v, srcP1);
- srcP2A = (vec_s16_t) vec_mergeh(zero_u8v, srcP2);
- srcP2B = (vec_s16_t) vec_mergel(zero_u8v, srcP2);
- srcP3A = (vec_s16_t) vec_mergeh(zero_u8v, srcP3);
- srcP3B = (vec_s16_t) vec_mergel(zero_u8v, srcP3);
+ srcP2A = (vec_s16) vec_mergeh(zero_u8v, srcP2);
+ srcP2B = (vec_s16) vec_mergel(zero_u8v, srcP2);
+ srcP3A = (vec_s16) vec_mergeh(zero_u8v, srcP3);
+ srcP3B = (vec_s16) vec_mergel(zero_u8v, srcP3);
- srcM1A = (vec_s16_t) vec_mergeh(zero_u8v, srcM1);
- srcM1B = (vec_s16_t) vec_mergel(zero_u8v, srcM1);
- srcM2A = (vec_s16_t) vec_mergeh(zero_u8v, srcM2);
- srcM2B = (vec_s16_t) vec_mergel(zero_u8v, srcM2);
+ srcM1A = (vec_s16) vec_mergeh(zero_u8v, srcM1);
+ srcM1B = (vec_s16) vec_mergel(zero_u8v, srcM1);
+ srcM2A = (vec_s16) vec_mergeh(zero_u8v, srcM2);
+ srcM2B = (vec_s16) vec_mergel(zero_u8v, srcM2);
sum1A = vec_adds(srcP0A, srcP1A);
sum1B = vec_adds(srcP0B, srcP1B);
@@ -617,15 +617,15 @@ static void PREFIX_h264_qpel16_hv_lowpass_altivec(uint8_t * dst, int16_t * tmp,
tmpbis += tmpStride;
for (i = 0 ; i < 16 ; i++) {
- const vec_s16_t tmpP3ssA = vec_ld(0, tmpbis);
- const vec_s16_t tmpP3ssB = vec_ld(16, tmpbis);
+ const vec_s16 tmpP3ssA = vec_ld(0, tmpbis);
+ const vec_s16 tmpP3ssB = vec_ld(16, tmpbis);
- const vec_s16_t sum1A = vec_adds(tmpP0ssA, tmpP1ssA);
- const vec_s16_t sum1B = vec_adds(tmpP0ssB, tmpP1ssB);
- const vec_s16_t sum2A = vec_adds(tmpM1ssA, tmpP2ssA);
- const vec_s16_t sum2B = vec_adds(tmpM1ssB, tmpP2ssB);
- const vec_s16_t sum3A = vec_adds(tmpM2ssA, tmpP3ssA);
- const vec_s16_t sum3B = vec_adds(tmpM2ssB, tmpP3ssB);
+ const vec_s16 sum1A = vec_adds(tmpP0ssA, tmpP1ssA);
+ const vec_s16 sum1B = vec_adds(tmpP0ssB, tmpP1ssB);
+ const vec_s16 sum2A = vec_adds(tmpM1ssA, tmpP2ssA);
+ const vec_s16 sum2B = vec_adds(tmpM1ssB, tmpP2ssB);
+ const vec_s16 sum3A = vec_adds(tmpM2ssA, tmpP3ssA);
+ const vec_s16 sum3B = vec_adds(tmpM2ssB, tmpP3ssB);
tmpbis += tmpStride;
@@ -650,9 +650,9 @@ static void PREFIX_h264_qpel16_hv_lowpass_altivec(uint8_t * dst, int16_t * tmp,
pp2Be = vec_mule(sum2B, v5ss);
pp2Bo = vec_mulo(sum2B, v5ss);
- pp3Ae = vec_sra((vec_s32_t)sum3A, v16ui);
+ pp3Ae = vec_sra((vec_s32)sum3A, v16ui);
pp3Ao = vec_mulo(sum3A, v1ss);
- pp3Be = vec_sra((vec_s32_t)sum3B, v16ui);
+ pp3Be = vec_sra((vec_s32)sum3B, v16ui);
pp3Bo = vec_mulo(sum3B, v1ss);
pp1cAe = vec_add(pp1Ae, v512si);
diff --git a/libavcodec/ppc/idct_altivec.c b/libavcodec/ppc/idct_altivec.c
index 7acef48..94b6598 100644
--- a/libavcodec/ppc/idct_altivec.c
+++ b/libavcodec/ppc/idct_altivec.c
@@ -40,17 +40,9 @@
#include "libavcodec/dsputil.h"
#include "gcc_fixes.h"
-
+#include "types_altivec.h"
#include "dsputil_ppc.h"
-#define vector_s16_t vector signed short
-#define const_vector_s16_t const vector signed short
-#define vector_u16_t vector unsigned short
-#define vector_s8_t vector signed char
-#define vector_u8_t vector unsigned char
-#define vector_s32_t vector signed int
-#define vector_u32_t vector unsigned int
-
#define IDCT_HALF \
/* 1st stage */ \
t1 = vec_mradds (a1, vx7, vx1 ); \
@@ -88,11 +80,11 @@
#define IDCT \
- vector_s16_t vx0, vx1, vx2, vx3, vx4, vx5, vx6, vx7; \
- vector_s16_t vy0, vy1, vy2, vy3, vy4, vy5, vy6, vy7; \
- vector_s16_t a0, a1, a2, ma2, c4, mc4, zero, bias; \
- vector_s16_t t0, t1, t2, t3, t4, t5, t6, t7, t8; \
- vector_u16_t shift; \
+ vec_s16 vx0, vx1, vx2, vx3, vx4, vx5, vx6, vx7; \
+ vec_s16 vy0, vy1, vy2, vy3, vy4, vy5, vy6, vy7; \
+ vec_s16 a0, a1, a2, ma2, c4, mc4, zero, bias; \
+ vec_s16 t0, t1, t2, t3, t4, t5, t6, t7, t8; \
+ vec_u16 shift; \
\
c4 = vec_splat (constants[0], 0); \
a0 = vec_splat (constants[0], 1); \
@@ -100,7 +92,7 @@
a2 = vec_splat (constants[0], 3); \
mc4 = vec_splat (constants[0], 4); \
ma2 = vec_splat (constants[0], 5); \
- bias = (vector_s16_t)vec_splat ((vector_s32_t)constants[0], 3); \
+ bias = (vec_s16)vec_splat ((vec_s32)constants[0], 3); \
\
zero = vec_splat_s16 (0); \
shift = vec_splat_u16 (4); \
@@ -156,7 +148,7 @@
vx7 = vec_sra (vy7, shift);
-static const_vector_s16_t constants[5] = {
+static const vec_s16 constants[5] = {
{23170, 13573, 6518, 21895, -23170, -21895, 32, 31},
{16384, 22725, 21407, 19266, 16384, 19266, 21407, 22725},
{22725, 31521, 29692, 26722, 22725, 26722, 29692, 31521},
@@ -164,10 +156,10 @@ static const_vector_s16_t constants[5] = {
{19266, 26722, 25172, 22654, 19266, 22654, 25172, 26722}
};
-void idct_put_altivec(uint8_t* dest, int stride, vector_s16_t* block)
+void idct_put_altivec(uint8_t* dest, int stride, vec_s16* block)
{
POWERPC_PERF_DECLARE(altivec_idct_put_num, 1);
- vector_u8_t tmp;
+ vec_u8 tmp;
#ifdef CONFIG_POWERPC_PERF
POWERPC_PERF_START_COUNT(altivec_idct_put_num, 1);
@@ -176,8 +168,8 @@ POWERPC_PERF_START_COUNT(altivec_idct_put_num, 1);
#define COPY(dest,src) \
tmp = vec_packsu (src, src); \
- vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); \
- vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+ vec_ste ((vec_u32)tmp, 0, (unsigned int *)dest); \
+ vec_ste ((vec_u32)tmp, 4, (unsigned int *)dest);
COPY (dest, vx0) dest += stride;
COPY (dest, vx1) dest += stride;
@@ -191,14 +183,14 @@ POWERPC_PERF_START_COUNT(altivec_idct_put_num, 1);
POWERPC_PERF_STOP_COUNT(altivec_idct_put_num, 1);
}
-void idct_add_altivec(uint8_t* dest, int stride, vector_s16_t* block)
+void idct_add_altivec(uint8_t* dest, int stride, vec_s16* block)
{
POWERPC_PERF_DECLARE(altivec_idct_add_num, 1);
- vector_u8_t tmp;
- vector_s16_t tmp2, tmp3;
- vector_u8_t perm0;
- vector_u8_t perm1;
- vector_u8_t p0, p1, p;
+ vec_u8 tmp;
+ vec_s16 tmp2, tmp3;
+ vec_u8 perm0;
+ vec_u8 perm1;
+ vec_u8 p0, p1, p;
#ifdef CONFIG_POWERPC_PERF
POWERPC_PERF_START_COUNT(altivec_idct_add_num, 1);
@@ -215,11 +207,11 @@ POWERPC_PERF_START_COUNT(altivec_idct_add_num, 1);
#define ADD(dest,src,perm) \
/* *(uint64_t *)&tmp = *(uint64_t *)dest; */ \
tmp = vec_ld (0, dest); \
- tmp2 = (vector_s16_t)vec_perm (tmp, (vector_u8_t)zero, perm); \
+ tmp2 = (vec_s16)vec_perm (tmp, (vec_u8)zero, perm); \
tmp3 = vec_adds (tmp2, src); \
tmp = vec_packsu (tmp3, tmp3); \
- vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest); \
- vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
+ vec_ste ((vec_u32)tmp, 0, (unsigned int *)dest); \
+ vec_ste ((vec_u32)tmp, 4, (unsigned int *)dest);
ADD (dest, vx0, perm0) dest += stride;
ADD (dest, vx1, perm1) dest += stride;
diff --git a/libavcodec/ppc/imgresample_altivec.c b/libavcodec/ppc/imgresample_altivec.c
index b38e41b..fdbca5c 100644
--- a/libavcodec/ppc/imgresample_altivec.c
+++ b/libavcodec/ppc/imgresample_altivec.c
@@ -24,17 +24,13 @@
* High quality image resampling with polyphase filters - AltiVec bits
*/
-#include "gcc_fixes.h"
-
-typedef union {
- vector unsigned char v;
- unsigned char c[16];
-} vec_uc_t;
+#include "util_altivec.h"
+#define FILTER_BITS 8
typedef union {
vector signed short v;
signed short s[8];
-} vec_ss_t;
+} vec_ss;
void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
int wrap, int16_t *filter)
@@ -42,7 +38,7 @@ void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
int sum, i;
const uint8_t *s;
vector unsigned char *tv, tmp, dstv, zero;
- vec_ss_t srchv[4], srclv[4], fv[4];
+ vec_ss srchv[4], srclv[4], fv[4];
vector signed short zeros, sumhv, sumlv;
s = src;
diff --git a/libavcodec/ppc/int_altivec.c b/libavcodec/ppc/int_altivec.c
index 8bd3936..5cadea2 100644
--- a/libavcodec/ppc/int_altivec.c
+++ b/libavcodec/ppc/int_altivec.c
@@ -79,10 +79,10 @@ static int ssd_int8_vs_int16_altivec(const int8_t *pix1, const int16_t *pix2,
static void add_int16_altivec(int16_t * v1, int16_t * v2, int order)
{
int i;
- register vec_s16_t vec, *pv;
+ register vec_s16 vec, *pv;
for(i = 0; i < order; i += 8){
- pv = (vec_s16_t*)v2;
+ pv = (vec_s16*)v2;
vec = vec_perm(pv[0], pv[1], vec_lvsl(0, v2));
vec_st(vec_add(vec_ld(0, v1), vec), 0, v1);
v1 += 8;
@@ -93,10 +93,10 @@ static void add_int16_altivec(int16_t * v1, int16_t * v2, int order)
static void sub_int16_altivec(int16_t * v1, int16_t * v2, int order)
{
int i;
- register vec_s16_t vec, *pv;
+ register vec_s16 vec, *pv;
for(i = 0; i < order; i += 8){
- pv = (vec_s16_t*)v2;
+ pv = (vec_s16*)v2;
vec = vec_perm(pv[0], pv[1], vec_lvsl(0, v2));
vec_st(vec_sub(vec_ld(0, v1), vec), 0, v1);
v1 += 8;
@@ -108,9 +108,9 @@ static int32_t scalarproduct_int16_altivec(int16_t * v1, int16_t * v2, int order
{
int i;
LOAD_ZERO;
- register vec_s16_t vec1, *pv;
- register vec_s32_t res = vec_splat_s32(0), t;
- register vec_u32_t shifts;
+ register vec_s16 vec1, *pv;
+ register vec_s32 res = vec_splat_s32(0), t;
+ register vec_u32 shifts;
DECLARE_ALIGNED_16(int32_t, ires);
shifts = zero_u32v;
@@ -121,7 +121,7 @@ static int32_t scalarproduct_int16_altivec(int16_t * v1, int16_t * v2, int order
if(shift & 0x01) shifts = vec_add(shifts, vec_splat_u32(0x01));
for(i = 0; i < order; i += 8){
- pv = (vec_s16_t*)v1;
+ pv = (vec_s16*)v1;
vec1 = vec_perm(pv[0], pv[1], vec_lvsl(0, v1));
t = vec_msum(vec1, vec_ld(0, v2), zero_s32v);
t = vec_sr(t, shifts);
diff --git a/libavcodec/ppc/mathops.h b/libavcodec/ppc/mathops.h
index 776ee62..edfe2ea 100644
--- a/libavcodec/ppc/mathops.h
+++ b/libavcodec/ppc/mathops.h
@@ -23,7 +23,7 @@
#ifndef AVCODEC_PPC_MATHOPS_H
#define AVCODEC_PPC_MATHOPS_H
-#if defined(ARCH_POWERPC_405)
+#if defined(ARCH_PPC_405)
/* signed 16x16 -> 32 multiply add accumulate */
#define MAC16(rt, ra, rb) \
__asm__ ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb));
diff --git a/libavcodec/ppc/mpegvideo_altivec.c b/libavcodec/ppc/mpegvideo_altivec.c
index ba1719f..28ada7a 100644
--- a/libavcodec/ppc/mpegvideo_altivec.c
+++ b/libavcodec/ppc/mpegvideo_altivec.c
@@ -585,8 +585,8 @@ POWERPC_PERF_STOP_COUNT(altivec_dct_unquantize_h263_num, nCoeffs == 63);
}
-extern void idct_put_altivec(uint8_t *dest, int line_size, int16_t *block);
-extern void idct_add_altivec(uint8_t *dest, int line_size, int16_t *block);
+void idct_put_altivec(uint8_t *dest, int line_size, int16_t *block);
+void idct_add_altivec(uint8_t *dest, int line_size, int16_t *block);
void MPV_common_init_altivec(MpegEncContext *s)
{
diff --git a/libavcodec/ppc/types_altivec.h b/libavcodec/ppc/types_altivec.h
index 30963c2..2870e83 100644
--- a/libavcodec/ppc/types_altivec.h
+++ b/libavcodec/ppc/types_altivec.h
@@ -24,23 +24,23 @@
/***********************************************************************
* Vector types
**********************************************************************/
-#define vec_u8_t vector unsigned char
-#define vec_s8_t vector signed char
-#define vec_u16_t vector unsigned short
-#define vec_s16_t vector signed short
-#define vec_u32_t vector unsigned int
-#define vec_s32_t vector signed int
+#define vec_u8 vector unsigned char
+#define vec_s8 vector signed char
+#define vec_u16 vector unsigned short
+#define vec_s16 vector signed short
+#define vec_u32 vector unsigned int
+#define vec_s32 vector signed int
/***********************************************************************
* Null vector
**********************************************************************/
-#define LOAD_ZERO const vec_u8_t zerov = vec_splat_u8( 0 )
+#define LOAD_ZERO const vec_u8 zerov = vec_splat_u8( 0 )
-#define zero_u8v (vec_u8_t) zerov
-#define zero_s8v (vec_s8_t) zerov
-#define zero_u16v (vec_u16_t) zerov
-#define zero_s16v (vec_s16_t) zerov
-#define zero_u32v (vec_u32_t) zerov
-#define zero_s32v (vec_s32_t) zerov
+#define zero_u8v (vec_u8) zerov
+#define zero_s8v (vec_s8) zerov
+#define zero_u16v (vec_u16) zerov
+#define zero_s16v (vec_s16) zerov
+#define zero_u32v (vec_u32) zerov
+#define zero_s32v (vec_s32) zerov
#endif /* AVCODEC_PPC_TYPES_ALTIVEC_H */
diff --git a/libavcodec/pthread.c b/libavcodec/pthread.c
index 71f2da6..e178993 100644
--- a/libavcodec/pthread.c
+++ b/libavcodec/pthread.c
@@ -25,11 +25,11 @@
#include "avcodec.h"
-typedef int (action_t)(AVCodecContext *c, void *arg);
+typedef int (action_func)(AVCodecContext *c, void *arg);
typedef struct ThreadContext {
pthread_t *workers;
- action_t *func;
+ action_func *func;
void *args;
int *rets;
int rets_count;
@@ -101,7 +101,7 @@ void avcodec_thread_free(AVCodecContext *avctx)
av_freep(&avctx->thread_opaque);
}
-int avcodec_thread_execute(AVCodecContext *avctx, action_t* func, void *arg, int *ret, int job_count, int job_size)
+int avcodec_thread_execute(AVCodecContext *avctx, action_func* func, void *arg, int *ret, int job_count, int job_size)
{
ThreadContext *c= avctx->thread_opaque;
int dummy_ret;
diff --git a/libavcodec/qcelp_lsp.c b/libavcodec/qcelp_lsp.c
new file mode 100644
index 0000000..a6c6595
--- /dev/null
+++ b/libavcodec/qcelp_lsp.c
@@ -0,0 +1,102 @@
+/*
+ * QCELP decoder
+ * Copyright (c) 2007 Reynaldo H. Verdejo Pinochet
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file qcelp_lsp.c
+ * QCELP decoder
+ * @author Reynaldo H. Verdejo Pinochet
+ * @remark FFmpeg merging spearheaded by Kenan Gillet
+ * @remark Development mentored by Benjamin Larson
+ */
+
+#include "libavutil/mathematics.h"
+
+/**
+ * initial coefficient to perform bandwidth expansion on LPC
+ *
+ * @note: 0.9883 looks like an approximation of 253/256.
+ *
+ * TIA/EIA/IS-733 2.4.3.3.6 6
+ */
+#define QCELP_BANDWITH_EXPANSION_COEFF 0.9883
+
+/**
+ * Computes the Pa / (1 + z(-1)) or Qa / (1 - z(-1)) coefficients
+ * needed for LSP to LPC conversion.
+ * We only need to calculate the first 6 elements of the polynomial.
+ *
+ * @param lspf line spectral pair frequencies
+ * @param f [out] polynomial input/output as a vector
+ *
+ * TIA/EIA/IS-733 2.4.3.3.5-1/2
+ */
+static void lsp2polyf(const float *lspf, double *f, int lp_half_order)
+{
+ int i, j;
+
+ f[0] = 1.0;
+ f[1] = -2 * cos(M_PI * lspf[0]);
+ lspf -= 2;
+ for(i=2; i<=lp_half_order; i++)
+ {
+ double val = -2 * cos(M_PI * lspf[2*i]);
+ f[i] = val * f[i-1] + 2*f[i-2];
+ for(j=i-1; j>1; j--)
+ f[j] += f[j-1] * val + f[j-2];
+ f[1] += val;
+ }
+}
+
+/**
+ * Reconstructs LPC coefficients from the line spectral pair frequencies
+ * and performs bandwidth expansion.
+ *
+ * @param lspf line spectral pair frequencies
+ * @param lpc linear predictive coding coefficients
+ *
+ * @note: bandwith_expansion_coeff could be precalculated into a table
+ * but it seems to be slower on x86
+ *
+ * TIA/EIA/IS-733 2.4.3.3.5
+ */
+void ff_qcelp_lspf2lpc(const float *lspf, float *lpc)
+{
+ double pa[6], qa[6];
+ int i;
+ double bandwith_expansion_coeff = -QCELP_BANDWITH_EXPANSION_COEFF * 0.5;
+
+ lsp2polyf(lspf, pa, 5);
+ lsp2polyf(lspf + 1, qa, 5);
+
+ for (i=4; i>=0; i--)
+ {
+ double paf = pa[i+1] + pa[i];
+ double qaf = qa[i+1] - qa[i];
+
+ lpc[i ] = paf + qaf;
+ lpc[9-i] = paf - qaf;
+ }
+ for (i=0; i<10; i++)
+ {
+ lpc[i] *= bandwith_expansion_coeff;
+ bandwith_expansion_coeff *= QCELP_BANDWITH_EXPANSION_COEFF;
+ }
+}
diff --git a/libavcodec/qcelpdata.h b/libavcodec/qcelpdata.h
index 856f11d..9d5915b 100644
--- a/libavcodec/qcelpdata.h
+++ b/libavcodec/qcelpdata.h
@@ -1,5 +1,5 @@
/*
- * part of QCELP decoder
+ * QCELP decoder
* Copyright (c) 2007 Reynaldo H. Verdejo Pinochet
*
* This file is part of FFmpeg.
@@ -22,7 +22,48 @@
#ifndef AVCODEC_QCELPDATA_H
#define AVCODEC_QCELPDATA_H
+/**
+ * @file qcelpdata.h
+ * Data tables for the QCELP decoder
+ * @author Reynaldo H. Verdejo Pinochet
+ * @remark FFmpeg merging spearheaded by Kenan Gillet
+ * @remark Development mentored by Benjamin Larson
+ */
+
+#include <stddef.h>
#include <stdint.h>
+#include "libavutil/common.h"
+
+/**
+ * QCELP unpacked data frame
+ */
+typedef struct {
+/// @defgroup qcelp_codebook_parameters QCELP excitation codebook parameters
+/// @{
+ uint8_t cbsign[16]; ///< sign of the codebook gain for each codebook subframe
+ uint8_t cbgain[16]; ///< unsigned codebook gain for each codebook subframe
+ uint8_t cindex[16]; ///< codebook index for each codebook subframe
+/// @}
+
+/// @defgroup qcelp_pitch_parameters QCELP pitch prediction parameters
+/// @{
+ uint8_t plag[4]; ///< pitch lag for each pitch subframe
+ uint8_t pfrac[4]; ///< fractional pitch lag for each pitch subframe
+ uint8_t pgain[4]; ///< pitch gain for each pitch subframe
+/// @}
+
+ /**
+ * line spectral pair frequencies (LSP) for RATE_OCTAVE,
+ * line spectral pair frequencies grouped into five vectors
+ * of dimension two (LSPV) for other rates
+ */
+ uint8_t lspv[10];
+
+ /**
+ * reserved bits only present in bitrate 1, 1/4 and 1/8 packets
+ */
+ uint8_t reserved;
+} QCELPFrame;
/**
* pre-calculated table for hammsinc function
@@ -38,7 +79,7 @@ typedef struct {
uint8_t bitlen; /*!< number of bits to read */
} QCELPBitmap;
-#define QCELP_OF(variable, bit, len) {offsetof(QCELPContext, variable), bit, len}
+#define QCELP_OF(variable, bit, len) {offsetof(QCELPFrame, variable), bit, len}
/**
* bitmap unpacking tables for RATE_FULL
@@ -232,7 +273,7 @@ static const QCELPBitmap * const qcelp_unpacking_bitmaps_per_rate[5] = {
qcelp_rate_full_bitmap,
};
-static const uint16_t qcelp_bits_per_rate[5] = {
+static const uint16_t qcelp_unpacking_bitmaps_lengths[5] = {
0, ///!< for SILENCE rate
FF_ARRAY_ELEMS(qcelp_rate_octave_bitmap),
FF_ARRAY_ELEMS(qcelp_rate_quarter_bitmap),
@@ -384,6 +425,16 @@ static const qcelp_vector * const qcelp_lspvq[5] = {
#define QCELP_SCALE 8192.
/**
+ * the upper boundary of the clipping, depends on QCELP_SCALE
+ */
+#define QCELP_CLIP_UPPER_BOUND (8191.75/8192.)
+
+/**
+ * the lower boundary of the clipping, depends on QCELP_SCALE
+ */
+#define QCELP_CLIP_LOWER_BOUND -1.
+
+/**
* table for computing Ga (decoded linear codebook gain magnitude)
*
* @note The table could fit in int16_t in x*8 form, but it seems
@@ -406,7 +457,7 @@ static const float qcelp_g12ga[61] = {
100.000/QCELP_SCALE, 112.250/QCELP_SCALE, 125.875/QCELP_SCALE, 141.250/QCELP_SCALE,
158.500/QCELP_SCALE, 177.875/QCELP_SCALE, 199.500/QCELP_SCALE, 223.875/QCELP_SCALE,
251.250/QCELP_SCALE, 281.875/QCELP_SCALE, 316.250/QCELP_SCALE, 354.875/QCELP_SCALE,
- 398.125/QCELP_SCALE, 446.625/QCELP_SCALE, 501.125/QCELP_SCALE, 563.375/QCELP_SCALE,
+ 398.125/QCELP_SCALE, 446.625/QCELP_SCALE, 501.125/QCELP_SCALE, 562.375/QCELP_SCALE,
631.000/QCELP_SCALE, 708.000/QCELP_SCALE, 794.375/QCELP_SCALE, 891.250/QCELP_SCALE,
1000.000/QCELP_SCALE};
@@ -463,7 +514,7 @@ static const int8_t qcelp_rate_half_codebook[128] = {
/**
* sqrt(1.887) is the maximum of the pseudorandom
* white sequence used to generate the scaled codebook
- * vector for framerate 1/4.
+ * vector for bitrate 1/4.
*
* TIA/EIA/IS-733 2.4.8.1.2
*/
@@ -471,9 +522,9 @@ static const int8_t qcelp_rate_half_codebook[128] = {
/**
* table for impulse response of BPF used to filter
- * the white excitation for framerate 1/4 synthesis
+ * the white excitation for bitrate 1/4 synthesis
*
- * Only half the tables are needed because of symetry.
+ * Only half the tables are needed because of symmetry.
*
* TIA/EIA/IS-733 2.4.8.1.2-1.1
*/
@@ -483,4 +534,20 @@ static const double qcelp_rnd_fir_coefs[11] = {
-9.918777e-2, 3.749518e-2, 8.985137e-1
};
+/**
+ * This spread factor is used, for bitrate 1/8 and I_F_Q,
+ * to force the LSP frequencies to be at least 80 Hz apart.
+ *
+ * TIA/EIA/IS-733 2.4.3.3.2
+ */
+#define QCELP_LSP_SPREAD_FACTOR 0.02
+
+/**
+ * predictor coefficient for the conversion of LSP codes
+ * to LSP frequencies for 1/8 and I_F_Q
+ *
+ * TIA/EIA/IS-733 2.4.3.2.7-2
+ */
+#define QCELP_LSP_OCTAVE_PREDICTOR 29.0/32
+
#endif /* AVCODEC_QCELPDATA_H */
diff --git a/libavcodec/qcelpdec.c b/libavcodec/qcelpdec.c
index c0bf2cc..c65b094 100644
--- a/libavcodec/qcelpdec.c
+++ b/libavcodec/qcelpdec.c
@@ -18,18 +18,21 @@
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
+
/**
* @file qcelpdec.c
* QCELP decoder
* @author Reynaldo H. Verdejo Pinochet
+ * @remark FFmpeg merging spearheaded by Kenan Gillet
+ * @remark Development mentored by Benjamin Larson
*/
#include <stddef.h>
#include "avcodec.h"
+#include "internal.h"
#include "bitstream.h"
-#include "qcelp.h"
#include "qcelpdata.h"
#include "celp_math.h"
@@ -38,15 +41,53 @@
#undef NDEBUG
#include <assert.h>
-static void weighted_vector_sumf(float *out,
- const float *in_a,
- const float *in_b,
- float weight_coeff_a,
- float weight_coeff_b,
- int length) {
- int i;
+typedef enum
+{
+ I_F_Q = -1, /*!< insufficient frame quality */
+ SILENCE,
+ RATE_OCTAVE,
+ RATE_QUARTER,
+ RATE_HALF,
+ RATE_FULL
+} qcelp_packet_rate;
+
+typedef struct
+{
+ GetBitContext gb;
+ qcelp_packet_rate bitrate;
+ QCELPFrame frame; /*!< unpacked data frame */
+
+ uint8_t erasure_count;
+ uint8_t octave_count; /*!< count the consecutive RATE_OCTAVE frames */
+ float prev_lspf[10];
+ float predictor_lspf[10];/*!< LSP predictor for RATE_OCTAVE and I_F_Q */
+ float pitch_synthesis_filter_mem[303];
+ float pitch_pre_filter_mem[303];
+ float rnd_fir_filter_mem[180];
+ float formant_mem[170];
+ float last_codebook_gain;
+ int prev_g1[2];
+ int prev_bitrate;
+ float pitch_gain[4];
+ uint8_t pitch_lag[4];
+ uint16_t first16bits;
+ uint8_t warned_buf_mismatch_bitrate;
+} QCELPContext;
+
+/**
+ * Reconstructs LPC coefficients from the line spectral pair frequencies.
+ *
+ * TIA/EIA/IS-733 2.4.3.3.5
+ */
+void ff_qcelp_lspf2lpc(const float *lspf, float *lpc);
- for (i = 0; i < length; i++)
+static void weighted_vector_sumf(float *out, const float *in_a,
+ const float *in_b, float weight_coeff_a,
+ float weight_coeff_b, int length)
+{
+ int i;
+
+ for(i=0; i<length; i++)
out[i] = weight_coeff_a * in_a[i]
+ weight_coeff_b * in_b[i];
}
@@ -56,15 +97,224 @@ static void weighted_vector_sumf(float *out,
*
* TIA/EIA/IS-733 2.4.9
*/
-static av_cold int qcelp_decode_init(AVCodecContext *avctx) {
+static av_cold int qcelp_decode_init(AVCodecContext *avctx)
+{
QCELPContext *q = avctx->priv_data;
int i;
avctx->sample_fmt = SAMPLE_FMT_FLT;
- for (i = 0; i < 10; i++)
- q->prev_lspf[i] = (i + 1) / 11.;
+ for(i=0; i<10; i++)
+ q->prev_lspf[i] = (i+1)/11.;
+
+ return 0;
+}
+
+/**
+ * Decodes the 10 quantized LSP frequencies from the LSPV/LSP
+ * transmission codes of any bitrate and checks for badly received packets.
+ *
+ * @param q the context
+ * @param lspf line spectral pair frequencies
+ *
+ * @return 0 on success, -1 if the packet is badly received
+ *
+ * TIA/EIA/IS-733 2.4.3.2.6.2-2, 2.4.8.7.3
+ */
+static int decode_lspf(QCELPContext *q, float *lspf)
+{
+ int i;
+ float tmp_lspf, smooth, erasure_coeff;
+ const float *predictors;
+
+ if(q->bitrate == RATE_OCTAVE || q->bitrate == I_F_Q)
+ {
+ predictors = (q->prev_bitrate != RATE_OCTAVE &&
+ q->prev_bitrate != I_F_Q ?
+ q->prev_lspf : q->predictor_lspf);
+
+ if(q->bitrate == RATE_OCTAVE)
+ {
+ q->octave_count++;
+
+ for(i=0; i<10; i++)
+ {
+ q->predictor_lspf[i] =
+ lspf[i] = (q->frame.lspv[i] ? QCELP_LSP_SPREAD_FACTOR
+ : -QCELP_LSP_SPREAD_FACTOR)
+ + predictors[i] * QCELP_LSP_OCTAVE_PREDICTOR
+ + (i + 1) * ((1 - QCELP_LSP_OCTAVE_PREDICTOR)/11);
+ }
+ smooth = (q->octave_count < 10 ? .875 : 0.1);
+ }else
+ {
+ erasure_coeff = QCELP_LSP_OCTAVE_PREDICTOR;
+
+ assert(q->bitrate == I_F_Q);
+
+ if(q->erasure_count > 1)
+ erasure_coeff *= (q->erasure_count < 4 ? 0.9 : 0.7);
+
+ for(i=0; i<10; i++)
+ {
+ q->predictor_lspf[i] =
+ lspf[i] = (i + 1) * ( 1 - erasure_coeff)/11
+ + erasure_coeff * predictors[i];
+ }
+ smooth = 0.125;
+ }
+
+ // Check the stability of the LSP frequencies.
+ lspf[0] = FFMAX(lspf[0], QCELP_LSP_SPREAD_FACTOR);
+ for(i=1; i<10; i++)
+ lspf[i] = FFMAX(lspf[i], (lspf[i-1] + QCELP_LSP_SPREAD_FACTOR));
+
+ lspf[9] = FFMIN(lspf[9], (1.0 - QCELP_LSP_SPREAD_FACTOR));
+ for(i=9; i>0; i--)
+ lspf[i-1] = FFMIN(lspf[i-1], (lspf[i] - QCELP_LSP_SPREAD_FACTOR));
+
+ // Low-pass filter the LSP frequencies.
+ weighted_vector_sumf(lspf, lspf, q->prev_lspf, smooth, 1.0-smooth, 10);
+ }else
+ {
+ q->octave_count = 0;
+
+ tmp_lspf = 0.;
+ for(i=0; i<5 ; i++)
+ {
+ lspf[2*i+0] = tmp_lspf += qcelp_lspvq[i][q->frame.lspv[i]][0] * 0.0001;
+ lspf[2*i+1] = tmp_lspf += qcelp_lspvq[i][q->frame.lspv[i]][1] * 0.0001;
+ }
+
+ // Check for badly received packets.
+ if(q->bitrate == RATE_QUARTER)
+ {
+ if(lspf[9] <= .70 || lspf[9] >= .97)
+ return -1;
+ for(i=3; i<10; i++)
+ if(fabs(lspf[i] - lspf[i-2]) < .08)
+ return -1;
+ }else
+ {
+ if(lspf[9] <= .66 || lspf[9] >= .985)
+ return -1;
+ for(i=4; i<10; i++)
+ if (fabs(lspf[i] - lspf[i-4]) < .0931)
+ return -1;
+ }
+ }
+ return 0;
+}
+
+/**
+ * Converts codebook transmission codes to GAIN and INDEX.
+ *
+ * @param q the context
+ * @param gain array holding the decoded gain
+ *
+ * TIA/EIA/IS-733 2.4.6.2
+ */
+static void decode_gain_and_index(QCELPContext *q,
+ float *gain) {
+ int i, subframes_count, g1[16];
+ float slope;
+
+ if(q->bitrate >= RATE_QUARTER)
+ {
+ switch(q->bitrate)
+ {
+ case RATE_FULL: subframes_count = 16; break;
+ case RATE_HALF: subframes_count = 4; break;
+ default: subframes_count = 5;
+ }
+ for(i=0; i<subframes_count; i++)
+ {
+ g1[i] = 4 * q->frame.cbgain[i];
+ if(q->bitrate == RATE_FULL && !((i+1) & 3))
+ {
+ g1[i] += av_clip((g1[i-1] + g1[i-2] + g1[i-3]) / 3 - 6, 0, 32);
+ }
+
+ gain[i] = qcelp_g12ga[g1[i]];
+
+ if(q->frame.cbsign[i])
+ {
+ gain[i] = -gain[i];
+ q->frame.cindex[i] = (q->frame.cindex[i]-89) & 127;
+ }
+ }
+
+ q->prev_g1[0] = g1[i-2];
+ q->prev_g1[1] = g1[i-1];
+ q->last_codebook_gain = qcelp_g12ga[g1[i-1]];
+
+ if(q->bitrate == RATE_QUARTER)
+ {
+ // Provide smoothing of the unvoiced excitation energy.
+ gain[7] = gain[4];
+ gain[6] = 0.4*gain[3] + 0.6*gain[4];
+ gain[5] = gain[3];
+ gain[4] = 0.8*gain[2] + 0.2*gain[3];
+ gain[3] = 0.2*gain[1] + 0.8*gain[2];
+ gain[2] = gain[1];
+ gain[1] = 0.6*gain[0] + 0.4*gain[1];
+ }
+ }else
+ {
+ if(q->bitrate == RATE_OCTAVE)
+ {
+ g1[0] = 2 * q->frame.cbgain[0]
+ + av_clip((q->prev_g1[0] + q->prev_g1[1]) / 2 - 5, 0, 54);
+ subframes_count = 8;
+ }else
+ {
+ assert(q->bitrate == I_F_Q);
+
+ g1[0] = q->prev_g1[1];
+ switch(q->erasure_count)
+ {
+ case 1 : break;
+ case 2 : g1[0] -= 1; break;
+ case 3 : g1[0] -= 2; break;
+ default: g1[0] -= 6;
+ }
+ if(g1[0] < 0)
+ g1[0] = 0;
+ subframes_count = 4;
+ }
+ // This interpolation is done to produce smoother background noise.
+ slope = 0.5*(qcelp_g12ga[g1[0]] - q->last_codebook_gain) / subframes_count;
+ for(i=1; i<=subframes_count; i++)
+ gain[i-1] = q->last_codebook_gain + slope * i;
+
+ q->last_codebook_gain = gain[i-2];
+ q->prev_g1[0] = q->prev_g1[1];
+ q->prev_g1[1] = g1[0];
+ }
+}
+
+/**
+ * If the received packet is Rate 1/4 a further sanity check is made of the
+ * codebook gain.
+ *
+ * @param cbgain the unpacked cbgain array
+ * @return -1 if the sanity check fails, 0 otherwise
+ *
+ * TIA/EIA/IS-733 2.4.8.7.3
+ */
+static int codebook_sanity_check_for_rate_quarter(const uint8_t *cbgain)
+{
+ int i, diff, prev_diff=0;
+ for(i=1; i<5; i++)
+ {
+ diff = cbgain[i] - cbgain[i-1];
+ if(FFABS(diff) > 10)
+ return -1;
+ else if(FFABS(diff - prev_diff) > 12)
+ return -1;
+ prev_diff = diff;
+ }
return 0;
}
@@ -89,72 +339,84 @@ static av_cold int qcelp_decode_init(AVCodecContext *avctx) {
* @param gain array holding the 4 pitch subframe gain values
* @param cdn_vector array for the generated scaled codebook vector
*/
-static void compute_svector(const QCELPContext *q,
- const float *gain,
- float *cdn_vector) {
+static void compute_svector(QCELPContext *q, const float *gain,
+ float *cdn_vector)
+{
int i, j, k;
uint16_t cbseed, cindex;
float *rnd, tmp_gain, fir_filter_value;
- switch (q->framerate) {
- case RATE_FULL:
- for (i = 0; i < 16; i++) {
- tmp_gain = gain[i] * QCELP_RATE_FULL_CODEBOOK_RATIO;
- cindex = -q->cindex[i];
- for (j = 0; j < 10; j++)
- *cdn_vector++ = tmp_gain * qcelp_rate_full_codebook[cindex++ & 127];
- }
+ switch(q->bitrate)
+ {
+ case RATE_FULL:
+ for(i=0; i<16; i++)
+ {
+ tmp_gain = gain[i] * QCELP_RATE_FULL_CODEBOOK_RATIO;
+ cindex = -q->frame.cindex[i];
+ for(j=0; j<10; j++)
+ *cdn_vector++ = tmp_gain * qcelp_rate_full_codebook[cindex++ & 127];
+ }
break;
- case RATE_HALF:
- for (i = 0; i < 4; i++) {
- tmp_gain = gain[i] * QCELP_RATE_HALF_CODEBOOK_RATIO;
- cindex = -q->cindex[i];
- for (j = 0; j < 40; j++)
+ case RATE_HALF:
+ for(i=0; i<4; i++)
+ {
+ tmp_gain = gain[i] * QCELP_RATE_HALF_CODEBOOK_RATIO;
+ cindex = -q->frame.cindex[i];
+ for (j = 0; j < 40; j++)
*cdn_vector++ = tmp_gain * qcelp_rate_half_codebook[cindex++ & 127];
- }
+ }
break;
- case RATE_QUARTER:
- cbseed = (0x0003 & q->lspv[4])<<14 |
- (0x003F & q->lspv[3])<< 8 |
- (0x0060 & q->lspv[2])<< 1 |
- (0x0007 & q->lspv[1])<< 3 |
- (0x0038 & q->lspv[0])>> 3 ;
- rnd = q->rnd_fir_filter_mem + 20;
- for (i = 0; i < 8; i++) {
- tmp_gain = gain[i] * (QCELP_SQRT1887 / 32768.0);
- for (k = 0; k < 20; k++) {
- cbseed = 521 * cbseed + 259;
- *rnd = (int16_t)cbseed;
-
- // FIR filter
- fir_filter_value = 0.0;
- for (j = 0; j < 10; j++)
- fir_filter_value += qcelp_rnd_fir_coefs[j ] * (rnd[-j ] + rnd[-20+j]);
- fir_filter_value += qcelp_rnd_fir_coefs[10] * rnd[-10];
-
- *cdn_vector++ = tmp_gain * fir_filter_value;
- rnd++;
+ case RATE_QUARTER:
+ cbseed = (0x0003 & q->frame.lspv[4])<<14 |
+ (0x003F & q->frame.lspv[3])<< 8 |
+ (0x0060 & q->frame.lspv[2])<< 1 |
+ (0x0007 & q->frame.lspv[1])<< 3 |
+ (0x0038 & q->frame.lspv[0])>> 3 ;
+ rnd = q->rnd_fir_filter_mem + 20;
+ for(i=0; i<8; i++)
+ {
+ tmp_gain = gain[i] * (QCELP_SQRT1887 / 32768.0);
+ for(k=0; k<20; k++)
+ {
+ cbseed = 521 * cbseed + 259;
+ *rnd = (int16_t)cbseed;
+
+ // FIR filter
+ fir_filter_value = 0.0;
+ for(j=0; j<10; j++)
+ fir_filter_value += qcelp_rnd_fir_coefs[j ]
+ * (rnd[-j ] + rnd[-20+j]);
+
+ fir_filter_value += qcelp_rnd_fir_coefs[10] * rnd[-10];
+ *cdn_vector++ = tmp_gain * fir_filter_value;
+ rnd++;
+ }
}
- }
- memcpy(q->rnd_fir_filter_mem, q->rnd_fir_filter_mem + 160, 20 * sizeof(float));
+ memcpy(q->rnd_fir_filter_mem, q->rnd_fir_filter_mem + 160, 20 * sizeof(float));
break;
- case RATE_OCTAVE:
- cbseed = q->first16bits;
- for (i = 0; i < 8; i++) {
- tmp_gain = gain[i] * (QCELP_SQRT1887 / 32768.0);
- for (j = 0; j < 20; j++) {
- cbseed = 521 * cbseed + 259;
- *cdn_vector++ = tmp_gain * (int16_t)cbseed;
+ case RATE_OCTAVE:
+ cbseed = q->first16bits;
+ for(i=0; i<8; i++)
+ {
+ tmp_gain = gain[i] * (QCELP_SQRT1887 / 32768.0);
+ for(j=0; j<20; j++)
+ {
+ cbseed = 521 * cbseed + 259;
+ *cdn_vector++ = tmp_gain * (int16_t)cbseed;
+ }
}
- }
break;
- case I_F_Q:
- cbseed = -44; // random codebook index
- for (i = 0; i < 4; i++) {
- tmp_gain = gain[i] * QCELP_RATE_FULL_CODEBOOK_RATIO;
- for (j = 0; j < 40; j++)
- *cdn_vector++ = tmp_gain * qcelp_rate_full_codebook[cbseed++ & 127];
- }
+ case I_F_Q:
+ cbseed = -44; // random codebook index
+ for(i=0; i<4; i++)
+ {
+ tmp_gain = gain[i] * QCELP_RATE_FULL_CODEBOOK_RATIO;
+ for(j=0; j<40; j++)
+ *cdn_vector++ = tmp_gain * qcelp_rate_full_codebook[cbseed++ & 127];
+ }
+ break;
+ case SILENCE:
+ memset(cdn_vector, 0, 160 * sizeof(float));
break;
}
}
@@ -172,19 +434,21 @@ static void compute_svector(const QCELPContext *q,
*
* TIA/EIA/IS-733 2.4.8.3-2/3/4/5, 2.4.8.6
*/
-static void apply_gain_ctrl(float *v_out,
- const float *v_ref,
- const float *v_in) {
+static void apply_gain_ctrl(float *v_out, const float *v_ref,
+ const float *v_in)
+{
int i, j, len;
float scalefactor;
- for (i = 0, j = 0; i < 4; i++) {
+ for(i=0, j=0; i<4; i++)
+ {
scalefactor = ff_dot_productf(v_in + j, v_in + j, 40);
- if (scalefactor)
- scalefactor = sqrt(ff_dot_productf(v_ref + j, v_ref + j, 40) / scalefactor);
+ if(scalefactor)
+ scalefactor = sqrt(ff_dot_productf(v_ref + j, v_ref + j, 40)
+ / scalefactor);
else
- av_log_missing_feature(NULL, "Zero energy for gain control", 1);
- for (len = j + 40; j < len; j++)
+ ff_log_missing_feature(NULL, "Zero energy for gain control", 1);
+ for(len=j+40; j<len; j++)
v_out[j] = scalefactor * v_in[j];
}
}
@@ -201,29 +465,33 @@ static void apply_gain_ctrl(float *v_out,
* @param lag per-subframe lag array, each element is
* - between 16 and 143 if its corresponding pfrac is 0,
* - between 16 and 139 otherwise
- * @param pfrac per-subframe boolean array, 1 if the lag is fractional, 0 otherwise
+ * @param pfrac per-subframe boolean array, 1 if the lag is fractional, 0
+ * otherwise
*
* @return filter output vector
*/
-static const float *do_pitchfilter(float memory[303],
- const float v_in[160],
- const float gain[4],
- const uint8_t *lag,
- const uint8_t pfrac[4]) {
+static const float *do_pitchfilter(float memory[303], const float v_in[160],
+ const float gain[4], const uint8_t *lag,
+ const uint8_t pfrac[4])
+{
int i, j;
float *v_lag, *v_out;
const float *v_len;
v_out = memory + 143; // Output vector starts at memory[143].
- for (i = 0; i < 4; i++)
- if (gain[i]) {
+ for(i=0; i<4; i++)
+ {
+ if(gain[i])
+ {
v_lag = memory + 143 + 40 * i - lag[i];
- for (v_len = v_in + 40; v_in < v_len; v_in++) {
- if (pfrac[i]) { // If it is a fractional lag...
- for (j = 0, *v_out = 0.; j < 4; j++)
+ for(v_len=v_in+40; v_in<v_len; v_in++)
+ {
+ if(pfrac[i]) // If it is a fractional lag...
+ {
+ for(j=0, *v_out=0.; j<4; j++)
*v_out += qcelp_hammsinc_table[j] * (v_lag[j-4] + v_lag[3-j]);
- } else
+ }else
*v_out = *v_lag;
*v_out = *v_in + gain[i] * *v_out;
@@ -231,68 +499,313 @@ static const float *do_pitchfilter(float memory[303],
v_lag++;
v_out++;
}
- } else {
+ }else
+ {
memcpy(v_out, v_in, 40 * sizeof(float));
v_in += 40;
v_out += 40;
}
+ }
memmove(memory, memory + 160, 143 * sizeof(float));
return memory + 143;
}
/**
+ * Apply pitch synthesis filter and pitch prefilter to the scaled codebook vector.
+ * TIA/EIA/IS-733 2.4.5.2, 2.4.8.7.2
+ *
+ * @param q the context
+ * @param cdn_vector the scaled codebook vector
+ */
+static void apply_pitch_filters(QCELPContext *q, float *cdn_vector)
+{
+ int i;
+ const float *v_synthesis_filtered, *v_pre_filtered;
+
+ if(q->bitrate >= RATE_HALF ||
+ q->bitrate == SILENCE ||
+ (q->bitrate == I_F_Q && (q->prev_bitrate >= RATE_HALF)))
+ {
+
+ if(q->bitrate >= RATE_HALF)
+ {
+
+ // Compute gain & lag for the whole frame.
+ for(i=0; i<4; i++)
+ {
+ q->pitch_gain[i] = q->frame.plag[i] ? (q->frame.pgain[i] + 1) * 0.25 : 0.0;
+
+ q->pitch_lag[i] = q->frame.plag[i] + 16;
+ }
+ }else
+ {
+ float max_pitch_gain;
+
+ if (q->bitrate == I_F_Q)
+ {
+ if (q->erasure_count < 3)
+ max_pitch_gain = 0.9 - 0.3 * (q->erasure_count - 1);
+ else
+ max_pitch_gain = 0.0;
+ }else
+ {
+ assert(q->bitrate == SILENCE);
+ max_pitch_gain = 1.0;
+ }
+ for(i=0; i<4; i++)
+ q->pitch_gain[i] = FFMIN(q->pitch_gain[i], max_pitch_gain);
+
+ memset(q->frame.pfrac, 0, sizeof(q->frame.pfrac));
+ }
+
+ // pitch synthesis filter
+ v_synthesis_filtered = do_pitchfilter(q->pitch_synthesis_filter_mem,
+ cdn_vector, q->pitch_gain,
+ q->pitch_lag, q->frame.pfrac);
+
+ // pitch prefilter update
+ for(i=0; i<4; i++)
+ q->pitch_gain[i] = 0.5 * FFMIN(q->pitch_gain[i], 1.0);
+
+ v_pre_filtered = do_pitchfilter(q->pitch_pre_filter_mem,
+ v_synthesis_filtered,
+ q->pitch_gain, q->pitch_lag,
+ q->frame.pfrac);
+
+ apply_gain_ctrl(cdn_vector, v_synthesis_filtered, v_pre_filtered);
+ }else
+ {
+ memcpy(q->pitch_synthesis_filter_mem, cdn_vector + 17,
+ 143 * sizeof(float));
+ memcpy(q->pitch_pre_filter_mem, cdn_vector + 17, 143 * sizeof(float));
+ memset(q->pitch_gain, 0, sizeof(q->pitch_gain));
+ memset(q->pitch_lag, 0, sizeof(q->pitch_lag));
+ }
+}
+
+/**
* Interpolates LSP frequencies and computes LPC coefficients
- * for a given framerate & pitch subframe.
+ * for a given bitrate & pitch subframe.
*
- * TIA/EIA/IS-733 2.4.3.3.4
+ * TIA/EIA/IS-733 2.4.3.3.4, 2.4.8.7.2
*
* @param q the context
* @param curr_lspf LSP frequencies vector of the current frame
* @param lpc float vector for the resulting LPC
* @param subframe_num frame number in decoded stream
*/
-void interpolate_lpc(QCELPContext *q,
- const float *curr_lspf,
- float *lpc,
- const int subframe_num) {
+void interpolate_lpc(QCELPContext *q, const float *curr_lspf, float *lpc,
+ const int subframe_num)
+{
float interpolated_lspf[10];
float weight;
- if (q->framerate >= RATE_QUARTER) {
+ if(q->bitrate >= RATE_QUARTER)
weight = 0.25 * (subframe_num + 1);
- } else if (q->framerate == RATE_OCTAVE && !subframe_num) {
+ else if(q->bitrate == RATE_OCTAVE && !subframe_num)
weight = 0.625;
- } else {
+ else
weight = 1.0;
+
+ if(weight != 1.0)
+ {
+ weighted_vector_sumf(interpolated_lspf, curr_lspf, q->prev_lspf,
+ weight, 1.0 - weight, 10);
+ ff_qcelp_lspf2lpc(interpolated_lspf, lpc);
+ }else if(q->bitrate >= RATE_QUARTER ||
+ (q->bitrate == I_F_Q && !subframe_num))
+ ff_qcelp_lspf2lpc(curr_lspf, lpc);
+ else if(q->bitrate == SILENCE && !subframe_num)
+ ff_qcelp_lspf2lpc(q->prev_lspf, lpc);
+}
+
+static qcelp_packet_rate buf_size2bitrate(const int buf_size)
+{
+ switch(buf_size)
+ {
+ case 35: return RATE_FULL;
+ case 17: return RATE_HALF;
+ case 8: return RATE_QUARTER;
+ case 4: return RATE_OCTAVE;
+ case 1: return SILENCE;
}
- if (weight != 1.0) {
- weighted_vector_sumf(interpolated_lspf, curr_lspf, q->prev_lspf, weight, 1.0 - weight, 10);
- qcelp_lspf2lpc(interpolated_lspf, lpc);
- } else if (q->framerate >= RATE_QUARTER || (q->framerate == I_F_Q && !subframe_num))
- qcelp_lspf2lpc(curr_lspf, lpc);
+ return I_F_Q;
}
-static int buf_size2framerate(const int buf_size) {
- switch (buf_size) {
- case 35:
- return RATE_FULL;
- case 17:
- return RATE_HALF;
- case 8:
- return RATE_QUARTER;
- case 4:
- return RATE_OCTAVE;
- case 1:
- return SILENCE;
+/**
+ * Determine the bitrate from the frame size and/or the first byte of the frame.
+ *
+ * @param avctx the AV codec context
+ * @param buf_size length of the buffer
+ * @param buf the buffer
+ *
+ * @return the bitrate on success,
+ * I_F_Q if the bitrate cannot be satisfactorily determined
+ *
+ * TIA/EIA/IS-733 2.4.8.7.1
+ */
+static qcelp_packet_rate determine_bitrate(AVCodecContext *avctx, const int buf_size,
+ const uint8_t **buf)
+{
+ qcelp_packet_rate bitrate;
+
+ if((bitrate = buf_size2bitrate(buf_size)) >= 0)
+ {
+ if(bitrate > **buf)
+ {
+ QCELPContext *q = avctx->priv_data;
+ if (!q->warned_buf_mismatch_bitrate)
+ {
+ av_log(avctx, AV_LOG_WARNING,
+ "Claimed bitrate and buffer size mismatch.\n");
+ q->warned_buf_mismatch_bitrate = 1;
+ }
+ bitrate = **buf;
+ }else if(bitrate < **buf)
+ {
+ av_log(avctx, AV_LOG_ERROR,
+ "Buffer is too small for the claimed bitrate.\n");
+ return I_F_Q;
+ }
+ (*buf)++;
+ }else if((bitrate = buf_size2bitrate(buf_size + 1)) >= 0)
+ {
+ av_log(avctx, AV_LOG_WARNING,
+ "Bitrate byte is missing, guessing the bitrate from packet size.\n");
+ }else
+ return I_F_Q;
+
+ if(bitrate == SILENCE)
+ {
+ //FIXME: Remove experimental warning when tested with samples.
+ av_log(avctx, AV_LOG_WARNING, "'Blank frame handling is experimental."
+ " If you want to help, upload a sample "
+ "of this file to ftp://upload.ffmpeg.org/MPlayer/incoming/ "
+ "and contact the ffmpeg-devel mailing list.\n");
}
- return -1;
+ return bitrate;
}
static void warn_insufficient_frame_quality(AVCodecContext *avctx,
- const char *message) {
- av_log(avctx, AV_LOG_WARNING, "Frame #%d, IFQ: %s\n", avctx->frame_number, message);
+ const char *message)
+{
+ av_log(avctx, AV_LOG_WARNING, "Frame #%d, IFQ: %s\n", avctx->frame_number,
+ message);
+}
+
+static int qcelp_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
+ const uint8_t *buf, int buf_size)
+{
+ QCELPContext *q = avctx->priv_data;
+ float *outbuffer = data;
+ int i;
+ float quantized_lspf[10], lpc[10];
+ float gain[16];
+ float *formant_mem;
+
+ if((q->bitrate = determine_bitrate(avctx, buf_size, &buf)) == I_F_Q)
+ {
+ warn_insufficient_frame_quality(avctx, "bitrate cannot be determined.");
+ goto erasure;
+ }
+
+ if(q->bitrate == RATE_OCTAVE &&
+ (q->first16bits = AV_RB16(buf)) == 0xFFFF)
+ {
+ warn_insufficient_frame_quality(avctx, "Bitrate is 1/8 and first 16 bits are on.");
+ goto erasure;
+ }
+
+ if(q->bitrate > SILENCE)
+ {
+ const QCELPBitmap *bitmaps = qcelp_unpacking_bitmaps_per_rate[q->bitrate];
+ const QCELPBitmap *bitmaps_end = qcelp_unpacking_bitmaps_per_rate[q->bitrate]
+ + qcelp_unpacking_bitmaps_lengths[q->bitrate];
+ uint8_t *unpacked_data = (uint8_t *)&q->frame;
+
+ init_get_bits(&q->gb, buf, 8*buf_size);
+
+ memset(&q->frame, 0, sizeof(QCELPFrame));
+
+ for(; bitmaps < bitmaps_end; bitmaps++)
+ unpacked_data[bitmaps->index] |= get_bits(&q->gb, bitmaps->bitlen) << bitmaps->bitpos;
+
+ // Check for erasures/blanks on rates 1, 1/4 and 1/8.
+ if(q->frame.reserved)
+ {
+ warn_insufficient_frame_quality(avctx, "Wrong data in reserved frame area.");
+ goto erasure;
+ }
+ if(q->bitrate == RATE_QUARTER &&
+ codebook_sanity_check_for_rate_quarter(q->frame.cbgain))
+ {
+ warn_insufficient_frame_quality(avctx, "Codebook gain sanity check failed.");
+ goto erasure;
+ }
+
+ if(q->bitrate >= RATE_HALF)
+ {
+ for(i=0; i<4; i++)
+ {
+ if(q->frame.pfrac[i] && q->frame.plag[i] >= 124)
+ {
+ warn_insufficient_frame_quality(avctx, "Cannot initialize pitch filter.");
+ goto erasure;
+ }
+ }
+ }
+ }
+
+ decode_gain_and_index(q, gain);
+ compute_svector(q, gain, outbuffer);
+
+ if(decode_lspf(q, quantized_lspf) < 0)
+ {
+ warn_insufficient_frame_quality(avctx, "Badly received packets in frame.");
+ goto erasure;
+ }
+
+
+ apply_pitch_filters(q, outbuffer);
+
+ if(q->bitrate == I_F_Q)
+ {
+erasure:
+ q->bitrate = I_F_Q;
+ q->erasure_count++;
+ decode_gain_and_index(q, gain);
+ compute_svector(q, gain, outbuffer);
+ decode_lspf(q, quantized_lspf);
+ apply_pitch_filters(q, outbuffer);
+ }else
+ q->erasure_count = 0;
+
+ formant_mem = q->formant_mem + 10;
+ for(i=0; i<4; i++)
+ {
+ interpolate_lpc(q, quantized_lspf, lpc, i);
+ ff_celp_lp_synthesis_filterf(formant_mem, lpc, outbuffer + i * 40, 40,
+ 10);
+ formant_mem += 40;
+ }
+ memcpy(q->formant_mem, q->formant_mem + 160, 10 * sizeof(float));
+
+ // FIXME: postfilter and final gain control should be here.
+ // TIA/EIA/IS-733 2.4.8.6
+
+ formant_mem = q->formant_mem + 10;
+ for(i=0; i<160; i++)
+ *outbuffer++ = av_clipf(*formant_mem++, QCELP_CLIP_LOWER_BOUND,
+ QCELP_CLIP_UPPER_BOUND);
+
+ memcpy(q->prev_lspf, quantized_lspf, sizeof(q->prev_lspf));
+ q->prev_bitrate = q->bitrate;
+
+ *data_size = 160 * sizeof(*outbuffer);
+
+ return *data_size;
}
AVCodec qcelp_decoder =
diff --git a/libavcodec/ratecontrol.c b/libavcodec/ratecontrol.c
index 8e248b9..c88059a 100644
--- a/libavcodec/ratecontrol.c
+++ b/libavcodec/ratecontrol.c
@@ -461,7 +461,7 @@ static double modify_qscale(MpegEncContext *s, RateControlEntry *rce, double q,
else if(d<0.0001) d=0.0001;
q*= pow(d, 1.0/s->avctx->rc_buffer_aggressivity);
- q_limit= bits2qp(rce, FFMAX((min_rate - buffer_size + rcc->buffer_index)*3, 1));
+ q_limit= bits2qp(rce, FFMAX((min_rate - buffer_size + rcc->buffer_index) * s->avctx->rc_min_vbv_overflow_use, 1));
if(q > q_limit){
if(s->avctx->debug&FF_DEBUG_RC){
av_log(s->avctx, AV_LOG_DEBUG, "limiting QP %f -> %f\n", q, q_limit);
@@ -476,7 +476,7 @@ static double modify_qscale(MpegEncContext *s, RateControlEntry *rce, double q,
else if(d<0.0001) d=0.0001;
q/= pow(d, 1.0/s->avctx->rc_buffer_aggressivity);
- q_limit= bits2qp(rce, FFMAX(rcc->buffer_index/3, 1));
+ q_limit= bits2qp(rce, FFMAX(rcc->buffer_index * s->avctx->rc_max_available_vbv_use, 1));
if(q < q_limit){
if(s->avctx->debug&FF_DEBUG_RC){
av_log(s->avctx, AV_LOG_DEBUG, "limiting QP %f -> %f\n", q, q_limit);
diff --git a/libavcodec/raw.c b/libavcodec/raw.c
index c88830c..de10f28 100644
--- a/libavcodec/raw.c
+++ b/libavcodec/raw.c
@@ -51,6 +51,7 @@ const PixelFormatTag ff_raw_pixelFormatTags[] = {
/* quicktime */
{ PIX_FMT_UYVY422, MKTAG('2', 'v', 'u', 'y') },
{ PIX_FMT_UYVY422, MKTAG('A', 'V', 'U', 'I') }, /* FIXME merge both fields */
+ { PIX_FMT_PAL8, MKTAG('W', 'R', 'A', 'W') },
{ PIX_FMT_NONE, 0 },
};
diff --git a/libavcodec/roqaudioenc.c b/libavcodec/roqaudioenc.c
index df014a4..28a6adc 100644
--- a/libavcodec/roqaudioenc.c
+++ b/libavcodec/roqaudioenc.c
@@ -35,7 +35,7 @@ static unsigned char dpcmValues[MAX_DPCM];
typedef struct
{
short lastSample[2];
-} ROQDPCMContext_t;
+} ROQDPCMContext;
static av_cold void roq_dpcm_table_init(void)
{
@@ -51,7 +51,7 @@ static av_cold void roq_dpcm_table_init(void)
static int roq_dpcm_encode_init(AVCodecContext *avctx)
{
- ROQDPCMContext_t *context = avctx->priv_data;
+ ROQDPCMContext *context = avctx->priv_data;
if (avctx->channels > 2) {
av_log(avctx, AV_LOG_ERROR, "Audio must be mono or stereo\n");
@@ -123,7 +123,7 @@ static int roq_dpcm_encode_frame(AVCodecContext *avctx,
short *in;
unsigned char *out;
- ROQDPCMContext_t *context = avctx->priv_data;
+ ROQDPCMContext *context = avctx->priv_data;
stereo = (avctx->channels == 2);
@@ -169,7 +169,7 @@ AVCodec roq_dpcm_encoder = {
"roq_dpcm",
CODEC_TYPE_AUDIO,
CODEC_ID_ROQ_DPCM,
- sizeof(ROQDPCMContext_t),
+ sizeof(ROQDPCMContext),
roq_dpcm_encode_init,
roq_dpcm_encode_frame,
roq_dpcm_encode_close,
diff --git a/libavcodec/roqvideoenc.c b/libavcodec/roqvideoenc.c
index a115e58..3b405cc 100644
--- a/libavcodec/roqvideoenc.c
+++ b/libavcodec/roqvideoenc.c
@@ -190,20 +190,20 @@ typedef struct
int subCels[4];
motion_vect motion;
int cbEntry;
-} subcel_evaluation_t;
+} SubcelEvaluation;
typedef struct
{
int eval_dist[4];
int best_coding;
- subcel_evaluation_t subCels[4];
+ SubcelEvaluation subCels[4];
motion_vect motion;
int cbEntry;
int sourceX, sourceY;
-} cel_evaluation_t;
+} CelEvaluation;
typedef struct
{
@@ -214,14 +214,14 @@ typedef struct
uint8_t unpacked_cb2[MAX_CBS_2x2*2*2*3];
uint8_t unpacked_cb4[MAX_CBS_4x4*4*4*3];
uint8_t unpacked_cb4_enlarged[MAX_CBS_4x4*8*8*3];
-} roq_codebooks_t;
+} RoqCodebooks;
/**
* Temporary vars
*/
typedef struct
{
- cel_evaluation_t *cel_evals;
+ CelEvaluation *cel_evals;
int f2i4[MAX_CBS_4x4];
int i2f4[MAX_CBS_4x4];
@@ -233,20 +233,20 @@ typedef struct
int numCB4;
int numCB2;
- roq_codebooks_t codebooks;
+ RoqCodebooks codebooks;
int *closest_cb2;
int used_option[4];
-} roq_tempdata_t;
+} RoqTempdata;
/**
* Initializes cel evaluators and sets their source coordinates
*/
-static void create_cel_evals(RoqContext *enc, roq_tempdata_t *tempData)
+static void create_cel_evals(RoqContext *enc, RoqTempdata *tempData)
{
int n=0, x, y, i;
- tempData->cel_evals = av_malloc(enc->width*enc->height/64 * sizeof(cel_evaluation_t));
+ tempData->cel_evals = av_malloc(enc->width*enc->height/64 * sizeof(CelEvaluation));
/* Map to the ROQ quadtree order */
for (y=0; y<enc->height; y+=16)
@@ -395,8 +395,8 @@ static void motion_search(RoqContext *enc, int blocksize)
/**
* Gets distortion for all options available to a subcel
*/
-static void gather_data_for_subcel(subcel_evaluation_t *subcel, int x,
- int y, RoqContext *enc, roq_tempdata_t *tempData)
+static void gather_data_for_subcel(SubcelEvaluation *subcel, int x,
+ int y, RoqContext *enc, RoqTempdata *tempData)
{
uint8_t mb4[4*4*3];
uint8_t mb2[2*2*3];
@@ -459,8 +459,8 @@ static void gather_data_for_subcel(subcel_evaluation_t *subcel, int x,
/**
* Gets distortion for all options available to a cel
*/
-static void gather_data_for_cel(cel_evaluation_t *cel, RoqContext *enc,
- roq_tempdata_t *tempData)
+static void gather_data_for_cel(CelEvaluation *cel, RoqContext *enc,
+ RoqTempdata *tempData)
{
uint8_t mb8[8*8*3];
int index = cel->sourceY*enc->width/64 + cel->sourceX/8;
@@ -533,7 +533,7 @@ static void gather_data_for_cel(cel_evaluation_t *cel, RoqContext *enc,
}
}
-static void remap_codebooks(RoqContext *enc, roq_tempdata_t *tempData)
+static void remap_codebooks(RoqContext *enc, RoqTempdata *tempData)
{
int i, j, idx=0;
@@ -565,7 +565,7 @@ static void remap_codebooks(RoqContext *enc, roq_tempdata_t *tempData)
/**
* Write codebook chunk
*/
-static void write_codebooks(RoqContext *enc, roq_tempdata_t *tempData)
+static void write_codebooks(RoqContext *enc, RoqTempdata *tempData)
{
int i, j;
uint8_t **outp= &enc->out_buf;
@@ -620,7 +620,7 @@ static void write_typecode(CodingSpool *s, uint8_t type)
}
}
-static void reconstruct_and_encode_image(RoqContext *enc, roq_tempdata_t *tempData, int w, int h, int numBlocks)
+static void reconstruct_and_encode_image(RoqContext *enc, RoqTempdata *tempData, int w, int h, int numBlocks)
{
int i, j, k;
int x, y;
@@ -628,7 +628,7 @@ static void reconstruct_and_encode_image(RoqContext *enc, roq_tempdata_t *tempDa
int dist=0;
roq_qcell *qcell;
- cel_evaluation_t *eval;
+ CelEvaluation *eval;
CodingSpool spool;
@@ -789,7 +789,7 @@ static void create_clusters(AVFrame *frame, int w, int h, uint8_t *yuvClusters)
}
}
-static void generate_codebook(RoqContext *enc, roq_tempdata_t *tempdata,
+static void generate_codebook(RoqContext *enc, RoqTempdata *tempdata,
int *points, int inputCount, roq_cell *results,
int size, int cbsize)
{
@@ -824,10 +824,10 @@ static void generate_codebook(RoqContext *enc, roq_tempdata_t *tempdata,
av_free(codebook);
}
-static void generate_new_codebooks(RoqContext *enc, roq_tempdata_t *tempData)
+static void generate_new_codebooks(RoqContext *enc, RoqTempdata *tempData)
{
int i,j;
- roq_codebooks_t *codebooks = &tempData->codebooks;
+ RoqCodebooks *codebooks = &tempData->codebooks;
int max = enc->width*enc->height/16;
uint8_t mb2[3*4];
roq_cell *results4 = av_malloc(sizeof(roq_cell)*MAX_CBS_4x4*4);
@@ -880,7 +880,7 @@ static void generate_new_codebooks(RoqContext *enc, roq_tempdata_t *tempData)
static void roq_encode_video(RoqContext *enc)
{
- roq_tempdata_t tempData;
+ RoqTempdata tempData;
int i;
memset(&tempData, 0, sizeof(tempData));
diff --git a/libavcodec/rv10.c b/libavcodec/rv10.c
index 0c05147..ad09c07 100644
--- a/libavcodec/rv10.c
+++ b/libavcodec/rv10.c
@@ -527,6 +527,11 @@ static av_cold int rv10_decode_init(AVCodecContext *avctx)
MpegEncContext *s = avctx->priv_data;
static int done=0;
+ if (avctx->extradata_size < 8) {
+ av_log(avctx, AV_LOG_ERROR, "Extradata is too small.\n");
+ return -1;
+ }
+
MPV_decode_defaults(s);
s->avctx= avctx;
diff --git a/libavcodec/rv30.c b/libavcodec/rv30.c
index 0202873..97d87b0 100644
--- a/libavcodec/rv30.c
+++ b/libavcodec/rv30.c
@@ -112,6 +112,127 @@ static int rv30_decode_mb_info(RV34DecContext *r)
return rv30_b_types[code];
}
+static inline void rv30_weak_loop_filter(uint8_t *src, const int step,
+ const int stride, const int lim)
+{
+ uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+ int i, diff;
+
+ for(i = 0; i < 4; i++){
+ diff = ((src[-2*step] - src[1*step]) - (src[-1*step] - src[0*step])*4) >> 3;
+ diff = av_clip(diff, -lim, lim);
+ src[-1*step] = cm[src[-1*step] + diff];
+ src[ 0*step] = cm[src[ 0*step] - diff];
+ src += stride;
+ }
+}
+
+static void rv30_loop_filter(RV34DecContext *r, int row)
+{
+ MpegEncContext *s = &r->s;
+ int mb_pos, mb_x;
+ int i, j, k;
+ uint8_t *Y, *C;
+ int loc_lim, cur_lim, left_lim = 0, top_lim = 0;
+
+ mb_pos = row * s->mb_stride;
+ for(mb_x = 0; mb_x < s->mb_width; mb_x++, mb_pos++){
+ int mbtype = s->current_picture_ptr->mb_type[mb_pos];
+ if(IS_INTRA(mbtype) || IS_SEPARATE_DC(mbtype))
+ r->deblock_coefs[mb_pos] = 0xFFFF;
+ if(IS_INTRA(mbtype))
+ r->cbp_chroma[mb_pos] = 0xFF;
+ }
+
+ /* all vertical edges are filtered first
+ * and horizontal edges are filtered on the next iteration
+ */
+ mb_pos = row * s->mb_stride;
+ for(mb_x = 0; mb_x < s->mb_width; mb_x++, mb_pos++){
+ cur_lim = rv30_loop_filt_lim[s->current_picture_ptr->qscale_table[mb_pos]];
+ if(mb_x)
+ left_lim = rv30_loop_filt_lim[s->current_picture_ptr->qscale_table[mb_pos - 1]];
+ for(j = 0; j < 16; j += 4){
+ Y = s->current_picture_ptr->data[0] + mb_x*16 + (row*16 + j) * s->linesize + 4 * !mb_x;
+ for(i = !mb_x; i < 4; i++, Y += 4){
+ int ij = i + j;
+ loc_lim = 0;
+ if(r->deblock_coefs[mb_pos] & (1 << ij))
+ loc_lim = cur_lim;
+ else if(!i && r->deblock_coefs[mb_pos - 1] & (1 << (ij + 3)))
+ loc_lim = left_lim;
+ else if( i && r->deblock_coefs[mb_pos] & (1 << (ij - 1)))
+ loc_lim = cur_lim;
+ if(loc_lim)
+ rv30_weak_loop_filter(Y, 1, s->linesize, loc_lim);
+ }
+ }
+ for(k = 0; k < 2; k++){
+ int cur_cbp, left_cbp = 0;
+ cur_cbp = (r->cbp_chroma[mb_pos] >> (k*4)) & 0xF;
+ if(mb_x)
+ left_cbp = (r->cbp_chroma[mb_pos - 1] >> (k*4)) & 0xF;
+ for(j = 0; j < 8; j += 4){
+ C = s->current_picture_ptr->data[k+1] + mb_x*8 + (row*8 + j) * s->uvlinesize + 4 * !mb_x;
+ for(i = !mb_x; i < 2; i++, C += 4){
+ int ij = i + (j >> 1);
+ loc_lim = 0;
+ if(cur_cbp && (1 << ij))
+ loc_lim = cur_lim;
+ else if(!i && left_cbp & (1 << (ij + 1)))
+ loc_lim = left_lim;
+ else if( i && cur_cbp & (1 << (ij - 1)))
+ loc_lim = cur_lim;
+ if(loc_lim)
+ rv30_weak_loop_filter(C, 1, s->uvlinesize, loc_lim);
+ }
+ }
+ }
+ }
+ mb_pos = row * s->mb_stride;
+ for(mb_x = 0; mb_x < s->mb_width; mb_x++, mb_pos++){
+ cur_lim = rv30_loop_filt_lim[s->current_picture_ptr->qscale_table[mb_pos]];
+ if(row)
+ top_lim = rv30_loop_filt_lim[s->current_picture_ptr->qscale_table[mb_pos - s->mb_stride]];
+ for(j = 4*!row; j < 16; j += 4){
+ Y = s->current_picture_ptr->data[0] + mb_x*16 + (row*16 + j) * s->linesize;
+ for(i = 0; i < 4; i++, Y += 4){
+ int ij = i + j;
+ loc_lim = 0;
+ if(r->deblock_coefs[mb_pos] & (1 << ij))
+ loc_lim = cur_lim;
+ else if(!j && r->deblock_coefs[mb_pos - s->mb_stride] & (1 << (ij + 12)))
+ loc_lim = top_lim;
+ else if( j && r->deblock_coefs[mb_pos] & (1 << (ij - 4)))
+ loc_lim = cur_lim;
+ if(loc_lim)
+ rv30_weak_loop_filter(Y, s->linesize, 1, loc_lim);
+ }
+ }
+ for(k = 0; k < 2; k++){
+ int cur_cbp, top_cbp = 0;
+ cur_cbp = (r->cbp_chroma[mb_pos] >> (k*4)) & 0xF;
+ if(row)
+ top_cbp = (r->cbp_chroma[mb_pos - s->mb_stride] >> (k*4)) & 0xF;
+ for(j = 4*!row; j < 8; j += 4){
+ C = s->current_picture_ptr->data[k+1] + mb_x*8 + (row*8 + j) * s->uvlinesize;
+ for(i = 0; i < 2; i++, C += 4){
+ int ij = i + (j >> 1);
+ loc_lim = 0;
+ if(r->cbp_chroma[mb_pos] && (1 << ij))
+ loc_lim = cur_lim;
+ else if(!j && top_cbp & (1 << (ij + 2)))
+ loc_lim = top_lim;
+ else if( j && cur_cbp & (1 << (ij - 2)))
+ loc_lim = cur_lim;
+ if(loc_lim)
+ rv30_weak_loop_filter(C, s->uvlinesize, 1, loc_lim);
+ }
+ }
+ }
+ }
+}
+
/**
* Initialize decoder.
*/
@@ -130,6 +251,7 @@ static av_cold int rv30_decode_init(AVCodecContext *avctx)
r->parse_slice_header = rv30_parse_slice_header;
r->decode_intra_types = rv30_decode_intra_types;
r->decode_mb_info = rv30_decode_mb_info;
+ r->loop_filter = rv30_loop_filter;
r->luma_dc_quant_i = rv30_luma_dc_quant;
r->luma_dc_quant_p = rv30_luma_dc_quant;
return 0;
diff --git a/libavcodec/rv30data.h b/libavcodec/rv30data.h
index c16e51b..1662fd7 100644
--- a/libavcodec/rv30data.h
+++ b/libavcodec/rv30data.h
@@ -171,4 +171,11 @@ static const uint8_t rv30_itype_from_context[900] = {
2, 7, 8, 4, 0, 6, 1, 5, 3,
2, 8, 3, 0, 7, 4, 1, 6, 5,
};
+
+/**
+ * Loop filter limits are taken from this table.
+ */
+static const uint8_t rv30_loop_filt_lim[32] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5
+};
#endif /* AVCODEC_RV30DATA_H */
diff --git a/libavcodec/rv30dsp.c b/libavcodec/rv30dsp.c
index 13b218b..f693a14 100644
--- a/libavcodec/rv30dsp.c
+++ b/libavcodec/rv30dsp.c
@@ -34,14 +34,14 @@ static av_unused void OPNAME ## rv30_tpel8_h_lowpass(uint8_t *dst, uint8_t *src,
int i;\
for(i=0; i<h; i++)\
{\
- OP(dst[0], -(src[-1]+src[2]) + src[0]*C1 + src[1]*C2);\
- OP(dst[1], -(src[ 0]+src[3]) + src[1]*C1 + src[2]*C2);\
- OP(dst[2], -(src[ 1]+src[4]) + src[2]*C1 + src[3]*C2);\
- OP(dst[3], -(src[ 2]+src[5]) + src[3]*C1 + src[4]*C2);\
- OP(dst[4], -(src[ 3]+src[6]) + src[4]*C1 + src[5]*C2);\
- OP(dst[5], -(src[ 4]+src[7]) + src[5]*C1 + src[6]*C2);\
- OP(dst[6], -(src[ 5]+src[8]) + src[6]*C1 + src[7]*C2);\
- OP(dst[7], -(src[ 6]+src[9]) + src[7]*C1 + src[8]*C2);\
+ OP(dst[0], (-(src[-1]+src[2]) + src[0]*C1 + src[1]*C2 + 8)>>4);\
+ OP(dst[1], (-(src[ 0]+src[3]) + src[1]*C1 + src[2]*C2 + 8)>>4);\
+ OP(dst[2], (-(src[ 1]+src[4]) + src[2]*C1 + src[3]*C2 + 8)>>4);\
+ OP(dst[3], (-(src[ 2]+src[5]) + src[3]*C1 + src[4]*C2 + 8)>>4);\
+ OP(dst[4], (-(src[ 3]+src[6]) + src[4]*C1 + src[5]*C2 + 8)>>4);\
+ OP(dst[5], (-(src[ 4]+src[7]) + src[5]*C1 + src[6]*C2 + 8)>>4);\
+ OP(dst[6], (-(src[ 5]+src[8]) + src[6]*C1 + src[7]*C2 + 8)>>4);\
+ OP(dst[7], (-(src[ 6]+src[9]) + src[7]*C1 + src[8]*C2 + 8)>>4);\
dst+=dstStride;\
src+=srcStride;\
}\
@@ -64,71 +64,92 @@ static void OPNAME ## rv30_tpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstSt
const int src7= src[7 *srcStride];\
const int src8= src[8 *srcStride];\
const int src9= src[9 *srcStride];\
- OP(dst[0*dstStride], -(srcA+src2) + src0*C1 + src1*C2);\
- OP(dst[1*dstStride], -(src0+src3) + src1*C1 + src2*C2);\
- OP(dst[2*dstStride], -(src1+src4) + src2*C1 + src3*C2);\
- OP(dst[3*dstStride], -(src2+src5) + src3*C1 + src4*C2);\
- OP(dst[4*dstStride], -(src3+src6) + src4*C1 + src5*C2);\
- OP(dst[5*dstStride], -(src4+src7) + src5*C1 + src6*C2);\
- OP(dst[6*dstStride], -(src5+src8) + src6*C1 + src7*C2);\
- OP(dst[7*dstStride], -(src6+src9) + src7*C1 + src8*C2);\
+ OP(dst[0*dstStride], (-(srcA+src2) + src0*C1 + src1*C2 + 8)>>4);\
+ OP(dst[1*dstStride], (-(src0+src3) + src1*C1 + src2*C2 + 8)>>4);\
+ OP(dst[2*dstStride], (-(src1+src4) + src2*C1 + src3*C2 + 8)>>4);\
+ OP(dst[3*dstStride], (-(src2+src5) + src3*C1 + src4*C2 + 8)>>4);\
+ OP(dst[4*dstStride], (-(src3+src6) + src4*C1 + src5*C2 + 8)>>4);\
+ OP(dst[5*dstStride], (-(src4+src7) + src5*C1 + src6*C2 + 8)>>4);\
+ OP(dst[6*dstStride], (-(src5+src8) + src6*C1 + src7*C2 + 8)>>4);\
+ OP(dst[7*dstStride], (-(src6+src9) + src7*C1 + src8*C2 + 8)>>4);\
dst++;\
src++;\
}\
}\
\
-static void OPNAME ## rv30_tpel8_h3_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
- const int h=8+2;\
+static void OPNAME ## rv30_tpel8_hv_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+ const int w = 8;\
+ const int h = 8;\
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
- int i;\
- for(i=0; i<h; i++)\
- {\
- OP(dst[0], 6*src[0]+9*src[1]+src[2]);\
- OP(dst[1], 6*src[1]+9*src[2]+src[3]);\
- OP(dst[2], 6*src[2]+9*src[3]+src[4]);\
- OP(dst[3], 6*src[3]+9*src[4]+src[5]);\
- OP(dst[4], 6*src[4]+9*src[5]+src[6]);\
- OP(dst[5], 6*src[5]+9*src[6]+src[7]);\
- OP(dst[6], 6*src[6]+9*src[7]+src[8]);\
- OP(dst[7], 6*src[7]+9*src[8]+src[9]);\
- dst+=dstStride;\
- src+=srcStride;\
+ int i, j;\
+ for(j = 0; j < h; j++){\
+ for(i = 0; i < w; i++){\
+ OP(dst[i], (\
+ src[srcStride*-1+i-1] -12*src[srcStride*-1+i] -6*src[srcStride*-1+i+1] +src[srcStride*-1+i+2]+\
+ -12*src[srcStride* 0+i-1] +144*src[srcStride* 0+i] +72*src[srcStride* 0+i+1] -12*src[srcStride* 0+i+2] +\
+ -6*src[srcStride* 1+i-1] +72*src[srcStride* 1+i] +36*src[srcStride* 1+i+1] -6*src[srcStride* 1+i+2] +\
+ src[srcStride* 2+i-1] -12*src[srcStride* 2+i] -6*src[srcStride* 2+i+1] +src[srcStride* 2+i+2] +\
+ 128)>>8);\
+ }\
+ src += srcStride;\
+ dst += dstStride;\
}\
}\
\
-static void OPNAME ## rv30_tpel8_v3_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
- const int w=8;\
+static void OPNAME ## rv30_tpel8_hhv_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+ const int w = 8;\
+ const int h = 8;\
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
- int i;\
- for(i=0; i<w; i++)\
- {\
- const int src0= src[0 *srcStride];\
- const int src1= src[1 *srcStride];\
- const int src2= src[2 *srcStride];\
- const int src3= src[3 *srcStride];\
- const int src4= src[4 *srcStride];\
- const int src5= src[5 *srcStride];\
- const int src6= src[6 *srcStride];\
- const int src7= src[7 *srcStride];\
- const int src8= src[8 *srcStride];\
- const int src9= src[9 *srcStride];\
- OP(dst[0*dstStride], 6*src0 + 9*src1 + src2);\
- OP(dst[1*dstStride], 6*src1 + 9*src2 + src3);\
- OP(dst[2*dstStride], 6*src2 + 9*src3 + src4);\
- OP(dst[3*dstStride], 6*src3 + 9*src4 + src5);\
- OP(dst[4*dstStride], 6*src4 + 9*src5 + src6);\
- OP(dst[5*dstStride], 6*src5 + 9*src6 + src7);\
- OP(dst[6*dstStride], 6*src6 + 9*src7 + src8);\
- OP(dst[7*dstStride], 6*src7 + 9*src8 + src9);\
- dst ++;\
- src ++;\
+ int i, j;\
+ for(j = 0; j < h; j++){\
+ for(i = 0; i < w; i++){\
+ OP(dst[i], (\
+ src[srcStride*-1+i-1] -12*src[srcStride*-1+i+1] -6*src[srcStride*-1+i] +src[srcStride*-1+i+2]+\
+ -12*src[srcStride* 0+i-1] +144*src[srcStride* 0+i+1] +72*src[srcStride* 0+i] -12*src[srcStride* 0+i+2]+\
+ -6*src[srcStride* 1+i-1] +72*src[srcStride* 1+i+1] +36*src[srcStride* 1+i] -6*src[srcStride* 1+i+2]+\
+ src[srcStride* 2+i-1] -12*src[srcStride* 2+i+1] -6*src[srcStride* 2+i] +src[srcStride* 2+i+2]+\
+ 128)>>8);\
+ }\
+ src += srcStride;\
+ dst += dstStride;\
}\
}\
\
-static void OPNAME ## rv30_tpel8_hv_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
- uint8_t half[8*10];\
- put_rv30_tpel8_h3_lowpass(half, src, 8, srcStride);\
- OPNAME ## rv30_tpel8_v3_lowpass(dst, half, dstStride, 8);\
+static void OPNAME ## rv30_tpel8_hvv_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+ const int w = 8;\
+ const int h = 8;\
+ uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+ int i, j;\
+ for(j = 0; j < h; j++){\
+ for(i = 0; i < w; i++){\
+ OP(dst[i], (\
+ src[srcStride*-1+i-1] -12*src[srcStride*-1+i] -6*src[srcStride*-1+i+1] +src[srcStride*-1+i+2]+\
+ -6*src[srcStride* 0+i-1] +72*src[srcStride* 0+i] +36*src[srcStride* 0+i+1] -6*src[srcStride* 0+i+2]+\
+ -12*src[srcStride* 1+i-1] +144*src[srcStride* 1+i] +72*src[srcStride* 1+i+1] -12*src[srcStride* 1+i+2]+\
+ src[srcStride* 2+i-1] -12*src[srcStride* 2+i] -6*src[srcStride* 2+i+1] +src[srcStride* 2+i+2]+\
+ 128)>>8);\
+ }\
+ src += srcStride;\
+ dst += dstStride;\
+ }\
+}\
+\
+static void OPNAME ## rv30_tpel8_hhvv_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+ const int w = 8;\
+ const int h = 8;\
+ uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+ int i, j;\
+ for(j = 0; j < h; j++){\
+ for(i = 0; i < w; i++){\
+ OP(dst[i], (\
+ 36*src[i+srcStride*0] +54*src[i+1+srcStride*0] +6*src[i+2+srcStride*0]+\
+ 54*src[i+srcStride*1] +81*src[i+1+srcStride*1] +9*src[i+2+srcStride*1]+\
+ 6*src[i+srcStride*2] + 9*src[i+1+srcStride*2] + src[i+2+srcStride*2]+\
+ 128)>>8);\
+ }\
+ src += srcStride;\
+ dst += dstStride;\
+ }\
}\
\
static void OPNAME ## rv30_tpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, const int C1, const int C2){\
@@ -158,6 +179,33 @@ static void OPNAME ## rv30_tpel16_hv_lowpass(uint8_t *dst, uint8_t *src, int dst
OPNAME ## rv30_tpel8_hv_lowpass(dst+8, src+8, dstStride, srcStride);\
}\
\
+static void OPNAME ## rv30_tpel16_hhv_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+ OPNAME ## rv30_tpel8_hhv_lowpass(dst , src , dstStride, srcStride);\
+ OPNAME ## rv30_tpel8_hhv_lowpass(dst+8, src+8, dstStride, srcStride);\
+ src += 8*srcStride;\
+ dst += 8*dstStride;\
+ OPNAME ## rv30_tpel8_hhv_lowpass(dst , src , dstStride, srcStride);\
+ OPNAME ## rv30_tpel8_hhv_lowpass(dst+8, src+8, dstStride, srcStride);\
+}\
+\
+static void OPNAME ## rv30_tpel16_hvv_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+ OPNAME ## rv30_tpel8_hvv_lowpass(dst , src , dstStride, srcStride);\
+ OPNAME ## rv30_tpel8_hvv_lowpass(dst+8, src+8, dstStride, srcStride);\
+ src += 8*srcStride;\
+ dst += 8*dstStride;\
+ OPNAME ## rv30_tpel8_hvv_lowpass(dst , src , dstStride, srcStride);\
+ OPNAME ## rv30_tpel8_hvv_lowpass(dst+8, src+8, dstStride, srcStride);\
+}\
+\
+static void OPNAME ## rv30_tpel16_hhvv_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+ OPNAME ## rv30_tpel8_hhvv_lowpass(dst , src , dstStride, srcStride);\
+ OPNAME ## rv30_tpel8_hhvv_lowpass(dst+8, src+8, dstStride, srcStride);\
+ src += 8*srcStride;\
+ dst += 8*dstStride;\
+ OPNAME ## rv30_tpel8_hhvv_lowpass(dst , src , dstStride, srcStride);\
+ OPNAME ## rv30_tpel8_hhvv_lowpass(dst+8, src+8, dstStride, srcStride);\
+}\
+\
#define RV30_MC(OPNAME, SIZE) \
static void OPNAME ## rv30_tpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
@@ -177,30 +225,24 @@ static void OPNAME ## rv30_tpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, i
}\
\
static void OPNAME ## rv30_tpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
- uint8_t half[SIZE*SIZE];\
- put_rv30_tpel ## SIZE ## _h_lowpass(half, src, SIZE, stride, 12, 6);\
- OPNAME ## rv30_tpel ## SIZE ## _v_lowpass(dst, src, stride, stride, 12, 6);\
+ OPNAME ## rv30_tpel ## SIZE ## _hv_lowpass(dst, src, stride, stride);\
}\
\
static void OPNAME ## rv30_tpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
- uint8_t half[SIZE*SIZE];\
- put_rv30_tpel ## SIZE ## _h_lowpass(half, src, SIZE, stride, 12, 6);\
- OPNAME ## rv30_tpel ## SIZE ## _v_lowpass(dst, src, stride, stride, 6, 12);\
+ OPNAME ## rv30_tpel ## SIZE ## _hvv_lowpass(dst, src, stride, stride);\
}\
\
static void OPNAME ## rv30_tpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
- uint8_t half[SIZE*SIZE];\
- put_rv30_tpel ## SIZE ## _h_lowpass(half, src, SIZE, stride, 6, 12);\
- OPNAME ## rv30_tpel ## SIZE ## _v_lowpass(dst, src, stride, stride, 12, 6);\
+ OPNAME ## rv30_tpel ## SIZE ## _hhv_lowpass(dst, src, stride, stride);\
}\
\
static void OPNAME ## rv30_tpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
- OPNAME ## rv30_tpel ## SIZE ## _hv_lowpass(dst, src, stride, stride);\
+ OPNAME ## rv30_tpel ## SIZE ## _hhvv_lowpass(dst, src, stride, stride);\
}\
\
-#define op_avg(a, b) a = (((a)+cm[((b) + 8)>>4]+1)>>1)
-#define op_put(a, b) a = cm[((b) + 8)>>4]
+#define op_avg(a, b) a = (((a)+cm[b]+1)>>1)
+#define op_put(a, b) a = cm[b]
RV30_LOWPASS(put_ , op_put)
RV30_LOWPASS(avg_ , op_avg)
diff --git a/libavcodec/rv34.c b/libavcodec/rv34.c
index 937c23b..450327e 100644
--- a/libavcodec/rv34.c
+++ b/libavcodec/rv34.c
@@ -367,7 +367,7 @@ int ff_rv34_get_start_offset(GetBitContext *gb, int mb_size)
{
int i;
for(i = 0; i < 5; i++)
- if(rv34_mb_max_sizes[i] > mb_size)
+ if(rv34_mb_max_sizes[i] >= mb_size - 1)
break;
return rv34_mb_bits_sizes[i];
}
@@ -564,7 +564,7 @@ static void rv34_pred_mv_rv3(RV34DecContext *r, int block_type, int dir)
MpegEncContext *s = &r->s;
int mv_pos = s->mb_x * 2 + s->mb_y * 2 * s->b8_stride;
int A[2] = {0}, B[2], C[2];
- int i, j;
+ int i, j, k;
int mx, my;
int avail_index = avail_indexes[0];
@@ -597,12 +597,12 @@ static void rv34_pred_mv_rv3(RV34DecContext *r, int block_type, int dir)
my += r->dmv[0][1];
for(j = 0; j < 2; j++){
for(i = 0; i < 2; i++){
- s->current_picture_ptr->motion_val[0][mv_pos + i + j*s->b8_stride][0] = mx;
- s->current_picture_ptr->motion_val[0][mv_pos + i + j*s->b8_stride][1] = my;
+ for(k = 0; k < 2; k++){
+ s->current_picture_ptr->motion_val[k][mv_pos + i + j*s->b8_stride][0] = mx;
+ s->current_picture_ptr->motion_val[k][mv_pos + i + j*s->b8_stride][1] = my;
+ }
}
}
- if(block_type == RV34_MB_B_BACKWARD || block_type == RV34_MB_B_FORWARD)
- fill_rectangle(s->current_picture_ptr->motion_val[!dir][mv_pos], 2, 2, s->b8_stride, 0, 4);
}
static const int chroma_coeffs[3] = { 0, 3, 5 };
@@ -644,14 +644,20 @@ static inline void rv34_mc(RV34DecContext *r, const int block_type,
uvmx = chroma_coeffs[(chroma_mx + (3 << 24)) % 3];
uvmy = chroma_coeffs[(chroma_my + (3 << 24)) % 3];
}else{
+ int cx, cy;
mx = s->current_picture_ptr->motion_val[dir][mv_pos][0] >> 2;
my = s->current_picture_ptr->motion_val[dir][mv_pos][1] >> 2;
lx = s->current_picture_ptr->motion_val[dir][mv_pos][0] & 3;
ly = s->current_picture_ptr->motion_val[dir][mv_pos][1] & 3;
- umx = mx >> 1;
- umy = my >> 1;
- uvmx = mx & 6;
- uvmy = my & 6;
+ cx = s->current_picture_ptr->motion_val[dir][mv_pos][0] / 2;
+ cy = s->current_picture_ptr->motion_val[dir][mv_pos][1] / 2;
+ umx = cx >> 2;
+ umy = cy >> 2;
+ uvmx = (cx & 3) << 1;
+ uvmy = (cy & 3) << 1;
+ //due to some flaw RV40 uses the same MC compensation routine for H2V2 and H3V3
+ if(uvmx == 6 && uvmy == 6)
+ uvmx = uvmy = 4;
}
dxy = ly*4 + lx;
srcY = dir ? s->next_picture_ptr->data[0] : s->last_picture_ptr->data[0];
@@ -664,12 +670,12 @@ static inline void rv34_mc(RV34DecContext *r, const int block_type,
srcY += src_y * s->linesize + src_x;
srcU += uvsrc_y * s->uvlinesize + uvsrc_x;
srcV += uvsrc_y * s->uvlinesize + uvsrc_x;
- if( (unsigned)(src_x - !!lx*2) > s->h_edge_pos - !!lx*2 - (width <<3) - 3
- || (unsigned)(src_y - !!ly*2) > s->v_edge_pos - !!ly*2 - (height<<3) - 3){
- uint8_t *uvbuf= s->edge_emu_buffer + 20 * s->linesize;
+ if( (unsigned)(src_x - !!lx*2) > s->h_edge_pos - !!lx*2 - (width <<3) - 4
+ || (unsigned)(src_y - !!ly*2) > s->v_edge_pos - !!ly*2 - (height<<3) - 4){
+ uint8_t *uvbuf= s->edge_emu_buffer + 22 * s->linesize;
srcY -= 2 + 2*s->linesize;
- ff_emulated_edge_mc(s->edge_emu_buffer, srcY, s->linesize, (width<<3)+4, (height<<3)+4,
+ ff_emulated_edge_mc(s->edge_emu_buffer, srcY, s->linesize, (width<<3)+6, (height<<3)+6,
src_x - 2, src_y - 2, s->h_edge_pos, s->v_edge_pos);
srcY = s->edge_emu_buffer + 2 + 2*s->linesize;
ff_emulated_edge_mc(uvbuf , srcU, s->uvlinesize, (width<<2)+1, (height<<2)+1,
@@ -704,20 +710,23 @@ static void rv34_mc_1mv(RV34DecContext *r, const int block_type,
{
rv34_mc(r, block_type, xoff, yoff, mv_off, width, height, dir, r->rv30,
r->rv30 ? r->s.dsp.put_rv30_tpel_pixels_tab
- : r->s.dsp.put_h264_qpel_pixels_tab,
- r->s.dsp.put_h264_chroma_pixels_tab);
+ : r->s.dsp.put_rv40_qpel_pixels_tab,
+ r->rv30 ? r->s.dsp.put_h264_chroma_pixels_tab
+ : r->s.dsp.put_rv40_chroma_pixels_tab);
}
static void rv34_mc_2mv(RV34DecContext *r, const int block_type)
{
rv34_mc(r, block_type, 0, 0, 0, 2, 2, 0, r->rv30,
r->rv30 ? r->s.dsp.put_rv30_tpel_pixels_tab
- : r->s.dsp.put_h264_qpel_pixels_tab,
- r->s.dsp.put_h264_chroma_pixels_tab);
+ : r->s.dsp.put_rv40_qpel_pixels_tab,
+ r->rv30 ? r->s.dsp.put_h264_chroma_pixels_tab
+ : r->s.dsp.put_rv40_chroma_pixels_tab);
rv34_mc(r, block_type, 0, 0, 0, 2, 2, 1, r->rv30,
r->rv30 ? r->s.dsp.avg_rv30_tpel_pixels_tab
- : r->s.dsp.avg_h264_qpel_pixels_tab,
- r->s.dsp.avg_h264_chroma_pixels_tab);
+ : r->s.dsp.avg_rv40_qpel_pixels_tab,
+ r->rv30 ? r->s.dsp.avg_h264_chroma_pixels_tab
+ : r->s.dsp.avg_rv40_chroma_pixels_tab);
}
static void rv34_mc_2mv_skip(RV34DecContext *r)
@@ -727,12 +736,14 @@ static void rv34_mc_2mv_skip(RV34DecContext *r)
for(i = 0; i < 2; i++){
rv34_mc(r, RV34_MB_P_8x8, i*8, j*8, i+j*r->s.b8_stride, 1, 1, 0, r->rv30,
r->rv30 ? r->s.dsp.put_rv30_tpel_pixels_tab
- : r->s.dsp.put_h264_qpel_pixels_tab,
- r->s.dsp.put_h264_chroma_pixels_tab);
+ : r->s.dsp.put_rv40_qpel_pixels_tab,
+ r->rv30 ? r->s.dsp.put_h264_chroma_pixels_tab
+ : r->s.dsp.put_rv40_chroma_pixels_tab);
rv34_mc(r, RV34_MB_P_8x8, i*8, j*8, i+j*r->s.b8_stride, 1, 1, 1, r->rv30,
r->rv30 ? r->s.dsp.avg_rv30_tpel_pixels_tab
- : r->s.dsp.avg_h264_qpel_pixels_tab,
- r->s.dsp.avg_h264_chroma_pixels_tab);
+ : r->s.dsp.avg_rv40_qpel_pixels_tab,
+ r->rv30 ? r->s.dsp.avg_h264_chroma_pixels_tab
+ : r->s.dsp.avg_rv40_chroma_pixels_tab);
}
}
@@ -953,11 +964,11 @@ static void rv34_output_macroblock(RV34DecContext *r, int8_t *intra_types, int c
itype = ittrans16[intra_types[0]];
itype = adjust_pred16(itype, r->avail_cache[5-4], r->avail_cache[5-1]);
r->h.pred16x16[itype](Y, s->linesize);
- dsp->add_pixels_clamped(s->block[0], Y, s->current_picture.linesize[0]);
- dsp->add_pixels_clamped(s->block[1], Y + 8, s->current_picture.linesize[0]);
- Y += s->current_picture.linesize[0] * 8;
- dsp->add_pixels_clamped(s->block[2], Y, s->current_picture.linesize[0]);
- dsp->add_pixels_clamped(s->block[3], Y + 8, s->current_picture.linesize[0]);
+ dsp->add_pixels_clamped(s->block[0], Y, s->linesize);
+ dsp->add_pixels_clamped(s->block[1], Y + 8, s->linesize);
+ Y += s->linesize * 8;
+ dsp->add_pixels_clamped(s->block[2], Y, s->linesize);
+ dsp->add_pixels_clamped(s->block[3], Y + 8, s->linesize);
itype = ittrans16[intra_types[0]];
if(itype == PLANE_PRED8x8) itype = DC_PRED8x8;
@@ -1046,7 +1057,7 @@ static int rv34_decode_mb_header(RV34DecContext *r, int8_t *intra_types)
* mask for retrieving all bits in coded block pattern
* corresponding to one 8x8 block
*/
-#define LUMA_CBP_BLOCK_MASK 0x303
+#define LUMA_CBP_BLOCK_MASK 0x33
#define U_CBP_MASK 0x0F0000
#define V_CBP_MASK 0xF00000
@@ -1059,7 +1070,7 @@ static void rv34_apply_differences(RV34DecContext *r, int cbp)
int i;
for(i = 0; i < 4; i++)
- if(cbp & (LUMA_CBP_BLOCK_MASK << shifts[i]))
+ if((cbp & (LUMA_CBP_BLOCK_MASK << shifts[i])) || r->block_type == RV34_MB_P_MIX16x16)
s->dsp.add_pixels_clamped(s->block[i], s->dest[0] + (i & 1)*8 + (i&2)*4*s->linesize, s->linesize);
if(cbp & U_CBP_MASK)
s->dsp.add_pixels_clamped(s->block[4], s->dest[1], s->uvlinesize);
@@ -1089,7 +1100,7 @@ static int rv34_set_deblock_coef(RV34DecContext *r)
for(i = 0; i < 2; i++){
if(is_mv_diff_gt_3(motion_val + i, 1))
vmvmask |= 0x11 << (j + i*2);
- if(is_mv_diff_gt_3(motion_val + i, s->b8_stride))
+ if((j || s->mb_y) && is_mv_diff_gt_3(motion_val + i, s->b8_stride))
hmvmask |= 0x03 << (j + i*2);
}
motion_val += s->b8_stride;
@@ -1098,7 +1109,15 @@ static int rv34_set_deblock_coef(RV34DecContext *r)
hmvmask &= ~0x000F;
if(!s->mb_x)
vmvmask &= ~0x1111;
- return hmvmask | vmvmask; //XXX: should be stored separately for RV3
+ if(r->rv30){ //RV30 marks both subblocks on the edge for filtering
+ vmvmask |= (vmvmask & 0x4444) >> 1;
+ hmvmask |= (hmvmask & 0x0F00) >> 4;
+ if(s->mb_x)
+ r->deblock_coefs[s->mb_x - 1 + s->mb_y*s->mb_stride] |= (vmvmask & 0x1111) << 3;
+ if(!s->first_slice_line)
+ r->deblock_coefs[s->mb_x + (s->mb_y - 1)*s->mb_stride] |= (hmvmask & 0xF) << 12;
+ }
+ return hmvmask | vmvmask;
}
static int rv34_decode_macroblock(RV34DecContext *r, int8_t *intra_types)
@@ -1129,13 +1148,13 @@ static int rv34_decode_macroblock(RV34DecContext *r, int8_t *intra_types)
s->qscale = r->si.quant;
cbp = cbp2 = rv34_decode_mb_header(r, intra_types);
- r->cbp_luma [s->mb_x + s->mb_y * s->mb_stride] = cbp;
- r->cbp_chroma[s->mb_x + s->mb_y * s->mb_stride] = cbp >> 16;
+ r->cbp_luma [mb_pos] = cbp;
+ r->cbp_chroma[mb_pos] = cbp >> 16;
if(s->pict_type == FF_I_TYPE)
- r->deblock_coefs[mb_pos] = 0;
+ r->deblock_coefs[mb_pos] = 0xFFFF;
else
- r->deblock_coefs[mb_pos] = rv34_set_deblock_coef(r);
- s->current_picture.qscale_table[s->mb_x + s->mb_y * s->mb_stride] = s->qscale;
+ r->deblock_coefs[mb_pos] = rv34_set_deblock_coef(r) | r->cbp_luma[mb_pos];
+ s->current_picture_ptr->qscale_table[mb_pos] = s->qscale;
if(cbp == -1)
return -1;
@@ -1169,7 +1188,7 @@ static int rv34_decode_macroblock(RV34DecContext *r, int8_t *intra_types)
rv34_dequant4x4(s->block[blknum] + blkoff, rv34_qscale_tab[rv34_chroma_quant[1][s->qscale]],rv34_qscale_tab[rv34_chroma_quant[0][s->qscale]]);
rv34_inv_transform(s->block[blknum] + blkoff);
}
- if(IS_INTRA(s->current_picture_ptr->mb_type[s->mb_x + s->mb_y*s->mb_stride]))
+ if(IS_INTRA(s->current_picture_ptr->mb_type[mb_pos]))
rv34_output_macroblock(r, intra_types, cbp2, r->is16);
else
rv34_apply_differences(r, cbp2);
@@ -1201,7 +1220,7 @@ static inline int slice_compare(SliceInfo *si1, SliceInfo *si2)
si1->pts != si2->pts;
}
-static int rv34_decode_slice(RV34DecContext *r, int end, uint8_t* buf, int buf_size)
+static int rv34_decode_slice(RV34DecContext *r, int end, const uint8_t* buf, int buf_size)
{
MpegEncContext *s = &r->s;
GetBitContext *gb = &s->gb;
@@ -1234,7 +1253,6 @@ static int rv34_decode_slice(RV34DecContext *r, int end, uint8_t* buf, int buf_s
if(MPV_frame_start(s, s->avctx) < 0)
return -1;
ff_er_frame_start(s);
- s->current_picture_ptr = &s->current_picture;
r->cur_pts = r->si.pts;
if(s->pict_type != FF_B_TYPE){
r->last_pts = r->next_pts;
@@ -1276,6 +1294,9 @@ static int rv34_decode_slice(RV34DecContext *r, int end, uint8_t* buf, int buf_s
memmove(r->intra_types_hist, r->intra_types, s->b4_stride * 4 * sizeof(*r->intra_types_hist));
memset(r->intra_types, -1, s->b4_stride * 4 * sizeof(*r->intra_types_hist));
+
+ if(r->loop_filter && s->mb_y >= 2)
+ r->loop_filter(r, s->mb_y - 2);
}
if(s->mb_x == s->resync_mb_x)
s->first_slice_line=0;
@@ -1331,7 +1352,7 @@ av_cold int ff_rv34_decode_init(AVCodecContext *avctx)
return 0;
}
-static int get_slice_offset(AVCodecContext *avctx, uint8_t *buf, int n)
+static int get_slice_offset(AVCodecContext *avctx, const uint8_t *buf, int n)
{
if(avctx->slice_count) return avctx->slice_offset[n];
else return AV_RL32(buf + n*8 - 4) == 1 ? AV_RL32(buf + n*8) : AV_RB32(buf + n*8);
@@ -1339,7 +1360,7 @@ static int get_slice_offset(AVCodecContext *avctx, uint8_t *buf, int n)
int ff_rv34_decode_frame(AVCodecContext *avctx,
void *data, int *data_size,
- uint8_t *buf, int buf_size)
+ const uint8_t *buf, int buf_size)
{
RV34DecContext *r = avctx->priv_data;
MpegEncContext *s = &r->s;
@@ -1347,7 +1368,7 @@ int ff_rv34_decode_frame(AVCodecContext *avctx,
SliceInfo si;
int i;
int slice_count;
- uint8_t *slices_hdr = NULL;
+ const uint8_t *slices_hdr = NULL;
int last = 0;
/* no supplementary picture */
@@ -1377,6 +1398,11 @@ int ff_rv34_decode_frame(AVCodecContext *avctx,
else
size= get_slice_offset(avctx, slices_hdr, i+1) - offset;
+ if(offset > buf_size){
+ av_log(avctx, AV_LOG_ERROR, "Slice offset is greater than frame size\n");
+ break;
+ }
+
r->si.end = s->mb_width * s->mb_height;
if(i+1 < slice_count){
init_get_bits(&s->gb, buf+get_slice_offset(avctx, slices_hdr, i+1), (buf_size-get_slice_offset(avctx, slices_hdr, i+1))*8);
@@ -1388,6 +1414,8 @@ int ff_rv34_decode_frame(AVCodecContext *avctx,
}else
r->si.end = si.start;
}
+ if(!i && si.type == FF_B_TYPE && (!s->last_picture_ptr || !s->last_picture_ptr->data[0]))
+ return -1;
last = rv34_decode_slice(r, r->si.end, buf + offset, size);
s->mb_num_left = r->s.mb_x + r->s.mb_y*r->s.mb_width - r->si.start;
if(last)
@@ -1396,7 +1424,7 @@ int ff_rv34_decode_frame(AVCodecContext *avctx,
if(last){
if(r->loop_filter)
- r->loop_filter(r);
+ r->loop_filter(r, s->mb_height - 1);
ff_er_frame_end(s);
MPV_frame_end(s);
if (s->pict_type == FF_B_TYPE || s->low_delay) {
diff --git a/libavcodec/rv34.h b/libavcodec/rv34.h
index 16e0faa..65dbb8a 100644
--- a/libavcodec/rv34.h
+++ b/libavcodec/rv34.h
@@ -115,7 +115,7 @@ typedef struct RV34DecContext{
int (*parse_slice_header)(struct RV34DecContext *r, GetBitContext *gb, SliceInfo *si);
int (*decode_mb_info)(struct RV34DecContext *r);
int (*decode_intra_types)(struct RV34DecContext *r, GetBitContext *gb, int8_t *dst);
- void (*loop_filter)(struct RV34DecContext *r);
+ void (*loop_filter)(struct RV34DecContext *r, int row);
}RV34DecContext;
/**
@@ -123,7 +123,7 @@ typedef struct RV34DecContext{
*/
int ff_rv34_get_start_offset(GetBitContext *gb, int blocks);
int ff_rv34_decode_init(AVCodecContext *avctx);
-int ff_rv34_decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size);
+int ff_rv34_decode_frame(AVCodecContext *avctx, void *data, int *data_size, const uint8_t *buf, int buf_size);
int ff_rv34_decode_end(AVCodecContext *avctx);
#endif /* AVCODEC_RV34_H */
diff --git a/libavcodec/rv34data.h b/libavcodec/rv34data.h
index e4862a3..95e5572 100644
--- a/libavcodec/rv34data.h
+++ b/libavcodec/rv34data.h
@@ -138,7 +138,7 @@ static const uint8_t rv34_dquant_tab[2][32]={
* maximum number of macroblocks for each of the possible slice offset sizes
* @todo This is the same as ff_mba_max, maybe use it instead.
*/
-static const uint16_t rv34_mb_max_sizes[6] = { 0x2F, 0x68, 0x18B, 0x62F, 0x18BF, 0x23FF };
+static const uint16_t rv34_mb_max_sizes[6] = { 0x2F, 0x62, 0x18B, 0x62F, 0x18BF, 0x23FF };
/**
* bits needed to code the slice offset for the given size
* @todo This is the same as ff_mba_length, maybe use it instead.
diff --git a/libavcodec/rv40.c b/libavcodec/rv40.c
index 2d52967..083de1b 100644
--- a/libavcodec/rv40.c
+++ b/libavcodec/rv40.c
@@ -247,6 +247,383 @@ static int rv40_decode_mb_info(RV34DecContext *r)
return 0;
}
+#define CLIP_SYMM(a, b) av_clip(a, -(b), b)
+/**
+ * weaker deblocking very similar to the one described in 4.4.2 of JVT-A003r1
+ */
+static inline void rv40_weak_loop_filter(uint8_t *src, const int step,
+ const int filter_p1, const int filter_q1,
+ const int alpha, const int beta,
+ const int lim_p0q0,
+ const int lim_q1, const int lim_p1,
+ const int diff_p1p0, const int diff_q1q0,
+ const int diff_p1p2, const int diff_q1q2)
+{
+ uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+ int t, u, diff;
+
+ t = src[0*step] - src[-1*step];
+ if(!t)
+ return;
+ u = (alpha * FFABS(t)) >> 7;
+ if(u > 3 - (filter_p1 && filter_q1))
+ return;
+
+ t <<= 2;
+ if(filter_p1 && filter_q1)
+ t += src[-2*step] - src[1*step];
+ diff = CLIP_SYMM((t + 4) >> 3, lim_p0q0);
+ src[-1*step] = cm[src[-1*step] + diff];
+ src[ 0*step] = cm[src[ 0*step] - diff];
+ if(FFABS(diff_p1p2) <= beta && filter_p1){
+ t = (diff_p1p0 + diff_p1p2 - diff) >> 1;
+ src[-2*step] = cm[src[-2*step] - CLIP_SYMM(t, lim_p1)];
+ }
+ if(FFABS(diff_q1q2) <= beta && filter_q1){
+ t = (diff_q1q0 + diff_q1q2 + diff) >> 1;
+ src[ 1*step] = cm[src[ 1*step] - CLIP_SYMM(t, lim_q1)];
+ }
+}
+
+static inline void rv40_adaptive_loop_filter(uint8_t *src, const int step,
+ const int stride, const int dmode,
+ const int lim_q1, const int lim_p1,
+ const int alpha,
+ const int beta, const int beta2,
+ const int chroma, const int edge)
+{
+ int diff_p1p0[4], diff_q1q0[4], diff_p1p2[4], diff_q1q2[4];
+ int sum_p1p0 = 0, sum_q1q0 = 0, sum_p1p2 = 0, sum_q1q2 = 0;
+ uint8_t *ptr;
+ int flag_strong0 = 1, flag_strong1 = 1;
+ int filter_p1, filter_q1;
+ int i;
+ int lims;
+
+ for(i = 0, ptr = src; i < 4; i++, ptr += stride){
+ diff_p1p0[i] = ptr[-2*step] - ptr[-1*step];
+ diff_q1q0[i] = ptr[ 1*step] - ptr[ 0*step];
+ sum_p1p0 += diff_p1p0[i];
+ sum_q1q0 += diff_q1q0[i];
+ }
+ filter_p1 = FFABS(sum_p1p0) < (beta<<2);
+ filter_q1 = FFABS(sum_q1q0) < (beta<<2);
+ if(!filter_p1 && !filter_q1)
+ return;
+
+ for(i = 0, ptr = src; i < 4; i++, ptr += stride){
+ diff_p1p2[i] = ptr[-2*step] - ptr[-3*step];
+ diff_q1q2[i] = ptr[ 1*step] - ptr[ 2*step];
+ sum_p1p2 += diff_p1p2[i];
+ sum_q1q2 += diff_q1q2[i];
+ }
+
+ if(edge){
+ flag_strong0 = filter_p1 && (FFABS(sum_p1p2) < beta2);
+ flag_strong1 = filter_q1 && (FFABS(sum_q1q2) < beta2);
+ }else{
+ flag_strong0 = flag_strong1 = 0;
+ }
+
+ lims = filter_p1 + filter_q1 + ((lim_q1 + lim_p1) >> 1) + 1;
+ if(flag_strong0 && flag_strong1){ /* strong filtering */
+ for(i = 0; i < 4; i++, src += stride){
+ int sflag, p0, q0, p1, q1;
+ int t = src[0*step] - src[-1*step];
+
+ if(!t) continue;
+ sflag = (alpha * FFABS(t)) >> 7;
+ if(sflag > 1) continue;
+
+ p0 = (25*src[-3*step] + 26*src[-2*step]
+ + 26*src[-1*step]
+ + 26*src[ 0*step] + 25*src[ 1*step] + rv40_dither_l[dmode + i]) >> 7;
+ q0 = (25*src[-2*step] + 26*src[-1*step]
+ + 26*src[ 0*step]
+ + 26*src[ 1*step] + 25*src[ 2*step] + rv40_dither_r[dmode + i]) >> 7;
+ if(sflag){
+ p0 = av_clip(p0, src[-1*step] - lims, src[-1*step] + lims);
+ q0 = av_clip(q0, src[ 0*step] - lims, src[ 0*step] + lims);
+ }
+ p1 = (25*src[-4*step] + 26*src[-3*step]
+ + 26*src[-2*step]
+ + 26*p0 + 25*src[ 0*step] + rv40_dither_l[dmode + i]) >> 7;
+ q1 = (25*src[-1*step] + 26*q0
+ + 26*src[ 1*step]
+ + 26*src[ 2*step] + 25*src[ 3*step] + rv40_dither_r[dmode + i]) >> 7;
+ if(sflag){
+ p1 = av_clip(p1, src[-2*step] - lims, src[-2*step] + lims);
+ q1 = av_clip(q1, src[ 1*step] - lims, src[ 1*step] + lims);
+ }
+ src[-2*step] = p1;
+ src[-1*step] = p0;
+ src[ 0*step] = q0;
+ src[ 1*step] = q1;
+ if(!chroma){
+ src[-3*step] = (25*src[-1*step] + 26*src[-2*step] + 51*src[-3*step] + 26*src[-4*step] + 64) >> 7;
+ src[ 2*step] = (25*src[ 0*step] + 26*src[ 1*step] + 51*src[ 2*step] + 26*src[ 3*step] + 64) >> 7;
+ }
+ }
+ }else if(filter_p1 && filter_q1){
+ for(i = 0; i < 4; i++, src += stride)
+ rv40_weak_loop_filter(src, step, 1, 1, alpha, beta, lims, lim_q1, lim_p1,
+ diff_p1p0[i], diff_q1q0[i], diff_p1p2[i], diff_q1q2[i]);
+ }else{
+ for(i = 0; i < 4; i++, src += stride)
+ rv40_weak_loop_filter(src, step, filter_p1, filter_q1,
+ alpha, beta, lims>>1, lim_q1>>1, lim_p1>>1,
+ diff_p1p0[i], diff_q1q0[i], diff_p1p2[i], diff_q1q2[i]);
+ }
+}
+
+static void rv40_v_loop_filter(uint8_t *src, int stride, int dmode,
+ int lim_q1, int lim_p1,
+ int alpha, int beta, int beta2, int chroma, int edge){
+ rv40_adaptive_loop_filter(src, 1, stride, dmode, lim_q1, lim_p1,
+ alpha, beta, beta2, chroma, edge);
+}
+static void rv40_h_loop_filter(uint8_t *src, int stride, int dmode,
+ int lim_q1, int lim_p1,
+ int alpha, int beta, int beta2, int chroma, int edge){
+ rv40_adaptive_loop_filter(src, stride, 1, dmode, lim_q1, lim_p1,
+ alpha, beta, beta2, chroma, edge);
+}
+
+enum RV40BlockPos{
+ POS_CUR,
+ POS_TOP,
+ POS_LEFT,
+ POS_BOTTOM,
+};
+
+#define MASK_CUR 0x0001
+#define MASK_RIGHT 0x0008
+#define MASK_BOTTOM 0x0010
+#define MASK_TOP 0x1000
+#define MASK_Y_TOP_ROW 0x000F
+#define MASK_Y_LAST_ROW 0xF000
+#define MASK_Y_LEFT_COL 0x1111
+#define MASK_Y_RIGHT_COL 0x8888
+#define MASK_C_TOP_ROW 0x0003
+#define MASK_C_LAST_ROW 0x000C
+#define MASK_C_LEFT_COL 0x0005
+#define MASK_C_RIGHT_COL 0x000A
+
+static const int neighbour_offs_x[4] = { 0, 0, -1, 0 };
+static const int neighbour_offs_y[4] = { 0, -1, 0, 1 };
+
+/**
+ * RV40 loop filtering function
+ */
+static void rv40_loop_filter(RV34DecContext *r, int row)
+{
+ MpegEncContext *s = &r->s;
+ int mb_pos, mb_x;
+ int i, j, k;
+ uint8_t *Y, *C;
+ int alpha, beta, betaY, betaC;
+ int q;
+ int mbtype[4]; ///< current macroblock and its neighbours types
+ /**
+ * flags indicating that macroblock can be filtered with strong filter
+ * it is set only for intra coded MB and MB with DCs coded separately
+ */
+ int mb_strong[4];
+ int clip[4]; ///< MB filter clipping value calculated from filtering strength
+ /**
+ * coded block patterns for luma part of current macroblock and its neighbours
+ * Format:
+ * LSB corresponds to the top left block,
+ * each nibble represents one row of subblocks.
+ */
+ int cbp[4];
+ /**
+ * coded block patterns for chroma part of current macroblock and its neighbours
+ * Format is the same as for luma with two subblocks in a row.
+ */
+ int uvcbp[4][2];
+ /**
+ * This mask represents the pattern of luma subblocks that should be filtered
+ * in addition to the coded ones because they lie at the edge of
+ * 8x8 block with different enough motion vectors
+ */
+ int mvmasks[4];
+
+ mb_pos = row * s->mb_stride;
+ for(mb_x = 0; mb_x < s->mb_width; mb_x++, mb_pos++){
+ int mbtype = s->current_picture_ptr->mb_type[mb_pos];
+ if(IS_INTRA(mbtype) || IS_SEPARATE_DC(mbtype))
+ r->cbp_luma [mb_pos] = r->deblock_coefs[mb_pos] = 0xFFFF;
+ if(IS_INTRA(mbtype))
+ r->cbp_chroma[mb_pos] = 0xFF;
+ }
+ mb_pos = row * s->mb_stride;
+ for(mb_x = 0; mb_x < s->mb_width; mb_x++, mb_pos++){
+ int y_h_deblock, y_v_deblock;
+ int c_v_deblock[2], c_h_deblock[2];
+ int clip_left;
+ int avail[4];
+ int y_to_deblock, c_to_deblock[2];
+
+ q = s->current_picture_ptr->qscale_table[mb_pos];
+ alpha = rv40_alpha_tab[q];
+ beta = rv40_beta_tab [q];
+ betaY = betaC = beta * 3;
+ if(s->width * s->height <= 176*144)
+ betaY += beta;
+
+ avail[0] = 1;
+ avail[1] = row;
+ avail[2] = mb_x;
+ avail[3] = row < s->mb_height - 1;
+ for(i = 0; i < 4; i++){
+ if(avail[i]){
+ int pos = mb_pos + neighbour_offs_x[i] + neighbour_offs_y[i]*s->mb_stride;
+ mvmasks[i] = r->deblock_coefs[pos];
+ mbtype [i] = s->current_picture_ptr->mb_type[pos];
+ cbp [i] = r->cbp_luma[pos];
+ uvcbp[i][0] = r->cbp_chroma[pos] & 0xF;
+ uvcbp[i][1] = r->cbp_chroma[pos] >> 4;
+ }else{
+ mvmasks[i] = 0;
+ mbtype [i] = mbtype[0];
+ cbp [i] = 0;
+ uvcbp[i][0] = uvcbp[i][1] = 0;
+ }
+ mb_strong[i] = IS_INTRA(mbtype[i]) || IS_SEPARATE_DC(mbtype[i]);
+ clip[i] = rv40_filter_clip_tbl[mb_strong[i] + 1][q];
+ }
+ y_to_deblock = mvmasks[POS_CUR]
+ | (mvmasks[POS_BOTTOM] << 16);
+ /* This pattern contains bits signalling that horizontal edges of
+ * the current block can be filtered.
+ * That happens when either of adjacent subblocks is coded or lies on
+ * the edge of 8x8 blocks with motion vectors differing by more than
+ * 3/4 pel in any component (any edge orientation for some reason).
+ */
+ y_h_deblock = y_to_deblock
+ | ((cbp[POS_CUR] << 4) & ~MASK_Y_TOP_ROW)
+ | ((cbp[POS_TOP] & MASK_Y_LAST_ROW) >> 12);
+ /* This pattern contains bits signalling that vertical edges of
+ * the current block can be filtered.
+ * That happens when either of adjacent subblocks is coded or lies on
+ * the edge of 8x8 blocks with motion vectors differing by more than
+ * 3/4 pel in any component (any edge orientation for some reason).
+ */
+ y_v_deblock = y_to_deblock
+ | ((cbp[POS_CUR] << 1) & ~MASK_Y_LEFT_COL)
+ | ((cbp[POS_LEFT] & MASK_Y_RIGHT_COL) >> 3);
+ if(!mb_x)
+ y_v_deblock &= ~MASK_Y_LEFT_COL;
+ if(!row)
+ y_h_deblock &= ~MASK_Y_TOP_ROW;
+ if(row == s->mb_height - 1 || (mb_strong[POS_CUR] || mb_strong[POS_BOTTOM]))
+ y_h_deblock &= ~(MASK_Y_TOP_ROW << 16);
+ /* Calculating chroma patterns is similar and easier since there is
+ * no motion vector pattern for them.
+ */
+ for(i = 0; i < 2; i++){
+ c_to_deblock[i] = (uvcbp[POS_BOTTOM][i] << 4) | uvcbp[POS_CUR][i];
+ c_v_deblock[i] = c_to_deblock[i]
+ | ((uvcbp[POS_CUR] [i] << 1) & ~MASK_C_LEFT_COL)
+ | ((uvcbp[POS_LEFT][i] & MASK_C_RIGHT_COL) >> 1);
+ c_h_deblock[i] = c_to_deblock[i]
+ | ((uvcbp[POS_TOP][i] & MASK_C_LAST_ROW) >> 2)
+ | (uvcbp[POS_CUR][i] << 2);
+ if(!mb_x)
+ c_v_deblock[i] &= ~MASK_C_LEFT_COL;
+ if(!row)
+ c_h_deblock[i] &= ~MASK_C_TOP_ROW;
+ if(row == s->mb_height - 1 || mb_strong[POS_CUR] || mb_strong[POS_BOTTOM])
+ c_h_deblock[i] &= ~(MASK_C_TOP_ROW << 4);
+ }
+
+ for(j = 0; j < 16; j += 4){
+ Y = s->current_picture_ptr->data[0] + mb_x*16 + (row*16 + j) * s->linesize;
+ for(i = 0; i < 4; i++, Y += 4){
+ int ij = i + j;
+ int clip_cur = y_to_deblock & (MASK_CUR << ij) ? clip[POS_CUR] : 0;
+ int dither = j ? ij : i*4;
+
+ // if bottom block is coded then we can filter its top edge
+ // (or bottom edge of this block, which is the same)
+ if(y_h_deblock & (MASK_BOTTOM << ij)){
+ rv40_h_loop_filter(Y+4*s->linesize, s->linesize, dither,
+ y_to_deblock & (MASK_BOTTOM << ij) ? clip[POS_CUR] : 0,
+ clip_cur,
+ alpha, beta, betaY, 0, 0);
+ }
+ // filter left block edge in ordinary mode (with low filtering strength)
+ if(y_v_deblock & (MASK_CUR << ij) && (i || !(mb_strong[POS_CUR] || mb_strong[POS_LEFT]))){
+ if(!i)
+ clip_left = mvmasks[POS_LEFT] & (MASK_RIGHT << j) ? clip[POS_LEFT] : 0;
+ else
+ clip_left = y_to_deblock & (MASK_CUR << (ij-1)) ? clip[POS_CUR] : 0;
+ rv40_v_loop_filter(Y, s->linesize, dither,
+ clip_cur,
+ clip_left,
+ alpha, beta, betaY, 0, 0);
+ }
+ // filter top edge of the current macroblock when filtering strength is high
+ if(!j && y_h_deblock & (MASK_CUR << i) && (mb_strong[POS_CUR] || mb_strong[POS_TOP])){
+ rv40_h_loop_filter(Y, s->linesize, dither,
+ clip_cur,
+ mvmasks[POS_TOP] & (MASK_TOP << i) ? clip[POS_TOP] : 0,
+ alpha, beta, betaY, 0, 1);
+ }
+ // filter left block edge in edge mode (with high filtering strength)
+ if(y_v_deblock & (MASK_CUR << ij) && !i && (mb_strong[POS_CUR] || mb_strong[POS_LEFT])){
+ clip_left = mvmasks[POS_LEFT] & (MASK_RIGHT << j) ? clip[POS_LEFT] : 0;
+ rv40_v_loop_filter(Y, s->linesize, dither,
+ clip_cur,
+ clip_left,
+ alpha, beta, betaY, 0, 1);
+ }
+ }
+ }
+ for(k = 0; k < 2; k++){
+ for(j = 0; j < 2; j++){
+ C = s->current_picture_ptr->data[k+1] + mb_x*8 + (row*8 + j*4) * s->uvlinesize;
+ for(i = 0; i < 2; i++, C += 4){
+ int ij = i + j*2;
+ int clip_cur = c_to_deblock[k] & (MASK_CUR << ij) ? clip[POS_CUR] : 0;
+ if(c_h_deblock[k] & (MASK_CUR << (ij+2))){
+ int clip_bot = c_to_deblock[k] & (MASK_CUR << (ij+2)) ? clip[POS_CUR] : 0;
+ rv40_h_loop_filter(C+4*s->uvlinesize, s->uvlinesize, i*8,
+ clip_bot,
+ clip_cur,
+ alpha, beta, betaC, 1, 0);
+ }
+ if((c_v_deblock[k] & (MASK_CUR << ij)) && (i || !(mb_strong[POS_CUR] || mb_strong[POS_LEFT]))){
+ if(!i)
+ clip_left = uvcbp[POS_LEFT][k] & (MASK_CUR << (2*j+1)) ? clip[POS_LEFT] : 0;
+ else
+ clip_left = c_to_deblock[k] & (MASK_CUR << (ij-1)) ? clip[POS_CUR] : 0;
+ rv40_v_loop_filter(C, s->uvlinesize, j*8,
+ clip_cur,
+ clip_left,
+ alpha, beta, betaC, 1, 0);
+ }
+ if(!j && c_h_deblock[k] & (MASK_CUR << ij) && (mb_strong[POS_CUR] || mb_strong[POS_TOP])){
+ int clip_top = uvcbp[POS_TOP][k] & (MASK_CUR << (ij+2)) ? clip[POS_TOP] : 0;
+ rv40_h_loop_filter(C, s->uvlinesize, i*8,
+ clip_cur,
+ clip_top,
+ alpha, beta, betaC, 1, 1);
+ }
+ if(c_v_deblock[k] & (MASK_CUR << ij) && !i && (mb_strong[POS_CUR] || mb_strong[POS_LEFT])){
+ clip_left = uvcbp[POS_LEFT][k] & (MASK_CUR << (2*j+1)) ? clip[POS_LEFT] : 0;
+ rv40_v_loop_filter(C, s->uvlinesize, j*8,
+ clip_cur,
+ clip_left,
+ alpha, beta, betaC, 1, 1);
+ }
+ }
+ }
+ }
+ }
+}
+
/**
* Initialize decoder.
*/
@@ -261,6 +638,7 @@ static av_cold int rv40_decode_init(AVCodecContext *avctx)
r->parse_slice_header = rv40_parse_slice_header;
r->decode_intra_types = rv40_decode_intra_types;
r->decode_mb_info = rv40_decode_mb_info;
+ r->loop_filter = rv40_loop_filter;
r->luma_dc_quant_i = rv40_luma_dc_quant[0];
r->luma_dc_quant_p = rv40_luma_dc_quant[1];
return 0;
diff --git a/libavcodec/rv40dsp.c b/libavcodec/rv40dsp.c
new file mode 100644
index 0000000..b48c4e8
--- /dev/null
+++ b/libavcodec/rv40dsp.c
@@ -0,0 +1,353 @@
+/*
+ * RV40 decoder motion compensation functions
+ * Copyright (c) 2008 Konstantin Shishkov
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file rv40dsp.c
+ * RV40 decoder motion compensation functions
+ */
+
+#include "avcodec.h"
+#include "dsputil.h"
+
+#define RV40_LOWPASS(OPNAME, OP) \
+static av_unused void OPNAME ## rv40_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,\
+ const int h, const int C1, const int C2, const int SHIFT){\
+ uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+ int i;\
+ for(i=0; i<h; i++)\
+ {\
+ OP(dst[0], (src[-2] + src[ 3] - 5*(src[-1]+src[2]) + src[0]*C1 + src[1]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
+ OP(dst[1], (src[-1] + src[ 4] - 5*(src[ 0]+src[3]) + src[1]*C1 + src[2]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
+ OP(dst[2], (src[ 0] + src[ 5] - 5*(src[ 1]+src[4]) + src[2]*C1 + src[3]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
+ OP(dst[3], (src[ 1] + src[ 6] - 5*(src[ 2]+src[5]) + src[3]*C1 + src[4]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
+ OP(dst[4], (src[ 2] + src[ 7] - 5*(src[ 3]+src[6]) + src[4]*C1 + src[5]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
+ OP(dst[5], (src[ 3] + src[ 8] - 5*(src[ 4]+src[7]) + src[5]*C1 + src[6]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
+ OP(dst[6], (src[ 4] + src[ 9] - 5*(src[ 5]+src[8]) + src[6]*C1 + src[7]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
+ OP(dst[7], (src[ 5] + src[10] - 5*(src[ 6]+src[9]) + src[7]*C1 + src[8]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
+ dst+=dstStride;\
+ src+=srcStride;\
+ }\
+}\
+\
+static void OPNAME ## rv40_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,\
+ const int w, const int C1, const int C2, const int SHIFT){\
+ uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
+ int i;\
+ for(i=0; i<w; i++)\
+ {\
+ const int srcB = src[-2*srcStride];\
+ const int srcA = src[-1*srcStride];\
+ const int src0 = src[0 *srcStride];\
+ const int src1 = src[1 *srcStride];\
+ const int src2 = src[2 *srcStride];\
+ const int src3 = src[3 *srcStride];\
+ const int src4 = src[4 *srcStride];\
+ const int src5 = src[5 *srcStride];\
+ const int src6 = src[6 *srcStride];\
+ const int src7 = src[7 *srcStride];\
+ const int src8 = src[8 *srcStride];\
+ const int src9 = src[9 *srcStride];\
+ const int src10= src[10*srcStride];\
+ OP(dst[0*dstStride], (srcB + src3 - 5*(srcA+src2) + src0*C1 + src1*C2 + (1<<(SHIFT-1))) >> SHIFT);\
+ OP(dst[1*dstStride], (srcA + src4 - 5*(src0+src3) + src1*C1 + src2*C2 + (1<<(SHIFT-1))) >> SHIFT);\
+ OP(dst[2*dstStride], (src0 + src5 - 5*(src1+src4) + src2*C1 + src3*C2 + (1<<(SHIFT-1))) >> SHIFT);\
+ OP(dst[3*dstStride], (src1 + src6 - 5*(src2+src5) + src3*C1 + src4*C2 + (1<<(SHIFT-1))) >> SHIFT);\
+ OP(dst[4*dstStride], (src2 + src7 - 5*(src3+src6) + src4*C1 + src5*C2 + (1<<(SHIFT-1))) >> SHIFT);\
+ OP(dst[5*dstStride], (src3 + src8 - 5*(src4+src7) + src5*C1 + src6*C2 + (1<<(SHIFT-1))) >> SHIFT);\
+ OP(dst[6*dstStride], (src4 + src9 - 5*(src5+src8) + src6*C1 + src7*C2 + (1<<(SHIFT-1))) >> SHIFT);\
+ OP(dst[7*dstStride], (src5 + src10 - 5*(src6+src9) + src7*C1 + src8*C2 + (1<<(SHIFT-1))) >> SHIFT);\
+ dst++;\
+ src++;\
+ }\
+}\
+\
+static void OPNAME ## rv40_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,\
+ const int w, const int C1, const int C2, const int SHIFT){\
+ OPNAME ## rv40_qpel8_v_lowpass(dst , src , dstStride, srcStride, 8, C1, C2, SHIFT);\
+ OPNAME ## rv40_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride, 8, C1, C2, SHIFT);\
+ src += 8*srcStride;\
+ dst += 8*dstStride;\
+ OPNAME ## rv40_qpel8_v_lowpass(dst , src , dstStride, srcStride, w-8, C1, C2, SHIFT);\
+ OPNAME ## rv40_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride, w-8, C1, C2, SHIFT);\
+}\
+\
+static void OPNAME ## rv40_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,\
+ const int h, const int C1, const int C2, const int SHIFT){\
+ OPNAME ## rv40_qpel8_h_lowpass(dst , src , dstStride, srcStride, 8, C1, C2, SHIFT);\
+ OPNAME ## rv40_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride, 8, C1, C2, SHIFT);\
+ src += 8*srcStride;\
+ dst += 8*dstStride;\
+ OPNAME ## rv40_qpel8_h_lowpass(dst , src , dstStride, srcStride, h-8, C1, C2, SHIFT);\
+ OPNAME ## rv40_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride, h-8, C1, C2, SHIFT);\
+}\
+\
+
+#define RV40_MC(OPNAME, SIZE) \
+static void OPNAME ## rv40_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
+ OPNAME ## rv40_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride, SIZE, 52, 20, 6);\
+}\
+\
+static void OPNAME ## rv40_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
+ OPNAME ## rv40_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride, SIZE, 20, 20, 5);\
+}\
+\
+static void OPNAME ## rv40_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
+ OPNAME ## rv40_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride, SIZE, 20, 52, 6);\
+}\
+\
+static void OPNAME ## rv40_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
+ OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, src, stride, stride, SIZE, 52, 20, 6);\
+}\
+\
+static void OPNAME ## rv40_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
+ uint8_t full[SIZE*(SIZE+5)];\
+ uint8_t * const full_mid= full + SIZE*2;\
+ put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 52, 20, 6);\
+ OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 52, 20, 6);\
+}\
+\
+static void OPNAME ## rv40_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
+ uint8_t full[SIZE*(SIZE+5)];\
+ uint8_t * const full_mid= full + SIZE*2;\
+ put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 20, 5);\
+ OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 52, 20, 6);\
+}\
+\
+static void OPNAME ## rv40_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
+ uint8_t full[SIZE*(SIZE+5)];\
+ uint8_t * const full_mid= full + SIZE*2;\
+ put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 52, 6);\
+ OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 52, 20, 6);\
+}\
+\
+static void OPNAME ## rv40_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
+ OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, src, stride, stride, SIZE, 20, 20, 5);\
+}\
+\
+static void OPNAME ## rv40_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
+ uint8_t full[SIZE*(SIZE+5)];\
+ uint8_t * const full_mid= full + SIZE*2;\
+ put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 52, 20, 6);\
+ OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 20, 5);\
+}\
+\
+static void OPNAME ## rv40_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
+ uint8_t full[SIZE*(SIZE+5)];\
+ uint8_t * const full_mid= full + SIZE*2;\
+ put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 20, 5);\
+ OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 20, 5);\
+}\
+\
+static void OPNAME ## rv40_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
+ uint8_t full[SIZE*(SIZE+5)];\
+ uint8_t * const full_mid= full + SIZE*2;\
+ put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 52, 6);\
+ OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 20, 5);\
+}\
+\
+static void OPNAME ## rv40_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
+ OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, src, stride, stride, SIZE, 20, 52, 6);\
+}\
+\
+static void OPNAME ## rv40_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
+ uint8_t full[SIZE*(SIZE+5)];\
+ uint8_t * const full_mid= full + SIZE*2;\
+ put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 52, 20, 6);\
+ OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 52, 6);\
+}\
+\
+static void OPNAME ## rv40_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
+ uint8_t full[SIZE*(SIZE+5)];\
+ uint8_t * const full_mid= full + SIZE*2;\
+ put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 20, 5);\
+ OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 52, 6);\
+}\
+\
+
+#define op_avg(a, b) a = (((a)+cm[b]+1)>>1)
+#define op_put(a, b) a = cm[b]
+
+RV40_LOWPASS(put_ , op_put)
+RV40_LOWPASS(avg_ , op_avg)
+
+#undef op_avg
+#undef op_put
+
+RV40_MC(put_, 8)
+RV40_MC(put_, 16)
+RV40_MC(avg_, 8)
+RV40_MC(avg_, 16)
+
+static const int rv40_bias[4][4] = {
+ { 0, 16, 32, 16 },
+ { 32, 28, 32, 28 },
+ { 0, 32, 16, 32 },
+ { 32, 28, 32, 28 }
+};
+
+#define RV40_CHROMA_MC(OPNAME, OP)\
+static void OPNAME ## rv40_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
+ const int A=(8-x)*(8-y);\
+ const int B=( x)*(8-y);\
+ const int C=(8-x)*( y);\
+ const int D=( x)*( y);\
+ int i;\
+ int bias = rv40_bias[y>>1][x>>1];\
+ \
+ assert(x<8 && y<8 && x>=0 && y>=0);\
+\
+ if(D){\
+ for(i=0; i<h; i++){\
+ OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + bias));\
+ OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + bias));\
+ OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + bias));\
+ OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + bias));\
+ dst+= stride;\
+ src+= stride;\
+ }\
+ }else{\
+ const int E= B+C;\
+ const int step= C ? stride : 1;\
+ for(i=0; i<h; i++){\
+ OP(dst[0], (A*src[0] + E*src[step+0] + bias));\
+ OP(dst[1], (A*src[1] + E*src[step+1] + bias));\
+ OP(dst[2], (A*src[2] + E*src[step+2] + bias));\
+ OP(dst[3], (A*src[3] + E*src[step+3] + bias));\
+ dst+= stride;\
+ src+= stride;\
+ }\
+ }\
+}\
+\
+static void OPNAME ## rv40_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
+ const int A=(8-x)*(8-y);\
+ const int B=( x)*(8-y);\
+ const int C=(8-x)*( y);\
+ const int D=( x)*( y);\
+ int i;\
+ int bias = rv40_bias[y>>1][x>>1];\
+ \
+ assert(x<8 && y<8 && x>=0 && y>=0);\
+\
+ if(D){\
+ for(i=0; i<h; i++){\
+ OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + bias));\
+ OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + bias));\
+ OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + bias));\
+ OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + bias));\
+ OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + bias));\
+ OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + bias));\
+ OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + bias));\
+ OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + bias));\
+ dst+= stride;\
+ src+= stride;\
+ }\
+ }else{\
+ const int E= B+C;\
+ const int step= C ? stride : 1;\
+ for(i=0; i<h; i++){\
+ OP(dst[0], (A*src[0] + E*src[step+0] + bias));\
+ OP(dst[1], (A*src[1] + E*src[step+1] + bias));\
+ OP(dst[2], (A*src[2] + E*src[step+2] + bias));\
+ OP(dst[3], (A*src[3] + E*src[step+3] + bias));\
+ OP(dst[4], (A*src[4] + E*src[step+4] + bias));\
+ OP(dst[5], (A*src[5] + E*src[step+5] + bias));\
+ OP(dst[6], (A*src[6] + E*src[step+6] + bias));\
+ OP(dst[7], (A*src[7] + E*src[step+7] + bias));\
+ dst+= stride;\
+ src+= stride;\
+ }\
+ }\
+}
+
+#define op_avg(a, b) a = (((a)+((b)>>6)+1)>>1)
+#define op_put(a, b) a = ((b)>>6)
+
+RV40_CHROMA_MC(put_, op_put)
+RV40_CHROMA_MC(avg_, op_avg)
+
+void ff_rv40dsp_init(DSPContext* c, AVCodecContext *avctx) {
+ c->put_rv40_qpel_pixels_tab[0][ 0] = c->put_h264_qpel_pixels_tab[0][0];
+ c->put_rv40_qpel_pixels_tab[0][ 1] = put_rv40_qpel16_mc10_c;
+ c->put_rv40_qpel_pixels_tab[0][ 2] = put_rv40_qpel16_mc20_c;
+ c->put_rv40_qpel_pixels_tab[0][ 3] = put_rv40_qpel16_mc30_c;
+ c->put_rv40_qpel_pixels_tab[0][ 4] = put_rv40_qpel16_mc01_c;
+ c->put_rv40_qpel_pixels_tab[0][ 5] = put_rv40_qpel16_mc11_c;
+ c->put_rv40_qpel_pixels_tab[0][ 6] = put_rv40_qpel16_mc21_c;
+ c->put_rv40_qpel_pixels_tab[0][ 7] = put_rv40_qpel16_mc31_c;
+ c->put_rv40_qpel_pixels_tab[0][ 8] = put_rv40_qpel16_mc02_c;
+ c->put_rv40_qpel_pixels_tab[0][ 9] = put_rv40_qpel16_mc12_c;
+ c->put_rv40_qpel_pixels_tab[0][10] = put_rv40_qpel16_mc22_c;
+ c->put_rv40_qpel_pixels_tab[0][11] = put_rv40_qpel16_mc32_c;
+ c->put_rv40_qpel_pixels_tab[0][12] = put_rv40_qpel16_mc03_c;
+ c->put_rv40_qpel_pixels_tab[0][13] = put_rv40_qpel16_mc13_c;
+ c->put_rv40_qpel_pixels_tab[0][14] = put_rv40_qpel16_mc23_c;
+ c->avg_rv40_qpel_pixels_tab[0][ 0] = c->avg_h264_qpel_pixels_tab[0][0];
+ c->avg_rv40_qpel_pixels_tab[0][ 1] = avg_rv40_qpel16_mc10_c;
+ c->avg_rv40_qpel_pixels_tab[0][ 2] = avg_rv40_qpel16_mc20_c;
+ c->avg_rv40_qpel_pixels_tab[0][ 3] = avg_rv40_qpel16_mc30_c;
+ c->avg_rv40_qpel_pixels_tab[0][ 4] = avg_rv40_qpel16_mc01_c;
+ c->avg_rv40_qpel_pixels_tab[0][ 5] = avg_rv40_qpel16_mc11_c;
+ c->avg_rv40_qpel_pixels_tab[0][ 6] = avg_rv40_qpel16_mc21_c;
+ c->avg_rv40_qpel_pixels_tab[0][ 7] = avg_rv40_qpel16_mc31_c;
+ c->avg_rv40_qpel_pixels_tab[0][ 8] = avg_rv40_qpel16_mc02_c;
+ c->avg_rv40_qpel_pixels_tab[0][ 9] = avg_rv40_qpel16_mc12_c;
+ c->avg_rv40_qpel_pixels_tab[0][10] = avg_rv40_qpel16_mc22_c;
+ c->avg_rv40_qpel_pixels_tab[0][11] = avg_rv40_qpel16_mc32_c;
+ c->avg_rv40_qpel_pixels_tab[0][12] = avg_rv40_qpel16_mc03_c;
+ c->avg_rv40_qpel_pixels_tab[0][13] = avg_rv40_qpel16_mc13_c;
+ c->avg_rv40_qpel_pixels_tab[0][14] = avg_rv40_qpel16_mc23_c;
+ c->put_rv40_qpel_pixels_tab[1][ 0] = c->put_h264_qpel_pixels_tab[1][0];
+ c->put_rv40_qpel_pixels_tab[1][ 1] = put_rv40_qpel8_mc10_c;
+ c->put_rv40_qpel_pixels_tab[1][ 2] = put_rv40_qpel8_mc20_c;
+ c->put_rv40_qpel_pixels_tab[1][ 3] = put_rv40_qpel8_mc30_c;
+ c->put_rv40_qpel_pixels_tab[1][ 4] = put_rv40_qpel8_mc01_c;
+ c->put_rv40_qpel_pixels_tab[1][ 5] = put_rv40_qpel8_mc11_c;
+ c->put_rv40_qpel_pixels_tab[1][ 6] = put_rv40_qpel8_mc21_c;
+ c->put_rv40_qpel_pixels_tab[1][ 7] = put_rv40_qpel8_mc31_c;
+ c->put_rv40_qpel_pixels_tab[1][ 8] = put_rv40_qpel8_mc02_c;
+ c->put_rv40_qpel_pixels_tab[1][ 9] = put_rv40_qpel8_mc12_c;
+ c->put_rv40_qpel_pixels_tab[1][10] = put_rv40_qpel8_mc22_c;
+ c->put_rv40_qpel_pixels_tab[1][11] = put_rv40_qpel8_mc32_c;
+ c->put_rv40_qpel_pixels_tab[1][12] = put_rv40_qpel8_mc03_c;
+ c->put_rv40_qpel_pixels_tab[1][13] = put_rv40_qpel8_mc13_c;
+ c->put_rv40_qpel_pixels_tab[1][14] = put_rv40_qpel8_mc23_c;
+ c->avg_rv40_qpel_pixels_tab[1][ 0] = c->avg_h264_qpel_pixels_tab[1][0];
+ c->avg_rv40_qpel_pixels_tab[1][ 1] = avg_rv40_qpel8_mc10_c;
+ c->avg_rv40_qpel_pixels_tab[1][ 2] = avg_rv40_qpel8_mc20_c;
+ c->avg_rv40_qpel_pixels_tab[1][ 3] = avg_rv40_qpel8_mc30_c;
+ c->avg_rv40_qpel_pixels_tab[1][ 4] = avg_rv40_qpel8_mc01_c;
+ c->avg_rv40_qpel_pixels_tab[1][ 5] = avg_rv40_qpel8_mc11_c;
+ c->avg_rv40_qpel_pixels_tab[1][ 6] = avg_rv40_qpel8_mc21_c;
+ c->avg_rv40_qpel_pixels_tab[1][ 7] = avg_rv40_qpel8_mc31_c;
+ c->avg_rv40_qpel_pixels_tab[1][ 8] = avg_rv40_qpel8_mc02_c;
+ c->avg_rv40_qpel_pixels_tab[1][ 9] = avg_rv40_qpel8_mc12_c;
+ c->avg_rv40_qpel_pixels_tab[1][10] = avg_rv40_qpel8_mc22_c;
+ c->avg_rv40_qpel_pixels_tab[1][11] = avg_rv40_qpel8_mc32_c;
+ c->avg_rv40_qpel_pixels_tab[1][12] = avg_rv40_qpel8_mc03_c;
+ c->avg_rv40_qpel_pixels_tab[1][13] = avg_rv40_qpel8_mc13_c;
+ c->avg_rv40_qpel_pixels_tab[1][14] = avg_rv40_qpel8_mc23_c;
+
+ c->put_rv40_chroma_pixels_tab[0]= put_rv40_chroma_mc8_c;
+ c->put_rv40_chroma_pixels_tab[1]= put_rv40_chroma_mc4_c;
+ c->avg_rv40_chroma_pixels_tab[0]= avg_rv40_chroma_mc8_c;
+ c->avg_rv40_chroma_pixels_tab[1]= avg_rv40_chroma_mc4_c;
+}
diff --git a/libavcodec/sh4/dsputil_align.c b/libavcodec/sh4/dsputil_align.c
index 7c49a06..d9286e1 100644
--- a/libavcodec/sh4/dsputil_align.c
+++ b/libavcodec/sh4/dsputil_align.c
@@ -272,36 +272,36 @@ static void op##_##rnd##_pixels##sz##_##xy (uint8_t * dest, const uint8_t * ref,
#define OP put
-DEFFUNC(put, rnd,o,8,OP_C,avg2)
-DEFFUNC(put, rnd,x,8,OP_X,avg2)
-DEFFUNC(put,no_rnd,x,8,OP_X,avg2)
-DEFFUNC(put, rnd,y,8,OP_Y,avg2)
-DEFFUNC(put,no_rnd,y,8,OP_Y,avg2)
+DEFFUNC(put, rnd,o,8,OP_C,avg32)
+DEFFUNC(put, rnd,x,8,OP_X,avg32)
+DEFFUNC(put,no_rnd,x,8,OP_X,avg32)
+DEFFUNC(put, rnd,y,8,OP_Y,avg32)
+DEFFUNC(put,no_rnd,y,8,OP_Y,avg32)
DEFFUNC(put, rnd,xy,8,OP_XY,PACK)
DEFFUNC(put,no_rnd,xy,8,OP_XY,PACK)
-DEFFUNC(put, rnd,o,16,OP_C,avg2)
-DEFFUNC(put, rnd,x,16,OP_X,avg2)
-DEFFUNC(put,no_rnd,x,16,OP_X,avg2)
-DEFFUNC(put, rnd,y,16,OP_Y,avg2)
-DEFFUNC(put,no_rnd,y,16,OP_Y,avg2)
+DEFFUNC(put, rnd,o,16,OP_C,avg32)
+DEFFUNC(put, rnd,x,16,OP_X,avg32)
+DEFFUNC(put,no_rnd,x,16,OP_X,avg32)
+DEFFUNC(put, rnd,y,16,OP_Y,avg32)
+DEFFUNC(put,no_rnd,y,16,OP_Y,avg32)
DEFFUNC(put, rnd,xy,16,OP_XY,PACK)
DEFFUNC(put,no_rnd,xy,16,OP_XY,PACK)
#undef OP
#define OP avg
-DEFFUNC(avg, rnd,o,8,OP_C,avg2)
-DEFFUNC(avg, rnd,x,8,OP_X,avg2)
-DEFFUNC(avg,no_rnd,x,8,OP_X,avg2)
-DEFFUNC(avg, rnd,y,8,OP_Y,avg2)
-DEFFUNC(avg,no_rnd,y,8,OP_Y,avg2)
+DEFFUNC(avg, rnd,o,8,OP_C,avg32)
+DEFFUNC(avg, rnd,x,8,OP_X,avg32)
+DEFFUNC(avg,no_rnd,x,8,OP_X,avg32)
+DEFFUNC(avg, rnd,y,8,OP_Y,avg32)
+DEFFUNC(avg,no_rnd,y,8,OP_Y,avg32)
DEFFUNC(avg, rnd,xy,8,OP_XY,PACK)
DEFFUNC(avg,no_rnd,xy,8,OP_XY,PACK)
-DEFFUNC(avg, rnd,o,16,OP_C,avg2)
-DEFFUNC(avg, rnd,x,16,OP_X,avg2)
-DEFFUNC(avg,no_rnd,x,16,OP_X,avg2)
-DEFFUNC(avg, rnd,y,16,OP_Y,avg2)
-DEFFUNC(avg,no_rnd,y,16,OP_Y,avg2)
+DEFFUNC(avg, rnd,o,16,OP_C,avg32)
+DEFFUNC(avg, rnd,x,16,OP_X,avg32)
+DEFFUNC(avg,no_rnd,x,16,OP_X,avg32)
+DEFFUNC(avg, rnd,y,16,OP_Y,avg32)
+DEFFUNC(avg,no_rnd,y,16,OP_Y,avg32)
DEFFUNC(avg, rnd,xy,16,OP_XY,PACK)
DEFFUNC(avg,no_rnd,xy,16,OP_XY,PACK)
@@ -370,22 +370,22 @@ void dsputil_init_align(DSPContext* c, AVCodecContext *avctx)
#ifdef QPEL
#define dspfunc(PFX, IDX, NUM) \
- c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
- c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
- c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
- c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
- c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
- c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
- c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
- c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
- c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
- c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
- c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
- c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
- c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
- c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
- c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
- c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
+ c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_sh4; \
+ c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_sh4; \
+ c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_sh4; \
+ c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_sh4; \
+ c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_sh4; \
+ c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_sh4; \
+ c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_sh4; \
+ c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_sh4; \
+ c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_sh4; \
+ c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_sh4; \
+ c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_sh4; \
+ c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_sh4; \
+ c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_sh4; \
+ c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_sh4; \
+ c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_sh4; \
+ c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_sh4
dspfunc(put_qpel, 0, 16);
dspfunc(put_no_rnd_qpel, 0, 16);
@@ -407,21 +407,21 @@ void dsputil_init_align(DSPContext* c, AVCodecContext *avctx)
dspfunc(avg_h264_qpel, 2, 4);
#undef dspfunc
- c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_c;
- c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_c;
- c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_c;
- c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c;
- c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c;
- c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c;
-
- c->put_mspel_pixels_tab[0]= put_mspel8_mc00_c;
- c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
- c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
- c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
- c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
- c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
- c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
- c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
+ c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_sh4;
+ c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_sh4;
+ c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_sh4;
+ c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_sh4;
+ c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_sh4;
+ c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_sh4;
+
+ c->put_mspel_pixels_tab[0]= put_mspel8_mc00_sh4;
+ c->put_mspel_pixels_tab[1]= put_mspel8_mc10_sh4;
+ c->put_mspel_pixels_tab[2]= put_mspel8_mc20_sh4;
+ c->put_mspel_pixels_tab[3]= put_mspel8_mc30_sh4;
+ c->put_mspel_pixels_tab[4]= put_mspel8_mc02_sh4;
+ c->put_mspel_pixels_tab[5]= put_mspel8_mc12_sh4;
+ c->put_mspel_pixels_tab[6]= put_mspel8_mc22_sh4;
+ c->put_mspel_pixels_tab[7]= put_mspel8_mc32_sh4;
c->gmc1 = gmc1_c;
c->gmc = gmc_c;
diff --git a/libavcodec/sh4/dsputil_sh4.c b/libavcodec/sh4/dsputil_sh4.c
index e7e2de6..2c86a7a 100644
--- a/libavcodec/sh4/dsputil_sh4.c
+++ b/libavcodec/sh4/dsputil_sh4.c
@@ -22,16 +22,15 @@
#include "libavcodec/avcodec.h"
#include "libavcodec/dsputil.h"
+#include "sh4.h"
static void memzero_align8(void *dst,size_t size)
{
-#if defined(__SH4__) || defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
- (char*)dst+=size;
- size/=8*4;
- __asm__(
-#if defined(__SH4__)
- " fschg\n" //single float mode
-#endif
+ int fpscr;
+ fp_single_enter(fpscr);
+ dst = (char *)dst + size;
+ size /= 32;
+ __asm__ volatile (
" fldi0 fr0\n"
" fldi0 fr1\n"
" fschg\n" // double
@@ -42,35 +41,22 @@ static void memzero_align8(void *dst,size_t size)
" fmov dr0, at -%0\n"
" bf.s 1b\n"
" fmov dr0, at -%0\n"
-#if defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
" fschg" //back to single
-#endif
- : : "r"(dst),"r"(size): "memory" );
-#else
- double *d = dst;
- size/=8*4;
- do {
- d[0] = 0.0;
- d[1] = 0.0;
- d[2] = 0.0;
- d[3] = 0.0;
- d+=4;
- } while(--size);
-#endif
+ : "+r"(dst),"+r"(size) :: "memory" );
+ fp_single_leave(fpscr);
}
static void clear_blocks_sh4(DCTELEM *blocks)
{
-// if (((int)blocks&7)==0)
memzero_align8(blocks,sizeof(DCTELEM)*6*64);
}
-extern void idct_sh4(DCTELEM *block);
+void idct_sh4(DCTELEM *block);
static void idct_put(uint8_t *dest, int line_size, DCTELEM *block)
{
- idct_sh4(block);
int i;
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+ idct_sh4(block);
for(i=0;i<8;i++) {
dest[0] = cm[block[0]];
dest[1] = cm[block[1]];
@@ -86,9 +72,9 @@ static void idct_put(uint8_t *dest, int line_size, DCTELEM *block)
}
static void idct_add(uint8_t *dest, int line_size, DCTELEM *block)
{
- idct_sh4(block);
int i;
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
+ idct_sh4(block);
for(i=0;i<8;i++) {
dest[0] = cm[dest[0]+block[0]];
dest[1] = cm[dest[1]+block[1]];
@@ -103,7 +89,7 @@ static void idct_add(uint8_t *dest, int line_size, DCTELEM *block)
}
}
-extern void dsputil_init_align(DSPContext* c, AVCodecContext *avctx);
+void dsputil_init_align(DSPContext* c, AVCodecContext *avctx);
void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx)
{
@@ -115,6 +101,6 @@ void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx)
c->idct_put = idct_put;
c->idct_add = idct_add;
c->idct = idct_sh4;
- c->idct_permutation_type= FF_NO_IDCT_PERM; //FF_SIMPLE_IDCT_PERM; //FF_LIBMPEG2_IDCT_PERM;
+ c->idct_permutation_type= FF_NO_IDCT_PERM;
}
}
diff --git a/libavcodec/sh4/idct_sh4.c b/libavcodec/sh4/idct_sh4.c
index b684e8f..5c461e7 100644
--- a/libavcodec/sh4/idct_sh4.c
+++ b/libavcodec/sh4/idct_sh4.c
@@ -21,6 +21,8 @@
*/
#include "libavcodec/dsputil.h"
+#include "sh4.h"
+
#define c1 1.38703984532214752434 /* sqrt(2)*cos(1*pi/16) */
#define c2 1.30656296487637657577 /* sqrt(2)*cos(2*pi/16) */
#define c3 1.17587560241935884520 /* sqrt(2)*cos(3*pi/16) */
@@ -51,9 +53,11 @@ static const float odd_table[] __attribute__ ((aligned(8))) = {
#undef c6
#undef c7
-#if defined(__SH4_SINGLE__) || defined(__SH4_SINGLE_ONLY__)
+#if 1
#define load_matrix(table) \
+ do { \
+ const float *t = table; \
__asm__ volatile( \
" fschg\n" \
" fmov @%0+,xd0\n" \
@@ -65,15 +69,13 @@ static const float odd_table[] __attribute__ ((aligned(8))) = {
" fmov @%0+,xd12\n" \
" fmov @%0+,xd14\n" \
" fschg\n" \
- :\
- : "r"(table)\
- : "0" \
- )
+ : "+r"(t) \
+ ); \
+ } while (0)
#define ftrv() \
__asm__ volatile("ftrv xmtrx,fv0" \
- : "=f"(fr0),"=f"(fr1),"=f"(fr2),"=f"(fr3) \
- : "0"(fr0), "1"(fr1), "2"(fr2), "3"(fr3) );
+ : "+f"(fr0),"+f"(fr1),"+f"(fr2),"+f"(fr3));
#define DEFREG \
register float fr0 __asm__("fr0"); \
@@ -136,10 +138,9 @@ void idct_sh4(DCTELEM *block)
int i;
float tblock[8*8],*fblock;
int ofs1,ofs2,ofs3;
+ int fpscr;
-#if defined(__SH4__)
-#error "FIXME!! change to single float"
-#endif
+ fp_single_enter(fpscr);
/* row */
@@ -168,10 +169,6 @@ void idct_sh4(DCTELEM *block)
i = 8;
-// ofs1 = sizeof(float)*1;
-// ofs2 = sizeof(float)*2;
-// ofs3 = sizeof(float)*3;
-
do {
float t0,t1,t2,t3;
fr0 = block[1];
@@ -252,9 +249,7 @@ void idct_sh4(DCTELEM *block)
block++;
} while(--i);
-#if defined(__SH4__)
-#error "FIXME!! change to double"
-#endif
+ fp_single_leave(fpscr);
}
#else
void idct_sh4(DCTELEM *block)
diff --git a/libavcodec/sh4/qpel.c b/libavcodec/sh4/qpel.c
index a75d22f..2069bd3 100644
--- a/libavcodec/sh4/qpel.c
+++ b/libavcodec/sh4/qpel.c
@@ -22,63 +22,6 @@
*/
#define PIXOP2(OPNAME, OP) \
-/*static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
-{\
- do {\
- OP(LP(dst ),no_rnd_avg32(AV_RN32(src1 ),AV_RN32(src2 )) ); \
- OP(LP(dst+4),no_rnd_avg32(AV_RN32(src1+4),AV_RN32(src2+4)) ); \
- src1+=src_stride1; \
- src2+=src_stride2; \
- dst+=dst_stride; \
- } while(--h); \
-}\
-\
-static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
-{\
- do {\
- OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),AV_RN32(src2 )) ); \
- OP(LP(dst+4),rnd_avg32(AV_RN32(src1+4),AV_RN32(src2+4)) ); \
- src1+=src_stride1; \
- src2+=src_stride2; \
- dst+=dst_stride; \
- } while(--h); \
-}\
-\
-static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
-{\
- do {\
- OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),AV_RN32(src2 )) ); \
- src1+=src_stride1; \
- src2+=src_stride2; \
- dst+=dst_stride; \
- } while(--h); \
-}\
-\
-static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
-{\
- do {\
- OP(LP(dst ),no_rnd_avg32(AV_RN32(src1 ),AV_RN32(src2 )) ); \
- OP(LP(dst+4),no_rnd_avg32(AV_RN32(src1+4),AV_RN32(src2+4)) ); \
- OP(LP(dst+8),no_rnd_avg32(AV_RN32(src1+8),AV_RN32(src2+8)) ); \
- OP(LP(dst+12),no_rnd_avg32(AV_RN32(src1+12),AV_RN32(src2+12)) ); \
- src1+=src_stride1; \
- src2+=src_stride2; \
- dst+=dst_stride; \
- } while(--h); \
-}\
-\
-static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
-{\
- do {\
- OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),AV_RN32(src2 )) ); \
- OP(LP(dst+4),rnd_avg32(AV_RN32(src1+4),AV_RN32(src2+4)) ); \
- OP(LP(dst+8),rnd_avg32(AV_RN32(src1+8),AV_RN32(src2+8)) ); \
- OP(LP(dst+12),rnd_avg32(AV_RN32(src1+12),AV_RN32(src2+12)) ); \
- src1+=src_stride1; \
- src2+=src_stride2; \
- dst+=dst_stride; \
- } while(--h); \
-}*/\
\
static inline void OPNAME ## _pixels4_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
@@ -472,7 +415,7 @@ static void gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
}
}
#define H264_CHROMA_MC(OPNAME, OP)\
-static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
+static void OPNAME ## h264_chroma_mc2_sh4(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
const int A=(8-x)*(8-y);\
const int B=( x)*(8-y);\
const int C=(8-x)*( y);\
@@ -494,7 +437,7 @@ static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*a
}while(--h);\
}\
\
-static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
+static void OPNAME ## h264_chroma_mc4_sh4(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
const int A=(8-x)*(8-y);\
const int B=( x)*(8-y);\
const int C=(8-x)*( y);\
@@ -520,7 +463,7 @@ static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*a
}while(--h);\
}\
\
-static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
+static void OPNAME ## h264_chroma_mc8_sh4(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
const int A=(8-x)*(8-y);\
const int B=( x)*(8-y);\
const int C=(8-x)*( y);\
@@ -707,27 +650,27 @@ static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dst
}while(--w);\
}\
\
-static void OPNAME ## qpel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel8_mc00_sh4 (uint8_t *dst, uint8_t *src, int stride){\
OPNAME ## pixels8_c(dst, src, stride, 8);\
}\
\
-static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel8_mc10_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t half[64];\
put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
OPNAME ## pixels8_l2_aligned2(dst, src, half, stride, stride, 8, 8);\
}\
\
-static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel8_mc20_sh4(uint8_t *dst, uint8_t *src, int stride){\
OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
}\
\
-static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel8_mc30_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t half[64];\
put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
OPNAME ## pixels8_l2_aligned2(dst, src+1, half, stride, stride, 8, 8);\
}\
\
-static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel8_mc01_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[16*9];\
uint8_t half[64];\
copy_block9(full, src, 16, stride, 9);\
@@ -735,31 +678,20 @@ static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
OPNAME ## pixels8_l2_aligned(dst, full, half, stride, 16, 8, 8);\
}\
\
-static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel8_mc02_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[16*9];\
copy_block9(full, src, 16, stride, 9);\
OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
}\
\
-static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel8_mc03_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[16*9];\
uint8_t half[64];\
copy_block9(full, src, 16, stride, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
OPNAME ## pixels8_l2_aligned(dst, full+16, half, stride, 16, 8, 8);\
}\
-static void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
- uint8_t full[16*9];\
- uint8_t halfH[72];\
- uint8_t halfV[64];\
- uint8_t halfHV[64];\
- copy_block9(full, src, 16, stride, 9);\
- put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
- put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
- put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
- OPNAME ## pixels8_l4_aligned(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
-}\
-static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel8_mc11_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[16*9];\
uint8_t halfH[72];\
uint8_t halfHV[64];\
@@ -769,18 +701,7 @@ static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l2_aligned(dst, halfH, halfHV, stride, 8, 8, 8);\
}\
-static void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
- uint8_t full[16*9];\
- uint8_t halfH[72];\
- uint8_t halfV[64];\
- uint8_t halfHV[64];\
- copy_block9(full, src, 16, stride, 9);\
- put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
- put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
- put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
- OPNAME ## pixels8_l4_aligned0(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
-}\
-static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel8_mc31_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[16*9];\
uint8_t halfH[72];\
uint8_t halfHV[64];\
@@ -790,18 +711,7 @@ static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l2_aligned(dst, halfH, halfHV, stride, 8, 8, 8);\
}\
-static void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
- uint8_t full[16*9];\
- uint8_t halfH[72];\
- uint8_t halfV[64];\
- uint8_t halfHV[64];\
- copy_block9(full, src, 16, stride, 9);\
- put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
- put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
- put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
- OPNAME ## pixels8_l4_aligned(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
-}\
-static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel8_mc13_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[16*9];\
uint8_t halfH[72];\
uint8_t halfHV[64];\
@@ -811,18 +721,7 @@ static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l2_aligned(dst, halfH+8, halfHV, stride, 8, 8, 8);\
}\
-static void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
- uint8_t full[16*9];\
- uint8_t halfH[72];\
- uint8_t halfV[64];\
- uint8_t halfHV[64];\
- copy_block9(full, src, 16, stride, 9);\
- put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
- put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
- put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
- OPNAME ## pixels8_l4_aligned0(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
-}\
-static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel8_mc33_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[16*9];\
uint8_t halfH[72];\
uint8_t halfHV[64];\
@@ -832,32 +731,21 @@ static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l2_aligned(dst, halfH+8, halfHV, stride, 8, 8, 8);\
}\
-static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel8_mc21_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t halfH[72];\
uint8_t halfHV[64];\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l2_aligned(dst, halfH, halfHV, stride, 8, 8, 8);\
}\
-static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel8_mc23_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t halfH[72];\
uint8_t halfHV[64];\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l2_aligned(dst, halfH+8, halfHV, stride, 8, 8, 8);\
}\
-static void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
- uint8_t full[16*9];\
- uint8_t halfH[72];\
- uint8_t halfV[64];\
- uint8_t halfHV[64];\
- copy_block9(full, src, 16, stride, 9);\
- put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
- put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
- put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
- OPNAME ## pixels8_l2_aligned(dst, halfV, halfHV, stride, 8, 8, 8);\
-}\
-static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel8_mc12_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[16*9];\
uint8_t halfH[72];\
copy_block9(full, src, 16, stride, 9);\
@@ -865,18 +753,7 @@ static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
put ## RND ## pixels8_l2_aligned(halfH, halfH, full, 8, 8, 16, 9);\
OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
}\
-static void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
- uint8_t full[16*9];\
- uint8_t halfH[72];\
- uint8_t halfV[64];\
- uint8_t halfHV[64];\
- copy_block9(full, src, 16, stride, 9);\
- put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
- put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
- put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
- OPNAME ## pixels8_l2_aligned(dst, halfV, halfHV, stride, 8, 8, 8);\
-}\
-static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel8_mc32_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[16*9];\
uint8_t halfH[72];\
copy_block9(full, src, 16, stride, 9);\
@@ -884,32 +761,32 @@ static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
put ## RND ## pixels8_l2_aligned1(halfH, halfH, full+1, 8, 8, 16, 9);\
OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
}\
-static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel8_mc22_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t halfH[72];\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
}\
-static void OPNAME ## qpel16_mc00_c (uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel16_mc00_sh4 (uint8_t *dst, uint8_t *src, int stride){\
OPNAME ## pixels16_c(dst, src, stride, 16);\
}\
\
-static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel16_mc10_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t half[256];\
put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
OPNAME ## pixels16_l2_aligned2(dst, src, half, stride, stride, 16, 16);\
}\
\
-static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel16_mc20_sh4(uint8_t *dst, uint8_t *src, int stride){\
OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
}\
\
-static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel16_mc30_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t half[256];\
put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
OPNAME ## pixels16_l2_aligned2(dst, src+1, half, stride, stride, 16, 16);\
}\
\
-static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel16_mc01_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[24*17];\
uint8_t half[256];\
copy_block17(full, src, 24, stride, 17);\
@@ -917,31 +794,20 @@ static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
OPNAME ## pixels16_l2_aligned(dst, full, half, stride, 24, 16, 16);\
}\
\
-static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel16_mc02_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[24*17];\
copy_block17(full, src, 24, stride, 17);\
OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
}\
\
-static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel16_mc03_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[24*17];\
uint8_t half[256];\
copy_block17(full, src, 24, stride, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
OPNAME ## pixels16_l2_aligned(dst, full+24, half, stride, 24, 16, 16);\
}\
-static void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
- uint8_t full[24*17];\
- uint8_t halfH[272];\
- uint8_t halfV[256];\
- uint8_t halfHV[256];\
- copy_block17(full, src, 24, stride, 17);\
- put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
- put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
- put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
- OPNAME ## pixels16_l4_aligned(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
-}\
-static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel16_mc11_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[24*17];\
uint8_t halfH[272];\
uint8_t halfHV[256];\
@@ -951,18 +817,7 @@ static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l2_aligned(dst, halfH, halfHV, stride, 16, 16, 16);\
}\
-static void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
- uint8_t full[24*17];\
- uint8_t halfH[272];\
- uint8_t halfV[256];\
- uint8_t halfHV[256];\
- copy_block17(full, src, 24, stride, 17);\
- put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
- put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
- put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
- OPNAME ## pixels16_l4_aligned0(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
-}\
-static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel16_mc31_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[24*17];\
uint8_t halfH[272];\
uint8_t halfHV[256];\
@@ -972,18 +827,7 @@ static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l2_aligned(dst, halfH, halfHV, stride, 16, 16, 16);\
}\
-static void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
- uint8_t full[24*17];\
- uint8_t halfH[272];\
- uint8_t halfV[256];\
- uint8_t halfHV[256];\
- copy_block17(full, src, 24, stride, 17);\
- put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
- put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
- put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
- OPNAME ## pixels16_l4_aligned(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
-}\
-static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel16_mc13_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[24*17];\
uint8_t halfH[272];\
uint8_t halfHV[256];\
@@ -993,18 +837,7 @@ static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l2_aligned(dst, halfH+16, halfHV, stride, 16, 16, 16);\
}\
-static void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
- uint8_t full[24*17];\
- uint8_t halfH[272];\
- uint8_t halfV[256];\
- uint8_t halfHV[256];\
- copy_block17(full, src, 24, stride, 17);\
- put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
- put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
- put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
- OPNAME ## pixels16_l4_aligned0(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
-}\
-static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel16_mc33_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[24*17];\
uint8_t halfH[272];\
uint8_t halfHV[256];\
@@ -1014,32 +847,21 @@ static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l2_aligned(dst, halfH+16, halfHV, stride, 16, 16, 16);\
}\
-static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel16_mc21_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t halfH[272];\
uint8_t halfHV[256];\
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l2_aligned(dst, halfH, halfHV, stride, 16, 16, 16);\
}\
-static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel16_mc23_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t halfH[272];\
uint8_t halfHV[256];\
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l2_aligned(dst, halfH+16, halfHV, stride, 16, 16, 16);\
}\
-static void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
- uint8_t full[24*17];\
- uint8_t halfH[272];\
- uint8_t halfV[256];\
- uint8_t halfHV[256];\
- copy_block17(full, src, 24, stride, 17);\
- put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
- put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
- put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
- OPNAME ## pixels16_l2_aligned(dst, halfV, halfHV, stride, 16, 16, 16);\
-}\
-static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel16_mc12_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[24*17];\
uint8_t halfH[272];\
copy_block17(full, src, 24, stride, 17);\
@@ -1047,18 +869,7 @@ static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
put ## RND ## pixels16_l2_aligned(halfH, halfH, full, 16, 16, 24, 17);\
OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
}\
-static void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
- uint8_t full[24*17];\
- uint8_t halfH[272];\
- uint8_t halfV[256];\
- uint8_t halfHV[256];\
- copy_block17(full, src, 24, stride, 17);\
- put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
- put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
- put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
- OPNAME ## pixels16_l2_aligned(dst, halfV, halfHV, stride, 16, 16, 16);\
-}\
-static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel16_mc32_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[24*17];\
uint8_t halfH[272];\
copy_block17(full, src, 24, stride, 17);\
@@ -1066,7 +877,7 @@ static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
put ## RND ## pixels16_l2_aligned1(halfH, halfH, full+1, 16, 16, 24, 17);\
OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
}\
-static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## qpel16_mc22_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t halfH[272];\
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
@@ -1332,27 +1143,27 @@ static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t
}\
#define H264_MC(OPNAME, SIZE) \
-static void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## h264_qpel ## SIZE ## _mc00_sh4 (uint8_t *dst, uint8_t *src, int stride){\
OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
}\
\
-static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## h264_qpel ## SIZE ## _mc10_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t half[SIZE*SIZE];\
put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
OPNAME ## pixels ## SIZE ## _l2_aligned2(dst, src, half, stride, stride, SIZE, SIZE);\
}\
\
-static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## h264_qpel ## SIZE ## _mc20_sh4(uint8_t *dst, uint8_t *src, int stride){\
OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
}\
\
-static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## h264_qpel ## SIZE ## _mc30_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t half[SIZE*SIZE];\
put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
OPNAME ## pixels ## SIZE ## _l2_aligned2(dst, src+1, half, stride, stride, SIZE, SIZE);\
}\
\
-static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## h264_qpel ## SIZE ## _mc01_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[SIZE*(SIZE+5)];\
uint8_t * const full_mid= full + SIZE*2;\
uint8_t half[SIZE*SIZE];\
@@ -1361,14 +1172,14 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, i
OPNAME ## pixels ## SIZE ## _l2_aligned(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\
}\
\
-static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## h264_qpel ## SIZE ## _mc02_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[SIZE*(SIZE+5)];\
uint8_t * const full_mid= full + SIZE*2;\
copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\
}\
\
-static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## h264_qpel ## SIZE ## _mc03_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[SIZE*(SIZE+5)];\
uint8_t * const full_mid= full + SIZE*2;\
uint8_t half[SIZE*SIZE];\
@@ -1377,7 +1188,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, i
OPNAME ## pixels ## SIZE ## _l2_aligned(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\
}\
\
-static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## h264_qpel ## SIZE ## _mc11_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[SIZE*(SIZE+5)];\
uint8_t * const full_mid= full + SIZE*2;\
uint8_t halfH[SIZE*SIZE];\
@@ -1388,7 +1199,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, i
OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
-static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## h264_qpel ## SIZE ## _mc31_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[SIZE*(SIZE+5)];\
uint8_t * const full_mid= full + SIZE*2;\
uint8_t halfH[SIZE*SIZE];\
@@ -1399,7 +1210,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, i
OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
-static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## h264_qpel ## SIZE ## _mc13_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[SIZE*(SIZE+5)];\
uint8_t * const full_mid= full + SIZE*2;\
uint8_t halfH[SIZE*SIZE];\
@@ -1410,7 +1221,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, i
OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
-static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## h264_qpel ## SIZE ## _mc33_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[SIZE*(SIZE+5)];\
uint8_t * const full_mid= full + SIZE*2;\
uint8_t halfH[SIZE*SIZE];\
@@ -1421,12 +1232,12 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, i
OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
-static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## h264_qpel ## SIZE ## _mc22_sh4(uint8_t *dst, uint8_t *src, int stride){\
int16_t tmp[SIZE*(SIZE+5)];\
OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\
}\
\
-static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## h264_qpel ## SIZE ## _mc21_sh4(uint8_t *dst, uint8_t *src, int stride){\
int16_t tmp[SIZE*(SIZE+5)];\
uint8_t halfH[SIZE*SIZE];\
uint8_t halfHV[SIZE*SIZE];\
@@ -1435,7 +1246,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, i
OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
-static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## h264_qpel ## SIZE ## _mc23_sh4(uint8_t *dst, uint8_t *src, int stride){\
int16_t tmp[SIZE*(SIZE+5)];\
uint8_t halfH[SIZE*SIZE];\
uint8_t halfHV[SIZE*SIZE];\
@@ -1444,7 +1255,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, i
OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
-static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## h264_qpel ## SIZE ## _mc12_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[SIZE*(SIZE+5)];\
uint8_t * const full_mid= full + SIZE*2;\
int16_t tmp[SIZE*(SIZE+5)];\
@@ -1456,7 +1267,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, i
OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
-static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
+static void OPNAME ## h264_qpel ## SIZE ## _mc32_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[SIZE*(SIZE+5)];\
uint8_t * const full_mid= full + SIZE*2;\
int16_t tmp[SIZE*(SIZE+5)];\
@@ -1549,31 +1360,31 @@ static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int
}while(--w);
}
-static void put_mspel8_mc00_c (uint8_t *dst, uint8_t *src, int stride){
+static void put_mspel8_mc00_sh4 (uint8_t *dst, uint8_t *src, int stride){
put_pixels8_c(dst, src, stride, 8);
}
-static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
+static void put_mspel8_mc10_sh4(uint8_t *dst, uint8_t *src, int stride){
uint8_t half[64];
wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
put_pixels8_l2_aligned2(dst, src, half, stride, stride, 8, 8);
}
-static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
+static void put_mspel8_mc20_sh4(uint8_t *dst, uint8_t *src, int stride){
wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}
-static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
+static void put_mspel8_mc30_sh4(uint8_t *dst, uint8_t *src, int stride){
uint8_t half[64];
wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
put_pixels8_l2_aligned2(dst, src+1, half, stride, stride, 8, 8);
}
-static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
+static void put_mspel8_mc02_sh4(uint8_t *dst, uint8_t *src, int stride){
wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}
-static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
+static void put_mspel8_mc12_sh4(uint8_t *dst, uint8_t *src, int stride){
uint8_t halfH[88];
uint8_t halfV[64];
uint8_t halfHV[64];
@@ -1582,7 +1393,7 @@ static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
put_pixels8_l2_aligned(dst, halfV, halfHV, stride, 8, 8, 8);
}
-static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
+static void put_mspel8_mc32_sh4(uint8_t *dst, uint8_t *src, int stride){
uint8_t halfH[88];
uint8_t halfV[64];
uint8_t halfHV[64];
@@ -1591,7 +1402,7 @@ static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
put_pixels8_l2_aligned(dst, halfV, halfHV, stride, 8, 8, 8);
}
-static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
+static void put_mspel8_mc22_sh4(uint8_t *dst, uint8_t *src, int stride){
uint8_t halfH[88];
wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
diff --git a/libavcodec/sh4/sh4.h b/libavcodec/sh4/sh4.h
new file mode 100644
index 0000000..5d46540
--- /dev/null
+++ b/libavcodec/sh4/sh4.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2008 Mans Rullgard <mans at mansr.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_SH4_SH4_H
+#define AVCODEC_SH4_SH4_H
+
+#ifdef __SH4__
+# define fp_single_enter(fpscr) \
+ do { \
+ __asm__ volatile ("sts fpscr, %0 \n\t" \
+ "and %1, %0 \n\t" \
+ "lds %0, fpscr \n\t" \
+ : "=&r"(fpscr) : "r"(~(1<<19))); \
+ } while (0)
+
+# define fp_single_leave(fpscr) \
+ do { \
+ __asm__ volatile ("or %1, %0 \n\t" \
+ "lds %0, fpscr \n\t" \
+ : "+r"(fpscr) : "r"(1<<19)); \
+ } while (0)
+#else
+# define fp_single_enter(fpscr) ((void)fpscr)
+# define fp_single_leave(fpscr)
+#endif
+
+#endif /* AVCODEC_SH4_SH4_H */
diff --git a/libavcodec/simple_idct.c b/libavcodec/simple_idct.c
index 62f5b24..2cf9e8d 100644
--- a/libavcodec/simple_idct.c
+++ b/libavcodec/simple_idct.c
@@ -31,6 +31,7 @@
*/
#include "avcodec.h"
#include "dsputil.h"
+#include "mathops.h"
#include "simple_idct.h"
#if 0
@@ -55,26 +56,6 @@
#define COL_SHIFT 20 // 6
#endif
-#if defined(ARCH_POWERPC_405)
-
-/* signed 16x16 -> 32 multiply add accumulate */
-#define MAC16(rt, ra, rb) \
- __asm__ ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb));
-
-/* signed 16x16 -> 32 multiply */
-#define MUL16(rt, ra, rb) \
- __asm__ ("mullhw %0, %1, %2" : "=r" (rt) : "r" (ra), "r" (rb));
-
-#else
-
-/* signed 16x16 -> 32 multiply add accumulate */
-#define MAC16(rt, ra, rb) rt += (ra) * (rb)
-
-/* signed 16x16 -> 32 multiply */
-#define MUL16(rt, ra, rb) rt = (ra) * (rb)
-
-#endif
-
static inline void idctRowCondDC (DCTELEM * row)
{
int a0, a1, a2, a3, b0, b1, b2, b3;
@@ -137,13 +118,13 @@ static inline void idctRowCondDC (DCTELEM * row)
a2 -= W6 * row[2];
a3 -= W2 * row[2];
- MUL16(b0, W1, row[1]);
+ b0 = MUL16(W1, row[1]);
MAC16(b0, W3, row[3]);
- MUL16(b1, W3, row[1]);
+ b1 = MUL16(W3, row[1]);
MAC16(b1, -W7, row[3]);
- MUL16(b2, W5, row[1]);
+ b2 = MUL16(W5, row[1]);
MAC16(b2, -W1, row[3]);
- MUL16(b3, W7, row[1]);
+ b3 = MUL16(W7, row[1]);
MAC16(b3, -W5, row[3]);
#ifdef HAVE_FAST_64BIT
@@ -197,10 +178,10 @@ static inline void idctSparseColPut (uint8_t *dest, int line_size,
a2 += - W6*col[8*2];
a3 += - W2*col[8*2];
- MUL16(b0, W1, col[8*1]);
- MUL16(b1, W3, col[8*1]);
- MUL16(b2, W5, col[8*1]);
- MUL16(b3, W7, col[8*1]);
+ b0 = MUL16(W1, col[8*1]);
+ b1 = MUL16(W3, col[8*1]);
+ b2 = MUL16(W5, col[8*1]);
+ b3 = MUL16(W7, col[8*1]);
MAC16(b0, + W3, col[8*3]);
MAC16(b1, - W7, col[8*3]);
@@ -269,10 +250,10 @@ static inline void idctSparseColAdd (uint8_t *dest, int line_size,
a2 += - W6*col[8*2];
a3 += - W2*col[8*2];
- MUL16(b0, W1, col[8*1]);
- MUL16(b1, W3, col[8*1]);
- MUL16(b2, W5, col[8*1]);
- MUL16(b3, W7, col[8*1]);
+ b0 = MUL16(W1, col[8*1]);
+ b1 = MUL16(W3, col[8*1]);
+ b2 = MUL16(W5, col[8*1]);
+ b3 = MUL16(W7, col[8*1]);
MAC16(b0, + W3, col[8*3]);
MAC16(b1, - W7, col[8*3]);
@@ -339,10 +320,10 @@ static inline void idctSparseCol (DCTELEM * col)
a2 += - W6*col[8*2];
a3 += - W2*col[8*2];
- MUL16(b0, W1, col[8*1]);
- MUL16(b1, W3, col[8*1]);
- MUL16(b2, W5, col[8*1]);
- MUL16(b3, W7, col[8*1]);
+ b0 = MUL16(W1, col[8*1]);
+ b1 = MUL16(W3, col[8*1]);
+ b2 = MUL16(W5, col[8*1]);
+ b3 = MUL16(W7, col[8*1]);
MAC16(b0, + W3, col[8*3]);
MAC16(b1, - W7, col[8*3]);
diff --git a/libavcodec/smacker.c b/libavcodec/smacker.c
index 1690518..bcdf792 100644
--- a/libavcodec/smacker.c
+++ b/libavcodec/smacker.c
@@ -457,8 +457,8 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, const
case 2:
for(i = 0; i < 2; i++) {
uint16_t pix1, pix2;
- pix1 = smk_get_code(&gb, smk->full_tbl, smk->full_last);
pix2 = smk_get_code(&gb, smk->full_tbl, smk->full_last);
+ pix1 = smk_get_code(&gb, smk->full_tbl, smk->full_last);
AV_WL16(out,pix1);
AV_WL16(out+2,pix2);
out += stride;
@@ -559,6 +559,7 @@ static av_cold int decode_end(AVCodecContext *avctx)
static av_cold int smka_decode_init(AVCodecContext *avctx)
{
avctx->sample_fmt = SAMPLE_FMT_S16;
+ avctx->channel_layout = (avctx->channels==2) ? CH_LAYOUT_STEREO : CH_LAYOUT_MONO;
return 0;
}
diff --git a/libavcodec/snow.c b/libavcodec/snow.c
index c5c73b1..e3e5aa9 100644
--- a/libavcodec/snow.c
+++ b/libavcodec/snow.c
@@ -488,6 +488,8 @@ typedef struct SnowContext{
slice_buffer sb;
MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
+
+ uint8_t *scratchbuf;
}SnowContext;
typedef struct {
@@ -496,7 +498,7 @@ typedef struct {
IDWTELEM *b2;
IDWTELEM *b3;
int y;
-} dwt_compose_t;
+} DWTCompose;
#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
//#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
@@ -1078,19 +1080,19 @@ static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int
}
}
-static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
+static void spatial_compose53i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){
cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
cs->y = -1;
}
-static void spatial_compose53i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){
+static void spatial_compose53i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){
cs->b0 = buffer + mirror(-1-1, height-1)*stride;
cs->b1 = buffer + mirror(-1 , height-1)*stride;
cs->y = -1;
}
-static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
+static void spatial_compose53i_dy_buffered(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){
int y= cs->y;
IDWTELEM *b0= cs->b0;
@@ -1109,7 +1111,7 @@ static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb,
cs->y += 2;
}
-static void spatial_compose53i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){
+static void spatial_compose53i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){
int y= cs->y;
IDWTELEM *b0= cs->b0;
IDWTELEM *b1= cs->b1;
@@ -1128,7 +1130,7 @@ static void spatial_compose53i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width
}
static void av_unused spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){
- dwt_compose_t cs;
+ DWTCompose cs;
spatial_compose53i_init(&cs, buffer, height, stride);
while(cs.y <= height)
spatial_compose53i_dy(&cs, buffer, width, height, stride);
@@ -1196,7 +1198,7 @@ void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTE
}
}
-static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
+static void spatial_compose97i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){
cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
@@ -1204,7 +1206,7 @@ static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * s
cs->y = -3;
}
-static void spatial_compose97i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){
+static void spatial_compose97i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){
cs->b0 = buffer + mirror(-3-1, height-1)*stride;
cs->b1 = buffer + mirror(-3 , height-1)*stride;
cs->b2 = buffer + mirror(-3+1, height-1)*stride;
@@ -1212,7 +1214,7 @@ static void spatial_compose97i_init(dwt_compose_t *cs, IDWTELEM *buffer, int hei
cs->y = -3;
}
-static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
+static void spatial_compose97i_dy_buffered(DSPContext *dsp, DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){
int y = cs->y;
IDWTELEM *b0= cs->b0;
@@ -1241,7 +1243,7 @@ static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, s
cs->y += 2;
}
-static void spatial_compose97i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){
+static void spatial_compose97i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){
int y = cs->y;
IDWTELEM *b0= cs->b0;
IDWTELEM *b1= cs->b1;
@@ -1266,13 +1268,13 @@ static void spatial_compose97i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width
}
static void av_unused spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){
- dwt_compose_t cs;
+ DWTCompose cs;
spatial_compose97i_init(&cs, buffer, height, stride);
while(cs.y <= height)
spatial_compose97i_dy(&cs, buffer, width, height, stride);
}
-static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
+static void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
int level;
for(level=decomposition_count-1; level>=0; level--){
switch(type){
@@ -1282,7 +1284,7 @@ static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb,
}
}
-static void ff_spatial_idwt_init(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
+static void ff_spatial_idwt_init(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
int level;
for(level=decomposition_count-1; level>=0; level--){
switch(type){
@@ -1292,7 +1294,7 @@ static void ff_spatial_idwt_init(dwt_compose_t *cs, IDWTELEM *buffer, int width,
}
}
-static void ff_spatial_idwt_slice(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
+static void ff_spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
const int support = type==1 ? 3 : 5;
int level;
if(type==2) return;
@@ -1309,7 +1311,7 @@ static void ff_spatial_idwt_slice(dwt_compose_t *cs, IDWTELEM *buffer, int width
}
}
-static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
+static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, DWTCompose *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
const int support = type==1 ? 3 : 5;
int level;
if(type==2) return;
@@ -1327,7 +1329,7 @@ static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, s
}
static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
- dwt_compose_t cs[MAX_DECOMPOSITIONS];
+ DWTCompose cs[MAX_DECOMPOSITIONS];
int y;
ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
for(y=0; y<height; y+=4)
@@ -2423,7 +2425,7 @@ static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer
BlockNode *rb= lb+1;
uint8_t *block[4];
int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
- uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
+ uint8_t *tmp = s->scratchbuf;
uint8_t *ptmp;
int x,y;
@@ -2785,7 +2787,7 @@ static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, con
uint8_t *dst= s->current_picture.data[plane_index];
uint8_t *src= s-> input_picture.data[plane_index];
IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
- uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment
+ uint8_t *cur = s->scratchbuf;
uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)];
const int b_stride = s->b_width << s->block_max_depth;
const int b_height = s->b_height<< s->block_max_depth;
@@ -3703,6 +3705,7 @@ static av_cold int common_init(AVCodecContext *avctx){
scale_mv_ref[i][j] = 256*(i+1)/(j+1);
s->avctx->get_buffer(s->avctx, &s->mconly_picture);
+ s->scratchbuf = av_malloc(s->mconly_picture.linesize[0]*7*MB_SIZE);
return 0;
}
@@ -4004,6 +4007,7 @@ static av_cold int encode_init(AVCodecContext *avctx)
s->m.flags = avctx->flags;
s->m.bit_rate= avctx->bit_rate;
+ s->m.me.temp =
s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
@@ -4432,12 +4436,14 @@ static av_cold void common_end(SnowContext *s){
av_freep(&s->spatial_dwt_buffer);
av_freep(&s->spatial_idwt_buffer);
+ s->m.me.temp= NULL;
av_freep(&s->m.me.scratchpad);
av_freep(&s->m.me.map);
av_freep(&s->m.me.score_map);
av_freep(&s->m.obmc_scratchpad);
av_freep(&s->block);
+ av_freep(&s->scratchbuf);
for(i=0; i<MAX_REF_FRAMES; i++){
av_freep(&s->ref_mvs[i]);
@@ -4544,7 +4550,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, const
const int block_size = MB_SIZE >> s->block_max_depth;
const int block_w = plane_index ? block_size/2 : block_size;
int mb_y;
- dwt_compose_t cs[MAX_DECOMPOSITIONS];
+ DWTCompose cs[MAX_DECOMPOSITIONS];
int yd=0, yq=0;
int y;
int end_y;
diff --git a/libavcodec/snow.h b/libavcodec/snow.h
index 6f0d79f..b208e5f 100644
--- a/libavcodec/snow.h
+++ b/libavcodec/snow.h
@@ -121,16 +121,16 @@ struct slice_buffer_s {
#define W_DS 9
#endif
-extern void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width);
-extern void ff_snow_horizontal_compose97i(IDWTELEM *b, int width);
-extern void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
+void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width);
+void ff_snow_horizontal_compose97i(IDWTELEM *b, int width);
+void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
#ifdef CONFIG_SNOW_ENCODER
int w53_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h);
int w97_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h);
#else
-static int w53_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) {assert (0);}
-static int w97_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) {assert (0);}
+static int w53_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) {assert (0); return 0;}
+static int w97_32_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) {assert (0); return 0;}
#endif
/* C bits used by mmx/sse2/altivec */
diff --git a/libavcodec/sparc/dsputil_vis.c b/libavcodec/sparc/dsputil_vis.c
index a01eea3..0779395 100644
--- a/libavcodec/sparc/dsputil_vis.c
+++ b/libavcodec/sparc/dsputil_vis.c
@@ -31,9 +31,9 @@
#include "vis.h"
-extern void ff_simple_idct_put_vis(uint8_t *dest, int line_size, DCTELEM *data);
-extern void ff_simple_idct_add_vis(uint8_t *dest, int line_size, DCTELEM *data);
-extern void ff_simple_idct_vis(DCTELEM *data);
+void ff_simple_idct_put_vis(uint8_t *dest, int line_size, DCTELEM *data);
+void ff_simple_idct_add_vis(uint8_t *dest, int line_size, DCTELEM *data);
+void ff_simple_idct_vis(DCTELEM *data);
/* The trick used in some of this file is the formula from the MMX
* motion comp code, which is:
diff --git a/libavcodec/sparc/simple_idct_vis.c b/libavcodec/sparc/simple_idct_vis.c
index 1581ee1..b78bc66 100644
--- a/libavcodec/sparc/simple_idct_vis.c
+++ b/libavcodec/sparc/simple_idct_vis.c
@@ -384,7 +384,7 @@ static const DECLARE_ALIGNED_8(uint16_t, expand[4]) = {
"st %%f14, [%12+" dest "] \n\t"\
-inline void ff_simple_idct_vis(DCTELEM *data) {
+void ff_simple_idct_vis(DCTELEM *data) {
int out1, out2, out3, out4;
DECLARE_ALIGNED_8(int16_t, temp[8*8]);
diff --git a/libavcodec/svq1.c b/libavcodec/svq1.c
index a499183..790bbb3 100644
--- a/libavcodec/svq1.c
+++ b/libavcodec/svq1.c
@@ -37,7 +37,7 @@
#include "svq1_vlc.h"
/* standard video sizes */
-const svq1_frame_size_t ff_svq1_frame_size_table[8] = {
+const struct svq1_frame_size ff_svq1_frame_size_table[8] = {
{ 160, 120 }, { 128, 96 }, { 176, 144 }, { 352, 288 },
{ 704, 576 }, { 240, 180 }, { 320, 240 }, { -1, -1 }
};
diff --git a/libavcodec/svq1.h b/libavcodec/svq1.h
index a8469d9..a4b5a16 100644
--- a/libavcodec/svq1.h
+++ b/libavcodec/svq1.h
@@ -42,10 +42,10 @@
#define SVQ1_BLOCK_INTER_4V 2
#define SVQ1_BLOCK_INTRA 3
-typedef struct {
+struct svq1_frame_size {
int width;
int height;
-} svq1_frame_size_t;
+};
uint16_t ff_svq1_packet_checksum (const uint8_t *data, const int length,
int value);
@@ -59,6 +59,6 @@ extern const uint8_t ff_svq1_inter_multistage_vlc[6][8][2];
extern const uint16_t ff_svq1_intra_mean_vlc[256][2];
extern const uint16_t ff_svq1_inter_mean_vlc[512][2];
-extern const svq1_frame_size_t ff_svq1_frame_size_table[8];
+extern const struct svq1_frame_size ff_svq1_frame_size_table[8];
#endif /* AVCODEC_SVQ1_H */
diff --git a/libavcodec/svq1dec.c b/libavcodec/svq1dec.c
index 8f399ab..d306149 100644
--- a/libavcodec/svq1dec.c
+++ b/libavcodec/svq1dec.c
@@ -56,7 +56,7 @@ static VLC svq1_inter_mean;
typedef struct svq1_pmv_s {
int x;
int y;
-} svq1_pmv_t;
+} svq1_pmv;
static const uint16_t checksum_table[256] = {
0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50A5, 0x60C6, 0x70E7,
@@ -301,7 +301,7 @@ static int svq1_decode_block_non_intra (GetBitContext *bitbuf, uint8_t *pixels,
return 0;
}
-static int svq1_decode_motion_vector (GetBitContext *bitbuf, svq1_pmv_t *mv, svq1_pmv_t **pmv) {
+static int svq1_decode_motion_vector (GetBitContext *bitbuf, svq1_pmv *mv, svq1_pmv **pmv) {
int diff;
int i;
@@ -342,11 +342,11 @@ static void svq1_skip_block (uint8_t *current, uint8_t *previous, int pitch, int
static int svq1_motion_inter_block (MpegEncContext *s, GetBitContext *bitbuf,
uint8_t *current, uint8_t *previous, int pitch,
- svq1_pmv_t *motion, int x, int y) {
+ svq1_pmv *motion, int x, int y) {
uint8_t *src;
uint8_t *dst;
- svq1_pmv_t mv;
- svq1_pmv_t *pmv[3];
+ svq1_pmv mv;
+ svq1_pmv *pmv[3];
int result;
/* predict and decode motion vector */
@@ -394,11 +394,11 @@ static int svq1_motion_inter_block (MpegEncContext *s, GetBitContext *bitbuf,
static int svq1_motion_inter_4v_block (MpegEncContext *s, GetBitContext *bitbuf,
uint8_t *current, uint8_t *previous, int pitch,
- svq1_pmv_t *motion,int x, int y) {
+ svq1_pmv *motion,int x, int y) {
uint8_t *src;
uint8_t *dst;
- svq1_pmv_t mv;
- svq1_pmv_t *pmv[4];
+ svq1_pmv mv;
+ svq1_pmv *pmv[4];
int i, result;
/* predict and decode motion vector (0) */
@@ -484,7 +484,7 @@ static int svq1_motion_inter_4v_block (MpegEncContext *s, GetBitContext *bitbuf,
static int svq1_decode_delta_block (MpegEncContext *s, GetBitContext *bitbuf,
uint8_t *current, uint8_t *previous, int pitch,
- svq1_pmv_t *motion, int x, int y) {
+ svq1_pmv *motion, int x, int y) {
uint32_t block_type;
int result = 0;
@@ -727,9 +727,9 @@ static int svq1_decode_frame(AVCodecContext *avctx,
current += 16*linesize;
}
} else {
- svq1_pmv_t pmv[width/8+3];
+ svq1_pmv pmv[width/8+3];
/* delta frame */
- memset (pmv, 0, ((width / 8) + 3) * sizeof(svq1_pmv_t));
+ memset (pmv, 0, ((width / 8) + 3) * sizeof(svq1_pmv));
for (y=0; y < height; y+=16) {
for (x=0; x < width; x+=16) {
diff --git a/libavcodec/svq1enc.c b/libavcodec/svq1enc.c
index 8d0bca5..49ad3d3 100644
--- a/libavcodec/svq1enc.c
+++ b/libavcodec/svq1enc.c
@@ -67,6 +67,8 @@ typedef struct SVQ1Context {
int16_t (*motion_val16[3])[2];
int64_t rd_total;
+
+ uint8_t *scratchbuf;
} SVQ1Context;
static void svq1_write_header(SVQ1Context *s, int frame_type)
@@ -378,7 +380,7 @@ static int svq1_encode_plane(SVQ1Context *s, int plane, unsigned char *src_plane
uint8_t *decoded= decoded_plane + offset;
uint8_t *ref= ref_plane + offset;
int score[4]={0,0,0,0}, best;
- uint8_t temp[16*stride];
+ uint8_t *temp = s->scratchbuf;
if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < 3000){ //FIXME check size
av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
@@ -524,6 +526,7 @@ static int svq1_encode_frame(AVCodecContext *avctx, unsigned char *buf,
if(!s->current_picture.data[0]){
avctx->get_buffer(avctx, &s->current_picture);
avctx->get_buffer(avctx, &s->last_picture);
+ s->scratchbuf = av_malloc(s->current_picture.linesize[0] * 16);
}
temp= s->current_picture;
@@ -566,6 +569,7 @@ static av_cold int svq1_encode_end(AVCodecContext *avctx)
av_freep(&s->m.me.score_map);
av_freep(&s->mb_type);
av_freep(&s->dummy);
+ av_freep(&s->scratchbuf);
for(i=0; i<3; i++){
av_freep(&s->motion_val8[i]);
diff --git a/libavcodec/svq3.c b/libavcodec/svq3.c
index 1da6802..f438ba8 100644
--- a/libavcodec/svq3.c
+++ b/libavcodec/svq3.c
@@ -37,7 +37,7 @@
*
* You will know you have these parameters passed correctly when the decoder
* correctly decodes this file:
- * ftp://ftp.mplayerhq.hu/MPlayer/samples/V-codecs/SVQ3/Vertical400kbit.sorenson3.mov
+ * http://samples.mplayerhq.hu/V-codecs/SVQ3/Vertical400kbit.sorenson3.mov
*/
#ifdef CONFIG_ZLIB
@@ -65,93 +65,96 @@
/
o-->o-->o-->o
*/
-static const uint8_t svq3_scan[16]={
- 0+0*4, 1+0*4, 2+0*4, 2+1*4,
- 2+2*4, 3+0*4, 3+1*4, 3+2*4,
- 0+1*4, 0+2*4, 1+1*4, 1+2*4,
- 0+3*4, 1+3*4, 2+3*4, 3+3*4,
+static const uint8_t svq3_scan[16] = {
+ 0+0*4, 1+0*4, 2+0*4, 2+1*4,
+ 2+2*4, 3+0*4, 3+1*4, 3+2*4,
+ 0+1*4, 0+2*4, 1+1*4, 1+2*4,
+ 0+3*4, 1+3*4, 2+3*4, 3+3*4,
};
static const uint8_t svq3_pred_0[25][2] = {
- { 0, 0 },
- { 1, 0 }, { 0, 1 },
- { 0, 2 }, { 1, 1 }, { 2, 0 },
- { 3, 0 }, { 2, 1 }, { 1, 2 }, { 0, 3 },
- { 0, 4 }, { 1, 3 }, { 2, 2 }, { 3, 1 }, { 4, 0 },
- { 4, 1 }, { 3, 2 }, { 2, 3 }, { 1, 4 },
- { 2, 4 }, { 3, 3 }, { 4, 2 },
- { 4, 3 }, { 3, 4 },
- { 4, 4 }
+ { 0, 0 },
+ { 1, 0 }, { 0, 1 },
+ { 0, 2 }, { 1, 1 }, { 2, 0 },
+ { 3, 0 }, { 2, 1 }, { 1, 2 }, { 0, 3 },
+ { 0, 4 }, { 1, 3 }, { 2, 2 }, { 3, 1 }, { 4, 0 },
+ { 4, 1 }, { 3, 2 }, { 2, 3 }, { 1, 4 },
+ { 2, 4 }, { 3, 3 }, { 4, 2 },
+ { 4, 3 }, { 3, 4 },
+ { 4, 4 }
};
static const int8_t svq3_pred_1[6][6][5] = {
- { { 2,-1,-1,-1,-1 }, { 2, 1,-1,-1,-1 }, { 1, 2,-1,-1,-1 },
- { 2, 1,-1,-1,-1 }, { 1, 2,-1,-1,-1 }, { 1, 2,-1,-1,-1 } },
- { { 0, 2,-1,-1,-1 }, { 0, 2, 1, 4, 3 }, { 0, 1, 2, 4, 3 },
- { 0, 2, 1, 4, 3 }, { 2, 0, 1, 3, 4 }, { 0, 4, 2, 1, 3 } },
- { { 2, 0,-1,-1,-1 }, { 2, 1, 0, 4, 3 }, { 1, 2, 4, 0, 3 },
- { 2, 1, 0, 4, 3 }, { 2, 1, 4, 3, 0 }, { 1, 2, 4, 0, 3 } },
- { { 2, 0,-1,-1,-1 }, { 2, 0, 1, 4, 3 }, { 1, 2, 0, 4, 3 },
- { 2, 1, 0, 4, 3 }, { 2, 1, 3, 4, 0 }, { 2, 4, 1, 0, 3 } },
- { { 0, 2,-1,-1,-1 }, { 0, 2, 1, 3, 4 }, { 1, 2, 3, 0, 4 },
- { 2, 0, 1, 3, 4 }, { 2, 1, 3, 0, 4 }, { 2, 0, 4, 3, 1 } },
- { { 0, 2,-1,-1,-1 }, { 0, 2, 4, 1, 3 }, { 1, 4, 2, 0, 3 },
- { 4, 2, 0, 1, 3 }, { 2, 0, 1, 4, 3 }, { 4, 2, 1, 0, 3 } },
+ { { 2,-1,-1,-1,-1 }, { 2, 1,-1,-1,-1 }, { 1, 2,-1,-1,-1 },
+ { 2, 1,-1,-1,-1 }, { 1, 2,-1,-1,-1 }, { 1, 2,-1,-1,-1 } },
+ { { 0, 2,-1,-1,-1 }, { 0, 2, 1, 4, 3 }, { 0, 1, 2, 4, 3 },
+ { 0, 2, 1, 4, 3 }, { 2, 0, 1, 3, 4 }, { 0, 4, 2, 1, 3 } },
+ { { 2, 0,-1,-1,-1 }, { 2, 1, 0, 4, 3 }, { 1, 2, 4, 0, 3 },
+ { 2, 1, 0, 4, 3 }, { 2, 1, 4, 3, 0 }, { 1, 2, 4, 0, 3 } },
+ { { 2, 0,-1,-1,-1 }, { 2, 0, 1, 4, 3 }, { 1, 2, 0, 4, 3 },
+ { 2, 1, 0, 4, 3 }, { 2, 1, 3, 4, 0 }, { 2, 4, 1, 0, 3 } },
+ { { 0, 2,-1,-1,-1 }, { 0, 2, 1, 3, 4 }, { 1, 2, 3, 0, 4 },
+ { 2, 0, 1, 3, 4 }, { 2, 1, 3, 0, 4 }, { 2, 0, 4, 3, 1 } },
+ { { 0, 2,-1,-1,-1 }, { 0, 2, 4, 1, 3 }, { 1, 4, 2, 0, 3 },
+ { 4, 2, 0, 1, 3 }, { 2, 0, 1, 4, 3 }, { 4, 2, 1, 0, 3 } },
};
static const struct { uint8_t run; uint8_t level; } svq3_dct_tables[2][16] = {
- { { 0, 0 }, { 0, 1 }, { 1, 1 }, { 2, 1 }, { 0, 2 }, { 3, 1 }, { 4, 1 }, { 5, 1 },
- { 0, 3 }, { 1, 2 }, { 2, 2 }, { 6, 1 }, { 7, 1 }, { 8, 1 }, { 9, 1 }, { 0, 4 } },
- { { 0, 0 }, { 0, 1 }, { 1, 1 }, { 0, 2 }, { 2, 1 }, { 0, 3 }, { 0, 4 }, { 0, 5 },
- { 3, 1 }, { 4, 1 }, { 1, 2 }, { 1, 3 }, { 0, 6 }, { 0, 7 }, { 0, 8 }, { 0, 9 } }
+ { { 0, 0 }, { 0, 1 }, { 1, 1 }, { 2, 1 }, { 0, 2 }, { 3, 1 }, { 4, 1 }, { 5, 1 },
+ { 0, 3 }, { 1, 2 }, { 2, 2 }, { 6, 1 }, { 7, 1 }, { 8, 1 }, { 9, 1 }, { 0, 4 } },
+ { { 0, 0 }, { 0, 1 }, { 1, 1 }, { 0, 2 }, { 2, 1 }, { 0, 3 }, { 0, 4 }, { 0, 5 },
+ { 3, 1 }, { 4, 1 }, { 1, 2 }, { 1, 3 }, { 0, 6 }, { 0, 7 }, { 0, 8 }, { 0, 9 } }
};
static const uint32_t svq3_dequant_coeff[32] = {
- 3881, 4351, 4890, 5481, 6154, 6914, 7761, 8718,
- 9781, 10987, 12339, 13828, 15523, 17435, 19561, 21873,
- 24552, 27656, 30847, 34870, 38807, 43747, 49103, 54683,
- 61694, 68745, 77615, 89113,100253,109366,126635,141533
+ 3881, 4351, 4890, 5481, 6154, 6914, 7761, 8718,
+ 9781, 10987, 12339, 13828, 15523, 17435, 19561, 21873,
+ 24552, 27656, 30847, 34870, 38807, 43747, 49103, 54683,
+ 61694, 68745, 77615, 89113,100253,109366,126635,141533
};
-static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp){
- const int qmul= svq3_dequant_coeff[qp];
+static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp)
+{
+ const int qmul = svq3_dequant_coeff[qp];
#define stride 16
int i;
int temp[16];
- static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
- static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
-
- for(i=0; i<4; i++){
- const int offset= y_offset[i];
- const int z0= 13*(block[offset+stride*0] + block[offset+stride*4]);
- const int z1= 13*(block[offset+stride*0] - block[offset+stride*4]);
- const int z2= 7* block[offset+stride*1] - 17*block[offset+stride*5];
- const int z3= 17* block[offset+stride*1] + 7*block[offset+stride*5];
-
- temp[4*i+0]= z0+z3;
- temp[4*i+1]= z1+z2;
- temp[4*i+2]= z1-z2;
- temp[4*i+3]= z0-z3;
+ static const int x_offset[4] = {0, 1*stride, 4* stride, 5*stride};
+ static const int y_offset[4] = {0, 2*stride, 8* stride, 10*stride};
+
+ for (i = 0; i < 4; i++){
+ const int offset = y_offset[i];
+ const int z0 = 13*(block[offset+stride*0] + block[offset+stride*4]);
+ const int z1 = 13*(block[offset+stride*0] - block[offset+stride*4]);
+ const int z2 = 7* block[offset+stride*1] - 17*block[offset+stride*5];
+ const int z3 = 17* block[offset+stride*1] + 7*block[offset+stride*5];
+
+ temp[4*i+0] = z0+z3;
+ temp[4*i+1] = z1+z2;
+ temp[4*i+2] = z1-z2;
+ temp[4*i+3] = z0-z3;
}
- for(i=0; i<4; i++){
- const int offset= x_offset[i];
- const int z0= 13*(temp[4*0+i] + temp[4*2+i]);
- const int z1= 13*(temp[4*0+i] - temp[4*2+i]);
- const int z2= 7* temp[4*1+i] - 17*temp[4*3+i];
- const int z3= 17* temp[4*1+i] + 7*temp[4*3+i];
-
- block[stride*0 +offset]= ((z0 + z3)*qmul + 0x80000)>>20;
- block[stride*2 +offset]= ((z1 + z2)*qmul + 0x80000)>>20;
- block[stride*8 +offset]= ((z1 - z2)*qmul + 0x80000)>>20;
- block[stride*10+offset]= ((z0 - z3)*qmul + 0x80000)>>20;
+ for (i = 0; i < 4; i++){
+ const int offset = x_offset[i];
+ const int z0 = 13*(temp[4*0+i] + temp[4*2+i]);
+ const int z1 = 13*(temp[4*0+i] - temp[4*2+i]);
+ const int z2 = 7* temp[4*1+i] - 17*temp[4*3+i];
+ const int z3 = 17* temp[4*1+i] + 7*temp[4*3+i];
+
+ block[stride*0 +offset] = ((z0 + z3)*qmul + 0x80000) >> 20;
+ block[stride*2 +offset] = ((z1 + z2)*qmul + 0x80000) >> 20;
+ block[stride*8 +offset] = ((z1 - z2)*qmul + 0x80000) >> 20;
+ block[stride*10+offset] = ((z0 - z3)*qmul + 0x80000) >> 20;
}
}
#undef stride
-static void svq3_add_idct_c (uint8_t *dst, DCTELEM *block, int stride, int qp, int dc){
- const int qmul= svq3_dequant_coeff[qp];
+static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp,
+ int dc)
+{
+ const int qmul = svq3_dequant_coeff[qp];
int i;
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
@@ -160,859 +163,879 @@ static void svq3_add_idct_c (uint8_t *dst, DCTELEM *block, int stride, int qp, i
block[0] = 0;
}
- for (i=0; i < 4; i++) {
- const int z0= 13*(block[0 + 4*i] + block[2 + 4*i]);
- const int z1= 13*(block[0 + 4*i] - block[2 + 4*i]);
- const int z2= 7* block[1 + 4*i] - 17*block[3 + 4*i];
- const int z3= 17* block[1 + 4*i] + 7*block[3 + 4*i];
+ for (i = 0; i < 4; i++) {
+ const int z0 = 13*(block[0 + 4*i] + block[2 + 4*i]);
+ const int z1 = 13*(block[0 + 4*i] - block[2 + 4*i]);
+ const int z2 = 7* block[1 + 4*i] - 17*block[3 + 4*i];
+ const int z3 = 17* block[1 + 4*i] + 7*block[3 + 4*i];
- block[0 + 4*i]= z0 + z3;
- block[1 + 4*i]= z1 + z2;
- block[2 + 4*i]= z1 - z2;
- block[3 + 4*i]= z0 - z3;
+ block[0 + 4*i] = z0 + z3;
+ block[1 + 4*i] = z1 + z2;
+ block[2 + 4*i] = z1 - z2;
+ block[3 + 4*i] = z0 - z3;
}
- for (i=0; i < 4; i++) {
- const int z0= 13*(block[i + 4*0] + block[i + 4*2]);
- const int z1= 13*(block[i + 4*0] - block[i + 4*2]);
- const int z2= 7* block[i + 4*1] - 17*block[i + 4*3];
- const int z3= 17* block[i + 4*1] + 7*block[i + 4*3];
- const int rr= (dc + 0x80000);
-
- dst[i + stride*0]= cm[ dst[i + stride*0] + (((z0 + z3)*qmul + rr) >> 20) ];
- dst[i + stride*1]= cm[ dst[i + stride*1] + (((z1 + z2)*qmul + rr) >> 20) ];
- dst[i + stride*2]= cm[ dst[i + stride*2] + (((z1 - z2)*qmul + rr) >> 20) ];
- dst[i + stride*3]= cm[ dst[i + stride*3] + (((z0 - z3)*qmul + rr) >> 20) ];
+ for (i = 0; i < 4; i++) {
+ const int z0 = 13*(block[i + 4*0] + block[i + 4*2]);
+ const int z1 = 13*(block[i + 4*0] - block[i + 4*2]);
+ const int z2 = 7* block[i + 4*1] - 17*block[i + 4*3];
+ const int z3 = 17* block[i + 4*1] + 7*block[i + 4*3];
+ const int rr = (dc + 0x80000);
+
+ dst[i + stride*0] = cm[ dst[i + stride*0] + (((z0 + z3)*qmul + rr) >> 20) ];
+ dst[i + stride*1] = cm[ dst[i + stride*1] + (((z1 + z2)*qmul + rr) >> 20) ];
+ dst[i + stride*2] = cm[ dst[i + stride*2] + (((z1 - z2)*qmul + rr) >> 20) ];
+ dst[i + stride*3] = cm[ dst[i + stride*3] + (((z0 - z3)*qmul + rr) >> 20) ];
}
}
-static inline int svq3_decode_block (GetBitContext *gb, DCTELEM *block,
- int index, const int type) {
+static inline int svq3_decode_block(GetBitContext *gb, DCTELEM *block,
+ int index, const int type)
+{
+ static const uint8_t *const scan_patterns[4] =
+ { luma_dc_zigzag_scan, zigzag_scan, svq3_scan, chroma_dc_scan };
- static const uint8_t *const scan_patterns[4] =
- { luma_dc_zigzag_scan, zigzag_scan, svq3_scan, chroma_dc_scan };
+ int run, level, sign, vlc, limit;
+ const int intra = (3 * type) >> 2;
+ const uint8_t *const scan = scan_patterns[type];
- int run, level, sign, vlc, limit;
- const int intra = (3 * type) >> 2;
- const uint8_t *const scan = scan_patterns[type];
+ for (limit = (16 >> intra); index < 16; index = limit, limit += 8) {
+ for (; (vlc = svq3_get_ue_golomb(gb)) != 0; index++) {
- for (limit=(16 >> intra); index < 16; index=limit, limit+=8) {
- for (; (vlc = svq3_get_ue_golomb (gb)) != 0; index++) {
+ if (vlc == INVALID_VLC)
+ return -1;
- if (vlc == INVALID_VLC)
- return -1;
+ sign = (vlc & 0x1) - 1;
+ vlc = (vlc + 1) >> 1;
+
+ if (type == 3) {
+ if (vlc < 3) {
+ run = 0;
+ level = vlc;
+ } else if (vlc < 4) {
+ run = 1;
+ level = 1;
+ } else {
+ run = (vlc & 0x3);
+ level = ((vlc + 9) >> 2) - run;
+ }
+ } else {
+ if (vlc < 16) {
+ run = svq3_dct_tables[intra][vlc].run;
+ level = svq3_dct_tables[intra][vlc].level;
+ } else if (intra) {
+ run = (vlc & 0x7);
+ level = (vlc >> 3) + ((run == 0) ? 8 : ((run < 2) ? 2 : ((run < 5) ? 0 : -1)));
+ } else {
+ run = (vlc & 0xF);
+ level = (vlc >> 4) + ((run == 0) ? 4 : ((run < 3) ? 2 : ((run < 10) ? 1 : 0)));
+ }
+ }
- sign = (vlc & 0x1) - 1;
- vlc = (vlc + 1) >> 1;
+ if ((index += run) >= limit)
+ return -1;
- if (type == 3) {
- if (vlc < 3) {
- run = 0;
- level = vlc;
- } else if (vlc < 4) {
- run = 1;
- level = 1;
- } else {
- run = (vlc & 0x3);
- level = ((vlc + 9) >> 2) - run;
+ block[scan[index]] = (level ^ sign) - sign;
}
- } else {
- if (vlc < 16) {
- run = svq3_dct_tables[intra][vlc].run;
- level = svq3_dct_tables[intra][vlc].level;
- } else if (intra) {
- run = (vlc & 0x7);
- level = (vlc >> 3) + ((run == 0) ? 8 : ((run < 2) ? 2 : ((run < 5) ? 0 : -1)));
- } else {
- run = (vlc & 0xF);
- level = (vlc >> 4) + ((run == 0) ? 4 : ((run < 3) ? 2 : ((run < 10) ? 1 : 0)));
- }
- }
-
- if ((index += run) >= limit)
- return -1;
- block[scan[index]] = (level ^ sign) - sign;
- }
-
- if (type != 2) {
- break;
+ if (type != 2) {
+ break;
+ }
}
- }
- return 0;
+ return 0;
}
-static inline void svq3_mc_dir_part (MpegEncContext *s,
- int x, int y, int width, int height,
- int mx, int my, int dxy,
- int thirdpel, int dir, int avg) {
+static inline void svq3_mc_dir_part(MpegEncContext *s,
+ int x, int y, int width, int height,
+ int mx, int my, int dxy,
+ int thirdpel, int dir, int avg)
+{
+ const Picture *pic = (dir == 0) ? &s->last_picture : &s->next_picture;
+ uint8_t *src, *dest;
+ int i, emu = 0;
+ int blocksize = 2 - (width>>3); //16->0, 8->1, 4->2
- const Picture *pic = (dir == 0) ? &s->last_picture : &s->next_picture;
- uint8_t *src, *dest;
- int i, emu = 0;
- int blocksize= 2 - (width>>3); //16->0, 8->1, 4->2
+ mx += x;
+ my += y;
- mx += x;
- my += y;
+ if (mx < 0 || mx >= (s->h_edge_pos - width - 1) ||
+ my < 0 || my >= (s->v_edge_pos - height - 1)) {
- if (mx < 0 || mx >= (s->h_edge_pos - width - 1) ||
- my < 0 || my >= (s->v_edge_pos - height - 1)) {
+ if ((s->flags & CODEC_FLAG_EMU_EDGE)) {
+ emu = 1;
+ }
- if ((s->flags & CODEC_FLAG_EMU_EDGE)) {
- emu = 1;
+ mx = av_clip (mx, -16, (s->h_edge_pos - width + 15));
+ my = av_clip (my, -16, (s->v_edge_pos - height + 15));
}
- mx = av_clip (mx, -16, (s->h_edge_pos - width + 15));
- my = av_clip (my, -16, (s->v_edge_pos - height + 15));
- }
-
- /* form component predictions */
- dest = s->current_picture.data[0] + x + y*s->linesize;
- src = pic->data[0] + mx + my*s->linesize;
-
- if (emu) {
- ff_emulated_edge_mc (s->edge_emu_buffer, src, s->linesize, (width + 1), (height + 1),
- mx, my, s->h_edge_pos, s->v_edge_pos);
- src = s->edge_emu_buffer;
- }
- if(thirdpel)
- (avg ? s->dsp.avg_tpel_pixels_tab : s->dsp.put_tpel_pixels_tab)[dxy](dest, src, s->linesize, width, height);
- else
- (avg ? s->dsp.avg_pixels_tab : s->dsp.put_pixels_tab)[blocksize][dxy](dest, src, s->linesize, height);
-
- if (!(s->flags & CODEC_FLAG_GRAY)) {
- mx = (mx + (mx < (int) x)) >> 1;
- my = (my + (my < (int) y)) >> 1;
- width = (width >> 1);
- height = (height >> 1);
- blocksize++;
-
- for (i=1; i < 3; i++) {
- dest = s->current_picture.data[i] + (x >> 1) + (y >> 1)*s->uvlinesize;
- src = pic->data[i] + mx + my*s->uvlinesize;
-
- if (emu) {
- ff_emulated_edge_mc (s->edge_emu_buffer, src, s->uvlinesize, (width + 1), (height + 1),
- mx, my, (s->h_edge_pos >> 1), (s->v_edge_pos >> 1));
+ /* form component predictions */
+ dest = s->current_picture.data[0] + x + y*s->linesize;
+ src = pic->data[0] + mx + my*s->linesize;
+
+ if (emu) {
+ ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, (width + 1), (height + 1),
+ mx, my, s->h_edge_pos, s->v_edge_pos);
src = s->edge_emu_buffer;
- }
- if(thirdpel)
- (avg ? s->dsp.avg_tpel_pixels_tab : s->dsp.put_tpel_pixels_tab)[dxy](dest, src, s->uvlinesize, width, height);
- else
- (avg ? s->dsp.avg_pixels_tab : s->dsp.put_pixels_tab)[blocksize][dxy](dest, src, s->uvlinesize, height);
}
- }
+ if (thirdpel)
+ (avg ? s->dsp.avg_tpel_pixels_tab : s->dsp.put_tpel_pixels_tab)[dxy](dest, src, s->linesize, width, height);
+ else
+ (avg ? s->dsp.avg_pixels_tab : s->dsp.put_pixels_tab)[blocksize][dxy](dest, src, s->linesize, height);
+
+ if (!(s->flags & CODEC_FLAG_GRAY)) {
+ mx = (mx + (mx < (int) x)) >> 1;
+ my = (my + (my < (int) y)) >> 1;
+ width = (width >> 1);
+ height = (height >> 1);
+ blocksize++;
+
+ for (i = 1; i < 3; i++) {
+ dest = s->current_picture.data[i] + (x >> 1) + (y >> 1)*s->uvlinesize;
+ src = pic->data[i] + mx + my*s->uvlinesize;
+
+ if (emu) {
+ ff_emulated_edge_mc(s->edge_emu_buffer, src, s->uvlinesize, (width + 1), (height + 1),
+ mx, my, (s->h_edge_pos >> 1), (s->v_edge_pos >> 1));
+ src = s->edge_emu_buffer;
+ }
+ if (thirdpel)
+ (avg ? s->dsp.avg_tpel_pixels_tab : s->dsp.put_tpel_pixels_tab)[dxy](dest, src, s->uvlinesize, width, height);
+ else
+ (avg ? s->dsp.avg_pixels_tab : s->dsp.put_pixels_tab)[blocksize][dxy](dest, src, s->uvlinesize, height);
+ }
+ }
}
-static inline int svq3_mc_dir (H264Context *h, int size, int mode, int dir, int avg) {
-
- int i, j, k, mx, my, dx, dy, x, y;
- MpegEncContext *const s = (MpegEncContext *) h;
- const int part_width = ((size & 5) == 4) ? 4 : 16 >> (size & 1);
- const int part_height = 16 >> ((unsigned) (size + 1) / 3);
- const int extra_width = (mode == PREDICT_MODE) ? -16*6 : 0;
- const int h_edge_pos = 6*(s->h_edge_pos - part_width ) - extra_width;
- const int v_edge_pos = 6*(s->v_edge_pos - part_height) - extra_width;
-
- for (i=0; i < 16; i+=part_height) {
- for (j=0; j < 16; j+=part_width) {
- const int b_xy = (4*s->mb_x+(j>>2)) + (4*s->mb_y+(i>>2))*h->b_stride;
- int dxy;
- x = 16*s->mb_x + j;
- y = 16*s->mb_y + i;
- k = ((j>>2)&1) + ((i>>1)&2) + ((j>>1)&4) + (i&8);
-
- if (mode != PREDICT_MODE) {
- pred_motion (h, k, (part_width >> 2), dir, 1, &mx, &my);
- } else {
- mx = s->next_picture.motion_val[0][b_xy][0]<<1;
- my = s->next_picture.motion_val[0][b_xy][1]<<1;
-
- if (dir == 0) {
- mx = ((mx * h->frame_num_offset) / h->prev_frame_num_offset + 1)>>1;
- my = ((my * h->frame_num_offset) / h->prev_frame_num_offset + 1)>>1;
- } else {
- mx = ((mx * (h->frame_num_offset - h->prev_frame_num_offset)) / h->prev_frame_num_offset + 1)>>1;
- my = ((my * (h->frame_num_offset - h->prev_frame_num_offset)) / h->prev_frame_num_offset + 1)>>1;
+static inline int svq3_mc_dir(H264Context *h, int size, int mode, int dir,
+ int avg)
+{
+ int i, j, k, mx, my, dx, dy, x, y;
+ MpegEncContext *const s = (MpegEncContext *) h;
+ const int part_width = ((size & 5) == 4) ? 4 : 16 >> (size & 1);
+ const int part_height = 16 >> ((unsigned) (size + 1) / 3);
+ const int extra_width = (mode == PREDICT_MODE) ? -16*6 : 0;
+ const int h_edge_pos = 6*(s->h_edge_pos - part_width ) - extra_width;
+ const int v_edge_pos = 6*(s->v_edge_pos - part_height) - extra_width;
+
+ for (i = 0; i < 16; i += part_height) {
+ for (j = 0; j < 16; j += part_width) {
+ const int b_xy = (4*s->mb_x + (j >> 2)) + (4*s->mb_y + (i >> 2))*h->b_stride;
+ int dxy;
+ x = 16*s->mb_x + j;
+ y = 16*s->mb_y + i;
+ k = ((j >> 2) & 1) + ((i >> 1) & 2) + ((j >> 1) & 4) + (i & 8);
+
+ if (mode != PREDICT_MODE) {
+ pred_motion(h, k, (part_width >> 2), dir, 1, &mx, &my);
+ } else {
+ mx = s->next_picture.motion_val[0][b_xy][0]<<1;
+ my = s->next_picture.motion_val[0][b_xy][1]<<1;
+
+ if (dir == 0) {
+ mx = ((mx * h->frame_num_offset) / h->prev_frame_num_offset + 1) >> 1;
+ my = ((my * h->frame_num_offset) / h->prev_frame_num_offset + 1) >> 1;
+ } else {
+ mx = ((mx * (h->frame_num_offset - h->prev_frame_num_offset)) / h->prev_frame_num_offset + 1) >> 1;
+ my = ((my * (h->frame_num_offset - h->prev_frame_num_offset)) / h->prev_frame_num_offset + 1) >> 1;
+ }
+ }
+
+ /* clip motion vector prediction to frame border */
+ mx = av_clip(mx, extra_width - 6*x, h_edge_pos - 6*x);
+ my = av_clip(my, extra_width - 6*y, v_edge_pos - 6*y);
+
+ /* get (optional) motion vector differential */
+ if (mode == PREDICT_MODE) {
+ dx = dy = 0;
+ } else {
+ dy = svq3_get_se_golomb(&s->gb);
+ dx = svq3_get_se_golomb(&s->gb);
+
+ if (dx == INVALID_VLC || dy == INVALID_VLC) {
+ av_log(h->s.avctx, AV_LOG_ERROR, "invalid MV vlc\n");
+ return -1;
+ }
+ }
+
+ /* compute motion vector */
+ if (mode == THIRDPEL_MODE) {
+ int fx, fy;
+ mx = ((mx + 1)>>1) + dx;
+ my = ((my + 1)>>1) + dy;
+ fx = ((unsigned)(mx + 0x3000))/3 - 0x1000;
+ fy = ((unsigned)(my + 0x3000))/3 - 0x1000;
+ dxy = (mx - 3*fx) + 4*(my - 3*fy);
+
+ svq3_mc_dir_part(s, x, y, part_width, part_height, fx, fy, dxy, 1, dir, avg);
+ mx += mx;
+ my += my;
+ } else if (mode == HALFPEL_MODE || mode == PREDICT_MODE) {
+ mx = ((unsigned)(mx + 1 + 0x3000))/3 + dx - 0x1000;
+ my = ((unsigned)(my + 1 + 0x3000))/3 + dy - 0x1000;
+ dxy = (mx&1) + 2*(my&1);
+
+ svq3_mc_dir_part(s, x, y, part_width, part_height, mx>>1, my>>1, dxy, 0, dir, avg);
+ mx *= 3;
+ my *= 3;
+ } else {
+ mx = ((unsigned)(mx + 3 + 0x6000))/6 + dx - 0x1000;
+ my = ((unsigned)(my + 3 + 0x6000))/6 + dy - 0x1000;
+
+ svq3_mc_dir_part(s, x, y, part_width, part_height, mx, my, 0, 0, dir, avg);
+ mx *= 6;
+ my *= 6;
+ }
+
+ /* update mv_cache */
+ if (mode != PREDICT_MODE) {
+ int32_t mv = pack16to32(mx,my);
+
+ if (part_height == 8 && i < 8) {
+ *(int32_t *) h->mv_cache[dir][scan8[k] + 1*8] = mv;
+
+ if (part_width == 8 && j < 8) {
+ *(int32_t *) h->mv_cache[dir][scan8[k] + 1 + 1*8] = mv;
+ }
+ }
+ if (part_width == 8 && j < 8) {
+ *(int32_t *) h->mv_cache[dir][scan8[k] + 1] = mv;
+ }
+ if (part_width == 4 || part_height == 4) {
+ *(int32_t *) h->mv_cache[dir][scan8[k]] = mv;
+ }
+ }
+
+ /* write back motion vectors */
+ fill_rectangle(s->current_picture.motion_val[dir][b_xy], part_width>>2, part_height>>2, h->b_stride, pack16to32(mx,my), 4);
}
- }
-
- /* clip motion vector prediction to frame border */
- mx = av_clip (mx, extra_width - 6*x, h_edge_pos - 6*x);
- my = av_clip (my, extra_width - 6*y, v_edge_pos - 6*y);
-
- /* get (optional) motion vector differential */
- if (mode == PREDICT_MODE) {
- dx = dy = 0;
- } else {
- dy = svq3_get_se_golomb (&s->gb);
- dx = svq3_get_se_golomb (&s->gb);
-
- if (dx == INVALID_VLC || dy == INVALID_VLC) {
- av_log(h->s.avctx, AV_LOG_ERROR, "invalid MV vlc\n");
- return -1;
+ }
+
+ return 0;
+}
+
+static int svq3_decode_mb(H264Context *h, unsigned int mb_type)
+{
+ int i, j, k, m, dir, mode;
+ int cbp = 0;
+ uint32_t vlc;
+ int8_t *top, *left;
+ MpegEncContext *const s = (MpegEncContext *) h;
+ const int mb_xy = h->mb_xy;
+ const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
+
+ h->top_samples_available = (s->mb_y == 0) ? 0x33FF : 0xFFFF;
+ h->left_samples_available = (s->mb_x == 0) ? 0x5F5F : 0xFFFF;
+ h->topright_samples_available = 0xFFFF;
+
+ if (mb_type == 0) { /* SKIP */
+ if (s->pict_type == FF_P_TYPE || s->next_picture.mb_type[mb_xy] == -1) {
+ svq3_mc_dir_part(s, 16*s->mb_x, 16*s->mb_y, 16, 16, 0, 0, 0, 0, 0, 0);
+
+ if (s->pict_type == FF_B_TYPE) {
+ svq3_mc_dir_part(s, 16*s->mb_x, 16*s->mb_y, 16, 16, 0, 0, 0, 0, 1, 1);
+ }
+
+ mb_type = MB_TYPE_SKIP;
+ } else {
+ mb_type = FFMIN(s->next_picture.mb_type[mb_xy], 6);
+ if (svq3_mc_dir(h, mb_type, PREDICT_MODE, 0, 0) < 0)
+ return -1;
+ if (svq3_mc_dir(h, mb_type, PREDICT_MODE, 1, 1) < 0)
+ return -1;
+
+ mb_type = MB_TYPE_16x16;
}
- }
-
- /* compute motion vector */
- if (mode == THIRDPEL_MODE) {
- int fx, fy;
- mx = ((mx + 1)>>1) + dx;
- my = ((my + 1)>>1) + dy;
- fx= ((unsigned)(mx + 0x3000))/3 - 0x1000;
- fy= ((unsigned)(my + 0x3000))/3 - 0x1000;
- dxy= (mx - 3*fx) + 4*(my - 3*fy);
-
- svq3_mc_dir_part (s, x, y, part_width, part_height, fx, fy, dxy, 1, dir, avg);
- mx += mx;
- my += my;
- } else if (mode == HALFPEL_MODE || mode == PREDICT_MODE) {
- mx = ((unsigned)(mx + 1 + 0x3000))/3 + dx - 0x1000;
- my = ((unsigned)(my + 1 + 0x3000))/3 + dy - 0x1000;
- dxy= (mx&1) + 2*(my&1);
-
- svq3_mc_dir_part (s, x, y, part_width, part_height, mx>>1, my>>1, dxy, 0, dir, avg);
- mx *= 3;
- my *= 3;
- } else {
- mx = ((unsigned)(mx + 3 + 0x6000))/6 + dx - 0x1000;
- my = ((unsigned)(my + 3 + 0x6000))/6 + dy - 0x1000;
-
- svq3_mc_dir_part (s, x, y, part_width, part_height, mx, my, 0, 0, dir, avg);
- mx *= 6;
- my *= 6;
- }
-
- /* update mv_cache */
- if (mode != PREDICT_MODE) {
- int32_t mv = pack16to32(mx,my);
-
- if (part_height == 8 && i < 8) {
- *(int32_t *) h->mv_cache[dir][scan8[k] + 1*8] = mv;
-
- if (part_width == 8 && j < 8) {
- *(int32_t *) h->mv_cache[dir][scan8[k] + 1 + 1*8] = mv;
- }
+ } else if (mb_type < 8) { /* INTER */
+ if (h->thirdpel_flag && h->halfpel_flag == !get_bits1 (&s->gb)) {
+ mode = THIRDPEL_MODE;
+ } else if (h->halfpel_flag && h->thirdpel_flag == !get_bits1 (&s->gb)) {
+ mode = HALFPEL_MODE;
+ } else {
+ mode = FULLPEL_MODE;
}
- if (part_width == 8 && j < 8) {
- *(int32_t *) h->mv_cache[dir][scan8[k] + 1] = mv;
+
+ /* fill caches */
+ /* note ref_cache should contain here:
+ ????????
+ ???11111
+ N??11111
+ N??11111
+ N??11111
+ */
+
+ for (m = 0; m < 2; m++) {
+ if (s->mb_x > 0 && h->intra4x4_pred_mode[mb_xy - 1][0] != -1) {
+ for (i = 0; i < 4; i++) {
+ *(uint32_t *) h->mv_cache[m][scan8[0] - 1 + i*8] = *(uint32_t *) s->current_picture.motion_val[m][b_xy - 1 + i*h->b_stride];
+ }
+ } else {
+ for (i = 0; i < 4; i++) {
+ *(uint32_t *) h->mv_cache[m][scan8[0] - 1 + i*8] = 0;
+ }
+ }
+ if (s->mb_y > 0) {
+ memcpy(h->mv_cache[m][scan8[0] - 1*8], s->current_picture.motion_val[m][b_xy - h->b_stride], 4*2*sizeof(int16_t));
+ memset(&h->ref_cache[m][scan8[0] - 1*8], (h->intra4x4_pred_mode[mb_xy - s->mb_stride][4] == -1) ? PART_NOT_AVAILABLE : 1, 4);
+
+ if (s->mb_x < (s->mb_width - 1)) {
+ *(uint32_t *) h->mv_cache[m][scan8[0] + 4 - 1*8] = *(uint32_t *) s->current_picture.motion_val[m][b_xy - h->b_stride + 4];
+ h->ref_cache[m][scan8[0] + 4 - 1*8] =
+ (h->intra4x4_pred_mode[mb_xy - s->mb_stride + 1][0] == -1 ||
+ h->intra4x4_pred_mode[mb_xy - s->mb_stride ][4] == -1) ? PART_NOT_AVAILABLE : 1;
+ }else
+ h->ref_cache[m][scan8[0] + 4 - 1*8] = PART_NOT_AVAILABLE;
+ if (s->mb_x > 0) {
+ *(uint32_t *) h->mv_cache[m][scan8[0] - 1 - 1*8] = *(uint32_t *) s->current_picture.motion_val[m][b_xy - h->b_stride - 1];
+ h->ref_cache[m][scan8[0] - 1 - 1*8] = (h->intra4x4_pred_mode[mb_xy - s->mb_stride - 1][3] == -1) ? PART_NOT_AVAILABLE : 1;
+ }else
+ h->ref_cache[m][scan8[0] - 1 - 1*8] = PART_NOT_AVAILABLE;
+ }else
+ memset(&h->ref_cache[m][scan8[0] - 1*8 - 1], PART_NOT_AVAILABLE, 8);
+
+ if (s->pict_type != FF_B_TYPE)
+ break;
}
- if (part_width == 4 || part_height == 4) {
- *(int32_t *) h->mv_cache[dir][scan8[k]] = mv;
+
+ /* decode motion vector(s) and form prediction(s) */
+ if (s->pict_type == FF_P_TYPE) {
+ if (svq3_mc_dir(h, (mb_type - 1), mode, 0, 0) < 0)
+ return -1;
+ } else { /* FF_B_TYPE */
+ if (mb_type != 2) {
+ if (svq3_mc_dir(h, 0, mode, 0, 0) < 0)
+ return -1;
+ } else {
+ for (i = 0; i < 4; i++) {
+ memset(s->current_picture.motion_val[0][b_xy + i*h->b_stride], 0, 4*2*sizeof(int16_t));
+ }
+ }
+ if (mb_type != 1) {
+ if (svq3_mc_dir(h, 0, mode, 1, (mb_type == 3)) < 0)
+ return -1;
+ } else {
+ for (i = 0; i < 4; i++) {
+ memset(s->current_picture.motion_val[1][b_xy + i*h->b_stride], 0, 4*2*sizeof(int16_t));
+ }
+ }
}
- }
- /* write back motion vectors */
- fill_rectangle(s->current_picture.motion_val[dir][b_xy], part_width>>2, part_height>>2, h->b_stride, pack16to32(mx,my), 4);
- }
- }
+ mb_type = MB_TYPE_16x16;
+ } else if (mb_type == 8 || mb_type == 33) { /* INTRA4x4 */
+ memset(h->intra4x4_pred_mode_cache, -1, 8*5*sizeof(int8_t));
+
+ if (mb_type == 8) {
+ if (s->mb_x > 0) {
+ for (i = 0; i < 4; i++) {
+ h->intra4x4_pred_mode_cache[scan8[0] - 1 + i*8] = h->intra4x4_pred_mode[mb_xy - 1][i];
+ }
+ if (h->intra4x4_pred_mode_cache[scan8[0] - 1] == -1) {
+ h->left_samples_available = 0x5F5F;
+ }
+ }
+ if (s->mb_y > 0) {
+ h->intra4x4_pred_mode_cache[4+8*0] = h->intra4x4_pred_mode[mb_xy - s->mb_stride][4];
+ h->intra4x4_pred_mode_cache[5+8*0] = h->intra4x4_pred_mode[mb_xy - s->mb_stride][5];
+ h->intra4x4_pred_mode_cache[6+8*0] = h->intra4x4_pred_mode[mb_xy - s->mb_stride][6];
+ h->intra4x4_pred_mode_cache[7+8*0] = h->intra4x4_pred_mode[mb_xy - s->mb_stride][3];
+
+ if (h->intra4x4_pred_mode_cache[4+8*0] == -1) {
+ h->top_samples_available = 0x33FF;
+ }
+ }
+
+ /* decode prediction codes for luma blocks */
+ for (i = 0; i < 16; i+=2) {
+ vlc = svq3_get_ue_golomb(&s->gb);
+
+ if (vlc >= 25){
+ av_log(h->s.avctx, AV_LOG_ERROR, "luma prediction:%d\n", vlc);
+ return -1;
+ }
+
+ left = &h->intra4x4_pred_mode_cache[scan8[i] - 1];
+ top = &h->intra4x4_pred_mode_cache[scan8[i] - 8];
+
+ left[1] = svq3_pred_1[top[0] + 1][left[0] + 1][svq3_pred_0[vlc][0]];
+ left[2] = svq3_pred_1[top[1] + 1][left[1] + 1][svq3_pred_0[vlc][1]];
+
+ if (left[1] == -1 || left[2] == -1){
+ av_log(h->s.avctx, AV_LOG_ERROR, "weird prediction\n");
+ return -1;
+ }
+ }
+ } else { /* mb_type == 33, DC_128_PRED block type */
+ for (i = 0; i < 4; i++) {
+ memset(&h->intra4x4_pred_mode_cache[scan8[0] + 8*i], DC_PRED, 4);
+ }
+ }
- return 0;
-}
+ write_back_intra_pred_mode(h);
-static int svq3_decode_mb (H264Context *h, unsigned int mb_type) {
- int i, j, k, m, dir, mode;
- int cbp = 0;
- uint32_t vlc;
- int8_t *top, *left;
- MpegEncContext *const s = (MpegEncContext *) h;
- const int mb_xy = h->mb_xy;
- const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
+ if (mb_type == 8) {
+ check_intra4x4_pred_mode(h);
- h->top_samples_available = (s->mb_y == 0) ? 0x33FF : 0xFFFF;
- h->left_samples_available = (s->mb_x == 0) ? 0x5F5F : 0xFFFF;
- h->topright_samples_available = 0xFFFF;
+ h->top_samples_available = (s->mb_y == 0) ? 0x33FF : 0xFFFF;
+ h->left_samples_available = (s->mb_x == 0) ? 0x5F5F : 0xFFFF;
+ } else {
+ for (i = 0; i < 4; i++) {
+ memset(&h->intra4x4_pred_mode_cache[scan8[0] + 8*i], DC_128_PRED, 4);
+ }
- if (mb_type == 0) { /* SKIP */
- if (s->pict_type == FF_P_TYPE || s->next_picture.mb_type[mb_xy] == -1) {
- svq3_mc_dir_part (s, 16*s->mb_x, 16*s->mb_y, 16, 16, 0, 0, 0, 0, 0, 0);
+ h->top_samples_available = 0x33FF;
+ h->left_samples_available = 0x5F5F;
+ }
- if (s->pict_type == FF_B_TYPE) {
- svq3_mc_dir_part (s, 16*s->mb_x, 16*s->mb_y, 16, 16, 0, 0, 0, 0, 1, 1);
- }
+ mb_type = MB_TYPE_INTRA4x4;
+ } else { /* INTRA16x16 */
+ dir = i_mb_type_info[mb_type - 8].pred_mode;
+ dir = (dir >> 1) ^ 3*(dir & 1) ^ 1;
- mb_type = MB_TYPE_SKIP;
- } else {
- mb_type= FFMIN(s->next_picture.mb_type[mb_xy], 6);
- if(svq3_mc_dir (h, mb_type, PREDICT_MODE, 0, 0) < 0)
- return -1;
- if(svq3_mc_dir (h, mb_type, PREDICT_MODE, 1, 1) < 0)
- return -1;
+ if ((h->intra16x16_pred_mode = check_intra_pred_mode(h, dir)) == -1){
+ av_log(h->s.avctx, AV_LOG_ERROR, "check_intra_pred_mode = -1\n");
+ return -1;
+ }
- mb_type = MB_TYPE_16x16;
- }
- } else if (mb_type < 8) { /* INTER */
- if (h->thirdpel_flag && h->halfpel_flag == !get_bits1 (&s->gb)) {
- mode = THIRDPEL_MODE;
- } else if (h->halfpel_flag && h->thirdpel_flag == !get_bits1 (&s->gb)) {
- mode = HALFPEL_MODE;
- } else {
- mode = FULLPEL_MODE;
+ cbp = i_mb_type_info[mb_type - 8].cbp;
+ mb_type = MB_TYPE_INTRA16x16;
}
- /* fill caches */
- /* note ref_cache should contain here:
- ????????
- ???11111
- N??11111
- N??11111
- N??11111
- */
-
- for (m=0; m < 2; m++) {
- if (s->mb_x > 0 && h->intra4x4_pred_mode[mb_xy - 1][0] != -1) {
- for (i=0; i < 4; i++) {
- *(uint32_t *) h->mv_cache[m][scan8[0] - 1 + i*8] = *(uint32_t *) s->current_picture.motion_val[m][b_xy - 1 + i*h->b_stride];
+ if (!IS_INTER(mb_type) && s->pict_type != FF_I_TYPE) {
+ for (i = 0; i < 4; i++) {
+ memset(s->current_picture.motion_val[0][b_xy + i*h->b_stride], 0, 4*2*sizeof(int16_t));
}
- } else {
- for (i=0; i < 4; i++) {
- *(uint32_t *) h->mv_cache[m][scan8[0] - 1 + i*8] = 0;
+ if (s->pict_type == FF_B_TYPE) {
+ for (i = 0; i < 4; i++) {
+ memset(s->current_picture.motion_val[1][b_xy + i*h->b_stride], 0, 4*2*sizeof(int16_t));
+ }
}
- }
- if (s->mb_y > 0) {
- memcpy (h->mv_cache[m][scan8[0] - 1*8], s->current_picture.motion_val[m][b_xy - h->b_stride], 4*2*sizeof(int16_t));
- memset (&h->ref_cache[m][scan8[0] - 1*8], (h->intra4x4_pred_mode[mb_xy - s->mb_stride][4] == -1) ? PART_NOT_AVAILABLE : 1, 4);
-
- if (s->mb_x < (s->mb_width - 1)) {
- *(uint32_t *) h->mv_cache[m][scan8[0] + 4 - 1*8] = *(uint32_t *) s->current_picture.motion_val[m][b_xy - h->b_stride + 4];
- h->ref_cache[m][scan8[0] + 4 - 1*8] =
- (h->intra4x4_pred_mode[mb_xy - s->mb_stride + 1][0] == -1 ||
- h->intra4x4_pred_mode[mb_xy - s->mb_stride][4] == -1) ? PART_NOT_AVAILABLE : 1;
- }else
- h->ref_cache[m][scan8[0] + 4 - 1*8] = PART_NOT_AVAILABLE;
- if (s->mb_x > 0) {
- *(uint32_t *) h->mv_cache[m][scan8[0] - 1 - 1*8] = *(uint32_t *) s->current_picture.motion_val[m][b_xy - h->b_stride - 1];
- h->ref_cache[m][scan8[0] - 1 - 1*8] = (h->intra4x4_pred_mode[mb_xy - s->mb_stride - 1][3] == -1) ? PART_NOT_AVAILABLE : 1;
- }else
- h->ref_cache[m][scan8[0] - 1 - 1*8] = PART_NOT_AVAILABLE;
- }else
- memset (&h->ref_cache[m][scan8[0] - 1*8 - 1], PART_NOT_AVAILABLE, 8);
-
- if (s->pict_type != FF_B_TYPE)
- break;
+ }
+ if (!IS_INTRA4x4(mb_type)) {
+ memset(h->intra4x4_pred_mode[mb_xy], DC_PRED, 8);
+ }
+ if (!IS_SKIP(mb_type) || s->pict_type == FF_B_TYPE) {
+ memset(h->non_zero_count_cache + 8, 0, 4*9*sizeof(uint8_t));
+ s->dsp.clear_blocks(h->mb);
}
- /* decode motion vector(s) and form prediction(s) */
- if (s->pict_type == FF_P_TYPE) {
- if(svq3_mc_dir (h, (mb_type - 1), mode, 0, 0) < 0)
- return -1;
- } else { /* FF_B_TYPE */
- if (mb_type != 2) {
- if(svq3_mc_dir (h, 0, mode, 0, 0) < 0)
- return -1;
- } else {
- for (i=0; i < 4; i++) {
- memset (s->current_picture.motion_val[0][b_xy + i*h->b_stride], 0, 4*2*sizeof(int16_t));
- }
- }
- if (mb_type != 1) {
- if(svq3_mc_dir (h, 0, mode, 1, (mb_type == 3)) < 0)
- return -1;
- } else {
- for (i=0; i < 4; i++) {
- memset (s->current_picture.motion_val[1][b_xy + i*h->b_stride], 0, 4*2*sizeof(int16_t));
+ if (!IS_INTRA16x16(mb_type) && (!IS_SKIP(mb_type) || s->pict_type == FF_B_TYPE)) {
+ if ((vlc = svq3_get_ue_golomb(&s->gb)) >= 48){
+ av_log(h->s.avctx, AV_LOG_ERROR, "cbp_vlc=%d\n", vlc);
+ return -1;
}
- }
- }
- mb_type = MB_TYPE_16x16;
- } else if (mb_type == 8 || mb_type == 33) { /* INTRA4x4 */
- memset (h->intra4x4_pred_mode_cache, -1, 8*5*sizeof(int8_t));
+ cbp = IS_INTRA(mb_type) ? golomb_to_intra4x4_cbp[vlc] : golomb_to_inter_cbp[vlc];
+ }
+ if (IS_INTRA16x16(mb_type) || (s->pict_type != FF_I_TYPE && s->adaptive_quant && cbp)) {
+ s->qscale += svq3_get_se_golomb(&s->gb);
- if (mb_type == 8) {
- if (s->mb_x > 0) {
- for (i=0; i < 4; i++) {
- h->intra4x4_pred_mode_cache[scan8[0] - 1 + i*8] = h->intra4x4_pred_mode[mb_xy - 1][i];
- }
- if (h->intra4x4_pred_mode_cache[scan8[0] - 1] == -1) {
- h->left_samples_available = 0x5F5F;
+ if (s->qscale > 31){
+ av_log(h->s.avctx, AV_LOG_ERROR, "qscale:%d\n", s->qscale);
+ return -1;
}
- }
- if (s->mb_y > 0) {
- h->intra4x4_pred_mode_cache[4+8*0] = h->intra4x4_pred_mode[mb_xy - s->mb_stride][4];
- h->intra4x4_pred_mode_cache[5+8*0] = h->intra4x4_pred_mode[mb_xy - s->mb_stride][5];
- h->intra4x4_pred_mode_cache[6+8*0] = h->intra4x4_pred_mode[mb_xy - s->mb_stride][6];
- h->intra4x4_pred_mode_cache[7+8*0] = h->intra4x4_pred_mode[mb_xy - s->mb_stride][3];
-
- if (h->intra4x4_pred_mode_cache[4+8*0] == -1) {
- h->top_samples_available = 0x33FF;
+ }
+ if (IS_INTRA16x16(mb_type)) {
+ if (svq3_decode_block(&s->gb, h->mb, 0, 0)){
+ av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding intra luma dc\n");
+ return -1;
}
- }
-
- /* decode prediction codes for luma blocks */
- for (i=0; i < 16; i+=2) {
- vlc = svq3_get_ue_golomb (&s->gb);
+ }
- if (vlc >= 25){
- av_log(h->s.avctx, AV_LOG_ERROR, "luma prediction:%d\n", vlc);
- return -1;
+ if (cbp) {
+ const int index = IS_INTRA16x16(mb_type) ? 1 : 0;
+ const int type = ((s->qscale < 24 && IS_INTRA4x4(mb_type)) ? 2 : 1);
+
+ for (i = 0; i < 4; i++) {
+ if ((cbp & (1 << i))) {
+ for (j = 0; j < 4; j++) {
+ k = index ? ((j&1) + 2*(i&1) + 2*(j&2) + 4*(i&2)) : (4*i + j);
+ h->non_zero_count_cache[ scan8[k] ] = 1;
+
+ if (svq3_decode_block(&s->gb, &h->mb[16*k], index, type)){
+ av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding block\n");
+ return -1;
+ }
+ }
+ }
}
- left = &h->intra4x4_pred_mode_cache[scan8[i] - 1];
- top = &h->intra4x4_pred_mode_cache[scan8[i] - 8];
+ if ((cbp & 0x30)) {
+ for (i = 0; i < 2; ++i) {
+ if (svq3_decode_block(&s->gb, &h->mb[16*(16 + 4*i)], 0, 3)){
+ av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding chroma dc block\n");
+ return -1;
+ }
+ }
+
+ if ((cbp & 0x20)) {
+ for (i = 0; i < 8; i++) {
+ h->non_zero_count_cache[ scan8[16+i] ] = 1;
+
+ if (svq3_decode_block(&s->gb, &h->mb[16*(16 + i)], 1, 1)){
+ av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding chroma ac block\n");
+ return -1;
+ }
+ }
+ }
+ }
+ }
- left[1] = svq3_pred_1[top[0] + 1][left[0] + 1][svq3_pred_0[vlc][0]];
- left[2] = svq3_pred_1[top[1] + 1][left[1] + 1][svq3_pred_0[vlc][1]];
+ h->cbp= cbp;
+ s->current_picture.mb_type[mb_xy] = mb_type;
- if (left[1] == -1 || left[2] == -1){
- av_log(h->s.avctx, AV_LOG_ERROR, "weird prediction\n");
- return -1;
- }
- }
- } else { /* mb_type == 33, DC_128_PRED block type */
- for (i=0; i < 4; i++) {
- memset (&h->intra4x4_pred_mode_cache[scan8[0] + 8*i], DC_PRED, 4);
- }
+ if (IS_INTRA(mb_type)) {
+ h->chroma_pred_mode = check_intra_pred_mode(h, DC_PRED8x8);
}
- write_back_intra_pred_mode (h);
+ return 0;
+}
- if (mb_type == 8) {
- check_intra4x4_pred_mode (h);
+static int svq3_decode_slice_header(H264Context *h)
+{
+ MpegEncContext *const s = (MpegEncContext *) h;
+ const int mb_xy = h->mb_xy;
+ int i, header;
- h->top_samples_available = (s->mb_y == 0) ? 0x33FF : 0xFFFF;
- h->left_samples_available = (s->mb_x == 0) ? 0x5F5F : 0xFFFF;
- } else {
- for (i=0; i < 4; i++) {
- memset (&h->intra4x4_pred_mode_cache[scan8[0] + 8*i], DC_128_PRED, 4);
- }
+ header = get_bits(&s->gb, 8);
- h->top_samples_available = 0x33FF;
- h->left_samples_available = 0x5F5F;
- }
+ if (((header & 0x9F) != 1 && (header & 0x9F) != 2) || (header & 0x60) == 0) {
+ /* TODO: what? */
+ av_log(h->s.avctx, AV_LOG_ERROR, "unsupported slice header (%02X)\n", header);
+ return -1;
+ } else {
+ int length = (header >> 5) & 3;
- mb_type = MB_TYPE_INTRA4x4;
- } else { /* INTRA16x16 */
- dir = i_mb_type_info[mb_type - 8].pred_mode;
- dir = (dir >> 1) ^ 3*(dir & 1) ^ 1;
+ h->next_slice_index = get_bits_count(&s->gb) + 8*show_bits(&s->gb, 8*length) + 8*length;
- if ((h->intra16x16_pred_mode = check_intra_pred_mode (h, dir)) == -1){
- av_log(h->s.avctx, AV_LOG_ERROR, "check_intra_pred_mode = -1\n");
- return -1;
+ if (h->next_slice_index > s->gb.size_in_bits) {
+ av_log(h->s.avctx, AV_LOG_ERROR, "slice after bitstream end\n");
+ return -1;
}
- cbp = i_mb_type_info[mb_type - 8].cbp;
- mb_type = MB_TYPE_INTRA16x16;
- }
+ s->gb.size_in_bits = h->next_slice_index - 8*(length - 1);
+ skip_bits(&s->gb, 8);
- if (!IS_INTER(mb_type) && s->pict_type != FF_I_TYPE) {
- for (i=0; i < 4; i++) {
- memset (s->current_picture.motion_val[0][b_xy + i*h->b_stride], 0, 4*2*sizeof(int16_t));
- }
- if (s->pict_type == FF_B_TYPE) {
- for (i=0; i < 4; i++) {
- memset (s->current_picture.motion_val[1][b_xy + i*h->b_stride], 0, 4*2*sizeof(int16_t));
- }
+ if (h->svq3_watermark_key) {
+ uint32_t header = AV_RL32(&s->gb.buffer[(get_bits_count(&s->gb)>>3)+1]);
+ AV_WL32(&s->gb.buffer[(get_bits_count(&s->gb)>>3)+1], header ^ h->svq3_watermark_key);
+ }
+ if (length > 0) {
+ memcpy((uint8_t *) &s->gb.buffer[get_bits_count(&s->gb) >> 3],
+ &s->gb.buffer[s->gb.size_in_bits >> 3], (length - 1));
+ }
}
- }
- if (!IS_INTRA4x4(mb_type)) {
- memset (h->intra4x4_pred_mode[mb_xy], DC_PRED, 8);
- }
- if (!IS_SKIP(mb_type) || s->pict_type == FF_B_TYPE) {
- memset (h->non_zero_count_cache + 8, 0, 4*9*sizeof(uint8_t));
- s->dsp.clear_blocks(h->mb);
- }
-
- if (!IS_INTRA16x16(mb_type) && (!IS_SKIP(mb_type) || s->pict_type == FF_B_TYPE)) {
- if ((vlc = svq3_get_ue_golomb (&s->gb)) >= 48){
- av_log(h->s.avctx, AV_LOG_ERROR, "cbp_vlc=%d\n", vlc);
- return -1;
+
+ if ((i = svq3_get_ue_golomb(&s->gb)) == INVALID_VLC || i >= 3){
+ av_log(h->s.avctx, AV_LOG_ERROR, "illegal slice type %d \n", i);
+ return -1;
}
- cbp = IS_INTRA(mb_type) ? golomb_to_intra4x4_cbp[vlc] : golomb_to_inter_cbp[vlc];
- }
- if (IS_INTRA16x16(mb_type) || (s->pict_type != FF_I_TYPE && s->adaptive_quant && cbp)) {
- s->qscale += svq3_get_se_golomb (&s->gb);
+ h->slice_type = golomb_to_pict_type[i];
- if (s->qscale > 31){
- av_log(h->s.avctx, AV_LOG_ERROR, "qscale:%d\n", s->qscale);
- return -1;
- }
- }
- if (IS_INTRA16x16(mb_type)) {
- if (svq3_decode_block (&s->gb, h->mb, 0, 0)){
- av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding intra luma dc\n");
- return -1;
+ if ((header & 0x9F) == 2) {
+ i = (s->mb_num < 64) ? 6 : (1 + av_log2 (s->mb_num - 1));
+ s->mb_skip_run = get_bits(&s->gb, i) - (s->mb_x + (s->mb_y * s->mb_width));
+ } else {
+ skip_bits1(&s->gb);
+ s->mb_skip_run = 0;
}
- }
- if (cbp) {
- const int index = IS_INTRA16x16(mb_type) ? 1 : 0;
- const int type = ((s->qscale < 24 && IS_INTRA4x4(mb_type)) ? 2 : 1);
+ h->slice_num = get_bits(&s->gb, 8);
+ s->qscale = get_bits(&s->gb, 5);
+ s->adaptive_quant = get_bits1(&s->gb);
- for (i=0; i < 4; i++) {
- if ((cbp & (1 << i))) {
- for (j=0; j < 4; j++) {
- k = index ? ((j&1) + 2*(i&1) + 2*(j&2) + 4*(i&2)) : (4*i + j);
- h->non_zero_count_cache[ scan8[k] ] = 1;
+ /* unknown fields */
+ skip_bits1(&s->gb);
- if (svq3_decode_block (&s->gb, &h->mb[16*k], index, type)){
- av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding block\n");
- return -1;
- }
- }
- }
+ if (h->unknown_svq3_flag) {
+ skip_bits1(&s->gb);
}
- if ((cbp & 0x30)) {
- for (i=0; i < 2; ++i) {
- if (svq3_decode_block (&s->gb, &h->mb[16*(16 + 4*i)], 0, 3)){
- av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding chroma dc block\n");
- return -1;
- }
- }
-
- if ((cbp & 0x20)) {
- for (i=0; i < 8; i++) {
- h->non_zero_count_cache[ scan8[16+i] ] = 1;
+ skip_bits1(&s->gb);
+ skip_bits(&s->gb, 2);
- if (svq3_decode_block (&s->gb, &h->mb[16*(16 + i)], 1, 1)){
- av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding chroma ac block\n");
- return -1;
- }
- }
- }
+ while (get_bits1(&s->gb)) {
+ skip_bits(&s->gb, 8);
}
- }
- s->current_picture.mb_type[mb_xy] = mb_type;
+ /* reset intra predictors and invalidate motion vector references */
+ if (s->mb_x > 0) {
+ memset(h->intra4x4_pred_mode[mb_xy - 1], -1, 4*sizeof(int8_t));
+ memset(h->intra4x4_pred_mode[mb_xy - s->mb_x], -1, 8*sizeof(int8_t)*s->mb_x);
+ }
+ if (s->mb_y > 0) {
+ memset(h->intra4x4_pred_mode[mb_xy - s->mb_stride], -1, 8*sizeof(int8_t)*(s->mb_width - s->mb_x));
- if (IS_INTRA(mb_type)) {
- h->chroma_pred_mode = check_intra_pred_mode (h, DC_PRED8x8);
- }
+ if (s->mb_x > 0) {
+ h->intra4x4_pred_mode[mb_xy - s->mb_stride - 1][3] = -1;
+ }
+ }
- return 0;
+ return 0;
}
-static int svq3_decode_slice_header (H264Context *h) {
- MpegEncContext *const s = (MpegEncContext *) h;
- const int mb_xy = h->mb_xy;
- int i, header;
+static int svq3_decode_init(AVCodecContext *avctx)
+{
+ MpegEncContext *const s = avctx->priv_data;
+ H264Context *const h = avctx->priv_data;
+ int m;
+ unsigned char *extradata;
+ unsigned int size;
- header = get_bits (&s->gb, 8);
+ if (decode_init(avctx) < 0)
+ return -1;
- if (((header & 0x9F) != 1 && (header & 0x9F) != 2) || (header & 0x60) == 0) {
- /* TODO: what? */
- av_log(h->s.avctx, AV_LOG_ERROR, "unsupported slice header (%02X)\n", header);
- return -1;
- } else {
- int length = (header >> 5) & 3;
+ s->flags = avctx->flags;
+ s->flags2 = avctx->flags2;
+ s->unrestricted_mv = 1;
+ h->is_complex=1;
- h->next_slice_index = get_bits_count(&s->gb) + 8*show_bits (&s->gb, 8*length) + 8*length;
+ if (!s->context_initialized) {
+ s->width = avctx->width;
+ s->height = avctx->height;
+ h->halfpel_flag = 1;
+ h->thirdpel_flag = 1;
+ h->unknown_svq3_flag = 0;
+ h->chroma_qp[0] = h->chroma_qp[1] = 4;
- if (h->next_slice_index > s->gb.size_in_bits){
- av_log(h->s.avctx, AV_LOG_ERROR, "slice after bitstream end\n");
- return -1;
- }
+ if (MPV_common_init(s) < 0)
+ return -1;
- s->gb.size_in_bits = h->next_slice_index - 8*(length - 1);
- skip_bits(&s->gb, 8);
+ h->b_stride = 4*s->mb_width;
- if (h->svq3_watermark_key) {
- uint32_t header = AV_RL32(&s->gb.buffer[(get_bits_count(&s->gb)>>3)+1]);
- AV_WL32(&s->gb.buffer[(get_bits_count(&s->gb)>>3)+1], header ^ h->svq3_watermark_key);
- }
- if (length > 0) {
- memcpy ((uint8_t *) &s->gb.buffer[get_bits_count(&s->gb) >> 3],
- &s->gb.buffer[s->gb.size_in_bits >> 3], (length - 1));
- }
- }
+ alloc_tables(h);
- if ((i = svq3_get_ue_golomb (&s->gb)) == INVALID_VLC || i >= 3){
- av_log(h->s.avctx, AV_LOG_ERROR, "illegal slice type %d \n", i);
- return -1;
- }
+ /* prowl for the "SEQH" marker in the extradata */
+ extradata = (unsigned char *)avctx->extradata;
+ for (m = 0; m < avctx->extradata_size; m++) {
+ if (!memcmp(extradata, "SEQH", 4))
+ break;
+ extradata++;
+ }
- h->slice_type = golomb_to_pict_type[i];
+ /* if a match was found, parse the extra data */
+ if (extradata && !memcmp(extradata, "SEQH", 4)) {
- if ((header & 0x9F) == 2) {
- i = (s->mb_num < 64) ? 6 : (1 + av_log2 (s->mb_num - 1));
- s->mb_skip_run = get_bits (&s->gb, i) - (s->mb_x + (s->mb_y * s->mb_width));
- } else {
- skip_bits1 (&s->gb);
- s->mb_skip_run = 0;
- }
+ GetBitContext gb;
- h->slice_num = get_bits (&s->gb, 8);
- s->qscale = get_bits (&s->gb, 5);
- s->adaptive_quant = get_bits1 (&s->gb);
+ size = AV_RB32(&extradata[4]);
+ init_get_bits(&gb, extradata + 8, size*8);
- /* unknown fields */
- skip_bits1 (&s->gb);
+ /* 'frame size code' and optional 'width, height' */
+ if (get_bits(&gb, 3) == 7) {
+ skip_bits(&gb, 12);
+ skip_bits(&gb, 12);
+ }
- if (h->unknown_svq3_flag) {
- skip_bits1 (&s->gb);
- }
+ h->halfpel_flag = get_bits1(&gb);
+ h->thirdpel_flag = get_bits1(&gb);
- skip_bits1 (&s->gb);
- skip_bits (&s->gb, 2);
+ /* unknown fields */
+ skip_bits1(&gb);
+ skip_bits1(&gb);
+ skip_bits1(&gb);
+ skip_bits1(&gb);
- while (get_bits1 (&s->gb)) {
- skip_bits (&s->gb, 8);
- }
+ s->low_delay = get_bits1(&gb);
- /* reset intra predictors and invalidate motion vector references */
- if (s->mb_x > 0) {
- memset (h->intra4x4_pred_mode[mb_xy - 1], -1, 4*sizeof(int8_t));
- memset (h->intra4x4_pred_mode[mb_xy - s->mb_x], -1, 8*sizeof(int8_t)*s->mb_x);
- }
- if (s->mb_y > 0) {
- memset (h->intra4x4_pred_mode[mb_xy - s->mb_stride], -1, 8*sizeof(int8_t)*(s->mb_width - s->mb_x));
+ /* unknown field */
+ skip_bits1(&gb);
- if (s->mb_x > 0) {
- h->intra4x4_pred_mode[mb_xy - s->mb_stride - 1][3] = -1;
+ while (get_bits1(&gb)) {
+ skip_bits(&gb, 8);
+ }
+
+ h->unknown_svq3_flag = get_bits1(&gb);
+ avctx->has_b_frames = !s->low_delay;
+ if (h->unknown_svq3_flag) {
+#ifdef CONFIG_ZLIB
+ unsigned watermark_width = svq3_get_ue_golomb(&gb);
+ unsigned watermark_height = svq3_get_ue_golomb(&gb);
+ int u1 = svq3_get_ue_golomb(&gb);
+ int u2 = get_bits(&gb, 8);
+ int u3 = get_bits(&gb, 2);
+ int u4 = svq3_get_ue_golomb(&gb);
+ unsigned buf_len = watermark_width*watermark_height*4;
+ int offset = (get_bits_count(&gb)+7)>>3;
+ uint8_t *buf;
+
+ if ((uint64_t)watermark_width*4 > UINT_MAX/watermark_height)
+ return -1;
+
+ buf = av_malloc(buf_len);
+ av_log(avctx, AV_LOG_DEBUG, "watermark size: %dx%d\n", watermark_width, watermark_height);
+ av_log(avctx, AV_LOG_DEBUG, "u1: %x u2: %x u3: %x compressed data size: %d offset: %d\n", u1, u2, u3, u4, offset);
+ if (uncompress(buf, (uLong*)&buf_len, extradata + 8 + offset, size - offset) != Z_OK) {
+ av_log(avctx, AV_LOG_ERROR, "could not uncompress watermark logo\n");
+ av_free(buf);
+ return -1;
+ }
+ h->svq3_watermark_key = ff_svq1_packet_checksum(buf, buf_len, 0);
+ h->svq3_watermark_key = h->svq3_watermark_key << 16 | h->svq3_watermark_key;
+ av_log(avctx, AV_LOG_DEBUG, "watermark key %#x\n", h->svq3_watermark_key);
+ av_free(buf);
+#else
+ av_log(avctx, AV_LOG_ERROR, "this svq3 file contains watermark which need zlib support compiled in\n");
+ return -1;
+#endif
+ }
+ }
}
- }
- return 0;
+ return 0;
}
-static int svq3_decode_frame (AVCodecContext *avctx,
- void *data, int *data_size,
- const uint8_t *buf, int buf_size) {
- MpegEncContext *const s = avctx->priv_data;
- H264Context *const h = avctx->priv_data;
- int m, mb_type;
- unsigned char *extradata;
- unsigned int size;
-
- s->flags = avctx->flags;
- s->flags2 = avctx->flags2;
- s->unrestricted_mv = 1;
-
- if (!s->context_initialized) {
- s->width = avctx->width;
- s->height = avctx->height;
- h->halfpel_flag = 1;
- h->thirdpel_flag = 1;
- h->unknown_svq3_flag = 0;
- h->chroma_qp[0] = h->chroma_qp[1] = 4;
-
- if (MPV_common_init (s) < 0)
- return -1;
-
- h->b_stride = 4*s->mb_width;
-
- alloc_tables (h);
-
- /* prowl for the "SEQH" marker in the extradata */
- extradata = (unsigned char *)avctx->extradata;
- for (m = 0; m < avctx->extradata_size; m++) {
- if (!memcmp (extradata, "SEQH", 4))
- break;
- extradata++;
+static int svq3_decode_frame(AVCodecContext *avctx,
+ void *data, int *data_size,
+ const uint8_t *buf, int buf_size)
+{
+ MpegEncContext *const s = avctx->priv_data;
+ H264Context *const h = avctx->priv_data;
+ int m, mb_type;
+
+ /* special case for last picture */
+ if (buf_size == 0) {
+ if (s->next_picture_ptr && !s->low_delay) {
+ *(AVFrame *) data = *(AVFrame *) &s->next_picture;
+ s->next_picture_ptr = NULL;
+ *data_size = sizeof(AVFrame);
+ }
+ return 0;
}
- /* if a match was found, parse the extra data */
- if (extradata && !memcmp (extradata, "SEQH", 4)) {
+ init_get_bits (&s->gb, buf, 8*buf_size);
- GetBitContext gb;
+ s->mb_x = s->mb_y = h->mb_xy = 0;
- size = AV_RB32(&extradata[4]);
- init_get_bits (&gb, extradata + 8, size*8);
-
- /* 'frame size code' and optional 'width, height' */
- if (get_bits (&gb, 3) == 7) {
- skip_bits (&gb, 12);
- skip_bits (&gb, 12);
- }
-
- h->halfpel_flag = get_bits1 (&gb);
- h->thirdpel_flag = get_bits1 (&gb);
-
- /* unknown fields */
- skip_bits1 (&gb);
- skip_bits1 (&gb);
- skip_bits1 (&gb);
- skip_bits1 (&gb);
-
- s->low_delay = get_bits1 (&gb);
-
- /* unknown field */
- skip_bits1 (&gb);
-
- while (get_bits1 (&gb)) {
- skip_bits (&gb, 8);
- }
+ if (svq3_decode_slice_header(h))
+ return -1;
- h->unknown_svq3_flag = get_bits1 (&gb);
- avctx->has_b_frames = !s->low_delay;
- if (h->unknown_svq3_flag) {
-#ifdef CONFIG_ZLIB
- unsigned watermark_width = svq3_get_ue_golomb(&gb);
- unsigned watermark_height = svq3_get_ue_golomb(&gb);
- int u1 = svq3_get_ue_golomb(&gb);
- int u2 = get_bits(&gb, 8);
- int u3 = get_bits(&gb, 2);
- int u4 = svq3_get_ue_golomb(&gb);
- unsigned buf_len = watermark_width*watermark_height*4;
- int offset = (get_bits_count(&gb)+7)>>3;
- uint8_t *buf;
-
- if ((uint64_t)watermark_width*4 > UINT_MAX/watermark_height)
- return -1;
+ s->pict_type = h->slice_type;
+ s->picture_number = h->slice_num;
- buf = av_malloc(buf_len);
- av_log(avctx, AV_LOG_DEBUG, "watermark size: %dx%d\n", watermark_width, watermark_height);
- av_log(avctx, AV_LOG_DEBUG, "u1: %x u2: %x u3: %x compressed data size: %d offset: %d\n", u1, u2, u3, u4, offset);
- if (uncompress(buf, (uLong*)&buf_len, extradata + 8 + offset, size - offset) != Z_OK) {
- av_log(avctx, AV_LOG_ERROR, "could not uncompress watermark logo\n");
- av_free(buf);
- return -1;
- }
- h->svq3_watermark_key = ff_svq1_packet_checksum(buf, buf_len, 0);
- h->svq3_watermark_key = h->svq3_watermark_key << 16 | h->svq3_watermark_key;
- av_log(avctx, AV_LOG_DEBUG, "watermark key %#x\n", h->svq3_watermark_key);
- av_free(buf);
-#else
- av_log(avctx, AV_LOG_ERROR, "this svq3 file contains watermark which need zlib support compiled in\n");
- return -1;
-#endif
- }
+ if (avctx->debug&FF_DEBUG_PICT_INFO){
+ av_log(h->s.avctx, AV_LOG_DEBUG, "%c hpel:%d, tpel:%d aqp:%d qp:%d, slice_num:%02X\n",
+ av_get_pict_type_char(s->pict_type), h->halfpel_flag, h->thirdpel_flag,
+ s->adaptive_quant, s->qscale, h->slice_num);
}
- }
-
- /* special case for last picture */
- if (buf_size == 0) {
- if (s->next_picture_ptr && !s->low_delay) {
- *(AVFrame *) data = *(AVFrame *) &s->next_picture;
- s->next_picture_ptr= NULL;
- *data_size = sizeof(AVFrame);
+
+ /* for hurry_up == 5 */
+ s->current_picture.pict_type = s->pict_type;
+ s->current_picture.key_frame = (s->pict_type == FF_I_TYPE);
+
+ /* Skip B-frames if we do not have reference frames. */
+ if (s->last_picture_ptr == NULL && s->pict_type == FF_B_TYPE)
+ return 0;
+ /* Skip B-frames if we are in a hurry. */
+ if (avctx->hurry_up && s->pict_type == FF_B_TYPE)
+ return 0;
+ /* Skip everything if we are in a hurry >= 5. */
+ if (avctx->hurry_up >= 5)
+ return 0;
+ if ( (avctx->skip_frame >= AVDISCARD_NONREF && s->pict_type == FF_B_TYPE)
+ ||(avctx->skip_frame >= AVDISCARD_NONKEY && s->pict_type != FF_I_TYPE)
+ || avctx->skip_frame >= AVDISCARD_ALL)
+ return 0;
+
+ if (s->next_p_frame_damaged) {
+ if (s->pict_type == FF_B_TYPE)
+ return 0;
+ else
+ s->next_p_frame_damaged = 0;
}
- return 0;
- }
-
- init_get_bits (&s->gb, buf, 8*buf_size);
-
- s->mb_x = s->mb_y = h->mb_xy = 0;
-
- if (svq3_decode_slice_header (h))
- return -1;
-
- s->pict_type = h->slice_type;
- s->picture_number = h->slice_num;
-
- if(avctx->debug&FF_DEBUG_PICT_INFO){
- av_log(h->s.avctx, AV_LOG_DEBUG, "%c hpel:%d, tpel:%d aqp:%d qp:%d, slice_num:%02X\n",
- av_get_pict_type_char(s->pict_type), h->halfpel_flag, h->thirdpel_flag,
- s->adaptive_quant, s->qscale, h->slice_num
- );
- }
-
- /* for hurry_up==5 */
- s->current_picture.pict_type = s->pict_type;
- s->current_picture.key_frame = (s->pict_type == FF_I_TYPE);
-
- /* Skip B-frames if we do not have reference frames. */
- if (s->last_picture_ptr == NULL && s->pict_type == FF_B_TYPE) return 0;
- /* Skip B-frames if we are in a hurry. */
- if (avctx->hurry_up && s->pict_type == FF_B_TYPE) return 0;
- /* Skip everything if we are in a hurry >= 5. */
- if (avctx->hurry_up >= 5) return 0;
- if( (avctx->skip_frame >= AVDISCARD_NONREF && s->pict_type==FF_B_TYPE)
- ||(avctx->skip_frame >= AVDISCARD_NONKEY && s->pict_type!=FF_I_TYPE)
- || avctx->skip_frame >= AVDISCARD_ALL)
- return 0;
-
- if (s->next_p_frame_damaged) {
- if (s->pict_type == FF_B_TYPE)
- return 0;
- else
- s->next_p_frame_damaged = 0;
- }
- if (frame_start (h) < 0)
- return -1;
+ if (frame_start(h) < 0)
+ return -1;
+
+ if (s->pict_type == FF_B_TYPE) {
+ h->frame_num_offset = (h->slice_num - h->prev_frame_num);
- if (s->pict_type == FF_B_TYPE) {
- h->frame_num_offset = (h->slice_num - h->prev_frame_num);
+ if (h->frame_num_offset < 0) {
+ h->frame_num_offset += 256;
+ }
+ if (h->frame_num_offset == 0 || h->frame_num_offset >= h->prev_frame_num_offset) {
+ av_log(h->s.avctx, AV_LOG_ERROR, "error in B-frame picture id\n");
+ return -1;
+ }
+ } else {
+ h->prev_frame_num = h->frame_num;
+ h->frame_num = h->slice_num;
+ h->prev_frame_num_offset = (h->frame_num - h->prev_frame_num);
- if (h->frame_num_offset < 0) {
- h->frame_num_offset += 256;
- }
- if (h->frame_num_offset == 0 || h->frame_num_offset >= h->prev_frame_num_offset) {
- av_log(h->s.avctx, AV_LOG_ERROR, "error in B-frame picture id\n");
- return -1;
+ if (h->prev_frame_num_offset < 0) {
+ h->prev_frame_num_offset += 256;
+ }
}
- } else {
- h->prev_frame_num = h->frame_num;
- h->frame_num = h->slice_num;
- h->prev_frame_num_offset = (h->frame_num - h->prev_frame_num);
- if (h->prev_frame_num_offset < 0) {
- h->prev_frame_num_offset += 256;
+ for (m = 0; m < 2; m++){
+ int i;
+ for (i = 0; i < 4; i++){
+ int j;
+ for (j = -1; j < 4; j++)
+ h->ref_cache[m][scan8[0] + 8*i + j]= 1;
+ if (i < 3)
+ h->ref_cache[m][scan8[0] + 8*i + j]= PART_NOT_AVAILABLE;
+ }
}
- }
- for(m=0; m<2; m++){
- int i;
- for(i=0; i<4; i++){
- int j;
- for(j=-1; j<4; j++)
- h->ref_cache[m][scan8[0] + 8*i + j]= 1;
- if(i<3)
- h->ref_cache[m][scan8[0] + 8*i + j]= PART_NOT_AVAILABLE;
- }
- }
+ for (s->mb_y = 0; s->mb_y < s->mb_height; s->mb_y++) {
+ for (s->mb_x = 0; s->mb_x < s->mb_width; s->mb_x++) {
+ h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
- for (s->mb_y=0; s->mb_y < s->mb_height; s->mb_y++) {
- for (s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
- h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
+ if ( (get_bits_count(&s->gb) + 7) >= s->gb.size_in_bits &&
+ ((get_bits_count(&s->gb) & 7) == 0 || show_bits(&s->gb, (-get_bits_count(&s->gb) & 7)) == 0)) {
- if ( (get_bits_count(&s->gb) + 7) >= s->gb.size_in_bits &&
- ((get_bits_count(&s->gb) & 7) == 0 || show_bits (&s->gb, (-get_bits_count(&s->gb) & 7)) == 0)) {
+ skip_bits(&s->gb, h->next_slice_index - get_bits_count(&s->gb));
+ s->gb.size_in_bits = 8*buf_size;
- skip_bits(&s->gb, h->next_slice_index - get_bits_count(&s->gb));
- s->gb.size_in_bits = 8*buf_size;
+ if (svq3_decode_slice_header(h))
+ return -1;
- if (svq3_decode_slice_header (h))
- return -1;
+ /* TODO: support s->mb_skip_run */
+ }
- /* TODO: support s->mb_skip_run */
- }
+ mb_type = svq3_get_ue_golomb(&s->gb);
- mb_type = svq3_get_ue_golomb (&s->gb);
+ if (s->pict_type == FF_I_TYPE) {
+ mb_type += 8;
+ } else if (s->pict_type == FF_B_TYPE && mb_type >= 4) {
+ mb_type += 4;
+ }
+ if (mb_type > 33 || svq3_decode_mb(h, mb_type)) {
+ av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
+ return -1;
+ }
- if (s->pict_type == FF_I_TYPE) {
- mb_type += 8;
- } else if (s->pict_type == FF_B_TYPE && mb_type >= 4) {
- mb_type += 4;
- }
- if (mb_type > 33 || svq3_decode_mb (h, mb_type)) {
- av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
- return -1;
- }
+ if (mb_type != 0) {
+ hl_decode_mb (h);
+ }
- if (mb_type != 0) {
- hl_decode_mb (h);
- }
+ if (s->pict_type != FF_B_TYPE && !s->low_delay) {
+ s->current_picture.mb_type[s->mb_x + s->mb_y*s->mb_stride] =
+ (s->pict_type == FF_P_TYPE && mb_type < 8) ? (mb_type - 1) : -1;
+ }
+ }
- if (s->pict_type != FF_B_TYPE && !s->low_delay) {
- s->current_picture.mb_type[s->mb_x + s->mb_y*s->mb_stride] =
- (s->pict_type == FF_P_TYPE && mb_type < 8) ? (mb_type - 1) : -1;
- }
+ ff_draw_horiz_band(s, 16*s->mb_y, 16);
}
- ff_draw_horiz_band(s, 16*s->mb_y, 16);
- }
+ MPV_frame_end(s);
- MPV_frame_end(s);
-
- if (s->pict_type == FF_B_TYPE || s->low_delay) {
- *(AVFrame *) data = *(AVFrame *) &s->current_picture;
- } else {
- *(AVFrame *) data = *(AVFrame *) &s->last_picture;
- }
+ if (s->pict_type == FF_B_TYPE || s->low_delay) {
+ *(AVFrame *) data = *(AVFrame *) &s->current_picture;
+ } else {
+ *(AVFrame *) data = *(AVFrame *) &s->last_picture;
+ }
- avctx->frame_number = s->picture_number - 1;
+ avctx->frame_number = s->picture_number - 1;
- /* Do not output the last pic after seeking. */
- if (s->last_picture_ptr || s->low_delay) {
- *data_size = sizeof(AVFrame);
- }
+ /* Do not output the last pic after seeking. */
+ if (s->last_picture_ptr || s->low_delay) {
+ *data_size = sizeof(AVFrame);
+ }
- return buf_size;
+ return buf_size;
}
@@ -1021,7 +1044,7 @@ AVCodec svq3_decoder = {
CODEC_TYPE_VIDEO,
CODEC_ID_SVQ3,
sizeof(H264Context),
- decode_init,
+ svq3_decode_init,
NULL,
decode_end,
svq3_decode_frame,
diff --git a/libavcodec/tiff.c b/libavcodec/tiff.c
index 7e42aab..582076b 100644
--- a/libavcodec/tiff.c
+++ b/libavcodec/tiff.c
@@ -30,6 +30,7 @@
#endif
#include "lzw.h"
#include "tiff.h"
+#include "faxcompr.h"
typedef struct TiffContext {
@@ -41,8 +42,10 @@ typedef struct TiffContext {
int le;
int compr;
int invert;
+ int fax_opts;
+ int predictor;
- int strips, rps;
+ int strips, rps, sstype;
int sot;
const uint8_t* stripdata;
const uint8_t* stripsizes;
@@ -74,7 +77,7 @@ static int tget(const uint8_t **p, int type, int le){
static int tiff_unpack_strip(TiffContext *s, uint8_t* dst, int stride, const uint8_t *src, int size, int lines){
int c, line, pixels, code;
const uint8_t *ssrc = src;
- int width = s->width * (s->bpp / 8);
+ int width = s->width * s->bpp >> 3;
#ifdef CONFIG_ZLIB
uint8_t *zbuf; unsigned long outlen;
@@ -102,6 +105,29 @@ static int tiff_unpack_strip(TiffContext *s, uint8_t* dst, int stride, const uin
return -1;
}
}
+ if(s->compr == TIFF_CCITT_RLE || s->compr == TIFF_G3 || s->compr == TIFF_G4){
+ int i, ret = 0;
+ uint8_t *src2 = av_malloc(size + FF_INPUT_BUFFER_PADDING_SIZE);
+
+ if(!src2 || (unsigned)size + FF_INPUT_BUFFER_PADDING_SIZE < (unsigned)size){
+ av_log(s->avctx, AV_LOG_ERROR, "Error allocating temporary buffer\n");
+ return -1;
+ }
+ for(i = 0; i < size; i++)
+ src2[i] = ff_reverse[src[i]];
+ memset(src2+size, 0, FF_INPUT_BUFFER_PADDING_SIZE);
+ if(s->compr == TIFF_G3 && !(s->fax_opts & 1))
+ s->compr = TIFF_CCITT_RLE;
+ switch(s->compr){
+ case TIFF_CCITT_RLE:
+ case TIFF_G3:
+ case TIFF_G4:
+ ret = ff_ccitt_unpack(s->avctx, src2, size, dst, lines, stride, s->compr);
+ break;
+ }
+ av_free(src2);
+ return ret;
+ }
for(line = 0; line < lines; line++){
if(src - ssrc > size){
av_log(s->avctx, AV_LOG_ERROR, "Source data overread\n");
@@ -109,8 +135,8 @@ static int tiff_unpack_strip(TiffContext *s, uint8_t* dst, int stride, const uin
}
switch(s->compr){
case TIFF_RAW:
- memcpy(dst, src, s->width * (s->bpp / 8));
- src += s->width * (s->bpp / 8);
+ memcpy(dst, src, width);
+ src += width;
break;
case TIFF_PACKBITS:
for(pixels = 0; pixels < width;){
@@ -150,12 +176,10 @@ static int tiff_unpack_strip(TiffContext *s, uint8_t* dst, int stride, const uin
}
-static int tiff_decode_tag(TiffContext *s, const uint8_t *start, const uint8_t *buf, const uint8_t *end_buf, AVFrame *pic)
+static int tiff_decode_tag(TiffContext *s, const uint8_t *start, const uint8_t *buf, const uint8_t *end_buf)
{
int tag, type, count, off, value = 0;
- const uint8_t *src;
- uint8_t *dst;
- int i, j, ssize, soff, stride;
+ int i, j;
uint32_t *pal;
const uint8_t *rp, *gp, *bp;
@@ -176,6 +200,11 @@ static int tiff_decode_tag(TiffContext *s, const uint8_t *start, const uint8_t *
value = off;
buf = NULL;
break;
+ case TIFF_STRING:
+ if(count <= 4){
+ buf -= 4;
+ break;
+ }
default:
value = -1;
buf = start + off;
@@ -215,6 +244,9 @@ static int tiff_decode_tag(TiffContext *s, const uint8_t *start, const uint8_t *
}
}
switch(s->bpp){
+ case 1:
+ s->avctx->pix_fmt = PIX_FMT_MONOBLACK;
+ break;
case 8:
s->avctx->pix_fmt = PIX_FMT_PAL8;
break;
@@ -253,10 +285,16 @@ static int tiff_decode_tag(TiffContext *s, const uint8_t *start, const uint8_t *
break;
case TIFF_COMPR:
s->compr = value;
+ s->predictor = 0;
switch(s->compr){
case TIFF_RAW:
case TIFF_PACKBITS:
case TIFF_LZW:
+ case TIFF_CCITT_RLE:
+ break;
+ case TIFF_G3:
+ case TIFF_G4:
+ s->fax_opts = 0;
break;
case TIFF_DEFLATE:
case TIFF_ADOBE_DEFLATE:
@@ -266,15 +304,6 @@ static int tiff_decode_tag(TiffContext *s, const uint8_t *start, const uint8_t *
av_log(s->avctx, AV_LOG_ERROR, "Deflate: ZLib not compiled in\n");
return -1;
#endif
- case TIFF_G3:
- av_log(s->avctx, AV_LOG_ERROR, "CCITT G3 compression is not supported\n");
- return -1;
- case TIFF_G4:
- av_log(s->avctx, AV_LOG_ERROR, "CCITT G4 compression is not supported\n");
- return -1;
- case TIFF_CCITT_RLE:
- av_log(s->avctx, AV_LOG_ERROR, "CCITT RLE compression is not supported\n");
- return -1;
case TIFF_JPEG:
case TIFF_NEWJPEG:
av_log(s->avctx, AV_LOG_ERROR, "JPEG compression is not supported\n");
@@ -285,6 +314,8 @@ static int tiff_decode_tag(TiffContext *s, const uint8_t *start, const uint8_t *
}
break;
case TIFF_ROWSPERSTRIP:
+ if(type == TIFF_LONG && value == -1)
+ value = s->avctx->height;
if(value < 1){
av_log(s->avctx, AV_LOG_ERROR, "Incorrect value of rows per strip\n");
return -1;
@@ -314,49 +345,14 @@ static int tiff_decode_tag(TiffContext *s, const uint8_t *start, const uint8_t *
s->stripsizes = start + off;
}
s->strips = count;
+ s->sstype = type;
if(s->stripsizes > end_buf){
av_log(s->avctx, AV_LOG_ERROR, "Tag referencing position outside the image\n");
return -1;
}
- if(!pic->data[0]){
- av_log(s->avctx, AV_LOG_ERROR, "Picture initialization missing\n");
- return -1;
- }
- /* now we have the data and may start decoding */
- stride = pic->linesize[0];
- dst = pic->data[0];
- for(i = 0; i < s->height; i += s->rps){
- if(s->stripsizes)
- ssize = tget(&s->stripsizes, type, s->le);
- else
- ssize = s->stripsize;
-
- if(s->stripdata){
- soff = tget(&s->stripdata, s->sot, s->le);
- }else
- soff = s->stripoff;
- src = start + soff;
- if(tiff_unpack_strip(s, dst, stride, src, ssize, FFMIN(s->rps, s->height - i)) < 0)
- break;
- dst += s->rps * stride;
- }
break;
case TIFF_PREDICTOR:
- if(!pic->data[0]){
- av_log(s->avctx, AV_LOG_ERROR, "Picture initialization missing\n");
- return -1;
- }
- if(value == 2){
- dst = pic->data[0];
- stride = pic->linesize[0];
- soff = s->bpp >> 3;
- ssize = s->width * soff;
- for(i = 0; i < s->height; i++) {
- for(j = soff; j < ssize; j++)
- dst[j] += dst[j - soff];
- dst += stride;
- }
- }
+ s->predictor = value;
break;
case TIFF_INVERT:
switch(value){
@@ -398,6 +394,10 @@ static int tiff_decode_tag(TiffContext *s, const uint8_t *start, const uint8_t *
return -1;
}
break;
+ case TIFF_T4OPTIONS:
+ case TIFF_T6OPTIONS:
+ s->fax_opts = value;
+ break;
}
return 0;
}
@@ -411,7 +411,9 @@ static int decode_frame(AVCodecContext *avctx,
AVFrame * const p= (AVFrame*)&s->picture;
const uint8_t *orig_buf = buf, *end_buf = buf + buf_size;
int id, le, off;
- int i, entries;
+ int i, j, entries;
+ int stride, soff, ssize;
+ uint8_t *dst;
//parse image header
id = AV_RL16(buf); buf += 2;
@@ -439,10 +441,49 @@ static int decode_frame(AVCodecContext *avctx,
buf = orig_buf + off;
entries = tget_short(&buf, le);
for(i = 0; i < entries; i++){
- if(tiff_decode_tag(s, orig_buf, buf, end_buf, p) < 0)
+ if(tiff_decode_tag(s, orig_buf, buf, end_buf) < 0)
return -1;
buf += 12;
}
+ if(!s->stripdata && !s->stripoff){
+ av_log(avctx, AV_LOG_ERROR, "Image data is missing\n");
+ return -1;
+ }
+ /* now we have the data and may start decoding */
+ if(!p->data[0]){
+ av_log(s->avctx, AV_LOG_ERROR, "Picture initialization missing\n");
+ return -1;
+ }
+ if(s->strips == 1 && !s->stripsize){
+ av_log(avctx, AV_LOG_WARNING, "Image data size missing\n");
+ s->stripsize = buf_size - s->stripoff;
+ }
+ stride = p->linesize[0];
+ dst = p->data[0];
+ for(i = 0; i < s->height; i += s->rps){
+ if(s->stripsizes)
+ ssize = tget(&s->stripsizes, s->sstype, s->le);
+ else
+ ssize = s->stripsize;
+
+ if(s->stripdata){
+ soff = tget(&s->stripdata, s->sot, s->le);
+ }else
+ soff = s->stripoff;
+ if(tiff_unpack_strip(s, dst, stride, orig_buf + soff, ssize, FFMIN(s->rps, s->height - i)) < 0)
+ break;
+ dst += s->rps * stride;
+ }
+ if(s->predictor == 2){
+ dst = p->data[0];
+ soff = s->bpp >> 3;
+ ssize = s->width * soff;
+ for(i = 0; i < s->height; i++) {
+ for(j = soff; j < ssize; j++)
+ dst[j] += dst[j - soff];
+ dst += stride;
+ }
+ }
if(s->invert){
uint8_t *src;
@@ -471,6 +512,7 @@ static av_cold int tiff_init(AVCodecContext *avctx){
avctx->coded_frame= (AVFrame*)&s->picture;
s->picture.data[0] = NULL;
ff_lzw_decode_open(&s->lzw);
+ ff_ccitt_unpack_init();
return 0;
}
diff --git a/libavcodec/tiff.h b/libavcodec/tiff.h
index 1c51592..dfabe29 100644
--- a/libavcodec/tiff.h
+++ b/libavcodec/tiff.h
@@ -46,6 +46,8 @@ enum TiffTags{
TIFF_PLANAR = 0x11C,
TIFF_XPOS = 0x11E,
TIFF_YPOS = 0x11F,
+ TIFF_T4OPTIONS = 0x124,
+ TIFF_T6OPTIONS,
TIFF_RES_UNIT = 0x128,
TIFF_SOFTWARE_NAME = 0x131,
TIFF_PREDICTOR = 0x13D,
diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index 2f162bc..dcd7602 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -87,13 +87,14 @@ AVCodec *av_codec_next(AVCodec *c){
else return first_avcodec;
}
-void register_avcodec(AVCodec *format)
+void register_avcodec(AVCodec *codec)
{
AVCodec **p;
+ avcodec_init();
p = &first_avcodec;
while (*p != NULL) p = &(*p)->next;
- *p = format;
- format->next = NULL;
+ *p = codec;
+ codec->next = NULL;
}
void avcodec_set_dimensions(AVCodecContext *s, int width, int height){
@@ -563,6 +564,7 @@ static const AVOption options[]={
{"simplearm", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_SIMPLEARM, INT_MIN, INT_MAX, V|E|D, "idct"},
{"simplearmv5te", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_SIMPLEARMV5TE, INT_MIN, INT_MAX, V|E|D, "idct"},
{"simplearmv6", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_SIMPLEARMV6, INT_MIN, INT_MAX, V|E|D, "idct"},
+{"simpleneon", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_SIMPLENEON, INT_MIN, INT_MAX, V|E|D, "idct"},
{"h264", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_H264, INT_MIN, INT_MAX, V|E|D, "idct"},
{"vp3", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_VP3, INT_MIN, INT_MAX, V|E|D, "idct"},
{"ipp", NULL, 0, FF_OPT_TYPE_CONST, FF_IDCT_IPP, INT_MIN, INT_MAX, V|E|D, "idct"},
@@ -743,6 +745,8 @@ static const AVOption options[]={
{"bits_per_raw_sample", NULL, OFFSET(bits_per_raw_sample), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
{"channel_layout", NULL, OFFSET(channel_layout), FF_OPT_TYPE_INT64, DEFAULT, 0, INT64_MAX, A|E|D, "channel_layout"},
{"request_channel_layout", NULL, OFFSET(request_channel_layout), FF_OPT_TYPE_INT64, DEFAULT, 0, INT64_MAX, A|D, "request_channel_layout"},
+{"rc_max_vbv_use", NULL, OFFSET(rc_max_available_vbv_use), FF_OPT_TYPE_FLOAT, 1.0/3, 0.0, FLT_MAX, V|E},
+{"rc_min_vbv_use", NULL, OFFSET(rc_min_vbv_overflow_use), FF_OPT_TYPE_FLOAT, 3, 0.0, FLT_MAX, V|E},
{NULL},
};
@@ -1506,7 +1510,7 @@ int av_parse_video_frame_rate(AVRational *frame_rate, const char *arg)
return 0;
}
-void av_log_missing_feature(void *avc, const char *feature, int want_sample)
+void ff_log_missing_feature(void *avc, const char *feature, int want_sample)
{
av_log(avc, AV_LOG_WARNING, "%s not implemented. Update your FFmpeg "
"version to the newest one from SVN. If the problem still "
@@ -1514,7 +1518,7 @@ void av_log_missing_feature(void *avc, const char *feature, int want_sample)
"been implemented.", feature);
if(want_sample)
av_log(avc, AV_LOG_WARNING, " If you want to help, upload a sample "
- "of this file to ftp://upload.mplayerhq.hu/MPlayer/incoming/ "
- "and contact the FFmpeg-devel mailing list.");
+ "of this file to ftp://upload.ffmpeg.org/MPlayer/incoming/ "
+ "and contact the ffmpeg-devel mailing list.");
av_log(avc, AV_LOG_WARNING, "\n");
}
diff --git a/libavcodec/vc1.c b/libavcodec/vc1.c
index 375c20a..62a0a97 100644
--- a/libavcodec/vc1.c
+++ b/libavcodec/vc1.c
@@ -1059,13 +1059,13 @@ static int decode_sequence_header_adv(VC1Context *v, GetBitContext *gb)
static int decode_entry_point(AVCodecContext *avctx, GetBitContext *gb)
{
VC1Context *v = avctx->priv_data;
- int i, blink, clentry, refdist;
+ int i, blink, clentry;
av_log(avctx, AV_LOG_DEBUG, "Entry point: %08X\n", show_bits_long(gb, 32));
blink = get_bits1(gb); // broken link
clentry = get_bits1(gb); // closed entry
v->panscanflag = get_bits1(gb);
- refdist = get_bits1(gb); // refdist flag
+ v->refdist_flag = get_bits1(gb);
v->s.loop_filter = get_bits1(gb);
v->fastuvmc = get_bits1(gb);
v->extended_mv = get_bits1(gb);
@@ -1099,7 +1099,7 @@ static int decode_entry_point(AVCodecContext *avctx, GetBitContext *gb)
"BrokenLink=%i, ClosedEntry=%i, PanscanFlag=%i\n"
"RefDist=%i, Postproc=%i, FastUVMC=%i, ExtMV=%i\n"
"DQuant=%i, VSTransform=%i, Overlap=%i, Qmode=%i\n",
- blink, clentry, v->panscanflag, refdist, v->s.loop_filter,
+ blink, clentry, v->panscanflag, v->refdist_flag, v->s.loop_filter,
v->fastuvmc, v->extended_mv, v->dquant, v->vstransform, v->overlap, v->quantizer_mode);
return 0;
@@ -1394,6 +1394,8 @@ static int vc1_parse_frame_header_adv(VC1Context *v, GetBitContext* gb)
else v->halfpq = 0;
if (v->quantizer_mode == QUANT_FRAME_EXPLICIT)
v->pquantizer = get_bits1(gb);
+ if(v->postprocflag)
+ v->postproc = get_bits1(gb);
if(v->s.pict_type == FF_I_TYPE || v->s.pict_type == FF_P_TYPE) v->use_ic = 0;
@@ -1416,8 +1418,6 @@ static int vc1_parse_frame_header_adv(VC1Context *v, GetBitContext* gb)
}
break;
case FF_P_TYPE:
- if(v->postprocflag)
- v->postproc = get_bits1(gb);
if (v->extended_mv) v->mvrange = get_unary(gb, 0, 3);
else v->mvrange = 0;
v->k_x = v->mvrange + 9 + (v->mvrange >> 1); //k_x can be 9 10 12 13
@@ -1507,8 +1507,6 @@ static int vc1_parse_frame_header_adv(VC1Context *v, GetBitContext* gb)
}
break;
case FF_B_TYPE:
- if(v->postprocflag)
- v->postproc = get_bits1(gb);
if (v->extended_mv) v->mvrange = get_unary(gb, 0, 3);
else v->mvrange = 0;
v->k_x = v->mvrange + 9 + (v->mvrange >> 1); //k_x can be 9 10 12 13
diff --git a/libavcodec/vc1.h b/libavcodec/vc1.h
index 4cd7bb1..212842a 100644
--- a/libavcodec/vc1.h
+++ b/libavcodec/vc1.h
@@ -180,6 +180,7 @@ typedef struct VC1Context{
int interlace; ///< Progressive/interlaced (RPTFTM syntax element)
int tfcntrflag; ///< TFCNTR present
int panscanflag; ///< NUMPANSCANWIN, TOPLEFT{X,Y}, BOTRIGHT{X,Y} present
+ int refdist_flag; ///< REFDIST syntax element present in II, IP, PI or PP field picture headers
int extended_dmv; ///< Additional extended dmv range at P/B frame-level
int color_prim; ///< 8bits, chroma coordinates of the color primaries
int transfer_char; ///< 8bits, Opto-electronic transfer characteristics
diff --git a/libavcodec/vdpau.h b/libavcodec/vdpau.h
new file mode 100644
index 0000000..2d03356
--- /dev/null
+++ b/libavcodec/vdpau.h
@@ -0,0 +1,84 @@
+/*
+ * The Video Decode and Presentation API for UNIX (VDPAU) is used for
+ * hardware-accelerated decoding of MPEG-1/2, H.264 and VC-1.
+ *
+ * Copyright (C) 2008 NVIDIA
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VDPAU_H
+#define AVCODEC_VDPAU_H
+
+/**
+ * \defgroup Decoder VDPAU Decoder and Renderer
+ *
+ * VDPAU hardware acceleration has two modules
+ * - VDPAU decoding
+ * - VDPAU presentation
+ *
+ * The VDPAU decoding module parses all headers using FFmpeg
+ * parsing mechanisms and uses VDPAU for the actual decoding.
+ *
+ * As per the current implementation, the actual decoding
+ * and rendering (API calls) are done as part of the VDPAU
+ * presentation (vo_vdpau.c) module.
+ *
+ * @{
+ * \defgroup VDPAU_Decoding VDPAU Decoding
+ * \ingroup Decoder
+ * @{
+ */
+
+#include <vdpau/vdpau.h>
+#include <vdpau/vdpau_x11.h>
+
+/** \brief The videoSurface is used for rendering. */
+#define FF_VDPAU_STATE_USED_FOR_RENDER 1
+
+/**
+ * \brief The videoSurface is needed for reference/prediction.
+ * The codec manipulates this.
+ */
+#define FF_VDPAU_STATE_USED_FOR_REFERENCE 2
+
+/**
+ * \brief This structure is used as a callback between the FFmpeg
+ * decoder (vd_) and presentation (vo_) module.
+ * This is used for defining a video frame containing surface,
+ * picture parameter, bitstream information etc which are passed
+ * between the FFmpeg decoder and its clients.
+ */
+struct vdpau_render_state {
+ VdpVideoSurface surface; ///< Used as rendered surface, never changed.
+
+ int state; ///< Holds FF_VDPAU_STATE_* values.
+
+ /** picture parameter information for all supported codecs */
+ union VdpPictureInfo {
+ VdpPictureInfoH264 h264;
+ } info;
+
+ /** Describe size/location of the compressed video data. */
+ int bitstream_buffers_allocated;
+ int bitstream_buffers_used;
+ VdpBitstreamBuffer *bitstream_buffers;
+};
+
+/* @}*/
+
+#endif /* AVCODEC_VDPAU_H */
diff --git a/libavcodec/vdpau_internal.h b/libavcodec/vdpau_internal.h
new file mode 100644
index 0000000..6af2495
--- /dev/null
+++ b/libavcodec/vdpau_internal.h
@@ -0,0 +1,34 @@
+/*
+ * Video Decode and Presentation API for UNIX (VDPAU) is used for
+ * HW decode acceleration for MPEG-1/2, H.264 and VC-1.
+ *
+ * Copyright (C) 2008 NVIDIA.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_VDPAU_INTERNAL_H
+#define AVCODEC_VDPAU_INTERNAL_H
+
+#include <stdint.h>
+#include "h264.h"
+
+void ff_vdpau_h264_add_data_chunk(H264Context *h, const uint8_t *buf,
+ int buf_size);
+void ff_vdpau_h264_picture_complete(H264Context *h);
+
+#endif /* AVCODEC_VDPAU_INTERNAL_H */
diff --git a/libavcodec/vdpauvideo.c b/libavcodec/vdpauvideo.c
new file mode 100644
index 0000000..8b42823
--- /dev/null
+++ b/libavcodec/vdpauvideo.c
@@ -0,0 +1,183 @@
+/*
+ * Video Decode and Presentation API for UNIX (VDPAU) is used for
+ * HW decode acceleration for MPEG-1/2, H.264 and VC-1.
+ *
+ * Copyright (c) 2008 NVIDIA.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <limits.h>
+#include "avcodec.h"
+#include "h264.h"
+
+#undef NDEBUG
+#include <assert.h>
+
+#include "vdpau.h"
+#include "vdpau_internal.h"
+
+/**
+ * \addtogroup VDPAU_Decoding
+ *
+ * @{
+ */
+
+static void vdpau_h264_set_reference_frames(H264Context *h)
+{
+ MpegEncContext * s = &h->s;
+ struct vdpau_render_state * render, * render_ref;
+ VdpReferenceFrameH264 * rf, * rf2;
+ Picture * pic;
+ int i, list, pic_frame_idx;
+
+ render = (struct vdpau_render_state*)s->current_picture_ptr->data[0];
+ assert(render);
+
+ rf = &render->info.h264.referenceFrames[0];
+#define H264_RF_COUNT FF_ARRAY_ELEMS(render->info.h264.referenceFrames)
+
+ for (list = 0; list < 2; ++list) {
+ Picture **lp = list ? h->long_ref : h->short_ref;
+ int ls = list ? h->long_ref_count : h->short_ref_count;
+
+ for (i = 0; i < ls; ++i) {
+ pic = lp[i];
+ if (!pic || !pic->reference)
+ continue;
+ pic_frame_idx = pic->long_ref ? pic->pic_id : pic->frame_num;
+
+ render_ref = (struct vdpau_render_state*)pic->data[0];
+ assert(render_ref);
+
+ rf2 = &render->info.h264.referenceFrames[0];
+ while (rf2 != rf) {
+ if (
+ (rf2->surface == render_ref->surface)
+ && (rf2->is_long_term == pic->long_ref)
+ && (rf2->frame_idx == pic_frame_idx)
+ )
+ break;
+ ++rf2;
+ }
+ if (rf2 != rf) {
+ rf2->top_is_reference |= (pic->reference & PICT_TOP_FIELD) ? VDP_TRUE : VDP_FALSE;
+ rf2->bottom_is_reference |= (pic->reference & PICT_BOTTOM_FIELD) ? VDP_TRUE : VDP_FALSE;
+ continue;
+ }
+
+ if (rf >= &render->info.h264.referenceFrames[H264_RF_COUNT])
+ continue;
+
+ rf->surface = render_ref->surface;
+ rf->is_long_term = pic->long_ref;
+ rf->top_is_reference = (pic->reference & PICT_TOP_FIELD) ? VDP_TRUE : VDP_FALSE;
+ rf->bottom_is_reference = (pic->reference & PICT_BOTTOM_FIELD) ? VDP_TRUE : VDP_FALSE;
+ rf->field_order_cnt[0] = pic->field_poc[0];
+ rf->field_order_cnt[1] = pic->field_poc[1];
+ rf->frame_idx = pic_frame_idx;
+
+ ++rf;
+ }
+ }
+
+ for (; rf < &render->info.h264.referenceFrames[H264_RF_COUNT]; ++rf) {
+ rf->surface = VDP_INVALID_HANDLE;
+ rf->is_long_term = 0;
+ rf->top_is_reference = 0;
+ rf->bottom_is_reference = 0;
+ rf->field_order_cnt[0] = 0;
+ rf->field_order_cnt[1] = 0;
+ rf->frame_idx = 0;
+ }
+}
+
+void ff_vdpau_h264_add_data_chunk(H264Context *h, const uint8_t *buf, int buf_size)
+{
+ MpegEncContext * s = &h->s;
+ struct vdpau_render_state * render;
+
+ render = (struct vdpau_render_state*)s->current_picture_ptr->data[0];
+ assert(render);
+
+ if (!render->bitstream_buffers_used)
+ vdpau_h264_set_reference_frames(h);
+
+ render->bitstream_buffers= av_fast_realloc(
+ render->bitstream_buffers,
+ &render->bitstream_buffers_allocated,
+ sizeof(*render->bitstream_buffers)*(render->bitstream_buffers_used + 1)
+ );
+
+ render->bitstream_buffers[render->bitstream_buffers_used].struct_version = VDP_BITSTREAM_BUFFER_VERSION;
+ render->bitstream_buffers[render->bitstream_buffers_used].bitstream = buf;
+ render->bitstream_buffers[render->bitstream_buffers_used].bitstream_bytes = buf_size;
+ render->bitstream_buffers_used++;
+}
+
+void ff_vdpau_h264_picture_complete(H264Context *h)
+{
+ MpegEncContext * s = &h->s;
+ struct vdpau_render_state * render;
+
+ render = (struct vdpau_render_state*)s->current_picture_ptr->data[0];
+ assert(render);
+
+ render->info.h264.slice_count = h->slice_num;
+ if (render->info.h264.slice_count < 1)
+ return;
+
+ for (int i = 0; i < 2; ++i) {
+ int foc = s->current_picture_ptr->field_poc[i];
+ if (foc == INT_MAX)
+ foc = 0;
+ render->info.h264.field_order_cnt[i] = foc;
+ }
+
+ render->info.h264.is_reference = s->current_picture_ptr->reference ? VDP_TRUE : VDP_FALSE;
+ render->info.h264.frame_num = h->frame_num;
+ render->info.h264.field_pic_flag = s->picture_structure != PICT_FRAME;
+ render->info.h264.bottom_field_flag = s->picture_structure == PICT_BOTTOM_FIELD;
+ render->info.h264.num_ref_frames = h->sps.ref_frame_count;
+ render->info.h264.mb_adaptive_frame_field_flag = h->sps.mb_aff;
+ render->info.h264.constrained_intra_pred_flag = h->pps.constrained_intra_pred;
+ render->info.h264.weighted_pred_flag = h->pps.weighted_pred;
+ render->info.h264.weighted_bipred_idc = h->pps.weighted_bipred_idc;
+ render->info.h264.frame_mbs_only_flag = h->sps.frame_mbs_only_flag;
+ render->info.h264.transform_8x8_mode_flag = h->pps.transform_8x8_mode;
+ render->info.h264.chroma_qp_index_offset = h->pps.chroma_qp_index_offset[0];
+ render->info.h264.second_chroma_qp_index_offset = h->pps.chroma_qp_index_offset[1];
+ render->info.h264.pic_init_qp_minus26 = h->pps.init_qp - 26;
+ render->info.h264.num_ref_idx_l0_active_minus1 = h->pps.ref_count[0] - 1;
+ render->info.h264.num_ref_idx_l1_active_minus1 = h->pps.ref_count[1] - 1;
+ render->info.h264.log2_max_frame_num_minus4 = h->sps.log2_max_frame_num - 4;
+ render->info.h264.pic_order_cnt_type = h->sps.poc_type;
+ render->info.h264.log2_max_pic_order_cnt_lsb_minus4 = h->sps.log2_max_poc_lsb - 4;
+ render->info.h264.delta_pic_order_always_zero_flag = h->sps.delta_pic_order_always_zero_flag;
+ render->info.h264.direct_8x8_inference_flag = h->sps.direct_8x8_inference_flag;
+ render->info.h264.entropy_coding_mode_flag = h->pps.cabac;
+ render->info.h264.pic_order_present_flag = h->pps.pic_order_present;
+ render->info.h264.deblocking_filter_control_present_flag = h->pps.deblocking_filter_parameters_present;
+ render->info.h264.redundant_pic_cnt_present_flag = h->pps.redundant_pic_cnt_present;
+ memcpy(render->info.h264.scaling_lists_4x4, h->pps.scaling_matrix4, sizeof(render->info.h264.scaling_lists_4x4));
+ memcpy(render->info.h264.scaling_lists_8x8, h->pps.scaling_matrix8, sizeof(render->info.h264.scaling_lists_8x8));
+
+ ff_draw_horiz_band(s, 0, s->avctx->height);
+ render->bitstream_buffers_used = 0;
+}
+
+/* @}*/
diff --git a/libavcodec/vorbis.c b/libavcodec/vorbis.c
index f39380a..b6243b8 100644
--- a/libavcodec/vorbis.c
+++ b/libavcodec/vorbis.c
@@ -111,7 +111,7 @@ int ff_vorbis_len2vlc(uint8_t *bits, uint32_t *codes, uint_fast32_t num) {
return 0;
}
-void ff_vorbis_ready_floor1_list(floor1_entry_t * list, int values) {
+void ff_vorbis_ready_floor1_list(vorbis_floor1_entry * list, int values) {
int i;
list[0].sort = 0;
list[1].sort = 1;
@@ -162,7 +162,7 @@ static void render_line(int x0, int y0, int x1, int y1, float * buf) {
}
}
-void ff_vorbis_floor1_render_list(floor1_entry_t * list, int values, uint_fast16_t * y_list, int * flag, int multiplier, float * out, int samples) {
+void ff_vorbis_floor1_render_list(vorbis_floor1_entry * list, int values, uint_fast16_t * y_list, int * flag, int multiplier, float * out, int samples) {
int lx, ly, i;
lx = 0;
ly = y_list[0] * multiplier;
diff --git a/libavcodec/vorbis.h b/libavcodec/vorbis.h
index b8a8410..dc99acc 100644
--- a/libavcodec/vorbis.h
+++ b/libavcodec/vorbis.h
@@ -31,12 +31,12 @@ typedef struct {
uint_fast16_t sort;
uint_fast16_t low;
uint_fast16_t high;
-} floor1_entry_t;
+} vorbis_floor1_entry;
-void ff_vorbis_ready_floor1_list(floor1_entry_t * list, int values);
+void ff_vorbis_ready_floor1_list(vorbis_floor1_entry * list, int values);
unsigned int ff_vorbis_nth_root(unsigned int x, unsigned int n); // x^(1/n)
int ff_vorbis_len2vlc(uint8_t *bits, uint32_t *codes, uint_fast32_t num);
-void ff_vorbis_floor1_render_list(floor1_entry_t * list, int values, uint_fast16_t * y_list, int * flag, int multiplier, float * out, int samples);
+void ff_vorbis_floor1_render_list(vorbis_floor1_entry * list, int values, uint_fast16_t * y_list, int * flag, int multiplier, float * out, int samples);
#define ilog(i) av_log2(2*(i))
diff --git a/libavcodec/vorbis_dec.c b/libavcodec/vorbis_dec.c
index 6c15cb8..16f6dec 100644
--- a/libavcodec/vorbis_dec.c
+++ b/libavcodec/vorbis_dec.c
@@ -90,7 +90,7 @@ typedef struct {
int_fast16_t subclass_books[16][8];
uint_fast8_t multiplier;
uint_fast16_t x_list_dim;
- floor1_entry_t * list;
+ vorbis_floor1_entry * list;
} t1;
} data;
} vorbis_floor;
@@ -511,7 +511,7 @@ static int vorbis_parse_setup_hdr_floors(vorbis_context *vc) {
floor_setup->data.t1.x_list_dim+=floor_setup->data.t1.class_dimensions[floor_setup->data.t1.partition_class[j]];
}
- floor_setup->data.t1.list=av_mallocz(floor_setup->data.t1.x_list_dim * sizeof(floor1_entry_t));
+ floor_setup->data.t1.list=av_mallocz(floor_setup->data.t1.x_list_dim * sizeof(vorbis_floor1_entry));
rangebits=get_bits(gb, 4);
diff --git a/libavcodec/vorbis_enc.c b/libavcodec/vorbis_enc.c
index 0cb644b..43350de 100644
--- a/libavcodec/vorbis_enc.c
+++ b/libavcodec/vorbis_enc.c
@@ -48,25 +48,25 @@ typedef struct {
int * quantlist;
float * dimentions;
float * pow2;
-} codebook_t;
+} vorbis_enc_codebook;
typedef struct {
int dim;
int subclass;
int masterbook;
int * books;
-} floor_class_t;
+} vorbis_enc_floor_class;
typedef struct {
int partitions;
int * partition_to_class;
int nclasses;
- floor_class_t * classes;
+ vorbis_enc_floor_class * classes;
int multiplier;
int rangebits;
int values;
- floor1_entry_t * list;
-} floor_t;
+ vorbis_floor1_entry * list;
+} vorbis_enc_floor;
typedef struct {
int type;
@@ -77,7 +77,7 @@ typedef struct {
int classbook;
int8_t (*books)[8];
float (*maxes)[2];
-} residue_t;
+} vorbis_enc_residue;
typedef struct {
int submaps;
@@ -87,12 +87,12 @@ typedef struct {
int coupling_steps;
int * magnitude;
int * angle;
-} mapping_t;
+} vorbis_enc_mapping;
typedef struct {
int blockflag;
int mapping;
-} vorbis_mode_t;
+} vorbis_enc_mode;
typedef struct {
int channels;
@@ -108,24 +108,24 @@ typedef struct {
float quality;
int ncodebooks;
- codebook_t * codebooks;
+ vorbis_enc_codebook * codebooks;
int nfloors;
- floor_t * floors;
+ vorbis_enc_floor * floors;
int nresidues;
- residue_t * residues;
+ vorbis_enc_residue * residues;
int nmappings;
- mapping_t * mappings;
+ vorbis_enc_mapping * mappings;
int nmodes;
- vorbis_mode_t * modes;
+ vorbis_enc_mode * modes;
int64_t sample_count;
-} venc_context_t;
+} vorbis_enc_context;
-static inline void put_codeword(PutBitContext * pb, codebook_t * cb, int entry) {
+static inline void put_codeword(PutBitContext * pb, vorbis_enc_codebook * cb, int entry) {
assert(entry >= 0);
assert(entry < cb->nentries);
assert(cb->lens[entry]);
@@ -138,7 +138,7 @@ static int cb_lookup_vals(int lookup, int dimentions, int entries) {
return 0;
}
-static void ready_codebook(codebook_t * cb) {
+static void ready_codebook(vorbis_enc_codebook * cb) {
int i;
ff_vorbis_len2vlc(cb->lens, cb->codewords, cb->nentries);
@@ -171,13 +171,13 @@ static void ready_codebook(codebook_t * cb) {
}
}
-static void ready_residue(residue_t * rc, venc_context_t * venc) {
+static void ready_residue(vorbis_enc_residue * rc, vorbis_enc_context * venc) {
int i;
assert(rc->type == 2);
rc->maxes = av_mallocz(sizeof(float[2]) * rc->classifications);
for (i = 0; i < rc->classifications; i++) {
int j;
- codebook_t * cb;
+ vorbis_enc_codebook * cb;
for (j = 0; j < 8; j++)
if (rc->books[i][j] != -1) break;
if (j == 8) continue; // zero
@@ -203,10 +203,10 @@ static void ready_residue(residue_t * rc, venc_context_t * venc) {
}
}
-static void create_vorbis_context(venc_context_t * venc, AVCodecContext * avccontext) {
- floor_t * fc;
- residue_t * rc;
- mapping_t * mc;
+static void create_vorbis_context(vorbis_enc_context * venc, AVCodecContext * avccontext) {
+ vorbis_enc_floor * fc;
+ vorbis_enc_residue * rc;
+ vorbis_enc_mapping * mc;
int i, book;
venc->channels = avccontext->channels;
@@ -214,13 +214,13 @@ static void create_vorbis_context(venc_context_t * venc, AVCodecContext * avccon
venc->log2_blocksize[0] = venc->log2_blocksize[1] = 11;
venc->ncodebooks = FF_ARRAY_ELEMS(cvectors);
- venc->codebooks = av_malloc(sizeof(codebook_t) * venc->ncodebooks);
+ venc->codebooks = av_malloc(sizeof(vorbis_enc_codebook) * venc->ncodebooks);
// codebook 0..14 - floor1 book, values 0..255
// codebook 15 residue masterbook
// codebook 16..29 residue
for (book = 0; book < venc->ncodebooks; book++) {
- codebook_t * cb = &venc->codebooks[book];
+ vorbis_enc_codebook * cb = &venc->codebooks[book];
int vals;
cb->ndimentions = cvectors[book].dim;
cb->nentries = cvectors[book].real_len;
@@ -246,7 +246,7 @@ static void create_vorbis_context(venc_context_t * venc, AVCodecContext * avccon
}
venc->nfloors = 1;
- venc->floors = av_malloc(sizeof(floor_t) * venc->nfloors);
+ venc->floors = av_malloc(sizeof(vorbis_enc_floor) * venc->nfloors);
// just 1 floor
fc = &venc->floors[0];
@@ -259,9 +259,9 @@ static void create_vorbis_context(venc_context_t * venc, AVCodecContext * avccon
fc->nclasses = FFMAX(fc->nclasses, fc->partition_to_class[i]);
}
fc->nclasses++;
- fc->classes = av_malloc(sizeof(floor_class_t) * fc->nclasses);
+ fc->classes = av_malloc(sizeof(vorbis_enc_floor_class) * fc->nclasses);
for (i = 0; i < fc->nclasses; i++) {
- floor_class_t * c = &fc->classes[i];
+ vorbis_enc_floor_class * c = &fc->classes[i];
int j, books;
c->dim = floor_classes[i].dim;
c->subclass = floor_classes[i].subclass;
@@ -278,7 +278,7 @@ static void create_vorbis_context(venc_context_t * venc, AVCodecContext * avccon
for (i = 0; i < fc->partitions; i++)
fc->values += fc->classes[fc->partition_to_class[i]].dim;
- fc->list = av_malloc(sizeof(floor1_entry_t) * fc->values);
+ fc->list = av_malloc(sizeof(vorbis_floor1_entry) * fc->values);
fc->list[0].x = 0;
fc->list[1].x = 1 << fc->rangebits;
for (i = 2; i < fc->values; i++) {
@@ -292,7 +292,7 @@ static void create_vorbis_context(venc_context_t * venc, AVCodecContext * avccon
ff_vorbis_ready_floor1_list(fc->list, fc->values);
venc->nresidues = 1;
- venc->residues = av_malloc(sizeof(residue_t) * venc->nresidues);
+ venc->residues = av_malloc(sizeof(vorbis_enc_residue) * venc->nresidues);
// single residue
rc = &venc->residues[0];
@@ -321,7 +321,7 @@ static void create_vorbis_context(venc_context_t * venc, AVCodecContext * avccon
ready_residue(rc, venc);
venc->nmappings = 1;
- venc->mappings = av_malloc(sizeof(mapping_t) * venc->nmappings);
+ venc->mappings = av_malloc(sizeof(vorbis_enc_mapping) * venc->nmappings);
// single mapping
mc = &venc->mappings[0];
@@ -344,7 +344,7 @@ static void create_vorbis_context(venc_context_t * venc, AVCodecContext * avccon
}
venc->nmodes = 1;
- venc->modes = av_malloc(sizeof(vorbis_mode_t) * venc->nmodes);
+ venc->modes = av_malloc(sizeof(vorbis_enc_mode) * venc->nmodes);
// single mode
venc->modes[0].blockflag = 0;
@@ -373,7 +373,7 @@ static void put_float(PutBitContext * pb, float f) {
put_bits(pb, 32, res);
}
-static void put_codebook_header(PutBitContext * pb, codebook_t * cb) {
+static void put_codebook_header(PutBitContext * pb, vorbis_enc_codebook * cb) {
int i;
int ordered = 0;
@@ -432,7 +432,7 @@ static void put_codebook_header(PutBitContext * pb, codebook_t * cb) {
}
}
-static void put_floor_header(PutBitContext * pb, floor_t * fc) {
+static void put_floor_header(PutBitContext * pb, vorbis_enc_floor * fc) {
int i;
put_bits(pb, 16, 1); // type, only floor1 is supported
@@ -464,7 +464,7 @@ static void put_floor_header(PutBitContext * pb, floor_t * fc) {
put_bits(pb, fc->rangebits, fc->list[i].x);
}
-static void put_residue_header(PutBitContext * pb, residue_t * rc) {
+static void put_residue_header(PutBitContext * pb, vorbis_enc_residue * rc) {
int i;
put_bits(pb, 16, rc->type);
@@ -495,7 +495,7 @@ static void put_residue_header(PutBitContext * pb, residue_t * rc) {
}
}
-static int put_main_header(venc_context_t * venc, uint8_t ** out) {
+static int put_main_header(vorbis_enc_context * venc, uint8_t ** out) {
int i;
PutBitContext pb;
uint8_t buffer[50000] = {0}, * p = buffer;
@@ -564,7 +564,7 @@ static int put_main_header(venc_context_t * venc, uint8_t ** out) {
// mappings
put_bits(&pb, 6, venc->nmappings - 1);
for (i = 0; i < venc->nmappings; i++) {
- mapping_t * mc = &venc->mappings[i];
+ vorbis_enc_mapping * mc = &venc->mappings[i];
int j;
put_bits(&pb, 16, 0); // mapping type
@@ -624,7 +624,7 @@ static int put_main_header(venc_context_t * venc, uint8_t ** out) {
return p - *out;
}
-static float get_floor_average(floor_t * fc, float * coeffs, int i) {
+static float get_floor_average(vorbis_enc_floor * fc, float * coeffs, int i) {
int begin = fc->list[fc->list[FFMAX(i-1, 0)].sort].x;
int end = fc->list[fc->list[FFMIN(i+1, fc->values - 1)].sort].x;
int j;
@@ -635,7 +635,7 @@ static float get_floor_average(floor_t * fc, float * coeffs, int i) {
return average / (end - begin);
}
-static void floor_fit(venc_context_t * venc, floor_t * fc, float * coeffs, uint_fast16_t * posts, int samples) {
+static void floor_fit(vorbis_enc_context * venc, vorbis_enc_floor * fc, float * coeffs, uint_fast16_t * posts, int samples) {
int range = 255 / fc->multiplier + 1;
int i;
float tot_average = 0.;
@@ -663,7 +663,7 @@ static int render_point(int x0, int y0, int x1, int y1, int x) {
return y0 + (x - x0) * (y1 - y0) / (x1 - x0);
}
-static void floor_encode(venc_context_t * venc, floor_t * fc, PutBitContext * pb, uint_fast16_t * posts, float * floor, int samples) {
+static void floor_encode(vorbis_enc_context * venc, vorbis_enc_floor * fc, PutBitContext * pb, uint_fast16_t * posts, float * floor, int samples) {
int range = 255 / fc->multiplier + 1;
int coded[fc->values]; // first 2 values are unused
int i, counter;
@@ -704,10 +704,10 @@ static void floor_encode(venc_context_t * venc, floor_t * fc, PutBitContext * pb
counter = 2;
for (i = 0; i < fc->partitions; i++) {
- floor_class_t * c = &fc->classes[fc->partition_to_class[i]];
+ vorbis_enc_floor_class * c = &fc->classes[fc->partition_to_class[i]];
int k, cval = 0, csub = 1<<c->subclass;
if (c->subclass) {
- codebook_t * book = &venc->codebooks[c->masterbook];
+ vorbis_enc_codebook * book = &venc->codebooks[c->masterbook];
int cshift = 0;
for (k = 0; k < c->dim; k++) {
int l;
@@ -737,7 +737,7 @@ static void floor_encode(venc_context_t * venc, floor_t * fc, PutBitContext * pb
ff_vorbis_floor1_render_list(fc->list, fc->values, posts, coded, fc->multiplier, floor, samples);
}
-static float * put_vector(codebook_t * book, PutBitContext * pb, float * num) {
+static float * put_vector(vorbis_enc_codebook * book, PutBitContext * pb, float * num) {
int i, entry = -1;
float distance = FLT_MAX;
assert(book->dimentions);
@@ -756,7 +756,7 @@ static float * put_vector(codebook_t * book, PutBitContext * pb, float * num) {
return &book->dimentions[entry * book->ndimentions];
}
-static void residue_encode(venc_context_t * venc, residue_t * rc, PutBitContext * pb, float * coeffs, int samples, int real_ch) {
+static void residue_encode(vorbis_enc_context * venc, vorbis_enc_residue * rc, PutBitContext * pb, float * coeffs, int samples, int real_ch) {
int pass, i, j, p, k;
int psize = rc->partition_size;
int partitions = (rc->end - rc->begin) / psize;
@@ -785,7 +785,7 @@ static void residue_encode(venc_context_t * venc, residue_t * rc, PutBitContext
while (p < partitions) {
if (pass == 0)
for (j = 0; j < channels; j++) {
- codebook_t * book = &venc->codebooks[rc->classbook];
+ vorbis_enc_codebook * book = &venc->codebooks[rc->classbook];
int entry = 0;
for (i = 0; i < classwords; i++) {
entry *= rc->classifications;
@@ -796,7 +796,7 @@ static void residue_encode(venc_context_t * venc, residue_t * rc, PutBitContext
for (i = 0; i < classwords && p < partitions; i++, p++) {
for (j = 0; j < channels; j++) {
int nbook = rc->books[classes[j][p]][pass];
- codebook_t * book = &venc->codebooks[nbook];
+ vorbis_enc_codebook * book = &venc->codebooks[nbook];
float * buf = coeffs + samples*j + rc->begin + p*psize;
if (nbook == -1) continue;
@@ -841,7 +841,7 @@ static void residue_encode(venc_context_t * venc, residue_t * rc, PutBitContext
}
}
-static int apply_window_and_mdct(venc_context_t * venc, signed short * audio, int samples) {
+static int apply_window_and_mdct(vorbis_enc_context * venc, signed short * audio, int samples) {
int i, j, channel;
const float * win = venc->win[0];
int window_len = 1 << (venc->log2_blocksize[0] - 1);
@@ -893,7 +893,7 @@ static int apply_window_and_mdct(venc_context_t * venc, signed short * audio, in
static av_cold int vorbis_encode_init(AVCodecContext * avccontext)
{
- venc_context_t * venc = avccontext->priv_data;
+ vorbis_enc_context * venc = avccontext->priv_data;
if (avccontext->channels != 2) {
av_log(avccontext, AV_LOG_ERROR, "Current FFmpeg Vorbis encoder only supports 2 channels.\n");
@@ -920,11 +920,11 @@ static av_cold int vorbis_encode_init(AVCodecContext * avccontext)
static int vorbis_encode_frame(AVCodecContext * avccontext, unsigned char * packets, int buf_size, void *data)
{
- venc_context_t * venc = avccontext->priv_data;
+ vorbis_enc_context * venc = avccontext->priv_data;
signed short * audio = data;
int samples = data ? avccontext->frame_size : 0;
- vorbis_mode_t * mode;
- mapping_t * mapping;
+ vorbis_enc_mode * mode;
+ vorbis_enc_mapping * mapping;
PutBitContext pb;
int i;
@@ -945,7 +945,7 @@ static int vorbis_encode_frame(AVCodecContext * avccontext, unsigned char * pack
}
for (i = 0; i < venc->channels; i++) {
- floor_t * fc = &venc->floors[mapping->floor[mapping->mux[i]]];
+ vorbis_enc_floor * fc = &venc->floors[mapping->floor[mapping->mux[i]]];
uint_fast16_t posts[fc->values];
floor_fit(venc, fc, &venc->coeffs[i * samples], posts, samples);
floor_encode(venc, fc, &pb, posts, &venc->floor[i * samples], samples);
@@ -978,7 +978,7 @@ static int vorbis_encode_frame(AVCodecContext * avccontext, unsigned char * pack
static av_cold int vorbis_encode_close(AVCodecContext * avccontext)
{
- venc_context_t * venc = avccontext->priv_data;
+ vorbis_enc_context * venc = avccontext->priv_data;
int i;
if (venc->codebooks)
@@ -1040,7 +1040,7 @@ AVCodec vorbis_encoder = {
"vorbis",
CODEC_TYPE_AUDIO,
CODEC_ID_VORBIS,
- sizeof(venc_context_t),
+ sizeof(vorbis_enc_context),
vorbis_encode_init,
vorbis_encode_frame,
vorbis_encode_close,
diff --git a/libavcodec/vp3.c b/libavcodec/vp3.c
index a5b97ad..2737025 100644
--- a/libavcodec/vp3.c
+++ b/libavcodec/vp3.c
@@ -1402,14 +1402,14 @@ static void render_slice(Vp3DecodeContext *s, int slice)
/* dequantize the DCT coefficients */
if(s->avctx->idct_algo==FF_IDCT_VP3){
Coeff *coeff= s->coeffs + i;
- memset(block, 0, sizeof(block));
+ s->dsp.clear_block(block);
while(coeff->next){
block[coeff->index]= coeff->coeff * dequantizer[coeff->index];
coeff= coeff->next;
}
}else{
Coeff *coeff= s->coeffs + i;
- memset(block, 0, sizeof(block));
+ s->dsp.clear_block(block);
while(coeff->next){
block[coeff->index]= (coeff->coeff * dequantizer[coeff->index] + 2)>>2;
coeff= coeff->next;
diff --git a/libavcodec/vp5.c b/libavcodec/vp5.c
index 4f9d42f..e47db0d 100644
--- a/libavcodec/vp5.c
+++ b/libavcodec/vp5.c
@@ -33,10 +33,10 @@
#include "vp5data.h"
-static int vp5_parse_header(vp56_context_t *s, const uint8_t *buf, int buf_size,
+static int vp5_parse_header(VP56Context *s, const uint8_t *buf, int buf_size,
int *golden_frame)
{
- vp56_range_coder_t *c = &s->c;
+ VP56RangeCoder *c = &s->c;
int rows, cols;
vp56_init_range_decoder(&s->c, buf, buf_size);
@@ -58,7 +58,8 @@ static int vp5_parse_header(vp56_context_t *s, const uint8_t *buf, int buf_size,
vp56_rac_gets(c, 8); /* number of displayed macroblock rows */
vp56_rac_gets(c, 8); /* number of displayed macroblock cols */
vp56_rac_gets(c, 2);
- if (16*cols != s->avctx->coded_width ||
+ if (!s->macroblocks || /* first frame */
+ 16*cols != s->avctx->coded_width ||
16*rows != s->avctx->coded_height) {
avcodec_set_dimensions(s->avctx, 16*cols, 16*rows);
return 2;
@@ -84,10 +85,10 @@ static int vp5_adjust(int v, int t)
return v;
}
-static void vp5_parse_vector_adjustment(vp56_context_t *s, vp56_mv_t *vect)
+static void vp5_parse_vector_adjustment(VP56Context *s, VP56mv *vect)
{
- vp56_range_coder_t *c = &s->c;
- vp56_model_t *model = s->modelp;
+ VP56RangeCoder *c = &s->c;
+ VP56Model *model = s->modelp;
int comp, di;
for (comp=0; comp<2; comp++) {
@@ -108,10 +109,10 @@ static void vp5_parse_vector_adjustment(vp56_context_t *s, vp56_mv_t *vect)
}
}
-static void vp5_parse_vector_models(vp56_context_t *s)
+static void vp5_parse_vector_models(VP56Context *s)
{
- vp56_range_coder_t *c = &s->c;
- vp56_model_t *model = s->modelp;
+ VP56RangeCoder *c = &s->c;
+ VP56Model *model = s->modelp;
int comp, node;
for (comp=0; comp<2; comp++) {
@@ -131,10 +132,10 @@ static void vp5_parse_vector_models(vp56_context_t *s)
model->vector_pdv[comp][node] = vp56_rac_gets_nn(c, 7);
}
-static void vp5_parse_coeff_models(vp56_context_t *s)
+static void vp5_parse_coeff_models(VP56Context *s)
{
- vp56_range_coder_t *c = &s->c;
- vp56_model_t *model = s->modelp;
+ VP56RangeCoder *c = &s->c;
+ VP56Model *model = s->modelp;
uint8_t def_prob[11];
int node, cg, ctx;
int ct; /* code type */
@@ -177,10 +178,10 @@ static void vp5_parse_coeff_models(vp56_context_t *s)
model->coeff_acct[pt][ct][cg][ctx][node] = av_clip(((model->coeff_ract[pt][ct][cg][node] * vp5_ract_lc[ct][cg][node][ctx][0] + 128) >> 8) + vp5_ract_lc[ct][cg][node][ctx][1], 1, 254);
}
-static void vp5_parse_coeff(vp56_context_t *s)
+static void vp5_parse_coeff(VP56Context *s)
{
- vp56_range_coder_t *c = &s->c;
- vp56_model_t *model = s->modelp;
+ VP56RangeCoder *c = &s->c;
+ VP56Model *model = s->modelp;
uint8_t *permute = s->scantable.permutated;
uint8_t *model1, *model2;
int coeff, sign, coeff_idx;
@@ -250,9 +251,9 @@ static void vp5_parse_coeff(vp56_context_t *s)
}
}
-static void vp5_default_models_init(vp56_context_t *s)
+static void vp5_default_models_init(VP56Context *s)
{
- vp56_model_t *model = s->modelp;
+ VP56Model *model = s->modelp;
int i;
for (i=0; i<2; i++) {
@@ -267,7 +268,7 @@ static void vp5_default_models_init(vp56_context_t *s)
static av_cold int vp5_decode_init(AVCodecContext *avctx)
{
- vp56_context_t *s = avctx->priv_data;
+ VP56Context *s = avctx->priv_data;
vp56_init(avctx, 1, 0);
s->vp56_coord_div = vp5_coord_div;
@@ -286,7 +287,7 @@ AVCodec vp5_decoder = {
"vp5",
CODEC_TYPE_VIDEO,
CODEC_ID_VP5,
- sizeof(vp56_context_t),
+ sizeof(VP56Context),
vp5_decode_init,
NULL,
vp56_free,
diff --git a/libavcodec/vp56.c b/libavcodec/vp56.c
index c9daaf7..79667cf 100644
--- a/libavcodec/vp56.c
+++ b/libavcodec/vp56.c
@@ -28,20 +28,20 @@
#include "vp56data.h"
-void vp56_init_dequant(vp56_context_t *s, int quantizer)
+void vp56_init_dequant(VP56Context *s, int quantizer)
{
s->quantizer = quantizer;
s->dequant_dc = vp56_dc_dequant[quantizer] << 2;
s->dequant_ac = vp56_ac_dequant[quantizer] << 2;
}
-static int vp56_get_vectors_predictors(vp56_context_t *s, int row, int col,
- vp56_frame_t ref_frame)
+static int vp56_get_vectors_predictors(VP56Context *s, int row, int col,
+ VP56Frame ref_frame)
{
int nb_pred = 0;
- vp56_mv_t vect[2] = {{0,0}, {0,0}};
+ VP56mv vect[2] = {{0,0}, {0,0}};
int pos, offset;
- vp56_mv_t mvp;
+ VP56mv mvp;
for (pos=0; pos<12; pos++) {
mvp.x = col + vp56_candidate_predictor_pos[pos][0];
@@ -73,10 +73,10 @@ static int vp56_get_vectors_predictors(vp56_context_t *s, int row, int col,
return nb_pred+1;
}
-static void vp56_parse_mb_type_models(vp56_context_t *s)
+static void vp56_parse_mb_type_models(VP56Context *s)
{
- vp56_range_coder_t *c = &s->c;
- vp56_model_t *model = s->modelp;
+ VP56RangeCoder *c = &s->c;
+ VP56Model *model = s->modelp;
int i, ctx, type;
for (ctx=0; ctx<3; ctx++) {
@@ -144,11 +144,11 @@ static void vp56_parse_mb_type_models(vp56_context_t *s)
}
}
-static vp56_mb_t vp56_parse_mb_type(vp56_context_t *s,
- vp56_mb_t prev_type, int ctx)
+static VP56mb vp56_parse_mb_type(VP56Context *s,
+ VP56mb prev_type, int ctx)
{
uint8_t *mb_type_model = s->modelp->mb_type[ctx][prev_type];
- vp56_range_coder_t *c = &s->c;
+ VP56RangeCoder *c = &s->c;
if (vp56_rac_get_prob(c, mb_type_model[0]))
return prev_type;
@@ -156,9 +156,9 @@ static vp56_mb_t vp56_parse_mb_type(vp56_context_t *s,
return vp56_rac_get_tree(c, vp56_pmbt_tree, mb_type_model);
}
-static void vp56_decode_4mv(vp56_context_t *s, int row, int col)
+static void vp56_decode_4mv(VP56Context *s, int row, int col)
{
- vp56_mv_t mv = {0,0};
+ VP56mv mv = {0,0};
int type[4];
int b;
@@ -173,7 +173,7 @@ static void vp56_decode_4mv(vp56_context_t *s, int row, int col)
for (b=0; b<4; b++) {
switch (type[b]) {
case VP56_MB_INTER_NOVEC_PF:
- s->mv[b] = (vp56_mv_t) {0,0};
+ s->mv[b] = (VP56mv) {0,0};
break;
case VP56_MB_INTER_DELTA_PF:
s->parse_vector_adjustment(s, &s->mv[b]);
@@ -197,13 +197,13 @@ static void vp56_decode_4mv(vp56_context_t *s, int row, int col)
s->mv[4].x = s->mv[5].x = RSHIFT(mv.x,2);
s->mv[4].y = s->mv[5].y = RSHIFT(mv.y,2);
} else {
- s->mv[4] = s->mv[5] = (vp56_mv_t) {mv.x/4, mv.y/4};
+ s->mv[4] = s->mv[5] = (VP56mv) {mv.x/4, mv.y/4};
}
}
-static vp56_mb_t vp56_decode_mv(vp56_context_t *s, int row, int col)
+static VP56mb vp56_decode_mv(VP56Context *s, int row, int col)
{
- vp56_mv_t *mv, vect = {0,0};
+ VP56mv *mv, vect = {0,0};
int ctx, b;
ctx = vp56_get_vectors_predictors(s, row, col, VP56_FRAME_PREVIOUS);
@@ -258,14 +258,14 @@ static vp56_mb_t vp56_decode_mv(vp56_context_t *s, int row, int col)
return s->mb_type;
}
-static void vp56_add_predictors_dc(vp56_context_t *s, vp56_frame_t ref_frame)
+static void vp56_add_predictors_dc(VP56Context *s, VP56Frame ref_frame)
{
int idx = s->scantable.permutated[0];
int b;
for (b=0; b<6; b++) {
- vp56_ref_dc_t *ab = &s->above_blocks[s->above_block_idx[b]];
- vp56_ref_dc_t *lb = &s->left_block[vp56_b6to4[b]];
+ VP56RefDc *ab = &s->above_blocks[s->above_block_idx[b]];
+ VP56RefDc *lb = &s->left_block[vp56_b6to4[b]];
int count = 0;
int dc = 0;
int i;
@@ -299,7 +299,7 @@ static void vp56_add_predictors_dc(vp56_context_t *s, vp56_frame_t ref_frame)
}
}
-static void vp56_edge_filter(vp56_context_t *s, uint8_t *yuv,
+static void vp56_edge_filter(VP56Context *s, uint8_t *yuv,
int pix_inc, int line_inc, int t)
{
int pix2_inc = 2 * pix_inc;
@@ -314,7 +314,7 @@ static void vp56_edge_filter(vp56_context_t *s, uint8_t *yuv,
}
}
-static void vp56_deblock_filter(vp56_context_t *s, uint8_t *yuv,
+static void vp56_deblock_filter(VP56Context *s, uint8_t *yuv,
int stride, int dx, int dy)
{
int t = vp56_filter_threshold[s->quantizer];
@@ -322,7 +322,7 @@ static void vp56_deblock_filter(vp56_context_t *s, uint8_t *yuv,
if (dy) vp56_edge_filter(s, yuv + stride*(10-dy), stride, 1, t);
}
-static void vp56_mc(vp56_context_t *s, int b, int plane, uint8_t *src,
+static void vp56_mc(VP56Context *s, int b, int plane, uint8_t *src,
int stride, int x, int y)
{
uint8_t *dst=s->framep[VP56_FRAME_CURRENT]->data[plane]+s->block_offset[b];
@@ -392,11 +392,11 @@ static void vp56_mc(vp56_context_t *s, int b, int plane, uint8_t *src,
}
}
-static void vp56_decode_mb(vp56_context_t *s, int row, int col, int is_alpha)
+static void vp56_decode_mb(VP56Context *s, int row, int col, int is_alpha)
{
AVFrame *frame_current, *frame_ref;
- vp56_mb_t mb_type;
- vp56_frame_t ref_frame;
+ VP56mb mb_type;
+ VP56Frame ref_frame;
int b, ab, b_max, plane, off;
if (s->framep[VP56_FRAME_CURRENT]->key_frame)
@@ -405,7 +405,7 @@ static void vp56_decode_mb(vp56_context_t *s, int row, int col, int is_alpha)
mb_type = vp56_decode_mv(s, row, col);
ref_frame = vp56_reference_frame[mb_type];
- memset(s->block_coeff, 0, sizeof(s->block_coeff));
+ s->dsp.clear_blocks(*s->block_coeff);
s->parse_coeff(s);
@@ -461,7 +461,7 @@ static void vp56_decode_mb(vp56_context_t *s, int row, int col, int is_alpha)
static int vp56_size_changed(AVCodecContext *avctx)
{
- vp56_context_t *s = avctx->priv_data;
+ VP56Context *s = avctx->priv_data;
int stride = s->framep[VP56_FRAME_CURRENT]->linesize[0];
int i;
@@ -497,7 +497,7 @@ static int vp56_size_changed(AVCodecContext *avctx)
int vp56_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
const uint8_t *buf, int buf_size)
{
- vp56_context_t *s = avctx->priv_data;
+ VP56Context *s = avctx->priv_data;
AVFrame *const p = s->framep[VP56_FRAME_CURRENT];
int remaining_buf_size = buf_size;
int is_alpha, alpha_offset;
@@ -646,7 +646,7 @@ int vp56_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
av_cold void vp56_init(AVCodecContext *avctx, int flip, int has_alpha)
{
- vp56_context_t *s = avctx->priv_data;
+ VP56Context *s = avctx->priv_data;
int i;
s->avctx = avctx;
@@ -657,8 +657,6 @@ av_cold void vp56_init(AVCodecContext *avctx, int flip, int has_alpha)
dsputil_init(&s->dsp, avctx);
ff_init_scantable(s->dsp.idct_permutation, &s->scantable,ff_zigzag_direct);
- avcodec_set_dimensions(avctx, 0, 0);
-
for (i=0; i<4; i++)
s->framep[i] = &s->frames[i];
s->framep[VP56_FRAME_UNUSED] = s->framep[VP56_FRAME_GOLDEN];
@@ -686,7 +684,7 @@ av_cold void vp56_init(AVCodecContext *avctx, int flip, int has_alpha)
av_cold int vp56_free(AVCodecContext *avctx)
{
- vp56_context_t *s = avctx->priv_data;
+ VP56Context *s = avctx->priv_data;
av_free(s->above_blocks);
av_free(s->macroblocks);
diff --git a/libavcodec/vp56.h b/libavcodec/vp56.h
index 991e94b..aaa18dd 100644
--- a/libavcodec/vp56.h
+++ b/libavcodec/vp56.h
@@ -30,34 +30,34 @@
#include "bytestream.h"
-typedef struct vp56_context vp56_context_t;
-typedef struct vp56_mv vp56_mv_t;
-
-typedef void (*vp56_parse_vector_adjustment_t)(vp56_context_t *s,
- vp56_mv_t *vect);
-typedef int (*vp56_adjust_t)(int v, int t);
-typedef void (*vp56_filter_t)(vp56_context_t *s, uint8_t *dst, uint8_t *src,
- int offset1, int offset2, int stride,
- vp56_mv_t mv, int mask, int select, int luma);
-typedef void (*vp56_parse_coeff_t)(vp56_context_t *s);
-typedef void (*vp56_default_models_init_t)(vp56_context_t *s);
-typedef void (*vp56_parse_vector_models_t)(vp56_context_t *s);
-typedef void (*vp56_parse_coeff_models_t)(vp56_context_t *s);
-typedef int (*vp56_parse_header_t)(vp56_context_t *s, const uint8_t *buf,
- int buf_size, int *golden_frame);
+typedef struct vp56_context VP56Context;
+typedef struct vp56_mv VP56mv;
+
+typedef void (*VP56ParseVectorAdjustment)(VP56Context *s,
+ VP56mv *vect);
+typedef int (*VP56Adjust)(int v, int t);
+typedef void (*VP56Filter)(VP56Context *s, uint8_t *dst, uint8_t *src,
+ int offset1, int offset2, int stride,
+ VP56mv mv, int mask, int select, int luma);
+typedef void (*VP56ParseCoeff)(VP56Context *s);
+typedef void (*VP56DefaultModelsInit)(VP56Context *s);
+typedef void (*VP56ParseVectorModels)(VP56Context *s);
+typedef void (*VP56ParseCoeffModels)(VP56Context *s);
+typedef int (*VP56ParseHeader)(VP56Context *s, const uint8_t *buf,
+ int buf_size, int *golden_frame);
typedef struct {
int high;
int bits;
const uint8_t *buffer;
unsigned long code_word;
-} vp56_range_coder_t;
+} VP56RangeCoder;
typedef struct {
uint8_t not_null_dc;
- vp56_frame_t ref_frame;
+ VP56Frame ref_frame;
DCTELEM dc_coeff;
-} vp56_ref_dc_t;
+} VP56RefDc;
struct vp56_mv {
int x;
@@ -66,8 +66,8 @@ struct vp56_mv {
typedef struct {
uint8_t type;
- vp56_mv_t mv;
-} vp56_macroblock_t;
+ VP56mv mv;
+} VP56Macroblock;
typedef struct {
uint8_t coeff_reorder[64]; /* used in vp6 only */
@@ -84,7 +84,7 @@ typedef struct {
uint8_t coeff_runv[2][14]; /* run value (vp6 only) */
uint8_t mb_type[3][10][10]; /* model for decoding MB type */
uint8_t mb_types_stats[3][10][2];/* contextual, next MB type stats */
-} vp56_model_t;
+} VP56Model;
struct vp56_context {
AVCodecContext *avctx;
@@ -94,9 +94,9 @@ struct vp56_context {
AVFrame *framep[6];
uint8_t *edge_emu_buffer_alloc;
uint8_t *edge_emu_buffer;
- vp56_range_coder_t c;
- vp56_range_coder_t cc;
- vp56_range_coder_t *ccp;
+ VP56RangeCoder c;
+ VP56RangeCoder cc;
+ VP56RangeCoder *ccp;
int sub_version;
/* frame info */
@@ -111,19 +111,19 @@ struct vp56_context {
uint16_t dequant_ac;
/* DC predictors management */
- vp56_ref_dc_t *above_blocks;
- vp56_ref_dc_t left_block[4];
+ VP56RefDc *above_blocks;
+ VP56RefDc left_block[4];
int above_block_idx[6];
DCTELEM prev_dc[3][3]; /* [plan][ref_frame] */
/* blocks / macroblock */
- vp56_mb_t mb_type;
- vp56_macroblock_t *macroblocks;
+ VP56mb mb_type;
+ VP56Macroblock *macroblocks;
DECLARE_ALIGNED_16(DCTELEM, block_coeff[6][64]);
/* motion vectors */
- vp56_mv_t mv[6]; /* vectors for each block in MB */
- vp56_mv_t vector_candidate[2];
+ VP56mv mv[6]; /* vectors for each block in MB */
+ VP56mv vector_candidate[2];
int vector_candidate_pos;
/* filtering hints */
@@ -146,17 +146,17 @@ struct vp56_context {
int stride[4]; /* stride for each plan */
const uint8_t *vp56_coord_div;
- vp56_parse_vector_adjustment_t parse_vector_adjustment;
- vp56_adjust_t adjust;
- vp56_filter_t filter;
- vp56_parse_coeff_t parse_coeff;
- vp56_default_models_init_t default_models_init;
- vp56_parse_vector_models_t parse_vector_models;
- vp56_parse_coeff_models_t parse_coeff_models;
- vp56_parse_header_t parse_header;
-
- vp56_model_t *modelp;
- vp56_model_t models[2];
+ VP56ParseVectorAdjustment parse_vector_adjustment;
+ VP56Adjust adjust;
+ VP56Filter filter;
+ VP56ParseCoeff parse_coeff;
+ VP56DefaultModelsInit default_models_init;
+ VP56ParseVectorModels parse_vector_models;
+ VP56ParseCoeffModels parse_coeff_models;
+ VP56ParseHeader parse_header;
+
+ VP56Model *modelp;
+ VP56Model models[2];
/* huffman decoding */
int use_huffman;
@@ -170,7 +170,7 @@ struct vp56_context {
void vp56_init(AVCodecContext *avctx, int flip, int has_alpha);
int vp56_free(AVCodecContext *avctx);
-void vp56_init_dequant(vp56_context_t *s, int quantizer);
+void vp56_init_dequant(VP56Context *s, int quantizer);
int vp56_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
const uint8_t *buf, int buf_size);
@@ -179,7 +179,7 @@ int vp56_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
* vp56 specific range coder implementation
*/
-static inline void vp56_init_range_decoder(vp56_range_coder_t *c,
+static inline void vp56_init_range_decoder(VP56RangeCoder *c,
const uint8_t *buf, int buf_size)
{
c->high = 255;
@@ -188,7 +188,7 @@ static inline void vp56_init_range_decoder(vp56_range_coder_t *c,
c->code_word = bytestream_get_be16(&c->buffer);
}
-static inline int vp56_rac_get_prob(vp56_range_coder_t *c, uint8_t prob)
+static inline int vp56_rac_get_prob(VP56RangeCoder *c, uint8_t prob)
{
unsigned int low = 1 + (((c->high - 1) * prob) / 256);
unsigned int low_shift = low << 8;
@@ -213,7 +213,7 @@ static inline int vp56_rac_get_prob(vp56_range_coder_t *c, uint8_t prob)
return bit;
}
-static inline int vp56_rac_get(vp56_range_coder_t *c)
+static inline int vp56_rac_get(VP56RangeCoder *c)
{
/* equiprobable */
int low = (c->high + 1) >> 1;
@@ -235,7 +235,7 @@ static inline int vp56_rac_get(vp56_range_coder_t *c)
return bit;
}
-static inline int vp56_rac_gets(vp56_range_coder_t *c, int bits)
+static inline int vp56_rac_gets(VP56RangeCoder *c, int bits)
{
int value = 0;
@@ -246,14 +246,14 @@ static inline int vp56_rac_gets(vp56_range_coder_t *c, int bits)
return value;
}
-static inline int vp56_rac_gets_nn(vp56_range_coder_t *c, int bits)
+static inline int vp56_rac_gets_nn(VP56RangeCoder *c, int bits)
{
int v = vp56_rac_gets(c, 7) << 1;
return v + !v;
}
-static inline int vp56_rac_get_tree(vp56_range_coder_t *c,
- const vp56_tree_t *tree,
+static inline int vp56_rac_get_tree(VP56RangeCoder *c,
+ const VP56Tree *tree,
const uint8_t *probs)
{
while (tree->val > 0) {
diff --git a/libavcodec/vp56data.c b/libavcodec/vp56data.c
index a7171c6..9b98014 100644
--- a/libavcodec/vp56data.c
+++ b/libavcodec/vp56data.c
@@ -44,7 +44,7 @@ const uint8_t vp56_def_mb_types_stats[3][10][2] = {
{ 1, 2 }, { 0, 1 }, { 0, 1 }, { 1, 1 }, { 0, 0 }, },
};
-const vp56_tree_t vp56_pva_tree[] = {
+const VP56Tree vp56_pva_tree[] = {
{ 8, 0},
{ 4, 1},
{ 2, 2}, {-0}, {-1},
@@ -54,7 +54,7 @@ const vp56_tree_t vp56_pva_tree[] = {
{ 2, 6}, {-6}, {-7},
};
-const vp56_tree_t vp56_pc_tree[] = {
+const VP56Tree vp56_pc_tree[] = {
{ 4, 6},
{ 2, 7}, {-0}, {-1},
{ 4, 8},
diff --git a/libavcodec/vp56data.h b/libavcodec/vp56data.h
index 95dc633..92f2512 100644
--- a/libavcodec/vp56data.h
+++ b/libavcodec/vp56data.h
@@ -34,7 +34,7 @@ typedef enum {
VP56_FRAME_GOLDEN2 = 3,
VP56_FRAME_UNUSED = 4,
VP56_FRAME_UNUSED2 = 5,
-} vp56_frame_t;
+} VP56Frame;
typedef enum {
VP56_MB_INTER_NOVEC_PF = 0, /**< Inter MB, no vector, from previous frame */
@@ -47,23 +47,23 @@ typedef enum {
VP56_MB_INTER_4V = 7, /**< Inter MB, 4 vectors, from previous frame */
VP56_MB_INTER_V1_GF = 8, /**< Inter MB, first vector, from golden frame */
VP56_MB_INTER_V2_GF = 9, /**< Inter MB, second vector, from golden frame */
-} vp56_mb_t;
+} VP56mb;
typedef struct {
int8_t val;
int8_t prob_idx;
-} vp56_tree_t;
+} VP56Tree;
extern const uint8_t vp56_b2p[];
extern const uint8_t vp56_b6to4[];
extern const uint8_t vp56_coeff_parse_table[6][11];
extern const uint8_t vp56_def_mb_types_stats[3][10][2];
-extern const vp56_tree_t vp56_pva_tree[];
-extern const vp56_tree_t vp56_pc_tree[];
+extern const VP56Tree vp56_pva_tree[];
+extern const VP56Tree vp56_pc_tree[];
extern const uint8_t vp56_coeff_bias[];
extern const uint8_t vp56_coeff_bit_length[];
-static const vp56_frame_t vp56_reference_frame[] = {
+static const VP56Frame vp56_reference_frame[] = {
VP56_FRAME_PREVIOUS, /* VP56_MB_INTER_NOVEC_PF */
VP56_FRAME_CURRENT, /* VP56_MB_INTRA */
VP56_FRAME_PREVIOUS, /* VP56_MB_INTER_DELTA_PF */
@@ -212,7 +212,7 @@ static const uint8_t vp56_mb_type_model_model[] = {
171, 83, 199, 140, 125, 104,
};
-static const vp56_tree_t vp56_pmbtm_tree[] = {
+static const VP56Tree vp56_pmbtm_tree[] = {
{ 4, 0},
{ 2, 1}, {-8}, {-4},
{ 8, 2},
@@ -221,7 +221,7 @@ static const vp56_tree_t vp56_pmbtm_tree[] = {
{ 2, 5}, {-24}, {-20}, {-16}, {-12}, {-0},
};
-static const vp56_tree_t vp56_pmbt_tree[] = {
+static const VP56Tree vp56_pmbt_tree[] = {
{ 8, 1},
{ 4, 2},
{ 2, 4}, {-VP56_MB_INTER_NOVEC_PF}, {-VP56_MB_INTER_DELTA_PF},
diff --git a/libavcodec/vp6.c b/libavcodec/vp6.c
index bea7c9e..6ffde07 100644
--- a/libavcodec/vp6.c
+++ b/libavcodec/vp6.c
@@ -37,13 +37,13 @@
#include "vp6data.h"
-static void vp6_parse_coeff(vp56_context_t *s);
-static void vp6_parse_coeff_huffman(vp56_context_t *s);
+static void vp6_parse_coeff(VP56Context *s);
+static void vp6_parse_coeff_huffman(VP56Context *s);
-static int vp6_parse_header(vp56_context_t *s, const uint8_t *buf, int buf_size,
+static int vp6_parse_header(VP56Context *s, const uint8_t *buf, int buf_size,
int *golden_frame)
{
- vp56_range_coder_t *c = &s->c;
+ VP56RangeCoder *c = &s->c;
int parse_filter_info = 0;
int coeff_offset = 0;
int vrt_shift = 0;
@@ -75,7 +75,8 @@ static int vp6_parse_header(vp56_context_t *s, const uint8_t *buf, int buf_size,
/* buf[4] is number of displayed macroblock rows */
/* buf[5] is number of displayed macroblock cols */
- if (16*cols != s->avctx->coded_width ||
+ if (!s->macroblocks || /* first frame */
+ 16*cols != s->avctx->coded_width ||
16*rows != s->avctx->coded_height) {
avcodec_set_dimensions(s->avctx, 16*cols, 16*rows);
if (s->avctx->extradata_size == 1) {
@@ -151,7 +152,7 @@ static int vp6_parse_header(vp56_context_t *s, const uint8_t *buf, int buf_size,
return res;
}
-static void vp6_coeff_order_table_init(vp56_context_t *s)
+static void vp6_coeff_order_table_init(VP56Context *s)
{
int i, pos, idx = 1;
@@ -162,9 +163,9 @@ static void vp6_coeff_order_table_init(vp56_context_t *s)
s->modelp->coeff_index_to_pos[idx++] = pos;
}
-static void vp6_default_models_init(vp56_context_t *s)
+static void vp6_default_models_init(VP56Context *s)
{
- vp56_model_t *model = s->modelp;
+ VP56Model *model = s->modelp;
model->vector_dct[0] = 0xA2;
model->vector_dct[1] = 0xA4;
@@ -180,10 +181,10 @@ static void vp6_default_models_init(vp56_context_t *s)
vp6_coeff_order_table_init(s);
}
-static void vp6_parse_vector_models(vp56_context_t *s)
+static void vp6_parse_vector_models(VP56Context *s)
{
- vp56_range_coder_t *c = &s->c;
- vp56_model_t *model = s->modelp;
+ VP56RangeCoder *c = &s->c;
+ VP56Model *model = s->modelp;
int comp, node;
for (comp=0; comp<2; comp++) {
@@ -211,7 +212,7 @@ static int vp6_huff_cmp(const void *va, const void *vb)
return (a->count - b->count)*16 + (b->sym - a->sym);
}
-static void vp6_build_huff_tree(vp56_context_t *s, uint8_t coeff_model[],
+static void vp6_build_huff_tree(VP56Context *s, uint8_t coeff_model[],
const uint8_t *map, unsigned size, VLC *vlc)
{
Node nodes[2*size], *tmp = &nodes[size];
@@ -231,10 +232,10 @@ static void vp6_build_huff_tree(vp56_context_t *s, uint8_t coeff_model[],
FF_HUFFMAN_FLAG_HNODE_FIRST);
}
-static void vp6_parse_coeff_models(vp56_context_t *s)
+static void vp6_parse_coeff_models(VP56Context *s)
{
- vp56_range_coder_t *c = &s->c;
- vp56_model_t *model = s->modelp;
+ VP56RangeCoder *c = &s->c;
+ VP56Model *model = s->modelp;
int def_prob[11];
int node, cg, ctx, pos;
int ct; /* code type */
@@ -296,13 +297,13 @@ static void vp6_parse_coeff_models(vp56_context_t *s)
}
}
-static void vp6_parse_vector_adjustment(vp56_context_t *s, vp56_mv_t *vect)
+static void vp6_parse_vector_adjustment(VP56Context *s, VP56mv *vect)
{
- vp56_range_coder_t *c = &s->c;
- vp56_model_t *model = s->modelp;
+ VP56RangeCoder *c = &s->c;
+ VP56Model *model = s->modelp;
int comp;
- *vect = (vp56_mv_t) {0,0};
+ *vect = (VP56mv) {0,0};
if (s->vector_candidate_pos < 2)
*vect = s->vector_candidate[0];
@@ -338,7 +339,7 @@ static void vp6_parse_vector_adjustment(vp56_context_t *s, vp56_mv_t *vect)
* Read number of consecutive blocks with null DC or AC.
* This value is < 74.
*/
-static unsigned vp6_get_nb_null(vp56_context_t *s)
+static unsigned vp6_get_nb_null(VP56Context *s)
{
unsigned val = get_bits(&s->gb, 2);
if (val == 2)
@@ -350,9 +351,9 @@ static unsigned vp6_get_nb_null(vp56_context_t *s)
return val;
}
-static void vp6_parse_coeff_huffman(vp56_context_t *s)
+static void vp6_parse_coeff_huffman(VP56Context *s)
{
- vp56_model_t *model = s->modelp;
+ VP56Model *model = s->modelp;
uint8_t *permute = s->scantable.permutated;
VLC *vlc_coeff;
int coeff, sign, coeff_idx;
@@ -405,10 +406,10 @@ static void vp6_parse_coeff_huffman(vp56_context_t *s)
}
}
-static void vp6_parse_coeff(vp56_context_t *s)
+static void vp6_parse_coeff(VP56Context *s)
{
- vp56_range_coder_t *c = s->ccp;
- vp56_model_t *model = s->modelp;
+ VP56RangeCoder *c = s->ccp;
+ VP56Model *model = s->modelp;
uint8_t *permute = s->scantable.permutated;
uint8_t *model1, *model2, *model3;
int coeff, sign, coeff_idx;
@@ -522,7 +523,7 @@ static void vp6_filter_hv4(uint8_t *dst, uint8_t *src, int stride,
}
}
-static void vp6_filter_diag2(vp56_context_t *s, uint8_t *dst, uint8_t *src,
+static void vp6_filter_diag2(VP56Context *s, uint8_t *dst, uint8_t *src,
int stride, int h_weight, int v_weight)
{
uint8_t *tmp = s->edge_emu_buffer+16;
@@ -563,9 +564,9 @@ static void vp6_filter_diag4(uint8_t *dst, uint8_t *src, int stride,
}
}
-static void vp6_filter(vp56_context_t *s, uint8_t *dst, uint8_t *src,
+static void vp6_filter(VP56Context *s, uint8_t *dst, uint8_t *src,
int offset1, int offset2, int stride,
- vp56_mv_t mv, int mask, int select, int luma)
+ VP56mv mv, int mask, int select, int luma)
{
int filter4 = 0;
int x8 = mv.x & mask;
@@ -615,7 +616,7 @@ static void vp6_filter(vp56_context_t *s, uint8_t *dst, uint8_t *src,
static av_cold int vp6_decode_init(AVCodecContext *avctx)
{
- vp56_context_t *s = avctx->priv_data;
+ VP56Context *s = avctx->priv_data;
vp56_init(avctx, avctx->codec->id == CODEC_ID_VP6,
avctx->codec->id == CODEC_ID_VP6A);
@@ -635,7 +636,7 @@ AVCodec vp6_decoder = {
"vp6",
CODEC_TYPE_VIDEO,
CODEC_ID_VP6,
- sizeof(vp56_context_t),
+ sizeof(VP56Context),
vp6_decode_init,
NULL,
vp56_free,
@@ -649,7 +650,7 @@ AVCodec vp6f_decoder = {
"vp6f",
CODEC_TYPE_VIDEO,
CODEC_ID_VP6F,
- sizeof(vp56_context_t),
+ sizeof(VP56Context),
vp6_decode_init,
NULL,
vp56_free,
@@ -663,7 +664,7 @@ AVCodec vp6a_decoder = {
"vp6a",
CODEC_TYPE_VIDEO,
CODEC_ID_VP6A,
- sizeof(vp56_context_t),
+ sizeof(VP56Context),
vp6_decode_init,
NULL,
vp56_free,
diff --git a/libavcodec/vp6data.h b/libavcodec/vp6data.h
index 31d6c98..f57115c 100644
--- a/libavcodec/vp6data.h
+++ b/libavcodec/vp6data.h
@@ -283,7 +283,7 @@ static const int16_t vp6_block_copy_filter[17][8][4] = {
{ -2, 16, 118, -4 } },
};
-static const vp56_tree_t vp6_pcr_tree[] = {
+static const VP56Tree vp6_pcr_tree[] = {
{ 8, 0},
{ 4, 1},
{ 2, 2}, {-1}, {-2},
diff --git a/libavcodec/wavpack.c b/libavcodec/wavpack.c
index e8703b3..b89723e 100644
--- a/libavcodec/wavpack.c
+++ b/libavcodec/wavpack.c
@@ -361,6 +361,7 @@ static av_cold int wavpack_decode_init(AVCodecContext *avctx)
s->avctx = avctx;
s->stereo = (avctx->channels == 2);
avctx->sample_fmt = SAMPLE_FMT_S16;
+ avctx->channel_layout = (avctx->channels==2) ? CH_LAYOUT_STEREO : CH_LAYOUT_MONO;
return 0;
}
diff --git a/libavcodec/wmv2.c b/libavcodec/wmv2.c
index 1e29363..015e3f2 100644
--- a/libavcodec/wmv2.c
+++ b/libavcodec/wmv2.c
@@ -43,12 +43,12 @@ static void wmv2_add_block(Wmv2Context *w, DCTELEM *block1, uint8_t *dst, int st
case 1:
ff_simple_idct84_add(dst , stride, block1);
ff_simple_idct84_add(dst + 4*stride, stride, w->abt_block2[n]);
- memset(w->abt_block2[n], 0, 64*sizeof(DCTELEM));
+ s->dsp.clear_block(w->abt_block2[n]);
break;
case 2:
ff_simple_idct48_add(dst , stride, block1);
ff_simple_idct48_add(dst + 4 , stride, w->abt_block2[n]);
- memset(w->abt_block2[n], 0, 64*sizeof(DCTELEM));
+ s->dsp.clear_block(w->abt_block2[n]);
break;
default:
av_log(s->avctx, AV_LOG_ERROR, "internal error in WMV2 abt\n");
diff --git a/libavcodec/x86/cavsdsp_mmx.c b/libavcodec/x86/cavsdsp_mmx.c
new file mode 100644
index 0000000..2000ba5
--- /dev/null
+++ b/libavcodec/x86/cavsdsp_mmx.c
@@ -0,0 +1,497 @@
+/*
+ * Chinese AVS video (AVS1-P2, JiZhun profile) decoder.
+ * Copyright (c) 2006 Stefan Gehrer <stefan.gehrer at gmx.de>
+ *
+ * MMX-optimized DSP functions, based on H.264 optimizations by
+ * Michael Niedermayer and Loren Merritt
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/common.h"
+#include "libavutil/x86_cpu.h"
+#include "libavcodec/dsputil.h"
+#include "dsputil_mmx.h"
+
+/*****************************************************************************
+ *
+ * inverse transform
+ *
+ ****************************************************************************/
+
+static inline void cavs_idct8_1d(int16_t *block, uint64_t bias)
+{
+ __asm__ volatile(
+ "movq 112(%0), %%mm4 \n\t" /* mm4 = src7 */
+ "movq 16(%0), %%mm5 \n\t" /* mm5 = src1 */
+ "movq 80(%0), %%mm2 \n\t" /* mm2 = src5 */
+ "movq 48(%0), %%mm7 \n\t" /* mm7 = src3 */
+ "movq %%mm4, %%mm0 \n\t"
+ "movq %%mm5, %%mm3 \n\t"
+ "movq %%mm2, %%mm6 \n\t"
+ "movq %%mm7, %%mm1 \n\t"
+
+ "paddw %%mm4, %%mm4 \n\t" /* mm4 = 2*src7 */
+ "paddw %%mm3, %%mm3 \n\t" /* mm3 = 2*src1 */
+ "paddw %%mm6, %%mm6 \n\t" /* mm6 = 2*src5 */
+ "paddw %%mm1, %%mm1 \n\t" /* mm1 = 2*src3 */
+ "paddw %%mm4, %%mm0 \n\t" /* mm0 = 3*src7 */
+ "paddw %%mm3, %%mm5 \n\t" /* mm5 = 3*src1 */
+ "paddw %%mm6, %%mm2 \n\t" /* mm2 = 3*src5 */
+ "paddw %%mm1, %%mm7 \n\t" /* mm7 = 3*src3 */
+ "psubw %%mm4, %%mm5 \n\t" /* mm5 = 3*src1 - 2*src7 = a0 */
+ "paddw %%mm6, %%mm7 \n\t" /* mm7 = 3*src3 + 2*src5 = a1 */
+ "psubw %%mm2, %%mm1 \n\t" /* mm1 = 2*src3 - 3*src5 = a2 */
+ "paddw %%mm0, %%mm3 \n\t" /* mm3 = 2*src1 + 3*src7 = a3 */
+
+ "movq %%mm5, %%mm4 \n\t"
+ "movq %%mm7, %%mm6 \n\t"
+ "movq %%mm3, %%mm0 \n\t"
+ "movq %%mm1, %%mm2 \n\t"
+ SUMSUB_BA( %%mm7, %%mm5 ) /* mm7 = a0 + a1 mm5 = a0 - a1 */
+ "paddw %%mm3, %%mm7 \n\t" /* mm7 = a0 + a1 + a3 */
+ "paddw %%mm1, %%mm5 \n\t" /* mm5 = a0 - a1 + a2 */
+ "paddw %%mm7, %%mm7 \n\t"
+ "paddw %%mm5, %%mm5 \n\t"
+ "paddw %%mm6, %%mm7 \n\t" /* mm7 = b4 */
+ "paddw %%mm4, %%mm5 \n\t" /* mm5 = b5 */
+
+ SUMSUB_BA( %%mm1, %%mm3 ) /* mm1 = a3 + a2 mm3 = a3 - a2 */
+ "psubw %%mm1, %%mm4 \n\t" /* mm4 = a0 - a2 - a3 */
+ "movq %%mm4, %%mm1 \n\t" /* mm1 = a0 - a2 - a3 */
+ "psubw %%mm6, %%mm3 \n\t" /* mm3 = a3 - a2 - a1 */
+ "paddw %%mm1, %%mm1 \n\t"
+ "paddw %%mm3, %%mm3 \n\t"
+ "psubw %%mm2, %%mm1 \n\t" /* mm1 = b7 */
+ "paddw %%mm0, %%mm3 \n\t" /* mm3 = b6 */
+
+ "movq 32(%0), %%mm2 \n\t" /* mm2 = src2 */
+ "movq 96(%0), %%mm6 \n\t" /* mm6 = src6 */
+ "movq %%mm2, %%mm4 \n\t"
+ "movq %%mm6, %%mm0 \n\t"
+ "psllw $2, %%mm4 \n\t" /* mm4 = 4*src2 */
+ "psllw $2, %%mm6 \n\t" /* mm6 = 4*src6 */
+ "paddw %%mm4, %%mm2 \n\t" /* mm2 = 5*src2 */
+ "paddw %%mm6, %%mm0 \n\t" /* mm0 = 5*src6 */
+ "paddw %%mm2, %%mm2 \n\t"
+ "paddw %%mm0, %%mm0 \n\t"
+ "psubw %%mm0, %%mm4 \n\t" /* mm4 = 4*src2 - 10*src6 = a7 */
+ "paddw %%mm2, %%mm6 \n\t" /* mm6 = 4*src6 + 10*src2 = a6 */
+
+ "movq (%0), %%mm2 \n\t" /* mm2 = src0 */
+ "movq 64(%0), %%mm0 \n\t" /* mm0 = src4 */
+ SUMSUB_BA( %%mm0, %%mm2 ) /* mm0 = src0+src4 mm2 = src0-src4 */
+ "psllw $3, %%mm0 \n\t"
+ "psllw $3, %%mm2 \n\t"
+ "paddw %1, %%mm0 \n\t" /* add rounding bias */
+ "paddw %1, %%mm2 \n\t" /* add rounding bias */
+
+ SUMSUB_BA( %%mm6, %%mm0 ) /* mm6 = a4 + a6 mm0 = a4 - a6 */
+ SUMSUB_BA( %%mm4, %%mm2 ) /* mm4 = a5 + a7 mm2 = a5 - a7 */
+ SUMSUB_BA( %%mm7, %%mm6 ) /* mm7 = dst0 mm6 = dst7 */
+ SUMSUB_BA( %%mm5, %%mm4 ) /* mm5 = dst1 mm4 = dst6 */
+ SUMSUB_BA( %%mm3, %%mm2 ) /* mm3 = dst2 mm2 = dst5 */
+ SUMSUB_BA( %%mm1, %%mm0 ) /* mm1 = dst3 mm0 = dst4 */
+ :: "r"(block), "m"(bias)
+ );
+}
+
+static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
+{
+ int i;
+ DECLARE_ALIGNED_8(int16_t, b2[64]);
+
+ for(i=0; i<2; i++){
+ DECLARE_ALIGNED_8(uint64_t, tmp);
+
+ cavs_idct8_1d(block+4*i, ff_pw_4);
+
+ __asm__ volatile(
+ "psraw $3, %%mm7 \n\t"
+ "psraw $3, %%mm6 \n\t"
+ "psraw $3, %%mm5 \n\t"
+ "psraw $3, %%mm4 \n\t"
+ "psraw $3, %%mm3 \n\t"
+ "psraw $3, %%mm2 \n\t"
+ "psraw $3, %%mm1 \n\t"
+ "psraw $3, %%mm0 \n\t"
+ "movq %%mm7, %0 \n\t"
+ TRANSPOSE4( %%mm0, %%mm2, %%mm4, %%mm6, %%mm7 )
+ "movq %%mm0, 8(%1) \n\t"
+ "movq %%mm6, 24(%1) \n\t"
+ "movq %%mm7, 40(%1) \n\t"
+ "movq %%mm4, 56(%1) \n\t"
+ "movq %0, %%mm7 \n\t"
+ TRANSPOSE4( %%mm7, %%mm5, %%mm3, %%mm1, %%mm0 )
+ "movq %%mm7, (%1) \n\t"
+ "movq %%mm1, 16(%1) \n\t"
+ "movq %%mm0, 32(%1) \n\t"
+ "movq %%mm3, 48(%1) \n\t"
+ : "=m"(tmp)
+ : "r"(b2+32*i)
+ : "memory"
+ );
+ }
+
+ for(i=0; i<2; i++){
+ cavs_idct8_1d(b2+4*i, ff_pw_64);
+
+ __asm__ volatile(
+ "psraw $7, %%mm7 \n\t"
+ "psraw $7, %%mm6 \n\t"
+ "psraw $7, %%mm5 \n\t"
+ "psraw $7, %%mm4 \n\t"
+ "psraw $7, %%mm3 \n\t"
+ "psraw $7, %%mm2 \n\t"
+ "psraw $7, %%mm1 \n\t"
+ "psraw $7, %%mm0 \n\t"
+ "movq %%mm7, (%0) \n\t"
+ "movq %%mm5, 16(%0) \n\t"
+ "movq %%mm3, 32(%0) \n\t"
+ "movq %%mm1, 48(%0) \n\t"
+ "movq %%mm0, 64(%0) \n\t"
+ "movq %%mm2, 80(%0) \n\t"
+ "movq %%mm4, 96(%0) \n\t"
+ "movq %%mm6, 112(%0) \n\t"
+ :: "r"(b2+4*i)
+ : "memory"
+ );
+ }
+
+ add_pixels_clamped_mmx(b2, dst, stride);
+
+ /* clear block */
+ __asm__ volatile(
+ "pxor %%mm7, %%mm7 \n\t"
+ "movq %%mm7, (%0) \n\t"
+ "movq %%mm7, 8(%0) \n\t"
+ "movq %%mm7, 16(%0) \n\t"
+ "movq %%mm7, 24(%0) \n\t"
+ "movq %%mm7, 32(%0) \n\t"
+ "movq %%mm7, 40(%0) \n\t"
+ "movq %%mm7, 48(%0) \n\t"
+ "movq %%mm7, 56(%0) \n\t"
+ "movq %%mm7, 64(%0) \n\t"
+ "movq %%mm7, 72(%0) \n\t"
+ "movq %%mm7, 80(%0) \n\t"
+ "movq %%mm7, 88(%0) \n\t"
+ "movq %%mm7, 96(%0) \n\t"
+ "movq %%mm7, 104(%0) \n\t"
+ "movq %%mm7, 112(%0) \n\t"
+ "movq %%mm7, 120(%0) \n\t"
+ :: "r" (block)
+ );
+}
+
+/*****************************************************************************
+ *
+ * motion compensation
+ *
+ ****************************************************************************/
+
+/* vertical filter [-1 -2 96 42 -7 0] */
+#define QPEL_CAVSV1(A,B,C,D,E,F,OP) \
+ "movd (%0), "#F" \n\t"\
+ "movq "#C", %%mm6 \n\t"\
+ "pmullw %5, %%mm6 \n\t"\
+ "movq "#D", %%mm7 \n\t"\
+ "pmullw %6, %%mm7 \n\t"\
+ "psllw $3, "#E" \n\t"\
+ "psubw "#E", %%mm6 \n\t"\
+ "psraw $3, "#E" \n\t"\
+ "paddw %%mm7, %%mm6 \n\t"\
+ "paddw "#E", %%mm6 \n\t"\
+ "paddw "#B", "#B" \n\t"\
+ "pxor %%mm7, %%mm7 \n\t"\
+ "add %2, %0 \n\t"\
+ "punpcklbw %%mm7, "#F" \n\t"\
+ "psubw "#B", %%mm6 \n\t"\
+ "psraw $1, "#B" \n\t"\
+ "psubw "#A", %%mm6 \n\t"\
+ "paddw %4, %%mm6 \n\t"\
+ "psraw $7, %%mm6 \n\t"\
+ "packuswb %%mm6, %%mm6 \n\t"\
+ OP(%%mm6, (%1), A, d) \
+ "add %3, %1 \n\t"
+
+/* vertical filter [ 0 -1 5 5 -1 0] */
+#define QPEL_CAVSV2(A,B,C,D,E,F,OP) \
+ "movd (%0), "#F" \n\t"\
+ "movq "#C", %%mm6 \n\t"\
+ "paddw "#D", %%mm6 \n\t"\
+ "pmullw %5, %%mm6 \n\t"\
+ "add %2, %0 \n\t"\
+ "punpcklbw %%mm7, "#F" \n\t"\
+ "psubw "#B", %%mm6 \n\t"\
+ "psubw "#E", %%mm6 \n\t"\
+ "paddw %4, %%mm6 \n\t"\
+ "psraw $3, %%mm6 \n\t"\
+ "packuswb %%mm6, %%mm6 \n\t"\
+ OP(%%mm6, (%1), A, d) \
+ "add %3, %1 \n\t"
+
+/* vertical filter [ 0 -7 42 96 -2 -1] */
+#define QPEL_CAVSV3(A,B,C,D,E,F,OP) \
+ "movd (%0), "#F" \n\t"\
+ "movq "#C", %%mm6 \n\t"\
+ "pmullw %6, %%mm6 \n\t"\
+ "movq "#D", %%mm7 \n\t"\
+ "pmullw %5, %%mm7 \n\t"\
+ "psllw $3, "#B" \n\t"\
+ "psubw "#B", %%mm6 \n\t"\
+ "psraw $3, "#B" \n\t"\
+ "paddw %%mm7, %%mm6 \n\t"\
+ "paddw "#B", %%mm6 \n\t"\
+ "paddw "#E", "#E" \n\t"\
+ "pxor %%mm7, %%mm7 \n\t"\
+ "add %2, %0 \n\t"\
+ "punpcklbw %%mm7, "#F" \n\t"\
+ "psubw "#E", %%mm6 \n\t"\
+ "psraw $1, "#E" \n\t"\
+ "psubw "#F", %%mm6 \n\t"\
+ "paddw %4, %%mm6 \n\t"\
+ "psraw $7, %%mm6 \n\t"\
+ "packuswb %%mm6, %%mm6 \n\t"\
+ OP(%%mm6, (%1), A, d) \
+ "add %3, %1 \n\t"
+
+
+#define QPEL_CAVSVNUM(VOP,OP,ADD,MUL1,MUL2)\
+ int w= 2;\
+ src -= 2*srcStride;\
+ \
+ while(w--){\
+ __asm__ volatile(\
+ "pxor %%mm7, %%mm7 \n\t"\
+ "movd (%0), %%mm0 \n\t"\
+ "add %2, %0 \n\t"\
+ "movd (%0), %%mm1 \n\t"\
+ "add %2, %0 \n\t"\
+ "movd (%0), %%mm2 \n\t"\
+ "add %2, %0 \n\t"\
+ "movd (%0), %%mm3 \n\t"\
+ "add %2, %0 \n\t"\
+ "movd (%0), %%mm4 \n\t"\
+ "add %2, %0 \n\t"\
+ "punpcklbw %%mm7, %%mm0 \n\t"\
+ "punpcklbw %%mm7, %%mm1 \n\t"\
+ "punpcklbw %%mm7, %%mm2 \n\t"\
+ "punpcklbw %%mm7, %%mm3 \n\t"\
+ "punpcklbw %%mm7, %%mm4 \n\t"\
+ VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
+ VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
+ VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
+ VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
+ VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\
+ VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\
+ VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
+ VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
+ \
+ : "+a"(src), "+c"(dst)\
+ : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride), "m"(ADD), "m"(MUL1), "m"(MUL2)\
+ : "memory"\
+ );\
+ if(h==16){\
+ __asm__ volatile(\
+ VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
+ VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
+ VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\
+ VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\
+ VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
+ VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
+ VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
+ VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
+ \
+ : "+a"(src), "+c"(dst)\
+ : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride), "m"(ADD), "m"(MUL1), "m"(MUL2)\
+ : "memory"\
+ );\
+ }\
+ src += 4-(h+5)*srcStride;\
+ dst += 4-h*dstStride;\
+ }
+
+#define QPEL_CAVS(OPNAME, OP, MMX)\
+static void OPNAME ## cavs_qpel8_h_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+ int h=8;\
+ __asm__ volatile(\
+ "pxor %%mm7, %%mm7 \n\t"\
+ "movq %5, %%mm6 \n\t"\
+ "1: \n\t"\
+ "movq (%0), %%mm0 \n\t"\
+ "movq 1(%0), %%mm2 \n\t"\
+ "movq %%mm0, %%mm1 \n\t"\
+ "movq %%mm2, %%mm3 \n\t"\
+ "punpcklbw %%mm7, %%mm0 \n\t"\
+ "punpckhbw %%mm7, %%mm1 \n\t"\
+ "punpcklbw %%mm7, %%mm2 \n\t"\
+ "punpckhbw %%mm7, %%mm3 \n\t"\
+ "paddw %%mm2, %%mm0 \n\t"\
+ "paddw %%mm3, %%mm1 \n\t"\
+ "pmullw %%mm6, %%mm0 \n\t"\
+ "pmullw %%mm6, %%mm1 \n\t"\
+ "movq -1(%0), %%mm2 \n\t"\
+ "movq 2(%0), %%mm4 \n\t"\
+ "movq %%mm2, %%mm3 \n\t"\
+ "movq %%mm4, %%mm5 \n\t"\
+ "punpcklbw %%mm7, %%mm2 \n\t"\
+ "punpckhbw %%mm7, %%mm3 \n\t"\
+ "punpcklbw %%mm7, %%mm4 \n\t"\
+ "punpckhbw %%mm7, %%mm5 \n\t"\
+ "paddw %%mm4, %%mm2 \n\t"\
+ "paddw %%mm3, %%mm5 \n\t"\
+ "psubw %%mm2, %%mm0 \n\t"\
+ "psubw %%mm5, %%mm1 \n\t"\
+ "movq %6, %%mm5 \n\t"\
+ "paddw %%mm5, %%mm0 \n\t"\
+ "paddw %%mm5, %%mm1 \n\t"\
+ "psraw $3, %%mm0 \n\t"\
+ "psraw $3, %%mm1 \n\t"\
+ "packuswb %%mm1, %%mm0 \n\t"\
+ OP(%%mm0, (%1),%%mm5, q) \
+ "add %3, %0 \n\t"\
+ "add %4, %1 \n\t"\
+ "decl %2 \n\t"\
+ " jnz 1b \n\t"\
+ : "+a"(src), "+c"(dst), "+m"(h)\
+ : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_4)\
+ : "memory"\
+ );\
+}\
+\
+static inline void OPNAME ## cavs_qpel8or16_v1_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
+ QPEL_CAVSVNUM(QPEL_CAVSV1,OP,ff_pw_64,ff_pw_96,ff_pw_42) \
+}\
+\
+static inline void OPNAME ## cavs_qpel8or16_v2_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
+ QPEL_CAVSVNUM(QPEL_CAVSV2,OP,ff_pw_4,ff_pw_5,ff_pw_5) \
+}\
+\
+static inline void OPNAME ## cavs_qpel8or16_v3_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
+ QPEL_CAVSVNUM(QPEL_CAVSV3,OP,ff_pw_64,ff_pw_96,ff_pw_42) \
+}\
+\
+static void OPNAME ## cavs_qpel8_v1_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+ OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst , src , dstStride, srcStride, 8);\
+}\
+static void OPNAME ## cavs_qpel16_v1_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+ OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst , src , dstStride, srcStride, 16);\
+ OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
+}\
+\
+static void OPNAME ## cavs_qpel8_v2_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+ OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst , src , dstStride, srcStride, 8);\
+}\
+static void OPNAME ## cavs_qpel16_v2_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+ OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst , src , dstStride, srcStride, 16);\
+ OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
+}\
+\
+static void OPNAME ## cavs_qpel8_v3_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+ OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst , src , dstStride, srcStride, 8);\
+}\
+static void OPNAME ## cavs_qpel16_v3_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+ OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst , src , dstStride, srcStride, 16);\
+ OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
+}\
+\
+static void OPNAME ## cavs_qpel16_h_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+ OPNAME ## cavs_qpel8_h_ ## MMX(dst , src , dstStride, srcStride);\
+ OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\
+ src += 8*srcStride;\
+ dst += 8*dstStride;\
+ OPNAME ## cavs_qpel8_h_ ## MMX(dst , src , dstStride, srcStride);\
+ OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\
+}\
+
+#define CAVS_MC(OPNAME, SIZE, MMX) \
+static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ OPNAME ## cavs_qpel ## SIZE ## _h_ ## MMX(dst, src, stride, stride);\
+}\
+\
+static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ OPNAME ## cavs_qpel ## SIZE ## _v1_ ## MMX(dst, src, stride, stride);\
+}\
+\
+static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ OPNAME ## cavs_qpel ## SIZE ## _v2_ ## MMX(dst, src, stride, stride);\
+}\
+\
+static void ff_ ## OPNAME ## cavs_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ OPNAME ## cavs_qpel ## SIZE ## _v3_ ## MMX(dst, src, stride, stride);\
+}\
+
+#define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b " \n\t"
+#define AVG_3DNOW_OP(a,b,temp, size) \
+"mov" #size " " #b ", " #temp " \n\t"\
+"pavgusb " #temp ", " #a " \n\t"\
+"mov" #size " " #a ", " #b " \n\t"
+#define AVG_MMX2_OP(a,b,temp, size) \
+"mov" #size " " #b ", " #temp " \n\t"\
+"pavgb " #temp ", " #a " \n\t"\
+"mov" #size " " #a ", " #b " \n\t"
+
+QPEL_CAVS(put_, PUT_OP, 3dnow)
+QPEL_CAVS(avg_, AVG_3DNOW_OP, 3dnow)
+QPEL_CAVS(put_, PUT_OP, mmx2)
+QPEL_CAVS(avg_, AVG_MMX2_OP, mmx2)
+
+CAVS_MC(put_, 8, 3dnow)
+CAVS_MC(put_, 16,3dnow)
+CAVS_MC(avg_, 8, 3dnow)
+CAVS_MC(avg_, 16,3dnow)
+CAVS_MC(put_, 8, mmx2)
+CAVS_MC(put_, 16,mmx2)
+CAVS_MC(avg_, 8, mmx2)
+CAVS_MC(avg_, 16,mmx2)
+
+void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
+void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
+void ff_put_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
+void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
+
+void ff_cavsdsp_init_mmx2(DSPContext* c, AVCodecContext *avctx) {
+#define dspfunc(PFX, IDX, NUM) \
+ c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmx2; \
+ c->PFX ## _pixels_tab[IDX][ 2] = ff_ ## PFX ## NUM ## _mc20_mmx2; \
+ c->PFX ## _pixels_tab[IDX][ 4] = ff_ ## PFX ## NUM ## _mc01_mmx2; \
+ c->PFX ## _pixels_tab[IDX][ 8] = ff_ ## PFX ## NUM ## _mc02_mmx2; \
+ c->PFX ## _pixels_tab[IDX][12] = ff_ ## PFX ## NUM ## _mc03_mmx2; \
+
+ dspfunc(put_cavs_qpel, 0, 16);
+ dspfunc(put_cavs_qpel, 1, 8);
+ dspfunc(avg_cavs_qpel, 0, 16);
+ dspfunc(avg_cavs_qpel, 1, 8);
+#undef dspfunc
+ c->cavs_idct8_add = cavs_idct8_add_mmx;
+}
+
+void ff_cavsdsp_init_3dnow(DSPContext* c, AVCodecContext *avctx) {
+#define dspfunc(PFX, IDX, NUM) \
+ c->PFX ## _pixels_tab[IDX][ 0] = ff_ ## PFX ## NUM ## _mc00_mmx2; \
+ c->PFX ## _pixels_tab[IDX][ 2] = ff_ ## PFX ## NUM ## _mc20_3dnow; \
+ c->PFX ## _pixels_tab[IDX][ 4] = ff_ ## PFX ## NUM ## _mc01_3dnow; \
+ c->PFX ## _pixels_tab[IDX][ 8] = ff_ ## PFX ## NUM ## _mc02_3dnow; \
+ c->PFX ## _pixels_tab[IDX][12] = ff_ ## PFX ## NUM ## _mc03_3dnow; \
+
+ dspfunc(put_cavs_qpel, 0, 16);
+ dspfunc(put_cavs_qpel, 1, 8);
+ dspfunc(avg_cavs_qpel, 0, 16);
+ dspfunc(avg_cavs_qpel, 1, 8);
+#undef dspfunc
+ c->cavs_idct8_add = cavs_idct8_add_mmx;
+}
diff --git a/libavcodec/x86/cpuid.c b/libavcodec/x86/cpuid.c
new file mode 100644
index 0000000..664bac3
--- /dev/null
+++ b/libavcodec/x86/cpuid.c
@@ -0,0 +1,134 @@
+/*
+ * CPU detection code, extracted from mmx.h
+ * (c)1997-99 by H. Dietz and R. Fisher
+ * Converted to C and improved by Fabrice Bellard.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdlib.h>
+#include "libavutil/x86_cpu.h"
+#include "libavcodec/dsputil.h"
+
+#undef printf
+
+/* ebx saving is necessary for PIC. gcc seems unable to see it alone */
+#define cpuid(index,eax,ebx,ecx,edx)\
+ __asm__ volatile\
+ ("mov %%"REG_b", %%"REG_S"\n\t"\
+ "cpuid\n\t"\
+ "xchg %%"REG_b", %%"REG_S\
+ : "=a" (eax), "=S" (ebx),\
+ "=c" (ecx), "=d" (edx)\
+ : "0" (index));
+
+/* Function to test if multimedia instructions are supported... */
+int mm_support(void)
+{
+ int rval = 0;
+ int eax, ebx, ecx, edx;
+ int max_std_level, max_ext_level, std_caps=0, ext_caps=0;
+ x86_reg a, c;
+
+#ifdef ARCH_X86_64
+#define PUSHF "pushfq\n\t"
+#define POPF "popfq\n\t"
+#else
+#define PUSHF "pushfl\n\t"
+#define POPF "popfl\n\t"
+#endif
+ __asm__ volatile (
+ /* See if CPUID instruction is supported ... */
+ /* ... Get copies of EFLAGS into eax and ecx */
+ PUSHF
+ "pop %0\n\t"
+ "mov %0, %1\n\t"
+
+ /* ... Toggle the ID bit in one copy and store */
+ /* to the EFLAGS reg */
+ "xor $0x200000, %0\n\t"
+ "push %0\n\t"
+ POPF
+
+ /* ... Get the (hopefully modified) EFLAGS */
+ PUSHF
+ "pop %0\n\t"
+ : "=a" (a), "=c" (c)
+ :
+ : "cc"
+ );
+
+ if (a == c)
+ return 0; /* CPUID not supported */
+
+ cpuid(0, max_std_level, ebx, ecx, edx);
+
+ if(max_std_level >= 1){
+ cpuid(1, eax, ebx, ecx, std_caps);
+ if (std_caps & (1<<23))
+ rval |= FF_MM_MMX;
+ if (std_caps & (1<<25))
+ rval |= FF_MM_MMXEXT
+#ifdef HAVE_SSE
+ | FF_MM_SSE;
+ if (std_caps & (1<<26))
+ rval |= FF_MM_SSE2;
+ if (ecx & 1)
+ rval |= FF_MM_SSE3;
+ if (ecx & 0x00000200 )
+ rval |= FF_MM_SSSE3
+#endif
+ ;
+ }
+
+ cpuid(0x80000000, max_ext_level, ebx, ecx, edx);
+
+ if(max_ext_level >= 0x80000001){
+ cpuid(0x80000001, eax, ebx, ecx, ext_caps);
+ if (ext_caps & (1<<31))
+ rval |= FF_MM_3DNOW;
+ if (ext_caps & (1<<30))
+ rval |= FF_MM_3DNOWEXT;
+ if (ext_caps & (1<<23))
+ rval |= FF_MM_MMX;
+ if (ext_caps & (1<<22))
+ rval |= FF_MM_MMXEXT;
+ }
+
+#if 0
+ av_log(NULL, AV_LOG_DEBUG, "%s%s%s%s%s%s%s%s\n",
+ (rval&FF_MM_MMX) ? "MMX ":"",
+ (rval&FF_MM_MMXEXT) ? "MMX2 ":"",
+ (rval&FF_MM_SSE) ? "SSE ":"",
+ (rval&FF_MM_SSE2) ? "SSE2 ":"",
+ (rval&FF_MM_SSE3) ? "SSE3 ":"",
+ (rval&FF_MM_SSSE3) ? "SSSE3 ":"",
+ (rval&FF_MM_3DNOW) ? "3DNow ":"",
+ (rval&FF_MM_3DNOWEXT) ? "3DNowExt ":"");
+#endif
+ return rval;
+}
+
+#ifdef TEST
+int main ( void )
+{
+ int mm_flags;
+ mm_flags = mm_support();
+ printf("mm_support = 0x%08X\n",mm_flags);
+ return 0;
+}
+#endif
diff --git a/libavcodec/x86/dnxhd_mmx.c b/libavcodec/x86/dnxhd_mmx.c
new file mode 100644
index 0000000..59bcb39
--- /dev/null
+++ b/libavcodec/x86/dnxhd_mmx.c
@@ -0,0 +1,58 @@
+/*
+ * VC3/DNxHD SIMD functions
+ * Copyright (c) 2007 Baptiste Coudurier <baptiste dot coudurier at smartjog dot com>
+ *
+ * VC-3 encoder funded by the British Broadcasting Corporation
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/x86_cpu.h"
+#include "libavcodec/dnxhdenc.h"
+
+static void get_pixels_8x4_sym_sse2(DCTELEM *block, const uint8_t *pixels, int line_size)
+{
+ __asm__ volatile(
+ "pxor %%xmm7, %%xmm7 \n\t"
+ "movq (%0), %%xmm0 \n\t"
+ "add %2, %0 \n\t"
+ "movq (%0), %%xmm1 \n\t"
+ "movq (%0, %2), %%xmm2 \n\t"
+ "movq (%0, %2,2), %%xmm3 \n\t"
+ "punpcklbw %%xmm7, %%xmm0 \n\t"
+ "punpcklbw %%xmm7, %%xmm1 \n\t"
+ "punpcklbw %%xmm7, %%xmm2 \n\t"
+ "punpcklbw %%xmm7, %%xmm3 \n\t"
+ "movdqa %%xmm0, (%1) \n\t"
+ "movdqa %%xmm1, 16(%1) \n\t"
+ "movdqa %%xmm2, 32(%1) \n\t"
+ "movdqa %%xmm3, 48(%1) \n\t"
+ "movdqa %%xmm3 , 64(%1) \n\t"
+ "movdqa %%xmm2 , 80(%1) \n\t"
+ "movdqa %%xmm1 , 96(%1) \n\t"
+ "movdqa %%xmm0, 112(%1) \n\t"
+ : "+r" (pixels)
+ : "r" (block), "r" ((x86_reg)line_size)
+ );
+}
+
+void ff_dnxhd_init_mmx(DNXHDEncContext *ctx)
+{
+ if (mm_flags & FF_MM_SSE2) {
+ ctx->get_pixels_8x4_sym = get_pixels_8x4_sym_sse2;
+ }
+}
diff --git a/libavcodec/x86/dsputil_h264_template_mmx.c b/libavcodec/x86/dsputil_h264_template_mmx.c
new file mode 100644
index 0000000..43f4393
--- /dev/null
+++ b/libavcodec/x86/dsputil_h264_template_mmx.c
@@ -0,0 +1,304 @@
+/*
+ * Copyright (c) 2005 Zoltan Hidvegi <hzoli -a- hzoli -d- com>,
+ * Loren Merritt
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * MMX optimized version of (put|avg)_h264_chroma_mc8.
+ * H264_CHROMA_MC8_TMPL must be defined to the desired function name
+ * H264_CHROMA_OP must be defined to empty for put and pavgb/pavgusb for avg
+ * H264_CHROMA_MC8_MV0 must be defined to a (put|avg)_pixels8 function
+ */
+static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y, const uint64_t *rnd_reg)
+{
+ DECLARE_ALIGNED_8(uint64_t, AA);
+ DECLARE_ALIGNED_8(uint64_t, DD);
+ int i;
+
+ if(y==0 && x==0) {
+ /* no filter needed */
+ H264_CHROMA_MC8_MV0(dst, src, stride, h);
+ return;
+ }
+
+ assert(x<8 && y<8 && x>=0 && y>=0);
+
+ if(y==0 || x==0)
+ {
+ /* 1 dimensional filter only */
+ const int dxy = x ? 1 : stride;
+
+ __asm__ volatile(
+ "movd %0, %%mm5\n\t"
+ "movq %1, %%mm4\n\t"
+ "movq %2, %%mm6\n\t" /* mm6 = rnd >> 3 */
+ "punpcklwd %%mm5, %%mm5\n\t"
+ "punpckldq %%mm5, %%mm5\n\t" /* mm5 = B = x */
+ "pxor %%mm7, %%mm7\n\t"
+ "psubw %%mm5, %%mm4\n\t" /* mm4 = A = 8-x */
+ :: "rm"(x+y), "m"(ff_pw_8), "m"(*(rnd_reg+1)));
+
+ for(i=0; i<h; i++) {
+ __asm__ volatile(
+ /* mm0 = src[0..7], mm1 = src[1..8] */
+ "movq %0, %%mm0\n\t"
+ "movq %1, %%mm2\n\t"
+ :: "m"(src[0]), "m"(src[dxy]));
+
+ __asm__ volatile(
+ /* [mm0,mm1] = A * src[0..7] */
+ /* [mm2,mm3] = B * src[1..8] */
+ "movq %%mm0, %%mm1\n\t"
+ "movq %%mm2, %%mm3\n\t"
+ "punpcklbw %%mm7, %%mm0\n\t"
+ "punpckhbw %%mm7, %%mm1\n\t"
+ "punpcklbw %%mm7, %%mm2\n\t"
+ "punpckhbw %%mm7, %%mm3\n\t"
+ "pmullw %%mm4, %%mm0\n\t"
+ "pmullw %%mm4, %%mm1\n\t"
+ "pmullw %%mm5, %%mm2\n\t"
+ "pmullw %%mm5, %%mm3\n\t"
+
+ /* dst[0..7] = (A * src[0..7] + B * src[1..8] + (rnd >> 3)) >> 3 */
+ "paddw %%mm6, %%mm0\n\t"
+ "paddw %%mm6, %%mm1\n\t"
+ "paddw %%mm2, %%mm0\n\t"
+ "paddw %%mm3, %%mm1\n\t"
+ "psrlw $3, %%mm0\n\t"
+ "psrlw $3, %%mm1\n\t"
+ "packuswb %%mm1, %%mm0\n\t"
+ H264_CHROMA_OP(%0, %%mm0)
+ "movq %%mm0, %0\n\t"
+ : "=m" (dst[0]));
+
+ src += stride;
+ dst += stride;
+ }
+ return;
+ }
+
+ /* general case, bilinear */
+ __asm__ volatile("movd %2, %%mm4\n\t"
+ "movd %3, %%mm6\n\t"
+ "punpcklwd %%mm4, %%mm4\n\t"
+ "punpcklwd %%mm6, %%mm6\n\t"
+ "punpckldq %%mm4, %%mm4\n\t" /* mm4 = x words */
+ "punpckldq %%mm6, %%mm6\n\t" /* mm6 = y words */
+ "movq %%mm4, %%mm5\n\t"
+ "pmullw %%mm6, %%mm4\n\t" /* mm4 = x * y */
+ "psllw $3, %%mm5\n\t"
+ "psllw $3, %%mm6\n\t"
+ "movq %%mm5, %%mm7\n\t"
+ "paddw %%mm6, %%mm7\n\t"
+ "movq %%mm4, %1\n\t" /* DD = x * y */
+ "psubw %%mm4, %%mm5\n\t" /* mm5 = B = 8x - xy */
+ "psubw %%mm4, %%mm6\n\t" /* mm6 = C = 8y - xy */
+ "paddw %4, %%mm4\n\t"
+ "psubw %%mm7, %%mm4\n\t" /* mm4 = A = xy - (8x+8y) + 64 */
+ "pxor %%mm7, %%mm7\n\t"
+ "movq %%mm4, %0\n\t"
+ : "=m" (AA), "=m" (DD) : "rm" (x), "rm" (y), "m" (ff_pw_64));
+
+ __asm__ volatile(
+ /* mm0 = src[0..7], mm1 = src[1..8] */
+ "movq %0, %%mm0\n\t"
+ "movq %1, %%mm1\n\t"
+ : : "m" (src[0]), "m" (src[1]));
+
+ for(i=0; i<h; i++) {
+ src += stride;
+
+ __asm__ volatile(
+ /* mm2 = A * src[0..3] + B * src[1..4] */
+ /* mm3 = A * src[4..7] + B * src[5..8] */
+ "movq %%mm0, %%mm2\n\t"
+ "movq %%mm1, %%mm3\n\t"
+ "punpckhbw %%mm7, %%mm0\n\t"
+ "punpcklbw %%mm7, %%mm1\n\t"
+ "punpcklbw %%mm7, %%mm2\n\t"
+ "punpckhbw %%mm7, %%mm3\n\t"
+ "pmullw %0, %%mm0\n\t"
+ "pmullw %0, %%mm2\n\t"
+ "pmullw %%mm5, %%mm1\n\t"
+ "pmullw %%mm5, %%mm3\n\t"
+ "paddw %%mm1, %%mm2\n\t"
+ "paddw %%mm0, %%mm3\n\t"
+ : : "m" (AA));
+
+ __asm__ volatile(
+ /* [mm2,mm3] += C * src[0..7] */
+ "movq %0, %%mm0\n\t"
+ "movq %%mm0, %%mm1\n\t"
+ "punpcklbw %%mm7, %%mm0\n\t"
+ "punpckhbw %%mm7, %%mm1\n\t"
+ "pmullw %%mm6, %%mm0\n\t"
+ "pmullw %%mm6, %%mm1\n\t"
+ "paddw %%mm0, %%mm2\n\t"
+ "paddw %%mm1, %%mm3\n\t"
+ : : "m" (src[0]));
+
+ __asm__ volatile(
+ /* [mm2,mm3] += D * src[1..8] */
+ "movq %1, %%mm1\n\t"
+ "movq %%mm1, %%mm0\n\t"
+ "movq %%mm1, %%mm4\n\t"
+ "punpcklbw %%mm7, %%mm0\n\t"
+ "punpckhbw %%mm7, %%mm4\n\t"
+ "pmullw %2, %%mm0\n\t"
+ "pmullw %2, %%mm4\n\t"
+ "paddw %%mm0, %%mm2\n\t"
+ "paddw %%mm4, %%mm3\n\t"
+ "movq %0, %%mm0\n\t"
+ : : "m" (src[0]), "m" (src[1]), "m" (DD));
+
+ __asm__ volatile(
+ /* dst[0..7] = ([mm2,mm3] + rnd) >> 6 */
+ "paddw %1, %%mm2\n\t"
+ "paddw %1, %%mm3\n\t"
+ "psrlw $6, %%mm2\n\t"
+ "psrlw $6, %%mm3\n\t"
+ "packuswb %%mm3, %%mm2\n\t"
+ H264_CHROMA_OP(%0, %%mm2)
+ "movq %%mm2, %0\n\t"
+ : "=m" (dst[0]) : "m" (*rnd_reg));
+ dst+= stride;
+ }
+}
+
+static void H264_CHROMA_MC4_TMPL(uint8_t *dst/*align 4*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y, const uint64_t *rnd_reg)
+{
+ __asm__ volatile(
+ "pxor %%mm7, %%mm7 \n\t"
+ "movd %5, %%mm2 \n\t"
+ "movd %6, %%mm3 \n\t"
+ "movq "MANGLE(ff_pw_8)", %%mm4\n\t"
+ "movq "MANGLE(ff_pw_8)", %%mm5\n\t"
+ "punpcklwd %%mm2, %%mm2 \n\t"
+ "punpcklwd %%mm3, %%mm3 \n\t"
+ "punpcklwd %%mm2, %%mm2 \n\t"
+ "punpcklwd %%mm3, %%mm3 \n\t"
+ "psubw %%mm2, %%mm4 \n\t"
+ "psubw %%mm3, %%mm5 \n\t"
+
+ "movd (%1), %%mm0 \n\t"
+ "movd 1(%1), %%mm6 \n\t"
+ "add %3, %1 \n\t"
+ "punpcklbw %%mm7, %%mm0 \n\t"
+ "punpcklbw %%mm7, %%mm6 \n\t"
+ "pmullw %%mm4, %%mm0 \n\t"
+ "pmullw %%mm2, %%mm6 \n\t"
+ "paddw %%mm0, %%mm6 \n\t"
+
+ "1: \n\t"
+ "movd (%1), %%mm0 \n\t"
+ "movd 1(%1), %%mm1 \n\t"
+ "add %3, %1 \n\t"
+ "punpcklbw %%mm7, %%mm0 \n\t"
+ "punpcklbw %%mm7, %%mm1 \n\t"
+ "pmullw %%mm4, %%mm0 \n\t"
+ "pmullw %%mm2, %%mm1 \n\t"
+ "paddw %%mm0, %%mm1 \n\t"
+ "movq %%mm1, %%mm0 \n\t"
+ "pmullw %%mm5, %%mm6 \n\t"
+ "pmullw %%mm3, %%mm1 \n\t"
+ "paddw %4, %%mm6 \n\t"
+ "paddw %%mm6, %%mm1 \n\t"
+ "psrlw $6, %%mm1 \n\t"
+ "packuswb %%mm1, %%mm1 \n\t"
+ H264_CHROMA_OP4((%0), %%mm1, %%mm6)
+ "movd %%mm1, (%0) \n\t"
+ "add %3, %0 \n\t"
+ "movd (%1), %%mm6 \n\t"
+ "movd 1(%1), %%mm1 \n\t"
+ "add %3, %1 \n\t"
+ "punpcklbw %%mm7, %%mm6 \n\t"
+ "punpcklbw %%mm7, %%mm1 \n\t"
+ "pmullw %%mm4, %%mm6 \n\t"
+ "pmullw %%mm2, %%mm1 \n\t"
+ "paddw %%mm6, %%mm1 \n\t"
+ "movq %%mm1, %%mm6 \n\t"
+ "pmullw %%mm5, %%mm0 \n\t"
+ "pmullw %%mm3, %%mm1 \n\t"
+ "paddw %4, %%mm0 \n\t"
+ "paddw %%mm0, %%mm1 \n\t"
+ "psrlw $6, %%mm1 \n\t"
+ "packuswb %%mm1, %%mm1 \n\t"
+ H264_CHROMA_OP4((%0), %%mm1, %%mm0)
+ "movd %%mm1, (%0) \n\t"
+ "add %3, %0 \n\t"
+ "sub $2, %2 \n\t"
+ "jnz 1b \n\t"
+ : "+r"(dst), "+r"(src), "+r"(h)
+ : "r"((x86_reg)stride), "m"(*rnd_reg), "m"(x), "m"(y)
+ );
+}
+
+#ifdef H264_CHROMA_MC2_TMPL
+static void H264_CHROMA_MC2_TMPL(uint8_t *dst/*align 2*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
+{
+ int tmp = ((1<<16)-1)*x + 8;
+ int CD= tmp*y;
+ int AB= (tmp<<3) - CD;
+ __asm__ volatile(
+ /* mm5 = {A,B,A,B} */
+ /* mm6 = {C,D,C,D} */
+ "movd %0, %%mm5\n\t"
+ "movd %1, %%mm6\n\t"
+ "punpckldq %%mm5, %%mm5\n\t"
+ "punpckldq %%mm6, %%mm6\n\t"
+ "pxor %%mm7, %%mm7\n\t"
+ /* mm0 = src[0,1,1,2] */
+ "movd %2, %%mm2\n\t"
+ "punpcklbw %%mm7, %%mm2\n\t"
+ "pshufw $0x94, %%mm2, %%mm2\n\t"
+ :: "r"(AB), "r"(CD), "m"(src[0]));
+
+
+ __asm__ volatile(
+ "1:\n\t"
+ "add %4, %1\n\t"
+ /* mm1 = A * src[0,1] + B * src[1,2] */
+ "movq %%mm2, %%mm1\n\t"
+ "pmaddwd %%mm5, %%mm1\n\t"
+ /* mm0 = src[0,1,1,2] */
+ "movd (%1), %%mm0\n\t"
+ "punpcklbw %%mm7, %%mm0\n\t"
+ "pshufw $0x94, %%mm0, %%mm0\n\t"
+ /* mm1 += C * src[0,1] + D * src[1,2] */
+ "movq %%mm0, %%mm2\n\t"
+ "pmaddwd %%mm6, %%mm0\n\t"
+ "paddw %3, %%mm1\n\t"
+ "paddw %%mm0, %%mm1\n\t"
+ /* dst[0,1] = pack((mm1 + 32) >> 6) */
+ "psrlw $6, %%mm1\n\t"
+ "packssdw %%mm7, %%mm1\n\t"
+ "packuswb %%mm7, %%mm1\n\t"
+ H264_CHROMA_OP4((%0), %%mm1, %%mm3)
+ "movd %%mm1, %%esi\n\t"
+ "movw %%si, (%0)\n\t"
+ "add %4, %0\n\t"
+ "sub $1, %2\n\t"
+ "jnz 1b\n\t"
+ : "+r" (dst), "+r"(src), "+r"(h)
+ : "m" (ff_pw_32), "r"((x86_reg)stride)
+ : "%esi");
+
+}
+#endif
+
diff --git a/libavcodec/i386/dsputil_h264_template_ssse3.c b/libavcodec/x86/dsputil_h264_template_ssse3.c
similarity index 100%
rename from libavcodec/i386/dsputil_h264_template_ssse3.c
rename to libavcodec/x86/dsputil_h264_template_ssse3.c
diff --git a/libavcodec/x86/dsputil_mmx.c b/libavcodec/x86/dsputil_mmx.c
new file mode 100644
index 0000000..1d93351
--- /dev/null
+++ b/libavcodec/x86/dsputil_mmx.c
@@ -0,0 +1,2993 @@
+/*
+ * MMX optimized DSP utils
+ * Copyright (c) 2000, 2001 Fabrice Bellard.
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni at gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * MMX optimization by Nick Kurshev <nickols_k at mail.ru>
+ */
+
+#include "libavutil/x86_cpu.h"
+#include "libavcodec/dsputil.h"
+#include "libavcodec/h263.h"
+#include "libavcodec/mpegvideo.h"
+#include "libavcodec/simple_idct.h"
+#include "dsputil_mmx.h"
+#include "mmx.h"
+#include "vp3dsp_mmx.h"
+#include "vp3dsp_sse2.h"
+#include "idct_xvid.h"
+
+//#undef NDEBUG
+//#include <assert.h>
+
+int mm_flags; /* multimedia extension flags */
+
+/* pixel operations */
+DECLARE_ALIGNED_8 (const uint64_t, ff_bone) = 0x0101010101010101ULL;
+DECLARE_ALIGNED_8 (const uint64_t, ff_wtwo) = 0x0002000200020002ULL;
+
+DECLARE_ALIGNED_16(const uint64_t, ff_pdw_80000000[2]) =
+{0x8000000080000000ULL, 0x8000000080000000ULL};
+
+DECLARE_ALIGNED_8 (const uint64_t, ff_pw_3 ) = 0x0003000300030003ULL;
+DECLARE_ALIGNED_8 (const uint64_t, ff_pw_4 ) = 0x0004000400040004ULL;
+DECLARE_ALIGNED_16(const xmm_reg, ff_pw_5 ) = {0x0005000500050005ULL, 0x0005000500050005ULL};
+DECLARE_ALIGNED_16(const xmm_reg, ff_pw_8 ) = {0x0008000800080008ULL, 0x0008000800080008ULL};
+DECLARE_ALIGNED_8 (const uint64_t, ff_pw_15 ) = 0x000F000F000F000FULL;
+DECLARE_ALIGNED_16(const xmm_reg, ff_pw_16 ) = {0x0010001000100010ULL, 0x0010001000100010ULL};
+DECLARE_ALIGNED_8 (const uint64_t, ff_pw_20 ) = 0x0014001400140014ULL;
+DECLARE_ALIGNED_16(const xmm_reg, ff_pw_28 ) = {0x001C001C001C001CULL, 0x001C001C001C001CULL};
+DECLARE_ALIGNED_16(const xmm_reg, ff_pw_32 ) = {0x0020002000200020ULL, 0x0020002000200020ULL};
+DECLARE_ALIGNED_8 (const uint64_t, ff_pw_42 ) = 0x002A002A002A002AULL;
+DECLARE_ALIGNED_8 (const uint64_t, ff_pw_64 ) = 0x0040004000400040ULL;
+DECLARE_ALIGNED_8 (const uint64_t, ff_pw_96 ) = 0x0060006000600060ULL;
+DECLARE_ALIGNED_8 (const uint64_t, ff_pw_128) = 0x0080008000800080ULL;
+DECLARE_ALIGNED_8 (const uint64_t, ff_pw_255) = 0x00ff00ff00ff00ffULL;
+
+DECLARE_ALIGNED_8 (const uint64_t, ff_pb_1 ) = 0x0101010101010101ULL;
+DECLARE_ALIGNED_8 (const uint64_t, ff_pb_3 ) = 0x0303030303030303ULL;
+DECLARE_ALIGNED_8 (const uint64_t, ff_pb_7 ) = 0x0707070707070707ULL;
+DECLARE_ALIGNED_8 (const uint64_t, ff_pb_1F ) = 0x1F1F1F1F1F1F1F1FULL;
+DECLARE_ALIGNED_8 (const uint64_t, ff_pb_3F ) = 0x3F3F3F3F3F3F3F3FULL;
+DECLARE_ALIGNED_8 (const uint64_t, ff_pb_81 ) = 0x8181818181818181ULL;
+DECLARE_ALIGNED_8 (const uint64_t, ff_pb_A1 ) = 0xA1A1A1A1A1A1A1A1ULL;
+DECLARE_ALIGNED_8 (const uint64_t, ff_pb_FC ) = 0xFCFCFCFCFCFCFCFCULL;
+
+DECLARE_ALIGNED_16(const double, ff_pd_1[2]) = { 1.0, 1.0 };
+DECLARE_ALIGNED_16(const double, ff_pd_2[2]) = { 2.0, 2.0 };
+
+#define JUMPALIGN() __asm__ volatile (ASMALIGN(3)::)
+#define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%" #regd ", %%" #regd ::)
+
+#define MOVQ_BFE(regd) \
+ __asm__ volatile ( \
+ "pcmpeqd %%" #regd ", %%" #regd " \n\t"\
+ "paddb %%" #regd ", %%" #regd " \n\t" ::)
+
+#ifndef PIC
+#define MOVQ_BONE(regd) __asm__ volatile ("movq %0, %%" #regd " \n\t" ::"m"(ff_bone))
+#define MOVQ_WTWO(regd) __asm__ volatile ("movq %0, %%" #regd " \n\t" ::"m"(ff_wtwo))
+#else
+// for shared library it's better to use this way for accessing constants
+// pcmpeqd -> -1
+#define MOVQ_BONE(regd) \
+ __asm__ volatile ( \
+ "pcmpeqd %%" #regd ", %%" #regd " \n\t" \
+ "psrlw $15, %%" #regd " \n\t" \
+ "packuswb %%" #regd ", %%" #regd " \n\t" ::)
+
+#define MOVQ_WTWO(regd) \
+ __asm__ volatile ( \
+ "pcmpeqd %%" #regd ", %%" #regd " \n\t" \
+ "psrlw $15, %%" #regd " \n\t" \
+ "psllw $1, %%" #regd " \n\t"::)
+
+#endif
+
+// using regr as temporary and for the output result
+// first argument is unmodifed and second is trashed
+// regfe is supposed to contain 0xfefefefefefefefe
+#define PAVGB_MMX_NO_RND(rega, regb, regr, regfe) \
+ "movq " #rega ", " #regr " \n\t"\
+ "pand " #regb ", " #regr " \n\t"\
+ "pxor " #rega ", " #regb " \n\t"\
+ "pand " #regfe "," #regb " \n\t"\
+ "psrlq $1, " #regb " \n\t"\
+ "paddb " #regb ", " #regr " \n\t"
+
+#define PAVGB_MMX(rega, regb, regr, regfe) \
+ "movq " #rega ", " #regr " \n\t"\
+ "por " #regb ", " #regr " \n\t"\
+ "pxor " #rega ", " #regb " \n\t"\
+ "pand " #regfe "," #regb " \n\t"\
+ "psrlq $1, " #regb " \n\t"\
+ "psubb " #regb ", " #regr " \n\t"
+
+// mm6 is supposed to contain 0xfefefefefefefefe
+#define PAVGBP_MMX_NO_RND(rega, regb, regr, regc, regd, regp) \
+ "movq " #rega ", " #regr " \n\t"\
+ "movq " #regc ", " #regp " \n\t"\
+ "pand " #regb ", " #regr " \n\t"\
+ "pand " #regd ", " #regp " \n\t"\
+ "pxor " #rega ", " #regb " \n\t"\
+ "pxor " #regc ", " #regd " \n\t"\
+ "pand %%mm6, " #regb " \n\t"\
+ "pand %%mm6, " #regd " \n\t"\
+ "psrlq $1, " #regb " \n\t"\
+ "psrlq $1, " #regd " \n\t"\
+ "paddb " #regb ", " #regr " \n\t"\
+ "paddb " #regd ", " #regp " \n\t"
+
+#define PAVGBP_MMX(rega, regb, regr, regc, regd, regp) \
+ "movq " #rega ", " #regr " \n\t"\
+ "movq " #regc ", " #regp " \n\t"\
+ "por " #regb ", " #regr " \n\t"\
+ "por " #regd ", " #regp " \n\t"\
+ "pxor " #rega ", " #regb " \n\t"\
+ "pxor " #regc ", " #regd " \n\t"\
+ "pand %%mm6, " #regb " \n\t"\
+ "pand %%mm6, " #regd " \n\t"\
+ "psrlq $1, " #regd " \n\t"\
+ "psrlq $1, " #regb " \n\t"\
+ "psubb " #regb ", " #regr " \n\t"\
+ "psubb " #regd ", " #regp " \n\t"
+
+/***********************************/
+/* MMX no rounding */
+#define DEF(x, y) x ## _no_rnd_ ## y ##_mmx
+#define SET_RND MOVQ_WONE
+#define PAVGBP(a, b, c, d, e, f) PAVGBP_MMX_NO_RND(a, b, c, d, e, f)
+#define PAVGB(a, b, c, e) PAVGB_MMX_NO_RND(a, b, c, e)
+
+#include "dsputil_mmx_rnd_template.c"
+
+#undef DEF
+#undef SET_RND
+#undef PAVGBP
+#undef PAVGB
+/***********************************/
+/* MMX rounding */
+
+#define DEF(x, y) x ## _ ## y ##_mmx
+#define SET_RND MOVQ_WTWO
+#define PAVGBP(a, b, c, d, e, f) PAVGBP_MMX(a, b, c, d, e, f)
+#define PAVGB(a, b, c, e) PAVGB_MMX(a, b, c, e)
+
+#include "dsputil_mmx_rnd_template.c"
+
+#undef DEF
+#undef SET_RND
+#undef PAVGBP
+#undef PAVGB
+
+/***********************************/
+/* 3Dnow specific */
+
+#define DEF(x) x ## _3dnow
+#define PAVGB "pavgusb"
+
+#include "dsputil_mmx_avg_template.c"
+
+#undef DEF
+#undef PAVGB
+
+/***********************************/
+/* MMX2 specific */
+
+#define DEF(x) x ## _mmx2
+
+/* Introduced only in MMX2 set */
+#define PAVGB "pavgb"
+
+#include "dsputil_mmx_avg_template.c"
+
+#undef DEF
+#undef PAVGB
+
+#define put_no_rnd_pixels16_mmx put_pixels16_mmx
+#define put_no_rnd_pixels8_mmx put_pixels8_mmx
+#define put_pixels16_mmx2 put_pixels16_mmx
+#define put_pixels8_mmx2 put_pixels8_mmx
+#define put_pixels4_mmx2 put_pixels4_mmx
+#define put_no_rnd_pixels16_mmx2 put_no_rnd_pixels16_mmx
+#define put_no_rnd_pixels8_mmx2 put_no_rnd_pixels8_mmx
+#define put_pixels16_3dnow put_pixels16_mmx
+#define put_pixels8_3dnow put_pixels8_mmx
+#define put_pixels4_3dnow put_pixels4_mmx
+#define put_no_rnd_pixels16_3dnow put_no_rnd_pixels16_mmx
+#define put_no_rnd_pixels8_3dnow put_no_rnd_pixels8_mmx
+
+/***********************************/
+/* standard MMX */
+
+void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
+{
+ const DCTELEM *p;
+ uint8_t *pix;
+
+ /* read the pixels */
+ p = block;
+ pix = pixels;
+ /* unrolled loop */
+ __asm__ volatile(
+ "movq %3, %%mm0 \n\t"
+ "movq 8%3, %%mm1 \n\t"
+ "movq 16%3, %%mm2 \n\t"
+ "movq 24%3, %%mm3 \n\t"
+ "movq 32%3, %%mm4 \n\t"
+ "movq 40%3, %%mm5 \n\t"
+ "movq 48%3, %%mm6 \n\t"
+ "movq 56%3, %%mm7 \n\t"
+ "packuswb %%mm1, %%mm0 \n\t"
+ "packuswb %%mm3, %%mm2 \n\t"
+ "packuswb %%mm5, %%mm4 \n\t"
+ "packuswb %%mm7, %%mm6 \n\t"
+ "movq %%mm0, (%0) \n\t"
+ "movq %%mm2, (%0, %1) \n\t"
+ "movq %%mm4, (%0, %1, 2) \n\t"
+ "movq %%mm6, (%0, %2) \n\t"
+ ::"r" (pix), "r" ((x86_reg)line_size), "r" ((x86_reg)line_size*3), "m"(*p)
+ :"memory");
+ pix += line_size*4;
+ p += 32;
+
+ // if here would be an exact copy of the code above
+ // compiler would generate some very strange code
+ // thus using "r"
+ __asm__ volatile(
+ "movq (%3), %%mm0 \n\t"
+ "movq 8(%3), %%mm1 \n\t"
+ "movq 16(%3), %%mm2 \n\t"
+ "movq 24(%3), %%mm3 \n\t"
+ "movq 32(%3), %%mm4 \n\t"
+ "movq 40(%3), %%mm5 \n\t"
+ "movq 48(%3), %%mm6 \n\t"
+ "movq 56(%3), %%mm7 \n\t"
+ "packuswb %%mm1, %%mm0 \n\t"
+ "packuswb %%mm3, %%mm2 \n\t"
+ "packuswb %%mm5, %%mm4 \n\t"
+ "packuswb %%mm7, %%mm6 \n\t"
+ "movq %%mm0, (%0) \n\t"
+ "movq %%mm2, (%0, %1) \n\t"
+ "movq %%mm4, (%0, %1, 2) \n\t"
+ "movq %%mm6, (%0, %2) \n\t"
+ ::"r" (pix), "r" ((x86_reg)line_size), "r" ((x86_reg)line_size*3), "r"(p)
+ :"memory");
+}
+
+static DECLARE_ALIGNED_8(const unsigned char, vector128[8]) =
+ { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 };
+
+void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
+{
+ int i;
+
+ movq_m2r(*vector128, mm1);
+ for (i = 0; i < 8; i++) {
+ movq_m2r(*(block), mm0);
+ packsswb_m2r(*(block + 4), mm0);
+ block += 8;
+ paddb_r2r(mm1, mm0);
+ movq_r2m(mm0, *pixels);
+ pixels += line_size;
+ }
+}
+
+/* IDCT "add" path: add an 8x8 block of signed 16-bit coefficients to the
+ * existing 8x8 pixel area at 'pixels' (stride line_size), clamping the
+ * result to [0,255] via packuswb. Two rows are processed per iteration
+ * (four iterations total); mm7 must hold zero (set via MOVQ_ZERO) for
+ * the punpck*bw byte-to-word expansion of the current pixels. */
+void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
+{
+ const DCTELEM *p;
+ uint8_t *pix;
+ int i;
+
+ /* read the pixels */
+ p = block;
+ pix = pixels;
+ MOVQ_ZERO(mm7);
+ i = 4;
+ do {
+ __asm__ volatile(
+ "movq (%2), %%mm0 \n\t"
+ "movq 8(%2), %%mm1 \n\t"
+ "movq 16(%2), %%mm2 \n\t"
+ "movq 24(%2), %%mm3 \n\t"
+ "movq %0, %%mm4 \n\t"
+ "movq %1, %%mm6 \n\t"
+ "movq %%mm4, %%mm5 \n\t"
+ "punpcklbw %%mm7, %%mm4 \n\t"
+ "punpckhbw %%mm7, %%mm5 \n\t"
+ "paddsw %%mm4, %%mm0 \n\t"
+ "paddsw %%mm5, %%mm1 \n\t"
+ "movq %%mm6, %%mm5 \n\t"
+ "punpcklbw %%mm7, %%mm6 \n\t"
+ "punpckhbw %%mm7, %%mm5 \n\t"
+ "paddsw %%mm6, %%mm2 \n\t"
+ "paddsw %%mm5, %%mm3 \n\t"
+ "packuswb %%mm1, %%mm0 \n\t"
+ "packuswb %%mm3, %%mm2 \n\t"
+ "movq %%mm0, %0 \n\t"
+ "movq %%mm2, %1 \n\t"
+ :"+m"(*pix), "+m"(*(pix+line_size))
+ :"r"(p)
+ :"memory");
+ pix += line_size*2;
+ p += 16;
+ } while (--i);
+}
+
+/* Copy h rows of 4 pixels from 'pixels' to 'block', both strided by
+ * line_size. Two row pairs are copied per loop iteration and the counter
+ * is decremented by 4, so h must be a multiple of 4. */
+static void put_pixels4_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+ __asm__ volatile(
+ "lea (%3, %3), %%"REG_a" \n\t" /* REG_a = 2*line_size */
+ ASMALIGN(3)
+ "1: \n\t"
+ "movd (%1), %%mm0 \n\t"
+ "movd (%1, %3), %%mm1 \n\t"
+ "movd %%mm0, (%2) \n\t"
+ "movd %%mm1, (%2, %3) \n\t"
+ "add %%"REG_a", %1 \n\t"
+ "add %%"REG_a", %2 \n\t"
+ "movd (%1), %%mm0 \n\t"
+ "movd (%1, %3), %%mm1 \n\t"
+ "movd %%mm0, (%2) \n\t"
+ "movd %%mm1, (%2, %3) \n\t"
+ "add %%"REG_a", %1 \n\t"
+ "add %%"REG_a", %2 \n\t"
+ "subl $4, %0 \n\t"
+ "jnz 1b \n\t"
+ : "+g"(h), "+r" (pixels), "+r" (block)
+ : "r"((x86_reg)line_size)
+ : "%"REG_a, "memory"
+ );
+}
+
+/* Copy h rows of 8 pixels from 'pixels' to 'block', both strided by
+ * line_size. Same unrolling as put_pixels4_mmx (4 rows per iteration),
+ * so h must be a multiple of 4. */
+static void put_pixels8_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+ __asm__ volatile(
+ "lea (%3, %3), %%"REG_a" \n\t" /* REG_a = 2*line_size */
+ ASMALIGN(3)
+ "1: \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq (%1, %3), %%mm1 \n\t"
+ "movq %%mm0, (%2) \n\t"
+ "movq %%mm1, (%2, %3) \n\t"
+ "add %%"REG_a", %1 \n\t"
+ "add %%"REG_a", %2 \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq (%1, %3), %%mm1 \n\t"
+ "movq %%mm0, (%2) \n\t"
+ "movq %%mm1, (%2, %3) \n\t"
+ "add %%"REG_a", %1 \n\t"
+ "add %%"REG_a", %2 \n\t"
+ "subl $4, %0 \n\t"
+ "jnz 1b \n\t"
+ : "+g"(h), "+r" (pixels), "+r" (block)
+ : "r"((x86_reg)line_size)
+ : "%"REG_a, "memory"
+ );
+}
+
+/* Copy h rows of 16 pixels from 'pixels' to 'block' (stride line_size)
+ * using two movq per row. 4 rows per iteration; h must be a multiple
+ * of 4. */
+static void put_pixels16_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+ __asm__ volatile(
+ "lea (%3, %3), %%"REG_a" \n\t" /* REG_a = 2*line_size */
+ ASMALIGN(3)
+ "1: \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq 8(%1), %%mm4 \n\t"
+ "movq (%1, %3), %%mm1 \n\t"
+ "movq 8(%1, %3), %%mm5 \n\t"
+ "movq %%mm0, (%2) \n\t"
+ "movq %%mm4, 8(%2) \n\t"
+ "movq %%mm1, (%2, %3) \n\t"
+ "movq %%mm5, 8(%2, %3) \n\t"
+ "add %%"REG_a", %1 \n\t"
+ "add %%"REG_a", %2 \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq 8(%1), %%mm4 \n\t"
+ "movq (%1, %3), %%mm1 \n\t"
+ "movq 8(%1, %3), %%mm5 \n\t"
+ "movq %%mm0, (%2) \n\t"
+ "movq %%mm4, 8(%2) \n\t"
+ "movq %%mm1, (%2, %3) \n\t"
+ "movq %%mm5, 8(%2, %3) \n\t"
+ "add %%"REG_a", %1 \n\t"
+ "add %%"REG_a", %2 \n\t"
+ "subl $4, %0 \n\t"
+ "jnz 1b \n\t"
+ : "+g"(h), "+r" (pixels), "+r" (block)
+ : "r"((x86_reg)line_size)
+ : "%"REG_a, "memory"
+ );
+}
+
+/* SSE2 copy of h rows of 16 pixels, 4 rows per iteration (h must be a
+ * multiple of 4). Loads are unaligned (movdqu); stores use movdqa, so
+ * 'block' must be 16-byte aligned. %4 caches 3*line_size for the
+ * fourth-row addressing. */
+static void put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+ __asm__ volatile(
+ "1: \n\t"
+ "movdqu (%1), %%xmm0 \n\t"
+ "movdqu (%1,%3), %%xmm1 \n\t"
+ "movdqu (%1,%3,2), %%xmm2 \n\t"
+ "movdqu (%1,%4), %%xmm3 \n\t"
+ "movdqa %%xmm0, (%2) \n\t"
+ "movdqa %%xmm1, (%2,%3) \n\t"
+ "movdqa %%xmm2, (%2,%3,2) \n\t"
+ "movdqa %%xmm3, (%2,%4) \n\t"
+ "subl $4, %0 \n\t"
+ "lea (%1,%3,4), %1 \n\t"
+ "lea (%2,%3,4), %2 \n\t"
+ "jnz 1b \n\t"
+ : "+g"(h), "+r" (pixels), "+r" (block)
+ : "r"((x86_reg)line_size), "r"((x86_reg)3L*line_size)
+ : "memory"
+ );
+}
+
+/* SSE2 averaging copy: block[i] = avg(block[i], pixels[i]) for h rows of
+ * 16 pixels, using pavgb (per-byte average with rounding). 4 rows per
+ * iteration; h must be a multiple of 4. Unaligned loads from 'pixels',
+ * aligned (movdqa) accesses to 'block'. */
+static void avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+ __asm__ volatile(
+ "1: \n\t"
+ "movdqu (%1), %%xmm0 \n\t"
+ "movdqu (%1,%3), %%xmm1 \n\t"
+ "movdqu (%1,%3,2), %%xmm2 \n\t"
+ "movdqu (%1,%4), %%xmm3 \n\t"
+ "pavgb (%2), %%xmm0 \n\t"
+ "pavgb (%2,%3), %%xmm1 \n\t"
+ "pavgb (%2,%3,2), %%xmm2 \n\t"
+ "pavgb (%2,%4), %%xmm3 \n\t"
+ "movdqa %%xmm0, (%2) \n\t"
+ "movdqa %%xmm1, (%2,%3) \n\t"
+ "movdqa %%xmm2, (%2,%3,2) \n\t"
+ "movdqa %%xmm3, (%2,%4) \n\t"
+ "subl $4, %0 \n\t"
+ "lea (%1,%3,4), %1 \n\t"
+ "lea (%2,%3,4), %2 \n\t"
+ "jnz 1b \n\t"
+ : "+g"(h), "+r" (pixels), "+r" (block)
+ : "r"((x86_reg)line_size), "r"((x86_reg)3L*line_size)
+ : "memory"
+ );
+}
+
+/* Define a function zeroing n consecutive 64-coefficient (128-byte) DCT
+ * blocks with MMX stores, 32 bytes per iteration. The pointer is biased
+ * to the end and indexed with a negative offset counting up to zero, so
+ * the loop condition is a simple sign test (js). */
+#define CLEAR_BLOCKS(name,n) \
+static void name(DCTELEM *blocks)\
+{\
+ __asm__ volatile(\
+ "pxor %%mm7, %%mm7 \n\t"\
+ "mov %1, %%"REG_a" \n\t"\
+ "1: \n\t"\
+ "movq %%mm7, (%0, %%"REG_a") \n\t"\
+ "movq %%mm7, 8(%0, %%"REG_a") \n\t"\
+ "movq %%mm7, 16(%0, %%"REG_a") \n\t"\
+ "movq %%mm7, 24(%0, %%"REG_a") \n\t"\
+ "add $32, %%"REG_a" \n\t"\
+ " js 1b \n\t"\
+ : : "r" (((uint8_t *)blocks)+128*n),\
+ "i" (-128*n)\
+ : "%"REG_a\
+ );\
+}
+/* Clear all 6 blocks of a macroblock / a single block, respectively. */
+CLEAR_BLOCKS(clear_blocks_mmx, 6)
+CLEAR_BLOCKS(clear_block_mmx, 1)
+
+/* Zero one 64-coefficient (128-byte) DCT block with eight aligned SSE
+ * stores; 'block' must be 16-byte aligned (movaps). */
+static void clear_block_sse(DCTELEM *block)
+{
+ __asm__ volatile(
+ "xorps %%xmm0, %%xmm0 \n"
+ "movaps %%xmm0, (%0) \n"
+ "movaps %%xmm0, 16(%0) \n"
+ "movaps %%xmm0, 32(%0) \n"
+ "movaps %%xmm0, 48(%0) \n"
+ "movaps %%xmm0, 64(%0) \n"
+ "movaps %%xmm0, 80(%0) \n"
+ "movaps %%xmm0, 96(%0) \n"
+ "movaps %%xmm0, 112(%0) \n"
+ :: "r"(block)
+ : "memory"
+ );
+}
+
+/* dst[i] += src[i] for i in [0, w): 16 bytes per MMX iteration while
+ * i < w-15, then a scalar loop for the remaining 0..15 tail bytes.
+ * paddb wraps modulo 256, matching the scalar uint8_t addition.
+ * NOTE(review): the asm writes dst through a register operand but
+ * declares no "memory" clobber — presumably tolerated because dst is
+ * re-read only by the scalar tail; confirm against upstream. */
+static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){
+ x86_reg i=0;
+ __asm__ volatile(
+ "jmp 2f \n\t"
+ "1: \n\t"
+ "movq (%1, %0), %%mm0 \n\t"
+ "movq (%2, %0), %%mm1 \n\t"
+ "paddb %%mm0, %%mm1 \n\t"
+ "movq %%mm1, (%2, %0) \n\t"
+ "movq 8(%1, %0), %%mm0 \n\t"
+ "movq 8(%2, %0), %%mm1 \n\t"
+ "paddb %%mm0, %%mm1 \n\t"
+ "movq %%mm1, 8(%2, %0) \n\t"
+ "add $16, %0 \n\t"
+ "2: \n\t"
+ "cmp %3, %0 \n\t"
+ " js 1b \n\t"
+ : "+r" (i)
+ : "r"(src), "r"(dst), "r"((x86_reg)w-15)
+ );
+ for(; i<w; i++)
+ dst[i+0] += src[i+0];
+}
+
+/* dst[i] = src1[i] + src2[i] (wrapping byte add) for i in [0, w):
+ * 16 bytes per MMX iteration while i < w-15, scalar loop for the tail.
+ * NOTE(review): like add_bytes_mmx, no "memory" clobber is declared for
+ * the stores through %1 — verify against upstream before touching. */
+static void add_bytes_l2_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
+ x86_reg i=0;
+ __asm__ volatile(
+ "jmp 2f \n\t"
+ "1: \n\t"
+ "movq (%2, %0), %%mm0 \n\t"
+ "movq 8(%2, %0), %%mm1 \n\t"
+ "paddb (%3, %0), %%mm0 \n\t"
+ "paddb 8(%3, %0), %%mm1 \n\t"
+ "movq %%mm0, (%1, %0) \n\t"
+ "movq %%mm1, 8(%1, %0) \n\t"
+ "add $16, %0 \n\t"
+ "2: \n\t"
+ "cmp %4, %0 \n\t"
+ " js 1b \n\t"
+ : "+r" (i)
+ : "r"(dst), "r"(src1), "r"(src2), "r"((x86_reg)w-15)
+ );
+ for(; i<w; i++)
+ dst[i] = src1[i] + src2[i];
+}
+
+#define H263_LOOP_FILTER \
+ "pxor %%mm7, %%mm7 \n\t"\
+ "movq %0, %%mm0 \n\t"\
+ "movq %0, %%mm1 \n\t"\
+ "movq %3, %%mm2 \n\t"\
+ "movq %3, %%mm3 \n\t"\
+ "punpcklbw %%mm7, %%mm0 \n\t"\
+ "punpckhbw %%mm7, %%mm1 \n\t"\
+ "punpcklbw %%mm7, %%mm2 \n\t"\
+ "punpckhbw %%mm7, %%mm3 \n\t"\
+ "psubw %%mm2, %%mm0 \n\t"\
+ "psubw %%mm3, %%mm1 \n\t"\
+ "movq %1, %%mm2 \n\t"\
+ "movq %1, %%mm3 \n\t"\
+ "movq %2, %%mm4 \n\t"\
+ "movq %2, %%mm5 \n\t"\
+ "punpcklbw %%mm7, %%mm2 \n\t"\
+ "punpckhbw %%mm7, %%mm3 \n\t"\
+ "punpcklbw %%mm7, %%mm4 \n\t"\
+ "punpckhbw %%mm7, %%mm5 \n\t"\
+ "psubw %%mm2, %%mm4 \n\t"\
+ "psubw %%mm3, %%mm5 \n\t"\
+ "psllw $2, %%mm4 \n\t"\
+ "psllw $2, %%mm5 \n\t"\
+ "paddw %%mm0, %%mm4 \n\t"\
+ "paddw %%mm1, %%mm5 \n\t"\
+ "pxor %%mm6, %%mm6 \n\t"\
+ "pcmpgtw %%mm4, %%mm6 \n\t"\
+ "pcmpgtw %%mm5, %%mm7 \n\t"\
+ "pxor %%mm6, %%mm4 \n\t"\
+ "pxor %%mm7, %%mm5 \n\t"\
+ "psubw %%mm6, %%mm4 \n\t"\
+ "psubw %%mm7, %%mm5 \n\t"\
+ "psrlw $3, %%mm4 \n\t"\
+ "psrlw $3, %%mm5 \n\t"\
+ "packuswb %%mm5, %%mm4 \n\t"\
+ "packsswb %%mm7, %%mm6 \n\t"\
+ "pxor %%mm7, %%mm7 \n\t"\
+ "movd %4, %%mm2 \n\t"\
+ "punpcklbw %%mm2, %%mm2 \n\t"\
+ "punpcklbw %%mm2, %%mm2 \n\t"\
+ "punpcklbw %%mm2, %%mm2 \n\t"\
+ "psubusb %%mm4, %%mm2 \n\t"\
+ "movq %%mm2, %%mm3 \n\t"\
+ "psubusb %%mm4, %%mm3 \n\t"\
+ "psubb %%mm3, %%mm2 \n\t"\
+ "movq %1, %%mm3 \n\t"\
+ "movq %2, %%mm4 \n\t"\
+ "pxor %%mm6, %%mm3 \n\t"\
+ "pxor %%mm6, %%mm4 \n\t"\
+ "paddusb %%mm2, %%mm3 \n\t"\
+ "psubusb %%mm2, %%mm4 \n\t"\
+ "pxor %%mm6, %%mm3 \n\t"\
+ "pxor %%mm6, %%mm4 \n\t"\
+ "paddusb %%mm2, %%mm2 \n\t"\
+ "packsswb %%mm1, %%mm0 \n\t"\
+ "pcmpgtb %%mm0, %%mm7 \n\t"\
+ "pxor %%mm7, %%mm0 \n\t"\
+ "psubb %%mm7, %%mm0 \n\t"\
+ "movq %%mm0, %%mm1 \n\t"\
+ "psubusb %%mm2, %%mm0 \n\t"\
+ "psubb %%mm0, %%mm1 \n\t"\
+ "pand %5, %%mm1 \n\t"\
+ "psrlw $2, %%mm1 \n\t"\
+ "pxor %%mm7, %%mm1 \n\t"\
+ "psubb %%mm7, %%mm1 \n\t"\
+ "movq %0, %%mm5 \n\t"\
+ "movq %3, %%mm6 \n\t"\
+ "psubb %%mm1, %%mm5 \n\t"\
+ "paddb %%mm1, %%mm6 \n\t"
+
+/* H.263 vertical loop filter: filters the horizontal block edge at
+ * 'src', operating on the four 8-byte rows src-2*stride .. src+1*stride.
+ * Runs the shared H263_LOOP_FILTER core with 2*strength (strength looked
+ * up from qscale) and writes the filtered rows back in place. Compiled
+ * out when no H.263-family codec is enabled. */
+static void h263_v_loop_filter_mmx(uint8_t *src, int stride, int qscale){
+ if(ENABLE_ANY_H263) {
+ const int strength= ff_h263_loop_filter_strength[qscale];
+
+ __asm__ volatile(
+
+ H263_LOOP_FILTER
+
+ "movq %%mm3, %1 \n\t"
+ "movq %%mm4, %2 \n\t"
+ "movq %%mm5, %0 \n\t"
+ "movq %%mm6, %3 \n\t"
+ : "+m" (*(uint64_t*)(src - 2*stride)),
+ "+m" (*(uint64_t*)(src - 1*stride)),
+ "+m" (*(uint64_t*)(src + 0*stride)),
+ "+m" (*(uint64_t*)(src + 1*stride))
+ : "g" (2*strength), "m"(ff_pb_FC)
+ );
+ }
+}
+
+/* Transpose a 4x4 block of bytes: reads 4 dwords from src (stride
+ * src_stride), interleaves them with punpck, and writes the transposed
+ * 4 dwords to dst (stride dst_stride). */
+static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride){
+ __asm__ volatile( //FIXME could save 1 instruction if done as 8x4 ...
+ "movd %4, %%mm0 \n\t"
+ "movd %5, %%mm1 \n\t"
+ "movd %6, %%mm2 \n\t"
+ "movd %7, %%mm3 \n\t"
+ "punpcklbw %%mm1, %%mm0 \n\t"
+ "punpcklbw %%mm3, %%mm2 \n\t"
+ "movq %%mm0, %%mm1 \n\t"
+ "punpcklwd %%mm2, %%mm0 \n\t"
+ "punpckhwd %%mm2, %%mm1 \n\t"
+ "movd %%mm0, %0 \n\t"
+ "punpckhdq %%mm0, %%mm0 \n\t"
+ "movd %%mm0, %1 \n\t"
+ "movd %%mm1, %2 \n\t"
+ "punpckhdq %%mm1, %%mm1 \n\t"
+ "movd %%mm1, %3 \n\t"
+
+ : "=m" (*(uint32_t*)(dst + 0*dst_stride)),
+ "=m" (*(uint32_t*)(dst + 1*dst_stride)),
+ "=m" (*(uint32_t*)(dst + 2*dst_stride)),
+ "=m" (*(uint32_t*)(dst + 3*dst_stride))
+ : "m" (*(uint32_t*)(src + 0*src_stride)),
+ "m" (*(uint32_t*)(src + 1*src_stride)),
+ "m" (*(uint32_t*)(src + 2*src_stride)),
+ "m" (*(uint32_t*)(src + 3*src_stride))
+ );
+}
+
+/* H.263 horizontal loop filter: filters the vertical block edge at
+ * 'src'. The 8x4 column window around the edge (src-2 .. src+1 for 8
+ * rows) is transposed into an aligned temp via two transpose4x4 calls,
+ * run through the shared H263_LOOP_FILTER core, then the second asm
+ * block transposes mm3..mm6 back and scatters the columns to the rows.
+ * Compiled out when no H.263-family codec is enabled. */
+static void h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale){
+ if(ENABLE_ANY_H263) {
+ const int strength= ff_h263_loop_filter_strength[qscale];
+ DECLARE_ALIGNED(8, uint64_t, temp[4]);
+ uint8_t *btemp= (uint8_t*)temp;
+
+ src -= 2; /* point at the leftmost of the 4 columns being filtered */
+
+ transpose4x4(btemp , src , 8, stride);
+ transpose4x4(btemp+4, src + 4*stride, 8, stride);
+ __asm__ volatile(
+ H263_LOOP_FILTER // 5 3 4 6
+
+ : "+m" (temp[0]),
+ "+m" (temp[1]),
+ "+m" (temp[2]),
+ "+m" (temp[3])
+ : "g" (2*strength), "m"(ff_pb_FC)
+ );
+
+ /* transpose the filter results (still live in mm3..mm6 from the
+ * previous asm block) back into column order and store them */
+ __asm__ volatile(
+ "movq %%mm5, %%mm1 \n\t"
+ "movq %%mm4, %%mm0 \n\t"
+ "punpcklbw %%mm3, %%mm5 \n\t"
+ "punpcklbw %%mm6, %%mm4 \n\t"
+ "punpckhbw %%mm3, %%mm1 \n\t"
+ "punpckhbw %%mm6, %%mm0 \n\t"
+ "movq %%mm5, %%mm3 \n\t"
+ "movq %%mm1, %%mm6 \n\t"
+ "punpcklwd %%mm4, %%mm5 \n\t"
+ "punpcklwd %%mm0, %%mm1 \n\t"
+ "punpckhwd %%mm4, %%mm3 \n\t"
+ "punpckhwd %%mm0, %%mm6 \n\t"
+ "movd %%mm5, (%0) \n\t"
+ "punpckhdq %%mm5, %%mm5 \n\t"
+ "movd %%mm5, (%0,%2) \n\t"
+ "movd %%mm3, (%0,%2,2) \n\t"
+ "punpckhdq %%mm3, %%mm3 \n\t"
+ "movd %%mm3, (%0,%3) \n\t"
+ "movd %%mm1, (%1) \n\t"
+ "punpckhdq %%mm1, %%mm1 \n\t"
+ "movd %%mm1, (%1,%2) \n\t"
+ "movd %%mm6, (%1,%2,2) \n\t"
+ "punpckhdq %%mm6, %%mm6 \n\t"
+ "movd %%mm6, (%1,%3) \n\t"
+ :: "r" (src),
+ "r" (src + 4*stride),
+ "r" ((x86_reg) stride ),
+ "r" ((x86_reg)(3*stride))
+ );
+ }
+}
+
+/* Replicate the border pixels of a width x height image (row stride
+ * 'wrap') outwards by w pixels on each side, so that motion compensation
+ * may read outside the picture. Left/right edges first (per-row byte
+ * splat via punpck), then top/bottom rows (plus corners) by copying
+ * whole extended rows. This MMX version can only handle w==8 || w==16. */
+static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w)
+{
+ uint8_t *ptr, *last_line;
+ int i;
+
+ last_line = buf + (height - 1) * wrap;
+ /* left and right */
+ ptr = buf;
+ if(w==8)
+ {
+ __asm__ volatile(
+ "1: \n\t"
+ "movd (%0), %%mm0 \n\t"
+ "punpcklbw %%mm0, %%mm0 \n\t"
+ "punpcklwd %%mm0, %%mm0 \n\t"
+ "punpckldq %%mm0, %%mm0 \n\t"
+ "movq %%mm0, -8(%0) \n\t"
+ "movq -8(%0, %2), %%mm1 \n\t"
+ "punpckhbw %%mm1, %%mm1 \n\t"
+ "punpckhwd %%mm1, %%mm1 \n\t"
+ "punpckhdq %%mm1, %%mm1 \n\t"
+ "movq %%mm1, (%0, %2) \n\t"
+ "add %1, %0 \n\t"
+ "cmp %3, %0 \n\t"
+ " jb 1b \n\t"
+ : "+r" (ptr)
+ : "r" ((x86_reg)wrap), "r" ((x86_reg)width), "r" (ptr + wrap*height)
+ );
+ }
+ else
+ {
+ /* w==16: same splat, but store two quadwords per side */
+ __asm__ volatile(
+ "1: \n\t"
+ "movd (%0), %%mm0 \n\t"
+ "punpcklbw %%mm0, %%mm0 \n\t"
+ "punpcklwd %%mm0, %%mm0 \n\t"
+ "punpckldq %%mm0, %%mm0 \n\t"
+ "movq %%mm0, -8(%0) \n\t"
+ "movq %%mm0, -16(%0) \n\t"
+ "movq -8(%0, %2), %%mm1 \n\t"
+ "punpckhbw %%mm1, %%mm1 \n\t"
+ "punpckhwd %%mm1, %%mm1 \n\t"
+ "punpckhdq %%mm1, %%mm1 \n\t"
+ "movq %%mm1, (%0, %2) \n\t"
+ "movq %%mm1, 8(%0, %2) \n\t"
+ "add %1, %0 \n\t"
+ "cmp %3, %0 \n\t"
+ " jb 1b \n\t"
+ : "+r" (ptr)
+ : "r" ((x86_reg)wrap), "r" ((x86_reg)width), "r" (ptr + wrap*height)
+ );
+ }
+
+ /* 4 rows above and below per iteration, copying the first/last
+ * extended row (including the already-filled left/right margins) */
+ for(i=0;i<w;i+=4) {
+ /* top and bottom (and hopefully also the corners) */
+ ptr= buf - (i + 1) * wrap - w;
+ __asm__ volatile(
+ "1: \n\t"
+ "movq (%1, %0), %%mm0 \n\t"
+ "movq %%mm0, (%0) \n\t"
+ "movq %%mm0, (%0, %2) \n\t"
+ "movq %%mm0, (%0, %2, 2) \n\t"
+ "movq %%mm0, (%0, %3) \n\t"
+ "add $8, %0 \n\t"
+ "cmp %4, %0 \n\t"
+ " jb 1b \n\t"
+ : "+r" (ptr)
+ : "r" ((x86_reg)buf - (x86_reg)ptr - w), "r" ((x86_reg)-wrap), "r" ((x86_reg)-wrap*3), "r" (ptr+width+2*w)
+ );
+ ptr= last_line + (i + 1) * wrap - w;
+ __asm__ volatile(
+ "1: \n\t"
+ "movq (%1, %0), %%mm0 \n\t"
+ "movq %%mm0, (%0) \n\t"
+ "movq %%mm0, (%0, %2) \n\t"
+ "movq %%mm0, (%0, %2, 2) \n\t"
+ "movq %%mm0, (%0, %3) \n\t"
+ "add $8, %0 \n\t"
+ "cmp %4, %0 \n\t"
+ " jb 1b \n\t"
+ : "+r" (ptr)
+ : "r" ((x86_reg)last_line - (x86_reg)ptr - w), "r" ((x86_reg)wrap), "r" ((x86_reg)wrap*3), "r" (ptr+width+2*w)
+ );
+ }
+}
+
+#define PAETH(cpu, abs3)\
+static void add_png_paeth_prediction_##cpu(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp)\
+{\
+ x86_reg i = -bpp;\
+ x86_reg end = w-3;\
+ __asm__ volatile(\
+ "pxor %%mm7, %%mm7 \n"\
+ "movd (%1,%0), %%mm0 \n"\
+ "movd (%2,%0), %%mm1 \n"\
+ "punpcklbw %%mm7, %%mm0 \n"\
+ "punpcklbw %%mm7, %%mm1 \n"\
+ "add %4, %0 \n"\
+ "1: \n"\
+ "movq %%mm1, %%mm2 \n"\
+ "movd (%2,%0), %%mm1 \n"\
+ "movq %%mm2, %%mm3 \n"\
+ "punpcklbw %%mm7, %%mm1 \n"\
+ "movq %%mm2, %%mm4 \n"\
+ "psubw %%mm1, %%mm3 \n"\
+ "psubw %%mm0, %%mm4 \n"\
+ "movq %%mm3, %%mm5 \n"\
+ "paddw %%mm4, %%mm5 \n"\
+ abs3\
+ "movq %%mm4, %%mm6 \n"\
+ "pminsw %%mm5, %%mm6 \n"\
+ "pcmpgtw %%mm6, %%mm3 \n"\
+ "pcmpgtw %%mm5, %%mm4 \n"\
+ "movq %%mm4, %%mm6 \n"\
+ "pand %%mm3, %%mm4 \n"\
+ "pandn %%mm3, %%mm6 \n"\
+ "pandn %%mm0, %%mm3 \n"\
+ "movd (%3,%0), %%mm0 \n"\
+ "pand %%mm1, %%mm6 \n"\
+ "pand %%mm4, %%mm2 \n"\
+ "punpcklbw %%mm7, %%mm0 \n"\
+ "movq %6, %%mm5 \n"\
+ "paddw %%mm6, %%mm0 \n"\
+ "paddw %%mm2, %%mm3 \n"\
+ "paddw %%mm3, %%mm0 \n"\
+ "pand %%mm5, %%mm0 \n"\
+ "movq %%mm0, %%mm3 \n"\
+ "packuswb %%mm3, %%mm3 \n"\
+ "movd %%mm3, (%1,%0) \n"\
+ "add %4, %0 \n"\
+ "cmp %5, %0 \n"\
+ "jle 1b \n"\
+ :"+r"(i)\
+ :"r"(dst), "r"(top), "r"(src), "r"((x86_reg)bpp), "g"(end),\
+ "m"(ff_pw_255)\
+ :"memory"\
+ );\
+}
+
+#define ABS3_MMX2\
+ "psubw %%mm5, %%mm7 \n"\
+ "pmaxsw %%mm7, %%mm5 \n"\
+ "pxor %%mm6, %%mm6 \n"\
+ "pxor %%mm7, %%mm7 \n"\
+ "psubw %%mm3, %%mm6 \n"\
+ "psubw %%mm4, %%mm7 \n"\
+ "pmaxsw %%mm6, %%mm3 \n"\
+ "pmaxsw %%mm7, %%mm4 \n"\
+ "pxor %%mm7, %%mm7 \n"
+
+#define ABS3_SSSE3\
+ "pabsw %%mm3, %%mm3 \n"\
+ "pabsw %%mm4, %%mm4 \n"\
+ "pabsw %%mm5, %%mm5 \n"
+
+PAETH(mmx2, ABS3_MMX2)
+#ifdef HAVE_SSSE3
+PAETH(ssse3, ABS3_SSSE3)
+#endif
+
+/* Emit one output row of the MPEG-4 qpel vertical lowpass filter:
+ * (x1*20 - x2*6 + x3*3 - x4 + rnd) >> 5, packed and stored via OP.
+ * m3..m6 carry already-loaded rows; in0/in1/in2/in7 are memory taps.
+ * Note: the pw_20 and pw_3 macro arguments are unused — the constants
+ * are referenced directly through MANGLE(ff_pw_20)/MANGLE(ff_pw_3). */
+#define QPEL_V_LOW(m3,m4,m5,m6, pw_20, pw_3, rnd, in0, in1, in2, in7, out, OP)\
+ "paddw " #m4 ", " #m3 " \n\t" /* x1 */\
+ "movq "MANGLE(ff_pw_20)", %%mm4 \n\t" /* 20 */\
+ "pmullw " #m3 ", %%mm4 \n\t" /* 20x1 */\
+ "movq "#in7", " #m3 " \n\t" /* d */\
+ "movq "#in0", %%mm5 \n\t" /* D */\
+ "paddw " #m3 ", %%mm5 \n\t" /* x4 */\
+ "psubw %%mm5, %%mm4 \n\t" /* 20x1 - x4 */\
+ "movq "#in1", %%mm5 \n\t" /* C */\
+ "movq "#in2", %%mm6 \n\t" /* B */\
+ "paddw " #m6 ", %%mm5 \n\t" /* x3 */\
+ "paddw " #m5 ", %%mm6 \n\t" /* x2 */\
+ "paddw %%mm6, %%mm6 \n\t" /* 2x2 */\
+ "psubw %%mm6, %%mm5 \n\t" /* -2x2 + x3 */\
+ "pmullw "MANGLE(ff_pw_3)", %%mm5 \n\t" /* -6x2 + 3x3 */\
+ "paddw " #rnd ", %%mm4 \n\t" /* x2 */\
+ "paddw %%mm4, %%mm5 \n\t" /* 20x1 - 6x2 + 3x3 - x4 */\
+ "psraw $5, %%mm5 \n\t"\
+ "packuswb %%mm5, %%mm5 \n\t"\
+ OP(%%mm5, out, %%mm7, d)
+
+/* Expand the MPEG-4 quarter-pel HORIZONTAL lowpass filters for a given
+ * OPNAME (put/avg) and rounding: 16- and 8-pixel-wide variants for MMX2
+ * (uses pshufw; processes one row per loop iteration, h rows total) and
+ * 3DNow fallbacks that compute the taps in scalar C into a temp buffer
+ * and only use MMX for the (x+ROUNDER)>>5 + pack + OP stage. The filter
+ * per output pixel is (a*20 - b*6 + c*3 - d + rnd) >> 5 with mirrored
+ * edge taps. Register allocation and asm ordering are exact; body kept
+ * byte-identical. */
+#define QPEL_BASE(OPNAME, ROUNDER, RND, OP_MMX2, OP_3DNOW)\
+static void OPNAME ## mpeg4_qpel16_h_lowpass_mmx2(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
+ uint64_t temp;\
+\
+ __asm__ volatile(\
+ "pxor %%mm7, %%mm7 \n\t"\
+ "1: \n\t"\
+ "movq (%0), %%mm0 \n\t" /* ABCDEFGH */\
+ "movq %%mm0, %%mm1 \n\t" /* ABCDEFGH */\
+ "movq %%mm0, %%mm2 \n\t" /* ABCDEFGH */\
+ "punpcklbw %%mm7, %%mm0 \n\t" /* 0A0B0C0D */\
+ "punpckhbw %%mm7, %%mm1 \n\t" /* 0E0F0G0H */\
+ "pshufw $0x90, %%mm0, %%mm5 \n\t" /* 0A0A0B0C */\
+ "pshufw $0x41, %%mm0, %%mm6 \n\t" /* 0B0A0A0B */\
+ "movq %%mm2, %%mm3 \n\t" /* ABCDEFGH */\
+ "movq %%mm2, %%mm4 \n\t" /* ABCDEFGH */\
+ "psllq $8, %%mm2 \n\t" /* 0ABCDEFG */\
+ "psllq $16, %%mm3 \n\t" /* 00ABCDEF */\
+ "psllq $24, %%mm4 \n\t" /* 000ABCDE */\
+ "punpckhbw %%mm7, %%mm2 \n\t" /* 0D0E0F0G */\
+ "punpckhbw %%mm7, %%mm3 \n\t" /* 0C0D0E0F */\
+ "punpckhbw %%mm7, %%mm4 \n\t" /* 0B0C0D0E */\
+ "paddw %%mm3, %%mm5 \n\t" /* b */\
+ "paddw %%mm2, %%mm6 \n\t" /* c */\
+ "paddw %%mm5, %%mm5 \n\t" /* 2b */\
+ "psubw %%mm5, %%mm6 \n\t" /* c - 2b */\
+ "pshufw $0x06, %%mm0, %%mm5 \n\t" /* 0C0B0A0A */\
+ "pmullw "MANGLE(ff_pw_3)", %%mm6 \n\t" /* 3c - 6b */\
+ "paddw %%mm4, %%mm0 \n\t" /* a */\
+ "paddw %%mm1, %%mm5 \n\t" /* d */\
+ "pmullw "MANGLE(ff_pw_20)", %%mm0 \n\t" /* 20a */\
+ "psubw %%mm5, %%mm0 \n\t" /* 20a - d */\
+ "paddw %6, %%mm6 \n\t"\
+ "paddw %%mm6, %%mm0 \n\t" /* 20a - 6b + 3c - d */\
+ "psraw $5, %%mm0 \n\t"\
+ "movq %%mm0, %5 \n\t"\
+ /* mm1=EFGH, mm2=DEFG, mm3=CDEF, mm4=BCDE, mm7=0 */\
+ \
+ "movq 5(%0), %%mm0 \n\t" /* FGHIJKLM */\
+ "movq %%mm0, %%mm5 \n\t" /* FGHIJKLM */\
+ "movq %%mm0, %%mm6 \n\t" /* FGHIJKLM */\
+ "psrlq $8, %%mm0 \n\t" /* GHIJKLM0 */\
+ "psrlq $16, %%mm5 \n\t" /* HIJKLM00 */\
+ "punpcklbw %%mm7, %%mm0 \n\t" /* 0G0H0I0J */\
+ "punpcklbw %%mm7, %%mm5 \n\t" /* 0H0I0J0K */\
+ "paddw %%mm0, %%mm2 \n\t" /* b */\
+ "paddw %%mm5, %%mm3 \n\t" /* c */\
+ "paddw %%mm2, %%mm2 \n\t" /* 2b */\
+ "psubw %%mm2, %%mm3 \n\t" /* c - 2b */\
+ "movq %%mm6, %%mm2 \n\t" /* FGHIJKLM */\
+ "psrlq $24, %%mm6 \n\t" /* IJKLM000 */\
+ "punpcklbw %%mm7, %%mm2 \n\t" /* 0F0G0H0I */\
+ "punpcklbw %%mm7, %%mm6 \n\t" /* 0I0J0K0L */\
+ "pmullw "MANGLE(ff_pw_3)", %%mm3 \n\t" /* 3c - 6b */\
+ "paddw %%mm2, %%mm1 \n\t" /* a */\
+ "paddw %%mm6, %%mm4 \n\t" /* d */\
+ "pmullw "MANGLE(ff_pw_20)", %%mm1 \n\t" /* 20a */\
+ "psubw %%mm4, %%mm3 \n\t" /* - 6b +3c - d */\
+ "paddw %6, %%mm1 \n\t"\
+ "paddw %%mm1, %%mm3 \n\t" /* 20a - 6b +3c - d */\
+ "psraw $5, %%mm3 \n\t"\
+ "movq %5, %%mm1 \n\t"\
+ "packuswb %%mm3, %%mm1 \n\t"\
+ OP_MMX2(%%mm1, (%1),%%mm4, q)\
+ /* mm0= GHIJ, mm2=FGHI, mm5=HIJK, mm6=IJKL, mm7=0 */\
+ \
+ "movq 9(%0), %%mm1 \n\t" /* JKLMNOPQ */\
+ "movq %%mm1, %%mm4 \n\t" /* JKLMNOPQ */\
+ "movq %%mm1, %%mm3 \n\t" /* JKLMNOPQ */\
+ "psrlq $8, %%mm1 \n\t" /* KLMNOPQ0 */\
+ "psrlq $16, %%mm4 \n\t" /* LMNOPQ00 */\
+ "punpcklbw %%mm7, %%mm1 \n\t" /* 0K0L0M0N */\
+ "punpcklbw %%mm7, %%mm4 \n\t" /* 0L0M0N0O */\
+ "paddw %%mm1, %%mm5 \n\t" /* b */\
+ "paddw %%mm4, %%mm0 \n\t" /* c */\
+ "paddw %%mm5, %%mm5 \n\t" /* 2b */\
+ "psubw %%mm5, %%mm0 \n\t" /* c - 2b */\
+ "movq %%mm3, %%mm5 \n\t" /* JKLMNOPQ */\
+ "psrlq $24, %%mm3 \n\t" /* MNOPQ000 */\
+ "pmullw "MANGLE(ff_pw_3)", %%mm0 \n\t" /* 3c - 6b */\
+ "punpcklbw %%mm7, %%mm3 \n\t" /* 0M0N0O0P */\
+ "paddw %%mm3, %%mm2 \n\t" /* d */\
+ "psubw %%mm2, %%mm0 \n\t" /* -6b + 3c - d */\
+ "movq %%mm5, %%mm2 \n\t" /* JKLMNOPQ */\
+ "punpcklbw %%mm7, %%mm2 \n\t" /* 0J0K0L0M */\
+ "punpckhbw %%mm7, %%mm5 \n\t" /* 0N0O0P0Q */\
+ "paddw %%mm2, %%mm6 \n\t" /* a */\
+ "pmullw "MANGLE(ff_pw_20)", %%mm6 \n\t" /* 20a */\
+ "paddw %6, %%mm0 \n\t"\
+ "paddw %%mm6, %%mm0 \n\t" /* 20a - 6b + 3c - d */\
+ "psraw $5, %%mm0 \n\t"\
+ /* mm1=KLMN, mm2=JKLM, mm3=MNOP, mm4=LMNO, mm5=NOPQ mm7=0 */\
+ \
+ "paddw %%mm5, %%mm3 \n\t" /* a */\
+ "pshufw $0xF9, %%mm5, %%mm6 \n\t" /* 0O0P0Q0Q */\
+ "paddw %%mm4, %%mm6 \n\t" /* b */\
+ "pshufw $0xBE, %%mm5, %%mm4 \n\t" /* 0P0Q0Q0P */\
+ "pshufw $0x6F, %%mm5, %%mm5 \n\t" /* 0Q0Q0P0O */\
+ "paddw %%mm1, %%mm4 \n\t" /* c */\
+ "paddw %%mm2, %%mm5 \n\t" /* d */\
+ "paddw %%mm6, %%mm6 \n\t" /* 2b */\
+ "psubw %%mm6, %%mm4 \n\t" /* c - 2b */\
+ "pmullw "MANGLE(ff_pw_20)", %%mm3 \n\t" /* 20a */\
+ "pmullw "MANGLE(ff_pw_3)", %%mm4 \n\t" /* 3c - 6b */\
+ "psubw %%mm5, %%mm3 \n\t" /* -6b + 3c - d */\
+ "paddw %6, %%mm4 \n\t"\
+ "paddw %%mm3, %%mm4 \n\t" /* 20a - 6b + 3c - d */\
+ "psraw $5, %%mm4 \n\t"\
+ "packuswb %%mm4, %%mm0 \n\t"\
+ OP_MMX2(%%mm0, 8(%1), %%mm4, q)\
+ \
+ "add %3, %0 \n\t"\
+ "add %4, %1 \n\t"\
+ "decl %2 \n\t"\
+ " jnz 1b \n\t"\
+ : "+a"(src), "+c"(dst), "+D"(h)\
+ : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(temp), "m"(ROUNDER)\
+ : "memory"\
+ );\
+}\
+\
+static void OPNAME ## mpeg4_qpel16_h_lowpass_3dnow(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
+ int i;\
+ int16_t temp[16];\
+ /* quick HACK, XXX FIXME MUST be optimized */\
+ for(i=0; i<h; i++)\
+ {\
+ temp[ 0]= (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]);\
+ temp[ 1]= (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]);\
+ temp[ 2]= (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]);\
+ temp[ 3]= (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]);\
+ temp[ 4]= (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]);\
+ temp[ 5]= (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]);\
+ temp[ 6]= (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]);\
+ temp[ 7]= (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]);\
+ temp[ 8]= (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]);\
+ temp[ 9]= (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]);\
+ temp[10]= (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]);\
+ temp[11]= (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]);\
+ temp[12]= (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]);\
+ temp[13]= (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]);\
+ temp[14]= (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]);\
+ temp[15]= (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]);\
+ __asm__ volatile(\
+ "movq (%0), %%mm0 \n\t"\
+ "movq 8(%0), %%mm1 \n\t"\
+ "paddw %2, %%mm0 \n\t"\
+ "paddw %2, %%mm1 \n\t"\
+ "psraw $5, %%mm0 \n\t"\
+ "psraw $5, %%mm1 \n\t"\
+ "packuswb %%mm1, %%mm0 \n\t"\
+ OP_3DNOW(%%mm0, (%1), %%mm1, q)\
+ "movq 16(%0), %%mm0 \n\t"\
+ "movq 24(%0), %%mm1 \n\t"\
+ "paddw %2, %%mm0 \n\t"\
+ "paddw %2, %%mm1 \n\t"\
+ "psraw $5, %%mm0 \n\t"\
+ "psraw $5, %%mm1 \n\t"\
+ "packuswb %%mm1, %%mm0 \n\t"\
+ OP_3DNOW(%%mm0, 8(%1), %%mm1, q)\
+ :: "r"(temp), "r"(dst), "m"(ROUNDER)\
+ : "memory"\
+ );\
+ dst+=dstStride;\
+ src+=srcStride;\
+ }\
+}\
+\
+static void OPNAME ## mpeg4_qpel8_h_lowpass_mmx2(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
+ __asm__ volatile(\
+ "pxor %%mm7, %%mm7 \n\t"\
+ "1: \n\t"\
+ "movq (%0), %%mm0 \n\t" /* ABCDEFGH */\
+ "movq %%mm0, %%mm1 \n\t" /* ABCDEFGH */\
+ "movq %%mm0, %%mm2 \n\t" /* ABCDEFGH */\
+ "punpcklbw %%mm7, %%mm0 \n\t" /* 0A0B0C0D */\
+ "punpckhbw %%mm7, %%mm1 \n\t" /* 0E0F0G0H */\
+ "pshufw $0x90, %%mm0, %%mm5 \n\t" /* 0A0A0B0C */\
+ "pshufw $0x41, %%mm0, %%mm6 \n\t" /* 0B0A0A0B */\
+ "movq %%mm2, %%mm3 \n\t" /* ABCDEFGH */\
+ "movq %%mm2, %%mm4 \n\t" /* ABCDEFGH */\
+ "psllq $8, %%mm2 \n\t" /* 0ABCDEFG */\
+ "psllq $16, %%mm3 \n\t" /* 00ABCDEF */\
+ "psllq $24, %%mm4 \n\t" /* 000ABCDE */\
+ "punpckhbw %%mm7, %%mm2 \n\t" /* 0D0E0F0G */\
+ "punpckhbw %%mm7, %%mm3 \n\t" /* 0C0D0E0F */\
+ "punpckhbw %%mm7, %%mm4 \n\t" /* 0B0C0D0E */\
+ "paddw %%mm3, %%mm5 \n\t" /* b */\
+ "paddw %%mm2, %%mm6 \n\t" /* c */\
+ "paddw %%mm5, %%mm5 \n\t" /* 2b */\
+ "psubw %%mm5, %%mm6 \n\t" /* c - 2b */\
+ "pshufw $0x06, %%mm0, %%mm5 \n\t" /* 0C0B0A0A */\
+ "pmullw "MANGLE(ff_pw_3)", %%mm6 \n\t" /* 3c - 6b */\
+ "paddw %%mm4, %%mm0 \n\t" /* a */\
+ "paddw %%mm1, %%mm5 \n\t" /* d */\
+ "pmullw "MANGLE(ff_pw_20)", %%mm0 \n\t" /* 20a */\
+ "psubw %%mm5, %%mm0 \n\t" /* 20a - d */\
+ "paddw %5, %%mm6 \n\t"\
+ "paddw %%mm6, %%mm0 \n\t" /* 20a - 6b + 3c - d */\
+ "psraw $5, %%mm0 \n\t"\
+ /* mm1=EFGH, mm2=DEFG, mm3=CDEF, mm4=BCDE, mm7=0 */\
+ \
+ "movd 5(%0), %%mm5 \n\t" /* FGHI */\
+ "punpcklbw %%mm7, %%mm5 \n\t" /* 0F0G0H0I */\
+ "pshufw $0xF9, %%mm5, %%mm6 \n\t" /* 0G0H0I0I */\
+ "paddw %%mm5, %%mm1 \n\t" /* a */\
+ "paddw %%mm6, %%mm2 \n\t" /* b */\
+ "pshufw $0xBE, %%mm5, %%mm6 \n\t" /* 0H0I0I0H */\
+ "pshufw $0x6F, %%mm5, %%mm5 \n\t" /* 0I0I0H0G */\
+ "paddw %%mm6, %%mm3 \n\t" /* c */\
+ "paddw %%mm5, %%mm4 \n\t" /* d */\
+ "paddw %%mm2, %%mm2 \n\t" /* 2b */\
+ "psubw %%mm2, %%mm3 \n\t" /* c - 2b */\
+ "pmullw "MANGLE(ff_pw_20)", %%mm1 \n\t" /* 20a */\
+ "pmullw "MANGLE(ff_pw_3)", %%mm3 \n\t" /* 3c - 6b */\
+ "psubw %%mm4, %%mm3 \n\t" /* -6b + 3c - d */\
+ "paddw %5, %%mm1 \n\t"\
+ "paddw %%mm1, %%mm3 \n\t" /* 20a - 6b + 3c - d */\
+ "psraw $5, %%mm3 \n\t"\
+ "packuswb %%mm3, %%mm0 \n\t"\
+ OP_MMX2(%%mm0, (%1), %%mm4, q)\
+ \
+ "add %3, %0 \n\t"\
+ "add %4, %1 \n\t"\
+ "decl %2 \n\t"\
+ " jnz 1b \n\t"\
+ : "+a"(src), "+c"(dst), "+d"(h)\
+ : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER)\
+ : "memory"\
+ );\
+}\
+\
+static void OPNAME ## mpeg4_qpel8_h_lowpass_3dnow(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
+ int i;\
+ int16_t temp[8];\
+ /* quick HACK, XXX FIXME MUST be optimized */\
+ for(i=0; i<h; i++)\
+ {\
+ temp[ 0]= (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]);\
+ temp[ 1]= (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]);\
+ temp[ 2]= (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]);\
+ temp[ 3]= (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]);\
+ temp[ 4]= (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]);\
+ temp[ 5]= (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 8]);\
+ temp[ 6]= (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 8])*3 - (src[ 3]+src[ 7]);\
+ temp[ 7]= (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 8])*6 + (src[ 5]+src[ 7])*3 - (src[ 4]+src[ 6]);\
+ __asm__ volatile(\
+ "movq (%0), %%mm0 \n\t"\
+ "movq 8(%0), %%mm1 \n\t"\
+ "paddw %2, %%mm0 \n\t"\
+ "paddw %2, %%mm1 \n\t"\
+ "psraw $5, %%mm0 \n\t"\
+ "psraw $5, %%mm1 \n\t"\
+ "packuswb %%mm1, %%mm0 \n\t"\
+ OP_3DNOW(%%mm0, (%1), %%mm1, q)\
+ :: "r"(temp), "r"(dst), "m"(ROUNDER)\
+ :"memory"\
+ );\
+ dst+=dstStride;\
+ src+=srcStride;\
+ }\
+}
+
+#define QPEL_OP(OPNAME, ROUNDER, RND, OP, MMX)\
+\
+static void OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+ uint64_t temp[17*4];\
+ uint64_t *temp_ptr= temp;\
+ int count= 17;\
+\
+ /*FIXME unroll */\
+ __asm__ volatile(\
+ "pxor %%mm7, %%mm7 \n\t"\
+ "1: \n\t"\
+ "movq (%0), %%mm0 \n\t"\
+ "movq (%0), %%mm1 \n\t"\
+ "movq 8(%0), %%mm2 \n\t"\
+ "movq 8(%0), %%mm3 \n\t"\
+ "punpcklbw %%mm7, %%mm0 \n\t"\
+ "punpckhbw %%mm7, %%mm1 \n\t"\
+ "punpcklbw %%mm7, %%mm2 \n\t"\
+ "punpckhbw %%mm7, %%mm3 \n\t"\
+ "movq %%mm0, (%1) \n\t"\
+ "movq %%mm1, 17*8(%1) \n\t"\
+ "movq %%mm2, 2*17*8(%1) \n\t"\
+ "movq %%mm3, 3*17*8(%1) \n\t"\
+ "add $8, %1 \n\t"\
+ "add %3, %0 \n\t"\
+ "decl %2 \n\t"\
+ " jnz 1b \n\t"\
+ : "+r" (src), "+r" (temp_ptr), "+r"(count)\
+ : "r" ((x86_reg)srcStride)\
+ : "memory"\
+ );\
+ \
+ temp_ptr= temp;\
+ count=4;\
+ \
+/*FIXME reorder for speed */\
+ __asm__ volatile(\
+ /*"pxor %%mm7, %%mm7 \n\t"*/\
+ "1: \n\t"\
+ "movq (%0), %%mm0 \n\t"\
+ "movq 8(%0), %%mm1 \n\t"\
+ "movq 16(%0), %%mm2 \n\t"\
+ "movq 24(%0), %%mm3 \n\t"\
+ QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 16(%0), 8(%0), (%0), 32(%0), (%1), OP)\
+ QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 8(%0), (%0), (%0), 40(%0), (%1, %3), OP)\
+ "add %4, %1 \n\t"\
+ QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, (%0), (%0), 8(%0), 48(%0), (%1), OP)\
+ \
+ QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, (%0), 8(%0), 16(%0), 56(%0), (%1, %3), OP)\
+ "add %4, %1 \n\t"\
+ QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\
+ QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 16(%0), 24(%0), 32(%0), 72(%0), (%1, %3), OP)\
+ "add %4, %1 \n\t"\
+ QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 24(%0), 32(%0), 40(%0), 80(%0), (%1), OP)\
+ QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 32(%0), 40(%0), 48(%0), 88(%0), (%1, %3), OP)\
+ "add %4, %1 \n\t"\
+ QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 40(%0), 48(%0), 56(%0), 96(%0), (%1), OP)\
+ QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 48(%0), 56(%0), 64(%0),104(%0), (%1, %3), OP)\
+ "add %4, %1 \n\t"\
+ QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 56(%0), 64(%0), 72(%0),112(%0), (%1), OP)\
+ QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 64(%0), 72(%0), 80(%0),120(%0), (%1, %3), OP)\
+ "add %4, %1 \n\t"\
+ QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 72(%0), 80(%0), 88(%0),128(%0), (%1), OP)\
+ \
+ QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 80(%0), 88(%0), 96(%0),128(%0), (%1, %3), OP)\
+ "add %4, %1 \n\t" \
+ QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 88(%0), 96(%0),104(%0),120(%0), (%1), OP)\
+ QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 96(%0),104(%0),112(%0),112(%0), (%1, %3), OP)\
+ \
+ "add $136, %0 \n\t"\
+ "add %6, %1 \n\t"\
+ "decl %2 \n\t"\
+ " jnz 1b \n\t"\
+ \
+ : "+r"(temp_ptr), "+r"(dst), "+g"(count)\
+ : "r"((x86_reg)dstStride), "r"(2*(x86_reg)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-14*(x86_reg)dstStride)\
+ :"memory"\
+ );\
+}\
+\
+static void OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+ uint64_t temp[9*2];\
+ uint64_t *temp_ptr= temp;\
+ int count= 9;\
+\
+ /*FIXME unroll */\
+ __asm__ volatile(\
+ "pxor %%mm7, %%mm7 \n\t"\
+ "1: \n\t"\
+ "movq (%0), %%mm0 \n\t"\
+ "movq (%0), %%mm1 \n\t"\
+ "punpcklbw %%mm7, %%mm0 \n\t"\
+ "punpckhbw %%mm7, %%mm1 \n\t"\
+ "movq %%mm0, (%1) \n\t"\
+ "movq %%mm1, 9*8(%1) \n\t"\
+ "add $8, %1 \n\t"\
+ "add %3, %0 \n\t"\
+ "decl %2 \n\t"\
+ " jnz 1b \n\t"\
+ : "+r" (src), "+r" (temp_ptr), "+r"(count)\
+ : "r" ((x86_reg)srcStride)\
+ : "memory"\
+ );\
+ \
+ temp_ptr= temp;\
+ count=2;\
+ \
+/*FIXME reorder for speed */\
+ __asm__ volatile(\
+ /*"pxor %%mm7, %%mm7 \n\t"*/\
+ "1: \n\t"\
+ "movq (%0), %%mm0 \n\t"\
+ "movq 8(%0), %%mm1 \n\t"\
+ "movq 16(%0), %%mm2 \n\t"\
+ "movq 24(%0), %%mm3 \n\t"\
+ QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 16(%0), 8(%0), (%0), 32(%0), (%1), OP)\
+ QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 8(%0), (%0), (%0), 40(%0), (%1, %3), OP)\
+ "add %4, %1 \n\t"\
+ QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, (%0), (%0), 8(%0), 48(%0), (%1), OP)\
+ \
+ QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, (%0), 8(%0), 16(%0), 56(%0), (%1, %3), OP)\
+ "add %4, %1 \n\t"\
+ QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\
+ \
+ QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 16(%0), 24(%0), 32(%0), 64(%0), (%1, %3), OP)\
+ "add %4, %1 \n\t"\
+ QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 24(%0), 32(%0), 40(%0), 56(%0), (%1), OP)\
+ QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 32(%0), 40(%0), 48(%0), 48(%0), (%1, %3), OP)\
+ \
+ "add $72, %0 \n\t"\
+ "add %6, %1 \n\t"\
+ "decl %2 \n\t"\
+ " jnz 1b \n\t"\
+ \
+ : "+r"(temp_ptr), "+r"(dst), "+g"(count)\
+ : "r"((x86_reg)dstStride), "r"(2*(x86_reg)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-6*(x86_reg)dstStride)\
+ : "memory"\
+ );\
+}\
+\
+static void OPNAME ## qpel8_mc00_ ## MMX (uint8_t *dst, uint8_t *src, int stride){\
+ OPNAME ## pixels8_ ## MMX(dst, src, stride, 8);\
+}\
+\
+static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ uint64_t temp[8];\
+ uint8_t * const half= (uint8_t*)temp;\
+ put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, stride, 8);\
+ OPNAME ## pixels8_l2_ ## MMX(dst, src, half, stride, stride, 8);\
+}\
+\
+static void OPNAME ## qpel8_mc20_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ OPNAME ## mpeg4_qpel8_h_lowpass_ ## MMX(dst, src, stride, stride, 8);\
+}\
+\
+static void OPNAME ## qpel8_mc30_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ uint64_t temp[8];\
+ uint8_t * const half= (uint8_t*)temp;\
+ put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, stride, 8);\
+ OPNAME ## pixels8_l2_ ## MMX(dst, src+1, half, stride, stride, 8);\
+}\
+\
+static void OPNAME ## qpel8_mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ uint64_t temp[8];\
+ uint8_t * const half= (uint8_t*)temp;\
+ put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, 8, stride);\
+ OPNAME ## pixels8_l2_ ## MMX(dst, src, half, stride, stride, 8);\
+}\
+\
+static void OPNAME ## qpel8_mc02_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, src, stride, stride);\
+}\
+\
+static void OPNAME ## qpel8_mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ uint64_t temp[8];\
+ uint8_t * const half= (uint8_t*)temp;\
+ put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, 8, stride);\
+ OPNAME ## pixels8_l2_ ## MMX(dst, src+stride, half, stride, stride, 8);\
+}\
+static void OPNAME ## qpel8_mc11_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ uint64_t half[8 + 9];\
+ uint8_t * const halfH= ((uint8_t*)half) + 64;\
+ uint8_t * const halfHV= ((uint8_t*)half);\
+ put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
+ put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, stride, 9);\
+ put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
+ OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, stride, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc31_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ uint64_t half[8 + 9];\
+ uint8_t * const halfH= ((uint8_t*)half) + 64;\
+ uint8_t * const halfHV= ((uint8_t*)half);\
+ put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
+ put ## RND ## pixels8_l2_ ## MMX(halfH, src+1, halfH, 8, stride, 9);\
+ put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
+ OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, stride, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc13_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ uint64_t half[8 + 9];\
+ uint8_t * const halfH= ((uint8_t*)half) + 64;\
+ uint8_t * const halfHV= ((uint8_t*)half);\
+ put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
+ put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, stride, 9);\
+ put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
+ OPNAME ## pixels8_l2_ ## MMX(dst, halfH+8, halfHV, stride, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc33_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ uint64_t half[8 + 9];\
+ uint8_t * const halfH= ((uint8_t*)half) + 64;\
+ uint8_t * const halfHV= ((uint8_t*)half);\
+ put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
+ put ## RND ## pixels8_l2_ ## MMX(halfH, src+1, halfH, 8, stride, 9);\
+ put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
+ OPNAME ## pixels8_l2_ ## MMX(dst, halfH+8, halfHV, stride, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ uint64_t half[8 + 9];\
+ uint8_t * const halfH= ((uint8_t*)half) + 64;\
+ uint8_t * const halfHV= ((uint8_t*)half);\
+ put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
+ put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
+ OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, stride, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ uint64_t half[8 + 9];\
+ uint8_t * const halfH= ((uint8_t*)half) + 64;\
+ uint8_t * const halfHV= ((uint8_t*)half);\
+ put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
+ put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
+ OPNAME ## pixels8_l2_ ## MMX(dst, halfH+8, halfHV, stride, 8, 8);\
+}\
+static void OPNAME ## qpel8_mc12_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ uint64_t half[8 + 9];\
+ uint8_t * const halfH= ((uint8_t*)half);\
+ put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
+ put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, stride, 9);\
+ OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\
+}\
+static void OPNAME ## qpel8_mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ uint64_t half[8 + 9];\
+ uint8_t * const halfH= ((uint8_t*)half);\
+ put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
+ put ## RND ## pixels8_l2_ ## MMX(halfH, src+1, halfH, 8, stride, 9);\
+ OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\
+}\
+static void OPNAME ## qpel8_mc22_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ uint64_t half[9];\
+ uint8_t * const halfH= ((uint8_t*)half);\
+ put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, stride, 9);\
+ OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, stride, 8);\
+}\
+static void OPNAME ## qpel16_mc00_ ## MMX (uint8_t *dst, uint8_t *src, int stride){\
+ OPNAME ## pixels16_ ## MMX(dst, src, stride, 16);\
+}\
+\
+static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ uint64_t temp[32];\
+ uint8_t * const half= (uint8_t*)temp;\
+ put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, stride, 16);\
+ OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, stride, 16);\
+}\
+\
+static void OPNAME ## qpel16_mc20_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ OPNAME ## mpeg4_qpel16_h_lowpass_ ## MMX(dst, src, stride, stride, 16);\
+}\
+\
+static void OPNAME ## qpel16_mc30_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ uint64_t temp[32];\
+ uint8_t * const half= (uint8_t*)temp;\
+ put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, stride, 16);\
+ OPNAME ## pixels16_l2_ ## MMX(dst, src+1, half, stride, stride, 16);\
+}\
+\
+static void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ uint64_t temp[32];\
+ uint8_t * const half= (uint8_t*)temp;\
+ put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, stride);\
+ OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, stride, 16);\
+}\
+\
+static void OPNAME ## qpel16_mc02_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, src, stride, stride);\
+}\
+\
+static void OPNAME ## qpel16_mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ uint64_t temp[32];\
+ uint8_t * const half= (uint8_t*)temp;\
+ put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, stride);\
+ OPNAME ## pixels16_l2_ ## MMX(dst, src+stride, half, stride, stride, 16);\
+}\
+static void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ uint64_t half[16*2 + 17*2];\
+ uint8_t * const halfH= ((uint8_t*)half) + 256;\
+ uint8_t * const halfHV= ((uint8_t*)half);\
+ put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
+ put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, stride, 17);\
+ put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
+ OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, stride, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ uint64_t half[16*2 + 17*2];\
+ uint8_t * const halfH= ((uint8_t*)half) + 256;\
+ uint8_t * const halfHV= ((uint8_t*)half);\
+ put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
+ put ## RND ## pixels16_l2_ ## MMX(halfH, src+1, halfH, 16, stride, 17);\
+ put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
+ OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, stride, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ uint64_t half[16*2 + 17*2];\
+ uint8_t * const halfH= ((uint8_t*)half) + 256;\
+ uint8_t * const halfHV= ((uint8_t*)half);\
+ put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
+ put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, stride, 17);\
+ put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
+ OPNAME ## pixels16_l2_ ## MMX(dst, halfH+16, halfHV, stride, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ uint64_t half[16*2 + 17*2];\
+ uint8_t * const halfH= ((uint8_t*)half) + 256;\
+ uint8_t * const halfHV= ((uint8_t*)half);\
+ put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
+ put ## RND ## pixels16_l2_ ## MMX(halfH, src+1, halfH, 16, stride, 17);\
+ put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
+ OPNAME ## pixels16_l2_ ## MMX(dst, halfH+16, halfHV, stride, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ uint64_t half[16*2 + 17*2];\
+ uint8_t * const halfH= ((uint8_t*)half) + 256;\
+ uint8_t * const halfHV= ((uint8_t*)half);\
+ put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
+ put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
+ OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, stride, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ uint64_t half[16*2 + 17*2];\
+ uint8_t * const halfH= ((uint8_t*)half) + 256;\
+ uint8_t * const halfHV= ((uint8_t*)half);\
+ put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
+ put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, 16, 16);\
+ OPNAME ## pixels16_l2_ ## MMX(dst, halfH+16, halfHV, stride, 16, 16);\
+}\
+static void OPNAME ## qpel16_mc12_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ uint64_t half[17*2];\
+ uint8_t * const halfH= ((uint8_t*)half);\
+ put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
+ put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, stride, 17);\
+ OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\
+}\
+static void OPNAME ## qpel16_mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ uint64_t half[17*2];\
+ uint8_t * const halfH= ((uint8_t*)half);\
+ put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
+ put ## RND ## pixels16_l2_ ## MMX(halfH, src+1, halfH, 16, stride, 17);\
+ OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\
+}\
+static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ uint64_t half[17*2];\
+ uint8_t * const halfH= ((uint8_t*)half);\
+ put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, stride, 17);\
+ OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\
+}
+
+/* Store macros plugged in as the OP parameter of the qpel templates.
+ * PUT_OP: plain store of result "a" to destination "b" ("size" selects the
+ * mov width, e.g. q or d; "temp" unused).
+ * AVG_*_OP: load the current destination into "temp", average it with "a"
+ * (pavgusb = 3DNow! byte average, pavgb = MMX2/SSE-integer byte average),
+ * then store back — i.e. the motion-comp "avg" flavour. */
+#define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b " \n\t"
+#define AVG_3DNOW_OP(a,b,temp, size) \
+"mov" #size " " #b ", " #temp " \n\t"\
+"pavgusb " #temp ", " #a " \n\t"\
+"mov" #size " " #a ", " #b " \n\t"
+#define AVG_MMX2_OP(a,b,temp, size) \
+"mov" #size " " #b ", " #temp " \n\t"\
+"pavgb " #temp ", " #a " \n\t"\
+"mov" #size " " #a ", " #b " \n\t"
+
+/* Instantiate the qpel templates: put/avg and no-rounding variants, for the
+ * 3DNow! and MMX2 store macros above.  ff_pw_16 vs ff_pw_15 is the ROUNDER
+ * constant passed into the lowpass asm (normal vs no_rnd rounding). */
+QPEL_BASE(put_ , ff_pw_16, _ , PUT_OP, PUT_OP)
+QPEL_BASE(avg_ , ff_pw_16, _ , AVG_MMX2_OP, AVG_3DNOW_OP)
+QPEL_BASE(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, PUT_OP)
+QPEL_OP(put_ , ff_pw_16, _ , PUT_OP, 3dnow)
+QPEL_OP(avg_ , ff_pw_16, _ , AVG_3DNOW_OP, 3dnow)
+QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, 3dnow)
+QPEL_OP(put_ , ff_pw_16, _ , PUT_OP, mmx2)
+QPEL_OP(avg_ , ff_pw_16, _ , AVG_MMX2_OP, mmx2)
+QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, mmx2)
+
+/***********************************/
+/* bilinear qpel: not compliant to any spec, only for -lavdopts fast */
+
+/* Map one quarter-pel position XY to an existing half-pel routine HPEL
+ * (e.g. _x2_/_y2_/_xy2_ pixel averaging). */
+#define QPEL_2TAP_XY(OPNAME, SIZE, MMX, XY, HPEL)\
+static void OPNAME ## 2tap_qpel ## SIZE ## _mc ## XY ## _ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ OPNAME ## pixels ## SIZE ## HPEL(dst, src, stride, SIZE);\
+}
+/* Map position XY to the 3-point blend helper 2tap_qpel*_l3_* (defined
+ * elsewhere in this file): base offset S0, plus the two extra taps at
+ * offsets S1 and S2. */
+#define QPEL_2TAP_L3(OPNAME, SIZE, MMX, XY, S0, S1, S2)\
+static void OPNAME ## 2tap_qpel ## SIZE ## _mc ## XY ## _ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ OPNAME ## 2tap_qpel ## SIZE ## _l3_ ## MMX(dst, src+S0, stride, SIZE, S1, S2);\
+}
+
+/* Build the full 16-position 2-tap qpel set for one OPNAME/SIZE/MMX combo.
+ * mc00/mc21/mc12 are plain function-pointer aliases of existing routines;
+ * mc32/mc23 reuse the hpel averagers with a shifted source. */
+#define QPEL_2TAP(OPNAME, SIZE, MMX)\
+QPEL_2TAP_XY(OPNAME, SIZE, MMX, 20, _x2_ ## MMX)\
+QPEL_2TAP_XY(OPNAME, SIZE, MMX, 02, _y2_ ## MMX)\
+QPEL_2TAP_XY(OPNAME, SIZE, MMX, 22, _xy2_mmx)\
+static const qpel_mc_func OPNAME ## 2tap_qpel ## SIZE ## _mc00_ ## MMX =\
+ OPNAME ## qpel ## SIZE ## _mc00_ ## MMX;\
+static const qpel_mc_func OPNAME ## 2tap_qpel ## SIZE ## _mc21_ ## MMX =\
+ OPNAME ## 2tap_qpel ## SIZE ## _mc20_ ## MMX;\
+static const qpel_mc_func OPNAME ## 2tap_qpel ## SIZE ## _mc12_ ## MMX =\
+ OPNAME ## 2tap_qpel ## SIZE ## _mc02_ ## MMX;\
+static void OPNAME ## 2tap_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ OPNAME ## pixels ## SIZE ## _y2_ ## MMX(dst, src+1, stride, SIZE);\
+}\
+static void OPNAME ## 2tap_qpel ## SIZE ## _mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ OPNAME ## pixels ## SIZE ## _x2_ ## MMX(dst, src+stride, stride, SIZE);\
+}\
+QPEL_2TAP_L3(OPNAME, SIZE, MMX, 10, 0, 1, 0)\
+QPEL_2TAP_L3(OPNAME, SIZE, MMX, 30, 1, -1, 0)\
+QPEL_2TAP_L3(OPNAME, SIZE, MMX, 01, 0, stride, 0)\
+QPEL_2TAP_L3(OPNAME, SIZE, MMX, 03, stride, -stride, 0)\
+QPEL_2TAP_L3(OPNAME, SIZE, MMX, 11, 0, stride, 1)\
+QPEL_2TAP_L3(OPNAME, SIZE, MMX, 31, 1, stride, -1)\
+QPEL_2TAP_L3(OPNAME, SIZE, MMX, 13, stride, -stride, 1)\
+QPEL_2TAP_L3(OPNAME, SIZE, MMX, 33, stride+1, -stride, -1)\
+
+QPEL_2TAP(put_, 16, mmx2)
+QPEL_2TAP(avg_, 16, mmx2)
+QPEL_2TAP(put_, 8, mmx2)
+QPEL_2TAP(avg_, 8, mmx2)
+QPEL_2TAP(put_, 16, 3dnow)
+QPEL_2TAP(avg_, 16, 3dnow)
+QPEL_2TAP(put_, 8, 3dnow)
+QPEL_2TAP(avg_, 8, 3dnow)
+
+
+/* Dead debugging stub, deliberately compiled out. */
+#if 0
+static void just_return(void) { return; }
+#endif
+
+/* Global motion compensation of one 8-wide, h-tall block with MMX bilinear
+ * interpolation.  The fast path is taken only when the full-pel offset is
+ * constant across the whole block and the motion vectors use no more than
+ * 16 bits of sub-pel precision; anything else falls back to the C version
+ * ff_gmc_c.  Blocks that read outside the picture go through
+ * ff_emulated_edge_mc first. */
+static void gmc_mmx(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
+ int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height){
+ const int w = 8;
+ const int ix = ox>>(16+shift);
+ const int iy = oy>>(16+shift);
+ const int oxs = ox>>4;
+ const int oys = oy>>4;
+ const int dxxs = dxx>>4;
+ const int dxys = dxy>>4;
+ const int dyxs = dyx>>4;
+ const int dyys = dyy>>4;
+ const uint16_t r4[4] = {r,r,r,r};
+ const uint16_t dxy4[4] = {dxys,dxys,dxys,dxys};
+ const uint16_t dyy4[4] = {dyys,dyys,dyys,dyys};
+ const uint64_t shift2 = 2*shift;
+ /* VLA scratch for edge emulation; NOTE(review): size depends on the
+  * caller-supplied h and stride — assumed small, confirm against callers. */
+ uint8_t edge_buf[(h+1)*stride];
+ int x, y;
+
+ const int dxw = (dxx-(1<<(16+shift)))*(w-1);
+ const int dyh = (dyy-(1<<(16+shift)))*(h-1);
+ const int dxh = dxy*(h-1);
+ const int dyw = dyx*(w-1);
+ if( // non-constant fullpel offset (3% of blocks)
+ ((ox^(ox+dxw)) | (ox^(ox+dxh)) | (ox^(ox+dxw+dxh)) |
+ (oy^(oy+dyw)) | (oy^(oy+dyh)) | (oy^(oy+dyw+dyh))) >> (16+shift)
+ // uses more than 16 bits of subpel mv (only at huge resolution)
+ || (dxx|dxy|dyx|dyy)&15 )
+ {
+ //FIXME could still use mmx for some of the rows
+ ff_gmc_c(dst, src, stride, h, ox, oy, dxx, dxy, dyx, dyy, shift, r, width, height);
+ return;
+ }
+
+ src += ix + iy*stride;
+ /* unsigned compare doubles as a "negative or too large" range check */
+ if( (unsigned)ix >= width-w ||
+ (unsigned)iy >= height-h )
+ {
+ ff_emulated_edge_mc(edge_buf, src, stride, w+1, h+1, ix, iy, width, height);
+ src = edge_buf;
+ }
+
+ /* mm6 = (1<<shift) broadcast to 4 words, mm7 = 0 (for byte unpacking) */
+ __asm__ volatile(
+ "movd %0, %%mm6 \n\t"
+ "pxor %%mm7, %%mm7 \n\t"
+ "punpcklwd %%mm6, %%mm6 \n\t"
+ "punpcklwd %%mm6, %%mm6 \n\t"
+ :: "r"(1<<shift)
+ );
+
+ for(x=0; x<w; x+=4){
+ uint16_t dx4[4] = { oxs - dxys + dxxs*(x+0),
+ oxs - dxys + dxxs*(x+1),
+ oxs - dxys + dxxs*(x+2),
+ oxs - dxys + dxxs*(x+3) };
+ uint16_t dy4[4] = { oys - dyys + dyxs*(x+0),
+ oys - dyys + dyxs*(x+1),
+ oys - dyys + dyxs*(x+2),
+ oys - dyys + dyxs*(x+3) };
+
+ for(y=0; y<h; y++){
+ /* step dx4/dy4 by the per-row deltas and keep the top 4 bits
+  * (psrlw $12) as the bilinear interpolation fractions */
+ __asm__ volatile(
+ "movq %0, %%mm4 \n\t"
+ "movq %1, %%mm5 \n\t"
+ "paddw %2, %%mm4 \n\t"
+ "paddw %3, %%mm5 \n\t"
+ "movq %%mm4, %0 \n\t"
+ "movq %%mm5, %1 \n\t"
+ "psrlw $12, %%mm4 \n\t"
+ "psrlw $12, %%mm5 \n\t"
+ : "+m"(*dx4), "+m"(*dy4)
+ : "m"(*dxy4), "m"(*dyy4)
+ );
+
+ /* bilinear blend of the four neighbouring pixels, + rounder r4,
+  * >> shift2, then pack back to 4 bytes of dst */
+ __asm__ volatile(
+ "movq %%mm6, %%mm2 \n\t"
+ "movq %%mm6, %%mm1 \n\t"
+ "psubw %%mm4, %%mm2 \n\t"
+ "psubw %%mm5, %%mm1 \n\t"
+ "movq %%mm2, %%mm0 \n\t"
+ "movq %%mm4, %%mm3 \n\t"
+ "pmullw %%mm1, %%mm0 \n\t" // (s-dx)*(s-dy)
+ "pmullw %%mm5, %%mm3 \n\t" // dx*dy
+ "pmullw %%mm5, %%mm2 \n\t" // (s-dx)*dy
+ "pmullw %%mm4, %%mm1 \n\t" // dx*(s-dy)
+
+ "movd %4, %%mm5 \n\t"
+ "movd %3, %%mm4 \n\t"
+ "punpcklbw %%mm7, %%mm5 \n\t"
+ "punpcklbw %%mm7, %%mm4 \n\t"
+ "pmullw %%mm5, %%mm3 \n\t" // src[1,1] * dx*dy
+ "pmullw %%mm4, %%mm2 \n\t" // src[0,1] * (s-dx)*dy
+
+ "movd %2, %%mm5 \n\t"
+ "movd %1, %%mm4 \n\t"
+ "punpcklbw %%mm7, %%mm5 \n\t"
+ "punpcklbw %%mm7, %%mm4 \n\t"
+ "pmullw %%mm5, %%mm1 \n\t" // src[1,0] * dx*(s-dy)
+ "pmullw %%mm4, %%mm0 \n\t" // src[0,0] * (s-dx)*(s-dy)
+ "paddw %5, %%mm1 \n\t"
+ "paddw %%mm3, %%mm2 \n\t"
+ "paddw %%mm1, %%mm0 \n\t"
+ "paddw %%mm2, %%mm0 \n\t"
+
+ "psrlw %6, %%mm0 \n\t"
+ "packuswb %%mm0, %%mm0 \n\t"
+ "movd %%mm0, %0 \n\t"
+
+ : "=m"(dst[x+y*stride])
+ : "m"(src[0]), "m"(src[1]),
+ "m"(src[stride]), "m"(src[stride+1]),
+ "m"(*r4), "m"(shift2)
+ );
+ src += stride;
+ }
+ /* rewind to the top of the next 4-pixel column */
+ src += 4-h*stride;
+ }
+}
+
+/* Generate a cache-prefetch helper that touches h lines, stride bytes apart,
+ * with the given prefetch instruction (prefetcht0 on MMX2, 3DNow! prefetch). */
+#define PREFETCH(name, op) \
+static void name(void *mem, int stride, int h){\
+ const uint8_t *p= mem;\
+ do{\
+ __asm__ volatile(#op" %0" :: "m"(*p));\
+ p+= stride;\
+ }while(--h);\
+}
+PREFETCH(prefetch_mmx2, prefetcht0)
+PREFETCH(prefetch_3dnow, prefetch)
+#undef PREFETCH
+
+/* H.264 and RV40 MMX DSP code is compiled into this translation unit so it
+ * can reuse the macros and static helpers defined above. */
+#include "h264dsp_mmx.c"
+#include "rv40dsp_mmx.c"
+
+/* CAVS specific */
+/* Init entry points implemented in the CAVS DSP files. */
+void ff_cavsdsp_init_mmx2(DSPContext* c, AVCodecContext *avctx);
+void ff_cavsdsp_init_3dnow(DSPContext* c, AVCodecContext *avctx);
+
+/* Full-pel (mc00) CAVS copy/average wrappers: plain pixel copy or average,
+ * delegating to the generic MMX pixel routines. */
+void ff_put_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) {
+ put_pixels8_mmx(dst, src, stride, 8);
+}
+void ff_avg_cavs_qpel8_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) {
+ avg_pixels8_mmx(dst, src, stride, 8);
+}
+void ff_put_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) {
+ put_pixels16_mmx(dst, src, stride, 16);
+}
+void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) {
+ avg_pixels16_mmx(dst, src, stride, 16);
+}
+
+/* VC1 specific */
+void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx);
+
+/* VC-1 full-pel mspel wrapper: rnd is ignored for a plain copy. */
+void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
+ put_pixels8_mmx(dst, src, stride, 8);
+}
+
+/* external functions, from idct_mmx.c */
+void ff_mmx_idct(DCTELEM *block);
+void ff_mmxext_idct(DCTELEM *block);
+
+/* Glue adapting raw in-place IDCTs to the DSPContext idct_put/idct_add
+ * interface: run the IDCT on the coefficient block, then either clamp-store
+ * or clamp-accumulate the result into the destination picture. */
+/* XXX: those functions should be suppressed ASAP when all IDCTs are
+ converted */
+#ifdef CONFIG_GPL
+/* libmpeg2-derived IDCTs, only built under GPL. */
+static void ff_libmpeg2mmx_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
+{
+ ff_mmx_idct (block);
+ put_pixels_clamped_mmx(block, dest, line_size);
+}
+static void ff_libmpeg2mmx_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
+{
+ ff_mmx_idct (block);
+ add_pixels_clamped_mmx(block, dest, line_size);
+}
+static void ff_libmpeg2mmx2_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
+{
+ ff_mmxext_idct (block);
+ put_pixels_clamped_mmx(block, dest, line_size);
+}
+static void ff_libmpeg2mmx2_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
+{
+ ff_mmxext_idct (block);
+ add_pixels_clamped_mmx(block, dest, line_size);
+}
+#endif
+/* XviD-style IDCT wrappers (MMX and MMX2 flavours). */
+static void ff_idct_xvid_mmx_put(uint8_t *dest, int line_size, DCTELEM *block)
+{
+ ff_idct_xvid_mmx (block);
+ put_pixels_clamped_mmx(block, dest, line_size);
+}
+static void ff_idct_xvid_mmx_add(uint8_t *dest, int line_size, DCTELEM *block)
+{
+ ff_idct_xvid_mmx (block);
+ add_pixels_clamped_mmx(block, dest, line_size);
+}
+static void ff_idct_xvid_mmx2_put(uint8_t *dest, int line_size, DCTELEM *block)
+{
+ ff_idct_xvid_mmx2 (block);
+ put_pixels_clamped_mmx(block, dest, line_size);
+}
+static void ff_idct_xvid_mmx2_add(uint8_t *dest, int line_size, DCTELEM *block)
+{
+ ff_idct_xvid_mmx2 (block);
+ add_pixels_clamped_mmx(block, dest, line_size);
+}
+
+/* Vorbis inverse channel coupling: rewrite the magnitude/angle float pairs
+ * in place into the two decoupled channels, using sign manipulation as
+ * annotated on the pfadd/pfsub lines.  3DNow! version: 2 floats per
+ * iteration, femms to leave MMX state clean.  NOTE(review): assumes
+ * blocksize is a multiple of 2 — confirm against callers. */
+static void vorbis_inverse_coupling_3dnow(float *mag, float *ang, int blocksize)
+{
+ int i;
+ __asm__ volatile("pxor %%mm7, %%mm7":);
+ for(i=0; i<blocksize; i+=2) {
+ __asm__ volatile(
+ "movq %0, %%mm0 \n\t"
+ "movq %1, %%mm1 \n\t"
+ "movq %%mm0, %%mm2 \n\t"
+ "movq %%mm1, %%mm3 \n\t"
+ "pfcmpge %%mm7, %%mm2 \n\t" // m <= 0.0
+ "pfcmpge %%mm7, %%mm3 \n\t" // a <= 0.0
+ "pslld $31, %%mm2 \n\t" // keep only the sign bit
+ "pxor %%mm2, %%mm1 \n\t"
+ "movq %%mm3, %%mm4 \n\t"
+ "pand %%mm1, %%mm3 \n\t"
+ "pandn %%mm1, %%mm4 \n\t"
+ "pfadd %%mm0, %%mm3 \n\t" // a = m + ((a<0) & (a ^ sign(m)))
+ "pfsub %%mm4, %%mm0 \n\t" // m = m + ((a>0) & (a ^ sign(m)))
+ "movq %%mm3, %1 \n\t"
+ "movq %%mm0, %0 \n\t"
+ :"+m"(mag[i]), "+m"(ang[i])
+ ::"memory"
+ );
+ }
+ __asm__ volatile("femms");
+}
+/* SSE version of the same coupling: 4 floats per iteration, using the
+ * ff_pdw_80000000 constant as the sign-bit mask.  NOTE(review): assumes
+ * 16-byte-aligned buffers (movaps) and blocksize % 4 == 0 — confirm. */
+static void vorbis_inverse_coupling_sse(float *mag, float *ang, int blocksize)
+{
+ int i;
+
+ __asm__ volatile(
+ "movaps %0, %%xmm5 \n\t"
+ ::"m"(ff_pdw_80000000[0])
+ );
+ for(i=0; i<blocksize; i+=4) {
+ __asm__ volatile(
+ "movaps %0, %%xmm0 \n\t"
+ "movaps %1, %%xmm1 \n\t"
+ "xorps %%xmm2, %%xmm2 \n\t"
+ "xorps %%xmm3, %%xmm3 \n\t"
+ "cmpleps %%xmm0, %%xmm2 \n\t" // m <= 0.0
+ "cmpleps %%xmm1, %%xmm3 \n\t" // a <= 0.0
+ "andps %%xmm5, %%xmm2 \n\t" // keep only the sign bit
+ "xorps %%xmm2, %%xmm1 \n\t"
+ "movaps %%xmm3, %%xmm4 \n\t"
+ "andps %%xmm1, %%xmm3 \n\t"
+ "andnps %%xmm1, %%xmm4 \n\t"
+ "addps %%xmm0, %%xmm3 \n\t" // a = m + ((a<0) & (a ^ sign(m)))
+ "subps %%xmm4, %%xmm0 \n\t" // m = m + ((a>0) & (a ^ sign(m)))
+ "movaps %%xmm3, %1 \n\t"
+ "movaps %%xmm0, %0 \n\t"
+ :"+m"(mag[i]), "+m"(ang[i])
+ ::"memory"
+ );
+ }
+}
+
+/* Conditional-expansion helpers for the downmix macros: IF1 keeps its
+ * argument, IF0 drops it — used to enable/disable the stereo-only lines. */
+#define IF1(x) x
+#define IF0(x)
+
+/* Fast 5-channel downmix.  Expands inside ac3_downmix_sse and uses its
+ * locals: i (negative byte offset counting up to 0), samples, matrix, len.
+ * Three scalar coefficients (matrix bytes 0, 8, 24) are broadcast to
+ * xmm5-7; the five input channels sit 0x400 bytes (256 floats) apart.
+ * mono()/stereo() wrap the lines used only in the respective mode. */
+#define MIX5(mono,stereo)\
+ __asm__ volatile(\
+ "movss 0(%2), %%xmm5 \n"\
+ "movss 8(%2), %%xmm6 \n"\
+ "movss 24(%2), %%xmm7 \n"\
+ "shufps $0, %%xmm5, %%xmm5 \n"\
+ "shufps $0, %%xmm6, %%xmm6 \n"\
+ "shufps $0, %%xmm7, %%xmm7 \n"\
+ "1: \n"\
+ "movaps (%0,%1), %%xmm0 \n"\
+ "movaps 0x400(%0,%1), %%xmm1 \n"\
+ "movaps 0x800(%0,%1), %%xmm2 \n"\
+ "movaps 0xc00(%0,%1), %%xmm3 \n"\
+ "movaps 0x1000(%0,%1), %%xmm4 \n"\
+ "mulps %%xmm5, %%xmm0 \n"\
+ "mulps %%xmm6, %%xmm1 \n"\
+ "mulps %%xmm5, %%xmm2 \n"\
+ "mulps %%xmm7, %%xmm3 \n"\
+ "mulps %%xmm7, %%xmm4 \n"\
+ stereo("addps %%xmm1, %%xmm0 \n")\
+ "addps %%xmm1, %%xmm2 \n"\
+ "addps %%xmm3, %%xmm0 \n"\
+ "addps %%xmm4, %%xmm2 \n"\
+ mono("addps %%xmm2, %%xmm0 \n")\
+ "movaps %%xmm0, (%0,%1) \n"\
+ stereo("movaps %%xmm2, 0x400(%0,%1) \n")\
+ "add $16, %0 \n"\
+ "jl 1b \n"\
+ :"+&r"(i)\
+ :"r"(samples[0]+len), "r"(matrix)\
+ :"memory"\
+ );
+
+/* Generic matrix downmix for arbitrary channel counts.  Also expands inside
+ * ac3_downmix_sse; walks the in_ch input channels (1024 bytes apart) and
+ * the per-channel coefficient pairs in matrix_simd (32 bytes per input
+ * channel), accumulating one (mono) or two (stereo) output channels. */
+#define MIX_MISC(stereo)\
+ __asm__ volatile(\
+ "1: \n"\
+ "movaps (%3,%0), %%xmm0 \n"\
+ stereo("movaps %%xmm0, %%xmm1 \n")\
+ "mulps %%xmm6, %%xmm0 \n"\
+ stereo("mulps %%xmm7, %%xmm1 \n")\
+ "lea 1024(%3,%0), %1 \n"\
+ "mov %5, %2 \n"\
+ "2: \n"\
+ "movaps (%1), %%xmm2 \n"\
+ stereo("movaps %%xmm2, %%xmm3 \n")\
+ "mulps (%4,%2), %%xmm2 \n"\
+ stereo("mulps 16(%4,%2), %%xmm3 \n")\
+ "addps %%xmm2, %%xmm0 \n"\
+ stereo("addps %%xmm3, %%xmm1 \n")\
+ "add $1024, %1 \n"\
+ "add $32, %2 \n"\
+ "jl 2b \n"\
+ "movaps %%xmm0, (%3,%0) \n"\
+ stereo("movaps %%xmm1, 1024(%3,%0) \n")\
+ "add $16, %0 \n"\
+ "jl 1b \n"\
+ :"+&r"(i), "=&r"(j), "=&r"(k)\
+ :"r"(samples[0]+len), "r"(matrix_simd+in_ch), "g"((intptr_t)-32*(in_ch-1))\
+ :"memory"\
+ );
+
+/* In-place SSE downmix of in_ch channel buffers to out_ch channels.
+ * The bitwise compares on matrix_cmp detect the common symmetric 5->2 and
+ * 5->1 coefficient layouts and dispatch to the MIX5 fast path; otherwise
+ * the coefficients are splatted into matrix_simd and MIX_MISC is used. */
+static void ac3_downmix_sse(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len)
+{
+ int (*matrix_cmp)[2] = (int(*)[2])matrix;
+ intptr_t i,j,k;
+
+ i = -len*sizeof(float);
+ if(in_ch == 5 && out_ch == 2 && !(matrix_cmp[0][1]|matrix_cmp[2][0]|matrix_cmp[3][1]|matrix_cmp[4][0]|(matrix_cmp[1][0]^matrix_cmp[1][1])|(matrix_cmp[0][0]^matrix_cmp[2][1]))) {
+ MIX5(IF0,IF1);
+ } else if(in_ch == 5 && out_ch == 1 && matrix_cmp[0][0]==matrix_cmp[2][0] && matrix_cmp[3][0]==matrix_cmp[4][0]) {
+ MIX5(IF1,IF0);
+ } else {
+ DECLARE_ALIGNED_16(float, matrix_simd[in_ch][2][4]);
+ j = 2*in_ch*sizeof(float);
+ /* broadcast each (left,right) coefficient pair to two 4-float rows */
+ __asm__ volatile(
+ "1: \n"
+ "sub $8, %0 \n"
+ "movss (%2,%0), %%xmm6 \n"
+ "movss 4(%2,%0), %%xmm7 \n"
+ "shufps $0, %%xmm6, %%xmm6 \n"
+ "shufps $0, %%xmm7, %%xmm7 \n"
+ "movaps %%xmm6, (%1,%0,4) \n"
+ "movaps %%xmm7, 16(%1,%0,4) \n"
+ "jg 1b \n"
+ :"+&r"(j)
+ :"r"(matrix_simd), "r"(matrix)
+ :"memory"
+ );
+ if(out_ch == 2) {
+ MIX_MISC(IF1);
+ } else {
+ MIX_MISC(IF0);
+ }
+ }
+}
+
+/* In-place elementwise multiply: dst[i] *= src[i] for len floats, walking
+ * the arrays back-to-front 4 floats (3DNow!) per iteration; femms restores
+ * FPU state.  NOTE(review): assumes len is a multiple of 4 — confirm. */
+static void vector_fmul_3dnow(float *dst, const float *src, int len){
+ x86_reg i = (len-4)*4;
+ __asm__ volatile(
+ "1: \n\t"
+ "movq (%1,%0), %%mm0 \n\t"
+ "movq 8(%1,%0), %%mm1 \n\t"
+ "pfmul (%2,%0), %%mm0 \n\t"
+ "pfmul 8(%2,%0), %%mm1 \n\t"
+ "movq %%mm0, (%1,%0) \n\t"
+ "movq %%mm1, 8(%1,%0) \n\t"
+ "sub $16, %0 \n\t"
+ "jge 1b \n\t"
+ "femms \n\t"
+ :"+r"(i)
+ :"r"(dst), "r"(src)
+ :"memory"
+ );
+}
+/* SSE version: 8 floats per iteration.  NOTE(review): movaps requires
+ * 16-byte alignment and len % 8 == 0 — confirm against callers. */
+static void vector_fmul_sse(float *dst, const float *src, int len){
+ x86_reg i = (len-8)*4;
+ __asm__ volatile(
+ "1: \n\t"
+ "movaps (%1,%0), %%xmm0 \n\t"
+ "movaps 16(%1,%0), %%xmm1 \n\t"
+ "mulps (%2,%0), %%xmm0 \n\t"
+ "mulps 16(%2,%0), %%xmm1 \n\t"
+ "movaps %%xmm0, (%1,%0) \n\t"
+ "movaps %%xmm1, 16(%1,%0) \n\t"
+ "sub $32, %0 \n\t"
+ "jge 1b \n\t"
+ :"+r"(i)
+ :"r"(dst), "r"(src)
+ :"memory"
+ );
+}
+
+/* dst[i] = src0[i] * src1[len-1-i]: src1 is read forward while pswapd
+ * reverses each 2-float pair and the store index counts down, so src1 is
+ * effectively consumed in reverse order.  NOTE(review): no "memory"
+ * clobber on this asm — relies on the +m/+r operand constraints; verify. */
+static void vector_fmul_reverse_3dnow2(float *dst, const float *src0, const float *src1, int len){
+ x86_reg i = len*4-16;
+ __asm__ volatile(
+ "1: \n\t"
+ "pswapd 8(%1), %%mm0 \n\t"
+ "pswapd (%1), %%mm1 \n\t"
+ "pfmul (%3,%0), %%mm0 \n\t"
+ "pfmul 8(%3,%0), %%mm1 \n\t"
+ "movq %%mm0, (%2,%0) \n\t"
+ "movq %%mm1, 8(%2,%0) \n\t"
+ "add $16, %1 \n\t"
+ "sub $16, %0 \n\t"
+ "jge 1b \n\t"
+ :"+r"(i), "+r"(src1)
+ :"r"(dst), "r"(src0)
+ );
+ __asm__ volatile("femms");
+}
+/* SSE version of the reversed multiply: shufps $0x1b reverses the 4 floats
+ * of each vector; 8 floats per iteration, aligned loads/stores. */
+static void vector_fmul_reverse_sse(float *dst, const float *src0, const float *src1, int len){
+ x86_reg i = len*4-32;
+ __asm__ volatile(
+ "1: \n\t"
+ "movaps 16(%1), %%xmm0 \n\t"
+ "movaps (%1), %%xmm1 \n\t"
+ "shufps $0x1b, %%xmm0, %%xmm0 \n\t"
+ "shufps $0x1b, %%xmm1, %%xmm1 \n\t"
+ "mulps (%3,%0), %%xmm0 \n\t"
+ "mulps 16(%3,%0), %%xmm1 \n\t"
+ "movaps %%xmm0, (%2,%0) \n\t"
+ "movaps %%xmm1, 16(%2,%0) \n\t"
+ "add $32, %1 \n\t"
+ "sub $32, %0 \n\t"
+ "jge 1b \n\t"
+ :"+r"(i), "+r"(src1)
+ :"r"(dst), "r"(src0)
+ );
+}
+
+/* dst = src0*src1 + src2 over len floats.  Fast 3DNow! paths only for
+ * src3 == 0 with step 1 (contiguous output) or step 2 (output written to
+ * every other float, scattered with movd/psrlq); anything else falls back
+ * to the C implementation.  femms runs on every path to reset MMX state. */
+static void vector_fmul_add_add_3dnow(float *dst, const float *src0, const float *src1,
+ const float *src2, int src3, int len, int step){
+ x86_reg i = (len-4)*4;
+ if(step == 2 && src3 == 0){
+ dst += (len-4)*2;
+ __asm__ volatile(
+ "1: \n\t"
+ "movq (%2,%0), %%mm0 \n\t"
+ "movq 8(%2,%0), %%mm1 \n\t"
+ "pfmul (%3,%0), %%mm0 \n\t"
+ "pfmul 8(%3,%0), %%mm1 \n\t"
+ "pfadd (%4,%0), %%mm0 \n\t"
+ "pfadd 8(%4,%0), %%mm1 \n\t"
+ "movd %%mm0, (%1) \n\t"
+ "movd %%mm1, 16(%1) \n\t"
+ "psrlq $32, %%mm0 \n\t"
+ "psrlq $32, %%mm1 \n\t"
+ "movd %%mm0, 8(%1) \n\t"
+ "movd %%mm1, 24(%1) \n\t"
+ "sub $32, %1 \n\t"
+ "sub $16, %0 \n\t"
+ "jge 1b \n\t"
+ :"+r"(i), "+r"(dst)
+ :"r"(src0), "r"(src1), "r"(src2)
+ :"memory"
+ );
+ }
+ else if(step == 1 && src3 == 0){
+ __asm__ volatile(
+ "1: \n\t"
+ "movq (%2,%0), %%mm0 \n\t"
+ "movq 8(%2,%0), %%mm1 \n\t"
+ "pfmul (%3,%0), %%mm0 \n\t"
+ "pfmul 8(%3,%0), %%mm1 \n\t"
+ "pfadd (%4,%0), %%mm0 \n\t"
+ "pfadd 8(%4,%0), %%mm1 \n\t"
+ "movq %%mm0, (%1,%0) \n\t"
+ "movq %%mm1, 8(%1,%0) \n\t"
+ "sub $16, %0 \n\t"
+ "jge 1b \n\t"
+ :"+r"(i)
+ :"r"(dst), "r"(src0), "r"(src1), "r"(src2)
+ :"memory"
+ );
+ }
+ else
+ ff_vector_fmul_add_add_c(dst, src0, src1, src2, src3, len, step);
+ __asm__ volatile("femms");
+}
+/* SSE counterpart: 8 floats per iteration; the step==2 path scatters each
+ * result vector with movss/movhlps/shufps into every other output slot.
+ * NOTE(review): aligned loads assume 16-byte-aligned inputs — confirm. */
+static void vector_fmul_add_add_sse(float *dst, const float *src0, const float *src1,
+ const float *src2, int src3, int len, int step){
+ x86_reg i = (len-8)*4;
+ if(step == 2 && src3 == 0){
+ dst += (len-8)*2;
+ __asm__ volatile(
+ "1: \n\t"
+ "movaps (%2,%0), %%xmm0 \n\t"
+ "movaps 16(%2,%0), %%xmm1 \n\t"
+ "mulps (%3,%0), %%xmm0 \n\t"
+ "mulps 16(%3,%0), %%xmm1 \n\t"
+ "addps (%4,%0), %%xmm0 \n\t"
+ "addps 16(%4,%0), %%xmm1 \n\t"
+ "movss %%xmm0, (%1) \n\t"
+ "movss %%xmm1, 32(%1) \n\t"
+ "movhlps %%xmm0, %%xmm2 \n\t"
+ "movhlps %%xmm1, %%xmm3 \n\t"
+ "movss %%xmm2, 16(%1) \n\t"
+ "movss %%xmm3, 48(%1) \n\t"
+ "shufps $0xb1, %%xmm0, %%xmm0 \n\t"
+ "shufps $0xb1, %%xmm1, %%xmm1 \n\t"
+ "movss %%xmm0, 8(%1) \n\t"
+ "movss %%xmm1, 40(%1) \n\t"
+ "movhlps %%xmm0, %%xmm2 \n\t"
+ "movhlps %%xmm1, %%xmm3 \n\t"
+ "movss %%xmm2, 24(%1) \n\t"
+ "movss %%xmm3, 56(%1) \n\t"
+ "sub $64, %1 \n\t"
+ "sub $32, %0 \n\t"
+ "jge 1b \n\t"
+ :"+r"(i), "+r"(dst)
+ :"r"(src0), "r"(src1), "r"(src2)
+ :"memory"
+ );
+ }
+ else if(step == 1 && src3 == 0){
+ __asm__ volatile(
+ "1: \n\t"
+ "movaps (%2,%0), %%xmm0 \n\t"
+ "movaps 16(%2,%0), %%xmm1 \n\t"
+ "mulps (%3,%0), %%xmm0 \n\t"
+ "mulps 16(%3,%0), %%xmm1 \n\t"
+ "addps (%4,%0), %%xmm0 \n\t"
+ "addps 16(%4,%0), %%xmm1 \n\t"
+ "movaps %%xmm0, (%1,%0) \n\t"
+ "movaps %%xmm1, 16(%1,%0) \n\t"
+ "sub $32, %0 \n\t"
+ "jge 1b \n\t"
+ :"+r"(i)
+ :"r"(dst), "r"(src0), "r"(src1), "r"(src2)
+ :"memory"
+ );
+ }
+ else
+ ff_vector_fmul_add_add_c(dst, src0, src1, src2, src3, len, step);
+}
+
+/* Overlap-windowing of two input halves into dst, computing the symmetric
+ * sum/difference products annotated in the asm (src0 walked forward via i,
+ * src1 backward via j, window applied from both ends).  The asm path needs
+ * HAVE_6REGS and add_bias == 0; everything else uses the C fallback. */
+static void vector_fmul_window_3dnow2(float *dst, const float *src0, const float *src1,
+ const float *win, float add_bias, int len){
+#ifdef HAVE_6REGS
+ if(add_bias == 0){
+ x86_reg i = -len*4;
+ x86_reg j = len*4-8;
+ __asm__ volatile(
+ "1: \n"
+ "pswapd (%5,%1), %%mm1 \n"
+ "movq (%5,%0), %%mm0 \n"
+ "pswapd (%4,%1), %%mm5 \n"
+ "movq (%3,%0), %%mm4 \n"
+ "movq %%mm0, %%mm2 \n"
+ "movq %%mm1, %%mm3 \n"
+ "pfmul %%mm4, %%mm2 \n" // src0[len+i]*win[len+i]
+ "pfmul %%mm5, %%mm3 \n" // src1[ j]*win[len+j]
+ "pfmul %%mm4, %%mm1 \n" // src0[len+i]*win[len+j]
+ "pfmul %%mm5, %%mm0 \n" // src1[ j]*win[len+i]
+ "pfadd %%mm3, %%mm2 \n"
+ "pfsub %%mm0, %%mm1 \n"
+ "pswapd %%mm2, %%mm2 \n"
+ "movq %%mm1, (%2,%0) \n"
+ "movq %%mm2, (%2,%1) \n"
+ "sub $8, %1 \n"
+ "add $8, %0 \n"
+ "jl 1b \n"
+ "femms \n"
+ :"+r"(i), "+r"(j)
+ :"r"(dst+len), "r"(src0+len), "r"(src1), "r"(win+len)
+ );
+ }else
+#endif
+ ff_vector_fmul_window_c(dst, src0, src1, win, add_bias, len);
+}
+
+/* SSE version of the windowing above: 4 floats per step, shufps $0x1b used
+ * in place of pswapd to reverse the backward-running operands.
+ * NOTE(review): movaps assumes 16-byte alignment — confirm callers. */
+static void vector_fmul_window_sse(float *dst, const float *src0, const float *src1,
+ const float *win, float add_bias, int len){
+#ifdef HAVE_6REGS
+ if(add_bias == 0){
+ x86_reg i = -len*4;
+ x86_reg j = len*4-16;
+ __asm__ volatile(
+ "1: \n"
+ "movaps (%5,%1), %%xmm1 \n"
+ "movaps (%5,%0), %%xmm0 \n"
+ "movaps (%4,%1), %%xmm5 \n"
+ "movaps (%3,%0), %%xmm4 \n"
+ "shufps $0x1b, %%xmm1, %%xmm1 \n"
+ "shufps $0x1b, %%xmm5, %%xmm5 \n"
+ "movaps %%xmm0, %%xmm2 \n"
+ "movaps %%xmm1, %%xmm3 \n"
+ "mulps %%xmm4, %%xmm2 \n" // src0[len+i]*win[len+i]
+ "mulps %%xmm5, %%xmm3 \n" // src1[ j]*win[len+j]
+ "mulps %%xmm4, %%xmm1 \n" // src0[len+i]*win[len+j]
+ "mulps %%xmm5, %%xmm0 \n" // src1[ j]*win[len+i]
+ "addps %%xmm3, %%xmm2 \n"
+ "subps %%xmm0, %%xmm1 \n"
+ "shufps $0x1b, %%xmm2, %%xmm2 \n"
+ "movaps %%xmm1, (%2,%0) \n"
+ "movaps %%xmm2, (%2,%1) \n"
+ "sub $16, %1 \n"
+ "add $16, %0 \n"
+ "jl 1b \n"
+ :"+r"(i), "+r"(j)
+ :"r"(dst+len), "r"(src0+len), "r"(src1), "r"(win+len)
+ );
+ }else
+#endif
+ ff_vector_fmul_window_c(dst, src0, src1, win, add_bias, len);
+}
+
+/* dst[i] = (float)src[i] * mul for len elements, walking forward via a
+ * negative index.  SSE version converts two 64-bit halves with cvtpi2ps and
+ * merges them via movlhps.  NOTE(review): cvtpi2ps touches MMX state and no
+ * emms is issued here — presumably handled by callers; confirm. */
+static void int32_to_float_fmul_scalar_sse(float *dst, const int *src, float mul, int len)
+{
+ x86_reg i = -4*len;
+ __asm__ volatile(
+ "movss %3, %%xmm4 \n"
+ "shufps $0, %%xmm4, %%xmm4 \n"
+ "1: \n"
+ "cvtpi2ps (%2,%0), %%xmm0 \n"
+ "cvtpi2ps 8(%2,%0), %%xmm1 \n"
+ "cvtpi2ps 16(%2,%0), %%xmm2 \n"
+ "cvtpi2ps 24(%2,%0), %%xmm3 \n"
+ "movlhps %%xmm1, %%xmm0 \n"
+ "movlhps %%xmm3, %%xmm2 \n"
+ "mulps %%xmm4, %%xmm0 \n"
+ "mulps %%xmm4, %%xmm2 \n"
+ "movaps %%xmm0, (%1,%0) \n"
+ "movaps %%xmm2, 16(%1,%0) \n"
+ "add $32, %0 \n"
+ "jl 1b \n"
+ :"+r"(i)
+ :"r"(dst+len), "r"(src+len), "m"(mul)
+ );
+}
+
+/* SSE2 version: cvtdq2ps converts 4 ints at once entirely in XMM registers,
+ * so no MMX state is involved. */
+static void int32_to_float_fmul_scalar_sse2(float *dst, const int *src, float mul, int len)
+{
+ x86_reg i = -4*len;
+ __asm__ volatile(
+ "movss %3, %%xmm4 \n"
+ "shufps $0, %%xmm4, %%xmm4 \n"
+ "1: \n"
+ "cvtdq2ps (%2,%0), %%xmm0 \n"
+ "cvtdq2ps 16(%2,%0), %%xmm1 \n"
+ "mulps %%xmm4, %%xmm0 \n"
+ "mulps %%xmm4, %%xmm1 \n"
+ "movaps %%xmm0, (%1,%0) \n"
+ "movaps %%xmm1, 16(%1,%0) \n"
+ "add $32, %0 \n"
+ "jl 1b \n"
+ :"+r"(i)
+ :"r"(dst+len), "r"(src+len), "m"(mul)
+ );
+}
+
+static void float_to_int16_3dnow(int16_t *dst, const float *src, long len){
+ x86_reg reglen = len;
+ // not bit-exact: pf2id uses different rounding than C and SSE
+ __asm__ volatile(
+ "add %0 , %0 \n\t"
+ "lea (%2,%0,2) , %2 \n\t"
+ "add %0 , %1 \n\t"
+ "neg %0 \n\t"
+ "1: \n\t"
+ "pf2id (%2,%0,2) , %%mm0 \n\t"
+ "pf2id 8(%2,%0,2) , %%mm1 \n\t"
+ "pf2id 16(%2,%0,2) , %%mm2 \n\t"
+ "pf2id 24(%2,%0,2) , %%mm3 \n\t"
+ "packssdw %%mm1 , %%mm0 \n\t"
+ "packssdw %%mm3 , %%mm2 \n\t"
+ "movq %%mm0 , (%1,%0) \n\t"
+ "movq %%mm2 , 8(%1,%0) \n\t"
+ "add $16 , %0 \n\t"
+ " js 1b \n\t"
+ "femms \n\t"
+ :"+r"(reglen), "+r"(dst), "+r"(src)
+ );
+}
+static void float_to_int16_sse(int16_t *dst, const float *src, long len){
+ x86_reg reglen = len;
+ __asm__ volatile(
+ "add %0 , %0 \n\t"
+ "lea (%2,%0,2) , %2 \n\t"
+ "add %0 , %1 \n\t"
+ "neg %0 \n\t"
+ "1: \n\t"
+ "cvtps2pi (%2,%0,2) , %%mm0 \n\t"
+ "cvtps2pi 8(%2,%0,2) , %%mm1 \n\t"
+ "cvtps2pi 16(%2,%0,2) , %%mm2 \n\t"
+ "cvtps2pi 24(%2,%0,2) , %%mm3 \n\t"
+ "packssdw %%mm1 , %%mm0 \n\t"
+ "packssdw %%mm3 , %%mm2 \n\t"
+ "movq %%mm0 , (%1,%0) \n\t"
+ "movq %%mm2 , 8(%1,%0) \n\t"
+ "add $16 , %0 \n\t"
+ " js 1b \n\t"
+ "emms \n\t"
+ :"+r"(reglen), "+r"(dst), "+r"(src)
+ );
+}
+
+static void float_to_int16_sse2(int16_t *dst, const float *src, long len){
+ x86_reg reglen = len;
+ __asm__ volatile(
+ "add %0 , %0 \n\t"
+ "lea (%2,%0,2) , %2 \n\t"
+ "add %0 , %1 \n\t"
+ "neg %0 \n\t"
+ "1: \n\t"
+ "cvtps2dq (%2,%0,2) , %%xmm0 \n\t"
+ "cvtps2dq 16(%2,%0,2) , %%xmm1 \n\t"
+ "packssdw %%xmm1 , %%xmm0 \n\t"
+ "movdqa %%xmm0 , (%1,%0) \n\t"
+ "add $16 , %0 \n\t"
+ " js 1b \n\t"
+ :"+r"(reglen), "+r"(dst), "+r"(src)
+ );
+}
+
+#ifdef HAVE_YASM
+void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len);
+void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len);
+void ff_float_to_int16_interleave6_3dn2(int16_t *dst, const float **src, int len);
+void ff_x264_deblock_v_luma_sse2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0);
+void ff_x264_deblock_h_luma_sse2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0);
+void ff_x264_deblock_v8_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta);
+void ff_x264_deblock_h_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta);
+#ifdef ARCH_X86_32
+static void ff_x264_deblock_v_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta)
+{
+ ff_x264_deblock_v8_luma_intra_mmxext(pix+0, stride, alpha, beta);
+ ff_x264_deblock_v8_luma_intra_mmxext(pix+8, stride, alpha, beta);
+}
+#endif
+void ff_x264_deblock_v_luma_intra_sse2(uint8_t *pix, int stride, int alpha, int beta);
+void ff_x264_deblock_h_luma_intra_sse2(uint8_t *pix, int stride, int alpha, int beta);
+#else
+#define ff_float_to_int16_interleave6_sse(a,b,c) float_to_int16_interleave_misc_sse(a,b,c,6)
+#define ff_float_to_int16_interleave6_3dnow(a,b,c) float_to_int16_interleave_misc_3dnow(a,b,c,6)
+#define ff_float_to_int16_interleave6_3dn2(a,b,c) float_to_int16_interleave_misc_3dnow(a,b,c,6)
+#endif
+#define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse
+
+#define FLOAT_TO_INT16_INTERLEAVE(cpu, body) \
+/* gcc pessimizes register allocation if this is in the same function as float_to_int16_interleave_sse2*/\
+static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const float **src, long len, int channels){\
+ DECLARE_ALIGNED_16(int16_t, tmp[len]);\
+ int i,j,c;\
+ for(c=0; c<channels; c++){\
+ float_to_int16_##cpu(tmp, src[c], len);\
+ for(i=0, j=c; i<len; i++, j+=channels)\
+ dst[j] = tmp[i];\
+ }\
+}\
+\
+static void float_to_int16_interleave_##cpu(int16_t *dst, const float **src, long len, int channels){\
+ if(channels==1)\
+ float_to_int16_##cpu(dst, src[0], len);\
+ else if(channels==2){\
+ x86_reg reglen = len; \
+ const float *src0 = src[0];\
+ const float *src1 = src[1];\
+ __asm__ volatile(\
+ "shl $2, %0 \n"\
+ "add %0, %1 \n"\
+ "add %0, %2 \n"\
+ "add %0, %3 \n"\
+ "neg %0 \n"\
+ body\
+ :"+r"(reglen), "+r"(dst), "+r"(src0), "+r"(src1)\
+ );\
+ }else if(channels==6){\
+ ff_float_to_int16_interleave6_##cpu(dst, src, len);\
+ }else\
+ float_to_int16_interleave_misc_##cpu(dst, src, len, channels);\
+}
+
+FLOAT_TO_INT16_INTERLEAVE(3dnow,
+ "1: \n"
+ "pf2id (%2,%0), %%mm0 \n"
+ "pf2id 8(%2,%0), %%mm1 \n"
+ "pf2id (%3,%0), %%mm2 \n"
+ "pf2id 8(%3,%0), %%mm3 \n"
+ "packssdw %%mm1, %%mm0 \n"
+ "packssdw %%mm3, %%mm2 \n"
+ "movq %%mm0, %%mm1 \n"
+ "punpcklwd %%mm2, %%mm0 \n"
+ "punpckhwd %%mm2, %%mm1 \n"
+ "movq %%mm0, (%1,%0)\n"
+ "movq %%mm1, 8(%1,%0)\n"
+ "add $16, %0 \n"
+ "js 1b \n"
+ "femms \n"
+)
+
+FLOAT_TO_INT16_INTERLEAVE(sse,
+ "1: \n"
+ "cvtps2pi (%2,%0), %%mm0 \n"
+ "cvtps2pi 8(%2,%0), %%mm1 \n"
+ "cvtps2pi (%3,%0), %%mm2 \n"
+ "cvtps2pi 8(%3,%0), %%mm3 \n"
+ "packssdw %%mm1, %%mm0 \n"
+ "packssdw %%mm3, %%mm2 \n"
+ "movq %%mm0, %%mm1 \n"
+ "punpcklwd %%mm2, %%mm0 \n"
+ "punpckhwd %%mm2, %%mm1 \n"
+ "movq %%mm0, (%1,%0)\n"
+ "movq %%mm1, 8(%1,%0)\n"
+ "add $16, %0 \n"
+ "js 1b \n"
+ "emms \n"
+)
+
+FLOAT_TO_INT16_INTERLEAVE(sse2,
+ "1: \n"
+ "cvtps2dq (%2,%0), %%xmm0 \n"
+ "cvtps2dq (%3,%0), %%xmm1 \n"
+ "packssdw %%xmm1, %%xmm0 \n"
+ "movhlps %%xmm0, %%xmm1 \n"
+ "punpcklwd %%xmm1, %%xmm0 \n"
+ "movdqa %%xmm0, (%1,%0) \n"
+ "add $16, %0 \n"
+ "js 1b \n"
+)
+
+static void float_to_int16_interleave_3dn2(int16_t *dst, const float **src, long len, int channels){
+ if(channels==6)
+ ff_float_to_int16_interleave6_3dn2(dst, src, len);
+ else
+ float_to_int16_interleave_3dnow(dst, src, len, channels);
+}
+
+
+void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width);
+void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width);
+void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width);
+void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width);
+void ff_snow_inner_add_yblock_sse2(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
+ int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
+void ff_snow_inner_add_yblock_mmx(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
+ int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
+
+
+static void add_int16_sse2(int16_t * v1, int16_t * v2, int order)
+{
+ x86_reg o = -(order << 1);
+ v1 += order;
+ v2 += order;
+ __asm__ volatile(
+ "1: \n\t"
+ "movdqu (%1,%2), %%xmm0 \n\t"
+ "movdqu 16(%1,%2), %%xmm1 \n\t"
+ "paddw (%0,%2), %%xmm0 \n\t"
+ "paddw 16(%0,%2), %%xmm1 \n\t"
+ "movdqa %%xmm0, (%0,%2) \n\t"
+ "movdqa %%xmm1, 16(%0,%2) \n\t"
+ "add $32, %2 \n\t"
+ "js 1b \n\t"
+ : "+r"(v1), "+r"(v2), "+r"(o)
+ );
+}
+
+static void sub_int16_sse2(int16_t * v1, int16_t * v2, int order)
+{
+ x86_reg o = -(order << 1);
+ v1 += order;
+ v2 += order;
+ __asm__ volatile(
+ "1: \n\t"
+ "movdqa (%0,%2), %%xmm0 \n\t"
+ "movdqa 16(%0,%2), %%xmm2 \n\t"
+ "movdqu (%1,%2), %%xmm1 \n\t"
+ "movdqu 16(%1,%2), %%xmm3 \n\t"
+ "psubw %%xmm1, %%xmm0 \n\t"
+ "psubw %%xmm3, %%xmm2 \n\t"
+ "movdqa %%xmm0, (%0,%2) \n\t"
+ "movdqa %%xmm2, 16(%0,%2) \n\t"
+ "add $32, %2 \n\t"
+ "js 1b \n\t"
+ : "+r"(v1), "+r"(v2), "+r"(o)
+ );
+}
+
+static int32_t scalarproduct_int16_sse2(int16_t * v1, int16_t * v2, int order, int shift)
+{
+ int res = 0;
+ DECLARE_ALIGNED_16(int64_t, sh);
+ x86_reg o = -(order << 1);
+
+ v1 += order;
+ v2 += order;
+ sh = shift;
+ __asm__ volatile(
+ "pxor %%xmm7, %%xmm7 \n\t"
+ "1: \n\t"
+ "movdqu (%0,%3), %%xmm0 \n\t"
+ "movdqu 16(%0,%3), %%xmm1 \n\t"
+ "pmaddwd (%1,%3), %%xmm0 \n\t"
+ "pmaddwd 16(%1,%3), %%xmm1 \n\t"
+ "paddd %%xmm0, %%xmm7 \n\t"
+ "paddd %%xmm1, %%xmm7 \n\t"
+ "add $32, %3 \n\t"
+ "js 1b \n\t"
+ "movhlps %%xmm7, %%xmm2 \n\t"
+ "paddd %%xmm2, %%xmm7 \n\t"
+ "psrad %4, %%xmm7 \n\t"
+ "pshuflw $0x4E, %%xmm7,%%xmm2 \n\t"
+ "paddd %%xmm2, %%xmm7 \n\t"
+ "movd %%xmm7, %2 \n\t"
+ : "+r"(v1), "+r"(v2), "=r"(res), "+r"(o)
+ : "m"(sh)
+ );
+ return res;
+}
+
+void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
+{
+ mm_flags = mm_support();
+
+ if (avctx->dsp_mask) {
+ if (avctx->dsp_mask & FF_MM_FORCE)
+ mm_flags |= (avctx->dsp_mask & 0xffff);
+ else
+ mm_flags &= ~(avctx->dsp_mask & 0xffff);
+ }
+
+#if 0
+ av_log(avctx, AV_LOG_INFO, "libavcodec: CPU flags:");
+ if (mm_flags & FF_MM_MMX)
+ av_log(avctx, AV_LOG_INFO, " mmx");
+ if (mm_flags & FF_MM_MMXEXT)
+ av_log(avctx, AV_LOG_INFO, " mmxext");
+ if (mm_flags & FF_MM_3DNOW)
+ av_log(avctx, AV_LOG_INFO, " 3dnow");
+ if (mm_flags & FF_MM_SSE)
+ av_log(avctx, AV_LOG_INFO, " sse");
+ if (mm_flags & FF_MM_SSE2)
+ av_log(avctx, AV_LOG_INFO, " sse2");
+ av_log(avctx, AV_LOG_INFO, "\n");
+#endif
+
+ if (mm_flags & FF_MM_MMX) {
+ const int idct_algo= avctx->idct_algo;
+
+ if(avctx->lowres==0){
+ if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SIMPLEMMX){
+ c->idct_put= ff_simple_idct_put_mmx;
+ c->idct_add= ff_simple_idct_add_mmx;
+ c->idct = ff_simple_idct_mmx;
+ c->idct_permutation_type= FF_SIMPLE_IDCT_PERM;
+#ifdef CONFIG_GPL
+ }else if(idct_algo==FF_IDCT_LIBMPEG2MMX){
+ if(mm_flags & FF_MM_MMXEXT){
+ c->idct_put= ff_libmpeg2mmx2_idct_put;
+ c->idct_add= ff_libmpeg2mmx2_idct_add;
+ c->idct = ff_mmxext_idct;
+ }else{
+ c->idct_put= ff_libmpeg2mmx_idct_put;
+ c->idct_add= ff_libmpeg2mmx_idct_add;
+ c->idct = ff_mmx_idct;
+ }
+ c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
+#endif
+ }else if((ENABLE_VP3_DECODER || ENABLE_VP5_DECODER || ENABLE_VP6_DECODER || ENABLE_THEORA_DECODER) &&
+ idct_algo==FF_IDCT_VP3){
+ if(mm_flags & FF_MM_SSE2){
+ c->idct_put= ff_vp3_idct_put_sse2;
+ c->idct_add= ff_vp3_idct_add_sse2;
+ c->idct = ff_vp3_idct_sse2;
+ c->idct_permutation_type= FF_TRANSPOSE_IDCT_PERM;
+ }else{
+ c->idct_put= ff_vp3_idct_put_mmx;
+ c->idct_add= ff_vp3_idct_add_mmx;
+ c->idct = ff_vp3_idct_mmx;
+ c->idct_permutation_type= FF_PARTTRANS_IDCT_PERM;
+ }
+ }else if(idct_algo==FF_IDCT_CAVS){
+ c->idct_permutation_type= FF_TRANSPOSE_IDCT_PERM;
+ }else if(idct_algo==FF_IDCT_XVIDMMX){
+ if(mm_flags & FF_MM_SSE2){
+ c->idct_put= ff_idct_xvid_sse2_put;
+ c->idct_add= ff_idct_xvid_sse2_add;
+ c->idct = ff_idct_xvid_sse2;
+ c->idct_permutation_type= FF_SSE2_IDCT_PERM;
+ }else if(mm_flags & FF_MM_MMXEXT){
+ c->idct_put= ff_idct_xvid_mmx2_put;
+ c->idct_add= ff_idct_xvid_mmx2_add;
+ c->idct = ff_idct_xvid_mmx2;
+ }else{
+ c->idct_put= ff_idct_xvid_mmx_put;
+ c->idct_add= ff_idct_xvid_mmx_add;
+ c->idct = ff_idct_xvid_mmx;
+ }
+ }
+ }
+
+ c->put_pixels_clamped = put_pixels_clamped_mmx;
+ c->put_signed_pixels_clamped = put_signed_pixels_clamped_mmx;
+ c->add_pixels_clamped = add_pixels_clamped_mmx;
+ c->clear_block = clear_block_mmx;
+ c->clear_blocks = clear_blocks_mmx;
+ if (mm_flags & FF_MM_SSE)
+ c->clear_block = clear_block_sse;
+
+#define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU) \
+ c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## SIZE ## _ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][1] = PFX ## _pixels ## SIZE ## _x2_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## SIZE ## _y2_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## SIZE ## _xy2_ ## CPU
+
+ SET_HPEL_FUNCS(put, 0, 16, mmx);
+ SET_HPEL_FUNCS(put_no_rnd, 0, 16, mmx);
+ SET_HPEL_FUNCS(avg, 0, 16, mmx);
+ SET_HPEL_FUNCS(avg_no_rnd, 0, 16, mmx);
+ SET_HPEL_FUNCS(put, 1, 8, mmx);
+ SET_HPEL_FUNCS(put_no_rnd, 1, 8, mmx);
+ SET_HPEL_FUNCS(avg, 1, 8, mmx);
+ SET_HPEL_FUNCS(avg_no_rnd, 1, 8, mmx);
+
+ c->gmc= gmc_mmx;
+
+ c->add_bytes= add_bytes_mmx;
+ c->add_bytes_l2= add_bytes_l2_mmx;
+
+ c->draw_edges = draw_edges_mmx;
+
+ if (ENABLE_ANY_H263) {
+ c->h263_v_loop_filter= h263_v_loop_filter_mmx;
+ c->h263_h_loop_filter= h263_h_loop_filter_mmx;
+ }
+ c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_mmx_rnd;
+ c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_mmx;
+ c->put_no_rnd_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_mmx_nornd;
+
+ c->put_rv40_chroma_pixels_tab[0]= put_rv40_chroma_mc8_mmx;
+ c->put_rv40_chroma_pixels_tab[1]= put_rv40_chroma_mc4_mmx;
+
+ c->h264_idct_dc_add=
+ c->h264_idct_add= ff_h264_idct_add_mmx;
+ c->h264_idct8_dc_add=
+ c->h264_idct8_add= ff_h264_idct8_add_mmx;
+
+ c->h264_idct_add16 = ff_h264_idct_add16_mmx;
+ c->h264_idct8_add4 = ff_h264_idct8_add4_mmx;
+ c->h264_idct_add8 = ff_h264_idct_add8_mmx;
+ c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx;
+
+ if (mm_flags & FF_MM_MMXEXT) {
+ c->prefetch = prefetch_mmx2;
+
+ c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2;
+ c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2;
+
+ c->avg_pixels_tab[0][0] = avg_pixels16_mmx2;
+ c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx2;
+ c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmx2;
+
+ c->put_pixels_tab[1][1] = put_pixels8_x2_mmx2;
+ c->put_pixels_tab[1][2] = put_pixels8_y2_mmx2;
+
+ c->avg_pixels_tab[1][0] = avg_pixels8_mmx2;
+ c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2;
+ c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2;
+
+ c->h264_idct_dc_add= ff_h264_idct_dc_add_mmx2;
+ c->h264_idct8_dc_add= ff_h264_idct8_dc_add_mmx2;
+ c->h264_idct_add16 = ff_h264_idct_add16_mmx2;
+ c->h264_idct8_add4 = ff_h264_idct8_add4_mmx2;
+ c->h264_idct_add8 = ff_h264_idct_add8_mmx2;
+ c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx2;
+
+ if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
+ c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2;
+ c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2;
+ c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2;
+ c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2;
+ c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2;
+ c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2;
+
+ if (ENABLE_VP3_DECODER || ENABLE_THEORA_DECODER) {
+ c->vp3_v_loop_filter= ff_vp3_v_loop_filter_mmx2;
+ c->vp3_h_loop_filter= ff_vp3_h_loop_filter_mmx2;
+ }
+ }
+
+#define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU) \
+ c->PFX ## _pixels_tab[IDX][ 0] = PFX ## SIZE ## _mc00_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][ 1] = PFX ## SIZE ## _mc10_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][ 2] = PFX ## SIZE ## _mc20_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][ 3] = PFX ## SIZE ## _mc30_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][ 4] = PFX ## SIZE ## _mc01_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][ 5] = PFX ## SIZE ## _mc11_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][ 6] = PFX ## SIZE ## _mc21_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][ 7] = PFX ## SIZE ## _mc31_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][ 8] = PFX ## SIZE ## _mc02_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][ 9] = PFX ## SIZE ## _mc12_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][10] = PFX ## SIZE ## _mc22_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][11] = PFX ## SIZE ## _mc32_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][12] = PFX ## SIZE ## _mc03_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][13] = PFX ## SIZE ## _mc13_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][14] = PFX ## SIZE ## _mc23_ ## CPU; \
+ c->PFX ## _pixels_tab[IDX][15] = PFX ## SIZE ## _mc33_ ## CPU
+
+ SET_QPEL_FUNCS(put_qpel, 0, 16, mmx2);
+ SET_QPEL_FUNCS(put_qpel, 1, 8, mmx2);
+ SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmx2);
+ SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmx2);
+ SET_QPEL_FUNCS(avg_qpel, 0, 16, mmx2);
+ SET_QPEL_FUNCS(avg_qpel, 1, 8, mmx2);
+
+ SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmx2);
+ SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmx2);
+ SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmx2);
+ SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmx2);
+ SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmx2);
+ SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmx2);
+
+ SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmx2);
+ SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmx2);
+ SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, mmx2);
+ SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, mmx2);
+
+ c->avg_rv40_chroma_pixels_tab[0]= avg_rv40_chroma_mc8_mmx2;
+ c->avg_rv40_chroma_pixels_tab[1]= avg_rv40_chroma_mc4_mmx2;
+
+ c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_mmx2_rnd;
+ c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_mmx2;
+ c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_mmx2;
+ c->put_h264_chroma_pixels_tab[2]= put_h264_chroma_mc2_mmx2;
+ c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_mmx2;
+ c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_mmx2;
+ c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_mmx2;
+ c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_mmx2;
+ c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_mmx2;
+ c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_mmx2;
+ c->h264_loop_filter_strength= h264_loop_filter_strength_mmx2;
+
+ c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_mmx2;
+ c->weight_h264_pixels_tab[1]= ff_h264_weight_16x8_mmx2;
+ c->weight_h264_pixels_tab[2]= ff_h264_weight_8x16_mmx2;
+ c->weight_h264_pixels_tab[3]= ff_h264_weight_8x8_mmx2;
+ c->weight_h264_pixels_tab[4]= ff_h264_weight_8x4_mmx2;
+ c->weight_h264_pixels_tab[5]= ff_h264_weight_4x8_mmx2;
+ c->weight_h264_pixels_tab[6]= ff_h264_weight_4x4_mmx2;
+ c->weight_h264_pixels_tab[7]= ff_h264_weight_4x2_mmx2;
+
+ c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_mmx2;
+ c->biweight_h264_pixels_tab[1]= ff_h264_biweight_16x8_mmx2;
+ c->biweight_h264_pixels_tab[2]= ff_h264_biweight_8x16_mmx2;
+ c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_mmx2;
+ c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_mmx2;
+ c->biweight_h264_pixels_tab[5]= ff_h264_biweight_4x8_mmx2;
+ c->biweight_h264_pixels_tab[6]= ff_h264_biweight_4x4_mmx2;
+ c->biweight_h264_pixels_tab[7]= ff_h264_biweight_4x2_mmx2;
+
+ if (ENABLE_CAVS_DECODER)
+ ff_cavsdsp_init_mmx2(c, avctx);
+
+ if (ENABLE_VC1_DECODER || ENABLE_WMV3_DECODER)
+ ff_vc1dsp_init_mmx(c, avctx);
+
+ c->add_png_paeth_prediction= add_png_paeth_prediction_mmx2;
+ } else if (mm_flags & FF_MM_3DNOW) {
+ c->prefetch = prefetch_3dnow;
+
+ c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow;
+ c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow;
+
+ c->avg_pixels_tab[0][0] = avg_pixels16_3dnow;
+ c->avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow;
+ c->avg_pixels_tab[0][2] = avg_pixels16_y2_3dnow;
+
+ c->put_pixels_tab[1][1] = put_pixels8_x2_3dnow;
+ c->put_pixels_tab[1][2] = put_pixels8_y2_3dnow;
+
+ c->avg_pixels_tab[1][0] = avg_pixels8_3dnow;
+ c->avg_pixels_tab[1][1] = avg_pixels8_x2_3dnow;
+ c->avg_pixels_tab[1][2] = avg_pixels8_y2_3dnow;
+
+ if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
+ c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_3dnow;
+ c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_3dnow;
+ c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_3dnow;
+ c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_3dnow;
+ c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow;
+ c->avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow;
+ }
+
+ SET_QPEL_FUNCS(put_qpel, 0, 16, 3dnow);
+ SET_QPEL_FUNCS(put_qpel, 1, 8, 3dnow);
+ SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, 3dnow);
+ SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, 3dnow);
+ SET_QPEL_FUNCS(avg_qpel, 0, 16, 3dnow);
+ SET_QPEL_FUNCS(avg_qpel, 1, 8, 3dnow);
+
+ SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 3dnow);
+ SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 3dnow);
+ SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 3dnow);
+ SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 3dnow);
+ SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 3dnow);
+ SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 3dnow);
+
+ SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, 3dnow);
+ SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, 3dnow);
+ SET_QPEL_FUNCS(avg_2tap_qpel, 0, 16, 3dnow);
+ SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow);
+
+ c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_3dnow_rnd;
+ c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_3dnow;
+
+ c->avg_rv40_chroma_pixels_tab[0]= avg_rv40_chroma_mc8_3dnow;
+ c->avg_rv40_chroma_pixels_tab[1]= avg_rv40_chroma_mc4_3dnow;
+
+ if (ENABLE_CAVS_DECODER)
+ ff_cavsdsp_init_3dnow(c, avctx);
+ }
+
+
+#define H264_QPEL_FUNCS(x, y, CPU)\
+ c->put_h264_qpel_pixels_tab[0][x+y*4] = put_h264_qpel16_mc##x##y##_##CPU;\
+ c->put_h264_qpel_pixels_tab[1][x+y*4] = put_h264_qpel8_mc##x##y##_##CPU;\
+ c->avg_h264_qpel_pixels_tab[0][x+y*4] = avg_h264_qpel16_mc##x##y##_##CPU;\
+ c->avg_h264_qpel_pixels_tab[1][x+y*4] = avg_h264_qpel8_mc##x##y##_##CPU;
+ if((mm_flags & FF_MM_SSE2) && !(mm_flags & FF_MM_3DNOW)){
+ // these functions are slower than mmx on AMD, but faster on Intel
+/* FIXME works in most codecs, but crashes svq1 due to unaligned chroma
+ c->put_pixels_tab[0][0] = put_pixels16_sse2;
+ c->avg_pixels_tab[0][0] = avg_pixels16_sse2;
+*/
+ H264_QPEL_FUNCS(0, 0, sse2);
+ }
+ if(mm_flags & FF_MM_SSE2){
+ c->h264_idct8_add = ff_h264_idct8_add_sse2;
+ c->h264_idct8_add4= ff_h264_idct8_add4_sse2;
+
+ H264_QPEL_FUNCS(0, 1, sse2);
+ H264_QPEL_FUNCS(0, 2, sse2);
+ H264_QPEL_FUNCS(0, 3, sse2);
+ H264_QPEL_FUNCS(1, 1, sse2);
+ H264_QPEL_FUNCS(1, 2, sse2);
+ H264_QPEL_FUNCS(1, 3, sse2);
+ H264_QPEL_FUNCS(2, 1, sse2);
+ H264_QPEL_FUNCS(2, 2, sse2);
+ H264_QPEL_FUNCS(2, 3, sse2);
+ H264_QPEL_FUNCS(3, 1, sse2);
+ H264_QPEL_FUNCS(3, 2, sse2);
+ H264_QPEL_FUNCS(3, 3, sse2);
+ }
+#ifdef HAVE_SSSE3
+ if(mm_flags & FF_MM_SSSE3){
+ H264_QPEL_FUNCS(1, 0, ssse3);
+ H264_QPEL_FUNCS(1, 1, ssse3);
+ H264_QPEL_FUNCS(1, 2, ssse3);
+ H264_QPEL_FUNCS(1, 3, ssse3);
+ H264_QPEL_FUNCS(2, 0, ssse3);
+ H264_QPEL_FUNCS(2, 1, ssse3);
+ H264_QPEL_FUNCS(2, 2, ssse3);
+ H264_QPEL_FUNCS(2, 3, ssse3);
+ H264_QPEL_FUNCS(3, 0, ssse3);
+ H264_QPEL_FUNCS(3, 1, ssse3);
+ H264_QPEL_FUNCS(3, 2, ssse3);
+ H264_QPEL_FUNCS(3, 3, ssse3);
+ c->put_no_rnd_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_ssse3_nornd;
+ c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_ssse3_rnd;
+ c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_ssse3_rnd;
+ c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_ssse3;
+ c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_ssse3;
+ c->add_png_paeth_prediction= add_png_paeth_prediction_ssse3;
+ }
+#endif
+
+#if defined(CONFIG_GPL) && defined(HAVE_YASM)
+ if( mm_flags&FF_MM_MMXEXT ){
+#ifdef ARCH_X86_32
+ c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_mmxext;
+ c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_mmxext;
+#endif
+ if( mm_flags&FF_MM_SSE2 ){
+#if defined(ARCH_X86_64) || !defined(__ICC) || __ICC > 1100
+ c->h264_v_loop_filter_luma = ff_x264_deblock_v_luma_sse2;
+ c->h264_h_loop_filter_luma = ff_x264_deblock_h_luma_sse2;
+ c->h264_v_loop_filter_luma_intra = ff_x264_deblock_v_luma_intra_sse2;
+ c->h264_h_loop_filter_luma_intra = ff_x264_deblock_h_luma_intra_sse2;
+#endif
+ c->h264_idct_add16 = ff_h264_idct_add16_sse2;
+ c->h264_idct_add8 = ff_h264_idct_add8_sse2;
+ c->h264_idct_add16intra = ff_h264_idct_add16intra_sse2;
+ }
+ }
+#endif
+
+#ifdef CONFIG_SNOW_DECODER
+ if(mm_flags & FF_MM_SSE2 & 0){
+ c->horizontal_compose97i = ff_snow_horizontal_compose97i_sse2;
+#ifdef HAVE_7REGS
+ c->vertical_compose97i = ff_snow_vertical_compose97i_sse2;
+#endif
+ c->inner_add_yblock = ff_snow_inner_add_yblock_sse2;
+ }
+ else{
+ if(mm_flags & FF_MM_MMXEXT){
+ c->horizontal_compose97i = ff_snow_horizontal_compose97i_mmx;
+#ifdef HAVE_7REGS
+ c->vertical_compose97i = ff_snow_vertical_compose97i_mmx;
+#endif
+ }
+ c->inner_add_yblock = ff_snow_inner_add_yblock_mmx;
+ }
+#endif
+
+ if(mm_flags & FF_MM_3DNOW){
+ c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow;
+ c->vector_fmul = vector_fmul_3dnow;
+ if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
+ c->float_to_int16 = float_to_int16_3dnow;
+ c->float_to_int16_interleave = float_to_int16_interleave_3dnow;
+ }
+ }
+ if(mm_flags & FF_MM_3DNOWEXT){
+ c->vector_fmul_reverse = vector_fmul_reverse_3dnow2;
+ c->vector_fmul_window = vector_fmul_window_3dnow2;
+ if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
+ c->float_to_int16_interleave = float_to_int16_interleave_3dn2;
+ }
+ }
+ if(mm_flags & FF_MM_SSE){
+ c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse;
+ c->ac3_downmix = ac3_downmix_sse;
+ c->vector_fmul = vector_fmul_sse;
+ c->vector_fmul_reverse = vector_fmul_reverse_sse;
+ c->vector_fmul_add_add = vector_fmul_add_add_sse;
+ c->vector_fmul_window = vector_fmul_window_sse;
+ c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse;
+ c->float_to_int16 = float_to_int16_sse;
+ c->float_to_int16_interleave = float_to_int16_interleave_sse;
+ }
+ if(mm_flags & FF_MM_3DNOW)
+ c->vector_fmul_add_add = vector_fmul_add_add_3dnow; // faster than sse
+ if(mm_flags & FF_MM_SSE2){
+ c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse2;
+ c->float_to_int16 = float_to_int16_sse2;
+ c->float_to_int16_interleave = float_to_int16_interleave_sse2;
+ c->add_int16 = add_int16_sse2;
+ c->sub_int16 = sub_int16_sse2;
+ c->scalarproduct_int16 = scalarproduct_int16_sse2;
+ }
+ }
+
+ if (ENABLE_ENCODERS)
+ dsputilenc_init_mmx(c, avctx);
+
+#if 0
+ // for speed testing
+ get_pixels = just_return;
+ put_pixels_clamped = just_return;
+ add_pixels_clamped = just_return;
+
+ pix_abs16x16 = just_return;
+ pix_abs16x16_x2 = just_return;
+ pix_abs16x16_y2 = just_return;
+ pix_abs16x16_xy2 = just_return;
+
+ put_pixels_tab[0] = just_return;
+ put_pixels_tab[1] = just_return;
+ put_pixels_tab[2] = just_return;
+ put_pixels_tab[3] = just_return;
+
+ put_no_rnd_pixels_tab[0] = just_return;
+ put_no_rnd_pixels_tab[1] = just_return;
+ put_no_rnd_pixels_tab[2] = just_return;
+ put_no_rnd_pixels_tab[3] = just_return;
+
+ avg_pixels_tab[0] = just_return;
+ avg_pixels_tab[1] = just_return;
+ avg_pixels_tab[2] = just_return;
+ avg_pixels_tab[3] = just_return;
+
+ avg_no_rnd_pixels_tab[0] = just_return;
+ avg_no_rnd_pixels_tab[1] = just_return;
+ avg_no_rnd_pixels_tab[2] = just_return;
+ avg_no_rnd_pixels_tab[3] = just_return;
+
+ //av_fdct = just_return;
+ //ff_idct = just_return;
+#endif
+}
diff --git a/libavcodec/x86/dsputil_mmx.h b/libavcodec/x86/dsputil_mmx.h
new file mode 100644
index 0000000..87617e3
--- /dev/null
+++ b/libavcodec/x86/dsputil_mmx.h
@@ -0,0 +1,154 @@
+/*
+ * MMX optimized DSP utils
+ * Copyright (c) 2007 Aurelien Jacobs <aurel at gnuage.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_X86_DSPUTIL_MMX_H
+#define AVCODEC_X86_DSPUTIL_MMX_H
+
+#include <stdint.h>
+#include "libavcodec/dsputil.h"
+
+typedef struct { uint64_t a, b; } xmm_reg;
+
+extern const uint64_t ff_bone;
+extern const uint64_t ff_wtwo;
+
+extern const uint64_t ff_pdw_80000000[2];
+
+extern const uint64_t ff_pw_3;
+extern const uint64_t ff_pw_4;
+extern const xmm_reg ff_pw_5;
+extern const xmm_reg ff_pw_8;
+extern const uint64_t ff_pw_15;
+extern const xmm_reg ff_pw_16;
+extern const uint64_t ff_pw_20;
+extern const xmm_reg ff_pw_28;
+extern const xmm_reg ff_pw_32;
+extern const uint64_t ff_pw_42;
+extern const uint64_t ff_pw_64;
+extern const uint64_t ff_pw_96;
+extern const uint64_t ff_pw_128;
+extern const uint64_t ff_pw_255;
+
+extern const uint64_t ff_pb_1;
+extern const uint64_t ff_pb_3;
+extern const uint64_t ff_pb_7;
+extern const uint64_t ff_pb_1F;
+extern const uint64_t ff_pb_3F;
+extern const uint64_t ff_pb_81;
+extern const uint64_t ff_pb_A1;
+extern const uint64_t ff_pb_FC;
+
+extern const double ff_pd_1[2];
+extern const double ff_pd_2[2];
+
+#define LOAD4(stride,in,a,b,c,d)\
+ "movq 0*"#stride"+"#in", "#a"\n\t"\
+ "movq 1*"#stride"+"#in", "#b"\n\t"\
+ "movq 2*"#stride"+"#in", "#c"\n\t"\
+ "movq 3*"#stride"+"#in", "#d"\n\t"
+
+#define STORE4(stride,out,a,b,c,d)\
+ "movq "#a", 0*"#stride"+"#out"\n\t"\
+ "movq "#b", 1*"#stride"+"#out"\n\t"\
+ "movq "#c", 2*"#stride"+"#out"\n\t"\
+ "movq "#d", 3*"#stride"+"#out"\n\t"
+
+/* in/out: mma=mma+mmb, mmb=mmb-mma */
+#define SUMSUB_BA( a, b ) \
+ "paddw "#b", "#a" \n\t"\
+ "paddw "#b", "#b" \n\t"\
+ "psubw "#a", "#b" \n\t"
+
+#define SBUTTERFLY(a,b,t,n,m)\
+ "mov" #m " " #a ", " #t " \n\t" /* abcd */\
+ "punpckl" #n " " #b ", " #a " \n\t" /* aebf */\
+ "punpckh" #n " " #b ", " #t " \n\t" /* cgdh */\
+
+#define TRANSPOSE4(a,b,c,d,t)\
+ SBUTTERFLY(a,b,t,wd,q) /* a=aebf t=cgdh */\
+ SBUTTERFLY(c,d,b,wd,q) /* c=imjn b=kolp */\
+ SBUTTERFLY(a,c,d,dq,q) /* a=aeim d=bfjn */\
+ SBUTTERFLY(t,b,c,dq,q) /* t=cgko c=dhlp */
+
+// e,f,g,h can be memory
+// out: a,d,t,c
+#define TRANSPOSE8x4(a,b,c,d,e,f,g,h,t)\
+ "punpcklbw " #e ", " #a " \n\t" /* a0 e0 a1 e1 a2 e2 a3 e3 */\
+ "punpcklbw " #f ", " #b " \n\t" /* b0 f0 b1 f1 b2 f2 b3 f3 */\
+ "punpcklbw " #g ", " #c " \n\t" /* c0 g0 c1 g1 c2 g2 d3 g3 */\
+ "punpcklbw " #h ", " #d " \n\t" /* d0 h0 d1 h1 d2 h2 d3 h3 */\
+ SBUTTERFLY(a, b, t, bw, q) /* a= a0 b0 e0 f0 a1 b1 e1 f1 */\
+ /* t= a2 b2 e2 f2 a3 b3 e3 f3 */\
+ SBUTTERFLY(c, d, b, bw, q) /* c= c0 d0 g0 h0 c1 d1 g1 h1 */\
+ /* b= c2 d2 g2 h2 c3 d3 g3 h3 */\
+ SBUTTERFLY(a, c, d, wd, q) /* a= a0 b0 c0 d0 e0 f0 g0 h0 */\
+ /* d= a1 b1 c1 d1 e1 f1 g1 h1 */\
+ SBUTTERFLY(t, b, c, wd, q) /* t= a2 b2 c2 d2 e2 f2 g2 h2 */\
+ /* c= a3 b3 c3 d3 e3 f3 g3 h3 */
+
+#ifdef ARCH_X86_64
+// permutes 01234567 -> 05736421
+#define TRANSPOSE8(a,b,c,d,e,f,g,h,t)\
+ SBUTTERFLY(a,b,%%xmm8,wd,dqa)\
+ SBUTTERFLY(c,d,b,wd,dqa)\
+ SBUTTERFLY(e,f,d,wd,dqa)\
+ SBUTTERFLY(g,h,f,wd,dqa)\
+ SBUTTERFLY(a,c,h,dq,dqa)\
+ SBUTTERFLY(%%xmm8,b,c,dq,dqa)\
+ SBUTTERFLY(e,g,b,dq,dqa)\
+ SBUTTERFLY(d,f,g,dq,dqa)\
+ SBUTTERFLY(a,e,f,qdq,dqa)\
+ SBUTTERFLY(%%xmm8,d,e,qdq,dqa)\
+ SBUTTERFLY(h,b,d,qdq,dqa)\
+ SBUTTERFLY(c,g,b,qdq,dqa)\
+ "movdqa %%xmm8, "#g" \n\t"
+#else
+#define TRANSPOSE8(a,b,c,d,e,f,g,h,t)\
+ "movdqa "#h", "#t" \n\t"\
+ SBUTTERFLY(a,b,h,wd,dqa)\
+ "movdqa "#h", 16"#t" \n\t"\
+ "movdqa "#t", "#h" \n\t"\
+ SBUTTERFLY(c,d,b,wd,dqa)\
+ SBUTTERFLY(e,f,d,wd,dqa)\
+ SBUTTERFLY(g,h,f,wd,dqa)\
+ SBUTTERFLY(a,c,h,dq,dqa)\
+ "movdqa "#h", "#t" \n\t"\
+ "movdqa 16"#t", "#h" \n\t"\
+ SBUTTERFLY(h,b,c,dq,dqa)\
+ SBUTTERFLY(e,g,b,dq,dqa)\
+ SBUTTERFLY(d,f,g,dq,dqa)\
+ SBUTTERFLY(a,e,f,qdq,dqa)\
+ SBUTTERFLY(h,d,e,qdq,dqa)\
+ "movdqa "#h", 16"#t" \n\t"\
+ "movdqa "#t", "#h" \n\t"\
+ SBUTTERFLY(h,b,d,qdq,dqa)\
+ SBUTTERFLY(c,g,b,qdq,dqa)\
+ "movdqa 16"#t", "#g" \n\t"
+#endif
+
+#define MOVQ_WONE(regd) \
+ __asm__ volatile ( \
+ "pcmpeqd %%" #regd ", %%" #regd " \n\t" \
+ "psrlw $15, %%" #regd ::)
+
+void dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx);
+
+#endif /* AVCODEC_X86_DSPUTIL_MMX_H */
diff --git a/libavcodec/x86/dsputil_mmx_avg_template.c b/libavcodec/x86/dsputil_mmx_avg_template.c
new file mode 100644
index 0000000..616a12b
--- /dev/null
+++ b/libavcodec/x86/dsputil_mmx_avg_template.c
@@ -0,0 +1,896 @@
+/*
+ * DSP utils : average functions are compiled twice for 3dnow/mmx2
+ * Copyright (c) 2000, 2001 Fabrice Bellard.
+ * Copyright (c) 2002-2004 Michael Niedermayer
+ *
+ * MMX optimization by Nick Kurshev <nickols_k at mail.ru>
+ * mostly rewritten by Michael Niedermayer <michaelni at gmx.at>
+ * and improved by Zdenek Kabelac <kabi at users.sf.net>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* XXX: we use explicit registers to avoid a gcc 2.95.2 register asm
+ clobber bug - now it will work with 2.95.2 and also with -fPIC
+ */
+static void DEF(put_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+ __asm__ volatile(
+ "lea (%3, %3), %%"REG_a" \n\t"
+ "1: \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq (%1, %3), %%mm1 \n\t"
+ PAVGB" 1(%1), %%mm0 \n\t"
+ PAVGB" 1(%1, %3), %%mm1 \n\t"
+ "movq %%mm0, (%2) \n\t"
+ "movq %%mm1, (%2, %3) \n\t"
+ "add %%"REG_a", %1 \n\t"
+ "add %%"REG_a", %2 \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq (%1, %3), %%mm1 \n\t"
+ PAVGB" 1(%1), %%mm0 \n\t"
+ PAVGB" 1(%1, %3), %%mm1 \n\t"
+ "add %%"REG_a", %1 \n\t"
+ "movq %%mm0, (%2) \n\t"
+ "movq %%mm1, (%2, %3) \n\t"
+ "add %%"REG_a", %2 \n\t"
+ "subl $4, %0 \n\t"
+ "jnz 1b \n\t"
+ :"+g"(h), "+S"(pixels), "+D"(block)
+ :"r" ((x86_reg)line_size)
+ :"%"REG_a, "memory");
+}
+
+static void DEF(put_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
+{
+ __asm__ volatile(
+ "testl $1, %0 \n\t"
+ " jz 1f \n\t"
+ "movd (%1), %%mm0 \n\t"
+ "movd (%2), %%mm1 \n\t"
+ "add %4, %1 \n\t"
+ "add $4, %2 \n\t"
+ PAVGB" %%mm1, %%mm0 \n\t"
+ "movd %%mm0, (%3) \n\t"
+ "add %5, %3 \n\t"
+ "decl %0 \n\t"
+ "1: \n\t"
+ "movd (%1), %%mm0 \n\t"
+ "add %4, %1 \n\t"
+ "movd (%1), %%mm1 \n\t"
+ "movd (%2), %%mm2 \n\t"
+ "movd 4(%2), %%mm3 \n\t"
+ "add %4, %1 \n\t"
+ PAVGB" %%mm2, %%mm0 \n\t"
+ PAVGB" %%mm3, %%mm1 \n\t"
+ "movd %%mm0, (%3) \n\t"
+ "add %5, %3 \n\t"
+ "movd %%mm1, (%3) \n\t"
+ "add %5, %3 \n\t"
+ "movd (%1), %%mm0 \n\t"
+ "add %4, %1 \n\t"
+ "movd (%1), %%mm1 \n\t"
+ "movd 8(%2), %%mm2 \n\t"
+ "movd 12(%2), %%mm3 \n\t"
+ "add %4, %1 \n\t"
+ PAVGB" %%mm2, %%mm0 \n\t"
+ PAVGB" %%mm3, %%mm1 \n\t"
+ "movd %%mm0, (%3) \n\t"
+ "add %5, %3 \n\t"
+ "movd %%mm1, (%3) \n\t"
+ "add %5, %3 \n\t"
+ "add $16, %2 \n\t"
+ "subl $4, %0 \n\t"
+ "jnz 1b \n\t"
+#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cannot be used
+ :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+#else
+ :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+#endif
+ :"S"((x86_reg)src1Stride), "D"((x86_reg)dstStride)
+ :"memory");
+}
+
+
+static void DEF(put_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
+{
+ __asm__ volatile(
+ "testl $1, %0 \n\t"
+ " jz 1f \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq (%2), %%mm1 \n\t"
+ "add %4, %1 \n\t"
+ "add $8, %2 \n\t"
+ PAVGB" %%mm1, %%mm0 \n\t"
+ "movq %%mm0, (%3) \n\t"
+ "add %5, %3 \n\t"
+ "decl %0 \n\t"
+ "1: \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "add %4, %1 \n\t"
+ "movq (%1), %%mm1 \n\t"
+ "add %4, %1 \n\t"
+ PAVGB" (%2), %%mm0 \n\t"
+ PAVGB" 8(%2), %%mm1 \n\t"
+ "movq %%mm0, (%3) \n\t"
+ "add %5, %3 \n\t"
+ "movq %%mm1, (%3) \n\t"
+ "add %5, %3 \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "add %4, %1 \n\t"
+ "movq (%1), %%mm1 \n\t"
+ "add %4, %1 \n\t"
+ PAVGB" 16(%2), %%mm0 \n\t"
+ PAVGB" 24(%2), %%mm1 \n\t"
+ "movq %%mm0, (%3) \n\t"
+ "add %5, %3 \n\t"
+ "movq %%mm1, (%3) \n\t"
+ "add %5, %3 \n\t"
+ "add $32, %2 \n\t"
+ "subl $4, %0 \n\t"
+ "jnz 1b \n\t"
+#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cannot be used
+ :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+#else
+ :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+#endif
+ :"S"((x86_reg)src1Stride), "D"((x86_reg)dstStride)
+ :"memory");
+//the following should be used, though better not with gcc ...
+/* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
+ :"r"(src1Stride), "r"(dstStride)
+ :"memory");*/
+}
+
+static void DEF(put_no_rnd_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
+{
+ __asm__ volatile(
+ "pcmpeqb %%mm6, %%mm6 \n\t"
+ "testl $1, %0 \n\t"
+ " jz 1f \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq (%2), %%mm1 \n\t"
+ "add %4, %1 \n\t"
+ "add $8, %2 \n\t"
+ "pxor %%mm6, %%mm0 \n\t"
+ "pxor %%mm6, %%mm1 \n\t"
+ PAVGB" %%mm1, %%mm0 \n\t"
+ "pxor %%mm6, %%mm0 \n\t"
+ "movq %%mm0, (%3) \n\t"
+ "add %5, %3 \n\t"
+ "decl %0 \n\t"
+ "1: \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "add %4, %1 \n\t"
+ "movq (%1), %%mm1 \n\t"
+ "add %4, %1 \n\t"
+ "movq (%2), %%mm2 \n\t"
+ "movq 8(%2), %%mm3 \n\t"
+ "pxor %%mm6, %%mm0 \n\t"
+ "pxor %%mm6, %%mm1 \n\t"
+ "pxor %%mm6, %%mm2 \n\t"
+ "pxor %%mm6, %%mm3 \n\t"
+ PAVGB" %%mm2, %%mm0 \n\t"
+ PAVGB" %%mm3, %%mm1 \n\t"
+ "pxor %%mm6, %%mm0 \n\t"
+ "pxor %%mm6, %%mm1 \n\t"
+ "movq %%mm0, (%3) \n\t"
+ "add %5, %3 \n\t"
+ "movq %%mm1, (%3) \n\t"
+ "add %5, %3 \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "add %4, %1 \n\t"
+ "movq (%1), %%mm1 \n\t"
+ "add %4, %1 \n\t"
+ "movq 16(%2), %%mm2 \n\t"
+ "movq 24(%2), %%mm3 \n\t"
+ "pxor %%mm6, %%mm0 \n\t"
+ "pxor %%mm6, %%mm1 \n\t"
+ "pxor %%mm6, %%mm2 \n\t"
+ "pxor %%mm6, %%mm3 \n\t"
+ PAVGB" %%mm2, %%mm0 \n\t"
+ PAVGB" %%mm3, %%mm1 \n\t"
+ "pxor %%mm6, %%mm0 \n\t"
+ "pxor %%mm6, %%mm1 \n\t"
+ "movq %%mm0, (%3) \n\t"
+ "add %5, %3 \n\t"
+ "movq %%mm1, (%3) \n\t"
+ "add %5, %3 \n\t"
+ "add $32, %2 \n\t"
+ "subl $4, %0 \n\t"
+ "jnz 1b \n\t"
+#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cannot be used
+ :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+#else
+ :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+#endif
+ :"S"((x86_reg)src1Stride), "D"((x86_reg)dstStride)
+ :"memory");
+//the following should be used, though better not with gcc ...
+/* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
+ :"r"(src1Stride), "r"(dstStride)
+ :"memory");*/
+}
+
+static void DEF(avg_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
+{
+ __asm__ volatile(
+ "testl $1, %0 \n\t"
+ " jz 1f \n\t"
+ "movd (%1), %%mm0 \n\t"
+ "movd (%2), %%mm1 \n\t"
+ "add %4, %1 \n\t"
+ "add $4, %2 \n\t"
+ PAVGB" %%mm1, %%mm0 \n\t"
+ PAVGB" (%3), %%mm0 \n\t"
+ "movd %%mm0, (%3) \n\t"
+ "add %5, %3 \n\t"
+ "decl %0 \n\t"
+ "1: \n\t"
+ "movd (%1), %%mm0 \n\t"
+ "add %4, %1 \n\t"
+ "movd (%1), %%mm1 \n\t"
+ "add %4, %1 \n\t"
+ PAVGB" (%2), %%mm0 \n\t"
+ PAVGB" 4(%2), %%mm1 \n\t"
+ PAVGB" (%3), %%mm0 \n\t"
+ "movd %%mm0, (%3) \n\t"
+ "add %5, %3 \n\t"
+ PAVGB" (%3), %%mm1 \n\t"
+ "movd %%mm1, (%3) \n\t"
+ "add %5, %3 \n\t"
+ "movd (%1), %%mm0 \n\t"
+ "add %4, %1 \n\t"
+ "movd (%1), %%mm1 \n\t"
+ "add %4, %1 \n\t"
+ PAVGB" 8(%2), %%mm0 \n\t"
+ PAVGB" 12(%2), %%mm1 \n\t"
+ PAVGB" (%3), %%mm0 \n\t"
+ "movd %%mm0, (%3) \n\t"
+ "add %5, %3 \n\t"
+ PAVGB" (%3), %%mm1 \n\t"
+ "movd %%mm1, (%3) \n\t"
+ "add %5, %3 \n\t"
+ "add $16, %2 \n\t"
+ "subl $4, %0 \n\t"
+ "jnz 1b \n\t"
+#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cannot be used
+ :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+#else
+ :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+#endif
+ :"S"((x86_reg)src1Stride), "D"((x86_reg)dstStride)
+ :"memory");
+}
+
+
+static void DEF(avg_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
+{
+ __asm__ volatile(
+ "testl $1, %0 \n\t"
+ " jz 1f \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq (%2), %%mm1 \n\t"
+ "add %4, %1 \n\t"
+ "add $8, %2 \n\t"
+ PAVGB" %%mm1, %%mm0 \n\t"
+ PAVGB" (%3), %%mm0 \n\t"
+ "movq %%mm0, (%3) \n\t"
+ "add %5, %3 \n\t"
+ "decl %0 \n\t"
+ "1: \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "add %4, %1 \n\t"
+ "movq (%1), %%mm1 \n\t"
+ "add %4, %1 \n\t"
+ PAVGB" (%2), %%mm0 \n\t"
+ PAVGB" 8(%2), %%mm1 \n\t"
+ PAVGB" (%3), %%mm0 \n\t"
+ "movq %%mm0, (%3) \n\t"
+ "add %5, %3 \n\t"
+ PAVGB" (%3), %%mm1 \n\t"
+ "movq %%mm1, (%3) \n\t"
+ "add %5, %3 \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "add %4, %1 \n\t"
+ "movq (%1), %%mm1 \n\t"
+ "add %4, %1 \n\t"
+ PAVGB" 16(%2), %%mm0 \n\t"
+ PAVGB" 24(%2), %%mm1 \n\t"
+ PAVGB" (%3), %%mm0 \n\t"
+ "movq %%mm0, (%3) \n\t"
+ "add %5, %3 \n\t"
+ PAVGB" (%3), %%mm1 \n\t"
+ "movq %%mm1, (%3) \n\t"
+ "add %5, %3 \n\t"
+ "add $32, %2 \n\t"
+ "subl $4, %0 \n\t"
+ "jnz 1b \n\t"
+#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cannot be used
+ :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+#else
+ :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+#endif
+ :"S"((x86_reg)src1Stride), "D"((x86_reg)dstStride)
+ :"memory");
+//the following should be used, though better not with gcc ...
+/* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
+ :"r"(src1Stride), "r"(dstStride)
+ :"memory");*/
+}
+
+static void DEF(put_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+ __asm__ volatile(
+ "lea (%3, %3), %%"REG_a" \n\t"
+ "1: \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq (%1, %3), %%mm1 \n\t"
+ "movq 8(%1), %%mm2 \n\t"
+ "movq 8(%1, %3), %%mm3 \n\t"
+ PAVGB" 1(%1), %%mm0 \n\t"
+ PAVGB" 1(%1, %3), %%mm1 \n\t"
+ PAVGB" 9(%1), %%mm2 \n\t"
+ PAVGB" 9(%1, %3), %%mm3 \n\t"
+ "movq %%mm0, (%2) \n\t"
+ "movq %%mm1, (%2, %3) \n\t"
+ "movq %%mm2, 8(%2) \n\t"
+ "movq %%mm3, 8(%2, %3) \n\t"
+ "add %%"REG_a", %1 \n\t"
+ "add %%"REG_a", %2 \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq (%1, %3), %%mm1 \n\t"
+ "movq 8(%1), %%mm2 \n\t"
+ "movq 8(%1, %3), %%mm3 \n\t"
+ PAVGB" 1(%1), %%mm0 \n\t"
+ PAVGB" 1(%1, %3), %%mm1 \n\t"
+ PAVGB" 9(%1), %%mm2 \n\t"
+ PAVGB" 9(%1, %3), %%mm3 \n\t"
+ "add %%"REG_a", %1 \n\t"
+ "movq %%mm0, (%2) \n\t"
+ "movq %%mm1, (%2, %3) \n\t"
+ "movq %%mm2, 8(%2) \n\t"
+ "movq %%mm3, 8(%2, %3) \n\t"
+ "add %%"REG_a", %2 \n\t"
+ "subl $4, %0 \n\t"
+ "jnz 1b \n\t"
+ :"+g"(h), "+S"(pixels), "+D"(block)
+ :"r" ((x86_reg)line_size)
+ :"%"REG_a, "memory");
+}
+
+static void DEF(put_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
+{
+ __asm__ volatile(
+ "testl $1, %0 \n\t"
+ " jz 1f \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq 8(%1), %%mm1 \n\t"
+ PAVGB" (%2), %%mm0 \n\t"
+ PAVGB" 8(%2), %%mm1 \n\t"
+ "add %4, %1 \n\t"
+ "add $16, %2 \n\t"
+ "movq %%mm0, (%3) \n\t"
+ "movq %%mm1, 8(%3) \n\t"
+ "add %5, %3 \n\t"
+ "decl %0 \n\t"
+ "1: \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq 8(%1), %%mm1 \n\t"
+ "add %4, %1 \n\t"
+ PAVGB" (%2), %%mm0 \n\t"
+ PAVGB" 8(%2), %%mm1 \n\t"
+ "movq %%mm0, (%3) \n\t"
+ "movq %%mm1, 8(%3) \n\t"
+ "add %5, %3 \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq 8(%1), %%mm1 \n\t"
+ "add %4, %1 \n\t"
+ PAVGB" 16(%2), %%mm0 \n\t"
+ PAVGB" 24(%2), %%mm1 \n\t"
+ "movq %%mm0, (%3) \n\t"
+ "movq %%mm1, 8(%3) \n\t"
+ "add %5, %3 \n\t"
+ "add $32, %2 \n\t"
+ "subl $2, %0 \n\t"
+ "jnz 1b \n\t"
+#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cannot be used
+ :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+#else
+ :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+#endif
+ :"S"((x86_reg)src1Stride), "D"((x86_reg)dstStride)
+ :"memory");
+//the following should be used, though better not with gcc ...
+/* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
+ :"r"(src1Stride), "r"(dstStride)
+ :"memory");*/
+}
+
+static void DEF(avg_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
+{
+ __asm__ volatile(
+ "testl $1, %0 \n\t"
+ " jz 1f \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq 8(%1), %%mm1 \n\t"
+ PAVGB" (%2), %%mm0 \n\t"
+ PAVGB" 8(%2), %%mm1 \n\t"
+ "add %4, %1 \n\t"
+ "add $16, %2 \n\t"
+ PAVGB" (%3), %%mm0 \n\t"
+ PAVGB" 8(%3), %%mm1 \n\t"
+ "movq %%mm0, (%3) \n\t"
+ "movq %%mm1, 8(%3) \n\t"
+ "add %5, %3 \n\t"
+ "decl %0 \n\t"
+ "1: \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq 8(%1), %%mm1 \n\t"
+ "add %4, %1 \n\t"
+ PAVGB" (%2), %%mm0 \n\t"
+ PAVGB" 8(%2), %%mm1 \n\t"
+ PAVGB" (%3), %%mm0 \n\t"
+ PAVGB" 8(%3), %%mm1 \n\t"
+ "movq %%mm0, (%3) \n\t"
+ "movq %%mm1, 8(%3) \n\t"
+ "add %5, %3 \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq 8(%1), %%mm1 \n\t"
+ "add %4, %1 \n\t"
+ PAVGB" 16(%2), %%mm0 \n\t"
+ PAVGB" 24(%2), %%mm1 \n\t"
+ PAVGB" (%3), %%mm0 \n\t"
+ PAVGB" 8(%3), %%mm1 \n\t"
+ "movq %%mm0, (%3) \n\t"
+ "movq %%mm1, 8(%3) \n\t"
+ "add %5, %3 \n\t"
+ "add $32, %2 \n\t"
+ "subl $2, %0 \n\t"
+ "jnz 1b \n\t"
+#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cannot be used
+ :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+#else
+ :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+#endif
+ :"S"((x86_reg)src1Stride), "D"((x86_reg)dstStride)
+ :"memory");
+//the following should be used, though better not with gcc ...
+/* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
+ :"r"(src1Stride), "r"(dstStride)
+ :"memory");*/
+}
+
+static void DEF(put_no_rnd_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
+{
+ __asm__ volatile(
+ "pcmpeqb %%mm6, %%mm6 \n\t"
+ "testl $1, %0 \n\t"
+ " jz 1f \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq 8(%1), %%mm1 \n\t"
+ "movq (%2), %%mm2 \n\t"
+ "movq 8(%2), %%mm3 \n\t"
+ "pxor %%mm6, %%mm0 \n\t"
+ "pxor %%mm6, %%mm1 \n\t"
+ "pxor %%mm6, %%mm2 \n\t"
+ "pxor %%mm6, %%mm3 \n\t"
+ PAVGB" %%mm2, %%mm0 \n\t"
+ PAVGB" %%mm3, %%mm1 \n\t"
+ "pxor %%mm6, %%mm0 \n\t"
+ "pxor %%mm6, %%mm1 \n\t"
+ "add %4, %1 \n\t"
+ "add $16, %2 \n\t"
+ "movq %%mm0, (%3) \n\t"
+ "movq %%mm1, 8(%3) \n\t"
+ "add %5, %3 \n\t"
+ "decl %0 \n\t"
+ "1: \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq 8(%1), %%mm1 \n\t"
+ "add %4, %1 \n\t"
+ "movq (%2), %%mm2 \n\t"
+ "movq 8(%2), %%mm3 \n\t"
+ "pxor %%mm6, %%mm0 \n\t"
+ "pxor %%mm6, %%mm1 \n\t"
+ "pxor %%mm6, %%mm2 \n\t"
+ "pxor %%mm6, %%mm3 \n\t"
+ PAVGB" %%mm2, %%mm0 \n\t"
+ PAVGB" %%mm3, %%mm1 \n\t"
+ "pxor %%mm6, %%mm0 \n\t"
+ "pxor %%mm6, %%mm1 \n\t"
+ "movq %%mm0, (%3) \n\t"
+ "movq %%mm1, 8(%3) \n\t"
+ "add %5, %3 \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq 8(%1), %%mm1 \n\t"
+ "add %4, %1 \n\t"
+ "movq 16(%2), %%mm2 \n\t"
+ "movq 24(%2), %%mm3 \n\t"
+ "pxor %%mm6, %%mm0 \n\t"
+ "pxor %%mm6, %%mm1 \n\t"
+ "pxor %%mm6, %%mm2 \n\t"
+ "pxor %%mm6, %%mm3 \n\t"
+ PAVGB" %%mm2, %%mm0 \n\t"
+ PAVGB" %%mm3, %%mm1 \n\t"
+ "pxor %%mm6, %%mm0 \n\t"
+ "pxor %%mm6, %%mm1 \n\t"
+ "movq %%mm0, (%3) \n\t"
+ "movq %%mm1, 8(%3) \n\t"
+ "add %5, %3 \n\t"
+ "add $32, %2 \n\t"
+ "subl $2, %0 \n\t"
+ "jnz 1b \n\t"
+#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cannot be used
+ :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+#else
+ :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+#endif
+ :"S"((x86_reg)src1Stride), "D"((x86_reg)dstStride)
+ :"memory");
+//the following should be used, though better not with gcc ...
+/* :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
+ :"r"(src1Stride), "r"(dstStride)
+ :"memory");*/
+}
+
+/* GL: this function does incorrect rounding if overflow */
+static void DEF(put_no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+ MOVQ_BONE(mm6);
+ __asm__ volatile(
+ "lea (%3, %3), %%"REG_a" \n\t"
+ "1: \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq (%1, %3), %%mm2 \n\t"
+ "movq 1(%1), %%mm1 \n\t"
+ "movq 1(%1, %3), %%mm3 \n\t"
+ "add %%"REG_a", %1 \n\t"
+ "psubusb %%mm6, %%mm0 \n\t"
+ "psubusb %%mm6, %%mm2 \n\t"
+ PAVGB" %%mm1, %%mm0 \n\t"
+ PAVGB" %%mm3, %%mm2 \n\t"
+ "movq %%mm0, (%2) \n\t"
+ "movq %%mm2, (%2, %3) \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq 1(%1), %%mm1 \n\t"
+ "movq (%1, %3), %%mm2 \n\t"
+ "movq 1(%1, %3), %%mm3 \n\t"
+ "add %%"REG_a", %2 \n\t"
+ "add %%"REG_a", %1 \n\t"
+ "psubusb %%mm6, %%mm0 \n\t"
+ "psubusb %%mm6, %%mm2 \n\t"
+ PAVGB" %%mm1, %%mm0 \n\t"
+ PAVGB" %%mm3, %%mm2 \n\t"
+ "movq %%mm0, (%2) \n\t"
+ "movq %%mm2, (%2, %3) \n\t"
+ "add %%"REG_a", %2 \n\t"
+ "subl $4, %0 \n\t"
+ "jnz 1b \n\t"
+ :"+g"(h), "+S"(pixels), "+D"(block)
+ :"r" ((x86_reg)line_size)
+ :"%"REG_a, "memory");
+}
+
+static void DEF(put_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+ __asm__ volatile(
+ "lea (%3, %3), %%"REG_a" \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "sub %3, %2 \n\t"
+ "1: \n\t"
+ "movq (%1, %3), %%mm1 \n\t"
+ "movq (%1, %%"REG_a"), %%mm2 \n\t"
+ "add %%"REG_a", %1 \n\t"
+ PAVGB" %%mm1, %%mm0 \n\t"
+ PAVGB" %%mm2, %%mm1 \n\t"
+ "movq %%mm0, (%2, %3) \n\t"
+ "movq %%mm1, (%2, %%"REG_a") \n\t"
+ "movq (%1, %3), %%mm1 \n\t"
+ "movq (%1, %%"REG_a"), %%mm0 \n\t"
+ "add %%"REG_a", %2 \n\t"
+ "add %%"REG_a", %1 \n\t"
+ PAVGB" %%mm1, %%mm2 \n\t"
+ PAVGB" %%mm0, %%mm1 \n\t"
+ "movq %%mm2, (%2, %3) \n\t"
+ "movq %%mm1, (%2, %%"REG_a") \n\t"
+ "add %%"REG_a", %2 \n\t"
+ "subl $4, %0 \n\t"
+ "jnz 1b \n\t"
+ :"+g"(h), "+S"(pixels), "+D" (block)
+ :"r" ((x86_reg)line_size)
+ :"%"REG_a, "memory");
+}
+
+/* GL: this function does incorrect rounding if overflow */
+static void DEF(put_no_rnd_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+ MOVQ_BONE(mm6);
+ __asm__ volatile(
+ "lea (%3, %3), %%"REG_a" \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "sub %3, %2 \n\t"
+ "1: \n\t"
+ "movq (%1, %3), %%mm1 \n\t"
+ "movq (%1, %%"REG_a"), %%mm2 \n\t"
+ "add %%"REG_a", %1 \n\t"
+ "psubusb %%mm6, %%mm1 \n\t"
+ PAVGB" %%mm1, %%mm0 \n\t"
+ PAVGB" %%mm2, %%mm1 \n\t"
+ "movq %%mm0, (%2, %3) \n\t"
+ "movq %%mm1, (%2, %%"REG_a") \n\t"
+ "movq (%1, %3), %%mm1 \n\t"
+ "movq (%1, %%"REG_a"), %%mm0 \n\t"
+ "add %%"REG_a", %2 \n\t"
+ "add %%"REG_a", %1 \n\t"
+ "psubusb %%mm6, %%mm1 \n\t"
+ PAVGB" %%mm1, %%mm2 \n\t"
+ PAVGB" %%mm0, %%mm1 \n\t"
+ "movq %%mm2, (%2, %3) \n\t"
+ "movq %%mm1, (%2, %%"REG_a") \n\t"
+ "add %%"REG_a", %2 \n\t"
+ "subl $4, %0 \n\t"
+ "jnz 1b \n\t"
+ :"+g"(h), "+S"(pixels), "+D" (block)
+ :"r" ((x86_reg)line_size)
+ :"%"REG_a, "memory");
+}
+
+static void DEF(avg_pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+ __asm__ volatile(
+ "lea (%3, %3), %%"REG_a" \n\t"
+ "1: \n\t"
+ "movq (%2), %%mm0 \n\t"
+ "movq (%2, %3), %%mm1 \n\t"
+ PAVGB" (%1), %%mm0 \n\t"
+ PAVGB" (%1, %3), %%mm1 \n\t"
+ "movq %%mm0, (%2) \n\t"
+ "movq %%mm1, (%2, %3) \n\t"
+ "add %%"REG_a", %1 \n\t"
+ "add %%"REG_a", %2 \n\t"
+ "movq (%2), %%mm0 \n\t"
+ "movq (%2, %3), %%mm1 \n\t"
+ PAVGB" (%1), %%mm0 \n\t"
+ PAVGB" (%1, %3), %%mm1 \n\t"
+ "add %%"REG_a", %1 \n\t"
+ "movq %%mm0, (%2) \n\t"
+ "movq %%mm1, (%2, %3) \n\t"
+ "add %%"REG_a", %2 \n\t"
+ "subl $4, %0 \n\t"
+ "jnz 1b \n\t"
+ :"+g"(h), "+S"(pixels), "+D"(block)
+ :"r" ((x86_reg)line_size)
+ :"%"REG_a, "memory");
+}
+
+static void DEF(avg_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+ __asm__ volatile(
+ "lea (%3, %3), %%"REG_a" \n\t"
+ "1: \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq (%1, %3), %%mm2 \n\t"
+ PAVGB" 1(%1), %%mm0 \n\t"
+ PAVGB" 1(%1, %3), %%mm2 \n\t"
+ PAVGB" (%2), %%mm0 \n\t"
+ PAVGB" (%2, %3), %%mm2 \n\t"
+ "add %%"REG_a", %1 \n\t"
+ "movq %%mm0, (%2) \n\t"
+ "movq %%mm2, (%2, %3) \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq (%1, %3), %%mm2 \n\t"
+ PAVGB" 1(%1), %%mm0 \n\t"
+ PAVGB" 1(%1, %3), %%mm2 \n\t"
+ "add %%"REG_a", %2 \n\t"
+ "add %%"REG_a", %1 \n\t"
+ PAVGB" (%2), %%mm0 \n\t"
+ PAVGB" (%2, %3), %%mm2 \n\t"
+ "movq %%mm0, (%2) \n\t"
+ "movq %%mm2, (%2, %3) \n\t"
+ "add %%"REG_a", %2 \n\t"
+ "subl $4, %0 \n\t"
+ "jnz 1b \n\t"
+ :"+g"(h), "+S"(pixels), "+D"(block)
+ :"r" ((x86_reg)line_size)
+ :"%"REG_a, "memory");
+}
+
+static void DEF(avg_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+ __asm__ volatile(
+ "lea (%3, %3), %%"REG_a" \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "sub %3, %2 \n\t"
+ "1: \n\t"
+ "movq (%1, %3), %%mm1 \n\t"
+ "movq (%1, %%"REG_a"), %%mm2 \n\t"
+ "add %%"REG_a", %1 \n\t"
+ PAVGB" %%mm1, %%mm0 \n\t"
+ PAVGB" %%mm2, %%mm1 \n\t"
+ "movq (%2, %3), %%mm3 \n\t"
+ "movq (%2, %%"REG_a"), %%mm4 \n\t"
+ PAVGB" %%mm3, %%mm0 \n\t"
+ PAVGB" %%mm4, %%mm1 \n\t"
+ "movq %%mm0, (%2, %3) \n\t"
+ "movq %%mm1, (%2, %%"REG_a") \n\t"
+ "movq (%1, %3), %%mm1 \n\t"
+ "movq (%1, %%"REG_a"), %%mm0 \n\t"
+ PAVGB" %%mm1, %%mm2 \n\t"
+ PAVGB" %%mm0, %%mm1 \n\t"
+ "add %%"REG_a", %2 \n\t"
+ "add %%"REG_a", %1 \n\t"
+ "movq (%2, %3), %%mm3 \n\t"
+ "movq (%2, %%"REG_a"), %%mm4 \n\t"
+ PAVGB" %%mm3, %%mm2 \n\t"
+ PAVGB" %%mm4, %%mm1 \n\t"
+ "movq %%mm2, (%2, %3) \n\t"
+ "movq %%mm1, (%2, %%"REG_a") \n\t"
+ "add %%"REG_a", %2 \n\t"
+ "subl $4, %0 \n\t"
+ "jnz 1b \n\t"
+ :"+g"(h), "+S"(pixels), "+D"(block)
+ :"r" ((x86_reg)line_size)
+ :"%"REG_a, "memory");
+}
+
+/* Note this is not correctly rounded, but this function is only
+ * used for B-frames so it does not matter. */
+static void DEF(avg_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+ MOVQ_BONE(mm6);
+ __asm__ volatile(
+ "lea (%3, %3), %%"REG_a" \n\t"
+ "movq (%1), %%mm0 \n\t"
+ PAVGB" 1(%1), %%mm0 \n\t"
+ ASMALIGN(3)
+ "1: \n\t"
+ "movq (%1, %%"REG_a"), %%mm2 \n\t"
+ "movq (%1, %3), %%mm1 \n\t"
+ "psubusb %%mm6, %%mm2 \n\t"
+ PAVGB" 1(%1, %3), %%mm1 \n\t"
+ PAVGB" 1(%1, %%"REG_a"), %%mm2 \n\t"
+ "add %%"REG_a", %1 \n\t"
+ PAVGB" %%mm1, %%mm0 \n\t"
+ PAVGB" %%mm2, %%mm1 \n\t"
+ PAVGB" (%2), %%mm0 \n\t"
+ PAVGB" (%2, %3), %%mm1 \n\t"
+ "movq %%mm0, (%2) \n\t"
+ "movq %%mm1, (%2, %3) \n\t"
+ "movq (%1, %3), %%mm1 \n\t"
+ "movq (%1, %%"REG_a"), %%mm0 \n\t"
+ PAVGB" 1(%1, %3), %%mm1 \n\t"
+ PAVGB" 1(%1, %%"REG_a"), %%mm0 \n\t"
+ "add %%"REG_a", %2 \n\t"
+ "add %%"REG_a", %1 \n\t"
+ PAVGB" %%mm1, %%mm2 \n\t"
+ PAVGB" %%mm0, %%mm1 \n\t"
+ PAVGB" (%2), %%mm2 \n\t"
+ PAVGB" (%2, %3), %%mm1 \n\t"
+ "movq %%mm2, (%2) \n\t"
+ "movq %%mm1, (%2, %3) \n\t"
+ "add %%"REG_a", %2 \n\t"
+ "subl $4, %0 \n\t"
+ "jnz 1b \n\t"
+ :"+g"(h), "+S"(pixels), "+D"(block)
+ :"r" ((x86_reg)line_size)
+ :"%"REG_a, "memory");
+}
+
+static void DEF(avg_pixels4)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+ do {
+ __asm__ volatile(
+ "movd (%1), %%mm0 \n\t"
+ "movd (%1, %2), %%mm1 \n\t"
+ "movd (%1, %2, 2), %%mm2 \n\t"
+ "movd (%1, %3), %%mm3 \n\t"
+ PAVGB" (%0), %%mm0 \n\t"
+ PAVGB" (%0, %2), %%mm1 \n\t"
+ PAVGB" (%0, %2, 2), %%mm2 \n\t"
+ PAVGB" (%0, %3), %%mm3 \n\t"
+ "movd %%mm0, (%1) \n\t"
+ "movd %%mm1, (%1, %2) \n\t"
+ "movd %%mm2, (%1, %2, 2) \n\t"
+ "movd %%mm3, (%1, %3) \n\t"
+ ::"S"(pixels), "D"(block),
+ "r" ((x86_reg)line_size), "r"((x86_reg)3L*line_size)
+ :"memory");
+ block += 4*line_size;
+ pixels += 4*line_size;
+ h -= 4;
+ } while(h > 0);
+}
+
+//FIXME the following could be optimized too ...
+static void DEF(put_no_rnd_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
+ DEF(put_no_rnd_pixels8_x2)(block , pixels , line_size, h);
+ DEF(put_no_rnd_pixels8_x2)(block+8, pixels+8, line_size, h);
+}
+static void DEF(put_pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
+ DEF(put_pixels8_y2)(block , pixels , line_size, h);
+ DEF(put_pixels8_y2)(block+8, pixels+8, line_size, h);
+}
+static void DEF(put_no_rnd_pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
+ DEF(put_no_rnd_pixels8_y2)(block , pixels , line_size, h);
+ DEF(put_no_rnd_pixels8_y2)(block+8, pixels+8, line_size, h);
+}
+static void DEF(avg_pixels16)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
+ DEF(avg_pixels8)(block , pixels , line_size, h);
+ DEF(avg_pixels8)(block+8, pixels+8, line_size, h);
+}
+static void DEF(avg_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
+ DEF(avg_pixels8_x2)(block , pixels , line_size, h);
+ DEF(avg_pixels8_x2)(block+8, pixels+8, line_size, h);
+}
+static void DEF(avg_pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
+ DEF(avg_pixels8_y2)(block , pixels , line_size, h);
+ DEF(avg_pixels8_y2)(block+8, pixels+8, line_size, h);
+}
+static void DEF(avg_pixels16_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
+ DEF(avg_pixels8_xy2)(block , pixels , line_size, h);
+ DEF(avg_pixels8_xy2)(block+8, pixels+8, line_size, h);
+}
+
+#define QPEL_2TAP_L3(OPNAME) \
+static void DEF(OPNAME ## 2tap_qpel16_l3)(uint8_t *dst, uint8_t *src, int stride, int h, int off1, int off2){\
+ __asm__ volatile(\
+ "1: \n\t"\
+ "movq (%1,%2), %%mm0 \n\t"\
+ "movq 8(%1,%2), %%mm1 \n\t"\
+ PAVGB" (%1,%3), %%mm0 \n\t"\
+ PAVGB" 8(%1,%3), %%mm1 \n\t"\
+ PAVGB" (%1), %%mm0 \n\t"\
+ PAVGB" 8(%1), %%mm1 \n\t"\
+ STORE_OP( (%1,%4),%%mm0)\
+ STORE_OP(8(%1,%4),%%mm1)\
+ "movq %%mm0, (%1,%4) \n\t"\
+ "movq %%mm1, 8(%1,%4) \n\t"\
+ "add %5, %1 \n\t"\
+ "decl %0 \n\t"\
+ "jnz 1b \n\t"\
+ :"+g"(h), "+r"(src)\
+ :"r"((x86_reg)off1), "r"((x86_reg)off2),\
+ "r"((x86_reg)(dst-src)), "r"((x86_reg)stride)\
+ :"memory"\
+ );\
+}\
+static void DEF(OPNAME ## 2tap_qpel8_l3)(uint8_t *dst, uint8_t *src, int stride, int h, int off1, int off2){\
+ __asm__ volatile(\
+ "1: \n\t"\
+ "movq (%1,%2), %%mm0 \n\t"\
+ PAVGB" (%1,%3), %%mm0 \n\t"\
+ PAVGB" (%1), %%mm0 \n\t"\
+ STORE_OP((%1,%4),%%mm0)\
+ "movq %%mm0, (%1,%4) \n\t"\
+ "add %5, %1 \n\t"\
+ "decl %0 \n\t"\
+ "jnz 1b \n\t"\
+ :"+g"(h), "+r"(src)\
+ :"r"((x86_reg)off1), "r"((x86_reg)off2),\
+ "r"((x86_reg)(dst-src)), "r"((x86_reg)stride)\
+ :"memory"\
+ );\
+}
+
+#define STORE_OP(a,b) PAVGB" "#a","#b" \n\t"
+QPEL_2TAP_L3(avg_)
+#undef STORE_OP
+#define STORE_OP(a,b)
+QPEL_2TAP_L3(put_)
+#undef STORE_OP
+#undef QPEL_2TAP_L3
diff --git a/libavcodec/x86/dsputil_mmx_qns_template.c b/libavcodec/x86/dsputil_mmx_qns_template.c
new file mode 100644
index 0000000..d2dbfc5
--- /dev/null
+++ b/libavcodec/x86/dsputil_mmx_qns_template.c
@@ -0,0 +1,101 @@
+/*
+ * DSP utils : QNS functions are compiled 3 times for mmx/3dnow/ssse3
+ * Copyright (c) 2004 Michael Niedermayer
+ *
+ * MMX optimization by Michael Niedermayer <michaelni at gmx.at>
+ * 3DNow! and SSSE3 optimization by Zuxy Meng <zuxy.meng at gmail.com>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define MAX_ABS (512 >> (SCALE_OFFSET>0 ? SCALE_OFFSET : 0))
+
+static int DEF(try_8x8basis)(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale)
+{
+ x86_reg i=0;
+
+ assert(FFABS(scale) < MAX_ABS);
+ scale<<= 16 + SCALE_OFFSET - BASIS_SHIFT + RECON_SHIFT;
+
+ SET_RND(mm6);
+ __asm__ volatile(
+ "pxor %%mm7, %%mm7 \n\t"
+ "movd %4, %%mm5 \n\t"
+ "punpcklwd %%mm5, %%mm5 \n\t"
+ "punpcklwd %%mm5, %%mm5 \n\t"
+ ASMALIGN(4)
+ "1: \n\t"
+ "movq (%1, %0), %%mm0 \n\t"
+ "movq 8(%1, %0), %%mm1 \n\t"
+ PMULHRW(%%mm0, %%mm1, %%mm5, %%mm6)
+ "paddw (%2, %0), %%mm0 \n\t"
+ "paddw 8(%2, %0), %%mm1 \n\t"
+ "psraw $6, %%mm0 \n\t"
+ "psraw $6, %%mm1 \n\t"
+ "pmullw (%3, %0), %%mm0 \n\t"
+ "pmullw 8(%3, %0), %%mm1 \n\t"
+ "pmaddwd %%mm0, %%mm0 \n\t"
+ "pmaddwd %%mm1, %%mm1 \n\t"
+ "paddd %%mm1, %%mm0 \n\t"
+ "psrld $4, %%mm0 \n\t"
+ "paddd %%mm0, %%mm7 \n\t"
+ "add $16, %0 \n\t"
+ "cmp $128, %0 \n\t" //FIXME optimize & bench
+ " jb 1b \n\t"
+ PHADDD(%%mm7, %%mm6)
+ "psrld $2, %%mm7 \n\t"
+ "movd %%mm7, %0 \n\t"
+
+ : "+r" (i)
+ : "r"(basis), "r"(rem), "r"(weight), "g"(scale)
+ );
+ return i;
+}
+
+static void DEF(add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale)
+{
+ x86_reg i=0;
+
+ if(FFABS(scale) < MAX_ABS){
+ scale<<= 16 + SCALE_OFFSET - BASIS_SHIFT + RECON_SHIFT;
+ SET_RND(mm6);
+ __asm__ volatile(
+ "movd %3, %%mm5 \n\t"
+ "punpcklwd %%mm5, %%mm5 \n\t"
+ "punpcklwd %%mm5, %%mm5 \n\t"
+ ASMALIGN(4)
+ "1: \n\t"
+ "movq (%1, %0), %%mm0 \n\t"
+ "movq 8(%1, %0), %%mm1 \n\t"
+ PMULHRW(%%mm0, %%mm1, %%mm5, %%mm6)
+ "paddw (%2, %0), %%mm0 \n\t"
+ "paddw 8(%2, %0), %%mm1 \n\t"
+ "movq %%mm0, (%2, %0) \n\t"
+ "movq %%mm1, 8(%2, %0) \n\t"
+ "add $16, %0 \n\t"
+ "cmp $128, %0 \n\t" // FIXME optimize & bench
+ " jb 1b \n\t"
+
+ : "+r" (i)
+ : "r"(basis), "r"(rem), "g"(scale)
+ );
+ }else{
+ for(i=0; i<8*8; i++){
+ rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
+ }
+ }
+}
diff --git a/libavcodec/x86/dsputil_mmx_rnd_template.c b/libavcodec/x86/dsputil_mmx_rnd_template.c
new file mode 100644
index 0000000..45ed590
--- /dev/null
+++ b/libavcodec/x86/dsputil_mmx_rnd_template.c
@@ -0,0 +1,590 @@
+/*
+ * DSP utils mmx functions are compiled twice for rnd/no_rnd
+ * Copyright (c) 2000, 2001 Fabrice Bellard.
+ * Copyright (c) 2003-2004 Michael Niedermayer <michaelni at gmx.at>
+ *
+ * MMX optimization by Nick Kurshev <nickols_k at mail.ru>
+ * mostly rewritten by Michael Niedermayer <michaelni at gmx.at>
+ * and improved by Zdenek Kabelac <kabi at users.sf.net>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+// put_pixels
+static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+ MOVQ_BFE(mm6);
+ __asm__ volatile(
+ "lea (%3, %3), %%"REG_a" \n\t"
+ ASMALIGN(3)
+ "1: \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq 1(%1), %%mm1 \n\t"
+ "movq (%1, %3), %%mm2 \n\t"
+ "movq 1(%1, %3), %%mm3 \n\t"
+ PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
+ "movq %%mm4, (%2) \n\t"
+ "movq %%mm5, (%2, %3) \n\t"
+ "add %%"REG_a", %1 \n\t"
+ "add %%"REG_a", %2 \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq 1(%1), %%mm1 \n\t"
+ "movq (%1, %3), %%mm2 \n\t"
+ "movq 1(%1, %3), %%mm3 \n\t"
+ PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
+ "movq %%mm4, (%2) \n\t"
+ "movq %%mm5, (%2, %3) \n\t"
+ "add %%"REG_a", %1 \n\t"
+ "add %%"REG_a", %2 \n\t"
+ "subl $4, %0 \n\t"
+ "jnz 1b \n\t"
+ :"+g"(h), "+S"(pixels), "+D"(block)
+ :"r"((x86_reg)line_size)
+ :REG_a, "memory");
+}
+
+static void av_unused DEF(put, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
+{
+ MOVQ_BFE(mm6);
+ __asm__ volatile(
+ "testl $1, %0 \n\t"
+ " jz 1f \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq (%2), %%mm1 \n\t"
+ "add %4, %1 \n\t"
+ "add $8, %2 \n\t"
+ PAVGB(%%mm0, %%mm1, %%mm4, %%mm6)
+ "movq %%mm4, (%3) \n\t"
+ "add %5, %3 \n\t"
+ "decl %0 \n\t"
+ ASMALIGN(3)
+ "1: \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq (%2), %%mm1 \n\t"
+ "add %4, %1 \n\t"
+ "movq (%1), %%mm2 \n\t"
+ "movq 8(%2), %%mm3 \n\t"
+ "add %4, %1 \n\t"
+ PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
+ "movq %%mm4, (%3) \n\t"
+ "add %5, %3 \n\t"
+ "movq %%mm5, (%3) \n\t"
+ "add %5, %3 \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq 16(%2), %%mm1 \n\t"
+ "add %4, %1 \n\t"
+ "movq (%1), %%mm2 \n\t"
+ "movq 24(%2), %%mm3 \n\t"
+ "add %4, %1 \n\t"
+ "add $32, %2 \n\t"
+ PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
+ "movq %%mm4, (%3) \n\t"
+ "add %5, %3 \n\t"
+ "movq %%mm5, (%3) \n\t"
+ "add %5, %3 \n\t"
+ "subl $4, %0 \n\t"
+ "jnz 1b \n\t"
+#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cannot be used
+ :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+#else
+ :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+#endif
+ :"S"((x86_reg)src1Stride), "D"((x86_reg)dstStride)
+ :"memory");
+}
+
+static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+ MOVQ_BFE(mm6);
+ __asm__ volatile(
+ "lea (%3, %3), %%"REG_a" \n\t"
+ ASMALIGN(3)
+ "1: \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq 1(%1), %%mm1 \n\t"
+ "movq (%1, %3), %%mm2 \n\t"
+ "movq 1(%1, %3), %%mm3 \n\t"
+ PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
+ "movq %%mm4, (%2) \n\t"
+ "movq %%mm5, (%2, %3) \n\t"
+ "movq 8(%1), %%mm0 \n\t"
+ "movq 9(%1), %%mm1 \n\t"
+ "movq 8(%1, %3), %%mm2 \n\t"
+ "movq 9(%1, %3), %%mm3 \n\t"
+ PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
+ "movq %%mm4, 8(%2) \n\t"
+ "movq %%mm5, 8(%2, %3) \n\t"
+ "add %%"REG_a", %1 \n\t"
+ "add %%"REG_a", %2 \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq 1(%1), %%mm1 \n\t"
+ "movq (%1, %3), %%mm2 \n\t"
+ "movq 1(%1, %3), %%mm3 \n\t"
+ PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
+ "movq %%mm4, (%2) \n\t"
+ "movq %%mm5, (%2, %3) \n\t"
+ "movq 8(%1), %%mm0 \n\t"
+ "movq 9(%1), %%mm1 \n\t"
+ "movq 8(%1, %3), %%mm2 \n\t"
+ "movq 9(%1, %3), %%mm3 \n\t"
+ PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
+ "movq %%mm4, 8(%2) \n\t"
+ "movq %%mm5, 8(%2, %3) \n\t"
+ "add %%"REG_a", %1 \n\t"
+ "add %%"REG_a", %2 \n\t"
+ "subl $4, %0 \n\t"
+ "jnz 1b \n\t"
+ :"+g"(h), "+S"(pixels), "+D"(block)
+ :"r"((x86_reg)line_size)
+ :REG_a, "memory");
+}
+
+static void av_unused DEF(put, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
+{
+ MOVQ_BFE(mm6);
+ __asm__ volatile(
+ "testl $1, %0 \n\t"
+ " jz 1f \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq (%2), %%mm1 \n\t"
+ "movq 8(%1), %%mm2 \n\t"
+ "movq 8(%2), %%mm3 \n\t"
+ "add %4, %1 \n\t"
+ "add $16, %2 \n\t"
+ PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
+ "movq %%mm4, (%3) \n\t"
+ "movq %%mm5, 8(%3) \n\t"
+ "add %5, %3 \n\t"
+ "decl %0 \n\t"
+ ASMALIGN(3)
+ "1: \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq (%2), %%mm1 \n\t"
+ "movq 8(%1), %%mm2 \n\t"
+ "movq 8(%2), %%mm3 \n\t"
+ "add %4, %1 \n\t"
+ PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
+ "movq %%mm4, (%3) \n\t"
+ "movq %%mm5, 8(%3) \n\t"
+ "add %5, %3 \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq 16(%2), %%mm1 \n\t"
+ "movq 8(%1), %%mm2 \n\t"
+ "movq 24(%2), %%mm3 \n\t"
+ "add %4, %1 \n\t"
+ PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
+ "movq %%mm4, (%3) \n\t"
+ "movq %%mm5, 8(%3) \n\t"
+ "add %5, %3 \n\t"
+ "add $32, %2 \n\t"
+ "subl $2, %0 \n\t"
+ "jnz 1b \n\t"
+#ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cannot be used
+ :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+#else
+ :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+#endif
+ :"S"((x86_reg)src1Stride), "D"((x86_reg)dstStride)
+ :"memory");
+}
+
+static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+ MOVQ_BFE(mm6);
+ __asm__ volatile(
+ "lea (%3, %3), %%"REG_a" \n\t"
+ "movq (%1), %%mm0 \n\t"
+ ASMALIGN(3)
+ "1: \n\t"
+ "movq (%1, %3), %%mm1 \n\t"
+ "movq (%1, %%"REG_a"),%%mm2 \n\t"
+ PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5)
+ "movq %%mm4, (%2) \n\t"
+ "movq %%mm5, (%2, %3) \n\t"
+ "add %%"REG_a", %1 \n\t"
+ "add %%"REG_a", %2 \n\t"
+ "movq (%1, %3), %%mm1 \n\t"
+ "movq (%1, %%"REG_a"),%%mm0 \n\t"
+ PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5)
+ "movq %%mm4, (%2) \n\t"
+ "movq %%mm5, (%2, %3) \n\t"
+ "add %%"REG_a", %1 \n\t"
+ "add %%"REG_a", %2 \n\t"
+ "subl $4, %0 \n\t"
+ "jnz 1b \n\t"
+ :"+g"(h), "+S"(pixels), "+D"(block)
+ :"r"((x86_reg)line_size)
+ :REG_a, "memory");
+}
+
+static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+ MOVQ_ZERO(mm7);
+ SET_RND(mm6); // =2 for rnd and =1 for no_rnd version
+ __asm__ volatile(
+ "movq (%1), %%mm0 \n\t"
+ "movq 1(%1), %%mm4 \n\t"
+ "movq %%mm0, %%mm1 \n\t"
+ "movq %%mm4, %%mm5 \n\t"
+ "punpcklbw %%mm7, %%mm0 \n\t"
+ "punpcklbw %%mm7, %%mm4 \n\t"
+ "punpckhbw %%mm7, %%mm1 \n\t"
+ "punpckhbw %%mm7, %%mm5 \n\t"
+ "paddusw %%mm0, %%mm4 \n\t"
+ "paddusw %%mm1, %%mm5 \n\t"
+ "xor %%"REG_a", %%"REG_a" \n\t"
+ "add %3, %1 \n\t"
+ ASMALIGN(3)
+ "1: \n\t"
+ "movq (%1, %%"REG_a"), %%mm0 \n\t"
+ "movq 1(%1, %%"REG_a"), %%mm2 \n\t"
+ "movq %%mm0, %%mm1 \n\t"
+ "movq %%mm2, %%mm3 \n\t"
+ "punpcklbw %%mm7, %%mm0 \n\t"
+ "punpcklbw %%mm7, %%mm2 \n\t"
+ "punpckhbw %%mm7, %%mm1 \n\t"
+ "punpckhbw %%mm7, %%mm3 \n\t"
+ "paddusw %%mm2, %%mm0 \n\t"
+ "paddusw %%mm3, %%mm1 \n\t"
+ "paddusw %%mm6, %%mm4 \n\t"
+ "paddusw %%mm6, %%mm5 \n\t"
+ "paddusw %%mm0, %%mm4 \n\t"
+ "paddusw %%mm1, %%mm5 \n\t"
+ "psrlw $2, %%mm4 \n\t"
+ "psrlw $2, %%mm5 \n\t"
+ "packuswb %%mm5, %%mm4 \n\t"
+ "movq %%mm4, (%2, %%"REG_a") \n\t"
+ "add %3, %%"REG_a" \n\t"
+
+ "movq (%1, %%"REG_a"), %%mm2 \n\t" // 0 <-> 2 1 <-> 3
+ "movq 1(%1, %%"REG_a"), %%mm4 \n\t"
+ "movq %%mm2, %%mm3 \n\t"
+ "movq %%mm4, %%mm5 \n\t"
+ "punpcklbw %%mm7, %%mm2 \n\t"
+ "punpcklbw %%mm7, %%mm4 \n\t"
+ "punpckhbw %%mm7, %%mm3 \n\t"
+ "punpckhbw %%mm7, %%mm5 \n\t"
+ "paddusw %%mm2, %%mm4 \n\t"
+ "paddusw %%mm3, %%mm5 \n\t"
+ "paddusw %%mm6, %%mm0 \n\t"
+ "paddusw %%mm6, %%mm1 \n\t"
+ "paddusw %%mm4, %%mm0 \n\t"
+ "paddusw %%mm5, %%mm1 \n\t"
+ "psrlw $2, %%mm0 \n\t"
+ "psrlw $2, %%mm1 \n\t"
+ "packuswb %%mm1, %%mm0 \n\t"
+ "movq %%mm0, (%2, %%"REG_a") \n\t"
+ "add %3, %%"REG_a" \n\t"
+
+ "subl $2, %0 \n\t"
+ "jnz 1b \n\t"
+ :"+g"(h), "+S"(pixels)
+ :"D"(block), "r"((x86_reg)line_size)
+ :REG_a, "memory");
+}
+
+// avg_pixels
+static void av_unused DEF(avg, pixels4)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+ MOVQ_BFE(mm6);
+ JUMPALIGN();
+ do {
+ __asm__ volatile(
+ "movd %0, %%mm0 \n\t"
+ "movd %1, %%mm1 \n\t"
+ PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
+ "movd %%mm2, %0 \n\t"
+ :"+m"(*block)
+ :"m"(*pixels)
+ :"memory");
+ pixels += line_size;
+ block += line_size;
+ }
+ while (--h);
+}
+
+// in case more speed is needed - unrolling would certainly help
+static void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+ MOVQ_BFE(mm6);
+ JUMPALIGN();
+ do {
+ __asm__ volatile(
+ "movq %0, %%mm0 \n\t"
+ "movq %1, %%mm1 \n\t"
+ PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
+ "movq %%mm2, %0 \n\t"
+ :"+m"(*block)
+ :"m"(*pixels)
+ :"memory");
+ pixels += line_size;
+ block += line_size;
+ }
+ while (--h);
+}
+
+static void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+ MOVQ_BFE(mm6);
+ JUMPALIGN();
+ do {
+ __asm__ volatile(
+ "movq %0, %%mm0 \n\t"
+ "movq %1, %%mm1 \n\t"
+ PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
+ "movq %%mm2, %0 \n\t"
+ "movq 8%0, %%mm0 \n\t"
+ "movq 8%1, %%mm1 \n\t"
+ PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
+ "movq %%mm2, 8%0 \n\t"
+ :"+m"(*block)
+ :"m"(*pixels)
+ :"memory");
+ pixels += line_size;
+ block += line_size;
+ }
+ while (--h);
+}
+
+static void DEF(avg, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+ MOVQ_BFE(mm6);
+ JUMPALIGN();
+ do {
+ __asm__ volatile(
+ "movq %1, %%mm0 \n\t"
+ "movq 1%1, %%mm1 \n\t"
+ "movq %0, %%mm3 \n\t"
+ PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
+ PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
+ "movq %%mm0, %0 \n\t"
+ :"+m"(*block)
+ :"m"(*pixels)
+ :"memory");
+ pixels += line_size;
+ block += line_size;
+ } while (--h);
+}
+
+static av_unused void DEF(avg, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
+{
+ MOVQ_BFE(mm6);
+ JUMPALIGN();
+ do {
+ __asm__ volatile(
+ "movq %1, %%mm0 \n\t"
+ "movq %2, %%mm1 \n\t"
+ "movq %0, %%mm3 \n\t"
+ PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
+ PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
+ "movq %%mm0, %0 \n\t"
+ :"+m"(*dst)
+ :"m"(*src1), "m"(*src2)
+ :"memory");
+ dst += dstStride;
+ src1 += src1Stride;
+ src2 += 8;
+ } while (--h);
+}
+
+static void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+ MOVQ_BFE(mm6);
+ JUMPALIGN();
+ do {
+ __asm__ volatile(
+ "movq %1, %%mm0 \n\t"
+ "movq 1%1, %%mm1 \n\t"
+ "movq %0, %%mm3 \n\t"
+ PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
+ PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
+ "movq %%mm0, %0 \n\t"
+ "movq 8%1, %%mm0 \n\t"
+ "movq 9%1, %%mm1 \n\t"
+ "movq 8%0, %%mm3 \n\t"
+ PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
+ PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
+ "movq %%mm0, 8%0 \n\t"
+ :"+m"(*block)
+ :"m"(*pixels)
+ :"memory");
+ pixels += line_size;
+ block += line_size;
+ } while (--h);
+}
+
+static av_unused void DEF(avg, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
+{
+ MOVQ_BFE(mm6);
+ JUMPALIGN();
+ do {
+ __asm__ volatile(
+ "movq %1, %%mm0 \n\t"
+ "movq %2, %%mm1 \n\t"
+ "movq %0, %%mm3 \n\t"
+ PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
+ PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
+ "movq %%mm0, %0 \n\t"
+ "movq 8%1, %%mm0 \n\t"
+ "movq 8%2, %%mm1 \n\t"
+ "movq 8%0, %%mm3 \n\t"
+ PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
+ PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
+ "movq %%mm0, 8%0 \n\t"
+ :"+m"(*dst)
+ :"m"(*src1), "m"(*src2)
+ :"memory");
+ dst += dstStride;
+ src1 += src1Stride;
+ src2 += 16;
+ } while (--h);
+}
+
+static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+ MOVQ_BFE(mm6);
+ __asm__ volatile(
+ "lea (%3, %3), %%"REG_a" \n\t"
+ "movq (%1), %%mm0 \n\t"
+ ASMALIGN(3)
+ "1: \n\t"
+ "movq (%1, %3), %%mm1 \n\t"
+ "movq (%1, %%"REG_a"), %%mm2 \n\t"
+ PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5)
+ "movq (%2), %%mm3 \n\t"
+ PAVGB(%%mm3, %%mm4, %%mm0, %%mm6)
+ "movq (%2, %3), %%mm3 \n\t"
+ PAVGB(%%mm3, %%mm5, %%mm1, %%mm6)
+ "movq %%mm0, (%2) \n\t"
+ "movq %%mm1, (%2, %3) \n\t"
+ "add %%"REG_a", %1 \n\t"
+ "add %%"REG_a", %2 \n\t"
+
+ "movq (%1, %3), %%mm1 \n\t"
+ "movq (%1, %%"REG_a"), %%mm0 \n\t"
+ PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5)
+ "movq (%2), %%mm3 \n\t"
+ PAVGB(%%mm3, %%mm4, %%mm2, %%mm6)
+ "movq (%2, %3), %%mm3 \n\t"
+ PAVGB(%%mm3, %%mm5, %%mm1, %%mm6)
+ "movq %%mm2, (%2) \n\t"
+ "movq %%mm1, (%2, %3) \n\t"
+ "add %%"REG_a", %1 \n\t"
+ "add %%"REG_a", %2 \n\t"
+
+ "subl $4, %0 \n\t"
+ "jnz 1b \n\t"
+ :"+g"(h), "+S"(pixels), "+D"(block)
+ :"r"((x86_reg)line_size)
+ :REG_a, "memory");
+}
+
+// this routine is 'slightly' suboptimal but mostly unused
+static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+ MOVQ_ZERO(mm7);
+ SET_RND(mm6); // =2 for rnd and =1 for no_rnd version
+ __asm__ volatile(
+ "movq (%1), %%mm0 \n\t"
+ "movq 1(%1), %%mm4 \n\t"
+ "movq %%mm0, %%mm1 \n\t"
+ "movq %%mm4, %%mm5 \n\t"
+ "punpcklbw %%mm7, %%mm0 \n\t"
+ "punpcklbw %%mm7, %%mm4 \n\t"
+ "punpckhbw %%mm7, %%mm1 \n\t"
+ "punpckhbw %%mm7, %%mm5 \n\t"
+ "paddusw %%mm0, %%mm4 \n\t"
+ "paddusw %%mm1, %%mm5 \n\t"
+ "xor %%"REG_a", %%"REG_a" \n\t"
+ "add %3, %1 \n\t"
+ ASMALIGN(3)
+ "1: \n\t"
+ "movq (%1, %%"REG_a"), %%mm0 \n\t"
+ "movq 1(%1, %%"REG_a"), %%mm2 \n\t"
+ "movq %%mm0, %%mm1 \n\t"
+ "movq %%mm2, %%mm3 \n\t"
+ "punpcklbw %%mm7, %%mm0 \n\t"
+ "punpcklbw %%mm7, %%mm2 \n\t"
+ "punpckhbw %%mm7, %%mm1 \n\t"
+ "punpckhbw %%mm7, %%mm3 \n\t"
+ "paddusw %%mm2, %%mm0 \n\t"
+ "paddusw %%mm3, %%mm1 \n\t"
+ "paddusw %%mm6, %%mm4 \n\t"
+ "paddusw %%mm6, %%mm5 \n\t"
+ "paddusw %%mm0, %%mm4 \n\t"
+ "paddusw %%mm1, %%mm5 \n\t"
+ "psrlw $2, %%mm4 \n\t"
+ "psrlw $2, %%mm5 \n\t"
+ "movq (%2, %%"REG_a"), %%mm3 \n\t"
+ "packuswb %%mm5, %%mm4 \n\t"
+ "pcmpeqd %%mm2, %%mm2 \n\t"
+ "paddb %%mm2, %%mm2 \n\t"
+ PAVGB(%%mm3, %%mm4, %%mm5, %%mm2)
+ "movq %%mm5, (%2, %%"REG_a") \n\t"
+ "add %3, %%"REG_a" \n\t"
+
+ "movq (%1, %%"REG_a"), %%mm2 \n\t" // 0 <-> 2 1 <-> 3
+ "movq 1(%1, %%"REG_a"), %%mm4 \n\t"
+ "movq %%mm2, %%mm3 \n\t"
+ "movq %%mm4, %%mm5 \n\t"
+ "punpcklbw %%mm7, %%mm2 \n\t"
+ "punpcklbw %%mm7, %%mm4 \n\t"
+ "punpckhbw %%mm7, %%mm3 \n\t"
+ "punpckhbw %%mm7, %%mm5 \n\t"
+ "paddusw %%mm2, %%mm4 \n\t"
+ "paddusw %%mm3, %%mm5 \n\t"
+ "paddusw %%mm6, %%mm0 \n\t"
+ "paddusw %%mm6, %%mm1 \n\t"
+ "paddusw %%mm4, %%mm0 \n\t"
+ "paddusw %%mm5, %%mm1 \n\t"
+ "psrlw $2, %%mm0 \n\t"
+ "psrlw $2, %%mm1 \n\t"
+ "movq (%2, %%"REG_a"), %%mm3 \n\t"
+ "packuswb %%mm1, %%mm0 \n\t"
+ "pcmpeqd %%mm2, %%mm2 \n\t"
+ "paddb %%mm2, %%mm2 \n\t"
+ PAVGB(%%mm3, %%mm0, %%mm1, %%mm2)
+ "movq %%mm1, (%2, %%"REG_a") \n\t"
+ "add %3, %%"REG_a" \n\t"
+
+ "subl $2, %0 \n\t"
+ "jnz 1b \n\t"
+ :"+g"(h), "+S"(pixels)
+ :"D"(block), "r"((x86_reg)line_size)
+ :REG_a, "memory");
+}
+
+//FIXME optimize
+static void DEF(put, pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
+ DEF(put, pixels8_y2)(block , pixels , line_size, h);
+ DEF(put, pixels8_y2)(block+8, pixels+8, line_size, h);
+}
+
+static void DEF(put, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
+ DEF(put, pixels8_xy2)(block , pixels , line_size, h);
+ DEF(put, pixels8_xy2)(block+8, pixels+8, line_size, h);
+}
+
+static void DEF(avg, pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
+ DEF(avg, pixels8_y2)(block , pixels , line_size, h);
+ DEF(avg, pixels8_y2)(block+8, pixels+8, line_size, h);
+}
+
+static void DEF(avg, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
+ DEF(avg, pixels8_xy2)(block , pixels , line_size, h);
+ DEF(avg, pixels8_xy2)(block+8, pixels+8, line_size, h);
+}
diff --git a/libavcodec/x86/dsputil_yasm.asm b/libavcodec/x86/dsputil_yasm.asm
new file mode 100644
index 0000000..91165f2
--- /dev/null
+++ b/libavcodec/x86/dsputil_yasm.asm
@@ -0,0 +1,92 @@
+;******************************************************************************
+;* MMX optimized DSP utils
+;* Copyright (c) 2008 Loren Merritt
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "x86inc.asm"
+
+section .text align=16
+
+%macro PSWAPD_SSE 2
+ pshufw %1, %2, 0x4e
+%endmacro
+%macro PSWAPD_3DN1 2
+ movq %1, %2
+ psrlq %1, 32
+ punpckldq %1, %2
+%endmacro
+
+%macro FLOAT_TO_INT16_INTERLEAVE6 1
+; void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len)
+cglobal float_to_int16_interleave6_%1, 2,7,0, dst, src, src1, src2, src3, src4, src5
+%ifdef ARCH_X86_64
+ %define lend r10d
+ mov lend, r2d
+%else
+ %define lend dword r2m
+%endif
+ mov src1q, [srcq+1*gprsize]
+ mov src2q, [srcq+2*gprsize]
+ mov src3q, [srcq+3*gprsize]
+ mov src4q, [srcq+4*gprsize]
+ mov src5q, [srcq+5*gprsize]
+ mov srcq, [srcq]
+ sub src1q, srcq
+ sub src2q, srcq
+ sub src3q, srcq
+ sub src4q, srcq
+ sub src5q, srcq
+.loop:
+ cvtps2pi mm0, [srcq]
+ cvtps2pi mm1, [srcq+src1q]
+ cvtps2pi mm2, [srcq+src2q]
+ cvtps2pi mm3, [srcq+src3q]
+ cvtps2pi mm4, [srcq+src4q]
+ cvtps2pi mm5, [srcq+src5q]
+ packssdw mm0, mm3
+ packssdw mm1, mm4
+ packssdw mm2, mm5
+ pswapd mm3, mm0
+ punpcklwd mm0, mm1
+ punpckhwd mm1, mm2
+ punpcklwd mm2, mm3
+ pswapd mm3, mm0
+ punpckldq mm0, mm2
+ punpckhdq mm2, mm1
+ punpckldq mm1, mm3
+ movq [dstq ], mm0
+ movq [dstq+16], mm2
+ movq [dstq+ 8], mm1
+ add srcq, 8
+ add dstq, 24
+ sub lend, 2
+ jg .loop
+ emms
+ RET
+%endmacro ; FLOAT_TO_INT16_INTERLEAVE6
+
+%define pswapd PSWAPD_SSE
+FLOAT_TO_INT16_INTERLEAVE6 sse
+%define cvtps2pi pf2id
+%define pswapd PSWAPD_3DN1
+FLOAT_TO_INT16_INTERLEAVE6 3dnow
+%undef pswapd
+FLOAT_TO_INT16_INTERLEAVE6 3dn2
+%undef cvtps2pi
+
diff --git a/libavcodec/i386/dsputilenc_mmx.c b/libavcodec/x86/dsputilenc_mmx.c
similarity index 100%
rename from libavcodec/i386/dsputilenc_mmx.c
rename to libavcodec/x86/dsputilenc_mmx.c
diff --git a/libavcodec/i386/fdct_mmx.c b/libavcodec/x86/fdct_mmx.c
similarity index 100%
rename from libavcodec/i386/fdct_mmx.c
rename to libavcodec/x86/fdct_mmx.c
diff --git a/libavcodec/i386/fft_3dn.c b/libavcodec/x86/fft_3dn.c
similarity index 100%
rename from libavcodec/i386/fft_3dn.c
rename to libavcodec/x86/fft_3dn.c
diff --git a/libavcodec/i386/fft_3dn2.c b/libavcodec/x86/fft_3dn2.c
similarity index 100%
rename from libavcodec/i386/fft_3dn2.c
rename to libavcodec/x86/fft_3dn2.c
diff --git a/libavcodec/x86/fft_mmx.asm b/libavcodec/x86/fft_mmx.asm
new file mode 100644
index 0000000..3971867
--- /dev/null
+++ b/libavcodec/x86/fft_mmx.asm
@@ -0,0 +1,467 @@
+;******************************************************************************
+;* FFT transform with SSE/3DNow optimizations
+;* Copyright (c) 2008 Loren Merritt
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+; These functions are not individually interchangeable with the C versions.
+; While C takes arrays of FFTComplex, SSE/3DNow leave intermediate results
+; in blocks as convenient to the vector size.
+; i.e. {4x real, 4x imaginary, 4x real, ...} (or 2x respectively)
+
+%include "x86inc.asm"
+
+SECTION_RODATA
+
+%define M_SQRT1_2 0.70710678118654752440
+ps_root2: times 4 dd M_SQRT1_2
+ps_root2mppm: dd -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2
+ps_m1p1: dd 1<<31, 0
+
+%assign i 16
+%rep 13
+cextern ff_cos_ %+ i
+%assign i i<<1
+%endrep
+
+%ifdef ARCH_X86_64
+ %define pointer dq
+%else
+ %define pointer dd
+%endif
+
+%macro IF0 1+
+%endmacro
+%macro IF1 1+
+ %1
+%endmacro
+
+section .text align=16
+
+%macro T2_3DN 4 ; z0, z1, mem0, mem1
+ mova %1, %3
+ mova %2, %1
+ pfadd %1, %4
+ pfsub %2, %4
+%endmacro
+
+%macro T4_3DN 6 ; z0, z1, z2, z3, tmp0, tmp1
+ mova %5, %3
+ pfsub %3, %4
+ pfadd %5, %4 ; {t6,t5}
+ pxor %3, [ps_m1p1 GLOBAL] ; {t8,t7}
+ mova %6, %1
+ pswapd %3, %3
+ pfadd %1, %5 ; {r0,i0}
+ pfsub %6, %5 ; {r2,i2}
+ mova %4, %2
+ pfadd %2, %3 ; {r1,i1}
+ pfsub %4, %3 ; {r3,i3}
+ SWAP %3, %6
+%endmacro
+
+; in: %1={r0,i0,r1,i1} %2={r2,i2,r3,i3}
+; out: %1={r0,r1,r2,r3} %2={i0,i1,i2,i3}
+%macro T4_SSE 3
+ mova %3, %1
+ shufps %1, %2, 0x64 ; {r0,i0,r3,i2}
+ shufps %3, %2, 0xce ; {r1,i1,r2,i3}
+ mova %2, %1
+ addps %1, %3 ; {t1,t2,t6,t5}
+ subps %2, %3 ; {t3,t4,t8,t7}
+ mova %3, %1
+ shufps %1, %2, 0x44 ; {t1,t2,t3,t4}
+ shufps %3, %2, 0xbe ; {t6,t5,t7,t8}
+ mova %2, %1
+ addps %1, %3 ; {r0,i0,r1,i1}
+ subps %2, %3 ; {r2,i2,r3,i3}
+ mova %3, %1
+ shufps %1, %2, 0x88 ; {r0,r1,r2,r3}
+ shufps %3, %2, 0xdd ; {i0,i1,i2,i3}
+ SWAP %2, %3
+%endmacro
+
+%macro T8_SSE 6 ; r0,i0,r1,i1,t0,t1
+ mova %5, %3
+ shufps %3, %4, 0x44 ; {r4,i4,r6,i6}
+ shufps %5, %4, 0xee ; {r5,i5,r7,i7}
+ mova %6, %3
+ subps %3, %5 ; {r5,i5,r7,i7}
+ addps %6, %5 ; {t1,t2,t3,t4}
+ mova %5, %3
+ shufps %5, %5, 0xb1 ; {i5,r5,i7,r7}
+ mulps %3, [ps_root2mppm GLOBAL] ; {-r5,i5,r7,-i7}
+ mulps %5, [ps_root2 GLOBAL]
+ addps %3, %5 ; {t8,t7,ta,t9}
+ mova %5, %6
+ shufps %6, %3, 0x36 ; {t3,t2,t9,t8}
+ shufps %5, %3, 0x9c ; {t1,t4,t7,ta}
+ mova %3, %6
+ addps %6, %5 ; {t1,t2,t9,ta}
+ subps %3, %5 ; {t6,t5,tc,tb}
+ mova %5, %6
+ shufps %6, %3, 0xd8 ; {t1,t9,t5,tb}
+ shufps %5, %3, 0x8d ; {t2,ta,t6,tc}
+ mova %3, %1
+ mova %4, %2
+ addps %1, %6 ; {r0,r1,r2,r3}
+ addps %2, %5 ; {i0,i1,i2,i3}
+ subps %3, %6 ; {r4,r5,r6,r7}
+ subps %4, %5 ; {i4,i5,i6,i7}
+%endmacro
+
+; scheduled for cpu-bound sizes
+%macro PASS_SMALL 3 ; (to load m4-m7), wre, wim
+IF%1 mova m4, Z(4)
+IF%1 mova m5, Z(5)
+ mova m0, %2 ; wre
+ mova m2, m4
+ mova m1, %3 ; wim
+ mova m3, m5
+ mulps m2, m0 ; r2*wre
+IF%1 mova m6, Z(6)
+ mulps m3, m1 ; i2*wim
+IF%1 mova m7, Z(7)
+ mulps m4, m1 ; r2*wim
+ mulps m5, m0 ; i2*wre
+ addps m2, m3 ; r2*wre + i2*wim
+ mova m3, m1
+ mulps m1, m6 ; r3*wim
+ subps m5, m4 ; i2*wre - r2*wim
+ mova m4, m0
+ mulps m3, m7 ; i3*wim
+ mulps m4, m6 ; r3*wre
+ mulps m0, m7 ; i3*wre
+ subps m4, m3 ; r3*wre - i3*wim
+ mova m3, Z(0)
+ addps m0, m1 ; i3*wre + r3*wim
+ mova m1, m4
+ addps m4, m2 ; t5
+ subps m1, m2 ; t3
+ subps m3, m4 ; r2
+ addps m4, Z(0) ; r0
+ mova m6, Z(2)
+ mova Z(4), m3
+ mova Z(0), m4
+ mova m3, m5
+ subps m5, m0 ; t4
+ mova m4, m6
+ subps m6, m5 ; r3
+ addps m5, m4 ; r1
+ mova Z(6), m6
+ mova Z(2), m5
+ mova m2, Z(3)
+ addps m3, m0 ; t6
+ subps m2, m1 ; i3
+ mova m7, Z(1)
+ addps m1, Z(3) ; i1
+ mova Z(7), m2
+ mova Z(3), m1
+ mova m4, m7
+ subps m7, m3 ; i2
+ addps m3, m4 ; i0
+ mova Z(5), m7
+ mova Z(1), m3
+%endmacro
+
+; scheduled to avoid store->load aliasing
+%macro PASS_BIG 1 ; (!interleave)
+ mova m4, Z(4) ; r2
+ mova m5, Z(5) ; i2
+ mova m2, m4
+ mova m0, [wq] ; wre
+ mova m3, m5
+ mova m1, [wq+o1q] ; wim
+ mulps m2, m0 ; r2*wre
+ mova m6, Z(6) ; r3
+ mulps m3, m1 ; i2*wim
+ mova m7, Z(7) ; i3
+ mulps m4, m1 ; r2*wim
+ mulps m5, m0 ; i2*wre
+ addps m2, m3 ; r2*wre + i2*wim
+ mova m3, m1
+ mulps m1, m6 ; r3*wim
+ subps m5, m4 ; i2*wre - r2*wim
+ mova m4, m0
+ mulps m3, m7 ; i3*wim
+ mulps m4, m6 ; r3*wre
+ mulps m0, m7 ; i3*wre
+ subps m4, m3 ; r3*wre - i3*wim
+ mova m3, Z(0)
+ addps m0, m1 ; i3*wre + r3*wim
+ mova m1, m4
+ addps m4, m2 ; t5
+ subps m1, m2 ; t3
+ subps m3, m4 ; r2
+ addps m4, Z(0) ; r0
+ mova m6, Z(2)
+ mova Z(4), m3
+ mova Z(0), m4
+ mova m3, m5
+ subps m5, m0 ; t4
+ mova m4, m6
+ subps m6, m5 ; r3
+ addps m5, m4 ; r1
+IF%1 mova Z(6), m6
+IF%1 mova Z(2), m5
+ mova m2, Z(3)
+ addps m3, m0 ; t6
+ subps m2, m1 ; i3
+ mova m7, Z(1)
+ addps m1, Z(3) ; i1
+IF%1 mova Z(7), m2
+IF%1 mova Z(3), m1
+ mova m4, m7
+ subps m7, m3 ; i2
+ addps m3, m4 ; i0
+IF%1 mova Z(5), m7
+IF%1 mova Z(1), m3
+%if %1==0
+ mova m4, m5 ; r1
+ mova m0, m6 ; r3
+ unpcklps m5, m1
+ unpckhps m4, m1
+ unpcklps m6, m2
+ unpckhps m0, m2
+ mova m1, Z(0)
+ mova m2, Z(4)
+ mova Z(2), m5
+ mova Z(3), m4
+ mova Z(6), m6
+ mova Z(7), m0
+ mova m5, m1 ; r0
+ mova m4, m2 ; r2
+ unpcklps m1, m3
+ unpckhps m5, m3
+ unpcklps m2, m7
+ unpckhps m4, m7
+ mova Z(0), m1
+ mova Z(1), m5
+ mova Z(4), m2
+ mova Z(5), m4
+%endif
+%endmacro
+
+%macro PUNPCK 3
+ mova %3, %1
+ punpckldq %1, %2
+ punpckhdq %3, %2
+%endmacro
+
+INIT_XMM
+
+%define Z(x) [r0+mmsize*x]
+
+align 16
+fft4_sse:
+ mova m0, Z(0)
+ mova m1, Z(1)
+ T4_SSE m0, m1, m2
+ mova Z(0), m0
+ mova Z(1), m1
+ ret
+
+align 16
+fft8_sse:
+ mova m0, Z(0)
+ mova m1, Z(1)
+ T4_SSE m0, m1, m2
+ mova m2, Z(2)
+ mova m3, Z(3)
+ T8_SSE m0, m1, m2, m3, m4, m5
+ mova Z(0), m0
+ mova Z(1), m1
+ mova Z(2), m2
+ mova Z(3), m3
+ ret
+
+align 16
+fft16_sse:
+ mova m0, Z(0)
+ mova m1, Z(1)
+ T4_SSE m0, m1, m2
+ mova m2, Z(2)
+ mova m3, Z(3)
+ T8_SSE m0, m1, m2, m3, m4, m5
+ mova m4, Z(4)
+ mova m5, Z(5)
+ mova Z(0), m0
+ mova Z(1), m1
+ mova Z(2), m2
+ mova Z(3), m3
+ T4_SSE m4, m5, m6
+ mova m6, Z(6)
+ mova m7, Z(7)
+ T4_SSE m6, m7, m0
+ PASS_SMALL 0, [ff_cos_16 GLOBAL], [ff_cos_16+16 GLOBAL]
+ ret
+
+
+INIT_MMX
+
+%macro FFT48_3DN 1
+align 16
+fft4%1:
+ T2_3DN m0, m1, Z(0), Z(1)
+ mova m2, Z(2)
+ mova m3, Z(3)
+ T4_3DN m0, m1, m2, m3, m4, m5
+ PUNPCK m0, m1, m4
+ PUNPCK m2, m3, m5
+ mova Z(0), m0
+ mova Z(1), m4
+ mova Z(2), m2
+ mova Z(3), m5
+ ret
+
+align 16
+fft8%1:
+ T2_3DN m0, m1, Z(0), Z(1)
+ mova m2, Z(2)
+ mova m3, Z(3)
+ T4_3DN m0, m1, m2, m3, m4, m5
+ mova Z(0), m0
+ mova Z(2), m2
+ T2_3DN m4, m5, Z(4), Z(5)
+ T2_3DN m6, m7, Z(6), Z(7)
+ pswapd m0, m5
+ pswapd m2, m7
+ pxor m0, [ps_m1p1 GLOBAL]
+ pxor m2, [ps_m1p1 GLOBAL]
+ pfsub m5, m0
+ pfadd m7, m2
+ pfmul m5, [ps_root2 GLOBAL]
+ pfmul m7, [ps_root2 GLOBAL]
+ T4_3DN m1, m3, m5, m7, m0, m2
+ mova Z(5), m5
+ mova Z(7), m7
+ mova m0, Z(0)
+ mova m2, Z(2)
+ T4_3DN m0, m2, m4, m6, m5, m7
+ PUNPCK m0, m1, m5
+ PUNPCK m2, m3, m7
+ mova Z(0), m0
+ mova Z(1), m5
+ mova Z(2), m2
+ mova Z(3), m7
+ PUNPCK m4, Z(5), m5
+ PUNPCK m6, Z(7), m7
+ mova Z(4), m4
+ mova Z(5), m5
+ mova Z(6), m6
+ mova Z(7), m7
+ ret
+%endmacro
+
+FFT48_3DN _3dn2
+
+%macro pswapd 2
+%ifidn %1, %2
+ movd [r0+12], %1
+ punpckhdq %1, [r0+8]
+%else
+ movq %1, %2
+ psrlq %1, 32
+ punpckldq %1, %2
+%endif
+%endmacro
+
+FFT48_3DN _3dn
+
+
+%define Z(x) [zq + o1q*(x&6)*((x/6)^1) + o3q*(x/6) + mmsize*(x&1)]
+
+%macro DECL_PASS 2+ ; name, payload
+align 16
+%1:
+DEFINE_ARGS z, w, n, o1, o3
+ lea o3q, [nq*3]
+ lea o1q, [nq*8]
+ shl o3q, 4
+.loop:
+ %2
+ add zq, mmsize*2
+ add wq, mmsize
+ sub nd, mmsize/8
+ jg .loop
+ rep ret
+%endmacro
+
+INIT_XMM
+DECL_PASS pass_sse, PASS_BIG 1
+DECL_PASS pass_interleave_sse, PASS_BIG 0
+
+INIT_MMX
+%define mulps pfmul
+%define addps pfadd
+%define subps pfsub
+%define unpcklps punpckldq
+%define unpckhps punpckhdq
+DECL_PASS pass_3dn, PASS_SMALL 1, [wq], [wq+o1q]
+DECL_PASS pass_interleave_3dn, PASS_BIG 0
+%define pass_3dn2 pass_3dn
+%define pass_interleave_3dn2 pass_interleave_3dn
+
+
+%macro DECL_FFT 2-3 ; nbits, cpu, suffix
+%xdefine list_of_fft fft4%2, fft8%2
+%if %1==5
+%xdefine list_of_fft list_of_fft, fft16%2
+%endif
+
+%assign n 1<<%1
+%rep 17-%1
+%assign n2 n/2
+%assign n4 n/4
+%xdefine list_of_fft list_of_fft, fft %+ n %+ %3%2
+
+align 16
+fft %+ n %+ %3%2:
+ call fft %+ n2 %+ %2
+ add r0, n*4 - (n&(-2<<%1))
+ call fft %+ n4 %+ %2
+ add r0, n*2 - (n2&(-2<<%1))
+ call fft %+ n4 %+ %2
+ sub r0, n*6 + (n2&(-2<<%1))
+ lea r1, [ff_cos_ %+ n GLOBAL]
+ mov r2d, n4/2
+ jmp pass%3%2
+
+%assign n n*2
+%endrep
+%undef n
+
+align 8
+dispatch_tab%3%2: pointer list_of_fft
+
+; On x86_32, this function does the register saving and restoring for all of fft.
+; The others pass args in registers and don't spill anything.
+cglobal fft_dispatch%3%2, 2,5,0, z, nbits
+ lea r2, [dispatch_tab%3%2 GLOBAL]
+ mov r2, [r2 + (nbitsq-2)*gprsize]
+ call r2
+ RET
+%endmacro ; DECL_FFT
+
+DECL_FFT 5, _sse
+DECL_FFT 5, _sse, _interleave
+DECL_FFT 4, _3dn
+DECL_FFT 4, _3dn, _interleave
+DECL_FFT 4, _3dn2
+DECL_FFT 4, _3dn2, _interleave
+
diff --git a/libavcodec/i386/fft_sse.c b/libavcodec/x86/fft_sse.c
similarity index 100%
rename from libavcodec/i386/fft_sse.c
rename to libavcodec/x86/fft_sse.c
diff --git a/libavcodec/i386/flacdsp_mmx.c b/libavcodec/x86/flacdsp_mmx.c
similarity index 100%
rename from libavcodec/i386/flacdsp_mmx.c
rename to libavcodec/x86/flacdsp_mmx.c
diff --git a/libavcodec/x86/h264_deblock_sse2.asm b/libavcodec/x86/h264_deblock_sse2.asm
new file mode 100644
index 0000000..d59de91
--- /dev/null
+++ b/libavcodec/x86/h264_deblock_sse2.asm
@@ -0,0 +1,747 @@
+;*****************************************************************************
+;* deblock-a.asm: h264 encoder library
+;*****************************************************************************
+;* Copyright (C) 2005-2008 x264 project
+;*
+;* Authors: Loren Merritt <lorenm at u.washington.edu>
+;*
+;* This program is free software; you can redistribute it and/or modify
+;* it under the terms of the GNU General Public License as published by
+;* the Free Software Foundation; either version 2 of the License, or
+;* (at your option) any later version.
+;*
+;* This program is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;* GNU General Public License for more details.
+;*
+;* You should have received a copy of the GNU General Public License
+;* along with this program; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
+;*****************************************************************************
+
+%include "x86inc.asm"
+
+SECTION_RODATA
+pb_00: times 16 db 0x00
+pb_01: times 16 db 0x01
+pb_03: times 16 db 0x03
+pb_a1: times 16 db 0xa1
+
+SECTION .text
+
+; expands to [base],...,[base+7*stride]
+%define PASS8ROWS(base, base3, stride, stride3) \
+ [base], [base+stride], [base+stride*2], [base3], \
+ [base3+stride], [base3+stride*2], [base3+stride3], [base3+stride*4]
+
+; in: 8 rows of 4 bytes in %1..%8
+; out: 4 rows of 8 bytes in m0..m3
+%macro TRANSPOSE4x8_LOAD 8
+ movd m0, %1
+ movd m2, %2
+ movd m1, %3
+ movd m3, %4
+ punpcklbw m0, m2
+ punpcklbw m1, m3
+ movq m2, m0
+ punpcklwd m0, m1
+ punpckhwd m2, m1
+
+ movd m4, %5
+ movd m6, %6
+ movd m5, %7
+ movd m7, %8
+ punpcklbw m4, m6
+ punpcklbw m5, m7
+ movq m6, m4
+ punpcklwd m4, m5
+ punpckhwd m6, m5
+
+ movq m1, m0
+ movq m3, m2
+ punpckldq m0, m4
+ punpckhdq m1, m4
+ punpckldq m2, m6
+ punpckhdq m3, m6
+%endmacro
+
+; in: 4 rows of 8 bytes in m0..m3
+; out: 8 rows of 4 bytes in %1..%8
+%macro TRANSPOSE8x4_STORE 8
+ movq m4, m0
+ movq m5, m1
+ movq m6, m2
+ punpckhdq m4, m4
+ punpckhdq m5, m5
+ punpckhdq m6, m6
+
+ punpcklbw m0, m1
+ punpcklbw m2, m3
+ movq m1, m0
+ punpcklwd m0, m2
+ punpckhwd m1, m2
+ movd %1, m0
+ punpckhdq m0, m0
+ movd %2, m0
+ movd %3, m1
+ punpckhdq m1, m1
+ movd %4, m1
+
+ punpckhdq m3, m3
+ punpcklbw m4, m5
+ punpcklbw m6, m3
+ movq m5, m4
+ punpcklwd m4, m6
+ punpckhwd m5, m6
+ movd %5, m4
+ punpckhdq m4, m4
+ movd %6, m4
+ movd %7, m5
+ punpckhdq m5, m5
+ movd %8, m5
+%endmacro
+
+%macro SBUTTERFLY 4
+ movq %4, %2
+ punpckl%1 %2, %3
+ punpckh%1 %4, %3
+%endmacro
+
+; in: 8 rows of 8 (only the middle 6 pels are used) in %1..%8
+; out: 6 rows of 8 in [%9+0*16] .. [%9+5*16]
+%macro TRANSPOSE6x8_MEM 9
+ movq m0, %1
+ movq m1, %2
+ movq m2, %3
+ movq m3, %4
+ movq m4, %5
+ movq m5, %6
+ movq m6, %7
+ SBUTTERFLY bw, m0, m1, m7
+ SBUTTERFLY bw, m2, m3, m1
+ SBUTTERFLY bw, m4, m5, m3
+ movq [%9+0x10], m1
+ SBUTTERFLY bw, m6, %8, m5
+ SBUTTERFLY wd, m0, m2, m1
+ SBUTTERFLY wd, m4, m6, m2
+ punpckhdq m0, m4
+ movq [%9+0x00], m0
+ SBUTTERFLY wd, m7, [%9+0x10], m6
+ SBUTTERFLY wd, m3, m5, m4
+ SBUTTERFLY dq, m7, m3, m0
+ SBUTTERFLY dq, m1, m2, m5
+ punpckldq m6, m4
+ movq [%9+0x10], m1
+ movq [%9+0x20], m5
+ movq [%9+0x30], m7
+ movq [%9+0x40], m0
+ movq [%9+0x50], m6
+%endmacro
+
+; in: 8 rows of 8 in %1..%8
+; out: 8 rows of 8 in %9..%16
+%macro TRANSPOSE8x8_MEM 16
+ movq m0, %1
+ movq m1, %2
+ movq m2, %3
+ movq m3, %4
+ movq m4, %5
+ movq m5, %6
+ movq m6, %7
+ SBUTTERFLY bw, m0, m1, m7
+ SBUTTERFLY bw, m2, m3, m1
+ SBUTTERFLY bw, m4, m5, m3
+ SBUTTERFLY bw, m6, %8, m5
+ movq %9, m3
+ SBUTTERFLY wd, m0, m2, m3
+ SBUTTERFLY wd, m4, m6, m2
+ SBUTTERFLY wd, m7, m1, m6
+ movq %11, m2
+ movq m2, %9
+ SBUTTERFLY wd, m2, m5, m1
+ SBUTTERFLY dq, m0, m4, m5
+ SBUTTERFLY dq, m7, m2, m4
+ movq %9, m0
+ movq %10, m5
+ movq %13, m7
+ movq %14, m4
+ SBUTTERFLY dq, m3, %11, m0
+ SBUTTERFLY dq, m6, m1, m5
+ movq %11, m3
+ movq %12, m0
+ movq %15, m6
+ movq %16, m5
+%endmacro
+
+; out: %4 = |%1-%2|>%3
+; clobbers: %5
+%macro DIFF_GT 5
+ mova %5, %2
+ mova %4, %1
+ psubusb %5, %1
+ psubusb %4, %2
+ por %4, %5
+ psubusb %4, %3
+%endmacro
+
+; out: %4 = |%1-%2|>%3
+; clobbers: %5
+%macro DIFF_GT2 5
+ mova %5, %2
+ mova %4, %1
+ psubusb %5, %1
+ psubusb %4, %2
+ psubusb %5, %3
+ psubusb %4, %3
+ pcmpeqb %4, %5
+%endmacro
+
+%macro SPLATW 1
+%ifidn m0, xmm0
+ pshuflw %1, %1, 0
+ punpcklqdq %1, %1
+%else
+ pshufw %1, %1, 0
+%endif
+%endmacro
+
+; in: m0=p1 m1=p0 m2=q0 m3=q1 %1=alpha-1 %2=beta-1
+; out: m5=beta-1, m7=mask, %3=alpha-1
+; clobbers: m4,m6
+%macro LOAD_MASK 2-3
+ movd m4, %1
+ movd m5, %2
+ SPLATW m4
+ SPLATW m5
+ packuswb m4, m4 ; 16x alpha-1
+ packuswb m5, m5 ; 16x beta-1
+%if %0>2
+ mova %3, m4
+%endif
+ DIFF_GT m1, m2, m4, m7, m6 ; |p0-q0| > alpha-1
+ DIFF_GT m0, m1, m5, m4, m6 ; |p1-p0| > beta-1
+ por m7, m4
+ DIFF_GT m3, m2, m5, m4, m6 ; |q1-q0| > beta-1
+ por m7, m4
+ pxor m6, m6
+ pcmpeqb m7, m6
+%endmacro
+
+; in: m0=p1 m1=p0 m2=q0 m3=q1 m7=(tc&mask)
+; out: m1=p0' m2=q0'
+; clobbers: m0,3-6
+%macro DEBLOCK_P0_Q0 0
+ mova m5, m1
+ pxor m5, m2 ; p0^q0
+ pand m5, [pb_01 GLOBAL] ; (p0^q0)&1
+ pcmpeqb m4, m4
+ pxor m3, m4
+ pavgb m3, m0 ; (p1 - q1 + 256)>>1
+ pavgb m3, [pb_03 GLOBAL] ; (((p1 - q1 + 256)>>1)+4)>>1 = 64+2+(p1-q1)>>2
+ pxor m4, m1
+ pavgb m4, m2 ; (q0 - p0 + 256)>>1
+ pavgb m3, m5
+ paddusb m3, m4 ; d+128+33
+ mova m6, [pb_a1 GLOBAL]
+ psubusb m6, m3
+ psubusb m3, [pb_a1 GLOBAL]
+ pminub m6, m7
+ pminub m3, m7
+ psubusb m1, m6
+ psubusb m2, m3
+ paddusb m1, m3
+ paddusb m2, m6
+%endmacro
+
+; in: m1=p0 m2=q0
+; %1=p1 %2=q2 %3=[q2] %4=[q1] %5=tc0 %6=tmp
+; out: [q1] = clip( (q2+((p0+q0+1)>>1))>>1, q1-tc0, q1+tc0 )
+; clobbers: q2, tmp, tc0
+%macro LUMA_Q1 6
+ mova %6, m1
+ pavgb %6, m2
+ pavgb %2, %6 ; avg(p2,avg(p0,q0))
+ pxor %6, %3
+ pand %6, [pb_01 GLOBAL] ; (p2^avg(p0,q0))&1
+ psubusb %2, %6 ; (p2+((p0+q0+1)>>1))>>1
+ mova %6, %1
+ psubusb %6, %5
+ paddusb %5, %1
+ pmaxub %2, %6
+ pminub %2, %5
+ mova %4, %2
+%endmacro
+
+%ifdef ARCH_X86_64
+;-----------------------------------------------------------------------------
+; void x264_deblock_v_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
+;-----------------------------------------------------------------------------
+INIT_XMM
+cglobal x264_deblock_v_luma_sse2
+ movd m8, [r4] ; tc0
+ lea r4, [r1*3]
+ dec r2d ; alpha-1
+ neg r4
+ dec r3d ; beta-1
+ add r4, r0 ; pix-3*stride
+
+ mova m0, [r4+r1] ; p1
+ mova m1, [r4+2*r1] ; p0
+ mova m2, [r0] ; q0
+ mova m3, [r0+r1] ; q1
+ LOAD_MASK r2d, r3d
+
+ punpcklbw m8, m8
+ punpcklbw m8, m8 ; tc = 4x tc0[3], 4x tc0[2], 4x tc0[1], 4x tc0[0]
+ pcmpeqb m9, m9
+ pcmpeqb m9, m8
+ pandn m9, m7
+ pand m8, m9
+
+ movdqa m3, [r4] ; p2
+ DIFF_GT2 m1, m3, m5, m6, m7 ; |p2-p0| > beta-1
+ pand m6, m9
+ mova m7, m8
+ psubb m7, m6
+ pand m6, m8
+ LUMA_Q1 m0, m3, [r4], [r4+r1], m6, m4
+
+ movdqa m4, [r0+2*r1] ; q2
+ DIFF_GT2 m2, m4, m5, m6, m3 ; |q2-q0| > beta-1
+ pand m6, m9
+ pand m8, m6
+ psubb m7, m6
+ mova m3, [r0+r1]
+ LUMA_Q1 m3, m4, [r0+2*r1], [r0+r1], m8, m6
+
+ DEBLOCK_P0_Q0
+ mova [r4+2*r1], m1
+ mova [r0], m2
+ ret
+
+;-----------------------------------------------------------------------------
+; void x264_deblock_h_luma_sse2( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
+;-----------------------------------------------------------------------------
+INIT_MMX
+cglobal x264_deblock_h_luma_sse2
+ movsxd r10, esi
+ lea r11, [r10+r10*2]
+ lea rax, [r0-4]
+ lea r9, [r0-4+r11]
+ sub rsp, 0x68
+ %define pix_tmp rsp
+
+ ; transpose 6x16 -> tmp space
+ TRANSPOSE6x8_MEM PASS8ROWS(rax, r9, r10, r11), pix_tmp
+ lea rax, [rax+r10*8]
+ lea r9, [r9 +r10*8]
+ TRANSPOSE6x8_MEM PASS8ROWS(rax, r9, r10, r11), pix_tmp+8
+
+ ; vertical filter
+ ; alpha, beta, tc0 are still in r2d, r3d, r4
+ ; don't backup rax, r9, r10, r11 because x264_deblock_v_luma_sse2 doesn't use them
+ lea r0, [pix_tmp+0x30]
+ mov esi, 0x10
+ call x264_deblock_v_luma_sse2
+
+ ; transpose 16x4 -> original space (only the middle 4 rows were changed by the filter)
+ add rax, 2
+ add r9, 2
+ movq m0, [pix_tmp+0x18]
+ movq m1, [pix_tmp+0x28]
+ movq m2, [pix_tmp+0x38]
+ movq m3, [pix_tmp+0x48]
+ TRANSPOSE8x4_STORE PASS8ROWS(rax, r9, r10, r11)
+
+ shl r10, 3
+ sub rax, r10
+ sub r9, r10
+ shr r10, 3
+ movq m0, [pix_tmp+0x10]
+ movq m1, [pix_tmp+0x20]
+ movq m2, [pix_tmp+0x30]
+ movq m3, [pix_tmp+0x40]
+ TRANSPOSE8x4_STORE PASS8ROWS(rax, r9, r10, r11)
+
+ add rsp, 0x68
+ ret
+
+%else
+
+%macro DEBLOCK_LUMA 3
+;-----------------------------------------------------------------------------
+; void x264_deblock_v8_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
+;-----------------------------------------------------------------------------
+cglobal x264_deblock_%2_luma_%1, 5,5
+ lea r4, [r1*3]
+ dec r2 ; alpha-1
+ neg r4
+ dec r3 ; beta-1
+ add r4, r0 ; pix-3*stride
+ %assign pad 2*%3+12-(stack_offset&15)
+ SUB esp, pad
+
+ mova m0, [r4+r1] ; p1
+ mova m1, [r4+2*r1] ; p0
+ mova m2, [r0] ; q0
+ mova m3, [r0+r1] ; q1
+ LOAD_MASK r2, r3
+
+ mov r3, r4m
+ movd m4, [r3] ; tc0
+ punpcklbw m4, m4
+ punpcklbw m4, m4 ; tc = 4x tc0[3], 4x tc0[2], 4x tc0[1], 4x tc0[0]
+ mova [esp+%3], m4 ; tc
+ pcmpeqb m3, m3
+ pcmpgtb m4, m3
+ pand m4, m7
+ mova [esp], m4 ; mask
+
+ mova m3, [r4] ; p2
+ DIFF_GT2 m1, m3, m5, m6, m7 ; |p2-p0| > beta-1
+ pand m6, m4
+ pand m4, [esp+%3] ; tc
+ mova m7, m4
+ psubb m7, m6
+ pand m6, m4
+ LUMA_Q1 m0, m3, [r4], [r4+r1], m6, m4
+
+ mova m4, [r0+2*r1] ; q2
+ DIFF_GT2 m2, m4, m5, m6, m3 ; |q2-q0| > beta-1
+ mova m5, [esp] ; mask
+ pand m6, m5
+ mova m5, [esp+%3] ; tc
+ pand m5, m6
+ psubb m7, m6
+ mova m3, [r0+r1]
+ LUMA_Q1 m3, m4, [r0+2*r1], [r0+r1], m5, m6
+
+ DEBLOCK_P0_Q0
+ mova [r4+2*r1], m1
+ mova [r0], m2
+ ADD esp, pad
+ RET
+
+;-----------------------------------------------------------------------------
+; void x264_deblock_h_luma_mmxext( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
+;-----------------------------------------------------------------------------
+INIT_MMX
+cglobal x264_deblock_h_luma_%1, 0,5
+ mov r0, r0m
+ mov r3, r1m
+ lea r4, [r3*3]
+ sub r0, 4
+ lea r1, [r0+r4]
+ %assign pad 0x78-(stack_offset&15)
+ SUB esp, pad
+%define pix_tmp esp+12
+
+ ; transpose 6x16 -> tmp space
+ TRANSPOSE6x8_MEM PASS8ROWS(r0, r1, r3, r4), pix_tmp
+ lea r0, [r0+r3*8]
+ lea r1, [r1+r3*8]
+ TRANSPOSE6x8_MEM PASS8ROWS(r0, r1, r3, r4), pix_tmp+8
+
+ ; vertical filter
+ lea r0, [pix_tmp+0x30]
+ PUSH dword r4m
+ PUSH dword r3m
+ PUSH dword r2m
+ PUSH dword 16
+ PUSH dword r0
+ call x264_deblock_%2_luma_%1
+%ifidn %2, v8
+ add dword [esp ], 8 ; pix_tmp+0x38
+ add dword [esp+16], 2 ; tc0+2
+ call x264_deblock_%2_luma_%1
+%endif
+ ADD esp, 20
+
+ ; transpose 16x4 -> original space (only the middle 4 rows were changed by the filter)
+ mov r0, r0m
+ sub r0, 2
+ lea r1, [r0+r4]
+
+ movq m0, [pix_tmp+0x10]
+ movq m1, [pix_tmp+0x20]
+ movq m2, [pix_tmp+0x30]
+ movq m3, [pix_tmp+0x40]
+ TRANSPOSE8x4_STORE PASS8ROWS(r0, r1, r3, r4)
+
+ lea r0, [r0+r3*8]
+ lea r1, [r1+r3*8]
+ movq m0, [pix_tmp+0x18]
+ movq m1, [pix_tmp+0x28]
+ movq m2, [pix_tmp+0x38]
+ movq m3, [pix_tmp+0x48]
+ TRANSPOSE8x4_STORE PASS8ROWS(r0, r1, r3, r4)
+
+ ADD esp, pad
+ RET
+%endmacro ; DEBLOCK_LUMA
+
+INIT_XMM
+DEBLOCK_LUMA sse2, v, 16
+
+%endif ; ARCH
+
+
+
+%macro LUMA_INTRA_P012 4 ; p0..p3 in memory
+ mova t0, p2
+ mova t1, p0
+ pavgb t0, p1
+ pavgb t1, q0
+ pavgb t0, t1 ; ((p2+p1+1)/2 + (p0+q0+1)/2 + 1)/2
+ mova t5, t1
+ mova t2, p2
+ mova t3, p0
+ paddb t2, p1
+ paddb t3, q0
+ paddb t2, t3
+ mova t3, t2
+ mova t4, t2
+ psrlw t2, 1
+ pavgb t2, mpb_00
+ pxor t2, t0
+ pand t2, mpb_01
+ psubb t0, t2 ; p1' = (p2+p1+p0+q0+2)/4;
+
+ mova t1, p2
+ mova t2, p2
+ pavgb t1, q1
+ psubb t2, q1
+ paddb t3, t3
+ psubb t3, t2 ; p2+2*p1+2*p0+2*q0+q1
+ pand t2, mpb_01
+ psubb t1, t2
+ pavgb t1, p1
+ pavgb t1, t5 ; (((p2+q1)/2 + p1+1)/2 + (p0+q0+1)/2 + 1)/2
+ psrlw t3, 2
+ pavgb t3, mpb_00
+ pxor t3, t1
+ pand t3, mpb_01
+ psubb t1, t3 ; p0'a = (p2+2*p1+2*p0+2*q0+q1+4)/8
+
+ mova t3, p0
+ mova t2, p0
+ pxor t3, q1
+ pavgb t2, q1
+ pand t3, mpb_01
+ psubb t2, t3
+ pavgb t2, p1 ; p0'b = (2*p1+p0+q0+2)/4
+
+ pxor t1, t2
+ pxor t2, p0
+ pand t1, mask1p
+ pand t2, mask0
+ pxor t1, t2
+ pxor t1, p0
+ mova %1, t1 ; store p0
+
+ mova t1, %4 ; p3
+ mova t2, t1
+ pavgb t1, p2
+ paddb t2, p2
+ pavgb t1, t0 ; (p3+p2+1)/2 + (p2+p1+p0+q0+2)/4
+ paddb t2, t2
+ paddb t2, t4 ; 2*p3+3*p2+p1+p0+q0
+ psrlw t2, 2
+ pavgb t2, mpb_00
+ pxor t2, t1
+ pand t2, mpb_01
+ psubb t1, t2 ; p2' = (2*p3+3*p2+p1+p0+q0+4)/8
+
+ pxor t0, p1
+ pxor t1, p2
+ pand t0, mask1p
+ pand t1, mask1p
+ pxor t0, p1
+ pxor t1, p2
+ mova %2, t0 ; store p1
+ mova %3, t1 ; store p2
+%endmacro
+
+%macro LUMA_INTRA_SWAP_PQ 0
+ %define q1 m0
+ %define q0 m1
+ %define p0 m2
+ %define p1 m3
+ %define p2 q2
+ %define mask1p mask1q
+%endmacro
+
+%macro DEBLOCK_LUMA_INTRA 2
+ %define p1 m0
+ %define p0 m1
+ %define q0 m2
+ %define q1 m3
+ %define t0 m4
+ %define t1 m5
+ %define t2 m6
+ %define t3 m7
+%ifdef ARCH_X86_64
+ %define p2 m8
+ %define q2 m9
+ %define t4 m10
+ %define t5 m11
+ %define mask0 m12
+ %define mask1p m13
+ %define mask1q [rsp-24]
+ %define mpb_00 m14
+ %define mpb_01 m15
+%else
+ %define spill(x) [esp+16*x+((stack_offset+4)&15)]
+ %define p2 [r4+r1]
+ %define q2 [r0+2*r1]
+ %define t4 spill(0)
+ %define t5 spill(1)
+ %define mask0 spill(2)
+ %define mask1p spill(3)
+ %define mask1q spill(4)
+ %define mpb_00 [pb_00 GLOBAL]
+ %define mpb_01 [pb_01 GLOBAL]
+%endif
+
+;-----------------------------------------------------------------------------
+; void x264_deblock_v_luma_intra_sse2( uint8_t *pix, int stride, int alpha, int beta )
+;-----------------------------------------------------------------------------
+cglobal x264_deblock_%2_luma_intra_%1, 4,6
+%ifndef ARCH_X86_64
+ sub esp, 0x60
+%endif
+ lea r4, [r1*4]
+ lea r5, [r1*3] ; 3*stride
+ dec r2d ; alpha-1
+ jl .end
+ neg r4
+ dec r3d ; beta-1
+ jl .end
+ add r4, r0 ; pix-4*stride
+ mova p1, [r4+2*r1]
+ mova p0, [r4+r5]
+ mova q0, [r0]
+ mova q1, [r0+r1]
+%ifdef ARCH_X86_64
+ pxor mpb_00, mpb_00
+ mova mpb_01, [pb_01 GLOBAL]
+ LOAD_MASK r2d, r3d, t5 ; m5=beta-1, t5=alpha-1, m7=mask0
+ SWAP 7, 12 ; m12=mask0
+ pavgb t5, mpb_00
+ pavgb t5, mpb_01 ; alpha/4+1
+ movdqa p2, [r4+r1]
+ movdqa q2, [r0+2*r1]
+ DIFF_GT2 p0, q0, t5, t0, t3 ; t0 = |p0-q0| > alpha/4+1
+ DIFF_GT2 p0, p2, m5, t2, t5 ; mask1 = |p2-p0| > beta-1
+ DIFF_GT2 q0, q2, m5, t4, t5 ; t4 = |q2-q0| > beta-1
+ pand t0, mask0
+ pand t4, t0
+ pand t2, t0
+ mova mask1q, t4
+ mova mask1p, t2
+%else
+ LOAD_MASK r2d, r3d, t5 ; m5=beta-1, t5=alpha-1, m7=mask0
+ mova m4, t5
+ mova mask0, m7
+ pavgb m4, [pb_00 GLOBAL]
+ pavgb m4, [pb_01 GLOBAL] ; alpha/4+1
+ DIFF_GT2 p0, q0, m4, m6, m7 ; m6 = |p0-q0| > alpha/4+1
+ pand m6, mask0
+ DIFF_GT2 p0, p2, m5, m4, m7 ; m4 = |p2-p0| > beta-1
+ pand m4, m6
+ mova mask1p, m4
+ DIFF_GT2 q0, q2, m5, m4, m7 ; m4 = |q2-q0| > beta-1
+ pand m4, m6
+ mova mask1q, m4
+%endif
+ LUMA_INTRA_P012 [r4+r5], [r4+2*r1], [r4+r1], [r4]
+ LUMA_INTRA_SWAP_PQ
+ LUMA_INTRA_P012 [r0], [r0+r1], [r0+2*r1], [r0+r5]
+.end:
+%ifndef ARCH_X86_64
+ add esp, 0x60
+%endif
+ RET
+
+INIT_MMX
+%ifdef ARCH_X86_64
+;-----------------------------------------------------------------------------
+; void x264_deblock_h_luma_intra_sse2( uint8_t *pix, int stride, int alpha, int beta )
+;-----------------------------------------------------------------------------
+cglobal x264_deblock_h_luma_intra_%1
+ movsxd r10, r1d
+ lea r11, [r10*3]
+ lea rax, [r0-4]
+ lea r9, [r0-4+r11]
+ sub rsp, 0x88
+ %define pix_tmp rsp
+
+ ; transpose 8x16 -> tmp space
+ TRANSPOSE8x8_MEM PASS8ROWS(rax, r9, r10, r11), PASS8ROWS(pix_tmp, pix_tmp+0x30, 0x10, 0x30)
+ lea rax, [rax+r10*8]
+ lea r9, [r9+r10*8]
+ TRANSPOSE8x8_MEM PASS8ROWS(rax, r9, r10, r11), PASS8ROWS(pix_tmp+8, pix_tmp+0x38, 0x10, 0x30)
+
+ lea r0, [pix_tmp+0x40]
+ mov r1, 0x10
+ call x264_deblock_v_luma_intra_%1
+
+ ; transpose 16x6 -> original space (but we can't write only 6 pixels, so really 16x8)
+ lea r9, [rax+r11]
+ TRANSPOSE8x8_MEM PASS8ROWS(pix_tmp+8, pix_tmp+0x38, 0x10, 0x30), PASS8ROWS(rax, r9, r10, r11)
+ shl r10, 3
+ sub rax, r10
+ sub r9, r10
+ shr r10, 3
+ TRANSPOSE8x8_MEM PASS8ROWS(pix_tmp, pix_tmp+0x30, 0x10, 0x30), PASS8ROWS(rax, r9, r10, r11)
+ add rsp, 0x88
+ ret
+%else
+cglobal x264_deblock_h_luma_intra_%1, 2,4
+ lea r3, [r1*3]
+ sub r0, 4
+ lea r2, [r0+r3]
+%assign pad 0x8c-(stack_offset&15)
+ SUB rsp, pad
+ %define pix_tmp rsp
+
+ ; transpose 8x16 -> tmp space
+ TRANSPOSE8x8_MEM PASS8ROWS(r0, r2, r1, r3), PASS8ROWS(pix_tmp, pix_tmp+0x30, 0x10, 0x30)
+ lea r0, [r0+r1*8]
+ lea r2, [r2+r1*8]
+ TRANSPOSE8x8_MEM PASS8ROWS(r0, r2, r1, r3), PASS8ROWS(pix_tmp+8, pix_tmp+0x38, 0x10, 0x30)
+
+ lea r0, [pix_tmp+0x40]
+ PUSH dword r3m
+ PUSH dword r2m
+ PUSH dword 16
+ PUSH r0
+ call x264_deblock_%2_luma_intra_%1
+%ifidn %2, v8
+ add dword [rsp], 8 ; pix_tmp+8
+ call x264_deblock_%2_luma_intra_%1
+%endif
+ ADD esp, 16
+
+ mov r1, r1m
+ mov r0, r0m
+ lea r3, [r1*3]
+ sub r0, 4
+ lea r2, [r0+r3]
+ ; transpose 16x6 -> original space (but we can't write only 6 pixels, so really 16x8)
+ TRANSPOSE8x8_MEM PASS8ROWS(pix_tmp, pix_tmp+0x30, 0x10, 0x30), PASS8ROWS(r0, r2, r1, r3)
+ lea r0, [r0+r1*8]
+ lea r2, [r2+r1*8]
+ TRANSPOSE8x8_MEM PASS8ROWS(pix_tmp+8, pix_tmp+0x38, 0x10, 0x30), PASS8ROWS(r0, r2, r1, r3)
+ ADD rsp, pad
+ RET
+%endif ; ARCH_X86_64
+%endmacro ; DEBLOCK_LUMA_INTRA
+
+INIT_XMM
+DEBLOCK_LUMA_INTRA sse2, v
+%ifndef ARCH_X86_64
+INIT_MMX
+DEBLOCK_LUMA_INTRA mmxext, v8
+%endif
diff --git a/libavcodec/x86/h264_i386.h b/libavcodec/x86/h264_i386.h
new file mode 100644
index 0000000..909c274
--- /dev/null
+++ b/libavcodec/x86/h264_i386.h
@@ -0,0 +1,155 @@
+/*
+ * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
+ * Copyright (c) 2003 Michael Niedermayer <michaelni at gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file h264_i386.h
+ * H.264 / AVC / MPEG4 part10 codec.
+ * non-MMX i386-specific optimizations for H.264
+ * @author Michael Niedermayer <michaelni at gmx.at>
+ */
+
+#ifndef AVCODEC_X86_H264_I386_H
+#define AVCODEC_X86_H264_I386_H
+
+#include "libavcodec/cabac.h"
+
+//FIXME use some macros to avoid duplicating get_cabac (cannot be done yet
+//as that would make optimization work hard)
+#if defined(ARCH_X86) && defined(HAVE_7REGS) && \
+ defined(HAVE_EBX_AVAILABLE) && \
+ !defined(BROKEN_RELOCATIONS)
+static int decode_significance_x86(CABACContext *c, int max_coeff,
+ uint8_t *significant_coeff_ctx_base,
+ int *index){
+ void *end= significant_coeff_ctx_base + max_coeff - 1;
+ int minusstart= -(int)significant_coeff_ctx_base;
+ int minusindex= 4-(int)index;
+ int coeff_count;
+ __asm__ volatile(
+ "movl "RANGE "(%3), %%esi \n\t"
+ "movl "LOW "(%3), %%ebx \n\t"
+
+ "2: \n\t"
+
+ BRANCHLESS_GET_CABAC("%%edx", "%3", "(%1)", "%%ebx",
+ "%%bx", "%%esi", "%%eax", "%%al")
+
+ "test $1, %%edx \n\t"
+ " jz 3f \n\t"
+
+ BRANCHLESS_GET_CABAC("%%edx", "%3", "61(%1)", "%%ebx",
+ "%%bx", "%%esi", "%%eax", "%%al")
+
+ "mov %2, %%"REG_a" \n\t"
+ "movl %4, %%ecx \n\t"
+ "add %1, %%"REG_c" \n\t"
+ "movl %%ecx, (%%"REG_a") \n\t"
+
+ "test $1, %%edx \n\t"
+ " jnz 4f \n\t"
+
+ "add $4, %%"REG_a" \n\t"
+ "mov %%"REG_a", %2 \n\t"
+
+ "3: \n\t"
+ "add $1, %1 \n\t"
+ "cmp %5, %1 \n\t"
+ " jb 2b \n\t"
+ "mov %2, %%"REG_a" \n\t"
+ "movl %4, %%ecx \n\t"
+ "add %1, %%"REG_c" \n\t"
+ "movl %%ecx, (%%"REG_a") \n\t"
+ "4: \n\t"
+ "add %6, %%eax \n\t"
+ "shr $2, %%eax \n\t"
+
+ "movl %%esi, "RANGE "(%3) \n\t"
+ "movl %%ebx, "LOW "(%3) \n\t"
+ :"=&a"(coeff_count), "+r"(significant_coeff_ctx_base), "+m"(index)
+ :"r"(c), "m"(minusstart), "m"(end), "m"(minusindex)
+ : "%"REG_c, "%ebx", "%edx", "%esi", "memory"
+ );
+ return coeff_count;
+}
+
+static int decode_significance_8x8_x86(CABACContext *c,
+ uint8_t *significant_coeff_ctx_base,
+ int *index, const uint8_t *sig_off){
+ int minusindex= 4-(int)index;
+ int coeff_count;
+ x86_reg last=0;
+ __asm__ volatile(
+ "movl "RANGE "(%3), %%esi \n\t"
+ "movl "LOW "(%3), %%ebx \n\t"
+
+ "mov %1, %%"REG_D" \n\t"
+ "2: \n\t"
+
+ "mov %6, %%"REG_a" \n\t"
+ "movzbl (%%"REG_a", %%"REG_D"), %%edi \n\t"
+ "add %5, %%"REG_D" \n\t"
+
+ BRANCHLESS_GET_CABAC("%%edx", "%3", "(%%"REG_D")", "%%ebx",
+ "%%bx", "%%esi", "%%eax", "%%al")
+
+ "mov %1, %%edi \n\t"
+ "test $1, %%edx \n\t"
+ " jz 3f \n\t"
+
+ "movzbl "MANGLE(last_coeff_flag_offset_8x8)"(%%edi), %%edi\n\t"
+ "add %5, %%"REG_D" \n\t"
+
+ BRANCHLESS_GET_CABAC("%%edx", "%3", "15(%%"REG_D")", "%%ebx",
+ "%%bx", "%%esi", "%%eax", "%%al")
+
+ "mov %2, %%"REG_a" \n\t"
+ "mov %1, %%edi \n\t"
+ "movl %%edi, (%%"REG_a") \n\t"
+
+ "test $1, %%edx \n\t"
+ " jnz 4f \n\t"
+
+ "add $4, %%"REG_a" \n\t"
+ "mov %%"REG_a", %2 \n\t"
+
+ "3: \n\t"
+ "addl $1, %%edi \n\t"
+ "mov %%edi, %1 \n\t"
+ "cmpl $63, %%edi \n\t"
+ " jb 2b \n\t"
+ "mov %2, %%"REG_a" \n\t"
+ "movl %%edi, (%%"REG_a") \n\t"
+ "4: \n\t"
+ "addl %4, %%eax \n\t"
+ "shr $2, %%eax \n\t"
+
+ "movl %%esi, "RANGE "(%3) \n\t"
+ "movl %%ebx, "LOW "(%3) \n\t"
+ :"=&a"(coeff_count),"+m"(last), "+m"(index)
+ :"r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), "m"(sig_off)
+ : "%"REG_c, "%ebx", "%edx", "%esi", "%"REG_D, "memory"
+ );
+ return coeff_count;
+}
+#endif /* defined(ARCH_X86) && defined(HAVE_7REGS) && */
+ /* defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS) */
+
+#endif /* AVCODEC_X86_H264_I386_H */
diff --git a/libavcodec/x86/h264_idct_sse2.asm b/libavcodec/x86/h264_idct_sse2.asm
new file mode 100755
index 0000000..a46cd97
--- /dev/null
+++ b/libavcodec/x86/h264_idct_sse2.asm
@@ -0,0 +1,61 @@
+;*****************************************************************************
+;* dct-a.asm: h264 encoder library
+;*****************************************************************************
+;* Copyright (C) 2003-2008 x264 project
+;*
+;* Authors: Laurent Aimar <fenrir at via.ecp.fr>
+;* Loren Merritt <lorenm at u.washington.edu>
+;* Holger Lubitz <hal at duncan.ol.sub.de>
+;* Min Chen <chenm001.163.com>
+;*
+;* This program is free software; you can redistribute it and/or modify
+;* it under the terms of the GNU General Public License as published by
+;* the Free Software Foundation; either version 2 of the License, or
+;* (at your option) any later version.
+;*
+;* This program is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;* GNU General Public License for more details.
+;*
+;* You should have received a copy of the GNU General Public License
+;* along with this program; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
+;*****************************************************************************
+
+%include "x86inc.asm"
+%include "x86util.asm"
+
+SECTION_RODATA
+pw_32: times 8 dw 32
+
+SECTION .text
+
+%macro IDCT4_1D 6
+ SUMSUB_BA m%3, m%1
+ SUMSUBD2_AB m%2, m%4, m%6, m%5
+ SUMSUB_BADC m%2, m%3, m%5, m%1
+ SWAP %1, %2, %5, %4, %3
+%endmacro
+
+INIT_XMM
+cglobal x264_add8x4_idct_sse2, 3,3
+ movq m0, [r1+ 0]
+ movq m1, [r1+ 8]
+ movq m2, [r1+16]
+ movq m3, [r1+24]
+ movhps m0, [r1+32]
+ movhps m1, [r1+40]
+ movhps m2, [r1+48]
+ movhps m3, [r1+56]
+ IDCT4_1D 0,1,2,3,4,5
+ TRANSPOSE2x4x4W 0,1,2,3,4
+ paddw m0, [pw_32 GLOBAL]
+ IDCT4_1D 0,1,2,3,4,5
+ pxor m7, m7
+ STORE_DIFF m0, m4, m7, [r0]
+ STORE_DIFF m1, m4, m7, [r0+r2]
+ lea r0, [r0+r2*2]
+ STORE_DIFF m2, m4, m7, [r0]
+ STORE_DIFF m3, m4, m7, [r0+r2]
+ RET
diff --git a/libavcodec/x86/h264dsp_mmx.c b/libavcodec/x86/h264dsp_mmx.c
new file mode 100644
index 0000000..8eeb657
--- /dev/null
+++ b/libavcodec/x86/h264dsp_mmx.c
@@ -0,0 +1,2296 @@
+/*
+ * Copyright (c) 2004-2005 Michael Niedermayer, Loren Merritt
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "dsputil_mmx.h"
+
+DECLARE_ALIGNED_8 (static const uint64_t, ff_pb_3_1 ) = 0x0103010301030103ULL;
+DECLARE_ALIGNED_8 (static const uint64_t, ff_pb_7_3 ) = 0x0307030703070307ULL;
+
+/***********************************/
+/* IDCT */
+
+#define SUMSUB_BADC( a, b, c, d ) \
+ "paddw "#b", "#a" \n\t"\
+ "paddw "#d", "#c" \n\t"\
+ "paddw "#b", "#b" \n\t"\
+ "paddw "#d", "#d" \n\t"\
+ "psubw "#a", "#b" \n\t"\
+ "psubw "#c", "#d" \n\t"
+
+#define SUMSUBD2_AB( a, b, t ) \
+ "movq "#b", "#t" \n\t"\
+ "psraw $1 , "#b" \n\t"\
+ "paddw "#a", "#b" \n\t"\
+ "psraw $1 , "#a" \n\t"\
+ "psubw "#t", "#a" \n\t"
+
+#define IDCT4_1D( s02, s13, d02, d13, t ) \
+ SUMSUB_BA ( s02, d02 )\
+ SUMSUBD2_AB( s13, d13, t )\
+ SUMSUB_BADC( d13, s02, s13, d02 )
+
+#define STORE_DIFF_4P( p, t, z ) \
+ "psraw $6, "#p" \n\t"\
+ "movd (%0), "#t" \n\t"\
+ "punpcklbw "#z", "#t" \n\t"\
+ "paddsw "#t", "#p" \n\t"\
+ "packuswb "#z", "#p" \n\t"\
+ "movd "#p", (%0) \n\t"
+
+static void ff_h264_idct_add_mmx(uint8_t *dst, int16_t *block, int stride)
+{
+ /* Load dct coeffs */
+ __asm__ volatile(
+ "movq (%0), %%mm0 \n\t"
+ "movq 8(%0), %%mm1 \n\t"
+ "movq 16(%0), %%mm2 \n\t"
+ "movq 24(%0), %%mm3 \n\t"
+ :: "r"(block) );
+
+ __asm__ volatile(
+ /* mm1=s02+s13 mm2=s02-s13 mm4=d02+d13 mm0=d02-d13 */
+ IDCT4_1D( %%mm2, %%mm1, %%mm0, %%mm3, %%mm4 )
+
+ "movq %0, %%mm6 \n\t"
+ /* in: 1,4,0,2 out: 1,2,3,0 */
+ TRANSPOSE4( %%mm3, %%mm1, %%mm0, %%mm2, %%mm4 )
+
+ "paddw %%mm6, %%mm3 \n\t"
+
+ /* mm2=s02+s13 mm3=s02-s13 mm4=d02+d13 mm1=d02-d13 */
+ IDCT4_1D( %%mm4, %%mm2, %%mm3, %%mm0, %%mm1 )
+
+ "pxor %%mm7, %%mm7 \n\t"
+ :: "m"(ff_pw_32));
+
+ __asm__ volatile(
+ STORE_DIFF_4P( %%mm0, %%mm1, %%mm7)
+ "add %1, %0 \n\t"
+ STORE_DIFF_4P( %%mm2, %%mm1, %%mm7)
+ "add %1, %0 \n\t"
+ STORE_DIFF_4P( %%mm3, %%mm1, %%mm7)
+ "add %1, %0 \n\t"
+ STORE_DIFF_4P( %%mm4, %%mm1, %%mm7)
+ : "+r"(dst)
+ : "r" ((x86_reg)stride)
+ );
+}
+
+static inline void h264_idct8_1d(int16_t *block)
+{
+ __asm__ volatile(
+ "movq 112(%0), %%mm7 \n\t"
+ "movq 80(%0), %%mm0 \n\t"
+ "movq 48(%0), %%mm3 \n\t"
+ "movq 16(%0), %%mm5 \n\t"
+
+ "movq %%mm0, %%mm4 \n\t"
+ "movq %%mm5, %%mm1 \n\t"
+ "psraw $1, %%mm4 \n\t"
+ "psraw $1, %%mm1 \n\t"
+ "paddw %%mm0, %%mm4 \n\t"
+ "paddw %%mm5, %%mm1 \n\t"
+ "paddw %%mm7, %%mm4 \n\t"
+ "paddw %%mm0, %%mm1 \n\t"
+ "psubw %%mm5, %%mm4 \n\t"
+ "paddw %%mm3, %%mm1 \n\t"
+
+ "psubw %%mm3, %%mm5 \n\t"
+ "psubw %%mm3, %%mm0 \n\t"
+ "paddw %%mm7, %%mm5 \n\t"
+ "psubw %%mm7, %%mm0 \n\t"
+ "psraw $1, %%mm3 \n\t"
+ "psraw $1, %%mm7 \n\t"
+ "psubw %%mm3, %%mm5 \n\t"
+ "psubw %%mm7, %%mm0 \n\t"
+
+ "movq %%mm4, %%mm3 \n\t"
+ "movq %%mm1, %%mm7 \n\t"
+ "psraw $2, %%mm1 \n\t"
+ "psraw $2, %%mm3 \n\t"
+ "paddw %%mm5, %%mm3 \n\t"
+ "psraw $2, %%mm5 \n\t"
+ "paddw %%mm0, %%mm1 \n\t"
+ "psraw $2, %%mm0 \n\t"
+ "psubw %%mm4, %%mm5 \n\t"
+ "psubw %%mm0, %%mm7 \n\t"
+
+ "movq 32(%0), %%mm2 \n\t"
+ "movq 96(%0), %%mm6 \n\t"
+ "movq %%mm2, %%mm4 \n\t"
+ "movq %%mm6, %%mm0 \n\t"
+ "psraw $1, %%mm4 \n\t"
+ "psraw $1, %%mm6 \n\t"
+ "psubw %%mm0, %%mm4 \n\t"
+ "paddw %%mm2, %%mm6 \n\t"
+
+ "movq (%0), %%mm2 \n\t"
+ "movq 64(%0), %%mm0 \n\t"
+ SUMSUB_BA( %%mm0, %%mm2 )
+ SUMSUB_BA( %%mm6, %%mm0 )
+ SUMSUB_BA( %%mm4, %%mm2 )
+ SUMSUB_BA( %%mm7, %%mm6 )
+ SUMSUB_BA( %%mm5, %%mm4 )
+ SUMSUB_BA( %%mm3, %%mm2 )
+ SUMSUB_BA( %%mm1, %%mm0 )
+ :: "r"(block)
+ );
+}
+
+static void ff_h264_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
+{
+ int i;
+ int16_t __attribute__ ((aligned(8))) b2[64];
+
+ block[0] += 32;
+
+ for(i=0; i<2; i++){
+ DECLARE_ALIGNED_8(uint64_t, tmp);
+
+ h264_idct8_1d(block+4*i);
+
+ __asm__ volatile(
+ "movq %%mm7, %0 \n\t"
+ TRANSPOSE4( %%mm0, %%mm2, %%mm4, %%mm6, %%mm7 )
+ "movq %%mm0, 8(%1) \n\t"
+ "movq %%mm6, 24(%1) \n\t"
+ "movq %%mm7, 40(%1) \n\t"
+ "movq %%mm4, 56(%1) \n\t"
+ "movq %0, %%mm7 \n\t"
+ TRANSPOSE4( %%mm7, %%mm5, %%mm3, %%mm1, %%mm0 )
+ "movq %%mm7, (%1) \n\t"
+ "movq %%mm1, 16(%1) \n\t"
+ "movq %%mm0, 32(%1) \n\t"
+ "movq %%mm3, 48(%1) \n\t"
+ : "=m"(tmp)
+ : "r"(b2+32*i)
+ : "memory"
+ );
+ }
+
+ for(i=0; i<2; i++){
+ h264_idct8_1d(b2+4*i);
+
+ __asm__ volatile(
+ "psraw $6, %%mm7 \n\t"
+ "psraw $6, %%mm6 \n\t"
+ "psraw $6, %%mm5 \n\t"
+ "psraw $6, %%mm4 \n\t"
+ "psraw $6, %%mm3 \n\t"
+ "psraw $6, %%mm2 \n\t"
+ "psraw $6, %%mm1 \n\t"
+ "psraw $6, %%mm0 \n\t"
+
+ "movq %%mm7, (%0) \n\t"
+ "movq %%mm5, 16(%0) \n\t"
+ "movq %%mm3, 32(%0) \n\t"
+ "movq %%mm1, 48(%0) \n\t"
+ "movq %%mm0, 64(%0) \n\t"
+ "movq %%mm2, 80(%0) \n\t"
+ "movq %%mm4, 96(%0) \n\t"
+ "movq %%mm6, 112(%0) \n\t"
+ :: "r"(b2+4*i)
+ : "memory"
+ );
+ }
+
+ add_pixels_clamped_mmx(b2, dst, stride);
+}
+
+#define STORE_DIFF_8P( p, d, t, z )\
+ "movq "#d", "#t" \n"\
+ "psraw $6, "#p" \n"\
+ "punpcklbw "#z", "#t" \n"\
+ "paddsw "#t", "#p" \n"\
+ "packuswb "#p", "#p" \n"\
+ "movq "#p", "#d" \n"
+
+#define H264_IDCT8_1D_SSE2(a,b,c,d,e,f,g,h)\
+ "movdqa "#c", "#a" \n"\
+ "movdqa "#g", "#e" \n"\
+ "psraw $1, "#c" \n"\
+ "psraw $1, "#g" \n"\
+ "psubw "#e", "#c" \n"\
+ "paddw "#a", "#g" \n"\
+ "movdqa "#b", "#e" \n"\
+ "psraw $1, "#e" \n"\
+ "paddw "#b", "#e" \n"\
+ "paddw "#d", "#e" \n"\
+ "paddw "#f", "#e" \n"\
+ "movdqa "#f", "#a" \n"\
+ "psraw $1, "#a" \n"\
+ "paddw "#f", "#a" \n"\
+ "paddw "#h", "#a" \n"\
+ "psubw "#b", "#a" \n"\
+ "psubw "#d", "#b" \n"\
+ "psubw "#d", "#f" \n"\
+ "paddw "#h", "#b" \n"\
+ "psubw "#h", "#f" \n"\
+ "psraw $1, "#d" \n"\
+ "psraw $1, "#h" \n"\
+ "psubw "#d", "#b" \n"\
+ "psubw "#h", "#f" \n"\
+ "movdqa "#e", "#d" \n"\
+ "movdqa "#a", "#h" \n"\
+ "psraw $2, "#d" \n"\
+ "psraw $2, "#h" \n"\
+ "paddw "#f", "#d" \n"\
+ "paddw "#b", "#h" \n"\
+ "psraw $2, "#f" \n"\
+ "psraw $2, "#b" \n"\
+ "psubw "#f", "#e" \n"\
+ "psubw "#a", "#b" \n"\
+ "movdqa 0x00(%1), "#a" \n"\
+ "movdqa 0x40(%1), "#f" \n"\
+ SUMSUB_BA(f, a)\
+ SUMSUB_BA(g, f)\
+ SUMSUB_BA(c, a)\
+ SUMSUB_BA(e, g)\
+ SUMSUB_BA(b, c)\
+ SUMSUB_BA(h, a)\
+ SUMSUB_BA(d, f)
+
+// Full 8x8 IDCT-and-add, SSE2: 1-D pass, 8x8 transpose (spilling through
+// the block buffer), second 1-D pass, then add the rounded result to
+// 8 rows of 8 pixels at dst.  block must be 16-byte aligned (movdqa).
+static void ff_h264_idct8_add_sse2(uint8_t *dst, int16_t *block, int stride)
+{
+ __asm__ volatile(
+ "movdqa 0x10(%1), %%xmm1 \n"
+ "movdqa 0x20(%1), %%xmm2 \n"
+ "movdqa 0x30(%1), %%xmm3 \n"
+ "movdqa 0x50(%1), %%xmm5 \n"
+ "movdqa 0x60(%1), %%xmm6 \n"
+ "movdqa 0x70(%1), %%xmm7 \n"
+ H264_IDCT8_1D_SSE2(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm6, %%xmm7)
+ TRANSPOSE8(%%xmm4, %%xmm1, %%xmm7, %%xmm3, %%xmm5, %%xmm0, %%xmm2, %%xmm6, (%1))
+ // bias by 32 (%4 = ff_pw_32) so the final >>6 in STORE_DIFF_8P rounds
+ "paddw %4, %%xmm4 \n"
+ "movdqa %%xmm4, 0x00(%1) \n"
+ "movdqa %%xmm2, 0x40(%1) \n"
+ H264_IDCT8_1D_SSE2(%%xmm4, %%xmm0, %%xmm6, %%xmm3, %%xmm2, %%xmm5, %%xmm7, %%xmm1)
+ // rows 6/7 are parked in the block buffer while xmm6/7 serve as
+ // scratch/zero for the first six stores
+ "movdqa %%xmm6, 0x60(%1) \n"
+ "movdqa %%xmm7, 0x70(%1) \n"
+ "pxor %%xmm7, %%xmm7 \n"
+ STORE_DIFF_8P(%%xmm2, (%0), %%xmm6, %%xmm7)
+ STORE_DIFF_8P(%%xmm0, (%0,%2), %%xmm6, %%xmm7)
+ STORE_DIFF_8P(%%xmm1, (%0,%2,2), %%xmm6, %%xmm7)
+ STORE_DIFF_8P(%%xmm3, (%0,%3), %%xmm6, %%xmm7)
+ "lea (%0,%2,4), %0 \n"
+ STORE_DIFF_8P(%%xmm5, (%0), %%xmm6, %%xmm7)
+ STORE_DIFF_8P(%%xmm4, (%0,%2), %%xmm6, %%xmm7)
+ "movdqa 0x60(%1), %%xmm0 \n"
+ "movdqa 0x70(%1), %%xmm1 \n"
+ STORE_DIFF_8P(%%xmm0, (%0,%2,2), %%xmm6, %%xmm7)
+ STORE_DIFF_8P(%%xmm1, (%0,%3), %%xmm6, %%xmm7)
+ :"+r"(dst)
+ :"r"(block), "r"((x86_reg)stride), "r"((x86_reg)3L*stride), "m"(ff_pw_32)
+ );
+}
+
+// DC-only 4x4 IDCT-and-add: dst += (block[0]+32)>>6 with unsigned
+// saturation.  mm0 = broadcast +dc bytes, mm1 = broadcast -dc bytes;
+// the paddusb/psubusb pair clips for either sign of dc.
+static void ff_h264_idct_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride)
+{
+ int dc = (block[0] + 32) >> 6;
+ __asm__ volatile(
+ "movd %0, %%mm0 \n\t"
+ "pshufw $0, %%mm0, %%mm0 \n\t"
+ "pxor %%mm1, %%mm1 \n\t"
+ "psubw %%mm0, %%mm1 \n\t"
+ "packuswb %%mm0, %%mm0 \n\t"
+ "packuswb %%mm1, %%mm1 \n\t"
+ ::"r"(dc)
+ );
+ // mm0/mm1 carry over from the statement above into this one
+ __asm__ volatile(
+ "movd %0, %%mm2 \n\t"
+ "movd %1, %%mm3 \n\t"
+ "movd %2, %%mm4 \n\t"
+ "movd %3, %%mm5 \n\t"
+ "paddusb %%mm0, %%mm2 \n\t"
+ "paddusb %%mm0, %%mm3 \n\t"
+ "paddusb %%mm0, %%mm4 \n\t"
+ "paddusb %%mm0, %%mm5 \n\t"
+ "psubusb %%mm1, %%mm2 \n\t"
+ "psubusb %%mm1, %%mm3 \n\t"
+ "psubusb %%mm1, %%mm4 \n\t"
+ "psubusb %%mm1, %%mm5 \n\t"
+ "movd %%mm2, %0 \n\t"
+ "movd %%mm3, %1 \n\t"
+ "movd %%mm4, %2 \n\t"
+ "movd %%mm5, %3 \n\t"
+ :"+m"(*(uint32_t*)(dst+0*stride)),
+ "+m"(*(uint32_t*)(dst+1*stride)),
+ "+m"(*(uint32_t*)(dst+2*stride)),
+ "+m"(*(uint32_t*)(dst+3*stride))
+ );
+}
+
+// DC-only 8x8 IDCT-and-add: same saturated +/-dc trick as the 4x4
+// version, applied to 8-byte rows; the loop covers 8 rows in two
+// 4-row batches.
+static void ff_h264_idct8_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride)
+{
+ int dc = (block[0] + 32) >> 6;
+ int y;
+ __asm__ volatile(
+ "movd %0, %%mm0 \n\t"
+ "pshufw $0, %%mm0, %%mm0 \n\t"
+ "pxor %%mm1, %%mm1 \n\t"
+ "psubw %%mm0, %%mm1 \n\t"
+ "packuswb %%mm0, %%mm0 \n\t"
+ "packuswb %%mm1, %%mm1 \n\t"
+ ::"r"(dc)
+ );
+ for(y=2; y--; dst += 4*stride){
+ __asm__ volatile(
+ "movq %0, %%mm2 \n\t"
+ "movq %1, %%mm3 \n\t"
+ "movq %2, %%mm4 \n\t"
+ "movq %3, %%mm5 \n\t"
+ "paddusb %%mm0, %%mm2 \n\t"
+ "paddusb %%mm0, %%mm3 \n\t"
+ "paddusb %%mm0, %%mm4 \n\t"
+ "paddusb %%mm0, %%mm5 \n\t"
+ "psubusb %%mm1, %%mm2 \n\t"
+ "psubusb %%mm1, %%mm3 \n\t"
+ "psubusb %%mm1, %%mm4 \n\t"
+ "psubusb %%mm1, %%mm5 \n\t"
+ "movq %%mm2, %0 \n\t"
+ "movq %%mm3, %1 \n\t"
+ "movq %%mm4, %2 \n\t"
+ "movq %%mm5, %3 \n\t"
+ :"+m"(*(uint64_t*)(dst+0*stride)),
+ "+m"(*(uint64_t*)(dst+1*stride)),
+ "+m"(*(uint64_t*)(dst+2*stride)),
+ "+m"(*(uint64_t*)(dst+3*stride))
+ );
+ }
+}
+
+//FIXME this table is a duplicate from h264data.h, and will be removed once the tables from, h264 have been split
+// Maps a 4x4 block index (0-15 luma, 16-23 chroma) to its slot in the
+// nnzc[6*8] non-zero-count cache used by the idct_add* wrappers below.
+static const uint8_t scan8[16 + 2*4]={
+ 4+1*8, 5+1*8, 4+2*8, 5+2*8,
+ 6+1*8, 7+1*8, 6+2*8, 7+2*8,
+ 4+3*8, 5+3*8, 4+4*8, 5+4*8,
+ 6+3*8, 7+3*8, 6+4*8, 7+4*8,
+ 1+1*8, 2+1*8,
+ 1+2*8, 2+2*8,
+ 1+4*8, 2+4*8,
+ 1+5*8, 2+5*8,
+};
+
+// Add the 4x4 IDCT of every luma block that has coded coefficients
+// (per the nnzc cache) into dst.
+static void ff_h264_idct_add16_mmx(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+ int i;
+ for(i=0; i<16; i++){
+ if(nnzc[ scan8[i] ])
+ ff_h264_idct_add_mmx(dst + block_offset[i], block + i*16, stride);
+ }
+}
+
+// 8x8-transform variant: four 8x8 blocks, stepping the 4x4 index by 4.
+static void ff_h264_idct8_add4_mmx(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+ int i;
+ for(i=0; i<16; i+=4){
+ if(nnzc[ scan8[i] ])
+ ff_h264_idct8_add_mmx(dst + block_offset[i], block + i*16, stride);
+ }
+}
+
+
+// Like add16_mmx, but uses the DC-only fast path when the block has a
+// single coefficient and it is the DC (nnz==1 && block[i*16]).
+static void ff_h264_idct_add16_mmx2(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+ int i;
+ for(i=0; i<16; i++){
+ int nnz = nnzc[ scan8[i] ];
+ if(nnz){
+ if(nnz==1 && block[i*16]) ff_h264_idct_dc_add_mmx2(dst + block_offset[i], block + i*16, stride);
+ else ff_h264_idct_add_mmx (dst + block_offset[i], block + i*16, stride);
+ }
+ }
+}
+
+// Intra variant: a block is also added when only its DC coefficient
+// is non-zero (block[i*16]), even if the nnzc cache entry is 0.
+static void ff_h264_idct_add16intra_mmx(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+ int i;
+ for(i=0; i<16; i++){
+ if(nnzc[ scan8[i] ] || block[i*16])
+ ff_h264_idct_add_mmx(dst + block_offset[i], block + i*16, stride);
+ }
+}
+
+// Intra variant with DC fast path: full IDCT when AC coeffs are coded,
+// DC-only add when just the DC is set.
+static void ff_h264_idct_add16intra_mmx2(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+ int i;
+ for(i=0; i<16; i++){
+ if(nnzc[ scan8[i] ]) ff_h264_idct_add_mmx (dst + block_offset[i], block + i*16, stride);
+ else if(block[i*16]) ff_h264_idct_dc_add_mmx2(dst + block_offset[i], block + i*16, stride);
+ }
+}
+
+// 8x8 variant with DC-only fast path (nnz==1 && DC coefficient set).
+static void ff_h264_idct8_add4_mmx2(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+ int i;
+ for(i=0; i<16; i+=4){
+ int nnz = nnzc[ scan8[i] ];
+ if(nnz){
+ if(nnz==1 && block[i*16]) ff_h264_idct8_dc_add_mmx2(dst + block_offset[i], block + i*16, stride);
+ else ff_h264_idct8_add_mmx (dst + block_offset[i], block + i*16, stride);
+ }
+ }
+}
+
+// SSE2 8x8 variant; the DC-only path still uses the MMX2 routine
+// (DC add is cheap and 8 bytes wide, so MMX suffices).
+static void ff_h264_idct8_add4_sse2(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+ int i;
+ for(i=0; i<16; i+=4){
+ int nnz = nnzc[ scan8[i] ];
+ if(nnz){
+ if(nnz==1 && block[i*16]) ff_h264_idct8_dc_add_mmx2(dst + block_offset[i], block + i*16, stride);
+ else ff_h264_idct8_add_sse2 (dst + block_offset[i], block + i*16, stride);
+ }
+ }
+}
+
+// Chroma: blocks 16-19 go to dest[0], 20-23 to dest[1]
+// (plane selected by (i&4)>>2).  DC-only blocks are added too.
+static void ff_h264_idct_add8_mmx(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+ int i;
+ for(i=16; i<16+8; i++){
+ if(nnzc[ scan8[i] ] || block[i*16])
+ ff_h264_idct_add_mmx (dest[(i&4)>>2] + block_offset[i], block + i*16, stride);
+ }
+}
+
+// Chroma with DC-only fast path; plane selection as in add8_mmx.
+static void ff_h264_idct_add8_mmx2(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+ int i;
+ for(i=16; i<16+8; i++){
+ if(nnzc[ scan8[i] ])
+ ff_h264_idct_add_mmx (dest[(i&4)>>2] + block_offset[i], block + i*16, stride);
+ else if(block[i*16])
+ ff_h264_idct_dc_add_mmx2(dest[(i&4)>>2] + block_offset[i], block + i*16, stride);
+ }
+}
+
+#if defined(CONFIG_GPL) && defined(HAVE_YASM)
+// DC-only add for a PAIR of horizontally adjacent 4x4 blocks
+// (DCs at block[0] and block[16]), covering an 8-pixel-wide area.
+// Rounding (+32, >>6) is done in the SIMD code; the existing per-line
+// comments show the register layout.
+static void ff_h264_idct_dc_add8_mmx2(uint8_t *dst, int16_t *block, int stride)
+{
+ __asm__ volatile(
+ "movd %0, %%mm0 \n\t" // 0 0 X D
+ "punpcklwd %1, %%mm0 \n\t" // x X d D
+ "paddsw %2, %%mm0 \n\t"
+ "psraw $6, %%mm0 \n\t"
+ "punpcklwd %%mm0, %%mm0 \n\t" // d d D D
+ "pxor %%mm1, %%mm1 \n\t" // 0 0 0 0
+ "psubw %%mm0, %%mm1 \n\t" // -d-d-D-D
+ "packuswb %%mm1, %%mm0 \n\t" // -d-d-D-D d d D D
+ "pshufw $0xFA, %%mm0, %%mm1 \n\t" // -d-d-d-d-D-D-D-D
+ "punpcklwd %%mm0, %%mm0 \n\t" // d d d d D D D D
+ ::"m"(block[ 0]),
+ "m"(block[16]),
+ "m"(ff_pw_32)
+ );
+ // mm0 = +dc bytes per half, mm1 = -dc bytes: same saturated-add
+ // clipping scheme as ff_h264_idct_dc_add_mmx2 above
+ __asm__ volatile(
+ "movq %0, %%mm2 \n\t"
+ "movq %1, %%mm3 \n\t"
+ "movq %2, %%mm4 \n\t"
+ "movq %3, %%mm5 \n\t"
+ "paddusb %%mm0, %%mm2 \n\t"
+ "paddusb %%mm0, %%mm3 \n\t"
+ "paddusb %%mm0, %%mm4 \n\t"
+ "paddusb %%mm0, %%mm5 \n\t"
+ "psubusb %%mm1, %%mm2 \n\t"
+ "psubusb %%mm1, %%mm3 \n\t"
+ "psubusb %%mm1, %%mm4 \n\t"
+ "psubusb %%mm1, %%mm5 \n\t"
+ "movq %%mm2, %0 \n\t"
+ "movq %%mm3, %1 \n\t"
+ "movq %%mm4, %2 \n\t"
+ "movq %%mm5, %3 \n\t"
+ :"+m"(*(uint64_t*)(dst+0*stride)),
+ "+m"(*(uint64_t*)(dst+1*stride)),
+ "+m"(*(uint64_t*)(dst+2*stride)),
+ "+m"(*(uint64_t*)(dst+3*stride))
+ );
+}
+
+extern void ff_x264_add8x4_idct_sse2(uint8_t *dst, int16_t *block, int stride);
+
+// Luma via the x264 SSE2 routine, which handles two adjacent 4x4
+// blocks per call — hence the i+=2 step and the OR of both nnzc slots.
+static void ff_h264_idct_add16_sse2(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+ int i;
+ for(i=0; i<16; i+=2)
+ if(nnzc[ scan8[i+0] ]|nnzc[ scan8[i+1] ])
+ ff_x264_add8x4_idct_sse2 (dst + block_offset[i], block + i*16, stride);
+}
+
+// Intra variant: when neither block of the pair has coded AC coeffs
+// but either DC is set, use the paired DC-only add instead.
+static void ff_h264_idct_add16intra_sse2(uint8_t *dst, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+ int i;
+ for(i=0; i<16; i+=2){
+ if(nnzc[ scan8[i+0] ]|nnzc[ scan8[i+1] ])
+ ff_x264_add8x4_idct_sse2 (dst + block_offset[i], block + i*16, stride);
+ else if(block[i*16]|block[i*16+16])
+ ff_h264_idct_dc_add8_mmx2(dst + block_offset[i], block + i*16, stride);
+ }
+}
+
+// Chroma via the x264 SSE2 routine.  ff_x264_add8x4_idct_sse2() (and
+// the DC fallback) process TWO adjacent 4x4 blocks per call, so the
+// loop must step by 2 like ff_h264_idct_add16_sse2/add16intra_sse2
+// above.  Stepping by 1 would process each pair twice and, at i==23,
+// read scan8[24] past the end of the 24-entry scan8[] table.
+// dest[0]/dest[1] are the two chroma planes, selected by (i&4)>>2.
+static void ff_h264_idct_add8_sse2(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]){
+ int i;
+ for(i=16; i<16+8; i+=2){
+ if(nnzc[ scan8[i+0] ]|nnzc[ scan8[i+1] ])
+ ff_x264_add8x4_idct_sse2 (dest[(i&4)>>2] + block_offset[i], block + i*16, stride);
+ else if(block[i*16]|block[i*16+16])
+ ff_h264_idct_dc_add8_mmx2(dest[(i&4)>>2] + block_offset[i], block + i*16, stride);
+ }
+}
+#endif
+
+/***********************************/
+/* deblocking */
+
+// out: o = sat(|x-y| - a) per byte, i.e. nonzero exactly where |x-y|>a
+// (one of the two saturated subtractions is always 0, so por gives |x-y|)
+// clobbers: t
+#define DIFF_GT_MMX(x,y,a,o,t)\
+ "movq "#y", "#t" \n\t"\
+ "movq "#x", "#o" \n\t"\
+ "psubusb "#x", "#t" \n\t"\
+ "psubusb "#y", "#o" \n\t"\
+ "por "#t", "#o" \n\t"\
+ "psubusb "#a", "#o" \n\t"
+
+// out: o = 0xff per byte where |x-y| <= a, 0 where |x-y| > a
+// (note the INVERSE polarity of DIFF_GT_MMX: both saturated diffs are
+// reduced by a and compared for equality — they match only when the
+// true difference fits within a; callers AND the result with the mask,
+// cf. the "|p2-p0|<beta" usage below)
+// clobbers: t
+#define DIFF_GT2_MMX(x,y,a,o,t)\
+ "movq "#y", "#t" \n\t"\
+ "movq "#x", "#o" \n\t"\
+ "psubusb "#x", "#t" \n\t"\
+ "psubusb "#y", "#o" \n\t"\
+ "psubusb "#a", "#t" \n\t"\
+ "psubusb "#a", "#o" \n\t"\
+ "pcmpeqb "#t", "#o" \n\t"\
+
+// in: mm0=p1 mm1=p0 mm2=q0 mm3=q1
+// out: mm5=beta-1, mm7=mask
+// clobbers: mm4,mm6
+// alpha1/beta1 are 16-bit memory operands holding alpha-1/beta-1;
+// pshufw+packuswb broadcasts them to all 8 bytes.  mm7 ends up 0xff
+// where ALL three edge conditions hold (none of the DIFF_GT results
+// is nonzero), i.e. where the edge should be filtered.
+#define H264_DEBLOCK_MASK(alpha1, beta1) \
+ "pshufw $0, "#alpha1", %%mm4 \n\t"\
+ "pshufw $0, "#beta1 ", %%mm5 \n\t"\
+ "packuswb %%mm4, %%mm4 \n\t"\
+ "packuswb %%mm5, %%mm5 \n\t"\
+ DIFF_GT_MMX(%%mm1, %%mm2, %%mm4, %%mm7, %%mm6) /* |p0-q0| > alpha-1 */\
+ DIFF_GT_MMX(%%mm0, %%mm1, %%mm5, %%mm4, %%mm6) /* |p1-p0| > beta-1 */\
+ "por %%mm4, %%mm7 \n\t"\
+ DIFF_GT_MMX(%%mm3, %%mm2, %%mm5, %%mm4, %%mm6) /* |q1-q0| > beta-1 */\
+ "por %%mm4, %%mm7 \n\t"\
+ "pxor %%mm6, %%mm6 \n\t"\
+ "pcmpeqb %%mm6, %%mm7 \n\t"
+
+// in: mm0=p1 mm1=p0 mm2=q0 mm3=q1 mm7=(tc&mask)
+// out: mm1=p0' mm2=q0'
+// clobbers: mm0,3-6
+// Computes the clipped delta via pavgb tricks (see the per-line
+// comments) and applies +delta to p0 / -delta to q0 with saturation.
+// NOTE(review): the second macro argument (pb_3f) is never referenced
+// in the body — callers pass a dummy ("unused").
+#define H264_DEBLOCK_P0_Q0(pb_01, pb_3f)\
+ "movq %%mm1 , %%mm5 \n\t"\
+ "pxor %%mm2 , %%mm5 \n\t" /* p0^q0*/\
+ "pand "#pb_01" , %%mm5 \n\t" /* (p0^q0)&1*/\
+ "pcmpeqb %%mm4 , %%mm4 \n\t"\
+ "pxor %%mm4 , %%mm3 \n\t"\
+ "pavgb %%mm0 , %%mm3 \n\t" /* (p1 - q1 + 256)>>1*/\
+ "pavgb "MANGLE(ff_pb_3)" , %%mm3 \n\t" /*(((p1 - q1 + 256)>>1)+4)>>1 = 64+2+(p1-q1)>>2*/\
+ "pxor %%mm1 , %%mm4 \n\t"\
+ "pavgb %%mm2 , %%mm4 \n\t" /* (q0 - p0 + 256)>>1*/\
+ "pavgb %%mm5 , %%mm3 \n\t"\
+ "paddusb %%mm4 , %%mm3 \n\t" /* d+128+33*/\
+ "movq "MANGLE(ff_pb_A1)" , %%mm6 \n\t"\
+ "psubusb %%mm3 , %%mm6 \n\t"\
+ "psubusb "MANGLE(ff_pb_A1)" , %%mm3 \n\t"\
+ "pminub %%mm7 , %%mm6 \n\t"\
+ "pminub %%mm7 , %%mm3 \n\t"\
+ "psubusb %%mm6 , %%mm1 \n\t"\
+ "psubusb %%mm3 , %%mm2 \n\t"\
+ "paddusb %%mm3 , %%mm1 \n\t"\
+ "paddusb %%mm6 , %%mm2 \n\t"
+
+// in: mm0=p1 mm1=p0 mm2=q0 mm3=q1 mm7=(tc&mask) %8=ff_bone
+// out: (q1addr) = av_clip( (q2+((p0+q0+1)>>1))>>1, q1-tc0, q1+tc0 )
+// clobbers: q2, tmp, tc0
+// The pxor/pand-with-1/psubusb sequence corrects pavgb's round-up so
+// the average rounds the same way as the spec's (a+b)>>1.
+#define H264_DEBLOCK_Q1(p1, q2, q2addr, q1addr, tc0, tmp)\
+ "movq %%mm1, "#tmp" \n\t"\
+ "pavgb %%mm2, "#tmp" \n\t"\
+ "pavgb "#tmp", "#q2" \n\t" /* avg(p2,avg(p0,q0)) */\
+ "pxor "q2addr", "#tmp" \n\t"\
+ "pand %8, "#tmp" \n\t" /* (p2^avg(p0,q0))&1 */\
+ "psubusb "#tmp", "#q2" \n\t" /* (p2+((p0+q0+1)>>1))>>1 */\
+ "movq "#p1", "#tmp" \n\t"\
+ "psubusb "#tc0", "#tmp" \n\t"\
+ "paddusb "#p1", "#tc0" \n\t"\
+ "pmaxub "#tmp", "#q2" \n\t"\
+ "pminub "#tc0", "#q2" \n\t"\
+ "movq "#q2", "q1addr" \n\t"
+
+// Core luma deblock for one 8-pixel-wide vertical edge.
+// pix points at q0's row; %1 = pix-3*stride (p2 row base), %2 = pix.
+// alpha1/beta1 must already be alpha-1/beta-1.  tc0: two int8 limits
+// (broadcast to 4 bytes each); a negative tc0 disables filtering.
+// tmp0[0] holds tc&mask, tmp0[1] the broadcast tc0 bytes.
+static inline void h264_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alpha1, int beta1, int8_t *tc0)
+{
+ DECLARE_ALIGNED_8(uint64_t, tmp0[2]);
+
+ __asm__ volatile(
+ "movq (%1,%3), %%mm0 \n\t" //p1
+ "movq (%1,%3,2), %%mm1 \n\t" //p0
+ "movq (%2), %%mm2 \n\t" //q0
+ "movq (%2,%3), %%mm3 \n\t" //q1
+ H264_DEBLOCK_MASK(%6, %7)
+
+ "movd %5, %%mm4 \n\t"
+ "punpcklbw %%mm4, %%mm4 \n\t"
+ "punpcklwd %%mm4, %%mm4 \n\t"
+ "pcmpeqb %%mm3, %%mm3 \n\t"
+ "movq %%mm4, %%mm6 \n\t"
+ // mm4 = 0xff where tc0 >= 0 (pcmpgtb against -1); negative tc0
+ // entries knock their 4 pixels out of the mask
+ "pcmpgtb %%mm3, %%mm4 \n\t"
+ "movq %%mm6, 8+%0 \n\t"
+ "pand %%mm4, %%mm7 \n\t"
+ "movq %%mm7, %0 \n\t"
+
+ /* filter p1 */
+ "movq (%1), %%mm3 \n\t" //p2
+ DIFF_GT2_MMX(%%mm1, %%mm3, %%mm5, %%mm6, %%mm4) // |p2-p0|>beta-1
+ "pand %%mm7, %%mm6 \n\t" // mask & |p2-p0|<beta
+ "pand 8+%0, %%mm7 \n\t" // mask & tc0
+ "movq %%mm7, %%mm4 \n\t"
+ "psubb %%mm6, %%mm7 \n\t"
+ "pand %%mm4, %%mm6 \n\t" // mask & |p2-p0|<beta & tc0
+ H264_DEBLOCK_Q1(%%mm0, %%mm3, "(%1)", "(%1,%3)", %%mm6, %%mm4)
+
+ /* filter q1 */
+ "movq (%2,%3,2), %%mm4 \n\t" //q2
+ DIFF_GT2_MMX(%%mm2, %%mm4, %%mm5, %%mm6, %%mm3) // |q2-q0|>beta-1
+ "pand %0, %%mm6 \n\t"
+ "movq 8+%0, %%mm5 \n\t" // can be merged with the and below but is slower then
+ "pand %%mm6, %%mm5 \n\t"
+ "psubb %%mm6, %%mm7 \n\t"
+ "movq (%2,%3), %%mm3 \n\t"
+ H264_DEBLOCK_Q1(%%mm3, %%mm4, "(%2,%3,2)", "(%2,%3)", %%mm5, %%mm6)
+
+ /* filter p0, q0 */
+ H264_DEBLOCK_P0_Q0(%8, unused)
+ "movq %%mm1, (%1,%3,2) \n\t"
+ "movq %%mm2, (%2) \n\t"
+
+ : "=m"(*tmp0)
+ : "r"(pix-3*stride), "r"(pix), "r"((x86_reg)stride),
+ "m"(*tmp0/*unused*/), "m"(*(uint32_t*)tc0), "m"(alpha1), "m"(beta1),
+ "m"(ff_bone)
+ );
+}
+
+// Vertical (horizontal-edge) luma filter: 16 pixels as two 8-wide
+// halves.  (tc0[a]&tc0[b]) has its sign bit set only when BOTH limits
+// are negative, in which case the whole half is skipped.
+static void h264_v_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+{
+ if((tc0[0] & tc0[1]) >= 0)
+ h264_loop_filter_luma_mmx2(pix, stride, alpha-1, beta-1, tc0);
+ if((tc0[2] & tc0[3]) >= 0)
+ h264_loop_filter_luma_mmx2(pix+8, stride, alpha-1, beta-1, tc0+2);
+}
+// Horizontal (vertical-edge) luma filter: transpose each 8x8 strip
+// into a temp buffer, run the vertical core on it, then transpose the
+// four modified middle columns (p1..q1) back.
+static void h264_h_loop_filter_luma_mmx2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+{
+ //FIXME: could cut some load/stores by merging transpose with filter
+ // also, it only needs to transpose 6x8
+ DECLARE_ALIGNED_8(uint8_t, trans[8*8]);
+ int i;
+ for(i=0; i<2; i++, pix+=8*stride, tc0+=2) {
+ if((tc0[0] & tc0[1]) < 0)
+ continue;
+ transpose4x4(trans, pix-4, 8, stride);
+ transpose4x4(trans +4*8, pix, 8, stride);
+ transpose4x4(trans+4, pix-4+4*stride, 8, stride);
+ transpose4x4(trans+4+4*8, pix +4*stride, 8, stride);
+ h264_loop_filter_luma_mmx2(trans+4*8, 8, alpha-1, beta-1, tc0);
+ transpose4x4(pix-2, trans +2*8, stride, 8);
+ transpose4x4(pix-2+4*stride, trans+4+2*8, stride, 8);
+ }
+}
+
+// Core chroma deblock for one 8-pixel-wide edge: only p0/q0 are
+// modified (no p1/q1 filtering for chroma).  tc0 bytes are broadcast
+// two pixels each via punpcklbw.
+static inline void h264_loop_filter_chroma_mmx2(uint8_t *pix, int stride, int alpha1, int beta1, int8_t *tc0)
+{
+ __asm__ volatile(
+ "movq (%0), %%mm0 \n\t" //p1
+ "movq (%0,%2), %%mm1 \n\t" //p0
+ "movq (%1), %%mm2 \n\t" //q0
+ "movq (%1,%2), %%mm3 \n\t" //q1
+ H264_DEBLOCK_MASK(%4, %5)
+ "movd %3, %%mm6 \n\t"
+ "punpcklbw %%mm6, %%mm6 \n\t"
+ "pand %%mm6, %%mm7 \n\t" // mm7 = tc&mask
+ H264_DEBLOCK_P0_Q0(%6, %7)
+ "movq %%mm1, (%0,%2) \n\t"
+ "movq %%mm2, (%1) \n\t"
+
+ :: "r"(pix-2*stride), "r"(pix), "r"((x86_reg)stride),
+ "r"(*(uint32_t*)tc0),
+ "m"(alpha1), "m"(beta1), "m"(ff_bone), "m"(ff_pb_3F)
+ );
+}
+
+// Vertical chroma filter: one 8-wide call, thresholds pre-decremented.
+static void h264_v_loop_filter_chroma_mmx2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+{
+ h264_loop_filter_chroma_mmx2(pix, stride, alpha-1, beta-1, tc0);
+}
+
+// Horizontal chroma filter: transpose the 4-wide strip around the
+// edge, filter vertically in the temp buffer, transpose back.
+static void h264_h_loop_filter_chroma_mmx2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
+{
+ //FIXME: could cut some load/stores by merging transpose with filter
+ DECLARE_ALIGNED_8(uint8_t, trans[8*4]);
+ transpose4x4(trans, pix-2, 8, stride);
+ transpose4x4(trans+4, pix-2+4*stride, 8, stride);
+ h264_loop_filter_chroma_mmx2(trans+2*8, 8, alpha-1, beta-1, tc0);
+ transpose4x4(pix-2, trans, stride, 8);
+ transpose4x4(pix-2+4*stride, trans+4, stride, 8);
+}
+
+// p0 = (p0 + q1 + 2*p1 + 2) >> 2
+// Intra chroma filter for one side; the pxor/pand/psubusb corrects
+// pavgb's rounding so the nested averages match the spec formula.
+// clobbers: mm4
+#define H264_FILTER_CHROMA4(p0, p1, q1, one) \
+ "movq "#p0", %%mm4 \n\t"\
+ "pxor "#q1", %%mm4 \n\t"\
+ "pand "#one", %%mm4 \n\t" /* mm4 = (p0^q1)&1 */\
+ "pavgb "#q1", "#p0" \n\t"\
+ "psubusb %%mm4, "#p0" \n\t"\
+ "pavgb "#p1", "#p0" \n\t" /* dst = avg(p1, avg(p0,q1) - ((p0^q1)&1)) */\
+
+// Intra chroma deblock (no tc0): compute filtered p0'/q0' for all 8
+// pixels, then use the mask (mm7) to keep the filtered value only
+// where the edge conditions hold (psubb/pand/paddb select).
+static inline void h264_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int alpha1, int beta1)
+{
+ __asm__ volatile(
+ "movq (%0), %%mm0 \n\t"
+ "movq (%0,%2), %%mm1 \n\t"
+ "movq (%1), %%mm2 \n\t"
+ "movq (%1,%2), %%mm3 \n\t"
+ H264_DEBLOCK_MASK(%3, %4)
+ "movq %%mm1, %%mm5 \n\t"
+ "movq %%mm2, %%mm6 \n\t"
+ H264_FILTER_CHROMA4(%%mm1, %%mm0, %%mm3, %5) //p0'
+ H264_FILTER_CHROMA4(%%mm2, %%mm3, %%mm0, %5) //q0'
+ "psubb %%mm5, %%mm1 \n\t"
+ "psubb %%mm6, %%mm2 \n\t"
+ "pand %%mm7, %%mm1 \n\t"
+ "pand %%mm7, %%mm2 \n\t"
+ "paddb %%mm5, %%mm1 \n\t"
+ "paddb %%mm6, %%mm2 \n\t"
+ "movq %%mm1, (%0,%2) \n\t"
+ "movq %%mm2, (%1) \n\t"
+ :: "r"(pix-2*stride), "r"(pix), "r"((x86_reg)stride),
+ "m"(alpha1), "m"(beta1), "m"(ff_bone)
+ );
+}
+
+// Vertical intra chroma filter wrapper (thresholds pre-decremented).
+static void h264_v_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int alpha, int beta)
+{
+ h264_loop_filter_chroma_intra_mmx2(pix, stride, alpha-1, beta-1);
+}
+
+// Horizontal intra chroma filter: transpose, filter, transpose back
+// (same scheme as the non-intra horizontal chroma filter above).
+static void h264_h_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int alpha, int beta)
+{
+ //FIXME: could cut some load/stores by merging transpose with filter
+ DECLARE_ALIGNED_8(uint8_t, trans[8*4]);
+ transpose4x4(trans, pix-2, 8, stride);
+ transpose4x4(trans+4, pix-2+4*stride, 8, stride);
+ h264_loop_filter_chroma_intra_mmx2(trans+2*8, 8, alpha-1, beta-1);
+ transpose4x4(pix-2, trans, stride, 8);
+ transpose4x4(pix-2+4*stride, trans+4, stride, 8);
+}
+
+// Compute deblock boundary strengths bS for one macroblock from the
+// nnz cache, reference indices and motion vectors.  dir=1 handles
+// horizontal edges (neighbour at b_idx-8), dir=0 vertical (b_idx-1).
+// mm6/mm5/mm4 hold byte constants 1/3/7 (or 3_1/7_3 pairs for field
+// mode) across all the asm statements below; mm7 stays zero.
+static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2],
+ int bidir, int edges, int step, int mask_mv0, int mask_mv1, int field ) {
+ int dir;
+ __asm__ volatile(
+ "pxor %%mm7, %%mm7 \n\t"
+ "movq %0, %%mm6 \n\t"
+ "movq %1, %%mm5 \n\t"
+ "movq %2, %%mm4 \n\t"
+ ::"m"(ff_pb_1), "m"(ff_pb_3), "m"(ff_pb_7)
+ );
+ if(field)
+ __asm__ volatile(
+ "movq %0, %%mm5 \n\t"
+ "movq %1, %%mm4 \n\t"
+ ::"m"(ff_pb_3_1), "m"(ff_pb_7_3)
+ );
+
+ // could do a special case for dir==0 && edges==1, but it only reduces the
+ // average filter time by 1.2%
+ for( dir=1; dir>=0; dir-- ) {
+ const int d_idx = dir ? -8 : -1;
+ const int mask_mv = dir ? mask_mv1 : mask_mv0;
+ DECLARE_ALIGNED_8(const uint64_t, mask_dir) = dir ? 0 : 0xffffffffffffffffULL;
+ int b_idx, edge, l;
+ for( b_idx=12, edge=0; edge<edges; edge+=step, b_idx+=8*step ) {
+ // for dir==0, keep mm0 (mv/ref result) from the previous
+ // iteration; for dir==1 the pand with 0 clears it
+ __asm__ volatile(
+ "pand %0, %%mm0 \n\t"
+ ::"m"(mask_dir)
+ );
+ if(!(mask_mv & edge)) {
+ __asm__ volatile("pxor %%mm0, %%mm0 \n\t":);
+ for( l = bidir; l >= 0; l-- ) {
+ __asm__ volatile(
+ "movd %0, %%mm1 \n\t"
+ "punpckldq %1, %%mm1 \n\t"
+ "movq %%mm1, %%mm2 \n\t"
+ "psrlw $7, %%mm2 \n\t"
+ "pand %%mm6, %%mm2 \n\t"
+ "por %%mm2, %%mm1 \n\t" // ref_cache with -2 mapped to -1
+ "punpckldq %%mm1, %%mm2 \n\t"
+ "pcmpeqb %%mm2, %%mm1 \n\t"
+ "paddb %%mm6, %%mm1 \n\t"
+ "punpckhbw %%mm7, %%mm1 \n\t" // ref[b] != ref[bn]
+ "por %%mm1, %%mm0 \n\t"
+
+ "movq %2, %%mm1 \n\t"
+ "movq %3, %%mm2 \n\t"
+ "psubw %4, %%mm1 \n\t"
+ "psubw %5, %%mm2 \n\t"
+ "packsswb %%mm2, %%mm1 \n\t"
+ "paddb %%mm5, %%mm1 \n\t"
+ "pminub %%mm4, %%mm1 \n\t"
+ "pcmpeqb %%mm4, %%mm1 \n\t" // abs(mv[b] - mv[bn]) >= limit
+ "por %%mm1, %%mm0 \n\t"
+ ::"m"(ref[l][b_idx]),
+ "m"(ref[l][b_idx+d_idx]),
+ "m"(mv[l][b_idx][0]),
+ "m"(mv[l][b_idx+2][0]),
+ "m"(mv[l][b_idx+d_idx][0]),
+ "m"(mv[l][b_idx+d_idx+2][0])
+ );
+ }
+ }
+ __asm__ volatile(
+ "movd %0, %%mm1 \n\t"
+ "por %1, %%mm1 \n\t"
+ "punpcklbw %%mm7, %%mm1 \n\t"
+ "pcmpgtw %%mm7, %%mm1 \n\t" // nnz[b] || nnz[bn]
+ ::"m"(nnz[b_idx]),
+ "m"(nnz[b_idx+d_idx])
+ );
+ __asm__ volatile(
+ // two pcmpeqw against mm7(=0): normalizes mm0 to 0xffff per
+ // word where it was nonzero (double negation), 0 elsewhere
+ "pcmpeqw %%mm7, %%mm0 \n\t"
+ "pcmpeqw %%mm7, %%mm0 \n\t"
+ "psrlw $15, %%mm0 \n\t" // nonzero -> 1
+ "psrlw $14, %%mm1 \n\t"
+ "movq %%mm0, %%mm2 \n\t"
+ "por %%mm1, %%mm2 \n\t"
+ "psrlw $1, %%mm1 \n\t"
+ "pandn %%mm2, %%mm1 \n\t"
+ "movq %%mm1, %0 \n\t"
+ :"=m"(*bS[dir][edge])
+ ::"memory"
+ );
+ }
+ // the dir==0 (vertical-edge) pass always covers all 4 edges, one at a time
+ edges = 4;
+ step = 1;
+ }
+ // transpose the dir==0 strengths so bS[0] is indexed by edge
+ __asm__ volatile(
+ "movq (%0), %%mm0 \n\t"
+ "movq 8(%0), %%mm1 \n\t"
+ "movq 16(%0), %%mm2 \n\t"
+ "movq 24(%0), %%mm3 \n\t"
+ TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4)
+ "movq %%mm0, (%0) \n\t"
+ "movq %%mm3, 8(%0) \n\t"
+ "movq %%mm4, 16(%0) \n\t"
+ "movq %%mm2, 24(%0) \n\t"
+ ::"r"(bS[0])
+ :"memory"
+ );
+}
+
+/***********************************/
+/* motion compensation */
+
+// One output row of the 6-tap (1,-5,20,20,-5,1) vertical filter.
+// A..E hold five consecutive source rows as words; the next row is
+// loaded into F.  out = (A - 5B + 20C + 20D - 5E + F + 16) >> 5,
+// packed and stored via OP; advances src (%0) and dst (%1) pointers.
+// %4 must be ff_pw_5, %5 ff_pw_16, Z a zero register.  Clobbers A, T.
+#define QPEL_H264V_MM(A,B,C,D,E,F,OP,T,Z,d,q)\
+ "mov"#q" "#C", "#T" \n\t"\
+ "mov"#d" (%0), "#F" \n\t"\
+ "paddw "#D", "#T" \n\t"\
+ "psllw $2, "#T" \n\t"\
+ "psubw "#B", "#T" \n\t"\
+ "psubw "#E", "#T" \n\t"\
+ "punpcklbw "#Z", "#F" \n\t"\
+ "pmullw %4, "#T" \n\t"\
+ "paddw %5, "#A" \n\t"\
+ "add %2, %0 \n\t"\
+ "paddw "#F", "#A" \n\t"\
+ "paddw "#A", "#T" \n\t"\
+ "psraw $5, "#T" \n\t"\
+ "packuswb "#T", "#T" \n\t"\
+ OP(T, (%1), A, d)\
+ "add %3, %1 \n\t"
+
+// Same 6-tap vertical filter, but keeps the 16-bit intermediate
+// (no shift/pack/clip) and stores it at offset OF into the tmp
+// buffer (%1) for the second, horizontal pass.  %3 must be ff_pw_5,
+// %4 the rounding constant.  Clobbers A, T; advances src (%0).
+#define QPEL_H264HV_MM(A,B,C,D,E,F,OF,T,Z,d,q)\
+ "mov"#q" "#C", "#T" \n\t"\
+ "mov"#d" (%0), "#F" \n\t"\
+ "paddw "#D", "#T" \n\t"\
+ "psllw $2, "#T" \n\t"\
+ "paddw %4, "#A" \n\t"\
+ "psubw "#B", "#T" \n\t"\
+ "psubw "#E", "#T" \n\t"\
+ "punpcklbw "#Z", "#F" \n\t"\
+ "pmullw %3, "#T" \n\t"\
+ "paddw "#F", "#A" \n\t"\
+ "add %2, %0 \n\t"\
+ "paddw "#A", "#T" \n\t"\
+ "mov"#q" "#T", "#OF"(%1) \n\t"
+
+// MMX vs SSE2 instantiations of the vertical-filter macros:
+// scratch/zero regs mm6/mm7 (xmm6/xmm7) and movd/movq (movq/movdqa).
+#define QPEL_H264V(A,B,C,D,E,F,OP) QPEL_H264V_MM(A,B,C,D,E,F,OP,%%mm6,%%mm7,d,q)
+#define QPEL_H264HV(A,B,C,D,E,F,OF) QPEL_H264HV_MM(A,B,C,D,E,F,OF,%%mm6,%%mm7,d,q)
+#define QPEL_H264V_XMM(A,B,C,D,E,F,OP) QPEL_H264V_MM(A,B,C,D,E,F,OP,%%xmm6,%%xmm7,q,dqa)
+#define QPEL_H264HV_XMM(A,B,C,D,E,F,OF) QPEL_H264HV_MM(A,B,C,D,E,F,OF,%%xmm6,%%xmm7,q,dqa)
+
+
+#define QPEL_H264(OPNAME, OP, MMX)\
+static av_noinline void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+ int h=4;\
+\
+ __asm__ volatile(\
+ "pxor %%mm7, %%mm7 \n\t"\
+ "movq %5, %%mm4 \n\t"\
+ "movq %6, %%mm5 \n\t"\
+ "1: \n\t"\
+ "movd -1(%0), %%mm1 \n\t"\
+ "movd (%0), %%mm2 \n\t"\
+ "movd 1(%0), %%mm3 \n\t"\
+ "movd 2(%0), %%mm0 \n\t"\
+ "punpcklbw %%mm7, %%mm1 \n\t"\
+ "punpcklbw %%mm7, %%mm2 \n\t"\
+ "punpcklbw %%mm7, %%mm3 \n\t"\
+ "punpcklbw %%mm7, %%mm0 \n\t"\
+ "paddw %%mm0, %%mm1 \n\t"\
+ "paddw %%mm3, %%mm2 \n\t"\
+ "movd -2(%0), %%mm0 \n\t"\
+ "movd 3(%0), %%mm3 \n\t"\
+ "punpcklbw %%mm7, %%mm0 \n\t"\
+ "punpcklbw %%mm7, %%mm3 \n\t"\
+ "paddw %%mm3, %%mm0 \n\t"\
+ "psllw $2, %%mm2 \n\t"\
+ "psubw %%mm1, %%mm2 \n\t"\
+ "pmullw %%mm4, %%mm2 \n\t"\
+ "paddw %%mm5, %%mm0 \n\t"\
+ "paddw %%mm2, %%mm0 \n\t"\
+ "psraw $5, %%mm0 \n\t"\
+ "packuswb %%mm0, %%mm0 \n\t"\
+ OP(%%mm0, (%1),%%mm6, d)\
+ "add %3, %0 \n\t"\
+ "add %4, %1 \n\t"\
+ "decl %2 \n\t"\
+ " jnz 1b \n\t"\
+ : "+a"(src), "+c"(dst), "+g"(h)\
+ : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
+ : "memory"\
+ );\
+}\
+static av_noinline void OPNAME ## h264_qpel4_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\
+ int h=4;\
+ __asm__ volatile(\
+ "pxor %%mm7, %%mm7 \n\t"\
+ "movq %0, %%mm4 \n\t"\
+ "movq %1, %%mm5 \n\t"\
+ :: "m"(ff_pw_5), "m"(ff_pw_16)\
+ );\
+ do{\
+ __asm__ volatile(\
+ "movd -1(%0), %%mm1 \n\t"\
+ "movd (%0), %%mm2 \n\t"\
+ "movd 1(%0), %%mm3 \n\t"\
+ "movd 2(%0), %%mm0 \n\t"\
+ "punpcklbw %%mm7, %%mm1 \n\t"\
+ "punpcklbw %%mm7, %%mm2 \n\t"\
+ "punpcklbw %%mm7, %%mm3 \n\t"\
+ "punpcklbw %%mm7, %%mm0 \n\t"\
+ "paddw %%mm0, %%mm1 \n\t"\
+ "paddw %%mm3, %%mm2 \n\t"\
+ "movd -2(%0), %%mm0 \n\t"\
+ "movd 3(%0), %%mm3 \n\t"\
+ "punpcklbw %%mm7, %%mm0 \n\t"\
+ "punpcklbw %%mm7, %%mm3 \n\t"\
+ "paddw %%mm3, %%mm0 \n\t"\
+ "psllw $2, %%mm2 \n\t"\
+ "psubw %%mm1, %%mm2 \n\t"\
+ "pmullw %%mm4, %%mm2 \n\t"\
+ "paddw %%mm5, %%mm0 \n\t"\
+ "paddw %%mm2, %%mm0 \n\t"\
+ "movd (%2), %%mm3 \n\t"\
+ "psraw $5, %%mm0 \n\t"\
+ "packuswb %%mm0, %%mm0 \n\t"\
+ PAVGB" %%mm3, %%mm0 \n\t"\
+ OP(%%mm0, (%1),%%mm6, d)\
+ "add %4, %0 \n\t"\
+ "add %4, %1 \n\t"\
+ "add %3, %2 \n\t"\
+ : "+a"(src), "+c"(dst), "+d"(src2)\
+ : "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride)\
+ : "memory"\
+ );\
+ }while(--h);\
+}\
+static av_noinline void OPNAME ## h264_qpel4_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+ src -= 2*srcStride;\
+ __asm__ volatile(\
+ "pxor %%mm7, %%mm7 \n\t"\
+ "movd (%0), %%mm0 \n\t"\
+ "add %2, %0 \n\t"\
+ "movd (%0), %%mm1 \n\t"\
+ "add %2, %0 \n\t"\
+ "movd (%0), %%mm2 \n\t"\
+ "add %2, %0 \n\t"\
+ "movd (%0), %%mm3 \n\t"\
+ "add %2, %0 \n\t"\
+ "movd (%0), %%mm4 \n\t"\
+ "add %2, %0 \n\t"\
+ "punpcklbw %%mm7, %%mm0 \n\t"\
+ "punpcklbw %%mm7, %%mm1 \n\t"\
+ "punpcklbw %%mm7, %%mm2 \n\t"\
+ "punpcklbw %%mm7, %%mm3 \n\t"\
+ "punpcklbw %%mm7, %%mm4 \n\t"\
+ QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
+ QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
+ QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
+ QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
+ \
+ : "+a"(src), "+c"(dst)\
+ : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
+ : "memory"\
+ );\
+}\
+static av_noinline void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
+ int h=4;\
+ int w=3;\
+ src -= 2*srcStride+2;\
+ while(w--){\
+ __asm__ volatile(\
+ "pxor %%mm7, %%mm7 \n\t"\
+ "movd (%0), %%mm0 \n\t"\
+ "add %2, %0 \n\t"\
+ "movd (%0), %%mm1 \n\t"\
+ "add %2, %0 \n\t"\
+ "movd (%0), %%mm2 \n\t"\
+ "add %2, %0 \n\t"\
+ "movd (%0), %%mm3 \n\t"\
+ "add %2, %0 \n\t"\
+ "movd (%0), %%mm4 \n\t"\
+ "add %2, %0 \n\t"\
+ "punpcklbw %%mm7, %%mm0 \n\t"\
+ "punpcklbw %%mm7, %%mm1 \n\t"\
+ "punpcklbw %%mm7, %%mm2 \n\t"\
+ "punpcklbw %%mm7, %%mm3 \n\t"\
+ "punpcklbw %%mm7, %%mm4 \n\t"\
+ QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 0*8*3)\
+ QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 1*8*3)\
+ QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 2*8*3)\
+ QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 3*8*3)\
+ \
+ : "+a"(src)\
+ : "c"(tmp), "S"((x86_reg)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)\
+ : "memory"\
+ );\
+ tmp += 4;\
+ src += 4 - 9*srcStride;\
+ }\
+ tmp -= 3*4;\
+ __asm__ volatile(\
+ "1: \n\t"\
+ "movq (%0), %%mm0 \n\t"\
+ "paddw 10(%0), %%mm0 \n\t"\
+ "movq 2(%0), %%mm1 \n\t"\
+ "paddw 8(%0), %%mm1 \n\t"\
+ "movq 4(%0), %%mm2 \n\t"\
+ "paddw 6(%0), %%mm2 \n\t"\
+ "psubw %%mm1, %%mm0 \n\t"/*a-b (abccba)*/\
+ "psraw $2, %%mm0 \n\t"/*(a-b)/4 */\
+ "psubw %%mm1, %%mm0 \n\t"/*(a-b)/4-b */\
+ "paddsw %%mm2, %%mm0 \n\t"\
+ "psraw $2, %%mm0 \n\t"/*((a-b)/4-b+c)/4 */\
+ "paddw %%mm2, %%mm0 \n\t"/*(a-5*b+20*c)/16 */\
+ "psraw $6, %%mm0 \n\t"\
+ "packuswb %%mm0, %%mm0 \n\t"\
+ OP(%%mm0, (%1),%%mm7, d)\
+ "add $24, %0 \n\t"\
+ "add %3, %1 \n\t"\
+ "decl %2 \n\t"\
+ " jnz 1b \n\t"\
+ : "+a"(tmp), "+c"(dst), "+g"(h)\
+ : "S"((x86_reg)dstStride)\
+ : "memory"\
+ );\
+}\
+\
+static av_noinline void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+ int h=8;\
+ __asm__ volatile(\
+ "pxor %%mm7, %%mm7 \n\t"\
+ "movq %5, %%mm6 \n\t"\
+ "1: \n\t"\
+ "movq (%0), %%mm0 \n\t"\
+ "movq 1(%0), %%mm2 \n\t"\
+ "movq %%mm0, %%mm1 \n\t"\
+ "movq %%mm2, %%mm3 \n\t"\
+ "punpcklbw %%mm7, %%mm0 \n\t"\
+ "punpckhbw %%mm7, %%mm1 \n\t"\
+ "punpcklbw %%mm7, %%mm2 \n\t"\
+ "punpckhbw %%mm7, %%mm3 \n\t"\
+ "paddw %%mm2, %%mm0 \n\t"\
+ "paddw %%mm3, %%mm1 \n\t"\
+ "psllw $2, %%mm0 \n\t"\
+ "psllw $2, %%mm1 \n\t"\
+ "movq -1(%0), %%mm2 \n\t"\
+ "movq 2(%0), %%mm4 \n\t"\
+ "movq %%mm2, %%mm3 \n\t"\
+ "movq %%mm4, %%mm5 \n\t"\
+ "punpcklbw %%mm7, %%mm2 \n\t"\
+ "punpckhbw %%mm7, %%mm3 \n\t"\
+ "punpcklbw %%mm7, %%mm4 \n\t"\
+ "punpckhbw %%mm7, %%mm5 \n\t"\
+ "paddw %%mm4, %%mm2 \n\t"\
+ "paddw %%mm3, %%mm5 \n\t"\
+ "psubw %%mm2, %%mm0 \n\t"\
+ "psubw %%mm5, %%mm1 \n\t"\
+ "pmullw %%mm6, %%mm0 \n\t"\
+ "pmullw %%mm6, %%mm1 \n\t"\
+ "movd -2(%0), %%mm2 \n\t"\
+ "movd 7(%0), %%mm5 \n\t"\
+ "punpcklbw %%mm7, %%mm2 \n\t"\
+ "punpcklbw %%mm7, %%mm5 \n\t"\
+ "paddw %%mm3, %%mm2 \n\t"\
+ "paddw %%mm5, %%mm4 \n\t"\
+ "movq %6, %%mm5 \n\t"\
+ "paddw %%mm5, %%mm2 \n\t"\
+ "paddw %%mm5, %%mm4 \n\t"\
+ "paddw %%mm2, %%mm0 \n\t"\
+ "paddw %%mm4, %%mm1 \n\t"\
+ "psraw $5, %%mm0 \n\t"\
+ "psraw $5, %%mm1 \n\t"\
+ "packuswb %%mm1, %%mm0 \n\t"\
+ OP(%%mm0, (%1),%%mm5, q)\
+ "add %3, %0 \n\t"\
+ "add %4, %1 \n\t"\
+ "decl %2 \n\t"\
+ " jnz 1b \n\t"\
+ : "+a"(src), "+c"(dst), "+g"(h)\
+ : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
+ : "memory"\
+ );\
+}\
+\
+static av_noinline void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\
+ int h=8;\
+ __asm__ volatile(\
+ "pxor %%mm7, %%mm7 \n\t"\
+ "movq %0, %%mm6 \n\t"\
+ :: "m"(ff_pw_5)\
+ );\
+ do{\
+ __asm__ volatile(\
+ "movq (%0), %%mm0 \n\t"\
+ "movq 1(%0), %%mm2 \n\t"\
+ "movq %%mm0, %%mm1 \n\t"\
+ "movq %%mm2, %%mm3 \n\t"\
+ "punpcklbw %%mm7, %%mm0 \n\t"\
+ "punpckhbw %%mm7, %%mm1 \n\t"\
+ "punpcklbw %%mm7, %%mm2 \n\t"\
+ "punpckhbw %%mm7, %%mm3 \n\t"\
+ "paddw %%mm2, %%mm0 \n\t"\
+ "paddw %%mm3, %%mm1 \n\t"\
+ "psllw $2, %%mm0 \n\t"\
+ "psllw $2, %%mm1 \n\t"\
+ "movq -1(%0), %%mm2 \n\t"\
+ "movq 2(%0), %%mm4 \n\t"\
+ "movq %%mm2, %%mm3 \n\t"\
+ "movq %%mm4, %%mm5 \n\t"\
+ "punpcklbw %%mm7, %%mm2 \n\t"\
+ "punpckhbw %%mm7, %%mm3 \n\t"\
+ "punpcklbw %%mm7, %%mm4 \n\t"\
+ "punpckhbw %%mm7, %%mm5 \n\t"\
+ "paddw %%mm4, %%mm2 \n\t"\
+ "paddw %%mm3, %%mm5 \n\t"\
+ "psubw %%mm2, %%mm0 \n\t"\
+ "psubw %%mm5, %%mm1 \n\t"\
+ "pmullw %%mm6, %%mm0 \n\t"\
+ "pmullw %%mm6, %%mm1 \n\t"\
+ "movd -2(%0), %%mm2 \n\t"\
+ "movd 7(%0), %%mm5 \n\t"\
+ "punpcklbw %%mm7, %%mm2 \n\t"\
+ "punpcklbw %%mm7, %%mm5 \n\t"\
+ "paddw %%mm3, %%mm2 \n\t"\
+ "paddw %%mm5, %%mm4 \n\t"\
+ "movq %5, %%mm5 \n\t"\
+ "paddw %%mm5, %%mm2 \n\t"\
+ "paddw %%mm5, %%mm4 \n\t"\
+ "paddw %%mm2, %%mm0 \n\t"\
+ "paddw %%mm4, %%mm1 \n\t"\
+ "psraw $5, %%mm0 \n\t"\
+ "psraw $5, %%mm1 \n\t"\
+ "movq (%2), %%mm4 \n\t"\
+ "packuswb %%mm1, %%mm0 \n\t"\
+ PAVGB" %%mm4, %%mm0 \n\t"\
+ OP(%%mm0, (%1),%%mm5, q)\
+ "add %4, %0 \n\t"\
+ "add %4, %1 \n\t"\
+ "add %3, %2 \n\t"\
+ : "+a"(src), "+c"(dst), "+d"(src2)\
+ : "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride),\
+ "m"(ff_pw_16)\
+ : "memory"\
+ );\
+ }while(--h);\
+}\
+\
+static av_noinline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
+ int w= 2;\
+ src -= 2*srcStride;\
+ \
+ while(w--){\
+ __asm__ volatile(\
+ "pxor %%mm7, %%mm7 \n\t"\
+ "movd (%0), %%mm0 \n\t"\
+ "add %2, %0 \n\t"\
+ "movd (%0), %%mm1 \n\t"\
+ "add %2, %0 \n\t"\
+ "movd (%0), %%mm2 \n\t"\
+ "add %2, %0 \n\t"\
+ "movd (%0), %%mm3 \n\t"\
+ "add %2, %0 \n\t"\
+ "movd (%0), %%mm4 \n\t"\
+ "add %2, %0 \n\t"\
+ "punpcklbw %%mm7, %%mm0 \n\t"\
+ "punpcklbw %%mm7, %%mm1 \n\t"\
+ "punpcklbw %%mm7, %%mm2 \n\t"\
+ "punpcklbw %%mm7, %%mm3 \n\t"\
+ "punpcklbw %%mm7, %%mm4 \n\t"\
+ QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
+ QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
+ QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
+ QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
+ QPEL_H264V(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\
+ QPEL_H264V(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\
+ QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
+ QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
+ \
+ : "+a"(src), "+c"(dst)\
+ : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
+ : "memory"\
+ );\
+ if(h==16){\
+ __asm__ volatile(\
+ QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
+ QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
+ QPEL_H264V(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\
+ QPEL_H264V(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP)\
+ QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
+ QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
+ QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
+ QPEL_H264V(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
+ \
+ : "+a"(src), "+c"(dst)\
+ : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
+ : "memory"\
+ );\
+ }\
+ src += 4-(h+5)*srcStride;\
+ dst += 4-h*dstStride;\
+ }\
+}\
+static av_always_inline void OPNAME ## h264_qpel8or16_hv1_lowpass_ ## MMX(int16_t *tmp, uint8_t *src, int tmpStride, int srcStride, int size){\
+ int w = (size+8)>>2;\
+ src -= 2*srcStride+2;\
+ while(w--){\
+ __asm__ volatile(\
+ "pxor %%mm7, %%mm7 \n\t"\
+ "movd (%0), %%mm0 \n\t"\
+ "add %2, %0 \n\t"\
+ "movd (%0), %%mm1 \n\t"\
+ "add %2, %0 \n\t"\
+ "movd (%0), %%mm2 \n\t"\
+ "add %2, %0 \n\t"\
+ "movd (%0), %%mm3 \n\t"\
+ "add %2, %0 \n\t"\
+ "movd (%0), %%mm4 \n\t"\
+ "add %2, %0 \n\t"\
+ "punpcklbw %%mm7, %%mm0 \n\t"\
+ "punpcklbw %%mm7, %%mm1 \n\t"\
+ "punpcklbw %%mm7, %%mm2 \n\t"\
+ "punpcklbw %%mm7, %%mm3 \n\t"\
+ "punpcklbw %%mm7, %%mm4 \n\t"\
+ QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 0*48)\
+ QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 1*48)\
+ QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 2*48)\
+ QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 3*48)\
+ QPEL_H264HV(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, 4*48)\
+ QPEL_H264HV(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, 5*48)\
+ QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 6*48)\
+ QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 7*48)\
+ : "+a"(src)\
+ : "c"(tmp), "S"((x86_reg)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)\
+ : "memory"\
+ );\
+ if(size==16){\
+ __asm__ volatile(\
+ QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 8*48)\
+ QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 9*48)\
+ QPEL_H264HV(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, 10*48)\
+ QPEL_H264HV(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, 11*48)\
+ QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 12*48)\
+ QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 13*48)\
+ QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 14*48)\
+ QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 15*48)\
+ : "+a"(src)\
+ : "c"(tmp), "S"((x86_reg)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)\
+ : "memory"\
+ );\
+ }\
+ tmp += 4;\
+ src += 4 - (size+5)*srcStride;\
+ }\
+}\
+static av_always_inline void OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, int dstStride, int tmpStride, int size){\
+ int w = size>>4;\
+ do{\
+ int h = size;\
+ __asm__ volatile(\
+ "1: \n\t"\
+ "movq (%0), %%mm0 \n\t"\
+ "movq 8(%0), %%mm3 \n\t"\
+ "movq 2(%0), %%mm1 \n\t"\
+ "movq 10(%0), %%mm4 \n\t"\
+ "paddw %%mm4, %%mm0 \n\t"\
+ "paddw %%mm3, %%mm1 \n\t"\
+ "paddw 18(%0), %%mm3 \n\t"\
+ "paddw 16(%0), %%mm4 \n\t"\
+ "movq 4(%0), %%mm2 \n\t"\
+ "movq 12(%0), %%mm5 \n\t"\
+ "paddw 6(%0), %%mm2 \n\t"\
+ "paddw 14(%0), %%mm5 \n\t"\
+ "psubw %%mm1, %%mm0 \n\t"\
+ "psubw %%mm4, %%mm3 \n\t"\
+ "psraw $2, %%mm0 \n\t"\
+ "psraw $2, %%mm3 \n\t"\
+ "psubw %%mm1, %%mm0 \n\t"\
+ "psubw %%mm4, %%mm3 \n\t"\
+ "paddsw %%mm2, %%mm0 \n\t"\
+ "paddsw %%mm5, %%mm3 \n\t"\
+ "psraw $2, %%mm0 \n\t"\
+ "psraw $2, %%mm3 \n\t"\
+ "paddw %%mm2, %%mm0 \n\t"\
+ "paddw %%mm5, %%mm3 \n\t"\
+ "psraw $6, %%mm0 \n\t"\
+ "psraw $6, %%mm3 \n\t"\
+ "packuswb %%mm3, %%mm0 \n\t"\
+ OP(%%mm0, (%1),%%mm7, q)\
+ "add $48, %0 \n\t"\
+ "add %3, %1 \n\t"\
+ "decl %2 \n\t"\
+ " jnz 1b \n\t"\
+ : "+a"(tmp), "+c"(dst), "+g"(h)\
+ : "S"((x86_reg)dstStride)\
+ : "memory"\
+ );\
+ tmp += 8 - size*24;\
+ dst += 8 - size*dstStride;\
+ }while(w--);\
+}\
+\
+static void OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+ OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 8);\
+}\
+static av_noinline void OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+ OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 16);\
+ OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
+}\
+\
+static void OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+ OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\
+ OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
+ src += 8*srcStride;\
+ dst += 8*dstStride;\
+ OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\
+ OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
+}\
+\
+static av_noinline void OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\
+ OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst , src , src2 , dstStride, src2Stride);\
+ OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\
+ src += 8*dstStride;\
+ dst += 8*dstStride;\
+ src2 += 8*src2Stride;\
+ OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst , src , src2 , dstStride, src2Stride);\
+ OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\
+}\
+\
+static av_noinline void OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride, int size){\
+ put_h264_qpel8or16_hv1_lowpass_ ## MMX(tmp, src, tmpStride, srcStride, size);\
+ OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(dst, tmp, dstStride, tmpStride, size);\
+}\
+static void OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
+ OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst , tmp , src , dstStride, tmpStride, srcStride, 8);\
+}\
+\
+static void OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
+ OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst , tmp , src , dstStride, tmpStride, srcStride, 16);\
+}\
+\
+static av_noinline void OPNAME ## pixels4_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\
+{\
+ __asm__ volatile(\
+ "movq (%1), %%mm0 \n\t"\
+ "movq 24(%1), %%mm1 \n\t"\
+ "psraw $5, %%mm0 \n\t"\
+ "psraw $5, %%mm1 \n\t"\
+ "packuswb %%mm0, %%mm0 \n\t"\
+ "packuswb %%mm1, %%mm1 \n\t"\
+ PAVGB" (%0), %%mm0 \n\t"\
+ PAVGB" (%0,%3), %%mm1 \n\t"\
+ OP(%%mm0, (%2), %%mm4, d)\
+ OP(%%mm1, (%2,%4), %%mm5, d)\
+ "lea (%0,%3,2), %0 \n\t"\
+ "lea (%2,%4,2), %2 \n\t"\
+ "movq 48(%1), %%mm0 \n\t"\
+ "movq 72(%1), %%mm1 \n\t"\
+ "psraw $5, %%mm0 \n\t"\
+ "psraw $5, %%mm1 \n\t"\
+ "packuswb %%mm0, %%mm0 \n\t"\
+ "packuswb %%mm1, %%mm1 \n\t"\
+ PAVGB" (%0), %%mm0 \n\t"\
+ PAVGB" (%0,%3), %%mm1 \n\t"\
+ OP(%%mm0, (%2), %%mm4, d)\
+ OP(%%mm1, (%2,%4), %%mm5, d)\
+ :"+a"(src8), "+c"(src16), "+d"(dst)\
+ :"S"((x86_reg)src8Stride), "D"((x86_reg)dstStride)\
+ :"memory");\
+}\
+static av_noinline void OPNAME ## pixels8_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\
+{\
+ do{\
+ __asm__ volatile(\
+ "movq (%1), %%mm0 \n\t"\
+ "movq 8(%1), %%mm1 \n\t"\
+ "movq 48(%1), %%mm2 \n\t"\
+ "movq 8+48(%1), %%mm3 \n\t"\
+ "psraw $5, %%mm0 \n\t"\
+ "psraw $5, %%mm1 \n\t"\
+ "psraw $5, %%mm2 \n\t"\
+ "psraw $5, %%mm3 \n\t"\
+ "packuswb %%mm1, %%mm0 \n\t"\
+ "packuswb %%mm3, %%mm2 \n\t"\
+ PAVGB" (%0), %%mm0 \n\t"\
+ PAVGB" (%0,%3), %%mm2 \n\t"\
+ OP(%%mm0, (%2), %%mm5, q)\
+ OP(%%mm2, (%2,%4), %%mm5, q)\
+ ::"a"(src8), "c"(src16), "d"(dst),\
+ "r"((x86_reg)src8Stride), "r"((x86_reg)dstStride)\
+ :"memory");\
+ src8 += 2L*src8Stride;\
+ src16 += 48;\
+ dst += 2L*dstStride;\
+ }while(h-=2);\
+}\
+static void OPNAME ## pixels16_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\
+{\
+ OPNAME ## pixels8_l2_shift5_ ## MMX(dst , src16 , src8 , dstStride, src8Stride, h);\
+ OPNAME ## pixels8_l2_shift5_ ## MMX(dst+8, src16+8, src8+8, dstStride, src8Stride, h);\
+}\
+
+
+#ifdef ARCH_X86_64
+#define QPEL_H264_H16_XMM(OPNAME, OP, MMX)\
+static av_noinline void OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\
+ int h=16;\
+ __asm__ volatile(\
+ "pxor %%xmm15, %%xmm15 \n\t"\
+ "movdqa %6, %%xmm14 \n\t"\
+ "movdqa %7, %%xmm13 \n\t"\
+ "1: \n\t"\
+ "lddqu 3(%0), %%xmm1 \n\t"\
+ "lddqu -5(%0), %%xmm7 \n\t"\
+ "movdqa %%xmm1, %%xmm0 \n\t"\
+ "punpckhbw %%xmm15, %%xmm1 \n\t"\
+ "punpcklbw %%xmm15, %%xmm0 \n\t"\
+ "punpcklbw %%xmm15, %%xmm7 \n\t"\
+ "movdqa %%xmm1, %%xmm2 \n\t"\
+ "movdqa %%xmm0, %%xmm6 \n\t"\
+ "movdqa %%xmm1, %%xmm3 \n\t"\
+ "movdqa %%xmm0, %%xmm8 \n\t"\
+ "movdqa %%xmm1, %%xmm4 \n\t"\
+ "movdqa %%xmm0, %%xmm9 \n\t"\
+ "movdqa %%xmm1, %%xmm5 \n\t"\
+ "movdqa %%xmm0, %%xmm10 \n\t"\
+ "palignr $6, %%xmm0, %%xmm5 \n\t"\
+ "palignr $6, %%xmm7, %%xmm10\n\t"\
+ "palignr $8, %%xmm0, %%xmm4 \n\t"\
+ "palignr $8, %%xmm7, %%xmm9 \n\t"\
+ "palignr $10,%%xmm0, %%xmm3 \n\t"\
+ "palignr $10,%%xmm7, %%xmm8 \n\t"\
+ "paddw %%xmm1, %%xmm5 \n\t"\
+ "paddw %%xmm0, %%xmm10 \n\t"\
+ "palignr $12,%%xmm0, %%xmm2 \n\t"\
+ "palignr $12,%%xmm7, %%xmm6 \n\t"\
+ "palignr $14,%%xmm0, %%xmm1 \n\t"\
+ "palignr $14,%%xmm7, %%xmm0 \n\t"\
+ "paddw %%xmm3, %%xmm2 \n\t"\
+ "paddw %%xmm8, %%xmm6 \n\t"\
+ "paddw %%xmm4, %%xmm1 \n\t"\
+ "paddw %%xmm9, %%xmm0 \n\t"\
+ "psllw $2, %%xmm2 \n\t"\
+ "psllw $2, %%xmm6 \n\t"\
+ "psubw %%xmm1, %%xmm2 \n\t"\
+ "psubw %%xmm0, %%xmm6 \n\t"\
+ "paddw %%xmm13,%%xmm5 \n\t"\
+ "paddw %%xmm13,%%xmm10 \n\t"\
+ "pmullw %%xmm14,%%xmm2 \n\t"\
+ "pmullw %%xmm14,%%xmm6 \n\t"\
+ "lddqu (%2), %%xmm3 \n\t"\
+ "paddw %%xmm5, %%xmm2 \n\t"\
+ "paddw %%xmm10,%%xmm6 \n\t"\
+ "psraw $5, %%xmm2 \n\t"\
+ "psraw $5, %%xmm6 \n\t"\
+ "packuswb %%xmm2,%%xmm6 \n\t"\
+ "pavgb %%xmm3, %%xmm6 \n\t"\
+ OP(%%xmm6, (%1), %%xmm4, dqa)\
+ "add %5, %0 \n\t"\
+ "add %5, %1 \n\t"\
+ "add %4, %2 \n\t"\
+ "decl %3 \n\t"\
+ "jg 1b \n\t"\
+ : "+a"(src), "+c"(dst), "+d"(src2), "+g"(h)\
+ : "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride),\
+ "m"(ff_pw_5), "m"(ff_pw_16)\
+ : "memory"\
+ );\
+}
+#else // ARCH_X86_64
+#define QPEL_H264_H16_XMM(OPNAME, OP, MMX)\
+static av_noinline void OPNAME ## h264_qpel16_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\
+ OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst , src , src2 , dstStride, src2Stride);\
+ OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\
+ src += 8*dstStride;\
+ dst += 8*dstStride;\
+ src2 += 8*src2Stride;\
+ OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst , src , src2 , dstStride, src2Stride);\
+ OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(dst+8, src+8, src2+8, dstStride, src2Stride);\
+}
+#endif // ARCH_X86_64
+
+#define QPEL_H264_H_XMM(OPNAME, OP, MMX)\
+static av_noinline void OPNAME ## h264_qpel8_h_lowpass_l2_ ## MMX(uint8_t *dst, uint8_t *src, uint8_t *src2, int dstStride, int src2Stride){\
+ int h=8;\
+ __asm__ volatile(\
+ "pxor %%xmm7, %%xmm7 \n\t"\
+ "movdqa %0, %%xmm6 \n\t"\
+ :: "m"(ff_pw_5)\
+ );\
+ do{\
+ __asm__ volatile(\
+ "lddqu -5(%0), %%xmm1 \n\t"\
+ "movdqa %%xmm1, %%xmm0 \n\t"\
+ "punpckhbw %%xmm7, %%xmm1 \n\t"\
+ "punpcklbw %%xmm7, %%xmm0 \n\t"\
+ "movdqa %%xmm1, %%xmm2 \n\t"\
+ "movdqa %%xmm1, %%xmm3 \n\t"\
+ "movdqa %%xmm1, %%xmm4 \n\t"\
+ "movdqa %%xmm1, %%xmm5 \n\t"\
+ "palignr $6, %%xmm0, %%xmm5 \n\t"\
+ "palignr $8, %%xmm0, %%xmm4 \n\t"\
+ "palignr $10,%%xmm0, %%xmm3 \n\t"\
+ "paddw %%xmm1, %%xmm5 \n\t"\
+ "palignr $12,%%xmm0, %%xmm2 \n\t"\
+ "palignr $14,%%xmm0, %%xmm1 \n\t"\
+ "paddw %%xmm3, %%xmm2 \n\t"\
+ "paddw %%xmm4, %%xmm1 \n\t"\
+ "psllw $2, %%xmm2 \n\t"\
+ "movq (%2), %%xmm3 \n\t"\
+ "psubw %%xmm1, %%xmm2 \n\t"\
+ "paddw %5, %%xmm5 \n\t"\
+ "pmullw %%xmm6, %%xmm2 \n\t"\
+ "paddw %%xmm5, %%xmm2 \n\t"\
+ "psraw $5, %%xmm2 \n\t"\
+ "packuswb %%xmm2, %%xmm2 \n\t"\
+ "pavgb %%xmm3, %%xmm2 \n\t"\
+ OP(%%xmm2, (%1), %%xmm4, q)\
+ "add %4, %0 \n\t"\
+ "add %4, %1 \n\t"\
+ "add %3, %2 \n\t"\
+ : "+a"(src), "+c"(dst), "+d"(src2)\
+ : "D"((x86_reg)src2Stride), "S"((x86_reg)dstStride),\
+ "m"(ff_pw_16)\
+ : "memory"\
+ );\
+ }while(--h);\
+}\
+QPEL_H264_H16_XMM(OPNAME, OP, MMX)\
+\
+static av_noinline void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+ int h=8;\
+ __asm__ volatile(\
+ "pxor %%xmm7, %%xmm7 \n\t"\
+ "movdqa %5, %%xmm6 \n\t"\
+ "1: \n\t"\
+ "lddqu -5(%0), %%xmm1 \n\t"\
+ "movdqa %%xmm1, %%xmm0 \n\t"\
+ "punpckhbw %%xmm7, %%xmm1 \n\t"\
+ "punpcklbw %%xmm7, %%xmm0 \n\t"\
+ "movdqa %%xmm1, %%xmm2 \n\t"\
+ "movdqa %%xmm1, %%xmm3 \n\t"\
+ "movdqa %%xmm1, %%xmm4 \n\t"\
+ "movdqa %%xmm1, %%xmm5 \n\t"\
+ "palignr $6, %%xmm0, %%xmm5 \n\t"\
+ "palignr $8, %%xmm0, %%xmm4 \n\t"\
+ "palignr $10,%%xmm0, %%xmm3 \n\t"\
+ "paddw %%xmm1, %%xmm5 \n\t"\
+ "palignr $12,%%xmm0, %%xmm2 \n\t"\
+ "palignr $14,%%xmm0, %%xmm1 \n\t"\
+ "paddw %%xmm3, %%xmm2 \n\t"\
+ "paddw %%xmm4, %%xmm1 \n\t"\
+ "psllw $2, %%xmm2 \n\t"\
+ "psubw %%xmm1, %%xmm2 \n\t"\
+ "paddw %6, %%xmm5 \n\t"\
+ "pmullw %%xmm6, %%xmm2 \n\t"\
+ "paddw %%xmm5, %%xmm2 \n\t"\
+ "psraw $5, %%xmm2 \n\t"\
+ "packuswb %%xmm2, %%xmm2 \n\t"\
+ OP(%%xmm2, (%1), %%xmm4, q)\
+ "add %3, %0 \n\t"\
+ "add %4, %1 \n\t"\
+ "decl %2 \n\t"\
+ " jnz 1b \n\t"\
+ : "+a"(src), "+c"(dst), "+g"(h)\
+ : "D"((x86_reg)srcStride), "S"((x86_reg)dstStride),\
+ "m"(ff_pw_5), "m"(ff_pw_16)\
+ : "memory"\
+ );\
+}\
+static void OPNAME ## h264_qpel16_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+ OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\
+ OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
+ src += 8*srcStride;\
+ dst += 8*dstStride;\
+ OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst , src , dstStride, srcStride);\
+ OPNAME ## h264_qpel8_h_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride);\
+}\
+
+#define QPEL_H264_V_XMM(OPNAME, OP, MMX)\
+static av_noinline void OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
+ src -= 2*srcStride;\
+ \
+ __asm__ volatile(\
+ "pxor %%xmm7, %%xmm7 \n\t"\
+ "movq (%0), %%xmm0 \n\t"\
+ "add %2, %0 \n\t"\
+ "movq (%0), %%xmm1 \n\t"\
+ "add %2, %0 \n\t"\
+ "movq (%0), %%xmm2 \n\t"\
+ "add %2, %0 \n\t"\
+ "movq (%0), %%xmm3 \n\t"\
+ "add %2, %0 \n\t"\
+ "movq (%0), %%xmm4 \n\t"\
+ "add %2, %0 \n\t"\
+ "punpcklbw %%xmm7, %%xmm0 \n\t"\
+ "punpcklbw %%xmm7, %%xmm1 \n\t"\
+ "punpcklbw %%xmm7, %%xmm2 \n\t"\
+ "punpcklbw %%xmm7, %%xmm3 \n\t"\
+ "punpcklbw %%xmm7, %%xmm4 \n\t"\
+ QPEL_H264V_XMM(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, OP)\
+ QPEL_H264V_XMM(%%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, OP)\
+ QPEL_H264V_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, OP)\
+ QPEL_H264V_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, OP)\
+ QPEL_H264V_XMM(%%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, OP)\
+ QPEL_H264V_XMM(%%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, OP)\
+ QPEL_H264V_XMM(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, OP)\
+ QPEL_H264V_XMM(%%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, OP)\
+ \
+ : "+a"(src), "+c"(dst)\
+ : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
+ : "memory"\
+ );\
+ if(h==16){\
+ __asm__ volatile(\
+ QPEL_H264V_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, OP)\
+ QPEL_H264V_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, OP)\
+ QPEL_H264V_XMM(%%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, OP)\
+ QPEL_H264V_XMM(%%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, OP)\
+ QPEL_H264V_XMM(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, OP)\
+ QPEL_H264V_XMM(%%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, OP)\
+ QPEL_H264V_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, OP)\
+ QPEL_H264V_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, OP)\
+ \
+ : "+a"(src), "+c"(dst)\
+ : "S"((x86_reg)srcStride), "D"((x86_reg)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
+ : "memory"\
+ );\
+ }\
+}\
+static void OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+ OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 8);\
+}\
+static av_noinline void OPNAME ## h264_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
+ OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst , src , dstStride, srcStride, 16);\
+ OPNAME ## h264_qpel8or16_v_lowpass_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
+}
+
+static av_always_inline void put_h264_qpel8or16_hv1_lowpass_sse2(int16_t *tmp, uint8_t *src, int tmpStride, int srcStride, int size){
+ int w = (size+8)>>3;
+ src -= 2*srcStride+2;
+ while(w--){
+ __asm__ volatile(
+ "pxor %%xmm7, %%xmm7 \n\t"
+ "movq (%0), %%xmm0 \n\t"
+ "add %2, %0 \n\t"
+ "movq (%0), %%xmm1 \n\t"
+ "add %2, %0 \n\t"
+ "movq (%0), %%xmm2 \n\t"
+ "add %2, %0 \n\t"
+ "movq (%0), %%xmm3 \n\t"
+ "add %2, %0 \n\t"
+ "movq (%0), %%xmm4 \n\t"
+ "add %2, %0 \n\t"
+ "punpcklbw %%xmm7, %%xmm0 \n\t"
+ "punpcklbw %%xmm7, %%xmm1 \n\t"
+ "punpcklbw %%xmm7, %%xmm2 \n\t"
+ "punpcklbw %%xmm7, %%xmm3 \n\t"
+ "punpcklbw %%xmm7, %%xmm4 \n\t"
+ QPEL_H264HV_XMM(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, 0*48)
+ QPEL_H264HV_XMM(%%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, 1*48)
+ QPEL_H264HV_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, 2*48)
+ QPEL_H264HV_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, 3*48)
+ QPEL_H264HV_XMM(%%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, 4*48)
+ QPEL_H264HV_XMM(%%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, 5*48)
+ QPEL_H264HV_XMM(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, 6*48)
+ QPEL_H264HV_XMM(%%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, 7*48)
+ : "+a"(src)
+ : "c"(tmp), "S"((x86_reg)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)
+ : "memory"
+ );
+ if(size==16){
+ __asm__ volatile(
+ QPEL_H264HV_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, 8*48)
+ QPEL_H264HV_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, 9*48)
+ QPEL_H264HV_XMM(%%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, 10*48)
+ QPEL_H264HV_XMM(%%xmm5, %%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, 11*48)
+ QPEL_H264HV_XMM(%%xmm0, %%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, 12*48)
+ QPEL_H264HV_XMM(%%xmm1, %%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, 13*48)
+ QPEL_H264HV_XMM(%%xmm2, %%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, 14*48)
+ QPEL_H264HV_XMM(%%xmm3, %%xmm4, %%xmm5, %%xmm0, %%xmm1, %%xmm2, 15*48)
+ : "+a"(src)
+ : "c"(tmp), "S"((x86_reg)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)
+ : "memory"
+ );
+ }
+ tmp += 8;
+ src += 8 - (size+5)*srcStride;
+ }
+}
+
+#define QPEL_H264_HV2_XMM(OPNAME, OP, MMX)\
+static av_always_inline void OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, int dstStride, int tmpStride, int size){\
+ int h = size;\
+ if(size == 16){\
+ __asm__ volatile(\
+ "1: \n\t"\
+ "movdqa 32(%0), %%xmm4 \n\t"\
+ "movdqa 16(%0), %%xmm5 \n\t"\
+ "movdqa (%0), %%xmm7 \n\t"\
+ "movdqa %%xmm4, %%xmm3 \n\t"\
+ "movdqa %%xmm4, %%xmm2 \n\t"\
+ "movdqa %%xmm4, %%xmm1 \n\t"\
+ "movdqa %%xmm4, %%xmm0 \n\t"\
+ "palignr $10, %%xmm5, %%xmm0 \n\t"\
+ "palignr $8, %%xmm5, %%xmm1 \n\t"\
+ "palignr $6, %%xmm5, %%xmm2 \n\t"\
+ "palignr $4, %%xmm5, %%xmm3 \n\t"\
+ "palignr $2, %%xmm5, %%xmm4 \n\t"\
+ "paddw %%xmm5, %%xmm0 \n\t"\
+ "paddw %%xmm4, %%xmm1 \n\t"\
+ "paddw %%xmm3, %%xmm2 \n\t"\
+ "movdqa %%xmm5, %%xmm6 \n\t"\
+ "movdqa %%xmm5, %%xmm4 \n\t"\
+ "movdqa %%xmm5, %%xmm3 \n\t"\
+ "palignr $8, %%xmm7, %%xmm4 \n\t"\
+ "palignr $2, %%xmm7, %%xmm6 \n\t"\
+ "palignr $10, %%xmm7, %%xmm3 \n\t"\
+ "paddw %%xmm6, %%xmm4 \n\t"\
+ "movdqa %%xmm5, %%xmm6 \n\t"\
+ "palignr $6, %%xmm7, %%xmm5 \n\t"\
+ "palignr $4, %%xmm7, %%xmm6 \n\t"\
+ "paddw %%xmm7, %%xmm3 \n\t"\
+ "paddw %%xmm6, %%xmm5 \n\t"\
+ \
+ "psubw %%xmm1, %%xmm0 \n\t"\
+ "psubw %%xmm4, %%xmm3 \n\t"\
+ "psraw $2, %%xmm0 \n\t"\
+ "psraw $2, %%xmm3 \n\t"\
+ "psubw %%xmm1, %%xmm0 \n\t"\
+ "psubw %%xmm4, %%xmm3 \n\t"\
+ "paddw %%xmm2, %%xmm0 \n\t"\
+ "paddw %%xmm5, %%xmm3 \n\t"\
+ "psraw $2, %%xmm0 \n\t"\
+ "psraw $2, %%xmm3 \n\t"\
+ "paddw %%xmm2, %%xmm0 \n\t"\
+ "paddw %%xmm5, %%xmm3 \n\t"\
+ "psraw $6, %%xmm0 \n\t"\
+ "psraw $6, %%xmm3 \n\t"\
+ "packuswb %%xmm0, %%xmm3 \n\t"\
+ OP(%%xmm3, (%1), %%xmm7, dqa)\
+ "add $48, %0 \n\t"\
+ "add %3, %1 \n\t"\
+ "decl %2 \n\t"\
+ " jnz 1b \n\t"\
+ : "+a"(tmp), "+c"(dst), "+g"(h)\
+ : "S"((x86_reg)dstStride)\
+ : "memory"\
+ );\
+ }else{\
+ __asm__ volatile(\
+ "1: \n\t"\
+ "movdqa 16(%0), %%xmm1 \n\t"\
+ "movdqa (%0), %%xmm0 \n\t"\
+ "movdqa %%xmm1, %%xmm2 \n\t"\
+ "movdqa %%xmm1, %%xmm3 \n\t"\
+ "movdqa %%xmm1, %%xmm4 \n\t"\
+ "movdqa %%xmm1, %%xmm5 \n\t"\
+ "palignr $10, %%xmm0, %%xmm5 \n\t"\
+ "palignr $8, %%xmm0, %%xmm4 \n\t"\
+ "palignr $6, %%xmm0, %%xmm3 \n\t"\
+ "palignr $4, %%xmm0, %%xmm2 \n\t"\
+ "palignr $2, %%xmm0, %%xmm1 \n\t"\
+ "paddw %%xmm5, %%xmm0 \n\t"\
+ "paddw %%xmm4, %%xmm1 \n\t"\
+ "paddw %%xmm3, %%xmm2 \n\t"\
+ "psubw %%xmm1, %%xmm0 \n\t"\
+ "psraw $2, %%xmm0 \n\t"\
+ "psubw %%xmm1, %%xmm0 \n\t"\
+ "paddw %%xmm2, %%xmm0 \n\t"\
+ "psraw $2, %%xmm0 \n\t"\
+ "paddw %%xmm2, %%xmm0 \n\t"\
+ "psraw $6, %%xmm0 \n\t"\
+ "packuswb %%xmm0, %%xmm0 \n\t"\
+ OP(%%xmm0, (%1), %%xmm7, q)\
+ "add $48, %0 \n\t"\
+ "add %3, %1 \n\t"\
+ "decl %2 \n\t"\
+ " jnz 1b \n\t"\
+ : "+a"(tmp), "+c"(dst), "+g"(h)\
+ : "S"((x86_reg)dstStride)\
+ : "memory"\
+ );\
+ }\
+}
+
+#define QPEL_H264_HV_XMM(OPNAME, OP, MMX)\
+static av_noinline void OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride, int size){\
+ put_h264_qpel8or16_hv1_lowpass_sse2(tmp, src, tmpStride, srcStride, size);\
+ OPNAME ## h264_qpel8or16_hv2_lowpass_ ## MMX(dst, tmp, dstStride, tmpStride, size);\
+}\
+static void OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
+ OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst, tmp, src, dstStride, tmpStride, srcStride, 8);\
+}\
+static void OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
+ OPNAME ## h264_qpel8or16_hv_lowpass_ ## MMX(dst, tmp, src, dstStride, tmpStride, srcStride, 16);\
+}\
+
+#define put_pixels8_l2_sse2 put_pixels8_l2_mmx2
+#define avg_pixels8_l2_sse2 avg_pixels8_l2_mmx2
+#define put_pixels16_l2_sse2 put_pixels16_l2_mmx2
+#define avg_pixels16_l2_sse2 avg_pixels16_l2_mmx2
+#define put_pixels8_l2_ssse3 put_pixels8_l2_mmx2
+#define avg_pixels8_l2_ssse3 avg_pixels8_l2_mmx2
+#define put_pixels16_l2_ssse3 put_pixels16_l2_mmx2
+#define avg_pixels16_l2_ssse3 avg_pixels16_l2_mmx2
+
+#define put_pixels8_l2_shift5_sse2 put_pixels8_l2_shift5_mmx2
+#define avg_pixels8_l2_shift5_sse2 avg_pixels8_l2_shift5_mmx2
+#define put_pixels16_l2_shift5_sse2 put_pixels16_l2_shift5_mmx2
+#define avg_pixels16_l2_shift5_sse2 avg_pixels16_l2_shift5_mmx2
+#define put_pixels8_l2_shift5_ssse3 put_pixels8_l2_shift5_mmx2
+#define avg_pixels8_l2_shift5_ssse3 avg_pixels8_l2_shift5_mmx2
+#define put_pixels16_l2_shift5_ssse3 put_pixels16_l2_shift5_mmx2
+#define avg_pixels16_l2_shift5_ssse3 avg_pixels16_l2_shift5_mmx2
+
+#define put_h264_qpel8_h_lowpass_l2_sse2 put_h264_qpel8_h_lowpass_l2_mmx2
+#define avg_h264_qpel8_h_lowpass_l2_sse2 avg_h264_qpel8_h_lowpass_l2_mmx2
+#define put_h264_qpel16_h_lowpass_l2_sse2 put_h264_qpel16_h_lowpass_l2_mmx2
+#define avg_h264_qpel16_h_lowpass_l2_sse2 avg_h264_qpel16_h_lowpass_l2_mmx2
+
+#define put_h264_qpel8_v_lowpass_ssse3 put_h264_qpel8_v_lowpass_sse2
+#define avg_h264_qpel8_v_lowpass_ssse3 avg_h264_qpel8_v_lowpass_sse2
+#define put_h264_qpel16_v_lowpass_ssse3 put_h264_qpel16_v_lowpass_sse2
+#define avg_h264_qpel16_v_lowpass_ssse3 avg_h264_qpel16_v_lowpass_sse2
+
+#define put_h264_qpel8or16_hv2_lowpass_sse2 put_h264_qpel8or16_hv2_lowpass_mmx2
+#define avg_h264_qpel8or16_hv2_lowpass_sse2 avg_h264_qpel8or16_hv2_lowpass_mmx2
+
+#define H264_MC(OPNAME, SIZE, MMX, ALIGN) \
+H264_MC_C(OPNAME, SIZE, MMX, ALIGN)\
+H264_MC_V(OPNAME, SIZE, MMX, ALIGN)\
+H264_MC_H(OPNAME, SIZE, MMX, ALIGN)\
+H264_MC_HV(OPNAME, SIZE, MMX, ALIGN)\
+
+static void put_h264_qpel16_mc00_sse2 (uint8_t *dst, uint8_t *src, int stride){
+ put_pixels16_sse2(dst, src, stride, 16);
+}
+static void avg_h264_qpel16_mc00_sse2 (uint8_t *dst, uint8_t *src, int stride){
+ avg_pixels16_sse2(dst, src, stride, 16);
+}
+#define put_h264_qpel8_mc00_sse2 put_h264_qpel8_mc00_mmx2
+#define avg_h264_qpel8_mc00_sse2 avg_h264_qpel8_mc00_mmx2
+
+#define H264_MC_C(OPNAME, SIZE, MMX, ALIGN) \
+static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## MMX (uint8_t *dst, uint8_t *src, int stride){\
+ OPNAME ## pixels ## SIZE ## _ ## MMX(dst, src, stride, SIZE);\
+}\
+
+#define H264_MC_H(OPNAME, SIZE, MMX, ALIGN) \
+static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, src, stride, stride);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## MMX(dst, src, stride, stride);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, src+1, stride, stride);\
+}\
+
+#define H264_MC_V(OPNAME, SIZE, MMX, ALIGN) \
+static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*SIZE]);\
+ put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
+ OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src, temp, stride, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## MMX(dst, src, stride, stride);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*SIZE]);\
+ put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
+ OPNAME ## pixels ## SIZE ## _l2_ ## MMX(dst, src+stride, temp, stride, stride, SIZE);\
+}\
+
+#define H264_MC_HV(OPNAME, SIZE, MMX, ALIGN) \
+static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*SIZE]);\
+ put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
+ OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, temp, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*SIZE]);\
+ put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src+1, SIZE, stride);\
+ OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, temp, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*SIZE]);\
+ put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src, SIZE, stride);\
+ OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, temp, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*SIZE]);\
+ put_h264_qpel ## SIZE ## _v_lowpass_ ## MMX(temp, src+1, SIZE, stride);\
+ OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, temp, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ DECLARE_ALIGNED(ALIGN, uint16_t, temp[SIZE*(SIZE<8?12:24)]);\
+ OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(dst, temp, src, stride, SIZE, stride);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\
+ uint8_t * const halfHV= temp;\
+ int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
+ assert(((int)temp & 7) == 0);\
+ put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
+ OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src, halfHV, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\
+ uint8_t * const halfHV= temp;\
+ int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
+ assert(((int)temp & 7) == 0);\
+ put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
+ OPNAME ## h264_qpel ## SIZE ## _h_lowpass_l2_ ## MMX(dst, src+stride, halfHV, stride, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\
+ uint8_t * const halfHV= temp;\
+ int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
+ assert(((int)temp & 7) == 0);\
+ put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
+ OPNAME ## pixels ## SIZE ## _l2_shift5_ ## MMX(dst, halfV+2, halfHV, stride, SIZE, SIZE);\
+}\
+\
+static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\
+ DECLARE_ALIGNED(ALIGN, uint8_t, temp[SIZE*(SIZE<8?12:24)*2 + SIZE*SIZE]);\
+ uint8_t * const halfHV= temp;\
+ int16_t * const halfV= (int16_t*)(temp + SIZE*SIZE);\
+ assert(((int)temp & 7) == 0);\
+ put_h264_qpel ## SIZE ## _hv_lowpass_ ## MMX(halfHV, halfV, src, SIZE, SIZE, stride);\
+ OPNAME ## pixels ## SIZE ## _l2_shift5_ ## MMX(dst, halfV+3, halfHV, stride, SIZE, SIZE);\
+}\
+
+#define H264_MC_4816(MMX)\
+H264_MC(put_, 4, MMX, 8)\
+H264_MC(put_, 8, MMX, 8)\
+H264_MC(put_, 16,MMX, 8)\
+H264_MC(avg_, 4, MMX, 8)\
+H264_MC(avg_, 8, MMX, 8)\
+H264_MC(avg_, 16,MMX, 8)\
+
+#define H264_MC_816(QPEL, XMM)\
+QPEL(put_, 8, XMM, 16)\
+QPEL(put_, 16,XMM, 16)\
+QPEL(avg_, 8, XMM, 16)\
+QPEL(avg_, 16,XMM, 16)\
+
+
+#define AVG_3DNOW_OP(a,b,temp, size) \
+"mov" #size " " #b ", " #temp " \n\t"\
+"pavgusb " #temp ", " #a " \n\t"\
+"mov" #size " " #a ", " #b " \n\t"
+#define AVG_MMX2_OP(a,b,temp, size) \
+"mov" #size " " #b ", " #temp " \n\t"\
+"pavgb " #temp ", " #a " \n\t"\
+"mov" #size " " #a ", " #b " \n\t"
+
+#define PAVGB "pavgusb"
+QPEL_H264(put_, PUT_OP, 3dnow)
+QPEL_H264(avg_, AVG_3DNOW_OP, 3dnow)
+#undef PAVGB
+#define PAVGB "pavgb"
+QPEL_H264(put_, PUT_OP, mmx2)
+QPEL_H264(avg_, AVG_MMX2_OP, mmx2)
+QPEL_H264_V_XMM(put_, PUT_OP, sse2)
+QPEL_H264_V_XMM(avg_, AVG_MMX2_OP, sse2)
+QPEL_H264_HV_XMM(put_, PUT_OP, sse2)
+QPEL_H264_HV_XMM(avg_, AVG_MMX2_OP, sse2)
+#ifdef HAVE_SSSE3
+QPEL_H264_H_XMM(put_, PUT_OP, ssse3)
+QPEL_H264_H_XMM(avg_, AVG_MMX2_OP, ssse3)
+QPEL_H264_HV2_XMM(put_, PUT_OP, ssse3)
+QPEL_H264_HV2_XMM(avg_, AVG_MMX2_OP, ssse3)
+QPEL_H264_HV_XMM(put_, PUT_OP, ssse3)
+QPEL_H264_HV_XMM(avg_, AVG_MMX2_OP, ssse3)
+#endif
+#undef PAVGB
+
+H264_MC_4816(3dnow)
+H264_MC_4816(mmx2)
+H264_MC_816(H264_MC_V, sse2)
+H264_MC_816(H264_MC_HV, sse2)
+#ifdef HAVE_SSSE3
+H264_MC_816(H264_MC_H, ssse3)
+H264_MC_816(H264_MC_HV, ssse3)
+#endif
+
+/* rnd interleaved with rnd div 8, use p+1 to access rnd div 8 */
+DECLARE_ALIGNED_8(static const uint64_t, h264_rnd_reg[4]) = {
+ 0x0020002000200020ULL, 0x0004000400040004ULL, 0x001C001C001C001CULL, 0x0003000300030003ULL
+};
+
+#define H264_CHROMA_OP(S,D)
+#define H264_CHROMA_OP4(S,D,T)
+#define H264_CHROMA_MC8_TMPL put_h264_chroma_generic_mc8_mmx
+#define H264_CHROMA_MC4_TMPL put_h264_chroma_generic_mc4_mmx
+#define H264_CHROMA_MC2_TMPL put_h264_chroma_mc2_mmx2
+#define H264_CHROMA_MC8_MV0 put_pixels8_mmx
+#include "dsputil_h264_template_mmx.c"
+
+static void put_h264_chroma_mc8_mmx_rnd(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
+{
+ put_h264_chroma_generic_mc8_mmx(dst, src, stride, h, x, y, h264_rnd_reg);
+}
+static void put_h264_chroma_mc8_mmx_nornd(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
+{
+ put_h264_chroma_generic_mc8_mmx(dst, src, stride, h, x, y, h264_rnd_reg+2);
+}
+static void put_h264_chroma_mc4_mmx(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
+{
+ put_h264_chroma_generic_mc4_mmx(dst, src, stride, h, x, y, h264_rnd_reg);
+}
+
+#undef H264_CHROMA_OP
+#undef H264_CHROMA_OP4
+#undef H264_CHROMA_MC8_TMPL
+#undef H264_CHROMA_MC4_TMPL
+#undef H264_CHROMA_MC2_TMPL
+#undef H264_CHROMA_MC8_MV0
+
+#define H264_CHROMA_OP(S,D) "pavgb " #S ", " #D " \n\t"
+#define H264_CHROMA_OP4(S,D,T) "movd " #S ", " #T " \n\t"\
+ "pavgb " #T ", " #D " \n\t"
+#define H264_CHROMA_MC8_TMPL avg_h264_chroma_generic_mc8_mmx2
+#define H264_CHROMA_MC4_TMPL avg_h264_chroma_generic_mc4_mmx2
+#define H264_CHROMA_MC2_TMPL avg_h264_chroma_mc2_mmx2
+#define H264_CHROMA_MC8_MV0 avg_pixels8_mmx2
+#include "dsputil_h264_template_mmx.c"
+static void avg_h264_chroma_mc8_mmx2_rnd(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
+{
+ avg_h264_chroma_generic_mc8_mmx2(dst, src, stride, h, x, y, h264_rnd_reg);
+}
+static void avg_h264_chroma_mc4_mmx2(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
+{
+ avg_h264_chroma_generic_mc4_mmx2(dst, src, stride, h, x, y, h264_rnd_reg);
+}
+#undef H264_CHROMA_OP
+#undef H264_CHROMA_OP4
+#undef H264_CHROMA_MC8_TMPL
+#undef H264_CHROMA_MC4_TMPL
+#undef H264_CHROMA_MC2_TMPL
+#undef H264_CHROMA_MC8_MV0
+
+#define H264_CHROMA_OP(S,D) "pavgusb " #S ", " #D " \n\t"
+#define H264_CHROMA_OP4(S,D,T) "movd " #S ", " #T " \n\t"\
+ "pavgusb " #T ", " #D " \n\t"
+#define H264_CHROMA_MC8_TMPL avg_h264_chroma_generic_mc8_3dnow
+#define H264_CHROMA_MC4_TMPL avg_h264_chroma_generic_mc4_3dnow
+#define H264_CHROMA_MC8_MV0 avg_pixels8_3dnow
+#include "dsputil_h264_template_mmx.c"
+static void avg_h264_chroma_mc8_3dnow_rnd(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
+{
+ avg_h264_chroma_generic_mc8_3dnow(dst, src, stride, h, x, y, h264_rnd_reg);
+}
+static void avg_h264_chroma_mc4_3dnow(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
+{
+ avg_h264_chroma_generic_mc4_3dnow(dst, src, stride, h, x, y, h264_rnd_reg);
+}
+#undef H264_CHROMA_OP
+#undef H264_CHROMA_OP4
+#undef H264_CHROMA_MC8_TMPL
+#undef H264_CHROMA_MC4_TMPL
+#undef H264_CHROMA_MC8_MV0
+
+#ifdef HAVE_SSSE3
+#define AVG_OP(X)
+#undef H264_CHROMA_MC8_TMPL
+#undef H264_CHROMA_MC4_TMPL
+#define H264_CHROMA_MC8_TMPL put_h264_chroma_mc8_ssse3
+#define H264_CHROMA_MC4_TMPL put_h264_chroma_mc4_ssse3
+#define H264_CHROMA_MC8_MV0 put_pixels8_mmx
+#include "dsputil_h264_template_ssse3.c"
+static void put_h264_chroma_mc8_ssse3_rnd(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
+{
+ put_h264_chroma_mc8_ssse3(dst, src, stride, h, x, y, 1);
+}
+static void put_h264_chroma_mc8_ssse3_nornd(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
+{
+ put_h264_chroma_mc8_ssse3(dst, src, stride, h, x, y, 0);
+}
+
+#undef AVG_OP
+#undef H264_CHROMA_MC8_TMPL
+#undef H264_CHROMA_MC4_TMPL
+#undef H264_CHROMA_MC8_MV0
+#define AVG_OP(X) X
+#define H264_CHROMA_MC8_TMPL avg_h264_chroma_mc8_ssse3
+#define H264_CHROMA_MC4_TMPL avg_h264_chroma_mc4_ssse3
+#define H264_CHROMA_MC8_MV0 avg_pixels8_mmx2
+#include "dsputil_h264_template_ssse3.c"
+static void avg_h264_chroma_mc8_ssse3_rnd(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
+{
+ avg_h264_chroma_mc8_ssse3(dst, src, stride, h, x, y, 1);
+}
+#undef AVG_OP
+#undef H264_CHROMA_MC8_TMPL
+#undef H264_CHROMA_MC4_TMPL
+#undef H264_CHROMA_MC8_MV0
+#endif
+
+/***********************************/
+/* weighted prediction */
+
+static inline void ff_h264_weight_WxH_mmx2(uint8_t *dst, int stride, int log2_denom, int weight, int offset, int w, int h)
+{
+ int x, y;
+ offset <<= log2_denom;
+ offset += (1 << log2_denom) >> 1;
+ __asm__ volatile(
+ "movd %0, %%mm4 \n\t"
+ "movd %1, %%mm5 \n\t"
+ "movd %2, %%mm6 \n\t"
+ "pshufw $0, %%mm4, %%mm4 \n\t"
+ "pshufw $0, %%mm5, %%mm5 \n\t"
+ "pxor %%mm7, %%mm7 \n\t"
+ :: "g"(weight), "g"(offset), "g"(log2_denom)
+ );
+ for(y=0; y<h; y+=2){
+ for(x=0; x<w; x+=4){
+ __asm__ volatile(
+ "movd %0, %%mm0 \n\t"
+ "movd %1, %%mm1 \n\t"
+ "punpcklbw %%mm7, %%mm0 \n\t"
+ "punpcklbw %%mm7, %%mm1 \n\t"
+ "pmullw %%mm4, %%mm0 \n\t"
+ "pmullw %%mm4, %%mm1 \n\t"
+ "paddsw %%mm5, %%mm0 \n\t"
+ "paddsw %%mm5, %%mm1 \n\t"
+ "psraw %%mm6, %%mm0 \n\t"
+ "psraw %%mm6, %%mm1 \n\t"
+ "packuswb %%mm7, %%mm0 \n\t"
+ "packuswb %%mm7, %%mm1 \n\t"
+ "movd %%mm0, %0 \n\t"
+ "movd %%mm1, %1 \n\t"
+ : "+m"(*(uint32_t*)(dst+x)),
+ "+m"(*(uint32_t*)(dst+x+stride))
+ );
+ }
+ dst += 2*stride;
+ }
+}
+
+static inline void ff_h264_biweight_WxH_mmx2(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset, int w, int h)
+{
+ int x, y;
+ offset = ((offset + 1) | 1) << log2_denom;
+ __asm__ volatile(
+ "movd %0, %%mm3 \n\t"
+ "movd %1, %%mm4 \n\t"
+ "movd %2, %%mm5 \n\t"
+ "movd %3, %%mm6 \n\t"
+ "pshufw $0, %%mm3, %%mm3 \n\t"
+ "pshufw $0, %%mm4, %%mm4 \n\t"
+ "pshufw $0, %%mm5, %%mm5 \n\t"
+ "pxor %%mm7, %%mm7 \n\t"
+ :: "g"(weightd), "g"(weights), "g"(offset), "g"(log2_denom+1)
+ );
+ for(y=0; y<h; y++){
+ for(x=0; x<w; x+=4){
+ __asm__ volatile(
+ "movd %0, %%mm0 \n\t"
+ "movd %1, %%mm1 \n\t"
+ "punpcklbw %%mm7, %%mm0 \n\t"
+ "punpcklbw %%mm7, %%mm1 \n\t"
+ "pmullw %%mm3, %%mm0 \n\t"
+ "pmullw %%mm4, %%mm1 \n\t"
+ "paddsw %%mm1, %%mm0 \n\t"
+ "paddsw %%mm5, %%mm0 \n\t"
+ "psraw %%mm6, %%mm0 \n\t"
+ "packuswb %%mm0, %%mm0 \n\t"
+ "movd %%mm0, %0 \n\t"
+ : "+m"(*(uint32_t*)(dst+x))
+ : "m"(*(uint32_t*)(src+x))
+ );
+ }
+ src += stride;
+ dst += stride;
+ }
+}
+
+#define H264_WEIGHT(W,H) \
+static void ff_h264_biweight_ ## W ## x ## H ## _mmx2(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset){ \
+ ff_h264_biweight_WxH_mmx2(dst, src, stride, log2_denom, weightd, weights, offset, W, H); \
+} \
+static void ff_h264_weight_ ## W ## x ## H ## _mmx2(uint8_t *dst, int stride, int log2_denom, int weight, int offset){ \
+ ff_h264_weight_WxH_mmx2(dst, stride, log2_denom, weight, offset, W, H); \
+}
+
+H264_WEIGHT(16,16)
+H264_WEIGHT(16, 8)
+H264_WEIGHT( 8,16)
+H264_WEIGHT( 8, 8)
+H264_WEIGHT( 8, 4)
+H264_WEIGHT( 4, 8)
+H264_WEIGHT( 4, 4)
+H264_WEIGHT( 4, 2)
+
diff --git a/libavcodec/i386/idct_mmx.c b/libavcodec/x86/idct_mmx.c
similarity index 100%
rename from libavcodec/i386/idct_mmx.c
rename to libavcodec/x86/idct_mmx.c
diff --git a/libavcodec/i386/idct_mmx_xvid.c b/libavcodec/x86/idct_mmx_xvid.c
similarity index 100%
rename from libavcodec/i386/idct_mmx_xvid.c
rename to libavcodec/x86/idct_mmx_xvid.c
diff --git a/libavcodec/x86/idct_sse2_xvid.c b/libavcodec/x86/idct_sse2_xvid.c
new file mode 100644
index 0000000..d8711a2
--- /dev/null
+++ b/libavcodec/x86/idct_sse2_xvid.c
@@ -0,0 +1,394 @@
+/*
+ * XVID MPEG-4 VIDEO CODEC
+ * - SSE2 inverse discrete cosine transform -
+ *
+ * Copyright(C) 2003 Pascal Massimino <skal at planet-d.net>
+ *
+ * Conversion to gcc syntax with modifications
+ * by Alexander Strange <astrange at ithinksw.com>
+ *
+ * Originally from dct/x86_asm/fdct_sse2_skal.asm in Xvid.
+ *
+ * This file is part of FFmpeg.
+ *
+ * Vertical pass is an implementation of the scheme:
+ * Loeffler C., Ligtenberg A., and Moschytz C.S.:
+ * Practical Fast 1D DCT Algorithm with Eleven Multiplications,
+ * Proc. ICASSP 1989, 988-991.
+ *
+ * Horizontal pass is a double 4x4 vector/matrix multiplication,
+ * (see also Intel's Application Note 922:
+ * http://developer.intel.com/vtune/cbts/strmsimd/922down.htm
+ * Copyright (C) 1999 Intel Corporation)
+ *
+ * More details at http://skal.planet-d.net/coding/dct.html
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with FFmpeg; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavcodec/dsputil.h"
+#include "idct_xvid.h"
+
+/*!
+ * @file idct_sse2_xvid.c
+ * @brief SSE2 idct compatible with xvidmmx
+ */
+
+#define X8(x) x,x,x,x,x,x,x,x
+
+#define ROW_SHIFT 11
+#define COL_SHIFT 6
+
+DECLARE_ASM_CONST(16, int16_t, tan1[]) = {X8(13036)}; // tan( pi/16)
+DECLARE_ASM_CONST(16, int16_t, tan2[]) = {X8(27146)}; // tan(2pi/16) = sqrt(2)-1
+DECLARE_ASM_CONST(16, int16_t, tan3[]) = {X8(43790)}; // tan(3pi/16)-1
+DECLARE_ASM_CONST(16, int16_t, sqrt2[])= {X8(23170)}; // 0.5/sqrt(2)
+DECLARE_ASM_CONST(8, uint8_t, m127[]) = {X8(127)};
+
+DECLARE_ASM_CONST(16, int16_t, iTab1[]) = {
+ 0x4000, 0x539f, 0xc000, 0xac61, 0x4000, 0xdd5d, 0x4000, 0xdd5d,
+ 0x4000, 0x22a3, 0x4000, 0x22a3, 0xc000, 0x539f, 0x4000, 0xac61,
+ 0x3249, 0x11a8, 0x4b42, 0xee58, 0x11a8, 0x4b42, 0x11a8, 0xcdb7,
+ 0x58c5, 0x4b42, 0xa73b, 0xcdb7, 0x3249, 0xa73b, 0x4b42, 0xa73b
+};
+
+DECLARE_ASM_CONST(16, int16_t, iTab2[]) = {
+ 0x58c5, 0x73fc, 0xa73b, 0x8c04, 0x58c5, 0xcff5, 0x58c5, 0xcff5,
+ 0x58c5, 0x300b, 0x58c5, 0x300b, 0xa73b, 0x73fc, 0x58c5, 0x8c04,
+ 0x45bf, 0x187e, 0x6862, 0xe782, 0x187e, 0x6862, 0x187e, 0xba41,
+ 0x7b21, 0x6862, 0x84df, 0xba41, 0x45bf, 0x84df, 0x6862, 0x84df
+};
+
+DECLARE_ASM_CONST(16, int16_t, iTab3[]) = {
+ 0x539f, 0x6d41, 0xac61, 0x92bf, 0x539f, 0xd2bf, 0x539f, 0xd2bf,
+ 0x539f, 0x2d41, 0x539f, 0x2d41, 0xac61, 0x6d41, 0x539f, 0x92bf,
+ 0x41b3, 0x1712, 0x6254, 0xe8ee, 0x1712, 0x6254, 0x1712, 0xbe4d,
+ 0x73fc, 0x6254, 0x8c04, 0xbe4d, 0x41b3, 0x8c04, 0x6254, 0x8c04
+};
+
+DECLARE_ASM_CONST(16, int16_t, iTab4[]) = {
+ 0x4b42, 0x6254, 0xb4be, 0x9dac, 0x4b42, 0xd746, 0x4b42, 0xd746,
+ 0x4b42, 0x28ba, 0x4b42, 0x28ba, 0xb4be, 0x6254, 0x4b42, 0x9dac,
+ 0x3b21, 0x14c3, 0x587e, 0xeb3d, 0x14c3, 0x587e, 0x14c3, 0xc4df,
+ 0x6862, 0x587e, 0x979e, 0xc4df, 0x3b21, 0x979e, 0x587e, 0x979e
+};
+
+DECLARE_ASM_CONST(16, int32_t, walkenIdctRounders[]) = {
+ 65536, 65536, 65536, 65536,
+ 3597, 3597, 3597, 3597,
+ 2260, 2260, 2260, 2260,
+ 1203, 1203, 1203, 1203,
+ 120, 120, 120, 120,
+ 512, 512, 512, 512
+};
+
+// Temporary storage before the column pass
+#define ROW1 "%%xmm6"
+#define ROW3 "%%xmm4"
+#define ROW5 "%%xmm5"
+#define ROW7 "%%xmm7"
+
+#define CLEAR_ODD(r) "pxor "r","r" \n\t"
+#define PUT_ODD(dst) "pshufhw $0x1B, %%xmm2, "dst" \n\t"
+
+#ifdef ARCH_X86_64
+
+# define ROW0 "%%xmm8"
+# define REG0 ROW0
+# define ROW2 "%%xmm9"
+# define REG2 ROW2
+# define ROW4 "%%xmm10"
+# define REG4 ROW4
+# define ROW6 "%%xmm11"
+# define REG6 ROW6
+# define CLEAR_EVEN(r) CLEAR_ODD(r)
+# define PUT_EVEN(dst) PUT_ODD(dst)
+# define XMMS "%%xmm12"
+# define MOV_32_ONLY "#"
+# define SREG2 REG2
+# define TAN3 "%%xmm13"
+# define TAN1 "%%xmm14"
+
+#else
+
+# define ROW0 "(%0)"
+# define REG0 "%%xmm4"
+# define ROW2 "2*16(%0)"
+# define REG2 "%%xmm4"
+# define ROW4 "4*16(%0)"
+# define REG4 "%%xmm6"
+# define ROW6 "6*16(%0)"
+# define REG6 "%%xmm6"
+# define CLEAR_EVEN(r)
+# define PUT_EVEN(dst) \
+ "pshufhw $0x1B, %%xmm2, %%xmm2 \n\t" \
+ "movdqa %%xmm2, "dst" \n\t"
+# define XMMS "%%xmm2"
+# define MOV_32_ONLY "movdqa "
+# define SREG2 "%%xmm7"
+# define TAN3 "%%xmm0"
+# define TAN1 "%%xmm2"
+
+#endif
+
+#define ROUND(x) "paddd "MANGLE(x)
+
+#define JZ(reg, to) \
+ "testl "reg","reg" \n\t" \
+ "jz "to" \n\t"
+
+#define JNZ(reg, to) \
+ "testl "reg","reg" \n\t" \
+ "jnz "to" \n\t"
+
+#define TEST_ONE_ROW(src, reg, clear) \
+ clear \
+ "movq "src", %%mm1 \n\t" \
+ "por 8+"src", %%mm1 \n\t" \
+ "paddusb %%mm0, %%mm1 \n\t" \
+ "pmovmskb %%mm1, "reg" \n\t"
+
+#define TEST_TWO_ROWS(row1, row2, reg1, reg2, clear1, clear2) \
+ clear1 \
+ clear2 \
+ "movq "row1", %%mm1 \n\t" \
+ "por 8+"row1", %%mm1 \n\t" \
+ "movq "row2", %%mm2 \n\t" \
+ "por 8+"row2", %%mm2 \n\t" \
+ "paddusb %%mm0, %%mm1 \n\t" \
+ "paddusb %%mm0, %%mm2 \n\t" \
+ "pmovmskb %%mm1, "reg1" \n\t" \
+ "pmovmskb %%mm2, "reg2" \n\t"
+
+///IDCT pass on rows.
+#define iMTX_MULT(src, table, rounder, put) \
+ "movdqa "src", %%xmm3 \n\t" \
+ "movdqa %%xmm3, %%xmm0 \n\t" \
+ "pshufd $0x11, %%xmm3, %%xmm1 \n\t" /* 4602 */ \
+ "punpcklqdq %%xmm0, %%xmm0 \n\t" /* 0246 */ \
+ "pmaddwd "table", %%xmm0 \n\t" \
+ "pmaddwd 16+"table", %%xmm1 \n\t" \
+ "pshufd $0xBB, %%xmm3, %%xmm2 \n\t" /* 5713 */ \
+ "punpckhqdq %%xmm3, %%xmm3 \n\t" /* 1357 */ \
+ "pmaddwd 32+"table", %%xmm2 \n\t" \
+ "pmaddwd 48+"table", %%xmm3 \n\t" \
+ "paddd %%xmm1, %%xmm0 \n\t" \
+ "paddd %%xmm3, %%xmm2 \n\t" \
+ rounder", %%xmm0 \n\t" \
+ "movdqa %%xmm2, %%xmm3 \n\t" \
+ "paddd %%xmm0, %%xmm2 \n\t" \
+ "psubd %%xmm3, %%xmm0 \n\t" \
+ "psrad $11, %%xmm2 \n\t" \
+ "psrad $11, %%xmm0 \n\t" \
+ "packssdw %%xmm0, %%xmm2 \n\t" \
+ put \
+ "1: \n\t"
+
+#define iLLM_HEAD \
+ "movdqa "MANGLE(tan3)", "TAN3" \n\t" \
+ "movdqa "MANGLE(tan1)", "TAN1" \n\t" \
+
+///IDCT pass on columns.
+#define iLLM_PASS(dct) \
+ "movdqa "TAN3", %%xmm1 \n\t" \
+ "movdqa "TAN1", %%xmm3 \n\t" \
+ "pmulhw %%xmm4, "TAN3" \n\t" \
+ "pmulhw %%xmm5, %%xmm1 \n\t" \
+ "paddsw %%xmm4, "TAN3" \n\t" \
+ "paddsw %%xmm5, %%xmm1 \n\t" \
+ "psubsw %%xmm5, "TAN3" \n\t" \
+ "paddsw %%xmm4, %%xmm1 \n\t" \
+ "pmulhw %%xmm7, %%xmm3 \n\t" \
+ "pmulhw %%xmm6, "TAN1" \n\t" \
+ "paddsw %%xmm6, %%xmm3 \n\t" \
+ "psubsw %%xmm7, "TAN1" \n\t" \
+ "movdqa %%xmm3, %%xmm7 \n\t" \
+ "movdqa "TAN1", %%xmm6 \n\t" \
+ "psubsw %%xmm1, %%xmm3 \n\t" \
+ "psubsw "TAN3", "TAN1" \n\t" \
+ "paddsw %%xmm7, %%xmm1 \n\t" \
+ "paddsw %%xmm6, "TAN3" \n\t" \
+ "movdqa %%xmm3, %%xmm6 \n\t" \
+ "psubsw "TAN3", %%xmm3 \n\t" \
+ "paddsw %%xmm6, "TAN3" \n\t" \
+ "movdqa "MANGLE(sqrt2)", %%xmm4 \n\t" \
+ "pmulhw %%xmm4, %%xmm3 \n\t" \
+ "pmulhw %%xmm4, "TAN3" \n\t" \
+ "paddsw "TAN3", "TAN3" \n\t" \
+ "paddsw %%xmm3, %%xmm3 \n\t" \
+ "movdqa "MANGLE(tan2)", %%xmm7 \n\t" \
+ MOV_32_ONLY ROW2", "REG2" \n\t" \
+ MOV_32_ONLY ROW6", "REG6" \n\t" \
+ "movdqa %%xmm7, %%xmm5 \n\t" \
+ "pmulhw "REG6", %%xmm7 \n\t" \
+ "pmulhw "REG2", %%xmm5 \n\t" \
+ "paddsw "REG2", %%xmm7 \n\t" \
+ "psubsw "REG6", %%xmm5 \n\t" \
+ MOV_32_ONLY ROW0", "REG0" \n\t" \
+ MOV_32_ONLY ROW4", "REG4" \n\t" \
+ MOV_32_ONLY" "TAN1", (%0) \n\t" \
+ "movdqa "REG0", "XMMS" \n\t" \
+ "psubsw "REG4", "REG0" \n\t" \
+ "paddsw "XMMS", "REG4" \n\t" \
+ "movdqa "REG4", "XMMS" \n\t" \
+ "psubsw %%xmm7, "REG4" \n\t" \
+ "paddsw "XMMS", %%xmm7 \n\t" \
+ "movdqa "REG0", "XMMS" \n\t" \
+ "psubsw %%xmm5, "REG0" \n\t" \
+ "paddsw "XMMS", %%xmm5 \n\t" \
+ "movdqa %%xmm5, "XMMS" \n\t" \
+ "psubsw "TAN3", %%xmm5 \n\t" \
+ "paddsw "XMMS", "TAN3" \n\t" \
+ "movdqa "REG0", "XMMS" \n\t" \
+ "psubsw %%xmm3, "REG0" \n\t" \
+ "paddsw "XMMS", %%xmm3 \n\t" \
+ MOV_32_ONLY" (%0), "TAN1" \n\t" \
+ "psraw $6, %%xmm5 \n\t" \
+ "psraw $6, "REG0" \n\t" \
+ "psraw $6, "TAN3" \n\t" \
+ "psraw $6, %%xmm3 \n\t" \
+ "movdqa "TAN3", 1*16("dct") \n\t" \
+ "movdqa %%xmm3, 2*16("dct") \n\t" \
+ "movdqa "REG0", 5*16("dct") \n\t" \
+ "movdqa %%xmm5, 6*16("dct") \n\t" \
+ "movdqa %%xmm7, %%xmm0 \n\t" \
+ "movdqa "REG4", %%xmm4 \n\t" \
+ "psubsw %%xmm1, %%xmm7 \n\t" \
+ "psubsw "TAN1", "REG4" \n\t" \
+ "paddsw %%xmm0, %%xmm1 \n\t" \
+ "paddsw %%xmm4, "TAN1" \n\t" \
+ "psraw $6, %%xmm1 \n\t" \
+ "psraw $6, %%xmm7 \n\t" \
+ "psraw $6, "TAN1" \n\t" \
+ "psraw $6, "REG4" \n\t" \
+ "movdqa %%xmm1, ("dct") \n\t" \
+ "movdqa "TAN1", 3*16("dct") \n\t" \
+ "movdqa "REG4", 4*16("dct") \n\t" \
+ "movdqa %%xmm7, 7*16("dct") \n\t"
+
+///IDCT pass on columns, assuming rows 4-7 are zero.
+#define iLLM_PASS_SPARSE(dct) \
+ "pmulhw %%xmm4, "TAN3" \n\t" \
+ "paddsw %%xmm4, "TAN3" \n\t" \
+ "movdqa %%xmm6, %%xmm3 \n\t" \
+ "pmulhw %%xmm6, "TAN1" \n\t" \
+ "movdqa %%xmm4, %%xmm1 \n\t" \
+ "psubsw %%xmm1, %%xmm3 \n\t" \
+ "paddsw %%xmm6, %%xmm1 \n\t" \
+ "movdqa "TAN1", %%xmm6 \n\t" \
+ "psubsw "TAN3", "TAN1" \n\t" \
+ "paddsw %%xmm6, "TAN3" \n\t" \
+ "movdqa %%xmm3, %%xmm6 \n\t" \
+ "psubsw "TAN3", %%xmm3 \n\t" \
+ "paddsw %%xmm6, "TAN3" \n\t" \
+ "movdqa "MANGLE(sqrt2)", %%xmm4 \n\t" \
+ "pmulhw %%xmm4, %%xmm3 \n\t" \
+ "pmulhw %%xmm4, "TAN3" \n\t" \
+ "paddsw "TAN3", "TAN3" \n\t" \
+ "paddsw %%xmm3, %%xmm3 \n\t" \
+ "movdqa "MANGLE(tan2)", %%xmm5 \n\t" \
+ MOV_32_ONLY ROW2", "SREG2" \n\t" \
+ "pmulhw "SREG2", %%xmm5 \n\t" \
+ MOV_32_ONLY ROW0", "REG0" \n\t" \
+ "movdqa "REG0", %%xmm6 \n\t" \
+ "psubsw "SREG2", %%xmm6 \n\t" \
+ "paddsw "REG0", "SREG2" \n\t" \
+ MOV_32_ONLY" "TAN1", (%0) \n\t" \
+ "movdqa "REG0", "XMMS" \n\t" \
+ "psubsw %%xmm5, "REG0" \n\t" \
+ "paddsw "XMMS", %%xmm5 \n\t" \
+ "movdqa %%xmm5, "XMMS" \n\t" \
+ "psubsw "TAN3", %%xmm5 \n\t" \
+ "paddsw "XMMS", "TAN3" \n\t" \
+ "movdqa "REG0", "XMMS" \n\t" \
+ "psubsw %%xmm3, "REG0" \n\t" \
+ "paddsw "XMMS", %%xmm3 \n\t" \
+ MOV_32_ONLY" (%0), "TAN1" \n\t" \
+ "psraw $6, %%xmm5 \n\t" \
+ "psraw $6, "REG0" \n\t" \
+ "psraw $6, "TAN3" \n\t" \
+ "psraw $6, %%xmm3 \n\t" \
+ "movdqa "TAN3", 1*16("dct") \n\t" \
+ "movdqa %%xmm3, 2*16("dct") \n\t" \
+ "movdqa "REG0", 5*16("dct") \n\t" \
+ "movdqa %%xmm5, 6*16("dct") \n\t" \
+ "movdqa "SREG2", %%xmm0 \n\t" \
+ "movdqa %%xmm6, %%xmm4 \n\t" \
+ "psubsw %%xmm1, "SREG2" \n\t" \
+ "psubsw "TAN1", %%xmm6 \n\t" \
+ "paddsw %%xmm0, %%xmm1 \n\t" \
+ "paddsw %%xmm4, "TAN1" \n\t" \
+ "psraw $6, %%xmm1 \n\t" \
+ "psraw $6, "SREG2" \n\t" \
+ "psraw $6, "TAN1" \n\t" \
+ "psraw $6, %%xmm6 \n\t" \
+ "movdqa %%xmm1, ("dct") \n\t" \
+ "movdqa "TAN1", 3*16("dct") \n\t" \
+ "movdqa %%xmm6, 4*16("dct") \n\t" \
+ "movdqa "SREG2", 7*16("dct") \n\t"
+
+inline void ff_idct_xvid_sse2(short *block)
+{
+ __asm__ volatile(
+ "movq "MANGLE(m127)", %%mm0 \n\t"
+ iMTX_MULT("(%0)", MANGLE(iTab1), ROUND(walkenIdctRounders), PUT_EVEN(ROW0))
+ iMTX_MULT("1*16(%0)", MANGLE(iTab2), ROUND(walkenIdctRounders+1*16), PUT_ODD(ROW1))
+ iMTX_MULT("2*16(%0)", MANGLE(iTab3), ROUND(walkenIdctRounders+2*16), PUT_EVEN(ROW2))
+
+ TEST_TWO_ROWS("3*16(%0)", "4*16(%0)", "%%eax", "%%ecx", CLEAR_ODD(ROW3), CLEAR_EVEN(ROW4))
+ JZ("%%eax", "1f")
+ iMTX_MULT("3*16(%0)", MANGLE(iTab4), ROUND(walkenIdctRounders+3*16), PUT_ODD(ROW3))
+
+ TEST_TWO_ROWS("5*16(%0)", "6*16(%0)", "%%eax", "%%edx", CLEAR_ODD(ROW5), CLEAR_EVEN(ROW6))
+ TEST_ONE_ROW("7*16(%0)", "%%esi", CLEAR_ODD(ROW7))
+ iLLM_HEAD
+ ASMALIGN(4)
+ JNZ("%%ecx", "2f")
+ JNZ("%%eax", "3f")
+ JNZ("%%edx", "4f")
+ JNZ("%%esi", "5f")
+ iLLM_PASS_SPARSE("%0")
+ "jmp 6f \n\t"
+ "2: \n\t"
+ iMTX_MULT("4*16(%0)", MANGLE(iTab1), "#", PUT_EVEN(ROW4))
+ "3: \n\t"
+ iMTX_MULT("5*16(%0)", MANGLE(iTab4), ROUND(walkenIdctRounders+4*16), PUT_ODD(ROW5))
+ JZ("%%edx", "1f")
+ "4: \n\t"
+ iMTX_MULT("6*16(%0)", MANGLE(iTab3), ROUND(walkenIdctRounders+5*16), PUT_EVEN(ROW6))
+ JZ("%%esi", "1f")
+ "5: \n\t"
+ iMTX_MULT("7*16(%0)", MANGLE(iTab2), ROUND(walkenIdctRounders+5*16), PUT_ODD(ROW7))
+#ifndef ARCH_X86_64
+ iLLM_HEAD
+#endif
+ iLLM_PASS("%0")
+ "6: \n\t"
+ : "+r"(block)
+ :
+ : "%eax", "%ecx", "%edx", "%esi", "memory");
+}
+
+void ff_idct_xvid_sse2_put(uint8_t *dest, int line_size, short *block)
+{
+ ff_idct_xvid_sse2(block);
+ put_pixels_clamped_mmx(block, dest, line_size);
+}
+
+void ff_idct_xvid_sse2_add(uint8_t *dest, int line_size, short *block)
+{
+ ff_idct_xvid_sse2(block);
+ add_pixels_clamped_mmx(block, dest, line_size);
+}
diff --git a/libavcodec/x86/idct_xvid.h b/libavcodec/x86/idct_xvid.h
new file mode 100644
index 0000000..bddbdb9
--- /dev/null
+++ b/libavcodec/x86/idct_xvid.h
@@ -0,0 +1,37 @@
+/*
+ * XVID MPEG-4 VIDEO CODEC
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*!
+ * @file idct_xvid.h
+ * header for Xvid IDCT functions
+ */
+
+#ifndef AVCODEC_X86_IDCT_XVID_H
+#define AVCODEC_X86_IDCT_XVID_H
+
+#include <stdint.h>
+
+void ff_idct_xvid_mmx(short *block);
+void ff_idct_xvid_mmx2(short *block);
+void ff_idct_xvid_sse2(short *block);
+void ff_idct_xvid_sse2_put(uint8_t *dest, int line_size, short *block);
+void ff_idct_xvid_sse2_add(uint8_t *dest, int line_size, short *block);
+
+#endif /* AVCODEC_X86_IDCT_XVID_H */
diff --git a/libavcodec/x86/mathops.h b/libavcodec/x86/mathops.h
new file mode 100644
index 0000000..95377ac
--- /dev/null
+++ b/libavcodec/x86/mathops.h
@@ -0,0 +1,43 @@
+/*
+ * simple math operations
+ * Copyright (c) 2006 Michael Niedermayer <michaelni at gmx.at> et al
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_X86_MATHOPS_H
+#define AVCODEC_X86_MATHOPS_H
+
+#define MULL(ra, rb, shift) \
+ ({ int rt, dummy; __asm__ (\
+ "imull %3 \n\t"\
+ "shrdl %4, %%edx, %%eax \n\t"\
+ : "=a"(rt), "=d"(dummy)\
+ : "a" ((int)ra), "rm" ((int)rb), "i"(shift));\
+ rt; })
+
+#define MULH(ra, rb) \
+ ({ int rt, dummy;\
+ __asm__ ("imull %3\n\t" : "=d"(rt), "=a"(dummy): "a" ((int)ra), "rm" ((int)rb));\
+ rt; })
+
+#define MUL64(ra, rb) \
+ ({ int64_t rt;\
+ __asm__ ("imull %2\n\t" : "=A"(rt) : "a" ((int)ra), "g" ((int)rb));\
+ rt; })
+
+#endif /* AVCODEC_X86_MATHOPS_H */
diff --git a/libavcodec/x86/mmx.h b/libavcodec/x86/mmx.h
new file mode 100644
index 0000000..d7a76bb
--- /dev/null
+++ b/libavcodec/x86/mmx.h
@@ -0,0 +1,267 @@
+/*
+ * mmx.h
+ * Copyright (C) 1997-2001 H. Dietz and R. Fisher
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef AVCODEC_X86_MMX_H
+#define AVCODEC_X86_MMX_H
+
+#warning Everything in this header is deprecated, use plain __asm__()! New code using this header will be rejected.
+
+
+#define mmx_i2r(op,imm,reg) \
+ __asm__ volatile (#op " %0, %%" #reg \
+ : /* nothing */ \
+ : "i" (imm) )
+
+#define mmx_m2r(op,mem,reg) \
+ __asm__ volatile (#op " %0, %%" #reg \
+ : /* nothing */ \
+ : "m" (mem))
+
+#define mmx_r2m(op,reg,mem) \
+ __asm__ volatile (#op " %%" #reg ", %0" \
+ : "=m" (mem) \
+ : /* nothing */ )
+
+#define mmx_r2r(op,regs,regd) \
+ __asm__ volatile (#op " %" #regs ", %" #regd)
+
+
+#define emms() __asm__ volatile ("emms")
+
+#define movd_m2r(var,reg) mmx_m2r (movd, var, reg)
+#define movd_r2m(reg,var) mmx_r2m (movd, reg, var)
+#define movd_r2r(regs,regd) mmx_r2r (movd, regs, regd)
+
+#define movq_m2r(var,reg) mmx_m2r (movq, var, reg)
+#define movq_r2m(reg,var) mmx_r2m (movq, reg, var)
+#define movq_r2r(regs,regd) mmx_r2r (movq, regs, regd)
+
+#define packssdw_m2r(var,reg) mmx_m2r (packssdw, var, reg)
+#define packssdw_r2r(regs,regd) mmx_r2r (packssdw, regs, regd)
+#define packsswb_m2r(var,reg) mmx_m2r (packsswb, var, reg)
+#define packsswb_r2r(regs,regd) mmx_r2r (packsswb, regs, regd)
+
+#define packuswb_m2r(var,reg) mmx_m2r (packuswb, var, reg)
+#define packuswb_r2r(regs,regd) mmx_r2r (packuswb, regs, regd)
+
+#define paddb_m2r(var,reg) mmx_m2r (paddb, var, reg)
+#define paddb_r2r(regs,regd) mmx_r2r (paddb, regs, regd)
+#define paddd_m2r(var,reg) mmx_m2r (paddd, var, reg)
+#define paddd_r2r(regs,regd) mmx_r2r (paddd, regs, regd)
+#define paddw_m2r(var,reg) mmx_m2r (paddw, var, reg)
+#define paddw_r2r(regs,regd) mmx_r2r (paddw, regs, regd)
+
+#define paddsb_m2r(var,reg) mmx_m2r (paddsb, var, reg)
+#define paddsb_r2r(regs,regd) mmx_r2r (paddsb, regs, regd)
+#define paddsw_m2r(var,reg) mmx_m2r (paddsw, var, reg)
+#define paddsw_r2r(regs,regd) mmx_r2r (paddsw, regs, regd)
+
+#define paddusb_m2r(var,reg) mmx_m2r (paddusb, var, reg)
+#define paddusb_r2r(regs,regd) mmx_r2r (paddusb, regs, regd)
+#define paddusw_m2r(var,reg) mmx_m2r (paddusw, var, reg)
+#define paddusw_r2r(regs,regd) mmx_r2r (paddusw, regs, regd)
+
+#define pand_m2r(var,reg) mmx_m2r (pand, var, reg)
+#define pand_r2r(regs,regd) mmx_r2r (pand, regs, regd)
+
+#define pandn_m2r(var,reg) mmx_m2r (pandn, var, reg)
+#define pandn_r2r(regs,regd) mmx_r2r (pandn, regs, regd)
+
+#define pcmpeqb_m2r(var,reg) mmx_m2r (pcmpeqb, var, reg)
+#define pcmpeqb_r2r(regs,regd) mmx_r2r (pcmpeqb, regs, regd)
+#define pcmpeqd_m2r(var,reg) mmx_m2r (pcmpeqd, var, reg)
+#define pcmpeqd_r2r(regs,regd) mmx_r2r (pcmpeqd, regs, regd)
+#define pcmpeqw_m2r(var,reg) mmx_m2r (pcmpeqw, var, reg)
+#define pcmpeqw_r2r(regs,regd) mmx_r2r (pcmpeqw, regs, regd)
+
+#define pcmpgtb_m2r(var,reg) mmx_m2r (pcmpgtb, var, reg)
+#define pcmpgtb_r2r(regs,regd) mmx_r2r (pcmpgtb, regs, regd)
+#define pcmpgtd_m2r(var,reg) mmx_m2r (pcmpgtd, var, reg)
+#define pcmpgtd_r2r(regs,regd) mmx_r2r (pcmpgtd, regs, regd)
+#define pcmpgtw_m2r(var,reg) mmx_m2r (pcmpgtw, var, reg)
+#define pcmpgtw_r2r(regs,regd) mmx_r2r (pcmpgtw, regs, regd)
+
+#define pmaddwd_m2r(var,reg) mmx_m2r (pmaddwd, var, reg)
+#define pmaddwd_r2r(regs,regd) mmx_r2r (pmaddwd, regs, regd)
+
+#define pmulhw_m2r(var,reg) mmx_m2r (pmulhw, var, reg)
+#define pmulhw_r2r(regs,regd) mmx_r2r (pmulhw, regs, regd)
+
+#define pmullw_m2r(var,reg) mmx_m2r (pmullw, var, reg)
+#define pmullw_r2r(regs,regd) mmx_r2r (pmullw, regs, regd)
+
+#define por_m2r(var,reg) mmx_m2r (por, var, reg)
+#define por_r2r(regs,regd) mmx_r2r (por, regs, regd)
+
+#define pslld_i2r(imm,reg) mmx_i2r (pslld, imm, reg)
+#define pslld_m2r(var,reg) mmx_m2r (pslld, var, reg)
+#define pslld_r2r(regs,regd) mmx_r2r (pslld, regs, regd)
+#define psllq_i2r(imm,reg) mmx_i2r (psllq, imm, reg)
+#define psllq_m2r(var,reg) mmx_m2r (psllq, var, reg)
+#define psllq_r2r(regs,regd) mmx_r2r (psllq, regs, regd)
+#define psllw_i2r(imm,reg) mmx_i2r (psllw, imm, reg)
+#define psllw_m2r(var,reg) mmx_m2r (psllw, var, reg)
+#define psllw_r2r(regs,regd) mmx_r2r (psllw, regs, regd)
+
+#define psrad_i2r(imm,reg) mmx_i2r (psrad, imm, reg)
+#define psrad_m2r(var,reg) mmx_m2r (psrad, var, reg)
+#define psrad_r2r(regs,regd) mmx_r2r (psrad, regs, regd)
+#define psraw_i2r(imm,reg) mmx_i2r (psraw, imm, reg)
+#define psraw_m2r(var,reg) mmx_m2r (psraw, var, reg)
+#define psraw_r2r(regs,regd) mmx_r2r (psraw, regs, regd)
+
+#define psrld_i2r(imm,reg) mmx_i2r (psrld, imm, reg)
+#define psrld_m2r(var,reg) mmx_m2r (psrld, var, reg)
+#define psrld_r2r(regs,regd) mmx_r2r (psrld, regs, regd)
+#define psrlq_i2r(imm,reg) mmx_i2r (psrlq, imm, reg)
+#define psrlq_m2r(var,reg) mmx_m2r (psrlq, var, reg)
+#define psrlq_r2r(regs,regd) mmx_r2r (psrlq, regs, regd)
+#define psrlw_i2r(imm,reg) mmx_i2r (psrlw, imm, reg)
+#define psrlw_m2r(var,reg) mmx_m2r (psrlw, var, reg)
+#define psrlw_r2r(regs,regd) mmx_r2r (psrlw, regs, regd)
+
+#define psubb_m2r(var,reg) mmx_m2r (psubb, var, reg)
+#define psubb_r2r(regs,regd) mmx_r2r (psubb, regs, regd)
+#define psubd_m2r(var,reg) mmx_m2r (psubd, var, reg)
+#define psubd_r2r(regs,regd) mmx_r2r (psubd, regs, regd)
+#define psubw_m2r(var,reg) mmx_m2r (psubw, var, reg)
+#define psubw_r2r(regs,regd) mmx_r2r (psubw, regs, regd)
+
+#define psubsb_m2r(var,reg) mmx_m2r (psubsb, var, reg)
+#define psubsb_r2r(regs,regd) mmx_r2r (psubsb, regs, regd)
+#define psubsw_m2r(var,reg) mmx_m2r (psubsw, var, reg)
+#define psubsw_r2r(regs,regd) mmx_r2r (psubsw, regs, regd)
+
+#define psubusb_m2r(var,reg) mmx_m2r (psubusb, var, reg)
+#define psubusb_r2r(regs,regd) mmx_r2r (psubusb, regs, regd)
+#define psubusw_m2r(var,reg) mmx_m2r (psubusw, var, reg)
+#define psubusw_r2r(regs,regd) mmx_r2r (psubusw, regs, regd)
+
+#define punpckhbw_m2r(var,reg) mmx_m2r (punpckhbw, var, reg)
+#define punpckhbw_r2r(regs,regd) mmx_r2r (punpckhbw, regs, regd)
+#define punpckhdq_m2r(var,reg) mmx_m2r (punpckhdq, var, reg)
+#define punpckhdq_r2r(regs,regd) mmx_r2r (punpckhdq, regs, regd)
+#define punpckhwd_m2r(var,reg) mmx_m2r (punpckhwd, var, reg)
+#define punpckhwd_r2r(regs,regd) mmx_r2r (punpckhwd, regs, regd)
+
+#define punpcklbw_m2r(var,reg) mmx_m2r (punpcklbw, var, reg)
+#define punpcklbw_r2r(regs,regd) mmx_r2r (punpcklbw, regs, regd)
+#define punpckldq_m2r(var,reg) mmx_m2r (punpckldq, var, reg)
+#define punpckldq_r2r(regs,regd) mmx_r2r (punpckldq, regs, regd)
+#define punpcklwd_m2r(var,reg) mmx_m2r (punpcklwd, var, reg)
+#define punpcklwd_r2r(regs,regd) mmx_r2r (punpcklwd, regs, regd)
+
+#define pxor_m2r(var,reg) mmx_m2r (pxor, var, reg)
+#define pxor_r2r(regs,regd) mmx_r2r (pxor, regs, regd)
+
+
+/* 3DNOW extensions */
+
+#define pavgusb_m2r(var,reg) mmx_m2r (pavgusb, var, reg)
+#define pavgusb_r2r(regs,regd) mmx_r2r (pavgusb, regs, regd)
+
+
+/* AMD MMX extensions - also available in intel SSE */
+
+
+#define mmx_m2ri(op,mem,reg,imm) \
+ __asm__ volatile (#op " %1, %0, %%" #reg \
+ : /* nothing */ \
+ : "m" (mem), "i" (imm))
+#define mmx_r2ri(op,regs,regd,imm) \
+ __asm__ volatile (#op " %0, %%" #regs ", %%" #regd \
+ : /* nothing */ \
+ : "i" (imm) )
+
+#define mmx_fetch(mem,hint) \
+ __asm__ volatile ("prefetch" #hint " %0" \
+ : /* nothing */ \
+ : "m" (mem))
+
+
+#define maskmovq(regs,maskreg) mmx_r2ri (maskmovq, regs, maskreg)
+
+#define movntq_r2m(mmreg,var) mmx_r2m (movntq, mmreg, var)
+
+#define pavgb_m2r(var,reg) mmx_m2r (pavgb, var, reg)
+#define pavgb_r2r(regs,regd) mmx_r2r (pavgb, regs, regd)
+#define pavgw_m2r(var,reg) mmx_m2r (pavgw, var, reg)
+#define pavgw_r2r(regs,regd) mmx_r2r (pavgw, regs, regd)
+
+#define pextrw_r2r(mmreg,reg,imm) mmx_r2ri (pextrw, mmreg, reg, imm)
+
+#define pinsrw_r2r(reg,mmreg,imm) mmx_r2ri (pinsrw, reg, mmreg, imm)
+
+#define pmaxsw_m2r(var,reg) mmx_m2r (pmaxsw, var, reg)
+#define pmaxsw_r2r(regs,regd) mmx_r2r (pmaxsw, regs, regd)
+
+#define pmaxub_m2r(var,reg) mmx_m2r (pmaxub, var, reg)
+#define pmaxub_r2r(regs,regd) mmx_r2r (pmaxub, regs, regd)
+
+#define pminsw_m2r(var,reg) mmx_m2r (pminsw, var, reg)
+#define pminsw_r2r(regs,regd) mmx_r2r (pminsw, regs, regd)
+
+#define pminub_m2r(var,reg) mmx_m2r (pminub, var, reg)
+#define pminub_r2r(regs,regd) mmx_r2r (pminub, regs, regd)
+
+#define pmovmskb(mmreg,reg) \
+ __asm__ volatile ("movmskps %" #mmreg ", %" #reg)
+
+#define pmulhuw_m2r(var,reg) mmx_m2r (pmulhuw, var, reg)
+#define pmulhuw_r2r(regs,regd) mmx_r2r (pmulhuw, regs, regd)
+
+#define prefetcht0(mem) mmx_fetch (mem, t0)
+#define prefetcht1(mem) mmx_fetch (mem, t1)
+#define prefetcht2(mem) mmx_fetch (mem, t2)
+#define prefetchnta(mem) mmx_fetch (mem, nta)
+
+#define psadbw_m2r(var,reg) mmx_m2r (psadbw, var, reg)
+#define psadbw_r2r(regs,regd) mmx_r2r (psadbw, regs, regd)
+
+#define pshufw_m2r(var,reg,imm) mmx_m2ri(pshufw, var, reg, imm)
+#define pshufw_r2r(regs,regd,imm) mmx_r2ri(pshufw, regs, regd, imm)
+
+#define sfence() __asm__ volatile ("sfence\n\t")
+
+/* SSE2 */
+#define pshufhw_m2r(var,reg,imm) mmx_m2ri(pshufhw, var, reg, imm)
+#define pshufhw_r2r(regs,regd,imm) mmx_r2ri(pshufhw, regs, regd, imm)
+#define pshuflw_m2r(var,reg,imm) mmx_m2ri(pshuflw, var, reg, imm)
+#define pshuflw_r2r(regs,regd,imm) mmx_r2ri(pshuflw, regs, regd, imm)
+
+#define pshufd_r2r(regs,regd,imm) mmx_r2ri(pshufd, regs, regd, imm)
+
+#define movdqa_m2r(var,reg) mmx_m2r (movdqa, var, reg)
+#define movdqa_r2m(reg,var) mmx_r2m (movdqa, reg, var)
+#define movdqa_r2r(regs,regd) mmx_r2r (movdqa, regs, regd)
+#define movdqu_m2r(var,reg) mmx_m2r (movdqu, var, reg)
+#define movdqu_r2m(reg,var) mmx_r2m (movdqu, reg, var)
+#define movdqu_r2r(regs,regd) mmx_r2r (movdqu, regs, regd)
+
+#define pmullw_r2m(reg,var) mmx_r2m (pmullw, reg, var)
+
+#define pslldq_i2r(imm,reg) mmx_i2r (pslldq, imm, reg)
+#define psrldq_i2r(imm,reg) mmx_i2r (psrldq, imm, reg)
+
+#define punpcklqdq_r2r(regs,regd) mmx_r2r (punpcklqdq, regs, regd)
+#define punpckhqdq_r2r(regs,regd) mmx_r2r (punpckhqdq, regs, regd)
+
+
+#endif /* AVCODEC_X86_MMX_H */
diff --git a/libavcodec/i386/motion_est_mmx.c b/libavcodec/x86/motion_est_mmx.c
similarity index 100%
rename from libavcodec/i386/motion_est_mmx.c
rename to libavcodec/x86/motion_est_mmx.c
diff --git a/libavcodec/i386/mpegvideo_mmx.c b/libavcodec/x86/mpegvideo_mmx.c
similarity index 100%
rename from libavcodec/i386/mpegvideo_mmx.c
rename to libavcodec/x86/mpegvideo_mmx.c
diff --git a/libavcodec/i386/mpegvideo_mmx_template.c b/libavcodec/x86/mpegvideo_mmx_template.c
similarity index 100%
rename from libavcodec/i386/mpegvideo_mmx_template.c
rename to libavcodec/x86/mpegvideo_mmx_template.c
diff --git a/libavcodec/x86/rv40dsp_mmx.c b/libavcodec/x86/rv40dsp_mmx.c
new file mode 100644
index 0000000..47461c6
--- /dev/null
+++ b/libavcodec/x86/rv40dsp_mmx.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2008 Konstantin Shishkov, Mathieu Velten
+ *
+ * MMX-optimized DSP functions for RV40, based on H.264 optimizations by
+ * Michael Niedermayer and Loren Merritt
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "dsputil_mmx.h"
+
+/* bias interleaved with bias div 8, use p+1 to access bias div 8 */
+DECLARE_ALIGNED_8(static const uint64_t, rv40_bias_reg[4][8]) = {
+ { 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0010001000100010ULL, 0x0002000200020002ULL,
+ 0x0020002000200020ULL, 0x0004000400040004ULL, 0x0010001000100010ULL, 0x0002000200020002ULL },
+ { 0x0020002000200020ULL, 0x0004000400040004ULL, 0x001C001C001C001CULL, 0x0003000300030003ULL,
+ 0x0020002000200020ULL, 0x0004000400040004ULL, 0x001C001C001C001CULL, 0x0003000300030003ULL },
+ { 0x0000000000000000ULL, 0x0000000000000000ULL, 0x0020002000200020ULL, 0x0004000400040004ULL,
+ 0x0010001000100010ULL, 0x0002000200020002ULL, 0x0020002000200020ULL, 0x0004000400040004ULL },
+ { 0x0020002000200020ULL, 0x0004000400040004ULL, 0x001C001C001C001CULL, 0x0003000300030003ULL,
+ 0x0020002000200020ULL, 0x0004000400040004ULL, 0x001C001C001C001CULL, 0x0003000300030003ULL }
+};
+
+static void put_rv40_chroma_mc8_mmx(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
+{
+ put_h264_chroma_generic_mc8_mmx(dst, src, stride, h, x, y, &rv40_bias_reg[y>>1][x&(~1)]);
+}
+static void put_rv40_chroma_mc4_mmx(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
+{
+ put_h264_chroma_generic_mc4_mmx(dst, src, stride, h, x, y, &rv40_bias_reg[y>>1][x&(~1)]);
+}
+static void avg_rv40_chroma_mc8_mmx2(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
+{
+ avg_h264_chroma_generic_mc8_mmx2(dst, src, stride, h, x, y, &rv40_bias_reg[y>>1][x&(~1)]);
+}
+static void avg_rv40_chroma_mc4_mmx2(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
+{
+ avg_h264_chroma_generic_mc4_mmx2(dst, src, stride, h, x, y, &rv40_bias_reg[y>>1][x&(~1)]);
+}
+static void avg_rv40_chroma_mc8_3dnow(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
+{
+ avg_h264_chroma_generic_mc8_3dnow(dst, src, stride, h, x, y, &rv40_bias_reg[y>>1][x&(~1)]);
+}
+static void avg_rv40_chroma_mc4_3dnow(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
+{
+ avg_h264_chroma_generic_mc4_3dnow(dst, src, stride, h, x, y, &rv40_bias_reg[y>>1][x&(~1)]);
+}
diff --git a/libavcodec/x86/simple_idct_mmx.c b/libavcodec/x86/simple_idct_mmx.c
new file mode 100644
index 0000000..5786744
--- /dev/null
+++ b/libavcodec/x86/simple_idct_mmx.c
@@ -0,0 +1,1295 @@
+/*
+ * Simple IDCT MMX
+ *
+ * Copyright (c) 2001, 2002 Michael Niedermayer <michaelni at gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "libavcodec/dsputil.h"
+#include "libavcodec/simple_idct.h"
+
+/*
+23170.475006
+22725.260826
+21406.727617
+19265.545870
+16384.000000
+12872.826198
+8866.956905
+4520.335430
+*/
+#define C0 23170 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define C1 22725 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define C2 21407 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define C3 19266 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#if 0
+#define C4 16384 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#else
+#define C4 16383 //cos(i*M_PI/16)*sqrt(2)*(1<<14) - 0.5
+#endif
+#define C5 12873 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define C6 8867 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define C7 4520 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+
+#define ROW_SHIFT 11
+#define COL_SHIFT 20 // 6
+
+DECLARE_ASM_CONST(8, uint64_t, wm1010)= 0xFFFF0000FFFF0000ULL;
+DECLARE_ASM_CONST(8, uint64_t, d40000)= 0x0000000000040000ULL;
+
+DECLARE_ALIGNED(8, static const int16_t, coeffs[])= {
+ 1<<(ROW_SHIFT-1), 0, 1<<(ROW_SHIFT-1), 0,
+// 1<<(COL_SHIFT-1), 0, 1<<(COL_SHIFT-1), 0,
+// 0, 1<<(COL_SHIFT-1-16), 0, 1<<(COL_SHIFT-1-16),
+ 1<<(ROW_SHIFT-1), 1, 1<<(ROW_SHIFT-1), 0,
+ // the 1 = ((1<<(COL_SHIFT-1))/C4)<<ROW_SHIFT :)
+// 0, 0, 0, 0,
+// 0, 0, 0, 0,
+
+ C4, C4, C4, C4,
+ C4, -C4, C4, -C4,
+
+ C2, C6, C2, C6,
+ C6, -C2, C6, -C2,
+
+ C1, C3, C1, C3,
+ C5, C7, C5, C7,
+
+ C3, -C7, C3, -C7,
+-C1, -C5, -C1, -C5,
+
+ C5, -C1, C5, -C1,
+ C7, C3, C7, C3,
+
+ C7, -C5, C7, -C5,
+ C3, -C1, C3, -C1
+};
+
+#if 0
+static void unused_var_killer(void)
+{
+ int a= wm1010 + d40000;
+ temp[0]=a;
+}
+
+static void inline idctCol (int16_t * col, int16_t *input)
+{
+#undef C0
+#undef C1
+#undef C2
+#undef C3
+#undef C4
+#undef C5
+#undef C6
+#undef C7
+ int a0, a1, a2, a3, b0, b1, b2, b3;
+ const int C0 = 23170; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+ const int C1 = 22725; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+ const int C2 = 21407; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+ const int C3 = 19266; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+ const int C4 = 16383; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+ const int C5 = 12873; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+ const int C6 = 8867; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+ const int C7 = 4520; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+/*
+ if( !(col[8*1] | col[8*2] |col[8*3] |col[8*4] |col[8*5] |col[8*6] | col[8*7])) {
+ col[8*0] = col[8*1] = col[8*2] = col[8*3] = col[8*4] =
+ col[8*5] = col[8*6] = col[8*7] = col[8*0]<<3;
+ return;
+ }*/
+
+col[8*0] = input[8*0 + 0];
+col[8*1] = input[8*2 + 0];
+col[8*2] = input[8*0 + 1];
+col[8*3] = input[8*2 + 1];
+col[8*4] = input[8*4 + 0];
+col[8*5] = input[8*6 + 0];
+col[8*6] = input[8*4 + 1];
+col[8*7] = input[8*6 + 1];
+
+ a0 = C4*col[8*0] + C2*col[8*2] + C4*col[8*4] + C6*col[8*6] + (1<<(COL_SHIFT-1));
+ a1 = C4*col[8*0] + C6*col[8*2] - C4*col[8*4] - C2*col[8*6] + (1<<(COL_SHIFT-1));
+ a2 = C4*col[8*0] - C6*col[8*2] - C4*col[8*4] + C2*col[8*6] + (1<<(COL_SHIFT-1));
+ a3 = C4*col[8*0] - C2*col[8*2] + C4*col[8*4] - C6*col[8*6] + (1<<(COL_SHIFT-1));
+
+ b0 = C1*col[8*1] + C3*col[8*3] + C5*col[8*5] + C7*col[8*7];
+ b1 = C3*col[8*1] - C7*col[8*3] - C1*col[8*5] - C5*col[8*7];
+ b2 = C5*col[8*1] - C1*col[8*3] + C7*col[8*5] + C3*col[8*7];
+ b3 = C7*col[8*1] - C5*col[8*3] + C3*col[8*5] - C1*col[8*7];
+
+ col[8*0] = (a0 + b0) >> COL_SHIFT;
+ col[8*1] = (a1 + b1) >> COL_SHIFT;
+ col[8*2] = (a2 + b2) >> COL_SHIFT;
+ col[8*3] = (a3 + b3) >> COL_SHIFT;
+ col[8*4] = (a3 - b3) >> COL_SHIFT;
+ col[8*5] = (a2 - b2) >> COL_SHIFT;
+ col[8*6] = (a1 - b1) >> COL_SHIFT;
+ col[8*7] = (a0 - b0) >> COL_SHIFT;
+}
+
+static void inline idctRow (int16_t * output, int16_t * input)
+{
+ int16_t row[8];
+
+ int a0, a1, a2, a3, b0, b1, b2, b3;
+ const int C0 = 23170; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+ const int C1 = 22725; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+ const int C2 = 21407; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+ const int C3 = 19266; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+ const int C4 = 16383; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+ const int C5 = 12873; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+ const int C6 = 8867; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+ const int C7 = 4520; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+
+row[0] = input[0];
+row[2] = input[1];
+row[4] = input[4];
+row[6] = input[5];
+row[1] = input[8];
+row[3] = input[9];
+row[5] = input[12];
+row[7] = input[13];
+
+ if( !(row[1] | row[2] |row[3] |row[4] |row[5] |row[6] | row[7]) ) {
+ row[0] = row[1] = row[2] = row[3] = row[4] =
+ row[5] = row[6] = row[7] = row[0]<<3;
+ output[0] = row[0];
+ output[2] = row[1];
+ output[4] = row[2];
+ output[6] = row[3];
+ output[8] = row[4];
+ output[10] = row[5];
+ output[12] = row[6];
+ output[14] = row[7];
+ return;
+ }
+
+ a0 = C4*row[0] + C2*row[2] + C4*row[4] + C6*row[6] + (1<<(ROW_SHIFT-1));
+ a1 = C4*row[0] + C6*row[2] - C4*row[4] - C2*row[6] + (1<<(ROW_SHIFT-1));
+ a2 = C4*row[0] - C6*row[2] - C4*row[4] + C2*row[6] + (1<<(ROW_SHIFT-1));
+ a3 = C4*row[0] - C2*row[2] + C4*row[4] - C6*row[6] + (1<<(ROW_SHIFT-1));
+
+ b0 = C1*row[1] + C3*row[3] + C5*row[5] + C7*row[7];
+ b1 = C3*row[1] - C7*row[3] - C1*row[5] - C5*row[7];
+ b2 = C5*row[1] - C1*row[3] + C7*row[5] + C3*row[7];
+ b3 = C7*row[1] - C5*row[3] + C3*row[5] - C1*row[7];
+
+ row[0] = (a0 + b0) >> ROW_SHIFT;
+ row[1] = (a1 + b1) >> ROW_SHIFT;
+ row[2] = (a2 + b2) >> ROW_SHIFT;
+ row[3] = (a3 + b3) >> ROW_SHIFT;
+ row[4] = (a3 - b3) >> ROW_SHIFT;
+ row[5] = (a2 - b2) >> ROW_SHIFT;
+ row[6] = (a1 - b1) >> ROW_SHIFT;
+ row[7] = (a0 - b0) >> ROW_SHIFT;
+
+ output[0] = row[0];
+ output[2] = row[1];
+ output[4] = row[2];
+ output[6] = row[3];
+ output[8] = row[4];
+ output[10] = row[5];
+ output[12] = row[6];
+ output[14] = row[7];
+}
+#endif
+
+static inline void idct(int16_t *block)
+{
+ DECLARE_ALIGNED(8, int64_t, align_tmp[16]);
+ int16_t * const temp= (int16_t*)align_tmp;
+
+ __asm__ volatile(
+#if 0 //Alternative, simpler variant
+
+#define ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
+ "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
+ "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
+ "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
+ "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
+ "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
+ "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
+ "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
+ "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
+ "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
+ "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
+ "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
+ "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
+ "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
+ #rounder ", %%mm4 \n\t"\
+ "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
+ "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
+ "movq 56(%2), %%mm5 \n\t" /* C7 C5 C7 C5 */\
+ "pmaddwd %%mm3, %%mm5 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
+ #rounder ", %%mm0 \n\t"\
+ "paddd %%mm0, %%mm1 \n\t" /* A1 a1 */\
+ "paddd %%mm0, %%mm0 \n\t" \
+ "psubd %%mm1, %%mm0 \n\t" /* A2 a2 */\
+ "pmaddwd 64(%2), %%mm2 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
+ "paddd %%mm5, %%mm7 \n\t" /* B0 b0 */\
+ "movq 72(%2), %%mm5 \n\t" /* -C5 -C1 -C5 -C1 */\
+ "pmaddwd %%mm3, %%mm5 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
+ "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
+ "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
+ "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
+ "paddd %%mm2, %%mm5 \n\t" /* B1 b1 */\
+ "psrad $" #shift ", %%mm7 \n\t"\
+ "psrad $" #shift ", %%mm4 \n\t"\
+ "movq %%mm1, %%mm2 \n\t" /* A1 a1 */\
+ "paddd %%mm5, %%mm1 \n\t" /* A1+B1 a1+b1 */\
+ "psubd %%mm5, %%mm2 \n\t" /* A1-B1 a1-b1 */\
+ "psrad $" #shift ", %%mm1 \n\t"\
+ "psrad $" #shift ", %%mm2 \n\t"\
+ "packssdw %%mm1, %%mm7 \n\t" /* A1+B1 a1+b1 A0+B0 a0+b0 */\
+ "packssdw %%mm4, %%mm2 \n\t" /* A0-B0 a0-b0 A1-B1 a1-b1 */\
+ "movq %%mm7, " #dst " \n\t"\
+ "movq " #src1 ", %%mm1 \n\t" /* R3 R1 r3 r1 */\
+ "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
+ "movq %%mm2, 24+" #dst " \n\t"\
+ "pmaddwd %%mm1, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
+ "movq 88(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\
+ "pmaddwd 96(%2), %%mm1 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
+ "pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
+ "movq %%mm0, %%mm2 \n\t" /* A2 a2 */\
+ "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
+ "paddd %%mm7, %%mm4 \n\t" /* B2 b2 */\
+ "paddd %%mm4, %%mm2 \n\t" /* A2+B2 a2+b2 */\
+ "psubd %%mm4, %%mm0 \n\t" /* a2-B2 a2-b2 */\
+ "psrad $" #shift ", %%mm2 \n\t"\
+ "psrad $" #shift ", %%mm0 \n\t"\
+ "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
+ "paddd %%mm1, %%mm3 \n\t" /* B3 b3 */\
+ "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
+ "psubd %%mm3, %%mm4 \n\t" /* a3-B3 a3-b3 */\
+ "psrad $" #shift ", %%mm6 \n\t"\
+ "packssdw %%mm6, %%mm2 \n\t" /* A3+B3 a3+b3 A2+B2 a2+b2 */\
+ "movq %%mm2, 8+" #dst " \n\t"\
+ "psrad $" #shift ", %%mm4 \n\t"\
+ "packssdw %%mm0, %%mm4 \n\t" /* A2-B2 a2-b2 A3-B3 a3-b3 */\
+ "movq %%mm4, 16+" #dst " \n\t"\
+
+#define COL_IDCT(src0, src4, src1, src5, dst, shift) \
+ "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
+ "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
+ "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
+ "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
+ "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
+ "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
+ "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
+ "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
+ "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
+ "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
+ "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
+ "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
+ "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
+ "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
+ "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
+ "movq %%mm0, %%mm5 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
+ "paddd %%mm1, %%mm0 \n\t" /* A1 a1 */\
+ "psubd %%mm1, %%mm5 \n\t" /* A2 a2 */\
+ "movq 56(%2), %%mm1 \n\t" /* C7 C5 C7 C5 */\
+ "pmaddwd %%mm3, %%mm1 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
+ "pmaddwd 64(%2), %%mm2 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
+ "paddd %%mm1, %%mm7 \n\t" /* B0 b0 */\
+ "movq 72(%2), %%mm1 \n\t" /* -C5 -C1 -C5 -C1 */\
+ "pmaddwd %%mm3, %%mm1 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
+ "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
+ "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
+ "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
+ "paddd %%mm2, %%mm1 \n\t" /* B1 b1 */\
+ "psrad $" #shift ", %%mm7 \n\t"\
+ "psrad $" #shift ", %%mm4 \n\t"\
+ "movq %%mm0, %%mm2 \n\t" /* A1 a1 */\
+ "paddd %%mm1, %%mm0 \n\t" /* A1+B1 a1+b1 */\
+ "psubd %%mm1, %%mm2 \n\t" /* A1-B1 a1-b1 */\
+ "psrad $" #shift ", %%mm0 \n\t"\
+ "psrad $" #shift ", %%mm2 \n\t"\
+ "packssdw %%mm7, %%mm7 \n\t" /* A0+B0 a0+b0 */\
+ "movd %%mm7, " #dst " \n\t"\
+ "packssdw %%mm0, %%mm0 \n\t" /* A1+B1 a1+b1 */\
+ "movd %%mm0, 16+" #dst " \n\t"\
+ "packssdw %%mm2, %%mm2 \n\t" /* A1-B1 a1-b1 */\
+ "movd %%mm2, 96+" #dst " \n\t"\
+ "packssdw %%mm4, %%mm4 \n\t" /* A0-B0 a0-b0 */\
+ "movd %%mm4, 112+" #dst " \n\t"\
+ "movq " #src1 ", %%mm0 \n\t" /* R3 R1 r3 r1 */\
+ "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
+ "pmaddwd %%mm0, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
+ "movq 88(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\
+ "pmaddwd 96(%2), %%mm0 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
+ "pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
+ "movq %%mm5, %%mm2 \n\t" /* A2 a2 */\
+ "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
+ "paddd %%mm7, %%mm4 \n\t" /* B2 b2 */\
+ "paddd %%mm4, %%mm2 \n\t" /* A2+B2 a2+b2 */\
+ "psubd %%mm4, %%mm5 \n\t" /* a2-B2 a2-b2 */\
+ "psrad $" #shift ", %%mm2 \n\t"\
+ "psrad $" #shift ", %%mm5 \n\t"\
+ "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
+ "paddd %%mm0, %%mm3 \n\t" /* B3 b3 */\
+ "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
+ "psubd %%mm3, %%mm4 \n\t" /* a3-B3 a3-b3 */\
+ "psrad $" #shift ", %%mm6 \n\t"\
+ "psrad $" #shift ", %%mm4 \n\t"\
+ "packssdw %%mm2, %%mm2 \n\t" /* A2+B2 a2+b2 */\
+ "packssdw %%mm6, %%mm6 \n\t" /* A3+B3 a3+b3 */\
+ "movd %%mm2, 32+" #dst " \n\t"\
+ "packssdw %%mm4, %%mm4 \n\t" /* A3-B3 a3-b3 */\
+ "packssdw %%mm5, %%mm5 \n\t" /* A2-B2 a2-b2 */\
+ "movd %%mm6, 48+" #dst " \n\t"\
+ "movd %%mm4, 64+" #dst " \n\t"\
+ "movd %%mm5, 80+" #dst " \n\t"\
+
+
+#define DC_COND_ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
+ "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
+ "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
+ "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
+ "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
+ "movq "MANGLE(wm1010)", %%mm4 \n\t"\
+ "pand %%mm0, %%mm4 \n\t"\
+ "por %%mm1, %%mm4 \n\t"\
+ "por %%mm2, %%mm4 \n\t"\
+ "por %%mm3, %%mm4 \n\t"\
+ "packssdw %%mm4,%%mm4 \n\t"\
+ "movd %%mm4, %%eax \n\t"\
+ "orl %%eax, %%eax \n\t"\
+ "jz 1f \n\t"\
+ "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
+ "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
+ "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
+ "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
+ "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
+ "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
+ "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
+ "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
+ "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
+ #rounder ", %%mm4 \n\t"\
+ "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
+ "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
+ "movq 56(%2), %%mm5 \n\t" /* C7 C5 C7 C5 */\
+ "pmaddwd %%mm3, %%mm5 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
+ #rounder ", %%mm0 \n\t"\
+ "paddd %%mm0, %%mm1 \n\t" /* A1 a1 */\
+ "paddd %%mm0, %%mm0 \n\t" \
+ "psubd %%mm1, %%mm0 \n\t" /* A2 a2 */\
+ "pmaddwd 64(%2), %%mm2 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
+ "paddd %%mm5, %%mm7 \n\t" /* B0 b0 */\
+ "movq 72(%2), %%mm5 \n\t" /* -C5 -C1 -C5 -C1 */\
+ "pmaddwd %%mm3, %%mm5 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
+ "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
+ "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
+ "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
+ "paddd %%mm2, %%mm5 \n\t" /* B1 b1 */\
+ "psrad $" #shift ", %%mm7 \n\t"\
+ "psrad $" #shift ", %%mm4 \n\t"\
+ "movq %%mm1, %%mm2 \n\t" /* A1 a1 */\
+ "paddd %%mm5, %%mm1 \n\t" /* A1+B1 a1+b1 */\
+ "psubd %%mm5, %%mm2 \n\t" /* A1-B1 a1-b1 */\
+ "psrad $" #shift ", %%mm1 \n\t"\
+ "psrad $" #shift ", %%mm2 \n\t"\
+ "packssdw %%mm1, %%mm7 \n\t" /* A1+B1 a1+b1 A0+B0 a0+b0 */\
+ "packssdw %%mm4, %%mm2 \n\t" /* A0-B0 a0-b0 A1-B1 a1-b1 */\
+ "movq %%mm7, " #dst " \n\t"\
+ "movq " #src1 ", %%mm1 \n\t" /* R3 R1 r3 r1 */\
+ "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
+ "movq %%mm2, 24+" #dst " \n\t"\
+ "pmaddwd %%mm1, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
+ "movq 88(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\
+ "pmaddwd 96(%2), %%mm1 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
+ "pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
+ "movq %%mm0, %%mm2 \n\t" /* A2 a2 */\
+ "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
+ "paddd %%mm7, %%mm4 \n\t" /* B2 b2 */\
+ "paddd %%mm4, %%mm2 \n\t" /* A2+B2 a2+b2 */\
+ "psubd %%mm4, %%mm0 \n\t" /* a2-B2 a2-b2 */\
+ "psrad $" #shift ", %%mm2 \n\t"\
+ "psrad $" #shift ", %%mm0 \n\t"\
+ "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
+ "paddd %%mm1, %%mm3 \n\t" /* B3 b3 */\
+ "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
+ "psubd %%mm3, %%mm4 \n\t" /* a3-B3 a3-b3 */\
+ "psrad $" #shift ", %%mm6 \n\t"\
+ "packssdw %%mm6, %%mm2 \n\t" /* A3+B3 a3+b3 A2+B2 a2+b2 */\
+ "movq %%mm2, 8+" #dst " \n\t"\
+ "psrad $" #shift ", %%mm4 \n\t"\
+ "packssdw %%mm0, %%mm4 \n\t" /* A2-B2 a2-b2 A3-B3 a3-b3 */\
+ "movq %%mm4, 16+" #dst " \n\t"\
+ "jmp 2f \n\t"\
+ "1: \n\t"\
+ "pslld $16, %%mm0 \n\t"\
+ "#paddd "MANGLE(d40000)", %%mm0 \n\t"\
+ "psrad $13, %%mm0 \n\t"\
+ "packssdw %%mm0, %%mm0 \n\t"\
+ "movq %%mm0, " #dst " \n\t"\
+ "movq %%mm0, 8+" #dst " \n\t"\
+ "movq %%mm0, 16+" #dst " \n\t"\
+ "movq %%mm0, 24+" #dst " \n\t"\
+ "2: \n\t"
+
+
+//IDCT( src0, src4, src1, src5, dst, rounder, shift)
+ROW_IDCT( (%0), 8(%0), 16(%0), 24(%0), 0(%1),paddd 8(%2), 11)
+/*ROW_IDCT( 32(%0), 40(%0), 48(%0), 56(%0), 32(%1), paddd (%2), 11)
+ROW_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1), paddd (%2), 11)
+ROW_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1), paddd (%2), 11)*/
+
+DC_COND_ROW_IDCT( 32(%0), 40(%0), 48(%0), 56(%0), 32(%1),paddd (%2), 11)
+DC_COND_ROW_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11)
+DC_COND_ROW_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11)
+
+
+//IDCT( src0, src4, src1, src5, dst, shift)
+COL_IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
+COL_IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
+COL_IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
+COL_IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
+
+#else
+
+#define DC_COND_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
+ "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
+ "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
+ "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
+ "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
+ "movq "MANGLE(wm1010)", %%mm4 \n\t"\
+ "pand %%mm0, %%mm4 \n\t"\
+ "por %%mm1, %%mm4 \n\t"\
+ "por %%mm2, %%mm4 \n\t"\
+ "por %%mm3, %%mm4 \n\t"\
+ "packssdw %%mm4,%%mm4 \n\t"\
+ "movd %%mm4, %%eax \n\t"\
+ "orl %%eax, %%eax \n\t"\
+ "jz 1f \n\t"\
+ "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
+ "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
+ "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
+ "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
+ "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
+ "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
+ "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
+ "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
+ "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
+ #rounder ", %%mm4 \n\t"\
+ "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
+ "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
+ "movq 56(%2), %%mm5 \n\t" /* C7 C5 C7 C5 */\
+ "pmaddwd %%mm3, %%mm5 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
+ #rounder ", %%mm0 \n\t"\
+ "paddd %%mm0, %%mm1 \n\t" /* A1 a1 */\
+ "paddd %%mm0, %%mm0 \n\t" \
+ "psubd %%mm1, %%mm0 \n\t" /* A2 a2 */\
+ "pmaddwd 64(%2), %%mm2 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
+ "paddd %%mm5, %%mm7 \n\t" /* B0 b0 */\
+ "movq 72(%2), %%mm5 \n\t" /* -C5 -C1 -C5 -C1 */\
+ "pmaddwd %%mm3, %%mm5 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
+ "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
+ "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
+ "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
+ "paddd %%mm2, %%mm5 \n\t" /* B1 b1 */\
+ "psrad $" #shift ", %%mm7 \n\t"\
+ "psrad $" #shift ", %%mm4 \n\t"\
+ "movq %%mm1, %%mm2 \n\t" /* A1 a1 */\
+ "paddd %%mm5, %%mm1 \n\t" /* A1+B1 a1+b1 */\
+ "psubd %%mm5, %%mm2 \n\t" /* A1-B1 a1-b1 */\
+ "psrad $" #shift ", %%mm1 \n\t"\
+ "psrad $" #shift ", %%mm2 \n\t"\
+ "packssdw %%mm1, %%mm7 \n\t" /* A1+B1 a1+b1 A0+B0 a0+b0 */\
+ "packssdw %%mm4, %%mm2 \n\t" /* A0-B0 a0-b0 A1-B1 a1-b1 */\
+ "movq %%mm7, " #dst " \n\t"\
+ "movq " #src1 ", %%mm1 \n\t" /* R3 R1 r3 r1 */\
+ "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
+ "movq %%mm2, 24+" #dst " \n\t"\
+ "pmaddwd %%mm1, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
+ "movq 88(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\
+ "pmaddwd 96(%2), %%mm1 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
+ "pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
+ "movq %%mm0, %%mm2 \n\t" /* A2 a2 */\
+ "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
+ "paddd %%mm7, %%mm4 \n\t" /* B2 b2 */\
+ "paddd %%mm4, %%mm2 \n\t" /* A2+B2 a2+b2 */\
+ "psubd %%mm4, %%mm0 \n\t" /* a2-B2 a2-b2 */\
+ "psrad $" #shift ", %%mm2 \n\t"\
+ "psrad $" #shift ", %%mm0 \n\t"\
+ "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
+ "paddd %%mm1, %%mm3 \n\t" /* B3 b3 */\
+ "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
+ "psubd %%mm3, %%mm4 \n\t" /* a3-B3 a3-b3 */\
+ "psrad $" #shift ", %%mm6 \n\t"\
+ "packssdw %%mm6, %%mm2 \n\t" /* A3+B3 a3+b3 A2+B2 a2+b2 */\
+ "movq %%mm2, 8+" #dst " \n\t"\
+ "psrad $" #shift ", %%mm4 \n\t"\
+ "packssdw %%mm0, %%mm4 \n\t" /* A2-B2 a2-b2 A3-B3 a3-b3 */\
+ "movq %%mm4, 16+" #dst " \n\t"\
+ "jmp 2f \n\t"\
+ "1: \n\t"\
+ "pslld $16, %%mm0 \n\t"\
+ "paddd "MANGLE(d40000)", %%mm0 \n\t"\
+ "psrad $13, %%mm0 \n\t"\
+ "packssdw %%mm0, %%mm0 \n\t"\
+ "movq %%mm0, " #dst " \n\t"\
+ "movq %%mm0, 8+" #dst " \n\t"\
+ "movq %%mm0, 16+" #dst " \n\t"\
+ "movq %%mm0, 24+" #dst " \n\t"\
+ "2: \n\t"
+
+#define Z_COND_IDCT(src0, src4, src1, src5, dst, rounder, shift, bt) \
+ "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
+ "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
+ "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
+ "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
+ "movq %%mm0, %%mm4 \n\t"\
+ "por %%mm1, %%mm4 \n\t"\
+ "por %%mm2, %%mm4 \n\t"\
+ "por %%mm3, %%mm4 \n\t"\
+ "packssdw %%mm4,%%mm4 \n\t"\
+ "movd %%mm4, %%eax \n\t"\
+ "orl %%eax, %%eax \n\t"\
+ "jz " #bt " \n\t"\
+ "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
+ "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
+ "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
+ "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
+ "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
+ "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
+ "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
+ "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
+ "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
+ #rounder ", %%mm4 \n\t"\
+ "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
+ "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
+ "movq 56(%2), %%mm5 \n\t" /* C7 C5 C7 C5 */\
+ "pmaddwd %%mm3, %%mm5 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
+ #rounder ", %%mm0 \n\t"\
+ "paddd %%mm0, %%mm1 \n\t" /* A1 a1 */\
+ "paddd %%mm0, %%mm0 \n\t" \
+ "psubd %%mm1, %%mm0 \n\t" /* A2 a2 */\
+ "pmaddwd 64(%2), %%mm2 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
+ "paddd %%mm5, %%mm7 \n\t" /* B0 b0 */\
+ "movq 72(%2), %%mm5 \n\t" /* -C5 -C1 -C5 -C1 */\
+ "pmaddwd %%mm3, %%mm5 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
+ "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
+ "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
+ "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
+ "paddd %%mm2, %%mm5 \n\t" /* B1 b1 */\
+ "psrad $" #shift ", %%mm7 \n\t"\
+ "psrad $" #shift ", %%mm4 \n\t"\
+ "movq %%mm1, %%mm2 \n\t" /* A1 a1 */\
+ "paddd %%mm5, %%mm1 \n\t" /* A1+B1 a1+b1 */\
+ "psubd %%mm5, %%mm2 \n\t" /* A1-B1 a1-b1 */\
+ "psrad $" #shift ", %%mm1 \n\t"\
+ "psrad $" #shift ", %%mm2 \n\t"\
+ "packssdw %%mm1, %%mm7 \n\t" /* A1+B1 a1+b1 A0+B0 a0+b0 */\
+ "packssdw %%mm4, %%mm2 \n\t" /* A0-B0 a0-b0 A1-B1 a1-b1 */\
+ "movq %%mm7, " #dst " \n\t"\
+ "movq " #src1 ", %%mm1 \n\t" /* R3 R1 r3 r1 */\
+ "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
+ "movq %%mm2, 24+" #dst " \n\t"\
+ "pmaddwd %%mm1, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
+ "movq 88(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\
+ "pmaddwd 96(%2), %%mm1 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
+ "pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
+ "movq %%mm0, %%mm2 \n\t" /* A2 a2 */\
+ "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
+ "paddd %%mm7, %%mm4 \n\t" /* B2 b2 */\
+ "paddd %%mm4, %%mm2 \n\t" /* A2+B2 a2+b2 */\
+ "psubd %%mm4, %%mm0 \n\t" /* a2-B2 a2-b2 */\
+ "psrad $" #shift ", %%mm2 \n\t"\
+ "psrad $" #shift ", %%mm0 \n\t"\
+ "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
+ "paddd %%mm1, %%mm3 \n\t" /* B3 b3 */\
+ "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
+ "psubd %%mm3, %%mm4 \n\t" /* a3-B3 a3-b3 */\
+ "psrad $" #shift ", %%mm6 \n\t"\
+ "packssdw %%mm6, %%mm2 \n\t" /* A3+B3 a3+b3 A2+B2 a2+b2 */\
+ "movq %%mm2, 8+" #dst " \n\t"\
+ "psrad $" #shift ", %%mm4 \n\t"\
+ "packssdw %%mm0, %%mm4 \n\t" /* A2-B2 a2-b2 A3-B3 a3-b3 */\
+ "movq %%mm4, 16+" #dst " \n\t"\
+
+#define ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
+ "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
+ "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
+ "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
+ "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
+ "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
+ "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
+ "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
+ "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
+ "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
+ "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
+ "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
+ "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
+ "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
+ #rounder ", %%mm4 \n\t"\
+ "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
+ "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
+ "movq 56(%2), %%mm5 \n\t" /* C7 C5 C7 C5 */\
+ "pmaddwd %%mm3, %%mm5 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
+ #rounder ", %%mm0 \n\t"\
+ "paddd %%mm0, %%mm1 \n\t" /* A1 a1 */\
+ "paddd %%mm0, %%mm0 \n\t" \
+ "psubd %%mm1, %%mm0 \n\t" /* A2 a2 */\
+ "pmaddwd 64(%2), %%mm2 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
+ "paddd %%mm5, %%mm7 \n\t" /* B0 b0 */\
+ "movq 72(%2), %%mm5 \n\t" /* -C5 -C1 -C5 -C1 */\
+ "pmaddwd %%mm3, %%mm5 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
+ "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
+ "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
+ "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
+ "paddd %%mm2, %%mm5 \n\t" /* B1 b1 */\
+ "psrad $" #shift ", %%mm7 \n\t"\
+ "psrad $" #shift ", %%mm4 \n\t"\
+ "movq %%mm1, %%mm2 \n\t" /* A1 a1 */\
+ "paddd %%mm5, %%mm1 \n\t" /* A1+B1 a1+b1 */\
+ "psubd %%mm5, %%mm2 \n\t" /* A1-B1 a1-b1 */\
+ "psrad $" #shift ", %%mm1 \n\t"\
+ "psrad $" #shift ", %%mm2 \n\t"\
+ "packssdw %%mm1, %%mm7 \n\t" /* A1+B1 a1+b1 A0+B0 a0+b0 */\
+ "packssdw %%mm4, %%mm2 \n\t" /* A0-B0 a0-b0 A1-B1 a1-b1 */\
+ "movq %%mm7, " #dst " \n\t"\
+ "movq " #src1 ", %%mm1 \n\t" /* R3 R1 r3 r1 */\
+ "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
+ "movq %%mm2, 24+" #dst " \n\t"\
+ "pmaddwd %%mm1, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
+ "movq 88(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\
+ "pmaddwd 96(%2), %%mm1 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
+ "pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
+ "movq %%mm0, %%mm2 \n\t" /* A2 a2 */\
+ "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
+ "paddd %%mm7, %%mm4 \n\t" /* B2 b2 */\
+ "paddd %%mm4, %%mm2 \n\t" /* A2+B2 a2+b2 */\
+ "psubd %%mm4, %%mm0 \n\t" /* a2-B2 a2-b2 */\
+ "psrad $" #shift ", %%mm2 \n\t"\
+ "psrad $" #shift ", %%mm0 \n\t"\
+ "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
+ "paddd %%mm1, %%mm3 \n\t" /* B3 b3 */\
+ "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
+ "psubd %%mm3, %%mm4 \n\t" /* a3-B3 a3-b3 */\
+ "psrad $" #shift ", %%mm6 \n\t"\
+ "packssdw %%mm6, %%mm2 \n\t" /* A3+B3 a3+b3 A2+B2 a2+b2 */\
+ "movq %%mm2, 8+" #dst " \n\t"\
+ "psrad $" #shift ", %%mm4 \n\t"\
+ "packssdw %%mm0, %%mm4 \n\t" /* A2-B2 a2-b2 A3-B3 a3-b3 */\
+ "movq %%mm4, 16+" #dst " \n\t"\
+
+//IDCT( src0, src4, src1, src5, dst, rounder, shift)
+DC_COND_IDCT( 0(%0), 8(%0), 16(%0), 24(%0), 0(%1),paddd 8(%2), 11)
+Z_COND_IDCT( 32(%0), 40(%0), 48(%0), 56(%0), 32(%1),paddd (%2), 11, 4f)
+Z_COND_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11, 2f)
+Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 1f)
+
+#undef IDCT
+#define IDCT(src0, src4, src1, src5, dst, shift) \
+ "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
+ "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
+ "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
+ "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
+ "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
+ "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
+ "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
+ "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
+ "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
+ "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
+ "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
+ "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
+ "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
+ "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
+ "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
+ "movq %%mm0, %%mm5 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
+ "paddd %%mm1, %%mm0 \n\t" /* A1 a1 */\
+ "psubd %%mm1, %%mm5 \n\t" /* A2 a2 */\
+ "movq 56(%2), %%mm1 \n\t" /* C7 C5 C7 C5 */\
+ "pmaddwd %%mm3, %%mm1 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
+ "pmaddwd 64(%2), %%mm2 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
+ "paddd %%mm1, %%mm7 \n\t" /* B0 b0 */\
+ "movq 72(%2), %%mm1 \n\t" /* -C5 -C1 -C5 -C1 */\
+ "pmaddwd %%mm3, %%mm1 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
+ "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
+ "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
+ "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
+ "paddd %%mm2, %%mm1 \n\t" /* B1 b1 */\
+ "psrad $" #shift ", %%mm7 \n\t"\
+ "psrad $" #shift ", %%mm4 \n\t"\
+ "movq %%mm0, %%mm2 \n\t" /* A1 a1 */\
+ "paddd %%mm1, %%mm0 \n\t" /* A1+B1 a1+b1 */\
+ "psubd %%mm1, %%mm2 \n\t" /* A1-B1 a1-b1 */\
+ "psrad $" #shift ", %%mm0 \n\t"\
+ "psrad $" #shift ", %%mm2 \n\t"\
+ "packssdw %%mm7, %%mm7 \n\t" /* A0+B0 a0+b0 */\
+ "movd %%mm7, " #dst " \n\t"\
+ "packssdw %%mm0, %%mm0 \n\t" /* A1+B1 a1+b1 */\
+ "movd %%mm0, 16+" #dst " \n\t"\
+ "packssdw %%mm2, %%mm2 \n\t" /* A1-B1 a1-b1 */\
+ "movd %%mm2, 96+" #dst " \n\t"\
+ "packssdw %%mm4, %%mm4 \n\t" /* A0-B0 a0-b0 */\
+ "movd %%mm4, 112+" #dst " \n\t"\
+ "movq " #src1 ", %%mm0 \n\t" /* R3 R1 r3 r1 */\
+ "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
+ "pmaddwd %%mm0, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
+ "movq 88(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\
+ "pmaddwd 96(%2), %%mm0 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
+ "pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
+ "movq %%mm5, %%mm2 \n\t" /* A2 a2 */\
+ "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
+ "paddd %%mm7, %%mm4 \n\t" /* B2 b2 */\
+ "paddd %%mm4, %%mm2 \n\t" /* A2+B2 a2+b2 */\
+ "psubd %%mm4, %%mm5 \n\t" /* a2-B2 a2-b2 */\
+ "psrad $" #shift ", %%mm2 \n\t"\
+ "psrad $" #shift ", %%mm5 \n\t"\
+ "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
+ "paddd %%mm0, %%mm3 \n\t" /* B3 b3 */\
+ "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
+ "psubd %%mm3, %%mm4 \n\t" /* a3-B3 a3-b3 */\
+ "psrad $" #shift ", %%mm6 \n\t"\
+ "psrad $" #shift ", %%mm4 \n\t"\
+ "packssdw %%mm2, %%mm2 \n\t" /* A2+B2 a2+b2 */\
+ "packssdw %%mm6, %%mm6 \n\t" /* A3+B3 a3+b3 */\
+ "movd %%mm2, 32+" #dst " \n\t"\
+ "packssdw %%mm4, %%mm4 \n\t" /* A3-B3 a3-b3 */\
+ "packssdw %%mm5, %%mm5 \n\t" /* A2-B2 a2-b2 */\
+ "movd %%mm6, 48+" #dst " \n\t"\
+ "movd %%mm4, 64+" #dst " \n\t"\
+ "movd %%mm5, 80+" #dst " \n\t"
+
+
+//IDCT( src0, src4, src1, src5, dst, shift)
+IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
+IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
+IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
+IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
+ "jmp 9f \n\t"
+
+ "#" ASMALIGN(4) \
+ "4: \n\t"
+Z_COND_IDCT( 64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11, 6f)
+Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 5f)
+
+#undef IDCT
+#define IDCT(src0, src4, src1, src5, dst, shift) \
+ "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
+ "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
+ "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
+ "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
+ "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
+ "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
+ "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
+ "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
+ "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
+ "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
+ "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
+ "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
+ "movq %%mm0, %%mm5 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
+ "paddd %%mm1, %%mm0 \n\t" /* A1 a1 */\
+ "psubd %%mm1, %%mm5 \n\t" /* A2 a2 */\
+ "movq 56(%2), %%mm1 \n\t" /* C7 C5 C7 C5 */\
+ "pmaddwd %%mm3, %%mm1 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
+ "movq 72(%2), %%mm7 \n\t" /* -C5 -C1 -C5 -C1 */\
+ "pmaddwd %%mm3, %%mm7 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
+ "paddd %%mm4, %%mm1 \n\t" /* A0+B0 a0+b0 */\
+ "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
+ "psubd %%mm1, %%mm4 \n\t" /* A0-B0 a0-b0 */\
+ "psrad $" #shift ", %%mm1 \n\t"\
+ "psrad $" #shift ", %%mm4 \n\t"\
+ "movq %%mm0, %%mm2 \n\t" /* A1 a1 */\
+ "paddd %%mm7, %%mm0 \n\t" /* A1+B1 a1+b1 */\
+ "psubd %%mm7, %%mm2 \n\t" /* A1-B1 a1-b1 */\
+ "psrad $" #shift ", %%mm0 \n\t"\
+ "psrad $" #shift ", %%mm2 \n\t"\
+ "packssdw %%mm1, %%mm1 \n\t" /* A0+B0 a0+b0 */\
+ "movd %%mm1, " #dst " \n\t"\
+ "packssdw %%mm0, %%mm0 \n\t" /* A1+B1 a1+b1 */\
+ "movd %%mm0, 16+" #dst " \n\t"\
+ "packssdw %%mm2, %%mm2 \n\t" /* A1-B1 a1-b1 */\
+ "movd %%mm2, 96+" #dst " \n\t"\
+ "packssdw %%mm4, %%mm4 \n\t" /* A0-B0 a0-b0 */\
+ "movd %%mm4, 112+" #dst " \n\t"\
+ "movq 88(%2), %%mm1 \n\t" /* C3 C7 C3 C7 */\
+ "pmaddwd %%mm3, %%mm1 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
+ "movq %%mm5, %%mm2 \n\t" /* A2 a2 */\
+ "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
+ "paddd %%mm1, %%mm2 \n\t" /* A2+B2 a2+b2 */\
+ "psubd %%mm1, %%mm5 \n\t" /* a2-B2 a2-b2 */\
+ "psrad $" #shift ", %%mm2 \n\t"\
+ "psrad $" #shift ", %%mm5 \n\t"\
+ "movq %%mm6, %%mm1 \n\t" /* A3 a3 */\
+ "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
+ "psubd %%mm3, %%mm1 \n\t" /* a3-B3 a3-b3 */\
+ "psrad $" #shift ", %%mm6 \n\t"\
+ "psrad $" #shift ", %%mm1 \n\t"\
+ "packssdw %%mm2, %%mm2 \n\t" /* A2+B2 a2+b2 */\
+ "packssdw %%mm6, %%mm6 \n\t" /* A3+B3 a3+b3 */\
+ "movd %%mm2, 32+" #dst " \n\t"\
+ "packssdw %%mm1, %%mm1 \n\t" /* A3-B3 a3-b3 */\
+ "packssdw %%mm5, %%mm5 \n\t" /* A2-B2 a2-b2 */\
+ "movd %%mm6, 48+" #dst " \n\t"\
+ "movd %%mm1, 64+" #dst " \n\t"\
+ "movd %%mm5, 80+" #dst " \n\t"
+
+//IDCT( src0, src4, src1, src5, dst, shift)
+IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
+IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
+IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
+IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
+ "jmp 9f \n\t"
+
+ "#" ASMALIGN(4) \
+ "6: \n\t"
+Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 7f)
+
+#undef IDCT
+#define IDCT(src0, src4, src1, src5, dst, shift) \
+ "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
+ "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
+ "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
+ "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
+ "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
+ "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "movq %%mm0, %%mm5 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
+ "movq 56(%2), %%mm1 \n\t" /* C7 C5 C7 C5 */\
+ "pmaddwd %%mm3, %%mm1 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
+ "movq 72(%2), %%mm7 \n\t" /* -C5 -C1 -C5 -C1 */\
+ "pmaddwd %%mm3, %%mm7 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
+ "paddd %%mm4, %%mm1 \n\t" /* A0+B0 a0+b0 */\
+ "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
+ "psubd %%mm1, %%mm4 \n\t" /* A0-B0 a0-b0 */\
+ "psrad $" #shift ", %%mm1 \n\t"\
+ "psrad $" #shift ", %%mm4 \n\t"\
+ "movq %%mm0, %%mm2 \n\t" /* A1 a1 */\
+ "paddd %%mm7, %%mm0 \n\t" /* A1+B1 a1+b1 */\
+ "psubd %%mm7, %%mm2 \n\t" /* A1-B1 a1-b1 */\
+ "psrad $" #shift ", %%mm0 \n\t"\
+ "psrad $" #shift ", %%mm2 \n\t"\
+ "packssdw %%mm1, %%mm1 \n\t" /* A0+B0 a0+b0 */\
+ "movd %%mm1, " #dst " \n\t"\
+ "packssdw %%mm0, %%mm0 \n\t" /* A1+B1 a1+b1 */\
+ "movd %%mm0, 16+" #dst " \n\t"\
+ "packssdw %%mm2, %%mm2 \n\t" /* A1-B1 a1-b1 */\
+ "movd %%mm2, 96+" #dst " \n\t"\
+ "packssdw %%mm4, %%mm4 \n\t" /* A0-B0 a0-b0 */\
+ "movd %%mm4, 112+" #dst " \n\t"\
+ "movq 88(%2), %%mm1 \n\t" /* C3 C7 C3 C7 */\
+ "pmaddwd %%mm3, %%mm1 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
+ "movq %%mm5, %%mm2 \n\t" /* A2 a2 */\
+ "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
+ "paddd %%mm1, %%mm2 \n\t" /* A2+B2 a2+b2 */\
+ "psubd %%mm1, %%mm5 \n\t" /* a2-B2 a2-b2 */\
+ "psrad $" #shift ", %%mm2 \n\t"\
+ "psrad $" #shift ", %%mm5 \n\t"\
+ "movq %%mm6, %%mm1 \n\t" /* A3 a3 */\
+ "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
+ "psubd %%mm3, %%mm1 \n\t" /* a3-B3 a3-b3 */\
+ "psrad $" #shift ", %%mm6 \n\t"\
+ "psrad $" #shift ", %%mm1 \n\t"\
+ "packssdw %%mm2, %%mm2 \n\t" /* A2+B2 a2+b2 */\
+ "packssdw %%mm6, %%mm6 \n\t" /* A3+B3 a3+b3 */\
+ "movd %%mm2, 32+" #dst " \n\t"\
+ "packssdw %%mm1, %%mm1 \n\t" /* A3-B3 a3-b3 */\
+ "packssdw %%mm5, %%mm5 \n\t" /* A2-B2 a2-b2 */\
+ "movd %%mm6, 48+" #dst " \n\t"\
+ "movd %%mm1, 64+" #dst " \n\t"\
+ "movd %%mm5, 80+" #dst " \n\t"
+
+
+//IDCT( src0, src4, src1, src5, dst, shift)
+IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
+IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
+IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
+IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
+ "jmp 9f \n\t"
+
+ "#" ASMALIGN(4) \
+ "2: \n\t"
+Z_COND_IDCT( 96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 3f)
+
+#undef IDCT
+#define IDCT(src0, src4, src1, src5, dst, shift) \
+ "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
+ "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
+ "movq " #src5 ", %%mm3 \n\t" /* R7 R5 r7 r5 */\
+ "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
+ "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
+ "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
+ "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
+ "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
+ "movq %%mm0, %%mm5 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
+ "movq 56(%2), %%mm1 \n\t" /* C7 C5 C7 C5 */\
+ "pmaddwd %%mm3, %%mm1 \n\t" /* C7R7+C5R5 C7r7+C5r5 */\
+ "pmaddwd 64(%2), %%mm2 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
+ "paddd %%mm1, %%mm7 \n\t" /* B0 b0 */\
+ "movq 72(%2), %%mm1 \n\t" /* -C5 -C1 -C5 -C1 */\
+ "pmaddwd %%mm3, %%mm1 \n\t" /* -C5R7-C1R5 -C5r7-C1r5 */\
+ "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
+ "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
+ "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
+ "paddd %%mm2, %%mm1 \n\t" /* B1 b1 */\
+ "psrad $" #shift ", %%mm7 \n\t"\
+ "psrad $" #shift ", %%mm4 \n\t"\
+ "movq %%mm0, %%mm2 \n\t" /* A1 a1 */\
+ "paddd %%mm1, %%mm0 \n\t" /* A1+B1 a1+b1 */\
+ "psubd %%mm1, %%mm2 \n\t" /* A1-B1 a1-b1 */\
+ "psrad $" #shift ", %%mm0 \n\t"\
+ "psrad $" #shift ", %%mm2 \n\t"\
+ "packssdw %%mm7, %%mm7 \n\t" /* A0+B0 a0+b0 */\
+ "movd %%mm7, " #dst " \n\t"\
+ "packssdw %%mm0, %%mm0 \n\t" /* A1+B1 a1+b1 */\
+ "movd %%mm0, 16+" #dst " \n\t"\
+ "packssdw %%mm2, %%mm2 \n\t" /* A1-B1 a1-b1 */\
+ "movd %%mm2, 96+" #dst " \n\t"\
+ "packssdw %%mm4, %%mm4 \n\t" /* A0-B0 a0-b0 */\
+ "movd %%mm4, 112+" #dst " \n\t"\
+ "movq " #src1 ", %%mm0 \n\t" /* R3 R1 r3 r1 */\
+ "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
+ "pmaddwd %%mm0, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
+ "movq 88(%2), %%mm7 \n\t" /* C3 C7 C3 C7 */\
+ "pmaddwd 96(%2), %%mm0 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
+ "pmaddwd %%mm3, %%mm7 \n\t" /* C3R7+C7R5 C3r7+C7r5 */\
+ "movq %%mm5, %%mm2 \n\t" /* A2 a2 */\
+ "pmaddwd 104(%2), %%mm3 \n\t" /* -C1R7+C3R5 -C1r7+C3r5 */\
+ "paddd %%mm7, %%mm4 \n\t" /* B2 b2 */\
+ "paddd %%mm4, %%mm2 \n\t" /* A2+B2 a2+b2 */\
+ "psubd %%mm4, %%mm5 \n\t" /* a2-B2 a2-b2 */\
+ "psrad $" #shift ", %%mm2 \n\t"\
+ "psrad $" #shift ", %%mm5 \n\t"\
+ "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
+ "paddd %%mm0, %%mm3 \n\t" /* B3 b3 */\
+ "paddd %%mm3, %%mm6 \n\t" /* A3+B3 a3+b3 */\
+ "psubd %%mm3, %%mm4 \n\t" /* a3-B3 a3-b3 */\
+ "psrad $" #shift ", %%mm6 \n\t"\
+ "psrad $" #shift ", %%mm4 \n\t"\
+ "packssdw %%mm2, %%mm2 \n\t" /* A2+B2 a2+b2 */\
+ "packssdw %%mm6, %%mm6 \n\t" /* A3+B3 a3+b3 */\
+ "movd %%mm2, 32+" #dst " \n\t"\
+ "packssdw %%mm4, %%mm4 \n\t" /* A3-B3 a3-b3 */\
+ "packssdw %%mm5, %%mm5 \n\t" /* A2-B2 a2-b2 */\
+ "movd %%mm6, 48+" #dst " \n\t"\
+ "movd %%mm4, 64+" #dst " \n\t"\
+ "movd %%mm5, 80+" #dst " \n\t"
+
+//IDCT( src0, src4, src1, src5, dst, shift)
+IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
+IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
+IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
+IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
+ "jmp 9f \n\t"
+
+ "#" ASMALIGN(4) \
+ "3: \n\t"
+#undef IDCT
+#define IDCT(src0, src4, src1, src5, dst, shift) \
+ "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
+ "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
+ "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
+ "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
+ "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
+ "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
+ "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
+ "movq %%mm0, %%mm5 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
+ "movq 64(%2), %%mm3 \n\t"\
+ "pmaddwd %%mm2, %%mm3 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
+ "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
+ "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
+ "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
+ "psrad $" #shift ", %%mm7 \n\t"\
+ "psrad $" #shift ", %%mm4 \n\t"\
+ "movq %%mm0, %%mm1 \n\t" /* A1 a1 */\
+ "paddd %%mm3, %%mm0 \n\t" /* A1+B1 a1+b1 */\
+ "psubd %%mm3, %%mm1 \n\t" /* A1-B1 a1-b1 */\
+ "psrad $" #shift ", %%mm0 \n\t"\
+ "psrad $" #shift ", %%mm1 \n\t"\
+ "packssdw %%mm7, %%mm7 \n\t" /* A0+B0 a0+b0 */\
+ "movd %%mm7, " #dst " \n\t"\
+ "packssdw %%mm0, %%mm0 \n\t" /* A1+B1 a1+b1 */\
+ "movd %%mm0, 16+" #dst " \n\t"\
+ "packssdw %%mm1, %%mm1 \n\t" /* A1-B1 a1-b1 */\
+ "movd %%mm1, 96+" #dst " \n\t"\
+ "packssdw %%mm4, %%mm4 \n\t" /* A0-B0 a0-b0 */\
+ "movd %%mm4, 112+" #dst " \n\t"\
+ "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
+ "pmaddwd %%mm2, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
+ "pmaddwd 96(%2), %%mm2 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
+ "movq %%mm5, %%mm1 \n\t" /* A2 a2 */\
+ "paddd %%mm4, %%mm1 \n\t" /* A2+B2 a2+b2 */\
+ "psubd %%mm4, %%mm5 \n\t" /* a2-B2 a2-b2 */\
+ "psrad $" #shift ", %%mm1 \n\t"\
+ "psrad $" #shift ", %%mm5 \n\t"\
+ "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
+ "paddd %%mm2, %%mm6 \n\t" /* A3+B3 a3+b3 */\
+ "psubd %%mm2, %%mm4 \n\t" /* a3-B3 a3-b3 */\
+ "psrad $" #shift ", %%mm6 \n\t"\
+ "psrad $" #shift ", %%mm4 \n\t"\
+ "packssdw %%mm1, %%mm1 \n\t" /* A2+B2 a2+b2 */\
+ "packssdw %%mm6, %%mm6 \n\t" /* A3+B3 a3+b3 */\
+ "movd %%mm1, 32+" #dst " \n\t"\
+ "packssdw %%mm4, %%mm4 \n\t" /* A3-B3 a3-b3 */\
+ "packssdw %%mm5, %%mm5 \n\t" /* A2-B2 a2-b2 */\
+ "movd %%mm6, 48+" #dst " \n\t"\
+ "movd %%mm4, 64+" #dst " \n\t"\
+ "movd %%mm5, 80+" #dst " \n\t"
+
+
+//IDCT( src0, src4, src1, src5, dst, shift)
+IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
+IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
+IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
+IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
+ "jmp 9f \n\t"
+
+ "#" ASMALIGN(4) \
+ "5: \n\t"
+#undef IDCT
+#define IDCT(src0, src4, src1, src5, dst, shift) \
+ "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
+ "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
+ "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
+ "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
+ "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
+ "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
+ "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
+ "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
+ "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
+ "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
+ "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
+ "movq %%mm0, %%mm5 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
+ "paddd %%mm1, %%mm0 \n\t" /* A1 a1 */\
+ "psubd %%mm1, %%mm5 \n\t" /* A2 a2 */\
+ "movq 8+" #src0 ", %%mm2 \n\t" /* R4 R0 r4 r0 */\
+ "movq 8+" #src4 ", %%mm3 \n\t" /* R6 R2 r6 r2 */\
+ "movq 16(%2), %%mm1 \n\t" /* C4 C4 C4 C4 */\
+ "pmaddwd %%mm2, %%mm1 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "movq 24(%2), %%mm7 \n\t" /* -C4 C4 -C4 C4 */\
+ "pmaddwd %%mm7, %%mm2 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
+ "movq 32(%2), %%mm7 \n\t" /* C6 C2 C6 C2 */\
+ "pmaddwd %%mm3, %%mm7 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
+ "pmaddwd 40(%2), %%mm3 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
+ "paddd %%mm1, %%mm7 \n\t" /* A0 a0 */\
+ "paddd %%mm1, %%mm1 \n\t" /* 2C0 2c0 */\
+ "psubd %%mm7, %%mm1 \n\t" /* A3 a3 */\
+ "paddd %%mm2, %%mm3 \n\t" /* A1 a1 */\
+ "paddd %%mm2, %%mm2 \n\t" /* 2C1 2c1 */\
+ "psubd %%mm3, %%mm2 \n\t" /* A2 a2 */\
+ "psrad $" #shift ", %%mm4 \n\t"\
+ "psrad $" #shift ", %%mm7 \n\t"\
+ "psrad $" #shift ", %%mm3 \n\t"\
+ "packssdw %%mm7, %%mm4 \n\t" /* A0 a0 */\
+ "movq %%mm4, " #dst " \n\t"\
+ "psrad $" #shift ", %%mm0 \n\t"\
+ "packssdw %%mm3, %%mm0 \n\t" /* A1 a1 */\
+ "movq %%mm0, 16+" #dst " \n\t"\
+ "movq %%mm0, 96+" #dst " \n\t"\
+ "movq %%mm4, 112+" #dst " \n\t"\
+ "psrad $" #shift ", %%mm5 \n\t"\
+ "psrad $" #shift ", %%mm6 \n\t"\
+ "psrad $" #shift ", %%mm2 \n\t"\
+ "packssdw %%mm2, %%mm5 \n\t" /* A2-B2 a2-b2 */\
+ "movq %%mm5, 32+" #dst " \n\t"\
+ "psrad $" #shift ", %%mm1 \n\t"\
+ "packssdw %%mm1, %%mm6 \n\t" /* A3+B3 a3+b3 */\
+ "movq %%mm6, 48+" #dst " \n\t"\
+ "movq %%mm6, 64+" #dst " \n\t"\
+ "movq %%mm5, 80+" #dst " \n\t"
+
+
+//IDCT( src0, src4, src1, src5, dst, shift)
+IDCT( 0(%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
+//IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
+IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
+//IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
+ "jmp 9f \n\t"
+
+
+ "#" ASMALIGN(4) \
+ "1: \n\t"
+#undef IDCT
+#define IDCT(src0, src4, src1, src5, dst, shift) \
+ "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
+ "movq " #src4 ", %%mm1 \n\t" /* R6 R2 r6 r2 */\
+ "movq " #src1 ", %%mm2 \n\t" /* R3 R1 r3 r1 */\
+ "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
+ "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
+ "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
+ "movq 32(%2), %%mm5 \n\t" /* C6 C2 C6 C2 */\
+ "pmaddwd %%mm1, %%mm5 \n\t" /* C6R6+C2R2 C6r6+C2r2 */\
+ "movq 40(%2), %%mm6 \n\t" /* -C2 C6 -C2 C6 */\
+ "pmaddwd %%mm6, %%mm1 \n\t" /* -C2R6+C6R2 -C2r6+C6r2 */\
+ "movq %%mm4, %%mm6 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "movq 48(%2), %%mm7 \n\t" /* C3 C1 C3 C1 */\
+ "pmaddwd %%mm2, %%mm7 \n\t" /* C3R3+C1R1 C3r3+C1r1 */\
+ "paddd %%mm5, %%mm4 \n\t" /* A0 a0 */\
+ "psubd %%mm5, %%mm6 \n\t" /* A3 a3 */\
+ "movq %%mm0, %%mm5 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
+ "paddd %%mm1, %%mm0 \n\t" /* A1 a1 */\
+ "psubd %%mm1, %%mm5 \n\t" /* A2 a2 */\
+ "movq 64(%2), %%mm1 \n\t"\
+ "pmaddwd %%mm2, %%mm1 \n\t" /* -C7R3+C3R1 -C7r3+C3r1 */\
+ "paddd %%mm4, %%mm7 \n\t" /* A0+B0 a0+b0 */\
+ "paddd %%mm4, %%mm4 \n\t" /* 2A0 2a0 */\
+ "psubd %%mm7, %%mm4 \n\t" /* A0-B0 a0-b0 */\
+ "psrad $" #shift ", %%mm7 \n\t"\
+ "psrad $" #shift ", %%mm4 \n\t"\
+ "movq %%mm0, %%mm3 \n\t" /* A1 a1 */\
+ "paddd %%mm1, %%mm0 \n\t" /* A1+B1 a1+b1 */\
+ "psubd %%mm1, %%mm3 \n\t" /* A1-B1 a1-b1 */\
+ "psrad $" #shift ", %%mm0 \n\t"\
+ "psrad $" #shift ", %%mm3 \n\t"\
+ "packssdw %%mm7, %%mm7 \n\t" /* A0+B0 a0+b0 */\
+ "movd %%mm7, " #dst " \n\t"\
+ "packssdw %%mm0, %%mm0 \n\t" /* A1+B1 a1+b1 */\
+ "movd %%mm0, 16+" #dst " \n\t"\
+ "packssdw %%mm3, %%mm3 \n\t" /* A1-B1 a1-b1 */\
+ "movd %%mm3, 96+" #dst " \n\t"\
+ "packssdw %%mm4, %%mm4 \n\t" /* A0-B0 a0-b0 */\
+ "movd %%mm4, 112+" #dst " \n\t"\
+ "movq 80(%2), %%mm4 \n\t" /* -C1 C5 -C1 C5 */\
+ "pmaddwd %%mm2, %%mm4 \n\t" /* -C1R3+C5R1 -C1r3+C5r1 */\
+ "pmaddwd 96(%2), %%mm2 \n\t" /* -C5R3+C7R1 -C5r3+C7r1 */\
+ "movq %%mm5, %%mm3 \n\t" /* A2 a2 */\
+ "paddd %%mm4, %%mm3 \n\t" /* A2+B2 a2+b2 */\
+ "psubd %%mm4, %%mm5 \n\t" /* a2-B2 a2-b2 */\
+ "psrad $" #shift ", %%mm3 \n\t"\
+ "psrad $" #shift ", %%mm5 \n\t"\
+ "movq %%mm6, %%mm4 \n\t" /* A3 a3 */\
+ "paddd %%mm2, %%mm6 \n\t" /* A3+B3 a3+b3 */\
+ "psubd %%mm2, %%mm4 \n\t" /* a3-B3 a3-b3 */\
+ "psrad $" #shift ", %%mm6 \n\t"\
+ "packssdw %%mm3, %%mm3 \n\t" /* A2+B2 a2+b2 */\
+ "movd %%mm3, 32+" #dst " \n\t"\
+ "psrad $" #shift ", %%mm4 \n\t"\
+ "packssdw %%mm6, %%mm6 \n\t" /* A3+B3 a3+b3 */\
+ "movd %%mm6, 48+" #dst " \n\t"\
+ "packssdw %%mm4, %%mm4 \n\t" /* A3-B3 a3-b3 */\
+ "packssdw %%mm5, %%mm5 \n\t" /* A2-B2 a2-b2 */\
+ "movd %%mm4, 64+" #dst " \n\t"\
+ "movd %%mm5, 80+" #dst " \n\t"
+
+
+//IDCT( src0, src4, src1, src5, dst, shift)
+IDCT( (%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
+IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
+IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
+IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
+ "jmp 9f \n\t"
+
+
+ "#" ASMALIGN(4)
+ "7: \n\t"
+#undef IDCT
+#define IDCT(src0, src4, src1, src5, dst, shift) \
+ "movq " #src0 ", %%mm0 \n\t" /* R4 R0 r4 r0 */\
+ "movq 16(%2), %%mm4 \n\t" /* C4 C4 C4 C4 */\
+ "pmaddwd %%mm0, %%mm4 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "movq 24(%2), %%mm5 \n\t" /* -C4 C4 -C4 C4 */\
+ "pmaddwd %%mm5, %%mm0 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
+ "psrad $" #shift ", %%mm4 \n\t"\
+ "psrad $" #shift ", %%mm0 \n\t"\
+ "movq 8+" #src0 ", %%mm2 \n\t" /* R4 R0 r4 r0 */\
+ "movq 16(%2), %%mm1 \n\t" /* C4 C4 C4 C4 */\
+ "pmaddwd %%mm2, %%mm1 \n\t" /* C4R4+C4R0 C4r4+C4r0 */\
+ "movq 24(%2), %%mm7 \n\t" /* -C4 C4 -C4 C4 */\
+ "pmaddwd %%mm7, %%mm2 \n\t" /* -C4R4+C4R0 -C4r4+C4r0 */\
+ "movq 32(%2), %%mm7 \n\t" /* C6 C2 C6 C2 */\
+ "psrad $" #shift ", %%mm1 \n\t"\
+ "packssdw %%mm1, %%mm4 \n\t" /* A0 a0 */\
+ "movq %%mm4, " #dst " \n\t"\
+ "psrad $" #shift ", %%mm2 \n\t"\
+ "packssdw %%mm2, %%mm0 \n\t" /* A1 a1 */\
+ "movq %%mm0, 16+" #dst " \n\t"\
+ "movq %%mm0, 96+" #dst " \n\t"\
+ "movq %%mm4, 112+" #dst " \n\t"\
+ "movq %%mm0, 32+" #dst " \n\t"\
+ "movq %%mm4, 48+" #dst " \n\t"\
+ "movq %%mm4, 64+" #dst " \n\t"\
+ "movq %%mm0, 80+" #dst " \n\t"
+
+//IDCT( src0, src4, src1, src5, dst, shift)
+IDCT( 0(%1), 64(%1), 32(%1), 96(%1), 0(%0), 20)
+//IDCT( 8(%1), 72(%1), 40(%1), 104(%1), 4(%0), 20)
+IDCT( 16(%1), 80(%1), 48(%1), 112(%1), 8(%0), 20)
+//IDCT( 24(%1), 88(%1), 56(%1), 120(%1), 12(%0), 20)
+
+
+#endif
+
+/*
+Input
+ 00 40 04 44 20 60 24 64
+ 10 30 14 34 50 70 54 74
+ 01 41 03 43 21 61 23 63
+ 11 31 13 33 51 71 53 73
+ 02 42 06 46 22 62 26 66
+ 12 32 16 36 52 72 56 76
+ 05 45 07 47 25 65 27 67
+ 15 35 17 37 55 75 57 77
+
+Temp
+ 00 04 10 14 20 24 30 34
+ 40 44 50 54 60 64 70 74
+ 01 03 11 13 21 23 31 33
+ 41 43 51 53 61 63 71 73
+ 02 06 12 16 22 26 32 36
+ 42 46 52 56 62 66 72 76
+ 05 07 15 17 25 27 35 37
+ 45 47 55 57 65 67 75 77
+*/
+
+"9: \n\t"
+ :: "r" (block), "r" (temp), "r" (coeffs)
+ : "%eax"
+ );
+}
+
+void ff_simple_idct_mmx(int16_t *block)
+{
+ idct(block);
+}
+
+//FIXME merge add/put into the idct
+
+void ff_simple_idct_put_mmx(uint8_t *dest, int line_size, DCTELEM *block)
+{
+ idct(block);
+ put_pixels_clamped_mmx(block, dest, line_size);
+}
+void ff_simple_idct_add_mmx(uint8_t *dest, int line_size, DCTELEM *block)
+{
+ idct(block);
+ add_pixels_clamped_mmx(block, dest, line_size);
+}
diff --git a/libavcodec/i386/snowdsp_mmx.c b/libavcodec/x86/snowdsp_mmx.c
similarity index 100%
rename from libavcodec/i386/snowdsp_mmx.c
rename to libavcodec/x86/snowdsp_mmx.c
diff --git a/libavcodec/i386/vc1dsp_mmx.c b/libavcodec/x86/vc1dsp_mmx.c
similarity index 100%
rename from libavcodec/i386/vc1dsp_mmx.c
rename to libavcodec/x86/vc1dsp_mmx.c
diff --git a/libavcodec/i386/vp3dsp_mmx.c b/libavcodec/x86/vp3dsp_mmx.c
similarity index 100%
rename from libavcodec/i386/vp3dsp_mmx.c
rename to libavcodec/x86/vp3dsp_mmx.c
diff --git a/libavcodec/x86/vp3dsp_mmx.h b/libavcodec/x86/vp3dsp_mmx.h
new file mode 100644
index 0000000..e565a33
--- /dev/null
+++ b/libavcodec/x86/vp3dsp_mmx.h
@@ -0,0 +1,35 @@
+/*
+ * vp3dsp MMX function declarations
+ * Copyright (c) 2007 Aurelien Jacobs <aurel at gnuage.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_X86_VP3DSP_MMX_H
+#define AVCODEC_X86_VP3DSP_MMX_H
+
+#include <stdint.h>
+#include "libavcodec/dsputil.h"
+
+void ff_vp3_idct_mmx(int16_t *data);
+void ff_vp3_idct_put_mmx(uint8_t *dest, int line_size, DCTELEM *block);
+void ff_vp3_idct_add_mmx(uint8_t *dest, int line_size, DCTELEM *block);
+
+void ff_vp3_v_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values);
+void ff_vp3_h_loop_filter_mmx2(uint8_t *src, int stride, int *bounding_values);
+
+#endif /* AVCODEC_X86_VP3DSP_MMX_H */
diff --git a/libavcodec/i386/vp3dsp_sse2.c b/libavcodec/x86/vp3dsp_sse2.c
similarity index 100%
rename from libavcodec/i386/vp3dsp_sse2.c
rename to libavcodec/x86/vp3dsp_sse2.c
diff --git a/libavcodec/x86/vp3dsp_sse2.h b/libavcodec/x86/vp3dsp_sse2.h
new file mode 100644
index 0000000..9094620
--- /dev/null
+++ b/libavcodec/x86/vp3dsp_sse2.h
@@ -0,0 +1,31 @@
+/*
+ * vp3dsp SSE2 function declarations
+ * Copyright (c) 2007 Aurelien Jacobs <aurel at gnuage.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_X86_VP3DSP_SSE2_H
+#define AVCODEC_X86_VP3DSP_SSE2_H
+
+#include "libavcodec/dsputil.h"
+
+void ff_vp3_idct_sse2(int16_t *input_data);
+void ff_vp3_idct_put_sse2(uint8_t *dest, int line_size, DCTELEM *block);
+void ff_vp3_idct_add_sse2(uint8_t *dest, int line_size, DCTELEM *block);
+
+#endif /* AVCODEC_X86_VP3DSP_SSE2_H */
diff --git a/libavcodec/x86/x86inc.asm b/libavcodec/x86/x86inc.asm
new file mode 100644
index 0000000..3729b5b
--- /dev/null
+++ b/libavcodec/x86/x86inc.asm
@@ -0,0 +1,540 @@
+;*****************************************************************************
+;* x86inc.asm
+;*****************************************************************************
+;* Copyright (C) 2005-2008 Loren Merritt <lorenm at u.washington.edu>
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* License along with FFmpeg; if not, write to the Free Software
+;*****************************************************************************
+
+; FIXME: All of the 64bit asm functions that take a stride as an argument
+; via register, assume that the high dword of that register is filled with 0.
+; This is true in practice (since we never do any 64bit arithmetic on strides,
+; and x264's strides are all positive), but is not guaranteed by the ABI.
+
+; Name of the .rodata section.
+; Kludge: Something on OS X fails to align .rodata even given an align attribute,
+; so use a different read-only section.
+%macro SECTION_RODATA 0
+ %ifidn __OUTPUT_FORMAT__,macho64
+ SECTION .text align=16
+ %elifidn __OUTPUT_FORMAT__,macho
+ SECTION .text align=16
+ fakegot:
+ %else
+ SECTION .rodata align=16
+ %endif
+%endmacro
+
+; PIC support macros. All these macros are totally harmless when PIC is
+; not defined but can ruin everything if misused in PIC mode. On x86_32, shared
+; objects cannot directly access global variables by address, they need to
+; go through the GOT (global offset table). Most OSes do not care about it
+; and let you load non-shared .so objects (Linux, Win32...). However, OS X
+; requires PIC code in its .dylib objects.
+;
+; - GLOBAL should be used as a suffix for global addressing, eg.
+; picgetgot ebx
+; mov eax, [foo GLOBAL]
+; instead of
+; mov eax, [foo]
+;
+; - picgetgot computes the GOT address into the given register in PIC
+; mode, otherwise does nothing. You need to do this before using GLOBAL.
+; Before in both execution order and compiled code order (so GLOBAL knows
+; which register the GOT is in).
+
+%ifndef PIC
+ %define GLOBAL
+ %macro picgetgot 1
+ %endmacro
+%elifdef ARCH_X86_64
+ %define PIC64
+ %define GLOBAL wrt rip
+ %macro picgetgot 1
+ %endmacro
+%else
+ %define PIC32
+ %ifidn __OUTPUT_FORMAT__,macho
+ ; There is no real global offset table on OS X, but we still
+ ; need to reference our variables by offset.
+ %macro picgetgot 1
+ call %%getgot
+ %%getgot:
+ pop %1
+ add %1, $$ - %%getgot
+ %undef GLOBAL
+ %define GLOBAL + %1 - fakegot
+ %endmacro
+ %else ; elf
+ extern _GLOBAL_OFFSET_TABLE_
+ %macro picgetgot 1
+ call %%getgot
+ %%getgot:
+ pop %1
+ add %1, _GLOBAL_OFFSET_TABLE_ + $$ - %%getgot wrt ..gotpc
+ %undef GLOBAL
+ %define GLOBAL + %1 wrt ..gotoff
+ %endmacro
+ %endif
+%endif
+
+; Macros to eliminate most code duplication between x86_32 and x86_64:
+; Currently this works only for leaf functions which load all their arguments
+; into registers at the start, and make no other use of the stack. Luckily that
+; covers most of x264's asm.
+
+; PROLOGUE:
+; %1 = number of arguments. loads them from stack if needed.
+; %2 = number of registers used, not including PIC. pushes callee-saved regs if needed.
+; %3 = whether global constants are used in this function. inits x86_32 PIC if needed.
+; %4 = list of names to define to registers
+; PROLOGUE can also be invoked by adding the same options to cglobal
+
+; e.g.
+; cglobal foo, 2,3,0, dst, src, tmp
+; declares a function (foo), taking two args (dst and src), one local variable (tmp), and not using globals
+
+; TODO Some functions can use some args directly from the stack. If they're the
+; last args then you can just not declare them, but if they're in the middle
+; we need more flexible macro.
+
+; RET:
+; Pops anything that was pushed by PROLOGUE
+
+; REP_RET:
+; Same, but if it doesn't pop anything it becomes a 2-byte ret, for athlons
+; which are slow when a normal ret follows a branch.
+
+%macro DECLARE_REG 6
+ %define r%1q %2
+ %define r%1d %3
+ %define r%1w %4
+ %define r%1b %5
+ %define r%1m %6
+ %define r%1 %2
+%endmacro
+
+%macro DECLARE_REG_SIZE 2
+ %define r%1q r%1
+ %define e%1q r%1
+ %define r%1d e%1
+ %define e%1d e%1
+ %define r%1w %1
+ %define e%1w %1
+ %define r%1b %2
+ %define e%1b %2
+%ifndef ARCH_X86_64
+ %define r%1 e%1
+%endif
+%endmacro
+
+DECLARE_REG_SIZE ax, al
+DECLARE_REG_SIZE bx, bl
+DECLARE_REG_SIZE cx, cl
+DECLARE_REG_SIZE dx, dl
+DECLARE_REG_SIZE si, sil
+DECLARE_REG_SIZE di, dil
+DECLARE_REG_SIZE bp, bpl
+
+%ifdef ARCH_X86_64
+ %define gprsize 8
+%else
+ %define gprsize 4
+%endif
+
+%macro PUSH 1
+ push %1
+ %assign stack_offset stack_offset+gprsize
+%endmacro
+
+%macro POP 1
+ pop %1
+ %assign stack_offset stack_offset-gprsize
+%endmacro
+
+%macro SUB 2
+ sub %1, %2
+ %ifidn %1, rsp
+ %assign stack_offset stack_offset+(%2)
+ %endif
+%endmacro
+
+%macro ADD 2
+ add %1, %2
+ %ifidn %1, rsp
+ %assign stack_offset stack_offset-(%2)
+ %endif
+%endmacro
+
+%macro movifnidn 2
+ %ifnidn %1, %2
+ mov %1, %2
+ %endif
+%endmacro
+
+%macro movsxdifnidn 2
+ %ifnidn %1, %2
+ movsxd %1, %2
+ %endif
+%endmacro
+
+%macro ASSERT 1
+ %if (%1) == 0
+ %error assert failed
+ %endif
+%endmacro
+
+%macro DEFINE_ARGS 0-*
+ %ifdef n_arg_names
+ %assign %%i 0
+ %rep n_arg_names
+ CAT_UNDEF arg_name %+ %%i, q
+ CAT_UNDEF arg_name %+ %%i, d
+ CAT_UNDEF arg_name %+ %%i, w
+ CAT_UNDEF arg_name %+ %%i, b
+ CAT_UNDEF arg_name, %%i
+ %assign %%i %%i+1
+ %endrep
+ %endif
+
+ %assign %%i 0
+ %rep %0
+ %xdefine %1q r %+ %%i %+ q
+ %xdefine %1d r %+ %%i %+ d
+ %xdefine %1w r %+ %%i %+ w
+ %xdefine %1b r %+ %%i %+ b
+ CAT_XDEFINE arg_name, %%i, %1
+ %assign %%i %%i+1
+ %rotate 1
+ %endrep
+ %assign n_arg_names %%i
+%endmacro
+
+%ifdef ARCH_X86_64 ;==========================================================
+%ifidn __OUTPUT_FORMAT__,win32
+
+DECLARE_REG 0, rcx, ecx, cx, cl, ecx
+DECLARE_REG 1, rdx, edx, dx, dl, edx
+DECLARE_REG 2, r8, r8d, r8w, r8b, r8d
+DECLARE_REG 3, r9, r9d, r9w, r9b, r9d
+DECLARE_REG 4, rdi, edi, di, dil, [rsp + stack_offset + 40]
+DECLARE_REG 5, rsi, esi, si, sil, [rsp + stack_offset + 48]
+DECLARE_REG 6, rax, eax, ax, al, [rsp + stack_offset + 56]
+%define r7m [rsp + stack_offset + 64]
+%define r8m [rsp + stack_offset + 72]
+
+%macro LOAD_IF_USED 2 ; reg_id, number_of_args
+ %if %1 < %2
+ mov r%1, [rsp + 8 + %1*8]
+ %endif
+%endmacro
+
+%else ;=======================================================================
+
+DECLARE_REG 0, rdi, edi, di, dil, edi
+DECLARE_REG 1, rsi, esi, si, sil, esi
+DECLARE_REG 2, rdx, edx, dx, dl, edx
+DECLARE_REG 3, rcx, ecx, cx, cl, ecx
+DECLARE_REG 4, r8, r8d, r8w, r8b, r8d
+DECLARE_REG 5, r9, r9d, r9w, r9b, r9d
+DECLARE_REG 6, rax, eax, ax, al, [rsp + stack_offset + 8]
+%define r7m [rsp + stack_offset + 16]
+%define r8m [rsp + stack_offset + 24]
+
+%macro LOAD_IF_USED 2 ; reg_id, number_of_args
+ %if %1 < %2
+ mov r%1, [rsp - 40 + %1*8]
+ %endif
+%endmacro
+
+%endif ; !WIN64
+
+%macro PROLOGUE 2-4+ 0 ; #args, #regs, pic, arg_names...
+ ASSERT %2 >= %1
+ ASSERT %2 <= 7
+ %assign stack_offset 0
+%ifidn __OUTPUT_FORMAT__,win32
+ LOAD_IF_USED 4, %1
+ LOAD_IF_USED 5, %1
+%endif
+ LOAD_IF_USED 6, %1
+ DEFINE_ARGS %4
+%endmacro
+
+%macro RET 0
+ ret
+%endmacro
+
+%macro REP_RET 0
+ rep ret
+%endmacro
+
+%else ; X86_32 ;==============================================================
+
+DECLARE_REG 0, eax, eax, ax, al, [esp + stack_offset + 4]
+DECLARE_REG 1, ecx, ecx, cx, cl, [esp + stack_offset + 8]
+DECLARE_REG 2, edx, edx, dx, dl, [esp + stack_offset + 12]
+DECLARE_REG 3, ebx, ebx, bx, bl, [esp + stack_offset + 16]
+DECLARE_REG 4, esi, esi, si, null, [esp + stack_offset + 20]
+DECLARE_REG 5, edi, edi, di, null, [esp + stack_offset + 24]
+DECLARE_REG 6, ebp, ebp, bp, null, [esp + stack_offset + 28]
+%define r7m [esp + stack_offset + 32]
+%define r8m [esp + stack_offset + 36]
+%define rsp esp
+
+%macro PUSH_IF_USED 1 ; reg_id
+ %if %1 < regs_used
+ push r%1
+ %assign stack_offset stack_offset+4
+ %endif
+%endmacro
+
+%macro POP_IF_USED 1 ; reg_id
+ %if %1 < regs_used
+ pop r%1
+ %endif
+%endmacro
+
+%macro LOAD_IF_USED 2 ; reg_id, number_of_args
+ %if %1 < %2
+ mov r%1, [esp + stack_offset + 4 + %1*4]
+ %endif
+%endmacro
+
+%macro PROLOGUE 2-4+ 0 ; #args, #regs, pic, arg_names...
+ ASSERT %2 >= %1
+ %assign stack_offset 0
+ %assign regs_used %2
+ %ifdef PIC
+ %if %3
+ %assign regs_used regs_used+1
+ %endif
+ %endif
+ ASSERT regs_used <= 7
+ PUSH_IF_USED 3
+ PUSH_IF_USED 4
+ PUSH_IF_USED 5
+ PUSH_IF_USED 6
+ LOAD_IF_USED 0, %1
+ LOAD_IF_USED 1, %1
+ LOAD_IF_USED 2, %1
+ LOAD_IF_USED 3, %1
+ LOAD_IF_USED 4, %1
+ LOAD_IF_USED 5, %1
+ LOAD_IF_USED 6, %1
+ %if %3
+ picgetgot r%2
+ %endif
+ DEFINE_ARGS %4
+%endmacro
+
+%macro RET 0
+ POP_IF_USED 6
+ POP_IF_USED 5
+ POP_IF_USED 4
+ POP_IF_USED 3
+ ret
+%endmacro
+
+%macro REP_RET 0
+ %if regs_used > 3
+ RET
+ %else
+ rep ret
+ %endif
+%endmacro
+
+%endif ;======================================================================
+
+
+
+;=============================================================================
+; arch-independent part
+;=============================================================================
+
+%assign function_align 16
+
+; Symbol prefix for C linkage
+%macro cglobal 1-2+
+ %xdefine %1 ff_%1
+ %ifdef PREFIX
+ %xdefine %1 _ %+ %1
+ %endif
+ %ifidn __OUTPUT_FORMAT__,elf
+ global %1:function hidden
+ %else
+ global %1
+ %endif
+ align function_align
+ %1:
+ RESET_MM_PERMUTATION ; not really needed, but makes disassembly somewhat nicer
+ %if %0 > 1
+ PROLOGUE %2
+ %endif
+%endmacro
+
+%macro cextern 1
+ %ifdef PREFIX
+ extern _%1
+ %define %1 _%1
+ %else
+ extern %1
+ %endif
+%endmacro
+
+; This is needed for ELF, otherwise the GNU linker assumes the stack is
+; executable by default.
+%ifidn __OUTPUT_FORMAT__,elf
+SECTION .note.GNU-stack noalloc noexec nowrite progbits
+%endif
+
+%assign FENC_STRIDE 16
+%assign FDEC_STRIDE 32
+
+; merge mmx and sse*
+
+%macro CAT_XDEFINE 3
+ %xdefine %1%2 %3
+%endmacro
+
+%macro CAT_UNDEF 2
+ %undef %1%2
+%endmacro
+
+%macro INIT_MMX 0
+ %define RESET_MM_PERMUTATION INIT_MMX
+ %define mmsize 8
+ %define num_mmregs 8
+ %define mova movq
+ %define movu movq
+ %define movh movd
+ %define movnt movntq
+ %assign %%i 0
+ %rep 8
+ CAT_XDEFINE m, %%i, mm %+ %%i
+ CAT_XDEFINE nmm, %%i, %%i
+ %assign %%i %%i+1
+ %endrep
+ %rep 8
+ CAT_UNDEF m, %%i
+ CAT_UNDEF nmm, %%i
+ %assign %%i %%i+1
+ %endrep
+%endmacro
+
+%macro INIT_XMM 0
+ %define RESET_MM_PERMUTATION INIT_XMM
+ %define mmsize 16
+ %define num_mmregs 8
+ %ifdef ARCH_X86_64
+ %define num_mmregs 16
+ %endif
+ %define mova movdqa
+ %define movu movdqu
+ %define movh movq
+ %define movnt movntdq
+ %assign %%i 0
+ %rep num_mmregs
+ CAT_XDEFINE m, %%i, xmm %+ %%i
+ CAT_XDEFINE nxmm, %%i, %%i
+ %assign %%i %%i+1
+ %endrep
+%endmacro
+
+INIT_MMX
+
+; I often want to use macros that permute their arguments. e.g. there's no
+; efficient way to implement butterfly or transpose or dct without swapping some
+; arguments.
+;
+; I would like to not have to manually keep track of the permutations:
+; If I insert a permutation in the middle of a function, it should automatically
+; change everything that follows. For more complex macros I may also have multiple
+; implementations, e.g. the SSE2 and SSSE3 versions may have different permutations.
+;
+; Hence these macros. Insert a PERMUTE or some SWAPs at the end of a macro that
+; permutes its arguments. It's equivalent to exchanging the contents of the
+; registers, except that this way you exchange the register names instead, so it
+; doesn't cost any cycles.
+
+%macro PERMUTE 2-* ; takes a list of pairs to swap
+%rep %0/2
+ %xdefine tmp%2 m%2
+ %xdefine ntmp%2 nm%2
+ %rotate 2
+%endrep
+%rep %0/2
+ %xdefine m%1 tmp%2
+ %xdefine nm%1 ntmp%2
+ %undef tmp%2
+ %undef ntmp%2
+ %rotate 2
+%endrep
+%endmacro
+
+%macro SWAP 2-* ; swaps a single chain (sometimes more concise than pairs)
+%rep %0-1
+%ifdef m%1
+ %xdefine tmp m%1
+ %xdefine m%1 m%2
+ %xdefine m%2 tmp
+ CAT_XDEFINE n, m%1, %1
+ CAT_XDEFINE n, m%2, %2
+%else
+ ; If we were called as "SWAP m0,m1" rather than "SWAP 0,1" infer the original numbers here.
+ ; Be careful using this mode in nested macros though, as in some cases there may be
+ ; other copies of m# that have already been dereferenced and don't get updated correctly.
+ %xdefine %%n1 n %+ %1
+ %xdefine %%n2 n %+ %2
+ %xdefine tmp m %+ %%n1
+ CAT_XDEFINE m, %%n1, m %+ %%n2
+ CAT_XDEFINE m, %%n2, tmp
+ CAT_XDEFINE n, m %+ %%n1, %%n1
+ CAT_XDEFINE n, m %+ %%n2, %%n2
+%endif
+ %undef tmp
+ %rotate 1
+%endrep
+%endmacro
+
+%macro SAVE_MM_PERMUTATION 1
+ %assign %%i 0
+ %rep num_mmregs
+ CAT_XDEFINE %1_m, %%i, m %+ %%i
+ %assign %%i %%i+1
+ %endrep
+%endmacro
+
+%macro LOAD_MM_PERMUTATION 1
+ %assign %%i 0
+ %rep num_mmregs
+ CAT_XDEFINE m, %%i, %1_m %+ %%i
+ %assign %%i %%i+1
+ %endrep
+%endmacro
+
+%macro call 1
+ call %1
+ %ifdef %1_m0
+ LOAD_MM_PERMUTATION %1
+ %endif
+%endmacro
+
+; substitutions which are functionally identical but reduce code size
+%define movdqa movaps
+%define movdqu movups
+
diff --git a/libavcodec/x86/x86util.asm b/libavcodec/x86/x86util.asm
new file mode 100644
index 0000000..2e318ef
--- /dev/null
+++ b/libavcodec/x86/x86util.asm
@@ -0,0 +1,240 @@
+;*****************************************************************************
+;* x86util.asm
+;*****************************************************************************
+;* Copyright (C) 2008 Loren Merritt <lorenm at u.washington.edu>
+;*
+;* This program is free software; you can redistribute it and/or modify
+;* it under the terms of the GNU General Public License as published by
+;* the Free Software Foundation; either version 2 of the License, or
+;* (at your option) any later version.
+;*
+;* This program is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;* GNU General Public License for more details.
+;*
+;* You should have received a copy of the GNU General Public License
+;* along with this program; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+;*****************************************************************************
+
+%macro SBUTTERFLY 4
+ mova m%4, m%2
+ punpckl%1 m%2, m%3
+ punpckh%1 m%4, m%3
+ SWAP %3, %4
+%endmacro
+
+%macro TRANSPOSE4x4W 5
+ SBUTTERFLY wd, %1, %2, %5
+ SBUTTERFLY wd, %3, %4, %5
+ SBUTTERFLY dq, %1, %3, %5
+ SBUTTERFLY dq, %2, %4, %5
+ SWAP %2, %3
+%endmacro
+
+%macro TRANSPOSE2x4x4W 5
+ SBUTTERFLY wd, %1, %2, %5
+ SBUTTERFLY wd, %3, %4, %5
+ SBUTTERFLY dq, %1, %3, %5
+ SBUTTERFLY dq, %2, %4, %5
+ SBUTTERFLY qdq, %1, %2, %5
+ SBUTTERFLY qdq, %3, %4, %5
+%endmacro
+
+%macro TRANSPOSE4x4D 5
+ SBUTTERFLY dq, %1, %2, %5
+ SBUTTERFLY dq, %3, %4, %5
+ SBUTTERFLY qdq, %1, %3, %5
+ SBUTTERFLY qdq, %2, %4, %5
+ SWAP %2, %3
+%endmacro
+
+%macro TRANSPOSE8x8W 9-11
+%ifdef ARCH_X86_64
+ SBUTTERFLY wd, %1, %2, %9
+ SBUTTERFLY wd, %3, %4, %9
+ SBUTTERFLY wd, %5, %6, %9
+ SBUTTERFLY wd, %7, %8, %9
+ SBUTTERFLY dq, %1, %3, %9
+ SBUTTERFLY dq, %2, %4, %9
+ SBUTTERFLY dq, %5, %7, %9
+ SBUTTERFLY dq, %6, %8, %9
+ SBUTTERFLY qdq, %1, %5, %9
+ SBUTTERFLY qdq, %2, %6, %9
+ SBUTTERFLY qdq, %3, %7, %9
+ SBUTTERFLY qdq, %4, %8, %9
+ SWAP %2, %5
+ SWAP %4, %7
+%else
+; in: m0..m7, unless %11 in which case m6 is in %9
+; out: m0..m7, unless %11 in which case m4 is in %10
+; spills into %9 and %10
+%if %0<11
+ movdqa %9, m%7
+%endif
+ SBUTTERFLY wd, %1, %2, %7
+ movdqa %10, m%2
+ movdqa m%7, %9
+ SBUTTERFLY wd, %3, %4, %2
+ SBUTTERFLY wd, %5, %6, %2
+ SBUTTERFLY wd, %7, %8, %2
+ SBUTTERFLY dq, %1, %3, %2
+ movdqa %9, m%3
+ movdqa m%2, %10
+ SBUTTERFLY dq, %2, %4, %3
+ SBUTTERFLY dq, %5, %7, %3
+ SBUTTERFLY dq, %6, %8, %3
+ SBUTTERFLY qdq, %1, %5, %3
+ SBUTTERFLY qdq, %2, %6, %3
+ movdqa %10, m%2
+ movdqa m%3, %9
+ SBUTTERFLY qdq, %3, %7, %2
+ SBUTTERFLY qdq, %4, %8, %2
+ SWAP %2, %5
+ SWAP %4, %7
+%if %0<11
+ movdqa m%5, %10
+%endif
+%endif
+%endmacro
+
+%macro ABS1_MMX 2 ; a, tmp
+ pxor %2, %2
+ psubw %2, %1
+ pmaxsw %1, %2
+%endmacro
+
+%macro ABS2_MMX 4 ; a, b, tmp0, tmp1
+ pxor %3, %3
+ pxor %4, %4
+ psubw %3, %1
+ psubw %4, %2
+ pmaxsw %1, %3
+ pmaxsw %2, %4
+%endmacro
+
+%macro ABS1_SSSE3 2
+ pabsw %1, %1
+%endmacro
+
+%macro ABS2_SSSE3 4
+ pabsw %1, %1
+ pabsw %2, %2
+%endmacro
+
+%define ABS1 ABS1_MMX
+%define ABS2 ABS2_MMX
+
+%macro ABS4 6
+ ABS2 %1, %2, %5, %6
+ ABS2 %3, %4, %5, %6
+%endmacro
+
+%macro SPLATB_MMX 3
+ movd %1, [%2-3] ;to avoid crossing a cacheline
+ punpcklbw %1, %1
+%if mmsize==16
+ pshuflw %1, %1, 0xff
+ punpcklqdq %1, %1
+%else
+ pshufw %1, %1, 0xff
+%endif
+%endmacro
+
+%macro SPLATB_SSSE3 3
+ movd %1, [%2-3]
+ pshufb %1, %3
+%endmacro
+
+%macro PALIGNR_MMX 4
+ %ifnidn %4, %2
+ mova %4, %2
+ %endif
+ %if mmsize == 8
+ psllq %1, (8-%3)*8
+ psrlq %4, %3*8
+ %else
+ pslldq %1, 16-%3
+ psrldq %4, %3
+ %endif
+ por %1, %4
+%endmacro
+
+%macro PALIGNR_SSSE3 4
+ palignr %1, %2, %3
+%endmacro
+
+%macro SUMSUB_BA 2
+ paddw %1, %2
+ paddw %2, %2
+ psubw %2, %1
+%endmacro
+
+%macro SUMSUB_BADC 4
+ paddw %1, %2
+ paddw %3, %4
+ paddw %2, %2
+ paddw %4, %4
+ psubw %2, %1
+ psubw %4, %3
+%endmacro
+
+%macro HADAMARD8_1D 8
+ SUMSUB_BADC %1, %5, %2, %6
+ SUMSUB_BADC %3, %7, %4, %8
+ SUMSUB_BADC %1, %3, %2, %4
+ SUMSUB_BADC %5, %7, %6, %8
+ SUMSUB_BADC %1, %2, %3, %4
+ SUMSUB_BADC %5, %6, %7, %8
+%endmacro
+
+%macro SUMSUB2_AB 3
+ mova %3, %1
+ paddw %1, %1
+ paddw %1, %2
+ psubw %3, %2
+ psubw %3, %2
+%endmacro
+
+%macro SUMSUBD2_AB 4
+ mova %4, %1
+ mova %3, %2
+ psraw %2, 1
+ psraw %4, 1
+ paddw %1, %2
+ psubw %4, %3
+%endmacro
+
+%macro LOAD_DIFF 5
+%ifidn %3, none
+ movh %1, %4
+ movh %2, %5
+ punpcklbw %1, %2
+ punpcklbw %2, %2
+ psubw %1, %2
+%else
+ movh %1, %4
+ punpcklbw %1, %3
+ movh %2, %5
+ punpcklbw %2, %3
+ psubw %1, %2
+%endif
+%endmacro
+
+%macro LOAD_DIFF_8x4P 6-8 r0,r2 ; 4x dest, 2x temp, 2x pointer
+ LOAD_DIFF %1, %5, none, [%7], [%8]
+ LOAD_DIFF %2, %6, none, [%7+r1], [%8+r3]
+ LOAD_DIFF %3, %5, none, [%7+2*r1], [%8+2*r3]
+ LOAD_DIFF %4, %6, none, [%7+r4], [%8+r5]
+%endmacro
+
+%macro STORE_DIFF 4
+ psraw %1, 6
+ movh %2, %4
+ punpcklbw %2, %3
+ paddsw %1, %2
+ packuswb %1, %1
+ movh %4, %1
+%endmacro
+
diff --git a/libavcodec/xsubdec.c b/libavcodec/xsubdec.c
index 5f7d8fd..9ac315b 100644
--- a/libavcodec/xsubdec.c
+++ b/libavcodec/xsubdec.c
@@ -80,31 +80,32 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size,
// allocate sub and set values
if (!sub->rects) {
- sub->rects = av_mallocz(sizeof(AVSubtitleRect));
+ sub->rects = av_mallocz(sizeof(*sub->rects));
+ sub->rects[0] = av_mallocz(sizeof(*sub->rects[0]));
sub->num_rects = 1;
}
- av_freep(&sub->rects[0].bitmap);
- sub->rects[0].x = x; sub->rects[0].y = y;
- sub->rects[0].w = w; sub->rects[0].h = h;
- sub->rects[0].linesize = w;
- sub->rects[0].bitmap = av_malloc(w * h);
- sub->rects[0].nb_colors = 4;
- sub->rects[0].rgba_palette = av_malloc(sub->rects[0].nb_colors * 4);
+ av_freep(&sub->rects[0]->pict.data[0]);
+ sub->rects[0]->x = x; sub->rects[0]->y = y;
+ sub->rects[0]->w = w; sub->rects[0]->h = h;
+ sub->rects[0]->pict.linesize[0] = w;
+ sub->rects[0]->pict.data[0] = av_malloc(w * h);
+ sub->rects[0]->nb_colors = 4;
+ sub->rects[0]->pict.data[1] = av_malloc(sub->rects[0]->nb_colors * 4);
// read palette
- for (i = 0; i < sub->rects[0].nb_colors; i++)
- sub->rects[0].rgba_palette[i] = bytestream_get_be24(&buf);
+ for (i = 0; i < sub->rects[0]->nb_colors; i++)
+ ((uint32_t*)sub->rects[0]->pict.data[1])[i] = bytestream_get_be24(&buf);
// make all except background (first entry) non-transparent
- for (i = 1; i < sub->rects[0].nb_colors; i++)
- sub->rects[0].rgba_palette[i] |= 0xff000000;
+ for (i = 1; i < sub->rects[0]->nb_colors; i++)
+ ((uint32_t*)sub->rects[0]->pict.data[1])[i] |= 0xff000000;
// process RLE-compressed data
rlelen = FFMIN(rlelen, buf_end - buf);
init_get_bits(&gb, buf, rlelen * 8);
- bitmap = sub->rects[0].bitmap;
+ bitmap = sub->rects[0]->pict.data[0];
for (y = 0; y < h; y++) {
// interlaced: do odd lines
- if (y == (h + 1) / 2) bitmap = sub->rects[0].bitmap + w;
+ if (y == (h + 1) / 2) bitmap = sub->rects[0]->pict.data[0] + w;
for (x = 0; x < w; ) {
int log2 = ff_log2_tab[show_bits(&gb, 8)];
int run = get_bits(&gb, 14 - 4 * (log2 >> 1));
diff --git a/libavcodec/xvmc_render.h b/libavcodec/xvmc_render.h
index 8db4357..d9aa154 100644
--- a/libavcodec/xvmc_render.h
+++ b/libavcodec/xvmc_render.h
@@ -29,47 +29,47 @@
#include <X11/extensions/XvMClib.h>
-//the surface should be shown, video driver manipulates this
+//the surface should be shown, the video driver manipulates this
#define MP_XVMC_STATE_DISPLAY_PENDING 1
-//the surface is needed for prediction, codec manipulates this
+//the surface is needed for prediction, the codec manipulates this
#define MP_XVMC_STATE_PREDICTION 2
//this surface is needed for subpicture rendering
#define MP_XVMC_STATE_OSD_SOURCE 4
// 1337 IDCT MCo
#define MP_XVMC_RENDER_MAGIC 0x1DC711C0
-typedef struct{
-//these are not changed by the decoder!
- int magic;
+struct xvmc_render_state {
+ //these are not changed by the decoder!
+ int magic;
- short * data_blocks;
- XvMCMacroBlock * mv_blocks;
- int total_number_of_mv_blocks;
- int total_number_of_data_blocks;
- int mc_type;//XVMC_MPEG1/2/4,XVMC_H263 without XVMC_IDCT
- int idct;//Do we use IDCT acceleration?
- int chroma_format;//420,422,444
- int unsigned_intra;//+-128 for intra pictures after clip
- XvMCSurface* p_surface;//pointer to rendered surface, never changed
+ short * data_blocks;
+ XvMCMacroBlock * mv_blocks;
+ int total_number_of_mv_blocks;
+ int total_number_of_data_blocks;
+ int mc_type; //XVMC_MPEG1/2/4,XVMC_H263 without XVMC_IDCT
+ int idct; //Do we use IDCT acceleration?
+ int chroma_format; //420, 422, 444
+ int unsigned_intra; //+-128 for intra pictures after clipping
+ XvMCSurface* p_surface; //pointer to rendered surface, never changed
-//these are changed by decoder
-//used by XvMCRenderSurface function
- XvMCSurface* p_past_surface;//pointer to the past surface
- XvMCSurface* p_future_surface;//pointer to the future prediction surface
+ //these are changed by the decoder
+ //used by the XvMCRenderSurface function
+ XvMCSurface* p_past_surface; //pointer to the past surface
+ XvMCSurface* p_future_surface; //pointer to the future prediction surface
- unsigned int picture_structure;//top/bottom fields or frame!
- unsigned int flags;//XVMC_SECOND_FIELD - 1'st or 2'd field in the sequence
- unsigned int display_flags; //1,2 or 1+2 fields for XvMCPutSurface,
+ unsigned int picture_structure; //top/bottom fields or frame!
+ unsigned int flags; //XVMC_SECOND_FIELD - 1st or 2nd field in the sequence
+ unsigned int display_flags; //1,2 or 1+2 fields for XvMCPutSurface
-//these are internal communication ones
- int state;//0-free, 1 Waiting to Display, 2 Waiting for prediction
- int start_mv_blocks_num;//offset in the array for the current slice, updated by vo
- int filled_mv_blocks_num;//processed mv block in this slice, changed by decoder
+ //these are for internal communication
+ int state; //0 - free, 1 - waiting to display, 2 - waiting for prediction
+ int start_mv_blocks_num; //offset in the array for the current slice, updated by vo
+ int filled_mv_blocks_num; //processed mv block in this slice, changed by decoder
- int next_free_data_block_num;//used in add_mv_block, pointer to next free block
-//extensions
- void * p_osd_target_surface_render;//pointer to the surface where subpicture is rendered
+ int next_free_data_block_num; //used in add_mv_block, pointer to next free block
+ //extensions
+ void * p_osd_target_surface_render; //pointer to the surface where subpicture is rendered
-} xvmc_render_state_t;
+};
#endif /* AVCODEC_XVMC_RENDER_H */
diff --git a/libavcodec/xvmcvideo.c b/libavcodec/xvmcvideo.c
index 93d93a6..9c4d8d8 100644
--- a/libavcodec/xvmcvideo.c
+++ b/libavcodec/xvmcvideo.c
@@ -39,8 +39,8 @@
//set s->block
void XVMC_init_block(MpegEncContext *s){
-xvmc_render_state_t * render;
- render = (xvmc_render_state_t*)s->current_picture.data[2];
+ struct xvmc_render_state * render;
+ render = (struct xvmc_render_state*)s->current_picture.data[2];
assert(render != NULL);
if( (render == NULL) || (render->magic != MP_XVMC_RENDER_MAGIC) ){
assert(0);
@@ -69,11 +69,11 @@ const int mb_block_count = 4+(1<<s->chroma_format);
//These functions should be called on every new field and/or frame.
//They should be safe if they are called a few times for the same field!
int XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx){
-xvmc_render_state_t * render,* last, * next;
+ struct xvmc_render_state * render, * last, * next;
assert(avctx != NULL);
- render = (xvmc_render_state_t*)s->current_picture.data[2];
+ render = (struct xvmc_render_state*)s->current_picture.data[2];
assert(render != NULL);
if( (render == NULL) || (render->magic != MP_XVMC_RENDER_MAGIC) )
return -1;//make sure that this is render packet
@@ -91,7 +91,7 @@ xvmc_render_state_t * render,* last, * next;
case FF_I_TYPE:
return 0;// no prediction from other frames
case FF_B_TYPE:
- next = (xvmc_render_state_t*)s->next_picture.data[2];
+ next = (struct xvmc_render_state*)s->next_picture.data[2];
assert(next!=NULL);
assert(next->state & MP_XVMC_STATE_PREDICTION);
if(next == NULL) return -1;
@@ -99,7 +99,7 @@ xvmc_render_state_t * render,* last, * next;
render->p_future_surface = next->p_surface;
//no return here, going to set forward prediction
case FF_P_TYPE:
- last = (xvmc_render_state_t*)s->last_picture.data[2];
+ last = (struct xvmc_render_state*)s->last_picture.data[2];
if(last == NULL)// && !s->first_field)
last = render;//predict second field from the first
if(last->magic != MP_XVMC_RENDER_MAGIC) return -1;
@@ -112,8 +112,8 @@ return -1;
}
void XVMC_field_end(MpegEncContext *s){
-xvmc_render_state_t * render;
- render = (xvmc_render_state_t*)s->current_picture.data[2];
+ struct xvmc_render_state * render;
+ render = (struct xvmc_render_state*)s->current_picture.data[2];
assert(render != NULL);
if(render->filled_mv_blocks_num > 0){
@@ -124,7 +124,7 @@ xvmc_render_state_t * render;
void XVMC_decode_mb(MpegEncContext *s){
XvMCMacroBlock * mv_block;
-xvmc_render_state_t * render;
+struct xvmc_render_state * render;
int i,cbp,blocks_per_mb;
const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
@@ -152,7 +152,7 @@ const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
s->current_picture.qscale_table[mb_xy] = s->qscale;
//START OF XVMC specific code
- render = (xvmc_render_state_t*)s->current_picture.data[2];
+ render = (struct xvmc_render_state*)s->current_picture.data[2];
assert(render!=NULL);
assert(render->magic==MP_XVMC_RENDER_MAGIC);
assert(render->mv_blocks);
diff --git a/libavdevice/bktr.c b/libavdevice/bktr.c
index 7b37f11..ea9a6e2 100644
--- a/libavdevice/bktr.c
+++ b/libavdevice/bktr.c
@@ -24,6 +24,7 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
+#define _BSD_SOURCE 1
#include "libavformat/avformat.h"
#if defined (HAVE_DEV_BKTR_IOCTL_METEOR_H) && defined (HAVE_DEV_BKTR_IOCTL_BT848_H)
# include <dev/bktr/ioctl_meteor.h>
diff --git a/libavdevice/v4l.c b/libavdevice/v4l.c
index a469ae0..c2c67d0 100644
--- a/libavdevice/v4l.c
+++ b/libavdevice/v4l.c
@@ -21,6 +21,7 @@
#undef __STRICT_ANSI__ //workaround due to broken kernel headers
#include "config.h"
+#include "libavutil/rational.h"
#include "libavformat/avformat.h"
#include "libavcodec/dsputil.h"
#include <unistd.h>
@@ -37,13 +38,12 @@ typedef struct {
int fd;
int frame_format; /* see VIDEO_PALETTE_xxx */
int use_mmap;
- int width, height;
- int frame_rate;
- int frame_rate_base;
+ AVRational time_base;
int64_t time_frame;
int frame_size;
struct video_capability video_cap;
struct video_audio audio_saved;
+ struct video_window video_win;
uint8_t *video_buf;
struct video_mbuf gb_buffers;
struct video_mmap gb_buf;
@@ -70,9 +70,7 @@ static int grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
{
VideoData *s = s1->priv_data;
AVStream *st;
- int width, height;
- int video_fd, frame_size;
- int ret, frame_rate, frame_rate_base;
+ int video_fd;
int desired_palette, desired_depth;
struct video_tuner tuner;
struct video_audio audio;
@@ -80,44 +78,27 @@ static int grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
int j;
int vformat_num = FF_ARRAY_ELEMS(video_formats);
- if (ap->width <= 0 || ap->height <= 0) {
- av_log(s1, AV_LOG_ERROR, "Wrong size (%dx%d)\n", ap->width, ap->height);
- return -1;
- }
if (ap->time_base.den <= 0) {
av_log(s1, AV_LOG_ERROR, "Wrong time base (%d)\n", ap->time_base.den);
return -1;
}
+ s->time_base = ap->time_base;
- width = ap->width;
- height = ap->height;
- frame_rate = ap->time_base.den;
- frame_rate_base = ap->time_base.num;
-
- if((unsigned)width > 32767 || (unsigned)height > 32767) {
- av_log(s1, AV_LOG_ERROR, "Capture size is out of range: %dx%d\n",
- width, height);
-
- return -1;
- }
+ s->video_win.width = ap->width;
+ s->video_win.height = ap->height;
st = av_new_stream(s1, 0);
if (!st)
return AVERROR(ENOMEM);
av_set_pts_info(st, 64, 1, 1000000); /* 64 bits pts in us */
- s->width = width;
- s->height = height;
- s->frame_rate = frame_rate;
- s->frame_rate_base = frame_rate_base;
-
video_fd = open(s1->filename, O_RDWR);
if (video_fd < 0) {
av_log(s1, AV_LOG_ERROR, "%s: %s\n", s1->filename, strerror(errno));
goto fail;
}
- if (ioctl(video_fd,VIDIOCGCAP, &s->video_cap) < 0) {
+ if (ioctl(video_fd, VIDIOCGCAP, &s->video_cap) < 0) {
av_log(s1, AV_LOG_ERROR, "VIDIOCGCAP: %s\n", strerror(errno));
goto fail;
}
@@ -127,6 +108,17 @@ static int grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
goto fail;
}
+ /* no values set, autodetect them */
+ if (s->video_win.width <= 0 || s->video_win.height <= 0) {
+ if (ioctl(video_fd, VIDIOCGWIN, &s->video_win, sizeof(s->video_win)) < 0) {
+ av_log(s1, AV_LOG_ERROR, "VIDIOCGWIN: %s\n", strerror(errno));
+ goto fail;
+ }
+ }
+
+ if(avcodec_check_dimensions(s1, s->video_win.width, s->video_win.height) < 0)
+ return -1;
+
desired_palette = -1;
desired_depth = -1;
for (j = 0; j < vformat_num; j++) {
@@ -167,7 +159,7 @@ static int grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
/* try to choose a suitable video format */
pict.palette = desired_palette;
pict.depth= desired_depth;
- if (desired_palette == -1 || (ret = ioctl(video_fd, VIDIOCSPICT, &pict)) < 0) {
+ if (desired_palette == -1 || ioctl(video_fd, VIDIOCSPICT, &pict) < 0) {
for (j = 0; j < vformat_num; j++) {
pict.palette = video_formats[j].palette;
pict.depth = video_formats[j].depth;
@@ -178,53 +170,54 @@ static int grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
goto fail1;
}
- ret = ioctl(video_fd,VIDIOCGMBUF,&s->gb_buffers);
- if (ret < 0) {
+ if (ioctl(video_fd, VIDIOCGMBUF, &s->gb_buffers) < 0) {
/* try to use read based access */
- struct video_window win;
int val;
- win.x = 0;
- win.y = 0;
- win.width = width;
- win.height = height;
- win.chromakey = -1;
- win.flags = 0;
+ s->video_win.x = 0;
+ s->video_win.y = 0;
+ s->video_win.chromakey = -1;
+ s->video_win.flags = 0;
- ioctl(video_fd, VIDIOCSWIN, &win);
+ if (ioctl(video_fd, VIDIOCSWIN, s->video_win) < 0) {
+ av_log(s1, AV_LOG_ERROR, "VIDIOCSWIN: %s\n", strerror(errno));
+ goto fail;
+ }
s->frame_format = pict.palette;
val = 1;
- ioctl(video_fd, VIDIOCCAPTURE, &val);
+ if (ioctl(video_fd, VIDIOCCAPTURE, &val) < 0) {
+ av_log(s1, AV_LOG_ERROR, "VIDIOCCAPTURE: %s\n", strerror(errno));
+ goto fail;
+ }
- s->time_frame = av_gettime() * s->frame_rate / s->frame_rate_base;
+ s->time_frame = av_gettime() * s->time_base.den / s->time_base.num;
s->use_mmap = 0;
} else {
- s->video_buf = mmap(0,s->gb_buffers.size,PROT_READ|PROT_WRITE,MAP_SHARED,video_fd,0);
+ s->video_buf = mmap(0, s->gb_buffers.size, PROT_READ|PROT_WRITE, MAP_SHARED, video_fd, 0);
if ((unsigned char*)-1 == s->video_buf) {
- s->video_buf = mmap(0,s->gb_buffers.size,PROT_READ|PROT_WRITE,MAP_PRIVATE,video_fd,0);
+ s->video_buf = mmap(0, s->gb_buffers.size, PROT_READ|PROT_WRITE, MAP_PRIVATE, video_fd, 0);
if ((unsigned char*)-1 == s->video_buf) {
av_log(s1, AV_LOG_ERROR, "mmap: %s\n", strerror(errno));
goto fail;
}
}
s->gb_frame = 0;
- s->time_frame = av_gettime() * s->frame_rate / s->frame_rate_base;
+ s->time_frame = av_gettime() * s->time_base.den / s->time_base.num;
/* start to grab the first frame */
s->gb_buf.frame = s->gb_frame % s->gb_buffers.frames;
- s->gb_buf.height = height;
- s->gb_buf.width = width;
+ s->gb_buf.height = s->video_win.height;
+ s->gb_buf.width = s->video_win.width;
s->gb_buf.format = pict.palette;
- ret = ioctl(video_fd, VIDIOCMCAPTURE, &s->gb_buf);
- if (ret < 0) {
+ if (ioctl(video_fd, VIDIOCMCAPTURE, &s->gb_buf) < 0) {
if (errno != EAGAIN) {
fail1:
- av_log(s1, AV_LOG_ERROR, "Fatal: grab device does not support suitable format\n");
+ av_log(s1, AV_LOG_ERROR, "VIDIOCMCAPTURE: %s\n", strerror(errno));
} else {
- av_log(s1, AV_LOG_ERROR,"Fatal: grab device does not receive any video signal\n");
+ av_log(s1, AV_LOG_ERROR, "Fatal: grab device does not receive any video signal\n");
}
goto fail;
}
@@ -238,7 +231,7 @@ static int grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
for (j = 0; j < vformat_num; j++) {
if (s->frame_format == video_formats[j].palette) {
- frame_size = width * height * video_formats[j].depth / 8;
+ s->frame_size = s->video_win.width * s->video_win.height * video_formats[j].depth / 8;
st->codec->pix_fmt = video_formats[j].pix_fmt;
break;
}
@@ -248,15 +241,13 @@ static int grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
goto fail;
s->fd = video_fd;
- s->frame_size = frame_size;
st->codec->codec_type = CODEC_TYPE_VIDEO;
st->codec->codec_id = CODEC_ID_RAWVIDEO;
- st->codec->width = width;
- st->codec->height = height;
- st->codec->time_base.den = frame_rate;
- st->codec->time_base.num = frame_rate_base;
- st->codec->bit_rate = frame_size * 1/av_q2d(st->codec->time_base) * 8;
+ st->codec->width = s->video_win.width;
+ st->codec->height = s->video_win.height;
+ st->codec->time_base = s->time_base;
+ st->codec->bit_rate = s->frame_size * 1/av_q2d(st->codec->time_base) * 8;
return 0;
fail:
@@ -303,9 +294,9 @@ static int grab_read_packet(AVFormatContext *s1, AVPacket *pkt)
/* wait based on the frame rate */
for(;;) {
curtime = av_gettime();
- delay = s->time_frame * s->frame_rate_base / s->frame_rate - curtime;
+ delay = s->time_frame * s->time_base.num / s->time_base.den - curtime;
if (delay <= 0) {
- if (delay < INT64_C(-1000000) * s->frame_rate_base / s->frame_rate) {
+ if (delay < INT64_C(-1000000) * s->time_base.num / s->time_base.den) {
/* printf("grabbing is %d frames late (dropping)\n", (int) -(delay / 16666)); */
s->time_frame += INT64_C(1000000);
}
@@ -349,7 +340,7 @@ static int grab_read_close(AVFormatContext *s1)
AVInputFormat v4l_demuxer = {
"video4linux",
- NULL_IF_CONFIG_SMALL("video grab"),
+ NULL_IF_CONFIG_SMALL("Video4Linux device grab"),
sizeof(VideoData),
NULL,
grab_read_header,
diff --git a/libavdevice/v4l2.c b/libavdevice/v4l2.c
index dc0a22d..1efed83 100644
--- a/libavdevice/v4l2.c
+++ b/libavdevice/v4l2.c
@@ -57,8 +57,6 @@ struct video_data {
int frame_format; /* V4L2_PIX_FMT_* */
enum io_method io_method;
int width, height;
- int frame_rate;
- int frame_rate_base;
int frame_size;
int top_field_first;
@@ -509,28 +507,19 @@ static int v4l2_read_header(AVFormatContext *s1, AVFormatParameters *ap)
struct video_data *s = s1->priv_data;
AVStream *st;
int width, height;
- int res, frame_rate, frame_rate_base;
+ int res;
uint32_t desired_format, capabilities;
if (ap->width <= 0 || ap->height <= 0) {
av_log(s1, AV_LOG_ERROR, "Wrong size (%dx%d)\n", ap->width, ap->height);
return -1;
}
- if (ap->time_base.den <= 0) {
- av_log(s1, AV_LOG_ERROR, "Wrong time base (%d)\n", ap->time_base.den);
- return -1;
- }
width = ap->width;
height = ap->height;
- frame_rate = ap->time_base.den;
- frame_rate_base = ap->time_base.num;
-
- if((unsigned)width > 32767 || (unsigned)height > 32767) {
- av_log(s1, AV_LOG_ERROR, "Wrong size (%dx%d)\n", width, height);
+ if(avcodec_check_dimensions(s1, ap->width, ap->height) < 0)
return -1;
- }
st = av_new_stream(s1, 0);
if (!st) {
@@ -540,8 +529,6 @@ static int v4l2_read_header(AVFormatContext *s1, AVFormatParameters *ap)
s->width = width;
s->height = height;
- s->frame_rate = frame_rate;
- s->frame_rate_base = frame_rate_base;
capabilities = 0;
s->fd = device_open(s1, &capabilities);
@@ -602,8 +589,8 @@ static int v4l2_read_header(AVFormatContext *s1, AVFormatParameters *ap)
st->codec->codec_id = CODEC_ID_RAWVIDEO;
st->codec->width = width;
st->codec->height = height;
- st->codec->time_base.den = frame_rate;
- st->codec->time_base.num = frame_rate_base;
+ st->codec->time_base.den = ap->time_base.den;
+ st->codec->time_base.num = ap->time_base.num;
st->codec->bit_rate = s->frame_size * 1/av_q2d(st->codec->time_base) * 8;
return 0;
@@ -651,7 +638,7 @@ static int v4l2_read_close(AVFormatContext *s1)
AVInputFormat v4l2_demuxer = {
"video4linux2",
- NULL_IF_CONFIG_SMALL("video grab"),
+ NULL_IF_CONFIG_SMALL("Video4Linux2 device grab"),
sizeof(struct video_data),
NULL,
v4l2_read_header,
diff --git a/libavdevice/vfwcap.c b/libavdevice/vfwcap.c
index 078c186..83e499c 100644
--- a/libavdevice/vfwcap.c
+++ b/libavdevice/vfwcap.c
@@ -27,18 +27,6 @@
/* Defines for VFW missing from MinGW.
* Remove this when MinGW incorporates them. */
-#define WM_CAP_START (0x0400)
-#define WM_CAP_SET_CALLBACK_VIDEOSTREAM (WM_CAP_START + 6)
-#define WM_CAP_DRIVER_CONNECT (WM_CAP_START + 10)
-#define WM_CAP_DRIVER_DISCONNECT (WM_CAP_START + 11)
-#define WM_CAP_GET_VIDEOFORMAT (WM_CAP_START + 44)
-#define WM_CAP_SET_VIDEOFORMAT (WM_CAP_START + 45)
-#define WM_CAP_SET_PREVIEW (WM_CAP_START + 50)
-#define WM_CAP_SET_OVERLAY (WM_CAP_START + 51)
-#define WM_CAP_SEQUENCE_NOFILE (WM_CAP_START + 63)
-#define WM_CAP_SET_SEQUENCE_SETUP (WM_CAP_START + 64)
-#define WM_CAP_GET_SEQUENCE_SETUP (WM_CAP_START + 65)
-
#define HWND_MESSAGE ((HWND)-3)
#define BI_RGB 0
diff --git a/libavdevice/x11grab.c b/libavdevice/x11grab.c
index 4e1a499..8f37f28 100644
--- a/libavdevice/x11grab.c
+++ b/libavdevice/x11grab.c
@@ -55,7 +55,7 @@
/**
* X11 Device Demuxer context
*/
-typedef struct x11_grab_s
+struct x11_grab
{
int frame_size; /**< Size in bytes of a grabbed frame */
AVRational time_base; /**< Time base */
@@ -71,7 +71,7 @@ typedef struct x11_grab_s
int use_shm; /**< !0 when using XShm extension */
XShmSegmentInfo shminfo; /**< When using XShm, keeps track of XShm infos */
int mouse_warning_shown;
-} x11_grab_t;
+};
/**
* Initializes the x11 grab device demuxer (public device demuxer API).
@@ -87,7 +87,7 @@ typedef struct x11_grab_s
static int
x11grab_read_header(AVFormatContext *s1, AVFormatParameters *ap)
{
- x11_grab_t *x11grab = s1->priv_data;
+ struct x11_grab *x11grab = s1->priv_data;
Display *dpy;
AVStream *st = NULL;
int input_pixfmt;
@@ -259,7 +259,7 @@ get_pointer_coordinates(int *x, int *y, Display *dpy, AVFormatContext *s1)
if (XQueryPointer(dpy, mrootwindow, &mrootwindow, &childwindow,
x, y, &dummy, &dummy, (unsigned int*)&dummy)) {
} else {
- x11_grab_t *s = s1->priv_data;
+ struct x11_grab *s = s1->priv_data;
if (!s->mouse_warning_shown) {
av_log(s1, AV_LOG_INFO, "couldn't find mouse pointer\n");
s->mouse_warning_shown = 1;
@@ -306,7 +306,7 @@ apply_masks(uint8_t *dst, int and, int or, int bits_per_pixel)
* @param y Mouse pointer coordinate
*/
static void
-paint_mouse_pointer(XImage *image, x11_grab_t *s, int x, int y)
+paint_mouse_pointer(XImage *image, struct x11_grab *s, int x, int y)
{
/* 16x20x1bpp bitmap for the black channel of the mouse pointer */
static const uint16_t const mousePointerBlack[] =
@@ -431,7 +431,7 @@ xget_zpixmap(Display *dpy, Drawable d, XImage *image, int x, int y)
static int
x11grab_read_packet(AVFormatContext *s1, AVPacket *pkt)
{
- x11_grab_t *s = s1->priv_data;
+ struct x11_grab *s = s1->priv_data;
Display *dpy = s->dpy;
XImage *image = s->image;
int x_off = s->x_off;
@@ -495,7 +495,7 @@ x11grab_read_packet(AVFormatContext *s1, AVPacket *pkt)
static int
x11grab_read_close(AVFormatContext *s1)
{
- x11_grab_t *x11grab = s1->priv_data;
+ struct x11_grab *x11grab = s1->priv_data;
/* Detach cleanly from shared mem */
if (x11grab->use_shm) {
@@ -520,7 +520,7 @@ AVInputFormat x11_grab_device_demuxer =
{
"x11grab",
NULL_IF_CONFIG_SMALL("X11grab"),
- sizeof(x11_grab_t),
+ sizeof(struct x11_grab),
NULL,
x11grab_read_header,
x11grab_read_packet,
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index 1cd4992..9da8f91 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -5,6 +5,8 @@ FFLIBS = avcodec avutil
FFLIBS-$(CONFIG_SWSCALE) += swscale
FFLIBS-$(CONFIG_AVFILTER_LAVF) += avformat
+HEADERS = avfilter.h
+
OBJS = allfilters.o \
avfilter.o \
defaults.o \
@@ -12,6 +14,4 @@ OBJS = allfilters.o \
#OBJS-$(CONFIG_XXX_FILTER) += vf_xxx.o
-HEADERS = avfilter.h
-
include $(SUBDIR)../subdir.mak
diff --git a/libavfilter/avfilter.c b/libavfilter/avfilter.c
index d6f07a9..218a83b 100644
--- a/libavfilter/avfilter.c
+++ b/libavfilter/avfilter.c
@@ -90,7 +90,7 @@ int avfilter_link(AVFilterContext *src, unsigned srcpad,
link->dst = dst;
link->srcpad = srcpad;
link->dstpad = dstpad;
- link->format = -1;
+ link->format = PIX_FMT_NONE;
return 0;
}
diff --git a/libavfilter/avfilter.h b/libavfilter/avfilter.h
index ff08f71..75d8c76 100644
--- a/libavfilter/avfilter.h
+++ b/libavfilter/avfilter.h
@@ -257,7 +257,7 @@ struct AVFilterPad
enum CodecType type;
/**
- * Minimum required permissions on incoming buffers. Any buffers with
+ * Minimum required permissions on incoming buffers. Any buffer with
* insufficient permissions will be automatically copied by the filter
* system to a new buffer which provides the needed access permissions.
*
@@ -267,9 +267,10 @@ struct AVFilterPad
/**
* Permissions which are not accepted on incoming buffers. Any buffer
- * which has any of these permissions set be automatically copied by the
- * filter system to a new buffer which does not have those permissions.
- * This can be used to easily disallow buffers with AV_PERM_REUSE.
+ * which has any of these permissions set will be automatically copied
+ * by the filter system to a new buffer which does not have those
+ * permissions. This can be used to easily disallow buffers with
+ * AV_PERM_REUSE.
*
* Input pads only.
*/
@@ -509,7 +510,8 @@ int avfilter_request_frame(AVFilterLink *link);
/**
* Poll a frame from the filter chain.
* @param link the input link
- * @return the number of imediately available frames
+ * @return the number of immediately available frames, a negative
+ * number in case of error
*/
int avfilter_poll_frame(AVFilterLink *link);
diff --git a/libavfilter/avfiltergraph.h b/libavfilter/avfiltergraph.h
index 0558de3..dee8a90 100644
--- a/libavfilter/avfiltergraph.h
+++ b/libavfilter/avfiltergraph.h
@@ -30,7 +30,10 @@ typedef struct AVFilterGraph {
} AVFilterGraph;
/**
- * Get a pointer to a graph by instance name
+ * Get from \p graph a filter instance with name \p name.
+ *
+ * @return the pointer to the found filter instance or NULL if it
+ * cannot be found.
*/
AVFilterContext *avfilter_graph_get_filter(AVFilterGraph *graph, char *name);
diff --git a/libavfilter/graphparser.c b/libavfilter/graphparser.c
index 25c5c4d..e7019ea 100644
--- a/libavfilter/graphparser.c
+++ b/libavfilter/graphparser.c
@@ -119,43 +119,43 @@ static char *parse_link_name(const char **buf, AVClass *log_ctx)
}
static AVFilterContext *create_filter(AVFilterGraph *ctx, int index,
- const char *name, const char *args,
+ const char *filt_name, const char *args,
AVClass *log_ctx)
{
- AVFilterContext *filt;
+ AVFilterContext *filt_ctx;
- AVFilter *filterdef;
+ AVFilter *filt;
char inst_name[30];
snprintf(inst_name, sizeof(inst_name), "Parsed filter %d", index);
- filterdef = avfilter_get_by_name(name);
+ filt = avfilter_get_by_name(filt_name);
- if(!filterdef) {
+ if(!filt) {
av_log(log_ctx, AV_LOG_ERROR,
- "no such filter: '%s'\n", name);
+ "no such filter: '%s'\n", filt_name);
return NULL;
}
- filt = avfilter_open(filterdef, inst_name);
- if(!filt) {
+ filt_ctx = avfilter_open(filt, inst_name);
+ if(!filt_ctx) {
av_log(log_ctx, AV_LOG_ERROR,
- "error creating filter '%s'\n", name);
+ "error creating filter '%s'\n", filt_name);
return NULL;
}
- if(avfilter_graph_add_filter(ctx, filt) < 0) {
- avfilter_destroy(filt);
+ if(avfilter_graph_add_filter(ctx, filt_ctx) < 0) {
+ avfilter_destroy(filt_ctx);
return NULL;
}
- if(avfilter_init_filter(filt, args, NULL)) {
+ if(avfilter_init_filter(filt_ctx, args, NULL)) {
av_log(log_ctx, AV_LOG_ERROR,
- "error initializing filter '%s' with args '%s'\n", name, args);
+ "error initializing filter '%s' with args '%s'\n", filt_name, args);
return NULL;
}
- return filt;
+ return filt_ctx;
}
/**
@@ -211,13 +211,13 @@ static void insert_inout(AVFilterInOut **inouts, AVFilterInOut *element)
}
static int link_filter_inouts(AVFilterContext *filter,
- AVFilterInOut **currInputs,
- AVFilterInOut **openInputs, AVClass *log_ctx)
+ AVFilterInOut **curr_inputs,
+ AVFilterInOut **open_inputs, AVClass *log_ctx)
{
int pad = filter->input_count;
while(pad--) {
- AVFilterInOut *p = *currInputs;
+ AVFilterInOut *p = *curr_inputs;
if(!p) {
av_log(log_ctx, AV_LOG_ERROR,
"Not enough inputs specified for the \"%s\" filter.\n",
@@ -225,7 +225,7 @@ static int link_filter_inouts(AVFilterContext *filter,
return -1;
}
- *currInputs = (*currInputs)->next;
+ *curr_inputs = (*curr_inputs)->next;
if(p->filter) {
if(link_filter(p->filter, p->pad_idx, filter, pad, log_ctx))
@@ -235,11 +235,11 @@ static int link_filter_inouts(AVFilterContext *filter,
} else {
p->filter = filter;
p->pad_idx = pad;
- insert_inout(openInputs, p);
+ insert_inout(open_inputs, p);
}
}
- if(*currInputs) {
+ if(*curr_inputs) {
av_log(log_ctx, AV_LOG_ERROR,
"Too many inputs specified for the \"%s\" filter.\n",
filter->filter->name);
@@ -251,14 +251,14 @@ static int link_filter_inouts(AVFilterContext *filter,
AVFilterInOut *currlinkn = av_mallocz(sizeof(AVFilterInOut));
currlinkn->filter = filter;
currlinkn->pad_idx = pad;
- insert_inout(currInputs, currlinkn);
+ insert_inout(curr_inputs, currlinkn);
}
return 0;
}
-static int parse_inputs(const char **buf, AVFilterInOut **currInputs,
- AVFilterInOut **openOutputs, AVClass *log_ctx)
+static int parse_inputs(const char **buf, AVFilterInOut **curr_inputs,
+ AVFilterInOut **open_outputs, AVClass *log_ctx)
{
int pad = 0;
@@ -269,8 +269,8 @@ static int parse_inputs(const char **buf, AVFilterInOut **currInputs,
if(!name)
return -1;
- /* First check if the label is not in the openOutputs list */
- match = extract_inout(name, openOutputs);
+ /* First check if the label is not in the open_outputs list */
+ match = extract_inout(name, open_outputs);
if(match) {
av_free(name);
@@ -281,7 +281,7 @@ static int parse_inputs(const char **buf, AVFilterInOut **currInputs,
match->pad_idx = pad;
}
- insert_inout(currInputs, match);
+ insert_inout(curr_inputs, match);
*buf += consume_whitespace(*buf);
pad++;
@@ -290,9 +290,9 @@ static int parse_inputs(const char **buf, AVFilterInOut **currInputs,
return pad;
}
-static int parse_outputs(const char **buf, AVFilterInOut **currInputs,
- AVFilterInOut **openInputs,
- AVFilterInOut **openOutputs, AVClass *log_ctx)
+static int parse_outputs(const char **buf, AVFilterInOut **curr_inputs,
+ AVFilterInOut **open_inputs,
+ AVFilterInOut **open_outputs, AVClass *log_ctx)
{
int pad = 0;
@@ -300,14 +300,14 @@ static int parse_outputs(const char **buf, AVFilterInOut **currInputs,
char *name = parse_link_name(buf, log_ctx);
AVFilterInOut *match;
- AVFilterInOut *input = *currInputs;
- *currInputs = (*currInputs)->next;
+ AVFilterInOut *input = *curr_inputs;
+ *curr_inputs = (*curr_inputs)->next;
if(!name)
return -1;
- /* First check if the label is not in the openInputs list */
- match = extract_inout(name, openInputs);
+ /* First check if the label is not in the open_inputs list */
+ match = extract_inout(name, open_inputs);
if(match) {
if(link_filter(input->filter, input->pad_idx,
@@ -318,9 +318,9 @@ static int parse_outputs(const char **buf, AVFilterInOut **currInputs,
av_free(match);
av_free(input);
} else {
- /* Not in the list, so add the first input as a openOutput */
+ /* Not in the list, so add the first input as a open_output */
input->name = name;
- insert_inout(openOutputs, input);
+ insert_inout(open_outputs, input);
}
*buf += consume_whitespace(*buf);
pad++;
@@ -330,19 +330,19 @@ static int parse_outputs(const char **buf, AVFilterInOut **currInputs,
}
int avfilter_parse_graph(AVFilterGraph *graph, const char *filters,
- AVFilterInOut *openInputs,
- AVFilterInOut *openOutputs, AVClass *log_ctx)
+ AVFilterInOut *open_inputs,
+ AVFilterInOut *open_outputs, AVClass *log_ctx)
{
int index = 0;
char chr = 0;
- AVFilterInOut *currInputs = NULL;
+ AVFilterInOut *curr_inputs = NULL;
do {
AVFilterContext *filter;
filters += consume_whitespace(filters);
- if(parse_inputs(&filters, &currInputs, &openOutputs, log_ctx) < 0)
+ if(parse_inputs(&filters, &curr_inputs, &open_outputs, log_ctx) < 0)
goto fail;
filter = parse_filter(&filters, graph, index, log_ctx);
@@ -350,24 +350,24 @@ int avfilter_parse_graph(AVFilterGraph *graph, const char *filters,
if(!filter)
goto fail;
- if(filter->input_count == 1 && !currInputs && !index) {
- /* First input can be ommitted if it is "[in]" */
+ if(filter->input_count == 1 && !curr_inputs && !index) {
+ /* First input can be omitted if it is "[in]" */
const char *tmp = "[in]";
- if(parse_inputs(&tmp, &currInputs, &openOutputs, log_ctx) < 0)
+ if(parse_inputs(&tmp, &curr_inputs, &open_outputs, log_ctx) < 0)
goto fail;
}
- if(link_filter_inouts(filter, &currInputs, &openInputs, log_ctx) < 0)
+ if(link_filter_inouts(filter, &curr_inputs, &open_inputs, log_ctx) < 0)
goto fail;
- if(parse_outputs(&filters, &currInputs, &openInputs, &openOutputs,
+ if(parse_outputs(&filters, &curr_inputs, &open_inputs, &open_outputs,
log_ctx) < 0)
goto fail;
filters += consume_whitespace(filters);
chr = *filters++;
- if(chr == ';' && currInputs) {
+ if(chr == ';' && curr_inputs) {
av_log(log_ctx, AV_LOG_ERROR,
"Could not find a output to link when parsing \"%s\"\n",
filters - 1);
@@ -376,11 +376,11 @@ int avfilter_parse_graph(AVFilterGraph *graph, const char *filters,
index++;
} while(chr == ',' || chr == ';');
- if(openInputs && !strcmp(openInputs->name, "out") && currInputs) {
- /* Last output can be ommitted if it is "[out]" */
+ if(open_inputs && !strcmp(open_inputs->name, "out") && curr_inputs) {
+ /* Last output can be omitted if it is "[out]" */
const char *tmp = "[out]";
- if(parse_outputs(&tmp, &currInputs, &openInputs,
- &openOutputs, log_ctx) < 0)
+ if(parse_outputs(&tmp, &curr_inputs, &open_inputs,
+ &open_outputs, log_ctx) < 0)
goto fail;
}
@@ -388,8 +388,8 @@ int avfilter_parse_graph(AVFilterGraph *graph, const char *filters,
fail:
avfilter_destroy_graph(graph);
- free_inout(openInputs);
- free_inout(openOutputs);
- free_inout(currInputs);
+ free_inout(open_inputs);
+ free_inout(open_outputs);
+ free_inout(curr_inputs);
return -1;
}
diff --git a/libavfilter/graphparser.h b/libavfilter/graphparser.h
index eddc6b0..725b728 100644
--- a/libavfilter/graphparser.h
+++ b/libavfilter/graphparser.h
@@ -38,10 +38,11 @@ typedef struct AVFilterInOut {
/**
* Add to a graph a graph described by a string.
+ *
* @param graph the filter graph where to link the parsed graph context
* @param filters string to be parsed
- * @param inouts linked list to the inputs and outputs of the graph
- * @param outpad pad index of the output
+ * @param inputs linked list to the inputs of the graph
+ * @param outputs linked list to the outputs of the graph
* @return zero on success, -1 on error
*/
int avfilter_parse_graph(AVFilterGraph *graph, const char *filters,
diff --git a/libavformat/Makefile b/libavformat/Makefile
index de5cae2..10a461c 100644
--- a/libavformat/Makefile
+++ b/libavformat/Makefile
@@ -3,10 +3,10 @@ include $(SUBDIR)../config.mak
NAME = avformat
FFLIBS = avcodec avutil
-OBJS = allformats.o cutils.o os_support.o sdp.o utils.o
-
HEADERS = avformat.h avio.h rtsp.h rtspcodes.h
+OBJS = allformats.o cutils.o metadata.o metadata_compat.o os_support.o sdp.o utils.o
+
# muxers/demuxers
OBJS-$(CONFIG_AAC_DEMUXER) += raw.o
OBJS-$(CONFIG_AC3_DEMUXER) += raw.o
@@ -81,9 +81,9 @@ OBJS-$(CONFIG_IPOD_MUXER) += movenc.o riff.o isom.o avc.o
OBJS-$(CONFIG_LMLM4_DEMUXER) += lmlm4.o
OBJS-$(CONFIG_M4V_DEMUXER) += raw.o
OBJS-$(CONFIG_M4V_MUXER) += raw.o
-OBJS-$(CONFIG_MATROSKA_AUDIO_MUXER) += matroskaenc.o matroska.o riff.o avc.o
-OBJS-$(CONFIG_MATROSKA_DEMUXER) += matroskadec.o matroska.o riff.o
-OBJS-$(CONFIG_MATROSKA_MUXER) += matroskaenc.o matroska.o riff.o avc.o
+OBJS-$(CONFIG_MATROSKA_AUDIO_MUXER) += matroskaenc.o matroska.o riff.o isom.o avc.o
+OBJS-$(CONFIG_MATROSKA_DEMUXER) += matroskadec.o matroska.o riff.o isom.o
+OBJS-$(CONFIG_MATROSKA_MUXER) += matroskaenc.o matroska.o riff.o isom.o avc.o
OBJS-$(CONFIG_MJPEG_DEMUXER) += raw.o
OBJS-$(CONFIG_MJPEG_MUXER) += raw.o
OBJS-$(CONFIG_MLP_DEMUXER) += raw.o
@@ -158,9 +158,9 @@ OBJS-$(CONFIG_ROQ_DEMUXER) += idroq.o
OBJS-$(CONFIG_ROQ_MUXER) += raw.o
OBJS-$(CONFIG_RPL_DEMUXER) += rpl.o
OBJS-$(CONFIG_RTP_MUXER) += rtp.o \
- rtpenc.o \
- rtp_mpv.o \
rtp_aac.o \
+ rtp_mpv.o \
+ rtpenc.o \
rtpenc_h264.o \
avc.o
OBJS-$(CONFIG_RTSP_DEMUXER) += rdt.o rtsp.o
diff --git a/libavformat/allformats.c b/libavformat/allformats.c
index 8c0cd5d..43243f8 100644
--- a/libavformat/allformats.c
+++ b/libavformat/allformats.c
@@ -23,15 +23,18 @@
#include "rdt.h"
#define REGISTER_MUXER(X,x) { \
- extern AVOutputFormat x##_muxer; \
- if(ENABLE_##X##_MUXER) av_register_output_format(&x##_muxer); }
+ extern AVOutputFormat x##_muxer; \
+ if(ENABLE_##X##_MUXER) av_register_output_format(&x##_muxer); }
+
#define REGISTER_DEMUXER(X,x) { \
- extern AVInputFormat x##_demuxer; \
- if(ENABLE_##X##_DEMUXER) av_register_input_format(&x##_demuxer); }
+ extern AVInputFormat x##_demuxer; \
+ if(ENABLE_##X##_DEMUXER) av_register_input_format(&x##_demuxer); }
+
#define REGISTER_MUXDEMUX(X,x) REGISTER_MUXER(X,x); REGISTER_DEMUXER(X,x)
+
#define REGISTER_PROTOCOL(X,x) { \
- extern URLProtocol x##_protocol; \
- if(ENABLE_##X##_PROTOCOL) register_protocol(&x##_protocol); }
+ extern URLProtocol x##_protocol; \
+ if(ENABLE_##X##_PROTOCOL) register_protocol(&x##_protocol); }
/* If you do not call this function, then you can select exactly which
formats you want to support */
@@ -47,7 +50,6 @@ void av_register_all(void)
return;
initialized = 1;
- avcodec_init();
avcodec_register_all();
/* (de)muxers */
diff --git a/libavformat/asf.c b/libavformat/asf.c
index 0faa46f..33cde4c 100644
--- a/libavformat/asf.c
+++ b/libavformat/asf.c
@@ -26,7 +26,7 @@
#include "asf.h"
#include "asfcrypt.h"
-extern void ff_mms_set_stream_selection(URLContext *h, AVFormatContext *format);
+void ff_mms_set_stream_selection(URLContext *h, AVFormatContext *format);
#undef NDEBUG
#include <assert.h>
@@ -555,19 +555,14 @@ static int asf_read_header(AVFormatContext *s, AVFormatParameters *ap)
default: var = defval; break; \
}
-/**
- *
- * @return <0 in case of an error
- */
-static int asf_get_packet(AVFormatContext *s)
+int ff_asf_get_packet(AVFormatContext *s, ByteIOContext *pb)
{
ASFContext *asf = s->priv_data;
- ByteIOContext *pb = s->pb;
uint32_t packet_length, padsize;
int rsize = 8;
int c, d, e, off;
- off= (url_ftell(s->pb) - s->data_offset) % asf->packet_size + 3;
+ off= (url_ftell(pb) - s->data_offset) % asf->packet_size + 3;
c=d=e=-1;
while(off-- > 0){
@@ -634,9 +629,8 @@ static int asf_get_packet(AVFormatContext *s)
*
* @return <0 if error
*/
-static int asf_read_frame_header(AVFormatContext *s){
+static int asf_read_frame_header(AVFormatContext *s, ByteIOContext *pb){
ASFContext *asf = s->priv_data;
- ByteIOContext *pb = s->pb;
int rsize = 1;
int num = get_byte(pb);
int64_t ts0, ts1;
@@ -705,12 +699,10 @@ static int asf_read_frame_header(AVFormatContext *s){
return 0;
}
-static int asf_read_packet(AVFormatContext *s, AVPacket *pkt)
+int ff_asf_parse_packet(AVFormatContext *s, ByteIOContext *pb, AVPacket *pkt)
{
ASFContext *asf = s->priv_data;
ASFStream *asf_st = 0;
- ByteIOContext *pb = s->pb;
- //static int pc = 0;
for (;;) {
if(url_feof(pb))
return AVERROR(EIO);
@@ -723,19 +715,14 @@ static int asf_read_packet(AVFormatContext *s, AVPacket *pkt)
/* fail safe */
url_fskip(pb, ret);
- asf->packet_pos= url_ftell(s->pb);
+ asf->packet_pos= url_ftell(pb);
if (asf->data_object_size != (uint64_t)-1 &&
(asf->packet_pos - asf->data_object_offset >= asf->data_object_size))
return AVERROR(EIO); /* Do not exceed the size of the data object */
- ret = asf_get_packet(s);
- //printf("READ ASF PACKET %d r:%d c:%d\n", ret, asf->packet_size_left, pc++);
- if (ret < 0)
- assert(asf->packet_size_left < FRAME_HEADER_SIZE || asf->packet_segments < 1);
- asf->packet_time_start = 0;
- continue;
+ return 1;
}
if (asf->packet_time_start == 0) {
- if(asf_read_frame_header(s) < 0){
+ if(asf_read_frame_header(s, pb) < 0){
asf->packet_segments= 0;
continue;
}
@@ -879,6 +866,24 @@ static int asf_read_packet(AVFormatContext *s, AVPacket *pkt)
return 0;
}
+static int asf_read_packet(AVFormatContext *s, AVPacket *pkt)
+{
+ ASFContext *asf = s->priv_data;
+
+ for (;;) {
+ int ret;
+
+ /* parse cached packets, if any */
+ if ((ret = ff_asf_parse_packet(s, s->pb, pkt)) <= 0)
+ return ret;
+ if ((ret = ff_asf_get_packet(s, s->pb)) < 0)
+ assert(asf->packet_size_left < FRAME_HEADER_SIZE || asf->packet_segments < 1);
+ asf->packet_time_start = 0;
+ }
+
+ return 0;
+}
+
// Added to support seeking after packets have been read
// If information is not reset, read_packet fails due to
// leftover information from previous reads
diff --git a/libavformat/asf.h b/libavformat/asf.h
index 4ccae10..afb015e 100644
--- a/libavformat/asf.h
+++ b/libavformat/asf.h
@@ -286,4 +286,25 @@ static const GUID my_guid = {
#define ASF_PL_FLAG_KEY_FRAME 0x80 //1000 0000
+extern AVInputFormat asf_demuxer;
+
+/**
+ * Load a single ASF packet into the demuxer.
+ * @param s demux context
+ * @param pb context to read data from
+ * @returns 0 on success, <0 on error
+ */
+int ff_asf_get_packet(AVFormatContext *s, ByteIOContext *pb);
+
+/**
+ * Parse data from individual ASF packets (which were previously loaded
+ * with asf_get_packet()).
+ * @param s demux context
+ * @param pb context to read data from
+ * @param pkt pointer to store packet data into
+ * @returns 0 if data was stored in pkt, <0 on error or 1 if more ASF
+ * packets need to be loaded (through asf_get_packet())
+ */
+int ff_asf_parse_packet(AVFormatContext *s, ByteIOContext *pb, AVPacket *pkt);
+
#endif /* AVFORMAT_ASF_H */
diff --git a/libavformat/avformat.h b/libavformat/avformat.h
index acdcec4..e8b7100 100644
--- a/libavformat/avformat.h
+++ b/libavformat/avformat.h
@@ -46,6 +46,59 @@ unsigned avformat_version(void);
#include "avio.h"
+
+/*
+ * Public Metadata API.
+ * The metadata API allows libavformat to export metadata tags to a client
+ * application using a sequence of key/value pairs.
+ * Important concepts to keep in mind:
+ * 1. Keys are unique; there can never be 2 tags with the same key. This is
+ * also meant semantically, i.e., a demuxer should not knowingly produce
+ * several keys that are literally different but semantically identical.
+ * E.g., key=Author5, key=Author6. In this example, all authors must be
+ * placed in the same tag.
+ * 2. Metadata is flat, not hierarchical; there are no subtags. If you
+ * want to store, e.g., the email address of the child of producer Alice
+ * and actor Bob, that could have key=alice_and_bobs_childs_email_address.
+ * 3. A tag whose value is localized for a particular language is appended
+ * with a dash character ('-') and the ISO 639 3-letter language code.
+ * For example: Author-ger=Michael, Author-eng=Mike
+ * The original/default language is in the unqualified "Author" tag.
+ * A demuxer should set a default if it sets any translated tag.
+ */
+
+#define AV_METADATA_IGNORE_CASE 1
+#define AV_METADATA_IGNORE_SUFFIX 2
+
+typedef struct {
+ char *key;
+ char *value;
+}AVMetadataTag;
+
+typedef struct AVMetadata AVMetadata;
+
+/**
+ * gets a metadata element with matching key.
+ * @param prev set to the previous matching element to find the next.
+ * @param flags allows case as well as suffix insensitive comparissions.
+ * @return found tag or NULL, changing key or value leads to undefined behavior.
+ */
+AVMetadataTag *
+av_metadata_get(AVMetadata *m, const char *key, const AVMetadataTag *prev, int flags);
+
+/**
+ * sets the given tag in m, overwriting an existing tag.
+ * @param tag tag to add to m, key and value will be av_strduped.
+ * @return >= 0 if success otherwise error code that is <0.
+ */
+int av_metadata_set(AVMetadata **m, AVMetadataTag tag);
+
+/**
+ * Free all the memory allocated for an AVMetadata struct.
+ */
+void av_metadata_free(AVMetadata **m);
+
+
/* packet functions */
typedef struct AVPacket {
@@ -160,7 +213,7 @@ static inline void av_free_packet(AVPacket *pkt)
*/
typedef struct AVFrac {
int64_t val, num, den;
-} AVFrac attribute_deprecated;
+} AVFrac;
/*************************************************/
/* input/output formats */
@@ -432,6 +485,8 @@ typedef struct AVStream {
* - decoding: Set by libavformat.
*/
AVRational sample_aspect_ratio;
+
+ AVMetadata *metadata;
} AVStream;
#define AV_PROGRAM_RUNNING 1
@@ -450,6 +505,7 @@ typedef struct AVProgram {
enum AVDiscard discard; ///< selects which program to discard and which to feed to the caller
unsigned int *stream_index;
unsigned int nb_stream_indexes;
+ AVMetadata *metadata;
} AVProgram;
#define AVFMTCTX_NOHEADER 0x0001 /**< signal that no header is present
@@ -460,6 +516,7 @@ typedef struct AVChapter {
AVRational time_base; ///< time base in which the start/end timestamps are specified
int64_t start, end; ///< chapter start/end time in time_base units
char *title; ///< chapter title
+ AVMetadata *metadata;
} AVChapter;
#define MAX_STREAMS 20
@@ -608,6 +665,8 @@ typedef struct AVFormatContext {
struct AVPacketList *raw_packet_buffer_end;
struct AVPacketList *packet_buffer_end;
+
+ AVMetadata *metadata;
} AVFormatContext;
typedef struct AVPacketList {
@@ -1018,6 +1077,7 @@ void dump_format(AVFormatContext *ic,
const char *url,
int is_output);
+#if LIBAVFORMAT_VERSION_MAJOR < 53
/**
* Parses width and height out of string str.
* @deprecated Use av_parse_video_frame_size instead.
@@ -1031,6 +1091,7 @@ attribute_deprecated int parse_image_size(int *width_ptr, int *height_ptr,
*/
attribute_deprecated int parse_frame_rate(int *frame_rate, int *frame_rate_base,
const char *arg);
+#endif
/**
* Parses \p datestr and returns a corresponding number of microseconds.
diff --git a/libavformat/avidec.c b/libavformat/avidec.c
index eaccd41..14f0ebd 100644
--- a/libavformat/avidec.c
+++ b/libavformat/avidec.c
@@ -163,7 +163,7 @@ static int read_braindead_odml_indx(AVFormatContext *s, int frame_num){
#endif
if(last_pos == pos || pos == base - 8)
avi->non_interleaved= 1;
- else
+ if(last_pos != pos)
av_add_index_entry(st, pos, ast->cum_len / FFMAX(1, ast->sample_size), len, 0, key ? AVINDEX_KEYFRAME : 0);
if(ast->sample_size)
@@ -216,13 +216,17 @@ static void clean_index(AVFormatContext *s){
}
}
-static int avi_read_tag(ByteIOContext *pb, char *buf, int maxlen, unsigned int size)
+static int avi_read_tag(AVFormatContext *s, const char *key, unsigned int size)
{
+ ByteIOContext *pb = s->pb;
+ uint8_t value[1024];
+
int64_t i = url_ftell(pb);
size += (size & 1);
- get_strz(pb, buf, maxlen);
+ get_strz(pb, value, sizeof(value));
url_fseek(pb, i+size, SEEK_SET);
- return 0;
+
+ return av_metadata_set(&s->metadata, (const AVMetadataTag){key, value});
}
static int avi_read_header(AVFormatContext *s, AVFormatParameters *ap)
@@ -235,7 +239,6 @@ static int avi_read_header(AVFormatContext *s, AVFormatParameters *ap)
int i;
AVStream *st;
AVIStream *ast = NULL;
- char str_track[4];
int avih_width=0, avih_height=0;
int amv_file_format=0;
@@ -561,26 +564,25 @@ static int avi_read_header(AVFormatContext *s, AVFormatParameters *ap)
url_fseek(pb, size, SEEK_CUR);
break;
case MKTAG('I', 'N', 'A', 'M'):
- avi_read_tag(pb, s->title, sizeof(s->title), size);
+ avi_read_tag(s, "Title", size);
break;
case MKTAG('I', 'A', 'R', 'T'):
- avi_read_tag(pb, s->author, sizeof(s->author), size);
+ avi_read_tag(s, "Artist", size);
break;
case MKTAG('I', 'C', 'O', 'P'):
- avi_read_tag(pb, s->copyright, sizeof(s->copyright), size);
+ avi_read_tag(s, "Copyright", size);
break;
case MKTAG('I', 'C', 'M', 'T'):
- avi_read_tag(pb, s->comment, sizeof(s->comment), size);
+ avi_read_tag(s, "Comment", size);
break;
case MKTAG('I', 'G', 'N', 'R'):
- avi_read_tag(pb, s->genre, sizeof(s->genre), size);
+ avi_read_tag(s, "Genre", size);
break;
case MKTAG('I', 'P', 'R', 'D'):
- avi_read_tag(pb, s->album, sizeof(s->album), size);
+ avi_read_tag(s, "Album", size);
break;
case MKTAG('I', 'P', 'R', 'T'):
- avi_read_tag(pb, str_track, sizeof(str_track), size);
- sscanf(str_track, "%d", &s->track);
+ avi_read_tag(s, "Track", size);
break;
default:
if(size > 1000000){
@@ -668,8 +670,12 @@ static int avi_read_packet(AVFormatContext *s, AVPacket *pkt)
best_ts= av_rescale(best_ts, best_st->time_base.den, AV_TIME_BASE * (int64_t)best_st->time_base.num); //FIXME a little ugly
if(best_ast->remaining)
i= av_index_search_timestamp(best_st, best_ts, AVSEEK_FLAG_ANY | AVSEEK_FLAG_BACKWARD);
- else
+ else{
i= av_index_search_timestamp(best_st, best_ts, AVSEEK_FLAG_ANY);
+ if(i>=0)
+ best_ast->frame_offset= best_st->index_entries[i].timestamp
+ * FFMAX(1, best_ast->sample_size);
+ }
// av_log(NULL, AV_LOG_DEBUG, "%d\n", i);
if(i>=0){
diff --git a/libavformat/avienc.c b/libavformat/avienc.c
index a672e8d..cea513f 100644
--- a/libavformat/avienc.c
+++ b/libavformat/avienc.c
@@ -103,6 +103,15 @@ static void avi_write_info_tag(ByteIOContext *pb, const char *tag, const char *s
}
}
+static void avi_write_info_tag2(AVFormatContext *s, const char *fourcc, const char *key1, const char *key2)
+{
+ AVMetadataTag *tag= av_metadata_get(s->metadata, key1, NULL, AV_METADATA_IGNORE_CASE);
+ if(!tag && key2)
+ tag= av_metadata_get(s->metadata, key2, NULL, AV_METADATA_IGNORE_CASE);
+ if(tag)
+ avi_write_info_tag(s->pb, fourcc, tag->value);
+}
+
static int avi_write_counters(AVFormatContext* s, int riff_id)
{
ByteIOContext *pb = s->pb;
@@ -332,17 +341,13 @@ static int avi_write_header(AVFormatContext *s)
list2 = start_tag(pb, "LIST");
put_tag(pb, "INFO");
- avi_write_info_tag(pb, "INAM", s->title);
- avi_write_info_tag(pb, "IART", s->author);
- avi_write_info_tag(pb, "ICOP", s->copyright);
- avi_write_info_tag(pb, "ICMT", s->comment);
- avi_write_info_tag(pb, "IPRD", s->album);
- avi_write_info_tag(pb, "IGNR", s->genre);
- if (s->track) {
- char str_track[4];
- snprintf(str_track, 4, "%d", s->track);
- avi_write_info_tag(pb, "IPRT", str_track);
- }
+ avi_write_info_tag2(s, "INAM", "Title", NULL);
+ avi_write_info_tag2(s, "IART", "Artist", "Author");
+ avi_write_info_tag2(s, "ICOP", "Copyright", NULL);
+ avi_write_info_tag2(s, "ICMT", "Comment", NULL);
+ avi_write_info_tag2(s, "IPRD", "Album", NULL);
+ avi_write_info_tag2(s, "IGNR", "Genre", NULL);
+ avi_write_info_tag2(s, "IPRT", "Track", NULL);
if(!(s->streams[0]->codec->flags & CODEC_FLAG_BITEXACT))
avi_write_info_tag(pb, "ISFT", LIBAVFORMAT_IDENT);
end_tag(pb, list2);
diff --git a/libavformat/avio.h b/libavformat/avio.h
index 687333e..3bb88b3 100644
--- a/libavformat/avio.h
+++ b/libavformat/avio.h
@@ -114,8 +114,8 @@ int av_url_read_pause(URLContext *h, int pause);
* @return >= 0 on success
* @see AVInputFormat::read_seek
*/
-int64_t av_url_read_seek(URLContext *h,
- int stream_index, int64_t timestamp, int flags);
+int64_t av_url_read_seek(URLContext *h, int stream_index,
+ int64_t timestamp, int flags);
/**
* Passing this as the "whence" parameter to a seek function causes it to
@@ -133,8 +133,8 @@ typedef struct URLProtocol {
int (*url_close)(URLContext *h);
struct URLProtocol *next;
int (*url_read_pause)(URLContext *h, int pause);
- int64_t (*url_read_seek)(URLContext *h,
- int stream_index, int64_t timestamp, int flags);
+ int64_t (*url_read_seek)(URLContext *h, int stream_index,
+ int64_t timestamp, int flags);
} URLProtocol;
extern URLProtocol *first_protocol;
@@ -170,8 +170,8 @@ typedef struct {
unsigned long (*update_checksum)(unsigned long checksum, const uint8_t *buf, unsigned int size);
int error; ///< contains the error code or 0 if no error happened
int (*read_pause)(void *opaque, int pause);
- int64_t (*read_seek)(void *opaque,
- int stream_index, int64_t timestamp, int flags);
+ int64_t (*read_seek)(void *opaque, int stream_index,
+ int64_t timestamp, int flags);
} ByteIOContext;
int init_put_byte(ByteIOContext *s,
@@ -238,8 +238,8 @@ int url_feof(ByteIOContext *s);
int url_ferror(ByteIOContext *s);
int av_url_read_fpause(ByteIOContext *h, int pause);
-int64_t av_url_read_fseek(ByteIOContext *h,
- int stream_index, int64_t timestamp, int flags);
+int64_t av_url_read_fseek(ByteIOContext *h, int stream_index,
+ int64_t timestamp, int flags);
#define URL_EOF (-1)
/** @note return URL_EOF (-1) if EOF */
@@ -253,7 +253,7 @@ int url_fprintf(ByteIOContext *s, const char *fmt, ...);
#endif
/** @note unlike fgets, the EOL character is not returned and a whole
- line is parsed. return NULL if first char read was EOF */
+ line is parsed. return NULL if first char read was EOF */
char *url_fgets(ByteIOContext *s, char *buf, int buf_size);
void put_flush_packet(ByteIOContext *s);
@@ -274,7 +274,7 @@ int get_buffer(ByteIOContext *s, unsigned char *buf, int size);
int get_partial_buffer(ByteIOContext *s, unsigned char *buf, int size);
/** @note return 0 if EOF, so you cannot use it if EOF handling is
- necessary */
+ necessary */
int get_byte(ByteIOContext *s);
unsigned int get_le24(ByteIOContext *s);
unsigned int get_le32(ByteIOContext *s);
@@ -295,7 +295,7 @@ static inline int url_is_streamed(ByteIOContext *s)
}
/** @note when opened as read/write, the buffers are only used for
- writing */
+ writing */
int url_fdopen(ByteIOContext **s, URLContext *h);
/** @warning must be called before any I/O */
@@ -307,7 +307,7 @@ int url_setbufsize(ByteIOContext *s, int buf_size);
int url_resetbuf(ByteIOContext *s, int flags);
/** @note when opened as read/write, the buffers are only used for
- writing */
+ writing */
int url_fopen(ByteIOContext **s, const char *filename, int flags);
int url_fclose(ByteIOContext *s);
URLContext *url_fileno(ByteIOContext *s);
@@ -355,9 +355,12 @@ int url_open_dyn_packet_buf(ByteIOContext **s, int max_packet_size);
*/
int url_close_dyn_buf(ByteIOContext *s, uint8_t **pbuffer);
-unsigned long ff_crc04C11DB7_update(unsigned long checksum, const uint8_t *buf, unsigned int len);
+unsigned long ff_crc04C11DB7_update(unsigned long checksum, const uint8_t *buf,
+ unsigned int len);
unsigned long get_checksum(ByteIOContext *s);
-void init_checksum(ByteIOContext *s, unsigned long (*update_checksum)(unsigned long c, const uint8_t *p, unsigned int len), unsigned long checksum);
+void init_checksum(ByteIOContext *s,
+ unsigned long (*update_checksum)(unsigned long c, const uint8_t *p, unsigned int len),
+ unsigned long checksum);
/* udp.c */
int udp_set_remote_url(URLContext *h, const char *uri);
diff --git a/libavformat/aviobuf.c b/libavformat/aviobuf.c
index e19e70b..d1dc09b 100644
--- a/libavformat/aviobuf.c
+++ b/libavformat/aviobuf.c
@@ -40,8 +40,8 @@ int init_put_byte(ByteIOContext *s,
s->buffer = buffer;
s->buffer_size = buffer_size;
s->buf_ptr = buffer;
- url_resetbuf(s, write_flag ? URL_WRONLY : URL_RDONLY);
s->opaque = opaque;
+ url_resetbuf(s, write_flag ? URL_WRONLY : URL_RDONLY);
s->write_packet = write_packet;
s->read_packet = read_packet;
s->seek = seek;
@@ -68,7 +68,8 @@ ByteIOContext *av_alloc_put_byte(
void *opaque,
int (*read_packet)(void *opaque, uint8_t *buf, int buf_size),
int (*write_packet)(void *opaque, uint8_t *buf, int buf_size),
- int64_t (*seek)(void *opaque, int64_t offset, int whence)) {
+ int64_t (*seek)(void *opaque, int64_t offset, int whence))
+{
ByteIOContext *s = av_mallocz(sizeof(ByteIOContext));
init_put_byte(s, buffer, buffer_size, write_flag, opaque,
read_packet, write_packet, seek);
@@ -322,17 +323,23 @@ static void fill_buffer(ByteIOContext *s)
}
}
-unsigned long ff_crc04C11DB7_update(unsigned long checksum, const uint8_t *buf, unsigned int len){
+unsigned long ff_crc04C11DB7_update(unsigned long checksum, const uint8_t *buf,
+ unsigned int len)
+{
return av_crc(av_crc_get_table(AV_CRC_32_IEEE), checksum, buf, len);
}
-unsigned long get_checksum(ByteIOContext *s){
+unsigned long get_checksum(ByteIOContext *s)
+{
s->checksum= s->update_checksum(s->checksum, s->checksum_ptr, s->buf_ptr - s->checksum_ptr);
s->update_checksum= NULL;
return s->checksum;
}
-void init_checksum(ByteIOContext *s, unsigned long (*update_checksum)(unsigned long c, const uint8_t *p, unsigned int len), unsigned long checksum){
+void init_checksum(ByteIOContext *s,
+ unsigned long (*update_checksum)(unsigned long c, const uint8_t *p, unsigned int len),
+ unsigned long checksum)
+{
s->update_checksum= update_checksum;
if(s->update_checksum){
s->checksum= checksum;
@@ -665,8 +672,8 @@ int av_url_read_fpause(ByteIOContext *s, int pause)
return s->read_pause(s->opaque, pause);
}
-int64_t av_url_read_fseek(ByteIOContext *s,
- int stream_index, int64_t timestamp, int flags)
+int64_t av_url_read_fseek(ByteIOContext *s, int stream_index,
+ int64_t timestamp, int flags)
{
URLContext *h = s->opaque;
int64_t ret;
diff --git a/libavformat/avs.c b/libavformat/avs.c
index 6fcb230..1fcb19f 100644
--- a/libavformat/avs.c
+++ b/libavformat/avs.c
@@ -24,7 +24,7 @@
typedef struct avs_format {
- voc_dec_context_t voc;
+ VocDecContext voc;
AVStream *st_video;
AVStream *st_audio;
int width;
@@ -34,7 +34,7 @@ typedef struct avs_format {
int nb_frames;
int remaining_frame_size;
int remaining_audio_size;
-} avs_format_t;
+} AvsFormat;
typedef enum avs_block_type {
AVS_NONE = 0x00,
@@ -42,7 +42,7 @@ typedef enum avs_block_type {
AVS_AUDIO = 0x02,
AVS_PALETTE = 0x03,
AVS_GAME_DATA = 0x04,
-} avs_block_type_t;
+} AvsBlockType;
static int avs_probe(AVProbeData * p)
{
@@ -57,7 +57,7 @@ static int avs_probe(AVProbeData * p)
static int avs_read_header(AVFormatContext * s, AVFormatParameters * ap)
{
- avs_format_t *avs = s->priv_data;
+ AvsFormat *avs = s->priv_data;
s->ctx_flags |= AVFMTCTX_NOHEADER;
@@ -82,10 +82,10 @@ static int avs_read_header(AVFormatContext * s, AVFormatParameters * ap)
static int
avs_read_video_packet(AVFormatContext * s, AVPacket * pkt,
- avs_block_type_t type, int sub_type, int size,
+ AvsBlockType type, int sub_type, int size,
uint8_t * palette, int palette_size)
{
- avs_format_t *avs = s->priv_data;
+ AvsFormat *avs = s->priv_data;
int ret;
ret = av_new_packet(pkt, size + palette_size);
@@ -120,7 +120,7 @@ avs_read_video_packet(AVFormatContext * s, AVPacket * pkt,
static int avs_read_audio_packet(AVFormatContext * s, AVPacket * pkt)
{
- avs_format_t *avs = s->priv_data;
+ AvsFormat *avs = s->priv_data;
int ret, size;
size = url_ftell(s->pb);
@@ -141,9 +141,9 @@ static int avs_read_audio_packet(AVFormatContext * s, AVPacket * pkt)
static int avs_read_packet(AVFormatContext * s, AVPacket * pkt)
{
- avs_format_t *avs = s->priv_data;
+ AvsFormat *avs = s->priv_data;
int sub_type = 0, size = 0;
- avs_block_type_t type = AVS_NONE;
+ AvsBlockType type = AVS_NONE;
int palette_size = 0;
uint8_t palette[4 + 3 * 256];
int ret;
@@ -218,7 +218,7 @@ static int avs_read_close(AVFormatContext * s)
AVInputFormat avs_demuxer = {
"avs",
NULL_IF_CONFIG_SMALL("AVS format"),
- sizeof(avs_format_t),
+ sizeof(AvsFormat),
avs_probe,
avs_read_header,
avs_read_packet,
diff --git a/libavformat/c93.c b/libavformat/c93.c
index c377f4d..11a0314 100644
--- a/libavformat/c93.c
+++ b/libavformat/c93.c
@@ -29,7 +29,7 @@ typedef struct {
} C93BlockRecord;
typedef struct {
- voc_dec_context_t voc;
+ VocDecContext voc;
C93BlockRecord block_records[512];
int current_block;
diff --git a/libavformat/dv.c b/libavformat/dv.c
index f7a0146..a0e07e9 100644
--- a/libavformat/dv.c
+++ b/libavformat/dv.c
@@ -430,6 +430,8 @@ static int dv_read_packet(AVFormatContext *s, AVPacket *pkt)
size = dv_get_packet(c->dv_demux, pkt);
if (size < 0) {
+ if (!c->dv_demux->sys)
+ return AVERROR(EIO);
size = c->dv_demux->sys->frame_size;
if (get_buffer(s->pb, c->buf, size) <= 0)
return AVERROR(EIO);
diff --git a/libavformat/dv.h b/libavformat/dv.h
index a8c0514..6bfb4ab 100644
--- a/libavformat/dv.h
+++ b/libavformat/dv.h
@@ -38,7 +38,7 @@ void dv_offset_reset(DVDemuxContext *c, int64_t frame_offset);
typedef struct DVMuxContext DVMuxContext;
DVMuxContext* dv_init_mux(AVFormatContext* s);
-int dv_assemble_frame(DVMuxContext *c, AVStream*, const uint8_t*, int, uint8_t**);
+int dv_assemble_frame(DVMuxContext *c, AVStream*, uint8_t*, int, uint8_t**);
void dv_delete_mux(DVMuxContext*);
#endif /* AVFORMAT_DV_H */
diff --git a/libavformat/dvenc.c b/libavformat/dvenc.c
index 7245e6e..c8dc5a8 100644
--- a/libavformat/dvenc.c
+++ b/libavformat/dvenc.c
@@ -231,7 +231,7 @@ static void dv_inject_metadata(DVMuxContext *c, uint8_t* frame)
*/
int dv_assemble_frame(DVMuxContext *c, AVStream* st,
- const uint8_t* data, int data_size, uint8_t** frame)
+ uint8_t* data, int data_size, uint8_t** frame)
{
int i, reqasize;
diff --git a/libavformat/flvdec.c b/libavformat/flvdec.c
index 0b25a25..01da9e8 100644
--- a/libavformat/flvdec.c
+++ b/libavformat/flvdec.c
@@ -26,6 +26,10 @@
#include "avformat.h"
#include "flv.h"
+typedef struct {
+ int wrong_dts; ///< wrong dts due to negative cts
+} FLVContext;
+
static int flv_probe(AVProbeData *p)
{
const uint8_t *d;
@@ -299,9 +303,10 @@ static int flv_get_extradata(AVFormatContext *s, AVStream *st, int size)
static int flv_read_packet(AVFormatContext *s, AVPacket *pkt)
{
+ FLVContext *flv = s->priv_data;
int ret, i, type, size, flags, is_audio;
int64_t next, pos;
- unsigned dts;
+ int64_t dts, pts = AV_NOPTS_VALUE;
AVStream *st = NULL;
retry:
@@ -386,10 +391,12 @@ static int flv_read_packet(AVFormatContext *s, AVPacket *pkt)
}
if(is_audio){
- if(!st->codec->channels || !st->codec->sample_rate || !st->codec->bits_per_coded_sample || (!st->codec->codec_id && !st->codec->codec_tag)) {
+ if(!st->codec->channels || !st->codec->sample_rate || !st->codec->bits_per_coded_sample) {
st->codec->channels = (flags & FLV_AUDIO_CHANNEL_MASK) == FLV_STEREO ? 2 : 1;
st->codec->sample_rate = (44100 << ((flags & FLV_AUDIO_SAMPLERATE_MASK) >> FLV_AUDIO_SAMPLERATE_OFFSET) >> 3);
st->codec->bits_per_coded_sample = (flags & FLV_AUDIO_SAMPLESIZE_MASK) ? 16 : 8;
+ }
+ if(!st->codec->codec_id){
flv_set_audio_codec(s, st, flags & FLV_AUDIO_CODECID_MASK);
}
}else{
@@ -401,9 +408,14 @@ static int flv_read_packet(AVFormatContext *s, AVPacket *pkt)
int type = get_byte(s->pb);
size--;
if (st->codec->codec_id == CODEC_ID_H264) {
- // cts offset ignored because it might to be signed
- // and would cause pts < dts
- get_be24(s->pb);
+ int32_t cts = (get_be24(s->pb)+0xff800000)^0xff800000; // sign extension
+ pts = dts + cts;
+ if (cts < 0) { // dts are wrong
+ flv->wrong_dts = 1;
+ av_log(s, AV_LOG_WARNING, "negative cts, previous timestamps might be wrong\n");
+ }
+ if (flv->wrong_dts)
+ dts = AV_NOPTS_VALUE;
}
if (type == 0) {
if ((ret = flv_get_extradata(s, st, size)) < 0)
@@ -420,6 +432,7 @@ static int flv_read_packet(AVFormatContext *s, AVPacket *pkt)
packet */
pkt->size = ret;
pkt->dts = dts;
+ pkt->pts = pts == AV_NOPTS_VALUE ? dts : pts;
pkt->stream_index = st->index;
if (is_audio || ((flags & FLV_VIDEO_FRAMETYPE_MASK) == FLV_FRAME_KEY))
@@ -431,7 +444,7 @@ static int flv_read_packet(AVFormatContext *s, AVPacket *pkt)
AVInputFormat flv_demuxer = {
"flv",
NULL_IF_CONFIG_SMALL("FLV format"),
- 0,
+ sizeof(FLVContext),
flv_probe,
flv_read_header,
flv_read_packet,
diff --git a/libavformat/framehook.h b/libavformat/framehook.h
index 9dc144b..0bad606 100644
--- a/libavformat/framehook.h
+++ b/libavformat/framehook.h
@@ -45,8 +45,8 @@ typedef void (FrameHookRelease)(void *ctx);
typedef FrameHookRelease *FrameHookReleaseFn;
extern FrameHookRelease Release;
-extern int frame_hook_add(int argc, char *argv[]);
-extern void frame_hook_process(struct AVPicture *pict, enum PixelFormat pix_fmt, int width, int height, int64_t pts);
-extern void frame_hook_release(void);
+int frame_hook_add(int argc, char *argv[]);
+void frame_hook_process(struct AVPicture *pict, enum PixelFormat pix_fmt, int width, int height, int64_t pts);
+void frame_hook_release(void);
#endif /* AVFORMAT_FRAMEHOOK_H */
diff --git a/libavformat/gxf.c b/libavformat/gxf.c
index 5bbac82..30cb4f7 100644
--- a/libavformat/gxf.c
+++ b/libavformat/gxf.c
@@ -23,12 +23,12 @@
#include "avformat.h"
#include "gxf.h"
-typedef struct {
+struct gxf_stream_info {
int64_t first_field;
int64_t last_field;
AVRational frames_per_second;
int32_t fields_per_frame;
-} st_info_t;
+};
/**
* \brief parses a packet header, extracting type and length
@@ -37,7 +37,7 @@ typedef struct {
* \param length detected packet length, excluding header is stored here
* \return 0 if header not found or contains invalid data, 1 otherwise
*/
-static int parse_packet_header(ByteIOContext *pb, pkt_type_t *type, int *length) {
+static int parse_packet_header(ByteIOContext *pb, GXFPktType *type, int *length) {
if (get_be32(pb))
return 0;
if (get_byte(pb) != 1)
@@ -157,11 +157,11 @@ static int get_sindex(AVFormatContext *s, int id, int format) {
* \param len length of tag section, will be adjusted to contain remaining bytes
* \param si struct to store collected information into
*/
-static void gxf_material_tags(ByteIOContext *pb, int *len, st_info_t *si) {
+static void gxf_material_tags(ByteIOContext *pb, int *len, struct gxf_stream_info *si) {
si->first_field = AV_NOPTS_VALUE;
si->last_field = AV_NOPTS_VALUE;
while (*len >= 2) {
- mat_tag_t tag = get_byte(pb);
+ GXFMatTag tag = get_byte(pb);
int tlen = get_byte(pb);
*len -= 2;
if (tlen > *len)
@@ -206,11 +206,11 @@ static AVRational fps_umf2avr(uint32_t flags) {
* \param len length of tag section, will be adjusted to contain remaining bytes
* \param si struct to store collected information into
*/
-static void gxf_track_tags(ByteIOContext *pb, int *len, st_info_t *si) {
+static void gxf_track_tags(ByteIOContext *pb, int *len, struct gxf_stream_info *si) {
si->frames_per_second = (AVRational){0, 0};
si->fields_per_frame = 0;
while (*len >= 2) {
- track_tag_t tag = get_byte(pb);
+ GXFTrackTag tag = get_byte(pb);
int tlen = get_byte(pb);
*len -= 2;
if (tlen > *len)
@@ -256,11 +256,11 @@ static void gxf_read_index(AVFormatContext *s, int pkt_len) {
static int gxf_header(AVFormatContext *s, AVFormatParameters *ap) {
ByteIOContext *pb = s->pb;
- pkt_type_t pkt_type;
+ GXFPktType pkt_type;
int map_len;
int len;
AVRational main_timebase = {0, 0};
- st_info_t si;
+ struct gxf_stream_info si;
int i;
if (!parse_packet_header(pb, &pkt_type, &map_len) || pkt_type != PKT_MAP) {
av_log(s, AV_LOG_ERROR, "map packet not found\n");
@@ -382,7 +382,7 @@ static int64_t gxf_resync_media(AVFormatContext *s, uint64_t max_interval, int t
int64_t cur_timestamp = AV_NOPTS_VALUE;
int len;
ByteIOContext *pb = s->pb;
- pkt_type_t type;
+ GXFPktType type;
tmp = get_be32(pb);
start:
while (tmp)
@@ -412,7 +412,7 @@ out:
static int gxf_packet(AVFormatContext *s, AVPacket *pkt) {
ByteIOContext *pb = s->pb;
- pkt_type_t pkt_type;
+ GXFPktType pkt_type;
int pkt_len;
while (!url_feof(pb)) {
AVStream *st;
diff --git a/libavformat/gxf.h b/libavformat/gxf.h
index 4212704..dcdcdef 100644
--- a/libavformat/gxf.h
+++ b/libavformat/gxf.h
@@ -22,14 +22,13 @@
#ifndef AVFORMAT_GXF_H
#define AVFORMAT_GXF_H
-/* gxf.c */
typedef enum {
PKT_MAP = 0xbc,
PKT_MEDIA = 0xbf,
PKT_EOS = 0xfb,
PKT_FLT = 0xfc,
PKT_UMF = 0xfd,
-} pkt_type_t;
+} GXFPktType;
typedef enum {
MAT_NAME = 0x40,
@@ -38,7 +37,7 @@ typedef enum {
MAT_MARK_IN = 0x43,
MAT_MARK_OUT = 0x44,
MAT_SIZE = 0x45,
-} mat_tag_t;
+} GXFMatTag;
typedef enum {
TRACK_NAME = 0x4c,
@@ -48,6 +47,6 @@ typedef enum {
TRACK_FPS = 0x50,
TRACK_LINES = 0x51,
TRACK_FPF = 0x52,
-} track_tag_t;
+} GXFTrackTag;
#endif /* AVFORMAT_GXF_H */
diff --git a/libavformat/gxfenc.c b/libavformat/gxfenc.c
index 72a35c3..396555a 100644
--- a/libavformat/gxfenc.c
+++ b/libavformat/gxfenc.c
@@ -155,7 +155,7 @@ static int64_t updateSize(ByteIOContext *pb, int64_t pos)
return curpos - pos;
}
-static void gxf_write_packet_header(ByteIOContext *pb, pkt_type_t type)
+static void gxf_write_packet_header(ByteIOContext *pb, GXFPktType type)
{
put_be32(pb, 0); /* packet leader for synchro */
put_byte(pb, 1);
diff --git a/libavformat/iff.c b/libavformat/iff.c
index 1fb94c0..e5158f6 100644
--- a/libavformat/iff.c
+++ b/libavformat/iff.c
@@ -52,7 +52,7 @@
#define PACKET_SIZE 1024
-typedef enum {COMP_NONE, COMP_FIB, COMP_EXP} svx8_compression_t;
+typedef enum {COMP_NONE, COMP_FIB, COMP_EXP} svx8_compression_type;
typedef struct {
uint32_t body_size;
diff --git a/libavformat/img2.c b/libavformat/img2.c
index 9706392..c0b5de2 100644
--- a/libavformat/img2.c
+++ b/libavformat/img2.c
@@ -45,6 +45,7 @@ static const IdStrMap img_tags[] = {
{ CODEC_ID_PNG , "png"},
{ CODEC_ID_PNG , "mng"},
{ CODEC_ID_PPM , "ppm"},
+ { CODEC_ID_PPM , "pnm"},
{ CODEC_ID_PGM , "pgm"},
{ CODEC_ID_PGMYUV , "pgmyuv"},
{ CODEC_ID_PBM , "pbm"},
@@ -69,6 +70,7 @@ static const IdStrMap img_tags[] = {
{ CODEC_ID_SUNRAST , "im8"},
{ CODEC_ID_SUNRAST , "im24"},
{ CODEC_ID_SUNRAST , "sunras"},
+ { CODEC_ID_JPEG2000 , "jp2"},
{ CODEC_ID_NONE , NULL}
};
@@ -402,7 +404,7 @@ AVOutputFormat image2_muxer = {
"image2",
NULL_IF_CONFIG_SMALL("image2 sequence"),
"",
- "",
+ "bmp,jpeg,jpg,ljpg,pam,pbm,pgm,pgmyuv,png,ppm,sgi,tif,tiff",
sizeof(VideoData),
CODEC_ID_NONE,
CODEC_ID_MJPEG,
diff --git a/libavformat/internal.h b/libavformat/internal.h
index a96f365..cb266ca 100644
--- a/libavformat/internal.h
+++ b/libavformat/internal.h
@@ -22,7 +22,11 @@
#define AVFORMAT_INTERNAL_H
#include <stdint.h>
+#include "avformat.h"
char *ff_data_to_hex(char *buf, const uint8_t *src, int size);
+void av_set_program_name(AVProgram *program, char *provider_name, char *name);
+void av_program_add_stream_index(AVFormatContext *ac, int progid, unsigned int idx);
+
#endif /* AVFORMAT_INTERNAL_H */
diff --git a/libavformat/ipmovie.c b/libavformat/ipmovie.c
index 4a766b2..35c61de 100644
--- a/libavformat/ipmovie.c
+++ b/libavformat/ipmovie.c
@@ -101,7 +101,7 @@ typedef struct IPMVEContext {
unsigned int audio_bits;
unsigned int audio_channels;
unsigned int audio_sample_rate;
- unsigned int audio_type;
+ enum CodecID audio_type;
unsigned int audio_frame_count;
int video_stream_index;
@@ -544,7 +544,7 @@ static int ipmovie_read_header(AVFormatContext *s,
url_fseek(pb, -CHUNK_PREAMBLE_SIZE, SEEK_CUR);
if (chunk_type == CHUNK_VIDEO)
- ipmovie->audio_type = 0; /* no audio */
+ ipmovie->audio_type = CODEC_ID_NONE; /* no audio */
else if (process_ipmovie_chunk(ipmovie, pb, &pkt) != CHUNK_INIT_AUDIO)
return AVERROR_INVALIDDATA;
diff --git a/libavformat/isom.c b/libavformat/isom.c
index 8791354..7d1da7c 100644
--- a/libavformat/isom.c
+++ b/libavformat/isom.c
@@ -106,6 +106,8 @@ const AVCodecTag codec_movvideo_tags[] = {
{ CODEC_ID_MSRLE, MKTAG('W', 'R', 'L', 'E') },
{ CODEC_ID_QDRAW, MKTAG('q', 'd', 'r', 'w') }, /* QuickDraw */
+ { CODEC_ID_RAWVIDEO, MKTAG('W', 'R', 'A', 'W') },
+
{ CODEC_ID_H264, MKTAG('a', 'v', 'c', '1') }, /* AVC-1/H.264 */
{ CODEC_ID_MPEG1VIDEO, MKTAG('m', 'p', 'e', 'g') }, /* MPEG */
@@ -146,7 +148,7 @@ const AVCodecTag codec_movvideo_tags[] = {
{ CODEC_ID_MPEG2VIDEO, MKTAG('x', 'd', 'v', 'f') }, /* XDCAM EX 1080p30 VBR */
{ CODEC_ID_MPEG2VIDEO, MKTAG('A', 'V', 'm', 'p') }, /* AVID IMX PAL */
- //{ CODEC_ID_JPEG2000, MKTAG('m', 'j', 'p', '2') }, /* JPEG 2000 produced by FCP */
+ { CODEC_ID_JPEG2000, MKTAG('m', 'j', 'p', '2') }, /* JPEG 2000 produced by FCP */
{ CODEC_ID_TARGA, MKTAG('t', 'g', 'a', ' ') }, /* Truevision Targa */
{ CODEC_ID_TIFF, MKTAG('t', 'i', 'f', 'f') }, /* TIFF embedded in MOV */
diff --git a/libavformat/isom.h b/libavformat/isom.h
index 2fb4dbf..64bde46 100644
--- a/libavformat/isom.h
+++ b/libavformat/isom.h
@@ -38,6 +38,6 @@ int ff_mov_lang_to_iso639(int code, char *to);
typedef struct {
int count;
int duration;
-} MOV_stts_t;
+} MOVStts;
#endif /* AVFORMAT_ISOM_H */
diff --git a/libavformat/libnut.c b/libavformat/libnut.c
index 399ef9c..3f4a69b 100644
--- a/libavformat/libnut.c
+++ b/libavformat/libnut.c
@@ -33,8 +33,8 @@
#define ID_LENGTH (strlen(ID_STRING) + 1)
typedef struct {
- nut_context_t * nut;
- nut_stream_header_t * s;
+ nut_context_tt * nut;
+ nut_stream_header_tt * s;
} NUTContext;
static const AVCodecTag nut_tags[] = {
@@ -55,7 +55,7 @@ static int av_write(void * h, size_t len, const uint8_t * buf) {
static int nut_write_header(AVFormatContext * avf) {
NUTContext * priv = avf->priv_data;
ByteIOContext * bc = avf->pb;
- nut_muxer_opts_t mopts = {
+ nut_muxer_opts_tt mopts = {
.output = {
.priv = bc,
.write = av_write,
@@ -66,7 +66,7 @@ static int nut_write_header(AVFormatContext * avf) {
.max_distance = 32768,
.fti = NULL,
};
- nut_stream_header_t * s;
+ nut_stream_header_tt * s;
int i;
priv->s = s = av_mallocz((avf->nb_streams + 1) * sizeof*s);
@@ -123,7 +123,7 @@ static int nut_write_header(AVFormatContext * avf) {
static int nut_write_packet(AVFormatContext * avf, AVPacket * pkt) {
NUTContext * priv = avf->priv_data;
- nut_packet_t p;
+ nut_packet_tt p;
p.len = pkt->size;
p.stream = pkt->stream_index;
@@ -188,7 +188,7 @@ static off_t av_seek(void * h, long long pos, int whence) {
static int nut_read_header(AVFormatContext * avf, AVFormatParameters * ap) {
NUTContext * priv = avf->priv_data;
ByteIOContext * bc = avf->pb;
- nut_demuxer_opts_t dopts = {
+ nut_demuxer_opts_tt dopts = {
.input = {
.priv = bc,
.seek = av_seek,
@@ -200,8 +200,8 @@ static int nut_read_header(AVFormatContext * avf, AVFormatParameters * ap) {
.read_index = 1,
.cache_syncpoints = 1,
};
- nut_context_t * nut = priv->nut = nut_demuxer_init(&dopts);
- nut_stream_header_t * s;
+ nut_context_tt * nut = priv->nut = nut_demuxer_init(&dopts);
+ nut_stream_header_tt * s;
int ret, i;
if ((ret = nut_read_headers(nut, &s, NULL))) {
@@ -258,7 +258,7 @@ static int nut_read_header(AVFormatContext * avf, AVFormatParameters * ap) {
static int nut_read_packet(AVFormatContext * avf, AVPacket * pkt) {
NUTContext * priv = avf->priv_data;
- nut_packet_t pd;
+ nut_packet_tt pd;
int ret;
ret = nut_read_next_packet(priv->nut, &pd);
diff --git a/libavformat/matroska.c b/libavformat/matroska.c
index 0b657e1..f967f3e 100644
--- a/libavformat/matroska.c
+++ b/libavformat/matroska.c
@@ -22,25 +22,16 @@
#include "matroska.h"
const CodecTags ff_mkv_codec_tags[]={
- {"V_UNCOMPRESSED" , CODEC_ID_RAWVIDEO},
- {"V_MPEG4/ISO/ASP" , CODEC_ID_MPEG4},
- {"V_MPEG4/ISO/SP" , CODEC_ID_MPEG4},
- {"V_MPEG4/ISO/AP" , CODEC_ID_MPEG4},
- {"V_MPEG4/ISO/AVC" , CODEC_ID_H264},
- {"V_MPEG4/MS/V3" , CODEC_ID_MSMPEG4V3},
- {"V_MPEG1" , CODEC_ID_MPEG1VIDEO},
- {"V_MPEG2" , CODEC_ID_MPEG2VIDEO},
- {"V_MJPEG" , CODEC_ID_MJPEG},
- {"V_REAL/RV10" , CODEC_ID_RV10},
- {"V_REAL/RV20" , CODEC_ID_RV20},
- {"V_REAL/RV30" , CODEC_ID_RV30},
- {"V_REAL/RV40" , CODEC_ID_RV40},
- {"V_THEORA" , CODEC_ID_THEORA},
- {"V_SNOW" , CODEC_ID_SNOW},
-
- {"A_MPEG/L3" , CODEC_ID_MP3},
+ {"A_AAC" , CODEC_ID_AAC},
+ {"A_AC3" , CODEC_ID_AC3},
+ {"A_DTS" , CODEC_ID_DTS},
+ {"A_EAC3" , CODEC_ID_EAC3},
+ {"A_FLAC" , CODEC_ID_FLAC},
{"A_MPEG/L2" , CODEC_ID_MP2},
{"A_MPEG/L1" , CODEC_ID_MP2},
+ {"A_MPEG/L3" , CODEC_ID_MP3},
+ {"A_PCM/FLOAT/IEEE" , CODEC_ID_PCM_F32LE},
+ {"A_PCM/FLOAT/IEEE" , CODEC_ID_PCM_F64LE},
{"A_PCM/INT/BIG" , CODEC_ID_PCM_S16BE},
{"A_PCM/INT/BIG" , CODEC_ID_PCM_S24BE},
{"A_PCM/INT/BIG" , CODEC_ID_PCM_S32BE},
@@ -48,22 +39,15 @@ const CodecTags ff_mkv_codec_tags[]={
{"A_PCM/INT/LIT" , CODEC_ID_PCM_S24LE},
{"A_PCM/INT/LIT" , CODEC_ID_PCM_S32LE},
{"A_PCM/INT/LIT" , CODEC_ID_PCM_U8},
- {"A_PCM/FLOAT/IEEE" , CODEC_ID_PCM_F32LE},
- {"A_PCM/FLOAT/IEEE" , CODEC_ID_PCM_F64LE},
- {"A_AC3" , CODEC_ID_AC3},
- {"A_EAC3" , CODEC_ID_EAC3},
- {"A_DTS" , CODEC_ID_DTS},
- {"A_VORBIS" , CODEC_ID_VORBIS},
- {"A_AAC" , CODEC_ID_AAC},
- {"A_FLAC" , CODEC_ID_FLAC},
- {"A_WAVPACK4" , CODEC_ID_WAVPACK},
- {"A_TTA1" , CODEC_ID_TTA},
+ {"A_QUICKTIME/QDM2" , CODEC_ID_QDM2},
{"A_REAL/14_4" , CODEC_ID_RA_144},
{"A_REAL/28_8" , CODEC_ID_RA_288},
{"A_REAL/ATRC" , CODEC_ID_ATRAC3},
{"A_REAL/COOK" , CODEC_ID_COOK},
// {"A_REAL/SIPR" , CODEC_ID_SIPRO},
- {"A_QUICKTIME/QDM2" , CODEC_ID_QDM2},
+ {"A_TTA1" , CODEC_ID_TTA},
+ {"A_VORBIS" , CODEC_ID_VORBIS},
+ {"A_WAVPACK4" , CODEC_ID_WAVPACK},
{"S_TEXT/UTF8" , CODEC_ID_TEXT},
{"S_TEXT/ASCII" , CODEC_ID_TEXT},
@@ -73,6 +57,23 @@ const CodecTags ff_mkv_codec_tags[]={
{"S_SSA" , CODEC_ID_SSA},
{"S_VOBSUB" , CODEC_ID_DVD_SUBTITLE},
+ {"V_DIRAC" , CODEC_ID_DIRAC},
+ {"V_MJPEG" , CODEC_ID_MJPEG},
+ {"V_MPEG1" , CODEC_ID_MPEG1VIDEO},
+ {"V_MPEG2" , CODEC_ID_MPEG2VIDEO},
+ {"V_MPEG4/ISO/ASP" , CODEC_ID_MPEG4},
+ {"V_MPEG4/ISO/AP" , CODEC_ID_MPEG4},
+ {"V_MPEG4/ISO/SP" , CODEC_ID_MPEG4},
+ {"V_MPEG4/ISO/AVC" , CODEC_ID_H264},
+ {"V_MPEG4/MS/V3" , CODEC_ID_MSMPEG4V3},
+ {"V_REAL/RV10" , CODEC_ID_RV10},
+ {"V_REAL/RV20" , CODEC_ID_RV20},
+ {"V_REAL/RV30" , CODEC_ID_RV30},
+ {"V_REAL/RV40" , CODEC_ID_RV40},
+ {"V_SNOW" , CODEC_ID_SNOW},
+ {"V_THEORA" , CODEC_ID_THEORA},
+ {"V_UNCOMPRESSED" , CODEC_ID_RAWVIDEO},
+
{"" , CODEC_ID_NONE}
};
diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
index cedfb3d..6478d84 100644
--- a/libavformat/matroskadec.c
+++ b/libavformat/matroskadec.c
@@ -1074,6 +1074,7 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap)
MatroskaTrack *tracks;
EbmlList *index_list;
MatroskaIndex *index;
+ int index_scale = 1;
Ebml ebml = { 0 };
AVStream *st;
int i, j;
@@ -1303,7 +1304,8 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap)
st->codec->extradata = extradata;
st->codec->extradata_size = extradata_size;
} else if(track->codec_priv.data && track->codec_priv.size > 0){
- st->codec->extradata = av_malloc(track->codec_priv.size);
+ st->codec->extradata = av_mallocz(track->codec_priv.size +
+ FF_INPUT_BUFFER_PADDING_SIZE);
if(st->codec->extradata == NULL)
return AVERROR(ENOMEM);
st->codec->extradata_size = track->codec_priv.size;
@@ -1369,6 +1371,11 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap)
index_list = &matroska->index;
index = index_list->elem;
+ if (index_list->nb_elem
+ && index[0].time > 100000000000000/matroska->time_scale) {
+ av_log(matroska->ctx, AV_LOG_WARNING, "Working around broken index.\n");
+ index_scale = matroska->time_scale;
+ }
for (i=0; i<index_list->nb_elem; i++) {
EbmlList *pos_list = &index[i].pos;
MatroskaIndexPos *pos = pos_list->elem;
@@ -1378,7 +1385,8 @@ static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap)
if (track && track->stream)
av_add_index_entry(track->stream,
pos[j].pos + matroska->segment_start,
- index[i].time, 0, 0, AVINDEX_KEYFRAME);
+ index[i].time/index_scale, 0, 0,
+ AVINDEX_KEYFRAME);
}
}
diff --git a/libavformat/metadata.c b/libavformat/metadata.c
new file mode 100644
index 0000000..9765e7c
--- /dev/null
+++ b/libavformat/metadata.c
@@ -0,0 +1,89 @@
+/*
+ * copyright (c) 2009 Michael Niedermayer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "metadata.h"
+
+AVMetadataTag *
+av_metadata_get(AVMetadata *m, const char *key, const AVMetadataTag *prev, int flags)
+{
+ unsigned int i, j;
+
+ if(!m)
+ return NULL;
+
+ if(prev) i= prev - m->elems + 1;
+ else i= 0;
+
+ for(; i<m->count; i++){
+ const char *s= m->elems[i].key;
+ if(flags & AV_METADATA_IGNORE_CASE) for(j=0; toupper(s[j]) == toupper(key[j]) && key[j]; j++);
+ else for(j=0; s[j] == key[j] && key[j]; j++);
+ if(key[j])
+ continue;
+ if(s[j] && !(flags & AV_METADATA_IGNORE_SUFFIX))
+ continue;
+ return &m->elems[i];
+ }
+ return NULL;
+}
+
+int av_metadata_set(AVMetadata **pm, AVMetadataTag elem)
+{
+ AVMetadata *m= *pm;
+ AVMetadataTag *tag= av_metadata_get(m, elem.key, NULL, 0);
+
+ if(!m)
+ m=*pm= av_mallocz(sizeof(*m));
+
+ if(tag){
+ av_free(tag->value);
+ av_free(tag->key);
+ *tag= m->elems[--m->count];
+ }else{
+ AVMetadataTag *tmp= av_realloc(m->elems, (m->count+1) * sizeof(*m->elems));
+ if(tmp){
+ m->elems= tmp;
+ }else
+ return AVERROR(ENOMEM);
+ }
+ if(elem.value){
+ elem.key = av_strdup(elem.key );
+ elem.value= av_strdup(elem.value);
+ m->elems[m->count++]= elem;
+ }
+ if(!m->count)
+ av_freep(pm);
+
+ return 0;
+}
+
+void av_metadata_free(AVMetadata **pm)
+{
+ AVMetadata *m= *pm;
+
+ if(m){
+ while(m->count--){
+ av_free(m->elems[m->count].key);
+ av_free(m->elems[m->count].value);
+ }
+ av_free(m->elems);
+ }
+ av_freep(pm);
+}
diff --git a/libavformat/metadata.h b/libavformat/metadata.h
new file mode 100644
index 0000000..b6912ef
--- /dev/null
+++ b/libavformat/metadata.h
@@ -0,0 +1,43 @@
+/*
+ * copyright (c) 2009 Michael Niedermayer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVFORMAT_METADATA_H
+#define AVFORMAT_METADATA_H
+
+/**
+ * @file metadata.h
+ * internal metadata API header
+ * see avformat.h or the public API!
+ */
+
+
+#include "avformat.h"
+
+struct AVMetadata{
+ int count;
+ AVMetadataTag *elems;
+};
+
+#if LIBAVFORMAT_VERSION_MAJOR < 53
+void ff_metadata_demux_compat(AVFormatContext *s);
+void ff_metadata_mux_compat(AVFormatContext *s);
+#endif
+
+#endif /* AVFORMAT_METADATA_H */
diff --git a/libavformat/metadata_compat.c b/libavformat/metadata_compat.c
new file mode 100644
index 0000000..7aef938
--- /dev/null
+++ b/libavformat/metadata_compat.c
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2009 Aurelien Jacobs <aurel at gnuage.org>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#if LIBAVFORMAT_VERSION_MAJOR < 53
+
+#include <strings.h>
+#include "metadata.h"
+#include "libavutil/avstring.h"
+
+#define SIZE_OFFSET(x) sizeof(((AVFormatContext*)0)->x),offsetof(AVFormatContext,x)
+
+static const struct {
+ const char name[16];
+ int size;
+ int offset;
+} compat_tab[] = {
+ { "title", SIZE_OFFSET(title) },
+ { "author", SIZE_OFFSET(author) },
+ { "copyright", SIZE_OFFSET(copyright) },
+ { "comment", SIZE_OFFSET(comment) },
+ { "album", SIZE_OFFSET(album) },
+ { "year", SIZE_OFFSET(year) },
+ { "track", SIZE_OFFSET(track) },
+ { "genre", SIZE_OFFSET(genre) },
+
+ { "artist", SIZE_OFFSET(author) },
+ { "creator", SIZE_OFFSET(author) },
+ { "written_by", SIZE_OFFSET(author) },
+ { "lead_performer", SIZE_OFFSET(author) },
+ { "description", SIZE_OFFSET(comment) },
+ { "albumtitle", SIZE_OFFSET(album) },
+ { "date_written", SIZE_OFFSET(year) },
+ { "date_released", SIZE_OFFSET(year) },
+ { "tracknumber", SIZE_OFFSET(track) },
+ { "part_number", SIZE_OFFSET(track) },
+};
+
+void ff_metadata_demux_compat(AVFormatContext *ctx)
+{
+ AVMetadata *m;
+ int i, j;
+
+ if ((m = ctx->metadata))
+ for (j=0; j<m->count; j++)
+ for (i=0; i<FF_ARRAY_ELEMS(compat_tab); i++)
+ if (!strcasecmp(m->elems[j].key, compat_tab[i].name)) {
+ int *ptr = (int *)((char *)ctx+compat_tab[i].offset);
+ if (*ptr) continue;
+ if (compat_tab[i].size > sizeof(int))
+ av_strlcpy((char *)ptr, m->elems[j].value, compat_tab[i].size);
+ else
+ *ptr = atoi(m->elems[j].value);
+ }
+
+ for (i=0; i<ctx->nb_chapters; i++)
+ if ((m = ctx->chapters[i]->metadata))
+ for (j=0; j<m->count; j++)
+ if (!strcasecmp(m->elems[j].key, "title")) {
+ av_free(ctx->chapters[i]->title);
+ ctx->chapters[i]->title = av_strdup(m->elems[j].value);
+ }
+
+ for (i=0; i<ctx->nb_programs; i++)
+ if ((m = ctx->programs[i]->metadata))
+ for (j=0; j<m->count; j++) {
+ if (!strcasecmp(m->elems[j].key, "name")) {
+ av_free(ctx->programs[i]->name);
+ ctx->programs[i]->name = av_strdup(m->elems[j].value);
+ }
+ if (!strcasecmp(m->elems[j].key, "provider_name")) {
+ av_free(ctx->programs[i]->provider_name);
+ ctx->programs[i]->provider_name = av_strdup(m->elems[j].value);
+ }
+ }
+
+ for (i=0; i<ctx->nb_streams; i++)
+ if ((m = ctx->streams[i]->metadata))
+ for (j=0; j<m->count; j++) {
+ if (!strcasecmp(m->elems[j].key, "language"))
+ av_strlcpy(ctx->streams[i]->language, m->elems[j].value, 4);
+ if (!strcasecmp(m->elems[j].key, "filename")) {
+ av_free(ctx->streams[i]->filename);
+ ctx->streams[i]->filename= av_strdup(m->elems[j].value);
+ }
+ }
+}
+
+
+#define FILL_METADATA(s, key, value) { \
+ if (value && *value && \
+ !av_metadata_get(s->metadata, #key, NULL, AV_METADATA_IGNORE_CASE)) \
+ av_metadata_set(&s->metadata, (const AVMetadataTag){#key, value}); \
+ }
+#define FILL_METADATA_STR(s, key) FILL_METADATA(s, key, s->key)
+#define FILL_METADATA_INT(s, key) { \
+ char number[10]; \
+ snprintf(number, sizeof(number), "%d", s->key); \
+ if(s->key) FILL_METADATA(s, key, number) }
+
+void ff_metadata_mux_compat(AVFormatContext *ctx)
+{
+ int i;
+
+ FILL_METADATA_STR(ctx, title);
+ FILL_METADATA_STR(ctx, author);
+ FILL_METADATA_STR(ctx, copyright);
+ FILL_METADATA_STR(ctx, comment);
+ FILL_METADATA_STR(ctx, album);
+ FILL_METADATA_INT(ctx, year);
+ FILL_METADATA_INT(ctx, track);
+ FILL_METADATA_STR(ctx, genre);
+ for (i=0; i<ctx->nb_chapters; i++)
+ FILL_METADATA_STR(ctx->chapters[i], title);
+ for (i=0; i<ctx->nb_programs; i++) {
+ FILL_METADATA_STR(ctx->programs[i], name);
+ FILL_METADATA_STR(ctx->programs[i], provider_name);
+ }
+ for (i=0; i<ctx->nb_streams; i++) {
+ FILL_METADATA_STR(ctx->streams[i], language);
+ FILL_METADATA_STR(ctx->streams[i], filename);
+ }
+}
+
+#endif /* LIBAVFORMAT_VERSION_MAJOR < 53 */
diff --git a/libavformat/mov.c b/libavformat/mov.c
index 32cb909..c0c057a 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -69,18 +69,18 @@ typedef struct {
int first;
int count;
int id;
-} MOV_stsc_t;
+} MOVStsc;
typedef struct {
uint32_t type;
char *path;
-} MOV_dref_t;
+} MOVDref;
typedef struct {
uint32_t type;
int64_t offset;
int64_t size; /* total size (excluding the size and type fields) */
-} MOV_atom_t;
+} MOVAtom;
struct MOVParseTableEntry;
@@ -109,12 +109,12 @@ typedef struct MOVStreamContext {
unsigned int chunk_count;
int64_t *chunk_offsets;
unsigned int stts_count;
- MOV_stts_t *stts_data;
+ MOVStts *stts_data;
unsigned int ctts_count;
- MOV_stts_t *ctts_data;
+ MOVStts *ctts_data;
unsigned int edit_count; /* number of 'edit' (elst atom) */
unsigned int sample_to_chunk_sz;
- MOV_stsc_t *sample_to_chunk;
+ MOVStsc *sample_to_chunk;
int sample_to_ctime_index;
int sample_to_ctime_sample;
unsigned int sample_size;
@@ -131,7 +131,7 @@ typedef struct MOVStreamContext {
int pseudo_stream_id; ///< -1 means demux all ids
int16_t audio_cid; ///< stsd audio compression id
unsigned drefs_count;
- MOV_dref_t *drefs;
+ MOVDref *drefs;
int dref_id;
int wrong_dts; ///< dts are wrong due to negative ctts
} MOVStreamContext;
@@ -149,6 +149,7 @@ typedef struct MOVContext {
MOVFragment fragment; ///< current fragment in moof atom
MOVTrackExt *trex_data;
unsigned trex_count;
+ int itunes_metadata; ///< metadata are itunes style
} MOVContext;
@@ -162,15 +163,15 @@ typedef struct MOVContext {
/* links atom IDs to parse functions */
typedef struct MOVParseTableEntry {
uint32_t type;
- int (*parse)(MOVContext *ctx, ByteIOContext *pb, MOV_atom_t atom);
+ int (*parse)(MOVContext *ctx, ByteIOContext *pb, MOVAtom atom);
} MOVParseTableEntry;
static const MOVParseTableEntry mov_default_parse_table[];
-static int mov_read_default(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
+static int mov_read_default(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
{
int64_t total_size = 0;
- MOV_atom_t a;
+ MOVAtom a;
int i;
int err = 0;
@@ -231,7 +232,7 @@ static int mov_read_default(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
return err;
}
-static int mov_read_dref(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
+static int mov_read_dref(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
{
AVStream *st = c->fc->streams[c->fc->nb_streams-1];
MOVStreamContext *sc = st->priv_data;
@@ -245,7 +246,7 @@ static int mov_read_dref(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
sc->drefs = av_mallocz(entries * sizeof(*sc->drefs));
for (i = 0; i < sc->drefs_count; i++) {
- MOV_dref_t *dref = &sc->drefs[i];
+ MOVDref *dref = &sc->drefs[i];
uint32_t size = get_be32(pb);
int64_t next = url_ftell(pb) + size - 4;
@@ -299,7 +300,7 @@ static int mov_read_dref(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
return 0;
}
-static int mov_read_hdlr(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
+static int mov_read_hdlr(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
{
AVStream *st = c->fc->streams[c->fc->nb_streams-1];
uint32_t type;
@@ -372,7 +373,7 @@ static const AVCodecTag mp4_audio_types[] = {
{ CODEC_ID_NONE, 0 },
};
-static int mov_read_esds(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
+static int mov_read_esds(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
{
AVStream *st = c->fc->streams[c->fc->nb_streams-1];
int tag, len;
@@ -429,8 +430,24 @@ static int mov_read_esds(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
return 0;
}
+static int mov_read_pasp(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
+{
+ const int num = get_be32(pb);
+ const int den = get_be32(pb);
+ AVStream * const st = c->fc->streams[c->fc->nb_streams-1];
+ if (den != 0) {
+ if ((st->sample_aspect_ratio.den && den != st->sample_aspect_ratio.den) ||
+ (st->sample_aspect_ratio.num && num != st->sample_aspect_ratio.num))
+ av_log(c->fc, AV_LOG_WARNING,
+ "sample aspect ratio already set, overriding by 'pasp' atom\n");
+ st->sample_aspect_ratio.num = num;
+ st->sample_aspect_ratio.den = den;
+ }
+ return 0;
+}
+
/* this atom contains actual media data */
-static int mov_read_mdat(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
+static int mov_read_mdat(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
{
if(atom.size == 0) /* wrong one (MP4) */
return 0;
@@ -438,7 +455,7 @@ static int mov_read_mdat(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
return 0; /* now go for moov */
}
-static int mov_read_ftyp(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
+static int mov_read_ftyp(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
{
uint32_t type = get_le32(pb);
@@ -451,7 +468,7 @@ static int mov_read_ftyp(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
}
/* this atom should contain all header atoms */
-static int mov_read_moov(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
+static int mov_read_moov(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
{
if (mov_read_default(c, pb, atom) < 0)
return -1;
@@ -461,14 +478,14 @@ static int mov_read_moov(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
return 0; /* now go for mdat */
}
-static int mov_read_moof(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
+static int mov_read_moof(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
{
c->fragment.moof_offset = url_ftell(pb) - 8;
dprintf(c->fc, "moof offset %llx\n", c->fragment.moof_offset);
return mov_read_default(c, pb, atom);
}
-static int mov_read_mdhd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
+static int mov_read_mdhd(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
{
AVStream *st = c->fc->streams[c->fc->nb_streams-1];
MOVStreamContext *sc = st->priv_data;
@@ -497,7 +514,7 @@ static int mov_read_mdhd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
return 0;
}
-static int mov_read_mvhd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
+static int mov_read_mvhd(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
{
int version = get_byte(pb); /* version */
get_be24(pb); /* flags */
@@ -533,7 +550,7 @@ static int mov_read_mvhd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
return 0;
}
-static int mov_read_smi(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
+static int mov_read_smi(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
{
AVStream *st = c->fc->streams[c->fc->nb_streams-1];
@@ -553,12 +570,13 @@ static int mov_read_smi(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
return 0;
}
-static int mov_read_enda(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
+static int mov_read_enda(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
{
AVStream *st = c->fc->streams[c->fc->nb_streams-1];
int little_endian = get_be16(pb);
- if (little_endian) {
+ dprintf(c->fc, "enda %d\n", little_endian);
+ if (little_endian == 1) {
switch (st->codec->codec_id) {
case CODEC_ID_PCM_S24BE:
st->codec->codec_id = CODEC_ID_PCM_S24LE;
@@ -580,11 +598,16 @@ static int mov_read_enda(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
}
/* FIXME modify qdm2/svq3/h264 decoders to take full atom as extradata */
-static int mov_read_extradata(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
+static int mov_read_extradata(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
{
- AVStream *st = c->fc->streams[c->fc->nb_streams-1];
- uint64_t size= (uint64_t)st->codec->extradata_size + atom.size + 8 + FF_INPUT_BUFFER_PADDING_SIZE;
+ AVStream *st;
+ uint64_t size;
uint8_t *buf;
+
+ if (c->fc->nb_streams < 1) // will happen with jp2 files
+ return 0;
+ st= c->fc->streams[c->fc->nb_streams-1];
+ size= (uint64_t)st->codec->extradata_size + atom.size + 8 + FF_INPUT_BUFFER_PADDING_SIZE;
if(size > INT_MAX || (uint64_t)atom.size > INT_MAX)
return -1;
buf= av_realloc(st->codec->extradata, size);
@@ -599,7 +622,7 @@ static int mov_read_extradata(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
return 0;
}
-static int mov_read_wave(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
+static int mov_read_wave(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
{
AVStream *st = c->fc->streams[c->fc->nb_streams-1];
@@ -626,7 +649,7 @@ static int mov_read_wave(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
* This function reads atom content and puts data in extradata without tag
* nor size unlike mov_read_extradata.
*/
-static int mov_read_glbl(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
+static int mov_read_glbl(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
{
AVStream *st = c->fc->streams[c->fc->nb_streams-1];
@@ -642,7 +665,7 @@ static int mov_read_glbl(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
return 0;
}
-static int mov_read_stco(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
+static int mov_read_stco(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
{
AVStream *st = c->fc->streams[c->fc->nb_streams-1];
MOVStreamContext *sc = st->priv_data;
@@ -707,7 +730,7 @@ static enum CodecID mov_get_lpcm_codec_id(int bps, int flags)
return CODEC_ID_NONE;
}
-static int mov_read_stsd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
+static int mov_read_stsd(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
{
AVStream *st = c->fc->streams[c->fc->nb_streams-1];
MOVStreamContext *sc = st->priv_data;
@@ -722,7 +745,7 @@ static int mov_read_stsd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
//Parsing Sample description table
enum CodecID id;
int dref_id;
- MOV_atom_t a = { 0, 0, 0 };
+ MOVAtom a = { 0, 0, 0 };
int64_t start_pos = url_ftell(pb);
int size = get_be32(pb); /* size */
uint32_t format = get_le32(pb); /* data format */
@@ -987,6 +1010,9 @@ static int mov_read_stsd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
#endif
/* no ifdef since parameters are always those */
case CODEC_ID_QCELP:
+ st->codec->frame_size= 160;
+ st->codec->channels= 1; /* really needed */
+ break;
case CODEC_ID_AMR_NB:
case CODEC_ID_AMR_WB:
st->codec->frame_size= sc->samples_per_frame;
@@ -1008,8 +1034,10 @@ static int mov_read_stsd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
st->codec->block_align = sc->bytes_per_frame;
break;
case CODEC_ID_ALAC:
- if (st->codec->extradata_size == 36)
- st->codec->frame_size = AV_RB32((st->codec->extradata+12));
+ if (st->codec->extradata_size == 36) {
+ st->codec->frame_size = AV_RB32(st->codec->extradata+12);
+ st->codec->channels = AV_RB8 (st->codec->extradata+21);
+ }
break;
default:
break;
@@ -1018,7 +1046,7 @@ static int mov_read_stsd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
return 0;
}
-static int mov_read_stsc(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
+static int mov_read_stsc(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
{
AVStream *st = c->fc->streams[c->fc->nb_streams-1];
MOVStreamContext *sc = st->priv_data;
@@ -1029,13 +1057,13 @@ static int mov_read_stsc(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
entries = get_be32(pb);
- if(entries >= UINT_MAX / sizeof(MOV_stsc_t))
+ if(entries >= UINT_MAX / sizeof(*sc->sample_to_chunk))
return -1;
dprintf(c->fc, "track[%i].stsc.entries = %i\n", c->fc->nb_streams-1, entries);
sc->sample_to_chunk_sz = entries;
- sc->sample_to_chunk = av_malloc(entries * sizeof(MOV_stsc_t));
+ sc->sample_to_chunk = av_malloc(entries * sizeof(*sc->sample_to_chunk));
if (!sc->sample_to_chunk)
return -1;
for(i=0; i<entries; i++) {
@@ -1046,7 +1074,7 @@ static int mov_read_stsc(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
return 0;
}
-static int mov_read_stss(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
+static int mov_read_stss(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
{
AVStream *st = c->fc->streams[c->fc->nb_streams-1];
MOVStreamContext *sc = st->priv_data;
@@ -1074,7 +1102,7 @@ static int mov_read_stss(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
return 0;
}
-static int mov_read_stsz(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
+static int mov_read_stsz(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
{
AVStream *st = c->fc->streams[c->fc->nb_streams-1];
MOVStreamContext *sc = st->priv_data;
@@ -1104,7 +1132,7 @@ static int mov_read_stsz(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
return 0;
}
-static int mov_read_stts(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
+static int mov_read_stts(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
{
AVStream *st = c->fc->streams[c->fc->nb_streams-1];
MOVStreamContext *sc = st->priv_data;
@@ -1115,11 +1143,11 @@ static int mov_read_stts(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
get_byte(pb); /* version */
get_be24(pb); /* flags */
entries = get_be32(pb);
- if(entries >= UINT_MAX / sizeof(MOV_stts_t))
+ if(entries >= UINT_MAX / sizeof(*sc->stts_data))
return -1;
sc->stts_count = entries;
- sc->stts_data = av_malloc(entries * sizeof(MOV_stts_t));
+ sc->stts_data = av_malloc(entries * sizeof(*sc->stts_data));
if (!sc->stts_data)
return -1;
dprintf(c->fc, "track[%i].stts.entries = %i\n", c->fc->nb_streams-1, entries);
@@ -1149,7 +1177,7 @@ static int mov_read_stts(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
return 0;
}
-static int mov_read_ctts(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
+static int mov_read_ctts(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
{
AVStream *st = c->fc->streams[c->fc->nb_streams-1];
MOVStreamContext *sc = st->priv_data;
@@ -1158,11 +1186,11 @@ static int mov_read_ctts(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
get_byte(pb); /* version */
get_be24(pb); /* flags */
entries = get_be32(pb);
- if(entries >= UINT_MAX / sizeof(MOV_stts_t))
+ if(entries >= UINT_MAX / sizeof(*sc->ctts_data))
return -1;
sc->ctts_count = entries;
- sc->ctts_data = av_malloc(entries * sizeof(MOV_stts_t));
+ sc->ctts_data = av_malloc(entries * sizeof(*sc->ctts_data));
if (!sc->ctts_data)
return -1;
dprintf(c->fc, "track[%i].ctts.entries = %i\n", c->fc->nb_streams-1, entries);
@@ -1291,7 +1319,7 @@ static void mov_build_index(MOVContext *mov, AVStream *st)
sc->sample_count = st->nb_index_entries;
}
-static int mov_read_trak(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
+static int mov_read_trak(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
{
AVStream *st;
MOVStreamContext *sc;
@@ -1366,50 +1394,71 @@ static int mov_read_trak(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
return 0;
}
-static void mov_parse_udta_string(ByteIOContext *pb, char *str, int size)
+static int mov_read_ilst(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
{
- uint16_t str_size = get_be16(pb); /* string length */;
-
- get_be16(pb); /* skip language */
- get_buffer(pb, str, FFMIN(size, str_size));
+ int ret;
+ c->itunes_metadata = 1;
+ ret = mov_read_default(c, pb, atom);
+ c->itunes_metadata = 0;
+ return ret;
}
-static int mov_read_udta(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
+static int mov_read_meta(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
{
- uint64_t end = url_ftell(pb) + atom.size;
-
- while (url_ftell(pb) + 8 < end) {
- uint32_t tag_size = get_be32(pb);
- uint32_t tag = get_le32(pb);
- uint64_t next = url_ftell(pb) + tag_size - 8;
-
- if (tag_size < 8 || next > end) // stop if tag_size is wrong
- break;
+ url_fskip(pb, 4); // version + flags
+ atom.size -= 4;
+ return mov_read_default(c, pb, atom);
+}
- switch (tag) {
- case MKTAG(0xa9,'n','a','m'):
- mov_parse_udta_string(pb, c->fc->title, sizeof(c->fc->title));
- break;
- case MKTAG(0xa9,'w','r','t'):
- mov_parse_udta_string(pb, c->fc->author, sizeof(c->fc->author));
- break;
- case MKTAG(0xa9,'c','p','y'):
- mov_parse_udta_string(pb, c->fc->copyright, sizeof(c->fc->copyright));
- break;
- case MKTAG(0xa9,'i','n','f'):
- mov_parse_udta_string(pb, c->fc->comment, sizeof(c->fc->comment));
- break;
- default:
- break;
- }
+static int mov_read_trkn(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
+{
+ get_be32(pb); // type
+ get_be32(pb); // unknown
+ c->fc->track = get_be32(pb);
+ dprintf(c->fc, "%.4s %d\n", (char*)&atom.type, c->fc->track);
+ return 0;
+}
- url_fseek(pb, next, SEEK_SET);
+static int mov_read_udta_string(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
+{
+ char *str = NULL;
+ int size;
+ uint16_t str_size;
+
+ if (c->itunes_metadata) {
+ int data_size = get_be32(pb);
+ int tag = get_le32(pb);
+ if (tag == MKTAG('d','a','t','a')) {
+ get_be32(pb); // type
+ get_be32(pb); // unknown
+ str_size = data_size - 16;
+ } else return 0;
+ } else {
+ str_size = get_be16(pb); // string length
+ get_be16(pb); // language
}
-
+ switch (atom.type) {
+ case MKTAG(0xa9,'n','a','m'):
+ str = c->fc->title; size = sizeof(c->fc->title); break;
+ case MKTAG(0xa9,'A','R','T'):
+ case MKTAG(0xa9,'w','r','t'):
+ str = c->fc->author; size = sizeof(c->fc->author); break;
+ case MKTAG(0xa9,'c','p','y'):
+ str = c->fc->copyright; size = sizeof(c->fc->copyright); break;
+ case MKTAG(0xa9,'c','m','t'):
+ case MKTAG(0xa9,'i','n','f'):
+ str = c->fc->comment; size = sizeof(c->fc->comment); break;
+ case MKTAG(0xa9,'a','l','b'):
+ str = c->fc->album; size = sizeof(c->fc->album); break;
+ }
+ if (!str)
+ return 0;
+ get_buffer(pb, str, FFMIN(size, str_size));
+ dprintf(c->fc, "%.4s %s\n", (char*)&atom.type, str);
return 0;
}
-static int mov_read_tkhd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
+static int mov_read_tkhd(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
{
int i;
int width;
@@ -1480,7 +1529,7 @@ static int mov_read_tkhd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
return 0;
}
-static int mov_read_tfhd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
+static int mov_read_tfhd(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
{
MOVFragment *frag = &c->fragment;
MOVTrackExt *trex = NULL;
@@ -1515,7 +1564,7 @@ static int mov_read_tfhd(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
return 0;
}
-static int mov_read_trex(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
+static int mov_read_trex(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
{
MOVTrackExt *trex;
@@ -1535,7 +1584,7 @@ static int mov_read_trex(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
return 0;
}
-static int mov_read_trun(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
+static int mov_read_trun(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
{
MOVFragment *frag = &c->fragment;
AVStream *st;
@@ -1606,7 +1655,7 @@ static int mov_read_trun(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
/* this atom should be null (from specs), but some buggy files put the 'moov' atom inside it... */
/* like the files created with Adobe Premiere 5.0, for samples see */
/* http://graphics.tudelft.nl/~wouter/publications/soundtests/ */
-static int mov_read_wide(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
+static int mov_read_wide(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
{
int err;
@@ -1627,7 +1676,7 @@ static int mov_read_wide(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
return err;
}
-static int mov_read_cmov(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
+static int mov_read_cmov(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
{
#ifdef CONFIG_ZLIB
ByteIOContext ctx;
@@ -1680,7 +1729,7 @@ free_and_return:
}
/* edit list atom */
-static int mov_read_elst(MOVContext *c, ByteIOContext *pb, MOV_atom_t atom)
+static int mov_read_elst(MOVContext *c, ByteIOContext *pb, MOVAtom atom)
{
MOVStreamContext *sc = c->fc->streams[c->fc->nb_streams-1]->priv_data;
int i, edit_count;
@@ -1715,10 +1764,12 @@ static const MOVParseTableEntry mov_default_parse_table[] = {
{ MKTAG('f','t','y','p'), mov_read_ftyp },
{ MKTAG('g','l','b','l'), mov_read_glbl },
{ MKTAG('h','d','l','r'), mov_read_hdlr },
+{ MKTAG('i','l','s','t'), mov_read_ilst },
{ MKTAG('j','p','2','h'), mov_read_extradata },
{ MKTAG('m','d','a','t'), mov_read_mdat },
{ MKTAG('m','d','h','d'), mov_read_mdhd },
{ MKTAG('m','d','i','a'), mov_read_default },
+{ MKTAG('m','e','t','a'), mov_read_meta },
{ MKTAG('m','i','n','f'), mov_read_default },
{ MKTAG('m','o','o','f'), mov_read_moof },
{ MKTAG('m','o','o','v'), mov_read_moov },
@@ -1727,6 +1778,7 @@ static const MOVParseTableEntry mov_default_parse_table[] = {
{ MKTAG('S','M','I',' '), mov_read_smi }, /* Sorenson extension ??? */
{ MKTAG('a','l','a','c'), mov_read_extradata }, /* alac specific atom */
{ MKTAG('a','v','c','C'), mov_read_glbl },
+{ MKTAG('p','a','s','p'), mov_read_pasp },
{ MKTAG('s','t','b','l'), mov_read_default },
{ MKTAG('s','t','c','o'), mov_read_stco },
{ MKTAG('s','t','s','c'), mov_read_stsc },
@@ -1739,12 +1791,21 @@ static const MOVParseTableEntry mov_default_parse_table[] = {
{ MKTAG('t','r','a','k'), mov_read_trak },
{ MKTAG('t','r','a','f'), mov_read_default },
{ MKTAG('t','r','e','x'), mov_read_trex },
+{ MKTAG('t','r','k','n'), mov_read_trkn },
{ MKTAG('t','r','u','n'), mov_read_trun },
-{ MKTAG('u','d','t','a'), mov_read_udta },
+{ MKTAG('u','d','t','a'), mov_read_default },
{ MKTAG('w','a','v','e'), mov_read_wave },
{ MKTAG('e','s','d','s'), mov_read_esds },
{ MKTAG('w','i','d','e'), mov_read_wide }, /* place holder */
{ MKTAG('c','m','o','v'), mov_read_cmov },
+{ MKTAG(0xa9,'n','a','m'), mov_read_udta_string },
+{ MKTAG(0xa9,'w','r','t'), mov_read_udta_string },
+{ MKTAG(0xa9,'c','p','y'), mov_read_udta_string },
+{ MKTAG(0xa9,'i','n','f'), mov_read_udta_string },
+{ MKTAG(0xa9,'i','n','f'), mov_read_udta_string },
+{ MKTAG(0xa9,'A','R','T'), mov_read_udta_string },
+{ MKTAG(0xa9,'a','l','b'), mov_read_udta_string },
+{ MKTAG(0xa9,'c','m','t'), mov_read_udta_string },
{ 0, NULL }
};
@@ -1798,7 +1859,7 @@ static int mov_read_header(AVFormatContext *s, AVFormatParameters *ap)
MOVContext *mov = s->priv_data;
ByteIOContext *pb = s->pb;
int err;
- MOV_atom_t atom = { 0, 0, 0 };
+ MOVAtom atom = { 0, 0, 0 };
mov->fc = s;
/* .mov and .mp4 aren't streamable anyway (only progressive download if moov is before mdat) */
@@ -1851,7 +1912,7 @@ static int mov_read_packet(AVFormatContext *s, AVPacket *pkt)
if (!sample) {
mov->found_mdat = 0;
if (!url_is_streamed(s->pb) ||
- mov_read_default(mov, s->pb, (MOV_atom_t){ 0, 0, INT64_MAX }) < 0 ||
+ mov_read_default(mov, s->pb, (MOVAtom){ 0, 0, INT64_MAX }) < 0 ||
url_feof(s->pb))
return -1;
dprintf(s, "read fragments, offset 0x%llx\n", url_ftell(s->pb));
diff --git a/libavformat/movenc.c b/libavformat/movenc.c
index 9588bb6..6ddbed1 100644
--- a/libavformat/movenc.c
+++ b/libavformat/movenc.c
@@ -723,7 +723,7 @@ static int mov_write_stsd_tag(ByteIOContext *pb, MOVTrack *track)
static int mov_write_ctts_tag(ByteIOContext *pb, MOVTrack *track)
{
- MOV_stts_t *ctts_entries;
+ MOVStts *ctts_entries;
uint32_t entries = 0;
uint32_t atom_size;
int i;
@@ -757,7 +757,7 @@ static int mov_write_ctts_tag(ByteIOContext *pb, MOVTrack *track)
/* Time to sample atom */
static int mov_write_stts_tag(ByteIOContext *pb, MOVTrack *track)
{
- MOV_stts_t *stts_entries;
+ MOVStts *stts_entries;
uint32_t entries = -1;
uint32_t atom_size;
int i;
diff --git a/libavformat/mp3.c b/libavformat/mp3.c
index 407d4f0..a4eb49c 100644
--- a/libavformat/mp3.c
+++ b/libavformat/mp3.c
@@ -383,7 +383,7 @@ static int mp3_read_probe(AVProbeData *p)
for(frames = 0; buf2 < end; frames++) {
header = AV_RB32(buf2);
- fsize = ff_mpa_decode_header(&avctx, header, &sample_rate);
+ fsize = ff_mpa_decode_header(&avctx, header, &sample_rate, &sample_rate, &sample_rate, &sample_rate);
if(fsize < 0)
break;
buf2 += fsize;
@@ -402,20 +402,22 @@ static int mp3_read_probe(AVProbeData *p)
/**
* Try to find Xing/Info/VBRI tags and compute duration from info therein
*/
-static void mp3_parse_vbr_tags(AVFormatContext *s, AVStream *st, int64_t base)
+static int mp3_parse_vbr_tags(AVFormatContext *s, AVStream *st, int64_t base)
{
uint32_t v, spf;
int frames = -1; /* Total number of frames in file */
const int64_t xing_offtbl[2][2] = {{32, 17}, {17,9}};
MPADecodeContext c;
+ int vbrtag_size = 0;
v = get_be32(s->pb);
if(ff_mpa_check_header(v) < 0)
- return;
+ return -1;
- ff_mpegaudio_decode_header(&c, v);
+ if (ff_mpegaudio_decode_header(&c, v) == 0)
+ vbrtag_size = c.frame_size;
if(c.layer != 3)
- return;
+ return -1;
/* Check for Xing / Info tag */
url_fseek(s->pb, xing_offtbl[c.lsf == 1][c.nb_channels == 1], SEEK_CUR);
@@ -439,11 +441,15 @@ static void mp3_parse_vbr_tags(AVFormatContext *s, AVStream *st, int64_t base)
}
if(frames < 0)
- return;
+ return -1;
+
+ /* Skip the vbr tag frame */
+ url_fseek(s->pb, base + vbrtag_size, SEEK_SET);
spf = c.lsf ? 576 : 1152; /* Samples per frame, layer 3 */
st->duration = av_rescale_q(frames, (AVRational){spf, c.sample_rate},
st->time_base);
+ return 0;
}
static int mp3_read_header(AVFormatContext *s,
@@ -493,8 +499,8 @@ static int mp3_read_header(AVFormatContext *s,
}
off = url_ftell(s->pb);
- mp3_parse_vbr_tags(s, st, off);
- url_fseek(s->pb, off, SEEK_SET);
+ if (mp3_parse_vbr_tags(s, st, off) < 0)
+ url_fseek(s->pb, off, SEEK_SET);
/* the parameters will be extracted from the compressed bitstream */
return 0;
diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c
index 48aca43..00ce336 100644
--- a/libavformat/mpegts.c
+++ b/libavformat/mpegts.c
@@ -22,6 +22,7 @@
#include "libavutil/crc.h"
#include "avformat.h"
#include "mpegts.h"
+#include "internal.h"
//#define DEBUG_SI
//#define DEBUG_SEEK
@@ -38,8 +39,6 @@ typedef struct PESContext PESContext;
static PESContext* add_pes_stream(MpegTSContext *ts, int pid, int pcr_pid, int stream_type);
static AVStream* new_pes_av_stream(PESContext *pes, uint32_t code);
-extern void av_set_program_name(AVProgram *program, char *provider_name, char *name);
-extern void av_program_add_stream_index(AVFormatContext *ac, int progid, unsigned int idx);
enum MpegTSFilterType {
MPEGTS_PES,
@@ -80,11 +79,11 @@ struct MpegTSFilter {
};
#define MAX_PIDS_PER_PROGRAM 64
-typedef struct {
+struct Program {
unsigned int id; //program id/service id
unsigned int nb_pids;
unsigned int pids[MAX_PIDS_PER_PROGRAM];
-} Program_t;
+};
struct MpegTSContext {
/* user data */
@@ -114,7 +113,7 @@ struct MpegTSContext {
/* scan context */
/** structure to keep track of Program->pids mapping */
unsigned int nb_prg;
- Program_t *prg;
+ struct Program *prg;
/** filters for various streams specified by PMT + for the PAT and PMT */
@@ -169,8 +168,8 @@ static void clear_programs(MpegTSContext *ts)
static void add_pat_entry(MpegTSContext *ts, unsigned int programid)
{
- Program_t *p;
- void *tmp = av_realloc(ts->prg, (ts->nb_prg+1)*sizeof(Program_t));
+ struct Program *p;
+ void *tmp = av_realloc(ts->prg, (ts->nb_prg+1)*sizeof(struct Program));
if(!tmp)
return;
ts->prg = tmp;
@@ -183,7 +182,7 @@ static void add_pat_entry(MpegTSContext *ts, unsigned int programid)
static void add_pid_to_pmt(MpegTSContext *ts, unsigned int programid, unsigned int pid)
{
int i;
- Program_t *p = NULL;
+ struct Program *p = NULL;
for(i=0; i<ts->nb_prg; i++) {
if(ts->prg[i].id == programid) {
p = &ts->prg[i];
@@ -210,7 +209,7 @@ static int discard_pid(MpegTSContext *ts, unsigned int pid)
{
int i, j, k;
int used = 0, discarded = 0;
- Program_t *p;
+ struct Program *p;
for(i=0; i<ts->nb_prg; i++) {
p = &ts->prg[i];
for(j=0; j<p->nb_pids; j++) {
diff --git a/libavformat/mtv.c b/libavformat/mtv.c
index 75da0e7..7bf0d84 100644
--- a/libavformat/mtv.c
+++ b/libavformat/mtv.c
@@ -36,24 +36,23 @@
typedef struct MTVDemuxContext {
- unsigned int file_size; ///< filesize, not always right
- unsigned int segments; ///< number of 512 byte segments
- unsigned int audio_identifier; ///< 'MP3' on all files I have seen
- unsigned int audio_br; ///< bitrate of audio chanel (mp3)
- unsigned int img_colorfmt; ///< frame colorfmt rgb 565/555
- unsigned int img_bpp; ///< frame bits per pixel
- unsigned int img_width; //
- unsigned int img_height; //
- unsigned int img_segment_size; ///< size of image segment
- unsigned int video_fps; //
- unsigned int full_segment_size;
+ unsigned int file_size; ///< filesize, not always right
+ unsigned int segments; ///< number of 512 byte segments
+ unsigned int audio_identifier; ///< 'MP3' on all files I have seen
+ unsigned int audio_br; ///< bitrate of audio chanel (mp3)
+ unsigned int img_colorfmt; ///< frame colorfmt rgb 565/555
+ unsigned int img_bpp; ///< frame bits per pixel
+ unsigned int img_width; //
+ unsigned int img_height; //
+ unsigned int img_segment_size; ///< size of image segment
+ unsigned int video_fps; //
+ unsigned int full_segment_size;
} MTVDemuxContext;
static int mtv_probe(AVProbeData *p)
{
/* Magic is 'AMV' */
-
if(*(p->buf) != 'A' || *(p->buf+1) != 'M' || *(p->buf+2) != 'V')
return 0;
@@ -62,11 +61,10 @@ static int mtv_probe(AVProbeData *p)
static int mtv_read_header(AVFormatContext *s, AVFormatParameters *ap)
{
- MTVDemuxContext *mtv = s->priv_data;
- ByteIOContext *pb = s->pb;
- AVStream *st;
- unsigned int audio_subsegments;
-
+ MTVDemuxContext *mtv = s->priv_data;
+ ByteIOContext *pb = s->pb;
+ AVStream *st;
+ unsigned int audio_subsegments;
url_fskip(pb, 3);
mtv->file_size = get_le32(pb);
@@ -86,11 +84,11 @@ static int mtv_read_header(AVFormatContext *s, AVFormatParameters *ap)
mtv->img_segment_size;
mtv->video_fps = (mtv->audio_br / 4) / audio_subsegments;
- /* FIXME Add sanity check here */
+ // FIXME Add sanity check here
- /* all systems go! init decoders */
+ // all systems go! init decoders
- /* video - raw rgb565 */
+ // video - raw rgb565
st = av_new_stream(s, VIDEO_SID);
if(!st)
@@ -105,7 +103,7 @@ static int mtv_read_header(AVFormatContext *s, AVFormatParameters *ap)
st->codec->bits_per_coded_sample = mtv->img_bpp;
st->codec->sample_rate = mtv->video_fps;
- /* audio - mp3 */
+ // audio - mp3
st = av_new_stream(s, AUDIO_SID);
if(!st)
@@ -117,7 +115,7 @@ static int mtv_read_header(AVFormatContext *s, AVFormatParameters *ap)
st->codec->bit_rate = mtv->audio_br;
st->need_parsing = AVSTREAM_PARSE_FULL;
- /* Jump over header */
+ // Jump over header
if(url_fseek(pb, MTV_HEADER_SIZE, SEEK_SET) != MTV_HEADER_SIZE)
return AVERROR(EIO);
diff --git a/libavformat/mxf.h b/libavformat/mxf.h
index 4c17a9e..39c8c16 100644
--- a/libavformat/mxf.h
+++ b/libavformat/mxf.h
@@ -41,6 +41,7 @@ enum MXFMetadataSetType {
Identification,
ContentStorage,
SubDescriptor,
+ IndexTableSegment,
TypeBottom,// add metadata type before this
};
diff --git a/libavformat/mxfdec.c b/libavformat/mxfdec.c
index 98cf41d..6dafdc6 100644
--- a/libavformat/mxfdec.c
+++ b/libavformat/mxfdec.c
@@ -104,6 +104,11 @@ typedef struct {
typedef struct {
UID uid;
enum MXFMetadataSetType type;
+} MXFIndexTableSegment;
+
+typedef struct {
+ UID uid;
+ enum MXFMetadataSetType type;
UID package_uid;
UID *tracks_refs;
int tracks_count;
@@ -300,6 +305,7 @@ static int mxf_read_packet(AVFormatContext *s, AVPacket *pkt)
if (klv_read_packet(&klv, s->pb) < 0)
return -1;
PRINT_KEY(s, "read packet", klv.key);
+ dprintf(s, "size %lld offset %#llx\n", klv.length, klv.offset);
if (IS_KLV_KEY(klv.key, mxf_encrypted_triplet_key)) {
int res = mxf_decrypt_triplet(s, pkt, &klv);
if (res < 0) {
@@ -496,13 +502,26 @@ static int mxf_read_source_package(MXFPackage *package, ByteIOContext *pb, int t
return 0;
}
+static int mxf_read_index_table_segment(MXFIndexTableSegment *segment, ByteIOContext *pb, int tag)
+{
+ switch(tag) {
+ case 0x3F05: dprintf(NULL, "EditUnitByteCount %d\n", get_be32(pb)); break;
+ case 0x3F06: dprintf(NULL, "IndexSID %d\n", get_be32(pb)); break;
+ case 0x3F07: dprintf(NULL, "BodySID %d\n", get_be32(pb)); break;
+ case 0x3F0B: dprintf(NULL, "IndexEditRate %d/%d\n", get_be32(pb), get_be32(pb)); break;
+ case 0x3F0C: dprintf(NULL, "IndexStartPosition %lld\n", get_be64(pb)); break;
+ case 0x3F0D: dprintf(NULL, "IndexDuration %lld\n", get_be64(pb)); break;
+ }
+ return 0;
+}
+
static void mxf_read_pixel_layout(ByteIOContext *pb, MXFDescriptor *descriptor)
{
int code;
do {
code = get_byte(pb);
- dprintf(NULL, "pixel layout: code 0x%x\n", code);
+ dprintf(NULL, "pixel layout: code %#x\n", code);
switch (code) {
case 0x52: /* R */
descriptor->bits_per_sample += get_byte(pb);
@@ -837,6 +856,7 @@ static const MXFMetadataReadTableEntry mxf_metadata_read_table[] = {
{ { 0x06,0x0E,0x2B,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x01,0x01,0x01,0x01,0x3A,0x00 }, mxf_read_track, sizeof(MXFTrack), Track }, /* Static Track */
{ { 0x06,0x0E,0x2B,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x01,0x01,0x01,0x01,0x3B,0x00 }, mxf_read_track, sizeof(MXFTrack), Track }, /* Generic Track */
{ { 0x06,0x0E,0x2B,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x04,0x01,0x02,0x02,0x00,0x00 }, mxf_read_cryptographic_context, sizeof(MXFCryptoContext), CryptoContext },
+ { { 0x06,0x0E,0x2B,0x34,0x02,0x53,0x01,0x01,0x0d,0x01,0x02,0x01,0x01,0x10,0x01,0x00 }, mxf_read_index_table_segment, sizeof(MXFIndexTableSegment), IndexTableSegment },
{ { 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 }, NULL, 0, AnyType },
};
@@ -854,8 +874,9 @@ static int mxf_read_local_tags(MXFContext *mxf, KLVPacket *klv, int (*read_child
uint64_t next = url_ftell(pb) + size;
UID uid = {0};
+ dprintf(mxf->fc, "local tag %#04x size %d\n", tag, size);
if (!size) { /* ignore empty tag, needed for some files with empty UMID tag */
- av_log(mxf->fc, AV_LOG_ERROR, "local tag 0x%04X with 0 size\n", tag);
+ av_log(mxf->fc, AV_LOG_ERROR, "local tag %#04x with 0 size\n", tag);
continue;
}
if (tag > 0x7FFF) { /* dynamic tag */
@@ -864,7 +885,7 @@ static int mxf_read_local_tags(MXFContext *mxf, KLVPacket *klv, int (*read_child
int local_tag = AV_RB16(mxf->local_tags+i*18);
if (local_tag == tag) {
memcpy(uid, mxf->local_tags+i*18+2, 16);
- dprintf(mxf->fc, "local tag 0x%04X\n", local_tag);
+ dprintf(mxf->fc, "local tag %#04x\n", local_tag);
PRINT_KEY(mxf->fc, "uid", uid);
}
}
@@ -897,6 +918,7 @@ static int mxf_read_header(AVFormatContext *s, AVFormatParameters *ap)
if (klv_read_packet(&klv, s->pb) < 0)
return -1;
PRINT_KEY(s, "read header", klv.key);
+ dprintf(s, "size %lld offset %#llx\n", klv.length, klv.offset);
if (IS_KLV_KEY(klv.key, mxf_encrypted_triplet_key) ||
IS_KLV_KEY(klv.key, mxf_essence_element_key)) {
/* FIXME avoid seek */
diff --git a/libavformat/nut.c b/libavformat/nut.c
index 7a978a5..6fdc298 100644
--- a/libavformat/nut.c
+++ b/libavformat/nut.c
@@ -47,16 +47,16 @@ int64_t ff_lsb2full(StreamContext *stream, int64_t lsb){
return ((lsb - delta)&mask) + delta;
}
-int ff_nut_sp_pos_cmp(syncpoint_t *a, syncpoint_t *b){
+int ff_nut_sp_pos_cmp(Syncpoint *a, Syncpoint *b){
return ((a->pos - b->pos) >> 32) - ((b->pos - a->pos) >> 32);
}
-int ff_nut_sp_pts_cmp(syncpoint_t *a, syncpoint_t *b){
+int ff_nut_sp_pts_cmp(Syncpoint *a, Syncpoint *b){
return ((a->ts - b->ts) >> 32) - ((b->ts - a->ts) >> 32);
}
void ff_nut_add_sp(NUTContext *nut, int64_t pos, int64_t back_ptr, int64_t ts){
- syncpoint_t *sp= av_mallocz(sizeof(syncpoint_t));
+ Syncpoint *sp= av_mallocz(sizeof(Syncpoint));
struct AVTreeNode *node= av_mallocz(av_tree_node_size);
sp->pos= pos;
diff --git a/libavformat/nut.h b/libavformat/nut.h
index 713d27d..a1081ed 100644
--- a/libavformat/nut.h
+++ b/libavformat/nut.h
@@ -50,14 +50,14 @@ typedef enum{
FLAG_MATCH_TIME =2048, ///<If set, match_time_delta is coded in the frame header
FLAG_CODED =4096, ///<if set, coded_flags are stored in the frame header
FLAG_INVALID =8192, ///<if set, frame_code is invalid
-}flag_t;
+} Flag;
typedef struct {
uint64_t pos;
uint64_t back_ptr;
// uint64_t global_key_pts;
int64_t ts;
-} syncpoint_t;
+} Syncpoint;
typedef struct {
uint16_t flags;
@@ -67,7 +67,7 @@ typedef struct {
int16_t pts_delta;
uint8_t reserved_count;
uint8_t header_idx;
-} FrameCode; // maybe s/FrameCode/framecode_t/ or change all to Java style but do not mix
+} FrameCode;
typedef struct {
int last_flags;
@@ -78,7 +78,7 @@ typedef struct {
int msb_pts_shift;
int max_pts_distance;
int decode_delay; //FIXME duplicate of has_b_frames
-} StreamContext;// maybe s/StreamContext/streamcontext_t/
+} StreamContext;
typedef struct {
AVFormatContext *avf;
@@ -106,8 +106,8 @@ typedef struct {
void ff_nut_reset_ts(NUTContext *nut, AVRational time_base, int64_t val);
int64_t ff_lsb2full(StreamContext *stream, int64_t lsb);
-int ff_nut_sp_pos_cmp(syncpoint_t *a, syncpoint_t *b);
-int ff_nut_sp_pts_cmp(syncpoint_t *a, syncpoint_t *b);
+int ff_nut_sp_pos_cmp(Syncpoint *a, Syncpoint *b);
+int ff_nut_sp_pts_cmp(Syncpoint *a, Syncpoint *b);
void ff_nut_add_sp(NUTContext *nut, int64_t pos, int64_t back_ptr, int64_t ts);
extern const Dispositions ff_nut_dispositions[];
diff --git a/libavformat/nutdec.c b/libavformat/nutdec.c
index cf41f62..098bc84 100644
--- a/libavformat/nutdec.c
+++ b/libavformat/nutdec.c
@@ -846,9 +846,9 @@ assert(0);
static int read_seek(AVFormatContext *s, int stream_index, int64_t pts, int flags){
NUTContext *nut = s->priv_data;
AVStream *st= s->streams[stream_index];
- syncpoint_t dummy={.ts= pts*av_q2d(st->time_base)*AV_TIME_BASE};
- syncpoint_t nopts_sp= {.ts= AV_NOPTS_VALUE, .back_ptr= AV_NOPTS_VALUE};
- syncpoint_t *sp, *next_node[2]= {&nopts_sp, &nopts_sp};
+ Syncpoint dummy={.ts= pts*av_q2d(st->time_base)*AV_TIME_BASE};
+ Syncpoint nopts_sp= {.ts= AV_NOPTS_VALUE, .back_ptr= AV_NOPTS_VALUE};
+ Syncpoint *sp, *next_node[2]= {&nopts_sp, &nopts_sp};
int64_t pos, pos2, ts;
int i;
diff --git a/libavformat/nutenc.c b/libavformat/nutenc.c
index 478fc4f..54d4b07 100644
--- a/libavformat/nutenc.c
+++ b/libavformat/nutenc.c
@@ -260,7 +260,7 @@ static void put_v(ByteIOContext *bc, uint64_t val){
put_byte(bc, val&127);
}
-static void put_t(NUTContext *nut, StreamContext *nus, ByteIOContext *bc, uint64_t val){
+static void put_tt(NUTContext *nut, StreamContext *nus, ByteIOContext *bc, uint64_t val){
val *= nut->time_base_count;
val += nus->time_base - nut->time_base;
put_v(bc, val);
@@ -664,7 +664,7 @@ static int write_packet(AVFormatContext *s, AVPacket *pkt){
//FIXME: Ensure store_sp is 1 in the first place.
if(store_sp){
- syncpoint_t *sp, dummy= {.pos= INT64_MAX};
+ Syncpoint *sp, dummy= {.pos= INT64_MAX};
ff_nut_reset_ts(nut, *nus->time_base, pkt->dts);
for(i=0; i<s->nb_streams; i++){
@@ -684,7 +684,7 @@ static int write_packet(AVFormatContext *s, AVPacket *pkt){
ret = url_open_dyn_buf(&dyn_bc);
if(ret < 0)
return ret;
- put_t(nut, nus, dyn_bc, pkt->dts);
+ put_tt(nut, nus, dyn_bc, pkt->dts);
put_v(dyn_bc, sp ? (nut->last_syncpoint_pos - sp->pos)>>4 : 0);
put_packet(nut, bc, dyn_bc, 1, SYNCPOINT_STARTCODE);
diff --git a/libavformat/nuv.c b/libavformat/nuv.c
index bd98839..7a01e68 100644
--- a/libavformat/nuv.c
+++ b/libavformat/nuv.c
@@ -33,7 +33,7 @@ typedef enum {
NUV_AUDIO = 'A',
NUV_SEEKP = 'R',
NUV_MYTHEXT = 'X'
-} frametype_t;
+} nuv_frametype;
static int nuv_probe(AVProbeData *p) {
if (!memcmp(p->buf, "NuppelVideo", 12))
@@ -55,7 +55,7 @@ static int nuv_probe(AVProbeData *p) {
*/
static int get_codec_data(ByteIOContext *pb, AVStream *vst,
AVStream *ast, int myth) {
- frametype_t frametype;
+ nuv_frametype frametype;
if (!vst && !myth)
return 1; // no codec data needed
while (!url_feof(pb)) {
@@ -191,7 +191,7 @@ static int nuv_packet(AVFormatContext *s, AVPacket *pkt) {
NUVContext *ctx = s->priv_data;
ByteIOContext *pb = s->pb;
uint8_t hdr[HDRSIZE];
- frametype_t frametype;
+ nuv_frametype frametype;
int ret, size;
while (!url_feof(pb)) {
int copyhdrsize = ctx->rtjpg_video ? HDRSIZE : 0;
diff --git a/libavformat/oggdec.h b/libavformat/oggdec.h
index 1a73fe0..d238e99 100644
--- a/libavformat/oggdec.h
+++ b/libavformat/oggdec.h
@@ -85,6 +85,6 @@ extern const struct ogg_codec ff_speex_codec;
extern const struct ogg_codec ff_theora_codec;
extern const struct ogg_codec ff_vorbis_codec;
-extern int vorbis_comment(AVFormatContext *ms, uint8_t *buf, int size);
+int vorbis_comment(AVFormatContext *ms, uint8_t *buf, int size);
#endif /* AVFORMAT_OGGDEC_H */
diff --git a/libavformat/oggparseflac.c b/libavformat/oggparseflac.c
index b1332c6..802453a 100644
--- a/libavformat/oggparseflac.c
+++ b/libavformat/oggparseflac.c
@@ -38,21 +38,21 @@ flac_header (AVFormatContext * s, int idx)
return 0;
init_get_bits(&gb, os->buf + os->pstart, os->psize*8);
- get_bits(&gb, 1); /* metadata_last */
+ skip_bits1(&gb); /* metadata_last */
mdt = get_bits(&gb, 7);
if (mdt == 0x7f) {
- skip_bits(&gb, 4*8); /* "FLAC" */
+ skip_bits_long(&gb, 4*8); /* "FLAC" */
if(get_bits(&gb, 8) != 1) /* unsupported major version */
return -1;
- skip_bits(&gb, 8 + 16); /* minor version + header count */
- skip_bits(&gb, 4*8); /* "fLaC" */
+ skip_bits_long(&gb, 8 + 16); /* minor version + header count */
+ skip_bits_long(&gb, 4*8); /* "fLaC" */
/* METADATA_BLOCK_HEADER */
if (get_bits_long(&gb, 32) != FLAC_STREAMINFO_SIZE)
return -1;
- skip_bits(&gb, 16*2+24*2);
+ skip_bits_long(&gb, 16*2+24*2);
st->codec->sample_rate = get_bits_long(&gb, 20);
st->codec->channels = get_bits(&gb, 3) + 1;
diff --git a/libavformat/oggparsevorbis.c b/libavformat/oggparsevorbis.c
index 1f0bcfe..a1eb20d 100644
--- a/libavformat/oggparsevorbis.c
+++ b/libavformat/oggparsevorbis.c
@@ -30,7 +30,7 @@
#include "avformat.h"
#include "oggdec.h"
-extern int
+int
vorbis_comment(AVFormatContext * as, uint8_t *buf, int size)
{
const uint8_t *p = buf;
diff --git a/libavformat/os_support.h b/libavformat/os_support.h
index 2fd3112..0b6a292 100644
--- a/libavformat/os_support.h
+++ b/libavformat/os_support.h
@@ -93,7 +93,7 @@ struct pollfd {
#define POLLNVAL 0x1000 /* invalid file descriptor */
-extern int poll(struct pollfd *fds, nfds_t numfds, int timeout);
+int poll(struct pollfd *fds, nfds_t numfds, int timeout);
#endif /* HAVE_POLL_H */
#endif /* CONFIG_FFSERVER */
#endif /* CONFIG_NETWORK */
diff --git a/libavformat/raw.c b/libavformat/raw.c
index f2a886c..b93cdcc 100644
--- a/libavformat/raw.c
+++ b/libavformat/raw.c
@@ -577,6 +577,38 @@ static int flac_probe(AVProbeData *p)
}
#endif
+#ifdef CONFIG_AAC_DEMUXER
+static int adts_aac_probe(AVProbeData *p)
+{
+ int max_frames = 0, first_frames = 0;
+ int fsize, frames;
+ uint8_t *buf2;
+ uint8_t *buf = p->buf;
+ uint8_t *end = buf + p->buf_size - 7;
+
+ for(; buf < end; buf= buf2+1) {
+ buf2 = buf;
+
+ for(frames = 0; buf2 < end; frames++) {
+ uint32_t header = AV_RB16(buf2);
+ if((header&0xFFF6) != 0xFFF0)
+ break;
+ fsize = (AV_RB32(buf2+3)>>13) & 0x8FFF;
+ if(fsize < 7)
+ break;
+ buf2 += fsize;
+ }
+ max_frames = FFMAX(max_frames, frames);
+ if(buf == p->buf)
+ first_frames= frames;
+ }
+ if (first_frames>=3) return AVPROBE_SCORE_MAX/2+1;
+ else if(max_frames>500)return AVPROBE_SCORE_MAX/2;
+ else if(max_frames>=3) return AVPROBE_SCORE_MAX/4;
+ else if(max_frames>=1) return 1;
+ else return 0;
+}
+#endif
/* Note: Do not forget to add new entries to the Makefile as well. */
@@ -585,7 +617,7 @@ AVInputFormat aac_demuxer = {
"aac",
NULL_IF_CONFIG_SMALL("ADTS AAC"),
0,
- NULL,
+ adts_aac_probe,
audio_read_header,
raw_read_partial_packet,
.flags= AVFMT_GENERIC_INDEX,
@@ -891,7 +923,7 @@ AVInputFormat m4v_demuxer = {
video_read_header,
raw_read_partial_packet,
.flags= AVFMT_GENERIC_INDEX,
- .extensions = "m4v", //FIXME remove after writing mpeg4_probe
+ .extensions = "m4v",
.value = CODEC_ID_MPEG4,
};
#endif
diff --git a/libavformat/rdt.c b/libavformat/rdt.c
index daf8cbe..3680527 100644
--- a/libavformat/rdt.c
+++ b/libavformat/rdt.c
@@ -33,17 +33,25 @@
#include "libavutil/md5.h"
#include "rm.h"
#include "internal.h"
+#include <libavcodec/bitstream.h>
struct RDTDemuxContext {
- AVFormatContext *ic;
- AVStream *st;
+ AVFormatContext *ic; /**< the containing (RTSP) demux context */
+ /** Each RDT stream-set (represented by one RTSPStream) can contain
+ * multiple streams (of the same content, but with possibly different
+ * codecs/bitrates). Each such stream is represented by one AVStream
+ * in the AVFormatContext, and this variable points to the offset in
+ * that array such that the first is the first stream of this set. */
+ AVStream **streams;
+ int n_streams; /**< streams with identifical content in this set */
void *dynamic_protocol_context;
DynamicPayloadPacketHandlerProc parse_packet;
- uint32_t prev_sn, prev_ts;
+ uint32_t prev_timestamp;
+ int prev_set_id, prev_stream_id;
};
RDTDemuxContext *
-ff_rdt_parse_open(AVFormatContext *ic, AVStream *st,
+ff_rdt_parse_open(AVFormatContext *ic, int first_stream_of_set_idx,
void *priv_data, RTPDynamicProtocolHandler *handler)
{
RDTDemuxContext *s = av_mallocz(sizeof(RDTDemuxContext));
@@ -51,9 +59,14 @@ ff_rdt_parse_open(AVFormatContext *ic, AVStream *st,
return NULL;
s->ic = ic;
- s->st = st;
- s->prev_sn = -1;
- s->prev_ts = -1;
+ s->streams = &ic->streams[first_stream_of_set_idx];
+ do {
+ s->n_streams++;
+ } while (first_stream_of_set_idx + s->n_streams < ic->nb_streams &&
+ s->streams[s->n_streams]->priv_data == s->streams[0]->priv_data);
+ s->prev_set_id = -1;
+ s->prev_stream_id = -1;
+ s->prev_timestamp = -1;
s->parse_packet = handler->parse_packet;
s->dynamic_protocol_context = priv_data;
@@ -63,14 +76,21 @@ ff_rdt_parse_open(AVFormatContext *ic, AVStream *st,
void
ff_rdt_parse_close(RDTDemuxContext *s)
{
+ int i;
+
+ for (i = 1; i < s->n_streams; i++)
+ s->streams[i]->priv_data = NULL;
+
av_free(s);
}
struct PayloadContext {
AVFormatContext *rmctx;
+ RMStream *rmst[MAX_STREAMS];
uint8_t *mlti_data;
unsigned int mlti_data_size;
char buffer[RTP_MAX_PACKET_LENGTH + FF_INPUT_BUFFER_PADDING_SIZE];
+ int audio_pkt_cnt; /**< remaining audio packets in rmdec */
};
void
@@ -115,7 +135,7 @@ ff_rdt_calc_response_and_checksum(char response[41], char chksum[9],
static int
rdt_load_mdpr (PayloadContext *rdt, AVStream *st, int rule_nr)
{
- ByteIOContext *pb;
+ ByteIOContext pb;
int size;
uint32_t tag;
@@ -135,35 +155,34 @@ rdt_load_mdpr (PayloadContext *rdt, AVStream *st, int rule_nr)
*/
if (!rdt->mlti_data)
return -1;
- url_open_buf(&pb, rdt->mlti_data, rdt->mlti_data_size, URL_RDONLY);
- tag = get_le32(pb);
+ init_put_byte(&pb, rdt->mlti_data, rdt->mlti_data_size, 0,
+ NULL, NULL, NULL, NULL);
+ tag = get_le32(&pb);
if (tag == MKTAG('M', 'L', 'T', 'I')) {
int num, chunk_nr;
/* read index of MDPR chunk numbers */
- num = get_be16(pb);
+ num = get_be16(&pb);
if (rule_nr < 0 || rule_nr >= num)
return -1;
- url_fskip(pb, rule_nr * 2);
- chunk_nr = get_be16(pb);
- url_fskip(pb, (num - 1 - rule_nr) * 2);
+ url_fskip(&pb, rule_nr * 2);
+ chunk_nr = get_be16(&pb);
+ url_fskip(&pb, (num - 1 - rule_nr) * 2);
/* read MDPR chunks */
- num = get_be16(pb);
+ num = get_be16(&pb);
if (chunk_nr >= num)
return -1;
while (chunk_nr--)
- url_fskip(pb, get_be32(pb));
- size = get_be32(pb);
+ url_fskip(&pb, get_be32(&pb));
+ size = get_be32(&pb);
} else {
size = rdt->mlti_data_size;
- url_fseek(pb, 0, SEEK_SET);
+ url_fseek(&pb, 0, SEEK_SET);
}
- rdt->rmctx->pb = pb;
- if (ff_rm_read_mdpr_codecdata(rdt->rmctx, st, size) < 0)
+ if (ff_rm_read_mdpr_codecdata(rdt->rmctx, &pb, st, rdt->rmst[st->index], size) < 0)
return -1;
- url_close_buf(pb);
return 0;
}
@@ -173,16 +192,27 @@ rdt_load_mdpr (PayloadContext *rdt, AVStream *st, int rule_nr)
int
ff_rdt_parse_header(const uint8_t *buf, int len,
- int *sn, int *seq, int *rn, uint32_t *ts)
+ int *pset_id, int *pseq_no, int *pstream_id,
+ int *pis_keyframe, uint32_t *ptimestamp)
{
- int consumed = 10;
+ GetBitContext gb;
+ int consumed = 0, set_id, seq_no, stream_id, is_keyframe,
+ len_included, need_reliable;
+ uint32_t timestamp;
+
+ /* skip status packets */
+ while (len >= 5 && buf[1] == 0xFF /* status packet */) {
+ int pkt_len;
+
+ if (!(buf[0] & 0x80))
+ return -1; /* not followed by a data packet */
- if (len > 0 && (buf[0] < 0x40 || buf[0] > 0x42)) {
- buf += 9;
- len -= 9;
- consumed += 9;
+ pkt_len = AV_RB16(buf+3);
+ buf += pkt_len;
+ len -= pkt_len;
+ consumed += pkt_len;
}
- if (len < 10)
+ if (len < 16)
return -1;
/**
* Layout of the header (in bits):
@@ -235,12 +265,32 @@ ff_rdt_parse_header(const uint8_t *buf, int len,
* [2] http://www.wireshark.org/docs/dfref/r/rdt.html and
* http://anonsvn.wireshark.org/viewvc/trunk/epan/dissectors/packet-rdt.c
*/
- if (sn) *sn = (buf[0]>>1) & 0x1f;
- if (seq) *seq = AV_RB16(buf+1);
- if (ts) *ts = AV_RB32(buf+4);
- if (rn) *rn = buf[3] & 0x3f;
-
- return consumed;
+ init_get_bits(&gb, buf, len << 3);
+ len_included = get_bits1(&gb);
+ need_reliable = get_bits1(&gb);
+ set_id = get_bits(&gb, 5);
+ skip_bits(&gb, 1);
+ seq_no = get_bits(&gb, 16);
+ if (len_included)
+ skip_bits(&gb, 16);
+ skip_bits(&gb, 2);
+ stream_id = get_bits(&gb, 5);
+ is_keyframe = !get_bits1(&gb);
+ timestamp = get_bits_long(&gb, 32);
+ if (set_id == 0x1f)
+ set_id = get_bits(&gb, 16);
+ if (need_reliable)
+ skip_bits(&gb, 16);
+ if (stream_id == 0x1f)
+ stream_id = get_bits(&gb, 16);
+
+ if (pset_id) *pset_id = set_id;
+ if (pseq_no) *pseq_no = seq_no;
+ if (pstream_id) *pstream_id = stream_id;
+ if (pis_keyframe) *pis_keyframe = is_keyframe;
+ if (ptimestamp) *ptimestamp = timestamp;
+
+ return consumed + (get_bits_count(&gb) >> 3);
}
/**< return 0 on packet, no more left, 1 on packet, 1 on partial packet... */
@@ -250,73 +300,83 @@ rdt_parse_packet (PayloadContext *rdt, AVStream *st,
const uint8_t *buf, int len, int flags)
{
int seq = 1, res;
- ByteIOContext *pb = rdt->rmctx->pb;
- RMContext *rm = rdt->rmctx->priv_data;
+ ByteIOContext pb;
- if (rm->audio_pkt_cnt == 0) {
+ if (rdt->audio_pkt_cnt == 0) {
int pos;
- url_open_buf (&pb, buf, len, URL_RDONLY);
+ init_put_byte(&pb, buf, len, 0, NULL, NULL, NULL, NULL);
flags = (flags & PKT_FLAG_KEY) ? 2 : 0;
- rdt->rmctx->pb = pb;
- res = ff_rm_parse_packet (rdt->rmctx, st, len, pkt,
+ res = ff_rm_parse_packet (rdt->rmctx, &pb, st, rdt->rmst[st->index], len, pkt,
&seq, &flags, timestamp);
- pos = url_ftell(pb);
- url_close_buf (pb);
+ pos = url_ftell(&pb);
if (res < 0)
return res;
- if (rm->audio_pkt_cnt > 0 &&
+ rdt->audio_pkt_cnt = res;
+ if (rdt->audio_pkt_cnt > 0 &&
st->codec->codec_id == CODEC_ID_AAC) {
memcpy (rdt->buffer, buf + pos, len - pos);
- url_open_buf (&pb, rdt->buffer, len - pos, URL_RDONLY);
- rdt->rmctx->pb = pb;
+ rdt->rmctx->pb = av_alloc_put_byte (rdt->buffer, len - pos, 0,
+ NULL, NULL, NULL, NULL);
}
} else {
- ff_rm_retrieve_cache (rdt->rmctx, st, pkt);
- if (rm->audio_pkt_cnt == 0 &&
+ rdt->audio_pkt_cnt =
+ ff_rm_retrieve_cache (rdt->rmctx, rdt->rmctx->pb,
+ st, rdt->rmst[st->index], pkt);
+ if (rdt->audio_pkt_cnt == 0 &&
st->codec->codec_id == CODEC_ID_AAC)
- url_close_buf (pb);
+ av_freep(&rdt->rmctx->pb);
}
pkt->stream_index = st->index;
pkt->pts = *timestamp;
- return rm->audio_pkt_cnt > 0;
+ return rdt->audio_pkt_cnt > 0;
}
int
ff_rdt_parse_packet(RDTDemuxContext *s, AVPacket *pkt,
const uint8_t *buf, int len)
{
- int seq, flags = 0, rule, sn;
+ int seq_no, flags = 0, stream_id, set_id, is_keyframe;
uint32_t timestamp;
int rv= 0;
if (!s->parse_packet)
return -1;
- if (!buf) {
+ if (!buf && s->prev_stream_id != -1) {
/* return the next packets, if any */
timestamp= 0; ///< Should not be used if buf is NULL, but should be set to the timestamp of the packet returned....
rv= s->parse_packet(s->dynamic_protocol_context,
- s->st, pkt, ×tamp, NULL, 0, flags);
+ s->streams[s->prev_stream_id],
+ pkt, ×tamp, NULL, 0, flags);
return rv;
}
if (len < 12)
return -1;
- rv = ff_rdt_parse_header(buf, len, &sn, &seq, &rule, ×tamp);
+ rv = ff_rdt_parse_header(buf, len, &set_id, &seq_no, &stream_id, &is_keyframe, ×tamp);
if (rv < 0)
return rv;
- if (!(rule & 1) && (sn != s->prev_sn || timestamp != s->prev_ts)) {
+ if (is_keyframe &&
+ (set_id != s->prev_set_id || timestamp != s->prev_timestamp ||
+ stream_id != s->prev_stream_id)) {
flags |= PKT_FLAG_KEY;
- s->prev_sn = sn;
- s->prev_ts = timestamp;
+ s->prev_set_id = set_id;
+ s->prev_timestamp = timestamp;
}
+ s->prev_stream_id = stream_id;
buf += rv;
len -= rv;
+ if (s->prev_stream_id >= s->n_streams) {
+ s->prev_stream_id = -1;
+ return -1;
+ }
+
rv = s->parse_packet(s->dynamic_protocol_context,
- s->st, pkt, ×tamp, buf, len, flags);
+ s->streams[s->prev_stream_id],
+ pkt, ×tamp, buf, len, flags);
return rv;
}
@@ -329,15 +389,6 @@ ff_rdt_subscribe_rule (char *cmd, int size,
stream_nr, rule_nr * 2, stream_nr, rule_nr * 2 + 1);
}
-void
-ff_rdt_subscribe_rule2 (RDTDemuxContext *s, char *cmd, int size,
- int stream_nr, int rule_nr)
-{
- PayloadContext *rdt = s->dynamic_protocol_context;
-
- rdt_load_mdpr(rdt, s->st, rule_nr * 2);
-}
-
static unsigned char *
rdt_parse_b64buf (unsigned int *target_len, const char *p)
{
@@ -354,18 +405,109 @@ rdt_parse_b64buf (unsigned int *target_len, const char *p)
}
static int
-rdt_parse_sdp_line (AVStream *stream, PayloadContext *rdt, const char *line)
+rdt_parse_sdp_line (AVFormatContext *s, int st_index,
+ PayloadContext *rdt, const char *line)
{
+ AVStream *stream = s->streams[st_index];
const char *p = line;
if (av_strstart(p, "OpaqueData:buffer;", &p)) {
rdt->mlti_data = rdt_parse_b64buf(&rdt->mlti_data_size, p);
} else if (av_strstart(p, "StartTime:integer;", &p))
stream->first_dts = atoi(p);
+ else if (av_strstart(p, "ASMRuleBook:string;", &p)) {
+ int n = st_index, first = -1;
+
+ for (n = 0; n < s->nb_streams; n++)
+ if (s->streams[n]->priv_data == stream->priv_data) {
+ if (first == -1) first = n;
+ rdt->rmst[s->streams[n]->index] = ff_rm_alloc_rmstream();
+ rdt_load_mdpr(rdt, s->streams[n], (n - first) * 2);
+
+ if (s->streams[n]->codec->codec_id == CODEC_ID_AAC)
+ s->streams[n]->codec->frame_size = 1; // FIXME
+ }
+ }
return 0;
}
+static void
+real_parse_asm_rule(AVStream *st, const char *p, const char *end)
+{
+ do {
+ /* can be either averagebandwidth= or AverageBandwidth= */
+ if (sscanf(p, " %*1[Aa]verage%*1[Bb]andwidth=%d", &st->codec->bit_rate) == 1)
+ break;
+ if (!(p = strchr(p, ',')) || p > end)
+ p = end;
+ p++;
+ } while (p < end);
+}
+
+static AVStream *
+add_dstream(AVFormatContext *s, AVStream *orig_st)
+{
+ AVStream *st;
+
+ if (!(st = av_new_stream(s, 0)))
+ return NULL;
+ st->codec->codec_type = orig_st->codec->codec_type;
+ st->priv_data = orig_st->priv_data;
+ st->first_dts = orig_st->first_dts;
+
+ return st;
+}
+
+static void
+real_parse_asm_rulebook(AVFormatContext *s, AVStream *orig_st,
+ const char *p)
+{
+ const char *end;
+ int n_rules, odd = 0;
+ AVStream *st;
+
+ /**
+ * The ASMRuleBook contains a list of comma-separated strings per rule,
+ * and each rule is separated by a ;. The last one also has a ; at the
+ * end so we can use it as delimiter.
+ * Every rule occurs twice, once for when the RTSP packet header marker
+ * is set and once for if it isn't. We only read the first because we
+ * don't care much (that's what the "odd" variable is for).
+ * Each rule contains a set of one or more statements, optionally
+ * preceeded by a single condition. If there's a condition, the rule
+ * starts with a '#'. Multiple conditions are merged between brackets,
+ * so there are never multiple conditions spread out over separate
+ * statements. Generally, these conditions are bitrate limits (min/max)
+ * for multi-bitrate streams.
+ */
+ if (*p == '\"') p++;
+ for (n_rules = 0; s->nb_streams < MAX_STREAMS;) {
+ if (!(end = strchr(p, ';')))
+ break;
+ if (!odd && end != p) {
+ if (n_rules > 0)
+ st = add_dstream(s, orig_st);
+ else
+ st = orig_st;
+ real_parse_asm_rule(st, p, end);
+ n_rules++;
+ }
+ p = end + 1;
+ odd ^= 1;
+ }
+}
+
+void
+ff_real_parse_sdp_a_line (AVFormatContext *s, int stream_index,
+ const char *line)
+{
+ const char *p = line;
+
+ if (av_strstart(p, "ASMRuleBook:string;", &p))
+ real_parse_asm_rulebook(s, s->streams[stream_index], p);
+}
+
static PayloadContext *
rdt_new_extradata (void)
{
@@ -379,6 +521,13 @@ rdt_new_extradata (void)
static void
rdt_free_extradata (PayloadContext *rdt)
{
+ int i;
+
+ for (i = 0; i < MAX_STREAMS; i++)
+ if (rdt->rmst[i]) {
+ ff_rm_free_rmstream(rdt->rmst[i]);
+ av_freep(&rdt->rmst[i]);
+ }
if (rdt->rmctx)
av_close_input_stream(rdt->rmctx);
av_freep(&rdt->mlti_data);
diff --git a/libavformat/rdt.h b/libavformat/rdt.h
index aa6cbaa..e24a0d5 100644
--- a/libavformat/rdt.h
+++ b/libavformat/rdt.h
@@ -28,7 +28,18 @@
typedef struct RDTDemuxContext RDTDemuxContext;
-RDTDemuxContext *ff_rdt_parse_open(AVFormatContext *ic, AVStream *st,
+/**
+ * Allocate and init the RDT parsing context.
+ * @param ic the containing RTSP demuxer context
+ * @param first_stream_of_set_idx index to the first AVStream in the RTSP
+ * demuxer context's ic->streams array that is part of this
+ * particular stream's set of streams (with identical content)
+ * @param priv_data private data of the payload data handler context
+ * @param handler pointer to the parse_packet() payload parsing function
+ * @return a newly allocated RDTDemuxContext. Free with ff_rdt_parse_close().
+ */
+RDTDemuxContext *ff_rdt_parse_open(AVFormatContext *ic,
+ int first_stream_of_set_idx,
void *priv_data,
RTPDynamicProtocolHandler *handler);
void ff_rdt_parse_close(RDTDemuxContext *s);
@@ -63,23 +74,22 @@ void av_register_rdt_dynamic_payload_handlers(void);
*/
void ff_rdt_subscribe_rule(char *cmd, int size,
int stream_nr, int rule_nr);
-// FIXME this will be removed ASAP
-void ff_rdt_subscribe_rule2(RDTDemuxContext *s, char *cmd, int size,
- int stream_nr, int rule_nr);
/**
* Parse RDT-style packet header.
*
* @param buf input buffer
* @param len length of input buffer
- * @param sn will be set to the stream number this packet belongs to
- * @param seq will be set to the sequence number this packet belongs to
- * @param rn will be set to the rule number this packet belongs to
- * @param ts will be set to the timestamp of the packet
+ * @param set_id will be set to the set ID this packet belongs to
+ * @param seq_no will be set to the sequence number of the packet
+ * @param stream_id will be set to the stream ID this packet belongs to
+ * @param is_keyframe will be whether this packet belongs to a keyframe
+ * @param timestamp will be set to the timestamp of the packet
* @return the amount of bytes consumed, or <0 on error
*/
int ff_rdt_parse_header(const uint8_t *buf, int len,
- int *sn, int *seq, int *rn, uint32_t *ts);
+ int *set_id, int *seq_no, int *stream_id,
+ int *is_keyframe, uint32_t *timestamp);
/**
* Parse RDT-style packet data (header + media data).
@@ -88,4 +98,15 @@ int ff_rdt_parse_header(const uint8_t *buf, int len,
int ff_rdt_parse_packet(RDTDemuxContext *s, AVPacket *pkt,
const uint8_t *buf, int len);
+/**
+ * Parse a server-related SDP line.
+ *
+ * @param s the RTSP AVFormatContext
+ * @param stream_index the index of the first stream in the set represented
+ * by the SDP m= line (in s->streams)
+ * @param buf the SDP line
+ */
+void ff_real_parse_sdp_a_line(AVFormatContext *s, int stream_index,
+ const char *buf);
+
#endif /* AVFORMAT_RDT_H */
diff --git a/libavformat/riff.c b/libavformat/riff.c
index ae6b7ba..0f9b177 100644
--- a/libavformat/riff.c
+++ b/libavformat/riff.c
@@ -33,6 +33,9 @@ const AVCodecTag codec_bmp_tags[] = {
{ CODEC_ID_H264, MKTAG('a', 'v', 'c', '1') },
{ CODEC_ID_H264, MKTAG('V', 'S', 'S', 'H') },
{ CODEC_ID_H263, MKTAG('H', '2', '6', '3') },
+ { CODEC_ID_H263, MKTAG('X', '2', '6', '3') },
+ { CODEC_ID_H263, MKTAG('L', '2', '6', '3') },
+ { CODEC_ID_H263, MKTAG('V', 'X', '1', 'K') },
{ CODEC_ID_H263P, MKTAG('H', '2', '6', '3') },
{ CODEC_ID_H263I, MKTAG('I', '2', '6', '3') }, /* intel h263 */
{ CODEC_ID_H261, MKTAG('H', '2', '6', '1') },
@@ -53,18 +56,34 @@ const AVCodecTag codec_bmp_tags[] = {
{ CODEC_ID_MPEG4, MKTAG('S', 'E', 'D', 'G') },
{ CODEC_ID_MPEG4, MKTAG('R', 'M', 'P', '4') },
{ CODEC_ID_MPEG4, MKTAG('3', 'I', 'V', '2') },
+ { CODEC_ID_MPEG4, MKTAG('F', 'F', 'D', 'S') },
+ { CODEC_ID_MPEG4, MKTAG('F', 'V', 'F', 'W') },
+ { CODEC_ID_MPEG4, MKTAG('D', 'C', 'O', 'D') },
+ { CODEC_ID_MPEG4, MKTAG('M', 'V', 'X', 'M') },
+ { CODEC_ID_MPEG4, MKTAG('P', 'M', '4', 'V') },
+ { CODEC_ID_MPEG4, MKTAG('S', 'M', 'P', '4') },
+ { CODEC_ID_MPEG4, MKTAG('D', 'X', 'G', 'M') },
+ { CODEC_ID_MPEG4, MKTAG('V', 'I', 'D', 'M') },
+ { CODEC_ID_MPEG4, MKTAG('M', '4', 'T', '3') },
+ { CODEC_ID_MPEG4, MKTAG('G', 'E', 'O', 'X') },
+ { CODEC_ID_MPEG4, MKTAG('H', 'D', 'X', '4') }, /* flipped video */
+ { CODEC_ID_MPEG4, MKTAG('D', 'M', 'K', '2') },
+ { CODEC_ID_MPEG4, MKTAG('D', 'I', 'G', 'I') },
+ { CODEC_ID_MPEG4, MKTAG('I', 'N', 'M', 'C') },
{ CODEC_ID_MSMPEG4V3, MKTAG('D', 'I', 'V', '3') }, /* default signature when using MSMPEG4 */
{ CODEC_ID_MSMPEG4V3, MKTAG('M', 'P', '4', '3') },
{ CODEC_ID_MSMPEG4V3, MKTAG('M', 'P', 'G', '3') },
{ CODEC_ID_MSMPEG4V3, MKTAG('D', 'I', 'V', '5') },
{ CODEC_ID_MSMPEG4V3, MKTAG('D', 'I', 'V', '6') },
{ CODEC_ID_MSMPEG4V3, MKTAG('D', 'I', 'V', '4') },
+ { CODEC_ID_MSMPEG4V3, MKTAG('D', 'V', 'X', '3') },
{ CODEC_ID_MSMPEG4V3, MKTAG('A', 'P', '4', '1') },
{ CODEC_ID_MSMPEG4V3, MKTAG('C', 'O', 'L', '1') },
{ CODEC_ID_MSMPEG4V3, MKTAG('C', 'O', 'L', '0') },
{ CODEC_ID_MSMPEG4V2, MKTAG('M', 'P', '4', '2') },
{ CODEC_ID_MSMPEG4V2, MKTAG('D', 'I', 'V', '2') },
{ CODEC_ID_MSMPEG4V1, MKTAG('M', 'P', 'G', '4') },
+ { CODEC_ID_MSMPEG4V1, MKTAG('M', 'P', '4', '1') },
{ CODEC_ID_WMV1, MKTAG('W', 'M', 'V', '1') },
{ CODEC_ID_WMV2, MKTAG('W', 'M', 'V', '2') },
{ CODEC_ID_DVVIDEO, MKTAG('d', 'v', 's', 'd') },
@@ -73,18 +92,22 @@ const AVCodecTag codec_bmp_tags[] = {
{ CODEC_ID_DVVIDEO, MKTAG('d', 'v', '2', '5') },
{ CODEC_ID_DVVIDEO, MKTAG('d', 'v', '5', '0') },
{ CODEC_ID_DVVIDEO, MKTAG('c', 'd', 'v', 'c') }, /* Canopus DV */
+ { CODEC_ID_DVVIDEO, MKTAG('d', 'v', 'c', ' ') },
{ CODEC_ID_MPEG1VIDEO, MKTAG('m', 'p', 'g', '1') },
{ CODEC_ID_MPEG1VIDEO, MKTAG('m', 'p', 'g', '2') },
{ CODEC_ID_MPEG2VIDEO, MKTAG('m', 'p', 'g', '2') },
{ CODEC_ID_MPEG2VIDEO, MKTAG('M', 'P', 'E', 'G') },
{ CODEC_ID_MPEG1VIDEO, MKTAG('P', 'I', 'M', '1') },
+ { CODEC_ID_MPEG2VIDEO, MKTAG('P', 'I', 'M', '2') },
{ CODEC_ID_MPEG1VIDEO, MKTAG('V', 'C', 'R', '2') },
{ CODEC_ID_MPEG1VIDEO, MKTAG( 1 , 0 , 0 , 16) },
{ CODEC_ID_MPEG2VIDEO, MKTAG( 2 , 0 , 0 , 16) },
{ CODEC_ID_MPEG2VIDEO, MKTAG('D', 'V', 'R', ' ') },
{ CODEC_ID_MPEG2VIDEO, MKTAG('M', 'M', 'E', 'S') },
+ { CODEC_ID_MPEG2VIDEO, MKTAG('L', 'M', 'P', '2') }, /* Lead MPEG2 in avi */
{ CODEC_ID_MJPEG, MKTAG('M', 'J', 'P', 'G') },
{ CODEC_ID_MJPEG, MKTAG('L', 'J', 'P', 'G') },
+ { CODEC_ID_MJPEG, MKTAG('d', 'm', 'b', '1') },
{ CODEC_ID_LJPEG, MKTAG('L', 'J', 'P', 'G') },
{ CODEC_ID_MJPEG, MKTAG('J', 'P', 'G', 'L') }, /* Pegasus lossless JPEG */
{ CODEC_ID_JPEGLS, MKTAG('M', 'J', 'L', 'S') }, /* JPEG-LS custom FOURCC for avi - encoder */
@@ -92,6 +115,9 @@ const AVCodecTag codec_bmp_tags[] = {
{ CODEC_ID_MJPEG, MKTAG('j', 'p', 'e', 'g') },
{ CODEC_ID_MJPEG, MKTAG('I', 'J', 'P', 'G') },
{ CODEC_ID_MJPEG, MKTAG('A', 'V', 'R', 'n') },
+ { CODEC_ID_MJPEG, MKTAG('A', 'C', 'D', 'V') },
+ { CODEC_ID_MJPEG, MKTAG('Q', 'I', 'V', 'G') },
+ { CODEC_ID_MJPEG, MKTAG('C', 'J', 'P', 'G') }, /* Creative Webcam JPEG */
{ CODEC_ID_HUFFYUV, MKTAG('H', 'F', 'Y', 'U') },
{ CODEC_ID_FFVHUFF, MKTAG('F', 'F', 'V', 'H') },
{ CODEC_ID_CYUV, MKTAG('C', 'Y', 'U', 'V') },
@@ -161,8 +187,12 @@ const AVCodecTag codec_bmp_tags[] = {
{ CODEC_ID_JPEG2000, MKTAG('M', 'J', '2', 'C') },
{ CODEC_ID_VMNC, MKTAG('V', 'M', 'n', 'c') },
{ CODEC_ID_TARGA, MKTAG('t', 'g', 'a', ' ') },
+ { CODEC_ID_PNG, MKTAG('M', 'P', 'N', 'G') },
{ CODEC_ID_CLJR, MKTAG('c', 'l', 'j', 'r') },
{ CODEC_ID_DIRAC, MKTAG('d', 'r', 'a', 'c') },
+ { CODEC_ID_RPZA, MKTAG('a', 'z', 'p', 'r') },
+ { CODEC_ID_RPZA, MKTAG('R', 'P', 'Z', 'A') },
+ { CODEC_ID_RPZA, MKTAG('r', 'p', 'z', 'a') },
{ CODEC_ID_NONE, 0 }
};
@@ -185,6 +215,8 @@ const AVCodecTag codec_wav_tags[] = {
{ CODEC_ID_ADPCM_G726, 0x0045 },
{ CODEC_ID_MP2, 0x0050 },
{ CODEC_ID_MP3, 0x0055 },
+ { CODEC_ID_AMR_NB, 0x0057 },
+ { CODEC_ID_AMR_WB, 0x0058 },
{ CODEC_ID_ADPCM_IMA_DK4, 0x0061 }, /* rogue format number */
{ CODEC_ID_ADPCM_IMA_DK3, 0x0062 }, /* rogue format number */
{ CODEC_ID_VOXWARE, 0x0075 },
@@ -202,6 +234,7 @@ const AVCodecTag codec_wav_tags[] = {
{ CODEC_ID_SONIC, 0x2048 },
{ CODEC_ID_SONIC_LS, 0x2048 },
{ CODEC_ID_AAC, 0x706d },
+ { CODEC_ID_AAC, 0x4143 },
{ CODEC_ID_FLAC, 0xF1AC },
{ CODEC_ID_ADPCM_SWF, ('S'<<8)+'F' },
{ CODEC_ID_VORBIS, ('V'<<8)+'o' }, //HACK/FIXME, does vorbis in WAV/AVI have an (in)official id?
@@ -395,9 +428,9 @@ void get_wav_header(ByteIOContext *pb, AVCodecContext *codec, int size)
}
-int wav_codec_get_id(unsigned int tag, int bps)
+enum CodecID wav_codec_get_id(unsigned int tag, int bps)
{
- int id;
+ enum CodecID id;
id = codec_get_id(codec_wav_tags, tag);
if (id <= 0)
return id;
diff --git a/libavformat/riff.h b/libavformat/riff.h
index bd4e9f1..93c1d71 100644
--- a/libavformat/riff.h
+++ b/libavformat/riff.h
@@ -41,7 +41,7 @@ typedef struct AVCodecTag {
void put_bmp_header(ByteIOContext *pb, AVCodecContext *enc, const AVCodecTag *tags, int for_asf);
int put_wav_header(ByteIOContext *pb, AVCodecContext *enc);
-int wav_codec_get_id(unsigned int tag, int bps);
+enum CodecID wav_codec_get_id(unsigned int tag, int bps);
void get_wav_header(ByteIOContext *pb, AVCodecContext *codec, int size);
extern const AVCodecTag codec_bmp_tags[];
diff --git a/libavformat/rm.h b/libavformat/rm.h
index 4ad1c30..2f45c0f 100644
--- a/libavformat/rm.h
+++ b/libavformat/rm.h
@@ -24,44 +24,10 @@
#include "avformat.h"
+typedef struct RMStream RMStream;
-typedef struct {
- int nb_packets;
- int packet_total_size;
- int packet_max_size;
- /* codec related output */
- int bit_rate;
- float frame_rate;
- int nb_frames; /* current frame number */
- int total_frames; /* total number of frames */
- int num;
- AVCodecContext *enc;
-} StreamInfo;
-
-typedef struct {
- StreamInfo streams[2];
- StreamInfo *audio_stream, *video_stream;
- int data_pos; /* position of the data after the header */
- int nb_packets;
- int old_format;
- int current_stream;
- int remaining_len;
- uint8_t *videobuf; ///< place to store merged video frame
- int videobufsize; ///< current assembled frame size
- int videobufpos; ///< position for the next slice in the video buffer
- int curpic_num; ///< picture number of current frame
- int cur_slice, slices;
- int64_t pktpos; ///< first slice position in file
- /// Audio descrambling matrix parameters
- uint8_t *audiobuf; ///< place to store reordered audio data
- int64_t audiotimestamp; ///< Audio packet timestamp
- int sub_packet_cnt; // Subpacket counter, used while reading
- int sub_packet_size, sub_packet_h, coded_framesize; ///< Descrambling parameters from container
- int audio_stream_num; ///< Stream number for audio packets
- int audio_pkt_cnt; ///< Output packet counter
- int audio_framesize; /// Audio frame size from container
- int sub_packet_lengths[16]; /// Length of each aac subpacket
-} RMContext;
+RMStream *ff_rm_alloc_rmstream (void);
+void ff_rm_free_rmstream (RMStream *rms);
/*< input format for Realmedia-style RTSP streams */
extern AVInputFormat rdt_demuxer;
@@ -71,18 +37,24 @@ extern AVInputFormat rdt_demuxer;
* parameters.
*
* @param s context containing RMContext and ByteIOContext for stream reading
+ * @param pb context to read the data from
* @param st the stream that the MDPR chunk belongs to and where to store the
* parameters read from the chunk into
+ * @param rst real-specific stream information
* @param codec_data_size size of the MDPR chunk
* @return 0 on success, errno codes on error
*/
-int ff_rm_read_mdpr_codecdata (AVFormatContext *s, AVStream *st, int codec_data_size);
+int ff_rm_read_mdpr_codecdata (AVFormatContext *s, ByteIOContext *pb,
+ AVStream *st, RMStream *rst,
+ int codec_data_size);
/**
* Parse one rm-stream packet from the input bytestream.
*
* @param s context containing RMContext and ByteIOContext for stream reading
+ * @param pb context to read the data from
* @param st stream to which the packet to be read belongs
+ * @param rst Real-specific stream information
* @param len packet length to read from the input
* @param pkt packet location to store the parsed packet data
* @param seq pointer to an integer containing the sequence number, may be
@@ -90,9 +62,12 @@ int ff_rm_read_mdpr_codecdata (AVFormatContext *s, AVStream *st, int codec_data_
* @param flags pointer to an integer containing the packet flags, may be
updated
* @param ts pointer to timestamp, may be updated
- * @return 0 on success, errno codes on error
+ * @return >=0 on success (where >0 indicates there are cached samples that
+ * can be retrieved with subsequent calls to ff_rm_retrieve_cache()),
+ * errno codes on error
*/
-int ff_rm_parse_packet (AVFormatContext *s, AVStream *st, int len,
+int ff_rm_parse_packet (AVFormatContext *s, ByteIOContext *pb,
+ AVStream *st, RMStream *rst, int len,
AVPacket *pkt, int *seq, int *flags, int64_t *ts);
/**
@@ -104,9 +79,14 @@ int ff_rm_parse_packet (AVFormatContext *s, AVStream *st, int len,
* of those packets can be retrieved sequentially.
*
* @param s context containing RMContext and ByteIOContext for stream reading
+ * @param pb context to read the data from
* @param st stream that this packet belongs to
+ * @param rst Real-specific stream information
* @param pkt location to store the packet data
+ * @returns the number of samples left for subsequent calls to this same
+ * function, or 0 if all samples have been retrieved.
*/
-void ff_rm_retrieve_cache (AVFormatContext *s, AVStream *st, AVPacket *pkt);
+int ff_rm_retrieve_cache (AVFormatContext *s, ByteIOContext *pb,
+ AVStream *st, RMStream *rst, AVPacket *pkt);
#endif /* AVFORMAT_RM_H */
diff --git a/libavformat/rmdec.c b/libavformat/rmdec.c
index 9db09e1..fc066df 100644
--- a/libavformat/rmdec.c
+++ b/libavformat/rmdec.c
@@ -23,6 +23,30 @@
#include "avformat.h"
#include "rm.h"
+struct RMStream {
+ AVPacket pkt; ///< place to store merged video frame / reordered audio data
+ int videobufsize; ///< current assembled frame size
+ int videobufpos; ///< position for the next slice in the video buffer
+ int curpic_num; ///< picture number of current frame
+ int cur_slice, slices;
+ int64_t pktpos; ///< first slice position in file
+ /// Audio descrambling matrix parameters
+ int64_t audiotimestamp; ///< Audio packet timestamp
+ int sub_packet_cnt; // Subpacket counter, used while reading
+ int sub_packet_size, sub_packet_h, coded_framesize; ///< Descrambling parameters from container
+ int audio_framesize; /// Audio frame size from container
+ int sub_packet_lengths[16]; /// Length of each subpacket
+};
+
+typedef struct {
+ int nb_packets;
+ int old_format;
+ int current_stream;
+ int remaining_len;
+ int audio_stream_num; ///< Stream number for audio packets
+ int audio_pkt_cnt; ///< Output packet counter
+} RMDemuxContext;
+
static inline void get_strl(ByteIOContext *pb, char *buf, int buf_size, int len)
{
int i;
@@ -47,30 +71,37 @@ static void get_str8(ByteIOContext *pb, char *buf, int buf_size)
get_strl(pb, buf, buf_size, get_byte(pb));
}
-static int rm_read_audio_stream_info(AVFormatContext *s, AVStream *st,
- int read_all)
+RMStream *ff_rm_alloc_rmstream (void)
+{
+ RMStream *rms = av_mallocz(sizeof(RMStream));
+ rms->curpic_num = -1;
+ return rms;
+}
+
+void ff_rm_free_rmstream (RMStream *rms)
+{
+ av_free_packet(&rms->pkt);
+}
+
+static int rm_read_audio_stream_info(AVFormatContext *s, ByteIOContext *pb,
+ AVStream *st, RMStream *ast, int read_all)
{
- RMContext *rm = s->priv_data;
- ByteIOContext *pb = s->pb;
char buf[256];
uint32_t version;
- int i;
/* ra type header */
version = get_be32(pb); /* version */
if (((version >> 16) & 0xff) == 3) {
int64_t startpos = url_ftell(pb);
- /* very old version */
- for(i = 0; i < 14; i++)
- get_byte(pb);
+ url_fskip(pb, 14);
get_str8(pb, s->title, sizeof(s->title));
get_str8(pb, s->author, sizeof(s->author));
get_str8(pb, s->copyright, sizeof(s->copyright));
get_str8(pb, s->comment, sizeof(s->comment));
if ((startpos + (version & 0xffff)) >= url_ftell(pb) + 2) {
- // fourcc (should always be "lpcJ")
- get_byte(pb);
- get_str8(pb, buf, sizeof(buf));
+ // fourcc (should always be "lpcJ")
+ get_byte(pb);
+ get_str8(pb, buf, sizeof(buf));
}
// Skip extra header crap (this should never happen)
if ((startpos + (version & 0xffff)) > url_ftell(pb))
@@ -87,25 +118,23 @@ static int rm_read_audio_stream_info(AVFormatContext *s, AVStream *st,
get_be16(pb); /* version2 */
get_be32(pb); /* header size */
flavor= get_be16(pb); /* add codec info / flavor */
- rm->coded_framesize = coded_framesize = get_be32(pb); /* coded frame size */
+ ast->coded_framesize = coded_framesize = get_be32(pb); /* coded frame size */
get_be32(pb); /* ??? */
get_be32(pb); /* ??? */
get_be32(pb); /* ??? */
- rm->sub_packet_h = sub_packet_h = get_be16(pb); /* 1 */
+ ast->sub_packet_h = sub_packet_h = get_be16(pb); /* 1 */
st->codec->block_align= get_be16(pb); /* frame size */
- rm->sub_packet_size = sub_packet_size = get_be16(pb); /* sub packet size */
+ ast->sub_packet_size = sub_packet_size = get_be16(pb); /* sub packet size */
get_be16(pb); /* ??? */
if (((version >> 16) & 0xff) == 5) {
- get_be16(pb); get_be16(pb); get_be16(pb); }
+ get_be16(pb); get_be16(pb); get_be16(pb);
+ }
st->codec->sample_rate = get_be16(pb);
get_be32(pb);
st->codec->channels = get_be16(pb);
if (((version >> 16) & 0xff) == 5) {
get_be32(pb);
- buf[0] = get_byte(pb);
- buf[1] = get_byte(pb);
- buf[2] = get_byte(pb);
- buf[3] = get_byte(pb);
+ get_buffer(pb, buf, 4);
buf[4] = 0;
} else {
get_str8(pb, buf, sizeof(buf)); /* desc */
@@ -118,17 +147,17 @@ static int rm_read_audio_stream_info(AVFormatContext *s, AVStream *st,
} else if (!strcmp(buf, "28_8")) {
st->codec->codec_id = CODEC_ID_RA_288;
st->codec->extradata_size= 0;
- rm->audio_framesize = st->codec->block_align;
+ ast->audio_framesize = st->codec->block_align;
st->codec->block_align = coded_framesize;
- if(rm->audio_framesize >= UINT_MAX / sub_packet_h){
- av_log(s, AV_LOG_ERROR, "rm->audio_framesize * sub_packet_h too large\n");
+ if(ast->audio_framesize >= UINT_MAX / sub_packet_h){
+ av_log(s, AV_LOG_ERROR, "ast->audio_framesize * sub_packet_h too large\n");
return -1;
}
- rm->audiobuf = av_malloc(rm->audio_framesize * sub_packet_h);
+ av_new_packet(&ast->pkt, ast->audio_framesize * sub_packet_h);
} else if ((!strcmp(buf, "cook")) || (!strcmp(buf, "atrc")) || (!strcmp(buf, "sipr"))) {
- int codecdata_length, i;
+ int codecdata_length;
get_be16(pb); get_byte(pb);
if (((version >> 16) & 0xff) == 5)
get_byte(pb);
@@ -148,19 +177,18 @@ static int rm_read_audio_stream_info(AVFormatContext *s, AVStream *st,
else st->codec->codec_id = CODEC_ID_ATRAC3;
st->codec->extradata_size= codecdata_length;
st->codec->extradata= av_mallocz(st->codec->extradata_size + FF_INPUT_BUFFER_PADDING_SIZE);
- for(i = 0; i < codecdata_length; i++)
- ((uint8_t*)st->codec->extradata)[i] = get_byte(pb);
- rm->audio_framesize = st->codec->block_align;
- st->codec->block_align = rm->sub_packet_size;
+ get_buffer(pb, st->codec->extradata, st->codec->extradata_size);
+ ast->audio_framesize = st->codec->block_align;
+ st->codec->block_align = ast->sub_packet_size;
- if(rm->audio_framesize >= UINT_MAX / sub_packet_h){
+ if(ast->audio_framesize >= UINT_MAX / sub_packet_h){
av_log(s, AV_LOG_ERROR, "rm->audio_framesize * sub_packet_h too large\n");
return -1;
}
- rm->audiobuf = av_malloc(rm->audio_framesize * sub_packet_h);
+ av_new_packet(&ast->pkt, ast->audio_framesize * sub_packet_h);
} else if (!strcmp(buf, "raac") || !strcmp(buf, "racp")) {
- int codecdata_length, i;
+ int codecdata_length;
get_be16(pb); get_byte(pb);
if (((version >> 16) & 0xff) == 5)
get_byte(pb);
@@ -174,8 +202,7 @@ static int rm_read_audio_stream_info(AVFormatContext *s, AVStream *st,
st->codec->extradata_size = codecdata_length - 1;
st->codec->extradata = av_mallocz(st->codec->extradata_size + FF_INPUT_BUFFER_PADDING_SIZE);
get_byte(pb);
- for(i = 0; i < st->codec->extradata_size; i++)
- ((uint8_t*)st->codec->extradata)[i] = get_byte(pb);
+ get_buffer(pb, st->codec->extradata, st->codec->extradata_size);
}
} else {
st->codec->codec_id = CODEC_ID_NONE;
@@ -196,9 +223,9 @@ static int rm_read_audio_stream_info(AVFormatContext *s, AVStream *st,
}
int
-ff_rm_read_mdpr_codecdata (AVFormatContext *s, AVStream *st, int codec_data_size)
+ff_rm_read_mdpr_codecdata (AVFormatContext *s, ByteIOContext *pb,
+ AVStream *st, RMStream *rst, int codec_data_size)
{
- ByteIOContext *pb = s->pb;
unsigned int v;
int size;
int64_t codec_pos;
@@ -208,7 +235,7 @@ ff_rm_read_mdpr_codecdata (AVFormatContext *s, AVStream *st, int codec_data_size
v = get_be32(pb);
if (v == MKTAG(0xfd, 'a', 'r', '.')) {
/* ra type header */
- if (rm_read_audio_stream_info(s, st, 0))
+ if (rm_read_audio_stream_info(s, pb, st, rst, 0))
return -1;
} else {
int fps, fps2;
@@ -268,19 +295,20 @@ skip:
static int rm_read_header_old(AVFormatContext *s, AVFormatParameters *ap)
{
- RMContext *rm = s->priv_data;
+ RMDemuxContext *rm = s->priv_data;
AVStream *st;
rm->old_format = 1;
st = av_new_stream(s, 0);
if (!st)
return -1;
- return rm_read_audio_stream_info(s, st, 1);
+ st->priv_data = ff_rm_alloc_rmstream();
+ return rm_read_audio_stream_info(s, s->pb, st, st->priv_data, 1);
}
static int rm_read_header(AVFormatContext *s, AVFormatParameters *ap)
{
- RMContext *rm = s->priv_data;
+ RMDemuxContext *rm = s->priv_data;
AVStream *st;
ByteIOContext *pb = s->pb;
unsigned int tag;
@@ -357,7 +385,9 @@ static int rm_read_header(AVFormatContext *s, AVFormatParameters *ap)
get_str8(pb, buf, sizeof(buf)); /* desc */
get_str8(pb, buf, sizeof(buf)); /* mimetype */
st->codec->codec_type = CODEC_TYPE_DATA;
- if (ff_rm_read_mdpr_codecdata(s, st, get_be32(pb)) < 0)
+ st->priv_data = ff_rm_alloc_rmstream();
+ if (ff_rm_read_mdpr_codecdata(s, s->pb, st, st->priv_data,
+ get_be32(pb)) < 0)
return -1;
break;
case MKTAG('D', 'A', 'T', 'A'):
@@ -373,7 +403,6 @@ static int rm_read_header(AVFormatContext *s, AVFormatParameters *ap)
if (!rm->nb_packets && (flags & 4))
rm->nb_packets = 3600 * 25;
get_be32(pb); /* next data header */
- rm->curpic_num = -1;
return 0;
}
@@ -397,7 +426,7 @@ static int get_num(ByteIOContext *pb, int *len)
#define RAW_PACKET_SIZE 1000
static int sync(AVFormatContext *s, int64_t *timestamp, int *flags, int *stream_index, int64_t *pos){
- RMContext *rm = s->priv_data;
+ RMDemuxContext *rm = s->priv_data;
ByteIOContext *pb = s->pb;
int len, num, res, i;
AVStream *st;
@@ -452,83 +481,80 @@ skip:
return -1;
}
-static int rm_assemble_video_frame(AVFormatContext *s, RMContext *rm, AVPacket *pkt, int len)
+static int rm_assemble_video_frame(AVFormatContext *s, ByteIOContext *pb,
+ RMDemuxContext *rm, RMStream *vst,
+ AVPacket *pkt, int len)
{
- ByteIOContext *pb = s->pb;
int hdr, seq, pic_num, len2, pos;
int type;
hdr = get_byte(pb); len--;
type = hdr >> 6;
- switch(type){
- case 0: // slice
- case 2: // last slice
+
+ if(type != 3){ // not frame as a part of packet
seq = get_byte(pb); len--;
+ }
+ if(type != 1){ // not whole frame
len2 = get_num(pb, &len);
- pos = get_num(pb, &len);
+ pos = get_num(pb, &len);
pic_num = get_byte(pb); len--;
- rm->remaining_len = len;
- break;
- case 1: //whole frame
- seq = get_byte(pb); len--;
+ }
+ if(len<0)
+ return -1;
+ rm->remaining_len = len;
+ if(type&1){ // frame, not slice
+ if(type == 3) // frame as a part of packet
+ len= len2;
+ if(rm->remaining_len < len)
+ return -1;
+ rm->remaining_len -= len;
if(av_new_packet(pkt, len + 9) < 0)
return AVERROR(EIO);
pkt->data[0] = 0;
AV_WL32(pkt->data + 1, 1);
AV_WL32(pkt->data + 5, 0);
get_buffer(pb, pkt->data + 9, len);
- rm->remaining_len = 0;
- return 0;
- case 3: //frame as a part of packet
- len2 = get_num(pb, &len);
- pos = get_num(pb, &len);
- pic_num = get_byte(pb); len--;
- rm->remaining_len = len - len2;
- if(av_new_packet(pkt, len2 + 9) < 0)
- return AVERROR(EIO);
- pkt->data[0] = 0;
- AV_WL32(pkt->data + 1, 1);
- AV_WL32(pkt->data + 5, 0);
- get_buffer(pb, pkt->data + 9, len2);
return 0;
}
//now we have to deal with single slice
- if((seq & 0x7F) == 1 || rm->curpic_num != pic_num){
- rm->slices = ((hdr & 0x3F) << 1) + 1;
- rm->videobufsize = len2 + 8*rm->slices + 1;
- av_free(rm->videobuf);
- if(!(rm->videobuf = av_malloc(rm->videobufsize)))
+ if((seq & 0x7F) == 1 || vst->curpic_num != pic_num){
+ vst->slices = ((hdr & 0x3F) << 1) + 1;
+ vst->videobufsize = len2 + 8*vst->slices + 1;
+ av_free_packet(&vst->pkt); //FIXME this should be output.
+ if(av_new_packet(&vst->pkt, vst->videobufsize) < 0)
return AVERROR(ENOMEM);
- rm->videobufpos = 8*rm->slices + 1;
- rm->cur_slice = 0;
- rm->curpic_num = pic_num;
- rm->pktpos = url_ftell(pb);
+ vst->videobufpos = 8*vst->slices + 1;
+ vst->cur_slice = 0;
+ vst->curpic_num = pic_num;
+ vst->pktpos = url_ftell(pb);
}
if(type == 2)
len = FFMIN(len, pos);
- if(++rm->cur_slice > rm->slices)
+ if(++vst->cur_slice > vst->slices)
return 1;
- AV_WL32(rm->videobuf - 7 + 8*rm->cur_slice, 1);
- AV_WL32(rm->videobuf - 3 + 8*rm->cur_slice, rm->videobufpos - 8*rm->slices - 1);
- if(rm->videobufpos + len > rm->videobufsize)
+ AV_WL32(vst->pkt.data - 7 + 8*vst->cur_slice, 1);
+ AV_WL32(vst->pkt.data - 3 + 8*vst->cur_slice, vst->videobufpos - 8*vst->slices - 1);
+ if(vst->videobufpos + len > vst->videobufsize)
return 1;
- if (get_buffer(pb, rm->videobuf + rm->videobufpos, len) != len)
+ if (get_buffer(pb, vst->pkt.data + vst->videobufpos, len) != len)
return AVERROR(EIO);
- rm->videobufpos += len;
+ vst->videobufpos += len;
rm->remaining_len-= len;
- if(type == 2 || (rm->videobufpos) == rm->videobufsize){
- rm->videobuf[0] = rm->cur_slice-1;
- if(av_new_packet(pkt, rm->videobufpos - 8*(rm->slices - rm->cur_slice)) < 0)
- return AVERROR(ENOMEM);
- memcpy(pkt->data, rm->videobuf, 1 + 8*rm->cur_slice);
- memcpy(pkt->data + 1 + 8*rm->cur_slice, rm->videobuf + 1 + 8*rm->slices,
- rm->videobufpos - 1 - 8*rm->slices);
- pkt->pts = AV_NOPTS_VALUE;
- pkt->pos = rm->pktpos;
- return 0;
+ if(type == 2 || (vst->videobufpos) == vst->videobufsize){
+ vst->pkt.data[0] = vst->cur_slice-1;
+ *pkt= vst->pkt;
+ vst->pkt.data= NULL;
+ vst->pkt.size= 0;
+ if(vst->slices != vst->cur_slice) //FIXME find out how to set slices correct from the begin
+ memmove(pkt->data + 1 + 8*vst->cur_slice, pkt->data + 1 + 8*vst->slices,
+ vst->videobufpos - 1 - 8*vst->slices);
+ pkt->size = vst->videobufpos + 8*(vst->cur_slice - vst->slices);
+ pkt->pts = AV_NOPTS_VALUE;
+ pkt->pos = vst->pktpos;
+ return 0;
}
return 1;
@@ -550,15 +576,15 @@ rm_ac3_swap_bytes (AVStream *st, AVPacket *pkt)
}
int
-ff_rm_parse_packet (AVFormatContext *s, AVStream *st, int len, AVPacket *pkt,
+ff_rm_parse_packet (AVFormatContext *s, ByteIOContext *pb,
+ AVStream *st, RMStream *ast, int len, AVPacket *pkt,
int *seq, int *flags, int64_t *timestamp)
{
- ByteIOContext *pb = s->pb;
- RMContext *rm = s->priv_data;
+ RMDemuxContext *rm = s->priv_data;
if (st->codec->codec_type == CODEC_TYPE_VIDEO) {
rm->current_stream= st->id;
- if(rm_assemble_video_frame(s, rm, pkt, len) == 1)
+ if(rm_assemble_video_frame(s, pb, rm, ast, pkt, len))
return -1; //got partial frame
} else if (st->codec->codec_type == CODEC_TYPE_AUDIO) {
if ((st->codec->codec_id == CODEC_ID_RA_288) ||
@@ -566,51 +592,51 @@ ff_rm_parse_packet (AVFormatContext *s, AVStream *st, int len, AVPacket *pkt,
(st->codec->codec_id == CODEC_ID_ATRAC3) ||
(st->codec->codec_id == CODEC_ID_SIPR)) {
int x;
- int sps = rm->sub_packet_size;
- int cfs = rm->coded_framesize;
- int h = rm->sub_packet_h;
- int y = rm->sub_packet_cnt;
- int w = rm->audio_framesize;
+ int sps = ast->sub_packet_size;
+ int cfs = ast->coded_framesize;
+ int h = ast->sub_packet_h;
+ int y = ast->sub_packet_cnt;
+ int w = ast->audio_framesize;
if (*flags & 2)
- y = rm->sub_packet_cnt = 0;
+ y = ast->sub_packet_cnt = 0;
if (!y)
- rm->audiotimestamp = *timestamp;
+ ast->audiotimestamp = *timestamp;
switch(st->codec->codec_id) {
case CODEC_ID_RA_288:
for (x = 0; x < h/2; x++)
- get_buffer(pb, rm->audiobuf+x*2*w+y*cfs, cfs);
+ get_buffer(pb, ast->pkt.data+x*2*w+y*cfs, cfs);
break;
case CODEC_ID_ATRAC3:
case CODEC_ID_COOK:
for (x = 0; x < w/sps; x++)
- get_buffer(pb, rm->audiobuf+sps*(h*x+((h+1)/2)*(y&1)+(y>>1)), sps);
+ get_buffer(pb, ast->pkt.data+sps*(h*x+((h+1)/2)*(y&1)+(y>>1)), sps);
break;
}
- if (++(rm->sub_packet_cnt) < h)
+ if (++(ast->sub_packet_cnt) < h)
return -1;
else {
- rm->sub_packet_cnt = 0;
+ ast->sub_packet_cnt = 0;
rm->audio_stream_num = st->index;
rm->audio_pkt_cnt = h * w / st->codec->block_align - 1;
// Release first audio packet
av_new_packet(pkt, st->codec->block_align);
- memcpy(pkt->data, rm->audiobuf, st->codec->block_align);
- *timestamp = rm->audiotimestamp;
+ memcpy(pkt->data, ast->pkt.data, st->codec->block_align); //FIXME avoid this
+ *timestamp = ast->audiotimestamp;
*flags = 2; // Mark first packet as keyframe
}
} else if (st->codec->codec_id == CODEC_ID_AAC) {
int x;
rm->audio_stream_num = st->index;
- rm->sub_packet_cnt = (get_be16(pb) & 0xf0) >> 4;
- if (rm->sub_packet_cnt) {
- for (x = 0; x < rm->sub_packet_cnt; x++)
- rm->sub_packet_lengths[x] = get_be16(pb);
+ ast->sub_packet_cnt = (get_be16(pb) & 0xf0) >> 4;
+ if (ast->sub_packet_cnt) {
+ for (x = 0; x < ast->sub_packet_cnt; x++)
+ ast->sub_packet_lengths[x] = get_be16(pb);
// Release first audio packet
- rm->audio_pkt_cnt = rm->sub_packet_cnt - 1;
- av_get_packet(pb, pkt, rm->sub_packet_lengths[0]);
+ rm->audio_pkt_cnt = ast->sub_packet_cnt - 1;
+ av_get_packet(pb, pkt, ast->sub_packet_lengths[0]);
*flags = 2; // Mark first packet as keyframe
}
} else {
@@ -645,33 +671,35 @@ ff_rm_parse_packet (AVFormatContext *s, AVStream *st, int len, AVPacket *pkt,
if (*flags & 2)
pkt->flags |= PKT_FLAG_KEY;
- return 0;
+ return st->codec->codec_type == CODEC_TYPE_AUDIO ? rm->audio_pkt_cnt : 0;
}
-void
-ff_rm_retrieve_cache (AVFormatContext *s, AVStream *st, AVPacket *pkt)
+int
+ff_rm_retrieve_cache (AVFormatContext *s, ByteIOContext *pb,
+ AVStream *st, RMStream *ast, AVPacket *pkt)
{
- ByteIOContext *pb = s->pb;
- RMContext *rm = s->priv_data;
+ RMDemuxContext *rm = s->priv_data;
assert (rm->audio_pkt_cnt > 0);
if (st->codec->codec_id == CODEC_ID_AAC)
- av_get_packet(pb, pkt, rm->sub_packet_lengths[rm->sub_packet_cnt - rm->audio_pkt_cnt]);
+ av_get_packet(pb, pkt, ast->sub_packet_lengths[ast->sub_packet_cnt - rm->audio_pkt_cnt]);
else {
av_new_packet(pkt, st->codec->block_align);
- memcpy(pkt->data, rm->audiobuf + st->codec->block_align *
- (rm->sub_packet_h * rm->audio_framesize / st->codec->block_align - rm->audio_pkt_cnt),
+ memcpy(pkt->data, ast->pkt.data + st->codec->block_align * //FIXME avoid this
+ (ast->sub_packet_h * ast->audio_framesize / st->codec->block_align - rm->audio_pkt_cnt),
st->codec->block_align);
}
rm->audio_pkt_cnt--;
pkt->flags = 0;
pkt->stream_index = st->index;
+
+ return rm->audio_pkt_cnt;
}
static int rm_read_packet(AVFormatContext *s, AVPacket *pkt)
{
- RMContext *rm = s->priv_data;
+ RMDemuxContext *rm = s->priv_data;
ByteIOContext *pb = s->pb;
AVStream *st;
int i, len;
@@ -681,21 +709,24 @@ static int rm_read_packet(AVFormatContext *s, AVPacket *pkt)
if (rm->audio_pkt_cnt) {
// If there are queued audio packet return them first
st = s->streams[rm->audio_stream_num];
- ff_rm_retrieve_cache(s, st, pkt);
+ ff_rm_retrieve_cache(s, s->pb, st, st->priv_data, pkt);
} else if (rm->old_format) {
+ RMStream *ast;
+
st = s->streams[0];
+ ast = st->priv_data;
if (st->codec->codec_id == CODEC_ID_RA_288) {
int x, y;
- for (y = 0; y < rm->sub_packet_h; y++)
- for (x = 0; x < rm->sub_packet_h/2; x++)
- if (get_buffer(pb, rm->audiobuf+x*2*rm->audio_framesize+y*rm->coded_framesize, rm->coded_framesize) <= 0)
+ for (y = 0; y < ast->sub_packet_h; y++)
+ for (x = 0; x < ast->sub_packet_h/2; x++)
+ if (get_buffer(pb, ast->pkt.data+x*2*ast->audio_framesize+y*ast->coded_framesize, ast->coded_framesize) <= 0)
return AVERROR(EIO);
rm->audio_stream_num = 0;
- rm->audio_pkt_cnt = rm->sub_packet_h * rm->audio_framesize / st->codec->block_align - 1;
+ rm->audio_pkt_cnt = ast->sub_packet_h * ast->audio_framesize / st->codec->block_align - 1;
// Release first audio packet
av_new_packet(pkt, st->codec->block_align);
- memcpy(pkt->data, rm->audiobuf, st->codec->block_align);
+ memcpy(pkt->data, ast->pkt.data, st->codec->block_align); //FIXME avoid this
pkt->flags |= PKT_FLAG_KEY; // Mark first packet as keyframe
pkt->stream_index = 0;
} else {
@@ -717,7 +748,8 @@ resync:
return AVERROR(EIO);
st = s->streams[i];
- if (ff_rm_parse_packet (s, st, len, pkt, &seq, &flags, ×tamp) < 0)
+ if (ff_rm_parse_packet (s, s->pb, st, st->priv_data, len, pkt,
+ &seq, &flags, ×tamp) < 0)
goto resync;
if((flags&2) && (seq&0x7F) == 1)
@@ -729,10 +761,11 @@ resync:
static int rm_read_close(AVFormatContext *s)
{
- RMContext *rm = s->priv_data;
+ int i;
+
+ for (i=0;i<s->nb_streams;i++)
+ ff_rm_free_rmstream(s->streams[i]->priv_data);
- av_free(rm->audiobuf);
- av_free(rm->videobuf);
return 0;
}
@@ -752,7 +785,7 @@ static int rm_probe(AVProbeData *p)
static int64_t rm_read_dts(AVFormatContext *s, int stream_index,
int64_t *ppos, int64_t pos_limit)
{
- RMContext *rm = s->priv_data;
+ RMDemuxContext *rm = s->priv_data;
int64_t pos, dts;
int stream_index2, flags, len, h;
@@ -795,7 +828,7 @@ static int64_t rm_read_dts(AVFormatContext *s, int stream_index,
AVInputFormat rm_demuxer = {
"rm",
NULL_IF_CONFIG_SMALL("RM format"),
- sizeof(RMContext),
+ sizeof(RMDemuxContext),
rm_probe,
rm_read_header,
rm_read_packet,
@@ -807,6 +840,6 @@ AVInputFormat rm_demuxer = {
AVInputFormat rdt_demuxer = {
"rdt",
NULL_IF_CONFIG_SMALL("RDT demuxer"),
- sizeof(RMContext),
+ sizeof(RMDemuxContext),
NULL, NULL, NULL, rm_read_close, NULL, NULL
};
diff --git a/libavformat/rmenc.c b/libavformat/rmenc.c
index 0e61f02..84a265a 100644
--- a/libavformat/rmenc.c
+++ b/libavformat/rmenc.c
@@ -21,6 +21,25 @@
#include "avformat.h"
#include "rm.h"
+typedef struct {
+ int nb_packets;
+ int packet_total_size;
+ int packet_max_size;
+ /* codec related output */
+ int bit_rate;
+ float frame_rate;
+ int nb_frames; /* current frame number */
+ int total_frames; /* total number of frames */
+ int num;
+ AVCodecContext *enc;
+} StreamInfo;
+
+typedef struct {
+ StreamInfo streams[2];
+ StreamInfo *audio_stream, *video_stream;
+ int data_pos; /* position of the data after the header */
+} RMMuxContext;
+
/* in ms */
#define BUFFER_DURATION 0
@@ -44,7 +63,7 @@ static void put_str8(ByteIOContext *s, const char *tag)
static void rv10_write_header(AVFormatContext *ctx,
int data_size, int index_pos)
{
- RMContext *rm = ctx->priv_data;
+ RMMuxContext *rm = ctx->priv_data;
ByteIOContext *s = ctx->pb;
StreamInfo *stream;
unsigned char *data_offset_ptr, *start_ptr;
@@ -271,7 +290,7 @@ static void write_packet_header(AVFormatContext *ctx, StreamInfo *stream,
static int rm_write_header(AVFormatContext *s)
{
- RMContext *rm = s->priv_data;
+ RMMuxContext *rm = s->priv_data;
StreamInfo *stream;
int n;
AVCodecContext *codec;
@@ -315,7 +334,7 @@ static int rm_write_header(AVFormatContext *s)
static int rm_write_audio(AVFormatContext *s, const uint8_t *buf, int size, int flags)
{
uint8_t *buf1;
- RMContext *rm = s->priv_data;
+ RMMuxContext *rm = s->priv_data;
ByteIOContext *pb = s->pb;
StreamInfo *stream = rm->audio_stream;
int i;
@@ -339,7 +358,7 @@ static int rm_write_audio(AVFormatContext *s, const uint8_t *buf, int size, int
static int rm_write_video(AVFormatContext *s, const uint8_t *buf, int size, int flags)
{
- RMContext *rm = s->priv_data;
+ RMMuxContext *rm = s->priv_data;
ByteIOContext *pb = s->pb;
StreamInfo *stream = rm->video_stream;
int key_frame = !!(flags & PKT_FLAG_KEY);
@@ -393,7 +412,7 @@ static int rm_write_packet(AVFormatContext *s, AVPacket *pkt)
static int rm_write_trailer(AVFormatContext *s)
{
- RMContext *rm = s->priv_data;
+ RMMuxContext *rm = s->priv_data;
int data_size, index_pos, i;
ByteIOContext *pb = s->pb;
@@ -435,7 +454,7 @@ AVOutputFormat rm_muxer = {
NULL_IF_CONFIG_SMALL("RM format"),
"application/vnd.rn-realmedia",
"rm,ra",
- sizeof(RMContext),
+ sizeof(RMMuxContext),
CODEC_ID_AC3,
CODEC_ID_RV10,
rm_write_header,
diff --git a/libavformat/rtp.h b/libavformat/rtp.h
index 4de5919..7819ceb 100644
--- a/libavformat/rtp.h
+++ b/libavformat/rtp.h
@@ -24,6 +24,33 @@
#include "libavcodec/avcodec.h"
#include "avformat.h"
+/** Structure listing useful vars to parse RTP packet payload*/
+typedef struct rtp_payload_data
+{
+ int sizelength;
+ int indexlength;
+ int indexdeltalength;
+ int profile_level_id;
+ int streamtype;
+ int objecttype;
+ char *mode;
+
+ /** mpeg 4 AU headers */
+ struct AUHeaders {
+ int size;
+ int index;
+ int cts_flag;
+ int cts;
+ int dts_flag;
+ int dts;
+ int rap_flag;
+ int streamstate;
+ } *au_headers;
+ int nb_au_headers;
+ int au_headers_length_bytes;
+ int cur_au_index;
+} RTPPayloadData;
+
typedef struct PayloadContext PayloadContext;
typedef struct RTPDynamicProtocolHandler_s RTPDynamicProtocolHandler;
@@ -36,8 +63,7 @@ int rtp_get_codec_info(AVCodecContext *codec, int payload_type);
int rtp_get_payload_type(AVCodecContext *codec);
typedef struct RTPDemuxContext RTPDemuxContext;
-typedef struct rtp_payload_data_s rtp_payload_data_s;
-RTPDemuxContext *rtp_parse_open(AVFormatContext *s1, AVStream *st, URLContext *rtpc, int payload_type, rtp_payload_data_s *rtp_payload_data);
+RTPDemuxContext *rtp_parse_open(AVFormatContext *s1, AVStream *st, URLContext *rtpc, int payload_type, RTPPayloadData *rtp_payload_data);
void rtp_parse_set_dynamic_protocol(RTPDemuxContext *s, PayloadContext *ctx,
RTPDynamicProtocolHandler *handler);
int rtp_parse_packet(RTPDemuxContext *s, AVPacket *pkt,
@@ -63,56 +89,4 @@ int rtp_check_and_send_back_rr(RTPDemuxContext *s, int count);
#define RTCP_TX_RATIO_NUM 5
#define RTCP_TX_RATIO_DEN 1000
-/** Structure listing useful vars to parse RTP packet payload*/
-typedef struct rtp_payload_data_s
-{
- int sizelength;
- int indexlength;
- int indexdeltalength;
- int profile_level_id;
- int streamtype;
- int objecttype;
- char *mode;
-
- /** mpeg 4 AU headers */
- struct AUHeaders {
- int size;
- int index;
- int cts_flag;
- int cts;
- int dts_flag;
- int dts;
- int rap_flag;
- int streamstate;
- } *au_headers;
- int nb_au_headers;
- int au_headers_length_bytes;
- int cur_au_index;
-} rtp_payload_data_t;
-
-#if 0
-typedef enum {
- RTCP_SR = 200,
- RTCP_RR = 201,
- RTCP_SDES = 202,
- RTCP_BYE = 203,
- RTCP_APP = 204
-} rtcp_type_t;
-
-typedef enum {
- RTCP_SDES_END = 0,
- RTCP_SDES_CNAME = 1,
- RTCP_SDES_NAME = 2,
- RTCP_SDES_EMAIL = 3,
- RTCP_SDES_PHONE = 4,
- RTCP_SDES_LOC = 5,
- RTCP_SDES_TOOL = 6,
- RTCP_SDES_NOTE = 7,
- RTCP_SDES_PRIV = 8,
- RTCP_SDES_IMG = 9,
- RTCP_SDES_DOOR = 10,
- RTCP_SDES_SOURCE = 11
-} rtcp_sdes_type_t;
-#endif
-
#endif /* AVFORMAT_RTP_H */
diff --git a/libavformat/rtp_h264.c b/libavformat/rtp_h264.c
index bc29852..7d6f96c 100644
--- a/libavformat/rtp_h264.c
+++ b/libavformat/rtp_h264.c
@@ -348,9 +348,10 @@ static void h264_free_extradata(PayloadContext *data)
av_free(data);
}
-static int parse_h264_sdp_line(AVStream * stream, PayloadContext *h264_data,
- const char *line)
+static int parse_h264_sdp_line(AVFormatContext *s, int st_index,
+ PayloadContext *h264_data, const char *line)
{
+ AVStream *stream = s->streams[st_index];
AVCodecContext *codec = stream->codec;
const char *p = line;
diff --git a/libavformat/rtp_internal.h b/libavformat/rtp_internal.h
index 80c81cb..6b7588f 100644
--- a/libavformat/rtp_internal.h
+++ b/libavformat/rtp_internal.h
@@ -66,7 +66,8 @@ struct RTPDynamicProtocolHandler_s {
enum CodecID codec_id;
// may be null
- int (*parse_sdp_a_line) (AVStream * stream,
+ int (*parse_sdp_a_line) (AVFormatContext *s,
+ int st_index,
PayloadContext *priv_data,
const char *line); ///< Parse the a= line from the sdp field
PayloadContext *(*open) (); ///< allocate any data needed by the rtp parsing for this dynamic data.
@@ -111,7 +112,7 @@ struct RTPDemuxContext {
uint8_t *buf_ptr;
/* special infos for au headers parsing */
- rtp_payload_data_t *rtp_payload_data; // TODO: Move into dynamic payload handlers
+ RTPPayloadData *rtp_payload_data; // TODO: Move into dynamic payload handlers
/* dynamic payload stuff */
DynamicPayloadPacketHandlerProc parse_packet; ///< This is also copied from the dynamic protocol handler structure
diff --git a/libavformat/rtpdec.c b/libavformat/rtpdec.c
index b08509c..7d8fc2c 100644
--- a/libavformat/rtpdec.c
+++ b/libavformat/rtpdec.c
@@ -267,7 +267,7 @@ int rtp_check_and_send_back_rr(RTPDemuxContext *s, int count)
* rtp demux (otherwise CODEC_ID_MPEG2TS packets are returned)
* TODO: change this to not take rtp_payload data, and use the new dynamic payload system.
*/
-RTPDemuxContext *rtp_parse_open(AVFormatContext *s1, AVStream *st, URLContext *rtpc, int payload_type, rtp_payload_data_t *rtp_payload_data)
+RTPDemuxContext *rtp_parse_open(AVFormatContext *s1, AVStream *st, URLContext *rtpc, int payload_type, RTPPayloadData *rtp_payload_data)
{
RTPDemuxContext *s;
@@ -323,7 +323,7 @@ static int rtp_parse_mp4_au(RTPDemuxContext *s, const uint8_t *buf)
{
int au_headers_length, au_header_size, i;
GetBitContext getbitcontext;
- rtp_payload_data_t *infos;
+ RTPPayloadData *infos;
infos = s->rtp_payload_data;
@@ -512,7 +512,7 @@ int rtp_parse_packet(RTPDemuxContext *s, AVPacket *pkt,
if (rtp_parse_mp4_au(s, buf))
return -1;
{
- rtp_payload_data_t *infos = s->rtp_payload_data;
+ RTPPayloadData *infos = s->rtp_payload_data;
if (infos == NULL)
return -1;
buf += infos->au_headers_length_bytes + 2;
diff --git a/libavformat/rtsp.c b/libavformat/rtsp.c
index f7596d2..464cffe 100644
--- a/libavformat/rtsp.c
+++ b/libavformat/rtsp.c
@@ -39,62 +39,6 @@
//#define DEBUG
//#define DEBUG_RTP_TCP
-enum RTSPClientState {
- RTSP_STATE_IDLE,
- RTSP_STATE_PLAYING,
- RTSP_STATE_PAUSED,
-};
-
-enum RTSPServerType {
- RTSP_SERVER_RTP, /*< Standard-compliant RTP-server */
- RTSP_SERVER_REAL, /*< Realmedia-style server */
- RTSP_SERVER_LAST
-};
-
-enum RTSPTransport {
- RTSP_TRANSPORT_RTP,
- RTSP_TRANSPORT_RDT,
- RTSP_TRANSPORT_LAST
-};
-
-typedef struct RTSPState {
- URLContext *rtsp_hd; /* RTSP TCP connexion handle */
- int nb_rtsp_streams;
- struct RTSPStream **rtsp_streams;
-
- enum RTSPClientState state;
- int64_t seek_timestamp;
-
- /* XXX: currently we use unbuffered input */
- // ByteIOContext rtsp_gb;
- int seq; /* RTSP command sequence number */
- char session_id[512];
- enum RTSPTransport transport;
- enum RTSPLowerTransport lower_transport;
- enum RTSPServerType server_type;
- char last_reply[2048]; /* XXX: allocate ? */
- void *cur_tx;
- int need_subscription;
-} RTSPState;
-
-typedef struct RTSPStream {
- URLContext *rtp_handle; /* RTP stream handle */
- void *tx_ctx; /* RTP/RDT parse context */
-
- int stream_index; /* corresponding stream index, if any. -1 if none (MPEG2TS case) */
- int interleaved_min, interleaved_max; /* interleave ids, if TCP transport */
- char control_url[1024]; /* url for this stream (from SDP) */
-
- int sdp_port; /* port (from SDP content - not used in RTSP) */
- struct in_addr sdp_ip; /* IP address (from SDP content - not used in RTSP) */
- int sdp_ttl; /* IP TTL (from SDP content - not used in RTSP) */
- int sdp_payload_type; /* payload type - only used in SDP */
- rtp_payload_data_t rtp_payload_data; /* rtp payload parsing infos from SDP */
-
- RTPDynamicProtocolHandler *dynamic_handler; ///< Only valid if it's a dynamic protocol. (This is the handler structure)
- PayloadContext *dynamic_protocol_context; ///< Only valid if it's a dynamic protocol. (This is any private data associated with the dynamic protocol)
-} RTSPStream;
-
static int rtsp_read_play(AVFormatContext *s);
/* XXX: currently, the only way to change the protocols consists in
@@ -283,24 +227,23 @@ static void sdp_parse_fmtp_config(AVCodecContext *codec, char *attr, char *value
return;
}
-typedef struct attrname_map
-{
+typedef struct {
const char *str;
uint16_t type;
uint32_t offset;
-} attrname_map_t;
+} AttrNameMap;
/* All known fmtp parmeters and the corresping RTPAttrTypeEnum */
#define ATTR_NAME_TYPE_INT 0
#define ATTR_NAME_TYPE_STR 1
-static const attrname_map_t attr_names[]=
+static const AttrNameMap attr_names[]=
{
- {"SizeLength", ATTR_NAME_TYPE_INT, offsetof(rtp_payload_data_t, sizelength)},
- {"IndexLength", ATTR_NAME_TYPE_INT, offsetof(rtp_payload_data_t, indexlength)},
- {"IndexDeltaLength", ATTR_NAME_TYPE_INT, offsetof(rtp_payload_data_t, indexdeltalength)},
- {"profile-level-id", ATTR_NAME_TYPE_INT, offsetof(rtp_payload_data_t, profile_level_id)},
- {"StreamType", ATTR_NAME_TYPE_INT, offsetof(rtp_payload_data_t, streamtype)},
- {"mode", ATTR_NAME_TYPE_STR, offsetof(rtp_payload_data_t, mode)},
+ {"SizeLength", ATTR_NAME_TYPE_INT, offsetof(RTPPayloadData, sizelength)},
+ {"IndexLength", ATTR_NAME_TYPE_INT, offsetof(RTPPayloadData, indexlength)},
+ {"IndexDeltaLength", ATTR_NAME_TYPE_INT, offsetof(RTPPayloadData, indexdeltalength)},
+ {"profile-level-id", ATTR_NAME_TYPE_INT, offsetof(RTPPayloadData, profile_level_id)},
+ {"StreamType", ATTR_NAME_TYPE_INT, offsetof(RTPPayloadData, streamtype)},
+ {"mode", ATTR_NAME_TYPE_STR, offsetof(RTPPayloadData, mode)},
{NULL, -1, -1},
};
@@ -332,7 +275,7 @@ static void sdp_parse_fmtp(AVStream *st, const char *p)
RTSPStream *rtsp_st = st->priv_data;
AVCodecContext *codec = st->codec;
- rtp_payload_data_t *rtp_payload_data = &rtsp_st->rtp_payload_data;
+ RTPPayloadData *rtp_payload_data = &rtsp_st->rtp_payload_data;
/* loop on each attribute */
while(rtsp_next_attr_and_value(&p, attr, sizeof(attr), value, sizeof(value)))
@@ -518,7 +461,7 @@ static void sdp_parse_line(AVFormatContext *s, SDPParseState *s1,
rtsp_st = st->priv_data;
if (rtsp_st->sdp_payload_type == payload_type) {
if(rtsp_st->dynamic_handler && rtsp_st->dynamic_handler->parse_sdp_a_line) {
- if(!rtsp_st->dynamic_handler->parse_sdp_a_line(st, rtsp_st->dynamic_protocol_context, buf)) {
+ if(!rtsp_st->dynamic_handler->parse_sdp_a_line(s, i, rtsp_st->dynamic_protocol_context, buf)) {
sdp_parse_fmtp(st, p);
}
} else {
@@ -535,7 +478,7 @@ static void sdp_parse_line(AVFormatContext *s, SDPParseState *s1,
rtsp_st = st->priv_data;
if (rtsp_st->sdp_payload_type == payload_type) {
if(rtsp_st->dynamic_handler && rtsp_st->dynamic_handler->parse_sdp_a_line) {
- rtsp_st->dynamic_handler->parse_sdp_a_line(st, rtsp_st->dynamic_protocol_context, buf);
+ rtsp_st->dynamic_handler->parse_sdp_a_line(s, i, rtsp_st->dynamic_protocol_context, buf);
}
}
}
@@ -550,10 +493,13 @@ static void sdp_parse_line(AVFormatContext *s, SDPParseState *s1,
if (atoi(p) == 1)
rt->transport = RTSP_TRANSPORT_RDT;
} else if (s->nb_streams > 0) {
+ if (rt->server_type == RTSP_SERVER_REAL)
+ ff_real_parse_sdp_a_line(s, s->nb_streams - 1, p);
+
rtsp_st = s->streams[s->nb_streams - 1]->priv_data;
if (rtsp_st->dynamic_handler &&
rtsp_st->dynamic_handler->parse_sdp_a_line)
- rtsp_st->dynamic_handler->parse_sdp_a_line(s->streams[s->nb_streams - 1],
+ rtsp_st->dynamic_handler->parse_sdp_a_line(s, s->nb_streams - 1,
rtsp_st->dynamic_protocol_context, buf);
}
break;
@@ -564,7 +510,11 @@ static int sdp_parse(AVFormatContext *s, const char *content)
{
const char *p;
int letter;
- char buf[2048], *q;
+ /* Some SDP lines, particularly for Realmedia or ASF RTSP streams, contain long SDP
+ * lines containing complete ASF Headers (several kB) or arrays of MDPR (RM stream
+ * descriptor) headers plus "rulebooks" describing their properties. Therefore, the
+ * SDP line buffer is large. */
+ char buf[8192], *q;
SDPParseState sdp_parse_state, *s1 = &sdp_parse_state;
memset(s1, 0, sizeof(SDPParseState));
@@ -738,6 +688,9 @@ void rtsp_parse_line(RTSPHeader *reply, const char *buf)
} else if (av_stristart(p, "RealChallenge1:", &p)) {
skip_spaces(&p);
av_strlcpy(reply->real_challenge, p, sizeof(reply->real_challenge));
+ } else if (av_stristart(p, "Server:", &p)) {
+ skip_spaces(&p);
+ av_strlcpy(reply->server, p, sizeof(reply->server));
}
}
@@ -901,7 +854,7 @@ rtsp_open_transport_ctx(AVFormatContext *s, RTSPStream *rtsp_st)
s->ctx_flags |= AVFMTCTX_NOHEADER;
if (rt->transport == RTSP_TRANSPORT_RDT)
- rtsp_st->tx_ctx = ff_rdt_parse_open(s, st,
+ rtsp_st->tx_ctx = ff_rdt_parse_open(s, st->index,
rtsp_st->dynamic_protocol_context,
rtsp_st->dynamic_handler);
else
@@ -1173,6 +1126,8 @@ static int rtsp_read_header(AVFormatContext *s,
if (rt->server_type != RTSP_SERVER_REAL && reply->real_challenge[0]) {
rt->server_type = RTSP_SERVER_REAL;
continue;
+ } else if (!strncasecmp(reply->server, "WMServer/", 9)) {
+ rt->server_type = RTSP_SERVER_WMS;
} else if (rt->server_type == RTSP_SERVER_REAL) {
strcpy(real_challenge, reply->real_challenge);
}
@@ -1281,7 +1236,7 @@ static int tcp_read_packet(AVFormatContext *s, RTSPStream **prtsp_st,
if (ret != len)
return -1;
if (rt->transport == RTSP_TRANSPORT_RDT &&
- ff_rdt_parse_header(buf, len, &id, NULL, NULL, NULL) < 0)
+ ff_rdt_parse_header(buf, len, &id, NULL, NULL, NULL, NULL) < 0)
return -1;
/* find the matching stream */
@@ -1344,33 +1299,68 @@ static int rtsp_read_packet(AVFormatContext *s,
RTSPState *rt = s->priv_data;
RTSPStream *rtsp_st;
int ret, len;
- uint8_t buf[RTP_MAX_PACKET_LENGTH];
+ uint8_t buf[10 * RTP_MAX_PACKET_LENGTH];
- if (rt->server_type == RTSP_SERVER_REAL && rt->need_subscription) {
+ if (rt->server_type == RTSP_SERVER_REAL) {
int i;
RTSPHeader reply1, *reply = &reply1;
+ enum AVDiscard cache[MAX_STREAMS];
char cmd[1024];
- snprintf(cmd, sizeof(cmd),
- "SET_PARAMETER %s RTSP/1.0\r\n"
- "Subscribe: ",
- s->filename);
- for (i = 0; i < rt->nb_rtsp_streams; i++) {
- if (i != 0) av_strlcat(cmd, ",", sizeof(cmd));
- ff_rdt_subscribe_rule(cmd, sizeof(cmd), i, 0);
- if (rt->transport == RTSP_TRANSPORT_RDT)
- ff_rdt_subscribe_rule2(
- rt->rtsp_streams[i]->tx_ctx,
- cmd, sizeof(cmd), i, 0);
+ for (i = 0; i < s->nb_streams; i++)
+ cache[i] = s->streams[i]->discard;
+
+ if (!rt->need_subscription) {
+ if (memcmp (cache, rt->real_setup_cache,
+ sizeof(enum AVDiscard) * s->nb_streams)) {
+ av_strlcatf(cmd, sizeof(cmd),
+ "SET_PARAMETER %s RTSP/1.0\r\n"
+ "Unsubscribe: %s\r\n",
+ s->filename, rt->last_subscription);
+ rtsp_send_cmd(s, cmd, reply, NULL);
+ if (reply->status_code != RTSP_STATUS_OK)
+ return AVERROR_INVALIDDATA;
+ rt->need_subscription = 1;
+ }
}
- av_strlcat(cmd, "\r\n", sizeof(cmd));
- rtsp_send_cmd(s, cmd, reply, NULL);
- if (reply->status_code != RTSP_STATUS_OK)
- return AVERROR_INVALIDDATA;
- rt->need_subscription = 0;
- if (rt->state == RTSP_STATE_PLAYING)
- rtsp_read_play (s);
+ if (rt->need_subscription) {
+ int r, rule_nr, first = 1;
+
+ memcpy(rt->real_setup_cache, cache,
+ sizeof(enum AVDiscard) * s->nb_streams);
+ rt->last_subscription[0] = 0;
+
+ snprintf(cmd, sizeof(cmd),
+ "SET_PARAMETER %s RTSP/1.0\r\n"
+ "Subscribe: ",
+ s->filename);
+ for (i = 0; i < rt->nb_rtsp_streams; i++) {
+ rule_nr = 0;
+ for (r = 0; r < s->nb_streams; r++) {
+ if (s->streams[r]->priv_data == rt->rtsp_streams[i]) {
+ if (s->streams[r]->discard != AVDISCARD_ALL) {
+ if (!first)
+ av_strlcat(rt->last_subscription, ",",
+ sizeof(rt->last_subscription));
+ ff_rdt_subscribe_rule(
+ rt->last_subscription,
+ sizeof(rt->last_subscription), i, rule_nr);
+ first = 0;
+ }
+ rule_nr++;
+ }
+ }
+ }
+ av_strlcatf(cmd, sizeof(cmd), "%s\r\n", rt->last_subscription);
+ rtsp_send_cmd(s, cmd, reply, NULL);
+ if (reply->status_code != RTSP_STATUS_OK)
+ return AVERROR_INVALIDDATA;
+ rt->need_subscription = 0;
+
+ if (rt->state == RTSP_STATE_PLAYING)
+ rtsp_read_play (s);
+ }
}
/* get next frames from the same RTP packet */
@@ -1399,7 +1389,7 @@ static int rtsp_read_packet(AVFormatContext *s,
case RTSP_LOWER_TRANSPORT_UDP:
case RTSP_LOWER_TRANSPORT_UDP_MULTICAST:
len = udp_read_packet(s, &rtsp_st, buf, sizeof(buf));
- if (len >=0 && rtsp_st->tx_ctx)
+ if (len >=0 && rtsp_st->tx_ctx && rt->transport == RTSP_TRANSPORT_RTP)
rtp_check_and_send_back_rr(rtsp_st->tx_ctx, len);
break;
}
diff --git a/libavformat/rtsp.h b/libavformat/rtsp.h
index 611f5c3..ec3477b 100644
--- a/libavformat/rtsp.h
+++ b/libavformat/rtsp.h
@@ -24,6 +24,8 @@
#include <stdint.h>
#include "avformat.h"
#include "rtspcodes.h"
+#include "rtp.h"
+#include "network.h"
enum RTSPLowerTransport {
RTSP_LOWER_TRANSPORT_UDP = 0,
@@ -64,8 +66,68 @@ typedef struct RTSPHeader {
int seq; /**< sequence number */
char session_id[512];
char real_challenge[64]; /**< the RealChallenge1 field from the server */
+ char server[64];
} RTSPHeader;
+enum RTSPClientState {
+ RTSP_STATE_IDLE,
+ RTSP_STATE_PLAYING,
+ RTSP_STATE_PAUSED,
+};
+
+enum RTSPServerType {
+ RTSP_SERVER_RTP, /**< Standards-compliant RTP-server */
+ RTSP_SERVER_REAL, /**< Realmedia-style server */
+ RTSP_SERVER_WMS, /**< Windows Media server */
+ RTSP_SERVER_LAST
+};
+
+enum RTSPTransport {
+ RTSP_TRANSPORT_RTP,
+ RTSP_TRANSPORT_RDT,
+ RTSP_TRANSPORT_LAST
+};
+
+typedef struct RTSPState {
+ URLContext *rtsp_hd; /* RTSP TCP connexion handle */
+ int nb_rtsp_streams;
+ struct RTSPStream **rtsp_streams;
+
+ enum RTSPClientState state;
+ int64_t seek_timestamp;
+
+ /* XXX: currently we use unbuffered input */
+ // ByteIOContext rtsp_gb;
+ int seq; /* RTSP command sequence number */
+ char session_id[512];
+ enum RTSPTransport transport;
+ enum RTSPLowerTransport lower_transport;
+ enum RTSPServerType server_type;
+ char last_reply[2048]; /* XXX: allocate ? */
+ void *cur_tx;
+ int need_subscription;
+ enum AVDiscard real_setup_cache[MAX_STREAMS];
+ char last_subscription[1024];
+} RTSPState;
+
+typedef struct RTSPStream {
+ URLContext *rtp_handle; /* RTP stream handle */
+ void *tx_ctx; /* RTP/RDT parse context */
+
+ int stream_index; /* corresponding stream index, if any. -1 if none (MPEG2TS case) */
+ int interleaved_min, interleaved_max; /* interleave ids, if TCP transport */
+ char control_url[1024]; /* url for this stream (from SDP) */
+
+ int sdp_port; /* port (from SDP content - not used in RTSP) */
+ struct in_addr sdp_ip; /* IP address (from SDP content - not used in RTSP) */
+ int sdp_ttl; /* IP TTL (from SDP content - not used in RTSP) */
+ int sdp_payload_type; /* payload type - only used in SDP */
+ RTPPayloadData rtp_payload_data; /* rtp payload parsing infos from SDP */
+
+ RTPDynamicProtocolHandler *dynamic_handler; ///< Only valid if it's a dynamic protocol. (This is the handler structure)
+ PayloadContext *dynamic_protocol_context; ///< Only valid if it's a dynamic protocol. (This is any private data associated with the dynamic protocol)
+} RTSPStream;
+
/** the callback can be used to extend the connection setup/teardown step */
enum RTSPCallbackAction {
RTSP_ACTION_SERVER_SETUP,
diff --git a/libavformat/sdp.c b/libavformat/sdp.c
index dfbe52e..d22f516 100644
--- a/libavformat/sdp.c
+++ b/libavformat/sdp.c
@@ -43,7 +43,7 @@ struct sdp_session_level {
const char *name; /**< session name (can be an empty string) */
};
-static void dest_write(char *buff, int size, const char *dest_addr, int ttl)
+static void sdp_write_address(char *buff, int size, const char *dest_addr, int ttl)
{
if (dest_addr) {
if (ttl > 0) {
@@ -65,10 +65,10 @@ static void sdp_write_header(char *buff, int size, struct sdp_session_level *s)
s->id, s->version, s->src_addr,
s->start_time, s->end_time,
s->name[0] ? s->name : "No Name");
- dest_write(buff, size, s->dst_addr, s->ttl);
+ sdp_write_address(buff, size, s->dst_addr, s->ttl);
}
-static int get_address(char *dest_addr, int size, int *ttl, const char *url)
+static int sdp_get_address(char *dest_addr, int size, int *ttl, const char *url)
{
int port;
const char *p;
@@ -157,7 +157,7 @@ static char *extradata2config(AVCodecContext *c)
return config;
}
-static char *sdp_media_attributes(char *buff, int size, AVCodecContext *c, int payload_type)
+static char *sdp_write_media_attributes(char *buff, int size, AVCodecContext *c, int payload_type)
{
char *config = NULL;
@@ -246,12 +246,12 @@ static void sdp_write_media(char *buff, int size, AVCodecContext *c, const char
}
av_strlcatf(buff, size, "m=%s %d RTP/AVP %d\r\n", type, port, payload_type);
- dest_write(buff, size, dest_addr, ttl);
+ sdp_write_address(buff, size, dest_addr, ttl);
if (c->bit_rate) {
av_strlcatf(buff, size, "b=AS:%d\r\n", c->bit_rate / 1000);
}
- sdp_media_attributes(buff, size, c, payload_type);
+ sdp_write_media_attributes(buff, size, c, payload_type);
}
int avf_sdp_create(AVFormatContext *ac[], int n_files, char *buff, int size)
@@ -269,7 +269,7 @@ int avf_sdp_create(AVFormatContext *ac[], int n_files, char *buff, int size)
port = 0;
ttl = 0;
if (n_files == 1) {
- port = get_address(dst, sizeof(dst), &ttl, ac[0]->filename);
+ port = sdp_get_address(dst, sizeof(dst), &ttl, ac[0]->filename);
if (port > 0) {
s.dst_addr = dst;
s.ttl = ttl;
@@ -280,7 +280,7 @@ int avf_sdp_create(AVFormatContext *ac[], int n_files, char *buff, int size)
dst[0] = 0;
for (i = 0; i < n_files; i++) {
if (n_files != 1) {
- port = get_address(dst, sizeof(dst), &ttl, ac[i]->filename);
+ port = sdp_get_address(dst, sizeof(dst), &ttl, ac[i]->filename);
}
for (j = 0; j < ac[i]->nb_streams; j++) {
sdp_write_media(buff, size,
diff --git a/libavformat/segafilm.c b/libavformat/segafilm.c
index 27ff06b..1ec8c28 100644
--- a/libavformat/segafilm.c
+++ b/libavformat/segafilm.c
@@ -40,7 +40,7 @@ typedef struct {
unsigned int sample_size;
int64_t pts;
int keyframe;
-} film_sample_t;
+} film_sample;
typedef struct FilmDemuxContext {
int video_stream_index;
@@ -53,7 +53,7 @@ typedef struct FilmDemuxContext {
enum CodecID video_type;
unsigned int sample_count;
- film_sample_t *sample_table;
+ film_sample *sample_table;
unsigned int current_sample;
unsigned int base_clock;
@@ -163,9 +163,9 @@ static int film_read_header(AVFormatContext *s,
return AVERROR_INVALIDDATA;
film->base_clock = AV_RB32(&scratch[8]);
film->sample_count = AV_RB32(&scratch[12]);
- if(film->sample_count >= UINT_MAX / sizeof(film_sample_t))
+ if(film->sample_count >= UINT_MAX / sizeof(film_sample))
return -1;
- film->sample_table = av_malloc(film->sample_count * sizeof(film_sample_t));
+ film->sample_table = av_malloc(film->sample_count * sizeof(film_sample));
for(i=0; i<s->nb_streams; i++)
av_set_pts_info(s->streams[i], 33, 1, film->base_clock);
@@ -205,7 +205,7 @@ static int film_read_packet(AVFormatContext *s,
{
FilmDemuxContext *film = s->priv_data;
ByteIOContext *pb = s->pb;
- film_sample_t *sample;
+ film_sample *sample;
int ret = 0;
int i;
int left, right;
diff --git a/libavformat/sierravmd.c b/libavformat/sierravmd.c
index 0a5f2fa..1e15a22 100644
--- a/libavformat/sierravmd.c
+++ b/libavformat/sierravmd.c
@@ -39,7 +39,7 @@ typedef struct {
int64_t pts;
int keyframe;
unsigned char frame_record[BYTES_PER_FRAME_RECORD];
-} vmd_frame_t;
+} vmd_frame;
typedef struct VmdDemuxContext {
int video_stream_index;
@@ -47,8 +47,9 @@ typedef struct VmdDemuxContext {
unsigned int frame_count;
unsigned int frames_per_block;
- vmd_frame_t *frame_table;
+ vmd_frame *frame_table;
unsigned int current_frame;
+ int is_indeo3;
int sample_rate;
int64_t audio_sample_counter;
@@ -91,6 +92,10 @@ static int vmd_read_header(AVFormatContext *s,
if (get_buffer(pb, vmd->vmd_header, VMD_HEADER_SIZE) != VMD_HEADER_SIZE)
return AVERROR(EIO);
+ if(vmd->vmd_header[16] == 'i' && vmd->vmd_header[17] == 'v' && vmd->vmd_header[18] == '3')
+ vmd->is_indeo3 = 1;
+ else
+ vmd->is_indeo3 = 0;
/* start up the decoders */
vst = av_new_stream(s, 0);
if (!vst)
@@ -98,10 +103,14 @@ static int vmd_read_header(AVFormatContext *s,
av_set_pts_info(vst, 33, 1, 10);
vmd->video_stream_index = vst->index;
vst->codec->codec_type = CODEC_TYPE_VIDEO;
- vst->codec->codec_id = CODEC_ID_VMDVIDEO;
+ vst->codec->codec_id = vmd->is_indeo3 ? CODEC_ID_INDEO3 : CODEC_ID_VMDVIDEO;
vst->codec->codec_tag = 0; /* no fourcc */
vst->codec->width = AV_RL16(&vmd->vmd_header[12]);
vst->codec->height = AV_RL16(&vmd->vmd_header[14]);
+ if(vmd->is_indeo3 && vst->codec->width > 320){
+ vst->codec->width >>= 1;
+ vst->codec->height >>= 1;
+ }
vst->codec->extradata_size = VMD_HEADER_SIZE;
vst->codec->extradata = av_mallocz(VMD_HEADER_SIZE + FF_INPUT_BUFFER_PADDING_SIZE);
memcpy(vst->codec->extradata, vmd->vmd_header, VMD_HEADER_SIZE);
@@ -146,12 +155,12 @@ static int vmd_read_header(AVFormatContext *s,
vmd->frame_table = NULL;
sound_buffers = AV_RL16(&vmd->vmd_header[808]);
raw_frame_table_size = vmd->frame_count * 6;
- if(vmd->frame_count * vmd->frames_per_block >= UINT_MAX / sizeof(vmd_frame_t)){
+ if(vmd->frame_count * vmd->frames_per_block >= UINT_MAX / sizeof(vmd_frame)){
av_log(s, AV_LOG_ERROR, "vmd->frame_count * vmd->frames_per_block too large\n");
return -1;
}
raw_frame_table = av_malloc(raw_frame_table_size);
- vmd->frame_table = av_malloc((vmd->frame_count * vmd->frames_per_block + sound_buffers) * sizeof(vmd_frame_t));
+ vmd->frame_table = av_malloc((vmd->frame_count * vmd->frames_per_block + sound_buffers) * sizeof(vmd_frame));
if (!raw_frame_table || !vmd->frame_table) {
av_free(raw_frame_table);
av_free(vmd->frame_table);
@@ -248,7 +257,7 @@ static int vmd_read_packet(AVFormatContext *s,
VmdDemuxContext *vmd = s->priv_data;
ByteIOContext *pb = s->pb;
int ret = 0;
- vmd_frame_t *frame;
+ vmd_frame *frame;
if (vmd->current_frame >= vmd->frame_count)
return AVERROR(EIO);
@@ -261,8 +270,11 @@ static int vmd_read_packet(AVFormatContext *s,
return AVERROR(ENOMEM);
pkt->pos= url_ftell(pb);
memcpy(pkt->data, frame->frame_record, BYTES_PER_FRAME_RECORD);
- ret = get_buffer(pb, pkt->data + BYTES_PER_FRAME_RECORD,
- frame->frame_size);
+ if(vmd->is_indeo3)
+ ret = get_buffer(pb, pkt->data, frame->frame_size);
+ else
+ ret = get_buffer(pb, pkt->data + BYTES_PER_FRAME_RECORD,
+ frame->frame_size);
if (ret != frame->frame_size) {
av_free_packet(pkt);
diff --git a/libavformat/udp.c b/libavformat/udp.c
index a8e8cd8..1101ffc 100644
--- a/libavformat/udp.c
+++ b/libavformat/udp.c
@@ -32,6 +32,7 @@
#ifdef HAVE_SYS_SELECT_H
#include <sys/select.h>
#endif
+#include <sys/time.h>
#ifndef IPV6_ADD_MEMBERSHIP
#define IPV6_ADD_MEMBERSHIP IPV6_JOIN_GROUP
@@ -336,7 +337,7 @@ int udp_get_file_handle(URLContext *h)
static int udp_open(URLContext *h, const char *uri, int flags)
{
char hostname[1024];
- int port, udp_fd = -1, tmp;
+ int port, udp_fd = -1, tmp, bind_ret = -1;
UDPContext *s = NULL;
int is_output;
const char *p;
@@ -404,7 +405,13 @@ static int udp_open(URLContext *h, const char *uri, int flags)
goto fail;
/* the bind is needed to give a port to the socket now */
- if (bind(udp_fd,(struct sockaddr *)&my_addr, len) < 0)
+ /* if multicast, try the multicast address bind first */
+ if (s->is_multicast && !(h->flags & URL_WRONLY)) {
+ bind_ret = bind(udp_fd,(struct sockaddr *)&s->dest_addr, len);
+ }
+ /* bind to the local address if not multicast or if the multicast
+ * bind failed */
+ if (bind_ret < 0 && bind(udp_fd,(struct sockaddr *)&my_addr, len) < 0)
goto fail;
len = sizeof(my_addr);
@@ -437,6 +444,8 @@ static int udp_open(URLContext *h, const char *uri, int flags)
if (setsockopt(udp_fd, SOL_SOCKET, SO_RCVBUF, &tmp, sizeof(tmp)) < 0) {
av_log(NULL, AV_LOG_WARNING, "setsockopt(SO_RECVBUF): %s\n", strerror(errno));
}
+ /* make the socket non-blocking */
+ ff_socket_nonblock(udp_fd, 1);
}
s->udp_fd = udp_fd;
@@ -468,7 +477,7 @@ static int udp_read(URLContext *h, uint8_t *buf, int size)
return AVERROR(EIO);
if (!(ret > 0 && FD_ISSET(s->udp_fd, &rfds)))
continue;
- len = recv(s->udp_fd, buf, size, MSG_DONTWAIT);
+ len = recv(s->udp_fd, buf, size, 0);
if (len < 0) {
if (ff_neterrno() != FF_NETERROR(EAGAIN) &&
ff_neterrno() != FF_NETERROR(EINTR))
diff --git a/libavformat/utils.c b/libavformat/utils.c
index 7a91b0f..9c3acfb 100644
--- a/libavformat/utils.c
+++ b/libavformat/utils.c
@@ -21,6 +21,7 @@
#include "avformat.h"
#include "internal.h"
#include "libavcodec/opt.h"
+#include "metadata.h"
#include "libavutil/avstring.h"
#include "riff.h"
#include <sys/time.h>
@@ -294,7 +295,7 @@ int av_get_packet(ByteIOContext *s, AVPacket *pkt, int size)
int av_dup_packet(AVPacket *pkt)
{
- if (pkt->destruct != av_destruct_packet) {
+ if (((pkt->destruct == av_destruct_packet_nofree) || (pkt->destruct == NULL)) && pkt->data) {
uint8_t *data;
/* We duplicate the packet and don't forget to add the padding again. */
if((unsigned)pkt->size > (unsigned)pkt->size + FF_INPUT_BUFFER_PADDING_SIZE)
@@ -484,6 +485,10 @@ int av_open_input_stream(AVFormatContext **ic_ptr,
if (pb && !ic->data_offset)
ic->data_offset = url_ftell(ic->pb);
+#if LIBAVFORMAT_VERSION_MAJOR < 53
+ ff_metadata_demux_compat(ic);
+#endif
+
*ic_ptr = ic;
return 0;
fail:
@@ -2284,6 +2289,7 @@ void av_close_input_stream(AVFormatContext *s)
if (st->parser) {
av_parser_close(st->parser);
}
+ av_metadata_free(&st->metadata);
av_free(st->index_entries);
av_free(st->codec->extradata);
av_free(st->codec);
@@ -2294,6 +2300,7 @@ void av_close_input_stream(AVFormatContext *s)
for(i=s->nb_programs-1; i>=0; i--) {
av_freep(&s->programs[i]->provider_name);
av_freep(&s->programs[i]->name);
+ av_metadata_free(&s->programs[i]->metadata);
av_freep(&s->programs[i]->stream_index);
av_freep(&s->programs[i]);
}
@@ -2302,9 +2309,11 @@ void av_close_input_stream(AVFormatContext *s)
av_freep(&s->priv_data);
while(s->nb_chapters--) {
av_free(s->chapters[s->nb_chapters]->title);
+ av_metadata_free(&s->chapters[s->nb_chapters]->metadata);
av_free(s->chapters[s->nb_chapters]);
}
av_freep(&s->chapters);
+ av_metadata_free(&s->metadata);
av_free(s);
}
@@ -2492,6 +2501,10 @@ int av_write_header(AVFormatContext *s)
return AVERROR(ENOMEM);
}
+#if LIBAVFORMAT_VERSION_MAJOR < 53
+ ff_metadata_mux_compat(s);
+#endif
+
if(s->oformat->write_header){
ret = s->oformat->write_header(s);
if (ret < 0)
@@ -2849,6 +2862,7 @@ void dump_format(AVFormatContext *ic,
dump_stream_format(ic, i, index, is_output);
}
+#if LIBAVFORMAT_VERSION_MAJOR < 53
int parse_image_size(int *width_ptr, int *height_ptr, const char *str)
{
return av_parse_video_frame_size(width_ptr, height_ptr, str);
@@ -2862,6 +2876,7 @@ int parse_frame_rate(int *frame_rate_num, int *frame_rate_den, const char *arg)
*frame_rate_den= frame_rate.den;
return ret;
}
+#endif
int64_t av_gettime(void)
{
diff --git a/libavformat/voc.h b/libavformat/voc.h
index 0e8aa61..7993146 100644
--- a/libavformat/voc.h
+++ b/libavformat/voc.h
@@ -27,7 +27,7 @@
typedef struct voc_dec_context {
int remaining_size;
-} voc_dec_context_t;
+} VocDecContext;
typedef enum voc_type {
VOC_TYPE_EOF = 0x00,
@@ -40,7 +40,7 @@ typedef enum voc_type {
VOC_TYPE_REPETITION_END = 0x07,
VOC_TYPE_EXTENDED = 0x08,
VOC_TYPE_NEW_VOICE_DATA = 0x09,
-} voc_type_t;
+} VocType;
extern const unsigned char ff_voc_magic[21];
extern const AVCodecTag ff_voc_codec_tags[];
diff --git a/libavformat/vocdec.c b/libavformat/vocdec.c
index 7aec2ab..8ad6909 100644
--- a/libavformat/vocdec.c
+++ b/libavformat/vocdec.c
@@ -38,7 +38,7 @@ static int voc_probe(AVProbeData *p)
static int voc_read_header(AVFormatContext *s, AVFormatParameters *ap)
{
- voc_dec_context_t *voc = s->priv_data;
+ VocDecContext *voc = s->priv_data;
ByteIOContext *pb = s->pb;
int header_size;
AVStream *st;
@@ -62,10 +62,10 @@ static int voc_read_header(AVFormatContext *s, AVFormatParameters *ap)
int
voc_get_packet(AVFormatContext *s, AVPacket *pkt, AVStream *st, int max_size)
{
- voc_dec_context_t *voc = s->priv_data;
+ VocDecContext *voc = s->priv_data;
AVCodecContext *dec = st->codec;
ByteIOContext *pb = s->pb;
- voc_type_t type;
+ VocType type;
int size;
int sample_rate = 0;
int channels = 1;
@@ -137,7 +137,7 @@ static int voc_read_packet(AVFormatContext *s, AVPacket *pkt)
AVInputFormat voc_demuxer = {
"voc",
NULL_IF_CONFIG_SMALL("Creative Voice file format"),
- sizeof(voc_dec_context_t),
+ sizeof(VocDecContext),
voc_probe,
voc_read_header,
voc_read_packet,
diff --git a/libavformat/vocenc.c b/libavformat/vocenc.c
index 4badb1d..744b233 100644
--- a/libavformat/vocenc.c
+++ b/libavformat/vocenc.c
@@ -24,7 +24,7 @@
typedef struct voc_enc_context {
int param_written;
-} voc_enc_context_t;
+} VocEncContext;
static int voc_write_header(AVFormatContext *s)
{
@@ -46,7 +46,7 @@ static int voc_write_header(AVFormatContext *s)
static int voc_write_packet(AVFormatContext *s, AVPacket *pkt)
{
- voc_enc_context_t *voc = s->priv_data;
+ VocEncContext *voc = s->priv_data;
AVCodecContext *enc = s->streams[0]->codec;
ByteIOContext *pb = s->pb;
@@ -93,7 +93,7 @@ AVOutputFormat voc_muxer = {
NULL_IF_CONFIG_SMALL("Creative Voice file format"),
"audio/x-voc",
"voc",
- sizeof(voc_enc_context_t),
+ sizeof(VocEncContext),
CODEC_ID_PCM_U8,
CODEC_ID_NONE,
voc_write_header,
diff --git a/libavutil/Makefile b/libavutil/Makefile
index bcbc9b3..be2f3e1 100644
--- a/libavutil/Makefile
+++ b/libavutil/Makefile
@@ -2,6 +2,23 @@ include $(SUBDIR)../config.mak
NAME = avutil
+HEADERS = adler32.h \
+ avstring.h \
+ avutil.h \
+ base64.h \
+ common.h \
+ crc.h \
+ fifo.h \
+ intfloat_readwrite.h \
+ log.h \
+ lzo.h \
+ mathematics.h \
+ md5.h \
+ mem.h \
+ random.h \
+ rational.h \
+ sha1.h
+
OBJS = adler32.o \
aes.o \
base64.o \
@@ -24,23 +41,6 @@ OBJS = adler32.o \
tree.o \
utils.o \
-HEADERS = adler32.h \
- avstring.h \
- avutil.h \
- base64.h \
- common.h \
- crc.h \
- fifo.h \
- intfloat_readwrite.h \
- log.h \
- lzo.h \
- mathematics.h \
- md5.h \
- mem.h \
- random.h \
- rational.h \
- sha1.h
-
TESTS = $(addsuffix -test$(EXESUF), adler32 aes crc des lls md5 pca random sha1 softfloat tree)
include $(SUBDIR)../subdir.mak
diff --git a/libavutil/avutil.h b/libavutil/avutil.h
index a21a8a4..7366718 100644
--- a/libavutil/avutil.h
+++ b/libavutil/avutil.h
@@ -121,6 +121,7 @@ enum PixelFormat {
PIX_FMT_YUV440P, ///< Planar YUV 4:4:0 (1 Cr & Cb sample per 1x2 Y samples)
PIX_FMT_YUVJ440P, ///< Planar YUV 4:4:0 full scale (jpeg)
PIX_FMT_YUVA420P, ///< Planar YUV 4:2:0, 20bpp, (1 Cr & Cb sample per 2x2 Y & A samples)
+ PIX_FMT_VDPAU_H264,///< H264 HW decoding with VDPAU, data[0] contains a vdpau_render_state struct which contains the bitstream of the slices as well as various fields extracted from headers
PIX_FMT_NB, ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions
};
diff --git a/libavutil/bswap.h b/libavutil/bswap.h
index c14676e..100ed1c 100644
--- a/libavutil/bswap.h
+++ b/libavutil/bswap.h
@@ -30,7 +30,7 @@
#include "config.h"
#include "common.h"
-#if defined(ARCH_ARMV4L)
+#if defined(ARCH_ARM)
# include "arm/bswap.h"
#elif defined(ARCH_BFIN)
# include "bfin/bswap.h"
diff --git a/libavutil/common.h b/libavutil/common.h
index cd43abd..d66120f 100644
--- a/libavutil/common.h
+++ b/libavutil/common.h
@@ -41,8 +41,10 @@
# include <math.h>
#endif /* HAVE_AV_CONFIG_H */
+#define AV_GCC_VERSION_AT_LEAST(x,y) (defined(__GNUC__) && (__GNUC__ > x || __GNUC__ == x && __GNUC_MINOR__ >= y))
+
#ifndef av_always_inline
-#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0)
+#if AV_GCC_VERSION_AT_LEAST(3,1)
# define av_always_inline __attribute__((always_inline)) inline
#else
# define av_always_inline inline
@@ -50,7 +52,7 @@
#endif
#ifndef av_noinline
-#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0)
+#if AV_GCC_VERSION_AT_LEAST(3,1)
# define av_noinline __attribute__((noinline))
#else
# define av_noinline
@@ -58,7 +60,7 @@
#endif
#ifndef av_pure
-#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0)
+#if AV_GCC_VERSION_AT_LEAST(3,1)
# define av_pure __attribute__((pure))
#else
# define av_pure
@@ -66,7 +68,7 @@
#endif
#ifndef av_const
-#if defined(__GNUC__) && (__GNUC__ > 2 || __GNUC__ == 2 && __GNUC_MINOR__ > 5)
+#if AV_GCC_VERSION_AT_LEAST(2,6)
# define av_const __attribute__((const))
#else
# define av_const
@@ -74,7 +76,7 @@
#endif
#ifndef av_cold
-#if defined(__GNUC__) && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ > 2)
+#if (!defined(__ICC) || __ICC > 1100) && AV_GCC_VERSION_AT_LEAST(4,3)
# define av_cold __attribute__((cold))
#else
# define av_cold
@@ -86,7 +88,7 @@
#endif /* HAVE_AV_CONFIG_H */
#ifndef attribute_deprecated
-#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0)
+#if AV_GCC_VERSION_AT_LEAST(3,1)
# define attribute_deprecated __attribute__((deprecated))
#else
# define attribute_deprecated
@@ -321,7 +323,7 @@ static inline av_pure int ff_get_fourcc(const char *s){
}\
}
-#if defined(ARCH_X86) || defined(ARCH_POWERPC) || defined(ARCH_BFIN)
+#if defined(ARCH_X86) || defined(ARCH_PPC) || defined(ARCH_BFIN)
#define AV_READ_TIME read_time
#if defined(ARCH_X86)
static inline uint64_t read_time(void)
diff --git a/libavutil/internal.h b/libavutil/internal.h
index 5ade1af..251cc33 100644
--- a/libavutil/internal.h
+++ b/libavutil/internal.h
@@ -33,9 +33,10 @@
#include <stdint.h>
#include <stddef.h>
#include <assert.h>
+#include "common.h"
#ifndef attribute_align_arg
-#if defined(__GNUC__) && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__>1)
+#if (!defined(__ICC) || __ICC > 1100) && AV_GCC_VERSION_AT_LEAST(4,2)
# define attribute_align_arg __attribute__((force_align_arg_pointer))
#else
# define attribute_align_arg
@@ -43,7 +44,7 @@
#endif
#ifndef attribute_used
-#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0)
+#if AV_GCC_VERSION_AT_LEAST(3,1)
# define attribute_used __attribute__((used))
#else
# define attribute_used
@@ -148,17 +149,14 @@ static inline av_const int FASTDIV(int a, int b)
: "=&r"(r), "=&r"(t) : "r"(a), "r"(b), "r"(ff_inverse));
return r;
}
-#elif defined(ARCH_ARMV4L)
-# define FASTDIV(a,b) \
- ({\
- int ret,dmy;\
- __asm__ volatile(\
- "umull %1, %0, %2, %3"\
- :"=&r"(ret),"=&r"(dmy)\
- :"r"(a),"r"(ff_inverse[b])\
- );\
- ret;\
- })
+#elif defined(ARCH_ARM)
+static inline av_const int FASTDIV(int a, int b)
+{
+ int r, t;
+ __asm__ volatile ("umull %1, %0, %2, %3"
+ : "=&r"(r), "=&r"(t) : "r"(a), "r"(ff_inverse[b]));
+ return r;
+}
#elif defined(CONFIG_FASTDIV)
# define FASTDIV(a,b) ((uint32_t)((((uint64_t)a)*ff_inverse[b])>>32))
#else
diff --git a/libavutil/mem.c b/libavutil/mem.c
index 960074c..328bef7 100644
--- a/libavutil/mem.c
+++ b/libavutil/mem.c
@@ -31,6 +31,7 @@
#undef free
#undef realloc
+#include <stdlib.h>
#ifdef HAVE_MALLOC_H
#include <malloc.h>
#endif
@@ -41,7 +42,7 @@
void *av_malloc(unsigned int size)
{
- void *ptr;
+ void *ptr = NULL;
#ifdef CONFIG_MEMALIGN_HACK
long diff;
#endif
@@ -57,6 +58,8 @@ void *av_malloc(unsigned int size)
diff= ((-(long)ptr - 1)&15) + 1;
ptr = (char*)ptr + diff;
((char*)ptr)[-1]= diff;
+#elif defined (HAVE_POSIX_MEMALIGN)
+ posix_memalign(&ptr,16,size);
#elif defined (HAVE_MEMALIGN)
ptr = memalign(16,size);
/* Why 64?
diff --git a/libavutil/mem.h b/libavutil/mem.h
index a02c7e1..3ea9858 100644
--- a/libavutil/mem.h
+++ b/libavutil/mem.h
@@ -26,6 +26,8 @@
#ifndef AVUTIL_MEM_H
#define AVUTIL_MEM_H
+#include "common.h"
+
#if defined(__ICC) || defined(__SUNPRO_C)
#define DECLARE_ALIGNED(n,t,v) t v __attribute__ ((aligned (n)))
#define DECLARE_ASM_CONST(n,t,v) const t __attribute__ ((aligned (n))) v
@@ -42,13 +44,13 @@
#define DECLARE_ASM_CONST(n,t,v) static const t v
#endif
-#if defined(__GNUC__) && (__GNUC__ > 3 || __GNUC__ == 3 && __GNUC_MINOR__ > 0)
+#if AV_GCC_VERSION_AT_LEAST(3,1)
#define av_malloc_attrib __attribute__((__malloc__))
#else
#define av_malloc_attrib
#endif
-#if defined(__GNUC__) && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ > 2)
+#if (!defined(__ICC) || __ICC > 1100) && AV_GCC_VERSION_AT_LEAST(4,3)
#define av_alloc_size(n) __attribute__((alloc_size(n)))
#else
#define av_alloc_size(n)
diff --git a/libavutil/pca.c b/libavutil/pca.c
index d21814c..76966f0 100644
--- a/libavutil/pca.c
+++ b/libavutil/pca.c
@@ -168,7 +168,7 @@ int ff_pca(PCA *pca, double *eigenvector, double *eigenvalue){
#include <stdio.h>
#include <stdlib.h>
-int main(){
+int main(void){
PCA *pca;
int i, j, k;
#define LEN 8
diff --git a/libavutil/tree.c b/libavutil/tree.c
index 64653aa..008beb6 100644
--- a/libavutil/tree.c
+++ b/libavutil/tree.c
@@ -128,9 +128,11 @@ void *av_tree_insert(AVTreeNode **tp, void *key, int (*cmp)(void *key, const voi
}
void av_tree_destroy(AVTreeNode *t){
- av_tree_destroy(t->child[0]);
- av_tree_destroy(t->child[1]);
- av_free(t);
+ if(t){
+ av_tree_destroy(t->child[0]);
+ av_tree_destroy(t->child[1]);
+ av_free(t);
+ }
}
#if 0
diff --git a/libpostproc/postprocess.c b/libpostproc/postprocess.c
index 56de4f1..b2f5bad 100644
--- a/libpostproc/postprocess.c
+++ b/libpostproc/postprocess.c
@@ -79,9 +79,6 @@ try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#ifdef HAVE_MALLOC_H
-#include <malloc.h>
-#endif
//#undef HAVE_MMX2
//#define HAVE_3DNOW
//#undef HAVE_MMX
@@ -557,7 +554,7 @@ static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride,
//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
//Plain C versions
-#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
+#if !(defined (HAVE_MMX) || defined (HAVE_ALTIVEC)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_C
#endif
@@ -634,7 +631,7 @@ static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride,
// minor note: the HAVE_xyz is messed up after that line so do not use it.
static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
- const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
+ const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
{
PPContext *c= (PPContext *)vc;
PPMode *ppMode= (PPMode *)vm;
@@ -730,7 +727,7 @@ const char pp_help[] =
"\n"
;
-pp_mode_t *pp_get_mode_by_name_and_quality(const char *name, int quality)
+pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
{
char temp[GET_MODE_BUFFER_SIZE];
char *p= temp;
@@ -905,7 +902,7 @@ pp_mode_t *pp_get_mode_by_name_and_quality(const char *name, int quality)
return ppMode;
}
-void pp_free_mode(pp_mode_t *mode){
+void pp_free_mode(pp_mode *mode){
av_free(mode);
}
@@ -947,7 +944,7 @@ static const char * context_to_name(void * ptr) {
static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
-pp_context_t *pp_get_context(int width, int height, int cpuCaps){
+pp_context *pp_get_context(int width, int height, int cpuCaps){
PPContext *c= av_malloc(sizeof(PPContext));
int stride= (width+15)&(~15); //assumed / will realloc if needed
int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
@@ -995,7 +992,7 @@ void pp_postprocess(const uint8_t * src[3], const int srcStride[3],
uint8_t * dst[3], const int dstStride[3],
int width, int height,
const QP_STORE_T *QP_store, int QPStride,
- pp_mode_t *vm, void *vc, int pict_type)
+ pp_mode *vm, void *vc, int pict_type)
{
int mbWidth = (width+15)>>4;
int mbHeight= (height+15)>>4;
diff --git a/libpostproc/postprocess.h b/libpostproc/postprocess.h
index 2a1d6d5..7e30da7 100644
--- a/libpostproc/postprocess.h
+++ b/libpostproc/postprocess.h
@@ -54,10 +54,12 @@ unsigned postproc_version(void);
#include <inttypes.h>
-typedef void pp_context_t;
-typedef void pp_mode_t;
+typedef void pp_context;
+typedef void pp_mode;
#if LIBPOSTPROC_VERSION_INT < (52<<16)
+typedef pp_context pp_context_t;
+typedef pp_mode pp_mode_t;
extern const char *const pp_help; ///< a simple help text
#else
extern const char pp_help[]; ///< a simple help text
@@ -67,19 +69,19 @@ void pp_postprocess(const uint8_t * src[3], const int srcStride[3],
uint8_t * dst[3], const int dstStride[3],
int horizontalSize, int verticalSize,
const QP_STORE_T *QP_store, int QP_stride,
- pp_mode_t *mode, pp_context_t *ppContext, int pict_type);
+ pp_mode *mode, pp_context *ppContext, int pict_type);
/**
- * returns a pp_mode_t or NULL if an error occurred
+ * returns a pp_mode or NULL if an error occurred
* name is the string after "-pp" on the command line
* quality is a number from 0 to PP_QUALITY_MAX
*/
-pp_mode_t *pp_get_mode_by_name_and_quality(const char *name, int quality);
-void pp_free_mode(pp_mode_t *mode);
+pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality);
+void pp_free_mode(pp_mode *mode);
-pp_context_t *pp_get_context(int width, int height, int flags);
-void pp_free_context(pp_context_t *ppContext);
+pp_context *pp_get_context(int width, int height, int flags);
+void pp_free_context(pp_context *ppContext);
#define PP_CPU_CAPS_MMX 0x80000000
#define PP_CPU_CAPS_MMX2 0x20000000
diff --git a/libswscale/Makefile b/libswscale/Makefile
index 84675e0..a959661 100644
--- a/libswscale/Makefile
+++ b/libswscale/Makefile
@@ -3,6 +3,8 @@ include $(SUBDIR)../config.mak
NAME = swscale
FFLIBS = avutil
+HEADERS = swscale.h
+
OBJS = rgb2rgb.o swscale.o swscale_avoption.o
OBJS-$(ARCH_BFIN) += internal_bfin.o swscale_bfin.o yuv2rgb_bfin.o
@@ -11,8 +13,6 @@ OBJS-$(CONFIG_MLIB) += yuv2rgb_mlib.o
OBJS-$(HAVE_ALTIVEC) += yuv2rgb_altivec.o
OBJS-$(HAVE_VIS) += yuv2rgb_vis.o
-HEADERS = swscale.h
-
TESTS = cs_test swscale-example
CLEANFILES = cs_test swscale-example
diff --git a/libswscale/rgb2rgb.c b/libswscale/rgb2rgb.c
index b551412..ac452a0 100644
--- a/libswscale/rgb2rgb.c
+++ b/libswscale/rgb2rgb.c
@@ -47,12 +47,10 @@ void (*rgb15to32)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb16to15)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb16tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb16to32)(const uint8_t *src, uint8_t *dst, long src_size);
-//void (*rgb24tobgr32)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb24to16)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb24to15)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size);
-//void (*rgb32tobgr24)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long src_size);
void (*rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long src_size);
@@ -124,21 +122,6 @@ DECLARE_ASM_CONST(8, uint64_t, blue_16mask) = 0x0000001f0000001fULL;
DECLARE_ASM_CONST(8, uint64_t, red_15mask) = 0x00007c0000007c00ULL;
DECLARE_ASM_CONST(8, uint64_t, green_15mask) = 0x000003e0000003e0ULL;
DECLARE_ASM_CONST(8, uint64_t, blue_15mask) = 0x0000001f0000001fULL;
-
-#if 0
-static volatile uint64_t __attribute__((aligned(8))) b5Dither;
-static volatile uint64_t __attribute__((aligned(8))) g5Dither;
-static volatile uint64_t __attribute__((aligned(8))) g6Dither;
-static volatile uint64_t __attribute__((aligned(8))) r5Dither;
-
-static uint64_t __attribute__((aligned(8))) dither4[2]={
- 0x0103010301030103LL,
- 0x0200020002000200LL,};
-
-static uint64_t __attribute__((aligned(8))) dither8[2]={
- 0x0602060206020602LL,
- 0x0004000400040004LL,};
-#endif
#endif /* defined(ARCH_X86) */
#define RGB2YUV_SHIFT 8
diff --git a/libswscale/rgb2rgb.h b/libswscale/rgb2rgb.h
index 5cc6a2d..df912c8 100644
--- a/libswscale/rgb2rgb.h
+++ b/libswscale/rgb2rgb.h
@@ -48,25 +48,25 @@ extern void (*rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size);
extern void (*rgb32tobgr16)(const uint8_t *src, uint8_t *dst, long src_size);
extern void (*rgb32tobgr15)(const uint8_t *src, uint8_t *dst, long src_size);
-extern void rgb24to32 (const uint8_t *src, uint8_t *dst, long src_size);
-extern void rgb32to24 (const uint8_t *src, uint8_t *dst, long src_size);
-extern void rgb16tobgr32(const uint8_t *src, uint8_t *dst, long src_size);
-extern void rgb16to24 (const uint8_t *src, uint8_t *dst, long src_size);
-extern void rgb16tobgr16(const uint8_t *src, uint8_t *dst, long src_size);
-extern void rgb16tobgr15(const uint8_t *src, uint8_t *dst, long src_size);
-extern void rgb15tobgr32(const uint8_t *src, uint8_t *dst, long src_size);
-extern void rgb15to24 (const uint8_t *src, uint8_t *dst, long src_size);
-extern void rgb15tobgr16(const uint8_t *src, uint8_t *dst, long src_size);
-extern void rgb15tobgr15(const uint8_t *src, uint8_t *dst, long src_size);
-extern void bgr8torgb8 (const uint8_t *src, uint8_t *dst, long src_size);
-
-
-extern void palette8topacked32(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
-extern void palette8topacked24(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
-extern void palette8torgb16(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
-extern void palette8tobgr16(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
-extern void palette8torgb15(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
-extern void palette8tobgr15(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
+void rgb24to32 (const uint8_t *src, uint8_t *dst, long src_size);
+void rgb32to24 (const uint8_t *src, uint8_t *dst, long src_size);
+void rgb16tobgr32(const uint8_t *src, uint8_t *dst, long src_size);
+void rgb16to24 (const uint8_t *src, uint8_t *dst, long src_size);
+void rgb16tobgr16(const uint8_t *src, uint8_t *dst, long src_size);
+void rgb16tobgr15(const uint8_t *src, uint8_t *dst, long src_size);
+void rgb15tobgr32(const uint8_t *src, uint8_t *dst, long src_size);
+void rgb15to24 (const uint8_t *src, uint8_t *dst, long src_size);
+void rgb15tobgr16(const uint8_t *src, uint8_t *dst, long src_size);
+void rgb15tobgr15(const uint8_t *src, uint8_t *dst, long src_size);
+void bgr8torgb8 (const uint8_t *src, uint8_t *dst, long src_size);
+
+
+void palette8topacked32(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
+void palette8topacked24(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
+void palette8torgb16(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
+void palette8tobgr16(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
+void palette8torgb15(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
+void palette8tobgr15(const uint8_t *src, uint8_t *dst, long num_pixels, const uint8_t *palette);
/**
* Height should be a multiple of 2 and width should be a multiple of 16.
diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c
index ab82035..ce76c5e 100644
--- a/libswscale/rgb2rgb_template.c
+++ b/libswscale/rgb2rgb_template.c
@@ -2016,14 +2016,14 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
{
#ifdef HAVE_MMX
__asm__ volatile(
- "xorl %%eax, %%eax \n\t"
+ "xor %%"REG_a", %%"REG_a" \n\t"
"pcmpeqw %%mm7, %%mm7 \n\t"
"psrlw $8, %%mm7 \n\t" // FF,00,FF,00...
ASMALIGN(4)
"1: \n\t"
- PREFETCH" 64(%0, %%eax, 4) \n\t"
- "movq (%0, %%eax, 4), %%mm0 \n\t" // UYVY UYVY(0)
- "movq 8(%0, %%eax, 4), %%mm1 \n\t" // UYVY UYVY(4)
+ PREFETCH" 64(%0, %%"REG_a", 4) \n\t"
+ "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // UYVY UYVY(0)
+ "movq 8(%0, %%"REG_a", 4), %%mm1 \n\t" // UYVY UYVY(4)
"movq %%mm0, %%mm2 \n\t" // UYVY UYVY(0)
"movq %%mm1, %%mm3 \n\t" // UYVY UYVY(4)
"pand %%mm7, %%mm0 \n\t" // U0V0 U0V0(0)
@@ -2033,10 +2033,10 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
"packuswb %%mm1, %%mm0 \n\t" // UVUV UVUV(0)
"packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(0)
- MOVNTQ" %%mm2, (%1, %%eax, 2) \n\t"
+ MOVNTQ" %%mm2, (%1, %%"REG_a", 2) \n\t"
- "movq 16(%0, %%eax, 4), %%mm1 \n\t" // UYVY UYVY(8)
- "movq 24(%0, %%eax, 4), %%mm2 \n\t" // UYVY UYVY(12)
+ "movq 16(%0, %%"REG_a", 4), %%mm1 \n\t" // UYVY UYVY(8)
+ "movq 24(%0, %%"REG_a", 4), %%mm2 \n\t" // UYVY UYVY(12)
"movq %%mm1, %%mm3 \n\t" // UYVY UYVY(8)
"movq %%mm2, %%mm4 \n\t" // UYVY UYVY(12)
"pand %%mm7, %%mm1 \n\t" // U0V0 U0V0(8)
@@ -2046,7 +2046,7 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
"packuswb %%mm2, %%mm1 \n\t" // UVUV UVUV(8)
"packuswb %%mm4, %%mm3 \n\t" // YYYY YYYY(8)
- MOVNTQ" %%mm3, 8(%1, %%eax, 2) \n\t"
+ MOVNTQ" %%mm3, 8(%1, %%"REG_a", 2) \n\t"
"movq %%mm0, %%mm2 \n\t" // UVUV UVUV(0)
"movq %%mm1, %%mm3 \n\t" // UVUV UVUV(8)
@@ -2057,28 +2057,28 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
"packuswb %%mm1, %%mm0 \n\t" // VVVV VVVV(0)
"packuswb %%mm3, %%mm2 \n\t" // UUUU UUUU(0)
- MOVNTQ" %%mm0, (%3, %%eax) \n\t"
- MOVNTQ" %%mm2, (%2, %%eax) \n\t"
+ MOVNTQ" %%mm0, (%3, %%"REG_a") \n\t"
+ MOVNTQ" %%mm2, (%2, %%"REG_a") \n\t"
- "addl $8, %%eax \n\t"
- "cmpl %4, %%eax \n\t"
+ "add $8, %%"REG_a" \n\t"
+ "cmp %4, %%"REG_a" \n\t"
" jb 1b \n\t"
::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
- : "memory", "%eax"
+ : "memory", "%"REG_a
);
ydst += lumStride;
src += srcStride;
__asm__ volatile(
- "xorl %%eax, %%eax \n\t"
+ "xor %%"REG_a", %%"REG_a" \n\t"
ASMALIGN(4)
"1: \n\t"
- PREFETCH" 64(%0, %%eax, 4) \n\t"
- "movq (%0, %%eax, 4), %%mm0 \n\t" // YUYV YUYV(0)
- "movq 8(%0, %%eax, 4), %%mm1 \n\t" // YUYV YUYV(4)
- "movq 16(%0, %%eax, 4), %%mm2 \n\t" // YUYV YUYV(8)
- "movq 24(%0, %%eax, 4), %%mm3 \n\t" // YUYV YUYV(12)
+ PREFETCH" 64(%0, %%"REG_a", 4) \n\t"
+ "movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0)
+ "movq 8(%0, %%"REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4)
+ "movq 16(%0, %%"REG_a", 4), %%mm2 \n\t" // YUYV YUYV(8)
+ "movq 24(%0, %%"REG_a", 4), %%mm3 \n\t" // YUYV YUYV(12)
"psrlw $8, %%mm0 \n\t" // Y0Y0 Y0Y0(0)
"psrlw $8, %%mm1 \n\t" // Y0Y0 Y0Y0(4)
"psrlw $8, %%mm2 \n\t" // Y0Y0 Y0Y0(8)
@@ -2086,15 +2086,15 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
"packuswb %%mm1, %%mm0 \n\t" // YYYY YYYY(0)
"packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(8)
- MOVNTQ" %%mm0, (%1, %%eax, 2) \n\t"
- MOVNTQ" %%mm2, 8(%1, %%eax, 2) \n\t"
+ MOVNTQ" %%mm0, (%1, %%"REG_a", 2) \n\t"
+ MOVNTQ" %%mm2, 8(%1, %%"REG_a", 2) \n\t"
- "addl $8, %%eax \n\t"
- "cmpl %4, %%eax \n\t"
+ "add $8, %%"REG_a" \n\t"
+ "cmp %4, %%"REG_a" \n\t"
" jb 1b \n\t"
::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
- : "memory", "%eax"
+ : "memory", "%"REG_a
);
#else
long i;
diff --git a/libswscale/swscale.c b/libswscale/swscale.c
index 577d093..cfa3580 100644
--- a/libswscale/swscale.c
+++ b/libswscale/swscale.c
@@ -452,6 +452,8 @@ const char *sws_format_name(enum PixelFormat format)
return "nv21";
case PIX_FMT_YUV440P:
return "yuv440p";
+ case PIX_FMT_VDPAU_H264:
+ return "vdpau_h264";
default:
return "Unknown format";
}
@@ -949,11 +951,11 @@ static inline void yuv2rgbXinC_full(SwsContext *c, int16_t *lumFilter, int16_t *
#define COMPILE_C
#endif
-#ifdef ARCH_POWERPC
+#ifdef ARCH_PPC
#if (defined (HAVE_ALTIVEC) || defined (RUNTIME_CPUDETECT)) && defined (CONFIG_GPL)
#define COMPILE_ALTIVEC
#endif //HAVE_ALTIVEC
-#endif //ARCH_POWERPC
+#endif //ARCH_PPC
#if defined(ARCH_X86)
@@ -1628,7 +1630,7 @@ static SwsFunc getSwsFunc(int flags){
return swScale_C;
#else
-#ifdef ARCH_POWERPC
+#ifdef ARCH_PPC
if (flags & SWS_CPU_CAPS_ALTIVEC)
return swScale_altivec;
else
diff --git a/libswscale/swscale_bfin.c b/libswscale/swscale_bfin.c
index 3e63bbd..ed7d957 100644
--- a/libswscale/swscale_bfin.c
+++ b/libswscale/swscale_bfin.c
@@ -26,9 +26,6 @@
#include <inttypes.h>
#include <assert.h>
#include "config.h"
-#ifdef HAVE_MALLOC_H
-#include <malloc.h>
-#endif
#include <unistd.h>
#include "rgb2rgb.h"
#include "swscale.h"
@@ -40,13 +37,13 @@
#define L1CODE
#endif
-extern int ff_bfin_uyvytoyv12 (const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
- long width, long height,
- long lumStride, long chromStride, long srcStride) L1CODE;
+int ff_bfin_uyvytoyv12 (const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
+ long width, long height,
+ long lumStride, long chromStride, long srcStride) L1CODE;
-extern int ff_bfin_yuyvtoyv12 (const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
- long width, long height,
- long lumStride, long chromStride, long srcStride) L1CODE;
+int ff_bfin_yuyvtoyv12 (const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
+ long width, long height,
+ long lumStride, long chromStride, long srcStride) L1CODE;
static int uyvytoyv12_unscaled (SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
int srcSliceH, uint8_t* dst[], int dstStride[])
diff --git a/libswscale/swscale_template.c b/libswscale/swscale_template.c
index 5754c10..4c8bc6e 100644
--- a/libswscale/swscale_template.c
+++ b/libswscale/swscale_template.c
@@ -1826,11 +1826,12 @@ static inline void RENAME(rgb24ToY)(uint8_t *dst, uint8_t *src, long width, uint
static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width, uint32_t *unused)
{
- int i;
- assert(src1==src2);
#ifdef HAVE_MMX
+ assert(src1==src2);
RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_RGB24);
#else
+ int i;
+ assert(src1==src2);
for (i=0; i<width; i++)
{
int r= src1[3*i + 0];
diff --git a/libswscale/yuv2rgb_altivec.c b/libswscale/yuv2rgb_altivec.c
index 0223fdd..baffbc8 100644
--- a/libswscale/yuv2rgb_altivec.c
+++ b/libswscale/yuv2rgb_altivec.c
@@ -91,9 +91,6 @@ adjustment.
#include <inttypes.h>
#include <assert.h>
#include "config.h"
-#ifdef HAVE_MALLOC_H
-#include <malloc.h>
-#endif
#include "rgb2rgb.h"
#include "swscale.h"
#include "swscale_internal.h"
diff --git a/libswscale/yuv2rgb_bfin.c b/libswscale/yuv2rgb_bfin.c
index 1500a96..58cc5b6 100644
--- a/libswscale/yuv2rgb_bfin.c
+++ b/libswscale/yuv2rgb_bfin.c
@@ -27,9 +27,6 @@
#include <inttypes.h>
#include <assert.h>
#include "config.h"
-#ifdef HAVE_MALLOC_H
-#include <malloc.h>
-#endif
#include <unistd.h>
#include "rgb2rgb.h"
#include "swscale.h"
@@ -41,17 +38,17 @@
#define L1CODE
#endif
-extern void ff_bfin_yuv2rgb555_line (uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out,
- int w, uint32_t *coeffs) L1CODE;
+void ff_bfin_yuv2rgb555_line (uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out,
+ int w, uint32_t *coeffs) L1CODE;
-extern void ff_bfin_yuv2rgb565_line (uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out,
- int w, uint32_t *coeffs) L1CODE;
+void ff_bfin_yuv2rgb565_line (uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out,
+ int w, uint32_t *coeffs) L1CODE;
-extern void ff_bfin_yuv2rgb24_line (uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out,
- int w, uint32_t *coeffs) L1CODE;
+void ff_bfin_yuv2rgb24_line (uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out,
+ int w, uint32_t *coeffs) L1CODE;
-typedef void (* ltransform_t)(uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out,
- int w, uint32_t *coeffs);
+typedef void (* ltransform)(uint8_t *Y, uint8_t *U, uint8_t *V, uint8_t *out,
+ int w, uint32_t *coeffs);
static void bfin_prepare_coefficients (SwsContext *c, int rgb, int masks)
@@ -95,7 +92,7 @@ static int core_yuv420_rgb (SwsContext *c,
uint8_t **in, int *instrides,
int srcSliceY, int srcSliceH,
uint8_t **oplanes, int *outstrides,
- ltransform_t lcscf, int rgb, int masks)
+ ltransform lcscf, int rgb, int masks)
{
uint8_t *py,*pu,*pv,*op;
int w = instrides[0];
diff --git a/subdir.mak b/subdir.mak
index 7c38b77..df5bb8a 100644
--- a/subdir.mak
+++ b/subdir.mak
@@ -29,7 +29,7 @@ $(SUBDIR)$(SLIBNAME): $(SUBDIR)$(SLIBNAME_WITH_MAJOR)
$(SUBDIR)$(SLIBNAME_WITH_MAJOR): $(OBJS)
$(SLIB_CREATE_DEF_CMD)
- $(CC) $(SHFLAGS) $(FFLDFLAGS) -o $$@ $$^ $(FFEXTRALIBS) $(EXTRAOBJS)
+ $(CC) $(SHFLAGS) $(FFLDFLAGS) -o $$@ $$(filter-out $(DEP_LIBS),$$^) $(FFEXTRALIBS) $(EXTRAOBJS)
$(SLIB_EXTRA_CMD)
ifdef SUBDIR
diff --git a/tests/regression.sh b/tests/regression.sh
index 24b42da..529a703 100755
--- a/tests/regression.sh
+++ b/tests/regression.sh
@@ -7,7 +7,11 @@
set -e
+target_exec=$5
+target_path=$6
+
datadir="./tests/data"
+target_datadir="${target_path}/${datadir}"
test="${1#regtest-}"
this="$test.$2"
@@ -17,21 +21,24 @@ outfile="$datadir/$4-"
eval do_$test=y
# various files
-ffmpeg="./ffmpeg_g"
+ffmpeg="$target_exec ${target_path}/ffmpeg_g"
tiny_psnr="tests/tiny_psnr"
benchfile="$datadir/$this.bench"
bench="$datadir/$this.bench.tmp"
bench2="$datadir/$this.bench2.tmp"
-raw_src="$3/%02d.pgm"
+raw_src="${target_path}/$3/%02d.pgm"
raw_dst="$datadir/$this.out.yuv"
raw_ref="$datadir/$2.ref.yuv"
-pcm_src="tests/asynth1.sw"
+pcm_src="${target_path}/tests/asynth1.sw"
pcm_dst="$datadir/$this.out.wav"
pcm_ref="$datadir/$2.ref.wav"
crcfile="$datadir/$this.crc"
+target_crcfile="$target_datadir/$this.crc"
if [ X"`echo | md5sum 2> /dev/null`" != X ]; then
do_md5sum() { md5sum -b $1; }
+elif [ X"`echo | md5 2> /dev/null`" != X ]; then
+ do_md5sum() { md5 -r $1 | sed 's# \**\./# *./#'; }
elif [ -x /sbin/md5 ]; then
do_md5sum() { /sbin/md5 -r $1 | sed 's# \**\./# *./#'; }
else
@@ -47,6 +54,7 @@ do_ffmpeg()
{
f="$1"
shift
+ set -- $* ${target_path}/$f
echo $ffmpeg $FFMPEG_OPTS $*
$ffmpeg $FFMPEG_OPTS -benchmark $* > $bench 2> /tmp/ffmpeg$$
egrep -v "^(Stream|Press|Input|Output|frame| Stream| Duration|video:)" /tmp/ffmpeg$$ || true
@@ -67,6 +75,7 @@ do_ffmpeg_nomd5()
{
f="$1"
shift
+ set -- $* ${target_path}/$f
echo $ffmpeg $FFMPEG_OPTS $*
$ffmpeg $FFMPEG_OPTS -benchmark $* > $bench 2> /tmp/ffmpeg$$
egrep -v "^(Stream|Press|Input|Output|frame| Stream| Duration|video:)" /tmp/ffmpeg$$ || true
@@ -86,8 +95,8 @@ do_ffmpeg_crc()
{
f="$1"
shift
- echo $ffmpeg $FFMPEG_OPTS $* -f crc "$crcfile"
- $ffmpeg $FFMPEG_OPTS $* -f crc "$crcfile" > /tmp/ffmpeg$$ 2>&1
+ echo $ffmpeg $FFMPEG_OPTS $* -f crc "$target_crcfile"
+ $ffmpeg $FFMPEG_OPTS $* -f crc "$target_crcfile" > /tmp/ffmpeg$$ 2>&1
egrep -v "^(Stream|Press|Input|Output|frame| Stream| Duration|video:|ffmpeg version| configuration| built)" /tmp/ffmpeg$$ || true
rm -f /tmp/ffmpeg$$
echo "$f `cat $crcfile`" >> $logfile
@@ -108,55 +117,55 @@ do_ffmpeg_nocheck()
do_video_decoding()
{
- do_ffmpeg $raw_dst $1 -i $file -f rawvideo $2 $raw_dst
+ do_ffmpeg $raw_dst $1 -i $target_path/$file -f rawvideo $2
rm -f $raw_dst
}
do_video_encoding()
{
file=${outfile}$1
- do_ffmpeg $file $2 -f image2 -vcodec pgmyuv -i $raw_src $3 $file
+ do_ffmpeg $file $2 -f image2 -vcodec pgmyuv -i $raw_src $3
}
do_audio_encoding()
{
file=${outfile}$1
- do_ffmpeg $file -ab 128k -ac 2 -f s16le -i $pcm_src $3 $file
+ do_ffmpeg $file -ab 128k -ac 2 -f s16le -i $pcm_src $3
}
do_audio_decoding()
{
- do_ffmpeg $pcm_dst -i $file -sample_fmt s16 -f wav $pcm_dst
+ do_ffmpeg $pcm_dst -i $target_path/$file -sample_fmt s16 -f wav
}
do_libav()
{
file=${outfile}libav.$1
- do_ffmpeg $file -t 1 -qscale 10 -f image2 -vcodec pgmyuv -i $raw_src -f s16le -i $pcm_src $2 $file
- do_ffmpeg_crc $file -i $file $3
+ do_ffmpeg $file -t 1 -qscale 10 -f image2 -vcodec pgmyuv -i $raw_src -f s16le -i $pcm_src $2
+ do_ffmpeg_crc $file -i $target_path/$file $3
}
do_streamed_images()
{
file=${outfile}${1}pipe.$1
- do_ffmpeg $file -t 1 -qscale 10 -f image2 -vcodec pgmyuv -i $raw_src -f image2pipe $file
- do_ffmpeg_crc $file -f image2pipe -i $file
+ do_ffmpeg $file -t 1 -qscale 10 -f image2 -vcodec pgmyuv -i $raw_src -f image2pipe
+ do_ffmpeg_crc $file -f image2pipe -i $target_path/$file
}
do_image_formats()
{
file=${outfile}libav%02d.$1
- $ffmpeg -t 0.5 -y -qscale 10 -f image2 -vcodec pgmyuv -i $raw_src $2 $3 -flags +bitexact -sws_flags +accurate_rnd+bitexact $file
+ $ffmpeg -t 0.5 -y -qscale 10 -f image2 -vcodec pgmyuv -i $raw_src $2 $3 -flags +bitexact -sws_flags +accurate_rnd+bitexact $target_path/$file
do_md5sum ${outfile}libav02.$1 >> $logfile
- do_ffmpeg_crc $file $3 -i $file
+ do_ffmpeg_crc $file $3 -i $target_path/$file
wc -c ${outfile}libav02.$1 >> $logfile
}
do_audio_only()
{
file=${outfile}libav.$1
- do_ffmpeg $file -t 1 -qscale 10 -f s16le -i $pcm_src $file
- do_ffmpeg_crc $file -i $file
+ do_ffmpeg $file -t 1 -qscale 10 -f s16le -i $pcm_src
+ do_ffmpeg_crc $file -i $target_path/$file
}
rm -f "$logfile"
@@ -164,8 +173,8 @@ rm -f "$benchfile"
# generate reference for quality check
if [ -n "$do_ref" ]; then
-do_ffmpeg_nocheck $raw_ref -f image2 -vcodec pgmyuv -i $raw_src -an -f rawvideo $raw_ref
-do_ffmpeg_nocheck $pcm_ref -ab 128k -ac 2 -ar 44100 -f s16le -i $pcm_src -f wav $pcm_ref
+do_ffmpeg_nocheck $raw_ref -f image2 -vcodec pgmyuv -i $raw_src -an -f rawvideo $target_path/$raw_ref
+do_ffmpeg_nocheck $pcm_ref -ab 128k -ac 2 -ar 44100 -f s16le -i $pcm_src -f wav $target_path/$pcm_ref
fi
if [ -n "$do_mpeg" ] ; then
@@ -207,7 +216,7 @@ do_video_decoding
# mpeg2 encoding interlaced
file=${outfile}mpeg2reuse.mpg
-do_ffmpeg $file -sameq -me_threshold 256 -mb_threshold 1024 -i ${outfile}mpeg2thread.mpg -vcodec mpeg2video -f mpeg1video -bf 2 -flags +ildct+ilme -threads 4 $file
+do_ffmpeg $file -sameq -me_threshold 256 -mb_threshold 1024 -i ${target_path}/${outfile}mpeg2thread.mpg -vcodec mpeg2video -f mpeg1video -bf 2 -flags +ildct+ilme -threads 4
do_video_decoding
fi
@@ -424,11 +433,11 @@ fi
if [ -n "$do_wma" ] ; then
# wmav1
do_audio_encoding wmav1.asf "-ar 44100" "-acodec wmav1"
-do_ffmpeg_nomd5 $pcm_dst -i $file -f wav $pcm_dst
+do_ffmpeg_nomd5 $pcm_dst -i $target_path/$file -f wav
$tiny_psnr $pcm_dst $pcm_ref 2 8192 >> $logfile
# wmav2
do_audio_encoding wmav2.asf "-ar 44100" "-acodec wmav2"
-do_ffmpeg_nomd5 $pcm_dst -i $file -f wav $pcm_dst
+do_ffmpeg_nomd5 $pcm_dst -i $target_path/$file -f wav
$tiny_psnr $pcm_dst $pcm_ref 2 8192 >> $logfile
fi
@@ -481,9 +490,9 @@ fi
if [ -n "$do_rm" ] ; then
file=${outfile}libav.rm
-do_ffmpeg $file -t 1 -qscale 10 -f image2 -vcodec pgmyuv -i $raw_src -f s16le -i $pcm_src $file
+do_ffmpeg $file -t 1 -qscale 10 -f image2 -vcodec pgmyuv -i $raw_src -f s16le -i $pcm_src
# broken
-#do_ffmpeg_crc $file -i $file
+#do_ffmpeg_crc $file -i $target_path/$file
fi
if [ -n "$do_mpg" ] ; then
@@ -530,8 +539,8 @@ fi
# streamed images
# mjpeg
#file=${outfile}libav.mjpeg
-#do_ffmpeg $file -t 1 -qscale 10 -f image2 -vcodec pgmyuv -i $raw_src $file
-#do_ffmpeg_crc $file -i $file
+#do_ffmpeg $file -t 1 -qscale 10 -f image2 -vcodec pgmyuv -i $raw_src
+#do_ffmpeg_crc $file -i $target_path/$file
if [ -n "$do_pbmpipe" ] ; then
do_streamed_images pbm
@@ -547,14 +556,14 @@ fi
if [ -n "$do_gif" ] ; then
file=${outfile}libav.gif
-do_ffmpeg $file -t 1 -qscale 10 -f image2 -vcodec pgmyuv -i $raw_src -pix_fmt rgb24 $file
-#do_ffmpeg_crc $file -i $file
+do_ffmpeg $file -t 1 -qscale 10 -f image2 -vcodec pgmyuv -i $raw_src -pix_fmt rgb24
+#do_ffmpeg_crc $file -i $target_path/$file
fi
if [ -n "$do_yuv4mpeg" ] ; then
file=${outfile}libav.y4m
-do_ffmpeg $file -t 1 -qscale 10 -f image2 -vcodec pgmyuv -i $raw_src $file
-#do_ffmpeg_crc $file -i $file
+do_ffmpeg $file -t 1 -qscale 10 -f image2 -vcodec pgmyuv -i $raw_src
+#do_ffmpeg_crc $file -i $target_path/$file
fi
# image formats
@@ -630,9 +639,9 @@ conversions="yuv420p yuv422p yuv444p yuyv422 yuv410p yuv411p yuvj420p \
for pix_fmt in $conversions ; do
file=${outfile}libav-${pix_fmt}.yuv
do_ffmpeg_nocheck $file -r 1 -t 1 -f image2 -vcodec pgmyuv -i $raw_src \
- -f rawvideo -s 352x288 -pix_fmt $pix_fmt $raw_dst
- do_ffmpeg $file -f rawvideo -s 352x288 -pix_fmt $pix_fmt -i $raw_dst \
- -f rawvideo -s 352x288 -pix_fmt yuv444p $file
+ -f rawvideo -s 352x288 -pix_fmt $pix_fmt $target_path/$raw_dst
+ do_ffmpeg $file -f rawvideo -s 352x288 -pix_fmt $pix_fmt -i $target_path/$raw_dst \
+ -f rawvideo -s 352x288 -pix_fmt yuv444p
done
fi
diff --git a/tests/seek.regression.ref b/tests/seek.regression.ref
index a38f593..90181f8 100644
--- a/tests/seek.regression.ref
+++ b/tests/seek.regression.ref
@@ -1532,99 +1532,99 @@ ret: 0 st: 0 dts:0.880000 pts:-102481911520608.625000 pos:79182 size:10791 flags
ret:-1 st:-1 ts:-0.645825 flags:1
----------------
tests/data/a-mpeg4-Q.avi
-ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:11942 flags:1
+ret: 0 st: 0 dts:0.000000 pts:-368934881474191040.000000 pos:5660 size:11942 flags:1
ret: 0 st:-1 ts:-1.000000 flags:0
-ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:11942 flags:1
+ret: 0 st: 0 dts:0.000000 pts:-368934881474191040.000000 pos:5660 size:11942 flags:1
ret: 0 st:-1 ts:1.894167 flags:1
-ret: 0 st: 0 dts:1.840000 pts:1.840000 pos:142978 size:15562 flags:1
+ret: 0 st: 0 dts:1.840000 pts:-368934881474191040.000000 pos:142978 size:15562 flags:1
ret: 0 st: 0 ts:0.800000 flags:0
-ret: 0 st: 0 dts:0.880000 pts:0.880000 pos:64442 size:13382 flags:1
+ret: 0 st: 0 dts:0.880000 pts:-368934881474191040.000000 pos:64442 size:13382 flags:1
ret:-1 st: 0 ts:-0.320000 flags:1
ret:-1 st:-1 ts:2.576668 flags:0
ret: 0 st:-1 ts:1.470835 flags:1
-ret: 0 st: 0 dts:1.360000 pts:1.360000 pos:101236 size:15057 flags:1
+ret: 0 st: 0 dts:1.360000 pts:-368934881474191040.000000 pos:101236 size:15057 flags:1
ret: 0 st: 0 ts:0.360000 flags:0
-ret: 0 st: 0 dts:0.400000 pts:0.400000 pos:32850 size:11813 flags:1
+ret: 0 st: 0 dts:0.400000 pts:-368934881474191040.000000 pos:32850 size:11813 flags:1
ret:-1 st: 0 ts:-0.760000 flags:1
ret:-1 st:-1 ts:2.153336 flags:0
ret: 0 st:-1 ts:1.047503 flags:1
-ret: 0 st: 0 dts:0.880000 pts:0.880000 pos:64442 size:13382 flags:1
+ret: 0 st: 0 dts:0.880000 pts:-368934881474191040.000000 pos:64442 size:13382 flags:1
ret: 0 st: 0 ts:-0.040000 flags:0
-ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:11942 flags:1
+ret: 0 st: 0 dts:0.000000 pts:-368934881474191040.000000 pos:5660 size:11942 flags:1
ret: 0 st: 0 ts:2.840000 flags:1
-ret: 0 st: 0 dts:1.840000 pts:1.840000 pos:142978 size:15562 flags:1
+ret: 0 st: 0 dts:1.840000 pts:-368934881474191040.000000 pos:142978 size:15562 flags:1
ret: 0 st:-1 ts:1.730004 flags:0
-ret: 0 st: 0 dts:1.840000 pts:1.840000 pos:142978 size:15562 flags:1
+ret: 0 st: 0 dts:1.840000 pts:-368934881474191040.000000 pos:142978 size:15562 flags:1
ret: 0 st:-1 ts:0.624171 flags:1
-ret: 0 st: 0 dts:0.400000 pts:0.400000 pos:32850 size:11813 flags:1
+ret: 0 st: 0 dts:0.400000 pts:-368934881474191040.000000 pos:32850 size:11813 flags:1
ret: 0 st: 0 ts:-0.480000 flags:0
-ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:11942 flags:1
+ret: 0 st: 0 dts:0.000000 pts:-368934881474191040.000000 pos:5660 size:11942 flags:1
ret: 0 st: 0 ts:2.400000 flags:1
-ret: 0 st: 0 dts:1.840000 pts:1.840000 pos:142978 size:15562 flags:1
+ret: 0 st: 0 dts:1.840000 pts:-368934881474191040.000000 pos:142978 size:15562 flags:1
ret: 0 st:-1 ts:1.306672 flags:0
-ret: 0 st: 0 dts:1.360000 pts:1.360000 pos:101236 size:15057 flags:1
+ret: 0 st: 0 dts:1.360000 pts:-368934881474191040.000000 pos:101236 size:15057 flags:1
ret: 0 st:-1 ts:0.200839 flags:1
-ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:11942 flags:1
+ret: 0 st: 0 dts:0.000000 pts:-368934881474191040.000000 pos:5660 size:11942 flags:1
ret: 0 st: 0 ts:-0.920000 flags:0
-ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:11942 flags:1
+ret: 0 st: 0 dts:0.000000 pts:-368934881474191040.000000 pos:5660 size:11942 flags:1
ret: 0 st: 0 ts:2.000000 flags:1
-ret: 0 st: 0 dts:1.840000 pts:1.840000 pos:142978 size:15562 flags:1
+ret: 0 st: 0 dts:1.840000 pts:-368934881474191040.000000 pos:142978 size:15562 flags:1
ret: 0 st:-1 ts:0.883340 flags:0
-ret: 0 st: 0 dts:0.880000 pts:0.880000 pos:64442 size:13382 flags:1
+ret: 0 st: 0 dts:0.880000 pts:-368934881474191040.000000 pos:64442 size:13382 flags:1
ret:-1 st:-1 ts:-0.222493 flags:1
ret:-1 st: 0 ts:2.680000 flags:0
ret: 0 st: 0 ts:1.560000 flags:1
-ret: 0 st: 0 dts:1.360000 pts:1.360000 pos:101236 size:15057 flags:1
+ret: 0 st: 0 dts:1.360000 pts:-368934881474191040.000000 pos:101236 size:15057 flags:1
ret: 0 st:-1 ts:0.460008 flags:0
-ret: 0 st: 0 dts:0.880000 pts:0.880000 pos:64442 size:13382 flags:1
+ret: 0 st: 0 dts:0.880000 pts:-368934881474191040.000000 pos:64442 size:13382 flags:1
ret:-1 st:-1 ts:-0.645825 flags:1
----------------
tests/data/a-mpeg4-adap.avi
-ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:6855 flags:1
+ret: 0 st: 0 dts:0.000000 pts:-368934881474191040.000000 pos:5660 size:6855 flags:1
ret: 0 st:-1 ts:-1.000000 flags:0
-ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:6855 flags:1
+ret: 0 st: 0 dts:0.000000 pts:-368934881474191040.000000 pos:5660 size:6855 flags:1
ret: 0 st:-1 ts:1.894167 flags:1
-ret: 0 st: 0 dts:1.840000 pts:1.840000 pos:175668 size:16884 flags:1
+ret: 0 st: 0 dts:1.840000 pts:-368934881474191040.000000 pos:175668 size:16884 flags:1
ret: 0 st: 0 ts:0.800000 flags:0
-ret: 0 st: 0 dts:0.880000 pts:0.880000 pos:99006 size:16933 flags:1
+ret: 0 st: 0 dts:0.880000 pts:-368934881474191040.000000 pos:99006 size:16933 flags:1
ret:-1 st: 0 ts:-0.320000 flags:1
ret:-1 st:-1 ts:2.576668 flags:0
ret: 0 st:-1 ts:1.470835 flags:1
-ret: 0 st: 0 dts:1.360000 pts:1.360000 pos:136592 size:17435 flags:1
+ret: 0 st: 0 dts:1.360000 pts:-368934881474191040.000000 pos:136592 size:17435 flags:1
ret: 0 st: 0 ts:0.360000 flags:0
-ret: 0 st: 0 dts:0.400000 pts:0.400000 pos:59872 size:17261 flags:1
+ret: 0 st: 0 dts:0.400000 pts:-368934881474191040.000000 pos:59872 size:17261 flags:1
ret:-1 st: 0 ts:-0.760000 flags:1
ret:-1 st:-1 ts:2.153336 flags:0
ret: 0 st:-1 ts:1.047503 flags:1
-ret: 0 st: 0 dts:0.880000 pts:0.880000 pos:99006 size:16933 flags:1
+ret: 0 st: 0 dts:0.880000 pts:-368934881474191040.000000 pos:99006 size:16933 flags:1
ret: 0 st: 0 ts:-0.040000 flags:0
-ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:6855 flags:1
+ret: 0 st: 0 dts:0.000000 pts:-368934881474191040.000000 pos:5660 size:6855 flags:1
ret: 0 st: 0 ts:2.840000 flags:1
-ret: 0 st: 0 dts:1.840000 pts:1.840000 pos:175668 size:16884 flags:1
+ret: 0 st: 0 dts:1.840000 pts:-368934881474191040.000000 pos:175668 size:16884 flags:1
ret: 0 st:-1 ts:1.730004 flags:0
-ret: 0 st: 0 dts:1.840000 pts:1.840000 pos:175668 size:16884 flags:1
+ret: 0 st: 0 dts:1.840000 pts:-368934881474191040.000000 pos:175668 size:16884 flags:1
ret: 0 st:-1 ts:0.624171 flags:1
-ret: 0 st: 0 dts:0.400000 pts:0.400000 pos:59872 size:17261 flags:1
+ret: 0 st: 0 dts:0.400000 pts:-368934881474191040.000000 pos:59872 size:17261 flags:1
ret: 0 st: 0 ts:-0.480000 flags:0
-ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:6855 flags:1
+ret: 0 st: 0 dts:0.000000 pts:-368934881474191040.000000 pos:5660 size:6855 flags:1
ret: 0 st: 0 ts:2.400000 flags:1
-ret: 0 st: 0 dts:1.840000 pts:1.840000 pos:175668 size:16884 flags:1
+ret: 0 st: 0 dts:1.840000 pts:-368934881474191040.000000 pos:175668 size:16884 flags:1
ret: 0 st:-1 ts:1.306672 flags:0
-ret: 0 st: 0 dts:1.360000 pts:1.360000 pos:136592 size:17435 flags:1
+ret: 0 st: 0 dts:1.360000 pts:-368934881474191040.000000 pos:136592 size:17435 flags:1
ret: 0 st:-1 ts:0.200839 flags:1
-ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:6855 flags:1
+ret: 0 st: 0 dts:0.000000 pts:-368934881474191040.000000 pos:5660 size:6855 flags:1
ret: 0 st: 0 ts:-0.920000 flags:0
-ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:6855 flags:1
+ret: 0 st: 0 dts:0.000000 pts:-368934881474191040.000000 pos:5660 size:6855 flags:1
ret: 0 st: 0 ts:2.000000 flags:1
-ret: 0 st: 0 dts:1.840000 pts:1.840000 pos:175668 size:16884 flags:1
+ret: 0 st: 0 dts:1.840000 pts:-368934881474191040.000000 pos:175668 size:16884 flags:1
ret: 0 st:-1 ts:0.883340 flags:0
-ret: 0 st: 0 dts:0.880000 pts:0.880000 pos:99006 size:16933 flags:1
+ret: 0 st: 0 dts:0.880000 pts:-368934881474191040.000000 pos:99006 size:16933 flags:1
ret:-1 st:-1 ts:-0.222493 flags:1
ret:-1 st: 0 ts:2.680000 flags:0
ret: 0 st: 0 ts:1.560000 flags:1
-ret: 0 st: 0 dts:1.360000 pts:1.360000 pos:136592 size:17435 flags:1
+ret: 0 st: 0 dts:1.360000 pts:-368934881474191040.000000 pos:136592 size:17435 flags:1
ret: 0 st:-1 ts:0.460008 flags:0
-ret: 0 st: 0 dts:0.880000 pts:0.880000 pos:99006 size:16933 flags:1
+ret: 0 st: 0 dts:0.880000 pts:-368934881474191040.000000 pos:99006 size:16933 flags:1
ret:-1 st:-1 ts:-0.645825 flags:1
----------------
tests/data/a-mpeg4-adv.avi
@@ -1724,147 +1724,147 @@ ret: 0 st: 0 dts:0.480000 pts:0.480000 pos:35312 size:9987 flags:1
ret:-1 st:-1 ts:-0.645825 flags:1
----------------
tests/data/a-mpeg4-qprd.avi
-ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:14873 flags:1
+ret: 0 st: 0 dts:0.000000 pts:-368934881474191040.000000 pos:5660 size:14873 flags:1
ret: 0 st:-1 ts:-1.000000 flags:0
-ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:14873 flags:1
+ret: 0 st: 0 dts:0.000000 pts:-368934881474191040.000000 pos:5660 size:14873 flags:1
ret: 0 st:-1 ts:1.894167 flags:1
-ret: 0 st: 0 dts:1.840000 pts:1.840000 pos:212738 size:14347 flags:1
+ret: 0 st: 0 dts:1.840000 pts:-368934881474191040.000000 pos:212738 size:14347 flags:1
ret: 0 st: 0 ts:0.800000 flags:0
-ret: 0 st: 0 dts:0.880000 pts:0.880000 pos:152640 size:14348 flags:1
+ret: 0 st: 0 dts:0.880000 pts:-368934881474191040.000000 pos:152640 size:14348 flags:1
ret:-1 st: 0 ts:-0.320000 flags:1
ret:-1 st:-1 ts:2.576668 flags:0
ret: 0 st:-1 ts:1.470835 flags:1
-ret: 0 st: 0 dts:1.360000 pts:1.360000 pos:182580 size:14281 flags:1
+ret: 0 st: 0 dts:1.360000 pts:-368934881474191040.000000 pos:182580 size:14281 flags:1
ret: 0 st: 0 ts:0.360000 flags:0
-ret: 0 st: 0 dts:0.400000 pts:0.400000 pos:94070 size:29366 flags:1
+ret: 0 st: 0 dts:0.400000 pts:-368934881474191040.000000 pos:94070 size:29366 flags:1
ret:-1 st: 0 ts:-0.760000 flags:1
ret:-1 st:-1 ts:2.153336 flags:0
ret: 0 st:-1 ts:1.047503 flags:1
-ret: 0 st: 0 dts:0.880000 pts:0.880000 pos:152640 size:14348 flags:1
+ret: 0 st: 0 dts:0.880000 pts:-368934881474191040.000000 pos:152640 size:14348 flags:1
ret: 0 st: 0 ts:-0.040000 flags:0
-ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:14873 flags:1
+ret: 0 st: 0 dts:0.000000 pts:-368934881474191040.000000 pos:5660 size:14873 flags:1
ret: 0 st: 0 ts:2.840000 flags:1
-ret: 0 st: 0 dts:1.840000 pts:1.840000 pos:212738 size:14347 flags:1
+ret: 0 st: 0 dts:1.840000 pts:-368934881474191040.000000 pos:212738 size:14347 flags:1
ret: 0 st:-1 ts:1.730004 flags:0
-ret: 0 st: 0 dts:1.840000 pts:1.840000 pos:212738 size:14347 flags:1
+ret: 0 st: 0 dts:1.840000 pts:-368934881474191040.000000 pos:212738 size:14347 flags:1
ret: 0 st:-1 ts:0.624171 flags:1
-ret: 0 st: 0 dts:0.400000 pts:0.400000 pos:94070 size:29366 flags:1
+ret: 0 st: 0 dts:0.400000 pts:-368934881474191040.000000 pos:94070 size:29366 flags:1
ret: 0 st: 0 ts:-0.480000 flags:0
-ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:14873 flags:1
+ret: 0 st: 0 dts:0.000000 pts:-368934881474191040.000000 pos:5660 size:14873 flags:1
ret: 0 st: 0 ts:2.400000 flags:1
-ret: 0 st: 0 dts:1.840000 pts:1.840000 pos:212738 size:14347 flags:1
+ret: 0 st: 0 dts:1.840000 pts:-368934881474191040.000000 pos:212738 size:14347 flags:1
ret: 0 st:-1 ts:1.306672 flags:0
-ret: 0 st: 0 dts:1.360000 pts:1.360000 pos:182580 size:14281 flags:1
+ret: 0 st: 0 dts:1.360000 pts:-368934881474191040.000000 pos:182580 size:14281 flags:1
ret: 0 st:-1 ts:0.200839 flags:1
-ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:14873 flags:1
+ret: 0 st: 0 dts:0.000000 pts:-368934881474191040.000000 pos:5660 size:14873 flags:1
ret: 0 st: 0 ts:-0.920000 flags:0
-ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:14873 flags:1
+ret: 0 st: 0 dts:0.000000 pts:-368934881474191040.000000 pos:5660 size:14873 flags:1
ret: 0 st: 0 ts:2.000000 flags:1
-ret: 0 st: 0 dts:1.840000 pts:1.840000 pos:212738 size:14347 flags:1
+ret: 0 st: 0 dts:1.840000 pts:-368934881474191040.000000 pos:212738 size:14347 flags:1
ret: 0 st:-1 ts:0.883340 flags:0
-ret: 0 st: 0 dts:0.880000 pts:0.880000 pos:152640 size:14348 flags:1
+ret: 0 st: 0 dts:0.880000 pts:-368934881474191040.000000 pos:152640 size:14348 flags:1
ret:-1 st:-1 ts:-0.222493 flags:1
ret:-1 st: 0 ts:2.680000 flags:0
ret: 0 st: 0 ts:1.560000 flags:1
-ret: 0 st: 0 dts:1.360000 pts:1.360000 pos:182580 size:14281 flags:1
+ret: 0 st: 0 dts:1.360000 pts:-368934881474191040.000000 pos:182580 size:14281 flags:1
ret: 0 st:-1 ts:0.460008 flags:0
-ret: 0 st: 0 dts:0.880000 pts:0.880000 pos:152640 size:14348 flags:1
+ret: 0 st: 0 dts:0.880000 pts:-368934881474191040.000000 pos:152640 size:14348 flags:1
ret:-1 st:-1 ts:-0.645825 flags:1
----------------
tests/data/a-mpeg4-rc.avi
-ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:15766 flags:1
+ret: 0 st: 0 dts:0.000000 pts:-368934881474191040.000000 pos:5660 size:15766 flags:1
ret: 0 st:-1 ts:-1.000000 flags:0
-ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:15766 flags:1
+ret: 0 st: 0 dts:0.000000 pts:-368934881474191040.000000 pos:5660 size:15766 flags:1
ret: 0 st:-1 ts:1.894167 flags:1
-ret: 0 st: 0 dts:1.840000 pts:1.840000 pos:209236 size:13826 flags:1
+ret: 0 st: 0 dts:1.840000 pts:-368934881474191040.000000 pos:209236 size:13826 flags:1
ret: 0 st: 0 ts:0.800000 flags:0
-ret: 0 st: 0 dts:0.880000 pts:0.880000 pos:154792 size:13382 flags:1
+ret: 0 st: 0 dts:0.880000 pts:-368934881474191040.000000 pos:154792 size:13382 flags:1
ret:-1 st: 0 ts:-0.320000 flags:1
ret:-1 st:-1 ts:2.576668 flags:0
ret: 0 st:-1 ts:1.470835 flags:1
-ret: 0 st: 0 dts:1.360000 pts:1.360000 pos:182316 size:13326 flags:1
+ret: 0 st: 0 dts:1.360000 pts:-368934881474191040.000000 pos:182316 size:13326 flags:1
ret: 0 st: 0 ts:0.360000 flags:0
-ret: 0 st: 0 dts:0.400000 pts:0.400000 pos:95408 size:32807 flags:1
+ret: 0 st: 0 dts:0.400000 pts:-368934881474191040.000000 pos:95408 size:32807 flags:1
ret:-1 st: 0 ts:-0.760000 flags:1
ret:-1 st:-1 ts:2.153336 flags:0
ret: 0 st:-1 ts:1.047503 flags:1
-ret: 0 st: 0 dts:0.880000 pts:0.880000 pos:154792 size:13382 flags:1
+ret: 0 st: 0 dts:0.880000 pts:-368934881474191040.000000 pos:154792 size:13382 flags:1
ret: 0 st: 0 ts:-0.040000 flags:0
-ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:15766 flags:1
+ret: 0 st: 0 dts:0.000000 pts:-368934881474191040.000000 pos:5660 size:15766 flags:1
ret: 0 st: 0 ts:2.840000 flags:1
-ret: 0 st: 0 dts:1.840000 pts:1.840000 pos:209236 size:13826 flags:1
+ret: 0 st: 0 dts:1.840000 pts:-368934881474191040.000000 pos:209236 size:13826 flags:1
ret: 0 st:-1 ts:1.730004 flags:0
-ret: 0 st: 0 dts:1.840000 pts:1.840000 pos:209236 size:13826 flags:1
+ret: 0 st: 0 dts:1.840000 pts:-368934881474191040.000000 pos:209236 size:13826 flags:1
ret: 0 st:-1 ts:0.624171 flags:1
-ret: 0 st: 0 dts:0.400000 pts:0.400000 pos:95408 size:32807 flags:1
+ret: 0 st: 0 dts:0.400000 pts:-368934881474191040.000000 pos:95408 size:32807 flags:1
ret: 0 st: 0 ts:-0.480000 flags:0
-ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:15766 flags:1
+ret: 0 st: 0 dts:0.000000 pts:-368934881474191040.000000 pos:5660 size:15766 flags:1
ret: 0 st: 0 ts:2.400000 flags:1
-ret: 0 st: 0 dts:1.840000 pts:1.840000 pos:209236 size:13826 flags:1
+ret: 0 st: 0 dts:1.840000 pts:-368934881474191040.000000 pos:209236 size:13826 flags:1
ret: 0 st:-1 ts:1.306672 flags:0
-ret: 0 st: 0 dts:1.360000 pts:1.360000 pos:182316 size:13326 flags:1
+ret: 0 st: 0 dts:1.360000 pts:-368934881474191040.000000 pos:182316 size:13326 flags:1
ret: 0 st:-1 ts:0.200839 flags:1
-ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:15766 flags:1
+ret: 0 st: 0 dts:0.000000 pts:-368934881474191040.000000 pos:5660 size:15766 flags:1
ret: 0 st: 0 ts:-0.920000 flags:0
-ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:15766 flags:1
+ret: 0 st: 0 dts:0.000000 pts:-368934881474191040.000000 pos:5660 size:15766 flags:1
ret: 0 st: 0 ts:2.000000 flags:1
-ret: 0 st: 0 dts:1.840000 pts:1.840000 pos:209236 size:13826 flags:1
+ret: 0 st: 0 dts:1.840000 pts:-368934881474191040.000000 pos:209236 size:13826 flags:1
ret: 0 st:-1 ts:0.883340 flags:0
-ret: 0 st: 0 dts:0.880000 pts:0.880000 pos:154792 size:13382 flags:1
+ret: 0 st: 0 dts:0.880000 pts:-368934881474191040.000000 pos:154792 size:13382 flags:1
ret:-1 st:-1 ts:-0.222493 flags:1
ret:-1 st: 0 ts:2.680000 flags:0
ret: 0 st: 0 ts:1.560000 flags:1
-ret: 0 st: 0 dts:1.360000 pts:1.360000 pos:182316 size:13326 flags:1
+ret: 0 st: 0 dts:1.360000 pts:-368934881474191040.000000 pos:182316 size:13326 flags:1
ret: 0 st:-1 ts:0.460008 flags:0
-ret: 0 st: 0 dts:0.880000 pts:0.880000 pos:154792 size:13382 flags:1
+ret: 0 st: 0 dts:0.880000 pts:-368934881474191040.000000 pos:154792 size:13382 flags:1
ret:-1 st:-1 ts:-0.645825 flags:1
----------------
tests/data/a-mpeg4-thread.avi
-ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:14874 flags:1
+ret: 0 st: 0 dts:0.000000 pts:-368934881474191040.000000 pos:5660 size:14874 flags:1
ret: 0 st:-1 ts:-1.000000 flags:0
-ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:14874 flags:1
+ret: 0 st: 0 dts:0.000000 pts:-368934881474191040.000000 pos:5660 size:14874 flags:1
ret: 0 st:-1 ts:1.894167 flags:1
-ret: 0 st: 0 dts:1.840000 pts:1.840000 pos:229568 size:14638 flags:1
+ret: 0 st: 0 dts:1.840000 pts:-368934881474191040.000000 pos:229568 size:14638 flags:1
ret: 0 st: 0 ts:0.800000 flags:0
-ret: 0 st: 0 dts:0.880000 pts:0.880000 pos:163772 size:16380 flags:1
+ret: 0 st: 0 dts:0.880000 pts:-368934881474191040.000000 pos:163772 size:16380 flags:1
ret:-1 st: 0 ts:-0.320000 flags:1
ret:-1 st:-1 ts:2.576668 flags:0
ret: 0 st:-1 ts:1.470835 flags:1
-ret: 0 st: 0 dts:1.360000 pts:1.360000 pos:196664 size:16051 flags:1
+ret: 0 st: 0 dts:1.360000 pts:-368934881474191040.000000 pos:196664 size:16051 flags:1
ret: 0 st: 0 ts:0.360000 flags:0
-ret: 0 st: 0 dts:0.400000 pts:0.400000 pos:98760 size:33020 flags:1
+ret: 0 st: 0 dts:0.400000 pts:-368934881474191040.000000 pos:98760 size:33020 flags:1
ret:-1 st: 0 ts:-0.760000 flags:1
ret:-1 st:-1 ts:2.153336 flags:0
ret: 0 st:-1 ts:1.047503 flags:1
-ret: 0 st: 0 dts:0.880000 pts:0.880000 pos:163772 size:16380 flags:1
+ret: 0 st: 0 dts:0.880000 pts:-368934881474191040.000000 pos:163772 size:16380 flags:1
ret: 0 st: 0 ts:-0.040000 flags:0
-ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:14874 flags:1
+ret: 0 st: 0 dts:0.000000 pts:-368934881474191040.000000 pos:5660 size:14874 flags:1
ret: 0 st: 0 ts:2.840000 flags:1
-ret: 0 st: 0 dts:1.840000 pts:1.840000 pos:229568 size:14638 flags:1
+ret: 0 st: 0 dts:1.840000 pts:-368934881474191040.000000 pos:229568 size:14638 flags:1
ret: 0 st:-1 ts:1.730004 flags:0
-ret: 0 st: 0 dts:1.840000 pts:1.840000 pos:229568 size:14638 flags:1
+ret: 0 st: 0 dts:1.840000 pts:-368934881474191040.000000 pos:229568 size:14638 flags:1
ret: 0 st:-1 ts:0.624171 flags:1
-ret: 0 st: 0 dts:0.400000 pts:0.400000 pos:98760 size:33020 flags:1
+ret: 0 st: 0 dts:0.400000 pts:-368934881474191040.000000 pos:98760 size:33020 flags:1
ret: 0 st: 0 ts:-0.480000 flags:0
-ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:14874 flags:1
+ret: 0 st: 0 dts:0.000000 pts:-368934881474191040.000000 pos:5660 size:14874 flags:1
ret: 0 st: 0 ts:2.400000 flags:1
-ret: 0 st: 0 dts:1.840000 pts:1.840000 pos:229568 size:14638 flags:1
+ret: 0 st: 0 dts:1.840000 pts:-368934881474191040.000000 pos:229568 size:14638 flags:1
ret: 0 st:-1 ts:1.306672 flags:0
-ret: 0 st: 0 dts:1.360000 pts:1.360000 pos:196664 size:16051 flags:1
+ret: 0 st: 0 dts:1.360000 pts:-368934881474191040.000000 pos:196664 size:16051 flags:1
ret: 0 st:-1 ts:0.200839 flags:1
-ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:14874 flags:1
+ret: 0 st: 0 dts:0.000000 pts:-368934881474191040.000000 pos:5660 size:14874 flags:1
ret: 0 st: 0 ts:-0.920000 flags:0
-ret: 0 st: 0 dts:0.000000 pts:0.000000 pos:5660 size:14874 flags:1
+ret: 0 st: 0 dts:0.000000 pts:-368934881474191040.000000 pos:5660 size:14874 flags:1
ret: 0 st: 0 ts:2.000000 flags:1
-ret: 0 st: 0 dts:1.840000 pts:1.840000 pos:229568 size:14638 flags:1
+ret: 0 st: 0 dts:1.840000 pts:-368934881474191040.000000 pos:229568 size:14638 flags:1
ret: 0 st:-1 ts:0.883340 flags:0
-ret: 0 st: 0 dts:0.880000 pts:0.880000 pos:163772 size:16380 flags:1
+ret: 0 st: 0 dts:0.880000 pts:-368934881474191040.000000 pos:163772 size:16380 flags:1
ret:-1 st:-1 ts:-0.222493 flags:1
ret:-1 st: 0 ts:2.680000 flags:0
ret: 0 st: 0 ts:1.560000 flags:1
-ret: 0 st: 0 dts:1.360000 pts:1.360000 pos:196664 size:16051 flags:1
+ret: 0 st: 0 dts:1.360000 pts:-368934881474191040.000000 pos:196664 size:16051 flags:1
ret: 0 st:-1 ts:0.460008 flags:0
-ret: 0 st: 0 dts:0.880000 pts:0.880000 pos:163772 size:16380 flags:1
+ret: 0 st: 0 dts:0.880000 pts:-368934881474191040.000000 pos:163772 size:16380 flags:1
ret:-1 st:-1 ts:-0.645825 flags:1
----------------
tests/data/a-msmpeg4.avi
diff --git a/tests/seek_test.sh b/tests/seek_test.sh
index 996acb3..3074d70 100755
--- a/tests/seek_test.sh
+++ b/tests/seek_test.sh
@@ -3,6 +3,9 @@
LC_ALL=C
export LC_ALL
+target_exec=$2
+target_path=$3
+
datadir="tests/data"
logfile="$datadir/seek.regression"
@@ -13,7 +16,7 @@ rm -f $logfile
for i in $list ; do
echo ---------------- >> $logfile
echo $i >> $logfile
- tests/seek_test $i >> $logfile
+ $target_exec $target_path/tests/seek_test $target_path/$i >> $logfile
done
if diff -u -w "$reffile" "$logfile" ; then
--
FFmpeg packaging
More information about the pkg-multimedia-commits
mailing list