[SCM] libav/experimental: (commit by michael) mmx & mmx2 quantizer c dct permutation bugfix dont copy input on intra only encodings if it can be avoided dont draw edges on intra only stuff

siretart at users.alioth.debian.org siretart at users.alioth.debian.org
Sun Jun 30 15:30:54 UTC 2013


The following commit has been merged in the experimental branch:
commit 2f349de2861fdbc957f12c925ce5146c045ba834
Author: Michael Niedermayer <michaelni at gmx.at>
Date:   Sun Jan 27 13:30:18 2002 +0000

    (commit by michael)
    mmx & mmx2 quantizer
    c dct permutation bugfix
    dont copy input on intra only encodings if it can be avoided
    dont draw edges on intra only stuff
    
    Originally committed as revision 281 to svn://svn.ffmpeg.org/ffmpeg/trunk

diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index 1d003cf..701cb99 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -49,6 +49,12 @@ UINT8 zigzag_direct[64] = {
     53, 60, 61, 54, 47, 55, 62, 63
 };
 
+/* not permutated inverse zigzag_direct + 1 for MMX quantizer */
+UINT16 __align8 inv_zigzag_direct16[64];
+
+/* not permutated zigzag_direct for MMX quantizer */
+UINT8 zigzag_direct_noperm[64];
+
 UINT8 ff_alternate_horizontal_scan[64] = {
     0,  1,  2,  3,  8,  9, 16, 17, 
     10, 11,  4,  5,  6,  7, 15, 14,
@@ -83,6 +89,42 @@ static UINT8 simple_mmx_permutation[64]={
 	0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
 };
 
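+/* inverse[b] is 2^32/b rounded up (inverse[1] is capped at 2^32-1, inverse[0] is 0), so with a 64 bit product (a*inverse[b])>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */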
+UINT32 inverse[256]={
+         0, 4294967295U,2147483648U,1431655766, 1073741824,  858993460,  715827883,  613566757, 
+ 536870912,  477218589,  429496730,  390451573,  357913942,  330382100,  306783379,  286331154, 
+ 268435456,  252645136,  238609295,  226050911,  214748365,  204522253,  195225787,  186737709, 
+ 178956971,  171798692,  165191050,  159072863,  153391690,  148102321,  143165577,  138547333, 
+ 134217728,  130150525,  126322568,  122713352,  119304648,  116080198,  113025456,  110127367, 
+ 107374183,  104755300,  102261127,   99882961,   97612894,   95443718,   93368855,   91382283, 
+  89478486,   87652394,   85899346,   84215046,   82595525,   81037119,   79536432,   78090315, 
+  76695845,   75350304,   74051161,   72796056,   71582789,   70409300,   69273667,   68174085, 
+  67108864,   66076420,   65075263,   64103990,   63161284,   62245903,   61356676,   60492498, 
+  59652324,   58835169,   58040099,   57266231,   56512728,   55778797,   55063684,   54366675, 
+  53687092,   53024288,   52377650,   51746594,   51130564,   50529028,   49941481,   49367441, 
+  48806447,   48258060,   47721859,   47197443,   46684428,   46182445,   45691142,   45210183, 
+  44739243,   44278014,   43826197,   43383509,   42949673,   42524429,   42107523,   41698712, 
+  41297763,   40904451,   40518560,   40139882,   39768216,   39403370,   39045158,   38693400, 
+  38347923,   38008561,   37675152,   37347542,   37025581,   36709123,   36398028,   36092163, 
+  35791395,   35495598,   35204650,   34918434,   34636834,   34359739,   34087043,   33818641, 
+  33554432,   33294321,   33038210,   32786010,   32537632,   32292988,   32051995,   31814573, 
+  31580642,   31350127,   31122952,   30899046,   30678338,   30460761,   30246249,   30034737, 
+  29826162,   29620465,   29417585,   29217465,   29020050,   28825284,   28633116,   28443493, 
+  28256364,   28071682,   27889399,   27709467,   27531842,   27356480,   27183338,   27012373, 
+  26843546,   26676816,   26512144,   26349493,   26188825,   26030105,   25873297,   25718368, 
+  25565282,   25414008,   25264514,   25116768,   24970741,   24826401,   24683721,   24542671, 
+  24403224,   24265352,   24129030,   23994231,   23860930,   23729102,   23598722,   23469767, 
+  23342214,   23216040,   23091223,   22967740,   22845571,   22724695,   22605092,   22486740, 
+  22369622,   22253717,   22139007,   22025474,   21913099,   21801865,   21691755,   21582751, 
+  21474837,   21367997,   21262215,   21157475,   21053762,   20951060,   20849356,   20748635, 
+  20648882,   20550083,   20452226,   20355296,   20259280,   20164166,   20069941,   19976593, 
+  19884108,   19792477,   19701685,   19611723,   19522579,   19434242,   19346700,   19259944, 
+  19173962,   19088744,   19004281,   18920561,   18837576,   18755316,   18673771,   18592933, 
+  18512791,   18433337,   18354562,   18276457,   18199014,   18122225,   18046082,   17970575, 
+  17895698,   17821442,   17747799,   17674763,   17602325,   17530479,   17459217,   17388532, 
+  17318417,   17248865,   17179870,   17111424,   17043522,   16976156,   16909321,   16843010,
+};
+
 /* used to skip zeros at the end */
 UINT8 zigzag_end[64];
 
@@ -515,6 +557,9 @@ void dsputil_init(void)
     else
         for(i=0; i<64; i++) permutation[i]=i;
 
+    for(i=0; i<64; i++) inv_zigzag_direct16[zigzag_direct[i]]= i+1;
+    for(i=0; i<64; i++) zigzag_direct_noperm[i]= zigzag_direct[i];
+    
     if (use_permuted_idct) {
         /* permute for IDCT */
         for(i=0;i<64;i++) {
diff --git a/libavcodec/i386/mpegvideo_mmx.c b/libavcodec/i386/mpegvideo_mmx.c
index 367fa72..3ca40ca 100644
--- a/libavcodec/i386/mpegvideo_mmx.c
+++ b/libavcodec/i386/mpegvideo_mmx.c
@@ -22,9 +22,16 @@
 
 #include "../dsputil.h"
 #include "../mpegvideo.h"
+#include "../avcodec.h"
+#include "../mangle.h"
 
 extern UINT8 zigzag_end[64];
 extern void (*draw_edges)(UINT8 *buf, int wrap, int width, int height, int w);
+extern int (*dct_quantize)(MpegEncContext *s, DCTELEM *block, int n, int qscale);
+
+extern UINT8 zigzag_direct_noperm[64];
+extern UINT16 inv_zigzag_direct16[64];
+extern UINT32 inverse[256];
 
 #if 0
 
@@ -252,7 +259,7 @@ static void dct_unquantize_mpeg1_mmx(MpegEncContext *s,
         }
     } else {
         i = 0;
-    unquant_even:
+//    unquant_even:
         quant_matrix = s->non_intra_matrix;
 	/* Align on 4 elements boundary */
 	while(i&7)
@@ -411,6 +418,20 @@ static void draw_edges_mmx(UINT8 *buf, int wrap, int width, int height, int w)
     }
 }
 
+static volatile int esp_temp; /* slot in which the permutation asm of mpegvideo_mmx_template.c parks %esp; it is only referenced there by symbol name, via MANGLE(esp_temp), inside asm strings */
+
+void unused_var_warning_killer(){ /* increments esp_temp from C; judging by its name this exists only to silence an unused-variable warning */
+	esp_temp++;
+}
+
+#undef HAVE_MMX2
+#define RENAME(a) a ## _MMX
+#include "mpegvideo_mmx_template.c"
+
+#define HAVE_MMX2
+#undef RENAME
+#define RENAME(a) a ## _MMX2
+#include "mpegvideo_mmx_template.c"
 
 void MPV_common_init_mmx(MpegEncContext *s)
 {
@@ -421,5 +442,11 @@ void MPV_common_init_mmx(MpegEncContext *s)
         	s->dct_unquantize = dct_unquantize_mpeg1_mmx;
 	
 	draw_edges = draw_edges_mmx;
+
+	if(mm_flags & MM_MMXEXT){
+	        dct_quantize= dct_quantize_MMX2;
+	}else{
+		dct_quantize= dct_quantize_MMX;
+	}
     }
 }
diff --git a/libavcodec/i386/mpegvideo_mmx_template.c b/libavcodec/i386/mpegvideo_mmx_template.c
new file mode 100644
index 0000000..71df065
--- /dev/null
+++ b/libavcodec/i386/mpegvideo_mmx_template.c
@@ -0,0 +1,201 @@
+/*
+    Copyright (C) 2002 Michael Niedermayer <michaelni at gmx.at>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#undef SPREADW /* SPREADW(r): copy the low 16 bit word of MMX register r into all four words of r; undefined first because this file is included twice (once per RENAME variant) */
+#undef PMAXW /* PMAXW(a,b): b = per-word maximum of a and b; signed (pmaxsw) with HAVE_MMX2, unsigned (saturating subtract, then add back) without it */
+#ifdef HAVE_MMX2
+#define SPREADW(a) "pshufw $0, " #a ", " #a " \n\t"
+#define PMAXW(a,b) "pmaxsw " #a ", " #b " \n\t"
+
+#else
+#define SPREADW(a) \
+	"punpcklwd " #a ", " #a " \n\t"\
+	"punpcklwd " #a ", " #a " \n\t"
+#define PMAXW(a,b) \
+	"psubusw " #a ", " #b " \n\t"\
+	"paddw " #a ", " #b " \n\t"
+#endif
+
+static int RENAME(dct_quantize)(MpegEncContext *s,
+                            DCTELEM *block, int n, /* block: 64 coefficients, overwritten in place; n: block number (n < 4 selects y_dc_scale, otherwise c_dc_scale) */
+                            int qscale) /* qscale is not read in this function; the precomputed q_*_matrix16 tables of s are used instead */
+{ /* Runs av_fdct on block, quantizes and clamps the result, writes it back into block through the IDCT permutation and returns last_non_zero_p1 - 1. */
+    int i, level, last_non_zero_p1, q; /* last_non_zero_p1: running maximum of inv_zigzag_direct16[] (zigzag position + 1) over the nonzero quantized levels */
+    const UINT16 *qmat;
+    static __align8 INT16 temp_block[64]; /* quantized levels before reordering; static, hence shared by every call. NOTE(review): this makes the function non-reentrant */
+    int minLevel, maxLevel;
+    /* pick the clamping range for quantized levels according to the codec / output format */
+    if(s->avctx!=NULL && s->avctx->codec->id==CODEC_ID_MPEG4){
+	/* mpeg4 */
+        minLevel= -2048;
+	maxLevel= 2047;
+    }else if(s->out_format==FMT_MPEG1){
+	/* mpeg1 */
+        minLevel= -255;
+	maxLevel= 255;
+    }else{
+	/* h263 / msmpeg4 */
+        minLevel= -128;
+	maxLevel= 127;
+    }
+
+    av_fdct (block); /* transform through the av_fdct function pointer (presumably the forward DCT, in place) */
+    
+    if (s->mb_intra) {
+        int dummy; /* receives eax from the asm below; the value is not used */
+        if (n < 4)
+            q = s->y_dc_scale;
+        else
+            q = s->c_dc_scale;
+        
+        /* note: block[0] is assumed to be positive */
+#if 1
+	asm volatile ( /* DC coefficient: division by q replaced by a multiplication with inverse[q] */
+		"xorl %%edx, %%edx	\n\t" /* edx = 0 (mul overwrites edx anyway) */
+		"mul %%ebx		\n\t" /* edx:eax = eax * ebx, unsigned 32x32 -> 64 bit */
+		: "=d" (temp_block[0]), "=a"(dummy) /* edx, the high half of the product, becomes the quantized DC value */
+		: "a" (block[0] + (q >> 1)), "b" (inverse[q]) /* eax = DC + q/2 for rounding, ebx = inverse[q] */
+	);
+#else /* disabled alternative that uses a real 16 bit division */
+	asm volatile (
+		"xorl %%edx, %%edx	\n\t"
+		"divw %%bx		\n\t"
+		"movzwl %%ax, %%eax	\n\t"
+		: "=a" (temp_block[0])
+		: "a" (block[0] + (q >> 1)), "b" (q)
+		: "%edx"
+	);
+#endif
+//      C equivalent: temp_block[0] = (block[0] + (q >> 1)) / q;
+        i = 1;
+        last_non_zero_p1 = 1; /* the DC position is always counted for intra blocks */
+        if (s->out_format == FMT_H263) {
+            qmat = s->q_non_intra_matrix16;
+        } else {
+            qmat = s->q_intra_matrix16;
+        }
+        for(i=1;i<4;i++) { /* coefficients 1..3 are quantized in C; i is 4 afterwards, so the MMX loop below starts at coefficient 4 */
+            level = block[i] * qmat[i];
+            level = level / (1 << (QMAT_SHIFT_MMX - 3));
+            /* XXX: currently, this code is not optimal. the range should be:
+               mpeg1: -255..255
+               mpeg2: -2048..2047
+               h263:  -128..127
+               mpeg4: -2048..2047
+            */
+            if (level > maxLevel)
+                level = maxLevel;
+            else if (level < minLevel)
+                level = minLevel;
+            temp_block[i] = level;
+
+	    if(level) 
+	        if(last_non_zero_p1 < inv_zigzag_direct16[i]) last_non_zero_p1= inv_zigzag_direct16[i]; /* keep the maximum */
+	    block[i]=0; /* block is cleared here and refilled by the permutation step at the end */
+        }
+    } else {
+        i = 0; /* the MMX loop handles all 64 coefficients */
+        last_non_zero_p1 = 0;
+        qmat = s->q_non_intra_matrix16;
+    }
+
+    asm volatile( /* XXX: small rounding bug, but it shouldn't matter */
+	"movd %3, %%mm3			\n\t" /* mm3, mm4, mm5: clamp constants (operands %3, %4, %5), each broadcast to four words */
+	SPREADW(%%mm3)
+	"movd %4, %%mm4			\n\t"
+	SPREADW(%%mm4)
+	"movd %5, %%mm5			\n\t"
+	SPREADW(%%mm5)
+	"pxor %%mm7, %%mm7		\n\t" /* mm7 = 0 */
+	"movd %%eax, %%mm2		\n\t" /* mm2 = running maximum, seeded with last_non_zero_p1 (operand %0 lives in eax) */
+	SPREADW(%%mm2)
+	"movl %6, %%eax			\n\t" /* eax = 2*i - 128: negative byte offset from the end of the 64-word arrays */
+	".balign 16			\n\t"
+	"1:				\n\t"
+	"movq (%1, %%eax), %%mm0	\n\t" /* mm0 = four coefficients from block */
+	"movq (%2, %%eax), %%mm1	\n\t" /* mm1 = the four matching qmat entries */
+	"movq %%mm0, %%mm6		\n\t"
+	"psraw $15, %%mm6		\n\t" /* mm6 = 0xFFFF in each word whose coefficient is negative, else 0 */
+	"pmulhw %%mm0, %%mm1		\n\t" /* mm1 = high 16 bits of the signed products coefficient * qmat */
+	"psubsw %%mm6, %%mm1		\n\t" /* subtracting the mask adds 1 to the results of negative coefficients */
+#ifdef HAVE_MMX2
+	"pminsw %%mm3, %%mm1		\n\t" /* clamp to maxLevel */
+	"pmaxsw %%mm4, %%mm1		\n\t" /* clamp to minLevel */
+#else
+	"paddsw %%mm3, %%mm1		\n\t" /* clamp emulated with saturating add / unsigned saturating subtract / add of the bias constants */
+	"psubusw %%mm4, %%mm1		\n\t"
+	"paddsw %%mm5, %%mm1		\n\t"
+#endif
+	"movq %%mm1, (%8, %%eax)	\n\t" /* store the four quantized levels into temp_block */
+	"pcmpeqw %%mm7, %%mm1		\n\t" /* mm1 = 0xFFFF where the level is zero */
+	"movq (%7, %%eax), %%mm0	\n\t" /* mm0 = inv_zigzag_direct16 entries for these four coefficients */
+	"movq %%mm7, (%1, %%eax)	\n\t" /* clear the four words in block */
+	"pandn %%mm0, %%mm1		\n\t" /* mm1 = inv_zigzag_direct16 entry where the level is nonzero, 0 elsewhere */
+	PMAXW(%%mm1, %%mm2) /* fold into the running maximum */
+	"addl $8, %%eax			\n\t" /* next four coefficients */
+	" js 1b				\n\t" /* loop while the offset is still negative */
+	"movq %%mm2, %%mm0		\n\t" /* horizontal maximum of the four words of mm2, ending in its low word */
+	"psrlq $32, %%mm2		\n\t"
+	PMAXW(%%mm0, %%mm2)
+	"movq %%mm2, %%mm0		\n\t"
+	"psrlq $16, %%mm2		\n\t"
+	PMAXW(%%mm0, %%mm2)
+	"movd %%mm2, %%eax		\n\t"
+	"movzbl %%al, %%eax		\n\t" /* keep only the low byte as the result */
+	: "+a" (last_non_zero_p1) /* %0: in = maximum so far, out = final maximum */
+	: "r" (block+64), "r" (qmat+64), /* %1, %2: pointers to the end of the arrays, indexed with the negative offset in eax */
+#ifdef HAVE_MMX2
+	  "m" (maxLevel),          "m" (minLevel),                    "m" (0 /* dummy */), "g" (2*i - 128),
+#else
+	  "m" (0x7FFF - maxLevel), "m" (0x7FFF -maxLevel + minLevel), "m" (minLevel),      "g" (2*i - 128), /* NOTE(review): "m" is applied to non-lvalue expressions in both variants; confirm the compiler in use accepts that */
+#endif
+	  "r" (inv_zigzag_direct16+64), "r" (temp_block+64) /* %7, %8: likewise biased to the end of the tables */
+    ); /* NOTE(review): no clobber list; mm0-mm7 and the memory written through %1 and %8 are not declared to the compiler */
+// last_non_zero_p1=64;       
+    /* permute for IDCT: copy temp_block back into block, walking the first last_non_zero_p1 zigzag positions two at a time */
+    asm volatile(
+	"movl %0, %%eax			\n\t" /* eax = -last_non_zero_p1, counts up towards zero */
+	"pushl %%ebp			\n\t" /* ebp and esp are used as scratch registers below, so both are saved first */
+	"movl %%esp, " MANGLE(esp_temp) "\n\t"
+	"1:				\n\t" /* the body runs at least once, even when last_non_zero_p1 is 0 */
+	"movzbl (%1, %%eax), %%ebx	\n\t" /* ebx = zigzag_direct_noperm[] entry for this position */
+	"movzbl 1(%1, %%eax), %%ebp	\n\t" /* ebp = entry for the following position */
+	"movw (%2, %%ebx, 2), %%cx	\n\t" /* cx = temp_block[ebx] */
+	"movw (%2, %%ebp, 2), %%sp	\n\t" /* sp = temp_block[ebp]. NOTE(review): esp is not a valid stack pointer from here until it is restored */
+	"movzbl " MANGLE(permutation) "(%%ebx), %%ebx\n\t" /* map both indices through permutation[] */
+	"movzbl " MANGLE(permutation) "(%%ebp), %%ebp\n\t"
+	"movw %%cx, (%3, %%ebx, 2)	\n\t" /* block[permuted index] = level */
+	"movw %%sp, (%3, %%ebp, 2)	\n\t"
+	"addl $2, %%eax			\n\t" /* two positions per iteration */
+	" js 1b				\n\t"
+	"movl " MANGLE(esp_temp) ", %%esp\n\t" /* restore esp, then ebp */
+	"popl %%ebp			\n\t"
+	: 
+	: "g" (-last_non_zero_p1), "d" (zigzag_direct_noperm+last_non_zero_p1), "S" (temp_block), "D" (block) /* %0 = negative count, %1 = end of the scanned range, %2 = source, %3 = destination */
+	: "%eax", "%ebx", "%ecx" /* NOTE(review): ebp/esp are modified and block is written, but neither they nor "memory" are listed here */
+	);
+/* C version of the permutation loop above, kept for reference:
+    for(i=0; i<last_non_zero_p1; i++)
+    {
+       int j= zigzag_direct_noperm[i];
+       block[block_permute_op(j)]= temp_block[j];
+    }
+*/
+//block_permute(block);
+    return last_non_zero_p1 - 1; /* -1 when no coefficient of a non-intra block survived quantization */
+}
diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index 6f92f09..7937be4 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -35,12 +35,10 @@ static void dct_unquantize_mpeg1_c(MpegEncContext *s,
                                    DCTELEM *block, int n, int qscale);
 static void dct_unquantize_h263_c(MpegEncContext *s, 
                                   DCTELEM *block, int n, int qscale);
-static int dct_quantize(MpegEncContext *s, DCTELEM *block, int n, int qscale);
-static int dct_quantize_mmx(MpegEncContext *s, 
-                            DCTELEM *block, int n,
-                            int qscale);
 static void draw_edges_c(UINT8 *buf, int wrap, int width, int height, int w);
+static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale);
 
+int (*dct_quantize)(MpegEncContext *s, DCTELEM *block, int n, int qscale)= dct_quantize_c;
 void (*draw_edges)(UINT8 *buf, int wrap, int width, int height, int w)= draw_edges_c;
 
 #define EDGE_WIDTH 16
@@ -74,29 +72,29 @@ int motion_estimation_method = ME_LOG;
 
 extern UINT8 zigzag_end[64];
 
-/* XXX: should use variable shift ? */
-#define QMAT_SHIFT_MMX 19
-#define QMAT_SHIFT 25
-
-static void convert_matrix(int *qmat, const UINT16 *quant_matrix, int qscale)
+static void convert_matrix(int *qmat, UINT16 *qmat16, const UINT16 *quant_matrix, int qscale)
 {
     int i;
 
     if (av_fdct == jpeg_fdct_ifast) {
         for(i=0;i<64;i++) {
             /* 16 <= qscale * quant_matrix[i] <= 7905 */
-            /* 19952 <= aanscales[i] * qscale * quant_matrix[i] <= 249205026 */
+            /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
+            /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
+            /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
             
-            qmat[i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 11)) / 
-                            (aanscales[i] * qscale * quant_matrix[i]));
+            qmat[block_permute_op(i)] = (int)((UINT64_C(1) << (QMAT_SHIFT + 11)) / 
+                            (aanscales[i] * qscale * quant_matrix[block_permute_op(i)]));
         }
     } else {
         for(i=0;i<64;i++) {
             /* We can safely suppose that 16 <= quant_matrix[i] <= 255
-               So 16 <= qscale * quant_matrix[i] <= 7905
-               so (1 << QMAT_SHIFT) / 16 >= qmat[i] >= (1 << QMAT_SHIFT) / 7905
+               So 16           <= qscale * quant_matrix[i]             <= 7905
+               so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
+               so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
             */
-            qmat[i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
+            qmat[i]   = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
+            qmat16[i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[block_permute_op(i)]);
         }
     }
 }
@@ -418,7 +416,7 @@ void MPV_frame_start(MpegEncContext *s)
 void MPV_frame_end(MpegEncContext *s)
 {
     /* draw edge for correct motion prediction if outside */
-    if (s->pict_type != B_TYPE) {
+    if (s->pict_type != B_TYPE && !s->intra_only) {
       if(s->avctx==NULL || s->avctx->codec->id!=CODEC_ID_MPEG4){
         draw_edges(s->current_picture[0], s->linesize, s->mb_width*16, s->mb_height*16, EDGE_WIDTH);
         draw_edges(s->current_picture[1], s->linesize/2, s->mb_width*8, s->mb_height*8, EDGE_WIDTH/2);
@@ -457,7 +455,7 @@ int MPV_encode_picture(AVCodecContext *avctx,
     avctx->key_frame = (s->pict_type == I_TYPE);
     
     MPV_frame_start(s);
-
+    
     for(i=0;i<3;i++) {
         UINT8 *src = pict->data[i];
         UINT8 *dest = s->current_picture[i];
@@ -472,11 +470,15 @@ int MPV_encode_picture(AVCodecContext *avctx,
             h >>= 1;
         }
 
-        for(j=0;j<h;j++) {
-            memcpy(dest, src, w);
-            dest += dest_wrap;
-            src += src_wrap;
-        }
+	if(s->intra_only && dest_wrap==src_wrap){
+	    s->current_picture[i] = pict->data[i];
+	}else {
+            for(j=0;j<h;j++) {
+                memcpy(dest, src, w);
+                dest += dest_wrap;
+                src += src_wrap;
+            }
+	}
         s->new_picture[i] = s->current_picture[i];
     }
 
@@ -873,10 +875,10 @@ static void encode_picture(MpegEncContext *s, int picture_number)
         s->intra_matrix[0] = default_intra_matrix[0];
         for(i=1;i<64;i++)
             s->intra_matrix[i] = (default_intra_matrix[i] * s->qscale) >> 3;
-        convert_matrix(s->q_intra_matrix, s->intra_matrix, 8);
+        convert_matrix(s->q_intra_matrix, s->q_intra_matrix16, s->intra_matrix, 8);
     } else {
-        convert_matrix(s->q_intra_matrix, s->intra_matrix, s->qscale);
-        convert_matrix(s->q_non_intra_matrix, s->non_intra_matrix, s->qscale);
+        convert_matrix(s->q_intra_matrix, s->q_intra_matrix16, s->intra_matrix, s->qscale);
+        convert_matrix(s->q_non_intra_matrix, s->q_non_intra_matrix16, s->non_intra_matrix, s->qscale);
     }
 
     switch(s->out_format) {
@@ -1011,14 +1013,8 @@ static void encode_picture(MpegEncContext *s, int picture_number)
                 s->y_dc_scale = 8;
                 s->c_dc_scale = 8;
             }
-
             for(i=0;i<6;i++) {
-                int last_index;
-                if (av_fdct == jpeg_fdct_ifast)
-                    last_index = dct_quantize(s, s->block[i], i, s->qscale);
-                else
-                    last_index = dct_quantize_mmx(s, s->block[i], i, s->qscale);
-                s->block_last_index[i] = last_index;
+                s->block_last_index[i] = dct_quantize(s, s->block[i], i, s->qscale);
             }
 
             /* huffman encode */
@@ -1060,7 +1056,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
     //    fprintf(stderr,"\nNumber of GOB: %d", s->gob_number);
 }
 
-static int dct_quantize(MpegEncContext *s, 
+static int dct_quantize_c(MpegEncContext *s, 
                         DCTELEM *block, int n,
                         int qscale)
 {
@@ -1157,85 +1153,7 @@ static int dct_quantize(MpegEncContext *s,
                 level = maxLevel;
             else if (level < minLevel)
                 level = minLevel;
-            block[j] = level;
-            last_non_zero = i;
-        } else {
-            block[j] = 0;
-        }
-    }
-    return last_non_zero;
-}
-
-static int dct_quantize_mmx(MpegEncContext *s, 
-                            DCTELEM *block, int n,
-                            int qscale)
-{
-    int i, j, level, last_non_zero, q;
-    const int *qmat;
-    int minLevel, maxLevel;
-
-    if(s->avctx!=NULL && s->avctx->codec->id==CODEC_ID_MPEG4){
-	/* mpeg4 */
-        minLevel= -2048;
-	maxLevel= 2047;
-    }else if(s->out_format==FMT_MPEG1){
-	/* mpeg1 */
-        minLevel= -255;
-	maxLevel= 255;
-    }else{
-	/* h263 / msmpeg4 */
-        minLevel= -128;
-	maxLevel= 127;
-    }
 
-    av_fdct (block);
-    
-    /* we need this permutation so that we correct the IDCT
-       permutation. will be moved into DCT code */
-    block_permute(block);
-
-    if (s->mb_intra) {
-        if (n < 4)
-            q = s->y_dc_scale;
-        else
-            q = s->c_dc_scale;
-        
-        /* note: block[0] is assumed to be positive */
-        block[0] = (block[0] + (q >> 1)) / q;
-        i = 1;
-        last_non_zero = 0;
-        if (s->out_format == FMT_H263) {
-            qmat = s->q_non_intra_matrix;
-        } else {
-            qmat = s->q_intra_matrix;
-        }
-    } else {
-        i = 0;
-        last_non_zero = -1;
-        qmat = s->q_non_intra_matrix;
-    }
-
-    for(;i<64;i++) {
-        j = zigzag_direct[i];
-        level = block[j];
-        level = level * qmat[j];
-        /* XXX: slight error for the low range. Test should be equivalent to
-           (level <= -(1 << (QMAT_SHIFT_MMX - 3)) || level >= (1 <<
-           (QMAT_SHIFT_MMX - 3)))
-        */
-        if (((level << (31 - (QMAT_SHIFT_MMX - 3))) >> (31 - (QMAT_SHIFT_MMX - 3))) != 
-            level) {
-            level = level / (1 << (QMAT_SHIFT_MMX - 3));
-            /* XXX: currently, this code is not optimal. the range should be:
-               mpeg1: -255..255
-               mpeg2: -2048..2047
-               h263:  -128..127
-               mpeg4: -2048..2047
-            */
-            if (level > maxLevel)
-                level = maxLevel;
-            else if (level < minLevel)
-                level = minLevel;
             block[j] = level;
             last_non_zero = i;
         } else {
diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h
index 03ddfc8..70c962d 100644
--- a/libavcodec/mpegvideo.h
+++ b/libavcodec/mpegvideo.h
@@ -30,6 +30,9 @@ enum OutputFormat {
 
 #define MPEG_BUF_SIZE (16 * 1024)
 
+#define QMAT_SHIFT_MMX 19
+#define QMAT_SHIFT 25
+
 typedef struct MpegEncContext {
     struct AVCodecContext *avctx;
     /* the following parameters must be initialized before encoding */
@@ -120,6 +123,9 @@ typedef struct MpegEncContext {
     /* precomputed matrix (combine qscale and DCT renorm) */
     int q_intra_matrix[64];
     int q_non_intra_matrix[64];
+    /* identical to the above but for MMX & these are not permutated */
+    UINT16 __align8 q_intra_matrix16[64] ;
+    UINT16 __align8 q_non_intra_matrix16[64];
     int block_last_index[6];  /* last non zero coefficient in block */
 
     void *opaque; /* private data for the user */

-- 
Libav/FFmpeg packaging



More information about the pkg-multimedia-commits mailing list