[SCM] libav/experimental: Reorder indexes in weight tables. 5 cpu cycles faster.
siretart at users.alioth.debian.org
siretart at users.alioth.debian.org
Sun Jun 30 17:02:46 UTC 2013
The following commit has been merged into the experimental branch:
commit 3d9137c8830bce3521a2249c02e2f96aefd7742c
Author: Michael Niedermayer <michaelni at gmx.at>
Date: Wed Mar 3 21:10:08 2010 +0000
Reorder indexes in weight tables.
5 cpu cycles faster.
Originally committed as revision 22183 to svn://svn.ffmpeg.org/ffmpeg/trunk
diff --git a/libavcodec/h264.c b/libavcodec/h264.c
index 08ffe3a..21ad65c 100644
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -495,14 +495,14 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom
chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
}else{
luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
- h->luma_weight[0][refn0][0], h->luma_weight[1][refn1][0],
- h->luma_weight[0][refn0][1] + h->luma_weight[1][refn1][1]);
+ h->luma_weight[refn0][0][0] , h->luma_weight[refn1][1][0],
+ h->luma_weight[refn0][0][1] + h->luma_weight[refn1][1][1]);
chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
- h->chroma_weight[0][refn0][0][0], h->chroma_weight[1][refn1][0][0],
- h->chroma_weight[0][refn0][0][1] + h->chroma_weight[1][refn1][0][1]);
+ h->chroma_weight[refn0][0][0][0] , h->chroma_weight[refn1][1][0][0],
+ h->chroma_weight[refn0][0][0][1] + h->chroma_weight[refn1][1][0][1]);
chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
- h->chroma_weight[0][refn0][1][0], h->chroma_weight[1][refn1][1][0],
- h->chroma_weight[0][refn0][1][1] + h->chroma_weight[1][refn1][1][1]);
+ h->chroma_weight[refn0][0][1][0] , h->chroma_weight[refn1][1][1][0],
+ h->chroma_weight[refn0][0][1][1] + h->chroma_weight[refn1][1][1][1]);
}
}else{
int list = list1 ? 1 : 0;
@@ -513,12 +513,12 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom
qpix_put, chroma_put);
luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
- h->luma_weight[list][refn][0], h->luma_weight[list][refn][1]);
+ h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]);
if(h->use_weight_chroma){
chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
- h->chroma_weight[list][refn][0][0], h->chroma_weight[list][refn][0][1]);
+ h->chroma_weight[refn][list][0][0], h->chroma_weight[refn][list][0][1]);
chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
- h->chroma_weight[list][refn][1][0], h->chroma_weight[list][refn][1][1]);
+ h->chroma_weight[refn][list][1][0], h->chroma_weight[refn][list][1][1]);
}
}
}
@@ -1368,16 +1368,16 @@ static int pred_weight_table(H264Context *h){
luma_weight_flag= get_bits1(&s->gb);
if(luma_weight_flag){
- h->luma_weight[list][i][0]= get_se_golomb(&s->gb);
- h->luma_weight[list][i][1]= get_se_golomb(&s->gb);
- if( h->luma_weight[list][i][0] != luma_def
- || h->luma_weight[list][i][1] != 0) {
+ h->luma_weight[i][list][0]= get_se_golomb(&s->gb);
+ h->luma_weight[i][list][1]= get_se_golomb(&s->gb);
+ if( h->luma_weight[i][list][0] != luma_def
+ || h->luma_weight[i][list][1] != 0) {
h->use_weight= 1;
h->luma_weight_flag[list]= 1;
}
}else{
- h->luma_weight[list][i][0]= luma_def;
- h->luma_weight[list][i][1]= 0;
+ h->luma_weight[i][list][0]= luma_def;
+ h->luma_weight[i][list][1]= 0;
}
if(CHROMA){
@@ -1385,10 +1385,10 @@ static int pred_weight_table(H264Context *h){
if(chroma_weight_flag){
int j;
for(j=0; j<2; j++){
- h->chroma_weight[list][i][j][0]= get_se_golomb(&s->gb);
- h->chroma_weight[list][i][j][1]= get_se_golomb(&s->gb);
- if( h->chroma_weight[list][i][j][0] != chroma_def
- || h->chroma_weight[list][i][j][1] != 0) {
+ h->chroma_weight[i][list][j][0]= get_se_golomb(&s->gb);
+ h->chroma_weight[i][list][j][1]= get_se_golomb(&s->gb);
+ if( h->chroma_weight[i][list][j][0] != chroma_def
+ || h->chroma_weight[i][list][j][1] != 0) {
h->use_weight_chroma= 1;
h->chroma_weight_flag[list]= 1;
}
@@ -1396,8 +1396,8 @@ static int pred_weight_table(H264Context *h){
}else{
int j;
for(j=0; j<2; j++){
- h->chroma_weight[list][i][j][0]= chroma_def;
- h->chroma_weight[list][i][j][1]= 0;
+ h->chroma_weight[i][list][j][0]= chroma_def;
+ h->chroma_weight[i][list][j][1]= 0;
}
}
}
diff --git a/libavcodec/h264.h b/libavcodec/h264.h
index 1ebe081..842088b 100644
--- a/libavcodec/h264.h
+++ b/libavcodec/h264.h
@@ -374,8 +374,8 @@ typedef struct H264Context{
int luma_log2_weight_denom;
int chroma_log2_weight_denom;
//The following 2 can be changed to int8_t but that causes 10cpu cycles speedloss
- int luma_weight[2][48][2];
- int chroma_weight[2][48][2][2];
+ int luma_weight[48][2][2];
+ int chroma_weight[48][2][2][2];
int implicit_weight[48][48];
int direct_spatial_mv_pred;
diff --git a/libavcodec/h264_refs.c b/libavcodec/h264_refs.c
index 8549b5b..7702aad 100644
--- a/libavcodec/h264_refs.c
+++ b/libavcodec/h264_refs.c
@@ -315,11 +315,11 @@ void ff_h264_fill_mbaff_ref_list(H264Context *h){
field[1].reference = PICT_BOTTOM_FIELD;
field[1].poc= field[1].field_poc[1];
- h->luma_weight[list][16+2*i][0] = h->luma_weight[list][16+2*i+1][0] = h->luma_weight[list][i][0];
- h->luma_weight[list][16+2*i][1] = h->luma_weight[list][16+2*i+1][1] = h->luma_weight[list][i][1];
+ h->luma_weight[16+2*i][list][0] = h->luma_weight[16+2*i+1][list][0] = h->luma_weight[i][list][0];
+ h->luma_weight[16+2*i][list][1] = h->luma_weight[16+2*i+1][list][1] = h->luma_weight[i][list][1];
for(j=0; j<2; j++){
- h->chroma_weight[list][16+2*i][j][0] = h->chroma_weight[list][16+2*i+1][j][0] = h->chroma_weight[list][i][j][0];
- h->chroma_weight[list][16+2*i][j][1] = h->chroma_weight[list][16+2*i+1][j][1] = h->chroma_weight[list][i][j][1];
+ h->chroma_weight[16+2*i][list][j][0] = h->chroma_weight[16+2*i+1][list][j][0] = h->chroma_weight[i][list][j][0];
+ h->chroma_weight[16+2*i][list][j][1] = h->chroma_weight[16+2*i+1][list][j][1] = h->chroma_weight[i][list][j][1];
}
}
}
--
Libav/FFmpeg packaging
More information about the pkg-multimedia-commits mailing list