[SCM] libav/experimental: vp8: K&R formatting cosmetics

Sun Aug 10 16:02:29 UTC 2014

The following commit has been merged in the experimental branch:
commit 53c20f17c78d1d8a0fc2505868f201e69ff59cc5
Author: Vittorio Giovara <vittorio.giovara at gmail.com>
Date:   Fri Mar 28 01:13:54 2014 +0100

    vp8: K&R formatting cosmetics
    
    Signed-off-by: Diego Biurrun <diego at biurrun.de>

diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c
index 30e24cc..a1d24cd 100644
--- a/libavcodec/vp8.c
+++ b/libavcodec/vp8.c
@@ -24,12 +24,13 @@
  */
 
 #include "libavutil/imgutils.h"
+
 #include "avcodec.h"
 #include "internal.h"
-#include "vp8.h"
-#include "vp8data.h"
 #include "rectangle.h"
 #include "thread.h"
+#include "vp8.h"
+#include "vp8data.h"
 
 #if ARCH_ARM
 #   include "arm/vp8.h"
@@ -91,7 +92,6 @@ static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
     return 0;
 }
 
-
 static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
 {
     VP8Context *s = avctx->priv_data;
@@ -124,22 +124,25 @@ static int update_dimensions(VP8Context *s, int width, int height)
             return ret;
     }
 
-    s->mb_width  = (s->avctx->coded_width +15) / 16;
-    s->mb_height = (s->avctx->coded_height+15) / 16;
+    s->mb_width  = (s->avctx->coded_width  + 15) / 16;
+    s->mb_height = (s->avctx->coded_height + 15) / 16;
 
-    s->mb_layout = (avctx->active_thread_type == FF_THREAD_SLICE) && (FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1);
+    s->mb_layout = (avctx->active_thread_type == FF_THREAD_SLICE) &&
+                   (FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1);
     if (!s->mb_layout) { // Frame threading and one thread
-        s->macroblocks_base       = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
-        s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4);
-    }
-    else // Sliced threading
-        s->macroblocks_base       = av_mallocz((s->mb_width+2)*(s->mb_height+2)*sizeof(*s->macroblocks));
-    s->top_nnz                    = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
-    s->top_border                 = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
-    s->thread_data                = av_mallocz(MAX_THREADS*sizeof(VP8ThreadData));
+        s->macroblocks_base       = av_mallocz((s->mb_width + s->mb_height * 2 + 1) *
+                                               sizeof(*s->macroblocks));
+        s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4);
+    } else // Sliced threading
+        s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) *
+                                         sizeof(*s->macroblocks));
+    s->top_nnz     = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
+    s->top_border  = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));
+    s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData));
 
     for (i = 0; i < MAX_THREADS; i++) {
-        s->thread_data[i].filter_strength = av_mallocz(s->mb_width*sizeof(*s->thread_data[0].filter_strength));
+        s->thread_data[i].filter_strength =
+            av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength));
 #if HAVE_THREADS
         pthread_mutex_init(&s->thread_data[i].lock, NULL);
         pthread_cond_init(&s->thread_data[i].cond, NULL);
@@ -150,7 +153,7 @@ static int update_dimensions(VP8Context *s, int width, int height)
         (!s->intra4x4_pred_mode_top && !s->mb_layout))
         return AVERROR(ENOMEM);
 
-    s->macroblocks        = s->macroblocks_base + 1;
+    s->macroblocks = s->macroblocks_base + 1;
 
     return 0;
 }
@@ -207,13 +210,13 @@ static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
 
     s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
 
-    buf      += 3*(s->num_coeff_partitions-1);
-    buf_size -= 3*(s->num_coeff_partitions-1);
+    buf      += 3 * (s->num_coeff_partitions - 1);
+    buf_size -= 3 * (s->num_coeff_partitions - 1);
     if (buf_size < 0)
         return -1;
 
-    for (i = 0; i < s->num_coeff_partitions-1; i++) {
-        int size = AV_RL24(sizes + 3*i);
+    for (i = 0; i < s->num_coeff_partitions - 1; i++) {
+        int size = AV_RL24(sizes + 3 * i);
         if (buf_size - size < 0)
             return -1;
 
@@ -246,13 +249,13 @@ static void get_quants(VP8Context *s)
         } else
             base_qi = yac_qi;
 
-        s->qmat[i].luma_qmul[0]    =           vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)];
-        s->qmat[i].luma_qmul[1]    =           vp8_ac_qlookup[av_clip_uintp2(base_qi             , 7)];
-        s->qmat[i].luma_dc_qmul[0] =       2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
+        s->qmat[i].luma_qmul[0]    = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta,  7)];
+        s->qmat[i].luma_qmul[1]    = vp8_ac_qlookup[av_clip_uintp2(base_qi,              7)];
+        s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)] * 2;
         /* 101581>>16 is equivalent to 155/100 */
-        s->qmat[i].luma_dc_qmul[1] = (101581 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)]) >> 16;
-        s->qmat[i].chroma_qmul[0]  =           vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
-        s->qmat[i].chroma_qmul[1]  =           vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
+        s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] * 101581 >> 16;
+        s->qmat[i].chroma_qmul[0]  = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
+        s->qmat[i].chroma_qmul[1]  = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
 
         s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
         s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
@@ -317,24 +320,27 @@ static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
         av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
 
     if (!s->profile)
-        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
+        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab,
+               sizeof(s->put_pixels_tab));
     else    // profile 1-3 use bilinear, 4+ aren't defined so whatever
-        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));
+        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab,
+               sizeof(s->put_pixels_tab));
 
-    if (header_size > buf_size - 7*s->keyframe) {
+    if (header_size > buf_size - 7 * s->keyframe) {
         av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
         return AVERROR_INVALIDDATA;
     }
 
     if (s->keyframe) {
         if (AV_RL24(buf) != 0x2a019d) {
-            av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
+            av_log(s->avctx, AV_LOG_ERROR,
+                   "Invalid start code 0x%x\n", AV_RL24(buf));
             return AVERROR_INVALIDDATA;
         }
-        width  = AV_RL16(buf+3) & 0x3fff;
-        height = AV_RL16(buf+5) & 0x3fff;
-        hscale = buf[4] >> 6;
-        vscale = buf[6] >> 6;
+        width     = AV_RL16(buf + 3) & 0x3fff;
+        height    = AV_RL16(buf + 5) & 0x3fff;
+        hscale    = buf[4] >> 6;
+        vscale    = buf[6] >> 6;
         buf      += 7;
         buf_size -= 7;
 
@@ -344,11 +350,15 @@ static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
         s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
         for (i = 0; i < 4; i++)
             for (j = 0; j < 16; j++)
-                memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
+                memcpy(s->prob->token[i][j],
+                       vp8_token_default_probs[i][vp8_coeff_band[j]],
                        sizeof(s->prob->token[i][j]));
-        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
-        memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
-        memcpy(s->prob->mvc      , vp8_mv_default_prob     , sizeof(s->prob->mvc));
+        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
+               sizeof(s->prob->pred16x16));
+        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
+               sizeof(s->prob->pred8x8c));
+        memcpy(s->prob->mvc, vp8_mv_default_prob,
+               sizeof(s->prob->mvc));
         memset(&s->segmentation, 0, sizeof(s->segmentation));
         memset(&s->lf_delta, 0, sizeof(s->lf_delta));
     }
@@ -382,10 +392,9 @@ static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
     }
 
     if (!s->macroblocks_base || /* first frame */
-        width != s->avctx->width || height != s->avctx->height) {
+        width != s->avctx->width || height != s->avctx->height)
         if ((ret = update_dimensions(s, width, height)) < 0)
             return ret;
-    }
 
     get_quants(s);
 
@@ -405,7 +414,7 @@ static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
     for (i = 0; i < 4; i++)
         for (j = 0; j < 8; j++)
             for (k = 0; k < 3; k++)
-                for (l = 0; l < NUM_DCT_TOKENS-1; l++)
+                for (l = 0; l < NUM_DCT_TOKENS - 1; l++)
                     if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
                         int prob = vp8_rac_get_uint(c, 8);
                         for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
@@ -437,7 +446,8 @@ static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
     return 0;
 }
 
-static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
+static av_always_inline
+void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
 {
     dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
     dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
@@ -461,13 +471,13 @@ static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
             x += 8;
     } else {
         // small_mvtree
-        const uint8_t *ps = p+2;
+        const uint8_t *ps = p + 2;
         bit = vp56_rac_get_prob(c, *ps);
-        ps += 1 + 3*bit;
-        x  += 4*bit;
+        ps += 1 + 3 * bit;
+        x  += 4 * bit;
         bit = vp56_rac_get_prob(c, *ps);
         ps += 1 + bit;
-        x  += 2*bit;
+        x  += 2 * bit;
         x  += vp56_rac_get_prob(c, *ps);
     }
 
@@ -478,10 +488,10 @@ static av_always_inline
 const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
 {
     if (left == top)
-        return vp8_submv_prob[4-!!left];
+        return vp8_submv_prob[4 - !!left];
     if (!top)
         return vp8_submv_prob[2];
-    return vp8_submv_prob[1-!!left];
+    return vp8_submv_prob[1 - !!left];
 }
 
 /**
@@ -495,9 +505,8 @@ int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, int lay
     int n, num;
     VP8Macroblock *top_mb;
     VP8Macroblock *left_mb = &mb[-1];
-    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
-                  *mbsplits_top,
-                  *mbsplits_cur, *firstidx;
+    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning];
+    const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx;
     VP56mv *top_mv;
     VP56mv *left_mv = left_mb->bmv;
     VP56mv *cur_mv  = mb->bmv;
@@ -505,23 +514,22 @@ int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, int lay
     if (!layout) // layout is inlined, s->mb_layout is not
         top_mb = &mb[2];
     else
-        top_mb = &mb[-s->mb_width-1];
+        top_mb = &mb[-s->mb_width - 1];
     mbsplits_top = vp8_mbsplits[top_mb->partitioning];
-    top_mv = top_mb->bmv;
+    top_mv       = top_mb->bmv;
 
     if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
-        if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
+        if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1]))
             part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
-        } else {
+        else
             part_idx = VP8_SPLITMVMODE_8x8;
-        }
     } else {
         part_idx = VP8_SPLITMVMODE_4x4;
     }
 
-    num = vp8_mbsplit_count[part_idx];
-    mbsplits_cur = vp8_mbsplits[part_idx],
-    firstidx = vp8_mbfirstidx[part_idx];
+    num              = vp8_mbsplit_count[part_idx];
+    mbsplits_cur     = vp8_mbsplits[part_idx],
+    firstidx         = vp8_mbfirstidx[part_idx];
     mb->partitioning = part_idx;
 
     for (n = 0; n < num; n++) {
@@ -532,7 +540,7 @@ int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, int lay
         if (!(k & 3))
             left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
         else
-            left  = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
+            left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
         if (k <= 3)
             above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
         else
@@ -560,11 +568,12 @@ int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, int lay
 }
 
 static av_always_inline
-void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout)
+void decode_mvs(VP8Context *s, VP8Macroblock *mb,
+                int mb_x, int mb_y, int layout)
 {
-    VP8Macroblock *mb_edge[3] = { 0 /* top */,
+    VP8Macroblock *mb_edge[3] = { 0      /* top */,
                                   mb - 1 /* left */,
-                                  0 /* top-left */ };
+                                  0      /* top-left */ };
     enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
     enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
     int idx = CNT_ZERO;
@@ -577,10 +586,9 @@ void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout
     if (!layout) { // layout is inlined (s->mb_layout is not)
         mb_edge[0] = mb + 2;
         mb_edge[2] = mb + 1;
-    }
-    else {
-        mb_edge[0] = mb - s->mb_width-1;
-        mb_edge[2] = mb - s->mb_width-2;
+    } else {
+        mb_edge[0] = mb - s->mb_width - 1;
+        mb_edge[2] = mb - s->mb_width - 2;
     }
 
     AV_ZERO32(&near_mv[0]);
@@ -588,24 +596,25 @@ void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout
     AV_ZERO32(&near_mv[2]);
 
     /* Process MB on top, left and top-left */
-    #define MV_EDGE_CHECK(n)\
-    {\
-        VP8Macroblock *edge = mb_edge[n];\
-        int edge_ref = edge->ref_frame;\
-        if (edge_ref != VP56_FRAME_CURRENT) {\
-            uint32_t mv = AV_RN32A(&edge->mv);\
-            if (mv) {\
-                if (cur_sign_bias != sign_bias[edge_ref]) {\
-                    /* SWAR negate of the values in mv. */\
-                    mv = ~mv;\
-                    mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
-                }\
-                if (!n || mv != AV_RN32A(&near_mv[idx]))\
-                    AV_WN32A(&near_mv[++idx], mv);\
-                cnt[idx]      += 1 + (n != 2);\
-            } else\
-                cnt[CNT_ZERO] += 1 + (n != 2);\
-        }\
+#define MV_EDGE_CHECK(n)                                                      \
+    {                                                                         \
+        VP8Macroblock *edge = mb_edge[n];                                     \
+        int edge_ref = edge->ref_frame;                                       \
+        if (edge_ref != VP56_FRAME_CURRENT) {                                 \
+            uint32_t mv = AV_RN32A(&edge->mv);                                \
+            if (mv) {                                                         \
+                if (cur_sign_bias != sign_bias[edge_ref]) {                   \
+                    /* SWAR negate of the values in mv. */                    \
+                    mv = ~mv;                                                 \
+                    mv = ((mv & 0x7fff7fff) +                                 \
+                          0x00010001) ^ (mv & 0x80008000);                    \
+                }                                                             \
+                if (!n || mv != AV_RN32A(&near_mv[idx]))                      \
+                    AV_WN32A(&near_mv[++idx], mv);                            \
+                cnt[idx] += 1 + (n != 2);                                     \
+            } else                                                            \
+                cnt[CNT_ZERO] += 1 + (n != 2);                                \
+        }                                                                     \
     }
 
     MV_EDGE_CHECK(0)
@@ -617,7 +626,8 @@ void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout
         mb->mode = VP8_MVMODE_MV;
 
         /* If we have three distinct MVs, merge first and last if they're the same */
-        if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
+        if (cnt[CNT_SPLITMV] &&
+            AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
             cnt[CNT_NEAREST] += 1;
 
         /* Swap near and nearest if necessary */
@@ -628,7 +638,6 @@ void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout
 
         if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
             if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
-
                 /* Choose the best mv out of 0,0 and the nearest mv */
                 clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
                 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
@@ -637,10 +646,10 @@ void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout
 
                 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
                     mb->mode = VP8_MVMODE_SPLIT;
-                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout) - 1];
+                    mb->mv   = mb->bmv[decode_splitmvs(s, c, mb, layout) - 1];
                 } else {
-                    mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
-                    mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
+                    mb->mv.y  += read_mv_component(c, s->prob->mvc[0]);
+                    mb->mv.x  += read_mv_component(c, s->prob->mvc[1]);
                     mb->bmv[0] = mb->mv;
                 }
             } else {
@@ -670,8 +679,8 @@ void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
     }
     if (keyframe) {
         int x, y;
-        uint8_t* top;
-        uint8_t* const left = s->intra4x4_pred_mode_left;
+        uint8_t *top;
+        uint8_t *const left = s->intra4x4_pred_mode_left;
         if (layout == 1)
             top = mb->intra4x4_pred_mode_top;
         else
@@ -679,16 +688,17 @@ void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
         for (y = 0; y < 4; y++) {
             for (x = 0; x < 4; x++) {
                 const uint8_t *ctx;
-                ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
+                ctx       = vp8_pred4x4_prob_intra[top[x]][left[y]];
                 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
-                left[y] = top[x] = *intra4x4;
+                left[y]   = top[x] = *intra4x4;
                 intra4x4++;
             }
         }
     } else {
         int i;
         for (i = 0; i < 16; i++)
-            intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
+            intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree,
+                                           vp8_pred4x4_prob_inter);
     }
 }
 
@@ -707,7 +717,8 @@ void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
     mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
 
     if (s->keyframe) {
-        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);
+        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra,
+                                    vp8_pred16x16_prob_intra);
 
         if (mb->mode == MODE_I4x4) {
             decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
@@ -717,19 +728,21 @@ void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
                 AV_WN32A(mb->intra4x4_pred_mode_top, modes);
             else
                 AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
-            AV_WN32A( s->intra4x4_pred_mode_left, modes);
+            AV_WN32A(s->intra4x4_pred_mode_left, modes);
         }
 
-        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
-        mb->ref_frame = VP56_FRAME_CURRENT;
+        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
+                                                vp8_pred8x8c_prob_intra);
+        mb->ref_frame        = VP56_FRAME_CURRENT;
     } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
         // inter MB, 16.2
         if (vp56_rac_get_prob_branchy(c, s->prob->last))
-            mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
-                VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
+            mb->ref_frame =
+                vp56_rac_get_prob(c, s->prob->golden) ? VP56_FRAME_GOLDEN2 /* altref */
+                                                      : VP56_FRAME_GOLDEN;
         else
             mb->ref_frame = VP56_FRAME_PREVIOUS;
-        s->ref_count[mb->ref_frame-1]++;
+        s->ref_count[mb->ref_frame - 1]++;
 
         // motion vectors, 16.3
         decode_mvs(s, mb, mb_x, mb_y, layout);
@@ -740,26 +753,29 @@ void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
         if (mb->mode == MODE_I4x4)
             decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
 
-        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
-        mb->ref_frame = VP56_FRAME_CURRENT;
-        mb->partitioning = VP8_SPLITMVMODE_NONE;
+        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
+                                                s->prob->pred8x8c);
+        mb->ref_frame        = VP56_FRAME_CURRENT;
+        mb->partitioning     = VP8_SPLITMVMODE_NONE;
         AV_ZERO32(&mb->bmv[0]);
     }
 }
 
 #ifndef decode_block_coeffs_internal
 /**
- * @param r arithmetic bitstream reader context
+ * @param r     arithmetic bitstream reader context
  * @param block destination for block coefficients
  * @param probs probabilities to use when reading trees from the bitstream
- * @param i initial coeff index, 0 unless a separate DC block is coded
- * @param qmul array holding the dc/ac dequant factor at position 0/1
+ * @param i     initial coeff index, 0 unless a separate DC block is coded
+ * @param qmul  array holding the dc/ac dequant factor at position 0/1
+ *
  * @return 0 if no coeffs were decoded
  *         otherwise, the index of the last coeff decoded plus one
  */
 static int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
-                                        uint8_t probs[16][3][NUM_DCT_TOKENS-1],
-                                        int i, uint8_t *token_prob, int16_t qmul[2])
+                                        uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
+                                        int i, uint8_t *token_prob,
+                                        int16_t qmul[2])
 {
     VP56RangeCoder c = *r;
     goto skip_eob;
@@ -778,7 +794,7 @@ skip_eob:
 
         if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
             coeff = 1;
-            token_prob = probs[i+1][1];
+            token_prob = probs[i + 1][1];
         } else {
             if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
                 coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
@@ -789,21 +805,21 @@ skip_eob:
                 // DCT_CAT*
                 if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
                     if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
-                        coeff  = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
+                        coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
                     } else {                                    // DCT_CAT2
                         coeff  = 7;
                         coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
                         coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
                     }
                 } else {    // DCT_CAT3 and up
-                    int a = vp56_rac_get_prob(&c, token_prob[8]);
-                    int b = vp56_rac_get_prob(&c, token_prob[9+a]);
-                    int cat = (a<<1) + b;
-                    coeff  = 3 + (8<<cat);
+                    int a   = vp56_rac_get_prob(&c, token_prob[8]);
+                    int b   = vp56_rac_get_prob(&c, token_prob[9 + a]);
+                    int cat = (a << 1) + b;
+                    coeff  = 3 + (8 << cat);
                     coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
                 }
             }
-            token_prob = probs[i+1][2];
+            token_prob = probs[i + 1][2];
         }
         block[zigzag_scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
     } while (++i < 16);
@@ -814,19 +830,20 @@ skip_eob:
 #endif
 
 /**
- * @param c arithmetic bitstream reader context
- * @param block destination for block coefficients
- * @param probs probabilities to use when reading trees from the bitstream
- * @param i initial coeff index, 0 unless a separate DC block is coded
+ * @param c          arithmetic bitstream reader context
+ * @param block      destination for block coefficients
+ * @param probs      probabilities to use when reading trees from the bitstream
+ * @param i          initial coeff index, 0 unless a separate DC block is coded
  * @param zero_nhood the initial prediction context for number of surrounding
  *                   all-zero blocks (only left/top, so 0-2)
- * @param qmul array holding the dc/ac dequant factor at position 0/1
+ * @param qmul       array holding the dc/ac dequant factor at position 0/1
+ *
  * @return 0 if no coeffs were decoded
  *         otherwise, the index of the last coeff decoded plus one
  */
 static av_always_inline
 int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
-                        uint8_t probs[16][3][NUM_DCT_TOKENS-1],
+                        uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                         int i, int zero_nhood, int16_t qmul[2])
 {
     uint8_t *token_prob = probs[i][zero_nhood];
@@ -836,8 +853,8 @@ int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
 }
 
 static av_always_inline
-void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c, VP8Macroblock *mb,
-                      uint8_t t_nnz[9], uint8_t l_nnz[9])
+void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c,
+                      VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9])
 {
     int i, x, y, luma_start = 0, luma_ctx = 3;
     int nnz_pred, nnz, nnz_total = 0;
@@ -848,28 +865,31 @@ void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c, VP8Ma
         nnz_pred = t_nnz[8] + l_nnz[8];
 
         // decode DC values and do hadamard
-        nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0, nnz_pred,
-                                  s->qmat[segment].luma_dc_qmul);
+        nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0,
+                                  nnz_pred, s->qmat[segment].luma_dc_qmul);
         l_nnz[8] = t_nnz[8] = !!nnz;
         if (nnz) {
             nnz_total += nnz;
-            block_dc = 1;
+            block_dc   = 1;
             if (nnz == 1)
                 s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
             else
                 s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
         }
         luma_start = 1;
-        luma_ctx = 0;
+        luma_ctx   = 0;
     }
 
     // luma blocks
     for (y = 0; y < 4; y++)
         for (x = 0; x < 4; x++) {
             nnz_pred = l_nnz[y] + t_nnz[x];
-            nnz = decode_block_coeffs(c, td->block[y][x], s->prob->token[luma_ctx], luma_start,
-                                      nnz_pred, s->qmat[segment].luma_qmul);
-            // nnz+block_dc may be one more than the actual last index, but we don't care
+            nnz = decode_block_coeffs(c, td->block[y][x],
+                                      s->prob->token[luma_ctx],
+                                      luma_start, nnz_pred,
+                                      s->qmat[segment].luma_qmul);
+            /* nnz+block_dc may be one more than the actual last index,
+             * but we don't care */
             td->non_zero_count_cache[y][x] = nnz + block_dc;
             t_nnz[x] = l_nnz[y] = !!nnz;
             nnz_total += nnz;
@@ -881,12 +901,14 @@ void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c, VP8Ma
     for (i = 4; i < 6; i++)
         for (y = 0; y < 2; y++)
             for (x = 0; x < 2; x++) {
-                nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
-                nnz = decode_block_coeffs(c, td->block[i][(y<<1)+x], s->prob->token[2], 0,
-                                          nnz_pred, s->qmat[segment].chroma_qmul);
-                td->non_zero_count_cache[i][(y<<1)+x] = nnz;
-                t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
-                nnz_total += nnz;
+                nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x];
+                nnz      = decode_block_coeffs(c, td->block[i][(y << 1) + x],
+                                               s->prob->token[2],
+                                               0, nnz_pred,
+                                               s->qmat[segment].chroma_qmul);
+                td->non_zero_count_cache[i][(y << 1) + x] = nnz;
+                t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz;
+                nnz_total       += nnz;
             }
 
     // if there were no coded coeffs despite the macroblock not being marked skip,
@@ -897,65 +919,67 @@ void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c, VP8Ma
 }
 
 static av_always_inline
-void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
+void backup_mb_border(uint8_t *top_border, uint8_t *src_y,
+                      uint8_t *src_cb, uint8_t *src_cr,
                       int linesize, int uvlinesize, int simple)
 {
-    AV_COPY128(top_border, src_y + 15*linesize);
+    AV_COPY128(top_border, src_y + 15 * linesize);
     if (!simple) {
-        AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
-        AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
+        AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
+        AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
     }
 }
 
 static av_always_inline
-void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
-                    int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
-                    int simple, int xchg)
+void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb,
+                    uint8_t *src_cr, int linesize, int uvlinesize, int mb_x,
+                    int mb_y, int mb_width, int simple, int xchg)
 {
-    uint8_t *top_border_m1 = top_border-32;     // for TL prediction
-    src_y  -=   linesize;
+    uint8_t *top_border_m1 = top_border - 32;     // for TL prediction
+    src_y  -= linesize;
     src_cb -= uvlinesize;
     src_cr -= uvlinesize;
 
-#define XCHG(a,b,xchg) do {                     \
-        if (xchg) AV_SWAP64(b,a);               \
-        else      AV_COPY64(b,a);               \
+#define XCHG(a, b, xchg)                                                      \
+    do {                                                                      \
+        if (xchg)                                                             \
+            AV_SWAP64(b, a);                                                  \
+        else                                                                  \
+            AV_COPY64(b, a);                                                  \
     } while (0)
 
-    XCHG(top_border_m1+8, src_y-8, xchg);
-    XCHG(top_border,      src_y,   xchg);
-    XCHG(top_border+8,    src_y+8, 1);
-    if (mb_x < mb_width-1)
-        XCHG(top_border+32, src_y+16, 1);
+    XCHG(top_border_m1 + 8, src_y - 8, xchg);
+    XCHG(top_border, src_y, xchg);
+    XCHG(top_border + 8, src_y + 8, 1);
+    if (mb_x < mb_width - 1)
+        XCHG(top_border + 32, src_y + 16, 1);
 
     // only copy chroma for normal loop filter
     // or to initialize the top row to 127
     if (!simple || !mb_y) {
-        XCHG(top_border_m1+16, src_cb-8, xchg);
-        XCHG(top_border_m1+24, src_cr-8, xchg);
-        XCHG(top_border+16,    src_cb, 1);
-        XCHG(top_border+24,    src_cr, 1);
+        XCHG(top_border_m1 + 16, src_cb - 8, xchg);
+        XCHG(top_border_m1 + 24, src_cr - 8, xchg);
+        XCHG(top_border + 16, src_cb, 1);
+        XCHG(top_border + 24, src_cr, 1);
     }
 }
 
 static av_always_inline
 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
 {
-    if (!mb_x) {
+    if (!mb_x)
         return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
-    } else {
+    else
         return mb_y ? mode : LEFT_DC_PRED8x8;
-    }
 }
 
 static av_always_inline
 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
 {
-    if (!mb_x) {
+    if (!mb_x)
         return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
-    } else {
+    else
         return mb_y ? mode : HOR_PRED8x8;
-    }
 }
 
 static av_always_inline
@@ -968,7 +992,7 @@ int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
         return !mb_y ? DC_127_PRED8x8 : mode;
     case HOR_PRED8x8:
         return !mb_x ? DC_129_PRED8x8 : mode;
-    case PLANE_PRED8x8 /*TM*/:
+    case PLANE_PRED8x8: /* TM */
         return check_tm_pred8x8_mode(mode, mb_x, mb_y);
     }
     return mode;
@@ -1007,7 +1031,8 @@ int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf
         return !mb_x ? DC_129_PRED : mode;
     case TM_VP8_PRED:
         return check_tm_pred4x4_mode(mode, mb_x, mb_y);
-    case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
+    case DC_PRED: /* 4x4 DC doesn't use the same "H.264-style" exceptions
+                   * as 16x16/8x8 DC */
     case DIAG_DOWN_RIGHT_PRED:
     case VERT_RIGHT_PRED:
     case HOR_DOWN_PRED:
@@ -1025,10 +1050,10 @@ void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
     int x, y, mode, nnz;
     uint32_t tr;
 
-    // for the first row, we need to run xchg_mb_border to init the top edge to 127
-    // otherwise, skip it if we aren't going to deblock
+    /* for the first row, we need to run xchg_mb_border to init the top edge
+     * to 127; otherwise, skip it if we aren't going to deblock */
     if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
-        xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
+        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
                        s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                        s->filter.simple, 1);
 
@@ -1046,10 +1071,9 @@ void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
 
         // if we're on the right edge of the frame, said edge is extended
         // from the top macroblock
-        if (mb_y &&
-            mb_x == s->mb_width-1) {
-            tr = tr_right[-1]*0x01010101u;
-            tr_right = (uint8_t *)&tr;
+        if (mb_y && mb_x == s->mb_width - 1) {
+            tr       = tr_right[-1] * 0x01010101u;
+            tr_right = (uint8_t *) &tr;
         }
 
         if (mb->skip)
@@ -1059,27 +1083,29 @@ void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
             uint8_t *topright = ptr + 4 - s->linesize;
             for (x = 0; x < 4; x++) {
                 int copy = 0, linesize = s->linesize;
-                uint8_t *dst = ptr+4*x;
-                DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];
+                uint8_t *dst = ptr + 4 * x;
+                DECLARE_ALIGNED(4, uint8_t, copy_dst)[5 * 8];
 
                 if ((y == 0 || x == 3) && mb_y == 0) {
                     topright = tr_top;
                 } else if (x == 3)
                     topright = tr_right;
 
-                mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
+                mode = check_intra_pred4x4_mode_emuedge(intra4x4[x],
+                                                        mb_x + x, mb_y + y,
+                                                        &copy);
                 if (copy) {
-                    dst = copy_dst + 12;
+                    dst      = copy_dst + 12;
                     linesize = 8;
                     if (!(mb_y + y)) {
                         copy_dst[3] = 127U;
-                        AV_WN32A(copy_dst+4, 127U * 0x01010101U);
+                        AV_WN32A(copy_dst + 4, 127U * 0x01010101U);
                     } else {
-                        AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
+                        AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize);
                         if (!(mb_x + x)) {
                             copy_dst[3] = 129U;
                         } else {
-                            copy_dst[3] = ptr[4*x-s->linesize-1];
+                            copy_dst[3] = ptr[4 * x - s->linesize - 1];
                         }
                     }
                     if (!(mb_x + x)) {
@@ -1088,31 +1114,33 @@ void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                         copy_dst[27] =
                         copy_dst[35] = 129U;
                     } else {
-                        copy_dst[11] = ptr[4*x              -1];
-                        copy_dst[19] = ptr[4*x+s->linesize  -1];
-                        copy_dst[27] = ptr[4*x+s->linesize*2-1];
-                        copy_dst[35] = ptr[4*x+s->linesize*3-1];
+                        copy_dst[11] = ptr[4 * x                   - 1];
+                        copy_dst[19] = ptr[4 * x + s->linesize     - 1];
+                        copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1];
+                        copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1];
                     }
                 }
                 s->hpc.pred4x4[mode](dst, topright, linesize);
                 if (copy) {
-                    AV_COPY32(ptr+4*x              , copy_dst+12);
-                    AV_COPY32(ptr+4*x+s->linesize  , copy_dst+20);
-                    AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
-                    AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
+                    AV_COPY32(ptr + 4 * x,                   copy_dst + 12);
+                    AV_COPY32(ptr + 4 * x + s->linesize,     copy_dst + 20);
+                    AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28);
+                    AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36);
                 }
 
                 nnz = td->non_zero_count_cache[y][x];
                 if (nnz) {
                     if (nnz == 1)
-                        s->vp8dsp.vp8_idct_dc_add(ptr+4*x, td->block[y][x], s->linesize);
+                        s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x,
+                                                  td->block[y][x], s->linesize);
                     else
-                        s->vp8dsp.vp8_idct_add(ptr+4*x, td->block[y][x], s->linesize);
+                        s->vp8dsp.vp8_idct_add(ptr + 4 * x,
+                                               td->block[y][x], s->linesize);
                 }
                 topright += 4;
             }
 
-            ptr   += 4*s->linesize;
+            ptr      += 4 * s->linesize;
             intra4x4 += 4;
         }
     }
@@ -1122,7 +1150,7 @@ void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
     s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
 
     if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
-        xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
+        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
                        s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                        s->filter.simple, 0);
 }
@@ -1137,18 +1165,18 @@ static const uint8_t subpel_idx[3][8] = {
 /**
  * luma MC function
  *
- * @param s VP8 decoding context
- * @param dst target buffer for block data at block position
- * @param ref reference picture buffer at origin (0, 0)
- * @param mv motion vector (relative to block position) to get pixel data from
- * @param x_off horizontal position of block from origin (0, 0)
- * @param y_off vertical position of block from origin (0, 0)
- * @param block_w width of block (16, 8 or 4)
- * @param block_h height of block (always same as block_w)
- * @param width width of src/dst plane data
- * @param height height of src/dst plane data
+ * @param s        VP8 decoding context
+ * @param dst      target buffer for block data at block position
+ * @param ref      reference picture buffer at origin (0, 0)
+ * @param mv       motion vector (relative to block position) to get pixel data from
+ * @param x_off    horizontal position of block from origin (0, 0)
+ * @param y_off    vertical position of block from origin (0, 0)
+ * @param block_w  width of block (16, 8 or 4)
+ * @param block_h  height of block (always same as block_w)
+ * @param width    width of src/dst plane data
+ * @param height   height of src/dst plane data
  * @param linesize size of a single line of plane data, including padding
- * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
+ * @param mc_func  motion compensation function pointers (bilinear or sixtap MC)
  */
 static av_always_inline
 void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
@@ -1162,8 +1190,8 @@ void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
     if (AV_RN32A(mv)) {
         int src_linesize = linesize;
 
-        int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
-        int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];
+        int mx = (mv->x << 1) & 7, mx_idx = subpel_idx[0][mx];
+        int my = (mv->y << 1) & 7, my_idx = subpel_idx[0][my];
 
         x_off += mv->x >> 2;
         y_off += mv->y >> 2;
@@ -1176,46 +1204,50 @@ void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
             s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                      src - my_idx * linesize - mx_idx,
                                      EDGE_EMU_LINESIZE, linesize,
-                                     block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
-                                     x_off - mx_idx, y_off - my_idx, width, height);
+                                     block_w + subpel_idx[1][mx],
+                                     block_h + subpel_idx[1][my],
+                                     x_off - mx_idx, y_off - my_idx,
+                                     width, height);
             src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
             src_linesize = EDGE_EMU_LINESIZE;
         }
         mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
     } else {
         ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
-        mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
+        mc_func[0][0](dst, linesize, src + y_off * linesize + x_off,
+                      linesize, block_h, 0, 0);
     }
 }
 
 /**
  * chroma MC function
  *
- * @param s VP8 decoding context
- * @param dst1 target buffer for block data at block position (U plane)
- * @param dst2 target buffer for block data at block position (V plane)
- * @param ref reference picture buffer at origin (0, 0)
- * @param mv motion vector (relative to block position) to get pixel data from
- * @param x_off horizontal position of block from origin (0, 0)
- * @param y_off vertical position of block from origin (0, 0)
- * @param block_w width of block (16, 8 or 4)
- * @param block_h height of block (always same as block_w)
- * @param width width of src/dst plane data
- * @param height height of src/dst plane data
+ * @param s        VP8 decoding context
+ * @param dst1     target buffer for block data at block position (U plane)
+ * @param dst2     target buffer for block data at block position (V plane)
+ * @param ref      reference picture buffer at origin (0, 0)
+ * @param mv       motion vector (relative to block position) to get pixel data from
+ * @param x_off    horizontal position of block from origin (0, 0)
+ * @param y_off    vertical position of block from origin (0, 0)
+ * @param block_w  width of block (16, 8 or 4)
+ * @param block_h  height of block (always same as block_w)
+ * @param width    width of src/dst plane data
+ * @param height   height of src/dst plane data
  * @param linesize size of a single line of plane data, including padding
- * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
+ * @param mc_func  motion compensation function pointers (bilinear or sixtap MC)
  */
 static av_always_inline
-void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, uint8_t *dst2,
-                   ThreadFrame *ref, const VP56mv *mv, int x_off, int y_off,
-                   int block_w, int block_h, int width, int height, ptrdiff_t linesize,
+void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1,
+                   uint8_t *dst2, ThreadFrame *ref, const VP56mv *mv,
+                   int x_off, int y_off, int block_w, int block_h,
+                   int width, int height, ptrdiff_t linesize,
                    vp8_mc_func mc_func[3][3])
 {
     uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];
 
     if (AV_RN32A(mv)) {
-        int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
-        int my = mv->y&7, my_idx = subpel_idx[0][my];
+        int mx = mv->x & 7, mx_idx = subpel_idx[0][mx];
+        int my = mv->y & 7, my_idx = subpel_idx[0][my];
 
         x_off += mv->x >> 3;
         y_off += mv->y >> 3;
@@ -1239,7 +1271,7 @@ void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, uint8_t *dst
                                      EDGE_EMU_LINESIZE, linesize,
                                      block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                      x_off - mx_idx, y_off - my_idx, width, height);
-            src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE* my_idx;
+            src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
             mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
         } else {
             mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
@@ -1255,8 +1287,7 @@ void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, uint8_t *dst
 static av_always_inline
 void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                  ThreadFrame *ref_frame, int x_off, int y_off,
-                 int bx_off, int by_off,
-                 int block_w, int block_h,
+                 int bx_off, int by_off, int block_w, int block_h,
                  int width, int height, VP56mv *mv)
 {
     VP56mv uvmv = *mv;
@@ -1272,10 +1303,14 @@ void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
         uvmv.x &= ~7;
         uvmv.y &= ~7;
     }
-    x_off   >>= 1; y_off   >>= 1;
-    bx_off  >>= 1; by_off  >>= 1;
-    width   >>= 1; height  >>= 1;
-    block_w >>= 1; block_h >>= 1;
+    x_off   >>= 1;
+    y_off   >>= 1;
+    bx_off  >>= 1;
+    by_off  >>= 1;
+    width   >>= 1;
+    height  >>= 1;
+    block_w >>= 1;
+    block_h >>= 1;
     vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
                   dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
                   &uvmv, x_off + bx_off, y_off + by_off,
@@ -1284,22 +1319,24 @@ void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
 }
 
 /* Fetch pixels for estimated mv 4 macroblocks ahead.
- * Optimized for 64-byte cache lines.  Inspired by ffh264 prefetch_motion. */
-static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
+ * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
+static av_always_inline
+void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
+                     int mb_xy, int ref)
 {
     /* Don't prefetch refs that haven't been used very often this frame. */
-    if (s->ref_count[ref-1] > (mb_xy >> 5)) {
+    if (s->ref_count[ref - 1] > (mb_xy >> 5)) {
         int x_off = mb_x << 4, y_off = mb_y << 4;
-        int mx = (mb->mv.x>>2) + x_off + 8;
-        int my = (mb->mv.y>>2) + y_off;
-        uint8_t **src= s->framep[ref]->tf.f->data;
-        int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
+        int mx = (mb->mv.x >> 2) + x_off + 8;
+        int my = (mb->mv.y >> 2) + y_off;
+        uint8_t **src = s->framep[ref]->tf.f->data;
+        int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64;
         /* For threading, a ff_thread_await_progress here might be useful, but
          * it actually slows down the decoder. Since a bad prefetch doesn't
          * generate bad decoder output, we don't run it here. */
-        s->vdsp.prefetch(src[0]+off, s->linesize, 4);
-        off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
-        s->vdsp.prefetch(src[1]+off, src[2]-src[1], 2);
+        s->vdsp.prefetch(src[0] + off, s->linesize, 4);
+        off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64;
+        s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
     }
 }
 
@@ -1311,7 +1348,7 @@ void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                    VP8Macroblock *mb, int mb_x, int mb_y)
 {
     int x_off = mb_x << 4, y_off = mb_y << 4;
-    int width = 16*s->mb_width, height = 16*s->mb_height;
+    int width = 16 * s->mb_width, height = 16 * s->mb_height;
     ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
     VP56mv *bmv = mb->bmv;
 
@@ -1327,35 +1364,38 @@ void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
         /* Y */
         for (y = 0; y < 4; y++) {
             for (x = 0; x < 4; x++) {
-                vp8_mc_luma(s, td, dst[0] + 4*y*s->linesize + x*4,
-                            ref, &bmv[4*y + x],
-                            4*x + x_off, 4*y + y_off, 4, 4,
+                vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4,
+                            ref, &bmv[4 * y + x],
+                            4 * x + x_off, 4 * y + y_off, 4, 4,
                             width, height, s->linesize,
                             s->put_pixels_tab[2]);
             }
         }
 
         /* U/V */
-        x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
+        x_off  >>= 1;
+        y_off  >>= 1;
+        width  >>= 1;
+        height >>= 1;
         for (y = 0; y < 2; y++) {
             for (x = 0; x < 2; x++) {
-                uvmv.x = mb->bmv[ 2*y    * 4 + 2*x  ].x +
-                         mb->bmv[ 2*y    * 4 + 2*x+1].x +
-                         mb->bmv[(2*y+1) * 4 + 2*x  ].x +
-                         mb->bmv[(2*y+1) * 4 + 2*x+1].x;
-                uvmv.y = mb->bmv[ 2*y    * 4 + 2*x  ].y +
-                         mb->bmv[ 2*y    * 4 + 2*x+1].y +
-                         mb->bmv[(2*y+1) * 4 + 2*x  ].y +
-                         mb->bmv[(2*y+1) * 4 + 2*x+1].y;
-                uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
-                uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
+                uvmv.x = mb->bmv[2 * y       * 4 + 2 * x    ].x +
+                         mb->bmv[2 * y       * 4 + 2 * x + 1].x +
+                         mb->bmv[(2 * y + 1) * 4 + 2 * x    ].x +
+                         mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x;
+                uvmv.y = mb->bmv[2 * y       * 4 + 2 * x    ].y +
+                         mb->bmv[2 * y       * 4 + 2 * x + 1].y +
+                         mb->bmv[(2 * y + 1) * 4 + 2 * x    ].y +
+                         mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y;
+                uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT - 1))) >> 2;
+                uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT - 1))) >> 2;
                 if (s->profile == 3) {
                     uvmv.x &= ~7;
                     uvmv.y &= ~7;
                 }
-                vp8_mc_chroma(s, td, dst[1] + 4*y*s->uvlinesize + x*4,
-                              dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
-                              4*x + x_off, 4*y + y_off, 4, 4,
+                vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4,
+                              dst[2] + 4 * y * s->uvlinesize + x * 4, ref,
+                              &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4,
                               width, height, s->uvlinesize,
                               s->put_pixels_tab[2]);
             }
@@ -1387,8 +1427,8 @@ void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
     }
 }
 
-static av_always_inline void idct_mb(VP8Context *s, VP8ThreadData *td,
-                                     uint8_t *dst[3], VP8Macroblock *mb)
+static av_always_inline
+void idct_mb(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], VP8Macroblock *mb)
 {
     int x, y, ch;
 
@@ -1397,12 +1437,16 @@ static av_always_inline void idct_mb(VP8Context *s, VP8ThreadData *td,
         for (y = 0; y < 4; y++) {
             uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
             if (nnz4) {
-                if (nnz4&~0x01010101) {
+                if (nnz4 & ~0x01010101) {
                     for (x = 0; x < 4; x++) {
-                        if ((uint8_t)nnz4 == 1)
-                            s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, td->block[y][x], s->linesize);
-                        else if((uint8_t)nnz4 > 1)
-                            s->vp8dsp.vp8_idct_add(y_dst+4*x, td->block[y][x], s->linesize);
+                        if ((uint8_t) nnz4 == 1)
+                            s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x,
+                                                      td->block[y][x],
+                                                      s->linesize);
+                        else if ((uint8_t) nnz4 > 1)
+                            s->vp8dsp.vp8_idct_add(y_dst + 4 * x,
+                                                   td->block[y][x],
+                                                   s->linesize);
                         nnz4 >>= 8;
                         if (!nnz4)
                             break;
@@ -1411,36 +1455,42 @@ static av_always_inline void idct_mb(VP8Context *s, VP8ThreadData *td,
                     s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
                 }
             }
-            y_dst += 4*s->linesize;
+            y_dst += 4 * s->linesize;
         }
     }
 
     for (ch = 0; ch < 2; ch++) {
-        uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4+ch]);
+        uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]);
         if (nnz4) {
-            uint8_t *ch_dst = dst[1+ch];
-            if (nnz4&~0x01010101) {
+            uint8_t *ch_dst = dst[1 + ch];
+            if (nnz4 & ~0x01010101) {
                 for (y = 0; y < 2; y++) {
                     for (x = 0; x < 2; x++) {
-                        if ((uint8_t)nnz4 == 1)
-                            s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
-                        else if((uint8_t)nnz4 > 1)
-                            s->vp8dsp.vp8_idct_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
+                        if ((uint8_t) nnz4 == 1)
+                            s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x,
+                                                      td->block[4 + ch][(y << 1) + x],
+                                                      s->uvlinesize);
+                        else if ((uint8_t) nnz4 > 1)
+                            s->vp8dsp.vp8_idct_add(ch_dst + 4 * x,
+                                                   td->block[4 + ch][(y << 1) + x],
+                                                   s->uvlinesize);
                         nnz4 >>= 8;
                         if (!nnz4)
                             goto chroma_idct_end;
                     }
-                    ch_dst += 4*s->uvlinesize;
+                    ch_dst += 4 * s->uvlinesize;
                 }
             } else {
-                s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4+ch], s->uvlinesize);
+                s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize);
             }
         }
-chroma_idct_end: ;
+chroma_idct_end:
+        ;
     }
 }
 
-static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
+static av_always_inline
+void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f)
 {
     int interior_limit, filter_level;
 
@@ -1467,10 +1517,13 @@ static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *m
 
     f->filter_level = filter_level;
     f->inner_limit = interior_limit;
-    f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
+    f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 ||
+                      mb->mode == VP8_MVMODE_SPLIT;
 }
 
-static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
+static av_always_inline
+void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f,
+               int mb_x, int mb_y)
 {
     int mbedge_lim, bedge_lim, hev_thresh;
     int filter_level = f->filter_level;
@@ -1492,82 +1545,84 @@ static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8Filter
     if (!filter_level)
         return;
 
-     bedge_lim = 2*filter_level + inner_limit;
+    bedge_lim  = 2 * filter_level + inner_limit;
     mbedge_lim = bedge_lim + 4;
 
     hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
 
     if (mb_x) {
-        s->vp8dsp.vp8_h_loop_filter16y(dst[0],     linesize,
+        s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
                                        mbedge_lim, inner_limit, hev_thresh);
-        s->vp8dsp.vp8_h_loop_filter8uv(dst[1],     dst[2],      uvlinesize,
+        s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                        mbedge_lim, inner_limit, hev_thresh);
     }
 
     if (inner_filter) {
-        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
+        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] +  4, linesize, bedge_lim,
                                              inner_limit, hev_thresh);
-        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
+        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] +  8, linesize, bedge_lim,
                                              inner_limit, hev_thresh);
-        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
+        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize, bedge_lim,
                                              inner_limit, hev_thresh);
-        s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
-                                             uvlinesize,  bedge_lim,
+        s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] +  4, dst[2] + 4,
+                                             uvlinesize, bedge_lim,
                                              inner_limit, hev_thresh);
     }
 
     if (mb_y) {
-        s->vp8dsp.vp8_v_loop_filter16y(dst[0],     linesize,
+        s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
                                        mbedge_lim, inner_limit, hev_thresh);
-        s->vp8dsp.vp8_v_loop_filter8uv(dst[1],     dst[2],      uvlinesize,
+        s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                        mbedge_lim, inner_limit, hev_thresh);
     }
 
     if (inner_filter) {
-        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
-                                             linesize,    bedge_lim,
+        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] +  4 * linesize,
+                                             linesize, bedge_lim,
                                              inner_limit, hev_thresh);
-        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
-                                             linesize,    bedge_lim,
+        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] +  8 * linesize,
+                                             linesize, bedge_lim,
                                              inner_limit, hev_thresh);
-        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
-                                             linesize,    bedge_lim,
+        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize,
+                                             linesize, bedge_lim,
                                              inner_limit, hev_thresh);
-        s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
-                                             dst[2] + 4 * uvlinesize,
-                                             uvlinesize,  bedge_lim,
+        s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] +  4 * uvlinesize,
+                                             dst[2] +  4 * uvlinesize,
+                                             uvlinesize, bedge_lim,
                                              inner_limit, hev_thresh);
     }
 }
 
-static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
+static av_always_inline
+void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f,
+                      int mb_x, int mb_y)
 {
     int mbedge_lim, bedge_lim;
     int filter_level = f->filter_level;
-    int inner_limit = f->inner_limit;
+    int inner_limit  = f->inner_limit;
     int inner_filter = f->inner_filter;
-    int linesize = s->linesize;
+    int linesize     = s->linesize;
 
     if (!filter_level)
         return;
 
-     bedge_lim = 2*filter_level + inner_limit;
+    bedge_lim  = 2 * filter_level + inner_limit;
     mbedge_lim = bedge_lim + 4;
 
     if (mb_x)
         s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
     if (inner_filter) {
-        s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
-        s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
-        s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
+        s->vp8dsp.vp8_h_loop_filter_simple(dst +  4, linesize, bedge_lim);
+        s->vp8dsp.vp8_h_loop_filter_simple(dst +  8, linesize, bedge_lim);
+        s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim);
     }
 
     if (mb_y)
         s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
     if (inner_filter) {
-        s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
-        s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
-        s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
+        s->vp8dsp.vp8_v_loop_filter_simple(dst +  4 * linesize, linesize, bedge_lim);
+        s->vp8dsp.vp8_v_loop_filter_simple(dst +  8 * linesize, linesize, bedge_lim);
+        s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim);
     }
 }
 
@@ -1581,16 +1636,18 @@ static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
     s->mv_min.y = -MARGIN;
     s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
     for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
-        VP8Macroblock *mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
-        int mb_xy = mb_y*s->mb_width;
+        VP8Macroblock *mb = s->macroblocks_base +
+                            ((s->mb_width + 1) * (mb_y + 1) + 1);
+        int mb_xy = mb_y * s->mb_width;
 
-        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
+        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
 
         s->mv_min.x = -MARGIN;
         s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
         for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
             if (mb_y == 0)
-                AV_WN32A((mb-s->mb_width-1)->intra4x4_pred_mode_top, DC_PRED*0x01010101);
+                AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
+                         DC_PRED * 0x01010101);
             decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
                            prev_frame && prev_frame->seg_map ?
                            prev_frame->seg_map->data + mb_xy : NULL, 1);
@@ -1603,37 +1660,40 @@ static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
 }
 
 #if HAVE_THREADS
-#define check_thread_pos(td, otd, mb_x_check, mb_y_check)\
-    do {\
-        int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF);\
-        if (otd->thread_mb_pos < tmp) {\
-            pthread_mutex_lock(&otd->lock);\
-            td->wait_mb_pos = tmp;\
-            do {\
-                if (otd->thread_mb_pos >= tmp)\
-                    break;\
-                pthread_cond_wait(&otd->cond, &otd->lock);\
-            } while (1);\
-            td->wait_mb_pos = INT_MAX;\
-            pthread_mutex_unlock(&otd->lock);\
-        }\
-    } while(0);
-
-#define update_pos(td, mb_y, mb_x)\
-    do {\
-    int pos              = (mb_y << 16) | (mb_x & 0xFFFF);\
-    int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && (num_jobs > 1);\
-    int is_null          = (next_td == NULL) || (prev_td == NULL);\
-    int pos_check        = (is_null) ? 1 :\
-                            (next_td != td && pos >= next_td->wait_mb_pos) ||\
-                            (prev_td != td && pos >= prev_td->wait_mb_pos);\
-    td->thread_mb_pos = pos;\
-    if (sliced_threading && pos_check) {\
-        pthread_mutex_lock(&td->lock);\
-        pthread_cond_broadcast(&td->cond);\
-        pthread_mutex_unlock(&td->lock);\
-    }\
-    } while(0);
+#define check_thread_pos(td, otd, mb_x_check, mb_y_check)                     \
+    do {                                                                      \
+        int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF);                 \
+        if (otd->thread_mb_pos < tmp) {                                       \
+            pthread_mutex_lock(&otd->lock);                                   \
+            td->wait_mb_pos = tmp;                                            \
+            do {                                                              \
+                if (otd->thread_mb_pos >= tmp)                                \
+                    break;                                                    \
+                pthread_cond_wait(&otd->cond, &otd->lock);                    \
+            } while (1);                                                      \
+            td->wait_mb_pos = INT_MAX;                                        \
+            pthread_mutex_unlock(&otd->lock);                                 \
+        }                                                                     \
+    } while (0);
+
+#define update_pos(td, mb_y, mb_x)                                            \
+    do {                                                                      \
+        int pos              = (mb_y << 16) | (mb_x & 0xFFFF);                \
+        int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \
+                               (num_jobs > 1);                                \
+        int is_null          = (next_td == NULL) || (prev_td == NULL);        \
+        int pos_check        = (is_null) ? 1                                  \
+                                         : (next_td != td &&                  \
+                                            pos >= next_td->wait_mb_pos) ||   \
+                                           (prev_td != td &&                  \
+                                            pos >= prev_td->wait_mb_pos);     \
+        td->thread_mb_pos = pos;                                              \
+        if (sliced_threading && pos_check) {                                  \
+            pthread_mutex_lock(&td->lock);                                    \
+            pthread_cond_broadcast(&td->cond);                                \
+            pthread_mutex_unlock(&td->lock);                                  \
+        }                                                                     \
+    } while (0);
 #else
 #define check_thread_pos(td, otd, mb_x_check, mb_y_check)
 #define update_pos(td, mb_y, mb_x)
@@ -1644,51 +1704,58 @@ static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
 {
     VP8Context *s = avctx->priv_data;
     VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
-    int mb_y = td->thread_mb_pos>>16;
-    int mb_x, mb_xy = mb_y*s->mb_width;
+    int mb_y = td->thread_mb_pos >> 16;
+    int mb_x, mb_xy = mb_y * s->mb_width;
     int num_jobs = s->num_jobs;
     VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
-    VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
+    VP56RangeCoder *c  = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)];
     VP8Macroblock *mb;
     uint8_t *dst[3] = {
-        curframe->tf.f->data[0] + 16*mb_y*s->linesize,
-        curframe->tf.f->data[1] +  8*mb_y*s->uvlinesize,
-        curframe->tf.f->data[2] +  8*mb_y*s->uvlinesize
+        curframe->tf.f->data[0] + 16 * mb_y * s->linesize,
+        curframe->tf.f->data[1] +  8 * mb_y * s->uvlinesize,
+        curframe->tf.f->data[2] +  8 * mb_y * s->uvlinesize
     };
-    if (mb_y == 0) prev_td = td;
-    else           prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
-    if (mb_y == s->mb_height-1) next_td = td;
-    else                        next_td = &s->thread_data[(jobnr + 1)%num_jobs];
+    if (mb_y == 0)
+        prev_td = td;
+    else
+        prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
+    if (mb_y == s->mb_height - 1)
+        next_td = td;
+    else
+        next_td = &s->thread_data[(jobnr + 1) % num_jobs];
     if (s->mb_layout == 1)
-        mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
+        mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
     else {
         // Make sure the previous frame has read its segmentation map,
         // if we re-use the same map.
         if (prev_frame && s->segmentation.enabled &&
             !s->segmentation.update_map)
             ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
-        mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
+        mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
         memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
-        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
+        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
     }
 
     memset(td->left_nnz, 0, sizeof(td->left_nnz));
 
     s->mv_min.x = -MARGIN;
-    s->mv_max.x = ((s->mb_width  - 1) << 6) + MARGIN;
+    s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
 
     for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
         // Wait for previous thread to read mb_x+2, and reach mb_y-1.
         if (prev_td != td) {
             if (threadnr != 0) {
-                check_thread_pos(td, prev_td, mb_x+1, mb_y-1);
+                check_thread_pos(td, prev_td, mb_x + 1, mb_y - 1);
             } else {
-                check_thread_pos(td, prev_td, (s->mb_width+3) + (mb_x+1), mb_y-1);
+                check_thread_pos(td, prev_td,
+                                 (s->mb_width + 3) + (mb_x + 1), mb_y - 1);
             }
         }
 
-        s->vdsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
-        s->vdsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
+        s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64,
+                         s->linesize, 4);
+        s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64,
+                         dst[2] - dst[1], 2);
 
         if (!s->mb_layout)
             decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
@@ -1713,7 +1780,8 @@ static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
             AV_ZERO64(td->left_nnz);
             AV_WN64(s->top_nnz[mb_x], 0);   // array of 9, so unaligned
 
-            // Reset DC block predictors if they would exist if the mb had coefficients
+            /* Reset DC block predictors if they would exist
+             * if the mb had coefficients */
             if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
                 td->left_nnz[8]     = 0;
                 s->top_nnz[mb_x][8] = 0;
@@ -1723,23 +1791,25 @@ static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
         if (s->deblock_filter)
             filter_level_for_mb(s, mb, &td->filter_strength[mb_x]);
 
-        if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs-1) {
+        if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) {
             if (s->filter.simple)
-                backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
+                backup_mb_border(s->top_border[mb_x + 1], dst[0],
+                                 NULL, NULL, s->linesize, 0, 1);
             else
-                backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
+                backup_mb_border(s->top_border[mb_x + 1], dst[0],
+                                 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
         }
 
         prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
 
-        dst[0] += 16;
-        dst[1] += 8;
-        dst[2] += 8;
+        dst[0]      += 16;
+        dst[1]      += 8;
+        dst[2]      += 8;
         s->mv_min.x -= 64;
         s->mv_max.x -= 64;
 
-        if (mb_x == s->mb_width+1) {
-            update_pos(td, mb_y, s->mb_width+3);
+        if (mb_x == s->mb_width + 1) {
+            update_pos(td, mb_y, s->mb_width + 3);
         } else {
             update_pos(td, mb_y, mb_x);
         }
@@ -1751,41 +1821,46 @@ static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
 {
     VP8Context *s = avctx->priv_data;
     VP8ThreadData *td = &s->thread_data[threadnr];
-    int mb_x, mb_y = td->thread_mb_pos>>16, num_jobs = s->num_jobs;
+    int mb_x, mb_y = td->thread_mb_pos >> 16, num_jobs = s->num_jobs;
     AVFrame *curframe = s->curframe->tf.f;
     VP8Macroblock *mb;
     VP8ThreadData *prev_td, *next_td;
     uint8_t *dst[3] = {
-        curframe->data[0] + 16*mb_y*s->linesize,
-        curframe->data[1] +  8*mb_y*s->uvlinesize,
-        curframe->data[2] +  8*mb_y*s->uvlinesize
+        curframe->data[0] + 16 * mb_y * s->linesize,
+        curframe->data[1] +  8 * mb_y * s->uvlinesize,
+        curframe->data[2] +  8 * mb_y * s->uvlinesize
     };
 
     if (s->mb_layout == 1)
-        mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
+        mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
     else
-        mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
+        mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
 
-    if (mb_y == 0) prev_td = td;
-    else           prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
-    if (mb_y == s->mb_height-1) next_td = td;
-    else                        next_td = &s->thread_data[(jobnr + 1)%num_jobs];
+    if (mb_y == 0)
+        prev_td = td;
+    else
+        prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
+    if (mb_y == s->mb_height - 1)
+        next_td = td;
+    else
+        next_td = &s->thread_data[(jobnr + 1) % num_jobs];
 
     for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
         VP8FilterStrength *f = &td->filter_strength[mb_x];
-        if (prev_td != td) {
-            check_thread_pos(td, prev_td, (mb_x+1) + (s->mb_width+3), mb_y-1);
-        }
+        if (prev_td != td)
+            check_thread_pos(td, prev_td,
+                             (mb_x + 1) + (s->mb_width + 3), mb_y - 1);
         if (next_td != td)
-            if (next_td != &s->thread_data[0]) {
-                check_thread_pos(td, next_td, mb_x+1, mb_y+1);
-            }
+            if (next_td != &s->thread_data[0])
+                check_thread_pos(td, next_td, mb_x + 1, mb_y + 1);
 
         if (num_jobs == 1) {
             if (s->filter.simple)
-                backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
+                backup_mb_border(s->top_border[mb_x + 1], dst[0],
+                                 NULL, NULL, s->linesize, 0, 1);
             else
-                backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
+                backup_mb_border(s->top_border[mb_x + 1], dst[0],
+                                 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
         }
 
         if (s->filter.simple)
@@ -1796,7 +1871,7 @@ static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
         dst[1] += 8;
         dst[2] += 8;
 
-        update_pos(td, mb_y, (s->mb_width+3) + mb_x);
+        update_pos(td, mb_y, (s->mb_width + 3) + mb_x);
     }
 }
 
@@ -1808,10 +1883,12 @@ static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
     VP8ThreadData *next_td = NULL, *prev_td = NULL;
     VP8Frame *curframe = s->curframe;
     int mb_y, num_jobs = s->num_jobs;
+
     td->thread_nr = threadnr;
     for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
-        if (mb_y >= s->mb_height) break;
-        td->thread_mb_pos = mb_y<<16;
+        if (mb_y >= s->mb_height)
+            break;
+        td->thread_mb_pos = mb_y << 16;
         vp8_decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
         if (s->deblock_filter)
             vp8_filter_mb_row(avctx, tdata, jobnr, threadnr);
@@ -1840,11 +1917,12 @@ int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
 
     prev_frame = s->framep[VP56_FRAME_CURRENT];
 
-    referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
-                                || s->update_altref == VP56_FRAME_CURRENT;
+    referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT ||
+                 s->update_altref == VP56_FRAME_CURRENT;
 
-    skip_thresh = !referenced ? AVDISCARD_NONREF :
-                    !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;
+    skip_thresh = !referenced ? AVDISCARD_NONREF
+                              : !s->keyframe ? AVDISCARD_NONKEY
+                                             : AVDISCARD_ALL;
 
     if (avctx->skip_frame >= skip_thresh) {
         s->invisible = 1;
@@ -1858,7 +1936,7 @@ int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
         if (s->frames[i].tf.f->data[0] &&
             &s->frames[i] != prev_frame &&
             &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
-            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
+            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN]   &&
             &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
             vp8_release_frame(s, &s->frames[i]);
 
@@ -1866,7 +1944,7 @@ int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     for (i = 0; i < 5; i++)
         if (&s->frames[i] != prev_frame &&
             &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
-            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
+            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN]   &&
             &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
             curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
             break;
@@ -1878,57 +1956,61 @@ int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     if (curframe->tf.f->data[0])
         vp8_release_frame(s, curframe);
 
-    // Given that arithmetic probabilities are updated every frame, it's quite likely
-    // that the values we have on a random interframe are complete junk if we didn't
-    // start decode on a keyframe. So just don't display anything rather than junk.
+    /* Given that arithmetic probabilities are updated every frame, it's quite
+     * likely that the values we have on a random interframe are complete
+     * junk if we didn't start decode on a keyframe. So just don't display
+     * anything rather than junk. */
     if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
-                         !s->framep[VP56_FRAME_GOLDEN] ||
+                         !s->framep[VP56_FRAME_GOLDEN]   ||
                          !s->framep[VP56_FRAME_GOLDEN2])) {
-        av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
+        av_log(avctx, AV_LOG_WARNING,
+               "Discarding interframe without a prior keyframe!\n");
         ret = AVERROR_INVALIDDATA;
         goto err;
     }
 
     curframe->tf.f->key_frame = s->keyframe;
-    curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
+    curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
+                                            : AV_PICTURE_TYPE_P;
     if ((ret = vp8_alloc_frame(s, curframe, referenced))) {
         av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
         goto err;
     }
 
     // check if golden and altref are swapped
-    if (s->update_altref != VP56_FRAME_NONE) {
-        s->next_framep[VP56_FRAME_GOLDEN2]  = s->framep[s->update_altref];
-    } else {
-        s->next_framep[VP56_FRAME_GOLDEN2]  = s->framep[VP56_FRAME_GOLDEN2];
-    }
-    if (s->update_golden != VP56_FRAME_NONE) {
-        s->next_framep[VP56_FRAME_GOLDEN]   = s->framep[s->update_golden];
-    } else {
-        s->next_framep[VP56_FRAME_GOLDEN]   = s->framep[VP56_FRAME_GOLDEN];
-    }
-    if (s->update_last) {
+    if (s->update_altref != VP56_FRAME_NONE)
+        s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
+    else
+        s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
+
+    if (s->update_golden != VP56_FRAME_NONE)
+        s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
+    else
+        s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
+
+    if (s->update_last)
         s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
-    } else {
+    else
         s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
-    }
-    s->next_framep[VP56_FRAME_CURRENT]      = curframe;
+
+    s->next_framep[VP56_FRAME_CURRENT] = curframe;
 
     ff_thread_finish_setup(avctx);
 
     s->linesize   = curframe->tf.f->linesize[0];
     s->uvlinesize = curframe->tf.f->linesize[1];
 
-    memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
-    /* Zero macroblock structures for top/top-left prediction from outside the frame. */
+    memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
+    /* Zero macroblock structures for top/top-left prediction
+     * from outside the frame. */
     if (!s->mb_layout)
-        memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
+        memset(s->macroblocks + s->mb_height * 2 - 1, 0,
+               (s->mb_width + 1) * sizeof(*s->macroblocks));
     if (!s->mb_layout && s->keyframe)
-        memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);
+        memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);
 
     memset(s->ref_count, 0, sizeof(s->ref_count));
 
-
     if (s->mb_layout == 1) {
         // Make sure the previous frame has read its segmentation map,
         // if we re-use the same map.
@@ -1949,9 +2031,10 @@ int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
     s->mv_max.y   = ((s->mb_height - 1) << 6) + MARGIN;
     for (i = 0; i < MAX_THREADS; i++) {
         s->thread_data[i].thread_mb_pos = 0;
-        s->thread_data[i].wait_mb_pos = INT_MAX;
+        s->thread_data[i].wait_mb_pos   = INT_MAX;
     }
-    avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL, num_jobs);
+    avctx->execute2(avctx, vp8_decode_mb_row_sliced,
+                    s->thread_data, NULL, num_jobs);
 
     ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
     memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
@@ -1965,7 +2048,7 @@ skip_decode:
     if (!s->invisible) {
         if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
             return ret;
-        *got_frame      = 1;
+        *got_frame = 1;
     }
 
     return avpkt->size;
@@ -2033,10 +2116,10 @@ static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
     return 0;
 }
 
-#define REBASE(pic) \
-    pic ? pic - &s_src->frames[0] + &s->frames[0] : NULL
+#define REBASE(pic) pic ? pic - &s_src->frames[0] + &s->frames[0] : NULL
 
-static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
+static int vp8_decode_update_thread_context(AVCodecContext *dst,
+                                            const AVCodecContext *src)
 {
     VP8Context *s = dst->priv_data, *s_src = src->priv_data;
     int i;
@@ -2048,9 +2131,9 @@ static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecCo
         s->mb_height = s_src->mb_height;
     }
 
-    s->prob[0] = s_src->prob[!s_src->update_probabilities];
+    s->prob[0]      = s_src->prob[!s_src->update_probabilities];
     s->segmentation = s_src->segmentation;
-    s->lf_delta = s_src->lf_delta;
+    s->lf_delta     = s_src->lf_delta;
     memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
 
     for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
diff --git a/libavcodec/vp8.h b/libavcodec/vp8.h
index 6d864b9..1eb0512 100644
--- a/libavcodec/vp8.h
+++ b/libavcodec/vp8.h
@@ -28,14 +28,15 @@
 
 #include "libavutil/buffer.h"
 
-#include "vp56.h"
-#include "vp8dsp.h"
 #include "h264pred.h"
 #include "thread.h"
+#include "vp56.h"
+#include "vp8dsp.h"
+
 #if HAVE_PTHREADS
-#include <pthread.h>
+#   include <pthread.h>
 #elif HAVE_W32THREADS
-#include "compat/w32pthreads.h"
+#   include "compat/w32pthreads.h"
 #endif
 
 #define VP8_MAX_QUANT 127
@@ -82,7 +83,7 @@ typedef struct VP8FilterStrength {
 
 typedef struct VP8Macroblock {
     uint8_t skip;
-    // todo: make it possible to check for at least (i4x4 or split_mv)
+    // TODO: make it possible to check for at least (i4x4 or split_mv)
     // in one op. are others needed?
     uint8_t mode;
     uint8_t ref_frame;
@@ -116,7 +117,7 @@ typedef struct VP8ThreadData {
     int thread_nr;
 #if HAVE_THREADS
     pthread_mutex_t lock;
-    pthread_cond_t  cond;
+    pthread_cond_t cond;
 #endif
     int thread_mb_pos; // (mb_y << 16) | (mb_x & 0xFFFF)
     int wait_mb_pos; // What the current thread is waiting on.
@@ -203,7 +204,7 @@ typedef struct VP8Context {
          * [7]   - split mv
          *  i16x16 modes never have any adjustment
          */
-        int8_t mode[VP8_MVMODE_SPLIT+1];
+        int8_t mode[VP8_MVMODE_SPLIT + 1];
 
         /**
          * filter strength adjustment for macroblocks that reference:
@@ -215,7 +216,7 @@ typedef struct VP8Context {
         int8_t ref[4];
     } lf_delta;
 
-    uint8_t (*top_border)[16+8+8];
+    uint8_t (*top_border)[16 + 8 + 8];
     uint8_t (*top_nnz)[9];
 
     VP56RangeCoder c;   ///< header context, includes mb modes and motion vectors
@@ -234,7 +235,7 @@ typedef struct VP8Context {
         uint8_t golden;
         uint8_t pred16x16[4];
         uint8_t pred8x8c[3];
-        uint8_t token[4][16][3][NUM_DCT_TOKENS-1];
+        uint8_t token[4][16][3][NUM_DCT_TOKENS - 1];
         uint8_t mvc[2][19];
     } prob[2];
 
diff --git a/libavcodec/vp8_parser.c b/libavcodec/vp8_parser.c
index 196de83..8f6459c 100644
--- a/libavcodec/vp8_parser.c
+++ b/libavcodec/vp8_parser.c
@@ -21,18 +21,19 @@
 #include "parser.h"
 
 static int parse(AVCodecParserContext *s,
-                           AVCodecContext *avctx,
-                           const uint8_t **poutbuf, int *poutbuf_size,
-                           const uint8_t *buf, int buf_size)
+                 AVCodecContext *avctx,
+                 const uint8_t **poutbuf, int *poutbuf_size,
+                 const uint8_t *buf, int buf_size)
 {
-    s->pict_type= (buf[0]&0x01) ? AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_I;
+    s->pict_type = (buf[0] & 0x01) ? AV_PICTURE_TYPE_P
+                                   : AV_PICTURE_TYPE_I;
 
-    *poutbuf = buf;
+    *poutbuf      = buf;
     *poutbuf_size = buf_size;
     return buf_size;
 }
 
 AVCodecParser ff_vp8_parser = {
-    .codec_ids      = { AV_CODEC_ID_VP8 },
-    .parser_parse   = parse,
+    .codec_ids    = { AV_CODEC_ID_VP8 },
+    .parser_parse = parse,
 };
diff --git a/libavcodec/vp8data.h b/libavcodec/vp8data.h
index a48b0f6..2a17cc2 100644
--- a/libavcodec/vp8data.h
+++ b/libavcodec/vp8data.h
@@ -30,28 +30,25 @@
 #include "vp8.h"
 #include "h264pred.h"
 
-static const uint8_t vp8_pred4x4_mode[] =
-{
+static const uint8_t vp8_pred4x4_mode[] = {
     [DC_PRED8x8]    = DC_PRED,
     [VERT_PRED8x8]  = VERT_PRED,
     [HOR_PRED8x8]   = HOR_PRED,
     [PLANE_PRED8x8] = TM_VP8_PRED,
 };
 
-static const int8_t vp8_pred16x16_tree_intra[4][2] =
-{
-    { -MODE_I4x4, 1 },                      // '0'
-     { 2, 3 },
-      {  -DC_PRED8x8,  -VERT_PRED8x8 },     // '100', '101'
-      { -HOR_PRED8x8, -PLANE_PRED8x8 },     // '110', '111'
+static const int8_t vp8_pred16x16_tree_intra[4][2] = {
+    {   -MODE_I4x4,              1 }, // '0'
+    {            2,              3 },
+    {  -DC_PRED8x8,  -VERT_PRED8x8 }, // '100', '101'
+    { -HOR_PRED8x8, -PLANE_PRED8x8 }, // '110', '111'
 };
 
-static const int8_t vp8_pred16x16_tree_inter[4][2] =
-{
-    { -DC_PRED8x8, 1 },                     // '0'
-     { 2, 3 },
-      {  -VERT_PRED8x8, -HOR_PRED8x8 },     // '100', '101'
-      { -PLANE_PRED8x8, -MODE_I4x4 },       // '110', '111'
+static const int8_t vp8_pred16x16_tree_inter[4][2] = {
+    {    -DC_PRED8x8,            1 }, // '0'
+    {              2,            3 },
+    {  -VERT_PRED8x8, -HOR_PRED8x8 }, // '100', '101'
+    { -PLANE_PRED8x8,   -MODE_I4x4 }, // '110', '111'
 };
 
 static const int vp8_mode_contexts[6][4] = {
@@ -64,26 +61,26 @@ static const int vp8_mode_contexts[6][4] = {
 };
 
 static const uint8_t vp8_mbsplits[5][16] = {
-    {  0,  0,  0,  0,  0,  0,  0,  0,
-       1,  1,  1,  1,  1,  1,  1,  1  },
-    {  0,  0,  1,  1,  0,  0,  1,  1,
-       0,  0,  1,  1,  0,  0,  1,  1  },
-    {  0,  0,  1,  1,  0,  0,  1,  1,
-       2,  2,  3,  3,  2,  2,  3,  3  },
-    {  0,  1,  2,  3,  4,  5,  6,  7,
-       8,  9, 10, 11, 12, 13, 14, 15  },
-    {  0,  0,  0,  0,  0,  0,  0,  0,
-       0,  0,  0,  0,  0,  0,  0,  0  }
+    { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,  1,  1,  1,  1,  1,  1 },
+    { 0, 0, 1, 1, 0, 0, 1, 1, 0, 0,  1,  1,  0,  0,  1,  1 },
+    { 0, 0, 1, 1, 0, 0, 1, 1, 2, 2,  3,  3,  2,  2,  3,  3 },
+    { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
+    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0,  0,  0 }
 };
 
 static const uint8_t vp8_mbfirstidx[4][16] = {
-    {  0,  8 }, {  0,  2 }, {  0,  2,  8,  10 },
-    {  0,  1,  2,  3,  4,  5,  6,  7,
-       8,  9, 10, 11, 12, 13, 14, 15 }
+    { 0, 8 },
+    { 0, 2 },
+    { 0, 2, 8, 10 },
+    { 0, 1, 2,  3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }
 };
 
-static const uint8_t vp8_mbsplit_count[4] = {   2,   2,   4,  16 };
-static const uint8_t vp8_mbsplit_prob[3]  = { 110, 111, 150 };
+static const uint8_t vp8_mbsplit_count[4] = {
+    2, 2, 4, 16
+};
+static const uint8_t vp8_mbsplit_prob[3] = {
+    110, 111, 150
+};
 
 static const uint8_t vp8_submv_prob[5][3] = {
     { 147, 136,  18 },
@@ -93,39 +90,42 @@ static const uint8_t vp8_submv_prob[5][3] = {
     { 208,   1,   1 }
 };
 
-static const uint8_t vp8_pred16x16_prob_intra[4] = { 145, 156, 163, 128 };
-static const uint8_t vp8_pred16x16_prob_inter[4] = { 112,  86, 140,  37 };
-
-static const int8_t vp8_pred4x4_tree[9][2] =
-{
-    { -DC_PRED, 1 },                                    // '0'
-     { -TM_VP8_PRED, 2 },                               // '10'
-      { -VERT_PRED, 3 },                                // '110'
-       { 4, 6 },
-        { -HOR_PRED, 5 },                               // '11100'
-         { -DIAG_DOWN_RIGHT_PRED, -VERT_RIGHT_PRED },   // '111010', '111011'
-        { -DIAG_DOWN_LEFT_PRED, 7 },                    // '11110'
-         { -VERT_LEFT_PRED, 8 },                        // '111110'
-          { -HOR_DOWN_PRED, -HOR_UP_PRED },             // '1111110', '1111111'
+static const uint8_t vp8_pred16x16_prob_intra[4] = {
+    145, 156, 163, 128
+};
+static const uint8_t vp8_pred16x16_prob_inter[4] = {
+    112, 86, 140, 37
 };
 
-static const int8_t vp8_pred8x8c_tree[3][2] =
-{
-    { -DC_PRED8x8, 1 },                 // '0'
-     { -VERT_PRED8x8, 2 },              // '10
-      { -HOR_PRED8x8, -PLANE_PRED8x8 }, // '110', '111'
+static const int8_t vp8_pred4x4_tree[9][2] = {
+    {              -DC_PRED,                1 }, // '0'
+    {          -TM_VP8_PRED,                2 }, // '10'
+    {            -VERT_PRED,                3 }, // '110'
+    {                     4,                6 },
+    {             -HOR_PRED,                5 }, // '11100'
+    { -DIAG_DOWN_RIGHT_PRED, -VERT_RIGHT_PRED }, // '111010', '111011'
+    {  -DIAG_DOWN_LEFT_PRED,                7 }, // '11110'
+    {       -VERT_LEFT_PRED,                8 }, // '111110'
+    {        -HOR_DOWN_PRED,     -HOR_UP_PRED }, // '1111110', '1111111'
 };
 
-static const uint8_t vp8_pred8x8c_prob_intra[3] = { 142, 114, 183 };
-static const uint8_t vp8_pred8x8c_prob_inter[3] = { 162, 101, 204 };
+static const int8_t vp8_pred8x8c_tree[3][2] = {
+    {   -DC_PRED8x8,              1 },  // '0'
+    { -VERT_PRED8x8,              2 },  // '10'
+    {  -HOR_PRED8x8, -PLANE_PRED8x8 },  // '110', '111'
+};
 
-static const uint8_t vp8_pred4x4_prob_inter[9] =
-{
+static const uint8_t vp8_pred8x8c_prob_intra[3] = {
+    142, 114, 183
+};
+static const uint8_t vp8_pred8x8c_prob_inter[3] = {
+    162, 101, 204
+};
+static const uint8_t vp8_pred4x4_prob_inter[9] = {
     120, 90, 79, 133, 87, 85, 80, 111, 151
 };
 
-static const uint8_t vp8_pred4x4_prob_intra[10][10][9] =
-{
+static const uint8_t vp8_pred4x4_prob_intra[10][10][9] = {
     {
         {  39,  53, 200,  87,  26,  21,  43, 232, 171 },
         {  56,  34,  51, 104, 114, 102,  29,  93,  77 },
@@ -248,50 +248,57 @@ static const uint8_t vp8_pred4x4_prob_intra[10][10][9] =
     },
 };
 
-static const int8_t vp8_segmentid_tree[][2] =
-{
-    { 1, 2 },
-     { -0, -1 },    // '00', '01'
-     { -2, -3 },    // '10', '11'
+static const int8_t vp8_segmentid_tree[][2] = {
+    {  1,  2 },
+    { -0, -1 }, // '00', '01'
+    { -2, -3 }, // '10', '11'
 };
 
-static const uint8_t vp8_coeff_band[16] =
-{
+static const uint8_t vp8_coeff_band[16] = {
     0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7
 };
 
 /* Inverse of vp8_coeff_band: mappings of bands to coefficient indexes.
  * Each list is -1-terminated. */
-static const int8_t vp8_coeff_band_indexes[8][10] =
-{
-    {0, -1},
-    {1, -1},
-    {2, -1},
-    {3, -1},
-    {5, -1},
-    {6, -1},
-    {4, 7, 8, 9, 10, 11, 12, 13, 14, -1},
-    {15, -1}
+static const int8_t vp8_coeff_band_indexes[8][10] = {
+    {  0, -1 },
+    {  1, -1 },
+    {  2, -1 },
+    {  3, -1 },
+    {  5, -1 },
+    {  6, -1 },
+    {  4,  7, 8, 9, 10, 11, 12, 13, 14, -1 },
+    { 15, -1 }
 };
 
-static const uint8_t vp8_dct_cat1_prob[] = { 159, 0 };
-static const uint8_t vp8_dct_cat2_prob[] = { 165, 145, 0 };
-static const uint8_t vp8_dct_cat3_prob[] = { 173, 148, 140, 0 };
-static const uint8_t vp8_dct_cat4_prob[] = { 176, 155, 140, 135, 0 };
-static const uint8_t vp8_dct_cat5_prob[] = { 180, 157, 141, 134, 130, 0 };
-static const uint8_t vp8_dct_cat6_prob[] = { 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0 };
+static const uint8_t vp8_dct_cat1_prob[] = {
+    159, 0
+};
+static const uint8_t vp8_dct_cat2_prob[] = {
+    165, 145, 0
+};
+static const uint8_t vp8_dct_cat3_prob[] = {
+    173, 148, 140, 0
+};
+static const uint8_t vp8_dct_cat4_prob[] = {
+    176, 155, 140, 135, 0
+};
+static const uint8_t vp8_dct_cat5_prob[] = {
+    180, 157, 141, 134, 130, 0
+};
+static const uint8_t vp8_dct_cat6_prob[] = {
+    254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0
+};
 
 // only used for cat3 and above; cat 1 and 2 are referenced directly
-const uint8_t * const ff_vp8_dct_cat_prob[] =
-{
+const uint8_t *const ff_vp8_dct_cat_prob[] = {
     vp8_dct_cat3_prob,
     vp8_dct_cat4_prob,
     vp8_dct_cat5_prob,
     vp8_dct_cat6_prob,
 };
 
-static const uint8_t vp8_token_default_probs[4][8][3][NUM_DCT_TOKENS-1] =
-{
+static const uint8_t vp8_token_default_probs[4][8][3][NUM_DCT_TOKENS - 1] = {
     {
         {
             { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 },
@@ -462,8 +469,7 @@ static const uint8_t vp8_token_default_probs[4][8][3][NUM_DCT_TOKENS-1] =
     },
 };
 
-static const uint8_t vp8_token_update_probs[4][8][3][NUM_DCT_TOKENS-1] =
-{
+static const uint8_t vp8_token_update_probs[4][8][3][NUM_DCT_TOKENS - 1] = {
     {
         {
             { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 },
@@ -635,15 +641,14 @@ static const uint8_t vp8_token_update_probs[4][8][3][NUM_DCT_TOKENS-1] =
 };
 
 // fixme: copied from h264data.h
-static const uint8_t zigzag_scan[16]={
-    0+0*4, 1+0*4, 0+1*4, 0+2*4,
-    1+1*4, 2+0*4, 3+0*4, 2+1*4,
-    1+2*4, 0+3*4, 1+3*4, 2+2*4,
-    3+1*4, 3+2*4, 2+3*4, 3+3*4,
+static const uint8_t zigzag_scan[16] = {
+    0 + 0 * 4, 1 + 0 * 4, 0 + 1 * 4, 0 + 2 * 4,
+    1 + 1 * 4, 2 + 0 * 4, 3 + 0 * 4, 2 + 1 * 4,
+    1 + 2 * 4, 0 + 3 * 4, 1 + 3 * 4, 2 + 2 * 4,
+    3 + 1 * 4, 3 + 2 * 4, 2 + 3 * 4, 3 + 3 * 4,
 };
 
-static const uint8_t vp8_dc_qlookup[VP8_MAX_QUANT+1] =
-{
+static const uint8_t vp8_dc_qlookup[VP8_MAX_QUANT + 1] = {
       4,   5,   6,   7,   8,   9,  10,  10,  11,  12,  13,  14,  15,  16,  17,  17,
      18,  19,  20,  20,  21,  21,  22,  22,  23,  23,  24,  25,  25,  26,  27,  28,
      29,  30,  31,  32,  33,  34,  35,  36,  37,  37,  38,  39,  40,  41,  42,  43,
@@ -654,8 +659,7 @@ static const uint8_t vp8_dc_qlookup[VP8_MAX_QUANT+1] =
     122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
 };
 
-static const uint16_t vp8_ac_qlookup[VP8_MAX_QUANT+1] =
-{
+static const uint16_t vp8_ac_qlookup[VP8_MAX_QUANT + 1] = {
       4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,  16,  17,  18,  19,
      20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,
      36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
diff --git a/libavcodec/vp8dsp.c b/libavcodec/vp8dsp.c
index ad7c603..431c96e 100644
--- a/libavcodec/vp8dsp.c
+++ b/libavcodec/vp8dsp.c
@@ -24,9 +24,10 @@
  * VP8 compatible video decoder
  */
 
+#include "libavutil/common.h"
+
 #include "mathops.h"
 #include "vp8dsp.h"
-#include "libavutil/common.h"
 
 // TODO: Maybe add dequant
 static void vp8_luma_dc_wht_c(int16_t block[4][4][16], int16_t dc[16])
@@ -34,26 +35,26 @@ static void vp8_luma_dc_wht_c(int16_t block[4][4][16], int16_t dc[16])
     int i, t0, t1, t2, t3;
 
     for (i = 0; i < 4; i++) {
-        t0 = dc[0*4+i] + dc[3*4+i];
-        t1 = dc[1*4+i] + dc[2*4+i];
-        t2 = dc[1*4+i] - dc[2*4+i];
-        t3 = dc[0*4+i] - dc[3*4+i];
-
-        dc[0*4+i] = t0 + t1;
-        dc[1*4+i] = t3 + t2;
-        dc[2*4+i] = t0 - t1;
-        dc[3*4+i] = t3 - t2;
+        t0 = dc[0 * 4 + i] + dc[3 * 4 + i];
+        t1 = dc[1 * 4 + i] + dc[2 * 4 + i];
+        t2 = dc[1 * 4 + i] - dc[2 * 4 + i];
+        t3 = dc[0 * 4 + i] - dc[3 * 4 + i];
+
+        dc[0 * 4 + i] = t0 + t1;
+        dc[1 * 4 + i] = t3 + t2;
+        dc[2 * 4 + i] = t0 - t1;
+        dc[3 * 4 + i] = t3 - t2;
     }
 
     for (i = 0; i < 4; i++) {
-        t0 = dc[i*4+0] + dc[i*4+3] + 3; // rounding
-        t1 = dc[i*4+1] + dc[i*4+2];
-        t2 = dc[i*4+1] - dc[i*4+2];
-        t3 = dc[i*4+0] - dc[i*4+3] + 3; // rounding
-        dc[i*4+0] = 0;
-        dc[i*4+1] = 0;
-        dc[i*4+2] = 0;
-        dc[i*4+3] = 0;
+        t0 = dc[i * 4 + 0] + dc[i * 4 + 3] + 3; // rounding
+        t1 = dc[i * 4 + 1] + dc[i * 4 + 2];
+        t2 = dc[i * 4 + 1] - dc[i * 4 + 2];
+        t3 = dc[i * 4 + 0] - dc[i * 4 + 3] + 3; // rounding
+        dc[i * 4 + 0] = 0;
+        dc[i * 4 + 1] = 0;
+        dc[i * 4 + 2] = 0;
+        dc[i * 4 + 3] = 0;
 
         block[i][0][0] = (t0 + t1) >> 3;
         block[i][1][0] = (t3 + t2) >> 3;
@@ -75,8 +76,8 @@ static void vp8_luma_dc_wht_dc_c(int16_t block[4][4][16], int16_t dc[16])
     }
 }
 
-#define MUL_20091(a) ((((a)*20091) >> 16) + (a))
-#define MUL_35468(a)  (((a)*35468) >> 16)
+#define MUL_20091(a) ((((a) * 20091) >> 16) + (a))
+#define MUL_35468(a)  (((a) * 35468) >> 16)
 
 static void vp8_idct_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
 {
@@ -84,32 +85,32 @@ static void vp8_idct_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
     int16_t tmp[16];
 
     for (i = 0; i < 4; i++) {
-        t0 = block[0*4+i] + block[2*4+i];
-        t1 = block[0*4+i] - block[2*4+i];
-        t2 = MUL_35468(block[1*4+i]) - MUL_20091(block[3*4+i]);
-        t3 = MUL_20091(block[1*4+i]) + MUL_35468(block[3*4+i]);
-        block[0*4+i] = 0;
-        block[1*4+i] = 0;
-        block[2*4+i] = 0;
-        block[3*4+i] = 0;
-
-        tmp[i*4+0] = t0 + t3;
-        tmp[i*4+1] = t1 + t2;
-        tmp[i*4+2] = t1 - t2;
-        tmp[i*4+3] = t0 - t3;
+        t0 = block[0 * 4 + i] + block[2 * 4 + i];
+        t1 = block[0 * 4 + i] - block[2 * 4 + i];
+        t2 = MUL_35468(block[1 * 4 + i]) - MUL_20091(block[3 * 4 + i]);
+        t3 = MUL_20091(block[1 * 4 + i]) + MUL_35468(block[3 * 4 + i]);
+        block[0 * 4 + i] = 0;
+        block[1 * 4 + i] = 0;
+        block[2 * 4 + i] = 0;
+        block[3 * 4 + i] = 0;
+
+        tmp[i * 4 + 0] = t0 + t3;
+        tmp[i * 4 + 1] = t1 + t2;
+        tmp[i * 4 + 2] = t1 - t2;
+        tmp[i * 4 + 3] = t0 - t3;
     }
 
     for (i = 0; i < 4; i++) {
-        t0 = tmp[0*4+i] + tmp[2*4+i];
-        t1 = tmp[0*4+i] - tmp[2*4+i];
-        t2 = MUL_35468(tmp[1*4+i]) - MUL_20091(tmp[3*4+i]);
-        t3 = MUL_20091(tmp[1*4+i]) + MUL_35468(tmp[3*4+i]);
+        t0 = tmp[0 * 4 + i] + tmp[2 * 4 + i];
+        t1 = tmp[0 * 4 + i] - tmp[2 * 4 + i];
+        t2 = MUL_35468(tmp[1 * 4 + i]) - MUL_20091(tmp[3 * 4 + i]);
+        t3 = MUL_20091(tmp[1 * 4 + i]) + MUL_35468(tmp[3 * 4 + i]);
 
         dst[0] = av_clip_uint8(dst[0] + ((t0 + t3 + 4) >> 3));
         dst[1] = av_clip_uint8(dst[1] + ((t1 + t2 + 4) >> 3));
         dst[2] = av_clip_uint8(dst[2] + ((t1 - t2 + 4) >> 3));
         dst[3] = av_clip_uint8(dst[3] + ((t0 - t3 + 4) >> 3));
-        dst += stride;
+        dst   += stride;
     }
 }
 
@@ -123,46 +124,49 @@ static void vp8_idct_dc_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
         dst[1] = av_clip_uint8(dst[1] + dc);
         dst[2] = av_clip_uint8(dst[2] + dc);
         dst[3] = av_clip_uint8(dst[3] + dc);
-        dst += stride;
+        dst   += stride;
     }
 }
 
-static void vp8_idct_dc_add4uv_c(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride)
+static void vp8_idct_dc_add4uv_c(uint8_t *dst, int16_t block[4][16],
+                                 ptrdiff_t stride)
 {
-    vp8_idct_dc_add_c(dst+stride*0+0, block[0], stride);
-    vp8_idct_dc_add_c(dst+stride*0+4, block[1], stride);
-    vp8_idct_dc_add_c(dst+stride*4+0, block[2], stride);
-    vp8_idct_dc_add_c(dst+stride*4+4, block[3], stride);
+    vp8_idct_dc_add_c(dst + stride * 0 + 0, block[0], stride);
+    vp8_idct_dc_add_c(dst + stride * 0 + 4, block[1], stride);
+    vp8_idct_dc_add_c(dst + stride * 4 + 0, block[2], stride);
+    vp8_idct_dc_add_c(dst + stride * 4 + 4, block[3], stride);
 }
 
-static void vp8_idct_dc_add4y_c(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride)
+static void vp8_idct_dc_add4y_c(uint8_t *dst, int16_t block[4][16],
+                                ptrdiff_t stride)
 {
-    vp8_idct_dc_add_c(dst+ 0, block[0], stride);
-    vp8_idct_dc_add_c(dst+ 4, block[1], stride);
-    vp8_idct_dc_add_c(dst+ 8, block[2], stride);
-    vp8_idct_dc_add_c(dst+12, block[3], stride);
+    vp8_idct_dc_add_c(dst +  0, block[0], stride);
+    vp8_idct_dc_add_c(dst +  4, block[1], stride);
+    vp8_idct_dc_add_c(dst +  8, block[2], stride);
+    vp8_idct_dc_add_c(dst + 12, block[3], stride);
 }
 
 // because I like only having two parameters to pass functions...
-#define LOAD_PIXELS\
-    int av_unused p3 = p[-4*stride];\
-    int av_unused p2 = p[-3*stride];\
-    int av_unused p1 = p[-2*stride];\
-    int av_unused p0 = p[-1*stride];\
-    int av_unused q0 = p[ 0*stride];\
-    int av_unused q1 = p[ 1*stride];\
-    int av_unused q2 = p[ 2*stride];\
-    int av_unused q3 = p[ 3*stride];
-
-#define clip_int8(n) (cm[n+0x80]-0x80)
-
-static av_always_inline void filter_common(uint8_t *p, ptrdiff_t stride, int is4tap)
+#define LOAD_PIXELS                                                           \
+    int av_unused p3 = p[-4 * stride];                                        \
+    int av_unused p2 = p[-3 * stride];                                        \
+    int av_unused p1 = p[-2 * stride];                                        \
+    int av_unused p0 = p[-1 * stride];                                        \
+    int av_unused q0 = p[ 0 * stride];                                        \
+    int av_unused q1 = p[ 1 * stride];                                        \
+    int av_unused q2 = p[ 2 * stride];                                        \
+    int av_unused q3 = p[ 3 * stride];
+
+#define clip_int8(n) (cm[n + 0x80] - 0x80)
+
+static av_always_inline void filter_common(uint8_t *p, ptrdiff_t stride,
+                                           int is4tap)
 {
     LOAD_PIXELS
     int a, f1, f2;
     const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;
 
-    a = 3*(q0 - p0);
+    a = 3 * (q0 - p0);
 
     if (is4tap)
         a += clip_int8(p1 - q1);
@@ -171,45 +175,50 @@ static av_always_inline void filter_common(uint8_t *p, ptrdiff_t stride, int is4
 
     // We deviate from the spec here with c(a+3) >> 3
     // since that's what libvpx does.
-    f1 = FFMIN(a+4, 127) >> 3;
-    f2 = FFMIN(a+3, 127) >> 3;
+    f1 = FFMIN(a + 4, 127) >> 3;
+    f2 = FFMIN(a + 3, 127) >> 3;
 
     // Despite what the spec says, we do need to clamp here to
     // be bitexact with libvpx.
-    p[-1*stride] = cm[p0 + f2];
-    p[ 0*stride] = cm[q0 - f1];
+    p[-1 * stride] = cm[p0 + f2];
+    p[ 0 * stride] = cm[q0 - f1];
 
     // only used for _inner on blocks without high edge variance
     if (!is4tap) {
-        a = (f1+1)>>1;
-        p[-2*stride] = cm[p1 + a];
-        p[ 1*stride] = cm[q1 - a];
+        a = (f1 + 1) >> 1;
+        p[-2 * stride] = cm[p1 + a];
+        p[ 1 * stride] = cm[q1 - a];
     }
 }
 
 static av_always_inline int simple_limit(uint8_t *p, ptrdiff_t stride, int flim)
 {
     LOAD_PIXELS
-    return 2*FFABS(p0-q0) + (FFABS(p1-q1) >> 1) <= flim;
+    return 2 * FFABS(p0 - q0) + (FFABS(p1 - q1) >> 1) <= flim;
 }
 
 /**
  * E - limit at the macroblock edge
  * I - limit for interior difference
  */
-static av_always_inline int normal_limit(uint8_t *p, ptrdiff_t stride, int E, int I)
+static av_always_inline int normal_limit(uint8_t *p, ptrdiff_t stride,
+                                         int E, int I)
 {
     LOAD_PIXELS
-    return simple_limit(p, stride, E)
-        && FFABS(p3-p2) <= I && FFABS(p2-p1) <= I && FFABS(p1-p0) <= I
-        && FFABS(q3-q2) <= I && FFABS(q2-q1) <= I && FFABS(q1-q0) <= I;
+    return simple_limit(p, stride, E) &&
+           FFABS(p3 - p2) <= I &&
+           FFABS(p2 - p1) <= I &&
+           FFABS(p1 - p0) <= I &&
+           FFABS(q3 - q2) <= I &&
+           FFABS(q2 - q1) <= I &&
+           FFABS(q1 - q0) <= I;
 }
 
 // high edge variance
 static av_always_inline int hev(uint8_t *p, ptrdiff_t stride, int thresh)
 {
     LOAD_PIXELS
-    return FFABS(p1-p0) > thresh || FFABS(q1-q0) > thresh;
+    return FFABS(p1 - p0) > thresh || FFABS(q1 - q0) > thresh;
 }
 
 static av_always_inline void filter_mbedge(uint8_t *p, ptrdiff_t stride)
@@ -219,67 +228,75 @@ static av_always_inline void filter_mbedge(uint8_t *p, ptrdiff_t stride)
 
     LOAD_PIXELS
 
-    w = clip_int8(p1-q1);
-    w = clip_int8(w + 3*(q0-p0));
+    w = clip_int8(p1 - q1);
+    w = clip_int8(w + 3 * (q0 - p0));
 
-    a0 = (27*w + 63) >> 7;
-    a1 = (18*w + 63) >> 7;
-    a2 = ( 9*w + 63) >> 7;
+    a0 = (27 * w + 63) >> 7;
+    a1 = (18 * w + 63) >> 7;
+    a2 =  (9 * w + 63) >> 7;
 
-    p[-3*stride] = cm[p2 + a2];
-    p[-2*stride] = cm[p1 + a1];
-    p[-1*stride] = cm[p0 + a0];
-    p[ 0*stride] = cm[q0 - a0];
-    p[ 1*stride] = cm[q1 - a1];
-    p[ 2*stride] = cm[q2 - a2];
+    p[-3 * stride] = cm[p2 + a2];
+    p[-2 * stride] = cm[p1 + a1];
+    p[-1 * stride] = cm[p0 + a0];
+    p[ 0 * stride] = cm[q0 - a0];
+    p[ 1 * stride] = cm[q1 - a1];
+    p[ 2 * stride] = cm[q2 - a2];
 }
 
-#define LOOP_FILTER(dir, size, stridea, strideb, maybe_inline) \
-static maybe_inline void vp8_ ## dir ## _loop_filter ## size ## _c(uint8_t *dst, ptrdiff_t stride,\
-                                     int flim_E, int flim_I, int hev_thresh)\
-{\
-    int i;\
-\
-    for (i = 0; i < size; i++)\
-        if (normal_limit(dst+i*stridea, strideb, flim_E, flim_I)) {\
-            if (hev(dst+i*stridea, strideb, hev_thresh))\
-                filter_common(dst+i*stridea, strideb, 1);\
-            else\
-                filter_mbedge(dst+i*stridea, strideb);\
-        }\
-}\
-\
-static maybe_inline void vp8_ ## dir ## _loop_filter ## size ## _inner_c(uint8_t *dst, ptrdiff_t stride,\
-                                      int flim_E, int flim_I, int hev_thresh)\
-{\
-    int i;\
-\
-    for (i = 0; i < size; i++)\
-        if (normal_limit(dst+i*stridea, strideb, flim_E, flim_I)) {\
-            int hv = hev(dst+i*stridea, strideb, hev_thresh);\
-            if (hv) \
-                filter_common(dst+i*stridea, strideb, 1);\
-            else \
-                filter_common(dst+i*stridea, strideb, 0);\
-        }\
+#define LOOP_FILTER(dir, size, stridea, strideb, maybe_inline)                \
+static maybe_inline                                                           \
+void vp8_ ## dir ## _loop_filter ## size ## _c(uint8_t *dst,                  \
+                                               ptrdiff_t stride,              \
+                                               int flim_E, int flim_I,        \
+                                               int hev_thresh)                \
+{                                                                             \
+    int i;                                                                    \
+    for (i = 0; i < size; i++)                                                \
+        if (normal_limit(dst + i * stridea, strideb, flim_E, flim_I)) {       \
+            if (hev(dst + i * stridea, strideb, hev_thresh))                  \
+                filter_common(dst + i * stridea, strideb, 1);                 \
+            else                                                              \
+                filter_mbedge(dst + i * stridea, strideb);                    \
+        }                                                                     \
+}                                                                             \
+                                                                              \
+static maybe_inline                                                           \
+void vp8_ ## dir ## _loop_filter ## size ## _inner_c(uint8_t *dst,            \
+                                                     ptrdiff_t stride,        \
+                                                     int flim_E, int flim_I,  \
+                                                     int hev_thresh)          \
+{                                                                             \
+    int i;                                                                    \
+    for (i = 0; i < size; i++)                                                \
+        if (normal_limit(dst + i * stridea, strideb, flim_E, flim_I)) {       \
+            int hv = hev(dst + i * stridea, strideb, hev_thresh);             \
+            if (hv)                                                           \
+                filter_common(dst + i * stridea, strideb, 1);                 \
+            else                                                              \
+                filter_common(dst + i * stridea, strideb, 0);                 \
+        }                                                                     \
 }
 
-LOOP_FILTER(v, 16, 1, stride,)
-LOOP_FILTER(h, 16, stride, 1,)
-
-#define UV_LOOP_FILTER(dir, stridea, strideb) \
-LOOP_FILTER(dir, 8, stridea, strideb, av_always_inline) \
-static void vp8_ ## dir ## _loop_filter8uv_c(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride,\
-                                      int fE, int fI, int hev_thresh)\
-{\
-  vp8_ ## dir ## _loop_filter8_c(dstU, stride, fE, fI, hev_thresh);\
-  vp8_ ## dir ## _loop_filter8_c(dstV, stride, fE, fI, hev_thresh);\
-}\
-static void vp8_ ## dir ## _loop_filter8uv_inner_c(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride,\
-                                      int fE, int fI, int hev_thresh)\
-{\
-  vp8_ ## dir ## _loop_filter8_inner_c(dstU, stride, fE, fI, hev_thresh);\
-  vp8_ ## dir ## _loop_filter8_inner_c(dstV, stride, fE, fI, hev_thresh);\
+LOOP_FILTER(v, 16, 1, stride, )
+LOOP_FILTER(h, 16, stride, 1, )
+
+#define UV_LOOP_FILTER(dir, stridea, strideb)                                 \
+LOOP_FILTER(dir, 8, stridea, strideb, av_always_inline)                       \
+static void vp8_ ## dir ## _loop_filter8uv_c(uint8_t *dstU, uint8_t *dstV,    \
+                                             ptrdiff_t stride, int fE,        \
+                                             int fI, int hev_thresh)          \
+{                                                                             \
+    vp8_ ## dir ## _loop_filter8_c(dstU, stride, fE, fI, hev_thresh);         \
+    vp8_ ## dir ## _loop_filter8_c(dstV, stride, fE, fI, hev_thresh);         \
+}                                                                             \
+                                                                              \
+static void vp8_ ## dir ## _loop_filter8uv_inner_c(uint8_t *dstU,             \
+                                                   uint8_t *dstV,             \
+                                                   ptrdiff_t stride, int fE,  \
+                                                   int fI, int hev_thresh)    \
+{                                                                             \
+    vp8_ ## dir ## _loop_filter8_inner_c(dstU, stride, fE, fI, hev_thresh);   \
+    vp8_ ## dir ## _loop_filter8_inner_c(dstV, stride, fE, fI, hev_thresh);   \
 }
 
 UV_LOOP_FILTER(v, 1, stride)
@@ -290,8 +307,8 @@ static void vp8_v_loop_filter_simple_c(uint8_t *dst, ptrdiff_t stride, int flim)
     int i;
 
     for (i = 0; i < 16; i++)
-        if (simple_limit(dst+i, stride, flim))
-            filter_common(dst+i, stride, 1);
+        if (simple_limit(dst + i, stride, flim))
+            filter_common(dst + i, stride, 1);
 }
 
 static void vp8_h_loop_filter_simple_c(uint8_t *dst, ptrdiff_t stride, int flim)
@@ -299,94 +316,110 @@ static void vp8_h_loop_filter_simple_c(uint8_t *dst, ptrdiff_t stride, int flim)
     int i;
 
     for (i = 0; i < 16; i++)
-        if (simple_limit(dst+i*stride, 1, flim))
-            filter_common(dst+i*stride, 1, 1);
+        if (simple_limit(dst + i * stride, 1, flim))
+            filter_common(dst + i * stride, 1, 1);
 }
 
 static const uint8_t subpel_filters[7][6] = {
-    { 0,   6, 123,  12,   1,   0 },
-    { 2,  11, 108,  36,   8,   1 },
-    { 0,   9,  93,  50,   6,   0 },
-    { 3,  16,  77,  77,  16,   3 },
-    { 0,   6,  50,  93,   9,   0 },
-    { 1,   8,  36, 108,  11,   2 },
-    { 0,   1,  12, 123,   6,   0 },
+    { 0,  6, 123,  12,  1, 0 },
+    { 2, 11, 108,  36,  8, 1 },
+    { 0,  9,  93,  50,  6, 0 },
+    { 3, 16,  77,  77, 16, 3 },
+    { 0,  6,  50,  93,  9, 0 },
+    { 1,  8,  36, 108, 11, 2 },
+    { 0,  1,  12, 123,  6, 0 },
 };
 
-#define PUT_PIXELS(WIDTH) \
-static void put_vp8_pixels ## WIDTH ##_c(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int x, int y) { \
-    int i; \
-    for (i = 0; i < h; i++, dst+= dststride, src+= srcstride) { \
-        memcpy(dst, src, WIDTH); \
-    } \
+#define PUT_PIXELS(WIDTH)                                                     \
+static void put_vp8_pixels ## WIDTH ## _c(uint8_t *dst, ptrdiff_t dststride,  \
+                                          uint8_t *src, ptrdiff_t srcstride,  \
+                                          int h, int x, int y)                \
+{                                                                             \
+    int i;                                                                    \
+    for (i = 0; i < h; i++, dst += dststride, src += srcstride)               \
+        memcpy(dst, src, WIDTH);                                              \
 }
 
 PUT_PIXELS(16)
 PUT_PIXELS(8)
 PUT_PIXELS(4)
 
-#define FILTER_6TAP(src, F, stride) \
-    cm[(F[2]*src[x+0*stride] - F[1]*src[x-1*stride] + F[0]*src[x-2*stride] + \
-        F[3]*src[x+1*stride] - F[4]*src[x+2*stride] + F[5]*src[x+3*stride] + 64) >> 7]
-
-#define FILTER_4TAP(src, F, stride) \
-    cm[(F[2]*src[x+0*stride] - F[1]*src[x-1*stride] + \
-        F[3]*src[x+1*stride] - F[4]*src[x+2*stride] + 64) >> 7]
-
-#define VP8_EPEL_H(SIZE, TAPS) \
-static void put_vp8_epel ## SIZE ## _h ## TAPS ## _c(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my) \
-{ \
-    const uint8_t *filter = subpel_filters[mx-1]; \
-    const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \
-    int x, y; \
-\
-    for (y = 0; y < h; y++) { \
-        for (x = 0; x < SIZE; x++) \
-            dst[x] = FILTER_ ## TAPS ## TAP(src, filter, 1); \
-        dst += dststride; \
-        src += srcstride; \
-    } \
+#define FILTER_6TAP(src, F, stride)                                           \
+    cm[(F[2] * src[x + 0 * stride] - F[1] * src[x - 1 * stride] +             \
+        F[0] * src[x - 2 * stride] + F[3] * src[x + 1 * stride] -             \
+        F[4] * src[x + 2 * stride] + F[5] * src[x + 3 * stride] + 64) >> 7]
+
+#define FILTER_4TAP(src, F, stride)                                           \
+    cm[(F[2] * src[x + 0 * stride] - F[1] * src[x - 1 * stride] +             \
+        F[3] * src[x + 1 * stride] - F[4] * src[x + 2 * stride] + 64) >> 7]
+
+#define VP8_EPEL_H(SIZE, TAPS)                                                \
+static void put_vp8_epel ## SIZE ## _h ## TAPS ## _c(uint8_t *dst,            \
+                                                     ptrdiff_t dststride,     \
+                                                     uint8_t *src,            \
+                                                     ptrdiff_t srcstride,     \
+                                                     int h, int mx, int my)   \
+{                                                                             \
+    const uint8_t *filter = subpel_filters[mx - 1];                           \
+    const uint8_t *cm     = ff_crop_tab + MAX_NEG_CROP;                       \
+    int x, y;                                                                 \
+    for (y = 0; y < h; y++) {                                                 \
+        for (x = 0; x < SIZE; x++)                                            \
+            dst[x] = FILTER_ ## TAPS ## TAP(src, filter, 1);                  \
+        dst += dststride;                                                     \
+        src += srcstride;                                                     \
+    }                                                                         \
 }
-#define VP8_EPEL_V(SIZE, TAPS) \
-static void put_vp8_epel ## SIZE ## _v ## TAPS ## _c(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my) \
-{ \
-    const uint8_t *filter = subpel_filters[my-1]; \
-    const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \
-    int x, y; \
-\
-    for (y = 0; y < h; y++) { \
-        for (x = 0; x < SIZE; x++) \
-            dst[x] = FILTER_ ## TAPS ## TAP(src, filter, srcstride); \
-        dst += dststride; \
-        src += srcstride; \
-    } \
+
+#define VP8_EPEL_V(SIZE, TAPS)                                                \
+static void put_vp8_epel ## SIZE ## _v ## TAPS ## _c(uint8_t *dst,            \
+                                                     ptrdiff_t dststride,     \
+                                                     uint8_t *src,            \
+                                                     ptrdiff_t srcstride,     \
+                                                     int h, int mx, int my)   \
+{                                                                             \
+    const uint8_t *filter = subpel_filters[my - 1];                           \
+    const uint8_t *cm     = ff_crop_tab + MAX_NEG_CROP;                       \
+    int x, y;                                                                 \
+    for (y = 0; y < h; y++) {                                                 \
+        for (x = 0; x < SIZE; x++)                                            \
+            dst[x] = FILTER_ ## TAPS ## TAP(src, filter, srcstride);          \
+        dst += dststride;                                                     \
+        src += srcstride;                                                     \
+    }                                                                         \
 }
-#define VP8_EPEL_HV(SIZE, HTAPS, VTAPS) \
-static void put_vp8_epel ## SIZE ## _h ## HTAPS ## v ## VTAPS ## _c(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my) \
-{ \
-    const uint8_t *filter = subpel_filters[mx-1]; \
-    const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP; \
-    int x, y; \
-    uint8_t tmp_array[(2*SIZE+VTAPS-1)*SIZE]; \
-    uint8_t *tmp = tmp_array; \
-    src -= (2-(VTAPS==4))*srcstride; \
-\
-    for (y = 0; y < h+VTAPS-1; y++) { \
-        for (x = 0; x < SIZE; x++) \
-            tmp[x] = FILTER_ ## HTAPS ## TAP(src, filter, 1); \
-        tmp += SIZE; \
-        src += srcstride; \
-    } \
-\
-    tmp = tmp_array + (2-(VTAPS==4))*SIZE; \
-    filter = subpel_filters[my-1]; \
-\
-    for (y = 0; y < h; y++) { \
-        for (x = 0; x < SIZE; x++) \
-            dst[x] = FILTER_ ## VTAPS ## TAP(tmp, filter, SIZE); \
-        dst += dststride; \
-        tmp += SIZE; \
-    } \
+
+#define VP8_EPEL_HV(SIZE, HTAPS, VTAPS)                                       \
+static void                                                                   \
+put_vp8_epel ## SIZE ## _h ## HTAPS ## v ## VTAPS ## _c(uint8_t *dst,         \
+                                                        ptrdiff_t dststride,  \
+                                                        uint8_t *src,         \
+                                                        ptrdiff_t srcstride,  \
+                                                        int h, int mx,        \
+                                                        int my)               \
+{                                                                             \
+    const uint8_t *filter = subpel_filters[mx - 1];                           \
+    const uint8_t *cm     = ff_crop_tab + MAX_NEG_CROP;                       \
+    int x, y;                                                                 \
+    uint8_t tmp_array[(2 * SIZE + VTAPS - 1) * SIZE];                         \
+    uint8_t *tmp = tmp_array;                                                 \
+    src -= (2 - (VTAPS == 4)) * srcstride;                                    \
+                                                                              \
+    for (y = 0; y < h + VTAPS - 1; y++) {                                     \
+        for (x = 0; x < SIZE; x++)                                            \
+            tmp[x] = FILTER_ ## HTAPS ## TAP(src, filter, 1);                 \
+        tmp += SIZE;                                                          \
+        src += srcstride;                                                     \
+    }                                                                         \
+    tmp    = tmp_array + (2 - (VTAPS == 4)) * SIZE;                           \
+    filter = subpel_filters[my - 1];                                          \
+                                                                              \
+    for (y = 0; y < h; y++) {                                                 \
+        for (x = 0; x < SIZE; x++)                                            \
+            dst[x] = FILTER_ ## VTAPS ## TAP(tmp, filter, SIZE);              \
+        dst += dststride;                                                     \
+        tmp += SIZE;                                                          \
+    }                                                                         \
 }
 
 VP8_EPEL_H(16, 4)
@@ -401,6 +434,7 @@ VP8_EPEL_V(4,  4)
 VP8_EPEL_V(16, 6)
 VP8_EPEL_V(8,  6)
 VP8_EPEL_V(4,  6)
+
 VP8_EPEL_HV(16, 4, 4)
 VP8_EPEL_HV(8,  4, 4)
 VP8_EPEL_HV(4,  4, 4)
@@ -414,73 +448,77 @@ VP8_EPEL_HV(16, 6, 6)
 VP8_EPEL_HV(8,  6, 6)
 VP8_EPEL_HV(4,  6, 6)
 
-#define VP8_BILINEAR(SIZE) \
-static void put_vp8_bilinear ## SIZE ## _h_c(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my) \
-{ \
-    int a = 8-mx, b = mx; \
-    int x, y; \
-\
-    for (y = 0; y < h; y++) { \
-        for (x = 0; x < SIZE; x++) \
-            dst[x] = (a*src[x] + b*src[x+1] + 4) >> 3; \
-        dst += dstride; \
-        src += sstride; \
-    } \
-} \
-static void put_vp8_bilinear ## SIZE ## _v_c(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my) \
-{ \
-    int c = 8-my, d = my; \
-    int x, y; \
-\
-    for (y = 0; y < h; y++) { \
-        for (x = 0; x < SIZE; x++) \
-            dst[x] = (c*src[x] + d*src[x+sstride] + 4) >> 3; \
-        dst += dstride; \
-        src += sstride; \
-    } \
-} \
-\
-static void put_vp8_bilinear ## SIZE ## _hv_c(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my) \
-{ \
-    int a = 8-mx, b = mx; \
-    int c = 8-my, d = my; \
-    int x, y; \
-    uint8_t tmp_array[(2*SIZE+1)*SIZE]; \
-    uint8_t *tmp = tmp_array; \
-\
-    for (y = 0; y < h+1; y++) { \
-        for (x = 0; x < SIZE; x++) \
-            tmp[x] = (a*src[x] + b*src[x+1] + 4) >> 3; \
-        tmp += SIZE; \
-        src += sstride; \
-    } \
-\
-    tmp = tmp_array; \
-\
-    for (y = 0; y < h; y++) { \
-        for (x = 0; x < SIZE; x++) \
-            dst[x] = (c*tmp[x] + d*tmp[x+SIZE] + 4) >> 3; \
-        dst += dstride; \
-        tmp += SIZE; \
-    } \
+#define VP8_BILINEAR(SIZE)                                                    \
+static void put_vp8_bilinear ## SIZE ## _h_c(uint8_t *dst, ptrdiff_t dstride, \
+                                             uint8_t *src, ptrdiff_t sstride, \
+                                             int h, int mx, int my)           \
+{                                                                             \
+    int a = 8 - mx, b = mx;                                                   \
+    int x, y;                                                                 \
+    for (y = 0; y < h; y++) {                                                 \
+        for (x = 0; x < SIZE; x++)                                            \
+            dst[x] = (a * src[x] + b * src[x + 1] + 4) >> 3;                  \
+        dst += dstride;                                                       \
+        src += sstride;                                                       \
+    }                                                                         \
+}                                                                             \
+                                                                              \
+static void put_vp8_bilinear ## SIZE ## _v_c(uint8_t *dst, ptrdiff_t dstride, \
+                                             uint8_t *src, ptrdiff_t sstride, \
+                                             int h, int mx, int my)           \
+{                                                                             \
+    int c = 8 - my, d = my;                                                   \
+    int x, y;                                                                 \
+    for (y = 0; y < h; y++) {                                                 \
+        for (x = 0; x < SIZE; x++)                                            \
+            dst[x] = (c * src[x] + d * src[x + sstride] + 4) >> 3;            \
+        dst += dstride;                                                       \
+        src += sstride;                                                       \
+    }                                                                         \
+}                                                                             \
+                                                                              \
+static void put_vp8_bilinear ## SIZE ## _hv_c(uint8_t *dst,                   \
+                                              ptrdiff_t dstride,              \
+                                              uint8_t *src,                   \
+                                              ptrdiff_t sstride,              \
+                                              int h, int mx, int my)          \
+{                                                                             \
+    int a = 8 - mx, b = mx;                                                   \
+    int c = 8 - my, d = my;                                                   \
+    int x, y;                                                                 \
+    uint8_t tmp_array[(2 * SIZE + 1) * SIZE];                                 \
+    uint8_t *tmp = tmp_array;                                                 \
+    for (y = 0; y < h + 1; y++) {                                             \
+        for (x = 0; x < SIZE; x++)                                            \
+            tmp[x] = (a * src[x] + b * src[x + 1] + 4) >> 3;                  \
+        tmp += SIZE;                                                          \
+        src += sstride;                                                       \
+    }                                                                         \
+    tmp = tmp_array;                                                          \
+    for (y = 0; y < h; y++) {                                                 \
+        for (x = 0; x < SIZE; x++)                                            \
+            dst[x] = (c * tmp[x] + d * tmp[x + SIZE] + 4) >> 3;               \
+        dst += dstride;                                                       \
+        tmp += SIZE;                                                          \
+    }                                                                         \
 }
 
 VP8_BILINEAR(16)
 VP8_BILINEAR(8)
 VP8_BILINEAR(4)
 
-#define VP8_MC_FUNC(IDX, SIZE) \
-    dsp->put_vp8_epel_pixels_tab[IDX][0][0] = put_vp8_pixels ## SIZE ## _c; \
-    dsp->put_vp8_epel_pixels_tab[IDX][0][1] = put_vp8_epel ## SIZE ## _h4_c; \
-    dsp->put_vp8_epel_pixels_tab[IDX][0][2] = put_vp8_epel ## SIZE ## _h6_c; \
-    dsp->put_vp8_epel_pixels_tab[IDX][1][0] = put_vp8_epel ## SIZE ## _v4_c; \
+#define VP8_MC_FUNC(IDX, SIZE)                                                \
+    dsp->put_vp8_epel_pixels_tab[IDX][0][0] = put_vp8_pixels ## SIZE ## _c;   \
+    dsp->put_vp8_epel_pixels_tab[IDX][0][1] = put_vp8_epel ## SIZE ## _h4_c;  \
+    dsp->put_vp8_epel_pixels_tab[IDX][0][2] = put_vp8_epel ## SIZE ## _h6_c;  \
+    dsp->put_vp8_epel_pixels_tab[IDX][1][0] = put_vp8_epel ## SIZE ## _v4_c;  \
     dsp->put_vp8_epel_pixels_tab[IDX][1][1] = put_vp8_epel ## SIZE ## _h4v4_c; \
     dsp->put_vp8_epel_pixels_tab[IDX][1][2] = put_vp8_epel ## SIZE ## _h6v4_c; \
-    dsp->put_vp8_epel_pixels_tab[IDX][2][0] = put_vp8_epel ## SIZE ## _v6_c; \
+    dsp->put_vp8_epel_pixels_tab[IDX][2][0] = put_vp8_epel ## SIZE ## _v6_c;  \
     dsp->put_vp8_epel_pixels_tab[IDX][2][1] = put_vp8_epel ## SIZE ## _h4v6_c; \
     dsp->put_vp8_epel_pixels_tab[IDX][2][2] = put_vp8_epel ## SIZE ## _h6v6_c
 
-#define VP8_BILINEAR_MC_FUNC(IDX, SIZE) \
+#define VP8_BILINEAR_MC_FUNC(IDX, SIZE)                                       \
     dsp->put_vp8_bilinear_pixels_tab[IDX][0][0] = put_vp8_pixels ## SIZE ## _c; \
     dsp->put_vp8_bilinear_pixels_tab[IDX][0][1] = put_vp8_bilinear ## SIZE ## _h_c; \
     dsp->put_vp8_bilinear_pixels_tab[IDX][0][2] = put_vp8_bilinear ## SIZE ## _h_c; \
diff --git a/libavcodec/vp8dsp.h b/libavcodec/vp8dsp.h
index 877e264..9e313a7 100644
--- a/libavcodec/vp8dsp.h
+++ b/libavcodec/vp8dsp.h
@@ -30,8 +30,8 @@
 #include <stddef.h>
 #include <stdint.h>
 
-typedef void (*vp8_mc_func)(uint8_t *dst/*align 8*/, ptrdiff_t dstStride,
-                            uint8_t *src/*align 1*/, ptrdiff_t srcStride,
+typedef void (*vp8_mc_func)(uint8_t *dst /* align 8 */, ptrdiff_t dstStride,
+                            uint8_t *src /* align 1 */, ptrdiff_t srcStride,
                             int h, int x, int y);
 
 typedef struct VP8DSPContext {

-- 
Libav/FFmpeg packaging