[SCM] libav/experimental: cosmetics: Reformat PPC code in libavcodec according to style guidelines. This includes indentation changes, comment reformatting, consistent brace placement and some prettyprinting.

siretart at users.alioth.debian.org
Sun Jun 30 16:25:43 UTC 2013


The following commit has been merged in the experimental branch:
commit e3905ce0afe91ad1422af83334d06d52e4e8fc80
Author: Diego Biurrun <diego at biurrun.de>
Date:   Sun Jul 20 18:58:30 2008 +0000

    cosmetics: Reformat PPC code in libavcodec according to style guidelines.
    This includes indentation changes, comment reformatting, consistent brace
    placement and some prettyprinting.
    
    Originally committed as revision 14316 to svn://svn.ffmpeg.org/ffmpeg/trunk

diff --git a/libavcodec/ppc/dsputil_ppc.c b/libavcodec/ppc/dsputil_ppc.c
index 168f8d8..28bcf2c 100644
--- a/libavcodec/ppc/dsputil_ppc.c
+++ b/libavcodec/ppc/dsputil_ppc.c
@@ -60,33 +60,33 @@ int mm_support(void)
 unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][powerpc_data_total];
 /* list below must match enum in dsputil_ppc.h */
 static unsigned char* perfname[] = {
-  "ff_fft_calc_altivec",
-  "gmc1_altivec",
-  "dct_unquantize_h263_altivec",
-  "fdct_altivec",
-  "idct_add_altivec",
-  "idct_put_altivec",
-  "put_pixels16_altivec",
-  "avg_pixels16_altivec",
-  "avg_pixels8_altivec",
-  "put_pixels8_xy2_altivec",
-  "put_no_rnd_pixels8_xy2_altivec",
-  "put_pixels16_xy2_altivec",
-  "put_no_rnd_pixels16_xy2_altivec",
-  "hadamard8_diff8x8_altivec",
-  "hadamard8_diff16_altivec",
-  "avg_pixels8_xy2_altivec",
-  "clear_blocks_dcbz32_ppc",
-  "clear_blocks_dcbz128_ppc",
-  "put_h264_chroma_mc8_altivec",
-  "avg_h264_chroma_mc8_altivec",
-  "put_h264_qpel16_h_lowpass_altivec",
-  "avg_h264_qpel16_h_lowpass_altivec",
-  "put_h264_qpel16_v_lowpass_altivec",
-  "avg_h264_qpel16_v_lowpass_altivec",
-  "put_h264_qpel16_hv_lowpass_altivec",
-  "avg_h264_qpel16_hv_lowpass_altivec",
-  ""
+    "ff_fft_calc_altivec",
+    "gmc1_altivec",
+    "dct_unquantize_h263_altivec",
+    "fdct_altivec",
+    "idct_add_altivec",
+    "idct_put_altivec",
+    "put_pixels16_altivec",
+    "avg_pixels16_altivec",
+    "avg_pixels8_altivec",
+    "put_pixels8_xy2_altivec",
+    "put_no_rnd_pixels8_xy2_altivec",
+    "put_pixels16_xy2_altivec",
+    "put_no_rnd_pixels16_xy2_altivec",
+    "hadamard8_diff8x8_altivec",
+    "hadamard8_diff16_altivec",
+    "avg_pixels8_xy2_altivec",
+    "clear_blocks_dcbz32_ppc",
+    "clear_blocks_dcbz128_ppc",
+    "put_h264_chroma_mc8_altivec",
+    "avg_h264_chroma_mc8_altivec",
+    "put_h264_qpel16_h_lowpass_altivec",
+    "avg_h264_qpel16_h_lowpass_altivec",
+    "put_h264_qpel16_v_lowpass_altivec",
+    "avg_h264_qpel16_v_lowpass_altivec",
+    "put_h264_qpel16_hv_lowpass_altivec",
+    "avg_h264_qpel16_hv_lowpass_altivec",
+    ""
 };
 #include <stdio.h>
 #endif
@@ -94,51 +94,44 @@ static unsigned char* perfname[] = {
 #ifdef CONFIG_POWERPC_PERF
 void powerpc_display_perf_report(void)
 {
-  int i, j;
-  av_log(NULL, AV_LOG_INFO, "PowerPC performance report\n Values are from the PMC registers, and represent whatever the registers are set to record.\n");
-  for(i = 0 ; i < powerpc_perf_total ; i++)
-  {
-    for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++)
-      {
-        if (perfdata[j][i][powerpc_data_num] != (unsigned long long)0)
-          av_log(NULL, AV_LOG_INFO,
-                  " Function \"%s\" (pmc%d):\n\tmin: %"PRIu64"\n\tmax: %"PRIu64"\n\tavg: %1.2lf (%"PRIu64")\n",
-                  perfname[i],
-                  j+1,
-                  perfdata[j][i][powerpc_data_min],
-                  perfdata[j][i][powerpc_data_max],
-                  (double)perfdata[j][i][powerpc_data_sum] /
-                  (double)perfdata[j][i][powerpc_data_num],
-                  perfdata[j][i][powerpc_data_num]);
-      }
-  }
+    int i, j;
+    av_log(NULL, AV_LOG_INFO, "PowerPC performance report\n Values are from the PMC registers, and represent whatever the registers are set to record.\n");
+    for(i = 0 ; i < powerpc_perf_total ; i++) {
+        for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++) {
+            if (perfdata[j][i][powerpc_data_num] != (unsigned long long)0)
+                av_log(NULL, AV_LOG_INFO,
+                       " Function \"%s\" (pmc%d):\n\tmin: %"PRIu64"\n\tmax: %"PRIu64"\n\tavg: %1.2lf (%"PRIu64")\n",
+                       perfname[i],
+                       j+1,
+                       perfdata[j][i][powerpc_data_min],
+                       perfdata[j][i][powerpc_data_max],
+                       (double)perfdata[j][i][powerpc_data_sum] /
+                       (double)perfdata[j][i][powerpc_data_num],
+                       perfdata[j][i][powerpc_data_num]);
+        }
+    }
 }
 #endif /* CONFIG_POWERPC_PERF */
 
 /* ***** WARNING ***** WARNING ***** WARNING ***** */
 /*
-  clear_blocks_dcbz32_ppc will not work properly
-  on PowerPC processors with a cache line size
-  not equal to 32 bytes.
-  Fortunately all processor used by Apple up to
-  at least the 7450 (aka second generation G4)
-  use 32 bytes cache line.
-  This is due to the use of the 'dcbz' instruction.
-  It simply clear to zero a single cache line,
-  so you need to know the cache line size to use it !
-  It's absurd, but it's fast...
+clear_blocks_dcbz32_ppc will not work properly on PowerPC processors with a
+cache line size not equal to 32 bytes.
+Fortunately all processor used by Apple up to at least the 7450 (aka second
+generation G4) use 32 bytes cache line.
+This is due to the use of the 'dcbz' instruction. It simply clear to zero a
+single cache line, so you need to know the cache line size to use it !
+It's absurd, but it's fast...
 
-  update 24/06/2003 : Apple released yesterday the G5,
-  with a PPC970. cache line size : 128 bytes. Oups.
-  The semantic of dcbz was changed, it always clear
-  32 bytes. so the function below will work, but will
-  be slow. So I fixed check_dcbz_effect to use dcbzl,
-  which is defined to clear a cache line (as dcbz before).
-  So we still can distinguish, and use dcbz (32 bytes)
-  or dcbzl (one cache line) as required.
+update 24/06/2003 : Apple released yesterday the G5, with a PPC970. cache line
+size: 128 bytes. Oups.
+The semantic of dcbz was changed, it always clear 32 bytes. so the function
+below will work, but will be slow. So I fixed check_dcbz_effect to use dcbzl,
+which is defined to clear a cache line (as dcbz before). So we still can
+distinguish, and use dcbz (32 bytes) or dcbzl (one cache line) as required.
 
-  see <http://developer.apple.com/technotes/tn/tn2087.html>
-  and <http://developer.apple.com/technotes/tn/tn2086.html>
+see <http://developer.apple.com/technotes/tn/tn2087.html>
+and <http://developer.apple.com/technotes/tn/tn2086.html>
 */
 void clear_blocks_dcbz32_ppc(DCTELEM *blocks)
 {
@@ -148,21 +141,21 @@ POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz32, 1);
 POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz32, 1);
 #if 1
     if (misal) {
-      ((unsigned long*)blocks)[0] = 0L;
-      ((unsigned long*)blocks)[1] = 0L;
-      ((unsigned long*)blocks)[2] = 0L;
-      ((unsigned long*)blocks)[3] = 0L;
-      i += 16;
+        ((unsigned long*)blocks)[0] = 0L;
+        ((unsigned long*)blocks)[1] = 0L;
+        ((unsigned long*)blocks)[2] = 0L;
+        ((unsigned long*)blocks)[3] = 0L;
+        i += 16;
     }
     for ( ; i < sizeof(DCTELEM)*6*64-31 ; i += 32) {
-      asm volatile("dcbz %0,%1" : : "b" (blocks), "r" (i) : "memory");
+        asm volatile("dcbz %0,%1" : : "b" (blocks), "r" (i) : "memory");
     }
     if (misal) {
-      ((unsigned long*)blocks)[188] = 0L;
-      ((unsigned long*)blocks)[189] = 0L;
-      ((unsigned long*)blocks)[190] = 0L;
-      ((unsigned long*)blocks)[191] = 0L;
-      i += 16;
+        ((unsigned long*)blocks)[188] = 0L;
+        ((unsigned long*)blocks)[189] = 0L;
+        ((unsigned long*)blocks)[190] = 0L;
+        ((unsigned long*)blocks)[191] = 0L;
+        i += 16;
     }
 #else
     memset(blocks, 0, sizeof(DCTELEM)*6*64);
@@ -180,16 +173,16 @@ POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz128, 1);
     register int i = 0;
 POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz128, 1);
 #if 1
- if (misal) {
-   // we could probably also optimize this case,
-   // but there's not much point as the machines
-   // aren't available yet (2003-06-26)
-      memset(blocks, 0, sizeof(DCTELEM)*6*64);
+    if (misal) {
+        // we could probably also optimize this case,
+        // but there's not much point as the machines
+        // aren't available yet (2003-06-26)
+        memset(blocks, 0, sizeof(DCTELEM)*6*64);
     }
     else
-      for ( ; i < sizeof(DCTELEM)*6*64 ; i += 128) {
-        asm volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory");
-      }
+        for ( ; i < sizeof(DCTELEM)*6*64 ; i += 128) {
+            asm volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory");
+        }
 #else
     memset(blocks, 0, sizeof(DCTELEM)*6*64);
 #endif
@@ -198,7 +191,7 @@ POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz128, 1);
 #else
 void clear_blocks_dcbz128_ppc(DCTELEM *blocks)
 {
-  memset(blocks, 0, sizeof(DCTELEM)*6*64);
+    memset(blocks, 0, sizeof(DCTELEM)*6*64);
 }
 #endif
 
@@ -210,34 +203,32 @@ void clear_blocks_dcbz128_ppc(DCTELEM *blocks)
    knows about dcbzl ... */
 long check_dcbzl_effect(void)
 {
-  register char *fakedata = av_malloc(1024);
-  register char *fakedata_middle;
-  register long zero = 0;
-  register long i = 0;
-  long count = 0;
+    register char *fakedata = av_malloc(1024);
+    register char *fakedata_middle;
+    register long zero = 0;
+    register long i = 0;
+    long count = 0;
 
-  if (!fakedata)
-  {
-    return 0L;
-  }
+    if (!fakedata) {
+        return 0L;
+    }
 
-  fakedata_middle = (fakedata + 512);
+    fakedata_middle = (fakedata + 512);
 
-  memset(fakedata, 0xFF, 1024);
+    memset(fakedata, 0xFF, 1024);
 
-  /* below the constraint "b" seems to mean "Address base register"
-     in gcc-3.3 / RS/6000 speaks. seems to avoid using r0, so.... */
-  asm volatile("dcbzl %0, %1" : : "b" (fakedata_middle), "r" (zero));
+    /* below the constraint "b" seems to mean "Address base register"
+       in gcc-3.3 / RS/6000 speaks. seems to avoid using r0, so.... */
+    asm volatile("dcbzl %0, %1" : : "b" (fakedata_middle), "r" (zero));
 
-  for (i = 0; i < 1024 ; i ++)
-  {
-    if (fakedata[i] == (char)0)
-      count++;
-  }
+    for (i = 0; i < 1024 ; i ++) {
+        if (fakedata[i] == (char)0)
+            count++;
+    }
 
-  av_free(fakedata);
+    av_free(fakedata);
 
-  return count;
+    return count;
 }
 #else
 long check_dcbzl_effect(void)
@@ -286,36 +277,31 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
 
 #ifdef CONFIG_ENCODERS
         if (avctx->dct_algo == FF_DCT_AUTO ||
-            avctx->dct_algo == FF_DCT_ALTIVEC)
-        {
+            avctx->dct_algo == FF_DCT_ALTIVEC) {
             c->fdct = fdct_altivec;
         }
 #endif //CONFIG_ENCODERS
 
-        if (avctx->lowres==0)
-        {
-        if ((avctx->idct_algo == FF_IDCT_AUTO) ||
-                (avctx->idct_algo == FF_IDCT_ALTIVEC))
-        {
-            c->idct_put = idct_put_altivec;
-            c->idct_add = idct_add_altivec;
-            c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
-        }
+        if (avctx->lowres==0) {
+            if ((avctx->idct_algo == FF_IDCT_AUTO) ||
+                (avctx->idct_algo == FF_IDCT_ALTIVEC)) {
+                c->idct_put = idct_put_altivec;
+                c->idct_add = idct_add_altivec;
+                c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
+            }
         }
 
 #ifdef CONFIG_POWERPC_PERF
         {
-          int i, j;
-          for (i = 0 ; i < powerpc_perf_total ; i++)
-          {
-            for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++)
-              {
-                perfdata[j][i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFFULL;
-                perfdata[j][i][powerpc_data_max] = 0x0000000000000000ULL;
-                perfdata[j][i][powerpc_data_sum] = 0x0000000000000000ULL;
-                perfdata[j][i][powerpc_data_num] = 0x0000000000000000ULL;
+            int i, j;
+            for (i = 0 ; i < powerpc_perf_total ; i++) {
+                for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++) {
+                    perfdata[j][i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFFULL;
+                    perfdata[j][i][powerpc_data_max] = 0x0000000000000000ULL;
+                    perfdata[j][i][powerpc_data_sum] = 0x0000000000000000ULL;
+                    perfdata[j][i][powerpc_data_num] = 0x0000000000000000ULL;
+                  }
               }
-          }
         }
 #endif /* CONFIG_POWERPC_PERF */
     }
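For context, the comment block reformatted above explains why clear_blocks relies on dcbz/dcbzl and why check_dcbzl_effect() probes the effective cache line size at runtime. A minimal standalone probe along the same lines, assuming a PowerPC target and GCC-style inline asm (the function name and the main() driver are illustrative, not part of the patch):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Poison a buffer, zero one cache line in its middle with dcbzl and
     * count how many bytes actually became zero, as check_dcbzl_effect()
     * does: 32 on a G4-class CPU, 128 on a G5 (PPC970). */
    static long probe_dcbzl_line_size(void)
    {
        char *buf = malloc(1024);
        long zero = 0, i, count = 0;

        if (!buf)
            return 0;

        memset(buf, 0xFF, 1024);
        /* constraint "b" = address base register, avoids r0 */
        asm volatile("dcbzl %0, %1" : : "b" (buf + 512), "r" (zero) : "memory");

        for (i = 0; i < 1024; i++)
            if (buf[i] == 0)
                count++;

        free(buf);
        return count;
    }

    int main(void)
    {
        printf("dcbzl clears %ld bytes per line\n", probe_dcbzl_line_size());
        return 0;
    }
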
diff --git a/libavcodec/ppc/dsputil_ppc.h b/libavcodec/ppc/dsputil_ppc.h
index d8f6b27..1276661 100644
--- a/libavcodec/ppc/dsputil_ppc.h
+++ b/libavcodec/ppc/dsputil_ppc.h
@@ -31,40 +31,40 @@ void powerpc_display_perf_report(void);
 /* if you add to the enum below, also add to the perfname array
    in dsputil_ppc.c */
 enum powerpc_perf_index {
-  altivec_fft_num = 0,
-  altivec_gmc1_num,
-  altivec_dct_unquantize_h263_num,
-  altivec_fdct,
-  altivec_idct_add_num,
-  altivec_idct_put_num,
-  altivec_put_pixels16_num,
-  altivec_avg_pixels16_num,
-  altivec_avg_pixels8_num,
-  altivec_put_pixels8_xy2_num,
-  altivec_put_no_rnd_pixels8_xy2_num,
-  altivec_put_pixels16_xy2_num,
-  altivec_put_no_rnd_pixels16_xy2_num,
-  altivec_hadamard8_diff8x8_num,
-  altivec_hadamard8_diff16_num,
-  altivec_avg_pixels8_xy2_num,
-  powerpc_clear_blocks_dcbz32,
-  powerpc_clear_blocks_dcbz128,
-  altivec_put_h264_chroma_mc8_num,
-  altivec_avg_h264_chroma_mc8_num,
-  altivec_put_h264_qpel16_h_lowpass_num,
-  altivec_avg_h264_qpel16_h_lowpass_num,
-  altivec_put_h264_qpel16_v_lowpass_num,
-  altivec_avg_h264_qpel16_v_lowpass_num,
-  altivec_put_h264_qpel16_hv_lowpass_num,
-  altivec_avg_h264_qpel16_hv_lowpass_num,
-  powerpc_perf_total
+    altivec_fft_num = 0,
+    altivec_gmc1_num,
+    altivec_dct_unquantize_h263_num,
+    altivec_fdct,
+    altivec_idct_add_num,
+    altivec_idct_put_num,
+    altivec_put_pixels16_num,
+    altivec_avg_pixels16_num,
+    altivec_avg_pixels8_num,
+    altivec_put_pixels8_xy2_num,
+    altivec_put_no_rnd_pixels8_xy2_num,
+    altivec_put_pixels16_xy2_num,
+    altivec_put_no_rnd_pixels16_xy2_num,
+    altivec_hadamard8_diff8x8_num,
+    altivec_hadamard8_diff16_num,
+    altivec_avg_pixels8_xy2_num,
+    powerpc_clear_blocks_dcbz32,
+    powerpc_clear_blocks_dcbz128,
+    altivec_put_h264_chroma_mc8_num,
+    altivec_avg_h264_chroma_mc8_num,
+    altivec_put_h264_qpel16_h_lowpass_num,
+    altivec_avg_h264_qpel16_h_lowpass_num,
+    altivec_put_h264_qpel16_v_lowpass_num,
+    altivec_avg_h264_qpel16_v_lowpass_num,
+    altivec_put_h264_qpel16_hv_lowpass_num,
+    altivec_avg_h264_qpel16_hv_lowpass_num,
+    powerpc_perf_total
 };
 enum powerpc_data_index {
-  powerpc_data_min = 0,
-  powerpc_data_max,
-  powerpc_data_sum,
-  powerpc_data_num,
-  powerpc_data_total
+    powerpc_data_min = 0,
+    powerpc_data_max,
+    powerpc_data_sum,
+    powerpc_data_num,
+    powerpc_data_total
 };
 extern unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][powerpc_data_total];
 
@@ -105,45 +105,42 @@ extern unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][
 #define POWERPC_GET_PMC6(a) do {} while (0)
 #endif
 #endif /* HAVE_PPC64 */
-#define POWERPC_PERF_DECLARE(a, cond)   \
-  POWERP_PMC_DATATYPE                   \
-    pmc_start[POWERPC_NUM_PMC_ENABLED], \
-    pmc_stop[POWERPC_NUM_PMC_ENABLED],  \
-    pmc_loop_index;
+#define POWERPC_PERF_DECLARE(a, cond)       \
+    POWERP_PMC_DATATYPE                     \
+        pmc_start[POWERPC_NUM_PMC_ENABLED], \
+        pmc_stop[POWERPC_NUM_PMC_ENABLED],  \
+        pmc_loop_index;
 #define POWERPC_PERF_START_COUNT(a, cond) do { \
-  POWERPC_GET_PMC6(pmc_start[5]); \
-  POWERPC_GET_PMC5(pmc_start[4]); \
-  POWERPC_GET_PMC4(pmc_start[3]); \
-  POWERPC_GET_PMC3(pmc_start[2]); \
-  POWERPC_GET_PMC2(pmc_start[1]); \
-  POWERPC_GET_PMC1(pmc_start[0]); \
-  } while (0)
+    POWERPC_GET_PMC6(pmc_start[5]); \
+    POWERPC_GET_PMC5(pmc_start[4]); \
+    POWERPC_GET_PMC4(pmc_start[3]); \
+    POWERPC_GET_PMC3(pmc_start[2]); \
+    POWERPC_GET_PMC2(pmc_start[1]); \
+    POWERPC_GET_PMC1(pmc_start[0]); \
+    } while (0)
 #define POWERPC_PERF_STOP_COUNT(a, cond) do { \
-  POWERPC_GET_PMC1(pmc_stop[0]); \
-  POWERPC_GET_PMC2(pmc_stop[1]); \
-  POWERPC_GET_PMC3(pmc_stop[2]); \
-  POWERPC_GET_PMC4(pmc_stop[3]); \
-  POWERPC_GET_PMC5(pmc_stop[4]); \
-  POWERPC_GET_PMC6(pmc_stop[5]); \
-  if (cond)                       \
-  {                               \
-    for(pmc_loop_index = 0;       \
-        pmc_loop_index < POWERPC_NUM_PMC_ENABLED; \
-        pmc_loop_index++)         \
-    {                             \
-      if (pmc_stop[pmc_loop_index] >= pmc_start[pmc_loop_index])  \
-        {                                                         \
-        POWERP_PMC_DATATYPE diff =                                \
-          pmc_stop[pmc_loop_index] - pmc_start[pmc_loop_index];   \
-        if (diff < perfdata[pmc_loop_index][a][powerpc_data_min]) \
-          perfdata[pmc_loop_index][a][powerpc_data_min] = diff;   \
-        if (diff > perfdata[pmc_loop_index][a][powerpc_data_max]) \
-          perfdata[pmc_loop_index][a][powerpc_data_max] = diff;   \
-        perfdata[pmc_loop_index][a][powerpc_data_sum] += diff;    \
-        perfdata[pmc_loop_index][a][powerpc_data_num] ++;         \
-      }                           \
-    }                             \
-  }                               \
+    POWERPC_GET_PMC1(pmc_stop[0]);            \
+    POWERPC_GET_PMC2(pmc_stop[1]);            \
+    POWERPC_GET_PMC3(pmc_stop[2]);            \
+    POWERPC_GET_PMC4(pmc_stop[3]);            \
+    POWERPC_GET_PMC5(pmc_stop[4]);            \
+    POWERPC_GET_PMC6(pmc_stop[5]);            \
+    if (cond) {                               \
+        for(pmc_loop_index = 0;               \
+            pmc_loop_index < POWERPC_NUM_PMC_ENABLED; \
+            pmc_loop_index++) {               \
+            if (pmc_stop[pmc_loop_index] >= pmc_start[pmc_loop_index]) {  \
+                POWERP_PMC_DATATYPE diff =                                \
+                  pmc_stop[pmc_loop_index] - pmc_start[pmc_loop_index];   \
+                if (diff < perfdata[pmc_loop_index][a][powerpc_data_min]) \
+                    perfdata[pmc_loop_index][a][powerpc_data_min] = diff; \
+                if (diff > perfdata[pmc_loop_index][a][powerpc_data_max]) \
+                    perfdata[pmc_loop_index][a][powerpc_data_max] = diff; \
+                perfdata[pmc_loop_index][a][powerpc_data_sum] += diff;    \
+                perfdata[pmc_loop_index][a][powerpc_data_num] ++;         \
+            }                                 \
+        }                                     \
+    }                                         \
 } while (0)
 #else /* CONFIG_POWERPC_PERF */
 // those are needed to avoid empty statements.
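The POWERPC_PERF_* macros reindented above keep per-function min/max/sum/count statistics read from the PMC registers. A rough portable illustration of the same bookkeeping pattern, using clock_gettime() instead of the PMCs (that substitution is an assumption for the sketch; the struct and function names are made up):

    #include <stdint.h>
    #include <stdio.h>
    #include <time.h>

    struct perf_stats {
        uint64_t min, max, sum, num; /* mirrors powerpc_data_{min,max,sum,num} */
    };

    static uint64_t now_ns(void)
    {
        struct timespec ts;
        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (uint64_t)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
    }

    /* Same update rule as POWERPC_PERF_STOP_COUNT: keep only samples where
     * stop >= start, then fold the difference into min/max/sum/num. */
    static void perf_update(struct perf_stats *s, uint64_t start, uint64_t stop)
    {
        uint64_t diff;
        if (stop < start)
            return;
        diff = stop - start;
        if (s->num == 0 || diff < s->min) s->min = diff;
        if (diff > s->max)                s->max = diff;
        s->sum += diff;
        s->num++;
    }

    int main(void)
    {
        struct perf_stats s = { 0, 0, 0, 0 };
        int i;
        for (i = 0; i < 100; i++) {
            uint64_t t0 = now_ns();
            /* ... code under test ... */
            perf_update(&s, t0, now_ns());
        }
        if (s.num)
            printf("min %llu max %llu avg %.2f (%llu samples)\n",
                   (unsigned long long)s.min, (unsigned long long)s.max,
                   (double)s.sum / (double)s.num, (unsigned long long)s.num);
        return 0;
    }
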
diff --git a/libavcodec/ppc/fft_altivec.c b/libavcodec/ppc/fft_altivec.c
index f560855..ddf142b 100644
--- a/libavcodec/ppc/fft_altivec.c
+++ b/libavcodec/ppc/fft_altivec.c
@@ -33,21 +33,21 @@
 /* butter fly op */
 #define BF(pre, pim, qre, qim, pre1, pim1, qre1, qim1) \
 {\
-  FFTSample ax, ay, bx, by;\
-  bx=pre1;\
-  by=pim1;\
-  ax=qre1;\
-  ay=qim1;\
-  pre = (bx + ax);\
-  pim = (by + ay);\
-  qre = (bx - ax);\
-  qim = (by - ay);\
+    FFTSample ax, ay, bx, by;\
+    bx=pre1;\
+    by=pim1;\
+    ax=qre1;\
+    ay=qim1;\
+    pre = (bx + ax);\
+    pim = (by + ay);\
+    qre = (bx - ax);\
+    qim = (by - ay);\
 }
 #define MUL16(a,b) ((a) * (b))
 #define CMUL(pre, pim, are, aim, bre, bim) \
 {\
-   pre = (MUL16(are, bre) - MUL16(aim, bim));\
-   pim = (MUL16(are, bim) + MUL16(bre, aim));\
+    pre = (MUL16(are, bre) - MUL16(aim, bim));\
+    pim = (MUL16(are, bim) + MUL16(bre, aim));\
 }
 
 
@@ -85,14 +85,11 @@ POWERPC_PERF_START_COUNT(altivec_fft_num, s->nbits >= 6);
 
         c1 = vcii(p,p,n,n);
 
-        if (s->inverse)
-            {
-                c2 = vcii(p,p,n,p);
-            }
-        else
-            {
-                c2 = vcii(p,p,p,n);
-            }
+        if (s->inverse) {
+            c2 = vcii(p,p,n,p);
+        } else {
+            c2 = vcii(p,p,p,n);
+        }
 
         j = (np >> 2);
         do {
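For reference, the BF() and CMUL() macros touched at the top of this file are the scalar radix-2 butterfly and complex multiply used by the FFT. The same arithmetic written out on a small complex struct (the struct and function names below are illustrative only):

    #include <stdio.h>

    typedef struct { float re, im; } cplx;

    /* Butterfly, as in BF(): overwrite p with p+q and q with p-q. */
    static void butterfly(cplx *p, cplx *q)
    {
        cplx b = *p, a = *q;
        p->re = b.re + a.re;  p->im = b.im + a.im;
        q->re = b.re - a.re;  q->im = b.im - a.im;
    }

    /* Complex multiply, as in CMUL(): (a.re + i*a.im) * (b.re + i*b.im). */
    static cplx cmul(cplx a, cplx b)
    {
        cplx r;
        r.re = a.re * b.re - a.im * b.im;
        r.im = a.re * b.im + b.re * a.im;
        return r;
    }

    int main(void)
    {
        cplx p = { 1, 2 }, q = { 3, 4 }, w;
        butterfly(&p, &q);
        w = cmul(p, q);
        printf("p=(%g,%g) q=(%g,%g) p*q=(%g,%g)\n",
               p.re, p.im, q.re, q.im, w.re, w.im);
        return 0;
    }
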
diff --git a/libavcodec/ppc/gmc_altivec.c b/libavcodec/ppc/gmc_altivec.c
index 0113a9a..a69062d 100644
--- a/libavcodec/ppc/gmc_altivec.c
+++ b/libavcodec/ppc/gmc_altivec.c
@@ -36,16 +36,16 @@ void gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */, int str
 {
 POWERPC_PERF_DECLARE(altivec_gmc1_num, GMC1_PERF_COND);
     const DECLARE_ALIGNED_16(unsigned short, rounder_a[8]) =
-      {rounder, rounder, rounder, rounder,
-       rounder, rounder, rounder, rounder};
+        {rounder, rounder, rounder, rounder,
+         rounder, rounder, rounder, rounder};
     const DECLARE_ALIGNED_16(unsigned short, ABCD[8]) =
-      {
-        (16-x16)*(16-y16), /* A */
-        (   x16)*(16-y16), /* B */
-        (16-x16)*(   y16), /* C */
-        (   x16)*(   y16), /* D */
-        0, 0, 0, 0         /* padding */
-      };
+        {
+            (16-x16)*(16-y16), /* A */
+            (   x16)*(16-y16), /* B */
+            (16-x16)*(   y16), /* C */
+            (   x16)*(   y16), /* D */
+            0, 0, 0, 0         /* padding */
+        };
     register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);
     register const vector unsigned short vcsr8 = (const vector unsigned short)vec_splat_u16(8);
     register vector unsigned char dstv, dstv2, src_0, src_1, srcvA, srcvB, srcvC, srcvD;
@@ -74,73 +74,67 @@ POWERPC_PERF_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
     src_1 = vec_ld(16, src);
     srcvA = vec_perm(src_0, src_1, vec_lvsl(0, src));
 
-    if (src_really_odd != 0x0000000F)
-    { // if src & 0xF == 0xF, then (src+1) is properly aligned on the second vector.
-      srcvB = vec_perm(src_0, src_1, vec_lvsl(1, src));
-    }
-    else
-    {
-      srcvB = src_1;
+    if (src_really_odd != 0x0000000F) {
+        // if src & 0xF == 0xF, then (src+1) is properly aligned
+        // on the second vector.
+        srcvB = vec_perm(src_0, src_1, vec_lvsl(1, src));
+    } else {
+        srcvB = src_1;
     }
     srcvA = vec_mergeh(vczero, srcvA);
     srcvB = vec_mergeh(vczero, srcvB);
 
-    for(i=0; i<h; i++)
-    {
-      dst_odd = (unsigned long)dst & 0x0000000F;
-      src_really_odd = (((unsigned long)src) + stride) & 0x0000000F;
-
-      dstv = vec_ld(0, dst);
-
-      // we we'll be able to pick-up our 9 char elements
-      // at src + stride from those 32 bytes
-      // then reuse the resulting 2 vectors srvcC and srcvD
-      // as the next srcvA and srcvB
-      src_0 = vec_ld(stride + 0, src);
-      src_1 = vec_ld(stride + 16, src);
-      srcvC = vec_perm(src_0, src_1, vec_lvsl(stride + 0, src));
-
-      if (src_really_odd != 0x0000000F)
-      { // if src & 0xF == 0xF, then (src+1) is properly aligned on the second vector.
-        srcvD = vec_perm(src_0, src_1, vec_lvsl(stride + 1, src));
-      }
-      else
-      {
-        srcvD = src_1;
-      }
-
-      srcvC = vec_mergeh(vczero, srcvC);
-      srcvD = vec_mergeh(vczero, srcvD);
-
-
-      // OK, now we (finally) do the math :-)
-      // those four instructions replaces 32 int muls & 32 int adds.
-      // isn't AltiVec nice ?
-      tempA = vec_mladd((vector unsigned short)srcvA, Av, rounderV);
-      tempB = vec_mladd((vector unsigned short)srcvB, Bv, tempA);
-      tempC = vec_mladd((vector unsigned short)srcvC, Cv, tempB);
-      tempD = vec_mladd((vector unsigned short)srcvD, Dv, tempC);
-
-      srcvA = srcvC;
-      srcvB = srcvD;
-
-      tempD = vec_sr(tempD, vcsr8);
-
-      dstv2 = vec_pack(tempD, (vector unsigned short)vczero);
-
-      if (dst_odd)
-      {
-        dstv2 = vec_perm(dstv, dstv2, vcprm(0,1,s0,s1));
-      }
-      else
-      {
-        dstv2 = vec_perm(dstv, dstv2, vcprm(s0,s1,2,3));
-      }
-
-      vec_st(dstv2, 0, dst);
-
-      dst += stride;
-      src += stride;
+    for(i=0; i<h; i++) {
+        dst_odd = (unsigned long)dst & 0x0000000F;
+        src_really_odd = (((unsigned long)src) + stride) & 0x0000000F;
+
+        dstv = vec_ld(0, dst);
+
+        // we we'll be able to pick-up our 9 char elements
+        // at src + stride from those 32 bytes
+        // then reuse the resulting 2 vectors srvcC and srcvD
+        // as the next srcvA and srcvB
+        src_0 = vec_ld(stride + 0, src);
+        src_1 = vec_ld(stride + 16, src);
+        srcvC = vec_perm(src_0, src_1, vec_lvsl(stride + 0, src));
+
+        if (src_really_odd != 0x0000000F) {
+            // if src & 0xF == 0xF, then (src+1) is properly aligned
+            // on the second vector.
+            srcvD = vec_perm(src_0, src_1, vec_lvsl(stride + 1, src));
+        } else {
+            srcvD = src_1;
+        }
+
+        srcvC = vec_mergeh(vczero, srcvC);
+        srcvD = vec_mergeh(vczero, srcvD);
+
+
+        // OK, now we (finally) do the math :-)
+        // those four instructions replaces 32 int muls & 32 int adds.
+        // isn't AltiVec nice ?
+        tempA = vec_mladd((vector unsigned short)srcvA, Av, rounderV);
+        tempB = vec_mladd((vector unsigned short)srcvB, Bv, tempA);
+        tempC = vec_mladd((vector unsigned short)srcvC, Cv, tempB);
+        tempD = vec_mladd((vector unsigned short)srcvD, Dv, tempC);
+
+        srcvA = srcvC;
+        srcvB = srcvD;
+
+        tempD = vec_sr(tempD, vcsr8);
+
+        dstv2 = vec_pack(tempD, (vector unsigned short)vczero);
+
+        if (dst_odd) {
+            dstv2 = vec_perm(dstv, dstv2, vcprm(0,1,s0,s1));
+        } else {
+            dstv2 = vec_perm(dstv, dstv2, vcprm(s0,s1,2,3));
+        }
+
+        vec_st(dstv2, 0, dst);
+
+        dst += stride;
+        src += stride;
     }
 
 POWERPC_PERF_STOP_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
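The gmc1_altivec() loop above vectorizes a bilinear blend with fixed-point weights A..D derived from the x16/y16 fractional offsets plus a rounder, then shifts right by 8 (the four weights sum to 256). A scalar sketch of that per-pixel formula, mirroring the plain C gmc1 reference (the function name and loop layout here are illustrative):

    #include <stdint.h>

    static void gmc1_scalar(uint8_t *dst, const uint8_t *src, int stride,
                            int h, int x16, int y16, int rounder)
    {
        const int A = (16 - x16) * (16 - y16);
        const int B = (     x16) * (16 - y16);
        const int C = (16 - x16) * (     y16);
        const int D = (     x16) * (     y16);
        int i, j;

        for (i = 0; i < h; i++) {
            /* the AltiVec version above handles 8 pixels per row */
            for (j = 0; j < 8; j++)
                dst[j] = (A * src[j]          + B * src[j + 1] +
                          C * src[j + stride] + D * src[j + stride + 1] +
                          rounder) >> 8;
            dst += stride;
            src += stride;
        }
    }
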
diff --git a/libavcodec/ppc/h264_altivec.c b/libavcodec/ppc/h264_altivec.c
index 09a739b..04dad2e 100644
--- a/libavcodec/ppc/h264_altivec.c
+++ b/libavcodec/ppc/h264_altivec.c
@@ -196,7 +196,7 @@ void put_no_rnd_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride
     const vec_s16_t vD = vec_splat((vec_s16_t)vABCD, 7);
     LOAD_ZERO;
     const vec_s16_t v28ss = vec_sub(vec_sl(vec_splat_s16(1),vec_splat_u16(5)),vec_splat_s16(4));
-    const vec_u16_t v6us = vec_splat_u16(6);
+    const vec_u16_t v6us  = vec_splat_u16(6);
     register int loadSecond     = (((unsigned long)src) % 16) <= 7 ? 0 : 1;
     register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0;
 
@@ -392,8 +392,8 @@ static inline void avg_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,
 #define avg_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) avg_pixels16_l2(d,s1,s2,ds,s1s,16,h)
  */
 
-  H264_MC(put_, 16, altivec)
-  H264_MC(avg_, 16, altivec)
+H264_MC(put_, 16, altivec)
+H264_MC(avg_, 16, altivec)
 
 
 /****************************************************************************
@@ -685,9 +685,9 @@ static inline void write16x4(uint8_t *dst, int dst_stride,
     r15 = vec_mergel(r3, r7);   /*3,7,11,15 set 1*/                        \
                                                                            \
     /*Third merge*/                                                        \
-    r0 = vec_mergeh(r8, r12);   /*0,2,4,6,8,10,12,14 set 0*/               \
-    r1 = vec_mergel(r8, r12);   /*0,2,4,6,8,10,12,14 set 1*/               \
-    r2 = vec_mergeh(r9, r13);   /*0,2,4,6,8,10,12,14 set 2*/               \
+    r0 = vec_mergeh(r8,  r12);  /*0,2,4,6,8,10,12,14 set 0*/               \
+    r1 = vec_mergel(r8,  r12);  /*0,2,4,6,8,10,12,14 set 1*/               \
+    r2 = vec_mergeh(r9,  r13);  /*0,2,4,6,8,10,12,14 set 2*/               \
     r4 = vec_mergeh(r10, r14);  /*1,3,5,7,9,11,13,15 set 0*/               \
     r5 = vec_mergel(r10, r14);  /*1,3,5,7,9,11,13,15 set 1*/               \
     r6 = vec_mergeh(r11, r15);  /*1,3,5,7,9,11,13,15 set 2*/               \
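The h264_template_altivec.c changes that follow reindent the qpel16 lowpass filters. For orientation, each of them applies the standard H.264 6-tap filter per output sample, 20*(p0+p1) - 5*(m1+p2) + (m2+p3), with rounding and a shift; a scalar sketch of one horizontal row (the function name and clip helper are illustrative):

    #include <stdint.h>

    static inline uint8_t clip_uint8(int v)
    {
        return v < 0 ? 0 : v > 255 ? 255 : v;
    }

    /* One row of the half-pel horizontal lowpass that
     * PREFIX_h264_qpel16_h_lowpass_altivec computes 16 pixels at a time. */
    static void h264_h_lowpass_row(uint8_t *dst, const uint8_t *src, int width)
    {
        int x;
        for (x = 0; x < width; x++) {
            int v = 20 * (src[x]     + src[x + 1])
                  -  5 * (src[x - 1] + src[x + 2])
                  +      (src[x - 2] + src[x + 3]);
            dst[x] = clip_uint8((v + 16) >> 5);
        }
    }
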
diff --git a/libavcodec/ppc/h264_template_altivec.c b/libavcodec/ppc/h264_template_altivec.c
index d8ad964..b4a5ddc 100644
--- a/libavcodec/ppc/h264_template_altivec.c
+++ b/libavcodec/ppc/h264_template_altivec.c
@@ -206,489 +206,489 @@ void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src,
 
 /* this code assume stride % 16 == 0 */
 static void PREFIX_h264_qpel16_h_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) {
-  POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_h_lowpass_num, 1);
-  register int i;
-
-  LOAD_ZERO;
-  const vec_u8_t permM2 = vec_lvsl(-2, src);
-  const vec_u8_t permM1 = vec_lvsl(-1, src);
-  const vec_u8_t permP0 = vec_lvsl(+0, src);
-  const vec_u8_t permP1 = vec_lvsl(+1, src);
-  const vec_u8_t permP2 = vec_lvsl(+2, src);
-  const vec_u8_t permP3 = vec_lvsl(+3, src);
-  const vec_s16_t v5ss = vec_splat_s16(5);
-  const vec_u16_t v5us = vec_splat_u16(5);
-  const vec_s16_t v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2));
-  const vec_s16_t v16ss = vec_sl(vec_splat_s16(1),vec_splat_u16(4));
-
-  vec_u8_t srcM2, srcM1, srcP0, srcP1, srcP2, srcP3;
-
-  register int align = ((((unsigned long)src) - 2) % 16);
-
-  vec_s16_t srcP0A, srcP0B, srcP1A, srcP1B,
-                      srcP2A, srcP2B, srcP3A, srcP3B,
-                      srcM1A, srcM1B, srcM2A, srcM2B,
-                      sum1A, sum1B, sum2A, sum2B, sum3A, sum3B,
-                      pp1A, pp1B, pp2A, pp2B, pp3A, pp3B,
-                      psumA, psumB, sumA, sumB;
-
-  vec_u8_t sum, vdst, fsum;
-
-  POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_h_lowpass_num, 1);
-
-  for (i = 0 ; i < 16 ; i ++) {
-    vec_u8_t srcR1 = vec_ld(-2, src);
-    vec_u8_t srcR2 = vec_ld(14, src);
-
-    switch (align) {
-    default: {
-      srcM2 = vec_perm(srcR1, srcR2, permM2);
-      srcM1 = vec_perm(srcR1, srcR2, permM1);
-      srcP0 = vec_perm(srcR1, srcR2, permP0);
-      srcP1 = vec_perm(srcR1, srcR2, permP1);
-      srcP2 = vec_perm(srcR1, srcR2, permP2);
-      srcP3 = vec_perm(srcR1, srcR2, permP3);
-    } break;
-    case 11: {
-      srcM2 = vec_perm(srcR1, srcR2, permM2);
-      srcM1 = vec_perm(srcR1, srcR2, permM1);
-      srcP0 = vec_perm(srcR1, srcR2, permP0);
-      srcP1 = vec_perm(srcR1, srcR2, permP1);
-      srcP2 = vec_perm(srcR1, srcR2, permP2);
-      srcP3 = srcR2;
-    } break;
-    case 12: {
-      vec_u8_t srcR3 = vec_ld(30, src);
-      srcM2 = vec_perm(srcR1, srcR2, permM2);
-      srcM1 = vec_perm(srcR1, srcR2, permM1);
-      srcP0 = vec_perm(srcR1, srcR2, permP0);
-      srcP1 = vec_perm(srcR1, srcR2, permP1);
-      srcP2 = srcR2;
-      srcP3 = vec_perm(srcR2, srcR3, permP3);
-    } break;
-    case 13: {
-      vec_u8_t srcR3 = vec_ld(30, src);
-      srcM2 = vec_perm(srcR1, srcR2, permM2);
-      srcM1 = vec_perm(srcR1, srcR2, permM1);
-      srcP0 = vec_perm(srcR1, srcR2, permP0);
-      srcP1 = srcR2;
-      srcP2 = vec_perm(srcR2, srcR3, permP2);
-      srcP3 = vec_perm(srcR2, srcR3, permP3);
-    } break;
-    case 14: {
-      vec_u8_t srcR3 = vec_ld(30, src);
-      srcM2 = vec_perm(srcR1, srcR2, permM2);
-      srcM1 = vec_perm(srcR1, srcR2, permM1);
-      srcP0 = srcR2;
-      srcP1 = vec_perm(srcR2, srcR3, permP1);
-      srcP2 = vec_perm(srcR2, srcR3, permP2);
-      srcP3 = vec_perm(srcR2, srcR3, permP3);
-    } break;
-    case 15: {
-      vec_u8_t srcR3 = vec_ld(30, src);
-      srcM2 = vec_perm(srcR1, srcR2, permM2);
-      srcM1 = srcR2;
-      srcP0 = vec_perm(srcR2, srcR3, permP0);
-      srcP1 = vec_perm(srcR2, srcR3, permP1);
-      srcP2 = vec_perm(srcR2, srcR3, permP2);
-      srcP3 = vec_perm(srcR2, srcR3, permP3);
-    } break;
-    }
+    POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_h_lowpass_num, 1);
+    register int i;
 
-    srcP0A = (vec_s16_t) vec_mergeh(zero_u8v, srcP0);
-    srcP0B = (vec_s16_t) vec_mergel(zero_u8v, srcP0);
-    srcP1A = (vec_s16_t) vec_mergeh(zero_u8v, srcP1);
-    srcP1B = (vec_s16_t) vec_mergel(zero_u8v, srcP1);
+    LOAD_ZERO;
+    const vec_u8_t permM2 = vec_lvsl(-2, src);
+    const vec_u8_t permM1 = vec_lvsl(-1, src);
+    const vec_u8_t permP0 = vec_lvsl(+0, src);
+    const vec_u8_t permP1 = vec_lvsl(+1, src);
+    const vec_u8_t permP2 = vec_lvsl(+2, src);
+    const vec_u8_t permP3 = vec_lvsl(+3, src);
+    const vec_s16_t v5ss = vec_splat_s16(5);
+    const vec_u16_t v5us = vec_splat_u16(5);
+    const vec_s16_t v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2));
+    const vec_s16_t v16ss = vec_sl(vec_splat_s16(1),vec_splat_u16(4));
 
-    srcP2A = (vec_s16_t) vec_mergeh(zero_u8v, srcP2);
-    srcP2B = (vec_s16_t) vec_mergel(zero_u8v, srcP2);
-    srcP3A = (vec_s16_t) vec_mergeh(zero_u8v, srcP3);
-    srcP3B = (vec_s16_t) vec_mergel(zero_u8v, srcP3);
+    vec_u8_t srcM2, srcM1, srcP0, srcP1, srcP2, srcP3;
 
-    srcM1A = (vec_s16_t) vec_mergeh(zero_u8v, srcM1);
-    srcM1B = (vec_s16_t) vec_mergel(zero_u8v, srcM1);
-    srcM2A = (vec_s16_t) vec_mergeh(zero_u8v, srcM2);
-    srcM2B = (vec_s16_t) vec_mergel(zero_u8v, srcM2);
+    register int align = ((((unsigned long)src) - 2) % 16);
+
+    vec_s16_t srcP0A, srcP0B, srcP1A, srcP1B,
+              srcP2A, srcP2B, srcP3A, srcP3B,
+              srcM1A, srcM1B, srcM2A, srcM2B,
+              sum1A, sum1B, sum2A, sum2B, sum3A, sum3B,
+              pp1A, pp1B, pp2A, pp2B, pp3A, pp3B,
+              psumA, psumB, sumA, sumB;
+
+    vec_u8_t sum, vdst, fsum;
+
+    POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_h_lowpass_num, 1);
+
+    for (i = 0 ; i < 16 ; i ++) {
+        vec_u8_t srcR1 = vec_ld(-2, src);
+        vec_u8_t srcR2 = vec_ld(14, src);
+
+        switch (align) {
+        default: {
+            srcM2 = vec_perm(srcR1, srcR2, permM2);
+            srcM1 = vec_perm(srcR1, srcR2, permM1);
+            srcP0 = vec_perm(srcR1, srcR2, permP0);
+            srcP1 = vec_perm(srcR1, srcR2, permP1);
+            srcP2 = vec_perm(srcR1, srcR2, permP2);
+            srcP3 = vec_perm(srcR1, srcR2, permP3);
+        } break;
+        case 11: {
+            srcM2 = vec_perm(srcR1, srcR2, permM2);
+            srcM1 = vec_perm(srcR1, srcR2, permM1);
+            srcP0 = vec_perm(srcR1, srcR2, permP0);
+            srcP1 = vec_perm(srcR1, srcR2, permP1);
+            srcP2 = vec_perm(srcR1, srcR2, permP2);
+            srcP3 = srcR2;
+        } break;
+        case 12: {
+            vec_u8_t srcR3 = vec_ld(30, src);
+            srcM2 = vec_perm(srcR1, srcR2, permM2);
+            srcM1 = vec_perm(srcR1, srcR2, permM1);
+            srcP0 = vec_perm(srcR1, srcR2, permP0);
+            srcP1 = vec_perm(srcR1, srcR2, permP1);
+            srcP2 = srcR2;
+            srcP3 = vec_perm(srcR2, srcR3, permP3);
+        } break;
+        case 13: {
+            vec_u8_t srcR3 = vec_ld(30, src);
+            srcM2 = vec_perm(srcR1, srcR2, permM2);
+            srcM1 = vec_perm(srcR1, srcR2, permM1);
+            srcP0 = vec_perm(srcR1, srcR2, permP0);
+            srcP1 = srcR2;
+            srcP2 = vec_perm(srcR2, srcR3, permP2);
+            srcP3 = vec_perm(srcR2, srcR3, permP3);
+        } break;
+        case 14: {
+            vec_u8_t srcR3 = vec_ld(30, src);
+            srcM2 = vec_perm(srcR1, srcR2, permM2);
+            srcM1 = vec_perm(srcR1, srcR2, permM1);
+            srcP0 = srcR2;
+            srcP1 = vec_perm(srcR2, srcR3, permP1);
+            srcP2 = vec_perm(srcR2, srcR3, permP2);
+            srcP3 = vec_perm(srcR2, srcR3, permP3);
+        } break;
+        case 15: {
+            vec_u8_t srcR3 = vec_ld(30, src);
+            srcM2 = vec_perm(srcR1, srcR2, permM2);
+            srcM1 = srcR2;
+            srcP0 = vec_perm(srcR2, srcR3, permP0);
+            srcP1 = vec_perm(srcR2, srcR3, permP1);
+            srcP2 = vec_perm(srcR2, srcR3, permP2);
+            srcP3 = vec_perm(srcR2, srcR3, permP3);
+        } break;
+        }
 
-    sum1A = vec_adds(srcP0A, srcP1A);
-    sum1B = vec_adds(srcP0B, srcP1B);
-    sum2A = vec_adds(srcM1A, srcP2A);
-    sum2B = vec_adds(srcM1B, srcP2B);
-    sum3A = vec_adds(srcM2A, srcP3A);
-    sum3B = vec_adds(srcM2B, srcP3B);
+        srcP0A = (vec_s16_t) vec_mergeh(zero_u8v, srcP0);
+        srcP0B = (vec_s16_t) vec_mergel(zero_u8v, srcP0);
+        srcP1A = (vec_s16_t) vec_mergeh(zero_u8v, srcP1);
+        srcP1B = (vec_s16_t) vec_mergel(zero_u8v, srcP1);
 
-    pp1A = vec_mladd(sum1A, v20ss, v16ss);
-    pp1B = vec_mladd(sum1B, v20ss, v16ss);
+        srcP2A = (vec_s16_t) vec_mergeh(zero_u8v, srcP2);
+        srcP2B = (vec_s16_t) vec_mergel(zero_u8v, srcP2);
+        srcP3A = (vec_s16_t) vec_mergeh(zero_u8v, srcP3);
+        srcP3B = (vec_s16_t) vec_mergel(zero_u8v, srcP3);
 
-    pp2A = vec_mladd(sum2A, v5ss, zero_s16v);
-    pp2B = vec_mladd(sum2B, v5ss, zero_s16v);
+        srcM1A = (vec_s16_t) vec_mergeh(zero_u8v, srcM1);
+        srcM1B = (vec_s16_t) vec_mergel(zero_u8v, srcM1);
+        srcM2A = (vec_s16_t) vec_mergeh(zero_u8v, srcM2);
+        srcM2B = (vec_s16_t) vec_mergel(zero_u8v, srcM2);
 
-    pp3A = vec_add(sum3A, pp1A);
-    pp3B = vec_add(sum3B, pp1B);
+        sum1A = vec_adds(srcP0A, srcP1A);
+        sum1B = vec_adds(srcP0B, srcP1B);
+        sum2A = vec_adds(srcM1A, srcP2A);
+        sum2B = vec_adds(srcM1B, srcP2B);
+        sum3A = vec_adds(srcM2A, srcP3A);
+        sum3B = vec_adds(srcM2B, srcP3B);
 
-    psumA = vec_sub(pp3A, pp2A);
-    psumB = vec_sub(pp3B, pp2B);
+        pp1A = vec_mladd(sum1A, v20ss, v16ss);
+        pp1B = vec_mladd(sum1B, v20ss, v16ss);
 
-    sumA = vec_sra(psumA, v5us);
-    sumB = vec_sra(psumB, v5us);
+        pp2A = vec_mladd(sum2A, v5ss, zero_s16v);
+        pp2B = vec_mladd(sum2B, v5ss, zero_s16v);
 
-    sum = vec_packsu(sumA, sumB);
+        pp3A = vec_add(sum3A, pp1A);
+        pp3B = vec_add(sum3B, pp1B);
 
-    ASSERT_ALIGNED(dst);
-    vdst = vec_ld(0, dst);
+        psumA = vec_sub(pp3A, pp2A);
+        psumB = vec_sub(pp3B, pp2B);
 
-    OP_U8_ALTIVEC(fsum, sum, vdst);
+        sumA = vec_sra(psumA, v5us);
+        sumB = vec_sra(psumB, v5us);
 
-    vec_st(fsum, 0, dst);
+        sum = vec_packsu(sumA, sumB);
 
-    src += srcStride;
-    dst += dstStride;
-  }
-POWERPC_PERF_STOP_COUNT(PREFIX_h264_qpel16_h_lowpass_num, 1);
+        ASSERT_ALIGNED(dst);
+        vdst = vec_ld(0, dst);
+
+        OP_U8_ALTIVEC(fsum, sum, vdst);
+
+        vec_st(fsum, 0, dst);
+
+        src += srcStride;
+        dst += dstStride;
+    }
+    POWERPC_PERF_STOP_COUNT(PREFIX_h264_qpel16_h_lowpass_num, 1);
 }
 
 /* this code assume stride % 16 == 0 */
 static void PREFIX_h264_qpel16_v_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) {
-  POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_v_lowpass_num, 1);
-
-  register int i;
-
-  LOAD_ZERO;
-  const vec_u8_t perm = vec_lvsl(0, src);
-  const vec_s16_t v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2));
-  const vec_u16_t v5us = vec_splat_u16(5);
-  const vec_s16_t v5ss = vec_splat_s16(5);
-  const vec_s16_t v16ss = vec_sl(vec_splat_s16(1),vec_splat_u16(4));
-
-  uint8_t *srcbis = src - (srcStride * 2);
-
-  const vec_u8_t srcM2a = vec_ld(0, srcbis);
-  const vec_u8_t srcM2b = vec_ld(16, srcbis);
-  const vec_u8_t srcM2 = vec_perm(srcM2a, srcM2b, perm);
-//  srcbis += srcStride;
-  const vec_u8_t srcM1a = vec_ld(0, srcbis += srcStride);
-  const vec_u8_t srcM1b = vec_ld(16, srcbis);
-  const vec_u8_t srcM1 = vec_perm(srcM1a, srcM1b, perm);
-//  srcbis += srcStride;
-  const vec_u8_t srcP0a = vec_ld(0, srcbis += srcStride);
-  const vec_u8_t srcP0b = vec_ld(16, srcbis);
-  const vec_u8_t srcP0 = vec_perm(srcP0a, srcP0b, perm);
-//  srcbis += srcStride;
-  const vec_u8_t srcP1a = vec_ld(0, srcbis += srcStride);
-  const vec_u8_t srcP1b = vec_ld(16, srcbis);
-  const vec_u8_t srcP1 = vec_perm(srcP1a, srcP1b, perm);
-//  srcbis += srcStride;
-  const vec_u8_t srcP2a = vec_ld(0, srcbis += srcStride);
-  const vec_u8_t srcP2b = vec_ld(16, srcbis);
-  const vec_u8_t srcP2 = vec_perm(srcP2a, srcP2b, perm);
-//  srcbis += srcStride;
-
-  vec_s16_t srcM2ssA = (vec_s16_t) vec_mergeh(zero_u8v, srcM2);
-  vec_s16_t srcM2ssB = (vec_s16_t) vec_mergel(zero_u8v, srcM2);
-  vec_s16_t srcM1ssA = (vec_s16_t) vec_mergeh(zero_u8v, srcM1);
-  vec_s16_t srcM1ssB = (vec_s16_t) vec_mergel(zero_u8v, srcM1);
-  vec_s16_t srcP0ssA = (vec_s16_t) vec_mergeh(zero_u8v, srcP0);
-  vec_s16_t srcP0ssB = (vec_s16_t) vec_mergel(zero_u8v, srcP0);
-  vec_s16_t srcP1ssA = (vec_s16_t) vec_mergeh(zero_u8v, srcP1);
-  vec_s16_t srcP1ssB = (vec_s16_t) vec_mergel(zero_u8v, srcP1);
-  vec_s16_t srcP2ssA = (vec_s16_t) vec_mergeh(zero_u8v, srcP2);
-  vec_s16_t srcP2ssB = (vec_s16_t) vec_mergel(zero_u8v, srcP2);
-
-  vec_s16_t pp1A, pp1B, pp2A, pp2B, pp3A, pp3B,
-                      psumA, psumB, sumA, sumB,
-                      srcP3ssA, srcP3ssB,
-                      sum1A, sum1B, sum2A, sum2B, sum3A, sum3B;
-
-  vec_u8_t sum, vdst, fsum, srcP3a, srcP3b, srcP3;
-
-  POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_v_lowpass_num, 1);
-
-  for (i = 0 ; i < 16 ; i++) {
-    srcP3a = vec_ld(0, srcbis += srcStride);
-    srcP3b = vec_ld(16, srcbis);
-    srcP3 = vec_perm(srcP3a, srcP3b, perm);
-    srcP3ssA = (vec_s16_t) vec_mergeh(zero_u8v, srcP3);
-    srcP3ssB = (vec_s16_t) vec_mergel(zero_u8v, srcP3);
-//    srcbis += srcStride;
-
-    sum1A = vec_adds(srcP0ssA, srcP1ssA);
-    sum1B = vec_adds(srcP0ssB, srcP1ssB);
-    sum2A = vec_adds(srcM1ssA, srcP2ssA);
-    sum2B = vec_adds(srcM1ssB, srcP2ssB);
-    sum3A = vec_adds(srcM2ssA, srcP3ssA);
-    sum3B = vec_adds(srcM2ssB, srcP3ssB);
-
-    srcM2ssA = srcM1ssA;
-    srcM2ssB = srcM1ssB;
-    srcM1ssA = srcP0ssA;
-    srcM1ssB = srcP0ssB;
-    srcP0ssA = srcP1ssA;
-    srcP0ssB = srcP1ssB;
-    srcP1ssA = srcP2ssA;
-    srcP1ssB = srcP2ssB;
-    srcP2ssA = srcP3ssA;
-    srcP2ssB = srcP3ssB;
-
-    pp1A = vec_mladd(sum1A, v20ss, v16ss);
-    pp1B = vec_mladd(sum1B, v20ss, v16ss);
-
-    pp2A = vec_mladd(sum2A, v5ss, zero_s16v);
-    pp2B = vec_mladd(sum2B, v5ss, zero_s16v);
-
-    pp3A = vec_add(sum3A, pp1A);
-    pp3B = vec_add(sum3B, pp1B);
-
-    psumA = vec_sub(pp3A, pp2A);
-    psumB = vec_sub(pp3B, pp2B);
-
-    sumA = vec_sra(psumA, v5us);
-    sumB = vec_sra(psumB, v5us);
-
-    sum = vec_packsu(sumA, sumB);
-
-    ASSERT_ALIGNED(dst);
-    vdst = vec_ld(0, dst);
-
-    OP_U8_ALTIVEC(fsum, sum, vdst);
-
-    vec_st(fsum, 0, dst);
-
-    dst += dstStride;
-  }
-  POWERPC_PERF_STOP_COUNT(PREFIX_h264_qpel16_v_lowpass_num, 1);
+    POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_v_lowpass_num, 1);
+
+    register int i;
+
+    LOAD_ZERO;
+    const vec_u8_t perm = vec_lvsl(0, src);
+    const vec_s16_t v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2));
+    const vec_u16_t v5us = vec_splat_u16(5);
+    const vec_s16_t v5ss = vec_splat_s16(5);
+    const vec_s16_t v16ss = vec_sl(vec_splat_s16(1),vec_splat_u16(4));
+
+    uint8_t *srcbis = src - (srcStride * 2);
+
+    const vec_u8_t srcM2a = vec_ld(0, srcbis);
+    const vec_u8_t srcM2b = vec_ld(16, srcbis);
+    const vec_u8_t srcM2 = vec_perm(srcM2a, srcM2b, perm);
+    //srcbis += srcStride;
+    const vec_u8_t srcM1a = vec_ld(0, srcbis += srcStride);
+    const vec_u8_t srcM1b = vec_ld(16, srcbis);
+    const vec_u8_t srcM1 = vec_perm(srcM1a, srcM1b, perm);
+    //srcbis += srcStride;
+    const vec_u8_t srcP0a = vec_ld(0, srcbis += srcStride);
+    const vec_u8_t srcP0b = vec_ld(16, srcbis);
+    const vec_u8_t srcP0 = vec_perm(srcP0a, srcP0b, perm);
+    //srcbis += srcStride;
+    const vec_u8_t srcP1a = vec_ld(0, srcbis += srcStride);
+    const vec_u8_t srcP1b = vec_ld(16, srcbis);
+    const vec_u8_t srcP1 = vec_perm(srcP1a, srcP1b, perm);
+    //srcbis += srcStride;
+    const vec_u8_t srcP2a = vec_ld(0, srcbis += srcStride);
+    const vec_u8_t srcP2b = vec_ld(16, srcbis);
+    const vec_u8_t srcP2 = vec_perm(srcP2a, srcP2b, perm);
+    //srcbis += srcStride;
+
+    vec_s16_t srcM2ssA = (vec_s16_t) vec_mergeh(zero_u8v, srcM2);
+    vec_s16_t srcM2ssB = (vec_s16_t) vec_mergel(zero_u8v, srcM2);
+    vec_s16_t srcM1ssA = (vec_s16_t) vec_mergeh(zero_u8v, srcM1);
+    vec_s16_t srcM1ssB = (vec_s16_t) vec_mergel(zero_u8v, srcM1);
+    vec_s16_t srcP0ssA = (vec_s16_t) vec_mergeh(zero_u8v, srcP0);
+    vec_s16_t srcP0ssB = (vec_s16_t) vec_mergel(zero_u8v, srcP0);
+    vec_s16_t srcP1ssA = (vec_s16_t) vec_mergeh(zero_u8v, srcP1);
+    vec_s16_t srcP1ssB = (vec_s16_t) vec_mergel(zero_u8v, srcP1);
+    vec_s16_t srcP2ssA = (vec_s16_t) vec_mergeh(zero_u8v, srcP2);
+    vec_s16_t srcP2ssB = (vec_s16_t) vec_mergel(zero_u8v, srcP2);
+
+    vec_s16_t pp1A, pp1B, pp2A, pp2B, pp3A, pp3B,
+              psumA, psumB, sumA, sumB,
+              srcP3ssA, srcP3ssB,
+              sum1A, sum1B, sum2A, sum2B, sum3A, sum3B;
+
+    vec_u8_t sum, vdst, fsum, srcP3a, srcP3b, srcP3;
+
+    POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_v_lowpass_num, 1);
+
+    for (i = 0 ; i < 16 ; i++) {
+        srcP3a = vec_ld(0, srcbis += srcStride);
+        srcP3b = vec_ld(16, srcbis);
+        srcP3 = vec_perm(srcP3a, srcP3b, perm);
+        srcP3ssA = (vec_s16_t) vec_mergeh(zero_u8v, srcP3);
+        srcP3ssB = (vec_s16_t) vec_mergel(zero_u8v, srcP3);
+        //srcbis += srcStride;
+
+        sum1A = vec_adds(srcP0ssA, srcP1ssA);
+        sum1B = vec_adds(srcP0ssB, srcP1ssB);
+        sum2A = vec_adds(srcM1ssA, srcP2ssA);
+        sum2B = vec_adds(srcM1ssB, srcP2ssB);
+        sum3A = vec_adds(srcM2ssA, srcP3ssA);
+        sum3B = vec_adds(srcM2ssB, srcP3ssB);
+
+        srcM2ssA = srcM1ssA;
+        srcM2ssB = srcM1ssB;
+        srcM1ssA = srcP0ssA;
+        srcM1ssB = srcP0ssB;
+        srcP0ssA = srcP1ssA;
+        srcP0ssB = srcP1ssB;
+        srcP1ssA = srcP2ssA;
+        srcP1ssB = srcP2ssB;
+        srcP2ssA = srcP3ssA;
+        srcP2ssB = srcP3ssB;
+
+        pp1A = vec_mladd(sum1A, v20ss, v16ss);
+        pp1B = vec_mladd(sum1B, v20ss, v16ss);
+
+        pp2A = vec_mladd(sum2A, v5ss, zero_s16v);
+        pp2B = vec_mladd(sum2B, v5ss, zero_s16v);
+
+        pp3A = vec_add(sum3A, pp1A);
+        pp3B = vec_add(sum3B, pp1B);
+
+        psumA = vec_sub(pp3A, pp2A);
+        psumB = vec_sub(pp3B, pp2B);
+
+        sumA = vec_sra(psumA, v5us);
+        sumB = vec_sra(psumB, v5us);
+
+        sum = vec_packsu(sumA, sumB);
+
+        ASSERT_ALIGNED(dst);
+        vdst = vec_ld(0, dst);
+
+        OP_U8_ALTIVEC(fsum, sum, vdst);
+
+        vec_st(fsum, 0, dst);
+
+        dst += dstStride;
+    }
+    POWERPC_PERF_STOP_COUNT(PREFIX_h264_qpel16_v_lowpass_num, 1);
 }
 
 /* this code assume stride % 16 == 0 *and* tmp is properly aligned */
 static void PREFIX_h264_qpel16_hv_lowpass_altivec(uint8_t * dst, int16_t * tmp, uint8_t * src, int dstStride, int tmpStride, int srcStride) {
-  POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_hv_lowpass_num, 1);
-  register int i;
-  LOAD_ZERO;
-  const vec_u8_t permM2 = vec_lvsl(-2, src);
-  const vec_u8_t permM1 = vec_lvsl(-1, src);
-  const vec_u8_t permP0 = vec_lvsl(+0, src);
-  const vec_u8_t permP1 = vec_lvsl(+1, src);
-  const vec_u8_t permP2 = vec_lvsl(+2, src);
-  const vec_u8_t permP3 = vec_lvsl(+3, src);
-  const vec_s16_t v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2));
-  const vec_u32_t v10ui = vec_splat_u32(10);
-  const vec_s16_t v5ss = vec_splat_s16(5);
-  const vec_s16_t v1ss = vec_splat_s16(1);
-  const vec_s32_t v512si = vec_sl(vec_splat_s32(1),vec_splat_u32(9));
-  const vec_u32_t v16ui = vec_sl(vec_splat_u32(1),vec_splat_u32(4));
-
-  register int align = ((((unsigned long)src) - 2) % 16);
-
-  vec_s16_t srcP0A, srcP0B, srcP1A, srcP1B,
-                      srcP2A, srcP2B, srcP3A, srcP3B,
-                      srcM1A, srcM1B, srcM2A, srcM2B,
-                      sum1A, sum1B, sum2A, sum2B, sum3A, sum3B,
-                      pp1A, pp1B, pp2A, pp2B, psumA, psumB;
-
-  const vec_u8_t mperm = (const vec_u8_t)
-    AVV(0x00, 0x08, 0x01, 0x09, 0x02, 0x0A, 0x03, 0x0B,
-        0x04, 0x0C, 0x05, 0x0D, 0x06, 0x0E, 0x07, 0x0F);
-  int16_t *tmpbis = tmp;
-
-  vec_s16_t tmpM1ssA, tmpM1ssB, tmpM2ssA, tmpM2ssB,
-                      tmpP0ssA, tmpP0ssB, tmpP1ssA, tmpP1ssB,
-                      tmpP2ssA, tmpP2ssB;
-
-  vec_s32_t pp1Ae, pp1Ao, pp1Be, pp1Bo, pp2Ae, pp2Ao, pp2Be, pp2Bo,
-                    pp3Ae, pp3Ao, pp3Be, pp3Bo, pp1cAe, pp1cAo, pp1cBe, pp1cBo,
-                    pp32Ae, pp32Ao, pp32Be, pp32Bo, sumAe, sumAo, sumBe, sumBo,
-                    ssumAe, ssumAo, ssumBe, ssumBo;
-  vec_u8_t fsum, sumv, sum, vdst;
-  vec_s16_t ssume, ssumo;
-
-  POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_hv_lowpass_num, 1);
-  src -= (2 * srcStride);
-  for (i = 0 ; i < 21 ; i ++) {
-    vec_u8_t srcM2, srcM1, srcP0, srcP1, srcP2, srcP3;
-    vec_u8_t srcR1 = vec_ld(-2, src);
-    vec_u8_t srcR2 = vec_ld(14, src);
-
-    switch (align) {
-    default: {
-      srcM2 = vec_perm(srcR1, srcR2, permM2);
-      srcM1 = vec_perm(srcR1, srcR2, permM1);
-      srcP0 = vec_perm(srcR1, srcR2, permP0);
-      srcP1 = vec_perm(srcR1, srcR2, permP1);
-      srcP2 = vec_perm(srcR1, srcR2, permP2);
-      srcP3 = vec_perm(srcR1, srcR2, permP3);
-    } break;
-    case 11: {
-      srcM2 = vec_perm(srcR1, srcR2, permM2);
-      srcM1 = vec_perm(srcR1, srcR2, permM1);
-      srcP0 = vec_perm(srcR1, srcR2, permP0);
-      srcP1 = vec_perm(srcR1, srcR2, permP1);
-      srcP2 = vec_perm(srcR1, srcR2, permP2);
-      srcP3 = srcR2;
-    } break;
-    case 12: {
-      vec_u8_t srcR3 = vec_ld(30, src);
-      srcM2 = vec_perm(srcR1, srcR2, permM2);
-      srcM1 = vec_perm(srcR1, srcR2, permM1);
-      srcP0 = vec_perm(srcR1, srcR2, permP0);
-      srcP1 = vec_perm(srcR1, srcR2, permP1);
-      srcP2 = srcR2;
-      srcP3 = vec_perm(srcR2, srcR3, permP3);
-    } break;
-    case 13: {
-      vec_u8_t srcR3 = vec_ld(30, src);
-      srcM2 = vec_perm(srcR1, srcR2, permM2);
-      srcM1 = vec_perm(srcR1, srcR2, permM1);
-      srcP0 = vec_perm(srcR1, srcR2, permP0);
-      srcP1 = srcR2;
-      srcP2 = vec_perm(srcR2, srcR3, permP2);
-      srcP3 = vec_perm(srcR2, srcR3, permP3);
-    } break;
-    case 14: {
-      vec_u8_t srcR3 = vec_ld(30, src);
-      srcM2 = vec_perm(srcR1, srcR2, permM2);
-      srcM1 = vec_perm(srcR1, srcR2, permM1);
-      srcP0 = srcR2;
-      srcP1 = vec_perm(srcR2, srcR3, permP1);
-      srcP2 = vec_perm(srcR2, srcR3, permP2);
-      srcP3 = vec_perm(srcR2, srcR3, permP3);
-    } break;
-    case 15: {
-      vec_u8_t srcR3 = vec_ld(30, src);
-      srcM2 = vec_perm(srcR1, srcR2, permM2);
-      srcM1 = srcR2;
-      srcP0 = vec_perm(srcR2, srcR3, permP0);
-      srcP1 = vec_perm(srcR2, srcR3, permP1);
-      srcP2 = vec_perm(srcR2, srcR3, permP2);
-      srcP3 = vec_perm(srcR2, srcR3, permP3);
-    } break;
-    }
+    POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_hv_lowpass_num, 1);
+    register int i;
+    LOAD_ZERO;
+    const vec_u8_t permM2 = vec_lvsl(-2, src);
+    const vec_u8_t permM1 = vec_lvsl(-1, src);
+    const vec_u8_t permP0 = vec_lvsl(+0, src);
+    const vec_u8_t permP1 = vec_lvsl(+1, src);
+    const vec_u8_t permP2 = vec_lvsl(+2, src);
+    const vec_u8_t permP3 = vec_lvsl(+3, src);
+    const vec_s16_t v20ss = vec_sl(vec_splat_s16(5),vec_splat_u16(2));
+    const vec_u32_t v10ui = vec_splat_u32(10);
+    const vec_s16_t v5ss = vec_splat_s16(5);
+    const vec_s16_t v1ss = vec_splat_s16(1);
+    const vec_s32_t v512si = vec_sl(vec_splat_s32(1),vec_splat_u32(9));
+    const vec_u32_t v16ui = vec_sl(vec_splat_u32(1),vec_splat_u32(4));
+
+    register int align = ((((unsigned long)src) - 2) % 16);
+
+    vec_s16_t srcP0A, srcP0B, srcP1A, srcP1B,
+              srcP2A, srcP2B, srcP3A, srcP3B,
+              srcM1A, srcM1B, srcM2A, srcM2B,
+              sum1A, sum1B, sum2A, sum2B, sum3A, sum3B,
+              pp1A, pp1B, pp2A, pp2B, psumA, psumB;
+
+    const vec_u8_t mperm = (const vec_u8_t)
+      AVV(0x00, 0x08, 0x01, 0x09, 0x02, 0x0A, 0x03, 0x0B,
+          0x04, 0x0C, 0x05, 0x0D, 0x06, 0x0E, 0x07, 0x0F);
+    int16_t *tmpbis = tmp;
+
+    vec_s16_t tmpM1ssA, tmpM1ssB, tmpM2ssA, tmpM2ssB,
+              tmpP0ssA, tmpP0ssB, tmpP1ssA, tmpP1ssB,
+              tmpP2ssA, tmpP2ssB;
+
+    vec_s32_t pp1Ae, pp1Ao, pp1Be, pp1Bo, pp2Ae, pp2Ao, pp2Be, pp2Bo,
+              pp3Ae, pp3Ao, pp3Be, pp3Bo, pp1cAe, pp1cAo, pp1cBe, pp1cBo,
+              pp32Ae, pp32Ao, pp32Be, pp32Bo, sumAe, sumAo, sumBe, sumBo,
+              ssumAe, ssumAo, ssumBe, ssumBo;
+    vec_u8_t fsum, sumv, sum, vdst;
+    vec_s16_t ssume, ssumo;
+
+    POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_hv_lowpass_num, 1);
+    src -= (2 * srcStride);
+    for (i = 0 ; i < 21 ; i ++) {
+        vec_u8_t srcM2, srcM1, srcP0, srcP1, srcP2, srcP3;
+        vec_u8_t srcR1 = vec_ld(-2, src);
+        vec_u8_t srcR2 = vec_ld(14, src);
+
+        switch (align) {
+        default: {
+            srcM2 = vec_perm(srcR1, srcR2, permM2);
+            srcM1 = vec_perm(srcR1, srcR2, permM1);
+            srcP0 = vec_perm(srcR1, srcR2, permP0);
+            srcP1 = vec_perm(srcR1, srcR2, permP1);
+            srcP2 = vec_perm(srcR1, srcR2, permP2);
+            srcP3 = vec_perm(srcR1, srcR2, permP3);
+        } break;
+        case 11: {
+            srcM2 = vec_perm(srcR1, srcR2, permM2);
+            srcM1 = vec_perm(srcR1, srcR2, permM1);
+            srcP0 = vec_perm(srcR1, srcR2, permP0);
+            srcP1 = vec_perm(srcR1, srcR2, permP1);
+            srcP2 = vec_perm(srcR1, srcR2, permP2);
+            srcP3 = srcR2;
+        } break;
+        case 12: {
+            vec_u8_t srcR3 = vec_ld(30, src);
+            srcM2 = vec_perm(srcR1, srcR2, permM2);
+            srcM1 = vec_perm(srcR1, srcR2, permM1);
+            srcP0 = vec_perm(srcR1, srcR2, permP0);
+            srcP1 = vec_perm(srcR1, srcR2, permP1);
+            srcP2 = srcR2;
+            srcP3 = vec_perm(srcR2, srcR3, permP3);
+        } break;
+        case 13: {
+            vec_u8_t srcR3 = vec_ld(30, src);
+            srcM2 = vec_perm(srcR1, srcR2, permM2);
+            srcM1 = vec_perm(srcR1, srcR2, permM1);
+            srcP0 = vec_perm(srcR1, srcR2, permP0);
+            srcP1 = srcR2;
+            srcP2 = vec_perm(srcR2, srcR3, permP2);
+            srcP3 = vec_perm(srcR2, srcR3, permP3);
+        } break;
+        case 14: {
+            vec_u8_t srcR3 = vec_ld(30, src);
+            srcM2 = vec_perm(srcR1, srcR2, permM2);
+            srcM1 = vec_perm(srcR1, srcR2, permM1);
+            srcP0 = srcR2;
+            srcP1 = vec_perm(srcR2, srcR3, permP1);
+            srcP2 = vec_perm(srcR2, srcR3, permP2);
+            srcP3 = vec_perm(srcR2, srcR3, permP3);
+        } break;
+        case 15: {
+            vec_u8_t srcR3 = vec_ld(30, src);
+            srcM2 = vec_perm(srcR1, srcR2, permM2);
+            srcM1 = srcR2;
+            srcP0 = vec_perm(srcR2, srcR3, permP0);
+            srcP1 = vec_perm(srcR2, srcR3, permP1);
+            srcP2 = vec_perm(srcR2, srcR3, permP2);
+            srcP3 = vec_perm(srcR2, srcR3, permP3);
+        } break;
+        }
+
+        srcP0A = (vec_s16_t) vec_mergeh(zero_u8v, srcP0);
+        srcP0B = (vec_s16_t) vec_mergel(zero_u8v, srcP0);
+        srcP1A = (vec_s16_t) vec_mergeh(zero_u8v, srcP1);
+        srcP1B = (vec_s16_t) vec_mergel(zero_u8v, srcP1);
+
+        srcP2A = (vec_s16_t) vec_mergeh(zero_u8v, srcP2);
+        srcP2B = (vec_s16_t) vec_mergel(zero_u8v, srcP2);
+        srcP3A = (vec_s16_t) vec_mergeh(zero_u8v, srcP3);
+        srcP3B = (vec_s16_t) vec_mergel(zero_u8v, srcP3);
+
+        srcM1A = (vec_s16_t) vec_mergeh(zero_u8v, srcM1);
+        srcM1B = (vec_s16_t) vec_mergel(zero_u8v, srcM1);
+        srcM2A = (vec_s16_t) vec_mergeh(zero_u8v, srcM2);
+        srcM2B = (vec_s16_t) vec_mergel(zero_u8v, srcM2);
+
+        sum1A = vec_adds(srcP0A, srcP1A);
+        sum1B = vec_adds(srcP0B, srcP1B);
+        sum2A = vec_adds(srcM1A, srcP2A);
+        sum2B = vec_adds(srcM1B, srcP2B);
+        sum3A = vec_adds(srcM2A, srcP3A);
+        sum3B = vec_adds(srcM2B, srcP3B);
+
+        pp1A = vec_mladd(sum1A, v20ss, sum3A);
+        pp1B = vec_mladd(sum1B, v20ss, sum3B);
+
+        pp2A = vec_mladd(sum2A, v5ss, zero_s16v);
+        pp2B = vec_mladd(sum2B, v5ss, zero_s16v);
 
-    srcP0A = (vec_s16_t) vec_mergeh(zero_u8v, srcP0);
-    srcP0B = (vec_s16_t) vec_mergel(zero_u8v, srcP0);
-    srcP1A = (vec_s16_t) vec_mergeh(zero_u8v, srcP1);
-    srcP1B = (vec_s16_t) vec_mergel(zero_u8v, srcP1);
-
-    srcP2A = (vec_s16_t) vec_mergeh(zero_u8v, srcP2);
-    srcP2B = (vec_s16_t) vec_mergel(zero_u8v, srcP2);
-    srcP3A = (vec_s16_t) vec_mergeh(zero_u8v, srcP3);
-    srcP3B = (vec_s16_t) vec_mergel(zero_u8v, srcP3);
-
-    srcM1A = (vec_s16_t) vec_mergeh(zero_u8v, srcM1);
-    srcM1B = (vec_s16_t) vec_mergel(zero_u8v, srcM1);
-    srcM2A = (vec_s16_t) vec_mergeh(zero_u8v, srcM2);
-    srcM2B = (vec_s16_t) vec_mergel(zero_u8v, srcM2);
-
-    sum1A = vec_adds(srcP0A, srcP1A);
-    sum1B = vec_adds(srcP0B, srcP1B);
-    sum2A = vec_adds(srcM1A, srcP2A);
-    sum2B = vec_adds(srcM1B, srcP2B);
-    sum3A = vec_adds(srcM2A, srcP3A);
-    sum3B = vec_adds(srcM2B, srcP3B);
-
-    pp1A = vec_mladd(sum1A, v20ss, sum3A);
-    pp1B = vec_mladd(sum1B, v20ss, sum3B);
-
-    pp2A = vec_mladd(sum2A, v5ss, zero_s16v);
-    pp2B = vec_mladd(sum2B, v5ss, zero_s16v);
-
-    psumA = vec_sub(pp1A, pp2A);
-    psumB = vec_sub(pp1B, pp2B);
-
-    vec_st(psumA, 0, tmp);
-    vec_st(psumB, 16, tmp);
-
-    src += srcStride;
-    tmp += tmpStride; /* int16_t*, and stride is 16, so it's OK here */
-  }
-
-  tmpM2ssA = vec_ld(0, tmpbis);
-  tmpM2ssB = vec_ld(16, tmpbis);
-  tmpbis += tmpStride;
-  tmpM1ssA = vec_ld(0, tmpbis);
-  tmpM1ssB = vec_ld(16, tmpbis);
-  tmpbis += tmpStride;
-  tmpP0ssA = vec_ld(0, tmpbis);
-  tmpP0ssB = vec_ld(16, tmpbis);
-  tmpbis += tmpStride;
-  tmpP1ssA = vec_ld(0, tmpbis);
-  tmpP1ssB = vec_ld(16, tmpbis);
-  tmpbis += tmpStride;
-  tmpP2ssA = vec_ld(0, tmpbis);
-  tmpP2ssB = vec_ld(16, tmpbis);
-  tmpbis += tmpStride;
-
-  for (i = 0 ; i < 16 ; i++) {
-    const vec_s16_t tmpP3ssA = vec_ld(0, tmpbis);
-    const vec_s16_t tmpP3ssB = vec_ld(16, tmpbis);
-
-    const vec_s16_t sum1A = vec_adds(tmpP0ssA, tmpP1ssA);
-    const vec_s16_t sum1B = vec_adds(tmpP0ssB, tmpP1ssB);
-    const vec_s16_t sum2A = vec_adds(tmpM1ssA, tmpP2ssA);
-    const vec_s16_t sum2B = vec_adds(tmpM1ssB, tmpP2ssB);
-    const vec_s16_t sum3A = vec_adds(tmpM2ssA, tmpP3ssA);
-    const vec_s16_t sum3B = vec_adds(tmpM2ssB, tmpP3ssB);
+        psumA = vec_sub(pp1A, pp2A);
+        psumB = vec_sub(pp1B, pp2B);
 
+        vec_st(psumA, 0, tmp);
+        vec_st(psumB, 16, tmp);
+
+        src += srcStride;
+        tmp += tmpStride; /* int16_t*, and stride is 16, so it's OK here */
+    }
+
+    tmpM2ssA = vec_ld(0, tmpbis);
+    tmpM2ssB = vec_ld(16, tmpbis);
+    tmpbis += tmpStride;
+    tmpM1ssA = vec_ld(0, tmpbis);
+    tmpM1ssB = vec_ld(16, tmpbis);
+    tmpbis += tmpStride;
+    tmpP0ssA = vec_ld(0, tmpbis);
+    tmpP0ssB = vec_ld(16, tmpbis);
+    tmpbis += tmpStride;
+    tmpP1ssA = vec_ld(0, tmpbis);
+    tmpP1ssB = vec_ld(16, tmpbis);
+    tmpbis += tmpStride;
+    tmpP2ssA = vec_ld(0, tmpbis);
+    tmpP2ssB = vec_ld(16, tmpbis);
     tmpbis += tmpStride;
 
-    tmpM2ssA = tmpM1ssA;
-    tmpM2ssB = tmpM1ssB;
-    tmpM1ssA = tmpP0ssA;
-    tmpM1ssB = tmpP0ssB;
-    tmpP0ssA = tmpP1ssA;
-    tmpP0ssB = tmpP1ssB;
-    tmpP1ssA = tmpP2ssA;
-    tmpP1ssB = tmpP2ssB;
-    tmpP2ssA = tmpP3ssA;
-    tmpP2ssB = tmpP3ssB;
-
-    pp1Ae = vec_mule(sum1A, v20ss);
-    pp1Ao = vec_mulo(sum1A, v20ss);
-    pp1Be = vec_mule(sum1B, v20ss);
-    pp1Bo = vec_mulo(sum1B, v20ss);
-
-    pp2Ae = vec_mule(sum2A, v5ss);
-    pp2Ao = vec_mulo(sum2A, v5ss);
-    pp2Be = vec_mule(sum2B, v5ss);
-    pp2Bo = vec_mulo(sum2B, v5ss);
-
-    pp3Ae = vec_sra((vec_s32_t)sum3A, v16ui);
-    pp3Ao = vec_mulo(sum3A, v1ss);
-    pp3Be = vec_sra((vec_s32_t)sum3B, v16ui);
-    pp3Bo = vec_mulo(sum3B, v1ss);
-
-    pp1cAe = vec_add(pp1Ae, v512si);
-    pp1cAo = vec_add(pp1Ao, v512si);
-    pp1cBe = vec_add(pp1Be, v512si);
-    pp1cBo = vec_add(pp1Bo, v512si);
-
-    pp32Ae = vec_sub(pp3Ae, pp2Ae);
-    pp32Ao = vec_sub(pp3Ao, pp2Ao);
-    pp32Be = vec_sub(pp3Be, pp2Be);
-    pp32Bo = vec_sub(pp3Bo, pp2Bo);
-
-    sumAe = vec_add(pp1cAe, pp32Ae);
-    sumAo = vec_add(pp1cAo, pp32Ao);
-    sumBe = vec_add(pp1cBe, pp32Be);
-    sumBo = vec_add(pp1cBo, pp32Bo);
-
-    ssumAe = vec_sra(sumAe, v10ui);
-    ssumAo = vec_sra(sumAo, v10ui);
-    ssumBe = vec_sra(sumBe, v10ui);
-    ssumBo = vec_sra(sumBo, v10ui);
-
-    ssume = vec_packs(ssumAe, ssumBe);
-    ssumo = vec_packs(ssumAo, ssumBo);
-
-    sumv = vec_packsu(ssume, ssumo);
-    sum = vec_perm(sumv, sumv, mperm);
-
-    ASSERT_ALIGNED(dst);
-    vdst = vec_ld(0, dst);
-
-    OP_U8_ALTIVEC(fsum, sum, vdst);
-
-    vec_st(fsum, 0, dst);
-
-    dst += dstStride;
-  }
-  POWERPC_PERF_STOP_COUNT(PREFIX_h264_qpel16_hv_lowpass_num, 1);
+    for (i = 0 ; i < 16 ; i++) {
+        const vec_s16_t tmpP3ssA = vec_ld(0, tmpbis);
+        const vec_s16_t tmpP3ssB = vec_ld(16, tmpbis);
+
+        const vec_s16_t sum1A = vec_adds(tmpP0ssA, tmpP1ssA);
+        const vec_s16_t sum1B = vec_adds(tmpP0ssB, tmpP1ssB);
+        const vec_s16_t sum2A = vec_adds(tmpM1ssA, tmpP2ssA);
+        const vec_s16_t sum2B = vec_adds(tmpM1ssB, tmpP2ssB);
+        const vec_s16_t sum3A = vec_adds(tmpM2ssA, tmpP3ssA);
+        const vec_s16_t sum3B = vec_adds(tmpM2ssB, tmpP3ssB);
+
+        tmpbis += tmpStride;
+
+        tmpM2ssA = tmpM1ssA;
+        tmpM2ssB = tmpM1ssB;
+        tmpM1ssA = tmpP0ssA;
+        tmpM1ssB = tmpP0ssB;
+        tmpP0ssA = tmpP1ssA;
+        tmpP0ssB = tmpP1ssB;
+        tmpP1ssA = tmpP2ssA;
+        tmpP1ssB = tmpP2ssB;
+        tmpP2ssA = tmpP3ssA;
+        tmpP2ssB = tmpP3ssB;
+
+        pp1Ae = vec_mule(sum1A, v20ss);
+        pp1Ao = vec_mulo(sum1A, v20ss);
+        pp1Be = vec_mule(sum1B, v20ss);
+        pp1Bo = vec_mulo(sum1B, v20ss);
+
+        pp2Ae = vec_mule(sum2A, v5ss);
+        pp2Ao = vec_mulo(sum2A, v5ss);
+        pp2Be = vec_mule(sum2B, v5ss);
+        pp2Bo = vec_mulo(sum2B, v5ss);
+
+        pp3Ae = vec_sra((vec_s32_t)sum3A, v16ui);
+        pp3Ao = vec_mulo(sum3A, v1ss);
+        pp3Be = vec_sra((vec_s32_t)sum3B, v16ui);
+        pp3Bo = vec_mulo(sum3B, v1ss);
+
+        pp1cAe = vec_add(pp1Ae, v512si);
+        pp1cAo = vec_add(pp1Ao, v512si);
+        pp1cBe = vec_add(pp1Be, v512si);
+        pp1cBo = vec_add(pp1Bo, v512si);
+
+        pp32Ae = vec_sub(pp3Ae, pp2Ae);
+        pp32Ao = vec_sub(pp3Ao, pp2Ao);
+        pp32Be = vec_sub(pp3Be, pp2Be);
+        pp32Bo = vec_sub(pp3Bo, pp2Bo);
+
+        sumAe = vec_add(pp1cAe, pp32Ae);
+        sumAo = vec_add(pp1cAo, pp32Ao);
+        sumBe = vec_add(pp1cBe, pp32Be);
+        sumBo = vec_add(pp1cBo, pp32Bo);
+
+        ssumAe = vec_sra(sumAe, v10ui);
+        ssumAo = vec_sra(sumAo, v10ui);
+        ssumBe = vec_sra(sumBe, v10ui);
+        ssumBo = vec_sra(sumBo, v10ui);
+
+        ssume = vec_packs(ssumAe, ssumBe);
+        ssumo = vec_packs(ssumAo, ssumBo);
+
+        sumv = vec_packsu(ssume, ssumo);
+        sum = vec_perm(sumv, sumv, mperm);
+
+        ASSERT_ALIGNED(dst);
+        vdst = vec_ld(0, dst);
+
+        OP_U8_ALTIVEC(fsum, sum, vdst);
+
+        vec_st(fsum, 0, dst);
+
+        dst += dstStride;
+    }
+    POWERPC_PERF_STOP_COUNT(PREFIX_h264_qpel16_hv_lowpass_num, 1);
 }
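
For reference, the intrinsics above implement the standard H.264 6-tap (1, -5, 20, 20, -5, 1) half-pel filter: a horizontal pass into a 16-bit temporary, then the same filter vertically with +512 rounding and a 10-bit shift (hence v20ss, v5ss, v512si and v10ui). A minimal scalar sketch of that arithmetic follows; the function name, signature and loop layout are illustrative and not taken from the patch.

#include <stdint.h>

static inline uint8_t clip_uint8(int v)
{
    return v < 0 ? 0 : v > 255 ? 255 : v;
}

/* Scalar model of a 16x16 "hv" lowpass: horizontal 6-tap into tmp,
 * then vertical 6-tap with rounding.  tmp must hold (16+5) rows. */
static void qpel16_hv_lowpass_sketch(uint8_t *dst, int16_t *tmp,
                                     const uint8_t *src,
                                     int dstStride, int tmpStride,
                                     int srcStride)
{
    int x, y;

    src -= 2 * srcStride;                /* the filter needs two rows above */
    for (y = 0; y < 16 + 5; y++) {       /* 21 temporary rows feed 16 outputs */
        for (x = 0; x < 16; x++)
            tmp[x] = 20 * (src[x]     + src[x + 1])
                   -  5 * (src[x - 1] + src[x + 2])
                   +      (src[x - 2] + src[x + 3]);
        src += srcStride;
        tmp += tmpStride;
    }

    tmp -= tmpStride * (16 + 5);         /* rewind, then filter the columns */
    for (y = 0; y < 16; y++) {
        for (x = 0; x < 16; x++)
            dst[x] = clip_uint8((20 * (tmp[x + 2 * tmpStride] + tmp[x + 3 * tmpStride])
                               -  5 * (tmp[x + 1 * tmpStride] + tmp[x + 4 * tmpStride])
                               +      (tmp[x]                 + tmp[x + 5 * tmpStride])
                               + 512) >> 10);
        tmp += tmpStride;
        dst += dstStride;
    }
}

The AltiVec version splits each 16-byte row into two 8-element halves (the A/B variables) so the widened 16-bit sums fit in vector registers, and the final OP_U8_ALTIVEC step selects between the put and avg flavours of the routine.
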
diff --git a/libavcodec/ppc/idct_altivec.c b/libavcodec/ppc/idct_altivec.c
index 4395234..5d596b1 100644
--- a/libavcodec/ppc/idct_altivec.c
+++ b/libavcodec/ppc/idct_altivec.c
@@ -22,7 +22,6 @@
  * NOTE: This code is based on GPL code from the libmpeg2 project.  The
  * author, Michel Lespinasses, has given explicit permission to release
  * under LGPL as part of ffmpeg.
- *
  */
 
 /*
diff --git a/libavcodec/ppc/imgresample_altivec.c b/libavcodec/ppc/imgresample_altivec.c
index 3b161c5..b38e41b 100644
--- a/libavcodec/ppc/imgresample_altivec.c
+++ b/libavcodec/ppc/imgresample_altivec.c
@@ -46,8 +46,7 @@ void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
     vector signed short zeros, sumhv, sumlv;
     s = src;
 
-    for(i=0;i<4;i++)
-    {
+    for(i=0;i<4;i++) {
         /*
            The vec_madds later on does an implicit >>15 on the result.
            Since FILTER_BITS is 8, and we have 15 bits of magnitude in
@@ -86,13 +85,11 @@ void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
 
     /* Do our altivec resampling on 16 pixels at once. */
     while(dst_width>=16) {
-        /*
-           Read 16 (potentially unaligned) bytes from each of
+        /* Read 16 (potentially unaligned) bytes from each of
            4 lines into 4 vectors, and split them into shorts.
            Interleave the multipy/accumulate for the resample
            filter with the loads to hide the 3 cycle latency
-           the vec_madds have.
-        */
+           the vec_madds have. */
         tv = (vector unsigned char *) &s[0 * wrap];
         tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[i * wrap]));
         srchv[0].v = (vector signed short) vec_mergeh(zero, tmp);
@@ -121,10 +118,8 @@ void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
         sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv);
         sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv);
 
-        /*
-           Pack the results into our destination vector,
-           and do an aligned write of that back to memory.
-        */
+        /* Pack the results into our destination vector,
+           and do an aligned write of that back to memory. */
         dstv = vec_packsu(sumhv, sumlv) ;
         vec_st(dstv, 0, (vector unsigned char *) dst);
 
@@ -133,10 +128,8 @@ void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
         dst_width-=16;
     }
 
-    /*
-       If there are any leftover pixels, resample them
-       with the slow scalar method.
-    */
+    /* If there are any leftover pixels, resample them
+       with the slow scalar method. */
     while(dst_width>0) {
         sum = s[0 * wrap] * filter[0] +
         s[1 * wrap] * filter[1] +
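
The loads in v_resample16_altivec use the classic pre-VSX idiom for unaligned access that the comment refers to: two aligned vec_ld fetches straddling the address, combined by vec_perm with a control vector from vec_lvsl. A minimal sketch of the idiom (hypothetical helper, AltiVec-only):

#include <altivec.h>
#include <stdint.h>

/* Load 16 bytes from a possibly unaligned address.  vec_ld silently rounds
 * the effective address down to a 16-byte boundary, so fetch the two aligned
 * blocks that straddle p and let vec_perm rotate the wanted bytes into place
 * using the permute vector produced by vec_lvsl. */
static inline vector unsigned char load_unaligned(const uint8_t *p)
{
    vector unsigned char hi   = vec_ld(0, p);    /* aligned block containing p    */
    vector unsigned char lo   = vec_ld(15, p);   /* next aligned block, if needed */
    vector unsigned char perm = vec_lvsl(0, p);  /* byte rotation for the offset  */
    return vec_perm(hi, lo, perm);
}

If p happens to be aligned, both loads hit the same block and the permute degenerates to a copy, so the helper is safe for any address within the buffer.
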
diff --git a/libavcodec/ppc/int_altivec.c b/libavcodec/ppc/int_altivec.c
index 7a155a2..8bd3936 100644
--- a/libavcodec/ppc/int_altivec.c
+++ b/libavcodec/ppc/int_altivec.c
@@ -38,7 +38,7 @@ static int ssd_int8_vs_int16_altivec(const int8_t *pix1, const int16_t *pix2,
     vector signed short vpix2, vdiff, vpix1l,vpix1h;
     union { vector signed int vscore;
             int32_t score[4];
-           } u;
+          } u;
     u.vscore = vec_splat_s32(0);
 //
 //XXX lazy way, fix it later
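
The union of a vector and an int32_t[4] in ssd_int8_vs_int16_altivec is the usual way to move individual lanes of an AltiVec register back into scalar code. A small sketch of that spill-through-a-union idiom (assumed helper, not part of the patch):

#include <altivec.h>
#include <stdint.h>

/* Horizontal sum of the four 32-bit lanes of a vector, read back through
 * a union so the result can be returned as a plain scalar. */
static inline int32_t hsum_s32(vector signed int v)
{
    union {
        vector signed int vec;
        int32_t           lane[4];
    } u;
    u.vec = v;
    return u.lane[0] + u.lane[1] + u.lane[2] + u.lane[3];
}
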
diff --git a/libavcodec/ppc/mathops.h b/libavcodec/ppc/mathops.h
index 82abadc..2259f2a 100644
--- a/libavcodec/ppc/mathops.h
+++ b/libavcodec/ppc/mathops.h
@@ -25,14 +25,14 @@
 
 #if defined(ARCH_POWERPC_405)
 /* signed 16x16 -> 32 multiply add accumulate */
-#   define MAC16(rt, ra, rb) \
-        asm ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb));
+#define MAC16(rt, ra, rb) \
+    asm ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb));
 
 /* signed 16x16 -> 32 multiply */
-#   define MUL16(ra, rb) \
-        ({ int __rt; \
-         asm ("mullhw %0, %1, %2" : "=r" (__rt) : "r" (ra), "r" (rb)); \
-         __rt; })
+#define MUL16(ra, rb) \
+    ({ int __rt; \
+    asm ("mullhw %0, %1, %2" : "=r" (__rt) : "r" (ra), "r" (rb)); \
+    __rt; })
 #endif
 
 #endif /* FFMPEG_PPC_MATHOPS_H */
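
For context, maclhw and mullhw on the PPC 405 multiply the low signed halfwords of their operands, with maclhw accumulating into the target register. Rough plain-C equivalents of what the two macros compute (a sketch, not the generic mathops.h fallbacks verbatim):

#include <stdint.h>

/* rt += (low 16 bits of ra, signed) * (low 16 bits of rb, signed) */
#define MAC16_C(rt, ra, rb) ((rt) += (int16_t)(ra) * (int16_t)(rb))

/* signed 16x16 -> 32 multiply of the low halfwords */
#define MUL16_C(ra, rb)     ((int16_t)(ra) * (int16_t)(rb))
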
diff --git a/libavcodec/ppc/mpegvideo_altivec.c b/libavcodec/ppc/mpegvideo_altivec.c
index 9832fb9..f2e4fae 100644
--- a/libavcodec/ppc/mpegvideo_altivec.c
+++ b/libavcodec/ppc/mpegvideo_altivec.c
@@ -41,15 +41,15 @@ do { \
 // transposes a matrix consisting of four vectors with four elements each
 #define TRANSPOSE4(a,b,c,d) \
 do { \
-  __typeof__(a) _trans_ach = vec_mergeh(a, c); \
-  __typeof__(a) _trans_acl = vec_mergel(a, c); \
-  __typeof__(a) _trans_bdh = vec_mergeh(b, d); \
-  __typeof__(a) _trans_bdl = vec_mergel(b, d); \
- \
-  a = vec_mergeh(_trans_ach, _trans_bdh); \
-  b = vec_mergel(_trans_ach, _trans_bdh); \
-  c = vec_mergeh(_trans_acl, _trans_bdl); \
-  d = vec_mergel(_trans_acl, _trans_bdl); \
+    __typeof__(a) _trans_ach = vec_mergeh(a, c); \
+    __typeof__(a) _trans_acl = vec_mergel(a, c); \
+    __typeof__(a) _trans_bdh = vec_mergeh(b, d); \
+    __typeof__(a) _trans_bdl = vec_mergel(b, d); \
+                                                 \
+    a = vec_mergeh(_trans_ach, _trans_bdh);      \
+    b = vec_mergel(_trans_ach, _trans_bdh);      \
+    c = vec_mergeh(_trans_acl, _trans_bdl);      \
+    d = vec_mergel(_trans_acl, _trans_bdl);      \
 } while (0)
 
 
@@ -58,19 +58,19 @@ do { \
 // target address is four-byte aligned (which should be always).
 #define LOAD4(vec, address) \
 { \
-    __typeof__(vec)* _load_addr = (__typeof__(vec)*)(address); \
-    vector unsigned char _perm_vec = vec_lvsl(0,(address)); \
-    vec = vec_ld(0, _load_addr); \
-    vec = vec_perm(vec, vec, _perm_vec); \
-    vec = vec_splat(vec, 0); \
+    __typeof__(vec)* _load_addr = (__typeof__(vec)*)(address);  \
+    vector unsigned char _perm_vec = vec_lvsl(0,(address));     \
+    vec = vec_ld(0, _load_addr);                                \
+    vec = vec_perm(vec, vec, _perm_vec);                        \
+    vec = vec_splat(vec, 0);                                    \
 }
 
 
 #define FOUROF(a) AVV(a,a,a,a)
 
 int dct_quantize_altivec(MpegEncContext* s,
-                        DCTELEM* data, int n,
-                        int qscale, int* overflow)
+                         DCTELEM* data, int n,
+                         int qscale, int* overflow)
 {
     int lastNonZero;
     vector float row0, row1, row2, row3, row4, row5, row6, row7;
@@ -137,10 +137,8 @@ int dct_quantize_altivec(MpegEncContext* s,
 
         int whichPass, whichHalf;
 
-        for(whichPass = 1; whichPass<=2; whichPass++)
-        {
-            for(whichHalf = 1; whichHalf<=2; whichHalf++)
-            {
+        for(whichPass = 1; whichPass<=2; whichPass++) {
+            for(whichHalf = 1; whichHalf<=2; whichHalf++) {
                 vector float tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
                 vector float tmp10, tmp11, tmp12, tmp13;
                 vector float z1, z2, z3, z4, z5;
@@ -235,8 +233,7 @@ int dct_quantize_altivec(MpegEncContext* s,
                 SWAP(row7, alt7);
             }
 
-            if (whichPass == 1)
-            {
+            if (whichPass == 1) {
                 // transpose the data for the second pass
 
                 // First, block transpose the upper right with lower left.
@@ -261,8 +258,7 @@ int dct_quantize_altivec(MpegEncContext* s,
         const vector signed int* qmat;
         vector float bias, negBias;
 
-        if (s->mb_intra)
-        {
+        if (s->mb_intra) {
             vector signed int baseVector;
 
             // We must cache element 0 in the intra case
@@ -272,9 +268,7 @@ int dct_quantize_altivec(MpegEncContext* s,
 
             qmat = (vector signed int*)s->q_intra_matrix[qscale];
             biasAddr = &(s->intra_quant_bias);
-        }
-        else
-        {
+        } else {
             qmat = (vector signed int*)s->q_inter_matrix[qscale];
             biasAddr = &(s->inter_quant_bias);
         }
@@ -439,8 +433,7 @@ int dct_quantize_altivec(MpegEncContext* s,
         // and handle it using the vector unit if we can.  This is the permute used
         // by the altivec idct, so it is common when using the altivec dct.
 
-        if ((lastNonZero > 0) && (s->dsp.idct_permutation_type == FF_TRANSPOSE_IDCT_PERM))
-        {
+        if ((lastNonZero > 0) && (s->dsp.idct_permutation_type == FF_TRANSPOSE_IDCT_PERM)) {
             TRANSPOSE8(data0, data1, data2, data3, data4, data5, data6, data7);
         }
 
@@ -456,10 +449,8 @@ int dct_quantize_altivec(MpegEncContext* s,
     }
 
     // special handling of block[0]
-    if (s->mb_intra)
-    {
-        if (!s->h263_aic)
-        {
+    if (s->mb_intra) {
+        if (!s->h263_aic) {
             if (n < 4)
                 oldBaseValue /= s->y_dc_scale;
             else
@@ -474,8 +465,7 @@ int dct_quantize_altivec(MpegEncContext* s,
     // need to permute the "no" permutation case.
     if ((lastNonZero > 0) &&
         (s->dsp.idct_permutation_type != FF_TRANSPOSE_IDCT_PERM) &&
-        (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM))
-    {
+        (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)) {
         ff_block_permute(data, s->dsp.idct_permutation,
                 s->intra_scantable.scantable, lastNonZero);
     }
@@ -483,10 +473,8 @@ int dct_quantize_altivec(MpegEncContext* s,
     return lastNonZero;
 }
 
-/*
-  AltiVec version of dct_unquantize_h263
-  this code assumes `block' is 16 bytes-aligned
-*/
+/* AltiVec version of dct_unquantize_h263
+   this code assumes `block' is 16 bytes-aligned */
 void dct_unquantize_h263_altivec(MpegEncContext *s,
                                  DCTELEM *block, int n, int qscale)
 {
@@ -517,82 +505,81 @@ POWERPC_PERF_START_COUNT(altivec_dct_unquantize_h263_num, 1);
     }
 
     {
-      register const vector signed short vczero = (const vector signed short)vec_splat_s16(0);
-      DECLARE_ALIGNED_16(short, qmul8[]) =
-          {
-            qmul, qmul, qmul, qmul,
-            qmul, qmul, qmul, qmul
-          };
-      DECLARE_ALIGNED_16(short, qadd8[]) =
-          {
-            qadd, qadd, qadd, qadd,
-            qadd, qadd, qadd, qadd
-          };
-      DECLARE_ALIGNED_16(short, nqadd8[]) =
-          {
-            -qadd, -qadd, -qadd, -qadd,
-            -qadd, -qadd, -qadd, -qadd
-          };
-      register vector signed short blockv, qmulv, qaddv, nqaddv, temp1;
-      register vector bool short blockv_null, blockv_neg;
-      register short backup_0 = block[0];
-      register int j = 0;
-
-      qmulv = vec_ld(0, qmul8);
-      qaddv = vec_ld(0, qadd8);
-      nqaddv = vec_ld(0, nqadd8);
-
-#if 0 // block *is* 16 bytes-aligned, it seems.
-      // first make sure block[j] is 16 bytes-aligned
-      for(j = 0; (j <= nCoeffs) && ((((unsigned long)block) + (j << 1)) & 0x0000000F) ; j++) {
-        level = block[j];
-        if (level) {
-          if (level < 0) {
-                level = level * qmul - qadd;
-            } else {
-                level = level * qmul + qadd;
+        register const vector signed short vczero = (const vector signed short)vec_splat_s16(0);
+        DECLARE_ALIGNED_16(short, qmul8[]) =
+            {
+              qmul, qmul, qmul, qmul,
+              qmul, qmul, qmul, qmul
+            };
+        DECLARE_ALIGNED_16(short, qadd8[]) =
+            {
+              qadd, qadd, qadd, qadd,
+              qadd, qadd, qadd, qadd
+            };
+        DECLARE_ALIGNED_16(short, nqadd8[]) =
+            {
+              -qadd, -qadd, -qadd, -qadd,
+              -qadd, -qadd, -qadd, -qadd
+            };
+        register vector signed short blockv, qmulv, qaddv, nqaddv, temp1;
+        register vector bool short blockv_null, blockv_neg;
+        register short backup_0 = block[0];
+        register int j = 0;
+
+        qmulv = vec_ld(0, qmul8);
+        qaddv = vec_ld(0, qadd8);
+        nqaddv = vec_ld(0, nqadd8);
+
+#if 0   // block *is* 16 bytes-aligned, it seems.
+        // first make sure block[j] is 16 bytes-aligned
+        for(j = 0; (j <= nCoeffs) && ((((unsigned long)block) + (j << 1)) & 0x0000000F) ; j++) {
+            level = block[j];
+            if (level) {
+                if (level < 0) {
+                    level = level * qmul - qadd;
+                } else {
+                    level = level * qmul + qadd;
+                }
+                block[j] = level;
             }
-            block[j] = level;
         }
-      }
 #endif
 
-      // vectorize all the 16 bytes-aligned blocks
-      // of 8 elements
-      for(; (j + 7) <= nCoeffs ; j+=8)
-      {
-        blockv = vec_ld(j << 1, block);
-        blockv_neg = vec_cmplt(blockv, vczero);
-        blockv_null = vec_cmpeq(blockv, vczero);
-        // choose between +qadd or -qadd as the third operand
-        temp1 = vec_sel(qaddv, nqaddv, blockv_neg);
-        // multiply & add (block{i,i+7} * qmul [+-] qadd)
-        temp1 = vec_mladd(blockv, qmulv, temp1);
-        // put 0 where block[{i,i+7} used to have 0
-        blockv = vec_sel(temp1, blockv, blockv_null);
-        vec_st(blockv, j << 1, block);
-      }
-
-      // if nCoeffs isn't a multiple of 8, finish the job
-      // using good old scalar units.
-      // (we could do it using a truncated vector,
-      // but I'm not sure it's worth the hassle)
-      for(; j <= nCoeffs ; j++) {
-        level = block[j];
-        if (level) {
-          if (level < 0) {
-                level = level * qmul - qadd;
-            } else {
-                level = level * qmul + qadd;
+        // vectorize all the 16 bytes-aligned blocks
+        // of 8 elements
+        for(; (j + 7) <= nCoeffs ; j+=8) {
+            blockv = vec_ld(j << 1, block);
+            blockv_neg = vec_cmplt(blockv, vczero);
+            blockv_null = vec_cmpeq(blockv, vczero);
+            // choose between +qadd or -qadd as the third operand
+            temp1 = vec_sel(qaddv, nqaddv, blockv_neg);
+            // multiply & add (block{i,i+7} * qmul [+-] qadd)
+            temp1 = vec_mladd(blockv, qmulv, temp1);
+            // put 0 where block[{i,i+7} used to have 0
+            blockv = vec_sel(temp1, blockv, blockv_null);
+            vec_st(blockv, j << 1, block);
+        }
+
+        // if nCoeffs isn't a multiple of 8, finish the job
+        // using good old scalar units.
+        // (we could do it using a truncated vector,
+        // but I'm not sure it's worth the hassle)
+        for(; j <= nCoeffs ; j++) {
+            level = block[j];
+            if (level) {
+                if (level < 0) {
+                    level = level * qmul - qadd;
+                } else {
+                    level = level * qmul + qadd;
+                }
+                block[j] = level;
             }
-            block[j] = level;
         }
-      }
 
-      if (i == 1)
-      { // cheat. this avoid special-casing the first iteration
-        block[0] = backup_0;
-      }
+        if (i == 1) {
+            // cheat. this avoid special-casing the first iteration
+            block[0] = backup_0;
+        }
     }
 POWERPC_PERF_STOP_COUNT(altivec_dct_unquantize_h263_num, nCoeffs == 63);
 }
@@ -605,11 +592,9 @@ void MPV_common_init_altivec(MpegEncContext *s)
 {
     if ((mm_flags & MM_ALTIVEC) == 0) return;
 
-    if (s->avctx->lowres==0)
-    {
+    if (s->avctx->lowres==0) {
         if ((s->avctx->idct_algo == FF_IDCT_AUTO) ||
-                (s->avctx->idct_algo == FF_IDCT_ALTIVEC))
-        {
+            (s->avctx->idct_algo == FF_IDCT_ALTIVEC)) {
             s->dsp.idct_put = idct_put_altivec;
             s->dsp.idct_add = idct_add_altivec;
             s->dsp.idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
@@ -618,15 +603,13 @@ void MPV_common_init_altivec(MpegEncContext *s)
 
     // Test to make sure that the dct required alignments are met.
     if ((((long)(s->q_intra_matrix) & 0x0f) != 0) ||
-        (((long)(s->q_inter_matrix) & 0x0f) != 0))
-    {
+        (((long)(s->q_inter_matrix) & 0x0f) != 0)) {
         av_log(s->avctx, AV_LOG_INFO, "Internal Error: q-matrix blocks must be 16-byte aligned "
                 "to use AltiVec DCT. Reverting to non-AltiVec version.\n");
         return;
     }
 
-    if (((long)(s->intra_scantable.inverse) & 0x0f) != 0)
-    {
+    if (((long)(s->intra_scantable.inverse) & 0x0f) != 0) {
         av_log(s->avctx, AV_LOG_INFO, "Internal Error: scan table blocks must be 16-byte aligned "
                 "to use AltiVec DCT. Reverting to non-AltiVec version.\n");
         return;
@@ -634,8 +617,7 @@ void MPV_common_init_altivec(MpegEncContext *s)
 
 
     if ((s->avctx->dct_algo == FF_DCT_AUTO) ||
-            (s->avctx->dct_algo == FF_DCT_ALTIVEC))
-    {
+            (s->avctx->dct_algo == FF_DCT_ALTIVEC)) {
 #if 0 /* seems to cause trouble under some circumstances */
         s->dct_quantize = dct_quantize_altivec;
 #endif
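
The TRANSPOSE4 macro reformatted above relies on the merge-based transpose: one round of vec_mergeh/vec_mergel interleaves each row with its partner, and a second round leaves each vector holding one column. A scalar model of the same shuffle, illustrative only:

#include <stdio.h>

static void mergeh4(const int x[4], const int y[4], int out[4])
{   /* interleave the high halves: x0 y0 x1 y1 */
    out[0] = x[0]; out[1] = y[0]; out[2] = x[1]; out[3] = y[1];
}

static void mergel4(const int x[4], const int y[4], int out[4])
{   /* interleave the low halves: x2 y2 x3 y3 */
    out[0] = x[2]; out[1] = y[2]; out[2] = x[3]; out[3] = y[3];
}

int main(void)
{
    int a[4] = {0, 1, 2, 3},     b[4] = {10, 11, 12, 13},
        c[4] = {20, 21, 22, 23}, d[4] = {30, 31, 32, 33};
    int ach[4], acl[4], bdh[4], bdl[4], ta[4], tb[4], tc[4], td[4];

    mergeh4(a, c, ach); mergel4(a, c, acl);       /* round 1 */
    mergeh4(b, d, bdh); mergel4(b, d, bdl);
    mergeh4(ach, bdh, ta); mergel4(ach, bdh, tb); /* round 2 */
    mergeh4(acl, bdl, tc); mergel4(acl, bdl, td);

    /* ta..td now hold the columns of the original 4x4 matrix. */
    printf("%d %d %d %d\n", ta[0], ta[1], ta[2], ta[3]);  /* prints: 0 10 20 30 */
    return 0;
}
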
diff --git a/libavcodec/ppc/snow_altivec.c b/libavcodec/ppc/snow_altivec.c
index ea228b0..2ae32c7 100644
--- a/libavcodec/ppc/snow_altivec.c
+++ b/libavcodec/ppc/snow_altivec.c
@@ -379,8 +379,7 @@ void ff_snow_vertical_compose97i_altivec(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2,
     v4=(vector signed int *)b4;
     v5=(vector signed int *)b5;
 
-    for (i=0; i< w4;i++)
-    {
+    for (i=0; i< w4;i++) {
 
     #if 0
         b4[i] -= (3*(b3[i] + b5[i])+4)>>3;
@@ -782,8 +781,8 @@ void ff_snow_inner_add_yblock_altivec(uint8_t *obmc, const int obmc_stride,
 void snow_init_altivec(DSPContext* c, AVCodecContext *avctx)
 {
 #if 0
-        c->horizontal_compose97i = ff_snow_horizontal_compose97i_altivec;
-        c->vertical_compose97i = ff_snow_vertical_compose97i_altivec;
-        c->inner_add_yblock = ff_snow_inner_add_yblock_altivec;
+    c->horizontal_compose97i = ff_snow_horizontal_compose97i_altivec;
+    c->vertical_compose97i = ff_snow_vertical_compose97i_altivec;
+    c->inner_add_yblock = ff_snow_inner_add_yblock_altivec;
 #endif
 }
