[SCM] intel-vaapi-driver/upstream: Imported Upstream version 1.0.17

mfv-guest at users.alioth.debian.org mfv-guest at users.alioth.debian.org
Tue Apr 10 15:37:25 UTC 2012


The following commit has been merged in the upstream branch:
commit 8d55e766b03fdae59b7ca5e1cf47cfb1537f2b1c
Author: Matteo F. Vescovi <mfv.debian at gmail.com>
Date:   Tue Apr 10 17:28:09 2012 +0200

    Imported Upstream version 1.0.17

diff --git a/NEWS b/NEWS
index f166182..511800f 100644
--- a/NEWS
+++ b/NEWS
@@ -1,6 +1,13 @@
-libva-driver-intel NEWS -- summary of changes.  2012-02-14
+libva-driver-intel NEWS -- summary of changes.  2012-04-02
 Copyright (C) 2009-2011 Intel Corporation
 
+Version 1.0.17 - 02.Apr.2012
+* Add support for IMC1/IMC3 surface formats
+* Fix rendering of interlaced surfaces
+* Fix MPEG-2 decoding of interlaced streams (SNB, IVB)
+* Fix H.264 weighted prediction indicator (SNB)
+* Fix and simplify calculation of H.264 macroblock bit offset (ILK, SNB, IVB)
+
 Version 1.0.16 - 14.Feb.2012
 * Fix VC-1 bitplane buffer size (SNB, IVB)
 * Fix VC-1 motion vector modes for Ivy Bridge
diff --git a/configure.ac b/configure.ac
index 04528a2..cb2a7fc 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,7 +1,7 @@
 # intel-driver package version number
 m4_define([intel_driver_major_version], [1])
 m4_define([intel_driver_minor_version], [0])
-m4_define([intel_driver_micro_version], [16])
+m4_define([intel_driver_micro_version], [17])
 m4_define([intel_driver_pre_version],   [0])
 m4_define([intel_driver_version],
           [intel_driver_major_version.intel_driver_minor_version.intel_driver_micro_version])
diff --git a/src/gen6_mfc.c b/src/gen6_mfc.c
index 34de745..863d4ec 100644
--- a/src/gen6_mfc.c
+++ b/src/gen6_mfc.c
@@ -851,8 +851,7 @@ static VAStatus gen6_mfc_avc_prepare(VADriverContextP ctx,
     /*Setup all the input&output object*/
     obj_surface = SURFACE(pPicParameter->reconstructed_picture);
     assert(obj_surface);
-    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'));
-
+    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
     mfc_context->post_deblocking_output.bo = obj_surface->bo;
     dri_bo_reference(mfc_context->post_deblocking_output.bo);
 
diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c
index 2ffcbe2..51426ba 100644
--- a/src/gen6_mfd.c
+++ b/src/gen6_mfd.c
@@ -120,7 +120,7 @@ gen6_mfd_avc_frame_store_index(VADriverContextP ctx,
             struct object_surface *obj_surface = SURFACE(ref_pic->picture_id);
             
             assert(obj_surface);
-            i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N', 'V', '1', '2'));
+            i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
 
             for (frame_idx = 0; frame_idx < ARRAY_ELEMS(gen6_mfd_context->reference_surface); frame_idx++) {
                 for (j = 0; j < ARRAY_ELEMS(gen6_mfd_context->reference_surface); j++) {
@@ -682,6 +682,7 @@ gen6_mfd_avc_slice_state(VADriverContextP ctx,
                          pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
     int weighted_pred_idc = 0;
     int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
+    unsigned int chroma_log2_weight_denom, luma_log2_weight_denom;
     int slice_type;
 
     if (slice_param->slice_type == SLICE_TYPE_I ||
@@ -695,6 +696,9 @@ gen6_mfd_avc_slice_state(VADriverContextP ctx,
         slice_type = SLICE_TYPE_B;
     }
 
+    luma_log2_weight_denom   = slice_param->luma_log2_weight_denom;
+    chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
+
     if (slice_type == SLICE_TYPE_I) {
         assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
         assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
@@ -708,7 +712,13 @@ gen6_mfd_avc_slice_state(VADriverContextP ctx,
     } else {
         num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
         num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
-        weighted_pred_idc = (pic_param->pic_fields.bits.weighted_bipred_idc == 1);
+        weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
+
+        if (weighted_pred_idc == 2) {
+            /* 8.4.3 - Derivation process for prediction weights (8-279) */
+            luma_log2_weight_denom   = 5;
+            chroma_log2_weight_denom = 5;
+        }
     }
 
     first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
@@ -730,8 +740,8 @@ gen6_mfd_avc_slice_state(VADriverContextP ctx,
     OUT_BCS_BATCH(batch, 
                   (num_ref_idx_l1 << 24) |
                   (num_ref_idx_l0 << 16) |
-                  (slice_param->chroma_log2_weight_denom << 8) |
-                  (slice_param->luma_log2_weight_denom << 0));
+                  (chroma_log2_weight_denom << 8) |
+                  (luma_log2_weight_denom << 0));
     OUT_BCS_BATCH(batch, 
                   (weighted_pred_idc << 30) |
                   (slice_param->direct_spatial_mv_pred_flag << 29) |
@@ -846,27 +856,6 @@ gen6_mfd_avc_weightoffset_state(VADriverContextP ctx,
     }
 }
 
-static int
-gen6_mfd_avc_get_slice_bit_offset(uint8_t *buf, int mode_flag, int in_slice_data_bit_offset)
-{
-    int out_slice_data_bit_offset;
-    int slice_header_size = in_slice_data_bit_offset / 8;
-    int i, j;
-
-    for (i = 0, j = 0; i < slice_header_size; i++, j++) {
-        if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3) {
-            i++, j += 2;
-        }
-    }
-
-    out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
-
-    if (mode_flag == ENTROPY_CABAC)
-        out_slice_data_bit_offset = ALIGN(out_slice_data_bit_offset, 0x8);
-
-    return out_slice_data_bit_offset;
-}
-
 static void
 gen6_mfd_avc_bsd_object(VADriverContextP ctx,
                         VAPictureParameterBufferH264 *pic_param,
@@ -875,21 +864,19 @@ gen6_mfd_avc_bsd_object(VADriverContextP ctx,
                         struct gen6_mfd_context *gen6_mfd_context)
 {
     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
-    int slice_data_bit_offset;
-    uint8_t *slice_data = NULL;
+    unsigned int slice_data_bit_offset;
 
-    dri_bo_map(slice_data_bo, 0);
-    slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
-    slice_data_bit_offset = gen6_mfd_avc_get_slice_bit_offset(slice_data,
-                                                              pic_param->pic_fields.bits.entropy_coding_mode_flag,
-                                                              slice_param->slice_data_bit_offset);
-    dri_bo_unmap(slice_data_bo);
+    slice_data_bit_offset = avc_get_first_mb_bit_offset(
+        slice_data_bo,
+        slice_param,
+        pic_param->pic_fields.bits.entropy_coding_mode_flag
+    );
 
     BEGIN_BCS_BATCH(batch, 6);
     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
     OUT_BCS_BATCH(batch, 
-                  ((slice_param->slice_data_size - (slice_data_bit_offset >> 3)) << 0));
-    OUT_BCS_BATCH(batch, slice_param->slice_data_offset + (slice_data_bit_offset >> 3));
+                  (slice_param->slice_data_size - slice_param->slice_data_offset));
+    OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
     OUT_BCS_BATCH(batch,
                   (0 << 31) |
                   (0 << 14) |
@@ -897,8 +884,8 @@ gen6_mfd_avc_bsd_object(VADriverContextP ctx,
                   (0 << 10) |
                   (0 << 8));
     OUT_BCS_BATCH(batch,
-                  (0 << 16) |
-                  (0 << 6)  |
+                  ((slice_data_bit_offset >> 3) << 16) |
+                  (1 << 6)  |
                   ((0x7 - (slice_data_bit_offset & 0x7)) << 0));
     OUT_BCS_BATCH(batch, 0);
     ADVANCE_BCS_BATCH(batch);
@@ -975,8 +962,8 @@ gen6_mfd_avc_decode_init(VADriverContextP ctx,
     assert(obj_surface);
     obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
     obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
+    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
     gen6_mfd_init_avc_surface(ctx, pic_param, obj_surface);
-    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'));
 
     dri_bo_unreference(gen6_mfd_context->post_deblocking_output.bo);
     gen6_mfd_context->post_deblocking_output.bo = obj_surface->bo;
@@ -1097,35 +1084,22 @@ gen6_mfd_mpeg2_decode_init(VADriverContextP ctx,
     VAPictureParameterBufferMPEG2 *pic_param;
     struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct object_surface *obj_surface;
-    int i;
     dri_bo *bo;
 
     assert(decode_state->pic_param && decode_state->pic_param->buffer);
     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
 
-    /* reference picture */
-    obj_surface = SURFACE(pic_param->forward_reference_picture);
-
-    if (obj_surface && obj_surface->bo)
-        gen6_mfd_context->reference_surface[0].surface_id = pic_param->forward_reference_picture;
-    else
-        gen6_mfd_context->reference_surface[0].surface_id = VA_INVALID_ID;
-
-    obj_surface = SURFACE(pic_param->backward_reference_picture);
-
-    if (obj_surface && obj_surface->bo)
-        gen6_mfd_context->reference_surface[1].surface_id = pic_param->backward_reference_picture;
-    else
-        gen6_mfd_context->reference_surface[1].surface_id = gen6_mfd_context->reference_surface[0].surface_id;
-
-    /* must do so !!! */
-    for (i = 2; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++)
-        gen6_mfd_context->reference_surface[i].surface_id = gen6_mfd_context->reference_surface[i % 2].surface_id;
+    mpeg2_set_reference_surfaces(
+        ctx,
+        gen6_mfd_context->reference_surface,
+        decode_state,
+        pic_param
+    );
 
     /* Current decoded picture */
     obj_surface = SURFACE(decode_state->current_render_target);
     assert(obj_surface);
-    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'));
+    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
 
     dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo);
     gen6_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
@@ -1155,10 +1129,18 @@ gen6_mfd_mpeg2_pic_state(VADriverContextP ctx,
 {
     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
     VAPictureParameterBufferMPEG2 *pic_param;
+    unsigned int tff, pic_structure;
 
     assert(decode_state->pic_param && decode_state->pic_param->buffer);
     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
 
+    pic_structure = pic_param->picture_coding_extension.bits.picture_structure;
+    if (pic_structure == MPEG_FRAME)
+        tff = pic_param->picture_coding_extension.bits.top_field_first;
+    else
+        tff = !(pic_param->picture_coding_extension.bits.is_first_field ^
+                (pic_structure & MPEG_TOP_FIELD));
+
     BEGIN_BCS_BATCH(batch, 4);
     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (4 - 2));
     OUT_BCS_BATCH(batch,
@@ -1168,7 +1150,7 @@ gen6_mfd_mpeg2_pic_state(VADriverContextP ctx,
                   ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
-                  pic_param->picture_coding_extension.bits.top_field_first << 11 |
+                  tff << 11 |
                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
@@ -1245,20 +1227,22 @@ gen6_mfd_mpeg2_bsd_object(VADriverContextP ctx,
 {
     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
     unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
-    int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic = 0;
+    int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
 
     if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
         pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
         is_field_pic = 1;
+    is_field_pic_wa = is_field_pic &&
+        gen6_mfd_context->wa_mpeg2_slice_vertical_position > 0;
 
-    vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic);
+    vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
     hpos0 = slice_param->slice_horizontal_position;
 
     if (next_slice_param == NULL) {
         vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
         hpos1 = 0;
     } else {
-        vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic);
+        vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
         hpos1 = next_slice_param->slice_horizontal_position;
     }
 
@@ -1306,6 +1290,10 @@ gen6_mfd_mpeg2_decode_picture(VADriverContextP ctx,
     gen6_mfd_mpeg2_pic_state(ctx, decode_state, gen6_mfd_context);
     gen6_mfd_mpeg2_qm_state(ctx, decode_state, gen6_mfd_context);
 
+    if (gen6_mfd_context->wa_mpeg2_slice_vertical_position < 0)
+        gen6_mfd_context->wa_mpeg2_slice_vertical_position =
+            mpeg2_wa_slice_vertical_position(decode_state, pic_param);
+
     for (j = 0; j < decode_state->num_slice_params; j++) {
         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
         slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
@@ -1445,8 +1433,8 @@ gen6_mfd_vc1_decode_init(VADriverContextP ctx,
     /* Current decoded picture */
     obj_surface = SURFACE(decode_state->current_render_target);
     assert(obj_surface);
+    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
     gen6_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
-    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'));
 
     dri_bo_unreference(gen6_mfd_context->post_deblocking_output.bo);
     gen6_mfd_context->post_deblocking_output.bo = obj_surface->bo;
@@ -2042,6 +2030,8 @@ gen6_dec_hw_context_init(VADriverContextP ctx, VAProfile profile)
         gen6_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
         gen6_mfd_context->reference_surface[i].frame_store_id = -1;
     }
+
+    gen6_mfd_context->wa_mpeg2_slice_vertical_position = -1;
     
     return (struct hw_context *)gen6_mfd_context;
 }
diff --git a/src/gen6_mfd.h b/src/gen6_mfd.h
index bf06a00..6e20364 100644
--- a/src/gen6_mfd.h
+++ b/src/gen6_mfd.h
@@ -77,6 +77,8 @@ struct gen6_mfd_context
     GenBuffer           bsd_mpc_row_store_scratch_buffer;
     GenBuffer           mpr_row_store_scratch_buffer;
     GenBuffer           bitplane_read_buffer;
+
+    int                 wa_mpeg2_slice_vertical_position;
 };
 
 #endif /* _GEN6_MFD_H_ */
diff --git a/src/gen7_mfd.c b/src/gen7_mfd.c
index 2995fe8..0a346fa 100644
--- a/src/gen7_mfd.c
+++ b/src/gen7_mfd.c
@@ -120,7 +120,7 @@ gen7_mfd_avc_frame_store_index(VADriverContextP ctx,
             struct object_surface *obj_surface = SURFACE(ref_pic->picture_id);
             
             assert(obj_surface);
-            i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'));
+            i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
 
             for (frame_idx = 0; frame_idx < ARRAY_ELEMS(gen7_mfd_context->reference_surface); frame_idx++) {
                 for (j = 0; j < ARRAY_ELEMS(gen7_mfd_context->reference_surface); j++) {
@@ -234,7 +234,7 @@ gen7_mfd_pipe_mode_select(VADriverContextP ctx,
            standard_select == MFX_FORMAT_VC1 ||
            standard_select == MFX_FORMAT_JPEG);
 
-    BEGIN_BCS_BATCH(batch, 5); /* FIXME: 5 ??? */
+    BEGIN_BCS_BATCH(batch, 5);
     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
     OUT_BCS_BATCH(batch,
                   (MFX_LONG_MODE << 17) | /* Currently only support long format */
@@ -265,8 +265,14 @@ gen7_mfd_surface_state(VADriverContextP ctx,
     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
     struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct object_surface *obj_surface = SURFACE(decode_state->current_render_target);
+    unsigned int y_cb_offset;
+    unsigned int y_cr_offset;
+
     assert(obj_surface);
-    
+
+    y_cb_offset = obj_surface->y_cb_offset;
+    y_cr_offset = obj_surface->y_cr_offset;
+
     BEGIN_BCS_BATCH(batch, 6);
     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
     OUT_BCS_BATCH(batch, 0);
@@ -275,16 +281,18 @@ gen7_mfd_surface_state(VADriverContextP ctx,
                   ((obj_surface->orig_width - 1) << 4));
     OUT_BCS_BATCH(batch,
                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
-                  (1 << 27) | /* FIXME: set to 0 for JPEG */
-                  (0 << 22) | /* surface object control state, FIXME??? */
+                  ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
+                  (0 << 22) | /* surface object control state, ignored */
                   ((obj_surface->width - 1) << 3) | /* pitch */
-                  (0 << 2)  | /* must be 0 for interleave U/V */
+                  (0 << 2)  | /* must be 0 */
                   (1 << 1)  | /* must be tiled */
                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
     OUT_BCS_BATCH(batch,
-                  (0 << 16) | /* FIXME: fix it for JPEG */
-                  (obj_surface->height)); /* FIXME: fix it for JPEG */
-    OUT_BCS_BATCH(batch, 0); /* FIXME: fix it for JPEG */
+                  (0 << 16) | /* X offset for U(Cb), must be 0 */
+                  (y_cb_offset << 0)); /* Y offset for U(Cb) */
+    OUT_BCS_BATCH(batch,
+                  (0 << 16) | /* X offset for V(Cr), must be 0 */
+                  (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
     ADVANCE_BCS_BATCH(batch);
 }
 
@@ -809,27 +817,6 @@ gen7_mfd_avc_weightoffset_state(VADriverContextP ctx,
     }
 }
 
-static int
-gen7_mfd_avc_get_slice_bit_offset(uint8_t *buf, int mode_flag, int in_slice_data_bit_offset)
-{
-    int out_slice_data_bit_offset;
-    int slice_header_size = in_slice_data_bit_offset / 8;
-    int i, j;
-
-    for (i = 0, j = 0; i < slice_header_size; i++, j++) {
-        if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3) {
-            i++, j += 2;
-        }
-    }
-
-    out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
-
-    if (mode_flag == ENTROPY_CABAC)
-        out_slice_data_bit_offset = ALIGN(out_slice_data_bit_offset, 0x8);
-
-    return out_slice_data_bit_offset;
-}
-
 static void
 gen7_mfd_avc_bsd_object(VADriverContextP ctx,
                         VAPictureParameterBufferH264 *pic_param,
@@ -839,21 +826,19 @@ gen7_mfd_avc_bsd_object(VADriverContextP ctx,
                         struct gen7_mfd_context *gen7_mfd_context)
 {
     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
-    int slice_data_bit_offset;
-    uint8_t *slice_data = NULL;
+    unsigned int slice_data_bit_offset;
 
-    dri_bo_map(slice_data_bo, 0);
-    slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
-    slice_data_bit_offset = gen7_mfd_avc_get_slice_bit_offset(slice_data,
-                                                              pic_param->pic_fields.bits.entropy_coding_mode_flag,
-                                                              slice_param->slice_data_bit_offset);
-    dri_bo_unmap(slice_data_bo);
+    slice_data_bit_offset = avc_get_first_mb_bit_offset(
+        slice_data_bo,
+        slice_param,
+        pic_param->pic_fields.bits.entropy_coding_mode_flag
+    );
 
     /* the input bitsteam format on GEN7 differs from GEN6 */
     BEGIN_BCS_BATCH(batch, 6);
     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
     OUT_BCS_BATCH(batch, 
-                  (slice_param->slice_data_size));
+                  (slice_param->slice_data_size - slice_param->slice_data_offset));
     OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
     OUT_BCS_BATCH(batch,
                   (0 << 31) |
@@ -926,8 +911,8 @@ gen7_mfd_avc_decode_init(VADriverContextP ctx,
     assert(obj_surface);
     obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
     obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
+    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
     gen7_mfd_init_avc_surface(ctx, pic_param, obj_surface);
-    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'));
 
     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
@@ -1047,35 +1032,22 @@ gen7_mfd_mpeg2_decode_init(VADriverContextP ctx,
     VAPictureParameterBufferMPEG2 *pic_param;
     struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct object_surface *obj_surface;
-    int i;
     dri_bo *bo;
 
     assert(decode_state->pic_param && decode_state->pic_param->buffer);
     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
 
-    /* reference picture */
-    obj_surface = SURFACE(pic_param->forward_reference_picture);
-
-    if (obj_surface && obj_surface->bo)
-        gen7_mfd_context->reference_surface[0].surface_id = pic_param->forward_reference_picture;
-    else
-        gen7_mfd_context->reference_surface[0].surface_id = VA_INVALID_ID;
-
-    obj_surface = SURFACE(pic_param->backward_reference_picture);
-
-    if (obj_surface && obj_surface->bo)
-        gen7_mfd_context->reference_surface[1].surface_id = pic_param->backward_reference_picture;
-    else
-        gen7_mfd_context->reference_surface[1].surface_id = pic_param->forward_reference_picture;
-
-    /* must do so !!! */
-    for (i = 2; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++)
-        gen7_mfd_context->reference_surface[i].surface_id = gen7_mfd_context->reference_surface[i % 2].surface_id;
+    mpeg2_set_reference_surfaces(
+        ctx,
+        gen7_mfd_context->reference_surface,
+        decode_state,
+        pic_param
+    );
 
     /* Current decoded picture */
     obj_surface = SURFACE(decode_state->current_render_target);
     assert(obj_surface);
-    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'));
+    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
 
     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
@@ -1204,20 +1176,22 @@ gen7_mfd_mpeg2_bsd_object(VADriverContextP ctx,
 {
     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
     unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
-    int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic = 0;
+    int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
 
     if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
         pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
         is_field_pic = 1;
+    is_field_pic_wa = is_field_pic &&
+        gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
 
-    vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic);
+    vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
     hpos0 = slice_param->slice_horizontal_position;
 
     if (next_slice_param == NULL) {
         vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
         hpos1 = 0;
     } else {
-        vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic);
+        vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
         hpos1 = next_slice_param->slice_horizontal_position;
     }
 
@@ -1265,6 +1239,10 @@ gen7_mfd_mpeg2_decode_picture(VADriverContextP ctx,
     gen7_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
     gen7_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
 
+    if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
+        gen7_mfd_context->wa_mpeg2_slice_vertical_position =
+            mpeg2_wa_slice_vertical_position(decode_state, pic_param);
+
     for (j = 0; j < decode_state->num_slice_params; j++) {
         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
         slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
@@ -1404,8 +1382,8 @@ gen7_mfd_vc1_decode_init(VADriverContextP ctx,
     /* Current decoded picture */
     obj_surface = SURFACE(decode_state->current_render_target);
     assert(obj_surface);
+    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
     gen7_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
-    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'));
 
     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
@@ -2001,6 +1979,8 @@ gen7_dec_hw_context_init(VADriverContextP ctx, VAProfile profile)
         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
     }
 
+    gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
+
     switch (profile) {
     case VAProfileH264Baseline:
     case VAProfileH264Main:
diff --git a/src/gen7_mfd.h b/src/gen7_mfd.h
index e4a7327..d44b4a1 100644
--- a/src/gen7_mfd.h
+++ b/src/gen7_mfd.h
@@ -53,6 +53,15 @@ struct gen7_avc_surface
 #define GEN7_VC1_ADVANCED_PROFILE       2
 #define GEN7_VC1_RESERVED_PROFILE       3
 
+#define GEN7_YUV400                     0
+#define GEN7_YUV420                     1
+#define GEN7_YUV422H_2Y                 2
+#define GEN7_YUV444                     3
+#define GEN7_YUV411                     4
+#define GEN7_YUV422V_2Y                 5
+#define GEN7_YUV422H_4Y                 6
+#define GEN7_YUV422V_4Y                 7
+
 struct gen7_vc1_surface
 {
     dri_bo *dmv;
@@ -78,6 +87,8 @@ struct gen7_mfd_context
     GenBuffer           bsd_mpc_row_store_scratch_buffer;
     GenBuffer           mpr_row_store_scratch_buffer;
     GenBuffer           bitplane_read_buffer;
+
+    int                 wa_mpeg2_slice_vertical_position;
 };
 
 #endif /* _GEN7_MFD_H_ */
diff --git a/src/i965_avc_bsd.c b/src/i965_avc_bsd.c
index 6f133a3..b2b6c92 100644
--- a/src/i965_avc_bsd.c
+++ b/src/i965_avc_bsd.c
@@ -482,9 +482,9 @@ i965_avc_bsd_buf_base_state(VADriverContextP ctx,
     assert(obj_surface);
     obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
     obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
+    i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
     i965_avc_bsd_init_avc_bsd_surface(ctx, obj_surface, pic_param, i965_h264_context);
     avc_bsd_surface = obj_surface->private_data;
-    i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'));
 
     OUT_BCS_RELOC(batch, avc_bsd_surface->dmv_top,
                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
@@ -534,35 +534,6 @@ i965_avc_bsd_buf_base_state(VADriverContextP ctx,
     ADVANCE_BCS_BATCH(batch);
 }
 
-/*
- * Return the bit offset to the first bit of the slice data
- *
- * VASliceParameterBufferH264.slice_data_bit_offset will point into the part
- * of slice header if there are some escaped bytes in the slice header. The offset 
- * to slice data is needed for BSD unit so that BSD unit can fetch right slice data
- * for processing. This fixes conformance case BASQP1_Sony_C.jsv
- */
-static int
-i965_avc_bsd_get_slice_bit_offset(uint8_t *buf, int mode_flag, int in_slice_data_bit_offset)
-{
-    int out_slice_data_bit_offset;
-    int slice_header_size = in_slice_data_bit_offset / 8;
-    int i, j;
-
-    for (i = 0, j = 0; i < slice_header_size; i++, j++) {
-        if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3) {
-            i++, j += 2;
-        }
-    }
-
-    out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
-
-    if (mode_flag == ENTROPY_CABAC)
-        out_slice_data_bit_offset = ALIGN(out_slice_data_bit_offset, 0x8);
-
-    return out_slice_data_bit_offset;
-}
-
 static void
 g4x_avc_bsd_object(VADriverContextP ctx, 
                    struct decode_state *decode_state,
@@ -581,11 +552,10 @@ g4x_avc_bsd_object(VADriverContextP ctx,
         int num_ref_idx_l0, num_ref_idx_l1;
         int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
                              pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
-        int slice_data_bit_offset;
+        unsigned int slice_data_bit_offset;
         int weighted_pred_idc = 0;
         int first_mb_in_slice = 0;
         int slice_type;
-        uint8_t *slice_data = NULL;
 
         encrypted = 0; /* FIXME: which flag in VAAPI is used for encryption? */
 
@@ -595,12 +565,12 @@ g4x_avc_bsd_object(VADriverContextP ctx,
         } else 
             cmd_len = 8;
 
-        dri_bo_map(decode_state->slice_datas[slice_index]->bo, 0);
-        slice_data = (uint8_t *)(decode_state->slice_datas[slice_index]->bo->virtual + slice_param->slice_data_offset);
-        slice_data_bit_offset = i965_avc_bsd_get_slice_bit_offset(slice_data,
-                                                                  pic_param->pic_fields.bits.entropy_coding_mode_flag,
-                                                                  slice_param->slice_data_bit_offset);
-        dri_bo_unmap(decode_state->slice_datas[slice_index]->bo);
+
+        slice_data_bit_offset = avc_get_first_mb_bit_offset_with_epb(
+            decode_state->slice_datas[slice_index]->bo,
+            slice_param,
+            pic_param->pic_fields.bits.entropy_coding_mode_flag
+        );
 
         if (slice_param->slice_type == SLICE_TYPE_I ||
             slice_param->slice_type == SLICE_TYPE_SI)
@@ -710,11 +680,10 @@ ironlake_avc_bsd_object(VADriverContextP ctx,
         int num_ref_idx_l0, num_ref_idx_l1;
         int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
                              pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
-        int slice_data_bit_offset;
+        unsigned int slice_data_bit_offset;
         int weighted_pred_idc = 0;
         int first_mb_in_slice;
         int slice_type;
-        uint8_t *slice_data = NULL;
 
         encrypted = 0; /* FIXME: which flag in VAAPI is used for encryption? */
 
@@ -723,12 +692,11 @@ ironlake_avc_bsd_object(VADriverContextP ctx,
         } else 
             counter_value = 0;
 
-        dri_bo_map(decode_state->slice_datas[slice_index]->bo, 0);
-        slice_data = (uint8_t *)(decode_state->slice_datas[slice_index]->bo->virtual + slice_param->slice_data_offset);
-        slice_data_bit_offset = i965_avc_bsd_get_slice_bit_offset(slice_data,
-                                                                  pic_param->pic_fields.bits.entropy_coding_mode_flag,
-                                                                  slice_param->slice_data_bit_offset);
-        dri_bo_unmap(decode_state->slice_datas[slice_index]->bo);
+        slice_data_bit_offset = avc_get_first_mb_bit_offset_with_epb(
+            decode_state->slice_datas[slice_index]->bo,
+            slice_param,
+            pic_param->pic_fields.bits.entropy_coding_mode_flag
+        );
 
         if (slice_param->slice_type == SLICE_TYPE_I ||
             slice_param->slice_type == SLICE_TYPE_SI)
@@ -927,7 +895,7 @@ i965_avc_bsd_frame_store_index(VADriverContextP ctx,
             int frame_idx;
             struct object_surface *obj_surface = SURFACE(ref_pic->picture_id);
             assert(obj_surface);
-            i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'));
+            i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
             
             for (frame_idx = 0; frame_idx < ARRAY_ELEMS(i965_h264_context->fsid_list); frame_idx++) {
                 for (j = 0; j < ARRAY_ELEMS(i965_h264_context->fsid_list); j++) {
diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c
index d4400c5..8450d23 100644
--- a/src/i965_decoder_utils.c
+++ b/src/i965_decoder_utils.c
@@ -24,10 +24,142 @@
 #include <assert.h>
 #include <stddef.h>
 #include <string.h>
+#include <alloca.h>
 #include "intel_batchbuffer.h"
 #include "i965_decoder_utils.h"
+#include "i965_drv_video.h"
 #include "i965_defines.h"
 
+/* Store va_surface in ref_frame->surface_id if the surface exists and has a backing bo; returns 1 if stored, 0 otherwise (ref_frame is then left untouched) */
+static inline int
+set_ref_frame(
+    struct i965_driver_data *i965,
+    GenFrameStore           *ref_frame,
+    VASurfaceID              va_surface
+)
+{
+    struct object_surface *obj_surface;
+
+    if (va_surface == VA_INVALID_ID)
+        return 0;
+
+    obj_surface = SURFACE(va_surface);
+    if (!obj_surface || !obj_surface->bo)
+        return 0;
+
+    ref_frame->surface_id = va_surface;
+    return 1;
+}
+
+/* Check whether codec layer incorrectly fills in slice_vertical_position; returns 1 if the workaround is needed, 0 if not, -1 if it cannot tell from this picture (non-progressive frame picture) */
+int
+mpeg2_wa_slice_vertical_position(
+    struct decode_state           *decode_state,
+    VAPictureParameterBufferMPEG2 *pic_param
+)
+{
+    unsigned int i, j, mb_height, vpos, last_vpos = 0;
+
+    /* Assume progressive sequence if we got a progressive frame */
+    if (pic_param->picture_coding_extension.bits.progressive_frame)
+        return 0;
+
+    /* Wait for a field coded picture */
+    if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_FRAME)
+        return -1;
+
+    assert(decode_state && decode_state->slice_params);
+
+    mb_height = (pic_param->vertical_size + 31) / 32;
+    /* Flag a position at or beyond mb_height, or one that is exactly 2 past the previous slice's position */
+    for (j = 0; j < decode_state->num_slice_params; j++) {
+        struct buffer_store * const buffer_store =
+            decode_state->slice_params[j];
+
+        for (i = 0; i < buffer_store->num_elements; i++) {
+            VASliceParameterBufferMPEG2 * const slice_param =
+                ((VASliceParameterBufferMPEG2 *)buffer_store->buffer) + i;
+
+            vpos = slice_param->slice_vertical_position;
+            if (vpos >= mb_height || vpos == last_vpos + 2) {
+                WARN_ONCE("codec layer incorrectly fills in MPEG-2 slice_vertical_position. Workaround applied\n");
+                return 1;
+            }
+            last_vpos = vpos;
+        }
+    }
+    return 0;
+}
+
+/* Build MPEG-2 reference frames array: fills ref_frames[0..1], plus ref_frames[2..3] unless progressive_frame is set; slots without a usable surface repeat entry 0 (resp. 2), which stays VA_INVALID_ID when no reference could be set */
+void
+mpeg2_set_reference_surfaces(
+    VADriverContextP               ctx,
+    GenFrameStore                  ref_frames[MAX_GEN_REFERENCE_FRAMES],
+    struct decode_state           *decode_state,
+    VAPictureParameterBufferMPEG2 *pic_param
+)
+{
+    struct i965_driver_data * const i965 = i965_driver_data(ctx);
+    VASurfaceID va_surface;
+    unsigned pic_structure, is_second_field, n = 0;
+
+    pic_structure = pic_param->picture_coding_extension.bits.picture_structure;
+    is_second_field = pic_structure != MPEG_FRAME &&
+        !pic_param->picture_coding_extension.bits.is_first_field;
+
+    ref_frames[0].surface_id = VA_INVALID_ID;
+
+    /* Reference frames are indexed by frame store ID  (0:top, 1:bottom) */
+    switch (pic_param->picture_coding_type) {
+    case MPEG_P_PICTURE:
+        if (is_second_field && pic_structure == MPEG_BOTTOM_FIELD) {
+            va_surface = decode_state->current_render_target;
+            n += set_ref_frame(i965, &ref_frames[n], va_surface);
+        }
+        va_surface = pic_param->forward_reference_picture;
+        n += set_ref_frame(i965, &ref_frames[n], va_surface);
+        break;
+
+    case MPEG_B_PICTURE:
+        va_surface = pic_param->forward_reference_picture;
+        n += set_ref_frame(i965, &ref_frames[n], va_surface);
+        va_surface = pic_param->backward_reference_picture;
+        n += set_ref_frame(i965, &ref_frames[n], va_surface);
+        break;
+    }
+
+    while (n != 2)
+        ref_frames[n++].surface_id = ref_frames[0].surface_id;
+
+    if (pic_param->picture_coding_extension.bits.progressive_frame)
+        return;
+
+    ref_frames[2].surface_id = VA_INVALID_ID;
+
+    /* Bottom field pictures used as reference */
+    switch (pic_param->picture_coding_type) {
+    case MPEG_P_PICTURE:
+        if (is_second_field && pic_structure == MPEG_TOP_FIELD) {
+            va_surface = decode_state->current_render_target;
+            n += set_ref_frame(i965, &ref_frames[n], va_surface);
+        }
+        va_surface = pic_param->forward_reference_picture;
+        n += set_ref_frame(i965, &ref_frames[n], va_surface);
+        break;
+
+    case MPEG_B_PICTURE:
+        va_surface = pic_param->forward_reference_picture;
+        n += set_ref_frame(i965, &ref_frames[n], va_surface);
+        va_surface = pic_param->backward_reference_picture;
+        n += set_ref_frame(i965, &ref_frames[n], va_surface);
+        break;
+    }
+
+    while (n != 4)
+        ref_frames[n++].surface_id = ref_frames[2].surface_id;
+}
+
 /* Generate flat scaling matrices for H.264 decoding */
 void
 avc_gen_default_iq_matrix(VAIQMatrixBufferH264 *iq_matrix)
@@ -39,6 +171,61 @@ avc_gen_default_iq_matrix(VAIQMatrixBufferH264 *iq_matrix)
     memset(&iq_matrix->ScalingList8x8, 16, sizeof(iq_matrix->ScalingList8x8));
 }
 
+/* Get first macroblock bit offset for BSD, minus EPB count (AVC): returns slice_param->slice_data_bit_offset unchanged, or passed through ALIGN(..., 0x8) when mode_flag is ENTROPY_CABAC; slice_data_bo is not used */
+/* XXX: slice_data_bit_offset does not account for EPB */
+unsigned int
+avc_get_first_mb_bit_offset(
+    dri_bo                     *slice_data_bo,
+    VASliceParameterBufferH264 *slice_param,
+    unsigned int                mode_flag
+)
+{
+    unsigned int slice_data_bit_offset = slice_param->slice_data_bit_offset;
+
+    if (mode_flag == ENTROPY_CABAC)
+        slice_data_bit_offset = ALIGN(slice_data_bit_offset, 0x8);
+    return slice_data_bit_offset;
+}
+
+/* Get first macroblock bit offset for BSD, with EPB count (AVC): returns slice_data_bit_offset plus 8 bits per emulation prevention byte (00 00 03) met while walking the header bytes, passed through ALIGN(..., 0x8) for ENTROPY_CABAC */
+/* XXX: slice_data_bit_offset does not account for EPB */
+unsigned int
+avc_get_first_mb_bit_offset_with_epb(
+    dri_bo                     *slice_data_bo,
+    VASliceParameterBufferH264 *slice_param,
+    unsigned int                mode_flag
+)
+{
+    unsigned int in_slice_data_bit_offset = slice_param->slice_data_bit_offset;
+    unsigned int out_slice_data_bit_offset;
+    unsigned int i, j, n, buf_size, data_size, header_size;
+    uint8_t *buf;
+    int ret;
+
+    header_size = slice_param->slice_data_bit_offset / 8;
+    data_size   = slice_param->slice_data_size - slice_param->slice_data_offset;
+    buf_size    = (header_size * 3 + 1) / 2; // Max possible header size (x1.5)
+    if (buf_size > data_size)
+        buf_size = data_size;
+
+    buf = alloca(buf_size);
+    ret = dri_bo_get_subdata(
+        slice_data_bo, slice_param->slice_data_offset,
+        buf_size, buf
+    );
+    assert(ret == 0);
+    /* i indexes the escaped bytes read from the bo, j the matching unescaped header byte, n counts the EPBs seen */
+    for (i = 2, j = 2, n = 0; i < buf_size && j < header_size; i++, j++) {
+        if (buf[i] == 0x03 && buf[i - 1] == 0x00 && buf[i - 2] == 0x00)
+            i += 2, j++, n++;
+    }
+    /* Add only the EPB bits: i itself overshoots when the header is under 2 bytes or an EPB sits right before the last header byte */
+    out_slice_data_bit_offset = in_slice_data_bit_offset + n * 8;
+    if (mode_flag == ENTROPY_CABAC)
+        out_slice_data_bit_offset = ALIGN(out_slice_data_bit_offset, 0x8);
+    return out_slice_data_bit_offset;
+}
+
 static inline uint8_t
 get_ref_idx_state_1(const VAPictureH264 *va_pic, unsigned int frame_store_id)
 {
diff --git a/src/i965_decoder_utils.h b/src/i965_decoder_utils.h
index bf9be84..0d86523 100644
--- a/src/i965_decoder_utils.h
+++ b/src/i965_decoder_utils.h
@@ -27,9 +27,39 @@
 #include "i965_decoder.h"
 #include "intel_batchbuffer.h"
 
+struct decode_state;
+
+int
+mpeg2_wa_slice_vertical_position(
+    struct decode_state           *decode_state,
+    VAPictureParameterBufferMPEG2 *pic_param
+);
+
+void
+mpeg2_set_reference_surfaces(
+    VADriverContextP               ctx,
+    GenFrameStore                  ref_frames[MAX_GEN_REFERENCE_FRAMES],
+    struct decode_state           *decode_state,
+    VAPictureParameterBufferMPEG2 *pic_param
+);
+
 void
 avc_gen_default_iq_matrix(VAIQMatrixBufferH264 *iq_matrix);
 
+unsigned int
+avc_get_first_mb_bit_offset(
+    dri_bo                     *slice_data_bo,
+    VASliceParameterBufferH264 *slice_param,
+    unsigned int                mode_flag
+);
+
+unsigned int
+avc_get_first_mb_bit_offset_with_epb(
+    dri_bo                     *slice_data_bo,
+    VASliceParameterBufferH264 *slice_param,
+    unsigned int                mode_flag
+);
+
 void
 gen5_fill_avc_ref_idx_state(
     uint8_t             state[32],
diff --git a/src/i965_defines.h b/src/i965_defines.h
index 509ae9e..fcf6e10 100644
--- a/src/i965_defines.h
+++ b/src/i965_defines.h
@@ -694,12 +694,25 @@
 #define MFD_MODE_IT             1
 
 #define MFX_SURFACE_PLANAR_420_8        4
+#define MFX_SURFACE_PLANAR_411_8        5
+#define MFX_SURFACE_PLANAR_422_8        6
 #define MFX_SURFACE_MONOCHROME          12
 
+#define MPEG_I_PICTURE          1
+#define MPEG_P_PICTURE          2
+#define MPEG_B_PICTURE          3
+
 #define MPEG_TOP_FIELD		1
 #define MPEG_BOTTOM_FIELD	2
 #define MPEG_FRAME		3
 
+#define SUBSAMPLE_YUV400        0
+#define SUBSAMPLE_YUV420        1
+#define SUBSAMPLE_YUV422H       2
+#define SUBSAMPLE_YUV422V       3
+#define SUBSAMPLE_YUV444        4
+#define SUBSAMPLE_YUV411        5
+
 #define URB_SIZE(intel)         (IS_GEN7(intel->device_id) ? 4096 :     \
                                  IS_GEN6(intel->device_id) ? 1024 :     \
                                  IS_IRONLAKE(intel->device_id) ? 1024 : \
diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c
index e1e81c7..110e59e 100644
--- a/src/i965_drv_video.c
+++ b/src/i965_drv_video.c
@@ -477,22 +477,15 @@ i965_CreateSurfaces(VADriverContextP ctx,
         obj_surface->orig_width = width;
         obj_surface->orig_height = height;
 
-        if (IS_GEN6(i965->intel.device_id) ||
-            IS_GEN7(i965->intel.device_id)) {
-            obj_surface->width = ALIGN(obj_surface->orig_width, 128);
-            obj_surface->height = ALIGN(obj_surface->orig_height, 32);
-        } else {
-            obj_surface->width = ALIGN(obj_surface->orig_width, 16);
-            obj_surface->height = ALIGN(obj_surface->orig_height, 16);
-        }
-
-        obj_surface->size = SIZE_YUV420(obj_surface->width, obj_surface->height);
+        obj_surface->width = ALIGN(width, 16);
+        obj_surface->height = ALIGN(height, 16);
         obj_surface->flags = SURFACE_REFERENCED;
         obj_surface->fourcc = 0;
         obj_surface->bo = NULL;
         obj_surface->locked_image_id = VA_INVALID_ID;
         obj_surface->private_data = NULL;
         obj_surface->free_private_data = NULL;
+        obj_surface->subsampling = SUBSAMPLE_YUV420;
     }
 
     /* Error recovery */
@@ -1699,12 +1692,129 @@ void
 i965_check_alloc_surface_bo(VADriverContextP ctx,
                             struct object_surface *obj_surface,
                             int tiled,
-                            unsigned int fourcc)
+                            unsigned int fourcc,
+                            unsigned int subsampling)
 {
     struct i965_driver_data *i965 = i965_driver_data(ctx);
+    int region_width, region_height;
 
-    if (obj_surface->bo)
+    if (obj_surface->bo) {
+        assert(obj_surface->fourcc);
+        assert(obj_surface->fourcc == fourcc);
+        assert(obj_surface->subsampling == subsampling);
         return;
+    }
+
+    obj_surface->x_cb_offset = 0; /* X offset is always 0 */
+    obj_surface->x_cr_offset = 0;
+
+    if (tiled) {
+        assert(fourcc == VA_FOURCC('N', 'V', '1', '2') ||
+               fourcc == VA_FOURCC('I', 'M', 'C', '1') ||
+               fourcc == VA_FOURCC('I', 'M', 'C', '3'));
+
+        obj_surface->width = ALIGN(obj_surface->orig_width, 128);
+        obj_surface->height = ALIGN(obj_surface->orig_height, 32);
+        obj_surface->cb_cr_pitch = obj_surface->width;
+        region_width = obj_surface->width;
+        region_height = obj_surface->height;
+
+        if (fourcc == VA_FOURCC('N', 'V', '1', '2')) {
+            assert(subsampling == SUBSAMPLE_YUV420);
+            obj_surface->y_cb_offset = obj_surface->height;
+            obj_surface->y_cr_offset = obj_surface->height;
+            obj_surface->cb_cr_width = obj_surface->orig_width / 2;
+            obj_surface->cb_cr_height = obj_surface->orig_height / 2;
+            region_height = obj_surface->height + ALIGN(obj_surface->cb_cr_height, 32);
+        } else if (fourcc == VA_FOURCC('I', 'M', 'C', '1') ||
+                   fourcc == VA_FOURCC('I', 'M', 'C', '3')) {
+            switch (subsampling) {
+            case SUBSAMPLE_YUV400:
+                obj_surface->cb_cr_width = 0;
+                obj_surface->cb_cr_height = 0;
+                break;
+
+            case SUBSAMPLE_YUV420:
+                obj_surface->cb_cr_width = obj_surface->orig_width / 2;
+                obj_surface->cb_cr_height = obj_surface->orig_height / 2;
+                break;
+
+            case SUBSAMPLE_YUV422H:
+                obj_surface->cb_cr_width = obj_surface->orig_width / 2;
+                obj_surface->cb_cr_height = obj_surface->orig_height;
+                break;
+
+            case SUBSAMPLE_YUV422V:
+                obj_surface->cb_cr_width = obj_surface->orig_width;
+                obj_surface->cb_cr_height = obj_surface->orig_height / 2;
+                break;
+
+            case SUBSAMPLE_YUV444:
+                obj_surface->cb_cr_width = obj_surface->orig_width;
+                obj_surface->cb_cr_height = obj_surface->orig_height;
+                break;
+
+            case SUBSAMPLE_YUV411:
+                obj_surface->cb_cr_width = obj_surface->orig_width / 4;
+                obj_surface->cb_cr_height = obj_surface->orig_height;
+                break;
+
+            default:
+                assert(0);
+                break;
+            }
+
+            region_height = obj_surface->height + ALIGN(obj_surface->cb_cr_height, 32) * 2;
+
+            if (fourcc == VA_FOURCC('I', 'M', 'C', '1')) {
+                obj_surface->y_cr_offset = obj_surface->height;
+                obj_surface->y_cb_offset = obj_surface->y_cr_offset + ALIGN(obj_surface->cb_cr_height, 32);
+            } else {
+                obj_surface->y_cb_offset = obj_surface->height;
+                obj_surface->y_cr_offset = obj_surface->y_cb_offset + ALIGN(obj_surface->cb_cr_height, 32);
+            }
+        }
+    } else {
+        assert(fourcc != VA_FOURCC('I', 'M', 'C', '1') &&
+               fourcc != VA_FOURCC('I', 'M', 'C', '3'));
+        assert(subsampling == SUBSAMPLE_YUV420);
+
+        region_width = obj_surface->width;
+        region_height = obj_surface->height;
+
+        switch (fourcc) {
+        case VA_FOURCC('N', 'V', '1', '2'):
+            obj_surface->y_cb_offset = obj_surface->height;
+            obj_surface->y_cr_offset = obj_surface->height;
+            obj_surface->cb_cr_width = obj_surface->orig_width / 2;
+            obj_surface->cb_cr_height = obj_surface->orig_height / 2;
+            obj_surface->cb_cr_pitch = obj_surface->width;
+            region_height = obj_surface->height + obj_surface->height / 2;
+            break;
+
+        case VA_FOURCC('Y', 'V', '1', '2'):
+        case VA_FOURCC('I', '4', '2', '0'):
+            if (fourcc == VA_FOURCC('Y', 'V', '1', '2')) {
+                obj_surface->y_cr_offset = obj_surface->height;
+                obj_surface->y_cb_offset = obj_surface->height + obj_surface->height / 4;
+            } else {
+                obj_surface->y_cb_offset = obj_surface->height;
+                obj_surface->y_cr_offset = obj_surface->height + obj_surface->height / 4;
+            }
+
+            obj_surface->cb_cr_width = obj_surface->orig_width / 2;
+            obj_surface->cb_cr_height = obj_surface->orig_height / 2;
+            obj_surface->cb_cr_pitch = obj_surface->width / 2;
+            region_height = obj_surface->height + obj_surface->height / 2;
+            break;
+
+        default:
+            assert(0);
+            break;
+        }
+    }
+
+    obj_surface->size = ALIGN(region_width * region_height, 0x1000);
 
     if (tiled) {
         uint32_t tiling_mode = I915_TILING_Y; /* always uses Y-tiled format */
@@ -1712,8 +1822,8 @@ i965_check_alloc_surface_bo(VADriverContextP ctx,
 
         obj_surface->bo = drm_intel_bo_alloc_tiled(i965->intel.bufmgr, 
                                                    "vaapi surface",
-                                                   obj_surface->width, 
-                                                   obj_surface->height + obj_surface->height / 2,
+                                                   region_width,
+                                                   region_height,
                                                    1,
                                                    &tiling_mode,
                                                    &pitch,
@@ -1728,6 +1838,7 @@ i965_check_alloc_surface_bo(VADriverContextP ctx,
     }
 
     obj_surface->fourcc = fourcc;
+    obj_surface->subsampling = subsampling;
     assert(obj_surface->bo);
 }
 
@@ -1812,7 +1923,7 @@ VAStatus i965_DeriveImage(VADriverContextP ctx,
         }
     }
 
-    i965_check_alloc_surface_bo(ctx, obj_surface, HAS_TILED_SURFACE(i965), image->format.fourcc);
+    i965_check_alloc_surface_bo(ctx, obj_surface, HAS_TILED_SURFACE(i965), image->format.fourcc, SUBSAMPLE_YUV420);
     va_status = i965_create_buffer_internal(ctx, 0, VAImageBufferType,
                                             obj_surface->size, 1, NULL, obj_surface->bo, &image->buf);
     if (va_status != VA_STATUS_SUCCESS)
@@ -2180,8 +2291,10 @@ i965_PutSurface(VADriverContextP ctx,
     if ((flags & VA_FILTER_SCALING_MASK) == VA_FILTER_SCALING_NL_ANAMORPHIC)
         pp_flag |= I965_PP_FLAG_AVS;
 
-    if (flags & (VA_BOTTOM_FIELD | VA_TOP_FIELD))
-        pp_flag |= I965_PP_FLAG_DEINTERLACING;
+    if (flags & VA_TOP_FIELD)
+        pp_flag |= I965_PP_FLAG_TOP_FIELD;
+    else if (flags & VA_BOTTOM_FIELD)
+        pp_flag |= I965_PP_FLAG_BOTTOM_FIELD;
 
     src_rect.x      = srcx;
     src_rect.y      = srcy;
diff --git a/src/i965_drv_video.h b/src/i965_drv_video.h
index 67ca6f6..84bded6 100644
--- a/src/i965_drv_video.h
+++ b/src/i965_drv_video.h
@@ -161,6 +161,14 @@ struct object_surface
     VAImageID locked_image_id;
     void (*free_private_data)(void **data);
     void *private_data;
+    unsigned int subsampling;
+    int x_cb_offset;
+    int y_cb_offset;
+    int x_cr_offset;
+    int y_cr_offset;
+    int cb_cr_width;
+    int cb_cr_height;
+    int cb_cr_pitch;
 };
 
 struct object_buffer 
@@ -253,6 +261,7 @@ void
 i965_check_alloc_surface_bo(VADriverContextP ctx,
                             struct object_surface *obj_surface,
                             int tiled,
-                            unsigned int fourcc);
+                            unsigned int fourcc,
+                            unsigned int subsampling);
 
 #endif /* _I965_DRV_VIDEO_H_ */
diff --git a/src/i965_media_mpeg2.c b/src/i965_media_mpeg2.c
index 1d87d9b..55e2109 100644
--- a/src/i965_media_mpeg2.c
+++ b/src/i965_media_mpeg2.c
@@ -36,6 +36,7 @@
 #include "intel_driver.h"
 #include "i965_defines.h"
 #include "i965_drv_video.h"
+#include "i965_decoder_utils.h"
 
 #include "i965_media.h"
 #include "i965_media_mpeg2.h"
@@ -514,7 +515,7 @@ i965_media_mpeg2_surface_setup(VADriverContextP ctx,
     int w = obj_surface->width;
     int h = obj_surface->height;
 
-    i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('I','4','2','0'));
+    i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('I','4','2','0'), SUBSAMPLE_YUV420);
 
     if (picture_structure == MPEG_FRAME) {
 	i965_media_mpeg2_surface_state(ctx, base_index + 0, obj_surface,
@@ -879,6 +880,7 @@ i965_media_mpeg2_objects(VADriverContextP ctx,
                          struct decode_state *decode_state,
                          struct i965_media_context *media_context)
 {
+    struct i965_mpeg2_context * const i965_mpeg2_context = media_context->private_context;
     struct intel_batchbuffer *batch = media_context->base.batch;
     VASliceParameterBufferMPEG2 *slice_param;
     VAPictureParameterBufferMPEG2 *pic_param;
@@ -887,6 +889,10 @@ i965_media_mpeg2_objects(VADriverContextP ctx,
     assert(decode_state->pic_param && decode_state->pic_param->buffer);
     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
 
+    if (i965_mpeg2_context->wa_slice_vertical_position < 0)
+        i965_mpeg2_context->wa_slice_vertical_position =
+            mpeg2_wa_slice_vertical_position(decode_state, pic_param);
+
     for (j = 0; j < decode_state->num_slice_params; j++) {
         assert(decode_state->slice_params[j] && decode_state->slice_params[j]->buffer);
         assert(decode_state->slice_datas[j] && decode_state->slice_datas[j]->bo);
@@ -895,8 +901,9 @@ i965_media_mpeg2_objects(VADriverContextP ctx,
         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
             int vpos, hpos, is_field_pic = 0;
 
-            if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
-                pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
+            if (i965_mpeg2_context->wa_slice_vertical_position > 0 &&
+                (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
+                 pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD))
                 is_field_pic = 1;
 
             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
@@ -970,6 +977,7 @@ i965_media_mpeg2_dec_context_init(VADriverContextP ctx, struct i965_media_contex
     int i;
 
     i965_mpeg2_context = calloc(1, sizeof(struct i965_mpeg2_context));
+    i965_mpeg2_context->wa_slice_vertical_position = -1;
 
     /* kernel */
     assert(NUM_MPEG2_VLD_KERNELS == (sizeof(mpeg2_vld_kernels_gen4) / 
diff --git a/src/i965_media_mpeg2.h b/src/i965_media_mpeg2.h
index 65e308c..5b8867e 100644
--- a/src/i965_media_mpeg2.h
+++ b/src/i965_media_mpeg2.h
@@ -44,6 +44,7 @@ struct i965_mpeg2_context
 {
     struct i965_kernel vld_kernels[NUM_MPEG2_VLD_KERNELS];
     VAIQMatrixBufferMPEG2 iq_matrix;
+    int wa_slice_vertical_position;
 };
 
 void i965_media_mpeg2_decode_init(VADriverContextP ctx, struct decode_state * decode_state, struct i965_media_context *media_context);
diff --git a/src/i965_post_processing.c b/src/i965_post_processing.c
index 090403c..6e238b4 100644
--- a/src/i965_post_processing.c
+++ b/src/i965_post_processing.c
@@ -2236,7 +2236,7 @@ i965_post_processing(
                                              &out_surface_id);
                 assert(status == VA_STATUS_SUCCESS);
                 obj_surface = SURFACE(out_surface_id);
-                i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'));
+                i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
                 i965_post_processing_internal(ctx,
                                               in_surface_id, out_surface_id,
                                               src_rect, dst_rect,
@@ -2258,7 +2258,7 @@ i965_post_processing(
                                              &out_surface_id);
                 assert(status == VA_STATUS_SUCCESS);
                 obj_surface = SURFACE(out_surface_id);
-                i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'));
+                i965_check_alloc_surface_bo(ctx, obj_surface, 0, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
                 i965_post_processing_internal(ctx,
                                               in_surface_id, out_surface_id,
                                               src_rect, dst_rect,
diff --git a/src/i965_post_processing.h b/src/i965_post_processing.h
index 0981854..5f4e949 100644
--- a/src/i965_post_processing.h
+++ b/src/i965_post_processing.h
@@ -31,8 +31,10 @@
 
 #define MAX_PP_SURFACES 32
 
-#define I965_PP_FLAG_DEINTERLACING      1
-#define I965_PP_FLAG_AVS                2
+#define I965_PP_FLAG_TOP_FIELD          1
+#define I965_PP_FLAG_BOTTOM_FIELD       2
+#define I965_PP_FLAG_DEINTERLACING      4 /* XXX: don't support MCDI yet */
+#define I965_PP_FLAG_AVS                8
 
 enum
 {
diff --git a/src/i965_render.c b/src/i965_render.c
index 0fa76d2..e6e98f1 100644
--- a/src/i965_render.c
+++ b/src/i965_render.c
@@ -637,15 +637,32 @@ i965_render_set_surface_tiling(struct i965_surface_state *ss, unsigned int tilin
 }
 
 static void
-i965_render_set_surface_state(struct i965_surface_state *ss,
-                              dri_bo *bo, unsigned long offset,
-                              int width, int height,
-                              int pitch, int format)
+i965_render_set_surface_state(
+    struct i965_surface_state *ss,
+    dri_bo                    *bo,
+    unsigned long              offset,
+    unsigned int               width,
+    unsigned int               height,
+    unsigned int               pitch,
+    unsigned int               format,
+    unsigned int               flags
+)
 {
     unsigned int tiling;
     unsigned int swizzle;
 
     memset(ss, 0, sizeof(*ss));
+
+    switch (flags & (I965_PP_FLAG_TOP_FIELD|I965_PP_FLAG_BOTTOM_FIELD)) {
+    case I965_PP_FLAG_BOTTOM_FIELD:
+        ss->ss0.vert_line_stride_ofs = 1;
+        /* fall-through */
+    case I965_PP_FLAG_TOP_FIELD:
+        ss->ss0.vert_line_stride = 1;
+        height /= 2;
+        break;
+    }
+
     ss->ss0.surface_type = I965_SURFACE_2D;
     ss->ss0.surface_format = format;
     ss->ss0.color_blend = 1;
@@ -681,16 +698,32 @@ gen7_render_set_surface_tiling(struct gen7_surface_state *ss, uint32_t tiling)
 }
 
 static void
-gen7_render_set_surface_state(struct gen7_surface_state *ss,
-                              dri_bo *bo, unsigned long offset,
-                              int width, int height,
-                              int pitch, int format)
+gen7_render_set_surface_state(
+    struct gen7_surface_state *ss,
+    dri_bo                    *bo,
+    unsigned long              offset,
+    int                        width,
+    int                        height,
+    int                        pitch,
+    int                        format,
+    unsigned int               flags
+)
 {
     unsigned int tiling;
     unsigned int swizzle;
 
     memset(ss, 0, sizeof(*ss));
 
+    switch (flags & (I965_PP_FLAG_TOP_FIELD|I965_PP_FLAG_BOTTOM_FIELD)) {
+    case I965_PP_FLAG_BOTTOM_FIELD:
+        ss->ss0.vert_line_stride_ofs = 1;
+        /* fall-through */
+    case I965_PP_FLAG_TOP_FIELD:
+        ss->ss0.vert_line_stride = 1;
+        height /= 2;
+        break;
+    }
+
     ss->ss0.surface_type = I965_SURFACE_2D;
     ss->ss0.surface_format = format;
 
@@ -706,12 +739,17 @@ gen7_render_set_surface_state(struct gen7_surface_state *ss,
 }
 
 static void
-i965_render_src_surface_state(VADriverContextP ctx, 
-                              int index,
-                              dri_bo *region,
-                              unsigned long offset,
-                              int w, int h,
-                              int pitch, int format)
+i965_render_src_surface_state(
+    VADriverContextP ctx, 
+    int              index,
+    dri_bo          *region,
+    unsigned long    offset,
+    int              w,
+    int              h,
+    int              pitch,
+    int              format,
+    unsigned int     flags
+)
 {
     struct i965_driver_data *i965 = i965_driver_data(ctx);  
     struct i965_render_state *render_state = &i965->render_state;
@@ -728,7 +766,7 @@ i965_render_src_surface_state(VADriverContextP ctx,
         gen7_render_set_surface_state(ss,
                                       region, offset,
                                       w, h,
-                                      pitch, format);
+                                      pitch, format, flags);
         dri_bo_emit_reloc(ss_bo,
                           I915_GEM_DOMAIN_SAMPLER, 0,
                           offset,
@@ -738,7 +776,7 @@ i965_render_src_surface_state(VADriverContextP ctx,
         i965_render_set_surface_state(ss,
                                       region, offset,
                                       w, h,
-                                      pitch, format);
+                                      pitch, format, flags);
         dri_bo_emit_reloc(ss_bo,
                           I915_GEM_DOMAIN_SAMPLER, 0,
                           offset,
@@ -752,46 +790,55 @@ i965_render_src_surface_state(VADriverContextP ctx,
 }
 
 static void
-i965_render_src_surfaces_state(VADriverContextP ctx,
-                              VASurfaceID surface)
+i965_render_src_surfaces_state(
+    VADriverContextP ctx,
+    VASurfaceID      surface,
+    unsigned int     flags
+)
 {
     struct i965_driver_data *i965 = i965_driver_data(ctx);  
     struct object_surface *obj_surface;
-    int w, h;
+    int region_pitch;
     int rw, rh;
     dri_bo *region;
 
     obj_surface = SURFACE(surface);
     assert(obj_surface);
 
-    w = obj_surface->width;
-    h = obj_surface->height;
+    region_pitch = obj_surface->width;
     rw = obj_surface->orig_width;
     rh = obj_surface->orig_height;
     region = obj_surface->bo;
 
-    i965_render_src_surface_state(ctx, 1, region, 0, rw, rh, w, I965_SURFACEFORMAT_R8_UNORM);     /* Y */
-    i965_render_src_surface_state(ctx, 2, region, 0, rw, rh, w, I965_SURFACEFORMAT_R8_UNORM);
-
-    if (obj_surface->fourcc == VA_FOURCC('Y','V','1','2')) {
-        int u3 = 5, u4 = 6, v5 = 3, v6 = 4;
-
-        i965_render_src_surface_state(ctx, u3, region, w * h, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM); /* U */
-        i965_render_src_surface_state(ctx, u4, region, w * h, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);
-        i965_render_src_surface_state(ctx, v5, region, w * h + w * h / 4, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);     /* V */
-        i965_render_src_surface_state(ctx, v6, region, w * h + w * h / 4, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);
+    i965_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);     /* Y */
+    i965_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);
+
+    if (obj_surface->fourcc == VA_FOURCC('N', 'V', '1', '2')) {
+        i965_render_src_surface_state(ctx, 3, region,
+                                      region_pitch * obj_surface->y_cb_offset,
+                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
+                                      I965_SURFACEFORMAT_R8G8_UNORM, flags); /* UV */
+        i965_render_src_surface_state(ctx, 4, region,
+                                      region_pitch * obj_surface->y_cb_offset,
+                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
+                                      I965_SURFACEFORMAT_R8G8_UNORM, flags);
     } else {
-        if (obj_surface->fourcc == VA_FOURCC('N','V','1','2')) {
-            i965_render_src_surface_state(ctx, 3, region, w * h, rw / 2, rh / 2, w, I965_SURFACEFORMAT_R8G8_UNORM); /* UV */
-            i965_render_src_surface_state(ctx, 4, region, w * h, rw / 2, rh / 2, w, I965_SURFACEFORMAT_R8G8_UNORM);
-        } else {
-            int u3 = 3, u4 = 4, v5 = 5, v6 = 6;
-            
-            i965_render_src_surface_state(ctx, u3, region, w * h, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM); /* U */
-            i965_render_src_surface_state(ctx, u4, region, w * h, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);
-            i965_render_src_surface_state(ctx, v5, region, w * h + w * h / 4, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);     /* V */
-            i965_render_src_surface_state(ctx, v6, region, w * h + w * h / 4, rw / 2, rh / 2, w / 2, I965_SURFACEFORMAT_R8_UNORM);
-        }
+        i965_render_src_surface_state(ctx, 3, region,
+                                      region_pitch * obj_surface->y_cb_offset,
+                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
+                                      I965_SURFACEFORMAT_R8_UNORM, flags); /* U */
+        i965_render_src_surface_state(ctx, 4, region,
+                                      region_pitch * obj_surface->y_cb_offset,
+                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
+                                      I965_SURFACEFORMAT_R8_UNORM, flags);
+        i965_render_src_surface_state(ctx, 5, region,
+                                      region_pitch * obj_surface->y_cr_offset,
+                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
+                                      I965_SURFACEFORMAT_R8_UNORM, flags); /* V */
+        i965_render_src_surface_state(ctx, 6, region,
+                                      region_pitch * obj_surface->y_cr_offset,
+                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
+                                      I965_SURFACEFORMAT_R8_UNORM, flags);
     }
 }
 
@@ -813,8 +860,8 @@ i965_subpic_render_src_surfaces_state(VADriverContextP ctx,
     region = obj_surface->bo;
     subpic_region = obj_image->bo;
     /*subpicture surface*/
-    i965_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format);     
-    i965_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format);     
+    i965_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);     
+    i965_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);     
 }
 
 static void
@@ -842,7 +889,7 @@ i965_render_dest_surface_state(VADriverContextP ctx, int index)
         gen7_render_set_surface_state(ss,
                                       dest_region->bo, 0,
                                       dest_region->width, dest_region->height,
-                                      dest_region->pitch, format);
+                                      dest_region->pitch, format, 0);
         dri_bo_emit_reloc(ss_bo,
                           I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                           0,
@@ -852,7 +899,7 @@ i965_render_dest_surface_state(VADriverContextP ctx, int index)
         i965_render_set_surface_state(ss,
                                       dest_region->bo, 0,
                                       dest_region->width, dest_region->height,
-                                      dest_region->pitch, format);
+                                      dest_region->pitch, format, 0);
         dri_bo_emit_reloc(ss_bo,
                           I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                           0,
@@ -999,13 +1046,14 @@ i965_surface_render_state_setup(
     VADriverContextP   ctx,
     VASurfaceID        surface,
     const VARectangle *src_rect,
-    const VARectangle *dst_rect
+    const VARectangle *dst_rect,
+    unsigned int       flags
 )
 {
     i965_render_vs_unit(ctx);
     i965_render_sf_unit(ctx);
     i965_render_dest_surface_state(ctx, 0);
-    i965_render_src_surfaces_state(ctx, surface);
+    i965_render_src_surfaces_state(ctx, surface, flags);
     i965_render_sampler(ctx);
     i965_render_wm_unit(ctx);
     i965_render_cc_viewport(ctx);
@@ -1013,6 +1061,7 @@ i965_surface_render_state_setup(
     i965_render_upload_vertex(ctx, surface, src_rect, dst_rect);
     i965_render_upload_constants(ctx, surface);
 }
+
 static void
 i965_subpic_render_state_setup(
     VADriverContextP   ctx,
@@ -1529,7 +1578,7 @@ i965_render_put_surface(
     struct intel_batchbuffer *batch = i965->batch;
 
     i965_render_initialize(ctx);
-    i965_surface_render_state_setup(ctx, surface, src_rect, dst_rect);
+    i965_surface_render_state_setup(ctx, surface, src_rect, dst_rect, flags);
     i965_surface_render_pipeline_setup(ctx);
     intel_batchbuffer_flush(batch);
 }
@@ -1683,11 +1732,12 @@ gen6_render_setup_states(
     VADriverContextP   ctx,
     VASurfaceID        surface,
     const VARectangle *src_rect,
-    const VARectangle *dst_rect
+    const VARectangle *dst_rect,
+    unsigned int       flags
 )
 {
     i965_render_dest_surface_state(ctx, 0);
-    i965_render_src_surfaces_state(ctx, surface);
+    i965_render_src_surfaces_state(ctx, surface, flags);
     i965_render_sampler(ctx);
     i965_render_cc_viewport(ctx);
     gen6_render_color_calc_state(ctx);
@@ -2053,7 +2103,7 @@ gen6_render_put_surface(
     struct intel_batchbuffer *batch = i965->batch;
 
     gen6_render_initialize(ctx);
-    gen6_render_setup_states(ctx, surface, src_rect, dst_rect);
+    gen6_render_setup_states(ctx, surface, src_rect, dst_rect, flags);
     i965_clear_dest_region(ctx);
     gen6_render_emit_states(ctx, PS_KERNEL);
     intel_batchbuffer_flush(batch);
@@ -2275,11 +2325,12 @@ gen7_render_setup_states(
     VADriverContextP   ctx,
     VASurfaceID        surface,
     const VARectangle *src_rect,
-    const VARectangle *dst_rect
+    const VARectangle *dst_rect,
+    unsigned int       flags
 )
 {
     i965_render_dest_surface_state(ctx, 0);
-    i965_render_src_surfaces_state(ctx, surface);
+    i965_render_src_surfaces_state(ctx, surface, flags);
     gen7_render_sampler(ctx);
     i965_render_cc_viewport(ctx);
     gen7_render_color_calc_state(ctx);
@@ -2818,7 +2869,7 @@ gen7_render_put_surface(
     struct intel_batchbuffer *batch = i965->batch;
 
     gen7_render_initialize(ctx);
-    gen7_render_setup_states(ctx, surface, src_rect, dst_rect);
+    gen7_render_setup_states(ctx, surface, src_rect, dst_rect, flags);
     i965_clear_dest_region(ctx);
     gen7_render_emit_states(ctx, PS_KERNEL);
     intel_batchbuffer_flush(batch);
diff --git a/src/intel_driver.h b/src/intel_driver.h
index 0ffb3a5..07e976a 100644
--- a/src/intel_driver.h
+++ b/src/intel_driver.h
@@ -93,6 +93,14 @@ struct intel_batchbuffer;
         RESTORE_BLOCKED_SIGSET();              \
     } while (0)
 
+#define WARN_ONCE(...) do { /* warn on stderr, once per expansion site */ \
+        static int g_once = 1;                                          \
+        if (g_once) {                                                   \
+            g_once = 0;                                                 \
+            fprintf(stderr, "WARNING: " __VA_ARGS__);                   \
+        }                                                               \
+    } while (0)
+
 struct intel_driver_data 
 {
     int fd;

-- 
intel-vaapi-driver packaging



More information about the pkg-multimedia-commits mailing list