[SCM] libva/upstream: Imported Upstream version 1.0.7

siretart at users.alioth.debian.org siretart at users.alioth.debian.org
Sat Jan 8 11:35:05 UTC 2011


The following commit has been merged in the upstream branch:
commit 6ce82344b119092f0f83742c4e6affe6e08d607d
Author: Reinhard Tartler <siretart at tauware.de>
Date:   Sat Jan 8 12:34:39 2011 +0100

    Imported Upstream version 1.0.7

diff --git a/.gitignore b/.gitignore
index fc9b033..a8ff985 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,6 +6,7 @@
 *.rej
 *.loT
 *.bin
+*.pc
 .deps
 .libs
 install-sh
@@ -25,5 +26,20 @@ config.status
 config.sub
 configure
 depcomp
-fw/msvdx_bin
-fw/msvdx_fw.bin
+TAGS
+/va/va_version.h
+/test/basic/test_01
+/test/basic/test_02
+/test/basic/test_03
+/test/basic/test_04
+/test/basic/test_05
+/test/basic/test_06
+/test/basic/test_07
+/test/basic/test_08
+/test/basic/test_09
+/test/basic/test_10
+/test/basic/test_11
+/test/decode/mpeg2vldemo
+/test/encode/h264encode
+/test/putsurface/putsurface
+/test/vainfo
diff --git a/build/gen_version.sh b/build/gen_version.sh
new file mode 100644
index 0000000..dd01d95
--- /dev/null
+++ b/build/gen_version.sh
@@ -0,0 +1,32 @@
+#!/bin/sh
+
+libva_topdir="$1"
+version_h_in="$2"
+
+parse_configure_ac() {
+    sed -n "/^m4_define.*${1}.*\[\([0-9]*\)\].*/s//\1/p" ${libva_topdir}/configure.ac
+}
+
+parse_configure() {
+    sed -n "/^${1}=\([0-9]*\)/s//\1/p" ${libva_topdir}/configure
+}
+
+if test -f "${libva_topdir}/configure.ac"; then
+    libva_major_version=`parse_configure_ac libva_major_version`
+    libva_minor_version=`parse_configure_ac libva_minor_version`
+    libva_micro_version=`parse_configure_ac libva_micro_version`
+elif test -f "${libva_topdir}/configure"; then
+    libva_major_version=`parse_configure LIBVA_MAJOR_VERSION`
+    libva_minor_version=`parse_configure LIBVA_MINOR_VERSION`
+    libva_micro_version=`parse_configure LIBVA_MICRO_VERSION`
+else
+    echo "ERROR: configure or configure.ac file not found in $libva_topdir/"
+    exit 1
+fi
+libva_version="$libva_major_version.$libva_minor_version.$libva_micro_version"
+
+sed -e "s/@LIBVA_MAJOR_VERSION@/${libva_major_version}/" \
+    -e "s/@LIBVA_MINOR_VERSION@/${libva_minor_version}/" \
+    -e "s/@LIBVA_MICRO_VERSION@/${libva_micro_version}/" \
+    -e "s/@LIBVA_VERSION@/${libva_version}/" \
+    $version_h_in
diff --git a/config.h b/config.h
deleted file mode 100644
index 25a1499..0000000
--- a/config.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/* config.h.  Generated from config.h.in by configure.  */
-/* config.h.in.  Generated from configure.ac by autoheader.  */
-
-/* Define to 1 if you have the <dlfcn.h> header file. */
-#define HAVE_DLFCN_H 1
-
-/* Define to 1 if you have the <GL/glx.h> header file. */
-#define HAVE_GL_GLX_H 1
-
-/* Define to 1 if you have the <GL/gl.h> header file. */
-#define HAVE_GL_GL_H 1
-
-/* Define to 1 if you have the <inttypes.h> header file. */
-#define HAVE_INTTYPES_H 1
-
-/* Define to 1 if you have the <memory.h> header file. */
-#define HAVE_MEMORY_H 1
-
-/* Define to 1 if you have the <stdint.h> header file. */
-#define HAVE_STDINT_H 1
-
-/* Define to 1 if you have the <stdlib.h> header file. */
-#define HAVE_STDLIB_H 1
-
-/* Define to 1 if you have the <strings.h> header file. */
-#define HAVE_STRINGS_H 1
-
-/* Define to 1 if you have the <string.h> header file. */
-#define HAVE_STRING_H 1
-
-/* Define to 1 if you have the <sys/stat.h> header file. */
-#define HAVE_SYS_STAT_H 1
-
-/* Define to 1 if you have the <sys/types.h> header file. */
-#define HAVE_SYS_TYPES_H 1
-
-/* Define to 1 if you have the <unistd.h> header file. */
-#define HAVE_UNISTD_H 1
-
-/* Define to the sub-directory in which libtool stores uninstalled libraries.
-   */
-#define LT_OBJDIR ".libs/"
-
-/* Name of package */
-#define PACKAGE "libva"
-
-/* Define to the address where bug reports for this package should be sent. */
-#define PACKAGE_BUGREPORT "waldo.bastian at intel.com"
-
-/* Define to the full name of this package. */
-#define PACKAGE_NAME "libva"
-
-/* Define to the full name and version of this package. */
-#define PACKAGE_STRING "libva 0.31.1"
-
-/* Define to the one symbol short name of this package. */
-#define PACKAGE_TARNAME "libva"
-
-/* Define to the home page for this package. */
-#define PACKAGE_URL ""
-
-/* Define to the version of this package. */
-#define PACKAGE_VERSION "0.31.1"
-
-/* Define to 1 if you have the ANSI C header files. */
-#define STDC_HEADERS 1
-
-/* Version number of package */
-#define VERSION "0.31.1"
-
-/* Number of bits in a file offset, on hosts where this is settable. */
-#define _FILE_OFFSET_BITS 64
-
-/* Define for large files, on AIX-style hosts. */
-/* #undef _LARGE_FILES */
diff --git a/configure.ac b/configure.ac
index 6debe95..786c725 100644
--- a/configure.ac
+++ b/configure.ac
@@ -29,7 +29,7 @@ m4_define([libva_version],
           [libva_major_version.libva_minor_version.libva_micro_version])
 
 # if the library source code has changed, increment revision
-m4_define([libva_lt_revision], [6])
+m4_define([libva_lt_revision], [7])
 # if any interface was added/removed/changed, then inc current, reset revision
 m4_define([libva_lt_current], [1])
 # if any interface was added since last public release, then increment age
@@ -42,6 +42,7 @@ AC_CONFIG_SRCDIR([Makefile.am])
 AM_INIT_AUTOMAKE([dist-bzip2])
 
 AM_CONFIG_HEADER([config.h])
+m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
 
 LIBVA_MAJOR_VERSION=libva_major_version
 LIBVA_MINOR_VERSION=libva_minor_version
@@ -100,12 +101,12 @@ PKG_CHECK_MODULES([XEXT],[xext])
 PKG_CHECK_MODULES([XFIXES], [xfixes])
 PKG_CHECK_MODULES([DRM], [libdrm])
 
-PKG_CHECK_MODULES(GEN4ASM, [intel-gen4asm >= 1.0], [gen4asm=yes], [gen4asm=no])
+PKG_CHECK_MODULES(GEN4ASM, [intel-gen4asm >= 1.1], [gen4asm=yes], [gen4asm=no])
 AM_CONDITIONAL(HAVE_GEN4ASM, test x$gen4asm = xyes)
 
-# Check for libdrm >= 2.4.21 (needed for i965_drv_video.so)
-if test x$enable_i965_driver = xyes && ! $PKG_CONFIG --atleast-version=2.4.21 libdrm; then
-    AC_MSG_WARN([libdrm < 2.4.21 found, disabling build of i965 video driver])
+# Check for libdrm >= 2.4.23 (needed for i965_drv_video.so)
+if test x$enable_i965_driver = xyes && ! $PKG_CONFIG --atleast-version=2.4.23 libdrm; then
+    AC_MSG_WARN([libdrm < 2.4.23 found, disabling build of i965 video driver])
     enable_i965_driver=no
 fi
 AM_CONDITIONAL(BUILD_I965_DRIVER, test x$enable_i965_driver = xyes)
@@ -175,3 +176,25 @@ AC_OUTPUT([
 	libva-tpi.pc
 ])
 
+# Print a small summary
+
+echo ""
+echo "libva - ${LIBVA_VERSION}"
+echo ""
+
+echo " • Global :"
+echo "     Prefix: ${prefix}"
+echo ""
+
+AS_IF([test x$enable_i965_driver = xyes], [DRIVERS="i965 $DRIVERS"]) 
+AS_IF([test x$enable_dummy_driver = xyes], [DRIVERS="dummy $DRIVERS"])
+
+echo " • Drivers: ${DRIVERS}"
+
+AS_IF([test x$USE_GLX = xyes], [BACKENDS="glx $BACKENDS"])
+BACKENDS="x11 $BACKENDS"
+AS_IF([test x$enable_dummy_backend = xyes], [BACKENDS="dummy 
+$BACKENDS"])
+
+echo " • Winsys : ${BACKENDS}"
+
diff --git a/i965_drv_video/Makefile.am b/i965_drv_video/Makefile.am
index f32d579..8dd13bd 100644
--- a/i965_drv_video/Makefile.am
+++ b/i965_drv_video/Makefile.am
@@ -43,7 +43,8 @@ i965_drv_video_la_SOURCES =	\
 	i965_avc_bsd.c		\
 	i965_avc_hw_scoreboard.c\
 	i965_avc_ildb.c		\
-	i965_post_processing.c
+	i965_post_processing.c	\
+	gen6_mfd.c
 
 noinst_HEADERS =                \
 	object_heap.h           \
@@ -61,4 +62,5 @@ noinst_HEADERS =                \
 	i965_avc_bsd.h		\
 	i965_avc_hw_scoreboard.h\
 	i965_avc_ildb.h		\
-	i965_post_processing.h
+	i965_post_processing.h	\
+	gen6_mfd.h
diff --git a/i965_drv_video/gen6_mfd.c b/i965_drv_video/gen6_mfd.c
new file mode 100644
index 0000000..0fe7860
--- /dev/null
+++ b/i965_drv_video/gen6_mfd.c
@@ -0,0 +1,1484 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Xiang Haihao <haihao.xiang at intel.com>
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#include <va/va_backend.h>
+
+#include "intel_batchbuffer.h"
+#include "intel_driver.h"
+
+#include "i965_defines.h"
+#include "i965_drv_video.h"
+
+#include "gen6_mfd.h"
+
+#define DMV_SIZE        0x88000 /* 557056 bytes for a frame */
+
+static const uint32_t zigzag_direct[64] = {
+    0,   1,  8, 16,  9,  2,  3, 10,
+    17, 24, 32, 25, 18, 11,  4,  5,
+    12, 19, 26, 33, 40, 48, 41, 34,
+    27, 20, 13,  6,  7, 14, 21, 28,
+    35, 42, 49, 56, 57, 50, 43, 36,
+    29, 22, 15, 23, 30, 37, 44, 51,
+    58, 59, 52, 45, 38, 31, 39, 46,
+    53, 60, 61, 54, 47, 55, 62, 63
+};
+
+static void
+gen6_mfd_avc_frame_store_index(VADriverContextP ctx, VAPictureParameterBufferH264 *pic_param)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_media_state *media_state = &i965->media_state;
+    struct gen6_mfd_context *gen6_mfd_context = (struct gen6_mfd_context *)media_state->private_context;
+    int i, j;
+
+    assert(ARRAY_ELEMS(gen6_mfd_context->reference_surface) == ARRAY_ELEMS(pic_param->ReferenceFrames));
+
+    for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
+        int found = 0;
+
+        if (gen6_mfd_context->reference_surface[i].surface_id == VA_INVALID_ID)
+            continue;
+
+        for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
+            VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[j];
+            if (ref_pic->flags & VA_PICTURE_H264_INVALID)
+                continue;
+
+            if (gen6_mfd_context->reference_surface[i].surface_id == ref_pic->picture_id) {
+                found = 1;
+                break;
+            }
+        }
+
+        if (!found) {
+            struct object_surface *obj_surface = SURFACE(gen6_mfd_context->reference_surface[i].surface_id);
+            obj_surface->flags &= ~SURFACE_REFERENCED;
+
+            if (obj_surface->flags & SURFACE_DISPLAYED) {
+                dri_bo_unreference(obj_surface->bo);
+                obj_surface->bo = NULL;
+                obj_surface->flags = 0;
+            }
+
+            if (obj_surface->free_private_data)
+                obj_surface->free_private_data(&obj_surface->private_data);
+
+            gen6_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
+            gen6_mfd_context->reference_surface[i].frame_store_id = -1;
+        }
+    }
+
+    for (i = 0; i < ARRAY_ELEMS(pic_param->ReferenceFrames); i++) {
+        VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[i];
+        int found = 0;
+
+        if (ref_pic->flags & VA_PICTURE_H264_INVALID)
+            continue;
+
+        for (j = 0; j < ARRAY_ELEMS(gen6_mfd_context->reference_surface); j++) {
+            if (gen6_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID)
+                continue;
+            
+            if (gen6_mfd_context->reference_surface[j].surface_id == ref_pic->picture_id) {
+                found = 1;
+                break;
+            }
+        }
+
+        if (!found) {
+            int frame_idx;
+            struct object_surface *obj_surface = SURFACE(ref_pic->picture_id);
+            
+            if (obj_surface->bo == NULL) {
+                uint32_t tiling_mode = I915_TILING_Y;
+                unsigned long pitch;
+        
+                obj_surface->bo = drm_intel_bo_alloc_tiled(i965->intel.bufmgr, 
+                                                           "vaapi surface",
+                                                           obj_surface->width, 
+                                                           obj_surface->height + obj_surface->height / 2,
+                                                           1,
+                                                           &tiling_mode,
+                                                           &pitch,
+                                                           0);
+                assert(obj_surface->bo);
+                assert(tiling_mode == I915_TILING_Y);
+                assert(pitch == obj_surface->width);
+            }
+
+            for (frame_idx = 0; frame_idx < ARRAY_ELEMS(gen6_mfd_context->reference_surface); frame_idx++) {
+                for (j = 0; j < ARRAY_ELEMS(gen6_mfd_context->reference_surface); j++) {
+                    if (gen6_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID)
+                        continue;
+
+                    if (gen6_mfd_context->reference_surface[j].frame_store_id == frame_idx)
+                        break;
+                }
+
+                if (j == ARRAY_ELEMS(gen6_mfd_context->reference_surface))
+                    break;
+            }
+
+            assert(frame_idx < ARRAY_ELEMS(gen6_mfd_context->reference_surface));
+
+            for (j = 0; j < ARRAY_ELEMS(gen6_mfd_context->reference_surface); j++) {
+                if (gen6_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID) {
+                    gen6_mfd_context->reference_surface[j].surface_id = ref_pic->picture_id;
+                    gen6_mfd_context->reference_surface[j].frame_store_id = frame_idx;
+                    break;
+                }
+            }
+        }
+    }
+
+    /* sort */
+    for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface) - 1; i++) {
+        if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
+            gen6_mfd_context->reference_surface[i].frame_store_id == i)
+            continue;
+
+        for (j = i + 1; j < ARRAY_ELEMS(gen6_mfd_context->reference_surface); j++) {
+            if (gen6_mfd_context->reference_surface[j].surface_id != VA_INVALID_ID &&
+                gen6_mfd_context->reference_surface[j].frame_store_id == i) {
+                VASurfaceID id = gen6_mfd_context->reference_surface[i].surface_id;
+                int frame_idx = gen6_mfd_context->reference_surface[i].frame_store_id;
+
+                gen6_mfd_context->reference_surface[i].surface_id = gen6_mfd_context->reference_surface[j].surface_id;
+                gen6_mfd_context->reference_surface[i].frame_store_id = gen6_mfd_context->reference_surface[j].frame_store_id;
+                gen6_mfd_context->reference_surface[j].surface_id = id;
+                gen6_mfd_context->reference_surface[j].frame_store_id = frame_idx;
+                break;
+            }
+        }
+    }
+}
+
+static void 
+gen6_mfd_free_avc_surface(void **data)
+{
+    struct gen6_mfd_surface *gen6_mfd_surface = *data;
+
+    if (!gen6_mfd_surface)
+        return;
+
+    dri_bo_unreference(gen6_mfd_surface->dmv_top);
+    gen6_mfd_surface->dmv_top = NULL;
+    dri_bo_unreference(gen6_mfd_surface->dmv_bottom);
+    gen6_mfd_surface->dmv_bottom = NULL;
+
+    free(gen6_mfd_surface);
+    *data = NULL;
+}
+
+static void
+gen6_mfd_init_avc_surface(VADriverContextP ctx, 
+                          VAPictureParameterBufferH264 *pic_param,
+                          struct object_surface *obj_surface)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct gen6_mfd_surface *gen6_mfd_surface = obj_surface->private_data;
+
+    obj_surface->free_private_data = gen6_mfd_free_avc_surface;
+
+    if (!gen6_mfd_surface) {
+        gen6_mfd_surface = calloc(sizeof(struct gen6_mfd_surface), 1);
+        assert((obj_surface->size & 0x3f) == 0);
+        obj_surface->private_data = gen6_mfd_surface;
+    }
+
+    gen6_mfd_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
+                                         !pic_param->seq_fields.bits.direct_8x8_inference_flag);
+
+    if (gen6_mfd_surface->dmv_top == NULL) {
+        gen6_mfd_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
+                                                 "direct mv w/r buffer",
+                                                 DMV_SIZE,
+                                                 0x1000);
+    }
+
+    if (gen6_mfd_surface->dmv_bottom_flag &&
+        gen6_mfd_surface->dmv_bottom == NULL) {
+        gen6_mfd_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
+                                                    "direct mv w/r buffer",
+                                                    DMV_SIZE,
+                                                    0x1000);
+    }
+}
+
+static void
+gen6_mfd_pipe_mode_select(VADriverContextP ctx,
+                          struct decode_state *decode_state,
+                          int standard_select)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_media_state *media_state = &i965->media_state;
+    struct gen6_mfd_context *gen6_mfd_context = media_state->private_context;
+
+    assert(standard_select == MFX_FORMAT_MPEG2 ||
+           standard_select == MFX_FORMAT_AVC ||
+           standard_select == MFX_FORMAT_VC1);
+
+    BEGIN_BCS_BATCH(ctx, 4);
+    OUT_BCS_BATCH(ctx, MFX_PIPE_MODE_SELECT | (4 - 2));
+    OUT_BCS_BATCH(ctx,
+                  (MFD_MODE_VLD << 16) | /* VLD mode */
+                  (0 << 10) | /* disable Stream-Out */
+                  (gen6_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
+                  (gen6_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
+                  (0 << 7)  | /* disable TLB prefetch */
+                  (0 << 5)  | /* not in stitch mode */
+                  (MFX_CODEC_DECODE << 4)  | /* decoding mode */
+                  (standard_select << 0));
+    OUT_BCS_BATCH(ctx,
+                  (0 << 20) | /* round flag in PB slice */
+                  (0 << 19) | /* round flag in Intra8x8 */
+                  (0 << 7)  | /* expand NOA bus flag */
+                  (1 << 6)  | /* must be 1 */
+                  (0 << 5)  | /* disable clock gating for NOA */
+                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
+                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
+                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
+                  (0 << 1)  | /* AVC long field motion vector */
+                  (1 << 0));  /* always calculate AVC ILDB boundary strength */
+    OUT_BCS_BATCH(ctx, 0);
+    ADVANCE_BCS_BATCH(ctx);
+}
+
+static void
+gen6_mfd_surface_state(VADriverContextP ctx,
+                       struct decode_state *decode_state,
+                       int standard_select)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct object_surface *obj_surface = SURFACE(decode_state->current_render_target);
+    assert(obj_surface);
+    
+    BEGIN_BCS_BATCH(ctx, 6);
+    OUT_BCS_BATCH(ctx, MFX_SURFACE_STATE | (6 - 2));
+    OUT_BCS_BATCH(ctx, 0);
+    OUT_BCS_BATCH(ctx,
+                  ((obj_surface->height - 1) << 19) |
+                  ((obj_surface->width - 1) << 6));
+    OUT_BCS_BATCH(ctx,
+                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
+                  (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
+                  (0 << 22) | /* surface object control state, FIXME??? */
+                  ((obj_surface->width - 1) << 3) | /* pitch */
+                  (0 << 2)  | /* must be 0 for interleave U/V */
+                  (1 << 1)  | /* must be y-tiled */
+                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, FIXME: must be 1 ??? */
+    OUT_BCS_BATCH(ctx,
+                  (0 << 16) | /* must be 0 for interleave U/V */
+                  (obj_surface->height)); /* y offset for U(cb) */
+    OUT_BCS_BATCH(ctx, 0);
+    ADVANCE_BCS_BATCH(ctx);
+}
+
+static void
+gen6_mfd_pipe_buf_addr_state(VADriverContextP ctx,
+                             struct decode_state *decode_state,
+                             int standard_select)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_media_state *media_state = &i965->media_state;
+    struct gen6_mfd_context *gen6_mfd_context = media_state->private_context;
+    int i;
+
+    BEGIN_BCS_BATCH(ctx, 24);
+    OUT_BCS_BATCH(ctx, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
+    if (gen6_mfd_context->pre_deblocking_output.valid)
+        OUT_BCS_RELOC(ctx, gen6_mfd_context->pre_deblocking_output.bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                      0);
+    else
+        OUT_BCS_BATCH(ctx, 0);
+
+    if (gen6_mfd_context->post_deblocking_output.valid)
+        OUT_BCS_RELOC(ctx, gen6_mfd_context->post_deblocking_output.bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                      0);
+    else
+        OUT_BCS_BATCH(ctx, 0);
+
+    OUT_BCS_BATCH(ctx, 0); /* ignore for decoding */
+    OUT_BCS_BATCH(ctx, 0); /* ignore for decoding */
+
+    if (gen6_mfd_context->intra_row_store_scratch_buffer.valid)
+        OUT_BCS_RELOC(ctx, gen6_mfd_context->intra_row_store_scratch_buffer.bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                      0);
+    else
+        OUT_BCS_BATCH(ctx, 0);
+
+    if (gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
+        OUT_BCS_RELOC(ctx, gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                      0);
+    else
+        OUT_BCS_BATCH(ctx, 0);
+
+    /* DW 7..22 */
+    for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
+        struct object_surface *obj_surface;
+
+        if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
+            obj_surface = SURFACE(gen6_mfd_context->reference_surface[i].surface_id);
+            assert(obj_surface && obj_surface->bo);
+
+            OUT_BCS_RELOC(ctx, obj_surface->bo,
+                          I915_GEM_DOMAIN_INSTRUCTION, 0,
+                          0);
+        } else {
+            OUT_BCS_BATCH(ctx, 0);
+        }
+    }
+
+    OUT_BCS_BATCH(ctx, 0);   /* ignore DW23 for decoding */
+    ADVANCE_BCS_BATCH(ctx);
+}
+
+static void
+gen6_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
+                                 dri_bo *slice_data_bo,
+                                 int standard_select)
+{
+    BEGIN_BCS_BATCH(ctx, 11);
+    OUT_BCS_BATCH(ctx, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
+    OUT_BCS_RELOC(ctx, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
+    OUT_BCS_BATCH(ctx, 0);
+    OUT_BCS_BATCH(ctx, 0); /* ignore for VLD mode */
+    OUT_BCS_BATCH(ctx, 0);
+    OUT_BCS_BATCH(ctx, 0); /* ignore for VLD mode */
+    OUT_BCS_BATCH(ctx, 0);
+    OUT_BCS_BATCH(ctx, 0); /* ignore for VLD mode */
+    OUT_BCS_BATCH(ctx, 0);
+    OUT_BCS_BATCH(ctx, 0); /* ignore for VLD mode */
+    OUT_BCS_BATCH(ctx, 0);
+    ADVANCE_BCS_BATCH(ctx);
+}
+
+static void
+gen6_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
+                                 struct decode_state *decode_state,
+                                 int standard_select)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_media_state *media_state = &i965->media_state;
+    struct gen6_mfd_context *gen6_mfd_context = media_state->private_context;
+
+    BEGIN_BCS_BATCH(ctx, 4);
+    OUT_BCS_BATCH(ctx, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
+
+    if (gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
+        OUT_BCS_RELOC(ctx, gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                      0);
+    else
+        OUT_BCS_BATCH(ctx, 0);
+
+    if (gen6_mfd_context->mpr_row_store_scratch_buffer.valid)
+        OUT_BCS_RELOC(ctx, gen6_mfd_context->mpr_row_store_scratch_buffer.bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                      0);
+    else
+        OUT_BCS_BATCH(ctx, 0);
+
+    if (gen6_mfd_context->bitplane_read_buffer.valid)
+        OUT_BCS_RELOC(ctx, gen6_mfd_context->bitplane_read_buffer.bo,
+                      I915_GEM_DOMAIN_INSTRUCTION, 0,
+                      0);
+    else
+        OUT_BCS_BATCH(ctx, 0);
+
+    ADVANCE_BCS_BATCH(ctx);
+}
+
+static void
+gen6_mfd_aes_state(VADriverContextP ctx,
+                   struct decode_state *decode_state,
+                   int standard_select)
+{
+    /* FIXME */
+}
+
+static void
+gen6_mfd_wait(VADriverContextP ctx,
+              struct decode_state *decode_state,
+              int standard_select)
+{
+    BEGIN_BCS_BATCH(ctx, 1);
+    OUT_BCS_BATCH(ctx, MFX_WAIT | (1 << 8));
+    ADVANCE_BCS_BATCH(ctx);
+}
+
+static void
+gen6_mfd_avc_img_state(VADriverContextP ctx, struct decode_state *decode_state)
+{
+    int qm_present_flag;
+    int img_struct;
+    int mbaff_frame_flag;
+    unsigned int width_in_mbs, height_in_mbs;
+    VAPictureParameterBufferH264 *pic_param;
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
+    assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));
+
+    if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
+        qm_present_flag = 1;
+    else
+        qm_present_flag = 0; /* built-in QM matrices */
+
+    if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
+        img_struct = 1;
+    else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
+        img_struct = 3;
+    else
+        img_struct = 0;
+
+    if ((img_struct & 0x1) == 0x1) {
+        assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
+    } else {
+        assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
+    }
+
+    if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
+        assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
+        assert(pic_param->pic_fields.bits.field_pic_flag == 0);
+    } else {
+        assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
+    }
+
+    mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
+                        !pic_param->pic_fields.bits.field_pic_flag);
+
+    width_in_mbs = ((pic_param->picture_width_in_mbs_minus1 + 1) & 0xff);
+    height_in_mbs = ((pic_param->picture_height_in_mbs_minus1 + 1) & 0xff); /* frame height */
+    assert(!((width_in_mbs * height_in_mbs) & 0x8000)); /* hardware requirement */
+
+    /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
+    assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
+           pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
+    assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */
+
+    BEGIN_BCS_BATCH(ctx, 13);
+    OUT_BCS_BATCH(ctx, MFX_AVC_IMG_STATE | (13 - 2));
+    OUT_BCS_BATCH(ctx, 
+                  ((width_in_mbs * height_in_mbs) & 0x7fff));
+    OUT_BCS_BATCH(ctx, 
+                  (height_in_mbs << 16) | 
+                  (width_in_mbs << 0));
+    OUT_BCS_BATCH(ctx, 
+                  ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
+                  ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
+                  (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
+                  (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
+                  (1 << 12) | /* always 1, hardware requirement */
+                  (qm_present_flag << 10) |
+                  (img_struct << 8) |
+                  (16 << 0));
+    OUT_BCS_BATCH(ctx,
+                  (pic_param->seq_fields.bits.chroma_format_idc << 10) |
+                  (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
+                  ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
+                  (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
+                  (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
+                  (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
+                  (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
+                  (mbaff_frame_flag << 1) |
+                  (pic_param->pic_fields.bits.field_pic_flag << 0));
+    OUT_BCS_BATCH(ctx, 0);
+    OUT_BCS_BATCH(ctx, 0);
+    OUT_BCS_BATCH(ctx, 0);
+    OUT_BCS_BATCH(ctx, 0);
+    OUT_BCS_BATCH(ctx, 0);
+    OUT_BCS_BATCH(ctx, 0);
+    OUT_BCS_BATCH(ctx, 0);
+    OUT_BCS_BATCH(ctx, 0);
+    ADVANCE_BCS_BATCH(ctx);
+}
+
+static void
+gen6_mfd_avc_qm_state(VADriverContextP ctx, struct decode_state *decode_state)
+{
+    int cmd_len;
+    VAIQMatrixBufferH264 *iq_matrix;
+    VAPictureParameterBufferH264 *pic_param;
+
+    if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
+        return;
+
+    iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
+
+    cmd_len = 2 + 6 * 4; /* always load six 4x4 scaling matrices */
+
+    if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
+        cmd_len += 2 * 16; /* load two 8x8 scaling matrices */
+
+    BEGIN_BCS_BATCH(ctx, cmd_len);
+    OUT_BCS_BATCH(ctx, MFX_AVC_QM_STATE | (cmd_len - 2));
+
+    if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
+        OUT_BCS_BATCH(ctx, 
+                      (0x0  << 8) | /* don't use default built-in matrices */
+                      (0xff << 0)); /* six 4x4 and two 8x8 scaling matrices */
+    else
+        OUT_BCS_BATCH(ctx, 
+                      (0x0  << 8) | /* don't use default built-in matrices */
+                      (0x3f << 0)); /* six 4x4 scaling matrices */
+
+    intel_batchbuffer_data_bcs(ctx, &iq_matrix->ScalingList4x4[0][0], 6 * 4 * 4);
+
+    if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
+        intel_batchbuffer_data_bcs(ctx, &iq_matrix->ScalingList8x8[0][0], 2 * 16 * 4);
+
+    ADVANCE_BCS_BATCH(ctx);
+}
+
+/*
+ * Emit one 69-dword MFX_AVC_DIRECTMODE_STATE command on the BCS batch.
+ *
+ * Layout as written below: 1 header dword, two dwords per entry of
+ * gen6_mfd_context->reference_surface (direct-MV buffers), two dwords for
+ * the current picture's direct-MV buffers, two POC dwords per reference
+ * entry and finally two POC dwords for the current picture.
+ * NOTE(review): the 69-dword budget only adds up if reference_surface has
+ * 16 entries (see the "0..15" comment below) -- confirm against the header.
+ *
+ * slice_param is accepted but not read by this function.
+ */
+static void
+gen6_mfd_avc_directmode_state(VADriverContextP ctx,
+                              VAPictureParameterBufferH264 *pic_param,
+                              VASliceParameterBufferH264 *slice_param)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_media_state *media_state = &i965->media_state;
+    struct gen6_mfd_context *gen6_mfd_context = media_state->private_context;
+    struct object_surface *obj_surface;
+    struct gen6_mfd_surface *gen6_mfd_surface;
+    VAPictureH264 *va_pic;
+    int i, j;
+
+    BEGIN_BCS_BATCH(ctx, 69);
+    OUT_BCS_BATCH(ctx, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
+
+    /* reference surfaces 0..15 */
+    for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
+        if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
+            obj_surface = SURFACE(gen6_mfd_context->reference_surface[i].surface_id);
+            assert(obj_surface);
+            gen6_mfd_surface = obj_surface->private_data;
+
+            if (gen6_mfd_surface == NULL) {
+                /* surface carries no AVC private data: emit a null pair */
+                OUT_BCS_BATCH(ctx, 0);
+                OUT_BCS_BATCH(ctx, 0);
+            } else {
+                OUT_BCS_RELOC(ctx, gen6_mfd_surface->dmv_top,
+                              I915_GEM_DOMAIN_INSTRUCTION, 0,
+                              0);
+
+                /* without a separate bottom buffer the top one is emitted twice */
+                if (gen6_mfd_surface->dmv_bottom_flag == 1)
+                    OUT_BCS_RELOC(ctx, gen6_mfd_surface->dmv_bottom,
+                                  I915_GEM_DOMAIN_INSTRUCTION, 0,
+                                  0);
+                else
+                    OUT_BCS_RELOC(ctx, gen6_mfd_surface->dmv_top,
+                                  I915_GEM_DOMAIN_INSTRUCTION, 0,
+                                  0);
+            }
+        } else {
+            /* unused slot: keep the fixed command layout with a null pair */
+            OUT_BCS_BATCH(ctx, 0);
+            OUT_BCS_BATCH(ctx, 0);
+        }
+    }
+
+    /* the current decoding frame/field */
+    va_pic = &pic_param->CurrPic;
+    assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
+    obj_surface = SURFACE(va_pic->picture_id);
+    assert(obj_surface && obj_surface->bo && obj_surface->private_data);
+    gen6_mfd_surface = obj_surface->private_data;
+
+    /*
+     * Unlike the reference entries above, the fourth argument is non-zero
+     * here (presumably the write domain, since the current picture's
+     * buffers are the ones being produced -- confirm against OUT_BCS_RELOC).
+     */
+    OUT_BCS_RELOC(ctx, gen6_mfd_surface->dmv_top,
+                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                  0);
+
+    if (gen6_mfd_surface->dmv_bottom_flag == 1)
+        OUT_BCS_RELOC(ctx, gen6_mfd_surface->dmv_bottom,
+                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                      0);
+    else
+        OUT_BCS_RELOC(ctx, gen6_mfd_surface->dmv_top,
+                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+                      0);
+
+    /* POC List */
+    for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
+        if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
+            int found = 0;
+            /* look up the valid ReferenceFrames entry that uses this surface */
+            for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
+                va_pic = &pic_param->ReferenceFrames[j];
+                
+                if (va_pic->flags & VA_PICTURE_H264_INVALID)
+                    continue;
+
+                if (va_pic->picture_id == gen6_mfd_context->reference_surface[i].surface_id) {
+                    found = 1;
+                    break;
+                }
+            }
+
+            /*
+             * NOTE(review): with assertions compiled out and no match,
+             * va_pic is left pointing at the last ReferenceFrames entry.
+             */
+            assert(found == 1);
+            assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
+            
+            OUT_BCS_BATCH(ctx, va_pic->TopFieldOrderCnt);
+            OUT_BCS_BATCH(ctx, va_pic->BottomFieldOrderCnt);
+        } else {
+            OUT_BCS_BATCH(ctx, 0);
+            OUT_BCS_BATCH(ctx, 0);
+        }
+    }
+
+    /* POC pair of the picture being decoded closes the command */
+    va_pic = &pic_param->CurrPic;
+    OUT_BCS_BATCH(ctx, va_pic->TopFieldOrderCnt);
+    OUT_BCS_BATCH(ctx, va_pic->BottomFieldOrderCnt);
+
+    ADVANCE_BCS_BATCH(ctx);
+}
+
+/*
+ * Emit one MFX_AVC_SLICE_STATE command describing slice_param.
+ *
+ * ctx              - driver context owning the BCS batch buffer
+ * pic_param        - picture parameters of the picture being decoded
+ * slice_param      - the slice to describe
+ * next_slice_param - the slice that follows in decode order, or NULL when
+ *                    slice_param is the last slice of the picture (this
+ *                    also sets the "last slice" flag in the command)
+ *
+ * SI/SP slices are programmed as I/P slices respectively.
+ */
+static void
+gen6_mfd_avc_slice_state(VADriverContextP ctx,
+                         VAPictureParameterBufferH264 *pic_param,
+                         VASliceParameterBufferH264 *slice_param,
+                         VASliceParameterBufferH264 *next_slice_param)
+{
+    int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
+    int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
+    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
+    int num_ref_idx_l0, num_ref_idx_l1;
+    int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
+                         pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
+    int weighted_pred_idc = 0;
+    int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
+    int slice_type;
+
+    if (slice_param->slice_type == SLICE_TYPE_I ||
+        slice_param->slice_type == SLICE_TYPE_SI) {
+        slice_type = SLICE_TYPE_I;
+    } else if (slice_param->slice_type == SLICE_TYPE_P ||
+               slice_param->slice_type == SLICE_TYPE_SP) {
+        slice_type = SLICE_TYPE_P;
+    } else { 
+        assert(slice_param->slice_type == SLICE_TYPE_B);
+        slice_type = SLICE_TYPE_B;
+    }
+
+    if (slice_type == SLICE_TYPE_I) {
+        assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
+        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
+        num_ref_idx_l0 = 0;
+        num_ref_idx_l1 = 0;
+    } else if (slice_type == SLICE_TYPE_P) {
+        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
+        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
+        num_ref_idx_l1 = 0;
+        weighted_pred_idc = (pic_param->pic_fields.bits.weighted_pred_flag == 1);
+    } else {
+        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
+        num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
+        /*
+         * weighted_bipred_idc is a 2-bit syntax element: 0 = default,
+         * 1 = explicit, 2 = implicit weighting. Pass it through unchanged;
+         * reducing it to (idc == 1) would program implicit weighting as
+         * "no weighting".
+         */
+        weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
+    }
+
+    /* in MBAFF pictures first_mb_in_slice counts macroblock pairs */
+    first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
+    slice_hor_pos = first_mb_in_slice % width_in_mbs; 
+    slice_ver_pos = first_mb_in_slice / width_in_mbs;
+
+    if (next_slice_param) {
+        first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
+        next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs; 
+        next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
+    } else {
+        /* last slice: the "next" position is one row past the picture */
+        next_slice_hor_pos = 0;
+        next_slice_ver_pos = height_in_mbs;
+    }
+
+    BEGIN_BCS_BATCH(ctx, 11); /* FIXME: is it 10??? */
+    OUT_BCS_BATCH(ctx, MFX_AVC_SLICE_STATE | (11 - 2));
+    OUT_BCS_BATCH(ctx, slice_type);
+    OUT_BCS_BATCH(ctx, 
+                  (num_ref_idx_l1 << 24) |
+                  (num_ref_idx_l0 << 16) |
+                  (slice_param->chroma_log2_weight_denom << 8) |
+                  (slice_param->luma_log2_weight_denom << 0));
+    OUT_BCS_BATCH(ctx, 
+                  /* unsigned shift: the value 2 reaches bit 31 */
+                  ((unsigned int)weighted_pred_idc << 30) |
+                  (slice_param->direct_spatial_mv_pred_flag << 29) |
+                  (slice_param->disable_deblocking_filter_idc << 27) |
+                  (slice_param->cabac_init_idc << 24) |
+                  ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
+                  ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
+                  ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
+    OUT_BCS_BATCH(ctx, 
+                  (slice_ver_pos << 24) |
+                  (slice_hor_pos << 16) | 
+                  (first_mb_in_slice << 0));
+    OUT_BCS_BATCH(ctx,
+                  (next_slice_ver_pos << 16) |
+                  (next_slice_hor_pos << 0));
+    OUT_BCS_BATCH(ctx, 
+                  (next_slice_param == NULL) << 19); /* last slice flag */
+    OUT_BCS_BATCH(ctx, 0);
+    OUT_BCS_BATCH(ctx, 0);
+    OUT_BCS_BATCH(ctx, 0);
+    OUT_BCS_BATCH(ctx, 0);
+    ADVANCE_BCS_BATCH(ctx);
+}
+
+/*
+ * Emit the MFX_AVC_SLICE_STATE command of the "phantom" slice that is
+ * queued after the real slices of a picture. Every dword is zero except
+ * the one holding the frame height in macroblocks (bits 24 and up) ORed
+ * with the macroblock count of the picture (halved for field pictures).
+ */
+static void
+gen6_mfd_avc_phantom_slice_state(VADriverContextP ctx, VAPictureParameterBufferH264 *pic_param)
+{
+    int mbs_per_row = pic_param->picture_width_in_mbs_minus1 + 1;
+    int mb_rows = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
+    int pictures_per_frame = 1 + !!pic_param->pic_fields.bits.field_pic_flag;
+    int mbs_in_picture = mbs_per_row * mb_rows / pictures_per_frame;
+    int n;
+
+    BEGIN_BCS_BATCH(ctx, 11); /* FIXME: is it 10??? */
+    OUT_BCS_BATCH(ctx, MFX_AVC_SLICE_STATE | (11 - 2));
+
+    /* three zero dwords precede the position dword */
+    for (n = 0; n < 3; n++) {
+        OUT_BCS_BATCH(ctx, 0);
+    }
+
+    OUT_BCS_BATCH(ctx, (mb_rows << 24) | mbs_in_picture);
+
+    /* the remaining six dwords are zero */
+    for (n = 0; n < 6; n++) {
+        OUT_BCS_BATCH(ctx, 0);
+    }
+
+    ADVANCE_BCS_BATCH(ctx);
+}
+
+/*
+ * Emit MFX_AVC_REF_IDX_STATE for the reference picture list(s) of a slice.
+ *
+ * Nothing is emitted for I/SI slices. P/SP slices get one command (list 0,
+ * taken from RefPicList0); any other slice type gets two (list 0, then
+ * list 1 from RefPicList1). Each command is 10 dwords: header, list index
+ * and the refs[] table below.
+ *
+ * pic_param is accepted but not read by this function.
+ */
+static void
+gen6_mfd_avc_ref_idx_state(VADriverContextP ctx,
+                           VAPictureParameterBufferH264 *pic_param,
+                           VASliceParameterBufferH264 *slice_param)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_media_state *media_state = &i965->media_state;
+    struct gen6_mfd_context *gen6_mfd_context = media_state->private_context;
+    int i, j, num_ref_list;
+    /*
+     * One table entry per reference index; the bit-fields add up to 8 bits.
+     * NOTE(review): the 10-dword command size relies on the compiler packing
+     * each entry into a single byte (sizeof(refs) == 32) -- confirm.
+     */
+    struct {
+        unsigned char bottom_idc:1;
+        unsigned char frame_store_index:4;
+        unsigned char field_picture:1;
+        unsigned char long_term:1;
+        unsigned char non_exist:1;
+    } refs[32];
+
+    if (slice_param->slice_type == SLICE_TYPE_I ||
+        slice_param->slice_type == SLICE_TYPE_SI)
+        return;
+
+    if (slice_param->slice_type == SLICE_TYPE_P ||
+        slice_param->slice_type == SLICE_TYPE_SP) {
+        num_ref_list = 1;
+    } else {
+        num_ref_list = 2;
+    }
+
+    for (i = 0; i < num_ref_list; i++) {
+        VAPictureH264 *va_pic;
+
+        if (i == 0) {
+            va_pic = slice_param->RefPicList0;
+        } else {
+            va_pic = slice_param->RefPicList1;
+        }
+
+        BEGIN_BCS_BATCH(ctx, 10);
+        OUT_BCS_BATCH(ctx, MFX_AVC_REF_IDX_STATE | (10 - 2));
+        OUT_BCS_BATCH(ctx, i);
+
+        /* walks 32 consecutive entries; assumes RefPicList0/1 hold 32 each -- TODO confirm */
+        for (j = 0; j < 32; j++) {
+            if (va_pic->flags & VA_PICTURE_H264_INVALID) {
+                /* unused entry: every bit of the byte is set */
+                refs[j].non_exist = 1;
+                refs[j].long_term = 1;
+                refs[j].field_picture = 1;
+                refs[j].frame_store_index = 0xf;
+                refs[j].bottom_idc = 1;
+            } else {
+                int frame_idx;
+                
+                /* find the reference_surface slot holding this picture's surface */
+                for (frame_idx = 0; frame_idx < ARRAY_ELEMS(gen6_mfd_context->reference_surface); frame_idx++) {
+                    if (gen6_mfd_context->reference_surface[frame_idx].surface_id != VA_INVALID_ID &&
+                        va_pic->picture_id == gen6_mfd_context->reference_surface[frame_idx].surface_id) {
+                        assert(frame_idx == gen6_mfd_context->reference_surface[frame_idx].frame_store_id);
+                        break;
+                    }
+                }
+
+                /*
+                 * NOTE(review): if no slot matches and assertions are
+                 * compiled out, frame_idx is truncated to the 4-bit field.
+                 */
+                assert(frame_idx < ARRAY_ELEMS(gen6_mfd_context->reference_surface));
+                
+                refs[j].non_exist = 0;
+                refs[j].long_term = !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
+                refs[j].field_picture = !!(va_pic->flags & 
+                                           (VA_PICTURE_H264_TOP_FIELD | 
+                                            VA_PICTURE_H264_BOTTOM_FIELD));
+                refs[j].frame_store_index = frame_idx;
+                refs[j].bottom_idc = !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
+            }
+
+            va_pic++;
+        }
+        
+        /* the packed table forms the payload of the command */
+        intel_batchbuffer_data_bcs(ctx, refs, sizeof(refs));
+        ADVANCE_BCS_BATCH(ctx);
+    }
+}
+
+/*
+ * Emit MFX_AVC_WEIGHTOFFSET_STATE for slices using explicit weighted
+ * prediction: one table (list 0) for P/SP slices when weighted_pred_flag
+ * is 1, two tables (list 0 then list 1) for B slices when
+ * weighted_bipred_idc is 1, nothing otherwise.
+ *
+ * Each table has 32 entries of six shorts in the order luma weight,
+ * luma offset, Cb weight, Cb offset, Cr weight, Cr offset.
+ */
+static void
+gen6_mfd_avc_weightoffset_state(VADriverContextP ctx,
+                                VAPictureParameterBufferH264 *pic_param,
+                                VASliceParameterBufferH264 *slice_param)
+{
+    short table[32 * 6];
+    int list, ref, num_tables;
+
+    if ((slice_param->slice_type == SLICE_TYPE_B) &&
+        (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
+        num_tables = 2;
+    } else if ((slice_param->slice_type == SLICE_TYPE_P ||
+                slice_param->slice_type == SLICE_TYPE_SP) &&
+               (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
+        num_tables = 1;
+    } else {
+        num_tables = 0;
+    }
+
+    for (list = 0; list < num_tables; list++) {
+        short *entry = table;
+
+        /* gather the weights/offsets of this list before emitting them */
+        for (ref = 0; ref < 32; ref++, entry += 6) {
+            if (list == 0) {
+                entry[0] = slice_param->luma_weight_l0[ref];
+                entry[1] = slice_param->luma_offset_l0[ref];
+                entry[2] = slice_param->chroma_weight_l0[ref][0];
+                entry[3] = slice_param->chroma_offset_l0[ref][0];
+                entry[4] = slice_param->chroma_weight_l0[ref][1];
+                entry[5] = slice_param->chroma_offset_l0[ref][1];
+            } else {
+                entry[0] = slice_param->luma_weight_l1[ref];
+                entry[1] = slice_param->luma_offset_l1[ref];
+                entry[2] = slice_param->chroma_weight_l1[ref][0];
+                entry[3] = slice_param->chroma_offset_l1[ref][0];
+                entry[4] = slice_param->chroma_weight_l1[ref][1];
+                entry[5] = slice_param->chroma_offset_l1[ref][1];
+            }
+        }
+
+        BEGIN_BCS_BATCH(ctx, 98);
+        OUT_BCS_BATCH(ctx, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
+        OUT_BCS_BATCH(ctx, list);
+        intel_batchbuffer_data_bcs(ctx, table, sizeof(table));
+        ADVANCE_BCS_BATCH(ctx);
+    }
+}
+
+/*
+ * Translate a slice-data bit offset that was counted without the
+ * 00 00 03 escape sequences into an offset within the raw buffer.
+ *
+ * buf                      - start of the slice in the raw bitstream
+ * mode_flag                - entropy coding mode; ENTROPY_CABAC rounds the
+ *                            result up to a byte boundary
+ * in_slice_data_bit_offset - bit offset of the slice data with the escape
+ *                            bytes not counted
+ *
+ * Returns the bit offset measured in buf. Only whole header bytes are
+ * scanned; the sub-byte remainder is carried over unchanged.
+ */
+static int
+gen6_mfd_avc_get_slice_bit_offset(uint8_t *buf, int mode_flag, int in_slice_data_bit_offset)
+{
+    const int header_bytes = in_slice_data_bit_offset / 8;
+    int counted = 0;    /* header bytes accounted for so far */
+    int pos = 0;        /* read position in the raw buffer */
+    int bit_offset;
+
+    while (counted < header_bytes) {
+        if (buf[pos] == 0 && buf[pos + 1] == 0 && buf[pos + 2] == 3) {
+            /* 00 00 03: three raw bytes stand for two header bytes */
+            counted += 2;
+            pos += 3;
+        } else {
+            counted += 1;
+            pos += 1;
+        }
+    }
+
+    bit_offset = 8 * pos + in_slice_data_bit_offset % 8;
+
+    if (mode_flag == ENTROPY_CABAC)
+        bit_offset = ALIGN(bit_offset, 0x8);
+
+    return bit_offset;
+}
+
+/*
+ * Emit the MFD_AVC_BSD_OBJECT command that starts decoding of one slice.
+ *
+ * The slice buffer is mapped briefly so the slice-data offset can be
+ * corrected for escape bytes in the header; the command is then given
+ * the remaining byte count, the byte address of the slice data and the
+ * position of its first bit within that byte.
+ */
+static void
+gen6_mfd_avc_bsd_object(VADriverContextP ctx,
+                        VAPictureParameterBufferH264 *pic_param,
+                        VASliceParameterBufferH264 *slice_param,
+                        dri_bo *slice_data_bo)
+{
+    uint8_t *slice_start = NULL;
+    int data_bit_offset;
+    int data_byte_offset;
+
+    dri_bo_map(slice_data_bo, 0);
+    slice_start = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
+    data_bit_offset = gen6_mfd_avc_get_slice_bit_offset(slice_start,
+                                                        pic_param->pic_fields.bits.entropy_coding_mode_flag,
+                                                        slice_param->slice_data_bit_offset);
+    dri_bo_unmap(slice_data_bo);
+
+    /* whole bytes taken up by the slice header in the raw stream */
+    data_byte_offset = data_bit_offset >> 3;
+
+    BEGIN_BCS_BATCH(ctx, 6);
+    OUT_BCS_BATCH(ctx, MFD_AVC_BSD_OBJECT | (6 - 2));
+    OUT_BCS_BATCH(ctx, 
+                  ((slice_param->slice_data_size - data_byte_offset) << 0));
+    OUT_BCS_BATCH(ctx, slice_param->slice_data_offset + data_byte_offset);
+    OUT_BCS_BATCH(ctx,
+                  (0 << 31) |
+                  (0 << 14) |
+                  (0 << 12) |
+                  (0 << 10) |
+                  (0 << 8));
+    OUT_BCS_BATCH(ctx,
+                  (0 << 16) |
+                  (0 << 6)  |
+                  ((0x7 - (data_bit_offset & 0x7)) << 0));
+    OUT_BCS_BATCH(ctx, 0);
+    ADVANCE_BCS_BATCH(ctx);
+}
+
+/*
+ * Emit the all-zero MFD_AVC_BSD_OBJECT command that belongs to the
+ * phantom slice. pic_param is not read.
+ */
+static void
+gen6_mfd_avc_phantom_slice_bsd_object(VADriverContextP ctx, VAPictureParameterBufferH264 *pic_param)
+{
+    int n;
+
+    BEGIN_BCS_BATCH(ctx, 6);
+    OUT_BCS_BATCH(ctx, MFD_AVC_BSD_OBJECT | (6 - 2));
+
+    /* the five payload dwords are all zero */
+    for (n = 0; n < 5; n++) {
+        OUT_BCS_BATCH(ctx, 0);
+    }
+
+    ADVANCE_BCS_BATCH(ctx);
+}
+
+/*
+ * Queue the phantom slice: its slice-state command followed by its
+ * (empty) BSD object command.
+ */
+static void
+gen6_mfd_avc_phantom_slice(VADriverContextP ctx, VAPictureParameterBufferH264 *pic_param)
+{
+    gen6_mfd_avc_phantom_slice_state(ctx, pic_param);
+    gen6_mfd_avc_phantom_slice_bsd_object(ctx, pic_param);
+}
+
+/*
+ * Prepare the per-picture decoder state for an H.264 picture.
+ *
+ * - scans the slice parameters to find out whether any slice leaves the
+ *   in-loop deblocking filter enabled (disable_deblocking_filter_idc != 1);
+ * - lazily creates the gen6_mfd_context stored in media_state;
+ * - makes sure the render target has a backing buffer object and
+ *   AVC-specific surface data;
+ * - points the pre-/post-deblocking outputs at the render target (exactly
+ *   one of them is marked valid, depending on the deblocking scan);
+ * - allocates fresh row-store scratch buffers, dropping the old ones;
+ * - updates the frame store index via gen6_mfd_avc_frame_store_index().
+ *
+ * Allocation failures are caught by assert(), as elsewhere in this file.
+ */
+static void
+gen6_mfd_avc_decode_init(VADriverContextP ctx, struct decode_state *decode_state)
+{
+    VAPictureParameterBufferH264 *pic_param;
+    VASliceParameterBufferH264 *slice_param;
+    VAPictureH264 *va_pic;
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_media_state *media_state = &i965->media_state;
+    struct gen6_mfd_context *gen6_mfd_context;
+    struct object_surface *obj_surface;
+    dri_bo *bo;
+    int i, j, enable_avc_ildb = 0;
+    
+    /* stop scanning as soon as one slice needs the deblocking filter */
+    for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
+        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
+        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
+
+        assert(decode_state->slice_params[j]->num_elements == 1);
+        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
+            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
+            assert((slice_param->slice_type == SLICE_TYPE_I) ||
+                   (slice_param->slice_type == SLICE_TYPE_SI) ||
+                   (slice_param->slice_type == SLICE_TYPE_P) ||
+                   (slice_param->slice_type == SLICE_TYPE_SP) ||
+                   (slice_param->slice_type == SLICE_TYPE_B));
+
+            if (slice_param->disable_deblocking_filter_idc != 1) {
+                enable_avc_ildb = 1;
+                break;
+            }
+
+            slice_param++;
+        }
+    }
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
+    gen6_mfd_context = media_state->private_context;
+
+    if (gen6_mfd_context == NULL) {
+        gen6_mfd_context = calloc(1, sizeof(struct gen6_mfd_context));
+        /* the context is dereferenced right below; do not run on a failed allocation */
+        assert(gen6_mfd_context);
+        media_state->private_context = gen6_mfd_context;
+
+        for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
+            gen6_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
+            gen6_mfd_context->reference_surface[i].frame_store_id = -1;
+        }
+    }
+
+    /* Current decoded picture */
+    va_pic = &pic_param->CurrPic;
+    assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
+    obj_surface = SURFACE(va_pic->picture_id);
+    assert(obj_surface);
+    obj_surface->flags = (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
+    gen6_mfd_init_avc_surface(ctx, pic_param, obj_surface);
+
+    if (obj_surface->bo == NULL) {
+        uint32_t tiling_mode = I915_TILING_Y;
+        unsigned long pitch;
+        
+        /* height * 3 / 2 rows: luma plane followed by a half-height chroma plane */
+        obj_surface->bo = drm_intel_bo_alloc_tiled(i965->intel.bufmgr, 
+                                                   "vaapi surface",
+                                                   obj_surface->width, 
+                                                   obj_surface->height + obj_surface->height / 2,
+                                                   1,
+                                                   &tiling_mode,
+                                                   &pitch,
+                                                   0);
+        assert(obj_surface->bo);
+        assert(tiling_mode == I915_TILING_Y);
+        assert(pitch == obj_surface->width);
+    }
+    
+    /* both outputs reference the render target; only one is flagged valid */
+    dri_bo_unreference(gen6_mfd_context->post_deblocking_output.bo);
+    gen6_mfd_context->post_deblocking_output.bo = obj_surface->bo;
+    dri_bo_reference(gen6_mfd_context->post_deblocking_output.bo);
+    gen6_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
+
+    dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo);
+    gen6_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
+    dri_bo_reference(gen6_mfd_context->pre_deblocking_output.bo);
+    gen6_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
+
+    dri_bo_unreference(gen6_mfd_context->intra_row_store_scratch_buffer.bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "intra row store",
+                      128 * 64,
+                      0x1000);
+    assert(bo);
+    gen6_mfd_context->intra_row_store_scratch_buffer.bo = bo;
+    gen6_mfd_context->intra_row_store_scratch_buffer.valid = 1;
+
+    dri_bo_unreference(gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "deblocking filter row store",
+                      30720, /* 4 * 120 * 64 */
+                      0x1000);
+    assert(bo);
+    gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
+    gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
+
+    dri_bo_unreference(gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "bsd mpc row store",
+                      11520, /* 1.5 * 120 * 64 */
+                      0x1000);
+    assert(bo);
+    gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
+    gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
+
+    dri_bo_unreference(gen6_mfd_context->mpr_row_store_scratch_buffer.bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "mpr row store",
+                      7680, /* 1. 0 * 120 * 64 */
+                      0x1000);
+    assert(bo);
+    gen6_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
+    gen6_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
+
+    gen6_mfd_context->bitplane_read_buffer.valid = 0;
+    gen6_mfd_avc_frame_store_index(ctx, pic_param);
+}
+
+/*
+ * Build and submit the BCS batch that decodes one H.264 picture.
+ *
+ * After the per-picture setup (decode_init and the pipeline/picture level
+ * state commands) every slice gets its direct-mode, slice, reference-index
+ * and weight/offset state followed by its BSD object. A phantom slice
+ * closes the picture before the batch is flushed.
+ *
+ * Each slice needs to know its successor. The successor of the last
+ * element of a slice parameter buffer is the first element of the next
+ * buffer, or NULL for the very last slice of the picture.
+ */
+static void
+gen6_mfd_avc_decode_picture(VADriverContextP ctx, struct decode_state *decode_state)
+{
+    VAPictureParameterBufferH264 *pic_param;
+    VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
+    dri_bo *slice_data_bo;
+    int i, j;
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
+
+    gen6_mfd_avc_decode_init(ctx, decode_state);
+    intel_batchbuffer_start_atomic_bcs(ctx, 0x1000);
+    intel_batchbuffer_emit_mi_flush_bcs(ctx);
+    gen6_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC);
+    gen6_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC);
+    gen6_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC);
+    gen6_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC);
+    gen6_mfd_avc_img_state(ctx, decode_state);
+    gen6_mfd_avc_qm_state(ctx, decode_state);
+
+    for (j = 0; j < decode_state->num_slice_params; j++) {
+        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
+        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
+        slice_data_bo = decode_state->slice_datas[j]->bo;
+
+        /* first slice of the following parameter buffer, if there is one */
+        if (j == decode_state->num_slice_params - 1)
+            next_slice_group_param = NULL;
+        else
+            next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
+
+        gen6_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC);
+        assert(decode_state->slice_params[j]->num_elements == 1);
+
+        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
+            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
+            assert((slice_param->slice_type == SLICE_TYPE_I) ||
+                   (slice_param->slice_type == SLICE_TYPE_SI) ||
+                   (slice_param->slice_type == SLICE_TYPE_P) ||
+                   (slice_param->slice_type == SLICE_TYPE_SP) ||
+                   (slice_param->slice_type == SLICE_TYPE_B));
+
+            /*
+             * Recompute the successor for every element. Leaving the
+             * previous value in place on the last element would make a
+             * slice its own successor.
+             */
+            if (i < decode_state->slice_params[j]->num_elements - 1)
+                next_slice_param = slice_param + 1;
+            else
+                next_slice_param = next_slice_group_param;
+
+            gen6_mfd_avc_directmode_state(ctx, pic_param, slice_param);
+            gen6_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param);
+            gen6_mfd_avc_ref_idx_state(ctx, pic_param, slice_param);
+            gen6_mfd_avc_weightoffset_state(ctx, pic_param, slice_param);
+            gen6_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo);
+            slice_param++;
+        }
+    }
+    
+    gen6_mfd_avc_phantom_slice(ctx, pic_param);
+    intel_batchbuffer_end_atomic_bcs(ctx);
+    intel_batchbuffer_flush_bcs(ctx);
+}
+
+/*
+ * Prepare the per-picture decoder state for an MPEG-2 picture.
+ *
+ * - lazily creates the gen6_mfd_context stored in media_state;
+ * - fills the reference surface table: slot 0 is the forward reference,
+ *   slot 1 the backward reference, and every further slot repeats slot 0
+ *   or slot 1 according to its parity;
+ * - makes sure the render target has a backing buffer object and uses it
+ *   as the pre-deblocking output;
+ * - allocates a fresh BSD/MPC row-store scratch buffer and marks the
+ *   buffers MPEG-2 does not use as invalid.
+ *
+ * Allocation failures are caught by assert(), as elsewhere in this file.
+ */
+static void
+gen6_mfd_mpeg2_decode_init(VADriverContextP ctx, struct decode_state *decode_state)
+{
+    VAPictureParameterBufferMPEG2 *pic_param;
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_media_state *media_state = &i965->media_state;
+    struct gen6_mfd_context *gen6_mfd_context;
+    struct object_surface *obj_surface;
+    int i;
+    dri_bo *bo;
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
+    gen6_mfd_context = media_state->private_context;
+
+    if (gen6_mfd_context == NULL) {
+        gen6_mfd_context = calloc(1, sizeof(struct gen6_mfd_context));
+        /* the context is dereferenced right below; do not run on a failed allocation */
+        assert(gen6_mfd_context);
+        media_state->private_context = gen6_mfd_context;
+
+        for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
+            gen6_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
+            gen6_mfd_context->reference_surface[i].frame_store_id = -1;
+        }
+    }
+
+    /* reference picture */
+    obj_surface = SURFACE(pic_param->forward_reference_picture);
+
+    if (obj_surface && obj_surface->bo)
+        gen6_mfd_context->reference_surface[0].surface_id = pic_param->forward_reference_picture;
+    else
+        gen6_mfd_context->reference_surface[0].surface_id = VA_INVALID_ID;
+
+    obj_surface = SURFACE(pic_param->backward_reference_picture);
+
+    /*
+     * Without a usable backward reference fall back to slot 0, i.e. the
+     * forward reference *after* validation. Using the raw picture id here
+     * could store a surface that has no buffer object.
+     */
+    if (obj_surface && obj_surface->bo)
+        gen6_mfd_context->reference_surface[1].surface_id = pic_param->backward_reference_picture;
+    else
+        gen6_mfd_context->reference_surface[1].surface_id = gen6_mfd_context->reference_surface[0].surface_id;
+
+    /* must do so !!! */
+    for (i = 2; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++)
+        gen6_mfd_context->reference_surface[i].surface_id = gen6_mfd_context->reference_surface[i % 2].surface_id;
+
+    /* Current decoded picture */
+    obj_surface = SURFACE(decode_state->current_render_target);
+    assert(obj_surface);
+    if (obj_surface->bo == NULL) {
+        uint32_t tiling_mode = I915_TILING_Y;
+        unsigned long pitch;
+
+        /* height * 3 / 2 rows: luma plane followed by a half-height chroma plane */
+        obj_surface->bo = drm_intel_bo_alloc_tiled(i965->intel.bufmgr, 
+                                                   "vaapi surface",
+                                                   obj_surface->width, 
+                                                   obj_surface->height + obj_surface->height / 2,
+                                                   1,
+                                                   &tiling_mode,
+                                                   &pitch,
+                                                   0);
+        assert(obj_surface->bo);
+        assert(tiling_mode == I915_TILING_Y);
+        assert(pitch == obj_surface->width);
+    }
+
+    dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo);
+    gen6_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
+    dri_bo_reference(gen6_mfd_context->pre_deblocking_output.bo);
+    gen6_mfd_context->pre_deblocking_output.valid = 1;
+
+    dri_bo_unreference(gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "bsd mpc row store",
+                      11520, /* 1.5 * 120 * 64 */
+                      0x1000);
+    assert(bo);
+    gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
+    gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
+
+    /* none of these are marked valid for MPEG-2 decoding */
+    gen6_mfd_context->post_deblocking_output.valid = 0;
+    gen6_mfd_context->intra_row_store_scratch_buffer.valid = 0;
+    gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
+    gen6_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
+    gen6_mfd_context->bitplane_read_buffer.valid = 0;
+}
+
+/*
+ * Emit the 4-dword MFX_MPEG2_PIC_STATE command for the current picture.
+ *
+ * Dword 1 packs the four f_code nibbles (bits 16-31) with the picture
+ * coding extension flags (bits 6-15), dword 2 carries the picture coding
+ * type and dword 3 the picture size in macroblocks (height in the upper
+ * half, width in the lower half).
+ */
+static void
+gen6_mfd_mpeg2_pic_state(VADriverContextP ctx, struct decode_state *decode_state)
+{
+    VAPictureParameterBufferMPEG2 *pic_param;
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
+
+    BEGIN_BCS_BATCH(ctx, 4);
+    OUT_BCS_BATCH(ctx, MFX_MPEG2_PIC_STATE | (4 - 2));
+    /* note: '<<' binds tighter than '|', so each term is shifted before being ORed */
+    OUT_BCS_BATCH(ctx,
+                  (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
+                  ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
+                  ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
+                  ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
+                  pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
+                  pic_param->picture_coding_extension.bits.picture_structure << 12 |
+                  pic_param->picture_coding_extension.bits.top_field_first << 11 |
+                  pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
+                  pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
+                  pic_param->picture_coding_extension.bits.q_scale_type << 8 |
+                  pic_param->picture_coding_extension.bits.intra_vlc_format << 7 | 
+                  pic_param->picture_coding_extension.bits.alternate_scan << 6);
+    OUT_BCS_BATCH(ctx,
+                  pic_param->picture_coding_type << 9);
+    /* picture dimensions rounded up to whole 16x16 macroblocks */
+    OUT_BCS_BATCH(ctx,
+                  (ALIGN(pic_param->vertical_size, 16) / 16) << 16 |
+                  (ALIGN(pic_param->horizontal_size, 16) / 16));
+    ADVANCE_BCS_BATCH(ctx);
+}
+
+/*
+ * Upload the MPEG-2 quantiser matrices the application asked to load.
+ *
+ * Nothing happens without an IQ matrix buffer. Otherwise one
+ * MFX_MPEG2_QM_STATE command is emitted for the intra matrix (type 0)
+ * and/or the non-intra matrix (type 1), each only when its load flag is
+ * set.
+ */
+static void
+gen6_mfd_mpeg2_qm_state(VADriverContextP ctx, struct decode_state *decode_state)
+{
+    VAIQMatrixBufferMPEG2 *iq_matrix;
+    int matrix_type;
+
+    if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
+        return;
+
+    iq_matrix = (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
+
+    for (matrix_type = 0; matrix_type < 2; matrix_type++) {
+        unsigned char raster[64];
+        unsigned char *src;
+        int pos;
+
+        if (matrix_type == 0) {
+            if (!iq_matrix->load_intra_quantiser_matrix)
+                continue;
+
+            src = iq_matrix->intra_quantiser_matrix;
+        } else {
+            if (!iq_matrix->load_non_intra_quantiser_matrix)
+                continue;
+
+            src = iq_matrix->non_intra_quantiser_matrix;
+        }
+
+        /* Upload quantisation matrix in raster order. The mplayer vaapi
+         * patch passes quantisation matrix in zig-zag order to va library.
+         */
+        for (pos = 0; pos < 64; pos++)
+            raster[zigzag_direct[pos]] = src[pos];
+
+        BEGIN_BCS_BATCH(ctx, 18);
+        OUT_BCS_BATCH(ctx, MFX_MPEG2_QM_STATE | (18 - 2));
+        OUT_BCS_BATCH(ctx, matrix_type);
+        intel_batchbuffer_data_bcs(ctx, raster, 64);
+        ADVANCE_BCS_BATCH(ctx);
+    }
+}
+
+/*
+ * Emit the MFD_MPEG2_BSD_OBJECT command that decodes one MPEG-2 slice.
+ *
+ * The macroblock count of the slice is the distance from its own start
+ * position to the start of next_slice_param, or to the end of the picture
+ * when next_slice_param is NULL (which also sets the two "last" flags in
+ * the command).
+ */
+static void
+gen6_mfd_mpeg2_bsd_object(VADriverContextP ctx,
+                          VAPictureParameterBufferMPEG2 *pic_param,
+                          VASliceParameterBufferMPEG2 *slice_param,
+                          VASliceParameterBufferMPEG2 *next_slice_param)
+{
+    unsigned int mbs_per_row = ALIGN(pic_param->horizontal_size, 16) / 16;
+    unsigned int mb_rows = ALIGN(pic_param->vertical_size, 16) / 16;
+    unsigned int first_mb, end_mb;
+    int is_last_slice = (next_slice_param == NULL);
+    int mb_count;
+
+    /* macroblock address where this slice starts */
+    first_mb = slice_param->slice_vertical_position * mbs_per_row +
+        slice_param->slice_horizontal_position;
+
+    /* macroblock address where it ends: next slice start or end of picture */
+    if (is_last_slice)
+        end_mb = mbs_per_row * mb_rows;
+    else
+        end_mb = next_slice_param->slice_vertical_position * mbs_per_row +
+            next_slice_param->slice_horizontal_position;
+
+    mb_count = end_mb - first_mb;
+
+    BEGIN_BCS_BATCH(ctx, 5);
+    OUT_BCS_BATCH(ctx, MFD_MPEG2_BSD_OBJECT | (5 - 2));
+    OUT_BCS_BATCH(ctx, 
+                  slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
+    OUT_BCS_BATCH(ctx, 
+                  slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
+    OUT_BCS_BATCH(ctx,
+                  slice_param->slice_horizontal_position << 24 |
+                  slice_param->slice_vertical_position << 16 |
+                  mb_count << 8 |
+                  is_last_slice << 5 |
+                  is_last_slice << 3 |
+                  (slice_param->macroblock_offset & 0x7));
+    OUT_BCS_BATCH(ctx,
+                  slice_param->quantiser_scale_code << 24);
+    ADVANCE_BCS_BATCH(ctx);
+}
+/* Build and flush one BCS batch that programs the MFX pipeline state and emits a BSD object for every MPEG-2 slice. */
+static void
+gen6_mfd_mpeg2_decode_picture(VADriverContextP ctx, struct decode_state *decode_state)
+{
+    VAPictureParameterBufferMPEG2 *pic_param;
+    VASliceParameterBufferMPEG2 *slice_param, *next_slice_param;
+    dri_bo *slice_data_bo;
+    int i, j;
+
+    assert(decode_state->pic_param && decode_state->pic_param->buffer);
+    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
+    /* State commands below are emitted once per picture, ahead of the per-slice objects. */
+    gen6_mfd_mpeg2_decode_init(ctx, decode_state);
+    intel_batchbuffer_start_atomic_bcs(ctx, 0x1000);
+    intel_batchbuffer_emit_mi_flush_bcs(ctx);
+    gen6_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2);
+    gen6_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2);
+    gen6_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2);
+    gen6_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2);
+    gen6_mfd_mpeg2_pic_state(ctx, decode_state);
+    gen6_mfd_mpeg2_qm_state(ctx, decode_state);
+    /* Only a single slice-parameter buffer is accepted; its num_elements entries are the individual slices. */
+    assert(decode_state->num_slice_params == 1);
+    for (j = 0; j < decode_state->num_slice_params; j++) {
+        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
+        slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
+        slice_data_bo = decode_state->slice_datas[j]->bo;
+        gen6_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2);
+
+        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
+            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
+            /* The last element has no successor; NULL makes the BSD object count MBs to the end of the picture. */
+            if (i < decode_state->slice_params[j]->num_elements - 1)
+                next_slice_param = slice_param + 1;
+            else
+                next_slice_param = NULL;
+
+            gen6_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param);
+            slice_param++;
+        }
+    }
+
+    intel_batchbuffer_end_atomic_bcs(ctx);
+    intel_batchbuffer_flush_bcs(ctx);
+}
+/* VC-1 decode stub: emits no commands. NOTE(review): i965_QueryConfigProfiles still advertises the VC-1 profiles on GEN6. */
+static void
+gen6_mfd_vc1_decode_picture(VADriverContextP ctx, struct decode_state *decode_state)
+{
+
+}
+/* Dispatch a GEN6 decode request to the MPEG-2, AVC or VC-1 routine selected by profile; any other profile hits assert(0). */
+void 
+gen6_mfd_decode_picture(VADriverContextP ctx, 
+                        VAProfile profile, 
+                        struct decode_state *decode_state)
+{
+    switch (profile) {
+    case VAProfileMPEG2Simple:
+    case VAProfileMPEG2Main:
+        gen6_mfd_mpeg2_decode_picture(ctx, decode_state);
+        break;
+        
+    case VAProfileH264Baseline:
+    case VAProfileH264Main:
+    case VAProfileH264High:
+        gen6_mfd_avc_decode_picture(ctx, decode_state);
+        break;
+
+    case VAProfileVC1Simple:
+    case VAProfileVC1Main:
+    case VAProfileVC1Advanced:
+        gen6_mfd_vc1_decode_picture(ctx, decode_state);
+        break;
+
+    default:
+        assert(0);
+        break;
+    }
+}
+/* GEN6 MFD initialisation hook; it currently performs no work and always returns True. */
+Bool
+gen6_mfd_init(VADriverContextP ctx)
+{
+    return True;
+}
+/* Drop the references on every buffer object held by the GEN6 MFD private context, free the context, and clear the pointer. */
+Bool 
+gen6_mfd_terminate(VADriverContextP ctx)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_media_state *media_state = &i965->media_state;
+    struct gen6_mfd_context *gen6_mfd_context = media_state->private_context;
+
+    if (gen6_mfd_context) {
+        dri_bo_unreference(gen6_mfd_context->post_deblocking_output.bo);
+        gen6_mfd_context->post_deblocking_output.bo = NULL;
+
+        dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo);
+        gen6_mfd_context->pre_deblocking_output.bo = NULL;
+
+        dri_bo_unreference(gen6_mfd_context->intra_row_store_scratch_buffer.bo);
+        gen6_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
+
+        dri_bo_unreference(gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
+        gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
+
+        dri_bo_unreference(gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
+        gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
+
+        dri_bo_unreference(gen6_mfd_context->mpr_row_store_scratch_buffer.bo);
+        gen6_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
+
+        dri_bo_unreference(gen6_mfd_context->bitplane_read_buffer.bo);
+        gen6_mfd_context->bitplane_read_buffer.bo = NULL;
+
+        free(gen6_mfd_context);
+    }
+    /* The pointer is cleared even when no context was allocated; the function always returns True. */
+    media_state->private_context = NULL;
+    return True;
+}
+
diff --git a/i965_drv_video/i965_avc_hw_scoreboard.h b/i965_drv_video/gen6_mfd.h
similarity index 54%
copy from i965_drv_video/i965_avc_hw_scoreboard.h
copy to i965_drv_video/gen6_mfd.h
index 99c0a43..dba1d07 100644
--- a/i965_drv_video/i965_avc_hw_scoreboard.h
+++ b/i965_drv_video/gen6_mfd.h
@@ -1,5 +1,5 @@
 /*
- * Copyright © 2010 Intel Corporation
+ * Copyright © 2010 Intel Corporation
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the
@@ -26,60 +26,70 @@
  *
  */
 
-#ifndef __I965_AVC_HW_SCOREBOARD_H__
-#define __I965_AVC_HW_SCOREBOARD_H__
+#ifndef _GEN6_MFD_H_
+#define _GEN6_MFD_H_
 
-struct i965_avc_hw_scoreboard_context
+#include <xf86drm.h>
+#include <drm.h>
+#include <i915_drm.h>
+#include <intel_bufmgr.h>
+
+struct gen6_mfd_surface
 {
-    struct {
-        unsigned int num_mb_cmds;
-        unsigned int starting_mb_number;
-        unsigned int pic_width_in_mbs;
-    } inline_data;
+    dri_bo *dmv_top;
+    dri_bo *dmv_bottom;
+    int dmv_bottom_flag;
+};
 
+#define MAX_MFX_REFERENCE_SURFACES        16
+struct gen6_mfd_context
+{
     struct {
-        dri_bo *ss_bo;
-        dri_bo *s_bo;
-        unsigned int total_mbs;
-    } surface;
+        VASurfaceID surface_id;
+        int frame_store_id;
+    } reference_surface[MAX_MFX_REFERENCE_SURFACES];
 
     struct {
         dri_bo *bo;
-    } binding_table;
+        int valid;
+    } post_deblocking_output;
 
     struct {
         dri_bo *bo;
-    } idrt;
+        int valid;
+    } pre_deblocking_output;
 
     struct {
         dri_bo *bo;
-    } vfe_state;
+        int valid;
+    } intra_row_store_scratch_buffer;
 
     struct {
         dri_bo *bo;
-        int upload;
-    } curbe;
+        int valid;
+    } deblocking_filter_row_store_scratch_buffer;
 
     struct {
         dri_bo *bo;
-        unsigned long offset;
-    } hw_kernel;
+        int valid;
+    } bsd_mpc_row_store_scratch_buffer;
 
     struct {
-        unsigned int vfe_start;
-        unsigned int cs_start;
-
-        unsigned int num_vfe_entries;
-        unsigned int num_cs_entries;
+        dri_bo *bo;
+        int valid;
+    } mpr_row_store_scratch_buffer;
 
-        unsigned int size_vfe_entry;
-        unsigned int size_cs_entry;
-    } urb;
+    struct {
+        dri_bo *bo;
+        int valid;
+    } bitplane_read_buffer;
 };
 
-void i965_avc_hw_scoreboard(VADriverContextP, struct decode_state *);
-void i965_avc_hw_scoreboard_decode_init(VADriverContextP);
-Bool i965_avc_hw_scoreboard_ternimate(struct i965_avc_hw_scoreboard_context *);
-
-#endif /* __I965_AVC_HW_SCOREBOARD_H__ */
+struct decode_state;
 
+Bool gen6_mfd_init(VADriverContextP ctx);
+Bool gen6_mfd_terminate(VADriverContextP ctx);
+void gen6_mfd_decode_picture(VADriverContextP ctx, 
+                             VAProfile profile, 
+                             struct decode_state *decode_state);
+#endif /* _GEN6_MFD_H_ */
diff --git a/i965_drv_video/i965_avc_ildb.c b/i965_drv_video/i965_avc_ildb.c
index a053062..a6be42b 100644
--- a/i965_drv_video/i965_avc_ildb.c
+++ b/i965_drv_video/i965_avc_ildb.c
@@ -360,7 +360,7 @@ i965_avc_ildb_upload_constants(VADriverContextP ctx, struct decode_state *decode
     if (IS_IRONLAKE(i965->intel.device_id)) {
         root_input->max_concurrent_threads = 76; /* 72 - 2 + 8 - 2 */
     } else {
-        root_input->max_concurrent_threads = 54; /* 50 - 2 + 8 = 2 */
+        root_input->max_concurrent_threads = 54; /* 50 - 2 + 8 - 2 */
     }
 
     if (pic_param->pic_fields.bits.field_pic_flag)
diff --git a/i965_drv_video/i965_defines.h b/i965_drv_video/i965_defines.h
index 839712e..6c202c7 100644
--- a/i965_drv_video/i965_defines.h
+++ b/i965_drv_video/i965_defines.h
@@ -29,6 +29,10 @@
 
 #define CMD_PIPELINED_POINTERS                  CMD(3, 0, 0)
 #define CMD_BINDING_TABLE_POINTERS              CMD(3, 0, 1)
+# define GEN6_BINDING_TABLE_MODIFY_PS           (1 << 12)/* for GEN6 */
+# define GEN6_BINDING_TABLE_MODIFY_GS           (1 << 9) /* for GEN6 */
+# define GEN6_BINDING_TABLE_MODIFY_VS           (1 << 8) /* for GEN6 */
+
 #define CMD_VERTEX_BUFFERS                      CMD(3, 0, 8)
 #define CMD_VERTEX_ELEMENTS                     CMD(3, 0, 9)
 #define CMD_DRAWING_RECTANGLE                   CMD(3, 1, 0)
@@ -36,6 +40,125 @@
 #define CMD_3DPRIMITIVE                         CMD(3, 3, 0)
 
 #define CMD_DEPTH_BUFFER                        CMD(3, 1, 5)
+# define CMD_DEPTH_BUFFER_TYPE_SHIFT            29
+# define CMD_DEPTH_BUFFER_FORMAT_SHIFT          18
+
+#define CMD_CLEAR_PARAMS                        CMD(3, 1, 0x10)
+/* DW1 */
+# define CMD_CLEAR_PARAMS_DEPTH_CLEAR_VALID     (1 << 15)
+
+/* for GEN6+ */
+#define GEN6_3DSTATE_SAMPLER_STATE_POINTERS	CMD(3, 0, 0x02)
+# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS	(1 << 12)
+# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_GS	(1 << 9)
+# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_VS	(1 << 8)
+
+#define GEN6_3DSTATE_URB			CMD(3, 0, 0x05)
+/* DW1 */
+# define GEN6_3DSTATE_URB_VS_SIZE_SHIFT		16
+# define GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT	0
+/* DW2 */
+# define GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT	8
+# define GEN6_3DSTATE_URB_GS_SIZE_SHIFT		0
+
+#define GEN6_3DSTATE_VIEWPORT_STATE_POINTERS	CMD(3, 0, 0x0d)
+# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC		(1 << 12)
+# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_SF		(1 << 11)
+# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CLIP	(1 << 10)
+
+#define GEN6_3DSTATE_CC_STATE_POINTERS		CMD(3, 0, 0x0e)
+
+#define GEN6_3DSTATE_VS				CMD(3, 0, 0x10)
+
+#define GEN6_3DSTATE_GS				CMD(3, 0, 0x11)
+/* DW4 */
+# define GEN6_3DSTATE_GS_DISPATCH_START_GRF_SHIFT	0
+
+#define GEN6_3DSTATE_CLIP			CMD(3, 0, 0x12)
+
+#define GEN6_3DSTATE_SF				CMD(3, 0, 0x13)
+/* DW1 */
+# define GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT		22
+# define GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT	11
+# define GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT	4
+/* DW2 */
+/* DW3 */
+# define GEN6_3DSTATE_SF_CULL_BOTH			(0 << 29)
+# define GEN6_3DSTATE_SF_CULL_NONE			(1 << 29)
+# define GEN6_3DSTATE_SF_CULL_FRONT			(2 << 29)
+# define GEN6_3DSTATE_SF_CULL_BACK			(3 << 29)
+/* DW4 */
+# define GEN6_3DSTATE_SF_TRI_PROVOKE_SHIFT		29
+# define GEN6_3DSTATE_SF_LINE_PROVOKE_SHIFT		27
+# define GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT		25
+
+/* NOTE(review): "SHITF" below looks like a typo for "SHIFT"; renaming it requires updating every user of the macro. */
+#define GEN6_3DSTATE_WM				CMD(3, 0, 0x14)
+/* DW2 */
+# define GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF			27
+# define GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT	18
+/* DW4 */
+# define GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT		16
+/* DW5 */
+# define GEN6_3DSTATE_WM_MAX_THREADS_SHIFT			25
+# define GEN6_3DSTATE_WM_DISPATCH_ENABLE			(1 << 19)
+# define GEN6_3DSTATE_WM_16_DISPATCH_ENABLE			(1 << 1)
+# define GEN6_3DSTATE_WM_8_DISPATCH_ENABLE			(1 << 0)
+/* DW6 */
+# define GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT			20
+# define GEN6_3DSTATE_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC	(1 << 15)
+# define GEN6_3DSTATE_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC	(1 << 14)
+# define GEN6_3DSTATE_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC	(1 << 13)
+# define GEN6_3DSTATE_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC		(1 << 12)
+# define GEN6_3DSTATE_WM_PERSPECTIVE_CENTROID_BARYCENTRIC	(1 << 11)
+# define GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC		(1 << 10)
+
+
+#define GEN6_3DSTATE_CONSTANT_VS		CMD(3, 0, 0x15)
+#define GEN6_3DSTATE_CONSTANT_GS          	CMD(3, 0, 0x16)
+#define GEN6_3DSTATE_CONSTANT_PS          	CMD(3, 0, 0x17)
+
+#define GEN6_3DSTATE_SAMPLE_MASK		CMD(3, 0, 0x18)
+
+#define GEN6_3DSTATE_MULTISAMPLE		CMD(3, 1, 0x0d)
+/* DW1 */
+# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER         (0 << 4)
+# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_UPPER_LEFT     (1 << 4)
+# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1                  (0 << 1)
+# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_4                  (2 << 1)
+# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_8                  (3 << 1)
+/* Build an MFX command header dword: 3 in bits 31:29, then pipeline << 27, op << 24, sub_opa << 21 and sub_opb << 16. */
+#define MFX(pipeline, op, sub_opa, sub_opb)     \
+    (3 << 29 |                                  \
+     (pipeline) << 27 |                         \
+     (op) << 24 |                               \
+     (sub_opa) << 21 |                          \
+     (sub_opb) << 16)
+
+#define MFX_PIPE_MODE_SELECT                    MFX(2, 0, 0, 0)
+#define MFX_SURFACE_STATE                       MFX(2, 0, 0, 1)
+#define MFX_PIPE_BUF_ADDR_STATE                 MFX(2, 0, 0, 2)
+#define MFX_IND_OBJ_BASE_ADDR_STATE             MFX(2, 0, 0, 3)
+#define MFX_BSP_BUF_BASE_ADDR_STATE             MFX(2, 0, 0, 4)
+#define MFX_AES_STATE                           MFX(2, 0, 0, 5)
+#define MFX_STATE_POINTER                       MFX(2, 0, 0, 6)
+
+#define MFX_WAIT                                MFX(1, 0, 0, 0)
+
+#define MFX_AVC_IMG_STATE                       MFX(2, 1, 0, 0)
+#define MFX_AVC_QM_STATE                        MFX(2, 1, 0, 1)
+#define MFX_AVC_DIRECTMODE_STATE                MFX(2, 1, 0, 2)
+#define MFX_AVC_SLICE_STATE                     MFX(2, 1, 0, 3)
+#define MFX_AVC_REF_IDX_STATE                   MFX(2, 1, 0, 4)
+#define MFX_AVC_WEIGHTOFFSET_STATE              MFX(2, 1, 0, 5)
+
+#define MFD_AVC_BSD_OBJECT                      MFX(2, 1, 1, 8)
+
+#define MFX_MPEG2_PIC_STATE                     MFX(2, 3, 0, 0)
+#define MFX_MPEG2_QM_STATE                      MFX(2, 3, 0, 1)
+
+#define MFD_MPEG2_BSD_OBJECT                    MFX(2, 3, 1, 8)
+
 #define I965_DEPTHFORMAT_D32_FLOAT              1
 
 #define BASE_ADDRESS_MODIFY             (1 << 0)
@@ -288,7 +411,9 @@
 #define I965_VFCOMPONENT_STORE_PID    7
 
 #define VE0_VERTEX_BUFFER_INDEX_SHIFT	27
+#define GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT      26 /* for GEN6 */
 #define VE0_VALID			(1 << 26)
+#define GEN6_VE0_VALID                  (1 << 25) /* for GEN6 */
 #define VE0_FORMAT_SHIFT		16
 #define VE0_OFFSET_SHIFT		0
 #define VE1_VFCOMPONENT_0_SHIFT		28
@@ -298,8 +423,11 @@
 #define VE1_DESTINATION_ELEMENT_OFFSET_SHIFT	0
 
 #define VB0_BUFFER_INDEX_SHIFT          27
+#define GEN6_VB0_BUFFER_INDEX_SHIFT     26
 #define VB0_VERTEXDATA                  (0 << 26)
 #define VB0_INSTANCEDATA                (1 << 26)
+#define GEN6_VB0_VERTEXDATA             (0 << 20)
+#define GEN6_VB0_INSTANCEDATA           (1 << 20)
 #define VB0_BUFFER_PITCH_SHIFT          0
 
 #define _3DPRIMITIVE_VERTEX_SEQUENTIAL  (0 << 15)
@@ -380,6 +508,21 @@
 #define IEF_FILTER_SIZE_3X3             0
 #define IEF_FILTER_SIZE_5X5             1
 
-#define URB_SIZE(intel)         (IS_IRONLAKE(intel->device_id) ? 1024 : \
+#define MFX_FORMAT_MPEG2        0
+#define MFX_FORMAT_VC1          1
+#define MFX_FORMAT_AVC          2
+
+#define MFX_CODEC_DECODE        0
+#define MFX_CODEC_ENCODE        1
+
+#define MFD_MODE_VLD            0
+#define MFD_MODE_IT             1
+
+#define MFX_SURFACE_PLANAR_420_8        4
+#define MFX_SURFACE_MONOCHROME          12
+
+#define URB_SIZE(intel)         (IS_GEN6(intel->device_id) ? 1024 :     \
+                                 IS_IRONLAKE(intel->device_id) ? 1024 : \
                                  IS_G4X(intel->device_id) ? 384 : 256)
+
 #endif /* _I965_DEFINES_H_ */
diff --git a/i965_drv_video/i965_drv_video.c b/i965_drv_video/i965_drv_video.c
index ec5412d..f0466ad 100644
--- a/i965_drv_video/i965_drv_video.c
+++ b/i965_drv_video/i965_drv_video.c
@@ -119,6 +119,7 @@ i965_QueryConfigProfiles(VADriverContextP ctx,
                          VAProfile *profile_list,       /* out */
                          int *num_profiles)             /* out */
 {
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
     int i = 0;
 
     profile_list[i++] = VAProfileMPEG2Simple;
@@ -127,6 +128,12 @@ i965_QueryConfigProfiles(VADriverContextP ctx,
     profile_list[i++] = VAProfileH264Main;
     profile_list[i++] = VAProfileH264High;
 
+    if (IS_GEN6(i965->intel.device_id)) {
+        profile_list[i++] = VAProfileVC1Simple;
+        profile_list[i++] = VAProfileVC1Main;
+        profile_list[i++] = VAProfileVC1Advanced;
+    }
+
     /* If the assert fails then I965_MAX_PROFILES needs to be bigger */
     assert(i <= I965_MAX_PROFILES);
     *num_profiles = i;
@@ -156,6 +163,13 @@ i965_QueryConfigEntrypoints(VADriverContextP ctx,
         entrypoint_list[0] = VAEntrypointVLD;
         break;
 
+    case VAProfileVC1Simple:
+    case VAProfileVC1Main:
+    case VAProfileVC1Advanced:
+        *num_entrypoints = 1;
+        entrypoint_list[0] = VAEntrypointVLD;
+        break;
+
     default:
         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
         *num_entrypoints = 0;
@@ -262,6 +276,17 @@ i965_CreateConfig(VADriverContextP ctx,
 
         break;
 
+    case VAProfileVC1Simple:
+    case VAProfileVC1Main:
+    case VAProfileVC1Advanced:
+        if (VAEntrypointVLD == entrypoint) {
+            vaStatus = VA_STATUS_SUCCESS;
+        } else {
+            vaStatus = VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT;
+        }
+
+        break;
+
     default:
         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
         break;
@@ -392,8 +417,15 @@ i965_CreateSurfaces(VADriverContextP ctx,
         obj_surface->subpic = VA_INVALID_ID;
         obj_surface->orig_width = width;
         obj_surface->orig_height = height;
-        obj_surface->width = ALIGN(obj_surface->orig_width, 16);
-        obj_surface->height = ALIGN(obj_surface->orig_height, 16);
+
+        if (IS_GEN6(i965->intel.device_id)) {
+            obj_surface->width = ALIGN(obj_surface->orig_width, 128);
+            obj_surface->height = ALIGN(obj_surface->orig_height, 32);
+        } else {
+            obj_surface->width = ALIGN(obj_surface->orig_width, 16);
+            obj_surface->height = ALIGN(obj_surface->orig_height, 16);
+        }
+
         obj_surface->size = SIZE_YUV420(obj_surface->width, obj_surface->height);
         obj_surface->flags = SURFACE_REFERENCED;
         obj_surface->bo = NULL;
@@ -724,14 +756,18 @@ i965_CreateContext(VADriverContextP ctx,
         return vaStatus;
     }
 
-    switch (obj_config->profile) {
-    case VAProfileH264Baseline:
-    case VAProfileH264Main:
-    case VAProfileH264High:
+    if (IS_GEN6(i965->intel.device_id))
         render_state->interleaved_uv = 1;
-        break;
-    default:
-        render_state->interleaved_uv = 0;
+    else {
+        switch (obj_config->profile) {
+        case VAProfileH264Baseline:
+        case VAProfileH264Main:
+        case VAProfileH264High:
+            render_state->interleaved_uv = 1;
+            break;
+        default:
+            render_state->interleaved_uv = 0;
+        }
     }
 
     obj_context->context_id = contextID;
@@ -978,6 +1014,12 @@ i965_BeginPicture(VADriverContextP ctx,
         vaStatus = VA_STATUS_SUCCESS;
         break;
 
+    case VAProfileVC1Simple:
+    case VAProfileVC1Main:
+    case VAProfileVC1Advanced:
+        vaStatus = VA_STATUS_SUCCESS;
+        break;
+
     default:
         assert(0);
         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
@@ -1252,7 +1294,8 @@ i965_Init(VADriverContextP ctx)
         return VA_STATUS_ERROR_UNKNOWN;
 
     if (!IS_G4X(i965->intel.device_id) &&
-        !IS_IRONLAKE(i965->intel.device_id))
+        !IS_IRONLAKE(i965->intel.device_id) &&
+        !IS_GEN6(i965->intel.device_id))
         return VA_STATUS_ERROR_UNKNOWN;
 
     if (i965_media_init(ctx) == False)
@@ -1726,15 +1769,15 @@ i965_PutSurface(VADriverContextP ctx,
     if (flags & (VA_BOTTOM_FIELD | VA_TOP_FIELD))
         pp_flag |= I965_PP_FLAG_DEINTERLACING;
 
-    i965_render_put_surface(ctx, surface,
+    intel_render_put_surface(ctx, surface,
                             srcx, srcy, srcw, srch,
                             destx, desty, destw, desth,
                             pp_flag);
 
     if(obj_surface->subpic != VA_INVALID_ID) {	
-	i965_render_put_subpic(ctx, surface,
-                               srcx, srcy, srcw, srch,
-                               destx, desty, destw, desth);
+	intel_render_put_subpicture(ctx, surface,
+                                    srcx, srcy, srcw, srch,
+                                    destx, desty, destw, desth);
     } 
 
     dri_swap_buffer(ctx, dri_drawable);
diff --git a/i965_drv_video/i965_media.c b/i965_drv_video/i965_media.c
index 8945444..172dde5 100644
--- a/i965_drv_video/i965_media.c
+++ b/i965_drv_video/i965_media.c
@@ -39,6 +39,7 @@
 #include "i965_defines.h"
 #include "i965_media_mpeg2.h"
 #include "i965_media_h264.h"
+#include "gen6_mfd.h"
 #include "i965_media.h"
 #include "i965_drv_video.h"
 
@@ -177,7 +178,7 @@ i965_media_depth_buffer(VADriverContextP ctx)
     OUT_BATCH(ctx, 0);
     OUT_BATCH(ctx, 0);
     OUT_BATCH(ctx, 0);
-    ADVANCE_BATCH();
+    ADVANCE_BATCH(ctx);
 }
 
 static void
@@ -273,6 +274,11 @@ i965_media_decode_picture(VADriverContextP ctx,
     struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct i965_media_state *media_state = &i965->media_state;
 
+    if (IS_GEN6(i965->intel.device_id)) {
+        gen6_mfd_decode_picture(ctx, profile, decode_state);
+        return;
+    }
+
     i965_media_decode_init(ctx, profile, decode_state);
     assert(media_state->media_states_setup);
     media_state->media_states_setup(ctx, decode_state);
@@ -282,6 +288,11 @@ i965_media_decode_picture(VADriverContextP ctx,
 Bool 
 i965_media_init(VADriverContextP ctx)
 {
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+
+    if (IS_GEN6(i965->intel.device_id))
+        return gen6_mfd_init(ctx);
+
     return True;
 }
 
@@ -292,6 +303,9 @@ i965_media_terminate(VADriverContextP ctx)
     struct i965_media_state *media_state = &i965->media_state;
     int i;
 
+    if (IS_GEN6(i965->intel.device_id))
+        return gen6_mfd_terminate(ctx);
+
     if (media_state->free_private_context)
         media_state->free_private_context(&media_state->private_context);
 
diff --git a/i965_drv_video/i965_render.c b/i965_drv_video/i965_render.c
index ceef319..8789ca8 100644
--- a/i965_drv_video/i965_render.c
+++ b/i965_drv_video/i965_render.c
@@ -97,6 +97,28 @@ static const unsigned int ps_subpic_kernel_static_gen5[][4] =
 #include "shaders/render/exa_wm_write.g4b.gen5"
 };
 
+/* programs for Sandybridge */
+static const unsigned int sf_kernel_static_gen6[][4] = 
+{
+};
+
+static const uint32_t ps_kernel_static_gen6[][4] = {
+#include "shaders/render/exa_wm_src_affine.g6b"
+#include "shaders/render/exa_wm_src_sample_planar.g6b"
+#include "shaders/render/exa_wm_yuv_rgb.g6b"
+#include "shaders/render/exa_wm_write.g6b"
+};
+
+static const uint32_t ps_subpic_kernel_static_gen6[][4] = {
+#include "shaders/render/exa_wm_src_affine.g6b"
+#include "shaders/render/exa_wm_src_sample_argb.g6b"
+#include "shaders/render/exa_wm_write.g6b"
+};
+/* Surface states sit in 32-byte-padded slots of one bo; the binding table starts right after slot MAX_RENDER_SURFACES - 1. */
+#define SURFACE_STATE_PADDED_SIZE       ALIGN(sizeof(struct i965_surface_state), 32)
+#define SURFACE_STATE_OFFSET(index)     (SURFACE_STATE_PADDED_SIZE * (index))
+#define BINDING_TABLE_OFFSET            SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES)
+
 static uint32_t float_to_uint (float f) 
 {
     union {
@@ -167,6 +189,28 @@ static struct render_kernel render_kernels_gen5[] = {
     }
 };
 
+static struct render_kernel render_kernels_gen6[] = { /* GEN6 kernel table; keep its entry count equal to render_kernels_gen4, which defines NUM_RENDER_KERNEL */
+    {
+        "SF",
+        sf_kernel_static_gen6,
+        sizeof(sf_kernel_static_gen6), /* sf_kernel_static_gen6 is declared with an empty initializer */
+        NULL
+    },
+    {
+        "PS",
+        ps_kernel_static_gen6,
+        sizeof(ps_kernel_static_gen6),
+        NULL
+    },
+
+    {
+        "PS_SUBPIC",
+        ps_subpic_kernel_static_gen6,
+        sizeof(ps_subpic_kernel_static_gen6),
+        NULL
+    }
+};
+
 static struct render_kernel *render_kernels = NULL;
 
 #define NUM_RENDER_KERNEL (sizeof(render_kernels_gen4)/sizeof(render_kernels_gen4[0]))
@@ -532,6 +576,25 @@ i965_render_cc_unit(VADriverContextP ctx)
 }
 
 static void
+i965_render_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
+{   /* Map an I915_TILING_* mode onto ss3.tiled_surface / ss3.tile_walk; any other value leaves ss untouched (no default case). */
+    switch (tiling) {
+    case I915_TILING_NONE:
+        ss->ss3.tiled_surface = 0;
+        ss->ss3.tile_walk = 0;
+        break;
+    case I915_TILING_X:
+        ss->ss3.tiled_surface = 1;
+        ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
+        break;
+    case I915_TILING_Y:
+        ss->ss3.tiled_surface = 1;
+        ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
+        break;
+    }
+}
+
+static void
 i965_render_src_surface_state(VADriverContextP ctx, 
                               int index,
                               dri_bo *region,
@@ -542,70 +605,15 @@ i965_render_src_surface_state(VADriverContextP ctx,
     struct i965_driver_data *i965 = i965_driver_data(ctx);  
     struct i965_render_state *render_state = &i965->render_state;
     struct i965_surface_state *ss;
-    dri_bo *ss_bo;
-
-    ss_bo = dri_bo_alloc(i965->intel.bufmgr, 
-                      "surface state", 
-                      sizeof(struct i965_surface_state), 32);
-    assert(ss_bo);
-    dri_bo_map(ss_bo, 1);
-    assert(ss_bo->virtual);
-    ss = ss_bo->virtual;
-    memset(ss, 0, sizeof(*ss));
-    ss->ss0.surface_type = I965_SURFACE_2D;
-    ss->ss0.surface_format = format;
-    ss->ss0.writedisable_alpha = 0;
-    ss->ss0.writedisable_red = 0;
-    ss->ss0.writedisable_green = 0;
-    ss->ss0.writedisable_blue = 0;
-    ss->ss0.color_blend = 1;
-    ss->ss0.vert_line_stride = 0;
-    ss->ss0.vert_line_stride_ofs = 0;
-    ss->ss0.mipmap_layout_mode = 0;
-    ss->ss0.render_cache_read_mode = 0;
-
-    ss->ss1.base_addr = region->offset + offset;
-
-    ss->ss2.width = w - 1;
-    ss->ss2.height = h - 1;
-    ss->ss2.mip_count = 0;
-    ss->ss2.render_target_rotation = 0;
-
-    ss->ss3.pitch = pitch - 1;
-
-    dri_bo_emit_reloc(ss_bo,
-                      I915_GEM_DOMAIN_SAMPLER, 0,
-                      offset,
-                      offsetof(struct i965_surface_state, ss1),
-                      region);
-
-    dri_bo_unmap(ss_bo);
+    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
+    unsigned int tiling;
+    unsigned int swizzle;
 
     assert(index < MAX_RENDER_SURFACES);
-    assert(render_state->wm.surface[index] == NULL);
-    render_state->wm.surface[index] = ss_bo;
-    render_state->wm.sampler_count++;
-}
 
-static void
-i965_subpic_render_src_surface_state(VADriverContextP ctx, 
-                              int index,
-                              dri_bo *region,
-                              unsigned long offset,
-                              int w, int h, int p, int format)
-{
-    struct i965_driver_data *i965 = i965_driver_data(ctx);  
-    struct i965_render_state *render_state = &i965->render_state;
-    struct i965_surface_state *ss;
-    dri_bo *ss_bo;
-
-    ss_bo = dri_bo_alloc(i965->intel.bufmgr, 
-                      "surface state", 
-                      sizeof(struct i965_surface_state), 32);
-    assert(ss_bo);
     dri_bo_map(ss_bo, 1);
     assert(ss_bo->virtual);
-    ss = ss_bo->virtual;
+    ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
     memset(ss, 0, sizeof(*ss));
     ss->ss0.surface_type = I965_SURFACE_2D;
     ss->ss0.surface_format = format;
@@ -626,19 +634,19 @@ i965_subpic_render_src_surface_state(VADriverContextP ctx,
     ss->ss2.mip_count = 0;
     ss->ss2.render_target_rotation = 0;
 
-    ss->ss3.pitch = p - 1;
+    ss->ss3.pitch = pitch - 1;
+
+    dri_bo_get_tiling(region, &tiling, &swizzle);
+    i965_render_set_surface_tiling(ss, tiling);
 
     dri_bo_emit_reloc(ss_bo,
                       I915_GEM_DOMAIN_SAMPLER, 0,
                       offset,
-                      offsetof(struct i965_surface_state, ss1),
+                      SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                       region);
 
+    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
     dri_bo_unmap(ss_bo);
-
-    assert(index < MAX_RENDER_SURFACES);
-    assert(render_state->wm.surface[index] == NULL);
-    render_state->wm.surface[index] = ss_bo;
     render_state->wm.sampler_count++;
 }
 
@@ -702,27 +710,8 @@ i965_subpic_render_src_surfaces_state(VADriverContextP ctx,
     region = obj_surface->bo;
     subpic_region = obj_image->bo;
     /*subpicture surface*/
-    i965_subpic_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format);     
-    i965_subpic_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format);     
-}
-
-static void
-i965_render_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
-{
-    switch (tiling) {
-    case I915_TILING_NONE:
-        ss->ss3.tiled_surface = 0;
-        ss->ss3.tile_walk = 0;
-        break;
-    case I915_TILING_X:
-        ss->ss3.tiled_surface = 1;
-        ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
-        break;
-    case I915_TILING_Y:
-        ss->ss3.tiled_surface = 1;
-        ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
-        break;
-    }
+    i965_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format);     
+    i965_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format);     
 }
 
 static void
@@ -732,15 +721,13 @@ i965_render_dest_surface_state(VADriverContextP ctx, int index)
     struct i965_render_state *render_state = &i965->render_state;
     struct intel_region *dest_region = render_state->draw_region;
     struct i965_surface_state *ss;
-    dri_bo *ss_bo;
+    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
+
+    assert(index < MAX_RENDER_SURFACES);
 
-    ss_bo = dri_bo_alloc(i965->intel.bufmgr, 
-                      "surface state", 
-                      sizeof(struct i965_surface_state), 32);
-    assert(ss_bo);
     dri_bo_map(ss_bo, 1);
     assert(ss_bo->virtual);
-    ss = ss_bo->virtual;
+    ss = (struct i965_surface_state *)((char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index));
     memset(ss, 0, sizeof(*ss));
 
     ss->ss0.surface_type = I965_SURFACE_2D;
@@ -774,41 +761,11 @@ i965_render_dest_surface_state(VADriverContextP ctx, int index)
     dri_bo_emit_reloc(ss_bo,
                       I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                       0,
-                      offsetof(struct i965_surface_state, ss1),
+                      SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                       dest_region->bo);
 
+    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
     dri_bo_unmap(ss_bo);
-
-    assert(index < MAX_RENDER_SURFACES);
-    assert(render_state->wm.surface[index] == NULL);
-    render_state->wm.surface[index] = ss_bo;
-}
-
-static void
-i965_render_binding_table(VADriverContextP ctx)
-{
-    struct i965_driver_data *i965 = i965_driver_data(ctx);
-    struct i965_render_state *render_state = &i965->render_state;
-    int i;
-    unsigned int *binding_table;
-
-    dri_bo_map(render_state->wm.binding_table, 1);
-    assert(render_state->wm.binding_table->virtual);
-    binding_table = render_state->wm.binding_table->virtual;
-    memset(binding_table, 0, render_state->wm.binding_table->size);
-
-    for (i = 0; i < MAX_RENDER_SURFACES; i++) {
-        if (render_state->wm.surface[i]) {
-            binding_table[i] = render_state->wm.surface[i]->offset;
-            dri_bo_emit_reloc(render_state->wm.binding_table,
-                              I915_GEM_DOMAIN_INSTRUCTION, 0,
-                              0,
-                              i * sizeof(*binding_table),
-                              render_state->wm.surface[i]);
-        }
-    }
-
-    dri_bo_unmap(render_state->wm.binding_table);
 }
 
 static void 
@@ -964,7 +921,6 @@ i965_surface_render_state_setup(VADriverContextP ctx,
     i965_render_wm_unit(ctx);
     i965_render_cc_viewport(ctx);
     i965_render_cc_unit(ctx);
-    i965_render_binding_table(ctx);
     i965_render_upload_vertex(ctx, surface,
                               srcx, srcy, srcw, srch,
                               destx, desty, destw, desth);
@@ -990,7 +946,6 @@ i965_subpic_render_state_setup(VADriverContextP ctx,
     i965_subpic_render_wm_unit(ctx);
     i965_render_cc_viewport(ctx);
     i965_subpic_render_cc_unit(ctx);
-    i965_render_binding_table(ctx);
 
     VARectangle output_rect;
     output_rect.x      = destx;
@@ -1022,12 +977,13 @@ static void
 i965_render_state_base_address(VADriverContextP ctx)
 {
     struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_render_state *render_state = &i965->render_state;
 
     if (IS_IRONLAKE(i965->intel.device_id)) {
         BEGIN_BATCH(ctx, 8);
         OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | 6);
         OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
-        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
+        OUT_RELOC(ctx, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
         OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
         OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
         OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
@@ -1038,7 +994,7 @@ i965_render_state_base_address(VADriverContextP ctx)
         BEGIN_BATCH(ctx, 6);
         OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | 4);
         OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
-        OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
+        OUT_RELOC(ctx, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
         OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
         OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
         OUT_BATCH(ctx, 0 | BASE_ADDRESS_MODIFY);
@@ -1049,16 +1005,13 @@ i965_render_state_base_address(VADriverContextP ctx)
 static void
 i965_render_binding_table_pointers(VADriverContextP ctx)
 {
-    struct i965_driver_data *i965 = i965_driver_data(ctx);
-    struct i965_render_state *render_state = &i965->render_state;
-
     BEGIN_BATCH(ctx, 6);
     OUT_BATCH(ctx, CMD_BINDING_TABLE_POINTERS | 4);
     OUT_BATCH(ctx, 0); /* vs */
     OUT_BATCH(ctx, 0); /* gs */
     OUT_BATCH(ctx, 0); /* clip */
     OUT_BATCH(ctx, 0); /* sf */
-    OUT_RELOC(ctx, render_state->wm.binding_table, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* wm */
+    OUT_BATCH(ctx, BINDING_TABLE_OFFSET);
     ADVANCE_BATCH(ctx);
 }
 
@@ -1312,7 +1265,10 @@ i965_clear_dest_region(VADriverContextP ctx)
 
     br13 |= pitch;
 
-    BEGIN_BATCH(ctx, 6);
+    if (IS_GEN6(i965->intel.device_id))
+        BEGIN_BLT_BATCH(ctx, 6);
+    else
+        BEGIN_BATCH(ctx, 6);
     OUT_BATCH(ctx, blt_cmd);
     OUT_BATCH(ctx, br13);
     OUT_BATCH(ctx, (dest_region->y << 16) | (dest_region->x));
@@ -1328,9 +1284,9 @@ i965_clear_dest_region(VADriverContextP ctx)
 static void
 i965_surface_render_pipeline_setup(VADriverContextP ctx)
 {
+    i965_clear_dest_region(ctx);
     intel_batchbuffer_start_atomic(ctx, 0x1000);
     intel_batchbuffer_emit_mi_flush(ctx);
-    i965_clear_dest_region(ctx);
     i965_render_pipeline_select(ctx);
     i965_render_state_sip(ctx);
     i965_render_state_base_address(ctx);
@@ -1371,7 +1327,6 @@ i965_render_initialize(VADriverContextP ctx)
 {
     struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct i965_render_state *render_state = &i965->render_state;
-    int i;
     dri_bo *bo;
 
     /* VERTEX BUFFER */
@@ -1404,18 +1359,13 @@ i965_render_initialize(VADriverContextP ctx)
     render_state->sf.state = bo;
 
     /* WM */
-    for (i = 0; i < MAX_RENDER_SURFACES; i++) {
-        dri_bo_unreference(render_state->wm.surface[i]);
-        render_state->wm.surface[i] = NULL;
-    }
-
-    dri_bo_unreference(render_state->wm.binding_table);
+    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
     bo = dri_bo_alloc(i965->intel.bufmgr,
-                      "binding table",
-                      MAX_RENDER_SURFACES * sizeof(unsigned int),
-                      64);
+                      "surface state & binding table",
+                      (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
+                      4096);
     assert(bo);
-    render_state->wm.binding_table = bo;
+    render_state->wm.surface_state_binding_table_bo = bo;
 
     dri_bo_unreference(render_state->wm.sampler);
     bo = dri_bo_alloc(i965->intel.bufmgr,
@@ -1452,7 +1402,7 @@ i965_render_initialize(VADriverContextP ctx)
     render_state->cc.viewport = bo;
 }
 
-void
+static void
 i965_render_put_surface(VADriverContextP ctx,
                         VASurfaceID surface,
                         short srcx,
@@ -1478,8 +1428,480 @@ i965_render_put_surface(VADriverContextP ctx,
     intel_batchbuffer_flush(ctx);
 }
 
-void
-i965_render_put_subpic(VADriverContextP ctx,
+static void
+i965_render_put_subpicture(VADriverContextP ctx,
+                           VASurfaceID surface,
+                           short srcx,
+                           short srcy,
+                           unsigned short srcw,
+                           unsigned short srch,
+                           short destx,
+                           short desty,
+                           unsigned short destw,
+                           unsigned short desth)
+{   /* Render the subpicture attached to `surface`: rebuild state, emit the subpic pipeline, upload the palette, submit. */
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct object_surface *obj_surface = SURFACE(surface);
+    struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic);
+    assert(obj_subpic); /* a subpicture must be associated with the surface */
+
+    i965_render_initialize(ctx);
+    i965_subpic_render_state_setup(ctx, surface,
+                                   srcx, srcy, srcw, srch,
+                                   destx, desty, destw, desth);
+    i965_subpic_render_pipeline_setup(ctx);
+    i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff); /* NOTE(review): 0xff is presumably the palette alpha -- confirm */
+    intel_batchbuffer_flush(ctx);
+}
+
+/*
+ * for GEN6+
+ */
+static void 
+gen6_render_initialize(VADriverContextP ctx)
+{   /* (Re)allocate every buffer object that holds GEN6 render state; the previous reference is dropped before each allocation. */
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_render_state *render_state = &i965->render_state;
+    dri_bo *bo;
+
+    /* VERTEX BUFFER */
+    dri_bo_unreference(render_state->vb.vertex_buffer);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "vertex buffer",
+                      4096,
+                      4096);
+    assert(bo);
+    render_state->vb.vertex_buffer = bo;
+
+    /* WM */
+    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "surface state & binding table",
+                      (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES, /* per surface: one padded surface state + one binding-table entry */
+                      4096);
+    assert(bo);
+    render_state->wm.surface_state_binding_table_bo = bo;
+
+    dri_bo_unreference(render_state->wm.sampler);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "sampler state",
+                      MAX_SAMPLERS * sizeof(struct i965_sampler_state),
+                      4096);
+    assert(bo);
+    render_state->wm.sampler = bo;
+    render_state->wm.sampler_count = 0; /* fresh sampler BO: no samplers recorded yet */
+
+    /* COLOR CALCULATOR */
+    dri_bo_unreference(render_state->cc.state);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "color calc state",
+                      sizeof(struct gen6_color_calc_state),
+                      4096);
+    assert(bo);
+    render_state->cc.state = bo;
+
+    /* CC VIEWPORT */
+    dri_bo_unreference(render_state->cc.viewport);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "cc viewport",
+                      sizeof(struct i965_cc_viewport),
+                      4096);
+    assert(bo);
+    render_state->cc.viewport = bo;
+
+    /* BLEND STATE */
+    dri_bo_unreference(render_state->cc.blend);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "blend state",
+                      sizeof(struct gen6_blend_state),
+                      4096);
+    assert(bo);
+    render_state->cc.blend = bo;
+
+    /* DEPTH & STENCIL STATE */
+    dri_bo_unreference(render_state->cc.depth_stencil);
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "depth & stencil state",
+                      sizeof(struct gen6_depth_stencil_state),
+                      4096);
+    assert(bo);
+    render_state->cc.depth_stencil = bo;
+}
+
+static void
+gen6_render_color_calc_state(VADriverContextP ctx)
+{   /* Zero the color-calc state BO, then set its constant color to (r, g, b, a) = (1.0, 0.0, 1.0, 1.0). */
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_render_state *render_state = &i965->render_state;
+    struct gen6_color_calc_state *color_calc_state;
+    
+    dri_bo_map(render_state->cc.state, 1);
+    assert(render_state->cc.state->virtual);
+    color_calc_state = render_state->cc.state->virtual;
+    memset(color_calc_state, 0, sizeof(*color_calc_state));
+    color_calc_state->constant_r = 1.0;
+    color_calc_state->constant_g = 0.0; /* NOTE(review): green is 0 while r/b/a are 1 -- confirm this constant color is intentional */
+    color_calc_state->constant_b = 1.0;
+    color_calc_state->constant_a = 1.0;
+    dri_bo_unmap(render_state->cc.state);
+}
+
+static void
+gen6_render_blend_state(VADriverContextP ctx)
+{   /* Zero the blend state BO and enable only the logic op (function 0xc); all blend0 fields stay 0. */
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_render_state *render_state = &i965->render_state;
+    struct gen6_blend_state *blend_state;
+    
+    dri_bo_map(render_state->cc.blend, 1);
+    assert(render_state->cc.blend->virtual);
+    blend_state = render_state->cc.blend->virtual;
+    memset(blend_state, 0, sizeof(*blend_state));
+    blend_state->blend1.logic_op_enable = 1;
+    blend_state->blend1.logic_op_func = 0xc; /* NOTE(review): 0xc is presumably the COPY logic op -- confirm against the hardware docs */
+    dri_bo_unmap(render_state->cc.blend);
+}
+
+static void
+gen6_render_depth_stencil_state(VADriverContextP ctx)
+{   /* Zero-fill the depth/stencil state BO; no field is set afterwards. */
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_render_state *render_state = &i965->render_state;
+    struct gen6_depth_stencil_state *depth_stencil_state;
+    
+    dri_bo_map(render_state->cc.depth_stencil, 1);
+    assert(render_state->cc.depth_stencil->virtual);
+    depth_stencil_state = render_state->cc.depth_stencil->virtual;
+    memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
+    dri_bo_unmap(render_state->cc.depth_stencil);
+}
+
+static void
+gen6_render_setup_states(VADriverContextP ctx,
+                         VASurfaceID surface,
+                         short srcx,
+                         short srcy,
+                         unsigned short srcw,
+                         unsigned short srch,
+                         short destx,
+                         short desty,
+                         unsigned short destw,
+                         unsigned short desth)
+{   /* Fill all state buffers needed for a GEN6 surface render, then upload the vertex data. */
+    i965_render_dest_surface_state(ctx, 0); /* destination uses surface-state index 0 */
+    i965_render_src_surfaces_state(ctx, surface);
+    i965_render_sampler(ctx);
+    i965_render_cc_viewport(ctx);
+    gen6_render_color_calc_state(ctx);
+    gen6_render_blend_state(ctx);
+    gen6_render_depth_stencil_state(ctx);
+    i965_render_upload_vertex(ctx, surface,
+                              srcx, srcy, srcw, srch,
+                              destx, desty, destw, desth);
+}
+
+static void
+gen6_emit_invarient_states(VADriverContextP ctx)
+{   /* Emit 3D pipeline select, multisample, sample-mask and SIP commands; no batch space is reserved here. */
+    OUT_BATCH(ctx, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
+
+    OUT_BATCH(ctx, GEN6_3DSTATE_MULTISAMPLE | (3 - 2)); /* the (N - 2) term appears to encode an N-dword command length */
+    OUT_BATCH(ctx, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
+              GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
+    OUT_BATCH(ctx, 0);
+
+    OUT_BATCH(ctx, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
+    OUT_BATCH(ctx, 1); /* sample mask value 1 */
+
+    /* Set system instruction pointer */
+    OUT_BATCH(ctx, CMD_STATE_SIP | 0);
+    OUT_BATCH(ctx, 0);
+}
+
+static void
+gen6_emit_state_base_address(VADriverContextP ctx)
+{   /* Emit the 10-dword STATE_BASE_ADDRESS: only the surface state base is relocated to a BO, every other field is 0 + modify bit. */
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_render_state *render_state = &i965->render_state;
+
+    OUT_BATCH(ctx, CMD_STATE_BASE_ADDRESS | (10 - 2));
+    OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* General state base address */
+    OUT_RELOC(ctx, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
+    OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
+    OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* Indirect object base address */
+    OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* Instruction base address */
+    OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* General state upper bound */
+    OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
+    OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
+    OUT_BATCH(ctx, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
+}
+
+static void
+gen6_emit_viewport_state_pointers(VADriverContextP ctx)
+{   /* Emit 3DSTATE_VIEWPORT_STATE_POINTERS with only the CC viewport flagged as modified. */
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_render_state *render_state = &i965->render_state;
+
+    OUT_BATCH(ctx, GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
+              GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
+              (4 - 2));
+    OUT_BATCH(ctx, 0); /* NOTE(review): presumably the CLIP viewport pointer (not modified) -- confirm */
+    OUT_BATCH(ctx, 0); /* NOTE(review): presumably the SF viewport pointer (not modified) -- confirm */
+    OUT_RELOC(ctx, render_state->cc.viewport, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* CC viewport */
+}
+
+static void
+gen6_emit_urb(VADriverContextP ctx)
+{   /* Emit 3DSTATE_URB: VS size field 0 with 24 entries, and zero size/entries for GS. */
+    OUT_BATCH(ctx, GEN6_3DSTATE_URB | (3 - 2));
+    OUT_BATCH(ctx, ((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
+              (24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
+    OUT_BATCH(ctx, (0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
+              (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
+}
+
+static void
+gen6_emit_cc_state_pointers(VADriverContextP ctx)
+{   /* Emit 3DSTATE_CC_STATE_POINTERS referencing the blend, depth/stencil and color-calc BOs, in that order. */
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_render_state *render_state = &i965->render_state;
+
+    OUT_BATCH(ctx, GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
+    OUT_RELOC(ctx, render_state->cc.blend, I915_GEM_DOMAIN_INSTRUCTION, 0, 1); /* NOTE(review): delta 1 presumably sets the per-pointer "modify enable" bit -- confirm */
+    OUT_RELOC(ctx, render_state->cc.depth_stencil, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+    OUT_RELOC(ctx, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
+}
+
+static void
+gen6_emit_sampler_state_pointers(VADriverContextP ctx)
+{   /* Emit 3DSTATE_SAMPLER_STATE_POINTERS with only the PS pointer flagged as modified. */
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_render_state *render_state = &i965->render_state;
+
+    OUT_BATCH(ctx, GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
+              GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
+              (4 - 2));
+    OUT_BATCH(ctx, 0); /* VS */
+    OUT_BATCH(ctx, 0); /* GS */
+    OUT_RELOC(ctx,render_state->wm.sampler, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* PS: the sampler state BO */
+}
+
+static void
+gen6_emit_binding_table(VADriverContextP ctx)
+{   /* Emit the binding table pointers command with only the PS entry flagged as modified. */
+    /* Binding table pointers */
+    OUT_BATCH(ctx, CMD_BINDING_TABLE_POINTERS |
+              GEN6_BINDING_TABLE_MODIFY_PS |
+              (4 - 2));
+    OUT_BATCH(ctx, 0);		/* vs */
+    OUT_BATCH(ctx, 0);		/* gs */
+    /* Only the PS uses the binding table */
+    OUT_BATCH(ctx, BINDING_TABLE_OFFSET); /* plain offset, no relocation; presumably relative to the surface state base -- confirm */
+}
+
+static void
+gen6_emit_depth_buffer_state(VADriverContextP ctx)
+{   /* Emit a 7-dword depth buffer command of surface type NULL, followed by CLEAR_PARAMS with value 0. */
+    OUT_BATCH(ctx, CMD_DEPTH_BUFFER | (7 - 2));
+    OUT_BATCH(ctx, (I965_SURFACE_NULL << CMD_DEPTH_BUFFER_TYPE_SHIFT) |
+              (I965_DEPTHFORMAT_D32_FLOAT << CMD_DEPTH_BUFFER_FORMAT_SHIFT));
+    OUT_BATCH(ctx, 0); /* remaining five payload dwords are all zero */
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+
+    OUT_BATCH(ctx, CMD_CLEAR_PARAMS | (2 - 2));
+    OUT_BATCH(ctx, 0);
+}
+
+static void
+gen6_emit_drawing_rectangle(VADriverContextP ctx)
+{   /* Thin wrapper: the GEN6 path reuses i965_render_drawing_rectangle() unchanged. */
+    i965_render_drawing_rectangle(ctx);
+}
+
+static void 
+gen6_emit_vs_state(VADriverContextP ctx)
+{   /* Emit a 5-dword VS constant command and a 6-dword 3DSTATE_VS, both with all-zero payloads. */
+    /* disable VS constant buffer */
+    OUT_BATCH(ctx, GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+	
+    OUT_BATCH(ctx, GEN6_3DSTATE_VS | (6 - 2));
+    OUT_BATCH(ctx, 0); /* without VS kernel */
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0); /* pass-through */
+}
+
+static void 
+gen6_emit_gs_state(VADriverContextP ctx)
+{   /* Emit a 5-dword GS constant command and a 7-dword 3DSTATE_GS, both with all-zero payloads. */
+    /* disable GS constant buffer */
+    OUT_BATCH(ctx, GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+	
+    OUT_BATCH(ctx, GEN6_3DSTATE_GS | (7 - 2));
+    OUT_BATCH(ctx, 0); /* without GS kernel */
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0); /* pass-through */
+}
+
+static void 
+gen6_emit_clip_state(VADriverContextP ctx)
+{   /* Emit the 4-dword 3DSTATE_CLIP with every payload dword zero. */
+    OUT_BATCH(ctx, GEN6_3DSTATE_CLIP | (4 - 2));
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0); /* pass-through */
+    OUT_BATCH(ctx, 0);
+}
+
+static void 
+gen6_emit_sf_state(VADriverContextP ctx)
+{   /* Emit the 20-dword 3DSTATE_SF: 1 output, URB read length 1 at offset 0, no culling; DW5..DW19 are zero. */
+    OUT_BATCH(ctx, GEN6_3DSTATE_SF | (20 - 2));
+    OUT_BATCH(ctx, (1 << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
+              (1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) |
+              (0 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, GEN6_3DSTATE_SF_CULL_NONE);
+    OUT_BATCH(ctx, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0); /* DW9 */
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0); /* DW14 */
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0); /* DW19 */
+}
+
+static void 
+gen6_emit_wm_state(VADriverContextP ctx, int kernel)
+{   /* Emit an all-zero PS constant command, then the 9-dword 3DSTATE_WM pointing at render_kernels[kernel]. */
+    /* disable WM constant buffer */
+    OUT_BATCH(ctx, GEN6_3DSTATE_CONSTANT_PS | (5 - 2));
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+
+    OUT_BATCH(ctx, GEN6_3DSTATE_WM | (9 - 2));
+    OUT_RELOC(ctx, render_kernels[kernel].bo, /* `kernel` indexes the render_kernels table */
+              I915_GEM_DOMAIN_INSTRUCTION, 0,
+              0);
+    OUT_BATCH(ctx, (1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) | /* NOTE(review): "SHITF" looks like a typo of SHIFT; it must match the macro definition, so rename both together */
+              (5 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, (6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */
+    OUT_BATCH(ctx, ((40 - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) | /* field holds (thread count - 1), here for 40 threads */
+              GEN6_3DSTATE_WM_DISPATCH_ENABLE |
+              GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
+    OUT_BATCH(ctx, (1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) |
+              GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
+    OUT_BATCH(ctx, 0);
+    OUT_BATCH(ctx, 0);
+}
+
+static void
+gen6_emit_vertex_element_state(VADriverContextP ctx)
+{   /* Emit two R32G32_FLOAT vertex elements read from vertex buffer 0, at byte offsets 0 and 8. */
+    /* Set up our vertex elements, sourced from the single vertex buffer. */
+    OUT_BATCH(ctx, CMD_VERTEX_ELEMENTS | (5 - 2));
+    /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
+    OUT_BATCH(ctx, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
+              GEN6_VE0_VALID |
+              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
+              (0 << VE0_OFFSET_SHIFT));
+    OUT_BATCH(ctx, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
+              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
+              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
+    /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
+    OUT_BATCH(ctx, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
+              GEN6_VE0_VALID |
+              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
+              (8 << VE0_OFFSET_SHIFT));
+    OUT_BATCH(ctx, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | 
+              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
+              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
+              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
+}
+
+static void
+gen6_emit_vertices(VADriverContextP ctx)
+{   /* Bind the vertex buffer and emit one 3-vertex RECTLIST primitive. */
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_render_state *render_state = &i965->render_state;
+
+    BEGIN_BATCH(ctx, 11); /* 5 dwords for VERTEX_BUFFERS + 6 dwords for 3DPRIMITIVE */
+    OUT_BATCH(ctx, CMD_VERTEX_BUFFERS | 3);
+    OUT_BATCH(ctx, 
+              (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
+              GEN6_VB0_VERTEXDATA |
+              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT)); /* 16-byte pitch */
+    OUT_RELOC(ctx, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
+    OUT_RELOC(ctx, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4); /* NOTE(review): presumably the end address, 48 bytes = 3 vertices * 16 -- confirm */
+    OUT_BATCH(ctx, 0);
+
+    OUT_BATCH(ctx, 
+              CMD_3DPRIMITIVE |
+              _3DPRIMITIVE_VERTEX_SEQUENTIAL |
+              (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
+              (0 << 9) |
+              4);
+    OUT_BATCH(ctx, 3); /* vertex count per instance */
+    OUT_BATCH(ctx, 0); /* start vertex offset */
+    OUT_BATCH(ctx, 1); /* single instance */
+    OUT_BATCH(ctx, 0); /* start instance location */
+    OUT_BATCH(ctx, 0); /* index buffer offset, ignored */
+    ADVANCE_BATCH(ctx);
+}
+
+static void
+gen6_render_emit_states(VADriverContextP ctx, int kernel)
+{   /* Emit the whole GEN6 render command sequence between start_atomic/end_atomic; `kernel` selects the PS kernel. */
+    intel_batchbuffer_start_atomic(ctx, 0x1000); /* most emit helpers below call OUT_BATCH without BEGIN_BATCH, so batch space presumably comes from this 0x1000 request -- confirm */
+    intel_batchbuffer_emit_mi_flush(ctx);
+    gen6_emit_invarient_states(ctx);
+    gen6_emit_state_base_address(ctx);
+    gen6_emit_viewport_state_pointers(ctx);
+    gen6_emit_urb(ctx);
+    gen6_emit_cc_state_pointers(ctx);
+    gen6_emit_sampler_state_pointers(ctx);
+    gen6_emit_vs_state(ctx);
+    gen6_emit_gs_state(ctx);
+    gen6_emit_clip_state(ctx);
+    gen6_emit_sf_state(ctx);
+    gen6_emit_wm_state(ctx, kernel);
+    gen6_emit_binding_table(ctx);
+    gen6_emit_depth_buffer_state(ctx);
+    gen6_emit_drawing_rectangle(ctx);
+    gen6_emit_vertex_element_state(ctx);
+    gen6_emit_vertices(ctx); /* last: contains the 3DPRIMITIVE command */
+    intel_batchbuffer_end_atomic(ctx);
+}
+
+static void
+gen6_render_put_surface(VADriverContextP ctx,
                         VASurfaceID surface,
                         short srcx,
                         short srcy,
@@ -1488,22 +1910,147 @@ i965_render_put_subpic(VADriverContextP ctx,
                         short destx,
                         short desty,
                         unsigned short destw,
-                        unsigned short desth)
+                        unsigned short desth,
+                        unsigned int flag)
+{
+    gen6_render_initialize(ctx);
+    gen6_render_setup_states(ctx, surface,
+                             srcx, srcy, srcw, srch,
+                             destx, desty, destw, desth);
+    i965_clear_dest_region(ctx);
+    gen6_render_emit_states(ctx, PS_KERNEL);
+    intel_batchbuffer_flush(ctx);
+}
+
+static void
+gen6_subpicture_render_blend_state(VADriverContextP ctx)
+{   /* Program the blend state BO for subpictures: src*SRC_ALPHA + dst*INV_SRC_ALPHA (ADD), with pre/post-blend clamping. */
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct i965_render_state *render_state = &i965->render_state;
+    struct gen6_blend_state *blend_state;
+
+    /* Only cc.blend is mapped here; cc.state is mapped and unmapped by gen6_render_color_calc_state(), so it is not unmapped again. */
+    dri_bo_map(render_state->cc.blend, 1);
+    assert(render_state->cc.blend->virtual);
+    blend_state = render_state->cc.blend->virtual;
+    memset(blend_state, 0, sizeof(*blend_state));
+    blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
+    blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
+    blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
+    blend_state->blend0.blend_enable = 1;
+    blend_state->blend1.post_blend_clamp_enable = 1;
+    blend_state->blend1.pre_blend_clamp_enable = 1;
+    blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
+    dri_bo_unmap(render_state->cc.blend);
+}
+
+static void
+gen6_subpicture_render_setup_states(VADriverContextP ctx,
+                                    VASurfaceID surface,
+                                    short srcx,
+                                    short srcy,
+                                    unsigned short srcw,
+                                    unsigned short srch,
+                                    short destx,
+                                    short desty,
+                                    unsigned short destw,
+                                    unsigned short desth)
+{   /* Fill all GEN6 state buffers for a subpicture render; srcx/srcy/srcw/srch are accepted but not referenced in this body. */
+    VARectangle output_rect;
+
+    output_rect.x      = destx;
+    output_rect.y      = desty;
+    output_rect.width  = destw;
+    output_rect.height = desth;
+
+    i965_render_dest_surface_state(ctx, 0); /* destination uses surface-state index 0 */
+    i965_subpic_render_src_surfaces_state(ctx, surface);
+    i965_render_sampler(ctx);
+    i965_render_cc_viewport(ctx);
+    gen6_render_color_calc_state(ctx);
+    gen6_subpicture_render_blend_state(ctx); /* alpha-blend variant of the blend state */
+    gen6_render_depth_stencil_state(ctx);
+    i965_subpic_render_upload_vertex(ctx, surface, &output_rect);
+}
+
+static void
+gen6_render_put_subpicture(VADriverContextP ctx,
+                           VASurfaceID surface,
+                           short srcx,
+                           short srcy,
+                           unsigned short srcw,
+                           unsigned short srch,
+                           short destx,
+                           short desty,
+                           unsigned short destw,
+                           unsigned short desth)
 {
     struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct object_surface *obj_surface = SURFACE(surface);
     struct object_subpic *obj_subpic = SUBPIC(obj_surface->subpic);
-    assert(obj_subpic);
 
-    i965_render_initialize(ctx);
-    i965_subpic_render_state_setup(ctx, surface,
-	    srcx, srcy, srcw, srch,
-	    destx, desty, destw, desth);
-    i965_subpic_render_pipeline_setup(ctx);
-    i965_render_upload_image_palette(ctx, obj_subpic->image, 0xff);
+    assert(obj_subpic);
+    gen6_render_initialize(ctx);
+    gen6_subpicture_render_setup_states(ctx, surface,
+                                        srcx, srcy, srcw, srch,
+                                        destx, desty, destw, desth);
+    gen6_render_emit_states(ctx, PS_SUBPIC_KERNEL);
     intel_batchbuffer_flush(ctx);
 }
 
+/*
+ * global functions
+ */
+void
+intel_render_put_surface(VADriverContextP ctx,
+                        VASurfaceID surface,
+                        short srcx,
+                        short srcy,
+                        unsigned short srcw,
+                        unsigned short srch,
+                        short destx,
+                        short desty,
+                        unsigned short destw,
+                        unsigned short desth,
+                        unsigned int flag)
+{   /* Public entry point: dispatch to the GEN6 or the i965 surface render path based on the device id. */
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+
+    if (IS_GEN6(i965->intel.device_id))
+        gen6_render_put_surface(ctx, surface,
+                                srcx, srcy, srcw, srch,
+                                destx, desty, destw, desth,
+                                flag);
+    else /* every non-GEN6 device takes the i965 path */
+        i965_render_put_surface(ctx, surface,
+                                srcx, srcy, srcw, srch,
+                                destx, desty, destw, desth,
+                                flag);
+}
+
+void
+intel_render_put_subpicture(VADriverContextP ctx,
+                           VASurfaceID surface,
+                           short srcx,
+                           short srcy,
+                           unsigned short srcw,
+                           unsigned short srch,
+                           short destx,
+                           short desty,
+                           unsigned short destw,
+                           unsigned short desth)
+{   /* Public entry point: dispatch to the GEN6 or the i965 subpicture render path based on the device id. */
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+
+    if (IS_GEN6(i965->intel.device_id))
+        gen6_render_put_subpicture(ctx, surface,
+                                   srcx, srcy, srcw, srch,
+                                   destx, desty, destw, desth);
+    else /* every non-GEN6 device takes the i965 path */
+        i965_render_put_subpicture(ctx, surface,
+                                   srcx, srcy, srcw, srch,
+                                   destx, desty, destw, desth);
+}
 
 Bool 
 i965_render_init(VADriverContextP ctx)
@@ -1515,14 +2062,22 @@ i965_render_init(VADriverContextP ctx)
     /* kernel */
     assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen5) / 
                                  sizeof(render_kernels_gen5[0])));
+    assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) / 
+                                 sizeof(render_kernels_gen6[0])));
 
-    if (IS_IRONLAKE(i965->intel.device_id))
+    if (IS_GEN6(i965->intel.device_id))
+        render_kernels = render_kernels_gen6;
+    else if (IS_IRONLAKE(i965->intel.device_id))
         render_kernels = render_kernels_gen5;
     else
         render_kernels = render_kernels_gen4;
 
     for (i = 0; i < NUM_RENDER_KERNEL; i++) {
         struct render_kernel *kernel = &render_kernels[i];
+
+        if (!kernel->size)
+            continue;
+
         kernel->bo = dri_bo_alloc(i965->intel.bufmgr, 
                                   kernel->name, 
                                   kernel->size, 0x1000);
@@ -1567,22 +2122,19 @@ i965_render_terminate(VADriverContextP ctx)
     render_state->vs.state = NULL;
     dri_bo_unreference(render_state->sf.state);
     render_state->sf.state = NULL;
-    dri_bo_unreference(render_state->wm.binding_table);
-    render_state->wm.binding_table = NULL;
     dri_bo_unreference(render_state->wm.sampler);
     render_state->wm.sampler = NULL;
     dri_bo_unreference(render_state->wm.state);
     render_state->wm.state = NULL;
-
-    for (i = 0; i < MAX_RENDER_SURFACES; i++) {
-        dri_bo_unreference(render_state->wm.surface[i]);
-        render_state->wm.surface[i] = NULL;
-    }
-
+    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
     dri_bo_unreference(render_state->cc.viewport);
     render_state->cc.viewport = NULL;
     dri_bo_unreference(render_state->cc.state);
     render_state->cc.state = NULL;
+    dri_bo_unreference(render_state->cc.blend);
+    render_state->cc.blend = NULL;
+    dri_bo_unreference(render_state->cc.depth_stencil);
+    render_state->cc.depth_stencil = NULL;
 
     if (render_state->draw_region) {
         dri_bo_unreference(render_state->draw_region->bo);
diff --git a/i965_drv_video/i965_render.h b/i965_drv_video/i965_render.h
index 84b50f2..8ff4fe2 100644
--- a/i965_drv_video/i965_render.h
+++ b/i965_drv_video/i965_render.h
@@ -28,8 +28,8 @@
 #ifndef _I965_RENDER_H_
 #define _I965_RENDER_H_
 
-#define MAX_RENDER_SURFACES     16
 #define MAX_SAMPLERS            16
+#define MAX_RENDER_SURFACES     (MAX_SAMPLERS + 1)
 
 #include "i965_post_processing.h"
 
@@ -50,14 +50,15 @@ struct i965_render_state
     struct {
         int sampler_count;
         dri_bo *sampler;
-        dri_bo *surface[MAX_RENDER_SURFACES];
-        dri_bo *binding_table;
         dri_bo *state;
+        dri_bo *surface_state_binding_table_bo;
     } wm;
 
     struct {
         dri_bo *state;
         dri_bo *viewport;
+        dri_bo *blend;
+        dri_bo *depth_stencil;
     } cc;
 
     struct {
@@ -74,7 +75,7 @@ struct i965_render_state
 
 Bool i965_render_init(VADriverContextP ctx);
 Bool i965_render_terminate(VADriverContextP ctx);
-void i965_render_put_surface(VADriverContextP ctx,
+void intel_render_put_surface(VADriverContextP ctx,
                              VASurfaceID surface,
                              short srcx,
                              short srcy,
@@ -88,7 +89,7 @@ void i965_render_put_surface(VADriverContextP ctx,
 
 
 void
-i965_render_put_subpic(VADriverContextP ctx,
+intel_render_put_subpicture(VADriverContextP ctx,
                         VASurfaceID surface,
                         short srcx,
                         short srcy,
diff --git a/i965_drv_video/i965_structs.h b/i965_drv_video/i965_structs.h
index f8be616..5f85269 100644
--- a/i965_drv_video/i965_structs.h
+++ b/i965_drv_video/i965_structs.h
@@ -964,4 +964,105 @@ struct i965_sampler_dndi
     } dw7;
 };
 
+
+struct gen6_blend_state
+{
+    struct {
+        unsigned int dest_blend_factor:5;
+        unsigned int source_blend_factor:5;
+        unsigned int pad3:1;
+        unsigned int blend_func:3;
+        unsigned int pad2:1;
+        unsigned int ia_dest_blend_factor:5;
+        unsigned int ia_source_blend_factor:5;
+        unsigned int pad1:1;
+        unsigned int ia_blend_func:3;
+        unsigned int pad0:1;
+        unsigned int ia_blend_enable:1;
+        unsigned int blend_enable:1;
+    } blend0;
+
+    struct {
+        unsigned int post_blend_clamp_enable:1;
+        unsigned int pre_blend_clamp_enable:1;
+        unsigned int clamp_range:2;
+        unsigned int pad0:4;
+        unsigned int x_dither_offset:2;
+        unsigned int y_dither_offset:2;
+        unsigned int dither_enable:1;
+        unsigned int alpha_test_func:3;
+        unsigned int alpha_test_enable:1;
+        unsigned int pad1:1;
+        unsigned int logic_op_func:4;
+        unsigned int logic_op_enable:1;
+        unsigned int pad2:1;
+        unsigned int write_disable_b:1;
+        unsigned int write_disable_g:1;
+        unsigned int write_disable_r:1;
+        unsigned int write_disable_a:1;
+        unsigned int pad3:1;
+        unsigned int alpha_to_coverage_dither:1;
+        unsigned int alpha_to_one:1;
+        unsigned int alpha_to_coverage:1;
+    } blend1;
+};
+
+struct gen6_color_calc_state
+{
+    struct {
+        unsigned int alpha_test_format:1;
+        unsigned int pad0:14;
+        unsigned int round_disable:1;
+        unsigned int bf_stencil_ref:8;
+        unsigned int stencil_ref:8;
+    } cc0;
+
+    union {
+        float alpha_ref_f;
+        struct {
+            unsigned int ui:8;
+            unsigned int pad0:24;
+        } alpha_ref_fi;
+    } cc1;
+
+    float constant_r;
+    float constant_g;
+    float constant_b;
+    float constant_a;
+};
+
+struct gen6_depth_stencil_state
+{
+    struct {
+        unsigned int pad0:3;
+        unsigned int bf_stencil_pass_depth_pass_op:3;
+        unsigned int bf_stencil_pass_depth_fail_op:3;
+        unsigned int bf_stencil_fail_op:3;
+        unsigned int bf_stencil_func:3;
+        unsigned int bf_stencil_enable:1;
+        unsigned int pad1:2;
+        unsigned int stencil_write_enable:1;
+        unsigned int stencil_pass_depth_pass_op:3;
+        unsigned int stencil_pass_depth_fail_op:3;
+        unsigned int stencil_fail_op:3;
+        unsigned int stencil_func:3;
+        unsigned int stencil_enable:1;
+    } ds0;
+
+    struct {
+        unsigned int bf_stencil_write_mask:8;
+        unsigned int bf_stencil_test_mask:8;
+        unsigned int stencil_write_mask:8;
+        unsigned int stencil_test_mask:8;
+    } ds1;
+
+    struct {
+        unsigned int pad0:26;
+        unsigned int depth_write_enable:1;
+        unsigned int depth_test_func:3;
+        unsigned int pad1:1;
+        unsigned int depth_test_enable:1;
+    } ds2;
+};
+
 #endif /* _I965_STRUCTS_H_ */
diff --git a/i965_drv_video/intel_batchbuffer.c b/i965_drv_video/intel_batchbuffer.c
index 15c3201..4988e9c 100644
--- a/i965_drv_video/intel_batchbuffer.c
+++ b/i965_drv_video/intel_batchbuffer.c
@@ -40,6 +40,7 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch)
     int batch_size = BATCH_SIZE;
 
     assert(batch->flag == I915_EXEC_RENDER ||
+           batch->flag == I915_EXEC_BLT ||
            batch->flag == I915_EXEC_BSD);
 
     dri_bo_unreference(batch->buffer);
@@ -281,21 +282,23 @@ intel_batchbuffer_data_bcs(VADriverContextP ctx, void *data, unsigned int size)
     intel_batchbuffer_data_helper(ctx, intel->batch_bcs, data, size);
 }
 
-static void
-intel_batchbuffer_emit_mi_flush_helper(VADriverContextP ctx,
-                                       struct intel_batchbuffer *batch)
-{
-    intel_batchbuffer_require_space_helper(ctx, batch, 4);
-    intel_batchbuffer_emit_dword_helper(batch, 
-                                        MI_FLUSH | STATE_INSTRUCTION_CACHE_INVALIDATE);
-}
-
 void
 intel_batchbuffer_emit_mi_flush(VADriverContextP ctx)
 {
     struct intel_driver_data *intel = intel_driver_data(ctx);
 
-    intel_batchbuffer_emit_mi_flush_helper(ctx, intel->batch);
+    if (intel->batch->flag == I915_EXEC_BLT) {
+        BEGIN_BLT_BATCH(ctx, 4);
+        OUT_BATCH(ctx, MI_FLUSH_DW);
+        OUT_BATCH(ctx, 0);
+        OUT_BATCH(ctx, 0);
+        OUT_BATCH(ctx, 0);
+        ADVANCE_BATCH(ctx);
+    } else if (intel->batch->flag == I915_EXEC_RENDER) {
+        BEGIN_BATCH(ctx, 1);
+        OUT_BATCH(ctx, MI_FLUSH | MI_FLUSH_STATE_INSTRUCTION_CACHE_INVALIDATE);
+        ADVANCE_BATCH(ctx);
+    }
 }
 
 void
@@ -303,7 +306,18 @@ intel_batchbuffer_emit_mi_flush_bcs(VADriverContextP ctx)
 {
     struct intel_driver_data *intel = intel_driver_data(ctx);
 
-    intel_batchbuffer_emit_mi_flush_helper(ctx, intel->batch_bcs);
+    if (IS_GEN6(intel->device_id)) {
+        BEGIN_BCS_BATCH(ctx, 4);
+        OUT_BCS_BATCH(ctx, MI_FLUSH_DW | MI_FLUSH_DW_VIDEO_PIPELINE_CACHE_INVALIDATE);
+        OUT_BCS_BATCH(ctx, 0);
+        OUT_BCS_BATCH(ctx, 0);
+        OUT_BCS_BATCH(ctx, 0);
+        ADVANCE_BCS_BATCH(ctx);
+    } else {
+        BEGIN_BCS_BATCH(ctx, 1);
+        OUT_BCS_BATCH(ctx, MI_FLUSH | MI_FLUSH_STATE_INSTRUCTION_CACHE_INVALIDATE);
+        ADVANCE_BCS_BATCH(ctx);
+    }
 }
 
 void
@@ -320,7 +334,7 @@ void
 intel_batchbuffer_start_atomic(VADriverContextP ctx, unsigned int size)
 {
     struct intel_driver_data *intel = intel_driver_data(ctx);
-
+    intel_batchbuffer_check_batchbuffer_flag(ctx, I915_EXEC_RENDER);
     intel_batchbuffer_start_atomic_helper(ctx, intel->batch, size);
 }
 
@@ -354,3 +368,64 @@ intel_batchbuffer_end_atomic_bcs(VADriverContextP ctx)
     intel_batchbuffer_end_atomic_helper(intel->batch_bcs);
 }
 
+static void
+intel_batchbuffer_begin_batch_helper(struct intel_batchbuffer *batch, int total)
+{
+    batch->emit_total = total * 4;      /* expected size of this emission; callers pass a count of 4-byte dwords */
+    batch->emit_start = batch->ptr;     /* write position at the start of this emission */
+}
+
+void
+intel_batchbuffer_begin_batch(VADriverContextP ctx, int total)
+{
+   struct intel_driver_data *intel = intel_driver_data(ctx);
+
+   intel_batchbuffer_begin_batch_helper(intel->batch, total);
+}
+
+void
+intel_batchbuffer_begin_batch_bcs(VADriverContextP ctx, int total)
+{
+   struct intel_driver_data *intel = intel_driver_data(ctx);
+
+   intel_batchbuffer_begin_batch_helper(intel->batch_bcs, total);
+}
+
+static void
+intel_batchbuffer_advance_batch_helper(struct intel_batchbuffer *batch)
+{
+    assert(batch->emit_total == (batch->ptr - batch->emit_start)); /* amount emitted since begin_batch must equal the announced size */
+}
+
+void
+intel_batchbuffer_advance_batch(VADriverContextP ctx)
+{
+   struct intel_driver_data *intel = intel_driver_data(ctx);
+
+   intel_batchbuffer_advance_batch_helper(intel->batch);
+}
+
+void
+intel_batchbuffer_advance_batch_bcs(VADriverContextP ctx)
+{
+   struct intel_driver_data *intel = intel_driver_data(ctx);
+
+   intel_batchbuffer_advance_batch_helper(intel->batch_bcs);
+}
+
+void
+intel_batchbuffer_check_batchbuffer_flag(VADriverContextP ctx, int flag)
+{
+    struct intel_driver_data *intel = intel_driver_data(ctx);
+
+    if (flag != I915_EXEC_RENDER &&     /* silently ignore anything but the three known flags */
+        flag != I915_EXEC_BLT &&
+        flag != I915_EXEC_BSD)
+        return;
+
+    if (intel->batch->flag == flag)     /* batch already carries the requested flag: nothing to do */
+        return;
+
+    intel_batchbuffer_flush_helper(ctx, intel->batch); /* flush while the old flag is still set, then switch (helper defined elsewhere) */
+    intel->batch->flag = flag;
+}
diff --git a/i965_drv_video/intel_batchbuffer.h b/i965_drv_video/intel_batchbuffer.h
index 99ab08d..25652e1 100644
--- a/i965_drv_video/intel_batchbuffer.h
+++ b/i965_drv_video/intel_batchbuffer.h
@@ -18,6 +18,9 @@ struct intel_batchbuffer
     int atomic;
     int flag;
 
+    int emit_total;
+    unsigned char *emit_start;
+
     int (*run)(drm_intel_bo *bo, int used,
                drm_clip_rect_t *cliprects, int num_cliprects,
                int DR4, int ring_flag);
@@ -37,6 +40,9 @@ void intel_batchbuffer_start_atomic(VADriverContextP ctx, unsigned int size);
 void intel_batchbuffer_end_atomic(VADriverContextP ctx);
 Bool intel_batchbuffer_flush(VADriverContextP ctx);
 
+void intel_batchbuffer_begin_batch(VADriverContextP ctx, int total);
+void intel_batchbuffer_advance_batch(VADriverContextP ctx);
+
 void intel_batchbuffer_emit_dword_bcs(VADriverContextP ctx, unsigned int x);
 void intel_batchbuffer_emit_reloc_bcs(VADriverContextP ctx, dri_bo *bo, 
                                       uint32_t read_domains, uint32_t write_domains, 
@@ -48,9 +54,19 @@ void intel_batchbuffer_start_atomic_bcs(VADriverContextP ctx, unsigned int size)
 void intel_batchbuffer_end_atomic_bcs(VADriverContextP ctx);
 Bool intel_batchbuffer_flush_bcs(VADriverContextP ctx);
 
-#define BEGIN_BATCH(ctx, n) do {                                \
-   intel_batchbuffer_require_space(ctx, (n) * 4);               \
-} while (0)
+void intel_batchbuffer_begin_batch_bcs(VADriverContextP ctx, int total);
+void intel_batchbuffer_advance_batch_bcs(VADriverContextP ctx);
+
+void intel_batchbuffer_check_batchbuffer_flag(VADriverContextP ctx, int flag);
+
+#define __BEGIN_BATCH(ctx, n, flag) do {                        \
+        intel_batchbuffer_check_batchbuffer_flag(ctx, flag);    \
+        intel_batchbuffer_require_space(ctx, (n) * 4);          \
+        intel_batchbuffer_begin_batch(ctx, (n));                \
+    } while (0)
+
+#define BEGIN_BATCH(ctx, n)             __BEGIN_BATCH(ctx, n, I915_EXEC_RENDER)
+#define BEGIN_BLT_BATCH(ctx, n)         __BEGIN_BATCH(ctx, n, I915_EXEC_BLT)
 
 #define OUT_BATCH(ctx, d) do {                                  \
    intel_batchbuffer_emit_dword(ctx, d);                        \
@@ -63,10 +79,12 @@ Bool intel_batchbuffer_flush_bcs(VADriverContextP ctx);
 } while (0)
 
 #define ADVANCE_BATCH(ctx) do {                                         \
+    intel_batchbuffer_advance_batch(ctx);                               \
 } while (0)
 
 #define BEGIN_BCS_BATCH(ctx, n) do {                                    \
    intel_batchbuffer_require_space_bcs(ctx, (n) * 4);                   \
+   intel_batchbuffer_begin_batch_bcs(ctx, (n));                         \
 } while (0)
 
 #define OUT_BCS_BATCH(ctx, d) do {                                      \
@@ -80,6 +98,7 @@ Bool intel_batchbuffer_flush_bcs(VADriverContextP ctx);
 } while (0)
 
 #define ADVANCE_BCS_BATCH(ctx) do {                                     \
+    intel_batchbuffer_advance_batch_bcs(ctx);                           \
 } while (0)
 
 #endif /* _INTEL_BATCHBUFFER_H_ */
diff --git a/i965_drv_video/intel_batchbuffer_dump.c b/i965_drv_video/intel_batchbuffer_dump.c
index 0732f0f..99c2c1c 100644
--- a/i965_drv_video/intel_batchbuffer_dump.c
+++ b/i965_drv_video/intel_batchbuffer_dump.c
@@ -44,6 +44,7 @@ dump_mi(unsigned int *data, unsigned int offset, int count, unsigned int device,
 	{ 0x00, 0, 1, 1, "MI_NOOP" },
 	{ 0x04, 0, 1, 1, "MI_FLUSH" },
 	{ 0x0a, 0, 1, 1, "MI_BATCH_BUFFER_END" },
+	{ 0x26, 0x3f, 4, 5, "MI_FLUSH_DW" },
     };
 
     opcode = ((data[0] & MASK_MI_OPCODE) >> SHIFT_MI_OPCODE);
@@ -350,6 +351,363 @@ dump_gfxpipe_bsd(unsigned int *data, unsigned int offset, int count, unsigned in
     return length;
 }
 
+static void
+dump_mfx_mode_select(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
+{
+    instr_out(data, offset, 1, 
+              "decoder mode: %d(%s),"
+              "post deblocking output enable %d,"
+              "pre deblocking output enable %d,"
+              "codec select: %d(%s),"
+              "standard select: %d(%s)"
+              "\n",
+              (data[1] >> 16) & 0x1, ((data[1] >> 16) & 0x1) ? "IT" : "VLD",
+              (data[1] >> 9) & 0x1,
+              (data[1] >> 8) & 0x1,
+              (data[1] >> 4) & 0x1, ((data[1] >> 4) & 0x1) ? "Encode" : "Decode",
+              (data[1] >> 0) & 0x3, ((data[1] >> 0) & 0x3) == 0 ? "MPEG2" :
+              ((data[1] >> 0) & 0x3) == 1 ? "VC1" :
+              ((data[1] >> 0) & 0x3) == 2 ? "AVC" : "Reserved");
+    instr_out(data, offset, 2, "dword 02\n");
+    instr_out(data, offset, 3, "dword 03\n");
+}
+
+static void
+dump_mfx_surface_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
+{
+    instr_out(data, offset, 1, "dword 01\n");
+    instr_out(data, offset, 2, "dword 02\n");
+    instr_out(data, offset, 3, "dword 03\n");
+    instr_out(data, offset, 4, "dword 04\n");
+    instr_out(data, offset, 5, "dword 05\n");
+}
+
+static void
+dump_mfx_pipe_buf_addr_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
+{
+    instr_out(data, offset, 1, "dword 01\n");   /* dwords 1..23 follow the header dword of this 0x18-dword command */
+    instr_out(data, offset, 2, "dword 02\n");
+    instr_out(data, offset, 3, "dword 03\n");
+    instr_out(data, offset, 4, "dword 04\n");
+    instr_out(data, offset, 5, "dword 05\n");
+    instr_out(data, offset, 6, "dword 06\n");
+    instr_out(data, offset, 7, "dword 07\n");
+    instr_out(data, offset, 8, "dword 08\n");
+    instr_out(data, offset, 9, "dword 09\n");
+    instr_out(data, offset, 10, "dword 10\n");
+    instr_out(data, offset, 11, "dword 11\n");
+    instr_out(data, offset, 12, "dword 12\n");
+    instr_out(data, offset, 13, "dword 13\n");
+    instr_out(data, offset, 14, "dword 14\n");
+    instr_out(data, offset, 15, "dword 15\n");
+    instr_out(data, offset, 16, "dword 16\n");
+    instr_out(data, offset, 17, "dword 17\n");
+    instr_out(data, offset, 18, "dword 18\n");
+    instr_out(data, offset, 19, "dword 19\n");
+    instr_out(data, offset, 20, "dword 20\n");
+    instr_out(data, offset, 21, "dword 21\n");
+    instr_out(data, offset, 22, "dword 22\n");
+    instr_out(data, offset, 23, "dword 23\n");  /* last dword is index 23; index 24 would read past the command */
+}
+
+static void
+dump_mfx_ind_obj_base_addr_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
+{
+    instr_out(data, offset, 1, "dword 01\n");
+    instr_out(data, offset, 2, "dword 02\n");
+    instr_out(data, offset, 3, "dword 03\n");
+    instr_out(data, offset, 4, "dword 04\n");
+    instr_out(data, offset, 5, "dword 05\n");
+    instr_out(data, offset, 6, "dword 06\n");
+    instr_out(data, offset, 7, "dword 07\n");
+    instr_out(data, offset, 8, "dword 08\n");
+    instr_out(data, offset, 9, "dword 09\n");
+    instr_out(data, offset, 10, "dword 10\n");
+}
+
+static void
+dump_mfx_bsp_buf_base_addr_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
+{
+    instr_out(data, offset, 1, "dword 01\n");
+    instr_out(data, offset, 2, "dword 02\n");
+    instr_out(data, offset, 3, "dword 03\n");
+}
+
+static void
+dump_mfx_aes_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
+{
+    instr_out(data, offset, 1, "dword 01\n");
+    instr_out(data, offset, 2, "dword 02\n");
+    instr_out(data, offset, 3, "dword 03\n");
+    instr_out(data, offset, 4, "dword 04\n");
+    instr_out(data, offset, 5, "dword 05\n");
+    instr_out(data, offset, 6, "dword 06\n");
+}
+
+static void
+dump_mfx_state_pointer(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
+{
+    instr_out(data, offset, 1, "dword 01\n");
+}
+
+static int
+dump_mfx_common(unsigned int *data, unsigned int offset, int count, unsigned int device, int *failures)
+{
+    unsigned int subopcode;
+    int length, i;
+
+    struct {
+	unsigned int subopcode;
+	int min_len;
+	int max_len;
+	char *name;
+        void (*detail)(unsigned int *data, unsigned int offset, unsigned int device, int  *failures);
+    } mfx_common_commands[] = {         /* min_len/max_len are total dwords, header included */
+        { SUBOPCODE_MFX(0, 0), 0x04, 0x04, "MFX_PIPE_MODE_SELECT", dump_mfx_mode_select },
+        { SUBOPCODE_MFX(0, 1), 0x06, 0x06, "MFX_SURFACE_STATE", dump_mfx_surface_state },
+        { SUBOPCODE_MFX(0, 2), 0x18, 0x18, "MFX_PIPE_BUF_ADDR_STATE", dump_mfx_pipe_buf_addr_state },
+        { SUBOPCODE_MFX(0, 3), 0x0b, 0x0b, "MFX_IND_OBJ_BASE_ADDR_STATE", dump_mfx_ind_obj_base_addr_state },
+        { SUBOPCODE_MFX(0, 4), 0x04, 0x04, "MFX_BSP_BUF_BASE_ADDR_STATE", dump_mfx_bsp_buf_base_addr_state },
+        { SUBOPCODE_MFX(0, 5), 0x07, 0x07, "MFX_AES_STATE", dump_mfx_aes_state },
+        { SUBOPCODE_MFX(0, 6), 0x02, 0x02, "MFX_STATE_POINTER", dump_mfx_state_pointer }, /* header + 1 dword; 0 could never match since length >= 2 */
+    };
+
+    subopcode = ((data[0] & MASK_GFXPIPE_SUBOPCODE) >> SHIFT_GFXPIPE_SUBOPCODE);
+
+    for (i = 0; i < ARRAY_ELEMS(mfx_common_commands); i++) {
+        if (subopcode == mfx_common_commands[i].subopcode) {
+            unsigned int index;
+
+            length = (data[0] & MASK_GFXPIPE_LENGTH) >> SHIFT_GFXPIPE_LENGTH;
+            length += 2;                /* length field excludes two dwords of the command */
+            instr_out(data, offset, 0, "%s\n", mfx_common_commands[i].name);
+
+            if (length < mfx_common_commands[i].min_len || 
+                length > mfx_common_commands[i].max_len) {
+                fprintf(gout, "Bad length(%d) in %s [%d, %d]\n", 
+                        length, mfx_common_commands[i].name,
+                        mfx_common_commands[i].min_len,
+                        mfx_common_commands[i].max_len);
+            }
+
+            if (length - 1 >= count)    /* command claims more dwords than remain in the buffer */
+                BUFFER_FAIL(count, length, mfx_common_commands[i].name);
+
+            if (mfx_common_commands[i].detail)
+                mfx_common_commands[i].detail(data, offset, device, failures);
+            else {
+                for (index = 1; index < length; index++)
+                    instr_out(data, offset, index, "dword %d\n", index);
+            }
+
+	    return length;
+	}
+    }
+
+    instr_out(data, offset, 0, "UNKNOWN MFX COMMON COMMAND\n");
+    (*failures)++;
+    return 1;                           /* skip only the unrecognised header dword */
+}
+
+static void
+dump_mfx_avc_img_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
+{
+    instr_out(data, offset, 1, "dword 01\n");
+    instr_out(data, offset, 2, "dword 02\n");
+    instr_out(data, offset, 3, "dword 03\n");
+    instr_out(data, offset, 4, "dword 04\n");
+    instr_out(data, offset, 5, "dword 05\n");
+    instr_out(data, offset, 6, "dword 06\n");
+    instr_out(data, offset, 7, "dword 07\n");
+    instr_out(data, offset, 8, "dword 08\n");
+    instr_out(data, offset, 9, "dword 09\n");
+    instr_out(data, offset, 10, "dword 10\n");
+    instr_out(data, offset, 11, "dword 11\n");
+    instr_out(data, offset, 12, "dword 12\n");
+}
+
+static void
+dump_mfx_avc_qm_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
+{
+    unsigned int length = ((data[0] & MASK_GFXPIPE_LENGTH) >> SHIFT_GFXPIPE_LENGTH) + 2;
+    int i;
+
+    instr_out(data, offset, 1, "user default: %02x, QM list present: %02x\n", 
+              (data[1] >> 8) & 0xff, data[1] & 0xff);
+
+    for (i = 2; i < length; i++) {
+        instr_out(data, offset, i, "dword %d\n", i);
+    }
+}
+
+static void
+dump_mfx_avc_directmode_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
+{
+    int i;
+
+    for (i = 1; i < 33; i++) {
+        instr_out(data, offset, i, "Direct MV Buffer Base Address for Picture %d\n", i - 1);
+    }
+
+    for (i = 33; i < 35; i++) {
+        instr_out(data, offset, i, "Direct MV Buffer Base Address for Current Decoding Frame/Field\n");
+    }
+
+    for (i = 35; i < 69; i++) {
+        instr_out(data, offset, i, "POC List\n");
+    }
+}
+
+static void
+dump_mfx_avc_slice_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
+{
+    instr_out(data, offset, 1, "dword 01\n");
+    instr_out(data, offset, 2, "dword 02\n");
+    instr_out(data, offset, 3, "dword 03\n");
+    instr_out(data, offset, 4, "dword 04\n");
+    instr_out(data, offset, 5, "dword 05\n");
+    instr_out(data, offset, 6, "dword 06\n");
+    instr_out(data, offset, 7, "dword 07\n");
+    instr_out(data, offset, 8, "dword 08\n");
+    instr_out(data, offset, 9, "dword 09\n");
+}
+
+static void
+dump_mfx_avc_ref_idx_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
+{
+    instr_out(data, offset, 1, "dword 01\n");
+    instr_out(data, offset, 2, "dword 02\n");
+    instr_out(data, offset, 3, "dword 03\n");
+    instr_out(data, offset, 4, "dword 04\n");
+    instr_out(data, offset, 5, "dword 05\n");
+    instr_out(data, offset, 6, "dword 06\n");
+    instr_out(data, offset, 7, "dword 07\n");
+    instr_out(data, offset, 8, "dword 08\n");
+    instr_out(data, offset, 9, "dword 09\n");
+}
+
+static void
+dump_mfx_avc_weightoffset_state(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
+{
+    int i, length = ((data[0] & MASK_GFXPIPE_LENGTH) >> SHIFT_GFXPIPE_LENGTH) + 2; /* total dwords, derived as in dump_mfx_avc_qm_state */
+
+    instr_out(data, offset, 1, 
+              "Weight and Offset L%d table\n",
+              (data[1] >> 0) & 0x1);
+
+    for (i = 2; i < length; i++) {      /* dump the whole payload instead of a fixed dwords 2..30 */
+        instr_out(data, offset, i, "dword %d\n", i);
+    }
+}
+
+static void
+dump_mfd_bsd_object(unsigned int *data, unsigned int offset, unsigned int device, int *failures)
+{
+    int is_phantom_slice = ((data[1] & 0x3fffff) == 0);
+
+    if (is_phantom_slice) {
+        instr_out(data, offset, 1, "phantom slice\n");
+        instr_out(data, offset, 2, "dword 02\n");
+        instr_out(data, offset, 3, "dword 03\n");
+        instr_out(data, offset, 4, "dword 04\n");
+        instr_out(data, offset, 5, "dword 05\n");
+    } else {
+        instr_out(data, offset, 1, "Indirect BSD Data Length: %d\n", data[1] & 0x3fffff);
+        instr_out(data, offset, 2, "Indirect BSD Data Start Address: 0x%08x\n", data[2] & 0x1fffffff);
+        instr_out(data, offset, 3, "dword 03\n");
+        instr_out(data, offset, 4,
+                  "First_MB_Byte_Offset of Slice Data from Slice Header: 0x%08x,"
+                  "slice header skip mode: %d"
+                  "\n",
+                  (data[4] >> 16),
+                  (data[4] >> 6) & 0x1);
+        instr_out(data, offset, 5, "dword 05\n");
+    }
+}
+
+static int
+dump_mfx_avc(unsigned int *data, unsigned int offset, int count, unsigned int device, int *failures)
+{
+    unsigned int subopcode;
+    int length, i;
+
+    struct {
+	unsigned int subopcode;
+	int min_len;
+	int max_len;
+	char *name;
+        void (*detail)(unsigned int *data, unsigned int offset, unsigned int device, int  *failures);
+    } mfx_avc_commands[] = {
+        { SUBOPCODE_MFX(0, 0), 0x0d, 0x0d, "MFX_AVC_IMG_STATE", dump_mfx_avc_img_state },
+        { SUBOPCODE_MFX(0, 1), 0x02, 0x3a, "MFX_AVC_QM_STATE", dump_mfx_avc_qm_state },
+        { SUBOPCODE_MFX(0, 2), 0x45, 0x45, "MFX_AVC_DIRECTMODE_STATE", dump_mfx_avc_directmode_state },
+        { SUBOPCODE_MFX(0, 3), 0x0b, 0x0b, "MFX_AVC_SLICE_STATE", dump_mfx_avc_slice_state },
+        { SUBOPCODE_MFX(0, 4), 0x0a, 0x0a, "MFX_AVC_REF_IDX_STATE", dump_mfx_avc_ref_idx_state },
+        { SUBOPCODE_MFX(0, 5), 0x32, 0x32, "MFX_AVC_WEIGHTOFFSET_STATE", dump_mfx_avc_weightoffset_state },
+        { SUBOPCODE_MFX(1, 8), 0x06, 0x06, "MFD_AVC_BSD_OBJECT", dump_mfd_bsd_object },
+    };
+
+    subopcode = ((data[0] & MASK_GFXPIPE_SUBOPCODE) >> SHIFT_GFXPIPE_SUBOPCODE);
+
+    for (i = 0; i < ARRAY_ELEMS(mfx_avc_commands); i++) {
+        if (subopcode == mfx_avc_commands[i].subopcode) {
+            unsigned int index;
+
+            length = (data[0] & MASK_GFXPIPE_LENGTH) >> SHIFT_GFXPIPE_LENGTH;
+            length += 2;
+            instr_out(data, offset, 0, "%s\n", mfx_avc_commands[i].name);
+
+            if (length < mfx_avc_commands[i].min_len || 
+                length > mfx_avc_commands[i].max_len) {
+                fprintf(gout, "Bad length(%d) in %s [%d, %d]\n", 
+                        length, mfx_avc_commands[i].name,
+                        mfx_avc_commands[i].min_len,
+                        mfx_avc_commands[i].max_len);
+            }
+
+            if (length - 1 >= count)
+                BUFFER_FAIL(count, length, mfx_avc_commands[i].name);
+
+            if (mfx_avc_commands[i].detail)
+                mfx_avc_commands[i].detail(data, offset, device, failures);
+            else {
+                for (index = 1; index < length; index++)
+                    instr_out(data, offset, index, "dword %d\n", index);
+            }
+
+	    return length;
+	}
+    }
+
+    instr_out(data, offset, 0, "UNKNOWN MFX AVC COMMAND\n");
+    (*failures)++;
+    return 1;
+}
+
+static int
+dump_gfxpipe_mfx(unsigned int *data, unsigned int offset, int count, unsigned int device, int *failures)
+{
+    int length;
+
+    switch ((data[0] & MASK_GFXPIPE_OPCODE) >> SHIFT_GFXPIPE_OPCODE) {
+    case OPCODE_MFX_COMMON:
+        length = dump_mfx_common(data, offset, count, device, failures);
+        break;
+
+    case OPCODE_MFX_AVC:
+        length = dump_mfx_avc(data, offset, count, device, failures);
+        break;
+
+    default:
+        length = 1;
+        (*failures)++;
+        instr_out(data, offset, 0, "UNKNOWN MFX OPCODE\n");
+        break;
+    }
+
+    return length;
+}
+
 static int
 dump_gfxpipe(unsigned int *data, unsigned int offset, int count, unsigned int device, int *failures)
 {
@@ -361,7 +719,11 @@ dump_gfxpipe(unsigned int *data, unsigned int offset, int count, unsigned int de
         break;
 
     case GFXPIPE_BSD:
-        length = dump_gfxpipe_bsd(data, offset, count, device, failures);
+        if (IS_GEN6(device))
+            length = dump_gfxpipe_mfx(data, offset, count, device, failures);
+        else
+            length = dump_gfxpipe_bsd(data, offset, count, device, failures);
+
         break;
 
     default:
diff --git a/i965_drv_video/intel_batchbuffer_dump.h b/i965_drv_video/intel_batchbuffer_dump.h
index ad096a9..e76b4f7 100644
--- a/i965_drv_video/intel_batchbuffer_dump.h
+++ b/i965_drv_video/intel_batchbuffer_dump.h
@@ -36,6 +36,12 @@
 #define SUBOPCODE_BSD_IND_OBJ   4
 #define SUBOPCODE_BSD_OBJECT    8
 
+/* MFX */
+#define OPCODE_MFX_COMMON       0
+#define OPCODE_MFX_AVC          1
+
+#define SUBOPCODE_MFX(A, B)     ((A) << 5 | (B))
+
 /* MI */
 #define MASK_MI_OPCODE          0x1F800000
 
diff --git a/i965_drv_video/intel_driver.h b/i965_drv_video/intel_driver.h
index 1e2adfa..436cccf 100644
--- a/i965_drv_video/intel_driver.h
+++ b/i965_drv_video/intel_driver.h
@@ -29,7 +29,10 @@
 #define MI_BATCH_BUFFER_START                   (CMD_MI | (0x31 << 23))
 
 #define MI_FLUSH                                (CMD_MI | (0x4 << 23))
-#define STATE_INSTRUCTION_CACHE_INVALIDATE      (0x1 << 0)
+#define   MI_FLUSH_STATE_INSTRUCTION_CACHE_INVALIDATE   (0x1 << 0)
+
+#define MI_FLUSH_DW                             (CMD_MI | (0x26 << 23) | 0x2)
+#define   MI_FLUSH_DW_VIDEO_PIPELINE_CACHE_INVALIDATE   (0x1 << 7)
 
 #define XY_COLOR_BLT_CMD                        (CMD_2D | (0x50 << 22) | 0x04)
 #define XY_COLOR_BLT_WRITE_ALPHA                (1 << 21)
@@ -124,9 +127,20 @@ struct intel_region
 #define PCI_CHIP_IRONLAKE_D_G           0x0042
 #define PCI_CHIP_IRONLAKE_M_G           0x0046
 
-#define IS_G45(devid)           (devid == PCI_CHIP_IGD_E_G || \
-                                 devid == PCI_CHIP_Q45_G || \
-                                 devid == PCI_CHIP_G45_G || \
+#ifndef PCI_CHIP_SANDYBRIDGE_GT1
+#define PCI_CHIP_SANDYBRIDGE_GT1	0x0102  /* Desktop */
+#define PCI_CHIP_SANDYBRIDGE_GT2	0x0112
+#define PCI_CHIP_SANDYBRIDGE_GT2_PLUS	0x0122
+#define PCI_CHIP_SANDYBRIDGE_M_GT1	0x0106  /* Mobile */
+#define PCI_CHIP_SANDYBRIDGE_M_GT2	0x0116
+#define PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS	0x0126
+#define PCI_CHIP_SANDYBRIDGE_S_GT	0x010A  /* Server */
+#endif
+
+
+#define IS_G45(devid)           (devid == PCI_CHIP_IGD_E_G ||   \
+                                 devid == PCI_CHIP_Q45_G ||     \
+                                 devid == PCI_CHIP_G45_G ||     \
                                  devid == PCI_CHIP_G41_G)
 #define IS_GM45(devid)          (devid == PCI_CHIP_GM45_GM)
 #define IS_G4X(devid)		(IS_G45(devid) || IS_GM45(devid))
@@ -135,4 +149,12 @@ struct intel_region
 #define IS_IRONLAKE_M(devid)    (devid == PCI_CHIP_IRONLAKE_M_G)
 #define IS_IRONLAKE(devid)      (IS_IRONLAKE_D(devid) || IS_IRONLAKE_M(devid))
 
+#define IS_GEN6(devid)          (devid == PCI_CHIP_SANDYBRIDGE_GT1 || \
+                                 devid == PCI_CHIP_SANDYBRIDGE_GT2 || \
+                                 devid == PCI_CHIP_SANDYBRIDGE_GT2_PLUS ||\
+                                 devid == PCI_CHIP_SANDYBRIDGE_M_GT1 || \
+                                 devid == PCI_CHIP_SANDYBRIDGE_M_GT2 || \
+                                 devid == PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS || \
+                                 devid == PCI_CHIP_SANDYBRIDGE_S_GT)
+
 #endif /* _INTEL_DRIVER_H_ */
diff --git a/i965_drv_video/shaders/render/Makefile.am b/i965_drv_video/shaders/render/Makefile.am
index fb9b11e..ba019af 100644
--- a/i965_drv_video/shaders/render/Makefile.am
+++ b/i965_drv_video/shaders/render/Makefile.am
@@ -35,17 +35,36 @@ EXTRA_DIST = $(INTEL_G4I)	\
 	     $(INTEL_G4B)    	\
 	     $(INTEL_G4B_GEN5)
 
+INTEL_G6A =				\
+	exa_wm_src_affine.g6a 		\
+	exa_wm_src_sample_argb.g6a 	\
+	exa_wm_src_sample_planar.g6a 	\
+	exa_wm_write.g6a 		\
+	exa_wm_yuv_rgb.g6a
+
+INTEL_G6B =				\
+	exa_wm_src_affine.g6b 		\
+	exa_wm_src_sample_argb.g6b 	\
+	exa_wm_src_sample_planar.g6b 	\
+	exa_wm_write.g6b 		\
+	exa_wm_yuv_rgb.g6b
+
 if HAVE_GEN4ASM
 
-SUFFIXES = .g4a .g4b
+SUFFIXES = .g4a .g4b .g6a .g6b
 .g4a.g4b:
 	m4 $*.g4a > $*.g4m && intel-gen4asm -o $@ $*.g4m && intel-gen4asm -g 5 -o $@.gen5 $*.g4m && rm $*.g4m
 
+.g6a.g6b:
+	m4 -I$(srcdir) -s $< > $*.g6m && intel-gen4asm -g 6 -o $@ $*.g6m && rm $*.g6m
+
 $(INTEL_G4B): $(INTEL_G4I)
+$(INTEL_G6B): $(INTEL_G4I)
 
-BUILT_SOURCES= $(INTEL_G4B)
+BUILT_SOURCES= $(INTEL_G4B) $(INTEL_G6B)
 
 clean-local:
 	-rm -f $(INTEL_G4B)
 	-rm -f $(INTEL_G4B_GEN5)
+	-rm -f $(INTEL_G6B)
 endif    
diff --git a/i965_drv_video/shaders/render/exa_wm_src_affine.g4a b/i965_drv_video/shaders/render/exa_wm_src_affine.g6a
similarity index 68%
copy from i965_drv_video/shaders/render/exa_wm_src_affine.g4a
copy to i965_drv_video/shaders/render/exa_wm_src_affine.g6a
index 3194b5a..08195a4 100644
--- a/i965_drv_video/shaders/render/exa_wm_src_affine.g4a
+++ b/i965_drv_video/shaders/render/exa_wm_src_affine.g6a
@@ -1,5 +1,5 @@
 /*
- * Copyright © 2006 Intel Corporation
+ * Copyright © 2010 Intel Corporation
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -20,26 +20,28 @@
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  * IN THE SOFTWARE.
  *
- * Authors:
- *    Wang Zhenyu <zhenyu.z.wang at intel.com>
- *    Keith Packard <keithp at keithp.com>
  */
 
 /*
- * Fragment to compute src u/v values under an affine transform
+ * Fragment to compute src u/v values
  */
-
 include(`exa_wm.g4i')
 
-define(`du_dx',	`src_du_dx')
-define(`du_dy',	`src_du_dy')
-define(`uo',	`src_uo')
+define(`ul',    `src_u')
+define(`uh',    `m3')
+define(`vl',    `src_v')
+define(`vh',    `m5')
+
+define(`bl',    `g2.0<8,8,1>F')
+define(`bh',    `g4.0<8,8,1>F')
 
-define(`dv_dx',	`src_dv_dx')
-define(`dv_dy',	`src_dv_dy')
-define(`vo',	`src_vo')
+define(`a0_a_x',`g6.0<0,1,0>F')
+define(`a0_a_y',`g6.16<0,1,0>F')
 
-define(`u',	`src_u')
-define(`v',	`src_v')
+/* U */
+pln (8) ul<1>F a0_a_x bl { align1 }; /* pixel 0-7 */
+pln (8) uh<1>F a0_a_x bh { align1 }; /* pixel 8-15 */
 
-include(`exa_wm_affine.g4i')
+/* V */
+pln (8) vl<1>F a0_a_y bl { align1 }; /* pixel 0-7 */
+pln (8) vh<1>F a0_a_y bh { align1 }; /* pixel 8-15 */
diff --git a/i965_drv_video/shaders/render/exa_wm_src_affine.g6b b/i965_drv_video/shaders/render/exa_wm_src_affine.g6b
new file mode 100644
index 0000000..7035e6a
--- /dev/null
+++ b/i965_drv_video/shaders/render/exa_wm_src_affine.g6b
@@ -0,0 +1,4 @@
+   { 0x0060005a, 0x204077be, 0x000000c0, 0x008d0040 },
+   { 0x0060005a, 0x206077be, 0x000000c0, 0x008d0080 },
+   { 0x0060005a, 0x208077be, 0x000000d0, 0x008d0040 },
+   { 0x0060005a, 0x20a077be, 0x000000d0, 0x008d0080 },
diff --git a/i965_drv_video/shaders/render/exa_wm_src_sample_argb.g4a b/i965_drv_video/shaders/render/exa_wm_src_sample_argb.g6a
similarity index 96%
copy from i965_drv_video/shaders/render/exa_wm_src_sample_argb.g4a
copy to i965_drv_video/shaders/render/exa_wm_src_sample_argb.g6a
index c20f53f..67bb888 100644
--- a/i965_drv_video/shaders/render/exa_wm_src_sample_argb.g4a
+++ b/i965_drv_video/shaders/render/exa_wm_src_sample_argb.g6a
@@ -36,12 +36,13 @@ include(`exa_wm.g4i')
 
 /* load argb */
 mov (1) g0.8<1>UD	0x00000000UD { align1 mask_disable };
+mov (8) src_msg<1>UD	g0<8,8,1>UD  { align1 mask_disable };
 
 /* src_msg will be copied with g0, as it contains send desc */
 /* emit sampler 'send' cmd */
 send (16) src_msg_ind		/* msg reg index */
 	src_sample_base<1>UW 	/* readback */
-	g0<8,8,1>UW		/* copy to msg start reg*/
+	null
 	sampler (1,0,F)		/* sampler message description, (binding_table,sampler_index,datatype)
 				/* here(src->dst) we should use src_sampler and src_surface */
 	mlen 5 rlen 8 { align1 };   /* required message len 5, readback len 8 */
diff --git a/i965_drv_video/shaders/render/exa_wm_src_sample_argb.g6b b/i965_drv_video/shaders/render/exa_wm_src_sample_argb.g6b
new file mode 100644
index 0000000..2846491
--- /dev/null
+++ b/i965_drv_video/shaders/render/exa_wm_src_sample_argb.g6b
@@ -0,0 +1,3 @@
+   { 0x00000201, 0x20080061, 0x00000000, 0x00000000 },
+   { 0x00600201, 0x20200022, 0x008d0000, 0x00000000 },
+   { 0x02800031, 0x21c01cc9, 0x00000020, 0x0a8a0001 },
diff --git a/i965_drv_video/shaders/render/exa_wm_src_sample_argb.g4a b/i965_drv_video/shaders/render/exa_wm_src_sample_planar.g6a
similarity index 69%
copy from i965_drv_video/shaders/render/exa_wm_src_sample_argb.g4a
copy to i965_drv_video/shaders/render/exa_wm_src_sample_planar.g6a
index c20f53f..1f78629 100644
--- a/i965_drv_video/shaders/render/exa_wm_src_sample_argb.g4a
+++ b/i965_drv_video/shaders/render/exa_wm_src_sample_planar.g6a
@@ -25,7 +25,7 @@
  *    Keith Packard <keithp at keithp.com>
  */
 
-/* Sample the src surface */
+/* Sample the src surface in planar format */
 
 include(`exa_wm.g4i')
 
@@ -34,14 +34,25 @@ include(`exa_wm.g4i')
 /* use simd16 sampler, param 0 is u, param 1 is v. */
 /* 'payload' loading, assuming tex coord start from g4 */
 
-/* load argb */
-mov (1) g0.8<1>UD	0x00000000UD { align1 mask_disable };
+mov (1) g0.8<1>UD	0x0000c000UD { align1 mask_disable };
+mov (8) src_msg<1>UD	g0<8,8,1>UD  { align1 mask_disable };
 
-/* src_msg will be copied with g0, as it contains send desc */
-/* emit sampler 'send' cmd */
+/* sample UV (CrCb) */
 send (16) src_msg_ind		/* msg reg index */
-	src_sample_base<1>UW 	/* readback */
-	g0<8,8,1>UW		/* copy to msg start reg*/
+	src_sample_g<1>UW 	/* readback */
+	null       
+	sampler (3,2,F)		/* sampler message description, (binding_table,sampler_index,datatype)
+				/* here(src->dst) we should use src_sampler and src_surface */
+	mlen 5 rlen 4 { align1 };   /* required message len 5, readback len 4 */
+
+mov (1) g0.8<1>UD	0x0000e000UD { align1 mask_disable };
+mov (8) src_msg<1>UD	g0<8,8,1>UD  { align1 mask_disable };
+
+/* sample Y */
+send (16) src_msg_ind		/* msg reg index */
+	src_sample_r<1>UW 	/* readback */
+	null
 	sampler (1,0,F)		/* sampler message description, (binding_table,sampler_index,datatype)
 				/* here(src->dst) we should use src_sampler and src_surface */
-	mlen 5 rlen 8 { align1 };   /* required message len 5, readback len 8 */
+	mlen 5 rlen 2 { align1 };   /* required message len 5, readback len 2 */
+	
diff --git a/i965_drv_video/shaders/render/exa_wm_src_sample_planar.g6b b/i965_drv_video/shaders/render/exa_wm_src_sample_planar.g6b
new file mode 100644
index 0000000..ef45022
--- /dev/null
+++ b/i965_drv_video/shaders/render/exa_wm_src_sample_planar.g6b
@@ -0,0 +1,6 @@
+   { 0x00000201, 0x20080061, 0x00000000, 0x0000c000 },
+   { 0x00600201, 0x20200022, 0x008d0000, 0x00000000 },
+   { 0x02800031, 0x22001cc9, 0x00000020, 0x0a4a0203 },
+   { 0x00000201, 0x20080061, 0x00000000, 0x0000e000 },
+   { 0x00600201, 0x20200022, 0x008d0000, 0x00000000 },
+   { 0x02800031, 0x21c01cc9, 0x00000020, 0x0a2a0001 },
diff --git a/i965_drv_video/shaders/render/exa_wm_write.g6a b/i965_drv_video/shaders/render/exa_wm_write.g6a
new file mode 100644
index 0000000..c0f3cc1
--- /dev/null
+++ b/i965_drv_video/shaders/render/exa_wm_write.g6a
@@ -0,0 +1,77 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+include(`exa_wm.g4i')
+
+/*
+ * Prepare data in m2-m3 for Red channel, m4-m5 for Green channel,
+ * m6-m7 for Blue and m8-m9 for Alpha channel
+ */
+define(`slot_r_00',     `m2')
+define(`slot_r_01',     `m3')
+define(`slot_g_00',     `m4')
+define(`slot_g_01',     `m5')
+define(`slot_b_00',     `m6')
+define(`slot_b_01',     `m7')
+define(`slot_a_00',     `m8')
+define(`slot_a_01',     `m9')
+define(`data_port_msg_2_ind',	`2')
+
+mov (8) slot_r_00<1>F     src_sample_r_01<8,8,1>F { align1 };
+mov (8) slot_r_01<1>F     src_sample_r_23<8,8,1>F { align1 };
+
+mov (8) slot_g_00<1>F     src_sample_g_01<8,8,1>F { align1 };
+mov (8) slot_g_01<1>F     src_sample_g_23<8,8,1>F { align1 };
+
+mov (8) slot_b_00<1>F     src_sample_b_01<8,8,1>F { align1 };
+mov (8) slot_b_01<1>F     src_sample_b_23<8,8,1>F { align1 };
+
+mov (8) slot_a_00<1>F     src_sample_a_01<8,8,1>F { align1 };
+mov (8) slot_a_01<1>F     src_sample_a_23<8,8,1>F { align1 };
+
+/* write */
+send (16) 
+	data_port_msg_2_ind 
+	acc0<1>UW 
+	null
+	write (
+	       0,  /* binding_table */
+	       16,  /* pixel scoreboard clear, msg type simd16 single source */
+	       12,  /* render target write */
+	       0,   /* no write commit message */
+	       0  /* headerless render target write */
+	) 
+	mlen 8
+	rlen 0
+	{ align1 EOT };
+
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+nop;
+
diff --git a/i965_drv_video/shaders/render/exa_wm_write.g6b b/i965_drv_video/shaders/render/exa_wm_write.g6b
new file mode 100644
index 0000000..3cb6bff
--- /dev/null
+++ b/i965_drv_video/shaders/render/exa_wm_write.g6b
@@ -0,0 +1,17 @@
+   { 0x00600001, 0x204003be, 0x008d01c0, 0x00000000 },
+   { 0x00600001, 0x206003be, 0x008d01e0, 0x00000000 },
+   { 0x00600001, 0x208003be, 0x008d0200, 0x00000000 },
+   { 0x00600001, 0x20a003be, 0x008d0220, 0x00000000 },
+   { 0x00600001, 0x20c003be, 0x008d0240, 0x00000000 },
+   { 0x00600001, 0x20e003be, 0x008d0260, 0x00000000 },
+   { 0x00600001, 0x210003be, 0x008d0280, 0x00000000 },
+   { 0x00600001, 0x212003be, 0x008d02a0, 0x00000000 },
+   { 0x05800031, 0x24001cc8, 0x00000040, 0x90019000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
+   { 0x0000007e, 0x00000000, 0x00000000, 0x00000000 },
diff --git a/i965_drv_video/shaders/render/exa_wm_yuv_rgb.g4a b/i965_drv_video/shaders/render/exa_wm_yuv_rgb.g6a
similarity index 100%
copy from i965_drv_video/shaders/render/exa_wm_yuv_rgb.g4a
copy to i965_drv_video/shaders/render/exa_wm_yuv_rgb.g6a
diff --git a/i965_drv_video/shaders/render/exa_wm_yuv_rgb.g6b b/i965_drv_video/shaders/render/exa_wm_yuv_rgb.g6b
new file mode 100644
index 0000000..21fa6fb
--- /dev/null
+++ b/i965_drv_video/shaders/render/exa_wm_yuv_rgb.g6b
@@ -0,0 +1,11 @@
+   { 0x00800040, 0x22c07fbd, 0x008d01c0, 0xbd808081 },
+   { 0x00800041, 0x22c07fbd, 0x008d02c0, 0x3f94fdf4 },
+   { 0x00800040, 0x23007fbd, 0x008d0240, 0xbf008084 },
+   { 0x00800040, 0x23407fbd, 0x008d0200, 0xbf008084 },
+   { 0x00800001, 0x240003bc, 0x008d02c0, 0x00000000 },
+   { 0x80800048, 0x21c07fbd, 0x008d0300, 0x3fcc49ba },
+   { 0x00800001, 0x240003bc, 0x008d02c0, 0x00000000 },
+   { 0x00800048, 0x24007fbc, 0x008d0300, 0xbf5020c5 },
+   { 0x80800048, 0x22007fbd, 0x008d0340, 0xbec8b439 },
+   { 0x00800001, 0x240003bc, 0x008d02c0, 0x00000000 },
+   { 0x80800048, 0x22407fbd, 0x008d0340, 0x40011687 },
diff --git a/va/Android.mk b/va/Android.mk
index 494f7c7..795f27f 100644
--- a/va/Android.mk
+++ b/va/Android.mk
@@ -10,15 +10,35 @@ include $(CLEAR_VARS)
 #LIBVA_MINOR_VERSION := 31
 #LIBVA_MAJOR_VERSION := 0 
 
+
+LOCAL_MODULE := libva
+
+LOCAL_SHARED_LIBRARIES := libdl libdrm libcutils
+
+include $(BUILD_SHARED_LIBRARY)
+
+intermediates := $(local-intermediates-dir)
+GEN := $(intermediates)/va_version.h
+$(GEN): PRIVATE_GEN_VERSION := $(LOCAL_PATH)/../build/gen_version.sh
+$(GEN): PRIVATE_INPUT_FILE := $(LOCAL_PATH)/va_version.h.in
+$(GEN): PRIVATE_CUSTOM_TOOL = sh $(PRIVATE_GEN_VERSION) $(LOCAL_PATH)/.. $(PRIVATE_INPUT_FILE) > $@
+$(GEN): $(LOCAL_PATH)/va_version.h.in
+	$(transform-generated-source)
+
+LOCAL_GENERATED_SOURCES += $(GEN) 
+
 LOCAL_SRC_FILES := \
 	va.c \
 	va_trace.c \
-	va_fool.c 
+	va_fool.c
+
 
 LOCAL_CFLAGS += \
 	-DANDROID \
 	-DVA_DRIVERS_PATH="\"$(LIBVA_DRIVERS_PATH)\""
 
+LOCAL_COPY_HEADERS_TO := libva/va
+
 LOCAL_C_INCLUDES += \
 	$(TARGET_OUT_HEADERS)/libva \
 	$(LOCAL_PATH)/x11 \
@@ -31,13 +51,6 @@ LOCAL_COPY_HEADERS := \
 	va_version.h.in \
 	x11/va_dricommon.h 
 
-LOCAL_COPY_HEADERS_TO := libva/va
-
-LOCAL_MODULE := libva
-
-LOCAL_SHARED_LIBRARIES := libdl libdrm libcutils
-
-include $(BUILD_SHARED_LIBRARY)
 
 
 # For libva-android
diff --git a/va/glx/va_glx_impl.c b/va/glx/va_glx_impl.c
index f5bbe91..9d38930 100644
--- a/va/glx/va_glx_impl.c
+++ b/va/glx/va_glx_impl.c
@@ -36,7 +36,7 @@ static void va_glx_error_message(const char *format, ...)
 {
     va_list args;
     va_start(args, format);
-    fprintf(stderr, "[%s] ", PACKAGE_NAME);
+    fprintf(stderr, "libva-glx error: ");
     vfprintf(stderr, format, args);
     va_end(args);
 }
diff --git a/va/glx/va_glx_private.h b/va/glx/va_glx_private.h
index eb1185c..e86efb6 100644
--- a/va/glx/va_glx_private.h
+++ b/va/glx/va_glx_private.h
@@ -25,7 +25,7 @@
 #ifndef VA_GLX_PRIVATE_H
 #define VA_GLX_PRIVATE_H
 
-#include "config.h"
+#include "sysdeps.h"
 #include "va.h"
 #include "va_backend.h"
 #include "va_x11.h"
diff --git a/test/basic/test_01.c b/va/sysdeps.h
similarity index 72%
copy from test/basic/test_01.c
copy to va/sysdeps.h
index 4075655..0752b17 100644
--- a/test/basic/test_01.c
+++ b/va/sysdeps.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007 Intel Corporation. All Rights Reserved.
+ * Copyright (c) 2007-2009 Intel Corporation. All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the
@@ -16,27 +16,29 @@
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * IN NO EVENT SHALL INTEL AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
 
-#define TEST_DESCRIPTION	"Initialize & Terminate"
+#ifndef SYSDEPS_H
+#define SYSDEPS_H
 
-#include "test_common.c"
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
 
-void pre()
-{
-}
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <assert.h>
 
-void post()
-{
-}
+#ifdef ANDROID
+# define Bool  int
+# define True  1
+# define False 0
+#endif
 
-void test()
-{
-  test_init();
-
-  test_terminate();
-}
+#endif /* SYSDEPS_H */
diff --git a/va/va.c b/va/va.c
index 0320ed3..268ba36 100644
--- a/va/va.c
+++ b/va/va.c
@@ -23,11 +23,11 @@
  */
 
 #define _GNU_SOURCE 1
+#include "sysdeps.h"
 #include "va.h"
 #include "va_backend.h"
 #include "va_trace.h"
 #include "va_fool.h"
-#include "config.h"
 
 #include <assert.h>
 #include <stdarg.h>
@@ -37,12 +37,6 @@
 #include <dlfcn.h>
 #include <unistd.h>
 
-#ifdef ANDROID
-#define Bool int
-#define True 1
-#define False 0
-#endif
-
 #define DRIVER_INIT_FUNC	"__vaDriverInit_0_31"
 
 #define DRIVER_EXTENSION	"_drv_video.so"
diff --git a/va/va_tpi.c b/va/va_tpi.c
index 1f01ef0..4472913 100644
--- a/va/va_tpi.c
+++ b/va/va_tpi.c
@@ -23,10 +23,10 @@
  */
 
 #define _GNU_SOURCE 1
+#include "sysdeps.h"
 #include "va.h"
 #include "va_backend.h"
 #include "va_backend_tpi.h"
-#include "config.h"
 
 #include <assert.h>
 #include <stdarg.h>
diff --git a/va/va_version.h b/va/va_version.h
deleted file mode 100644
index 55bf814..0000000
--- a/va/va_version.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright (C) 2009 Splitted-Desktop Systems. All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef VA_VERSION_H
-#define VA_VERSION_H
-
-/**
- * VA_MAJOR_VERSION:
- *
- * The major version of the VA library (1, if %VA_VERSION is 1.2.3)
- */
-#define VA_MAJOR_VERSION   (0)
-
-/**
- * VA_MINOR_VERSION:
- *
- * The minor version of the VA library (2, if %VA_VERSION is 1.2.3)
- */
-#define VA_MINOR_VERSION   (31)
-
-/**
- * VA_MICRO_VERSION:
- *
- * The micro version of the VA library (3, if %VA_VERSION is 1.2.3)
- */
-#define VA_MICRO_VERSION   (1)
-
-/**
- * VA_VERSION:
- *
- * The full version of the VA library, like 1.2.3
- */
-#define VA_VERSION         0.31.1
-
-/**
- * VA_VERSION_S:
- *
- * The full version of the VA library, in string form (suited for
- * string concatenation)
- */
-#define VA_VERSION_S       "0.31.1"
-
-/**
- * VA_VERSION_HEX:
- *
- * Numerically encoded version of the VA library, like 0x010203
- */
-#define VA_VERSION_HEX     ((VA_MAJOR_VERSION << 24) | \
-                            (VA_MINOR_VERSION << 16) | \
-                            (VA_MICRO_VERSION << 8))
-
-/**
- * VA_CHECK_VERSION:
- * @major: major version, like 1 in 1.2.3
- * @minor: minor version, like 2 in 1.2.3
- * @micro: micro version, like 3 in 1.2.3
- *
- * Evaluates to %TRUE if the version of the VA library is greater
- * than @major, @minor and @micro
- */
-#define VA_CHECK_VERSION(major,minor,micro) \
-        (VA_MAJOR_VERSION > (major) || \
-         (VA_MAJOR_VERSION == (major) && VA_MINOR_VERSION > (minor)) || \
-         (VA_MAJOR_VERSION == (major) && VA_MINOR_VERSION == (minor) && VA_MICRO_VERSION >= (micro)))
-
-#endif /* VA_VERSION_H */
diff --git a/va/x11/va_x11.c b/va/x11/va_x11.c
index 70cea30..93eb243 100644
--- a/va/x11/va_x11.c
+++ b/va/x11/va_x11.c
@@ -23,7 +23,7 @@
  */
 
 #define _GNU_SOURCE 1
-#include "config.h"
+#include "sysdeps.h"
 #include "va.h"
 #include "va_backend.h"
 #include "va_x11.h"

-- 
libva packaging



More information about the pkg-multimedia-commits mailing list